21#include "GPUChainTrackingGetters.inc"
45#include "GPUDefParametersRuntime.h"
64GPUChainTracking::GPUChainTracking(
GPUReconstruction*
rec, uint32_t maxTPCHits, uint32_t maxTRDTracklets) :
GPUChain(
rec), mIOPtrs(processors()->ioPtrs), mInputsHost(new
GPUTrackingInputProvider), mInputsShadow(new
GPUTrackingInputProvider), mClusterNativeAccess(new
ClusterNativeAccess), mTriggerBuffer(new
GPUTriggerOutputs), mMaxTPCHits(maxTPCHits), mMaxTRDTracklets(maxTRDTracklets), mDebugFile(new
std::ofstream)
113#ifdef GPUCA_KERNEL_DEBUGGER_OUTPUT
158#ifdef GPUCA_KERNEL_DEBUGGER_OUTPUT
165 gpuMem = constants::GPU_DEFAULT_MEMORY_SIZE;
166 pageLockedHostMem = constants::GPU_DEFAULT_HOST_MEMORY_SIZE;
172 GPUError(
"Invalid Reconstruction Step Setting: dEdx requires TPC Merger to be active");
176 GPUError(
"Invalid GPU Reconstruction Step Setting: dEdx requires TPC Merger to be active");
180 GPUError(
"Invalid Reconstruction Step Setting: Tracking requires TPC Conversion to be active");
184 GPUError(
"Invalid input, TPC Clusterizer needs TPC raw input");
188 GPUError(
"Invalid input / output / step, merger cannot read/store sectors tracks and needs TPC conversion");
193 GPUError(
"Invalid Inputs for track merging, TPC Clusters required");
198 GPUError(
"Can not run TPC GPU Cluster Finding with Run 2 Data");
203 GPUError(
"Missing input for TPC Cluster conversion / sector tracking / compression / dEdx: TPC Clusters required");
207 GPUError(
"Input for TPC merger missing");
211 GPUError(
"Input for TPC compressor missing");
215 GPUError(
"Input for TRD Tracker missing");
219 GPUError(
"TRD Reco Parameters are missing");
223 GPUError(
"TPC Raw / TPC Clusters / TRD Tracklets cannot be output");
227 GPUError(
"No TPC Merged Track Output available");
231 GPUError(
"No TPC Compression Output available");
235 GPUError(
"No TRD Tracker Output available");
239 GPUError(
"Cannot run dE/dx without dE/dx calibration container object");
243 GPUError(
"Cannot run gain calibration without calibration object");
247 GPUError(
"Cannot run TPC ZS Decoder without mapping object. (tpczslinkmapping.dump missing?)");
256 if ((
param().
rec.tpc.nWays & 1) == 0) {
257 GPUError(
"nWay setting musst be odd number!");
261 GPUError(
"Cannot do error interpolation with NWays < 3!");
265 GPUError(
"configured max time bin exceeds 256 orbits");
273 GPUError(
"noGPUMemoryRegistration only possible with gather mode 3 (set to %d / %d)",
mRec->
GetProcessingSettings().tpcCompressionGatherMode, gatherMode);
277 GPUError(
"Clusterizer and merger Sanity checks only supported when not running on GPU");
281 GPUError(
"tpcWriteClustersAfterRejection requires compressionTypeMask = 0, no GPU usage, and compression enabled");
286 GPUError(
"Cannot use double pipeline with tpcFreeAllocatedMemoryAfterProcessing");
290 GPUError(
"Invalid outputs for double pipeline mode 0x%x", (uint32_t)
GetRecoStepsOutputs());
297 GPUError(
"Must use external output for double pipeline mode");
300 if (gatherMode == 1) {
301 GPUError(
"Double pipeline incompatible to compression mode 1");
305 GPUError(
"Invalid reconstruction settings for double pipeline: Needs compression and cluster finding");
310 GPUError(
"Invalid tpcCompressionGatherMode for compression on CPU");
314 GPUError(
"tpcApplyClusterFilterOnCPU cannot be used with GPU clusterization or with MC labels");
319 GPUError(
"TRD tracking can only run on O2 TPC tracks if createO2Output is enabled (%d), and matBudLUT is available (0x%p)", (int32_t)
GetProcessingSettings().createO2Output, (
void*)
GetMatLUT());
323 GPUError(
"TRD tracking can only run on GPU TPC tracks if the createO2Output setting does not suppress them");
327 GPUError(
"Cannot use TRD tracking or Refit on GPU without GPU polynomial field map (%d) or matlut table (%p)", (int32_t)
GetProcessingSettings().o2PropagatorUseGPUField, (
void*)
GetMatLUT());
339 for (uint32_t
i = 0;
i <
sizeof(gpudatatypes::RECO_STEP_NAMES) /
sizeof(gpudatatypes::RECO_STEP_NAMES[0]);
i++) {
341 printf(
" - %s", gpudatatypes::RECO_STEP_NAMES[
i]);
366 qa.reset(
new GPUQA(
this));
372 throw std::runtime_error(
"Error loading event display");
412 if (
processors()->calibObjects.matLUT && (ptrMask ==
nullptr || ptrMask->
matLUT)) {
453 throw std::runtime_error(
"Forwading zero-suppressed hits not supported");
463 qa.reset(
new GPUQA(
this));
465 if (!
GetQA()->IsInitialized()) {
488 char* fastTransformBase = (
char*)mem;
491 char* podBuf =
nullptr;
594 std::unique_ptr<GPUSettingsGRP> grp;
595 const GPUSettingsProcessing* p =
nullptr;
596 std::lock_guard lk(mMutexUpdateCalib);
620 GPUFatal(
"GPU magnetic field for propagator requested, but received an O2 propagator without GPU field");
645 memcpy(oldFlatPtrsDevice.data(), (
void*)&
mFlatObjectsDevice, oldFlatPtrsDevice.size());
647 bool ptrsChanged = memcmp(oldFlatPtrs.data(), (
void*)&
mFlatObjectsShadow, oldFlatPtrs.size()) || memcmp(oldFlatPtrsDevice.data(), (
void*)&
mFlatObjectsDevice, oldFlatPtrsDevice.size());
649 GPUInfo(
"Updating all calib objects since pointers changed");
668 GPUFatal(
"Cannot run TRD tracking or refit with o2 track model without o2 propagator");
694 }
catch (
const std::bad_alloc& e) {
695 GPUError(
"Memory Allocation Error");
745 GPUInfo(
"Preempting tpcZS input of foreign chain");
748 mPipelineFinalizationCtx->rec = this->
mRec;
749 foreignChain->mPipelineNotifyCtx = mPipelineFinalizationCtx.get();
784 int32_t retVal2 = RunChainFinalize();
785 return retVal2 ? retVal2 :
retVal;
788int32_t GPUChainTracking::RunChainFinalize()
820 GPUInfo(
"Starting Event Display...");
822 GPUError(
"Error starting Event Display");
836 GPUInfo(
"Press key for next event!");
843 iKey = kbhit() ? getch() : 0;
846 }
else if (iKey ==
'n') {
863 GPUInfo(
"Loading next event...");
873 if (mPipelineFinalizationCtx) {
875 std::unique_lock<std::mutex> lock(mPipelineFinalizationCtx->mutex);
876 auto* ctx = mPipelineFinalizationCtx.get();
877 mPipelineFinalizationCtx->cond.wait(lock, [ctx]() {
return ctx->ready; });
879 mPipelineFinalizationCtx.reset();
881 return RunChainFinalize();
887 bool hasDebugError =
false;
888 for (int32_t
i = 0;
i < 1 + (!cpuOnly &&
mRec->
IsGPU());
i++) {
899 static int32_t errorsShown = 0;
900 static bool quiet =
false;
901 static std::chrono::time_point<std::chrono::steady_clock> silenceFrom;
903 silenceFrom = std::chrono::steady_clock::now();
906 auto currentTime = std::chrono::steady_clock::now();
907 std::chrono::duration<double> elapsed_seconds = currentTime - silenceFrom;
908 if (elapsed_seconds.count() > 60 * 10) {
915 GPUWarning(
"GPUReconstruction suffered from an error in the %s part",
i ?
"GPU" :
"CPU");
917 GPUError(
"GPUReconstruction suffered from an error in the %s part",
i ?
"GPU" :
"CPU");
925 for (uint32_t
j = 0;
j < nErrors;
j++) {
926 fillErrors->emplace_back(std::array<uint32_t, 4>{pErrors[4 *
j], pErrors[4 *
j + 1], pErrors[4 *
j + 2], pErrors[4 *
j + 3]});
931 hasDebugError =
true;
935 for (uint32_t
j = 0;
j < nErrors;
j++) {
937 hasDebugError =
true;
964 std::lock_guard lk(mMutexUpdateCalib);
966 void*
const* pSrc = (
void*
const*)&obj;
968 for (uint32_t
i = 0;
i <
sizeof(*mNewCalibObjects) /
sizeof(
void*);
i++) {
993 GPUFatal(
"GPU magnetic field for propagator requested, but received an O2 propagator without GPU field");
1000 rec.useMatLUT =
false;
1002 if (proc.rtc.optSpecialCode == -1) {
1003 proc.rtc.optSpecialCode = syncMode;
1005 if (dEdxMode != -2) {
Definition of container class for dE/dx corrections.
Online TRD tracker based on extrapolated TPC tracks.
Used for storing the MC labels for the TRD tracklets.
TRD Tracklet word for GPU tracker - 32bit tracklet info + half chamber ID + index.
Definition of a container to keep Monte Carlo truth external to simulation objects.
bitfield & setBits(const bitfield v, bool w)
bool isSet(const bitfield &v) const
void SetMatLUT(std::unique_ptr< o2::base::MatLayerCylSet > &&lut)
~GPUChainTracking() override
static void ApplySyncSettings(GPUSettingsProcessing &proc, GPUSettingsRec &rec, gpudatatypes::RecoStepField &steps, bool syncMode, int32_t dEdxMode=-2)
std::unique_ptr< o2::base::MatLayerCylSet > mMatLUTU
void PrintKernelDebugOutput()
int32_t RunTPCCompression()
const o2::base::Propagator * GetDeviceO2Propagator()
std::unique_ptr< o2::tpc::ClusterNativeAccess > mClusterNativeAccess
std::unique_ptr< GPUTRDRecoParam > mTRDRecoParamU
void SetTRDGeometry(std::unique_ptr< o2::trd::GeometryFlat > &&geo)
GPUChainTracking * mQAFromForeignChain
void SetO2Propagator(const o2::base::Propagator *prop)
std::unique_ptr< GPUQA > mQA
GPUTrackingFlatObjects mFlatObjectsDevice
int32_t RunTPCClusterizer(bool synchronizeOutput=true)
int32_t RunTPCTrackingSectors()
void UpdateGPUCalibObjectsPtrs(int32_t stream)
int32_t RunTPCTrackingMerger(bool synchronizeOutput=true)
std::unique_ptr< GPUTrackingInputProvider > mInputsHost
std::unique_ptr< GPUTPCClusterStatistics > mCompressionStatistics
const o2::base::MatLayerCylSet * GetMatLUT() const
void SetTRDRecoParam(std::unique_ptr< GPUTRDRecoParam > &&par)
std::array< GPUOutputControl *, GPUTrackingOutputs::count()> mSubOutputControls
std::unique_ptr< std::ofstream > mDebugFile
void SetTPCFastTransform(aligned_unique_buffer_ptr< TPCFastTransformPOD > &&tpcFastTransform)
void SetUpdateCalibObjects(const GPUCalibObjectsConst &obj, const GPUNewCalibValues &vals)
bool mUpdateNewCalibObjects
std::unique_ptr< GPUCalibObjectsConst > mNewCalibObjects
std::unique_ptr< o2::trd::GeometryFlat > mTRDGeometryU
int32_t Finalize() override
GPUTrackingFlatObjects mFlatObjectsShadow
void RegisterPermanentMemoryAndProcessors() override
void RegisterGPUProcessors() override
void ClearErrorCodes(bool cpuOnly=false)
const GPUQA * GetQA() const
void MemorySize(size_t &gpuMem, size_t &pageLockedHostMem) override
GPUChainTracking(GPUReconstruction *rec, uint32_t maxTPCHits=constants::GPU_MEM_MAX_TPC_CLUSTERS, uint32_t maxTRDTracklets=constants::GPU_MEM_MAX_TRD_TRACKLETS)
int32_t RunTPCDecompression()
int32_t DoQueuedUpdates(int32_t stream, bool updateSlave=true)
std::unique_ptr< GPUNewCalibValues > mNewCalibValues
int32_t CheckErrorCodes(bool cpuOnly=false, bool forceShowErrors=false, std::vector< std::array< uint32_t, 4 > > *fillErrors=nullptr) override
bool mFractionalQAEnabled
void PrepareKernelDebugOutput()
int32_t PrepareEvent() override
int32_t RunChain() override
void UpdateGPUCalibObjects(int32_t stream, const GPUCalibObjectsConst *ptrMask=nullptr)
int32_t ConvertNativeToClusterData()
GPUTrackingInOutPointers & mIOPtrs
struct o2::gpu::GPUChainTracking::InOutMemory mIOMem
const o2::tpc::CalibdEdxContainer * GetdEdxCalibContainer() const
std::unique_ptr< GPUTrackingInputProvider > mInputsShadow
const GPUSettingsQA * mConfigQA
int32_t FinalizePipelinedProcessing() override
bool mTPCSectorScratchOnStack
std::unique_ptr< GPUDisplayInterface > mEventDisplay
std::unique_ptr< o2::tpc::ClusterNativeAccess > mClusterNativeAccessReduced
aligned_unique_buffer_ptr< TPCFastTransformPOD > mTPCFastTransformU
void TransferMemoryResourceLinkToGPU(RecoStep step, int16_t res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
GPUConstantMem * processorsDevice()
GPUReconstruction::RecoStepField GetRecoStepsGPU() const
GPUReconstruction::RecoStepField GetRecoSteps() const
void WriteToConstantMemory(RecoStep step, size_t offset, const void *src, size_t size, int32_t stream=-1, deviceEvent *ev=nullptr)
GPUChain * GetNextChainInQueue()
GPUReconstruction::InOutTypeField GetRecoStepsOutputs() const
virtual std::unique_ptr< GPUReconstructionProcessing::threadContext > GetThreadContext()
GPUConstantMem * processors()
const GPUSettingsProcessing & GetProcessingSettings() const
void SynchronizeStream(int32_t stream)
GPUReconstructionCPU * mRec
GPUConstantMem * processorsShadow()
GPUReconstruction::InOutTypeField GetRecoStepsInputs() const
static constexpr int32_t NSECTORS
void TransferMemoryResourceLinkToHost(RecoStep step, int16_t res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
void AllocateIOMemoryHelper(uint32_t n, const T *&ptr, std::unique_ptr< T[]> &u)
int32_t runRecoStep(RecoStep step, S T::*func, Args... args)
GPUReconstruction * rec()
static GPUDisplayInterface * getDisplay(GPUDisplayFrontendInterface *frontend, GPUChainTracking *chain, GPUQA *qa, const GPUParam *param=nullptr, const GPUCalibObjectsConst *calib=nullptr, const GPUSettingsDisplay *config=nullptr, const GPUSettingsProcessing *proc=nullptr)
const uint32_t * getErrorPtr() const
void setMemory(GPUglobalref() uint32_t *m)
bool printErrors(bool silent=false, uint64_t mask=0)
uint32_t getNErrors() const
static void computePointerWithAlignment(T *&basePtr, S *&objPtr, size_t nEntries=1)
void InitGPUProcessor(GPUReconstruction *rec, ProcessorType type=PROCESSOR_TYPE_CPU, GPUProcessor *slaveProcessor=nullptr)
static bool QAAvailable()
int32_t DrawQAHistograms()
void UpdateChain(GPUChainTracking *chain)
static bool IsInitialized()
int32_t InitQA(int32_t tasks=0)
void RunQA(bool matchOnly=false)
HighResTimer & getGeneralStepTimer(GeneralStep step)
void SetNActiveThreads(int32_t n)
const GPUDefParameters & getGPUParameters(bool doGPU) const override
void AddGPUEvents(T *&events)
std::vector< std::array< uint32_t, 4 > > * getErrorCodeOutput()
RecoStepField GetRecoStepsGPU() const
void RegisterGPUDeviceProcessor(GPUProcessor *proc, GPUProcessor *slaveProcessor)
void RegisterGPUProcessor(T *proc, bool deviceSlave)
void setDebugDumpCallback(std::function< void()> &&callback=std::function< void()>(nullptr))
void ResetRegisteredMemoryPointers(GPUProcessor *proc)
int16_t RegisterMemoryAllocation(T *proc, void *(T::*setPtr)(void *), int32_t type, const char *name="", const GPUMemoryReuse &re=GPUMemoryReuse())
uint32_t getNEventsProcessed()
void PopNonPersistentMemory(RecoStep step, uint64_t tag, const GPUProcessor *proc=nullptr)
void UpdateSettings(const GPUSettingsGRP *g, const GPUSettingsProcessing *p=nullptr, const GPUSettingsRecDynamic *d=nullptr)
uint32_t NStreams() const
void PushNonPersistentMemory(uint64_t tag)
GPUMemorySizeScalers * MemoryScalers()
const GPUSettingsProcessing & GetProcessingSettings() const
const GPUSettingsGRP & GetGRPSettings() const
GPUOutputControl & OutputControl()
static constexpr const uint32_t TPC_MAX_TF_TIME_BIN
GPUCalibObjectsTemplate< ConstPtr > GPUCalibObjectsConst
Global TPC definitions and constants.
std::string to_string(gsl::span< T, Size > span)
std::unique_ptr< GPUDisplayFrontendInterface > eventDisplay
constexpr T qStr2Tag(const char(&str)[N])
S< o2::trd::GeometryFlat >::type * trdGeometry
S< o2::tpc::CalibdEdxContainer >::type * dEdxCalibContainer
S< TPCZSLinkMapping >::type * tpcZSLinkMapping
S< TPCFastTransformPOD >::type * fastTransform
S< GPUTRDRecoParam >::type * trdRecoParam
S< TPCPadGainCalib >::type * tpcPadGain
S< o2::base::PropagatorImpl< float > >::type * o2Propagator
S< o2::base::MatLayerCylSet >::type * matLUT
void * SetPointersFlatObjects(void *mem)
GPUCalibObjects mCalibObjects
char * mdEdxSplinesBuffer
GPUChainTracking * mChainTracking
std::unique_ptr< GPUTPCMCInfo[]> mcInfosTPC
std::unique_ptr< GPUTRDTrackletWord[]> trdTracklets
std::unique_ptr< GPUTPCMCInfoCol[]> mcInfosTPCCol
std::unique_ptr< GPUTPCGMMergedTrackHit[]> mergedTrackHits
std::unique_ptr< int32_t[]> trdTrackletIdxFirst
std::unique_ptr< GPUTPCGMMergedTrack[]> mergedTracks
std::unique_ptr< AliHLTTPCClusterMCLabel[]> mcLabelsTPC
std::unique_ptr< GPUTPCClusterData[]> clusterData[NSECTORS]
std::unique_ptr< o2::tpc::ClusterNative[]> clustersNative
std::unique_ptr< float[]> trdTriggerTimes
std::unique_ptr< GPUTPCTrack[]> sectorTracks[NSECTORS]
std::unique_ptr< GPUTRDSpacePoint[]> trdSpacePoints
std::unique_ptr< uint8_t[]> trdTrigRecMask
std::unique_ptr< o2::tpc::ClusterNativeAccess > clusterNativeAccess
std::unique_ptr< AliHLTTPCRawCluster[]> rawClusters[NSECTORS]
std::unique_ptr< GPUTPCHitId[]> sectorClusters[NSECTORS]
std::unique_ptr< GPUTRDTrackGPU[]> trdTracks
GPUTRDTrackerGPU trdTrackerGPU
GPUCalibObjectsConst calibObjects
GPUTRDTracker trdTrackerO2
const int32_t * trdTrackletIdxFirst
const GPUTPCHitId * sectorClusters[NSECTORS]
const o2::tpc::ClusterNativeAccess * clustersNative
const GPUTPCMCInfo * mcInfosTPC
uint32_t nClusterData[NSECTORS]
uint32_t nRawClusters[NSECTORS]
const o2::tpc::CompressedClustersFlat * tpcCompressedClusters
const AliHLTTPCClusterMCLabel * mcLabelsTPC
uint32_t nSectorClusters[NSECTORS]
const GPUTRDSpacePoint * trdSpacePoints
const GPUTPCTrack * sectorTracks[NSECTORS]
const GPUTRDTrackGPU * trdTracks
const GPUTRDTrackletWord * trdTracklets
const GPUTrackingInOutZS * tpcZS
const AliHLTTPCRawCluster * rawClusters[NSECTORS]
const GPUTPCClusterData * clusterData[NSECTORS]
uint32_t nSectorTracks[NSECTORS]
uint32_t nMergedTrackHits
const float * trdTriggerTimes
const uint8_t * trdTrigRecMask
const GPUTPCGMMergedTrackHit * mergedTrackHits
const GPUTrackingInOutDigits * tpcPackedDigits
uint32_t nTRDTriggerRecords
const GPUTPCMCInfoCol * mcInfosTPCCol
const GPUTPCGMMergedTrack * mergedTracks
GPUOutputControl clustersNative
size_t getIndex(const GPUOutputControl &v)
GPUOutputControl sharedClusterMap
GPUOutputControl compressedClusters
GPUOutputControl tpcTracks
unsigned int nClustersTotal