31 if (synchronizeOutput) {
40 GPUWarning(
"This GPU is stuck, processing of tracking for this event is skipped!");
46 int32_t
retVal = RunTPCTrackingSectors_internal();
53int32_t GPUChainTracking::RunTPCTrackingSectors_internal()
56 GPUInfo(
"Running TPC Sector Tracker");
59 if (!
param().par.earlyTpcTransform) {
71 if (doGPU &&
GetRecoSteps().isSet(RecoStep::TPCConversion)) {
80 for (uint32_t iSector = 0; iSector <
NSECTORS; iSector++) {
84 for (uint32_t iSector = 0; iSector <
NSECTORS; iSector++) {
89 for (uint32_t iSector = 0; iSector <
NSECTORS; iSector++) {
97 for (uint32_t iSector = 0; iSector <
NSECTORS; iSector++) {
100 processors()->
tpcTrackers[iSector].GPUParameters()->nextStartHit = (((getKernelProperties<GPUTPCTrackletConstructor, GPUTPCTrackletConstructor::allSectors>().minBlocks *
BlockCount()) +
NSECTORS - 1 - iSector) /
NSECTORS) * getKernelProperties<GPUTPCTrackletConstructor, GPUTPCTrackletConstructor::allSectors>().nThreads;
110 GPUInfo(
"Copying Tracker objects to GPU");
119 streamInit[
i] =
false;
123 if (
GPUDebug(
"Initialization (1)", 0)) {
128 if (
param().
rec.tpc.occupancyMapTimeBins ||
param().
rec.tpc.sysClusErrorC12Norm) {
131 if (
param().
rec.tpc.occupancyMapTimeBins) {
137 runKernel<GPUMemClean16>(
GetGridAutoStep(streamOccMap, RecoStep::TPCSectorTracking), ptrTmp, GPUTPCClusterOccupancyMapBin::getTotalSize(
param()));
139 runKernel<GPUTPCCreateOccupancyMap, GPUTPCCreateOccupancyMap::fold>(
GetGridBlk(GPUTPCClusterOccupancyMapBin::getNBins(
param()), streamOccMap), ptrTmp,
ptr + 2);
141 mInputsHost->mTPCClusterOccupancyMap[1] =
param().
rec.tpc.occupancyMapTimeBins * 0x10000 +
param().
rec.tpc.occupancyMapTimeBinsAverage;
148 if (
param().
rec.tpc.occupancyMapTimeBins ||
param().
rec.tpc.sysClusErrorC12Norm) {
149 uint32_t& occupancyTotal = *
mInputsHost->mTPCClusterOccupancyMap;
163 GPUInfo(
"Creating Sector Data (Sector %d)", iSector);
168 streamInit[useStream] =
true;
171 GPUError(
"Error reading event");
177 runKernel<GPUTPCSectorDebugSortKernels, GPUTPCSectorDebugSortKernels::hitData>({
GetGridBlk(
GPUCA_ROW_COUNT, useStream), {iSector}});
184 *
mDebugFile <<
"\n\nReconstruction: Sector " << iSector <<
"/" <<
NSECTORS << std::endl;
195 GPUInfo(
"Copying Sector Data to GPU and initializing temporary memory");
197 runKernel<GPUMemClean16>(
GetGridAutoStep(useStream, RecoStep::TPCSectorTracking), trkShadow.Data().HitWeights(), trkShadow.Data().NumberOfHitsPlusAlign() *
sizeof(*trkShadow.Data().HitWeights()));
202 if (
GPUDebug(
"Initialization (3)", useStream)) {
203 throw std::runtime_error(
"memcpy failure");
207 streamInit[useStream] =
true;
221#ifdef GPUCA_SORT_STARTHITS_GPU
223 runKernel<GPUTPCStartHitsSorter>({
GetGridAuto(useStream), {iSector}});
227 runKernel<GPUTPCSectorDebugSortKernels, GPUTPCSectorDebugSortKernels::startHits>({
GetGrid(1, 1, useStream), {iSector}});
238 runKernel<GPUTPCTrackletConstructor>({
GetGridAuto(useStream), {iSector}});
246 runKernel<GPUTPCTrackletSelector>({
GetGridAuto(useStream), {iSector}});
247 runKernel<GPUTPCExtrapolationTrackingCopyNumbers>({{1, -
ThreadCount(), useStream}, {iSector}}, 1);
249 runKernel<GPUTPCSectorDebugSortKernels, GPUTPCSectorDebugSortKernels::sectorTracks>({
GetGrid(1, 1, useStream), {iSector}});
252 streamMap[iSector] = useStream;
254 GPUInfo(
"Sector %u, Number of tracks: %d", iSector, *trk.NTracks());
284 for (uint32_t iSector = 0; iSector <
NSECTORS; iSector++) {
289 int32_t runSectors = 0;
290 int32_t useStream = 0;
291 for (uint32_t iSector = 0; iSector <
NSECTORS; iSector += runSectors) {
295 runSectors = CAMath::Min<int32_t>(runSectors,
NSECTORS - iSector);
296 if (getKernelProperties<GPUTPCTrackletSelector>().minBlocks *
BlockCount() < (uint32_t)runSectors) {
297 runSectors = getKernelProperties<GPUTPCTrackletSelector>().minBlocks *
BlockCount();
301 GPUInfo(
"Running TPC Tracklet selector (Stream %d, Sector %d to %d)", useStream, iSector, iSector + runSectors);
303 runKernel<GPUTPCTrackletSelector>({
GetGridAuto(useStream), {iSector, runSectors}});
304 runKernel<GPUTPCExtrapolationTrackingCopyNumbers>({{1, -
ThreadCount(), useStream}, {iSector}}, runSectors);
305 for (uint32_t k = iSector; k < iSector + runSectors; k++) {
307 runKernel<GPUTPCSectorDebugSortKernels, GPUTPCSectorDebugSortKernels::sectorTracks>({
GetGrid(1, 1, useStream), {k}});
310 streamMap[k] = useStream;
321 std::array<bool, NSECTORS> transferRunning;
322 transferRunning.fill(
true);
324 if (
param().
rec.tpc.extrapolationTracking) {
328 uint32_t tmpSector = 0;
329 for (uint32_t iSector = 0; iSector <
NSECTORS; iSector++) {
331 GPUInfo(
"Transfering Tracks from GPU to Host");
334 if (tmpSector == iSector) {
339 if (*
processors()->tpcTrackers[tmpSector].NTracks() > 0) {
342 transferRunning[tmpSector] =
false;
361 if (transferRunning[iSector]) {
365 GPUInfo(
"Tracks Transfered: %d / %d", *
processors()->tpcTrackers[iSector].NTracks(), *
processors()->tpcTrackers[iSector].NTrackHits());
369 GPUInfo(
"Data ready for sector %d", iSector);
373 if (
param().
rec.tpc.extrapolationTracking) {
374 for (uint32_t tmpSector2a = 0; tmpSector2a <= iSector; tmpSector2a++) {
375 uint32_t tmpSector2 = GPUTPCExtrapolationTracking::ExtrapolationTrackingSectorOrder(tmpSector2a);
376 uint32_t sectorLeft, sectorRight;
377 GPUTPCExtrapolationTracking::ExtrapolationTrackingSectorLeftRight(tmpSector2, sectorLeft, sectorRight);
379 if (tmpSector2 <= iSector && sectorLeft <= iSector && sectorRight <= iSector &&
mWriteOutputDone[tmpSector2] == 0) {
397 for (uint32_t iSector = 0; iSector <
NSECTORS; iSector++) {
398 uint32_t tmpSector = GPUTPCExtrapolationTracking::ExtrapolationTrackingSectorOrder(iSector);
400 uint32_t sectorLeft, sectorRight;
401 GPUTPCExtrapolationTracking::ExtrapolationTrackingSectorLeftRight(tmpSector, sectorLeft, sectorRight);
414 for (uint32_t iSector = 0; iSector <
NSECTORS; iSector++) {
415 if (doGPU && transferRunning[iSector]) {
422 if (
param().
rec.tpc.extrapolationTracking) {
423 ExtrapolationTracking(iSector, 0);
433 for (uint32_t iSector = 0; iSector <
NSECTORS; iSector++) {
434 GPUInfo(
"Sector %d - Tracks: Local %d Extrapolated %d - Hits: Local %d Extrapolated %d", iSector,
435 processors()->tpcTrackers[iSector].CommonMemory()->nLocalTracks,
processors()->tpcTrackers[iSector].CommonMemory()->nTracks,
processors()->tpcTrackers[iSector].CommonMemory()->nLocalTrackHits,
processors()->tpcTrackers[iSector].CommonMemory()->nTrackHits);
458 GPUInfo(
"TPC Sector Tracker finished");
467 GPUInfo(
"Running ReadEvent for sector %d on thread %d\n", iSector, threadId);
471 GPUInfo(
"Finished ReadEvent for sector %d on thread %d\n", iSector, threadId);
479 GPUInfo(
"Running WriteOutput for sector %d on thread %d\n", iSector, threadId);
484 GPUInfo(
"Finished WriteOutput for sector %d on thread %d\n", iSector, threadId);
#define GPUCA_MAX_STREAMS
int32_t ExtrapolationTracking(uint32_t iSector, int32_t threadId, bool synchronizeOutput=true)
int32_t RunTPCTrackingSectors()
std::unique_ptr< GPUTrackingInputProvider > mInputsHost
std::array< GPUOutputControl *, GPUTrackingOutputs::count()> mSubOutputControls
std::unique_ptr< std::ofstream > mDebugFile
volatile int32_t mSectorSelectorReady
void WriteOutput(int32_t iSector, int32_t threadId)
int32_t ReadEvent(uint32_t iSector, int32_t threadId)
GPUTrackingInOutPointers & mIOPtrs
std::array< int8_t, NSECTORS > mWriteOutputDone
std::unique_ptr< GPUTrackingInputProvider > mInputsShadow
void RecordMarker(deviceEvent *ev, int32_t stream)
void TransferMemoryResourceLinkToGPU(RecoStep step, int16_t res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
bool IsEventDone(deviceEvent *evList, int32_t nEvents=1)
void GPUMemCpy(RecoStep step, void *dst, const void *src, size_t size, int32_t stream, int32_t toGPU, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
krnlExec GetGridBlk(uint32_t nBlocks, int32_t stream, GPUReconstruction::krnlDeviceType d=GPUReconstruction::krnlDeviceType::Auto, GPUCA_RECO_STEP st=GPUCA_RECO_STEP::NoRecoStep)
krnlExec GetGridAuto(int32_t stream, GPUReconstruction::krnlDeviceType d=GPUReconstruction::krnlDeviceType::Auto, GPUCA_RECO_STEP st=GPUCA_RECO_STEP::NoRecoStep)
GPUReconstruction::RecoStepField GetRecoStepsGPU() const
GPUReconstruction::RecoStepField GetRecoSteps() const
virtual std::unique_ptr< gpu_reconstruction_kernels::threadContext > GetThreadContext()
void WriteToConstantMemory(RecoStep step, size_t offset, const void *src, size_t size, int32_t stream=-1, deviceEvent *ev=nullptr)
int32_t GPUDebug(const char *state="UNKNOWN", int32_t stream=-1)
void ReleaseEvent(deviceEvent ev, bool doGPU=true)
uint32_t ThreadCount() const
size_t AllocateRegisteredMemory(GPUProcessor *proc)
GPUReconstruction::InOutTypeField GetRecoStepsOutputs() const
GPUConstantMem * processors()
static constexpr krnlRunRange krnlRunRangeNone
krnlExec GetGridAutoStep(int32_t stream, GPUCA_RECO_STEP st=GPUCA_RECO_STEP::NoRecoStep)
void StreamWaitForEvents(int32_t stream, deviceEvent *evList, int32_t nEvents=1)
void SetupGPUProcessor(T *proc, bool allocate)
const GPUSettingsProcessing & GetProcessingSettings() const
void SynchronizeStream(int32_t stream)
GPUReconstructionCPU * mRec
GPUConstantMem * processorsShadow()
static constexpr int32_t NSECTORS
void TransferMemoryResourceLinkToHost(RecoStep step, int16_t res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
krnlExec GetGrid(uint32_t totalItems, uint32_t nThreads, int32_t stream, GPUReconstruction::krnlDeviceType d=GPUReconstruction::krnlDeviceType::Auto, GPUCA_RECO_STEP st=GPUCA_RECO_STEP::NoRecoStep)
void TransferMemoryResourcesToHost(RecoStep step, GPUProcessor *proc, int32_t stream=-1, bool all=false)
bool DoDebugAndDump(RecoStep step, int32_t mask, T &processor, S T::*func, Args &&... args)
void SynchronizeEvents(deviceEvent *evList, int32_t nEvents=1)
virtual int32_t PrepareTextures()
uint32_t BlockCount() const
GPUReconstruction * rec()
void SynchronizeEventAndRelease(deviceEvent &ev, bool doGPU=true)
void TransferMemoryResourcesToGPU(RecoStep step, GPUProcessor *proc, int32_t stream=-1, bool all=false)
void UpdateParamOccupancyMap(const uint32_t *mapHost, const uint32_t *mapGPU, uint32_t occupancyTotal, int32_t stream=-1)
void runParallelOuterLoop(bool doGPU, uint32_t nThreads, std::function< void(uint32_t)> lambda)
void SetNActiveThreadsOuterLoop(uint32_t f)
void * AllocateVolatileMemory(size_t size, bool device)
void PopNonPersistentMemory(RecoStep step, uint64_t tag)
void ReturnVolatileMemory()
void ComputeReuseMax(GPUProcessor *proc)
const GPUParam & GetParam() const
void ResetRegisteredMemoryPointers(GPUProcessor *proc)
GPUMemoryResource & Res(int16_t num)
const void * DeviceMemoryBase() const
void PushNonPersistentMemory(uint64_t tag)
GPUMemorySizeScalers * MemoryScalers()
size_t AllocateRegisteredMemory(GPUProcessor *proc, bool resetCustom=false)
int16_t MemoryResTracklets() const
void WriteOutputPrepare()
int16_t MemoryResLinks() const
void SetMaxData(const GPUTrackingInOutPointers &io)
void DumpOutput(std::ostream &out)
int16_t MemoryResOutput() const
void DumpTrackHits(std::ostream &out)
void DumpLinks(std::ostream &out, int32_t phase)
void DumpStartHits(std::ostream &out)
void DumpHitWeights(std::ostream &out)
int16_t MemoryResCommon() const
int32_t CheckEmptySector()
void DumpTrackingData(std::ostream &out)
void DumpTrackletHits(std::ostream &out)
if(!okForPhiMin(phi0, phi1))
constexpr T qStr2Tag(const char *str)
deviceEvent stream[GPUCA_MAX_STREAMS]
deviceEvent sector[NSECTORS]
GPUTPCTracker tpcTrackers[GPUCA_NSECTORS]
const GPUTPCHitId * sectorClusters[NSECTORS]
const o2::tpc::ClusterNativeAccess * clustersNative
uint32_t nClusterData[NSECTORS]
uint32_t nSectorClusters[NSECTORS]
const GPUTPCTrack * sectorTracks[NSECTORS]
const GPUSettingsTF * settingsTF
const GPUTPCClusterData * clusterData[NSECTORS]
uint32_t nSectorTracks[NSECTORS]
GPUOutputControl tpcOccupancyMap
size_t getIndex(const GPUOutputControl &v)
unsigned int nClustersSector[constants::MAXSECTOR]
unsigned int nClustersTotal
unsigned int clusterOffset[constants::MAXSECTOR][constants::MAXGLOBALPADROW]