uint32_t GPUChainTracking::StreamForSector(uint32_t sector) const
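// Annotation (not from the original source): the body of StreamForSector() is not shown in this
// excerpt. A minimal sketch, assuming a plain round-robin assignment of sectors to the available
// streams, could look like:
//   uint32_t GPUChainTracking::StreamForSector(uint32_t sector) const { return sector % mRec->NStreams(); }
// Any deterministic mapping works here, since the blocking logic further below queries it again.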
const uint32_t stream = StreamForSector(iSector);
runKernel<GPUTPCExtrapolationTracking>({GetGridBlk(256, stream), {iSector}});
GPUWarning("This GPU is stuck, processing of tracking for this event is skipped!");
int32_t retVal = RunTPCTrackingSectors_internal();
int32_t GPUChainTracking::RunTPCTrackingSectors_internal()
GPUInfo("Running TPC Sector Tracker");
if (!param().par.earlyTpcTransform) {
if (doGPU && GetRecoSteps().isSet(RecoStep::TPCConversion)) {
for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) {
for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) {
for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) {
for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) {
processors()->tpcTrackers[iSector].GPUParameters()->nextStartHit = (((getKernelProperties<GPUTPCTrackletConstructor>().minBlocks * BlockCount()) + NSECTORS - 1 - iSector) / NSECTORS) * getKernelProperties<GPUTPCTrackletConstructor>().nThreads;
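// Annotation (not from the original source): the expression above is a staggered ceiling division.
// With B = minBlocks * BlockCount() blocks to distribute over NSECTORS sectors, sector i gets
// (B + NSECTORS - 1 - i) / NSECTORS blocks in integer arithmetic, so lower-numbered sectors absorb
// the remainder and the per-sector shares sum to exactly B. Worked example with hypothetical values
// B = 100, NSECTORS = 36: sectors 0..27 get 3 blocks each, sectors 28..35 get 2, and 28*3 + 8*2 = 100.
// The share is then multiplied by nThreads to convert it from blocks to a thread/start-hit count.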
GPUInfo("Copying Tracker objects to GPU");
std::fill(streamInit, streamInit + mRec->NStreams(), false);
streamInit[streamInitAndOccMap] = true;
if (param().rec.tpc.occupancyMapTimeBins || param().rec.tpc.sysClusErrorC12Norm) {
if (param().rec.tpc.occupancyMapTimeBins) {
runKernel<GPUMemClean16>(GetGridAutoStep(streamInitAndOccMap, RecoStep::TPCSectorTracking), ptrTmp, GPUTPCClusterOccupancyMapBin::getTotalSize(param()));
runKernel<GPUTPCCreateOccupancyMap, GPUTPCCreateOccupancyMap::fold>(GetGridBlk(GPUTPCClusterOccupancyMapBin::getNBins(param()), streamInitAndOccMap), ptrTmp, ptr + 2);
mInputsHost->mTPCClusterOccupancyMap[1] = param().rec.tpc.occupancyMapTimeBins * 0x10000 + param().rec.tpc.occupancyMapTimeBinsAverage;
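// Annotation (not from the original source): word [1] of the occupancy-map buffer packs two 16-bit
// fields, occupancyMapTimeBins in the upper half-word and occupancyMapTimeBinsAverage in the lower
// one (assuming both values fit into 16 bits). A consumer could unpack it as sketched here:
//   const uint32_t packed = mInputsHost->mTPCClusterOccupancyMap[1];
//   const uint32_t timeBins = packed >> 16;           // occupancyMapTimeBins
//   const uint32_t timeBinsAverage = packed & 0xFFFF; // occupancyMapTimeBinsAverage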
if (param().rec.tpc.occupancyMapTimeBins || param().rec.tpc.sysClusErrorC12Norm) {
uint32_t& occupancyTotal = *mInputsHost->mTPCClusterOccupancyMap;
int32_t useStream = StreamForSector(iSector);
GPUInfo("Creating Sector Data (Sector %d)", iSector);
streamInit[useStream] = true;
runKernel<GPUTPCSectorDebugSortKernels, GPUTPCSectorDebugSortKernels::hitData>({GetGridBlk(GPUCA_ROW_COUNT, useStream), {iSector}});
*mDebugFile << "\n\nReconstruction: Sector " << iSector << "/" << NSECTORS << std::endl;
runKernel<GPUMemClean16>(GetGridAutoStep(useStream, RecoStep::TPCSectorTracking), trkShadow.Data().HitWeights(), trkShadow.Data().NumberOfHitsPlusAlign() * sizeof(*trkShadow.Data().HitWeights()));
streamInit[useStream] = true;
#ifdef GPUCA_SORT_STARTHITS_GPU
runKernel<GPUTPCStartHitsSorter>({GetGridAuto(useStream), {iSector}});
runKernel<GPUTPCSectorDebugSortKernels, GPUTPCSectorDebugSortKernels::startHits>({GetGrid(1, 1, useStream), {iSector}});
runKernel<GPUTPCTrackletConstructor>({GetGridAuto(useStream), {iSector}});
runKernel<GPUTPCTrackletSelector>({GetGridAuto(useStream), {iSector}});
runKernel<GPUTPCExtrapolationTrackingCopyNumbers>({{1, -ThreadCount(), useStream}, {iSector}}, 1);
runKernel<GPUTPCSectorDebugSortKernels, GPUTPCSectorDebugSortKernels::sectorTracks>({GetGrid(1, 1, useStream), {iSector}});
streamMap[iSector] = useStream;
GPUInfo("Sector %u, Number of tracks: %d", iSector, *trk.NTracks());
if (param().rec.tpc.extrapolationTracking) {
for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) {
for (uint32_t iStream = 0; iStream < mRec->NStreams(); iStream++) {
blocking[iSector * mRec->NStreams() + iStream] = StreamForSector(iSector) == iStream;
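// Annotation (not from the original source): blocking[] appears to be used as an NSECTORS x NStreams()
// matrix addressed as blocking[iSector * mRec->NStreams() + iStream]. This initialization flags only the
// sector's own stream; the loop below additionally flags the streams of the left and right neighbouring
// sectors, whose results the extrapolation of this sector has to wait for.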
for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) {
uint32_t tmpSector = GPUTPCExtrapolationTracking::ExtrapolationTrackingSectorOrder(iSector);
uint32_t sectorLeft, sectorRight;
GPUTPCExtrapolationTracking::ExtrapolationTrackingSectorLeftRight(tmpSector, sectorLeft, sectorRight);
if (doGPU && !blocking[tmpSector * mRec->NStreams() + StreamForSector(sectorLeft)]) {
blocking[tmpSector * mRec->NStreams() + StreamForSector(sectorLeft)] = true;
if (doGPU && !blocking[tmpSector * mRec->NStreams() + StreamForSector(sectorRight)]) {
blocking[tmpSector * mRec->NStreams() + StreamForSector(sectorRight)] = true;
for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) {
if (param().rec.tpc.extrapolationTracking) {
ExtrapolationTracking(iSector, true);
for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) {
GPUInfo("Sector %d - Tracks: Local %d Extrapolated %d - Hits: Local %d Extrapolated %d", iSector,
        processors()->tpcTrackers[iSector].CommonMemory()->nLocalTracks, processors()->tpcTrackers[iSector].CommonMemory()->nTracks,
        processors()->tpcTrackers[iSector].CommonMemory()->nLocalTrackHits, processors()->tpcTrackers[iSector].CommonMemory()->nTrackHits);
GPUInfo("TPC Sector Tracker finished");