22#include "GPUDefParametersRuntime.h"
38uint32_t GPUChainTracking::StreamForSector(uint32_t sector)
const
45 const uint32_t
stream = StreamForSector(iSector);
46 runKernel<GPUTPCExtrapolationTracking>({
GetGridBlk(256,
stream), {iSector}});
57 GPUWarning(
"This GPU is stuck, processing of tracking for this event is skipped!");
63 int32_t
retVal = RunTPCTrackingSectors_internal();
70int32_t GPUChainTracking::RunTPCTrackingSectors_internal()
73 GPUInfo(
"Running TPC Sector Tracker");
85 for (uint32_t iSector = 0; iSector <
NSECTORS; iSector++) {
89 for (uint32_t iSector = 0; iSector <
NSECTORS; iSector++) {
94 for (uint32_t iSector = 0; iSector <
NSECTORS; iSector++) {
103 bool initializeOccMap =
param().
rec.tpc.occupancyMapTimeBins ||
param().
rec.tpc.sysClusErrorC12Norm;
107 GPUInfo(
"Copying Tracker objects to GPU");
115 std::fill(streamInit, streamInit +
mRec->
NStreams(),
false);
116 streamInit[streamInitAndOccMap] =
true;
119 if (
param().
rec.tpc.occupancyMapTimeBins ||
param().
rec.tpc.sysClusErrorC12Norm) {
122 if (
param().
rec.tpc.occupancyMapTimeBins) {
128 runKernel<GPUMemClean16>(
GetGridAutoStep(streamInitAndOccMap, RecoStep::TPCSectorTracking), ptrTmp, GPUTPCClusterOccupancyMapBin::getTotalSize(
param()));
130 runKernel<GPUTPCCreateOccupancyMap, GPUTPCCreateOccupancyMap::fold>(
GetGridBlk(
mInputsHost->mTPCClusterOccupancyMapSize, streamInitAndOccMap), ptrTmp,
ptr + 2);
132 mInputsHost->mTPCClusterOccupancyMap[1] =
param().
rec.tpc.occupancyMapTimeBins * 0x10000 +
param().
rec.tpc.occupancyMapTimeBinsAverage;
139 if (initializeOccMap) {
140 uint32_t& occupancyTotal = *
mInputsHost->mTPCClusterOccupancyMap;
151 int32_t useStream = StreamForSector(iSector);
157 GPUInfo(
"Creating Sector Data (Sector %d)", iSector);
161 streamInit[useStream] =
true;
163 runKernel<GPUTPCSectorDebugSortKernels, GPUTPCSectorDebugSortKernels::hitData>({
GetGridBlk(
GPUCA_ROW_COUNT, useStream), {iSector}});
171 *
mDebugFile <<
"\n\nReconstruction: Sector " << iSector <<
"/" <<
NSECTORS << std::endl;
181 runKernel<GPUMemClean16>(
GetGridAutoStep(useStream, RecoStep::TPCSectorTracking), trkShadow.Data().HitWeights(), trkShadow.Data().NumberOfHitsPlusAlign() *
sizeof(*trkShadow.Data().HitWeights()));
183 streamInit[useStream] =
true;
198 runKernel<GPUTPCStartHitsSorter>({
GetGridAuto(useStream), {iSector}});
201 runKernel<GPUTPCSectorDebugSortKernels, GPUTPCSectorDebugSortKernels::startHits>({
GetGrid(1, 1, useStream), {iSector}});
211 runKernel<GPUTPCTrackletConstructor>({
GetGridAuto(useStream), {iSector}});
217 runKernel<GPUTPCTrackletSelector>({
GetGridAuto(useStream), {iSector}});
218 runKernel<GPUTPCExtrapolationTrackingCopyNumbers>({{1, -
ThreadCount(), useStream}, {iSector}}, 1);
220 runKernel<GPUTPCSectorDebugSortKernels, GPUTPCSectorDebugSortKernels::sectorTracks>({
GetGrid(1, 1, useStream), {iSector}});
223 streamMap[iSector] = useStream;
225 GPUInfo(
"Sector %u, Number of tracks: %d", iSector, *trk.NTracks());
238 if (
param().
rec.tpc.extrapolationTracking) {
240 for (uint32_t iSector = 0; iSector <
NSECTORS; iSector++) {
241 for (uint32_t iStream = 0; iStream <
mRec->
NStreams(); iStream++) {
242 blocking[iSector *
mRec->
NStreams() + iStream] = StreamForSector(iSector) == iStream;
245 for (uint32_t iSector = 0; iSector <
NSECTORS; iSector++) {
246 uint32_t tmpSector = GPUTPCExtrapolationTracking::ExtrapolationTrackingSectorOrder(iSector);
247 uint32_t sectorLeft, sectorRight;
248 GPUTPCExtrapolationTracking::ExtrapolationTrackingSectorLeftRight(tmpSector, sectorLeft, sectorRight);
249 if (doGPU && !blocking[tmpSector *
mRec->
NStreams() + StreamForSector(sectorLeft)]) {
251 blocking[tmpSector *
mRec->
NStreams() + StreamForSector(sectorLeft)] =
true;
253 if (doGPU && !blocking[tmpSector *
mRec->
NStreams() + StreamForSector(sectorRight)]) {
255 blocking[tmpSector *
mRec->
NStreams() + StreamForSector(sectorRight)] =
true;
262 for (uint32_t iSector = 0; iSector <
NSECTORS; iSector++) {
268 if (
param().
rec.tpc.extrapolationTracking) {
269 ExtrapolationTracking(iSector, true);
276 for (uint32_t iSector = 0; iSector <
NSECTORS; iSector++) {
277 GPUInfo(
"Sector %d - Tracks: Local %d Extrapolated %d - Hits: Local %d Extrapolated %d", iSector,
278 processors()->tpcTrackers[iSector].CommonMemory()->nLocalTracks,
processors()->tpcTrackers[iSector].CommonMemory()->nTracks,
processors()->tpcTrackers[iSector].CommonMemory()->nLocalTrackHits,
processors()->tpcTrackers[iSector].CommonMemory()->nTrackHits);
295 GPUInfo(
"TPC Sector Tracker finished");
#define GPUCA_MAX_STREAMS
int32_t RunTPCTrackingSectors()
std::unique_ptr< GPUTrackingInputProvider > mInputsHost
std::array< GPUOutputControl *, GPUTrackingOutputs::count()> mSubOutputControls
std::unique_ptr< std::ofstream > mDebugFile
GPUTrackingInOutPointers & mIOPtrs
std::unique_ptr< GPUTrackingInputProvider > mInputsShadow
int32_t ExtrapolationTracking(uint32_t iSector, bool blocking)
void TransferMemoryResourceLinkToGPU(RecoStep step, int16_t res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
void GPUMemCpy(RecoStep step, void *dst, const void *src, size_t size, int32_t stream, int32_t toGPU, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
bool DoDebugAndDump(RecoStep step, uint32_t mask, T &processor, S T::*func, Args &&... args)
GPUReconstruction::RecoStepField GetRecoStepsGPU() const
void WriteToConstantMemory(RecoStep step, size_t offset, const void *src, size_t size, int32_t stream=-1, deviceEvent *ev=nullptr)
void ReleaseEvent(deviceEvent ev, bool doGPU=true)
uint32_t ThreadCount() const
size_t AllocateRegisteredMemory(GPUProcessor *proc)
virtual std::unique_ptr< GPUReconstructionProcessing::threadContext > GetThreadContext()
GPUConstantMem * processors()
void StreamWaitForEvents(int32_t stream, deviceEvent *evList, int32_t nEvents=1)
void SetupGPUProcessor(T *proc, bool allocate)
const GPUSettingsProcessing & GetProcessingSettings() const
void SynchronizeStream(int32_t stream)
GPUReconstructionCPU * mRec
GPUConstantMem * processorsShadow()
krnlExec GetGridAutoStep(int32_t stream, GPUDataTypes::RecoStep st=GPUDataTypes::RecoStep::NoRecoStep)
static constexpr int32_t NSECTORS
void TransferMemoryResourceLinkToHost(RecoStep step, int16_t res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
void TransferMemoryResourcesToHost(RecoStep step, GPUProcessor *proc, int32_t stream=-1, bool all=false)
krnlExec GetGrid(uint32_t totalItems, uint32_t nThreads, int32_t stream, GPUReconstruction::krnlDeviceType d=GPUReconstruction::krnlDeviceType::Auto, GPUDataTypes::RecoStep st=GPUDataTypes::RecoStep::NoRecoStep)
krnlExec GetGridAuto(int32_t stream, GPUReconstruction::krnlDeviceType d=GPUReconstruction::krnlDeviceType::Auto, GPUDataTypes::RecoStep st=GPUDataTypes::RecoStep::NoRecoStep)
krnlExec GetGridBlk(uint32_t nBlocks, int32_t stream, GPUReconstruction::krnlDeviceType d=GPUReconstruction::krnlDeviceType::Auto, GPUDataTypes::RecoStep st=GPUDataTypes::RecoStep::NoRecoStep)
GPUReconstruction * rec()
void TransferMemoryResourcesToGPU(RecoStep step, GPUProcessor *proc, int32_t stream=-1, bool all=false)
void UpdateParamOccupancyMap(const uint32_t *mapHost, const uint32_t *mapGPU, uint32_t occupancyTotal, uint32_t mapSize, int32_t stream=-1, deviceEvent *ev=nullptr)
void runParallelOuterLoop(bool doGPU, uint32_t nThreads, std::function< void(uint32_t)> lambda)
const GPUDefParameters & getGPUParameters(bool doGPU) const override
void SetNActiveThreadsOuterLoop(uint32_t f)
void * AllocateVolatileMemory(size_t size, bool device)
void ReturnVolatileMemory()
void ComputeReuseMax(GPUProcessor *proc)
void ResetRegisteredMemoryPointers(GPUProcessor *proc)
GPUMemoryResource & Res(int16_t num)
void PopNonPersistentMemory(RecoStep step, uint64_t tag, const GPUProcessor *proc=nullptr)
uint32_t NStreams() const
void PushNonPersistentMemory(uint64_t tag)
GPUMemorySizeScalers * MemoryScalers()
size_t AllocateRegisteredMemory(GPUProcessor *proc, bool resetCustom=false)
int16_t MemoryResTracklets() const
int16_t MemoryResLinks() const
void SetMaxData(const GPUTrackingInOutPointers &io)
int16_t MemoryResOutput() const
void DumpTrackHits(std::ostream &out)
void DumpLinks(std::ostream &out, int32_t phase)
void DumpStartHits(std::ostream &out)
void DumpHitWeights(std::ostream &out)
int16_t MemoryResCommon() const
int32_t CheckEmptySector()
bool MemoryReuseAllowed()
void DumpTrackingData(std::ostream &out)
void DumpTrackletHits(std::ostream &out)
constexpr T qStr2Tag(const char *str)
deviceEvent sector[NSECTORS]
GPUTPCTracker tpcTrackers[GPUCA_NSECTORS]
const GPUTPCHitId * sectorClusters[NSECTORS]
const o2::tpc::ClusterNativeAccess * clustersNative
uint32_t nSectorClusters[NSECTORS]
const GPUTPCTrack * sectorTracks[NSECTORS]
const GPUSettingsTF * settingsTF
uint32_t nSectorTracks[NSECTORS]
GPUOutputControl tpcOccupancyMap
size_t getIndex(const GPUOutputControl &v)
unsigned int nClustersSector[constants::MAXSECTOR]
unsigned int nClustersTotal
unsigned int clusterOffset[constants::MAXSECTOR][constants::MAXGLOBALPADROW]