22#include "GPUDefParametersRuntime.h"
38uint32_t GPUChainTracking::StreamForSector(uint32_t sector)
const
45 const uint32_t
stream = StreamForSector(iSector);
46 runKernel<GPUTPCExtrapolationTracking>({
GetGridBlk(256,
stream), {iSector}});
57 GPUWarning(
"This GPU is stuck, processing of tracking for this event is skipped!");
63 int32_t
retVal = RunTPCTrackingSectors_internal();
70int32_t GPUChainTracking::RunTPCTrackingSectors_internal()
73 GPUInfo(
"Running TPC Sector Tracker");
85 for (uint32_t iSector = 0; iSector <
NSECTORS; iSector++) {
89 for (uint32_t iSector = 0; iSector <
NSECTORS; iSector++) {
94 for (uint32_t iSector = 0; iSector <
NSECTORS; iSector++) {
106 GPUInfo(
"Copying Tracker objects to GPU");
114 std::fill(streamInit, streamInit +
mRec->
NStreams(),
false);
115 streamInit[streamInitAndOccMap] =
true;
118 if (
param().
rec.tpc.occupancyMapTimeBins ||
param().
rec.tpc.sysClusErrorC12Norm) {
121 if (
param().
rec.tpc.occupancyMapTimeBins) {
127 runKernel<GPUMemClean16>(
GetGridAutoStep(streamInitAndOccMap, RecoStep::TPCSectorTracking), ptrTmp, GPUTPCClusterOccupancyMapBin::getTotalSize(
param()));
129 runKernel<GPUTPCCreateOccupancyMap, GPUTPCCreateOccupancyMap::fold>(
GetGridBlk(GPUTPCClusterOccupancyMapBin::getNBins(
param()), streamInitAndOccMap), ptrTmp,
ptr + 2);
131 mInputsHost->mTPCClusterOccupancyMap[1] =
param().
rec.tpc.occupancyMapTimeBins * 0x10000 +
param().
rec.tpc.occupancyMapTimeBinsAverage;
138 if (
param().
rec.tpc.occupancyMapTimeBins ||
param().
rec.tpc.sysClusErrorC12Norm) {
139 uint32_t& occupancyTotal = *
mInputsHost->mTPCClusterOccupancyMap;
150 int32_t useStream = StreamForSector(iSector);
156 GPUInfo(
"Creating Sector Data (Sector %d)", iSector);
160 streamInit[useStream] =
true;
162 runKernel<GPUTPCSectorDebugSortKernels, GPUTPCSectorDebugSortKernels::hitData>({
GetGridBlk(
GPUCA_ROW_COUNT, useStream), {iSector}});
170 *
mDebugFile <<
"\n\nReconstruction: Sector " << iSector <<
"/" <<
NSECTORS << std::endl;
180 runKernel<GPUMemClean16>(
GetGridAutoStep(useStream, RecoStep::TPCSectorTracking), trkShadow.Data().HitWeights(), trkShadow.Data().NumberOfHitsPlusAlign() *
sizeof(*trkShadow.Data().HitWeights()));
182 streamInit[useStream] =
true;
197 runKernel<GPUTPCStartHitsSorter>({
GetGridAuto(useStream), {iSector}});
200 runKernel<GPUTPCSectorDebugSortKernels, GPUTPCSectorDebugSortKernels::startHits>({
GetGrid(1, 1, useStream), {iSector}});
210 runKernel<GPUTPCTrackletConstructor>({
GetGridAuto(useStream), {iSector}});
216 runKernel<GPUTPCTrackletSelector>({
GetGridAuto(useStream), {iSector}});
217 runKernel<GPUTPCExtrapolationTrackingCopyNumbers>({{1, -
ThreadCount(), useStream}, {iSector}}, 1);
219 runKernel<GPUTPCSectorDebugSortKernels, GPUTPCSectorDebugSortKernels::sectorTracks>({
GetGrid(1, 1, useStream), {iSector}});
222 streamMap[iSector] = useStream;
224 GPUInfo(
"Sector %u, Number of tracks: %d", iSector, *trk.NTracks());
234 if (
param().
rec.tpc.extrapolationTracking) {
236 for (uint32_t iSector = 0; iSector <
NSECTORS; iSector++) {
237 for (uint32_t iStream = 0; iStream <
mRec->
NStreams(); iStream++) {
238 blocking[iSector *
mRec->
NStreams() + iStream] = StreamForSector(iSector) == iStream;
241 for (uint32_t iSector = 0; iSector <
NSECTORS; iSector++) {
242 uint32_t tmpSector = GPUTPCExtrapolationTracking::ExtrapolationTrackingSectorOrder(iSector);
243 uint32_t sectorLeft, sectorRight;
244 GPUTPCExtrapolationTracking::ExtrapolationTrackingSectorLeftRight(tmpSector, sectorLeft, sectorRight);
245 if (doGPU && !blocking[tmpSector *
mRec->
NStreams() + StreamForSector(sectorLeft)]) {
247 blocking[tmpSector *
mRec->
NStreams() + StreamForSector(sectorLeft)] =
true;
249 if (doGPU && !blocking[tmpSector *
mRec->
NStreams() + StreamForSector(sectorRight)]) {
251 blocking[tmpSector *
mRec->
NStreams() + StreamForSector(sectorRight)] =
true;
258 for (uint32_t iSector = 0; iSector <
NSECTORS; iSector++) {
264 if (
param().
rec.tpc.extrapolationTracking) {
265 ExtrapolationTracking(iSector, true);
272 for (uint32_t iSector = 0; iSector <
NSECTORS; iSector++) {
273 GPUInfo(
"Sector %d - Tracks: Local %d Extrapolated %d - Hits: Local %d Extrapolated %d", iSector,
274 processors()->tpcTrackers[iSector].CommonMemory()->nLocalTracks,
processors()->tpcTrackers[iSector].CommonMemory()->nTracks,
processors()->tpcTrackers[iSector].CommonMemory()->nLocalTrackHits,
processors()->tpcTrackers[iSector].CommonMemory()->nTrackHits);
291 GPUInfo(
"TPC Sector Tracker finished");
#define GPUCA_MAX_STREAMS
int32_t RunTPCTrackingSectors()
std::unique_ptr< GPUTrackingInputProvider > mInputsHost
std::array< GPUOutputControl *, GPUTrackingOutputs::count()> mSubOutputControls
std::unique_ptr< std::ofstream > mDebugFile
GPUTrackingInOutPointers & mIOPtrs
std::unique_ptr< GPUTrackingInputProvider > mInputsShadow
int32_t ExtrapolationTracking(uint32_t iSector, bool blocking)
void TransferMemoryResourceLinkToGPU(RecoStep step, int16_t res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
void GPUMemCpy(RecoStep step, void *dst, const void *src, size_t size, int32_t stream, int32_t toGPU, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
bool DoDebugAndDump(RecoStep step, uint32_t mask, T &processor, S T::*func, Args &&... args)
GPUReconstruction::RecoStepField GetRecoStepsGPU() const
void WriteToConstantMemory(RecoStep step, size_t offset, const void *src, size_t size, int32_t stream=-1, deviceEvent *ev=nullptr)
void ReleaseEvent(deviceEvent ev, bool doGPU=true)
uint32_t ThreadCount() const
size_t AllocateRegisteredMemory(GPUProcessor *proc)
virtual std::unique_ptr< GPUReconstructionProcessing::threadContext > GetThreadContext()
GPUConstantMem * processors()
void StreamWaitForEvents(int32_t stream, deviceEvent *evList, int32_t nEvents=1)
void SetupGPUProcessor(T *proc, bool allocate)
const GPUSettingsProcessing & GetProcessingSettings() const
void SynchronizeStream(int32_t stream)
GPUReconstructionCPU * mRec
GPUConstantMem * processorsShadow()
krnlExec GetGridAutoStep(int32_t stream, GPUDataTypes::RecoStep st=GPUDataTypes::RecoStep::NoRecoStep)
static constexpr int32_t NSECTORS
void TransferMemoryResourceLinkToHost(RecoStep step, int16_t res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
void TransferMemoryResourcesToHost(RecoStep step, GPUProcessor *proc, int32_t stream=-1, bool all=false)
krnlExec GetGrid(uint32_t totalItems, uint32_t nThreads, int32_t stream, GPUReconstruction::krnlDeviceType d=GPUReconstruction::krnlDeviceType::Auto, GPUDataTypes::RecoStep st=GPUDataTypes::RecoStep::NoRecoStep)
krnlExec GetGridAuto(int32_t stream, GPUReconstruction::krnlDeviceType d=GPUReconstruction::krnlDeviceType::Auto, GPUDataTypes::RecoStep st=GPUDataTypes::RecoStep::NoRecoStep)
krnlExec GetGridBlk(uint32_t nBlocks, int32_t stream, GPUReconstruction::krnlDeviceType d=GPUReconstruction::krnlDeviceType::Auto, GPUDataTypes::RecoStep st=GPUDataTypes::RecoStep::NoRecoStep)
GPUReconstruction * rec()
void TransferMemoryResourcesToGPU(RecoStep step, GPUProcessor *proc, int32_t stream=-1, bool all=false)
void UpdateParamOccupancyMap(const uint32_t *mapHost, const uint32_t *mapGPU, uint32_t occupancyTotal, int32_t stream=-1)
void runParallelOuterLoop(bool doGPU, uint32_t nThreads, std::function< void(uint32_t)> lambda)
const GPUDefParameters & getGPUParameters(bool doGPU) const override
void SetNActiveThreadsOuterLoop(uint32_t f)
void * AllocateVolatileMemory(size_t size, bool device)
void PopNonPersistentMemory(RecoStep step, uint64_t tag)
void ReturnVolatileMemory()
void ComputeReuseMax(GPUProcessor *proc)
void ResetRegisteredMemoryPointers(GPUProcessor *proc)
GPUMemoryResource & Res(int16_t num)
uint32_t NStreams() const
const GPUParam & GetParam() const
void PushNonPersistentMemory(uint64_t tag)
GPUMemorySizeScalers * MemoryScalers()
size_t AllocateRegisteredMemory(GPUProcessor *proc, bool resetCustom=false)
int16_t MemoryResTracklets() const
int16_t MemoryResLinks() const
void SetMaxData(const GPUTrackingInOutPointers &io)
int16_t MemoryResOutput() const
void DumpTrackHits(std::ostream &out)
void DumpLinks(std::ostream &out, int32_t phase)
void DumpStartHits(std::ostream &out)
void DumpHitWeights(std::ostream &out)
int16_t MemoryResCommon() const
int32_t CheckEmptySector()
void DumpTrackingData(std::ostream &out)
void DumpTrackletHits(std::ostream &out)
constexpr T qStr2Tag(const char *str)
deviceEvent sector[NSECTORS]
GPUTPCTracker tpcTrackers[GPUCA_NSECTORS]
const GPUTPCHitId * sectorClusters[NSECTORS]
const o2::tpc::ClusterNativeAccess * clustersNative
uint32_t nSectorClusters[NSECTORS]
const GPUTPCTrack * sectorTracks[NSECTORS]
const GPUSettingsTF * settingsTF
uint32_t nSectorTracks[NSECTORS]
GPUOutputControl tpcOccupancyMap
size_t getIndex(const GPUOutputControl &v)
unsigned int nClustersSector[constants::MAXSECTOR]
unsigned int nClustersTotal
unsigned int clusterOffset[constants::MAXSECTOR][constants::MAXGLOBALPADROW]