22#include "GPUDefParametersRuntime.h"
38uint32_t GPUChainTracking::StreamForSector(uint32_t sector)
const
45 const uint32_t
stream = StreamForSector(iSector);
46 runKernel<GPUTPCExtrapolationTracking>({
GetGridBlk(256,
stream), {iSector}});
57 GPUWarning(
"This GPU is stuck, processing of tracking for this event is skipped!");
63 int32_t
retVal = RunTPCTrackingSectors_internal();
70int32_t GPUChainTracking::RunTPCTrackingSectors_internal()
73 GPUInfo(
"Running TPC Sector Tracker");
76 if (!
param().par.earlyTpcTransform) {
88 if (doGPU &&
GetRecoSteps().isSet(RecoStep::TPCConversion)) {
97 for (uint32_t iSector = 0; iSector <
NSECTORS; iSector++) {
101 for (uint32_t iSector = 0; iSector <
NSECTORS; iSector++) {
106 for (uint32_t iSector = 0; iSector <
NSECTORS; iSector++) {
118 GPUInfo(
"Copying Tracker objects to GPU");
126 std::fill(streamInit, streamInit +
mRec->
NStreams(),
false);
127 streamInit[streamInitAndOccMap] =
true;
130 if (
param().
rec.tpc.occupancyMapTimeBins ||
param().
rec.tpc.sysClusErrorC12Norm) {
133 if (
param().
rec.tpc.occupancyMapTimeBins) {
139 runKernel<GPUMemClean16>(
GetGridAutoStep(streamInitAndOccMap, RecoStep::TPCSectorTracking), ptrTmp, GPUTPCClusterOccupancyMapBin::getTotalSize(
param()));
141 runKernel<GPUTPCCreateOccupancyMap, GPUTPCCreateOccupancyMap::fold>(
GetGridBlk(GPUTPCClusterOccupancyMapBin::getNBins(
param()), streamInitAndOccMap), ptrTmp,
ptr + 2);
143 mInputsHost->mTPCClusterOccupancyMap[1] =
param().
rec.tpc.occupancyMapTimeBins * 0x10000 +
param().
rec.tpc.occupancyMapTimeBinsAverage;
150 if (
param().
rec.tpc.occupancyMapTimeBins ||
param().
rec.tpc.sysClusErrorC12Norm) {
151 uint32_t& occupancyTotal = *
mInputsHost->mTPCClusterOccupancyMap;
162 int32_t useStream = StreamForSector(iSector);
165 GPUInfo(
"Creating Sector Data (Sector %d)", iSector);
169 streamInit[useStream] =
true;
171 runKernel<GPUTPCSectorDebugSortKernels, GPUTPCSectorDebugSortKernels::hitData>({
GetGridBlk(
GPUCA_ROW_COUNT, useStream), {iSector}});
178 *
mDebugFile <<
"\n\nReconstruction: Sector " << iSector <<
"/" <<
NSECTORS << std::endl;
187 runKernel<GPUMemClean16>(
GetGridAutoStep(useStream, RecoStep::TPCSectorTracking), trkShadow.Data().HitWeights(), trkShadow.Data().NumberOfHitsPlusAlign() *
sizeof(*trkShadow.Data().HitWeights()));
189 streamInit[useStream] =
true;
204 runKernel<GPUTPCStartHitsSorter>({
GetGridAuto(useStream), {iSector}});
207 runKernel<GPUTPCSectorDebugSortKernels, GPUTPCSectorDebugSortKernels::startHits>({
GetGrid(1, 1, useStream), {iSector}});
217 runKernel<GPUTPCTrackletConstructor>({
GetGridAuto(useStream), {iSector}});
223 runKernel<GPUTPCTrackletSelector>({
GetGridAuto(useStream), {iSector}});
224 runKernel<GPUTPCExtrapolationTrackingCopyNumbers>({{1, -
ThreadCount(), useStream}, {iSector}}, 1);
226 runKernel<GPUTPCSectorDebugSortKernels, GPUTPCSectorDebugSortKernels::sectorTracks>({
GetGrid(1, 1, useStream), {iSector}});
229 streamMap[iSector] = useStream;
231 GPUInfo(
"Sector %u, Number of tracks: %d", iSector, *trk.NTracks());
241 if (
param().
rec.tpc.extrapolationTracking) {
243 for (uint32_t iSector = 0; iSector <
NSECTORS; iSector++) {
244 for (uint32_t iStream = 0; iStream <
mRec->
NStreams(); iStream++) {
245 blocking[iSector *
mRec->
NStreams() + iStream] = StreamForSector(iSector) == iStream;
248 for (uint32_t iSector = 0; iSector <
NSECTORS; iSector++) {
249 uint32_t tmpSector = GPUTPCExtrapolationTracking::ExtrapolationTrackingSectorOrder(iSector);
250 uint32_t sectorLeft, sectorRight;
251 GPUTPCExtrapolationTracking::ExtrapolationTrackingSectorLeftRight(tmpSector, sectorLeft, sectorRight);
252 if (doGPU && !blocking[tmpSector *
mRec->
NStreams() + StreamForSector(sectorLeft)]) {
254 blocking[tmpSector *
mRec->
NStreams() + StreamForSector(sectorLeft)] =
true;
256 if (doGPU && !blocking[tmpSector *
mRec->
NStreams() + StreamForSector(sectorRight)]) {
258 blocking[tmpSector *
mRec->
NStreams() + StreamForSector(sectorRight)] =
true;
265 for (uint32_t iSector = 0; iSector <
NSECTORS; iSector++) {
271 if (
param().
rec.tpc.extrapolationTracking) {
272 ExtrapolationTracking(iSector, true);
279 for (uint32_t iSector = 0; iSector <
NSECTORS; iSector++) {
280 GPUInfo(
"Sector %d - Tracks: Local %d Extrapolated %d - Hits: Local %d Extrapolated %d", iSector,
281 processors()->tpcTrackers[iSector].CommonMemory()->nLocalTracks,
processors()->tpcTrackers[iSector].CommonMemory()->nTracks,
processors()->tpcTrackers[iSector].CommonMemory()->nLocalTrackHits,
processors()->tpcTrackers[iSector].CommonMemory()->nTrackHits);
298 GPUInfo(
"TPC Sector Tracker finished");
#define GPUCA_MAX_STREAMS
int32_t RunTPCTrackingSectors()
std::unique_ptr< GPUTrackingInputProvider > mInputsHost
std::array< GPUOutputControl *, GPUTrackingOutputs::count()> mSubOutputControls
std::unique_ptr< std::ofstream > mDebugFile
GPUTrackingInOutPointers & mIOPtrs
std::unique_ptr< GPUTrackingInputProvider > mInputsShadow
int32_t ExtrapolationTracking(uint32_t iSector, bool blocking)
void TransferMemoryResourceLinkToGPU(RecoStep step, int16_t res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
void GPUMemCpy(RecoStep step, void *dst, const void *src, size_t size, int32_t stream, int32_t toGPU, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
GPUReconstruction::RecoStepField GetRecoStepsGPU() const
GPUReconstruction::RecoStepField GetRecoSteps() const
void WriteToConstantMemory(RecoStep step, size_t offset, const void *src, size_t size, int32_t stream=-1, deviceEvent *ev=nullptr)
void ReleaseEvent(deviceEvent ev, bool doGPU=true)
uint32_t ThreadCount() const
size_t AllocateRegisteredMemory(GPUProcessor *proc)
virtual std::unique_ptr< GPUReconstructionProcessing::threadContext > GetThreadContext()
GPUConstantMem * processors()
void StreamWaitForEvents(int32_t stream, deviceEvent *evList, int32_t nEvents=1)
void SetupGPUProcessor(T *proc, bool allocate)
const GPUSettingsProcessing & GetProcessingSettings() const
void SynchronizeStream(int32_t stream)
GPUReconstructionCPU * mRec
GPUConstantMem * processorsShadow()
krnlExec GetGridAutoStep(int32_t stream, GPUDataTypes::RecoStep st=GPUDataTypes::RecoStep::NoRecoStep)
static constexpr int32_t NSECTORS
void TransferMemoryResourceLinkToHost(RecoStep step, int16_t res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
void TransferMemoryResourcesToHost(RecoStep step, GPUProcessor *proc, int32_t stream=-1, bool all=false)
bool DoDebugAndDump(RecoStep step, int32_t mask, T &processor, S T::*func, Args &&... args)
krnlExec GetGrid(uint32_t totalItems, uint32_t nThreads, int32_t stream, GPUReconstruction::krnlDeviceType d=GPUReconstruction::krnlDeviceType::Auto, GPUDataTypes::RecoStep st=GPUDataTypes::RecoStep::NoRecoStep)
krnlExec GetGridAuto(int32_t stream, GPUReconstruction::krnlDeviceType d=GPUReconstruction::krnlDeviceType::Auto, GPUDataTypes::RecoStep st=GPUDataTypes::RecoStep::NoRecoStep)
krnlExec GetGridBlk(uint32_t nBlocks, int32_t stream, GPUReconstruction::krnlDeviceType d=GPUReconstruction::krnlDeviceType::Auto, GPUDataTypes::RecoStep st=GPUDataTypes::RecoStep::NoRecoStep)
GPUReconstruction * rec()
void TransferMemoryResourcesToGPU(RecoStep step, GPUProcessor *proc, int32_t stream=-1, bool all=false)
void UpdateParamOccupancyMap(const uint32_t *mapHost, const uint32_t *mapGPU, uint32_t occupancyTotal, int32_t stream=-1)
void runParallelOuterLoop(bool doGPU, uint32_t nThreads, std::function< void(uint32_t)> lambda)
const GPUDefParameters & getGPUParameters(bool doGPU) const override
void SetNActiveThreadsOuterLoop(uint32_t f)
void * AllocateVolatileMemory(size_t size, bool device)
void PopNonPersistentMemory(RecoStep step, uint64_t tag)
void ReturnVolatileMemory()
void ComputeReuseMax(GPUProcessor *proc)
void ResetRegisteredMemoryPointers(GPUProcessor *proc)
GPUMemoryResource & Res(int16_t num)
uint32_t NStreams() const
const GPUParam & GetParam() const
void PushNonPersistentMemory(uint64_t tag)
GPUMemorySizeScalers * MemoryScalers()
size_t AllocateRegisteredMemory(GPUProcessor *proc, bool resetCustom=false)
int16_t MemoryResTracklets() const
int16_t MemoryResLinks() const
void SetMaxData(const GPUTrackingInOutPointers &io)
int16_t MemoryResOutput() const
void DumpTrackHits(std::ostream &out)
void DumpLinks(std::ostream &out, int32_t phase)
void DumpStartHits(std::ostream &out)
void DumpHitWeights(std::ostream &out)
int16_t MemoryResCommon() const
int32_t CheckEmptySector()
void DumpTrackingData(std::ostream &out)
void DumpTrackletHits(std::ostream &out)
constexpr T qStr2Tag(const char *str)
deviceEvent sector[NSECTORS]
GPUTPCTracker tpcTrackers[GPUCA_NSECTORS]
const GPUTPCHitId * sectorClusters[NSECTORS]
const o2::tpc::ClusterNativeAccess * clustersNative
uint32_t nClusterData[NSECTORS]
uint32_t nSectorClusters[NSECTORS]
const GPUTPCTrack * sectorTracks[NSECTORS]
const GPUSettingsTF * settingsTF
const GPUTPCClusterData * clusterData[NSECTORS]
uint32_t nSectorTracks[NSECTORS]
GPUOutputControl tpcOccupancyMap
size_t getIndex(const GPUOutputControl &v)
unsigned int nClustersSector[constants::MAXSECTOR]
unsigned int nClustersTotal
unsigned int clusterOffset[constants::MAXSECTOR][constants::MAXGLOBALPADROW]