22#ifdef GPUCA_TRACKLET_CONSTRUCTOR_DO_PROFILE
28#define PROFILE_MAX_SIZE (100 * 1024 * 1024)
// Packs an 8-bit colour triple into a 32-bit pixel word for the BMP profile
// dump: red in bits 0-7, green in bits 8-15, blue in bits 16-23; the top
// byte stays zero. (BMP stores 32-bit pixels little-endian, so this lays the
// bytes out as B,G,R,0 on disk.)
//
// @param r red channel   (0-255)
// @param g green channel (0-255)
// @param b blue channel  (0-255)
// @return packed pixel value r | g<<8 | b<<16
static inline uint32_t RGB(uint8_t r, uint8_t g, uint8_t b)
{
  return (uint32_t)r | ((uint32_t)g << 8) | ((uint32_t)b << 16);
}
36#ifdef GPUCA_TRACKLET_CONSTRUCTOR_DO_PROFILE
46#ifdef GPUCA_TRACKLET_CONSTRUCTOR_DO_PROFILE
50 FILE* fp = fopen(
"profile.txt",
"w+");
51 FILE* fp2 = fopen(
"profile.bmp",
"w+b");
53 const int32_t bmpheight = 8192;
56 memset(&bmpFH, 0,
sizeof(bmpFH));
57 memset(&bmpIH, 0,
sizeof(bmpIH));
60 bmpFH.
bfSize =
sizeof(bmpFH) +
sizeof(bmpIH) + (ConstructorBlockCount() * ConstructorThreadCount() / 32 * 33 - 1) * bmpheight;
61 bmpFH.
bfOffBits =
sizeof(bmpFH) +
sizeof(bmpIH);
63 bmpIH.
biSize =
sizeof(bmpIH);
64 bmpIH.
biWidth = ConstructorBlockCount() * ConstructorThreadCount() / 32 * 33 - 1;
69 fwrite(&bmpFH, 1,
sizeof(bmpFH), fp2);
70 fwrite(&bmpIH, 1,
sizeof(bmpIH), fp2);
72 int32_t nEmptySync = 0;
73 for (uint32_t
i = 0;
i < bmpheight * ConstructorBlockCount() * ConstructorThreadCount();
i += ConstructorBlockCount() * ConstructorThreadCount()) {
75 for (uint32_t
j = 0;
j < ConstructorBlockCount() * ConstructorThreadCount();
j++) {
76 fprintf(fp,
"%d\t", stageAtSync[
i +
j]);
78 if (stageAtSync[
i +
j] == 1) {
79 color = RGB(255, 0, 0);
81 if (stageAtSync[
i +
j] == 2) {
82 color = RGB(0, 255, 0);
84 if (stageAtSync[
i +
j] == 3) {
85 color = RGB(0, 0, 255);
87 if (stageAtSync[
i +
j] == 4) {
88 color = RGB(255, 255, 0);
90 fwrite(&
color, 1,
sizeof(int32_t), fp2);
91 if (
j > 0 &&
j % 32 == 0) {
92 color = RGB(255, 255, 255);
93 fwrite(&
color, 1, 4, fp2);
95 if (stageAtSync[
i +
j]) {
117struct GPUChainTrackingMemUsage {
118 void add(
size_t n,
size_t bound)
120 nMax = std::max(nMax,
n);
121 maxUse = std::max(
n / std::max<double>(bound, 1.), maxUse);
128 size_t nBoundSum = 0;
133void addToMap(std::string
name, std::map<std::string, GPUChainTrackingMemUsage>& map, uint64_t
n, uint64_t bound)
135 GPUChainTrackingMemUsage& obj = map.insert({
name, {}}).
first->second;
142 std::map<std::string, GPUChainTrackingMemUsage> usageMap;
144#ifdef GPUCA_TPC_GEOMETRY_O2
145 addToMap(
"TPC Clusterer Sector Peaks", usageMap,
processors()->tpcClusterer[
i].mPmemory->counters.nPeaks,
processors()->tpcClusterer[
i].mNMaxPeaks);
146 addToMap(
"TPC Clusterer Sector Clusters", usageMap,
processors()->tpcClusterer[
i].mPmemory->counters.nClusters,
processors()->tpcClusterer[
i].mNMaxClusters);
148 addToMap(
"TPC Sector Start Hits", usageMap, *
processors()->tpcTrackers[
i].NStartHits(),
processors()->tpcTrackers[
i].NMaxStartHits());
149 addToMap(
"TPC Sector Tracklets", usageMap, *
processors()->tpcTrackers[
i].NTracklets(),
processors()->tpcTrackers[
i].NMaxTracklets());
150 addToMap(
"TPC Sector TrackletHits", usageMap, *
processors()->tpcTrackers[
i].NRowHits(),
processors()->tpcTrackers[
i].NMaxRowHits());
151 addToMap(
"TPC Sector Tracks", usageMap, *
processors()->tpcTrackers[
i].NTracks(),
processors()->tpcTrackers[
i].NMaxTracks());
152 addToMap(
"TPC Sector TrackHits", usageMap, *
processors()->tpcTrackers[
i].NTrackHits(),
processors()->tpcTrackers[
i].NMaxTrackHits());
155 addToMap(
"TPC Tracks", usageMap,
processors()->tpcMerger.NOutputTracks(),
processors()->tpcMerger.NMaxTracks());
156 addToMap(
"TPC TrackHits", usageMap,
processors()->tpcMerger.NOutputTrackClusters(),
processors()->tpcMerger.NMaxOutputTrackClusters());
159 addToMap(
"TPC O2 Tracks", usageMap,
processors()->tpcMerger.NOutputTracksTPCO2(),
processors()->tpcMerger.NOutputTracksTPCO2());
160 addToMap(
"TPC O2 ClusRefs", usageMap,
processors()->tpcMerger.NOutputClusRefsTPCO2(),
processors()->tpcMerger.NOutputClusRefsTPCO2());
163#ifdef GPUCA_TPC_GEOMETRY_O2
164 addToMap(
"TPC ComprCache HitsAttached", usageMap,
processors()->tpcCompressor.mOutput->nAttachedClusters,
processors()->tpcCompressor.mMaxTrackClusters);
165 addToMap(
"TPC ComprCache HitsUnattached", usageMap,
processors()->tpcCompressor.mOutput->nUnattachedClusters,
processors()->tpcCompressor.mMaxClustersInCache);
166 addToMap(
"TPC ComprCache Tracks", usageMap,
processors()->tpcCompressor.mOutput->nTracks,
processors()->tpcCompressor.mMaxTracks);
169 for (
auto& elem : usageMap) {
170 printf(
"Mem Usage %-30s : %'14zu / %'14zu (%3.0f%% / %3.0f%% / count %3u / max %'14zu)\n", elem.first.c_str(), elem.second.nSum, elem.second.nBoundSum, 100. * elem.second.nSum / std::max<size_t>(1, elem.second.nBoundSum), 100. * elem.second.maxUse, elem.second.count, elem.second.nMax);
177 GPUInfo(
"MEMREL StartHits NCl %d NTrkl %d",
processors()->tpcTrackers[
i].NHitsTotal(), *
processors()->tpcTrackers[
i].NStartHits());
178 GPUInfo(
"MEMREL Tracklets NCl %d NTrkl %d",
processors()->tpcTrackers[
i].NHitsTotal(), *
processors()->tpcTrackers[
i].NTracklets());
179 GPUInfo(
"MEMREL Tracklets NCl %d NTrkl %d",
processors()->tpcTrackers[
i].NHitsTotal(), *
processors()->tpcTrackers[
i].NRowHits());
180 GPUInfo(
"MEMREL SectorTracks NCl %d NTrk %d",
processors()->tpcTrackers[
i].NHitsTotal(), *
processors()->tpcTrackers[
i].NTracks());
181 GPUInfo(
"MEMREL SectorTrackHits NCl %d NTrkH %d",
processors()->tpcTrackers[
i].NHitsTotal(), *
processors()->tpcTrackers[
i].NTrackHits());
183 GPUInfo(
"MEMREL Tracks NCl %d NTrk %d",
processors()->tpcMerger.NMaxClusters(),
processors()->tpcMerger.NOutputTracks());
184 GPUInfo(
"MEMREL TrackHitss NCl %d NTrkH %d",
processors()->tpcMerger.NMaxClusters(),
processors()->tpcMerger.NOutputTrackClusters());
189#ifdef GPUCA_KERNEL_DEBUGGER_OUTPUT
202#ifdef GPUCA_KERNEL_DEBUGGER_OUTPUT
211 int32_t nTracks = 0, nAttachedClusters = 0, nAttachedClustersFitted = 0, nAdjacentClusters = 0;
224 for (uint32_t k = 0; k < nCls; k++) {
232 char trdText[1024] =
"";
233 if (
GetRecoSteps() & GPUDataTypes::RecoStep::TRDTracking) {
234 int32_t nTRDTracks = 0;
235 int32_t nTRDTracklets = 0;
239 nTRDTracklets += trk.getNtracklets();
240 nTRDTracks += trk.getNtracklets() != 0;
243 nTRDTracklets += trk.getNtracklets();
244 nTRDTracks += trk.getNtracklets() != 0;
247 snprintf(trdText, 1024,
" - TRD Tracker reconstructed %d tracks (%d tracklets)", nTRDTracks, nTRDTracklets);
249 GPUInfo(
"Output Tracks: %d (%d / %d / %d / %d clusters (fitted / attached / adjacent / total) - %s format)%s", nTracks, nAttachedClustersFitted, nAttachedClusters, nAdjacentClusters, nCls,
GetProcessingSettings().createO2Output > 1 ?
"O2" :
"GPU", trdText);
252void GPUChainTracking::SanityCheck()
258 const auto&
ref = trk.getClusterRef();
260 if (nErrors++ < 1000) {
266 if (nErrors++ < 1000) {
271 for (int32_t
j = 0;
j < trk.getNClusters();
j++) {
276 if (nErrors++ < 1000) {
277 GPUError(
"Invalid sector / row %d / %d", (int32_t)sector, (int32_t)
row);
282 if (nErrors++ < 1000) {
290 GPUInfo(
"Sanity check passed");
292 GPUError(
"Sanity check found %lu errors", nErrors);
300 for (int32_t iPhase = 0; iPhase < 2; iPhase++) {
301 uint32_t countTotal = 0;
305 for (uint32_t k = 0; k <
clusters->nClusters[iSector][iRow]; k++) {
308 if (applyClusterCuts) {
315 keep = keep && (!
GetProcessingSettings().tpcApplyDebugClusterFilter || clusterFilter.filter(iSector, iRow, cl));
316 if (iPhase && keep) {
317 outputBuffer[countTotal] = cl;
328 clusters->clustersLinear = outputBuffer;
331 outputBuffer = allocator(countTotal);
const GPUTPCGMMerger & GetTPCMerger() const
void PrintMemoryStatistics() override
void PrintMemoryRelations()
void PrepareDebugOutput()
GPUTrackingInOutPointers & mIOPtrs
GPUReconstruction::RecoStepField GetRecoSteps() const
virtual std::unique_ptr< gpu_reconstruction_kernels::threadContext > GetThreadContext()
void WriteToConstantMemory(RecoStep step, size_t offset, const void *src, size_t size, int32_t stream=-1, deviceEvent *ev=nullptr)
uint32_t ThreadCount() const
GPUConstantMem * processors()
void SetupGPUProcessor(T *proc, bool allocate)
const GPUSettingsProcessing & GetProcessingSettings() const
GPUReconstructionCPU * mRec
GPUConstantMem * processorsShadow()
static constexpr int32_t NSECTORS
void TransferMemoryResourcesToHost(RecoStep step, GPUProcessor *proc, int32_t stream=-1, bool all=false)
uint32_t BlockCount() const
GPUReconstruction * rec()
virtual size_t GPUMemCpy(void *dst, const void *src, size_t size, int32_t stream, int32_t toGPU, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
void * AllocateUnmanagedMemory(size_t size, int32_t type)
GPUMemorySizeScalers * MemoryScalers()
const GPUSettingsProcessing & GetProcessingSettings() const
GLuint const GLchar * name
GLboolean GLboolean GLboolean b
typedef void(APIENTRYP PFNGLCULLFACEPROC)(GLenum mode)
uint8_t itsSharedClusterMap uint8_t
GPUTPCTracker tpcTrackers[GPUCA_NSECTORS]
size_t NTPCClusters(size_t tpcDigits, bool perSector=false)
const o2::tpc::ClusterNativeAccess * clustersNative
const uint32_t * outputClusRefsTPCO2
const uint32_t * mergedTrackHitAttachment
const GPUTRDTrackGPU * trdTracks
const GPUTRDTrack * trdTracksO2
uint32_t nOutputTracksTPCO2
uint32_t nMergedTrackHits
uint32_t nOutputClusRefsTPCO2
const o2::tpc::TrackTPC * outputTracksTPCO2
const GPUTPCGMMergedTrack * mergedTracks
unsigned int nClusters[constants::MAXSECTOR][constants::MAXGLOBALPADROW]
unsigned int nClustersTotal
std::vector< Cluster > clusters