GPUChainTrackingClusterizer.cxx
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
14
15#include "GPUChainTracking.h"
17#include "GPULogging.h"
18#include "GPUO2DataTypes.h"
21#include "GPUNewCalibValues.h"
22#include <fstream>
23
24#ifdef GPUCA_O2_LIB
26#endif
27#include "GPUTriggerOutputs.h"
28#include "GPUHostDataTypes.h"
34#include "TPCBase/RDHUtils.h"
35
36#include "utils/strtag.h"
37
38#ifndef GPUCA_NO_VC
39#include <Vc/Vc>
40#endif
41
42using namespace o2::gpu;
43using namespace o2::tpc;
44using namespace o2::tpc::constants;
45using namespace o2::dataformats;
46
47#ifdef GPUCA_TPC_GEOMETRY_O2
48std::pair<uint32_t, uint32_t> GPUChainTracking::TPCClusterizerDecodeZSCountUpdate(uint32_t iSector, const CfFragment& fragment)
49{
50 bool doGPU = mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCClusterFinding;
51 GPUTPCClusterFinder& clusterer = processors()->tpcClusterer[iSector];
52 GPUTPCClusterFinder::ZSOffset* o = clusterer.mPzsOffsets; // write cursor into the per-page ZS offset table
53 uint32_t digits = 0;
54 uint32_t pages = 0;
55 for (uint16_t j = 0; j < GPUTrackingInOutZS::NENDPOINTS; j++) {
56 clusterer.mMinMaxCN[j] = mCFContext->fragmentData[fragment.index].minMaxCN[iSector][j];
57 if (doGPU) {
58 uint16_t posInEndpoint = 0;
59 uint16_t pagesEndpoint = 0;
60 for (uint32_t k = clusterer.mMinMaxCN[j].zsPtrFirst; k < clusterer.mMinMaxCN[j].zsPtrLast; k++) {
61 const uint32_t pageFirst = (k == clusterer.mMinMaxCN[j].zsPtrFirst) ? clusterer.mMinMaxCN[j].zsPageFirst : 0;
62 const uint32_t pageLast = (k + 1 == clusterer.mMinMaxCN[j].zsPtrLast) ? clusterer.mMinMaxCN[j].zsPageLast : mIOPtrs.tpcZS->sector[iSector].nZSPtr[j][k];
63 for (uint32_t l = pageFirst; l < pageLast; l++) {
64 uint16_t pageDigits = mCFContext->fragmentData[fragment.index].pageDigits[iSector][j][posInEndpoint++];
65 if (pageDigits) {
66 *(o++) = GPUTPCClusterFinder::ZSOffset{digits, j, pagesEndpoint};
67 digits += pageDigits;
68 }
69 pagesEndpoint++;
70 }
71 }
72 if (pagesEndpoint != mCFContext->fragmentData[fragment.index].pageDigits[iSector][j].size()) {
73 if (GetProcessingSettings().ignoreNonFatalGPUErrors) {
74 GPUError("TPC raw page count mismatch in TPCClusterizerDecodeZSCountUpdate: expected %d / buffered %lu", pagesEndpoint, mCFContext->fragmentData[fragment.index].pageDigits[iSector][j].size());
75 return {0, 0};
76 } else {
77 GPUFatal("TPC raw page count mismatch in TPCClusterizerDecodeZSCountUpdate: expected %d / buffered %lu", pagesEndpoint, mCFContext->fragmentData[fragment.index].pageDigits[iSector][j].size());
78 }
79 }
80 } else {
82 digits += mCFContext->fragmentData[fragment.index].nDigits[iSector][j];
83 pages += mCFContext->fragmentData[fragment.index].nPages[iSector][j];
84 }
85 }
86 if (doGPU) {
87 pages = o - processors()->tpcClusterer[iSector].mPzsOffsets;
88 }
89 if (!doGPU && GetProcessingSettings().debugLevel >= 4 && mCFContext->zsVersion >= ZSVersion::ZSVersionDenseLinkBased) {
90 TPCClusterizerEnsureZSOffsets(iSector, fragment);
91 }
92 return {digits, pages};
93}
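// A minimal standalone sketch (not part of the chain, all names hypothetical): the GPU path above fills
// clusterer.mPzsOffsets with one entry per non-empty page, carrying the cumulative digit offset, the
// endpoint, and the page index within that endpoint. Relies only on std::vector, which the file already uses.
namespace
{
struct PageOffsetSketch {
  uint32_t firstDigit; // prefix sum of ADC samples of all preceding non-empty pages
  uint16_t endpoint;   // endpoint the page belongs to
  uint16_t pageIndex;  // page index within that endpoint
};

inline std::vector<PageOffsetSketch> buildPageOffsetsSketch(const std::vector<std::vector<uint16_t>>& samplesPerPage)
{
  std::vector<PageOffsetSketch> offsets;
  uint32_t digits = 0;
  for (size_t endpoint = 0; endpoint < samplesPerPage.size(); endpoint++) {
    for (size_t page = 0; page < samplesPerPage[endpoint].size(); page++) {
      if (samplesPerPage[endpoint][page]) { // pages without samples get no offset entry
        offsets.push_back(PageOffsetSketch{digits, (uint16_t)endpoint, (uint16_t)page});
        digits += samplesPerPage[endpoint][page];
      }
    }
  }
  return offsets;
}
} // namespace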
94
95void GPUChainTracking::TPCClusterizerEnsureZSOffsets(uint32_t iSector, const CfFragment& fragment)
96{
97 GPUTPCClusterFinder& clusterer = processors()->tpcClusterer[iSector];
98 uint32_t nAdcs = 0;
99 for (uint16_t endpoint = 0; endpoint < GPUTrackingInOutZS::NENDPOINTS; endpoint++) {
100 const auto& data = mCFContext->fragmentData[fragment.index];
101 uint32_t pagesEndpoint = 0;
102 const uint32_t nAdcsExpected = data.nDigits[iSector][endpoint];
103 const uint32_t nPagesExpected = data.nPages[iSector][endpoint];
104
105 uint32_t nAdcDecoded = 0;
106 const auto& zs = mIOPtrs.tpcZS->sector[iSector];
107 for (uint32_t i = data.minMaxCN[iSector][endpoint].zsPtrFirst; i < data.minMaxCN[iSector][endpoint].zsPtrLast; i++) {
108 const uint32_t pageFirst = (i == data.minMaxCN[iSector][endpoint].zsPtrFirst) ? data.minMaxCN[iSector][endpoint].zsPageFirst : 0;
109 const uint32_t pageLast = (i + 1 == data.minMaxCN[iSector][endpoint].zsPtrLast) ? data.minMaxCN[iSector][endpoint].zsPageLast : zs.nZSPtr[endpoint][i];
110 for (uint32_t j = pageFirst; j < pageLast; j++) {
111 const uint8_t* page = static_cast<const uint8_t*>(zs.zsPtr[endpoint][i]) + j * TPCZSHDR::TPC_ZS_PAGE_SIZE;
112 const header::RAWDataHeader* rawDataHeader = reinterpret_cast<const header::RAWDataHeader*>(page);
113 const TPCZSHDRV2* decHdr = reinterpret_cast<const TPCZSHDRV2*>(page + raw::RDHUtils::getMemorySize(*rawDataHeader) - sizeof(TPCZSHDRV2));
114 const uint16_t nSamplesInPage = decHdr->nADCsamples;
115
116 nAdcDecoded += nSamplesInPage;
117 pagesEndpoint++;
118 }
119 }
120
121 if (pagesEndpoint != nPagesExpected) {
122 GPUFatal("Sector %d, Endpoint %d, Fragment %d: TPC raw page count mismatch: expected %d / buffered %lu", iSector, endpoint, fragment.index, pagesEndpoint, nPagesExpected);
123 }
124
125 if (nAdcDecoded != nAdcsExpected) {
126 GPUFatal("Sector %d, Endpoint %d, Fragment %d: TPC ADC count mismatch: expected %u, buffered %u", iSector, endpoint, fragment.index, nAdcsExpected, nAdcDecoded);
127 }
128
129 if (nAdcs != clusterer.mPzsOffsets[endpoint].offset) {
130 GPUFatal("Sector %d, Endpoint %d, Fragment %d: TPC ADC offset mismatch: expected %u, buffered %u", iSector, endpoint, fragment.index, nAdcs, clusterer.mPzsOffsets[endpoint].offset);
131 }
132
133 nAdcs += nAdcsExpected;
134 }
135}
136
137namespace
138{
139struct TPCCFDecodeScanTmp {
140 int32_t zsPtrFirst, zsPageFirst, zsPtrLast, zsPageLast, hasData, pageCounter;
141};
142} // namespace
143
144std::pair<uint32_t, uint32_t> GPUChainTracking::TPCClusterizerDecodeZSCount(uint32_t iSector, const CfFragment& fragment)
145{
146 mRec->getGeneralStepTimer(GeneralStep::Prepare).Start();
147 uint32_t nDigits = 0;
148 uint32_t nPages = 0;
149 uint32_t endpointAdcSamples[GPUTrackingInOutZS::NENDPOINTS];
150 memset(endpointAdcSamples, 0, sizeof(endpointAdcSamples));
151 bool doGPU = mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCClusterFinding;
152 int32_t firstHBF = (mIOPtrs.settingsTF && mIOPtrs.settingsTF->hasTfStartOrbit) ? mIOPtrs.settingsTF->tfStartOrbit : (mIOPtrs.tpcZS->sector[iSector].count[0] && mIOPtrs.tpcZS->sector[iSector].nZSPtr[0][0]) ? o2::raw::RDHUtils::getHeartBeatOrbit(*(const o2::header::RAWDataHeader*)mIOPtrs.tpcZS->sector[iSector].zsPtr[0][0]) : 0;
153
154 for (uint16_t j = 0; j < GPUTrackingInOutZS::NENDPOINTS; j++) {
155#ifndef GPUCA_NO_VC
156 if (GetProcessingSettings().prefetchTPCpageScan >= 3 && j < GPUTrackingInOutZS::NENDPOINTS - 1) {
157 for (uint32_t k = 0; k < mIOPtrs.tpcZS->sector[iSector].count[j + 1]; k++) {
158 for (uint32_t l = 0; l < mIOPtrs.tpcZS->sector[iSector].nZSPtr[j + 1][k]; l++) {
159 Vc::Common::prefetchMid(((const uint8_t*)mIOPtrs.tpcZS->sector[iSector].zsPtr[j + 1][k]) + l * TPCZSHDR::TPC_ZS_PAGE_SIZE);
160 Vc::Common::prefetchMid(((const uint8_t*)mIOPtrs.tpcZS->sector[iSector].zsPtr[j + 1][k]) + l * TPCZSHDR::TPC_ZS_PAGE_SIZE + sizeof(o2::header::RAWDataHeader));
161 }
162 }
163 }
164#endif
165
166 std::vector<std::pair<CfFragment, TPCCFDecodeScanTmp>> fragments;
167 fragments.reserve(mCFContext->nFragments);
168 fragments.emplace_back(std::pair<CfFragment, TPCCFDecodeScanTmp>{fragment, {0, 0, 0, 0, 0, -1}});
169 for (uint32_t i = 1; i < mCFContext->nFragments; i++) {
170 fragments.emplace_back(std::pair<CfFragment, TPCCFDecodeScanTmp>{fragments.back().first.next(), {0, 0, 0, 0, 0, -1}});
171 }
172 std::vector<bool> fragmentExtends(mCFContext->nFragments, false);
173
174 uint32_t firstPossibleFragment = 0;
175 uint32_t pageCounter = 0;
176 uint32_t emptyPages = 0;
177 for (uint32_t k = 0; k < mIOPtrs.tpcZS->sector[iSector].count[j]; k++) {
178 if (GetProcessingSettings().tpcSingleSector != -1 && GetProcessingSettings().tpcSingleSector != (int32_t)iSector) {
179 break;
180 }
181 nPages += mIOPtrs.tpcZS->sector[iSector].nZSPtr[j][k];
182 for (uint32_t l = 0; l < mIOPtrs.tpcZS->sector[iSector].nZSPtr[j][k]; l++) {
183#ifndef GPUCA_NO_VC
184 if (GetProcessingSettings().prefetchTPCpageScan >= 2 && l + 1 < mIOPtrs.tpcZS->sector[iSector].nZSPtr[j][k]) {
185 Vc::Common::prefetchForOneRead(((const uint8_t*)mIOPtrs.tpcZS->sector[iSector].zsPtr[j][k]) + (l + 1) * TPCZSHDR::TPC_ZS_PAGE_SIZE);
186 Vc::Common::prefetchForOneRead(((const uint8_t*)mIOPtrs.tpcZS->sector[iSector].zsPtr[j][k]) + (l + 1) * TPCZSHDR::TPC_ZS_PAGE_SIZE + sizeof(o2::header::RAWDataHeader));
187 }
188#endif
189 const uint8_t* const page = ((const uint8_t*)mIOPtrs.tpcZS->sector[iSector].zsPtr[j][k]) + l * TPCZSHDR::TPC_ZS_PAGE_SIZE;
190 const o2::header::RAWDataHeader* rdh = (const o2::header::RAWDataHeader*)page;
191 if (o2::raw::RDHUtils::getMemorySize(*rdh) == sizeof(o2::header::RAWDataHeader)) {
192 emptyPages++;
193 continue;
194 }
195 pageCounter++;
196 const TPCZSHDR* const hdr = (const TPCZSHDR*)(rdh_utils::getLink(o2::raw::RDHUtils::getFEEID(*rdh)) == rdh_utils::DLBZSLinkID ? (page + o2::raw::RDHUtils::getMemorySize(*rdh) - sizeof(TPCZSHDRV2)) : (page + sizeof(o2::header::RAWDataHeader)));
197 if (mCFContext->zsVersion == -1) {
198 mCFContext->zsVersion = hdr->version;
199 if (GetProcessingSettings().param.tpcTriggerHandling && mCFContext->zsVersion < ZSVersion::ZSVersionDenseLinkBased) { // TODO: Move tpcTriggerHandling to recoSteps bitmask
200 static bool errorShown = false;
201 if (errorShown == false) {
202 GPUAlarm("Trigger handling only possible with TPC Dense Link Based data, received version %d, disabling", mCFContext->zsVersion);
203 }
204 errorShown = true;
205 }
206 } else if (mCFContext->zsVersion != (int32_t)hdr->version) {
207 GPUError("Received TPC ZS 8kb page of mixed versions, expected %d, received %d (linkid %d, feeCRU %d, feeEndpoint %d, feelinkid %d)", mCFContext->zsVersion, (int32_t)hdr->version, (int32_t)o2::raw::RDHUtils::getLinkID(*rdh), (int32_t)rdh_utils::getCRU(*rdh), (int32_t)rdh_utils::getEndPoint(*rdh), (int32_t)rdh_utils::getLink(*rdh));
208 constexpr size_t bufferSize = 3 * std::max(sizeof(*rdh), sizeof(*hdr)) + 1;
209 char dumpBuffer[bufferSize];
210 for (size_t i = 0; i < sizeof(*rdh); i++) {
211 // "%02X " guaranteed to be 3 chars + ending 0.
212 snprintf(dumpBuffer + 3 * i, 4, "%02X ", (int32_t)((uint8_t*)rdh)[i]);
213 }
214 GPUAlarm("RDH of page: %s", dumpBuffer);
215 for (size_t i = 0; i < sizeof(*hdr); i++) {
216 // "%02X " guaranteed to be 3 chars + ending 0.
217 snprintf(dumpBuffer + 3 * i, 4, "%02X ", (int32_t)((uint8_t*)hdr)[i]);
218 }
219 GPUAlarm("Metainfo of page: %s", dumpBuffer);
220 if (GetProcessingSettings().ignoreNonFatalGPUErrors) {
221 mCFContext->abandonTimeframe = true;
222 return {0, 0};
223 } else {
224 GPUFatal("Cannot process with invalid TPC ZS data, exiting");
225 }
226 }
227 if (GetProcessingSettings().param.tpcTriggerHandling) {
228 const TPCZSHDRV2* const hdr2 = (const TPCZSHDRV2*)hdr;
229 if (hdr2->flags & TPCZSHDRV2::ZSFlags::TriggerWordPresent) {
230 const char* triggerWord = (const char*)hdr - TPCZSHDRV2::TRIGGER_WORD_SIZE;
232 memcpy((void*)&tmp.triggerWord, triggerWord, TPCZSHDRV2::TRIGGER_WORD_SIZE);
233 tmp.orbit = o2::raw::RDHUtils::getHeartBeatOrbit(*rdh);
234 if (tmp.triggerWord.isValid(0)) {
235 mTriggerBuffer->triggers.emplace(tmp);
236 }
237 }
238 }
239 nDigits += hdr->nADCsamples;
240 endpointAdcSamples[j] += hdr->nADCsamples;
241 uint32_t timeBin = (hdr->timeOffset + (o2::raw::RDHUtils::getHeartBeatOrbit(*rdh) - firstHBF) * o2::constants::lhc::LHCMaxBunches) / LHCBCPERTIMEBIN;
242 uint32_t maxTimeBin = timeBin + hdr->nTimeBinSpan;
243 if (mCFContext->zsVersion >= ZSVersion::ZSVersionDenseLinkBased) {
244 const TPCZSHDRV2* const hdr2 = (const TPCZSHDRV2*)hdr;
245 if (hdr2->flags & TPCZSHDRV2::ZSFlags::nTimeBinSpanBit8) {
246 maxTimeBin += 256;
247 }
248 }
249 if (maxTimeBin > mCFContext->tpcMaxTimeBin) {
250 mCFContext->tpcMaxTimeBin = maxTimeBin;
251 }
252 bool extendsInNextPage = false;
253 if (mCFContext->zsVersion >= ZSVersion::ZSVersionDenseLinkBased) {
254 if (l + 1 < mIOPtrs.tpcZS->sector[iSector].nZSPtr[j][k] && o2::raw::RDHUtils::getMemorySize(*rdh) == TPCZSHDR::TPC_ZS_PAGE_SIZE) {
255 const o2::header::RAWDataHeader* nextrdh = (const o2::header::RAWDataHeader*)(page + TPCZSHDR::TPC_ZS_PAGE_SIZE);
256 extendsInNextPage = o2::raw::RDHUtils::getHeartBeatOrbit(*nextrdh) == o2::raw::RDHUtils::getHeartBeatOrbit(*rdh) && o2::raw::RDHUtils::getMemorySize(*nextrdh) > sizeof(o2::header::RAWDataHeader);
257 }
258 }
259 while (firstPossibleFragment && (uint32_t)fragments[firstPossibleFragment - 1].first.last() > timeBin) {
260 firstPossibleFragment--;
261 }
262 auto handleExtends = [&](uint32_t ff) {
263 if (fragmentExtends[ff]) {
264 if (doGPU) {
265 // Only add extended page on GPU. On CPU the pages are in consecutive memory anyway.
266 // Not adding the page prevents an issue where a page is decoded twice on CPU, when only the extend should be decoded.
267 fragments[ff].second.zsPageLast++;
268 mCFContext->fragmentData[ff].nPages[iSector][j]++;
269 mCFContext->fragmentData[ff].pageDigits[iSector][j].emplace_back(0);
270 }
271 fragmentExtends[ff] = false;
272 }
273 };
274 if (mCFContext->zsVersion >= ZSVersion::ZSVersionDenseLinkBased) {
275 for (uint32_t ff = 0; ff < firstPossibleFragment; ff++) {
276 handleExtends(ff);
277 }
278 }
279 for (uint32_t f = firstPossibleFragment; f < mCFContext->nFragments; f++) {
280 if (timeBin < (uint32_t)fragments[f].first.last() && (uint32_t)fragments[f].first.first() <= maxTimeBin) {
281 if (!fragments[f].second.hasData) {
282 fragments[f].second.hasData = 1;
283 fragments[f].second.zsPtrFirst = k;
284 fragments[f].second.zsPageFirst = l;
285 } else {
286 if (pageCounter > (uint32_t)fragments[f].second.pageCounter + 1) {
287 mCFContext->fragmentData[f].nPages[iSector][j] += emptyPages + pageCounter - fragments[f].second.pageCounter - 1;
288 for (uint32_t k2 = fragments[f].second.zsPtrLast - 1; k2 <= k; k2++) {
289 for (uint32_t l2 = ((int32_t)k2 == fragments[f].second.zsPtrLast - 1) ? fragments[f].second.zsPageLast : 0; l2 < (k2 < k ? mIOPtrs.tpcZS->sector[iSector].nZSPtr[j][k2] : l); l2++) {
290 if (doGPU) {
291 mCFContext->fragmentData[f].pageDigits[iSector][j].emplace_back(0);
292 } else {
293 // CPU cannot skip unneeded pages, so we must keep space to store the invalid dummy clusters
294 const uint8_t* const pageTmp = ((const uint8_t*)mIOPtrs.tpcZS->sector[iSector].zsPtr[j][k2]) + l2 * TPCZSHDR::TPC_ZS_PAGE_SIZE;
295 const o2::header::RAWDataHeader* rdhTmp = (const o2::header::RAWDataHeader*)pageTmp;
296 if (o2::raw::RDHUtils::getMemorySize(*rdhTmp) != sizeof(o2::header::RAWDataHeader)) {
297 const TPCZSHDR* const hdrTmp = (const TPCZSHDR*)(rdh_utils::getLink(o2::raw::RDHUtils::getFEEID(*rdhTmp)) == rdh_utils::DLBZSLinkID ? (pageTmp + o2::raw::RDHUtils::getMemorySize(*rdhTmp) - sizeof(TPCZSHDRV2)) : (pageTmp + sizeof(o2::header::RAWDataHeader)));
298 mCFContext->fragmentData[f].nDigits[iSector][j] += hdrTmp->nADCsamples;
299 }
300 }
301 }
302 }
303 } else if (emptyPages) {
304 mCFContext->fragmentData[f].nPages[iSector][j] += emptyPages;
305 if (doGPU) {
306 for (uint32_t m = 0; m < emptyPages; m++) {
307 mCFContext->fragmentData[f].pageDigits[iSector][j].emplace_back(0);
308 }
309 }
310 }
311 }
312 fragments[f].second.zsPtrLast = k + 1;
313 fragments[f].second.zsPageLast = l + 1;
314 fragments[f].second.pageCounter = pageCounter;
315 mCFContext->fragmentData[f].nPages[iSector][j]++;
316 mCFContext->fragmentData[f].nDigits[iSector][j] += hdr->nADCsamples;
317 if (doGPU) {
318 mCFContext->fragmentData[f].pageDigits[iSector][j].emplace_back(hdr->nADCsamples);
319 }
320 fragmentExtends[f] = extendsInNextPage;
321 } else {
322 handleExtends(f);
323 if (timeBin < (uint32_t)fragments[f].first.last()) {
324 if (mCFContext->zsVersion >= ZSVersion::ZSVersionDenseLinkBased) {
325 for (uint32_t ff = f + 1; ff < mCFContext->nFragments; ff++) {
326 handleExtends(ff);
327 }
328 }
329 break;
330 } else {
331 firstPossibleFragment = f + 1;
332 }
333 }
334 }
335 emptyPages = 0;
336 }
337 }
338 for (uint32_t f = 0; f < mCFContext->nFragments; f++) {
339 mCFContext->fragmentData[f].minMaxCN[iSector][j].zsPtrLast = fragments[f].second.zsPtrLast;
340 mCFContext->fragmentData[f].minMaxCN[iSector][j].zsPtrFirst = fragments[f].second.zsPtrFirst;
341 mCFContext->fragmentData[f].minMaxCN[iSector][j].zsPageLast = fragments[f].second.zsPageLast;
342 mCFContext->fragmentData[f].minMaxCN[iSector][j].zsPageFirst = fragments[f].second.zsPageFirst;
343 }
344 }
345 mCFContext->nPagesTotal += nPages;
346 mCFContext->nPagesSector[iSector] = nPages;
347
348 mCFContext->nDigitsEndpointMax[iSector] = 0;
349 for (uint32_t i = 0; i < GPUTrackingInOutZS::NENDPOINTS; i++) {
350 if (endpointAdcSamples[i] > mCFContext->nDigitsEndpointMax[iSector]) {
351 mCFContext->nDigitsEndpointMax[iSector] = endpointAdcSamples[i];
352 }
353 }
354 uint32_t nDigitsFragmentMax = 0;
355 for (uint32_t i = 0; i < mCFContext->nFragments; i++) {
356 uint32_t pagesInFragment = 0;
357 uint32_t digitsInFragment = 0;
358 for (uint16_t j = 0; j < GPUTrackingInOutZS::NENDPOINTS; j++) {
359 pagesInFragment += mCFContext->fragmentData[i].nPages[iSector][j];
360 digitsInFragment += mCFContext->fragmentData[i].nDigits[iSector][j];
361 }
362 mCFContext->nPagesFragmentMax = std::max(mCFContext->nPagesFragmentMax, pagesInFragment);
363 nDigitsFragmentMax = std::max(nDigitsFragmentMax, digitsInFragment);
364 }
365 mRec->getGeneralStepTimer(GeneralStep::Prepare).Stop();
366 return {nDigits, nDigitsFragmentMax};
367}
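// Illustrative sketch (not part of the chain): the time-bin arithmetic used in the page scan above.
// A page whose RDH sits in heart-beat orbit 'orbit' and whose ZS header carries an in-orbit offset of
// 'timeOffset' bunch crossings starts at the time bin computed below; the page then spans nTimeBinSpan
// (+256 with the nTimeBinSpanBit8 flag) further bins. The helper name and parameters are hypothetical.
namespace
{
inline uint32_t firstTimeBinOfPageSketch(uint32_t orbit, uint32_t firstHBF, uint32_t timeOffset)
{
  // bunch crossings elapsed since the first heart-beat frame of the time frame, converted to time bins
  return (timeOffset + (orbit - firstHBF) * o2::constants::lhc::LHCMaxBunches) / LHCBCPERTIMEBIN;
}
} // namespace
// Example: with firstHBF = 100, a page at orbit = 102 and timeOffset = 0 starts at
// time bin (0 + 2 * 3564) / 8 = 891 (assuming LHCBCPERTIMEBIN == 8).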
368
369void GPUChainTracking::RunTPCClusterizer_compactPeaks(GPUTPCClusterFinder& clusterer, GPUTPCClusterFinder& clustererShadow, int32_t stage, bool doGPU, int32_t lane)
370{
371 auto& in = stage ? clustererShadow.mPpeakPositions : clustererShadow.mPpositions;
372 auto& out = stage ? clustererShadow.mPfilteredPeakPositions : clustererShadow.mPpeakPositions;
373 if (doGPU) {
374 const uint32_t iSector = clusterer.mISector;
375 auto& count = stage ? clusterer.mPmemory->counters.nPeaks : clusterer.mPmemory->counters.nPositions;
376
377 std::vector<size_t> counts;
378
379 uint32_t nSteps = clusterer.getNSteps(count);
380 if (nSteps > clusterer.mNBufs) {
381 GPUError("Clusterer buffers exceeded (%u > %u)", nSteps, (int32_t)clusterer.mNBufs);
382 exit(1);
383 }
384
385 size_t tmpCount = count;
386 if (nSteps > 1) {
387 for (uint32_t i = 1; i < nSteps; i++) {
388 counts.push_back(tmpCount);
389 if (i == 1) {
390 runKernel<GPUTPCCFStreamCompaction, GPUTPCCFStreamCompaction::scanStart>({GetGrid(tmpCount, clusterer.mScanWorkGroupSize, lane), {iSector}}, i, stage);
391 } else {
392 runKernel<GPUTPCCFStreamCompaction, GPUTPCCFStreamCompaction::scanUp>({GetGrid(tmpCount, clusterer.mScanWorkGroupSize, lane), {iSector}}, i, tmpCount);
393 }
394 tmpCount = (tmpCount + clusterer.mScanWorkGroupSize - 1) / clusterer.mScanWorkGroupSize;
395 }
396
397 runKernel<GPUTPCCFStreamCompaction, GPUTPCCFStreamCompaction::scanTop>({GetGrid(tmpCount, clusterer.mScanWorkGroupSize, lane), {iSector}}, nSteps, tmpCount);
398
399 for (uint32_t i = nSteps - 1; i > 1; i--) {
400 tmpCount = counts[i - 1];
401 runKernel<GPUTPCCFStreamCompaction, GPUTPCCFStreamCompaction::scanDown>({GetGrid(tmpCount - clusterer.mScanWorkGroupSize, clusterer.mScanWorkGroupSize, lane), {iSector}}, i, clusterer.mScanWorkGroupSize, tmpCount);
402 }
403 }
404
405 runKernel<GPUTPCCFStreamCompaction, GPUTPCCFStreamCompaction::compactDigits>({GetGrid(count, clusterer.mScanWorkGroupSize, lane), {iSector}}, 1, stage, in, out);
406 } else {
407 auto& nOut = stage ? clusterer.mPmemory->counters.nClusters : clusterer.mPmemory->counters.nPeaks;
408 auto& nIn = stage ? clusterer.mPmemory->counters.nPeaks : clusterer.mPmemory->counters.nPositions;
409 size_t count = 0;
410 for (size_t i = 0; i < nIn; i++) {
411 if (clusterer.mPisPeak[i]) {
412 out[count++] = in[i];
413 }
414 }
415 nOut = count;
416 }
417}
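// Illustrative sketch (not part of the chain): each scan level above reduces the number of items by the
// scan work-group size until a single work group can finish the prefix sum (scanTop), after which the
// partial sums are propagated back down (scanDown). The helper below only illustrates the level count;
// its name and the exact correspondence to clusterer.getNSteps() are assumptions.
namespace
{
inline uint32_t scanLevelsNeededSketch(size_t items, size_t workGroupSize)
{
  uint32_t levels = 1;
  while (items > workGroupSize) {
    items = (items + workGroupSize - 1) / workGroupSize; // one partial sum per work group
    levels++;
  }
  return levels;
}
} // namespace
// Example: 1000000 items with a work-group size of 512 need 3 levels (1000000 -> 1954 -> 4).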
418
419std::pair<uint32_t, uint32_t> GPUChainTracking::RunTPCClusterizer_transferZS(int32_t iSector, const CfFragment& fragment, int32_t lane)
420{
421 bool doGPU = GetRecoStepsGPU() & RecoStep::TPCClusterFinding;
422 if (mCFContext->abandonTimeframe) {
423 return {0, 0};
424 }
425 const auto& retVal = TPCClusterizerDecodeZSCountUpdate(iSector, fragment);
426 if (doGPU) {
427 GPUTPCClusterFinder& clusterer = processors()->tpcClusterer[iSector];
428 GPUTPCClusterFinder& clustererShadow = doGPU ? processorsShadow()->tpcClusterer[iSector] : clusterer;
429 uint32_t nPagesSector = 0;
430 for (uint32_t j = 0; j < GPUTrackingInOutZS::NENDPOINTS; j++) {
431 uint32_t nPages = 0;
432 mInputsHost->mPzsMeta->sector[iSector].zsPtr[j] = &mInputsShadow->mPzsPtrs[iSector * GPUTrackingInOutZS::NENDPOINTS + j];
433 mInputsHost->mPzsPtrs[iSector * GPUTrackingInOutZS::NENDPOINTS + j] = clustererShadow.mPzs + (nPagesSector + nPages) * TPCZSHDR::TPC_ZS_PAGE_SIZE;
434 for (uint32_t k = clusterer.mMinMaxCN[j].zsPtrFirst; k < clusterer.mMinMaxCN[j].zsPtrLast; k++) {
435 const uint32_t min = (k == clusterer.mMinMaxCN[j].zsPtrFirst) ? clusterer.mMinMaxCN[j].zsPageFirst : 0;
436 const uint32_t max = (k + 1 == clusterer.mMinMaxCN[j].zsPtrLast) ? clusterer.mMinMaxCN[j].zsPageLast : mIOPtrs.tpcZS->sector[iSector].nZSPtr[j][k];
437 if (max > min) {
438 char* src = (char*)mIOPtrs.tpcZS->sector[iSector].zsPtr[j][k] + min * TPCZSHDR::TPC_ZS_PAGE_SIZE;
439 char* ptrLast = (char*)mIOPtrs.tpcZS->sector[iSector].zsPtr[j][k] + (max - 1) * TPCZSHDR::TPC_ZS_PAGE_SIZE;
440 size_t size = (ptrLast - src) + o2::raw::RDHUtils::getMemorySize(*(const o2::header::RAWDataHeader*)ptrLast);
441 GPUMemCpy(RecoStep::TPCClusterFinding, clustererShadow.mPzs + (nPagesSector + nPages) * TPCZSHDR::TPC_ZS_PAGE_SIZE, src, size, lane, true);
442 }
443 nPages += max - min;
444 }
445 mInputsHost->mPzsMeta->sector[iSector].nZSPtr[j] = &mInputsShadow->mPzsSizes[iSector * GPUTrackingInOutZS::NENDPOINTS + j];
446 mInputsHost->mPzsSizes[iSector * GPUTrackingInOutZS::NENDPOINTS + j] = nPages;
447 mInputsHost->mPzsMeta->sector[iSector].count[j] = 1;
448 nPagesSector += nPages;
449 }
450 GPUMemCpy(RecoStep::TPCClusterFinding, clustererShadow.mPzsOffsets, clusterer.mPzsOffsets, clusterer.mNMaxPages * sizeof(*clusterer.mPzsOffsets), lane, true);
451 }
452 return retVal;
453}
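// Illustrative sketch (not part of the chain): the byte count copied per contiguous page range above.
// All pages in the range occupy TPC_ZS_PAGE_SIZE except the last one, for which only the payload size
// reported by its RDH is transferred. The helper name is hypothetical.
namespace
{
inline size_t zsPageRangeBytesSketch(size_t nPages, size_t lastPagePayloadBytes)
{
  return (nPages - 1) * TPCZSHDR::TPC_ZS_PAGE_SIZE + lastPagePayloadBytes;
}
} // namespace
// Example: 4 pages of 8 kiB where the last page holds 1024 payload bytes copy
// 3 * 8192 + 1024 = 25600 bytes instead of 4 * 8192 = 32768.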
454
455int32_t GPUChainTracking::RunTPCClusterizer_prepare(bool restorePointers)
456{
457 bool doGPU = mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCClusterFinding;
458 if (restorePointers) {
459 for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) {
460 processors()->tpcClusterer[iSector].mPzsOffsets = mCFContext->ptrSave[iSector].zsOffsetHost;
461 processorsShadow()->tpcClusterer[iSector].mPzsOffsets = mCFContext->ptrSave[iSector].zsOffsetDevice;
462 processorsShadow()->tpcClusterer[iSector].mPzs = mCFContext->ptrSave[iSector].zsDevice;
463 }
464 processorsShadow()->ioPtrs.clustersNative = mCFContext->ptrClusterNativeSave;
465 return 0;
466 }
467 const auto& threadContext = GetThreadContext();
469 if (mCFContext == nullptr) {
470 mCFContext = std::make_unique<GPUTPCCFChainContext>(); // assumed: lazily allocate the clusterizer chain context
471 }
472 const int16_t maxFragmentLen = GetProcessingSettings().overrideClusterizerFragmentLen;
473 const uint32_t maxAllowedTimebin = param().par.continuousTracking ? std::max<int32_t>(param().continuousMaxTimeBin, maxFragmentLen) : TPC_MAX_TIME_BIN_TRIGGERED;
474 mCFContext->tpcMaxTimeBin = maxAllowedTimebin;
475 const CfFragment fragmentMax{(tpccf::TPCTime)mCFContext->tpcMaxTimeBin + 1, maxFragmentLen};
476 mCFContext->prepare(mIOPtrs.tpcZS, fragmentMax);
477 if (GetProcessingSettings().param.tpcTriggerHandling) {
478 mTriggerBuffer->triggers.clear();
479 }
480 if (mIOPtrs.tpcZS) {
481 uint32_t nDigitsFragmentMax[NSECTORS];
482 mCFContext->zsVersion = -1;
483 for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) {
484 if (mIOPtrs.tpcZS->sector[iSector].count[0]) {
485 const void* rdh = mIOPtrs.tpcZS->sector[iSector].zsPtr[0][0];
486 if (rdh && o2::raw::RDHUtils::getVersion<o2::header::RAWDataHeaderV6>() > o2::raw::RDHUtils::getVersion(rdh)) {
487 GPUError("Data has invalid RDH version %d, %d required\n", o2::raw::RDHUtils::getVersion(rdh), o2::raw::RDHUtils::getVersion<o2::header::RAWDataHeader>());
488 return 1;
489 }
490 }
491#ifndef GPUCA_NO_VC
492 if (GetProcessingSettings().prefetchTPCpageScan >= 1 && iSector < NSECTORS - 1) {
493 for (uint32_t j = 0; j < GPUTrackingInOutZS::NENDPOINTS; j++) {
494 for (uint32_t k = 0; k < mIOPtrs.tpcZS->sector[iSector].count[j]; k++) {
495 for (uint32_t l = 0; l < mIOPtrs.tpcZS->sector[iSector].nZSPtr[j][k]; l++) {
496 Vc::Common::prefetchFar(((const uint8_t*)mIOPtrs.tpcZS->sector[iSector + 1].zsPtr[j][k]) + l * TPCZSHDR::TPC_ZS_PAGE_SIZE);
497 Vc::Common::prefetchFar(((const uint8_t*)mIOPtrs.tpcZS->sector[iSector + 1].zsPtr[j][k]) + l * TPCZSHDR::TPC_ZS_PAGE_SIZE + sizeof(o2::header::RAWDataHeader));
498 }
499 }
500 }
501 }
502#endif
503 const auto& x = TPCClusterizerDecodeZSCount(iSector, fragmentMax);
504 nDigitsFragmentMax[iSector] = x.first;
505 processors()->tpcClusterer[iSector].mPmemory->counters.nDigits = x.first;
506 mRec->MemoryScalers()->nTPCdigits += x.first;
507 }
508 for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) {
509 uint32_t nDigitsBase = nDigitsFragmentMax[iSector];
510 uint32_t threshold = 40000000;
511 uint32_t nDigitsScaled = nDigitsBase > threshold ? nDigitsBase : std::min((threshold + nDigitsBase) / 2, 2 * nDigitsBase);
512 processors()->tpcClusterer[iSector].SetNMaxDigits(processors()->tpcClusterer[iSector].mPmemory->counters.nDigits, mCFContext->nPagesFragmentMax, nDigitsScaled, mCFContext->nDigitsEndpointMax[iSector]);
513 if (doGPU) {
514 processorsShadow()->tpcClusterer[iSector].SetNMaxDigits(processors()->tpcClusterer[iSector].mPmemory->counters.nDigits, mCFContext->nPagesFragmentMax, nDigitsScaled, mCFContext->nDigitsEndpointMax[iSector]);
515 }
516 if (mPipelineNotifyCtx && GetProcessingSettings().doublePipelineClusterizer) {
517 mPipelineNotifyCtx->rec->AllocateRegisteredForeignMemory(processors()->tpcClusterer[iSector].mZSOffsetId, mRec);
518 mPipelineNotifyCtx->rec->AllocateRegisteredForeignMemory(processors()->tpcClusterer[iSector].mZSId, mRec);
519 } else {
520 AllocateRegisteredMemory(processors()->tpcClusterer[iSector].mZSOffsetId);
521 AllocateRegisteredMemory(processors()->tpcClusterer[iSector].mZSId);
522 }
523 }
524 } else {
525 for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) {
526 uint32_t nDigits = mIOPtrs.tpcPackedDigits->nTPCDigits[iSector];
527 mRec->MemoryScalers()->nTPCdigits += nDigits;
528 processors()->tpcClusterer[iSector].SetNMaxDigits(nDigits, mCFContext->nPagesFragmentMax, nDigits, 0);
529 }
530 }
531
532 if (mIOPtrs.tpcZS) {
533 GPUInfo("Event has %u 8kb TPC ZS pages (version %d), %ld digits", mCFContext->nPagesTotal, mCFContext->zsVersion, (int64_t)mRec->MemoryScalers()->nTPCdigits);
534 } else {
535 GPUInfo("Event has %ld TPC Digits", (int64_t)mRec->MemoryScalers()->nTPCdigits);
536 }
537
538 if (mCFContext->tpcMaxTimeBin > maxAllowedTimebin) {
539 GPUError("Input data has invalid time bin %u > %d", mCFContext->tpcMaxTimeBin, maxAllowedTimebin);
540 if (GetProcessingSettings().ignoreNonFatalGPUErrors) {
541 mCFContext->abandonTimeframe = true;
542 mCFContext->tpcMaxTimeBin = maxAllowedTimebin;
543 } else {
544 return 1;
545 }
546 }
547
548 mCFContext->fragmentFirst = CfFragment{std::max<int32_t>(mCFContext->tpcMaxTimeBin + 1, maxFragmentLen), maxFragmentLen};
549 for (int32_t iSector = 0; iSector < GetProcessingSettings().nTPCClustererLanes && iSector < NSECTORS; iSector++) {
550 if (mIOPtrs.tpcZS && mCFContext->nPagesSector[iSector] && mCFContext->zsVersion != -1) {
551 mCFContext->nextPos[iSector] = RunTPCClusterizer_transferZS(iSector, mCFContext->fragmentFirst, GetProcessingSettings().nTPCClustererLanes + iSector);
552 }
553 }
554
555 if (mPipelineNotifyCtx && GetProcessingSettings().doublePipelineClusterizer) {
556 for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) {
557 mCFContext->ptrSave[iSector].zsOffsetHost = processors()->tpcClusterer[iSector].mPzsOffsets;
558 mCFContext->ptrSave[iSector].zsOffsetDevice = processorsShadow()->tpcClusterer[iSector].mPzsOffsets;
559 mCFContext->ptrSave[iSector].zsDevice = processorsShadow()->tpcClusterer[iSector].mPzs;
560 }
561 }
562 return 0;
563}
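// Illustrative sketch (not part of the chain): the digit-buffer scaling heuristic applied per sector in
// RunTPCClusterizer_prepare above. Below the 40M-digit threshold the buffer gets headroom, but never more
// than twice the measured count and never beyond the midpoint towards the threshold. The helper name is
// hypothetical.
namespace
{
inline uint32_t scaledDigitBufferSizeSketch(uint32_t nDigitsBase, uint32_t threshold = 40000000)
{
  return nDigitsBase > threshold ? nDigitsBase : std::min((threshold + nDigitsBase) / 2, 2 * nDigitsBase);
}
} // namespace
// Examples: 10M digits -> min(25M, 20M) = 20M; 30M digits -> min(35M, 60M) = 35M; 50M digits -> 50M (unchanged).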
564#endif
565
566int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
567{
568 if (param().rec.fwdTPCDigitsAsClusters) {
569 return ForwardTPCDigits();
570 }
571#ifdef GPUCA_TPC_GEOMETRY_O2
572 int32_t tpcTimeBinCut = mUpdateNewCalibObjects && mNewCalibValues->newTPCTimeBinCut ? mNewCalibValues->tpcTimeBinCut : param().tpcCutTimeBin;
573 mRec->PushNonPersistentMemory(qStr2Tag("TPCCLUST")); // assumed: paired with the PopNonPersistentMemory(RecoStep::TPCClusterFinding, qStr2Tag("TPCCLUST")) at the end of this function
574 const auto& threadContext = GetThreadContext();
575 const bool doGPU = GetRecoStepsGPU() & RecoStep::TPCClusterFinding;
576 if (RunTPCClusterizer_prepare(mPipelineNotifyCtx && GetProcessingSettings().doublePipelineClusterizer)) {
577 return 1;
578 }
579 if (GetProcessingSettings().autoAdjustHostThreads && !doGPU) {
581 }
582
584 float tpcHitLowOccupancyScalingFactor = 1.f;
586 uint32_t nHitsBase = mRec->MemoryScalers()->nTPCHits;
587 uint32_t threshold = 30000000 / 256 * mIOPtrs.settingsTF->nHBFPerTF;
588 if (mIOPtrs.settingsTF->nHBFPerTF < 64) {
589 threshold *= 2;
590 }
591 mRec->MemoryScalers()->nTPCHits = std::max<uint32_t>(nHitsBase, std::min<uint32_t>(threshold, nHitsBase * 3.5f)); // Increase the buffer size for low occupancy data to compensate for noisy pads creating excessive clusters
592 if (nHitsBase < threshold) {
593 float maxFactor = mRec->MemoryScalers()->nTPCHits < threshold * 2 / 3 ? 3 : (mRec->MemoryScalers()->nTPCHits < threshold ? 2.25f : 1.75f);
594 mRec->MemoryScalers()->temporaryFactor *= std::min(maxFactor, (float)threshold / nHitsBase);
595 tpcHitLowOccupancyScalingFactor = std::min(3.5f, (float)threshold / nHitsBase);
596 }
597 }
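// Worked example of the scaling above (illustrative numbers): with nHBFPerTF = 128 the threshold is
// 30000000 / 256 * 128, i.e. roughly 15M hits. For nHitsBase = 4M the hit scaler becomes
// max(4M, min(15M, 4M * 3.5)) = 14M, maxFactor evaluates to 2.25f (14M is below the threshold but above
// 2/3 of it), the temporary memory factor grows by min(2.25, 15M / 4M = 3.75) = 2.25, and
// tpcHitLowOccupancyScalingFactor becomes min(3.5, 3.75) = 3.5.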
598 for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) {
599 processors()->tpcClusterer[iSector].SetMaxData(mIOPtrs); // First iteration to set data sizes
600 }
601 mRec->ComputeReuseMax(nullptr); // Resolve maximums for shared buffers
602 for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) {
603 SetupGPUProcessor(&processors()->tpcClusterer[iSector], true); // Now we allocate
604 }
605 if (mPipelineNotifyCtx && GetProcessingSettings().doublePipelineClusterizer) {
606 RunTPCClusterizer_prepare(true); // Restore some pointers, allocated by the other pipeline, and set to 0 by SetupGPUProcessor (since not allocated in this pipeline)
607 }
608
609 if (doGPU && mIOPtrs.tpcZS) {
611 WriteToConstantMemory(RecoStep::TPCClusterFinding, (char*)&processors()->ioPtrs - (char*)processors(), &processorsShadow()->ioPtrs, sizeof(processorsShadow()->ioPtrs), mRec->NStreams() - 1);
612 }
613 if (doGPU) {
614 WriteToConstantMemory(RecoStep::TPCClusterFinding, (char*)processors()->tpcClusterer - (char*)processors(), processorsShadow()->tpcClusterer, sizeof(GPUTPCClusterFinder) * NSECTORS, mRec->NStreams() - 1, &mEvents->init);
615 }
616
617 size_t nClsTotal = 0;
618 ClusterNativeAccess* tmpNativeAccess = mClusterNativeAccess.get();
619 ClusterNative* tmpNativeClusters = nullptr;
620 std::unique_ptr<ClusterNative[]> tmpNativeClusterBuffer;
621
622 // setup MC Labels
624
625 auto* digitsMC = propagateMCLabels ? processors()->ioPtrs.tpcPackedDigits->tpcDigitsMC : nullptr;
626
627 bool buildNativeGPU = doGPU && NeedTPCClustersOnGPU();
628 bool buildNativeHost = (mRec->GetRecoStepsOutputs() & GPUDataTypes::InOutType::TPCClusters) || GetProcessingSettings().deterministicGPUReconstruction; // TODO: Should do this also when clusters are needed for later steps on the host but not requested as output
629
630 mInputsHost->mNClusterNative = mInputsShadow->mNClusterNative = mRec->MemoryScalers()->nTPCHits * tpcHitLowOccupancyScalingFactor;
631 if (buildNativeGPU) {
632 AllocateRegisteredMemory(mInputsHost->mResourceClusterNativeBuffer);
633 }
634 if (buildNativeHost && !(buildNativeGPU && GetProcessingSettings().delayedOutput)) {
635 if (mWaitForFinalInputs) {
636 GPUFatal("Cannot use waitForFinalInput callback without delayed output");
637 }
638 if (!GetProcessingSettings().tpcApplyDebugClusterFilter) {
640 tmpNativeClusters = mInputsHost->mPclusterNativeOutput;
641 } else {
642 tmpNativeClusterBuffer = std::make_unique<ClusterNative[]>(mInputsHost->mNClusterNative);
643 tmpNativeClusters = tmpNativeClusterBuffer.get();
644 }
645 }
646
647 GPUTPCLinearLabels mcLinearLabels;
648 if (propagateMCLabels) {
649 // No need to overallocate here: nTPCHits is already an upper bound used for the GPU cluster buffer, and the buffer can be enlarged later if necessary
650 mcLinearLabels.header.reserve(mRec->MemoryScalers()->nTPCHits / 2);
651 mcLinearLabels.data.reserve(mRec->MemoryScalers()->nTPCHits);
652 }
653
654 int8_t transferRunning[NSECTORS] = {0};
655 uint32_t outputQueueStart = mOutputQueue.size();
656
657 auto notifyForeignChainFinished = [this]() {
658 if (mPipelineNotifyCtx) {
659 SynchronizeStream(OutputStream()); // Must finish before updating ioPtrs in (global) constant memory
660 {
661 std::lock_guard<std::mutex> lock(mPipelineNotifyCtx->mutex);
662 mPipelineNotifyCtx->ready = true;
663 }
664 mPipelineNotifyCtx->cond.notify_one();
665 }
666 };
667 bool synchronizeCalibUpdate = false;
668
669 for (uint32_t iSectorBase = 0; iSectorBase < NSECTORS; iSectorBase += GetProcessingSettings().nTPCClustererLanes) {
670 std::vector<bool> laneHasData(GetProcessingSettings().nTPCClustererLanes, false);
671 static_assert(NSECTORS <= GPUCA_MAX_STREAMS, "Stream events must be able to hold all sectors");
672 const int32_t maxLane = std::min<int32_t>(GetProcessingSettings().nTPCClustererLanes, NSECTORS - iSectorBase);
673 for (CfFragment fragment = mCFContext->fragmentFirst; !fragment.isEnd(); fragment = fragment.next()) {
674 if (GetProcessingSettings().debugLevel >= 3) {
675 GPUInfo("Processing time bins [%d, %d) for sectors %d to %d", fragment.start, fragment.last(), iSectorBase, iSectorBase + GetProcessingSettings().nTPCClustererLanes - 1);
676 }
677 mRec->runParallelOuterLoop(doGPU, maxLane, [&](uint32_t lane) {
678 if (doGPU && fragment.index != 0) {
679 SynchronizeStream(lane); // Don't overwrite charge map from previous iteration until cluster computation is finished
680 }
681
682 uint32_t iSector = iSectorBase + lane;
683 GPUTPCClusterFinder& clusterer = processors()->tpcClusterer[iSector];
684 GPUTPCClusterFinder& clustererShadow = doGPU ? processorsShadow()->tpcClusterer[iSector] : clusterer;
685 clusterer.mPmemory->counters.nPeaks = clusterer.mPmemory->counters.nClusters = 0;
686 clusterer.mPmemory->fragment = fragment;
687
688 if (mIOPtrs.tpcPackedDigits) {
689 bool setDigitsOnGPU = doGPU && not mIOPtrs.tpcZS;
690 bool setDigitsOnHost = (not doGPU && not mIOPtrs.tpcZS) || propagateMCLabels;
691 auto* inDigits = mIOPtrs.tpcPackedDigits;
692 size_t numDigits = inDigits->nTPCDigits[iSector];
693 if (setDigitsOnGPU) {
694 GPUMemCpy(RecoStep::TPCClusterFinding, clustererShadow.mPdigits, inDigits->tpcDigits[iSector], sizeof(clustererShadow.mPdigits[0]) * numDigits, lane, true);
695 }
696 if (setDigitsOnHost) {
697 clusterer.mPdigits = const_cast<o2::tpc::Digit*>(inDigits->tpcDigits[iSector]); // TODO: Needs fixing (invalid const cast)
698 }
699 clusterer.mPmemory->counters.nDigits = numDigits;
700 }
701
702 if (mIOPtrs.tpcZS) {
703 if (mCFContext->nPagesSector[iSector] && mCFContext->zsVersion != -1) {
704 clusterer.mPmemory->counters.nPositions = mCFContext->nextPos[iSector].first;
705 clusterer.mPmemory->counters.nPagesSubsector = mCFContext->nextPos[iSector].second;
706 } else {
707 clusterer.mPmemory->counters.nPositions = clusterer.mPmemory->counters.nPagesSubsector = 0;
708 }
709 }
710 TransferMemoryResourceLinkToGPU(RecoStep::TPCClusterFinding, clusterer.mMemoryId, lane);
711
712 using ChargeMapType = decltype(*clustererShadow.mPchargeMap);
713 using PeakMapType = decltype(*clustererShadow.mPpeakMap);
714 runKernel<GPUMemClean16>({GetGridAutoStep(lane, RecoStep::TPCClusterFinding)}, clustererShadow.mPchargeMap, TPCMapMemoryLayout<ChargeMapType>::items(GetProcessingSettings().overrideClusterizerFragmentLen) * sizeof(ChargeMapType)); // TODO: Not working in OpenCL2!!!
715 runKernel<GPUMemClean16>({GetGridAutoStep(lane, RecoStep::TPCClusterFinding)}, clustererShadow.mPpeakMap, TPCMapMemoryLayout<PeakMapType>::items(GetProcessingSettings().overrideClusterizerFragmentLen) * sizeof(PeakMapType));
716 if (fragment.index == 0) {
717 runKernel<GPUMemClean16>({GetGridAutoStep(lane, RecoStep::TPCClusterFinding)}, clustererShadow.mPpadIsNoisy, TPC_PADS_IN_SECTOR * sizeof(*clustererShadow.mPpadIsNoisy));
718 }
719 DoDebugAndDump(RecoStep::TPCClusterFinding, 262144, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Zeroed Charges");
720
721 if (doGPU) {
722 if (mIOPtrs.tpcZS && mCFContext->nPagesSector[iSector] && mCFContext->zsVersion != -1) {
723 TransferMemoryResourceLinkToGPU(RecoStep::TPCClusterFinding, mInputsHost->mResourceZS, lane);
724 SynchronizeStream(GetProcessingSettings().nTPCClustererLanes + lane);
725 }
726 SynchronizeStream(mRec->NStreams() - 1); // Wait for copying to constant memory
727 }
728
729 if (mIOPtrs.tpcZS && (mCFContext->abandonTimeframe || !mCFContext->nPagesSector[iSector] || mCFContext->zsVersion == -1)) {
730 clusterer.mPmemory->counters.nPositions = 0;
731 return;
732 }
733 if (!mIOPtrs.tpcZS && mIOPtrs.tpcPackedDigits->nTPCDigits[iSector] == 0) {
734 clusterer.mPmemory->counters.nPositions = 0;
735 return;
736 }
737
738 if (propagateMCLabels && fragment.index == 0) {
739 clusterer.PrepareMC();
740 clusterer.mPinputLabels = digitsMC->v[iSector];
741 if (clusterer.mPinputLabels == nullptr) {
742 GPUFatal("MC label container missing, sector %d", iSector);
743 }
744 if (clusterer.mPinputLabels->getIndexedSize() != mIOPtrs.tpcPackedDigits->nTPCDigits[iSector]) {
745 GPUFatal("MC label container has incorrect number of entries: %d expected, has %d\n", (int32_t)mIOPtrs.tpcPackedDigits->nTPCDigits[iSector], (int32_t)clusterer.mPinputLabels->getIndexedSize());
746 }
747 }
748
749 if (GetProcessingSettings().tpcSingleSector == -1 || GetProcessingSettings().tpcSingleSector == (int32_t)iSector) {
750 if (not mIOPtrs.tpcZS) {
751 runKernel<GPUTPCCFChargeMapFiller, GPUTPCCFChargeMapFiller::findFragmentStart>({GetGrid(1, lane), {iSector}}, mIOPtrs.tpcZS == nullptr);
752 TransferMemoryResourceLinkToHost(RecoStep::TPCClusterFinding, clusterer.mMemoryId, lane);
753 } else if (propagateMCLabels) {
754 runKernel<GPUTPCCFChargeMapFiller, GPUTPCCFChargeMapFiller::findFragmentStart>({GetGrid(1, lane, GPUReconstruction::krnlDeviceType::CPU), {iSector}}, mIOPtrs.tpcZS == nullptr);
755 TransferMemoryResourceLinkToGPU(RecoStep::TPCClusterFinding, clusterer.mMemoryId, lane);
756 }
757 }
758
759 if (mIOPtrs.tpcZS) {
760 int32_t firstHBF = (mIOPtrs.settingsTF && mIOPtrs.settingsTF->hasTfStartOrbit) ? mIOPtrs.settingsTF->tfStartOrbit : ((mIOPtrs.tpcZS->sector[iSector].count[0] && mIOPtrs.tpcZS->sector[iSector].nZSPtr[0][0]) ? o2::raw::RDHUtils::getHeartBeatOrbit(*(const o2::header::RAWDataHeader*)mIOPtrs.tpcZS->sector[iSector].zsPtr[0][0]) : 0);
761 uint32_t nBlocks = doGPU ? clusterer.mPmemory->counters.nPagesSubsector : GPUTrackingInOutZS::NENDPOINTS;
762
763 (void)tpcTimeBinCut; // TODO: To be used in decoding kernels
764 switch (mCFContext->zsVersion) {
765 default:
766 GPUFatal("Data with invalid TPC ZS mode (%d) received", mCFContext->zsVersion);
767 break;
768 case ZSVersion::ZSVersionRowBased10BitADC:
769 case ZSVersion::ZSVersionRowBased12BitADC:
770 runKernel<GPUTPCCFDecodeZS>({GetGridBlk(nBlocks, lane), {iSector}}, firstHBF);
771 break;
772 case ZSVersion::ZSVersionLinkBasedWithMeta:
773 runKernel<GPUTPCCFDecodeZSLink>({GetGridBlk(nBlocks, lane), {iSector}}, firstHBF);
774 break;
775 case ZSVersion::ZSVersionDenseLinkBased:
776 runKernel<GPUTPCCFDecodeZSDenseLink>({GetGridBlk(nBlocks, lane), {iSector}}, firstHBF);
777 break;
778 }
779 TransferMemoryResourceLinkToHost(RecoStep::TPCClusterFinding, clusterer.mMemoryId, lane);
780 } // clang-format off
781 });
782 mRec->runParallelOuterLoop(doGPU, maxLane, [&](uint32_t lane) {
783 uint32_t iSector = iSectorBase + lane;
784 if (doGPU) {
785 SynchronizeStream(lane);
786 }
787 if (mIOPtrs.tpcZS) {
788 CfFragment f = fragment.next();
789 int32_t nextSector = iSector;
790 if (f.isEnd()) {
791 nextSector += GetProcessingSettings().nTPCClustererLanes;
792 f = mCFContext->fragmentFirst;
793 }
794 if (nextSector < NSECTORS && mIOPtrs.tpcZS && mCFContext->nPagesSector[nextSector] && mCFContext->zsVersion != -1 && !mCFContext->abandonTimeframe) {
795 mCFContext->nextPos[nextSector] = RunTPCClusterizer_transferZS(nextSector, f, GetProcessingSettings().nTPCClustererLanes + lane);
796 }
797 }
798 GPUTPCClusterFinder& clusterer = processors()->tpcClusterer[iSector];
799 GPUTPCClusterFinder& clustererShadow = doGPU ? processorsShadow()->tpcClusterer[iSector] : clusterer;
800 if (clusterer.mPmemory->counters.nPositions == 0) {
801 return;
802 }
803 if (!mIOPtrs.tpcZS) {
804 runKernel<GPUTPCCFChargeMapFiller, GPUTPCCFChargeMapFiller::fillFromDigits>({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}});
805 }
806 if (DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 1, clusterer, &GPUTPCClusterFinder::DumpDigits, *mDebugFile)) {
807 clusterer.DumpChargeMap(*mDebugFile, "Charges");
808 }
809
810 if (propagateMCLabels) {
811 runKernel<GPUTPCCFChargeMapFiller, GPUTPCCFChargeMapFiller::fillIndexMap>({GetGrid(clusterer.mPmemory->counters.nDigitsInFragment, lane, GPUReconstruction::krnlDeviceType::CPU), {iSector}});
812 }
813
814 bool checkForNoisyPads = (rec()->GetParam().rec.tpc.maxTimeBinAboveThresholdIn1000Bin > 0) || (rec()->GetParam().rec.tpc.maxConsecTimeBinAboveThreshold > 0);
815 checkForNoisyPads &= (rec()->GetParam().rec.tpc.noisyPadsQuickCheck ? fragment.index == 0 : true);
816 checkForNoisyPads &= !GetProcessingSettings().disableTPCNoisyPadFilter;
817
818 if (checkForNoisyPads) {
820
821 runKernel<GPUTPCCFCheckPadBaseline>({GetGridBlk(nBlocks, lane), {iSector}});
822 }
823
824 runKernel<GPUTPCCFPeakFinder>({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}});
825 if (DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 2, clusterer, &GPUTPCClusterFinder::DumpPeaks, *mDebugFile)) {
826 clusterer.DumpPeakMap(*mDebugFile, "Peaks");
827 }
828
829 RunTPCClusterizer_compactPeaks(clusterer, clustererShadow, 0, doGPU, lane);
830 TransferMemoryResourceLinkToHost(RecoStep::TPCClusterFinding, clusterer.mMemoryId, lane);
831 DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 2, clusterer, &GPUTPCClusterFinder::DumpPeaksCompacted, *mDebugFile); // clang-format off
832 });
833 mRec->runParallelOuterLoop(doGPU, maxLane, [&](uint32_t lane) {
834 uint32_t iSector = iSectorBase + lane;
835 GPUTPCClusterFinder& clusterer = processors()->tpcClusterer[iSector];
836 GPUTPCClusterFinder& clustererShadow = doGPU ? processorsShadow()->tpcClusterer[iSector] : clusterer;
837 if (doGPU) {
838 SynchronizeStream(lane);
839 }
840 if (clusterer.mPmemory->counters.nPeaks == 0) {
841 return;
842 }
843 runKernel<GPUTPCCFNoiseSuppression, GPUTPCCFNoiseSuppression::noiseSuppression>({GetGrid(clusterer.mPmemory->counters.nPeaks, lane), {iSector}});
844 runKernel<GPUTPCCFNoiseSuppression, GPUTPCCFNoiseSuppression::updatePeaks>({GetGrid(clusterer.mPmemory->counters.nPeaks, lane), {iSector}});
845 if (DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 3, clusterer, &GPUTPCClusterFinder::DumpSuppressedPeaks, *mDebugFile)) {
846 clusterer.DumpPeakMap(*mDebugFile, "Suppressed Peaks");
847 }
848
849 RunTPCClusterizer_compactPeaks(clusterer, clustererShadow, 1, doGPU, lane);
850 TransferMemoryResourceLinkToHost(RecoStep::TPCClusterFinding, clusterer.mMemoryId, lane);
851 DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 3, clusterer, &GPUTPCClusterFinder::DumpSuppressedPeaksCompacted, *mDebugFile); // clang-format off
852 });
853 mRec->runParallelOuterLoop(doGPU, maxLane, [&](uint32_t lane) {
854 uint32_t iSector = iSectorBase + lane;
855 GPUTPCClusterFinder& clusterer = processors()->tpcClusterer[iSector];
856 GPUTPCClusterFinder& clustererShadow = doGPU ? processorsShadow()->tpcClusterer[iSector] : clusterer;
857 if (doGPU) {
858 SynchronizeStream(lane);
859 }
860
861 if (fragment.index == 0) {
862 deviceEvent* waitEvent = nullptr;
863 if (transferRunning[lane] == 1) {
864 waitEvent = &mEvents->stream[lane];
865 transferRunning[lane] = 2;
866 }
867 runKernel<GPUMemClean16>({GetGridAutoStep(lane, RecoStep::TPCClusterFinding), krnlRunRangeNone, {nullptr, waitEvent}}, clustererShadow.mPclusterInRow, GPUCA_ROW_COUNT * sizeof(*clustererShadow.mPclusterInRow));
868 }
869
870 if (clusterer.mPmemory->counters.nClusters == 0) {
871 return;
872 }
873
874 runKernel<GPUTPCCFDeconvolution>({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}});
875 DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 4, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Split Charges");
876
877 runKernel<GPUTPCCFClusterizer>({GetGrid(clusterer.mPmemory->counters.nClusters, lane), {iSector}}, 0);
878 if (doGPU && propagateMCLabels) {
879 TransferMemoryResourceLinkToHost(RecoStep::TPCClusterFinding, clusterer.mScratchId, lane);
880 if (doGPU) {
881 SynchronizeStream(lane);
882 }
883 runKernel<GPUTPCCFClusterizer>({GetGrid(clusterer.mPmemory->counters.nClusters, lane, GPUReconstruction::krnlDeviceType::CPU), {iSector}}, 1);
884 }
885 if (GetProcessingSettings().debugLevel >= 3) {
886 GPUInfo("Sector %02d Fragment %02d Lane %d: Found clusters: digits %u peaks %u clusters %u", iSector, fragment.index, lane, (int32_t)clusterer.mPmemory->counters.nPositions, (int32_t)clusterer.mPmemory->counters.nPeaks, (int32_t)clusterer.mPmemory->counters.nClusters);
887 }
888
889 TransferMemoryResourcesToHost(RecoStep::TPCClusterFinding, &clusterer, lane);
890 laneHasData[lane] = true;
891 // Include clusters in default debug mask, exclude other debug output by default
892 DoDebugAndDump(RecoStep::TPCClusterFinding, 131072, clusterer, &GPUTPCClusterFinder::DumpClusters, *mDebugFile); // clang-format off
893 });
895 }
896
897 size_t nClsFirst = nClsTotal;
898 bool anyLaneHasData = false;
899 for (int32_t lane = 0; lane < maxLane; lane++) {
900 uint32_t iSector = iSectorBase + lane;
901 std::fill(&tmpNativeAccess->nClusters[iSector][0], &tmpNativeAccess->nClusters[iSector][0] + MAXGLOBALPADROW, 0);
902 if (doGPU) {
903 SynchronizeStream(lane);
904 }
905 GPUTPCClusterFinder& clusterer = processors()->tpcClusterer[iSector];
906 GPUTPCClusterFinder& clustererShadow = doGPU ? processorsShadow()->tpcClusterer[iSector] : clusterer;
907
908 if (laneHasData[lane]) {
909 anyLaneHasData = true;
910 if (buildNativeGPU && GetProcessingSettings().tpccfGatherKernel) {
911 runKernel<GPUTPCCFGather>({GetGridBlk(GPUCA_ROW_COUNT, mRec->NStreams() - 1), {iSector}}, &mInputsShadow->mPclusterNativeBuffer[nClsTotal]);
912 }
913 for (uint32_t j = 0; j < GPUCA_ROW_COUNT; j++) {
914 if (nClsTotal + clusterer.mPclusterInRow[j] > mInputsHost->mNClusterNative) {
915 clusterer.raiseError(GPUErrors::ERROR_CF_GLOBAL_CLUSTER_OVERFLOW, iSector * 1000 + j, nClsTotal + clusterer.mPclusterInRow[j], mInputsHost->mNClusterNative);
916 continue;
917 }
918 if (buildNativeGPU) {
919 if (!GetProcessingSettings().tpccfGatherKernel) {
920 GPUMemCpyAlways(RecoStep::TPCClusterFinding, (void*)&mInputsShadow->mPclusterNativeBuffer[nClsTotal], (const void*)&clustererShadow.mPclusterByRow[j * clusterer.mNMaxClusterPerRow], sizeof(mIOPtrs.clustersNative->clustersLinear[0]) * clusterer.mPclusterInRow[j], mRec->NStreams() - 1, -2);
921 }
922 } else if (buildNativeHost) {
923 GPUMemCpyAlways(RecoStep::TPCClusterFinding, (void*)&tmpNativeClusters[nClsTotal], (const void*)&clustererShadow.mPclusterByRow[j * clusterer.mNMaxClusterPerRow], sizeof(mIOPtrs.clustersNative->clustersLinear[0]) * clusterer.mPclusterInRow[j], mRec->NStreams() - 1, false);
924 }
925 tmpNativeAccess->nClusters[iSector][j] += clusterer.mPclusterInRow[j];
926 nClsTotal += clusterer.mPclusterInRow[j];
927 }
928 if (transferRunning[lane]) {
929 ReleaseEvent(mEvents->stream[lane], doGPU);
930 }
931 RecordMarker(&mEvents->stream[lane], mRec->NStreams() - 1);
932 transferRunning[lane] = 1;
933 }
934
935 if (not propagateMCLabels || not laneHasData[lane]) {
936 assert(propagateMCLabels ? mcLinearLabels.header.size() == nClsTotal : true);
937 continue;
938 }
939
940 runKernel<GPUTPCCFMCLabelFlattener, GPUTPCCFMCLabelFlattener::setRowOffsets>({GetGrid(GPUCA_ROW_COUNT, lane, GPUReconstruction::krnlDeviceType::CPU), {iSector}});
941 GPUTPCCFMCLabelFlattener::setGlobalOffsetsAndAllocate(clusterer, mcLinearLabels);
942 runKernel<GPUTPCCFMCLabelFlattener, GPUTPCCFMCLabelFlattener::flatten>({GetGrid(GPUCA_ROW_COUNT, lane, GPUReconstruction::krnlDeviceType::CPU), {iSector}}, &mcLinearLabels);
943 clusterer.clearMCMemory();
944 assert(propagateMCLabels ? mcLinearLabels.header.size() == nClsTotal : true);
945 }
946 if (propagateMCLabels) {
947 for (int32_t lane = 0; lane < maxLane; lane++) {
948 processors()->tpcClusterer[iSectorBase + lane].clearMCMemory();
949 }
950 }
951 if (buildNativeHost && buildNativeGPU && anyLaneHasData) {
952 if (GetProcessingSettings().delayedOutput) {
953 mOutputQueue.emplace_back(outputQueueEntry{(void*)((char*)&tmpNativeClusters[nClsFirst] - (char*)&tmpNativeClusters[0]), &mInputsShadow->mPclusterNativeBuffer[nClsFirst], (nClsTotal - nClsFirst) * sizeof(tmpNativeClusters[0]), RecoStep::TPCClusterFinding});
954 } else {
955 GPUMemCpy(RecoStep::TPCClusterFinding, (void*)&tmpNativeClusters[nClsFirst], (const void*)&mInputsShadow->mPclusterNativeBuffer[nClsFirst], (nClsTotal - nClsFirst) * sizeof(tmpNativeClusters[0]), mRec->NStreams() - 1, false);
956 }
957 }
958
959 if (mWaitForFinalInputs && iSectorBase >= 21 && (int32_t)iSectorBase < 21 + GetProcessingSettings().nTPCClustererLanes) {
960 notifyForeignChainFinished();
961 }
962 if (mWaitForFinalInputs && iSectorBase >= 30 && (int32_t)iSectorBase < 30 + GetProcessingSettings().nTPCClustererLanes) {
963 mWaitForFinalInputs();
964 synchronizeCalibUpdate = DoQueuedUpdates(0, false);
965 }
966 }
967 for (int32_t i = 0; i < GetProcessingSettings().nTPCClustererLanes; i++) {
968 if (transferRunning[i]) {
969 ReleaseEvent(mEvents->stream[i], doGPU);
970 }
971 }
972
973 if (GetProcessingSettings().param.tpcTriggerHandling) {
975 if (triggerOutput && triggerOutput->allocator) {
976 // GPUInfo("Storing %lu trigger words", mTriggerBuffer->triggers.size());
977 auto* outputBuffer = (decltype(mTriggerBuffer->triggers)::value_type*)triggerOutput->allocator(mTriggerBuffer->triggers.size() * sizeof(decltype(mTriggerBuffer->triggers)::value_type));
978 std::copy(mTriggerBuffer->triggers.begin(), mTriggerBuffer->triggers.end(), outputBuffer);
979 }
980 mTriggerBuffer->triggers.clear();
981 }
982
983 ClusterNativeAccess::ConstMCLabelContainerView* mcLabelsConstView = nullptr;
984 if (propagateMCLabels) {
985 // TODO: write to buffer directly
987 std::pair<ConstMCLabelContainer*, ConstMCLabelContainerView*> buffer;
990 throw std::runtime_error("Cluster MC Label buffer missing");
991 }
993 buffer = {&container->first, &container->second};
994 } else {
995 mIOMem.clusterNativeMCView = std::make_unique<ConstMCLabelContainerView>();
996 mIOMem.clusterNativeMCBuffer = std::make_unique<ConstMCLabelContainer>();
997 buffer.first = mIOMem.clusterNativeMCBuffer.get();
998 buffer.second = mIOMem.clusterNativeMCView.get();
999 }
1000
1001 assert(propagateMCLabels ? mcLinearLabels.header.size() == nClsTotal : true);
1002 assert(propagateMCLabels ? mcLinearLabels.data.size() >= nClsTotal : true);
1003
1004 mcLabels.setFrom(mcLinearLabels.header, mcLinearLabels.data);
1005 mcLabels.flatten_to(*buffer.first);
1006 *buffer.second = *buffer.first;
1007 mcLabelsConstView = buffer.second;
1008 }
1009
1010 if (buildNativeHost && buildNativeGPU && GetProcessingSettings().delayedOutput) {
1011 mInputsHost->mNClusterNative = mInputsShadow->mNClusterNative = nClsTotal;
1013 tmpNativeClusters = mInputsHost->mPclusterNativeOutput;
1014 for (uint32_t i = outputQueueStart; i < mOutputQueue.size(); i++) {
1015 mOutputQueue[i].dst = (char*)tmpNativeClusters + (size_t)mOutputQueue[i].dst;
1016 }
1017 }
1018
1019 if (buildNativeHost) {
1020 tmpNativeAccess->clustersLinear = tmpNativeClusters;
1021 tmpNativeAccess->clustersMCTruth = mcLabelsConstView;
1022 tmpNativeAccess->setOffsetPtrs();
1023 mIOPtrs.clustersNative = tmpNativeAccess;
1024 if (GetProcessingSettings().tpcApplyDebugClusterFilter) {
1025 auto allocator = [this, &tmpNativeClusters](size_t size) {
1026 this->mInputsHost->mNClusterNative = size;
1027 this->AllocateRegisteredMemory(this->mInputsHost->mResourceClusterNativeOutput, this->mSubOutputControls[GPUTrackingOutputs::getIndex(&GPUTrackingOutputs::clustersNative)]);
1028 return (tmpNativeClusters = this->mInputsHost->mPclusterNativeOutput);
1029 };
1030 RunTPCClusterFilter(tmpNativeAccess, allocator, false);
1031 nClsTotal = tmpNativeAccess->nClustersTotal;
1032 }
1033 }
1034
1035 if (!mWaitForFinalInputs) {
1036 notifyForeignChainFinished();
1037 }
1038
1039 if (buildNativeGPU) {
1040 processorsShadow()->ioPtrs.clustersNative = mInputsShadow->mPclusterNativeAccess;
1041 WriteToConstantMemory(RecoStep::TPCClusterFinding, (char*)&processors()->ioPtrs - (char*)processors(), &processorsShadow()->ioPtrs, sizeof(processorsShadow()->ioPtrs), 0);
1042 *mInputsHost->mPclusterNativeAccess = *mIOPtrs.clustersNative;
1043 mInputsHost->mPclusterNativeAccess->clustersLinear = mInputsShadow->mPclusterNativeBuffer;
1044 mInputsHost->mPclusterNativeAccess->setOffsetPtrs();
1045 TransferMemoryResourceLinkToGPU(RecoStep::TPCClusterFinding, mInputsHost->mResourceClusterNativeAccess, 0);
1046 }
1047 if (doGPU && synchronizeOutput) {
1049 }
1050 if (doGPU && synchronizeCalibUpdate) {
1052 }
1053 if (buildNativeHost && (GetProcessingSettings().deterministicGPUReconstruction || GetProcessingSettings().debugLevel >= 4)) {
1054 for (uint32_t i = 0; i < NSECTORS; i++) {
1055 for (uint32_t j = 0; j < GPUCA_ROW_COUNT; j++) {
1056 std::sort(&tmpNativeClusters[tmpNativeAccess->clusterOffset[i][j]], &tmpNativeClusters[tmpNativeAccess->clusterOffset[i][j] + tmpNativeAccess->nClusters[i][j]]);
1057 }
1058 }
1059 if (buildNativeGPU) {
1060 GPUMemCpy(RecoStep::TPCClusterFinding, (void*)mInputsShadow->mPclusterNativeBuffer, (const void*)tmpNativeClusters, nClsTotal * sizeof(tmpNativeClusters[0]), -1, true);
1061 }
1062 }
1063 mRec->MemoryScalers()->nTPCHits = nClsTotal;
1064 mRec->PopNonPersistentMemory(RecoStep::TPCClusterFinding, qStr2Tag("TPCCLUST"));
1065 if (mPipelineNotifyCtx) {
1067 mPipelineNotifyCtx = nullptr;
1068 }
1069
1070 if (GetProcessingSettings().autoAdjustHostThreads && !doGPU) {
1072 }
1073
1074#endif
1075 return 0;
1076}