#include "GPUDefParametersRuntime.h"
// ...

#ifdef GPUCA_TPC_GEOMETRY_O2
std::pair<uint32_t, uint32_t> GPUChainTracking::TPCClusterizerDecodeZSCountUpdate(uint32_t iSector, const CfFragment& fragment)
{
  // ...
  uint16_t posInEndpoint = 0;
  uint16_t pagesEndpoint = 0;
  // ...
  for (uint32_t l = pageFirst; l < pageLast; l++) {
    uint16_t pageDigits = mCFContext->fragmentData[fragment.index].pageDigits[iSector][j][posInEndpoint++];
    // ...
  }
  // ...
  if (pagesEndpoint != mCFContext->fragmentData[fragment.index].pageDigits[iSector][j].size()) {
    // ...
    GPUError("TPC raw page count mismatch in TPCClusterizerDecodeZSCountUpdate: expected %d / buffered %lu", pagesEndpoint, mCFContext->fragmentData[fragment.index].pageDigits[iSector][j].size());
    // ...
    GPUFatal("TPC raw page count mismatch in TPCClusterizerDecodeZSCountUpdate: expected %d / buffered %lu", pagesEndpoint, mCFContext->fragmentData[fragment.index].pageDigits[iSector][j].size());
    // ...
  }
  // ...
  TPCClusterizerEnsureZSOffsets(iSector, fragment);
  // ...
}
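// --- Illustrative sketch (not part of the original file) ---
// The functions in this file walk raw ZS pages once per CfFragment. As a
// rough mental model (hypothetical, simplified type; the real CfFragment
// lives in the clusterizer headers and also tracks the fragment index), a
// fragment is a half-open window of time bins, and next() advances the
// window with an overlap so signals on the boundary are seen by both sides:
namespace sketch {
struct Fragment {
  int32_t start;   // first time bin covered by this fragment
  int32_t length;  // number of time bins per fragment
  int32_t overlap; // time bins shared with the neighboring fragment
  int32_t first() const { return start; }
  int32_t last() const { return start + length; }
  Fragment next() const { return {start + length - overlap, length, overlap}; }
};
} // namespace sketch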
void GPUChainTracking::TPCClusterizerEnsureZSOffsets(uint32_t iSector, const CfFragment& fragment)
{
  // ...
  uint32_t pagesEndpoint = 0;
  const uint32_t nAdcsExpected = data.nDigits[iSector][endpoint];
  const uint32_t nPagesExpected = data.nPages[iSector][endpoint];
  // ...
  uint32_t nAdcDecoded = 0;
  // ...
  const uint32_t pageFirst = (i == data.minMaxCN[iSector][endpoint].zsPtrFirst) ? data.minMaxCN[iSector][endpoint].zsPageFirst : 0;
  // ...
  for (uint32_t j = pageFirst; j < pageLast; j++) {
    // ...
    const uint16_t nSamplesInPage = decHdr->nADCsamples;
    // ...
    nAdcDecoded += nSamplesInPage;
    // ...
  }
  // ...
  if (pagesEndpoint != nPagesExpected) {
    GPUFatal("Sector %d, Endpoint %d, Fragment %d: TPC raw page count mismatch: expected %d / buffered %u", iSector, endpoint, fragment.index, pagesEndpoint, nPagesExpected);
  }
  // ...
  if (nAdcDecoded != nAdcsExpected) {
    GPUFatal("Sector %d, Endpoint %d, Fragment %d: TPC ADC count mismatch: expected %u, buffered %u", iSector, endpoint, fragment.index, nAdcsExpected, nAdcDecoded);
  }
  // ...
  nAdcs += nAdcsExpected;
  // ...
}
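// --- Illustrative sketch (not part of the original file) ---
// TPCClusterizerEnsureZSOffsets re-derives page and ADC counts directly from
// the raw pages and compares them against the totals collected during the
// scan. Schematically (hypothetical simplified layout; the real code locates
// TPCZSHDRV2 via o2::raw::RDHUtils::getMemorySize rather than at a fixed
// offset from the page end):
#include <cstddef>
#include <cstdint>
#include <cstring>
namespace sketch {
inline uint32_t sumAdcSamples(const uint8_t* pages, size_t nPages, size_t pageSize, size_t hdrOffsetFromEnd)
{
  uint32_t nAdc = 0;
  for (size_t p = 0; p < nPages; p++) {
    const uint8_t* page = pages + p * pageSize;
    uint16_t nSamples; // counterpart of TPCZSHDRV2::nADCsamples
    std::memcpy(&nSamples, page + pageSize - hdrOffsetFromEnd, sizeof(nSamples));
    nAdc += nSamples;
  }
  return nAdc;
}
} // namespace sketch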
struct TPCCFDecodeScanTmp {
  int32_t zsPtrFirst, zsPageFirst, zsPtrLast, zsPageLast, hasData, pageCounter;
};
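// zsPtrFirst/zsPageFirst .. zsPtrLast/zsPageLast bracket the raw-page range a
// fragment touches; hasData records that the range has been opened, and
// pageCounter remembers the last page booked so that gaps (empty or skipped
// pages) can be accounted for during the scan below.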
std::pair<uint32_t, uint32_t> GPUChainTracking::TPCClusterizerDecodeZSCount(uint32_t iSector, const CfFragment& fragment)
{
  // ...
  uint32_t nDigits = 0;
  // ...
  memset(endpointAdcSamples, 0, sizeof(endpointAdcSamples));
  // ...
  std::vector<std::pair<CfFragment, TPCCFDecodeScanTmp>> fragments;
  // ...
  fragments.emplace_back(std::pair<CfFragment, TPCCFDecodeScanTmp>{fragment, {0, 0, 0, 0, 0, -1}});
  // ...
  fragments.emplace_back(std::pair<CfFragment, TPCCFDecodeScanTmp>{fragments.back().first.next(), {0, 0, 0, 0, 0, -1}});
  // ...
  std::vector<bool> fragmentExtends(mCFContext->nFragments, false);
  // ...
  uint32_t firstPossibleFragment = 0;
  uint32_t pageCounter = 0;
  uint32_t emptyPages = 0;
  // ...
  static bool errorShown = false;
  if (errorShown == false) {
    GPUAlarm("Trigger handling only possible with TPC Dense Link Based data, received version %d, disabling", mCFContext->zsVersion);
    // ...
  }
  // ...
  GPUError("Received TPC ZS 8kb page of mixed versions, expected %d, received %d (linkid %d, feeCRU %d, feeEndpoint %d, feelinkid %d)", mCFContext->zsVersion, (int32_t)hdr->version, (int32_t)o2::raw::RDHUtils::getLinkID(*rdh), (int32_t)rdh_utils::getCRU(*rdh), (int32_t)rdh_utils::getEndPoint(*rdh), (int32_t)rdh_utils::getLink(*rdh));
  constexpr size_t bufferSize = 3 * std::max(sizeof(*rdh), sizeof(*hdr)) + 1;
  // ...
  for (size_t i = 0; i < sizeof(*rdh); i++) {
    snprintf(dumpBuffer + 3 * i, 4, "%02X ", (int32_t)((uint8_t*)rdh)[i]);
  }
  GPUAlarm("RDH of page: %s", dumpBuffer);
  for (size_t i = 0; i < sizeof(*hdr); i++) {
    snprintf(dumpBuffer + 3 * i, 4, "%02X ", (int32_t)((uint8_t*)hdr)[i]);
  }
  GPUAlarm("Metainfo of page: %s", dumpBuffer);
  // ...
  GPUFatal("Cannot process with invalid TPC ZS data, exiting");
  // ...
  if (hdr2->flags & TPCZSHDRV2::ZSFlags::TriggerWordPresent) {
    // ...
    tmp.orbit = o2::raw::RDHUtils::getHeartBeatOrbit(*rdh);
    // ...
  }
  // ...
  nDigits += hdr->nADCsamples;
  endpointAdcSamples[j] += hdr->nADCsamples;
  // ...
  uint32_t maxTimeBin = timeBin + hdr->nTimeBinSpan;
  if (mCFContext->zsVersion >= ZSVersion::ZSVersionDenseLinkBased) {
    // ...
    if (hdr2->flags & TPCZSHDRV2::ZSFlags::nTimeBinSpanBit8) {
      // ...
    }
  }
  // ...
  bool extendsInNextPage = false;
  if (mCFContext->zsVersion >= ZSVersion::ZSVersionDenseLinkBased) {
    // ...
    extendsInNextPage = o2::raw::RDHUtils::getHeartBeatOrbit(*nextrdh) == o2::raw::RDHUtils::getHeartBeatOrbit(*rdh) && o2::raw::RDHUtils::getMemorySize(*nextrdh) > sizeof(o2::header::RAWDataHeader);
    // ...
  }
  // ...
  while (firstPossibleFragment && (uint32_t)fragments[firstPossibleFragment - 1].first.last() > timeBin) {
    firstPossibleFragment--;
  }
  // ...
  auto handleExtends = [&](uint32_t ff) {
    if (fragmentExtends[ff]) {
      // ...
      fragments[ff].second.zsPageLast++;
      // ...
      mCFContext->fragmentData[ff].pageDigits[iSector][j].emplace_back(0);
      // ...
      fragmentExtends[ff] = false;
    }
  };
  // ...
  if (mCFContext->zsVersion >= ZSVersion::ZSVersionDenseLinkBased) {
    for (uint32_t ff = 0; ff < firstPossibleFragment; ff++) {
      handleExtends(ff);
    }
  }
  // ...
  for (uint32_t f = firstPossibleFragment; f < mCFContext->nFragments; f++) {
    if (timeBin < (uint32_t)fragments[f].first.last() && (uint32_t)fragments[f].first.first() <= maxTimeBin) {
      if (!fragments[f].second.hasData) {
        fragments[f].second.hasData = 1;
        fragments[f].second.zsPtrFirst = k;
        fragments[f].second.zsPageFirst = l;
      } else {
        if (pageCounter > (uint32_t)fragments[f].second.pageCounter + 1) {
          mCFContext->fragmentData[f].nPages[iSector][j] += emptyPages + pageCounter - fragments[f].second.pageCounter - 1;
          for (uint32_t k2 = fragments[f].second.zsPtrLast - 1; k2 <= k; k2++) {
            for (uint32_t l2 = ((int32_t)k2 == fragments[f].second.zsPtrLast - 1) ? fragments[f].second.zsPageLast : 0; l2 < (k2 < k ? mIOPtrs.tpcZS->sector[iSector].nZSPtr[j][k2] : l); l2++) {
              // ...
              mCFContext->fragmentData[f].pageDigits[iSector][j].emplace_back(0);
              // ...
              const TPCZSHDR* const hdrTmp = (const TPCZSHDR*)(rdh_utils::getLink(o2::raw::RDHUtils::getFEEID(*rdhTmp)) == rdh_utils::DLBZSLinkID ? (pageTmp + o2::raw::RDHUtils::getMemorySize(*rdhTmp) - sizeof(TPCZSHDRV2)) : (pageTmp + sizeof(o2::header::RAWDataHeader)));
              // ...
            }
          }
        } else if (emptyPages) {
          mCFContext->fragmentData[f].nPages[iSector][j] += emptyPages;
          // ...
          for (uint32_t m = 0; m < emptyPages; m++) {
            mCFContext->fragmentData[f].pageDigits[iSector][j].emplace_back(0);
          }
          // ...
        }
      }
      // ...
      fragments[f].second.zsPtrLast = k + 1;
      fragments[f].second.zsPageLast = l + 1;
      fragments[f].second.pageCounter = pageCounter;
      // ...
      mCFContext->fragmentData[f].nDigits[iSector][j] += hdr->nADCsamples;
      // ...
      mCFContext->fragmentData[f].pageDigits[iSector][j].emplace_back(hdr->nADCsamples);
      // ...
      fragmentExtends[f] = extendsInNextPage;
    } else {
      // ...
      if (timeBin < (uint32_t)fragments[f].first.last()) {
        if (mCFContext->zsVersion >= ZSVersion::ZSVersionDenseLinkBased) {
          for (uint32_t ff = f + 1; ff < mCFContext->nFragments; ff++) {
            handleExtends(ff);
          }
        }
        break;
      } else {
        firstPossibleFragment = f + 1;
      }
    }
  }
  // ...
  mCFContext->fragmentData[f].minMaxCN[iSector][j].zsPtrLast = fragments[f].second.zsPtrLast;
  mCFContext->fragmentData[f].minMaxCN[iSector][j].zsPtrFirst = fragments[f].second.zsPtrFirst;
  mCFContext->fragmentData[f].minMaxCN[iSector][j].zsPageLast = fragments[f].second.zsPageLast;
  mCFContext->fragmentData[f].minMaxCN[iSector][j].zsPageFirst = fragments[f].second.zsPageFirst;
  // ...
  if (endpointAdcSamples[i] > mCFContext->nDigitsEndpointMax[iSector]) {
    mCFContext->nDigitsEndpointMax[iSector] = endpointAdcSamples[i];
  }
  // ...
  uint32_t nDigitsFragmentMax = 0;
  // ...
  uint32_t pagesInFragment = 0;
  uint32_t digitsInFragment = 0;
  // ...
  pagesInFragment += mCFContext->fragmentData[i].nPages[iSector][j];
  digitsInFragment += mCFContext->fragmentData[i].nDigits[iSector][j];
  // ...
  nDigitsFragmentMax = std::max(nDigitsFragmentMax, digitsInFragment);
  // ...
  return {nDigits, nDigitsFragmentMax};
}
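// --- Illustrative sketch (not part of the original file) ---
// The core of the scan above: a page spanning time bins [pageFirstBin,
// pageLastBin] is booked into every fragment whose half-open window
// intersects that span (cf. the condition at the top of the fragment loop).
// A minimal CPU analogue (hypothetical; plain windows stand in for
// CfFragment::first()/last()):
#include <cstddef>
#include <cstdint>
#include <utility>
#include <vector>
namespace sketch {
inline std::vector<size_t> fragmentsForPage(const std::vector<std::pair<uint32_t, uint32_t>>& windows, // [first, last)
                                            uint32_t pageFirstBin, uint32_t pageLastBin)               // inclusive span
{
  std::vector<size_t> hit;
  for (size_t f = 0; f < windows.size(); f++) {
    if (pageFirstBin < windows[f].second && windows[f].first <= pageLastBin) {
      hit.push_back(f); // same test as: timeBin < last() && first() <= maxTimeBin
    }
  }
  return hit;
}
} // namespace sketch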
// ...

void GPUChainTracking::RunTPCClusterizer_compactPeaks(GPUTPCClusterFinder& clusterer, GPUTPCClusterFinder& clustererShadow, int32_t stage, bool doGPU, int32_t lane)
{
  // ...
  const uint32_t iSector = clusterer.mISector;
  // ...
  std::vector<size_t> counts;
  // ...
  if (nSteps > clusterer.mNBufs) {
    GPUError("Clusterer buffers exceeded (%u > %u)", nSteps, (int32_t)clusterer.mNBufs);
  }
  // ...
  size_t tmpCount = count;
  // ...
  for (uint32_t i = 1; i < nSteps; i++) {
    counts.push_back(tmpCount);
    if (i == 1) {
      runKernel<GPUTPCCFStreamCompaction, GPUTPCCFStreamCompaction::scanStart>({GetGrid(tmpCount, scanWorkgroupSize, lane), {iSector}}, i, stage);
    } else {
      runKernel<GPUTPCCFStreamCompaction, GPUTPCCFStreamCompaction::scanUp>({GetGrid(tmpCount, scanWorkgroupSize, lane), {iSector}}, i, tmpCount);
    }
    tmpCount = (tmpCount + scanWorkgroupSize - 1) / scanWorkgroupSize;
  }
  // ...
  runKernel<GPUTPCCFStreamCompaction, GPUTPCCFStreamCompaction::scanTop>({GetGrid(tmpCount, scanWorkgroupSize, lane), {iSector}}, nSteps, tmpCount);
  // ...
  for (uint32_t i = nSteps - 1; i > 1; i--) {
    tmpCount = counts[i - 1];
    runKernel<GPUTPCCFStreamCompaction, GPUTPCCFStreamCompaction::scanDown>({GetGrid(tmpCount - scanWorkgroupSize, scanWorkgroupSize, lane), {iSector}}, i, scanWorkgroupSize, tmpCount);
  }
  // ...
  runKernel<GPUTPCCFStreamCompaction, GPUTPCCFStreamCompaction::compactDigits>({GetGrid(count, scanWorkgroupSize, lane), {iSector}}, 1, stage, in, out);
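  // --- Illustrative sketch (not part of the original file) ---
  // scanStart/scanUp/scanTop/scanDown build a hierarchical exclusive prefix
  // sum over per-element "keep" flags; compactDigits then scatters flagged
  // elements to their prefix-sum positions. A single-threaded analogue of
  // the whole pipeline (hypothetical; std::vector instead of the GPU buffers
  // managed by GPUTPCClusterFinder):
  auto sketchCompactByFlag = [](const std::vector<uint32_t>& items, const std::vector<uint8_t>& keep) {
    std::vector<uint32_t> out;
    for (size_t i = 0; i < items.size() && i < keep.size(); i++) {
      if (keep[i]) {
        out.push_back(items[i]); // output index == exclusive prefix sum of keep[0..i)
      }
    }
    return out;
  };
  (void)sketchCompactByFlag; // illustrative only, not used by the real code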
  // ...
  for (size_t i = 0; i < nIn; i++) {
    // ...
  }
  // ...

std::pair<uint32_t, uint32_t> GPUChainTracking::RunTPCClusterizer_transferZS(int32_t iSector, const CfFragment& fragment, int32_t lane)
{
  // ...
  const auto& retVal = TPCClusterizerDecodeZSCountUpdate(iSector, fragment);
  // ...
  uint32_t nPagesSector = 0;
  // ...
  nPagesSector += nPages;
  // ...
}
int32_t GPUChainTracking::RunTPCClusterizer_prepare(bool restorePointers)
{
  // ...
  if (restorePointers) {
    for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) {
      // ...
    }
    // ...
  }
  // ...
  mCFContext->tpcMaxTimeBin = maxAllowedTimebin;
  // ...
  uint32_t nDigitsFragmentMax[NSECTORS];
  // ...
  for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) {
    // ...
    GPUError("Data has invalid RDH version %d, %d required\n", o2::raw::RDHUtils::getVersion(rdh), o2::raw::RDHUtils::getVersion<o2::header::RAWDataHeader>());
    // ...
    const auto& x = TPCClusterizerDecodeZSCount(iSector, fragmentMax);
    nDigitsFragmentMax[iSector] = x.first;
    // ...
  }
  // ...
  for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) {
    uint32_t nDigitsBase = nDigitsFragmentMax[iSector];
    uint32_t threshold = 40000000;
    uint32_t nDigitsScaled = nDigitsBase > threshold ? nDigitsBase : std::min((threshold + nDigitsBase) / 2, 2 * nDigitsBase);
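    // Worked example (not from the original file): with threshold = 40000000,
    //   nDigitsBase = 10M -> min((40M + 10M) / 2, 2 * 10M) = 20M   (2x headroom)
    //   nDigitsBase = 35M -> min((40M + 35M) / 2, 2 * 35M) = 37.5M (midpoint cap)
    //   nDigitsBase = 50M -> 50M (already above threshold, no extra headroom)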
    // ...
  }
  // ...
  for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) {
    // ...
  }
  // ...
  if (mCFContext->tpcMaxTimeBin > maxAllowedTimebin) {
    GPUError("Input data has invalid time bin %u > %d", mCFContext->tpcMaxTimeBin, maxAllowedTimebin);
    // ...
    mCFContext->tpcMaxTimeBin = maxAllowedTimebin;
  }
  // ...
  for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) {
    // ...
  }
  // ...
}

int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
{
  if (param().rec.fwdTPCDigitsAsClusters) {
    return ForwardTPCDigits();
  }
#ifdef GPUCA_TPC_GEOMETRY_O2
  // ...
  if (RunTPCClusterizer_prepare(mPipelineNotifyCtx && GetProcessingSettings().doublePipelineClusterizer)) {
    // ...
  }
  // ...
  float tpcHitLowOccupancyScalingFactor = 1.f;
  // ...
  if (nHitsBase < threshold) {
    // ...
    tpcHitLowOccupancyScalingFactor = std::min(3.5f, (float)threshold / nHitsBase);
  }
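  // Worked example (not from the original file; the threshold value here is
  // hypothetical): with threshold = 30M hits and nHitsBase = 6M, the factor
  // is min(3.5f, 30M / 6M) = min(3.5, 5.0) = 3.5, i.e. the memory scaling
  // for low-occupancy data is capped at 3.5x.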
  // ...
  for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) {
    // ...
  }
  // ...
  for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) {
    // ...
  }
  // ...
  RunTPCClusterizer_prepare(true);
  // ...
  int32_t deviceId = -1;
  nnApplications[lane].init(nn_settings);
  if (nnApplications[lane].mModelsUsed[0]) {
    SetONNXGPUStream(*(nnApplications[lane].mModelClass).getSessionOptions(), lane, &deviceId);
    (nnApplications[lane].mModelClass).setDeviceId(deviceId);
    if (nnApplications[lane].mModelClass.getIntraOpNumThreads() > maxThreads) {
      // ...
    }
    // ...
    (nnApplications[lane].mModelClass).initEnvironment();
    // ...
  }
  // ...
  if (nnApplications[lane].mModelsUsed[1]) {
    SetONNXGPUStream(*(nnApplications[lane].mModelReg1).getSessionOptions(), lane, &deviceId);
    (nnApplications[lane].mModelReg1).setDeviceId(deviceId);
    if (nnApplications[lane].mModelReg1.getIntraOpNumThreads() > maxThreads) {
      // ...
    }
    // ...
    (nnApplications[lane].mModelReg1).initEnvironment();
    // ...
    (nnApplications[lane].mModelReg1).initSession();
    // ...
  }
  if (nnApplications[lane].mModelsUsed[2]) {
    SetONNXGPUStream(*(nnApplications[lane].mModelReg2).getSessionOptions(), lane, &deviceId);
    (nnApplications[lane].mModelReg2).setDeviceId(deviceId);
    if (nnApplications[lane].mModelReg2.getIntraOpNumThreads() > maxThreads) {
      // ...
    }
    // ...
    (nnApplications[lane].mModelReg2).initEnvironment();
    // ...
    (nnApplications[lane].mModelReg2).initSession();
    // ...
  }
  if (nn_settings.nnClusterizerVerbosity < 3) {
    LOG(info) << "(ORT) Allocated ONNX stream for lane " << lane << " and device " << deviceId;
  }
  // ...
  int32_t lane = sector % numLanes;
  // ...
  clustererNNShadow.mISector = sector;
  // ...
  size_t nClsTotal = 0;
  // ...
  std::unique_ptr<ClusterNative[]> tmpNativeClusterBuffer;
  // ...
  bool buildNativeGPU = doGPU && NeedTPCClustersOnGPU();
  // ...
  if (buildNativeGPU) {
    // ...
  }
  // ...
  if (mWaitForFinalInputs) {
    GPUFatal("Cannot use waitForFinalInput callback without delayed output");
  }
  // ...
  tmpNativeClusters = mInputsHost->mPclusterNativeOutput;
  // ...
  tmpNativeClusterBuffer = std::make_unique<ClusterNative[]>(mInputsHost->mNClusterNative);
  tmpNativeClusters = tmpNativeClusterBuffer.get();
  // ...
  if (propagateMCLabels) {
    // ...
  }
  // ...
  int8_t transferRunning[NSECTORS] = {0};
  auto notifyForeignChainFinished = [this]() {
    if (mPipelineNotifyCtx) {
      // ...
      std::lock_guard<std::mutex> lock(mPipelineNotifyCtx->mutex);
      mPipelineNotifyCtx->ready = true;
      // ...
      mPipelineNotifyCtx->cond.notify_one();
    }
  };
  // ...
  bool synchronizeCalibUpdate = false;
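  // --- Illustrative note (not part of the original file) ---
  // notifyForeignChainFinished() above is the standard condition-variable
  // handshake: set the flag while holding the mutex, then wake the waiter.
  // The receiving side in the other pipeline chain pairs with it roughly as
  // (sketch; ctx stands in for mPipelineNotifyCtx):
  //   std::unique_lock<std::mutex> lock(ctx->mutex);
  //   ctx->cond.wait(lock, [&] { return ctx->ready; }); // predicate form
  //                                                     // absorbs spurious wakeups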
  // ...
  for (CfFragment fragment = mCFContext->fragmentFirst; !fragment.isEnd(); fragment = fragment.next()) {
    // ...
    GPUInfo("Processing time bins [%d, %d) for sectors %d to %d", fragment.start, fragment.last(), iSectorBase, iSectorBase + GetProcessingSettings().nTPCClustererLanes - 1);
    // ...
    if (doGPU && fragment.index != 0) {
      SynchronizeStream(lane);
    }
    // ...
    uint32_t iSector = iSectorBase + lane;
    // ...
    bool setDigitsOnHost = (not doGPU && not mIOPtrs.tpcZS) || propagateMCLabels;
    // ...
    size_t numDigits = inDigits->nTPCDigits[iSector];
    if (setDigitsOnGPU) {
      GPUMemCpy(RecoStep::TPCClusterFinding, clustererShadow.mPdigits, inDigits->tpcDigits[iSector], sizeof(clustererShadow.mPdigits[0]) * numDigits, lane, true);
    }
    if (setDigitsOnHost) {
      // ...
    }
    // ...
    using ChargeMapType = decltype(*clustererShadow.mPchargeMap);
    using PeakMapType = decltype(*clustererShadow.mPpeakMap);
    // ...
    if (fragment.index == 0) {
      // ...
    }
    // ...
    if (propagateMCLabels && fragment.index == 0) {
      // ...
      GPUFatal("MC label container missing, sector %d", iSector);
      // ...
    }
    // ...
    runKernel<GPUTPCCFChargeMapFiller, GPUTPCCFChargeMapFiller::findFragmentStart>({GetGrid(1, lane), {iSector}}, mIOPtrs.tpcZS == nullptr);
    // ...
    } else if (propagateMCLabels) {
      // ...
    }
    // ...
    GPUFatal("Data with invalid TPC ZS mode (%d) received", mCFContext->zsVersion);
    // ...
    runKernel<GPUTPCCFDecodeZS>({GetGridBlk(nBlocks, lane), {iSector}}, firstHBF);
    // ...
    runKernel<GPUTPCCFDecodeZSLink>({GetGridBlk(nBlocks, lane), {iSector}}, firstHBF);
    // ...
    runKernel<GPUTPCCFDecodeZSDenseLink>({GetGridBlk(nBlocks, lane), {iSector}}, firstHBF);
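    // Note (not part of the original file): the three decode kernels above
    // are alternatives selected on mCFContext->zsVersion in the elided
    // branches: row-based ZS pages go to GPUTPCCFDecodeZS, link-based pages
    // to GPUTPCCFDecodeZSLink, and dense link-based pages to
    // GPUTPCCFDecodeZSDenseLink.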
    // ...
    uint32_t iSector = iSectorBase + lane;
    // ...
    int32_t nextSector = iSector;
    // ...
    if (nextSector < NSECTORS && mIOPtrs.tpcZS && mCFContext->nPagesSector[nextSector] && mCFContext->zsVersion != -1 && !mCFContext->abandonTimeframe) {
      // ...
    }
    // ...
    if (propagateMCLabels) {
      // ...
    }
    // ...
    bool checkForNoisyPads = (rec()->GetParam().rec.tpc.maxTimeBinAboveThresholdIn1000Bin > 0) || (rec()->GetParam().rec.tpc.maxConsecTimeBinAboveThreshold > 0);
    checkForNoisyPads &= (rec()->GetParam().rec.tpc.noisyPadsQuickCheck ? fragment.index == 0 : true);
    // ...
    if (checkForNoisyPads) {
      // ...
      runKernel<GPUTPCCFCheckPadBaseline>({GetGridBlk(nBlocks, lane), {iSector}});
    }
    // ...
    RunTPCClusterizer_compactPeaks(clusterer, clustererShadow, 0, doGPU, lane);
    // ...
    uint32_t iSector = iSectorBase + lane;
    // ...
    runKernel<GPUTPCCFNoiseSuppression, GPUTPCCFNoiseSuppression::noiseSuppression>({GetGrid(clusterer.mPmemory->counters.nPeaks, lane), {iSector}});
    // ...
    RunTPCClusterizer_compactPeaks(clusterer, clustererShadow, 1, doGPU, lane);
    // ...
    uint32_t iSector = iSectorBase + lane;
    // ...
    if (fragment.index == 0) {
      // ...
      if (transferRunning[lane] == 1) {
        // ...
        transferRunning[lane] = 2;
      }
      // ...
    }
    // ...
    int withMC = (doGPU && propagateMCLabels);
    // ...
    GPUFatal("Project not compiled with neural network clusterization. Aborting.");
    // ...
    if (doGPU && propagateMCLabels) {
      // ...
    }
    // ...
    laneHasData[lane] = true;
    // ...
    size_t nClsFirst = nClsTotal;
    bool anyLaneHasData = false;
    for (int32_t lane = 0; lane < maxLane; lane++) {
      uint32_t iSector = iSectorBase + lane;
      // ...
      if (laneHasData[lane]) {
        anyLaneHasData = true;
        // ...
        clusterer.raiseError(GPUErrors::ERROR_CF_GLOBAL_CLUSTER_OVERFLOW, iSector * 1000 + j, nClsTotal + clusterer.mPclusterInRow[j], mInputsHost->mNClusterNative);
        // ...
        if (buildNativeGPU) {
          // ...
        } else if (buildNativeHost) {
          // ...
        }
        // ...
        if (transferRunning[lane]) {
          // ...
        }
        transferRunning[lane] = 1;
      }
      // ...
      if (not propagateMCLabels || not laneHasData[lane]) {
        assert(propagateMCLabels ? mcLinearLabels.header.size() == nClsTotal : true);
        // ...
      }
      // ...
      assert(propagateMCLabels ? mcLinearLabels.header.size() == nClsTotal : true);
    }
    // ...
    if (propagateMCLabels) {
      for (int32_t lane = 0; lane < maxLane; lane++) {
        // ...
      }
    }
    // ...
    if (buildNativeHost && buildNativeGPU && anyLaneHasData) {
      // ...
      mOutputQueue.emplace_back(outputQueueEntry{(void*)((char*)&tmpNativeClusters[nClsFirst] - (char*)&tmpNativeClusters[0]), &mInputsShadow->mPclusterNativeBuffer[nClsFirst], (nClsTotal - nClsFirst) * sizeof(tmpNativeClusters[0]), RecoStep::TPCClusterFinding});
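      // --- Illustrative note (not part of the original file) ---
      // The (void*)((char*)&tmpNativeClusters[nClsFirst] - (char*)&tmpNativeClusters[0])
      // cast above stores the *byte offset* of the destination in the queue
      // entry's pointer field rather than an address, since tmpNativeClusters
      // is rebased to the final output buffer further down before the queued
      // copies run. Resolving such an entry then looks like (hypothetical
      // helper):
      //   char* dst = finalBufferBase + reinterpret_cast<uintptr_t>(entry.dst);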
      // ...
      GPUMemCpy(RecoStep::TPCClusterFinding, (void*)&tmpNativeClusters[nClsFirst], (const void*)&mInputsShadow->mPclusterNativeBuffer[nClsFirst], (nClsTotal - nClsFirst) * sizeof(tmpNativeClusters[0]), mRec->NStreams() - 1, false);
    }
    // ...
    if (mWaitForFinalInputs && iSectorBase >= 21 && (int32_t)iSectorBase < 21 + GetProcessingSettings().nTPCClustererLanes) {
      notifyForeignChainFinished();
    }
    if (mWaitForFinalInputs && iSectorBase >= 30 && (int32_t)iSectorBase < 30 + GetProcessingSettings().nTPCClustererLanes) {
      mWaitForFinalInputs();
    }
  }
#ifdef GPUCA_HAS_ONNX
  // ...
  LOG(info) << "(ORT) Environment releasing...";
  // ...
#endif
  // ...
  if (transferRunning[i]) {
    // ...
  }
  // ...
  if (triggerOutput && triggerOutput->allocator) {
    // ...
  }
  // ...
  if (propagateMCLabels) {
    // ...
    std::pair<ConstMCLabelContainer*, ConstMCLabelContainerView*> buffer;
    // ...
    throw std::runtime_error("Cluster MC Label buffer missing");
    // ...
    buffer = {&container->first, &container->second};
    // ...
    assert(propagateMCLabels ? mcLinearLabels.header.size() == nClsTotal : true);
    assert(propagateMCLabels ? mcLinearLabels.data.size() >= nClsTotal : true);
    // ...
    mcLabelsConstView = buffer.second;
    // ...
  }
  // ...
  tmpNativeClusters = mInputsHost->mPclusterNativeOutput;
  // ...
  if (buildNativeHost) {
    // ...
    auto allocator = [this, &tmpNativeClusters](size_t size) {
      // ...
      return (tmpNativeClusters = this->mInputsHost->mPclusterNativeOutput);
    };
    RunTPCClusterFilter(tmpNativeAccess, allocator, false);
    // ...
  }
  // ...
  if (!mWaitForFinalInputs) {
    notifyForeignChainFinished();
  }
  // ...
  if (buildNativeGPU) {
    // ...
  }
  // ...
  mInputsHost->mPclusterNativeAccess->setOffsetPtrs();
  // ...
  if (doGPU && synchronizeOutput) {
    // ...
  }
  if (doGPU && synchronizeCalibUpdate) {
    // ...
  }
  // ...
  if (buildNativeGPU) {
    GPUMemCpy(RecoStep::TPCClusterFinding, (void*)mInputsShadow->mPclusterNativeBuffer, (const void*)tmpNativeClusters, nClsTotal * sizeof(tmpNativeClusters[0]), -1, true);
  }
  // ...
  if (mPipelineNotifyCtx) {
    // ...
    mPipelineNotifyCtx = nullptr;
  }
  // ...
}