#ifdef _PERFORM_TIMING_
  mTimer.Start(false);
#endif
  nThreads = std::max(nThreads, 1);
  auto autoDecode = reader.getDecodeNextAuto();
  do { // one iteration per readout frame (ROF)
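    // sanity check: ROF interaction records must grow monotonically; violations are
    // logged (throttled to MaxErrLog messages) and the offending ROF is skipped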
    const int MaxErrLog = 2;
    static int errLocCount = 0;
    if (errLocCount++ < MaxErrLog) {
      LOGP(warn, "Impossible ROF IR {}, does not exceed previous {}, discarding in clusterization",
           reader.getInteractionRecord().asString(), lastIR.asString());
    }
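    // collect pointers to all fired chips of the current ROF and create its ROFRecord,
    // whose first entry points at the next cluster to be produced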
    mFiredChipsPtr.clear();
    size_t nPix = 0;
    while ((curChipData = reader.getNextChipData(mChips))) { // accumulate the data of all fired chips
      mFiredChipsPtr.push_back(curChipData);
      nPix += curChipData->getData().size();
    }
    auto& rof = vecROFRec->emplace_back(reader.getInteractionRecord(), vecROFRec->size(), compClus->size(), 0);
    uint16_t nFired = mFiredChipsPtr.size();
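    // in OpenMP builds the fired chips are clusterized in parallel blocks of chipStep chips,
    // each ClustererThread filling private buffers that are merged back in chip order below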
#ifdef WITH_OPENMP
    nThreads = std::min<int>(nFired, nThreads);
    uint16_t chipStep = nThreads > 1 ? (nThreads == 2 ? 20 : 10) : nFired;
    int dynGrp = std::min(4, std::max(1, nThreads / 2));
    if (nThreads > mThreads.size()) {
      int oldSz = mThreads.size();
      mThreads.resize(nThreads);
      for (int i = oldSz; i < nThreads; i++) {
        mThreads[i] = std::make_unique<ClustererThread>(this, i);
      }
    }
    if (nThreads > 1) {
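      // the loop is scheduled dynamically in grains of dynGrp iterations, each iteration
      // clustering up to chipStep chips, to balance the very uneven occupancy across chips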
#pragma omp parallel for schedule(dynamic, dynGrp) num_threads(nThreads)
      for (uint16_t ic = 0; ic < nFired; ic += chipStep) {
        auto ith = omp_get_thread_num();
        mThreads[ith]->process(ic, std::min(chipStep, uint16_t(nFired - ic)),
                               &mThreads[ith]->compClusters,
                               patterns ? &mThreads[ith]->patterns : nullptr,
                               labelsCl ? &mThreads[ith]->labels : nullptr, rof);
      }
    } else { // a single thread can write directly into the output containers, no merging needed
      mThreads[0]->process(0, nFired, compClus, patterns, labelsCl ? reader.getDigitsMCTruth() : nullptr, labelsCl, rof);
    }
#else // serial build: always process directly into the outputs
    mThreads[0]->process(0, nFired, compClus, patterns, labelsCl ? reader.getDigitsMCTruth() : nullptr, labelsCl, rof);
#endif
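    // merge the per-thread outputs back into increasing-chip order: every thread recorded a
    // ThreadStat for each contiguous block of chips it processed, so the blocks can be
    // stitched back deterministically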
    if (nThreads > 1) { // outputs of multiple threads must be merged
#ifdef _PERFORM_TIMING_
      mTimerMerge.Start(false);
#endif
      size_t nClTot = 0, nPattTot = 0;
      int chid = 0, thrStatIdx[nThreads];
      for (int ith = 0; ith < nThreads; ith++) {
        std::sort(mThreads[ith]->stats.begin(), mThreads[ith]->stats.end(),
                  [](const ThreadStat& a, const ThreadStat& b) { return a.firstChip < b.firstChip; });
        thrStatIdx[ith] = 0;
        nClTot += mThreads[ith]->compClusters.size();
        nPattTot += mThreads[ith]->patterns.size();
      }
      compClus->reserve(nClTot);
      if (patterns) {
        patterns->reserve(nPattTot);
      }
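      // stitch the blocks: repeatedly find the thread whose next block starts at chid,
      // append its clusters/patterns to the outputs and advance chid by the block length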
      while (chid < nFired) {
        for (int ith = 0; ith < nThreads; ith++) {
          if (thrStatIdx[ith] >= mThreads[ith]->stats.size()) {
            continue; // this thread has no blocks left
          }
          const auto& stat = mThreads[ith]->stats[thrStatIdx[ith]];
          if (stat.firstChip == chid) { // the block starting at chid belongs to this thread
            thrStatIdx[ith]++;
            chid += stat.nChips;
            if (stat.nClus > 0) {
              const auto clbeg = mThreads[ith]->compClusters.begin() + stat.firstClus;
              auto szold = compClus->size();
              compClus->insert(compClus->end(), clbeg, clbeg + stat.nClus);
              if (patterns) {
                const auto ptbeg = mThreads[ith]->patterns.begin() + stat.firstPatt;
                patterns->insert(patterns->end(), ptbeg, ptbeg + stat.nPatt);
              }
              if (labelsCl) {
                // presumed from context: the MC labels of this block are appended at the matching offset
                labelsCl->mergeAtBack(mThreads[ith]->labels, stat.firstClus, stat.nClus);
              }
            }
            break; // restart the scan for the block starting at the updated chid
          }
        }
      }
      for (int ith = 0; ith < nThreads; ith++) { // reset the per-thread buffers for the next ROF
        mThreads[ith]->patterns.clear();
        mThreads[ith]->compClusters.clear();
        mThreads[ith]->labels.clear();
        mThreads[ith]->stats.clear();
      }
#ifdef _PERFORM_TIMING_
      mTimerMerge.Stop();
#endif
    } else { // single-thread mode wrote directly into the outputs; only the stats need resetting
      mThreads[0]->stats.clear();
    }
    rof.setNEntries(compClus->size() - rof.getFirstEntry()); // record how many clusters this ROF produced
  } while (autoDecode);
#ifdef _PERFORM_TIMING_
  mTimer.Stop();
#endif
}
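
// ClustererThread::process: clusterize the block of nChips fired chips starting at index chip,
// writing compact clusters, patterns and MC labels through the provided pointers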
  if (stats.empty() || stats.back().firstChip + stats.back().nChips != chip) {
    // discontinuity in the processed chips: open a new ThreadStat block for the merger
    stats.emplace_back(ThreadStat{.firstChip = chip, .nChips = 0, .firstClus = uint32_t(compClusPtr->size()), .firstPatt = patternsPtr ? uint32_t(patternsPtr->size()) : 0, .nClus = 0, .nPatt = 0});
  }
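  // for every chip of the block: mask pixels re-fired from the previous ROF, then run the
  // single-pass clusterization (special-cased when only one unmasked pixel is present)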
  for (int ic = 0; ic < nChips; ic++) {
    auto* curChipData = parent->mFiredChipsPtr[chip + ic];
    auto chipID = curChipData->getChipID();
    if (parent->mMaxBCSeparationToMask > 0) { // mask pixels still fired from the previous ROF
      const auto& chipInPrevROF = parent->mChipsOld[chipID];
      // condition reconstructed: mask only if the previous ROF of this chip is close enough in BC
      if (rofPtr.getBCData().differenceInBC(chipInPrevROF.getInteractionRecord()) < parent->mMaxBCSeparationToMask) {
        parent->mMaxRowColDiffToMask ? curChipData->maskFiredInSample(parent->mChipsOld[chipID], parent->mMaxRowColDiffToMask)
                                     : curChipData->maskFiredInSample(parent->mChipsOld[chipID]);
      }
    }
    auto nclus0 = compClusPtr->size();
    auto validPixID = curChipData->getFirstUnmasked();
    auto npix = curChipData->getData().size();
    if (validPixID < npix) { // the chip data may have all of its pixels masked
      auto valp = validPixID++;
      if (validPixID == npix) { // fast path: a single unmasked pixel fired on the chip
        finishChipSingleHitFast(valp, curChipData, compClusPtr, patternsPtr, labelsDigPtr, labelsClPtr);
      } else {
        initChip(curChipData, valp);
        for (; validPixID < npix; validPixID++) {
          if (!curChipData->getData()[validPixID].isMasked()) {
            updateChip(curChipData, validPixID);
          }
        }
        finishChip(curChipData, compClusPtr, patternsPtr, labelsDigPtr, labelsClPtr);
      }
    }
    if (parent->mMaxBCSeparationToMask > 0) { // keep the current chip data for masking in the next ROF
      parent->mChipsOld[chipID].swap(*curChipData);
    }
  }
  auto& currStat = stats.back();
  currStat.nChips += nChips;
  currStat.nClus = compClusPtr->size() - currStat.firstClus;
  currStat.nPatt = patternsPtr ? (patternsPtr->size() - currStat.firstPatt) : 0;
}
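
// ClustererThread::finishChip: turn the preclusters accumulated for the current chip into
// final clusters: resolve merged precluster chains, collect their pixels, stream each cluster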
  const auto& pixData = curChipData->getData();
  int nPreclusters = preClusters.size();
  // path-compress the precluster indices: after this pass each precluster points to its root
  for (int i = 1; i < nPreclusters; i++) {
    if (preClusters[i].index != i) {
      preClusters[i].index = preClusters[preClusters[i].index].index;
    }
  }
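  // e.g. if precluster 5 was attached to 3 and 3 to 1, the pass above rewrites
  // preClusters[5].index from 3 to 1, so both 3 and 5 are gathered under root 1 below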
  for (int i1 = 0; i1 < nPreclusters; ++i1) {
    auto& preCluster = preClusters[i1];
    auto ci = preCluster.index;
    if (ci < 0) {
      continue; // already absorbed into a previously streamed cluster
    }
    BBox bbox(curChipData->getChipID()); // declarations reconstructed: bbox, label counter and pixel buffer are used below
    int nlab = 0;
    pixArrBuff.clear();
    int next = preCluster.head;
    while (next >= 0) { // walk the linked list of pixels belonging to this precluster
      const auto& pixEntry = pixels[next];
      const auto pix = pixData[pixEntry.second];
      pixArrBuff.push_back(pix);
      bbox.adjust(pix.getRowDirect(), pix.getCol());
      if (parent->mSquashingDepth) { // in squashing mode the original digit order must be recovered
        fetchMCLabels(curChipData->getOrderedPixId(pixEntry.second), labelsDigPtr, nlab);
      } else {
        fetchMCLabels(pixEntry.second + curChipData->getStartID(), labelsDigPtr, nlab);
      }
      next = pixEntry.first;
    }
    preCluster.index = -1; // mark as consumed
    for (int i2 = i1 + 1; i2 < nPreclusters; ++i2) { // absorb all preclusters sharing the same root
      auto& preCluster2 = preClusters[i2];
      if (preCluster2.index != ci) {
        continue;
      }
      next = preCluster2.head;
      while (next >= 0) {
        const auto& pixEntry = pixels[next];
        const auto pix = pixData[pixEntry.second];
        pixArrBuff.push_back(pix);
        bbox.adjust(pix.getRowDirect(), pix.getCol());
        if (parent->mSquashingDepth) {
          fetchMCLabels(curChipData->getOrderedPixId(pixEntry.second), labelsDigPtr, nlab);
        } else {
          fetchMCLabels(pixEntry.second + curChipData->getStartID(), labelsDigPtr, nlab);
        }
        next = pixEntry.first;
      }
      preCluster2.index = -1;
    }
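    // clusters with an acceptable bounding box are streamed directly; oversized ones are
    // split into sub-boxes (or dropped entirely when mDropHugeClusters is set)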
    if (bbox.isAcceptableSize()) {
      parent->streamCluster(pixArrBuff, &labelsBuff, bbox, parent->mPattIdConverter, compClusPtr, patternsPtr, labelsClusPtr, nlab);
    } else {
      if (!parent->mDropHugeClusters) { // huge cluster: split it instead of storing as one
        auto warnLeft = MaxHugeClusWarn - parent->mNHugeClus; // warning budget (reconstructed from context)
        if (warnLeft > 0) {
          LOGP(warn, "Splitting a huge cluster: chipID {}, rows {}:{} cols {}:{}{}",
               bbox.chipID, bbox.rowMin, bbox.rowMax, bbox.colMin, bbox.colMax,
               warnLeft == 1 ? " (Further warnings will be muted)" : "");
          parent->mNHugeClus++;
        }
        // the oversized box is split into acceptable sub-boxes; bboxT denotes the sub-box
        // currently being extracted, and its pixels are streamed as a separate cluster
        std::vector<PixelData> pixbuf;
        for (const auto& pix : pixArrBuff) {
          if (bboxT.isInside(pix.getRowDirect(), pix.getCol())) {
            pixbuf.push_back(pix);
          }
        }
        if (!pixbuf.empty()) { // all pixels may fall into the other sub-box
          parent->streamCluster(pixbuf, &labelsBuff, bboxT, parent->mPattIdConverter, compClusPtr, patternsPtr, labelsClusPtr, nlab, true);
        }
      }
    }
  }
}
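
// ClustererThread::finishChipSingleHitFast: shortcut for a chip whose only unmasked pixel
// forms a single-pixel cluster, bypassing the precluster machinery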
  auto pix = curChipData->getData()[hit];
  uint16_t row = pix.getRowDirect(), col = pix.getCol();
  int nlab = 0; // label handling reconstructed from context
  fetchMCLabels(curChipData->getStartID() + hit, labelsDigPtr, nlab);
  auto cnt = compClusPtr->size();
  for (int i = nlab; i--;) { // attach the fetched MC labels to the new cluster
    labelsClusPtr->addElement(cnt, labelsBuff[i]);
  }
  patternsPtr->emplace_back(1); // number of rows
  patternsPtr->emplace_back(1); // number of columns
  patternsPtr->insert(patternsPtr->end(), std::begin(patt), std::begin(patt) + 1); // the single-pixel pattern byte
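
// ClustererThread::updateChip: add one unmasked pixel to the evolving preclusters; curr and
// prev hold the precluster indices of the current and previous pixel columns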
  const auto pix = curChipData->getData()[ip];
  uint16_t row = pix.getRowDirect(); // safe: this pixel is known to be unmasked
  if (currCol != pix.getCol()) { // a new column starts: rotate the column buffers (reconstructed)
    swapColumnBuffers();
    resetColumn(curr);
    if (pix.getCol() > currCol + 1) {
      // no contact with the previous column: the pixel cannot extend any existing precluster
      currCol = pix.getCol();
      addNewPrecluster(ip, row);
      return;
    }
    currCol = pix.getCol();
  }
  if (noLeftCol) { // the previous column is empty: only the pixel above can be a neighbour
    if (curr[row - 1] >= 0) {
      expandPreCluster(ip, row, curr[row - 1]); // attach to the precluster of the pixel above
    } else {
      addNewPrecluster(ip, row);
    }
    return;
  }
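  // general case: up to four neighbours may already belong to preclusters (the pixel above in
  // this column, plus same-row/diagonal pixels of the previous column); attach to the one with
  // the lowest precluster index and redirect the others to that root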
  int nnb = 0, lowestIndex = curr[row - 1], lowestNb = 0, *nbrCol[4], nbrRow[4];
  if (lowestIndex >= 0) { // the pixel above (same column) already belongs to a precluster
    nbrCol[nnb] = curr;
    nbrRow[nnb++] = row - 1;
  } else {
    lowestIndex = 0x7ffff;
  }
#ifdef _ALLOW_DIAGONAL_ALPIDE_CLUSTERS_
  for (int i : {-1, 0, 1}) { // previous column: same row and the two diagonals
    auto v = prev[row + i];
    if (v >= 0) {
      nbrCol[nnb] = prev;
      nbrRow[nnb] = row + i;
      if (v < lowestIndex) {
        lowestIndex = v;
        lowestNb = nnb;
      }
      nnb++;
    }
  }
#else
  if (prev[row] >= 0) { // previous column: only the same row counts as a neighbour
    nbrCol[nnb] = prev;
    nbrRow[nnb] = row;
    if (prev[row] < lowestIndex) {
      lowestIndex = prev[row];
      lowestNb = nnb;
    }
    nnb++;
  }
#endif
  if (nnb == 0) { // no fired neighbours: open a new precluster (branching reconstructed)
    addNewPrecluster(ip, row);
  } else { // attach to the neighbour precluster with the lowest index
    expandPreCluster(ip, row, lowestIndex);
    for (int inb = 0; inb < nnb; inb++) { // redirect all touching preclusters to the common root
      auto& prevIndex = (nbrCol[inb])[nbrRow[inb]];
      prevIndex = preClusters[prevIndex].index = lowestIndex;
    }
  }
}
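
// Configuration and timing report of the clusterizer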
  if (mSquashingLayerDepth.empty()) { // a single squashing setting applies to all layers
    LOGP(info, "Clusterizer squashes overflow pixels separated by {} BC and <= {} in row/col seeking down to {} neighbour ROFs", mMaxBCSeparationToSquash, mMaxRowColDiffToMask, mSquashingDepth);
  } else { // per-layer squashing settings
    LOGP(info, "Clusterizer squashes overflow pixels <= {} in row/col", mMaxRowColDiffToMask);
    for (size_t i{0}; i < mSquashingLayerDepth.size(); ++i) {
      LOGP(info, "\tlay:{} separated by {} BC seeking down to {} neighbour ROFs", i, mMaxBCSeparationToSquashLayer[i], mSquashingLayerDepth[i]);
    }
  }
  LOGP(info, "Clusterizer masks overflow pixels separated by < {} BC and <= {} in row/col", mMaxBCSeparationToMask, mMaxRowColDiffToMask);
  LOGP(info, "Clusterizer does {}drop huge clusters", mDropHugeClusters ? "" : "not ");
#ifdef _PERFORM_TIMING_
  auto& tmr = const_cast<TStopwatch&>(mTimer); // ROOT TStopwatch getters are non-const
  auto& tmrm = const_cast<TStopwatch&>(mTimerMerge);
  LOG(info) << "Inclusive clusterization timing (w/o disk IO): Cpu: " << tmr.CpuTime()
            << " Real: " << tmr.RealTime() << " s in " << tmr.Counter() << " slots";
  LOG(info) << "Threads output merging timing                : Cpu: " << tmrm.CpuTime()
            << " Real: " << tmrm.RealTime() << " s in " << tmrm.Counter() << " slots";
#endif
}