// Dispatch fragment of the kernel Thread() entry point: the enclosing
// definition (not visible in this chunk) selects the GPU or the CPU
// implementation of the pad-baseline check.
// NOTE(review): the leading "31"/"33" tokens look like line-number residue
// from the extraction, not program text — confirm against the original file.
31 CheckBaselineGPU(nBlocks, nThreads, iBlock, iThread, smem, clusterer);
33 CheckBaselineCPU(nBlocks, nThreads, iBlock, iThread, smem, clusterer);
43GPUd()
// GPU implementation of the pad-baseline check.
// Each block handles one pad row (iRow = iBlock, see below); threads
// cooperatively stage a tile of charges in smem.charges, then each thread
// scans the time bins of a single pad of that row, accumulating occupancy
// statistics that updatePadBaseline() uses to flag noisy pads.
// NOTE(review): interior lines of this function are missing from this chunk
// (opening brace, the iTime/pos computation, loop-closing braces, the
// declaration of maxCharge, and any thread barriers between the fill and
// scan phases) — the comments below describe only the visible statements;
// confirm the full body against the original source.
void GPUTPCCFCheckPadBaseline::CheckBaselineGPU(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer)
50 const CfFragment& fragment = clusterer.mPmemory->fragment;
// One pad row per block.
53 const auto iRow = iBlock;
54 const auto rowinfo = GetRowInfo(iRow);
// Per-pad occupancy counters: total number of time bins with charge, the
// current run of consecutive charged bins, and the longest run seen so far.
57 int32_t totalCharges = 0;
58 int32_t consecCharges = 0;
59 int32_t maxConsecCharges = 0;
// Thread-to-tile mapping: (iTimeOffset, iPadOffset) addresses the shared
// cache during the fill phase; iPadHandle selects the pad this thread scans
// in the scan phase.
62 const int16_t iPadOffset = iThread % MaxNPadsPerRow;
63 const int16_t iTimeOffset = iThread / MaxNPadsPerRow;
64 const int16_t iPadHandle = iThread;
// Threads beyond the row's pad count have no pad to publish.
65 const bool handlePad = iPadHandle < rowinfo.nPads;
// Only the non-overlapping part of the time fragment is inspected.
67 const auto firstTB = fragment.firstNonOverlapTimeBin();
68 const auto lastTB = fragment.lastNonOverlapTimeBin();
// Walk the time axis in tiles of NumOfCachedTBs time bins.
70 for (
auto t = firstTB; t < lastTB; t += NumOfCachedTBs) {
// Fill phase: out-of-range slots are zeroed so the scan below needs no
// bounds checks. (iTime and pos are computed on lines missing from this
// view — presumably iTime = t + iTimeOffset and pos addresses the charge
// map at (row, pad, time); confirm.)
76 smem.charges[iTimeOffset][iPadOffset] = iTime < lastTB && iPadOffset < rowinfo.nPads ? chargeMap[
pos].unpack() : 0;
// Scan phase: each thread reads the cached column of its own pad.
81 for (int32_t
i = 0;
i < NumOfCachedTBs;
i++) {
82 const Charge q = smem.charges[
i][iPadHandle];
// Count charged bins and track the longest consecutive run of them.
83 totalCharges += (q > 0);
84 consecCharges = (q > 0) ? consecCharges + 1 : 0;
85 maxConsecCharges = CAMath::Max(consecCharges, maxConsecCharges);
86 maxCharge = CAMath::Max<Charge>(q, maxCharge);
// Publish this pad's statistics. NOTE(review): presumably guarded by
// handlePad on a line not visible here; also note the index uses iPadOffset
// while the scan above used iPadHandle — verify against the original.
94 updatePadBaseline(rowinfo.globalPadOffset + iPadOffset, clusterer, totalCharges, maxConsecCharges, maxCharge);
99GPUd()
// CPU implementation of the pad-baseline check, vectorised with Vc.
// Each "block" index covers PadsPerCacheline adjacent pads; all lanes of a
// fixed-size SIMD vector process one pad each, walking the time axis in
// steps of TimebinsPerCacheline.
// NOTE(review): interior lines are missing from this chunk (opening brace,
// the early-return body after the validity check, the declarations of t,
// padsPerRow, localtime and unpackedCharges, the consecCharges increment,
// and several closing braces) — comments describe only what is visible.
void GPUTPCCFCheckPadBaseline::CheckBaselineCPU(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer)
102 const CfFragment& fragment = clusterer.mPmemory->fragment;
// First global pad of the cacheline-sized group handled by this block.
105 int32_t basePad = iBlock * PadsPerCacheline;
107 CfChargePos basePos = padToCfChargePos<PadsPerCacheline>(basePad, clusterer, padsPerRow);
// Groups that fall outside the pad plane are skipped (early-return body on
// a line not visible here).
109 if (not basePos.valid()) {
// Stride (in packed charge elements) between consecutive tile rows of the
// charge map's tiling layout.
113 constexpr size_t ElemsInTileRow = (size_t)
TilingLayout<
GridSize<2>>::WidthInTiles * TimebinsPerCacheline * PadsPerCacheline;
// One SIMD lane per pad in the group.
115 using UShort8 = Vc::fixed_size_simd<uint16_t, PadsPerCacheline>;
116 using Charge8 = Vc::fixed_size_simd<float, PadsPerCacheline>;
// Per-lane counters, mirroring the scalar GPU variant: charged-bin count,
// current consecutive run, longest run, and maximum charge.
118 UShort8 totalCharges{Vc::Zero};
119 UShort8 consecCharges{Vc::Zero};
120 UShort8 maxConsecCharges{Vc::Zero};
121 Charge8 maxCharge{Vc::Zero};
// Raw pointer into the packed charge map at this group's position.
// NOTE(review): reinterpret_cast of the charge-map storage — relies on the
// packed charge representation being uint16_t; confirm.
126 const uint16_t* packedChargeStart =
reinterpret_cast<uint16_t*
>(&chargeMap[basePos.delta({0, t})]);
// Walk the non-overlapping time bins one cacheline of time bins at a time.
128 for (; t < fragment.lastNonOverlapTimeBin(); t += TimebinsPerCacheline) {
// Aligned SIMD load of PadsPerCacheline packed charges for this time bin.
130 const UShort8 packedCharges{packedChargeStart + PadsPerCacheline * localtime, Vc::Aligned};
131 const UShort8::mask_type isCharge = packedCharges != 0;
133 if (isCharge.isNotEmpty()) {
// Write-masked updates: count charged lanes, reset the consecutive-run
// counter in uncharged lanes (the increment of charged lanes is on a
// missing line), and track per-lane maxima.
134 totalCharges(isCharge)++;
136 consecCharges(not isCharge) = 0;
137 maxConsecCharges = Vc::max(consecCharges, maxConsecCharges);
146 maxCharge = Vc::max(maxCharge, unpackedCharges);
// Advance to the next tile row of the charge map.
152 packedChargeStart += ElemsInTileRow;
// Scatter the per-lane results back to the per-pad baseline state.
155 for (
tpccf::Pad localpad = 0; localpad < PadsPerCacheline; localpad++) {
156 updatePadBaseline(basePad + localpad, clusterer, totalCharges[localpad], maxConsecCharges[localpad], maxCharge[localpad]);
// Fragment of a templated helper (presumably padToCfChargePos, as called in
// CheckBaselineCPU) that maps a global pad index to a charge-map position.
// NOTE(review): the function signature, the row loop header, and the tail of
// the body are missing from this chunk — confirm against the original.
161template <
int32_t PadsPerBlock>
// Running sum of pad counts of the rows already passed.
166 int32_t padOffset = 0;
168 int32_t npads = geo.NPads(
r);
// Pad index relative to the current row.
169 int32_t padInRow = pad - padOffset;
// Found the row containing this pad.
170 if (0 <= padInRow && padInRow < npads) {
// Align the pad down to the start of its PadsPerBlock-sized group.
171 int32_t cachelineOffset = padInRow % PadsPerBlock;
172 pad -= cachelineOffset;
// Fragment of GetRowInfo (as called from CheckBaselineGPU): computes the
// global pad offset of a row as the sum of the pad counts of all preceding
// rows, and returns it together with the row's own pad count.
// NOTE(review): the function signature and the loop's closing brace are
// missing from this chunk.
187 int16_t padOffset = 0;
// Accumulate pad counts of rows [0, row).
188 for (int16_t
r = 0;
r <
row;
r++) {
189 padOffset += geo.NPads(
r);
// RowInfo carries {globalPadOffset, nPads} for the requested row.
192 return RowInfo{padOffset, geo.NPads(
row)};
// Fragment of updatePadBaseline: decides from a pad's occupancy statistics
// whether it should be flagged as noisy.
// NOTE(review): the function signature and the `if` guard in front of the
// final assignment (and its closing brace) are missing from this chunk.
197 const CfFragment& fragment = clusterer.mPmemory->fragment;
// Allowed number of charged time bins, scaled from a per-1000-bins
// configuration value to the actual fragment length.
198 const int32_t totalChargesBaseline = clusterer.Param().rec.tpc.maxTimeBinAboveThresholdIn1000Bin * fragment.lengthWithoutOverlap() / 1000;
199 const int32_t consecChargesBaseline = clusterer.Param().rec.tpc.maxConsecTimeBinAboveThreshold;
200 const uint16_t saturationThreshold = clusterer.Param().rec.tpc.noisyPadSaturationThreshold;
// A pad is noisy when either occupancy criterion exceeds its baseline
// (a baseline of 0 disables that criterion), unless the maximum charge
// reaches the saturation threshold (a threshold of 0 disables this veto).
201 const bool isNoisy = (!saturationThreshold || maxCharge < saturationThreshold) && ((totalChargesBaseline > 0 && totalCharges >= totalChargesBaseline) || (consecChargesBaseline > 0 && consecCharges >= consecChargesBaseline));
// Record the verdict — presumably guarded by `if (isNoisy)` on a line not
// visible here; confirm against the original.
204 clusterer.mPpadIsNoisy[pad] =
true;
// NOTE(review): the three lines below appear to be residue from unrelated
// sources joined into this chunk — a truncated kernel declaration, a prose
// sentence from Vc documentation, and an OpenGL function-pointer typedef
// (missing its trailing semicolon). They are not part of this translation
// unit's logic; confirm against the original files before removing.
GPUd() void GPUTPCCFCheckPadBaseline
Provides a basic fallback implementation for Vc.
typedef void(APIENTRYP PFNGLCULLFACEPROC)(GLenum mode)