// NOTE(review): fragment of the kernel Thread<> dispatcher — the enclosing function
// signature and the branch condition selecting between the two paths are outside
// this excerpt. Presumably the GPU path is taken on device builds and the CPU
// (Vc SIMD) path otherwise — TODO confirm against the full file.
31 CheckBaselineGPU(nBlocks, nThreads, iBlock, iThread, smem, clusterer);
33 CheckBaselineCPU(nBlocks, nThreads, iBlock, iThread, smem, clusterer);
43GPUd()
// GPU path of the pad-baseline check.
// One block per pad row (iRow = iBlock); each thread owns one pad of that row
// (iPadHandle) and the thread team cooperatively stages tiles of NumOfCachedTBs
// time bins from the charge map into shared memory before scanning them.
// Per pad it accumulates: number of time bins with charge (totalCharges), the
// longest run of consecutive charged bins (maxConsecCharges), and the peak
// charge (maxCharge), then hands them to updatePadBaseline().
// NOTE(review): incomplete excerpt — declarations of geo, chargeMap, basePos,
// pos, iTime, maxCharge and any synchronization barriers are not visible here.
void GPUTPCCFCheckPadBaseline::CheckBaselineGPU(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer)
// Time-frame fragment: defines the non-overlapping time-bin window to scan.
51 const CfFragment& fragment = clusterer.mPmemory->fragment;
56 const auto iRow = iBlock;
57 const auto nPads = geo.NPads(iRow);
// Per-pad accumulators for the scan below.
60 int32_t totalCharges = 0;
61 int32_t consecCharges = 0;
62 int32_t maxConsecCharges = 0;
// Map the flat thread id to (time, pad) coordinates inside the shared-memory
// cache tile used during the cooperative load phase...
65 const int16_t iPadOffset = iThread % MaxNPadsPerRow;
66 const int16_t iTimeOffset = iThread / MaxNPadsPerRow;
// ...and to a single pad of the row for the evaluation phase.
67 const int16_t iPadHandle = iThread;
// Threads beyond the row's pad count still help load, but skip evaluation.
// NOTE(review): the use of handlePad is in a part of the body not shown here.
68 const bool handlePad = iPadHandle < nPads;
70 const auto firstTB = fragment.firstNonOverlapTimeBin();
71 const auto lastTB = fragment.lastNonOverlapTimeBin();
// Walk the fragment's time window in tiles of NumOfCachedTBs time bins.
73 for (
auto t = firstTB; t < lastTB; t += NumOfCachedTBs) {
// Cooperative load: each thread writes one cache cell, zero-filling entries
// that fall outside the time window or beyond the row's pads.
79 smem.charges[iTimeOffset][iPadOffset] = iTime < lastTB && iPadOffset < nPads ? chargeMap[
pos].unpack() : 0;
// Evaluation: each thread scans the cached time bins of its own pad.
84 for (int32_t
i = 0;
i < NumOfCachedTBs;
i++) {
85 const Charge q = smem.charges[
i][iPadHandle];
// Count charged bins; a zero charge resets the consecutive-run counter.
86 totalCharges += (q > 0);
87 consecCharges = (q > 0) ? consecCharges + 1 : 0;
88 maxConsecCharges = CAMath::Max(consecCharges, maxConsecCharges);
89 maxCharge = CAMath::Max<Charge>(q, maxCharge);
// Publish this pad's statistics (basePos is declared in a part not shown here).
97 updatePadBaseline(basePos.gpad + iPadHandle, clusterer, totalCharges, maxConsecCharges, maxCharge);
102GPUd()
// CPU path of the pad-baseline check, vectorized with Vc.
// Processes PadsPerCacheline pads at once: each SIMD lane accumulates, for one
// pad, the number of charged time bins, the longest consecutive charged run,
// and the peak charge, then the results are scattered out per pad.
// NOTE(review): incomplete excerpt — declarations of geo, chargeMap, basePos,
// t, localtime, unpackedCharges and the masked consecCharges increment are not
// visible here.
void GPUTPCCFCheckPadBaseline::CheckBaselineCPU(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer)
105 const CfFragment& fragment = clusterer.mPmemory->fragment;
// Nothing to do if the base pad of this SIMD group lies beyond the row width.
111 if (basePos.pad() >= geo.NPads(basePos.row())) {
// Stride, in uint16_t elements, between consecutive tile rows of the packed
// charge map (tiling layout of the map storage).
115 constexpr size_t ElemsInTileRow = (size_t)
TilingLayout<
GridSize<2>>::WidthInTiles * TimebinsPerCacheline * PadsPerCacheline;
// One SIMD lane per pad in a cacheline.
117 using UShort8 = Vc::fixed_size_simd<uint16_t, PadsPerCacheline>;
118 using Charge8 = Vc::fixed_size_simd<float, PadsPerCacheline>;
// Per-lane accumulators, zero-initialized.
120 UShort8 totalCharges{Vc::Zero};
121 UShort8 consecCharges{Vc::Zero};
122 UShort8 maxConsecCharges{Vc::Zero};
123 Charge8 maxCharge{Vc::Zero};
// Raw pointer into the packed (still-encoded) charge storage; entries are
// loaded as uint16_t and unpacked to float only when a charge is present.
128 const uint16_t* packedChargeStart =
reinterpret_cast<uint16_t*
>(&chargeMap[basePos.delta({0, t})]);
// Scan the remaining time window in cacheline-sized steps.
130 for (; t < fragment.lastNonOverlapTimeBin(); t += TimebinsPerCacheline) {
// Aligned SIMD load of one time bin's packed charges for all lanes.
132 const UShort8 packedCharges{packedChargeStart + PadsPerCacheline * localtime, Vc::Aligned};
133 const UShort8::mask_type isCharge = packedCharges != 0;
135 if (isCharge.isNotEmpty()) {
// Masked update: count only lanes that saw a charge...
136 totalCharges(isCharge)++;
// ...and reset the consecutive-run counter on lanes without one.
138 consecCharges(not isCharge) = 0;
139 maxConsecCharges = Vc::max(consecCharges, maxConsecCharges);
148 maxCharge = Vc::max(maxCharge, unpackedCharges);
// Advance to the next tile row of the charge map.
154 packedChargeStart += ElemsInTileRow;
// Scatter the per-lane statistics back out, one pad at a time.
157 for (
tpccf::Pad localpad = 0; localpad < PadsPerCacheline; localpad++) {
158 updatePadBaseline(basePos.gpad + localpad, clusterer, totalCharges[localpad], maxConsecCharges[localpad], maxCharge[localpad]);
// NOTE(review): interior fragment of updatePadBaseline — the function signature
// and the declarations of pad/totalCharges/consecCharges/maxCharge are outside
// this excerpt.
165 const CfFragment& fragment = clusterer.mPmemory->fragment;
// Occupancy limit: configured per-1000-bins rate scaled to this fragment's
// actual (non-overlapping) length.
166 const int32_t totalChargesBaseline = clusterer.Param().rec.tpc.maxTimeBinAboveThresholdIn1000Bin * fragment.lengthWithoutOverlap() / 1000;
167 const int32_t consecChargesBaseline = clusterer.Param().rec.tpc.maxConsecTimeBinAboveThreshold;
168 const uint16_t saturationThreshold = clusterer.Param().rec.tpc.noisyPadSaturationThreshold;
// A pad is noisy if it is NOT saturated (a saturationThreshold of 0 disables
// that veto) and it exceeds either occupancy limit; a limit of 0 disables the
// corresponding check.
169 const bool isNoisy = (!saturationThreshold || maxCharge < saturationThreshold) && ((totalChargesBaseline > 0 && totalCharges >= totalChargesBaseline) || (consecChargesBaseline > 0 && consecCharges >= consecChargesBaseline));
// Flag the pad for exclusion by later cluster-finding stages.
172 clusterer.mPpadIsNoisy[pad] =
true;
#define GPUCA_GET_THREAD_COUNT(...)
GPUd() void GPUTPCCFCheckPadBaseline
Provides a basic fallback implementation for Vc.
static constexpr uint32_t NROWS
typedef void(APIENTRYP PFNGLCULLFACEPROC)(GLenum mode)