35 const CfFragment& fragment = clusterer.mPmemory->fragment;
38 int32_t basePad = iBlock * PadsPerCacheline;
39 ChargePos basePos = padToChargePos(basePad, clusterer);
41 if (not basePos.valid()) {
48 int32_t totalCharges = 0;
49 int32_t consecCharges = 0;
50 int32_t maxConsecCharges = 0;
53 int16_t localPadId = iThread / NumOfCachedTimebins;
54 int16_t localTimeBin = iThread % NumOfCachedTimebins;
55 bool handlePad = localTimeBin == 0;
57 for (
tpccf::TPCFragmentTime t = fragment.firstNonOverlapTimeBin(); t < fragment.lastNonOverlapTimeBin(); t += NumOfCachedTimebins) {
58 const ChargePos pos = basePos.delta({localPadId, int16_t(t + localTimeBin)});
59 smem.charges[localPadId][localTimeBin] = (
pos.valid()) ? chargeMap[
pos].unpack() : 0;
62 for (int32_t
i = 0;
i < NumOfCachedTimebins;
i++) {
63 const Charge q = smem.charges[localPadId][
i];
64 totalCharges += (q > 0);
65 consecCharges = (q > 0) ? consecCharges + 1 : 0;
66 maxConsecCharges = CAMath::Max(consecCharges, maxConsecCharges);
67 maxCharge = CAMath::Max<Charge>(q, maxCharge);
76 updatePadBaseline(basePad + localPadId, clusterer, totalCharges, maxConsecCharges, maxCharge);
81 constexpr size_t ElemsInTileRow = (size_t)
TilingLayout<
GridSize<2>>::WidthInTiles * TimebinsPerCacheline * PadsPerCacheline;
84 using UShort8 = Vc::fixed_size_simd<uint16_t, PadsPerCacheline>;
85 using Charge8 = Vc::fixed_size_simd<float, PadsPerCacheline>;
87 UShort8 totalCharges{Vc::Zero};
88 UShort8 consecCharges{Vc::Zero};
89 UShort8 maxConsecCharges{Vc::Zero};
90 Charge8 maxCharge{Vc::Zero};
92 std::array<uint16_t, PadsPerCacheline> totalCharges{0};
93 std::array<uint16_t, PadsPerCacheline> consecCharges{0};
94 std::array<uint16_t, PadsPerCacheline> maxConsecCharges{0};
95 std::array<Charge, PadsPerCacheline> maxCharge{0};
101 const uint16_t* packedChargeStart =
reinterpret_cast<uint16_t*
>(&chargeMap[basePos.delta({0, t})]);
103 for (; t < fragment.lastNonOverlapTimeBin(); t += TimebinsPerCacheline) {
106 const UShort8 packedCharges{packedChargeStart + PadsPerCacheline * localtime, Vc::Aligned};
107 const UShort8::mask_type isCharge = packedCharges != 0;
109 if (isCharge.isNotEmpty()) {
110 totalCharges(isCharge)++;
112 consecCharges(not isCharge) = 0;
113 maxConsecCharges = Vc::max(consecCharges, maxConsecCharges);
122 maxCharge = Vc::max(maxCharge, unpackedCharges);
127 for (
tpccf::Pad localpad = 0; localpad < PadsPerCacheline; localpad++) {
128 const uint16_t packedCharge = packedChargeStart[PadsPerCacheline * localtime + localpad];
129 const bool isCharge = packedCharge != 0;
131 totalCharges[localpad]++;
132 consecCharges[localpad]++;
133 maxConsecCharges[localpad] = CAMath::Max(maxConsecCharges[localpad], consecCharges[localpad]);
136 maxCharge[localpad] = CAMath::Max<Charge>(maxCharge[localpad], unpackedCharge);
138 consecCharges[localpad] = 0;
144 packedChargeStart += ElemsInTileRow;
147 for (
tpccf::Pad localpad = 0; localpad < PadsPerCacheline; localpad++) {
148 updatePadBaseline(basePad + localpad, clusterer, totalCharges[localpad], maxConsecCharges[localpad], maxCharge[localpad]);