GPUTPCCFCheckPadBaseline.cxx
// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
// All rights not expressly granted are reserved.
//
// This software is distributed under the terms of the GNU General Public
// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
//
// In applying this license CERN does not waive the privileges and immunities
// granted to it by virtue of its status as an Intergovernmental Organization
// or submit itself to any jurisdiction.

#include "GPUTPCCFCheckPadBaseline.h"
#include "Array2D.h"
#include "PackedCharge.h"
#include "GPUTPCGeometry.h"
#include "clusterFinderDefs.h"

#ifndef GPUCA_GPUCODE
#ifndef GPUCA_NO_VC
#include <Vc/Vc>
#else
#include <array>
#endif
#endif

using namespace o2::gpu;
using namespace o2::gpu::tpccf;

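// Kernel that checks each pad for noise: it counts the time bins with non-zero
// charge, tracks the longest run of consecutive occupied time bins and the
// maximum charge on the pad, and flags noisy pads via updatePadBaseline().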
template <>
GPUd() void GPUTPCCFCheckPadBaseline::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer)
{
  const CfFragment& fragment = clusterer.mPmemory->fragment;
  Array2D<PackedCharge> chargeMap(reinterpret_cast<PackedCharge*>(clusterer.mPchargeMap));

  int32_t basePad = iBlock * PadsPerCacheline;
  ChargePos basePos = padToChargePos(basePad, clusterer);

  if (not basePos.valid()) {
    return;
  }

#ifdef GPUCA_GPUCODE
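  // GPU path: each block handles one cacheline of PadsPerCacheline pads. The
  // threads are split into (pad, time bin) lanes; the lane with
  // localTimeBin == 0 accumulates the per-pad statistics.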
  static_assert(TPC_MAX_FRAGMENT_LEN_GPU % NumOfCachedTimebins == 0);

  int32_t totalCharges = 0;
  int32_t consecCharges = 0;
  int32_t maxConsecCharges = 0;
  Charge maxCharge = 0;

  int16_t localPadId = iThread / NumOfCachedTimebins;
  int16_t localTimeBin = iThread % NumOfCachedTimebins;
  bool handlePad = localTimeBin == 0;

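  // Each iteration stages NumOfCachedTimebins time bins per pad in shared
  // memory; after the barrier the handlePad lanes scan the cached charges.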
  for (tpccf::TPCFragmentTime t = fragment.firstNonOverlapTimeBin(); t < fragment.lastNonOverlapTimeBin(); t += NumOfCachedTimebins) {
    const ChargePos pos = basePos.delta({localPadId, int16_t(t + localTimeBin)});
    smem.charges[localPadId][localTimeBin] = (pos.valid()) ? chargeMap[pos].unpack() : 0;
    GPUbarrier();
    if (handlePad) {
      for (int32_t i = 0; i < NumOfCachedTimebins; i++) {
        const Charge q = smem.charges[localPadId][i];
        totalCharges += (q > 0);
        consecCharges = (q > 0) ? consecCharges + 1 : 0;
        maxConsecCharges = CAMath::Max(consecCharges, maxConsecCharges);
        maxCharge = CAMath::Max<Charge>(q, maxCharge);
      }
    }
    GPUbarrier();
  }

  GPUbarrier();

  if (handlePad) {
    updatePadBaseline(basePad + localPadId, clusterer, totalCharges, maxConsecCharges, maxCharge);
  }

#else // CPU CODE

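  // CPU path: walk the charge map one cacheline row at a time and update the
  // statistics for all PadsPerCacheline pads at once, using Vc SIMD vectors
  // when available and a scalar std::array fallback otherwise.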
  constexpr size_t ElemsInTileRow = (size_t)TilingLayout<GridSize<2>>::WidthInTiles * TimebinsPerCacheline * PadsPerCacheline;

#ifndef GPUCA_NO_VC
  using UShort8 = Vc::fixed_size_simd<uint16_t, PadsPerCacheline>;
  using Charge8 = Vc::fixed_size_simd<float, PadsPerCacheline>;

  UShort8 totalCharges{Vc::Zero};
  UShort8 consecCharges{Vc::Zero};
  UShort8 maxConsecCharges{Vc::Zero};
  Charge8 maxCharge{Vc::Zero};
#else
  std::array<uint16_t, PadsPerCacheline> totalCharges{0};
  std::array<uint16_t, PadsPerCacheline> consecCharges{0};
  std::array<uint16_t, PadsPerCacheline> maxConsecCharges{0};
  std::array<Charge, PadsPerCacheline> maxCharge{0};
#endif

  tpccf::TPCFragmentTime t = fragment.firstNonOverlapTimeBin();

  // Access packed charges as raw integers. We throw away the PackedCharge type here to simplify vectorization.
  const uint16_t* packedChargeStart = reinterpret_cast<uint16_t*>(&chargeMap[basePos.delta({0, t})]);

  for (; t < fragment.lastNonOverlapTimeBin(); t += TimebinsPerCacheline) {
    for (tpccf::TPCFragmentTime localtime = 0; localtime < TimebinsPerCacheline; localtime++) {
#ifndef GPUCA_NO_VC
      const UShort8 packedCharges{packedChargeStart + PadsPerCacheline * localtime, Vc::Aligned};
      const UShort8::mask_type isCharge = packedCharges != 0;

      if (isCharge.isNotEmpty()) {
        totalCharges(isCharge)++;
        consecCharges += 1;
        consecCharges(not isCharge) = 0;
        maxConsecCharges = Vc::max(consecCharges, maxConsecCharges);

        // Manually unpack charges to float.
        // Duplicated from PackedCharge::unpack to generate vectorized code:
        // Charge unpack() const { return Charge(mVal & ChargeMask) / Charge(1 << DecimalBits); }
        // Note that PackedCharge has to cut off the highest 2 bits via ChargeMask as they are used for flags by the cluster finder
        // and are not part of the charge value. We can skip this step because the cluster finder hasn't run yet
        // and thus these bits are guaranteed to be zero.
        const Charge8 unpackedCharges = Charge8(packedCharges) / Charge(1 << PackedCharge::DecimalBits);
        maxCharge = Vc::max(maxCharge, unpackedCharges);
      } else {
        consecCharges = 0;
      }
#else // Vc not available
      for (tpccf::Pad localpad = 0; localpad < PadsPerCacheline; localpad++) {
        const uint16_t packedCharge = packedChargeStart[PadsPerCacheline * localtime + localpad];
        const bool isCharge = packedCharge != 0;
        if (isCharge) {
          totalCharges[localpad]++;
          consecCharges[localpad]++;
          maxConsecCharges[localpad] = CAMath::Max(maxConsecCharges[localpad], consecCharges[localpad]);

          const Charge unpackedCharge = Charge(packedCharge) / Charge(1 << PackedCharge::DecimalBits);
          maxCharge[localpad] = CAMath::Max<Charge>(maxCharge[localpad], unpackedCharge);
        } else {
          consecCharges[localpad] = 0;
        }
      }
#endif
    }

    packedChargeStart += ElemsInTileRow;
  }

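  // Report the accumulated statistics for every pad of this cacheline.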
  for (tpccf::Pad localpad = 0; localpad < PadsPerCacheline; localpad++) {
    updatePadBaseline(basePad + localpad, clusterer, totalCharges[localpad], maxConsecCharges[localpad], maxCharge[localpad]);
  }
#endif
}

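// Maps a flat pad index (counted across all rows) to a ChargePos at time bin 0.
// The pad index is aligned down in place to the start of its cacheline so that
// the caller always processes whole cachelines; an invalid position is returned
// if the index does not fall into any row.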
GPUd() ChargePos GPUTPCCFCheckPadBaseline::padToChargePos(int32_t& pad, const GPUTPCClusterFinder& clusterer)
{
  constexpr GPUTPCGeometry geo;

  int32_t padOffset = 0;
  for (Row r = 0; r < GPUCA_ROW_COUNT; r++) {
    int32_t npads = geo.NPads(r);
    int32_t padInRow = pad - padOffset;
    if (0 <= padInRow && padInRow < CAMath::nextMultipleOf<PadsPerCacheline, int32_t>(npads)) {
      int32_t cachelineOffset = padInRow % PadsPerCacheline;
      pad -= cachelineOffset;
      return ChargePos{r, Pad(padInRow - cachelineOffset), 0};
    }
    padOffset += npads;
  }

  return ChargePos{0, 0, INVALID_TIME_BIN};
}

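// Flags a pad as noisy if the number of occupied time bins or the longest run of
// consecutive occupied time bins reaches its configured threshold. The occupied
// time bin threshold is given per 1000 time bins and scaled to the fragment
// length. Pads whose maximum charge reaches the saturation threshold are treated
// as real signal and never flagged (a threshold of 0 disables this check).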
GPUd() void GPUTPCCFCheckPadBaseline::updatePadBaseline(int32_t pad, const GPUTPCClusterFinder& clusterer, int32_t totalCharges, int32_t consecCharges, Charge maxCharge)
{
  const CfFragment& fragment = clusterer.mPmemory->fragment;
  const int32_t totalChargesBaseline = clusterer.Param().rec.tpc.maxTimeBinAboveThresholdIn1000Bin * fragment.lengthWithoutOverlap() / 1000;
  const int32_t consecChargesBaseline = clusterer.Param().rec.tpc.maxConsecTimeBinAboveThreshold;
  const uint16_t saturationThreshold = clusterer.Param().rec.tpc.noisyPadSaturationThreshold;
  const bool isNoisy = (!saturationThreshold || maxCharge < saturationThreshold) && ((totalChargesBaseline > 0 && totalCharges >= totalChargesBaseline) || (consecChargesBaseline > 0 && consecCharges >= consecChargesBaseline));

  if (isNoisy) {
    clusterer.mPpadIsNoisy[pad] = true;
  }
}