GPUTPCCFDecodeZS.cxx
// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
// All rights not expressly granted are reserved.
//
// This software is distributed under the terms of the GNU General Public
// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
//
// In applying this license CERN does not waive the privileges and immunities
// granted to it by virtue of its status as an Intergovernmental Organization
// or submit itself to any jurisdiction.

/// \file GPUTPCCFDecodeZS.cxx

#include "GPUTPCCFDecodeZS.h"
#include "GPUCommonMath.h"
#include "GPUTPCClusterFinder.h"
#include "Array2D.h"
#include "PackedCharge.h"
#include "CfUtils.h"
#include "DataFormatsTPC/ZeroSuppression.h"
#include "DetectorsRaw/RDHUtils.h"
#include "GPUCommonAlgorithm.h"
#include "TPCPadGainCalib.h"
#include "TPCZSLinkMapping.h"
#include "GPUTPCGeometry.h"

using namespace o2::gpu;
using namespace o2::gpu::tpccf;
using namespace o2::tpc;
using namespace o2::tpc::constants;

// ===========================================================================
// ===========================================================================
// Decode ZS Row
// ===========================================================================
// ===========================================================================

template <>
GPUdii() void GPUTPCCFDecodeZS::Thread<GPUTPCCFDecodeZS::decodeZS>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, int32_t firstHBF)
{
  GPUTPCCFDecodeZS::decode(clusterer, smem, nBlocks, nThreads, iBlock, iThread, firstHBF);
}

GPUdii() void GPUTPCCFDecodeZS::decode(GPUTPCClusterFinder& clusterer, GPUSharedMemory& s, int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t firstHBF)
{
  const uint32_t sector = clusterer.mISector;
#ifdef GPUCA_GPUCODE
  const uint32_t endpoint = clusterer.mPzsOffsets[iBlock].endpoint;
#else
  const uint32_t endpoint = iBlock;
#endif
  const GPUTrackingInOutZS::GPUTrackingInOutZSSector& zs = clusterer.GetConstantMem()->ioPtrs.tpcZS->sector[sector];
  if (zs.count[endpoint] == 0) {
    return;
  }
  ChargePos* positions = clusterer.mPpositions;
  Array2D<PackedCharge> chargeMap(reinterpret_cast<PackedCharge*>(clusterer.mPchargeMap));
  const size_t nDigits = clusterer.mPzsOffsets[iBlock].offset;
  if (iThread == 0) {
    const int32_t region = endpoint / 2;
    s.nRowsRegion = GPUTPCGeometry::GetRegionRows(region);
    s.regionStartRow = GPUTPCGeometry::GetRegionStart(region);
    s.nThreadsPerRow = CAMath::Max(1u, nThreads / ((s.nRowsRegion + (endpoint & 1)) / 2));
    s.rowStride = nThreads / s.nThreadsPerRow;
    s.rowOffsetCounter = 0;
  }
  GPUbarrier();
  const uint32_t myRow = iThread / s.nThreadsPerRow;
  const uint32_t mySequence = iThread % s.nThreadsPerRow;
#ifdef GPUCA_GPUCODE
  const uint32_t i = 0;
  const uint32_t j = clusterer.mPzsOffsets[iBlock].num;
  {
    {
#else
  for (uint32_t i = clusterer.mMinMaxCN[endpoint].zsPtrFirst; i < clusterer.mMinMaxCN[endpoint].zsPtrLast; i++) {
    const uint32_t minJ = (i == clusterer.mMinMaxCN[endpoint].zsPtrFirst) ? clusterer.mMinMaxCN[endpoint].zsPageFirst : 0;
    const uint32_t maxJ = (i + 1 == clusterer.mMinMaxCN[endpoint].zsPtrLast) ? clusterer.mMinMaxCN[endpoint].zsPageLast : zs.nZSPtr[endpoint][i];
    for (uint32_t j = minJ; j < maxJ; j++) {
#endif
      const uint32_t* pageSrc = (const uint32_t*)(((const uint8_t*)zs.zsPtr[endpoint][i]) + j * TPCZSHDR::TPC_ZS_PAGE_SIZE);
      CA_SHARED_CACHE_REF(&s.ZSPage[0], pageSrc, TPCZSHDR::TPC_ZS_PAGE_SIZE, uint32_t, pageCache);
      GPUbarrier();
      const uint8_t* page = (const uint8_t*)pageCache;
      const o2::header::RAWDataHeader* rdh = (const o2::header::RAWDataHeader*)page;
      if (o2::raw::RDHUtils::getMemorySize(*rdh) == sizeof(o2::header::RAWDataHeader)) {
#ifdef GPUCA_GPUCODE
        return;
#else
        continue;
#endif
      }
      const uint8_t* pagePtr = page + sizeof(o2::header::RAWDataHeader);
      const TPCZSHDR* hdr = reinterpret_cast<const TPCZSHDR*>(pagePtr);
      pagePtr += sizeof(*hdr);
      const bool decode12bit = hdr->version == 2;
      const uint32_t decodeBits = decode12bit ? TPCZSHDR::TPC_ZS_NBITS_V2 : TPCZSHDR::TPC_ZS_NBITS_V1;
      const float decodeBitsFactor = 1.f / (1 << (decodeBits - 10));
      uint32_t mask = (1 << decodeBits) - 1;
      int32_t timeBin = (hdr->timeOffset + (o2::raw::RDHUtils::getHeartBeatOrbit(*rdh) - firstHBF) * o2::constants::lhc::LHCMaxBunches) / LHCBCPERTIMEBIN;
      const int32_t rowOffset = s.regionStartRow + ((endpoint & 1) ? (s.nRowsRegion / 2) : 0);
      const int32_t nRows = (endpoint & 1) ? (s.nRowsRegion - s.nRowsRegion / 2) : (s.nRowsRegion / 2);

      for (int32_t l = 0; l < hdr->nTimeBinSpan; l++) { // TODO: Parallelize over time bins
        pagePtr += (pagePtr - page) & 1;                // Ensure 16 bit alignment
        const TPCZSTBHDR* tbHdr = reinterpret_cast<const TPCZSTBHDR*>(pagePtr);
        if ((tbHdr->rowMask & 0x7FFF) == 0) {
          pagePtr += 2;
          continue;
        }
        const int32_t nRowsUsed = CAMath::Popcount((uint32_t)(tbHdr->rowMask & 0x7FFF));
        pagePtr += 2 * nRowsUsed;

        GPUbarrier();
        for (int32_t n = iThread; n < nRowsUsed; n += nThreads) {
          const uint8_t* rowData = n == 0 ? pagePtr : (page + tbHdr->rowAddr1()[n - 1]);
          s.RowClusterOffset[n] = CAMath::AtomicAddShared<uint32_t>(&s.rowOffsetCounter, rowData[2 * *rowData]);
        }
        /*if (iThread < GPUCA_WARP_SIZE) { // TODO: Seems to miscompile with HIP, CUDA performance doesn't really change, for now sticking to the AtomicAdd
          GPUSharedMemory& smem = s;
          int32_t o;
          if (iThread < nRowsUsed) {
            const uint8_t* rowData = iThread == 0 ? pagePtr : (page + tbHdr->rowAddr1()[iThread - 1]);
            o = rowData[2 * *rowData];
          } else {
            o = 0;
          }
          int32_t x = warp_scan_inclusive_add(o);
          if (iThread < nRowsUsed) {
            s.RowClusterOffset[iThread] = s.rowOffsetCounter + x - o;
          } else if (iThread == GPUCA_WARP_SIZE - 1) {
            s.rowOffsetCounter += x;
          }
        }*/
        GPUbarrier();

        if (myRow < s.rowStride) {
          for (int32_t m = myRow; m < nRows; m += s.rowStride) {
            if ((tbHdr->rowMask & (1 << m)) == 0) {
              continue;
            }
            const int32_t rowPos = CAMath::Popcount((uint32_t)(tbHdr->rowMask & ((1 << m) - 1)));
            size_t nDigitsTmp = nDigits + s.RowClusterOffset[rowPos];
            const uint8_t* rowData = rowPos == 0 ? pagePtr : (page + tbHdr->rowAddr1()[rowPos - 1]);
            const int32_t nSeqRead = *rowData;
            const int32_t nSeqPerThread = (nSeqRead + s.nThreadsPerRow - 1) / s.nThreadsPerRow;
            const int32_t mySequenceStart = mySequence * nSeqPerThread;
            const int32_t mySequenceEnd = CAMath::Min(mySequenceStart + nSeqPerThread, nSeqRead);
            if (mySequenceEnd > mySequenceStart) {
              const uint8_t* adcData = rowData + 2 * nSeqRead + 1;
              const uint32_t nSamplesStart = mySequenceStart ? rowData[2 * mySequenceStart] : 0;
              nDigitsTmp += nSamplesStart;
              uint32_t nADCStartBits = nSamplesStart * decodeBits;
              const uint32_t nADCStart = (nADCStartBits + 7) / 8;
              const int32_t nADC = (rowData[2 * mySequenceEnd] * decodeBits + 7) / 8;
              adcData += nADCStart;
              nADCStartBits &= 0x7;
              uint32_t byte = 0, bits = 0;
              if (nADCStartBits) { // % 8 != 0
                bits = 8 - nADCStartBits;
                byte = ((*(adcData - 1) & (0xFF ^ ((1 << nADCStartBits) - 1)))) >> nADCStartBits;
              }
              int32_t nSeq = mySequenceStart;
              int32_t seqLen = nSeq ? (rowData[(nSeq + 1) * 2] - rowData[nSeq * 2]) : rowData[2];
              Pad pad = rowData[nSeq++ * 2 + 1];
              for (int32_t n = nADCStart; n < nADC; n++) {
                byte |= *(adcData++) << bits;
                bits += 8;
                while (bits >= decodeBits) {
                  if (seqLen == 0) {
                    seqLen = rowData[(nSeq + 1) * 2] - rowData[nSeq * 2];
                    pad = rowData[nSeq++ * 2 + 1];
                  }
                  const CfFragment& fragment = clusterer.mPmemory->fragment;
                  TPCTime globalTime = timeBin + l;
                  bool inFragment = fragment.contains(globalTime);
                  Row row = rowOffset + m;
                  ChargePos pos(row, Pad(pad), inFragment ? fragment.toLocal(globalTime) : INVALID_TIME_BIN);
                  positions[nDigitsTmp++] = pos;

                  if (inFragment) {
                    float q = float(byte & mask) * decodeBitsFactor;
                    q *= clusterer.GetConstantMem()->calibObjects.tpcPadGain->getGainCorrection(sector, row, pad);
                    chargeMap[pos] = PackedCharge(q);
                  }
                  pad++;
                  byte = byte >> decodeBits;
                  bits -= decodeBits;
                  seqLen--;
                }
              }
            }
          }
        }
        if (nRowsUsed > 1) {
          pagePtr = page + tbHdr->rowAddr1()[nRowsUsed - 2];
        }
        pagePtr += 2 * *pagePtr;                        // Go to entry for last sequence length
        pagePtr += 1 + (*pagePtr * decodeBits + 7) / 8; // Go to beginning of next time bin
      }
    }
  }
}
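
/*
  Worked example of the rolling bit buffer used by the row decoder above
  (a standalone sketch; `data`, `nBytes`, and `q` are placeholder names).
  With 12-bit samples (TPC_ZS_NBITS_V2), decodeBitsFactor = 1.f / (1 << 2)
  = 0.25f, so a raw value of 1023 decodes to 255.75 on the 10-bit ADC scale:

    uint32_t byte = 0, bits = 0;           // bit buffer and its fill level
    for (int32_t n = 0; n < nBytes; n++) {
      byte |= data[n] << bits;             // append next byte above buffered bits
      bits += 8;
      while (bits >= decodeBits) {         // enough bits buffered for one sample?
        uint32_t sample = byte & mask;     // low decodeBits bits form one sample
        float q = float(sample) * decodeBitsFactor;
        byte >>= decodeBits;               // drop the consumed bits
        bits -= decodeBits;
      }
    }
*/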

// ===========================================================================
// ===========================================================================
// Decode ZS Link
// ===========================================================================
// ===========================================================================

template <>
GPUdii() void GPUTPCCFDecodeZSLink::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, int32_t firstHBF)
{
  Decode<GPUTPCCFDecodeZSLink>(nBlocks, nThreads, iBlock, iThread, smem, clusterer, firstHBF);
}

GPUd() size_t GPUTPCCFDecodeZSLink::DecodePage(GPUSharedMemory& smem, processorType& clusterer, int32_t iBlock, int32_t nThreads, int32_t iThread, const uint8_t* page, uint32_t pageDigitOffset, int32_t firstHBF)
{
  const CfFragment& fragment = clusterer.mPmemory->fragment;

  const auto* rdHdr = ConsumeHeader<header::RAWDataHeader>(page);

  if (o2::raw::RDHUtils::getMemorySize(*rdHdr) == sizeof(o2::header::RAWDataHeader)) {
    return pageDigitOffset;
  }

  int32_t nDecoded = 0;
  const auto* decHdr = ConsumeHeader<TPCZSHDRV2>(page);
  ConsumeBytes(page, decHdr->firstZSDataOffset * 16);

  assert(decHdr->version == ZSVersionLinkBasedWithMeta);

  for (uint32_t t = 0; t < decHdr->nTimebinHeaders; t++) {
    const auto* tbHdr = ConsumeHeader<zerosupp_link_based::CommonHeader>(page);
    const auto* adcData = ConsumeBytes(page, tbHdr->numWordsPayload * 16); // Page now points to next timebin or past the page

    int32_t timeBin = (decHdr->timeOffset + tbHdr->bunchCrossing + (uint64_t)(o2::raw::RDHUtils::getHeartBeatOrbit(*rdHdr) - firstHBF) * o2::constants::lhc::LHCMaxBunches) / LHCBCPERTIMEBIN;

    uint32_t channelMask[3];
    GetChannelBitmask(*tbHdr, channelMask);
    uint32_t nAdc = CAMath::Popcount(channelMask[0]) + CAMath::Popcount(channelMask[1]) + CAMath::Popcount(channelMask[2]);

    bool inFragment = fragment.contains(timeBin);
    nDecoded += nAdc;

    // TimeBin not in fragment: Skip this timebin header and fill positions with dummy values instead
    if (not inFragment) {
      pageDigitOffset += FillWithInvalid(clusterer, iThread, nThreads, pageDigitOffset, nAdc);
      continue;
    }

#ifdef GPUCA_GPUCODE
    DecodeTBMultiThread(
      clusterer,
      iThread,
      smem,
      adcData,
      nAdc,
      channelMask,
      timeBin,
      decHdr->cruID,
      tbHdr->fecInPartition,
      pageDigitOffset);
#else // CPU
    DecodeTBSingleThread(
      clusterer,
      adcData,
      nAdc,
      channelMask,
      timeBin,
      decHdr->cruID,
      tbHdr->fecInPartition,
      pageDigitOffset);
#endif
    pageDigitOffset += nAdc;
  } // for (uint32_t t = 0; t < decHdr->nTimebinHeaders; t++)
  (void)nDecoded;
#ifdef GPUCA_CHECK_TPCZS_CORRUPTION
  if (iThread == 0 && nDecoded != decHdr->nADCsamples) {
    clusterer.raiseError(GPUErrors::ERROR_TPCZS_INVALID_NADC, clusterer.mISector * 1000 + decHdr->cruID, decHdr->nADCsamples, nDecoded);
    /*#ifndef GPUCA_GPUCODE
        FILE* foo = fopen("dump.bin", "w+b");
        fwrite(pageSrc, 1, o2::raw::RDHUtils::getMemorySize(*rdHdr), foo);
        fclose(foo);
    #endif*/
  }
#endif
  return pageDigitOffset;
}
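
/*
  Layout of a link-based ZS page as consumed by DecodePage above (sketch):

    [ RAWDataHeader | TPCZSHDRV2 | skip to firstZSDataOffset * 16 bytes ]
    [ timebin 0: zerosupp_link_based::CommonHeader | numWordsPayload * 16 bytes of ADC data ]
    [ timebin 1: CommonHeader | payload ]
    ...

  Each CommonHeader carries an 80-bit channel bitmask; the popcount of that
  mask is nAdc, the number of samples packed into the payload that follows.
*/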

GPUd() void GPUTPCCFDecodeZSLink::DecodeTBSingleThread(
  processorType& clusterer,
  const uint8_t* adcData,
  uint32_t nAdc,
  const uint32_t* channelMask,
  int32_t timeBin,
  int32_t cru,
  int32_t fecInPartition,
  uint32_t pageDigitOffset)
{
  const CfFragment& fragment = clusterer.mPmemory->fragment;

  if constexpr (TPCZSHDRV2::TIGHTLY_PACKED_V3) {

    uint32_t byte = 0, bits = 0, nSamplesWritten = 0, rawFECChannel = 0;

    // unpack adc values, assume tightly packed data
    while (nSamplesWritten < nAdc) {
      byte |= adcData[0] << bits;
      adcData++;
      bits += CHAR_BIT;
      while (bits >= DECODE_BITS) {

        // Find next channel with data
        for (; !ChannelIsActive(channelMask, rawFECChannel); rawFECChannel++) {
        }

        // Unpack data for cluster finder
        o2::tpc::PadPos padAndRow = GetPadAndRowFromFEC(clusterer, cru, rawFECChannel, fecInPartition);

        float charge = ADCToFloat(byte, DECODE_MASK, DECODE_BITS_FACTOR);
        WriteCharge(clusterer, charge, padAndRow, fragment.toLocal(timeBin), pageDigitOffset + nSamplesWritten);

        byte = byte >> DECODE_BITS;
        bits -= DECODE_BITS;
        nSamplesWritten++;
        rawFECChannel++; // Ensure we don't decode same channel twice
      } // while (bits >= DECODE_BITS)
    } // while (nSamplesWritten < nAdc)

  } else { // ! TPCZSHDRV2::TIGHTLY_PACKED_V3
    uint32_t rawFECChannel = 0;
    const uint64_t* adcData64 = (const uint64_t*)adcData;
    for (uint32_t j = 0; j < nAdc; j++) {
      for (; !ChannelIsActive(channelMask, rawFECChannel); rawFECChannel++) {
      }

      uint32_t adc = (adcData64[j / TPCZSHDRV2::SAMPLESPER64BIT] >> ((j % TPCZSHDRV2::SAMPLESPER64BIT) * DECODE_BITS)) & DECODE_MASK;

      o2::tpc::PadPos padAndRow = GetPadAndRowFromFEC(clusterer, cru, rawFECChannel, fecInPartition);
      float charge = ADCToFloat(adc, DECODE_MASK, DECODE_BITS_FACTOR);
      WriteCharge(clusterer, charge, padAndRow, fragment.toLocal(timeBin), pageDigitOffset + j);
      rawFECChannel++;
    }
  }
}
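
/*
  Index arithmetic of the non-packed branch above, worked through (assuming
  DECODE_BITS = 12, hence SAMPLESPER64BIT = 64 / 12 = 5 samples per 64-bit
  word with 4 padding bits). Sample j = 7 lives in
    word  7 / 5 = 1
    shift (7 % 5) * 12 = 24
  and is extracted as (adcData64[1] >> 24) & DECODE_MASK.
*/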

GPUd() void GPUTPCCFDecodeZSLink::DecodeTBMultiThread(
  processorType& clusterer,
  int32_t iThread,
  GPUSharedMemory& smem,
  const uint8_t* adcData,
  uint32_t nAdc,
  const uint32_t* channelMask,
  int32_t timeBin,
  int32_t cru,
  int32_t fecInPartition,
  uint32_t pageDigitOffset)
{
  constexpr int32_t NTHREADS = GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFDecodeZSLink);
  static_assert(NTHREADS == GPUCA_WARP_SIZE, "Decoding TB Headers in parallel assumes block size is a single warp.");

  uint8_t blockOffset = 0;
  for (uint8_t i = iThread; blockOffset < nAdc; i += NTHREADS) {

    uint8_t rawFECChannel = i;

    uint8_t myChannelActive = ChannelIsActive(channelMask, rawFECChannel);

    uint8_t myOffset = warp_scan_inclusive_add(myChannelActive) - 1 + blockOffset;
    blockOffset = warp_broadcast(myOffset, NTHREADS - 1) + 1;

    // Decode entire timebin at once if we have enough threads
    // This should further improve performance, but code below is buggy...
    // if (nAdc <= NTHREADS) {
    //   for (int32_t j = 1; blockOffset < nAdc; j++) {
    //     rawFECChannel = myChannelActive ? rawFECChannel : (iThread + j * NTHREADS - myOffset);

    //     bool iAmIdle = not myChannelActive;

    //     myChannelActive =
    //       rawFECChannel < zerosupp_link_based::ChannelPerTBHeader
    //         ? BitIsSet(channelMask, rawFECChannel)
    //         : false;

    //     uint8_t newOffset = warp_scan_inclusive_add(static_cast<uint8_t>(myChannelActive && iAmIdle)) - 1 + blockOffset;
    //     blockOffset = warp_broadcast(newOffset, NTHREADS - 1) + 1;

    //     myOffset = iAmIdle ? newOffset : myOffset;
    //   }
    // }

    if (not myChannelActive) {
      continue;
    }
    assert(myOffset < nAdc);

    uint32_t adc = 0;

    if constexpr (TPCZSHDRV2::TIGHTLY_PACKED_V3) {

      // Try to access adcData with 4 byte reads instead of 1 byte.
      // You'd think this would improve performance, but it's actually slower...
      // const uint32_t* adcDataU32 = reinterpret_cast<const uint32_t*>(adcData);

      uint32_t adcBitOffset = myOffset * DECODE_BITS;
      uint32_t adcByteOffset = adcBitOffset / CHAR_BIT;
      uint32_t adcOffsetInByte = adcBitOffset - adcByteOffset * CHAR_BIT;
      // uint32_t adcByteOffset = adcBitOffset / 32;
      // uint32_t adcOffsetInByte = adcBitOffset - adcByteOffset * 32;

      uint32_t byte = 0, bits = 0;

      // uint32_t byte = adcDataU32[adcByteOffset] >> adcOffsetInByte;
      // uint32_t bits = 32 - adcOffsetInByte;
      // adcByteOffset++;

      while (bits < DECODE_BITS) {
        byte |= ((uint32_t)adcData[adcByteOffset]) << bits;
        // byte |= adcDataU32[adcByteOffset] << bits;
        adcByteOffset++;
        bits += CHAR_BIT;
        // bits += 32;
      }
      adc = byte >> adcOffsetInByte;

    } else { // ! TPCZSHDRV2::TIGHTLY_PACKED_V3
      const uint64_t* adcData64 = (const uint64_t*)adcData;
      adc = (adcData64[myOffset / TPCZSHDRV2::SAMPLESPER64BIT] >> ((myOffset % TPCZSHDRV2::SAMPLESPER64BIT) * DECODE_BITS)) & DECODE_MASK;
    }

    o2::tpc::PadPos padAndRow = GetPadAndRowFromFEC(clusterer, cru, rawFECChannel, fecInPartition);
    const CfFragment& fragment = clusterer.mPmemory->fragment;
    float charge = ADCToFloat(adc, DECODE_MASK, DECODE_BITS_FACTOR);
    WriteCharge(clusterer, charge, padAndRow, fragment.toLocal(timeBin), pageDigitOffset + myOffset);

  } // for (uint8_t i = iThread; blockOffset < nAdc; i += NTHREADS)
}
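
/*
  How the warp scan above assigns output slots (sketch): each thread
  contributes myChannelActive (0 or 1). For active flags [1, 0, 1, 1, ...]
  the inclusive scan yields [1, 1, 2, 3, ...], so with blockOffset = 0
    myOffset = scan - 1 + blockOffset
  gives thread 0 slot 0, thread 2 slot 1, thread 3 slot 2; inactive threads
  keep a stale offset and are skipped by the `continue` above. The last
  lane's value, distributed via warp_broadcast, advances blockOffset by the
  number of active channels found in this pass.
*/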

GPUd() void GPUTPCCFDecodeZSLink::GetChannelBitmask(const zerosupp_link_based::CommonHeader& tbHdr, uint32_t* chan)
{
  chan[0] = tbHdr.bitMaskLow & 0xfffffffful;
  chan[1] = tbHdr.bitMaskLow >> (sizeof(uint32_t) * CHAR_BIT);
  chan[2] = tbHdr.bitMaskHigh;
}

GPUd() bool GPUTPCCFDecodeZSLink::ChannelIsActive(const uint32_t* chan, uint8_t chanIndex)
{
  if (chanIndex >= zerosupp_link_based::ChannelPerTBHeader) {
    return false;
  }
  constexpr uint8_t N_BITS_PER_ENTRY = sizeof(*chan) * CHAR_BIT;
  const uint8_t entryIndex = chanIndex / N_BITS_PER_ENTRY;
  const uint8_t bitInEntry = chanIndex % N_BITS_PER_ENTRY;
  return chan[entryIndex] & (1 << bitInEntry);
}
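
/*
  Example of the bitmask lookup above: the 80 FEC channels are spread over
  three 32-bit words. For chanIndex = 37,
    entryIndex = 37 / 32 = 1
    bitInEntry = 37 % 32 = 5
  so the channel is active iff bit 5 of chan[1] is set:
    bool active = chan[1] & (1u << 5);
*/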

// ===========================================================================
// ===========================================================================
// Decode ZS Link Base
// ===========================================================================
// ===========================================================================

template <class Decoder>
GPUd() void GPUTPCCFDecodeZSLinkBase::Decode(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, typename Decoder::GPUSharedMemory& smem, processorType& clusterer, int32_t firstHBF)
{
  const uint32_t sector = clusterer.mISector;

#ifdef GPUCA_GPUCODE
  const uint32_t endpoint = clusterer.mPzsOffsets[iBlock].endpoint;
#else // CPU
  const uint32_t endpoint = iBlock;
#endif

  const GPUTrackingInOutZS::GPUTrackingInOutZSSector& zs = clusterer.GetConstantMem()->ioPtrs.tpcZS->sector[sector];
  if (zs.count[endpoint] == 0) {
    return;
  }

  uint32_t pageDigitOffset = clusterer.mPzsOffsets[iBlock].offset;

#ifdef GPUCA_GPUCODE
  const uint32_t i = 0;
  const uint32_t j = clusterer.mPzsOffsets[iBlock].num;
  {
    {
#else // CPU
  for (uint32_t i = clusterer.mMinMaxCN[endpoint].zsPtrFirst; i < clusterer.mMinMaxCN[endpoint].zsPtrLast; i++) {
    const uint32_t minJ = (i == clusterer.mMinMaxCN[endpoint].zsPtrFirst) ? clusterer.mMinMaxCN[endpoint].zsPageFirst : 0;
    const uint32_t maxJ = (i + 1 == clusterer.mMinMaxCN[endpoint].zsPtrLast) ? clusterer.mMinMaxCN[endpoint].zsPageLast : zs.nZSPtr[endpoint][i];
    for (uint32_t j = minJ; j < maxJ; j++) {
#endif
      const uint32_t* pageSrc = (const uint32_t*)(((const uint8_t*)zs.zsPtr[endpoint][i]) + j * TPCZSHDR::TPC_ZS_PAGE_SIZE);
      // Cache zs page in shared memory. Curiously this actually degrades performance...
      // CA_SHARED_CACHE_REF(&smem.ZSPage[0], pageSrc, TPCZSHDR::TPC_ZS_PAGE_SIZE, uint32_t, pageCache);
      // GPUbarrier();
      // const uint8_t* page = (const uint8_t*)pageCache;
      const uint8_t* page = (const uint8_t*)pageSrc;

      const auto* rdHdr = Peek<header::RAWDataHeader>(page);

      if (o2::raw::RDHUtils::getMemorySize(*rdHdr) == sizeof(o2::header::RAWDataHeader)) {
#ifdef GPUCA_GPUCODE
        return;
#else
        continue;
#endif
      }

      pageDigitOffset = Decoder::DecodePage(smem, clusterer, iBlock, nThreads, iThread, page, pageDigitOffset, firstHBF);
    } // [CPU] for (uint32_t j = minJ; j < maxJ; j++)
  } // [CPU] for (uint32_t i = clusterer.mMinMaxCN[endpoint].zsPtrFirst; i < clusterer.mMinMaxCN[endpoint].zsPtrLast; i++)

#ifdef GPUCA_CHECK_TPCZS_CORRUPTION
  if (iThread == 0 && iBlock < nBlocks - 1) {
    uint32_t maxOffset = clusterer.mPzsOffsets[iBlock + 1].offset;
    if (pageDigitOffset != maxOffset) {
      clusterer.raiseError(GPUErrors::ERROR_TPCZS_INVALID_OFFSET, clusterer.mISector * 1000 + endpoint, pageDigitOffset, maxOffset);
    }
  }
#endif
}
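
/*
  Page iteration in the two compilation modes above (sketch): on GPU each
  block decodes exactly one page, pre-assigned through
  clusterer.mPzsOffsets[iBlock] (fields offset / endpoint / num), so i and j
  collapse to constants. On CPU a "block" walks all pages of its endpoint,
  with mMinMaxCN clipping the first and last raw pointer and page. Both
  paths advance pageDigitOffset per decoded page, so the digits of one
  endpoint land in a contiguous range of mPpositions.
*/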

GPUd() o2::tpc::PadPos GPUTPCCFDecodeZSLinkBase::GetPadAndRowFromFEC(processorType& clusterer, int32_t cru, int32_t rawFECChannel, int32_t fecInPartition)
{
#ifdef GPUCA_TPC_GEOMETRY_O2
  // Ported from tpc::Mapper (Not available on GPU...)
  constexpr GPUTPCGeometry geo;

  const int32_t regionIter = cru % 2;
  const int32_t istreamm = ((rawFECChannel % 10) / 2);
  const int32_t partitionStream = istreamm + regionIter * 5;
  const int32_t sampaOnFEC = geo.GetSampaMapping(partitionStream);
  const int32_t channel = (rawFECChannel % 2) + 2 * (rawFECChannel / 10);
  const int32_t channelOnSAMPA = channel + geo.GetChannelOffset(partitionStream);

  const int32_t partition = (cru % 10) / 2;
  const int32_t fecInSector = geo.GetSectorFECOffset(partition) + fecInPartition;

  const TPCZSLinkMapping* gpuMapping = clusterer.GetConstantMem()->calibObjects.tpcZSLinkMapping;
  assert(gpuMapping != nullptr);

  uint16_t globalSAMPAId = (static_cast<uint16_t>(fecInSector) << 8) + (static_cast<uint16_t>(sampaOnFEC) << 5) + static_cast<uint16_t>(channelOnSAMPA);
  const o2::tpc::PadPos pos = gpuMapping->FECIDToPadPos[globalSAMPAId];

  return pos;
#else
  return o2::tpc::PadPos{};
#endif
}
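
/*
  Bit layout of globalSAMPAId as packed above:

    bits 15..8  fecInSector    (8 bits)
    bits  7..5  sampaOnFEC     (3 bits)
    bits  4..0  channelOnSAMPA (5 bits)

  For example fecInSector = 3, sampaOnFEC = 2, channelOnSAMPA = 17 gives
  (3 << 8) + (2 << 5) + 17 = 849, the index into FECIDToPadPos.
*/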

GPUd() void GPUTPCCFDecodeZSLinkBase::WriteCharge(processorType& clusterer, float charge, PadPos padAndRow, TPCFragmentTime localTime, size_t positionOffset)
{
  const uint32_t sector = clusterer.mISector;
  ChargePos* positions = clusterer.mPpositions;
#ifdef GPUCA_CHECK_TPCZS_CORRUPTION
  if (padAndRow.getRow() >= GPUCA_ROW_COUNT) {
    positions[positionOffset] = INVALID_CHARGE_POS;
    clusterer.raiseError(GPUErrors::ERROR_TPCZS_INVALID_ROW, clusterer.mISector * 1000 + padAndRow.getRow());
    return;
  }
#endif
  Array2D<PackedCharge> chargeMap(reinterpret_cast<PackedCharge*>(clusterer.mPchargeMap));

  ChargePos pos(padAndRow.getRow(), padAndRow.getPad(), localTime);
  positions[positionOffset] = pos;

  charge *= clusterer.GetConstantMem()->calibObjects.tpcPadGain->getGainCorrection(sector, padAndRow.getRow(), padAndRow.getPad());
  chargeMap[pos] = PackedCharge(charge);
}

GPUd() uint16_t GPUTPCCFDecodeZSLinkBase::FillWithInvalid(processorType& clusterer, int32_t iThread, int32_t nThreads, uint32_t pageDigitOffset, uint16_t nSamples)
{
  for (uint16_t i = iThread; i < nSamples; i += nThreads) {
    clusterer.mPpositions[pageDigitOffset + i] = INVALID_CHARGE_POS;
  }
  return nSamples;
}
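
/*
  FillWithInvalid spreads the dummy writes round-robin over the threads: with
  nThreads = 32 and nSamples = 100, thread 0 writes samples 0, 32, 64, 96 and
  thread 31 writes 31, 63, 95. Marking skipped samples with INVALID_CHARGE_POS
  keeps mPpositions dense, so later kernels can still index digits by offset.
*/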

// ===========================================================================
// ===========================================================================
// Decode ZS Dense Link
// ===========================================================================
// ===========================================================================

template <>
GPUd() void GPUTPCCFDecodeZSDenseLink::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, int32_t firstHBF)
{
  Decode<GPUTPCCFDecodeZSDenseLink>(nBlocks, nThreads, iBlock, iThread, smem, clusterer, firstHBF);
}

GPUd() uint32_t GPUTPCCFDecodeZSDenseLink::DecodePage(GPUSharedMemory& smem, processorType& clusterer, int32_t iBlock, int32_t nThreads, int32_t iThread, const uint8_t* page, uint32_t pageDigitOffset, int32_t firstHBF)
{
#ifdef GPUCA_GPUCODE
  constexpr bool DecodeInParallel = true;
#else
  constexpr bool DecodeInParallel = false;
#endif

  const uint8_t* const pageStart = page;

  const auto* rawDataHeader = Peek<header::RAWDataHeader>(page);
  const auto* decHeader = Peek<TPCZSHDRV2>(page, raw::RDHUtils::getMemorySize(*rawDataHeader) - sizeof(TPCZSHDRV2));
  ConsumeHeader<header::RAWDataHeader>(page);

  assert(decHeader->version >= ZSVersionDenseLinkBased);

  uint16_t nSamplesWritten = 0;
  const uint16_t nSamplesInPage = decHeader->nADCsamples;

  const auto* payloadEnd = Peek(pageStart, raw::RDHUtils::getMemorySize(*rawDataHeader) - sizeof(TPCZSHDRV2) - ((decHeader->flags & TPCZSHDRV2::ZSFlags::TriggerWordPresent) ? TPCZSHDRV2::TRIGGER_WORD_SIZE : 0));
  const auto* nextPage = Peek(pageStart, TPCZSHDR::TPC_ZS_PAGE_SIZE);

  ConsumeBytes(page, decHeader->firstZSDataOffset - sizeof(o2::header::RAWDataHeader));

  for (uint16_t i = 0; i < decHeader->nTimebinHeaders; i++) {
    [[maybe_unused]] ptrdiff_t sizeLeftInPage = payloadEnd - page;
    assert(sizeLeftInPage > 0);

    uint16_t nSamplesWrittenTB = 0;

    if (i == decHeader->nTimebinHeaders - 1 && decHeader->flags & o2::tpc::TPCZSHDRV2::ZSFlags::payloadExtendsToNextPage) {
      assert(o2::raw::RDHUtils::getMemorySize(*rawDataHeader) == TPCZSHDR::TPC_ZS_PAGE_SIZE);
      if ((uint16_t)(raw::RDHUtils::getPageCounter(rawDataHeader) + 1) == raw::RDHUtils::getPageCounter(nextPage)) {
        nSamplesWrittenTB = DecodeTB<DecodeInParallel, true>(clusterer, smem, iThread, page, pageDigitOffset, rawDataHeader, firstHBF, decHeader->cruID, payloadEnd, nextPage);
      } else {
        nSamplesWrittenTB = FillWithInvalid(clusterer, iThread, nThreads, pageDigitOffset, nSamplesInPage - nSamplesWritten);
#ifdef GPUCA_CHECK_TPCZS_CORRUPTION
        if (iThread == 0) {
          clusterer.raiseError(GPUErrors::ERROR_TPCZS_INCOMPLETE_HBF, clusterer.mISector * 1000 + decHeader->cruID, raw::RDHUtils::getPageCounter(rawDataHeader), raw::RDHUtils::getPageCounter(nextPage));
        }
#endif
      }
    } else {
      nSamplesWrittenTB = DecodeTB<DecodeInParallel, false>(clusterer, smem, iThread, page, pageDigitOffset, rawDataHeader, firstHBF, decHeader->cruID, payloadEnd, nextPage);
    }

    assert(nSamplesWritten <= nSamplesInPage);
    nSamplesWritten += nSamplesWrittenTB;
    pageDigitOffset += nSamplesWrittenTB;
  } // for (uint16_t i = 0; i < decHeader->nTimebinHeaders; i++)

#ifdef GPUCA_CHECK_TPCZS_CORRUPTION
  if (iThread == 0 && nSamplesWritten != nSamplesInPage) {
    clusterer.raiseError(GPUErrors::ERROR_TPCZS_INVALID_NADC, clusterer.mISector * 1000 + decHeader->cruID, nSamplesInPage, nSamplesWritten);
    /*#ifndef GPUCA_GPUCODE
        FILE* foo = fopen("dump.bin", "w+b");
        fwrite(pageSrc, 1, o2::raw::RDHUtils::getMemorySize(*rdHdr), foo);
        fclose(foo);
    #endif*/
  }
#endif

  return pageDigitOffset;
}

template <bool DecodeInParallel, bool PayloadExtendsToNextPage>
GPUd() uint16_t GPUTPCCFDecodeZSDenseLink::DecodeTB(
  processorType& clusterer,
  [[maybe_unused]] GPUSharedMemory& smem,
  int32_t iThread,
  const uint8_t*& page,
  uint32_t pageDigitOffset,
  const header::RAWDataHeader* rawDataHeader,
  int32_t firstHBF,
  int32_t cru,
  [[maybe_unused]] const uint8_t* payloadEnd,
  [[maybe_unused]] const uint8_t* nextPage)
{

  if constexpr (DecodeInParallel) {
    return DecodeTBMultiThread<PayloadExtendsToNextPage>(clusterer, smem, iThread, page, pageDigitOffset, rawDataHeader, firstHBF, cru, payloadEnd, nextPage);
  } else {
    uint16_t nSamplesWritten = 0;
    if (iThread == 0) {
      nSamplesWritten = DecodeTBSingleThread<PayloadExtendsToNextPage>(clusterer, page, pageDigitOffset, rawDataHeader, firstHBF, cru, payloadEnd, nextPage);
    }
    return warp_broadcast(nSamplesWritten, 0);
  }
}

template <bool PayloadExtendsToNextPage>
GPUd() uint16_t GPUTPCCFDecodeZSDenseLink::DecodeTBMultiThread(
  processorType& clusterer,
  GPUSharedMemory& smem,
  const int32_t iThread,
  const uint8_t*& page,
  uint32_t pageDigitOffset,
  const header::RAWDataHeader* rawDataHeader,
  int32_t firstHBF,
  int32_t cru,
  [[maybe_unused]] const uint8_t* payloadEnd,
  [[maybe_unused]] const uint8_t* nextPage)
{
#define MAYBE_PAGE_OVERFLOW(pagePtr)                               \
  if constexpr (PayloadExtendsToNextPage) {                        \
    if (pagePtr >= payloadEnd && pagePtr < nextPage) {             \
      ptrdiff_t diff = pagePtr - payloadEnd;                       \
      pagePtr = nextPage;                                          \
      ConsumeBytes(pagePtr, sizeof(header::RAWDataHeader) + diff); \
    }                                                              \
  } else {                                                         \
    assert(pagePtr <= payloadEnd);                                 \
  }

#define PEEK_OVERFLOW(pagePtr, offset)                                                      \
  (*(PayloadExtendsToNextPage && (pagePtr) < nextPage && (pagePtr) + (offset) >= payloadEnd \
       ? nextPage + sizeof(header::RAWDataHeader) + ((pagePtr) + (offset) - payloadEnd)     \
       : (pagePtr) + (offset)))

#define TEST_BIT(x, bit) static_cast<bool>((x) & (1 << (bit)))

  constexpr int32_t NTHREADS = GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFDecodeZSDenseLink);
  static_assert(NTHREADS == GPUCA_WARP_SIZE, "Decoding TB Headers in parallel assumes block size is a single warp.");

  const CfFragment& fragment = clusterer.mPmemory->fragment;

  // Read timebin block header
  uint16_t tbbHdr = ConsumeByte(page);
  MAYBE_PAGE_OVERFLOW(page);
  tbbHdr |= static_cast<uint16_t>(ConsumeByte(page)) << CHAR_BIT;
  MAYBE_PAGE_OVERFLOW(page);

  uint8_t nLinksInTimebin = tbbHdr & 0x000F;
  uint16_t linkBC = (tbbHdr & 0xFFF0) >> 4;
  int32_t timeBin = (linkBC + (uint64_t)(raw::RDHUtils::getHeartBeatOrbit(*rawDataHeader) - firstHBF) * constants::lhc::LHCMaxBunches) / LHCBCPERTIMEBIN;

  uint16_t nSamplesInTB = 0;

  GPUbarrier();

  // Read timebin link headers
  for (uint8_t iLink = 0; iLink < nLinksInTimebin; iLink++) {
    uint8_t timebinLinkHeaderStart = ConsumeByte(page);
    MAYBE_PAGE_OVERFLOW(page);

    if (iThread == 0) {
      smem.linkIds[iLink] = timebinLinkHeaderStart & 0b00011111;
    }
    bool bitmaskIsFlat = timebinLinkHeaderStart & 0b00100000;

    uint16_t bitmaskL2 = 0x03FF;
    if (not bitmaskIsFlat) {
      bitmaskL2 = static_cast<uint16_t>(timebinLinkHeaderStart & 0b11000000) << 2 | static_cast<uint16_t>(ConsumeByte(page));
      MAYBE_PAGE_OVERFLOW(page);
    }

    int32_t nBytesBitmask = CAMath::Popcount(bitmaskL2);
    assert(nBytesBitmask <= 10);

    for (int32_t chan = iThread; chan < CAMath::nextMultipleOf<NTHREADS>(80); chan += NTHREADS) {
      int32_t chanL2Idx = chan / 8;
      bool l2 = TEST_BIT(bitmaskL2, chanL2Idx);

      int32_t chanByteOffset = nBytesBitmask - 1 - CAMath::Popcount(bitmaskL2 >> (chanL2Idx + 1));

      uint8_t myChannelHasData = (chan < 80 && l2 ? TEST_BIT(PEEK_OVERFLOW(page, chanByteOffset), chan % 8) : 0);
      assert(myChannelHasData == 0 || myChannelHasData == 1);

      int32_t nSamplesStep;
      int32_t threadSampleOffset = CfUtils::warpPredicateScan(myChannelHasData, &nSamplesStep);

      if (myChannelHasData) {
        smem.rawFECChannels[nSamplesInTB + threadSampleOffset] = chan;
      }

      nSamplesInTB += nSamplesStep;
    }

    ConsumeBytes(page, nBytesBitmask);
    MAYBE_PAGE_OVERFLOW(page);

    if (iThread == 0) {
      smem.samplesPerLinkEnd[iLink] = nSamplesInTB;
    }

  } // for (uint8_t iLink = 0; iLink < nLinksInTimebin; iLink++)

  const uint8_t* adcData = ConsumeBytes(page, (nSamplesInTB * DECODE_BITS + 7) / 8);
  MAYBE_PAGE_OVERFLOW(page); // TODO: We don't need this check?

  if (not fragment.contains(timeBin)) {
    return FillWithInvalid(clusterer, iThread, NTHREADS, pageDigitOffset, nSamplesInTB);
  }

  GPUbarrier();

  // Unpack ADC
  int32_t iLink = 0;
  for (uint16_t sample = iThread; sample < nSamplesInTB; sample += NTHREADS) {
    const uint16_t adcBitOffset = sample * DECODE_BITS;
    uint16_t adcByteOffset = adcBitOffset / CHAR_BIT;
    const uint8_t adcOffsetInByte = adcBitOffset - adcByteOffset * CHAR_BIT;

    uint8_t bits = 0;
    uint16_t byte = 0;

    static_assert(DECODE_BITS <= sizeof(uint16_t) * CHAR_BIT);

    while (bits < DECODE_BITS) {
      byte |= static_cast<uint16_t>(PEEK_OVERFLOW(adcData, adcByteOffset)) << bits;
      adcByteOffset++;
      bits += CHAR_BIT;
    }
    byte >>= adcOffsetInByte;

    while (smem.samplesPerLinkEnd[iLink] <= sample) {
      iLink++;
    }

    int32_t rawFECChannelLink = smem.rawFECChannels[sample];

    // Unpack data for cluster finder
    o2::tpc::PadPos padAndRow = GetPadAndRowFromFEC(clusterer, cru, rawFECChannelLink, smem.linkIds[iLink]);

    float charge = ADCToFloat(byte, DECODE_MASK, DECODE_BITS_FACTOR);
    WriteCharge(clusterer, charge, padAndRow, fragment.toLocal(timeBin), pageDigitOffset + sample);

  } // for (uint16_t sample = iThread; sample < nSamplesInTB; sample += NTHREADS)

  assert(PayloadExtendsToNextPage || adcData <= page);
  assert(PayloadExtendsToNextPage || page <= payloadEnd);

  return nSamplesInTB;

#undef TEST_BIT
#undef PEEK_OVERFLOW
#undef MAYBE_PAGE_OVERFLOW
}
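
/*
  Bit layout of the 16-bit timebin block header decoded above (sketch;
  the numeric values are illustrative):

    bits  3..0  nLinksInTimebin
    bits 15..4  linkBC (bunch crossing of this timebin)

  For tbbHdr = 0x1234: nLinksInTimebin = 4, linkBC = 0x123 = 291. Assuming
  LHCBCPERTIMEBIN = 8 and an orbit two heartbeats past firstHBF
  (LHCMaxBunches = 3564):
    timeBin = (291 + 2 * 3564) / 8 = 7419 / 8 = 927
*/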

template <bool PayloadExtendsToNextPage>
GPUd() uint16_t GPUTPCCFDecodeZSDenseLink::DecodeTBSingleThread(
  processorType& clusterer,
  const uint8_t*& page,
  uint32_t pageDigitOffset,
  const header::RAWDataHeader* rawDataHeader,
  int32_t firstHBF,
  int32_t cru,
  [[maybe_unused]] const uint8_t* payloadEnd,
  [[maybe_unused]] const uint8_t* nextPage)
{
#define MAYBE_PAGE_OVERFLOW(pagePtr)                               \
  if constexpr (PayloadExtendsToNextPage) {                        \
    if (pagePtr >= payloadEnd && pagePtr < nextPage) {             \
      ptrdiff_t diff = pagePtr - payloadEnd;                       \
      pagePtr = nextPage;                                          \
      ConsumeBytes(pagePtr, sizeof(header::RAWDataHeader) + diff); \
    }                                                              \
  } else {                                                         \
    assert(pagePtr <= payloadEnd);                                 \
  }

  using zerosupp_link_based::ChannelPerTBHeader;

  const CfFragment& fragment = clusterer.mPmemory->fragment;

  uint8_t linkIds[MaxNLinksPerTimebin];
  uint8_t channelMasks[MaxNLinksPerTimebin * 10] = {0};
  uint16_t nSamplesWritten = 0;

  // Read timebin block header
  uint16_t tbbHdr = ConsumeByte(page);
  MAYBE_PAGE_OVERFLOW(page);
  tbbHdr |= static_cast<uint16_t>(ConsumeByte(page)) << CHAR_BIT;
  MAYBE_PAGE_OVERFLOW(page);

  uint8_t nLinksInTimebin = tbbHdr & 0x000F;
  uint16_t linkBC = (tbbHdr & 0xFFF0) >> 4;
  int32_t timeBin = (linkBC + (uint64_t)(raw::RDHUtils::getHeartBeatOrbit(*rawDataHeader) - firstHBF) * constants::lhc::LHCMaxBunches) / LHCBCPERTIMEBIN;

  uint16_t nSamplesInTB = 0;

  // Read timebin link headers
  for (uint8_t iLink = 0; iLink < nLinksInTimebin; iLink++) {
    uint8_t timebinLinkHeaderStart = ConsumeByte(page);
    MAYBE_PAGE_OVERFLOW(page);

    linkIds[iLink] = timebinLinkHeaderStart & 0b00011111;

    bool bitmaskIsFlat = timebinLinkHeaderStart & 0b00100000;

    uint16_t bitmaskL2 = 0x03FF;
    if (not bitmaskIsFlat) {
      bitmaskL2 = static_cast<uint16_t>(timebinLinkHeaderStart & 0b11000000) << 2 | static_cast<uint16_t>(ConsumeByte(page));
      MAYBE_PAGE_OVERFLOW(page);
    }

    for (int32_t i = 0; i < 10; i++) {
      if (bitmaskL2 & 1 << i) {
        nSamplesInTB += CAMath::Popcount(*Peek(page));
        channelMasks[10 * iLink + i] = ConsumeByte(page);
        MAYBE_PAGE_OVERFLOW(page);
      }
    }

  } // for (uint8_t iLink = 0; iLink < nLinksInTimebin; iLink++)

  const uint8_t* adcData = ConsumeBytes(page, (nSamplesInTB * DECODE_BITS + 7) / 8);
  MAYBE_PAGE_OVERFLOW(page);

  if (not fragment.contains(timeBin)) {
    FillWithInvalid(clusterer, 0, 1, pageDigitOffset, nSamplesInTB);
    return nSamplesInTB;
  }

  // Unpack ADC
  uint32_t byte = 0, bits = 0;
  uint16_t rawFECChannel = 0;

  // unpack adc values, assume tightly packed data
  while (nSamplesWritten < nSamplesInTB) {
    byte |= static_cast<uint32_t>(ConsumeByte(adcData)) << bits;
    MAYBE_PAGE_OVERFLOW(adcData);
    bits += CHAR_BIT;
    while (bits >= DECODE_BITS) {

      // Find next channel with data
      for (; !ChannelIsActive(channelMasks, rawFECChannel); rawFECChannel++) {
      }

      int32_t iLink = rawFECChannel / ChannelPerTBHeader;
      int32_t rawFECChannelLink = rawFECChannel % ChannelPerTBHeader;

      // Unpack data for cluster finder
      o2::tpc::PadPos padAndRow = GetPadAndRowFromFEC(clusterer, cru, rawFECChannelLink, linkIds[iLink]);

      float charge = ADCToFloat(byte, DECODE_MASK, DECODE_BITS_FACTOR);
      WriteCharge(clusterer, charge, padAndRow, fragment.toLocal(timeBin), pageDigitOffset + nSamplesWritten);

      byte >>= DECODE_BITS;
      bits -= DECODE_BITS;
      nSamplesWritten++;
      rawFECChannel++; // Ensure we don't decode same channel twice
    } // while (bits >= DECODE_BITS)
  } // while (nSamplesWritten < nSamplesInTB)

  assert(PayloadExtendsToNextPage || adcData <= page);
  assert(PayloadExtendsToNextPage || page <= payloadEnd);
  assert(nSamplesWritten == nSamplesInTB);

  return nSamplesWritten;

#undef MAYBE_PAGE_OVERFLOW
}

GPUd() bool GPUTPCCFDecodeZSDenseLink::ChannelIsActive(const uint8_t* chan, uint16_t chanIndex)
{
  constexpr uint8_t N_BITS_PER_ENTRY = sizeof(*chan) * CHAR_BIT;
  const uint8_t entryIndex = chanIndex / N_BITS_PER_ENTRY;
  const uint8_t bitInEntry = chanIndex % N_BITS_PER_ENTRY;
  return chan[entryIndex] & (1 << bitInEntry);
}