// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
// All rights not expressly granted are reserved.
//
// This software is distributed under the terms of the GNU General Public
// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
//
// In applying this license CERN does not waive the privileges and immunities
// granted to it by virtue of its status as an Intergovernmental Organization
// or submit itself to any jurisdiction.

/// \file GPUTPCCFDecodeZS.cxx

#include "GPUTPCCFDecodeZS.h"
#include "GPUCommonMath.h"
#include "GPUTPCClusterFinder.h"
#include "Array2D.h"
#include "PackedCharge.h"
#include "CfUtils.h"
#include "DataFormatsTPC/ZeroSuppression.h"
#include "GPUCommonAlgorithm.h"
#include "TPCPadGainCalib.h"
#include "TPCZSLinkMapping.h"

using namespace o2::gpu;
using namespace o2::gpu::tpccf;
using namespace o2::tpc;
using namespace o2::tpc::constants;

// ===========================================================================
// ===========================================================================
// Decode ZS Row
// ===========================================================================
// ===========================================================================
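// Decoder for the row-based zero-suppression format (ZS versions 1 and 2). As
// implied by the parsing below, each page of TPCZSHDR::TPC_ZS_PAGE_SIZE bytes
// starts with a RAWDataHeader and a TPCZSHDR, followed by time bin headers
// (TPCZSTBHDR, 15-bit row mask). Each masked row stores a sequence count,
// (pad, cumulative sample count) pairs, and a bit stream of ADC values packed
// at TPC_ZS_NBITS_V1 (10) or TPC_ZS_NBITS_V2 (12) bits per sample.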

template <>
GPUdii() void GPUTPCCFDecodeZS::Thread<GPUTPCCFDecodeZS::decodeZS>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, int32_t firstHBF)
{
  GPUTPCCFDecodeZS::decode(clusterer, smem, nBlocks, nThreads, iBlock, iThread, firstHBF);
}

GPUdii() void GPUTPCCFDecodeZS::decode(GPUTPCClusterFinder& clusterer, GPUSharedMemory& s, int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t firstHBF)
{
  const uint32_t sector = clusterer.mISector;
#ifdef GPUCA_GPUCODE
  const uint32_t endpoint = clusterer.mPzsOffsets[iBlock].endpoint;
#else
  const uint32_t endpoint = iBlock;
#endif
  const GPUTrackingInOutZS::GPUTrackingInOutZSSector& zs = clusterer.GetConstantMem()->ioPtrs.tpcZS->sector[sector];
  if (zs.count[endpoint] == 0) {
    return;
  }
  ChargePos* positions = clusterer.mPpositions;
  Array2D<PackedCharge> chargeMap(reinterpret_cast<PackedCharge*>(clusterer.mPchargeMap));
  const size_t nDigits = clusterer.mPzsOffsets[iBlock].offset;
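  // Each endpoint covers half the rows of its region (even endpoints the
  // first half, odd endpoints the second). The block is split so that
  // nThreadsPerRow consecutive threads decode the sequences of one row,
  // with cooperating rows spaced rowStride apart.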
  if (iThread == 0) {
    const int32_t region = endpoint / 2;
    s.nRowsRegion = clusterer.Param().tpcGeometry.GetRegionRows(region);
    s.regionStartRow = clusterer.Param().tpcGeometry.GetRegionStart(region);
    s.nThreadsPerRow = CAMath::Max(1u, nThreads / ((s.nRowsRegion + (endpoint & 1)) / 2));
    s.rowStride = nThreads / s.nThreadsPerRow;
    s.rowOffsetCounter = 0;
  }
  GPUbarrier();
  const uint32_t myRow = iThread / s.nThreadsPerRow;
  const uint32_t mySequence = iThread % s.nThreadsPerRow;
#ifdef GPUCA_GPUCODE
  const uint32_t i = 0;
  const uint32_t j = clusterer.mPzsOffsets[iBlock].num;
  {
    {
#else
  for (uint32_t i = clusterer.mMinMaxCN[endpoint].zsPtrFirst; i < clusterer.mMinMaxCN[endpoint].zsPtrLast; i++) {
    const uint32_t minJ = (i == clusterer.mMinMaxCN[endpoint].zsPtrFirst) ? clusterer.mMinMaxCN[endpoint].zsPageFirst : 0;
    const uint32_t maxJ = (i + 1 == clusterer.mMinMaxCN[endpoint].zsPtrLast) ? clusterer.mMinMaxCN[endpoint].zsPageLast : zs.nZSPtr[endpoint][i];
    for (uint32_t j = minJ; j < maxJ; j++) {
#endif
      const uint32_t* pageSrc = (const uint32_t*)(((const uint8_t*)zs.zsPtr[endpoint][i]) + j * TPCZSHDR::TPC_ZS_PAGE_SIZE);
      CA_SHARED_CACHE_REF(&s.ZSPage[0], pageSrc, TPCZSHDR::TPC_ZS_PAGE_SIZE, uint32_t, pageCache);
      GPUbarrier();
      const uint8_t* page = (const uint8_t*)pageCache;
      const o2::header::RAWDataHeader* rdh = (const o2::header::RAWDataHeader*)page;
      if (o2::raw::RDHUtils::getMemorySize(*rdh) == sizeof(o2::header::RAWDataHeader)) {
#ifdef GPUCA_GPUCODE
        return;
#else
        continue;
#endif
      }
      const uint8_t* pagePtr = page + sizeof(o2::header::RAWDataHeader);
      const TPCZSHDR* hdr = reinterpret_cast<const TPCZSHDR*>(pagePtr);
      pagePtr += sizeof(*hdr);
      const bool decode12bit = hdr->version == 2;
      const uint32_t decodeBits = decode12bit ? TPCZSHDR::TPC_ZS_NBITS_V2 : TPCZSHDR::TPC_ZS_NBITS_V1;
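      // 12-bit (V2) samples are fixed point with two fractional bits, so
      // scale by 1/2^(decodeBits - 10) to map them back to the 10-bit ADC range.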
      const float decodeBitsFactor = 1.f / (1 << (decodeBits - 10));
      uint32_t mask = (1 << decodeBits) - 1;
      int32_t timeBin = (hdr->timeOffset + (o2::raw::RDHUtils::getHeartBeatOrbit(*rdh) - firstHBF) * o2::constants::lhc::LHCMaxBunches) / LHCBCPERTIMEBIN;
      const int32_t rowOffset = s.regionStartRow + ((endpoint & 1) ? (s.nRowsRegion / 2) : 0);
      const int32_t nRows = (endpoint & 1) ? (s.nRowsRegion - s.nRowsRegion / 2) : (s.nRowsRegion / 2);

      for (int32_t l = 0; l < hdr->nTimeBinSpan; l++) { // TODO: Parallelize over time bins
        pagePtr += (pagePtr - page) & 1;                // Ensure 16 bit alignment
        const TPCZSTBHDR* tbHdr = reinterpret_cast<const TPCZSTBHDR*>(pagePtr);
        if ((tbHdr->rowMask & 0x7FFF) == 0) {
          pagePtr += 2;
          continue;
        }
        const int32_t nRowsUsed = CAMath::Popcount((uint32_t)(tbHdr->rowMask & 0x7FFF));
        pagePtr += 2 * nRowsUsed;

        GPUbarrier();
        for (int32_t n = iThread; n < nRowsUsed; n += nThreads) {
          const uint8_t* rowData = n == 0 ? pagePtr : (page + tbHdr->rowAddr1()[n - 1]);
          s.RowClusterOffset[n] = CAMath::AtomicAddShared<uint32_t>(&s.rowOffsetCounter, rowData[2 * *rowData]);
        }
        /*if (iThread < GPUCA_WARP_SIZE) { // TODO: Seems to miscompile with HIP, CUDA performance doesn't really change, for now sticking to the AtomicAdd
          GPUSharedMemory& smem = s;
          int32_t o;
          if (iThread < nRowsUsed) {
            const uint8_t* rowData = iThread == 0 ? pagePtr : (page + tbHdr->rowAddr1()[iThread - 1]);
            o = rowData[2 * *rowData];
          } else {
            o = 0;
          }
          int32_t x = warp_scan_inclusive_add(o);
          if (iThread < nRowsUsed) {
            s.RowClusterOffset[iThread] = s.rowOffsetCounter + x - o;
          } else if (iThread == GPUCA_WARP_SIZE - 1) {
            s.rowOffsetCounter += x;
          }
        }*/
        GPUbarrier();

        if (myRow < s.rowStride) {
          for (int32_t m = myRow; m < nRows; m += s.rowStride) {
            if ((tbHdr->rowMask & (1 << m)) == 0) {
              continue;
            }
            const int32_t rowPos = CAMath::Popcount((uint32_t)(tbHdr->rowMask & ((1 << m) - 1)));
            size_t nDigitsTmp = nDigits + s.RowClusterOffset[rowPos];
            const uint8_t* rowData = rowPos == 0 ? pagePtr : (page + tbHdr->rowAddr1()[rowPos - 1]);
            const int32_t nSeqRead = *rowData;
            const int32_t nSeqPerThread = (nSeqRead + s.nThreadsPerRow - 1) / s.nThreadsPerRow;
            const int32_t mySequenceStart = mySequence * nSeqPerThread;
            const int32_t mySequenceEnd = CAMath::Min(mySequenceStart + nSeqPerThread, nSeqRead);
            if (mySequenceEnd > mySequenceStart) {
              const uint8_t* adcData = rowData + 2 * nSeqRead + 1;
              const uint32_t nSamplesStart = mySequenceStart ? rowData[2 * mySequenceStart] : 0;
              nDigitsTmp += nSamplesStart;
              uint32_t nADCStartBits = nSamplesStart * decodeBits;
              const uint32_t nADCStart = (nADCStartBits + 7) / 8;
              const int32_t nADC = (rowData[2 * mySequenceEnd] * decodeBits + 7) / 8;
              adcData += nADCStart;
              nADCStartBits &= 0x7;
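              // Stream the packed ADC bits: accumulate input bytes into 'byte'
              // and emit one sample whenever at least decodeBits bits are
              // buffered. 'seqLen' counts down the samples remaining in the
              // current (pad, length) sequence before the next sequence's pad
              // is loaded.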
              uint32_t byte = 0, bits = 0;
              if (nADCStartBits) { // % 8 != 0
                bits = 8 - nADCStartBits;
                byte = ((*(adcData - 1) & (0xFF ^ ((1 << nADCStartBits) - 1)))) >> nADCStartBits;
              }
              int32_t nSeq = mySequenceStart;
              int32_t seqLen = nSeq ? (rowData[(nSeq + 1) * 2] - rowData[nSeq * 2]) : rowData[2];
              Pad pad = rowData[nSeq++ * 2 + 1];
              for (int32_t n = nADCStart; n < nADC; n++) {
                byte |= *(adcData++) << bits;
                bits += 8;
                while (bits >= decodeBits) {
                  if (seqLen == 0) {
                    seqLen = rowData[(nSeq + 1) * 2] - rowData[nSeq * 2];
                    pad = rowData[nSeq++ * 2 + 1];
                  }
                  const CfFragment& fragment = clusterer.mPmemory->fragment;
                  TPCTime globalTime = timeBin + l;
                  bool inFragment = fragment.contains(globalTime);
                  Row row = rowOffset + m;
                  ChargePos pos(row, Pad(pad), inFragment ? fragment.toLocal(globalTime) : INVALID_TIME_BIN);
                  positions[nDigitsTmp++] = pos;

                  if (inFragment) {
                    float q = float(byte & mask) * decodeBitsFactor;
                    q *= clusterer.GetConstantMem()->calibObjects.tpcPadGain->getGainCorrection(sector, row, pad);
                    chargeMap[pos] = PackedCharge(q);
                  }
                  pad++;
                  byte = byte >> decodeBits;
                  bits -= decodeBits;
                  seqLen--;
                }
              }
            }
          }
        }
        if (nRowsUsed > 1) {
          pagePtr = page + tbHdr->rowAddr1()[nRowsUsed - 2];
        }
        pagePtr += 2 * *pagePtr;                        // Go to entry for last sequence length
        pagePtr += 1 + (*pagePtr * decodeBits + 7) / 8; // Go to beginning of next time bin
      }
    }
  }
}

// ===========================================================================
// ===========================================================================
// Decode ZS Link
// ===========================================================================
// ===========================================================================
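// Decoder for the link-based zero-suppression format (ZSVersionLinkBasedWithMeta).
// Each page carries a TPCZSHDRV2 and a list of per-timebin headers
// (zerosupp_link_based::CommonHeader) with an 80-bit channel bitmask per FEC
// link; the ADC samples of all active channels follow each header back to back.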

template <>
GPUdii() void GPUTPCCFDecodeZSLink::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, int32_t firstHBF)
{
  Decode<GPUTPCCFDecodeZSLink>(nBlocks, nThreads, iBlock, iThread, smem, clusterer, firstHBF);
}

GPUd() size_t GPUTPCCFDecodeZSLink::DecodePage(GPUSharedMemory& smem, processorType& clusterer, int32_t iBlock, int32_t nThreads, int32_t iThread, const uint8_t* page, uint32_t pageDigitOffset, int32_t firstHBF)
{
  const CfFragment& fragment = clusterer.mPmemory->fragment;

  const auto* rdHdr = ConsumeHeader<header::RAWDataHeader>(page);

  if (o2::raw::RDHUtils::getMemorySize(*rdHdr) == sizeof(o2::header::RAWDataHeader)) {
    return pageDigitOffset;
  }

  int32_t nDecoded = 0;
  const auto* decHdr = ConsumeHeader<TPCZSHDRV2>(page);
  ConsumeBytes(page, decHdr->firstZSDataOffset * 16);

  assert(decHdr->version == ZSVersionLinkBasedWithMeta);

  for (uint32_t t = 0; t < decHdr->nTimebinHeaders; t++) {
    const auto* tbHdr = ConsumeHeader<zerosupp_link_based::CommonHeader>(page);
    const auto* adcData = ConsumeBytes(page, tbHdr->numWordsPayload * 16); // Page now points to next timebin or past the page

    int32_t timeBin = (decHdr->timeOffset + tbHdr->bunchCrossing + (uint64_t)(o2::raw::RDHUtils::getHeartBeatOrbit(*rdHdr) - firstHBF) * o2::constants::lhc::LHCMaxBunches) / LHCBCPERTIMEBIN;

    uint32_t channelMask[3];
    GetChannelBitmask(*tbHdr, channelMask);
    uint32_t nAdc = CAMath::Popcount(channelMask[0]) + CAMath::Popcount(channelMask[1]) + CAMath::Popcount(channelMask[2]);

    bool inFragment = fragment.contains(timeBin);
    nDecoded += nAdc;

    // TimeBin not in fragment: Skip this timebin header and fill positions with dummy values instead
    if (not inFragment) {
      pageDigitOffset += FillWithInvalid(clusterer, iThread, nThreads, pageDigitOffset, nAdc);
      continue;
    }

#ifdef GPUCA_GPUCODE
    DecodeTBMultiThread(
      clusterer,
      iThread,
      smem,
      adcData,
      nAdc,
      channelMask,
      timeBin,
      decHdr->cruID,
      tbHdr->fecInPartition,
      pageDigitOffset);
#else // CPU
    DecodeTBSingleThread(
      clusterer,
      adcData,
      nAdc,
      channelMask,
      timeBin,
      decHdr->cruID,
      tbHdr->fecInPartition,
      pageDigitOffset);
#endif
    pageDigitOffset += nAdc;
  } // for (uint32_t t = 0; t < decHdr->nTimebinHeaders; t++)
  (void)nDecoded;
#ifdef GPUCA_CHECK_TPCZS_CORRUPTION
  if (iThread == 0 && nDecoded != decHdr->nADCsamples) {
    clusterer.raiseError(GPUErrors::ERROR_TPCZS_INVALID_NADC, clusterer.mISector * 1000 + decHdr->cruID, decHdr->nADCsamples, nDecoded);
    /*#ifndef GPUCA_GPUCODE
    FILE* foo = fopen("dump.bin", "w+b");
    fwrite(pageSrc, 1, o2::raw::RDHUtils::getMemorySize(*rdHdr), foo);
    fclose(foo);
    #endif*/
  }
#endif
  return pageDigitOffset;
}

GPUd() void GPUTPCCFDecodeZSLink::DecodeTBSingleThread(
  processorType& clusterer,
  const uint8_t* adcData,
  uint32_t nAdc,
  const uint32_t* channelMask,
  int32_t timeBin,
  int32_t cru,
  int32_t fecInPartition,
  uint32_t pageDigitOffset)
{
  const CfFragment& fragment = clusterer.mPmemory->fragment;

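  // Two payload layouts are handled: with TIGHTLY_PACKED_V3 the DECODE_BITS
  // wide samples form one contiguous bit stream; otherwise each 64-bit word
  // holds SAMPLESPER64BIT samples, with the remaining high bits unused.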
  if constexpr (TPCZSHDRV2::TIGHTLY_PACKED_V3) {

    uint32_t byte = 0, bits = 0, nSamplesWritten = 0, rawFECChannel = 0;

    // unpack adc values, assume tightly packed data
    while (nSamplesWritten < nAdc) {
      byte |= adcData[0] << bits;
      adcData++;
      bits += CHAR_BIT;
      while (bits >= DECODE_BITS) {

        // Find next channel with data
        for (; !ChannelIsActive(channelMask, rawFECChannel); rawFECChannel++) {
        }

        // Unpack data for cluster finder
        o2::tpc::PadPos padAndRow = GetPadAndRowFromFEC(clusterer, cru, rawFECChannel, fecInPartition);

        // Mask and scale the sample before writing (mirrors the unpacked branch below)
        float charge = ADCToFloat(byte, DECODE_MASK, DECODE_BITS_FACTOR);
        WriteCharge(clusterer, charge, padAndRow, fragment.toLocal(timeBin), pageDigitOffset + nSamplesWritten);

        byte = byte >> DECODE_BITS;
        bits -= DECODE_BITS;
        nSamplesWritten++;
        rawFECChannel++; // Ensure we don't decode same channel twice
      } // while (bits >= DECODE_BITS)
    } // while (nSamplesWritten < nAdc)

  } else { // ! TPCZSHDRV2::TIGHTLY_PACKED_V3
    uint32_t rawFECChannel = 0;
    const uint64_t* adcData64 = (const uint64_t*)adcData;
    for (uint32_t j = 0; j < nAdc; j++) {
      for (; !ChannelIsActive(channelMask, rawFECChannel); rawFECChannel++) {
      }

      uint32_t adc = (adcData64[j / TPCZSHDRV2::SAMPLESPER64BIT] >> ((j % TPCZSHDRV2::SAMPLESPER64BIT) * DECODE_BITS)) & DECODE_MASK;

      o2::tpc::PadPos padAndRow = GetPadAndRowFromFEC(clusterer, cru, rawFECChannel, fecInPartition);
      float charge = ADCToFloat(adc, DECODE_MASK, DECODE_BITS_FACTOR);
      WriteCharge(clusterer, charge, padAndRow, fragment.toLocal(timeBin), pageDigitOffset + j);
      rawFECChannel++;
    }
  }
}

GPUd() void GPUTPCCFDecodeZSLink::DecodeTBMultiThread(
  processorType& clusterer,
  int32_t iThread,
  GPUSharedMemory& smem,
  const uint8_t* adcData,
  uint32_t nAdc,
  const uint32_t* channelMask,
  int32_t timeBin,
  int32_t cru,
  int32_t fecInPartition,
  uint32_t pageDigitOffset)
{
  constexpr int32_t NTHREADS = GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFDecodeZSLink);
  static_assert(NTHREADS == GPUCA_WARP_SIZE, "Decoding TB Headers in parallel assumes block size is a single warp.");

  uint8_t blockOffset = 0;
  for (uint8_t i = iThread; blockOffset < nAdc; i += NTHREADS) {

    uint8_t rawFECChannel = i;

    uint8_t myChannelActive = ChannelIsActive(channelMask, rawFECChannel);

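    // An inclusive warp scan over the active-channel predicate gives each
    // thread the number of active channels up to and including its own lane;
    // the last lane's value, broadcast to the warp, is the updated block
    // offset for the next iteration.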
    uint8_t myOffset = warp_scan_inclusive_add(myChannelActive) - 1 + blockOffset;
    blockOffset = warp_broadcast(myOffset, NTHREADS - 1) + 1;

    // Decode entire timebin at once if we have enough threads
    // This should further improve performance, but code below is buggy...
    // if (nAdc <= NThreads) {
    //   for (int32_t j = 1; blockOffset < nAdc; j++) {
    //     rawFECChannel = myChannelActive ? rawFECChannel : (iThread + j*NThreads - myOffset);

    //     bool iAmIdle = not myChannelActive;

    //     myChannelActive =
    //       rawFECChannel < zerosupp_link_based::CommonHeaderlPerTBHeader
    //         ? BitIsSet(channelMask, rawFECChannel)
    //         : false;

    //     uint8_t newOffset = warp_scan_inclusive_add(static_cast<uint8_t>(myChannelActive && iAmIdle)) - 1 + blockOffset;
    //     blockOffset = warp_broadcast(newOffset, NThreads - 1) + 1;

    //     myOffset = iAmIdle ? newOffset : myOffset;
    //   }
    // }

    if (not myChannelActive) {
      continue;
    }
    assert(myOffset < nAdc);

    uint32_t adc = 0;

    if constexpr (TPCZSHDRV2::TIGHTLY_PACKED_V3) {

      // Try to access adcData with 4 byte reads instead of 1 byte.
      // You'd think this would improve performance, but it's actually slower...
      // const uint32_t* adcDataU32 = reinterpret_cast<const uint32_t*>(adcData);

      uint32_t adcBitOffset = myOffset * DECODE_BITS;
      uint32_t adcByteOffset = adcBitOffset / CHAR_BIT;
      uint32_t adcOffsetInByte = adcBitOffset - adcByteOffset * CHAR_BIT;
      // uint32_t adcByteOffset = adcBitOffset / 32;
      // uint32_t adcOffsetInByte = adcBitOffset - adcByteOffset * 32;

      uint32_t byte = 0, bits = 0;

      // uint32_t byte = adcDataU32[adcByteOffset] >> adcOffsetInByte;
      // uint32_t bits = 32 - adcOffsetInByte;
      // adcByteOffset++;

      while (bits < DECODE_BITS) {
        byte |= ((uint32_t)adcData[adcByteOffset]) << bits;
        // byte |= adcDataU32[adcByteOffset] << bits;
        adcByteOffset++;
        bits += CHAR_BIT;
        // bits += 32;
      }
      adc = byte >> adcOffsetInByte;

    } else { // ! TPCZSHDRV2::TIGHTLY_PACKED_V3
      const uint64_t* adcData64 = (const uint64_t*)adcData;
      adc = (adcData64[myOffset / TPCZSHDRV2::SAMPLESPER64BIT] >> ((myOffset % TPCZSHDRV2::SAMPLESPER64BIT) * DECODE_BITS)) & DECODE_MASK;
    }

    o2::tpc::PadPos padAndRow = GetPadAndRowFromFEC(clusterer, cru, rawFECChannel, fecInPartition);
    const CfFragment& fragment = clusterer.mPmemory->fragment;
    float charge = ADCToFloat(adc, DECODE_MASK, DECODE_BITS_FACTOR);
    WriteCharge(clusterer, charge, padAndRow, fragment.toLocal(timeBin), pageDigitOffset + myOffset);

  } // for (uint8_t i = iThread; blockOffset < nAdc; i += NThreads)
}

GPUd() void GPUTPCCFDecodeZSLink::GetChannelBitmask(const zerosupp_link_based::CommonHeader& tbHdr, uint32_t* chan)
{
  chan[0] = tbHdr.bitMaskLow & 0xfffffffful;
  chan[1] = tbHdr.bitMaskLow >> (sizeof(uint32_t) * CHAR_BIT);
  chan[2] = tbHdr.bitMaskHigh;
}

GPUd() bool GPUTPCCFDecodeZSLink::ChannelIsActive(const uint32_t* chan, uint8_t chanIndex)
{
  if (chanIndex >= zerosupp_link_based::ChannelPerTBHeader) {
    return false;
  }
  constexpr uint8_t N_BITS_PER_ENTRY = sizeof(*chan) * CHAR_BIT;
  const uint8_t entryIndex = chanIndex / N_BITS_PER_ENTRY;
  const uint8_t bitInEntry = chanIndex % N_BITS_PER_ENTRY;
  return chan[entryIndex] & (1 << bitInEntry);
}

// ===========================================================================
// ===========================================================================
// Decode ZS Link Base
// ===========================================================================
// ===========================================================================
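// Common page loop shared by the link decoders: iterate over the ZS pages of
// one endpoint, skip pages that contain only a RAWDataHeader, and hand every
// other page to Decoder::DecodePage, threading the running digit offset through.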

template <class Decoder>
GPUd() void GPUTPCCFDecodeZSLinkBase::Decode(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, typename Decoder::GPUSharedMemory& smem, processorType& clusterer, int32_t firstHBF)
{
  const uint32_t sector = clusterer.mISector;

#ifdef GPUCA_GPUCODE
  const uint32_t endpoint = clusterer.mPzsOffsets[iBlock].endpoint;
#else // CPU
  const uint32_t endpoint = iBlock;
#endif

  const GPUTrackingInOutZS::GPUTrackingInOutZSSector& zs = clusterer.GetConstantMem()->ioPtrs.tpcZS->sector[sector];
  if (zs.count[endpoint] == 0) {
    return;
  }

  uint32_t pageDigitOffset = clusterer.mPzsOffsets[iBlock].offset;

#ifdef GPUCA_GPUCODE
  const uint32_t i = 0;
  const uint32_t j = clusterer.mPzsOffsets[iBlock].num;
  {
    {
#else // CPU
  for (uint32_t i = clusterer.mMinMaxCN[endpoint].zsPtrFirst; i < clusterer.mMinMaxCN[endpoint].zsPtrLast; i++) {
    const uint32_t minJ = (i == clusterer.mMinMaxCN[endpoint].zsPtrFirst) ? clusterer.mMinMaxCN[endpoint].zsPageFirst : 0;
    const uint32_t maxJ = (i + 1 == clusterer.mMinMaxCN[endpoint].zsPtrLast) ? clusterer.mMinMaxCN[endpoint].zsPageLast : zs.nZSPtr[endpoint][i];
    for (uint32_t j = minJ; j < maxJ; j++) {
#endif
      const uint32_t* pageSrc = (const uint32_t*)(((const uint8_t*)zs.zsPtr[endpoint][i]) + j * TPCZSHDR::TPC_ZS_PAGE_SIZE);
      // Cache zs page in shared memory. Curiously this actually degrades performance...
      // CA_SHARED_CACHE_REF(&smem.ZSPage[0], pageSrc, TPCZSHDR::TPC_ZS_PAGE_SIZE, uint32_t, pageCache);
      // GPUbarrier();
      // const uint8_t* page = (const uint8_t*)pageCache;
      const uint8_t* page = (const uint8_t*)pageSrc;

      const auto* rdHdr = Peek<header::RAWDataHeader>(page);

      if (o2::raw::RDHUtils::getMemorySize(*rdHdr) == sizeof(o2::header::RAWDataHeader)) {
#ifdef GPUCA_GPUCODE
        return;
#else
        continue;
#endif
      }

      pageDigitOffset = Decoder::DecodePage(smem, clusterer, iBlock, nThreads, iThread, page, pageDigitOffset, firstHBF);
    } // [CPU] for (uint32_t j = minJ; j < maxJ; j++)
  }   // [CPU] for (uint32_t i = clusterer.mMinMaxCN[endpoint].zsPtrFirst; i < clusterer.mMinMaxCN[endpoint].zsPtrLast; i++)

#ifdef GPUCA_CHECK_TPCZS_CORRUPTION
  if (iThread == 0 && iBlock < nBlocks - 1) {
    uint32_t maxOffset = clusterer.mPzsOffsets[iBlock + 1].offset;
    if (pageDigitOffset != maxOffset) {
      clusterer.raiseError(GPUErrors::ERROR_TPCZS_INVALID_OFFSET, clusterer.mISector * 1000 + endpoint, pageDigitOffset, maxOffset);
    }
  }
#endif
}

GPUd() o2::tpc::PadPos GPUTPCCFDecodeZSLinkBase::GetPadAndRowFromFEC(processorType& clusterer, int32_t cru, int32_t rawFECChannel, int32_t fecInPartition)
{
#ifdef GPUCA_TPC_GEOMETRY_O2
  // Ported from tpc::Mapper (Not available on GPU...)
  const GPUTPCGeometry& geo = clusterer.Param().tpcGeometry;

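  // Decompose the raw FEC channel index into its SAMPA data stream and
  // channel, combine them with the FEC position into a global SAMPA id, and
  // resolve that id through the precomputed FECIDToPadPos table uploaded with
  // the calibration objects.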
  const int32_t regionIter = cru % 2;
  const int32_t istreamm = ((rawFECChannel % 10) / 2);
  const int32_t partitionStream = istreamm + regionIter * 5;
  const int32_t sampaOnFEC = geo.GetSampaMapping(partitionStream);
  const int32_t channel = (rawFECChannel % 2) + 2 * (rawFECChannel / 10);
  const int32_t channelOnSAMPA = channel + geo.GetChannelOffset(partitionStream);

  const int32_t partition = (cru % 10) / 2;
  const int32_t fecInSector = geo.GetSectorFECOffset(partition) + fecInPartition;

  const TPCZSLinkMapping* gpuMapping = clusterer.GetConstantMem()->calibObjects.tpcZSLinkMapping;
  assert(gpuMapping != nullptr);

  uint16_t globalSAMPAId = (static_cast<uint16_t>(fecInSector) << 8) + (static_cast<uint16_t>(sampaOnFEC) << 5) + static_cast<uint16_t>(channelOnSAMPA);
  const o2::tpc::PadPos pos = gpuMapping->FECIDToPadPos[globalSAMPAId];

  return pos;
#else
  return o2::tpc::PadPos{};
#endif
}

GPUd() void GPUTPCCFDecodeZSLinkBase::WriteCharge(processorType& clusterer, float charge, PadPos padAndRow, TPCFragmentTime localTime, size_t positionOffset)
{
  const uint32_t sector = clusterer.mISector;
  ChargePos* positions = clusterer.mPpositions;
#ifdef GPUCA_CHECK_TPCZS_CORRUPTION
  if (padAndRow.getRow() >= GPUCA_ROW_COUNT) {
    positions[positionOffset] = INVALID_CHARGE_POS;
    clusterer.raiseError(GPUErrors::ERROR_TPCZS_INVALID_ROW, clusterer.mISector * 1000 + padAndRow.getRow());
    return;
  }
#endif
  Array2D<PackedCharge> chargeMap(reinterpret_cast<PackedCharge*>(clusterer.mPchargeMap));

  ChargePos pos(padAndRow.getRow(), padAndRow.getPad(), localTime);
  positions[positionOffset] = pos;

  charge *= clusterer.GetConstantMem()->calibObjects.tpcPadGain->getGainCorrection(sector, padAndRow.getRow(), padAndRow.getPad());
  chargeMap[pos] = PackedCharge(charge);
}

GPUd() uint16_t GPUTPCCFDecodeZSLinkBase::FillWithInvalid(processorType& clusterer, int32_t iThread, int32_t nThreads, uint32_t pageDigitOffset, uint16_t nSamples)
{
  for (uint16_t i = iThread; i < nSamples; i += nThreads) {
    clusterer.mPpositions[pageDigitOffset + i] = INVALID_CHARGE_POS;
  }
  return nSamples;
}

// ===========================================================================
// ===========================================================================
// Decode ZS Dense Link
// ===========================================================================
// ===========================================================================
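// Decoder for the dense link-based format (ZSVersionDenseLinkBased). Links of
// the same timebin share a 16-bit timebin block header (12-bit bunch crossing,
// 4-bit link count), followed by per-link headers and one common packed ADC
// stream. The payload of the last timebin may spill over into the next page.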

template <>
GPUd() void GPUTPCCFDecodeZSDenseLink::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, int32_t firstHBF)
{
  Decode<GPUTPCCFDecodeZSDenseLink>(nBlocks, nThreads, iBlock, iThread, smem, clusterer, firstHBF);
}

GPUd() uint32_t GPUTPCCFDecodeZSDenseLink::DecodePage(GPUSharedMemory& smem, processorType& clusterer, int32_t iBlock, int32_t nThreads, int32_t iThread, const uint8_t* page, uint32_t pageDigitOffset, int32_t firstHBF)
{
#ifdef GPUCA_GPUCODE
  constexpr bool DecodeInParallel = true;
#else
  constexpr bool DecodeInParallel = false;
#endif

  const uint8_t* const pageStart = page;

  const auto* rawDataHeader = Peek<header::RAWDataHeader>(page);
  const auto* decHeader = Peek<TPCZSHDRV2>(page, raw::RDHUtils::getMemorySize(*rawDataHeader) - sizeof(TPCZSHDRV2));
  ConsumeHeader<header::RAWDataHeader>(page);

  assert(decHeader->version >= ZSVersionDenseLinkBased);

  uint16_t nSamplesWritten = 0;
  const uint16_t nSamplesInPage = decHeader->nADCsamples;

  const auto* payloadEnd = Peek(pageStart, raw::RDHUtils::getMemorySize(*rawDataHeader) - sizeof(TPCZSHDRV2) - ((decHeader->flags & TPCZSHDRV2::ZSFlags::TriggerWordPresent) ? TPCZSHDRV2::TRIGGER_WORD_SIZE : 0));
  const auto* nextPage = Peek(pageStart, TPCZSHDR::TPC_ZS_PAGE_SIZE);

  ConsumeBytes(page, decHeader->firstZSDataOffset - sizeof(o2::header::RAWDataHeader));

  for (uint16_t i = 0; i < decHeader->nTimebinHeaders; i++) {
    [[maybe_unused]] ptrdiff_t sizeLeftInPage = payloadEnd - page;
    assert(sizeLeftInPage > 0);

    uint16_t nSamplesWrittenTB = 0;

    if (i == decHeader->nTimebinHeaders - 1 && decHeader->flags & o2::tpc::TPCZSHDRV2::ZSFlags::payloadExtendsToNextPage) {
      assert(o2::raw::RDHUtils::getMemorySize(*rawDataHeader) == TPCZSHDR::TPC_ZS_PAGE_SIZE);
      if ((uint16_t)(raw::RDHUtils::getPageCounter(rawDataHeader) + 1) == raw::RDHUtils::getPageCounter(nextPage)) {
        nSamplesWrittenTB = DecodeTB<DecodeInParallel, true>(clusterer, smem, iThread, page, pageDigitOffset, rawDataHeader, firstHBF, decHeader->cruID, payloadEnd, nextPage);
      } else {
        nSamplesWrittenTB = FillWithInvalid(clusterer, iThread, nThreads, pageDigitOffset, nSamplesInPage - nSamplesWritten);
#ifdef GPUCA_CHECK_TPCZS_CORRUPTION
        if (iThread == 0) {
          clusterer.raiseError(GPUErrors::ERROR_TPCZS_INCOMPLETE_HBF, clusterer.mISector * 1000 + decHeader->cruID, raw::RDHUtils::getPageCounter(rawDataHeader), raw::RDHUtils::getPageCounter(nextPage));
        }
#endif
      }
    } else {
      nSamplesWrittenTB = DecodeTB<DecodeInParallel, false>(clusterer, smem, iThread, page, pageDigitOffset, rawDataHeader, firstHBF, decHeader->cruID, payloadEnd, nextPage);
    }

    assert(nSamplesWritten <= nSamplesInPage);
    nSamplesWritten += nSamplesWrittenTB;
    pageDigitOffset += nSamplesWrittenTB;
  } // for (uint16_t i = 0; i < decHeader->nTimebinHeaders; i++)

#ifdef GPUCA_CHECK_TPCZS_CORRUPTION
  if (iThread == 0 && nSamplesWritten != nSamplesInPage) {
    clusterer.raiseError(GPUErrors::ERROR_TPCZS_INVALID_NADC, clusterer.mISector * 1000 + decHeader->cruID, nSamplesInPage, nSamplesWritten);
    /*#ifndef GPUCA_GPUCODE
    FILE* foo = fopen("dump.bin", "w+b");
    fwrite(pageSrc, 1, o2::raw::RDHUtils::getMemorySize(*rdHdr), foo);
    fclose(foo);
    #endif*/
  }
#endif

  return pageDigitOffset;
}

template <bool DecodeInParallel, bool PayloadExtendsToNextPage>
GPUd() uint16_t GPUTPCCFDecodeZSDenseLink::DecodeTB(
  processorType& clusterer,
  [[maybe_unused]] GPUSharedMemory& smem,
  int32_t iThread,
  const uint8_t*& page,
  uint32_t pageDigitOffset,
  const header::RAWDataHeader* rawDataHeader,
  int32_t firstHBF,
  int32_t cru,
  [[maybe_unused]] const uint8_t* payloadEnd,
  [[maybe_unused]] const uint8_t* nextPage)
{

  if constexpr (DecodeInParallel) {
    return DecodeTBMultiThread<PayloadExtendsToNextPage>(clusterer, smem, iThread, page, pageDigitOffset, rawDataHeader, firstHBF, cru, payloadEnd, nextPage);
  } else {
    uint16_t nSamplesWritten = 0;
    if (iThread == 0) {
      nSamplesWritten = DecodeTBSingleThread<PayloadExtendsToNextPage>(clusterer, page, pageDigitOffset, rawDataHeader, firstHBF, cru, payloadEnd, nextPage);
    }
    return warp_broadcast(nSamplesWritten, 0);
  }
}

template <bool PayloadExtendsToNextPage>
GPUd() uint16_t GPUTPCCFDecodeZSDenseLink::DecodeTBMultiThread(
  processorType& clusterer,
  GPUSharedMemory& smem,
  const int32_t iThread,
  const uint8_t*& page,
  uint32_t pageDigitOffset,
  const header::RAWDataHeader* rawDataHeader,
  int32_t firstHBF,
  int32_t cru,
  [[maybe_unused]] const uint8_t* payloadEnd,
  [[maybe_unused]] const uint8_t* nextPage)
{
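// The dense payload may continue past payloadEnd on the following page. These
// helpers read across that boundary: MAYBE_PAGE_OVERFLOW moves a pointer that
// crossed payloadEnd behind the next page's RAWDataHeader, and PEEK_OVERFLOW
// dereferences a byte whose offset may already lie beyond the boundary.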
#define MAYBE_PAGE_OVERFLOW(pagePtr)                                 \
  if constexpr (PayloadExtendsToNextPage) {                          \
    if (pagePtr >= payloadEnd && pagePtr < nextPage) {               \
      ptrdiff_t diff = pagePtr - payloadEnd;                         \
      pagePtr = nextPage;                                            \
      ConsumeBytes(pagePtr, sizeof(header::RAWDataHeader) + diff);   \
    }                                                                \
  } else {                                                           \
    assert(pagePtr <= payloadEnd);                                   \
  }

#define PEEK_OVERFLOW(pagePtr, offset)                                                      \
  (*(PayloadExtendsToNextPage && (pagePtr) < nextPage && (pagePtr) + (offset) >= payloadEnd \
       ? nextPage + sizeof(header::RAWDataHeader) + ((pagePtr) + (offset)-payloadEnd)       \
       : (pagePtr) + (offset)))

#define TEST_BIT(x, bit) static_cast<bool>((x) & (1 << (bit)))

  constexpr int32_t NTHREADS = GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFDecodeZSDenseLink);
  static_assert(NTHREADS == GPUCA_WARP_SIZE, "Decoding TB Headers in parallel assumes block size is a single warp.");

  const CfFragment& fragment = clusterer.mPmemory->fragment;

  // Read timebin block header
  uint16_t tbbHdr = ConsumeByte(page);
  MAYBE_PAGE_OVERFLOW(page);
  tbbHdr |= static_cast<uint16_t>(ConsumeByte(page)) << CHAR_BIT;
  MAYBE_PAGE_OVERFLOW(page);

  uint8_t nLinksInTimebin = tbbHdr & 0x000F;
  uint16_t linkBC = (tbbHdr & 0xFFF0) >> 4;
  int32_t timeBin = (linkBC + (uint64_t)(raw::RDHUtils::getHeartBeatOrbit(*rawDataHeader) - firstHBF) * constants::lhc::LHCMaxBunches) / LHCBCPERTIMEBIN;

  uint16_t nSamplesInTB = 0;

  GPUbarrier();

  // Read timebin link headers
  for (uint8_t iLink = 0; iLink < nLinksInTimebin; iLink++) {
    uint8_t timebinLinkHeaderStart = ConsumeByte(page);
    MAYBE_PAGE_OVERFLOW(page);

    if (iThread == 0) {
      smem.linkIds[iLink] = timebinLinkHeaderStart & 0b00011111;
    }
    bool bitmaskIsFlat = timebinLinkHeaderStart & 0b00100000;

    uint16_t bitmaskL2 = 0x03FF;
    if (not bitmaskIsFlat) {
      bitmaskL2 = static_cast<uint16_t>(timebinLinkHeaderStart & 0b11000000) << 2 | static_cast<uint16_t>(ConsumeByte(page));
      MAYBE_PAGE_OVERFLOW(page);
    }

    int32_t nBytesBitmask = CAMath::Popcount(bitmaskL2);
    assert(nBytesBitmask <= 10);

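    // Each link serves up to 80 channels whose presence is encoded in at most
    // ten bitmask bytes; bitmaskL2 flags which of these bytes are stored. One
    // warp lane tests each channel and a warp-wide predicate scan compacts the
    // indices of active channels into smem.rawFECChannels.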
    for (int32_t chan = iThread; chan < CAMath::nextMultipleOf<NTHREADS>(80); chan += NTHREADS) {
      int32_t chanL2Idx = chan / 8;
      bool l2 = TEST_BIT(bitmaskL2, chanL2Idx);

      int32_t chanByteOffset = nBytesBitmask - 1 - CAMath::Popcount(bitmaskL2 >> (chanL2Idx + 1));

      uint8_t myChannelHasData = (chan < 80 && l2 ? TEST_BIT(PEEK_OVERFLOW(page, chanByteOffset), chan % 8) : 0);
      assert(myChannelHasData == 0 || myChannelHasData == 1);

      int32_t nSamplesStep;
      int32_t threadSampleOffset = CfUtils::warpPredicateScan(myChannelHasData, &nSamplesStep);

      if (myChannelHasData) {
        smem.rawFECChannels[nSamplesInTB + threadSampleOffset] = chan;
      }

      nSamplesInTB += nSamplesStep;
    }

    ConsumeBytes(page, nBytesBitmask);
    MAYBE_PAGE_OVERFLOW(page);

    if (iThread == 0) {
      smem.samplesPerLinkEnd[iLink] = nSamplesInTB;
    }

  } // for (uint8_t iLink = 0; iLink < nLinksInTimebin; iLink++)

  const uint8_t* adcData = ConsumeBytes(page, (nSamplesInTB * DECODE_BITS + 7) / 8);
  MAYBE_PAGE_OVERFLOW(page); // TODO: We don't need this check?

  if (not fragment.contains(timeBin)) {
    return FillWithInvalid(clusterer, iThread, NTHREADS, pageDigitOffset, nSamplesInTB);
  }

  GPUbarrier();

  // Unpack ADC
  int32_t iLink = 0;
  for (uint16_t sample = iThread; sample < nSamplesInTB; sample += NTHREADS) {
    const uint16_t adcBitOffset = sample * DECODE_BITS;
    uint16_t adcByteOffset = adcBitOffset / CHAR_BIT;
    const uint8_t adcOffsetInByte = adcBitOffset - adcByteOffset * CHAR_BIT;

    uint8_t bits = 0;
    uint16_t byte = 0;

    static_assert(DECODE_BITS <= sizeof(uint16_t) * CHAR_BIT);

    while (bits < DECODE_BITS) {
      byte |= static_cast<uint16_t>(PEEK_OVERFLOW(adcData, adcByteOffset)) << bits;
      adcByteOffset++;
      bits += CHAR_BIT;
    }
    byte >>= adcOffsetInByte;

    while (smem.samplesPerLinkEnd[iLink] <= sample) {
      iLink++;
    }

    int32_t rawFECChannelLink = smem.rawFECChannels[sample];

    // Unpack data for cluster finder
    o2::tpc::PadPos padAndRow = GetPadAndRowFromFEC(clusterer, cru, rawFECChannelLink, smem.linkIds[iLink]);

    float charge = ADCToFloat(byte, DECODE_MASK, DECODE_BITS_FACTOR);
    WriteCharge(clusterer, charge, padAndRow, fragment.toLocal(timeBin), pageDigitOffset + sample);

  } // for (uint16_t sample = iThread; sample < nSamplesInTB; sample += NTHREADS)

  assert(PayloadExtendsToNextPage || adcData <= page);
  assert(PayloadExtendsToNextPage || page <= payloadEnd);

  return nSamplesInTB;

#undef TEST_BIT
#undef PEEK_OVERFLOW
#undef MAYBE_PAGE_OVERFLOW
}

template <bool PayloadExtendsToNextPage>
GPUd() uint16_t GPUTPCCFDecodeZSDenseLink::DecodeTBSingleThread(
  processorType& clusterer,
  const uint8_t*& page,
  uint32_t pageDigitOffset,
  const header::RAWDataHeader* rawDataHeader,
  int32_t firstHBF,
  int32_t cru,
  [[maybe_unused]] const uint8_t* payloadEnd,
  [[maybe_unused]] const uint8_t* nextPage)
{
#define MAYBE_PAGE_OVERFLOW(pagePtr)                                 \
  if constexpr (PayloadExtendsToNextPage) {                          \
    if (pagePtr >= payloadEnd && pagePtr < nextPage) {               \
      ptrdiff_t diff = pagePtr - payloadEnd;                         \
      pagePtr = nextPage;                                            \
      ConsumeBytes(pagePtr, sizeof(header::RAWDataHeader) + diff);   \
    }                                                                \
  } else {                                                           \
    assert(pagePtr <= payloadEnd);                                   \
  }

  using zerosupp_link_based::ChannelPerTBHeader;

  const CfFragment& fragment = clusterer.mPmemory->fragment;

  uint8_t linkIds[MaxNLinksPerTimebin];
  uint8_t channelMasks[MaxNLinksPerTimebin * 10] = {0};
  uint16_t nSamplesWritten = 0;

  // Read timebin block header
  uint16_t tbbHdr = ConsumeByte(page);
  MAYBE_PAGE_OVERFLOW(page);
  tbbHdr |= static_cast<uint16_t>(ConsumeByte(page)) << CHAR_BIT;
  MAYBE_PAGE_OVERFLOW(page);

  uint8_t nLinksInTimebin = tbbHdr & 0x000F;
  uint16_t linkBC = (tbbHdr & 0xFFF0) >> 4;
  int32_t timeBin = (linkBC + (uint64_t)(raw::RDHUtils::getHeartBeatOrbit(*rawDataHeader) - firstHBF) * constants::lhc::LHCMaxBunches) / LHCBCPERTIMEBIN;

  uint16_t nSamplesInTB = 0;

  // Read timebin link headers
  for (uint8_t iLink = 0; iLink < nLinksInTimebin; iLink++) {
    uint8_t timebinLinkHeaderStart = ConsumeByte(page);
    MAYBE_PAGE_OVERFLOW(page);

    linkIds[iLink] = timebinLinkHeaderStart & 0b00011111;

    bool bitmaskIsFlat = timebinLinkHeaderStart & 0b00100000;

    uint16_t bitmaskL2 = 0x03FF; // ten bitmask bytes, consistent with the parallel decoder above
    if (not bitmaskIsFlat) {
      bitmaskL2 = static_cast<uint16_t>(timebinLinkHeaderStart & 0b11000000) << 2 | static_cast<uint16_t>(ConsumeByte(page));
      MAYBE_PAGE_OVERFLOW(page);
    }

    for (int32_t i = 0; i < 10; i++) {
      if (bitmaskL2 & 1 << i) {
        nSamplesInTB += CAMath::Popcount(*Peek(page));
        channelMasks[10 * iLink + i] = ConsumeByte(page);
        MAYBE_PAGE_OVERFLOW(page);
      }
    }

  } // for (uint8_t iLink = 0; iLink < nLinksInTimebin; iLink++)

  const uint8_t* adcData = ConsumeBytes(page, (nSamplesInTB * DECODE_BITS + 7) / 8);
  MAYBE_PAGE_OVERFLOW(page);

  if (not fragment.contains(timeBin)) {
    FillWithInvalid(clusterer, 0, 1, pageDigitOffset, nSamplesInTB);
    return nSamplesInTB;
  }

  // Unpack ADC
  uint32_t byte = 0, bits = 0;
  uint16_t rawFECChannel = 0;

  // unpack adc values, assume tightly packed data
  while (nSamplesWritten < nSamplesInTB) {
    byte |= static_cast<uint32_t>(ConsumeByte(adcData)) << bits;
    MAYBE_PAGE_OVERFLOW(adcData);
    bits += CHAR_BIT;
    while (bits >= DECODE_BITS) {

      // Find next channel with data
      for (; !ChannelIsActive(channelMasks, rawFECChannel); rawFECChannel++) {
      }

      int32_t iLink = rawFECChannel / ChannelPerTBHeader;
      int32_t rawFECChannelLink = rawFECChannel % ChannelPerTBHeader;

      // Unpack data for cluster finder
      o2::tpc::PadPos padAndRow = GetPadAndRowFromFEC(clusterer, cru, rawFECChannelLink, linkIds[iLink]);

      float charge = ADCToFloat(byte, DECODE_MASK, DECODE_BITS_FACTOR);
      WriteCharge(clusterer, charge, padAndRow, fragment.toLocal(timeBin), pageDigitOffset + nSamplesWritten);

      byte >>= DECODE_BITS;
      bits -= DECODE_BITS;
      nSamplesWritten++;
      rawFECChannel++; // Ensure we don't decode same channel twice
    } // while (bits >= DECODE_BITS)
  } // while (nSamplesWritten < nSamplesInTB)
936
937 assert(PayloadExtendsToNextPage || adcData <= page);
938 assert(PayloadExtendsToNextPage || page <= payloadEnd);
939 assert(nSamplesWritten == nSamplesInTB);
940
941 return nSamplesWritten;
942
943#undef MAYBE_PAGE_OVERFLOW
944}
945
946GPUd() bool GPUTPCCFDecodeZSDenseLink::ChannelIsActive(const uint8_t* chan, uint16_t chanIndex)
947{
948 constexpr uint8_t N_BITS_PER_ENTRY = sizeof(*chan) * CHAR_BIT;
949 const uint8_t entryIndex = chanIndex / N_BITS_PER_ENTRY;
950 const uint8_t bitInEntry = chanIndex % N_BITS_PER_ENTRY;
951 return chan[entryIndex] & (1 << bitInEntry);
952}