GPUTPCCFDecodeZS.cxx
// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
// All rights not expressly granted are reserved.
//
// This software is distributed under the terms of the GNU General Public
// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
//
// In applying this license CERN does not waive the privileges and immunities
// granted to it by virtue of its status as an Intergovernmental Organization
// or submit itself to any jurisdiction.

/// \file GPUTPCCFDecodeZS.cxx

#include "GPUTPCCFDecodeZS.h"
#include "GPUCommonMath.h"
#include "GPUTPCClusterFinder.h"
#include "CfArray2D.h"
#include "PackedCharge.h"
#include "CfUtils.h"
#include "DetectorsRaw/RDHUtils.h" // for the RDHUtils calls below; this include line was lost in extraction
#include "GPUCommonAlgorithm.h"
#include "TPCPadGainCalib.h"
#include "TPCZSLinkMapping.h"
#include "GPUTPCGeometry.h"

using namespace o2::gpu;
using namespace o2::gpu::tpccf;
using namespace o2::tpc;
using namespace o2::tpc::constants;

// ===========================================================================
// ===========================================================================
// Decode ZS Row
// ===========================================================================
// ===========================================================================

template <>
GPUdii() void GPUTPCCFDecodeZS::Thread<GPUTPCCFDecodeZS::decodeZS>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, int32_t firstHBF)
{
  GPUTPCCFDecodeZS::decode(clusterer, smem, nBlocks, nThreads, iBlock, iThread, firstHBF);
}

GPUdii() void GPUTPCCFDecodeZS::decode(GPUTPCClusterFinder& clusterer, GPUSharedMemory& s, int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t firstHBF)
{
  const uint32_t sector = clusterer.mISector;
#ifdef GPUCA_GPUCODE
  const uint32_t endpoint = clusterer.mPzsOffsets[iBlock].endpoint;
#else
  const uint32_t endpoint = iBlock;
#endif
  const GPUTrackingInOutZS::GPUTrackingInOutZSSector& zs = clusterer.GetConstantMem()->ioPtrs.tpcZS->sector[sector];
  if (zs.count[endpoint] == 0) {
    return;
  }
  CfChargePos* positions = clusterer.mPpositions;
  CfArray2D<PackedCharge> chargeMap(reinterpret_cast<PackedCharge*>(clusterer.mPchargeMap));
  const size_t nDigits = clusterer.mPzsOffsets[iBlock].offset;
  if (iThread == 0) {
    const int32_t region = endpoint / 2;
    s.nRowsRegion = GPUTPCGeometry::GetRegionRows(region);
    s.regionStartRow = GPUTPCGeometry::GetRegionStart(region);
    s.nThreadsPerRow = CAMath::Max(1u, nThreads / ((s.nRowsRegion + (endpoint & 1)) / 2));
    s.rowStride = nThreads / s.nThreadsPerRow;
    s.rowOffsetCounter = 0;
  }
  GPUbarrier();
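  // Threads are partitioned into groups of nThreadsPerRow: myRow selects which
  // row slot of the region this thread works on, mySequence its share of the
  // ADC sequences within that row.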
  const uint32_t myRow = iThread / s.nThreadsPerRow;
  const uint32_t mySequence = iThread % s.nThreadsPerRow;
#ifdef GPUCA_GPUCODE
  const uint32_t i = 0;
  const uint32_t j = clusterer.mPzsOffsets[iBlock].num;
  {
    {
#else
  for (uint32_t i = clusterer.mMinMaxCN[endpoint].zsPtrFirst; i < clusterer.mMinMaxCN[endpoint].zsPtrLast; i++) {
    const uint32_t minJ = (i == clusterer.mMinMaxCN[endpoint].zsPtrFirst) ? clusterer.mMinMaxCN[endpoint].zsPageFirst : 0;
    const uint32_t maxJ = (i + 1 == clusterer.mMinMaxCN[endpoint].zsPtrLast) ? clusterer.mMinMaxCN[endpoint].zsPageLast : zs.nZSPtr[endpoint][i];
    for (uint32_t j = minJ; j < maxJ; j++) {
#endif
      const uint32_t* pageSrc = (const uint32_t*)(((const uint8_t*)zs.zsPtr[endpoint][i]) + j * TPCZSHDR::TPC_ZS_PAGE_SIZE);
      CA_SHARED_CACHE_REF(&s.ZSPage[0], pageSrc, TPCZSHDR::TPC_ZS_PAGE_SIZE, uint32_t, pageCache);
      GPUbarrier();
      const uint8_t* page = (const uint8_t*)pageCache;
      const o2::header::RAWDataHeader* rdh = (const o2::header::RAWDataHeader*)page; // the RDH sits at the start of every ZS page
      if (o2::raw::RDHUtils::getMemorySize(*rdh) == sizeof(o2::header::RAWDataHeader)) {
#ifdef GPUCA_GPUCODE
        return;
#else
        continue;
#endif
      }
      const uint8_t* pagePtr = page + sizeof(o2::header::RAWDataHeader);
      const TPCZSHDR* hdr = reinterpret_cast<const TPCZSHDR*>(pagePtr);
      pagePtr += sizeof(*hdr);
      const bool decode12bit = hdr->version == 2;
      const uint32_t decodeBits = decode12bit ? TPCZSHDR::TPC_ZS_NBITS_V2 : TPCZSHDR::TPC_ZS_NBITS_V1;
      const float decodeBitsFactor = 1.f / (1 << (decodeBits - 10));
      uint32_t mask = (1 << decodeBits) - 1;
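      // decodeBitsFactor rescales stored samples to the common 10-bit ADC range:
      // for 12-bit data (version 2) the two extra bits become fractional precision.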
      int32_t timeBin = (hdr->timeOffset + (o2::raw::RDHUtils::getHeartBeatOrbit(*rdh) - firstHBF) * o2::constants::lhc::LHCMaxBunches) / LHCBCPERTIMEBIN;
      const int32_t rowOffset = s.regionStartRow + ((endpoint & 1) ? (s.nRowsRegion / 2) : 0);
      const int32_t nRows = (endpoint & 1) ? (s.nRowsRegion - s.nRowsRegion / 2) : (s.nRowsRegion / 2);

      for (int32_t l = 0; l < hdr->nTimeBinSpan; l++) { // TODO: Parallelize over time bins
        pagePtr += (pagePtr - page) & 1;                // Ensure 16 bit alignment
        const TPCZSTBHDR* tbHdr = reinterpret_cast<const TPCZSTBHDR*>(pagePtr);
        if ((tbHdr->rowMask & 0x7FFF) == 0) {
          pagePtr += 2;
          continue;
        }
        const int32_t nRowsUsed = CAMath::Popcount((uint32_t)(tbHdr->rowMask & 0x7FFF));
        pagePtr += 2 * nRowsUsed;

        GPUbarrier();
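        // Per-row output offsets: rowData[2 * *rowData] is the total sample count
        // of a row (the last entry of its cumulative-count array), accumulated
        // into the shared counter with an atomic add.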
        for (int32_t n = iThread; n < nRowsUsed; n += nThreads) {
          const uint8_t* rowData = n == 0 ? pagePtr : (page + tbHdr->rowAddr1()[n - 1]);
          s.RowClusterOffset[n] = CAMath::AtomicAddShared<uint32_t>(&s.rowOffsetCounter, rowData[2 * *rowData]);
        }
        /*if (iThread < GPUCA_WARP_SIZE) { // TODO: Seems to miscompile with HIP, CUDA performance doesn't really change, for now sticking to the AtomicAdd
          GPUSharedMemory& smem = s;
          int32_t o;
          if (iThread < nRowsUsed) {
            const uint8_t* rowData = iThread == 0 ? pagePtr : (page + tbHdr->rowAddr1()[iThread - 1]);
            o = rowData[2 * *rowData];
          } else {
            o = 0;
          }
          int32_t x = warp_scan_inclusive_add(o);
          if (iThread < nRowsUsed) {
            s.RowClusterOffset[iThread] = s.rowOffsetCounter + x - o;
          } else if (iThread == GPUCA_WARP_SIZE - 1) {
            s.rowOffsetCounter += x;
          }
        }*/
        GPUbarrier();

        if (myRow < s.rowStride) {
          for (int32_t m = myRow; m < nRows; m += s.rowStride) {
            if ((tbHdr->rowMask & (1 << m)) == 0) {
              continue;
            }
            const int32_t rowPos = CAMath::Popcount((uint32_t)(tbHdr->rowMask & ((1 << m) - 1)));
            size_t nDigitsTmp = nDigits + s.RowClusterOffset[rowPos];
            const uint8_t* rowData = rowPos == 0 ? pagePtr : (page + tbHdr->rowAddr1()[rowPos - 1]);
            const int32_t nSeqRead = *rowData;
            const int32_t nSeqPerThread = (nSeqRead + s.nThreadsPerRow - 1) / s.nThreadsPerRow;
            const int32_t mySequenceStart = mySequence * nSeqPerThread;
            const int32_t mySequenceEnd = CAMath::Min(mySequenceStart + nSeqPerThread, nSeqRead);
            if (mySequenceEnd > mySequenceStart) {
              const uint8_t* adcData = rowData + 2 * nSeqRead + 1;
              const uint32_t nSamplesStart = mySequenceStart ? rowData[2 * mySequenceStart] : 0;
              nDigitsTmp += nSamplesStart;
              uint32_t nADCStartBits = nSamplesStart * decodeBits;
              const uint32_t nADCStart = (nADCStartBits + 7) / 8;
              const int32_t nADC = (rowData[2 * mySequenceEnd] * decodeBits + 7) / 8;
              adcData += nADCStart;
              nADCStartBits &= 0x7;
              uint32_t byte = 0, bits = 0;
              if (nADCStartBits) { // % 8 != 0
                bits = 8 - nADCStartBits;
                byte = ((*(adcData - 1) & (0xFF ^ ((1 << nADCStartBits) - 1)))) >> nADCStartBits;
              }
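              // Streaming bit-unpacker: accumulate bytes into 'byte' and emit one
              // decodeBits-wide sample whenever enough bits are available; the
              // partially consumed first byte was pre-loaded above.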
              int32_t nSeq = mySequenceStart;
              int32_t seqLen = nSeq ? (rowData[(nSeq + 1) * 2] - rowData[nSeq * 2]) : rowData[2];
              Pad pad = rowData[nSeq++ * 2 + 1];
              for (int32_t n = nADCStart; n < nADC; n++) {
                byte |= *(adcData++) << bits;
                bits += 8;
                while (bits >= decodeBits) {
                  if (seqLen == 0) {
                    seqLen = rowData[(nSeq + 1) * 2] - rowData[nSeq * 2];
                    pad = rowData[nSeq++ * 2 + 1];
                  }
                  const CfFragment& fragment = clusterer.mPmemory->fragment;
                  TPCTime globalTime = timeBin + l;
                  bool inFragment = fragment.contains(globalTime);
                  Row row = rowOffset + m;
                  CfChargePos pos(row, Pad(pad), inFragment ? fragment.toLocal(globalTime) : INVALID_TIME_BIN);
                  positions[nDigitsTmp++] = pos;

                  if (inFragment) {
                    float q = float(byte & mask) * decodeBitsFactor;
                    q *= clusterer.GetConstantMem()->calibObjects.tpcPadGain->getGainCorrection(sector, row, pad);
                    chargeMap[pos] = PackedCharge(q);
                  }
                  pad++;
                  byte = byte >> decodeBits;
                  bits -= decodeBits;
                  seqLen--;
                }
              }
            }
          }
        }
        if (nRowsUsed > 1) {
          pagePtr = page + tbHdr->rowAddr1()[nRowsUsed - 2];
        }
        pagePtr += 2 * *pagePtr;                        // Go to entry for last sequence length
        pagePtr += 1 + (*pagePtr * decodeBits + 7) / 8; // Go to beginning of next time bin
      }
    }
  }
}

// ===========================================================================
// ===========================================================================
// Decode ZS Link
// ===========================================================================
// ===========================================================================

template <>
GPUdii() void GPUTPCCFDecodeZSLink::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, int32_t firstHBF)
{
  Decode<GPUTPCCFDecodeZSLink>(nBlocks, nThreads, iBlock, iThread, smem, clusterer, firstHBF);
}

GPUd() size_t GPUTPCCFDecodeZSLink::DecodePage(GPUSharedMemory& smem, processorType& clusterer, int32_t iBlock, int32_t nThreads, int32_t iThread, const uint8_t* page, uint32_t pageDigitOffset, int32_t firstHBF)
{
  const CfFragment& fragment = clusterer.mPmemory->fragment;

  const auto* rdHdr = ConsumeHeader<header::RAWDataHeader>(page);

  if (o2::raw::RDHUtils::getMemorySize(*rdHdr) == sizeof(o2::header::RAWDataHeader)) {
    return pageDigitOffset;
  }

  [[maybe_unused]] int32_t nDecoded = 0;
  const auto* decHdr = ConsumeHeader<TPCZSHDRV2>(page);
  ConsumeBytes(page, decHdr->firstZSDataOffset * 16);

  assert(decHdr->version == ZSVersionLinkBasedWithMeta);

  for (uint32_t t = 0; t < decHdr->nTimebinHeaders; t++) {
    const auto* tbHdr = ConsumeHeader<zerosupp_link_based::CommonHeader>(page);
    const auto* adcData = ConsumeBytes(page, tbHdr->numWordsPayload * 16); // Page now points to next timebin or past the page

    int32_t timeBin = (decHdr->timeOffset + tbHdr->bunchCrossing + (uint64_t)(o2::raw::RDHUtils::getHeartBeatOrbit(*rdHdr) - firstHBF) * o2::constants::lhc::LHCMaxBunches) / LHCBCPERTIMEBIN;

    uint32_t channelMask[3];
    GetChannelBitmask(*tbHdr, channelMask);
    uint32_t nAdc = CAMath::Popcount(channelMask[0]) + CAMath::Popcount(channelMask[1]) + CAMath::Popcount(channelMask[2]);

    bool inFragment = fragment.contains(timeBin);
    nDecoded += nAdc;

    // TimeBin not in fragment: Skip this timebin header and fill positions with dummy values instead
    if (not inFragment) {
      pageDigitOffset += FillWithInvalid(clusterer, iThread, nThreads, pageDigitOffset, nAdc);
      continue;
    }

#ifdef GPUCA_GPUCODE
    DecodeTBMultiThread(
      clusterer,
      iThread,
      smem,
      adcData,
      nAdc,
      channelMask,
      timeBin,
      decHdr->cruID,
      tbHdr->fecInPartition,
      pageDigitOffset);
#else // CPU
    DecodeTBSingleThread(
      clusterer,
      adcData,
      nAdc,
      channelMask,
      timeBin,
      decHdr->cruID,
      tbHdr->fecInPartition,
      pageDigitOffset);
#endif
    pageDigitOffset += nAdc;
  } // for (uint32_t t = 0; t < decHdr->nTimebinHeaders; t++)

#ifdef GPUCA_CHECK_TPCZS_CORRUPTION
  if (iThread == 0 && nDecoded != decHdr->nADCsamples) {
    clusterer.raiseError(GPUErrors::ERROR_TPCZS_INVALID_NADC, clusterer.mISector * 1000 + decHdr->cruID, decHdr->nADCsamples, nDecoded);
    /*#ifndef GPUCA_GPUCODE
    FILE* foo = fopen("dump.bin", "w+b");
    fwrite(pageSrc, 1, o2::raw::RDHUtils::getMemorySize(*rdHdr), foo);
    fclose(foo);
    #endif*/
  }
#endif
  return pageDigitOffset;
}

GPUd() void GPUTPCCFDecodeZSLink::DecodeTBSingleThread(
  processorType& clusterer,
  const uint8_t* adcData,
  uint32_t nAdc,
  const uint32_t* channelMask,
  int32_t timeBin,
  int32_t cru,
  int32_t fecInPartition,
  uint32_t pageDigitOffset)
{
  const CfFragment& fragment = clusterer.mPmemory->fragment;

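  // Two payload layouts exist: samples either tightly bit-packed back to back
  // (TIGHTLY_PACKED_V3), or stored SAMPLESPER64BIT to a 64-bit word.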
  if constexpr (TPCZSHDRV2::TIGHTLY_PACKED_V3) {

    uint32_t byte = 0, bits = 0, nSamplesWritten = 0, rawFECChannel = 0;

    // unpack adc values, assume tightly packed data
    while (nSamplesWritten < nAdc) {
      byte |= adcData[0] << bits;
      adcData++;
      bits += CHAR_BIT;
      while (bits >= DECODE_BITS) {

        // Find next channel with data
        for (; !ChannelIsActive(channelMask, rawFECChannel); rawFECChannel++) {
        }

        // Unpack data for cluster finder
        o2::tpc::PadPos padAndRow = GetPadAndRowFromFEC(clusterer, cru, rawFECChannel, fecInPartition);

        WriteCharge(clusterer, byte, padAndRow, fragment.toLocal(timeBin), pageDigitOffset + nSamplesWritten);

        byte = byte >> DECODE_BITS;
        bits -= DECODE_BITS;
        nSamplesWritten++;
        rawFECChannel++; // Ensure we don't decode same channel twice
      } // while (bits >= DECODE_BITS)
    } // while (nSamplesWritten < nAdc)

  } else { // ! TPCZSHDRV2::TIGHTLY_PACKED_V3
    uint32_t rawFECChannel = 0;
    const uint64_t* adcData64 = (const uint64_t*)adcData;
    for (uint32_t j = 0; j < nAdc; j++) {
      for (; !ChannelIsActive(channelMask, rawFECChannel); rawFECChannel++) {
      }

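      // Sample j lives in 64-bit word j / SAMPLESPER64BIT; shift by the sample's
      // position within that word and mask off DECODE_BITS bits.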
      uint32_t adc = (adcData64[j / TPCZSHDRV2::SAMPLESPER64BIT] >> ((j % TPCZSHDRV2::SAMPLESPER64BIT) * DECODE_BITS)) & DECODE_MASK;

      o2::tpc::PadPos padAndRow = GetPadAndRowFromFEC(clusterer, cru, rawFECChannel, fecInPartition);
      float charge = ADCToFloat(adc, DECODE_MASK, DECODE_BITS_FACTOR);
      WriteCharge(clusterer, charge, padAndRow, fragment.toLocal(timeBin), pageDigitOffset + j);
      rawFECChannel++;
    }
  }
}

GPUd() void GPUTPCCFDecodeZSLink::DecodeTBMultiThread(
  processorType& clusterer,
  int32_t iThread,
  GPUSharedMemory& smem,
  const uint8_t* adcData,
  uint32_t nAdc,
  const uint32_t* channelMask,
  int32_t timeBin,
  int32_t cru,
  int32_t fecInPartition,
  uint32_t pageDigitOffset)
{
  constexpr int32_t NTHREADS = GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFDecodeZSLink);
  static_assert(NTHREADS == GPUCA_WARP_SIZE, "Decoding TB Headers in parallel assumes block size is a single warp.");

  uint8_t blockOffset = 0;
  for (uint8_t i = iThread; blockOffset < nAdc; i += NTHREADS) {

    uint8_t rawFECChannel = i;

    uint8_t myChannelActive = ChannelIsActive(channelMask, rawFECChannel);

    uint8_t myOffset = warp_scan_inclusive_add(myChannelActive) - 1 + blockOffset;
    blockOffset = warp_broadcast(myOffset, NTHREADS - 1) + 1;
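    // Warp-wide compaction: the inclusive scan over the 0/1 activity flags gives
    // each active thread a dense output slot; broadcasting the last lane's value
    // advances the running count of active channels for the next iteration.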

    // Decode entire timebin at once if we have enough threads
    // This should further improve performance, but code below is buggy...
    // if (nAdc <= NThreads) {
    //   for (int32_t j = 1; blockOffset < nAdc; j++) {
    //     rawFECChannel = myChannelActive ? rawFECChannel : (iThread + j*NThreads - myOffset);

    //     bool iAmIdle = not myChannelActive;

    //     myChannelActive =
    //       rawFECChannel < zerosupp_link_based::CommonHeaderlPerTBHeader
    //         ? BitIsSet(channelMask, rawFECChannel)
    //         : false;

    //     uint8_t newOffset = warp_scan_inclusive_add(static_cast<uint8_t>(myChannelActive && iAmIdle)) - 1 + blockOffset;
    //     blockOffset = warp_broadcast(newOffset, NThreads - 1) + 1;

    //     myOffset = iAmIdle ? newOffset : myOffset;
    //   }
    // }

    if (not myChannelActive) {
      continue;
    }
    assert(myOffset < nAdc);

    uint32_t adc = 0;

    if constexpr (TPCZSHDRV2::TIGHTLY_PACKED_V3) {

      // Try to access adcData with 4 byte reads instead of 1 byte.
      // You'd think this would improve performance, but it's actually slower...
      // const uint32_t* adcDataU32 = reinterpret_cast<const uint32_t*>(adcData);

      uint32_t adcBitOffset = myOffset * DECODE_BITS;
      uint32_t adcByteOffset = adcBitOffset / CHAR_BIT;
      uint32_t adcOffsetInByte = adcBitOffset - adcByteOffset * CHAR_BIT;
      // uint32_t adcByteOffset = adcBitOffset / 32;
      // uint32_t adcOffsetInByte = adcBitOffset - adcByteOffset * 32;

      uint32_t byte = 0, bits = 0;

      // uint32_t byte = adcDataU32[adcByteOffset] >> adcOffsetInByte;
      // uint32_t bits = 32 - adcOffsetInByte;
      // adcByteOffset++;

      while (bits < DECODE_BITS) {
        byte |= ((uint32_t)adcData[adcByteOffset]) << bits;
        // byte |= adcDataU32[adcByteOffset] << bits;
        adcByteOffset++;
        bits += CHAR_BIT;
        // bits += 32;
      }
      adc = byte >> adcOffsetInByte;

    } else { // ! TPCZSHDRV2::TIGHTLY_PACKED_V3
      const uint64_t* adcData64 = (const uint64_t*)adcData;
      adc = (adcData64[myOffset / TPCZSHDRV2::SAMPLESPER64BIT] >> ((myOffset % TPCZSHDRV2::SAMPLESPER64BIT) * DECODE_BITS)) & DECODE_MASK;
    }

    o2::tpc::PadPos padAndRow = GetPadAndRowFromFEC(clusterer, cru, rawFECChannel, fecInPartition);
    const CfFragment& fragment = clusterer.mPmemory->fragment;
    float charge = ADCToFloat(adc, DECODE_MASK, DECODE_BITS_FACTOR);
    WriteCharge(clusterer, charge, padAndRow, fragment.toLocal(timeBin), pageDigitOffset + myOffset);

  } // for (uint8_t i = iThread; blockOffset < nAdc; i += NThreads)
}

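// The link-based timebin header carries an 80-bit channel mask (64 bits in
// bitMaskLow plus the remaining 16 in bitMaskHigh); unpack it into three
// 32-bit words so single bits can be tested cheaply.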
GPUd() void GPUTPCCFDecodeZSLink::GetChannelBitmask(const zerosupp_link_based::CommonHeader& tbHdr, uint32_t* chan)
{
  chan[0] = tbHdr.bitMaskLow & 0xfffffffful;
  chan[1] = tbHdr.bitMaskLow >> (sizeof(uint32_t) * CHAR_BIT);
  chan[2] = tbHdr.bitMaskHigh;
}

GPUd() bool GPUTPCCFDecodeZSLink::ChannelIsActive(const uint32_t* chan, uint8_t chanIndex)
{
  if (chanIndex >= zerosupp_link_based::ChannelPerTBHeader) {
    return false;
  }
  constexpr uint8_t N_BITS_PER_ENTRY = sizeof(*chan) * CHAR_BIT;
  const uint8_t entryIndex = chanIndex / N_BITS_PER_ENTRY;
  const uint8_t bitInEntry = chanIndex % N_BITS_PER_ENTRY;
  return chan[entryIndex] & (1 << bitInEntry);
}

// ===========================================================================
// ===========================================================================
// Decode ZS Link Base
// ===========================================================================
// ===========================================================================

template <class Decoder>
GPUd() void GPUTPCCFDecodeZSLinkBase::Decode(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, typename Decoder::GPUSharedMemory& smem, processorType& clusterer, int32_t firstHBF)
{
  const uint32_t sector = clusterer.mISector;

#ifdef GPUCA_GPUCODE
  const uint32_t endpoint = clusterer.mPzsOffsets[iBlock].endpoint;
#else // CPU
  const uint32_t endpoint = iBlock;
#endif
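  // On GPU each block decodes exactly one ZS page, looked up via mPzsOffsets;
  // on CPU one "block" iterates over all pages of its endpoint.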

  const GPUTrackingInOutZS::GPUTrackingInOutZSSector& zs = clusterer.GetConstantMem()->ioPtrs.tpcZS->sector[sector];
  if (zs.count[endpoint] == 0) {
    return;
  }

  uint32_t pageDigitOffset = clusterer.mPzsOffsets[iBlock].offset;

#ifdef GPUCA_GPUCODE
  const uint32_t i = 0;
  const uint32_t j = clusterer.mPzsOffsets[iBlock].num;
  {
    {
#else // CPU
  for (uint32_t i = clusterer.mMinMaxCN[endpoint].zsPtrFirst; i < clusterer.mMinMaxCN[endpoint].zsPtrLast; i++) {
    const uint32_t minJ = (i == clusterer.mMinMaxCN[endpoint].zsPtrFirst) ? clusterer.mMinMaxCN[endpoint].zsPageFirst : 0;
    const uint32_t maxJ = (i + 1 == clusterer.mMinMaxCN[endpoint].zsPtrLast) ? clusterer.mMinMaxCN[endpoint].zsPageLast : zs.nZSPtr[endpoint][i];
    for (uint32_t j = minJ; j < maxJ; j++) {
#endif
      const uint32_t* pageSrc = (const uint32_t*)(((const uint8_t*)zs.zsPtr[endpoint][i]) + j * TPCZSHDR::TPC_ZS_PAGE_SIZE);
      // Cache zs page in shared memory. Curiously this actually degrades performance...
      // CA_SHARED_CACHE_REF(&smem.ZSPage[0], pageSrc, TPCZSHDR::TPC_ZS_PAGE_SIZE, uint32_t, pageCache);
      // GPUbarrier();
      // const uint8_t* page = (const uint8_t*)pageCache;
      const uint8_t* page = (const uint8_t*)pageSrc;

      const auto* rdHdr = Peek<header::RAWDataHeader>(page);

      if (o2::raw::RDHUtils::getMemorySize(*rdHdr) == sizeof(o2::header::RAWDataHeader)) {
#ifdef GPUCA_GPUCODE
        return;
#else
        continue;
#endif
      }

      pageDigitOffset = Decoder::DecodePage(smem, clusterer, iBlock, nThreads, iThread, page, pageDigitOffset, firstHBF);
    } // [CPU] for (uint32_t j = minJ; j < maxJ; j++)
  } // [CPU] for (uint32_t i = clusterer.mMinMaxCN[endpoint].zsPtrFirst; i < clusterer.mMinMaxCN[endpoint].zsPtrLast; i++)

#ifdef GPUCA_CHECK_TPCZS_CORRUPTION
  if (iThread == 0 && iBlock < nBlocks - 1) {
    uint32_t maxOffset = clusterer.mPzsOffsets[iBlock + 1].offset;
    if (pageDigitOffset != maxOffset) {
      clusterer.raiseError(GPUErrors::ERROR_TPCZS_INVALID_OFFSET, clusterer.mISector * 1000 + endpoint, pageDigitOffset, maxOffset);
    }
  }
#endif
}

GPUd() o2::tpc::PadPos GPUTPCCFDecodeZSLinkBase::GetPadAndRowFromFEC(processorType& clusterer, int32_t cru, int32_t rawFECChannel, int32_t fecInPartition)
{
#ifdef GPUCA_TPC_GEOMETRY_O2
  // Ported from tpc::Mapper (Not available on GPU...)
  constexpr GPUTPCGeometry geo;

  const int32_t regionIter = cru % 2;
  const int32_t istreamm = ((rawFECChannel % 10) / 2);
  const int32_t partitionStream = istreamm + regionIter * 5;
  const int32_t sampaOnFEC = geo.GetSampaMapping(partitionStream);
  const int32_t channel = (rawFECChannel % 2) + 2 * (rawFECChannel / 10);
  const int32_t channelOnSAMPA = channel + geo.GetChannelOffset(partitionStream);

  const int32_t partition = (cru % 10) / 2;
  const int32_t fecInSector = geo.GetSectorFECOffset(partition) + fecInPartition;

  const TPCZSLinkMapping* gpuMapping = clusterer.GetConstantMem()->calibObjects.tpcZSLinkMapping;
  assert(gpuMapping != nullptr);

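  // Global SAMPA channel id layout: fecInSector in bits 8 and up, sampaOnFEC in
  // bits 5-7, channelOnSAMPA in bits 0-4; it indexes the precomputed pad table.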
  uint16_t globalSAMPAId = (static_cast<uint16_t>(fecInSector) << 8) + (static_cast<uint16_t>(sampaOnFEC) << 5) + static_cast<uint16_t>(channelOnSAMPA);
  const o2::tpc::PadPos pos = gpuMapping->FECIDToPadPos[globalSAMPAId];

  return pos;
#else
  return o2::tpc::PadPos{};
#endif
}

GPUd() void GPUTPCCFDecodeZSLinkBase::WriteCharge(processorType& clusterer, float charge, PadPos padAndRow, TPCFragmentTime localTime, size_t positionOffset)
{
  const uint32_t sector = clusterer.mISector;
  CfChargePos* positions = clusterer.mPpositions;
#ifdef GPUCA_CHECK_TPCZS_CORRUPTION
  if (padAndRow.getRow() >= GPUCA_ROW_COUNT) {
    positions[positionOffset] = INVALID_CHARGE_POS;
    clusterer.raiseError(GPUErrors::ERROR_TPCZS_INVALID_ROW, clusterer.mISector * 1000 + padAndRow.getRow());
    return;
  }
#endif
  CfArray2D<PackedCharge> chargeMap(reinterpret_cast<PackedCharge*>(clusterer.mPchargeMap));

  CfChargePos pos(padAndRow.getRow(), padAndRow.getPad(), localTime);
  positions[positionOffset] = pos;

  charge *= clusterer.GetConstantMem()->calibObjects.tpcPadGain->getGainCorrection(sector, padAndRow.getRow(), padAndRow.getPad());

  chargeMap[pos] = PackedCharge(charge);
}

GPUd() uint16_t GPUTPCCFDecodeZSLinkBase::FillWithInvalid(processorType& clusterer, int32_t iThread, int32_t nThreads, uint32_t pageDigitOffset, uint16_t nSamples)
{
  for (uint16_t i = iThread; i < nSamples; i += nThreads) {
    clusterer.mPpositions[pageDigitOffset + i] = INVALID_CHARGE_POS; // mark the slot so the cluster finder skips it
  }
  return nSamples;
}

// ===========================================================================
// ===========================================================================
// Decode ZS Dense Link
// ===========================================================================
// ===========================================================================

template <>
GPUd() void GPUTPCCFDecodeZSDenseLink::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, int32_t firstHBF)
{
  Decode<GPUTPCCFDecodeZSDenseLink>(nBlocks, nThreads, iBlock, iThread, smem, clusterer, firstHBF);
}

GPUd() uint32_t GPUTPCCFDecodeZSDenseLink::DecodePage(GPUSharedMemory& smem, processorType& clusterer, int32_t iBlock, int32_t nThreads, int32_t iThread, const uint8_t* page, uint32_t pageDigitOffset, int32_t firstHBF)
{
#ifdef GPUCA_GPUCODE
  constexpr bool DecodeInParallel = true;
#else
  constexpr bool DecodeInParallel = false;
#endif

  const uint8_t* const pageStart = page;

  const auto* rawDataHeader = Peek<header::RAWDataHeader>(page);
  const auto* decHeader = Peek<TPCZSHDRV2>(page, raw::RDHUtils::getMemorySize(*rawDataHeader) - sizeof(TPCZSHDRV2));
  ConsumeHeader<header::RAWDataHeader>(page);

  uint16_t nSamplesWritten = 0;
  const uint16_t nSamplesInPage = decHeader->nADCsamples;

  const auto* payloadEnd = Peek(pageStart, raw::RDHUtils::getMemorySize(*rawDataHeader) - sizeof(TPCZSHDRV2) - ((decHeader->flags & TPCZSHDRV2::ZSFlags::TriggerWordPresent) ? TPCZSHDRV2::TRIGGER_WORD_SIZE : 0));
  const auto* nextPage = Peek(pageStart, TPCZSHDR::TPC_ZS_PAGE_SIZE);

  const bool extendsToNextPage = decHeader->flags & TPCZSHDRV2::ZSFlags::payloadExtendsToNextPage;
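  // payloadEnd marks the last decodable byte of this page (the TPCZSHDRV2 meta
  // header and an optional trigger word sit at the end); if the payload extends
  // into the following page, decoding continues there just past its RDH.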

  ConsumeBytes(page, decHeader->firstZSDataOffset - sizeof(o2::header::RAWDataHeader));

  int err = GPUErrors::ERROR_NONE;

  if (decHeader->version < ZSVersionDenseLinkBased) {
    err = GPUErrors::ERROR_TPCZS_VERSION_MISMATCH;
  }

  if (decHeader->magicWord != TPCZSHDRV2::MAGIC_WORD) { // NOTE: the condition of this check was lost in extraction; the compared constant's identifier is assumed
    err = GPUErrors::ERROR_TPCZS_INVALID_MAGIC_WORD;
  }

  for (uint16_t i = 0; i < decHeader->nTimebinHeaders && !err; i++) {

    ptrdiff_t sizeLeftInPage = payloadEnd - page;
    if (sizeLeftInPage <= 0) {
      err = GPUErrors::ERROR_TPCZS_PAGE_OVERFLOW;
      break;
    }

    int16_t nSamplesWrittenTB = 0;
    uint16_t nSamplesLeftInPage = nSamplesInPage - nSamplesWritten;

    if (i == decHeader->nTimebinHeaders - 1 && extendsToNextPage) {
      if (raw::RDHUtils::getMemorySize(*rawDataHeader) != TPCZSHDR::TPC_ZS_PAGE_SIZE) {
        err = GPUErrors::ERROR_TPCZS_PAGE_OVERFLOW;
        break;
      }

      if ((uint16_t)(raw::RDHUtils::getPageCounter(rawDataHeader) + 1) == raw::RDHUtils::getPageCounter(nextPage)) {
        nSamplesWrittenTB = DecodeTB<DecodeInParallel, true>(clusterer, smem, iThread, page, pageDigitOffset, rawDataHeader, firstHBF, decHeader->cruID, nSamplesLeftInPage, payloadEnd, nextPage);
      } else {
        err = GPUErrors::ERROR_TPCZS_INCOMPLETE_HBF;
        break;
      }
    } else {
      nSamplesWrittenTB = DecodeTB<DecodeInParallel, false>(clusterer, smem, iThread, page, pageDigitOffset, rawDataHeader, firstHBF, decHeader->cruID, nSamplesLeftInPage, payloadEnd, nextPage);
    }

    // Abort decoding the page if an error was detected.
    if (nSamplesWrittenTB < 0) {
      err = -nSamplesWrittenTB;
      break;
    }

    nSamplesWritten += nSamplesWrittenTB;
    pageDigitOffset += nSamplesWrittenTB;
  } // for (uint16_t i = 0; i < decHeader->nTimebinHeaders; i++)

  if (nSamplesWritten != nSamplesInPage) {
    if (nSamplesWritten < nSamplesInPage) {
      pageDigitOffset += FillWithInvalid(clusterer, iThread, nThreads, pageDigitOffset, nSamplesInPage - nSamplesWritten);
    }
    err = !err ? GPUErrors::ERROR_TPCZS_INVALID_NADC : err; // Ensure we don't overwrite any previous error
  }

  if (iThread == 0 && err) {
    [[maybe_unused]] bool dumpPage = false;

    if (err == GPUErrors::ERROR_TPCZS_VERSION_MISMATCH) {
      clusterer.raiseError(err, decHeader->version, ZSVersionDenseLinkBased);
    } else if (err == GPUErrors::ERROR_TPCZS_INVALID_MAGIC_WORD) {
      clusterer.raiseError(err, decHeader->magicWord);
    } else if (err == GPUErrors::ERROR_TPCZS_INCOMPLETE_HBF) {
      clusterer.raiseError(err, clusterer.mISector * 1000 + decHeader->cruID, raw::RDHUtils::getPageCounter(rawDataHeader), raw::RDHUtils::getPageCounter(nextPage));
    } else if (err == GPUErrors::ERROR_TPCZS_PAGE_OVERFLOW) {
      clusterer.raiseError(err, extendsToNextPage);
      dumpPage = true;
    } else if (err == GPUErrors::ERROR_TPCZS_INVALID_NADC) {
      clusterer.raiseError(err, nSamplesInPage, nSamplesWritten, extendsToNextPage);
      dumpPage = true;
    } else {
      clusterer.raiseError(GPUErrors::ERROR_TPCZS_UNKNOWN, err);
    }

#ifdef GPUCA_CHECK_TPCZS_CORRUPTION
#ifndef GPUCA_GPUCODE
    if (dumpPage) {
      // allocate more space on the stack for fname, so it can be overwritten by hand in a debugger.
      const char fname[64] = "dump00.bin";
      FILE* foo = fopen(fname, "w+b");
      fwrite(pageStart, 1, TPCZSHDR::TPC_ZS_PAGE_SIZE, foo);
      fclose(foo);
    }
#endif
#endif
  }

  return pageDigitOffset;
}

template <bool DecodeInParallel, bool PayloadExtendsToNextPage>
GPUd() int16_t GPUTPCCFDecodeZSDenseLink::DecodeTB(
  processorType& clusterer,
  [[maybe_unused]] GPUSharedMemory& smem,
  int32_t iThread,
  const uint8_t*& page,
  uint32_t pageDigitOffset,
  const header::RAWDataHeader* rawDataHeader,
  int32_t firstHBF,
  int32_t cru,
  uint16_t nSamplesLeftInPage,
  const uint8_t* payloadEnd,
  const uint8_t* nextPage)
{

  if constexpr (DecodeInParallel) {
    return DecodeTBMultiThread<PayloadExtendsToNextPage>(clusterer, smem, iThread, page, pageDigitOffset, rawDataHeader, firstHBF, cru, nSamplesLeftInPage, payloadEnd, nextPage);
  } else {
    int16_t nSamplesWritten = 0;
    if (iThread == 0) {
      nSamplesWritten = DecodeTBSingleThread<PayloadExtendsToNextPage>(clusterer, page, pageDigitOffset, rawDataHeader, firstHBF, cru, nSamplesLeftInPage, payloadEnd, nextPage);
    }
    return warp_broadcast(nSamplesWritten, 0);
  }
}

template <bool PayloadExtendsToNextPage>
GPUd() int16_t GPUTPCCFDecodeZSDenseLink::DecodeTBMultiThread(
  processorType& clusterer,
  GPUSharedMemory& smem,
  const int32_t iThread,
  const uint8_t*& page,
  uint32_t pageDigitOffset,
  const header::RAWDataHeader* rawDataHeader,
  int32_t firstHBF,
  int32_t cru,
  uint16_t nSamplesLeftInPage,
  const uint8_t* payloadEnd,
  const uint8_t* nextPage)
{
#define MAYBE_PAGE_OVERFLOW(pagePtr)                               \
  if constexpr (PayloadExtendsToNextPage) {                        \
    if (pagePtr >= payloadEnd && pagePtr < nextPage) {             \
      ptrdiff_t diff = pagePtr - payloadEnd;                       \
      pagePtr = nextPage;                                          \
      ConsumeBytes(pagePtr, sizeof(header::RAWDataHeader) + diff); \
    }                                                              \
  } else {                                                         \
    if (pagePtr > payloadEnd) {                                    \
      return -GPUErrors::ERROR_TPCZS_PAGE_OVERFLOW;                \
    }                                                              \
  }

#define PEEK_OVERFLOW(pagePtr, offset)                                                      \
  (*(PayloadExtendsToNextPage && (pagePtr) < nextPage && (pagePtr) + (offset) >= payloadEnd \
       ? nextPage + sizeof(header::RAWDataHeader) + ((pagePtr) + (offset) - payloadEnd)     \
       : (pagePtr) + (offset)))

#define TEST_BIT(x, bit) static_cast<bool>((x) & (1 << (bit)))

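// MAYBE_PAGE_OVERFLOW advances a pointer that ran past this page's payload to
// the matching position in the next page, skipping that page's RDH (or flags
// an overflow error when the payload must not extend). PEEK_OVERFLOW performs
// the same redirection for a single byte read without moving the pointer.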
  constexpr int32_t NTHREADS = GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFDecodeZSDenseLink);
  static_assert(NTHREADS == GPUCA_WARP_SIZE, "Decoding TB Headers in parallel assumes block size is a single warp.");

  const CfFragment& fragment = clusterer.mPmemory->fragment;

  // Read timebin block header
  uint16_t tbbHdr = ConsumeByte(page);
  MAYBE_PAGE_OVERFLOW(page);
  tbbHdr |= static_cast<uint16_t>(ConsumeByte(page)) << CHAR_BIT;
  MAYBE_PAGE_OVERFLOW(page);

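  // Timebin block header layout: low 4 bits = number of links in this timebin,
  // high 12 bits = bunch crossing of the timebin.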
  uint8_t nLinksInTimebin = tbbHdr & 0x000F;
  uint16_t linkBC = (tbbHdr & 0xFFF0) >> 4;
  int32_t timeBin = (linkBC + (uint64_t)(raw::RDHUtils::getHeartBeatOrbit(*rawDataHeader) - firstHBF) * constants::lhc::LHCMaxBunches) / LHCBCPERTIMEBIN;

  int16_t nSamplesInTB = 0;

  // Read timebin link headers
  for (uint8_t iLink = 0; iLink < nLinksInTimebin; iLink++) {
    uint8_t timebinLinkHeaderStart = ConsumeByte(page);
    MAYBE_PAGE_OVERFLOW(page);

    if (iThread == 0) {
      smem.linkIds[iLink] = timebinLinkHeaderStart & 0b00011111;
    }
    bool bitmaskIsFlat = timebinLinkHeaderStart & 0b00100000;

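    // bitmaskL2 is a 10-bit level-2 mask marking which of the 10 per-link
    // channel-mask bytes follow; a "flat" mask means all 10 bytes are present.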
    uint16_t bitmaskL2 = 0x03FF;
    if (not bitmaskIsFlat) {
      bitmaskL2 = static_cast<uint16_t>(timebinLinkHeaderStart & 0b11000000) << 2 | static_cast<uint16_t>(ConsumeByte(page));
      MAYBE_PAGE_OVERFLOW(page);
    }

    int32_t nBytesBitmask = CAMath::Popcount(bitmaskL2);

    for (int32_t chan = iThread; chan < CAMath::nextMultipleOf<NTHREADS>(80); chan += NTHREADS) {
      int32_t chanL2Idx = chan / 8;
      bool l2 = TEST_BIT(bitmaskL2, chanL2Idx);

      int32_t chanByteOffset = nBytesBitmask - 1 - CAMath::Popcount(bitmaskL2 >> (chanL2Idx + 1));

      uint8_t myChannelHasData = (chan < 80 && l2 ? TEST_BIT(PEEK_OVERFLOW(page, chanByteOffset), chan % 8) : 0);

      int32_t nSamplesStep;
      int32_t threadSampleOffset = CfUtils::warpPredicateScan(myChannelHasData, &nSamplesStep);
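      // warpPredicateScan compacts the active channels: each thread with data
      // receives a dense slot index, and nSamplesStep returns how many channels
      // the whole warp found active in this pass.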
811
812 if (myChannelHasData) {
813 smem.rawFECChannels[nSamplesInTB + threadSampleOffset] = chan;
814 }
815
816 nSamplesInTB += nSamplesStep;
817 }
818
819 ConsumeBytes(page, nBytesBitmask);
821
822 if (iThread == 0) {
823 smem.samplesPerLinkEnd[iLink] = nSamplesInTB;
824 }
825
826 } // for (uint8_t iLink = 0; iLink < nLinksInTimebin; iLink++)
827
828 GPUbarrierWarp(); // Ensure all writes to shared memory are finished, before reading it
829
830 if (nSamplesInTB > nSamplesLeftInPage) {
831 return -GPUErrors::ERROR_TPCZS_INVALID_NADC;
832 }
833
834 // This needs to happen BEFORE checking if the timebin is in fragment
835 // to ensure ADC bytes are always consumed, even if data isn't decoded
836 const uint8_t* adcData = ConsumeBytes(page, (nSamplesInTB * DECODE_BITS + 7) / 8);
838
839 if (not fragment.contains(timeBin)) {
840 return FillWithInvalid(clusterer, iThread, NTHREADS, pageDigitOffset, nSamplesInTB);
841 }
842
843 // Unpack ADC
844 int32_t iLink = 0;
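  // Each thread unpacks its own sample: compute the sample's absolute bit
  // offset, assemble enough bytes into 'byte', then shift off the sub-byte
  // remainder to align the DECODE_BITS-wide value.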
  for (uint16_t sample = iThread; sample < nSamplesInTB; sample += NTHREADS) {
    const uint16_t adcBitOffset = sample * DECODE_BITS;
    uint16_t adcByteOffset = adcBitOffset / CHAR_BIT;
    const uint8_t adcOffsetInByte = adcBitOffset - adcByteOffset * CHAR_BIT;

    uint8_t bits = 0;
    uint16_t byte = 0;

    static_assert(DECODE_BITS <= sizeof(uint16_t) * CHAR_BIT);

    while (bits < DECODE_BITS) {
      byte |= static_cast<uint16_t>(PEEK_OVERFLOW(adcData, adcByteOffset)) << bits;
      adcByteOffset++;
      bits += CHAR_BIT;
    }
    byte >>= adcOffsetInByte;

    while (smem.samplesPerLinkEnd[iLink] <= sample) {
      iLink++;
    }

    int32_t rawFECChannelLink = smem.rawFECChannels[sample];

    // Unpack data for cluster finder
    o2::tpc::PadPos padAndRow = GetPadAndRowFromFEC(clusterer, cru, rawFECChannelLink, smem.linkIds[iLink]);

    float charge = ADCToFloat(byte, DECODE_MASK, DECODE_BITS_FACTOR);
    WriteCharge(clusterer, charge, padAndRow, fragment.toLocal(timeBin), pageDigitOffset + sample);

  } // for (uint16_t sample = iThread; sample < nSamplesInTB; sample += NTHREADS)

  GPUbarrierWarp(); // Ensure all reads to shared memory are finished, before decoding next header into shmem

  return nSamplesInTB;

#undef TEST_BIT
#undef PEEK_OVERFLOW
#undef MAYBE_PAGE_OVERFLOW
}

template <bool PayloadExtendsToNextPage>
GPUd() int16_t GPUTPCCFDecodeZSDenseLink::DecodeTBSingleThread(
  processorType& clusterer,
  const uint8_t*& page,
  uint32_t pageDigitOffset,
  const header::RAWDataHeader* rawDataHeader,
  int32_t firstHBF,
  int32_t cru,
  uint16_t nSamplesLeftInPage,
  [[maybe_unused]] const uint8_t* payloadEnd,
  [[maybe_unused]] const uint8_t* nextPage)
{
#define MAYBE_PAGE_OVERFLOW(pagePtr)                               \
  if constexpr (PayloadExtendsToNextPage) {                        \
    if (pagePtr >= payloadEnd && pagePtr < nextPage) {             \
      ptrdiff_t diff = pagePtr - payloadEnd;                       \
      pagePtr = nextPage;                                          \
      ConsumeBytes(pagePtr, sizeof(header::RAWDataHeader) + diff); \
    }                                                              \
  } else {                                                         \
    if (pagePtr > payloadEnd) {                                    \
      return -GPUErrors::ERROR_TPCZS_PAGE_OVERFLOW;                \
    }                                                              \
  }

  using zerosupp_link_based::ChannelPerTBHeader;

  const CfFragment& fragment = clusterer.mPmemory->fragment;

  uint8_t linkIds[MaxNLinksPerTimebin];
  uint8_t channelMasks[MaxNLinksPerTimebin * 10] = {0};
  uint16_t nSamplesWritten = 0;

  // Read timebin block header
  uint16_t tbbHdr = ConsumeByte(page);
  MAYBE_PAGE_OVERFLOW(page);
  tbbHdr |= static_cast<uint16_t>(ConsumeByte(page)) << CHAR_BIT;
  MAYBE_PAGE_OVERFLOW(page);

  uint8_t nLinksInTimebin = tbbHdr & 0x000F;
  uint16_t linkBC = (tbbHdr & 0xFFF0) >> 4;
  int32_t timeBin = (linkBC + (uint64_t)(raw::RDHUtils::getHeartBeatOrbit(*rawDataHeader) - firstHBF) * constants::lhc::LHCMaxBunches) / LHCBCPERTIMEBIN;

  uint16_t nSamplesInTB = 0;

  // Read timebin link headers
  for (uint8_t iLink = 0; iLink < nLinksInTimebin; iLink++) {
    uint8_t timebinLinkHeaderStart = ConsumeByte(page);
    MAYBE_PAGE_OVERFLOW(page);

    linkIds[iLink] = timebinLinkHeaderStart & 0b00011111;

    bool bitmaskIsFlat = timebinLinkHeaderStart & 0b00100000;

    uint16_t bitmaskL2 = 0x03FF; // 10-bit L2 mask, matching the multi-threaded decoder
    if (not bitmaskIsFlat) {
      bitmaskL2 = static_cast<uint16_t>(timebinLinkHeaderStart & 0b11000000) << 2 | static_cast<uint16_t>(ConsumeByte(page));
      MAYBE_PAGE_OVERFLOW(page);
    }

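    // Each set L2 bit means one channel-mask byte follows; peek to count its
    // set bits (active channels) before consuming it into channelMasks.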
    for (int32_t i = 0; i < 10; i++) {
      if (bitmaskL2 & 1 << i) {
        nSamplesInTB += CAMath::Popcount(*Peek(page));
        channelMasks[10 * iLink + i] = ConsumeByte(page);
        MAYBE_PAGE_OVERFLOW(page);
      }
    }

  } // for (uint8_t iLink = 0; iLink < nLinksInTimebin; iLink++)

  if (nSamplesInTB > nSamplesLeftInPage) {
    return -GPUErrors::ERROR_TPCZS_INVALID_NADC;
  }

  const uint8_t* adcData = ConsumeBytes(page, (nSamplesInTB * DECODE_BITS + 7) / 8);
  MAYBE_PAGE_OVERFLOW(page);

  if (not fragment.contains(timeBin)) {
    return FillWithInvalid(clusterer, 0, 1, pageDigitOffset, nSamplesInTB);
  }

  // Unpack ADC
  uint32_t byte = 0, bits = 0;
  uint16_t rawFECChannel = 0;

  // unpack adc values, assume tightly packed data
  while (nSamplesWritten < nSamplesInTB) {
    byte |= static_cast<uint32_t>(ConsumeByte(adcData)) << bits;
    MAYBE_PAGE_OVERFLOW(adcData);
    bits += CHAR_BIT;
    while (bits >= DECODE_BITS) {

      // Find next channel with data
      for (; !ChannelIsActive(channelMasks, rawFECChannel); rawFECChannel++) {
      }

      int32_t iLink = rawFECChannel / ChannelPerTBHeader;
      int32_t rawFECChannelLink = rawFECChannel % ChannelPerTBHeader;

      // Unpack data for cluster finder
      o2::tpc::PadPos padAndRow = GetPadAndRowFromFEC(clusterer, cru, rawFECChannelLink, linkIds[iLink]);

      float charge = ADCToFloat(byte, DECODE_MASK, DECODE_BITS_FACTOR);
      WriteCharge(clusterer, charge, padAndRow, fragment.toLocal(timeBin), pageDigitOffset + nSamplesWritten);

      byte >>= DECODE_BITS;
      bits -= DECODE_BITS;
      nSamplesWritten++;
      rawFECChannel++; // Ensure we don't decode same channel twice
    } // while (bits >= DECODE_BITS)
  } // while (nSamplesWritten < nSamplesInTB)

  return nSamplesWritten;

#undef MAYBE_PAGE_OVERFLOW
}

GPUd() bool GPUTPCCFDecodeZSDenseLink::ChannelIsActive(const uint8_t* chan, uint16_t chanIndex)
{
  constexpr uint8_t N_BITS_PER_ENTRY = sizeof(*chan) * CHAR_BIT;
  const uint8_t entryIndex = chanIndex / N_BITS_PER_ENTRY;
  const uint8_t bitInEntry = chanIndex % N_BITS_PER_ENTRY;
  return chan[entryIndex] & (1 << bitInEntry);
}