GPUTPCCFDecodeZS::decode(clusterer, smem, nBlocks, nThreads, iBlock, iThread, firstHBF);
const uint32_t sector = clusterer.mISector;
const uint32_t endpoint = clusterer.mPzsOffsets[iBlock].endpoint;
ChargePos* positions = clusterer.mPpositions;
const size_t nDigits = clusterer.mPzsOffsets[iBlock].offset;
s.nRowsRegion = clusterer.Param().tpcGeometry.GetRegionRows(region);
s.regionStartRow = clusterer.Param().tpcGeometry.GetRegionStart(region);
s.nThreadsPerRow = CAMath::Max(1u, nThreads / ((s.nRowsRegion + (endpoint & 1)) / 2));
s.rowStride = nThreads / s.nThreadsPerRow;
s.rowOffsetCounter = 0;
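// Work split within the block: threads are grouped by row (myRow selects which rows of
// the region a thread works on, stepping by s.rowStride) and by ADC-sequence chunk within
// a row (mySequence), with s.nThreadsPerRow threads sharing one row.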
const uint32_t myRow = iThread / s.nThreadsPerRow;
const uint32_t mySequence = iThread % s.nThreadsPerRow;
const uint32_t j = clusterer.mPzsOffsets[iBlock].num;
for (uint32_t i = clusterer.mMinMaxCN[endpoint].zsPtrFirst; i < clusterer.mMinMaxCN[endpoint].zsPtrLast; i++) {
const uint32_t minJ = (i == clusterer.mMinMaxCN[endpoint].zsPtrFirst) ? clusterer.mMinMaxCN[endpoint].zsPageFirst : 0;
const uint32_t maxJ = (i + 1 == clusterer.mMinMaxCN[endpoint].zsPtrLast) ? clusterer.mMinMaxCN[endpoint].zsPageLast : zs.nZSPtr[endpoint][i];
for (uint32_t j = minJ; j < maxJ; j++) {
pagePtr += sizeof(*hdr);
const bool decode12bit = hdr->version == 2;
const float decodeBitsFactor = 1.f / (1 << (decodeBits - 10));
uint32_t mask = (1 << decodeBits) - 1;
const int32_t rowOffset = s.regionStartRow + ((endpoint & 1) ? (s.nRowsRegion / 2) : 0);
const int32_t nRows = (endpoint & 1) ? (s.nRowsRegion - s.nRowsRegion / 2) : (s.nRowsRegion / 2);
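// rowOffset/nRows above: each region is split between its two endpoints; the even endpoint
// covers the lower half of the rows, the odd endpoint the remaining upper half.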
for (int32_t l = 0; l < hdr->nTimeBinSpan; l++) {
pagePtr += (pagePtr - page) & 1;
if ((tbHdr->rowMask & 0x7FFF) == 0) {
const int32_t nRowsUsed = CAMath::Popcount((uint32_t)(tbHdr->rowMask & 0x7FFF));
pagePtr += 2 * nRowsUsed;
for (int32_t n = iThread; n < nRowsUsed; n += nThreads) {
const uint8_t* rowData = n == 0 ? pagePtr : (page + tbHdr->rowAddr1()[n - 1]);
s.RowClusterOffset[n] = CAMath::AtomicAddShared<uint32_t>(&s.rowOffsetCounter, rowData[2 * *rowData]);
if (myRow < s.rowStride) {
for (int32_t m = myRow; m < nRows; m += s.rowStride) {
if ((tbHdr->rowMask & (1 << m)) == 0) {
const int32_t rowPos = CAMath::Popcount((uint32_t)(tbHdr->rowMask & ((1 << m) - 1)));
size_t nDigitsTmp = nDigits + s.RowClusterOffset[rowPos];
const uint8_t* rowData = rowPos == 0 ? pagePtr : (page + tbHdr->rowAddr1()[rowPos - 1]);
const int32_t nSeqRead = *rowData;
const int32_t nSeqPerThread = (nSeqRead + s.nThreadsPerRow - 1) / s.nThreadsPerRow;
const int32_t mySequenceStart = mySequence * nSeqPerThread;
const int32_t mySequenceEnd = CAMath::Min(mySequenceStart + nSeqPerThread, nSeqRead);
if (mySequenceEnd > mySequenceStart) {
const uint8_t* adcData = rowData + 2 * nSeqRead + 1;
const uint32_t nSamplesStart = mySequenceStart ? rowData[2 * mySequenceStart] : 0;
nDigitsTmp += nSamplesStart;
uint32_t nADCStartBits = nSamplesStart * decodeBits;
const uint32_t nADCStart = (nADCStartBits + 7) / 8;
const int32_t nADC = (rowData[2 * mySequenceEnd] * decodeBits + 7) / 8;
adcData += nADCStart;
nADCStartBits &= 0x7;
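// Bit-stream decoding: 'byte' acts as a shift register holding not-yet-consumed bits and
// 'bits' counts how many of them are valid; below it is pre-loaded with the leftover bits
// of the byte in which this thread's first sample starts.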
uint32_t byte = 0, bits = 0;
bits = 8 - nADCStartBits;
byte = ((*(adcData - 1) & (0xFF ^ ((1 << nADCStartBits) - 1)))) >> nADCStartBits;
int32_t nSeq = mySequenceStart;
int32_t seqLen = nSeq ? (rowData[(nSeq + 1) * 2] - rowData[nSeq * 2]) : rowData[2];
Pad pad = rowData[nSeq++ * 2 + 1];
for (int32_t n = nADCStart; n < nADC; n++) {
byte |= *(adcData++) << bits;
while (bits >= decodeBits) {
seqLen = rowData[(nSeq + 1) * 2] - rowData[nSeq * 2];
pad = rowData[nSeq++ * 2 + 1];
const CfFragment& fragment = clusterer.mPmemory->fragment;
TPCTime globalTime = timeBin + l;
bool inFragment = fragment.contains(globalTime);
positions[nDigitsTmp++] = pos;
float q = float(byte & mask) * decodeBitsFactor;
q *= clusterer.GetConstantMem()->calibObjects.tpcPadGain->getGainCorrection(sector, row, pad);
byte = byte >> decodeBits;
pagePtr = page + tbHdr->rowAddr1()[nRowsUsed - 2];
pagePtr += 2 * *pagePtr;
pagePtr += 1 + (*pagePtr * decodeBits + 7) / 8;
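// A minimal standalone sketch of the bit-unpacking scheme used above, assuming samples of
// width decodeBits (10 or 12) packed back to back into a byte stream; the helper name
// unpackSamples is illustrative only.
#include <cstddef>
#include <cstdint>
#include <vector>

static std::vector<uint16_t> unpackSamples(const uint8_t* data, size_t nBytes, uint32_t decodeBits)
{
  std::vector<uint16_t> samples;
  const uint32_t mask = (1u << decodeBits) - 1;
  uint32_t byte = 0, bits = 0; // shift register and number of valid bits in it
  for (size_t n = 0; n < nBytes; n++) {
    byte |= static_cast<uint32_t>(data[n]) << bits; // append 8 fresh bits above the pending ones
    bits += 8;
    while (bits >= decodeBits) { // emit every complete sample now available
      samples.push_back(static_cast<uint16_t>(byte & mask));
      byte >>= decodeBits;
      bits -= decodeBits;
    }
  }
  return samples;
}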
GPUdii() void GPUTPCCFDecodeZSLink::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, int32_t firstHBF)
Decode<GPUTPCCFDecodeZSLink>(nBlocks, nThreads, iBlock, iThread, smem, clusterer, firstHBF);
GPUd() size_t GPUTPCCFDecodeZSLink::DecodePage(GPUSharedMemory& smem, processorType& clusterer, int32_t iBlock, int32_t nThreads, int32_t iThread, const uint8_t* page, uint32_t pageDigitOffset, int32_t firstHBF)
const CfFragment& fragment = clusterer.mPmemory->fragment;
const auto* rdHdr = ConsumeHeader<header::RAWDataHeader>(page);
return pageDigitOffset;
int32_t nDecoded = 0;
const auto* decHdr = ConsumeHeader<TPCZSHDRV2>(page);
ConsumeBytes(page, decHdr->firstZSDataOffset * 16);
for (uint32_t t = 0; t < decHdr->nTimebinHeaders; t++) {
const auto* tbHdr = ConsumeHeader<zerosupp_link_based::CommonHeader>(page);
const auto* adcData = ConsumeBytes(page, tbHdr->numWordsPayload * 16);
uint32_t channelMask[3];
GetChannelBitmask(*tbHdr, channelMask);
uint32_t nAdc = CAMath::Popcount(channelMask[0]) + CAMath::Popcount(channelMask[1]) + CAMath::Popcount(channelMask[2]);
bool inFragment = fragment.contains(timeBin);
if (not inFragment) {
pageDigitOffset += FillWithInvalid(clusterer, iThread, nThreads, pageDigitOffset, nAdc);
tbHdr->fecInPartition,
DecodeTBSingleThread(
tbHdr->fecInPartition,
pageDigitOffset += nAdc;
#ifdef GPUCA_CHECK_TPCZS_CORRUPTION
if (iThread == 0 && nDecoded != decHdr->nADCsamples) {
clusterer.raiseError(GPUErrors::ERROR_TPCZS_INVALID_NADC, clusterer.mISector * 1000 + decHdr->cruID, decHdr->nADCsamples, nDecoded);
return pageDigitOffset;
processorType& clusterer,
const uint8_t* adcData,
const uint32_t* channelMask,
int32_t fecInPartition,
uint32_t pageDigitOffset)
const CfFragment& fragment = clusterer.mPmemory->fragment;
uint32_t byte = 0, bits = 0, nSamplesWritten = 0, rawFECChannel = 0;
while (nSamplesWritten < nAdc) {
byte |= adcData[0] << bits;
while (bits >= DECODE_BITS) {
for (; !ChannelIsActive(channelMask, rawFECChannel); rawFECChannel++) {
o2::tpc::PadPos padAndRow = GetPadAndRowFromFEC(clusterer, cru, rawFECChannel, fecInPartition);
WriteCharge(clusterer, byte, padAndRow, fragment.toLocal(timeBin), pageDigitOffset + nSamplesWritten);
byte = byte >> DECODE_BITS;
uint32_t rawFECChannel = 0;
const uint64_t* adcData64 = (const uint64_t*)adcData;
for (uint32_t j = 0; j < nAdc; j++) {
for (; !ChannelIsActive(channelMask, rawFECChannel); rawFECChannel++) {
o2::tpc::PadPos padAndRow = GetPadAndRowFromFEC(clusterer, cru, rawFECChannel, fecInPartition);
float charge = ADCToFloat(adc, DECODE_MASK, DECODE_BITS_FACTOR);
WriteCharge(clusterer, charge, padAndRow, fragment.toLocal(timeBin), pageDigitOffset + j);
processorType& clusterer,
GPUSharedMemory& smem,
const uint8_t* adcData,
const uint32_t* channelMask,
int32_t fecInPartition,
uint32_t pageDigitOffset)
static_assert(NTHREADS == GPUCA_WARP_SIZE, "Decoding TB Headers in parallel assumes block size is a single warp.");
for (uint8_t i = iThread; blockOffset < nAdc; i += NTHREADS) {
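// Each thread of the warp tests one FEC channel; an inclusive warp scan over the active
// flags yields a compact per-thread output offset, and the last lane's value (broadcast
// to the warp) advances blockOffset for the next batch of channels.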
uint8_t myChannelActive = ChannelIsActive(channelMask, rawFECChannel);
uint8_t myOffset = warp_scan_inclusive_add(myChannelActive) - 1 + blockOffset;
blockOffset = warp_broadcast(myOffset, NTHREADS - 1) + 1;
if (not myChannelActive) {
assert(myOffset < nAdc);
uint32_t adcBitOffset = myOffset * DECODE_BITS;
uint32_t adcByteOffset = adcBitOffset / CHAR_BIT;
uint32_t adcOffsetInByte = adcBitOffset - adcByteOffset * CHAR_BIT;
uint32_t byte = 0, bits = 0;
while (bits < DECODE_BITS) {
byte |= ((uint32_t)adcData[adcByteOffset]) << bits;
adc = byte >> adcOffsetInByte;
const uint64_t* adcData64 = (const uint64_t*)adcData;
o2::tpc::PadPos padAndRow = GetPadAndRowFromFEC(clusterer, cru, rawFECChannel, fecInPartition);
const CfFragment& fragment = clusterer.mPmemory->fragment;
float charge = ADCToFloat(adc, DECODE_MASK, DECODE_BITS_FACTOR);
WriteCharge(clusterer, charge, padAndRow, fragment.toLocal(timeBin), pageDigitOffset + myOffset);
chan[0] = tbHdr.bitMaskLow & 0xfffffffful;
chan[1] = tbHdr.bitMaskLow >> (sizeof(uint32_t) * CHAR_BIT);
chan[2] = tbHdr.bitMaskHigh;
if (chanIndex >= zerosupp_link_based::ChannelPerTBHeader) {
constexpr uint8_t N_BITS_PER_ENTRY = sizeof(*chan) * CHAR_BIT;
const uint8_t entryIndex = chanIndex / N_BITS_PER_ENTRY;
const uint8_t bitInEntry = chanIndex % N_BITS_PER_ENTRY;
return chan[entryIndex] & (1 << bitInEntry);
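// A minimal sketch of the channel-bitmask handling above, assuming an 80-channel link
// (zerosupp_link_based::ChannelPerTBHeader) whose mask arrives as a 64-bit low word plus
// a high word and is unpacked into three 32-bit entries; the type and function names here
// are illustrative only.
#include <cstdint>

struct TbBitmaskSketch {
  uint64_t bitMaskLow;  // channels 0..63
  uint32_t bitMaskHigh; // channels 64..79
};

static void getChannelBitmaskSketch(const TbBitmaskSketch& tbHdr, uint32_t chan[3])
{
  chan[0] = static_cast<uint32_t>(tbHdr.bitMaskLow & 0xfffffffful);
  chan[1] = static_cast<uint32_t>(tbHdr.bitMaskLow >> 32);
  chan[2] = tbHdr.bitMaskHigh;
}

static bool channelIsActiveSketch(const uint32_t chan[3], uint8_t chanIndex)
{
  if (chanIndex >= 80) { // ChannelPerTBHeader
    return false;
  }
  constexpr uint8_t N_BITS_PER_ENTRY = sizeof(uint32_t) * 8;
  return chan[chanIndex / N_BITS_PER_ENTRY] & (1u << (chanIndex % N_BITS_PER_ENTRY));
}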
template <class Decoder>
GPUd() void GPUTPCCFDecodeZSLinkBase::Decode(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, typename Decoder::GPUSharedMemory& smem, processorType& clusterer, int32_t firstHBF)
const uint32_t sector = clusterer.mISector;
const uint32_t endpoint = clusterer.mPzsOffsets[iBlock].endpoint;
uint32_t pageDigitOffset = clusterer.mPzsOffsets[iBlock].offset;
const uint32_t i = 0;
const uint32_t j = clusterer.mPzsOffsets[iBlock].num;
for (uint32_t i = clusterer.mMinMaxCN[endpoint].zsPtrFirst; i < clusterer.mMinMaxCN[endpoint].zsPtrLast; i++) {
const uint32_t minJ = (i == clusterer.mMinMaxCN[endpoint].zsPtrFirst) ? clusterer.mMinMaxCN[endpoint].zsPageFirst : 0;
for (uint32_t j = minJ; j < maxJ; j++) {
const uint8_t* page = (const uint8_t*)pageSrc;
const auto* rdHdr = Peek<header::RAWDataHeader>(page);
pageDigitOffset = Decoder::DecodePage(smem, clusterer, iBlock, nThreads, iThread, page, pageDigitOffset, firstHBF);
#ifdef GPUCA_CHECK_TPCZS_CORRUPTION
if (iThread == 0 && iBlock < nBlocks - 1) {
uint32_t maxOffset = clusterer.mPzsOffsets[iBlock + 1].offset;
if (pageDigitOffset != maxOffset) {
clusterer.raiseError(GPUErrors::ERROR_TPCZS_INVALID_OFFSET, clusterer.mISector * 1000 + endpoint, pageDigitOffset, maxOffset);
GPUd() o2::tpc::PadPos GPUTPCCFDecodeZSLinkBase::GetPadAndRowFromFEC(processorType& clusterer, int32_t cru, int32_t rawFECChannel, int32_t fecInPartition)
#ifdef GPUCA_TPC_GEOMETRY_O2
const int32_t regionIter = cru % 2;
const int32_t istreamm = ((rawFECChannel % 10) / 2);
const int32_t partitionStream = istreamm + regionIter * 5;
const int32_t sampaOnFEC = geo.GetSampaMapping(partitionStream);
const int32_t channel = (rawFECChannel % 2) + 2 * (rawFECChannel / 10);
const int32_t channelOnSAMPA = channel + geo.GetChannelOffset(partitionStream);
const int32_t partition = (cru % 10) / 2;
const int32_t fecInSector = geo.GetSectorFECOffset(partition) + fecInPartition;
assert(gpuMapping != nullptr);
uint16_t globalSAMPAId = (static_cast<uint16_t>(fecInSector) << 8) + (static_cast<uint16_t>(sampaOnFEC) << 5) + static_cast<uint16_t>(channelOnSAMPA);
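// Global SAMPA channel id packing above: bits [15:8] = FEC index within the sector,
// bits [7:5] = SAMPA chip on the FEC, bits [4:0] = channel on the SAMPA.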
GPUd() void GPUTPCCFDecodeZSLinkBase::WriteCharge(processorType& clusterer, float charge, PadPos padAndRow, TPCFragmentTime localTime, size_t positionOffset)
#ifdef GPUCA_CHECK_TPCZS_CORRUPTION
charge *= clusterer.GetConstantMem()->calibObjects.tpcPadGain->getGainCorrection(sector, padAndRow.getRow(), padAndRow.getPad());
GPUd() uint16_t GPUTPCCFDecodeZSLinkBase::FillWithInvalid(processorType& clusterer, int32_t iThread, int32_t nThreads, uint32_t pageDigitOffset, uint16_t nSamples)
GPUd() void GPUTPCCFDecodeZSDenseLink::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, int32_t firstHBF)
Decode<GPUTPCCFDecodeZSDenseLink>(nBlocks, nThreads, iBlock, iThread, smem, clusterer, firstHBF);
GPUd() uint32_t GPUTPCCFDecodeZSDenseLink::DecodePage(GPUSharedMemory& smem, processorType& clusterer, int32_t iBlock, int32_t nThreads, int32_t iThread, const uint8_t* page, uint32_t pageDigitOffset, int32_t firstHBF)
constexpr bool DecodeInParallel = true;
constexpr bool DecodeInParallel = false;
const uint8_t* const pageStart = page;
const auto* rawDataHeader = Peek<header::RAWDataHeader>(page);
const auto* decHeader = Peek<TPCZSHDRV2>(page, raw::RDHUtils::getMemorySize(*rawDataHeader) - sizeof(TPCZSHDRV2));
ConsumeHeader<header::RAWDataHeader>(page);
uint16_t nSamplesWritten = 0;
const uint16_t nSamplesInPage = decHeader->nADCsamples;
const auto* payloadEnd = Peek(pageStart, raw::RDHUtils::getMemorySize(*rawDataHeader) - sizeof(TPCZSHDRV2) - ((decHeader->flags & TPCZSHDRV2::ZSFlags::TriggerWordPresent) ? TPCZSHDRV2::TRIGGER_WORD_SIZE : 0));
for (uint16_t i = 0; i < decHeader->nTimebinHeaders; i++) {
[[maybe_unused]] ptrdiff_t sizeLeftInPage = payloadEnd - page;
assert(sizeLeftInPage > 0);
uint16_t nSamplesWrittenTB = 0;
if ((uint16_t)(raw::RDHUtils::getPageCounter(rawDataHeader) + 1) == raw::RDHUtils::getPageCounter(nextPage)) {
nSamplesWrittenTB = DecodeTB<DecodeInParallel, true>(clusterer, smem, iThread, page, pageDigitOffset, rawDataHeader, firstHBF, decHeader->cruID, payloadEnd, nextPage);
nSamplesWrittenTB = FillWithInvalid(clusterer, iThread, nThreads, pageDigitOffset, nSamplesInPage - nSamplesWritten);
#ifdef GPUCA_CHECK_TPCZS_CORRUPTION
clusterer.raiseError(GPUErrors::ERROR_TPCZS_INCOMPLETE_HBF, clusterer.mISector * 1000 + decHeader->cruID, raw::RDHUtils::getPageCounter(rawDataHeader), raw::RDHUtils::getPageCounter(nextPage));
nSamplesWrittenTB = DecodeTB<DecodeInParallel, false>(clusterer, smem, iThread, page, pageDigitOffset, rawDataHeader, firstHBF, decHeader->cruID, payloadEnd, nextPage);
assert(nSamplesWritten <= nSamplesInPage);
nSamplesWritten += nSamplesWrittenTB;
pageDigitOffset += nSamplesWrittenTB;
#ifdef GPUCA_CHECK_TPCZS_CORRUPTION
if (iThread == 0 && nSamplesWritten != nSamplesInPage) {
clusterer.raiseError(GPUErrors::ERROR_TPCZS_INVALID_NADC, clusterer.mISector * 1000 + decHeader->cruID, nSamplesInPage, nSamplesWritten);
return pageDigitOffset;
template <bool DecodeInParallel, bool PayloadExtendsToNextPage>
processorType& clusterer,
[[maybe_unused]] GPUSharedMemory& smem,
const uint8_t*& page,
uint32_t pageDigitOffset,
const header::RAWDataHeader* rawDataHeader,
[[maybe_unused]] const uint8_t* payloadEnd,
[[maybe_unused]] const uint8_t* nextPage)
if constexpr (DecodeInParallel) {
return DecodeTBMultiThread<PayloadExtendsToNextPage>(clusterer, smem, iThread, page, pageDigitOffset, rawDataHeader, firstHBF, cru, payloadEnd, nextPage);
uint16_t nSamplesWritten = 0;
nSamplesWritten = DecodeTBSingleThread<PayloadExtendsToNextPage>(clusterer, page, pageDigitOffset, rawDataHeader, firstHBF, cru, payloadEnd, nextPage);
return warp_broadcast(nSamplesWritten, 0);
template <bool PayloadExtendsToNextPage>
processorType& clusterer,
GPUSharedMemory& smem,
const int32_t iThread,
const uint8_t*& page,
uint32_t pageDigitOffset,
const header::RAWDataHeader* rawDataHeader,
[[maybe_unused]] const uint8_t* payloadEnd,
[[maybe_unused]] const uint8_t* nextPage)
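// In dense-link pages a payload may spill over into the following raw page; the helper
// macros below (only active when PayloadExtendsToNextPage is set) jump the read pointer
// across the next page's RAWDataHeader instead of running past payloadEnd.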
#define MAYBE_PAGE_OVERFLOW(pagePtr)                               \
  if constexpr (PayloadExtendsToNextPage) {                        \
    if (pagePtr >= payloadEnd && pagePtr < nextPage) {             \
      ptrdiff_t diff = pagePtr - payloadEnd;                       \
      pagePtr = nextPage;                                          \
      ConsumeBytes(pagePtr, sizeof(header::RAWDataHeader) + diff); \
    }                                                              \
  } else {                                                         \
    assert(pagePtr <= payloadEnd);                                 \
  }

#define PEEK_OVERFLOW(pagePtr, offset)                                                      \
  (*(PayloadExtendsToNextPage && (pagePtr) < nextPage && (pagePtr) + (offset) >= payloadEnd \
       ? nextPage + sizeof(header::RAWDataHeader) + ((pagePtr) + (offset)-payloadEnd)       \
       : (pagePtr) + (offset)))
#define TEST_BIT(x, bit) static_cast<bool>((x) & (1 << (bit)))
static_assert(NTHREADS == GPUCA_WARP_SIZE, "Decoding TB Headers in parallel assumes block size is a single warp.");
const CfFragment& fragment = clusterer.mPmemory->fragment;
uint16_t tbbHdr = ConsumeByte(page);
tbbHdr |= static_cast<uint16_t>(ConsumeByte(page)) << CHAR_BIT;
uint8_t nLinksInTimebin = tbbHdr & 0x000F;
uint16_t linkBC = (tbbHdr & 0xFFF0) >> 4;
uint16_t nSamplesInTB = 0;
for (uint8_t iLink = 0; iLink < nLinksInTimebin; iLink++) {
uint8_t timebinLinkHeaderStart = ConsumeByte(page);
smem.linkIds[iLink] = timebinLinkHeaderStart & 0b00011111;
bool bitmaskIsFlat = timebinLinkHeaderStart & 0b00100000;
uint16_t bitmaskL2 = 0x03FF;
if (not bitmaskIsFlat) {
bitmaskL2 = static_cast<uint16_t>(timebinLinkHeaderStart & 0b11000000) << 2 | static_cast<uint16_t>(ConsumeByte(page));
int32_t nBytesBitmask = CAMath::Popcount(bitmaskL2);
assert(nBytesBitmask <= 10);
for (int32_t chan = iThread; chan < CAMath::nextMultipleOf<NTHREADS>(80); chan += NTHREADS) {
int32_t chanL2Idx = chan / 8;
bool l2 = TEST_BIT(bitmaskL2, chanL2Idx);
int32_t chanByteOffset = nBytesBitmask - 1 - CAMath::Popcount(bitmaskL2 >> (chanL2Idx + 1));
assert(myChannelHasData == 0 || myChannelHasData == 1);
int32_t nSamplesStep;
int32_t threadSampleOffset = CfUtils::warpPredicateScan(myChannelHasData, &nSamplesStep);
if (myChannelHasData) {
smem.rawFECChannels[nSamplesInTB + threadSampleOffset] = chan;
nSamplesInTB += nSamplesStep;
ConsumeBytes(page, nBytesBitmask);
smem.samplesPerLinkEnd[iLink] = nSamplesInTB;
const uint8_t* adcData = ConsumeBytes(page, (nSamplesInTB * DECODE_BITS + 7) / 8);
if (not fragment.contains(timeBin)) {
return FillWithInvalid(clusterer, iThread, NTHREADS, pageDigitOffset, nSamplesInTB);
for (uint16_t sample = iThread; sample < nSamplesInTB; sample += NTHREADS) {
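// Parallel ADC decoding: every thread locates its own sample directly in the packed
// stream by computing that sample's bit offset, so no sequential bit cursor is needed.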
const uint16_t adcBitOffset = sample * DECODE_BITS;
uint16_t adcByteOffset = adcBitOffset / CHAR_BIT;
const uint8_t adcOffsetInByte = adcBitOffset - adcByteOffset * CHAR_BIT;
static_assert(DECODE_BITS <= sizeof(uint16_t) * CHAR_BIT);
while (bits < DECODE_BITS) {
byte >>= adcOffsetInByte;
while (smem.samplesPerLinkEnd[iLink] <= sample) {
int32_t rawFECChannelLink = smem.rawFECChannels[sample];
o2::tpc::PadPos padAndRow = GetPadAndRowFromFEC(clusterer, cru, rawFECChannelLink, smem.linkIds[iLink]);
float charge = ADCToFloat(byte, DECODE_MASK, DECODE_BITS_FACTOR);
WriteCharge(clusterer, charge, padAndRow, fragment.toLocal(timeBin), pageDigitOffset + sample);
assert(PayloadExtendsToNextPage || adcData <= page);
assert(PayloadExtendsToNextPage || page <= payloadEnd);
#undef MAYBE_PAGE_OVERFLOW
template <bool PayloadExtendsToNextPage>
processorType& clusterer,
const uint8_t*& page,
uint32_t pageDigitOffset,
const header::RAWDataHeader* rawDataHeader,
[[maybe_unused]] const uint8_t* payloadEnd,
[[maybe_unused]] const uint8_t* nextPage)
#define MAYBE_PAGE_OVERFLOW(pagePtr)                               \
  if constexpr (PayloadExtendsToNextPage) {                        \
    if (pagePtr >= payloadEnd && pagePtr < nextPage) {             \
      ptrdiff_t diff = pagePtr - payloadEnd;                       \
      pagePtr = nextPage;                                          \
      ConsumeBytes(pagePtr, sizeof(header::RAWDataHeader) + diff); \
    }                                                              \
  } else {                                                         \
    assert(pagePtr <= payloadEnd);                                 \
  }
using zerosupp_link_based::ChannelPerTBHeader;
const CfFragment& fragment = clusterer.mPmemory->fragment;
uint8_t linkIds[MaxNLinksPerTimebin];
uint8_t channelMasks[MaxNLinksPerTimebin * 10] = {0};
uint16_t nSamplesWritten = 0;
uint16_t tbbHdr = ConsumeByte(page);
tbbHdr |= static_cast<uint16_t>(ConsumeByte(page)) << CHAR_BIT;
uint8_t nLinksInTimebin = tbbHdr & 0x000F;
uint16_t linkBC = (tbbHdr & 0xFFF0) >> 4;
uint16_t nSamplesInTB = 0;
for (uint8_t iLink = 0; iLink < nLinksInTimebin; iLink++) {
uint8_t timebinLinkHeaderStart = ConsumeByte(page);
linkIds[iLink] = timebinLinkHeaderStart & 0b00011111;
bool bitmaskIsFlat = timebinLinkHeaderStart & 0b00100000;
uint16_t bitmaskL2 = 0x0FFF;
if (not bitmaskIsFlat) {
bitmaskL2 = static_cast<uint16_t>(timebinLinkHeaderStart & 0b11000000) << 2 | static_cast<uint16_t>(ConsumeByte(page));
for (int32_t i = 0; i < 10; i++) {
if (bitmaskL2 & 1 << i) {
nSamplesInTB += CAMath::Popcount(*Peek(page));
channelMasks[10 * iLink + i] = ConsumeByte(page);
const uint8_t* adcData = ConsumeBytes(page, (nSamplesInTB * DECODE_BITS + 7) / 8);
if (not fragment.contains(timeBin)) {
FillWithInvalid(clusterer, 0, 1, pageDigitOffset, nSamplesInTB);
uint32_t byte = 0, bits = 0;
uint16_t rawFECChannel = 0;
while (nSamplesWritten < nSamplesInTB) {
byte |= static_cast<uint32_t>(ConsumeByte(adcData)) << bits;
while (bits >= DECODE_BITS) {
for (; !ChannelIsActive(channelMasks, rawFECChannel); rawFECChannel++) {
int32_t iLink = rawFECChannel / ChannelPerTBHeader;
int32_t rawFECChannelLink = rawFECChannel % ChannelPerTBHeader;
o2::tpc::PadPos padAndRow = GetPadAndRowFromFEC(clusterer, cru, rawFECChannelLink, linkIds[iLink]);
float charge = ADCToFloat(byte, DECODE_MASK, DECODE_BITS_FACTOR);
WriteCharge(clusterer, charge, padAndRow, fragment.toLocal(timeBin), pageDigitOffset + nSamplesWritten);
byte >>= DECODE_BITS;
assert(PayloadExtendsToNextPage || adcData <= page);
assert(PayloadExtendsToNextPage || page <= payloadEnd);
assert(nSamplesWritten == nSamplesInTB);
return nSamplesWritten;
#undef MAYBE_PAGE_OVERFLOW
constexpr uint8_t N_BITS_PER_ENTRY = sizeof(*chan) * CHAR_BIT;
const uint8_t entryIndex = chanIndex / N_BITS_PER_ENTRY;
const uint8_t bitInEntry = chanIndex % N_BITS_PER_ENTRY;
return chan[entryIndex] & (1 << bitInEntry);