GPUTPCCFDecodeZS::decode(clusterer, smem, nBlocks, nThreads, iBlock, iThread, firstHBF);
const uint32_t sector = clusterer.mISector;
const uint32_t endpoint = clusterer.mPzsOffsets[iBlock].endpoint;
ChargePos* positions = clusterer.mPpositions;
const size_t nDigits = clusterer.mPzsOffsets[iBlock].offset;
s.nRowsRegion = GPUTPCGeometry::GetRegionRows(region);
s.regionStartRow = GPUTPCGeometry::GetRegionStart(region);
s.nThreadsPerRow = CAMath::Max(1u, nThreads / ((s.nRowsRegion + (endpoint & 1)) / 2));
s.rowStride = nThreads / s.nThreadsPerRow;
s.rowOffsetCounter = 0;
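// Thread layout: each endpoint covers half of a region's rows, so the block
// is split into groups of s.nThreadsPerRow threads per row, stepping over
// rows with stride s.rowStride. Illustrative numbers (assumed, not from
// data): nThreads = 64, nRowsRegion = 12, even endpoint -> 6 rows,
// nThreadsPerRow = 10, rowStride = 6.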
const uint32_t myRow = iThread / s.nThreadsPerRow;
const uint32_t mySequence = iThread % s.nThreadsPerRow;
const uint32_t j = clusterer.mPzsOffsets[iBlock].num;
for (uint32_t i = clusterer.mMinMaxCN[endpoint].zsPtrFirst; i < clusterer.mMinMaxCN[endpoint].zsPtrLast; i++) {
  const uint32_t minJ = (i == clusterer.mMinMaxCN[endpoint].zsPtrFirst) ? clusterer.mMinMaxCN[endpoint].zsPageFirst : 0;
  const uint32_t maxJ = (i + 1 == clusterer.mMinMaxCN[endpoint].zsPtrLast) ? clusterer.mMinMaxCN[endpoint].zsPageLast : zs.nZSPtr[endpoint][i];
  for (uint32_t j = minJ; j < maxJ; j++) {
pagePtr += sizeof(*hdr);
const bool decode12bit = hdr->version == 2;
const float decodeBitsFactor = 1.f / (1 << (decodeBits - 10));
uint32_t mask = (1 << decodeBits) - 1;
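// ADC values are stored as fixed point with (decodeBits - 10) fractional
// bits. Worked example (assuming 12-bit decoding): decodeBitsFactor =
// 1 / (1 << 2) = 0.25, so a raw value of 650 decodes to 162.5 ADC counts.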
const int32_t rowOffset = s.regionStartRow + ((endpoint & 1) ? (s.nRowsRegion / 2) : 0);
const int32_t nRows = (endpoint & 1) ? (s.nRowsRegion - s.nRowsRegion / 2) : (s.nRowsRegion / 2);
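// Endpoint parity selects the half-region: even endpoints decode the lower
// s.nRowsRegion / 2 rows, odd endpoints the remaining upper rows.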
for (int32_t l = 0; l < hdr->nTimeBinSpan; l++) {
  pagePtr += (pagePtr - page) & 1;
  if ((tbHdr->rowMask & 0x7FFF) == 0) {
  const int32_t nRowsUsed = CAMath::Popcount((uint32_t)(tbHdr->rowMask & 0x7FFF));
  pagePtr += 2 * nRowsUsed;
for (int32_t n = iThread; n < nRowsUsed; n += nThreads) {
  const uint8_t* rowData = n == 0 ? pagePtr : (page + tbHdr->rowAddr1()[n - 1]);
  s.RowClusterOffset[n] = CAMath::AtomicAddShared<uint32_t>(&s.rowOffsetCounter, rowData[2 * *rowData]);
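// rowData[0] holds the sequence count and rowData[2 * rowData[0]] the
// cumulative sample count of the row, so the atomic add hands every row its
// exclusive output offset within this time bin.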
if (myRow < s.rowStride) {
  for (int32_t m = myRow; m < nRows; m += s.rowStride) {
    if ((tbHdr->rowMask & (1 << m)) == 0) {
    const int32_t rowPos = CAMath::Popcount((uint32_t)(tbHdr->rowMask & ((1 << m) - 1)));
    size_t nDigitsTmp = nDigits + s.RowClusterOffset[rowPos];
const uint8_t* rowData = rowPos == 0 ? pagePtr : (page + tbHdr->rowAddr1()[rowPos - 1]);
const int32_t nSeqRead = *rowData;
const int32_t nSeqPerThread = (nSeqRead + s.nThreadsPerRow - 1) / s.nThreadsPerRow;
const int32_t mySequenceStart = mySequence * nSeqPerThread;
const int32_t mySequenceEnd = CAMath::Min(mySequenceStart + nSeqPerThread, nSeqRead);
if (mySequenceEnd > mySequenceStart) {
const uint8_t* adcData = rowData + 2 * nSeqRead + 1;
const uint32_t nSamplesStart = mySequenceStart ? rowData[2 * mySequenceStart] : 0;
nDigitsTmp += nSamplesStart;
uint32_t nADCStartBits = nSamplesStart * decodeBits;
const uint32_t nADCStart = (nADCStartBits + 7) / 8;
const int32_t nADC = (rowData[2 * mySequenceEnd] * decodeBits + 7) / 8;
adcData += nADCStart;
nADCStartBits &= 0x7;
uint32_t byte = 0, bits = 0;
bits = 8 - nADCStartBits;
byte = ((*(adcData - 1) & (0xFF ^ ((1 << nADCStartBits) - 1)))) >> nADCStartBits;
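// When a thread's first sample starts mid-byte, the straddling byte was
// already skipped (nADCStart rounds up), so its upper 8 - nADCStartBits bits
// are salvaged from *(adcData - 1) to seed the bit buffer.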
int32_t nSeq = mySequenceStart;
int32_t seqLen = nSeq ? (rowData[(nSeq + 1) * 2] - rowData[nSeq * 2]) : rowData[2];
Pad pad = rowData[nSeq++ * 2 + 1];
for (int32_t n = nADCStart; n < nADC; n++) {
  byte |= *(adcData++) << bits;
  bits += 8;
  while (bits >= decodeBits) {
    seqLen = rowData[(nSeq + 1) * 2] - rowData[nSeq * 2];
    pad = rowData[nSeq++ * 2 + 1];
const CfFragment& fragment = clusterer.mPmemory->fragment;
TPCTime globalTime = timeBin + l;
bool inFragment = fragment.contains(globalTime);
positions[nDigitsTmp++] = pos;
float q = float(byte & mask) * decodeBitsFactor;
q *= clusterer.GetConstantMem()->calibObjects.tpcPadGain->getGainCorrection(sector, row, pad);
byte = byte >> decodeBits;
pagePtr = page + tbHdr->rowAddr1()[nRowsUsed - 2];
pagePtr += 2 * *pagePtr;
pagePtr += 1 + (*pagePtr * decodeBits + 7) / 8;
GPUdii() void GPUTPCCFDecodeZSLink::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, int32_t firstHBF)
{
  Decode<GPUTPCCFDecodeZSLink>(nBlocks, nThreads, iBlock, iThread, smem, clusterer, firstHBF);
}

GPUd() size_t GPUTPCCFDecodeZSLink::DecodePage(GPUSharedMemory& smem, processorType& clusterer, int32_t iBlock, int32_t nThreads, int32_t iThread, const uint8_t* page, uint32_t pageDigitOffset, int32_t firstHBF)
{
const CfFragment& fragment = clusterer.mPmemory->fragment;
const auto* rdHdr = ConsumeHeader<header::RAWDataHeader>(page);
  return pageDigitOffset;
int32_t nDecoded = 0;
const auto* decHdr = ConsumeHeader<TPCZSHDRV2>(page);
ConsumeBytes(page, decHdr->firstZSDataOffset * 16);
for (uint32_t t = 0; t < decHdr->nTimebinHeaders; t++) {
  const auto* tbHdr = ConsumeHeader<zerosupp_link_based::CommonHeader>(page);
  const auto* adcData = ConsumeBytes(page, tbHdr->numWordsPayload * 16);
  uint32_t channelMask[3];
  GetChannelBitmask(*tbHdr, channelMask);
  uint32_t nAdc = CAMath::Popcount(channelMask[0]) + CAMath::Popcount(channelMask[1]) + CAMath::Popcount(channelMask[2]);
  bool inFragment = fragment.contains(timeBin);
  if (not inFragment) {
    pageDigitOffset += FillWithInvalid(clusterer, iThread, nThreads, pageDigitOffset, nAdc);
  DecodeTBMultiThread(clusterer, iThread, smem, adcData, nAdc, channelMask, timeBin, decHdr->cruID, tbHdr->fecInPartition, pageDigitOffset);
  DecodeTBSingleThread(clusterer, adcData, nAdc, channelMask, timeBin, decHdr->cruID, tbHdr->fecInPartition, pageDigitOffset);
  pageDigitOffset += nAdc;
#ifdef GPUCA_CHECK_TPCZS_CORRUPTION
  if (iThread == 0 && nDecoded != decHdr->nADCsamples) {
    clusterer.raiseError(GPUErrors::ERROR_TPCZS_INVALID_NADC, clusterer.mISector * 1000 + decHdr->cruID, decHdr->nADCsamples, nDecoded);
  return pageDigitOffset;
GPUd() void GPUTPCCFDecodeZSLink::DecodeTBSingleThread(
  processorType& clusterer,
  const uint8_t* adcData,
  uint32_t nAdc,
  const uint32_t* channelMask,
  int32_t timeBin,
  int32_t cru,
  int32_t fecInPartition,
  uint32_t pageDigitOffset)
{
  const CfFragment& fragment = clusterer.mPmemory->fragment;
  uint32_t byte = 0, bits = 0, nSamplesWritten = 0, rawFECChannel = 0;
  while (nSamplesWritten < nAdc) {
    byte |= adcData[0] << bits;
    adcData++;
    bits += CHAR_BIT;
    while (bits >= DECODE_BITS) {
      for (; !ChannelIsActive(channelMask, rawFECChannel); rawFECChannel++) {
      o2::tpc::PadPos padAndRow = GetPadAndRowFromFEC(clusterer, cru, rawFECChannel, fecInPartition);
      float charge = ADCToFloat(byte, DECODE_MASK, DECODE_BITS_FACTOR);
      WriteCharge(clusterer, charge, padAndRow, fragment.toLocal(timeBin), pageDigitOffset + nSamplesWritten);
      byte = byte >> DECODE_BITS;
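// Serial bit-stream decode: one payload byte is appended per outer iteration
// (shifted up by the current bit count), then DECODE_BITS-wide samples are
// emitted while enough bits are buffered. Example (assuming 12-bit samples):
// after two bytes, 16 bits are buffered, one sample is emitted, 4 bits carry
// over to the next byte.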
uint32_t rawFECChannel = 0;
const uint64_t* adcData64 = (const uint64_t*)adcData;
for (uint32_t j = 0; j < nAdc; j++) {
  for (; !ChannelIsActive(channelMask, rawFECChannel); rawFECChannel++) {
  o2::tpc::PadPos padAndRow = GetPadAndRowFromFEC(clusterer, cru, rawFECChannel, fecInPartition);
  float charge = ADCToFloat(adc, DECODE_MASK, DECODE_BITS_FACTOR);
  WriteCharge(clusterer, charge, padAndRow, fragment.toLocal(timeBin), pageDigitOffset + j);
GPUd() void GPUTPCCFDecodeZSLink::DecodeTBMultiThread(
  processorType& clusterer,
  const int32_t iThread,
  GPUSharedMemory& smem,
  const uint8_t* adcData,
  uint32_t nAdc,
  const uint32_t* channelMask,
  int32_t timeBin,
  int32_t cru,
  int32_t fecInPartition,
  uint32_t pageDigitOffset)
{
  static_assert(NTHREADS == GPUCA_WARP_SIZE, "Decoding TB Headers in parallel assumes block size is a single warp.");
for (uint8_t i = iThread; blockOffset < nAdc; i += NTHREADS) {
  uint8_t myChannelActive = ChannelIsActive(channelMask, rawFECChannel);
  uint8_t myOffset = warp_scan_inclusive_add(myChannelActive) - 1 + blockOffset;
  blockOffset = warp_broadcast(myOffset, NTHREADS - 1) + 1;
  if (not myChannelActive) {
  assert(myOffset < nAdc);
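// Warp-level stream compaction: the inclusive scan over the 0/1 activity
// flags gives each active lane a dense output slot (scan - 1 = exclusive
// rank), and broadcasting the last lane's offset advances blockOffset by the
// number of active channels in this batch. Example (4-lane warp assumed for
// illustration): flags 1,0,1,1 -> inclusive scan 1,1,2,3 -> slots 0,-,1,2.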
uint32_t adcBitOffset = myOffset * DECODE_BITS;
uint32_t adcByteOffset = adcBitOffset / CHAR_BIT;
uint32_t adcOffsetInByte = adcBitOffset - adcByteOffset * CHAR_BIT;
uint32_t byte = 0, bits = 0;
while (bits < DECODE_BITS) {
  byte |= ((uint32_t)adcData[adcByteOffset]) << bits;
  adcByteOffset++;
  bits += CHAR_BIT;
}
adc = byte >> adcOffsetInByte;
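// Random-access unpacking: each thread seeks directly to its sample's bit
// position. Worked example (assuming 12-bit samples): myOffset = 3 ->
// adcBitOffset = 36 -> byte 4, bit 4; two byte loads buffer 16 bits and the
// shift by 4 aligns the 12-bit sample (high bits are masked off later via
// DECODE_MASK).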
const uint64_t* adcData64 = (const uint64_t*)adcData;
o2::tpc::PadPos padAndRow = GetPadAndRowFromFEC(clusterer, cru, rawFECChannel, fecInPartition);
const CfFragment& fragment = clusterer.mPmemory->fragment;
float charge = ADCToFloat(adc, DECODE_MASK, DECODE_BITS_FACTOR);
WriteCharge(clusterer, charge, padAndRow, fragment.toLocal(timeBin), pageDigitOffset + myOffset);
chan[0] = tbHdr.bitMaskLow & 0xfffffffful;
chan[1] = tbHdr.bitMaskLow >> (sizeof(uint32_t) * CHAR_BIT);
chan[2] = tbHdr.bitMaskHigh;
if (chanIndex >= zerosupp_link_based::ChannelPerTBHeader) {
constexpr uint8_t N_BITS_PER_ENTRY = sizeof(*chan) * CHAR_BIT;
const uint8_t entryIndex = chanIndex / N_BITS_PER_ENTRY;
const uint8_t bitInEntry = chanIndex % N_BITS_PER_ENTRY;
return chan[entryIndex] & (1 << bitInEntry);
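// The 80 channels of a link map onto three 32-bit mask words: bitMaskLow
// fills entries 0 and 1, bitMaskHigh supplies the remaining 16 bits in entry
// 2. ChannelIsActive then tests bit (chanIndex % 32) of word (chanIndex / 32).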
template <class Decoder>
GPUd() void GPUTPCCFDecodeZSLinkBase::Decode(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, typename Decoder::GPUSharedMemory& smem, processorType& clusterer, int32_t firstHBF)
{
  const uint32_t sector = clusterer.mISector;
  const uint32_t endpoint = clusterer.mPzsOffsets[iBlock].endpoint;
  uint32_t pageDigitOffset = clusterer.mPzsOffsets[iBlock].offset;
  const uint32_t i = 0;
  const uint32_t j = clusterer.mPzsOffsets[iBlock].num;
for (uint32_t i = clusterer.mMinMaxCN[endpoint].zsPtrFirst; i < clusterer.mMinMaxCN[endpoint].zsPtrLast; i++) {
  const uint32_t minJ = (i == clusterer.mMinMaxCN[endpoint].zsPtrFirst) ? clusterer.mMinMaxCN[endpoint].zsPageFirst : 0;
  for (uint32_t j = minJ; j < maxJ; j++) {
    const uint8_t* page = (const uint8_t*)pageSrc;
    const auto* rdHdr = Peek<header::RAWDataHeader>(page);
    pageDigitOffset = Decoder::DecodePage(smem, clusterer, iBlock, nThreads, iThread, page, pageDigitOffset, firstHBF);
#ifdef GPUCA_CHECK_TPCZS_CORRUPTION
  if (iThread == 0 && iBlock < nBlocks - 1) {
    uint32_t maxOffset = clusterer.mPzsOffsets[iBlock + 1].offset;
    if (pageDigitOffset != maxOffset) {
      clusterer.raiseError(GPUErrors::ERROR_TPCZS_INVALID_OFFSET, clusterer.mISector * 1000 + endpoint, pageDigitOffset, maxOffset);
GPUd() o2::tpc::PadPos GPUTPCCFDecodeZSLinkBase::GetPadAndRowFromFEC(processorType& clusterer, int32_t cru, int32_t rawFECChannel, int32_t fecInPartition)
{
#ifdef GPUCA_TPC_GEOMETRY_O2
  const int32_t regionIter = cru % 2;
  const int32_t istreamm = ((rawFECChannel % 10) / 2);
  const int32_t partitionStream = istreamm + regionIter * 5;
  const int32_t sampaOnFEC = geo.GetSampaMapping(partitionStream);
  const int32_t channel = (rawFECChannel % 2) + 2 * (rawFECChannel / 10);
  const int32_t channelOnSAMPA = channel + geo.GetChannelOffset(partitionStream);
  const int32_t partition = (cru % 10) / 2;
  const int32_t fecInSector = geo.GetSectorFECOffset(partition) + fecInPartition;
  assert(gpuMapping != nullptr);
uint16_t globalSAMPAId = (static_cast<uint16_t>(fecInSector) << 8) + (static_cast<uint16_t>(sampaOnFEC) << 5) + static_cast<uint16_t>(channelOnSAMPA);
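// globalSAMPAId packs the hardware address into 16 bits: [15:8] FEC within
// the sector, [7:5] SAMPA chip on the FEC, [4:0] channel on the SAMPA. It
// serves as the lookup key into the pad-position mapping (gpuMapping above).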
GPUd() void GPUTPCCFDecodeZSLinkBase::WriteCharge(processorType& clusterer, float charge, PadPos padAndRow, TPCFragmentTime localTime, size_t positionOffset)
{
#ifdef GPUCA_CHECK_TPCZS_CORRUPTION
  charge *= clusterer.GetConstantMem()->calibObjects.tpcPadGain->getGainCorrection(sector, padAndRow.getRow(), padAndRow.getPad());

GPUd() uint16_t GPUTPCCFDecodeZSLinkBase::FillWithInvalid(processorType& clusterer, int32_t iThread, int32_t nThreads, uint32_t pageDigitOffset, uint16_t nSamples)
GPUd() void GPUTPCCFDecodeZSDenseLink::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, int32_t firstHBF)
{
  Decode<GPUTPCCFDecodeZSDenseLink>(nBlocks, nThreads, iBlock, iThread, smem, clusterer, firstHBF);
}

GPUd() uint32_t GPUTPCCFDecodeZSDenseLink::DecodePage(GPUSharedMemory& smem, processorType& clusterer, int32_t iBlock, int32_t nThreads, int32_t iThread, const uint8_t* page, uint32_t pageDigitOffset, int32_t firstHBF)
{
#ifdef GPUCA_GPUCODE
  constexpr bool DecodeInParallel = true;
#else
  constexpr bool DecodeInParallel = false;
#endif

  const uint8_t* const pageStart = page;
const auto* rawDataHeader = Peek<header::RAWDataHeader>(page);
const auto* decHeader = Peek<TPCZSHDRV2>(page, raw::RDHUtils::getMemorySize(*rawDataHeader) - sizeof(TPCZSHDRV2));
ConsumeHeader<header::RAWDataHeader>(page);
uint16_t nSamplesWritten = 0;
const uint16_t nSamplesInPage = decHeader->nADCsamples;
const auto* payloadEnd = Peek(pageStart, raw::RDHUtils::getMemorySize(*rawDataHeader) - sizeof(TPCZSHDRV2) - ((decHeader->flags & TPCZSHDRV2::ZSFlags::TriggerWordPresent) ? TPCZSHDRV2::TRIGGER_WORD_SIZE : 0));
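// In dense-link pages the TPCZSHDRV2 metadata sits at the end of the page,
// optionally preceded by a trigger word, so payloadEnd marks where the ADC
// payload stops rather than where the page memory ends.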
for (uint16_t i = 0; i < decHeader->nTimebinHeaders; i++) {
  [[maybe_unused]] ptrdiff_t sizeLeftInPage = payloadEnd - page;
  assert(sizeLeftInPage > 0);
  uint16_t nSamplesWrittenTB = 0;
  if ((uint16_t)(raw::RDHUtils::getPageCounter(rawDataHeader) + 1) == raw::RDHUtils::getPageCounter(nextPage)) {
    nSamplesWrittenTB = DecodeTB<DecodeInParallel, true>(clusterer, smem, iThread, page, pageDigitOffset, rawDataHeader, firstHBF, decHeader->cruID, payloadEnd, nextPage);
  } else {
    nSamplesWrittenTB = FillWithInvalid(clusterer, iThread, nThreads, pageDigitOffset, nSamplesInPage - nSamplesWritten);
#ifdef GPUCA_CHECK_TPCZS_CORRUPTION
    clusterer.raiseError(GPUErrors::ERROR_TPCZS_INCOMPLETE_HBF, clusterer.mISector * 1000 + decHeader->cruID, raw::RDHUtils::getPageCounter(rawDataHeader), raw::RDHUtils::getPageCounter(nextPage));
#endif
  }
  nSamplesWrittenTB = DecodeTB<DecodeInParallel, false>(clusterer, smem, iThread, page, pageDigitOffset, rawDataHeader, firstHBF, decHeader->cruID, payloadEnd, nextPage);
  assert(nSamplesWritten <= nSamplesInPage);
  nSamplesWritten += nSamplesWrittenTB;
  pageDigitOffset += nSamplesWrittenTB;
#ifdef GPUCA_CHECK_TPCZS_CORRUPTION
  if (iThread == 0 && nSamplesWritten != nSamplesInPage) {
    clusterer.raiseError(GPUErrors::ERROR_TPCZS_INVALID_NADC, clusterer.mISector * 1000 + decHeader->cruID, nSamplesInPage, nSamplesWritten);
  return pageDigitOffset;
template <bool DecodeInParallel, bool PayloadExtendsToNextPage>
GPUd() uint16_t GPUTPCCFDecodeZSDenseLink::DecodeTB(
  processorType& clusterer,
  [[maybe_unused]] GPUSharedMemory& smem,
  int32_t iThread,
  const uint8_t*& page,
  uint32_t pageDigitOffset,
  const header::RAWDataHeader* rawDataHeader,
  int32_t firstHBF,
  int32_t cru,
  [[maybe_unused]] const uint8_t* payloadEnd,
  [[maybe_unused]] const uint8_t* nextPage)
{
  if constexpr (DecodeInParallel) {
    return DecodeTBMultiThread<PayloadExtendsToNextPage>(clusterer, smem, iThread, page, pageDigitOffset, rawDataHeader, firstHBF, cru, payloadEnd, nextPage);
  } else {
    uint16_t nSamplesWritten = 0;
    if (iThread == 0) {
      nSamplesWritten = DecodeTBSingleThread<PayloadExtendsToNextPage>(clusterer, page, pageDigitOffset, rawDataHeader, firstHBF, cru, payloadEnd, nextPage);
    }
    return warp_broadcast(nSamplesWritten, 0);
  }
}
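// In the serial branch only lane 0 walks the payload; warp_broadcast then
// distributes its sample count to the whole warp so every lane advances
// pageDigitOffset consistently.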
template <bool PayloadExtendsToNextPage>
GPUd() uint16_t GPUTPCCFDecodeZSDenseLink::DecodeTBMultiThread(
  processorType& clusterer,
  GPUSharedMemory& smem,
  const int32_t iThread,
  const uint8_t*& page,
  uint32_t pageDigitOffset,
  const header::RAWDataHeader* rawDataHeader,
  int32_t firstHBF,
  int32_t cru,
  [[maybe_unused]] const uint8_t* payloadEnd,
  [[maybe_unused]] const uint8_t* nextPage)
{
#define MAYBE_PAGE_OVERFLOW(pagePtr)                               \
  if constexpr (PayloadExtendsToNextPage) {                        \
    if (pagePtr >= payloadEnd && pagePtr < nextPage) {             \
      ptrdiff_t diff = pagePtr - payloadEnd;                       \
      pagePtr = nextPage;                                          \
      ConsumeBytes(pagePtr, sizeof(header::RAWDataHeader) + diff); \
    }                                                              \
  } else {                                                         \
    assert(pagePtr <= payloadEnd);                                 \
  }

#define PEEK_OVERFLOW(pagePtr, offset)                                                      \
  (*(PayloadExtendsToNextPage && (pagePtr) < nextPage && (pagePtr) + (offset) >= payloadEnd \
       ? nextPage + sizeof(header::RAWDataHeader) + ((pagePtr) + (offset) - payloadEnd)     \
       : (pagePtr) + (offset)))

#define TEST_BIT(x, bit) static_cast<bool>((x) & (1 << (bit)))
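// MAYBE_PAGE_OVERFLOW re-points pagePtr into the follow-up page (skipping its
// RAWDataHeader) once it runs past payloadEnd; PEEK_OVERFLOW reads a single
// byte through the same remapping without advancing the pointer. Both assume
// the payload continues immediately after the next page's RDH.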
static_assert(NTHREADS == GPUCA_WARP_SIZE, "Decoding TB Headers in parallel assumes block size is a single warp.");
const CfFragment& fragment = clusterer.mPmemory->fragment;
uint16_t tbbHdr = ConsumeByte(page);
tbbHdr |= static_cast<uint16_t>(ConsumeByte(page)) << CHAR_BIT;
uint8_t nLinksInTimebin = tbbHdr & 0x000F;
uint16_t linkBC = (tbbHdr & 0xFFF0) >> 4;
uint16_t nSamplesInTB = 0;
for (uint8_t iLink = 0; iLink < nLinksInTimebin; iLink++) {
  uint8_t timebinLinkHeaderStart = ConsumeByte(page);
  smem.linkIds[iLink] = timebinLinkHeaderStart & 0b00011111;
  bool bitmaskIsFlat = timebinLinkHeaderStart & 0b00100000;
  uint16_t bitmaskL2 = 0x03FF;
  if (not bitmaskIsFlat) {
    bitmaskL2 = static_cast<uint16_t>(timebinLinkHeaderStart & 0b11000000) << 2 | static_cast<uint16_t>(ConsumeByte(page));
  }
  int32_t nBytesBitmask = CAMath::Popcount(bitmaskL2);
  assert(nBytesBitmask <= 10);
for (int32_t chan = iThread; chan < CAMath::nextMultipleOf<NTHREADS>(80); chan += NTHREADS) {
  int32_t chanL2Idx = chan / 8;
  bool l2 = TEST_BIT(bitmaskL2, chanL2Idx);
  int32_t chanByteOffset = nBytesBitmask - 1 - CAMath::Popcount(bitmaskL2 >> (chanL2Idx + 1));
  assert(myChannelHasData == 0 || myChannelHasData == 1);
  int32_t nSamplesStep;
  int32_t threadSampleOffset = CfUtils::warpPredicateScan(myChannelHasData, &nSamplesStep);
  if (myChannelHasData) {
    smem.rawFECChannels[nSamplesInTB + threadSampleOffset] = chan;
  }
  nSamplesInTB += nSamplesStep;
  ConsumeBytes(page, nBytesBitmask);
  smem.samplesPerLinkEnd[iLink] = nSamplesInTB;
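// warpPredicateScan compacts the indices of active channels into
// smem.rawFECChannels and returns the warp-wide count in nSamplesStep;
// rounding the 80 channels up to a multiple of NTHREADS keeps every scan
// iteration full so the warp intrinsics stay in lockstep.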
const uint8_t* adcData = ConsumeBytes(page, (nSamplesInTB * DECODE_BITS + 7) / 8);
if (not fragment.contains(timeBin)) {
  return FillWithInvalid(clusterer, iThread, NTHREADS, pageDigitOffset, nSamplesInTB);
}
for (uint16_t sample = iThread; sample < nSamplesInTB; sample += NTHREADS) {
  const uint16_t adcBitOffset = sample * DECODE_BITS;
  uint16_t adcByteOffset = adcBitOffset / CHAR_BIT;
  const uint8_t adcOffsetInByte = adcBitOffset - adcByteOffset * CHAR_BIT;
uint16_t byte = 0, bits = 0;
static_assert(DECODE_BITS <= sizeof(uint16_t) * CHAR_BIT);
while (bits < DECODE_BITS) {
  byte |= PEEK_OVERFLOW(adcData, adcByteOffset) << bits;
  adcByteOffset++;
  bits += CHAR_BIT;
}
byte >>= adcOffsetInByte;
while (smem.samplesPerLinkEnd[iLink] <= sample) {
int32_t rawFECChannelLink = smem.rawFECChannels[sample];
o2::tpc::PadPos padAndRow = GetPadAndRowFromFEC(clusterer, cru, rawFECChannelLink, smem.linkIds[iLink]);
float charge = ADCToFloat(byte, DECODE_MASK, DECODE_BITS_FACTOR);
WriteCharge(clusterer, charge, padAndRow, fragment.toLocal(timeBin), pageDigitOffset + sample);
assert(PayloadExtendsToNextPage || adcData <= page);
assert(PayloadExtendsToNextPage || page <= payloadEnd);
#undef MAYBE_PAGE_OVERFLOW
template <bool PayloadExtendsToNextPage>
GPUd() uint16_t GPUTPCCFDecodeZSDenseLink::DecodeTBSingleThread(
  processorType& clusterer,
  const uint8_t*& page,
  uint32_t pageDigitOffset,
  const header::RAWDataHeader* rawDataHeader,
  int32_t firstHBF,
  int32_t cru,
  [[maybe_unused]] const uint8_t* payloadEnd,
  [[maybe_unused]] const uint8_t* nextPage)
{
#define MAYBE_PAGE_OVERFLOW(pagePtr)                               \
  if constexpr (PayloadExtendsToNextPage) {                        \
    if (pagePtr >= payloadEnd && pagePtr < nextPage) {             \
      ptrdiff_t diff = pagePtr - payloadEnd;                       \
      pagePtr = nextPage;                                          \
      ConsumeBytes(pagePtr, sizeof(header::RAWDataHeader) + diff); \
    }                                                              \
  } else {                                                         \
    assert(pagePtr <= payloadEnd);                                 \
  }
using zerosupp_link_based::ChannelPerTBHeader;
const CfFragment& fragment = clusterer.mPmemory->fragment;
uint8_t linkIds[MaxNLinksPerTimebin];
uint8_t channelMasks[MaxNLinksPerTimebin * 10] = {0};
uint16_t nSamplesWritten = 0;
uint16_t tbbHdr = ConsumeByte(page);
tbbHdr |= static_cast<uint16_t>(ConsumeByte(page)) << CHAR_BIT;
uint8_t nLinksInTimebin = tbbHdr & 0x000F;
uint16_t linkBC = (tbbHdr & 0xFFF0) >> 4;
uint16_t nSamplesInTB = 0;
for (uint8_t iLink = 0; iLink < nLinksInTimebin; iLink++) {
  uint8_t timebinLinkHeaderStart = ConsumeByte(page);
  linkIds[iLink] = timebinLinkHeaderStart & 0b00011111;
  bool bitmaskIsFlat = timebinLinkHeaderStart & 0b00100000;
  uint16_t bitmaskL2 = 0x03FF;
  if (not bitmaskIsFlat) {
    bitmaskL2 = static_cast<uint16_t>(timebinLinkHeaderStart & 0b11000000) << 2 | static_cast<uint16_t>(ConsumeByte(page));
  }
for (int32_t i = 0; i < 10; i++) {
  if (bitmaskL2 & 1 << i) {
    nSamplesInTB += CAMath::Popcount(*Peek(page));
    channelMasks[10 * iLink + i] = ConsumeByte(page);
  }
}
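// Each set bit of bitmaskL2 announces one channel-mask byte on the wire: its
// Popcount adds the samples of those 8 channels to the time-bin total, and
// the byte itself is kept for the ChannelIsActive lookups during decoding.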
const uint8_t* adcData = ConsumeBytes(page, (nSamplesInTB * DECODE_BITS + 7) / 8);
if (not fragment.contains(timeBin)) {
  FillWithInvalid(clusterer, 0, 1, pageDigitOffset, nSamplesInTB);
uint32_t byte = 0, bits = 0;
uint16_t rawFECChannel = 0;
while (nSamplesWritten < nSamplesInTB) {
  byte |= static_cast<uint32_t>(ConsumeByte(adcData)) << bits;
  bits += CHAR_BIT;
  while (bits >= DECODE_BITS) {
    for (; !ChannelIsActive(channelMasks, rawFECChannel); rawFECChannel++) {
    int32_t iLink = rawFECChannel / ChannelPerTBHeader;
    int32_t rawFECChannelLink = rawFECChannel % ChannelPerTBHeader;
    o2::tpc::PadPos padAndRow = GetPadAndRowFromFEC(clusterer, cru, rawFECChannelLink, linkIds[iLink]);
    float charge = ADCToFloat(byte, DECODE_MASK, DECODE_BITS_FACTOR);
    WriteCharge(clusterer, charge, padAndRow, fragment.toLocal(timeBin), pageDigitOffset + nSamplesWritten);
    byte >>= DECODE_BITS;
assert(PayloadExtendsToNextPage || adcData <= page);
assert(PayloadExtendsToNextPage || page <= payloadEnd);
assert(nSamplesWritten == nSamplesInTB);
return nSamplesWritten;
#undef MAYBE_PAGE_OVERFLOW
constexpr uint8_t N_BITS_PER_ENTRY = sizeof(*chan) * CHAR_BIT;
const uint8_t entryIndex = chanIndex / N_BITS_PER_ENTRY;
const uint8_t bitInEntry = chanIndex % N_BITS_PER_ENTRY;
return chan[entryIndex] & (1 << bitInEntry);