  GPUTPCCFDecodeZS::decode(clusterer, smem, nBlocks, nThreads, iBlock, iThread, firstHBF);
  const uint32_t sector = clusterer.mISector;
  const uint32_t endpoint = clusterer.mPzsOffsets[iBlock].endpoint;
  const size_t nDigits = clusterer.mPzsOffsets[iBlock].offset;
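  // One block decodes one ZS page; the endpoint selects which half of the
  // region's pad rows the page covers. The threads are split into groups of
  // s.nThreadsPerRow threads, each group decoding one pad row at a time.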
  s.nRowsRegion = GPUTPCGeometry::GetRegionRows(region);
  s.regionStartRow = GPUTPCGeometry::GetRegionStart(region);
  s.nThreadsPerRow = CAMath::Max(1u, nThreads / ((s.nRowsRegion + (endpoint & 1)) / 2));
  s.rowStride = nThreads / s.nThreadsPerRow;
  s.rowOffsetCounter = 0;
  const uint32_t myRow = iThread / s.nThreadsPerRow;
  const uint32_t mySequence = iThread % s.nThreadsPerRow;
  const uint32_t j = clusterer.mPzsOffsets[iBlock].num;
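  // The nested loops below visit the ZS pointers and raw pages assigned to
  // this endpoint; mMinMaxCN holds the first/last ZS pointer and page per
  // endpoint.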
  for (uint32_t i = clusterer.mMinMaxCN[endpoint].zsPtrFirst; i < clusterer.mMinMaxCN[endpoint].zsPtrLast; i++) {
    const uint32_t minJ = (i == clusterer.mMinMaxCN[endpoint].zsPtrFirst) ? clusterer.mMinMaxCN[endpoint].zsPageFirst : 0;
    const uint32_t maxJ = (i + 1 == clusterer.mMinMaxCN[endpoint].zsPtrLast) ? clusterer.mMinMaxCN[endpoint].zsPageLast : zs.nZSPtr[endpoint][i];
    for (uint32_t j = minJ; j < maxJ; j++) {
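      // The page header (hdr) selects the encoding: version 2 packs 12-bit ADC
      // values, otherwise 10 bits are used; decodeBitsFactor rescales the
      // packed values back to the 10-bit ADC scale.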
      pagePtr += sizeof(*hdr);
      const bool decode12bit = hdr->version == 2;
      const float decodeBitsFactor = 1.f / (1 << (decodeBits - 10));
      uint32_t mask = (1 << decodeBits) - 1;
      const int32_t rowOffset = s.regionStartRow + ((endpoint & 1) ? (s.nRowsRegion / 2) : 0);
      const int32_t nRows = (endpoint & 1) ? (s.nRowsRegion - s.nRowsRegion / 2) : (s.nRowsRegion / 2);
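      // Loop over the time bins spanned by this page. Each time-bin header
      // carries a 15-bit row mask; an empty mask means no rows have data in
      // this time bin.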
      for (int32_t l = 0; l < hdr->nTimeBinSpan; l++) {
        pagePtr += (pagePtr - page) & 1;
        if ((tbHdr->rowMask & 0x7FFF) == 0) {
        const int32_t nRowsUsed = CAMath::Popcount((uint32_t)(tbHdr->rowMask & 0x7FFF));
        pagePtr += 2 * nRowsUsed;
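        // First pass: reserve contiguous output slots per row by atomically
        // adding each row's total sample count (the last cumulative sequence
        // entry of the row data) to the shared rowOffsetCounter.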
        for (int32_t n = iThread; n < nRowsUsed; n += nThreads) {
          const uint8_t* rowData = n == 0 ? pagePtr : (page + tbHdr->rowAddr1()[n - 1]);
          s.RowClusterOffset[n] = CAMath::AtomicAddShared<uint32_t>(&s.rowOffsetCounter, rowData[2 * *rowData]);
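        // Second pass: groups of s.nThreadsPerRow threads decode the rows,
        // with s.rowStride rows processed concurrently; rows absent from the
        // row mask are skipped.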
        if (myRow < s.rowStride) {
          for (int32_t m = myRow; m < nRows; m += s.rowStride) {
            if ((tbHdr->rowMask & (1 << m)) == 0) {
            const int32_t rowPos = CAMath::Popcount((uint32_t)(tbHdr->rowMask & ((1 << m) - 1)));
            size_t nDigitsTmp = nDigits + s.RowClusterOffset[rowPos];
            const uint8_t* rowData = rowPos == 0 ? pagePtr : (page + tbHdr->rowAddr1()[rowPos - 1]);
            const int32_t nSeqRead = *rowData;
            const int32_t nSeqPerThread = (nSeqRead + s.nThreadsPerRow - 1) / s.nThreadsPerRow;
            const int32_t mySequenceStart = mySequence * nSeqPerThread;
            const int32_t mySequenceEnd = CAMath::Min(mySequenceStart + nSeqPerThread, nSeqRead);
            if (mySequenceEnd > mySequenceStart) {
              const uint8_t* adcData = rowData + 2 * nSeqRead + 1;
              const uint32_t nSamplesStart = mySequenceStart ? rowData[2 * mySequenceStart] : 0;
              nDigitsTmp += nSamplesStart;
              uint32_t nADCStartBits = nSamplesStart * decodeBits;
              const uint32_t nADCStart = (nADCStartBits + 7) / 8;
              const int32_t nADC = (rowData[2 * mySequenceEnd] * decodeBits + 7) / 8;
              adcData += nADCStart;
              nADCStartBits &= 0x7;
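              // Bit-stream decoding: "byte" accumulates raw payload bytes and
              // "bits" counts how many valid bits it holds; each sample is the
              // lowest decodeBits bits, assigned to the current pad sequence.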
              uint32_t byte = 0, bits = 0;
              bits = 8 - nADCStartBits;
              byte = ((*(adcData - 1) & (0xFF ^ ((1 << nADCStartBits) - 1)))) >> nADCStartBits;
              int32_t nSeq = mySequenceStart;
              int32_t seqLen = nSeq ? (rowData[(nSeq + 1) * 2] - rowData[nSeq * 2]) : rowData[2];
              Pad pad = rowData[nSeq++ * 2 + 1];
              for (int32_t n = nADCStart; n < nADC; n++) {
                byte |= *(adcData++) << bits;
                while (bits >= decodeBits) {
                  seqLen = rowData[(nSeq + 1) * 2] - rowData[nSeq * 2];
                  pad = rowData[nSeq++ * 2 + 1];
                  const CfFragment& fragment = clusterer.mPmemory->fragment;
                  TPCTime globalTime = timeBin + l;
                  bool inFragment = fragment.contains(globalTime);
                  positions[nDigitsTmp++] = pos;
                  float q = float(byte & mask) * decodeBitsFactor;
                  q *= clusterer.GetConstantMem()->calibObjects.tpcPadGain->getGainCorrection(sector, row, pad);
                  byte = byte >> decodeBits;
        pagePtr = page + tbHdr->rowAddr1()[nRowsUsed - 2];
        pagePtr += 2 * *pagePtr;
        pagePtr += 1 + (*pagePtr * decodeBits + 7) / 8;
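// Decoder for the link-based zero-suppression format: pages carry per-time-bin
// link headers with an 80-bit channel bitmask followed by the bit-packed ADC
// samples.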
GPUdii() void GPUTPCCFDecodeZSLink::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, int32_t firstHBF)
{
  Decode<GPUTPCCFDecodeZSLink>(nBlocks, nThreads, iBlock, iThread, smem, clusterer, firstHBF);
}
GPUd() size_t GPUTPCCFDecodeZSLink::DecodePage(GPUSharedMemory& smem, processorType& clusterer, int32_t iBlock, int32_t nThreads, int32_t iThread, const uint8_t* page, uint32_t pageDigitOffset, int32_t firstHBF)
  const CfFragment& fragment = clusterer.mPmemory->fragment;
  const auto* rdHdr = ConsumeHeader<header::RAWDataHeader>(page);
    return pageDigitOffset;
  [[maybe_unused]] int32_t nDecoded = 0;
  const auto* decHdr = ConsumeHeader<TPCZSHDRV2>(page);
  ConsumeBytes(page, decHdr->firstZSDataOffset * 16);
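  // Walk the time-bin headers of the page; each header is followed by its
  // payload in multiples of 16 bytes (numWordsPayload), holding the channel
  // bitmask and the packed ADC values.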
  for (uint32_t t = 0; t < decHdr->nTimebinHeaders; t++) {
    const auto* tbHdr = ConsumeHeader<zerosupp_link_based::CommonHeader>(page);
    const auto* adcData = ConsumeBytes(page, tbHdr->numWordsPayload * 16);
    uint32_t channelMask[3];
    GetChannelBitmask(*tbHdr, channelMask);
    uint32_t nAdc = CAMath::Popcount(channelMask[0]) + CAMath::Popcount(channelMask[1]) + CAMath::Popcount(channelMask[2]);
    bool inFragment = fragment.contains(timeBin);
    if (not inFragment) {
      pageDigitOffset += FillWithInvalid(clusterer, iThread, nThreads, pageDigitOffset, nAdc);
        tbHdr->fecInPartition,
      DecodeTBSingleThread(
        tbHdr->fecInPartition,
    pageDigitOffset += nAdc;
#ifdef GPUCA_CHECK_TPCZS_CORRUPTION
  if (iThread == 0 && nDecoded != decHdr->nADCsamples) {
    clusterer.raiseError(GPUErrors::ERROR_TPCZS_INVALID_NADC, clusterer.mISector * 1000 + decHdr->cruID, decHdr->nADCsamples, nDecoded);
  return pageDigitOffset;
  processorType& clusterer,
  const uint8_t* adcData,
  const uint32_t* channelMask,
  int32_t fecInPartition,
  uint32_t pageDigitOffset)
  const CfFragment& fragment = clusterer.mPmemory->fragment;
  uint32_t byte = 0, bits = 0, nSamplesWritten = 0, rawFECChannel = 0;
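  // Serial bit-stream decoding: pull in one payload byte at a time and emit a
  // sample whenever at least DECODE_BITS bits are available, advancing the
  // channel index to the next active bit in the channel mask for each sample.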
  while (nSamplesWritten < nAdc) {
    byte |= adcData[0] << bits;
    while (bits >= DECODE_BITS) {
      for (; !ChannelIsActive(channelMask, rawFECChannel); rawFECChannel++) {
      o2::tpc::PadPos padAndRow = GetPadAndRowFromFEC(clusterer, cru, rawFECChannel, fecInPartition);
      WriteCharge(clusterer, byte, padAndRow, fragment.toLocal(timeBin), pageDigitOffset + nSamplesWritten);
      byte = byte >> DECODE_BITS;
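  // Alternative, non-tightly-packed layout: the payload is read as 64-bit
  // words (adcData64) with a fixed number of samples per word, so no running
  // bit accumulator is needed.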
  uint32_t rawFECChannel = 0;
  const uint64_t* adcData64 = (const uint64_t*)adcData;
  for (uint32_t j = 0; j < nAdc; j++) {
    for (; !ChannelIsActive(channelMask, rawFECChannel); rawFECChannel++) {
    o2::tpc::PadPos padAndRow = GetPadAndRowFromFEC(clusterer, cru, rawFECChannel, fecInPartition);
    float charge = ADCToFloat(adc, DECODE_MASK, DECODE_BITS_FACTOR);
    WriteCharge(clusterer, charge, padAndRow, fragment.toLocal(timeBin), pageDigitOffset + j);
  processorType& clusterer,
  GPUSharedMemory& smem,
  const uint8_t* adcData,
  const uint32_t* channelMask,
  int32_t fecInPartition,
  uint32_t pageDigitOffset)
  static_assert(NTHREADS == GPUCA_WARP_SIZE, "Decoding TB Headers in parallel assumes block size is a single warp.");
  for (uint8_t i = iThread; blockOffset < nAdc; i += NTHREADS) {
    uint8_t myChannelActive = ChannelIsActive(channelMask, rawFECChannel);
    uint8_t myOffset = warp_scan_inclusive_add(myChannelActive) - 1 + blockOffset;
    blockOffset = warp_broadcast(myOffset, NTHREADS - 1) + 1;
    if (not myChannelActive) {
    assert(myOffset < nAdc);
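    // Locate this sample in the bit-packed stream: myOffset samples of
    // DECODE_BITS bits each give the bit position, split into a byte offset
    // and a remaining bit offset within that byte.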
    uint32_t adcBitOffset = myOffset * DECODE_BITS;
    uint32_t adcByteOffset = adcBitOffset / CHAR_BIT;
    uint32_t adcOffsetInByte = adcBitOffset - adcByteOffset * CHAR_BIT;
    uint32_t byte = 0, bits = 0;
    while (bits < DECODE_BITS) {
      byte |= ((uint32_t)adcData[adcByteOffset]) << bits;
    adc = byte >> adcOffsetInByte;
    const uint64_t* adcData64 = (const uint64_t*)adcData;
    o2::tpc::PadPos padAndRow = GetPadAndRowFromFEC(clusterer, cru, rawFECChannel, fecInPartition);
    const CfFragment& fragment = clusterer.mPmemory->fragment;
    float charge = ADCToFloat(adc, DECODE_MASK, DECODE_BITS_FACTOR);
    WriteCharge(clusterer, charge, padAndRow, fragment.toLocal(timeBin), pageDigitOffset + myOffset);
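// GetChannelBitmask splits the 80-bit channel mask of the time-bin header into
// three 32-bit words: the low and high halves of bitMaskLow plus bitMaskHigh.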
  chan[0] = tbHdr.bitMaskLow & 0xfffffffful;
  chan[1] = tbHdr.bitMaskLow >> (sizeof(uint32_t) * CHAR_BIT);
  chan[2] = tbHdr.bitMaskHigh;
  if (chanIndex >= zerosupp_link_based::ChannelPerTBHeader) {
  constexpr uint8_t N_BITS_PER_ENTRY = sizeof(*chan) * CHAR_BIT;
  const uint8_t entryIndex = chanIndex / N_BITS_PER_ENTRY;
  const uint8_t bitInEntry = chanIndex % N_BITS_PER_ENTRY;
  return chan[entryIndex] & (1 << bitInEntry);
template <class Decoder>
GPUd() void GPUTPCCFDecodeZSLinkBase::Decode(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, typename Decoder::GPUSharedMemory& smem, processorType& clusterer, int32_t firstHBF)
  const uint32_t sector = clusterer.mISector;
  const uint32_t endpoint = clusterer.mPzsOffsets[iBlock].endpoint;
  uint32_t pageDigitOffset = clusterer.mPzsOffsets[iBlock].offset;
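  // In the GPU code path a block handles the single page indexed by
  // mPzsOffsets[iBlock].num; otherwise the nested loops below cover all ZS
  // pointers and pages of the endpoint.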
  const uint32_t i = 0;
  const uint32_t j = clusterer.mPzsOffsets[iBlock].num;
  for (uint32_t i = clusterer.mMinMaxCN[endpoint].zsPtrFirst; i < clusterer.mMinMaxCN[endpoint].zsPtrLast; i++) {
    const uint32_t minJ = (i == clusterer.mMinMaxCN[endpoint].zsPtrFirst) ? clusterer.mMinMaxCN[endpoint].zsPageFirst : 0;
    for (uint32_t j = minJ; j < maxJ; j++) {
      const uint8_t* page = (const uint8_t*)pageSrc;
      const auto* rdHdr = Peek<header::RAWDataHeader>(page);
      pageDigitOffset = Decoder::DecodePage(smem, clusterer, iBlock, nThreads, iThread, page, pageDigitOffset, firstHBF);
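  // Consistency check: after decoding, the running digit offset must match the
  // offset precomputed for the next block's page.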
#ifdef GPUCA_CHECK_TPCZS_CORRUPTION
  if (iThread == 0 && iBlock < nBlocks - 1) {
    uint32_t maxOffset = clusterer.mPzsOffsets[iBlock + 1].offset;
    if (pageDigitOffset != maxOffset) {
      clusterer.raiseError(GPUErrors::ERROR_TPCZS_INVALID_OFFSET, clusterer.mISector * 1000 + endpoint, pageDigitOffset, maxOffset);
GPUd() o2::tpc::PadPos GPUTPCCFDecodeZSLinkBase::GetPadAndRowFromFEC(processorType& clusterer, int32_t cru, int32_t rawFECChannel, int32_t fecInPartition)
#ifdef GPUCA_TPC_GEOMETRY_O2
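  // Map (CRU, raw FEC channel, FEC in partition) to the SAMPA chip and channel
  // it belongs to, then look up the pad and row through the global SAMPA
  // channel id in the TPC channel mapping.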
  const int32_t regionIter = cru % 2;
  const int32_t istreamm = ((rawFECChannel % 10) / 2);
  const int32_t partitionStream = istreamm + regionIter * 5;
  const int32_t sampaOnFEC = geo.GetSampaMapping(partitionStream);
  const int32_t channel = (rawFECChannel % 2) + 2 * (rawFECChannel / 10);
  const int32_t channelOnSAMPA = channel + geo.GetChannelOffset(partitionStream);
  const int32_t partition = (cru % 10) / 2;
  const int32_t fecInSector = geo.GetSectorFECOffset(partition) + fecInPartition;
  assert(gpuMapping != nullptr);
  uint16_t globalSAMPAId = (static_cast<uint16_t>(fecInSector) << 8) + (static_cast<uint16_t>(sampaOnFEC) << 5) + static_cast<uint16_t>(channelOnSAMPA);
GPUd() void GPUTPCCFDecodeZSLinkBase::WriteCharge(processorType& clusterer, float charge, PadPos padAndRow, TPCFragmentTime localTime, size_t positionOffset)
#ifdef GPUCA_CHECK_TPCZS_CORRUPTION
  charge *= clusterer.GetConstantMem()->calibObjects.tpcPadGain->getGainCorrection(sector, padAndRow.getRow(), padAndRow.getPad());
GPUd() uint16_t GPUTPCCFDecodeZSLinkBase::FillWithInvalid(processorType& clusterer, int32_t iThread, int32_t nThreads, uint32_t pageDigitOffset, uint16_t nSamples)
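// Decoder for the dense link-based zero-suppression format: time-bin headers
// are packed back to back, channel masks are stored sparsely behind an L2
// bitmask, and a time-bin payload may continue on the following raw page.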
GPUd() void GPUTPCCFDecodeZSDenseLink::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, int32_t firstHBF)
{
  Decode<GPUTPCCFDecodeZSDenseLink>(nBlocks, nThreads, iBlock, iThread, smem, clusterer, firstHBF);
}
GPUd() uint32_t GPUTPCCFDecodeZSDenseLink::DecodePage(GPUSharedMemory& smem, processorType& clusterer, int32_t iBlock, int32_t nThreads, int32_t iThread, const uint8_t* page, uint32_t pageDigitOffset, int32_t firstHBF)
#ifdef GPUCA_GPUCODE
  constexpr bool DecodeInParallel = true;
#else
  constexpr bool DecodeInParallel = false;
#endif
  const uint8_t* const pageStart = page;
  const auto* rawDataHeader = Peek<header::RAWDataHeader>(page);
  const auto* decHeader = Peek<TPCZSHDRV2>(page, raw::RDHUtils::getMemorySize(*rawDataHeader) - sizeof(TPCZSHDRV2));
  ConsumeHeader<header::RAWDataHeader>(page);
  uint16_t nSamplesWritten = 0;
  const uint16_t nSamplesInPage = decHeader->nADCsamples;
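  // The decodable payload ends before the trailing TPCZSHDRV2, which is stored
  // at the end of the page, and before the trigger word if the
  // TriggerWordPresent flag is set.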
  const auto* payloadEnd = Peek(pageStart, raw::RDHUtils::getMemorySize(*rawDataHeader) - sizeof(TPCZSHDRV2) - ((decHeader->flags & TPCZSHDRV2::ZSFlags::TriggerWordPresent) ? TPCZSHDRV2::TRIGGER_WORD_SIZE : 0));
  for (uint16_t i = 0; i < decHeader->nTimebinHeaders; i++) {
    [[maybe_unused]] ptrdiff_t sizeLeftInPage = payloadEnd - page;
    assert(sizeLeftInPage > 0);
    uint16_t nSamplesWrittenTB = 0;
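    // A time-bin payload may spill over into the next raw page. If the next
    // page's counter continues the current one, decode across the boundary;
    // otherwise the HBF is incomplete and the remaining samples are filled
    // with invalid entries.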
    if ((uint16_t)(raw::RDHUtils::getPageCounter(rawDataHeader) + 1) == raw::RDHUtils::getPageCounter(nextPage)) {
      nSamplesWrittenTB = DecodeTB<DecodeInParallel, true>(clusterer, smem, iThread, page, pageDigitOffset, rawDataHeader, firstHBF, decHeader->cruID, payloadEnd, nextPage);
      nSamplesWrittenTB = FillWithInvalid(clusterer, iThread, nThreads, pageDigitOffset, nSamplesInPage - nSamplesWritten);
#ifdef GPUCA_CHECK_TPCZS_CORRUPTION
      clusterer.raiseError(GPUErrors::ERROR_TPCZS_INCOMPLETE_HBF, clusterer.mISector * 1000 + decHeader->cruID, raw::RDHUtils::getPageCounter(rawDataHeader), raw::RDHUtils::getPageCounter(nextPage));
      nSamplesWrittenTB = DecodeTB<DecodeInParallel, false>(clusterer, smem, iThread, page, pageDigitOffset, rawDataHeader, firstHBF, decHeader->cruID, payloadEnd, nextPage);
    assert(nSamplesWritten <= nSamplesInPage);
    nSamplesWritten += nSamplesWrittenTB;
    pageDigitOffset += nSamplesWrittenTB;
#ifdef GPUCA_CHECK_TPCZS_CORRUPTION
  if (iThread == 0 && nSamplesWritten != nSamplesInPage) {
    clusterer.raiseError(GPUErrors::ERROR_TPCZS_INVALID_NADC, clusterer.mISector * 1000 + decHeader->cruID, nSamplesInPage, nSamplesWritten);
  return pageDigitOffset;
template <bool DecodeInParallel, bool PayloadExtendsToNextPage>
  processorType& clusterer,
  [[maybe_unused]] GPUSharedMemory& smem,
  const uint8_t*& page,
  uint32_t pageDigitOffset,
  const header::RAWDataHeader* rawDataHeader,
  [[maybe_unused]] const uint8_t* payloadEnd,
  [[maybe_unused]] const uint8_t* nextPage)
  if constexpr (DecodeInParallel) {
    return DecodeTBMultiThread<PayloadExtendsToNextPage>(clusterer, smem, iThread, page, pageDigitOffset, rawDataHeader, firstHBF, cru, payloadEnd, nextPage);
  uint16_t nSamplesWritten = 0;
  nSamplesWritten = DecodeTBSingleThread<PayloadExtendsToNextPage>(clusterer, page, pageDigitOffset, rawDataHeader, firstHBF, cru, payloadEnd, nextPage);
  return warp_broadcast(nSamplesWritten, 0);
template <bool PayloadExtendsToNextPage>
  processorType& clusterer,
  GPUSharedMemory& smem,
  const int32_t iThread,
  const uint8_t*& page,
  uint32_t pageDigitOffset,
  const header::RAWDataHeader* rawDataHeader,
  [[maybe_unused]] const uint8_t* payloadEnd,
  [[maybe_unused]] const uint8_t* nextPage)
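// MAYBE_PAGE_OVERFLOW advances a pointer onto the next raw page (skipping its
// RDH) once it runs past the current payload; PEEK_OVERFLOW reads a byte at an
// offset with the same page-boundary handling applied.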
#define MAYBE_PAGE_OVERFLOW(pagePtr)                               \
  if constexpr (PayloadExtendsToNextPage) {                        \
    if (pagePtr >= payloadEnd && pagePtr < nextPage) {             \
      ptrdiff_t diff = pagePtr - payloadEnd;                       \
      pagePtr = nextPage;                                          \
      ConsumeBytes(pagePtr, sizeof(header::RAWDataHeader) + diff); \
    }                                                              \
  } else {                                                         \
    assert(pagePtr <= payloadEnd);                                 \
  }
#define PEEK_OVERFLOW(pagePtr, offset)                                                      \
  (*(PayloadExtendsToNextPage && (pagePtr) < nextPage && (pagePtr) + (offset) >= payloadEnd \
       ? nextPage + sizeof(header::RAWDataHeader) + ((pagePtr) + (offset) - payloadEnd)     \
       : (pagePtr) + (offset)))

#define TEST_BIT(x, bit) static_cast<bool>((x) & (1 << (bit)))
  static_assert(NTHREADS == GPUCA_WARP_SIZE, "Decoding TB Headers in parallel assumes block size is a single warp.");
  const CfFragment& fragment = clusterer.mPmemory->fragment;
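  // The 16-bit time-bin block header packs the number of links in this time
  // bin (low 4 bits) and the bunch crossing of the time bin (upper 12 bits).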
  uint16_t tbbHdr = ConsumeByte(page);
  tbbHdr |= static_cast<uint16_t>(ConsumeByte(page)) << CHAR_BIT;
  uint8_t nLinksInTimebin = tbbHdr & 0x000F;
  uint16_t linkBC = (tbbHdr & 0xFFF0) >> 4;
  uint16_t nSamplesInTB = 0;
  for (uint8_t iLink = 0; iLink < nLinksInTimebin; iLink++) {
    uint8_t timebinLinkHeaderStart = ConsumeByte(page);
    smem.linkIds[iLink] = timebinLinkHeaderStart & 0b00011111;
    bool bitmaskIsFlat = timebinLinkHeaderStart & 0b00100000;
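    // Each link header byte carries the link id (5 bits), a flag for a "flat"
    // bitmask, and the top two bits of the L2 bitmask. The L2 bitmask marks
    // which of the per-link channel-mask bytes are actually stored in the page.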
    uint16_t bitmaskL2 = 0x03FF;
    if (not bitmaskIsFlat) {
      bitmaskL2 = static_cast<uint16_t>(timebinLinkHeaderStart & 0b11000000) << 2 | static_cast<uint16_t>(ConsumeByte(page));
    int32_t nBytesBitmask = CAMath::Popcount(bitmaskL2);
    assert(nBytesBitmask <= 10);
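    // All 80 channels of the link are examined warp-parallel: a predicate scan
    // over the per-channel activity bits yields each thread's slot in the
    // shared list of active FEC channels for this time bin.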
    for (int32_t chan = iThread; chan < CAMath::nextMultipleOf<NTHREADS>(80); chan += NTHREADS) {
      int32_t chanL2Idx = chan / 8;
      bool l2 = TEST_BIT(bitmaskL2, chanL2Idx);
      int32_t chanByteOffset = nBytesBitmask - 1 - CAMath::Popcount(bitmaskL2 >> (chanL2Idx + 1));
      assert(myChannelHasData == 0 || myChannelHasData == 1);
      int32_t nSamplesStep;
      int32_t threadSampleOffset = CfUtils::warpPredicateScan(myChannelHasData, &nSamplesStep);
      if (myChannelHasData) {
        smem.rawFECChannels[nSamplesInTB + threadSampleOffset] = chan;
      nSamplesInTB += nSamplesStep;
    ConsumeBytes(page, nBytesBitmask);
    smem.samplesPerLinkEnd[iLink] = nSamplesInTB;
  const uint8_t* adcData = ConsumeBytes(page, (nSamplesInTB * DECODE_BITS + 7) / 8);
  if (not fragment.contains(timeBin)) {
    return FillWithInvalid(clusterer, iThread, NTHREADS, pageDigitOffset, nSamplesInTB);
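  // Samples are decoded warp-parallel: each thread computes the bit position
  // of its sample, assembles DECODE_BITS bits from the byte stream, and maps
  // the sample back to a pad and row via the link id and FEC channel recorded
  // in shared memory.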
  for (uint16_t sample = iThread; sample < nSamplesInTB; sample += NTHREADS) {
    const uint16_t adcBitOffset = sample * DECODE_BITS;
    uint16_t adcByteOffset = adcBitOffset / CHAR_BIT;
    const uint8_t adcOffsetInByte = adcBitOffset - adcByteOffset * CHAR_BIT;
    static_assert(DECODE_BITS <= sizeof(uint16_t) * CHAR_BIT);
    while (bits < DECODE_BITS) {
    byte >>= adcOffsetInByte;
    while (smem.samplesPerLinkEnd[iLink] <= sample) {
    int32_t rawFECChannelLink = smem.rawFECChannels[sample];
    o2::tpc::PadPos padAndRow = GetPadAndRowFromFEC(clusterer, cru, rawFECChannelLink, smem.linkIds[iLink]);
    float charge = ADCToFloat(byte, DECODE_MASK, DECODE_BITS_FACTOR);
    WriteCharge(clusterer, charge, padAndRow, fragment.toLocal(timeBin), pageDigitOffset + sample);
  assert(PayloadExtendsToNextPage || adcData <= page);
  assert(PayloadExtendsToNextPage || page <= payloadEnd);
#undef MAYBE_PAGE_OVERFLOW
template <bool PayloadExtendsToNextPage>
  processorType& clusterer,
  const uint8_t*& page,
  uint32_t pageDigitOffset,
  const header::RAWDataHeader* rawDataHeader,
  [[maybe_unused]] const uint8_t* payloadEnd,
  [[maybe_unused]] const uint8_t* nextPage)
#define MAYBE_PAGE_OVERFLOW(pagePtr)                               \
  if constexpr (PayloadExtendsToNextPage) {                        \
    if (pagePtr >= payloadEnd && pagePtr < nextPage) {             \
      ptrdiff_t diff = pagePtr - payloadEnd;                       \
      pagePtr = nextPage;                                          \
      ConsumeBytes(pagePtr, sizeof(header::RAWDataHeader) + diff); \
    }                                                              \
  } else {                                                         \
    assert(pagePtr <= payloadEnd);                                 \
  }
  using zerosupp_link_based::ChannelPerTBHeader;
  const CfFragment& fragment = clusterer.mPmemory->fragment;
  uint8_t linkIds[MaxNLinksPerTimebin];
  uint8_t channelMasks[MaxNLinksPerTimebin * 10] = {0};
  uint16_t nSamplesWritten = 0;
  uint16_t tbbHdr = ConsumeByte(page);
  tbbHdr |= static_cast<uint16_t>(ConsumeByte(page)) << CHAR_BIT;
  uint8_t nLinksInTimebin = tbbHdr & 0x000F;
  uint16_t linkBC = (tbbHdr & 0xFFF0) >> 4;
  uint16_t nSamplesInTB = 0;
  for (uint8_t iLink = 0; iLink < nLinksInTimebin; iLink++) {
    uint8_t timebinLinkHeaderStart = ConsumeByte(page);
    linkIds[iLink] = timebinLinkHeaderStart & 0b00011111;
    bool bitmaskIsFlat = timebinLinkHeaderStart & 0b00100000;
    uint16_t bitmaskL2 = 0x0FFF;
    if (not bitmaskIsFlat) {
      bitmaskL2 = static_cast<uint16_t>(timebinLinkHeaderStart & 0b11000000) << 2 | static_cast<uint16_t>(ConsumeByte(page));
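    // Collect the per-link channel-mask bytes that are present and count the
    // active channels; the count fixes how many packed samples follow.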
    for (int32_t i = 0; i < 10; i++) {
      if (bitmaskL2 & 1 << i) {
        nSamplesInTB += CAMath::Popcount(*Peek(page));
        channelMasks[10 * iLink + i] = ConsumeByte(page);
  const uint8_t* adcData = ConsumeBytes(page, (nSamplesInTB * DECODE_BITS + 7) / 8);
  if (not fragment.contains(timeBin)) {
    FillWithInvalid(clusterer, 0, 1, pageDigitOffset, nSamplesInTB);
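  // Serial bit-stream decoding, as in the link-based decoder: accumulate bytes
  // and emit one sample per active channel until all samples of the time bin
  // are written.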
  uint32_t byte = 0, bits = 0;
  uint16_t rawFECChannel = 0;
  while (nSamplesWritten < nSamplesInTB) {
    byte |= static_cast<uint32_t>(ConsumeByte(adcData)) << bits;
    while (bits >= DECODE_BITS) {
      for (; !ChannelIsActive(channelMasks, rawFECChannel); rawFECChannel++) {
      int32_t iLink = rawFECChannel / ChannelPerTBHeader;
      int32_t rawFECChannelLink = rawFECChannel % ChannelPerTBHeader;
      o2::tpc::PadPos padAndRow = GetPadAndRowFromFEC(clusterer, cru, rawFECChannelLink, linkIds[iLink]);
      float charge = ADCToFloat(byte, DECODE_MASK, DECODE_BITS_FACTOR);
      WriteCharge(clusterer, charge, padAndRow, fragment.toLocal(timeBin), pageDigitOffset + nSamplesWritten);
      byte >>= DECODE_BITS;
  assert(PayloadExtendsToNextPage || adcData <= page);
  assert(PayloadExtendsToNextPage || page <= payloadEnd);
  assert(nSamplesWritten == nSamplesInTB);
  return nSamplesWritten;
#undef MAYBE_PAGE_OVERFLOW
  constexpr uint8_t N_BITS_PER_ENTRY = sizeof(*chan) * CHAR_BIT;
  const uint8_t entryIndex = chanIndex / N_BITS_PER_ENTRY;
  const uint8_t bitInEntry = chanIndex % N_BITS_PER_ENTRY;
  return chan[entryIndex] & (1 << bitInEntry);