// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
// All rights not expressly granted are reserved.
//
// This software is distributed under the terms of the GNU General Public
// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
//
// In applying this license CERN does not waive the privileges and immunities
// granted to it by virtue of its status as an Intergovernmental Organization
// or submit itself to any jurisdiction.

/// \file GPUTPCDecompressionKernels.cxx

#include "GPUTPCDecompressionKernels.h"
#include "GPULogging.h"
#include "GPUConstantMem.h"
#include "GPUCommonAlgorithm.h"
#include "TPCClusterDecompressionCore.inc"

using namespace o2::gpu;
using namespace o2::tpc;

template <>
GPUdii() void GPUTPCDecompressionKernels::Thread<GPUTPCDecompressionKernels::step0attached>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors, int32_t trackStart, int32_t trackEnd)
{
  GPUTPCDecompression& GPUrestrict() decompressor = processors.tpcDecompressor;
  CompressedClusters& GPUrestrict() cmprClusters = decompressor.mInputGPU;
  const GPUParam& GPUrestrict() param = processors.param;

  const uint32_t maxTime = (param.continuousMaxTimeBin + 1) * ClusterNative::scaleTimePacked - 1;

  for (int32_t i = trackStart + get_global_id(0); i < trackEnd; i += get_global_size(0)) {
    uint32_t offset = decompressor.mAttachedClustersOffsets[i];
    TPCClusterDecompressionCore::decompressTrack(cmprClusters, param, maxTime, i, offset, decompressor);
  }
}

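// Step 1: decompress the clusters not attached to any track, one (sector, row)
// pad row per loop iteration. Clusters produced by step 0 are first copied from
// the temporary buffer to their final location, then the row's unattached
// clusters are decompressed behind them. If configured, all clusters of the row
// are finally shifted in time and clamped to [0, continuousMaxTimeBin].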
template <>
GPUdii() void GPUTPCDecompressionKernels::Thread<GPUTPCDecompressionKernels::step1unattached>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors, int32_t sectorStart, int32_t nSectors)
{
  GPUTPCDecompression& GPUrestrict() decompressor = processors.tpcDecompressor;
  CompressedClusters& GPUrestrict() cmprClusters = decompressor.mInputGPU;
  ClusterNative* GPUrestrict() clusterBuffer = decompressor.mNativeClustersBuffer;
  const ClusterNativeAccess* outputAccess = decompressor.mClusterNativeAccess;
  uint32_t* offsets = decompressor.mUnattachedClustersOffsets;
  for (int32_t i = get_global_id(0); i < GPUCA_ROW_COUNT * nSectors; i += get_global_size(0)) {
    uint32_t iRow = i % GPUCA_ROW_COUNT;
    uint32_t iSector = sectorStart + (i / GPUCA_ROW_COUNT);
    const uint32_t linearIndex = iSector * GPUCA_ROW_COUNT + iRow;
    uint32_t tmpBufferIndex = computeLinearTmpBufferIndex(iSector, iRow, decompressor.mMaxNativeClustersPerBuffer);
    ClusterNative* buffer = clusterBuffer + outputAccess->clusterOffset[iSector][iRow];
    if (decompressor.mNativeClustersIndex[linearIndex] != 0) {
      decompressorMemcpyBasic(buffer, decompressor.mTmpNativeClusters + tmpBufferIndex, decompressor.mNativeClustersIndex[linearIndex]);
    }
    ClusterNative* clout = buffer + decompressor.mNativeClustersIndex[linearIndex];
    uint32_t end = offsets[linearIndex] + ((linearIndex >= decompressor.mInputGPU.nSliceRows) ? 0 : decompressor.mInputGPU.nSliceRowClusters[linearIndex]);
    TPCClusterDecompressionCore::decompressHits(cmprClusters, offsets[linearIndex], end, clout);
    if (processors.param.rec.tpc.clustersShiftTimebins != 0.f) {
      for (uint32_t k = 0; k < outputAccess->nClusters[iSector][iRow]; k++) {
        auto& cl = buffer[k];
        float t = cl.getTime() + processors.param.rec.tpc.clustersShiftTimebins;
        if (t < 0) {
          t = 0;
        }
        if (processors.param.continuousMaxTimeBin > 0 && t > processors.param.continuousMaxTimeBin) {
          t = processors.param.continuousMaxTimeBin;
        }
        cl.setTime(t);
      }
    }
  }
}

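// computeLinearTmpBufferIndex (declared in GPUTPCDecompressionKernels.h) maps
// the (sector, row) pair used above to the start of that row's slot in the flat
// temporary cluster buffer. A minimal sketch of such a mapping, assuming one
// fixed-size slot of maxClustersPerBuffer entries per row (illustrative only,
// not necessarily the actual definition):
//
//   GPUdi() static uint32_t computeLinearTmpBufferIndex(uint32_t sector, uint32_t row, uint32_t maxClustersPerBuffer)
//   {
//     return (sector * GPUCA_ROW_COUNT + row) * maxClustersPerBuffer;
//   }

// Plain element-wise copy, usable in device code where memcpy is not generally available.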
template <typename T>
GPUdi() void GPUTPCDecompressionKernels::decompressorMemcpyBasic(T* GPUrestrict() dst, const T* GPUrestrict() src, uint32_t size)
{
  for (uint32_t i = 0; i < size; i++) {
    dst[i] = src[i];
  }
}

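// A cluster passes the filter if no time-bin cut is configured
// (param.tpcCutTimeBin <= 0) or its time lies below the cut.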
GPUdi() bool GPUTPCDecompressionUtilKernels::isClusterKept(const o2::tpc::ClusterNative& cl, const GPUParam& GPUrestrict() param)
{
  return param.tpcCutTimeBin > 0 ? cl.getTime() < param.tpcCutTimeBin : true;
}

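// First pass of the optional cluster filter: count, per (sector, row), how many
// clusters survive the time-bin cut, so that output offsets can be computed.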
template <>
GPUdii() void GPUTPCDecompressionUtilKernels::Thread<GPUTPCDecompressionUtilKernels::countFilteredClusters>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors)
{
  const GPUParam& GPUrestrict() param = processors.param;
  GPUTPCDecompression& GPUrestrict() decompressor = processors.tpcDecompressor;
  const ClusterNativeAccess* clusterAccess = decompressor.mClusterNativeAccess;
  for (uint32_t i = get_global_id(0); i < GPUCA_NSECTORS * GPUCA_ROW_COUNT; i += get_global_size(0)) {
    uint32_t sector = i / GPUCA_ROW_COUNT;
    uint32_t row = i % GPUCA_ROW_COUNT;
    for (uint32_t k = 0; k < clusterAccess->nClusters[sector][row]; k++) {
      ClusterNative cl = clusterAccess->clusters[sector][row][k];
      if (isClusterKept(cl, param)) {
        decompressor.mNClusterPerSectorRow[i]++;
      }
    }
  }
}

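// Second pass of the filter: compact the surviving clusters into their final
// (sector, row) ranges in the output buffer.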
template <>
GPUdii() void GPUTPCDecompressionUtilKernels::Thread<GPUTPCDecompressionUtilKernels::storeFilteredClusters>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors)
{
  const GPUParam& GPUrestrict() param = processors.param;
  GPUTPCDecompression& GPUrestrict() decompressor = processors.tpcDecompressor;
  ClusterNative* GPUrestrict() clusterBuffer = decompressor.mNativeClustersBuffer;
  const ClusterNativeAccess* clusterAccess = decompressor.mClusterNativeAccess;
  const ClusterNativeAccess* outputAccess = processors.ioPtrs.clustersNative;
  for (uint32_t i = get_global_id(0); i < GPUCA_NSECTORS * GPUCA_ROW_COUNT; i += get_global_size(0)) {
    uint32_t sector = i / GPUCA_ROW_COUNT;
    uint32_t row = i % GPUCA_ROW_COUNT;
    uint32_t count = 0;
    for (uint32_t k = 0; k < clusterAccess->nClusters[sector][row]; k++) {
      const ClusterNative cl = clusterAccess->clusters[sector][row][k];
      if (isClusterKept(cl, param)) {
        clusterBuffer[outputAccess->clusterOffset[sector][row] + count] = cl;
        count++;
      }
    }
  }
}

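// Sort the clusters of each (sector, row) range in place to restore the
// canonical ClusterNative ordering after filtering (one thread per pad row).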
template <>
GPUdii() void GPUTPCDecompressionUtilKernels::Thread<GPUTPCDecompressionUtilKernels::sortPerSectorRow>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors)
{
  ClusterNative* GPUrestrict() clusterBuffer = processors.tpcDecompressor.mNativeClustersBuffer;
  const ClusterNativeAccess* outputAccess = processors.ioPtrs.clustersNative;
  for (uint32_t i = get_global_id(0); i < GPUCA_NSECTORS * GPUCA_ROW_COUNT; i += get_global_size(0)) {
    uint32_t sector = i / GPUCA_ROW_COUNT;
    uint32_t row = i % GPUCA_ROW_COUNT;
    ClusterNative* buffer = clusterBuffer + outputAccess->clusterOffset[sector][row];
    GPUCommonAlgorithm::sort(buffer, buffer + outputAccess->nClusters[sector][row]);
  }
}
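
// A plausible host-side launch sequence for these kernels (inferred from the
// data flow in this file; the actual scheduling lives in the tracking chain,
// e.g. GPUChainTracking in this codebase):
//   1. step0attached       - expand track-attached clusters into the tmp buffer
//   2. step1unattached     - merge tmp clusters and decompress unattached ones
//   3. countFilteredClusters -> storeFilteredClusters -> sortPerSectorRow
//      (optional time-bin filtering followed by per-row sorting)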