Project
Loading...
Searching...
No Matches
GPUTPCCompressionKernels.h
Go to the documentation of this file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
14
15#ifndef GPUTPCCONMPRESSIONKERNELS_H
16#define GPUTPCCONMPRESSIONKERNELS_H
17
18#include "GPUGeneralKernels.h"
19
20namespace o2::tpc
21{
22struct ClusterNative;
23} // namespace o2::tpc
24
25namespace o2::gpu
26{
28{
29 public:
// Compile-time query for the reconstruction step this kernel class belongs to.
// Always returns GPUDataTypes::RecoStep::TPCCompression.
30 GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::TPCCompression; }
31
32 enum K : int32_t {
35 };
36
// Shared-memory structure handed to Thread() of this kernel class.
// It extends GPUKernelTemplate::GPUSharedMemoryScan64, instantiated with int32_t and the
// thread count configured for the step1unattached kernel.
// NOTE(review): listing line 40 is absent from this extract, so a member may be missing
// between lastIndex and the closing brace — confirm against the original header.
37 struct GPUSharedMemory : public GPUKernelTemplate::GPUSharedMemoryScan64<int32_t, GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCompressionKernels_step1unattached)> {
// Counter declared through the GPUAtomic macro; presumably updated concurrently by several threads — confirm in the kernel definition.
38 GPUAtomic(uint32_t) nCount;
// Plain (non-atomic) 32-bit index; its exact meaning is not visible in this header.
39 uint32_t lastIndex;
41 };
42
// Kernel entry point, declared only (the definition is not part of this listing).
// The kernel variant is chosen at compile time through iKernel, which defaults to defaultKernel.
// Parameters, as far as the names indicate (TODO confirm against the definition):
//   nBlocks / nThreads - number of blocks and of threads per block
//   iBlock / iThread   - index of the calling block and thread
//   smem               - reference to this class's GPUSharedMemory instance
//   processors         - reference to the processorType object the kernel works on
43 template <int32_t iKernel = defaultKernel>
44 GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() processors);
45
46 template <int32_t I>
48 {
49 public:
50 GPUhdi() GPUTPCCompressionKernels_Compare(const o2::tpc::ClusterNative* p) : mClsPtr(p) {}
51 GPUd() bool operator()(uint32_t a, uint32_t b) const;
52
54 const o2::tpc::ClusterNative* mClsPtr;
55 };
56};
57
59{
60
61 public:
62 enum K : int32_t {
67 multiBlock
68 };
69
// Word types of increasing width used by the buffers and copy helpers declared below.
// Vec16/Vec32/Vec64 are unsigned integers of 16/32/64 bits.
// Vec128 is uint4, presumably the 4 x 32-bit GPU vector type (128 bits) — TODO confirm where uint4 is defined.
70 using Vec16 = uint16_t;
71 using Vec32 = uint32_t;
72 using Vec64 = uint64_t;
73 using Vec128 = uint4;
74
// Compile-time checks: every gather kernel variant (buffered32, buffered64, buffered128, multiBlock)
// must be configured with the same thread count as the unbuffered variant.
// The shared-memory struct below is sized from the unbuffered configuration only.
75 static_assert(GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered) == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_buffered32));
76 static_assert(GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered) == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_buffered64));
77 static_assert(GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered) == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_buffered128));
78 static_assert(GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered) == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_multiBlock));
// Shared-memory structure for the gather kernels. It extends GPUSharedMemoryScan64<uint32_t, ...>
// and adds an anonymous union whose members all share the same storage.
79 struct GPUSharedMemory : public GPUKernelTemplate::GPUSharedMemoryScan64<uint32_t, GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered)> {
80 union {
// One 32-bit value per warp.
81 uint32_t warpOffset[GPUCA_GET_WARP_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered)];
// [warp][lane] arrays holding one word per lane, in 32-, 64- and 128-bit flavours.
82 Vec32 buf32[GPUCA_GET_WARP_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered)][GPUCA_WARP_SIZE];
83 Vec64 buf64[GPUCA_GET_WARP_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered)][GPUCA_WARP_SIZE];
84 Vec128 buf128[GPUCA_GET_WARP_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered)][GPUCA_WARP_SIZE];
// Alternative for the unbuffered variant: a size and a source offset per [warp][lane].
85 struct {
86 uint32_t sizes[GPUCA_GET_WARP_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered)][GPUCA_WARP_SIZE];
87 uint32_t srcOffsets[GPUCA_GET_WARP_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered)][GPUCA_WARP_SIZE];
88 } unbuffered;
89 };
90
// Returns a V* for warp iWarp. Presumably it selects the bufNN member whose element type is V —
// the definition is not in this listing, TODO confirm.
91 template <typename V>
92 GPUdi() V* getBuffer(int32_t iWarp);
93 };
94
// Union that overlays a single BaseVector value (`all`) with an array of Scalar elements (`elems`),
// so the same bytes can be accessed either as one wide word or element by element.
95 template <typename Scalar, typename BaseVector>
96 union CpyVector {
97 enum {
// Number of Scalar elements that fit into one BaseVector (integer division of the two sizes).
98 Size = sizeof(BaseVector) / sizeof(Scalar),
99 };
100 BaseVector all;
101 Scalar elems[Size];
102 };
103
// Kernel entry point of the gather kernels, declared only (definition not part of this listing).
// iKernel selects the variant at compile time and defaults to defaultKernel.
// Parameter meanings as suggested by their names (TODO confirm against the definition):
// block/thread counts (nBlocks, nThreads), the caller's block/thread index (iBlock, iThread),
// the GPUSharedMemory instance (smem) and the processorType object (processors).
104 template <int32_t iKernel = defaultKernel>
105 GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() processors);
106
// Boolean check on ptr; judging by the name it tests whether ptr is aligned suitably for type T
// (TODO confirm against the definition, which is not in this listing).
// T does not appear in the parameter list, so callers must name it explicitly; S is deduced from ptr.
107 template <typename T, typename S>
108 GPUdi() static bool isAlignedTo(const S* ptr);
109
// Copy helper taking a destination, a source, a size and the calling thread's index among nThreads.
// Presumably the nThreads threads copy `size` elements of T from src to dst cooperatively —
// TODO confirm units of `size` and the work split against the definition.
110 template <typename T>
111 GPUdi() static void compressorMemcpy(GPUgeneric() T* dst, GPUgeneric() const T* src, uint32_t size, int32_t nThreads, int32_t iThread);
112
// Copy helper parameterised on the element type (Scalar) and a wider word type (Vector).
// Vector is not deducible from the arguments and must be named by the caller.
// Presumably copies using Vector-sized accesses where possible — TODO confirm against the definition.
113 template <typename Scalar, typename Vector>
114 GPUdi() static void compressorMemcpyVectorised(Scalar* dst, const Scalar* src, uint32_t size, int32_t nThreads, int32_t iThread);
115
// Copy helper with optional block parameters: nBlocks defaults to 1 and iBlock to 0,
// so callers that pass only thread information get the single-block case.
// Presumably an element-wise copy distributed over threads (and blocks) — TODO confirm against the definition.
116 template <typename T>
117 GPUdi() static void compressorMemcpyBasic(T* dst, const T* src, uint32_t size, int32_t nThreads, int32_t iThread, int32_t nBlocks = 1, int32_t iBlock = 0);
118
// Copy helper that additionally receives a buffer of word type V, a per-entry array `nums`,
// a per-entry array of source offsets and the number of entries, plus the caller's lane index among nLanes.
// Optional arguments: diff defaults to 0 and scaleBase1024 defaults to 1024.
// The semantics of diff and scaleBase1024 are not visible in this header — TODO confirm in the definition.
// NOTE(review): the parameter name `srcOffets` looks like a typo for `srcOffsets`; left untouched here.
119 template <typename V, typename T, typename S>
120 GPUdi() static void compressorMemcpyBuffered(V* buf, T* dst, const T* src, const S* nums, const uint32_t* srcOffets, uint32_t nEntries, int32_t nLanes, int32_t iLane, int32_t diff = 0, size_t scaleBase1024 = 1024);
121
// Returns a uint32_t computed from `nums` over the index range given by start and end, using the shared memory `smem`
// and the caller's warp/lane position (iWarp of nWarps, iLane of nLanes).
// Presumably fills per-warp offsets in smem — whether `end` is inclusive and what the return value
// represents must be confirmed against the definition.
122 template <typename T>
123 GPUdi() static uint32_t calculateWarpOffsets(GPUSharedMemory& smem, T* nums, uint32_t start, uint32_t end, int32_t nWarps, int32_t iWarp, int32_t nLanes, int32_t iLane);
124
// Gather routine parameterised on the buffer word type V (not deducible, must be named by the caller).
// Takes the same argument list as Thread(); presumably the implementation behind the bufferedNN kernel variants — TODO confirm.
125 template <typename V>
126 GPUdii() static void gatherBuffered(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() processors);
127
// Non-template gather routine with the same argument list as Thread();
// presumably the implementation behind the multiBlock kernel variant — TODO confirm against the definition.
128 GPUdii() static void gatherMulti(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() processors);
129};
130
131} // namespace o2::gpu
132
133#endif
#define GPUsharedref()
#define GPUdii()
#define GPUgeneric()
#define GPUCA_TPC_COMP_CHUNK_SIZE
#define GPUCA_GET_THREAD_COUNT(...)
#define GPUCA_GET_WARP_COUNT(...)
#define protected
TBranch * ptr
int32_t int32_t int32_t processorType & processors
GPUd() static void Thread(int32_t nBlocks
GPUhdi() constexpr static GPUDataTypes
GPUd() static void Thread(int32_t nBlocks
int32_t int32_t int32_t processorType & GPUrestrict() processors)
GLenum src
Definition glcorearb.h:1767
GLsizeiptr size
Definition glcorearb.h:659
GLuint GLuint end
Definition glcorearb.h:469
GLuint GLsizei const GLuint const GLintptr const GLsizeiptr * sizes
Definition glcorearb.h:2595
GLboolean GLboolean GLboolean b
Definition glcorearb.h:1233
GLenum GLenum dst
Definition glcorearb.h:1767
GLuint start
Definition glcorearb.h:469
GLboolean GLboolean GLboolean GLboolean a
Definition glcorearb.h:1233
GLenum GLuint GLenum GLsizei const GLchar * buf
Definition glcorearb.h:2514
GPUdi() o2
Definition TrackTRD.h:38
Global TPC definitions and constants.
Definition SimTraits.h:167
a couple of static helper functions to create timestamp values for CCDB queries or override obsolete ...
std::vector< std::byte > getBuffer(const char *filename)