Project
Loading...
Searching...
No Matches
GPUTPCCompressionKernels.h
Go to the documentation of this file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
14
15#ifndef GPUTPCCONMPRESSIONKERNELS_H
16#define GPUTPCCONMPRESSIONKERNELS_H
17
18#include "GPUGeneralKernels.h"
19
// Forward declaration only: the declarations visible in this header refer to
// ClusterNative solely through `const o2::tpc::ClusterNative*` pointers, so the
// full type definition is not needed here.
20namespace o2::tpc
21{
22struct ClusterNative;
23} // namespace o2::tpc
24
25namespace o2::gpu
26{
28{
29 public:
// Returns the constant GPUDataTypes::RecoStep::TPCCompression, i.e. the
// reconstruction step associated with this kernel class. constexpr and static,
// so it needs no instance.
30 GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::TPCCompression; }
31
32 enum K : int32_t {
35 };
36
// Shared-memory layout handed to Thread() as `smem`.
// Extends GPUKernelTemplate::GPUSharedMemoryScan64<int32_t, N>, where N is
// GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCompressionKernels_step1unattached).
// NOTE(review): nCount is declared through GPUAtomic(uint32_t), so it is
// presumably updated atomically by the threads of a block; the role of
// lastIndex is not visible from this header - confirm in the kernel source.
// NOTE(review): listing line 40 is absent from this view; the struct may have
// a further member.
37 struct GPUSharedMemory : public GPUKernelTemplate::GPUSharedMemoryScan64<int32_t, GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCompressionKernels_step1unattached)> {
38 GPUAtomic(uint32_t) nCount;
39 uint32_t lastIndex;
41 };
42
// Per-thread kernel entry point, selected at compile time by iKernel
// (defaults to defaultKernel; presumably one of the K enumerators - confirm).
//   nBlocks / nThreads : launch dimensions as passed in by the caller
//   iBlock  / iThread  : index of the calling block / thread
//   smem               : this class's GPUSharedMemory instance
//   processors         : processorType reference, marked GPUrestrict()
// Declaration only; the definitions live outside this header.
43 template <int32_t iKernel = defaultKernel>
44 GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() processors);
45
46 template <int32_t I>
48 {
49 public:
// Stores the given ClusterNative pointer in mClsPtr; the comparator does not
// take ownership (it only keeps the raw const pointer).
50 GPUhdi() GPUTPCCompressionKernels_Compare(const o2::tpc::ClusterNative* p) : mClsPtr(p) {}
// Binary predicate over two uint32_t values; const, so it does not modify the
// comparator. NOTE(review): a and b are presumably indices into the array at
// mClsPtr, ordered according to the template parameter I - confirm against the
// definition, which is not in this header.
51 GPUd() bool operator()(uint32_t a, uint32_t b) const;
52
54 const o2::tpc::ClusterNative* mClsPtr;
55 };
56};
57
59{
60
61 public:
62 enum K : int32_t {
67 multiBlock
68 };
69
// Word types of increasing width. Vec32, Vec64 and Vec128 are the element
// types of the buf32 / buf64 / buf128 arrays in GPUSharedMemory below.
// NOTE(review): uint4 is presumably the GPU built-in 4 x 32-bit vector type
// (128 bits) - confirm for every backend this header is compiled for.
70 using Vec16 = uint16_t;
71 using Vec32 = uint32_t;
72 using Vec64 = uint64_t;
73 using Vec128 = uint4;
74
// Shared-memory layout handed to Thread() and the gather functions as `smem`.
// Extends GPUKernelTemplate::GPUSharedMemoryScan64<uint32_t, N>, where N is
// GPUCA_GET_THREAD_COUNT(GPUCA_LB_COMPRESSION_GATHER).
75 struct GPUSharedMemory : public GPUKernelTemplate::GPUSharedMemoryScan64<uint32_t, GPUCA_GET_THREAD_COUNT(GPUCA_LB_COMPRESSION_GATHER)> {
// Anonymous union: all members below overlay the same storage, so only one of
// these views holds meaningful data at any time.
76 union {
// One uint32_t per warp.
77 uint32_t warpOffset[GPUCA_GET_WARP_COUNT(GPUCA_LB_COMPRESSION_GATHER)];
// Per-warp buffers with GPUCA_WARP_SIZE entries each, in three element widths.
78 Vec32 buf32[GPUCA_GET_WARP_COUNT(GPUCA_LB_COMPRESSION_GATHER)][GPUCA_WARP_SIZE];
79 Vec64 buf64[GPUCA_GET_WARP_COUNT(GPUCA_LB_COMPRESSION_GATHER)][GPUCA_WARP_SIZE];
80 Vec128 buf128[GPUCA_GET_WARP_COUNT(GPUCA_LB_COMPRESSION_GATHER)][GPUCA_WARP_SIZE];
// Alternative view holding a size and a source offset per warp and per entry.
// NOTE(review): presumably used by the gather variant that does not stage data
// through the bufNN arrays - confirm in the kernel source.
81 struct {
82 uint32_t sizes[GPUCA_GET_WARP_COUNT(GPUCA_LB_COMPRESSION_GATHER)][GPUCA_WARP_SIZE];
83 uint32_t srcOffsets[GPUCA_GET_WARP_COUNT(GPUCA_LB_COMPRESSION_GATHER)][GPUCA_WARP_SIZE];
84 } unbuffered;
85 };
86
// Returns a V* for warp iWarp. NOTE(review): presumably the row of the bufNN
// array whose element type is V - the definition is not in this header.
87 template <typename V>
88 GPUdi() V* getBuffer(int32_t iWarp);
89 };
90
// Overlays one BaseVector value with an array of Scalar elements occupying the
// same storage, so the data can be accessed either as a whole (`all`) or
// element by element (`elems`).
// Size is the number of Scalars that fit into one BaseVector
// (sizeof(BaseVector) / sizeof(Scalar), integer division).
91 template <typename Scalar, typename BaseVector>
92 union CpyVector {
93 enum {
94 Size = sizeof(BaseVector) / sizeof(Scalar),
95 };
96 BaseVector all;
97 Scalar elems[Size];
98 };
99
// Per-thread kernel entry point, selected at compile time by iKernel
// (defaults to defaultKernel; presumably one of the K enumerators - confirm).
//   nBlocks / nThreads : launch dimensions as passed in by the caller
//   iBlock  / iThread  : index of the calling block / thread
//   smem               : this class's GPUSharedMemory instance
//   processors         : processorType reference, marked GPUrestrict()
// Declaration only; the definitions live outside this header.
100 template <int32_t iKernel = defaultKernel>
101 GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() processors);
102
// Alignment predicate on a pointer of type const S*. T must be given
// explicitly (it cannot be deduced from the argument).
// NOTE(review): presumably true when ptr satisfies the alignment of T -
// confirm against the definition.
103 template <typename T, typename S>
104 GPUdi() static bool isAlignedTo(const S* ptr);
105
// Copy from src to dst, called with the thread count and the calling thread's
// index (nThreads / iThread). Both pointers are marked GPUgeneric().
// NOTE(review): `size` is presumably an element count of T, not bytes, and the
// function presumably dispatches to the vectorised/basic variants below
// depending on alignment - confirm in the definition.
106 template <typename T>
107 GPUdi() static void compressorMemcpy(GPUgeneric() T* dst, GPUgeneric() const T* src, uint32_t size, int32_t nThreads, int32_t iThread);
108
// Copy variant parameterised on both the element type (Scalar) and a wider
// Vector type; Vector must be given explicitly.
// NOTE(review): presumably moves data in Vector-sized chunks (cf. CpyVector)
// and expects suitably aligned pointers - confirm in the definition.
109 template <typename Scalar, typename Vector>
110 GPUdi() static void compressorMemcpyVectorised(Scalar* dst, const Scalar* src, uint32_t size, int32_t nThreads, int32_t iThread);
111
// Copy variant that additionally accepts block information. With the defaults
// (nBlocks = 1, iBlock = 0) the caller describes a single block.
// NOTE(review): presumably an element-wise copy distributed over
// nThreads * nBlocks threads - confirm in the definition.
112 template <typename T>
113 GPUdi() static void compressorMemcpyBasic(T* dst, const T* src, uint32_t size, int32_t nThreads, int32_t iThread, int32_t nBlocks = 1, int32_t iBlock = 0);
114
// Copy variant that takes a staging buffer `buf` of type V* (cf.
// GPUSharedMemory::getBuffer<V>), per-entry counts `nums`, per-entry source
// offsets `srcOffets` and the number of entries `nEntries`; work is split
// across nLanes lanes with the caller being lane iLane.
// Defaults: diff = 0, scaleBase1024 = 1024.
// NOTE(review): the meaning of `diff` and `scaleBase1024` is not visible here;
// the latter is presumably a fixed-point scale factor with 1024 == 1.0 -
// confirm in the definition.
115 template <typename V, typename T, typename S>
116 GPUdi() static void compressorMemcpyBuffered(V* buf, T* dst, const T* src, const S* nums, const uint32_t* srcOffets, uint32_t nEntries, int32_t nLanes, int32_t iLane, int32_t diff = 0, size_t scaleBase1024 = 1024);
117
// Takes the shared-memory block, an array `nums`, an index range [start, end)
// (NOTE(review): half-open range is an assumption - confirm) and the calling
// warp/lane coordinates, and returns a uint32_t.
// NOTE(review): presumably fills smem.warpOffset and returns this warp's
// offset - confirm in the definition.
118 template <typename T>
119 GPUdi() static uint32_t calculateWarpOffsets(GPUSharedMemory& smem, T* nums, uint32_t start, uint32_t end, int32_t nWarps, int32_t iWarp, int32_t nLanes, int32_t iLane);
120
// Gather routine with the same parameter list as Thread(), templated on the
// buffer word type V (cf. Vec32 / Vec64 / Vec128 and getBuffer<V>).
// NOTE(review): presumably the implementation behind the buffered K
// enumerators - confirm in the kernel source.
121 template <typename V>
122 GPUdii() static void gatherBuffered(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() processors);
123
// Gather routine with the same parameter list as Thread().
// NOTE(review): presumably the implementation behind the `multiBlock`
// enumerator of K - confirm in the kernel source.
124 GPUdii() static void gatherMulti(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() processors);
125};
126
127} // namespace o2::gpu
128
129#endif
#define GPUsharedref()
#define GPUdii()
#define GPUgeneric()
#define GPUCA_TPC_COMP_CHUNK_SIZE
#define GPUCA_GET_WARP_COUNT(...)
#define GPUCA_WARP_SIZE
#define protected
TBranch * ptr
int32_t int32_t int32_t processorType & processors
GPUd() static void Thread(int32_t nBlocks
GPUhdi() constexpr static GPUDataTypes
GPUd() static void Thread(int32_t nBlocks
int32_t int32_t int32_t processorType & GPUrestrict() processors)
GLenum src
Definition glcorearb.h:1767
GLsizeiptr size
Definition glcorearb.h:659
GLuint GLuint end
Definition glcorearb.h:469
GLuint GLsizei const GLuint const GLintptr const GLsizeiptr * sizes
Definition glcorearb.h:2595
GLboolean GLboolean GLboolean b
Definition glcorearb.h:1233
GLenum GLenum dst
Definition glcorearb.h:1767
GLuint start
Definition glcorearb.h:469
GLboolean GLboolean GLboolean GLboolean a
Definition glcorearb.h:1233
GLenum GLuint GLenum GLsizei const GLchar * buf
Definition glcorearb.h:2514
GPUdi() o2
Definition TrackTRD.h:38
Global TPC definitions and constants.
Definition SimTraits.h:167
a couple of static helper functions to create timestamp values for CCDB queries or override obsolete ...
std::vector< std::byte > getBuffer(const char *filename)