d2/dc6/GPUTPCCompressionKernels_8h_source.html

// Copyright 2019-2020 CERN and copyright holders of ALICE O2.

// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.

// All rights not expressly granted are reserved.

//

// This software is distributed under the terms of the GNU General Public

// License v3 (GPL Version 3), copied verbatim in the file "COPYING".

//

// In applying this license CERN does not waive the privileges and immunities

// granted to it by virtue of its status as an Intergovernmental Organization

// or submit itself to any jurisdiction.


#ifndef GPUTPCCONMPRESSIONKERNELS_H

#define GPUTPCCONMPRESSIONKERNELS_H


#include "GPUGeneralKernels.h"


// Forward declaration only: within this header o2::tpc::ClusterNative is used solely
// through a const pointer (GPUTPCCompressionKernels_Compare), so the complete type is
// not required here.
namespace o2::tpc

{

struct ClusterNative;

} // namespace o2::tpc


namespace o2::gpu

{


// Kernel class for the TPC compression reconstruction step (see GetRecoStep()).
// It derives from GPUKernelTemplate and declares two kernel variants (enum K), the
// scratch structure handed to them, and a comparison functor. Thread() and the functor's
// operator() are only declared here; their definitions are not part of this header.
class GPUTPCCompressionKernels : public GPUKernelTemplate

{

 public:

  // Reports the reconstruction step this kernel class belongs to: always
  // GPUDataTypes::RecoStep::TPCCompression.
  GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::TPCCompression; }


  // Identifiers of the kernel variants of this class.
  // NOTE(review): presumably passed as the iKernel template argument of Thread() - confirm.
  enum K : int32_t {

    // NOTE(review): by its name, the pass over clusters attached to tracks - confirm.
    step0attached = 0,

    // NOTE(review): by its name, the pass over clusters not attached to any track - confirm.
    step1unattached = 1,

  };


  // Scratch structure passed to Thread() as `smem`. It extends the scan helper
  // GPUSharedMemoryScan64<int32_t, N>, where N is the thread count configured for the
  // step1unattached variant, with the fields below. Member order defines the layout.
  struct GPUSharedMemory : public GPUKernelTemplate::GPUSharedMemoryScan64<int32_t, GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCompressionKernels_step1unattached)> {

    // Counter declared through the GPUAtomic wrapper macro.
    // NOTE(review): presumably the number of entries currently held in sortBuffer - confirm.
    GPUAtomic(uint32_t) nCount;

    // NOTE(review): meaning not visible in this header - see the kernel implementation.
    uint32_t lastIndex;

    // Buffer of GPUCA_TPC_COMP_CHUNK_SIZE 32-bit values.
    // NOTE(review): presumably cluster indices gathered for sorting - confirm.
    uint32_t sortBuffer[GPUCA_TPC_COMP_CHUNK_SIZE];

  };


  // Kernel entry point, one instantiation per variant (iKernel defaults to defaultKernel).
  //   nBlocks / nThreads : launch dimensions as named by the parameters
  //   iBlock / iThread   : index of the calling block / thread
  //   smem               : this class's GPUSharedMemory instance
  //   processors         : processorType reference giving access to the processing context
  template <int32_t iKernel = defaultKernel>

  GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() processors);


  // Binary predicate over two uint32_t values that keeps a pointer to ClusterNative data.
  // NOTE(review): presumably a and b are indices into mClsPtr and the template parameter I
  // selects the ordering criterion - confirm against the operator() definitions.
  template <int32_t I>


  class GPUTPCCompressionKernels_Compare

  {

   public:

    // Stores the cluster pointer; the pointer is not owned (no destructor is declared).
    GPUhdi() GPUTPCCompressionKernels_Compare(const o2::tpc::ClusterNative* p) : mClsPtr(p) {}

    // Comparison of a and b; declared only, defined outside this header.
    GPUd() bool operator()(uint32_t a, uint32_t b) const;


   protected:

    // Cluster data the comparison refers to.
    const o2::tpc::ClusterNative* mClsPtr;

  };


};


// Kernel class declaring the "gather" kernels of the TPC compression in five variants
// (enum K), together with the scratch structure they share and a set of static copy
// helpers. All member functions are only declared here; their definitions are not part
// of this header.
class GPUTPCCompressionGatherKernels : public GPUKernelTemplate

{


 public:


  // Identifiers of the kernel variants; values are implicit (unbuffered = 0 ... multiBlock = 4).
  // NOTE(review): presumably passed as the iKernel template argument of Thread() - confirm.
  enum K : int32_t {

    unbuffered,

    buffered32,

    buffered64,

    buffered128,

    multiBlock

  };


  // Element types named after their width in bits. Vec128 maps to the project/GPU type
  // uint4 (NOTE(review): presumably four 32-bit lanes, i.e. 16 bytes - confirm).
  using Vec16 = uint16_t;

  using Vec32 = uint32_t;

  using Vec64 = uint64_t;

  using Vec128 = uint4;


  // Compile-time check that every variant is configured with the same thread count as the
  // unbuffered one. GPUSharedMemory below is dimensioned only from the unbuffered variant's
  // thread and warp counts, which is presumably why the counts have to agree.
  static_assert(GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered) == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_buffered32));

  static_assert(GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered) == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_buffered64));

  static_assert(GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered) == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_buffered128));

  static_assert(GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered) == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_multiBlock));


  // Scratch structure passed to the kernels as `smem`. It extends the scan helper
  // GPUSharedMemoryScan64<uint32_t, N> (N = thread count of the unbuffered variant) with an
  // anonymous union, so all members listed in the union occupy the same storage.
  struct GPUSharedMemory : public GPUKernelTemplate::GPUSharedMemoryScan64<uint32_t, GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered)> {

    union {

      // One 32-bit value per warp.
      uint32_t warpOffset[GPUCA_GET_WARP_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered)];

      // Per-warp buffers with GPUCA_WARP_SIZE elements each, one array per element width.
      Vec32 buf32[GPUCA_GET_WARP_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered)][GPUCA_WARP_SIZE];

      Vec64 buf64[GPUCA_GET_WARP_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered)][GPUCA_WARP_SIZE];

      Vec128 buf128[GPUCA_GET_WARP_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered)][GPUCA_WARP_SIZE];

      // Two [warp][lane] tables grouped under the name `unbuffered`.
      // NOTE(review): presumably used by the unbuffered variant only - confirm.
      struct {

        uint32_t sizes[GPUCA_GET_WARP_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered)][GPUCA_WARP_SIZE];

        uint32_t srcOffsets[GPUCA_GET_WARP_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered)][GPUCA_WARP_SIZE];

      } unbuffered;

    };


    // Returns a V* for warp iWarp; declared only.
    // NOTE(review): presumably the row of buf32/buf64/buf128 matching V - confirm.
    template <typename V>

    GPUdi() V* getBuffer(int32_t iWarp);

  };


  // Overlays one BaseVector (`all`) with an array of Scalars (`elems`) in the same storage.
  // Size is the number of Scalars that fit into one BaseVector.
  template <typename Scalar, typename BaseVector>


  union CpyVector {

    enum {

      Size = sizeof(BaseVector) / sizeof(Scalar),

    };

    BaseVector all;

    Scalar elems[Size];

  };


  // Kernel entry point, one instantiation per variant (iKernel defaults to defaultKernel).
  // Parameters: launch dimensions (nBlocks, nThreads), the caller's indices (iBlock,
  // iThread), this class's GPUSharedMemory instance and the processorType reference.
  template <int32_t iKernel = defaultKernel>

  GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() processors);


  // Declared only. NOTE(review): presumably tests whether ptr satisfies the alignment of
  // type T - confirm against the definition.
  template <typename T, typename S>

  GPUdi() static bool isAlignedTo(const S* ptr);


  // Copy helper taking dst/src pointers, an element count `size` and the calling thread's
  // position (iThread of nThreads). Declared only.
  // NOTE(review): presumably dispatches to the vectorised or basic variant below - confirm.
  template <typename T>

  GPUdi() static void compressorMemcpy(GPUgeneric() T* dst, GPUgeneric() const T* src, uint32_t size, int32_t nThreads, int32_t iThread);


  // Copy helper parameterised on the Scalar element type and a wider Vector type.
  // Declared only; see CpyVector for the Scalar/Vector overlay type.
  template <typename Scalar, typename Vector>

  GPUdi() static void compressorMemcpyVectorised(Scalar* dst, const Scalar* src, uint32_t size, int32_t nThreads, int32_t iThread);


  // Copy helper that additionally accepts block coordinates; the defaults (nBlocks = 1,
  // iBlock = 0) describe a single block. Declared only.
  template <typename T>

  GPUdi() static void compressorMemcpyBasic(T* dst, const T* src, uint32_t size, int32_t nThreads, int32_t iThread, int32_t nBlocks = 1, int32_t iBlock = 0);


  // Copy helper working through a caller-supplied buffer `buf` of element type V, with
  // per-entry counts `nums` and source offsets `srcOffets` (spelled as declared) for
  // nEntries entries, called per lane (iLane of nLanes). Declared only.
  // NOTE(review): meaning of `diff` (default 0) and `scaleBase1024` (default 1024) is not
  // visible here; the name suggests a scale factor expressed in units of 1/1024 - confirm.
  template <typename V, typename T, typename S>

  GPUdi() static void compressorMemcpyBuffered(V* buf, T* dst, const T* src, const S* nums, const uint32_t* srcOffets, uint32_t nEntries, int32_t nLanes, int32_t iLane, int32_t diff = 0, size_t scaleBase1024 = 1024);


  // Declared only. Takes the scratch structure, an array `nums`, an index range
  // [start, end) or [start, end] (NOTE(review): inclusiveness not visible here) and the
  // caller's warp/lane position; returns a uint32_t.
  // NOTE(review): presumably fills smem.warpOffset and returns an offset or total - confirm.
  template <typename T>

  GPUdi() static uint32_t calculateWarpOffsets(GPUSharedMemory& smem, T* nums, uint32_t start, uint32_t end, int32_t nWarps, int32_t iWarp, int32_t nLanes, int32_t iLane);


  // Gather implementation templated on the buffer element type V; same parameters as
  // Thread(). Declared only.
  // NOTE(review): presumably backs the buffered32/64/128 variants - confirm.
  template <typename V>

  GPUdii() static void gatherBuffered(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() processors);


  // Gather implementation with the same parameters as Thread(). Declared only.
  // NOTE(review): presumably backs the multiBlock variant - confirm.
  GPUdii() static void gatherMulti(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() processors);

};


} // namespace o2::gpu


#endif

GPUsharedref
#define GPUsharedref()
Definition GPUCommonDefAPI.h:56

GPUdii
#define GPUdii()
Definition GPUCommonDefAPI.h:35

GPUgeneric
#define GPUgeneric()
Definition GPUCommonDefAPI.h:52

GPUCA_TPC_COMP_CHUNK_SIZE
#define GPUCA_TPC_COMP_CHUNK_SIZE
Definition GPUDefConstantsAndSettings.h:47

GPUCA_GET_THREAD_COUNT
#define GPUCA_GET_THREAD_COUNT(...)
Definition GPUDefParametersWrapper.h:35

GPUCA_GET_WARP_COUNT
#define GPUCA_GET_WARP_COUNT(...)
Definition GPUDefParametersWrapper.h:36

GPUGeneralKernels.h

protected
#define protected
Definition RCombinedDS.cxx:34

ptr
TBranch * ptr
Definition TTreePlugin.cxx:841

Measurement::Size
@ Size

o2::gpu::GPUDataTypes
Definition GPUDataTypes.h:115

o2::gpu::GPUDataTypes::RecoStep::TPCCompression
@ TPCCompression

o2::gpu::GPUKernelTemplate
Definition GPUGeneralKernels.h:40

o2::gpu::GPUKernelTemplate::processors
int32_t int32_t int32_t processorType & processors
Definition GPUGeneralKernels.h:88

o2::gpu::GPUTPCCompressionGatherKernels
Definition GPUTPCCompressionKernels.h:59

o2::gpu::GPUTPCCompressionGatherKernels::Vec16
uint16_t Vec16
Definition GPUTPCCompressionKernels.h:70

o2::gpu::GPUTPCCompressionGatherKernels::K
K
Definition GPUTPCCompressionKernels.h:62

o2::gpu::GPUTPCCompressionGatherKernels::buffered128
@ buffered128
Definition GPUTPCCompressionKernels.h:66

o2::gpu::GPUTPCCompressionGatherKernels::buffered32
@ buffered32
Definition GPUTPCCompressionKernels.h:64

o2::gpu::GPUTPCCompressionGatherKernels::buffered64
@ buffered64
Definition GPUTPCCompressionKernels.h:65

o2::gpu::GPUTPCCompressionGatherKernels::unbuffered
@ unbuffered
Definition GPUTPCCompressionKernels.h:63

o2::gpu::GPUTPCCompressionGatherKernels::GPUd
GPUd() static void Thread(int32_t nBlocks

o2::gpu::GPUTPCCompressionGatherKernels::Vec64
uint64_t Vec64
Definition GPUTPCCompressionKernels.h:72

o2::gpu::GPUTPCCompressionGatherKernels::Vec32
uint32_t Vec32
Definition GPUTPCCompressionKernels.h:71

o2::gpu::GPUTPCCompressionKernels::GPUTPCCompressionKernels_Compare
Definition GPUTPCCompressionKernels.h:48

o2::gpu::GPUTPCCompressionKernels::GPUTPCCompressionKernels_Compare::GPUhdi
GPUhdi() GPUTPCCompressionKernels_Compare(const o2
Definition GPUTPCCompressionKernels.h:50

o2::gpu::GPUTPCCompressionKernels
Definition GPUTPCCompressionKernels.h:28

o2::gpu::GPUTPCCompressionKernels::GPUhdi
GPUhdi() constexpr static GPUDataTypes
Definition GPUTPCCompressionKernels.h:30

o2::gpu::GPUTPCCompressionKernels::iBlock
int32_t int32_t iBlock
Definition GPUTPCCompressionKernels.h:44

o2::gpu::GPUTPCCompressionKernels::nThreads
int32_t nThreads
Definition GPUTPCCompressionKernels.h:44

o2::gpu::GPUTPCCompressionKernels::K
K
Definition GPUTPCCompressionKernels.h:32

o2::gpu::GPUTPCCompressionKernels::step1unattached
@ step1unattached
Definition GPUTPCCompressionKernels.h:34

o2::gpu::GPUTPCCompressionKernels::step0attached
@ step0attached
Definition GPUTPCCompressionKernels.h:33

o2::gpu::GPUTPCCompressionKernels::iThread
int32_t int32_t int32_t iThread
Definition GPUTPCCompressionKernels.h:44

o2::gpu::GPUTPCCompressionKernels::GPUd
GPUd() static void Thread(int32_t nBlocks

o2::gpu::GPUTPCCompressionKernels::GPUrestrict
int32_t int32_t int32_t processorType & GPUrestrict() processors)

src
GLenum src
Definition glcorearb.h:1767

size
GLsizeiptr size
Definition glcorearb.h:659

end
GLuint GLuint end
Definition glcorearb.h:469

sizes
GLuint GLsizei const GLuint const GLintptr const GLsizeiptr * sizes
Definition glcorearb.h:2595

b
GLboolean GLboolean GLboolean b
Definition glcorearb.h:1233

dst
GLenum GLenum dst
Definition glcorearb.h:1767

start
GLuint start
Definition glcorearb.h:469

a
GLboolean GLboolean GLboolean GLboolean a
Definition glcorearb.h:1233

buf
GLenum GLuint GLenum GLsizei const GLchar * buf
Definition glcorearb.h:2514

o2::gpu
Definition TrackTRD.h:35

o2::gpu::GPUdi
GPUdi() o2
Definition TrackTRD.h:38

o2::tpc
Global TPC definitions and constants.
Definition SimTraits.h:167

o2
a couple of static helper functions to create timestamp values for CCDB queries or override obsolete ...
Definition BitstreamReader.h:24

S
Definition cxx14-test-aggregate-initialization.cxx:18

o2::gpu::GPUKernelTemplate::GPUSharedMemoryScan64
Definition GPUGeneralKernels.h:65

o2::gpu::GPUTPCCompressionGatherKernels::GPUSharedMemory
Definition GPUTPCCompressionKernels.h:79

o2::gpu::GPUTPCCompressionGatherKernels::GPUSharedMemory::GPUdi
GPUdi() V *getBuffer(int32_t iWarp)

o2::gpu::GPUTPCCompressionKernels::GPUSharedMemory
Definition GPUTPCCompressionKernels.h:37

o2::gpu::GPUTPCCompressionKernels::GPUSharedMemory::lastIndex
uint32_t lastIndex
Definition GPUTPCCompressionKernels.h:39

o2::gpu::GPUTPCCompressionKernels::GPUSharedMemory::GPUAtomic
GPUAtomic(uint32_t) nCount

o2::gpu::GPUTPCCompressionKernels::GPUSharedMemory::sortBuffer
uint32_t sortBuffer[GPUCA_TPC_COMP_CHUNK_SIZE]
Definition GPUTPCCompressionKernels.h:40

uint4
Definition GPUCommonDefAPI.h:74

getBuffer
std::vector< std::byte > getBuffer(const char *filename)
Definition testClosureCoDecDigit.cxx:111

o2::gpu::GPUTPCCompressionGatherKernels::CpyVector
Definition GPUTPCCompressionKernels.h:96

o2::gpu::GPUTPCCompressionGatherKernels::CpyVector::all
BaseVector all
Definition GPUTPCCompressionKernels.h:100