// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
// All rights not expressly granted are reserved.
//
// This software is distributed under the terms of the GNU General Public
// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
//
// In applying this license CERN does not waive the privileges and immunities
// granted to it by virtue of its status as an Intergovernmental Organization
// or submit itself to any jurisdiction.

/// \file GPUTPCDecompressionKernels.cxx

#include "GPUTPCDecompressionKernels.h"
#include "GPULogging.h"
#include "GPUConstantMem.h"
#include "GPUCommonAlgorithm.h"
#include "TPCClusterDecompressionCore.inc"

using namespace o2::gpu;
using namespace o2::tpc;

template <>
GPUdii() void GPUTPCDecompressionKernels::Thread<GPUTPCDecompressionKernels::step0attached>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors, int32_t trackStart, int32_t trackEnd)
{
  GPUTPCDecompression& GPUrestrict() decompressor = processors.tpcDecompressor;
  CompressedClusters& GPUrestrict() cmprClusters = decompressor.mInputGPU;
  const GPUParam& GPUrestrict() param = processors.param;

  const uint32_t maxTime = (param.continuousMaxTimeBin + 1) * ClusterNative::scaleTimePacked - 1;

  for (int32_t i = trackStart + get_global_id(0); i < trackEnd; i += get_global_size(0)) {
    uint32_t offset = decompressor.mAttachedClustersOffsets[i];
    TPCClusterDecompressionCore::decompressTrack(cmprClusters, param, maxTime, i, offset, decompressor);
  }
}

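// Step 1: decompress the clusters not attached to any track, one (sector, row)
// pad row per loop iteration. Clusters produced by step 0 are first copied from
// the temporary buffer to their final location, then the row's unattached
// clusters are decompressed behind them. If configured, all clusters of the row
// are finally shifted in time and clamped to [0, continuousMaxTimeBin].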
template <>
GPUdii() void GPUTPCDecompressionKernels::Thread<GPUTPCDecompressionKernels::step1unattached>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors, int32_t sectorStart, int32_t nSectors)
{
  GPUTPCDecompression& GPUrestrict() decompressor = processors.tpcDecompressor;
  CompressedClusters& GPUrestrict() cmprClusters = decompressor.mInputGPU;
  ClusterNative* GPUrestrict() clusterBuffer = decompressor.mNativeClustersBuffer;
  const ClusterNativeAccess* outputAccess = decompressor.mClusterNativeAccess;
  uint32_t* offsets = decompressor.mUnattachedClustersOffsets;
  for (int32_t i = get_global_id(0); i < GPUCA_ROW_COUNT * nSectors; i += get_global_size(0)) {
    uint32_t iRow = i % GPUCA_ROW_COUNT;
    uint32_t iSector = sectorStart + (i / GPUCA_ROW_COUNT);
    const uint32_t linearIndex = iSector * GPUCA_ROW_COUNT + iRow;
    uint32_t tmpBufferIndex = computeLinearTmpBufferIndex(iSector, iRow, decompressor.mMaxNativeClustersPerBuffer);
    ClusterNative* buffer = clusterBuffer + outputAccess->clusterOffset[iSector][iRow];
    if (decompressor.mNativeClustersIndex[linearIndex] != 0) {
      decompressorMemcpyBasic(buffer, decompressor.mTmpNativeClusters + tmpBufferIndex, decompressor.mNativeClustersIndex[linearIndex]);
    }
    ClusterNative* clout = buffer + decompressor.mNativeClustersIndex[linearIndex];
    uint32_t end = offsets[linearIndex] + ((linearIndex >= decompressor.mInputGPU.nSliceRows) ? 0 : decompressor.mInputGPU.nSliceRowClusters[linearIndex]);
    TPCClusterDecompressionCore::decompressHits(cmprClusters, offsets[linearIndex], end, clout);
    if (processors.param.rec.tpc.clustersShiftTimebins != 0.f) {
      for (uint32_t k = 0; k < outputAccess->nClusters[iSector][iRow]; k++) {
        auto& cl = buffer[k];
        float t = cl.getTime() + processors.param.rec.tpc.clustersShiftTimebins;
        if (t < 0) {
          t = 0;
        }
        if (processors.param.continuousMaxTimeBin > 0 && t > processors.param.continuousMaxTimeBin) {
          t = processors.param.continuousMaxTimeBin;
        }
        cl.setTime(t);
      }
    }
  }
}

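// computeLinearTmpBufferIndex (declared in GPUTPCDecompressionKernels.h) maps
// the (sector, row) pair used above to the start of that row's slot in the flat
// temporary cluster buffer. A minimal sketch of such a mapping, assuming one
// fixed-size slot of maxClustersPerBuffer entries per row (illustrative only,
// not necessarily the actual definition):
//
//   GPUdi() static uint32_t computeLinearTmpBufferIndex(uint32_t sector, uint32_t row, uint32_t maxClustersPerBuffer)
//   {
//     return (sector * GPUCA_ROW_COUNT + row) * maxClustersPerBuffer;
//   }

// Plain element-wise copy, usable in device code where memcpy is not generally available.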
template <typename T>
GPUdi() void GPUTPCDecompressionKernels::decompressorMemcpyBasic(T* GPUrestrict() dst, const T* GPUrestrict() src, uint32_t size)
{
  for (uint32_t i = 0; i < size; i++) {
    dst[i] = src[i];
  }
}

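// A cluster passes the filter if no time-bin cut is configured
// (param.tpcCutTimeBin <= 0) or its time lies below the cut.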
GPUdi() bool GPUTPCDecompressionUtilKernels::isClusterKept(const o2::tpc::ClusterNative& cl, const GPUParam& GPUrestrict() param)
{
  return param.tpcCutTimeBin > 0 ? cl.getTime() < param.tpcCutTimeBin : true;
}

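// First pass of the optional cluster filter: count, per (sector, row), how many
// clusters survive the time-bin cut, so that output offsets can be computed.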
template <>
GPUdii() void GPUTPCDecompressionUtilKernels::Thread<GPUTPCDecompressionUtilKernels::countFilteredClusters>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors)
{
  const GPUParam& GPUrestrict() param = processors.param;
  GPUTPCDecompression& GPUrestrict() decompressor = processors.tpcDecompressor;
  const ClusterNativeAccess* clusterAccess = decompressor.mClusterNativeAccess;
  for (uint32_t i = get_global_id(0); i < GPUCA_NSECTORS * GPUCA_ROW_COUNT; i += get_global_size(0)) {
    uint32_t sector = i / GPUCA_ROW_COUNT;
    uint32_t row = i % GPUCA_ROW_COUNT;
    for (uint32_t k = 0; k < clusterAccess->nClusters[sector][row]; k++) {
      ClusterNative cl = clusterAccess->clusters[sector][row][k];
      if (isClusterKept(cl, param)) {
        decompressor.mNClusterPerSectorRow[i]++;
      }
    }
  }
}

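// Second pass of the filter: compact the surviving clusters into their final
// (sector, row) ranges in the output buffer.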
template <>
GPUdii() void GPUTPCDecompressionUtilKernels::Thread<GPUTPCDecompressionUtilKernels::storeFilteredClusters>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors)
{
  const GPUParam& GPUrestrict() param = processors.param;
  GPUTPCDecompression& GPUrestrict() decompressor = processors.tpcDecompressor;
  ClusterNative* GPUrestrict() clusterBuffer = decompressor.mNativeClustersBuffer;
  const ClusterNativeAccess* clusterAccess = decompressor.mClusterNativeAccess;
  const ClusterNativeAccess* outputAccess = processors.ioPtrs.clustersNative;
  for (uint32_t i = get_global_id(0); i < GPUCA_NSECTORS * GPUCA_ROW_COUNT; i += get_global_size(0)) {
    uint32_t sector = i / GPUCA_ROW_COUNT;
    uint32_t row = i % GPUCA_ROW_COUNT;
    uint32_t count = 0;
    for (uint32_t k = 0; k < clusterAccess->nClusters[sector][row]; k++) {
      const ClusterNative cl = clusterAccess->clusters[sector][row][k];
      if (isClusterKept(cl, param)) {
        clusterBuffer[outputAccess->clusterOffset[sector][row] + count] = cl;
        count++;
      }
    }
  }
}

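// Sort the clusters of each (sector, row) range in place to restore the
// canonical ClusterNative ordering after filtering (one thread per pad row).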
template <>
GPUdii() void GPUTPCDecompressionUtilKernels::Thread<GPUTPCDecompressionUtilKernels::sortPerSectorRow>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors)
{
  ClusterNative* GPUrestrict() clusterBuffer = processors.tpcDecompressor.mNativeClustersBuffer;
  const ClusterNativeAccess* outputAccess = processors.ioPtrs.clustersNative;
  for (uint32_t i = get_global_id(0); i < GPUCA_NSECTORS * GPUCA_ROW_COUNT; i += get_global_size(0)) {
    uint32_t sector = i / GPUCA_ROW_COUNT;
    uint32_t row = i % GPUCA_ROW_COUNT;
    ClusterNative* buffer = clusterBuffer + outputAccess->clusterOffset[sector][row];
    GPUCommonAlgorithm::sort(buffer, buffer + outputAccess->nClusters[sector][row]);
  }
}
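
// A plausible host-side launch sequence for these kernels (inferred from the
// data flow in this file; the actual scheduling lives in the tracking chain,
// e.g. GPUChainTracking in this codebase):
//   1. step0attached       - expand track-attached clusters into the tmp buffer
//   2. step1unattached     - merge tmp clusters and decompress unattached ones
//   3. countFilteredClusters -> storeFilteredClusters -> sortPerSectorRow
//      (optional time-bin filtering followed by per-row sorting)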