Project
Loading...
Searching...
No Matches
GPUTPCCFStreamCompaction.cxx
Go to the documentation of this file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
14
16#include "GPUCommonAlgorithm.h"
17
18#include "CfChargePos.h"
19#include "CfUtils.h"
20
21using namespace o2::gpu;
22using namespace o2::gpu::tpccf;
23
24template <>
25GPUdii() void GPUTPCCFStreamCompaction::Thread<GPUTPCCFStreamCompaction::scanStart>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, int32_t iBuf, int32_t stage)
26{
27#ifdef GPUCA_GPUCODE
28 int32_t nElems = CompactionElems(clusterer, stage);
29
30 const auto* predicate = clusterer.mPisPeak;
31 auto* scanOffset = clusterer.GetScanBuffer(iBuf);
32
33 int32_t iThreadGlobal = get_global_id(0);
34 int32_t pred = 0;
35 if (iThreadGlobal < nElems) {
36 pred = predicate[iThreadGlobal];
37 }
38
39 int32_t nElemsInBlock = CfUtils::blockPredicateSum<GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE>(smem, pred);
40
41 int32_t lastThread = nThreads - 1;
42 if (iThread == lastThread) {
43 scanOffset[iBlock] = nElemsInBlock;
44 }
45#endif
46}
47
48template <>
49GPUdii() void GPUTPCCFStreamCompaction::Thread<GPUTPCCFStreamCompaction::scanUp>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, int32_t iBuf, int32_t nElems)
50{
51#ifdef GPUCA_GPUCODE
52 auto* scanOffset = clusterer.GetScanBuffer(iBuf - 1);
53 auto* scanOffsetNext = clusterer.GetScanBuffer(iBuf);
54
55 int32_t iThreadGlobal = get_global_id(0);
56 int32_t offsetInBlock = work_group_scan_inclusive_add((iThreadGlobal < nElems) ? scanOffset[iThreadGlobal] : 0);
57
58 if (iThreadGlobal < nElems) {
59 scanOffset[iThreadGlobal] = offsetInBlock;
60 }
61
62 int32_t lastThread = nThreads - 1;
63 if (iThread == lastThread) {
64 scanOffsetNext[iBlock] = offsetInBlock;
65 }
66#endif
67}
68
69template <>
70GPUdii() void GPUTPCCFStreamCompaction::Thread<GPUTPCCFStreamCompaction::scanTop>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, int32_t iBuf, int32_t nElems)
71{
72#ifdef GPUCA_GPUCODE
73 int32_t iThreadGlobal = get_global_id(0);
74 int32_t* scanOffset = clusterer.GetScanBuffer(iBuf - 1);
75
76 bool inBounds = (iThreadGlobal < nElems);
77
78 int32_t offsetInBlock = work_group_scan_inclusive_add(inBounds ? scanOffset[iThreadGlobal] : 0);
79
80 if (inBounds) {
81 scanOffset[iThreadGlobal] = offsetInBlock;
82 }
83#endif
84}
85
86template <>
87GPUdii() void GPUTPCCFStreamCompaction::Thread<GPUTPCCFStreamCompaction::scanDown>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& /*smem*/, processorType& clusterer, int32_t iBuf, uint32_t offset, int32_t nElems)
88{
89#ifdef GPUCA_GPUCODE
90 int32_t iThreadGlobal = get_global_id(0) + offset;
91
92 int32_t* scanOffsetPrev = clusterer.GetScanBuffer(iBuf - 1);
93 const int32_t* scanOffset = clusterer.GetScanBuffer(iBuf);
94
95 int32_t shift = scanOffset[iBlock];
96
97 if (iThreadGlobal < nElems) {
98 scanOffsetPrev[iThreadGlobal] += shift;
99 }
100#endif
101}
102
103template <>
104GPUdii() void GPUTPCCFStreamCompaction::Thread<GPUTPCCFStreamCompaction::compactDigits>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, int32_t iBuf, int32_t stage, CfChargePos* in, CfChargePos* out)
105{
106#ifdef GPUCA_GPUCODE
107 uint32_t nElems = CompactionElems(clusterer, stage);
108 SizeT bufferSize = (stage) ? clusterer.mNMaxClusters : clusterer.mNMaxPeaks;
109
110 uint32_t iThreadGlobal = get_global_id(0);
111
112 const auto* predicate = clusterer.mPisPeak;
113 const auto* scanOffset = clusterer.GetScanBuffer(iBuf);
114
115 bool iAmDummy = (iThreadGlobal >= nElems);
116
117 int32_t pred = (iAmDummy) ? 0 : predicate[iThreadGlobal];
118 int32_t offsetInBlock = CfUtils::blockPredicateScan<GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE>(smem, pred);
119
120 SizeT globalOffsetOut = offsetInBlock;
121 if (iBlock > 0) {
122 globalOffsetOut += scanOffset[iBlock - 1];
123 }
124
125 if (pred && globalOffsetOut < bufferSize) {
126 out[globalOffsetOut] = in[iThreadGlobal];
127 }
128
129 uint32_t lastId = get_global_size(0) - 1;
130 if (iThreadGlobal == lastId) {
131 SizeT nFinal = globalOffsetOut + pred;
132 if (nFinal > bufferSize) {
133 clusterer.raiseError(stage ? GPUErrors::ERROR_CF_CLUSTER_OVERFLOW : GPUErrors::ERROR_CF_PEAK_OVERFLOW, clusterer.mISector, nFinal, bufferSize);
134 nFinal = bufferSize;
135 }
136 if (stage) {
137 clusterer.mPmemory->counters.nClusters = nFinal;
138 } else {
139 clusterer.mPmemory->counters.nPeaks = nFinal;
140 }
141 }
142#endif
143}
144
145GPUdii() int32_t GPUTPCCFStreamCompaction::CompactionElems(processorType& clusterer, int32_t stage)
146{
147 return (stage) ? clusterer.mPmemory->counters.nPeaks : clusterer.mPmemory->counters.nPositions;
148}
#define get_global_size(dim)
#define get_global_id(dim)
GPUdii() void GPUTPCCFStreamCompaction
GLintptr offset
Definition glcorearb.h:660
typedef void(APIENTRYP PFNGLCULLFACEPROC)(GLenum mode)