Project
Loading...
Searching...
No Matches
GPUTPCCFStreamCompaction.cxx
Go to the documentation of this file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
14
16#include "GPUCommonAlgorithm.h"
17
18#include "CfChargePos.h"
19#include "CfUtils.h"
20
21using namespace o2::gpu;
22using namespace o2::gpu::tpccf;
23
24template <>
25GPUdii() void GPUTPCCFStreamCompaction::Thread<GPUTPCCFStreamCompaction::scanStart>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, int32_t iBuf, int32_t stage)
26{
27#ifdef GPUCA_GPUCODE
28 int32_t nElems = CompactionElems(clusterer, stage);
29
30 const auto* predicate = clusterer.mPisPeak;
31 auto* scanOffset = clusterer.GetScanBuffer(iBuf);
32
33 int32_t iThreadGlobal = get_global_id(0);
34 int32_t pred = 0;
35 if (iThreadGlobal < nElems) {
36 pred = predicate[iThreadGlobal];
37 }
38
39 int32_t nElemsInBlock = CfUtils::blockPredicateSum<GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE>(smem, pred);
40
41 int32_t lastThread = nThreads - 1;
42 if (iThread == lastThread) {
43 scanOffset[iBlock] = nElemsInBlock;
44 }
45#endif
46}
47
48template <>
49GPUdii() void GPUTPCCFStreamCompaction::Thread<GPUTPCCFStreamCompaction::scanUp>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, int32_t iBuf, int32_t nElems)
50{
51#ifdef GPUCA_GPUCODE
52 auto* scanOffset = clusterer.GetScanBuffer(iBuf - 1);
53 auto* scanOffsetNext = clusterer.GetScanBuffer(iBuf);
54
55 int32_t iThreadGlobal = get_global_id(0);
56 int32_t offsetInBlock = work_group_scan_inclusive_add((iThreadGlobal < nElems) ? scanOffset[iThreadGlobal] : 0);
57
58 // TODO: This write isn't needed??
59 scanOffset[iThreadGlobal] = offsetInBlock;
60
61 int32_t lastThread = nThreads - 1;
62 if (iThread == lastThread) {
63 scanOffsetNext[iBlock] = offsetInBlock;
64 }
65#endif
66}
67
68template <>
69GPUdii() void GPUTPCCFStreamCompaction::Thread<GPUTPCCFStreamCompaction::scanTop>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, int32_t iBuf, int32_t nElems)
70{
71#ifdef GPUCA_GPUCODE
72 int32_t iThreadGlobal = get_global_id(0);
73 int32_t* scanOffset = clusterer.GetScanBuffer(iBuf - 1);
74
75 bool inBounds = (iThreadGlobal < nElems);
76
77 int32_t offsetInBlock = work_group_scan_inclusive_add(inBounds ? scanOffset[iThreadGlobal] : 0);
78
79 if (inBounds) {
80 scanOffset[iThreadGlobal] = offsetInBlock;
81 }
82#endif
83}
84
85template <>
86GPUdii() void GPUTPCCFStreamCompaction::Thread<GPUTPCCFStreamCompaction::scanDown>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& /*smem*/, processorType& clusterer, int32_t iBuf, uint32_t offset, int32_t nElems)
87{
88#ifdef GPUCA_GPUCODE
89 int32_t iThreadGlobal = get_global_id(0) + offset;
90
91 int32_t* scanOffsetPrev = clusterer.GetScanBuffer(iBuf - 1);
92 const int32_t* scanOffset = clusterer.GetScanBuffer(iBuf);
93
94 int32_t shift = scanOffset[iBlock];
95
96 if (iThreadGlobal < nElems) {
97 scanOffsetPrev[iThreadGlobal] += shift;
98 }
99#endif
100}
101
102template <>
103GPUdii() void GPUTPCCFStreamCompaction::Thread<GPUTPCCFStreamCompaction::compactDigits>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, int32_t iBuf, int32_t stage, CfChargePos* in, CfChargePos* out)
104{
105#ifdef GPUCA_GPUCODE
106 uint32_t nElems = CompactionElems(clusterer, stage);
107 SizeT bufferSize = (stage) ? clusterer.mNMaxClusters : clusterer.mNMaxPeaks;
108
109 uint32_t iThreadGlobal = get_global_id(0);
110
111 const auto* predicate = clusterer.mPisPeak;
112 const auto* scanOffset = clusterer.GetScanBuffer(iBuf);
113
114 bool iAmDummy = (iThreadGlobal >= nElems);
115
116 int32_t pred = (iAmDummy) ? 0 : predicate[iThreadGlobal];
117 int32_t offsetInBlock = CfUtils::blockPredicateScan<GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE>(smem, pred);
118
119 SizeT globalOffsetOut = offsetInBlock;
120 if (iBlock > 0) {
121 globalOffsetOut += scanOffset[iBlock - 1];
122 }
123
124 if (pred && globalOffsetOut < bufferSize) {
125 out[globalOffsetOut] = in[iThreadGlobal];
126 }
127
128 uint32_t lastId = get_global_size(0) - 1;
129 if (iThreadGlobal == lastId) {
130 SizeT nFinal = globalOffsetOut + pred;
131 if (nFinal > bufferSize) {
132 clusterer.raiseError(stage ? GPUErrors::ERROR_CF_CLUSTER_OVERFLOW : GPUErrors::ERROR_CF_PEAK_OVERFLOW, clusterer.mISector, nFinal, bufferSize);
133 nFinal = bufferSize;
134 }
135 if (stage) {
136 clusterer.mPmemory->counters.nClusters = nFinal;
137 } else {
138 clusterer.mPmemory->counters.nPeaks = nFinal;
139 }
140 }
141#endif
142}
143
144GPUdii() int32_t GPUTPCCFStreamCompaction::CompactionElems(processorType& clusterer, int32_t stage)
145{
146 return (stage) ? clusterer.mPmemory->counters.nPeaks : clusterer.mPmemory->counters.nPositions;
147}
#define get_global_size(dim)
#define get_global_id(dim)
GPUdii() void GPUTPCCFStreamCompaction
GLintptr offset
Definition glcorearb.h:660
typedef void(APIENTRYP PFNGLCULLFACEPROC)(GLenum mode)