Project
Loading...
Searching...
No Matches
GPUTPCCFStreamCompaction.cxx
Go to the documentation of this file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
14
16#include "GPUCommonAlgorithm.h"
17
18#include "ChargePos.h"
19#include "CfUtils.h"
20
21using namespace o2::gpu;
22using namespace o2::gpu::tpccf;
23
24template <>
25GPUdii() void GPUTPCCFStreamCompaction::Thread<GPUTPCCFStreamCompaction::scanStart>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, int32_t iBuf, int32_t stage)
26{
27 int32_t nElems = CompactionElems(clusterer, stage);
28
29 const auto* predicate = clusterer.mPisPeak;
30 auto* scanOffset = clusterer.GetScanBuffer(iBuf);
31
32 int32_t iThreadGlobal = get_global_id(0);
33 int32_t pred = 0;
34 if (iThreadGlobal < nElems) {
35 pred = predicate[iThreadGlobal];
36 }
37
38 int32_t nElemsInBlock = CfUtils::blockPredicateSum<GPUCA_THREAD_COUNT_SCAN>(smem, pred);
39
40 int32_t lastThread = nThreads - 1;
41 if (iThread == lastThread) {
42 scanOffset[iBlock] = nElemsInBlock;
43 }
44}
45
46template <>
47GPUdii() void GPUTPCCFStreamCompaction::Thread<GPUTPCCFStreamCompaction::scanUp>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, int32_t iBuf, int32_t nElems)
48{
49 auto* scanOffset = clusterer.GetScanBuffer(iBuf - 1);
50 auto* scanOffsetNext = clusterer.GetScanBuffer(iBuf);
51
52 int32_t iThreadGlobal = get_global_id(0);
53 int32_t offsetInBlock = work_group_scan_inclusive_add((iThreadGlobal < nElems) ? scanOffset[iThreadGlobal] : 0);
54
55 // TODO: This write isn't needed??
56 scanOffset[iThreadGlobal] = offsetInBlock;
57
58 int32_t lastThread = nThreads - 1;
59 if (iThread == lastThread) {
60 scanOffsetNext[iBlock] = offsetInBlock;
61 }
62}
63
64template <>
65GPUdii() void GPUTPCCFStreamCompaction::Thread<GPUTPCCFStreamCompaction::scanTop>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, int32_t iBuf, int32_t nElems)
66{
67 int32_t iThreadGlobal = get_global_id(0);
68 int32_t* scanOffset = clusterer.GetScanBuffer(iBuf - 1);
69
70 bool inBounds = (iThreadGlobal < nElems);
71
72 int32_t offsetInBlock = work_group_scan_inclusive_add(inBounds ? scanOffset[iThreadGlobal] : 0);
73
74 if (inBounds) {
75 scanOffset[iThreadGlobal] = offsetInBlock;
76 }
77}
78
79template <>
80GPUdii() void GPUTPCCFStreamCompaction::Thread<GPUTPCCFStreamCompaction::scanDown>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& /*smem*/, processorType& clusterer, int32_t iBuf, uint32_t offset, int32_t nElems)
81{
82 int32_t iThreadGlobal = get_global_id(0) + offset;
83
84 int32_t* scanOffsetPrev = clusterer.GetScanBuffer(iBuf - 1);
85 const int32_t* scanOffset = clusterer.GetScanBuffer(iBuf);
86
87 int32_t shift = scanOffset[iBlock];
88
89 if (iThreadGlobal < nElems) {
90 scanOffsetPrev[iThreadGlobal] += shift;
91 }
92}
93
94template <>
95GPUdii() void GPUTPCCFStreamCompaction::Thread<GPUTPCCFStreamCompaction::compactDigits>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, int32_t iBuf, int32_t stage, ChargePos* in, ChargePos* out)
96{
97 uint32_t nElems = CompactionElems(clusterer, stage);
98 SizeT bufferSize = (stage) ? clusterer.mNMaxClusters : clusterer.mNMaxPeaks;
99
100 uint32_t iThreadGlobal = get_global_id(0);
101
102 const auto* predicate = clusterer.mPisPeak;
103 const auto* scanOffset = clusterer.GetScanBuffer(iBuf);
104
105 bool iAmDummy = (iThreadGlobal >= nElems);
106
107 int32_t pred = (iAmDummy) ? 0 : predicate[iThreadGlobal];
108 int32_t offsetInBlock = CfUtils::blockPredicateScan<GPUCA_THREAD_COUNT_SCAN>(smem, pred);
109
110 SizeT globalOffsetOut = offsetInBlock;
111 if (iBlock > 0) {
112 globalOffsetOut += scanOffset[iBlock - 1];
113 }
114
115 if (pred && globalOffsetOut < bufferSize) {
116 out[globalOffsetOut] = in[iThreadGlobal];
117 }
118
119 uint32_t lastId = get_global_size(0) - 1;
120 if (iThreadGlobal == lastId) {
121 SizeT nFinal = globalOffsetOut + pred;
122 if (nFinal > bufferSize) {
123 clusterer.raiseError(stage ? GPUErrors::ERROR_CF_CLUSTER_OVERFLOW : GPUErrors::ERROR_CF_PEAK_OVERFLOW, clusterer.mISector, nFinal, bufferSize);
124 nFinal = bufferSize;
125 }
126 if (stage) {
127 clusterer.mPmemory->counters.nClusters = nFinal;
128 } else {
129 clusterer.mPmemory->counters.nPeaks = nFinal;
130 }
131 }
132}
133
134GPUdii() int32_t GPUTPCCFStreamCompaction::CompactionElems(processorType& clusterer, int32_t stage)
135{
136 return (stage) ? clusterer.mPmemory->counters.nPeaks : clusterer.mPmemory->counters.nPositions;
137}
#define get_global_size(dim)
#define get_global_id(dim)
GPUdii() void GPUTPCCFStreamCompaction
GLintptr offset
Definition glcorearb.h:660
typedef void(APIENTRYP PFNGLCULLFACEPROC)(GLenum mode)