// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
// All rights not expressly granted are reserved.
//
// This software is distributed under the terms of the GNU General Public
// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
//
// In applying this license CERN does not waive the privileges and immunities
// granted to it by virtue of its status as an Intergovernmental Organization
// or submit itself to any jurisdiction.

/// \file GPUTPCNNClusterizerKernels.cxx

#include "GPUTPCNNClusterizerKernels.h"
#include "GPUTPCCFClusterizer.h"
#include "GPUTPCGeometry.h"

using namespace o2::gpu;
using namespace o2::gpu::tpccf;

#include "CfConsts.h"
#include "CfUtils.h"
#include "ClusterAccumulator.h"

#if !defined(GPUCA_GPUCODE)
#include "GPUHostDataTypes.h"
#include "MCLabelAccumulator.h"
#endif

#ifdef GPUCA_GPUCODE
#include "GPUTPCCFClusterizer.inc"
#endif

// Individual thread functions for filling the NN input data, determining the class label, and running the CF clusterizer
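// Kernel overview (the network inference itself is expected to run between these kernels, presumably via
// ONNX Runtime given the OrtDataType buffers; only its input/output arrays are touched here):
//   fillInputNN / fillInputNNSingleElement           : write normalized charge windows around each peak into mInputData_16/_32
//   determineClass1Labels / determineClass2Labels    : convert mModelProbabilities_16/_32 into mOutputDataClass
//   publishClass1Regression / publishClass2Regression: turn the regression outputs into ClusterNative objects and sort them into row buckets
//   runCfClusterizer                                 : run the default CF clusterizer for peaks accepted by the network (mOutputDataClass != 0)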
template <>
GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::runCfClusterizer>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t withMC, uint32_t batchStart)
{
  uint32_t glo_idx = get_global_id(0);
  auto& clusterer = processors.tpcClusterer[sector];
  auto& clustererNN = processors.tpcNNClusterer[sector];
  if (clustererNN.mOutputDataClass[glo_idx] == 0) { // the default clusterizer should not be called in batched mode due to a mix-up of thread indices
    return;
  }
  CfArray2D<PackedCharge> chargeMap(reinterpret_cast<PackedCharge*>(clusterer.mPchargeMap));
  CPU_ONLY(MCLabelAccumulator labelAcc(clusterer));
  tpc::ClusterNative* clusterOut = (withMC) ? nullptr : clusterer.mPclusterByRow;
  auto& smem_new = reinterpret_cast<GPUTPCCFClusterizer::GPUSharedMemory&>(smem); // assumption: the declaration of smem_new is elided in the listing; the CF clusterizer expects its own shared-memory layout
  GPUTPCCFClusterizer::computeClustersImpl(get_num_groups(0), get_local_size(0), get_group_id(0), get_local_id(0), clusterer, clusterer.mPmemory->fragment, smem_new, chargeMap, clusterer.mPfilteredPeakPositions, clusterer.Param().rec, CPU_PTR(&labelAcc), clusterer.mPmemory->counters.nClusters, clusterer.mNMaxClusterPerRow, clusterer.mPclusterInRow, clusterOut, clusterer.mPclusterPosInRow);
}

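// fillInputNN: one thread per peak. Fills the flat NN input tensor with the charge window of
// (2 * mNnClusterizerSizeInputRow + 1) x (2 * mNnClusterizerSizeInputPad + 1) x (2 * mNnClusterizerSizeInputTime + 1)
// samples around the peak, each normalized to the central charge. Out-of-bounds positions receive the
// boundary fill value, and optionally the sector / row / pad indices are appended at the end of each element.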
template <>
GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::fillInputNN>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint32_t batchStart)
{
  uint32_t glo_idx = get_global_id(0);
  auto& clusterer = processors.tpcClusterer[sector];
  auto& clustererNN = processors.tpcNNClusterer[sector];
  uint32_t write_idx = glo_idx * clustererNN.mNnClusterizerElementSize; // Potential optimization: Either choose mNnClusterizerBatchedMode as a power of 2 or calculate from threadId and blockId

  CfArray2D<PackedCharge> chargeMap(reinterpret_cast<PackedCharge*>(clusterer.mPchargeMap));
  CfArray2D<uint8_t> isPeakMap(clusterer.mPpeakMap);
  CfChargePos peak = clusterer.mPfilteredPeakPositions[CAMath::Min(glo_idx + batchStart, (uint32_t)(clusterer.mPmemory->counters.nClusters - 1))];
  int32_t row = static_cast<int>(peak.row()), pad = static_cast<int>(peak.pad()), time = static_cast<int>(peak.time()); // Explicit casting to avoid conversion errors
  float central_charge = static_cast<float>(chargeMap[peak].unpack());
  int32_t row_offset = GPUTPCNNClusterizerKernels::rowOffset(row, clustererNN.mNnClusterizerSizeInputRow);

#ifndef GPUCA_GPUCODE
  GPUCA_UNROLL(U(), U());
#endif
  for (int32_t r = -clustererNN.mNnClusterizerSizeInputRow; r <= clustererNN.mNnClusterizerSizeInputRow; r++) {
    bool is_row_boundary = ((row + r) > (o2::tpc::constants::MAXGLOBALPADROW - 1)) || ((row + r) < 0);
    int32_t pad_offset = is_row_boundary ? 0 : GPUTPCNNClusterizerKernels::padOffset(row, row + r);
    for (int32_t p = -clustererNN.mNnClusterizerSizeInputPad + pad_offset; p <= clustererNN.mNnClusterizerSizeInputPad + pad_offset; p++) {
      bool is_boundary = is_row_boundary || GPUTPCNNClusterizerKernels::isBoundary(row + r + row_offset, pad + p, clustererNN.mNnClusterizerSizeInputRow);
      for (int32_t t = -clustererNN.mNnClusterizerSizeInputTime; t <= clustererNN.mNnClusterizerSizeInputTime; t++) {
        if (!is_boundary) {
          CfChargePos tmp_pos(row + r, pad + p, time + t);
          if (r == 0 && !clustererNN.mClusterFlags[2 * glo_idx] && CAMath::Abs(p) < 3 && CAMath::Abs(t) < 3 && p != 0 && t != 0) { // ordering is done for short-circuit optimization
            clustererNN.mClusterFlags[2 * glo_idx] += CfUtils::isPeak(isPeakMap[tmp_pos]);
            clustererNN.mClusterFlags[2 * glo_idx + 1] = clustererNN.mClusterFlags[2 * glo_idx];
          }
          if (dtype == 0) {
            clustererNN.mInputData_16[write_idx] = (OrtDataType::Float16_t)(static_cast<float>(chargeMap[tmp_pos].unpack()) / central_charge);
          } else if (dtype == 1) {
            clustererNN.mInputData_32[write_idx] = static_cast<float>(chargeMap[tmp_pos].unpack()) / central_charge;
          }
        } else {
          // Fill boundary elements explicitly so that no values are left uninitialized
          if (dtype == 0) {
            clustererNN.mInputData_16[write_idx] = (OrtDataType::Float16_t)(static_cast<float>(clustererNN.mNnClusterizerBoundaryFillValue));
          } else {
            clustererNN.mInputData_32[write_idx] = static_cast<float>(clustererNN.mNnClusterizerBoundaryFillValue);
          }
        }
        write_idx++;
      }
    }
  }
  if (clustererNN.mNnClusterizerAddIndexData) {
    if (dtype == 0) {
      clustererNN.mInputData_16[write_idx] = (OrtDataType::Float16_t)(sector / 36.f);
      clustererNN.mInputData_16[write_idx + 1] = (OrtDataType::Float16_t)(row / 152.f);
      clustererNN.mInputData_16[write_idx + 2] = (OrtDataType::Float16_t)(static_cast<float>(pad) / GPUTPCGeometry::NPads(row));
    } else {
      clustererNN.mInputData_32[write_idx] = sector / 36.f;
      clustererNN.mInputData_32[write_idx + 1] = row / 152.f;
      clustererNN.mInputData_32[write_idx + 2] = static_cast<float>(pad) / GPUTPCGeometry::NPads(row);
    }
  }
}

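// fillInputNNSingleElement: one thread per element of the NN input tensor rather than one per peak.
// With nPad = 2 * mNnClusterizerSizeInputPad + 1 and nTime = 2 * mNnClusterizerSizeInputTime + 1, the
// in-element index decomposes as
//   r = floor(idx / (nPad * nTime)) - mNnClusterizerSizeInputRow
//   p = floor((idx % (nPad * nTime)) / nTime) - mNnClusterizerSizeInputPad (+ pad_offset)
//   t = (idx % nTime) - mNnClusterizerSizeInputTime
// The last three elements of each block hold the optional sector / row / pad index data.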
template <>
GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::fillInputNNSingleElement>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint32_t batchStart)
{
  uint32_t glo_idx = get_global_id(0);
  auto& clusterer = processors.tpcClusterer[sector];
  auto& clustererNN = processors.tpcNNClusterer[sector];
  uint32_t base_idx = CAMath::Floor(glo_idx / clustererNN.mNnClusterizerElementSize);
  uint32_t transient_index = glo_idx - (base_idx * clustererNN.mNnClusterizerElementSize);

  CfArray2D<PackedCharge> chargeMap(reinterpret_cast<PackedCharge*>(clusterer.mPchargeMap));
  CfArray2D<uint8_t> isPeakMap(clusterer.mPpeakMap);
  CfChargePos peak = clusterer.mPfilteredPeakPositions[CAMath::Min(base_idx + batchStart, (uint32_t)(clusterer.mPmemory->counters.nClusters - 1))];
  int32_t row = static_cast<int>(peak.row()), pad = static_cast<int>(peak.pad());

  if (clustererNN.mNnClusterizerAddIndexData && (int32_t)transient_index == (clustererNN.mNnClusterizerElementSize - 1)) {
    uint32_t top_idx = (base_idx + 1) * clustererNN.mNnClusterizerElementSize;
    for (uint16_t i = 0; i < 8; i++) {
      Delta2 d = cfconsts::InnerNeighbors[i];
      CfChargePos tmp_pos = peak.delta(d);
      clustererNN.mClusterFlags[2 * glo_idx] += CfUtils::isPeak(isPeakMap[tmp_pos]);
      clustererNN.mClusterFlags[2 * glo_idx + 1] = clustererNN.mClusterFlags[2 * glo_idx];
    }
    if (dtype == 0) {
      clustererNN.mInputData_16[top_idx - 3] = (OrtDataType::Float16_t)(sector / 36.f);
      clustererNN.mInputData_16[top_idx - 2] = (OrtDataType::Float16_t)(row / 152.f);
      clustererNN.mInputData_16[top_idx - 1] = (OrtDataType::Float16_t)(static_cast<float>(pad) / GPUTPCGeometry::NPads(row));
    } else {
      clustererNN.mInputData_32[top_idx - 3] = sector / 36.f;
      clustererNN.mInputData_32[top_idx - 2] = row / 152.f;
      clustererNN.mInputData_32[top_idx - 1] = static_cast<float>(pad) / GPUTPCGeometry::NPads(row);
    }
  } else if ((int32_t)transient_index < (clustererNN.mNnClusterizerElementSize - 3)) {
    int32_t time = static_cast<int>(peak.time());
    int32_t r = CAMath::Floor(transient_index / ((2 * clustererNN.mNnClusterizerSizeInputPad + 1) * (2 * clustererNN.mNnClusterizerSizeInputTime + 1))) - clustererNN.mNnClusterizerSizeInputRow;
    bool is_row_boundary = ((row + r) > (o2::tpc::constants::MAXGLOBALPADROW - 1)) || ((row + r) < 0);
    if (is_row_boundary) {
      if (dtype == 0) {
        clustererNN.mInputData_16[base_idx * clustererNN.mNnClusterizerElementSize + transient_index] = (OrtDataType::Float16_t)(static_cast<float>(clustererNN.mNnClusterizerBoundaryFillValue));
      } else {
        clustererNN.mInputData_32[base_idx * clustererNN.mNnClusterizerElementSize + transient_index] = static_cast<float>(clustererNN.mNnClusterizerBoundaryFillValue);
      }
    } else {
      int32_t row_offset = GPUTPCNNClusterizerKernels::rowOffset(row, clustererNN.mNnClusterizerSizeInputRow);
      int32_t pad_offset = GPUTPCNNClusterizerKernels::padOffset(row, row + r);
      int32_t rest_1 = transient_index % ((2 * clustererNN.mNnClusterizerSizeInputPad + 1) * (2 * clustererNN.mNnClusterizerSizeInputTime + 1));
      int32_t p = CAMath::Floor(rest_1 / (2 * clustererNN.mNnClusterizerSizeInputTime + 1)) - clustererNN.mNnClusterizerSizeInputPad + pad_offset;
      int32_t t = (rest_1 % (2 * clustererNN.mNnClusterizerSizeInputTime + 1)) - clustererNN.mNnClusterizerSizeInputTime;

      bool is_boundary = GPUTPCNNClusterizerKernels::isBoundary(row + r + row_offset, pad + p, clustererNN.mNnClusterizerSizeInputRow) && (t < 0 || t >= TPC_MAX_FRAGMENT_LEN_GPU);

      if (!is_boundary) {
        float central_charge = static_cast<float>(chargeMap[peak].unpack());
        CfChargePos tmp_pos(row + r, pad + p, time + t);
        if (dtype == 0) {
          clustererNN.mInputData_16[base_idx * clustererNN.mNnClusterizerElementSize + transient_index] = (OrtDataType::Float16_t)(static_cast<float>(chargeMap[tmp_pos].unpack()) / central_charge);
        } else if (dtype == 1) {
          clustererNN.mInputData_32[base_idx * clustererNN.mNnClusterizerElementSize + transient_index] = static_cast<float>(chargeMap[tmp_pos].unpack()) / central_charge;
        }
      } else {
        if (dtype == 0) {
          clustererNN.mInputData_16[base_idx * clustererNN.mNnClusterizerElementSize + transient_index] = (OrtDataType::Float16_t)(static_cast<float>(clustererNN.mNnClusterizerBoundaryFillValue));
        } else {
          clustererNN.mInputData_32[base_idx * clustererNN.mNnClusterizerElementSize + transient_index] = static_cast<float>(clustererNN.mNnClusterizerBoundaryFillValue);
        }
      }
    }
  }
}

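// determineClass1Labels: binary classification. A peak is accepted as a cluster if the model output
// exceeds the configured probability threshold mNnClassThreshold.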
template <>
GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::determineClass1Labels>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint32_t batchStart)
{
  uint32_t glo_idx = get_global_id(0);
  if (dtype == 0) {
    processors.tpcNNClusterer[sector].mOutputDataClass[glo_idx + batchStart] = (int)((processors.tpcNNClusterer[sector].mModelProbabilities_16[glo_idx]).ToFloat() > processors.tpcNNClusterer[sector].mNnClassThreshold);
  } else if (dtype == 1) {
    processors.tpcNNClusterer[sector].mOutputDataClass[glo_idx + batchStart] = (int)(processors.tpcNNClusterer[sector].mModelProbabilities_32[glo_idx] > processors.tpcNNClusterer[sector].mNnClassThreshold);
  }
}

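// determineClass2Labels: multi-class output. The class with the highest model output wins; labels > 1
// mark split (overlapping) clusters, for which both cluster flags are set.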
template <>
GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::determineClass2Labels>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint32_t batchStart)
{
  auto& clustererNN = processors.tpcNNClusterer[sector];
  uint32_t glo_idx = get_global_id(0);
  uint32_t elem_iterator = glo_idx * clustererNN.mNnClusterizerModelClassNumOutputNodes;
  float current_max_prob = 0.f; // If the neural network doesn't contain a softmax as its last layer, the outputs can range in (-inf, inf)
  uint32_t class_label = 0;
  // The outputs of the class network are the per-class probabilities; the highest one wins
  for (uint32_t pIdx = elem_iterator; pIdx < elem_iterator + clustererNN.mNnClusterizerModelClassNumOutputNodes; pIdx++) {
    float prob = 0.f;
    if (dtype == 0) {
      prob = clustererNN.mModelProbabilities_16[pIdx].ToFloat();
    } else if (dtype == 1) {
      prob = clustererNN.mModelProbabilities_32[pIdx];
    }
    if (pIdx == elem_iterator || prob > current_max_prob) {
      current_max_prob = prob;
      class_label = pIdx - elem_iterator; // index of the most probable class
    }
  }
  clustererNN.mOutputDataClass[glo_idx + batchStart] = class_label;
  if (class_label > 1) {
    clustererNN.mClusterFlags[2 * glo_idx] = 1;
    clustererNN.mClusterFlags[2 * glo_idx + 1] = 1;
  }
}

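// publishClass1Regression: for peaks accepted by the classifier, build the cluster from the regression
// outputs (pad / time / sigma offsets and the charge scale relative to the central charge), convert it to
// ClusterNative and sort it into the per-row output buckets, mirroring the default clusterizer.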
template <>
GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::publishClass1Regression>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t withMC, uint32_t batchStart)
{
  uint32_t glo_idx = get_global_id(0);
  auto& clusterer = processors.tpcClusterer[sector];
  auto& clustererNN = processors.tpcNNClusterer[sector];

  uint32_t maxClusterNum = clusterer.mPmemory->counters.nClusters;
  uint32_t full_glo_idx = glo_idx + batchStart;
  if (full_glo_idx >= maxClusterNum) {
    return;
  }
  int32_t model_output_index = glo_idx * clustererNN.mNnClusterizerModelReg1NumOutputNodes;

  CfArray2D<PackedCharge> chargeMap(reinterpret_cast<PackedCharge*>(clusterer.mPchargeMap));
  CfChargePos peak = clusterer.mPfilteredPeakPositions[CAMath::Min(full_glo_idx, maxClusterNum - 1)];
  float central_charge = static_cast<float>(chargeMap[peak].unpack());

  CPU_ONLY(MCLabelAccumulator labelAccElem(clusterer));
  MCLabelAccumulator* labelAcc = CPU_PTR(&labelAccElem);
  tpc::ClusterNative* clusterOut = (withMC) ? nullptr : clusterer.mPclusterByRow;

  // LOG(info) << glo_idx << " -- " << model_output_index << " / " << clustererNN.outputDataReg1.size() << " / " << clustererNN.mNnClusterizerModelReg1NumOutputNodes << " -- " << clusterer.peakPositions.size() << " -- " << clusterer.centralCharges.size();

  if (clustererNN.mOutputDataClass[full_glo_idx] == 1 || (clustererNN.mNnClusterizerModelReg2NumOutputNodes != -1 && clustererNN.mOutputDataClass[full_glo_idx] >= 1)) {
    ClusterAccumulator pc; // assumption: the accumulator declaration is elided in the listing; it is required by setFull()/toNative() below

    // Publishing logic is taken from the default clusterizer
    if (withMC) {
      ClusterAccumulator dummy_pc;
      CPU_ONLY(labelAcc->collect(peak, central_charge));
      GPUTPCCFClusterizer::buildCluster(
        clusterer.Param().rec,
        chargeMap,
        peak,
        smem.posBcast,
        smem.buf,
        smem.innerAboveThreshold,
        &dummy_pc,
        labelAcc);
    }
    if ((clusterer.mPmemory->fragment).isOverlap(peak.time())) {
      if (clusterer.mPclusterPosInRow) {
        clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow;
      }
      return;
    }

    if (dtype == 0) {
      pc.setFull(central_charge * clustererNN.mOutputDataReg1_16[model_output_index + 4].ToFloat(),
                 static_cast<float>(peak.pad()) + clustererNN.mOutputDataReg1_16[model_output_index].ToFloat(),
                 clustererNN.mOutputDataReg1_16[model_output_index + 2].ToFloat(),
                 (clusterer.mPmemory->fragment).start + static_cast<float>(peak.time()) + clustererNN.mOutputDataReg1_16[model_output_index + 1].ToFloat(),
                 clustererNN.mOutputDataReg1_16[model_output_index + 3].ToFloat(),
                 clustererNN.mClusterFlags[2 * glo_idx],
                 clustererNN.mClusterFlags[2 * glo_idx + 1]);
    } else if (dtype == 1) {
      pc.setFull(central_charge * clustererNN.mOutputDataReg1_32[model_output_index + 4],
                 static_cast<float>(peak.pad()) + clustererNN.mOutputDataReg1_32[model_output_index],
                 clustererNN.mOutputDataReg1_32[model_output_index + 2],
                 (clusterer.mPmemory->fragment).start + static_cast<float>(peak.time()) + clustererNN.mOutputDataReg1_32[model_output_index + 1],
                 clustererNN.mOutputDataReg1_32[model_output_index + 3],
                 clustererNN.mClusterFlags[2 * glo_idx],
                 clustererNN.mClusterFlags[2 * glo_idx + 1]);
    }

    tpc::ClusterNative myCluster;
    bool rejectCluster = !pc.toNative(peak, central_charge, myCluster, clusterer.Param(), chargeMap);
    if (rejectCluster) {
      if (clusterer.mPclusterPosInRow) {
        clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow;
      }
      return;
    }

    uint32_t rowIndex = 0;
    if (clusterOut != nullptr) {
      rowIndex = GPUTPCCFClusterizer::sortIntoBuckets(
        clusterer,
        myCluster,
        peak.row(),
        clusterer.mNMaxClusterPerRow,
        clusterer.mPclusterInRow,
        clusterOut);
      if (clusterer.mPclusterPosInRow != nullptr) {
        clusterer.mPclusterPosInRow[full_glo_idx] = rowIndex;
      }
    } else if (clusterer.mPclusterPosInRow) {
      rowIndex = clusterer.mPclusterPosInRow[full_glo_idx];
    }
    CPU_ONLY(labelAcc->commit(peak.row(), rowIndex, clusterer.mNMaxClusterPerRow));
  } else {
    if (clusterer.mPclusterPosInRow) {
      clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow;
    }
    return;
  }
}

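// publishClass2Regression: for peaks classified as overlapping, the two-cluster regression head provides
// two sets of outputs; both clusters are built from the same peak, converted to ClusterNative and published.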
template <>
GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::publishClass2Regression>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t withMC, uint32_t batchStart)
{
  uint32_t glo_idx = get_global_id(0);
  auto& clusterer = processors.tpcClusterer[sector];
  auto& clustererNN = processors.tpcNNClusterer[sector];

  CfArray2D<PackedCharge> chargeMap(reinterpret_cast<PackedCharge*>(clusterer.mPchargeMap));
  CfChargePos peak = clusterer.mPfilteredPeakPositions[CAMath::Min(glo_idx + batchStart, (uint32_t)(clusterer.mPmemory->counters.nClusters - 1))];
  float central_charge = static_cast<float>(chargeMap[peak].unpack());

  CPU_ONLY(MCLabelAccumulator labelAccElem(clusterer));
  MCLabelAccumulator* labelAcc = CPU_PTR(&labelAccElem);
  tpc::ClusterNative* clusterOut = (withMC) ? nullptr : clusterer.mPclusterByRow;
  uint32_t full_glo_idx = glo_idx + batchStart;
  uint32_t model_output_index = glo_idx * clustererNN.mNnClusterizerModelReg2NumOutputNodes;

  if (clustererNN.mOutputDataClass[full_glo_idx] > 0) {
    ClusterAccumulator pc; // assumption: the accumulator declaration is elided in the listing; it is required by setFull()/toNative() below

    if (withMC) {
      ClusterAccumulator dummy_pc;
      CPU_ONLY(labelAcc->collect(peak, central_charge));
      GPUTPCCFClusterizer::buildCluster(
        clusterer.Param().rec,
        chargeMap,
        peak,
        smem.posBcast,
        smem.buf,
        smem.innerAboveThreshold,
        &dummy_pc,
        labelAcc);
    }
    if ((clusterer.mPmemory->fragment).isOverlap(peak.time())) {
      if (clusterer.mPclusterPosInRow) {
        clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow;
      }
      return;
    }

    // Cluster 1
    if (dtype == 0) {
      pc.setFull(central_charge * clustererNN.mOutputDataReg2_16[model_output_index + 8].ToFloat(),
                 static_cast<float>(peak.pad()) + clustererNN.mOutputDataReg2_16[model_output_index].ToFloat(),
                 clustererNN.mOutputDataReg2_16[model_output_index + 4].ToFloat(),
                 (clusterer.mPmemory->fragment).start + static_cast<float>(peak.time()) + clustererNN.mOutputDataReg2_16[model_output_index + 2].ToFloat(),
                 clustererNN.mOutputDataReg2_16[model_output_index + 6].ToFloat(),
                 clustererNN.mClusterFlags[2 * glo_idx],
                 clustererNN.mClusterFlags[2 * glo_idx + 1]);
    } else if (dtype == 1) {
      pc.setFull(central_charge * clustererNN.mOutputDataReg2_32[model_output_index + 8],
                 static_cast<float>(peak.pad()) + clustererNN.mOutputDataReg2_32[model_output_index],
                 clustererNN.mOutputDataReg2_32[model_output_index + 4],
                 (clusterer.mPmemory->fragment).start + static_cast<float>(peak.time()) + clustererNN.mOutputDataReg2_32[model_output_index + 2],
                 clustererNN.mOutputDataReg2_32[model_output_index + 6],
                 clustererNN.mClusterFlags[2 * glo_idx],
                 clustererNN.mClusterFlags[2 * glo_idx + 1]);
    }

    tpc::ClusterNative myCluster;
    bool rejectCluster = !pc.toNative(peak, central_charge, myCluster, clusterer.Param(), chargeMap);
    if (rejectCluster) {
      if (clusterer.mPclusterPosInRow) {
        clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow;
      }
      return;
    }

    uint32_t rowIndex = 0;
    if (clusterOut != nullptr) {
      rowIndex = GPUTPCCFClusterizer::sortIntoBuckets(
        clusterer,
        myCluster,
        peak.row(),
        clusterer.mNMaxClusterPerRow,
        clusterer.mPclusterInRow,
        clusterOut);
      if (clusterer.mPclusterPosInRow != nullptr) {
        clusterer.mPclusterPosInRow[full_glo_idx] = rowIndex;
      }
    } else if (clusterer.mPclusterPosInRow) {
      rowIndex = clusterer.mPclusterPosInRow[full_glo_idx];
    }
    CPU_ONLY(labelAcc->commit(peak.row(), rowIndex, clusterer.mNMaxClusterPerRow));

    // Cluster 2
    if (dtype == 0) {
      pc.setFull(central_charge * clustererNN.mOutputDataReg2_16[model_output_index + 9].ToFloat(),
                 static_cast<float>(peak.pad()) + clustererNN.mOutputDataReg2_16[model_output_index + 1].ToFloat(),
                 clustererNN.mOutputDataReg2_16[model_output_index + 5].ToFloat(),
                 (clusterer.mPmemory->fragment).start + static_cast<float>(peak.time()) + clustererNN.mOutputDataReg2_16[model_output_index + 3].ToFloat(),
                 clustererNN.mOutputDataReg2_16[model_output_index + 7].ToFloat(),
                 clustererNN.mClusterFlags[2 * glo_idx],
                 clustererNN.mClusterFlags[2 * glo_idx + 1]);
    } else if (dtype == 1) {
      pc.setFull(central_charge * clustererNN.mOutputDataReg2_32[model_output_index + 9],
                 static_cast<float>(peak.pad()) + clustererNN.mOutputDataReg2_32[model_output_index + 1],
                 clustererNN.mOutputDataReg2_32[model_output_index + 5],
                 (clusterer.mPmemory->fragment).start + static_cast<float>(peak.time()) + clustererNN.mOutputDataReg2_32[model_output_index + 3],
                 clustererNN.mOutputDataReg2_32[model_output_index + 7],
                 clustererNN.mClusterFlags[2 * glo_idx],
                 clustererNN.mClusterFlags[2 * glo_idx + 1]);
    }

    rejectCluster = !pc.toNative(peak, central_charge, myCluster, clusterer.Param(), chargeMap);
    if (rejectCluster) {
      if (clusterer.mPclusterPosInRow) {
        clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow;
      }
      return;
    }

    if (clusterOut != nullptr) {
      rowIndex = GPUTPCCFClusterizer::sortIntoBuckets(
        clusterer,
        myCluster,
        peak.row(),
        clusterer.mNMaxClusterPerRow,
        clusterer.mPclusterInRow,
        clusterOut);
      if (clusterer.mPclusterPosInRow != nullptr) {
        clusterer.mPclusterPosInRow[full_glo_idx] = rowIndex;
      }
    } else if (clusterer.mPclusterPosInRow) {
      rowIndex = clusterer.mPclusterPosInRow[full_glo_idx];
    }
    // CPU_ONLY(labelAcc->commit(peak.row(), rowIndex, clusterer.mNMaxClusterPerRow)); // -> Is this needed? How to handle MC labels for split clusters?
  } else {
    if (clusterer.mPclusterPosInRow) {
      clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow;
    }
    return;
  }
}

// The following arithmetic is done because the network is trained with a split at the IROC-OROC boundary
GPUd() int32_t GPUTPCNNClusterizerKernels::padOffset(int32_t row_ref, int32_t row_current)
{
  return (int)((GPUTPCGeometry::NPads(row_current) - GPUTPCGeometry::NPads(row_ref)) / 2);
}

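// Rows above 62 belong to the OROC; they are shifted by global_shift so that the artificial IROC-OROC gap
// used during training is preserved in the boundary check.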
GPUd() int32_t GPUTPCNNClusterizerKernels::rowOffset(int32_t row, int32_t global_shift)
{
  return (row > 62 ? global_shift : 0);
}

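// A position counts as boundary if it lies outside the pad range of its row, inside the artificial
// IROC-OROC gap, or beyond the last row (row indices here already include the rowOffset shift).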
GPUd() bool GPUTPCNNClusterizerKernels::isBoundary(int32_t row, int32_t pad, int32_t global_shift)
{
  if (pad < 0 || row < 0) { // Faster short-circuit
    return true;
  } else if (row < 63) {
    return (pad >= static_cast<int>(GPUTPCGeometry::NPads(row)));
  } else if (row < (63 + global_shift)) { // to account for the gap between IROC and OROC; the charge is set to the boundary fill value to signal the boundary to the neural network
    return true;
  } else if (row < (o2::tpc::constants::MAXGLOBALPADROW + global_shift)) {
    return (pad >= static_cast<int>(GPUTPCGeometry::NPads(row - global_shift)));
  } else {
    return true;
  }
}