auto& clusterer = processors.tpcClusterer[sector];
auto& clustererNN = processors.tpcNNClusterer[sector];
if (glo_idx + batchStart >= clusterer.mPmemory->counters.nClusters) {
  return;
}
uint32_t write_idx = glo_idx * clustererNN.mNnClusterizerElementSize;
CfChargePos peak = clusterer.mPfilteredPeakPositions[CAMath::Min(glo_idx + batchStart, (uint32_t)(clusterer.mPmemory->counters.nClusters - 1))];
int32_t row = static_cast<int>(peak.row());
int32_t pad = static_cast<int>(peak.pad());
int32_t time = static_cast<int>(peak.time());
float central_charge = static_cast<float>(chargeMap[peak].unpack());
int32_t row_offset = GPUTPCNNClusterizerKernels::rowOffset(row, clustererNN.mNnClusterizerSizeInputRow);
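// Fill the flat input tensor with the charge window around the peak: one entry
// per (row, pad, time) voxel in a (2*SizeInputRow+1) x (2*SizeInputPad+1) x
// (2*SizeInputTime+1) neighborhood, normalized to the central (peak) charge.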
for (int32_t r = -clustererNN.mNnClusterizerSizeInputRow; r <= clustererNN.mNnClusterizerSizeInputRow; ++r) {
  int32_t target_row = row + r;
  bool is_row_boundary = (target_row < 0) || (target_row > o2::tpc::constants::MAXGLOBALPADROW - 1);
  int32_t pad_offset = is_row_boundary ? 0 : GPUTPCNNClusterizerKernels::padOffset(row, target_row);
  for (int32_t p = -clustererNN.mNnClusterizerSizeInputPad + pad_offset; p <= clustererNN.mNnClusterizerSizeInputPad + pad_offset; ++p) {
    int32_t target_pad = pad + p;
    bool is_boundary = is_row_boundary || GPUTPCNNClusterizerKernels::isBoundary(target_row + row_offset, target_pad, clustererNN.mNnClusterizerSizeInputRow);
    for (int32_t t = -clustererNN.mNnClusterizerSizeInputTime; t <= clustererNN.mNnClusterizerSizeInputTime; ++t) {
      int32_t target_time = time + t;
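      // Voxels outside the detector (row/pad) or outside the time fragment cannot
      // be read from the charge map and get a fixed boundary fill value instead.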
      if (is_boundary || target_time < 0 || target_time >= TPC_MAX_FRAGMENT_LEN_GPU) {
        float boundary_value = static_cast<float>(clustererNN.mNnClusterizerBoundaryFillValue);
        if (dtype == 0) {
          clustererNN.mInputData_16[write_idx] = (OrtDataType::Float16_t)boundary_value;
        } else if (dtype == 1) {
          clustererNN.mInputData_32[write_idx] = boundary_value;
        }
      } else {
        CfChargePos tmp_pos(target_row, target_pad, target_time);
        float normalized_charge = static_cast<float>(chargeMap[tmp_pos].unpack()) / central_charge;
        if (!clustererNN.mNnClusterizerSetDeconvolutionFlags && r == 0 && CAMath::Abs(p) < 3 && CAMath::Abs(t) < 3 && p != 0 && t != 0) {
          clustererNN.mClusterFlags[2 * glo_idx] += CfUtils::isPeak(isPeakMap[tmp_pos]);
          clustererNN.mClusterFlags[2 * glo_idx + 1] = clustererNN.mClusterFlags[2 * glo_idx];
        }
        if (dtype == 0) {
          clustererNN.mInputData_16[write_idx] = (OrtDataType::Float16_t)normalized_charge;
        } else if (dtype == 1) {
          clustererNN.mInputData_32[write_idx] = normalized_charge;
        }
      }
      ++write_idx;
    }
  }
}
if (clustererNN.mNnClusterizerAddIndexData) {
  if (dtype == 0) {
    // ...
    clustererNN.mInputData_16[write_idx + 2] = (OrtDataType::Float16_t)(static_cast<float>(pad) / GPUTPCGeometry::NPads(row));
  } else if (dtype == 1) {
    // ...
    clustererNN.mInputData_32[write_idx + 2] = static_cast<float>(pad) / GPUTPCGeometry::NPads(row);
  }
}
if (!clustererNN.mNnClusterizerSetDeconvolutionFlags) {
  clustererNN.mClusterFlags[2 * glo_idx] = 0;
  clustererNN.mClusterFlags[2 * glo_idx + 1] = 0;
  // Count how many of the peak's eight inner neighbors are peaks themselves.
  for (uint16_t i = 0; i < 8; ++i) {
    Delta2 d = cfconsts::InnerNeighbors[i];
    CfChargePos tmp_pos = peak.delta(d);
    clustererNN.mClusterFlags[2 * glo_idx] += CfUtils::isPeak(isPeakMap[tmp_pos]);
  }
  clustererNN.mClusterFlags[2 * glo_idx + 1] = clustererNN.mClusterFlags[2 * glo_idx];
}
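// Variant of the input fill with one thread per tensor element instead of one
// thread per cluster: glo_idx decomposes into the cluster (base_idx) and the
// element within that cluster's row/time block (transient_index).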
auto& clusterer = processors.tpcClusterer[sector];
auto& clustererNN = processors.tpcNNClusterer[sector];
uint32_t base_idx = glo_idx / clustererNN.mNnClusterizerRowTimeSizeFull;
uint32_t transient_index = glo_idx - (base_idx * clustererNN.mNnClusterizerRowTimeSizeFull);
if (base_idx + batchStart >= clusterer.mPmemory->counters.nClusters) {
  return;
}
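// Worked example (assumed sizes, for illustration only): with SizeInputRow = 3
// and SizeInputTime = 6, RowTimeSize = 7 * 13 = 91; with three appended index
// entries, RowTimeSizeFull = 94, so glo_idx = 200 gives base_idx = 2 and
// transient_index = 12 (i.e. row_idx = 0, time_idx = 12 below).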
CfChargePos peak = clusterer.mPfilteredPeakPositions[CAMath::Min(base_idx + batchStart, (uint32_t)(clusterer.mPmemory->counters.nClusters - 1))];
float central_charge = static_cast<float>(chargeMap[peak].unpack());
int32_t row = static_cast<int>(peak.row());
int32_t pad = static_cast<int>(peak.pad());
int32_t time = static_cast<int>(peak.time());
if (clustererNN.mNnClusterizerAddIndexData && transient_index >= clustererNN.mNnClusterizerRowTimeSize) {
  int32_t data_idx = transient_index - clustererNN.mNnClusterizerRowTimeSize;
  uint32_t write_idx = base_idx * clustererNN.mNnClusterizerElementSize + clustererNN.mNnClusterizerChargeArraySize + data_idx;
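  // The three index entries (data_idx = 0..2) sit directly behind the charge
  // block of each element, hence the mNnClusterizerChargeArraySize offset.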
  float index_values[3] = {
    // ...
    static_cast<float>(pad) / GPUTPCGeometry::NPads(row)};
  if (dtype == 0) {
    clustererNN.mInputData_16[write_idx] = (OrtDataType::Float16_t)index_values[data_idx];
  } else if (dtype == 1) {
    clustererNN.mInputData_32[write_idx] = index_values[data_idx];
  }
  if (data_idx == 2 && !clustererNN.mNnClusterizerSetDeconvolutionFlags) {
    uint8_t cluster_flags = 0;
    for (uint16_t i = 0; i < 8; i++) {
      Delta2 d = cfconsts::InnerNeighbors[i];
      CfChargePos tmp_pos = peak.delta(d);
      cluster_flags += CfUtils::isPeak(isPeakMap[tmp_pos]);
    }
    clustererNN.mClusterFlags[2 * base_idx] = cluster_flags;
    clustererNN.mClusterFlags[2 * base_idx + 1] = cluster_flags;
  }
}
if (transient_index < clustererNN.mNnClusterizerRowTimeSize) {
  int32_t row_idx = transient_index / clustererNN.mNnClusterizerFullTimeSize;
  int32_t r_local = row_idx - clustererNN.mNnClusterizerSizeInputRow;
  int32_t time_idx = transient_index - row_idx * clustererNN.mNnClusterizerFullTimeSize;
  int32_t t_local = time_idx - clustererNN.mNnClusterizerSizeInputTime;
  int32_t write_idx = base_idx * clustererNN.mNnClusterizerElementSize + row_idx * clustererNN.mNnClusterizerPadTimeSize + time_idx;
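  // Each thread owns one (row, time) slot and walks the pad axis serially; in
  // the flat tensor, consecutive pads of the same row/time slot are
  // mNnClusterizerFullTimeSize apart, the write_idx stride used in the loop below.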
  int32_t target_row = row + r_local;
  bool is_row_boundary = (target_row < 0) || (target_row > o2::tpc::constants::MAXGLOBALPADROW - 1);
  int32_t row_offset = GPUTPCNNClusterizerKernels::rowOffset(row, clustererNN.mNnClusterizerSizeInputRow);
  int32_t pad_offset = GPUTPCNNClusterizerKernels::padOffset(row, target_row);
  for (int32_t p_local = -clustererNN.mNnClusterizerSizeInputPad + pad_offset; p_local <= clustererNN.mNnClusterizerSizeInputPad + pad_offset; p_local++) {
    if (is_row_boundary) {
      float boundary_val = static_cast<float>(clustererNN.mNnClusterizerBoundaryFillValue);
      if (dtype == 0) {
        clustererNN.mInputData_16[write_idx] = (OrtDataType::Float16_t)boundary_val;
      } else if (dtype == 1) {
        clustererNN.mInputData_32[write_idx] = boundary_val;
      }
      write_idx += clustererNN.mNnClusterizerFullTimeSize;
      continue;
    }
    int32_t target_pad = pad + p_local;
    int32_t target_time = time + t_local;
    int8_t is_boundary = GPUTPCNNClusterizerKernels::isBoundary(target_row + row_offset, target_pad, clustererNN.mNnClusterizerSizeInputRow) || (target_time < 0) || (target_time >= TPC_MAX_FRAGMENT_LEN_GPU);
    float output_value;
    if (is_boundary) {
      output_value = static_cast<float>(clustererNN.mNnClusterizerBoundaryFillValue);
    } else {
      CfChargePos tmp_pos(target_row, target_pad, target_time);
      output_value = static_cast<float>(chargeMap[tmp_pos].unpack()) / central_charge;
    }
    if (dtype == 0) {
      clustererNN.mInputData_16[write_idx] = (OrtDataType::Float16_t)output_value;
    } else if (dtype == 1) {
      clustererNN.mInputData_32[write_idx] = output_value;
    }
    write_idx += clustererNN.mNnClusterizerFullTimeSize;
  }
}
auto& clustererNN = processors.tpcNNClusterer[sector];
uint32_t elem_iterator = glo_idx * clustererNN.mNnClusterizerModelClassNumOutputNodes;
float current_max_prob = 0.f;
uint32_t class_label = 0;
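// Running maximum over this cluster's class scores; together with the label
// bookkeeping this implements an argmax over the classification outputs.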
for (uint32_t pIdx = elem_iterator; pIdx < elem_iterator + clustererNN.mNnClusterizerModelClassNumOutputNodes; pIdx++) {
  if (pIdx == elem_iterator) {
    if (dtype == 0) {
      current_max_prob = static_cast<float>(clustererNN.mModelProbabilities_16[pIdx]);
    } else if (dtype == 1) {
      current_max_prob = clustererNN.mModelProbabilities_32[pIdx];
    }
  } else {
    if (dtype == 0) {
      current_max_prob = CAMath::Max(current_max_prob, clustererNN.mModelProbabilities_16[pIdx].ToFloat());
    } else if (dtype == 1) {
      current_max_prob = CAMath::Max(current_max_prob, clustererNN.mModelProbabilities_32[pIdx]);
    }
  }
  // ... (class_label tracks the index of the running maximum)
}
clustererNN.mOutputDataClass[glo_idx + batchStart] = class_label;
if (class_label > 1) {
  clustererNN.mClusterFlags[2 * glo_idx] = 1;
  clustererNN.mClusterFlags[2 * glo_idx + 1] = 1;
}
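// (class_label > 1 presumably marks a peak shared by overlapping clusters,
// hence both split flags are raised before the regression stage consumes them.)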
auto& clusterer = processors.tpcClusterer[sector];
auto& clustererNN = processors.tpcNNClusterer[sector];
uint32_t maxClusterNum = clusterer.mPmemory->counters.nClusters;
uint32_t full_glo_idx = glo_idx + batchStart;
int32_t model_output_index = glo_idx * clustererNN.mNnClusterizerModelReg1NumOutputNodes;
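// Layout of the five regression-1 outputs per cluster, as consumed by setFull
// below: +0 pad offset, +1 time offset, +2 sigma(pad), +3 sigma(time),
// +4 charge scale relative to the central charge.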
CfChargePos peak = clusterer.mPfilteredPeakPositions[CAMath::Min(full_glo_idx, maxClusterNum - 1)];
float central_charge = static_cast<float>(chargeMap[peak].unpack());
if (full_glo_idx >= maxClusterNum) {
  // Dummy work items past the last cluster still join the cooperative
  // buildCluster (it relies on workgroup-shared memory) and discard the result.
  GPUTPCCFClusterizer::buildCluster(
    clusterer.Param().rec,
    /* ... */
    smem.innerAboveThreshold,
    /* ... */);
  return;
}
if (clustererNN.mOutputDataClass[full_glo_idx] == 1 || (clustererNN.mNnClusterizerUseClassification <= 0)) {
  ClusterAccumulator pc;
  GPUTPCCFClusterizer::buildCluster(
    clusterer.Param().rec,
    /* ... */
    smem.innerAboveThreshold,
    /* ... */);
  if ((clusterer.mPmemory->fragment).isOverlap(peak.time())) {
    if (clusterer.mPclusterPosInRow) {
      clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow;
    }
    return;
  }
  bool notSinglePad = false, notSingleTime = false;
  for (uint16_t i = 0; i < 8; i++) {
    Delta2 d = cfconsts::InnerNeighbors[i];
    CfChargePos tmp_pos = peak.delta(d);
    notSinglePad |= (d.x != 0) && (static_cast<float>(chargeMap[tmp_pos].unpack()) > 0);
    notSingleTime |= (d.y != 0) && (static_cast<float>(chargeMap[tmp_pos].unpack()) > 0);
  }
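  // If no neighboring charge extends in the pad (or time) direction, the cluster
  // is a single pad (or time bin) wide and its width estimate is forced to zero.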
  if (dtype == 0) {
    pc.setFull(central_charge * clustererNN.mOutputDataReg1_16[model_output_index + 4].ToFloat(),
               static_cast<float>(peak.pad()) + clustererNN.mOutputDataReg1_16[model_output_index].ToFloat(),
               notSinglePad ? clustererNN.mOutputDataReg1_16[model_output_index + 2].ToFloat() : 0.f,
               (clusterer.mPmemory->fragment).start + static_cast<float>(peak.time()) + clustererNN.mOutputDataReg1_16[model_output_index + 1].ToFloat(),
               notSingleTime ? clustererNN.mOutputDataReg1_16[model_output_index + 3].ToFloat() : 0.f,
               clustererNN.mClusterFlags[2 * glo_idx],
               clustererNN.mClusterFlags[2 * glo_idx + 1]);
  } else if (dtype == 1) {
    pc.setFull(central_charge * clustererNN.mOutputDataReg1_32[model_output_index + 4],
               static_cast<float>(peak.pad()) + clustererNN.mOutputDataReg1_32[model_output_index],
               notSinglePad ? clustererNN.mOutputDataReg1_32[model_output_index + 2] : 0.f,
               (clusterer.mPmemory->fragment).start + static_cast<float>(peak.time()) + clustererNN.mOutputDataReg1_32[model_output_index + 1],
               notSingleTime ? clustererNN.mOutputDataReg1_32[model_output_index + 3] : 0.f,
               clustererNN.mClusterFlags[2 * glo_idx],
               clustererNN.mClusterFlags[2 * glo_idx + 1]);
  }
  tpc::ClusterNative myCluster;
  bool rejectCluster = !pc.toNative(peak, central_charge, myCluster, clusterer.Param(), chargeMap);
  if (rejectCluster) {
    if (clusterer.mPclusterPosInRow) {
      clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow;
    }
    return;
  }
  uint32_t rowIndex = 0;
  if (clusterOut != nullptr) {
    rowIndex = GPUTPCCFClusterizer::sortIntoBuckets(
      /* ... */
      clusterer.mNMaxClusterPerRow,
      clusterer.mPclusterInRow,
      /* ... */);
    if (clusterer.mPclusterPosInRow != nullptr) {
      clusterer.mPclusterPosInRow[full_glo_idx] = rowIndex;
    }
  } else if (clusterer.mPclusterPosInRow) {
    rowIndex = clusterer.mPclusterPosInRow[full_glo_idx];
  }
  CPU_ONLY(labelAcc->commit(peak.row(), rowIndex, clusterer.mNMaxClusterPerRow));
} else {
  // Cluster rejected by the classification network: mark the slot as unused.
  if (clusterer.mPclusterPosInRow) {
    clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow;
  }
}
auto& clusterer = processors.tpcClusterer[sector];
auto& clustererNN = processors.tpcNNClusterer[sector];
uint32_t maxClusterNum = clusterer.mPmemory->counters.nClusters;
CfChargePos peak = clusterer.mPfilteredPeakPositions[CAMath::Min(glo_idx + batchStart, (uint32_t)(clusterer.mPmemory->counters.nClusters - 1))];
float central_charge = static_cast<float>(chargeMap[peak].unpack());
uint32_t full_glo_idx = glo_idx + batchStart;
if (full_glo_idx >= maxClusterNum) {
  // Dummy work items: same pattern as in the regression-1 kernel above.
  GPUTPCCFClusterizer::buildCluster(
    clusterer.Param().rec,
    /* ... */
    smem.innerAboveThreshold,
    /* ... */);
  return;
}
uint32_t model_output_index = glo_idx * clustererNN.mNnClusterizerModelReg2NumOutputNodes;
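// The regression-2 head predicts two cluster hypotheses per peak. As consumed
// by the two setFull calls below: even offsets (+0, +2, +4, +6, +8) belong to
// the first hypothesis, odd offsets (+1, +3, +5, +7, +9) to the second, with
// +8/+9 the respective charge scales.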
if ((clustererNN.mOutputDataClass[full_glo_idx] > 0) || (clustererNN.mNnClusterizerUseClassification <= 0)) {
  ClusterAccumulator pc;
  GPUTPCCFClusterizer::buildCluster(
    clusterer.Param().rec,
    /* ... */
    smem.innerAboveThreshold,
    /* ... */);
  if ((clusterer.mPmemory->fragment).isOverlap(peak.time())) {
    if (clusterer.mPclusterPosInRow) {
      clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow;
    }
    return;
  }
  if (dtype == 0) {
    pc.setFull(central_charge * clustererNN.mOutputDataReg2_16[model_output_index + 8].ToFloat(),
               static_cast<float>(peak.pad()) + clustererNN.mOutputDataReg2_16[model_output_index].ToFloat(),
               clustererNN.mOutputDataReg2_16[model_output_index + 4].ToFloat(),
               (clusterer.mPmemory->fragment).start + static_cast<float>(peak.time()) + clustererNN.mOutputDataReg2_16[model_output_index + 2].ToFloat(),
               clustererNN.mOutputDataReg2_16[model_output_index + 6].ToFloat(),
               clustererNN.mClusterFlags[2 * glo_idx],
               clustererNN.mClusterFlags[2 * glo_idx + 1]);
  } else if (dtype == 1) {
    pc.setFull(central_charge * clustererNN.mOutputDataReg2_32[model_output_index + 8],
               static_cast<float>(peak.pad()) + clustererNN.mOutputDataReg2_32[model_output_index],
               clustererNN.mOutputDataReg2_32[model_output_index + 4],
               (clusterer.mPmemory->fragment).start + static_cast<float>(peak.time()) + clustererNN.mOutputDataReg2_32[model_output_index + 2],
               clustererNN.mOutputDataReg2_32[model_output_index + 6],
               clustererNN.mClusterFlags[2 * glo_idx],
               clustererNN.mClusterFlags[2 * glo_idx + 1]);
  }
  tpc::ClusterNative myCluster;
  bool rejectCluster = !pc.toNative(peak, central_charge, myCluster, clusterer.Param(), chargeMap);
  if (rejectCluster) {
    if (clusterer.mPclusterPosInRow) {
      clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow;
    }
    return;
  }
  uint32_t rowIndex = 0;
  if (clusterOut != nullptr) {
    rowIndex = GPUTPCCFClusterizer::sortIntoBuckets(
      /* ... */
      clusterer.mNMaxClusterPerRow,
      clusterer.mPclusterInRow,
      /* ... */);
    if (clusterer.mPclusterPosInRow != nullptr) {
      clusterer.mPclusterPosInRow[full_glo_idx] = rowIndex;
    }
  } else if (clusterer.mPclusterPosInRow) {
    rowIndex = clusterer.mPclusterPosInRow[full_glo_idx];
  }
  CPU_ONLY(labelAcc->commit(peak.row(), rowIndex, clusterer.mNMaxClusterPerRow));
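  // Publish the second cluster hypothesis through the same accumulate / convert /
  // sort-into-row-buckets sequence as the first.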
  if (dtype == 0) {
    pc.setFull(central_charge * clustererNN.mOutputDataReg2_16[model_output_index + 9].ToFloat(),
               static_cast<float>(peak.pad()) + clustererNN.mOutputDataReg2_16[model_output_index + 1].ToFloat(),
               clustererNN.mOutputDataReg2_16[model_output_index + 5].ToFloat(),
               (clusterer.mPmemory->fragment).start + static_cast<float>(peak.time()) + clustererNN.mOutputDataReg2_16[model_output_index + 3].ToFloat(),
               clustererNN.mOutputDataReg2_16[model_output_index + 7].ToFloat(),
               clustererNN.mClusterFlags[2 * glo_idx],
               clustererNN.mClusterFlags[2 * glo_idx + 1]);
  } else if (dtype == 1) {
    pc.setFull(central_charge * clustererNN.mOutputDataReg2_32[model_output_index + 9],
               static_cast<float>(peak.pad()) + clustererNN.mOutputDataReg2_32[model_output_index + 1],
               clustererNN.mOutputDataReg2_32[model_output_index + 5],
               (clusterer.mPmemory->fragment).start + static_cast<float>(peak.time()) + clustererNN.mOutputDataReg2_32[model_output_index + 3],
               clustererNN.mOutputDataReg2_32[model_output_index + 7],
               clustererNN.mClusterFlags[2 * glo_idx],
               clustererNN.mClusterFlags[2 * glo_idx + 1]);
  }
  rejectCluster = !pc.toNative(peak, central_charge, myCluster, clusterer.Param(), chargeMap);
  if (rejectCluster) {
    if (clusterer.mPclusterPosInRow) {
      clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow;
    }
    return;
  }
  if (clusterOut != nullptr) {
    rowIndex = GPUTPCCFClusterizer::sortIntoBuckets(
      /* ... */
      clusterer.mNMaxClusterPerRow,
      clusterer.mPclusterInRow,
      /* ... */);
    if (clusterer.mPclusterPosInRow != nullptr) {
      clusterer.mPclusterPosInRow[full_glo_idx] = rowIndex;
    }
  } else if (clusterer.mPclusterPosInRow) {
    rowIndex = clusterer.mPclusterPosInRow[full_glo_idx];
  }
} else {
  // Cluster rejected by the classification network: mark the slot as unused.
  if (clusterer.mPclusterPosInRow) {
    clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow;
  }
}