auto& clusterer = processors.tpcClusterer[sector];
auto& clustererNN = processors.tpcNNClusterer[sector];
if (glo_idx + batchStart >= clusterer.mPmemory->counters.nClusters || glo_idx >= (uint32_t)clustererNN.mNnClusterizerBatchedMode) {
  return; // out-of-range thread (assumed early return; the guard body is elided in this excerpt)
}
uint32_t write_idx = glo_idx * clustererNN.mNnClusterizerElementSize;
CfChargePos peak = clusterer.mPfilteredPeakPositions[CAMath::Min(glo_idx + batchStart, (uint32_t)(clusterer.mPmemory->counters.nClusters - 1))];
int32_t row = static_cast<int>(peak.row());
int32_t pad = static_cast<int>(peak.pad());
int32_t time = static_cast<int>(peak.time());
float central_charge = static_cast<float>(chargeMap[peak].unpack());
int32_t row_offset = GPUTPCNNClusterizerKernels::rowOffset(row, clustererNN.mNnClusterizerSizeInputRow);
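// Fill the charge window around the peak into the flat input tensor: all charges in a
// (2*SizeInputRow+1) x (2*SizeInputPad+1) x (2*SizeInputTime+1) neighborhood are written
// consecutively, each normalized to the central (peak) charge; positions outside the
// detector acceptance receive the configured boundary fill value instead.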
for (int32_t r = -clustererNN.mNnClusterizerSizeInputRow; r <= clustererNN.mNnClusterizerSizeInputRow; ++r) {
  int32_t target_row = row + r;
  bool is_row_boundary = (target_row < 0) || (target_row > o2::tpc::constants::MAXGLOBALPADROW - 1); // assumed definition; the original line is elided in this excerpt
  int32_t pad_offset = is_row_boundary ? 0 : GPUTPCNNClusterizerKernels::padOffset(row, target_row);
  for (int32_t p = -clustererNN.mNnClusterizerSizeInputPad + pad_offset; p <= clustererNN.mNnClusterizerSizeInputPad + pad_offset; ++p) {
    int32_t target_pad = pad + p;
    bool is_boundary = is_row_boundary || GPUTPCNNClusterizerKernels::isBoundary(target_row + row_offset, target_pad, clustererNN.mNnClusterizerSizeInputRow);
    for (int32_t t = -clustererNN.mNnClusterizerSizeInputTime; t <= clustererNN.mNnClusterizerSizeInputTime; ++t) {
      int32_t target_time = time + t;
      if (is_boundary || target_time < 0 || target_time >= clustererNN.maxAllowedTimebin) {
        float boundary_value = static_cast<float>(clustererNN.mNnClusterizerBoundaryFillValue);
        if (dtype == 0) { // dtype dispatch: float16 vs float32 input buffers (scaffolding reconstructed)
          clustererNN.mInputData_16[write_idx] = (OrtDataType::Float16_t)boundary_value;
        } else if (dtype == 1) {
          clustererNN.mInputData_32[write_idx] = boundary_value;
        }
      } else {
        CfChargePos tmp_pos(target_row, target_pad, target_time);
        float normalized_charge = static_cast<float>(chargeMap[tmp_pos].unpack()) / central_charge;
        if (dtype == 0) {
          clustererNN.mInputData_16[write_idx] = (OrtDataType::Float16_t)normalized_charge;
        } else if (dtype == 1) {
          clustererNN.mInputData_32[write_idx] = normalized_charge;
        }
      }
      ++write_idx; // advance to the next element of this cluster's input tensor (assumed; elided in this excerpt)
    }
  }
}
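// Optionally append index data after the charge window so the network also sees where
// the cluster sits on the pad plane; the entry shown below is the pad coordinate
// normalized to the number of pads in the row.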
if (clustererNN.mNnClusterizerAddIndexData) {
  if (dtype == 0) {
    // ... (entries at write_idx and write_idx + 1 elided in this excerpt)
    clustererNN.mInputData_16[write_idx + 2] = (OrtDataType::Float16_t)(static_cast<float>(pad) / GPUTPCGeometry::NPads(row));
  } else if (dtype == 1) {
    // ...
    clustererNN.mInputData_32[write_idx + 2] = static_cast<float>(pad) / GPUTPCGeometry::NPads(row);
  }
}
if (!clustererNN.mNnClusterizerSetDeconvolutionFlags) {
  clustererNN.mClusterFlags[2 * glo_idx] = 0;
  clustererNN.mClusterFlags[2 * glo_idx + 1] = 0;
  for (uint16_t i = 0; i < 8; ++i) {
    Delta2 d = cfconsts::InnerNeighbors[i];
    CfChargePos tmp_pos = peak.delta(d); // assumed neighbor lookup; the original line is elided in this excerpt
    clustererNN.mClusterFlags[2 * glo_idx] += CfUtils::isPeak(isPeakMap[tmp_pos]);
  }
  clustererNN.mClusterFlags[2 * glo_idx + 1] = clustererNN.mClusterFlags[2 * glo_idx];
}
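// --- Second input-filling variant: fully parallel over input elements. One GPU thread
// processes one element of one cluster's input tensor, so glo_idx must first be
// decomposed into a cluster index (base_idx) and an element index (transient_index). ---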
auto& clusterer = processors.tpcClusterer[sector];
auto& clustererNN = processors.tpcNNClusterer[sector];
if (glo_idx >= (uint32_t)clustererNN.mNnClusterizerBatchedMode * clustererNN.mNnClusterizerRowTimeSizeFull) {
  return;
}
uint32_t base_idx = glo_idx / clustererNN.mNnClusterizerRowTimeSizeFull;
uint32_t transient_index = glo_idx - (base_idx * clustererNN.mNnClusterizerRowTimeSizeFull);
if (base_idx + batchStart >= clusterer.mPmemory->counters.nClusters) {
  return;
}
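// Worked example of the decomposition (illustrative numbers, not from the source): with
// mNnClusterizerRowTimeSizeFull == 77 and glo_idx == 500, base_idx == 500 / 77 == 6 and
// transient_index == 500 - 6 * 77 == 38, i.e. this thread fills element 38 of the input
// tensor of cluster 6 in the current batch.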
CfChargePos peak = clusterer.mPfilteredPeakPositions[CAMath::Min(base_idx + batchStart, (uint32_t)(clusterer.mPmemory->counters.nClusters - 1))];
float central_charge = static_cast<float>(chargeMap[peak].unpack());
int32_t row = static_cast<int>(peak.row());
int32_t pad = static_cast<int>(peak.pad());
int32_t time = static_cast<int>(peak.time());
if (clustererNN.mNnClusterizerAddIndexData && transient_index >= clustererNN.mNnClusterizerRowTimeSize) {
  int32_t data_idx = transient_index - clustererNN.mNnClusterizerRowTimeSize;
  uint32_t write_idx = base_idx * clustererNN.mNnClusterizerElementSize + clustererNN.mNnClusterizerChargeArraySize + data_idx;
  float index_values[3] = {
    0.f, // placeholder: entry elided in this excerpt
    0.f, // placeholder: entry elided in this excerpt
    static_cast<float>(pad) / GPUTPCGeometry::NPads(row)};
  if (dtype == 0) {
    clustererNN.mInputData_16[write_idx] = (OrtDataType::Float16_t)index_values[data_idx];
  } else if (dtype == 1) {
    clustererNN.mInputData_32[write_idx] = index_values[data_idx];
  }
  if (!clustererNN.mNnClusterizerSetDeconvolutionFlags && data_idx == 2) {
    uint8_t cluster_flags = 0;
    for (uint16_t i = 0; i < 8; i++) {
      Delta2 d = cfconsts::InnerNeighbors[i];
      CfChargePos tmp_pos = peak.delta(d); // assumed neighbor lookup; elided in this excerpt
      cluster_flags += CfUtils::isPeak(isPeakMap[tmp_pos]);
    }
    clustererNN.mClusterFlags[2 * base_idx] = cluster_flags;
    clustererNN.mClusterFlags[2 * base_idx + 1] = cluster_flags;
  }
}
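// Charge-window elements are laid out row-major as [row][pad][time]: row_idx selects a
// row plane (stride mNnClusterizerPadTimeSize) and time_idx the position within it, so
// each thread steps across pads below with stride mNnClusterizerFullTimeSize.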
if (transient_index < clustererNN.mNnClusterizerRowTimeSize) {
  int32_t row_idx = transient_index / clustererNN.mNnClusterizerFullTimeSize;
  int32_t r_local = row_idx - clustererNN.mNnClusterizerSizeInputRow;
  int32_t time_idx = transient_index - row_idx * clustererNN.mNnClusterizerFullTimeSize;
  int32_t t_local = time_idx - clustererNN.mNnClusterizerSizeInputTime;
  int32_t write_idx = base_idx * clustererNN.mNnClusterizerElementSize + row_idx * clustererNN.mNnClusterizerPadTimeSize + time_idx;
  int32_t target_row = row + r_local;
  bool is_row_boundary = (target_row < 0) || (target_row > o2::tpc::constants::MAXGLOBALPADROW - 1); // assumed definition; the original line is elided in this excerpt
  int32_t row_offset = GPUTPCNNClusterizerKernels::rowOffset(row, clustererNN.mNnClusterizerSizeInputRow);
  int32_t pad_offset = GPUTPCNNClusterizerKernels::padOffset(row, target_row);
  for (int32_t p_local = -clustererNN.mNnClusterizerSizeInputPad + pad_offset; p_local <= clustererNN.mNnClusterizerSizeInputPad + pad_offset; p_local++) {
    if (is_row_boundary) {
      float boundary_val = static_cast<float>(clustererNN.mNnClusterizerBoundaryFillValue);
      if (dtype == 0) {
        clustererNN.mInputData_16[write_idx] = (OrtDataType::Float16_t)boundary_val;
      } else if (dtype == 1) {
        clustererNN.mInputData_32[write_idx] = boundary_val;
      }
      write_idx += clustererNN.mNnClusterizerFullTimeSize;
      continue;
    }
    int32_t target_pad = pad + p_local;
    int32_t target_time = time + t_local;
    int8_t is_boundary = GPUTPCNNClusterizerKernels::isBoundary(target_row + row_offset, target_pad, clustererNN.mNnClusterizerSizeInputRow) || (target_time < 0) || (target_time >= clustererNN.maxAllowedTimebin);
    float output_value = 0.f;
    if (is_boundary) {
      output_value = static_cast<float>(clustererNN.mNnClusterizerBoundaryFillValue);
    } else {
      CfChargePos tmp_pos(target_row, target_pad, target_time);
      output_value = static_cast<float>(chargeMap[tmp_pos].unpack()) / central_charge;
    }
    if (dtype == 0) {
      clustererNN.mInputData_16[write_idx] = (OrtDataType::Float16_t)output_value;
    } else if (dtype == 1) {
      clustererNN.mInputData_32[write_idx] = output_value;
    }
    // ... (additional per-element handling elided in this excerpt)
    write_idx += clustererNN.mNnClusterizerFullTimeSize;
  }
}
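// --- Classification output: for each cluster, scan the class probabilities produced by
// the network and keep the maximum; without classification every cluster is accepted
// with label 1. ---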
auto& clusterer = processors.tpcClusterer[sector];
auto& clustererNN = processors.tpcNNClusterer[sector];
if (glo_idx + batchStart >= clusterer.mPmemory->counters.nClusters || glo_idx >= (uint32_t)clustererNN.mNnClusterizerBatchedMode) {
  return;
}
if (clustererNN.mNnClusterizerUseClassification) {
  uint32_t elem_iterator = glo_idx * clustererNN.mNnClusterizerModelClassNumOutputNodes;
  float current_max_prob = 0.f;
  uint32_t class_label = 0;
  for (uint32_t pIdx = elem_iterator; pIdx < elem_iterator + clustererNN.mNnClusterizerModelClassNumOutputNodes; pIdx++) {
    if (pIdx == elem_iterator) {
      if (dtype == 0) {
        current_max_prob = static_cast<float>(clustererNN.mModelProbabilities_16[pIdx]);
      } else if (dtype == 1) {
        current_max_prob = clustererNN.mModelProbabilities_32[pIdx];
      }
    } else {
      if (dtype == 0) {
        current_max_prob = CAMath::Max(current_max_prob, clustererNN.mModelProbabilities_16[pIdx].ToFloat());
      } else if (dtype == 1) {
        current_max_prob = CAMath::Max(current_max_prob, clustererNN.mModelProbabilities_32[pIdx]);
      }
    }
  }
  // ... (determination of class_label from the probabilities elided in this excerpt)
  clustererNN.mOutputDataClass[glo_idx + batchStart] = class_label;
  if (class_label > 1) {
    clustererNN.mClusterFlags[2 * glo_idx] = 1;
    clustererNN.mClusterFlags[2 * glo_idx + 1] = 1;
  }
} else {
  clustererNN.mOutputDataClass[glo_idx + batchStart] = 1; // classification disabled: accept every cluster
}
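// --- Cluster publishing with regression model 1 (one cluster per peak): the network
// regression outputs are combined with the conventional CF cluster builder and written
// out as native clusters. ---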
auto& clusterer = processors.tpcClusterer[sector];
auto& clustererNN = processors.tpcNNClusterer[sector];
if (glo_idx >= (uint32_t)clustererNN.mNnClusterizerBatchedMode) {
  return;
}
uint32_t maxClusterNum = clusterer.mPmemory->counters.nClusters;
uint32_t full_glo_idx = glo_idx + batchStart;
int32_t model_output_index = glo_idx * clustererNN.mNnClusterizerModelReg1NumOutputNodes;
uint32_t peakIndex = CAMath::Min(full_glo_idx, maxClusterNum - 1);
CfChargePos peak = clusterer.mPfilteredPeakPositions[peakIndex];
float central_charge = static_cast<float>(chargeMap[peak].unpack());
// ... (local accumulator setup elided in this excerpt)
if (full_glo_idx >= maxClusterNum) {
  GPUTPCCFClusterizer::buildCluster(
    clusterer.Param().rec,
    /* ... arguments elided in this excerpt ... */
    smem.innerAboveThreshold,
    /* ... */);
  return; // out-of-range thread exits after participating in the cooperative build (assumed)
}
GPUTPCCFClusterizer::buildCluster(
  clusterer.Param().rec,
  /* ... arguments elided in this excerpt ... */
  smem.innerAboveThreshold,
  /* ... */);
if ((clusterer.mPmemory->fragment).isOverlap(peak.time())) {
  if (clusterer.mPclusterPosInRow) {
    clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow;
  }
  return;
}
bool notSinglePad = false, notSingleTime = false;
for (uint16_t i = 0; i < 8; i++) {
  Delta2 d = cfconsts::InnerNeighbors[i];
  CfChargePos tmp_pos = peak.delta(d); // assumed neighbor lookup; the original line is elided in this excerpt
  float v = static_cast<float>(chargeMap[tmp_pos].unpack());
  notSinglePad |= (d.x != 0) && (v > 0.f);
  notSingleTime |= (d.y != 0) && (v > 0.f);
}
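// notSinglePad/notSingleTime record whether any inner neighbor carries charge in the
// pad or time direction; for clusters confined to a single pad (or time bin) the
// regressed width in that direction is forced to 0 in the setFull calls below.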
float publishPadPosition = 0.f, publishTimePosition = 0.f;
if (dtype == 0) {
  publishPadPosition = static_cast<float>(peak.pad()) + clustererNN.mOutputDataReg1_16[model_output_index].ToFloat();
  publishTimePosition = static_cast<float>(peak.time()) + clustererNN.mOutputDataReg1_16[model_output_index + 1].ToFloat();
  isBoundaryPublish(full_glo_idx, static_cast<int32_t>(peak.row()), publishPadPosition, publishTimePosition);
  pc.setFull(central_charge * clustererNN.mOutputDataReg1_16[model_output_index + 4].ToFloat(),
             publishPadPosition, // assumed: this argument is elided in the excerpt
             notSinglePad ? clustererNN.mOutputDataReg1_16[model_output_index + 2].ToFloat() : 0.f,
             (clusterer.mPmemory->fragment).start + publishTimePosition,
             notSingleTime ? clustererNN.mOutputDataReg1_16[model_output_index + 3].ToFloat() : 0.f,
             clustererNN.mClusterFlags[2 * glo_idx],
             clustererNN.mClusterFlags[2 * glo_idx + 1]);
} else if (dtype == 1) {
  publishPadPosition = static_cast<float>(peak.pad()) + clustererNN.mOutputDataReg1_32[model_output_index];
  publishTimePosition = static_cast<float>(peak.time()) + clustererNN.mOutputDataReg1_32[model_output_index + 1];
  isBoundaryPublish(full_glo_idx, static_cast<int32_t>(peak.row()), publishPadPosition, publishTimePosition);
  pc.setFull(central_charge * clustererNN.mOutputDataReg1_32[model_output_index + 4],
             publishPadPosition, // assumed, as above
             notSinglePad ? clustererNN.mOutputDataReg1_32[model_output_index + 2] : 0.f,
             (clusterer.mPmemory->fragment).start + publishTimePosition,
             notSingleTime ? clustererNN.mOutputDataReg1_32[model_output_index + 3] : 0.f,
             clustererNN.mClusterFlags[2 * glo_idx],
             clustererNN.mClusterFlags[2 * glo_idx + 1]);
}
// ... (code elided in this excerpt, including the declaration of myCluster)
bool rejectCluster = !pc.toNative(peak, central_charge, myCluster, clusterer.Param(), chargeMap);
if (clustererNN.mNnClusterizerUseClassification) {
  rejectCluster |= (clustererNN.mOutputDataClass[peakIndex] <= 0);
}
if (rejectCluster) {
  if (clusterer.mPclusterPosInRow) {
    clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow;
  }
  return;
}
uint32_t rowIndex = 0;
if (clusterOut != nullptr) {
  rowIndex = GPUTPCCFClusterizer::sortIntoBuckets(
    /* ... arguments elided in this excerpt ... */
    clusterer.mNMaxClusterPerRow,
    clusterer.mPclusterInRow,
    /* ... */);
  if (clusterer.mPclusterPosInRow != nullptr) {
    clusterer.mPclusterPosInRow[full_glo_idx] = rowIndex;
  }
} else if (clusterer.mPclusterPosInRow) {
  rowIndex = clusterer.mPclusterPosInRow[full_glo_idx];
}
CPU_ONLY(labelAcc->commit(peak.row(), rowIndex, clusterer.mNMaxClusterPerRow));
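// --- Cluster publishing with regression model 2 (split clusters): the model emits two
// cluster candidates per peak, which are published one after the other below. ---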
auto& clusterer = processors.tpcClusterer[sector];
auto& clustererNN = processors.tpcNNClusterer[sector];
if (glo_idx >= (uint32_t)clustererNN.mNnClusterizerBatchedMode) {
  return;
}
uint32_t maxClusterNum = clusterer.mPmemory->counters.nClusters;
CfChargePos peak = clusterer.mPfilteredPeakPositions[CAMath::Min(glo_idx + batchStart, (uint32_t)(clusterer.mPmemory->counters.nClusters - 1))];
float central_charge = static_cast<float>(chargeMap[peak].unpack());
// ... (local accumulator setup elided in this excerpt)
uint32_t full_glo_idx = glo_idx + batchStart;
if (full_glo_idx >= maxClusterNum) {
  GPUTPCCFClusterizer::buildCluster(
    clusterer.Param().rec,
    /* ... arguments elided in this excerpt ... */
    smem.innerAboveThreshold,
    /* ... */);
  return; // out-of-range thread exits after participating in the cooperative build (assumed)
}
uint32_t model_output_index = glo_idx * clustererNN.mNnClusterizerModelReg2NumOutputNodes;
GPUTPCCFClusterizer::buildCluster(
  clusterer.Param().rec,
  /* ... arguments elided in this excerpt ... */
  smem.innerAboveThreshold,
  /* ... */);
if ((clusterer.mPmemory->fragment).isOverlap(peak.time())) {
  if (clusterer.mPclusterPosInRow) {
    clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow;
  }
  return;
}
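// Apparent reg2 output layout, inferred from the index pattern below (an assumption,
// not stated in this excerpt): pad positions at [0,1], time positions at [2,3], pad
// widths at [4,5], time widths at [6,7], charge scales at [8,9]; even indices belong
// to the first cluster candidate, odd indices to the second.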
float publishPadPosition = 0.f, publishTimePosition = 0.f;
if (dtype == 0) {
  publishPadPosition = static_cast<float>(peak.pad()) + clustererNN.mOutputDataReg2_16[model_output_index].ToFloat();
  publishTimePosition = static_cast<float>(peak.time()) + clustererNN.mOutputDataReg2_16[model_output_index + 2].ToFloat();
  isBoundaryPublish(full_glo_idx, static_cast<int32_t>(peak.row()), publishPadPosition, publishTimePosition);
  pc.setFull(central_charge * clustererNN.mOutputDataReg2_16[model_output_index + 8].ToFloat(),
             publishPadPosition, // assumed: this argument is elided in the excerpt
             clustererNN.mOutputDataReg2_16[model_output_index + 4].ToFloat(),
             (clusterer.mPmemory->fragment).start + publishTimePosition,
             clustererNN.mOutputDataReg2_16[model_output_index + 6].ToFloat(),
             clustererNN.mClusterFlags[2 * glo_idx],
             clustererNN.mClusterFlags[2 * glo_idx + 1]);
} else if (dtype == 1) {
  publishPadPosition = static_cast<float>(peak.pad()) + clustererNN.mOutputDataReg2_32[model_output_index];
  publishTimePosition = static_cast<float>(peak.time()) + clustererNN.mOutputDataReg2_32[model_output_index + 2];
  isBoundaryPublish(full_glo_idx, static_cast<int32_t>(peak.row()), publishPadPosition, publishTimePosition);
  pc.setFull(central_charge * clustererNN.mOutputDataReg2_32[model_output_index + 8],
             publishPadPosition, // assumed, as above
             clustererNN.mOutputDataReg2_32[model_output_index + 4],
             (clusterer.mPmemory->fragment).start + publishTimePosition,
             clustererNN.mOutputDataReg2_32[model_output_index + 6],
             clustererNN.mClusterFlags[2 * glo_idx],
             clustererNN.mClusterFlags[2 * glo_idx + 1]);
}
// ... (declaration of myCluster elided in this excerpt)
bool rejectCluster = !pc.toNative(peak, central_charge, myCluster, clusterer.Param(), chargeMap);
if (clustererNN.mNnClusterizerUseClassification) {
  rejectCluster |= (clustererNN.mOutputDataClass[CAMath::Min(full_glo_idx, (uint32_t)clusterer.mPmemory->counters.nClusters - 1)] <= 0);
}
if (rejectCluster) {
  if (clusterer.mPclusterPosInRow) {
    clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow;
  }
  return;
}
uint32_t rowIndex = 0;
if (clusterOut != nullptr) {
  rowIndex = GPUTPCCFClusterizer::sortIntoBuckets(
    /* ... arguments elided in this excerpt ... */
    clusterer.mNMaxClusterPerRow,
    clusterer.mPclusterInRow,
    /* ... */);
  if (clusterer.mPclusterPosInRow != nullptr) {
    clusterer.mPclusterPosInRow[full_glo_idx] = rowIndex;
  }
} else if (clusterer.mPclusterPosInRow) {
  rowIndex = clusterer.mPclusterPosInRow[full_glo_idx];
}
CPU_ONLY(labelAcc->commit(peak.row(), rowIndex, clusterer.mNMaxClusterPerRow));
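// Second cluster candidate: repeat the publishing sequence reading the odd-indexed
// model outputs.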
if (dtype == 0) {
  publishPadPosition = static_cast<float>(peak.pad()) + clustererNN.mOutputDataReg2_16[model_output_index + 1].ToFloat();
  publishTimePosition = static_cast<float>(peak.time()) + clustererNN.mOutputDataReg2_16[model_output_index + 3].ToFloat();
  isBoundaryPublish(full_glo_idx, static_cast<int32_t>(peak.row()), publishPadPosition, publishTimePosition);
  pc.setFull(central_charge * clustererNN.mOutputDataReg2_16[model_output_index + 9].ToFloat(),
             publishPadPosition, // assumed: this argument is elided in the excerpt
             clustererNN.mOutputDataReg2_16[model_output_index + 5].ToFloat(),
             (clusterer.mPmemory->fragment).start + publishTimePosition,
             clustererNN.mOutputDataReg2_16[model_output_index + 7].ToFloat(),
             clustererNN.mClusterFlags[2 * glo_idx],
             clustererNN.mClusterFlags[2 * glo_idx + 1]);
} else if (dtype == 1) {
  publishPadPosition = static_cast<float>(peak.pad()) + clustererNN.mOutputDataReg2_32[model_output_index + 1];
  publishTimePosition = static_cast<float>(peak.time()) + clustererNN.mOutputDataReg2_32[model_output_index + 3];
  isBoundaryPublish(full_glo_idx, static_cast<int32_t>(peak.row()), publishPadPosition, publishTimePosition);
  pc.setFull(central_charge * clustererNN.mOutputDataReg2_32[model_output_index + 9],
             publishPadPosition, // assumed, as above
             clustererNN.mOutputDataReg2_32[model_output_index + 5],
             (clusterer.mPmemory->fragment).start + publishTimePosition,
             clustererNN.mOutputDataReg2_32[model_output_index + 7],
             clustererNN.mClusterFlags[2 * glo_idx],
             clustererNN.mClusterFlags[2 * glo_idx + 1]);
}
rejectCluster = !pc.toNative(peak, central_charge, myCluster, clusterer.Param(), chargeMap);
if (clustererNN.mNnClusterizerUseClassification) {
  rejectCluster |= (clustererNN.mOutputDataClass[CAMath::Min(full_glo_idx, (uint32_t)clusterer.mPmemory->counters.nClusters - 1)] <= 0);
}
if (rejectCluster) {
  if (clusterer.mPclusterPosInRow) {
    clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow;
  }
  return;
}
if (clusterOut != nullptr) {
  rowIndex = GPUTPCCFClusterizer::sortIntoBuckets(
    /* ... arguments elided in this excerpt ... */
    clusterer.mNMaxClusterPerRow,
    clusterer.mPclusterInRow,
    /* ... */);
  if (clusterer.mPclusterPosInRow != nullptr) {
    clusterer.mPclusterPosInRow[full_glo_idx] = rowIndex;
  }
} else if (clusterer.mPclusterPosInRow) {
  rowIndex = clusterer.mPclusterPosInRow[full_glo_idx];
}