auto& clusterer = processors.tpcClusterer[sector];
auto& clustererNN = processors.tpcNNClusterer[sector];
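// One thread per cluster: fill the NN input with the charge window around the peak,
// (2*SizeInputRow+1) x (2*SizeInputPad+1) x (2*SizeInputTime+1) values normalized to the central charge.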
if (glo_idx + batchStart >= clusterer.mPmemory->counters.nClusters) {
return;
}
uint32_t write_idx = glo_idx * clustererNN.mNnClusterizerElementSize;
CfChargePos peak = clusterer.mPfilteredPeakPositions[CAMath::Min(glo_idx + batchStart, (uint32_t)(clusterer.mPmemory->counters.nClusters - 1))];
int32_t row = static_cast<int>(peak.row());
int32_t pad = static_cast<int>(peak.pad());
int32_t time = static_cast<int>(peak.time());
float central_charge = static_cast<float>(chargeMap[peak].unpack());
int32_t row_offset = GPUTPCNNClusterizerKernels::rowOffset(row, clustererNN.mNnClusterizerSizeInputRow);
for (int32_t r = -clustererNN.mNnClusterizerSizeInputRow; r <= clustererNN.mNnClusterizerSizeInputRow; ++r) {
int32_t target_row = row + r;
int32_t pad_offset = is_row_boundary ? 0 : GPUTPCNNClusterizerKernels::padOffset(row, target_row);
for (int32_t p = -clustererNN.mNnClusterizerSizeInputPad + pad_offset; p <= clustererNN.mNnClusterizerSizeInputPad + pad_offset; ++p) {
int32_t target_pad = pad + p;
bool is_boundary = is_row_boundary || GPUTPCNNClusterizerKernels::isBoundary(target_row + row_offset, target_pad, clustererNN.mNnClusterizerSizeInputRow);
for (int32_t t = -clustererNN.mNnClusterizerSizeInputTime; t <= clustererNN.mNnClusterizerSizeInputTime; ++t) {
int32_t target_time = time + t;
if (is_boundary || target_time < 0 || target_time >= TPC_MAX_FRAGMENT_LEN_GPU) {
float boundary_value = static_cast<float>(clustererNN.mNnClusterizerBoundaryFillValue);
clustererNN.mInputData_16[write_idx] = (OrtDataType::Float16_t)boundary_value;
clustererNN.mInputData_32[write_idx] = boundary_value;
} else {
CfChargePos tmp_pos(target_row, target_pad, target_time);
float normalized_charge = static_cast<float>(chargeMap[tmp_pos].unpack()) / central_charge;
if (!clustererNN.mNnClusterizerSetDeconvolutionFlags && r == 0 && CAMath::Abs(p) < 3 && CAMath::Abs(t) < 3 && p != 0 && t != 0) {
clustererNN.mClusterFlags[2 * glo_idx] += CfUtils::isPeak(isPeakMap[tmp_pos]);
clustererNN.mClusterFlags[2 * glo_idx + 1] = clustererNN.mClusterFlags[2 * glo_idx];
}
clustererNN.mInputData_16[write_idx] = (OrtDataType::Float16_t)normalized_charge;
clustererNN.mInputData_32[write_idx] = normalized_charge;
}
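// Optionally append normalized index information (sector, row, pad) behind the charge window.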
if (clustererNN.mNnClusterizerAddIndexData) {
float sector_norm = sector / 36.f;
float row_norm = row / 152.f;
float pad_norm = static_cast<float>(pad) / GPUTPCGeometry::NPads(row);
clustererNN.mInputData_16[write_idx] = (OrtDataType::Float16_t)sector_norm;
clustererNN.mInputData_16[write_idx + 1] = (OrtDataType::Float16_t)row_norm;
clustererNN.mInputData_16[write_idx + 2] = (OrtDataType::Float16_t)pad_norm;
clustererNN.mInputData_32[write_idx] = sector_norm;
clustererNN.mInputData_32[write_idx + 1] = row_norm;
clustererNN.mInputData_32[write_idx + 2] = pad_norm;
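// Without network-provided deconvolution flags, derive them from the eight inner neighbors of the peak.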
if (!clustererNN.mNnClusterizerSetDeconvolutionFlags) {
clustererNN.mClusterFlags[2 * glo_idx] = 0;
clustererNN.mClusterFlags[2 * glo_idx + 1] = 0;
for (uint16_t i = 0; i < 8; ++i) {
Delta2 d = cfconsts::InnerNeighbors[i];
clustererNN.mClusterFlags[2 * glo_idx] += CfUtils::isPeak(isPeakMap[tmp_pos]);
}
clustererNN.mClusterFlags[2 * glo_idx + 1] = clustererNN.mClusterFlags[2 * glo_idx];
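// Alternative fill scheme: glo_idx addresses a single input element, split into the cluster index
// (base_idx) and the position inside that cluster's input block (transient_index).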
auto& clusterer = processors.tpcClusterer[sector];
auto& clustererNN = processors.tpcNNClusterer[sector];
uint32_t base_idx = glo_idx / clustererNN.mNnClusterizerRowTimeSizeFull;
uint32_t transient_index = glo_idx - (base_idx * clustererNN.mNnClusterizerRowTimeSizeFull);
if (base_idx + batchStart >= clusterer.mPmemory->counters.nClusters) {
return;
}
CfChargePos peak = clusterer.mPfilteredPeakPositions[CAMath::Min(base_idx + batchStart, (uint32_t)(clusterer.mPmemory->counters.nClusters - 1))];
float central_charge = static_cast<float>(chargeMap[peak].unpack());
int32_t row = static_cast<int>(peak.row());
int32_t pad = static_cast<int>(peak.pad());
int32_t time = static_cast<int>(peak.time());
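// Elements beyond the charge window carry the appended index data (sector, row, pad).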
if (clustererNN.mNnClusterizerAddIndexData && transient_index >= clustererNN.mNnClusterizerRowTimeSize) {
int32_t data_idx = transient_index - clustererNN.mNnClusterizerRowTimeSize;
uint32_t write_idx = base_idx * clustererNN.mNnClusterizerElementSize + clustererNN.mNnClusterizerChargeArraySize + data_idx;
float index_values[3] = {
sector / 36.f,
row / 152.f,
static_cast<float>(pad) / GPUTPCGeometry::NPads(row)};
clustererNN.mInputData_16[write_idx] = (OrtDataType::Float16_t)index_values[data_idx];
clustererNN.mInputData_32[write_idx] = index_values[data_idx];
if (data_idx == 2 && !clustererNN.mNnClusterizerSetDeconvolutionFlags) {
uint8_t cluster_flags = 0;
for (uint16_t i = 0; i < 8; i++) {
Delta2 d = cfconsts::InnerNeighbors[i];
cluster_flags += CfUtils::isPeak(isPeakMap[tmp_pos]);
}
clustererNN.mClusterFlags[2 * base_idx] = cluster_flags;
clustererNN.mClusterFlags[2 * base_idx + 1] = cluster_flags;
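// Elements inside the charge window: decode transient_index into row and time indices and
// write the charges for all pads at that (row, time) position.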
if (transient_index < clustererNN.mNnClusterizerRowTimeSize) {
int32_t row_idx = transient_index / clustererNN.mNnClusterizerFullTimeSize;
int32_t r_local = row_idx - clustererNN.mNnClusterizerSizeInputRow;
int32_t time_idx = transient_index - row_idx * clustererNN.mNnClusterizerFullTimeSize;
int32_t t_local = time_idx - clustererNN.mNnClusterizerSizeInputTime;
int32_t write_idx = base_idx * clustererNN.mNnClusterizerElementSize + row_idx * clustererNN.mNnClusterizerPadTimeSize + time_idx;
int32_t target_row = row + r_local;
int32_t row_offset = GPUTPCNNClusterizerKernels::rowOffset(row, clustererNN.mNnClusterizerSizeInputRow);
int32_t pad_offset = GPUTPCNNClusterizerKernels::padOffset(row, target_row);
for (int32_t p_local = -clustererNN.mNnClusterizerSizeInputPad + pad_offset; p_local <= clustererNN.mNnClusterizerSizeInputPad + pad_offset; p_local++) {
if (is_row_boundary) {
float boundary_val = static_cast<float>(clustererNN.mNnClusterizerBoundaryFillValue);
clustererNN.mInputData_16[write_idx] = (OrtDataType::Float16_t)boundary_val;
clustererNN.mInputData_32[write_idx] = boundary_val;
write_idx += clustererNN.mNnClusterizerFullTimeSize;
continue;
}
int32_t target_pad = pad + p_local;
int32_t target_time = time + t_local;
int8_t is_boundary = GPUTPCNNClusterizerKernels::isBoundary(target_row + row_offset, target_pad, clustererNN.mNnClusterizerSizeInputRow) || (target_time < 0) || (target_time >= TPC_MAX_FRAGMENT_LEN_GPU);
float output_value;
if (is_boundary) {
output_value = static_cast<float>(clustererNN.mNnClusterizerBoundaryFillValue);
} else {
CfChargePos tmp_pos(target_row, target_pad, target_time);
output_value = static_cast<float>(chargeMap[tmp_pos].unpack()) / central_charge;
}
clustererNN.mInputData_16[write_idx] = (OrtDataType::Float16_t)output_value;
clustererNN.mInputData_32[write_idx] = output_value;
write_idx += clustererNN.mNnClusterizerFullTimeSize;
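// Classification output: scan the class probabilities of this cluster (FP16 or FP32 depending on dtype)
// and store the resulting class label.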
auto& clustererNN = processors.tpcNNClusterer[sector];
uint32_t elem_iterator = glo_idx * clustererNN.mNnClusterizerModelClassNumOutputNodes;
float current_max_prob = 0.f;
uint32_t class_label = 0;
for (uint32_t pIdx = elem_iterator; pIdx < elem_iterator + clustererNN.mNnClusterizerModelClassNumOutputNodes; pIdx++) {
if (pIdx == elem_iterator) {
if (dtype == 0) {
current_max_prob = static_cast<float>(clustererNN.mModelProbabilities_16[pIdx]);
} else if (dtype == 1) {
current_max_prob = clustererNN.mModelProbabilities_32[pIdx];
}
} else if (dtype == 0) {
current_max_prob = CAMath::Max(current_max_prob, clustererNN.mModelProbabilities_16[pIdx].ToFloat());
} else if (dtype == 1) {
current_max_prob = CAMath::Max(current_max_prob, clustererNN.mModelProbabilities_32[pIdx]);
}
}
clustererNN.mOutputDataClass[glo_idx + batchStart] = class_label; // index of the most probable class
if (class_label > 1) {
clustererNN.mClusterFlags[2 * glo_idx] = 1;
clustererNN.mClusterFlags[2 * glo_idx + 1] = 1;
}
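// Publish one cluster per peak from the first regression network: the outputs give pad and time
// corrections, cluster widths, and a charge scale applied to the central charge.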
auto& clusterer = processors.tpcClusterer[sector];
auto& clustererNN = processors.tpcNNClusterer[sector];
uint32_t maxClusterNum = clusterer.mPmemory->counters.nClusters;
uint32_t full_glo_idx = glo_idx + batchStart;
int32_t model_output_index = glo_idx * clustererNN.mNnClusterizerModelReg1NumOutputNodes;
CfChargePos peak = clusterer.mPfilteredPeakPositions[CAMath::Min(full_glo_idx, maxClusterNum - 1)];
float central_charge = static_cast<float>(chargeMap[peak].unpack());
if (full_glo_idx >= maxClusterNum) {
return;
}
GPUTPCCFClusterizer::buildCluster(
clusterer.Param().rec,
smem.innerAboveThreshold,
if (clustererNN.mOutputDataClass[full_glo_idx] == 1 || (clustererNN.mNnClusterizerModelReg2NumOutputNodes != -1 && clustererNN.mOutputDataClass[full_glo_idx] >= 1)) {
GPUTPCCFClusterizer::buildCluster(
clusterer.Param().rec,
smem.innerAboveThreshold,
if ((clusterer.mPmemory->fragment).isOverlap(peak.time())) {
if (clusterer.mPclusterPosInRow) {
clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow;
if (dtype == 0) {
pc.setFull(central_charge * clustererNN.mOutputDataReg1_16[model_output_index + 4].ToFloat(),
static_cast<float>(peak.pad()) + clustererNN.mOutputDataReg1_16[model_output_index].ToFloat(),
clustererNN.mOutputDataReg1_16[model_output_index + 2].ToFloat(),
(clusterer.mPmemory->fragment).start + static_cast<float>(peak.time()) + clustererNN.mOutputDataReg1_16[model_output_index + 1].ToFloat(),
clustererNN.mOutputDataReg1_16[model_output_index + 3].ToFloat(),
clustererNN.mClusterFlags[2 * glo_idx],
clustererNN.mClusterFlags[2 * glo_idx + 1]);
} else if (dtype == 1) {
pc.setFull(central_charge * clustererNN.mOutputDataReg1_32[model_output_index + 4],
static_cast<float>(peak.pad()) + clustererNN.mOutputDataReg1_32[model_output_index],
clustererNN.mOutputDataReg1_32[model_output_index + 2],
(clusterer.mPmemory->fragment).start + static_cast<float>(peak.time()) + clustererNN.mOutputDataReg1_32[model_output_index + 1],
clustererNN.mOutputDataReg1_32[model_output_index + 3],
clustererNN.mClusterFlags[2 * glo_idx],
clustererNN.mClusterFlags[2 * glo_idx + 1]);
}
bool rejectCluster = !pc.toNative(peak, central_charge, myCluster, clusterer.Param(), chargeMap);
if (rejectCluster) {
if (clusterer.mPclusterPosInRow) {
clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow;
uint32_t rowIndex = 0;
if (clusterOut != nullptr) {
rowIndex = GPUTPCCFClusterizer::sortIntoBuckets(
clusterer.mNMaxClusterPerRow,
clusterer.mPclusterInRow,
if (clusterer.mPclusterPosInRow != nullptr) {
clusterer.mPclusterPosInRow[full_glo_idx] = rowIndex;
}
} else if (clusterer.mPclusterPosInRow) {
rowIndex = clusterer.mPclusterPosInRow[full_glo_idx];
}
CPU_ONLY(labelAcc->commit(peak.row(), rowIndex, clusterer.mNMaxClusterPerRow));
if (clusterer.mPclusterPosInRow) {
clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow;
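// Second regression network: its outputs encode two cluster candidates per peak; the first candidate
// uses the even-offset outputs, the second the odd-offset outputs.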
auto& clusterer = processors.tpcClusterer[sector];
auto& clustererNN = processors.tpcNNClusterer[sector];
uint32_t maxClusterNum = clusterer.mPmemory->counters.nClusters;
CfChargePos peak = clusterer.mPfilteredPeakPositions[CAMath::Min(glo_idx + batchStart, (uint32_t)(clusterer.mPmemory->counters.nClusters - 1))];
float central_charge = static_cast<float>(chargeMap[peak].unpack());
uint32_t full_glo_idx = glo_idx + batchStart;
if (full_glo_idx >= maxClusterNum) {
return;
}
GPUTPCCFClusterizer::buildCluster(
clusterer.Param().rec,
smem.innerAboveThreshold,
uint32_t model_output_index = glo_idx * clustererNN.mNnClusterizerModelReg2NumOutputNodes;
if (clustererNN.mOutputDataClass[full_glo_idx] > 0) {
GPUTPCCFClusterizer::buildCluster(
clusterer.Param().rec,
smem.innerAboveThreshold,
if ((clusterer.mPmemory->fragment).isOverlap(peak.time())) {
if (clusterer.mPclusterPosInRow) {
clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow;
if (dtype == 0) {
pc.setFull(central_charge * clustererNN.mOutputDataReg2_16[model_output_index + 8].ToFloat(),
static_cast<float>(peak.pad()) + clustererNN.mOutputDataReg2_16[model_output_index].ToFloat(),
clustererNN.mOutputDataReg2_16[model_output_index + 4].ToFloat(),
(clusterer.mPmemory->fragment).start + static_cast<float>(peak.time()) + clustererNN.mOutputDataReg2_16[model_output_index + 2].ToFloat(),
clustererNN.mOutputDataReg2_16[model_output_index + 6].ToFloat(),
clustererNN.mClusterFlags[2 * glo_idx],
clustererNN.mClusterFlags[2 * glo_idx + 1]);
} else if (dtype == 1) {
pc.setFull(central_charge * clustererNN.mOutputDataReg2_32[model_output_index + 8],
static_cast<float>(peak.pad()) + clustererNN.mOutputDataReg2_32[model_output_index],
clustererNN.mOutputDataReg2_32[model_output_index + 4],
(clusterer.mPmemory->fragment).start + static_cast<float>(peak.time()) + clustererNN.mOutputDataReg2_32[model_output_index + 2],
clustererNN.mOutputDataReg2_32[model_output_index + 6],
clustererNN.mClusterFlags[2 * glo_idx],
clustererNN.mClusterFlags[2 * glo_idx + 1]);
}
bool rejectCluster = !pc.toNative(peak, central_charge, myCluster, clusterer.Param(), chargeMap);
if (rejectCluster) {
if (clusterer.mPclusterPosInRow) {
clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow;
uint32_t rowIndex = 0;
if (clusterOut != nullptr) {
rowIndex = GPUTPCCFClusterizer::sortIntoBuckets(
clusterer.mNMaxClusterPerRow,
clusterer.mPclusterInRow,
if (clusterer.mPclusterPosInRow != nullptr) {
clusterer.mPclusterPosInRow[full_glo_idx] = rowIndex;
}
} else if (clusterer.mPclusterPosInRow) {
rowIndex = clusterer.mPclusterPosInRow[full_glo_idx];
}
CPU_ONLY(labelAcc->commit(peak.row(), rowIndex, clusterer.mNMaxClusterPerRow));
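// Second cluster candidate of the same peak: same publishing sequence, reading the odd-offset outputs.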
if (dtype == 0) {
pc.setFull(central_charge * clustererNN.mOutputDataReg2_16[model_output_index + 9].ToFloat(),
static_cast<float>(peak.pad()) + clustererNN.mOutputDataReg2_16[model_output_index + 1].ToFloat(),
clustererNN.mOutputDataReg2_16[model_output_index + 5].ToFloat(),
(clusterer.mPmemory->fragment).start + static_cast<float>(peak.time()) + clustererNN.mOutputDataReg2_16[model_output_index + 3].ToFloat(),
clustererNN.mOutputDataReg2_16[model_output_index + 7].ToFloat(),
clustererNN.mClusterFlags[2 * glo_idx],
clustererNN.mClusterFlags[2 * glo_idx + 1]);
} else if (dtype == 1) {
pc.setFull(central_charge * clustererNN.mOutputDataReg2_32[model_output_index + 9],
static_cast<float>(peak.pad()) + clustererNN.mOutputDataReg2_32[model_output_index + 1],
clustererNN.mOutputDataReg2_32[model_output_index + 5],
(clusterer.mPmemory->fragment).start + static_cast<float>(peak.time()) + clustererNN.mOutputDataReg2_32[model_output_index + 3],
clustererNN.mOutputDataReg2_32[model_output_index + 7],
clustererNN.mClusterFlags[2 * glo_idx],
clustererNN.mClusterFlags[2 * glo_idx + 1]);
}
rejectCluster = !pc.toNative(peak, central_charge, myCluster, clusterer.Param(), chargeMap);
if (rejectCluster) {
if (clusterer.mPclusterPosInRow) {
clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow;
if (clusterOut != nullptr) {
rowIndex = GPUTPCCFClusterizer::sortIntoBuckets(
clusterer.mNMaxClusterPerRow,
clusterer.mPclusterInRow,
if (clusterer.mPclusterPosInRow != nullptr) {
clusterer.mPclusterPosInRow[full_glo_idx] = rowIndex;
}
} else if (clusterer.mPclusterPosInRow) {
rowIndex = clusterer.mPclusterPosInRow[full_glo_idx];
}
if (clusterer.mPclusterPosInRow) {
clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow;