// Per-cluster fill: one thread gathers the full NN input tensor for one peak.
auto& clusterer = processors.tpcClusterer[sector];
auto& clustererNN = processors.tpcNNClusterer[sector];
if (glo_idx + batchStart >= clusterer.mPmemory->counters.nClusters) {
  return;
}
uint32_t write_idx = glo_idx * clustererNN.mNnClusterizerElementSize;

CfChargePos peak = clusterer.mPfilteredPeakPositions[CAMath::Min(glo_idx + batchStart, (uint32_t)(clusterer.mPmemory->counters.nClusters - 1))];
int32_t row = static_cast<int>(peak.row());
int32_t pad = static_cast<int>(peak.pad());
int32_t time = static_cast<int>(peak.time());
float central_charge = static_cast<float>(chargeMap[peak].unpack());
int32_t row_offset = GPUTPCNNClusterizerKernels::rowOffset(row, clustererNN.mNnClusterizerSizeInputRow);
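// Gather the charge window around the peak: all (row, pad, time) offsets within the
// configured half-sizes, each charge normalized to the central peak charge.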
for (int32_t r = -clustererNN.mNnClusterizerSizeInputRow; r <= clustererNN.mNnClusterizerSizeInputRow; ++r) {
  int32_t target_row = row + r;
  bool is_row_boundary = (target_row < 0) || (target_row >= GPUCA_ROW_COUNT); // reconstructed: flag rows outside the detector
  int32_t pad_offset = is_row_boundary ? 0 : GPUTPCNNClusterizerKernels::padOffset(row, target_row);
  for (int32_t p = -clustererNN.mNnClusterizerSizeInputPad + pad_offset; p <= clustererNN.mNnClusterizerSizeInputPad + pad_offset; ++p) {
    int32_t target_pad = pad + p;
    bool is_boundary = is_row_boundary || GPUTPCNNClusterizerKernels::isBoundary(target_row + row_offset, target_pad, clustererNN.mNnClusterizerSizeInputRow);
    for (int32_t t = -clustererNN.mNnClusterizerSizeInputTime; t <= clustererNN.mNnClusterizerSizeInputTime; ++t) {
      int32_t target_time = time + t;
      if (is_boundary || (target_time < 0) || (target_time >= TPC_MAX_FRAGMENT_LEN_GPU)) {
        // Elements outside the detector or the time fragment get a fixed fill value.
        float boundary_value = static_cast<float>(clustererNN.mNnClusterizerBoundaryFillValue);
        if (dtype == 0) {
          clustererNN.mInputData_16[write_idx] = (OrtDataType::Float16_t)boundary_value;
        } else if (dtype == 1) {
          clustererNN.mInputData_32[write_idx] = boundary_value;
        }
      } else {
        CfChargePos tmp_pos(target_row, target_pad, target_time);
        float normalized_charge = static_cast<float>(chargeMap[tmp_pos].unpack()) / central_charge;
        // Note: these flags are reset and recomputed from the inner neighbours after the loop.
        if (!clustererNN.mNnClusterizerSetDeconvolutionFlags && r == 0 && CAMath::Abs(p) < 3 && CAMath::Abs(t) < 3 && p != 0 && t != 0) {
          clustererNN.mClusterFlags[2 * glo_idx] += CfUtils::isPeak(isPeakMap[tmp_pos]);
          clustererNN.mClusterFlags[2 * glo_idx + 1] = clustererNN.mClusterFlags[2 * glo_idx];
        }
        if (dtype == 0) {
          clustererNN.mInputData_16[write_idx] = (OrtDataType::Float16_t)normalized_charge;
        } else if (dtype == 1) {
          clustererNN.mInputData_32[write_idx] = normalized_charge;
        }
      }
      ++write_idx; // advance to the next tensor element (reconstructed: the increment was elided)
    }
  }
}
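// Optionally append index channels behind the charge window; after the loop, write_idx
// points one element past the last charge entry.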
if (clustererNN.mNnClusterizerAddIndexData) {
  // Assumption: the first two index channels (elided in this excerpt) are taken to be the
  // normalized sector and row, matching the normalization of the pad channel.
  if (dtype == 0) {
    clustererNN.mInputData_16[write_idx] = (OrtDataType::Float16_t)(static_cast<float>(sector) / 36.f);
    clustererNN.mInputData_16[write_idx + 1] = (OrtDataType::Float16_t)(static_cast<float>(row) / 152.f);
    clustererNN.mInputData_16[write_idx + 2] = (OrtDataType::Float16_t)(static_cast<float>(pad) / GPUTPCGeometry::NPads(row));
  } else if (dtype == 1) {
    clustererNN.mInputData_32[write_idx] = static_cast<float>(sector) / 36.f;
    clustererNN.mInputData_32[write_idx + 1] = static_cast<float>(row) / 152.f;
    clustererNN.mInputData_32[write_idx + 2] = static_cast<float>(pad) / GPUTPCGeometry::NPads(row);
  }
}
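// Default cluster flags: when the deconvolution flags are not provided, count how many of
// the eight inner neighbours of the peak are themselves peaks.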
if (!clustererNN.mNnClusterizerSetDeconvolutionFlags) {
  clustererNN.mClusterFlags[2 * glo_idx] = 0;
  clustererNN.mClusterFlags[2 * glo_idx + 1] = 0;
  for (uint16_t i = 0; i < 8; ++i) {
    Delta2 d = cfconsts::InnerNeighbors[i];
    CfChargePos tmp_pos = peak.delta(d); // reconstructed: the neighbour position was elided
    clustererNN.mClusterFlags[2 * glo_idx] += CfUtils::isPeak(isPeakMap[tmp_pos]);
  }
  clustererNN.mClusterFlags[2 * glo_idx + 1] = clustererNN.mClusterFlags[2 * glo_idx];
}
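// --- Per-element fill variant: one thread writes one element of the input tensor. ---
// glo_idx is decomposed into a cluster index (base_idx) and a position inside that
// cluster's row/time window (transient_index).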
auto& clusterer = processors.tpcClusterer[sector];
auto& clustererNN = processors.tpcNNClusterer[sector];

uint32_t base_idx = glo_idx / clustererNN.mNnClusterizerRowTimeSizeFull;
uint32_t transient_index = glo_idx - (base_idx * clustererNN.mNnClusterizerRowTimeSizeFull);

if (base_idx + batchStart >= clusterer.mPmemory->counters.nClusters) {
  return;
}

CfChargePos peak = clusterer.mPfilteredPeakPositions[CAMath::Min(base_idx + batchStart, (uint32_t)(clusterer.mPmemory->counters.nClusters - 1))];
float central_charge = static_cast<float>(chargeMap[peak].unpack());
int32_t row = static_cast<int>(peak.row());
int32_t pad = static_cast<int>(peak.pad());
int32_t time = static_cast<int>(peak.time());
if (clustererNN.mNnClusterizerAddIndexData && transient_index >= clustererNN.mNnClusterizerRowTimeSize) {
  // This thread writes one of the index channels behind the charge window.
  int32_t data_idx = transient_index - clustererNN.mNnClusterizerRowTimeSize;
  uint32_t write_idx = base_idx * clustererNN.mNnClusterizerElementSize + clustererNN.mNnClusterizerChargeArraySize + data_idx;
  float index_values[3] = {
    static_cast<float>(sector) / 36.f, // assumed: normalized sector (the first two entries were elided in this excerpt)
    static_cast<float>(row) / 152.f,   // assumed: normalized row
    static_cast<float>(pad) / GPUTPCGeometry::NPads(row)};
  if (dtype == 0) {
    clustererNN.mInputData_16[write_idx] = (OrtDataType::Float16_t)index_values[data_idx];
  } else if (dtype == 1) {
    clustererNN.mInputData_32[write_idx] = index_values[data_idx];
  }
  if (data_idx == 2 && !clustererNN.mNnClusterizerSetDeconvolutionFlags) {
    uint8_t cluster_flags = 0;
    for (uint16_t i = 0; i < 8; i++) {
      Delta2 d = cfconsts::InnerNeighbors[i];
      CfChargePos tmp_pos = peak.delta(d); // reconstructed: the neighbour position was elided
      cluster_flags += CfUtils::isPeak(isPeakMap[tmp_pos]);
    }
    clustererNN.mClusterFlags[2 * base_idx] = cluster_flags;
    clustererNN.mClusterFlags[2 * base_idx + 1] = cluster_flags;
  }
}
if (transient_index < clustererNN.mNnClusterizerRowTimeSize) {
  int32_t row_idx = transient_index / clustererNN.mNnClusterizerFullTimeSize;
  int32_t r_local = row_idx - clustererNN.mNnClusterizerSizeInputRow;
  int32_t time_idx = transient_index - row_idx * clustererNN.mNnClusterizerFullTimeSize;
  int32_t t_local = time_idx - clustererNN.mNnClusterizerSizeInputTime;
  int32_t write_idx = base_idx * clustererNN.mNnClusterizerElementSize + row_idx * clustererNN.mNnClusterizerPadTimeSize + time_idx;
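  // The tensor is laid out as [row][pad][time]: stepping one pad ahead advances the
  // write index by the full time-window size.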
  int32_t target_row = row + r_local;
  bool is_row_boundary = (target_row < 0) || (target_row >= GPUCA_ROW_COUNT); // reconstructed: flag rows outside the detector
  int32_t row_offset = GPUTPCNNClusterizerKernels::rowOffset(row, clustererNN.mNnClusterizerSizeInputRow);
  int32_t pad_offset = GPUTPCNNClusterizerKernels::padOffset(row, target_row);
  for (int32_t p_local = -clustererNN.mNnClusterizerSizeInputPad + pad_offset; p_local <= clustererNN.mNnClusterizerSizeInputPad + pad_offset; p_local++) {
    if (is_row_boundary) {
      float boundary_val = static_cast<float>(clustererNN.mNnClusterizerBoundaryFillValue);
      if (dtype == 0) {
        clustererNN.mInputData_16[write_idx] = (OrtDataType::Float16_t)boundary_val;
      } else if (dtype == 1) {
        clustererNN.mInputData_32[write_idx] = boundary_val;
      }
      write_idx += clustererNN.mNnClusterizerFullTimeSize;
      continue; // reconstructed: skip the charge lookup for rows outside the detector
    }
    int32_t target_pad = pad + p_local;
    int32_t target_time = time + t_local;
    int8_t is_boundary = GPUTPCNNClusterizerKernels::isBoundary(target_row + row_offset, target_pad, clustererNN.mNnClusterizerSizeInputRow) || (target_time < 0) || (target_time >= TPC_MAX_FRAGMENT_LEN_GPU);
    float output_value;
    if (is_boundary) {
      output_value = static_cast<float>(clustererNN.mNnClusterizerBoundaryFillValue);
    } else {
      CfChargePos tmp_pos(target_row, target_pad, target_time);
      output_value = static_cast<float>(chargeMap[tmp_pos].unpack()) / central_charge;
    }
    if (dtype == 0) {
      clustererNN.mInputData_16[write_idx] = (OrtDataType::Float16_t)output_value;
    } else if (dtype == 1) {
      clustererNN.mInputData_32[write_idx] = output_value;
    }
    write_idx += clustererNN.mNnClusterizerFullTimeSize;
  }
}
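// --- Classification: pick the class with the highest model probability for each cluster. ---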
auto& clustererNN = processors.tpcNNClusterer[sector];

uint32_t elem_iterator = glo_idx * clustererNN.mNnClusterizerModelClassNumOutputNodes;
float current_max_prob = 0.f;
uint32_t class_label = 0;
for (uint32_t pIdx = elem_iterator; pIdx < elem_iterator + clustererNN.mNnClusterizerModelClassNumOutputNodes; pIdx++) {
  float prob = 0.f;
  if (dtype == 0) {
    prob = static_cast<float>(clustererNN.mModelProbabilities_16[pIdx]);
  } else if (dtype == 1) {
    prob = clustererNN.mModelProbabilities_32[pIdx];
  }
  // Track the running maximum and its class index (the label update was elided in the
  // excerpt and is reconstructed here as a plain argmax).
  if (pIdx == elem_iterator || prob > current_max_prob) {
    current_max_prob = prob;
    class_label = pIdx - elem_iterator;
  }
}
clustererNN.mOutputDataClass[glo_idx + batchStart] = class_label;
if (class_label > 1) {
  clustererNN.mClusterFlags[2 * glo_idx] = 1;
  clustererNN.mClusterFlags[2 * glo_idx + 1] = 1;
}
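// --- Publishing for the one-cluster regression head. Output layout per cluster (from the
//     index usage below): +0 pad offset, +1 time offset, +2 sigma(pad), +3 sigma(time),
//     +4 qTot ratio relative to the central charge. ---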
auto& clusterer = processors.tpcClusterer[sector];
auto& clustererNN = processors.tpcNNClusterer[sector];

uint32_t maxClusterNum = clusterer.mPmemory->counters.nClusters;
uint32_t full_glo_idx = glo_idx + batchStart;
int32_t model_output_index = glo_idx * clustererNN.mNnClusterizerModelReg1NumOutputNodes;

CfChargePos peak = clusterer.mPfilteredPeakPositions[CAMath::Min(full_glo_idx, maxClusterNum - 1)];
float central_charge = static_cast<float>(chargeMap[peak].unpack());
if (full_glo_idx >= maxClusterNum) {
  // Out-of-range threads still take part in the cooperative buildCluster call, which
  // synchronizes through shared memory, and then exit without publishing anything.
  // (The elided arguments are reconstructed to match GPUTPCCFClusterizer::buildCluster.)
  ClusterAccumulator pc;
  GPUTPCCFClusterizer::buildCluster(
    clusterer.Param().rec,
    chargeMap,
    peak,
    smem.posBcast,
    smem.buf,
    smem.innerAboveThreshold,
    &pc,
    labelAcc);
  return;
}
if (clustererNN.mOutputDataClass[full_glo_idx] == 1 || (clustererNN.mNnClusterizerUseClassification <= 0)) {
  ClusterAccumulator pc;
  CPU_ONLY(labelAcc->collect(peak, central_charge)); // reconstructed by analogy with the classical clusterizer
  GPUTPCCFClusterizer::buildCluster(
    clusterer.Param().rec,
    chargeMap,
    peak,
    smem.posBcast,
    smem.buf,
    smem.innerAboveThreshold,
    &pc,
    labelAcc);

  if ((clusterer.mPmemory->fragment).isOverlap(peak.time())) {
    if (clusterer.mPclusterPosInRow) {
      clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow;
    }
    return;
  }

  // Overwrite the accumulated estimate with the regression output; the network predicts
  // offsets relative to the peak and the charge as a ratio to the central charge.
  if (dtype == 0) {
    pc.setFull(central_charge * clustererNN.mOutputDataReg1_16[model_output_index + 4].ToFloat(),
               static_cast<float>(peak.pad()) + clustererNN.mOutputDataReg1_16[model_output_index].ToFloat(),
               clustererNN.mOutputDataReg1_16[model_output_index + 2].ToFloat(),
               (clusterer.mPmemory->fragment).start + static_cast<float>(peak.time()) + clustererNN.mOutputDataReg1_16[model_output_index + 1].ToFloat(),
               clustererNN.mOutputDataReg1_16[model_output_index + 3].ToFloat(),
               clustererNN.mClusterFlags[2 * glo_idx],
               clustererNN.mClusterFlags[2 * glo_idx + 1]);
  } else if (dtype == 1) {
    pc.setFull(central_charge * clustererNN.mOutputDataReg1_32[model_output_index + 4],
               static_cast<float>(peak.pad()) + clustererNN.mOutputDataReg1_32[model_output_index],
               clustererNN.mOutputDataReg1_32[model_output_index + 2],
               (clusterer.mPmemory->fragment).start + static_cast<float>(peak.time()) + clustererNN.mOutputDataReg1_32[model_output_index + 1],
               clustererNN.mOutputDataReg1_32[model_output_index + 3],
               clustererNN.mClusterFlags[2 * glo_idx],
               clustererNN.mClusterFlags[2 * glo_idx + 1]);
  }
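  // Convert to the native cluster format; toNative() returns false when the cluster is
  // rejected, in which case the peak is marked with an invalid row position.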
  tpc::ClusterNative myCluster;
  bool rejectCluster = !pc.toNative(peak, central_charge, myCluster, clusterer.Param(), chargeMap);
  if (rejectCluster) {
    if (clusterer.mPclusterPosInRow) {
      clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow;
    }
    return;
  }

  uint32_t rowIndex = 0;
  if (clusterOut != nullptr) {
    rowIndex = GPUTPCCFClusterizer::sortIntoBuckets(
      clusterer, // leading arguments reconstructed to match sortIntoBuckets
      myCluster,
      peak.row(),
      clusterer.mNMaxClusterPerRow,
      clusterer.mPclusterInRow,
      clusterOut);
    if (clusterer.mPclusterPosInRow != nullptr) {
      clusterer.mPclusterPosInRow[full_glo_idx] = rowIndex;
    }
  } else if (clusterer.mPclusterPosInRow) {
    rowIndex = clusterer.mPclusterPosInRow[full_glo_idx];
  }
  CPU_ONLY(labelAcc->commit(peak.row(), rowIndex, clusterer.mNMaxClusterPerRow));
} else {
  // Cluster rejected by the classification network.
  if (clusterer.mPclusterPosInRow) {
    clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow;
  }
}
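// --- Publishing for the two-cluster regression head: even/odd indices hold the first/second
//     cluster's parameters (pad +0/+1, time +2/+3, sigma(pad) +4/+5, sigma(time) +6/+7,
//     qTot ratio +8/+9). ---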
auto& clusterer = processors.tpcClusterer[sector];
auto& clustererNN = processors.tpcNNClusterer[sector];

uint32_t maxClusterNum = clusterer.mPmemory->counters.nClusters;
CfChargePos peak = clusterer.mPfilteredPeakPositions[CAMath::Min(glo_idx + batchStart, (uint32_t)(clusterer.mPmemory->counters.nClusters - 1))];
float central_charge = static_cast<float>(chargeMap[peak].unpack());
uint32_t full_glo_idx = glo_idx + batchStart;

if (full_glo_idx >= maxClusterNum) {
  // As above: keep the cooperative buildCluster call in sync, then exit (reconstructed).
  ClusterAccumulator pc;
  GPUTPCCFClusterizer::buildCluster(
    clusterer.Param().rec,
    chargeMap,
    peak,
    smem.posBcast,
    smem.buf,
    smem.innerAboveThreshold,
    &pc,
    labelAcc);
  return;
}
uint32_t model_output_index = glo_idx * clustererNN.mNnClusterizerModelReg2NumOutputNodes;

if ((clustererNN.mOutputDataClass[full_glo_idx] > 0) || (clustererNN.mNnClusterizerUseClassification <= 0)) {
  ClusterAccumulator pc;
  CPU_ONLY(labelAcc->collect(peak, central_charge)); // reconstructed by analogy with the classical clusterizer
  GPUTPCCFClusterizer::buildCluster(
    clusterer.Param().rec,
    chargeMap,
    peak,
    smem.posBcast,
    smem.buf,
    smem.innerAboveThreshold,
    &pc,
    labelAcc);

  if ((clusterer.mPmemory->fragment).isOverlap(peak.time())) {
    if (clusterer.mPclusterPosInRow) {
      clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow;
    }
    return;
  }

  // First cluster of the pair: even regression outputs.
  if (dtype == 0) {
    pc.setFull(central_charge * clustererNN.mOutputDataReg2_16[model_output_index + 8].ToFloat(),
               static_cast<float>(peak.pad()) + clustererNN.mOutputDataReg2_16[model_output_index].ToFloat(),
               clustererNN.mOutputDataReg2_16[model_output_index + 4].ToFloat(),
               (clusterer.mPmemory->fragment).start + static_cast<float>(peak.time()) + clustererNN.mOutputDataReg2_16[model_output_index + 2].ToFloat(),
               clustererNN.mOutputDataReg2_16[model_output_index + 6].ToFloat(),
               clustererNN.mClusterFlags[2 * glo_idx],
               clustererNN.mClusterFlags[2 * glo_idx + 1]);
  } else if (dtype == 1) {
    pc.setFull(central_charge * clustererNN.mOutputDataReg2_32[model_output_index + 8],
               static_cast<float>(peak.pad()) + clustererNN.mOutputDataReg2_32[model_output_index],
               clustererNN.mOutputDataReg2_32[model_output_index + 4],
               (clusterer.mPmemory->fragment).start + static_cast<float>(peak.time()) + clustererNN.mOutputDataReg2_32[model_output_index + 2],
               clustererNN.mOutputDataReg2_32[model_output_index + 6],
               clustererNN.mClusterFlags[2 * glo_idx],
               clustererNN.mClusterFlags[2 * glo_idx + 1]);
  }
  tpc::ClusterNative myCluster;
  bool rejectCluster = !pc.toNative(peak, central_charge, myCluster, clusterer.Param(), chargeMap);
  if (rejectCluster) {
    if (clusterer.mPclusterPosInRow) {
      clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow;
    }
    return;
  }

  uint32_t rowIndex = 0;
  if (clusterOut != nullptr) {
    rowIndex = GPUTPCCFClusterizer::sortIntoBuckets(
      clusterer, // leading arguments reconstructed to match sortIntoBuckets
      myCluster,
      peak.row(),
      clusterer.mNMaxClusterPerRow,
      clusterer.mPclusterInRow,
      clusterOut);
    if (clusterer.mPclusterPosInRow != nullptr) {
      clusterer.mPclusterPosInRow[full_glo_idx] = rowIndex;
    }
  } else if (clusterer.mPclusterPosInRow) {
    rowIndex = clusterer.mPclusterPosInRow[full_glo_idx];
  }
  CPU_ONLY(labelAcc->commit(peak.row(), rowIndex, clusterer.mNMaxClusterPerRow));
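  // Second cluster of the pair: odd regression outputs, reusing pc, myCluster and rowIndex.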
  if (dtype == 0) {
    pc.setFull(central_charge * clustererNN.mOutputDataReg2_16[model_output_index + 9].ToFloat(),
               static_cast<float>(peak.pad()) + clustererNN.mOutputDataReg2_16[model_output_index + 1].ToFloat(),
               clustererNN.mOutputDataReg2_16[model_output_index + 5].ToFloat(),
               (clusterer.mPmemory->fragment).start + static_cast<float>(peak.time()) + clustererNN.mOutputDataReg2_16[model_output_index + 3].ToFloat(),
               clustererNN.mOutputDataReg2_16[model_output_index + 7].ToFloat(),
               clustererNN.mClusterFlags[2 * glo_idx],
               clustererNN.mClusterFlags[2 * glo_idx + 1]);
  } else if (dtype == 1) {
    pc.setFull(central_charge * clustererNN.mOutputDataReg2_32[model_output_index + 9],
               static_cast<float>(peak.pad()) + clustererNN.mOutputDataReg2_32[model_output_index + 1],
               clustererNN.mOutputDataReg2_32[model_output_index + 5],
               (clusterer.mPmemory->fragment).start + static_cast<float>(peak.time()) + clustererNN.mOutputDataReg2_32[model_output_index + 3],
               clustererNN.mOutputDataReg2_32[model_output_index + 7],
               clustererNN.mClusterFlags[2 * glo_idx],
               clustererNN.mClusterFlags[2 * glo_idx + 1]);
  }
  rejectCluster = !pc.toNative(peak, central_charge, myCluster, clusterer.Param(), chargeMap);
  if (rejectCluster) {
    if (clusterer.mPclusterPosInRow) {
      clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow;
    }
    return;
  }

  if (clusterOut != nullptr) {
    rowIndex = GPUTPCCFClusterizer::sortIntoBuckets(
      clusterer,
      myCluster,
      peak.row(),
      clusterer.mNMaxClusterPerRow,
      clusterer.mPclusterInRow,
      clusterOut);
    if (clusterer.mPclusterPosInRow != nullptr) {
      clusterer.mPclusterPosInRow[full_glo_idx] = rowIndex;
    }
  } else if (clusterer.mPclusterPosInRow) {
    rowIndex = clusterer.mPclusterPosInRow[full_glo_idx];
  }
  CPU_ONLY(labelAcc->commit(peak.row(), rowIndex, clusterer.mNMaxClusterPerRow)); // reconstructed: elided in the excerpt
} else {
  // Cluster pair rejected by the classification network.
  if (clusterer.mPclusterPosInRow) {
    clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow;
  }
}