auto& clusterer = processors.tpcClusterer[sector];
auto& clustererNN = processors.tpcNNClusterer[sector];
uint32_t write_idx = glo_idx * clustererNN.mNnClusterizerElementSize;

CfChargePos peak = clusterer.mPfilteredPeakPositions[CAMath::Min(glo_idx + batchStart, (uint32_t)(clusterer.mPmemory->counters.nClusters - 1))];
int32_t row = static_cast<int>(peak.row()), pad = static_cast<int>(peak.pad()), time = static_cast<int>(peak.time());
float central_charge = static_cast<float>(chargeMap[peak].unpack());
int32_t row_offset = GPUTPCNNClusterizerKernels::rowOffset(row, clustererNN.mNnClusterizerSizeInputRow);
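// Fill the NN input tensor with the charges in a (2*row + 1) x (2*pad + 1) x (2*time + 1)
// window around the peak, normalized to the central charge. Elements outside the detector
// or the time fragment are set to the configured boundary fill value.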
for (int32_t r = -clustererNN.mNnClusterizerSizeInputRow; r <= clustererNN.mNnClusterizerSizeInputRow; r++) {
  bool is_row_boundary = (row + r < 0) || (row + r > o2::tpc::constants::MAXGLOBALPADROW - 1);
  int32_t pad_offset = is_row_boundary ? 0 : GPUTPCNNClusterizerKernels::padOffset(row, row + r);
  for (int32_t p = -clustererNN.mNnClusterizerSizeInputPad + pad_offset; p <= clustererNN.mNnClusterizerSizeInputPad + pad_offset; p++) {
    bool is_boundary = is_row_boundary || GPUTPCNNClusterizerKernels::isBoundary(row + r + row_offset, pad + p, clustererNN.mNnClusterizerSizeInputRow);
    for (int32_t t = -clustererNN.mNnClusterizerSizeInputTime; t <= clustererNN.mNnClusterizerSizeInputTime; t++) {
      int32_t time_pos = time + t;
      if (!is_boundary && time_pos >= 0 && time_pos < TPC_MAX_FRAGMENT_LEN_GPU) {
        CfChargePos tmp_pos(row + r, pad + p, time_pos);
        // Set the deconvolution flags from peaks in the close pad-time vicinity of the central row
        if (r == 0 && !clustererNN.mClusterFlags[2 * glo_idx] && CAMath::Abs(p) < 3 && CAMath::Abs(t) < 3 && p != 0 && t != 0) {
          clustererNN.mClusterFlags[2 * glo_idx] += CfUtils::isPeak(isPeakMap[tmp_pos]);
          clustererNN.mClusterFlags[2 * glo_idx + 1] = clustererNN.mClusterFlags[2 * glo_idx];
        }
        if (dtype == 0) {
          clustererNN.mInputData_16[write_idx] = (OrtDataType::Float16_t)(static_cast<float>(chargeMap[tmp_pos].unpack()) / central_charge);
        } else if (dtype == 1) {
          clustererNN.mInputData_32[write_idx] = static_cast<float>(chargeMap[tmp_pos].unpack()) / central_charge;
        }
      } else {
        if (dtype == 0) {
          clustererNN.mInputData_16[write_idx] = (OrtDataType::Float16_t)(static_cast<float>(clustererNN.mNnClusterizerBoundaryFillValue));
        } else if (dtype == 1) {
          clustererNN.mInputData_32[write_idx] = static_cast<float>(clustererNN.mNnClusterizerBoundaryFillValue);
        }
      }
      write_idx++;
    }
  }
}
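// Optionally append the (sector, row, pad) index information, normalized to [0, 1],
// as the last three elements of the input vector.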
if (clustererNN.mNnClusterizerAddIndexData) {
  if (dtype == 0) {
    clustererNN.mInputData_16[write_idx] = (OrtDataType::Float16_t)(sector / 36.f);
    clustererNN.mInputData_16[write_idx + 1] = (OrtDataType::Float16_t)(row / 152.f);
    clustererNN.mInputData_16[write_idx + 2] = (OrtDataType::Float16_t)(static_cast<float>(pad) / GPUTPCGeometry::NPads(row));
  } else if (dtype == 1) {
    clustererNN.mInputData_32[write_idx] = sector / 36.f;
    clustererNN.mInputData_32[write_idx + 1] = row / 152.f;
    clustererNN.mInputData_32[write_idx + 2] = static_cast<float>(pad) / GPUTPCGeometry::NPads(row);
  }
}
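// If the deconvolution flags were not set above, derive them from the eight inner neighbors of the peak.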
if (!clustererNN.mNnClusterizerSetDeconvolutionFlags) {
  clustererNN.mClusterFlags[2 * glo_idx] = 0;
  clustererNN.mClusterFlags[2 * glo_idx + 1] = 0;
  for (uint16_t i = 0; i < 8; i++) {
    Delta2 d = cfconsts::InnerNeighbors[i];
    CfChargePos tmp_pos = peak.delta(d);
    clustererNN.mClusterFlags[2 * glo_idx] += CfUtils::isPeak(isPeakMap[tmp_pos]);
  }
  clustererNN.mClusterFlags[2 * glo_idx + 1] = clustererNN.mClusterFlags[2 * glo_idx];
}
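// Flattened variant: one thread per input-tensor element. The cluster index (base_idx) and the
// element's position inside the input window are recovered from the flattened global index.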
auto& clusterer = processors.tpcClusterer[sector];
auto& clustererNN = processors.tpcNNClusterer[sector];
uint32_t base_idx = CAMath::Floor(glo_idx / clustererNN.mNnClusterizerElementSize);
uint32_t transient_index = glo_idx - (base_idx * clustererNN.mNnClusterizerElementSize);

CfChargePos peak = clusterer.mPfilteredPeakPositions[CAMath::Min(base_idx + batchStart, (uint32_t)(clusterer.mPmemory->counters.nClusters - 1))];
int32_t row = static_cast<int>(peak.row()), pad = static_cast<int>(peak.pad());
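// The thread handling the last element of each input vector writes the per-cluster index data
// and, if needed, the deconvolution flags, exactly once per cluster.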
if (clustererNN.mNnClusterizerAddIndexData && (int32_t)transient_index == (clustererNN.mNnClusterizerElementSize - 1)) {
  uint32_t top_idx = (base_idx + 1) * clustererNN.mNnClusterizerElementSize;
  if (!clustererNN.mNnClusterizerSetDeconvolutionFlags) {
    clustererNN.mClusterFlags[2 * base_idx] = 0;
    clustererNN.mClusterFlags[2 * base_idx + 1] = 0;
    for (uint16_t i = 0; i < 8; i++) {
      Delta2 d = cfconsts::InnerNeighbors[i];
      CfChargePos tmp_pos = peak.delta(d);
      clustererNN.mClusterFlags[2 * base_idx] += CfUtils::isPeak(isPeakMap[tmp_pos]);
    }
    clustererNN.mClusterFlags[2 * base_idx + 1] = clustererNN.mClusterFlags[2 * base_idx];
  }
  if (dtype == 0) {
    clustererNN.mInputData_16[top_idx - 3] = (OrtDataType::Float16_t)(sector / 36.f);
    clustererNN.mInputData_16[top_idx - 2] = (OrtDataType::Float16_t)(row / 152.f);
    clustererNN.mInputData_16[top_idx - 1] = (OrtDataType::Float16_t)(static_cast<float>(pad) / GPUTPCGeometry::NPads(row));
  } else if (dtype == 1) {
    clustererNN.mInputData_32[top_idx - 3] = sector / 36.f;
    clustererNN.mInputData_32[top_idx - 2] = row / 152.f;
    clustererNN.mInputData_32[top_idx - 1] = static_cast<float>(pad) / GPUTPCGeometry::NPads(row);
  }
} else if ((int32_t)transient_index < (clustererNN.mNnClusterizerElementSize - 3)) {
  int32_t time = static_cast<int>(peak.time());
  // Decompose the flattened index into the (row, pad, time) offsets within the input window
  int32_t r = CAMath::Floor(transient_index / ((2 * clustererNN.mNnClusterizerSizeInputPad + 1) * (2 * clustererNN.mNnClusterizerSizeInputTime + 1))) - clustererNN.mNnClusterizerSizeInputRow;
  bool is_row_boundary = (row + r < 0) || (row + r > o2::tpc::constants::MAXGLOBALPADROW - 1);
  if (is_row_boundary) {
    if (dtype == 0) {
      clustererNN.mInputData_16[glo_idx] = (OrtDataType::Float16_t)(static_cast<float>(clustererNN.mNnClusterizerBoundaryFillValue));
    } else if (dtype == 1) {
      clustererNN.mInputData_32[glo_idx] = static_cast<float>(clustererNN.mNnClusterizerBoundaryFillValue);
    }
  } else {
    int32_t row_offset = GPUTPCNNClusterizerKernels::rowOffset(row, clustererNN.mNnClusterizerSizeInputRow);
    int32_t pad_offset = GPUTPCNNClusterizerKernels::padOffset(row, row + r);
    int32_t rest_1 = transient_index % ((2 * clustererNN.mNnClusterizerSizeInputPad + 1) * (2 * clustererNN.mNnClusterizerSizeInputTime + 1));
    int32_t p = CAMath::Floor(rest_1 / (2 * clustererNN.mNnClusterizerSizeInputTime + 1)) - clustererNN.mNnClusterizerSizeInputPad + pad_offset;
    int32_t time_pos = (rest_1 % (2 * clustererNN.mNnClusterizerSizeInputTime + 1)) - clustererNN.mNnClusterizerSizeInputTime + time;

    // An element is a boundary element if it is outside the pad-row geometry or the time fragment
    bool is_boundary = GPUTPCNNClusterizerKernels::isBoundary(row + r + row_offset, pad + p, clustererNN.mNnClusterizerSizeInputRow) || time_pos < 0 || time_pos >= TPC_MAX_FRAGMENT_LEN_GPU;
    if (!is_boundary) {
      float central_charge = static_cast<float>(chargeMap[peak].unpack());
      CfChargePos tmp_pos(row + r, pad + p, time_pos);
      if (dtype == 0) {
        clustererNN.mInputData_16[glo_idx] = (OrtDataType::Float16_t)(static_cast<float>(chargeMap[tmp_pos].unpack()) / central_charge);
      } else if (dtype == 1) {
        clustererNN.mInputData_32[glo_idx] = static_cast<float>(chargeMap[tmp_pos].unpack()) / central_charge;
      }
    } else {
      if (dtype == 0) {
        clustererNN.mInputData_16[glo_idx] = (OrtDataType::Float16_t)(static_cast<float>(clustererNN.mNnClusterizerBoundaryFillValue));
      } else if (dtype == 1) {
        clustererNN.mInputData_32[glo_idx] = static_cast<float>(clustererNN.mNnClusterizerBoundaryFillValue);
      }
    }
  }
}
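// Class-label determination: scan the classification model's output probabilities per candidate;
// the resulting label decides whether and by which regression model the candidate is published.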
auto& clustererNN = processors.tpcNNClusterer[sector];

uint32_t elem_iterator = glo_idx * clustererNN.mNnClusterizerModelClassNumOutputNodes;
float current_max_prob = 0.f;
uint32_t class_label = 0;
for (uint32_t pIdx = elem_iterator; pIdx < elem_iterator + clustererNN.mNnClusterizerModelClassNumOutputNodes; pIdx++) {
  if (pIdx == elem_iterator) {
    if (dtype == 0) {
      current_max_prob = static_cast<float>(clustererNN.mModelProbabilities_16[pIdx]);
    } else if (dtype == 1) {
      current_max_prob = clustererNN.mModelProbabilities_32[pIdx];
    }
  } else {
    if (dtype == 0) {
      current_max_prob = CAMath::Max(current_max_prob, clustererNN.mModelProbabilities_16[pIdx].ToFloat());
    } else if (dtype == 1) {
      current_max_prob = CAMath::Max(current_max_prob, clustererNN.mModelProbabilities_32[pIdx]);
    }
  }
}
clustererNN.mOutputDataClass[glo_idx + batchStart] = class_label;
// Labels above 1 indicate overlapping clusters: force the deconvolution flags on
if (class_label > 1) {
  clustererNN.mClusterFlags[2 * glo_idx] = 1;
  clustererNN.mClusterFlags[2 * glo_idx + 1] = 1;
}
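// Publish clusters from the single-cluster regression model. As used below, the regression
// output layout per candidate is [pad, time, sigma_pad, sigma_time, q / q_central].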
auto& clusterer = processors.tpcClusterer[sector];
auto& clustererNN = processors.tpcNNClusterer[sector];

uint32_t maxClusterNum = clusterer.mPmemory->counters.nClusters;
uint32_t full_glo_idx = glo_idx + batchStart;
if (full_glo_idx >= maxClusterNum) {
  return;
}
int32_t model_output_index = glo_idx * clustererNN.mNnClusterizerModelReg1NumOutputNodes;

CfChargePos peak = clusterer.mPfilteredPeakPositions[CAMath::Min(full_glo_idx, maxClusterNum - 1)];
float central_charge = static_cast<float>(chargeMap[peak].unpack());
if (clustererNN.mOutputDataClass[full_glo_idx] == 1 || (clustererNN.mNnClusterizerModelReg2NumOutputNodes != -1 && clustererNN.mOutputDataClass[full_glo_idx] >= 1)) {
  ClusterAccumulator pc;
  CPU_ONLY(labelAcc->collect(peak, central_charge));

  // Argument order follows the GPUTPCCFClusterizer::buildCluster pattern
  GPUTPCCFClusterizer::buildCluster(
    clusterer.Param().rec,
    chargeMap,
    peak,
    smem.posBcast,
    smem.buf,
    smem.innerAboveThreshold,
    &pc,
    labelAcc);

  if ((clusterer.mPmemory->fragment).isOverlap(peak.time())) {
    if (clusterer.mPclusterPosInRow) {
      clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow;
    }
    return;
  }

  if (dtype == 0) {
    pc.setFull(central_charge * clustererNN.mOutputDataReg1_16[model_output_index + 4].ToFloat(),
               static_cast<float>(peak.pad()) + clustererNN.mOutputDataReg1_16[model_output_index].ToFloat(),
               clustererNN.mOutputDataReg1_16[model_output_index + 2].ToFloat(),
               (clusterer.mPmemory->fragment).start + static_cast<float>(peak.time()) + clustererNN.mOutputDataReg1_16[model_output_index + 1].ToFloat(),
               clustererNN.mOutputDataReg1_16[model_output_index + 3].ToFloat(),
               clustererNN.mClusterFlags[2 * glo_idx],
               clustererNN.mClusterFlags[2 * glo_idx + 1]);
  } else if (dtype == 1) {
    pc.setFull(central_charge * clustererNN.mOutputDataReg1_32[model_output_index + 4],
               static_cast<float>(peak.pad()) + clustererNN.mOutputDataReg1_32[model_output_index],
               clustererNN.mOutputDataReg1_32[model_output_index + 2],
               (clusterer.mPmemory->fragment).start + static_cast<float>(peak.time()) + clustererNN.mOutputDataReg1_32[model_output_index + 1],
               clustererNN.mOutputDataReg1_32[model_output_index + 3],
               clustererNN.mClusterFlags[2 * glo_idx],
               clustererNN.mClusterFlags[2 * glo_idx + 1]);
  }

  tpc::ClusterNative myCluster;
  bool rejectCluster = !pc.toNative(peak, central_charge, myCluster, clusterer.Param(), chargeMap);
  if (rejectCluster) {
    if (clusterer.mPclusterPosInRow) {
      clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow;
    }
    return;
  }

  uint32_t rowIndex = 0;
  if (clusterOut != nullptr) {
    rowIndex = GPUTPCCFClusterizer::sortIntoBuckets(
      clusterer,
      myCluster,
      peak.row(),
      clusterer.mNMaxClusterPerRow,
      clusterer.mPclusterInRow,
      clusterOut);
    if (clusterer.mPclusterPosInRow != nullptr) {
      clusterer.mPclusterPosInRow[full_glo_idx] = rowIndex;
    }
  } else if (clusterer.mPclusterPosInRow) {
    rowIndex = clusterer.mPclusterPosInRow[full_glo_idx];
  }

  CPU_ONLY(labelAcc->commit(peak.row(), rowIndex, clusterer.mNMaxClusterPerRow));
} else {
  // Rejected candidate: mark the position as invalid
  if (clusterer.mPclusterPosInRow) {
    clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow;
  }
}
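// Publish clusters from the two-cluster regression model. As used below, the output layout per
// candidate is: pads at [0, 1], times at [2, 3], sigma_pad at [4, 5], sigma_time at [6, 7],
// charge ratios at [8, 9], i.e. even indices for the first cluster, odd for the second.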
auto& clusterer = processors.tpcClusterer[sector];
auto& clustererNN = processors.tpcNNClusterer[sector];

CfChargePos peak = clusterer.mPfilteredPeakPositions[CAMath::Min(glo_idx + batchStart, (uint32_t)(clusterer.mPmemory->counters.nClusters - 1))];
float central_charge = static_cast<float>(chargeMap[peak].unpack());

uint32_t full_glo_idx = glo_idx + batchStart;
uint32_t model_output_index = glo_idx * clustererNN.mNnClusterizerModelReg2NumOutputNodes;
if (clustererNN.mOutputDataClass[full_glo_idx] > 0) {
  ClusterAccumulator pc;
  CPU_ONLY(labelAcc->collect(peak, central_charge));

  GPUTPCCFClusterizer::buildCluster(
    clusterer.Param().rec,
    chargeMap,
    peak,
    smem.posBcast,
    smem.buf,
    smem.innerAboveThreshold,
    &pc,
    labelAcc);

  if ((clusterer.mPmemory->fragment).isOverlap(peak.time())) {
    if (clusterer.mPclusterPosInRow) {
      clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow;
    }
    return;
  }

  // First cluster of the pair (even-indexed regression outputs)
  if (dtype == 0) {
    pc.setFull(central_charge * clustererNN.mOutputDataReg2_16[model_output_index + 8].ToFloat(),
               static_cast<float>(peak.pad()) + clustererNN.mOutputDataReg2_16[model_output_index].ToFloat(),
               clustererNN.mOutputDataReg2_16[model_output_index + 4].ToFloat(),
               (clusterer.mPmemory->fragment).start + static_cast<float>(peak.time()) + clustererNN.mOutputDataReg2_16[model_output_index + 2].ToFloat(),
               clustererNN.mOutputDataReg2_16[model_output_index + 6].ToFloat(),
               clustererNN.mClusterFlags[2 * glo_idx],
               clustererNN.mClusterFlags[2 * glo_idx + 1]);
  } else if (dtype == 1) {
    pc.setFull(central_charge * clustererNN.mOutputDataReg2_32[model_output_index + 8],
               static_cast<float>(peak.pad()) + clustererNN.mOutputDataReg2_32[model_output_index],
               clustererNN.mOutputDataReg2_32[model_output_index + 4],
               (clusterer.mPmemory->fragment).start + static_cast<float>(peak.time()) + clustererNN.mOutputDataReg2_32[model_output_index + 2],
               clustererNN.mOutputDataReg2_32[model_output_index + 6],
               clustererNN.mClusterFlags[2 * glo_idx],
               clustererNN.mClusterFlags[2 * glo_idx + 1]);
  }

  tpc::ClusterNative myCluster;
  bool rejectCluster = !pc.toNative(peak, central_charge, myCluster, clusterer.Param(), chargeMap);
  if (rejectCluster) {
    if (clusterer.mPclusterPosInRow) {
      clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow;
    }
    return;
  }

  uint32_t rowIndex = 0;
  if (clusterOut != nullptr) {
    rowIndex = GPUTPCCFClusterizer::sortIntoBuckets(
      clusterer,
      myCluster,
      peak.row(),
      clusterer.mNMaxClusterPerRow,
      clusterer.mPclusterInRow,
      clusterOut);
    if (clusterer.mPclusterPosInRow != nullptr) {
      clusterer.mPclusterPosInRow[full_glo_idx] = rowIndex;
    }
  } else if (clusterer.mPclusterPosInRow) {
    rowIndex = clusterer.mPclusterPosInRow[full_glo_idx];
  }

  CPU_ONLY(labelAcc->commit(peak.row(), rowIndex, clusterer.mNMaxClusterPerRow));
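  // Second cluster of the pair (odd-indexed regression outputs)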
  if (dtype == 0) {
    pc.setFull(central_charge * clustererNN.mOutputDataReg2_16[model_output_index + 9].ToFloat(),
               static_cast<float>(peak.pad()) + clustererNN.mOutputDataReg2_16[model_output_index + 1].ToFloat(),
               clustererNN.mOutputDataReg2_16[model_output_index + 5].ToFloat(),
               (clusterer.mPmemory->fragment).start + static_cast<float>(peak.time()) + clustererNN.mOutputDataReg2_16[model_output_index + 3].ToFloat(),
               clustererNN.mOutputDataReg2_16[model_output_index + 7].ToFloat(),
               clustererNN.mClusterFlags[2 * glo_idx],
               clustererNN.mClusterFlags[2 * glo_idx + 1]);
  } else if (dtype == 1) {
    pc.setFull(central_charge * clustererNN.mOutputDataReg2_32[model_output_index + 9],
               static_cast<float>(peak.pad()) + clustererNN.mOutputDataReg2_32[model_output_index + 1],
               clustererNN.mOutputDataReg2_32[model_output_index + 5],
               (clusterer.mPmemory->fragment).start + static_cast<float>(peak.time()) + clustererNN.mOutputDataReg2_32[model_output_index + 3],
               clustererNN.mOutputDataReg2_32[model_output_index + 7],
               clustererNN.mClusterFlags[2 * glo_idx],
               clustererNN.mClusterFlags[2 * glo_idx + 1]);
  }

  rejectCluster = !pc.toNative(peak, central_charge, myCluster, clusterer.Param(), chargeMap);
  if (rejectCluster) {
    if (clusterer.mPclusterPosInRow) {
      clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow;
    }
    return;
  }

  if (clusterOut != nullptr) {
    rowIndex = GPUTPCCFClusterizer::sortIntoBuckets(
      clusterer,
      myCluster,
      peak.row(),
      clusterer.mNMaxClusterPerRow,
      clusterer.mPclusterInRow,
      clusterOut);
    if (clusterer.mPclusterPosInRow != nullptr) {
      clusterer.mPclusterPosInRow[full_glo_idx] = rowIndex;
    }
  } else if (clusterer.mPclusterPosInRow) {
    rowIndex = clusterer.mPclusterPosInRow[full_glo_idx];
  }

  CPU_ONLY(labelAcc->commit(peak.row(), rowIndex, clusterer.mNMaxClusterPerRow));
} else {
  // Rejected candidate: mark the position as invalid
  if (clusterer.mPclusterPosInRow) {
    clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow;
  }
}