auto& clusterer = processors.tpcClusterer[sector];
auto& clustererNN = processors.tpcNNClusterer[sector];
uint32_t write_idx = glo_idx * clustererNN.mNnClusterizerElementSize;
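// Each cluster candidate occupies one contiguous slot of mNnClusterizerElementSize
// entries in the flattened network-input buffer; presumably the
// (2*SizeInputRow+1) x (2*SizeInputPad+1) x (2*SizeInputTime+1) charge window,
// plus three index entries when mNnClusterizerAddIndexData is set.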
CfChargePos peak = clusterer.mPfilteredPeakPositions[CAMath::Min(glo_idx + batchStart, (uint32_t)(clusterer.mPmemory->counters.nClusters - 1))];
int32_t row = static_cast<int>(peak.row()), pad = static_cast<int>(peak.pad()), time = static_cast<int>(peak.time());
float central_charge = static_cast<float>(chargeMap[peak].unpack());
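// All window charges below are divided by the central peak charge, so the network
// sees relative amplitudes rather than absolute ADC values.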
int32_t row_offset = GPUTPCNNClusterizerKernels::rowOffset(row, clustererNN.mNnClusterizerSizeInputRow);
for (int32_t r = -clustererNN.mNnClusterizerSizeInputRow; r <= clustererNN.mNnClusterizerSizeInputRow; r++) {
  int32_t pad_offset = is_row_boundary ? 0 : GPUTPCNNClusterizerKernels::padOffset(row, row + r);
  for (int32_t p = -clustererNN.mNnClusterizerSizeInputPad + pad_offset; p <= clustererNN.mNnClusterizerSizeInputPad + pad_offset; p++) {
    bool is_boundary = is_row_boundary || GPUTPCNNClusterizerKernels::isBoundary(row + r + row_offset, pad + p, clustererNN.mNnClusterizerSizeInputRow);
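    // padOffset recenters the window on neighboring rows with different pad counts;
    // is_boundary marks positions outside the active pad plane, which receive the
    // boundary fill value instead of a charge below.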
    for (int32_t t = -clustererNN.mNnClusterizerSizeInputTime; t <= clustererNN.mNnClusterizerSizeInputTime; t++) {
      if (!is_boundary) {
        CfChargePos tmp_pos(row + r, pad + p, time + t); // assumed reconstruction of the window position relative to the peak
        if (r == 0 && !clustererNN.mClusterFlags[2 * glo_idx] && CAMath::Abs(p) < 3 && CAMath::Abs(t) < 3 && p != 0 && t != 0) {
          // Another peak inside the 5x5 (pad, time) vicinity marks this cluster as split.
          clustererNN.mClusterFlags[2 * glo_idx] += CfUtils::isPeak(isPeakMap[tmp_pos]);
          clustererNN.mClusterFlags[2 * glo_idx + 1] = clustererNN.mClusterFlags[2 * glo_idx];
        }
        if (dtype == 0) {
          clustererNN.mInputData_16[write_idx] = (OrtDataType::Float16_t)(static_cast<float>(chargeMap[tmp_pos].unpack()) / central_charge);
        } else if (dtype == 1) {
          clustererNN.mInputData_32[write_idx] = static_cast<float>(chargeMap[tmp_pos].unpack()) / central_charge;
        }
      } else {
        if (dtype == 0) {
          clustererNN.mInputData_16[write_idx] = (OrtDataType::Float16_t)(static_cast<float>(clustererNN.mNnClusterizerBoundaryFillValue));
        } else if (dtype == 1) {
          clustererNN.mInputData_32[write_idx] = static_cast<float>(clustererNN.mNnClusterizerBoundaryFillValue);
        }
      }
      write_idx++;
    }
  }
}
if (clustererNN.mNnClusterizerAddIndexData) {
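  // Append three normalized index features after the charge window: the sector
  // (36 TPC sectors), the row (152 pad rows) and the pad scaled by the pad count
  // of this row.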
  if (dtype == 0) {
    clustererNN.mInputData_16[write_idx] = (OrtDataType::Float16_t)(sector / 36.f);
    clustererNN.mInputData_16[write_idx + 1] = (OrtDataType::Float16_t)(row / 152.f);
    clustererNN.mInputData_16[write_idx + 2] = (OrtDataType::Float16_t)(static_cast<float>(pad) / GPUTPCGeometry::NPads(row));
  } else if (dtype == 1) {
    clustererNN.mInputData_32[write_idx] = sector / 36.f;
    clustererNN.mInputData_32[write_idx + 1] = row / 152.f;
    clustererNN.mInputData_32[write_idx + 2] = static_cast<float>(pad) / GPUTPCGeometry::NPads(row);
  }
}
auto& clusterer = processors.tpcClusterer[sector];
auto& clustererNN = processors.tpcNNClusterer[sector];
uint32_t base_idx = CAMath::Floor(glo_idx / clustererNN.mNnClusterizerElementSize);
uint32_t transient_index = glo_idx - (base_idx * clustererNN.mNnClusterizerElementSize);
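// Element-parallel variant of the input fill: one thread per tensor entry instead of
// one per cluster. base_idx selects the cluster, transient_index the entry within its slot.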
CfChargePos peak = clusterer.mPfilteredPeakPositions[CAMath::Min(base_idx + batchStart, (uint32_t)(clusterer.mPmemory->counters.nClusters - 1))];
int32_t row = static_cast<int>(peak.row()), pad = static_cast<int>(peak.pad());
if (clustererNN.mNnClusterizerAddIndexData && (int32_t)transient_index == (clustererNN.mNnClusterizerElementSize - 1)) {
  uint32_t top_idx = (base_idx + 1) * clustererNN.mNnClusterizerElementSize;
  for (uint16_t i = 0; i < 8; i++) {
    Delta2 d = cfconsts::InnerNeighbors[i];
    CfChargePos tmp_pos(row + d.x, pad + d.y, static_cast<int>(peak.time())); // assumed reconstruction of the neighbor position
    // A neighboring peak in the inner 3x3 ring marks the cluster as split.
    clustererNN.mClusterFlags[2 * glo_idx] += CfUtils::isPeak(isPeakMap[tmp_pos]);
    clustererNN.mClusterFlags[2 * glo_idx + 1] = clustererNN.mClusterFlags[2 * glo_idx];
  }
  if (dtype == 0) {
    clustererNN.mInputData_16[top_idx - 3] = (OrtDataType::Float16_t)(sector / 36.f);
    clustererNN.mInputData_16[top_idx - 2] = (OrtDataType::Float16_t)(row / 152.f);
    clustererNN.mInputData_16[top_idx - 1] = (OrtDataType::Float16_t)(static_cast<float>(pad) / GPUTPCGeometry::NPads(row));
  } else if (dtype == 1) {
    clustererNN.mInputData_32[top_idx - 3] = sector / 36.f;
    clustererNN.mInputData_32[top_idx - 2] = row / 152.f;
    clustererNN.mInputData_32[top_idx - 1] = static_cast<float>(pad) / GPUTPCGeometry::NPads(row);
  }
} else if ((int32_t)transient_index < (clustererNN.mNnClusterizerElementSize - 3)) {
  int32_t time = static_cast<int>(peak.time());
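  // Reconstruct the (r, p, t) window coordinates from the flattened transient_index;
  // the decomposition mirrors the write order of the per-cluster fill above.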
  int32_t r = CAMath::Floor(transient_index / ((2 * clustererNN.mNnClusterizerSizeInputPad + 1) * (2 * clustererNN.mNnClusterizerSizeInputTime + 1))) - clustererNN.mNnClusterizerSizeInputRow;
  if (is_row_boundary) {
    if (dtype == 0) {
      clustererNN.mInputData_16[base_idx * clustererNN.mNnClusterizerElementSize + transient_index] = (OrtDataType::Float16_t)(static_cast<float>(clustererNN.mNnClusterizerBoundaryFillValue));
    } else if (dtype == 1) {
      clustererNN.mInputData_32[base_idx * clustererNN.mNnClusterizerElementSize + transient_index] = static_cast<float>(clustererNN.mNnClusterizerBoundaryFillValue);
    }
  } else {
    int32_t row_offset = GPUTPCNNClusterizerKernels::rowOffset(row, clustererNN.mNnClusterizerSizeInputRow);
    int32_t pad_offset = GPUTPCNNClusterizerKernels::padOffset(row, row + r);
    int32_t rest_1 = transient_index % ((2 * clustererNN.mNnClusterizerSizeInputPad + 1) * (2 * clustererNN.mNnClusterizerSizeInputTime + 1));
    int32_t p = CAMath::Floor(rest_1 / (2 * clustererNN.mNnClusterizerSizeInputTime + 1)) - clustererNN.mNnClusterizerSizeInputPad + pad_offset;
    int32_t t = (rest_1 % (2 * clustererNN.mNnClusterizerSizeInputTime + 1)) - clustererNN.mNnClusterizerSizeInputTime;
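    // Worked example, assuming SizeInputRow = SizeInputPad = SizeInputTime = 3:
    // one row slice holds 7 * 7 = 49 entries, so transient_index = 200 yields
    // r = 200 / 49 - 3 = 1, rest_1 = 200 % 49 = 4, p = 4 / 7 - 3 + pad_offset, t = 4 % 7 - 3 = 1.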
    bool is_boundary = GPUTPCNNClusterizerKernels::isBoundary(row + r + row_offset, pad + p, clustererNN.mNnClusterizerSizeInputRow) && (t < 0 || t >= TPC_MAX_FRAGMENT_LEN_GPU);
    float central_charge = static_cast<float>(chargeMap[peak].unpack());
    if (!is_boundary) {
      CfChargePos tmp_pos(row + r, pad + p, time + t); // assumed reconstruction of the window position
      if (dtype == 0) {
        clustererNN.mInputData_16[base_idx * clustererNN.mNnClusterizerElementSize + transient_index] = (OrtDataType::Float16_t)(static_cast<float>(chargeMap[tmp_pos].unpack()) / central_charge);
      } else if (dtype == 1) {
        clustererNN.mInputData_32[base_idx * clustererNN.mNnClusterizerElementSize + transient_index] = static_cast<float>(chargeMap[tmp_pos].unpack()) / central_charge;
      }
    } else {
      if (dtype == 0) {
        clustererNN.mInputData_16[base_idx * clustererNN.mNnClusterizerElementSize + transient_index] = (OrtDataType::Float16_t)(static_cast<float>(clustererNN.mNnClusterizerBoundaryFillValue));
      } else if (dtype == 1) {
        clustererNN.mInputData_32[base_idx * clustererNN.mNnClusterizerElementSize + transient_index] = static_cast<float>(clustererNN.mNnClusterizerBoundaryFillValue);
      }
    }
  }
}
auto& clustererNN = processors.tpcNNClusterer[sector];
uint32_t elem_iterator = glo_idx * clustererNN.mNnClusterizerModelClassNumOutputNodes;
float current_max_prob = 0.f;
uint32_t class_label = 0;
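// Argmax over the classification head: scan this cluster's
// mNnClusterizerModelClassNumOutputNodes probabilities and keep the most likely class.
// The elided part of the loop presumably records the index of the running maximum in class_label.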
for (uint32_t pIdx = elem_iterator; pIdx < elem_iterator + clustererNN.mNnClusterizerModelClassNumOutputNodes; pIdx++) {
  if (pIdx == elem_iterator) {
    if (dtype == 0) {
      current_max_prob = static_cast<float>(clustererNN.mModelProbabilities_16[pIdx]);
    } else if (dtype == 1) {
      current_max_prob = clustererNN.mModelProbabilities_32[pIdx];
    }
  } else {
    if (dtype == 0) {
      current_max_prob = CAMath::Max(current_max_prob, clustererNN.mModelProbabilities_16[pIdx].ToFloat());
    } else if (dtype == 1) {
      current_max_prob = CAMath::Max(current_max_prob, clustererNN.mModelProbabilities_32[pIdx]);
    }
  }
}
clustererNN.mOutputDataClass[glo_idx + batchStart] = class_label;
if (class_label > 1) {
  clustererNN.mClusterFlags[2 * glo_idx] = 1;
  clustererNN.mClusterFlags[2 * glo_idx + 1] = 1;
}
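// A label above 1 presumably flags an overlap candidate (two merged peaks), so both
// split flags are raised before the regression stage consumes them.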
auto& clusterer = processors.tpcClusterer[sector];
auto& clustererNN = processors.tpcNNClusterer[sector];
uint32_t maxClusterNum = clusterer.mPmemory->counters.nClusters;
uint32_t full_glo_idx = glo_idx + batchStart;
if (full_glo_idx >= maxClusterNum) {
  return;
}
int32_t model_output_index = glo_idx * clustererNN.mNnClusterizerModelReg1NumOutputNodes;
CfChargePos peak = clusterer.mPfilteredPeakPositions[CAMath::Min(full_glo_idx, maxClusterNum - 1)];
float central_charge = static_cast<float>(chargeMap[peak].unpack());
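// Regression head 1 yields five outputs per cluster; from the setFull calls below the
// apparent layout is [0] pad offset, [1] time offset, [2] sigma(pad), [3] sigma(time),
// [4] charge scale relative to the central charge. This labeling is inferred from usage.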
if (clustererNN.mOutputDataClass[full_glo_idx] == 1 || (clustererNN.mNnClusterizerModelReg2NumOutputNodes != -1 && clustererNN.mOutputDataClass[full_glo_idx] >= 1)) {
  ClusterAccumulator pc; // assumed declaration
  GPUTPCCFClusterizer::buildCluster(
    clusterer.Param().rec,
    chargeMap,
    peak,
    smem.posBcast,
    smem.buf,
    smem.innerAboveThreshold,
    &pc,
    nullptr); // argument list assumed to follow the standard GPUTPCCFClusterizer call pattern
  if ((clusterer.mPmemory->fragment).isOverlap(peak.time())) {
    if (clusterer.mPclusterPosInRow) {
      clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow;
    }
    return;
  }
  if (dtype == 0) {
    pc.setFull(central_charge * clustererNN.mOutputDataReg1_16[model_output_index + 4].ToFloat(),
               static_cast<float>(peak.pad()) + clustererNN.mOutputDataReg1_16[model_output_index].ToFloat(),
               clustererNN.mOutputDataReg1_16[model_output_index + 2].ToFloat(),
               (clusterer.mPmemory->fragment).start + static_cast<float>(peak.time()) + clustererNN.mOutputDataReg1_16[model_output_index + 1].ToFloat(),
               clustererNN.mOutputDataReg1_16[model_output_index + 3].ToFloat(),
               clustererNN.mClusterFlags[2 * glo_idx],
               clustererNN.mClusterFlags[2 * glo_idx + 1]);
  } else if (dtype == 1) {
    pc.setFull(central_charge * clustererNN.mOutputDataReg1_32[model_output_index + 4],
               static_cast<float>(peak.pad()) + clustererNN.mOutputDataReg1_32[model_output_index],
               clustererNN.mOutputDataReg1_32[model_output_index + 2],
               (clusterer.mPmemory->fragment).start + static_cast<float>(peak.time()) + clustererNN.mOutputDataReg1_32[model_output_index + 1],
               clustererNN.mOutputDataReg1_32[model_output_index + 3],
               clustererNN.mClusterFlags[2 * glo_idx],
               clustererNN.mClusterFlags[2 * glo_idx + 1]);
  }
  tpc::ClusterNative myCluster; // assumed declaration; receives the converted cluster
  bool rejectCluster = !pc.toNative(peak, central_charge, myCluster, clusterer.Param(), chargeMap);
  if (rejectCluster) {
    if (clusterer.mPclusterPosInRow) {
      clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow;
    }
    return;
  }
  uint32_t rowIndex = 0;
  if (clusterOut != nullptr) {
    rowIndex = GPUTPCCFClusterizer::sortIntoBuckets(
      clusterer,
      myCluster,
      peak.row(),
      clusterer.mNMaxClusterPerRow,
      clusterer.mPclusterInRow,
      clusterOut); // argument list assumed
    if (clusterer.mPclusterPosInRow != nullptr) {
      clusterer.mPclusterPosInRow[full_glo_idx] = rowIndex;
    }
  } else if (clusterer.mPclusterPosInRow) {
    rowIndex = clusterer.mPclusterPosInRow[full_glo_idx];
  }
  CPU_ONLY(labelAcc->commit(peak.row(), rowIndex, clusterer.mNMaxClusterPerRow));
} else {
  if (clusterer.mPclusterPosInRow) {
    clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow;
  }
}
auto& clusterer = processors.tpcClusterer[sector];
auto& clustererNN = processors.tpcNNClusterer[sector];
CfChargePos peak = clusterer.mPfilteredPeakPositions[CAMath::Min(glo_idx + batchStart, (uint32_t)(clusterer.mPmemory->counters.nClusters - 1))];
float central_charge = static_cast<float>(chargeMap[peak].unpack());
uint32_t full_glo_idx = glo_idx + batchStart;
uint32_t model_output_index = glo_idx * clustererNN.mNnClusterizerModelReg2NumOutputNodes;
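// Regression head 2 appears to predict two cluster hypotheses at once; from the two
// setFull calls below, pads sit at [0]/[1], times at [2]/[3], sigma(pad) at [4]/[5],
// sigma(time) at [6]/[7] and charge scales at [8]/[9]. This layout is inferred from usage.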
if (clustererNN.mOutputDataClass[full_glo_idx] > 0) {
  ClusterAccumulator pc; // assumed declaration
  GPUTPCCFClusterizer::buildCluster(
    clusterer.Param().rec,
    chargeMap,
    peak,
    smem.posBcast,
    smem.buf,
    smem.innerAboveThreshold,
    &pc,
    nullptr); // argument list assumed to follow the standard GPUTPCCFClusterizer call pattern
  if ((clusterer.mPmemory->fragment).isOverlap(peak.time())) {
    if (clusterer.mPclusterPosInRow) {
      clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow;
    }
    return;
  }
  if (dtype == 0) {
    pc.setFull(central_charge * clustererNN.mOutputDataReg2_16[model_output_index + 8].ToFloat(),
               static_cast<float>(peak.pad()) + clustererNN.mOutputDataReg2_16[model_output_index].ToFloat(),
               clustererNN.mOutputDataReg2_16[model_output_index + 4].ToFloat(),
               (clusterer.mPmemory->fragment).start + static_cast<float>(peak.time()) + clustererNN.mOutputDataReg2_16[model_output_index + 2].ToFloat(),
               clustererNN.mOutputDataReg2_16[model_output_index + 6].ToFloat(),
               clustererNN.mClusterFlags[2 * glo_idx],
               clustererNN.mClusterFlags[2 * glo_idx + 1]);
  } else if (dtype == 1) {
    pc.setFull(central_charge * clustererNN.mOutputDataReg2_32[model_output_index + 8],
               static_cast<float>(peak.pad()) + clustererNN.mOutputDataReg2_32[model_output_index],
               clustererNN.mOutputDataReg2_32[model_output_index + 4],
               (clusterer.mPmemory->fragment).start + static_cast<float>(peak.time()) + clustererNN.mOutputDataReg2_32[model_output_index + 2],
               clustererNN.mOutputDataReg2_32[model_output_index + 6],
               clustererNN.mClusterFlags[2 * glo_idx],
               clustererNN.mClusterFlags[2 * glo_idx + 1]);
  }
  tpc::ClusterNative myCluster; // assumed declaration, as in the first regression path
  bool rejectCluster = !pc.toNative(peak, central_charge, myCluster, clusterer.Param(), chargeMap);
  if (rejectCluster) {
    if (clusterer.mPclusterPosInRow) {
      clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow;
    }
    return;
  }
  uint32_t rowIndex = 0;
  if (clusterOut != nullptr) {
    rowIndex = GPUTPCCFClusterizer::sortIntoBuckets(
      clusterer,
      myCluster,
      peak.row(),
      clusterer.mNMaxClusterPerRow,
      clusterer.mPclusterInRow,
      clusterOut); // argument list assumed
    if (clusterer.mPclusterPosInRow != nullptr) {
      clusterer.mPclusterPosInRow[full_glo_idx] = rowIndex;
    }
  } else if (clusterer.mPclusterPosInRow) {
    rowIndex = clusterer.mPclusterPosInRow[full_glo_idx];
  }
  CPU_ONLY(labelAcc->commit(peak.row(), rowIndex, clusterer.mNMaxClusterPerRow));
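  // Publish the second hypothesis: the same reject/sort/commit sequence, fed from the
  // second set of regression outputs.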
  if (dtype == 0) {
    pc.setFull(central_charge * clustererNN.mOutputDataReg2_16[model_output_index + 9].ToFloat(),
               static_cast<float>(peak.pad()) + clustererNN.mOutputDataReg2_16[model_output_index + 1].ToFloat(),
               clustererNN.mOutputDataReg2_16[model_output_index + 5].ToFloat(),
               (clusterer.mPmemory->fragment).start + static_cast<float>(peak.time()) + clustererNN.mOutputDataReg2_16[model_output_index + 3].ToFloat(),
               clustererNN.mOutputDataReg2_16[model_output_index + 7].ToFloat(),
               clustererNN.mClusterFlags[2 * glo_idx],
               clustererNN.mClusterFlags[2 * glo_idx + 1]);
  } else if (dtype == 1) {
    pc.setFull(central_charge * clustererNN.mOutputDataReg2_32[model_output_index + 9],
               static_cast<float>(peak.pad()) + clustererNN.mOutputDataReg2_32[model_output_index + 1],
               clustererNN.mOutputDataReg2_32[model_output_index + 5],
               (clusterer.mPmemory->fragment).start + static_cast<float>(peak.time()) + clustererNN.mOutputDataReg2_32[model_output_index + 3],
               clustererNN.mOutputDataReg2_32[model_output_index + 7],
               clustererNN.mClusterFlags[2 * glo_idx],
               clustererNN.mClusterFlags[2 * glo_idx + 1]);
  }
  rejectCluster = !pc.toNative(peak, central_charge, myCluster, clusterer.Param(), chargeMap);
  if (rejectCluster) {
    if (clusterer.mPclusterPosInRow) {
      clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow;
    }
    return;
  }
  if (clusterOut != nullptr) {
    rowIndex = GPUTPCCFClusterizer::sortIntoBuckets(
      clusterer,
      myCluster,
      peak.row(),
      clusterer.mNMaxClusterPerRow,
      clusterer.mPclusterInRow,
      clusterOut); // argument list assumed
    if (clusterer.mPclusterPosInRow != nullptr) {
      clusterer.mPclusterPosInRow[full_glo_idx] = rowIndex;
    }
  } else if (clusterer.mPclusterPosInRow) {
    rowIndex = clusterer.mPclusterPosInRow[full_glo_idx];
  }
} else {
  if (clusterer.mPclusterPosInRow) {
    clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow;
  }
}