auto& clusterer = processors.tpcClusterer[sector];
auto& clustererNN = processors.tpcNNClusterer[sector];
uint32_t write_idx = glo_idx * clustererNN.mNnClusterizerElementSize;
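// Each cluster candidate occupies mNnClusterizerElementSize consecutive entries in the
// network input tensor; write_idx is the first entry for this candidate.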
CfChargePos peak = clusterer.mPfilteredPeakPositions[CAMath::Min(glo_idx + batchStart, (uint32_t)(clusterer.mPmemory->counters.nClusters - 1))];
int32_t row = static_cast<int>(peak.row()), pad = static_cast<int>(peak.pad()), time = static_cast<int>(peak.time());
float central_charge = static_cast<float>(chargeMap[peak].unpack());
int32_t row_offset = GPUTPCNNClusterizerKernels::rowOffset(row, clustererNN.mNnClusterizerSizeInputRow);
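// Fill the charge window around the peak: (2*SizeInputRow+1) x (2*SizeInputPad+1) x
// (2*SizeInputTime+1) elements, each normalized to the central charge.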
for (int32_t r = -clustererNN.mNnClusterizerSizeInputRow; r <= clustererNN.mNnClusterizerSizeInputRow; r++) {
  int32_t pad_offset = is_row_boundary ? 0 : GPUTPCNNClusterizerKernels::padOffset(row, row + r);
  for (int32_t p = -clustererNN.mNnClusterizerSizeInputPad + pad_offset; p <= clustererNN.mNnClusterizerSizeInputPad + pad_offset; p++) {
    bool is_boundary = is_row_boundary || GPUTPCNNClusterizerKernels::isBoundary(row + r + row_offset, pad + p, clustererNN.mNnClusterizerSizeInputRow);
    for (int32_t t = -clustererNN.mNnClusterizerSizeInputTime; t <= clustererNN.mNnClusterizerSizeInputTime; t++) {
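      // For non-boundary elements store the charge normalized to the central charge;
      // near the row center, neighboring peaks also increment the split flags.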
      if (!is_boundary) {
        CfChargePos tmp_pos(row + r, pad + p, time + t); // assumed constructor order: (row, pad, time)
        if (r == 0 && !clustererNN.mClusterFlags[2 * glo_idx] && CAMath::Abs(p) < 3 && CAMath::Abs(t) < 3 && p != 0 && t != 0) {
          clustererNN.mClusterFlags[2 * glo_idx] += CfUtils::isPeak(isPeakMap[tmp_pos]);
          clustererNN.mClusterFlags[2 * glo_idx + 1] = clustererNN.mClusterFlags[2 * glo_idx];
        }
        if (dtype == 0) {
          clustererNN.mInputData_16[write_idx] = (OrtDataType::Float16_t)(static_cast<float>(chargeMap[tmp_pos].unpack()) / central_charge);
        } else if (dtype == 1) {
          clustererNN.mInputData_32[write_idx] = static_cast<float>(chargeMap[tmp_pos].unpack()) / central_charge;
        }
      } else if (dtype == 0) {
        clustererNN.mInputData_16[write_idx] = (OrtDataType::Float16_t)(static_cast<float>(clustererNN.mNnClusterizerBoundaryFillValue));
      } else if (dtype == 1) {
        clustererNN.mInputData_32[write_idx] = static_cast<float>(clustererNN.mNnClusterizerBoundaryFillValue);
      }
      write_idx++;
    }
  }
}
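// Optionally append normalized index data (sector, row, pad) after the charge window.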
if (clustererNN.mNnClusterizerAddIndexData) {
  if (dtype == 0) {
    clustererNN.mInputData_16[write_idx] = (OrtDataType::Float16_t)(sector / 36.f);
    clustererNN.mInputData_16[write_idx + 1] = (OrtDataType::Float16_t)(row / 152.f);
    clustererNN.mInputData_16[write_idx + 2] = (OrtDataType::Float16_t)(static_cast<float>(pad) / GPUTPCGeometry::NPads(row));
  } else if (dtype == 1) {
    clustererNN.mInputData_32[write_idx] = sector / 36.f;
    clustererNN.mInputData_32[write_idx + 1] = row / 152.f;
    clustererNN.mInputData_32[write_idx + 2] = static_cast<float>(pad) / GPUTPCGeometry::NPads(row);
  }
}
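// Variant used when the input tensor is filled with one thread per tensor element
// rather than one thread per cluster candidate.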
auto& clusterer = processors.tpcClusterer[sector];
auto& clustererNN = processors.tpcNNClusterer[sector];
uint32_t base_idx = CAMath::Floor(glo_idx / clustererNN.mNnClusterizerElementSize);
uint32_t transient_index = glo_idx - (base_idx * clustererNN.mNnClusterizerElementSize);
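// base_idx selects the cluster candidate, transient_index the element within its input vector.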
CfChargePos peak = clusterer.mPfilteredPeakPositions[CAMath::Min(base_idx + batchStart, (uint32_t)(clusterer.mPmemory->counters.nClusters - 1))];
int32_t row = static_cast<int>(peak.row()), pad = static_cast<int>(peak.pad());
if (clustererNN.mNnClusterizerAddIndexData && (int32_t)transient_index == (clustererNN.mNnClusterizerElementSize - 1)) {
  uint32_t top_idx = (base_idx + 1) * clustererNN.mNnClusterizerElementSize;
  if (!clustererNN.mNnClusterizerSetDeconvolutionFlags) {
    clustererNN.mClusterFlags[2 * base_idx] = 0;
    clustererNN.mClusterFlags[2 * base_idx + 1] = 0;
    for (uint16_t i = 0; i < 8; i++) {
      Delta2 d = cfconsts::InnerNeighbors[i];
      CfChargePos tmp_pos = peak.delta(d); // assumed: step from the peak by the neighbor offset d
      clustererNN.mClusterFlags[2 * base_idx] += CfUtils::isPeak(isPeakMap[tmp_pos]);
    }
    clustererNN.mClusterFlags[2 * base_idx + 1] = clustererNN.mClusterFlags[2 * base_idx];
  }
  if (dtype == 0) {
    clustererNN.mInputData_16[top_idx - 3] = (OrtDataType::Float16_t)(sector / 36.f);
    clustererNN.mInputData_16[top_idx - 2] = (OrtDataType::Float16_t)(row / 152.f);
    clustererNN.mInputData_16[top_idx - 1] = (OrtDataType::Float16_t)(static_cast<float>(pad) / GPUTPCGeometry::NPads(row));
  } else if (dtype == 1) {
    clustererNN.mInputData_32[top_idx - 3] = sector / 36.f;
    clustererNN.mInputData_32[top_idx - 2] = row / 152.f;
    clustererNN.mInputData_32[top_idx - 1] = static_cast<float>(pad) / GPUTPCGeometry::NPads(row);
  }
} else if ((int32_t)transient_index < (clustererNN.mNnClusterizerElementSize - 3)) {
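  // Unflatten transient_index into row (r), pad (p) and time offsets within the input window.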
  int32_t time = static_cast<int>(peak.time());
  int32_t r = CAMath::Floor(transient_index / ((2 * clustererNN.mNnClusterizerSizeInputPad + 1) * (2 * clustererNN.mNnClusterizerSizeInputTime + 1))) - clustererNN.mNnClusterizerSizeInputRow;
  if (is_row_boundary) {
    if (dtype == 0) {
      clustererNN.mInputData_16[glo_idx] = (OrtDataType::Float16_t)(static_cast<float>(clustererNN.mNnClusterizerBoundaryFillValue));
    } else if (dtype == 1) {
      clustererNN.mInputData_32[glo_idx] = static_cast<float>(clustererNN.mNnClusterizerBoundaryFillValue);
    }
  } else {
    int32_t row_offset = GPUTPCNNClusterizerKernels::rowOffset(row, clustererNN.mNnClusterizerSizeInputRow);
    int32_t pad_offset = GPUTPCNNClusterizerKernels::padOffset(row, row + r);
    int32_t rest_1 = transient_index % ((2 * clustererNN.mNnClusterizerSizeInputPad + 1) * (2 * clustererNN.mNnClusterizerSizeInputTime + 1));
    int32_t p = CAMath::Floor(rest_1 / (2 * clustererNN.mNnClusterizerSizeInputTime + 1)) - clustererNN.mNnClusterizerSizeInputPad + pad_offset;
    int32_t time_pos = (rest_1 % (2 * clustererNN.mNnClusterizerSizeInputTime + 1)) - clustererNN.mNnClusterizerSizeInputTime + time;
    // Out of bounds if the pad/row position leaves the sector or the time bin leaves the fragment.
    bool is_boundary = GPUTPCNNClusterizerKernels::isBoundary(row + r + row_offset, pad + p, clustererNN.mNnClusterizerSizeInputRow) || (time_pos < 0 || time_pos >= TPC_MAX_FRAGMENT_LEN_GPU);
    float central_charge = static_cast<float>(chargeMap[peak].unpack());
    if (!is_boundary) {
      CfChargePos tmp_pos(row + r, pad + p, time_pos); // assumed constructor order: (row, pad, time)
      if (dtype == 0) {
        clustererNN.mInputData_16[glo_idx] = (OrtDataType::Float16_t)(static_cast<float>(chargeMap[tmp_pos].unpack()) / central_charge);
      } else if (dtype == 1) {
        clustererNN.mInputData_32[glo_idx] = static_cast<float>(chargeMap[tmp_pos].unpack()) / central_charge;
      }
    } else if (dtype == 0) {
      clustererNN.mInputData_16[glo_idx] = (OrtDataType::Float16_t)(static_cast<float>(clustererNN.mNnClusterizerBoundaryFillValue));
    } else if (dtype == 1) {
      clustererNN.mInputData_32[glo_idx] = static_cast<float>(clustererNN.mNnClusterizerBoundaryFillValue);
    }
  }
auto& clustererNN = processors.tpcNNClusterer[sector];
uint32_t elem_iterator = glo_idx * clustererNN.mNnClusterizerModelClassNumOutputNodes;
float current_max_prob = 0.f;
uint32_t class_label = 0;
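// Argmax over the classification outputs of this candidate; class_label tracks the winning index.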
for (uint32_t pIdx = elem_iterator; pIdx < elem_iterator + clustererNN.mNnClusterizerModelClassNumOutputNodes; pIdx++) {
  if (pIdx == elem_iterator) {
    // First output initializes the running maximum.
    if (dtype == 0) {
      current_max_prob = static_cast<float>(clustererNN.mModelProbabilities_16[pIdx]);
    } else if (dtype == 1) {
      current_max_prob = clustererNN.mModelProbabilities_32[pIdx];
    }
  } else {
    if (dtype == 0) {
      current_max_prob = CAMath::Max(current_max_prob, clustererNN.mModelProbabilities_16[pIdx].ToFloat());
    } else if (dtype == 1) {
      current_max_prob = CAMath::Max(current_max_prob, clustererNN.mModelProbabilities_32[pIdx]);
    }
  }
}
clustererNN.mOutputDataClass[glo_idx + batchStart] = class_label;
if (class_label > 1) {
  clustererNN.mClusterFlags[2 * glo_idx] = 1;
  clustererNN.mClusterFlags[2 * glo_idx + 1] = 1;
}
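// Class labels above 1 presumably indicate overlapping clusters; both split flags are set
// so the candidate is treated as split in pad and time.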
auto& clusterer = processors.tpcClusterer[sector];
auto& clustererNN = processors.tpcNNClusterer[sector];
uint32_t maxClusterNum = clusterer.mPmemory->counters.nClusters;
uint32_t full_glo_idx = glo_idx + batchStart;
if (full_glo_idx >= maxClusterNum) {
  return;
}
int32_t model_output_index = glo_idx * clustererNN.mNnClusterizerModelReg1NumOutputNodes;
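// Assumed Reg1 output layout, inferred from the setFull calls below:
// [0] pad offset, [1] time offset, [2] pad sigma, [3] time sigma, [4] qTot / central charge.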
CfChargePos peak = clusterer.mPfilteredPeakPositions[CAMath::Min(full_glo_idx, maxClusterNum - 1)];
float central_charge = static_cast<float>(chargeMap[peak].unpack());
if (clustererNN.mOutputDataClass[full_glo_idx] == 1 || (clustererNN.mNnClusterizerModelReg2NumOutputNodes != -1 && clustererNN.mOutputDataClass[full_glo_idx] >= 1)) {
  GPUTPCCFClusterizer::buildCluster(clusterer.Param().rec, /* ... */ smem.innerAboveThreshold, /* ... */);
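  // Clusters whose peak lies in the overlap region of the time fragment are skipped;
  // mNMaxClusterPerRow acts as the "not published" sentinel for mPclusterPosInRow.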
  if ((clusterer.mPmemory->fragment).isOverlap(peak.time())) {
    if (clusterer.mPclusterPosInRow) {
      clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow;
    }
    return;
  }
  if (dtype == 0) {
    pc.setFull(central_charge * clustererNN.mOutputDataReg1_16[model_output_index + 4].ToFloat(),
               static_cast<float>(peak.pad()) + clustererNN.mOutputDataReg1_16[model_output_index].ToFloat(),
               clustererNN.mOutputDataReg1_16[model_output_index + 2].ToFloat(),
               (clusterer.mPmemory->fragment).start + static_cast<float>(peak.time()) + clustererNN.mOutputDataReg1_16[model_output_index + 1].ToFloat(),
               clustererNN.mOutputDataReg1_16[model_output_index + 3].ToFloat(),
               clustererNN.mClusterFlags[2 * glo_idx],
               clustererNN.mClusterFlags[2 * glo_idx + 1]);
  } else if (dtype == 1) {
    pc.setFull(central_charge * clustererNN.mOutputDataReg1_32[model_output_index + 4],
               static_cast<float>(peak.pad()) + clustererNN.mOutputDataReg1_32[model_output_index],
               clustererNN.mOutputDataReg1_32[model_output_index + 2],
               (clusterer.mPmemory->fragment).start + static_cast<float>(peak.time()) + clustererNN.mOutputDataReg1_32[model_output_index + 1],
               clustererNN.mOutputDataReg1_32[model_output_index + 3],
               clustererNN.mClusterFlags[2 * glo_idx],
               clustererNN.mClusterFlags[2 * glo_idx + 1]);
  }
  bool rejectCluster = !pc.toNative(peak, central_charge, myCluster, clusterer.Param(), chargeMap);
  if (rejectCluster) {
    if (clusterer.mPclusterPosInRow) {
      clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow;
    }
    return;
  }
  uint32_t rowIndex = 0;
  if (clusterOut != nullptr) {
    rowIndex = GPUTPCCFClusterizer::sortIntoBuckets(/* ... */ clusterer.mNMaxClusterPerRow, clusterer.mPclusterInRow /* ... */);
    if (clusterer.mPclusterPosInRow != nullptr) {
      clusterer.mPclusterPosInRow[full_glo_idx] = rowIndex;
    }
  } else if (clusterer.mPclusterPosInRow) {
    rowIndex = clusterer.mPclusterPosInRow[full_glo_idx];
  }
  CPU_ONLY(labelAcc->commit(peak.row(), rowIndex, clusterer.mNMaxClusterPerRow));
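  // MC-label bookkeeping is compiled for the CPU build only.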
} else {
  // Candidate not selected by the classifier: mark it as unpublished.
  if (clusterer.mPclusterPosInRow) {
    clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow;
  }
}
auto& clusterer = processors.tpcClusterer[sector];
auto& clustererNN = processors.tpcNNClusterer[sector];
CfChargePos peak = clusterer.mPfilteredPeakPositions[CAMath::Min(glo_idx + batchStart, (uint32_t)(clusterer.mPmemory->counters.nClusters - 1))];
float central_charge = static_cast<float>(chargeMap[peak].unpack());
uint32_t full_glo_idx = glo_idx + batchStart;
uint32_t model_output_index = glo_idx * clustererNN.mNnClusterizerModelReg2NumOutputNodes;
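// Assumed Reg2 output layout, inferred from the two setFull calls below: values for the
// two cluster candidates are interleaved as [pad1, pad2, time1, time2, sigmaPad1, sigmaPad2,
// sigmaTime1, sigmaTime2, qTot1, qTot2].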
if (clustererNN.mOutputDataClass[full_glo_idx] > 0) {
  GPUTPCCFClusterizer::buildCluster(clusterer.Param().rec, /* ... */ smem.innerAboveThreshold, /* ... */);
  if ((clusterer.mPmemory->fragment).isOverlap(peak.time())) {
    if (clusterer.mPclusterPosInRow) {
      clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow;
    }
    return;
  }
  // First cluster candidate (even-interleaved outputs).
  if (dtype == 0) {
    pc.setFull(central_charge * clustererNN.mOutputDataReg2_16[model_output_index + 8].ToFloat(),
               static_cast<float>(peak.pad()) + clustererNN.mOutputDataReg2_16[model_output_index].ToFloat(),
               clustererNN.mOutputDataReg2_16[model_output_index + 4].ToFloat(),
               (clusterer.mPmemory->fragment).start + static_cast<float>(peak.time()) + clustererNN.mOutputDataReg2_16[model_output_index + 2].ToFloat(),
               clustererNN.mOutputDataReg2_16[model_output_index + 6].ToFloat(),
               clustererNN.mClusterFlags[2 * glo_idx],
               clustererNN.mClusterFlags[2 * glo_idx + 1]);
  } else if (dtype == 1) {
    pc.setFull(central_charge * clustererNN.mOutputDataReg2_32[model_output_index + 8],
               static_cast<float>(peak.pad()) + clustererNN.mOutputDataReg2_32[model_output_index],
               clustererNN.mOutputDataReg2_32[model_output_index + 4],
               (clusterer.mPmemory->fragment).start + static_cast<float>(peak.time()) + clustererNN.mOutputDataReg2_32[model_output_index + 2],
               clustererNN.mOutputDataReg2_32[model_output_index + 6],
               clustererNN.mClusterFlags[2 * glo_idx],
               clustererNN.mClusterFlags[2 * glo_idx + 1]);
  }
  bool rejectCluster = !pc.toNative(peak, central_charge, myCluster, clusterer.Param(), chargeMap);
  if (rejectCluster) {
    if (clusterer.mPclusterPosInRow) {
      clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow;
    }
    return;
  }
  uint32_t rowIndex = 0;
  if (clusterOut != nullptr) {
    rowIndex = GPUTPCCFClusterizer::sortIntoBuckets(/* ... */ clusterer.mNMaxClusterPerRow, clusterer.mPclusterInRow /* ... */);
    if (clusterer.mPclusterPosInRow != nullptr) {
      clusterer.mPclusterPosInRow[full_glo_idx] = rowIndex;
    }
  } else if (clusterer.mPclusterPosInRow) {
    rowIndex = clusterer.mPclusterPosInRow[full_glo_idx];
  }
  CPU_ONLY(labelAcc->commit(peak.row(), rowIndex, clusterer.mNMaxClusterPerRow));
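  // Second cluster candidate (odd-interleaved outputs), published with the same sequence.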
  if (dtype == 0) {
    pc.setFull(central_charge * clustererNN.mOutputDataReg2_16[model_output_index + 9].ToFloat(),
               static_cast<float>(peak.pad()) + clustererNN.mOutputDataReg2_16[model_output_index + 1].ToFloat(),
               clustererNN.mOutputDataReg2_16[model_output_index + 5].ToFloat(),
               (clusterer.mPmemory->fragment).start + static_cast<float>(peak.time()) + clustererNN.mOutputDataReg2_16[model_output_index + 3].ToFloat(),
               clustererNN.mOutputDataReg2_16[model_output_index + 7].ToFloat(),
               clustererNN.mClusterFlags[2 * glo_idx],
               clustererNN.mClusterFlags[2 * glo_idx + 1]);
  } else if (dtype == 1) {
    pc.setFull(central_charge * clustererNN.mOutputDataReg2_32[model_output_index + 9],
               static_cast<float>(peak.pad()) + clustererNN.mOutputDataReg2_32[model_output_index + 1],
               clustererNN.mOutputDataReg2_32[model_output_index + 5],
               (clusterer.mPmemory->fragment).start + static_cast<float>(peak.time()) + clustererNN.mOutputDataReg2_32[model_output_index + 3],
               clustererNN.mOutputDataReg2_32[model_output_index + 7],
               clustererNN.mClusterFlags[2 * glo_idx],
               clustererNN.mClusterFlags[2 * glo_idx + 1]);
  }
  rejectCluster = !pc.toNative(peak, central_charge, myCluster, clusterer.Param(), chargeMap);
  if (rejectCluster) {
    if (clusterer.mPclusterPosInRow) {
      clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow;
    }
    return;
  }
  if (clusterOut != nullptr) {
    rowIndex = GPUTPCCFClusterizer::sortIntoBuckets(/* ... */ clusterer.mNMaxClusterPerRow, clusterer.mPclusterInRow /* ... */);
    if (clusterer.mPclusterPosInRow != nullptr) {
      clusterer.mPclusterPosInRow[full_glo_idx] = rowIndex;
    }
  } else if (clusterer.mPclusterPosInRow) {
    rowIndex = clusterer.mPclusterPosInRow[full_glo_idx];
  }
} else {
  // Candidate not selected by the classifier: mark it as unpublished.
  if (clusterer.mPclusterPosInRow) {
    clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow;
  }
}