GPUTPCTrackingData.cxx
// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
// All rights not expressly granted are reserved.
//
// This software is distributed under the terms of the GNU General Public
// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
//
// In applying this license CERN does not waive the privileges and immunities
// granted to it by virtue of its status as an Intergovernmental Organization
// or submit itself to any jurisdiction.

/// \file GPUTPCTrackingData.cxx

#include "GPUParam.h"
#include "GPUTPCClusterData.h"
#include "GPUTPCHit.h"
#include "GPUTPCTrackingData.h"
#include "GPUProcessor.h"
#include "GPUO2DataTypes.h"
#include "GPUTPCConvertImpl.h"
#include "GPUCommonMath.h"

#ifndef GPUCA_GPUCODE_DEVICE
#include "utils/vecpod.h"
#include <iostream>
#include <cstring>
#include "GPUReconstruction.h"
#endif

using namespace o2::gpu;

#ifndef GPUCA_GPUCODE

void GPUTPCTrackingData::InitializeRows(const GPUParam& p)
{
  // initialisation of rows
  for (int32_t i = 0; i < GPUCA_ROW_COUNT + 1; i++) {
    new (&mRows[i]) GPUTPCRow;
  }
  for (int32_t i = 0; i < GPUCA_ROW_COUNT; i++) {
    mRows[i].mX = p.tpcGeometry.Row2X(i);
    mRows[i].mMaxY = CAMath::Tan(p.par.dAlpha / 2.f) * mRows[i].mX;
  }
}

void GPUTPCTrackingData::SetClusterData(const GPUTPCClusterData* data, int32_t nClusters, int32_t clusterIdOffset)
{
  mClusterData = data;
  mNumberOfHits = nClusters;
  mClusterIdOffset = clusterIdOffset;
}

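// Derives the padded hit count used for all per-hit buffer allocations: per-row alignment padding is
// added to the raw hit count, and the total is rounded up to the larger of the vector and row
// alignments (expressed in units of int32_t).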
void GPUTPCTrackingData::SetMaxData()
{
  int32_t hitMemCount = GPUCA_ROW_COUNT * GPUCA_ROWALIGNMENT + mNumberOfHits;
  const uint32_t kVectorAlignment = 256;
  mNumberOfHitsPlusAlign = GPUProcessor::nextMultipleOf<(kVectorAlignment > GPUCA_ROWALIGNMENT ? kVectorAlignment : GPUCA_ROWALIGNMENT) / sizeof(int32_t)>(hitMemCount);
}

void* GPUTPCTrackingData::SetPointersLinks(void* mem)
{
  GPUProcessor::computePointerWithAlignment(mem, mLinkUpData, mNumberOfHitsPlusAlign);
  GPUProcessor::computePointerWithAlignment(mem, mLinkDownData, mNumberOfHitsPlusAlign);
  return mem;
}

void* GPUTPCTrackingData::SetPointersWeights(void* mem)
{
  GPUProcessor::computePointerWithAlignment(mem, mHitWeights, mNumberOfHitsPlusAlign + 16 / sizeof(*mHitWeights));
  return mem;
}

void* GPUTPCTrackingData::SetPointersScratch(void* mem, bool idsOnGPU)
{
  const int32_t firstHitInBinSize = GetGridSize(mNumberOfHits, GPUCA_ROW_COUNT) + GPUCA_ROW_COUNT * GPUCA_ROWALIGNMENT / sizeof(int32_t);
  GPUProcessor::computePointerWithAlignment(mem, mHitData, mNumberOfHitsPlusAlign);
  GPUProcessor::computePointerWithAlignment(mem, mFirstHitInBin, firstHitInBinSize);
  if (idsOnGPU) {
    mem = SetPointersClusterIds(mem, false); // Hijack the allocation from SetPointersClusterIds
  }
  return mem;
}

void* GPUTPCTrackingData::SetPointersClusterIds(void* mem, bool idsOnGPU)
{
  if (!idsOnGPU) {
    GPUProcessor::computePointerWithAlignment(mem, mClusterDataIndex, mNumberOfHitsPlusAlign);
  }
  return mem;
}

void* GPUTPCTrackingData::SetPointersRows(void* mem)
{
  GPUProcessor::computePointerWithAlignment(mem, mRows, GPUCA_ROW_COUNT + 1);
  return mem;
}

#endif

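// Upper limits for the number of grid bins of one row: in y from the row extent at the minimum bin
// size, in z from the drift length (or, for continuous readout, the z span corresponding to the
// maximum time bin) plus a safety margin, again at the minimum bin size.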
GPUd() void GPUTPCTrackingData::GetMaxNBins(GPUconstantref() const GPUConstantMem* mem, GPUTPCRow* GPUrestrict() row, int32_t& maxY, int32_t& maxZ)
{
  maxY = row->mMaxY * 2.f / GPUCA_MIN_BIN_SIZE + 1;
  maxZ = (mem->param.continuousMaxTimeBin > 0 ? (mem->calibObjects.fastTransformHelper->getCorrMap()->convTimeToZinTimeFrame(0, 0, mem->param.continuousMaxTimeBin)) : mem->param.tpcGeometry.TPCLength()) + 50;
  maxZ = maxZ / GPUCA_MIN_BIN_SIZE + 1;
}

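// Size (in mFirstHitInBin entries) reserved for the bin grids of nRows rows holding nHits hits in
// total: a fixed budget of 128 entries per row plus 4 entries per hit.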
GPUd() uint32_t GPUTPCTrackingData::GetGridSize(uint32_t nHits, uint32_t nRows)
{
  return 128 * nRows + 4 * nHits;
}

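// Sets up the 2D y/z grid of one row: the bin size follows the hit density (roughly one hit per bin,
// via 1/sqrt(nHits)) and is clamped to [GPUCA_MIN_BIN_SIZE, GPUCA_MAX_BIN_SIZE]; the number of bins
// is capped by GetMaxNBins. If the z extent is much larger than the TPC drift length (continuous
// data), the extent is capped and the density rescaled by the corresponding factor.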
GPUdi() void GPUTPCTrackingData::CreateGrid(GPUconstantref() const GPUConstantMem* mem, GPUTPCRow* GPUrestrict() row, float yMin, float yMax, float zMin, float zMax)
{
  float dz = zMax - zMin;
  float tfFactor = 1.f;
  if (dz > GPUTPCGeometry::TPCLength() + 20.f) {
    tfFactor = dz / GPUTPCGeometry::TPCLength();
    dz = GPUTPCGeometry::TPCLength();
  }
  const float norm = CAMath::InvSqrt(row->mNHits / tfFactor);
  float sy = CAMath::Min(CAMath::Max((yMax - yMin) * norm, GPUCA_MIN_BIN_SIZE), GPUCA_MAX_BIN_SIZE);
  float sz = CAMath::Min(CAMath::Max(dz * norm, GPUCA_MIN_BIN_SIZE), GPUCA_MAX_BIN_SIZE);
  int32_t maxy, maxz;
  GetMaxNBins(mem, row, maxy, maxz);
  int32_t ny = CAMath::Max(1, CAMath::Min<int32_t>(maxy, (yMax - yMin) / sy + 1));
  int32_t nz = CAMath::Max(1, CAMath::Min<int32_t>(maxz, (zMax - zMin) / sz + 1));
  row->mGrid.Create(yMin, yMax, zMin, zMax, ny, nz);
}

GPUdi() static void UpdateMinMaxYZ(float& yMin, float& yMax, float& zMin, float& zMax, float y, float z)
{
  if (yMax < y) {
    yMax = y;
  }
  if (yMin > y) {
    yMin = y;
  }
  if (zMax < z) {
    zMax = z;
  }
  if (zMin > z) {
    zMin = z;
  }
}

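// Marks a row as empty: no hits, an empty grid, and unit packing steps; the first mFirstHitInBin
// entries of the row are cleared so that lookups on the empty grid remain valid.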
GPUd() void GPUTPCTrackingData::SetRowGridEmpty(GPUTPCRow& row)
{
  GPUAtomic(calink)* c = (GPUAtomic(calink)*)mFirstHitInBin + row.mFirstHitInBinOffset;
  row.mGrid.CreateEmpty();
  row.mNHits = 0;
  row.mHitNumberOffset = 0;
  row.mHy0 = 0.f;
  row.mHz0 = 0.f;
  row.mHstepY = 1.f;
  row.mHstepZ = 1.f;
  row.mHstepYi = 1.f;
  row.mHstepZi = 1.f;
  for (int32_t i = 0; i < 4; i++) {
    c[i] = 0;
  }
}

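// Builds the per-row tracking data from the cluster input: clusters are transformed to y/z (unless
// already transformed), the y/z extent of each row is determined, a 2D grid is created per row, the
// hits are sorted into the grid bins with a counting sort, and the packed hit coordinates are stored
// in bin-sorted order together with the index mapping back to the original clusters.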
GPUdii() int32_t GPUTPCTrackingData::InitFromClusterData(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUconstantref() const GPUConstantMem* GPUrestrict() mem, int32_t iSector, float* tmpMinMax)
{
#ifdef GPUCA_GPUCODE
  constexpr bool EarlyTransformWithoutClusterNative = false;
#else
  bool EarlyTransformWithoutClusterNative = mem->param.par.earlyTpcTransform && mem->ioPtrs.clustersNative == nullptr;
#endif
  int32_t* tmpHitIndex = nullptr;
  const uint32_t* NumberOfClustersInRow = nullptr;
  const uint32_t* RowOffsets = nullptr;

#ifndef GPUCA_GPUCODE
  vecpod<float2> YZData(mNumberOfHits);
  vecpod<calink> binMemory(mNumberOfHits);
  uint32_t RowOffsetsA[GPUCA_ROW_COUNT];
  uint32_t NumberOfClustersInRowA[GPUCA_ROW_COUNT];

  vecpod<int32_t> tmpHitIndexA;
  if (EarlyTransformWithoutClusterNative) { // Implies mem->param.par.earlyTpcTransform but no ClusterNative present
    NumberOfClustersInRow = NumberOfClustersInRowA;
    RowOffsets = RowOffsetsA;
    tmpHitIndexA.resize(mNumberOfHits);
    tmpHitIndex = tmpHitIndexA.data();

    memset(NumberOfClustersInRowA, 0, GPUCA_ROW_COUNT * sizeof(NumberOfClustersInRowA[0]));
    for (int32_t i = 0; i < mNumberOfHits; i++) {
      const int32_t tmpRow = mClusterData[i].row;
      NumberOfClustersInRowA[tmpRow]++;
    }
    int32_t tmpOffset = 0;
    for (int32_t i = 0; i < GPUCA_ROW_COUNT; i++) {
      RowOffsetsA[i] = tmpOffset;
      tmpOffset += NumberOfClustersInRow[i];
    }
    int32_t RowsFilled[GPUCA_ROW_COUNT];
    memset(RowsFilled, 0, GPUCA_ROW_COUNT * sizeof(int32_t));
    for (int32_t i = 0; i < mNumberOfHits; i++) {
      float2 tmp;
      tmp.x = mClusterData[i].y;
      tmp.y = mClusterData[i].z;
      int32_t tmpRow = mClusterData[i].row;
      int32_t newIndex = RowOffsetsA[tmpRow] + (RowsFilled[tmpRow])++;
      YZData[newIndex] = tmp;
      tmpHitIndex[newIndex] = i;
    }
  } // Other cases below in loop over rows
#else
  float2* YZData = (float2*)mLinkUpData; // TODO: we can do this as well on the CPU, just must make sure that CPU has the scratch memory
  calink* binMemory = (calink*)mHitWeights;
  static_assert(sizeof(*YZData) <= (sizeof(*mLinkUpData) + sizeof(*mLinkDownData)), "Cannot reuse memory");
  static_assert(sizeof(*binMemory) <= sizeof(*mHitWeights), "Cannot reuse memory");
#endif

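  // Rows are distributed over the blocks; the threads of one block process the hits of a row
  // cooperatively, with barriers between the individual phases.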
  for (int32_t rowIndex = iBlock; rowIndex < GPUCA_ROW_COUNT; rowIndex += nBlocks) {
    float yMin = 1.e6f;
    float yMax = -1.e6f;
    float zMin = 1.e6f;
    float zMax = -1.e6f;

    const uint32_t NumberOfClusters = EarlyTransformWithoutClusterNative ? NumberOfClustersInRow[rowIndex] : mem->ioPtrs.clustersNative->nClusters[iSector][rowIndex];
    const uint32_t RowOffset = EarlyTransformWithoutClusterNative ? RowOffsets[rowIndex] : (mem->ioPtrs.clustersNative->clusterOffset[iSector][rowIndex] - mem->ioPtrs.clustersNative->clusterOffset[iSector][0]);
    constexpr const uint32_t maxN = 1u << (sizeof(calink) < 3 ? (sizeof(calink) * 8) : 24);
    GPUTPCRow& row = mRows[rowIndex];
    if (iThread == 0) {
      row.mFirstHitInBinOffset = CAMath::nextMultipleOf<GPUCA_ROWALIGNMENT / sizeof(calink)>(GetGridSize(RowOffset, rowIndex) + rowIndex * GPUCA_ROWALIGNMENT / sizeof(int32_t));
    }
    if (NumberOfClusters >= maxN) {
      if (iThread == 0) {
        mem->errorCodes.raiseError(GPUErrors::ERROR_SECTORDATA_HITINROW_OVERFLOW, iSector * 1000 + rowIndex, NumberOfClusters, maxN);
        SetRowGridEmpty(row);
      }
      continue;
    }

    if (iThread == 0) {
      tmpMinMax[0] = yMin;
      tmpMinMax[1] = yMax;
      tmpMinMax[2] = zMin;
      tmpMinMax[3] = zMax;
    }
    GPUbarrier();
    GPUAtomic(calink)* c = (GPUAtomic(calink)*)mFirstHitInBin + row.mFirstHitInBinOffset;
    if (NumberOfClusters == 0) {
      if (iThread == 0) {
        SetRowGridEmpty(row);
      }
      continue;
    }

    if (EarlyTransformWithoutClusterNative) {
      for (uint32_t i = iThread; i < NumberOfClusters; i += nThreads) {
        UpdateMinMaxYZ(yMin, yMax, zMin, zMax, YZData[RowOffset + i].x, YZData[RowOffset + i].y);
      }
    } else if (mem->param.par.earlyTpcTransform) { // Early transform case with ClusterNative present
      for (uint32_t i = iThread; i < NumberOfClusters; i += nThreads) {
        float2 tmp;
        tmp.x = mClusterData[RowOffset + i].y;
        tmp.y = mClusterData[RowOffset + i].z;
        UpdateMinMaxYZ(yMin, yMax, zMin, zMax, tmp.x, tmp.y);
        YZData[RowOffset + i] = tmp;
      }
    } else {
      for (uint32_t i = iThread; i < NumberOfClusters; i += nThreads) {
        float x, y, z;
        GPUTPCConvertImpl::convert(*mem, iSector, rowIndex, mem->ioPtrs.clustersNative->clusters[iSector][rowIndex][i].getPad(), mem->ioPtrs.clustersNative->clusters[iSector][rowIndex][i].getTime(), x, y, z);
        UpdateMinMaxYZ(yMin, yMax, zMin, zMax, y, z);
        YZData[RowOffset + i] = CAMath::MakeFloat2(y, z);
      }
    }

    if (iThread == 0) {
      row.mNHits = NumberOfClusters;
      row.mHitNumberOffset = CAMath::nextMultipleOf<GPUCA_ROWALIGNMENT / sizeof(calink)>(RowOffset + rowIndex * GPUCA_ROWALIGNMENT / sizeof(calink));
    }

#ifdef GPUCA_HAVE_ATOMIC_MINMAX_FLOAT
    CAMath::AtomicMinShared(&tmpMinMax[0], yMin);
    CAMath::AtomicMaxShared(&tmpMinMax[1], yMax);
    CAMath::AtomicMinShared(&tmpMinMax[2], zMin);
    CAMath::AtomicMaxShared(&tmpMinMax[3], zMax);
#else
    for (int32_t i = 0; i < nThreads; i++) {
      GPUbarrier();
      if (iThread == i) {
        if (tmpMinMax[0] > yMin) {
          tmpMinMax[0] = yMin;
        }
        if (tmpMinMax[1] < yMax) {
          tmpMinMax[1] = yMax;
        }
        if (tmpMinMax[2] > zMin) {
          tmpMinMax[2] = zMin;
        }
        if (tmpMinMax[3] < zMax) {
          tmpMinMax[3] = zMax;
        }
      }
    }
#endif
    GPUbarrier();
    if (iThread == 0) {
      CreateGrid(mem, &row, tmpMinMax[0], tmpMinMax[1], tmpMinMax[2], tmpMinMax[3]);
    }
    GPUbarrier();
    const GPUTPCGrid& grid = row.mGrid;
    const int32_t numberOfBins = grid.N();
    constexpr const int32_t maxBins = sizeof(calink) < 4 ? (int32_t)(1ul << (sizeof(calink) * 8)) : 0x7FFFFFFF; // NOLINT: false warning
    if (sizeof(calink) < 4 && numberOfBins >= maxBins) {
      if (iThread == 0) {
        mem->errorCodes.raiseError(GPUErrors::ERROR_SECTORDATA_BIN_OVERFLOW, iSector * 1000 + rowIndex, numberOfBins, maxBins);
        SetRowGridEmpty(row);
      }
      continue;
    }
    const uint32_t nn = numberOfBins + grid.Ny() + 3;
    const uint32_t maxnn = GetGridSize(NumberOfClusters, 1);
    if (nn >= maxnn) {
      if (iThread == 0) {
        mem->errorCodes.raiseError(GPUErrors::ERROR_SECTORDATA_FIRSTHITINBIN_OVERFLOW, iSector, nn, maxnn);
        SetRowGridEmpty(row);
      }
      continue;
    }

    calink* bins = &binMemory[RowOffset]; // Temporary per-hit bin indices (on the GPU this reuses the mHitWeights memory)

    for (int32_t bin = iThread; bin < numberOfBins; bin += nThreads) {
      c[bin] = 0; // initialize to 0
    }
    GPUbarrier();
    for (int32_t hitIndex = iThread; hitIndex < row.mNHits; hitIndex += nThreads) {
      const int32_t globalHitIndex = RowOffset + hitIndex;
      const calink bin = row.mGrid.GetBin(YZData[globalHitIndex].x, YZData[globalHitIndex].y);

      bins[hitIndex] = bin;
      CAMath::AtomicAdd(&c[bin], 1u);
    }
    GPUbarrier();

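    // Inclusive prefix sum over the bin counts: afterwards c[bin] holds the index one past the last
    // hit of bin "bin"; the remaining entries up to nn are padded with the total number of hits.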
    if (iThread == 0) {
      calink n = 0;
      for (int32_t bin = 0; bin < numberOfBins; ++bin) { // TODO: Parallelize
        n += c[bin];
        c[bin] = n;
      }
      for (uint32_t bin = numberOfBins; bin < nn; bin++) {
        c[bin] = n;
      }
    }
    GPUbarrier();

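    // Pack the y/z coordinates as fixed-point offsets relative to the grid origin: with these steps
    // the values map into [0, maxVal] and fit into a cahit without changing the relative ordering.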
    constexpr float maxVal = (((int64_t)1 << (sizeof(cahit) < 3 ? sizeof(cahit) * 8 : 24)) - 1); // Stay within float precision in any case!
    constexpr float packingConstant = 1.f / (maxVal - 2.f);
    const float y0 = row.mGrid.YMin();
    const float z0 = row.mGrid.ZMin();
    const float stepY = (row.mGrid.YMax() - y0) * packingConstant;
    const float stepZ = (row.mGrid.ZMax() - z0) * packingConstant;
    const float stepYi = 1.f / stepY;
    const float stepZi = 1.f / stepZ;

    if (iThread == 0) {
      row.mHy0 = y0;
      row.mHz0 = z0;
      row.mHstepY = stepY;
      row.mHstepZ = stepZ;
      row.mHstepYi = stepYi;
      row.mHstepZi = stepZi;
    }

    GPUbarrier();

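    // Counting-sort scatter: each hit atomically decrements the end index of its bin, which gives it
    // a unique slot inside that bin; mClusterDataIndex records the mapping from the bin-sorted
    // position back to the original cluster index.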
    for (int32_t hitIndex = iThread; hitIndex < row.mNHits; hitIndex += nThreads) {
      const calink bin = bins[hitIndex];
      const calink ind = CAMath::AtomicAdd(&c[bin], (calink)-1) - 1; // generate an index for this hit that is >= c[bin] and < c[bin + 1]
      const int32_t globalBinsortedIndex = row.mHitNumberOffset + ind;
      const int32_t globalHitIndex = RowOffset + hitIndex;

      // allows finding the global hit index / coordinates from a global bin-sorted hit index
      mClusterDataIndex[globalBinsortedIndex] = EarlyTransformWithoutClusterNative ? tmpHitIndex[globalHitIndex] : (RowOffset + hitIndex);

      const float xx = ((YZData[globalHitIndex].x - y0) * stepYi) + .5;
      const float yy = ((YZData[globalHitIndex].y - z0) * stepZi) + .5;
#if !defined(GPUCA_GPUCODE) && !defined(NDEBUG)
      if (xx < 0 || yy < 0 || xx > maxVal || yy > maxVal) {
        std::cout << "!!!! hit packing error!!! " << xx << " " << yy << " (" << maxVal << ")" << std::endl;
        return 1;
      }
#endif
      // HitData is bin sorted
      mHitData[globalBinsortedIndex].x = (cahit)xx;
      mHitData[globalBinsortedIndex].y = (cahit)yy;
    }

    GPUbarrier();

    if (iThread == 0 && !mem->param.par.continuousTracking) {
      const float maxAbsZ = CAMath::Max(CAMath::Abs(tmpMinMax[2]), CAMath::Abs(tmpMinMax[3]));
      if (maxAbsZ > 300) {
        mem->errorCodes.raiseError(GPUErrors::ERROR_SECTORDATA_Z_OVERFLOW, iSector, (uint32_t)maxAbsZ);
        SetRowGridEmpty(row);
        continue;
      }
    }
  }

  return 0;
}