GPUTPCTrackingData.cxx
// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
// All rights not expressly granted are reserved.
//
// This software is distributed under the terms of the GNU General Public
// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
//
// In applying this license CERN does not waive the privileges and immunities
// granted to it by virtue of its status as an Intergovernmental Organization
// or submit itself to any jurisdiction.

/// \file GPUTPCTrackingData.cxx

#include "GPUParam.h"
#include "GPUTPCHit.h"
#include "GPUTPCTrackingData.h"
#include "GPUProcessor.h"
#include "GPUO2DataTypes.h"
#include "GPUTPCConvertImpl.h"
#include "GPUTPCGeometry.h"
#include "GPUCommonMath.h"

#ifndef GPUCA_GPUCODE_DEVICE
#include "utils/vecpod.h"
#include <iostream>
#include <cstring>
#include "GPUReconstruction.h"
#endif

using namespace o2::gpu;

#ifndef GPUCA_GPUCODE

void GPUTPCTrackingData::InitializeRows(const GPUParam& p)
{
  // initialisation of rows
  for (int32_t i = 0; i < GPUCA_ROW_COUNT + 1; i++) {
    new (&mRows[i]) GPUTPCRow;
  }
  for (int32_t i = 0; i < GPUCA_ROW_COUNT; i++) {
    mRows[i].mX = GPUTPCGeometry::Row2X(i);
    mRows[i].mMaxY = CAMath::Tan(p.dAlpha / 2.f) * mRows[i].mX;
  }
}

void GPUTPCTrackingData::SetClusterData(int32_t nClusters, int32_t clusterIdOffset)
{
  mNumberOfHits = nClusters;
  mClusterIdOffset = clusterIdOffset;
}

void GPUTPCTrackingData::SetMaxData()
{
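  // Reserve one alignment block of padding per row on top of the hit count,
  // then round the total up to the larger of the vector and row alignments, so
  // per-row pointers can later be aligned without overrunning the allocation.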
  int32_t hitMemCount = GPUCA_ROW_COUNT * GPUCA_ROWALIGNMENT + mNumberOfHits;
  const uint32_t kVectorAlignment = 256;
  mNumberOfHitsPlusAlign = GPUProcessor::nextMultipleOf<(kVectorAlignment > GPUCA_ROWALIGNMENT ? kVectorAlignment : GPUCA_ROWALIGNMENT) / sizeof(int32_t)>(hitMemCount);
}

void* GPUTPCTrackingData::SetPointersLinks(void* mem)
{
  GPUProcessor::computePointerWithAlignment(mem, mLinkUpData, mNumberOfHitsPlusAlign);
  GPUProcessor::computePointerWithAlignment(mem, mLinkDownData, mNumberOfHitsPlusAlign);
  return mem;
}

void* GPUTPCTrackingData::SetPointersWeights(void* mem)
{
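  // A few extra entries (16 bytes' worth) beyond the aligned hit count,
  // presumably as headroom for vectorized accesses past the last hit.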
  GPUProcessor::computePointerWithAlignment(mem, mHitWeights, mNumberOfHitsPlusAlign + 16 / sizeof(*mHitWeights));
  return mem;
}

void* GPUTPCTrackingData::SetPointersScratch(void* mem, bool idsOnGPU)
{
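  // First-hit-in-bin table: grid entries for all rows plus per-row alignment padding.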
  const int32_t firstHitInBinSize = GetGridSize(mNumberOfHits, GPUCA_ROW_COUNT) + GPUCA_ROW_COUNT * GPUCA_ROWALIGNMENT / sizeof(int32_t);
  GPUProcessor::computePointerWithAlignment(mem, mHitData, mNumberOfHitsPlusAlign);
  GPUProcessor::computePointerWithAlignment(mem, mFirstHitInBin, firstHitInBinSize);
  if (idsOnGPU) {
    mem = SetPointersClusterIds(mem, false); // Hijack the allocation from SetPointersClusterIds
  }
  return mem;
}

void* GPUTPCTrackingData::SetPointersClusterIds(void* mem, bool idsOnGPU)
{
  if (!idsOnGPU) {
    GPUProcessor::computePointerWithAlignment(mem, mClusterDataIndex, mNumberOfHitsPlusAlign);
  }
  return mem;
}

void* GPUTPCTrackingData::SetPointersRows(void* mem)
{
  GPUProcessor::computePointerWithAlignment(mem, mRows, GPUCA_ROW_COUNT + 1);
  return mem;
}

#endif

GPUd() void GPUTPCTrackingData::GetMaxNBins(GPUconstantref() const GPUConstantMem* mem, GPUTPCRow* GPUrestrict() row, int32_t& maxY, int32_t& maxZ)
{
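  // Upper limits on the grid bin counts: the full row width in y, and the z
  // extent (drift length, or the continuous-readout span, plus margin), both
  // measured in units of the minimum bin size.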
  maxY = row->mMaxY * 2.f / GPUCA_MIN_BIN_SIZE + 1;
  maxZ = (mem->param.continuousMaxTimeBin > 0 ? (mem->calibObjects.fastTransformHelper->getCorrMap()->convTimeToZinTimeFrame(0, 0, mem->param.continuousMaxTimeBin)) : GPUTPCGeometry::TPCLength()) + 50;
  maxZ = maxZ / GPUCA_MIN_BIN_SIZE + 1;
}

GPUd() uint32_t GPUTPCTrackingData::GetGridSize(uint32_t nHits, uint32_t nRows)
{
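  // Allocation budget for the first-hit-in-bin table: 128 entries per row plus
  // 4 per hit, a generous upper bound on the bins CreateGrid will produce.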
  return 128 * nRows + 4 * nHits;
}

GPUdi() void GPUTPCTrackingData::CreateGrid(GPUconstantref() const GPUConstantMem* mem, GPUTPCRow* GPUrestrict() row, float yMin, float yMax, float zMin, float zMax)
{
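  // Choose bin sizes aiming at roughly one hit per bin: side length
  // ~ extent / sqrt(nHits), clamped to [GPUCA_MIN_BIN_SIZE, GPUCA_MAX_BIN_SIZE].
  // Continuous data spanning more than one TPC length caps the z extent and
  // rescales the hit density by tfFactor instead.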
  float dz = zMax - zMin;
  float tfFactor = 1.f;
  if (dz > GPUTPCGeometry::TPCLength() + 20.f) {
    tfFactor = dz / GPUTPCGeometry::TPCLength();
    dz = GPUTPCGeometry::TPCLength();
  }
  const float norm = CAMath::InvSqrt(row->mNHits / tfFactor);
  float sy = CAMath::Min(CAMath::Max((yMax - yMin) * norm, GPUCA_MIN_BIN_SIZE), GPUCA_MAX_BIN_SIZE);
  float sz = CAMath::Min(CAMath::Max(dz * norm, GPUCA_MIN_BIN_SIZE), GPUCA_MAX_BIN_SIZE);
  int32_t maxy, maxz;
  GetMaxNBins(mem, row, maxy, maxz);
  int32_t ny = CAMath::Max(1, CAMath::Min<int32_t>(maxy, (yMax - yMin) / sy + 1));
  int32_t nz = CAMath::Max(1, CAMath::Min<int32_t>(maxz, (zMax - zMin) / sz + 1));
  row->mGrid.Create(yMin, yMax, zMin, zMax, ny, nz);
}

GPUdi() static void UpdateMinMaxYZ(float& yMin, float& yMax, float& zMin, float& zMax, float y, float z)
{
  if (yMax < y) {
    yMax = y;
  }
  if (yMin > y) {
    yMin = y;
  }
  if (zMax < z) {
    zMax = z;
  }
  if (zMin > z) {
    zMin = z;
  }
}

GPUd() void GPUTPCTrackingData::SetRowGridEmpty(GPUTPCRow& row)
{
  GPUAtomic(calink)* c = (GPUAtomic(calink)*)mFirstHitInBin + row.mFirstHitInBinOffset;
  row.mGrid.CreateEmpty();
  row.mNHits = 0;
  row.mHitNumberOffset = 0;
  row.mHy0 = 0.f;
  row.mHz0 = 0.f;
  row.mHstepY = 1.f;
  row.mHstepZ = 1.f;
  row.mHstepYi = 1.f;
  row.mHstepZi = 1.f;
  for (int32_t i = 0; i < 4; i++) {
    c[i] = 0;
  }
}

GPUdii() int32_t GPUTPCTrackingData::InitFromClusterData(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUconstantref() const GPUConstantMem* GPUrestrict() mem, int32_t iSector, float* tmpMinMax)
{
#ifndef GPUCA_GPUCODE
  vecpod<float2> YZData(mNumberOfHits);
  vecpod<calink> binMemory(mNumberOfHits);
  vecpod<int32_t> tmpHitIndexA;
#else
  float2* YZData = (float2*)mLinkUpData; // TODO: we can do this as well on the CPU, just must make sure that CPU has the scratch memory
  calink* binMemory = (calink*)mHitWeights;
  static_assert(sizeof(*YZData) <= (sizeof(*mLinkUpData) + sizeof(*mLinkDownData)), "Cannot reuse memory");
  static_assert(sizeof(*binMemory) <= sizeof(*mHitWeights), "Cannot reuse memory");
#endif

  for (int32_t rowIndex = iBlock; rowIndex < GPUCA_ROW_COUNT; rowIndex += nBlocks) {
    float yMin = 1.e6f;
    float yMax = -1.e6f;
    float zMin = 1.e6f;
    float zMax = -1.e6f;

    const uint32_t NumberOfClusters = mem->ioPtrs.clustersNative->nClusters[iSector][rowIndex];
    const uint32_t RowOffset = mem->ioPtrs.clustersNative->clusterOffset[iSector][rowIndex] - mem->ioPtrs.clustersNative->clusterOffset[iSector][0];
    constexpr const uint32_t maxN = 1u << (sizeof(calink) < 3 ? (sizeof(calink) * 8) : 24);
    GPUTPCRow& row = mRows[rowIndex];
    if (iThread == 0) {
      row.mFirstHitInBinOffset = CAMath::nextMultipleOf<GPUCA_ROWALIGNMENT / sizeof(calink)>(GetGridSize(RowOffset, rowIndex) + rowIndex * GPUCA_ROWALIGNMENT / sizeof(int32_t));
    }
    if (NumberOfClusters >= maxN) {
      if (iThread == 0) {
        mem->errorCodes.raiseError(GPUErrors::ERROR_SECTORDATA_HITINROW_OVERFLOW, iSector * 1000 + rowIndex, NumberOfClusters, maxN);
        SetRowGridEmpty(row);
      }
      continue;
    }

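    // Thread 0 seeds the shared min/max with sentinel values; all threads
    // reduce their local bounding boxes into it after the conversion loop.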
    if (iThread == 0) {
      tmpMinMax[0] = yMin;
      tmpMinMax[1] = yMax;
      tmpMinMax[2] = zMin;
      tmpMinMax[3] = zMax;
    }
    GPUbarrier();
    GPUAtomic(calink)* c = (GPUAtomic(calink)*)mFirstHitInBin + row.mFirstHitInBinOffset;
    if (NumberOfClusters == 0) {
      if (iThread == 0) {
        SetRowGridEmpty(row);
      }
      continue;
    }

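    // Transform each cluster from pad/time to local x/y/z and accumulate the
    // row's bounding box in y and z.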
    for (uint32_t i = iThread; i < NumberOfClusters; i += nThreads) {
      float x, y, z;
      GPUTPCConvertImpl::convert(*mem, iSector, rowIndex, mem->ioPtrs.clustersNative->clusters[iSector][rowIndex][i].getPad(), mem->ioPtrs.clustersNative->clusters[iSector][rowIndex][i].getTime(), x, y, z);
      UpdateMinMaxYZ(yMin, yMax, zMin, zMax, y, z);
      YZData[RowOffset + i] = CAMath::MakeFloat2(y, z);
    }

    if (iThread == 0) {
      row.mNHits = NumberOfClusters;
      row.mHitNumberOffset = CAMath::nextMultipleOf<GPUCA_ROWALIGNMENT / sizeof(calink)>(RowOffset + rowIndex * GPUCA_ROWALIGNMENT / sizeof(calink));
    }

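    // Merge the per-thread bounding boxes into the shared tmpMinMax: with
    // atomic float min/max where available, otherwise by letting the threads
    // update it one at a time.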
#ifdef GPUCA_HAVE_ATOMIC_MINMAX_FLOAT
    CAMath::AtomicMinShared(&tmpMinMax[0], yMin);
    CAMath::AtomicMaxShared(&tmpMinMax[1], yMax);
    CAMath::AtomicMinShared(&tmpMinMax[2], zMin);
    CAMath::AtomicMaxShared(&tmpMinMax[3], zMax);
#else
    for (int32_t i = 0; i < nThreads; i++) {
      GPUbarrier();
      if (iThread == i) {
        if (tmpMinMax[0] > yMin) {
          tmpMinMax[0] = yMin;
        }
        if (tmpMinMax[1] < yMax) {
          tmpMinMax[1] = yMax;
        }
        if (tmpMinMax[2] > zMin) {
          tmpMinMax[2] = zMin;
        }
        if (tmpMinMax[3] < zMax) {
          tmpMinMax[3] = zMax;
        }
      }
    }
#endif
    GPUbarrier();
    if (iThread == 0) {
      CreateGrid(mem, &row, tmpMinMax[0], tmpMinMax[1], tmpMinMax[2], tmpMinMax[3]);
    }
    GPUbarrier();
    const GPUTPCGrid& grid = row.mGrid;
    const int32_t numberOfBins = grid.N();
    constexpr const int32_t maxBins = sizeof(calink) < 4 ? (int32_t)(1ul << (sizeof(calink) * 8)) : 0x7FFFFFFF; // NOLINT: false warning
    if (sizeof(calink) < 4 && numberOfBins >= maxBins) {
      if (iThread == 0) {
        mem->errorCodes.raiseError(GPUErrors::ERROR_SECTORDATA_BIN_OVERFLOW, iSector * 1000 + rowIndex, numberOfBins, maxBins);
        SetRowGridEmpty(row);
      }
      continue;
    }
    const uint32_t nn = numberOfBins + grid.Ny() + 3;
    const uint32_t maxnn = GetGridSize(NumberOfClusters, 1);
    if (nn >= maxnn) {
      if (iThread == 0) {
        mem->errorCodes.raiseError(GPUErrors::ERROR_SECTORDATA_FIRSTHITINBIN_OVERFLOW, iSector, nn, maxnn);
        SetRowGridEmpty(row);
      }
      continue;
    }

    calink* bins = &binMemory[RowOffset]; // Reuse binMemory (mHitWeights on the GPU) as temporary storage for the per-hit bin indices

    for (int32_t bin = iThread; bin < numberOfBins; bin += nThreads) {
      c[bin] = 0; // initialize to 0
    }
    GPUbarrier();
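    // Counting sort, pass 1: histogram the hits into their grid bins.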
    for (int32_t hitIndex = iThread; hitIndex < row.mNHits; hitIndex += nThreads) {
      const int32_t globalHitIndex = RowOffset + hitIndex;
      const calink bin = row.mGrid.GetBin(YZData[globalHitIndex].x, YZData[globalHitIndex].y);

      bins[hitIndex] = bin;
      CAMath::AtomicAdd(&c[bin], 1u);
    }
    GPUbarrier();

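    // Counting sort, pass 2: inclusive prefix sum over the bin counts, so that
    // c[bin] holds each bin's end offset; the padding entries past the last
    // bin are filled with the total.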
    if (iThread == 0) {
      calink n = 0;
      for (int32_t bin = 0; bin < numberOfBins; ++bin) { // TODO: Parallelize
        n += c[bin];
        c[bin] = n;
      }
      for (uint32_t bin = numberOfBins; bin < nn; bin++) {
        c[bin] = n;
      }
    }
    GPUbarrier();

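    // Pack the grid-local (y, z) of each hit into integer cahit coordinates:
    // up to maxVal quantization steps across the grid extent, with maxVal
    // capped so the packed values stay exactly representable as floats.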
    constexpr float maxVal = (((int64_t)1 << (sizeof(cahit) < 3 ? sizeof(cahit) * 8 : 24)) - 1); // Stay within float precision in any case!
    constexpr float packingConstant = 1.f / (maxVal - 2.f);
    const float y0 = row.mGrid.YMin();
    const float z0 = row.mGrid.ZMin();
    const float stepY = (row.mGrid.YMax() - y0) * packingConstant;
    const float stepZ = (row.mGrid.ZMax() - z0) * packingConstant;
    const float stepYi = 1.f / stepY;
    const float stepZi = 1.f / stepZ;

    if (iThread == 0) {
      row.mHy0 = y0;
      row.mHz0 = z0;
      row.mHstepY = stepY;
      row.mHstepZ = stepZ;
      row.mHstepYi = stepYi;
      row.mHstepZi = stepZi;
    }

    GPUbarrier();

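    // Counting sort, pass 3: scatter. Each hit atomically decrements its bin's
    // end offset to claim a unique slot, yielding hits sorted by bin.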
    for (int32_t hitIndex = iThread; hitIndex < row.mNHits; hitIndex += nThreads) {
      const calink bin = bins[hitIndex];
      const calink ind = CAMath::AtomicAdd(&c[bin], (calink)-1) - 1; // generate an index for this hit that is >= c[bin] and < c[bin + 1]
      const int32_t globalBinsortedIndex = row.mHitNumberOffset + ind;
      const int32_t globalHitIndex = RowOffset + hitIndex;

      // allows finding the global hit index / coordinates from a global bin-sorted hit index
      mClusterDataIndex[globalBinsortedIndex] = RowOffset + hitIndex;

      const float xx = ((YZData[globalHitIndex].x - y0) * stepYi) + .5;
      const float yy = ((YZData[globalHitIndex].y - z0) * stepZi) + .5;
#if !defined(GPUCA_GPUCODE) && !defined(NDEBUG)
      if (xx < 0 || yy < 0 || xx > maxVal || yy > maxVal) {
        std::cout << "!!!! hit packing error!!! " << xx << " " << yy << " (" << maxVal << ")" << std::endl;
        return 1;
      }
#endif
      // HitData is bin sorted
      mHitData[globalBinsortedIndex].x = (cahit)xx;
      mHitData[globalBinsortedIndex].y = (cahit)yy;
    }

    GPUbarrier();

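    // Sanity check for triggered (non-continuous) data: a transformed |z|
    // beyond 300 cm lies well outside the TPC and most likely indicates
    // corrupted input or a stale calibration.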
    if (iThread == 0 && !mem->param.par.continuousTracking) {
      const float maxAbsZ = CAMath::Max(CAMath::Abs(tmpMinMax[2]), CAMath::Abs(tmpMinMax[3]));
      if (maxAbsZ > 300) {
        mem->errorCodes.raiseError(GPUErrors::ERROR_SECTORDATA_Z_OVERFLOW, iSector, (uint32_t)maxAbsZ);
        SetRowGridEmpty(row);
        continue;
      }
    }
  }

  return 0;
}