Project
Loading...
Searching...
No Matches
GPUTPCNeighboursFinder.cxx
Go to the documentation of this file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
14
15#include "GPUTPCHit.h"
17#include "GPUTPCTracker.h"
18// #include "GPUCommonMath.h"
19#include "GPUDefMacros.h"
20using namespace o2::gpu;
21
// Neighbours finder, step 0.
//
// The block with index iBlock works on the row with the same index; the threads of the block
// stride over the hits of that row (ih = iThread, iThread + nThreads, ...). For every hit the
// function collects candidate hits from the row two above (iBlock + 2), then scans candidate
// hits from the row two below (iBlock - 2) and keeps the (up, down) pair with the smallest
// squared deviation d below chi2Cut. The offsets of the upper hit are scaled by mDnDx and those
// of the lower hit by mUpDx before they are compared, so d is zero when the three hits lie on
// a straight line in (x, y) and (x, z). The resulting hit indices are written to
// tracker.mData.mLinkUpData / mLinkDownData; -1 is written when no pair passes the cut, and
// CALINK_INVAL is written for every hit of a row that has no usable up/down rows.
//
// Parameters:
//   nBlocks  - unused
//   nThreads - number of threads per block, used as the stride of the per-hit loops
//   iBlock   - block index, used as the row index
//   iThread  - thread index within the block; also selects the per-thread column of s.mA1/mA2/mB
//   s        - per-block scratch memory (declared GPUsharedref())
//   tracker  - processor whose row / hit data are read and whose link arrays are written
22template <>
23GPUdii() void GPUTPCNeighboursFinder::Thread<0>(int32_t /*nBlocks*/, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& s, processorType& GPUrestrict() tracker)
24{
25 //* find neighbours
26
 // GPUCA_GPUCODE build: the threads copy the GPUTPCRow of this row into s.mRow in int32_t-sized
 // words, and - only for rows that have both neighbour rows - the rows iBlock + 2 / iBlock - 2
 // into s.mRowUp / s.mRowDown. GPUbarrier() follows before the copies are read.
 // NOTE(review): the copy length is sizeof(GPUTPCRow) / sizeof(int32_t), i.e. this assumes
 // sizeof(GPUTPCRow) is a multiple of sizeof(int32_t) - TODO confirm.
27#ifdef GPUCA_GPUCODE
28 for (uint32_t i = iThread; i < sizeof(GPUTPCRow) / sizeof(int32_t); i += nThreads) {
29 reinterpret_cast<GPUsharedref() int32_t*>(&s.mRow)[i] = reinterpret_cast<GPUglobalref() int32_t*>(&tracker.TrackingDataRows()[iBlock])[i];
30 if (iBlock >= 2 && iBlock < GPUCA_ROW_COUNT - 2) {
31 reinterpret_cast<GPUsharedref() int32_t*>(&s.mRowUp)[i] = reinterpret_cast<GPUglobalref() int32_t*>(&tracker.TrackingDataRows()[iBlock + 2])[i];
32 reinterpret_cast<GPUsharedref() int32_t*>(&s.mRowDown)[i] = reinterpret_cast<GPUglobalref() int32_t*>(&tracker.TrackingDataRows()[iBlock - 2])[i];
33 }
34 }
35 GPUbarrier();
36 const GPUsharedref() GPUTPCRow& GPUrestrict() row = s.mRow;
37 const GPUsharedref() GPUTPCRow& GPUrestrict() rowUp = s.mRowUp;
38 const GPUsharedref() GPUTPCRow& GPUrestrict() rowDn = s.mRowDown;
39#else
 // Other builds: refer to the rows inside tracker.mData directly.
 // NOTE(review): for the first / last two rows the indices iBlock - 2 / iBlock + 2 lie outside
 // 0 .. GPUCA_ROW_COUNT - 1. rowUp / rowDn are only read below after the row index has been
 // checked (the index tests come first in the early-out condition).
40 const GPUglobalref() GPUTPCRow& GPUrestrict() row = tracker.mData.mRows[iBlock];
41 const GPUglobalref() GPUTPCRow& GPUrestrict() rowUp = tracker.mData.mRows[iBlock + 2];
42 const GPUglobalref() GPUTPCRow& GPUrestrict() rowDn = tracker.mData.mRows[iBlock - 2];
43#endif
44
 // Thread 0 publishes the row indices and the hit count in s. For rows that have both neighbour
 // rows it also stores the x-differences (mUpDx, mDnDx) and the x-ratios (mUpTx, mDnTx) to them.
 // The barrier afterwards makes these values available to the other threads of the block.
45 if (iThread == 0) {
46 s.mIRow = iBlock;
47 s.mIRowUp = iBlock + 2;
48 s.mIRowDn = iBlock - 2;
49 if (s.mIRow < GPUCA_ROW_COUNT) {
50 s.mNHits = row.mNHits;
51 if ((s.mIRow >= 2) && (s.mIRow <= GPUCA_ROW_COUNT - 3)) {
52 // the axis perpendicular to the rows
53 const float xDn = rowDn.mX;
54 const float x = row.mX;
55 const float xUp = rowUp.mX;
56
57 // distance of the rows (absolute and relative)
58 s.mUpDx = xUp - x;
59 s.mDnDx = xDn - x;
60 s.mUpTx = xUp / x;
61 s.mDnTx = xDn / x;
62 }
63 }
64 }
65 GPUbarrier();
66
67 // local copies
68
 // No links can be formed for the first / last two rows or when the upper or lower row has no
 // hits: mark both links of every hit in this row as invalid and return. The row-index tests are
 // evaluated first, so rowUp / rowDn are not touched for rows without neighbour rows.
69 if ((s.mIRow <= 1) || (s.mIRow >= GPUCA_ROW_COUNT - 2) || (rowUp.mNHits <= 0) || (rowDn.mNHits <= 0)) {
70 const int32_t lHitNumberOffset = row.mHitNumberOffset;
71 for (int32_t ih = iThread; ih < s.mNHits; ih += nThreads) {
72 tracker.mData.mLinkUpData[lHitNumberOffset + ih] = CALINK_INVAL;
73 tracker.mData.mLinkDownData[lHitNumberOffset + ih] = CALINK_INVAL;
74 }
75 return;
76 }
77
 // Capacity of the per-thread candidate list for hits of the upper row:
 //   - the first MAX_SHARED candidates are kept in s.mB / s.mA1 / s.mA2,
 //   - the following ones (at most MAX_GLOBAL) in the function-local arrays neighUp / yzUp.
 // MAX_GLOBAL is GPUCA_MAXN - MAX_SHARED rounded up to a multiple of UNROLL_GLOBAL, or 0 when
 // MAX_SHARED already covers GPUCA_MAXN. UNROLL_GLOBAL is clamped to at least 1.
78 static constexpr uint32_t UNROLL_GLOBAL = GPUCA_PAR_NEIGHBOURS_FINDER_UNROLL_GLOBAL > 1 ? GPUCA_PAR_NEIGHBOURS_FINDER_UNROLL_GLOBAL : 1;
79 static_assert(GPUCA_MAXN % UNROLL_GLOBAL == 0);
80 static constexpr uint32_t MAX_SHARED = GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP;
81 static constexpr uint32_t MAX_GLOBAL = (MAX_SHARED < GPUCA_MAXN) ? (((GPUCA_MAXN - MAX_SHARED - 1) / UNROLL_GLOBAL + 1) * UNROLL_GLOBAL) : 0;
82 static constexpr uint32_t MAX_TOTAL = MAX_SHARED + MAX_GLOBAL;
83
 // Upper limit for the squared deviation of an accepted pair; used as the start value of bestD.
84 const float chi2Cut = 3.f * 3.f * 4 * (s.mUpDx * s.mUpDx + s.mDnDx * s.mDnDx);
85 // float chi2Cut = 3.f*3.f*(s.mUpDx*s.mUpDx + s.mDnDx*s.mDnDx ); //SG
86
 // Offsets of the three rows into the hit array and of the up / down rows into the
 // first-hit-in-bin array, plus the base pointers of those arrays.
87 const int32_t lHitNumberOffset = row.mHitNumberOffset;
88 const int32_t lHitNumberOffsetUp = rowUp.mHitNumberOffset;
89 const int32_t lHitNumberOffsetDn = rowDn.mHitNumberOffset;
90 const uint32_t lFirstHitInBinOffsetUp = rowUp.mFirstHitInBinOffset;
91 const uint32_t lFirstHitInBinOffsetDn = rowDn.mFirstHitInBinOffset;
92 const GPUglobalref() calink* GPUrestrict() lFirstHitInBin = tracker.mData.mFirstHitInBin;
93 const GPUglobalref() cahit2* GPUrestrict() pHitData = tracker.mData.mHitData;
94
 // Per-row parameters used to turn the stored hit values into y / z: value * step + grid minimum.
95 const float y0 = row.mGrid.mYMin;
96 const float z0 = row.mGrid.mZMin;
97 const float stepY = row.mHstepY;
98 const float stepZ = row.mHstepZ;
99
100 const float y0Up = rowUp.mGrid.mYMin;
101 const float z0Up = rowUp.mGrid.mZMin;
102 const float stepYUp = rowUp.mHstepY;
103 const float stepZUp = rowUp.mHstepZ;
104
105 const float y0Dn = rowDn.mGrid.mYMin;
106 const float z0Dn = rowDn.mGrid.mZMin;
107 const float stepYDn = rowDn.mHstepY;
108 const float stepZDn = rowDn.mHstepZ;
109
 // Search window around the projected hit position. In y the half-width is always kAreaSizeY
 // (neighboursSearchArea). In z there are two modes, selected by searchWindowDZDR:
 //   - non-zero: half-width is mUpDx * searchWindowDZDR (resp. -mDnDx * searchWindowDZDR) and z
 //     is used unscaled (slope 1),
 //   - zero: half-width is kAreaSizeY and z is scaled by the x-ratio mUpTx (resp. mDnTx).
110 const float kAngularMultiplier = tracker.mConstantMem->param.rec.tpc.searchWindowDZDR;
111 const float kAreaSizeY = tracker.mConstantMem->param.rec.tpc.neighboursSearchArea;
112 const float kAreaSizeZUp = kAngularMultiplier != 0.f ? (s.mUpDx * kAngularMultiplier) : kAreaSizeY;
113 const float kAreaSizeZDn = kAngularMultiplier != 0.f ? (-s.mDnDx * kAngularMultiplier) : kAreaSizeY;
114 const float kAreaSlopeZUp = kAngularMultiplier != 0.f ? 1.f : s.mUpTx;
115 const float kAreaSlopeZDn = kAngularMultiplier != 0.f ? 1.f : s.mDnTx;
116
 // Overflow storage for upper-row candidates beyond the first MAX_SHARED ones:
 // neighUp holds the hit index, yzUp holds the scaled (y, z) offsets interleaved.
117 calink neighUp[MAX_GLOBAL];
118 float yzUp[2 * MAX_GLOBAL];
119
120 for (int32_t ih = iThread; ih < s.mNHits; ih += nThreads) {
121
122 const GPUglobalref() cahit2& hitData = pHitData[lHitNumberOffset + ih];
123 const float y = y0 + hitData.x * stepY;
124 const float z = z0 + hitData.y * stepZ;
125
126 uint32_t nNeighUp = 0;
127 float minZ, maxZ, minY, maxY;
128 int32_t binYmin, binYmax, binZmin, binZmax;
129 int32_t nY;
130
 // Project the hit to the upper row and look up the grid bins at the corners of the window.
131 { // area in the upper row
132 const float yy = y * s.mUpTx;
133 const float zz = z * kAreaSlopeZUp;
134 minZ = zz - kAreaSizeZUp;
135 maxZ = zz + kAreaSizeZUp;
136 minY = yy - kAreaSizeY;
137 maxY = yy + kAreaSizeY;
138 rowUp.Grid().GetBin(minY, minZ, &binYmin, &binZmin);
139 rowUp.Grid().GetBin(maxY, maxZ, &binYmax, &binZmax);
140 nY = rowUp.Grid().Ny();
141 }
142
 // Collect up to MAX_TOTAL upper-row hits that lie inside the window. For each z-bin line the
 // hits of bins binYmin..binYmax are the index range [iMin, iMax) taken from lFirstHitInBin.
 // For every accepted hit its row-relative index i and its (y, z) offset from the current hit,
 // multiplied by mDnDx, are stored: slots 0..MAX_SHARED-1 go to s.mB / s.mA1 / s.mA2 (column
 // iThread), later slots go to neighUp / yzUp.
143 for (int32_t k1 = binZmin; k1 <= binZmax && (nNeighUp < MAX_TOTAL); k1++) {
144 int32_t iMin = lFirstHitInBin[lFirstHitInBinOffsetUp + k1 * nY + binYmin];
145 int32_t iMax = lFirstHitInBin[lFirstHitInBinOffsetUp + k1 * nY + binYmax + 1];
146 GPUCA_UNROLL(U(4), U(2))
147 for (int32_t i = iMin; i < iMax && (nNeighUp < MAX_TOTAL); i++) {
148 const GPUglobalref() cahit2& hitDataUp = pHitData[lHitNumberOffsetUp + i];
149 GPUTPCHit h;
150 h.mY = y0Up + (hitDataUp.x) * stepYUp;
151 h.mZ = z0Up + (hitDataUp.y) * stepZUp;
152
153 if (h.mY < minY || h.mY > maxY || h.mZ < minZ || h.mZ > maxZ) {
154 continue;
155 }
156
157 const bool inGlobal = nNeighUp >= MAX_SHARED;
158 if constexpr (MAX_GLOBAL > 0) {
159 if (inGlobal) {
160 neighUp[nNeighUp - MAX_SHARED] = (calink)i;
161 yzUp[2 * (nNeighUp - MAX_SHARED)] = s.mDnDx * (h.Y() - y);
162 yzUp[2 * (nNeighUp - MAX_SHARED) + 1] = s.mDnDx * (h.Z() - z);
163 }
164 }
165 if constexpr (MAX_SHARED > 0) {
166 if (!inGlobal) {
167 s.mB[nNeighUp][iThread] = (calink)i;
168 s.mA1[nNeighUp][iThread] = s.mDnDx * (h.Y() - y);
169 s.mA2[nNeighUp][iThread] = s.mDnDx * (h.Z() - z);
170 }
171 }
172 nNeighUp++;
173 }
174 }
175
 // When GPUCA_PAR_NEIGHBOURS_FINDER_UNROLL_SHARED is set, the comparison loop below always runs
 // over all MAX_SHARED slots, so the unused slots are filled with the offset -1.e10f and the
 // index (calink)-1 - presumably so that such a slot can never yield a d below bestD.
176 if constexpr (MAX_SHARED > 0 && GPUCA_PAR_NEIGHBOURS_FINDER_UNROLL_SHARED) { // init the rest of the shared array
177 for (uint32_t iUp = nNeighUp; iUp < MAX_SHARED; iUp++) {
178 s.mA1[iUp][iThread] = -1.e10f;
179 s.mA2[iUp][iThread] = -1.e10f;
180 s.mB[iUp][iThread] = (calink)-1;
181 }
182 }
183
 // nRest is the number of candidates stored in neighUp / yzUp. It is only meaningful when
 // nNeighUp > MAX_SHARED; for nNeighUp < MAX_SHARED the unsigned subtraction wraps around, and
 // nRest / nRestUnrolled are then not used (all uses are guarded by nNeighUp > MAX_SHARED).
 // nRestUnrolled is nRest rounded up to a multiple of UNROLL_GLOBAL; the slots added by the
 // rounding are filled with the same -1.e10f / (calink)-1 values as above.
184 const uint32_t nRest = nNeighUp - MAX_SHARED;
185 uint32_t nRestUnrolled = (nRest / UNROLL_GLOBAL) * UNROLL_GLOBAL;
186 if constexpr (MAX_GLOBAL > 1) { // init the rest of the UNROLL_GLOBAL chunk of the global array
187 if (nNeighUp > MAX_SHARED && nRestUnrolled < nRest) {
188 nRestUnrolled += UNROLL_GLOBAL;
189 GPUCA_UNROLL(U(UNROLL_GLOBAL - 1), U(UNROLL_GLOBAL - 1))
190 for (uint32_t k = 0; k + 1 < UNROLL_GLOBAL; k++) {
191 if (nRest + k < nRestUnrolled) {
192 yzUp[2 * (nRest + k)] = -1.e10f;
193 yzUp[2 * (nRest + k) + 1] = -1.e10f;
194 neighUp[nRest + k] = (calink)-1;
195 }
196 }
197 }
198 }
199
 // Same window computation as above, now for the lower row.
200 { // area in the lower row
201 const float yy = y * s.mDnTx;
202 const float zz = z * kAreaSlopeZDn;
203 minZ = zz - kAreaSizeZDn;
204 maxZ = zz + kAreaSizeZDn;
205 minY = yy - kAreaSizeY;
206 maxY = yy + kAreaSizeY;
207 }
208 rowDn.Grid().GetBin(minY, minZ, &binYmin, &binZmin);
209 rowDn.Grid().GetBin(maxY, maxZ, &binYmax, &binZmax);
210 nY = rowDn.Grid().Ny();
211
 // Best pair found so far. While searching, linkUp holds the candidate slot number (not yet the
 // hit index) and linkDn the row-relative index of the lower hit.
212 int32_t linkUp = -1; // CALINK_INVAL as integer
213 int32_t linkDn = -1; // CALINK_INVAL as integer
214 float bestD = chi2Cut;
215
 // Scan the lower-row hits inside the window. The offset of each lower hit from the current hit
 // is multiplied by mUpDx and compared with every stored upper candidate (whose offset was
 // multiplied by mDnDx); the pair with the smallest squared difference below bestD is kept.
216 for (int32_t k1 = binZmin; k1 <= binZmax; k1++) {
217 int32_t iMin = lFirstHitInBin[lFirstHitInBinOffsetDn + k1 * nY + binYmin];
218 int32_t iMax = lFirstHitInBin[lFirstHitInBinOffsetDn + k1 * nY + binYmax + 1];
219 for (int32_t i = iMin; i < iMax; i++) {
220 const GPUglobalref() cahit2& hitDataDn = pHitData[lHitNumberOffsetDn + i];
221 float yDn = y0Dn + (hitDataDn.x) * stepYDn;
222 float zDn = z0Dn + (hitDataDn.y) * stepZDn;
223
224 if (yDn < minY || yDn > maxY || zDn < minZ || zDn > maxZ) {
225 continue;
226 }
227
228 float yDnProjUp = s.mUpDx * (yDn - y);
229 float zDnProjUp = s.mUpDx * (zDn - z);
230
 // Candidates in s.mA1 / s.mA2: all MAX_SHARED slots when shared unrolling is enabled
 // (unused ones were filled above), otherwise only the slots actually written.
231 if constexpr (MAX_SHARED > 0) {
232 const uint32_t maxSharedUp = GPUCA_PAR_NEIGHBOURS_FINDER_UNROLL_SHARED ? MAX_SHARED : CAMath::Min(nNeighUp, MAX_SHARED);
233 GPUCA_UNROLL(U(MAX_SHARED), U(MAX_SHARED))
234 for (uint32_t iUp = 0; iUp < maxSharedUp; iUp++) {
235 const float dy = yDnProjUp - s.mA1[iUp][iThread];
236 const float dz = zDnProjUp - s.mA2[iUp][iThread];
237 const float d = dy * dy + dz * dz;
238 if (d < bestD) {
239 bestD = d;
240 linkDn = i;
241 linkUp = iUp;
242 }
243 }
244 }
245
 // Candidates in yzUp, processed in chunks of UNROLL_GLOBAL up to nRestUnrolled. Slot
 // numbers are offset by MAX_SHARED so they can be told apart from the shared slots later.
246 if constexpr (MAX_GLOBAL > 0) {
247 if (nNeighUp > MAX_SHARED) {
248 for (uint32_t iUp = 0; iUp < nRestUnrolled; iUp += UNROLL_GLOBAL) {
249 GPUCA_UNROLL(U(UNROLL_GLOBAL), U(UNROLL_GLOBAL))
250 for (uint32_t k = 0; k < UNROLL_GLOBAL; k++) {
251 const uint32_t jUp = iUp + k;
252 const float dy = yDnProjUp - yzUp[2 * jUp];
253 const float dz = zDnProjUp - yzUp[2 * jUp + 1];
254 const float d = dy * dy + dz * dz;
255 if (d < bestD) {
256 bestD = d;
257 linkDn = i;
258 linkUp = MAX_SHARED + jUp;
259 }
260 }
261 }
262 }
263 }
264 }
265 }
266
 // Translate the winning slot number into the stored upper-row hit index, reading from s.mB or
 // neighUp depending on which storage the slot belongs to.
267 if (linkUp >= 0) {
268 if constexpr (MAX_SHARED > 0 && MAX_GLOBAL > 0) {
269 linkUp = ((uint32_t)linkUp >= MAX_SHARED) ? neighUp[linkUp - MAX_SHARED] : s.mB[linkUp][iThread];
270 } else if constexpr (MAX_SHARED > 0) {
271 linkUp = s.mB[linkUp][iThread];
272 } else {
273 linkUp = neighUp[linkUp];
274 }
275 }
276
 // Store the result for this hit; both values remain -1 when no pair passed the cut.
277 tracker.mData.mLinkUpData[lHitNumberOffset + ih] = linkUp;
278 tracker.mData.mLinkDownData[lHitNumberOffset + ih] = linkDn;
279 }
280}
int32_t i
#define GPUsharedref()
#define GPUbarrier()
#define GPUrestrict()
#define GPUglobalref()
#define GPUCA_MAXN
#define GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP
#define GPUCA_PAR_NEIGHBOURS_FINDER_UNROLL_GLOBAL
#define GPUCA_PAR_NEIGHBOURS_FINDER_UNROLL_SHARED
#define GPUCA_UNROLL(optCu, optHi)
#define CALINK_INVAL
Definition GPUTPCDef.h:21
#define GPUCA_ROW_COUNT
GPUdii() void GPUTPCNeighboursFinder
Class for time synchronization of RawReader instances.
GLint GLenum GLint x
Definition glcorearb.h:403
GLfloat GLfloat GLfloat GLfloat GLfloat maxY
Definition glcorearb.h:2910
GLfloat minY
Definition glcorearb.h:2910
GLint y
Definition glcorearb.h:270
typedef void(APIENTRYP PFNGLCULLFACEPROC)(GLenum mode)
GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat maxZ
Definition glcorearb.h:2910
GLuint GLfloat GLfloat y0
Definition glcorearb.h:5034
GLdouble GLdouble GLdouble z
Definition glcorearb.h:843
GLfloat GLfloat minZ
Definition glcorearb.h:2910
uint32_t calink
Definition GPUTPCDef.h:30
std::vector< int > row