Project
Loading...
Searching...
No Matches
GPUTPCNeighboursFinder.cxx
Go to the documentation of this file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
14
15#include "GPUTPCHit.h"
17#include "GPUTPCTracker.h"
18// #include "GPUCommonMath.h"
19#include "GPUDefMacros.h"
20using namespace o2::gpu;
21
22template <>
23GPUdii() void GPUTPCNeighboursFinder::Thread<0>(int32_t /*nBlocks*/, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& s, processorType& GPUrestrict() tracker)
24{
25 //* find neighbours
26
27#ifdef GPUCA_GPUCODE
28 for (uint32_t i = iThread; i < sizeof(GPUTPCRow) / sizeof(int32_t); i += nThreads) {
29 reinterpret_cast<GPUsharedref() int32_t*>(&s.mRow)[i] = reinterpret_cast<GPUglobalref() int32_t*>(&tracker.TrackingDataRows()[iBlock])[i];
30 if (iBlock >= 2 && iBlock < GPUCA_ROW_COUNT - 2) {
31 reinterpret_cast<GPUsharedref() int32_t*>(&s.mRowUp)[i] = reinterpret_cast<GPUglobalref() int32_t*>(&tracker.TrackingDataRows()[iBlock + 2])[i];
32 reinterpret_cast<GPUsharedref() int32_t*>(&s.mRowDown)[i] = reinterpret_cast<GPUglobalref() int32_t*>(&tracker.TrackingDataRows()[iBlock - 2])[i];
33 }
34 }
35 GPUbarrier();
36 const GPUsharedref() GPUTPCRow& GPUrestrict() row = s.mRow;
37 const GPUsharedref() GPUTPCRow& GPUrestrict() rowUp = s.mRowUp;
38 const GPUsharedref() GPUTPCRow& GPUrestrict() rowDn = s.mRowDown;
39#else
40 const GPUglobalref() GPUTPCRow& GPUrestrict() row = tracker.mData.mRows[iBlock];
41 const GPUglobalref() GPUTPCRow& GPUrestrict() rowUp = tracker.mData.mRows[iBlock + 2];
42 const GPUglobalref() GPUTPCRow& GPUrestrict() rowDn = tracker.mData.mRows[iBlock - 2];
43#endif
44
45 if (iThread == 0) {
46 s.mIRow = iBlock;
47 s.mIRowUp = iBlock + 2;
48 s.mIRowDn = iBlock - 2;
49 if (s.mIRow < GPUCA_ROW_COUNT) {
50 s.mNHits = row.mNHits;
51 if ((s.mIRow >= 2) && (s.mIRow <= GPUCA_ROW_COUNT - 3)) {
52 // the axis perpendicular to the rows
53 const float xDn = rowDn.mX;
54 const float x = row.mX;
55 const float xUp = rowUp.mX;
56
57 // distance of the rows (absolute and relative)
58 s.mUpDx = xUp - x;
59 s.mDnDx = xDn - x;
60 s.mUpTx = xUp / x;
61 s.mDnTx = xDn / x;
62 }
63 }
64 }
65 GPUbarrier();
66
67 // local copies
68
69 if ((s.mIRow <= 1) || (s.mIRow >= GPUCA_ROW_COUNT - 2) || (rowUp.mNHits <= 0) || (rowDn.mNHits <= 0)) {
70 const int32_t lHitNumberOffset = row.mHitNumberOffset;
71 for (int32_t ih = iThread; ih < s.mNHits; ih += nThreads) {
72 tracker.mData.mLinkUpData[lHitNumberOffset + ih] = CALINK_INVAL;
73 tracker.mData.mLinkDownData[lHitNumberOffset + ih] = CALINK_INVAL;
74 }
75 return;
76 }
77
78#define UnrollGlobal 4
79#define MaxShared GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP
80#if MaxShared < GPUCA_MAXN
81#define MaxGlobal ((GPUCA_MAXN - MaxShared - 1) / UnrollGlobal + 1) * UnrollGlobal
82#else
83#define MaxGlobal 0
84#endif
85#define MaxTotal MaxShared + MaxGlobal
86
87 const float chi2Cut = 3.f * 3.f * 4 * (s.mUpDx * s.mUpDx + s.mDnDx * s.mDnDx);
88 // float chi2Cut = 3.f*3.f*(s.mUpDx*s.mUpDx + s.mDnDx*s.mDnDx ); //SG
89
90 const int32_t lHitNumberOffset = row.mHitNumberOffset;
91 const int32_t lHitNumberOffsetUp = rowUp.mHitNumberOffset;
92 const int32_t lHitNumberOffsetDn = rowDn.mHitNumberOffset;
93 const uint32_t lFirstHitInBinOffsetUp = rowUp.mFirstHitInBinOffset;
94 const uint32_t lFirstHitInBinOffsetDn = rowDn.mFirstHitInBinOffset;
95 const GPUglobalref() calink* GPUrestrict() lFirstHitInBin = tracker.mData.mFirstHitInBin;
96 const GPUglobalref() cahit2* GPUrestrict() pHitData = tracker.mData.mHitData;
97
98 const float y0 = row.mGrid.mYMin;
99 const float z0 = row.mGrid.mZMin;
100 const float stepY = row.mHstepY;
101 const float stepZ = row.mHstepZ;
102
103 const float y0Up = rowUp.mGrid.mYMin;
104 const float z0Up = rowUp.mGrid.mZMin;
105 const float stepYUp = rowUp.mHstepY;
106 const float stepZUp = rowUp.mHstepZ;
107
108 const float y0Dn = rowDn.mGrid.mYMin;
109 const float z0Dn = rowDn.mGrid.mZMin;
110 const float stepYDn = rowDn.mHstepY;
111 const float stepZDn = rowDn.mHstepZ;
112
113 const float kAngularMultiplier = tracker.mConstantMem->param.rec.tpc.searchWindowDZDR;
114 const float kAreaSizeY = tracker.mConstantMem->param.rec.tpc.neighboursSearchArea;
115 const float kAreaSizeZUp = kAngularMultiplier != 0.f ? (s.mUpDx * kAngularMultiplier) : kAreaSizeY;
116 const float kAreaSizeZDn = kAngularMultiplier != 0.f ? (-s.mDnDx * kAngularMultiplier) : kAreaSizeY;
117 const float kAreaSlopeZUp = kAngularMultiplier != 0.f ? 1.f : s.mUpTx;
118 const float kAreaSlopeZDn = kAngularMultiplier != 0.f ? 1.f : s.mDnTx;
119
120#if MaxGlobal > 0
121 calink neighUp[MaxGlobal];
122 float yzUp[2 * MaxGlobal];
123#endif
124
125 for (int32_t ih = iThread; ih < s.mNHits; ih += nThreads) {
126
127 const GPUglobalref() cahit2& hitData = pHitData[lHitNumberOffset + ih];
128 const float y = y0 + hitData.x * stepY;
129 const float z = z0 + hitData.y * stepZ;
130
131 int32_t nNeighUp = 0;
132 float minZ, maxZ, minY, maxY;
133 int32_t binYmin, binYmax, binZmin, binZmax;
134 int32_t nY;
135
136 { // area in the upper row
137 const float yy = y * s.mUpTx;
138 const float zz = z * kAreaSlopeZUp;
139 minZ = zz - kAreaSizeZUp;
140 maxZ = zz + kAreaSizeZUp;
141 minY = yy - kAreaSizeY;
142 maxY = yy + kAreaSizeY;
143 rowUp.Grid().GetBin(minY, minZ, &binYmin, &binZmin);
144 rowUp.Grid().GetBin(maxY, maxZ, &binYmax, &binZmax);
145 nY = rowUp.Grid().Ny();
146 }
147
148 for (int32_t k1 = binZmin; k1 <= binZmax && (nNeighUp < MaxTotal); k1++) {
149 int32_t iMin = lFirstHitInBin[lFirstHitInBinOffsetUp + k1 * nY + binYmin];
150 int32_t iMax = lFirstHitInBin[lFirstHitInBinOffsetUp + k1 * nY + binYmax + 1];
151 GPUCA_UNROLL(U(4), U(2))
152 for (int32_t i = iMin; i < iMax && (nNeighUp < MaxTotal); i++) {
153 const GPUglobalref() cahit2& hitDataUp = pHitData[lHitNumberOffsetUp + i];
154 GPUTPCHit h;
155 h.mY = y0Up + (hitDataUp.x) * stepYUp;
156 h.mZ = z0Up + (hitDataUp.y) * stepZUp;
157
158 if (h.mY < minY || h.mY > maxY || h.mZ < minZ || h.mZ > maxZ) {
159 continue;
160 }
161
162#if MaxGlobal > 0
163#if MaxShared == 0
164 if (true) {
165#else
166 if (nNeighUp >= MaxShared) {
167#endif
168 neighUp[nNeighUp - MaxShared] = (calink)i;
169 yzUp[2 * (nNeighUp - MaxShared)] = s.mDnDx * (h.Y() - y);
170 yzUp[2 * (nNeighUp - MaxShared) + 1] = s.mDnDx * (h.Z() - z);
171 } else
172#endif
173 {
174#if MaxShared > 0
175 s.mB[nNeighUp][iThread] = (calink)i;
176 s.mA1[nNeighUp][iThread] = s.mDnDx * (h.Y() - y);
177 s.mA2[nNeighUp][iThread] = s.mDnDx * (h.Z() - z);
178#endif
179 }
180 nNeighUp++;
181 }
182 }
183
184#if MaxShared > 0 // init a rest of the shared array
185 for (int32_t iUp = nNeighUp; iUp < MaxShared; iUp++) {
186 s.mA1[iUp][iThread] = -1.e10f;
187 s.mA2[iUp][iThread] = -1.e10f;
188 s.mB[iUp][iThread] = (calink)-1;
189 }
190#endif
191
192#if MaxGlobal > 0 // init a rest of the UnrollGlobal chunk of the global array
193 int32_t Nrest = nNeighUp - MaxShared;
194 int32_t N4 = (Nrest / UnrollGlobal) * UnrollGlobal;
195 if (N4 < Nrest) {
196 N4 += UnrollGlobal;
198 for (int32_t k = 0; k < UnrollGlobal - 1; k++) {
199 if (Nrest + k < N4) {
200 yzUp[2 * (Nrest + k)] = -1.e10f;
201 yzUp[2 * (Nrest + k) + 1] = -1.e10f;
202 neighUp[Nrest + k] = (calink)-1;
203 }
204 }
205 }
206#endif
207
208 { // area in the lower row
209 const float yy = y * s.mDnTx;
210 const float zz = z * kAreaSlopeZDn;
211 minZ = zz - kAreaSizeZDn;
212 maxZ = zz + kAreaSizeZDn;
213 minY = yy - kAreaSizeY;
214 maxY = yy + kAreaSizeY;
215 }
216 rowDn.Grid().GetBin(minY, minZ, &binYmin, &binZmin);
217 rowDn.Grid().GetBin(maxY, maxZ, &binYmax, &binZmax);
218 nY = rowDn.Grid().Ny();
219
220 int32_t linkUp = -1; // CALINK_INVAL as integer
221 int32_t linkDn = -1; // CALINK_INVAL as integer
222 float bestD = chi2Cut;
223
224 for (int32_t k1 = binZmin; k1 <= binZmax; k1++) {
225 int32_t iMin = lFirstHitInBin[lFirstHitInBinOffsetDn + k1 * nY + binYmin];
226 int32_t iMax = lFirstHitInBin[lFirstHitInBinOffsetDn + k1 * nY + binYmax + 1];
227 for (int32_t i = iMin; i < iMax; i++) {
228 const GPUglobalref() cahit2& hitDataDn = pHitData[lHitNumberOffsetDn + i];
229 float yDn = y0Dn + (hitDataDn.x) * stepYDn;
230 float zDn = z0Dn + (hitDataDn.y) * stepZDn;
231
232 if (yDn < minY || yDn > maxY || zDn < minZ || zDn > maxZ) {
233 continue;
234 }
235
236 float yDnProjUp = s.mUpDx * (yDn - y);
237 float zDnProjUp = s.mUpDx * (zDn - z);
238
239#if MaxShared > 0
241 for (int32_t iUp = 0; iUp < MaxShared; iUp++) {
242 const float dy = yDnProjUp - s.mA1[iUp][iThread];
243 const float dz = zDnProjUp - s.mA2[iUp][iThread];
244 const float d = dy * dy + dz * dz;
245 if (d < bestD) {
246 bestD = d;
247 linkDn = i;
248 linkUp = iUp;
249 }
250 }
251#endif
252
253#if MaxGlobal > 0
254 for (int32_t iUp = 0; iUp < N4; iUp += UnrollGlobal) {
256 for (int32_t k = 0; k < UnrollGlobal; k++) {
257 int32_t jUp = iUp + k;
258 const float dy = yDnProjUp - yzUp[2 * jUp];
259 const float dz = zDnProjUp - yzUp[2 * jUp + 1];
260 const float d = dy * dy + dz * dz;
261 if (d < bestD) {
262 bestD = d;
263 linkDn = i;
264 linkUp = MaxShared + jUp;
265 }
266 }
267 }
268#endif
269 }
270 }
271
272 if (linkUp >= 0) {
273#if MaxShared > 0 && MaxGlobal > 0
274 linkUp = (linkUp >= MaxShared) ? neighUp[linkUp - MaxShared] : s.mB[linkUp][iThread];
275#elif MaxShared > 0
276 linkUp = s.mB[linkUp][iThread];
277#else
278 linkUp = neighUp[linkUp];
279#endif
280 }
281
282 tracker.mData.mLinkUpData[lHitNumberOffset + ih] = linkUp;
283 tracker.mData.mLinkDownData[lHitNumberOffset + ih] = linkDn;
284 }
285}
int32_t i
#define GPUsharedref()
#define GPUbarrier()
#define GPUrestrict()
#define GPUglobalref()
#define GPUCA_UNROLL(optCu, optHi)
#define CALINK_INVAL
Definition GPUTPCDef.h:21
#define GPUCA_ROW_COUNT
#define UnrollGlobal
GPUdii() void GPUTPCNeighboursFinder
#define MaxTotal
#define MaxShared
#define MaxGlobal
Class for time synchronization of RawReader instances.
GLint GLenum GLint x
Definition glcorearb.h:403
GLfloat GLfloat GLfloat GLfloat GLfloat maxY
Definition glcorearb.h:2910
GLfloat minY
Definition glcorearb.h:2910
GLint y
Definition glcorearb.h:270
typedef void(APIENTRYP PFNGLCULLFACEPROC)(GLenum mode)
GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat maxZ
Definition glcorearb.h:2910
GLuint GLfloat GLfloat y0
Definition glcorearb.h:5034
GLdouble GLdouble GLdouble z
Definition glcorearb.h:843
GLfloat GLfloat minZ
Definition glcorearb.h:2910
uint32_t calink
Definition GPUTPCDef.h:30
std::vector< int > row