Project
Loading...
Searching...
No Matches
GPUTPCNeighboursFinder.cxx
Go to the documentation of this file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
14
15#include "GPUTPCHit.h"
17#include "GPUTPCTracker.h"
18// #include "GPUCommonMath.h"
19#include "GPUDefMacros.h"
20using namespace o2::gpu;
21
// Neighbour finder for one row (iBlock) of the tracking data.
// For every hit of row iBlock this collects candidate hits of row iBlock + 2 ("up") that lie
// inside a search window, then scans the hits of row iBlock - 2 ("down") inside their own
// window and keeps the up/down pair with the smallest distance measure d below chi2Cut.
// The resulting hit indices are written to tracker.mData.mLinkUpData / mLinkDownData
// (-1 when no pair was accepted; CALINK_INVAL for rows that cannot have both neighbours).
//
// nThreads - stride with which the copy loop and the hit loop advance
// iBlock   - index of the row processed by this call
// iThread  - index of the calling thread; also the first element / hit it handles
// s        - GPUSharedMemory scratch data (row copies, row distances, candidate slots [slot][iThread])
// tracker  - source of row, grid and hit data, and owner of the link arrays that are written
22template <>
23GPUdii() void GPUTPCNeighboursFinder::Thread<0>(int32_t /*nBlocks*/, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& s, processorType& GPUrestrict() tracker)
24{
25 //* find neighbours
26
27#ifdef GPUCA_GPUCODE
 // Copy the GPUTPCRow of iBlock into s.mRow in int32_t-sized words, the threads sharing the work.
 // Rows iBlock + 2 and iBlock - 2 are copied only when both indices are inside [0, NROWS);
 // otherwise s.mRowUp / s.mRowDown are not filled here.
28 for (uint32_t i = iThread; i < sizeof(GPUTPCRow) / sizeof(int32_t); i += nThreads) {
29 reinterpret_cast<GPUsharedref() int32_t*>(&s.mRow)[i] = reinterpret_cast<GPUglobalref() int32_t*>(&tracker.TrackingDataRows()[iBlock])[i];
30 if (iBlock >= 2 && iBlock < (int32_t)GPUTPCGeometry::NROWS - 2) {
31 reinterpret_cast<GPUsharedref() int32_t*>(&s.mRowUp)[i] = reinterpret_cast<GPUglobalref() int32_t*>(&tracker.TrackingDataRows()[iBlock + 2])[i];
32 reinterpret_cast<GPUsharedref() int32_t*>(&s.mRowDown)[i] = reinterpret_cast<GPUglobalref() int32_t*>(&tracker.TrackingDataRows()[iBlock - 2])[i];
33 }
34 }
 // Barrier before the copied rows are read below (presumably block-wide - confirm the GPUbarrier() definition).
35 GPUbarrier();
36 const GPUsharedref() GPUTPCRow& GPUrestrict() row = s.mRow;
37 const GPUsharedref() GPUTPCRow& GPUrestrict() rowUp = s.mRowUp;
38 const GPUsharedref() GPUTPCRow& GPUrestrict() rowDn = s.mRowDown;
39#else
 // Without GPUCA_GPUCODE the rows are referenced in place instead of being copied.
 // NOTE(review): for iBlock < 2 or iBlock >= NROWS - 2 the indices iBlock +- 2 may lie outside the
 // row array; rowUp / rowDn are not read in that case because the range test further down
 // short-circuits first - confirm the bounds of mRows.
40 const GPUglobalref() GPUTPCRow& GPUrestrict() row = tracker.mData.mRows[iBlock];
41 const GPUglobalref() GPUTPCRow& GPUrestrict() rowUp = tracker.mData.mRows[iBlock + 2];
42 const GPUglobalref() GPUTPCRow& GPUrestrict() rowDn = tracker.mData.mRows[iBlock - 2];
43#endif
44
 // Thread 0 publishes the row indices, the hit count and (for rows that have both neighbours)
 // the row distances to s; every thread reads them after the barrier below.
45 if (iThread == 0) {
46 s.mIRow = iBlock;
47 s.mIRowUp = iBlock + 2;
48 s.mIRowDn = iBlock - 2;
49 s.mNHits = row.mNHits;
50 if ((s.mIRow >= 2) && (s.mIRow < GPUTPCGeometry::NROWS - 2)) {
51 // the axis perpendicular to the rows
52 const float xDn = rowDn.mX;
53 const float x = row.mX;
54 const float xUp = rowUp.mX;
55
56 // distance of the rows (absolute and relative)
 // mUpDx / mDnDx: difference of the neighbour row's x to this row's x; mUpTx / mDnTx: ratio of the two x values
57 s.mUpDx = xUp - x;
58 s.mDnDx = xDn - x;
59 s.mUpTx = xUp / x;
60 s.mDnTx = xDn / x;
61 }
62 }
63 GPUbarrier();
64
65 // Rows without a row two above and two below, or whose neighbour rows hold no hits, cannot form
66 // links: mark the up and down link of every hit of this row as CALINK_INVAL and stop.
67 if ((s.mIRow <= 1) || (s.mIRow >= GPUTPCGeometry::NROWS - 2) || (rowUp.mNHits <= 0) || (rowDn.mNHits <= 0)) {
68 const int32_t lHitNumberOffset = row.mHitNumberOffset;
69 for (int32_t ih = iThread; ih < s.mNHits; ih += nThreads) {
70 tracker.mData.mLinkUpData[lHitNumberOffset + ih] = CALINK_INVAL;
71 tracker.mData.mLinkDownData[lHitNumberOffset + ih] = CALINK_INVAL;
72 }
73 return;
74 }
75
 // Capacity of the upper-row candidate list:
 //  UNROLL_GLOBAL - unroll factor from the build parameter, clamped to at least 1
 //  MAX_SHARED    - number of candidate slots kept in s.mA1 / s.mA2 / s.mB
 //  MAX_GLOBAL    - number of additional slots in the local arrays neighUp / yzUp: what is missing up to
 //                  NEIGHBOURS_MAX_N, rounded up to a multiple of UNROLL_GLOBAL (0 if MAX_SHARED already covers it)
 //  MAX_TOTAL     - upper limit on the number of candidates stored per hit
76 static constexpr uint32_t UNROLL_GLOBAL = GPUCA_PAR_NEIGHBOURS_FINDER_UNROLL_GLOBAL > 1 ? GPUCA_PAR_NEIGHBOURS_FINDER_UNROLL_GLOBAL : 1;
77 static_assert(constants::NEIGHBOURS_MAX_N % UNROLL_GLOBAL == 0);
78 static constexpr uint32_t MAX_SHARED = GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP;
79 static constexpr uint32_t MAX_GLOBAL = (MAX_SHARED < constants::NEIGHBOURS_MAX_N) ? (((constants::NEIGHBOURS_MAX_N - MAX_SHARED - 1) / UNROLL_GLOBAL + 1) * UNROLL_GLOBAL) : 0;
80 static constexpr uint32_t MAX_TOTAL = MAX_SHARED + MAX_GLOBAL;
81
 // Upper bound for the distance measure d of an accepted pair; it is the initial value of bestD.
82 const float chi2Cut = 3.f * 3.f * 4 * (s.mUpDx * s.mUpDx + s.mDnDx * s.mDnDx);
83 // float chi2Cut = 3.f*3.f*(s.mUpDx*s.mUpDx + s.mDnDx*s.mDnDx ); //SG
84
 // Offsets of the three rows into the common hit array and into the first-hit-in-bin table.
85 const int32_t lHitNumberOffset = row.mHitNumberOffset;
86 const int32_t lHitNumberOffsetUp = rowUp.mHitNumberOffset;
87 const int32_t lHitNumberOffsetDn = rowDn.mHitNumberOffset;
88 const uint32_t lFirstHitInBinOffsetUp = rowUp.mFirstHitInBinOffset;
89 const uint32_t lFirstHitInBinOffsetDn = rowDn.mFirstHitInBinOffset;
90 const GPUglobalref() calink* GPUrestrict() lFirstHitInBin = tracker.mData.mFirstHitInBin;
91 const GPUglobalref() cahit2* GPUrestrict() pHitData = tracker.mData.mHitData;
92
 // Per-row parameters that turn the stored hit values (cahit2 .x / .y) into y / z:
 // coordinate = grid minimum + stored value * step.
93 const float y0 = row.mGrid.mYMin;
94 const float z0 = row.mGrid.mZMin;
95 const float stepY = row.mHstepY;
96 const float stepZ = row.mHstepZ;
97
98 const float y0Up = rowUp.mGrid.mYMin;
99 const float z0Up = rowUp.mGrid.mZMin;
100 const float stepYUp = rowUp.mHstepY;
101 const float stepZUp = rowUp.mHstepZ;
102
103 const float y0Dn = rowDn.mGrid.mYMin;
104 const float z0Dn = rowDn.mGrid.mZMin;
105 const float stepYDn = rowDn.mHstepY;
106 const float stepZDn = rowDn.mHstepZ;
107
 // Search window. The half-width in y is always neighboursSearchArea.
 // If searchWindowDZDR is non-zero, the half-width in z is that factor times the row distance
 // (mUpDx, resp. -mDnDx) and the window is centred on the unscaled z of the hit; otherwise the
 // half-width in z equals the one in y and the centre is z scaled by the x ratio (mUpTx / mDnTx).
108 const float kAngularMultiplier = tracker.mConstantMem->param.rec.tpc.searchWindowDZDR;
109 const float kAreaSizeY = tracker.mConstantMem->param.rec.tpc.neighboursSearchArea;
110 const float kAreaSizeZUp = kAngularMultiplier != 0.f ? (s.mUpDx * kAngularMultiplier) : kAreaSizeY;
111 const float kAreaSizeZDn = kAngularMultiplier != 0.f ? (-s.mDnDx * kAngularMultiplier) : kAreaSizeY;
112 const float kAreaSlopeZUp = kAngularMultiplier != 0.f ? 1.f : s.mUpTx;
113 const float kAreaSlopeZDn = kAngularMultiplier != 0.f ? 1.f : s.mDnTx;
114
 // Overflow storage for upper-row candidates number MAX_SHARED and above:
 // neighUp[j] holds the hit index, yzUp[2 * j] / yzUp[2 * j + 1] the scaled y / z offsets.
115 calink neighUp[MAX_GLOBAL];
116 float yzUp[2 * MAX_GLOBAL];
117
 // Each thread handles the hits ih = iThread, iThread + nThreads, ... of this row.
118 for (int32_t ih = iThread; ih < s.mNHits; ih += nThreads) {
119
120 const GPUglobalref() cahit2& hitData = pHitData[lHitNumberOffset + ih];
121 const float y = y0 + hitData.x * stepY;
122 const float z = z0 + hitData.y * stepZ;
123
124 uint32_t nNeighUp = 0;
125 float minZ, maxZ, minY, maxY;
126 int32_t binYmin, binYmax, binZmin, binZmax;
127 int32_t nY;
128
129 { // area in the upper row
 // Window centre: y scaled by the x ratio, z scaled by kAreaSlopeZUp; the grid bins of the two
 // window corners bound the bin range that is scanned.
130 const float yy = y * s.mUpTx;
131 const float zz = z * kAreaSlopeZUp;
132 minZ = zz - kAreaSizeZUp;
133 maxZ = zz + kAreaSizeZUp;
134 minY = yy - kAreaSizeY;
135 maxY = yy + kAreaSizeY;
136 rowUp.Grid().GetBin(minY, minZ, &binYmin, &binZmin);
137 rowUp.Grid().GetBin(maxY, maxZ, &binYmax, &binZmax);
138 nY = rowUp.Grid().Ny();
139 }
140
 // Collect the upper-row candidates. For every z bin k1 the hit index range [iMin, iMax) is read from
 // the first-hit table at bins (k1, binYmin) and (k1, binYmax + 1). Collection stops at MAX_TOTAL candidates.
141 for (int32_t k1 = binZmin; k1 <= binZmax && (nNeighUp < MAX_TOTAL); k1++) {
142 int32_t iMin = lFirstHitInBin[lFirstHitInBinOffsetUp + k1 * nY + binYmin];
143 int32_t iMax = lFirstHitInBin[lFirstHitInBinOffsetUp + k1 * nY + binYmax + 1];
144 GPUCA_UNROLL(U(4), U(2))
145 for (int32_t i = iMin; i < iMax && (nNeighUp < MAX_TOTAL); i++) {
146 const GPUglobalref() cahit2& hitDataUp = pHitData[lHitNumberOffsetUp + i];
147 GPUTPCHit h;
148 h.mY = y0Up + (hitDataUp.x) * stepYUp;
149 h.mZ = z0Up + (hitDataUp.y) * stepZUp;
150
 // The bin range may cover more than the window, so test the exact coordinates.
151 if (h.mY < minY || h.mY > maxY || h.mZ < minZ || h.mZ > maxZ) {
152 continue;
153 }
154
 // Store the candidate: hit index plus its y / z offset from the current hit, multiplied by mDnDx.
 // The first MAX_SHARED candidates go to s.mB / s.mA1 / s.mA2 (column iThread), later ones to neighUp / yzUp.
155 const bool inGlobal = nNeighUp >= MAX_SHARED;
156 if constexpr (MAX_GLOBAL > 0) {
157 if (inGlobal) {
158 neighUp[nNeighUp - MAX_SHARED] = (calink)i;
159 yzUp[2 * (nNeighUp - MAX_SHARED)] = s.mDnDx * (h.Y() - y);
160 yzUp[2 * (nNeighUp - MAX_SHARED) + 1] = s.mDnDx * (h.Z() - z);
161 }
162 }
163 if constexpr (MAX_SHARED > 0) {
164 if (!inGlobal) {
165 s.mB[nNeighUp][iThread] = (calink)i;
166 s.mA1[nNeighUp][iThread] = s.mDnDx * (h.Y() - y);
167 s.mA2[nNeighUp][iThread] = s.mDnDx * (h.Z() - z);
168 }
169 }
170 nNeighUp++;
171 }
172 }
173
 // With GPUCA_PAR_NEIGHBOURS_FINDER_UNROLL_SHARED the comparison loop below always scans all MAX_SHARED
 // slots, so the unused ones are filled with -1.e10f offsets (intended never to pass the d < bestD test).
174 if constexpr (MAX_SHARED > 0 && GPUCA_PAR_NEIGHBOURS_FINDER_UNROLL_SHARED) { // init the rest of the shared array
175 for (uint32_t iUp = nNeighUp; iUp < MAX_SHARED; iUp++) {
176 s.mA1[iUp][iThread] = -1.e10f;
177 s.mA2[iUp][iThread] = -1.e10f;
178 s.mB[iUp][iThread] = (calink)-1;
179 }
180 }
181
 // nRest: number of candidates in the overflow arrays. The subtraction is unsigned and wraps when
 // nNeighUp < MAX_SHARED; nRest and nRestUnrolled are only used where nNeighUp > MAX_SHARED is checked.
 // nRestUnrolled: nRest rounded up to a multiple of UNROLL_GLOBAL, the added slots being padded with
 // the same -1.e10f / -1 fill values.
182 const uint32_t nRest = nNeighUp - MAX_SHARED;
183 uint32_t nRestUnrolled = (nRest / UNROLL_GLOBAL) * UNROLL_GLOBAL;
184 if constexpr (MAX_GLOBAL > 1) { // init the rest of the UNROLL_GLOBAL chunk of the global array
185 if (nNeighUp > MAX_SHARED && nRestUnrolled < nRest) {
186 nRestUnrolled += UNROLL_GLOBAL;
187 GPUCA_UNROLL(U(std::max<int32_t>(UNROLL_GLOBAL - 1, 1)), U(std::max<int32_t>(UNROLL_GLOBAL - 1, 1)))
188 for (uint32_t k = 0; k + 1 < UNROLL_GLOBAL; k++) {
189 if (nRest + k < nRestUnrolled) {
190 yzUp[2 * (nRest + k)] = -1.e10f;
191 yzUp[2 * (nRest + k) + 1] = -1.e10f;
192 neighUp[nRest + k] = (calink)-1;
193 }
194 }
195 }
196 }
197
198 { // area in the lower row
199 const float yy = y * s.mDnTx;
200 const float zz = z * kAreaSlopeZDn;
201 minZ = zz - kAreaSizeZDn;
202 maxZ = zz + kAreaSizeZDn;
203 minY = yy - kAreaSizeY;
204 maxY = yy + kAreaSizeY;
205 }
206 rowDn.Grid().GetBin(minY, minZ, &binYmin, &binZmin);
207 rowDn.Grid().GetBin(maxY, maxZ, &binYmax, &binZmax);
208 nY = rowDn.Grid().Ny();
209
 // Best pair found so far. linkUp first holds a candidate slot number (not a hit index) and is
 // translated after the loop; linkDn holds the lower-row hit index directly.
210 int32_t linkUp = -1; // CALINK_INVAL as integer
211 int32_t linkDn = -1; // CALINK_INVAL as integer
212 float bestD = chi2Cut;
213
 // Scan the lower-row hits inside the window and compare each against every stored upper-row candidate.
214 for (int32_t k1 = binZmin; k1 <= binZmax; k1++) {
215 int32_t iMin = lFirstHitInBin[lFirstHitInBinOffsetDn + k1 * nY + binYmin];
216 int32_t iMax = lFirstHitInBin[lFirstHitInBinOffsetDn + k1 * nY + binYmax + 1];
217 for (int32_t i = iMin; i < iMax; i++) {
218 const GPUglobalref() cahit2& hitDataDn = pHitData[lHitNumberOffsetDn + i];
219 float yDn = y0Dn + (hitDataDn.x) * stepYDn;
220 float zDn = z0Dn + (hitDataDn.y) * stepZDn;
221
222 if (yDn < minY || yDn > maxY || zDn < minZ || zDn > maxZ) {
223 continue;
224 }
225
 // Offset of the lower hit multiplied by mUpDx. Together with the stored upper offsets (multiplied by mDnDx)
 // this gives d = (mUpDx * (yDn - y) - mDnDx * (yUp - y))^2 + the same term in z, which is zero
 // when the up and down offsets are proportional to the respective row distances.
226 float yDnProjUp = s.mUpDx * (yDn - y);
227 float zDnProjUp = s.mUpDx * (zDn - z);
228
229 if constexpr (MAX_SHARED > 0) {
 // Scan all MAX_SHARED slots when unrolling (unused ones were padded above), otherwise only the filled ones.
230 const uint32_t maxSharedUp = GPUCA_PAR_NEIGHBOURS_FINDER_UNROLL_SHARED ? MAX_SHARED : CAMath::Min(nNeighUp, MAX_SHARED);
231 GPUCA_UNROLL(U(MAX_SHARED), U(MAX_SHARED))
232 for (uint32_t iUp = 0; iUp < maxSharedUp; iUp++) {
233 const float dy = yDnProjUp - s.mA1[iUp][iThread];
234 const float dz = zDnProjUp - s.mA2[iUp][iThread];
235 const float d = dy * dy + dz * dz;
236 if (d < bestD) {
237 bestD = d;
238 linkDn = i;
239 linkUp = iUp;
240 }
241 }
242 }
243
244 if constexpr (MAX_GLOBAL > 0) {
 // Overflow candidates, processed in chunks of UNROLL_GLOBAL up to the padded count nRestUnrolled.
245 if (nNeighUp > MAX_SHARED) {
246 for (uint32_t iUp = 0; iUp < nRestUnrolled; iUp += UNROLL_GLOBAL) {
247 GPUCA_UNROLL(U(UNROLL_GLOBAL), U(UNROLL_GLOBAL))
248 for (uint32_t k = 0; k < UNROLL_GLOBAL; k++) {
249 const uint32_t jUp = iUp + k;
250 const float dy = yDnProjUp - yzUp[2 * jUp];
251 const float dz = zDnProjUp - yzUp[2 * jUp + 1];
252 const float d = dy * dy + dz * dz;
253 if (d < bestD) {
254 bestD = d;
255 linkDn = i;
 // slot numbers of overflow candidates continue after the MAX_SHARED shared slots
256 linkUp = MAX_SHARED + jUp;
257 }
258 }
259 }
260 }
261 }
262 }
263 }
264
 // Translate the winning candidate slot number into the upper-row hit index it refers to.
265 if (linkUp >= 0) {
266 if constexpr (MAX_SHARED > 0 && MAX_GLOBAL > 0) {
267 linkUp = ((uint32_t)linkUp >= MAX_SHARED) ? neighUp[linkUp - MAX_SHARED] : s.mB[linkUp][iThread];
268 } else if constexpr (MAX_SHARED > 0) {
269 linkUp = s.mB[linkUp][iThread];
270 } else {
271 linkUp = neighUp[linkUp];
272 }
273 }
274
 // Store the links of this hit; both stay -1 when no pair had d below chi2Cut.
275 tracker.mData.mLinkUpData[lHitNumberOffset + ih] = linkUp;
276 tracker.mData.mLinkDownData[lHitNumberOffset + ih] = linkDn;
277 }
278}
int32_t i
#define GPUsharedref()
#define GPUbarrier()
#define GPUrestrict()
#define GPUglobalref()
#define GPUCA_UNROLL(optCu, optHi)
#define CALINK_INVAL
Definition GPUTPCDef.h:21
GPUdii() void GPUTPCNeighboursFinder
Class for time synchronization of RawReader instances.
static constexpr uint32_t NROWS
GLint GLenum GLint x
Definition glcorearb.h:403
GLfloat GLfloat GLfloat GLfloat GLfloat maxY
Definition glcorearb.h:2910
GLfloat minY
Definition glcorearb.h:2910
GLint y
Definition glcorearb.h:270
typedef void(APIENTRYP PFNGLCULLFACEPROC)(GLenum mode)
GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat maxZ
Definition glcorearb.h:2910
GLuint GLfloat GLfloat y0
Definition glcorearb.h:5034
GLdouble GLdouble GLdouble z
Definition glcorearb.h:843
GLfloat GLfloat minZ
Definition glcorearb.h:2910
uint32_t calink
Definition GPUTPCDef.h:26
std::vector< int > row