28 for (uint32_t
i = iThread;
i <
sizeof(
GPUTPCRow) /
sizeof(int32_t);
i += nThreads) {
29 reinterpret_cast<GPUsharedref() int32_t*
>(&s.mRow)[
i] =
reinterpret_cast<GPUglobalref() int32_t*
>(&tracker.TrackingDataRows()[iBlock])[
i];
31 reinterpret_cast<GPUsharedref() int32_t*
>(&s.mRowUp)[
i] =
reinterpret_cast<GPUglobalref() int32_t*
>(&tracker.TrackingDataRows()[iBlock + 2])[
i];
32 reinterpret_cast<GPUsharedref() int32_t*
>(&s.mRowDown)[
i] =
reinterpret_cast<GPUglobalref() int32_t*
>(&tracker.TrackingDataRows()[iBlock - 2])[
i];
47 s.mIRowUp = iBlock + 2;
48 s.mIRowDn = iBlock - 2;
49 s.mNHits =
row.mNHits;
52 const float xDn = rowDn.mX;
53 const float x =
row.mX;
54 const float xUp = rowUp.mX;
67 if ((s.mIRow <= 1) || (s.mIRow >=
GPUTPCGeometry::NROWS - 2) || (rowUp.mNHits <= 0) || (rowDn.mNHits <= 0)) {
68 const int32_t lHitNumberOffset =
row.mHitNumberOffset;
69 for (int32_t ih = iThread; ih < s.mNHits; ih += nThreads) {
70 tracker.mData.mLinkUpData[lHitNumberOffset + ih] =
CALINK_INVAL;
71 tracker.mData.mLinkDownData[lHitNumberOffset + ih] =
CALINK_INVAL;
76 static constexpr uint32_t UNROLL_GLOBAL = GPUCA_PAR_NEIGHBOURS_FINDER_UNROLL_GLOBAL > 1 ? GPUCA_PAR_NEIGHBOURS_FINDER_UNROLL_GLOBAL : 1;
77 static_assert(constants::NEIGHBOURS_MAX_N % UNROLL_GLOBAL == 0);
78 static constexpr uint32_t MAX_SHARED = GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP;
79 static constexpr uint32_t MAX_GLOBAL = (MAX_SHARED < constants::NEIGHBOURS_MAX_N) ? (((constants::NEIGHBOURS_MAX_N - MAX_SHARED - 1) / UNROLL_GLOBAL + 1) * UNROLL_GLOBAL) : 0;
80 static constexpr uint32_t MAX_TOTAL = MAX_SHARED + MAX_GLOBAL;
82 const float chi2Cut = 3.f * 3.f * 4 * (s.mUpDx * s.mUpDx + s.mDnDx * s.mDnDx);
85 const int32_t lHitNumberOffset =
row.mHitNumberOffset;
86 const int32_t lHitNumberOffsetUp = rowUp.mHitNumberOffset;
87 const int32_t lHitNumberOffsetDn = rowDn.mHitNumberOffset;
88 const uint32_t lFirstHitInBinOffsetUp = rowUp.mFirstHitInBinOffset;
89 const uint32_t lFirstHitInBinOffsetDn = rowDn.mFirstHitInBinOffset;
93 const float y0 =
row.mGrid.mYMin;
94 const float z0 =
row.mGrid.mZMin;
95 const float stepY =
row.mHstepY;
96 const float stepZ =
row.mHstepZ;
98 const float y0Up = rowUp.mGrid.mYMin;
99 const float z0Up = rowUp.mGrid.mZMin;
100 const float stepYUp = rowUp.mHstepY;
101 const float stepZUp = rowUp.mHstepZ;
103 const float y0Dn = rowDn.mGrid.mYMin;
104 const float z0Dn = rowDn.mGrid.mZMin;
105 const float stepYDn = rowDn.mHstepY;
106 const float stepZDn = rowDn.mHstepZ;
108 const float kAngularMultiplier = tracker.mConstantMem->param.rec.tpc.searchWindowDZDR;
109 const float kAreaSizeY = tracker.mConstantMem->param.rec.tpc.neighboursSearchArea;
110 const float kAreaSizeZUp = kAngularMultiplier != 0.f ? (s.mUpDx * kAngularMultiplier) : kAreaSizeY;
111 const float kAreaSizeZDn = kAngularMultiplier != 0.f ? (-s.mDnDx * kAngularMultiplier) : kAreaSizeY;
112 const float kAreaSlopeZUp = kAngularMultiplier != 0.f ? 1.f : s.mUpTx;
113 const float kAreaSlopeZDn = kAngularMultiplier != 0.f ? 1.f : s.mDnTx;
115 calink neighUp[MAX_GLOBAL];
116 float yzUp[2 * MAX_GLOBAL];
118 for (int32_t ih = iThread; ih < s.mNHits; ih += nThreads) {
121 const float y =
y0 + hitData.
x * stepY;
122 const float z = z0 + hitData.
y * stepZ;
124 uint32_t nNeighUp = 0;
126 int32_t binYmin, binYmax, binZmin, binZmax;
130 const float yy =
y * s.mUpTx;
131 const float zz =
z * kAreaSlopeZUp;
132 minZ = zz - kAreaSizeZUp;
133 maxZ = zz + kAreaSizeZUp;
134 minY = yy - kAreaSizeY;
135 maxY = yy + kAreaSizeY;
136 rowUp.Grid().GetBin(
minY,
minZ, &binYmin, &binZmin);
137 rowUp.Grid().GetBin(
maxY,
maxZ, &binYmax, &binZmax);
138 nY = rowUp.Grid().Ny();
141 for (int32_t k1 = binZmin; k1 <= binZmax && (nNeighUp < MAX_TOTAL); k1++) {
142 int32_t iMin = lFirstHitInBin[lFirstHitInBinOffsetUp + k1 * nY + binYmin];
143 int32_t iMax = lFirstHitInBin[lFirstHitInBinOffsetUp + k1 * nY + binYmax + 1];
145 for (int32_t
i = iMin;
i < iMax && (nNeighUp < MAX_TOTAL);
i++) {
148 h.mY = y0Up + (hitDataUp.
x) * stepYUp;
149 h.mZ = z0Up + (hitDataUp.
y) * stepZUp;
155 const bool inGlobal = nNeighUp >= MAX_SHARED;
156 if constexpr (MAX_GLOBAL > 0) {
158 neighUp[nNeighUp - MAX_SHARED] = (
calink)
i;
159 yzUp[2 * (nNeighUp - MAX_SHARED)] = s.mDnDx * (
h.Y() -
y);
160 yzUp[2 * (nNeighUp - MAX_SHARED) + 1] = s.mDnDx * (
h.Z() -
z);
163 if constexpr (MAX_SHARED > 0) {
165 s.mB[nNeighUp][iThread] = (
calink)
i;
166 s.mA1[nNeighUp][iThread] = s.mDnDx * (
h.Y() -
y);
167 s.mA2[nNeighUp][iThread] = s.mDnDx * (
h.Z() -
z);
174 if constexpr (MAX_SHARED > 0 && GPUCA_PAR_NEIGHBOURS_FINDER_UNROLL_SHARED) {
175 for (uint32_t iUp = nNeighUp; iUp < MAX_SHARED; iUp++) {
176 s.mA1[iUp][iThread] = -1.e10f;
177 s.mA2[iUp][iThread] = -1.e10f;
178 s.mB[iUp][iThread] = (
calink)-1;
182 const uint32_t nRest = nNeighUp - MAX_SHARED;
183 uint32_t nRestUnrolled = (nRest / UNROLL_GLOBAL) * UNROLL_GLOBAL;
184 if constexpr (MAX_GLOBAL > 1) {
185 if (nNeighUp > MAX_SHARED && nRestUnrolled < nRest) {
186 nRestUnrolled += UNROLL_GLOBAL;
187 GPUCA_UNROLL(U(std::max<int32_t>(UNROLL_GLOBAL - 1, 1)), U(std::max<int32_t>(UNROLL_GLOBAL - 1, 1)))
188 for (uint32_t k = 0; k + 1 < UNROLL_GLOBAL; k++) {
189 if (nRest + k < nRestUnrolled) {
190 yzUp[2 * (nRest + k)] = -1.e10f;
191 yzUp[2 * (nRest + k) + 1] = -1.e10f;
192 neighUp[nRest + k] = (
calink)-1;
199 const float yy =
y * s.mDnTx;
200 const float zz =
z * kAreaSlopeZDn;
201 minZ = zz - kAreaSizeZDn;
202 maxZ = zz + kAreaSizeZDn;
203 minY = yy - kAreaSizeY;
204 maxY = yy + kAreaSizeY;
206 rowDn.Grid().GetBin(
minY,
minZ, &binYmin, &binZmin);
207 rowDn.Grid().GetBin(
maxY,
maxZ, &binYmax, &binZmax);
208 nY = rowDn.Grid().Ny();
212 float bestD = chi2Cut;
214 for (int32_t k1 = binZmin; k1 <= binZmax; k1++) {
215 int32_t iMin = lFirstHitInBin[lFirstHitInBinOffsetDn + k1 * nY + binYmin];
216 int32_t iMax = lFirstHitInBin[lFirstHitInBinOffsetDn + k1 * nY + binYmax + 1];
217 for (int32_t
i = iMin;
i < iMax;
i++) {
219 float yDn = y0Dn + (hitDataDn.
x) * stepYDn;
220 float zDn = z0Dn + (hitDataDn.
y) * stepZDn;
222 if (yDn < minY || yDn >
maxY || zDn < minZ || zDn >
maxZ) {
226 float yDnProjUp = s.mUpDx * (yDn -
y);
227 float zDnProjUp = s.mUpDx * (zDn -
z);
229 if constexpr (MAX_SHARED > 0) {
230 const uint32_t maxSharedUp = GPUCA_PAR_NEIGHBOURS_FINDER_UNROLL_SHARED ? MAX_SHARED : CAMath::Min(nNeighUp, MAX_SHARED);
232 for (uint32_t iUp = 0; iUp < maxSharedUp; iUp++) {
233 const float dy = yDnProjUp - s.mA1[iUp][iThread];
234 const float dz = zDnProjUp - s.mA2[iUp][iThread];
235 const float d = dy * dy + dz * dz;
244 if constexpr (MAX_GLOBAL > 0) {
245 if (nNeighUp > MAX_SHARED) {
246 for (uint32_t iUp = 0; iUp < nRestUnrolled; iUp += UNROLL_GLOBAL) {
248 for (uint32_t k = 0; k < UNROLL_GLOBAL; k++) {
249 const uint32_t jUp = iUp + k;
250 const float dy = yDnProjUp - yzUp[2 * jUp];
251 const float dz = zDnProjUp - yzUp[2 * jUp + 1];
252 const float d = dy * dy + dz * dz;
256 linkUp = MAX_SHARED + jUp;
266 if constexpr (MAX_SHARED > 0 && MAX_GLOBAL > 0) {
267 linkUp = ((uint32_t)linkUp >= MAX_SHARED) ? neighUp[linkUp - MAX_SHARED] : s.mB[linkUp][iThread];
268 }
else if constexpr (MAX_SHARED > 0) {
269 linkUp = s.mB[linkUp][iThread];
271 linkUp = neighUp[linkUp];
275 tracker.mData.mLinkUpData[lHitNumberOffset + ih] = linkUp;
276 tracker.mData.mLinkDownData[lHitNumberOffset + ih] = linkDn;