28 for (uint32_t
i = iThread;
i <
sizeof(
GPUTPCRow) /
sizeof(int32_t);
i += nThreads) {
29 reinterpret_cast<GPUsharedref() int32_t*
>(&s.mRow)[
i] =
reinterpret_cast<GPUglobalref() int32_t*
>(&tracker.TrackingDataRows()[iBlock])[
i];
31 reinterpret_cast<GPUsharedref() int32_t*
>(&s.mRowUp)[
i] =
reinterpret_cast<GPUglobalref() int32_t*
>(&tracker.TrackingDataRows()[iBlock + 2])[
i];
32 reinterpret_cast<GPUsharedref() int32_t*
>(&s.mRowDown)[
i] =
reinterpret_cast<GPUglobalref() int32_t*
>(&tracker.TrackingDataRows()[iBlock - 2])[
i];
47 s.mIRowUp = iBlock + 2;
48 s.mIRowDn = iBlock - 2;
50 s.mNHits =
row.mNHits;
53 const float xDn = rowDn.mX;
54 const float x =
row.mX;
55 const float xUp = rowUp.mX;
69 if ((s.mIRow <= 1) || (s.mIRow >=
GPUCA_ROW_COUNT - 2) || (rowUp.mNHits <= 0) || (rowDn.mNHits <= 0)) {
70 const int32_t lHitNumberOffset =
row.mHitNumberOffset;
71 for (int32_t ih = iThread; ih < s.mNHits; ih += nThreads) {
72 tracker.mData.mLinkUpData[lHitNumberOffset + ih] =
CALINK_INVAL;
73 tracker.mData.mLinkDownData[lHitNumberOffset + ih] =
CALINK_INVAL;
79 static_assert(
GPUCA_MAXN % UNROLL_GLOBAL == 0);
81 static constexpr uint32_t MAX_GLOBAL = (MAX_SHARED <
GPUCA_MAXN) ? (((
GPUCA_MAXN - MAX_SHARED - 1) / UNROLL_GLOBAL + 1) * UNROLL_GLOBAL) : 0;
82 static constexpr uint32_t MAX_TOTAL = MAX_SHARED + MAX_GLOBAL;
84 const float chi2Cut = 3.f * 3.f * 4 * (s.mUpDx * s.mUpDx + s.mDnDx * s.mDnDx);
87 const int32_t lHitNumberOffset =
row.mHitNumberOffset;
88 const int32_t lHitNumberOffsetUp = rowUp.mHitNumberOffset;
89 const int32_t lHitNumberOffsetDn = rowDn.mHitNumberOffset;
90 const uint32_t lFirstHitInBinOffsetUp = rowUp.mFirstHitInBinOffset;
91 const uint32_t lFirstHitInBinOffsetDn = rowDn.mFirstHitInBinOffset;
95 const float y0 =
row.mGrid.mYMin;
96 const float z0 =
row.mGrid.mZMin;
97 const float stepY =
row.mHstepY;
98 const float stepZ =
row.mHstepZ;
100 const float y0Up = rowUp.mGrid.mYMin;
101 const float z0Up = rowUp.mGrid.mZMin;
102 const float stepYUp = rowUp.mHstepY;
103 const float stepZUp = rowUp.mHstepZ;
105 const float y0Dn = rowDn.mGrid.mYMin;
106 const float z0Dn = rowDn.mGrid.mZMin;
107 const float stepYDn = rowDn.mHstepY;
108 const float stepZDn = rowDn.mHstepZ;
110 const float kAngularMultiplier = tracker.mConstantMem->param.rec.tpc.searchWindowDZDR;
111 const float kAreaSizeY = tracker.mConstantMem->param.rec.tpc.neighboursSearchArea;
112 const float kAreaSizeZUp = kAngularMultiplier != 0.f ? (s.mUpDx * kAngularMultiplier) : kAreaSizeY;
113 const float kAreaSizeZDn = kAngularMultiplier != 0.f ? (-s.mDnDx * kAngularMultiplier) : kAreaSizeY;
114 const float kAreaSlopeZUp = kAngularMultiplier != 0.f ? 1.f : s.mUpTx;
115 const float kAreaSlopeZDn = kAngularMultiplier != 0.f ? 1.f : s.mDnTx;
117 calink neighUp[MAX_GLOBAL];
118 float yzUp[2 * MAX_GLOBAL];
120 for (int32_t ih = iThread; ih < s.mNHits; ih += nThreads) {
123 const float y =
y0 + hitData.
x * stepY;
124 const float z = z0 + hitData.
y * stepZ;
126 uint32_t nNeighUp = 0;
128 int32_t binYmin, binYmax, binZmin, binZmax;
132 const float yy =
y * s.mUpTx;
133 const float zz =
z * kAreaSlopeZUp;
134 minZ = zz - kAreaSizeZUp;
135 maxZ = zz + kAreaSizeZUp;
136 minY = yy - kAreaSizeY;
137 maxY = yy + kAreaSizeY;
138 rowUp.Grid().GetBin(
minY,
minZ, &binYmin, &binZmin);
139 rowUp.Grid().GetBin(
maxY,
maxZ, &binYmax, &binZmax);
140 nY = rowUp.Grid().Ny();
143 for (int32_t k1 = binZmin; k1 <= binZmax && (nNeighUp < MAX_TOTAL); k1++) {
144 int32_t iMin = lFirstHitInBin[lFirstHitInBinOffsetUp + k1 * nY + binYmin];
145 int32_t iMax = lFirstHitInBin[lFirstHitInBinOffsetUp + k1 * nY + binYmax + 1];
147 for (int32_t
i = iMin;
i < iMax && (nNeighUp < MAX_TOTAL);
i++) {
150 h.mY = y0Up + (hitDataUp.
x) * stepYUp;
151 h.mZ = z0Up + (hitDataUp.
y) * stepZUp;
157 const bool inGlobal = nNeighUp >= MAX_SHARED;
158 if constexpr (MAX_GLOBAL > 0) {
160 neighUp[nNeighUp - MAX_SHARED] = (
calink)
i;
161 yzUp[2 * (nNeighUp - MAX_SHARED)] = s.mDnDx * (
h.Y() -
y);
162 yzUp[2 * (nNeighUp - MAX_SHARED) + 1] = s.mDnDx * (
h.Z() -
z);
165 if constexpr (MAX_SHARED > 0) {
167 s.mB[nNeighUp][iThread] = (
calink)
i;
168 s.mA1[nNeighUp][iThread] = s.mDnDx * (
h.Y() -
y);
169 s.mA2[nNeighUp][iThread] = s.mDnDx * (
h.Z() -
z);
177 for (uint32_t iUp = nNeighUp; iUp < MAX_SHARED; iUp++) {
178 s.mA1[iUp][iThread] = -1.e10f;
179 s.mA2[iUp][iThread] = -1.e10f;
180 s.mB[iUp][iThread] = (
calink)-1;
184 const uint32_t nRest = nNeighUp - MAX_SHARED;
185 uint32_t nRestUnrolled = (nRest / UNROLL_GLOBAL) * UNROLL_GLOBAL;
186 if constexpr (MAX_GLOBAL > 1) {
187 if (nNeighUp > MAX_SHARED && nRestUnrolled < nRest) {
188 nRestUnrolled += UNROLL_GLOBAL;
189 GPUCA_UNROLL(U(UNROLL_GLOBAL - 1), U(UNROLL_GLOBAL - 1))
190 for (uint32_t k = 0; k + 1 < UNROLL_GLOBAL; k++) {
191 if (nRest + k < nRestUnrolled) {
192 yzUp[2 * (nRest + k)] = -1.e10f;
193 yzUp[2 * (nRest + k) + 1] = -1.e10f;
194 neighUp[nRest + k] = (
calink)-1;
201 const float yy =
y * s.mDnTx;
202 const float zz =
z * kAreaSlopeZDn;
203 minZ = zz - kAreaSizeZDn;
204 maxZ = zz + kAreaSizeZDn;
205 minY = yy - kAreaSizeY;
206 maxY = yy + kAreaSizeY;
208 rowDn.Grid().GetBin(
minY,
minZ, &binYmin, &binZmin);
209 rowDn.Grid().GetBin(
maxY,
maxZ, &binYmax, &binZmax);
210 nY = rowDn.Grid().Ny();
214 float bestD = chi2Cut;
216 for (int32_t k1 = binZmin; k1 <= binZmax; k1++) {
217 int32_t iMin = lFirstHitInBin[lFirstHitInBinOffsetDn + k1 * nY + binYmin];
218 int32_t iMax = lFirstHitInBin[lFirstHitInBinOffsetDn + k1 * nY + binYmax + 1];
219 for (int32_t
i = iMin;
i < iMax;
i++) {
221 float yDn = y0Dn + (hitDataDn.
x) * stepYDn;
222 float zDn = z0Dn + (hitDataDn.
y) * stepZDn;
224 if (yDn < minY || yDn >
maxY || zDn < minZ || zDn >
maxZ) {
228 float yDnProjUp = s.mUpDx * (yDn -
y);
229 float zDnProjUp = s.mUpDx * (zDn -
z);
231 if constexpr (MAX_SHARED > 0) {
234 for (uint32_t iUp = 0; iUp < maxSharedUp; iUp++) {
235 const float dy = yDnProjUp - s.mA1[iUp][iThread];
236 const float dz = zDnProjUp - s.mA2[iUp][iThread];
237 const float d = dy * dy + dz * dz;
246 if constexpr (MAX_GLOBAL > 0) {
247 if (nNeighUp > MAX_SHARED) {
248 for (uint32_t iUp = 0; iUp < nRestUnrolled; iUp += UNROLL_GLOBAL) {
250 for (uint32_t k = 0; k < UNROLL_GLOBAL; k++) {
251 const uint32_t jUp = iUp + k;
252 const float dy = yDnProjUp - yzUp[2 * jUp];
253 const float dz = zDnProjUp - yzUp[2 * jUp + 1];
254 const float d = dy * dy + dz * dz;
258 linkUp = MAX_SHARED + jUp;
268 if constexpr (MAX_SHARED > 0 && MAX_GLOBAL > 0) {
269 linkUp = ((uint32_t)linkUp >= MAX_SHARED) ? neighUp[linkUp - MAX_SHARED] : s.mB[linkUp][iThread];
270 }
else if constexpr (MAX_SHARED > 0) {
271 linkUp = s.mB[linkUp][iThread];
273 linkUp = neighUp[linkUp];
277 tracker.mData.mLinkUpData[lHitNumberOffset + ih] = linkUp;
278 tracker.mData.mLinkDownData[lHitNumberOffset + ih] = linkDn;