#ifndef GPUCA_GPUCODE_DEVICE
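// InitializeRows: a row's y acceptance follows from the sector opening angle, mMaxY = tan(dAlpha / 2) * mX.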
mRows[i].mMaxY = CAMath::Tan(p.par.dAlpha / 2.f) * mRows[i].mX;
mClusterIdOffset = clusterIdOffset;
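// Scratch buffers are aligned to kVectorAlignment (256 bytes).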
const uint32_t kVectorAlignment = 256;
maxZ = (mem->param.continuousMaxTimeBin > 0 ? (mem->calibObjects.fastTransformHelper->getCorrMap()->convTimeToZinTimeFrame(0, 0, mem->param.continuousMaxTimeBin)) : mem->param.tpcGeometry.TPCLength()) + 50;
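// Grid size estimate used for the first-hit-in-bin buffers: 128 entries per row plus 4 per hit.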
return 128 * nRows + 4 * nHits;
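// CreateGrid: in continuous (time frame) mode the z extent can exceed the TPC length; cap dz and absorb the excess into the bin-size normalization instead.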
float tfFactor = 1.f;
if (dz > GPUTPCGeometry::TPCLength() + 20.f) {
  tfFactor = dz / GPUTPCGeometry::TPCLength();
  dz = GPUTPCGeometry::TPCLength();
}
const float norm = CAMath::InvSqrt(row->mNHits / tfFactor);
GetMaxNBins(mem, row, maxy, maxz);
int32_t ny = CAMath::Max(1, CAMath::Min<int32_t>(maxy, (yMax - yMin) / sy + 1));
int32_t nz = CAMath::Max(1, CAMath::Min<int32_t>(maxz, (zMax - zMin) / sz + 1));
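// SetRowGridEmpty: reset the row to an empty grid with no hits.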
row.mGrid.CreateEmpty();
row.mHitNumberOffset = 0;
for (int32_t i = 0; i < 4; i++) {
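// InitFromClusterData: the device build compiles the early-transform path out (constexpr false); the host build enables it only with the early TPC transform and no ClusterNative input.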
#ifdef GPUCA_GPUCODE
constexpr bool EarlyTransformWithoutClusterNative = false;
#else
bool EarlyTransformWithoutClusterNative = mem->param.par.earlyTpcTransform && mem->ioPtrs.clustersNative == nullptr;
#endif
int32_t* tmpHitIndex = nullptr;
const uint32_t* NumberOfClustersInRow = nullptr;
const uint32_t* RowOffsets = nullptr;
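// Host early-transform path: count the clusters of each row and build the per-row offsets before re-sorting.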
if (EarlyTransformWithoutClusterNative) {
  NumberOfClustersInRow = NumberOfClustersInRowA;
  RowOffsets = RowOffsetsA;
  tmpHitIndexA.resize(mNumberOfHits);
  tmpHitIndex = tmpHitIndexA.data();
  memset(NumberOfClustersInRowA, 0, GPUCA_ROW_COUNT * sizeof(NumberOfClustersInRowA[0]));
  for (int32_t i = 0; i < mNumberOfHits; i++) {
    const int32_t tmpRow = mClusterData[i].row;
    NumberOfClustersInRowA[tmpRow]++;
  }
  int32_t tmpOffset = 0;
  for (int32_t i = 0; i < GPUCA_ROW_COUNT; i++) {
    RowOffsetsA[i] = tmpOffset;
    tmpOffset += NumberOfClustersInRow[i];
  }
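// Scatter each cluster's (y, z) into YZData in row order and remember its original index in tmpHitIndex.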
for (int32_t i = 0; i < mNumberOfHits; i++) {
  float2 tmp;
  tmp.x = mClusterData[i].y;
  tmp.y = mClusterData[i].z;
  int32_t tmpRow = mClusterData[i].row;
  int32_t newIndex = RowOffsetsA[tmpRow] + (RowsFilled[tmpRow])++;
  YZData[newIndex] = tmp;
  tmpHitIndex[newIndex] = i;
}
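// YZData and binMemory live in memory reused from the cluster-link and hit-weight buffers; the asserts below protect that reuse.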
static_assert(sizeof(*YZData) <= (sizeof(*mLinkUpData) + sizeof(*mLinkDownData)), "Cannot reuse memory");
static_assert(sizeof(*binMemory) <= sizeof(*mHitWeights), "Cannot reuse memory");
for (int32_t rowIndex = iBlock; rowIndex < GPUCA_ROW_COUNT; rowIndex += nBlocks) {
  const uint32_t NumberOfClusters = EarlyTransformWithoutClusterNative ? NumberOfClustersInRow[rowIndex] : mem->ioPtrs.clustersNative->nClusters[iSector][rowIndex];
  const uint32_t RowOffset = EarlyTransformWithoutClusterNative ? RowOffsets[rowIndex] : (mem->ioPtrs.clustersNative->clusterOffset[iSector][rowIndex] - mem->ioPtrs.clustersNative->clusterOffset[iSector][0]);
  constexpr const uint32_t maxN = 1u << (sizeof(calink) < 3 ? (sizeof(calink) * 8) : 24);
  if (NumberOfClusters >= maxN) {
    mem->errorCodes.raiseError(GPUErrors::ERROR_SECTORDATA_HITINROW_OVERFLOW, iSector * 1000 + rowIndex, NumberOfClusters, maxN);
    SetRowGridEmpty(row);
if (NumberOfClusters == 0) {
  SetRowGridEmpty(row);
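// Fill YZData for this row and find its y/z range: the early-transform path already holds sorted (y, z), the other paths copy y/z from mClusterData or convert pad/time from ClusterNative on the fly.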
if (EarlyTransformWithoutClusterNative) {
  for (uint32_t i = iThread; i < NumberOfClusters; i += nThreads) {
    UpdateMinMaxYZ(yMin, yMax, zMin, zMax, YZData[RowOffset + i].x, YZData[RowOffset + i].y);
  }
} else if (mem->param.par.earlyTpcTransform) {
  for (uint32_t i = iThread; i < NumberOfClusters; i += nThreads) {
    float2 tmp;
    tmp.x = mClusterData[RowOffset + i].y;
    tmp.y = mClusterData[RowOffset + i].z;
    YZData[RowOffset + i] = tmp;
  }
} else {
  for (uint32_t i = iThread; i < NumberOfClusters; i += nThreads) {
    float x, y, z;
    GPUTPCConvertImpl::convert(*mem, iSector, rowIndex, mem->ioPtrs.clustersNative->clusters[iSector][rowIndex][i].getPad(), mem->ioPtrs.clustersNative->clusters[iSector][rowIndex][i].getTime(), x, y, z);
    YZData[RowOffset + i] = CAMath::MakeFloat2(y, z);
  }
}
row.mNHits = NumberOfClusters;
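// Reduce the per-thread y/z ranges into the shared tmpMinMax[4] array, with atomic float min/max where the backend supports it.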
#ifdef GPUCA_HAVE_ATOMIC_MINMAX_FLOAT
CAMath::AtomicMinShared(&tmpMinMax[0], yMin);
CAMath::AtomicMaxShared(&tmpMinMax[1], yMax);
CAMath::AtomicMinShared(&tmpMinMax[2], zMin);
CAMath::AtomicMaxShared(&tmpMinMax[3], zMax);
#else
for (int32_t i = 0; i < nThreads; i++) {
  if (tmpMinMax[0] > yMin) {
    tmpMinMax[0] = yMin;
  }
  if (tmpMinMax[1] < yMax) {
    tmpMinMax[1] = yMax;
  }
  if (tmpMinMax[2] > zMin) {
    tmpMinMax[2] = zMin;
  }
  if (tmpMinMax[3] < zMax) {
    tmpMinMax[3] = zMax;
  }
}
#endif
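// Build the bin grid for this row from the reduced y/z extent.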
CreateGrid(mem, &row, tmpMinMax[0], tmpMinMax[1], tmpMinMax[2], tmpMinMax[3]);
const int32_t numberOfBins = grid.N();
constexpr const int32_t maxBins = sizeof(calink) < 4 ? (int32_t)(1ul << (sizeof(calink) * 8)) : 0x7FFFFFFF;
if (sizeof(calink) < 4 && numberOfBins >= maxBins) {
  mem->errorCodes.raiseError(GPUErrors::ERROR_SECTORDATA_BIN_OVERFLOW, iSector * 1000 + rowIndex, numberOfBins, maxBins);
  SetRowGridEmpty(row);
}
const uint32_t nn = numberOfBins + grid.Ny() + 3;
const uint32_t maxnn = GetGridSize(NumberOfClusters, 1);
if (nn >= maxnn) {
  mem->errorCodes.raiseError(GPUErrors::ERROR_SECTORDATA_FIRSTHITINBIN_OVERFLOW, iSector, nn, maxnn);
  SetRowGridEmpty(row);
}
for (int32_t bin = iThread; bin < numberOfBins; bin += nThreads) {
  c[bin] = 0;
}
for (int32_t hitIndex = iThread; hitIndex < row.mNHits; hitIndex += nThreads) {
  const int32_t globalHitIndex = RowOffset + hitIndex;
  const calink bin = row.mGrid.GetBin(YZData[globalHitIndex].x, YZData[globalHitIndex].y);
  bins[hitIndex] = bin;
  CAMath::AtomicAdd(&c[bin], 1u);
}
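// Turn the per-bin counts into the grid's first-hit-in-bin table (running sum over numberOfBins bins, padded up to nn entries).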
for (int32_t bin = 0; bin < numberOfBins; ++bin) {
for (uint32_t bin = numberOfBins; bin < nn; bin++) {
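// Hit y/z are stored as fixed-point cahit values relative to the grid origin, scaled so the full grid range fits into maxVal.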
constexpr float maxVal = (((int64_t)1 << (sizeof(cahit) < 3 ? sizeof(cahit) * 8 : 24)) - 1);
constexpr float packingConstant = 1.f / (maxVal - 2.f);
const float y0 = row.mGrid.YMin();
const float z0 = row.mGrid.ZMin();
const float stepY = (row.mGrid.YMax() - y0) * packingConstant;
const float stepZ = (row.mGrid.ZMax() - z0) * packingConstant;
const float stepYi = 1.f / stepY;
const float stepZi = 1.f / stepZ;
row.mHstepYi = stepYi;
row.mHstepZi = stepZi;
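// Place each hit at its bin-sorted position: decrement the bin counter to get the slot, then store the packed coordinates and the original cluster index.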
for (int32_t hitIndex = iThread; hitIndex < row.mNHits; hitIndex += nThreads) {
  const calink bin = bins[hitIndex];
  const calink ind = CAMath::AtomicAdd(&c[bin], (calink)-1) - 1;
  const int32_t globalBinsortedIndex = row.mHitNumberOffset + ind;
  const int32_t globalHitIndex = RowOffset + hitIndex;
  mClusterDataIndex[globalBinsortedIndex] = EarlyTransformWithoutClusterNative ? tmpHitIndex[globalHitIndex] : (RowOffset + hitIndex);
  const float xx = ((YZData[globalHitIndex].x - y0) * stepYi) + .5;
  const float yy = ((YZData[globalHitIndex].y - z0) * stepZi) + .5;
#if !defined(GPUCA_GPUCODE) && !defined(NDEBUG)
  if (xx < 0 || yy < 0 || xx > maxVal || yy > maxVal) {
    std::cout << "!!!! hit packing error!!! " << xx << " " << yy << " (" << maxVal << ")" << std::endl;
  }
#endif
  mHitData[globalBinsortedIndex].x = (cahit)xx;
  mHitData[globalBinsortedIndex].y = (cahit)yy;
}
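// Triggered (non-continuous) data should stay within the physical z range; an out-of-range |z| is reported and the row is dropped.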
if (iThread == 0 && !mem->param.par.continuousTracking) {
  const float maxAbsZ = CAMath::Max(CAMath::Abs(tmpMinMax[2]), CAMath::Abs(tmpMinMax[3]));
  mem->errorCodes.raiseError(GPUErrors::ERROR_SECTORDATA_Z_OVERFLOW, iSector, (uint32_t)maxAbsZ);
  SetRowGridEmpty(row);