#ifndef GPUCA_GPUCODE_DEVICE
mRows[i].mX = GPUTPCGeometry::Row2X(i);
mRows[i].mMaxY = CAMath::Tan(p.par.dAlpha / 2.f) * mRows[i].mX;
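// Row geometry: each pad row sits at its nominal radius, and mMaxY is the half-width of the sector at that radius (dAlpha being the sector's azimuthal coverage).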
mClusterIdOffset = clusterIdOffset;
const uint32_t kVectorAlignment = 256;
maxZ = (mem->param.continuousMaxTimeBin > 0 ? (mem->calibObjects.fastTransformHelper->getCorrMap()->convTimeToZinTimeFrame(0, 0, mem->param.continuousMaxTimeBin)) : GPUTPCGeometry::TPCLength()) + 50;
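// Z extent for the grids: in continuous mode it follows from the maximum time bin via the fast transform, otherwise the nominal TPC length is used; 50 is an extra safety margin.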
return 128 * nRows + 4 * nHits;
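// Presumably the scratch-size estimate used for the per-row grids: a fixed 128 entries per row plus 4 per hit (cf. GetGridSize(NumberOfClusters, 1) below).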
float tfFactor = 1.f;
if (dz > GPUTPCGeometry::TPCLength() + 20.f) {
  tfFactor = dz / GPUTPCGeometry::TPCLength();
  dz = GPUTPCGeometry::TPCLength();
}
const float norm = CAMath::InvSqrt(row->mNHits / tfFactor);
GetMaxNBins(mem, row, maxy, maxz);
int32_t ny = CAMath::Max(1, CAMath::Min<int32_t>(maxy, (yMax - yMin) / sy + 1));
int32_t nz = CAMath::Max(1, CAMath::Min<int32_t>(maxz, (zMax - zMin) / sz + 1));
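// sy/sz (not shown here) are the bin sizes, derived from 'norm', i.e. from the inverse square root of the hit density; tfFactor rescales the density for long time frames. ny/nz are clamped to the limits from GetMaxNBins.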
row.mGrid.CreateEmpty();
row.mHitNumberOffset = 0;
for (int32_t i = 0; i < 4; i++) {
constexpr bool EarlyTransformWithoutClusterNative = false;
bool EarlyTransformWithoutClusterNative = mem->param.par.earlyTpcTransform && mem->ioPtrs.clustersNative == nullptr;
int32_t* tmpHitIndex = nullptr;
const uint32_t* NumberOfClustersInRow = nullptr;
const uint32_t* RowOffsets = nullptr;
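// EarlyTransformWithoutClusterNative selects the legacy input path (clusters already transformed, no ClusterNative available); the two definitions above presumably belong to alternative build configurations, with the path compiled out as constexpr false in one of them.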
if (EarlyTransformWithoutClusterNative) {
  NumberOfClustersInRow = NumberOfClustersInRowA;
  RowOffsets = RowOffsetsA;
  tmpHitIndexA.resize(mNumberOfHits);
  tmpHitIndex = tmpHitIndexA.data();
  memset(NumberOfClustersInRowA, 0, GPUCA_ROW_COUNT * sizeof(NumberOfClustersInRowA[0]));
  for (int32_t i = 0; i < mNumberOfHits; i++) {
    const int32_t tmpRow = mClusterData[i].row;
    NumberOfClustersInRowA[tmpRow]++;
int32_t tmpOffset = 0;
RowOffsetsA[i] = tmpOffset;
tmpOffset += NumberOfClustersInRow[i];
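// Exclusive prefix sum over the per-row cluster counts: RowOffsetsA[i] becomes the index of the first cluster of row i.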
for (int32_t i = 0; i < mNumberOfHits; i++) {
  tmp.x = mClusterData[i].y;
  tmp.y = mClusterData[i].z;
  int32_t tmpRow = mClusterData[i].row;
  int32_t newIndex = RowOffsetsA[tmpRow] + (RowsFilled[tmpRow])++;
  YZData[newIndex] = tmp;
  tmpHitIndex[newIndex] = i;
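// Counting sort by row: (y, z) of each cluster is written row-contiguously into YZData, while tmpHitIndex keeps the original cluster index for later lookup.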
static_assert(sizeof(*YZData) <= (sizeof(*mLinkUpData) + sizeof(*mLinkDownData)), "Cannot reuse memory");
static_assert(sizeof(*binMemory) <= sizeof(*mHitWeights), "Cannot reuse memory");
for (int32_t rowIndex = iBlock; rowIndex < GPUCA_ROW_COUNT; rowIndex += nBlocks) {
  const uint32_t NumberOfClusters = EarlyTransformWithoutClusterNative ? NumberOfClustersInRow[rowIndex] : mem->ioPtrs.clustersNative->nClusters[iSector][rowIndex];
  const uint32_t RowOffset = EarlyTransformWithoutClusterNative ? RowOffsets[rowIndex] : (mem->ioPtrs.clustersNative->clusterOffset[iSector][rowIndex] - mem->ioPtrs.clustersNative->clusterOffset[iSector][0]);
  constexpr const uint32_t maxN = 1u << (sizeof(calink) < 3 ? (sizeof(calink) * 8) : 24);
  if (NumberOfClusters >= maxN) {
    mem->errorCodes.raiseError(GPUErrors::ERROR_SECTORDATA_HITINROW_OVERFLOW, iSector * 1000 + rowIndex, NumberOfClusters, maxN);
    SetRowGridEmpty(row);
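  // calink is the compact index type used for hits and links; a row with more clusters than calink can address is reported as an overflow and left with an empty grid.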
  if (NumberOfClusters == 0) {
    SetRowGridEmpty(row);
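  // Three ways to obtain this row's (y, z) points: the legacy path reuses the YZData filled above and only updates the extrema; with early TPC transform the already-transformed cluster coordinates are copied; otherwise native clusters are converted on the fly from pad/time by GPUTPCConvertImpl::convert.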
  if (EarlyTransformWithoutClusterNative) {
    for (uint32_t i = iThread; i < NumberOfClusters; i += nThreads) {
      UpdateMinMaxYZ(yMin, yMax, zMin, zMax, YZData[RowOffset + i].x, YZData[RowOffset + i].y);
  } else if (mem->param.par.earlyTpcTransform) {
    for (uint32_t i = iThread; i < NumberOfClusters; i += nThreads) {
      tmp.x = mClusterData[RowOffset + i].y;
      tmp.y = mClusterData[RowOffset + i].z;
      YZData[RowOffset + i] = tmp;
    for (uint32_t i = iThread; i < NumberOfClusters; i += nThreads) {
      GPUTPCConvertImpl::convert(*mem, iSector, rowIndex, mem->ioPtrs.clustersNative->clusters[iSector][rowIndex][i].getPad(), mem->ioPtrs.clustersNative->clusters[iSector][rowIndex][i].getTime(), x, y, z);
      YZData[RowOffset + i] = CAMath::MakeFloat2(y, z);
  row.mNHits = NumberOfClusters;
#ifdef GPUCA_HAVE_ATOMIC_MINMAX_FLOAT
  CAMath::AtomicMinShared(&tmpMinMax[0], yMin);
  CAMath::AtomicMaxShared(&tmpMinMax[1], yMax);
  CAMath::AtomicMinShared(&tmpMinMax[2], zMin);
  CAMath::AtomicMaxShared(&tmpMinMax[3], zMax);
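  // The per-thread y/z extrema are reduced into the shared tmpMinMax[4]: atomically where float atomic min/max is available, otherwise thread by thread in the serial loop below.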
  for (int32_t i = 0; i < nThreads; i++) {
    if (tmpMinMax[0] > yMin) {
    if (tmpMinMax[1] < yMax) {
    if (tmpMinMax[2] > zMin) {
    if (tmpMinMax[3] < zMax) {
  CreateGrid(mem, &row, tmpMinMax[0], tmpMinMax[1], tmpMinMax[2], tmpMinMax[3]);
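  // The 2D (y, z) search grid of the row covers exactly the reduced extent; its bin sizes follow from the hit density (see CreateGrid above).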
  const int32_t numberOfBins = grid.N();
  constexpr const int32_t maxBins = sizeof(calink) < 4 ? (int32_t)(1ul << (sizeof(calink) * 8)) : 0x7FFFFFFF;
  if (sizeof(calink) < 4 && numberOfBins >= maxBins) {
    mem->errorCodes.raiseError(GPUErrors::ERROR_SECTORDATA_BIN_OVERFLOW, iSector * 1000 + rowIndex, numberOfBins, maxBins);
    SetRowGridEmpty(row);
  const uint32_t nn = numberOfBins + grid.Ny() + 3;
  const uint32_t maxnn = GetGridSize(NumberOfClusters, 1);
    mem->errorCodes.raiseError(GPUErrors::ERROR_SECTORDATA_FIRSTHITINBIN_OVERFLOW, iSector, nn, maxnn);
    SetRowGridEmpty(row);
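  // nn is the size of the FirstHitInBin table (all bins plus padding entries); it must fit into the scratch space budgeted by GetGridSize, otherwise the row is dropped.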
  for (int32_t bin = iThread; bin < numberOfBins; bin += nThreads) {
  for (int32_t hitIndex = iThread; hitIndex < row.mNHits; hitIndex += nThreads) {
    const int32_t globalHitIndex = RowOffset + hitIndex;
    const calink bin = row.mGrid.GetBin(YZData[globalHitIndex].x, YZData[globalHitIndex].y);
    bins[hitIndex] = bin;
    CAMath::AtomicAdd(&c[bin], 1u);
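  // First pass of the per-row counting sort: each hit's bin is cached in bins[] and the per-bin counter c[bin] is incremented atomically.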
  for (int32_t bin = 0; bin < numberOfBins; ++bin) {
  for (uint32_t bin = numberOfBins; bin < nn; bin++) {
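  // Presumably the prefix sum over the bin counters, followed by padding of the remaining FirstHitInBin entries up to nn.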
  constexpr float maxVal = (((int64_t)1 << (sizeof(cahit) < 3 ? sizeof(cahit) * 8 : 24)) - 1);
  constexpr float packingConstant = 1.f / (maxVal - 2.f);
  const float y0 = row.mGrid.YMin();
  const float z0 = row.mGrid.ZMin();
  const float stepY = (row.mGrid.YMax() - y0) * packingConstant;
  const float stepZ = (row.mGrid.ZMax() - z0) * packingConstant;
  const float stepYi = 1.f / stepY;
  const float stepZi = 1.f / stepZ;
  row.mHstepYi = stepYi;
  row.mHstepZi = stepZi;
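  // Fixed-point packing of the hit coordinates: cahit can hold values up to maxVal, so stepY/stepZ map the row's full y/z range onto that integer range; the inverse steps are stored in the row so the float coordinates can be unpacked later.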
  for (int32_t hitIndex = iThread; hitIndex < row.mNHits; hitIndex += nThreads) {
    const calink ind = CAMath::AtomicAdd(&c[bin], (calink)-1) - 1;
    const int32_t globalBinsortedIndex = row.mHitNumberOffset + ind;
    const int32_t globalHitIndex = RowOffset + hitIndex;
    mClusterDataIndex[globalBinsortedIndex] = EarlyTransformWithoutClusterNative ? tmpHitIndex[globalHitIndex] : (RowOffset + hitIndex);
    const float xx = ((YZData[globalHitIndex].x - y0) * stepYi) + .5;
    const float yy = ((YZData[globalHitIndex].y - z0) * stepZi) + .5;
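    // Second pass of the counting sort: atomically decrementing c[bin] assigns each hit its slot within its bin, so hits end up bin-contiguous; xx/yy are the packed fixed-point coordinates relative to the grid origin (y0, z0).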
#if !defined(GPUCA_GPUCODE) && !defined(NDEBUG)
    if (xx < 0 || yy < 0 || xx > maxVal || yy > maxVal) {
      std::cout << "!!!! hit packing error!!! " << xx << " " << yy << " (" << maxVal << ")" << std::endl;
    mHitData[globalBinsortedIndex].x = (cahit)xx;
    mHitData[globalBinsortedIndex].y = (cahit)yy;
  if (iThread == 0 && !mem->param.par.continuousTracking) {
    const float maxAbsZ = CAMath::Max(CAMath::Abs(tmpMinMax[2]), CAMath::Abs(tmpMinMax[3]));
    mem->errorCodes.raiseError(GPUErrors::ERROR_SECTORDATA_Z_OVERFLOW, iSector, (uint32_t)maxAbsZ);
    SetRowGridEmpty(row);
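  // Sanity check for triggered (non-continuous) data: if the row's |z| extent exceeds the allowed range, a Z overflow error is raised and the row is dropped.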