Project
Loading...
Searching...
No Matches
ROFLookupTables.h
Go to the documentation of this file.
1// Copyright 2019-2026 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
12#ifndef TRACKINGITSU_INCLUDE_ROFOVERLAPTABLE_H_
13#define TRACKINGITSU_INCLUDE_ROFOVERLAPTABLE_H_
14
15#include <cstddef>
16#include <cstdint>
17#include <limits>
18#include <string>
19#include <vector>
20#include <ranges>
21
22#ifndef GPUCA_GPUCODE
23#include <format>
24#include "Framework/Logger.h"
25#endif
26
31#include "GPUCommonMath.h"
32#include "GPUCommonDef.h"
33
34namespace o2::its
35{
36
37// Layer timing definition
41 BCType mNROFsTF{0}; // number of ROFs per timeframe
42 BCType mROFLength{0}; // ROF length in BC
43 BCType mROFDelay{0}; // delay of ROFs wrt start of first orbit in TF in BC
44 BCType mROFBias{0}; // bias wrt to the LHC clock in BC
45 BCType mROFAddTimeErr{0}; // additionally imposed uncertainty on ROF time in BC
46
47 // return start of ROF in BC
48 // this does not account for the opt. error!
49 GPUhdi() BCType getROFStartInBC(BCType rofId) const noexcept
50 {
51 assert(rofId < mNROFsTF && rofId >= 0);
52 return (mROFLength * rofId) + mROFDelay + mROFBias;
53 }
54
55 // return end of ROF in BCs
56 // this does not account for the opt. error!
57 GPUhdi() BCType getROFEndInBC(BCType rofId) const noexcept
58 {
59 assert(rofId < mNROFsTF);
60 return getROFStartInBC(rofId) + mROFLength;
61 }
62
63 // return (clamped) time-interval of rof
// Time interval covered by ROF `rofId`, returned as a start BC plus a width
// (the width is cast to TimeStampErrorType).
//  - withError == false: nominal ROF start and mROFLength as width.
//  - withError == true : the start is lowered by mROFAddTimeErr and clamped at 0,
//    the width is (end - start).
// NOTE(review): this listing skips source line 70, so any adjustment of `end`
// (e.g. widening by mROFAddTimeErr) is not visible here -- confirm against the
// original header before relying on the upper edge.
64 GPUhdi() TimeEstBC getROFTimeBounds(BCType rofId, bool withError = false) const noexcept
65 {
66 if (withError) {
// work in 64-bit signed so that the subtraction below cannot wrap before clamping
67 int64_t start = getROFStartInBC(rofId);
68 int64_t end = getROFEndInBC(rofId);
69 start = o2::gpu::CAMath::Max(start - mROFAddTimeErr, int64_t(0));
71 return {static_cast<BCType>(start), static_cast<TimeStampErrorType>(end - start)};
72 }
73 return {getROFStartInBC(rofId), static_cast<TimeStampErrorType>(mROFLength)};
74 }
75
76 // return which ROF this BC belongs to
// Maps a bunch crossing to a ROF index: 0 when bc <= offset, otherwise the
// integer quotient (bc - offset) / mROFLength. The result is not clamped to
// the number of ROFs in the timeframe.
// NOTE(review): the definition of `offset` (source line 79) is missing from
// this listing -- presumably mROFDelay + mROFBias as in the float overload; confirm.
77 GPUhdi() BCType getROF(BCType bc) const noexcept
78 {
80 if (bc <= offset) {
81 return 0;
82 }
83 return (bc - offset) / mROFLength;
84 }
85
86 // return which ROF this timestamp belongs to, judged by its lower edge
// Maps a timestamp to a ROF index using its lower edge (timestamp - error).
// The lower edge is taken as 0 when the error exceeds the timestamp, otherwise
// it is floored to a BC. Returns 0 when that BC is <= offset, otherwise
// (bc - offset) / mROFLength.
// NOTE(review): the definition of `offset` (source line 89) is missing from
// this listing -- presumably mROFDelay + mROFBias; confirm.
87 GPUhdi() BCType getROF(TimeStamp ts) const noexcept
88 {
90 const BCType bc = (ts.getTimeStamp() < ts.getTimeStampError()) ? BCType(0) : static_cast<BCType>(o2::gpu::CAMath::Floor(ts.getTimeStamp() - ts.getTimeStampError()));
91 if (bc <= offset) {
92 return 0;
93 }
94 return (bc - offset) / mROFLength;
95 }
96
97 // return which ROF this floating point (number of BCs) time belongs
98 GPUhdi() BCType getROF(float time) const noexcept
99 {
100 const float offset = static_cast<float>(mROFDelay + mROFBias);
101 if (time <= offset) {
102 return 0;
103 }
104 return static_cast<BCType>((time - offset) / mROFLength);
105 }
106
107 GPUhdi() bool intersectROF(BCType rof, float lower, float upper) const noexcept
108 {
109 const auto rofTS = getROFTimeBounds(rof, true);
110 return static_cast<float>(rofTS.upper()) > lower && upper > static_cast<float>(rofTS.lower());
111 }
112
113 // return clamped ROF range with strictly positive overlap with timestamp interval
114 GPUhdi() BCRange getROFRange(TimeStamp ts) const noexcept
115 {
116 const float lower = ts.getTimeStamp() - ts.getTimeStampError();
117 const float upper = ts.getTimeStamp() + ts.getTimeStampError();
118 return getROFRange(lower, upper);
119 }
120
121 GPUhdi() BCRange getROFRange(TimeEstBC ts) const noexcept
122 {
123 return getROFRange(static_cast<float>(ts.lower()), static_cast<float>(ts.upper()));
124 }
125
126 GPUhdi() BCRange getROFRange(float lower, float upper) const noexcept
127 {
128 const BCType maxROF = mNROFsTF - 1;
129 BCType first = o2::gpu::CAMath::Clamp(getROF(lower - mROFAddTimeErr), BCType{0}, maxROF);
130 BCType last = o2::gpu::CAMath::Clamp(getROF(upper + mROFAddTimeErr), BCType{0}, maxROF);
131
132 if (first <= last && !intersectROF(first, lower, upper)) {
133 ++first;
134 }
135 if (last >= first && !intersectROF(last, lower, upper)) {
136 --last;
137 }
138 return {first, first <= last ? static_cast<BCType>(last - first + 1) : BCType{0}};
139 }
140
141#ifndef GPUCA_GPUCODE
// One-line, human-readable summary of this layer's timing parameters.
// The "per Orbit" figure is LHCMaxBunches / mROFLength; it is reported as 0
// when mROFLength is 0 (the default) to avoid an integer division by zero.
GPUh() std::string asString() const
{
  using PerOrbit = decltype(o2::constants::lhc::LHCMaxBunches / mROFLength);
  const PerOrbit rofsPerOrbit = (mROFLength != 0) ? (o2::constants::lhc::LHCMaxBunches / mROFLength) : PerOrbit{0};
  return std::format("NROFsPerTF {:4} ROFLength {:4} ({:4} per Orbit) ROFDelay {:4} ROFBias {:4} ROFAddTimeErr {:4}", mNROFsTF, mROFLength, rofsPerOrbit, mROFDelay, mROFBias, mROFAddTimeErr);
}
146
147 GPUh() void print() const
148 {
149 LOG(info) << asString();
150 }
151#endif
152};
153
154// Base class for lookup to define layers
155template <int32_t NLayers>
157{
158 protected:
159 LayerTiming mLayers[NLayers];
160
161 public:
163 LayerTimingBase() = default;
164
165 GPUh() void defineLayer(int32_t layer, T nROFsTF, T rofLength, T rofDelay, T rofBias, T rofTE)
166 {
167 assert(layer >= 0 && layer < NLayers);
168 mLayers[layer] = {nROFsTF, rofLength, rofDelay, rofBias, rofTE};
169 }
170
171 GPUh() void defineLayer(int32_t layer, const LayerTiming& timing)
172 {
173 assert(layer >= 0 && layer < NLayers);
174 mLayers[layer] = timing;
175 }
176
177 GPUhdi() const LayerTiming& getLayer(int32_t layer) const
178 {
179 assert(layer >= 0 && layer < NLayers);
180 return mLayers[layer];
181 }
182
183 GPUhdi() constexpr int32_t getEntries() noexcept { return NLayers; }
184
185#ifndef GPUCA_GPUCODE
186 GPUh() void print() const
187 {
188 LOGP(info, "Imposed time structure:");
189 for (int32_t iL{0}; iL < NLayers; ++iL) {
190 LOGP(info, "\tLayer:{} {}", iL, mLayers[iL].asString());
191 }
192 }
193#endif
194};
195
196// GPU friendly view of the table below
197template <int32_t NLayers, typename TableEntry, typename TableIndex>
199 const TableEntry* mFlatTable{nullptr};
200 const TableIndex* mIndices{nullptr};
201 const LayerTiming* mLayers{nullptr};
202
203 GPUhdi() const LayerTiming& getLayer(int32_t layer) const noexcept
204 {
205 assert(layer >= 0 && layer < NLayers);
206 return mLayers[layer];
207 }
208
209 GPUh() int32_t getClock() const noexcept
210 {
211 // we take the fastest layer as clock
212 int32_t fastest = 0;
213 uint32_t maxNROFs{0};
214 for (int32_t iL{0}; iL < NLayers; ++iL) {
215 const auto& layer = getLayer(iL);
216 // by definition the fastest layer has the most ROFs
217 // this also solves the problem of a delay large than ROFLength
218 // if mNROFsTF is correct
219 if (layer.mNROFsTF > maxNROFs) {
220 fastest = iL;
221 maxNROFs = layer.mNROFsTF;
222 }
223 }
224 return fastest;
225 }
226
227 GPUh() const LayerTiming& getClockLayer() const noexcept
228 {
229 return mLayers[getClock()];
230 }
231
232 GPUhdi() const TableEntry& getOverlap(int32_t from, int32_t to, size_t rofIdx) const noexcept
233 {
234 assert(from < NLayers && to < NLayers);
235 const size_t linearIdx = (from * NLayers) + to;
236 const auto& idx = mIndices[linearIdx];
237 assert(rofIdx < idx.getEntries());
238 return mFlatTable[idx.getFirstEntry() + rofIdx];
239 }
240
241 GPUhdi() bool doROFsOverlap(int32_t layer0, size_t rof0, int32_t layer1, size_t rof1) const noexcept
242 {
243 if (layer0 == layer1) { // layer is compatible with itself
244 return rof0 == rof1;
245 }
246
247 assert(layer0 < NLayers && layer1 < NLayers);
248 const size_t linearIdx = (layer0 * NLayers) + layer1;
249 const auto& idx = mIndices[linearIdx];
250
251 if (rof0 >= idx.getEntries()) {
252 return false;
253 }
254
255 const auto& overlap = mFlatTable[idx.getFirstEntry() + rof0];
256
257 if (overlap.getEntries() == 0) {
258 return false;
259 }
260
261 const size_t firstCompatible = overlap.getFirstEntry();
262 const size_t lastCompatible = firstCompatible + overlap.getEntries() - 1;
263 return rof1 >= firstCompatible && rof1 <= lastCompatible;
264 }
265
266 GPUhdi() TimeEstBC getTimeStamp(int32_t layer0, size_t rof0, int32_t layer1, size_t rof1) const noexcept
267 {
268 assert(layer0 < NLayers && layer1 < NLayers);
269 assert(doROFsOverlap(layer0, rof0, layer1, rof1));
270 // retrieves the combined timestamp
271 // e.g., taking one cluster from rof0 and one from rof1
272 // and constructing a tracklet (doublet) what is its time
273 // this assumes that the rofs overlap, e.g. doROFsOverlap -> true
274 // get timestamp including margins from rof0 and rof1
275 const auto t0 = mLayers[layer0].getROFTimeBounds(rof0, true);
276 const auto t1 = mLayers[layer1].getROFTimeBounds(rof1, true);
277 return t0 + t1;
278 }
279
280#ifndef GPUCA_GPUCODE
282 GPUh() void printAll() const
283 {
284 for (int32_t i = 0; i < NLayers; ++i) {
285 for (int32_t j = 0; j < NLayers; ++j) {
286 if (i != j) {
287 printMapping(i, j);
288 }
289 }
290 }
291 printSummary();
292 }
293
294 GPUh() void printMapping(int32_t from, int32_t to) const
295 {
296 if (from == to) {
297 LOGP(error, "No self-lookup supported");
298 return;
299 }
300
301 constexpr int w_index = 10;
302 constexpr int w_first = 12;
303 constexpr int w_last = 12;
304 constexpr int w_count = 10;
305
306 LOGF(info, "Overlap mapping: Layer %d -> Layer %d", from, to);
307 LOGP(info, "From: {}", mLayers[from].asString());
308 LOGP(info, "To : {}", mLayers[to].asString());
309 LOGF(info, "%*s | %*s | %*s | %*s", w_index, "ROF.index", w_first, "First.ROF", w_last, "Last.ROF", w_count, "Count");
310 LOGF(info, "%.*s-+-%.*s-+-%.*s-+-%.*s", w_index, "----------", w_first, "------------", w_last, "------------", w_count, "----------");
311
312 const size_t linearIdx = (from * NLayers) + to;
313 const auto& idx = mIndices[linearIdx];
314 for (int32_t i = 0; i < idx.getEntries(); ++i) {
315 const auto& overlap = getOverlap(from, to, i);
316 LOGF(info, "%*d | %*d | %*d | %*d", w_index, i, w_first, overlap.getFirstEntry(), w_last, overlap.getEntriesBound() - 1, w_count, overlap.getEntries());
317 }
318 }
319
320 GPUh() void printSummary() const
321 {
322 uint32_t totalEntries{0};
323 size_t flatTableSize{0};
324
325 for (int32_t i = 0; i < NLayers; ++i) {
326 for (int32_t j = 0; j < NLayers; ++j) {
327 if (i != j) {
328 const size_t linearIdx = (i * NLayers) + j;
329 const auto& idx = mIndices[linearIdx];
330 totalEntries += idx.getEntries();
331 flatTableSize += idx.getEntries();
332 }
333 }
334 }
335
336 for (int32_t i = 0; i < NLayers; ++i) {
337 mLayers[i].print();
338 }
339
340 const uint32_t totalBytes = (flatTableSize * sizeof(TableEntry)) + (static_cast<unsigned long>(NLayers * NLayers) * sizeof(TableIndex));
341 LOGF(info, "------------------------------------------------------------");
342 LOGF(info, "Total overlap table size: %u entries", totalEntries);
343 LOGF(info, "Flat table size: %zu entries", flatTableSize);
344 LOGF(info, "Total view size: %u bytes", totalBytes);
345 LOGF(info, "------------------------------------------------------------");
346 }
347#endif
348};
349
350// Precalculated lookup table to find overlapping ROFs in another layer given a ROF index in the current layer
351template <int32_t NLayers>
352class ROFOverlapTable : public LayerTimingBase<NLayers>
353{
354 public:
358
360 ROFOverlapTable() = default;
361
362 GPUh() void init()
363 {
364 std::vector<TableEntry> table[NLayers][NLayers];
365 for (int32_t i{0}; i < NLayers; ++i) {
366 for (int32_t j{0}; j < NLayers; ++j) {
367 if (i != j) { // we do not need self-lookup
368 buildMapping(i, j, table[i][j]);
369 }
370 }
371 }
372 flatten(table);
373 }
374
375 GPUh() View getView() const
376 {
377 View view;
378 view.mFlatTable = mFlatTable.data();
379 view.mIndices = mIndices;
380 view.mLayers = this->mLayers;
381 return view;
382 }
383
384 GPUh() View getDeviceView(const TableEntry* deviceFlatTablePtr, const TableIndex* deviceIndicesPtr, const LayerTiming* deviceLayerTimingPtr) const
385 {
386 View view;
387 view.mFlatTable = deviceFlatTablePtr;
388 view.mIndices = deviceIndicesPtr;
389 view.mLayers = deviceLayerTimingPtr;
390 return view;
391 }
392
393 GPUh() size_t getFlatTableSize() const noexcept { return mFlatTable.size(); }
394 static GPUh() constexpr size_t getIndicesSize() { return static_cast<size_t>(NLayers * NLayers); }
395
396 private:
397 GPUh() void buildMapping(int32_t from, int32_t to, std::vector<TableEntry>& table)
398 {
399 const auto& layerFrom = this->mLayers[from];
400 const auto& layerTo = this->mLayers[to];
401 table.resize(layerFrom.mNROFsTF);
402
403 for (int32_t iROF{0}; iROF < layerFrom.mNROFsTF; ++iROF) {
404 int64_t fromStart = o2::gpu::CAMath::Max((int64_t)layerFrom.getROFStartInBC(iROF) - (int64_t)layerFrom.mROFAddTimeErr, int64_t(0));
405 int64_t fromEnd = (int64_t)layerFrom.getROFEndInBC(iROF) + layerFrom.mROFAddTimeErr;
406
407 int32_t firstROFTo = o2::gpu::CAMath::Max(0, (int32_t)((fromStart - (int64_t)layerTo.mROFAddTimeErr - (int64_t)layerTo.mROFDelay - (int64_t)layerTo.mROFBias) / (int64_t)layerTo.mROFLength));
408 auto lastROFTo = (int32_t)((fromEnd + (int64_t)layerTo.mROFAddTimeErr - (int64_t)layerTo.mROFDelay - (int64_t)layerTo.mROFBias - 1) / (int64_t)layerTo.mROFLength);
409 firstROFTo = o2::gpu::CAMath::Max(0, firstROFTo);
410 lastROFTo = o2::gpu::CAMath::Min((int32_t)layerTo.mNROFsTF - 1, lastROFTo);
411
412 while (firstROFTo <= lastROFTo) {
413 int64_t toStart = o2::gpu::CAMath::Max((int64_t)layerTo.getROFStartInBC(firstROFTo) - (int64_t)layerTo.mROFAddTimeErr, int64_t(0));
414 int64_t toEnd = (int64_t)layerTo.getROFEndInBC(firstROFTo) + layerTo.mROFAddTimeErr;
415 if (toEnd > fromStart && toStart < fromEnd) {
416 break;
417 }
418 ++firstROFTo;
419 }
420 while (lastROFTo >= firstROFTo) {
421 int64_t toStart = o2::gpu::CAMath::Max((int64_t)layerTo.getROFStartInBC(lastROFTo) - (int64_t)layerTo.mROFAddTimeErr, int64_t(0));
422 int64_t toEnd = (int64_t)layerTo.getROFEndInBC(lastROFTo) + layerTo.mROFAddTimeErr;
423 if (toEnd > fromStart && toStart < fromEnd) {
424 break;
425 }
426 --lastROFTo;
427 }
428 int32_t count = (firstROFTo <= lastROFTo) ? (lastROFTo - firstROFTo + 1) : 0;
429 table[iROF] = {static_cast<T>(firstROFTo), static_cast<T>(count)};
430 }
431 }
432
433 GPUh() void flatten(const std::vector<TableEntry> table[NLayers][NLayers])
434 {
435 size_t total{0};
436 for (int32_t i{0}; i < NLayers; ++i) {
437 for (int32_t j{0}; j < NLayers; ++j) {
438 if (i != j) { // we do not need self-lookup
439 total += table[i][j].size();
440 }
441 }
442 }
443
444 mFlatTable.reserve(total);
445
446 for (int32_t i{0}; i < NLayers; ++i) {
447 for (int32_t j{0}; j < NLayers; ++j) {
448 size_t idx = (i * NLayers) + j;
449 if (i != j) {
450 mIndices[idx].setFirstEntry(static_cast<T>(mFlatTable.size()));
451 mIndices[idx].setEntries(static_cast<T>(table[i][j].size()));
452 mFlatTable.insert(mFlatTable.end(), table[i][j].begin(), table[i][j].end());
453 } else {
454 mIndices[idx] = {0, 0};
455 }
456 }
457 }
458 }
459
460 TableIndex mIndices[NLayers * NLayers];
461 std::vector<TableEntry> mFlatTable;
462};
463
464// GPU friendly view of the table below
465template <int32_t NLayers, typename TableEntry, typename TableIndex>
467 const TableEntry* mFlatTable{nullptr};
468 const TableIndex* mIndices{nullptr};
469 const LayerTiming* mLayers{nullptr};
470
471 GPUhdi() const LayerTiming& getLayer(int32_t layer) const noexcept
472 {
473 assert(layer >= 0 && layer < NLayers);
474 return mLayers[layer];
475 }
476
477 GPUhdi() const TableEntry& getVertices(int32_t layer, size_t rofIdx) const noexcept
478 {
479 assert(layer < NLayers);
480 const auto& idx = mIndices[layer];
481 assert(rofIdx < idx.getEntries());
482 return mFlatTable[idx.getFirstEntry() + rofIdx];
483 }
484
485 GPUh() int32_t getMaxVerticesPerROF() const noexcept
486 {
487 int32_t maxCount = 0;
488 for (int32_t layer = 0; layer < NLayers; ++layer) {
489 const auto& idx = mIndices[layer];
490 for (int32_t i = 0; i < idx.getEntries(); ++i) {
491 const auto& entry = mFlatTable[idx.getFirstEntry() + i];
492 maxCount = o2::gpu::CAMath::Max(maxCount, static_cast<int32_t>(entry.getEntries()));
493 }
494 }
495 return maxCount;
496 }
497
498 // Check if a specific vertex is compatible with a given ROF
499 GPUhdi() bool isVertexCompatible(int32_t layer, size_t rofIdx, const Vertex& vertex) const noexcept
500 {
501 assert(layer < NLayers);
502 const auto& layerDef = mLayers[layer];
503 int64_t rofLower = o2::gpu::CAMath::Max((int64_t)layerDef.getROFStartInBC(rofIdx) - (int64_t)layerDef.mROFAddTimeErr, int64_t(0));
504 int64_t rofUpper = (int64_t)layerDef.getROFEndInBC(rofIdx) + layerDef.mROFAddTimeErr;
505 auto vLower = (int64_t)vertex.getTimeStamp().lower();
506 auto vUpper = (int64_t)vertex.getTimeStamp().upper();
507 return vUpper >= rofLower && vLower < rofUpper;
508 }
509
510#ifndef GPUCA_GPUCODE
511 GPUh() void printAll() const
512 {
513 for (int32_t i = 0; i < NLayers; ++i) {
514 printLayer(i);
515 }
516 printSummary();
517 }
518
519 GPUh() void printLayer(int32_t layer) const
520 {
521 constexpr int w_rof = 10;
522 constexpr int w_first = 12;
523 constexpr int w_last = 12;
524 constexpr int w_count = 10;
525
526 LOGF(info, "Vertex lookup: Layer %d", layer);
527 LOGF(info, "%*s | %*s | %*s | %*s", w_rof, "ROF.index", w_first, "First.Vtx", w_last, "Last.Vtx", w_count, "Count");
528 LOGF(info, "%.*s-+-%.*s-+-%.*s-+-%.*s", w_rof, "----------", w_first, "------------", w_last, "------------", w_count, "----------");
529
530 const auto& idx = mIndices[layer];
531 for (int32_t i = 0; i < idx.getEntries(); ++i) {
532 const auto& entry = mFlatTable[idx.getFirstEntry() + i];
533 int first = entry.getFirstEntry();
534 int count = entry.getEntries();
535 int last = first + count - 1;
536 LOGF(info, "%*d | %*d | %*d | %*d", w_rof, i, w_first, first, w_last, last, w_count, count);
537 }
538 }
539
540 GPUh() void printSummary() const
541 {
542 uint32_t totalROFs{0};
543 uint32_t totalVertexRefs{0};
544
545 for (int32_t i = 0; i < NLayers; ++i) {
546 const auto& idx = mIndices[i];
547 totalROFs += idx.getEntries();
548
549 for (int32_t j = 0; j < idx.getEntries(); ++j) {
550 const auto& entry = mFlatTable[idx.getFirstEntry() + j];
551 totalVertexRefs += entry.getEntries();
552 }
553 }
554
555 const uint32_t totalBytes = (totalROFs * sizeof(TableEntry)) + (NLayers * sizeof(TableIndex));
556 LOGF(info, "------------------------------------------------------------");
557 LOGF(info, "Total ROFs in table: %u", totalROFs);
558 LOGF(info, "Total vertex references: %u", totalVertexRefs);
559 LOGF(info, "Total view size: %u bytes", totalBytes);
560 LOGF(info, "------------------------------------------------------------");
561 }
562#endif
563};
564
565// Precalculated lookup table to find vertices compatible with ROFs
566// Given a layer and ROF index, returns the range of vertices that overlap in time.
567// The vertex time is defined as symmetrical [t0-e,t0+e]
568// It needs to be guaranteed that the input vertices are sorted by their lower-bound!
569// additionally, compatibility has to be queried per vertex!
570template <int32_t NLayers>
572{
573 public:
579
581
582 GPUh() size_t getFlatTableSize() const noexcept { return mFlatTable.size(); }
583 static GPUh() constexpr size_t getIndicesSize() { return NLayers; }
584
585 // Build the lookup table given a sorted array of vertices
586 // vertices must be sorted by timestamp, then by error (secondary)
587 GPUh() void init(const Vertex* vertices, size_t nVertices)
588 {
589 if (nVertices > std::numeric_limits<T>::max()) {
590 LOGF(fatal, "too many vertices %zu, max supported is %u", nVertices, std::numeric_limits<T>::max());
591 }
592
593 std::vector<TableEntry> table[NLayers];
594 for (int32_t layer{0}; layer < NLayers; ++layer) {
595 buildMapping(layer, vertices, nVertices, table[layer]);
596 }
597 flatten(table);
598 }
599
600 // Pre-allocated needed memory, then use update(...)
601 GPUh() void init()
602 {
603 size_t total{0};
604 for (int32_t layer{0}; layer < NLayers; ++layer) {
605 total += this->mLayers[layer].mNROFsTF;
606 }
607 mFlatTable.resize(total, {0, 0});
608 size_t offset = 0;
609 for (int32_t layer{0}; layer < NLayers; ++layer) {
610 size_t nROFs = this->mLayers[layer].mNROFsTF;
611 mIndices[layer].setFirstEntry(static_cast<T>(offset));
612 mIndices[layer].setEntries(static_cast<T>(nROFs));
613 offset += nROFs;
614 }
615 }
616
617 // Recalculate lookup table with new vertices
618 GPUh() void update(const Vertex* vertices, size_t nVertices)
619 {
620 size_t offset = 0;
621 for (int32_t layer{0}; layer < NLayers; ++layer) {
622 const auto& idx = mIndices[layer];
623 size_t nROFs = idx.getEntries();
624 for (size_t iROF = 0; iROF < nROFs; ++iROF) {
625 updateROFMapping(layer, iROF, vertices, nVertices, offset + iROF);
626 }
627 offset += nROFs;
628 }
629 }
630
631 GPUh() View getView() const
632 {
633 View view;
634 view.mFlatTable = mFlatTable.data();
635 view.mIndices = mIndices;
636 view.mLayers = this->mLayers;
637 return view;
638 }
639
640 GPUh() View getDeviceView(const TableEntry* deviceFlatTablePtr, const TableIndex* deviceIndicesPtr, const LayerTiming* deviceLayerTimingPtr) const
641 {
642 View view;
643 view.mFlatTable = deviceFlatTablePtr;
644 view.mIndices = deviceIndicesPtr;
645 view.mLayers = deviceLayerTimingPtr;
646 return view;
647 }
648
649 private:
650 // Build the mapping for one layer
651 GPUh() void buildMapping(int32_t layer, const Vertex* vertices, size_t nVertices, std::vector<TableEntry>& table)
652 {
653 const auto& layerDef = this->mLayers[layer];
654 table.resize(layerDef.mNROFsTF);
655 size_t vertexSearchStart = 0;
656 for (int32_t iROF{0}; iROF < layerDef.mNROFsTF; ++iROF) {
657 int64_t rofLower = o2::gpu::CAMath::Max((int64_t)layerDef.getROFStartInBC(iROF) - (int64_t)layerDef.mROFAddTimeErr, int64_t(0));
658 int64_t rofUpper = (int64_t)layerDef.getROFEndInBC(iROF) + layerDef.mROFAddTimeErr;
659 size_t lastVertex = binarySearchFirst(vertices, nVertices, vertexSearchStart, rofUpper);
660 size_t firstVertex = vertexSearchStart;
661 while (firstVertex < lastVertex) {
662 auto vUpper = (int64_t)vertices[firstVertex].getTimeStamp().upper();
663 if (vUpper > rofLower) {
664 break;
665 }
666 ++firstVertex;
667 }
668 size_t count = (lastVertex > firstVertex) ? (lastVertex - firstVertex) : 0;
669 table[iROF] = {static_cast<T>(firstVertex), static_cast<T>(count)};
670 vertexSearchStart = firstVertex;
671 }
672 }
673
674 // Update a single ROF's vertex mapping
675 GPUh() void updateROFMapping(int32_t layer, size_t iROF, const Vertex* vertices, size_t nVertices, size_t flatTableIdx)
676 {
677 const auto& layerDef = this->mLayers[layer];
678 int64_t rofLower = o2::gpu::CAMath::Max((int64_t)layerDef.getROFStartInBC(iROF) - (int64_t)layerDef.mROFAddTimeErr, int64_t(0));
679 int64_t rofUpper = (int64_t)layerDef.getROFEndInBC(iROF) + layerDef.mROFAddTimeErr;
680 size_t lastVertex = binarySearchFirst(vertices, nVertices, 0, rofUpper);
681 size_t firstVertex = 0;
682 while (firstVertex < lastVertex) {
683 int64_t vUpper = (int64_t)vertices[firstVertex].getTimeStamp().getTimeStamp() +
684 (int64_t)vertices[firstVertex].getTimeStamp().getTimeStampError();
685 if (vUpper > rofLower) {
686 break;
687 }
688 ++firstVertex;
689 }
690 size_t count = (lastVertex > firstVertex) ? (lastVertex - firstVertex) : 0;
691 mFlatTable[flatTableIdx].setFirstEntry(static_cast<T>(firstVertex));
692 mFlatTable[flatTableIdx].setEntries(static_cast<T>(count));
693 }
694
695 // Binary search for first vertex where lowerBC >= targetBC
696 GPUh() size_t binarySearchFirst(const Vertex* vertices, size_t nVertices, size_t searchStart, BCType targetBC) const
697 {
698 size_t left = searchStart;
699 size_t right = nVertices;
700 while (left < right) {
701 size_t mid = left + ((right - left) / 2);
702 int64_t lower = (int64_t)vertices[mid].getTimeStamp().lower();
703 if (lower < targetBC) {
704 left = mid + 1;
705 } else {
706 right = mid;
707 }
708 }
709 return left;
710 }
711
712 // Compress the temporary table into a single flat table
713 GPUh() void flatten(const std::vector<TableEntry> table[NLayers])
714 {
715 // Count total entries
716 size_t total{0};
717 for (int32_t i{0}; i < NLayers; ++i) {
718 total += table[i].size();
719 }
720
721 mFlatTable.reserve(total);
722
723 // Build flat table and indices
724 for (int32_t i{0}; i < NLayers; ++i) {
725 mIndices[i].setFirstEntry(static_cast<T>(mFlatTable.size()));
726 mIndices[i].setEntries(static_cast<T>(table[i].size()));
727 mFlatTable.insert(mFlatTable.end(), table[i].begin(), table[i].end());
728 }
729 }
730
731 TableIndex mIndices[NLayers];
732 std::vector<TableEntry> mFlatTable;
733};
734
735// GPU-friendly view of the ROF mask table
736template <int32_t NLayers, typename TableEntry, typename TableIndex>
738 const TableEntry* mFlatMask{nullptr};
739 const TableIndex* mLayerROFOffsets{nullptr}; // size NLayers+1
740
741 GPUhdi() bool isROFEnabled(int32_t layer, int32_t rofId) const noexcept
742 {
743 assert(layer >= 0 && layer < NLayers);
744 return mFlatMask[mLayerROFOffsets[layer] + rofId] != 0u;
745 }
746
747#ifndef GPUCA_GPUCODE
748 GPUh() void printAll() const
749 {
750 for (int32_t i = 0; i < NLayers; ++i) {
751 printLayer(i);
752 }
753 }
754
755 GPUh() void printLayer(int32_t layer) const
756 {
757 constexpr int w_rof = 10;
758 constexpr int w_active = 10;
759 int32_t nROFs = mLayerROFOffsets[layer + 1] - mLayerROFOffsets[layer];
760 LOGF(info, "Mask table: Layer %d", layer);
761 LOGF(info, "%*s | %*s", w_rof, "ROF", w_active, "Enabled");
762 LOGF(info, "%.*s-+-%.*s", w_rof, "----------", w_active, "----------");
763 for (int32_t i = 0; i < nROFs; ++i) {
764 LOGF(info, "%*d | %*d", w_rof, i, w_active, (int)isROFEnabled(layer, i));
765 }
766 }
767
// Short summary "enabled/total" of the ROF mask of one layer.
GPUh() std::string asString(int32_t layer) const
{
  const int32_t nROFs = mLayerROFOffsets[layer + 1] - mLayerROFOffsets[layer];
  int32_t nEnabled = 0;
  for (int32_t iROF = 0; iROF < nROFs; ++iROF) {
    nEnabled += isROFEnabled(layer, iROF) ? 1 : 0;
  }
  return std::format("ROFMask on Layer {} ROFs enabled: {}/{}", layer, nEnabled, nROFs);
}
779
780 GPUh() void print(int32_t layer) const
781 {
782 LOG(info) << asString(layer);
783 }
784#endif
785};
786
787// Per-ROF per-layer boolean mask (uint8_t for GPU compatibility).
788template <int32_t NLayers>
789class ROFMaskTable : public LayerTimingBase<NLayers>
790{
791 public:
794 using TableIndex = uint32_t;
795 using TableEntry = uint8_t;
797
798 ROFMaskTable() = default;
799 GPUh() explicit ROFMaskTable(const LayerTimingBase<NLayers>& timingBase) : LayerTimingBase<NLayers>(timingBase) { init(); }
800
801 GPUh() void init()
802 {
803 int32_t totalROFs = 0;
804 for (int32_t layer{0}; layer < NLayers; ++layer) {
805 mLayerROFOffsets[layer] = totalROFs;
806 totalROFs += this->getLayer(layer).mNROFsTF;
807 }
808 mLayerROFOffsets[NLayers] = totalROFs; // sentinel
809 mFlatMask.resize(totalROFs, 0u);
810 }
811
812 GPUh() size_t getFlatMaskSize() const noexcept { return mFlatMask.size(); }
813
814 GPUh() void setROFEnabled(int32_t layer, int32_t rofId, uint8_t state = 1) noexcept
815 {
816 assert(layer >= 0 && layer < NLayers);
817 assert(rofId >= 0 && rofId < mLayerROFOffsets[layer + 1] - mLayerROFOffsets[layer]);
818 mFlatMask[mLayerROFOffsets[layer] + rofId] = state;
819 }
820
821 GPUh() void setROFsEnabled(int32_t layer, int32_t firstRof, int32_t nRofs, uint8_t state = 1) noexcept
822 {
823 assert(layer >= 0 && layer < NLayers);
824 assert(firstRof >= 0);
825 assert(firstRof + nRofs <= mLayerROFOffsets[layer + 1] - mLayerROFOffsets[layer]);
826 std::memset(mFlatMask.data() + mLayerROFOffsets[layer] + firstRof, state, nRofs);
827 }
828
829 // Enable all ROFs in all layers that are time-compatible with the given BC range
830 GPUh() void selectROF(const BCRange& t)
831 {
832 const int32_t bcStart = t.getFirstEntry();
833 const int32_t bcEnd = t.getEntriesBound();
834 for (int32_t layer{0}; layer < NLayers; ++layer) {
835 const auto& lay = this->getLayer(layer);
836 const int32_t offset = mLayerROFOffsets[layer];
837 for (int32_t rofId{0}; rofId < lay.mNROFsTF; ++rofId) {
838 if (static_cast<int32_t>(lay.getROFStartInBC(rofId)) < bcEnd &&
839 static_cast<int32_t>(lay.getROFEndInBC(rofId)) > bcStart) {
840 mFlatMask[offset + rofId] = 1u;
841 }
842 }
843 }
844 }
845
846 // Reset mask to 0, then enable all ROFs compatible with any of the given BC ranges
847 GPUh() void selectROFs(const std::vector<BCRange>& ts)
848 {
849 resetMask();
850 for (const auto& t : ts) {
851 selectROF(t);
852 }
853 }
854
855 GPUh() void resetMask(uint8_t s = 0u)
856 {
857 std::memset(mFlatMask.data(), s, mFlatMask.size());
858 }
859
860 GPUh() void invertMask()
861 {
862 std::ranges::transform(mFlatMask, mFlatMask.begin(), [](uint8_t x) { return 1 - x; });
863 }
864
865 GPUh() void swap(ROFMaskTable& other) noexcept
866 {
867 std::swap(mFlatMask, other.mFlatMask);
868 std::swap(mLayerROFOffsets, other.mLayerROFOffsets);
869 }
870
871 GPUh() View getView() const
872 {
873 View view;
874 view.mFlatMask = mFlatMask.data();
875 view.mLayerROFOffsets = mLayerROFOffsets;
876 return view;
877 }
878
879 GPUh() View getDeviceView(const TableEntry* deviceFlatMaskPtr, const TableIndex* deviceOffsetPtr) const
880 {
881 View view;
882 view.mFlatMask = deviceFlatMaskPtr;
883 view.mLayerROFOffsets = deviceOffsetPtr;
884 return view;
885 }
886
887 private:
888 TableIndex mLayerROFOffsets[NLayers + 1] = {0};
889 std::vector<TableEntry> mFlatMask;
890};
891
892} // namespace o2::its
893
894#endif
benchmark::State & state
std::string asString(TDataMember const &dm, char *pointer)
uint64_t vertex
Definition RawEventData.h:9
uint64_t bc
Definition RawEventData.h:5
void print() const
int16_t time
Definition RawEventData.h:4
int32_t i
#define GPUh()
Header to collect LHC related constants.
Class to refer to the 1st entry and N elements of some group in the continuous container.
uint32_t j
Definition RawData.h:0
GPUhdi() const LayerTiming &getLayer(int32_t layer) const
GPUh() void defineLayer(int32_t layer
LayerTiming::BCType T
GPUhdi() const expr int32_t getEntries() noexcept
GPUh() void print() const
GPUh() explicit ROFMaskTable(const LayerTimingBase< NLayers > &timingBase)
GPUh() size_t getFlatMaskSize() const noexcept
GPUh() void setROFEnabled(int32_t layer
LayerTimingBase< NLayers >::T T
std::vector< TableEntry > mFlatMask
LayerTimingBase< NLayers >::T T
GPUh() View getView() const
static GPUh() const expr size_t getIndicesSize()
GPUh() size_t getFlatTableSize() const noexcept
GPUh() View getView() const
GPUh() void update(const Vertex *vertices
static GPUh() const expr size_t getIndicesSize()
GPUh() size_t getFlatTableSize() const noexcept
GPUh() void init(const Vertex *vertices
LayerTimingBase< NLayers >::T T
GLint GLenum GLint x
Definition glcorearb.h:403
GLint GLsizei count
Definition glcorearb.h:399
GLuint entry
Definition glcorearb.h:5735
GLsizeiptr size
Definition glcorearb.h:659
GLuint GLuint end
Definition glcorearb.h:469
GLdouble GLdouble right
Definition glcorearb.h:4077
GLint first
Definition glcorearb.h:399
GLsizei maxCount
Definition glcorearb.h:792
GLint left
Definition glcorearb.h:1979
GLintptr offset
Definition glcorearb.h:660
typedef void(APIENTRYP PFNGLCULLFACEPROC)(GLenum mode)
GLenum GLuint GLint GLint layer
Definition glcorearb.h:1310
GLuint GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat t0
Definition glcorearb.h:5034
GLuint start
Definition glcorearb.h:469
GLuint GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat t1
Definition glcorearb.h:5034
constexpr int LHCMaxBunches
uint32_t TimeStampType
Definition TimeEstBC.h:26
uint16_t TimeStampErrorType
Definition TimeEstBC.h:27
uint64_t getTimeStamp(o2::framework::ProcessingContext &pc)
GPUhdi() BCType getROFStartInBC(BCType rofId) const noexcept
GPUhdi() TimeEstBC getROFTimeBounds(BCType rofId
GPUhdi() BCType getROFEndInBC(BCType rofId) const noexcept
dataformats::RangeReference< BCType, BCType > BCRange
GPUhdi() bool isROFEnabled(int32_t layer
GPUh() void printLayer(int32_t layer) const
GPUh() void printAll() const
LOGP(info, "From: {}", mLayers[from].asString())
LOGF(info, "%*s | %*s | %*s | %*s", w_index, "ROF.index", w_first, "First.ROF", w_last, "Last.ROF", w_count, "Count")
GPUhdi() const LayerTiming &getLayer(int32_t layer) const noexcept
GPUhdi() bool doROFsOverlap(int32_t layer0
assert(rofIdx< idx.getEntries())
GPUh() void printSummary() const
LOGF(info, "Overlap mapping: Layer %d -> Layer %d", from, to)
GPUh() const LayerTiming &getClockLayer() const noexcept
GPUh() void printAll() const
Print functions.
GPUh() int32_t getClock() const noexcept
GPUhdi() const TableEntry &getOverlap(int32_t from
GPUhdi() TimeEstBC getTimeStamp(int32_t layer0
LOGP(info, "To : {}", mLayers[to].asString())
LOGF(info, "%.*s-+-%.*s-+-%.*s-+-%.*s", w_index, "----------", w_first, "------------", w_last, "------------", w_count, "----------")
assert(doROFsOverlap(layer0, rof0, layer1, rof1))
assert(rofIdx< idx.getEntries())
GPUh() void printLayer(int32_t layer) const
GPUhdi() const LayerTiming &getLayer(int32_t layer) const noexcept
GPUh() int32_t getMaxVerticesPerROF() const noexcept
GPUh() void printSummary() const
GPUhdi() const TableEntry &getVertices(int32_t layer
GPUhdi() bool isVertexCompatible(int32_t layer
VectorOfTObjectPtrs other
LOG(info)<< "Compressed in "<< sw.CpuTime()<< " s"