Project
Loading...
Searching...
No Matches
DecodingStat.h
Go to the documentation of this file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
14
15#ifndef _ALICEO2_DECODINGSTAT_H_
16#define _ALICEO2_DECODINGSTAT_H_
17
18#include <string>
19#include <array>
20#include <Rtypes.h>
22
23namespace o2
24{
25namespace itsmft
26{
class ChipPixelData;

/// Decoding statistics for one FEE ID: a hit counter plus one counter per DecErrors code.
/// The DecErrors enumerator value is the common index into ErrNames, ErrActions and errorCounts.
struct ChipStat {
  /// Bit flags describing the reaction to an error; combined per error code in ErrActions.
  enum ActionOnError : int {
    ErrActNone = 0x0,      // do nothing
    ErrActPropagate = 0x1, // propagate to decoded data
    ErrActDump = 0x2       // produce raw data dump
  };

  /// Error codes. The APE_* block is contiguous and ordered like the APE bytes 0xF2..0xFE,
  /// which is what getAPECode(), getAPENonCritical() and getAPEByte() rely on.
  enum DecErrors : int {
    BusyViolation,                    // Busy violation
    DataOverrun,                      // Data overrun
    Fatal,                            // Fatal (ALPIDE trigger fifo overflow, trigger-event matching compromised)
    BusyOn,                           // Busy On
    BusyOff,                          // Busy Off
    TruncatedChipEmpty,               // Data was truncated after ChipEmpty
    TruncatedChipHeader,              // Data was truncated after ChipHeader
    TruncatedRegion,                  // Data was truncated after Region record
    TruncatedLondData,                // Data was truncated in the LongData record
    WrongDataLongPattern,             // LongData pattern has highest bit set
    NoDataFound,                      // Region is not followed by Short or Long data
    UnknownWord,                      // Unknown word was seen
    RepeatingPixel,                   // Same pixel fired more than once
    WrongRow,                         // Non-existing row decoded
    APE_STRIP_START,                  // 0xF2 - Lane data stripped for this chip event (behaviour changed with RU FW v1.16.0, for general APE behaviour see https://alice.its.cern.ch/jira/browse/O2-1717)
    APE_ILLEGAL_CHIPID,               // 0xF3 - Chip ID jumped downwards within an ROF on a OB module (FATAL)
    APE_DET_TIMEOUT,                  // 0xF4 - Detector timeout (FATAL)
    APE_OOT,                          // 0xF5 - 8b10b OOT (FATAL, start)
    APE_PROTOCOL_ERROR,               // 0xF6 - Event protocol error marker (FATAL, start)
    APE_LANE_FIFO_OVERFLOW_ERROR,     // 0xF7 - Lane FIFO overflow error (FATAL)
    APE_FSM_ERROR,                    // 0xF8 - FSM error (FATAL, SEU error, reached an unknown state)
    APE_PENDING_DETECTOR_EVENT_LIMIT, // 0xF9 - Pending detector events limit (FATAL)
    APE_PENDING_LANE_EVENT_LIMIT,     // 0xFA - Pending detector events limit in packager (FATAL)
    APE_O2N_ERROR,                    // 0xFB - Lane protocol error (FATAL)
    APE_RATE_MISSING_TRG_ERROR,       // 0xFC - Received start of event before trigger (FATAL)
    APE_PE_DATA_MISSING,              // 0xFD - Error in non critical byte
    APE_OOT_DATA_MISSING,             // 0xFE - OOT non-critical
    WrongDColOrder,                   // DColumns non increasing
    InterleavedChipData,              // Chip data interleaved on the cable
    TruncatedBuffer,                  // Truncated buffer, 0 padding
    TrailerAfterHeader,               // Trailer seen after header w/o FE of FD set
    FlushedIncomplete,                // ALPIDE MEB was flushed by the busy handling
    StrobeExtended,                   // ALPIDE received a second trigger while the strobe was still open
    WrongAlpideChipID,                // Impossible for given cable ALPIDE ChipOnModule ID
    DecreasingRow,                    // Decreasing row in the same column
    NErrorsDefined                    // number of error codes; must stay last, it sizes the tables below
  };

  // The APE byte <-> DecErrors arithmetic below is only valid while the APE_* block spans exactly 0xF2..0xFE
  static_assert(APE_OOT_DATA_MISSING - APE_STRIP_START == 0xfe - 0xf2, "APE error codes must map 1:1 onto bytes 0xF2..0xFE");

  /// Human-readable description of every error code, indexed by DecErrors.
  static constexpr std::array<std::string_view, NErrorsDefined> ErrNames = {
    "BusyViolation flag ON",                        // BusyViolation
    "DataOverrun flag ON",                          // DataOverrun
    "Fatal flag ON",                                // Fatal (ALPIDE trigger fifo overflow, trigger-event matching compromised)
    "BusyON",                                       // BusyOn
    "BusyOFF",                                      // BusyOff
    "Data truncated after ChipEmpty",               // TruncatedChipEmpty
    "Data truncated after ChipHeader",              // TruncatedChipHeader
    "Data truncated after Region",                  // TruncatedRegion
    "Data truncated after LongData",                // TruncatedLondData
    "LongData pattern has highest bit set",         // WrongDataLongPattern
    "Region is not followed by Short or Long data", // NoDataFound
    "Unknown word",                                 // UnknownWord
    "Same pixel fired multiple times",              // RepeatingPixel
    "Non-existing row decoded",                     // WrongRow
    "APE_STRIP_START",                              // 0xF2 - Lane data stripped for this chip event (behaviour changed with RU FW v1.16.0, for general APE behaviour see https://alice.its.cern.ch/jira/browse/O2-1717)
    "APE_ILLEGAL_CHIPID",                           // 0xF3 - Chip ID jumped downwards within an ROF on a OB module (FATAL)
    "APE_DET_TIMEOUT",                              // 0xF4 - Detector timeout (FATAL)
    "APE_OOT",                                      // 0xF5 - 8b10b OOT (FATAL, start)
    "APE_PROTOCOL_ERROR",                           // 0xF6 - Event protocol error marker (FATAL, start)
    "APE_LANE_FIFO_OVERFLOW_ERROR",                 // 0xF7 - Lane FIFO overflow error (FATAL)
    "APE_FSM_ERROR",                                // 0xF8 - FSM error (FATAL, SEU error, reached an unknown state)
    "APE_PENDING_DETECTOR_EVENT_LIMIT",             // 0xF9 - Pending detector events limit (FATAL)
    "APE_PENDING_LANE_EVENT_LIMIT",                 // 0xFA - Pending detector events limit in packager (FATAL)
    "APE_O2N_ERROR",                                // 0xFB - Lane protocol error (FATAL)
    "APE_RATE_MISSING_TRG_ERROR",                   // 0xFC - Received start of event before trigger (FATAL)
    "APE_PE_DATA_MISSING",                          // 0xFD - Error in non critical byte
    "APE_OOT_NON_CRITICAL",                         // 0xFE - OOT non-critical
    "DColumns non-increasing",                      // DColumns non increasing
    "Chip data interleaved on the cable",           // Chip data interleaved on the cable
    "TruncatedBuffer",                              // Truncated buffer, 0 padding
    "TrailerAfterHeader",                           // Trailer seen after header w/o FE of FD set
    "FlushedIncomplete",                            // ALPIDE MEB was flushed by the busy handling
    "StrobeExtended",                               // ALPIDE received a second trigger while the strobe was still open
    "Wrong Alpide ChipID",                          // Impossible for given cable ALPIDE ChipOnModule ID
    "Decreasing row",                               // Decreasing row in the same column
  };

  /// ActionOnError bit mask for every error code, indexed by DecErrors.
  static constexpr std::array<uint32_t, NErrorsDefined> ErrActions = {
    ErrActPropagate | ErrActDump, // Busy violation
    ErrActPropagate | ErrActDump, // Data overrun
    ErrActPropagate | ErrActDump, // Fatal (ALPIDE trigger fifo overflow, trigger-event matching compromised)
    ErrActNone,                   // Busy On
    ErrActNone,                   // Busy Off
    ErrActPropagate | ErrActDump, // Data was truncated after ChipEmpty
    ErrActPropagate | ErrActDump, // Data was truncated after ChipHeader
    ErrActPropagate | ErrActDump, // Data was truncated after Region record
    ErrActPropagate | ErrActDump, // Data was truncated in the LongData record
    ErrActPropagate | ErrActDump, // LongData pattern has highest bit set
    ErrActPropagate | ErrActDump, // Region is not followed by Short or Long data
    ErrActPropagate | ErrActDump, // Unknown word was seen
    ErrActPropagate,              // Same pixel fired more than once
    ErrActPropagate | ErrActDump, // Non-existing row decoded
    ErrActPropagate | ErrActDump, // 0xF2 - Lane data stripped for this chip event (behaviour changed with RU FW v1.16.0, for general APE behaviour see https://alice.its.cern.ch/jira/browse/O2-1717)
    ErrActPropagate | ErrActDump, // 0xF3 - Chip ID jumped downwards within an ROF on a OB module (FATAL)
    ErrActPropagate | ErrActDump, // 0xF4 - Detector timeout (FATAL)
    ErrActPropagate | ErrActDump, // 0xF5 - 8b10b OOT (FATAL, start)
    ErrActPropagate | ErrActDump, // 0xF6 - Event protocol error marker (FATAL, start)
    ErrActPropagate | ErrActDump, // 0xF7 - Lane FIFO overflow error (FATAL)
    ErrActPropagate | ErrActDump, // 0xF8 - FSM error (FATAL, SEU error, reached an unknown state)
    ErrActPropagate | ErrActDump, // 0xF9 - Pending detector events limit (FATAL)
    ErrActPropagate | ErrActDump, // 0xFA - Pending detector events limit in packager (FATAL)
    ErrActPropagate | ErrActDump, // 0xFB - Lane protocol error (FATAL)
    ErrActPropagate | ErrActDump, // 0xFC - Received start of event before trigger (FATAL)
    ErrActPropagate | ErrActDump, // 0xFD - Error in non critical byte
    ErrActPropagate | ErrActDump, // 0xFE - OOT non-critical
    ErrActPropagate | ErrActDump, // DColumns non increasing
    ErrActPropagate | ErrActDump, // Chip data interleaved on the cable
    ErrActPropagate | ErrActDump, // Truncated buffer while something was expected
    ErrActPropagate | ErrActDump, // trailer seen after header w/o FE of FD set
    ErrActPropagate | ErrActDump, // ALPIDE MEB was flushed by the busy handling
    ErrActPropagate | ErrActDump, // ALPIDE received a second trigger while the strobe was still open
    ErrActPropagate | ErrActDump, // Impossible for given cable ALPIDE ChipOnModule ID
    ErrActPropagate | ErrActDump, // Decreasing row in the same column
  };

  uint16_t feeID = -1;                                       // FEE ID these counters belong to; all bits set (0xffff) until assigned
  size_t nHits = 0;                                          // hit counter, reset by clear()
  std::array<uint32_t, NErrorsDefined> errorCounts = {};     // one counter per DecErrors code

  ChipStat() = default;
  ChipStat(uint16_t _feeID) : feeID(_feeID) {}

  /// Reset the hit and error counters; feeID is left untouched.
  void clear()
  {
    errorCounts.fill(0);
    nHits = 0;
  }

  /// Return the DecErrors code for the two APE bytes treated as non-critical (0xFD, 0xFE),
  /// or -1 for any other byte.
  static int getAPENonCritical(uint8_t c)
  {
    if (c == 0xfd || c == 0xfe) {
      return APE_STRIP_START + c - 0xf2;
    }
    return -1;
  }

  /// Return the APE DecErrors code for byte c, or -1 if c is not an APE byte (outside 0xF2..0xFE).
  /// ft is always written: false when c is not an APE byte, true otherwise.
  static int getAPECode(uint8_t c, bool& ft)
  {
    if (c < 0xf2 || c > 0xfe) {
      ft = false;
      return -1;
    }
    // Every byte that passes the range check is flagged as fatal.
    // NOTE(review): this includes 0xFD/0xFE, which the enum comments and getAPENonCritical()
    // describe as non-critical - confirm this is intended before narrowing the range.
    ft = true;
    return APE_STRIP_START + c - 0xf2;
  }

  /// Return the APE byte (0xF2..0xFE) that corresponds to the given APE DecErrors code,
  /// or 0xFF if c is not one of the APE_* codes.
  static uint8_t getAPEByte(DecErrors c)
  {
    if (c < APE_STRIP_START || c > APE_OOT_DATA_MISSING) {
      return 0xFF;
    }
    return 0xF2 + c - APE_STRIP_START;
  }

  // Declared here, defined out of line
  uint32_t getNErrors() const;
  uint32_t addErrors(const ChipPixelData& d, int verbosity);
  void print(bool skipNoErr = true, const std::string& pref = "FEEID") const;

  /// Invoke f(errIdx) once for every error index in [0, NErrorsDefined), in increasing order.
  template <typename Func>
  static void forEachError(Func f)
  {
    for (int errIdx = 0; errIdx < NErrorsDefined; ++errIdx) {
      f(errIdx);
    }
  }

  // NOTE(review): the documentation index of this header lists ClassDefNV(ChipStat, 1), which is not
  // declared in this listing - confirm whether the ROOT dictionary macro has to be restored here.
};
203
/// Per-chip error record keyed by a packed 32-bit identifier:
/// FEE ID in the upper 16 bits, chip ID in the lower 16 bits (see composeID()).
struct ChipError {
  uint32_t id = -1;     // packed FEE/chip identifier; all bits set until assigned
  uint32_t nerrors = 0; // NOTE(review): presumably the number of errors recorded for this chip - confirm against users
  uint32_t errors = 0;  // NOTE(review): presumably a bit pattern of the error types seen - confirm against users

  /// Chip ID stored in the lower 16 bits of id, reinterpreted as signed (0xffff reads back as -1).
  int16_t getChipID() const { return int16_t(id & 0xffff); }

  /// FEE ID stored in the upper 16 bits of id.
  uint16_t getFEEID() const { return uint16_t(id >> 16); }

  /// Pack feeID (upper 16 bits) and chipID (lower 16 bits, two's-complement for negative values) into one identifier.
  static uint32_t composeID(uint16_t feeID, int16_t chipID) { return uint32_t(feeID) << 16 | uint16_t(chipID); }

  // NOTE(review): the documentation index of this header lists ClassDefNV(ChipError, 1), which is not
  // declared in this listing - confirm whether the ROOT dictionary macro has to be restored here.
};
214
218
219 enum DecErrors : int {
220 ErrNoRDHAtStart, // page does not start with RDH
221 ErrPageNotStopped, // RDH is stopped, but the time is not matching the ~stop packet
222 ErrStopPageNotEmpty, // Page with RDH.stop is not empty
223 ErrPageCounterDiscontinuity, // RDH page counters for the same RU/trigger are not continuous
224 ErrRDHvsGBTHPageCnt, // RDH and GBT header page counters are not consistent
225 ErrMissingGBTTrigger, // GBT trigger word was expected but not found
226 ErrMissingGBTHeader, // GBT payload header was expected but not found
227 ErrMissingGBTTrailer, // GBT payload trailer was expected but not found
228 ErrNonZeroPageAfterStop, // all lanes were stopped but the page counter in not 0
229 ErrUnstoppedLanes, // end of FEE data reached while not all lanes received stop
230 ErrDataForStoppedLane, // data was received for stopped lane
231 ErrNoDataForActiveLane, // no data was seen for lane (which was not in timeout)
232 ErrIBChipLaneMismatch, // chipID (on module) was different from the lane ID on the IB stave
233 ErrCableDataHeadWrong, // cable data does not start with chip header or empty chip
234 ErrInvalidActiveLanes, // active lanes pattern conflicts with expected for given RU type
235 ErrPacketCounterJump, // jump in RDH.packetCounter
236 ErrPacketDoneMissing, // packet done is missing in the trailer while CRU page is not over
237 ErrMissingDiagnosticWord, // missing diagnostic word after RDH with stop
238 ErrGBTWordNotRecognized, // GBT word not recognized
239 ErrWrongeCableID, // Invalid cable ID
240 ErrWrongAlignmentWord, // unexpected alignment word
241 ErrMissingROF, // missing ROF (desync?)
242 ErrOldROF, // old ROF (desync?)
243 ErrLinkRecovery, // data skipped since recovery is declared
245 };
246 static constexpr std::array<std::string_view, NErrorsDefined> ErrNames = {
247 "Page data does not start with expected RDH", // ErrNoRDHAtStart
248 "RDH is stopped, but the time is not matching the stop packet", // ErrPageNotStopped
249 "Page with RDH.stop does not contain diagnostic word only", // ErrStopPageNotEmpty
250 "RDH page counters for the same RU/trigger are not continuous", // ErrPageCounterDiscontinuity
251 "RDH and GBT header page counters are not consistent", // ErrRDHvsGBTHPageCnt
252 "GBT trigger word was expected but not found", // ErrMissingGBTTrigger
253 "GBT payload header was expected but not found", // ErrMissingGBTHeader
254 "GBT payload trailer was expected but not found", // ErrMissingGBTTrailer
255 "All lanes were stopped but the page counter in not 0", // ErrNonZeroPageAfterStop
256 "End of FEE data reached while not all lanes received stop", // ErrUnstoppedLanes
257 "Data was received for stopped lane", // ErrDataForStoppedLane
258 "No data was seen for lane (which was not in timeout)", // ErrNoDataForActiveLane
259 "ChipID (on module) was different from the lane ID on the IB stave", // ErrIBChipLaneMismatch
260 "Cable data does not start with chip header or empty chip", // ErrCableDataHeadWrong
261 "Active lanes pattern conflicts with expected for given RU type", // ErrInvalidActiveLanes
262 "Jump in RDH_packetCounter", // ErrPacketCounterJump
263 "Packet done is missing in the trailer while CRU page is not over", // ErrPacketDoneMissing
264 "Wrong/missing diagnostic GBT word after RDH with stop", // ErrMissingDiagnosticWord
265 "GBT word not recognized", // ErrGBTWordNotRecognized
266 "Wrong cable ID", // ErrWrongeCableID
267 "Unexpected CRU page alignment padding word", // ErrWrongAlignmentWord
268 "ROF in future, pause decoding to synchronize", // ErrMissingROF
269 "Old ROF, discarding", // ErrOldROF
270 "Data discarded due to the recovery flag in RDH", // ErrLinkRecovery
271 };
272
273 uint16_t feeID = 0; // FeeID
274 // Note: packet here is meant as a group of CRU pages belonging to the same trigger
275 uint32_t nPackets = 0; // total number of packets (RDH pages)
276 uint32_t nTriggers = 0; // total number of triggers (ROFs)
277 std::array<uint32_t, NErrorsDefined> errorCounts = {}; // error counters
278 std::array<uint32_t, GBTDataTrailer::MaxStateCombinations> packetStates = {}; // packet status from the trailer
279
280 void clear()
281 {
282 nPackets = 0;
283 nTriggers = 0;
284 errorCounts.fill(0);
285 packetStates.fill(0);
286 }
287
288 void print(bool skipNoErr = true) const;
289
291};
292
294 uint16_t id = -1;
295 uint16_t errType = 0;
296 uint16_t errInfo0 = 0;
297 uint16_t errInfo1 = 0;
298 ClassDefNV(ErrorMessage, 1)
299};
300
301} // namespace itsmft
302} // namespace o2
303#endif
#define verbosity
void print() const
uint32_t c
Definition RawData.h:2
GLdouble f
Definition glcorearb.h:310
a couple of static helper functions to create timestamp values for CCDB queries or override obsolete ...
static uint32_t composeID(uint16_t feeID, int16_t chipID)
uint16_t getFEEID() const
ClassDefNV(ChipError, 1)
int16_t getChipID() const
static int getAPENonCritical(uint8_t c)
static constexpr std::array< uint32_t, NErrorsDefined > ErrActions
uint32_t addErrors(const ChipPixelData &d, int verbosity)
ClassDefNV(ChipStat, 1)
static int getAPECode(uint8_t c, bool &ft)
static uint8_t getAPEByte(DecErrors c)
static constexpr std::array< std::string_view, NErrorsDefined > ErrNames
std::array< uint32_t, NErrorsDefined > errorCounts
static void forEachError(Func f)
uint32_t getNErrors() const
ChipStat(uint16_t _feeID)
Statistics for per-link decoding.
DecErrors
counters for format checks
std::array< uint32_t, GBTDataTrailer::MaxStateCombinations > packetStates
static constexpr std::array< std::string_view, NErrorsDefined > ErrNames
ClassDefNV(GBTLinkDecodingStat, 3)
std::array< uint32_t, NErrorsDefined > errorCounts