Project
Loading...
Searching...
No Matches
EncodedBlocks.h
Go to the documentation of this file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
14
16
17#ifndef ALICEO2_ENCODED_BLOCKS_H
18#define ALICEO2_ENCODED_BLOCKS_H
19// #undef NDEBUG
20// #include <cassert>
21#include <type_traits>
22#include <cstddef>
23#include <Rtypes.h>
24#include <any>
25
26#include "TTree.h"
28#include "Framework/Logger.h"
34#ifndef __CLING__
37#include "rANS/compat.h"
38#include "rANS/histogram.h"
39#include "rANS/serialize.h"
40#include "rANS/factory.h"
41#include "rANS/metrics.h"
42#include "rANS/utils.h"
43#endif
44
45namespace o2
46{
47namespace ctf
48{
49
50namespace detail
51{
52
/// Type trait telling whether T can be treated as an iterator.
/// The primary template covers every type for which std::iterator_traits<T>
/// does not provide an iterator_category.
template <class, class Enable = void>
struct is_iterator : std::false_type {
};

/// Selected when std::iterator_traits<T>::iterator_category exists: the trait is true if that
/// category derives from (or is) std::input_iterator_tag, or is exactly std::output_iterator_tag.
template <class T>
struct is_iterator<T, std::void_t<typename std::iterator_traits<T>::iterator_category>>
  : std::bool_constant<
      std::is_base_of_v<std::input_iterator_tag, typename std::iterator_traits<T>::iterator_category> ||
      std::is_same_v<std::output_iterator_tag, typename std::iterator_traits<T>::iterator_category>> {
};

/// Convenience variable template for is_iterator<T>::value.
template <class T>
inline constexpr bool is_iterator_v = is_iterator<T>::value;
66
67inline constexpr bool mayEEncode(Metadata::OptStore opt) noexcept
68{
70}
71
72inline constexpr bool mayPack(Metadata::OptStore opt) noexcept
73{
75}
76
77} // namespace detail
// NOTE: the constants below are declared `inline constexpr` so that every translation unit
// including this header shares a single definition instead of getting its own internal-linkage copy.

/// Size threshold related to packing; its consumers are not part of this section of the file.
inline constexpr size_t PackingThreshold = 512;

/// Byte alignment enforced by alignSize() for the wrapper and for every stored block.
inline constexpr size_t Alignment = 16;

/// Split level passed to the TTree branch holding the wrapper (see appendToTree).
inline constexpr int WrappersSplitLevel = 99;
/// Compression level passed to the TTree branch holding the wrapper (see appendToTree).
inline constexpr int WrappersCompressionLevel = 1;

using BufferType = uint8_t; // to avoid every detector using different types, we better define it here
87
89inline size_t alignSize(size_t sizeBytes)
90{
91 auto res = sizeBytes % Alignment;
92 return res ? sizeBytes + (Alignment - res) : sizeBytes;
93}
94
/// Translate a pointer that lives inside a buffer starting at oldBase into the equivalent
/// position inside a buffer starting at newBase (same byte offset from the base).
/// A null ptr is passed through as nullptr.
template <class T>
inline T* relocatePointer(const char* oldBase, char* newBase, const T* ptr)
{
  if (ptr == nullptr) {
    return nullptr;
  }
  // byte distance of the object from the start of the old buffer
  const auto byteOffset = reinterpret_cast<const char*>(ptr) - oldBase;
  return reinterpret_cast<T*>(newBase + byteOffset);
}
101
/// Number of dest_T words needed to hold nElems elements of source_T
/// (byte size of the source divided by sizeof(dest_T), rounded up).
/// Only available when dest_T is at least as large as source_T.
template <typename source_T, typename dest_T, std::enable_if_t<(sizeof(dest_T) >= sizeof(source_T)), bool> = true>
inline constexpr size_t calculateNDestTElements(size_t nElems) noexcept
{
  const size_t nSourceBytes = sizeof(source_T) * nElems;
  const size_t nWholeWords = nSourceBytes / sizeof(dest_T);
  const bool needsPartialWord = (nSourceBytes % sizeof(dest_T)) != 0;
  return needsPartialWord ? nWholeWords + 1 : nWholeWords;
}
108
109template <typename source_T, typename dest_T, std::enable_if_t<(sizeof(dest_T) >= sizeof(source_T)), bool> = true>
110inline size_t calculatePaddedSize(size_t nElems) noexcept
111{
112 const size_t sizeOfSourceT = sizeof(source_T);
113 const size_t sizeOfDestT = sizeof(dest_T);
114
115 // this is equivalent to (sizeOfSourceT / sizeOfDestT) * std::ceil(sizeOfSourceArray/ sizeOfDestT)
116 return (sizeOfDestT / sizeOfSourceT) * calculateNDestTElements<source_T, dest_T>(nElems);
117};
118
120
122struct Registry {
123 char* head = nullptr;
124 int nFilledBlocks = 0; // number of filled blocks = next block to fill (must be strictly consecutive)
125 size_t offsFreeStart = 0;
126 size_t size = 0; // full size in bytes!!!
127
129 char* getFreeBlockStart() const
130 {
131 assert(offsFreeStart <= size);
132 return head + offsFreeStart;
133 }
134
136 size_t getFreeSize() const
137 {
138 return size - offsFreeStart;
139 }
140
141 char* getFreeBlockEnd() const
142 {
143 assert(offsFreeStart <= size);
144 return getFreeBlockStart() + getFreeSize();
145 }
146
148};
149
151template <typename W = uint32_t>
152struct Block {
153
154 Registry* registry = nullptr;
155 int nDict = 0; // dictionary length (if any)
156 int nData = 0; // length of data
157 int nLiterals = 0; // length of literals vector (if any)
158 int nStored = 0; // total length
159 W* payload = nullptr; //[nStored];
160
161 inline const W* getDict() const { return nDict ? payload : nullptr; }
162 inline const W* getData() const { return nData ? (payload + nDict) : nullptr; }
163 inline const W* getDataPointer() const { return payload ? (payload + nDict) : nullptr; } // needed when nData is not set yet
164 inline const W* getLiterals() const { return nLiterals ? (payload + nDict + nData) : nullptr; }
165 inline const W* getEndOfBlock() const
166 {
167 if (!registry) {
168 return nullptr;
169 }
170 // get last legal W*, since unaligned data is undefined behavior!
171 const size_t delta = reinterpret_cast<uintptr_t>(registry->getFreeBlockEnd()) % sizeof(W);
172 return reinterpret_cast<const W*>(registry->getFreeBlockEnd() - delta);
173 }
174
175 inline W* getCreatePayload() { return payload ? payload : (registry ? (payload = reinterpret_cast<W*>(registry->getFreeBlockStart())) : nullptr); }
176 inline W* getCreateDict() { return payload ? payload : getCreatePayload(); }
177 inline W* getCreateData() { return payload ? (payload + nDict) : getCreatePayload(); }
178 inline W* getCreateLiterals() { return payload ? payload + (nDict + nData) : getCreatePayload(); }
179 inline W* getEndOfBlock() { return const_cast<W*>(static_cast<const Block&>(*this).getEndOfBlock()); };
180
181 inline auto getOffsDict() { return reinterpret_cast<std::uintptr_t>(getCreateDict()) - reinterpret_cast<std::uintptr_t>(registry->head); }
182 inline auto getOffsData() { return reinterpret_cast<std::uintptr_t>(getCreateData()) - reinterpret_cast<std::uintptr_t>(registry->head); }
183 inline auto getOffsLiterals() { return reinterpret_cast<std::uintptr_t>(getCreateLiterals()) - reinterpret_cast<std::uintptr_t>(registry->head); }
184
185 inline void setNDict(int _ndict)
186 {
187 nDict = _ndict;
188 nStored += nDict;
189 }
190
191 inline void setNData(int _ndata)
192 {
193 nData = _ndata;
194 nStored += nData;
195 }
196
197 inline void setNLiterals(int _nliterals)
198 {
199 nLiterals = _nliterals;
201 }
202
203 inline int getNDict() const { return nDict; }
204 inline int getNData() const { return nData; }
205 inline int getNLiterals() const { return nLiterals; }
206 inline int getNStored() const { return nStored; }
207
209 {
210 if (!registry) { // this is a standalone block owning its data
211 delete[] payload;
212 }
213 }
214
216 void clear()
217 {
218 nDict = 0;
219 nData = 0;
220 nLiterals = 0;
221 nStored = 0;
222 payload = nullptr;
223 }
224
226 static size_t estimateSize(int n)
227 {
228 return alignSize(n * sizeof(W));
229 }
230
231 // store a dictionary in an empty block
232 void storeDict(int _ndict, const W* _dict)
233 {
234 if (getNStored() > 0) {
235 throw std::runtime_error("trying to write in occupied block");
236 }
237 size_t sz = estimateSize(_ndict);
238 assert(registry); // this method is valid only for flat version, which has a registry
239 assert(sz <= registry->getFreeSize());
240 assert((_ndict > 0) == (_dict != nullptr));
241 setNDict(_ndict);
242 if (nDict) {
243 memcpy(getCreateDict(), _dict, _ndict * sizeof(W));
244 realignBlock();
245 }
246 };
247
248 // store a dictionary to a block which can either be empty or contain a dict.
249 void storeData(int _ndata, const W* _data)
250 {
251 if (getNStored() > getNDict()) {
252 throw std::runtime_error("trying to write in occupied block");
253 }
254
255 size_t sz = estimateSize(_ndata);
256 assert(registry); // this method is valid only for flat version, which has a registry
257 assert(sz <= registry->getFreeSize());
258 assert((_ndata > 0) == (_data != nullptr));
259 setNData(_ndata);
260 if (nData) {
261 memcpy(getCreateData(), _data, _ndata * sizeof(W));
262 realignBlock();
263 }
264 }
265
266 // store a dictionary to a block which can either be empty or contain a dict.
267 void storeLiterals(int _nliterals, const W* _literals)
268 {
269 if (getNStored() > getNDict() + getNData()) {
270 throw std::runtime_error("trying to write in occupied block");
271 }
272
273 size_t sz = estimateSize(_nliterals);
274 assert(registry); // this method is valid only for flat version, which has a registry
275 assert(sz <= registry->getFreeSize());
276 // assert((_nliterals > 0) == (_literals != nullptr));
277 setNLiterals(_nliterals);
278 if (nLiterals) {
279 memcpy(getCreateLiterals(), _literals, _nliterals * sizeof(W));
280 realignBlock();
281 }
282 }
283
284 // resize block and free up unused buffer space.
286 {
287 if (payload) {
288 size_t sz = estimateSize(getNStored());
289 registry->offsFreeStart = (reinterpret_cast<char*>(payload) - registry->head) + sz;
290 }
291 }
292
294 void store(int _ndict, int _ndata, int _nliterals, const W* _dict, const W* _data, const W* _literals)
295 {
296 size_t sz = estimateSize(_ndict + _ndata + _nliterals);
297 assert(registry); // this method is valid only for flat version, which has a registry
298 assert(sz <= registry->getFreeSize());
299 assert((_ndict > 0) == (_dict != nullptr));
300 assert((_ndata > 0) == (_data != nullptr));
301 // assert(_literals == _data + _nliterals);
302 setNDict(_ndict);
303 setNData(_ndata);
304 setNLiterals(_nliterals);
305 getCreatePayload(); // do this even for empty block!!!
306 if (getNStored()) {
307 payload = reinterpret_cast<W*>(registry->getFreeBlockStart());
308 if (getNDict()) {
309 memcpy(getCreateDict(), _dict, _ndict * sizeof(W));
310 }
311 if (getNData()) {
312 memcpy(getCreateData(), _data, _ndata * sizeof(W));
313 }
314 if (getNLiterals()) {
315 memcpy(getCreateLiterals(), _literals, _nliterals * sizeof(W));
316 }
317 }
318 realignBlock();
319 }
320
322 void relocate(const char* oldHead, char* newHeadData, char* newHeadRegistry)
323 {
324 payload = relocatePointer(oldHead, newHeadData, payload);
325 registry = relocatePointer(oldHead, newHeadRegistry, registry);
326 }
327
329}; // namespace ctf
330
332
333template <typename H, int N, typename W = uint32_t>
335{
336 public:
338
339#ifndef __CLING__
340 template <typename source_T>
341 using dictionaryType = std::variant<rans::RenormedSparseHistogram<source_T>, rans::RenormedDenseHistogram<source_T>>;
342#endif
343
344 void setHeader(const H& h)
345 {
346 mHeader = h;
347 }
348 const H& getHeader() const { return mHeader; }
349 H& getHeader() { return mHeader; }
350 std::shared_ptr<H> cloneHeader() const { return std::shared_ptr<H>(new H(mHeader)); } // for dictionary creation
351
352 const auto& getRegistry() const { return mRegistry; }
353
354 const auto& getMetadata() const { return mMetadata; }
355
356 auto& getMetadata(int i) const
357 {
358 assert(i < N);
359 return mMetadata[i];
360 }
361
362 auto& getBlock(int i) const
363 {
364 assert(i < N);
365 return mBlocks[i];
366 }
367
368#ifndef __CLING__
369 template <typename source_T>
371 {
372 const auto& block = getBlock(i);
373 const auto& metadata = getMetadata(i);
374 ansVersion = checkANSVersion(ansVersion);
375
376 assert(static_cast<int64_t>(std::numeric_limits<source_T>::min()) <= static_cast<int64_t>(metadata.max));
377 assert(static_cast<int64_t>(std::numeric_limits<source_T>::max()) >= static_cast<int64_t>(metadata.min));
378
379 // check consistency of metadata and type
380 [&]() {
381 const int64_t sourceMin = std::numeric_limits<source_T>::min();
382 const int64_t sourceMax = std::numeric_limits<source_T>::max();
383
384 auto view = rans::trim(rans::HistogramView{block.getDict(), block.getDict() + block.getNDict(), metadata.min});
385 const int64_t dictMin = view.getMin();
386 const int64_t dictMax = view.getMax();
387 assert(dictMin >= metadata.min);
388 assert(dictMax <= metadata.max);
389
390 if ((dictMin < sourceMin) || (dictMax > sourceMax)) {
391 if (ansVersion == ANSVersionCompat && mHeader.majorVersion == 1 && mHeader.minorVersion == 0 && mHeader.dictTimeStamp < 1653192000000) {
392 LOGP(warn, "value range of dictionary and target datatype are incompatible: target type [{},{}] vs dictionary [{},{}], tolerate in compat mode for old dictionaries", sourceMin, sourceMax, dictMin, dictMax);
393 } else {
394 throw std::runtime_error(fmt::format("value range of dictionary and target datatype are incompatible: target type [{},{}] vs dictionary [{},{}]", sourceMin, sourceMax, dictMin, dictMax));
395 }
396 }
397 }();
398
399 if (ansVersion == ANSVersionCompat) {
400 rans::DenseHistogram<source_T> histogram{block.getDict(), block.getDict() + block.getNDict(), metadata.min};
401 return rans::compat::renorm(std::move(histogram), metadata.probabilityBits);
402 } else if (ansVersion == ANSVersion1) {
403 // dictionary is loaded from an explicit dict file and is stored densly
405 rans::DenseHistogram<source_T> histogram{block.getDict(), block.getDict() + block.getNDict(), metadata.min};
406 size_t renormingBits = rans::utils::sanitizeRenormingBitRange(metadata.probabilityBits);
407 LOG_IF(debug, renormingBits != metadata.probabilityBits)
408 << fmt::format("While reading metadata from external dictionary, rANSV1 is rounding renorming precision from {} to {}", metadata.probabilityBits, renormingBits);
409 return rans::renorm(std::move(histogram), renormingBits, rans::RenormingPolicy::ForceIncompressible);
410 } else {
411 // dictionary is elias-delta coded inside the block
412 if constexpr (sizeof(source_T) > 2) {
413 return rans::readRenormedSetDictionary(block.getDict(), block.getDict() + block.getNDict(),
414 static_cast<source_T>(metadata.min), static_cast<source_T>(metadata.max),
415 metadata.probabilityBits);
416 } else {
417 return rans::readRenormedDictionary(block.getDict(), block.getDict() + block.getNDict(),
418 static_cast<source_T>(metadata.min), static_cast<source_T>(metadata.max),
419 metadata.probabilityBits);
420 }
421 }
422 } else {
423 throw std::runtime_error(fmt::format("Failed to load serialized Dictionary. Unsupported ANS Version: {}", static_cast<std::string>(ansVersion)));
424 }
425 };
426#endif
427
429 {
430 mANSHeader = h;
431 }
432 const ANSHeader& getANSHeader() const { return mANSHeader; }
434
435 static constexpr int getNBlocks() { return N; }
436
437 static size_t getMinAlignedSize() { return alignSize(sizeof(base)); }
438
440 static auto get(void* head) { return reinterpret_cast<EncodedBlocks*>(head); }
441 static auto get(const void* head) { return reinterpret_cast<const EncodedBlocks*>(head); }
442
444 static auto getImage(const void* newHead);
445
447 static auto create(void* head, size_t sz);
448
450 template <typename VD>
451 static auto create(VD& v);
452
454 static size_t estimateBlockSize(int n) { return Block<W>::estimateSize(n); }
455
457 bool empty() const { return (mRegistry.offsFreeStart == alignSize(sizeof(*this))) && (mRegistry.size >= mRegistry.offsFreeStart); }
458
460 bool flat() const { return mRegistry.size > 0 && (mRegistry.size >= mRegistry.offsFreeStart) && (mBlocks[0].registry == &mRegistry) && (mBlocks[N - 1].registry == &mRegistry); }
461
463 void clear();
464
466 size_t compactify() { return (mRegistry.size = estimateSize()); }
467
469 size_t size() const { return mRegistry.size; }
470
472 size_t getFreeSize() const { return mRegistry.getFreeSize(); }
473
475 template <typename buffer_T>
476 static auto expand(buffer_T& buffer, size_t newsizeBytes);
477
479 template <typename V>
480 void copyToFlat(V& vec);
481
484
486 size_t appendToTree(TTree& tree, const std::string& name) const;
487
489 void readFromTree(TTree& tree, const std::string& name, int ev = 0);
490
492 template <typename VD>
493 static void readFromTree(VD& vec, TTree& tree, const std::string& name, int ev = 0);
494
496 template <typename VE, typename buffer_T>
497 inline o2::ctf::CTFIOSize encode(const VE& src, int slot, uint8_t symbolTablePrecision, Metadata::OptStore opt, buffer_T* buffer = nullptr, const std::any& encoderExt = {}, float memfc = 1.f)
498 {
499 return encode(std::begin(src), std::end(src), slot, symbolTablePrecision, opt, buffer, encoderExt, memfc);
500 }
501
503 template <typename input_IT, typename buffer_T>
504 o2::ctf::CTFIOSize encode(const input_IT srcBegin, const input_IT srcEnd, int slot, uint8_t symbolTablePrecision, Metadata::OptStore opt, buffer_T* buffer = nullptr, const std::any& encoderExt = {}, float memfc = 1.f);
505
507 template <class container_T, class container_IT = typename container_T::iterator>
508 o2::ctf::CTFIOSize decode(container_T& dest, int slot, const std::any& decoderExt = {}) const;
509
511 template <typename D_IT, std::enable_if_t<detail::is_iterator_v<D_IT>, bool> = true>
512 o2::ctf::CTFIOSize decode(D_IT dest, int slot, const std::any& decoderExt = {}) const;
513
514#ifndef __CLING__
516 static std::vector<char> createDictionaryBlocks(const std::vector<rans::DenseHistogram<int32_t>>& vfreq, const std::vector<Metadata>& prbits);
517#endif
518
520 void print(const std::string& prefix = "", int verbosity = 1) const;
521 void dump(const std::string& prefix = "", int ncol = 20) const;
522
523 protected:
524 static_assert(N > 0, "number of encoded blocks < 1");
525
527 ANSHeader mANSHeader; // ANS header
528 H mHeader; // detector specific header
529 std::array<Metadata, N> mMetadata; // compressed block's details
530 std::array<Block<W>, N> mBlocks;
531
533
535 void init(size_t sz);
536
541 static void relocate(const char* oldHead, char* newHead, char* wrapper, size_t newsize = 0);
542
545 size_t estimateSize() const;
546
549
551 void fillFlatCopy(EncodedBlocks& dest) const;
552
554 template <typename D>
555 static size_t fillTreeBranch(TTree& tree, const std::string& brname, D& dt, int compLevel, int splitLevel = 99);
556
558 template <typename D>
559 static bool readTreeBranch(TTree& tree, const std::string& brname, D& dt, int ev = 0);
560
561 template <typename T>
562 auto expandStorage(size_t slot, size_t nElemets, T* buffer = nullptr) -> decltype(auto);
563
564 inline ANSHeader checkANSVersion(ANSHeader ansVersion) const
565 {
566 auto ctfANSHeader = getANSHeader();
568
569 const bool isEqual{ansVersion == ctfANSHeader};
570 const bool isHeaderUnspecified{ctfANSHeader == ANSVersionUnspecified};
571
572 if (isEqual) {
573 if (isHeaderUnspecified) {
574 throw std::runtime_error{fmt::format("Missmatch of ANSVersions, trying to encode/decode CTF with ANS Version Header {} with ANS Version {}",
575 static_cast<std::string>(ctfANSHeader),
576 static_cast<std::string>(ansVersion))};
577 } else {
578 ret = ctfANSHeader;
579 }
580 } else {
581 if (isHeaderUnspecified) {
582 ret = ansVersion;
583 } else {
584 ret = ctfANSHeader;
585 }
586 }
587
588 return ret;
589 };
590
591 template <typename input_IT, typename buffer_T>
592 o2::ctf::CTFIOSize entropyCodeRANSCompat(const input_IT srcBegin, const input_IT srcEnd, int slot, uint8_t symbolTablePrecision, buffer_T* buffer = nullptr, const std::any& encoderExt = {}, float memfc = 1.f);
593
594 template <typename input_IT, typename buffer_T>
595 o2::ctf::CTFIOSize entropyCodeRANSV1(const input_IT srcBegin, const input_IT srcEnd, int slot, Metadata::OptStore opt, buffer_T* buffer = nullptr, const std::any& encoderExt = {}, float memfc = 1.f);
596
597 template <typename input_IT, typename buffer_T>
598 o2::ctf::CTFIOSize encodeRANSV1External(const input_IT srcBegin, const input_IT srcEnd, int slot, const std::any& encoderExt, buffer_T* buffer = nullptr, double_t sizeEstimateSafetyFactor = 1);
599
600 template <typename input_IT, typename buffer_T>
601 o2::ctf::CTFIOSize encodeRANSV1Inplace(const input_IT srcBegin, const input_IT srcEnd, int slot, Metadata::OptStore opt, buffer_T* buffer = nullptr, double_t sizeEstimateSafetyFactor = 1);
602
603#ifndef __CLING__
604 template <typename input_IT, typename buffer_T>
605 o2::ctf::CTFIOSize pack(const input_IT srcBegin, const input_IT srcEnd, int slot, rans::Metrics<typename std::iterator_traits<input_IT>::value_type> metrics, buffer_T* buffer = nullptr);
606
607 template <typename input_IT, typename buffer_T>
608 inline o2::ctf::CTFIOSize pack(const input_IT srcBegin, const input_IT srcEnd, int slot, buffer_T* buffer = nullptr)
609 {
610 using source_type = typename std::iterator_traits<input_IT>::value_type;
611
613 metrics.getDatasetProperties().numSamples = std::distance(srcBegin, srcEnd);
614
615 if (metrics.getDatasetProperties().numSamples != 0) {
616 const auto [minIter, maxIter] = std::minmax_element(srcBegin, srcEnd);
617 metrics.getDatasetProperties().min = *minIter;
618 metrics.getDatasetProperties().max = *maxIter;
619
620 // special case: if min === max, the range is 0 and the data can be reconstructed just via the metadata.
621 metrics.getDatasetProperties().alphabetRangeBits =
622 rans::utils::getRangeBits(metrics.getDatasetProperties().min,
623 metrics.getDatasetProperties().max);
624 }
625
626 return pack(srcBegin, srcEnd, slot, metrics, buffer);
627 }
628#endif
629
630 template <typename input_IT, typename buffer_T>
631 o2::ctf::CTFIOSize store(const input_IT srcBegin, const input_IT srcEnd, int slot, Metadata::OptStore opt, buffer_T* buffer = nullptr);
632
633 // decode
634 template <typename dst_IT>
635 CTFIOSize decodeCompatImpl(dst_IT dest, int slot, const std::any& decoderExt) const;
636
637 template <typename dst_IT>
638 CTFIOSize decodeRansV1Impl(dst_IT dest, int slot, const std::any& decoderExt) const;
639
640 template <typename dst_IT>
641 CTFIOSize decodeUnpackImpl(dst_IT dest, int slot) const;
642
643 template <typename dst_IT>
644 CTFIOSize decodeCopyImpl(dst_IT dest, int slot) const;
645
647}; // namespace ctf
648
651template <typename H, int N, typename W>
652void EncodedBlocks<H, N, W>::readFromTree(TTree& tree, const std::string& name, int ev)
653{
654 readTreeBranch(tree, o2::utils::Str::concat_string(name, "_wrapper."), *this, ev);
655 for (int i = 0; i < N; i++) {
656 readTreeBranch(tree, o2::utils::Str::concat_string(name, "_block.", std::to_string(i), "."), mBlocks[i], ev);
657 }
658}
659
662template <typename H, int N, typename W>
663template <typename VD>
664void EncodedBlocks<H, N, W>::readFromTree(VD& vec, TTree& tree, const std::string& name, int ev)
665{
666 auto tmp = create(vec);
667 if (!readTreeBranch(tree, o2::utils::Str::concat_string(name, "_wrapper."), *tmp, ev)) {
668 throw std::runtime_error(fmt::format("Failed to read CTF header for {}", name));
669 }
670 tmp = tmp->expand(vec, tmp->estimateSizeFromMetadata());
671 const auto& meta = tmp->getMetadata();
672 for (int i = 0; i < N; i++) {
673 Block<W> bl;
674 readTreeBranch(tree, o2::utils::Str::concat_string(name, "_block.", std::to_string(i), "."), bl, ev);
675 assert(meta[i].nDictWords == bl.getNDict());
676 assert(meta[i].nDataWords == bl.getNData());
677 assert(meta[i].nLiteralWords == bl.getNLiterals());
678 tmp->mBlocks[i].store(bl.getNDict(), bl.getNData(), bl.getNLiterals(), bl.getDict(), bl.getData(), bl.getLiterals());
679 }
680}
681
684template <typename H, int N, typename W>
685size_t EncodedBlocks<H, N, W>::appendToTree(TTree& tree, const std::string& name) const
686{
687 long s = 0;
688 s += fillTreeBranch(tree, o2::utils::Str::concat_string(name, "_wrapper."), const_cast<base&>(*this), WrappersCompressionLevel, WrappersSplitLevel);
689 for (int i = 0; i < N; i++) {
690 int compression = mMetadata[i].opt == Metadata::OptStore::ROOTCompression ? 1 : 0;
691 s += fillTreeBranch(tree, o2::utils::Str::concat_string(name, "_block.", std::to_string(i), "."), const_cast<Block<W>&>(mBlocks[i]), compression);
692 }
693 tree.SetEntries(tree.GetEntries() + 1);
694 return s;
695}
696
699template <typename H, int N, typename W>
700template <typename D>
701bool EncodedBlocks<H, N, W>::readTreeBranch(TTree& tree, const std::string& brname, D& dt, int ev)
702{
703 auto* br = tree.GetBranch(brname.c_str());
704 if (!br) {
705 LOG(debug) << "Branch " << brname << " is absent";
706 return false;
707 }
708 auto* ptr = &dt;
709 br->SetAddress(&ptr);
710 br->GetEntry(ev);
711 br->ResetAddress();
712 return true;
713}
714
717template <typename H, int N, typename W>
718template <typename D>
719inline size_t EncodedBlocks<H, N, W>::fillTreeBranch(TTree& tree, const std::string& brname, D& dt, int compLevel, int splitLevel)
720{
721 auto* br = tree.GetBranch(brname.c_str());
722 if (!br) {
723 br = tree.Branch(brname.c_str(), &dt, 512, splitLevel);
724 br->SetCompressionLevel(compLevel);
725 }
726 return br->Fill();
727}
728
731template <typename H, int N, typename W>
733{
734 assert(dest.empty() && dest.mRegistry.getFreeSize() < estimateSize());
735 dest.mANSHeader = mANSHeader;
736 dest.mHeader = mHeader;
737 dest.mMetadata = mMetadata;
738 for (int i = 0; i < N; i++) {
739 dest.mBlocks[i].store(mBlocks[i].getNDict(), mBlocks[i].getNData(), mBlocks[i].getDict(), mBlocks[i].getData());
740 }
741}
742
745template <typename H, int N, typename W>
746template <typename V>
748{
749 auto vtsz = sizeof(typename std::remove_reference<decltype(vec)>::type::value_type), sz = estimateSize();
750 vec.resize(sz / vtsz);
751 copyToFlat(vec.data());
752}
753
757template <typename H, int N, typename W>
759{
760 size_t sz = 0;
761 sz += alignSize(sizeof(*this));
762 for (int i = 0; i < N; i++) {
763 sz += alignSize(mBlocks[i].nStored * sizeof(W));
764 }
765 return sz;
766}
767
771template <typename H, int N, typename W>
773{
774 size_t sz = alignSize(sizeof(*this));
775 for (int i = 0; i < N; i++) {
776 sz += alignSize((mMetadata[i].nDictWords + mMetadata[i].nDataWords + mMetadata[i].nLiteralWords) * sizeof(W));
777 }
778 return sz;
779}
780
783template <typename H, int N, typename W>
784template <typename buffer_T>
785auto EncodedBlocks<H, N, W>::expand(buffer_T& buffer, size_t newsizeBytes)
786{
787 auto buftypesize = sizeof(typename std::remove_reference<decltype(buffer)>::type::value_type);
788 auto* oldHead = get(buffer.data())->mRegistry.head;
789 buffer.resize(alignSize(newsizeBytes) / buftypesize);
790 relocate(oldHead, reinterpret_cast<char*>(buffer.data()), reinterpret_cast<char*>(buffer.data()), newsizeBytes);
791 return get(buffer.data());
792}
793
799template <typename H, int N, typename W>
800void EncodedBlocks<H, N, W>::relocate(const char* oldHead, char* newHead, char* wrapper, size_t newsize)
801{
802 auto newStr = get(wrapper);
803 for (int i = 0; i < N; i++) {
804 newStr->mBlocks[i].relocate(oldHead, newHead, wrapper);
805 }
806 newStr->mRegistry.head = newHead; // newHead points on the real data
807 // if asked, update the size
808 if (newsize) { // in bytes!!!
809 assert(newStr->estimateSize() <= newsize);
810 newStr->mRegistry.size = newsize;
811 }
812}
813
816template <typename H, int N, typename W>
818{
819 mRegistry.head = reinterpret_cast<char*>(this);
820 mRegistry.size = sz;
821 mRegistry.offsFreeStart = alignSize(sizeof(*this));
822 for (int i = 0; i < N; i++) {
823 mMetadata[i].clear();
824 mBlocks[i].registry = &mRegistry;
825 mBlocks[i].clear();
826 }
827}
828
831template <typename H, int N, typename W>
833{
834 for (int i = 0; i < N; i++) {
835 mBlocks[i].clear();
836 mMetadata[i].clear();
837 }
838 mRegistry.offsFreeStart = alignSize(sizeof(*this));
839}
840
843template <typename H, int N, typename W>
844auto EncodedBlocks<H, N, W>::getImage(const void* newHead)
845{
846 assert(newHead);
847 auto image(*get(newHead)); // 1st make a shalow copy
848 // now fix its pointers
849 // we don't modify newHead, but still need to remove constness for relocation interface
850 relocate(image.mRegistry.head, const_cast<char*>(reinterpret_cast<const char*>(newHead)), reinterpret_cast<char*>(&image));
851
852 return image;
853}
854
857template <typename H, int N, typename W>
858inline auto EncodedBlocks<H, N, W>::create(void* head, size_t sz)
859{
860 const H defh;
861 auto b = get(head);
862 b->init(sz);
863 b->setHeader(defh);
864 return b;
865}
866
869template <typename H, int N, typename W>
870template <typename VD>
872{
873 size_t vsz = sizeof(typename std::remove_reference<decltype(v)>::type::value_type); // size of the element of the buffer
874 auto baseSize = getMinAlignedSize() / vsz;
875 if (v.size() < baseSize) {
876 v.resize(baseSize);
877 }
878 return create(v.data(), v.size() * vsz);
879}
880
883template <typename H, int N, typename W>
884void EncodedBlocks<H, N, W>::print(const std::string& prefix, int verbosity) const
885{
886 if (verbosity > 0) {
887 LOG(info) << prefix << "Container of " << N << " blocks, size: " << size() << " bytes, unused: " << getFreeSize();
888 for (int i = 0; i < N; i++) {
889 LOG(info) << "Block " << i << " for " << static_cast<uint32_t>(mMetadata[i].messageLength) << " message words of "
890 << static_cast<uint32_t>(mMetadata[i].messageWordSize) << " bytes |"
891 << " NDictWords: " << mBlocks[i].getNDict() << " NDataWords: " << mBlocks[i].getNData()
892 << " NLiteralWords: " << mBlocks[i].getNLiterals();
893 }
894 } else if (verbosity == 0) {
895 size_t inpSize = 0, ndict = 0, ndata = 0, nlit = 0;
896 for (int i = 0; i < N; i++) {
897 inpSize += mMetadata[i].messageLength * mMetadata[i].messageWordSize;
898 ndict += mBlocks[i].getNDict();
899 ndata += mBlocks[i].getNData();
900 nlit += mBlocks[i].getNLiterals();
901 }
902 LOG(info) << prefix << N << " blocks, input size: " << inpSize << ", output size: " << size()
903 << " NDictWords: " << ndict << " NDataWords: " << ndata << " NLiteralWords: " << nlit;
904 }
905}
906
908template <typename H, int N, typename W>
909template <class container_T, class container_IT>
910inline o2::ctf::CTFIOSize EncodedBlocks<H, N, W>::decode(container_T& dest, // destination container
911 int slot, // slot of the block to decode
912 const std::any& decoderExt) const // optional externally provided decoder
913{
914 dest.resize(mMetadata[slot].messageLength); // allocate output buffer
915 return decode(std::begin(dest), slot, decoderExt);
916}
917
919template <typename H, int N, typename W>
920template <typename D_IT, std::enable_if_t<detail::is_iterator_v<D_IT>, bool>>
921CTFIOSize EncodedBlocks<H, N, W>::decode(D_IT dest, // iterator to destination
922 int slot, // slot of the block to decode
923 const std::any& decoderExt) const // optional externally provided decoder
924{
925
926 // get references to the right data
927 const auto& ansVersion = getANSHeader();
928 const auto& block = mBlocks[slot];
929 const auto& md = mMetadata[slot];
930 LOGP(debug, "Slot{} | NStored={} Ndict={} nData={}, MD: messageLength:{} opt:{} min:{} max:{} offs:{} width:{} ", slot, block.getNStored(), block.getNDict(), block.getNData(), md.messageLength, (int)md.opt, md.min, md.max, md.literalsPackingOffset, md.literalsPackingWidth);
931
932 constexpr size_t word_size = sizeof(W);
933
934 if (ansVersion == ANSVersionCompat) {
935 if (!block.getNStored()) {
936 return {0, md.getUncompressedSize(), md.getCompressedSize() * word_size};
937 }
938 if (md.opt == Metadata::OptStore::EENCODE) {
939 return decodeCompatImpl(dest, slot, decoderExt);
940 } else {
941 return decodeCopyImpl(dest, slot);
942 }
943 } else if (ansVersion == ANSVersion1) {
944 if (md.opt == Metadata::OptStore::PACK) {
945 return decodeUnpackImpl(dest, slot);
946 }
947 if (!block.getNStored()) {
948 return {0, md.getUncompressedSize(), md.getCompressedSize() * word_size};
949 }
950 if (md.opt == Metadata::OptStore::EENCODE) {
951 return decodeRansV1Impl(dest, slot, decoderExt);
952 } else {
953 return decodeCopyImpl(dest, slot);
954 }
955 } else {
956 throw std::runtime_error("unsupported ANS Version");
957 }
958};
959
960#ifndef __CLING__
961template <typename H, int N, typename W>
962template <typename dst_IT>
963CTFIOSize EncodedBlocks<H, N, W>::decodeCompatImpl(dst_IT dstBegin, int slot, const std::any& decoderExt) const
964{
965
966 // get references to the right data
967 const auto& block = mBlocks[slot];
968 const auto& md = mMetadata[slot];
969
970 using dst_type = typename std::iterator_traits<dst_IT>::value_type;
971 using decoder_type = typename rans::compat::decoder_type<dst_type>;
972
973 std::optional<decoder_type> inplaceDecoder{};
974 if (md.nDictWords > 0) {
975 inplaceDecoder = decoder_type{std::get<rans::RenormedDenseHistogram<dst_type>>(this->getDictionary<dst_type>(slot))};
976 } else if (!decoderExt.has_value()) {
977 throw std::runtime_error("neither dictionary nor external decoder provided");
978 }
979
980 auto getDecoder = [&]() -> const decoder_type& {
981 if (inplaceDecoder.has_value()) {
982 return inplaceDecoder.value();
983 } else {
984 return std::any_cast<const decoder_type&>(decoderExt);
985 }
986 };
987
988 const size_t NDecoderStreams = rans::compat::defaults::CoderPreset::nStreams;
989
990 if (block.getNLiterals()) {
991 auto* literalsEnd = reinterpret_cast<const dst_type*>(block.getLiterals()) + md.nLiterals;
992 getDecoder().process(block.getData() + block.getNData(), dstBegin, md.messageLength, NDecoderStreams, literalsEnd);
993 } else {
994 getDecoder().process(block.getData() + block.getNData(), dstBegin, md.messageLength, NDecoderStreams);
995 }
996 return {0, md.getUncompressedSize(), md.getCompressedSize() * sizeof(W)};
997};
998
999template <typename H, int N, typename W>
1000template <typename dst_IT>
1001CTFIOSize EncodedBlocks<H, N, W>::decodeRansV1Impl(dst_IT dstBegin, int slot, const std::any& decoderExt) const
1002{
1003
1004 // get references to the right data
1005 const auto& block = mBlocks[slot];
1006 const auto& md = mMetadata[slot];
1007
1008 using dst_type = typename std::iterator_traits<dst_IT>::value_type;
1009 using decoder_type = typename rans::defaultDecoder_type<dst_type>;
1010
1011 std::optional<decoder_type> inplaceDecoder{};
1012 if (md.nDictWords > 0) {
1013 std::visit([&](auto&& arg) { inplaceDecoder = decoder_type{arg}; }, this->getDictionary<dst_type>(slot));
1014 } else if (!decoderExt.has_value()) {
1015 throw std::runtime_error("no dictionary nor external decoder provided");
1016 }
1017
1018 auto getDecoder = [&]() -> const decoder_type& {
1019 if (inplaceDecoder.has_value()) {
1020 return inplaceDecoder.value();
1021 } else {
1022 return std::any_cast<const decoder_type&>(decoderExt);
1023 }
1024 };
1025
1026 // verify decoders
1027 [&]() {
1028 const decoder_type& decoder = getDecoder();
1029 const size_t decoderSymbolTablePrecision = decoder.getSymbolTablePrecision();
1030
1031 if (md.probabilityBits != decoderSymbolTablePrecision) {
1032 throw std::runtime_error(fmt::format(
1033 "Missmatch in decoder renorming precision vs metadata:{} Bits vs {} Bits.",
1034 md.probabilityBits, decoderSymbolTablePrecision));
1035 }
1036
1037 if (md.streamSize != rans::utils::getStreamingLowerBound_v<typename decoder_type::coder_type>) {
1038 throw std::runtime_error("Streaming lower bound of dataset and decoder do not match");
1039 }
1040 }();
1041
1042 // do the actual decoding
1043 if (block.getNLiterals()) {
1044 std::vector<dst_type> literals(md.nLiterals);
1045 rans::unpack(block.getLiterals(), md.nLiterals, literals.data(), md.literalsPackingWidth, md.literalsPackingOffset);
1046 getDecoder().process(block.getData() + block.getNData(), dstBegin, md.messageLength, md.nStreams, literals.end());
1047 } else {
1048 getDecoder().process(block.getData() + block.getNData(), dstBegin, md.messageLength, md.nStreams);
1049 }
1050 return {0, md.getUncompressedSize(), md.getCompressedSize() * sizeof(W)};
1051};
1052
1053template <typename H, int N, typename W>
1054template <typename dst_IT>
1056{
1057 using dest_t = typename std::iterator_traits<dst_IT>::value_type;
1058
1059 const auto& block = mBlocks[slot];
1060 const auto& md = mMetadata[slot];
1061
1062 const size_t messageLength = md.messageLength;
1063 const size_t packingWidth = md.probabilityBits;
1064 const dest_t offset = md.min;
1065 const auto* srcIt = block.getData();
1066 // we have a vector of one and the same value. All information is in the metadata
1067 if (packingWidth == 0) {
1068 const dest_t value = [&]() -> dest_t {
1069 // Bugfix: We tried packing values with a width of 0 Bits;
1070 if (md.nDataWords > 0) {
1071 LOGP(debug, "packing bug recovery: MD nStreams:{} messageLength:{} nLiterals:{} messageWordSize:{} coderType:{} streamSize:{} probabilityBits:{} (int)opt:{} min:{} max:{} literalsPackingOffset:{} literalsPackingWidth:{} nDictWords:{} nDataWords:{} nLiteralWords:{}",
1072 value, md.nStreams, md.messageLength, md.nLiterals, md.messageWordSize, md.coderType, md.streamSize, md.probabilityBits, (int)md.opt, md.min, md.max, md.literalsPackingOffset, md.literalsPackingWidth, md.nDictWords, md.nDataWords, md.nLiteralWords);
1073 return offset + static_cast<dest_t>(*srcIt);
1074 }
1075 // normal case:
1076 return offset;
1077 }();
1078 for (size_t i = 0; i < messageLength; ++i) {
1079 *dest++ = value;
1080 }
1081 } else {
1082 rans::unpack(srcIt, messageLength, dest, packingWidth, offset);
1083 }
1084 return {0, md.getUncompressedSize(), md.getCompressedSize() * sizeof(W)};
1085};
1086
1087template <typename H, int N, typename W>
1088template <typename dst_IT>
1090{
1091 // get references to the right data
1092 const auto& block = mBlocks[slot];
1093 const auto& md = mMetadata[slot];
1094
1095 using dest_t = typename std::iterator_traits<dst_IT>::value_type;
1096 using decoder_t = typename rans::compat::decoder_type<dest_t>;
1097 using destPtr_t = typename std::iterator_traits<dst_IT>::pointer;
1098
1099 destPtr_t srcBegin = reinterpret_cast<destPtr_t>(block.payload);
1100 destPtr_t srcEnd = srcBegin + md.messageLength * sizeof(dest_t);
1101 std::copy(srcBegin, srcEnd, dest);
1102
1103 return {0, md.getUncompressedSize(), md.getCompressedSize() * sizeof(W)};
1104};
1105
1107template <typename H, int N, typename W>
1108template <typename input_IT, typename buffer_T>
1109o2::ctf::CTFIOSize EncodedBlocks<H, N, W>::encode(const input_IT srcBegin, // iterator begin of source message
1110 const input_IT srcEnd, // iterator end of source message
1111 int slot, // slot in encoded data to fill
1112 uint8_t symbolTablePrecision, // encoding into
1113 Metadata::OptStore opt, // option for data compression
1114 buffer_T* buffer, // optional buffer (vector) providing memory for encoded blocks
1115 const std::any& encoderExt, // optional external encoder
1116 float memfc) // memory allocation margin factor
1117{
1118 // fill a new block
1119 assert(slot == mRegistry.nFilledBlocks);
1120 mRegistry.nFilledBlocks++;
1121
1122 const size_t messageLength = std::distance(srcBegin, srcEnd);
1123 // cover three cases:
1124 // * empty source message: no co
1125 // * source message to pass through without any entropy coding
1126 // * source message where entropy coding should be applied
1127
1128 // case 1: empty source message
1129 if (messageLength == 0) {
1130 mMetadata[slot] = Metadata{};
1131 mMetadata[slot].opt = Metadata::OptStore::NODATA;
1132 return {};
1133 }
1134 if (detail::mayEEncode(opt)) {
1135 const ANSHeader& ansVersion = getANSHeader();
1136 if (ansVersion == ANSVersionCompat) {
1137 return entropyCodeRANSCompat(srcBegin, srcEnd, slot, symbolTablePrecision, buffer, encoderExt, memfc);
1138 } else if (ansVersion == ANSVersion1) {
1139 return entropyCodeRANSV1(srcBegin, srcEnd, slot, opt, buffer, encoderExt, memfc);
1140 } else {
1141 throw std::runtime_error(fmt::format("Unsupported ANS Coder Version: {}.{}", ansVersion.majorVersion, ansVersion.minorVersion));
1142 }
1143 } else if (detail::mayPack(opt)) {
1144 return pack(srcBegin, srcEnd, slot, buffer);
1145 } else {
1146 return store(srcBegin, srcEnd, slot, opt, buffer);
1147 }
1148};
1149
1150template <typename H, int N, typename W>
1151template <typename T>
1152[[nodiscard]] auto EncodedBlocks<H, N, W>::expandStorage(size_t slot, size_t nElements, T* buffer) -> decltype(auto)
1153{
1154 // after previous relocation this (hence its data members) are not guaranteed to be valid
1155 auto* old = get(buffer->data());
1156 auto* thisBlock = &(old->mBlocks[slot]);
1157 auto* thisMetadata = &(old->mMetadata[slot]);
1158
1159 // resize underlying buffer of block if necessary and update all pointers.
1160 auto* const blockHead = get(thisBlock->registry->head); // extract pointer from the block, as "this" might be invalid
1161 const size_t additionalSize = blockHead->estimateBlockSize(nElements); // additionalSize is in bytes!!!
1162 if (additionalSize >= thisBlock->registry->getFreeSize()) {
1163 LOGP(debug, "Slot {} with {} available words needs to allocate {} bytes for a total of {} words.", slot, thisBlock->registry->getFreeSize(), additionalSize, nElements);
1164 if (buffer) {
1165 blockHead->expand(*buffer, blockHead->size() + (additionalSize - blockHead->getFreeSize()));
1166 thisMetadata = &(get(buffer->data())->mMetadata[slot]);
1167 thisBlock = &(get(buffer->data())->mBlocks[slot]); // in case of resizing this and any this.xxx becomes invalid
1168 } else {
1169 throw std::runtime_error("failed to allocate additional space in provided external buffer");
1170 }
1171 }
1172 return std::make_pair(thisBlock, thisMetadata);
1173};
1174
/// Entropy-code the source message [srcBegin, srcEnd) into the block at `slot` using the
/// "compat" rANS encoder.
/// If encoderExt holds a value it is used as encoder and no dictionary is built from the
/// samples; otherwise a histogram and an encoder with the requested symbolTablePrecision are
/// created from the source message. If building the histogram/encoder throws
/// rans::HistogramError, the message is stored via store() with FallbackStorageType instead.
/// memfc scales the relative margin applied to the estimated size of the encode buffer.
/// Returns {0, uncompressed size, compressed size in bytes} as reported by the written metadata.
template <typename H, int N, typename W>
template <typename input_IT, typename buffer_T>
o2::ctf::CTFIOSize EncodedBlocks<H, N, W>::entropyCodeRANSCompat(const input_IT srcBegin, const input_IT srcEnd, int slot, uint8_t symbolTablePrecision, buffer_T* buffer, const std::any& encoderExt, float memfc)
{
  using storageBuffer_t = W;
  using input_t = typename std::iterator_traits<input_IT>::value_type;
  using ransEncoder_t = typename rans::compat::encoder_type<input_t>;
  using ransState_t = typename ransEncoder_t::coder_type::state_type;
  using ransStream_t = typename ransEncoder_t::stream_type;

  // assert at compile time that output types align so that padding is not necessary.
  static_assert(std::is_same_v<storageBuffer_t, ransStream_t>);
  static_assert(std::is_same_v<storageBuffer_t, typename rans::count_t>);

  // these pointers are refreshed from every expandStorage() call below
  auto* thisBlock = &mBlocks[slot];
  auto* thisMetadata = &mMetadata[slot];

  // build symbol statistics
  constexpr size_t SizeEstMarginAbs = 10 * 1024;
  const float SizeEstMarginRel = 1.5 * memfc;

  const size_t messageLength = std::distance(srcBegin, srcEnd);
  rans::DenseHistogram<input_t> frequencyTable{};
  rans::compat::encoder_type<input_t> inplaceEncoder{};

  try {
    std::tie(inplaceEncoder, frequencyTable) = [&]() {
      if (encoderExt.has_value()) {
        // external encoder provided: keep default-constructed (empty) encoder and histogram
        return std::make_tuple(ransEncoder_t{}, rans::DenseHistogram<input_t>{});
      } else {
        auto histogram = rans::makeDenseHistogram::fromSamples(srcBegin, srcEnd);
        auto encoder = rans::compat::makeEncoder::fromHistogram(histogram, symbolTablePrecision);
        return std::make_tuple(std::move(encoder), std::move(histogram));
      }
    }();
  } catch (const rans::HistogramError& error) {
    LOGP(warning, "Failed to build Dictionary for rANS encoding, using fallback option");
    return store(srcBegin, srcEnd, slot, this->FallbackStorageType, buffer);
  }
  // std::any_cast throws std::bad_any_cast if encoderExt holds an encoder of a different type
  const ransEncoder_t& encoder = encoderExt.has_value() ? std::any_cast<const ransEncoder_t&>(encoderExt) : inplaceEncoder;

  // estimate size of encode buffer
  int dataSize = rans::compat::calculateMaxBufferSizeB(messageLength, rans::compat::getAlphabetRangeBits(encoder.getSymbolTable())); // size in bytes
  // preliminary expansion of storage based on dict size + estimated size of encode buffer
  dataSize = SizeEstMarginAbs + int(SizeEstMarginRel * (dataSize / sizeof(storageBuffer_t))) + (sizeof(input_t) < sizeof(storageBuffer_t)); // size in words of output stream

  // trimmed view on the histogram; it is empty when the external encoder is used
  const auto view = rans::trim(rans::makeHistogramView(frequencyTable));
  std::tie(thisBlock, thisMetadata) = expandStorage(slot, view.size() + dataSize, buffer);

  // store dictionary first

  if (!view.empty()) {
    thisBlock->storeDict(view.size(), view.data());
    LOGP(debug, "StoreDict {} bytes, offs: {}:{}", view.size() * sizeof(W), thisBlock->getOffsDict(), thisBlock->getOffsDict() + view.size() * sizeof(W));
  }
  // vector of incompressible literal symbols
  std::vector<input_t> literals;
  // directly encode source message into block buffer.
  storageBuffer_t* const blockBufferBegin = thisBlock->getCreateData();
  const size_t maxBufferSize = thisBlock->registry->getFreeSize(); // note: "this" might be not valid after expandStorage call!!!
  const auto [encodedMessageEnd, literalsEnd] = encoder.process(srcBegin, srcEnd, blockBufferBegin, std::back_inserter(literals));
  // verify that the encoder did not write past the free space of the block
  rans::utils::checkBounds(encodedMessageEnd, blockBufferBegin + maxBufferSize / sizeof(W));
  // from here on dataSize is the actual number of encoded words, not the estimate
  dataSize = encodedMessageEnd - thisBlock->getDataPointer();
  thisBlock->setNData(dataSize);
  thisBlock->realignBlock();
  LOGP(debug, "StoreData {} bytes, offs: {}:{}", dataSize * sizeof(W), thisBlock->getOffsData(), thisBlock->getOffsData() + dataSize * sizeof(W));
  // update the size claimed by encode message directly inside the block

  // store incompressible symbols if any
  // nLiteralSymbols has to be taken before the literals vector is padded below
  const size_t nLiteralSymbols = literals.size();
  const size_t nLiteralWords = [&]() {
    if (!literals.empty()) {
      const size_t nSymbols = literals.size();
      // introduce padding in case literals don't align;
      const size_t nLiteralSymbolsPadded = calculatePaddedSize<input_t, storageBuffer_t>(nSymbols);
      literals.resize(nLiteralSymbolsPadded, {});

      const size_t nLiteralStorageElems = calculateNDestTElements<input_t, storageBuffer_t>(nSymbols);
      std::tie(thisBlock, thisMetadata) = expandStorage(slot, nLiteralStorageElems, buffer);
      thisBlock->storeLiterals(nLiteralStorageElems, reinterpret_cast<const storageBuffer_t*>(literals.data()));
      LOGP(debug, "StoreLiterals {} bytes, offs: {}:{}", nLiteralStorageElems * sizeof(W), thisBlock->getOffsLiterals(), thisBlock->getOffsLiterals() + nLiteralStorageElems * sizeof(W));
      return nLiteralStorageElems;
    }
    return size_t(0);
  }();

  LOGP(debug, "Min, {} Max, {}, size, {}, nSamples {}", view.getMin(), view.getMax(), view.size(), frequencyTable.getNumSamples());

  // write metadata describing what was stored in the block
  *thisMetadata = detail::makeMetadataRansCompat<input_t, ransState_t, ransStream_t>(encoder.getNStreams(),
                                                                                     messageLength,
                                                                                     nLiteralSymbols,
                                                                                     encoder.getSymbolTable().getPrecision(),
                                                                                     view.getMin(),
                                                                                     view.getMax(),
                                                                                     view.size(),
                                                                                     dataSize,
                                                                                     nLiteralWords);

  return {0, thisMetadata->getUncompressedSize(), thisMetadata->getCompressedSize() * sizeof(W)};
}
1275
1276template <typename H, int N, typename W>
1277template <typename input_IT, typename buffer_T>
1278o2::ctf::CTFIOSize EncodedBlocks<H, N, W>::entropyCodeRANSV1(const input_IT srcBegin, const input_IT srcEnd, int slot, Metadata::OptStore opt, buffer_T* buffer, const std::any& encoderExt, float memfc)
1279{
1280 CTFIOSize encoderStatistics{};
1281
1282 const size_t nSamples = std::distance(srcBegin, srcEnd);
1283 if (detail::mayPack(opt) && nSamples < PackingThreshold) {
1284 encoderStatistics = pack(srcBegin, srcEnd, slot, buffer);
1285 } else {
1286
1287 if (encoderExt.has_value()) {
1288 encoderStatistics = encodeRANSV1External(srcBegin, srcEnd, slot, encoderExt, buffer, memfc);
1289 } else {
1290 encoderStatistics = encodeRANSV1Inplace(srcBegin, srcEnd, slot, opt, buffer, memfc);
1291 }
1292 }
1293 return encoderStatistics;
1294}
1295
/// Entropy-code the source message [srcBegin, srcEnd) into the block at `slot` using the
/// externally provided rANS (version 1) encoder held in encoderExt.
/// No dictionary is stored in the block (the metadata is written with 0 dictionary words);
/// symbols reported as incompressible by the encoder are written to the literals section.
/// std::any_cast throws std::bad_any_cast if encoderExt does not hold the expected encoder type.
/// NOTE(review): sizeEstimateSafetyFactor is not used anywhere in this function body.
/// Returns {0, uncompressed size, compressed size in bytes} as reported by the written metadata.
template <typename H, int N, typename W>
template <typename input_IT, typename buffer_T>
CTFIOSize EncodedBlocks<H, N, W>::encodeRANSV1External(const input_IT srcBegin, const input_IT srcEnd, int slot, const std::any& encoderExt, buffer_T* buffer, double_t sizeEstimateSafetyFactor)
{
  using storageBuffer_t = W;
  using input_t = typename std::iterator_traits<input_IT>::value_type;
  using ransEncoder_t = typename internal::ExternalEntropyCoder<input_t>::encoder_type;
  using ransState_t = typename ransEncoder_t::coder_type::state_type;
  using ransStream_t = typename ransEncoder_t::stream_type;

  // assert at compile time that output types align so that padding is not necessary.
  static_assert(std::is_same_v<storageBuffer_t, ransStream_t>);
  static_assert(std::is_same_v<storageBuffer_t, typename rans::count_t>);

  // these pointers are refreshed from every expandStorage() call below
  auto* thisBlock = &mBlocks[slot];
  auto* thisMetadata = &mMetadata[slot];

  const size_t messageLength = std::distance(srcBegin, srcEnd);
  internal::ExternalEntropyCoder<input_t> encoder{std::any_cast<const ransEncoder_t&>(encoderExt)};

  // reserve space for the encoded payload based on the estimate of the encoder
  const size_t payloadSizeWords = encoder.template computePayloadSizeEstimate<storageBuffer_t>(messageLength);
  std::tie(thisBlock, thisMetadata) = expandStorage(slot, payloadSizeWords, buffer);

  // encode payload
  auto encodedMessageEnd = encoder.encode(srcBegin, srcEnd, thisBlock->getCreateData(), thisBlock->getEndOfBlock());
  const size_t dataSize = std::distance(thisBlock->getCreateData(), encodedMessageEnd);
  thisBlock->setNData(dataSize);
  thisBlock->realignBlock();
  LOGP(debug, "StoreData {} bytes, offs: {}:{}", dataSize * sizeof(storageBuffer_t), thisBlock->getOffsData(), thisBlock->getOffsData() + dataSize * sizeof(storageBuffer_t));
  // update the size claimed by encoded message directly inside the block

  // encode literals
  size_t literalsSize = 0;
  if (encoder.getNIncompressibleSamples() > 0) {
    // the literals need their own space, which is requested only now that their number is known
    const size_t literalsBufferSizeWords = encoder.template computePackedIncompressibleSize<storageBuffer_t>();
    std::tie(thisBlock, thisMetadata) = expandStorage(slot, literalsBufferSizeWords, buffer);
    auto literalsEnd = encoder.writeIncompressible(thisBlock->getCreateLiterals(), thisBlock->getEndOfBlock());
    literalsSize = std::distance(thisBlock->getCreateLiterals(), literalsEnd);
    thisBlock->setNLiterals(literalsSize);
    thisBlock->realignBlock();
    LOGP(debug, "StoreLiterals {} bytes, offs: {}:{}", literalsSize * sizeof(storageBuffer_t), thisBlock->getOffsLiterals(), thisBlock->getOffsLiterals() + literalsSize * sizeof(storageBuffer_t));
  }

  // write metadata
  const auto& symbolTable = encoder.getEncoder().getSymbolTable();
  *thisMetadata = detail::makeMetadataRansV1<input_t, ransState_t, ransStream_t>(encoder.getEncoder().getNStreams(),
                                                                                 rans::utils::getStreamingLowerBound_v<typename ransEncoder_t::coder_type>,
                                                                                 messageLength,
                                                                                 encoder.getNIncompressibleSamples(),
                                                                                 symbolTable.getPrecision(),
                                                                                 symbolTable.getOffset(),
                                                                                 symbolTable.getOffset() + symbolTable.size(),
                                                                                 encoder.getIncompressibleSymbolOffset(),
                                                                                 encoder.getIncompressibleSymbolPackingBits(),
                                                                                 0,
                                                                                 dataSize,
                                                                                 literalsSize);

  return {0, thisMetadata->getUncompressedSize(), thisMetadata->getCompressedSize() * sizeof(W)};
};
1356
1357template <typename H, int N, typename W>
1358template <typename input_IT, typename buffer_T>
1359CTFIOSize EncodedBlocks<H, N, W>::encodeRANSV1Inplace(const input_IT srcBegin, const input_IT srcEnd, int slot, Metadata::OptStore opt, buffer_T* buffer, double_t sizeEstimateSafetyFactor)
1360{
1361 using storageBuffer_t = W;
1362 using input_t = typename std::iterator_traits<input_IT>::value_type;
1363 using ransEncoder_t = typename rans::denseEncoder_type<input_t>;
1364 using ransState_t = typename ransEncoder_t::coder_type::state_type;
1365 using ransStream_t = typename ransEncoder_t::stream_type;
1366
1367 // assert at compile time that output types align so that padding is not necessary.
1368 static_assert(std::is_same_v<storageBuffer_t, ransStream_t>);
1369 static_assert(std::is_same_v<storageBuffer_t, typename rans::count_t>);
1370
1371 auto* thisBlock = &mBlocks[slot];
1372 auto* thisMetadata = &mMetadata[slot];
1373
1375 rans::SourceProxy<input_IT> proxy{srcBegin, srcEnd, [](input_IT begin, input_IT end) {
1376 const size_t nSamples = std::distance(begin, end);
1377 return (!std::is_pointer_v<input_IT> && (nSamples < rans::utils::pow2(23)));
1378 }};
1379
1380 try {
1381 if (proxy.isCached()) {
1382 encoder = internal::InplaceEntropyCoder<input_t>{proxy.beginCache(), proxy.endCache()};
1383 } else {
1384 encoder = internal::InplaceEntropyCoder<input_t>{proxy.beginIter(), proxy.endIter()};
1385 }
1386 } catch (const rans::HistogramError& error) {
1387 LOGP(warning, "Failed to build Dictionary for rANS encoding, using fallback option");
1388 if (proxy.isCached()) {
1389 return store(proxy.beginCache(), proxy.endCache(), slot, this->FallbackStorageType, buffer);
1390 } else {
1391 return store(proxy.beginIter(), proxy.endIter(), slot, this->FallbackStorageType, buffer);
1392 }
1393 }
1394
1395 const rans::Metrics<input_t>& metrics = encoder.getMetrics();
1396 /*
1397 if constexpr (sizeof(input_t) > 2) {
1398 const auto& dp = metrics.getDatasetProperties();
1399 LOGP(info, "Metrics:{{slot: {}, numSamples: {}, min: {}, max: {}, alphabetRangeBits: {}, nUsedAlphabetSymbols: {}, preferPacking: {}}}", slot, dp.numSamples, dp.min, dp.max, dp.alphabetRangeBits, dp.nUsedAlphabetSymbols, metrics.getSizeEstimate().preferPacking());
1400 }
1401 */
1402 if (detail::mayPack(opt) && metrics.getSizeEstimate().preferPacking()) {
1403 if (proxy.isCached()) {
1404 return pack(proxy.beginCache(), proxy.endCache(), slot, metrics, buffer);
1405 } else {
1406 return pack(proxy.beginIter(), proxy.endIter(), slot, metrics, buffer);
1407 };
1408 }
1409
1410 encoder.makeEncoder();
1411
1412 const rans::SizeEstimate sizeEstimate = metrics.getSizeEstimate();
1413 const size_t bufferSizeWords = rans::utils::nBytesTo<storageBuffer_t>((sizeEstimate.getCompressedDictionarySize() +
1414 sizeEstimate.getCompressedDatasetSize() +
1415 sizeEstimate.getIncompressibleSize()) *
1416 sizeEstimateSafetyFactor);
1417 std::tie(thisBlock, thisMetadata) = expandStorage(slot, bufferSizeWords, buffer);
1418
1419 // encode dict
1420 auto encodedDictEnd = encoder.writeDictionary(thisBlock->getCreateDict(), thisBlock->getEndOfBlock());
1421 const size_t dictSize = std::distance(thisBlock->getCreateDict(), encodedDictEnd);
1422 thisBlock->setNDict(dictSize);
1423 thisBlock->realignBlock();
1424 LOGP(debug, "StoreDict {} bytes, offs: {}:{}", dictSize * sizeof(storageBuffer_t), thisBlock->getOffsDict(), thisBlock->getOffsDict() + dictSize * sizeof(storageBuffer_t));
1425
1426 // encode payload
1427 auto encodedMessageEnd = thisBlock->getCreateData();
1428 if (proxy.isCached()) {
1429 encodedMessageEnd = encoder.encode(proxy.beginCache(), proxy.endCache(), thisBlock->getCreateData(), thisBlock->getEndOfBlock());
1430 } else {
1431 encodedMessageEnd = encoder.encode(proxy.beginIter(), proxy.endIter(), thisBlock->getCreateData(), thisBlock->getEndOfBlock());
1432 }
1433 const size_t dataSize = std::distance(thisBlock->getCreateData(), encodedMessageEnd);
1434 thisBlock->setNData(dataSize);
1435 thisBlock->realignBlock();
1436 LOGP(debug, "StoreData {} bytes, offs: {}:{}", dataSize * sizeof(storageBuffer_t), thisBlock->getOffsData(), thisBlock->getOffsData() + dataSize * sizeof(storageBuffer_t));
1437 // update the size claimed by encoded message directly inside the block
1438
1439 // encode literals
1440 size_t literalsSize{};
1441 if (encoder.getNIncompressibleSamples() > 0) {
1442 auto literalsEnd = encoder.writeIncompressible(thisBlock->getCreateLiterals(), thisBlock->getEndOfBlock());
1443 literalsSize = std::distance(thisBlock->getCreateLiterals(), literalsEnd);
1444 thisBlock->setNLiterals(literalsSize);
1445 thisBlock->realignBlock();
1446 LOGP(debug, "StoreLiterals {} bytes, offs: {}:{}", literalsSize * sizeof(storageBuffer_t), thisBlock->getOffsLiterals(), thisBlock->getOffsLiterals() + literalsSize * sizeof(storageBuffer_t));
1447 }
1448
1449 // write metadata
1450 *thisMetadata = detail::makeMetadataRansV1<input_t, ransState_t, ransStream_t>(encoder.getNStreams(),
1451 rans::utils::getStreamingLowerBound_v<typename ransEncoder_t::coder_type>,
1452 std::distance(srcBegin, srcEnd),
1453 encoder.getNIncompressibleSamples(),
1454 encoder.getSymbolTablePrecision(),
1455 *metrics.getCoderProperties().min,
1456 *metrics.getCoderProperties().max,
1457 metrics.getDatasetProperties().min,
1458 metrics.getDatasetProperties().alphabetRangeBits,
1459 dictSize,
1460 dataSize,
1461 literalsSize);
1462
1463 return {0, thisMetadata->getUncompressedSize(), thisMetadata->getCompressedSize() * sizeof(W)};
1464}; // namespace ctf
1465
1466template <typename H, int N, typename W>
1467template <typename input_IT, typename buffer_T>
1468o2::ctf::CTFIOSize EncodedBlocks<H, N, W>::pack(const input_IT srcBegin, const input_IT srcEnd, int slot, rans::Metrics<typename std::iterator_traits<input_IT>::value_type> metrics, buffer_T* buffer)
1469{
1470 using storageBuffer_t = W;
1471 using input_t = typename std::iterator_traits<input_IT>::value_type;
1472
1473 const size_t messageLength = metrics.getDatasetProperties().numSamples;
1474 const auto alphabetRangeBits = metrics.getDatasetProperties().alphabetRangeBits;
1475
1476 auto* thisBlock = &mBlocks[slot];
1477 auto* thisMetadata = &mMetadata[slot];
1478 size_t packedSize = 0;
1479
1480 if (messageLength == 0) {
1481 *thisMetadata = detail::makeMetadataPack<input_t>(0, 0, 0, 0);
1482 } else if (metrics.getDatasetProperties().alphabetRangeBits == 0) {
1483 *thisMetadata = detail::makeMetadataPack<input_t>(messageLength, 0, *srcBegin, 0);
1484 } else {
1486 size_t packingBufferWords = packer.template getPackingBufferSize<storageBuffer_t>(messageLength);
1487 std::tie(thisBlock, thisMetadata) = expandStorage(slot, packingBufferWords, buffer);
1488 auto packedMessageEnd = packer.pack(srcBegin, srcEnd, thisBlock->getCreateData(), thisBlock->getEndOfBlock());
1489 packedSize = std::distance(thisBlock->getCreateData(), packedMessageEnd);
1490 *thisMetadata = detail::makeMetadataPack<input_t>(messageLength, packer.getPackingWidth(), packer.getOffset(), packedSize);
1491 thisBlock->setNData(packedSize);
1492 thisBlock->realignBlock();
1493 }
1494
1495 LOGP(debug, "StoreData {} bytes, offs: {}:{}", packedSize * sizeof(storageBuffer_t), thisBlock->getOffsData(), thisBlock->getOffsData() + packedSize * sizeof(storageBuffer_t));
1496 return {0, thisMetadata->getUncompressedSize(), thisMetadata->getCompressedSize() * sizeof(W)};
1497};
1498
1499template <typename H, int N, typename W>
1500template <typename input_IT, typename buffer_T>
1501o2::ctf::CTFIOSize EncodedBlocks<H, N, W>::store(const input_IT srcBegin, const input_IT srcEnd, int slot, Metadata::OptStore opt, buffer_T* buffer)
1502{
1503 using storageBuffer_t = W;
1504 using input_t = typename std::iterator_traits<input_IT>::value_type;
1505
1506 const size_t messageLength = std::distance(srcBegin, srcEnd);
1507 // introduce padding in case literals don't align;
1508 const size_t nSourceElemsPadded = calculatePaddedSize<input_t, storageBuffer_t>(messageLength);
1509 std::vector<input_t> tmp(nSourceElemsPadded, {});
1510 std::copy(srcBegin, srcEnd, std::begin(tmp));
1511
1512 const size_t nBufferElems = calculateNDestTElements<input_t, storageBuffer_t>(messageLength);
1513 auto [thisBlock, thisMetadata] = expandStorage(slot, nBufferElems, buffer);
1514 thisBlock->storeData(nBufferElems, reinterpret_cast<const storageBuffer_t*>(tmp.data()));
1515
1516 *thisMetadata = detail::makeMetadataStore<input_t, storageBuffer_t>(messageLength, opt, nBufferElems);
1517
1518 return {0, thisMetadata->getUncompressedSize(), thisMetadata->getCompressedSize() * sizeof(W)};
1519};
1520
1522template <typename H, int N, typename W>
1523std::vector<char> EncodedBlocks<H, N, W>::createDictionaryBlocks(const std::vector<rans::DenseHistogram<int32_t>>& vfreq, const std::vector<Metadata>& vmd)
1524{
1525
1526 if (vfreq.size() != N) {
1527 throw std::runtime_error(fmt::format("mismatch between the size of frequencies vector {} and number of blocks {}", vfreq.size(), N));
1528 }
1529 size_t sz = alignSize(sizeof(EncodedBlocks<H, N, W>));
1530 for (int ib = 0; ib < N; ib++) {
1531 sz += Block<W>::estimateSize(vfreq[ib].size());
1532 }
1533 std::vector<char> vdict(sz); // memory space for dictionary
1534 auto dictBlocks = create(vdict.data(), sz);
1535 for (int ib = 0; ib < N; ib++) {
1536 const auto& thisHistogram = vfreq[ib];
1537 const auto view = rans::trim(rans::makeHistogramView(thisHistogram));
1538
1539 if (!view.empty()) {
1540 LOG(info) << "adding dictionary of " << view.size() << " words for block " << ib << ", min/max= " << view.getMin() << "/" << view.getMax();
1541 dictBlocks->mBlocks[ib].storeDict(view.size(), view.data());
1542 dictBlocks = get(vdict.data()); // !!! rellocation might have invalidated dictBlocks pointer
1543 dictBlocks->mMetadata[ib] = vmd[ib];
1544 dictBlocks->mMetadata[ib].opt = Metadata::OptStore::ROOTCompression; // we will compress the dictionary with root!
1545 dictBlocks->mBlocks[ib].realignBlock();
1546 } else {
1547 dictBlocks->mMetadata[ib].opt = Metadata::OptStore::NONE;
1548 }
1549 dictBlocks->mRegistry.nFilledBlocks++;
1550 }
1551 return vdict;
1552}
1553#endif
1554
1555template <typename H, int N, typename W>
1556void EncodedBlocks<H, N, W>::dump(const std::string& prefix, int ncol) const
1557{
1558 for (int ibl = 0; ibl < getNBlocks(); ibl++) {
1559 const auto& blc = getBlock(ibl);
1560 std::string ss;
1561 LOGP(info, "{} Bloc:{} Dict: {} words", prefix, ibl, blc.getNDict());
1562 const auto* ptr = blc.getDict();
1563 for (int i = 0; i < blc.getNDict(); i++) {
1564 if (i && (i % ncol) == 0) {
1565 LOG(info) << ss;
1566 ss.clear();
1567 }
1568 ss += fmt::format(" {:#010x}", ptr[i]);
1569 }
1570 if (!ss.empty()) {
1571 LOG(info) << ss;
1572 ss.clear();
1573 }
1574 LOG(info) << "\n";
1575 LOGP(info, "{} Bloc:{} Data: {} words", prefix, ibl, blc.getNData());
1576 ptr = blc.getData();
1577 for (int i = 0; i < blc.getNData(); i++) {
1578 if (i && (i % ncol) == 0) {
1579 LOG(info) << ss;
1580 ss.clear();
1581 }
1582 ss += fmt::format(" {:#010x}", ptr[i]);
1583 }
1584 if (!ss.empty()) {
1585 LOG(info) << ss;
1586 ss.clear();
1587 }
1588 LOG(info) << "\n";
1589 LOGP(info, "{} Bloc:{} Literals: {} words", prefix, ibl, blc.getNLiterals());
1590 ptr = blc.getData();
1591 for (int i = 0; i < blc.getNLiterals(); i++) {
1592 if (i && (i % 20) == 0) {
1593 LOG(info) << ss;
1594 ss.clear();
1595 }
1596 ss += fmt::format(" {:#010x}", ptr[i]);
1597 }
1598 if (!ss.empty()) {
1599 LOG(info) << ss;
1600 ss.clear();
1601 }
1602 LOG(info) << "\n";
1603 }
1604}
1605
1606} // namespace ctf
1607} // namespace o2
1608
1609#endif
representation of ANS Version number in a comparable way
#define verbosity
Header: timestamps and format version for detector CTF dictionary.
int32_t i
Metadata required to decode a Block.
Interfaces for BitPacking using librans.
uint32_t res
Definition RawData.h:0
TBranch * ptr
useful public helper functions.
std::ostringstream debug
const auto & getData()
uint64_t ransState_t
uint32_t source_type
Class for time synchronization of RawReader instances.
<<======================== Auxiliary classes =======================<<
CTFIOSize decodeCopyImpl(dst_IT dest, int slot) const
static constexpr Metadata::OptStore FallbackStorageType
this is in fact stored, but to overcome TBuffer limits we have to define the branches per block!...
void readFromTree(TTree &tree, const std::string &name, int ev=0)
read from tree to non-flat object
o2::ctf::CTFIOSize entropyCodeRANSV1(const input_IT srcBegin, const input_IT srcEnd, int slot, Metadata::OptStore opt, buffer_T *buffer=nullptr, const std::any &encoderExt={}, float memfc=1.f)
static auto get(void *head)
cast arbitrary buffer head to container class. Head is supposed to respect the alignment
ClassDefNV(EncodedBlocks, 3)
static size_t estimateBlockSize(int n)
estimate free size needed to add new block
CTFIOSize decodeCompatImpl(dst_IT dest, int slot, const std::any &decoderExt) const
void clear()
clear itself
o2::ctf::CTFIOSize decode(container_T &dest, int slot, const std::any &decoderExt={}) const
decode block at provided slot to destination vector (will be resized as needed)
static auto get(const void *head)
size_t estimateSize() const
size_t estimateSizeFromMetadata() const
do the same using metadata info
static void relocate(const char *oldHead, char *newHead, char *wrapper, size_t newsize=0)
auto expandStorage(size_t slot, size_t nElemets, T *buffer=nullptr) -> decltype(auto)
void setHeader(const H &h)
o2::ctf::CTFIOSize encodeRANSV1External(const input_IT srcBegin, const input_IT srcEnd, int slot, const std::any &encoderExt, buffer_T *buffer=nullptr, double_t sizeEstimateSafetyFactor=1)
static void readFromTree(VD &vec, TTree &tree, const std::string &name, int ev=0)
read from tree to destination buffer vector
void dump(const std::string &prefix="", int ncol=20) const
CTFIOSize decodeRansV1Impl(dst_IT dest, int slot, const std::any &decoderExt) const
ANSHeader checkANSVersion(ANSHeader ansVersion) const
void fillFlatCopy(EncodedBlocks &dest) const
Create its own flat copy in the destination empty flat object.
void copyToFlat(void *base)
copy itself to flat buffer created on the fly at the provided pointer. The destination block should b...
std::array< Block< W >, N > mBlocks
const H & getHeader() const
static size_t fillTreeBranch(TTree &tree, const std::string &brname, D &dt, int compLevel, int splitLevel=99)
add and fill single branch
auto & getMetadata(int i) const
static size_t getMinAlignedSize()
o2::ctf::CTFIOSize pack(const input_IT srcBegin, const input_IT srcEnd, int slot, buffer_T *buffer=nullptr)
static auto expand(buffer_T &buffer, size_t newsizeBytes)
expand the storage to new size in bytes
void print(const std::string &prefix="", int verbosity=1) const
print itself
o2::ctf::CTFIOSize encodeRANSV1Inplace(const input_IT srcBegin, const input_IT srcEnd, int slot, Metadata::OptStore opt, buffer_T *buffer=nullptr, double_t sizeEstimateSafetyFactor=1)
dictionaryType< source_T > getDictionary(int i, ANSHeader ansVersion=ANSVersionUnspecified) const
bool flat() const
check if flat and valid
static std::vector< char > createDictionaryBlocks(const std::vector< rans::DenseHistogram< int32_t > > &vfreq, const std::vector< Metadata > &prbits)
create a special EncodedBlocks containing only dictionaries made from provided vector of frequency ta...
const auto & getMetadata() const
static constexpr int getNBlocks()
const ANSHeader & getANSHeader() const
auto & getBlock(int i) const
bool empty() const
check if empty and valid
static auto getImage(const void *newHead)
get const image of the container wrapper, with pointers in the image relocated to new head
static bool readTreeBranch(TTree &tree, const std::string &brname, D &dt, int ev=0)
read single branch
size_t compactify()
Compactify by eliminating empty space.
CTFIOSize decodeUnpackImpl(dst_IT dest, int slot) const
std::variant< rans::RenormedSparseHistogram< source_T >, rans::RenormedDenseHistogram< source_T > > dictionaryType
o2::ctf::CTFIOSize decode(D_IT dest, int slot, const std::any &decoderExt={}) const
decode block at provided slot to destination pointer, the needed space assumed to be available
size_t size() const
total allocated size in bytes
EncodedBlocks< H, N, W > base
static auto create(VD &v)
create container from vector. Head is supposed to respect the alignment
void setANSHeader(const ANSHeader &h)
o2::ctf::CTFIOSize store(const input_IT srcBegin, const input_IT srcEnd, int slot, Metadata::OptStore opt, buffer_T *buffer=nullptr)
o2::ctf::CTFIOSize encode(const input_IT srcBegin, const input_IT srcEnd, int slot, uint8_t symbolTablePrecision, Metadata::OptStore opt, buffer_T *buffer=nullptr, const std::any &encoderExt={}, float memfc=1.f)
encode vector src to block at provided slot
size_t getFreeSize() const
size remaining for additional data
o2::ctf::CTFIOSize encode(const VE &src, int slot, uint8_t symbolTablePrecision, Metadata::OptStore opt, buffer_T *buffer=nullptr, const std::any &encoderExt={}, float memfc=1.f)
encode vector src to block at provided slot
const auto & getRegistry() const
static auto create(void *head, size_t sz)
create container from arbitrary buffer of predefined size (in bytes!!!). Head is supposed to respect ...
std::shared_ptr< H > cloneHeader() const
ANSHeader & getANSHeader()
void init(size_t sz)
setup internal structure and registry for given buffer size (in bytes!!!)
void copyToFlat(V &vec)
copy itself to flat buffer created on the fly from the vector
size_t appendToTree(TTree &tree, const std::string &name) const
attach to tree
o2::ctf::CTFIOSize entropyCodeRANSCompat(const input_IT srcBegin, const input_IT srcEnd, int slot, uint8_t symbolTablePrecision, buffer_T *buffer=nullptr, const std::any &encoderExt={}, float memfc=1.f)
std::array< Metadata, N > mMetadata
o2::ctf::CTFIOSize pack(const input_IT srcBegin, const input_IT srcEnd, int slot, rans::Metrics< typename std::iterator_traits< input_IT >::value_type > metrics, buffer_T *buffer=nullptr)
typename rans::denseEncoder_type< source_type > encoder_type
const DatasetProperties< source_type > & getDatasetProperties() const noexcept
Definition Metrics.h:52
size_t getIncompressibleSize(double_t safetyFactor=1.2) const
size_t getCompressedDatasetSize(double_t safetyFactor=1.2) const
size_t getCompressedDictionarySize(double_t safetyFactor=2) const
static decltype(auto) fromHistogram(DenseHistogram< source_T > histogram, size_t renormingPrecision=0)
Definition compat.h:187
functionality to maintain compatibility with previous version of this library
static factory classes for building histograms, encoders and decoders.
GLdouble n
Definition glcorearb.h:1982
GLeglImageOES image
Definition glcorearb.h:4021
GLenum src
Definition glcorearb.h:1767
GLuint buffer
Definition glcorearb.h:655
GLsizeiptr size
Definition glcorearb.h:659
GLuint GLuint end
Definition glcorearb.h:469
const GLdouble * v
Definition glcorearb.h:832
GLenum GLsizei dataSize
Definition glcorearb.h:3994
GLuint const GLchar * name
Definition glcorearb.h:781
GLsizei GLenum const void GLuint GLsizei GLfloat * metrics
Definition glcorearb.h:5500
GLboolean GLboolean GLboolean b
Definition glcorearb.h:1233
GLsizei const GLfloat * value
Definition glcorearb.h:819
GLintptr offset
Definition glcorearb.h:660
public interface for building and renorming histograms from source data.
constexpr bool mayPack(Metadata::OptStore opt) noexcept
constexpr bool is_iterator_v
constexpr bool mayEEncode(Metadata::OptStore opt) noexcept
constexpr size_t calculateNDestTElements(size_t nElems) noexcept
constexpr size_t PackingThreshold
constexpr int WrappersCompressionLevel
uint8_t BufferType
This is the type of the vector to be used for the EncodedBlocks buffer allocation.
constexpr ANSHeader ANSVersionCompat
Definition ANSHeader.h:54
constexpr ANSHeader ANSVersion1
Definition ANSHeader.h:55
constexpr ANSHeader ANSVersionUnspecified
Definition ANSHeader.h:53
size_t calculatePaddedSize(size_t nElems) noexcept
constexpr size_t Alignment
constexpr int WrappersSplitLevel
size_t alignSize(size_t sizeBytes)
align size to the given number of bytes
T * relocatePointer(const char *oldBase, char *newBase, const T *ptr)
relocate pointer by the difference of addresses
decltype(makeEncoder::fromRenormed(RenormedDenseHistogram< source_T >{})) encoder_type
Definition compat.h:292
RenormedDenseHistogram< source_T > renorm(DenseHistogram< source_T > histogram, size_t newPrecision=0)
Definition compat.h:75
size_t getAlphabetRangeBits(const DenseHistogram< source_T > &histogram) noexcept
Definition compat.h:260
size_t calculateMaxBufferSizeB(size_t nElements, size_t rangeBits)
Definition compat.h:282
decltype(makeDecoder::fromRenormed(RenormedDenseHistogram< source_T >{})) decoder_type
Definition compat.h:295
constexpr uint32_t getRangeBits(T min, T max) noexcept
Definition utils.h:200
constexpr size_t pow2(size_t n) noexcept
Definition utils.h:165
void checkBounds(IT iteratorPosition, IT upperBound)
Definition utils.h:244
size_t sanitizeRenormingBitRange(size_t renormPrecision)
Definition utils.h:212
decltype(makeDenseEncoder<>::fromRenormed(RenormedDenseHistogram< source_T >{})) denseEncoder_type
Definition factory.h:229
decltype(makeDecoder<>::fromRenormed(RenormedDenseHistogram< source_T >{})) defaultDecoder_type
Definition factory.h:238
auto makeHistogramView(container_T &container, std::ptrdiff_t offset) noexcept -> HistogramView< decltype(std::begin(container))>
void unpack(const input_T *__restrict inputBegin, size_t extent, output_IT outputBegin, size_t packingWidth, typename std::iterator_traits< output_IT >::value_type offset=static_cast< typename std::iterator_traits< output_IT >::value_type >(0))
Definition pack.h:346
HistogramView< Hist_IT > trim(const HistogramView< Hist_IT > &buffer)
RenormedDenseHistogram< source_T > readRenormedDictionary(buffer_IT begin, buffer_IT end, source_T min, source_T max, size_t renormingPrecision)
Definition serialize.h:188
decltype(auto) renorm(histogram_T histogram, size_t newPrecision, RenormingPolicy renormingPolicy=RenormingPolicy::Auto, size_t lowProbabilityCutoffBits=0)
Definition renorm.h:203
RenormedSparseHistogram< source_T > readRenormedSetDictionary(buffer_IT begin, buffer_IT end, source_T min, source_T max, size_t renormingPrecision)
Definition serialize.h:215
a couple of static helper functions to create timestamp values for CCDB queries or override obsolete ...
Defining DataPointCompositeObject explicitly as copiable.
std::string to_string(gsl::span< T, Size > span)
Definition common.h:52
public interface for serializing histograms (dictionaries) to JSON or compressed binary.
uint8_t majorVersion
Definition ANSHeader.h:28
uint8_t minorVersion
Definition ANSHeader.h:29
binary blob for single entropy-compressed column: metadata + (optional) dictionary and data buffer + ...
int getNStored() const
const W * getData() const
Registry * registry
const W * getDict() const
int getNLiterals() const
const W * getDataPointer() const
void storeDict(int _ndict, const W *_dict)
void storeLiterals(int _nliterals, const W *_literals)
const W * getLiterals() const
int nDict
non-persistent info for in-memory ops
const W * getEndOfBlock() const
int getNDict() const
void storeData(int _ndata, const W *_data)
void setNData(int _ndata)
int getNData() const
void clear()
clear itself
void store(int _ndict, int _ndata, int _nliterals, const W *_dict, const W *_data, const W *_literals)
store binary blob data (buffer filled from head to tail)
void setNDict(int _ndict)
void setNLiterals(int _nliterals)
ClassDefNV(Block, 1)
static size_t estimateSize(int n)
estimate free size needed to add new block
void relocate(const char *oldHead, char *newHeadData, char *newHeadRegistry)
relocate to different head position
‍>======================== Auxiliary classes =======================>>
int nFilledBlocks
pointer on the head of the CTF
char * getFreeBlockEnd() const
size_t getFreeSize() const
size in bytes available to fill data
size_t size
offset of the start of the writable space (wrt head), in bytes!!!
ClassDefNV(Registry, 1)
char * getFreeBlockStart() const
calculate the pointer of the head of the writable space
static constexpr size_t nStreams
Definition compat.h:52
static decltype(auto) fromSamples(source_IT begin, source_IT end, typename std::iterator_traits< source_IT >::value_type min, typename std::iterator_traits< source_IT >::value_type max)
Definition factory.h:144
static std::string concat_string(Ts const &... ts)
int estimateSize(bool withHB=false)
std::vector< o2::ctf::BufferType > vec
LOG(info)<< "Compressed in "<< sw.CpuTime()<< " s"
coder decode(ctfImage, triggersD, clustersD)
std::unique_ptr< TTree > tree((TTree *) flIn.Get(std::string(o2::base::NameConf::CTFTREENAME).c_str()))