Project
Loading...
Searching...
No Matches
EncodedBlocks.h
Go to the documentation of this file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
14
16
17#ifndef ALICEO2_ENCODED_BLOCKS_H
18#define ALICEO2_ENCODED_BLOCKS_H
19// #undef NDEBUG
20// #include <cassert>
21#include <type_traits>
22#include <cstddef>
23#include <Rtypes.h>
24#include <any>
25
26#include "TTree.h"
28#include "Framework/Logger.h"
34#ifndef __CLING__
37#include "rANS/compat.h"
38#include "rANS/histogram.h"
39#include "rANS/serialize.h"
40#include "rANS/factory.h"
41#include "rANS/metrics.h"
42#include "rANS/utils.h"
43#endif
44
45namespace o2
46{
47namespace ctf
48{
49
50namespace detail
51{
52
/// Primary template: a type is not treated as an iterator unless the partial specialisation below matches.
template <class T, class Enable = void>
struct is_iterator : std::false_type {
};

/// Selected for every T whose std::iterator_traits provide an iterator_category that is
/// std::input_iterator_tag (or derived from it) or exactly std::output_iterator_tag.
template <class T>
struct is_iterator<T, std::enable_if_t<std::disjunction_v<
                        std::is_base_of<std::input_iterator_tag, typename std::iterator_traits<T>::iterator_category>,
                        std::is_same<std::output_iterator_tag, typename std::iterator_traits<T>::iterator_category>>>>
  : std::true_type {
};

/// Convenience variable template for is_iterator<T>::value.
template <class T>
inline constexpr bool is_iterator_v = is_iterator<T>::value;
66
67inline constexpr bool mayEEncode(Metadata::OptStore opt) noexcept
68{
70}
71
72inline constexpr bool mayPack(Metadata::OptStore opt) noexcept
73{
75}
76
77} // namespace detail
// Threshold related to packing. NOTE(review): not referenced in the part of the file visible here;
// confirm its unit and where it is applied.
78constexpr size_t PackingThreshold = 512;
79
// Granularity in bytes to which alignSize() rounds sizes up.
80constexpr size_t Alignment = 16;
81
// Split level and compression level that appendToTree() passes to fillTreeBranch() for the "_wrapper." branch.
82constexpr int WrappersSplitLevel = 99;
83constexpr int WrappersCompressionLevel = 1;
84
86using BufferType = uint8_t; // to avoid every detector using different types, we better define it here
87
89inline size_t alignSize(size_t sizeBytes)
90{
91 auto res = sizeBytes % Alignment;
92 return res ? sizeBytes + (Alignment - res) : sizeBytes;
93}
94
/// Translate a pointer expressed relative to oldBase into the pointer at the same byte offset from newBase.
/// A null pointer stays null.
template <class T>
inline T* relocatePointer(const char* oldBase, char* newBase, const T* ptr)
{
  if (ptr == nullptr) {
    return nullptr;
  }
  const auto byteOffset = reinterpret_cast<const char*>(ptr) - oldBase;
  return reinterpret_cast<T*>(newBase + byteOffset);
}
101
/// Number of dest_T words needed to hold nElems values of source_T (a partially filled last word counts as one).
/// Only available when dest_T is at least as large as source_T.
template <typename source_T, typename dest_T, std::enable_if_t<(sizeof(dest_T) >= sizeof(source_T)), bool> = true>
inline constexpr size_t calculateNDestTElements(size_t nElems) noexcept
{
  const size_t totalBytes = nElems * sizeof(source_T);
  const size_t fullWords = totalBytes / sizeof(dest_T);
  const bool hasPartialWord = (totalBytes % sizeof(dest_T)) != 0;
  return hasPartialWord ? fullWords + 1 : fullWords;
}
108
109template <typename source_T, typename dest_T, std::enable_if_t<(sizeof(dest_T) >= sizeof(source_T)), bool> = true>
110inline size_t calculatePaddedSize(size_t nElems) noexcept
111{
112 const size_t sizeOfSourceT = sizeof(source_T);
113 const size_t sizeOfDestT = sizeof(dest_T);
114
115 // this is equivalent to (sizeOfSourceT / sizeOfDestT) * std::ceil(sizeOfSourceArray/ sizeOfDestT)
116 return (sizeOfDestT / sizeOfSourceT) * calculateNDestTElements<source_T, dest_T>(nElems);
117};
118
120
122struct Registry {
123 char* head = nullptr;
124 int nFilledBlocks = 0; // number of filled blocks = next block to fill (must be strictly consecutive)
125 size_t offsFreeStart = 0;
126 size_t size = 0; // full size in bytes!!!
127
129 char* getFreeBlockStart() const
130 {
131 assert(offsFreeStart <= size);
132 return head + offsFreeStart;
133 }
134
136 size_t getFreeSize() const
137 {
138 return size - offsFreeStart;
139 }
140
141 char* getFreeBlockEnd() const
142 {
143 assert(offsFreeStart <= size);
144 return getFreeBlockStart() + getFreeSize();
145 }
146
148};
149
151template <typename W = uint32_t>
152struct Block {
153
154 Registry* registry = nullptr;
155 int nDict = 0; // dictionary length (if any)
156 int nData = 0; // length of data
157 int nLiterals = 0; // length of literals vector (if any)
158 int nStored = 0; // total length
159 W* payload = nullptr; //[nStored];
160
161 inline const W* getDict() const { return nDict ? payload : nullptr; }
162 inline const W* getData() const { return nData ? (payload + nDict) : nullptr; }
163 inline const W* getDataPointer() const { return payload ? (payload + nDict) : nullptr; } // needed when nData is not set yet
164 inline const W* getLiterals() const { return nLiterals ? (payload + nDict + nData) : nullptr; }
165 inline const W* getEndOfBlock() const
166 {
167 if (!registry) {
168 return nullptr;
169 }
170 // get last legal W*, since unaligned data is undefined behavior!
171 const size_t delta = reinterpret_cast<uintptr_t>(registry->getFreeBlockEnd()) % sizeof(W);
172 return reinterpret_cast<const W*>(registry->getFreeBlockEnd() - delta);
173 }
174
175 inline W* getCreatePayload() { return payload ? payload : (registry ? (payload = reinterpret_cast<W*>(registry->getFreeBlockStart())) : nullptr); }
176 inline W* getCreateDict() { return payload ? payload : getCreatePayload(); }
177 inline W* getCreateData() { return payload ? (payload + nDict) : getCreatePayload(); }
178 inline W* getCreateLiterals() { return payload ? payload + (nDict + nData) : getCreatePayload(); }
179 inline W* getEndOfBlock() { return const_cast<W*>(static_cast<const Block&>(*this).getEndOfBlock()); };
180
181 inline auto getOffsDict() { return reinterpret_cast<std::uintptr_t>(getCreateDict()) - reinterpret_cast<std::uintptr_t>(registry->head); }
182 inline auto getOffsData() { return reinterpret_cast<std::uintptr_t>(getCreateData()) - reinterpret_cast<std::uintptr_t>(registry->head); }
183 inline auto getOffsLiterals() { return reinterpret_cast<std::uintptr_t>(getCreateLiterals()) - reinterpret_cast<std::uintptr_t>(registry->head); }
184
185 inline void setNDict(int _ndict)
186 {
187 nDict = _ndict;
188 nStored += nDict;
189 }
190
191 inline void setNData(int _ndata)
192 {
193 nData = _ndata;
194 nStored += nData;
195 }
196
197 inline void setNLiterals(int _nliterals)
198 {
199 nLiterals = _nliterals;
201 }
202
203 inline int getNDict() const { return nDict; }
204 inline int getNData() const { return nData; }
205 inline int getNLiterals() const { return nLiterals; }
206 inline int getNStored() const { return nStored; }
207
209 {
210 if (!registry) { // this is a standalone block owning its data
211 delete[] payload;
212 }
213 }
214
216 void clear()
217 {
218 nDict = 0;
219 nData = 0;
220 nLiterals = 0;
221 nStored = 0;
222 payload = nullptr;
223 }
224
226 static size_t estimateSize(int n)
227 {
228 return alignSize(n * sizeof(W));
229 }
230
231 // store a dictionary in an empty block
232 void storeDict(int _ndict, const W* _dict)
233 {
234 if (getNStored() > 0) {
235 throw std::runtime_error("trying to write in occupied block");
236 }
237 size_t sz = estimateSize(_ndict);
238 assert(registry); // this method is valid only for flat version, which has a registry
239 assert(sz <= registry->getFreeSize());
240 assert((_ndict > 0) == (_dict != nullptr));
241 setNDict(_ndict);
242 if (nDict) {
243 memcpy(getCreateDict(), _dict, _ndict * sizeof(W));
244 realignBlock();
245 }
246 };
247
248 // store a dictionary to a block which can either be empty or contain a dict.
249 void storeData(int _ndata, const W* _data)
250 {
251 if (getNStored() > getNDict()) {
252 throw std::runtime_error("trying to write in occupied block");
253 }
254
255 size_t sz = estimateSize(_ndata);
256 assert(registry); // this method is valid only for flat version, which has a registry
257 assert(sz <= registry->getFreeSize());
258 assert((_ndata > 0) == (_data != nullptr));
259 setNData(_ndata);
260 if (nData) {
261 memcpy(getCreateData(), _data, _ndata * sizeof(W));
262 realignBlock();
263 }
264 }
265
266 // store a dictionary to a block which can either be empty or contain a dict.
267 void storeLiterals(int _nliterals, const W* _literals)
268 {
269 if (getNStored() > getNDict() + getNData()) {
270 throw std::runtime_error("trying to write in occupied block");
271 }
272
273 size_t sz = estimateSize(_nliterals);
274 assert(registry); // this method is valid only for flat version, which has a registry
275 assert(sz <= registry->getFreeSize());
276 // assert((_nliterals > 0) == (_literals != nullptr));
277 setNLiterals(_nliterals);
278 if (nLiterals) {
279 memcpy(getCreateLiterals(), _literals, _nliterals * sizeof(W));
280 realignBlock();
281 }
282 }
283
284 // resize block and free up unused buffer space.
286 {
287 if (payload) {
288 size_t sz = estimateSize(getNStored());
289 registry->offsFreeStart = (reinterpret_cast<char*>(payload) - registry->head) + sz;
290 }
291 }
292
294 void store(int _ndict, int _ndata, int _nliterals, const W* _dict, const W* _data, const W* _literals)
295 {
296 size_t sz = estimateSize(_ndict + _ndata + _nliterals);
297 assert(registry); // this method is valid only for flat version, which has a registry
298 assert(sz <= registry->getFreeSize());
299 assert((_ndict > 0) == (_dict != nullptr));
300 assert((_ndata > 0) == (_data != nullptr));
301 // assert(_literals == _data + _nliterals);
302 setNDict(_ndict);
303 setNData(_ndata);
304 setNLiterals(_nliterals);
305 getCreatePayload(); // do this even for empty block!!!
306 if (getNStored()) {
307 payload = reinterpret_cast<W*>(registry->getFreeBlockStart());
308 if (getNDict()) {
309 memcpy(getCreateDict(), _dict, _ndict * sizeof(W));
310 }
311 if (getNData()) {
312 memcpy(getCreateData(), _data, _ndata * sizeof(W));
313 }
314 if (getNLiterals()) {
315 memcpy(getCreateLiterals(), _literals, _nliterals * sizeof(W));
316 }
317 }
318 realignBlock();
319 }
320
322 void relocate(const char* oldHead, char* newHeadData, char* newHeadRegistry)
323 {
324 payload = relocatePointer(oldHead, newHeadData, payload);
325 registry = relocatePointer(oldHead, newHeadRegistry, registry);
326 }
327
329}; // namespace ctf
330
332
333template <typename H, int N, typename W = uint32_t>
335{
336 public:
338
339#ifndef __CLING__
340 template <typename source_T>
341 using dictionaryType = std::variant<rans::RenormedSparseHistogram<source_T>, rans::RenormedDenseHistogram<source_T>>;
342#endif
343
344 void setHeader(const H& h)
345 {
346 mHeader = h;
347 }
348 const H& getHeader() const { return mHeader; }
349 H& getHeader() { return mHeader; }
350 std::shared_ptr<H> cloneHeader() const { return std::shared_ptr<H>(new H(mHeader)); } // for dictionary creation
351
352 const auto& getRegistry() const { return mRegistry; }
353
354 const auto& getMetadata() const { return mMetadata; }
355
356 auto& getMetadata(int i) const
357 {
358 assert(i < N);
359 return mMetadata[i];
360 }
361
362 auto& getBlock(int i) const
363 {
364 assert(i < N);
365 return mBlocks[i];
366 }
367
368#ifndef __CLING__
369 template <typename source_T>
371 {
372 const auto& block = getBlock(i);
373 const auto& metadata = getMetadata(i);
374 ansVersion = checkANSVersion(ansVersion);
375
376 assert(static_cast<int64_t>(std::numeric_limits<source_T>::min()) <= static_cast<int64_t>(metadata.max));
377 assert(static_cast<int64_t>(std::numeric_limits<source_T>::max()) >= static_cast<int64_t>(metadata.min));
378
379 // check consistency of metadata and type
380 [&]() {
381 const int64_t sourceMin = std::numeric_limits<source_T>::min();
382 const int64_t sourceMax = std::numeric_limits<source_T>::max();
383
384 auto view = rans::trim(rans::HistogramView{block.getDict(), block.getDict() + block.getNDict(), metadata.min});
385 const int64_t dictMin = view.getMin();
386 const int64_t dictMax = view.getMax();
387 assert(dictMin >= metadata.min);
388 assert(dictMax <= metadata.max);
389
390 if ((dictMin < sourceMin) || (dictMax > sourceMax)) {
391 if (ansVersion == ANSVersionCompat && mHeader.majorVersion == 1 && mHeader.minorVersion == 0 && mHeader.dictTimeStamp < 1653192000000) {
392 LOGP(warn, "value range of dictionary and target datatype are incompatible: target type [{},{}] vs dictionary [{},{}], tolerate in compat mode for old dictionaries", sourceMin, sourceMax, dictMin, dictMax);
393 } else {
394 throw std::runtime_error(fmt::format("value range of dictionary and target datatype are incompatible: target type [{},{}] vs dictionary [{},{}]", sourceMin, sourceMax, dictMin, dictMax));
395 }
396 }
397 }();
398
399 if (ansVersion == ANSVersionCompat) {
400 rans::DenseHistogram<source_T> histogram{block.getDict(), block.getDict() + block.getNDict(), metadata.min};
401 return rans::compat::renorm(std::move(histogram), metadata.probabilityBits);
402 } else if (ansVersion == ANSVersion1) {
403 // dictionary is loaded from an explicit dict file and is stored densely
405 rans::DenseHistogram<source_T> histogram{block.getDict(), block.getDict() + block.getNDict(), metadata.min};
406 size_t renormingBits = rans::utils::sanitizeRenormingBitRange(metadata.probabilityBits);
407 LOG_IF(debug, renormingBits != metadata.probabilityBits)
408 << fmt::format("While reading metadata from external dictionary, rANSV1 is rounding renorming precision from {} to {}", metadata.probabilityBits, renormingBits);
409 return rans::renorm(std::move(histogram), renormingBits, rans::RenormingPolicy::ForceIncompressible);
410 } else {
411 // dictionary is elias-delta coded inside the block
412 if constexpr (sizeof(source_T) > 2) {
413 return rans::readRenormedSetDictionary(block.getDict(), block.getDict() + block.getNDict(),
414 static_cast<source_T>(metadata.min), static_cast<source_T>(metadata.max),
415 metadata.probabilityBits);
416 } else {
417 return rans::readRenormedDictionary(block.getDict(), block.getDict() + block.getNDict(),
418 static_cast<source_T>(metadata.min), static_cast<source_T>(metadata.max),
419 metadata.probabilityBits);
420 }
421 }
422 } else {
423 throw std::runtime_error(fmt::format("Failed to load serialized Dictionary. Unsupported ANS Version: {}", static_cast<std::string>(ansVersion)));
424 }
425 };
426#endif
427
429 {
430 mANSHeader = h;
431 }
432 const ANSHeader& getANSHeader() const { return mANSHeader; }
434
435 static constexpr int getNBlocks() { return N; }
436
437 static size_t getMinAlignedSize() { return alignSize(sizeof(base)); }
438
440 static auto get(void* head) { return reinterpret_cast<EncodedBlocks*>(head); }
441 static auto get(const void* head) { return reinterpret_cast<const EncodedBlocks*>(head); }
442
444 static auto getImage(const void* newHead);
445
447 static auto create(void* head, size_t sz);
448
450 template <typename VD>
451 static auto create(VD& v);
452
454 static size_t estimateBlockSize(int n) { return Block<W>::estimateSize(n); }
455
457 bool empty() const { return (mRegistry.offsFreeStart == alignSize(sizeof(*this))) && (mRegistry.size >= mRegistry.offsFreeStart); }
458
460 bool flat() const { return mRegistry.size > 0 && (mRegistry.size >= mRegistry.offsFreeStart) && (mBlocks[0].registry == &mRegistry) && (mBlocks[N - 1].registry == &mRegistry); }
461
463 void clear();
464
466 size_t compactify() { return (mRegistry.size = estimateSize()); }
467
469 size_t size() const { return mRegistry.size; }
470
472 size_t getFreeSize() const { return mRegistry.getFreeSize(); }
473
475 template <typename buffer_T>
476 static auto expand(buffer_T& buffer, size_t newsizeBytes);
477
479 template <typename V>
480 void copyToFlat(V& vec);
481
484
486 size_t appendToTree(TTree& tree, const std::string& name) const;
487
489 void readFromTree(TTree& tree, const std::string& name, int ev = 0);
490
492 template <typename VD>
493 static void readFromTree(VD& vec, TTree& tree, const std::string& name, int ev = 0);
494
496 template <typename VE, typename buffer_T>
497 inline o2::ctf::CTFIOSize encode(const VE& src, int slot, uint8_t symbolTablePrecision, Metadata::OptStore opt, buffer_T* buffer = nullptr, const std::any& encoderExt = {}, float memfc = 1.f)
498 {
499 return encode(std::begin(src), std::end(src), slot, symbolTablePrecision, opt, buffer, encoderExt, memfc);
500 }
501
503 template <typename input_IT, typename buffer_T>
504 o2::ctf::CTFIOSize encode(const input_IT srcBegin, const input_IT srcEnd, int slot, uint8_t symbolTablePrecision, Metadata::OptStore opt, buffer_T* buffer = nullptr, const std::any& encoderExt = {}, float memfc = 1.f);
505
507 template <class container_T, class container_IT = typename container_T::iterator>
508 o2::ctf::CTFIOSize decode(container_T& dest, int slot, const std::any& decoderExt = {}) const;
509
511 template <typename D_IT, std::enable_if_t<detail::is_iterator_v<D_IT>, bool> = true>
512 o2::ctf::CTFIOSize decode(D_IT dest, int slot, const std::any& decoderExt = {}) const;
513
514#ifndef __CLING__
516 static std::vector<char> createDictionaryBlocks(const std::vector<rans::DenseHistogram<int32_t>>& vfreq, const std::vector<Metadata>& prbits);
517#endif
518
520 void print(const std::string& prefix = "", int verbosity = 1) const;
521 void dump(const std::string& prefix = "", int ncol = 20) const;
522
523 protected:
524 static_assert(N > 0, "number of encoded blocks < 1");
525
527 ANSHeader mANSHeader; // ANS header
528 H mHeader; // detector specific header
529 std::array<Metadata, N> mMetadata; // compressed block's details
530 std::array<Block<W>, N> mBlocks;
531
533
535 void init(size_t sz);
536
541 static void relocate(const char* oldHead, char* newHead, char* wrapper, size_t newsize = 0);
542
545 size_t estimateSize() const;
546
549
551 void fillFlatCopy(EncodedBlocks& dest) const;
552
554 template <typename D>
555 static size_t fillTreeBranch(TTree& tree, const std::string& brname, D& dt, int compLevel, int splitLevel = 99);
556
558 template <typename D>
559 static bool readTreeBranch(TTree& tree, const std::string& brname, D& dt, int ev = 0);
560
561 template <typename T>
562 auto expandStorage(size_t slot, size_t nElemets, T* buffer = nullptr) -> decltype(auto);
563
564 inline ANSHeader checkANSVersion(ANSHeader ansVersion) const
565 {
566 auto ctfANSHeader = getANSHeader();
568
569 const bool isEqual{ansVersion == ctfANSHeader};
570 const bool isHeaderUnspecified{ctfANSHeader == ANSVersionUnspecified};
571
572 if (isEqual) {
573 if (isHeaderUnspecified) {
574 throw std::runtime_error{fmt::format("Missmatch of ANSVersions, trying to encode/decode CTF with ANS Version Header {} with ANS Version {}",
575 static_cast<std::string>(ctfANSHeader),
576 static_cast<std::string>(ansVersion))};
577 } else {
578 ret = ctfANSHeader;
579 }
580 } else {
581 if (isHeaderUnspecified) {
582 ret = ansVersion;
583 } else {
584 ret = ctfANSHeader;
585 }
586 }
587
588 return ret;
589 };
590
591 template <typename input_IT, typename buffer_T>
592 o2::ctf::CTFIOSize entropyCodeRANSCompat(const input_IT srcBegin, const input_IT srcEnd, int slot, uint8_t symbolTablePrecision, buffer_T* buffer = nullptr, const std::any& encoderExt = {}, float memfc = 1.f);
593
594 template <typename input_IT, typename buffer_T>
595 o2::ctf::CTFIOSize entropyCodeRANSV1(const input_IT srcBegin, const input_IT srcEnd, int slot, Metadata::OptStore opt, buffer_T* buffer = nullptr, const std::any& encoderExt = {}, float memfc = 1.f);
596
597 template <typename input_IT, typename buffer_T>
598 o2::ctf::CTFIOSize encodeRANSV1External(const input_IT srcBegin, const input_IT srcEnd, int slot, const std::any& encoderExt, buffer_T* buffer = nullptr, double_t sizeEstimateSafetyFactor = 1);
599
600 template <typename input_IT, typename buffer_T>
601 o2::ctf::CTFIOSize encodeRANSV1Inplace(const input_IT srcBegin, const input_IT srcEnd, int slot, Metadata::OptStore opt, buffer_T* buffer = nullptr, double_t sizeEstimateSafetyFactor = 1);
602
603#ifndef __CLING__
604 template <typename input_IT, typename buffer_T>
605 o2::ctf::CTFIOSize pack(const input_IT srcBegin, const input_IT srcEnd, int slot, rans::Metrics<typename std::iterator_traits<input_IT>::value_type> metrics, buffer_T* buffer = nullptr);
606
607 template <typename input_IT, typename buffer_T>
608 inline o2::ctf::CTFIOSize pack(const input_IT srcBegin, const input_IT srcEnd, int slot, buffer_T* buffer = nullptr)
609 {
610 using source_type = typename std::iterator_traits<input_IT>::value_type;
611
613 metrics.getDatasetProperties().numSamples = std::distance(srcBegin, srcEnd);
614
615 if (metrics.getDatasetProperties().numSamples != 0) {
616 const auto [minIter, maxIter] = std::minmax_element(srcBegin, srcEnd);
617 metrics.getDatasetProperties().min = *minIter;
618 metrics.getDatasetProperties().max = *maxIter;
619
620 // special case: if min === max, the range is 0 and the data can be reconstructed just via the metadata.
621 metrics.getDatasetProperties().alphabetRangeBits =
622 rans::utils::getRangeBits(metrics.getDatasetProperties().min,
623 metrics.getDatasetProperties().max);
624 }
625
626 return pack(srcBegin, srcEnd, slot, metrics, buffer);
627 }
628#endif
629
630 template <typename input_IT, typename buffer_T>
631 o2::ctf::CTFIOSize store(const input_IT srcBegin, const input_IT srcEnd, int slot, Metadata::OptStore opt, buffer_T* buffer = nullptr);
632
633 // decode
634 template <typename dst_IT>
635 CTFIOSize decodeCompatImpl(dst_IT dest, int slot, const std::any& decoderExt) const;
636
637 template <typename dst_IT>
638 CTFIOSize decodeRansV1Impl(dst_IT dest, int slot, const std::any& decoderExt) const;
639
640 template <typename dst_IT>
641 CTFIOSize decodeUnpackImpl(dst_IT dest, int slot) const;
642
643 template <typename dst_IT>
644 CTFIOSize decodeCopyImpl(dst_IT dest, int slot) const;
645
647}; // class EncodedBlocks
648
651template <typename H, int N, typename W>
652void EncodedBlocks<H, N, W>::readFromTree(TTree& tree, const std::string& name, int ev)
653{
654 readTreeBranch(tree, o2::utils::Str::concat_string(name, "_wrapper."), *this, ev);
655 for (int i = 0; i < N; i++) {
656 readTreeBranch(tree, o2::utils::Str::concat_string(name, "_block.", std::to_string(i), "."), mBlocks[i], ev);
657 }
658}
659
662template <typename H, int N, typename W>
663template <typename VD>
664void EncodedBlocks<H, N, W>::readFromTree(VD& vec, TTree& tree, const std::string& name, int ev)
665{
666 auto tmp = create(vec);
667 if (!readTreeBranch(tree, o2::utils::Str::concat_string(name, "_wrapper."), *tmp, ev)) {
668 throw std::runtime_error(fmt::format("Failed to read CTF header for {}", name));
669 }
670 tmp = tmp->expand(vec, tmp->estimateSizeFromMetadata());
671 const auto& meta = tmp->getMetadata();
672 for (int i = 0; i < N; i++) {
673 Block<W> bl;
674 readTreeBranch(tree, o2::utils::Str::concat_string(name, "_block.", std::to_string(i), "."), bl, ev);
675 assert(meta[i].nDictWords == bl.getNDict());
676 assert(meta[i].nDataWords == bl.getNData());
677 assert(meta[i].nLiteralWords == bl.getNLiterals());
678 tmp->mBlocks[i].store(bl.getNDict(), bl.getNData(), bl.getNLiterals(), bl.getDict(), bl.getData(), bl.getLiterals());
679 }
680}
681
684template <typename H, int N, typename W>
685size_t EncodedBlocks<H, N, W>::appendToTree(TTree& tree, const std::string& name) const
686{
687 long s = 0;
688 s += fillTreeBranch(tree, o2::utils::Str::concat_string(name, "_wrapper."), const_cast<base&>(*this), WrappersCompressionLevel, WrappersSplitLevel);
689 for (int i = 0; i < N; i++) {
690 int compression = mMetadata[i].opt == Metadata::OptStore::ROOTCompression ? 1 : 0;
691 s += fillTreeBranch(tree, o2::utils::Str::concat_string(name, "_block.", std::to_string(i), "."), const_cast<Block<W>&>(mBlocks[i]), compression);
692 }
693 tree.SetEntries(tree.GetEntries() + 1);
694 return s;
695}
696
699template <typename H, int N, typename W>
700template <typename D>
701bool EncodedBlocks<H, N, W>::readTreeBranch(TTree& tree, const std::string& brname, D& dt, int ev)
702{
703 auto* br = tree.GetBranch(brname.c_str());
704 if (!br) {
705 LOG(debug) << "Branch " << brname << " is absent";
706 return false;
707 }
708 auto* ptr = &dt;
709 br->SetAddress(&ptr);
710 br->GetEntry(ev);
711 br->ResetAddress();
712 return true;
713}
714
717template <typename H, int N, typename W>
718template <typename D>
719inline size_t EncodedBlocks<H, N, W>::fillTreeBranch(TTree& tree, const std::string& brname, D& dt, int compLevel, int splitLevel)
720{
721 auto* br = tree.GetBranch(brname.c_str());
722 if (!br) {
723 br = tree.Branch(brname.c_str(), &dt, 512, splitLevel);
724 br->SetCompressionLevel(compLevel);
725 }
726 return br->Fill();
727}
728
731template <typename H, int N, typename W>
733{
734 assert(dest.empty() && dest.mRegistry.getFreeSize() < estimateSize());
735 dest.mANSHeader = mANSHeader;
736 dest.mHeader = mHeader;
737 dest.mMetadata = mMetadata;
738 for (int i = 0; i < N; i++) {
739 dest.mBlocks[i].store(mBlocks[i].getNDict(), mBlocks[i].getNData(), mBlocks[i].getDict(), mBlocks[i].getData());
740 }
741}
742
745template <typename H, int N, typename W>
746template <typename V>
748{
749 auto vtsz = sizeof(typename std::remove_reference<decltype(vec)>::type::value_type), sz = estimateSize();
750 vec.resize(sz / vtsz);
751 copyToFlat(vec.data());
752}
753
757template <typename H, int N, typename W>
759{
760 size_t sz = 0;
761 sz += alignSize(sizeof(*this));
762 for (int i = 0; i < N; i++) {
763 sz += alignSize(mBlocks[i].nStored * sizeof(W));
764 }
765 return sz;
766}
767
771template <typename H, int N, typename W>
773{
774 size_t sz = alignSize(sizeof(*this));
775 for (int i = 0; i < N; i++) {
776 sz += alignSize((mMetadata[i].nDictWords + mMetadata[i].nDataWords + mMetadata[i].nLiteralWords) * sizeof(W));
777 }
778 return sz;
779}
780
783template <typename H, int N, typename W>
784template <typename buffer_T>
785auto EncodedBlocks<H, N, W>::expand(buffer_T& buffer, size_t newsizeBytes)
786{
787 auto buftypesize = sizeof(typename std::remove_reference<decltype(buffer)>::type::value_type);
788 auto* oldHead = get(buffer.data())->mRegistry.head;
789 buffer.resize(alignSize(newsizeBytes) / buftypesize);
790 relocate(oldHead, reinterpret_cast<char*>(buffer.data()), reinterpret_cast<char*>(buffer.data()), newsizeBytes);
791 return get(buffer.data());
792}
793
799template <typename H, int N, typename W>
800void EncodedBlocks<H, N, W>::relocate(const char* oldHead, char* newHead, char* wrapper, size_t newsize)
801{
802 auto newStr = get(wrapper);
803 for (int i = 0; i < N; i++) {
804 newStr->mBlocks[i].relocate(oldHead, newHead, wrapper);
805 }
806 newStr->mRegistry.head = newHead; // newHead points on the real data
807 // if asked, update the size
808 if (newsize) { // in bytes!!!
809 assert(newStr->estimateSize() <= newsize);
810 newStr->mRegistry.size = newsize;
811 }
812}
813
816template <typename H, int N, typename W>
818{
819 mRegistry.head = reinterpret_cast<char*>(this);
820 mRegistry.size = sz;
821 mRegistry.offsFreeStart = alignSize(sizeof(*this));
822 for (int i = 0; i < N; i++) {
823 mMetadata[i].clear();
824 mBlocks[i].registry = &mRegistry;
825 mBlocks[i].clear();
826 }
827}
828
831template <typename H, int N, typename W>
833{
834 for (int i = 0; i < N; i++) {
835 mBlocks[i].clear();
836 mMetadata[i].clear();
837 }
838 mRegistry.offsFreeStart = alignSize(sizeof(*this));
839}
840
843template <typename H, int N, typename W>
844auto EncodedBlocks<H, N, W>::getImage(const void* newHead)
845{
846 assert(newHead);
847 auto image(*get(newHead)); // 1st make a shalow copy
848 // now fix its pointers
849 // we don't modify newHead, but still need to remove constness for relocation interface
850 relocate(image.mRegistry.head, const_cast<char*>(reinterpret_cast<const char*>(newHead)), reinterpret_cast<char*>(&image));
851
852 return image;
853}
854
857template <typename H, int N, typename W>
858inline auto EncodedBlocks<H, N, W>::create(void* head, size_t sz)
859{
860 const H defh;
861 auto b = get(head);
862 b->init(sz);
863 b->setHeader(defh);
864 return b;
865}
866
869template <typename H, int N, typename W>
870template <typename VD>
872{
873 size_t vsz = sizeof(typename std::remove_reference<decltype(v)>::type::value_type); // size of the element of the buffer
874 auto baseSize = getMinAlignedSize() / vsz;
875 if (v.size() < baseSize) {
876 v.resize(baseSize);
877 }
878 return create(v.data(), v.size() * vsz);
879}
880
883template <typename H, int N, typename W>
884void EncodedBlocks<H, N, W>::print(const std::string& prefix, int verbosity) const
885{
886 if (verbosity > 0) {
887 LOG(info) << prefix << "Container of " << N << " blocks, size: " << size() << " bytes, unused: " << getFreeSize();
888 for (int i = 0; i < N; i++) {
889 LOG(info) << "Block " << i << " for " << static_cast<uint32_t>(mMetadata[i].messageLength) << " message words of "
890 << static_cast<uint32_t>(mMetadata[i].messageWordSize) << " bytes |"
891 << " NDictWords: " << mBlocks[i].getNDict() << " NDataWords: " << mBlocks[i].getNData()
892 << " NLiteralWords: " << mBlocks[i].getNLiterals();
893 }
894 } else if (verbosity == 0) {
895 size_t inpSize = 0, ndict = 0, ndata = 0, nlit = 0;
896 for (int i = 0; i < N; i++) {
897 inpSize += mMetadata[i].messageLength * mMetadata[i].messageWordSize;
898 ndict += mBlocks[i].getNDict();
899 ndata += mBlocks[i].getNData();
900 nlit += mBlocks[i].getNLiterals();
901 }
902 LOG(info) << prefix << N << " blocks, input size: " << inpSize << ", output size: " << size()
903 << " NDictWords: " << ndict << " NDataWords: " << ndata << " NLiteralWords: " << nlit;
904 }
905}
906
908template <typename H, int N, typename W>
909template <class container_T, class container_IT>
910inline o2::ctf::CTFIOSize EncodedBlocks<H, N, W>::decode(container_T& dest, // destination container
911 int slot, // slot of the block to decode
912 const std::any& decoderExt) const // optional externally provided decoder
913{
914 dest.resize(mMetadata[slot].messageLength); // allocate output buffer
915 return decode(std::begin(dest), slot, decoderExt);
916}
917
919template <typename H, int N, typename W>
920template <typename D_IT, std::enable_if_t<detail::is_iterator_v<D_IT>, bool>>
921CTFIOSize EncodedBlocks<H, N, W>::decode(D_IT dest, // iterator to destination
922 int slot, // slot of the block to decode
923 const std::any& decoderExt) const // optional externally provided decoder
924{
925
926 // get references to the right data
927 const auto& ansVersion = getANSHeader();
928 const auto& block = mBlocks[slot];
929 const auto& md = mMetadata[slot];
930 LOGP(debug, "Slot{} | NStored={} Ndict={} nData={}, MD: messageLength:{} opt:{} min:{} max:{} offs:{} width:{} ", slot, block.getNStored(), block.getNDict(), block.getNData(), md.messageLength, (int)md.opt, md.min, md.max, md.literalsPackingOffset, md.literalsPackingWidth);
931
932 if (ansVersion == ANSVersionCompat) {
933 if (!block.getNStored()) {
934 return {0, md.getUncompressedSize(), md.getCompressedSize()};
935 }
936 if (md.opt == Metadata::OptStore::EENCODE) {
937 return decodeCompatImpl(dest, slot, decoderExt);
938 } else {
939 return decodeCopyImpl(dest, slot);
940 }
941 } else if (ansVersion == ANSVersion1) {
942 if (md.opt == Metadata::OptStore::PACK) {
943 return decodeUnpackImpl(dest, slot);
944 }
945 if (!block.getNStored()) {
946 return {0, md.getUncompressedSize(), md.getCompressedSize()};
947 }
948 if (md.opt == Metadata::OptStore::EENCODE) {
949 return decodeRansV1Impl(dest, slot, decoderExt);
950 } else {
951 return decodeCopyImpl(dest, slot);
952 }
953 } else {
954 throw std::runtime_error("unsupported ANS Version");
955 }
956};
957
958#ifndef __CLING__
959template <typename H, int N, typename W>
960template <typename dst_IT>
961CTFIOSize EncodedBlocks<H, N, W>::decodeCompatImpl(dst_IT dstBegin, int slot, const std::any& decoderExt) const
962{
963
964 // get references to the right data
965 const auto& block = mBlocks[slot];
966 const auto& md = mMetadata[slot];
967
968 using dst_type = typename std::iterator_traits<dst_IT>::value_type;
969 using decoder_type = typename rans::compat::decoder_type<dst_type>;
970
971 std::optional<decoder_type> inplaceDecoder{};
972 if (md.nDictWords > 0) {
973 inplaceDecoder = decoder_type{std::get<rans::RenormedDenseHistogram<dst_type>>(this->getDictionary<dst_type>(slot))};
974 } else if (!decoderExt.has_value()) {
975 throw std::runtime_error("neither dictionary nor external decoder provided");
976 }
977
978 auto getDecoder = [&]() -> const decoder_type& {
979 if (inplaceDecoder.has_value()) {
980 return inplaceDecoder.value();
981 } else {
982 return std::any_cast<const decoder_type&>(decoderExt);
983 }
984 };
985
986 const size_t NDecoderStreams = rans::compat::defaults::CoderPreset::nStreams;
987
988 if (block.getNLiterals()) {
989 auto* literalsEnd = reinterpret_cast<const dst_type*>(block.getLiterals()) + md.nLiterals;
990 getDecoder().process(block.getData() + block.getNData(), dstBegin, md.messageLength, NDecoderStreams, literalsEnd);
991 } else {
992 getDecoder().process(block.getData() + block.getNData(), dstBegin, md.messageLength, NDecoderStreams);
993 }
994 return {0, md.getUncompressedSize(), md.getCompressedSize()};
995};
996
997template <typename H, int N, typename W>
998template <typename dst_IT>
999CTFIOSize EncodedBlocks<H, N, W>::decodeRansV1Impl(dst_IT dstBegin, int slot, const std::any& decoderExt) const
1000{
1001
1002 // get references to the right data
1003 const auto& block = mBlocks[slot];
1004 const auto& md = mMetadata[slot];
1005
1006 using dst_type = typename std::iterator_traits<dst_IT>::value_type;
1007 using decoder_type = typename rans::defaultDecoder_type<dst_type>;
1008
1009 std::optional<decoder_type> inplaceDecoder{};
1010 if (md.nDictWords > 0) {
1011 std::visit([&](auto&& arg) { inplaceDecoder = decoder_type{arg}; }, this->getDictionary<dst_type>(slot));
1012 } else if (!decoderExt.has_value()) {
1013 throw std::runtime_error("no dictionary nor external decoder provided");
1014 }
1015
1016 auto getDecoder = [&]() -> const decoder_type& {
1017 if (inplaceDecoder.has_value()) {
1018 return inplaceDecoder.value();
1019 } else {
1020 return std::any_cast<const decoder_type&>(decoderExt);
1021 }
1022 };
1023
1024 // verify decoders
1025 [&]() {
1026 const decoder_type& decoder = getDecoder();
1027 const size_t decoderSymbolTablePrecision = decoder.getSymbolTablePrecision();
1028
1029 if (md.probabilityBits != decoderSymbolTablePrecision) {
1030 throw std::runtime_error(fmt::format(
1031 "Missmatch in decoder renorming precision vs metadata:{} Bits vs {} Bits.",
1032 md.probabilityBits, decoderSymbolTablePrecision));
1033 }
1034
1035 if (md.streamSize != rans::utils::getStreamingLowerBound_v<typename decoder_type::coder_type>) {
1036 throw std::runtime_error("Streaming lower bound of dataset and decoder do not match");
1037 }
1038 }();
1039
1040 // do the actual decoding
1041 if (block.getNLiterals()) {
1042 std::vector<dst_type> literals(md.nLiterals);
1043 rans::unpack(block.getLiterals(), md.nLiterals, literals.data(), md.literalsPackingWidth, md.literalsPackingOffset);
1044 getDecoder().process(block.getData() + block.getNData(), dstBegin, md.messageLength, md.nStreams, literals.end());
1045 } else {
1046 getDecoder().process(block.getData() + block.getNData(), dstBegin, md.messageLength, md.nStreams);
1047 }
1048 return {0, md.getUncompressedSize(), md.getCompressedSize()};
1049};
1050
1051template <typename H, int N, typename W>
1052template <typename dst_IT>
1054{
1055 using dest_t = typename std::iterator_traits<dst_IT>::value_type;
1056
1057 const auto& block = mBlocks[slot];
1058 const auto& md = mMetadata[slot];
1059
1060 const size_t messageLength = md.messageLength;
1061 const size_t packingWidth = md.probabilityBits;
1062 const dest_t offset = md.min;
1063 const auto* srcIt = block.getData();
1064 // we have a vector of one and the same value. All information is in the metadata
1065 if (packingWidth == 0) {
1066 const dest_t value = [&]() -> dest_t {
1067 // Bugfix: We tried packing values with a width of 0 Bits;
1068 if (md.nDataWords > 0) {
1069 LOGP(debug, "packing bug recovery: MD nStreams:{} messageLength:{} nLiterals:{} messageWordSize:{} coderType:{} streamSize:{} probabilityBits:{} (int)opt:{} min:{} max:{} literalsPackingOffset:{} literalsPackingWidth:{} nDictWords:{} nDataWords:{} nLiteralWords:{}",
1070 value, md.nStreams, md.messageLength, md.nLiterals, md.messageWordSize, md.coderType, md.streamSize, md.probabilityBits, (int)md.opt, md.min, md.max, md.literalsPackingOffset, md.literalsPackingWidth, md.nDictWords, md.nDataWords, md.nLiteralWords);
1071 return offset + static_cast<dest_t>(*srcIt);
1072 }
1073 // normal case:
1074 return offset;
1075 }();
1076 for (size_t i = 0; i < messageLength; ++i) {
1077 *dest++ = value;
1078 }
1079 } else {
1080 rans::unpack(srcIt, messageLength, dest, packingWidth, offset);
1081 }
1082 return {0, md.getUncompressedSize(), md.getCompressedSize()};
1083};
1084
1085template <typename H, int N, typename W>
1086template <typename dst_IT>
1088{
1089 // get references to the right data
1090 const auto& block = mBlocks[slot];
1091 const auto& md = mMetadata[slot];
1092
1093 using dest_t = typename std::iterator_traits<dst_IT>::value_type;
1094 using decoder_t = typename rans::compat::decoder_type<dest_t>;
1095 using destPtr_t = typename std::iterator_traits<dst_IT>::pointer;
1096
1097 destPtr_t srcBegin = reinterpret_cast<destPtr_t>(block.payload);
1098 destPtr_t srcEnd = srcBegin + md.messageLength * sizeof(dest_t);
1099 std::copy(srcBegin, srcEnd, dest);
1100
1101 return {0, md.getUncompressedSize(), md.getCompressedSize()};
1102};
1103
1105template <typename H, int N, typename W>
1106template <typename input_IT, typename buffer_T>
1107o2::ctf::CTFIOSize EncodedBlocks<H, N, W>::encode(const input_IT srcBegin, // iterator begin of source message
1108 const input_IT srcEnd, // iterator end of source message
1109 int slot, // slot in encoded data to fill
1110 uint8_t symbolTablePrecision, // encoding into
1111 Metadata::OptStore opt, // option for data compression
1112 buffer_T* buffer, // optional buffer (vector) providing memory for encoded blocks
1113 const std::any& encoderExt, // optional external encoder
1114 float memfc) // memory allocation margin factor
1115{
1116 // fill a new block
1117 assert(slot == mRegistry.nFilledBlocks);
1118 mRegistry.nFilledBlocks++;
1119
1120 const size_t messageLength = std::distance(srcBegin, srcEnd);
1121 // cover three cases:
1122 // * empty source message: no co
1123 // * source message to pass through without any entropy coding
1124 // * source message where entropy coding should be applied
1125
1126 // case 1: empty source message
1127 if (messageLength == 0) {
1128 mMetadata[slot] = Metadata{};
1129 mMetadata[slot].opt = Metadata::OptStore::NODATA;
1130 return {};
1131 }
1132 if (detail::mayEEncode(opt)) {
1133 const ANSHeader& ansVersion = getANSHeader();
1134 if (ansVersion == ANSVersionCompat) {
1135 return entropyCodeRANSCompat(srcBegin, srcEnd, slot, symbolTablePrecision, buffer, encoderExt, memfc);
1136 } else if (ansVersion == ANSVersion1) {
1137 return entropyCodeRANSV1(srcBegin, srcEnd, slot, opt, buffer, encoderExt, memfc);
1138 } else {
1139 throw std::runtime_error(fmt::format("Unsupported ANS Coder Version: {}.{}", ansVersion.majorVersion, ansVersion.minorVersion));
1140 }
1141 } else if (detail::mayPack(opt)) {
1142 return pack(srcBegin, srcEnd, slot, buffer);
1143 } else {
1144 return store(srcBegin, srcEnd, slot, opt, buffer);
1145 }
1146};
1147
1148template <typename H, int N, typename W>
1149template <typename T>
1150[[nodiscard]] auto EncodedBlocks<H, N, W>::expandStorage(size_t slot, size_t nElements, T* buffer) -> decltype(auto)
1151{
1152 // after previous relocation this (hence its data members) are not guaranteed to be valid
1153 auto* old = get(buffer->data());
1154 auto* thisBlock = &(old->mBlocks[slot]);
1155 auto* thisMetadata = &(old->mMetadata[slot]);
1156
1157 // resize underlying buffer of block if necessary and update all pointers.
1158 auto* const blockHead = get(thisBlock->registry->head); // extract pointer from the block, as "this" might be invalid
1159 const size_t additionalSize = blockHead->estimateBlockSize(nElements); // additionalSize is in bytes!!!
1160 if (additionalSize >= thisBlock->registry->getFreeSize()) {
1161 LOGP(debug, "Slot {} with {} available words needs to allocate {} bytes for a total of {} words.", slot, thisBlock->registry->getFreeSize(), additionalSize, nElements);
1162 if (buffer) {
1163 blockHead->expand(*buffer, blockHead->size() + (additionalSize - blockHead->getFreeSize()));
1164 thisMetadata = &(get(buffer->data())->mMetadata[slot]);
1165 thisBlock = &(get(buffer->data())->mBlocks[slot]); // in case of resizing this and any this.xxx becomes invalid
1166 } else {
1167 throw std::runtime_error("failed to allocate additional space in provided external buffer");
1168 }
1169 }
1170 return std::make_pair(thisBlock, thisMetadata);
1171};
1172
1173template <typename H, int N, typename W>
1174template <typename input_IT, typename buffer_T>
1175o2::ctf::CTFIOSize EncodedBlocks<H, N, W>::entropyCodeRANSCompat(const input_IT srcBegin, const input_IT srcEnd, int slot, uint8_t symbolTablePrecision, buffer_T* buffer, const std::any& encoderExt, float memfc)
1176{
1177 using storageBuffer_t = W;
1178 using input_t = typename std::iterator_traits<input_IT>::value_type;
1179 using ransEncoder_t = typename rans::compat::encoder_type<input_t>;
1180 using ransState_t = typename ransEncoder_t::coder_type::state_type;
1181 using ransStream_t = typename ransEncoder_t::stream_type;
1182
1183 // assert at compile time that output types align so that padding is not necessary.
1184 static_assert(std::is_same_v<storageBuffer_t, ransStream_t>);
1185 static_assert(std::is_same_v<storageBuffer_t, typename rans::count_t>);
1186
1187 auto* thisBlock = &mBlocks[slot];
1188 auto* thisMetadata = &mMetadata[slot];
1189
1190 // build symbol statistics
1191 constexpr size_t SizeEstMarginAbs = 10 * 1024;
1192 const float SizeEstMarginRel = 1.5 * memfc;
1193
1194 const size_t messageLength = std::distance(srcBegin, srcEnd);
1195 rans::DenseHistogram<input_t> frequencyTable{};
1196 rans::compat::encoder_type<input_t> inplaceEncoder{};
1197
1198 try {
1199 std::tie(inplaceEncoder, frequencyTable) = [&]() {
1200 if (encoderExt.has_value()) {
1201 return std::make_tuple(ransEncoder_t{}, rans::DenseHistogram<input_t>{});
1202 } else {
1203 auto histogram = rans::makeDenseHistogram::fromSamples(srcBegin, srcEnd);
1204 auto encoder = rans::compat::makeEncoder::fromHistogram(histogram, symbolTablePrecision);
1205 return std::make_tuple(std::move(encoder), std::move(histogram));
1206 }
1207 }();
1208 } catch (const rans::HistogramError& error) {
1209 LOGP(warning, "Failed to build Dictionary for rANS encoding, using fallback option");
1210 return store(srcBegin, srcEnd, slot, this->FallbackStorageType, buffer);
1211 }
1212 const ransEncoder_t& encoder = encoderExt.has_value() ? std::any_cast<const ransEncoder_t&>(encoderExt) : inplaceEncoder;
1213
1214 // estimate size of encode buffer
1215 int dataSize = rans::compat::calculateMaxBufferSizeB(messageLength, rans::compat::getAlphabetRangeBits(encoder.getSymbolTable())); // size in bytes
1216 // preliminary expansion of storage based on dict size + estimated size of encode buffer
1217 dataSize = SizeEstMarginAbs + int(SizeEstMarginRel * (dataSize / sizeof(storageBuffer_t))) + (sizeof(input_t) < sizeof(storageBuffer_t)); // size in words of output stream
1218
1219 const auto view = rans::trim(rans::makeHistogramView(frequencyTable));
1220 std::tie(thisBlock, thisMetadata) = expandStorage(slot, view.size() + dataSize, buffer);
1221
1222 // store dictionary first
1223
1224 if (!view.empty()) {
1225 thisBlock->storeDict(view.size(), view.data());
1226 LOGP(debug, "StoreDict {} bytes, offs: {}:{}", view.size() * sizeof(W), thisBlock->getOffsDict(), thisBlock->getOffsDict() + view.size() * sizeof(W));
1227 }
1228 // vector of incompressible literal symbols
1229 std::vector<input_t> literals;
1230 // directly encode source message into block buffer.
1231 storageBuffer_t* const blockBufferBegin = thisBlock->getCreateData();
1232 const size_t maxBufferSize = thisBlock->registry->getFreeSize(); // note: "this" might be not valid after expandStorage call!!!
1233 const auto [encodedMessageEnd, literalsEnd] = encoder.process(srcBegin, srcEnd, blockBufferBegin, std::back_inserter(literals));
1234 rans::utils::checkBounds(encodedMessageEnd, blockBufferBegin + maxBufferSize / sizeof(W));
1235 dataSize = encodedMessageEnd - thisBlock->getDataPointer();
1236 thisBlock->setNData(dataSize);
1237 thisBlock->realignBlock();
1238 LOGP(debug, "StoreData {} bytes, offs: {}:{}", dataSize * sizeof(W), thisBlock->getOffsData(), thisBlock->getOffsData() + dataSize * sizeof(W));
1239 // update the size claimed by encode message directly inside the block
1240
1241 // store incompressible symbols if any
1242 const size_t nLiteralSymbols = literals.size();
1243 const size_t nLiteralWords = [&]() {
1244 if (!literals.empty()) {
1245 const size_t nSymbols = literals.size();
1246 // introduce padding in case literals don't align;
1247 const size_t nLiteralSymbolsPadded = calculatePaddedSize<input_t, storageBuffer_t>(nSymbols);
1248 literals.resize(nLiteralSymbolsPadded, {});
1249
1250 const size_t nLiteralStorageElems = calculateNDestTElements<input_t, storageBuffer_t>(nSymbols);
1251 std::tie(thisBlock, thisMetadata) = expandStorage(slot, nLiteralStorageElems, buffer);
1252 thisBlock->storeLiterals(nLiteralStorageElems, reinterpret_cast<const storageBuffer_t*>(literals.data()));
1253 LOGP(debug, "StoreLiterals {} bytes, offs: {}:{}", nLiteralStorageElems * sizeof(W), thisBlock->getOffsLiterals(), thisBlock->getOffsLiterals() + nLiteralStorageElems * sizeof(W));
1254 return nLiteralStorageElems;
1255 }
1256 return size_t(0);
1257 }();
1258
1259 LOGP(debug, "Min, {} Max, {}, size, {}, nSamples {}", view.getMin(), view.getMax(), view.size(), frequencyTable.getNumSamples());
1260
1261 *thisMetadata = detail::makeMetadataRansCompat<input_t, ransState_t, ransStream_t>(encoder.getNStreams(),
1262 messageLength,
1263 nLiteralSymbols,
1264 encoder.getSymbolTable().getPrecision(),
1265 view.getMin(),
1266 view.getMax(),
1267 view.size(),
1268 dataSize,
1269 nLiteralWords);
1270
1271 return {0, thisMetadata->getUncompressedSize(), thisMetadata->getCompressedSize()};
1272}
1273
1274template <typename H, int N, typename W>
1275template <typename input_IT, typename buffer_T>
1276o2::ctf::CTFIOSize EncodedBlocks<H, N, W>::entropyCodeRANSV1(const input_IT srcBegin, const input_IT srcEnd, int slot, Metadata::OptStore opt, buffer_T* buffer, const std::any& encoderExt, float memfc)
1277{
1278 CTFIOSize encoderStatistics{};
1279
1280 const size_t nSamples = std::distance(srcBegin, srcEnd);
1281 if (detail::mayPack(opt) && nSamples < PackingThreshold) {
1282 encoderStatistics = pack(srcBegin, srcEnd, slot, buffer);
1283 } else {
1284
1285 if (encoderExt.has_value()) {
1286 encoderStatistics = encodeRANSV1External(srcBegin, srcEnd, slot, encoderExt, buffer, memfc);
1287 } else {
1288 encoderStatistics = encodeRANSV1Inplace(srcBegin, srcEnd, slot, opt, buffer, memfc);
1289 }
1290 }
1291 return encoderStatistics;
1292}
1293
1294template <typename H, int N, typename W>
1295template <typename input_IT, typename buffer_T>
1296CTFIOSize EncodedBlocks<H, N, W>::encodeRANSV1External(const input_IT srcBegin, const input_IT srcEnd, int slot, const std::any& encoderExt, buffer_T* buffer, double_t sizeEstimateSafetyFactor)
1297{
1298 using storageBuffer_t = W;
1299 using input_t = typename std::iterator_traits<input_IT>::value_type;
1300 using ransEncoder_t = typename internal::ExternalEntropyCoder<input_t>::encoder_type;
1301 using ransState_t = typename ransEncoder_t::coder_type::state_type;
1302 using ransStream_t = typename ransEncoder_t::stream_type;
1303
1304 // assert at compile time that output types align so that padding is not necessary.
1305 static_assert(std::is_same_v<storageBuffer_t, ransStream_t>);
1306 static_assert(std::is_same_v<storageBuffer_t, typename rans::count_t>);
1307
1308 auto* thisBlock = &mBlocks[slot];
1309 auto* thisMetadata = &mMetadata[slot];
1310
1311 const size_t messageLength = std::distance(srcBegin, srcEnd);
1312 internal::ExternalEntropyCoder<input_t> encoder{std::any_cast<const ransEncoder_t&>(encoderExt)};
1313
1314 const size_t payloadSizeWords = encoder.template computePayloadSizeEstimate<storageBuffer_t>(messageLength);
1315 std::tie(thisBlock, thisMetadata) = expandStorage(slot, payloadSizeWords, buffer);
1316
1317 // encode payload
1318 auto encodedMessageEnd = encoder.encode(srcBegin, srcEnd, thisBlock->getCreateData(), thisBlock->getEndOfBlock());
1319 const size_t dataSize = std::distance(thisBlock->getCreateData(), encodedMessageEnd);
1320 thisBlock->setNData(dataSize);
1321 thisBlock->realignBlock();
1322 LOGP(debug, "StoreData {} bytes, offs: {}:{}", dataSize * sizeof(storageBuffer_t), thisBlock->getOffsData(), thisBlock->getOffsData() + dataSize * sizeof(storageBuffer_t));
1323 // update the size claimed by encoded message directly inside the block
1324
1325 // encode literals
1326 size_t literalsSize = 0;
1327 if (encoder.getNIncompressibleSamples() > 0) {
1328 const size_t literalsBufferSizeWords = encoder.template computePackedIncompressibleSize<storageBuffer_t>();
1329 std::tie(thisBlock, thisMetadata) = expandStorage(slot, literalsBufferSizeWords, buffer);
1330 auto literalsEnd = encoder.writeIncompressible(thisBlock->getCreateLiterals(), thisBlock->getEndOfBlock());
1331 literalsSize = std::distance(thisBlock->getCreateLiterals(), literalsEnd);
1332 thisBlock->setNLiterals(literalsSize);
1333 thisBlock->realignBlock();
1334 LOGP(debug, "StoreLiterals {} bytes, offs: {}:{}", literalsSize * sizeof(storageBuffer_t), thisBlock->getOffsLiterals(), thisBlock->getOffsLiterals() + literalsSize * sizeof(storageBuffer_t));
1335 }
1336
1337 // write metadata
1338 const auto& symbolTable = encoder.getEncoder().getSymbolTable();
1339 *thisMetadata = detail::makeMetadataRansV1<input_t, ransState_t, ransStream_t>(encoder.getEncoder().getNStreams(),
1340 rans::utils::getStreamingLowerBound_v<typename ransEncoder_t::coder_type>,
1341 messageLength,
1342 encoder.getNIncompressibleSamples(),
1343 symbolTable.getPrecision(),
1344 symbolTable.getOffset(),
1345 symbolTable.getOffset() + symbolTable.size(),
1346 encoder.getIncompressibleSymbolOffset(),
1347 encoder.getIncompressibleSymbolPackingBits(),
1348 0,
1349 dataSize,
1350 literalsSize);
1351
1352 return {0, thisMetadata->getUncompressedSize(), thisMetadata->getCompressedSize()};
1353};
1354
1355template <typename H, int N, typename W>
1356template <typename input_IT, typename buffer_T>
1357CTFIOSize EncodedBlocks<H, N, W>::encodeRANSV1Inplace(const input_IT srcBegin, const input_IT srcEnd, int slot, Metadata::OptStore opt, buffer_T* buffer, double_t sizeEstimateSafetyFactor)
1358{
1359 using storageBuffer_t = W;
1360 using input_t = typename std::iterator_traits<input_IT>::value_type;
1361 using ransEncoder_t = typename rans::denseEncoder_type<input_t>;
1362 using ransState_t = typename ransEncoder_t::coder_type::state_type;
1363 using ransStream_t = typename ransEncoder_t::stream_type;
1364
1365 // assert at compile time that output types align so that padding is not necessary.
1366 static_assert(std::is_same_v<storageBuffer_t, ransStream_t>);
1367 static_assert(std::is_same_v<storageBuffer_t, typename rans::count_t>);
1368
1369 auto* thisBlock = &mBlocks[slot];
1370 auto* thisMetadata = &mMetadata[slot];
1371
1373 rans::SourceProxy<input_IT> proxy{srcBegin, srcEnd, [](input_IT begin, input_IT end) {
1374 const size_t nSamples = std::distance(begin, end);
1375 return (!std::is_pointer_v<input_IT> && (nSamples < rans::utils::pow2(23)));
1376 }};
1377
1378 try {
1379 if (proxy.isCached()) {
1380 encoder = internal::InplaceEntropyCoder<input_t>{proxy.beginCache(), proxy.endCache()};
1381 } else {
1382 encoder = internal::InplaceEntropyCoder<input_t>{proxy.beginIter(), proxy.endIter()};
1383 }
1384 } catch (const rans::HistogramError& error) {
1385 LOGP(warning, "Failed to build Dictionary for rANS encoding, using fallback option");
1386 if (proxy.isCached()) {
1387 return store(proxy.beginCache(), proxy.endCache(), slot, this->FallbackStorageType, buffer);
1388 } else {
1389 return store(proxy.beginIter(), proxy.endIter(), slot, this->FallbackStorageType, buffer);
1390 }
1391 }
1392
1393 const rans::Metrics<input_t>& metrics = encoder.getMetrics();
1394 /*
1395 if constexpr (sizeof(input_t) > 2) {
1396 const auto& dp = metrics.getDatasetProperties();
1397 LOGP(info, "Metrics:{{slot: {}, numSamples: {}, min: {}, max: {}, alphabetRangeBits: {}, nUsedAlphabetSymbols: {}, preferPacking: {}}}", slot, dp.numSamples, dp.min, dp.max, dp.alphabetRangeBits, dp.nUsedAlphabetSymbols, metrics.getSizeEstimate().preferPacking());
1398 }
1399 */
1400 if (detail::mayPack(opt) && metrics.getSizeEstimate().preferPacking()) {
1401 if (proxy.isCached()) {
1402 return pack(proxy.beginCache(), proxy.endCache(), slot, metrics, buffer);
1403 } else {
1404 return pack(proxy.beginIter(), proxy.endIter(), slot, metrics, buffer);
1405 };
1406 }
1407
1408 encoder.makeEncoder();
1409
1410 const rans::SizeEstimate sizeEstimate = metrics.getSizeEstimate();
1411 const size_t bufferSizeWords = rans::utils::nBytesTo<storageBuffer_t>((sizeEstimate.getCompressedDictionarySize() +
1412 sizeEstimate.getCompressedDatasetSize() +
1413 sizeEstimate.getIncompressibleSize()) *
1414 sizeEstimateSafetyFactor);
1415 std::tie(thisBlock, thisMetadata) = expandStorage(slot, bufferSizeWords, buffer);
1416
1417 // encode dict
1418 auto encodedDictEnd = encoder.writeDictionary(thisBlock->getCreateDict(), thisBlock->getEndOfBlock());
1419 const size_t dictSize = std::distance(thisBlock->getCreateDict(), encodedDictEnd);
1420 thisBlock->setNDict(dictSize);
1421 thisBlock->realignBlock();
1422 LOGP(debug, "StoreDict {} bytes, offs: {}:{}", dictSize * sizeof(storageBuffer_t), thisBlock->getOffsDict(), thisBlock->getOffsDict() + dictSize * sizeof(storageBuffer_t));
1423
1424 // encode payload
1425 auto encodedMessageEnd = thisBlock->getCreateData();
1426 if (proxy.isCached()) {
1427 encodedMessageEnd = encoder.encode(proxy.beginCache(), proxy.endCache(), thisBlock->getCreateData(), thisBlock->getEndOfBlock());
1428 } else {
1429 encodedMessageEnd = encoder.encode(proxy.beginIter(), proxy.endIter(), thisBlock->getCreateData(), thisBlock->getEndOfBlock());
1430 }
1431 const size_t dataSize = std::distance(thisBlock->getCreateData(), encodedMessageEnd);
1432 thisBlock->setNData(dataSize);
1433 thisBlock->realignBlock();
1434 LOGP(debug, "StoreData {} bytes, offs: {}:{}", dataSize * sizeof(storageBuffer_t), thisBlock->getOffsData(), thisBlock->getOffsData() + dataSize * sizeof(storageBuffer_t));
1435 // update the size claimed by encoded message directly inside the block
1436
1437 // encode literals
1438 size_t literalsSize{};
1439 if (encoder.getNIncompressibleSamples() > 0) {
1440 auto literalsEnd = encoder.writeIncompressible(thisBlock->getCreateLiterals(), thisBlock->getEndOfBlock());
1441 literalsSize = std::distance(thisBlock->getCreateLiterals(), literalsEnd);
1442 thisBlock->setNLiterals(literalsSize);
1443 thisBlock->realignBlock();
1444 LOGP(debug, "StoreLiterals {} bytes, offs: {}:{}", literalsSize * sizeof(storageBuffer_t), thisBlock->getOffsLiterals(), thisBlock->getOffsLiterals() + literalsSize * sizeof(storageBuffer_t));
1445 }
1446
1447 // write metadata
1448 *thisMetadata = detail::makeMetadataRansV1<input_t, ransState_t, ransStream_t>(encoder.getNStreams(),
1449 rans::utils::getStreamingLowerBound_v<typename ransEncoder_t::coder_type>,
1450 std::distance(srcBegin, srcEnd),
1451 encoder.getNIncompressibleSamples(),
1452 encoder.getSymbolTablePrecision(),
1453 *metrics.getCoderProperties().min,
1454 *metrics.getCoderProperties().max,
1455 metrics.getDatasetProperties().min,
1456 metrics.getDatasetProperties().alphabetRangeBits,
1457 dictSize,
1458 dataSize,
1459 literalsSize);
1460
1461 return {0, thisMetadata->getUncompressedSize(), thisMetadata->getCompressedSize()};
1462}; // namespace ctf
1463
1464template <typename H, int N, typename W>
1465template <typename input_IT, typename buffer_T>
1466o2::ctf::CTFIOSize EncodedBlocks<H, N, W>::pack(const input_IT srcBegin, const input_IT srcEnd, int slot, rans::Metrics<typename std::iterator_traits<input_IT>::value_type> metrics, buffer_T* buffer)
1467{
1468 using storageBuffer_t = W;
1469 using input_t = typename std::iterator_traits<input_IT>::value_type;
1470
1471 const size_t messageLength = metrics.getDatasetProperties().numSamples;
1472 const auto alphabetRangeBits = metrics.getDatasetProperties().alphabetRangeBits;
1473
1474 auto* thisBlock = &mBlocks[slot];
1475 auto* thisMetadata = &mMetadata[slot];
1476 size_t packedSize = 0;
1477
1478 if (messageLength == 0) {
1479 *thisMetadata = detail::makeMetadataPack<input_t>(0, 0, 0, 0);
1480 } else if (metrics.getDatasetProperties().alphabetRangeBits == 0) {
1481 *thisMetadata = detail::makeMetadataPack<input_t>(messageLength, 0, *srcBegin, 0);
1482 } else {
1484 size_t packingBufferWords = packer.template getPackingBufferSize<storageBuffer_t>(messageLength);
1485 std::tie(thisBlock, thisMetadata) = expandStorage(slot, packingBufferWords, buffer);
1486 auto packedMessageEnd = packer.pack(srcBegin, srcEnd, thisBlock->getCreateData(), thisBlock->getEndOfBlock());
1487 packedSize = std::distance(thisBlock->getCreateData(), packedMessageEnd);
1488 *thisMetadata = detail::makeMetadataPack<input_t>(messageLength, packer.getPackingWidth(), packer.getOffset(), packedSize);
1489 thisBlock->setNData(packedSize);
1490 thisBlock->realignBlock();
1491 }
1492
1493 LOGP(debug, "StoreData {} bytes, offs: {}:{}", packedSize * sizeof(storageBuffer_t), thisBlock->getOffsData(), thisBlock->getOffsData() + packedSize * sizeof(storageBuffer_t));
1494 return {0, thisMetadata->getUncompressedSize(), thisMetadata->getCompressedSize()};
1495};
1496
1497template <typename H, int N, typename W>
1498template <typename input_IT, typename buffer_T>
1499o2::ctf::CTFIOSize EncodedBlocks<H, N, W>::store(const input_IT srcBegin, const input_IT srcEnd, int slot, Metadata::OptStore opt, buffer_T* buffer)
1500{
1501 using storageBuffer_t = W;
1502 using input_t = typename std::iterator_traits<input_IT>::value_type;
1503
1504 const size_t messageLength = std::distance(srcBegin, srcEnd);
1505 // introduce padding in case literals don't align;
1506 const size_t nSourceElemsPadded = calculatePaddedSize<input_t, storageBuffer_t>(messageLength);
1507 std::vector<input_t> tmp(nSourceElemsPadded, {});
1508 std::copy(srcBegin, srcEnd, std::begin(tmp));
1509
1510 const size_t nBufferElems = calculateNDestTElements<input_t, storageBuffer_t>(messageLength);
1511 auto [thisBlock, thisMetadata] = expandStorage(slot, nBufferElems, buffer);
1512 thisBlock->storeData(nBufferElems, reinterpret_cast<const storageBuffer_t*>(tmp.data()));
1513
1514 *thisMetadata = detail::makeMetadataStore<input_t, storageBuffer_t>(messageLength, opt, nBufferElems);
1515
1516 return {0, thisMetadata->getUncompressedSize(), thisMetadata->getCompressedSize()};
1517};
1518
1520template <typename H, int N, typename W>
1521std::vector<char> EncodedBlocks<H, N, W>::createDictionaryBlocks(const std::vector<rans::DenseHistogram<int32_t>>& vfreq, const std::vector<Metadata>& vmd)
1522{
1523
1524 if (vfreq.size() != N) {
1525 throw std::runtime_error(fmt::format("mismatch between the size of frequencies vector {} and number of blocks {}", vfreq.size(), N));
1526 }
1527 size_t sz = alignSize(sizeof(EncodedBlocks<H, N, W>));
1528 for (int ib = 0; ib < N; ib++) {
1529 sz += Block<W>::estimateSize(vfreq[ib].size());
1530 }
1531 std::vector<char> vdict(sz); // memory space for dictionary
1532 auto dictBlocks = create(vdict.data(), sz);
1533 for (int ib = 0; ib < N; ib++) {
1534 const auto& thisHistogram = vfreq[ib];
1535 const auto view = rans::trim(rans::makeHistogramView(thisHistogram));
1536
1537 if (!view.empty()) {
1538 LOG(info) << "adding dictionary of " << view.size() << " words for block " << ib << ", min/max= " << view.getMin() << "/" << view.getMax();
1539 dictBlocks->mBlocks[ib].storeDict(view.size(), view.data());
1540 dictBlocks = get(vdict.data()); // !!! rellocation might have invalidated dictBlocks pointer
1541 dictBlocks->mMetadata[ib] = vmd[ib];
1542 dictBlocks->mMetadata[ib].opt = Metadata::OptStore::ROOTCompression; // we will compress the dictionary with root!
1543 dictBlocks->mBlocks[ib].realignBlock();
1544 } else {
1545 dictBlocks->mMetadata[ib].opt = Metadata::OptStore::NONE;
1546 }
1547 dictBlocks->mRegistry.nFilledBlocks++;
1548 }
1549 return vdict;
1550}
1551#endif
1552
1553template <typename H, int N, typename W>
1554void EncodedBlocks<H, N, W>::dump(const std::string& prefix, int ncol) const
1555{
1556 for (int ibl = 0; ibl < getNBlocks(); ibl++) {
1557 const auto& blc = getBlock(ibl);
1558 std::string ss;
1559 LOGP(info, "{} Bloc:{} Dict: {} words", prefix, ibl, blc.getNDict());
1560 const auto* ptr = blc.getDict();
1561 for (int i = 0; i < blc.getNDict(); i++) {
1562 if (i && (i % ncol) == 0) {
1563 LOG(info) << ss;
1564 ss.clear();
1565 }
1566 ss += fmt::format(" {:#010x}", ptr[i]);
1567 }
1568 if (!ss.empty()) {
1569 LOG(info) << ss;
1570 ss.clear();
1571 }
1572 LOG(info) << "\n";
1573 LOGP(info, "{} Bloc:{} Data: {} words", prefix, ibl, blc.getNData());
1574 ptr = blc.getData();
1575 for (int i = 0; i < blc.getNData(); i++) {
1576 if (i && (i % ncol) == 0) {
1577 LOG(info) << ss;
1578 ss.clear();
1579 }
1580 ss += fmt::format(" {:#010x}", ptr[i]);
1581 }
1582 if (!ss.empty()) {
1583 LOG(info) << ss;
1584 ss.clear();
1585 }
1586 LOG(info) << "\n";
1587 LOGP(info, "{} Bloc:{} Literals: {} words", prefix, ibl, blc.getNLiterals());
1588 ptr = blc.getData();
1589 for (int i = 0; i < blc.getNLiterals(); i++) {
1590 if (i && (i % 20) == 0) {
1591 LOG(info) << ss;
1592 ss.clear();
1593 }
1594 ss += fmt::format(" {:#010x}", ptr[i]);
1595 }
1596 if (!ss.empty()) {
1597 LOG(info) << ss;
1598 ss.clear();
1599 }
1600 LOG(info) << "\n";
1601 }
1602}
1603
1604} // namespace ctf
1605} // namespace o2
1606
1607#endif
representation of ANS Version number in a comparable way
#define verbosity
Header: timestamps and format version for detector CTF dictionary.
int32_t i
Metadata required to decode a Block.
Interfaces for BitPacking using librans.
uint32_t res
Definition RawData.h:0
TBranch * ptr
useful public helper functions.
std::ostringstream debug
const auto & getData()
uint64_t ransState_t
uint32_t source_type
Class for time synchronization of RawReader instances.
<<======================== Auxiliary classes =======================<<
CTFIOSize decodeCopyImpl(dst_IT dest, int slot) const
static constexpr Metadata::OptStore FallbackStorageType
this is in fact stored, but to overcome TBuffer limits we have to define the branches per block!...
void readFromTree(TTree &tree, const std::string &name, int ev=0)
read from tree to non-flat object
o2::ctf::CTFIOSize entropyCodeRANSV1(const input_IT srcBegin, const input_IT srcEnd, int slot, Metadata::OptStore opt, buffer_T *buffer=nullptr, const std::any &encoderExt={}, float memfc=1.f)
static auto get(void *head)
cast arbitrary buffer head to container class. Head is supposed to respect the alignment
ClassDefNV(EncodedBlocks, 3)
static size_t estimateBlockSize(int n)
estimate free size needed to add new block
CTFIOSize decodeCompatImpl(dst_IT dest, int slot, const std::any &decoderExt) const
void clear()
clear itself
o2::ctf::CTFIOSize decode(container_T &dest, int slot, const std::any &decoderExt={}) const
decode block at provided slot to destination vector (will be resized as needed)
static auto get(const void *head)
size_t estimateSize() const
size_t estimateSizeFromMetadata() const
do the same using metadata info
static void relocate(const char *oldHead, char *newHead, char *wrapper, size_t newsize=0)
auto expandStorage(size_t slot, size_t nElemets, T *buffer=nullptr) -> decltype(auto)
void setHeader(const H &h)
o2::ctf::CTFIOSize encodeRANSV1External(const input_IT srcBegin, const input_IT srcEnd, int slot, const std::any &encoderExt, buffer_T *buffer=nullptr, double_t sizeEstimateSafetyFactor=1)
static void readFromTree(VD &vec, TTree &tree, const std::string &name, int ev=0)
read from tree to destination buffer vector
void dump(const std::string &prefix="", int ncol=20) const
CTFIOSize decodeRansV1Impl(dst_IT dest, int slot, const std::any &decoderExt) const
ANSHeader checkANSVersion(ANSHeader ansVersion) const
void fillFlatCopy(EncodedBlocks &dest) const
Create its own flat copy in the destination empty flat object.
void copyToFlat(void *base)
copy itself to flat buffer created on the fly at the provided pointer. The destination block should b...
std::array< Block< W >, N > mBlocks
const H & getHeader() const
static size_t fillTreeBranch(TTree &tree, const std::string &brname, D &dt, int compLevel, int splitLevel=99)
add and fill single branch
auto & getMetadata(int i) const
static size_t getMinAlignedSize()
o2::ctf::CTFIOSize pack(const input_IT srcBegin, const input_IT srcEnd, int slot, buffer_T *buffer=nullptr)
static auto expand(buffer_T &buffer, size_t newsizeBytes)
expand the storage to new size in bytes
void print(const std::string &prefix="", int verbosity=1) const
print itself
o2::ctf::CTFIOSize encodeRANSV1Inplace(const input_IT srcBegin, const input_IT srcEnd, int slot, Metadata::OptStore opt, buffer_T *buffer=nullptr, double_t sizeEstimateSafetyFactor=1)
dictionaryType< source_T > getDictionary(int i, ANSHeader ansVersion=ANSVersionUnspecified) const
bool flat() const
check if flat and valid
static std::vector< char > createDictionaryBlocks(const std::vector< rans::DenseHistogram< int32_t > > &vfreq, const std::vector< Metadata > &prbits)
create a special EncodedBlocks containing only dictionaries made from provided vector of frequency ta...
const auto & getMetadata() const
static constexpr int getNBlocks()
const ANSHeader & getANSHeader() const
auto & getBlock(int i) const
bool empty() const
check if empty and valid
static auto getImage(const void *newHead)
get const image of the container wrapper, with pointers in the image relocated to new head
static bool readTreeBranch(TTree &tree, const std::string &brname, D &dt, int ev=0)
read single branch
size_t compactify()
Compactify by eliminating empty space.
CTFIOSize decodeUnpackImpl(dst_IT dest, int slot) const
std::variant< rans::RenormedSparseHistogram< source_T >, rans::RenormedDenseHistogram< source_T > > dictionaryType
o2::ctf::CTFIOSize decode(D_IT dest, int slot, const std::any &decoderExt={}) const
decode block at provided slot to destination pointer, the needed space assumed to be available
size_t size() const
total allocated size in bytes
EncodedBlocks< H, N, W > base
static auto create(VD &v)
create container from vector. Head is supposed to respect the alignment
void setANSHeader(const ANSHeader &h)
o2::ctf::CTFIOSize store(const input_IT srcBegin, const input_IT srcEnd, int slot, Metadata::OptStore opt, buffer_T *buffer=nullptr)
o2::ctf::CTFIOSize encode(const input_IT srcBegin, const input_IT srcEnd, int slot, uint8_t symbolTablePrecision, Metadata::OptStore opt, buffer_T *buffer=nullptr, const std::any &encoderExt={}, float memfc=1.f)
encode vector src to block at provided slot
size_t getFreeSize() const
size remaining for additional data
o2::ctf::CTFIOSize encode(const VE &src, int slot, uint8_t symbolTablePrecision, Metadata::OptStore opt, buffer_T *buffer=nullptr, const std::any &encoderExt={}, float memfc=1.f)
encode vector src to block at provided slot
const auto & getRegistry() const
static auto create(void *head, size_t sz)
create container from arbitrary buffer of predefined size (in bytes!!!). Head is supposed to respect ...
std::shared_ptr< H > cloneHeader() const
ANSHeader & getANSHeader()
void init(size_t sz)
setup internal structure and registry for given buffer size (in bytes!!!)
void copyToFlat(V &vec)
copy itself to flat buffer created on the fly from the vector
size_t appendToTree(TTree &tree, const std::string &name) const
attach to tree
o2::ctf::CTFIOSize entropyCodeRANSCompat(const input_IT srcBegin, const input_IT srcEnd, int slot, uint8_t symbolTablePrecision, buffer_T *buffer=nullptr, const std::any &encoderExt={}, float memfc=1.f)
std::array< Metadata, N > mMetadata
o2::ctf::CTFIOSize pack(const input_IT srcBegin, const input_IT srcEnd, int slot, rans::Metrics< typename std::iterator_traits< input_IT >::value_type > metrics, buffer_T *buffer=nullptr)
typename rans::denseEncoder_type< source_type > encoder_type
const DatasetProperties< source_type > & getDatasetProperties() const noexcept
Definition Metrics.h:52
size_t getIncompressibleSize(double_t safetyFactor=1.2) const
size_t getCompressedDatasetSize(double_t safetyFactor=1.2) const
size_t getCompressedDictionarySize(double_t safetyFactor=2) const
static decltype(auto) fromHistogram(DenseHistogram< source_T > histogram, size_t renormingPrecision=0)
Definition compat.h:187
functionality to maintain compatibility with previous version of this library
static factory classes for building histograms, encoders and decoders.
GLdouble n
Definition glcorearb.h:1982
GLeglImageOES image
Definition glcorearb.h:4021
GLenum src
Definition glcorearb.h:1767
GLuint buffer
Definition glcorearb.h:655
GLsizeiptr size
Definition glcorearb.h:659
GLuint GLuint end
Definition glcorearb.h:469
const GLdouble * v
Definition glcorearb.h:832
GLenum GLsizei dataSize
Definition glcorearb.h:3994
GLuint const GLchar * name
Definition glcorearb.h:781
GLsizei GLenum const void GLuint GLsizei GLfloat * metrics
Definition glcorearb.h:5500
GLboolean GLboolean GLboolean b
Definition glcorearb.h:1233
GLsizei const GLfloat * value
Definition glcorearb.h:819
GLintptr offset
Definition glcorearb.h:660
public interface for building and renorming histograms from source data.
constexpr bool mayPack(Metadata::OptStore opt) noexcept
constexpr bool is_iterator_v
constexpr bool mayEEncode(Metadata::OptStore opt) noexcept
constexpr size_t calculateNDestTElements(size_t nElems) noexcept
constexpr size_t PackingThreshold
constexpr int WrappersCompressionLevel
uint8_t BufferType
This is the type of the vector to be used for the EncodedBlocks buffer allocation.
constexpr ANSHeader ANSVersionCompat
Definition ANSHeader.h:54
constexpr ANSHeader ANSVersion1
Definition ANSHeader.h:55
constexpr ANSHeader ANSVersionUnspecified
Definition ANSHeader.h:53
size_t calculatePaddedSize(size_t nElems) noexcept
constexpr size_t Alignment
constexpr int WrappersSplitLevel
size_t alignSize(size_t sizeBytes)
align size to the given number of bytes
T * relocatePointer(const char *oldBase, char *newBase, const T *ptr)
relocate pointer by the difference of addresses
decltype(makeEncoder::fromRenormed(RenormedDenseHistogram< source_T >{})) encoder_type
Definition compat.h:292
RenormedDenseHistogram< source_T > renorm(DenseHistogram< source_T > histogram, size_t newPrecision=0)
Definition compat.h:75
size_t getAlphabetRangeBits(const DenseHistogram< source_T > &histogram) noexcept
Definition compat.h:260
size_t calculateMaxBufferSizeB(size_t nElements, size_t rangeBits)
Definition compat.h:282
decltype(makeDecoder::fromRenormed(RenormedDenseHistogram< source_T >{})) decoder_type
Definition compat.h:295
constexpr uint32_t getRangeBits(T min, T max) noexcept
Definition utils.h:200
constexpr size_t pow2(size_t n) noexcept
Definition utils.h:165
void checkBounds(IT iteratorPosition, IT upperBound)
Definition utils.h:244
size_t sanitizeRenormingBitRange(size_t renormPrecision)
Definition utils.h:212
decltype(makeDenseEncoder<>::fromRenormed(RenormedDenseHistogram< source_T >{})) denseEncoder_type
Definition factory.h:229
decltype(makeDecoder<>::fromRenormed(RenormedDenseHistogram< source_T >{})) defaultDecoder_type
Definition factory.h:238
auto makeHistogramView(container_T &container, std::ptrdiff_t offset) noexcept -> HistogramView< decltype(std::begin(container))>
void unpack(const input_T *__restrict inputBegin, size_t extent, output_IT outputBegin, size_t packingWidth, typename std::iterator_traits< output_IT >::value_type offset=static_cast< typename std::iterator_traits< output_IT >::value_type >(0))
Definition pack.h:346
HistogramView< Hist_IT > trim(const HistogramView< Hist_IT > &buffer)
RenormedDenseHistogram< source_T > readRenormedDictionary(buffer_IT begin, buffer_IT end, source_T min, source_T max, size_t renormingPrecision)
Definition serialize.h:188
decltype(auto) renorm(histogram_T histogram, size_t newPrecision, RenormingPolicy renormingPolicy=RenormingPolicy::Auto, size_t lowProbabilityCutoffBits=0)
Definition renorm.h:203
RenormedSparseHistogram< source_T > readRenormedSetDictionary(buffer_IT begin, buffer_IT end, source_T min, source_T max, size_t renormingPrecision)
Definition serialize.h:215
a couple of static helper functions to create timestamp values for CCDB queries or override obsolete ...
Defining DataPointCompositeObject explicitly as copiable.
std::string to_string(gsl::span< T, Size > span)
Definition common.h:52
public interface for serializing histograms (dictionaries) to JSON or compressed binary.
uint8_t majorVersion
Definition ANSHeader.h:28
uint8_t minorVersion
Definition ANSHeader.h:29
binary blob for single entropy-compressed column: metadata + (optional) dictionary and data buffer + ...
int getNStored() const
const W * getData() const
Registry * registry
const W * getDict() const
int getNLiterals() const
const W * getDataPointer() const
void storeDict(int _ndict, const W *_dict)
void storeLiterals(int _nliterals, const W *_literals)
const W * getLiterals() const
int nDict
non-persistent info for in-memory ops
const W * getEndOfBlock() const
int getNDict() const
void storeData(int _ndata, const W *_data)
void setNData(int _ndata)
int getNData() const
void clear()
clear itself
void store(int _ndict, int _ndata, int _nliterals, const W *_dict, const W *_data, const W *_literals)
store binary blob data (buffer filled from head to tail)
void setNDict(int _ndict)
void setNLiterals(int _nliterals)
ClassDefNV(Block, 1)
static size_t estimateSize(int n)
estimate free size needed to add new block
void relocate(const char *oldHead, char *newHeadData, char *newHeadRegistry)
relocate to different head position
>>======================== Auxiliary classes =======================>>
int nFilledBlocks
pointer on the head of the CTF
char * getFreeBlockEnd() const
size_t getFreeSize() const
size in bytes available to fill data
size_t size
offset of the start of the writable space (wrt head), in bytes!!!
ClassDefNV(Registry, 1)
char * getFreeBlockStart() const
calculate the pointer of the head of the writable space
static constexpr size_t nStreams
Definition compat.h:52
static decltype(auto) fromSamples(source_IT begin, source_IT end, typename std::iterator_traits< source_IT >::value_type min, typename std::iterator_traits< source_IT >::value_type max)
Definition factory.h:144
static std::string concat_string(Ts const &... ts)
int estimateSize(bool withHB=false)
std::vector< o2::ctf::BufferType > vec
LOG(info)<< "Compressed in "<< sw.CpuTime()<< " s"
coder decode(ctfImage, triggersD, clustersD)
std::unique_ptr< TTree > tree((TTree *) flIn.Get(std::string(o2::base::NameConf::CTFTREENAME).c_str()))