Project
Loading...
Searching...
No Matches
EncodedBlocks.h
Go to the documentation of this file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
14
16
17#ifndef ALICEO2_ENCODED_BLOCKS_H
18#define ALICEO2_ENCODED_BLOCKS_H
19// #undef NDEBUG
20// #include <cassert>
21#include <type_traits>
22#include <cstddef>
23#include <Rtypes.h>
24#include <any>
25
26#include "TTree.h"
28#include "Framework/Logger.h"
34#ifndef __CLING__
37#include "rANS/compat.h"
38#include "rANS/histogram.h"
39#include "rANS/serialize.h"
40#include "rANS/factory.h"
41#include "rANS/metrics.h"
42#include "rANS/utils.h"
43#endif
44
45namespace o2
46{
47namespace ctf
48{
49
50namespace detail
51{
52
/// Type trait: primary template, chosen for every type that exposes no
/// std::iterator_traits<T>::iterator_category.
template <class, class = void>
struct is_iterator : std::false_type {
};

/// Specialization picked (via SFINAE) when iterator_category exists.
/// The trait is true when that category derives from std::input_iterator_tag
/// or is exactly std::output_iterator_tag.
template <class T>
struct is_iterator<T, std::void_t<typename std::iterator_traits<T>::iterator_category>>
  : std::bool_constant<
      std::is_base_of_v<std::input_iterator_tag, typename std::iterator_traits<T>::iterator_category> ||
      std::is_same_v<std::output_iterator_tag, typename std::iterator_traits<T>::iterator_category>> {
};

/// Convenience variable template for is_iterator<T>::value.
template <class T>
inline constexpr bool is_iterator_v = is_iterator<T>::value;
66
// Compile-time, non-throwing predicate over a Metadata::OptStore storage option.
// Judging by the name it presumably tells whether entropy encoding may be used
// for the given option — confirm against the upstream header.
// NOTE(review): the embedded listing numbering jumps from 68 to 70, i.e. the
// return statement of this function is not present in this listing.
67inline constexpr bool mayEEncode(Metadata::OptStore opt) noexcept
68{
70}
71
// Compile-time, non-throwing predicate over a Metadata::OptStore storage option.
// Judging by the name it presumably tells whether bit-packing may be used
// for the given option — confirm against the upstream header.
// NOTE(review): the embedded listing numbering jumps from 73 to 75, i.e. the
// return statement of this function is not present in this listing.
72inline constexpr bool mayPack(Metadata::OptStore opt) noexcept
73{
75}
76
77} // namespace detail
/// Threshold used by the packing logic (not referenced in this part of the file;
/// NOTE(review): unit and exact meaning to be confirmed at the point of use).
inline constexpr size_t PackingThreshold = 512;

/// Byte boundary to which alignSize() rounds sizes up.
inline constexpr size_t Alignment = 16;

/// Split level used for the "_wrapper." tree branch.
inline constexpr int WrappersSplitLevel = 99;
/// Compression level used for the "_wrapper." tree branch.
inline constexpr int WrappersCompressionLevel = 1;

using BufferType = uint8_t; // to avoid every detector using different types, we better define it here

/// Round a size in bytes up to the next multiple of Alignment.
/// A size that is already a multiple of Alignment (including 0) is returned unchanged.
inline constexpr size_t alignSize(size_t sizeBytes) noexcept
{
  const size_t remainder = sizeBytes % Alignment;
  return remainder ? sizeBytes + (Alignment - remainder) : sizeBytes;
}
94
/// Translate a pointer that lived in a buffer starting at oldBase into the
/// equivalent position of a buffer starting at newBase.
/// @param oldBase start of the buffer ptr currently refers to
/// @param newBase start of the buffer the result should refer to
/// @param ptr     pointer to relocate; a null pointer is passed through as null
/// @return pointer at the same byte offset from newBase as ptr has from oldBase
template <class T>
inline T* relocatePointer(const char* oldBase, char* newBase, const T* ptr)
{
  if (ptr == nullptr) {
    return nullptr;
  }
  const auto byteOffset = reinterpret_cast<const char*>(ptr) - oldBase;
  return reinterpret_cast<T*>(newBase + byteOffset);
}
101
/// Number of dest_T elements needed to hold nElems elements of source_T,
/// i.e. ceil(nElems * sizeof(source_T) / sizeof(dest_T)).
/// Only available when dest_T is at least as large as source_T.
template <typename source_T, typename dest_T, std::enable_if_t<(sizeof(dest_T) >= sizeof(source_T)), bool> = true>
inline constexpr size_t calculateNDestTElements(size_t nElems) noexcept
{
  const size_t srcBufferSize = nElems * sizeof(source_T);
  return srcBufferSize / sizeof(dest_T) + (srcBufferSize % sizeof(dest_T) != 0);
}

/// Number of source_T elements that fit into the dest_T storage required for
/// nElems source_T elements (nElems rounded up to a whole number of dest_T words).
/// Only available when dest_T is at least as large as source_T.
template <typename source_T, typename dest_T, std::enable_if_t<(sizeof(dest_T) >= sizeof(source_T)), bool> = true>
inline constexpr size_t calculatePaddedSize(size_t nElems) noexcept
{
  constexpr size_t sizeOfSourceT = sizeof(source_T);
  constexpr size_t sizeOfDestT = sizeof(dest_T);

  // this is equivalent to (sizeOfDestT / sizeOfSourceT) * std::ceil(nElems * sizeOfSourceT / sizeOfDestT)
  return (sizeOfDestT / sizeOfSourceT) * calculateNDestTElements<source_T, dest_T>(nElems);
}
118
120
/// Bookkeeping of a contiguous buffer: where it starts, how large it is and
/// where its still unused part begins.
struct Registry {
  char* head = nullptr;     // start of the buffer; offsets below are relative to it
  int nFilledBlocks = 0;    // number of filled blocks = next block to fill (must be strictly consecutive)
  size_t offsFreeStart = 0; // offset (from head) of the first free byte
  size_t size = 0;          // full size in bytes!!!

  /// Pointer to the first free byte of the buffer
  char* getFreeBlockStart() const
  {
    assert(offsFreeStart <= size);
    char* const freeStart = head + offsFreeStart;
    return freeStart;
  }

  /// Number of bytes between the free-space start and the end of the buffer
  size_t getFreeSize() const
  {
    const size_t freeBytes = size - offsFreeStart;
    return freeBytes;
  }

  /// Pointer one past the last free byte of the buffer
  char* getFreeBlockEnd() const
  {
    assert(offsFreeStart <= size);
    char* const freeStart = getFreeBlockStart();
    return freeStart + getFreeSize();
  }
};
149
151template <typename W = uint32_t>
152struct Block {
153
154 Registry* registry = nullptr;
155 int nDict = 0; // dictionary length (if any)
156 int nData = 0; // length of data
157 int nLiterals = 0; // length of literals vector (if any)
158 int nStored = 0; // total length
159 W* payload = nullptr; //[nStored];
160
161 inline const W* getDict() const { return nDict ? payload : nullptr; }
162 inline const W* getData() const { return nData ? (payload + nDict) : nullptr; }
163 inline const W* getDataPointer() const { return payload ? (payload + nDict) : nullptr; } // needed when nData is not set yet
164 inline const W* getLiterals() const { return nLiterals ? (payload + nDict + nData) : nullptr; }
165 inline const W* getEndOfBlock() const
166 {
167 if (!registry) {
168 return nullptr;
169 }
170 // get last legal W*, since unaligned data is undefined behavior!
171 const size_t delta = reinterpret_cast<uintptr_t>(registry->getFreeBlockEnd()) % sizeof(W);
172 return reinterpret_cast<const W*>(registry->getFreeBlockEnd() - delta);
173 }
174
175 inline W* getCreatePayload() { return payload ? payload : (registry ? (payload = reinterpret_cast<W*>(registry->getFreeBlockStart())) : nullptr); }
176 inline W* getCreateDict() { return payload ? payload : getCreatePayload(); }
177 inline W* getCreateData() { return payload ? (payload + nDict) : getCreatePayload(); }
178 inline W* getCreateLiterals() { return payload ? payload + (nDict + nData) : getCreatePayload(); }
179 inline W* getEndOfBlock() { return const_cast<W*>(static_cast<const Block&>(*this).getEndOfBlock()); };
180
181 inline auto getOffsDict() { return reinterpret_cast<std::uintptr_t>(getCreateDict()) - reinterpret_cast<std::uintptr_t>(registry->head); }
182 inline auto getOffsData() { return reinterpret_cast<std::uintptr_t>(getCreateData()) - reinterpret_cast<std::uintptr_t>(registry->head); }
183 inline auto getOffsLiterals() { return reinterpret_cast<std::uintptr_t>(getCreateLiterals()) - reinterpret_cast<std::uintptr_t>(registry->head); }
184
185 inline void setNDict(int _ndict)
186 {
187 nDict = _ndict;
188 nStored += nDict;
189 }
190
191 inline void setNData(int _ndata)
192 {
193 nData = _ndata;
194 nStored += nData;
195 }
196
197 inline void setNLiterals(int _nliterals)
198 {
199 nLiterals = _nliterals;
201 }
202
203 inline int getNDict() const { return nDict; }
204 inline int getNData() const { return nData; }
205 inline int getNLiterals() const { return nLiterals; }
206 inline int getNStored() const { return nStored; }
207
209 {
210 if (!registry) { // this is a standalone block owning its data
211 delete[] payload;
212 }
213 }
214
216 void clear()
217 {
218 nDict = 0;
219 nData = 0;
220 nLiterals = 0;
221 nStored = 0;
222 payload = nullptr;
223 }
224
226 static size_t estimateSize(int n)
227 {
228 return alignSize(n * sizeof(W));
229 }
230
231 // store a dictionary in an empty block
232 void storeDict(int _ndict, const W* _dict)
233 {
234 if (getNStored() > 0) {
235 throw std::runtime_error("trying to write in occupied block");
236 }
237 size_t sz = estimateSize(_ndict);
238 assert(registry); // this method is valid only for flat version, which has a registry
239 assert(sz <= registry->getFreeSize());
240 assert((_ndict > 0) == (_dict != nullptr));
241 setNDict(_ndict);
242 if (nDict) {
243 memcpy(getCreateDict(), _dict, _ndict * sizeof(W));
244 realignBlock();
245 }
246 };
247
248 // store a dictionary to a block which can either be empty or contain a dict.
249 void storeData(int _ndata, const W* _data)
250 {
251 if (getNStored() > getNDict()) {
252 throw std::runtime_error("trying to write in occupied block");
253 }
254
255 size_t sz = estimateSize(_ndata);
256 assert(registry); // this method is valid only for flat version, which has a registry
257 assert(sz <= registry->getFreeSize());
258 assert((_ndata > 0) == (_data != nullptr));
259 setNData(_ndata);
260 if (nData) {
261 memcpy(getCreateData(), _data, _ndata * sizeof(W));
262 realignBlock();
263 }
264 }
265
266 // store a dictionary to a block which can either be empty or contain a dict.
267 void storeLiterals(int _nliterals, const W* _literals)
268 {
269 if (getNStored() > getNDict() + getNData()) {
270 throw std::runtime_error("trying to write in occupied block");
271 }
272
273 size_t sz = estimateSize(_nliterals);
274 assert(registry); // this method is valid only for flat version, which has a registry
275 assert(sz <= registry->getFreeSize());
276 // assert((_nliterals > 0) == (_literals != nullptr));
277 setNLiterals(_nliterals);
278 if (nLiterals) {
279 memcpy(getCreateLiterals(), _literals, _nliterals * sizeof(W));
280 realignBlock();
281 }
282 }
283
284 // resize block and free up unused buffer space.
286 {
287 if (payload) {
288 size_t sz = estimateSize(getNStored());
289 registry->offsFreeStart = (reinterpret_cast<char*>(payload) - registry->head) + sz;
290 }
291 }
292
294 void store(int _ndict, int _ndata, int _nliterals, const W* _dict, const W* _data, const W* _literals)
295 {
296 size_t sz = estimateSize(_ndict + _ndata + _nliterals);
297 assert(registry); // this method is valid only for flat version, which has a registry
298 assert(sz <= registry->getFreeSize());
299 assert((_ndict > 0) == (_dict != nullptr));
300 assert((_ndata > 0) == (_data != nullptr));
301 // assert(_literals == _data + _nliterals);
302 setNDict(_ndict);
303 setNData(_ndata);
304 setNLiterals(_nliterals);
305 getCreatePayload(); // do this even for empty block!!!
306 if (getNStored()) {
307 payload = reinterpret_cast<W*>(registry->getFreeBlockStart());
308 if (getNDict()) {
309 memcpy(getCreateDict(), _dict, _ndict * sizeof(W));
310 }
311 if (getNData()) {
312 memcpy(getCreateData(), _data, _ndata * sizeof(W));
313 }
314 if (getNLiterals()) {
315 memcpy(getCreateLiterals(), _literals, _nliterals * sizeof(W));
316 }
317 }
318 realignBlock();
319 }
320
322 void relocate(const char* oldHead, char* newHeadData, char* newHeadRegistry)
323 {
324 payload = relocatePointer(oldHead, newHeadData, payload);
325 registry = relocatePointer(oldHead, newHeadRegistry, registry);
326 }
327
329}; // namespace ctf
330
332
333template <typename H, int N, typename W = uint32_t>
335{
336 public:
338
339#ifndef __CLING__
340 template <typename source_T>
341 using dictionaryType = std::variant<rans::RenormedSparseHistogram<source_T>, rans::RenormedDenseHistogram<source_T>>;
342#endif
343
344 void setHeader(const H& h)
345 {
346 mHeader = h;
347 }
348 const H& getHeader() const { return mHeader; }
349 H& getHeader() { return mHeader; }
350 std::shared_ptr<H> cloneHeader() const { return std::shared_ptr<H>(new H(mHeader)); } // for dictionary creation
351
352 const auto& getRegistry() const { return mRegistry; }
353
354 const auto& getMetadata() const { return mMetadata; }
355
356 auto& getMetadata(int i) const
357 {
358 assert(i < N);
359 return mMetadata[i];
360 }
361
362 auto& getBlock(int i) const
363 {
364 assert(i < N);
365 return mBlocks[i];
366 }
367
368#ifndef __CLING__
369 template <typename source_T>
371 {
372 const auto& block = getBlock(i);
373 const auto& metadata = getMetadata(i);
374 ansVersion = checkANSVersion(ansVersion);
375
376 assert(static_cast<int64_t>(std::numeric_limits<source_T>::min()) <= static_cast<int64_t>(metadata.max));
377 assert(static_cast<int64_t>(std::numeric_limits<source_T>::max()) >= static_cast<int64_t>(metadata.min));
378
379 // check consistency of metadata and type
380 [&]() {
381 const int64_t sourceMin = std::numeric_limits<source_T>::min();
382 const int64_t sourceMax = std::numeric_limits<source_T>::max();
383
384 auto view = rans::trim(rans::HistogramView{block.getDict(), block.getDict() + block.getNDict(), metadata.min});
385 const int64_t dictMin = view.getMin();
386 const int64_t dictMax = view.getMax();
387 assert(dictMin >= metadata.min);
388 assert(dictMax <= metadata.max);
389
390 if ((dictMin < sourceMin) || (dictMax > sourceMax)) {
391 if (ansVersion == ANSVersionCompat && mHeader.majorVersion == 1 && mHeader.minorVersion == 0 && mHeader.dictTimeStamp < 1653192000000) {
392 LOGP(warn, "value range of dictionary and target datatype are incompatible: target type [{},{}] vs dictionary [{},{}], tolerate in compat mode for old dictionaries", sourceMin, sourceMax, dictMin, dictMax);
393 } else {
394 throw std::runtime_error(fmt::format("value range of dictionary and target datatype are incompatible: target type [{},{}] vs dictionary [{},{}]", sourceMin, sourceMax, dictMin, dictMax));
395 }
396 }
397 }();
398
399 if (ansVersion == ANSVersionCompat) {
400 rans::DenseHistogram<source_T> histogram{block.getDict(), block.getDict() + block.getNDict(), metadata.min};
401 return rans::compat::renorm(std::move(histogram), metadata.probabilityBits);
402 } else if (ansVersion == ANSVersion1) {
403 // dictionary is loaded from an explicit dict file and is stored densly
405 rans::DenseHistogram<source_T> histogram{block.getDict(), block.getDict() + block.getNDict(), metadata.min};
406 size_t renormingBits = rans::utils::sanitizeRenormingBitRange(metadata.probabilityBits);
407 LOG_IF(debug, renormingBits != metadata.probabilityBits)
408 << fmt::format("While reading metadata from external dictionary, rANSV1 is rounding renorming precision from {} to {}", metadata.probabilityBits, renormingBits);
409 return rans::renorm(std::move(histogram), renormingBits, rans::RenormingPolicy::ForceIncompressible);
410 } else {
411 // dictionary is elias-delta coded inside the block
412 if constexpr (sizeof(source_T) > 2) {
413 return rans::readRenormedSetDictionary(block.getDict(), block.getDict() + block.getNDict(),
414 static_cast<source_T>(metadata.min), static_cast<source_T>(metadata.max),
415 metadata.probabilityBits);
416 } else {
417 return rans::readRenormedDictionary(block.getDict(), block.getDict() + block.getNDict(),
418 static_cast<source_T>(metadata.min), static_cast<source_T>(metadata.max),
419 metadata.probabilityBits);
420 }
421 }
422 } else {
423 throw std::runtime_error(fmt::format("Failed to load serialized Dictionary. Unsupported ANS Version: {}", static_cast<std::string>(ansVersion)));
424 }
425 };
426#endif
427
429 {
430 mANSHeader = h;
431 }
432 const ANSHeader& getANSHeader() const { return mANSHeader; }
434
435 static constexpr int getNBlocks() { return N; }
436
437 static size_t getMinAlignedSize() { return alignSize(sizeof(base)); }
438
440 static auto get(void* head) { return reinterpret_cast<EncodedBlocks*>(head); }
441 static auto get(const void* head) { return reinterpret_cast<const EncodedBlocks*>(head); }
442
444 static auto getImage(const void* newHead);
445
447 static auto create(void* head, size_t sz);
448
450 template <typename VD>
451 static auto create(VD& v);
452
454 static size_t estimateBlockSize(int n) { return Block<W>::estimateSize(n); }
455
457 bool empty() const { return (mRegistry.offsFreeStart == alignSize(sizeof(*this))) && (mRegistry.size >= mRegistry.offsFreeStart); }
458
460 bool flat() const { return mRegistry.size > 0 && (mRegistry.size >= mRegistry.offsFreeStart) && (mBlocks[0].registry == &mRegistry) && (mBlocks[N - 1].registry == &mRegistry); }
461
463 void clear();
464
466 size_t compactify() { return (mRegistry.size = estimateSize()); }
467
469 size_t size() const { return mRegistry.size; }
470
472 size_t outputsize() const { return mRegistry.offsFreeStart; }
473
475 size_t getFreeSize() const { return mRegistry.getFreeSize(); }
476
478 template <typename buffer_T>
479 static auto expand(buffer_T& buffer, size_t newsizeBytes);
480
482 template <typename V>
483 void copyToFlat(V& vec);
484
487
489 size_t appendToTree(TTree& tree, const std::string& name) const;
490
492 void readFromTree(TTree& tree, const std::string& name, int ev = 0);
493
495 template <typename VD>
496 static void readFromTree(VD& vec, TTree& tree, const std::string& name, int ev = 0);
497
499 template <typename VE, typename buffer_T>
500 inline o2::ctf::CTFIOSize encode(const VE& src, int slot, uint8_t symbolTablePrecision, Metadata::OptStore opt, buffer_T* buffer = nullptr, const std::any& encoderExt = {}, float memfc = 1.f)
501 {
502 return encode(std::begin(src), std::end(src), slot, symbolTablePrecision, opt, buffer, encoderExt, memfc);
503 }
504
506 template <typename input_IT, typename buffer_T>
507 o2::ctf::CTFIOSize encode(const input_IT srcBegin, const input_IT srcEnd, int slot, uint8_t symbolTablePrecision, Metadata::OptStore opt, buffer_T* buffer = nullptr, const std::any& encoderExt = {}, float memfc = 1.f);
508
510 template <class container_T, class container_IT = typename container_T::iterator>
511 o2::ctf::CTFIOSize decode(container_T& dest, int slot, const std::any& decoderExt = {}) const;
512
514 template <typename D_IT, std::enable_if_t<detail::is_iterator_v<D_IT>, bool> = true>
515 o2::ctf::CTFIOSize decode(D_IT dest, int slot, const std::any& decoderExt = {}) const;
516
517#ifndef __CLING__
519 static std::vector<char> createDictionaryBlocks(const std::vector<rans::DenseHistogram<int32_t>>& vfreq, const std::vector<Metadata>& prbits);
520#endif
521
523 void print(const std::string& prefix = "", int verbosity = 1) const;
524 void dump(const std::string& prefix = "", int ncol = 20) const;
525
526 protected:
527 static_assert(N > 0, "number of encoded blocks < 1");
528
530 ANSHeader mANSHeader; // ANS header
531 H mHeader; // detector specific header
532 std::array<Metadata, N> mMetadata; // compressed block's details
533 std::array<Block<W>, N> mBlocks;
534
536
538 void init(size_t sz);
539
544 static void relocate(const char* oldHead, char* newHead, char* wrapper, size_t newsize = 0);
545
548 size_t estimateSize() const;
549
552
554 void fillFlatCopy(EncodedBlocks& dest) const;
555
557 template <typename D>
558 static size_t fillTreeBranch(TTree& tree, const std::string& brname, D& dt, int compLevel, int splitLevel = 99);
559
561 template <typename D>
562 static bool readTreeBranch(TTree& tree, const std::string& brname, D& dt, int ev = 0);
563
564 template <typename T>
565 auto expandStorage(size_t slot, size_t nElemets, T* buffer = nullptr) -> decltype(auto);
566
567 inline ANSHeader checkANSVersion(ANSHeader ansVersion) const
568 {
569 auto ctfANSHeader = getANSHeader();
571
572 const bool isEqual{ansVersion == ctfANSHeader};
573 const bool isHeaderUnspecified{ctfANSHeader == ANSVersionUnspecified};
574
575 if (isEqual) {
576 if (isHeaderUnspecified) {
577 throw std::runtime_error{fmt::format("Missmatch of ANSVersions, trying to encode/decode CTF with ANS Version Header {} with ANS Version {}",
578 static_cast<std::string>(ctfANSHeader),
579 static_cast<std::string>(ansVersion))};
580 } else {
581 ret = ctfANSHeader;
582 }
583 } else {
584 if (isHeaderUnspecified) {
585 ret = ansVersion;
586 } else {
587 ret = ctfANSHeader;
588 }
589 }
590
591 return ret;
592 };
593
594 template <typename input_IT, typename buffer_T>
595 o2::ctf::CTFIOSize entropyCodeRANSCompat(const input_IT srcBegin, const input_IT srcEnd, int slot, uint8_t symbolTablePrecision, buffer_T* buffer = nullptr, const std::any& encoderExt = {}, float memfc = 1.f);
596
597 template <typename input_IT, typename buffer_T>
598 o2::ctf::CTFIOSize entropyCodeRANSV1(const input_IT srcBegin, const input_IT srcEnd, int slot, Metadata::OptStore opt, buffer_T* buffer = nullptr, const std::any& encoderExt = {}, float memfc = 1.f);
599
600 template <typename input_IT, typename buffer_T>
601 o2::ctf::CTFIOSize encodeRANSV1External(const input_IT srcBegin, const input_IT srcEnd, int slot, const std::any& encoderExt, buffer_T* buffer = nullptr, double_t sizeEstimateSafetyFactor = 1);
602
603 template <typename input_IT, typename buffer_T>
604 o2::ctf::CTFIOSize encodeRANSV1Inplace(const input_IT srcBegin, const input_IT srcEnd, int slot, Metadata::OptStore opt, buffer_T* buffer = nullptr, double_t sizeEstimateSafetyFactor = 1);
605
606#ifndef __CLING__
607 template <typename input_IT, typename buffer_T>
608 o2::ctf::CTFIOSize pack(const input_IT srcBegin, const input_IT srcEnd, int slot, rans::Metrics<typename std::iterator_traits<input_IT>::value_type> metrics, buffer_T* buffer = nullptr);
609
610 template <typename input_IT, typename buffer_T>
611 inline o2::ctf::CTFIOSize pack(const input_IT srcBegin, const input_IT srcEnd, int slot, buffer_T* buffer = nullptr)
612 {
613 using source_type = typename std::iterator_traits<input_IT>::value_type;
614
616 metrics.getDatasetProperties().numSamples = std::distance(srcBegin, srcEnd);
617
618 if (metrics.getDatasetProperties().numSamples != 0) {
619 const auto [minIter, maxIter] = std::minmax_element(srcBegin, srcEnd);
620 metrics.getDatasetProperties().min = *minIter;
621 metrics.getDatasetProperties().max = *maxIter;
622
623 // special case: if min === max, the range is 0 and the data can be reconstructed just via the metadata.
624 metrics.getDatasetProperties().alphabetRangeBits =
625 rans::utils::getRangeBits(metrics.getDatasetProperties().min,
626 metrics.getDatasetProperties().max);
627 }
628
629 return pack(srcBegin, srcEnd, slot, metrics, buffer);
630 }
631#endif
632
633 template <typename input_IT, typename buffer_T>
634 o2::ctf::CTFIOSize store(const input_IT srcBegin, const input_IT srcEnd, int slot, Metadata::OptStore opt, buffer_T* buffer = nullptr);
635
636 // decode
637 template <typename dst_IT>
638 CTFIOSize decodeCompatImpl(dst_IT dest, int slot, const std::any& decoderExt) const;
639
640 template <typename dst_IT>
641 CTFIOSize decodeRansV1Impl(dst_IT dest, int slot, const std::any& decoderExt) const;
642
643 template <typename dst_IT>
644 CTFIOSize decodeUnpackImpl(dst_IT dest, int slot) const;
645
646 template <typename dst_IT>
647 CTFIOSize decodeCopyImpl(dst_IT dest, int slot) const;
648
650}; // namespace ctf
651
654template <typename H, int N, typename W>
655void EncodedBlocks<H, N, W>::readFromTree(TTree& tree, const std::string& name, int ev)
656{
657 readTreeBranch(tree, o2::utils::Str::concat_string(name, "_wrapper."), *this, ev);
658 for (int i = 0; i < N; i++) {
659 readTreeBranch(tree, o2::utils::Str::concat_string(name, "_block.", std::to_string(i), "."), mBlocks[i], ev);
660 }
661}
662
665template <typename H, int N, typename W>
666template <typename VD>
667void EncodedBlocks<H, N, W>::readFromTree(VD& vec, TTree& tree, const std::string& name, int ev)
668{
669 auto tmp = create(vec);
670 if (!readTreeBranch(tree, o2::utils::Str::concat_string(name, "_wrapper."), *tmp, ev)) {
671 throw std::runtime_error(fmt::format("Failed to read CTF header for {}", name));
672 }
673 tmp = tmp->expand(vec, tmp->estimateSizeFromMetadata());
674 const auto& meta = tmp->getMetadata();
675 for (int i = 0; i < N; i++) {
676 Block<W> bl;
677 readTreeBranch(tree, o2::utils::Str::concat_string(name, "_block.", std::to_string(i), "."), bl, ev);
678 assert(meta[i].nDictWords == bl.getNDict());
679 assert(meta[i].nDataWords == bl.getNData());
680 assert(meta[i].nLiteralWords == bl.getNLiterals());
681 tmp->mBlocks[i].store(bl.getNDict(), bl.getNData(), bl.getNLiterals(), bl.getDict(), bl.getData(), bl.getLiterals());
682 }
683}
684
687template <typename H, int N, typename W>
688size_t EncodedBlocks<H, N, W>::appendToTree(TTree& tree, const std::string& name) const
689{
690 long s = 0;
691 s += fillTreeBranch(tree, o2::utils::Str::concat_string(name, "_wrapper."), const_cast<base&>(*this), WrappersCompressionLevel, WrappersSplitLevel);
692 for (int i = 0; i < N; i++) {
693 int compression = mMetadata[i].opt == Metadata::OptStore::ROOTCompression ? 1 : 0;
694 s += fillTreeBranch(tree, o2::utils::Str::concat_string(name, "_block.", std::to_string(i), "."), const_cast<Block<W>&>(mBlocks[i]), compression);
695 }
696 tree.SetEntries(tree.GetEntries() + 1);
697 return s;
698}
699
702template <typename H, int N, typename W>
703template <typename D>
704bool EncodedBlocks<H, N, W>::readTreeBranch(TTree& tree, const std::string& brname, D& dt, int ev)
705{
706 auto* br = tree.GetBranch(brname.c_str());
707 if (!br) {
708 LOG(debug) << "Branch " << brname << " is absent";
709 return false;
710 }
711 auto* ptr = &dt;
712 br->SetAddress(&ptr);
713 br->GetEntry(ev);
714 br->ResetAddress();
715 return true;
716}
717
720template <typename H, int N, typename W>
721template <typename D>
722inline size_t EncodedBlocks<H, N, W>::fillTreeBranch(TTree& tree, const std::string& brname, D& dt, int compLevel, int splitLevel)
723{
724 auto* br = tree.GetBranch(brname.c_str());
725 if (!br) {
726 br = tree.Branch(brname.c_str(), &dt, 512, splitLevel);
727 br->SetCompressionLevel(compLevel);
728 }
729 return br->Fill();
730}
731
734template <typename H, int N, typename W>
736{
737 assert(dest.empty() && dest.mRegistry.getFreeSize() < estimateSize());
738 dest.mANSHeader = mANSHeader;
739 dest.mHeader = mHeader;
740 dest.mMetadata = mMetadata;
741 for (int i = 0; i < N; i++) {
742 dest.mBlocks[i].store(mBlocks[i].getNDict(), mBlocks[i].getNData(), mBlocks[i].getDict(), mBlocks[i].getData());
743 }
744}
745
748template <typename H, int N, typename W>
749template <typename V>
751{
752 auto vtsz = sizeof(typename std::remove_reference<decltype(vec)>::type::value_type), sz = estimateSize();
753 vec.resize(sz / vtsz);
754 copyToFlat(vec.data());
755}
756
760template <typename H, int N, typename W>
762{
763 size_t sz = 0;
764 sz += alignSize(sizeof(*this));
765 for (int i = 0; i < N; i++) {
766 sz += alignSize(mBlocks[i].nStored * sizeof(W));
767 }
768 return sz;
769}
770
774template <typename H, int N, typename W>
776{
777 size_t sz = alignSize(sizeof(*this));
778 for (int i = 0; i < N; i++) {
779 sz += alignSize((mMetadata[i].nDictWords + mMetadata[i].nDataWords + mMetadata[i].nLiteralWords) * sizeof(W));
780 }
781 return sz;
782}
783
786template <typename H, int N, typename W>
787template <typename buffer_T>
788auto EncodedBlocks<H, N, W>::expand(buffer_T& buffer, size_t newsizeBytes)
789{
790 auto buftypesize = sizeof(typename std::remove_reference<decltype(buffer)>::type::value_type);
791 auto* oldHead = get(buffer.data())->mRegistry.head;
792 buffer.resize(alignSize(newsizeBytes) / buftypesize);
793 relocate(oldHead, reinterpret_cast<char*>(buffer.data()), reinterpret_cast<char*>(buffer.data()), newsizeBytes);
794 return get(buffer.data());
795}
796
802template <typename H, int N, typename W>
803void EncodedBlocks<H, N, W>::relocate(const char* oldHead, char* newHead, char* wrapper, size_t newsize)
804{
805 auto newStr = get(wrapper);
806 for (int i = 0; i < N; i++) {
807 newStr->mBlocks[i].relocate(oldHead, newHead, wrapper);
808 }
809 newStr->mRegistry.head = newHead; // newHead points on the real data
810 // if asked, update the size
811 if (newsize) { // in bytes!!!
812 assert(newStr->estimateSize() <= newsize);
813 newStr->mRegistry.size = newsize;
814 }
815}
816
819template <typename H, int N, typename W>
821{
822 mRegistry.head = reinterpret_cast<char*>(this);
823 mRegistry.size = sz;
824 mRegistry.offsFreeStart = alignSize(sizeof(*this));
825 for (int i = 0; i < N; i++) {
826 mMetadata[i].clear();
827 mBlocks[i].registry = &mRegistry;
828 mBlocks[i].clear();
829 }
830}
831
834template <typename H, int N, typename W>
836{
837 for (int i = 0; i < N; i++) {
838 mBlocks[i].clear();
839 mMetadata[i].clear();
840 }
841 mRegistry.offsFreeStart = alignSize(sizeof(*this));
842}
843
846template <typename H, int N, typename W>
847auto EncodedBlocks<H, N, W>::getImage(const void* newHead)
848{
849 assert(newHead);
850 auto image(*get(newHead)); // 1st make a shalow copy
851 // now fix its pointers
852 // we don't modify newHead, but still need to remove constness for relocation interface
853 relocate(image.mRegistry.head, const_cast<char*>(reinterpret_cast<const char*>(newHead)), reinterpret_cast<char*>(&image));
854
855 return image;
856}
857
860template <typename H, int N, typename W>
861inline auto EncodedBlocks<H, N, W>::create(void* head, size_t sz)
862{
863 const H defh;
864 auto b = get(head);
865 b->init(sz);
866 b->setHeader(defh);
867 return b;
868}
869
872template <typename H, int N, typename W>
873template <typename VD>
875{
876 size_t vsz = sizeof(typename std::remove_reference<decltype(v)>::type::value_type); // size of the element of the buffer
877 auto baseSize = getMinAlignedSize() / vsz;
878 if (v.size() < baseSize) {
879 v.resize(baseSize);
880 }
881 return create(v.data(), v.size() * vsz);
882}
883
886template <typename H, int N, typename W>
887void EncodedBlocks<H, N, W>::print(const std::string& prefix, int verbosity) const
888{
889 if (verbosity > 0) {
890 LOG(info) << prefix << "Container of " << N << " blocks, size: " << size() << " bytes, unused: " << getFreeSize();
891 for (int i = 0; i < N; i++) {
892 LOG(info) << "Block " << i << " for " << static_cast<uint32_t>(mMetadata[i].messageLength) << " message words of "
893 << static_cast<uint32_t>(mMetadata[i].messageWordSize) << " bytes |"
894 << " NDictWords: " << mBlocks[i].getNDict() << " NDataWords: " << mBlocks[i].getNData()
895 << " NLiteralWords: " << mBlocks[i].getNLiterals();
896 }
897 } else if (verbosity == 0) {
898 size_t inpSize = 0, ndict = 0, ndata = 0, nlit = 0;
899 for (int i = 0; i < N; i++) {
900 inpSize += mMetadata[i].messageLength * mMetadata[i].messageWordSize;
901 ndict += mBlocks[i].getNDict();
902 ndata += mBlocks[i].getNData();
903 nlit += mBlocks[i].getNLiterals();
904 }
905 LOG(info) << prefix << N << " blocks, input size: " << inpSize << ", output size: " << outputsize()
906 << " NDictWords: " << ndict << " NDataWords: " << ndata << " NLiteralWords: " << nlit;
907 }
908}
909
911template <typename H, int N, typename W>
912template <class container_T, class container_IT>
913inline o2::ctf::CTFIOSize EncodedBlocks<H, N, W>::decode(container_T& dest, // destination container
914 int slot, // slot of the block to decode
915 const std::any& decoderExt) const // optional externally provided decoder
916{
917 dest.resize(mMetadata[slot].messageLength); // allocate output buffer
918 return decode(std::begin(dest), slot, decoderExt);
919}
920
922template <typename H, int N, typename W>
923template <typename D_IT, std::enable_if_t<detail::is_iterator_v<D_IT>, bool>>
924CTFIOSize EncodedBlocks<H, N, W>::decode(D_IT dest, // iterator to destination
925 int slot, // slot of the block to decode
926 const std::any& decoderExt) const // optional externally provided decoder
927{
928
929 // get references to the right data
930 const auto& ansVersion = getANSHeader();
931 const auto& block = mBlocks[slot];
932 const auto& md = mMetadata[slot];
933 LOGP(debug, "Slot{} | NStored={} Ndict={} nData={}, MD: messageLength:{} opt:{} min:{} max:{} offs:{} width:{} ", slot, block.getNStored(), block.getNDict(), block.getNData(), md.messageLength, (int)md.opt, md.min, md.max, md.literalsPackingOffset, md.literalsPackingWidth);
934
935 constexpr size_t word_size = sizeof(W);
936
937 if (ansVersion == ANSVersionCompat) {
938 if (!block.getNStored()) {
939 return {0, md.getUncompressedSize(), md.getCompressedSize() * word_size};
940 }
941 if (md.opt == Metadata::OptStore::EENCODE) {
942 return decodeCompatImpl(dest, slot, decoderExt);
943 } else {
944 return decodeCopyImpl(dest, slot);
945 }
946 } else if (ansVersion == ANSVersion1) {
947 if (md.opt == Metadata::OptStore::PACK) {
948 return decodeUnpackImpl(dest, slot);
949 }
950 if (!block.getNStored()) {
951 return {0, md.getUncompressedSize(), md.getCompressedSize() * word_size};
952 }
953 if (md.opt == Metadata::OptStore::EENCODE) {
954 return decodeRansV1Impl(dest, slot, decoderExt);
955 } else {
956 return decodeCopyImpl(dest, slot);
957 }
958 } else {
959 throw std::runtime_error("unsupported ANS Version");
960 }
961};
962
963#ifndef __CLING__
964template <typename H, int N, typename W>
965template <typename dst_IT>
966CTFIOSize EncodedBlocks<H, N, W>::decodeCompatImpl(dst_IT dstBegin, int slot, const std::any& decoderExt) const
967{
968
969 // get references to the right data
970 const auto& block = mBlocks[slot];
971 const auto& md = mMetadata[slot];
972
973 using dst_type = typename std::iterator_traits<dst_IT>::value_type;
974 using decoder_type = typename rans::compat::decoder_type<dst_type>;
975
976 std::optional<decoder_type> inplaceDecoder{};
977 if (md.nDictWords > 0) {
978 inplaceDecoder = decoder_type{std::get<rans::RenormedDenseHistogram<dst_type>>(this->getDictionary<dst_type>(slot))};
979 } else if (!decoderExt.has_value()) {
980 throw std::runtime_error("neither dictionary nor external decoder provided");
981 }
982
983 auto getDecoder = [&]() -> const decoder_type& {
984 if (inplaceDecoder.has_value()) {
985 return inplaceDecoder.value();
986 } else {
987 return std::any_cast<const decoder_type&>(decoderExt);
988 }
989 };
990
991 const size_t NDecoderStreams = rans::compat::defaults::CoderPreset::nStreams;
992
993 if (block.getNLiterals()) {
994 auto* literalsEnd = reinterpret_cast<const dst_type*>(block.getLiterals()) + md.nLiterals;
995 getDecoder().process(block.getData() + block.getNData(), dstBegin, md.messageLength, NDecoderStreams, literalsEnd);
996 } else {
997 getDecoder().process(block.getData() + block.getNData(), dstBegin, md.messageLength, NDecoderStreams);
998 }
999 return {0, md.getUncompressedSize(), md.getCompressedSize() * sizeof(W)};
1000};
1001
1002template <typename H, int N, typename W>
1003template <typename dst_IT>
1004CTFIOSize EncodedBlocks<H, N, W>::decodeRansV1Impl(dst_IT dstBegin, int slot, const std::any& decoderExt) const
1005{
1006
1007 // get references to the right data
1008 const auto& block = mBlocks[slot];
1009 const auto& md = mMetadata[slot];
1010
1011 using dst_type = typename std::iterator_traits<dst_IT>::value_type;
1012 using decoder_type = typename rans::defaultDecoder_type<dst_type>;
1013
1014 std::optional<decoder_type> inplaceDecoder{};
1015 if (md.nDictWords > 0) {
1016 std::visit([&](auto&& arg) { inplaceDecoder = decoder_type{arg}; }, this->getDictionary<dst_type>(slot));
1017 } else if (!decoderExt.has_value()) {
1018 throw std::runtime_error("no dictionary nor external decoder provided");
1019 }
1020
1021 auto getDecoder = [&]() -> const decoder_type& {
1022 if (inplaceDecoder.has_value()) {
1023 return inplaceDecoder.value();
1024 } else {
1025 return std::any_cast<const decoder_type&>(decoderExt);
1026 }
1027 };
1028
1029 // verify decoders
1030 [&]() {
1031 const decoder_type& decoder = getDecoder();
1032 const size_t decoderSymbolTablePrecision = decoder.getSymbolTablePrecision();
1033
1034 if (md.probabilityBits != decoderSymbolTablePrecision) {
1035 throw std::runtime_error(fmt::format(
1036 "Missmatch in decoder renorming precision vs metadata:{} Bits vs {} Bits.",
1037 md.probabilityBits, decoderSymbolTablePrecision));
1038 }
1039
1040 if (md.streamSize != rans::utils::getStreamingLowerBound_v<typename decoder_type::coder_type>) {
1041 throw std::runtime_error("Streaming lower bound of dataset and decoder do not match");
1042 }
1043 }();
1044
1045 // do the actual decoding
1046 if (block.getNLiterals()) {
1047 std::vector<dst_type> literals(md.nLiterals);
1048 rans::unpack(block.getLiterals(), md.nLiterals, literals.data(), md.literalsPackingWidth, md.literalsPackingOffset);
1049 getDecoder().process(block.getData() + block.getNData(), dstBegin, md.messageLength, md.nStreams, literals.end());
1050 } else {
1051 getDecoder().process(block.getData() + block.getNData(), dstBegin, md.messageLength, md.nStreams);
1052 }
1053 return {0, md.getUncompressedSize(), md.getCompressedSize() * sizeof(W)};
1054};
1055
1056template <typename H, int N, typename W>
1057template <typename dst_IT>
1059{
1060 using dest_t = typename std::iterator_traits<dst_IT>::value_type;
1061
1062 const auto& block = mBlocks[slot];
1063 const auto& md = mMetadata[slot];
1064
1065 const size_t messageLength = md.messageLength;
1066 const size_t packingWidth = md.probabilityBits;
1067 const dest_t offset = md.min;
1068 const auto* srcIt = block.getData();
1069 // we have a vector of one and the same value. All information is in the metadata
1070 if (packingWidth == 0) {
1071 const dest_t value = [&]() -> dest_t {
1072 // Bugfix: We tried packing values with a width of 0 Bits;
1073 if (md.nDataWords > 0) {
1074 LOGP(debug, "packing bug recovery: MD nStreams:{} messageLength:{} nLiterals:{} messageWordSize:{} coderType:{} streamSize:{} probabilityBits:{} (int)opt:{} min:{} max:{} literalsPackingOffset:{} literalsPackingWidth:{} nDictWords:{} nDataWords:{} nLiteralWords:{}",
1075 value, md.nStreams, md.messageLength, md.nLiterals, md.messageWordSize, md.coderType, md.streamSize, md.probabilityBits, (int)md.opt, md.min, md.max, md.literalsPackingOffset, md.literalsPackingWidth, md.nDictWords, md.nDataWords, md.nLiteralWords);
1076 return offset + static_cast<dest_t>(*srcIt);
1077 }
1078 // normal case:
1079 return offset;
1080 }();
1081 for (size_t i = 0; i < messageLength; ++i) {
1082 *dest++ = value;
1083 }
1084 } else {
1085 rans::unpack(srcIt, messageLength, dest, packingWidth, offset);
1086 }
1087 return {0, md.getUncompressedSize(), md.getCompressedSize() * sizeof(W)};
1088};
1089
1090template <typename H, int N, typename W>
1091template <typename dst_IT>
1093{
1094 // get references to the right data
1095 const auto& block = mBlocks[slot];
1096 const auto& md = mMetadata[slot];
1097
1098 using dest_t = typename std::iterator_traits<dst_IT>::value_type;
1099 using decoder_t = typename rans::compat::decoder_type<dest_t>;
1100 using destPtr_t = typename std::iterator_traits<dst_IT>::pointer;
1101
1102 destPtr_t srcBegin = reinterpret_cast<destPtr_t>(block.payload);
1103 destPtr_t srcEnd = srcBegin + md.messageLength * sizeof(dest_t);
1104 std::copy(srcBegin, srcEnd, dest);
1105
1106 return {0, md.getUncompressedSize(), md.getCompressedSize() * sizeof(W)};
1107};
1108
1110template <typename H, int N, typename W>
1111template <typename input_IT, typename buffer_T>
1112o2::ctf::CTFIOSize EncodedBlocks<H, N, W>::encode(const input_IT srcBegin, // iterator begin of source message
1113 const input_IT srcEnd, // iterator end of source message
1114 int slot, // slot in encoded data to fill
1115 uint8_t symbolTablePrecision, // encoding into
1116 Metadata::OptStore opt, // option for data compression
1117 buffer_T* buffer, // optional buffer (vector) providing memory for encoded blocks
1118 const std::any& encoderExt, // optional external encoder
1119 float memfc) // memory allocation margin factor
1120{
1121 // fill a new block
1122 assert(slot == mRegistry.nFilledBlocks);
1123 mRegistry.nFilledBlocks++;
1124
1125 const size_t messageLength = std::distance(srcBegin, srcEnd);
1126 // cover three cases:
1127 // * empty source message: no co
1128 // * source message to pass through without any entropy coding
1129 // * source message where entropy coding should be applied
1130
1131 // case 1: empty source message
1132 if (messageLength == 0) {
1133 mMetadata[slot] = Metadata{};
1134 mMetadata[slot].opt = Metadata::OptStore::NODATA;
1135 return {};
1136 }
1137 if (detail::mayEEncode(opt)) {
1138 const ANSHeader& ansVersion = getANSHeader();
1139 if (ansVersion == ANSVersionCompat) {
1140 return entropyCodeRANSCompat(srcBegin, srcEnd, slot, symbolTablePrecision, buffer, encoderExt, memfc);
1141 } else if (ansVersion == ANSVersion1) {
1142 return entropyCodeRANSV1(srcBegin, srcEnd, slot, opt, buffer, encoderExt, memfc);
1143 } else {
1144 throw std::runtime_error(fmt::format("Unsupported ANS Coder Version: {}.{}", ansVersion.majorVersion, ansVersion.minorVersion));
1145 }
1146 } else if (detail::mayPack(opt)) {
1147 return pack(srcBegin, srcEnd, slot, buffer);
1148 } else {
1149 return store(srcBegin, srcEnd, slot, opt, buffer);
1150 }
1151};
1152
1153template <typename H, int N, typename W>
1154template <typename T>
1155[[nodiscard]] auto EncodedBlocks<H, N, W>::expandStorage(size_t slot, size_t nElements, T* buffer) -> decltype(auto)
1156{
1157 // after previous relocation this (hence its data members) are not guaranteed to be valid
1158 auto* old = get(buffer->data());
1159 auto* thisBlock = &(old->mBlocks[slot]);
1160 auto* thisMetadata = &(old->mMetadata[slot]);
1161
1162 // resize underlying buffer of block if necessary and update all pointers.
1163 auto* const blockHead = get(thisBlock->registry->head); // extract pointer from the block, as "this" might be invalid
1164 const size_t additionalSize = blockHead->estimateBlockSize(nElements); // additionalSize is in bytes!!!
1165 if (additionalSize >= thisBlock->registry->getFreeSize()) {
1166 LOGP(debug, "Slot {} with {} available words needs to allocate {} bytes for a total of {} words.", slot, thisBlock->registry->getFreeSize(), additionalSize, nElements);
1167 if (buffer) {
1168 blockHead->expand(*buffer, blockHead->size() + (additionalSize - blockHead->getFreeSize()));
1169 thisMetadata = &(get(buffer->data())->mMetadata[slot]);
1170 thisBlock = &(get(buffer->data())->mBlocks[slot]); // in case of resizing this and any this.xxx becomes invalid
1171 } else {
1172 throw std::runtime_error("failed to allocate additional space in provided external buffer");
1173 }
1174 }
1175 return std::make_pair(thisBlock, thisMetadata);
1176};
1177
/// Entropy-code the source message [srcBegin, srcEnd) into block `slot` with the compat rANS encoder.
/// Without an external encoder, a histogram and an encoder are built from the samples; if that
/// fails with rans::HistogramError the message is stored with FallbackStorageType instead.
/// The block is filled in the order: dictionary (if any), encoded data, literals (if any), and
/// finally the metadata of the slot is written.
/// @param srcBegin             iterator to the begin of the source message
/// @param srcEnd               iterator to the end of the source message
/// @param slot                 slot of the block to fill
/// @param symbolTablePrecision precision used when building the encoder from the histogram
/// @param buffer               buffer providing the memory of the blocks, passed to expandStorage
/// @param encoderExt           optional external encoder; if set, no histogram is built
/// @param memfc                factor scaling the relative margin of the buffer size estimate
/// @return {0, uncompressed size, compressed size in bytes} taken from the written metadata
1178template <typename H, int N, typename W>
1179template <typename input_IT, typename buffer_T>
1180o2::ctf::CTFIOSize EncodedBlocks<H, N, W>::entropyCodeRANSCompat(const input_IT srcBegin, const input_IT srcEnd, int slot, uint8_t symbolTablePrecision, buffer_T* buffer, const std::any& encoderExt, float memfc)
1181{
1182 using storageBuffer_t = W;
1183 using input_t = typename std::iterator_traits<input_IT>::value_type;
1184 using ransEncoder_t = typename rans::compat::encoder_type<input_t>;
1185 using ransState_t = typename ransEncoder_t::coder_type::state_type;
1186 using ransStream_t = typename ransEncoder_t::stream_type;
1187
1188 // assert at compile time that output types align so that padding is not necessary.
1189 static_assert(std::is_same_v<storageBuffer_t, ransStream_t>);
1190 static_assert(std::is_same_v<storageBuffer_t, typename rans::count_t>);
1191
// these pointers are refreshed after every expandStorage call below
1192 auto* thisBlock = &mBlocks[slot];
1193 auto* thisMetadata = &mMetadata[slot];
1194
1195 // build symbol statistics
1196 constexpr size_t SizeEstMarginAbs = 10 * 1024;
1197 const float SizeEstMarginRel = 1.5 * memfc;
1198
1199 const size_t messageLength = std::distance(srcBegin, srcEnd);
1200 rans::DenseHistogram<input_t> frequencyTable{};
1201 rans::compat::encoder_type<input_t> inplaceEncoder{};
1202
1203 try {
// with an external encoder both the in-place encoder and the histogram stay default-constructed
1204 std::tie(inplaceEncoder, frequencyTable) = [&]() {
1205 if (encoderExt.has_value()) {
1206 return std::make_tuple(ransEncoder_t{}, rans::DenseHistogram<input_t>{});
1207 } else {
1208 auto histogram = rans::makeDenseHistogram::fromSamples(srcBegin, srcEnd);
1209 auto encoder = rans::compat::makeEncoder::fromHistogram(histogram, symbolTablePrecision);
1210 return std::make_tuple(std::move(encoder), std::move(histogram));
1211 }
1212 }();
1213 } catch (const rans::HistogramError& error) {
1214 LOGP(warning, "Failed to build Dictionary for rANS encoding, using fallback option");
1215 return store(srcBegin, srcEnd, slot, this->FallbackStorageType, buffer);
1216 }
1217 const ransEncoder_t& encoder = encoderExt.has_value() ? std::any_cast<const ransEncoder_t&>(encoderExt) : inplaceEncoder;
1218
1219 // estimate size of encode buffer
1220 int dataSize = rans::compat::calculateMaxBufferSizeB(messageLength, rans::compat::getAlphabetRangeBits(encoder.getSymbolTable())); // size in bytes
1221 // preliminary expansion of storage based on dict size + estimated size of encode buffer
// the trailing comparison adds 1 when input_t is narrower than a storage word, 0 otherwise
1222 dataSize = SizeEstMarginAbs + int(SizeEstMarginRel * (dataSize / sizeof(storageBuffer_t))) + (sizeof(input_t) < sizeof(storageBuffer_t)); // size in words of output stream
1223
1224 const auto view = rans::trim(rans::makeHistogramView(frequencyTable));
1225 std::tie(thisBlock, thisMetadata) = expandStorage(slot, view.size() + dataSize, buffer);
1226
1227 // store dictionary first
1228
1229 if (!view.empty()) {
1230 thisBlock->storeDict(view.size(), view.data());
1231 LOGP(debug, "StoreDict {} bytes, offs: {}:{}", view.size() * sizeof(W), thisBlock->getOffsDict(), thisBlock->getOffsDict() + view.size() * sizeof(W));
1232 }
1233 // vector of incompressible literal symbols
1234 std::vector<input_t> literals;
1235 // directly encode source message into block buffer.
1236 storageBuffer_t* const blockBufferBegin = thisBlock->getCreateData();
1237 const size_t maxBufferSize = thisBlock->registry->getFreeSize(); // note: "this" might be not valid after expandStorage call!!!
1238 const auto [encodedMessageEnd, literalsEnd] = encoder.process(srcBegin, srcEnd, blockBufferBegin, std::back_inserter(literals));
// maxBufferSize is divided by sizeof(W) to obtain the limit in words
1239 rans::utils::checkBounds(encodedMessageEnd, blockBufferBegin + maxBufferSize / sizeof(W));
// from here on dataSize holds the actual number of encoded words, not the estimate
1240 dataSize = encodedMessageEnd - thisBlock->getDataPointer();
1241 thisBlock->setNData(dataSize);
1242 thisBlock->realignBlock();
1243 LOGP(debug, "StoreData {} bytes, offs: {}:{}", dataSize * sizeof(W), thisBlock->getOffsData(), thisBlock->getOffsData() + dataSize * sizeof(W));
1244 // update the size claimed by encode message directly inside the block
1245
1246 // store incompressible symbols if any
// nLiteralSymbols is taken before the vector is padded, so the metadata gets the true symbol count
1247 const size_t nLiteralSymbols = literals.size();
1248 const size_t nLiteralWords = [&]() {
1249 if (!literals.empty()) {
1250 const size_t nSymbols = literals.size();
1251 // introduce padding in case literals don't align;
1252 const size_t nLiteralSymbolsPadded = calculatePaddedSize<input_t, storageBuffer_t>(nSymbols);
1253 literals.resize(nLiteralSymbolsPadded, {});
1254
1255 const size_t nLiteralStorageElems = calculateNDestTElements<input_t, storageBuffer_t>(nSymbols);
1256 std::tie(thisBlock, thisMetadata) = expandStorage(slot, nLiteralStorageElems, buffer);
1257 thisBlock->storeLiterals(nLiteralStorageElems, reinterpret_cast<const storageBuffer_t*>(literals.data()));
1258 LOGP(debug, "StoreLiterals {} bytes, offs: {}:{}", nLiteralStorageElems * sizeof(W), thisBlock->getOffsLiterals(), thisBlock->getOffsLiterals() + nLiteralStorageElems * sizeof(W));
1259 return nLiteralStorageElems;
1260 }
1261 return size_t(0);
1262 }();
1263
1264 LOGP(debug, "Min, {} Max, {}, size, {}, nSamples {}", view.getMin(), view.getMax(), view.size(), frequencyTable.getNumSamples());
1265
// metadata is written last, through the pointer refreshed by the latest expandStorage call
1266 *thisMetadata = detail::makeMetadataRansCompat<input_t, ransState_t, ransStream_t>(encoder.getNStreams(),
1267 messageLength,
1268 nLiteralSymbols,
1269 encoder.getSymbolTable().getPrecision(),
1270 view.getMin(),
1271 view.getMax(),
1272 view.size(),
1273 dataSize,
1274 nLiteralWords);
1275
1276 return {0, thisMetadata->getUncompressedSize(), thisMetadata->getCompressedSize() * sizeof(W)};
1277}
1278
1279template <typename H, int N, typename W>
1280template <typename input_IT, typename buffer_T>
1281o2::ctf::CTFIOSize EncodedBlocks<H, N, W>::entropyCodeRANSV1(const input_IT srcBegin, const input_IT srcEnd, int slot, Metadata::OptStore opt, buffer_T* buffer, const std::any& encoderExt, float memfc)
1282{
1283 CTFIOSize encoderStatistics{};
1284
1285 const size_t nSamples = std::distance(srcBegin, srcEnd);
1286 if (detail::mayPack(opt) && nSamples < PackingThreshold) {
1287 encoderStatistics = pack(srcBegin, srcEnd, slot, buffer);
1288 } else {
1289
1290 if (encoderExt.has_value()) {
1291 encoderStatistics = encodeRANSV1External(srcBegin, srcEnd, slot, encoderExt, buffer, memfc);
1292 } else {
1293 encoderStatistics = encodeRANSV1Inplace(srcBegin, srcEnd, slot, opt, buffer, memfc);
1294 }
1295 }
1296 return encoderStatistics;
1297}
1298
/// Entropy-code the source message [srcBegin, srcEnd) into block `slot` using an externally
/// provided rANS v1 encoder. No dictionary is written to the block (the dictionary size passed
/// to the metadata is 0); the block receives the encoded data and, if there are incompressible
/// samples, the literals.
/// @param srcBegin   iterator to the begin of the source message
/// @param srcEnd     iterator to the end of the source message
/// @param slot       slot of the block to fill
/// @param encoderExt external encoder; must hold the encoder type of internal::ExternalEntropyCoder<input_t>
/// @param buffer     buffer providing the memory of the blocks, passed to expandStorage
/// @param sizeEstimateSafetyFactor not referenced in this function body
/// @return {0, uncompressed size, compressed size in bytes} taken from the written metadata
1299template <typename H, int N, typename W>
1300template <typename input_IT, typename buffer_T>
1301CTFIOSize EncodedBlocks<H, N, W>::encodeRANSV1External(const input_IT srcBegin, const input_IT srcEnd, int slot, const std::any& encoderExt, buffer_T* buffer, double_t sizeEstimateSafetyFactor)
1302{
1303 using storageBuffer_t = W;
1304 using input_t = typename std::iterator_traits<input_IT>::value_type;
1305 using ransEncoder_t = typename internal::ExternalEntropyCoder<input_t>::encoder_type;
1306 using ransState_t = typename ransEncoder_t::coder_type::state_type;
1307 using ransStream_t = typename ransEncoder_t::stream_type;
1308
1309 // assert at compile time that output types align so that padding is not necessary.
1310 static_assert(std::is_same_v<storageBuffer_t, ransStream_t>);
1311 static_assert(std::is_same_v<storageBuffer_t, typename rans::count_t>);
1312
// these pointers are refreshed after every expandStorage call below
1313 auto* thisBlock = &mBlocks[slot];
1314 auto* thisMetadata = &mMetadata[slot];
1315
1316 const size_t messageLength = std::distance(srcBegin, srcEnd);
1317 internal::ExternalEntropyCoder<input_t> encoder{std::any_cast<const ransEncoder_t&>(encoderExt)};
1318
1319 const size_t payloadSizeWords = encoder.template computePayloadSizeEstimate<storageBuffer_t>(messageLength);
1320 std::tie(thisBlock, thisMetadata) = expandStorage(slot, payloadSizeWords, buffer);
1321
1322 // encode payload
1323 auto encodedMessageEnd = encoder.encode(srcBegin, srcEnd, thisBlock->getCreateData(), thisBlock->getEndOfBlock());
1324 const size_t dataSize = std::distance(thisBlock->getCreateData(), encodedMessageEnd);
1325 thisBlock->setNData(dataSize);
1326 thisBlock->realignBlock();
1327 LOGP(debug, "StoreData {} bytes, offs: {}:{}", dataSize * sizeof(storageBuffer_t), thisBlock->getOffsData(), thisBlock->getOffsData() + dataSize * sizeof(storageBuffer_t));
1328 // update the size claimed by encoded message directly inside the block
1329
1330 // encode literals
1331 size_t literalsSize = 0;
1332 if (encoder.getNIncompressibleSamples() > 0) {
// storage is expanded a second time, sized for the packed incompressible samples
1333 const size_t literalsBufferSizeWords = encoder.template computePackedIncompressibleSize<storageBuffer_t>();
1334 std::tie(thisBlock, thisMetadata) = expandStorage(slot, literalsBufferSizeWords, buffer);
1335 auto literalsEnd = encoder.writeIncompressible(thisBlock->getCreateLiterals(), thisBlock->getEndOfBlock());
1336 literalsSize = std::distance(thisBlock->getCreateLiterals(), literalsEnd);
1337 thisBlock->setNLiterals(literalsSize);
1338 thisBlock->realignBlock();
1339 LOGP(debug, "StoreLiterals {} bytes, offs: {}:{}", literalsSize * sizeof(storageBuffer_t), thisBlock->getOffsLiterals(), thisBlock->getOffsLiterals() + literalsSize * sizeof(storageBuffer_t));
1340 }
1341
1342 // write metadata
1343 const auto& symbolTable = encoder.getEncoder().getSymbolTable();
// the literal 0 below is the dictionary size: this path stores no dictionary in the block
1344 *thisMetadata = detail::makeMetadataRansV1<input_t, ransState_t, ransStream_t>(encoder.getEncoder().getNStreams(),
1345 rans::utils::getStreamingLowerBound_v<typename ransEncoder_t::coder_type>,
1346 messageLength,
1347 encoder.getNIncompressibleSamples(),
1348 symbolTable.getPrecision(),
1349 symbolTable.getOffset(),
1350 symbolTable.getOffset() + symbolTable.size(),
1351 encoder.getIncompressibleSymbolOffset(),
1352 encoder.getIncompressibleSymbolPackingBits(),
1353 0,
1354 dataSize,
1355 literalsSize);
1356
1357 return {0, thisMetadata->getUncompressedSize(), thisMetadata->getCompressedSize() * sizeof(W)};
1358};
1359
/// Entropy-code the source message [srcBegin, srcEnd) into block `slot` with a rANS v1 encoder
/// built from the message itself. If building the coder fails with rans::HistogramError the
/// message is stored with FallbackStorageType; if `opt` allows packing and the metrics prefer
/// it, the message is bit-packed instead. Otherwise the block is filled in the order
/// dictionary, encoded data, literals (if any), and the metadata of the slot is written last.
/// @param srcBegin iterator to the begin of the source message
/// @param srcEnd   iterator to the end of the source message
/// @param slot     slot of the block to fill
/// @param opt      option for data compression; only checked for whether packing is allowed
/// @param buffer   buffer providing the memory of the blocks, passed to expandStorage
/// @param sizeEstimateSafetyFactor factor applied to the estimated buffer size in bytes
/// @return {0, uncompressed size, compressed size in bytes} taken from the written metadata
1360template <typename H, int N, typename W>
1361template <typename input_IT, typename buffer_T>
1362CTFIOSize EncodedBlocks<H, N, W>::encodeRANSV1Inplace(const input_IT srcBegin, const input_IT srcEnd, int slot, Metadata::OptStore opt, buffer_T* buffer, double_t sizeEstimateSafetyFactor)
1363{
1364 using storageBuffer_t = W;
1365 using input_t = typename std::iterator_traits<input_IT>::value_type;
1366 using ransEncoder_t = typename rans::denseEncoder_type<input_t>;
1367 using ransState_t = typename ransEncoder_t::coder_type::state_type;
1368 using ransStream_t = typename ransEncoder_t::stream_type;
1369
1370 // assert at compile time that output types align so that padding is not necessary.
1371 static_assert(std::is_same_v<storageBuffer_t, ransStream_t>);
1372 static_assert(std::is_same_v<storageBuffer_t, typename rans::count_t>);
1373
1374 auto* thisBlock = &mBlocks[slot];
1375 auto* thisMetadata = &mMetadata[slot];
1376
// NOTE(review): the declaration of `encoder` (assigned below as internal::InplaceEntropyCoder<input_t>)
// is not visible in this listing; original line 1377 appears to be missing - confirm against the real header.
// The predicate passed to the proxy is true for non-pointer iterators with fewer than
// rans::utils::pow2(23) samples; presumably it decides whether the source gets cached - TODO confirm.
1378 rans::SourceProxy<input_IT> proxy{srcBegin, srcEnd, [](input_IT begin, input_IT end) {
1379 const size_t nSamples = std::distance(begin, end);
1380 return (!std::is_pointer_v<input_IT> && (nSamples < rans::utils::pow2(23)));
1381 }};
1382
1383 try {
1384 if (proxy.isCached()) {
1385 encoder = internal::InplaceEntropyCoder<input_t>{proxy.beginCache(), proxy.endCache()};
1386 } else {
1387 encoder = internal::InplaceEntropyCoder<input_t>{proxy.beginIter(), proxy.endIter()};
1388 }
1389 } catch (const rans::HistogramError& error) {
1390 LOGP(warning, "Failed to build Dictionary for rANS encoding, using fallback option");
1391 if (proxy.isCached()) {
1392 return store(proxy.beginCache(), proxy.endCache(), slot, this->FallbackStorageType, buffer);
1393 } else {
1394 return store(proxy.beginIter(), proxy.endIter(), slot, this->FallbackStorageType, buffer);
1395 }
1396 }
1397
1398 const rans::Metrics<input_t>& metrics = encoder.getMetrics();
1399 /*
1400 if constexpr (sizeof(input_t) > 2) {
1401 const auto& dp = metrics.getDatasetProperties();
1402 LOGP(info, "Metrics:{{slot: {}, numSamples: {}, min: {}, max: {}, alphabetRangeBits: {}, nUsedAlphabetSymbols: {}, preferPacking: {}}}", slot, dp.numSamples, dp.min, dp.max, dp.alphabetRangeBits, dp.nUsedAlphabetSymbols, metrics.getSizeEstimate().preferPacking());
1403 }
1404 */
// packing is chosen over entropy coding only if the caller allows it and the size estimate prefers it
1405 if (detail::mayPack(opt) && metrics.getSizeEstimate().preferPacking()) {
1406 if (proxy.isCached()) {
1407 return pack(proxy.beginCache(), proxy.endCache(), slot, metrics, buffer);
1408 } else {
1409 return pack(proxy.beginIter(), proxy.endIter(), slot, metrics, buffer);
1410 };
1411 }
1412
1413 encoder.makeEncoder();
1414
// a single expansion sized for dictionary + data + incompressible symbols, scaled by the safety factor
1415 const rans::SizeEstimate sizeEstimate = metrics.getSizeEstimate();
1416 const size_t bufferSizeWords = rans::utils::nBytesTo<storageBuffer_t>((sizeEstimate.getCompressedDictionarySize() +
1417 sizeEstimate.getCompressedDatasetSize() +
1418 sizeEstimate.getIncompressibleSize()) *
1419 sizeEstimateSafetyFactor);
1420 std::tie(thisBlock, thisMetadata) = expandStorage(slot, bufferSizeWords, buffer);
1421
1422 // encode dict
1423 auto encodedDictEnd = encoder.writeDictionary(thisBlock->getCreateDict(), thisBlock->getEndOfBlock());
1424 const size_t dictSize = std::distance(thisBlock->getCreateDict(), encodedDictEnd);
1425 thisBlock->setNDict(dictSize);
1426 thisBlock->realignBlock();
1427 LOGP(debug, "StoreDict {} bytes, offs: {}:{}", dictSize * sizeof(storageBuffer_t), thisBlock->getOffsDict(), thisBlock->getOffsDict() + dictSize * sizeof(storageBuffer_t));
1428
1429 // encode payload
1430 auto encodedMessageEnd = thisBlock->getCreateData();
1431 if (proxy.isCached()) {
1432 encodedMessageEnd = encoder.encode(proxy.beginCache(), proxy.endCache(), thisBlock->getCreateData(), thisBlock->getEndOfBlock());
1433 } else {
1434 encodedMessageEnd = encoder.encode(proxy.beginIter(), proxy.endIter(), thisBlock->getCreateData(), thisBlock->getEndOfBlock());
1435 }
1436 const size_t dataSize = std::distance(thisBlock->getCreateData(), encodedMessageEnd);
1437 thisBlock->setNData(dataSize);
1438 thisBlock->realignBlock();
1439 LOGP(debug, "StoreData {} bytes, offs: {}:{}", dataSize * sizeof(storageBuffer_t), thisBlock->getOffsData(), thisBlock->getOffsData() + dataSize * sizeof(storageBuffer_t));
1440 // update the size claimed by encoded message directly inside the block
1441
1442 // encode literals
// no further expandStorage call here: the estimate above already includes the incompressible size
1443 size_t literalsSize{};
1444 if (encoder.getNIncompressibleSamples() > 0) {
1445 auto literalsEnd = encoder.writeIncompressible(thisBlock->getCreateLiterals(), thisBlock->getEndOfBlock());
1446 literalsSize = std::distance(thisBlock->getCreateLiterals(), literalsEnd);
1447 thisBlock->setNLiterals(literalsSize);
1448 thisBlock->realignBlock();
1449 LOGP(debug, "StoreLiterals {} bytes, offs: {}:{}", literalsSize * sizeof(storageBuffer_t), thisBlock->getOffsLiterals(), thisBlock->getOffsLiterals() + literalsSize * sizeof(storageBuffer_t));
1450 }
1451
1452 // write metadata
1453 *thisMetadata = detail::makeMetadataRansV1<input_t, ransState_t, ransStream_t>(encoder.getNStreams(),
1454 rans::utils::getStreamingLowerBound_v<typename ransEncoder_t::coder_type>,
1455 std::distance(srcBegin, srcEnd),
1456 encoder.getNIncompressibleSamples(),
1457 encoder.getSymbolTablePrecision(),
1458 *metrics.getCoderProperties().min,
1459 *metrics.getCoderProperties().max,
1460 metrics.getDatasetProperties().min,
1461 metrics.getDatasetProperties().alphabetRangeBits,
1462 dictSize,
1463 dataSize,
1464 literalsSize);
1465
1466 return {0, thisMetadata->getUncompressedSize(), thisMetadata->getCompressedSize() * sizeof(W)};
1467}; // end of EncodedBlocks::encodeRANSV1Inplace
1468
/// Bit-pack the source message [srcBegin, srcEnd) into block `slot`, using precomputed metrics.
/// Three cases are distinguished from the dataset properties of `metrics`:
///  - no samples: only empty packing metadata is written;
///  - alphabet range of 0 bits: only metadata is written, carrying the first source symbol;
///  - otherwise: the storage is expanded, the message is packed into the data section of the
///    block and the packing width/offset are recorded in the metadata.
/// @param srcBegin iterator to the begin of the source message
/// @param srcEnd   iterator to the end of the source message
/// @param slot     slot of the block to fill
/// @param metrics  metrics of the source message (number of samples, alphabet range bits)
/// @param buffer   buffer providing the memory of the blocks, passed to expandStorage
/// @return {0, uncompressed size, compressed size in bytes} taken from the written metadata
1469template <typename H, int N, typename W>
1470template <typename input_IT, typename buffer_T>
1471o2::ctf::CTFIOSize EncodedBlocks<H, N, W>::pack(const input_IT srcBegin, const input_IT srcEnd, int slot, rans::Metrics<typename std::iterator_traits<input_IT>::value_type> metrics, buffer_T* buffer)
1472{
1473 using storageBuffer_t = W;
1474 using input_t = typename std::iterator_traits<input_IT>::value_type;
1475
// the message length is taken from the metrics, not recomputed from the iterators
1476 const size_t messageLength = metrics.getDatasetProperties().numSamples;
// alphabetRangeBits is not referenced again in the visible lines; the check below re-reads it from the metrics
1477 const auto alphabetRangeBits = metrics.getDatasetProperties().alphabetRangeBits;
1478
1479 auto* thisBlock = &mBlocks[slot];
1480 auto* thisMetadata = &mMetadata[slot];
1481 size_t packedSize = 0;
1482
1483 if (messageLength == 0) {
1484 *thisMetadata = detail::makeMetadataPack<input_t>(0, 0, 0, 0);
1485 } else if (metrics.getDatasetProperties().alphabetRangeBits == 0) {
// nothing is written to the block in this case; the first symbol goes into the metadata
1486 *thisMetadata = detail::makeMetadataPack<input_t>(messageLength, 0, *srcBegin, 0);
1487 } else {
// NOTE(review): the declaration of `packer` is not visible in this listing; original line 1488
// appears to be missing - confirm its type and construction against the real header.
1489 size_t packingBufferWords = packer.template getPackingBufferSize<storageBuffer_t>(messageLength);
1490 std::tie(thisBlock, thisMetadata) = expandStorage(slot, packingBufferWords, buffer);
1491 auto packedMessageEnd = packer.pack(srcBegin, srcEnd, thisBlock->getCreateData(), thisBlock->getEndOfBlock());
1492 packedSize = std::distance(thisBlock->getCreateData(), packedMessageEnd);
1493 *thisMetadata = detail::makeMetadataPack<input_t>(messageLength, packer.getPackingWidth(), packer.getOffset(), packedSize);
1494 thisBlock->setNData(packedSize);
1495 thisBlock->realignBlock();
1496 }
1497
1498 LOGP(debug, "StoreData {} bytes, offs: {}:{}", packedSize * sizeof(storageBuffer_t), thisBlock->getOffsData(), thisBlock->getOffsData() + packedSize * sizeof(storageBuffer_t));
1499 return {0, thisMetadata->getUncompressedSize(), thisMetadata->getCompressedSize() * sizeof(W)};
1500};
1501
1502template <typename H, int N, typename W>
1503template <typename input_IT, typename buffer_T>
1504o2::ctf::CTFIOSize EncodedBlocks<H, N, W>::store(const input_IT srcBegin, const input_IT srcEnd, int slot, Metadata::OptStore opt, buffer_T* buffer)
1505{
1506 using storageBuffer_t = W;
1507 using input_t = typename std::iterator_traits<input_IT>::value_type;
1508
1509 const size_t messageLength = std::distance(srcBegin, srcEnd);
1510 // introduce padding in case literals don't align;
1511 const size_t nSourceElemsPadded = calculatePaddedSize<input_t, storageBuffer_t>(messageLength);
1512 std::vector<input_t> tmp(nSourceElemsPadded, {});
1513 std::copy(srcBegin, srcEnd, std::begin(tmp));
1514
1515 const size_t nBufferElems = calculateNDestTElements<input_t, storageBuffer_t>(messageLength);
1516 auto [thisBlock, thisMetadata] = expandStorage(slot, nBufferElems, buffer);
1517 thisBlock->storeData(nBufferElems, reinterpret_cast<const storageBuffer_t*>(tmp.data()));
1518
1519 *thisMetadata = detail::makeMetadataStore<input_t, storageBuffer_t>(messageLength, opt, nBufferElems);
1520
1521 return {0, thisMetadata->getUncompressedSize(), thisMetadata->getCompressedSize() * sizeof(W)};
1522};
1523
1525template <typename H, int N, typename W>
1526std::vector<char> EncodedBlocks<H, N, W>::createDictionaryBlocks(const std::vector<rans::DenseHistogram<int32_t>>& vfreq, const std::vector<Metadata>& vmd)
1527{
1528
1529 if (vfreq.size() != N) {
1530 throw std::runtime_error(fmt::format("mismatch between the size of frequencies vector {} and number of blocks {}", vfreq.size(), N));
1531 }
1532 size_t sz = alignSize(sizeof(EncodedBlocks<H, N, W>));
1533 for (int ib = 0; ib < N; ib++) {
1534 sz += Block<W>::estimateSize(vfreq[ib].size());
1535 }
1536 std::vector<char> vdict(sz); // memory space for dictionary
1537 auto dictBlocks = create(vdict.data(), sz);
1538 for (int ib = 0; ib < N; ib++) {
1539 const auto& thisHistogram = vfreq[ib];
1540 const auto view = rans::trim(rans::makeHistogramView(thisHistogram));
1541
1542 if (!view.empty()) {
1543 LOG(info) << "adding dictionary of " << view.size() << " words for block " << ib << ", min/max= " << view.getMin() << "/" << view.getMax();
1544 dictBlocks->mBlocks[ib].storeDict(view.size(), view.data());
1545 dictBlocks = get(vdict.data()); // !!! rellocation might have invalidated dictBlocks pointer
1546 dictBlocks->mMetadata[ib] = vmd[ib];
1547 dictBlocks->mMetadata[ib].opt = Metadata::OptStore::ROOTCompression; // we will compress the dictionary with root!
1548 dictBlocks->mBlocks[ib].realignBlock();
1549 } else {
1550 dictBlocks->mMetadata[ib].opt = Metadata::OptStore::NONE;
1551 }
1552 dictBlocks->mRegistry.nFilledBlocks++;
1553 }
1554 return vdict;
1555}
1556#endif
1557
1558template <typename H, int N, typename W>
1559void EncodedBlocks<H, N, W>::dump(const std::string& prefix, int ncol) const
1560{
1561 for (int ibl = 0; ibl < getNBlocks(); ibl++) {
1562 const auto& blc = getBlock(ibl);
1563 std::string ss;
1564 LOGP(info, "{} Bloc:{} Dict: {} words", prefix, ibl, blc.getNDict());
1565 const auto* ptr = blc.getDict();
1566 for (int i = 0; i < blc.getNDict(); i++) {
1567 if (i && (i % ncol) == 0) {
1568 LOG(info) << ss;
1569 ss.clear();
1570 }
1571 ss += fmt::format(" {:#010x}", ptr[i]);
1572 }
1573 if (!ss.empty()) {
1574 LOG(info) << ss;
1575 ss.clear();
1576 }
1577 LOG(info) << "\n";
1578 LOGP(info, "{} Bloc:{} Data: {} words", prefix, ibl, blc.getNData());
1579 ptr = blc.getData();
1580 for (int i = 0; i < blc.getNData(); i++) {
1581 if (i && (i % ncol) == 0) {
1582 LOG(info) << ss;
1583 ss.clear();
1584 }
1585 ss += fmt::format(" {:#010x}", ptr[i]);
1586 }
1587 if (!ss.empty()) {
1588 LOG(info) << ss;
1589 ss.clear();
1590 }
1591 LOG(info) << "\n";
1592 LOGP(info, "{} Bloc:{} Literals: {} words", prefix, ibl, blc.getNLiterals());
1593 ptr = blc.getData();
1594 for (int i = 0; i < blc.getNLiterals(); i++) {
1595 if (i && (i % 20) == 0) {
1596 LOG(info) << ss;
1597 ss.clear();
1598 }
1599 ss += fmt::format(" {:#010x}", ptr[i]);
1600 }
1601 if (!ss.empty()) {
1602 LOG(info) << ss;
1603 ss.clear();
1604 }
1605 LOG(info) << "\n";
1606 }
1607}
1608
1609} // namespace ctf
1610} // namespace o2
1611
1612#endif
representation of ANS Version number in a comparable way
#define verbosity
Header: timestamps and format version for detector CTF dictionary.
std::ostringstream debug
int32_t i
Metadata required to decode a Block.
Interfaces for BitPacking using librans.
uint32_t res
Definition RawData.h:0
TBranch * ptr
useful public helper functions.
const auto & getData()
uint64_t ransState_t
uint32_t source_type
Class for time synchronization of RawReader instances.
<<======================== Auxiliary classes =======================<<
CTFIOSize decodeCopyImpl(dst_IT dest, int slot) const
static constexpr Metadata::OptStore FallbackStorageType
this is in fact stored, but to overcome TBuffer limits we have to define the branches per block!...
void readFromTree(TTree &tree, const std::string &name, int ev=0)
read from tree to non-flat object
o2::ctf::CTFIOSize entropyCodeRANSV1(const input_IT srcBegin, const input_IT srcEnd, int slot, Metadata::OptStore opt, buffer_T *buffer=nullptr, const std::any &encoderExt={}, float memfc=1.f)
static auto get(void *head)
cast arbitrary buffer head to container class. Head is supposed to respect the alignment
ClassDefNV(EncodedBlocks, 3)
static size_t estimateBlockSize(int n)
estimate free size needed to add new block
CTFIOSize decodeCompatImpl(dst_IT dest, int slot, const std::any &decoderExt) const
void clear()
clear itself
o2::ctf::CTFIOSize decode(container_T &dest, int slot, const std::any &decoderExt={}) const
decode block at provided slot to destination vector (will be resized as needed)
static auto get(const void *head)
size_t estimateSize() const
size_t estimateSizeFromMetadata() const
do the same using metadata info
static void relocate(const char *oldHead, char *newHead, char *wrapper, size_t newsize=0)
auto expandStorage(size_t slot, size_t nElemets, T *buffer=nullptr) -> decltype(auto)
void setHeader(const H &h)
o2::ctf::CTFIOSize encodeRANSV1External(const input_IT srcBegin, const input_IT srcEnd, int slot, const std::any &encoderExt, buffer_T *buffer=nullptr, double_t sizeEstimateSafetyFactor=1)
static void readFromTree(VD &vec, TTree &tree, const std::string &name, int ev=0)
read from tree to destination buffer vector
void dump(const std::string &prefix="", int ncol=20) const
CTFIOSize decodeRansV1Impl(dst_IT dest, int slot, const std::any &decoderExt) const
ANSHeader checkANSVersion(ANSHeader ansVersion) const
void fillFlatCopy(EncodedBlocks &dest) const
Create its own flat copy in the destination empty flat object.
void copyToFlat(void *base)
copy itself to flat buffer created on the fly at the provided pointer. The destination block should b...
std::array< Block< W >, N > mBlocks
const H & getHeader() const
static size_t fillTreeBranch(TTree &tree, const std::string &brname, D &dt, int compLevel, int splitLevel=99)
add and fill single branch
auto & getMetadata(int i) const
static size_t getMinAlignedSize()
o2::ctf::CTFIOSize pack(const input_IT srcBegin, const input_IT srcEnd, int slot, buffer_T *buffer=nullptr)
static auto expand(buffer_T &buffer, size_t newsizeBytes)
expand the storage to new size in bytes
void print(const std::string &prefix="", int verbosity=1) const
print itself
o2::ctf::CTFIOSize encodeRANSV1Inplace(const input_IT srcBegin, const input_IT srcEnd, int slot, Metadata::OptStore opt, buffer_T *buffer=nullptr, double_t sizeEstimateSafetyFactor=1)
dictionaryType< source_T > getDictionary(int i, ANSHeader ansVersion=ANSVersionUnspecified) const
bool flat() const
check if flat and valid
static std::vector< char > createDictionaryBlocks(const std::vector< rans::DenseHistogram< int32_t > > &vfreq, const std::vector< Metadata > &prbits)
create a special EncodedBlocks containing only dictionaries made from provided vector of frequency ta...
const auto & getMetadata() const
static constexpr int getNBlocks()
const ANSHeader & getANSHeader() const
auto & getBlock(int i) const
bool empty() const
check if empty and valid
static auto getImage(const void *newHead)
get const image of the container wrapper, with pointers in the image relocated to new head
size_t outputsize() const
used part of total allocated size in bytes (output size)
static bool readTreeBranch(TTree &tree, const std::string &brname, D &dt, int ev=0)
read single branch
size_t compactify()
Compactify by eliminating empty space.
CTFIOSize decodeUnpackImpl(dst_IT dest, int slot) const
std::variant< rans::RenormedSparseHistogram< source_T >, rans::RenormedDenseHistogram< source_T > > dictionaryType
o2::ctf::CTFIOSize decode(D_IT dest, int slot, const std::any &decoderExt={}) const
decode block at provided slot to destination pointer; the needed space is assumed to be available
size_t size() const
total allocated size in bytes
EncodedBlocks< H, N, W > base
static auto create(VD &v)
create container from vector. Head is supposed to respect the alignment
void setANSHeader(const ANSHeader &h)
o2::ctf::CTFIOSize store(const input_IT srcBegin, const input_IT srcEnd, int slot, Metadata::OptStore opt, buffer_T *buffer=nullptr)
o2::ctf::CTFIOSize encode(const input_IT srcBegin, const input_IT srcEnd, int slot, uint8_t symbolTablePrecision, Metadata::OptStore opt, buffer_T *buffer=nullptr, const std::any &encoderExt={}, float memfc=1.f)
encode vector src to block at provided slot
size_t getFreeSize() const
size remaining for additional data
o2::ctf::CTFIOSize encode(const VE &src, int slot, uint8_t symbolTablePrecision, Metadata::OptStore opt, buffer_T *buffer=nullptr, const std::any &encoderExt={}, float memfc=1.f)
encode vector src to block at provided slot
const auto & getRegistry() const
static auto create(void *head, size_t sz)
create container from arbitrary buffer of predefined size (in bytes!!!). Head is supposed to respect the alignment
std::shared_ptr< H > cloneHeader() const
ANSHeader & getANSHeader()
void init(size_t sz)
setup internal structure and registry for given buffer size (in bytes!!!)
void copyToFlat(V &vec)
copy itself to flat buffer created on the fly from the vector
size_t appendToTree(TTree &tree, const std::string &name) const
attach to tree
o2::ctf::CTFIOSize entropyCodeRANSCompat(const input_IT srcBegin, const input_IT srcEnd, int slot, uint8_t symbolTablePrecision, buffer_T *buffer=nullptr, const std::any &encoderExt={}, float memfc=1.f)
std::array< Metadata, N > mMetadata
o2::ctf::CTFIOSize pack(const input_IT srcBegin, const input_IT srcEnd, int slot, rans::Metrics< typename std::iterator_traits< input_IT >::value_type > metrics, buffer_T *buffer=nullptr)
typename rans::denseEncoder_type< source_type > encoder_type
const DatasetProperties< source_type > & getDatasetProperties() const noexcept
Definition Metrics.h:52
size_t getIncompressibleSize(double_t safetyFactor=1.2) const
size_t getCompressedDatasetSize(double_t safetyFactor=1.2) const
size_t getCompressedDictionarySize(double_t safetyFactor=2) const
static decltype(auto) fromHistogram(DenseHistogram< source_T > histogram, size_t renormingPrecision=0)
Definition compat.h:187
functionality to maintain compatibility with previous version of this library
static factory classes for building histograms, encoders and decoders.
GLdouble n
Definition glcorearb.h:1982
GLeglImageOES image
Definition glcorearb.h:4021
GLenum src
Definition glcorearb.h:1767
GLuint buffer
Definition glcorearb.h:655
GLsizeiptr size
Definition glcorearb.h:659
GLuint GLuint end
Definition glcorearb.h:469
const GLdouble * v
Definition glcorearb.h:832
GLenum GLsizei dataSize
Definition glcorearb.h:3994
GLuint const GLchar * name
Definition glcorearb.h:781
GLsizei GLenum const void GLuint GLsizei GLfloat * metrics
Definition glcorearb.h:5500
GLboolean GLboolean GLboolean b
Definition glcorearb.h:1233
GLsizei const GLfloat * value
Definition glcorearb.h:819
GLintptr offset
Definition glcorearb.h:660
public interface for building and renorming histograms from source data.
constexpr bool mayPack(Metadata::OptStore opt) noexcept
constexpr bool is_iterator_v
constexpr bool mayEEncode(Metadata::OptStore opt) noexcept
constexpr size_t calculateNDestTElements(size_t nElems) noexcept
constexpr size_t PackingThreshold
constexpr int WrappersCompressionLevel
uint8_t BufferType
This is the type of the vector to be used for the EncodedBlocks buffer allocation.
constexpr ANSHeader ANSVersionCompat
Definition ANSHeader.h:54
constexpr ANSHeader ANSVersion1
Definition ANSHeader.h:55
constexpr ANSHeader ANSVersionUnspecified
Definition ANSHeader.h:53
size_t calculatePaddedSize(size_t nElems) noexcept
constexpr size_t Alignment
constexpr int WrappersSplitLevel
size_t alignSize(size_t sizeBytes)
align size to the given number of bytes
T * relocatePointer(const char *oldBase, char *newBase, const T *ptr)
relocate pointer by the difference of addresses
decltype(makeEncoder::fromRenormed(RenormedDenseHistogram< source_T >{})) encoder_type
Definition compat.h:292
RenormedDenseHistogram< source_T > renorm(DenseHistogram< source_T > histogram, size_t newPrecision=0)
Definition compat.h:75
size_t getAlphabetRangeBits(const DenseHistogram< source_T > &histogram) noexcept
Definition compat.h:260
size_t calculateMaxBufferSizeB(size_t nElements, size_t rangeBits)
Definition compat.h:282
decltype(makeDecoder::fromRenormed(RenormedDenseHistogram< source_T >{})) decoder_type
Definition compat.h:295
constexpr uint32_t getRangeBits(T min, T max) noexcept
Definition utils.h:200
constexpr size_t pow2(size_t n) noexcept
Definition utils.h:165
void checkBounds(IT iteratorPosition, IT upperBound)
Definition utils.h:244
size_t sanitizeRenormingBitRange(size_t renormPrecision)
Definition utils.h:212
decltype(makeDenseEncoder<>::fromRenormed(RenormedDenseHistogram< source_T >{})) denseEncoder_type
Definition factory.h:229
decltype(makeDecoder<>::fromRenormed(RenormedDenseHistogram< source_T >{})) defaultDecoder_type
Definition factory.h:238
auto makeHistogramView(container_T &container, std::ptrdiff_t offset) noexcept -> HistogramView< decltype(std::begin(container))>
void unpack(const input_T *__restrict inputBegin, size_t extent, output_IT outputBegin, size_t packingWidth, typename std::iterator_traits< output_IT >::value_type offset=static_cast< typename std::iterator_traits< output_IT >::value_type >(0))
Definition pack.h:346
HistogramView< Hist_IT > trim(const HistogramView< Hist_IT > &buffer)
RenormedDenseHistogram< source_T > readRenormedDictionary(buffer_IT begin, buffer_IT end, source_T min, source_T max, size_t renormingPrecision)
Definition serialize.h:188
decltype(auto) renorm(histogram_T histogram, size_t newPrecision, RenormingPolicy renormingPolicy=RenormingPolicy::Auto, size_t lowProbabilityCutoffBits=0)
Definition renorm.h:203
RenormedSparseHistogram< source_T > readRenormedSetDictionary(buffer_IT begin, buffer_IT end, source_T min, source_T max, size_t renormingPrecision)
Definition serialize.h:215
a couple of static helper functions to create timestamp values for CCDB queries or override obsolete ...
Defining DataPointCompositeObject explicitly as copiable.
std::string to_string(gsl::span< T, Size > span)
Definition common.h:52
public interface for serializing histograms (dictionaries) to JSON or compressed binary.
uint8_t majorVersion
Definition ANSHeader.h:28
uint8_t minorVersion
Definition ANSHeader.h:29
binary blob for single entropy-compressed column: metadata + (optional) dictionary and data buffer + ...
int getNStored() const
const W * getData() const
Registry * registry
const W * getDict() const
int getNLiterals() const
const W * getDataPointer() const
void storeDict(int _ndict, const W *_dict)
void storeLiterals(int _nliterals, const W *_literals)
const W * getLiterals() const
int nDict
non-persistent info for in-memory ops
const W * getEndOfBlock() const
int getNDict() const
void storeData(int _ndata, const W *_data)
void setNData(int _ndata)
int getNData() const
void clear()
clear itself
void store(int _ndict, int _ndata, int _nliterals, const W *_dict, const W *_data, const W *_literals)
store binary blob data (buffer filled from head to tail)
void setNDict(int _ndict)
void setNLiterals(int _nliterals)
ClassDefNV(Block, 1)
static size_t estimateSize(int n)
estimate free size needed to add new block
void relocate(const char *oldHead, char *newHeadData, char *newHeadRegistry)
relocate to different head position
>>======================== Auxiliary classes =======================>>
int nFilledBlocks
pointer on the head of the CTF
char * getFreeBlockEnd() const
size_t getFreeSize() const
size in bytes available to fill data
size_t size
offset of the start of the writable space (wrt head), in bytes!!!
ClassDefNV(Registry, 1)
char * getFreeBlockStart() const
calculate the pointer of the head of the writable space
static constexpr size_t nStreams
Definition compat.h:52
static decltype(auto) fromSamples(source_IT begin, source_IT end, typename std::iterator_traits< source_IT >::value_type min, typename std::iterator_traits< source_IT >::value_type max)
Definition factory.h:144
static std::string concat_string(Ts const &... ts)
int estimateSize(bool withHB=false)
std::vector< o2::ctf::BufferType > vec
LOG(info)<< "Compressed in "<< sw.CpuTime()<< " s"
coder decode(ctfImage, triggersD, clustersD)
std::unique_ptr< TTree > tree((TTree *) flIn.Get(std::string(o2::base::NameConf::CTFTREENAME).c_str()))