Project
Loading...
Searching...
No Matches
SizeEstimate.h
Go to the documentation of this file.
1// Copyright 2019-2023 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
15
16#ifndef RANS_INTERNAL_METRICS_SIZEESTIMATE_H_
17#define RANS_INTERNAL_METRICS_SIZEESTIMATE_H_
18
19#include <cstdint>
20#include <cmath>
21
22#include <fairlogger/Logger.h>
23
29
30namespace o2::rans
31{
32
33template <CoderTag tag_V = defaults::DefaultTag, size_t lowerBound_V = defaults::CoderPreset<tag_V>::renormingLowerBound>
34inline constexpr size_t addEncoderOverheadEstimateB(size_t sizeB) noexcept
35{
36 constexpr size_t nStreams = defaults::CoderPreset<tag_V>::nStreams;
37 using state_type = typename internal::CoderTraits_t<tag_V>::state_type;
38 constexpr size_t minSize = nStreams * sizeof(state_type); // mandatory size of flushing
39 constexpr size_t overhead = utils::toBytes(lowerBound_V * nStreams);
40
41 return std::max(minSize, sizeB + overhead);
42}
43
// Forward declaration; Metrics is only taken by const reference in the
// constructor signature below.
template <typename source_T>
class Metrics;

/// Holds byte-size estimates for a dataset and hands them out through getters
/// that apply a caller-supplied safety factor.
///
/// All sizes are zero after default construction. The templated getters pass
/// the scaled byte count through `utils::nBytesTo<T>` — presumably to express
/// it in units of `T`; confirm against the utils header.
class SizeEstimate
{
 public:
  /// Creates an estimate in which every stored size is zero.
  inline SizeEstimate() = default;

  /// Fills all size estimates from the dataset and coder properties of `metrics`.
  template <typename source_T>
  inline explicit SizeEstimate(const Metrics<source_T>& metrics) noexcept;

  /// \return the stored entropy-based size in bytes, unscaled.
  [[nodiscard]] size_t getEntropySizeB() const;

  /// \param safetyFactor multiplier applied to the stored compressed dataset size before rounding up.
  template <typename T = uint8_t>
  [[nodiscard]] size_t getCompressedDatasetSize(double_t safetyFactor = 1.2) const;

  /// \param safetyFactor multiplier applied to the stored dictionary size before rounding up;
  ///        a fixed absolute overhead of 8 bytes is added on top.
  template <typename T = uint8_t>
  [[nodiscard]] size_t getCompressedDictionarySize(double_t safetyFactor = 2) const;

  /// \param safetyFactor multiplier applied to the stored incompressible size before rounding up.
  template <typename T = uint8_t>
  [[nodiscard]] size_t getIncompressibleSize(double_t safetyFactor = 1.2) const;

  /// \param safetyFactor multiplier applied to the stored packed dataset size before rounding up.
  template <typename T = uint8_t>
  [[nodiscard]] size_t getPackedDatasetSize(double_t safetyFactor = 1) const;

  /// \param weight multiplier applied to the packed size before the comparison.
  /// \return true if the weighted packed size does not exceed the sum of the
  ///         compressed dataset, dictionary and incompressible sizes. This is
  ///         also true when all sizes are zero (empty dataset).
  [[nodiscard]] inline bool preferPacking(double_t weight = 1) const;

 private:
  size_t mEntropySizeB{};              // entropy-based size, bytes
  size_t mCompressedDatasetSizeB{};    // entropy size plus encoder overhead, bytes
  size_t mCompressedDictionarySizeB{}; // dictionary size estimate, bytes
  size_t mIncompressibleSizeB{};       // size of the incompressible samples, bytes
  size_t mPackedDatasetSizeB{};        // size of the dataset when packed, bytes
};
74
75template <typename source_T>
77{
78 const auto& datasetProperties = metrics.getDatasetProperties();
79 const auto& coderProperties = metrics.getCoderProperties();
80 const auto& nSamples = datasetProperties.numSamples;
81
82 if (nSamples > 0) {
83 mEntropySizeB = utils::toBytes(datasetProperties.entropy * nSamples);
84 mCompressedDatasetSizeB = addEncoderOverheadEstimateB<>(mEntropySizeB);
85 mCompressedDictionarySizeB = coderProperties.dictSizeEstimate.getSizeB(datasetProperties.nUsedAlphabetSymbols,
86 *coderProperties.renormingPrecisionBits);
87 mIncompressibleSizeB = utils::toBytes(datasetProperties.alphabetRangeBits * (*coderProperties.nIncompressibleSamples));
88 mPackedDatasetSizeB = utils::toBytes(datasetProperties.alphabetRangeBits * nSamples);
89 } else {
90 // special case: store no data for empty dataset
91 mEntropySizeB = 0;
92 mCompressedDatasetSizeB = 0;
93 mCompressedDictionarySizeB = 0;
94 mIncompressibleSizeB = 0;
95 mPackedDatasetSizeB = 0;
96 }
97};
98
99[[nodiscard]] inline size_t SizeEstimate::getEntropySizeB() const
100{
101 return mEntropySizeB;
102};
103
104template <typename T>
105[[nodiscard]] inline size_t SizeEstimate::getCompressedDatasetSize(double_t safetyFactor) const
106{
107 return utils::nBytesTo<T>(std::ceil(mCompressedDatasetSizeB * safetyFactor));
108};
109
110template <typename T>
111[[nodiscard]] inline size_t SizeEstimate::getCompressedDictionarySize(double_t safetyFactor) const
112{
113 constexpr size_t MaxOverhead = 8; // maximal absolute overhead
114 return utils::nBytesTo<T>(std::ceil(mCompressedDictionarySizeB * safetyFactor) + MaxOverhead);
115};
116
117template <typename T>
118[[nodiscard]] inline size_t SizeEstimate::getIncompressibleSize(double_t safetyFactor) const
119{
120 return utils::nBytesTo<T>(std::ceil(mIncompressibleSizeB * safetyFactor));
121};
122
123template <typename T>
124[[nodiscard]] inline size_t SizeEstimate::getPackedDatasetSize(double_t safetyFactor) const
125{
126 return utils::nBytesTo<T>(std::ceil(mPackedDatasetSizeB * safetyFactor));
127};
128
129[[nodiscard]] inline bool SizeEstimate::preferPacking(double_t weight) const
130{
131 // convention: always pack empty dataset.
132 return (mPackedDatasetSizeB * weight) <= (mCompressedDatasetSizeB +
133 mCompressedDictionarySizeB +
134 mIncompressibleSizeB);
135};
136
137} // namespace o2::rans
138
139#endif /* RANS_INTERNAL_METRICS_SIZEESTIMATE_H_ */
Computes and provides essential metrics on the dataset used for parameter and size estimates by other...
common helper classes and functions
size_t getEntropySizeB() const
size_t getIncompressibleSize(double_t safetyFactor=1.2) const
size_t getCompressedDatasetSize(double_t safetyFactor=1.2) const
bool preferPacking(double_t weight=1) const
size_t getCompressedDictionarySize(double_t safetyFactor=2) const
size_t getPackedDatasetSize(double_t safetyFactor=1) const
sane compile time defaults for encoders/decoders
GLsizei GLenum const void GLuint GLsizei GLfloat * metrics
Definition glcorearb.h:5500
GLuint GLuint GLfloat weight
Definition glcorearb.h:5477
typename CoderTraits< tag_V >::template type< lowerBound_V > CoderTraits_t
constexpr size_t toBytes(size_t bits) noexcept
Definition utils.h:163
constexpr size_t addEncoderOverheadEstimateB(size_t sizeB) noexcept
manipulation of types at compile time