Project
Loading...
Searching...
No Matches
compat.h
Go to the documentation of this file.
1// Copyright 2019-2023 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
15
16#ifndef RANS_COMPAT_H_
17#define RANS_COMPAT_H_
18
19#ifdef __CLING__
20#error rANS should not be exposed to root
21#endif
22
23#include <numeric>
24
26
32
35
38
39#include "rANS/factory.h"
40
42{
43
44namespace defaults
45{
46namespace internal
47{
48inline constexpr size_t RenormingLowerBound = 31;
49} // namespace internal
50
52 inline static constexpr size_t nStreams = 2;
53 inline static constexpr size_t renormingLowerBound = internal::RenormingLowerBound;
54};
55
56} // namespace defaults
57
58namespace compatImpl
59{
60inline constexpr uint32_t MinRenormThreshold = 10;
61inline constexpr uint32_t MaxRenormThreshold = 20;
62} // namespace compatImpl
63
64inline size_t computeRenormingPrecision(size_t nUsedAlphabetSymbols)
65{
66 const uint32_t minBits = o2::rans::utils::log2UInt(nUsedAlphabetSymbols);
67 const uint32_t estimate = minBits * 3u / 2u;
68 const uint32_t maxThreshold = std::max(minBits, compatImpl::MaxRenormThreshold);
69 const uint32_t minThreshold = std::max(estimate, compatImpl::MinRenormThreshold);
70
71 return std::min(minThreshold, maxThreshold);
72};
73
74template <typename source_T>
76{
77 using namespace o2::rans::internal;
78
79 constexpr size_t IncompressibleSymbolFrequency = 1;
80
81 if (histogram.empty()) {
82 LOG(warning) << "rescaling empty histogram";
83 }
84
85 size_t nUsedAlphabetSymbols = countNUsedAlphabetSymbols(histogram);
86
87 if (newPrecision == 0) {
88 newPrecision = computeRenormingPrecision(nUsedAlphabetSymbols);
89 }
90
91 const size_t alphabetSize = histogram.size() + 1; // +1 for incompressible symbol
92 std::vector<uint64_t> cumulativeFrequencies(alphabetSize + 1); // +1 to store total cumulative frequencies
93 cumulativeFrequencies[0] = 0;
94 std::inclusive_scan(histogram.begin(), histogram.end(), ++cumulativeFrequencies.begin(), std::plus<>(), 0ull);
95 cumulativeFrequencies.back() = histogram.getNumSamples() + IncompressibleSymbolFrequency;
96
97 auto getFrequency = [&cumulativeFrequencies](count_t i) {
98 assert(cumulativeFrequencies[i + 1] >= cumulativeFrequencies[i]);
99 return cumulativeFrequencies[i + 1] - cumulativeFrequencies[i];
100 };
101
102 // we will memorize only those entries which can be used
103 const auto sortIdx = [&]() {
104 std::vector<size_t> indices;
105 indices.reserve(nUsedAlphabetSymbols + 1);
106
107 for (size_t i = 0; i < alphabetSize; ++i) {
108 if (getFrequency(i) != 0) {
109 indices.push_back(i);
110 }
111 }
112
113 std::sort(indices.begin(), indices.end(), [&](count_t i, count_t j) { return getFrequency(i) < getFrequency(j); });
114 return indices;
115 }();
116
117 // resample distribution based on cumulative frequencies
118 const count_t newCumulatedFrequency = utils::pow2(newPrecision);
119 size_t nSamples = histogram.getNumSamples() + IncompressibleSymbolFrequency;
120 assert(newCumulatedFrequency >= nUsedAlphabetSymbols);
121 size_t needsShift = 0;
122 for (size_t i = 0; i < sortIdx.size(); i++) {
123 if (static_cast<count_t>(getFrequency(sortIdx[i])) * (newCumulatedFrequency - needsShift) / nSamples >= 1) {
124 break;
125 }
126 needsShift++;
127 }
128
129 size_t shift = 0;
130 auto beforeUpdate = cumulativeFrequencies[0];
131 for (size_t i = 0; i < alphabetSize; i++) {
132 auto& nextCumulative = cumulativeFrequencies[i + 1];
133 uint64_t oldFrequeny = nextCumulative - beforeUpdate;
134 if (oldFrequeny && oldFrequeny * (newCumulatedFrequency - needsShift) / nSamples < 1) {
135 shift++;
136 }
137 beforeUpdate = cumulativeFrequencies[i + 1];
138 nextCumulative = (static_cast<uint64_t>(newCumulatedFrequency - needsShift) * nextCumulative) / nSamples + shift;
139 }
140 assert(shift == needsShift);
141
142 // verify
143#if !defined(NDEBUG)
144 assert(cumulativeFrequencies.front() == 0);
145 assert(cumulativeFrequencies.back() == newCumulatedFrequency);
146 size_t i = 0;
147 for (auto frequency : histogram) {
148 if (frequency == 0) {
149 assert(cumulativeFrequencies[i + 1] == cumulativeFrequencies[i]);
150 } else {
151 assert(cumulativeFrequencies[i + 1] > cumulativeFrequencies[i]);
152 }
153 ++i;
154 }
155#endif
156
157 typename RenormedDenseHistogram<source_T>::container_type rescaledFrequencies(histogram.size(), histogram.getOffset());
158
159 assert(cumulativeFrequencies.size() == histogram.size() + 2);
160 // calculate updated frequencies
161 for (size_t i = 0; i < histogram.size(); ++i) {
162 rescaledFrequencies(i) = getFrequency(i);
163 }
164 const typename RenormedDenseHistogram<source_T>::value_type incompressibleSymbolFrequency = getFrequency(histogram.size());
165
166 return RenormedDenseHistogram<source_T>{std::move(rescaledFrequencies), newPrecision, incompressibleSymbolFrequency};
167};
168
170{
171
172 public:
173 template <typename container_T>
174 [[nodiscard]] inline static constexpr decltype(auto) fromRenormed(const RenormedHistogramConcept<container_T>& renormed)
175 {
176 using namespace o2::rans::internal;
178 using symbol_type = internal::PrecomputedSymbol;
179 using coder_command = SingleStreamEncoderImpl<mRenormingLowerBound>;
180 using symbolTable_type = DenseSymbolTable<source_type, symbol_type>;
182
183 return encoderType{renormed};
184 };
185
186 template <typename source_T>
187 [[nodiscard]] inline static decltype(auto) fromHistogram(DenseHistogram<source_T> histogram, size_t renormingPrecision = 0)
188 {
189 const auto renormedHistogram = o2::rans::compat::renorm(std::move(histogram), renormingPrecision);
190 return makeEncoder::fromRenormed(renormedHistogram);
191 };
192
193 template <typename source_IT>
194 [[nodiscard]] inline static decltype(auto) fromSamples(source_IT begin, source_IT end, size_t renormingPrecision = 0)
195 {
196 auto histogram = makeDenseHistogram::fromSamples(begin, end);
197
198 return makeEncoder::fromHistogram(std::move(histogram), renormingPrecision);
199 };
200
201 template <typename source_T>
202 [[nodiscard]] inline static decltype(auto) fromSamples(gsl::span<const source_T> range, size_t renormingPrecision = 0)
203 {
204 auto histogram = makeDenseHistogram::template fromSamples(range);
205 return makeEncoder::fromHistogram(std::move(histogram), renormingPrecision);
206 };
207
208 private:
209 static constexpr CoderTag mCoderTag = CoderTag::SingleStream;
210 static constexpr size_t mNstreams = defaults::CoderPreset::nStreams;
211 static constexpr size_t mRenormingLowerBound = defaults::CoderPreset::renormingLowerBound;
212};
213
215{
216
217 using this_type = makeDecoder;
218
219 public:
220 template <typename container_T>
221 [[nodiscard]] inline static constexpr decltype(auto) fromRenormed(const RenormedHistogramConcept<container_T>& renormed)
222 {
223 using namespace internal;
224
226 using coder_type = DecoderImpl<mRenormingLowerBound>;
228
229 return decoder_type{renormed};
230 };
231
232 template <typename source_T>
233 [[nodiscard]] inline static decltype(auto) fromHistogram(DenseHistogram<source_T> histogram, size_t renormingPrecision = 0)
234 {
235 const auto renormedHistogram = o2::rans::compat::renorm(std::move(histogram), renormingPrecision);
236 return this_type::fromRenormed(renormedHistogram);
237 };
238
239 template <typename source_IT>
240 [[nodiscard]] inline static decltype(auto) fromSamples(source_IT begin, source_IT end, size_t renormingPrecision = 0)
241 {
242 auto histogram = makeDenseHistogram::fromSamples(begin, end);
243 return this_type::fromHistogram(std::move(histogram), renormingPrecision);
244 };
245
246 template <typename source_T>
247 [[nodiscard]] inline static decltype(auto) fromSamples(gsl::span<const source_T> range, size_t renormingPrecision = 0)
248 {
249 auto histogram = makeDenseHistogram::fromSamples(range);
250 return this_type::fromHistogram(std::move(histogram), renormingPrecision);
251 };
252
253 private:
254 static constexpr CoderTag mCoderTag = CoderTag::SingleStream;
255 static constexpr size_t mNstreams = defaults::CoderPreset::nStreams;
256 static constexpr size_t mRenormingLowerBound = defaults::CoderPreset::renormingLowerBound;
257};
258
259template <typename source_T>
260inline size_t getAlphabetRangeBits(const DenseHistogram<source_T>& histogram) noexcept
261{
262 using namespace o2::rans::internal;
263 const auto view = trim(makeHistogramView(histogram));
264 return internal::numBitsForNSymbols(view.size());
265};
266
267template <typename source_T>
268inline size_t getAlphabetRangeBits(const RenormedDenseHistogram<source_T>& histogram) noexcept
269{
270 using namespace o2::rans::internal;
271 const auto view = trim(makeHistogramView(histogram));
272 return internal::numBitsForNSymbols(view.size() + histogram.hasIncompressibleSymbol());
273};
274
275template <typename source_T, typename symbol_T>
276inline size_t getAlphabetRangeBits(const DenseSymbolTable<source_T, symbol_T>& symbolTable) noexcept
277{
278 const bool hasIncompressibleSymbol = symbolTable.getEscapeSymbol().getFrequency() > 0;
279 return internal::numBitsForNSymbols(symbolTable.size() + hasIncompressibleSymbol);
280};
281
282inline size_t calculateMaxBufferSizeB(size_t nElements, size_t rangeBits)
283{
284 constexpr size_t sizeofStreamT = sizeof(uint32_t);
285 // // RS: w/o safety margin the o2-test-ctf-io produces an overflow in the Encoder::process
286 // constexpr size_t SaferyMargin = 16;
287 // return std::ceil(1.20 * (num * rangeBits * 1.0) / (sizeofStreamT * 8.0)) + SaferyMargin;
288 return nElements * sizeofStreamT;
289}
290
291template <typename source_T>
293
294template <typename source_T>
296
297} // namespace o2::rans::compat
298
299#endif /* RANS_COMPAT_H_ */
Operations to decode a rANS stream.
Histogram for source symbols used to estimate symbol probabilities for entropy coding.
Lookup table containing statistical information for each symbol in the alphabet required for encoding...
int32_t i
Maps rANS state information back to source symbol, used for decoding.
uint32_t j
Definition RawData.h:0
Histogram renormed to sum of frequencies being 2^P for use in fast rans coding.
rANS encoding operations based on ryg's fast algorithm and a naive rANS implementation for all 64Bit ...
Contains statistical information for one source symbol, required for encoding/decoding.
Decoder - User facing class to decode a rANS encoded stream back into the source data based on the sa...
Encoder - User facing class to perform rANS entropy coding of source symbols onto a rANS state based ...
uint32_t source_type
typename base_type::value_type value_type
typename base_type::container_type container_type
typename base_type::source_type source_type
static constexpr decltype(auto) fromRenormed(const RenormedHistogramConcept< container_T > &renormed)
Definition compat.h:221
static decltype(auto) fromHistogram(DenseHistogram< source_T > histogram, size_t renormingPrecision=0)
Definition compat.h:233
static decltype(auto) fromSamples(source_IT begin, source_IT end, size_t renormingPrecision=0)
Definition compat.h:240
static decltype(auto) fromSamples(gsl::span< const source_T > range, size_t renormingPrecision=0)
Definition compat.h:247
static decltype(auto) fromHistogram(DenseHistogram< source_T > histogram, size_t renormingPrecision=0)
Definition compat.h:187
static decltype(auto) fromSamples(source_IT begin, source_IT end, size_t renormingPrecision=0)
Definition compat.h:194
static decltype(auto) fromSamples(gsl::span< const source_T > range, size_t renormingPrecision=0)
Definition compat.h:202
static constexpr decltype(auto) fromRenormed(const RenormedHistogramConcept< container_T > &renormed)
Definition compat.h:174
static factory classes for building histograms, encoders and decoders.
GLuint GLuint end
Definition glcorearb.h:469
GLenum GLint * range
Definition glcorearb.h:1899
GLsizei GLenum const void * indices
Definition glcorearb.h:400
constexpr uint32_t MinRenormThreshold
Definition compat.h:60
constexpr uint32_t MaxRenormThreshold
Definition compat.h:61
constexpr size_t RenormingLowerBound
Definition compat.h:48
size_t computeRenormingPrecision(size_t nUsedAlphabetSymbols)
Definition compat.h:64
decltype(makeEncoder::fromRenormed(RenormedDenseHistogram< source_T >{})) encoder_type
Definition compat.h:292
RenormedDenseHistogram< source_T > renorm(DenseHistogram< source_T > histogram, size_t newPrecision=0)
Definition compat.h:75
size_t getAlphabetRangeBits(const DenseHistogram< source_T > &histogram) noexcept
Definition compat.h:260
size_t calculateMaxBufferSizeB(size_t nElements, size_t rangeBits)
Definition compat.h:282
decltype(makeDecoder::fromRenormed(RenormedDenseHistogram< source_T >{})) decoder_type
Definition compat.h:295
constexpr size_t numBitsForNSymbols(size_t nSymbols) noexcept
Definition utils.h:129
constexpr size_t pow2(size_t n) noexcept
Definition utils.h:165
constexpr T log2UInt(T x) noexcept
Definition utils.h:179
auto makeHistogramView(container_T &container, std::ptrdiff_t offset) noexcept -> HistogramView< decltype(std::begin(container))>
HistogramView< Hist_IT > trim(const HistogramView< Hist_IT > &buffer)
size_t countNUsedAlphabetSymbols(const AdaptiveHistogram< source_T > &histogram)
uint32_t count_t
Definition defaults.h:34
static constexpr size_t nStreams
Definition compat.h:52
static constexpr size_t renormingLowerBound
Definition compat.h:53
static decltype(auto) fromSamples(source_IT begin, source_IT end, typename std::iterator_traits< source_IT >::value_type min, typename std::iterator_traits< source_IT >::value_type max)
Definition factory.h:144
LOG(info)<< "Compressed in "<< sw.CpuTime()<< " s"
manipulation of types at compile time