Project
Loading...
Searching...
No Matches
testCTFEntropyCoder.cxx
Go to the documentation of this file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
15
16#define BOOST_TEST_MODULE Test CTFEntropyCoder class
17#define BOOST_TEST_MAIN
18#define BOOST_TEST_DYN_LINK
19
20#undef NDEBUG
21#include <cassert>
22
23#include <vector>
24#include <cstring>
25#include <random>
26#include <algorithm>
27#include <version>
28
29#include <boost/test/unit_test.hpp>
30#include <boost/mp11.hpp>
31#include <fmt/core.h>
32
36#include "rANS/histogram.h"
37#include "rANS/metrics.h"
38#include "rANS/factory.h"
39#include "rANS/iterator.h"
40
41using namespace o2;
42
43using buffer_type = uint32_t;
44using source_types = boost::mp11::mp_list<uint8_t, int8_t, uint16_t, int16_t, uint32_t, int32_t>;
45
/// Deterministic pseudo-random test message made of `source_T` symbols.
///
/// The symbols follow a binomial distribution (p = 0.5) spread over the closed
/// interval [min, max]. The generator is always seeded with 0, so two instances
/// created with the same arguments hold identical data.
/// Intended for symbol types of at most 32 bits: the range is computed in int64_t.
template <typename source_T>
class SourceMessage
{
 public:
  /// \param messageSize number of symbols to generate
  /// \param max largest symbol value (inclusive)
  /// \param min smallest symbol value (inclusive), must not exceed `max`
  SourceMessage(size_t messageSize, source_T max = std::numeric_limits<source_T>::max(), source_T min = std::numeric_limits<source_T>::min()) : mMin{min}, mMax{max}
  {
    assert(max >= min);

    // Widen before subtracting: evaluated in source_T/int arithmetic the full 32 bit
    // ranges overflow (int32_t) or wrap around to zero draws (uint32_t).
    const int64_t lower = static_cast<int64_t>(min);
    const int64_t upper = static_cast<int64_t>(max);
    const int64_t draws = (upper - lower) + 1;
    const double probability = 0.5;

    std::mt19937 mt(0); // fixed seed: we always want the same distribution of random numbers
    std::binomial_distribution<int64_t> dist(draws, probability);
    mSourceMessage.resize(messageSize);
    // dist yields values in [0, draws]; clamp so that the top value can never wrap past `max`.
    std::generate(mSourceMessage.begin(), mSourceMessage.end(), [&dist, &mt, lower, upper]() -> source_T {
      return static_cast<source_T>(std::min(dist(mt) + lower, upper));
    });
  }

  /// Lower bound passed at construction.
  inline constexpr source_T getMin() const noexcept { return mMin; }
  /// Upper bound passed at construction.
  inline constexpr source_T getMax() const noexcept { return mMax; }
  /// Read-only access to the generated symbols.
  inline constexpr auto& get() const noexcept { return mSourceMessage; }

 private:
  source_T mMin{};
  source_T mMax{};
  std::vector<source_T> mSourceMessage{};
};
72
74{
75 public:
76 SourceMessageProxy() = default;
77
78 template <typename T>
79 const auto& getMessage() const noexcept
80 {
81 if constexpr (std::is_same_v<uint8_t, T>) {
82 return sourceMessage8u.get();
83 } else if constexpr (std::is_same_v<int8_t, T>) {
84 return sourceMessage8.get();
85 } else if constexpr (std::is_same_v<uint16_t, T>) {
86 return sourceMessage16u.get();
87 } else if constexpr (std::is_same_v<int16_t, T>) {
88 return sourceMessage16.get();
89 } else if constexpr (std::is_same_v<uint32_t, T>) {
90 return sourceMessage32u.get();
91 } else if constexpr (std::is_same_v<int32_t, T>) {
92 return sourceMessage32.get();
93 } else {
94 throw std::runtime_error{"unsupported source type"};
95 }
96 };
97
98 private:
99 inline static constexpr size_t MessageSize = rans::utils::pow2(10);
100 SourceMessage<uint8_t> sourceMessage8u{MessageSize};
101 SourceMessage<int8_t> sourceMessage8{MessageSize};
102 SourceMessage<uint16_t> sourceMessage16u{MessageSize};
103 SourceMessage<int16_t> sourceMessage16{MessageSize};
105 SourceMessage<int32_t> sourceMessage32{MessageSize, rans::utils::pow2(26), -static_cast<int32_t>(rans::utils::pow2(26))};
106};
107
109
// Round-trip check for the in-place entropy coder on the symbol range [begin, end):
// the message is encoded, the dictionary and the incompressible symbols are written out,
// a decoder is rebuilt from the written dictionary, and the decoded symbols are compared
// with the input.
// NOTE(review): the declaration of `entropyCoder` is not visible in this listing
// (the original line between `using source_type` and `makeEncoder()` is missing).
110template <typename source_IT>
111void encodeInplace(source_IT begin, source_IT end)
112{
113 using source_type = typename std::iterator_traits<source_IT>::value_type;
114
116 // BOOST_CHECK_THROW(entropyCoder.getEncoder(), std::runtime_error);
117 entropyCoder.makeEncoder();
118
119 const rans::Metrics<source_type>& metrics = entropyCoder.getMetrics();
120 const rans::SizeEstimate sizeEstimate = metrics.getSizeEstimate();
121
122 LOGP(info, "dataset[{},{}], coder[{},{}]", metrics.getDatasetProperties().min, metrics.getDatasetProperties().max, *metrics.getCoderProperties().min, *metrics.getCoderProperties().max);
123
// All three output buffers are sized from the coder's own size estimate and zero-filled.
124 std::vector<buffer_type> encodeBuffer(sizeEstimate.getCompressedDatasetSize<buffer_type>(), 0);
125 std::vector<buffer_type> literalSymbolsBuffer(sizeEstimate.getIncompressibleSize<buffer_type>(), 0);
126 std::vector<buffer_type> dictBuffer(sizeEstimate.getCompressedDictionarySize<buffer_type>(), 0);
127
// Each call returns the end position of what it wrote; encoderEnd and dictEnd are reused below.
128 auto encoderEnd = entropyCoder.encode(begin, end, encodeBuffer.data(), encodeBuffer.data() + encodeBuffer.size());
129 [[maybe_unused]] auto literalsEnd = entropyCoder.writeIncompressible(literalSymbolsBuffer.data(), literalSymbolsBuffer.data() + literalSymbolsBuffer.size());
130 auto dictEnd = entropyCoder.writeDictionary(dictBuffer.data(), dictBuffer.data() + dictBuffer.size());
131 // decode
// The decoder is built only from the serialized dictionary plus the coder properties kept in the metrics.
132 const auto& coderProperties = metrics.getCoderProperties();
133 auto decoder = rans::makeDecoder<>::fromRenormed(rans::readRenormedDictionary(dictBuffer.data(), dictEnd,
134 *coderProperties.min, *coderProperties.max,
135 *coderProperties.renormingPrecisionBits));
136 std::vector<source_type> literals(entropyCoder.getNIncompressibleSamples());
137
// Unpack the incompressible symbols using the dataset's alphabet range bits and minimum as packing width and offset.
138 const auto& datasetPropterties = metrics.getDatasetProperties();
139 rans::unpack(literalSymbolsBuffer.data(), literals.size(), literals.data(),
140 datasetPropterties.alphabetRangeBits, datasetPropterties.min);
141
142 size_t messageLength = std::distance(begin, end);
143 std::vector<source_type> sourceBuffer(messageLength, 0);
144
// The decoder is handed the END of the encoded stream and the END of the literals
// (presumably both are consumed back to front - confirm against the decoder API).
145 decoder.process(encoderEnd, sourceBuffer.data(), messageLength, entropyCoder.getNStreams(), literals.end());
146
147 BOOST_CHECK_EQUAL_COLLECTIONS(sourceBuffer.begin(), sourceBuffer.end(), begin, end);
148};
149
// Test body (the test-case macro line is not visible in this listing; presumably
// BOOST_AUTO_TEST_CASE_TEMPLATE(testInplaceEncoderEmpty, source_T, source_types)):
// runs encodeInplace on an empty message of source_T, passed as a raw pointer range.
151{
152 std::vector<source_T> testMessage{};
153 encodeInplace(testMessage.data(), testMessage.data() + testMessage.size());
154};
155
// Test body (the test-case macro line and therefore the test name are not visible in this listing):
// runs encodeInplace on the shared source_T message, passed as a raw pointer range.
157{
158 const auto& testMessage = MessageProxy.getMessage<source_T>();
159 encodeInplace(testMessage.data(), testMessage.data() + testMessage.size());
160};
161
// Test body (the test-case macro line and therefore the test name are not visible in this listing):
// runs encodeInplace on the shared source_T message, passed as a container iterator range.
163{
164 const auto& testMessage = MessageProxy.getMessage<source_T>();
165 encodeInplace(testMessage.begin(), testMessage.end());
166};
167
/// Functor that packs two symbols into a single `value_T` and splits them again.
///
/// The symbol behind `iterA` is stored in the bits from `shift` upwards, the symbol
/// behind `iterB` in the `shift` lowest bits. Only the `shift` lowest bits of the
/// second symbol are kept, so that both operators are inverse to each other even
/// when that symbol is negative.
template <typename value_T, size_t shift>
class ShiftFunctor
{
  static_assert(shift < sizeof(value_T) * 8, "shift must be smaller than the bit width of value_T");

  // Selects the `shift` lowest bits. Built from a 64 bit one, since `0x1 << shift`
  // is evaluated as int and overflows for shift >= 31.
  static constexpr value_T LowerMask = static_cast<value_T>((1ull << shift) - 1ull);

 public:
  /// Combine `*iterA` (upper part) and `*iterB` (lower part) into one value.
  template <typename iterA_T, typename iterB_T>
  inline value_T operator()(iterA_T iterA, iterB_T iterB) const
  {
    const value_T upper = static_cast<value_T>(static_cast<value_T>(*iterA) << shift);
    // Masking keeps a sign-extended negative symbol from spilling into the upper part.
    const value_T lower = static_cast<value_T>(static_cast<value_T>(*iterB) & LowerMask);
    return upper | lower;
  }

  /// Split `value` and store the upper part to `*iterA` and the lower part to `*iterB`.
  template <typename iterA_T, typename iterB_T>
  inline void operator()(iterA_T iterA, iterB_T iterB, value_T value) const
  {
    *iterA = value >> shift;
    *iterB = value & LowerMask;
  }
};
186
187template <typename iterA_T, typename iterB_T, typename F>
188auto makeInputIterators(iterA_T iterA, iterB_T iterB, size_t nElements, F functor)
189{
190 using namespace o2::rans::utils;
191
192 return std::make_tuple(rans::CombinedInputIterator{iterA, iterB, functor},
193 rans::CombinedInputIterator{advanceIter(iterA, nElements), advanceIter(iterB, nElements), functor});
194};
195
196BOOST_AUTO_TEST_CASE(testInplaceEncoderCombinedIterator)
197{
198
199 const auto& testMessage1 = MessageProxy.getMessage<int8_t>();
200 const auto& testMessage2 = MessageProxy.getMessage<int8_t>();
201
202 auto [begin, end] = makeInputIterators(testMessage1.data(), testMessage2.data(), testMessage1.size(), ShiftFunctor<uint16_t, rans::utils::toBits<uint8_t>()>{});
203
204 encodeInplace(begin, end);
205};
206
208{
209 public:
// Constructor body (the constructor head line is not visible in this listing).
// For every supported symbol type it builds a dense histogram from the corresponding test
// message, renorms it with RenormingPolicy::ForceIncompressible, and creates one dense
// encoder and one decoder from the same renormed histogram.
211 {
// A local proxy is used instead of the global MessageProxy.
212 SourceMessageProxy proxy{};
213
214 auto renormed8u = rans::renorm(rans::makeDenseHistogram::fromSamples(proxy.getMessage<uint8_t>().begin(), proxy.getMessage<uint8_t>().end()), rans::RenormingPolicy::ForceIncompressible);
215 auto renormed8 = rans::renorm(rans::makeDenseHistogram::fromSamples(proxy.getMessage<int8_t>().begin(), proxy.getMessage<int8_t>().end()), rans::RenormingPolicy::ForceIncompressible);
216 auto renormed16u = rans::renorm(rans::makeDenseHistogram::fromSamples(proxy.getMessage<uint16_t>().begin(), proxy.getMessage<uint16_t>().end()), rans::RenormingPolicy::ForceIncompressible);
217 auto renormed16 = rans::renorm(rans::makeDenseHistogram::fromSamples(proxy.getMessage<int16_t>().begin(), proxy.getMessage<int16_t>().end()), rans::RenormingPolicy::ForceIncompressible);
218 auto renormed32u = rans::renorm(rans::makeDenseHistogram::fromSamples(proxy.getMessage<uint32_t>().begin(), proxy.getMessage<uint32_t>().end()), rans::RenormingPolicy::ForceIncompressible);
219 auto renormed32 = rans::renorm(rans::makeDenseHistogram::fromSamples(proxy.getMessage<int32_t>().begin(), proxy.getMessage<int32_t>().end()), rans::RenormingPolicy::ForceIncompressible);
220
// Encoders: one per symbol type.
221 encoder8u = rans::makeDenseEncoder<>::fromRenormed(renormed8u);
222 encoder8 = rans::makeDenseEncoder<>::fromRenormed(renormed8);
223 encoder16u = rans::makeDenseEncoder<>::fromRenormed(renormed16u);
224 encoder16 = rans::makeDenseEncoder<>::fromRenormed(renormed16);
225 encoder32u = rans::makeDenseEncoder<>::fromRenormed(renormed32u);
226 encoder32 = rans::makeDenseEncoder<>::fromRenormed(renormed32);
227
// Decoders: built from the very same renormed histograms as the encoders above.
228 decoder8u = rans::makeDecoder<>::fromRenormed(renormed8u);
229 decoder8 = rans::makeDecoder<>::fromRenormed(renormed8);
230 decoder16u = rans::makeDecoder<>::fromRenormed(renormed16u);
231 decoder16 = rans::makeDecoder<>::fromRenormed(renormed16);
232 decoder32u = rans::makeDecoder<>::fromRenormed(renormed32u);
233 decoder32 = rans::makeDecoder<>::fromRenormed(renormed32);
234 }
235
236 template <typename T>
237 const auto& getEncoder() const noexcept
238 {
239 if constexpr (std::is_same_v<uint8_t, T>) {
240 return encoder8u;
241 } else if constexpr (std::is_same_v<int8_t, T>) {
242 return encoder8;
243 } else if constexpr (std::is_same_v<uint16_t, T>) {
244 return encoder16u;
245 } else if constexpr (std::is_same_v<int16_t, T>) {
246 return encoder16;
247 } else if constexpr (std::is_same_v<uint32_t, T>) {
248 return encoder32u;
249 } else if constexpr (std::is_same_v<int32_t, T>) {
250 return encoder32;
251 } else {
252 throw std::runtime_error{"unsupported encoder type"};
253 }
254 };
255
256 template <typename T>
257 const auto& getDecoder() const noexcept
258 {
259 if constexpr (std::is_same_v<uint8_t, T>) {
260 return decoder8u;
261 } else if constexpr (std::is_same_v<int8_t, T>) {
262 return decoder8;
263 } else if constexpr (std::is_same_v<uint16_t, T>) {
264 return decoder16u;
265 } else if constexpr (std::is_same_v<int16_t, T>) {
266 return decoder16;
267 } else if constexpr (std::is_same_v<uint32_t, T>) {
268 return decoder32u;
269 } else if constexpr (std::is_same_v<int32_t, T>) {
270 return decoder32;
271 } else {
272 throw std::runtime_error{"unsupported encoder type"};
273 }
274 };
275
276 private:
283
290};
291
293
// Round-trip check for the entropy coder that works with an externally built encoder:
// the symbol range [begin, end) is encoded, the incompressible symbols are written out,
// and the result is decoded with the matching decoder taken from ExternalEncoders and
// compared with the input.
// NOTE(review): the declaration of `entropyCoder` is not visible in this listing
// (the original line after `using source_type` is missing).
294template <typename source_IT>
295void encodeExternal(source_IT begin, source_IT end)
296{
297 using source_type = typename std::iterator_traits<source_IT>::value_type;
298
300
// The payload buffer is sized from the coder's estimate for this number of source symbols.
301 const size_t sourceExtent = std::distance(begin, end);
302 std::vector<buffer_type> encodeBuffer(entropyCoder.template computePayloadSizeEstimate<buffer_type>(sourceExtent), 0);
303 auto encoderEnd = entropyCoder.encode(begin, end, encodeBuffer.data(), encodeBuffer.data() + encodeBuffer.size());
304
// The literal buffer is sized after encoding, from the coder's packed incompressible size.
305 std::vector<buffer_type> literalSymbolsBuffer(entropyCoder.template computePackedIncompressibleSize<buffer_type>(), 0);
306 [[maybe_unused]] auto literalsEnd = entropyCoder.writeIncompressible(literalSymbolsBuffer.data(), literalSymbolsBuffer.data() + literalSymbolsBuffer.size());
307
308 // decode
// NOTE(review): getDecoder() returns a const reference; plain `auto` makes a copy of the decoder here.
309 auto decoder = ExternalEncoders.getDecoder<source_type>();
310 std::vector<source_type> literals((entropyCoder.getNIncompressibleSamples()));
311
// Packing width and offset for the incompressible symbols are queried from the coder itself.
312 rans::unpack(literalSymbolsBuffer.data(), literals.size(), literals.data(),
313 entropyCoder.getIncompressibleSymbolPackingBits(), entropyCoder.getIncompressibleSymbolOffset());
314
// Same value as sourceExtent above.
315 size_t messageLength = std::distance(begin, end);
316 std::vector<source_type> sourceBuffer(messageLength, 0);
317
// The decoder is handed the END of the encoded stream and the END of the literals
// (presumably both are consumed back to front - confirm against the decoder API).
318 decoder.process(encoderEnd, sourceBuffer.data(), messageLength, entropyCoder.getEncoder().getNStreams(), literals.end());
319
320 BOOST_CHECK_EQUAL_COLLECTIONS(sourceBuffer.begin(), sourceBuffer.end(), begin, end);
321};
322
// Test body (the test-case macro line and therefore the test name are not visible in this listing):
// runs encodeExternal on an empty message of source_T, passed as a raw pointer range.
324{
325 std::vector<source_T> testMessage{};
326 encodeExternal(testMessage.data(), testMessage.data() + testMessage.size());
327};
328
// Test body (the test-case macro line and therefore the test name are not visible in this listing):
// runs encodeExternal on the shared source_T message, passed as a raw pointer range.
330{
331 const auto& testMessage = MessageProxy.getMessage<source_T>();
332 encodeExternal(testMessage.data(), testMessage.data() + testMessage.size());
333};
334
// Test body (the test-case macro line and therefore the test name are not visible in this listing):
// runs encodeExternal on the shared source_T message, passed as a container iterator range.
336{
337 const auto& testMessage = MessageProxy.getMessage<source_T>();
338 encodeExternal(testMessage.begin(), testMessage.end());
339};
340
341BOOST_AUTO_TEST_CASE(testExternalEncoderCombinedIterator)
342{
343
344 const auto& testMessage1 = MessageProxy.getMessage<int8_t>();
345 const auto& testMessage2 = MessageProxy.getMessage<int8_t>();
346
347 auto [begin, end] = makeInputIterators(testMessage1.data(), testMessage2.data(), testMessage1.size(), ShiftFunctor<uint16_t, rans::utils::toBits<uint8_t>()>{});
348
349 encodeExternal(begin, end);
350};
Interfaces for BitPacking using librans.
constexpr size_t MessageSize
uint32_t source_type
const auto & getEncoder() const noexcept
const auto & getDecoder() const noexcept
value_T operator()(iterA_T iterA, iterB_T iterB) const
void operator()(iterA_T iterA, iterB_T iterB, value_T value) const
SourceMessageProxy()=default
const auto & getMessage() const noexcept
constexpr source_T getMin() const noexcept
constexpr auto & get() const noexcept
constexpr source_T getMax() const noexcept
SourceMessage(size_t messageSize, source_T max=std::numeric_limits< source_T >::max(), source_T min=std::numeric_limits< source_T >::min())
size_t getIncompressibleSize(double_t safetyFactor=1.2) const
size_t getCompressedDatasetSize(double_t safetyFactor=1.2) const
size_t getCompressedDictionarySize(double_t safetyFactor=2) const
static constexpr decltype(auto) fromRenormed(const RenormedHistogramConcept< container_T > &renormed)
Definition factory.h:106
static constexpr decltype(auto) fromRenormed(const RenormedDenseHistogram< source_T > &renormed)
Definition factory.h:195
static factory classes for building histograms, encoders and decoders.
GLuint GLuint end
Definition glcorearb.h:469
GLsizei GLenum const void GLuint GLsizei GLfloat * metrics
Definition glcorearb.h:5500
GLsizei const GLfloat * value
Definition glcorearb.h:819
public interface for building and renorming histograms from source data.
public interface for rANS iterators.
constexpr size_t pow2(size_t n) noexcept
Definition utils.h:165
decltype(makeDenseEncoder<>::fromRenormed(RenormedDenseHistogram< source_T >{})) denseEncoder_type
Definition factory.h:229
decltype(makeDecoder<>::fromRenormed(RenormedDenseHistogram< source_T >{})) defaultDecoder_type
Definition factory.h:238
void unpack(const input_T *__restrict inputBegin, size_t extent, output_IT outputBegin, size_t packingWidth, typename std::iterator_traits< output_IT >::value_type offset=static_cast< typename std::iterator_traits< output_IT >::value_type >(0))
Definition pack.h:346
RenormedDenseHistogram< source_T > readRenormedDictionary(buffer_IT begin, buffer_IT end, source_T min, source_T max, size_t renormingPrecision)
Definition serialize.h:188
decltype(auto) renorm(histogram_T histogram, size_t newPrecision, RenormingPolicy renormingPolicy=RenormingPolicy::Auto, size_t lowProbabilityCutoffBits=0)
Definition renorm.h:203
a couple of static helper functions to create timestamp values for CCDB queries or override obsolete ...
BOOST_AUTO_TEST_CASE(FlatHisto)
static decltype(auto) fromSamples(source_IT begin, source_IT end, typename std::iterator_traits< source_IT >::value_type min, typename std::iterator_traits< source_IT >::value_type max)
Definition factory.h:144
boost::mp11::mp_list< uint8_t, int8_t, uint16_t, int16_t, uint32_t, int32_t > source_types
ExternalEncoderDecoderProxy ExternalEncoders
void encodeInplace(source_IT begin, source_IT end)
void encodeExternal(source_IT begin, source_IT end)
const SourceMessageProxy MessageProxy
auto makeInputIterators(iterA_T iterA, iterB_T iterB, size_t nElements, F functor)
uint32_t buffer_type
BOOST_AUTO_TEST_CASE_TEMPLATE(testInplaceEncoderEmpty, source_T, source_types)
constexpr size_t min
constexpr size_t max