Project
Loading...
Searching...
No Matches
test_ransMetrics.cxx
Go to the documentation of this file.
1// Copyright 2019-2023 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
15
16#define BOOST_TEST_MODULE Utility test
17#define BOOST_TEST_MAIN
18#define BOOST_TEST_DYN_LINK
19
20#undef NDEBUG
21#include <cassert>
22
23#include <boost/test/unit_test.hpp>
24#include <boost/mp11.hpp>
25#include <gsl/span>
26
27#include "rANS/histogram.h"
28#include "rANS/metrics.h"
29
30using namespace o2::rans;
31
32using source_type = uint32_t;
33using histogram_types = boost::mp11::mp_list<DenseHistogram<source_type>, AdaptiveHistogram<source_type>, SparseHistogram<source_type>>;
34
35BOOST_AUTO_TEST_SUITE(test_DictSizeEstimate)
36BOOST_AUTO_TEST_CASE(test_initDictSizeEstimate)
37{
38 using namespace internal;
39 using namespace utils;
40
41 DictSizeEstimate estimate{};
42 BOOST_CHECK_EQUAL(estimate.getIndexSize(), 0);
43 BOOST_CHECK_EQUAL(estimate.getIndexSizeB(), 0);
44 BOOST_CHECK_EQUAL(estimate.getFreqSize(), 0);
45 BOOST_CHECK_EQUAL(estimate.getFreqSizeB(), 0);
47};
48
49BOOST_AUTO_TEST_CASE_TEMPLATE(test_emptyDictSizeEstimate, histogram_T, histogram_types)
50{
51 using namespace internal;
52 using namespace utils;
53
54 std::vector<uint32_t> frequencies{};
55 histogram_T histogram{frequencies.begin(), frequencies.end(), 0};
56 const auto [trimmedBegin, trimmedEnd] = internal::trim(histogram);
57 const auto [min, max] = internal::getMinMax(histogram, trimmedBegin, trimmedEnd);
58
59 DictSizeEstimate estimate{histogram.getNumSamples()};
60
61 source_type lastIndex = min;
62 forEachIndexValue(histogram, trimmedBegin, trimmedEnd, [&](const source_type& index, const uint32_t& frequency) {
63 if (frequency) {
64 BOOST_CHECK(lastIndex <= index);
65 source_type delta = index - lastIndex;
66 estimate.updateIndexSize(delta + (delta == 0));
67 lastIndex = index;
68 estimate.updateFreqSize(frequency);
69 }
70 });
71
72 BOOST_CHECK_EQUAL(estimate.getIndexSize(), 0);
73 BOOST_CHECK_EQUAL(estimate.getIndexSizeB(), 0);
74 BOOST_CHECK_EQUAL(estimate.getFreqSize(), 0);
75 BOOST_CHECK_EQUAL(estimate.getFreqSizeB(), 0);
77};
78
79BOOST_AUTO_TEST_CASE_TEMPLATE(test_defaultDictSizeEstimate, histogram_T, histogram_types)
80{
81 using namespace internal;
82 using namespace utils;
83
84 std::vector<uint32_t> frequencies{9, 0, 8, 0, 7, 0, 6, 0, 5, 0, 4, 0, 3, 0, 2, 0, 1};
85 histogram_T histogram{frequencies.begin(), frequencies.end(), 0};
86
87 const auto [trimmedBegin, trimmedEnd] = internal::trim(histogram);
88 const auto [min, max] = internal::getMinMax(histogram, trimmedBegin, trimmedEnd);
89
90 DictSizeEstimate estimate{histogram.getNumSamples()};
91
92 source_type lastIndex = min;
93 forEachIndexValue(histogram, trimmedBegin, trimmedEnd, [&](const source_type& index, const uint32_t& frequency) {
94 if (frequency) {
95 BOOST_CHECK(lastIndex <= index);
96 source_type delta = index - lastIndex;
97 estimate.updateIndexSize(delta + (delta == 0));
98 lastIndex = index;
99 estimate.updateFreqSize(frequency);
100 }
101 });
102
103 BOOST_CHECK_EQUAL(estimate.getIndexSize(), 33);
104 BOOST_CHECK_EQUAL(estimate.getIndexSizeB(), 5);
105 BOOST_CHECK_EQUAL(estimate.getFreqSize(), 224);
106 BOOST_CHECK_EQUAL(estimate.getFreqSizeB(), 28);
108};
109BOOST_AUTO_TEST_SUITE_END()
110
111BOOST_AUTO_TEST_SUITE(test_RenormingPrecision)
112
114{
115 public:
116 inline MetricsTester(const DenseHistogram<source_type>& histogram, float_t cutoffPrecision = 0.999) : Metrics(histogram, cutoffPrecision){};
117 inline MetricsTester(const AdaptiveHistogram<source_type>& histogram, float_t cutoffPrecision = 0.999) : Metrics(histogram, cutoffPrecision){};
118 inline MetricsTester(const SparseHistogram<source_type>& histogram, float_t cutoffPrecision = 0.999) : Metrics(histogram, cutoffPrecision){};
119 inline size_t testComputeRenormingPrecision(float_t cutoffPrecision = 0.999) noexcept { return computeRenormingPrecision(cutoffPrecision); };
120 inline size_t testComputeIncompressibleCount(gsl::span<source_type> distribution, source_type renormingPrecision) noexcept { return computeIncompressibleCount(distribution, renormingPrecision); };
121};
122
123BOOST_AUTO_TEST_CASE_TEMPLATE(test_EmptyRenormingPrecision, histogram_T, histogram_types)
124{
125 std::array<uint32_t, 32> symbolLengthDistribution;
126 std::array<uint32_t, 32> weightedSymbolLengthDistribution;
127 const size_t nSamples = 0;
128 const uint32_t renormingPrecision = 0;
129
130 MetricsTester tester{histogram_T{}};
131 tester.getDatasetProperties().symbolLengthDistribution = symbolLengthDistribution;
132 tester.getDatasetProperties().weightedSymbolLengthDistribution = weightedSymbolLengthDistribution;
133 tester.getDatasetProperties().numSamples = nSamples;
134
135 BOOST_CHECK_EQUAL(tester.testComputeRenormingPrecision(), renormingPrecision);
136 BOOST_CHECK_EQUAL(tester.testComputeIncompressibleCount(symbolLengthDistribution, renormingPrecision), 1);
137 BOOST_CHECK_EQUAL(tester.testComputeIncompressibleCount(weightedSymbolLengthDistribution, renormingPrecision), 1);
138}
139
140BOOST_AUTO_TEST_CASE_TEMPLATE(test_cutoffRenormingPrecision, histogram_T, histogram_types)
141{
142 std::array<uint32_t, 32> symbolLengthDistribution{{}};
143 std::array<uint32_t, 32> weightedSymbolLengthDistribution{{}};
144 weightedSymbolLengthDistribution[31] = 44;
145 symbolLengthDistribution[31] = 42;
146 const size_t nSamples = 44;
147 const uint32_t renormingPrecision = defaults::MaxRenormPrecisionBits;
148
149 MetricsTester tester{histogram_T{}};
150 tester.getDatasetProperties().symbolLengthDistribution = symbolLengthDistribution;
151 tester.getDatasetProperties().weightedSymbolLengthDistribution = weightedSymbolLengthDistribution;
152 tester.getDatasetProperties().numSamples = nSamples;
153
154 BOOST_CHECK_EQUAL(tester.testComputeRenormingPrecision(), renormingPrecision);
155 BOOST_CHECK_EQUAL(tester.testComputeIncompressibleCount(symbolLengthDistribution, renormingPrecision), 42);
156 BOOST_CHECK_EQUAL(tester.testComputeIncompressibleCount(weightedSymbolLengthDistribution, renormingPrecision), nSamples);
157}
158
159BOOST_AUTO_TEST_CASE_TEMPLATE(test_noCutoffRenormingPrecision, histogram_T, histogram_types)
160{
161 std::array<uint32_t, 32> symbolLengthDistribution{{}};
162 std::array<uint32_t, 32> weightedSymbolLengthDistribution{{}};
163 weightedSymbolLengthDistribution[1] = 20;
164 weightedSymbolLengthDistribution[5] = 20;
165 weightedSymbolLengthDistribution[9] = 40;
166 weightedSymbolLengthDistribution[12] = 10;
167 weightedSymbolLengthDistribution[15] = 10;
168
169 symbolLengthDistribution[1] = 2;
170 symbolLengthDistribution[5] = 2;
171 symbolLengthDistribution[9] = 4;
172 symbolLengthDistribution[12] = 1;
173 symbolLengthDistribution[15] = 1;
174
175 const size_t nSamples = 100;
176 const uint32_t renormingPrecision = 17;
177
178 MetricsTester tester{histogram_T{}};
179 tester.getDatasetProperties().symbolLengthDistribution = symbolLengthDistribution;
180 tester.getDatasetProperties().weightedSymbolLengthDistribution = weightedSymbolLengthDistribution;
181 tester.getDatasetProperties().numSamples = nSamples;
182
183 BOOST_CHECK_EQUAL(tester.testComputeRenormingPrecision(), renormingPrecision);
184 BOOST_CHECK_EQUAL(tester.testComputeIncompressibleCount(symbolLengthDistribution, renormingPrecision), 0);
185 BOOST_CHECK_EQUAL(tester.testComputeIncompressibleCount(weightedSymbolLengthDistribution, renormingPrecision), 0);
186}
187BOOST_AUTO_TEST_SUITE_END()
188
189BOOST_AUTO_TEST_SUITE(test_Metrics)
190BOOST_AUTO_TEST_CASE_TEMPLATE(test_emptyMetrics, histogram_T, histogram_types)
191{
192 std::vector<uint32_t> frequencies{};
193 histogram_T histogram{frequencies.begin(), frequencies.end(), 0};
194 const float eps = 1e-2;
195 const size_t nUsedAlphabetSymbols = 0;
196 const auto [min, max] = getMinMax(histogram);
197
198 const Metrics<source_type> metrics{histogram};
199 const auto& dataProperies = metrics.getDatasetProperties();
200 const auto& coderProperties = metrics.getCoderProperties();
201
202 BOOST_CHECK_EQUAL(dataProperies.min, min);
203 BOOST_CHECK_EQUAL(dataProperies.max, max);
204 BOOST_CHECK_EQUAL(dataProperies.numSamples, histogram.getNumSamples());
205 BOOST_CHECK_EQUAL(dataProperies.alphabetRangeBits, 0);
206 BOOST_CHECK_EQUAL(dataProperies.nUsedAlphabetSymbols, nUsedAlphabetSymbols);
207 BOOST_CHECK_SMALL(dataProperies.entropy, eps);
208
209 std::array<uint32_t, 32> symbolLengthDistribution{{}};
210 std::array<uint32_t, 32> weightedSymbolLengthDistribution{{}};
211
212 uint32_t sumUnweighted = 0;
213 uint32_t sumWeighted = 0;
214 for (size_t i = 0; i < 32; ++i) {
215 // BOOST_TEST_MESSAGE(fmt::format("checking length: {}", i));
216 BOOST_CHECK_EQUAL(symbolLengthDistribution[i], dataProperies.symbolLengthDistribution[i]);
217 BOOST_CHECK_EQUAL(weightedSymbolLengthDistribution[i], dataProperies.weightedSymbolLengthDistribution[i]);
218
219 sumUnweighted += dataProperies.symbolLengthDistribution[i];
220 sumWeighted += dataProperies.weightedSymbolLengthDistribution[i];
221 }
222
223 BOOST_CHECK_EQUAL(*coderProperties.renormingPrecisionBits, 0);
224 BOOST_CHECK_EQUAL(sumUnweighted, nUsedAlphabetSymbols);
225 BOOST_CHECK_EQUAL(sumWeighted, 0);
226 BOOST_CHECK_EQUAL(*coderProperties.nIncompressibleSymbols, 1);
227
228 const auto& estimate = coderProperties.dictSizeEstimate;
229 BOOST_CHECK_EQUAL(estimate.getIndexSize(), 0);
230 BOOST_CHECK_EQUAL(estimate.getIndexSizeB(), 0);
231 BOOST_CHECK_EQUAL(estimate.getFreqSize(), 0);
232 BOOST_CHECK_EQUAL(estimate.getFreqSizeB(), 0);
233 BOOST_CHECK_EQUAL(estimate.getSizeB(0, defaults::MinRenormPrecisionBits), 0);
234}
235
236BOOST_AUTO_TEST_CASE_TEMPLATE(test_singleElementMetrics, histogram_T, histogram_types)
237{
238 std::vector<uint32_t> frequencies{5};
239 histogram_T histogram{frequencies.begin(), frequencies.end(), 2};
240 const auto [min, max] = getMinMax(histogram);
241 const size_t nUsedAlphabetSymbols = countNUsedAlphabetSymbols(histogram);
242
243 const Metrics<source_type> metrics{histogram};
244 const auto& dataProperies = metrics.getDatasetProperties();
245 const auto& coderProperties = metrics.getCoderProperties();
246
247 BOOST_CHECK_EQUAL(dataProperies.min, min);
248 BOOST_CHECK_EQUAL(dataProperies.max, max);
249 BOOST_CHECK_EQUAL(dataProperies.numSamples, histogram.getNumSamples());
250 BOOST_CHECK_EQUAL(dataProperies.alphabetRangeBits, 0);
251 BOOST_CHECK_EQUAL(dataProperies.nUsedAlphabetSymbols, nUsedAlphabetSymbols);
252 BOOST_CHECK_SMALL(dataProperies.entropy, 1e-5f);
253
254 std::array<uint32_t, 32> symbolLengthDistribution{{}};
255 std::array<uint32_t, 32> weightedSymbolLengthDistribution{{}};
256
257 symbolLengthDistribution[0] = 1;
258 weightedSymbolLengthDistribution[0] = 5;
259
260 uint32_t sumUnweighted = 0;
261 uint32_t sumWeighted = 0;
262 for (size_t i = 0; i < 32; ++i) {
263 // BOOST_TEST_MESSAGE(fmt::format("checking length: {}", i));
264 BOOST_CHECK_EQUAL(symbolLengthDistribution[i], dataProperies.symbolLengthDistribution[i]);
265 BOOST_CHECK_EQUAL(weightedSymbolLengthDistribution[i], dataProperies.weightedSymbolLengthDistribution[i]);
266
267 sumUnweighted += dataProperies.symbolLengthDistribution[i];
268 sumWeighted += dataProperies.weightedSymbolLengthDistribution[i];
269 }
270
271 BOOST_CHECK_EQUAL(*coderProperties.renormingPrecisionBits, defaults::MinRenormPrecisionBits);
272 BOOST_CHECK_EQUAL(sumUnweighted, nUsedAlphabetSymbols);
273 BOOST_CHECK_EQUAL(sumWeighted, histogram.getNumSamples());
274 BOOST_CHECK_EQUAL(*coderProperties.nIncompressibleSymbols, 0);
275}
276
277BOOST_AUTO_TEST_CASE_TEMPLATE(test_computeMetrics, histogram_T, histogram_types)
278{
279 std::vector<uint32_t> frequencies{9, 0, 8, 0, 7, 0, 6, 0, 5, 0, 4, 0, 3, 0, 2, 0, 1};
280 histogram_T histogram{frequencies.begin(), frequencies.end(), 0};
281 const auto [min, max] = getMinMax(histogram);
282 const float eps = 1e-2;
283 const size_t nUsedAlphabetSymbols = countNUsedAlphabetSymbols(histogram);
284
285 const Metrics<source_type> metrics{histogram};
286 const auto& dataProperies = metrics.getDatasetProperties();
287 const auto& coderProperties = metrics.getCoderProperties();
288
289 BOOST_CHECK_EQUAL(dataProperies.min, min);
290 BOOST_CHECK_EQUAL(dataProperies.max, max);
291 BOOST_CHECK_EQUAL(dataProperies.numSamples, histogram.getNumSamples());
292 BOOST_CHECK_EQUAL(dataProperies.alphabetRangeBits, internal::numBitsForNSymbols(max - min + 1));
293 BOOST_CHECK_EQUAL(dataProperies.nUsedAlphabetSymbols, nUsedAlphabetSymbols);
294 BOOST_CHECK_CLOSE(dataProperies.entropy, 2.957295041922758, eps);
295
296 std::array<uint32_t, 32> symbolLengthDistribution{{}};
297 std::array<uint32_t, 32> weightedSymbolLengthDistribution{{}};
298
299 weightedSymbolLengthDistribution[2] = 30;
300 weightedSymbolLengthDistribution[3] = 12;
301 weightedSymbolLengthDistribution[4] = 2;
302 weightedSymbolLengthDistribution[5] = 1;
303
304 symbolLengthDistribution[2] = 4;
305 symbolLengthDistribution[3] = 3;
306 symbolLengthDistribution[4] = 1;
307 symbolLengthDistribution[5] = 1;
308
309 uint32_t sumUnweighted = 0;
310 uint32_t sumWeighted = 0;
311 for (size_t i = 0; i < 32; ++i) {
312 BOOST_TEST_MESSAGE(fmt::format("checking length: {}", i));
313 BOOST_CHECK_EQUAL(symbolLengthDistribution[i], dataProperies.symbolLengthDistribution[i]);
314 BOOST_CHECK_EQUAL(weightedSymbolLengthDistribution[i], dataProperies.weightedSymbolLengthDistribution[i]);
315
316 sumUnweighted += dataProperies.symbolLengthDistribution[i];
317 sumWeighted += dataProperies.weightedSymbolLengthDistribution[i];
318 }
319
320 BOOST_CHECK_EQUAL(sumUnweighted, nUsedAlphabetSymbols);
321 BOOST_CHECK_EQUAL(sumWeighted, histogram.getNumSamples());
322 BOOST_CHECK_EQUAL(*coderProperties.renormingPrecisionBits, defaults::MinRenormPrecisionBits);
323 BOOST_CHECK_EQUAL(*coderProperties.nIncompressibleSymbols, 0);
324
325 const auto& estimate = coderProperties.dictSizeEstimate;
326 BOOST_CHECK_EQUAL(estimate.getIndexSize(), 33);
327 BOOST_CHECK_EQUAL(estimate.getIndexSizeB(), 5);
328 BOOST_CHECK_EQUAL(estimate.getFreqSize(), 224);
329 BOOST_CHECK_EQUAL(estimate.getFreqSizeB(), 28);
330 BOOST_CHECK_EQUAL(estimate.getSizeB(nUsedAlphabetSymbols, *coderProperties.renormingPrecisionBits), 21);
331}
332BOOST_AUTO_TEST_SUITE_END()
333
334BOOST_AUTO_TEST_SUITE(test_SizeEstimate)
335BOOST_AUTO_TEST_CASE_TEMPLATE(test_emptySizeEstimate, histogram_T, histogram_types)
336{
337 histogram_T histogram{};
338 Metrics<source_type> metrics{histogram};
339 SizeEstimate estimate{metrics};
340 BOOST_CHECK_EQUAL(estimate.getEntropySizeB(), 0);
341 BOOST_CHECK_EQUAL(estimate.getCompressedDatasetSize<>(1.0), 0);
342 BOOST_CHECK_EQUAL(estimate.getCompressedDictionarySize<>(1.0), 8);
343 BOOST_CHECK_EQUAL(estimate.getIncompressibleSize<>(1.0), 0);
344 BOOST_CHECK_EQUAL(estimate.getPackedDatasetSize<>(1.0), 0);
345 BOOST_CHECK_EQUAL(estimate.preferPacking(1.0), true);
346}
347
348BOOST_AUTO_TEST_CASE_TEMPLATE(test_normalSizeEstimate, histogram_T, histogram_types)
349{
350 constexpr size_t entropySizeB = 17;
351
352 std::vector<uint32_t> frequencies{9, 0, 8, 0, 7, 0, 6, 0, 5, 0, 4, 0, 3, 0, 2, 0, 1};
353 histogram_T histogram{frequencies.begin(), frequencies.end(), 0};
354 Metrics<source_type> metrics{histogram};
355 SizeEstimate estimate{metrics};
356 BOOST_CHECK_EQUAL(estimate.getEntropySizeB(), entropySizeB);
357 BOOST_CHECK_EQUAL(estimate.getCompressedDatasetSize<>(1.0), addEncoderOverheadEstimateB<>(entropySizeB));
358 BOOST_CHECK_EQUAL(estimate.getCompressedDictionarySize<>(1.0), 29);
359 BOOST_CHECK_EQUAL(estimate.getIncompressibleSize<>(1.0), 0);
360 BOOST_CHECK_EQUAL(estimate.getPackedDatasetSize<>(1.0), 29);
361 BOOST_CHECK_EQUAL(estimate.preferPacking(1.0), true);
362}
363BOOST_AUTO_TEST_SUITE_END()
int32_t i
uint32_t source_type
MetricsTester(const DenseHistogram< source_type > &histogram, float_t cutoffPrecision=0.999)
MetricsTester(const SparseHistogram< source_type > &histogram, float_t cutoffPrecision=0.999)
size_t testComputeRenormingPrecision(float_t cutoffPrecision=0.999) noexcept
MetricsTester(const AdaptiveHistogram< source_type > &histogram, float_t cutoffPrecision=0.999)
size_t testComputeIncompressibleCount(gsl::span< source_type > distribution, source_type renormingPrecision) noexcept
const DatasetProperties< source_type > & getDatasetProperties() const noexcept
Definition Metrics.h:52
GLuint index
Definition glcorearb.h:781
GLsizei GLenum const void GLuint GLsizei GLfloat * metrics
Definition glcorearb.h:5500
public interface for building and renorming histograms from source data.
constexpr size_t MinRenormPrecisionBits
Definition defaults.h:56
constexpr size_t MaxRenormPrecisionBits
Definition defaults.h:57
auto trim(typename container_T::iterator begin, typename container_T::iterator end, typename container_T::const_reference zeroElem={}) -> std::pair< typename container_T::iterator, typename container_T::iterator >
Definition algorithm.h:104
auto getMinMax(const container_T &container, typename container_T::const_iterator begin, typename container_T::const_iterator end, typename container_T::const_reference zeroElem={}) -> std::pair< typename container_T::source_type, typename container_T::source_type >
Definition algorithm.h:134
constexpr size_t numBitsForNSymbols(size_t nSymbols) noexcept
Definition utils.h:129
size_t countNUsedAlphabetSymbols(const AdaptiveHistogram< source_T > &histogram)
std::pair< source_T, source_T > getMinMax(const AdaptiveSymbolTable< source_T, symbol_T > &symbolTable)
Common utility functions.
std::array< uint32_t, 32 > symbolLengthDistribution
Definition properties.h:52
constexpr size_t min
constexpr size_t max
BOOST_CHECK(tree)
BOOST_CHECK_EQUAL(triggersD.size(), triggers.size())
mp::mp_flatten< mp::mp_list< small_dense_histogram_types, large_dense_histogram_types, adaptive_histogram_types, sparse_histograms > > histogram_types
BOOST_AUTO_TEST_CASE_TEMPLATE(test_emptyDictSizeEstimate, histogram_T, histogram_types)
BOOST_AUTO_TEST_CASE(test_initDictSizeEstimate)