Project
Loading...
Searching...
No Matches
bench_ransEncode.cxx
Go to the documentation of this file.
1// Copyright 2019-2023 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
15
17
18#include <vector>
19#include <cstring>
20#include <random>
21#include <algorithm>
22#ifdef RANS_PARALLEL_STL
23#include <execution>
24#endif
25#include <iterator>
26
27#include <benchmark/benchmark.h>
28
29#include "rANS/factory.h"
30#include "rANS/histogram.h"
31
32#ifdef ENABLE_VTUNE_PROFILER
33#include <ittnotify.h>
34#endif
35
36#include "helpers.h"
37
38using namespace o2::rans;
39
// Size constant for the benchmark source messages: 2^22 (4 Mi).
// NOTE(review): the place where this constant is used is not visible in this
// listing; presumably it is the byte count handed to SourceMessageProxy - confirm.
40inline constexpr size_t MessageSize = 1ull << 22;
41
// Owns a reproducible pseudo-random message that serves as benchmark input.
//
// The message contains `messageSize / sizeof(source_T) + 1` symbols. Each symbol
// is drawn from a binomial distribution with success probability 0.5 and
// min(2^20, numeric_limits<source_T>::max()) trials, so every drawn value is
// representable in source_T.
//
// source_T: integer symbol type of the message.
template <typename source_T>
class SourceMessageProxy
{
 public:
  // messageSize: requested message size in bytes; the symbol count is derived
  // from it by dividing through sizeof(source_T) and adding one.
  SourceMessageProxy(size_t messageSize)
  {
    // The standard only allows short/int/long/long long (and their unsigned
    // variants) as IntType of a random number distribution, so 8-bit symbol
    // types must not be used directly. Draw with a 32-bit type and narrow
    // afterwards; 32 bits are enough because the trial count is capped at 2^20.
    using draw_type = uint32_t;

    std::mt19937 mt(0); // fixed seed: we always want the same distribution of random numbers
    constexpr size_t maxDraws = size_t{1} << 20;
    const size_t draws = std::min(maxDraws, static_cast<size_t>(std::numeric_limits<source_T>::max()));
    const double probability = 0.5;
    std::binomial_distribution<draw_type> dist(static_cast<draw_type>(draws), probability);

    const size_t sourceSize = messageSize / sizeof(source_T) + 1;
    mSourceMessage.resize(sourceSize);

    // Generate sequentially on purpose: the engine and the distribution are
    // shared, mutable state. Calling them from a parallel/unsequenced
    // std::generate is a data race and would make the message depend on
    // thread scheduling, defeating the fixed seed above.
    // A drawn value never exceeds `draws`, hence the narrowing cast is lossless.
    std::generate(mSourceMessage.begin(), mSourceMessage.end(),
                  [&dist, &mt]() { return static_cast<source_T>(dist(mt)); });
  }

  // Read-only access to the generated message.
  const auto& get() const { return mSourceMessage; }

 private:
  std::vector<source_T> mSourceMessage{};
};
68
72
73template <typename T>
74const auto& getMessage()
75{
76 if constexpr (std::is_same_v<uint8_t, T>) {
77 return sourceMessage8.get();
78 } else if constexpr (std::is_same_v<uint16_t, T>) {
79 return sourceMessage16.get();
80 } else {
81 return sourceMessage32.get();
82 }
83};
84
// Benchmark body: times `encoder.process()` over the complete source message
// of symbol type `source_T`, using a dense encoder built for `coderTag_V`.
//
// Steps: build a dense histogram from the message, renorm it, build the
// encoder, run the timed loop, then decode once and compare against the input.
// A failed comparison is reported through `st.SkipWithError`. Finally the
// dataset properties and compression figures are published as counters.
//
// NOTE(review): `decodeBuffer` and `metrics` are used below, but their
// declarations (listing lines 92 and 95) are not present in this listing.
85template <typename source_T, CoderTag coderTag_V>
86void ransCompressionBenchmark(benchmark::State& st)
87{
88 using source_type = source_T;
89
90 const auto& inputData = getMessage<source_type>();
91 EncodeBuffer<source_type> encodeBuffer{inputData.size()};
93
94 const auto histogram = makeDenseHistogram::fromSamples(gsl::span<const source_type>(inputData));
96 const auto renormedHistogram = renorm(histogram, metrics, RenormingPolicy::Auto, 10);
97 auto encoder = makeDenseEncoder<coderTag_V>::fromRenormed(renormedHistogram);
98
// With ENABLE_VTUNE_PROFILER defined, the __itt_resume()/__itt_pause() pair
// brackets the timed loop (presumably to limit profiler collection to it).
99#ifdef ENABLE_VTUNE_PROFILER
100 __itt_resume();
101#endif
// Timed section: one full encode of the message per iteration; the returned
// end pointer is kept and passed to DoNotOptimize.
102 for (auto _ : st) {
103 benchmark::DoNotOptimize(encodeBuffer.encodeBufferEnd = encoder.process(inputData.data(), inputData.data() + inputData.size(), encodeBuffer.buffer.data()));
104 }
105#ifdef ENABLE_VTUNE_PROFILER
106 __itt_pause();
107#endif
108
// Round-trip check (not timed): decode the last encoded result and compare it
// with the input message.
109 auto decoder = makeDecoder<>::fromRenormed(renormedHistogram);
110 decoder.process(encodeBuffer.encodeBufferEnd, decodeBuffer.buffer.data(), inputData.size(), encoder.getNStreams());
111 if (!(decodeBuffer == inputData)) {
112 st.SkipWithError("Missmatch between encoded and decoded Message");
113 }
114
// Reported counters. SourceSize and CompressedSize are byte counts;
// LowerBound = symbol count * Entropy / 8;
// Compression = SourceSize / CompressedSize;
// CompressionWRTEntropy = CompressedSize / LowerBound.
115 const auto& datasetProperties = metrics.getDatasetProperties();
116 st.SetItemsProcessed(static_cast<int64_t>(inputData.size()) * static_cast<int64_t>(st.iterations()));
117 st.SetBytesProcessed(static_cast<int64_t>(inputData.size()) * sizeof(source_type) * static_cast<int64_t>(st.iterations()));
118 st.counters["AlphabetRangeBits"] = datasetProperties.alphabetRangeBits;
119 st.counters["nUsedAlphabetSymbols"] = datasetProperties.nUsedAlphabetSymbols;
120 st.counters["SymbolTablePrecision"] = renormedHistogram.getRenormingBits();
121 st.counters["Entropy"] = datasetProperties.entropy;
122 st.counters["ExpectedCodewordLength"] = computeExpectedCodewordLength(histogram, renormedHistogram);
123 st.counters["SourceSize"] = inputData.size() * sizeof(source_type);
124 st.counters["CompressedSize"] = std::distance(encodeBuffer.buffer.data(), encodeBuffer.encodeBufferEnd) * sizeof(typename decltype(encoder)::stream_type);
125 st.counters["Compression"] = st.counters["SourceSize"] / static_cast<double>(st.counters["CompressedSize"]);
126 st.counters["LowerBound"] = inputData.size() * (static_cast<double>(st.counters["Entropy"]) / 8);
127 st.counters["CompressionWRTEntropy"] = st.counters["CompressedSize"] / st.counters["LowerBound"];
128};
129
// Benchmark body: like the plain dense compression benchmark, but calls the
// `encoder.process()` overload that additionally takes a literals output
// pointer and returns both the encode-buffer end and the literals end.
//
// The literals buffer is sized to one entry per input symbol, and the renorming
// uses the default arguments of `renorm(histogram, metrics)`.
// (Presumably symbols the encoder cannot code are emitted as literals - confirm
// against the encoder implementation.)
//
// NOTE(review): `decodeBuffer` is used below, but its declaration (listing
// line 139) is not present in this listing.
130template <typename source_T, CoderTag coderTag_V>
131void ransLiteralCompressionBenchmark(benchmark::State& st)
132{
133 using source_type = source_T;
134
135 const auto& inputData = getMessage<source_type>();
136 EncodeBuffer<source_type> encodeBuffer{inputData.size()};
137 encodeBuffer.literals.resize(inputData.size(), 0);
138 encodeBuffer.literalsEnd = encodeBuffer.literals.data();
140
141 const auto histogram = makeDenseHistogram::fromSamples(gsl::span<const source_type>(inputData));
142 Metrics<source_type> metrics{histogram};
143 const auto renormedHistogram = renorm(histogram, metrics);
144 auto encoder = makeDenseEncoder<coderTag_V>::fromRenormed(renormedHistogram);
145
// With ENABLE_VTUNE_PROFILER defined, the __itt_resume()/__itt_pause() pair
// brackets the timed loop (presumably to limit profiler collection to it).
146#ifdef ENABLE_VTUNE_PROFILER
147 __itt_resume();
148#endif
// Timed section: the literals write position is reset to the buffer start at
// the beginning of every iteration, so each iteration encodes from scratch.
149 for (auto _ : st) {
150 encodeBuffer.literalsEnd = encodeBuffer.literals.data();
151 benchmark::DoNotOptimize(std::tie(encodeBuffer.encodeBufferEnd, encodeBuffer.literalsEnd) = encoder.process(inputData.data(), inputData.data() + inputData.size(), encodeBuffer.buffer.data(), encodeBuffer.literalsEnd));
152 }
153#ifdef ENABLE_VTUNE_PROFILER
154 __itt_pause();
155#endif
156
// Round-trip check (not timed): decode the last result, passing the literals
// end pointer as well, and compare with the input message.
157 auto decoder = makeDecoder<>::fromRenormed(renormedHistogram);
158 decoder.process(encodeBuffer.encodeBufferEnd, decodeBuffer.buffer.data(), inputData.size(), encoder.getNStreams(), encodeBuffer.literalsEnd);
159 if (!(decodeBuffer == inputData)) {
160 st.SkipWithError("Missmatch between encoded and decoded Message");
161 }
162
// Reported counters. CompressedSize only measures the encode buffer; the
// literals buffer is not included in it.
// LowerBound = symbol count * Entropy / 8;
// Compression = SourceSize / CompressedSize;
// CompressionWRTEntropy = CompressedSize / LowerBound.
163 const auto& datasetProperties = metrics.getDatasetProperties();
164 st.SetItemsProcessed(static_cast<int64_t>(inputData.size()) * static_cast<int64_t>(st.iterations()));
165 st.SetBytesProcessed(static_cast<int64_t>(inputData.size()) * sizeof(source_type) * static_cast<int64_t>(st.iterations()));
166 st.counters["AlphabetRangeBits"] = datasetProperties.alphabetRangeBits;
167 st.counters["nUsedAlphabetSymbols"] = datasetProperties.nUsedAlphabetSymbols;
168 st.counters["SymbolTablePrecision"] = renormedHistogram.getRenormingBits();
169 st.counters["Entropy"] = datasetProperties.entropy;
170 st.counters["ExpectedCodewordLength"] = computeExpectedCodewordLength(histogram, renormedHistogram);
171 st.counters["SourceSize"] = inputData.size() * sizeof(source_type);
172 st.counters["CompressedSize"] = std::distance(encodeBuffer.buffer.data(), encodeBuffer.encodeBufferEnd) * sizeof(typename decltype(encoder)::stream_type);
173 st.counters["Compression"] = st.counters["SourceSize"] / static_cast<double>(st.counters["CompressedSize"]);
174 st.counters["LowerBound"] = inputData.size() * (static_cast<double>(st.counters["Entropy"]) / 8);
175 st.counters["CompressionWRTEntropy"] = st.counters["CompressedSize"] / st.counters["LowerBound"];
176};
177
// Benchmark body of `ransAdaptiveCompressionBenchmark` (the name under which
// this template is registered with BENCHMARK further down in this file).
// NOTE(review): the signature line (listing line 179) and the declaration of
// `decodeBuffer` (listing line 185) are not present in this listing.
//
// Times `encoder.process()` for an encoder built from the renormed *adaptive*
// histogram. A dense histogram of the same samples is built and renormed in
// parallel; it feeds the decoder used for the round-trip check and the
// ExpectedCodewordLength counter.
178template <typename source_T, CoderTag coderTag_V>
180{
181 using source_type = source_T;
182
183 const auto& inputData = getMessage<source_type>();
184 EncodeBuffer<source_type> encodeBuffer{inputData.size()};
186
187 const auto histogram = makeDenseHistogram::fromSamples(gsl::span<const source_type>(inputData));
188 auto adaptiveHistogram = makeAdaptiveHistogram::fromSamples(gsl::span<const source_type>(inputData));
189 Metrics<source_type> metrics{histogram};
190 Metrics<source_type> adaptiveMetrics{adaptiveHistogram};
191 const auto renormedHistogram = renorm(histogram, metrics, RenormingPolicy::Auto, 10);
192 const auto renormedAdaptiveHistogram = renorm(std::move(adaptiveHistogram), adaptiveMetrics, RenormingPolicy::Auto, 10);
193
194 auto encoder = makeAdaptiveEncoder<coderTag_V>::fromRenormed(renormedAdaptiveHistogram);
195
// With ENABLE_VTUNE_PROFILER defined, the __itt_resume()/__itt_pause() pair
// brackets the timed loop (presumably to limit profiler collection to it).
196#ifdef ENABLE_VTUNE_PROFILER
197 __itt_resume();
198#endif
// Timed section: one full encode of the message per iteration.
199 for (auto _ : st) {
200 benchmark::DoNotOptimize(encodeBuffer.encodeBufferEnd = encoder.process(inputData.data(), inputData.data() + inputData.size(), encodeBuffer.buffer.data()));
201 }
202#ifdef ENABLE_VTUNE_PROFILER
203 __itt_pause();
204#endif
205
// Round-trip check (not timed).
// NOTE(review): the decoder is built from the dense renormed histogram while
// the encoder was built from the adaptive one; the check relies on both
// renormings yielding the same symbol table - confirm.
206 auto decoder = makeDecoder<>::fromRenormed(renormedHistogram);
207 decoder.process(encodeBuffer.encodeBufferEnd, decodeBuffer.buffer.data(), inputData.size(), encoder.getNStreams());
208 if (!(decodeBuffer == inputData)) {
209 st.SkipWithError("Missmatch between encoded and decoded Message");
210 }
211
// Reported counters. Dataset properties and SymbolTablePrecision come from the
// adaptive histogram/metrics; ExpectedCodewordLength is computed from the
// dense histogram pair.
// LowerBound = symbol count * Entropy / 8;
// Compression = SourceSize / CompressedSize;
// CompressionWRTEntropy = CompressedSize / LowerBound.
212 const auto& datasetProperties = adaptiveMetrics.getDatasetProperties();
213 st.SetItemsProcessed(static_cast<int64_t>(inputData.size()) * static_cast<int64_t>(st.iterations()));
214 st.SetBytesProcessed(static_cast<int64_t>(inputData.size()) * sizeof(source_type) * static_cast<int64_t>(st.iterations()));
215 st.counters["AlphabetRangeBits"] = datasetProperties.alphabetRangeBits;
216 st.counters["nUsedAlphabetSymbols"] = datasetProperties.nUsedAlphabetSymbols;
217 st.counters["SymbolTablePrecision"] = renormedAdaptiveHistogram.getRenormingBits();
218 st.counters["Entropy"] = datasetProperties.entropy;
219 st.counters["ExpectedCodewordLength"] = computeExpectedCodewordLength(histogram, renormedHistogram);
220 st.counters["SourceSize"] = inputData.size() * sizeof(source_type);
221 st.counters["CompressedSize"] = std::distance(encodeBuffer.buffer.data(), encodeBuffer.encodeBufferEnd) * sizeof(typename decltype(encoder)::stream_type);
222 st.counters["Compression"] = st.counters["SourceSize"] / static_cast<double>(st.counters["CompressedSize"]);
223 st.counters["LowerBound"] = inputData.size() * (static_cast<double>(st.counters["Entropy"]) / 8);
224 st.counters["CompressionWRTEntropy"] = st.counters["CompressedSize"] / st.counters["LowerBound"];
225};
226
// Benchmark body of `ransAdaptiveLiteralCompressionBenchmark` (the name under
// which this template is registered with BENCHMARK further down in this file).
// NOTE(review): the signature line (listing line 228) and the declaration of
// `decodeBuffer` (listing line 236) are not present in this listing.
//
// Times the `encoder.process()` overload with a literals output pointer, for
// an encoder built from the renormed *adaptive* histogram. Both histograms are
// renormed with the default arguments of `renorm(histogram, metrics)`. The
// dense histogram feeds the decoder used for the round-trip check and the
// ExpectedCodewordLength counter.
227template <typename source_T, CoderTag coderTag_V>
229{
230 using source_type = source_T;
231
232 const auto& inputData = getMessage<source_type>();
233 EncodeBuffer<source_type> encodeBuffer{inputData.size()};
234 encodeBuffer.literals.resize(inputData.size(), 0);
235 encodeBuffer.literalsEnd = encodeBuffer.literals.data();
237
238 const auto histogram = makeDenseHistogram::fromSamples(gsl::span<const source_type>(inputData));
239 auto adaptiveHistogram = makeAdaptiveHistogram::fromSamples(gsl::span<const source_type>(inputData));
240 Metrics<source_type> metrics{histogram};
241 Metrics<source_type> adaptiveMetrics{adaptiveHistogram};
242 const auto renormedHistogram = renorm(histogram, metrics);
243 const auto renormedAdaptiveHistogram = renorm(std::move(adaptiveHistogram), adaptiveMetrics);
244
245 auto encoder = makeAdaptiveEncoder<coderTag_V>::fromRenormed(renormedAdaptiveHistogram);
246
// With ENABLE_VTUNE_PROFILER defined, the __itt_resume()/__itt_pause() pair
// brackets the timed loop (presumably to limit profiler collection to it).
247#ifdef ENABLE_VTUNE_PROFILER
248 __itt_resume();
249#endif
// Timed section: the literals write position is reset to the buffer start at
// the beginning of every iteration, so each iteration encodes from scratch.
250 for (auto _ : st) {
251 encodeBuffer.literalsEnd = encodeBuffer.literals.data();
252 benchmark::DoNotOptimize(std::tie(encodeBuffer.encodeBufferEnd, encodeBuffer.literalsEnd) = encoder.process(inputData.data(), inputData.data() + inputData.size(), encodeBuffer.buffer.data(), encodeBuffer.literalsEnd));
253 }
254#ifdef ENABLE_VTUNE_PROFILER
255 __itt_pause();
256#endif
257
// Round-trip check (not timed).
// NOTE(review): the decoder is built from the dense renormed histogram while
// the encoder was built from the adaptive one; the check relies on both
// renormings yielding the same symbol table - confirm.
258 auto decoder = makeDecoder<>::fromRenormed(renormedHistogram);
259 decoder.process(encodeBuffer.encodeBufferEnd, decodeBuffer.buffer.data(), inputData.size(), encoder.getNStreams(), encodeBuffer.literalsEnd);
260 if (!(decodeBuffer == inputData)) {
261 st.SkipWithError("Missmatch between encoded and decoded Message");
262 }
263
// Reported counters. Dataset properties and SymbolTablePrecision come from the
// adaptive histogram/metrics; ExpectedCodewordLength is computed from the
// dense histogram pair. CompressedSize only measures the encode buffer; the
// literals buffer is not included in it.
// LowerBound = symbol count * Entropy / 8;
// Compression = SourceSize / CompressedSize;
// CompressionWRTEntropy = CompressedSize / LowerBound.
264 const auto& datasetProperties = adaptiveMetrics.getDatasetProperties();
265 st.SetItemsProcessed(static_cast<int64_t>(inputData.size()) * static_cast<int64_t>(st.iterations()));
266 st.SetBytesProcessed(static_cast<int64_t>(inputData.size()) * sizeof(source_type) * static_cast<int64_t>(st.iterations()));
267 st.counters["AlphabetRangeBits"] = datasetProperties.alphabetRangeBits;
268 st.counters["nUsedAlphabetSymbols"] = datasetProperties.nUsedAlphabetSymbols;
269 st.counters["SymbolTablePrecision"] = renormedAdaptiveHistogram.getRenormingBits();
270 st.counters["Entropy"] = datasetProperties.entropy;
271 st.counters["ExpectedCodewordLength"] = computeExpectedCodewordLength(histogram, renormedHistogram);
272 st.counters["SourceSize"] = inputData.size() * sizeof(source_type);
273 st.counters["CompressedSize"] = std::distance(encodeBuffer.buffer.data(), encodeBuffer.encodeBufferEnd) * sizeof(typename decltype(encoder)::stream_type);
274 st.counters["Compression"] = st.counters["SourceSize"] / static_cast<double>(st.counters["CompressedSize"]);
275 st.counters["LowerBound"] = inputData.size() * (static_cast<double>(st.counters["Entropy"]) / 8);
276 st.counters["CompressionWRTEntropy"] = st.counters["CompressedSize"] / st.counters["LowerBound"];
277};
278
// Registration of the plain (non-literal) compression benchmarks.
// For each coder tag the dense benchmark is registered for 8-, 16- and 32-bit
// symbols and the adaptive benchmark for 32-bit symbols only.
// The Compat coder is always registered; SingleStream, SSE and AVX2 are
// registered only when the matching RANS_* macro is defined.
279BENCHMARK(ransCompressionBenchmark<uint8_t, CoderTag::Compat>);
280BENCHMARK(ransCompressionBenchmark<uint16_t, CoderTag::Compat>);
281BENCHMARK(ransCompressionBenchmark<uint32_t, CoderTag::Compat>);
282
283BENCHMARK(ransAdaptiveCompressionBenchmark<uint32_t, CoderTag::Compat>);
284//########################################################################################
285
286#ifdef RANS_SINGLE_STREAM
287BENCHMARK(ransCompressionBenchmark<uint8_t, CoderTag::SingleStream>);
288BENCHMARK(ransCompressionBenchmark<uint16_t, CoderTag::SingleStream>);
289BENCHMARK(ransCompressionBenchmark<uint32_t, CoderTag::SingleStream>);
290
291BENCHMARK(ransAdaptiveCompressionBenchmark<uint32_t, CoderTag::SingleStream>);
292#endif /* RANS_SINGLE_STREAM */
293
294//########################################################################################
295
296#ifdef RANS_SSE
297BENCHMARK(ransCompressionBenchmark<uint8_t, CoderTag::SSE>);
298BENCHMARK(ransCompressionBenchmark<uint16_t, CoderTag::SSE>);
299BENCHMARK(ransCompressionBenchmark<uint32_t, CoderTag::SSE>);
300
301BENCHMARK(ransAdaptiveCompressionBenchmark<uint32_t, CoderTag::SSE>);
302#endif /* RANS_SSE */
303
304//########################################################################################
305
306#ifdef RANS_AVX2
307BENCHMARK(ransCompressionBenchmark<uint8_t, CoderTag::AVX2>);
308BENCHMARK(ransCompressionBenchmark<uint16_t, CoderTag::AVX2>);
309BENCHMARK(ransCompressionBenchmark<uint32_t, CoderTag::AVX2>);
310
311BENCHMARK(ransAdaptiveCompressionBenchmark<uint32_t, CoderTag::AVX2>);
312#endif /* RANS_AVX2 */
313
314//########################################################################################
315
// Registration of the literal compression benchmarks.
// For each coder tag the dense literal benchmark is registered for 8-, 16- and
// 32-bit symbols and the adaptive literal benchmark for 32-bit symbols only.
// The Compat coder is always registered; SingleStream, SSE and AVX2 are
// registered only when the matching RANS_* macro is defined.
316BENCHMARK(ransLiteralCompressionBenchmark<uint8_t, CoderTag::Compat>);
317BENCHMARK(ransLiteralCompressionBenchmark<uint16_t, CoderTag::Compat>);
318BENCHMARK(ransLiteralCompressionBenchmark<uint32_t, CoderTag::Compat>);
319
320BENCHMARK(ransAdaptiveLiteralCompressionBenchmark<uint32_t, CoderTag::Compat>);
321
322//########################################################################################
323
324#ifdef RANS_SINGLE_STREAM
325BENCHMARK(ransLiteralCompressionBenchmark<uint8_t, CoderTag::SingleStream>);
326BENCHMARK(ransLiteralCompressionBenchmark<uint16_t, CoderTag::SingleStream>);
327BENCHMARK(ransLiteralCompressionBenchmark<uint32_t, CoderTag::SingleStream>);
328
329BENCHMARK(ransAdaptiveLiteralCompressionBenchmark<uint32_t, CoderTag::SingleStream>);
330#endif /* RANS_SINGLE_STREAM */
331
332//########################################################################################
333
334#ifdef RANS_SSE
335BENCHMARK(ransLiteralCompressionBenchmark<uint8_t, CoderTag::SSE>);
336BENCHMARK(ransLiteralCompressionBenchmark<uint16_t, CoderTag::SSE>);
337BENCHMARK(ransLiteralCompressionBenchmark<uint32_t, CoderTag::SSE>);
338
339BENCHMARK(ransAdaptiveLiteralCompressionBenchmark<uint32_t, CoderTag::SSE>);
340#endif /* RANS_SSE */
341
342//########################################################################################
343
344#ifdef RANS_AVX2
345BENCHMARK(ransLiteralCompressionBenchmark<uint8_t, CoderTag::AVX2>);
346BENCHMARK(ransLiteralCompressionBenchmark<uint16_t, CoderTag::AVX2>);
347BENCHMARK(ransLiteralCompressionBenchmark<uint32_t, CoderTag::AVX2>);
348
349BENCHMARK(ransAdaptiveLiteralCompressionBenchmark<uint32_t, CoderTag::AVX2>);
350#endif /* RANS_AVX2 */
351
std::vector< o2::mid::ColumnData > inputData
benchmark::State & st
const SourceMessageProxy< uint32_t > sourceMessage32
constexpr size_t MessageSize
void ransAdaptiveLiteralCompressionBenchmark(benchmark::State &st)
void ransCompressionBenchmark(benchmark::State &st)
BENCHMARK_MAIN()
const auto & getMessage()
const SourceMessageProxy< uint16_t > sourceMessage16
void ransAdaptiveCompressionBenchmark(benchmark::State &st)
const SourceMessageProxy< uint8_t > sourceMessage8
void ransLiteralCompressionBenchmark(benchmark::State &st)
BENCHMARK(ransCompressionBenchmark< uint8_t, CoderTag::Compat >)
uint32_t source_type
uint32_t stream_type
const auto & get() const
SourceMessageProxy(size_t messageSize)
static constexpr decltype(auto) fromRenormed(const RenormedHistogramConcept< container_T > &renormed)
Definition factory.h:106
static constexpr decltype(auto) fromRenormed(const RenormedDenseHistogram< source_T > &renormed)
Definition factory.h:195
preprocessor defines to enable features based on CPU architecture
static factory classes for building histograms, encoders and decoders.
GLsizei GLenum const void GLuint GLsizei GLfloat * metrics
Definition glcorearb.h:5500
common functionality for rANS benchmarks.
public interface for building and renorming histograms from source data.
double_t computeExpectedCodewordLength(const DenseHistogram< source_T > &histogram, const RenormedDenseHistogram< source_T > &rescaledHistogram)
Definition utils.h:33
decltype(auto) renorm(histogram_T histogram, size_t newPrecision, RenormingPolicy renormingPolicy=RenormingPolicy::Auto, size_t lowProbabilityCutoffBits=0)
Definition renorm.h:203
std::vector< source_T > literals
Definition helpers.h:296
static decltype(auto) fromSamples(source_IT begin, source_IT end)
Definition factory.h:65
static decltype(auto) fromSamples(source_IT begin, source_IT end, typename std::iterator_traits< source_IT >::value_type min, typename std::iterator_traits< source_IT >::value_type max)
Definition factory.h:144
std::string decodeBuffer(int feeId, gsl::span< const std::byte > buffer)