22#ifdef RANS_PARALLEL_STL
27#include <benchmark/benchmark.h>
32#ifdef ENABLE_VTUNE_PROFILER
// Fragment of a class template that builds and caches a synthetic source
// message (presumably SourceMessageProxy<source_T>, judging by the
// declarations listed at the end of this file).
// NOTE(review): this listing skips original lines (class header, constructor
// signature, random engine setup, #else/#endif), so only the visible
// statements are documented.
42template <
typename source_T>
// Generate the message only while the cache is still empty.
48 if (mSourceMessage.empty()) {
// Number of binomial trials: utils::pow2(20), capped at the largest value
// representable in source_T. std::binomial_distribution yields values in
// [0, draws], so the cap keeps every sample inside the range of source_T.
50 const size_t draws = std::min(utils::pow2(20),
static_cast<size_t>(std::numeric_limits<source_T>::max()));
51 const double probability = 0.5;
// NOTE(review): the standard only defines std::binomial_distribution for
// short/int/long/long long and their unsigned variants; a uint8_t
// instantiation (sourceMessage8 is declared in this listing) is formally
// undefined behavior - confirm and consider sampling into a wider type.
52 std::binomial_distribution<source_T> dist(draws, probability);
// Element count: messageSize divided by sizeof(source_T) (so messageSize is
// apparently a byte count - TODO confirm), plus one extra element.
53 const size_t sourceSize = messageSize /
sizeof(
source_T) + 1;
54 mSourceMessage.resize(sourceSize);
// Fill every element with a sample drawn from dist; the parallel variant is
// selected when RANS_PARALLEL_STL is defined.
// NOTE(review): under par_unseq the lambda uses the shared dist and mt
// (both captured by reference) from unsequenced/concurrent invocations;
// unless mt is safe for that (its type is not visible here) this is a data
// race - confirm.
55#ifdef RANS_PARALLEL_STL
56 std::generate(std::execution::par_unseq, mSourceMessage.begin(), mSourceMessage.end(), [&dist, &mt]() { return dist(mt); });
58 std::generate(mSourceMessage.begin(), mSourceMessage.end(), [&dist, &mt]() { return dist(mt); });
// Read-only accessor for the cached message.
63 const auto&
get()
const {
return mSourceMessage; };
// Backing storage for the generated symbols; value-initialized, i.e. empty.
66 std::vector<source_T> mSourceMessage{};
// Fragment of a compile-time dispatch on the symbol type T (presumably the
// body of getMessage<T>() declared in this listing): one branch for uint8_t,
// one for uint16_t.
// NOTE(review): the branch bodies and any further branches are missing from
// this listing; presumably each returns the matching sourceMessage8/16/32
// buffer - TODO confirm.
76 if constexpr (std::is_same_v<uint8_t, T>) {
78 }
else if constexpr (std::is_same_v<uint16_t, T>) {
// Fragment of an encode/decode round-trip benchmark (presumably
// ransCompressionBenchmark<source_T, coderTag_V>, per the declarations listed
// at the end of this file).
// NOTE(review): the listing omits original lines (function signature, buffer
// and coder construction, the benchmark loop, the round-trip comparison), so
// the comments below cover only the visible statements.
85template <
typename source_T, CoderTag coderTag_V>
// Benchmark input: the source message returned by getMessage for this
// symbol type.
90 const auto&
inputData = getMessage<source_type>();
// Renormalize the histogram with the automatic policy. The trailing 10 is
// presumably the low-probability cutoff in bits (cf. the renorm signature
// listed at the end of this file) - TODO confirm for this overload.
96 const auto renormedHistogram =
renorm(histogram,
metrics, RenormingPolicy::Auto, 10);
// Code guarded by ENABLE_VTUNE_PROFILER is not part of this listing.
99#ifdef ENABLE_VTUNE_PROFILER
// Encode the full input range into encodeBuffer and record where the encoded
// data ends; DoNotOptimize keeps the compiler from eliding the call.
103 benchmark::DoNotOptimize(encodeBuffer.encodeBufferEnd = encoder.process(
inputData.data(),
inputData.data() +
inputData.size(), encodeBuffer.buffer.data()));
105#ifdef ENABLE_VTUNE_PROFILER
// Decode inputData.size() symbols into decodeBuffer, starting from the
// recorded end of the encoded data and using the stream count of the encoder.
110 decoder.process(encodeBuffer.encodeBufferEnd,
decodeBuffer.buffer.data(),
inputData.size(), encoder.getNStreams());
// Mark the run as failed. The guarding comparison is missing from this
// listing - presumably it checks the decoded output against inputData.
// NOTE(review): the message spells Mismatch as Missmatch; it is a runtime
// string and is therefore left untouched here.
112 st.SkipWithError(
"Missmatch between encoded and decoded Message");
// Dataset statistics taken from metrics, reported below as user counters.
115 const auto& datasetProperties =
metrics.getDatasetProperties();
// Throughput accounting: items = symbols per iteration x iterations.
116 st.SetItemsProcessed(
static_cast<int64_t
>(
inputData.size()) *
static_cast<int64_t
>(
st.iterations()));
// Bytes processed = symbols x sizeof(source_type) x iterations.
117 st.SetBytesProcessed(
static_cast<int64_t
>(
inputData.size()) *
sizeof(
source_type) *
static_cast<int64_t
>(
st.iterations()));
// Counters describing the dataset and the renormed symbol table.
118 st.counters[
"AlphabetRangeBits"] = datasetProperties.alphabetRangeBits;
119 st.counters[
"nUsedAlphabetSymbols"] = datasetProperties.nUsedAlphabetSymbols;
120 st.counters[
"SymbolTablePrecision"] = renormedHistogram.getRenormingBits();
121 st.counters[
"Entropy"] = datasetProperties.entropy;
// Encoded size in bytes: number of stream_type words between the start of
// the encode buffer and the recorded end pointer, times the word size.
124 st.counters[
"CompressedSize"] = std::distance(encodeBuffer.buffer.data(), encodeBuffer.encodeBufferEnd) *
sizeof(
typename decltype(encoder)
::stream_type);
// Compression ratio = SourceSize / CompressedSize. SourceSize is assigned on
// a line that is missing from this listing - TODO confirm.
125 st.counters[
"Compression"] =
st.counters[
"SourceSize"] /
static_cast<double>(
st.counters[
"CompressedSize"]);
// Entropy bound in bytes: symbols x Entropy / 8 (Entropy is presumably given
// in bits per symbol - TODO confirm).
126 st.counters[
"LowerBound"] =
inputData.size() * (
static_cast<double>(
st.counters[
"Entropy"]) / 8);
// Encoded size relative to that bound (1.0 would mean the bound is met).
127 st.counters[
"CompressionWRTEntropy"] =
st.counters[
"CompressedSize"] /
st.counters[
"LowerBound"];
// Fragment of an encode/decode round-trip benchmark that additionally passes
// a literals buffer to the coders (presumably
// ransLiteralCompressionBenchmark<source_T, coderTag_V>, per the declarations
// listed at the end of this file).
// NOTE(review): the listing omits original lines (function signature,
// histogram/coder construction, the benchmark loop, the round-trip
// comparison), so the comments below cover only the visible statements.
130template <
typename source_T, CoderTag coderTag_V>
// Benchmark input: the source message returned by getMessage for this
// symbol type.
135 const auto&
inputData = getMessage<source_type>();
// Point the literals write position at the start of the literals buffer.
138 encodeBuffer.literalsEnd = encodeBuffer.literals.data();
// Code guarded by ENABLE_VTUNE_PROFILER is not part of this listing.
146#ifdef ENABLE_VTUNE_PROFILER
// Reset the literals write position again right before encoding (presumably
// once per benchmark iteration; the loop header is not in this listing).
150 encodeBuffer.literalsEnd = encodeBuffer.literals.data();
// Encode the full input range. The encoder returns two end positions, which
// std::tie unpacks into the encoded-data end and the literals end;
// DoNotOptimize keeps the compiler from eliding the call.
151 benchmark::DoNotOptimize(std::tie(encodeBuffer.encodeBufferEnd, encodeBuffer.literalsEnd) = encoder.process(
inputData.data(),
inputData.data() +
inputData.size(), encodeBuffer.buffer.data(), encodeBuffer.literalsEnd));
153#ifdef ENABLE_VTUNE_PROFILER
// Decode inputData.size() symbols into decodeBuffer, starting from the
// recorded end of the encoded data, using the stream count of the encoder
// and the recorded end of the literals.
158 decoder.process(encodeBuffer.encodeBufferEnd,
decodeBuffer.buffer.data(),
inputData.size(), encoder.getNStreams(), encodeBuffer.literalsEnd);
// Mark the run as failed. The guarding comparison is missing from this
// listing - presumably it checks the decoded output against inputData.
// NOTE(review): the message spells Mismatch as Missmatch; it is a runtime
// string and is therefore left untouched here.
160 st.SkipWithError(
"Missmatch between encoded and decoded Message");
// Dataset statistics taken from metrics, reported below as user counters.
163 const auto& datasetProperties =
metrics.getDatasetProperties();
// Throughput accounting: items = symbols per iteration x iterations.
164 st.SetItemsProcessed(
static_cast<int64_t
>(
inputData.size()) *
static_cast<int64_t
>(
st.iterations()));
// Bytes processed = symbols x sizeof(source_type) x iterations.
165 st.SetBytesProcessed(
static_cast<int64_t
>(
inputData.size()) *
sizeof(
source_type) *
static_cast<int64_t
>(
st.iterations()));
// Counters describing the dataset and the renormed symbol table
// (renormedHistogram is defined on a line missing from this listing).
166 st.counters[
"AlphabetRangeBits"] = datasetProperties.alphabetRangeBits;
167 st.counters[
"nUsedAlphabetSymbols"] = datasetProperties.nUsedAlphabetSymbols;
168 st.counters[
"SymbolTablePrecision"] = renormedHistogram.getRenormingBits();
169 st.counters[
"Entropy"] = datasetProperties.entropy;
// Encoded size in bytes: number of stream_type words between the start of
// the encode buffer and the recorded end pointer, times the word size.
// NOTE(review): the visible expression does not add the literals buffer to
// this size - confirm whether that is intended.
172 st.counters[
"CompressedSize"] = std::distance(encodeBuffer.buffer.data(), encodeBuffer.encodeBufferEnd) *
sizeof(
typename decltype(encoder)
::stream_type);
// Compression ratio = SourceSize / CompressedSize. SourceSize is assigned on
// a line that is missing from this listing - TODO confirm.
173 st.counters[
"Compression"] =
st.counters[
"SourceSize"] /
static_cast<double>(
st.counters[
"CompressedSize"]);
// Entropy bound in bytes: symbols x Entropy / 8 (Entropy is presumably given
// in bits per symbol - TODO confirm).
174 st.counters[
"LowerBound"] =
inputData.size() * (
static_cast<double>(
st.counters[
"Entropy"]) / 8);
// Encoded size relative to that bound (1.0 would mean the bound is met).
175 st.counters[
"CompressionWRTEntropy"] =
st.counters[
"CompressedSize"] /
st.counters[
"LowerBound"];
// Fragment of an encode/decode round-trip benchmark that works with a second,
// adaptive histogram (presumably
// ransAdaptiveCompressionBenchmark<source_T, coderTag_V>, per the
// declarations listed at the end of this file).
// NOTE(review): the listing omits original lines (function signature,
// histogram/coder construction, the benchmark loop, the round-trip
// comparison), so the comments below cover only the visible statements.
178template <
typename source_T, CoderTag coderTag_V>
// Benchmark input: the source message returned by getMessage for this
// symbol type.
183 const auto&
inputData = getMessage<source_type>();
// Renormalize both histograms with the automatic policy. The trailing 10 is
// presumably the low-probability cutoff in bits (cf. the renorm signature
// listed at the end of this file) - TODO confirm for this overload.
191 const auto renormedHistogram =
renorm(histogram,
metrics, RenormingPolicy::Auto, 10);
// The adaptive histogram is moved into renorm, so adaptiveHistogram must not
// be used after this statement.
192 const auto renormedAdaptiveHistogram =
renorm(std::move(adaptiveHistogram), adaptiveMetrics, RenormingPolicy::Auto, 10);
// Code guarded by ENABLE_VTUNE_PROFILER is not part of this listing.
196#ifdef ENABLE_VTUNE_PROFILER
// Encode the full input range into encodeBuffer and record where the encoded
// data ends; DoNotOptimize keeps the compiler from eliding the call.
200 benchmark::DoNotOptimize(encodeBuffer.encodeBufferEnd = encoder.process(
inputData.data(),
inputData.data() +
inputData.size(), encodeBuffer.buffer.data()));
202#ifdef ENABLE_VTUNE_PROFILER
// Decode inputData.size() symbols into decodeBuffer, starting from the
// recorded end of the encoded data and using the stream count of the encoder.
207 decoder.process(encodeBuffer.encodeBufferEnd,
decodeBuffer.buffer.data(),
inputData.size(), encoder.getNStreams());
// Mark the run as failed. The guarding comparison is missing from this
// listing - presumably it checks the decoded output against inputData.
// NOTE(review): the message spells Mismatch as Missmatch; it is a runtime
// string and is therefore left untouched here.
209 st.SkipWithError(
"Missmatch between encoded and decoded Message");
// Dataset statistics come from the adaptive metrics in this variant.
212 const auto& datasetProperties = adaptiveMetrics.getDatasetProperties();
// Throughput accounting: items = symbols per iteration x iterations.
213 st.SetItemsProcessed(
static_cast<int64_t
>(
inputData.size()) *
static_cast<int64_t
>(
st.iterations()));
// Bytes processed = symbols x sizeof(source_type) x iterations.
214 st.SetBytesProcessed(
static_cast<int64_t
>(
inputData.size()) *
sizeof(
source_type) *
static_cast<int64_t
>(
st.iterations()));
// Counters describing the dataset; the table precision is read from the
// renormed adaptive histogram.
215 st.counters[
"AlphabetRangeBits"] = datasetProperties.alphabetRangeBits;
216 st.counters[
"nUsedAlphabetSymbols"] = datasetProperties.nUsedAlphabetSymbols;
217 st.counters[
"SymbolTablePrecision"] = renormedAdaptiveHistogram.getRenormingBits();
218 st.counters[
"Entropy"] = datasetProperties.entropy;
// Encoded size in bytes: number of stream_type words between the start of
// the encode buffer and the recorded end pointer, times the word size.
221 st.counters[
"CompressedSize"] = std::distance(encodeBuffer.buffer.data(), encodeBuffer.encodeBufferEnd) *
sizeof(
typename decltype(encoder)
::stream_type);
// Compression ratio = SourceSize / CompressedSize. SourceSize is assigned on
// a line that is missing from this listing - TODO confirm.
222 st.counters[
"Compression"] =
st.counters[
"SourceSize"] /
static_cast<double>(
st.counters[
"CompressedSize"]);
// Entropy bound in bytes: symbols x Entropy / 8 (Entropy is presumably given
// in bits per symbol - TODO confirm).
223 st.counters[
"LowerBound"] =
inputData.size() * (
static_cast<double>(
st.counters[
"Entropy"]) / 8);
// Encoded size relative to that bound (1.0 would mean the bound is met).
224 st.counters[
"CompressionWRTEntropy"] =
st.counters[
"CompressedSize"] /
st.counters[
"LowerBound"];
// Fragment of an encode/decode round-trip benchmark that combines an adaptive
// histogram with a literals buffer (presumably
// ransAdaptiveLiteralCompressionBenchmark<source_T, coderTag_V>, per the
// declarations listed at the end of this file).
// NOTE(review): the listing omits original lines (function signature,
// histogram/coder construction, the benchmark loop, the round-trip
// comparison), so the comments below cover only the visible statements.
227template <
typename source_T, CoderTag coderTag_V>
// Benchmark input: the source message returned by getMessage for this
// symbol type.
232 const auto&
inputData = getMessage<source_type>();
// Point the literals write position at the start of the literals buffer.
235 encodeBuffer.literalsEnd = encodeBuffer.literals.data();
// Renormalize the adaptive histogram with the default arguments of renorm
// (no explicit policy or cutoff, unlike the non-literal adaptive variant).
// The histogram is moved, so adaptiveHistogram must not be used afterwards.
243 const auto renormedAdaptiveHistogram =
renorm(std::move(adaptiveHistogram), adaptiveMetrics);
// Code guarded by ENABLE_VTUNE_PROFILER is not part of this listing.
247#ifdef ENABLE_VTUNE_PROFILER
// Reset the literals write position again right before encoding (presumably
// once per benchmark iteration; the loop header is not in this listing).
251 encodeBuffer.literalsEnd = encodeBuffer.literals.data();
// Encode the full input range. The encoder returns two end positions, which
// std::tie unpacks into the encoded-data end and the literals end;
// DoNotOptimize keeps the compiler from eliding the call.
252 benchmark::DoNotOptimize(std::tie(encodeBuffer.encodeBufferEnd, encodeBuffer.literalsEnd) = encoder.process(
inputData.data(),
inputData.data() +
inputData.size(), encodeBuffer.buffer.data(), encodeBuffer.literalsEnd));
254#ifdef ENABLE_VTUNE_PROFILER
// Decode inputData.size() symbols into decodeBuffer, starting from the
// recorded end of the encoded data, using the stream count of the encoder
// and the recorded end of the literals.
259 decoder.process(encodeBuffer.encodeBufferEnd,
decodeBuffer.buffer.data(),
inputData.size(), encoder.getNStreams(), encodeBuffer.literalsEnd);
// Mark the run as failed. The guarding comparison is missing from this
// listing - presumably it checks the decoded output against inputData.
// NOTE(review): the message spells Mismatch as Missmatch; it is a runtime
// string and is therefore left untouched here.
261 st.SkipWithError(
"Missmatch between encoded and decoded Message");
// Dataset statistics come from the adaptive metrics in this variant.
264 const auto& datasetProperties = adaptiveMetrics.getDatasetProperties();
// Throughput accounting: items = symbols per iteration x iterations.
265 st.SetItemsProcessed(
static_cast<int64_t
>(
inputData.size()) *
static_cast<int64_t
>(
st.iterations()));
// Bytes processed = symbols x sizeof(source_type) x iterations.
266 st.SetBytesProcessed(
static_cast<int64_t
>(
inputData.size()) *
sizeof(
source_type) *
static_cast<int64_t
>(
st.iterations()));
// Counters describing the dataset; the table precision is read from the
// renormed adaptive histogram.
267 st.counters[
"AlphabetRangeBits"] = datasetProperties.alphabetRangeBits;
268 st.counters[
"nUsedAlphabetSymbols"] = datasetProperties.nUsedAlphabetSymbols;
269 st.counters[
"SymbolTablePrecision"] = renormedAdaptiveHistogram.getRenormingBits();
270 st.counters[
"Entropy"] = datasetProperties.entropy;
// Encoded size in bytes: number of stream_type words between the start of
// the encode buffer and the recorded end pointer, times the word size.
// NOTE(review): the visible expression does not add the literals buffer to
// this size - confirm whether that is intended.
273 st.counters[
"CompressedSize"] = std::distance(encodeBuffer.buffer.data(), encodeBuffer.encodeBufferEnd) *
sizeof(
typename decltype(encoder)
::stream_type);
// Compression ratio = SourceSize / CompressedSize. SourceSize is assigned on
// a line that is missing from this listing - TODO confirm.
274 st.counters[
"Compression"] =
st.counters[
"SourceSize"] /
static_cast<double>(
st.counters[
"CompressedSize"]);
// Entropy bound in bytes: symbols x Entropy / 8 (Entropy is presumably given
// in bits per symbol - TODO confirm).
275 st.counters[
"LowerBound"] =
inputData.size() * (
static_cast<double>(
st.counters[
"Entropy"]) / 8);
// Encoded size relative to that bound (1.0 would mean the bound is met).
276 st.counters[
"CompressionWRTEntropy"] =
st.counters[
"CompressedSize"] /
st.counters[
"LowerBound"];
// Benchmark registrations for the coders without a literals buffer. For every
// coder tag the static benchmark is registered for uint8_t, uint16_t and
// uint32_t symbols, and the adaptive benchmark for uint32_t only.
// NOTE(review): this listing drops original lines in this region (the #endif
// lines and presumably the guards around the SSE and AVX2 groups).
// CoderTag::Compat registrations.
279BENCHMARK(ransCompressionBenchmark<uint8_t, CoderTag::Compat>);
280BENCHMARK(ransCompressionBenchmark<uint16_t, CoderTag::Compat>);
281BENCHMARK(ransCompressionBenchmark<uint32_t, CoderTag::Compat>);
283BENCHMARK(ransAdaptiveCompressionBenchmark<uint32_t, CoderTag::Compat>);
// CoderTag::SingleStream registrations, compiled only when
// RANS_SINGLE_STREAM is defined.
286#ifdef RANS_SINGLE_STREAM
287BENCHMARK(ransCompressionBenchmark<uint8_t, CoderTag::SingleStream>);
288BENCHMARK(ransCompressionBenchmark<uint16_t, CoderTag::SingleStream>);
289BENCHMARK(ransCompressionBenchmark<uint32_t, CoderTag::SingleStream>);
291BENCHMARK(ransAdaptiveCompressionBenchmark<uint32_t, CoderTag::SingleStream>);
// CoderTag::SSE registrations.
297BENCHMARK(ransCompressionBenchmark<uint8_t, CoderTag::SSE>);
298BENCHMARK(ransCompressionBenchmark<uint16_t, CoderTag::SSE>);
299BENCHMARK(ransCompressionBenchmark<uint32_t, CoderTag::SSE>);
301BENCHMARK(ransAdaptiveCompressionBenchmark<uint32_t, CoderTag::SSE>);
// CoderTag::AVX2 registrations.
307BENCHMARK(ransCompressionBenchmark<uint8_t, CoderTag::AVX2>);
308BENCHMARK(ransCompressionBenchmark<uint16_t, CoderTag::AVX2>);
309BENCHMARK(ransCompressionBenchmark<uint32_t, CoderTag::AVX2>);
311BENCHMARK(ransAdaptiveCompressionBenchmark<uint32_t, CoderTag::AVX2>);
// Benchmark registrations for the coders that use a literals buffer. For
// every coder tag the static literal benchmark is registered for uint8_t,
// uint16_t and uint32_t symbols, and the adaptive literal benchmark for
// uint32_t only.
// NOTE(review): this listing drops original lines in this region (the #endif
// lines and presumably the guards around the SSE and AVX2 groups).
// CoderTag::Compat registrations.
316BENCHMARK(ransLiteralCompressionBenchmark<uint8_t, CoderTag::Compat>);
317BENCHMARK(ransLiteralCompressionBenchmark<uint16_t, CoderTag::Compat>);
318BENCHMARK(ransLiteralCompressionBenchmark<uint32_t, CoderTag::Compat>);
320BENCHMARK(ransAdaptiveLiteralCompressionBenchmark<uint32_t, CoderTag::Compat>);
// CoderTag::SingleStream registrations, compiled only when
// RANS_SINGLE_STREAM is defined.
324#ifdef RANS_SINGLE_STREAM
325BENCHMARK(ransLiteralCompressionBenchmark<uint8_t, CoderTag::SingleStream>);
326BENCHMARK(ransLiteralCompressionBenchmark<uint16_t, CoderTag::SingleStream>);
327BENCHMARK(ransLiteralCompressionBenchmark<uint32_t, CoderTag::SingleStream>);
329BENCHMARK(ransAdaptiveLiteralCompressionBenchmark<uint32_t, CoderTag::SingleStream>);
// CoderTag::SSE registrations.
335BENCHMARK(ransLiteralCompressionBenchmark<uint8_t, CoderTag::SSE>);
336BENCHMARK(ransLiteralCompressionBenchmark<uint16_t, CoderTag::SSE>);
337BENCHMARK(ransLiteralCompressionBenchmark<uint32_t, CoderTag::SSE>);
339BENCHMARK(ransAdaptiveLiteralCompressionBenchmark<uint32_t, CoderTag::SSE>);
// CoderTag::AVX2 registrations.
345BENCHMARK(ransLiteralCompressionBenchmark<uint8_t, CoderTag::AVX2>);
346BENCHMARK(ransLiteralCompressionBenchmark<uint16_t, CoderTag::AVX2>);
347BENCHMARK(ransLiteralCompressionBenchmark<uint32_t, CoderTag::AVX2>);
349BENCHMARK(ransAdaptiveLiteralCompressionBenchmark<uint32_t, CoderTag::AVX2>);
std::vector< o2::mid::ColumnData > inputData
const SourceMessageProxy< uint32_t > sourceMessage32
constexpr size_t MessageSize
void ransAdaptiveLiteralCompressionBenchmark(benchmark::State &st)
void ransCompressionBenchmark(benchmark::State &st)
const auto & getMessage()
const SourceMessageProxy< uint16_t > sourceMessage16
void ransAdaptiveCompressionBenchmark(benchmark::State &st)
const SourceMessageProxy< uint8_t > sourceMessage8
void ransLiteralCompressionBenchmark(benchmark::State &st)
BENCHMARK(ransCompressionBenchmark< uint8_t, CoderTag::Compat >)
SourceMessageProxy(size_t messageSize)
static constexpr decltype(auto) fromRenormed(const RenormedHistogramConcept< container_T > &renormed)
static constexpr decltype(auto) fromRenormed(const RenormedDenseHistogram< source_T > &renormed)
preprocessor defines to enable features based on CPU architecture
static factory classes for building histograms, encoders and decoders.
GLsizei GLenum const void GLuint GLsizei GLfloat * metrics
common functionality for rANS benchmarks.
public interface for building and renorming histograms from source data.
double_t computeExpectedCodewordLength(const DenseHistogram< source_T > &histogram, const RenormedDenseHistogram< source_T > &rescaledHistogram)
decltype(auto) renorm(histogram_T histogram, size_t newPrecision, RenormingPolicy renormingPolicy=RenormingPolicy::Auto, size_t lowProbabilityCutoffBits=0)
std::vector< source_T > literals
static decltype(auto) fromSamples(source_IT begin, source_IT end)
static decltype(auto) fromSamples(source_IT begin, source_IT end, typename std::iterator_traits< source_IT >::value_type min, typename std::iterator_traits< source_IT >::value_type max)
std::string decodeBuffer(int feeId, gsl::span< const std::byte > buffer)