Project
Loading...
Searching...
No Matches
bench_ransEncodeImpl.cxx
Go to the documentation of this file.
1// Copyright 2019-2023 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
15
17
18#include <vector>
19#include <cstring>
20#include <random>
21#include <algorithm>
22#ifdef RANS_PARALLEL_STL
23#include <execution>
24#endif
25#include <iterator>
26
27#include <gsl/span>
28#include <benchmark/benchmark.h>
29
30#include "rANS/factory.h"
31#include "rANS/histogram.h"
32
37
38#ifdef ENABLE_VTUNE_PROFILER
39#include <ittnotify.h>
40#endif
41
42using count_t = uint32_t;
43using ransState_t = uint64_t;
44using stream_t = uint32_t;
45
46#ifdef RANS_SINGLE_STREAM
47__extension__ using uint128_t = unsigned __int128;
48#endif /* RANS_SINGLE_STREAM */
49
50using namespace o2::rans;
51using namespace o2::rans::internal;
52using namespace o2::rans::utils;
53
54inline constexpr size_t MessageSize = 1ull << 22;
55inline constexpr size_t LowerBound = 1ul << 20;
56inline constexpr size_t StreamBits = toBits<stream_t>();
57
58template <typename source_T>
60{
61 public:
62 explicit SymbolTableData(size_t messageSize)
63 {
64 std::mt19937 mt(0); // same seed we want always the same distrubution of random numbers;
65 const size_t draws = std::min(1ul << 20, static_cast<size_t>(std::numeric_limits<source_T>::max()));
66 const double probability = 0.5;
67 std::binomial_distribution<source_T> dist(draws, probability);
68 const size_t sourceSize = messageSize / sizeof(source_T);
69 mSourceMessage.resize(sourceSize);
70#ifdef RANS_PARALLEL_STL
71 std::generate(std::execution::par_unseq, mSourceMessage.begin(), mSourceMessage.end(), [&dist, &mt]() { return dist(mt); });
72#else
73 std::generate(mSourceMessage.begin(), mSourceMessage.end(), [&dist, &mt]() { return dist(mt); });
74#endif // RANS_PARALLEL_STL
75
76 const auto histogram = makeDenseHistogram::fromSamples(gsl::span<const source_T>(mSourceMessage));
77 Metrics<source_T> metrics{histogram};
78 mRenormedFrequencies = renorm(histogram, metrics);
79
80 double_t expectationValue = std::accumulate(mRenormedFrequencies.begin(), mRenormedFrequencies.end(), 0.0, [this](const double_t& a, const count_t& b) {
81 double_t prb = static_cast<double_t>(b) / static_cast<double_t>(mRenormedFrequencies.getNumSamples());
82 return a + b * prb;
83 });
84
85 mState = ((LowerBound >> mRenormedFrequencies.getRenormingBits()) << StreamBits) * expectationValue;
86 };
87
88 const auto& getSourceMessage() const { return mSourceMessage; };
89 const auto& getRenormedFrequencies() const { return mRenormedFrequencies; };
90
91 ransState_t getState() const { return mState; };
92
93 private:
94 std::vector<source_T> mSourceMessage{};
95 RenormedDenseHistogram<source_T> mRenormedFrequencies{};
96 ransState_t mState{};
97};
98
102
103template <typename T>
104const auto& getData()
105{
106 if constexpr (std::is_same_v<uint8_t, T>) {
107 return Data8;
108 } else if constexpr (std::is_same_v<uint16_t, T>) {
109 return Data16;
110 } else {
111 return Data32;
112 }
113};
114
115template <typename source_T>
116struct SimpleFixture : public benchmark::Fixture {
119
120 void SetUp(const ::benchmark::State& state) final
121 {
122 const auto& sourceMessage = getData<source_T>().getSourceMessage();
123 DenseSymbolTable<source_t, symbol_t> symbolTable{getData<source_T>().getRenormedFrequencies()};
124 for (auto& symbol : sourceMessage) {
125 mSymbols.push_back(symbolTable[symbol]);
126 }
127 }
128
129 void TearDown(const ::benchmark::State& state) final
130 {
131 mSymbols.clear();
132 }
133 std::vector<symbol_t> mSymbols{};
134 ransState_t mState = getData<source_T>().getState();
135 size_t mRenormingBits = getData<source_T>().getRenormedFrequencies().getRenormingBits();
136};
137
138template <typename source_T>
139struct Fixture : public benchmark::Fixture {
142
143 void SetUp(const ::benchmark::State& state) final
144 {
145 const auto& sourceMessage = getData<source_T>().getSourceMessage();
146 DenseSymbolTable<source_t, symbol_t> symbolTable{getData<source_T>().getRenormedFrequencies()};
147 for (auto& symbol : sourceMessage) {
148 mSymbols.push_back(symbolTable[symbol]);
149 }
150 }
151
152 void TearDown(const ::benchmark::State& state) final
153 {
154 mSymbols.clear();
155 }
156 std::vector<symbol_t> mSymbols{};
157 ransState_t mState = getData<source_T>().getState();
158 size_t mRenormingBits = getData<source_T>().getRenormedFrequencies().getRenormingBits();
159};
160
161#ifdef RANS_SIMD
162
163template <typename source_T, simd::SIMDWidth width_V>
164struct SIMDFixture : public benchmark::Fixture {
165 using source_t = source_T;
166 using symbol_t = Symbol;
167
168 void SetUp(const ::benchmark::State& state) final
169 {
170 mState = simd::setAll<width_V>(getData<source_T>().getState());
171 mNSamples = simd::setAll<width_V>(static_cast<double>(pow2(getData<source_T>().getRenormedFrequencies().getRenormingBits())));
172
173 const auto& sourceMessage = getData<source_T>().getSourceMessage();
174 DenseSymbolTable<source_t, symbol_t> symbolTable{getData<source_T>().getRenormedFrequencies()};
175 for (size_t i = 0; i < sourceMessage.size(); i += nElems) {
176 if constexpr (width_V == simd::SIMDWidth::SSE) {
177 mSymbols.push_back({
178 &symbolTable[sourceMessage[i]],
179 &symbolTable[sourceMessage[i + 1]],
180 });
181 }
182 if constexpr (width_V == simd::SIMDWidth::AVX) {
183 mSymbols.push_back({
184 &symbolTable[sourceMessage[i]],
185 &symbolTable[sourceMessage[i + 1]],
186 &symbolTable[sourceMessage[i + 2]],
187 &symbolTable[sourceMessage[i + 3]],
188 });
189 }
190 }
191 }
192
193 void TearDown(const ::benchmark::State& state) final
194 {
195 mSymbols.clear();
196 }
197
198 static constexpr size_t nElems = simd::getElementCount<ransState_t>(width_V);
199 std::vector<std::array<const symbol_t*, nElems>> mSymbols{};
200 simd::simdI_t<width_V> mState;
201 simd::simdD_t<width_V> mNSamples;
202};
203#endif /* RANS_SIMD */
204
205#ifdef RANS_SINGLE_STREAM
207{
208 // x = C(s,x)
209 ransState_t quotient = static_cast<ransState_t>((static_cast<uint128_t>(state) * symbol.getReciprocalFrequency()) >> 64);
210 quotient = quotient >> symbol.getReciprocalShift();
211
212 return state + symbol.getCumulative() + quotient * symbol.getFrequencyComplement();
213};
214#endif /* RANS_SINGLE_STREAM */
215
216ransState_t simpleEncode(ransState_t state, size_t symbolTablePrecision, const Symbol& symbol)
217{
218 // x = C(s,x)
219 return ((state / symbol.getFrequency()) << symbolTablePrecision) + symbol.getCumulative() + (state % symbol.getFrequency());
220};
221
222#ifdef RANS_SIMD
223template <simd::SIMDWidth width_V>
224inline auto SIMDEncode(simd::simdI_t<width_V> states, simd::simdD_t<width_V> nSamples, gsl::span<const Symbol*, simd::getElementCount<ransState_t>(width_V)> symbols)
225{
226 simd::simdIsse_t frequencies;
227 simd::simdIsse_t cumulativeFrequencies;
228 simd::aosToSoa(symbols, &frequencies, &cumulativeFrequencies);
229 return simd::ransEncode(states, simd::int32ToDouble<width_V>(frequencies), simd::int32ToDouble<width_V>(cumulativeFrequencies), nSamples);
230};
231#endif /* RANS_SIMD */
232
233template <typename source_T>
234static void ransSimpleEncodeBenchmark(benchmark::State& st, SimpleFixture<source_T>& fixture)
235{
236 for (auto _ : st) {
237 for (size_t i = 0; i < fixture.mSymbols.size(); ++i) {
238 ransState_t newState = simpleEncode(fixture.mState, fixture.mRenormingBits, fixture.mSymbols[i]);
239 benchmark::DoNotOptimize(newState);
240 }
241 };
242
243 st.SetItemsProcessed(int64_t(st.iterations()) * getData<source_T>().getSourceMessage().size());
244 st.SetBytesProcessed(int64_t(st.iterations()) * getData<source_T>().getSourceMessage().size() * sizeof(source_T));
245};
246
247#ifdef RANS_SINGLE_STREAM
248template <typename source_T>
249static void ransEncodeBenchmark(benchmark::State& st, Fixture<source_T>& fixture)
250{
251 for (auto _ : st) {
252 for (size_t i = 0; i < fixture.mSymbols.size(); ++i) {
253 ransState_t newState = encode(fixture.mState, fixture.mSymbols[i]);
254 benchmark::DoNotOptimize(newState);
255 }
256 };
257
258 st.SetItemsProcessed(int64_t(st.iterations()) * getData<source_T>().getSourceMessage().size());
259 st.SetBytesProcessed(int64_t(st.iterations()) * getData<source_T>().getSourceMessage().size() * sizeof(source_T));
260};
261#endif /* RANS_SINGLE_STREAM */
262
263#ifdef RANS_SIMD
264template <typename source_T, simd::SIMDWidth width_V>
265static void ransSIMDEncodeBenchmark(benchmark::State& st, SIMDFixture<source_T, width_V>& fixture)
266{
267#ifdef ENABLE_VTUNE_PROFILER
268 __itt_resume();
269#endif
270 for (auto _ : st) {
271 for (size_t i = 0; i < fixture.mSymbols.size(); ++i) {
272 auto newStates = SIMDEncode<width_V>(fixture.mState, fixture.mNSamples, fixture.mSymbols[i]);
273 benchmark::DoNotOptimize(newStates);
274 benchmark::ClobberMemory();
275 }
276 }
277#ifdef ENABLE_VTUNE_PROFILER
278 __itt_pause();
279#endif
280
281 st.SetItemsProcessed(int64_t(st.iterations()) * getData<source_T>().getSourceMessage().size());
282 st.SetBytesProcessed(int64_t(st.iterations()) * getData<source_T>().getSourceMessage().size() * sizeof(source_T));
283};
284#endif /* RANS_SIMD */
285
286BENCHMARK_TEMPLATE_DEFINE_F(SimpleFixture, simpleEncode_8, uint8_t)
287(benchmark::State& st)
288{
289 ransSimpleEncodeBenchmark(st, *this);
290};
291BENCHMARK_TEMPLATE_DEFINE_F(SimpleFixture, simpleEncode_16, uint16_t)
292(benchmark::State& st)
293{
294 ransSimpleEncodeBenchmark(st, *this);
295};
296BENCHMARK_TEMPLATE_DEFINE_F(SimpleFixture, simpleEncode_32, uint32_t)
297(benchmark::State& st)
298{
299 ransSimpleEncodeBenchmark(st, *this);
300};
301
// encode benchmarks, one per source type (8, 16 and 32 bit).
// ransEncodeBenchmark is only defined when RANS_SINGLE_STREAM is set, so the
// benchmarks calling it need the same guard to keep other builds compiling.
#ifdef RANS_SINGLE_STREAM
BENCHMARK_TEMPLATE_DEFINE_F(Fixture, encode_8, uint8_t)
(benchmark::State& st)
{
  ransEncodeBenchmark(st, *this);
}

BENCHMARK_TEMPLATE_DEFINE_F(Fixture, encode_16, uint16_t)
(benchmark::State& st)
{
  ransEncodeBenchmark(st, *this);
}

BENCHMARK_TEMPLATE_DEFINE_F(Fixture, encode_32, uint32_t)
(benchmark::State& st)
{
  ransEncodeBenchmark(st, *this);
}
#endif /* RANS_SINGLE_STREAM */
317
318#ifdef RANS_SSE
319BENCHMARK_TEMPLATE_DEFINE_F(SIMDFixture, encodeSSE_8, uint8_t, simd::SIMDWidth::SSE)
320(benchmark::State& st)
321{
322 ransSIMDEncodeBenchmark(st, *this);
323};
324
325BENCHMARK_TEMPLATE_DEFINE_F(SIMDFixture, encodeSSE_16, uint16_t, simd::SIMDWidth::SSE)
326(benchmark::State& st)
327{
328 ransSIMDEncodeBenchmark(st, *this);
329};
330
331BENCHMARK_TEMPLATE_DEFINE_F(SIMDFixture, encodeSSE_32, uint32_t, simd::SIMDWidth::SSE)
332(benchmark::State& st)
333{
334 ransSIMDEncodeBenchmark(st, *this);
335};
336#endif /*RANS_SSE*/
337
338#ifdef RANS_AVX2
339BENCHMARK_TEMPLATE_DEFINE_F(SIMDFixture, encodeAVX_8, uint8_t, simd::SIMDWidth::AVX)
340(benchmark::State& st)
341{
342 ransSIMDEncodeBenchmark(st, *this);
343};
344
345BENCHMARK_TEMPLATE_DEFINE_F(SIMDFixture, encodeAVX_16, uint16_t, simd::SIMDWidth::AVX)
346(benchmark::State& st)
347{
348 ransSIMDEncodeBenchmark(st, *this);
349};
350
351BENCHMARK_TEMPLATE_DEFINE_F(SIMDFixture, encodeAVX_32, uint32_t, simd::SIMDWidth::AVX)
352(benchmark::State& st)
353{
354 ransSIMDEncodeBenchmark(st, *this);
355};
356#endif /* RANS_AVX2 */
357
361
362#ifdef RANS_SINGLE_STREAM
364BENCHMARK_REGISTER_F(Fixture, encode_16);
365BENCHMARK_REGISTER_F(Fixture, encode_32);
366#endif /* RANS_SINGLE_STREAM */
367
368#ifdef RANS_SSE
369BENCHMARK_REGISTER_F(SIMDFixture, encodeSSE_8);
370BENCHMARK_REGISTER_F(SIMDFixture, encodeSSE_16);
371BENCHMARK_REGISTER_F(SIMDFixture, encodeSSE_32);
372#endif /* RANS_SSE */
373
374#ifdef RANS_AVX2
375BENCHMARK_REGISTER_F(SIMDFixture, encodeAVX_8);
376BENCHMARK_REGISTER_F(SIMDFixture, encodeAVX_16);
377BENCHMARK_REGISTER_F(SIMDFixture, encodeAVX_32);
378#endif /* RANS_AVX2 */
379
benchmark::State & state
int32_t i
common helper classes and functions
SourceMessageUniform< uint32_t > sourceMessage
ransState_t simpleEncode(ransState_t state, size_t symbolTablePrecision, const Symbol &symbol)
const SymbolTableData< uint32_t > Data32(MessageSize)
constexpr size_t MessageSize
BENCHMARK_MAIN()
BENCHMARK_REGISTER_F(SimpleFixture, simpleEncode_8)
constexpr size_t StreamBits
const SymbolTableData< uint8_t > Data8(MessageSize)
constexpr size_t LowerBound
uint32_t stream_t
const auto & getData()
uint32_t count_t
const SymbolTableData< uint16_t > Data16(MessageSize)
uint64_t ransState_t
benchmark::State & st
uint64_t ransState_t
const auto & getSourceMessage() const
const auto & getRenormedFrequencies() const
SymbolTableData(size_t messageSize)
ransState_t getState() const
constexpr state_type getReciprocalFrequency() const noexcept
Definition Symbol.h:185
constexpr value_type getCumulative() const noexcept
Definition Symbol.h:184
constexpr value_type getFrequencyComplement() const noexcept
Definition Symbol.h:186
constexpr value_type getReciprocalShift() const noexcept
Definition Symbol.h:187
constexpr value_type getCumulative() const noexcept
Definition Symbol.h:43
constexpr value_type getFrequency() const noexcept
Definition Symbol.h:42
preprocessor defines to enable features based on CPU architecture
static factory classes for building histograms, encoders and decoders.
GLsizei GLenum const void GLuint GLsizei GLfloat * metrics
Definition glcorearb.h:5500
GLboolean GLboolean GLboolean b
Definition glcorearb.h:1233
GLboolean GLboolean GLboolean GLboolean a
Definition glcorearb.h:1233
GLuint * states
Definition glcorearb.h:4932
public interface for building and renorming histograms from source data.
constexpr size_t pow2(size_t n) noexcept
Definition utils.h:165
decltype(auto) renorm(histogram_T histogram, size_t newPrecision, RenormingPolicy renormingPolicy=RenormingPolicy::Auto, size_t lowProbabilityCutoffBits=0)
Definition renorm.h:203
wrapper around basic SIMD operations
basic SIMD datatypes and traits
ransState_t mState
void SetUp(const ::benchmark::State &state) final
std::vector< symbol_t > mSymbols
void TearDown(const ::benchmark::State &state) final
void SetUp(const ::benchmark::State &state) final
std::vector< symbol_t > mSymbols
void TearDown(const ::benchmark::State &state) final
static decltype(auto) fromSamples(source_IT begin, source_IT end, typename std::iterator_traits< source_IT >::value_type min, typename std::iterator_traits< source_IT >::value_type max)
Definition factory.h:144
coder encode(vec, triggers, clusters)