22#ifdef RANS_PARALLEL_STL
28#include <benchmark/benchmark.h>
38#ifdef ENABLE_VTUNE_PROFILER
46#ifdef RANS_SINGLE_STREAM
47__extension__
using uint128_t =
unsigned __int128;
58template <
typename source_T>
65 const size_t draws = std::min(1ul << 20,
static_cast<size_t>(std::numeric_limits<source_T>::max()));
66 const double probability = 0.5;
67 std::binomial_distribution<source_T> dist(draws, probability);
68 const size_t sourceSize = messageSize /
sizeof(
source_T);
69 mSourceMessage.resize(sourceSize);
70#ifdef RANS_PARALLEL_STL
71 std::generate(std::execution::par_unseq, mSourceMessage.begin(), mSourceMessage.end(), [&dist, &mt]() { return dist(mt); });
73 std::generate(mSourceMessage.begin(), mSourceMessage.end(), [&dist, &mt]() { return dist(mt); });
80 double_t expectationValue = std::accumulate(mRenormedFrequencies.begin(), mRenormedFrequencies.end(), 0.0, [
this](
const double_t&
a,
const count_t&
b) {
81 double_t prb = static_cast<double_t>(b) / static_cast<double_t>(mRenormedFrequencies.getNumSamples());
85 mState = ((
LowerBound >> mRenormedFrequencies.getRenormingBits()) <<
StreamBits) * expectationValue;
94 std::vector<source_T> mSourceMessage{};
106 if constexpr (std::is_same_v<uint8_t, T>) {
108 }
else if constexpr (std::is_same_v<uint16_t, T>) {
115template <
typename source_T>
120 void SetUp(const ::benchmark::State& state)
final
122 const auto&
sourceMessage = getData<source_T>().getSourceMessage();
125 mSymbols.push_back(symbolTable[symbol]);
129 void TearDown(const ::benchmark::State& state)
final
135 size_t mRenormingBits = getData<source_T>().getRenormedFrequencies().getRenormingBits();
138template <
typename source_T>
139struct Fixture :
public benchmark::Fixture {
143 void SetUp(const ::benchmark::State& state)
final
145 const auto&
sourceMessage = getData<source_T>().getSourceMessage();
148 mSymbols.push_back(symbolTable[symbol]);
152 void TearDown(const ::benchmark::State& state)
final
158 size_t mRenormingBits = getData<source_T>().getRenormedFrequencies().getRenormingBits();
163template <
typename source_T, simd::SIMDW
idth w
idth_V>
164struct SIMDFixture :
public benchmark::Fixture {
168 void SetUp(const ::benchmark::State&
state)
final
170 mState = simd::setAll<width_V>(getData<source_T>().getState());
171 mNSamples = simd::setAll<width_V>(
static_cast<double>(
pow2(getData<source_T>().getRenormedFrequencies().getRenormingBits())));
173 const auto&
sourceMessage = getData<source_T>().getSourceMessage();
176 if constexpr (width_V == simd::SIMDWidth::SSE) {
182 if constexpr (width_V == simd::SIMDWidth::AVX) {
193 void TearDown(const ::benchmark::State&
state)
final
198 static constexpr size_t nElems = simd::getElementCount<ransState_t>(width_V);
199 std::vector<std::array<const symbol_t*, nElems>> mSymbols{};
200 simd::simdI_t<width_V> mState;
201 simd::simdD_t<width_V> mNSamples;
205#ifdef RANS_SINGLE_STREAM
223template <simd::SIMDW
idth w
idth_V>
224inline auto SIMDEncode(simd::simdI_t<width_V>
states, simd::simdD_t<width_V> nSamples, gsl::span<
const Symbol*, simd::getElementCount<ransState_t>(width_V)> symbols)
226 simd::simdIsse_t frequencies;
227 simd::simdIsse_t cumulativeFrequencies;
228 simd::aosToSoa(symbols, &frequencies, &cumulativeFrequencies);
229 return simd::ransEncode(
states, simd::int32ToDouble<width_V>(frequencies), simd::int32ToDouble<width_V>(cumulativeFrequencies), nSamples);
233template <
typename source_T>
237 for (
size_t i = 0;
i < fixture.
mSymbols.size(); ++
i) {
239 benchmark::DoNotOptimize(newState);
243 st.SetItemsProcessed(int64_t(
st.iterations()) * getData<source_T>().getSourceMessage().size());
244 st.SetBytesProcessed(int64_t(
st.iterations()) * getData<source_T>().getSourceMessage().size() *
sizeof(
source_T));
247#ifdef RANS_SINGLE_STREAM
248template <
typename source_T>
252 for (
size_t i = 0;
i < fixture.
mSymbols.size(); ++
i) {
254 benchmark::DoNotOptimize(newState);
258 st.SetItemsProcessed(int64_t(
st.iterations()) * getData<source_T>().getSourceMessage().size());
259 st.SetBytesProcessed(int64_t(
st.iterations()) * getData<source_T>().getSourceMessage().size() *
sizeof(
source_T));
264template <
typename source_T, simd::SIMDW
idth w
idth_V>
265static void ransSIMDEncodeBenchmark(benchmark::State&
st, SIMDFixture<source_T, width_V>& fixture)
267#ifdef ENABLE_VTUNE_PROFILER
271 for (
size_t i = 0;
i < fixture.mSymbols.size(); ++
i) {
272 auto newStates = SIMDEncode<width_V>(fixture.mState, fixture.mNSamples, fixture.mSymbols[
i]);
273 benchmark::DoNotOptimize(newStates);
274 benchmark::ClobberMemory();
277#ifdef ENABLE_VTUNE_PROFILER
281 st.SetItemsProcessed(int64_t(
st.iterations()) * getData<source_T>().getSourceMessage().size());
282 st.SetBytesProcessed(int64_t(
st.iterations()) * getData<source_T>().getSourceMessage().size() *
sizeof(
source_T));
286BENCHMARK_TEMPLATE_DEFINE_F(
SimpleFixture, simpleEncode_8, uint8_t)
287(benchmark::State&
st)
289 ransSimpleEncodeBenchmark(
st, *
this);
291BENCHMARK_TEMPLATE_DEFINE_F(
SimpleFixture, simpleEncode_16, uint16_t)
292(benchmark::State&
st)
294 ransSimpleEncodeBenchmark(
st, *
this);
296BENCHMARK_TEMPLATE_DEFINE_F(
SimpleFixture, simpleEncode_32, uint32_t)
297(benchmark::State&
st)
299 ransSimpleEncodeBenchmark(
st, *
this);
302BENCHMARK_TEMPLATE_DEFINE_F(
Fixture, encode_8, uint8_t)
303(benchmark::State&
st)
305 ransEncodeBenchmark(
st, *
this);
307BENCHMARK_TEMPLATE_DEFINE_F(
Fixture, encode_16, uint16_t)
308(benchmark::State&
st)
310 ransEncodeBenchmark(
st, *
this);
312BENCHMARK_TEMPLATE_DEFINE_F(
Fixture, encode_32, uint32_t)
313(benchmark::State&
st)
315 ransEncodeBenchmark(
st, *
this);
319BENCHMARK_TEMPLATE_DEFINE_F(SIMDFixture, encodeSSE_8, uint8_t, simd::SIMDWidth::SSE)
320(benchmark::State&
st)
322 ransSIMDEncodeBenchmark(
st, *
this);
325BENCHMARK_TEMPLATE_DEFINE_F(SIMDFixture, encodeSSE_16, uint16_t, simd::SIMDWidth::SSE)
326(benchmark::State&
st)
328 ransSIMDEncodeBenchmark(
st, *
this);
331BENCHMARK_TEMPLATE_DEFINE_F(SIMDFixture, encodeSSE_32, uint32_t, simd::SIMDWidth::SSE)
332(benchmark::State&
st)
334 ransSIMDEncodeBenchmark(
st, *
this);
339BENCHMARK_TEMPLATE_DEFINE_F(SIMDFixture, encodeAVX_8, uint8_t, simd::SIMDWidth::AVX)
340(benchmark::State&
st)
342 ransSIMDEncodeBenchmark(
st, *
this);
345BENCHMARK_TEMPLATE_DEFINE_F(SIMDFixture, encodeAVX_16, uint16_t, simd::SIMDWidth::AVX)
346(benchmark::State&
st)
348 ransSIMDEncodeBenchmark(
st, *
this);
351BENCHMARK_TEMPLATE_DEFINE_F(SIMDFixture, encodeAVX_32, uint32_t, simd::SIMDWidth::AVX)
352(benchmark::State&
st)
354 ransSIMDEncodeBenchmark(
st, *
this);
362#ifdef RANS_SINGLE_STREAM
common helper classes and functions
SourceMessageUniform< uint32_t > sourceMessage
ransState_t simpleEncode(ransState_t state, size_t symbolTablePrecision, const Symbol &symbol)
const SymbolTableData< uint32_t > Data32(MessageSize)
constexpr size_t MessageSize
BENCHMARK_REGISTER_F(SimpleFixture, simpleEncode_8)
constexpr size_t StreamBits
const SymbolTableData< uint8_t > Data8(MessageSize)
constexpr size_t LowerBound
const SymbolTableData< uint16_t > Data16(MessageSize)
const auto & getSourceMessage() const
const auto & getRenormedFrequencies() const
SymbolTableData(size_t messageSize)
ransState_t getState() const
constexpr state_type getReciprocalFrequency() const noexcept
constexpr value_type getCumulative() const noexcept
constexpr value_type getFrequencyComplement() const noexcept
constexpr value_type getReciprocalShift() const noexcept
constexpr value_type getCumulative() const noexcept
constexpr value_type getFrequency() const noexcept
preprocessor defines to enable features based on CPU architecture
static factory classes for building histograms, encoders and decoders.
GLsizei GLenum const void GLuint GLsizei GLfloat * metrics
GLboolean GLboolean GLboolean b
GLboolean GLboolean GLboolean GLboolean a
public interface for building and renorming histograms from source data.
constexpr size_t pow2(size_t n) noexcept
decltype(auto) renorm(histogram_T histogram, size_t newPrecision, RenormingPolicy renormingPolicy=RenormingPolicy::Auto, size_t lowProbabilityCutoffBits=0)
wrapper around basic SIMD operations
basic SIMD datatypes and traits
void SetUp(const ::benchmark::State &state) final
std::vector< symbol_t > mSymbols
void TearDown(const ::benchmark::State &state) final
void SetUp(const ::benchmark::State &state) final
std::vector< symbol_t > mSymbols
void TearDown(const ::benchmark::State &state) final
static decltype(auto) fromSamples(source_IT begin, source_IT end, typename std::iterator_traits< source_IT >::value_type min, typename std::iterator_traits< source_IT >::value_type max)
coder encode(vec, triggers, clusters)