22#ifdef RANS_PARALLEL_STL
27#include <benchmark/benchmark.h>
31#ifdef ENABLE_VTUNE_PROFILER
43template <
typename source_T>
46 std::vector<source_T>
result(nelems, 0);
48 std::uniform_int_distribution<source_T> dist(
min,
max);
50#ifdef RANS_PARALLEL_STL
51 std::generate(std::execution::par_unseq,
result.begin(),
result.end(), [&dist, &mt]() { return dist(mt); });
53 std::generate(
result.begin(),
result.end(), [&dist, &mt]() { return dist(mt); });
59static void copyBenchmark(benchmark::State& state)
61 std::vector<source_type>
src = makeRandomUniformVector<source_type>(
MessageSize);
63 for (
auto _ :
state) {
64 std::copy(
src.begin(),
src.end(),
dst.begin());
70static void packingBenchmark(benchmark::State& state)
72 size_t packingBits =
state.range(0);
74 std::vector<source_type>
src = makeRandomUniformVector<source_type>(
MessageSize, 0, utils::pow2(packingBits) - 1);
76 for (
auto _ :
state) {
83 state.SetBytesProcessed(
src.size() *
sizeof(uint32_t) *
state.iterations());
88 for (
size_t i = 0;
i <
src.size(); ++
i) {
89 unpacked[
i] = internal::unpack<uint32_t>(iter, packingBits);
92 if (!std::equal(unpacked.begin(), unpacked.end(),
src.begin())) {
93 state.SkipWithError(
"error in packing");
97static void fastPackBenchmark(benchmark::State& state)
99 size_t packingBits =
state.range(0);
101 std::vector<source_type>
src = makeRandomUniformVector<source_type>(
MessageSize, 0, utils::pow2(packingBits) - 1);
103#ifdef ENABLE_VTUNE_PROFILER
106 for (
auto _ :
state) {
109#ifdef ENABLE_VTUNE_PROFILER
113 state.SetBytesProcessed(
src.size() *
sizeof(uint32_t) *
state.iterations());
118 for (
size_t i = 0;
i <
src.size(); ++
i) {
119 unpacked[
i] = internal::unpack<uint32_t>(iter, packingBits);
124 if (!std::equal(unpacked.begin(), unpacked.end(),
src.begin(), [&
i](
auto a,
auto b) ->
bool {
126 LOGP(info,
"[{}]{:0x}!={:0x}", i++, a, b);
130 state.SkipWithError(
"error in packing");
constexpr size_t MessageSize
std::vector< source_T > makeRandomUniformVector(size_t nelems, source_T min=std::numeric_limits< source_T >::max(), source_T max=std::numeric_limits< source_T >::max())
preprocessor defines to enable features based on CPU architecture
GLboolean GLboolean GLboolean b
GLboolean GLboolean GLboolean GLboolean a
common functionality for rANS benchmarks.
BitPtr pack(BitPtr pos, uint64_t data, size_t packingWidth)
constexpr BitPtr pack(const input_T *__restrict inputBegin, size_t extent, output_T *__restrict outputBegin, size_t packingWidth, input_T offset=static_cast< input_T >(0))