Project
Loading...
Searching...
No Matches
bench_ransPack.cxx
Go to the documentation of this file.
1// Copyright 2019-2023 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
15
17
18#include <vector>
19#include <cstring>
20#include <random>
21#include <algorithm>
22#ifdef RANS_PARALLEL_STL
23#include <execution>
24#endif
25#include <iterator>
26
27#include <benchmark/benchmark.h>
28
30
31#ifdef ENABLE_VTUNE_PROFILER
32#include <ittnotify.h>
33#endif
34
35#include "helpers.h"
36
37using namespace o2::rans;
38
/// Number of elements in every benchmarked message (2^22).
inline constexpr size_t MessageSize = size_t{1} << 22;

/// Element type of the input data used by the benchmarks in this file.
using source_type = uint32_t;
42
/// Creates a vector of `nelems` pseudo-random integers drawn uniformly from the closed interval [min, max].
///
/// The generator is always seeded with the same value, so repeated calls with identical
/// arguments return identical data and benchmark inputs stay reproducible between runs.
///
/// \tparam source_T integer type of the generated elements
/// \param nelems number of elements to generate
/// \param min smallest value that may be generated; defaults to the smallest value of source_T
/// \param max largest value that may be generated; defaults to the largest value of source_T
/// \return vector of size `nelems` holding the generated values
template <typename source_T>
std::vector<source_T> makeRandomUniformVector(size_t nelems, source_T min = std::numeric_limits<source_T>::min(), source_T max = std::numeric_limits<source_T>::max())
{
  std::vector<source_T> result(nelems, 0);
  std::mt19937 mt(0); // fixed seed: we always want the same distribution of random numbers
  std::uniform_int_distribution<source_T> dist(min, max);

  // Deliberately sequential, even when RANS_PARALLEL_STL is defined: the engine and the
  // distribution are shared mutable state, so invoking them from a parallel/unsequenced
  // std::generate would be a data race and would make the output depend on scheduling.
  std::generate(result.begin(), result.end(), [&dist, &mt]() { return dist(mt); });

  return result;
}
58
59static void copyBenchmark(benchmark::State& state)
60{
61 std::vector<source_type> src = makeRandomUniformVector<source_type>(MessageSize);
62 std::vector<source_type> dst(MessageSize, 0);
63 for (auto _ : state) {
64 std::copy(src.begin(), src.end(), dst.begin());
65 };
66 state.SetItemsProcessed(src.size() * state.iterations());
67 state.SetBytesProcessed(src.size() * sizeof(source_type) * state.iterations());
68};
69
70static void packingBenchmark(benchmark::State& state)
71{
72 size_t packingBits = state.range(0);
73
74 std::vector<source_type> src = makeRandomUniformVector<source_type>(MessageSize, 0, utils::pow2(packingBits) - 1);
75 std::vector<uint32_t> dst(MessageSize, 0);
76 for (auto _ : state) {
77 BitPtr iter{dst.data()};
78 for (auto i : src) {
79 iter = internal::pack(iter, i, packingBits);
80 }
81 };
82 state.SetItemsProcessed(src.size() * state.iterations());
83 state.SetBytesProcessed(src.size() * sizeof(uint32_t) * state.iterations());
84
85 std::vector<uint32_t> unpacked(MessageSize, 0);
86
87 BitPtr iter{dst.data()};
88 for (size_t i = 0; i < src.size(); ++i) {
89 unpacked[i] = internal::unpack<uint32_t>(iter, packingBits);
90 iter += packingBits;
91 }
92 if (!std::equal(unpacked.begin(), unpacked.end(), src.begin())) {
93 state.SkipWithError("error in packing");
94 }
95};
96
97static void fastPackBenchmark(benchmark::State& state)
98{
99 size_t packingBits = state.range(0);
100
101 std::vector<source_type> src = makeRandomUniformVector<source_type>(MessageSize, 0, utils::pow2(packingBits) - 1);
102 std::vector<uint32_t> dst(MessageSize, 0);
103#ifdef ENABLE_VTUNE_PROFILER
104 __itt_resume();
105#endif
106 for (auto _ : state) {
107 pack(src.data(), src.size(), dst.data(), packingBits, 0u);
108 };
109#ifdef ENABLE_VTUNE_PROFILER
110 __itt_pause();
111#endif
112 state.SetItemsProcessed(src.size() * state.iterations());
113 state.SetBytesProcessed(src.size() * sizeof(uint32_t) * state.iterations());
114
115 std::vector<uint32_t> unpacked(MessageSize, 0);
116
117 BitPtr iter{dst.data()};
118 for (size_t i = 0; i < src.size(); ++i) {
119 unpacked[i] = internal::unpack<uint32_t>(iter, packingBits);
120 // LOGP(info, "[{}]{:0" + std::to_string(packingBits) + "b}", i, unpacked[i]);
121 iter += packingBits;
122 }
123 size_t i = 0;
124 if (!std::equal(unpacked.begin(), unpacked.end(), src.begin(), [&i](auto a, auto b) -> bool {
125 if (a != b) {
126 LOGP(info, "[{}]{:0x}!={:0x}", i++, a, b);
127 }
128 return a == b;
129 })) {
130 state.SkipWithError("error in packing");
131 }
132};
133
134BENCHMARK(copyBenchmark);
135BENCHMARK(packingBenchmark)->DenseRange(1, 32, 1);
136BENCHMARK(fastPackBenchmark)->DenseRange(1, 32, 1);
benchmark::State & state
int32_t i
constexpr size_t MessageSize
BENCHMARK_MAIN()
std::vector< source_T > makeRandomUniformVector(size_t nelems, source_T min=std::numeric_limits< source_T >::max(), source_T max=std::numeric_limits< source_T >::max())
BENCHMARK(copyBenchmark)
uint32_t source_type
preprocessor defines to enable features based on CPU architecture
GLenum src
Definition glcorearb.h:1767
GLuint64EXT * result
Definition glcorearb.h:5662
GLboolean GLboolean GLboolean b
Definition glcorearb.h:1233
GLenum GLenum dst
Definition glcorearb.h:1767
GLboolean GLboolean GLboolean GLboolean a
Definition glcorearb.h:1233
common functionality for rANS benchmarks.
packs data into a buffer
BitPtr pack(BitPtr pos, uint64_t data, size_t packingWidth)
Definition pack.h:68
constexpr BitPtr pack(const input_T *__restrict inputBegin, size_t extent, output_T *__restrict outputBegin, size_t packingWidth, input_T offset=static_cast< input_T >(0))
Definition pack.h:196
constexpr size_t min
constexpr size_t max