Project
Loading...
Searching...
No Matches
test_ransSIMDEncoderKernels.cxx
Go to the documentation of this file.
1// Copyright 2019-2023 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
15
16#define BOOST_TEST_MODULE Utility test
17#define BOOST_TEST_MAIN
18#define BOOST_TEST_DYN_LINK
19
20#include <boost/test/unit_test.hpp>
21#include <boost/mpl/list.hpp>
22
24
25#ifdef RANS_SIMD
26
27#include <vector>
28#include <type_traits>
29
33
34using namespace o2::rans::internal::simd;
35using namespace o2::rans::internal;
36using namespace o2::rans::utils;
37
38// clang-format off
// Type list over which the simd_RansEncode test template is instantiated.
// The SSE variant is always part of the list; the RANS_AVX2 guard below leaves
// room for entries that are only available when AVX2 support is compiled in.
39using pd_types = boost::mpl::list<pd_t<SIMDWidth::SSE>
40#ifdef RANS_AVX2
42#endif /* RANS_AVX2 */
43 >;
44
// Type list over which the simd_cmpgeq_epi64 test template is instantiated.
// The SSE variant is always part of the list; the RANS_AVX2 guard below leaves
// room for entries that are only available when AVX2 support is compiled in.
45using epi64_types = boost::mpl::list<epi64_t<SIMDWidth::SSE>
46#ifdef RANS_AVX2
48#endif /* RANS_AVX2 */
49 >;
50
// Type list of 32-bit integer SIMD arrays, built the same way as pd_types and
// epi64_types: SSE always, with a RANS_AVX2 guard for AVX2-only entries.
// NOTE(review): no test in the visible part of this file uses this list - confirm it is still needed.
51using epi32_types = boost::mpl::list<epi32_t<SIMDWidth::SSE>
52#ifdef RANS_AVX2
54#endif /* RANS_AVX2 */
55 >;
56// clang-format on
57
// Fixture providing the inputs and the scalar reference results for the SIMD
// rANS encode kernel test.
//
// mState is the (integer) encoder state fed to the kernel. mNormalization,
// mFrequency and mCumulative hold the remaining inputs as doubles, since the
// test passes them to the kernel through pd_t arrays. mResultState[i] is the
// state expected after encoding symbol i, computed with integer arithmetic.
struct RANSEncodeFixture {

  uint64_t mState{};
  double mNormalization{};
  std::vector<double> mFrequency{};
  std::vector<double> mCumulative{};
  std::vector<uint64_t> mResultState{};

  RANSEncodeFixture()
  {
    // uint64_t{1} instead of 1ul: the width of unsigned long is platform dependent
    const uint64_t state = uint64_t{1} << 21;
    const std::vector<uint32_t> frequency{1, 1, 997, 1234};
    const std::vector<uint32_t> cumulative{0, 321, 1, (1u << 16) - 1234};
    const uint64_t normalization = uint64_t{1} << 16;

    // the state stays an integer: routing it through double would silently
    // lose precision for any value above 2^53
    mState = state;

    // everything else is converted to double
    mNormalization = static_cast<double>(normalization);
    mFrequency.assign(frequency.begin(), frequency.end());
    mCumulative.assign(cumulative.begin(), cumulative.end());

    // reference result based on the rANS formula:
    //   newState = normalization * (state / frequency) + (state % frequency) + cumulative
    mResultState.reserve(frequency.size());
    for (size_t i = 0; i < frequency.size(); ++i) {
      mResultState.push_back(normalization * (state / frequency[i]) + (state % frequency[i]) + cumulative[i]);
    }
  }
};
86
87BOOST_FIXTURE_TEST_SUITE(testRANSEncode, RANSEncodeFixture)
88
89BOOST_AUTO_TEST_CASE_TEMPLATE(simd_RansEncode, pd_T, pd_types)
90{
91 using epi64_T = epi64_t<simdWidth_v<pd_T>>;
92
93 const size_t nTests = mFrequency.size();
94
95 for (size_t i = 0; i < nTests; ++i) {
96 const epi64_T state{mState};
97 const pd_T frequencyPD{mFrequency[i]};
98 const pd_T cumulativePD{mCumulative[i]};
99 const pd_T normalizationPD{mNormalization};
100 epi64_T result{0};
101
102 result = store<uint64_t>(ransEncode(load(state), load(frequencyPD), load(cumulativePD), load(normalizationPD)));
103
104 epi64_T correctStateVector{mResultState[i]};
105
106 BOOST_CHECK_EQUAL_COLLECTIONS(gsl::make_span(correctStateVector).begin(), gsl::make_span(correctStateVector).end(), gsl::make_span(result).begin(), gsl::make_span(result).end());
107 }
108}
109BOOST_AUTO_TEST_SUITE_END()
110
111struct AosToSoaFixture {
112
113 std::vector<Symbol> mSource;
114 epi32_t<SIMDWidth::AVX> mFrequencies;
115 epi32_t<SIMDWidth::AVX> mCumulative;
116
117 AosToSoaFixture()
118 {
119 constexpr size_t nElems = getElementCount<uint32_t>(SIMDWidth::AVX);
120 uint32_t counter = 0;
121
122 for (size_t i = 0; i < nElems; ++i) {
123 const auto freq = counter++;
124 const auto cumul = counter++;
125 Symbol symbol{freq, cumul, 0};
126 mFrequencies(i) = symbol.getFrequency();
127 mCumulative(i) = symbol.getCumulative();
128
129 mSource.emplace_back(std::move(symbol));
130 }
131 };
132};
// Element counts (2 and 4), wrapped as integral constants, over which the
// simd_AosToSOA test template is instantiated.
133using aosToSoa_T = boost::mpl::list<std::integral_constant<size_t, 2>,
134 std::integral_constant<size_t, 4>>;
135
136BOOST_FIXTURE_TEST_SUITE(testAostoSoa, AosToSoaFixture)
137BOOST_AUTO_TEST_CASE_TEMPLATE(simd_AosToSOA, sizes_T, aosToSoa_T)
138{
139 constexpr sizes_T nElements;
140 std::array<const o2::rans::internal::Symbol*, nElements()> aosPtrs{};
141 for (size_t i = 0; i < nElements(); ++i) {
142 aosPtrs[i] = &mSource[i];
143 }
144
145 UnrolledSymbols u;
146 aosToSoa(aosPtrs, &u.frequencies[0], &u.cumulativeFrequencies[0]);
147
148 auto frequencies = store<uint32_t>(u.frequencies[0]);
149 auto cumulative = store<uint32_t>(u.cumulativeFrequencies[0]);
150
151 for (size_t i = 0; i < nElements(); ++i) {
152 BOOST_CHECK_EQUAL(frequencies(i), mFrequencies(i));
153 BOOST_CHECK_EQUAL(cumulative(i), mCumulative(i));
154 };
155}
156BOOST_AUTO_TEST_SUITE_END()
157
158BOOST_AUTO_TEST_SUITE(testcmpge)
159
160BOOST_AUTO_TEST_CASE_TEMPLATE(simd_cmpgeq_epi64, epi64_T, epi64_types)
161{
162 epi64_T a{0};
163 epi64_T b{1};
164 epi64_T res{0x0};
165 epi64_T res1 = store<uint64_t>(cmpgeq_epi64(load(a), load(b)));
166 BOOST_CHECK_EQUAL_COLLECTIONS(gsl::make_span(res1).begin(), gsl::make_span(res1).end(), gsl::make_span(res).begin(), gsl::make_span(res).end());
167
168 a = epi64_T{1};
169 b = epi64_T{1};
170 res = epi64_T{0xFFFFFFFFFFFFFFFF};
171 res1 = store<uint64_t>(cmpgeq_epi64(load(a), load(b)));
172 BOOST_CHECK_EQUAL_COLLECTIONS(gsl::make_span(res1).begin(), gsl::make_span(res1).end(), gsl::make_span(res).begin(), gsl::make_span(res).end());
173
174 a = epi64_T{1};
175 b = epi64_T{0};
176 res = epi64_T{0xFFFFFFFFFFFFFFFF};
177 res1 = store<uint64_t>(cmpgeq_epi64(load(a), load(b)));
178 BOOST_CHECK_EQUAL_COLLECTIONS(gsl::make_span(res1).begin(), gsl::make_span(res1).end(), gsl::make_span(res).begin(), gsl::make_span(res).end());
179}
180
181BOOST_AUTO_TEST_SUITE_END()
182
// Fixture for the SSE renorming tests. It bundles the renorming constants, a
// scalar reference implementation (renorm) and a helper (runRenormingChecksSSE)
// that runs ransRenorm on two SSE registers and compares the resulting states
// and the emitted stream words against the scalar reference.
// NOTE(review): count_t is used below but not declared in the visible part of
// this struct - confirm where it comes from.
183struct SSERenormFixture {
185 using ransState_t = uint64_t;
186 using stream_t = uint32_t;
187
188 SSERenormFixture() = default;
189
190 static constexpr size_t LowerBoundBits = o2::rans::defaults::internal::RenormingLowerBound;
191 static constexpr size_t LowerBound = pow2(LowerBoundBits);
192 static constexpr size_t SymbolTablePrecisionBits = 16;
193 static constexpr size_t StreamBits = o2::rans::utils::toBits<stream_t>();
194
// Returns the renorming threshold for `frequency`. This is the same expression
// renorm() uses as maxState, so any state >= the returned value makes renorm()
// emit a stream word.
195 uint64_t computeLimitState(count_t frequency)
196 {
197 return (LowerBound >> SymbolTablePrecisionBits << StreamBits) * static_cast<uint64_t>(frequency);
198 };
199
// Scalar reference renorming step.
// If `state` has reached the threshold for `frequency`, the state cast to
// stream_t is written to `outputIter`, the iterator is advanced and the state
// is shifted right by StreamBits; otherwise nothing is written.
// Returns a tuple of the (possibly shifted) state and the (possibly advanced) iterator.
200 template <typename stream_IT>
201 inline auto renorm(ransState_t state, stream_IT outputIter, count_t frequency)
202 {
203 ransState_t maxState = ((LowerBound >> SymbolTablePrecisionBits) << StreamBits) * frequency;
204 if (state >= maxState) {
205 *outputIter = static_cast<stream_t>(state);
206 ++outputIter;
207 state >>= StreamBits;
// a single shift must be enough to bring the state below the threshold
208 assert(state < maxState);
209 }
210 return std::make_tuple(state, outputIter);
211 };
// Runs ransRenorm on `states` / `compactfrequencies` and checks it against renorm().
//   states:             the 64-bit states, spread over two SSE-sized halves
//   compactfrequencies: the matching 32-bit frequencies, indexed like `states`
// Both the new states and the complete output stream buffers are compared.
212 void runRenormingChecksSSE(const epi64_t<SIMDWidth::SSE, 2>& states, const epi32_t<SIMDWidth::SSE>& compactfrequencies)
213 {
214 const size_t nElems = getElementCount<ransState_t>(SIMDWidth::SSE) * 2;
215
// zero-initialised output buffers: slots that are not written stay 0 in both
216 std::vector<stream_t> streamOutBuffer = std::vector<stream_t>(nElems, 0);
217 std::vector<stream_t> controlBuffer = std::vector<stream_t>(nElems, 0);
218
219 using stream_iterator = decltype(streamOutBuffer.begin());
220
// re-pack the frequencies: each half receives two frequencies followed by two zero words
221 epi32_t<SIMDWidth::SSE, 2> frequencies{compactfrequencies(0), compactfrequencies(1), 0x0u, 0x0u, compactfrequencies(2), compactfrequencies(3), 0x0u, 0x0u};
222
223 __m128i frequenciesVec[2];
224 __m128i statesVec[2];
225 __m128i newStatesVec[2];
226
227 frequenciesVec[0] = load(frequencies[0]);
228 frequenciesVec[1] = load(frequencies[1]);
229
230 statesVec[0] = load(states[0]);
231 statesVec[1] = load(states[1]);
232
// SIMD implementation under test; the returned iterator is not inspected
233 [[maybe_unused]] stream_iterator newstreamOutIter = ransRenorm<stream_iterator, LowerBound, StreamBits>(statesVec,
234 frequenciesVec,
235 SymbolTablePrecisionBits,
236 streamOutBuffer.begin(), newStatesVec);
237
238 epi64_t<SIMDWidth::SSE, 2> newStates(0);
239 store(newStatesVec[0], newStates[0]);
240 store(newStatesVec[1], newStates[1]);
241
// scalar reference, processed from the last element down to the first
// NOTE(review): presumably this mirrors the order in which ransRenorm emits stream words - confirm
242 auto controlIter = controlBuffer.begin();
243 epi64_t<SIMDWidth::SSE, 2> controlStates;
244 for (size_t i = nElems; i-- > 0;) {
245 std::tie(controlStates(i), controlIter) = renorm(states(i), controlIter, compactfrequencies(i));
246 }
// trace output of both stream buffers side by side
247 for (size_t i = 0; i < nElems; ++i) {
248 LOG(trace) << fmt::format("[{}]: {:#0x}; {:#0x}", i, streamOutBuffer[i], controlBuffer[i]);
249 }
250
251 BOOST_CHECK_EQUAL_COLLECTIONS(gsl::make_span(newStates).begin(), gsl::make_span(newStates).end(), gsl::make_span(controlStates).begin(), gsl::make_span(controlStates).end());
252 BOOST_CHECK_EQUAL_COLLECTIONS(streamOutBuffer.begin(), streamOutBuffer.end(), controlBuffer.begin(), controlBuffer.end());
253 }
254};
255
256BOOST_FIXTURE_TEST_SUITE(SSErenorm, SSERenormFixture)
257
258BOOST_AUTO_TEST_CASE(renormSSE_0000)
259{
260 runRenormingChecksSSE({LowerBound, LowerBound, LowerBound, LowerBound}, {0x1u, 0x1u, 0x1u, 0x1u});
261}
// Renorming check with frequencies {1, 1, 1, 5}. One state is built as
// computeLimitState(frequencies(3)) + 0xF5; computeLimitState evaluates the same
// expression renorm() uses as its maxState, so that state is not below the
// threshold for frequency 5. The other listed state is LowerBound.
262BOOST_AUTO_TEST_CASE(renormSSE_0001)
263{
264 epi32_t<SIMDWidth::SSE> frequencies{0x1u, 0x1u, 0x1u, 0x5u};
265 runRenormingChecksSSE({LowerBound,
268 computeLimitState(frequencies(3)) + 0xF5},
269 frequencies);
270}
// Renorming check with frequencies {1, 1, 4, 1}. One state is built as
// computeLimitState(frequencies(2)) + 0xF4; computeLimitState evaluates the same
// expression renorm() uses as its maxState, so that state is not below the
// threshold for frequency 4. The other listed states are LowerBound.
271BOOST_AUTO_TEST_CASE(renormSSE_0010)
272{
273 epi32_t<SIMDWidth::SSE> frequencies{0x1u, 0x1u, 0x4u, 0x1u};
274 runRenormingChecksSSE({LowerBound,
276 computeLimitState(frequencies(2)) + 0xF4,
277 LowerBound},
278 frequencies);
279}
// Renorming check with frequencies {1, 1, 4, 5}. Two states are built as
// computeLimitState(frequencies(k)) + offset for k = 2 and 3; computeLimitState
// evaluates the same expression renorm() uses as its maxState, so those states
// are not below the threshold for their frequency. The other listed state is LowerBound.
280BOOST_AUTO_TEST_CASE(renormSSE_0011)
281{
282 epi32_t<SIMDWidth::SSE> frequencies{0x1u, 0x1u, 0x4u, 0x5u};
283 runRenormingChecksSSE({LowerBound,
285 computeLimitState(frequencies(2)) + 0xF4,
286 computeLimitState(frequencies(3)) + 0xF5},
287 frequencies);
288}
// Renorming check with frequencies {1, 3, 1, 1}. One state is built as
// computeLimitState(frequencies(1)) + 0xF3; computeLimitState evaluates the same
// expression renorm() uses as its maxState, so that state is not below the
// threshold for frequency 3. The other listed states are LowerBound.
289BOOST_AUTO_TEST_CASE(renormSSE_0100)
290{
291 epi32_t<SIMDWidth::SSE> frequencies{0x1u, 0x3u, 0x1u, 0x1u};
292 runRenormingChecksSSE({LowerBound,
293 computeLimitState(frequencies(1)) + 0xF3,
295 LowerBound},
296 frequencies);
297}
// Renorming check with frequencies {1, 3, 1, 5}. Two states are built as
// computeLimitState(frequencies(k)) + offset for k = 1 and 3; computeLimitState
// evaluates the same expression renorm() uses as its maxState, so those states
// are not below the threshold for their frequency. The other listed state is LowerBound.
298BOOST_AUTO_TEST_CASE(renormSSE_0101)
299{
300 epi32_t<SIMDWidth::SSE> frequencies{0x1u, 0x3u, 0x1u, 0x5u};
301 runRenormingChecksSSE({LowerBound,
302 computeLimitState(frequencies(1)) + 0xF3,
304 computeLimitState(frequencies(3)) + 0xF5},
305 frequencies);
306}
307BOOST_AUTO_TEST_CASE(renormSSE_0110)
308{
309 epi32_t<SIMDWidth::SSE> frequencies{0x1u, 0x3u, 0x4u, 0x1u};
310 runRenormingChecksSSE({LowerBound,
311 computeLimitState(frequencies(1)) + 0xF3,
312 computeLimitState(frequencies(2)) + 0xF4,
313 LowerBound},
314 frequencies);
315}
316BOOST_AUTO_TEST_CASE(renormSSE_0111)
317{
318 epi32_t<SIMDWidth::SSE> frequencies{0x1u, 0x3u, 0x4u, 0x5u};
319 runRenormingChecksSSE({LowerBound,
320 computeLimitState(frequencies(1)) + 0xF3,
321 computeLimitState(frequencies(2)) + 0xF4,
322 computeLimitState(frequencies(3)) + 0xF5},
323 frequencies);
324}
// Renorming check with frequencies {2, 1, 1, 1}. One state is built as
// computeLimitState(frequencies(0)) + 0xF2; computeLimitState evaluates the same
// expression renorm() uses as its maxState, so that state is not below the
// threshold for frequency 2. The other listed state is LowerBound.
325BOOST_AUTO_TEST_CASE(renormSSE_1000)
326{
327 epi32_t<SIMDWidth::SSE> frequencies{0x2u, 0x1u, 0x1u, 0x1u};
328 runRenormingChecksSSE({computeLimitState(frequencies(0)) + 0xF2,
331 LowerBound},
332 frequencies);
333}
// Renorming check with frequencies {2, 1, 1, 5}. Two states are built as
// computeLimitState(frequencies(k)) + offset for k = 0 and 3; computeLimitState
// evaluates the same expression renorm() uses as its maxState, so those states
// are not below the threshold for their frequency.
334BOOST_AUTO_TEST_CASE(renormSSE_1001)
335{
336 epi32_t<SIMDWidth::SSE> frequencies{0x2u, 0x1u, 0x1u, 0x5u};
337 runRenormingChecksSSE({computeLimitState(frequencies(0)) + 0xF2,
340 computeLimitState(frequencies(3)) + 0xF5},
341 frequencies);
342}
// Renorming check with frequencies {2, 1, 4, 1}. Two states are built as
// computeLimitState(frequencies(k)) + offset for k = 0 and 2; computeLimitState
// evaluates the same expression renorm() uses as its maxState, so those states
// are not below the threshold for their frequency. The other listed state is LowerBound.
343BOOST_AUTO_TEST_CASE(renormSSE_1010)
344{
345 epi32_t<SIMDWidth::SSE> frequencies{0x2u, 0x1u, 0x4u, 0x1u};
346 runRenormingChecksSSE({computeLimitState(frequencies(0)) + 0xF2,
348 computeLimitState(frequencies(2)) + 0xF4,
349 LowerBound},
350 frequencies);
351}
// Renorming check with frequencies {2, 1, 4, 5}. Three states are built as
// computeLimitState(frequencies(k)) + offset for k = 0, 2 and 3; computeLimitState
// evaluates the same expression renorm() uses as its maxState, so those states
// are not below the threshold for their frequency.
352BOOST_AUTO_TEST_CASE(renormSSE_1011)
353{
// 0x4u instead of the octal literal 04u: same value, but consistent with the
// hexadecimal notation used by every other renormSSE_* test
354 epi32_t<SIMDWidth::SSE> frequencies{0x2u, 0x1u, 0x4u, 0x5u};
355 runRenormingChecksSSE({computeLimitState(frequencies(0)) + 0xF2,
357 computeLimitState(frequencies(2)) + 0xF4,
358 computeLimitState(frequencies(3)) + 0xF5},
359 frequencies);
360}
// Renorming check with frequencies {2, 3, 1, 1}. Two states are built as
// computeLimitState(frequencies(k)) + offset for k = 0 and 1; computeLimitState
// evaluates the same expression renorm() uses as its maxState, so those states
// are not below the threshold for their frequency. The other listed state is LowerBound.
361BOOST_AUTO_TEST_CASE(renormSSE_1100)
362{
363 epi32_t<SIMDWidth::SSE> frequencies{0x2u, 0x3u, 0x1u, 0x1u};
364 runRenormingChecksSSE({computeLimitState(frequencies(0)) + 0xF2,
365 computeLimitState(frequencies(1)) + 0xF3,
367 LowerBound},
368 frequencies);
369}
// Renorming check with frequencies {2, 3, 1, 5}. Three states are built as
// computeLimitState(frequencies(k)) + offset for k = 0, 1 and 3; computeLimitState
// evaluates the same expression renorm() uses as its maxState, so those states
// are not below the threshold for their frequency.
370BOOST_AUTO_TEST_CASE(renormSSE_1101)
371{
372 epi32_t<SIMDWidth::SSE> frequencies{0x2u, 0x3u, 0x1u, 0x5u};
373 runRenormingChecksSSE({computeLimitState(frequencies(0)) + 0xF2,
374 computeLimitState(frequencies(1)) + 0xF3,
376 computeLimitState(frequencies(3)) + 0xF5},
377 frequencies);
378}
379BOOST_AUTO_TEST_CASE(renormSSE_1110)
380{
381 epi32_t<SIMDWidth::SSE> frequencies{0x2u, 0x3u, 0x4u, 0x1u};
382 runRenormingChecksSSE({computeLimitState(frequencies(0)) + 0xF2,
383 computeLimitState(frequencies(1)) + 0xF3,
384 computeLimitState(frequencies(2)) + 0xF4,
385 LowerBound},
386 frequencies);
387}
388BOOST_AUTO_TEST_CASE(renormSSE_1111)
389{
390 epi32_t<SIMDWidth::SSE> frequencies{0x2u, 0x3u, 0x4u, 0x5u};
391 runRenormingChecksSSE({computeLimitState(frequencies(0)) + 0xF2,
392 computeLimitState(frequencies(1)) + 0xF3,
393 computeLimitState(frequencies(2)) + 0xF4,
394 computeLimitState(frequencies(3)) + 0xF5},
395 frequencies);
396}
397
398BOOST_AUTO_TEST_SUITE_END()
399
400#ifndef RANS_AVX2
401BOOST_AUTO_TEST_CASE(test_NoAVX2)
402{
403 BOOST_TEST_WARN("Tests were not Compiled for AVX2, cannot run all tests");
404}
405#endif
406
407#else /* !defined(RANS_SIMD) */
408
410{
// BOOST_TEST_WARN("...") would use the string literal itself as the checked
// condition; a literal is never null, so that check always passes and the text
// is never reported. An always-failing warn-level check with the text as
// message makes the warning actually appear.
411 BOOST_WARN_MESSAGE(false, "Tests were not Compiled for SIMD, cannot run all tests");
412}
413
414#endif /* RANS_SIMD */
benchmark::State & state
int32_t i
uint32_t res
Definition RawData.h:0
constexpr size_t StreamBits
constexpr size_t LowerBound
uint32_t stream_t
uint32_t count_t
uint64_t ransState_t
std::tuple< ransState_t, stream_IT > renorm(ransState_t state, stream_IT outputIter, count_t frequency, size_t symbolTablePrecision)
static constexpr size_t size() noexcept
preprocessor defines to enable features based on CPU architecture
GLuint64EXT * result
Definition glcorearb.h:5662
GLuint GLuint end
Definition glcorearb.h:469
GLboolean GLboolean GLboolean b
Definition glcorearb.h:1233
GLboolean GLboolean GLboolean GLboolean a
Definition glcorearb.h:1233
GLuint * states
Definition glcorearb.h:4932
GLuint counter
Definition glcorearb.h:3987
auto make_span(const o2::rans::internal::simd::AlignedArray< T, width_V, size_V > &array)
constexpr size_t RenormingLowerBound
Definition defaults.h:53
constexpr size_t pow2(size_t n) noexcept
Definition utils.h:165
uint32_t count_t
Definition defaults.h:34
Enum< T >::Iterator begin(Enum< T >)
Definition Defs.h:173
BOOST_AUTO_TEST_CASE_TEMPLATE(testInplaceEncoderEmpty, source_T, source_types)
LOG(info)<< "Compressed in "<< sw.CpuTime()<< " s"
BOOST_CHECK_EQUAL(triggersD.size(), triggers.size())
BOOST_AUTO_TEST_CASE(test_NoSIMD)
manipulation of types at compile time