16#define BOOST_TEST_MODULE Utility test
17#define BOOST_TEST_MAIN
18#define BOOST_TEST_DYN_LINK
20#include <boost/test/unit_test.hpp>
21#include <boost/mpl/list.hpp>
39using pd_types = boost::mpl::list<pd_t<SIMDWidth::SSE>
45using epi64_types = boost::mpl::list<epi64_t<SIMDWidth::SSE>
51using epi32_types = boost::mpl::list<epi32_t<SIMDWidth::SSE>
// Fixture that prepares the inputs and the scalar reference results used by the
// rANS encode test suite.
// NOTE(review): the declaration of mState, the constructor header and the closing
// braces are not visible in this excerpt.
58struct RANSEncodeFixture {
// Inputs are stored as double, the expected states as 64-bit unsigned integers.
61 double mNormalization{};
62 std::vector<double> mFrequency{};
63 std::vector<double> mCumulative{};
64 std::vector<uint64_t> mResultState{};
// Integer source values: one start state, four (frequency, cumulative) pairs and
// the normalization constant 2^16.
68 const uint64_t
state = 1ul << 21;
69 const std::vector<uint32_t> frequency{1, 1, 997, 1234};
70 const std::vector<uint32_t> cumulative{0, 321, 1, (1u << 16) - 1234};
71 const uint64_t normalization = 1ul << 16;
// Convert the integer inputs to double and copy them into the members.
74 mState =
static_cast<double>(
state);
75 mNormalization =
static_cast<double>(normalization);
76 std::copy(std::begin(frequency), std::end(frequency), std::back_inserter(mFrequency));
77 std::copy(std::begin(cumulative), std::end(cumulative), std::back_inserter(mCumulative));
// Reference result per symbol, computed in integer arithmetic:
//   normalization * (state / frequency) + (state % frequency) + cumulative
80 for (
size_t i = 0;
i < frequency.size(); ++
i) {
81 uint64_t resultState = normalization * (
state / frequency[
i]) + (
state % frequency[
i]) + cumulative[
i];
82 mResultState.push_back(resultState);
// Test suite running ransEncode on the data prepared by RANSEncodeFixture.
// NOTE(review): the test-case header and the final comparison are not visible in
// this excerpt.
87BOOST_FIXTURE_TEST_SUITE(testRANSEncode, RANSEncodeFixture)
// One iteration per (frequency, cumulative) pair prepared by the fixture.
93 const size_t nTests = mFrequency.
size();
95 for (
size_t i = 0;
i < nTests; ++
i) {
// Build the operands from the fixture values for test index i.
96 const epi64_T
state{mState};
97 const pd_T frequencyPD{mFrequency[
i]};
98 const pd_T cumulativePD{mCumulative[
i]};
99 const pd_T normalizationPD{mNormalization};
// Run ransEncode on the loaded operands and store the result as uint64_t.
102 result = store<uint64_t>(ransEncode(load(
state), load(frequencyPD), load(cumulativePD), load(normalizationPD)));
// Expected value taken from the fixture's reference results; presumably compared
// against `result` on a line not shown here.
104 epi64_T correctStateVector{mResultState[
i]};
109BOOST_AUTO_TEST_SUITE_END()
// Fixture that builds a vector of Symbol objects and records each symbol's
// frequency and cumulative value in mFrequencies / mCumulative.
// NOTE(review): the constructor header, the definitions of freq/cumul and the
// declarations of mFrequencies/mCumulative are not visible in this excerpt.
111struct AosToSoaFixture {
113 std::vector<Symbol> mSource;
// Number of symbols created: element count for uint32_t at SIMDWidth::AVX.
119 constexpr size_t nElems = getElementCount<uint32_t>(SIMDWidth::AVX);
122 for (
size_t i = 0;
i < nElems; ++
i) {
// Third constructor argument is 0; its meaning is not visible here.
125 Symbol symbol{freq, cumul, 0};
// Record the values reported by the symbol itself, then move it into mSource.
126 mFrequencies(
i) = symbol.getFrequency();
127 mCumulative(
i) = symbol.getCumulative();
129 mSource.emplace_back(std::move(symbol));
// Type list of the compile-time sizes 2 and 4; presumably the element counts the
// aosToSoa test is instantiated for (confirm against the test-case declaration).
133using aosToSoa_T = boost::mpl::list<std::integral_constant<size_t, 2>,
134 std::integral_constant<size_t, 4>>;
// Test suite for aosToSoa, using the symbols prepared by AosToSoaFixture.
// NOTE(review): the test-case header, the declarations of aosPtrs and u, and the
// body of the final loop are not visible in this excerpt.
136BOOST_FIXTURE_TEST_SUITE(testAostoSoa, AosToSoaFixture)
// nElements() yields the number of symbols handled by this instantiation.
139 constexpr sizes_T nElements;
// Collect pointers to the first nElements() symbols of the fixture.
141 for (
size_t i = 0;
i < nElements(); ++
i) {
142 aosPtrs[
i] = &mSource[
i];
// Convert through the pointers into the two output slots.
146 aosToSoa(aosPtrs, &u.frequencies[0], &u.cumulativeFrequencies[0]);
// Store the results as uint32_t for inspection.
148 auto frequencies = store<uint32_t>(u.frequencies[0]);
149 auto cumulative = store<uint32_t>(u.cumulativeFrequencies[0]);
// Per-element loop; its body (presumably the checks) is not shown here.
151 for (
size_t i = 0;
i < nElements(); ++
i) {
156BOOST_AUTO_TEST_SUITE_END()
// Test suite for cmpgeq_epi64. The same call is evaluated three times.
// NOTE(review): the definitions of a and b, the expected value of the first case
// and the comparisons themselves are not visible in this excerpt.
158BOOST_AUTO_TEST_SUITE(testcmpge)
// First case: result of cmpgeq_epi64(a, b) stored as uint64_t.
165 epi64_T res1 = store<uint64_t>(cmpgeq_epi64(load(
a), load(
b)));
// Second case: expected value is 0xFFFFFFFFFFFFFFFF (all 64 bits set).
170 res = epi64_T{0xFFFFFFFFFFFFFFFF};
171 res1 = store<uint64_t>(cmpgeq_epi64(load(
a), load(
b)));
// Third case: expected value is again 0xFFFFFFFFFFFFFFFF.
176 res = epi64_T{0xFFFFFFFFFFFFFFFF};
177 res1 = store<uint64_t>(cmpgeq_epi64(load(
a), load(
b)));
181BOOST_AUTO_TEST_SUITE_END()
// Fixture for the SSE renorming tests: runs ransRenorm on two SSE vectors and
// compares the stream it writes with the stream written by a scalar renorm call.
// NOTE(review): several member declarations, method headers and parts of the
// method bodies are not visible in this excerpt.
183struct SSERenormFixture {
188 SSERenormFixture() =
default;
// Precision of the symbol table in bits and width of one stream word in bits.
192 static constexpr size_t SymbolTablePrecisionBits = 16;
193 static constexpr size_t StreamBits = o2::rans::utils::toBits<stream_t>();
// Returns (LowerBound >> SymbolTablePrecisionBits << StreamBits) * frequency,
// evaluated with the frequency widened to uint64_t.
195 uint64_t computeLimitState(
count_t frequency)
197 return (
LowerBound >> SymbolTablePrecisionBits <<
StreamBits) *
static_cast<uint64_t
>(frequency);
// Scalar renorm used as reference: acts when state >= maxState, asserts that the
// state is below maxState afterwards, and returns (state, outputIter).
200 template <
typename stream_IT>
204 if (
state >= maxState) {
208 assert(
state < maxState);
210 return std::make_tuple(
state, outputIter);
// Two SSE vectors' worth of ransState_t elements are processed per check.
214 const size_t nElems = getElementCount<ransState_t>(SIMDWidth::SSE) * 2;
// Zero-initialised output buffers: one for the SIMD path, one for the scalar path.
216 std::vector<stream_t> streamOutBuffer = std::vector<stream_t>(nElems, 0);
217 std::vector<stream_t> controlBuffer = std::vector<stream_t>(nElems, 0);
219 using stream_iterator =
decltype(streamOutBuffer.begin());
// Frequencies laid out as {f0, f1, 0, 0, f2, f3, 0, 0}.
221 epi32_t<SIMDWidth::SSE, 2> frequencies{compactfrequencies(0), compactfrequencies(1), 0x0u, 0x0u, compactfrequencies(2), compactfrequencies(3), 0x0u, 0x0u};
223 __m128i frequenciesVec[2];
224 __m128i statesVec[2];
225 __m128i newStatesVec[2];
// Load both halves of the frequencies and of the states.
227 frequenciesVec[0] = load(frequencies[0]);
228 frequenciesVec[1] = load(frequencies[1]);
230 statesVec[0] = load(
states[0]);
231 statesVec[1] = load(
states[1]);
// SIMD renorm under test: writes to streamOutBuffer and fills newStatesVec.
233 [[maybe_unused]] stream_iterator newstreamOutIter = ransRenorm<stream_iterator, LowerBound, StreamBits>(statesVec,
235 SymbolTablePrecisionBits,
236 streamOutBuffer.begin(), newStatesVec);
239 store(newStatesVec[0], newStates[0]);
240 store(newStatesVec[1], newStates[1]);
// Scalar reference: elements are visited from index nElems-1 down to 0.
242 auto controlIter = controlBuffer.begin();
244 for (
size_t i = nElems;
i-- > 0;) {
245 std::tie(controlStates(
i), controlIter) =
renorm(
states(
i), controlIter, compactfrequencies(
i));
// Trace both streams side by side.
247 for (
size_t i = 0;
i < nElems; ++
i) {
248 LOG(trace) << fmt::format(
"[{}]: {:#0x}; {:#0x}",
i, streamOutBuffer[
i], controlBuffer[
i]);
// The SIMD stream must equal the scalar stream element by element.
252 BOOST_CHECK_EQUAL_COLLECTIONS(streamOutBuffer.begin(), streamOutBuffer.end(), controlBuffer.begin(), controlBuffer.end());
// Test suite driving runRenormingChecksSSE with different combinations of states.
// The visible entries are computeLimitState(frequencies(k)) plus a fixed offset
// (0xF2, 0xF3, 0xF4, 0xF5 for k = 0, 1, 2, 3).
// NOTE(review): the test-case headers and the remaining initializer entries are
// not visible in this excerpt; presumably each case covers one subset of
// elements that needs renorming.
256BOOST_FIXTURE_TEST_SUITE(SSErenorm, SSERenormFixture)
268 computeLimitState(frequencies(3)) + 0xF5},
276 computeLimitState(frequencies(2)) + 0xF4,
285 computeLimitState(frequencies(2)) + 0xF4,
286 computeLimitState(frequencies(3)) + 0xF5},
293 computeLimitState(frequencies(1)) + 0xF3,
302 computeLimitState(frequencies(1)) + 0xF3,
304 computeLimitState(frequencies(3)) + 0xF5},
311 computeLimitState(frequencies(1)) + 0xF3,
312 computeLimitState(frequencies(2)) + 0xF4,
320 computeLimitState(frequencies(1)) + 0xF3,
321 computeLimitState(frequencies(2)) + 0xF4,
322 computeLimitState(frequencies(3)) + 0xF5},
328 runRenormingChecksSSE({computeLimitState(frequencies(0)) + 0xF2,
337 runRenormingChecksSSE({computeLimitState(frequencies(0)) + 0xF2,
340 computeLimitState(frequencies(3)) + 0xF5},
346 runRenormingChecksSSE({computeLimitState(frequencies(0)) + 0xF2,
348 computeLimitState(frequencies(2)) + 0xF4,
355 runRenormingChecksSSE({computeLimitState(frequencies(0)) + 0xF2,
357 computeLimitState(frequencies(2)) + 0xF4,
358 computeLimitState(frequencies(3)) + 0xF5},
364 runRenormingChecksSSE({computeLimitState(frequencies(0)) + 0xF2,
365 computeLimitState(frequencies(1)) + 0xF3,
373 runRenormingChecksSSE({computeLimitState(frequencies(0)) + 0xF2,
374 computeLimitState(frequencies(1)) + 0xF3,
376 computeLimitState(frequencies(3)) + 0xF5},
382 runRenormingChecksSSE({computeLimitState(frequencies(0)) + 0xF2,
383 computeLimitState(frequencies(1)) + 0xF3,
384 computeLimitState(frequencies(2)) + 0xF4,
391 runRenormingChecksSSE({computeLimitState(frequencies(0)) + 0xF2,
392 computeLimitState(frequencies(1)) + 0xF3,
393 computeLimitState(frequencies(2)) + 0xF4,
394 computeLimitState(frequencies(3)) + 0xF5},
398BOOST_AUTO_TEST_SUITE_END()
// Fallback notices for builds without AVX2 / without SIMD support. The enclosing
// test cases and preprocessor guards are not visible in this excerpt.
// NOTE(review): BOOST_TEST_WARN reports only when its expression evaluates to
// false; a string literal is a non-null pointer, so these statements may never
// print anything. If a message is intended, BOOST_TEST_MESSAGE(...) or
// BOOST_WARN_MESSAGE(false, ...) look like the matching tools - confirm.
403 BOOST_TEST_WARN(
"Tests were not Compiled for AVX2, cannot run all tests");
411 BOOST_TEST_WARN(
"Tests were not Compiled for SIMD, cannot run all tests");
constexpr size_t StreamBits
constexpr size_t LowerBound
std::tuple< ransState_t, stream_IT > renorm(ransState_t state, stream_IT outputIter, count_t frequency, size_t symbolTablePrecision)
static constexpr size_t size() noexcept
preprocessor defines to enable features based on CPU architecture
GLboolean GLboolean GLboolean b
GLboolean GLboolean GLboolean GLboolean a
auto make_span(const o2::rans::internal::simd::AlignedArray< T, width_V, size_V > &array)
constexpr size_t RenormingLowerBound
constexpr size_t pow2(size_t n) noexcept
Enum< T >::Iterator begin(Enum< T >)
BOOST_AUTO_TEST_CASE_TEMPLATE(testInplaceEncoderEmpty, source_T, source_types)
LOG(info)<< "Compressed in "<< sw.CpuTime()<< " s"
BOOST_CHECK_EQUAL(triggersD.size(), triggers.size())
BOOST_AUTO_TEST_CASE(test_NoSIMD)
manipulation of types at compile time