16#define BOOST_TEST_MODULE Utility test
17#define BOOST_TEST_MAIN
18#define BOOST_TEST_DYN_LINK
23#include <boost/test/unit_test.hpp>
24#include <boost/mp11.hpp>
26#include <fmt/format.h>
35namespace mp = boost::mp11;
61template <
class F,
class S>
62struct print_log_value<::std::pair<F, S>> {
63 void operator()(::std::ostream& os, ::std::pair<F, S>
const& p)
65 os <<
"([" << p.first <<
"], [" << p.second <<
"])";
// Single flat type list that concatenates the small-dense, large-dense,
// adaptive and sparse histogram type lists (one level of nesting removed
// by mp_flatten).
73using histogram_types = mp::mp_flatten<mp::mp_list<small_dense_histogram_types, large_dense_histogram_types, adaptive_histogram_types, sparse_histograms>>;
// Flat type list built from the large-dense, adaptive and sparse histogram
// type lists; unlike histogram_types it leaves out
// small_dense_histogram_types.
75using variable_histograms_types = mp::mp_flatten<mp::mp_list<large_dense_histogram_types, adaptive_histogram_types, sparse_histograms>>;
77template <
typename histogram_T>
80 for (
auto iter =
a.begin(); iter !=
a.end(); ++iter) {
87template <
class histogram_T,
typename map_T>
91 using source_type =
typename histogram_T::source_type;
92 if constexpr (isDenseContainer_v<histogram_T>) {
94 return static_cast<size_t>(std::numeric_limits<std::make_unsigned_t<source_type>>
::max()) + 1;
96 const auto [minIter, maxIter] = std::minmax_element(std::begin(resultsMap), std::end(resultsMap), [](
const auto&
a,
const auto&
b) {
return a.first <
b.first; });
97 return maxIter->first - minIter->first + std::is_signed_v<source_type>;
99 }
else if constexpr (isAdaptiveContainer_v<histogram_T>) {
100 std::vector<int32_t> buckets;
101 for (
const auto [
key,
value] : resultsMap) {
102 buckets.push_back(
key / histogram_T::container_type::getBucketSize());
104 std::sort(buckets.begin(), buckets.end());
105 auto end = std::unique(buckets.begin(), buckets.end());
106 return histogram_T::container_type::getBucketSize() * std::distance(buckets.begin(),
end);
108 static_assert(isHashContainer_v<histogram_T> || isSetContainer_v<histogram_T>);
109 return std::count_if(resultsMap.begin(), resultsMap.end(), [](
const auto&
val) { return val.second > 0; });
113template <
class histogram_T,
typename map_T>
114auto getOffset(
const map_T& resultsMap) ->
typename map_T::key_type
117 using source_type =
typename histogram_T::source_type;
118 if constexpr (isDenseContainer_v<histogram_T>) {
120 return std::numeric_limits<source_type>::min();
122 const auto [minIter, maxIter] = std::minmax_element(std::begin(resultsMap), std::end(resultsMap), [](
const auto&
a,
const auto&
b) {
return a.first <
b.first; });
123 return minIter->first;
125 }
else if constexpr (isAdaptiveContainer_v<histogram_T>) {
126 return std::numeric_limits<source_type>::min();
127 }
else if constexpr (isHashContainer_v<histogram_T>) {
130 static_assert(isSetContainer_v<histogram_T>);
133 for (
const auto& [
index,
value] : resultsMap) {
144 using source_type =
typename histogram_T::source_type;
145 histogram_T histogram{};
146 const size_t tableSize = 1ul << (
sizeof(
source_type) * 8);
151 BOOST_CHECK(histogram.cbegin() != histogram.cend());
156 using source_type =
typename histogram_T::source_type;
157 histogram_T histogram{};
163 BOOST_CHECK(histogram.cbegin() == histogram.cend());
168 using source_type =
typename histogram_T::source_type;
170 std::vector<source_type>
samples{
192 std::unordered_map<source_type, uint32_t> results{{
static_cast<source_type>(-5), 2},
204 histogram_T histogram{};
207 histogram_T histogram2{};
208 histogram2.addSamples(
samples);
212 for (
const auto [symbol,
value] : results) {
213 BOOST_TEST_MESSAGE(fmt::format(
"testing symbol {}",
static_cast<int64_t
>(symbol)));
222 BOOST_CHECK(histogram.cbegin() != histogram.cend());
225 std::vector<source_type> samples2{
250 histogram.addSamples(samples2.begin(), samples2.end());
252 histogram2.addSamples(samples2);
256 for (
const auto [symbol,
value] : results) {
257 BOOST_TEST_MESSAGE(fmt::format(
"testing symbol {}",
static_cast<int64_t
>(symbol)));
266 BOOST_CHECK(histogram.cbegin() != histogram.cend());
274 using source_type =
typename histogram_T::source_type;
275 using value_type =
typename histogram_T::value_type;
276 std::vector<value_type> frequencies{0, 1, 2, 3, 4, 5};
278 std::unordered_map<source_type, uint32_t> results{
286 histogram_T histogram{};
287 histogram.addFrequencies(frequencies.begin(), frequencies.end(), 0);
289 histogram_T histogram2{};
294 for (
const auto [symbol,
value] : results) {
295 BOOST_TEST_MESSAGE(fmt::format(
"testing symbol {}",
static_cast<int64_t
>(symbol)));
304 BOOST_CHECK(histogram.cbegin() != histogram.cend());
309 std::vector<value_type> frequencies2{3, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0};
311 if constexpr (std::is_signed_v<source_type>) {
312 histogram.addFrequencies(frequencies2.begin(), frequencies2.end(), -1);
323 histogram.addFrequencies(frequencies2.begin(), frequencies2.end(), 3);
338 for (
const auto [symbol,
value] : results) {
339 BOOST_TEST_MESSAGE(fmt::format(
"testing symbol {}",
static_cast<int64_t
>(symbol)));
345 BOOST_CHECK(histogram.cbegin() != histogram.cend());
350 using source_type =
typename histogram_T::source_type;
351 using value_type =
typename histogram_T::value_type;
352 std::vector<value_type> frequencies{0, 1, 2, 3, 4, 5};
354 std::unordered_map<source_type, uint32_t> results{
362 histogram_T histogram{};
363 histogram.addFrequencies(frequencies.begin(), frequencies.end(), 0);
365 histogram_T histogram2{};
370 for (
const auto [symbol,
value] : results) {
371 BOOST_TEST_MESSAGE(fmt::format(
"testing symbol {}",
static_cast<int64_t
>(symbol)));
379 BOOST_CHECK(histogram.cbegin() != histogram.cend());
384 std::vector<value_type> frequencies2{3, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0};
386 if constexpr (std::is_signed_v<source_type>) {
387 const std::ptrdiff_t
offset = utils::pow2(utils::toBits<source_type>() - 1);
389 if constexpr (std::is_same_v<histogram_T, DenseHistogram<int32_t>>) {
390 BOOST_CHECK_THROW(histogram.addFrequencies(frequencies2.begin(), frequencies2.end(),
offset),
HistogramError);
394 histogram.addFrequencies(frequencies2.begin(), frequencies2.end(),
offset);
406 const std::ptrdiff_t
offset = -1;
407 histogram.addFrequencies(frequencies2.begin(), frequencies2.end(),
offset);
419 if constexpr (std::is_same_v<histogram_T, DenseHistogram<int32_t>>) {
428 for (
const auto [symbol,
value] : results) {
429 BOOST_TEST_MESSAGE(fmt::format(
"testing symbol {}",
static_cast<int64_t
>(symbol)));
435 BOOST_CHECK(histogram.cbegin() != histogram.cend());
442 std::vector<uint32_t> frequencies{1, 1, 2, 2, 2, 2, 6, 8, 4, 10, 8, 14, 10, 19, 26, 30, 31, 35, 41, 45, 51, 44, 47, 39, 58, 52, 42, 53, 50, 34, 50, 30, 32, 24, 30, 20, 17, 12, 16, 6, 8, 5, 6, 4, 4, 2, 2, 2, 1};
443 histogram_T histogram{frequencies.
begin(), frequencies.end(),
static_cast<uint8_t
>(0)};
445 const size_t scaleBits = 8;
447 auto renormedHistogram =
renorm(std::move(histogram), scaleBits, RenormingPolicy::ForceIncompressible, 1);
449 const std::vector<uint32_t> rescaledFrequencies{1, 2, 1, 3, 2, 3, 3, 5, 6, 7, 8, 9, 10, 11, 13, 11, 12, 10, 14, 13, 10, 13, 12, 8, 12, 7, 8, 6, 7, 5, 4, 3, 4, 2, 2, 1, 2, 1, 1};
455 for (std::ptrdiff_t
i = rescaledFrequenciesView.getMin();
i <= rescaledFrequenciesView.getMax(); ++
i) {
464 std::vector<uint32_t> frequencies{1, 1, 2, 2, 2, 2, 6, 8, 4, 10, 8, 14, 10, 19, 26, 30, 31, 35, 41, 45, 51, 44, 47, 39, 58, 52, 42, 53, 50, 34, 50, 30, 32, 24, 30, 20, 17, 12, 16, 6, 8, 5, 6, 4, 4, 2, 2, 2, 1};
465 histogram_T histogram{frequencies.begin(), frequencies.end(),
static_cast<uint8_t
>(0)};
467 const size_t scaleBits = 8;
469 auto renormedHistogram =
compat::renorm(std::move(histogram), scaleBits);
470 const std::vector<uint32_t> rescaledFrequencies{1, 1, 1, 2, 1, 2, 1, 2, 2, 2, 2, 3, 3, 4, 6, 7, 7, 9, 9, 11, 12, 10, 11, 9, 13, 12, 10, 13, 11, 8, 12, 7, 7, 6, 7, 4, 4, 3, 4, 1, 2, 1, 2, 2, 2, 1, 2, 1, 1};
474 BOOST_CHECK_EQUAL_COLLECTIONS(renormedHistogram.begin(), renormedHistogram.begin() + rescaledFrequencies.size(), rescaledFrequencies.begin(), rescaledFrequencies.end());
479 using namespace internal;
480 using namespace utils;
482 constexpr double_t eps = 1e-2;
484 std::vector<uint32_t> frequencies{9, 0, 8, 0, 7, 0, 6, 0, 5, 0, 4, 0, 3, 0, 2, 0, 1};
490 BOOST_CHECK_CLOSE(expectedCodewordLength, 2.9573820061153833, eps);
491 BOOST_CHECK_GE(expectedCodewordLength,
metrics.getDatasetProperties().entropy);
helper functionalities useful for packing operations
const_iterator begin() const noexcept
functionality to maintain compatibility with previous version of this library
GLsizei GLenum const void GLuint GLsizei GLfloat * metrics
GLboolean GLboolean GLboolean b
GLsizei const GLfloat * value
GLboolean GLboolean GLboolean GLboolean a
public interface for building and renorming histograms from source data.
auto make_span(const o2::rans::internal::simd::AlignedArray< T, width_V, size_V > &array)
RenormedDenseHistogram< source_T > renorm(DenseHistogram< source_T > histogram, size_t newPrecision=0)
auto getValue(IT iter) -> typename std::iterator_traits< IT >::value_type::second_type
constexpr auto getIndex(const container_T &container, typename container_T::const_iterator iter) -> typename container_T::source_type
class DenseHistogram< source_T, std::enable_if_t< sizeof(source_T)<=2 > > :public internal::VectorContainer< source_T, uint32_t >, internal::HistogramConcept< source_T, typename internal::VectorContainer< source_T, uint32_t >::value_type, typename internal::VectorContainer< source_T, uint32_t >::difference_type, DenseHistogram< source_T > >{ using containerBase_type=internal::VectorContainer< source_T, uint32_t >;using HistogramConcept_type=internal::HistogramConcept< source_T, typename internal::VectorContainer< source_T, uint32_t >::value_type, typename internal::VectorContainer< source_T, uint32_t >::difference_type, DenseHistogram< source_T > >;friend containerBase_type;friend HistogramConcept_type;public:using source_type=source_T;using value_type=typename containerBase_type::value_type;using container_type=typename containerBase_type::container_type;using size_type=typename containerBase_type::size_type;using difference_type=typename containerBase_type::difference_type;using reference=typename containerBase_type::reference;using const_reference=typename containerBase_type::const_reference;using pointer=typename containerBase_type::pointer;using const_pointer=typename containerBase_type::const_pointer;using const_iterator=typename containerBase_type::const_iterator;DenseHistogram() :containerBase_type{MaxSize, std::numeric_limits< source_type >::min()} {};template< typename freq_IT > DenseHistogram(freq_IT begin, freq_IT end, difference_type offset) :containerBase_type{MaxSize, std::numeric_limits< source_type >::min()}, HistogramConcept_type{begin, end, offset} {};using HistogramConcept_type::addSamples;template< typename source_IT > inline DenseHistogram &addSamples(source_IT begin, source_IT end, source_type min, source_type max) { return addSamplesImpl(begin, end);};template< typename source_IT > DenseHistogram &addSamples(gsl::span< const source_type > span, source_type min, source_type max) { return addSamplesImpl(span);};using 
HistogramConcept_type::addFrequencies;protected:template< typename source_IT > DenseHistogram &addSamplesImpl(source_IT begin, source_IT end);DenseHistogram &addSamplesImpl(gsl::span< const source_type > samples);template< typename freq_IT > DenseHistogram &addFrequenciesImpl(freq_IT begin, freq_IT end, difference_type offset);private:inline static constexpr size_t MaxSize=utils::pow2(utils::toBits< source_type >());};template< typename source_T >template< typename source_IT >auto DenseHistogram< source_T, std::enable_if_t< sizeof(source_T)<=2 > >::addSamplesImpl(source_IT begin, source_IT end) -> DenseHistogram &{ if constexpr(std::is_pointer_v< source_IT >) { return addSamplesImpl({begin, end});} else { std::for_each(begin, end, [this](const source_type &symbol) {++this->mNSamples;++this->mContainer[symbol];});} return *this;}template< typename source_T >auto DenseHistogram< source_T, std::enable_if_t< sizeof(source_T)<=2 > >::addSamplesImpl(gsl::span< const source_type > samples) -> DenseHistogram &{ using namespace internal;using namespace utils;if(samples.empty()) { return *this;} const auto begin=samples.data();const auto end=begin+samples.size();constexpr size_t ElemsPerQWord=sizeof(uint64_t)/sizeof(source_type);constexpr size_t nUnroll=2 *ElemsPerQWord;auto iter=begin;if constexpr(sizeof(source_type)==1) { std::array< ShiftableVector< source_type, value_type >, 3 > histograms{ {{this-> mContainer this mContainer getOffset()}
auto makeHistogramView(container_T &container, std::ptrdiff_t offset) noexcept -> HistogramView< decltype(std::begin(container))>
size_t countNUsedAlphabetSymbols(const AdaptiveHistogram< source_T > &histogram)
double_t computeExpectedCodewordLength(const DenseHistogram< source_T > &histogram, const RenormedDenseHistogram< source_T > &rescaledHistogram)
decltype(auto) renorm(histogram_T histogram, size_t newPrecision, RenormingPolicy renormingPolicy=RenormingPolicy::Auto, size_t lowProbabilityCutoffBits=0)
Common utility functions.
BOOST_CHECK_EQUAL(triggersD.size(), triggers.size())
mp::mp_list< DenseHistogram< char >, DenseHistogram< uint8_t >, DenseHistogram< int8_t >, DenseHistogram< uint16_t >, DenseHistogram< int16_t > > small_dense_histogram_types
size_t getTableSize(const map_T &resultsMap)
BOOST_AUTO_TEST_CASE(test_ExpectedCodewordLength)
BOOST_AUTO_TEST_CASE_TEMPLATE(test_emptyTablesSmall, histogram_T, small_dense_histogram_types)
mp::mp_flatten< mp::mp_list< small_dense_histogram_types, large_dense_histogram_types, adaptive_histogram_types, sparse_histograms > > histogram_types
mp::mp_list< DenseHistogram< int32_t > > large_dense_histogram_types
mp::mp_list< DenseHistogram< uint8_t >, DenseHistogram< uint32_t >, AdaptiveHistogram< int32_t >, SparseHistogram< int32_t > > renorm_types
mp::mp_flatten< mp::mp_list< large_dense_histogram_types, adaptive_histogram_types, sparse_histograms > > variable_histograms_types
void checkEquivalent(const histogram_T &a, const histogram_T &b)
mp::mp_list< SparseHistogram< uint32_t >, SparseHistogram< int32_t > > sparse_histograms
mp::mp_list< DenseHistogram< uint8_t >, DenseHistogram< uint32_t > > legacy_renorm_types
mp::mp_list< AdaptiveHistogram< uint32_t >, AdaptiveHistogram< int32_t > > adaptive_histogram_types
manipulation of types at compile time