Project
Loading...
Searching...
No Matches
DenseSymbolTable.h
Go to the documentation of this file.
1// Copyright 2019-2023 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
16
17#ifndef RANS_INTERNAL_CONTAINERS_DENSESYMBOLTABLE_H_
18#define RANS_INTERNAL_CONTAINERS_DENSESYMBOLTABLE_H_
19
20#include <vector>
21#include <cstdint>
22#include <cmath>
23#include <fairlogger/Logger.h>
24
29
30namespace o2::rans
31{
32
33template <class source_T, class symbol_T>
34class DenseSymbolTable : public internal::VectorContainer<source_T, symbol_T>
35{
37 friend base_type;
38
39 public:
47 using pointer = typename base_type::pointer;
50
51 DenseSymbolTable() = default;
52
53 template <typename container_T>
55
56 [[nodiscard]] inline const_reference operator[](source_type sourceSymbol) const noexcept;
57
58 [[nodiscard]] inline const_pointer lookupSafe(source_type sourceSymbol) const;
59
60 [[nodiscard]] inline const_pointer lookupUnsafe(source_type sourceSymbol) const { return &this->mContainer[sourceSymbol]; };
61
62 [[nodiscard]] inline size_type size() const noexcept { return mSize; };
63
64 [[nodiscard]] inline bool hasEscapeSymbol() const noexcept { return mEscapeSymbol.getFrequency() > 0; };
65
66 [[nodiscard]] inline const_reference getEscapeSymbol() const noexcept { return mEscapeSymbol; };
67
68 [[nodiscard]] inline bool isEscapeSymbol(const_reference symbol) const noexcept { return symbol == mEscapeSymbol; };
69
70 [[nodiscard]] inline bool isEscapeSymbol(source_type sourceSymbol) const noexcept { return this->operator[](sourceSymbol) == mEscapeSymbol; };
71
72 [[nodiscard]] inline size_type getPrecision() const noexcept { return mSymbolTablePrecision; };
73
74 protected:
75 [[nodiscard]] inline bool isValidSymbol(const symbol_type& value) const noexcept { return !this->isEscapeSymbol(value); };
76
80};
81
82template <class source_T, class value_T>
83template <typename container_T>
85{
86 using namespace utils;
87 using namespace internal;
88 using count_type = typename value_T::value_type;
89
90 this->mSymbolTablePrecision = renormedHistogram.getRenormingBits();
91 this->mEscapeSymbol = [&]() -> value_T {
92 const count_type symbolFrequency = renormedHistogram.getIncompressibleSymbolFrequency();
93 const count_type cumulatedFrequency = renormedHistogram.getNumSamples() - symbolFrequency;
94 return {symbolFrequency, cumulatedFrequency, this->getPrecision()};
95 }();
96
97 const auto [trimmedBegin, trimmedEnd] = trim(renormedHistogram);
98 const auto [min, max] = getMinMax(renormedHistogram, trimmedBegin, trimmedEnd);
99 // one cacheline worth of padding in the back of the container to ensure SIMD reads do not cause out of bounds reads
100 // then first reserve to increase the capacity
101 // finally use resize to set size of the vector. this does not change capacity and retains the padding.
102 constexpr size_t padding = nBytesTo<symbol_type>(toBytes(512));
103 const size_t symbolTableSize = trimmedBegin == trimmedEnd ? 0 : (max - min + 1);
104 const source_type offset = min;
105 this->mContainer.reserve(symbolTableSize + padding);
106 this->mContainer.resize(symbolTableSize, offset, this->mEscapeSymbol);
107
108 count_type cumulatedFrequency = 0;
109 forEachIndexValue(
110 renormedHistogram, trimmedBegin, trimmedEnd, [&, this](const source_type& sourceSymbol, const count_type& symbolFrequency) {
111 if (symbolFrequency) {
112 this->mContainer[sourceSymbol] = symbol_type{symbolFrequency, cumulatedFrequency, this->getPrecision()};
113 cumulatedFrequency += symbolFrequency;
114 }
115 });
116
117 mSize = this->mContainer.size();
118};
119
120template <class source_T, class value_T>
121[[nodiscard]] inline auto DenseSymbolTable<source_T, value_T>::operator[](source_type sourceSymbol) const noexcept -> const_reference
122{
123 const size_type index = static_cast<size_type>(sourceSymbol - this->getOffset());
124 // static cast to unsigned: idx < 0 => (uint)idx > MAX_INT => idx > mIndex.size()
125 if (index < this->size()) {
126 return this->mContainer[sourceSymbol];
127 } else {
128 return this->getEscapeSymbol();
129 }
130};
131
132template <class source_T, class value_T>
133[[nodiscard]] inline auto DenseSymbolTable<source_T, value_T>::lookupSafe(source_type sourceSymbol) const -> const_pointer
134{
135 const size_type index = static_cast<size_type>(sourceSymbol - this->getOffset());
136 // static cast to unsigned: idx < 0 => (uint)idx > MAX_INT => idx > mIndex.size()
137 if (index < this->size()) {
138 return this->mContainer.data() + index;
139 } else {
140 return nullptr;
141 }
142};
143
144template <typename source_T, typename symbol_T>
145std::pair<source_T, source_T> getMinMax(const DenseSymbolTable<source_T, symbol_T>& symbolTable)
146{
147 return internal::getMinMax(symbolTable, symbolTable.getEscapeSymbol());
148};
149
150template <typename source_T, typename symbol_T>
152{
153 return std::count_if(symbolTable.begin(), symbolTable.end(), [&symbolTable](typename DenseSymbolTable<source_T, symbol_T>::const_reference v) { return !symbolTable.isEscapeSymbol(v); });
154}
155
156} // namespace o2::rans
157
158#endif /* RANS_INTERNAL_CONTAINERS_DENSESYMBOLTABLE_H_ */
uint16_t padding
Abstract container class that defines and implements basic properties shared by histograms and lookup...
Non-owning, lightweight structure for histogram manipulation.
Histogram renormed to sum of frequencies being 2^P for use in fast rans coding.
helper functionalities useful for packing operations
bool isValidSymbol(const symbol_type &value) const noexcept
typename base_type::source_type source_type
bool isEscapeSymbol(const_reference symbol) const noexcept
size_type getPrecision() const noexcept
typename base_type::pointer pointer
typename base_type::const_reference const_reference
size_type size() const noexcept
typename base_type::size_type size_type
typename base_type::reference reference
DenseSymbolTable(const RenormedHistogramConcept< container_T > &renormedHistogram)
const_pointer lookupUnsafe(source_type sourceSymbol) const
const_reference getEscapeSymbol() const noexcept
bool hasEscapeSymbol() const noexcept
const_reference operator[](source_type sourceSymbol) const noexcept
bool isEscapeSymbol(source_type sourceSymbol) const noexcept
typename base_type::const_iterator const_iterator
typename base_type::container_type container_type
const_pointer lookupSafe(source_type sourceSymbol) const
typename base_type::const_pointer const_pointer
typename base_type::difference_type difference_type
typename base_type::value_type symbol_type
value_type getIncompressibleSymbolFrequency() const noexcept
size_t getRenormingBits() const noexcept
const_iterator begin() const noexcept
Definition Container.h:53
const_iterator end() const noexcept
Definition Container.h:55
typename base_type::const_pointer const_pointer
Definition Container.h:97
typename base_type::const_reference const_reference
Definition Container.h:95
typename base_type::const_iterator const_iterator
Definition Container.h:98
typename base_type::container_type container_type
Definition Container.h:91
typename base_type::difference_type difference_type
Definition Container.h:93
const GLdouble * v
Definition glcorearb.h:832
GLuint index
Definition glcorearb.h:781
GLsizei const GLfloat * value
Definition glcorearb.h:819
GLintptr offset
Definition glcorearb.h:660
auto getMinMax(const container_T &container, typename container_T::const_iterator begin, typename container_T::const_iterator end, typename container_T::const_reference zeroElem={}) -> std::pair< typename container_T::source_type, typename container_T::source_type >
Definition algorithm.h:134
class DenseHistogram< source_T, std::enable_if_t< sizeof(source_T)<=2 > > :public internal::VectorContainer< source_T, uint32_t >, internal::HistogramConcept< source_T, typename internal::VectorContainer< source_T, uint32_t >::value_type, typename internal::VectorContainer< source_T, uint32_t >::difference_type, DenseHistogram< source_T > >{ using containerBase_type=internal::VectorContainer< source_T, uint32_t >;using HistogramConcept_type=internal::HistogramConcept< source_T, typename internal::VectorContainer< source_T, uint32_t >::value_type, typename internal::VectorContainer< source_T, uint32_t >::difference_type, DenseHistogram< source_T > >;friend containerBase_type;friend HistogramConcept_type;public:using source_type=source_T;using value_type=typename containerBase_type::value_type;using container_type=typename containerBase_type::container_type;using size_type=typename containerBase_type::size_type;using difference_type=typename containerBase_type::difference_type;using reference=typename containerBase_type::reference;using const_reference=typename containerBase_type::const_reference;using pointer=typename containerBase_type::pointer;using const_pointer=typename containerBase_type::const_pointer;using const_iterator=typename containerBase_type::const_iterator;DenseHistogram() :containerBase_type{MaxSize, std::numeric_limits< source_type >::min()} {};template< typename freq_IT > DenseHistogram(freq_IT begin, freq_IT end, difference_type offset) :containerBase_type{MaxSize, std::numeric_limits< source_type >::min()}, HistogramConcept_type{begin, end, offset} {};using HistogramConcept_type::addSamples;template< typename source_IT > inline DenseHistogram &addSamples(source_IT begin, source_IT end, source_type min, source_type max) { return addSamplesImpl(begin, end);};template< typename source_IT > DenseHistogram &addSamples(gsl::span< const source_type > span, source_type min, source_type max) { return addSamplesImpl(span);};using HistogramConcept_type::addFrequencies;protected:template< typename source_IT > DenseHistogram &addSamplesImpl(source_IT begin, source_IT end);DenseHistogram &addSamplesImpl(gsl::span< const source_type > samples);template< typename freq_IT > DenseHistogram &addFrequenciesImpl(freq_IT begin, freq_IT end, difference_type offset);private:inline static constexpr size_t MaxSize=utils::pow2(utils::toBits< source_type >());};template< typename source_T >template< typename source_IT >auto DenseHistogram< source_T, std::enable_if_t< sizeof(source_T)<=2 > >::addSamplesImpl(source_IT begin, source_IT end) -> DenseHistogram &{ if constexpr(std::is_pointer_v< source_IT >) { return addSamplesImpl({begin, end});} else { std::for_each(begin, end, [this](const source_type &symbol) {++this->mNSamples;++this->mContainer[symbol];});} return *this;}template< typename source_T >auto DenseHistogram< source_T, std::enable_if_t< sizeof(source_T)<=2 > >::addSamplesImpl(gsl::span< const source_type > samples) -> DenseHistogram &{ using namespace internal;using namespace utils;if(samples.empty()) { return *this;} const auto begin=samples.data();const auto end=begin+samples.size();constexpr size_t ElemsPerQWord=sizeof(uint64_t)/sizeof(source_type);constexpr size_t nUnroll=2 *ElemsPerQWord;auto iter=begin;if constexpr(sizeof(source_type)==1) { std::array< ShiftableVector< source_type, value_type >, 3 > histograms{ {{this-> mContainer this mContainer getOffset()}
class DenseHistogram< source_T, std::enable_if_t< sizeof(source_T)<=2 > > :public internal::VectorContainer< source_T, uint32_t >, internal::HistogramConcept< source_T, typename internal::VectorContainer< source_T, uint32_t >::value_type, typename internal::VectorContainer< source_T, uint32_t >::difference_type, DenseHistogram< source_T > >{ using containerBase_type=internal::VectorContainer< source_T, uint32_t >;using HistogramConcept_type=internal::HistogramConcept< source_T, typename internal::VectorContainer< source_T, uint32_t >::value_type, typename internal::VectorContainer< source_T, uint32_t >::difference_type, DenseHistogram< source_T > >;friend containerBase_type;friend HistogramConcept_type;public:using source_type=source_T;using value_type=typename containerBase_type::value_type;using container_type=typename containerBase_type::container_type;using size_type=typename containerBase_type::size_type;using difference_type=typename containerBase_type::difference_type;using reference=typename containerBase_type::reference;using const_reference=typename containerBase_type::const_reference;using pointer=typename containerBase_type::pointer;using const_pointer=typename containerBase_type::const_pointer;using const_iterator=typename containerBase_type::const_iterator;DenseHistogram() :containerBase_type{MaxSize, std::numeric_limits< source_type >::min()} {};template< typename freq_IT > DenseHistogram(freq_IT begin, freq_IT end, difference_type offset) :containerBase_type{MaxSize, std::numeric_limits< source_type >::min()}, HistogramConcept_type{begin, end, offset} {};using HistogramConcept_type::addSamples;template< typename source_IT > inline DenseHistogram &addSamples(source_IT begin, source_IT end, source_type min, source_type max) { return addSamplesImpl(begin, end);};template< typename source_IT > DenseHistogram &addSamples(gsl::span< const source_type > span, source_type min, source_type max) { return addSamplesImpl(span);};using HistogramConcept_type::addFrequencies;protected:template< typename source_IT > DenseHistogram &addSamplesImpl(source_IT begin, source_IT end);DenseHistogram &addSamplesImpl(gsl::span< const source_type > samples);template< typename freq_IT > DenseHistogram &addFrequenciesImpl(freq_IT begin, freq_IT end, difference_type offset);private:inline static constexpr size_t MaxSize=utils::pow2(utils::toBits< source_type >());};template< typename source_T >template< typename source_IT >auto DenseHistogram< source_T, std::enable_if_t< sizeof(source_T)<=2 > >::addSamplesImpl(source_IT begin, source_IT end) -> DenseHistogram &{ if constexpr(std::is_pointer_v< source_IT >) { return addSamplesImpl({begin, end});} else { std::for_each(begin, end,[this](const source_type &symbol) {++this->mNSamples;++this->mContainer[symbol];});} return *this;}template< typename source_T >auto DenseHistogram< source_T, std::enable_if_t< sizeof(source_T)<=2 > >::addSamplesImpl(gsl::span< const source_type > samples) -> DenseHistogram &{ using namespace internal;using namespace utils;if(samples.empty()) { return *this;} const auto begin=samples.data();const auto end=begin+samples.size();constexpr size_t ElemsPerQWord=sizeof(uint64_t)/sizeof(source_type);constexpr size_t nUnroll=2 *ElemsPerQWord;auto iter=begin;if constexpr(sizeof(source_type)==1) { std::array< ShiftableVector< source_type, value_type >, 3 > histograms{ {{this-> mContainer size()
HistogramView< Hist_IT > trim(const HistogramView< Hist_IT > &buffer)
size_t countNUsedAlphabetSymbols(const AdaptiveHistogram< source_T > &histogram)
std::pair< source_T, source_T > getMinMax(const AdaptiveSymbolTable< source_T, symbol_T > &symbolTable)
Common utility functions.
constexpr size_t min
constexpr size_t max