16#ifndef ALICEO2_INPLACEENTROPYCODER_H_
17#define ALICEO2_INPLACEENTROPYCODER_H_
// Declaration fragment of a class template parameterized on source_T.
// NOTE(review): the leading integers are line numbers of the original header
// fused into the text; the class-head line and many member declarations are
// absent from this listing, so only the visible members are described here.
34template <
typename source_T>
// Byte buffer type used by the mDictBuffer member below.
45 using dict_buffer_type = std::vector<uint8_t>;
// Histogram and encoder are each a variant over the dense, adaptive and
// sparse flavours; which alternative is active is decided at run time.
51 using histogram_type = std::variant<dense_histogram_type, adaptive_histogram_type, sparse_histogram_type>;
52 using encoder_type = std::variant<dense_encoder_type, adaptive_encoder_type, sparse_encoder_type>;
57 template <
typename source_IT>
60 template <
typename source_IT>
75 template <
typename dst_T = u
int8_t>
// encode() takes a source range and a destination range and returns a
// destination iterator; [[nodiscard]] makes ignoring that result a warning.
79 template <typename src_IT, typename dst_IT>
80 [[nodiscard]] dst_IT
encode(src_IT srcBegin, src_IT srcEnd, dst_IT dstBegin, dst_IT dstEnd);
82 template <typename dst_IT>
85 template <typename dst_T>
// The overload sets below are selected with enable_if on the size of the
// iterator's value_type: one overload for value types smaller than 4 bytes,
// one for value types of exactly 4 bytes.
// NOTE(review): the declarations belonging to the next two template heads are
// missing from this listing.
89 template <typename source_IT,
std::enable_if_t<(sizeof(typename
std::iterator_traits<source_IT>::value_type) < 4),
bool> = true>
92 template <typename source_IT,
std::enable_if_t<(sizeof(typename
std::iterator_traits<source_IT>::value_type) == 4),
bool> = true>
95 template <typename source_IT,
std::enable_if_t<(sizeof(typename
std::iterator_traits<source_IT>::value_type) < 4),
bool> = true>
96 void init(source_IT srcBegin, source_IT srcEnd);
98 template <typename source_IT,
std::enable_if_t<(sizeof(typename
std::iterator_traits<source_IT>::value_type) == 4),
bool> = true>
99 void init(source_IT srcBegin, source_IT srcEnd);
101 template <typename container_T>
102 void serializeDictionary(const container_T&);
// Empty optional until an encoder has been constructed; other members
// dereference it without checking.
106 std::optional<encoder_type> mEncoder{};
// Starts empty; filled by serializeDictionary().
108 dict_buffer_type mDictBuffer{};
// Constructor-body fragment taking a source range [srcBegin, srcEnd).
// Enforces at compile time that the iterator's value_type is exactly
// source_T, initializes the object through one of the init() overloads and
// finally assigns mIncompressiblePacker a Packer built from mMetrics.
// NOTE(review): the signature line and the lines that compute min/max and
// use nSamples are missing from this listing.
112template <
typename source_T>
113template <
typename src_IT>
116 static_assert(std::is_same_v<source_T, typename std::iterator_traits<src_IT>::value_type>);
118 const size_t nSamples = std::distance(srcBegin, srcEnd);
// Raw pointers take a separate compile-time branch; the min/max-aware init()
// is only called inside it.
119 if constexpr (std::is_pointer_v<src_IT>) {
122 init(srcBegin, srcEnd,
min,
max);
124 init(srcBegin, srcEnd);
127 init(srcBegin, srcEnd);
// Must come after init(): the packer is constructed from the metrics that
// init() assigns.
130 mIncompressiblePacker =
Packer(mMetrics);
// Constructor-body fragment for the variant that receives min and max from
// the caller. Enforces at compile time that the iterator's value_type is
// exactly source_T, forwards range and bounds to init() and then assigns
// mIncompressiblePacker a Packer built from mMetrics.
// NOTE(review): the signature line is missing from this listing.
133template <
typename source_T>
134template <
typename source_IT>
137 static_assert(std::is_same_v<source_T, typename std::iterator_traits<source_IT>::value_type>);
138 init(srcBegin, srcEnd,
min,
max);
// Must come after init(): the packer is constructed from the metrics that
// init() assigns.
139 mIncompressiblePacker =
Packer(mMetrics);
// Fragment that visits the active encoder alternative and stores the value
// of encoder.getNStreams() in nStreams.
// mEncoder is dereferenced without a check, so it must already hold an
// encoder when this runs.
// NOTE(review): signature, declaration of nStreams and the return statement
// are missing from this listing.
142template <
typename source_T>
146 std::visit([&,
this](
auto&& encoder) { nStreams = encoder.getNStreams(); }, *mEncoder);
// Fragment that visits the active encoder alternative and stores the
// precision reported by its symbol table in `precision`.
// mEncoder is dereferenced without a check, so it must already hold an
// encoder when this runs.
// NOTE(review): signature, declaration of `precision` and the return
// statement are missing from this listing.
150template <
typename source_T>
154 std::visit([&,
this](
auto&& encoder) {
precision = encoder.getSymbolTable().getPrecision(); }, *mEncoder);
// Fragment that renormalizes the held histogram against mMetrics and stores
// an encoder built from the renormalized histogram in mEncoder.
// NOTE(review): the signature line, the variant passed to std::visit and the
// conditions guarding the sparse and adaptive branches are missing from this
// listing.
158template <
typename source_T>
161 std::visit([
this](
auto&& histogram) {
// The histogram is moved into renorm(); its contents must not be relied on
// afterwards.
162 auto renormed =
rans::renorm(std::move(histogram), mMetrics);
// Only when the sparse histogram alternative is active is the dictionary
// serialized at this point.
164 if (std::holds_alternative<sparse_histogram_type>(*mHistogram)) {
165 serializeDictionary(renormed);
// Bit width of the [min, max] value range taken from the coder properties.
168 const size_t rangeBits =
rans::utils::getRangeBits(*mMetrics.getCoderProperties().min, *mMetrics.getCoderProperties().max);
169 const size_t nUsedAlphabetSymbols = mMetrics.getDatasetProperties().nUsedAlphabetSymbols;
// Ranges of at most 18 bits use the dense encoder.
171 if (rangeBits <= 18) {
173 mEncoder =
encoder_type{std::in_place_type<dense_encoder_type>, renormed};
176 mEncoder =
encoder_type{std::in_place_type<sparse_encoder_type>, renormed};
179 mEncoder =
encoder_type{std::in_place_type<adaptive_encoder_type>, renormed};
// Fragment of encode(): runs the active encoder over [srcBegin, srcEnd),
// writing to dstBegin, and records the end of the written message in
// messageEnd. The iterator's value_type must be exactly source_T.
// mEncoder is dereferenced without a check, so it must already hold an
// encoder when this runs.
// NOTE(review): the signature line, the else keyword between the two
// process() calls, any use of dstEnd and the return statement are missing
// from this listing.
185template <
typename source_T>
186template <
typename src_IT,
typename dst_IT>
189 static_assert(std::is_same_v<source_T, typename std::iterator_traits<src_IT>::value_type>);
191 dst_IT messageEnd = dstBegin;
193 std::visit([&,
this](
auto&& encoder) {
// With an escape symbol in the symbol table, process() additionally gets an
// output iterator that appends to mIncompressibleBuffer; the buffer is
// reserved up front from the coder properties.
194 if (encoder.getSymbolTable().hasEscapeSymbol()) {
195 mIncompressibleBuffer.reserve(*mMetrics.getCoderProperties().nIncompressibleSamples);
196 auto [encodedMessageEnd, literalsEnd] = encoder.process(srcBegin, srcEnd, dstBegin, std::back_inserter(mIncompressibleBuffer));
197 messageEnd = encodedMessageEnd;
// Three-argument form: process() is given no iterator for literals.
199 messageEnd = encoder.process(srcBegin, srcEnd, dstBegin);
// Fragment of writeDictionary(): dst_IT must be a raw pointer type. When
// mDictBuffer is non-empty its bytes are copied to dstBegin and `ret` is set
// to the first address past the copy that is suitably aligned for dst_type.
// NOTE(review): the signature line, the branch taken when mDictBuffer is
// empty, any bounds check against dstEnd and the return statement are missing
// from this listing.
208template <
typename source_T>
209template <
typename dst_IT>
212 static_assert(std::is_pointer_v<dst_IT>);
214 using dst_type = std::remove_pointer_t<dst_IT>;
217 if (mDictBuffer.empty()) {
// Raw byte copy of the serialized dictionary.
221 std::memcpy(dstBegin, mDictBuffer.data(), mDictBuffer.size());
// Byte address directly after the copied dictionary.
224 auto end =
reinterpret_cast<uint8_t*
>(dstBegin) + mDictBuffer.size();
226 constexpr size_t alignment = std::alignment_of_v<dst_type>;
// Round `end` up to the next multiple of `alignment`; the outer modulo makes
// the increment zero when `end` is already aligned.
227 end += (alignment -
reinterpret_cast<uintptr_t
>(
end) % alignment) % alignment;
229 ret =
reinterpret_cast<dst_IT
>(
end);
// Fragment of writeIncompressible(): passes the data pointer and element
// count of mIncompressibleBuffer together with the destination range
// [dstBegin, dstEnd) to mIncompressiblePacker.pack() and returns its result.
// NOTE(review): the signature line is missing from this listing.
236template <
typename source_T>
237template <
typename dst_T>
240 return mIncompressiblePacker.pack(mIncompressibleBuffer.data(), mIncompressibleBuffer.size(), dstBegin, dstEnd);
// Fragment that returns the packing buffer size reported by
// mIncompressiblePacker for getNIncompressibleSamples() samples, expressed
// for the destination element type dst_T.
// NOTE(review): the signature line is missing from this listing.
243template <
typename source_T>
244template <
typename dst_T>
// `.template` is required because getPackingBufferSize is a member template
// called on a dependent type.
247 return mIncompressiblePacker.template getPackingBufferSize<dst_T>(getNIncompressibleSamples());
// init() overload for source value types smaller than 4 bytes, with
// caller-supplied min and max. Builds a dense histogram from the samples and
// derives mMetrics from that histogram together with min and max.
// NOTE(review): the braces of the function body are missing from this
// listing.
250template <
typename source_T>
251template <
typename source_IT, std::enable_if_t<(sizeof(
typename std::iterator_traits<source_IT>::value_type) < 4),
bool>>
252void InplaceEntropyCoder<source_T>::init(source_IT srcBegin, source_IT srcEnd, source_type min, source_type max)
// min and max are not passed to fromSamples() here; they only enter the
// metrics on the next line.
254 mHistogram.emplace(histogram_type{rans::makeDenseHistogram::fromSamples(srcBegin, srcEnd)});
// std::get throws std::bad_variant_access unless the dense alternative is
// active, which the line above establishes.
255 mMetrics = metrics_type{std::get<dense_histogram_type>(*mHistogram), min, max};
// Fragment of the init() overload for 4-byte source value types with
// caller-supplied min and max. Three branches derive mMetrics from the dense,
// sparse or adaptive histogram alternative together with min and max.
// NOTE(review): the signature line, the histogram construction and the
// conditions selecting between the three branches are missing from this
// listing.
258template <
typename source_T>
259template <
typename source_IT, std::enable_if_t<(sizeof(
typename std::iterator_traits<source_IT>::value_type) == 4),
bool>>
262 const size_t nSamples = std::distance(srcBegin, srcEnd);
// Each std::get below requires the named alternative to be the active one in
// *mHistogram, otherwise it throws std::bad_variant_access.
269 mMetrics = metrics_type{std::get<dense_histogram_type>(*mHistogram),
min,
max};
273 mMetrics = metrics_type{std::get<sparse_histogram_type>(*mHistogram),
min,
max};
277 mMetrics = metrics_type{std::get<adaptive_histogram_type>(*mHistogram),
min,
max};
// init() overload for source value types smaller than 4 bytes without
// caller-supplied bounds. Stores a dense histogram built from the samples in
// mHistogram and derives mMetrics from it.
// NOTE(review): the braces of the function body are missing from this
// listing.
281template <
typename source_T>
282template <
typename source_IT, std::enable_if_t<(sizeof(
typename std::iterator_traits<source_IT>::value_type) < 4),
bool>>
283void InplaceEntropyCoder<source_T>::init(source_IT srcBegin, source_IT srcEnd)
// in_place_type selects the dense alternative of the variant explicitly.
285 mHistogram = histogram_type{std::in_place_type<dense_histogram_type>, rans::makeDenseHistogram::fromSamples(srcBegin, srcEnd)};
286 mMetrics = metrics_type{std::get<dense_histogram_type>(*mHistogram)};
// init() overload for 4-byte source value types without caller-supplied
// bounds. Derives mMetrics from the sparse histogram alternative held in
// mHistogram.
// NOTE(review): the braces and the line that constructs the histogram are
// missing from this listing.
289template <
typename source_T>
290template <
typename source_IT, std::enable_if_t<(sizeof(
typename std::iterator_traits<source_IT>::value_type) == 4),
bool>>
291void InplaceEntropyCoder<source_T>::init(source_IT srcBegin, source_IT srcEnd)
// std::get throws std::bad_variant_access unless the sparse alternative is
// the active one in *mHistogram.
294 mMetrics = metrics_type{std::get<sparse_histogram_type>(*mHistogram)};
// Serializes a renormalized histogram into mDictBuffer: the buffer is first
// sized from the metrics' compressed-dictionary size estimate and afterwards
// resized to the number of bytes between its start and `end`.
// NOTE(review): the braces and the line that writes the dictionary and
// produces `end` are missing from this listing.
297template <
typename source_T>
298template <
typename container_T>
299void InplaceEntropyCoder<source_T>::serializeDictionary(
const container_T& renormedHistogram)
// Zero-filled allocation using the estimated size.
302 mDictBuffer.resize(mMetrics.getSizeEstimate().getCompressedDictionarySize(), 0);
// Resize to the distance from the buffer start to `end`.
305 mDictBuffer.resize(std::distance(mDictBuffer.data(),
end));
// Debug-only check that the buffer is not empty after serialization.
307 assert(mDictBuffer.size() > 0);
Interfaces for BitPacking using librans.
const metrics_type & getMetrics() const noexcept
dst_IT encode(src_IT srcBegin, src_IT srcEnd, dst_IT dstBegin, dst_IT dstEnd)
Packer< source_type > packer_type
dst_T * writeIncompressible(dst_T *dstBegin, dst_T *dstEnd)
std::variant< dense_encoder_type, adaptive_encoder_type, sparse_encoder_type > encoder_type
std::vector< source_type > incompressible_buffer_type
std::variant< dense_histogram_type, adaptive_histogram_type, sparse_histogram_type > histogram_type
InplaceEntropyCoder()=default
size_t getPackedIncompressibleSize() const noexcept
size_t getSymbolTablePrecision() const
size_t getNIncompressibleSamples() const noexcept
dst_IT writeDictionary(dst_IT dstBegin, dst_IT dstEnd)
rans::Metrics< source_type > metrics_type
InplaceEntropyCoder(source_IT srcBegin, source_IT srcEnd)
size_t getNStreams() const
public interface for encoding.
static factory classes for building histograms, encoders and decoders.
GLenum GLint GLint * precision
public interface for building and renorming histograms from source data.
std::pair< source_T, source_T > minmax(gsl::span< const source_T > range)
constexpr uint32_t getRangeBits(T min, T max) noexcept
constexpr size_t pow2(size_t n) noexcept
void checkBounds(IT iteratorPosition, IT upperBound)
decltype(makeDenseEncoder<>::fromRenormed(RenormedDenseHistogram< source_T >{})) denseEncoder_type
decltype(makeSparseEncoder<>::fromRenormed(RenormedSparseHistogram< source_T >{})) sparseEncoder_type
dest_IT compressRenormedDictionary(const container_T &container, dest_IT dstBufferBegin)
decltype(auto) renorm(histogram_T histogram, size_t newPrecision, RenormingPolicy renormingPolicy=RenormingPolicy::Auto, size_t lowProbabilityCutoffBits=0)
decltype(makeAdaptiveEncoder<>::fromRenormed(RenormedAdaptiveHistogram< source_T >{})) adaptiveEncoder_type
Defining DataPointCompositeObject explicitly as copiable.
public interface for serializing histograms (dictionaries) to JSON or compressed binary.
static decltype(auto) fromSamples(source_IT begin, source_IT end)
static decltype(auto) fromSamples(source_IT begin, source_IT end, typename std::iterator_traits< source_IT >::value_type min, typename std::iterator_traits< source_IT >::value_type max)