16#ifndef RANS_SERIALIZE_H_
17#define RANS_SERIALIZE_H_
20#error rANS should not be exposed to root
28#ifdef RANS_ENABLE_JSON
29#include <rapidjson/writer.h>
// Returns the frequency associated with `symbol`, an element of `container`.
// When `container_T` is a symbol table, the escape symbol is reported with a
// frequency of 0 and every other symbol reports its own getFrequency().
// NOTE(review): the branch taken for non-symbol-table containers is not
// visible in this excerpt.
46template <
typename container_T>
47inline constexpr count_t getFrequency(
const container_T& container,
typename container_T::const_reference symbol)
49 if constexpr (isSymbolTable_v<container_T>) {
// The escape symbol is mapped to 0 instead of its stored frequency.
50 return container.isEscapeSymbol(symbol) ? 0 : symbol.getFrequency();
// Overload of getFrequency that participates in overload resolution only when
// isAdaptiveContainer_v<container_T> is true. The element is received by value
// as the value_type of the container's const_iterator.
// NOTE(review): the body of this overload is not visible in this excerpt.
56template <
typename container_T, std::enable_if_t<isAdaptiveContainer_v<container_T>,
bool> = true>
57inline constexpr count_t getFrequency(
const container_T& container,
typename container_T::const_iterator::value_type symbolPair)
// Overload of getFrequency that participates in overload resolution only when
// isHashContainer_v<container_T> is true. The element is received by const
// reference as the value_type of the container's const_iterator.
// NOTE(review): the remainder of the body is not visible in this excerpt.
62template <
typename container_T, std::enable_if_t<isHashContainer_v<container_T>,
bool> = true>
63inline constexpr count_t getFrequency(
const container_T& container,
const typename container_T::const_iterator::value_type& symbolPair)
// The symbol is the `second` member of the element.
65 const auto& symbol = symbolPair.second;
// Returns the frequency reserved for incompressible symbols:
//  - symbol tables: the frequency of the escape symbol,
//  - renormed histograms: getIncompressibleSymbolFrequency().
// NOTE(review): the signature line and any fallback branch are not visible in
// this excerpt; presumably this is getIncompressibleFrequency(container).
69template <
typename container_T>
72 if constexpr (isSymbolTable_v<container_T>) {
73 return container.getEscapeSymbol().getFrequency();
74 }
else if constexpr (isRenormedHistogram_v<container_T>) {
75 return container.getIncompressibleSymbolFrequency();
// Returns the "null" element of `container` as a container_T::value_type.
// For symbol tables this is the escape symbol.
// NOTE(review): the branch taken for other container kinds is not visible in
// this excerpt.
81template <
typename container_T>
82auto getNullElement(
const container_T& container) ->
typename container_T::value_type
84 if constexpr (isSymbolTable_v<container_T>) {
85 return container.getEscapeSymbol();
// NOTE(review): signature not visible in this excerpt; presumably the body of
// getDictExtent(min, max, renormingPrecision).
// A renormingPrecision of 0 is handled as a special case (its result is not
// visible here).
96 if (renormingPrecision == 0) {
// Otherwise the extent is the size of the closed interval [min, max].
99 return static_cast<size_t>(
max -
min) + 1;
105#ifdef RANS_ENABLE_JSON
// Writes `container` as a JSON object through the given rapidjson writer.
// Visible output: an "Offset" key holding container.getOffset() as Int64 and,
// later on, an "Incompressible" key. Frequencies are first gathered in a
// temporary vector and then iterated.
// NOTE(review): several lines of this function (filtering condition, keys and
// values written inside the loops, closing calls) are not visible in this
// excerpt.
106template <
typename container_T,
typename jsonBuffer_T>
107void toJSON(
const container_T& container, rapidjson::Writer<jsonBuffer_T>& writer)
109 using namespace utils;
111 writer.StartObject();
112 writer.Key(
"Offset");
113 writer.Int64(container.getOffset());
// Frequencies collected from the container; judging by the name, only
// non-zero ones are kept (the filtering condition is not visible here).
116 std::vector<count_t> nonzeroFrequencies;
118 for (
auto iter = container.begin(); iter != container.end(); ++iter) {
119 auto frequency = getFrequency(container, iter);
121 nonzeroFrequencies.push_back(frequency);
// Second pass over the collected frequencies (loop body not visible here).
130 for (
auto freq : nonzeroFrequencies) {
135 writer.Key(
"Incompressible");
// Serializes a renormed histogram or symbol table into the buffer starting at
// dstBufferBegin using eliasDeltaEncode.
// Compile-time requirements: dest_IT must be a raw pointer and container_T
// must be a symbol table or a renormed histogram.
// NOTE(review): the signature line and several statements (computation of
// `offset`, branch conditions, the tail of the iterEnd lambda and the return)
// are not visible in this excerpt; presumably this is
// compressRenormedDictionary(container, dstBufferBegin).
141template <
typename container_T,
typename dest_IT>
144 using namespace internal;
145 static_assert(std::is_pointer_v<dest_IT>,
"only raw pointers are permited as a target for serialization");
146 static_assert((isSymbolTable_v<container_T> || isRenormedHistogram_v<container_T>),
"only renormed Histograms and symbol tables are accepted. Non-renormed histograms might not compress well");
148 using source_type =
typename container_T::source_type;
149 using const_iterator =
typename container_T::const_iterator;
// Write cursor, initialised at the start of the destination buffer.
151 BitPtr dstIter{dstBufferBegin};
// Range of the container after trimming with respect to its null element.
152 const auto [trimmedBegin, trimmedEnd] =
trim(container, getNullElement(container));
// Index of the most recently written entry; empty until the first one.
153 std::optional<source_type> lastValidIndex{};
// Visit every (index, symbol) pair of the trimmed range.
154 forEachIndexValue(container, trimmedBegin, trimmedEnd, [&](
const source_type&
index,
const auto& symbol) {
155 auto frequency = getFrequency(container, symbol);
156 if (lastValidIndex.has_value()) {
// Indices are expected to arrive in strictly increasing order.
158 assert(
index > *lastValidIndex);
160 lastValidIndex =
index;
// An entry after the first is written as an `offset` followed by its
// frequency (how `offset` is computed is not visible here).
161 dstIter = eliasDeltaEncode(dstIter,
offset);
162 dstIter = eliasDeltaEncode(dstIter, frequency);
// Presumably the first-entry case: only the frequency is written.
166 dstIter = eliasDeltaEncode(dstIter, frequency);
167 lastValidIndex =
index;
// The incompressible frequency is written incremented by one ...
172 dstIter = eliasDeltaEncode(dstIter, getIncompressibleFrequency(container) + 1);
// ... or a plain 1 is written instead (the selecting condition is not
// visible here).
174 dstIter = eliasDeltaEncode(dstIter, 1);
// End pointer of the written data, derived from the final write cursor.
177 const dest_IT iterEnd = [dstIter]() {
178 using buffer_type =
typename std::iterator_traits<dest_IT>::value_type;
// Reads a serialized renormed dictionary from a raw-pointer range through a
// DictionaryStreamParser and fills a container indexed by source symbol.
// Throws ParsingError with the message below; the condition guarding the throw
// is not visible in this excerpt.
// NOTE(review): the signature line and the definition of container_type are
// not visible here; presumably this is
// readRenormedDictionary(begin, end, min, max, renormingPrecision).
187template <
typename source_T,
typename buffer_IT>
190 static_assert(std::is_pointer_v<buffer_IT>,
"can only deserialize from raw pointers");
192 using namespace internal;
194 using value_type =
typename container_type::value_type;
// Parser over the serialized buffer, constructed from begin, end and max.
196 DictionaryStreamParser<source_T> dictStream{begin,
end,
max};
198 const size_t dictExtent = getDictExtent(
min,
max, renormingPrecision);
// Destination container constructed from the computed extent and min.
200 container_type container(dictExtent,
min);
// Store every parsed (index, frequency) pair at its index.
202 while (dictStream.hasNext()) {
203 const auto [
index, frequency] = dictStream.getNext();
204 container[
index] = frequency;
// Index reported by the parser once the stream is exhausted; used in the
// error message below.
207 const auto index = dictStream.getIndex();
209 throw ParsingError{fmt::format(
"failed to read renormed dictionary: reached EOS at index {} before parsing min {} ",
index,
min)};
// Result: the filled container, the renorming precision and the incompressible
// symbol frequency reported by the parser.
211 return {std::move(container), renormingPrecision, dictStream.getIncompressibleSymbolFrequency()};
// Reads a serialized renormed dictionary from a raw-pointer range through a
// DictionaryStreamParser and collects the parsed entries into a set-like
// container.
// Throws ParsingError with the message below; the condition guarding the throw
// is not visible in this excerpt.
// NOTE(review): the signature line and the definition of container_type are
// not visible here; presumably this is
// readRenormedSetDictionary(begin, end, min, max, renormingPrecision).
214template <
typename source_T,
typename buffer_IT>
217 static_assert(std::is_pointer_v<buffer_IT>,
"can only deserialize from raw pointers");
219 using namespace internal;
220 using streamParser_type = DictionaryStreamParser<source_T>;
221 using value_type =
typename streamParser_type::value_type;
223 using base_container_type =
typename container_type::container_type;
// Parser over the serialized buffer, constructed from begin, end and max.
225 streamParser_type dictStream{begin,
end,
max};
226 base_container_type container{};
// Append every parsed entry in the order the parser yields them.
228 while (dictStream.hasNext()) {
229 container.emplace_back(dictStream.getNext());
// The parse order is reversed before the container is handed over flagged as
// OrderedSetState::ordered.
232 std::reverse(container.begin(), container.end());
233 container_type setContainer{std::move(container), 0, OrderedSetState::ordered};
// Index reported by the parser once the stream is exhausted; used in the
// error message below.
235 const auto index = dictStream.getIndex();
237 throw ParsingError{fmt::format(
"failed to read renormed dictionary: reached EOS at index {} before parsing min {} ",
index,
min)};
// Result: the set container, the renorming precision and the incompressible
// symbol frequency reported by the parser.
239 return {std::move(setContainer), renormingPrecision, dictStream.getIncompressibleSymbolFrequency()};
Non-owning, lightweight structure for histogram manipulation.
common helper classes and functions
helper functionalities useful for packing operations
typename base_type::container_type container_type
compress data stream using Elias-Delta coding.
constexpr count_t getFrequency(const container_T &container, typename container_T::const_reference symbol)
constexpr count_t getIncompressibleFrequency(const container_T &container) noexcept
constexpr size_t getDictExtent(T min, T max, size_t renormingPrecision) noexcept
auto getNullElement(const container_T &container) -> typename container_T::value_type
dest_IT compressRenormedDictionary(const container_T &container, dest_IT dstBufferBegin)
HistogramView< Hist_IT > trim(const HistogramView< Hist_IT > &buffer)
RenormedDenseHistogram< source_T > readRenormedDictionary(buffer_IT begin, buffer_IT end, source_T min, source_T max, size_t renormingPrecision)
RenormedSparseHistogram< source_T > readRenormedSetDictionary(buffer_IT begin, buffer_IT end, source_T min, source_T max, size_t renormingPrecision)
Common utility functions.
manipulation of types at compile time