Project
Loading...
Searching...
No Matches
serialize.h
Go to the documentation of this file.
1// Copyright 2019-2023 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
15
16#ifndef RANS_SERIALIZE_H_
17#define RANS_SERIALIZE_H_
18
19#ifdef __CLING__
20#error rANS should not be exposed to root
21#endif
22
23#include <type_traits>
24#include <cstdint>
25#include <stdexcept>
26#include <optional>
27
28#ifdef RANS_ENABLE_JSON
29#include <rapidjson/writer.h>
30#endif
39
40namespace o2::rans
41{
42
43namespace internal
44{
45
46template <typename container_T>
47inline constexpr count_t getFrequency(const container_T& container, typename container_T::const_reference symbol)
48{
49 if constexpr (isSymbolTable_v<container_T>) {
50 return container.isEscapeSymbol(symbol) ? 0 : symbol.getFrequency();
51 } else {
52 return symbol;
53 }
54};
55
56template <typename container_T, std::enable_if_t<isAdaptiveContainer_v<container_T>, bool> = true>
57inline constexpr count_t getFrequency(const container_T& container, typename container_T::const_iterator::value_type symbolPair)
58{
59 return getFrequency(container, symbolPair.second);
60};
61
62template <typename container_T, std::enable_if_t<isHashContainer_v<container_T>, bool> = true>
63inline constexpr count_t getFrequency(const container_T& container, const typename container_T::const_iterator::value_type& symbolPair)
64{
65 const auto& symbol = symbolPair.second;
66 return getFrequency(container, symbol);
67};
68
69template <typename container_T>
70inline constexpr count_t getIncompressibleFrequency(const container_T& container) noexcept
71{
72 if constexpr (isSymbolTable_v<container_T>) {
73 return container.getEscapeSymbol().getFrequency();
74 } else if constexpr (isRenormedHistogram_v<container_T>) {
75 return container.getIncompressibleSymbolFrequency();
76 } else {
77 return 0;
78 }
79};
80
81template <typename container_T>
82auto getNullElement(const container_T& container) -> typename container_T::value_type
83{
84 if constexpr (isSymbolTable_v<container_T>) {
85 return container.getEscapeSymbol();
86 } else {
87 return {};
88 }
89}
90
/// @brief Number of dictionary slots needed to cover the closed range [min, max].
///
/// @param min smallest symbol of the dictionary
/// @param max largest symbol of the dictionary, must not be smaller than `min`
/// @param renormingPrecision a value of 0 marks an empty dictionary
/// @return 0 for an empty dictionary, otherwise `max - min + 1`
template <typename T>
[[nodiscard]] inline constexpr size_t getDictExtent(T min, T max, size_t renormingPrecision) noexcept
{
  assert(max >= min);
  // special case - empty dictionary
  if (renormingPrecision == 0) {
    return 0;
  }
  if constexpr (std::is_integral_v<T> && !std::is_same_v<T, bool>) {
    // Subtract in the unsigned counterpart of T: for signed 32-bit (or wider) types
    // `max - min` can exceed the signed range, which is undefined behaviour.
    // Modular unsigned arithmetic yields the exact distance whenever max >= min.
    // The outer cast undoes integer promotion for types narrower than int.
    using unsigned_T = std::make_unsigned_t<T>;
    const auto distance = static_cast<unsigned_T>(static_cast<unsigned_T>(max) - static_cast<unsigned_T>(min));
    return static_cast<size_t>(distance) + 1;
  } else {
    return static_cast<size_t>(max - min) + 1;
  }
}
102
103}; // namespace internal
104
105#ifdef RANS_ENABLE_JSON
106template <typename container_T, typename jsonBuffer_T>
107void toJSON(const container_T& container, rapidjson::Writer<jsonBuffer_T>& writer)
108{
109 using namespace utils;
110
111 writer.StartObject();
112 writer.Key("Offset");
113 writer.Int64(container.getOffset());
114 writer.Key("Index");
115 uint32_t index = 0;
116 std::vector<count_t> nonzeroFrequencies;
117 writer.StartArray();
118 for (auto iter = container.begin(); iter != container.end(); ++iter) {
119 auto frequency = getFrequency(container, iter);
120 if (frequency > 0) {
121 nonzeroFrequencies.push_back(frequency);
122 writer.Uint(index);
123 }
124 ++index;
125 }
126 writer.EndArray();
127
128 writer.Key("Value");
129 writer.StartArray();
130 for (auto freq : nonzeroFrequencies) {
131 writer.Uint(freq);
132 }
133 writer.EndArray();
134
135 writer.Key("Incompressible");
136 writer.Int64(getIncompressibleFrequency(container));
137 writer.EndObject();
138};
139#endif /* RANS_ENABLE_JSON */
140
141template <typename container_T, typename dest_IT>
142dest_IT compressRenormedDictionary(const container_T& container, dest_IT dstBufferBegin)
143{
144 using namespace internal;
145 static_assert(std::is_pointer_v<dest_IT>, "only raw pointers are permited as a target for serialization");
146 static_assert((isSymbolTable_v<container_T> || isRenormedHistogram_v<container_T>), "only renormed Histograms and symbol tables are accepted. Non-renormed histograms might not compress well");
147
148 using source_type = typename container_T::source_type;
149 using const_iterator = typename container_T::const_iterator;
150
151 BitPtr dstIter{dstBufferBegin};
152 const auto [trimmedBegin, trimmedEnd] = trim(container, getNullElement(container));
153 std::optional<source_type> lastValidIndex{};
154 forEachIndexValue(container, trimmedBegin, trimmedEnd, [&](const source_type& index, const auto& symbol) {
155 auto frequency = getFrequency(container, symbol);
156 if (lastValidIndex.has_value()) {
157 if (frequency > 0) {
158 assert(index > *lastValidIndex);
159 uint32_t offset = index - *lastValidIndex;
160 lastValidIndex = index;
161 dstIter = eliasDeltaEncode(dstIter, offset);
162 dstIter = eliasDeltaEncode(dstIter, frequency);
163 }
164 } else {
165 if (frequency > 0) {
166 dstIter = eliasDeltaEncode(dstIter, frequency);
167 lastValidIndex = index;
168 }
169 }
170 });
171 // write out incompressibleFrequency
172 dstIter = eliasDeltaEncode(dstIter, getIncompressibleFrequency(container) + 1);
173 // finish off by a 1 to identify start of the sequence.
174 dstIter = eliasDeltaEncode(dstIter, 1);
175
176 // extract raw Pointer from BitPtr
177 const dest_IT iterEnd = [dstIter]() {
178 using buffer_type = typename std::iterator_traits<dest_IT>::value_type;
179 dest_IT iterEnd = dstIter.toPtr<buffer_type>();
180 // one past the end
181 return ++iterEnd;
182 }();
183
184 return iterEnd;
185} // namespace o2::rans
186
187template <typename source_T, typename buffer_IT>
188RenormedDenseHistogram<source_T> readRenormedDictionary(buffer_IT begin, buffer_IT end, source_T min, source_T max, size_t renormingPrecision)
189{
190 static_assert(std::is_pointer_v<buffer_IT>, "can only deserialize from raw pointers");
191
192 using namespace internal;
193 using container_type = typename RenormedDenseHistogram<source_T>::container_type;
194 using value_type = typename container_type::value_type;
195
196 DictionaryStreamParser<source_T> dictStream{begin, end, max};
197
198 const size_t dictExtent = getDictExtent(min, max, renormingPrecision);
199
200 container_type container(dictExtent, min);
201
202 while (dictStream.hasNext()) {
203 const auto [index, frequency] = dictStream.getNext();
204 container[index] = frequency;
205 }
206
207 const auto index = dictStream.getIndex();
208 if (index != min) {
209 throw ParsingError{fmt::format("failed to read renormed dictionary: reached EOS at index {} before parsing min {} ", index, min)};
210 }
211 return {std::move(container), renormingPrecision, dictStream.getIncompressibleSymbolFrequency()};
212};
213
214template <typename source_T, typename buffer_IT>
215RenormedSparseHistogram<source_T> readRenormedSetDictionary(buffer_IT begin, buffer_IT end, source_T min, source_T max, size_t renormingPrecision)
216{
217 static_assert(std::is_pointer_v<buffer_IT>, "can only deserialize from raw pointers");
218
219 using namespace internal;
220 using streamParser_type = DictionaryStreamParser<source_T>;
221 using value_type = typename streamParser_type::value_type;
222 using container_type = typename RenormedSparseHistogram<source_T>::container_type;
223 using base_container_type = typename container_type::container_type;
224
225 streamParser_type dictStream{begin, end, max};
226 base_container_type container{};
227
228 while (dictStream.hasNext()) {
229 container.emplace_back(dictStream.getNext());
230 }
231
232 std::reverse(container.begin(), container.end());
233 container_type setContainer{std::move(container), 0, OrderedSetState::ordered};
234
235 const auto index = dictStream.getIndex();
236 if (index != min) {
237 throw ParsingError{fmt::format("failed to read renormed dictionary: reached EOS at index {} before parsing min {} ", index, min)};
238 }
239 return {std::move(setContainer), renormingPrecision, dictStream.getIncompressibleSymbolFrequency()};
240};
241
242} // namespace o2::rans
243
244#endif /* RANS_SERIALIZE_H_ */
Non-owning, lightweight structure for histogram manipulation.
common helper classes and functions
helper functionalities useful for packing operations
uint32_t source_type
typename base_type::container_type container_type
compress data stream using Elias-Delta coding.
rans exceptions
GLuint GLuint end
Definition glcorearb.h:469
GLuint index
Definition glcorearb.h:781
GLintptr offset
Definition glcorearb.h:660
packs data into a buffer
constexpr count_t getFrequency(const container_T &container, typename container_T::const_reference symbol)
Definition serialize.h:47
constexpr count_t getIncompressibleFrequency(const container_T &container) noexcept
Definition serialize.h:70
constexpr size_t getDictExtent(T min, T max, size_t renormingPrecision) noexcept
Definition serialize.h:92
auto getNullElement(const container_T &container) -> typename container_T::value_type
Definition serialize.h:82
dest_IT compressRenormedDictionary(const container_T &container, dest_IT dstBufferBegin)
Definition serialize.h:142
HistogramView< Hist_IT > trim(const HistogramView< Hist_IT > &buffer)
RenormedDenseHistogram< source_T > readRenormedDictionary(buffer_IT begin, buffer_IT end, source_T min, source_T max, size_t renormingPrecision)
Definition serialize.h:188
uint32_t count_t
Definition defaults.h:34
RenormedSparseHistogram< source_T > readRenormedSetDictionary(buffer_IT begin, buffer_IT end, source_T min, source_T max, size_t renormingPrecision)
Definition serialize.h:215
Common utility functions.
uint32_t buffer_type
constexpr size_t min
constexpr size_t max
manipulation of types at compile time