Project
Loading...
Searching...
No Matches
bench_ransTPC.cxx
Go to the documentation of this file.
1// Copyright 2019-2023 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
15
16#include <vector>
17#include <cstring>
18#include <execution>
19#include <iterator>
20#include <iostream>
21
22#include <boost/program_options.hpp>
23#include <boost/mp11.hpp>
24#include <rapidjson/document.h>
25#include <rapidjson/writer.h>
26#include <rapidjson/istreamwrapper.h>
27#include <rapidjson/ostreamwrapper.h>
28#include <fairlogger/Logger.h>
29
30#include "rANS/factory.h"
31#include "rANS/histogram.h"
32#include "rANS/serialize.h"
33
34#include "helpers.h"
35
36#ifdef ENABLE_VTUNE_PROFILER
37#include <ittnotify.h>
38#endif
39
40namespace bpo = boost::program_options;
41using namespace o2::rans;
42
// Compile-time list of the coder implementations that get benchmarked.
// CoderTag::Compat is always part of the list; SingleStream, SSE and AVX2 are
// appended only when the corresponding RANS_* macro is defined.
43using coder_types = boost::mp11::mp_list<std::integral_constant<CoderTag, CoderTag::Compat>
44#ifdef RANS_SINGLE_STREAM
45 ,
46 std::integral_constant<CoderTag, CoderTag::SingleStream>
47#endif /* RANS_SINGLE_STREAM */
48#ifdef RANS_SSE
49 ,
50 std::integral_constant<CoderTag, CoderTag::SSE>
51#endif /* RANS_SSE */
52#ifdef RANS_AVX2
53 ,
54 std::integral_constant<CoderTag, CoderTag::AVX2>
55#endif /* RANS_AVX2 */
56 >;
57
58// using coder_types = boost::mp11::mp_list<std::integral_constant<CoderTag, CoderTag::SingleStream>>;
59
60std::string toString(CoderTag tag)
61{
62 switch (tag) {
63 case CoderTag::Compat:
64 return {"Compat"};
65 break;
66 case CoderTag::SingleStream:
67 return {"SingleStream"};
68 break;
69 case CoderTag::SSE:
70 return {"SSE"};
71 break;
72 case CoderTag::AVX2:
73 return {"AVX2"};
74 break;
75 default:
76 throw Exception("Invalid");
77 break;
78 };
79};
80
82{
// Helper that runs a callable between mTimer.start() and mTimer.stop(), writes the
// measured duration in milliseconds as a JSON key/value pair through mWriter and
// logs it at info level.
83 public:
84 using jsonWriter_type = rapidjson::Writer<rapidjson::OStreamWrapper>;
85
// NOTE(review): a default-constructed instance leaves mWriter null, while both
// timeAndLog overloads dereference it unconditionally.
86 TimingDecorator() = default;
// Keeps a non-owning pointer to `writer`; nothing in this class releases it.
87 TimingDecorator(jsonWriter_type& writer) : mWriter{&writer} {};
88
// Overload chosen when functor() yields a value: times the call, emits
// keyName -> duration [ms] on the JSON writer, logs "<logMessage> in <ms> ms"
// and hands the functor's result back to the caller.
89 template <typename F, std::enable_if_t<!std::is_void_v<std::invoke_result_t<F>>, bool> = true>
90 decltype(auto) timeAndLog(const std::string& keyName, const std::string& logMessage, F functor)
91 {
92 mTimer.start();
93 decltype(auto) ret = functor();
94 mTimer.stop();
95 mWriter->Key(keyName.c_str());
96 const double_t msDuration = mTimer.getDurationMS();
97 mWriter->Double(msDuration);
98 LOGP(info, "{} in {} ms", logMessage, msDuration);
99
100 return ret;
101 };
102
// Overload chosen when functor() returns void: same timing, JSON output and
// logging as above, without a return value.
103 template <typename F, std::enable_if_t<std::is_void_v<std::invoke_result_t<F>>, bool> = true>
104 void timeAndLog(const std::string& keyName, const std::string& logMessage, F functor)
105 {
106 mTimer.start();
107 functor();
108 mTimer.stop();
109 mWriter->Key(keyName.c_str());
110 const double_t msDuration = mTimer.getDurationMS();
111 mWriter->Double(msDuration);
112 LOGP(info, "{} in {} ms", logMessage, msDuration);
113 };
114
115 private:
// Destination for the timing entries (null after default construction).
116 jsonWriter_type* mWriter{};
// Timer reused for every measurement.
117 utils::RANSTimer mTimer{};
118};
119
120// std::ofstream ofFrequencies{"frequencies.json"};
121// rapidjson::OStreamWrapper streamFrequencies{ofFrequencies};
122// rapidjson::Writer<rapidjson::OStreamWrapper> writerFrequencies{streamFrequencies};
123
124// std::ofstream ofRenormed{"renormed.json"};
125// rapidjson::OStreamWrapper streamRenormed{ofRenormed};
126// rapidjson::Writer<rapidjson::OStreamWrapper> writerRenormed{streamRenormed};
127
// Runs one rANS encode/decode round trip over `inputData` and records the outcome
// as a JSON object keyed by `name` on `writer`.
//   source_T    symbol type of the input column
//   coderTag_V  encoder implementation handed to makeDenseEncoder
// The emitted object contains the sub-objects "Timing", "FrequencyTable",
// "RescaledFrequencies", "Message" and "Compression". A mismatch between the
// decoded data and `inputData` is only reported as a warning.
128template <typename source_T, CoderTag coderTag_V>
129void ransEncodeDecode(const std::string& name, const std::vector<source_T>& inputData, rapidjson::Writer<rapidjson::OStreamWrapper>& writer)
130{
131 using source_type = source_T;
// NOTE(review): `timer` is not referenced again in the visible body; every
// measurement goes through `t`.
132 utils::RANSTimer timer{};
133 TimingDecorator t{writer};
134
135 writer.Key(name.c_str());
136 writer.StartObject();
137
// Literal storage is sized to the input length; literalsEnd starts at the
// beginning of that storage and is advanced by the encoder when literals are written.
138 EncodeBuffer<source_type> encodeBuffer{inputData.size()};
139 encodeBuffer.literals.resize(inputData.size(), 0);
140 encodeBuffer.literalsEnd = encodeBuffer.literals.data();
// NOTE(review): `decodeBuffer` and `metrics` are used further down but their
// declarations are not among the lines visible in this listing - presumably they
// were lost during extraction; confirm against the original file.
142
143 writer.Key("Timing");
144 writer.StartObject();
145
146 LOGP(info, "processing: {} (nItems: {}, size: {} MiB)", name, inputData.size(), inputData.size() * sizeof(source_type) / 1024.0 / 1024.0);
147 auto histogram = t.timeAndLog(
148 "FrequencyTable", "Built Frequency Table", [&]() { return makeDenseHistogram::fromSamples(gsl::span<const source_type>(inputData)); });
149
150 // writerFrequencies.Key(name.c_str());
151 // toJSON(histogram, writerFrequencies);
152
// Copy that gets moved into renorm(); `histogram` itself is still read below.
153 auto tmpHist = histogram;
155 RenormedDenseHistogram<source_type> renormedHistogram{};
156 t.timeAndLog("Renorming", "Renormed Frequency Table", [&]() mutable {
157 metrics = Metrics<source_type>{histogram};
158 renormedHistogram = renorm(std::move(tmpHist), metrics);
159 });
160 // writerRenormed.Key(name.c_str());
161 // toJSON(renormedFrequencyTable, writerRenormed);
162
163 auto encoder = t.timeAndLog("Encoder", "Built Encoder", [&]() { return makeDenseEncoder<coderTag_V>::fromRenormed(renormedHistogram); });
164
// The encoder is called with a literals output position only when the renormed
// histogram reports an incompressible symbol. VTune collection, if enabled, is
// resumed for the encode call only.
165 t.timeAndLog("Encoding", "Encoded", [&]() mutable {
166#ifdef ENABLE_VTUNE_PROFILER
167 __itt_resume();
168#endif
169 if (renormedHistogram.hasIncompressibleSymbol()) {
170 std::tie(encodeBuffer.encodeBufferEnd, encodeBuffer.literalsEnd) = encoder.process(inputData.data(), inputData.data() + inputData.size(), encodeBuffer.buffer.data(), encodeBuffer.literalsEnd);
171 } else {
172 encodeBuffer.encodeBufferEnd = encoder.process(inputData.data(), inputData.data() + inputData.size(), encodeBuffer.buffer.data());
173 }
174#ifdef ENABLE_VTUNE_PROFILER
175 __itt_pause();
176#endif
177 });
// The logged byte count is the size of the uncompressed input.
178 LOGP(info, "Encoded {} Bytes", inputData.size() * sizeof(source_type));
179
// Dictionary round trip: serialize the encoder's symbol table into `dict`, then
// read it back into a renormed histogram.
180 std::vector<uint8_t> dict(histogram.size() * sizeof(uint64_t), 0);
181 auto dictEnd = t.timeAndLog("WriteDict", "Serialized Dict", [&]() { return compressRenormedDictionary(encoder.getSymbolTable(), dict.data()); });
182 LOGP(info, "Serialized Dict of {} Bytes", std::distance(dict.data(), dictEnd));
183 auto recoveredHistogram = t.timeAndLog("ReadDict", "Read Dict", [&]() {
184 const source_type min = encoder.getSymbolTable().getOffset();
// NOTE(review): std::max<source_type> converts both arguments to source_type before
// comparing, so for an unsigned source_type a size()-1 of -1 wraps instead of being
// clamped to 0 - confirm an empty symbol table cannot reach this point.
185 const source_type max = min + std::max<source_type>(static_cast<int64_t>(encoder.getSymbolTable().size())-1,0);
186 return readRenormedDictionary(dict.data(), dictEnd,min,max,renormedHistogram.getRenormingBits()); });
// Decoding is not wrapped in t.timeAndLog, so no decode duration ends up in "Timing".
187 auto decoder = makeDecoder<>::fromRenormed(renormedHistogram);
// NOTE(review): recoveredDecoder is only referenced by the disabled comparison below.
188 auto recoveredDecoder = makeDecoder<>::fromRenormed(recoveredHistogram);
189
190 // if (!(std::equal(decoder.getSymbolTable().begin(), decoder.getSymbolTable().end(), recoveredDecoder.getSymbolTable().begin()) &&
191 // (decoder.getSymbolTable().getEscapeSymbol() == recoveredDecoder.getSymbolTable().getEscapeSymbol()))) {
192 // LOGP(warning, "Missmatch between original and decoded Dictionary");
193 // }
194
// literalsEnd still at the start of the literal storage means the encoder wrote no
// literals, so the decoder is called without a literals position.
195 if (encodeBuffer.literalsEnd == encodeBuffer.literals.data()) {
196 decoder.process(encodeBuffer.encodeBufferEnd, decodeBuffer.buffer.data(), inputData.size(), encoder.getNStreams());
197 } else {
198 decoder.process(encodeBuffer.encodeBufferEnd, decodeBuffer.buffer.data(), inputData.size(), encoder.getNStreams(), encodeBuffer.literalsEnd);
199 }
200
201 if (!(decodeBuffer == inputData)) {
202 LOGP(warning, "Missmatch between original and decoded Message");
203 }
204 LOG(info) << "finished: " << name;
205
206 writer.EndObject(); // Timing
207
208 const auto& datasetProperties = metrics.getDatasetProperties();
209
210 // Frequency Table
211 // ##########################
212 writer.Key("FrequencyTable");
213 writer.StartObject();
214 writer.Key("nSamples");
215 writer.Uint64(histogram.getNumSamples());
216 writer.Key("Min");
217 writer.Int(datasetProperties.min);
218 writer.Key("Max");
219 writer.Int(datasetProperties.max);
220 writer.Key("alphabetRangeBits");
221 writer.Int(datasetProperties.alphabetRangeBits);
222 writer.Key("nUsedAlphabetSymbols");
223 writer.Uint(datasetProperties.nUsedAlphabetSymbols);
// Written as a constant 0 for the un-renormed table.
224 writer.Key("IncompressibleFrequency");
225 writer.Uint(0);
226 writer.EndObject(); // FrequencyTable
227
228 // RescaledFrequencies
229 //##########################
// NOTE(review): renormedMetrics is built from `histogram`, not from
// `renormedHistogram`, so Min/Max/alphabetRangeBits/nUsedAlphabetSymbols below are
// derived from the un-renormed table - confirm this is intended.
230 const Metrics<source_type> renormedMetrics{histogram};
231 const auto& renormedDatasetProperties = renormedMetrics.getDatasetProperties();
232
233 writer.Key("RescaledFrequencies");
234 writer.StartObject();
235 writer.Key("nSamples");
236 writer.Uint64(renormedHistogram.getNumSamples());
237 writer.Key("Min");
238 writer.Int(renormedDatasetProperties.min);
239 writer.Key("Max");
240 writer.Int(renormedDatasetProperties.max);
241 writer.Key("alphabetRangeBits");
242 writer.Int(renormedDatasetProperties.alphabetRangeBits);
243 writer.Key("nUsedAlphabetSymbols");
244 writer.Uint(renormedDatasetProperties.nUsedAlphabetSymbols);
245 writer.Key("IncompressibleFrequency");
246 writer.Uint(renormedHistogram.getIncompressibleSymbolFrequency());
247 writer.Key("RenormingBits");
248 writer.Uint(renormedHistogram.getRenormingBits());
249 writer.EndObject(); // RescaledFrequencies
250
251 // Message Properties
252 //##########################
253 writer.Key("Message");
254 writer.StartObject();
255 writer.Key("Size");
256 writer.Uint64(inputData.size());
257 writer.Key("SymbolSize");
258 writer.Uint(sizeof(source_type));
259 writer.Key("Entropy");
260 writer.Double(datasetProperties.entropy);
261 writer.Key("ExpectedCodewordLength");
262 writer.Double(computeExpectedCodewordLength<>(histogram, renormedHistogram));
263 writer.EndObject(); // Message
264
265 // Compression Properties
266 //##########################
267 writer.Key("Compression");
268 writer.StartObject();
// Element distance scaled by sizeof(uint32_t) - assumes the encode buffer holds
// 32-bit words; TODO confirm against EncodeBuffer.
269 writer.Key("EncodeBufferSize");
270 writer.Uint64(std::distance(encodeBuffer.buffer.data(), encodeBuffer.encodeBufferEnd) * sizeof(uint32_t));
271 writer.Key("LiteralSize");
272 writer.Uint64(std::distance(encodeBuffer.literals.data(), encodeBuffer.literalsEnd) * sizeof(source_type));
273 writer.Key("DictSize");
274 writer.Uint64(std::distance(dict.data(), dictEnd));
275 writer.EndObject(); // Compression
276
277 writer.EndObject(); // Encode/Decode Run
278};
279
280template <CoderTag coderTag_V>
281void encodeTPC(const std::string& name, const TPCCompressedClusters& compressedClusters, bool mergeColumns, rapidjson::Writer<rapidjson::OStreamWrapper>& writer)
282{
283 writer.Key(name.c_str());
284 writer.StartObject();
285 ransEncodeDecode<uint16_t, coderTag_V>("qTotA", compressedClusters.qTotA, writer);
286 ransEncodeDecode<uint16_t, coderTag_V>("qMaxA", compressedClusters.qMaxA, writer);
287 ransEncodeDecode<uint8_t, coderTag_V>("flagsA", compressedClusters.flagsA, writer);
288 ransEncodeDecode<uint8_t, coderTag_V>("rowDiffA", compressedClusters.rowDiffA, writer);
289 ransEncodeDecode<uint8_t, coderTag_V>("sliceLegDiffA", compressedClusters.sliceLegDiffA, writer);
290 ransEncodeDecode<uint16_t, coderTag_V>("padResA", compressedClusters.padResA, writer);
291 ransEncodeDecode<uint32_t, coderTag_V>("timeResA", compressedClusters.timeResA, writer);
292 ransEncodeDecode<uint8_t, coderTag_V>("sigmaPadA", compressedClusters.sigmaPadA, writer);
293 ransEncodeDecode<uint8_t, coderTag_V>("sigmaTimeA", compressedClusters.sigmaTimeA, writer);
294 ransEncodeDecode<uint8_t, coderTag_V>("qPtA", compressedClusters.qPtA, writer);
295 ransEncodeDecode<uint8_t, coderTag_V>("rowA", compressedClusters.rowA, writer);
296 ransEncodeDecode<uint8_t, coderTag_V>("sliceA", compressedClusters.sliceA, writer);
297 ransEncodeDecode<uint32_t, coderTag_V>("timeA", compressedClusters.timeA, writer);
298 ransEncodeDecode<uint16_t, coderTag_V>("padA", compressedClusters.padA, writer);
299 ransEncodeDecode<uint16_t, coderTag_V>("qTotU", compressedClusters.qTotU, writer);
300 ransEncodeDecode<uint16_t, coderTag_V>("qMaxU", compressedClusters.qMaxU, writer);
301 ransEncodeDecode<uint8_t, coderTag_V>("flagsU", compressedClusters.flagsU, writer);
302 ransEncodeDecode<uint16_t, coderTag_V>("padDiffU", compressedClusters.padDiffU, writer);
303 ransEncodeDecode<uint32_t, coderTag_V>("timeDiffU", compressedClusters.timeDiffU, writer);
304 ransEncodeDecode<uint8_t, coderTag_V>("sigmaPadU", compressedClusters.sigmaPadU, writer);
305 ransEncodeDecode<uint8_t, coderTag_V>("sigmaTimeU", compressedClusters.sigmaTimeU, writer);
306 ransEncodeDecode<uint16_t, coderTag_V>("nTrackClusters", compressedClusters.nTrackClusters, writer);
307 ransEncodeDecode<uint32_t, coderTag_V>("nSliceRowClusters", compressedClusters.nSliceRowClusters, writer);
308
309 writer.EndObject();
310};
311
// mp_product over the single list coder_types: each coder tag ends up in its own
// mp_list, which is why main reads the tag from element 0 of every entry.
312using encoder_types = boost::mp11::mp_product<boost::mp11::mp_list, coder_types>;
313
314int main(int argc, char* argv[])
315{
316 bpo::options_description options("Allowed options");
317 // clang-format off
318 options.add_options()
319 ("help,h", "print usage message")
320 ("in,i",bpo::value<std::string>(), "file to process")
321 ("out,o",bpo::value<std::string>(), "json output file")
322 ("mode,m",bpo::value<std::string>(), "compressor processing mode")
323 ("log_severity,l",bpo::value<std::string>(), "severity of FairLogger");
324 // clang-format on
325
326 bpo::variables_map vm;
327 bpo::store(bpo::parse_command_line(argc, argv, options), vm);
328 bpo::notify(vm);
329
330 if (vm.count("help")) {
331 std::cout << options << "\n";
332 return 0;
333 }
334
335 const std::string inFile = [&]() {
336 if (vm.count("in")) {
337 return vm["in"].as<std::string>();
338 } else {
339 LOG(error) << "missing path to input file";
340 exit(1);
341 }
342 }();
343
344 const std::string outFile = [&]() {
345 if (vm.count("out")) {
346 return vm["out"].as<std::string>();
347 } else {
348 return std::string("out.json");
349 }
350 }();
351
352 if (vm.count("log_severity")) {
353 fair::Logger::SetConsoleSeverity(vm["log_severity"].as<std::string>().c_str());
354 }
355
356 std::ofstream of{outFile};
357 if (!of) {
358 std::runtime_error(fmt::format("could not open output file at path {}", inFile));
359 }
360
361 // writerFrequencies.StartObject();
362 // writerRenormed.StartObject();
363
364 rapidjson::OStreamWrapper stream{of};
365 rapidjson::Writer<rapidjson::OStreamWrapper> writer{stream};
366 writer.StartObject();
367
368 TPCCompressedClusters compressedClusters = readFile(inFile);
369 LOG(info) << "loaded Compressed Clusters from file";
370 LOG(info) << "######################################################";
371 boost::mp11::mp_for_each<encoder_types>([&](auto L) {
372 using coder_type = boost::mp11::mp_at_c<decltype(L), 0>;
373 constexpr CoderTag coderTag = coder_type::value;
374 const std::string encoderTitle = toString(coderTag);
375
376 LOGP(info, "start rANS {}/Decode", encoderTitle);
377 encodeTPC<coderTag>(encoderTitle, compressedClusters, false, writer);
378 LOG(info) << "######################################################";
379 });
380 writer.EndObject();
381 stream.Flush();
382 of.close();
383
384 // writerFrequencies.EndObject();
385 // writerFrequencies.Flush();
386 // ofFrequencies.close();
387 // writerRenormed.EndObject();
388 // writerRenormed.Flush();
389 // ofRenormed.close();
390};
TPCCompressedClusters
std::vector< o2::mid::ColumnData > inputData
uint32_t source_type
boost::mp11::mp_product< boost::mp11::mp_list, coder_types > encoder_types
void ransEncodeDecode(const std::string &name, const std::vector< source_T > &inputData, rapidjson::Writer< rapidjson::OStreamWrapper > &writer)
void encodeTPC(const std::string &name, const TPCCompressedClusters &compressedClusters, bool mergeColumns, rapidjson::Writer< rapidjson::OStreamWrapper > &writer)
boost::mp11::mp_list< std::integral_constant< CoderTag, CoderTag::Compat > > coder_types
std::string toString(CoderTag tag)
TimingDecorator(jsonWriter_type &writer)
void timeAndLog(const std::string &keyName, const std::string &logMessage, F functor)
decltype(auto) timeAndLog(const std::string &keyName, const std::string &logMessage, F functor)
TimingDecorator()=default
rapidjson::Writer< rapidjson::OStreamWrapper > jsonWriter_type
const DatasetProperties< source_type > & getDatasetProperties() const noexcept
Definition Metrics.h:52
static constexpr decltype(auto) fromRenormed(const RenormedHistogramConcept< container_T > &renormed)
Definition factory.h:106
static constexpr decltype(auto) fromRenormed(const RenormedDenseHistogram< source_T > &renormed)
Definition factory.h:195
static factory classes for building histograms, encoders and decoders.
GLuint const GLchar * name
Definition glcorearb.h:781
GLsizei GLenum const void GLuint GLsizei GLfloat * metrics
Definition glcorearb.h:5500
GLuint GLuint stream
Definition glcorearb.h:1806
common functionality for rANS benchmarks.
public interface for building and renorming histograms from source data.
Defining PrimaryVertex explicitly as messageable.
Definition TFIDInfo.h:20
dest_IT compressRenormedDictionary(const container_T &container, dest_IT dstBufferBegin)
Definition serialize.h:142
class DenseHistogram< source_T, std::enable_if_t< sizeof(source_T)<=2 > > :public internal::VectorContainer< source_T, uint32_t >, internal::HistogramConcept< source_T, typename internal::VectorContainer< source_T, uint32_t >::value_type, typename internal::VectorContainer< source_T, uint32_t >::difference_type, DenseHistogram< source_T > >{ using containerBase_type=internal::VectorContainer< source_T, uint32_t >;using HistogramConcept_type=internal::HistogramConcept< source_T, typename internal::VectorContainer< source_T, uint32_t >::value_type, typename internal::VectorContainer< source_T, uint32_t >::difference_type, DenseHistogram< source_T > >;friend containerBase_type;friend HistogramConcept_type;public:using source_type=source_T;using value_type=typename containerBase_type::value_type;using container_type=typename containerBase_type::container_type;using size_type=typename containerBase_type::size_type;using difference_type=typename containerBase_type::difference_type;using reference=typename containerBase_type::reference;using const_reference=typename containerBase_type::const_reference;using pointer=typename containerBase_type::pointer;using const_pointer=typename containerBase_type::const_pointer;using const_iterator=typename containerBase_type::const_iterator;DenseHistogram() :containerBase_type{MaxSize, std::numeric_limits< source_type >::min()} {};template< typename freq_IT > DenseHistogram(freq_IT begin, freq_IT end, difference_type offset) :containerBase_type{MaxSize, std::numeric_limits< source_type >::min()}, HistogramConcept_type{begin, end, offset} {};using HistogramConcept_type::addSamples;template< typename source_IT > inline DenseHistogram &addSamples(source_IT begin, source_IT end, source_type min, source_type max) { return addSamplesImpl(begin, end);};template< typename source_IT > DenseHistogram &addSamples(gsl::span< const source_type > span, source_type min, source_type max) { return addSamplesImpl(span);};using 
HistogramConcept_type::addFrequencies;protected:template< typename source_IT > DenseHistogram &addSamplesImpl(source_IT begin, source_IT end);DenseHistogram &addSamplesImpl(gsl::span< const source_type > samples);template< typename freq_IT > DenseHistogram &addFrequenciesImpl(freq_IT begin, freq_IT end, difference_type offset);private:inline static constexpr size_t MaxSize=utils::pow2(utils::toBits< source_type >());};template< typename source_T >template< typename source_IT >auto DenseHistogram< source_T, std::enable_if_t< sizeof(source_T)<=2 > >::addSamplesImpl(source_IT begin, source_IT end) -> DenseHistogram &{ if constexpr(std::is_pointer_v< source_IT >) { return addSamplesImpl({begin, end});} else { std::for_each(begin, end, [this](const source_type &symbol) {++this->mNSamples;++this->mContainer[symbol];});} return *this;}template< typename source_T >auto DenseHistogram< source_T, std::enable_if_t< sizeof(source_T)<=2 > >::addSamplesImpl(gsl::span< const source_type > samples) -> DenseHistogram &{ using namespace internal;using namespace utils;if(samples.empty()) { return *this;} const auto begin=samples.data();const auto end=begin+samples.size();constexpr size_t ElemsPerQWord=sizeof(uint64_t)/sizeof(source_type);constexpr size_t nUnroll=2 *ElemsPerQWord;auto iter=begin;if constexpr(sizeof(source_type)==1) { std::array< ShiftableVector< source_type, value_type >, 3 > histograms{ {{this-> mContainer this mContainer getOffset()}
RenormedDenseHistogram< source_T > readRenormedDictionary(buffer_IT begin, buffer_IT end, source_T min, source_T max, size_t renormingPrecision)
Definition serialize.h:188
decltype(auto) renorm(histogram_T histogram, size_t newPrecision, RenormingPolicy renormingPolicy=RenormingPolicy::Auto, size_t lowProbabilityCutoffBits=0)
Definition renorm.h:203
bool readFile(std::string filename, o2::mid::Decoder &decoder, std::vector< o2::mid::ROBoard > &data, std::vector< o2::mid::ROFRecord > &rofRecords, unsigned long int nHBFs)
public interface for serializing histograms (dictionaries) to JSON or compressed binary.
std::vector< source_T > literals
Definition helpers.h:296
static decltype(auto) fromSamples(source_IT begin, source_IT end, typename std::iterator_traits< source_IT >::value_type min, typename std::iterator_traits< source_IT >::value_type max)
Definition factory.h:144
std::string decodeBuffer(int feeId, gsl::span< const std::byte > buffer)
constexpr size_t min
constexpr size_t max
#define main
LOG(info)<< "Compressed in "<< sw.CpuTime()<< " s"