Project
Loading...
Searching...
No Matches
bin-encode-decode.cxx
Go to the documentation of this file.
1// Copyright 2019-2023 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
15
16#include <rANS/factory.h>
17#include <rANS/histogram.h>
18#include <rANS/encode.h>
19#include <rANS/decode.h>
20
21#include <boost/program_options.hpp>
22#include <algorithm>
23#include <iostream>
24
25#include <fairlogger/Logger.h>
26
27namespace bpo = boost::program_options;
28
29#ifndef SOURCE_T
30#define SOURCE_T uint8_t
31#endif
32
34using stream_type = uint32_t;
35inline constexpr size_t NSTREAMS = 2;
36inline constexpr size_t LOWER_BOUND = 31;
37
38template <typename T>
39std::vector<T> readFile(const std::string& filename)
40{
41 std::vector<T> tokens{};
42 std::ifstream is(filename, std::ios_base::binary | std::ios_base::in);
43 if (is) {
44 // get length of file:
45 is.seekg(0, is.end);
46 size_t length = is.tellg();
47 is.seekg(0, is.beg);
48
49 if (length % sizeof(T)) {
50 throw o2::rans::IOError("Filesize is not a multiple of datatype.");
51 }
52 // size the vector appropriately
53 tokens.resize(length / sizeof(T));
54
55 // read data as a block:
56 is.read(reinterpret_cast<char*>(tokens.data()), length);
57 is.close();
58 }
59 return tokens;
60}
61
62int main(int argc, char* argv[])
63{
64
65 using namespace o2::rans;
66
67 bpo::options_description options("Allowed options");
68 // clang-format off
69 options.add_options()
70 ("help,h", "print usage message")
71 ("file,f",bpo::value<std::string>(), "file to compress")
72 ("log_severity,l",bpo::value<std::string>(), "severity of FairLogger");
73 // clang-format on
74
75 bpo::variables_map vm;
76 bpo::store(bpo::parse_command_line(argc, argv, options), vm);
77 bpo::notify(vm);
78
79 if (vm.count("help")) {
80 std::cout << options << "\n";
81 return 0;
82 }
83
84 const std::string filename = [&]() {
85 if (vm.count("file")) {
86 return vm["file"].as<std::string>();
87 } else {
88 LOG(error) << "missing path to input file";
89 exit(1);
90 }
91 }();
92
93 if (vm.count("log_severity")) {
94 fair::Logger::SetConsoleSeverity(vm["log_severity"].as<std::string>().c_str());
95 }
96
97 std::vector<source_type> tokens = readFile<source_type>(filename);
98
99 // build encoders
100 auto histogram = makeDenseHistogram::fromSamples(tokens.begin(), tokens.end());
101 Metrics<source_type> metrics{histogram};
102 auto renormedHistogram = renorm(std::move(histogram), metrics);
104 auto decoder = makeDecoder<LOWER_BOUND>::fromRenormed(renormedHistogram);
105
106 std::vector<stream_type> encoderBuffer;
107 std::vector<source_type> decodeBuffer(tokens.size(), 0);
108 std::vector<source_type> incompressibleSymbols;
109
110 if (renormedHistogram.hasIncompressibleSymbol()) {
111 LOG(info) << "With incompressible symbols";
112 [[maybe_unused]] auto res = encoder.process(tokens.begin(), tokens.end(), std::back_inserter(encoderBuffer), std::back_inserter(incompressibleSymbols));
113 LOGP(info, "nIncompressible {}", incompressibleSymbols.size());
114 decoder.process(encoderBuffer.end(), decodeBuffer.begin(), tokens.size(), NSTREAMS, incompressibleSymbols.end());
115 } else {
116 LOG(info) << "Without incompressible symbols";
117 encoder.process(std::begin(tokens), std::end(tokens), std::back_inserter(encoderBuffer));
118 decoder.process(encoderBuffer.end(), decodeBuffer.begin(), tokens.size(), NSTREAMS);
119 }
120
121 size_t pos = 0;
122 if (std::equal(tokens.begin(), tokens.end(), decodeBuffer.begin(), decodeBuffer.end(),
123 [&pos](const auto& a, const auto& b) {
124 const bool cmp = a == b;
125 if (!cmp) {
126 LOG(error) << fmt::format("[{}] {} != {}", pos, a, b);
127 }
128 ++pos;
129 return cmp;
130 })) {
131 LOG(info) << "Decoder passed tests";
132 } else {
133 LOG(error) << "Decoder failed tests";
134 }
135};
uint16_t pos
Definition RawData.h:3
uint32_t res
Definition RawData.h:0
uint32_t source_type
std::tuple< ransState_t, stream_IT > renorm(ransState_t state, stream_IT outputIter, count_t frequency, size_t symbolTablePrecision)
uint32_t stream_type
constexpr size_t NSTREAMS
#define SOURCE_T
constexpr size_t LOWER_BOUND
std::vector< T > readFile(const std::string &filename)
public interface for decoding.
public interface for encoding.
static factory classes for building histograms, encoders and decoders.
GLsizei GLenum const void GLuint GLsizei GLfloat * metrics
Definition glcorearb.h:5500
GLboolean GLboolean GLboolean b
Definition glcorearb.h:1233
GLuint GLsizei GLsizei * length
Definition glcorearb.h:790
GLboolean GLboolean GLboolean GLboolean a
Definition glcorearb.h:1233
public interface for building and renorming histograms from source data.
std::string filename()
std::string decodeBuffer(int feeId, gsl::span< const std::byte > buffer)
#define main
LOG(info)<< "Compressed in "<< sw.CpuTime()<< " s"
char const *restrict const cmp
Definition x9.h:96