135 writer.Key(
name.c_str());
136 writer.StartObject();
140 encodeBuffer.literalsEnd = encodeBuffer.literals.data();
143 writer.Key(
"Timing");
144 writer.StartObject();
147 auto histogram = t.timeAndLog(
153 auto tmpHist = histogram;
156 t.timeAndLog(
"Renorming",
"Renormed Frequency Table", [&]()
mutable {
165 t.timeAndLog(
"Encoding",
"Encoded", [&]()
mutable {
166#ifdef ENABLE_VTUNE_PROFILER
169 if (renormedHistogram.hasIncompressibleSymbol()) {
170 std::tie(encodeBuffer.encodeBufferEnd, encodeBuffer.literalsEnd) = encoder.process(
inputData.data(),
inputData.data() +
inputData.size(), encodeBuffer.buffer.data(), encodeBuffer.literalsEnd);
174#ifdef ENABLE_VTUNE_PROFILER
180 std::vector<uint8_t> dict(histogram.size() *
sizeof(uint64_t), 0);
181 auto dictEnd = t.timeAndLog(
"WriteDict",
"Serialized Dict", [&]() {
return compressRenormedDictionary(encoder.getSymbolTable(), dict.data()); });
182 LOGP(info,
"Serialized Dict of {} Bytes", std::distance(dict.data(), dictEnd));
183 auto recoveredHistogram = t.timeAndLog(
"ReadDict",
"Read Dict", [&]() {
185 const source_type max =
min + std::max<source_type>(
static_cast<int64_t
>(encoder.getSymbolTable().size())-1,0);
195 if (encodeBuffer.literalsEnd == encodeBuffer.literals.data()) {
196 decoder.process(encodeBuffer.encodeBufferEnd,
decodeBuffer.buffer.data(),
inputData.size(), encoder.getNStreams());
198 decoder.process(encodeBuffer.encodeBufferEnd,
decodeBuffer.buffer.data(),
inputData.size(), encoder.getNStreams(), encodeBuffer.literalsEnd);
202 LOGP(warning,
"Missmatch between original and decoded Message");
204 LOG(info) <<
"finished: " <<
name;
208 const auto& datasetProperties =
metrics.getDatasetProperties();
212 writer.Key(
"FrequencyTable");
213 writer.StartObject();
214 writer.Key(
"nSamples");
215 writer.Uint64(histogram.getNumSamples());
217 writer.Int(datasetProperties.min);
219 writer.Int(datasetProperties.max);
220 writer.Key(
"alphabetRangeBits");
221 writer.Int(datasetProperties.alphabetRangeBits);
222 writer.Key(
"nUsedAlphabetSymbols");
223 writer.Uint(datasetProperties.nUsedAlphabetSymbols);
224 writer.Key(
"IncompressibleFrequency");
233 writer.Key(
"RescaledFrequencies");
234 writer.StartObject();
235 writer.Key(
"nSamples");
236 writer.Uint64(renormedHistogram.getNumSamples());
238 writer.Int(renormedDatasetProperties.min);
240 writer.Int(renormedDatasetProperties.max);
241 writer.Key(
"alphabetRangeBits");
242 writer.Int(renormedDatasetProperties.alphabetRangeBits);
243 writer.Key(
"nUsedAlphabetSymbols");
244 writer.Uint(renormedDatasetProperties.nUsedAlphabetSymbols);
245 writer.Key(
"IncompressibleFrequency");
246 writer.Uint(renormedHistogram.getIncompressibleSymbolFrequency());
247 writer.Key(
"RenormingBits");
248 writer.Uint(renormedHistogram.getRenormingBits());
253 writer.Key(
"Message");
254 writer.StartObject();
257 writer.Key(
"SymbolSize");
259 writer.Key(
"Entropy");
260 writer.Double(datasetProperties.entropy);
261 writer.Key(
"ExpectedCodewordLength");
262 writer.Double(computeExpectedCodewordLength<>(histogram, renormedHistogram));
267 writer.Key(
"Compression");
268 writer.StartObject();
269 writer.Key(
"EncodeBufferSize");
270 writer.Uint64(std::distance(encodeBuffer.buffer.data(), encodeBuffer.encodeBufferEnd) *
sizeof(uint32_t));
271 writer.Key(
"LiteralSize");
272 writer.Uint64(std::distance(encodeBuffer.literals.data(), encodeBuffer.literalsEnd) *
sizeof(
source_type));
273 writer.Key(
"DictSize");
274 writer.Uint64(std::distance(dict.data(), dictEnd));
// Body fragment of the per-coder benchmark driver (called as
// `encodeTPC<coderTag>(...)` further down in this listing; the function header
// and closing lines are not part of this excerpt).
//
// Opens a JSON object keyed by `name`, then runs `ransEncodeDecode` once for
// each member of `compressedClusters`, passing the member's label, the member
// itself and the shared JSON `writer`.
// The first template argument differs per member (uint8_t / uint16_t /
// uint32_t) — presumably the element type of that column; confirm against the
// declaration of `compressedClusters`. `coderTag_V` is forwarded unchanged.
283 writer.Key(
name.c_str());
284 writer.StartObject();
// Members carrying the `A` suffix.
285 ransEncodeDecode<uint16_t, coderTag_V>(
"qTotA", compressedClusters.qTotA, writer);
286 ransEncodeDecode<uint16_t, coderTag_V>(
"qMaxA", compressedClusters.qMaxA, writer);
287 ransEncodeDecode<uint8_t, coderTag_V>(
"flagsA", compressedClusters.flagsA, writer);
288 ransEncodeDecode<uint8_t, coderTag_V>(
"rowDiffA", compressedClusters.rowDiffA, writer);
289 ransEncodeDecode<uint8_t, coderTag_V>(
"sliceLegDiffA", compressedClusters.sliceLegDiffA, writer);
290 ransEncodeDecode<uint16_t, coderTag_V>(
"padResA", compressedClusters.padResA, writer);
291 ransEncodeDecode<uint32_t, coderTag_V>(
"timeResA", compressedClusters.timeResA, writer);
292 ransEncodeDecode<uint8_t, coderTag_V>(
"sigmaPadA", compressedClusters.sigmaPadA, writer);
293 ransEncodeDecode<uint8_t, coderTag_V>(
"sigmaTimeA", compressedClusters.sigmaTimeA, writer);
294 ransEncodeDecode<uint8_t, coderTag_V>(
"qPtA", compressedClusters.qPtA, writer);
295 ransEncodeDecode<uint8_t, coderTag_V>(
"rowA", compressedClusters.rowA, writer);
296 ransEncodeDecode<uint8_t, coderTag_V>(
"sliceA", compressedClusters.sliceA, writer);
297 ransEncodeDecode<uint32_t, coderTag_V>(
"timeA", compressedClusters.timeA, writer);
298 ransEncodeDecode<uint16_t, coderTag_V>(
"padA", compressedClusters.padA, writer);
// Members carrying the `U` suffix.
299 ransEncodeDecode<uint16_t, coderTag_V>(
"qTotU", compressedClusters.qTotU, writer);
300 ransEncodeDecode<uint16_t, coderTag_V>(
"qMaxU", compressedClusters.qMaxU, writer);
301 ransEncodeDecode<uint8_t, coderTag_V>(
"flagsU", compressedClusters.flagsU, writer);
302 ransEncodeDecode<uint16_t, coderTag_V>(
"padDiffU", compressedClusters.padDiffU, writer);
303 ransEncodeDecode<uint32_t, coderTag_V>(
"timeDiffU", compressedClusters.timeDiffU, writer);
304 ransEncodeDecode<uint8_t, coderTag_V>(
"sigmaPadU", compressedClusters.sigmaPadU, writer);
305 ransEncodeDecode<uint8_t, coderTag_V>(
"sigmaTimeU", compressedClusters.sigmaTimeU, writer);
// Per-track / per-slice-row counters.
306 ransEncodeDecode<uint16_t, coderTag_V>(
"nTrackClusters", compressedClusters.nTrackClusters, writer);
307 ransEncodeDecode<uint32_t, coderTag_V>(
"nSliceRowClusters", compressedClusters.nSliceRowClusters, writer);
// Program entry point (several original lines — braces, early returns and the
// tail of the function — are absent from this listing).
//
// Visible steps:
//  1. Declare and parse the command-line options with boost::program_options.
//  2. Resolve the input path (option "in") and the output path (option "out",
//     falling back to "out.json").
//  3. Optionally set the FairLogger console severity.
//  4. Open the output stream and attach a rapidjson writer to it.
//  5. For every entry of `encoder_types`, run `encodeTPC` and log progress.
314int main(
int argc,
char* argv[])
316 bpo::options_description options(
"Allowed options");
// Supported options; every option except "help" takes a string value.
// NOTE(review): "mode" is declared here but no visible line reads it — confirm
// whether it is consumed in the lines missing from this listing.
318 options.add_options()
319 (
"help,h",
"print usage message")
320 (
"in,i",bpo::value<std::string>(),
"file to process")
321 (
"out,o",bpo::value<std::string>(),
"json output file")
322 (
"mode,m",bpo::value<std::string>(),
"compressor processing mode")
323 (
"log_severity,l",bpo::value<std::string>(),
"severity of FairLogger");
326 bpo::variables_map vm;
327 bpo::store(bpo::parse_command_line(argc, argv, options), vm);
// "help": print the option summary to stdout.
330 if (vm.count(
"help")) {
331 std::cout << options <<
"\n";
// Input path: taken from "in" when given; otherwise an error is logged
// (what happens after the log line is not visible here).
335 const std::string inFile = [&]() {
336 if (vm.count(
"in")) {
337 return vm[
"in"].as<std::string>();
339 LOG(error) <<
"missing path to input file";
// Output path: taken from "out" when given, else the default "out.json".
344 const std::string outFile = [&]() {
345 if (vm.count(
"out")) {
346 return vm[
"out"].as<std::string>();
348 return std::string(
"out.json");
// Forward the requested severity string to the logger's console sink.
352 if (vm.count(
"log_severity")) {
353 fair::Logger::SetConsoleSeverity(vm[
"log_severity"].as<std::string>().c_str());
// Open the JSON output file.
356 std::ofstream
of{outFile};
// NOTE(review): this statement only constructs a std::runtime_error temporary;
// there is no `throw` in front of it, so as written it has no effect.
// NOTE(review): the message talks about the *output* file but formats `inFile`,
// while the stream above was opened on `outFile` — likely should be `outFile`.
// The guarding condition (if any) is not visible in this listing — confirm
// before changing.
358 std::runtime_error(fmt::format(
"could not open output file at path {}", inFile));
// Wrap the file stream for rapidjson and open the top-level JSON object.
364 rapidjson::OStreamWrapper
stream{
of};
365 rapidjson::Writer<rapidjson::OStreamWrapper> writer{
stream};
366 writer.StartObject();
369 LOG(info) <<
"loaded Compressed Clusters from file";
370 LOG(info) <<
"######################################################";
// Iterate over the `encoder_types` type list. For each entry `L`, element 0 of
// decltype(L) supplies the CoderTag value, whose string form is used as title.
371 boost::mp11::mp_for_each<encoder_types>([&](
auto L) {
372 using coder_type = boost::mp11::mp_at_c<
decltype(L), 0>;
373 constexpr CoderTag coderTag = coder_type::value;
374 const std::string encoderTitle =
toString(coderTag);
376 LOGP(info,
"start rANS {}/Decode", encoderTitle);
// Third argument is passed as a literal `false`; its meaning is defined by
// encodeTPC's signature, which is not part of this listing.
377 encodeTPC<coderTag>(encoderTitle, compressedClusters,
false, writer);
378 LOG(info) <<
"######################################################";
class DenseHistogram< source_T, std::enable_if_t< sizeof(source_T)<=2 > > :public internal::VectorContainer< source_T, uint32_t >, internal::HistogramConcept< source_T, typename internal::VectorContainer< source_T, uint32_t >::value_type, typename internal::VectorContainer< source_T, uint32_t >::difference_type, DenseHistogram< source_T > >{ using containerBase_type=internal::VectorContainer< source_T, uint32_t >;using HistogramConcept_type=internal::HistogramConcept< source_T, typename internal::VectorContainer< source_T, uint32_t >::value_type, typename internal::VectorContainer< source_T, uint32_t >::difference_type, DenseHistogram< source_T > >;friend containerBase_type;friend HistogramConcept_type;public:using source_type=source_T;using value_type=typename containerBase_type::value_type;using container_type=typename containerBase_type::container_type;using size_type=typename containerBase_type::size_type;using difference_type=typename containerBase_type::difference_type;using reference=typename containerBase_type::reference;using const_reference=typename containerBase_type::const_reference;using pointer=typename containerBase_type::pointer;using const_pointer=typename containerBase_type::const_pointer;using const_iterator=typename containerBase_type::const_iterator;DenseHistogram() :containerBase_type{MaxSize, std::numeric_limits< source_type >::min()} {};template< typename freq_IT > DenseHistogram(freq_IT begin, freq_IT end, difference_type offset) :containerBase_type{MaxSize, std::numeric_limits< source_type >::min()}, HistogramConcept_type{begin, end, offset} {};using HistogramConcept_type::addSamples;template< typename source_IT > inline DenseHistogram &addSamples(source_IT begin, source_IT end, source_type min, source_type max) { return addSamplesImpl(begin, end);};template< typename source_IT > DenseHistogram &addSamples(gsl::span< const source_type > span, source_type min, source_type max) { return addSamplesImpl(span);};using 
HistogramConcept_type::addFrequencies;protected:template< typename source_IT > DenseHistogram &addSamplesImpl(source_IT begin, source_IT end);DenseHistogram &addSamplesImpl(gsl::span< const source_type > samples);template< typename freq_IT > DenseHistogram &addFrequenciesImpl(freq_IT begin, freq_IT end, difference_type offset);private:inline static constexpr size_t MaxSize=utils::pow2(utils::toBits< source_type >());};template< typename source_T >template< typename source_IT >auto DenseHistogram< source_T, std::enable_if_t< sizeof(source_T)<=2 > >::addSamplesImpl(source_IT begin, source_IT end) -> DenseHistogram &{ if constexpr(std::is_pointer_v< source_IT >) { return addSamplesImpl({begin, end});} else { std::for_each(begin, end, [this](const source_type &symbol) {++this->mNSamples;++this->mContainer[symbol];});} return *this;}template< typename source_T >auto DenseHistogram< source_T, std::enable_if_t< sizeof(source_T)<=2 > >::addSamplesImpl(gsl::span< const source_type > samples) -> DenseHistogram &{ using namespace internal;using namespace utils;if(samples.empty()) { return *this;} const auto begin=samples.data();const auto end=begin+samples.size();constexpr size_t ElemsPerQWord=sizeof(uint64_t)/sizeof(source_type);constexpr size_t nUnroll=2 *ElemsPerQWord;auto iter=begin;if constexpr(sizeof(source_type)==1) { std::array< ShiftableVector< source_type, value_type >, 3 > histograms{ {{this-> mContainer this mContainer getOffset()}