df/d4d/bench__ransDecode_8cxx_source.html

// Copyright 2019-2023 CERN and copyright holders of ALICE O2.

// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.

// All rights not expressly granted are reserved.

//

// This software is distributed under the terms of the GNU General Public

// License v3 (GPL Version 3), copied verbatim in the file "COPYING".

//

// In applying this license CERN does not waive the privileges and immunities

// granted to it by virtue of its status as an Intergovernmental Organization

// or submit itself to any jurisdiction.


#include "rANS/internal/common/defines.h"


#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstring>
#ifdef RANS_PARALLEL_STL
#include <execution>
#endif
#include <iterator>
#include <limits>
#include <random>
#include <tuple>
#include <type_traits>
#include <vector>


#include <benchmark/benchmark.h>


#include "rANS/factory.h"

#include "rANS/histogram.h"


#ifdef ENABLE_VTUNE_PROFILER

#include <ittnotify.h>

#endif


#include "helpers.h"


using namespace o2::rans;


// Nominal size in bytes (1 << 22 = 4 MiB) of each generated source message;
// the proxies below divide it by the symbol size to obtain the symbol count.
inline constexpr size_t MessageSize = 1ull << 22;


// template <typename source_T>

// class SourceMessageProxyBinomial

// {

//  public:

//   SourceMessageProxyBinomial(size_t messageSize)

//   {

//     if (mSourceMessage.empty()) {

//       std::mt19937 mt(0); // fixed seed: we always want the same sequence of random numbers

//       const size_t draws = std::min(1ul << 27, static_cast<size_t>(std::numeric_limits<source_T>::max()));

//       const double probability = 0.5;

//       std::binomial_distribution<source_T> dist(draws, probability);

//       const size_t sourceSize = messageSize / sizeof(source_T) + 1;

//       mSourceMessage.resize(sourceSize);

// #ifdef RANS_PARALLEL_STL

//       std::generate(std::execution::par_unseq, mSourceMessage.begin(), mSourceMessage.end(), [&dist, &mt]() { return dist(mt); });

// #else

//       std::generate(mSourceMessage.begin(), mSourceMessage.end(), [&dist, &mt]() { return dist(mt); });

// #endif // RANS_PARALLEL_STL

//     }

//   }


//   const auto& get() const { return mSourceMessage; };


//  private:

//   std::vector<source_T> mSourceMessage{};

// };


// inline const SourceMessageProxyBinomial<uint8_t> sourceMessageBinomial8{MessageSize};

// inline const SourceMessageProxyBinomial<uint16_t> sourceMessageBinomial16{MessageSize};

// inline const SourceMessageProxyBinomial<uint32_t> sourceMessageBinomial32{MessageSize};


/// Owns a reproducible pseudo-random source message with uniformly distributed symbols.
///
/// The message holds `messageSize / sizeof(source_T) + 1` symbols drawn from the
/// closed interval [0, min(2^27, std::numeric_limits<source_T>::max())] using a
/// std::mt19937 engine seeded with 0, so every construction yields the same data.
///
/// @tparam source_T integral symbol type of the generated message.
template <typename source_T>
class SourceMessageProxyUniform
{
 public:
  /// @param messageSize nominal message size in bytes; converted to a symbol count.
  SourceMessageProxyUniform(size_t messageSize)
  {
    std::mt19937 mt(0); // fixed seed: we always want the same sequence of random numbers

    // Largest symbol value: capped at 2^27 and at what source_T can represent.
    constexpr size_t rangeCap = size_t{1} << 27;
    constexpr size_t typeMax = static_cast<size_t>(std::numeric_limits<source_T>::max());
    const auto maxSymbol = static_cast<uint32_t>(std::min(rangeCap, typeMax));

    // std::uniform_int_distribution is only specified for short/int/long/long long
    // and their unsigned variants; instantiating it with (u)int8_t is undefined
    // behaviour. Draw 32-bit values (the bound always fits) and narrow afterwards.
    std::uniform_int_distribution<uint32_t> dist(0u, maxSymbol);

    mSourceMessage.resize(messageSize / sizeof(source_T) + 1);

    // Deliberately sequential: engine and distribution are stateful, so sharing
    // them across a parallel/vectorised std::generate would be a data race and
    // would make the generated message irreproducible.
    std::generate(mSourceMessage.begin(), mSourceMessage.end(),
                  [&dist, &mt]() { return static_cast<source_T>(dist(mt)); });
  }

  /// @return read-only reference to the generated message.
  const auto& get() const { return mSourceMessage; }

 private:
  std::vector<source_T> mSourceMessage{};
};


// Source messages consumed by the benchmarks registered at the end of this file,
// one per symbol width (8, 16 and 32 bit). Each is generated once, when the
// corresponding global is initialised, from the nominal size MessageSize.
inline const SourceMessageProxyUniform<uint8_t> sourceMessageUniform8{MessageSize};

inline const SourceMessageProxyUniform<uint16_t> sourceMessageUniform16{MessageSize};

inline const SourceMessageProxyUniform<uint32_t> sourceMessageUniform32{MessageSize};


/// Benchmarks decoding of a source message that is first encoded with a dense encoder.
///
/// Setup (histogram, renorming, encoding, decoder construction) happens outside the
/// timed loop; only `decoder.process(...)` is measured. After the loop the decoded
/// buffer is compared with the input and a set of user counters is published.
///
/// @param st   Google Benchmark state object.
/// @param args captured benchmark arguments; the first one must expose `get()`
///             returning the container holding the source message.
template <class... Args>
void ransDecodeBenchmark(benchmark::State& st, Args&&... args)
{
  static_assert(sizeof...(Args) >= 1, "ransDecodeBenchmark requires the source message proxy as first argument");

  // Bind the captured arguments by reference. The previous
  // std::make_tuple(std::move(args)...) deep-copied the whole source message for
  // const arguments and would have moved from (emptied) a non-const caller object.
  const auto& inputData = std::get<0>(std::forward_as_tuple(args...)).get();

  using input_data_type = std::remove_cv_t<std::remove_reference_t<decltype(inputData)>>;
  using source_type = typename input_data_type::value_type;

#pragma GCC diagnostic push // TODO: Remove me when fixed in GCC
#pragma GCC diagnostic ignored "-Walloc-size-larger-than="
  EncodeBuffer<source_type> encodeBuffer{inputData.size()};
  DecodeBuffer<source_type> decodeBuffer{inputData.size()};
#pragma GCC diagnostic pop

  // Build the symbol statistics and the renormed frequency table shared by encoder and decoder.
  const auto histogram = makeDenseHistogram::fromSamples(gsl::span<const source_type>(inputData));
  Metrics<source_type> metrics{histogram};
  const auto renormedHistogram = renorm(histogram, metrics, RenormingPolicy::Auto, 10);

  // Encode once up front so that the timed loop contains nothing but decoding.
  auto encoder = makeDenseEncoder<>::fromRenormed(renormedHistogram);
  encodeBuffer.encodeBufferEnd = encoder.process(inputData.data(), inputData.data() + inputData.size(), encodeBuffer.buffer.data());

  auto decoder = makeDecoder<>::fromRenormed(renormedHistogram);
#ifdef ENABLE_VTUNE_PROFILER
  __itt_resume();
#endif
  for (auto _ : st) {
    decoder.process(encodeBuffer.encodeBufferEnd, decodeBuffer.buffer.data(), inputData.size(), encoder.getNStreams());
  }
#ifdef ENABLE_VTUNE_PROFILER
  __itt_pause();
#endif

  // Round-trip check: the decoded symbols must reproduce the source message.
  if (!(decodeBuffer == inputData)) {
    st.SkipWithError("Missmatch between encoded and decoded Message");
  }

  const auto& datasetProperties = metrics.getDatasetProperties();

  const auto nSymbols = static_cast<int64_t>(inputData.size());
  const auto nIterations = static_cast<int64_t>(st.iterations());
  st.SetItemsProcessed(nSymbols * nIterations);
  st.SetBytesProcessed(nSymbols * static_cast<int64_t>(sizeof(source_type)) * nIterations);

  // Keep the derived quantities in locals instead of reading them back from the counter map.
  const double entropy = datasetProperties.entropy;
  const double sourceSize = inputData.size() * sizeof(source_type);
  const double compressedSize = std::distance(encodeBuffer.buffer.data(), encodeBuffer.encodeBufferEnd) * sizeof(typename decltype(encoder)::stream_type);
  const double lowerBound = inputData.size() * (entropy / 8); // entropy / 8 converts bits to bytes per symbol

  st.counters["AlphabetRangeBits"] = datasetProperties.alphabetRangeBits;
  st.counters["nUsedAlphabetSymbols"] = datasetProperties.nUsedAlphabetSymbols;
  st.counters["SymbolTablePrecision"] = renormedHistogram.getRenormingBits();
  st.counters["Entropy"] = entropy;
  st.counters["ExpectedCodewordLength"] = computeExpectedCodewordLength(histogram, renormedHistogram);
  st.counters["SourceSize"] = sourceSize;
  st.counters["CompressedSize"] = compressedSize;
  st.counters["Compression"] = sourceSize / compressedSize;
  st.counters["LowerBound"] = lowerBound;
  st.counters["CompressionWRTEntropy"] = compressedSize / lowerBound;
}


// BENCHMARK_CAPTURE(ransDecodeBenchmark, decode_binomial_8, sourceMessageBinomial8);

// BENCHMARK_CAPTURE(ransDecodeBenchmark, decode_binomial_16, sourceMessageBinomial16);

// BENCHMARK_CAPTURE(ransDecodeBenchmark, decode_binomial_32, sourceMessageBinomial32);


// Register one decode benchmark per symbol width, each capturing the matching
// uniformly distributed source message as its first argument.
BENCHMARK_CAPTURE(ransDecodeBenchmark, decode_uniform_8, sourceMessageUniform8);

BENCHMARK_CAPTURE(ransDecodeBenchmark, decode_uniform_16, sourceMessageUniform16);

BENCHMARK_CAPTURE(ransDecodeBenchmark, decode_uniform_32, sourceMessageUniform32);


// Expands to the benchmark executable's main().
BENCHMARK_MAIN();

inputData
std::vector< o2::mid::ColumnData > inputData
Definition bench_Clusterizer.cxx:124

sourceMessageUniform32
const SourceMessageProxyUniform< uint32_t > sourceMessageUniform32
Definition bench_ransDecode.cxx:102

ransDecodeBenchmark
void ransDecodeBenchmark(benchmark::State &st, Args &&... args)
Definition bench_ransDecode.cxx:105

MessageSize
constexpr size_t MessageSize
Definition bench_ransDecode.cxx:40

sourceMessageUniform8
const SourceMessageProxyUniform< uint8_t > sourceMessageUniform8
Definition bench_ransDecode.cxx:100

BENCHMARK_MAIN
BENCHMARK_MAIN()

BENCHMARK_CAPTURE
BENCHMARK_CAPTURE(ransDecodeBenchmark, decode_uniform_8, sourceMessageUniform8)

sourceMessageUniform16
const SourceMessageProxyUniform< uint16_t > sourceMessageUniform16
Definition bench_ransDecode.cxx:101

st
benchmark::State & st
Definition bench_ransEncodeImpl.cxx:288

source_type
uint32_t source_type
Definition bench_ransPack.cxx:41

stream_type
uint32_t stream_type
Definition bin-encode-decode.cxx:34

SourceMessageProxyUniform
Definition bench_ransDecode.cxx:75

SourceMessageProxyUniform::get
const auto & get() const
Definition bench_ransDecode.cxx:94

SourceMessageProxyUniform::SourceMessageProxyUniform
SourceMessageProxyUniform(size_t messageSize)
Definition bench_ransDecode.cxx:77

o2::rans::Metrics
Definition Metrics.h:37

o2::rans::internal::makeEncoder::fromRenormed
static constexpr decltype(auto) fromRenormed(const RenormedHistogramConcept< container_T > &renormed)
Definition factory.h:106

o2::rans::makeDecoder::fromRenormed
static constexpr decltype(auto) fromRenormed(const RenormedDenseHistogram< source_T > &renormed)
Definition factory.h:195

source_T

defines.h
preprocessor defines to enable features based on CPU architecture

factory.h
static factory classes for building histograms, encoders and decoders.

metrics
GLsizei GLenum const void GLuint GLsizei GLfloat * metrics
Definition glcorearb.h:5500

helpers.h
common functionality for rANS benchmarks.

histogram.h
public interface for building and renorming histograms from source data.

o2::rans
Definition compat.h:42

o2::rans::computeExpectedCodewordLength
double_t computeExpectedCodewordLength(const DenseHistogram< source_T > &histogram, const RenormedDenseHistogram< source_T > &rescaledHistogram)
Definition utils.h:33

o2::rans::renorm
decltype(auto) renorm(histogram_T histogram, size_t newPrecision, RenormingPolicy renormingPolicy=RenormingPolicy::Auto, size_t lowProbabilityCutoffBits=0)
Definition renorm.h:203

DecodeBuffer
Definition helpers.h:302

EncodeBuffer
Definition helpers.h:285

o2::rans::makeDenseHistogram::fromSamples
static decltype(auto) fromSamples(source_IT begin, source_IT end, typename std::iterator_traits< source_IT >::value_type min, typename std::iterator_traits< source_IT >::value_type max)
Definition factory.h:144

decodeBuffer
std::string decodeBuffer(int feeId, gsl::span< const std::byte > buffer)
Definition testUserLogicEndpointDecoder.cxx:165

min
constexpr size_t min
Definition test_Algorithm.cxx:48

max
constexpr size_t max
Definition test_Algorithm.cxx:49