76 computeMetrics(histogram,
min,
max);
77 mCoderProperties.renormingPrecisionBits = computeRenormingPrecision(cutoffPrecision);
78 mCoderProperties.nIncompressibleSymbols = computeIncompressibleCount(mDatasetProperties.symbolLengthDistribution, *mCoderProperties.renormingPrecisionBits);
79 mCoderProperties.nIncompressibleSamples = computeIncompressibleCount(mDatasetProperties.weightedSymbolLengthDistribution, *mCoderProperties.renormingPrecisionBits);
86 using namespace internal;
87 using namespace utils;
88 using source_type =
typename histogram_T::source_type;
89 using value_type =
typename histogram_T::value_type;
90 static_assert(std::is_same_v<source_type, source_T>);
92 mCoderProperties.dictSizeEstimate = DictSizeEstimate{histogram.getNumSamples()};
93 if (histogram.getNumSamples() > 0) {
94 const auto [trimmedBegin, trimmedEnd] =
trim(histogram);
95 if (
min.has_value()) {
96 mDatasetProperties.min = *
min;
97 mDatasetProperties.max = *
max;
99 std::tie(mDatasetProperties.min, mDatasetProperties.max) =
getMinMax(histogram, trimmedBegin, trimmedEnd);
101 assert(mDatasetProperties.max >= mDatasetProperties.min);
102 mDatasetProperties.numSamples = histogram.getNumSamples();
103 mDatasetProperties.alphabetRangeBits = getRangeBits(mDatasetProperties.min, mDatasetProperties.max);
105 const double_t reciprocalNumSamples = 1.0 /
static_cast<double_t
>(histogram.getNumSamples());
109 forEachIndexValue(histogram, trimmedBegin, trimmedEnd, [&,
this](
const source_type&
index,
const uint32_t& frequency) {
111 assert(lastIndex <=
index);
113 mCoderProperties.dictSizeEstimate.updateIndexSize(delta + (delta == 0));
115 mCoderProperties.dictSizeEstimate.updateFreqSize(frequency);
116 ++mDatasetProperties.nUsedAlphabetSymbols;
118 const double_t probability =
static_cast<double_t
>(frequency) * reciprocalNumSamples;
119 const float_t fractionalBitLength = -fastlog2(probability);
120 const uint32_t bitLength = std::ceil(fractionalBitLength);
122 assert(bitLength > 0);
123 const uint32_t symbolDistributionBucket = bitLength - 1;
124 mDatasetProperties.entropy += probability * fractionalBitLength;
125 ++mDatasetProperties.symbolLengthDistribution[symbolDistributionBucket];
126 mDatasetProperties.weightedSymbolLengthDistribution[symbolDistributionBucket] += frequency;