16#ifndef RANS_INTERNAL_ENCODE_SIMDENCODERIMPL_H_
17#define RANS_INTERNAL_ENCODE_SIMDENCODERIMPL_H_
// SIMD rANS encoder implementation.
//
// Template parameters:
//   streamingLowerBound_V - exponent of the streaming lower bound; LowerBound is
//                           utils::pow2(streamingLowerBound_V) and the value is
//                           capped at 20 by the static_assert below.
//   simdWidth_V           - SIMD register width selector (simd::SIMDWidth).
//
// The class passes itself as the second template argument of its EncoderImpl base
// (CRTP-style) and encodes symbols of type simd::UnrolledSymbols.
//
// NOTE(review): this listing appears to come from a rendered source view: the original
// line numbers are fused onto the start of lines, and lines that only carried braces,
// access specifiers or comments are not visible. Access levels of the members below
// therefore cannot be confirmed from here.
35template <
size_t streamingLowerBound_V, simd::SIMDW
idth simdW
idth_V>
36class SIMDEncoderImpl :
public EncoderImpl<simd::UnrolledSymbols,
37 SIMDEncoderImpl<streamingLowerBound_V, simdWidth_V>>
39 using base_type = EncoderImpl<simd::UnrolledSymbols, SIMDEncoderImpl<streamingLowerBound_V, simdWidth_V>>;
// Type aliases re-exported from the base class.
42 using stream_type =
typename base_type::stream_type;
43 using state_type =
typename base_type::state_type;
44 using symbol_type =
typename base_type::symbol_type;
45 using size_type =
typename base_type::size_type;
46 using difference_type =
typename base_type::difference_type;
// Compile-time guard: the lower-bound exponent may not exceed 20 (the message attributes
// this to the floating point arithmetic used by the SIMD coders).
48 static_assert(streamingLowerBound_V <= 20,
"SIMD coders are limited to 20 BIT precision because of their used of FP arithmeric");
// Number of interleaved streams: twice the element count reported by
// simd::getElementCount<state_type>(simdWidth_V), matching the two entries of mStates.
50 [[nodiscard]]
inline static constexpr size_type getNstreams() noexcept {
return 2 * simd::getElementCount<state_type>(simdWidth_V); };
// Construct with an explicit symbol table precision; the default constructor delegates with 0.
52 SIMDEncoderImpl(
size_t symbolTablePrecision);
53 SIMDEncoderImpl() : SIMDEncoderImpl{0} {};
// Writes out all encoder states through the given output iterator and returns an iterator.
56 template <
typename Stream_IT>
57 Stream_IT flush(Stream_IT outputIter);
// Encodes one set of unrolled symbols on all streams.
59 template <
typename Stream_IT>
60 Stream_IT putSymbols(Stream_IT outputIter,
const symbol_type& encodeSymbols);
// Encodes only the first nActiveStreams streams, one state at a time.
62 template <
typename Stream_IT>
63 Stream_IT putSymbols(Stream_IT outputIter,
const symbol_type& encodeSymbols,
size_t nActiveStreams);
// Returns utils::pow2(streamingLowerBound_V) converted to state_type.
65 [[nodiscard]]
inline static constexpr state_type getStreamingLowerBound() noexcept {
return static_cast<state_type
>(utils::pow2(streamingLowerBound_V)); };
// Precision of the symbol table; used as a shift amount in putSymbol and as the
// exponent for mNSamples in the constructor.
68 size_t mSymbolTablePrecision{};
// Two SIMD values holding the encoder states (presumably integer vectors - confirm
// against the simd::simdI_t definition).
69 simd::simdI_t<simdWidth_V> mStates[2]{};
// Set by the constructor to pow2(mSymbolTablePrecision) as double in every element.
70 simd::simdD_t<simdWidth_V> mNSamples{};
// Scalar helper: encodes a single Symbol into a single state.
72 template <
typename Stream_IT>
73 Stream_IT putSymbol(Stream_IT outputIter,
const Symbol& symbol, state_type&
state);
// Scalar helper: writes a single state to the output.
75 template <
typename Stream_IT>
76 Stream_IT flushState(state_type&
state, Stream_IT outputIter);
// Scalar helper: renormalises a state before encoding; returns the (possibly reduced)
// state together with the output iterator.
79 template <
typename Stream_IT>
80 std::tuple<state_type, Stream_IT>
renorm(state_type
state, Stream_IT outputIter, uint32_t frequency);
// Streaming lower bound as a value: 2^streamingLowerBound_V.
82 inline static constexpr state_type
LowerBound = utils::pow2(streamingLowerBound_V);
// Value of utils::toBits<stream_type>() (presumably the bit width of stream_type).
84 inline static constexpr state_type
StreamBits = utils::toBits<stream_type>();
// Constructor: stores the symbol table precision and initialises the SIMD members.
//
// symbolTablePrecision - precision of the symbol table (reported in "Bits" by the
//                        error message below).
// Throws HistogramError with the formatted message below.
87template <
size_t streamingLowerBound_V, simd::SIMDW
idth simdW
idth_V>
88SIMDEncoderImpl<streamingLowerBound_V, simdWidth_V>::SIMDEncoderImpl(
size_t symbolTablePrecision) : mSymbolTablePrecision{symbolTablePrecision}, mStates{}, mNSamples{}
// NOTE(review): the condition guarding this throw is not visible in this listing.
// NOTE(review): the message labels its second argument "max {} Bits" but passes
// LowerBound, which the class defines as utils::pow2(streamingLowerBound_V) - i.e. a
// power of two rather than a bit count. Confirm whether streamingLowerBound_V was meant.
91 throw HistogramError(fmt::format(
"SymbolTable Precision of {} Bits is larger than allowed by the rANS Encoder (max {} Bits)", mSymbolTablePrecision,
LowerBound));
// Both state vectors start at LowerBound (simd::setAll presumably writes the value to
// every element).
94 mStates[0] = simd::setAll<simdWidth_V>(
LowerBound);
95 mStates[1] = simd::setAll<simdWidth_V>(
LowerBound);
// Cache 2^mSymbolTablePrecision as a double for use by ransEncode in putSymbols.
97 mNSamples = simd::setAll<simdWidth_V>(
static_cast<double>(utils::pow2(mSymbolTablePrecision)));
// Flushes every encoder state to the output.
//
// iter - output iterator to start from.
// NOTE(review): the closing brace of the loop and the return statement are not visible
// in this listing; the declared return type is Stream_IT.
100template <
size_t streamingLowerBound_V, simd::SIMDW
idth simdW
idth_V>
101template <
typename Stream_IT>
102Stream_IT SIMDEncoderImpl<streamingLowerBound_V, simdWidth_V>::flush(Stream_IT iter)
104 using namespace simd;
// Copy both SIMD state values into a local buffer so states can be addressed individually.
105 epi64_t<simdWidth_V, 2>
states;
106 store(mStates[0],
states[0]);
107 store(mStates[1],
states[1]);
109 Stream_IT streamPos = iter;
// Walk the states from the last index down to 0; the post-decrement in the condition
// avoids unsigned underflow of the size_t counter.
110 for (
size_t stateIdx =
states.nElements(); stateIdx-- > 0;) {
111 streamPos = flushState(*(
states.data() + stateIdx), streamPos);
// Write the local buffer back into the member state, since flushState takes each state
// by non-const reference.
114 mStates[0] = load(
states[0]);
115 mStates[1] = load(
states[1]);
// Encodes one set of unrolled symbols on all streams using the SIMD helpers.
//
// outputIter - output iterator to write renormalisation output to.
// symbols    - frequencies / cumulativeFrequencies for both state vectors.
// Returns the stream position produced by ransRenorm.
// NOTE(review): several lines of this body (including some ransRenorm arguments) are
// not visible in this listing.
120template <
size_t streamingLowerBound_V, simd::SIMDW
idth simdW
idth_V>
121template <
typename Stream_IT>
122inline Stream_IT SIMDEncoderImpl<streamingLowerBound_V, simdWidth_V>::putSymbols(Stream_IT outputIter,
const symbol_type& symbols)
124 using namespace simd;
// Step 1: renormalise; the encode step below reads its input states from renormedStates.
131 simd::simdI_t<simdWidth_V> renormedStates[2];
132 auto streamPosition = ransRenorm<Stream_IT, LowerBound, StreamBits>(mStates,
134 static_cast<uint8_t>(mSymbolTablePrecision),
// Step 2: encode each renormalised state vector; frequencies and cumulative frequencies
// are converted with int32ToDouble before being passed to ransEncode.
137 mStates[0] = ransEncode(renormedStates[0], int32ToDouble<simdWidth_V>(symbols.frequencies[0]), int32ToDouble<simdWidth_V>(symbols.cumulativeFrequencies[0]), mNSamples);
138 mStates[1] = ransEncode(renormedStates[1], int32ToDouble<simdWidth_V>(symbols.frequencies[1]), int32ToDouble<simdWidth_V>(symbols.cumulativeFrequencies[1]), mNSamples);
140 return streamPosition;
// Encodes symbols on only the first nActiveStreams streams, one state at a time via
// the scalar putSymbol helper.
//
// outputIter     - output iterator to start from.
// symbols        - frequencies / cumulativeFrequencies for both state vectors.
// nActiveStreams - number of streams to encode; indices nActiveStreams-1 .. 0 are used.
// NOTE(review): the closing brace of the loop and the return statement are not visible
// in this listing; the declared return type is Stream_IT.
143template <
size_t streamingLowerBound_V, simd::SIMDW
idth simdW
idth_V>
144template <
typename Stream_IT>
145Stream_IT SIMDEncoderImpl<streamingLowerBound_V, simdWidth_V>::putSymbols(Stream_IT outputIter,
const symbol_type& symbols,
size_t nActiveStreams)
147 using namespace simd;
149 Stream_IT streamPos = outputIter;
// Copy the SIMD states into a local buffer so individual states can be updated.
151 epi64_t<simdWidth_V, 2>
states;
152 store(mStates[0],
states[0]);
153 store(mStates[1],
states[1]);
// NOTE(review): these buffers are declared with SIMDWidth::SSE regardless of
// simdWidth_V - presumably matching the layout of symbols.frequencies; confirm.
155 epi32_t<SIMDWidth::SSE, 2> frequencies;
156 epi32_t<SIMDWidth::SSE, 2> cumulativeFrequencies;
158 store<uint32_t>(symbols.frequencies[0], frequencies[0]);
159 store<uint32_t>(symbols.frequencies[1], frequencies[1]);
160 store<uint32_t>(symbols.cumulativeFrequencies[0], cumulativeFrequencies[0]);
161 store<uint32_t>(symbols.cumulativeFrequencies[1], cumulativeFrequencies[1]);
// Iterate from the highest active index down to 0; the post-decrement in the condition
// keeps the size_t counter from underflowing.
163 for (
size_t i = nActiveStreams;
i-- > 0;) {
164 Symbol encodeSymbol{frequencies(
i), cumulativeFrequencies(
i)};
165 streamPos = putSymbol(streamPos, encodeSymbol,
states(
i));
// Write the updated states back into the member state vectors.
168 mStates[0] = load(
states[0]);
169 mStates[1] = load(
states[1]);
// Encodes a single symbol into a single state (scalar path).
//
// outputIter - output iterator handed to renorm.
// symbol     - symbol to encode; its frequency must be non-zero (asserted).
// state      - state to update in place.
// NOTE(review): the return statement is not visible in this listing; the declared
// return type is Stream_IT.
174template <
size_t streamingLowerBound_V, simd::SIMDW
idth simdW
idth_V>
175template <
typename Stream_IT>
176Stream_IT SIMDEncoderImpl<streamingLowerBound_V, simdWidth_V>::putSymbol(Stream_IT outputIter,
const Symbol& symbol, state_type&
state)
// A zero frequency would divide by zero below.
178 assert(symbol.getFrequency() != 0);
// Renormalise first; x is the state value to encode from.
180 const auto [
x, streamPos] =
renorm(
state, outputIter, symbol.getFrequency());
// New state: quotient of x by the frequency shifted left by mSymbolTablePrecision,
// plus the remainder, plus the symbol's cumulative value.
183 state = ((
x / symbol.getFrequency()) << mSymbolTablePrecision) + (
x % symbol.getFrequency()) + symbol.getCumulative();
// Flushes a single state to the output and returns the resulting stream position.
//
// state          - state to flush, taken by non-const reference.
// streamPosition - output iterator to write through.
// NOTE(review): the statements between the signature and the return (original lines
// 190-196) are not visible in this listing, so what is written to the stream and what
// happens to `state` cannot be documented from here.
187template <
size_t streamingLowerBound_V, simd::SIMDW
idth simdW
idth_V>
188template <
typename Stream_IT>
189Stream_IT SIMDEncoderImpl<streamingLowerBound_V, simdWidth_V>::flushState(state_type&
state, Stream_IT streamPosition)
197 return streamPosition;
// Renormalises a single state before a symbol with the given frequency is encoded.
//
// state      - current state, taken by value.
// outputIter - output iterator, taken by value.
// frequency  - frequency of the symbol about to be encoded.
// Returns a tuple of the resulting state and output iterator.
// NOTE(review): the definition of maxState and the body of the `if` are not visible in
// this listing.
200template <
size_t streamingLowerBound_V, simd::SIMDW
idth simdW
idth_V>
201template <
typename Stream_IT>
202inline auto SIMDEncoderImpl<streamingLowerBound_V, simdWidth_V>::renorm(state_type
state, Stream_IT outputIter, uint32_t frequency) -> std::tuple<state_type, Stream_IT>
// Only states at or above maxState take the renormalisation branch.
205 if (
state >= maxState) {
// Sanity check that the state is below maxState at this point.
209 assert(
state < maxState);
211 return std::make_tuple(
state, outputIter);
214template <
size_t streamingLowerBound_V>
215using SSEEncoderImpl = SIMDEncoderImpl<streamingLowerBound_V, simd::SIMDWidth::SSE>;
216template <
size_t streamingLowerBound_V>
217using AVXEncoderImpl = SIMDEncoderImpl<streamingLowerBound_V, simd::SIMDWidth::AVX>;
Defines the common operations for encoding data onto an rANS stream.
Contains statistical information for one source symbol, required for encoding/decoding.
common helper classes and functions
constexpr size_t StreamBits
constexpr size_t LowerBound
std::tuple< ransState_t, stream_IT > renorm(ransState_t state, stream_IT outputIter, count_t frequency, size_t symbolTablePrecision)
preprocessor defines to enable features based on CPU architecture
uint8_t itsSharedClusterMap uint8_t