144 assert(inputBegin !=
nullptr);
145 assert(outputBegin !=
nullptr);
147 constexpr size_t PackingBufferBits = utils::toBits<packing_type>();
148 constexpr size_t PackingWidth = width_V;
150 constexpr size_t NPackAtOnce = PackingBufferBits / PackingWidth;
152 uint8_t* outputIter =
reinterpret_cast<uint8_t*
>(outputBegin);
153 size_t outputIterBitOffset = {};
155 const size_t nIterations = extent / NPackAtOnce;
156 const size_t nRemainderIterations = extent % NPackAtOnce;
158 auto inputIter = inputBegin;
159 const auto iterEnd = inputIter + NPackAtOnce * nIterations;
162 for (; inputIter < iterEnd; inputIter += NPackAtOnce) {
165 const size_t tail = PackingBufferBits - outputIterBitOffset;
166 overflowBuffer |= packed << outputIterBitOffset;
167 outputIterBitOffset += NPackAtOnce * PackingWidth;
169 if constexpr (PackingBufferBits % PackingWidth == 0) {
170 write64(outputIter, overflowBuffer);
172 outputIterBitOffset = 0;
175 if (outputIterBitOffset >= PackingBufferBits) {
176 write64(outputIter, overflowBuffer);
178 overflowBuffer = packed >> tail;
180 outputIterBitOffset %= PackingBufferBits;
183 write64(outputIter, overflowBuffer);
185 BitPtr bitPos{outputIter,
static_cast<intptr_t>(outputIterBitOffset)};
186 for (
size_t i = 0;
i < nRemainderIterations; ++
i) {
187 const int64_t adjustedValue =
static_cast<int64_t
>(inputIter[
i]) -
offset;
188 bitPos =
pack(bitPos, adjustedValue, PackingWidth);
196inline constexpr BitPtr pack(
const input_T* __restrict inputBegin,
size_t extent, output_T* __restrict outputBegin,
size_t packingWidth, input_T
offset =
static_cast<input_T
>(0))
198 using namespace internal;
199 using namespace utils;
201 assert(inputBegin !=
nullptr);
202 assert(outputBegin !=
nullptr);
204 switch (packingWidth) {
207 throw PackingError(
"Cannot pack data into 0 Bit wide blocks");
209 return BitPtr(outputBegin);
213 return packStreamImpl<input_T, output_T, 1>(inputBegin, extent, outputBegin,
offset);
216 return packStreamImpl<input_T, output_T, 2>(inputBegin, extent, outputBegin,
offset);
219 return packStreamImpl<input_T, output_T, 3>(inputBegin, extent, outputBegin,
offset);
222 return packStreamImpl<input_T, output_T, 4>(inputBegin, extent, outputBegin,
offset);
225 return packStreamImpl<input_T, output_T, 5>(inputBegin, extent, outputBegin,
offset);
228 return packStreamImpl<input_T, output_T, 6>(inputBegin, extent, outputBegin,
offset);
231 return packStreamImpl<input_T, output_T, 7>(inputBegin, extent, outputBegin,
offset);
234 return packStreamImpl<input_T, output_T, 8>(inputBegin, extent, outputBegin,
offset);
237 return packStreamImpl<input_T, output_T, 9>(inputBegin, extent, outputBegin,
offset);
240 return packStreamImpl<input_T, output_T, 10>(inputBegin, extent, outputBegin,
offset);
243 return packStreamImpl<input_T, output_T, 11>(inputBegin, extent, outputBegin,
offset);
246 return packStreamImpl<input_T, output_T, 12>(inputBegin, extent, outputBegin,
offset);
249 return packStreamImpl<input_T, output_T, 13>(inputBegin, extent, outputBegin,
offset);
252 return packStreamImpl<input_T, output_T, 14>(inputBegin, extent, outputBegin,
offset);
255 return packStreamImpl<input_T, output_T, 15>(inputBegin, extent, outputBegin,
offset);
258 return packStreamImpl<input_T, output_T, 16>(inputBegin, extent, outputBegin,
offset);
261 return packStreamImpl<input_T, output_T, 17>(inputBegin, extent, outputBegin,
offset);
264 return packStreamImpl<input_T, output_T, 18>(inputBegin, extent, outputBegin,
offset);
267 return packStreamImpl<input_T, output_T, 19>(inputBegin, extent, outputBegin,
offset);
270 return packStreamImpl<input_T, output_T, 20>(inputBegin, extent, outputBegin,
offset);
273 return packStreamImpl<input_T, output_T, 21>(inputBegin, extent, outputBegin,
offset);
276 return packStreamImpl<input_T, output_T, 22>(inputBegin, extent, outputBegin,
offset);
279 return packStreamImpl<input_T, output_T, 23>(inputBegin, extent, outputBegin,
offset);
282 return packStreamImpl<input_T, output_T, 24>(inputBegin, extent, outputBegin,
offset);
285 return packStreamImpl<input_T, output_T, 25>(inputBegin, extent, outputBegin,
offset);
288 return packStreamImpl<input_T, output_T, 26>(inputBegin, extent, outputBegin,
offset);
291 return packStreamImpl<input_T, output_T, 27>(inputBegin, extent, outputBegin,
offset);
294 return packStreamImpl<input_T, output_T, 28>(inputBegin, extent, outputBegin,
offset);
297 return packStreamImpl<input_T, output_T, 29>(inputBegin, extent, outputBegin,
offset);
300 return packStreamImpl<input_T, output_T, 30>(inputBegin, extent, outputBegin,
offset);
303 return packStreamImpl<input_T, output_T, 31>(inputBegin, extent, outputBegin,
offset);
306 return packStreamImpl<input_T, output_T, 32>(inputBegin, extent, outputBegin,
offset);
310 for (
size_t i = 0;
i < extent; ++
i) {
311 const int64_t adjustedValue =
static_cast<int64_t
>(inputBegin[
i]) -
offset;
312 iter = packLong(iter, adjustedValue, packingWidth);