// Two-dimensional table of uint32_t values: the first dimension is sized by
// GPUCA_GET_WARP_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered),
// the second by GPUCA_WARP_SIZE.
// NOTE(review): presumably one row per warp and one slot per lane, holding
// source offsets for the unbuffered gather kernel - confirm against the users
// of this member; the enclosing struct is not visible here.
87 uint32_t srcOffsets[
GPUCA_GET_WARP_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered)][GPUCA_WARP_SIZE];
95 template <typename Scalar, typename BaseVector>
98 Size =
sizeof(BaseVector) /
sizeof(Scalar),
104 template <
int32_t iKernel = defaultKernel>
107 template <typename T, typename
S>
110 template <typename T>
113 template <typename Scalar, typename Vector>
116 template <typename T>
// Declaration of the static function template compressorMemcpyBuffered; the
// definition is not part of this view. GPUdi() is a project macro defined
// elsewhere.
//
// Template parameters:
//   V - pointee type of `buf`
//   T - element type shared by `dst` (writable) and `src` (read-only)
//   S - element type of `nums`
//
// Parameters:
//   buf           - pointer to V; presumably staging storage used by the
//                   buffered copy - TODO confirm against the definition
//   dst           - writable output pointer
//   src           - read-only input pointer
//   nums          - read-only array of S; presumably per-entry element counts
//                   - TODO confirm
//   srcOffets     - read-only uint32_t array; NOTE(review): the name looks like
//                   a typo for "srcOffsets"; presumably per-entry offsets into
//                   `src` - confirm
//   nEntries      - unsigned entry count
//   nLanes, iLane - presumably the number of cooperating lanes and the calling
//                   lane's index - confirm
//   diff          - optional, defaults to 0; its meaning is not visible here
//   scaleBase1024 - optional, defaults to 1024; NOTE(review): the name suggests
//                   a scale factor expressed relative to 1024 - confirm
//
// Returns nothing.
119 template <typename V, typename T, typename
S>
120 GPUdi() static
void compressorMemcpyBuffered(V*
buf, T*
dst, const T*
src, const
S* nums, const uint32_t* srcOffets, uint32_t nEntries, int32_t nLanes, int32_t iLane, int32_t diff = 0,
size_t scaleBase1024 = 1024);
// Declaration of the static function template calculateWarpOffsets; the
// definition is not part of this view. GPUdi() is a project macro defined
// elsewhere.
//
// Template parameter:
//   T - element type of `nums`
//
// Parameters:
//   smem          - reference to a GPUSharedMemory object (type declared
//                   elsewhere), passed non-const
//   nums          - pointer to T, passed non-const
//   start, end    - unsigned bounds; presumably the index range of `nums` to
//                   process - TODO confirm whether `end` is inclusive
//   nWarps, iWarp - presumably the number of participating warps and the
//                   calling warp's index - confirm
//   nLanes, iLane - presumably the number of lanes and the calling lane's
//                   index - confirm
//
// Returns a uint32_t; NOTE(review): presumably an offset derived from `nums`
// for the calling warp - confirm against the definition.
122 template <typename T>
123 GPUdi() static uint32_t calculateWarpOffsets(
GPUSharedMemory& smem, T* nums, uint32_t
start, uint32_t
end, int32_t nWarps, int32_t iWarp, int32_t nLanes, int32_t iLane);
125 template <typename V>