#ifndef GPU_BENCHMARK_KERNELS_H
#define GPU_BENCHMARK_KERNELS_H
template <class chunk_t>
  template <typename... T>
  template <typename... T>
      std::pair<float, float>& chunkRanges,
  template <typename... T>
  std::vector<float> runConcurrent(void (*kernel)(chunk_t*, size_t, T...),
                                   std::vector<std::pair<float, float>>& chunkRanges,
  template <typename... T>
      std::vector<std::pair<float, float>>& chunkRanges,
GPUbenchmark(benchmarkOpts &opts)
virtual ~GPUbenchmark()=default
void runTest(Test, Mode, KernelConfig)
float runDistributed(void(*kernel)(chunk_t **, size_t *, T...), std::vector< std::pair< float, float > > &chunkRanges, int32_t nLaunches, size_t nBlocks, int32_t nThreads, T &... args)
float measure(void(GPUbenchmark::*)(T...), const char *, T &&... args)
std::vector< float > runConcurrent(void(*kernel)(chunk_t *, size_t, T...), std::vector< std::pair< float, float > > &chunkRanges, int32_t nLaunches, int32_t dimStreams, int32_t nBlocks, int32_t nThreads, T &... args)
float runSequential(void(*kernel)(chunk_t *, size_t, T...), std::pair< float, float > &chunkRanges, int32_t nLaunches, int32_t dimGrid, int32_t dimBlock, T &... args)
a couple of static helper functions to create timestamp values for CCDB queries or override obsolete ...