![]() |
Project
|
#include <Kernels.h>
Public Member Functions | |
GPUbenchmark ()=delete | |
GPUbenchmark (benchmarkOpts &opts) | |
virtual | ~GPUbenchmark ()=default |
template<typename... T> | |
float | measure (void(GPUbenchmark::*)(T...), const char *, T &&... args) |
template<typename... T> | |
float | runSequential (void(*kernel)(chunk_t *, size_t, T...), std::pair< float, float > &chunkRanges, int32_t nLaunches, int32_t dimGrid, int32_t dimBlock, T &... args) |
template<typename... T> | |
std::vector< float > | runConcurrent (void(*kernel)(chunk_t *, size_t, T...), std::vector< std::pair< float, float > > &chunkRanges, int32_t nLaunches, int32_t dimStreams, int32_t nBlocks, int32_t nThreads, T &... args) |
template<typename... T> | |
float | runDistributed (void(*kernel)(chunk_t **, size_t *, T...), std::vector< std::pair< float, float > > &chunkRanges, int32_t nLaunches, size_t nBlocks, int32_t nThreads, T &... args) |
void | globalInit () |
void | run () |
void | globalFinalize () |
void | printDevices () |
void | initTest (Test) |
void | finalizeTest (Test) |
void | runTest (Test, Mode, KernelConfig) |
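|
o2::benchmark::GPUbenchmark< chunk_t >::GPUbenchmark | ( | ) | delete |
|
o2::benchmark::GPUbenchmark< chunk_t >::GPUbenchmark | ( | benchmarkOpts & | opts | ) | inline |
|
o2::benchmark::GPUbenchmark< chunk_t >::~GPUbenchmark | ( | ) | virtual default |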
void o2::benchmark::GPUbenchmark< chunk_t >::finalizeTest | ( | Test | ) |
void o2::benchmark::GPUbenchmark< chunk_t >::globalFinalize | ( | ) |
void o2::benchmark::GPUbenchmark< chunk_t >::globalInit | ( | ) |
void o2::benchmark::GPUbenchmark< chunk_t >::initTest | ( | Test | ) |
float o2::benchmark::GPUbenchmark< chunk_t >::measure | ( | void(GPUbenchmark< chunk_t >::*)(T...) | , |
const char * | , | ||
T &&... | args | ||
) |
void o2::benchmark::GPUbenchmark< chunk_t >::printDevices | ( | ) |
void o2::benchmark::GPUbenchmark< chunk_t >::run | ( | ) |
std::vector< float > o2::benchmark::GPUbenchmark< chunk_t >::runConcurrent | ( | void(*)(chunk_t *, size_t, T...) | kernel, |
std::vector< std::pair< float, float > > & | chunkRanges, | ||
int32_t | nLaunches, | ||
int32_t | dimStreams, | ||
int32_t | nBlocks, | ||
int32_t | nThreads, | ||
T &... | args | ||
) |
float o2::benchmark::GPUbenchmark< chunk_t >::runDistributed | ( | void(*)(chunk_t **, size_t *, T...) | kernel, |
std::vector< std::pair< float, float > > & | chunkRanges, | ||
int32_t | nLaunches, | ||
size_t | nBlocks, | ||
int32_t | nThreads, | ||
T &... | args | ||
) |
float o2::benchmark::GPUbenchmark< chunk_t >::runSequential | ( | void(*)(chunk_t *, size_t, T...) | kernel, |
std::pair< float, float > & | chunkRanges, | ||
int32_t | nLaunches, | ||
int32_t | dimGrid, | ||
int32_t | dimBlock, | ||
T &... | args | ||
) |
void o2::benchmark::GPUbenchmark< chunk_t >::runTest | ( | Test | , |
Mode | , | ||
KernelConfig | |||
) |