15#ifndef GPU_BENCHMARK_UTILS_H
16#define GPU_BENCHMARK_UTILS_H
19#include "hip/hip_runtime.h"
26#include <boost/program_options.hpp>
// ANSI escape sequences of the form ESC [ <code> m that switch the terminal foreground
// color: 31 red, 32 green, 33 yellow, 34 blue, 35 magenta, 36 cyan, 37 white.
// Each macro expands to a plain string literal, so it can be handed to printf through a
// %s conversion (KRED is used that way in this file).
32#define KRED "\x1B[31m"
33#define KGRN "\x1B[32m"
34#define KYEL "\x1B[33m"
// NOTE(review): code 34 selects blue and every sibling macro is named K<COLOR>; this name
// breaks the pattern and is presumably meant to be KBLU -- confirm against the upstream header.
35#define configLU "\x1B[34m"
36#define KMAG "\x1B[35m"
37#define KCYN "\x1B[36m"
38#define KWHT "\x1B[37m"
// Number of bytes in one gibibyte (2^30 = 1073741824).
// The expression is evaluated in int arithmetic. The size computations visible in this file
// multiply it by a float before casting to size_t, which keeps the product out of int range
// problems; multiplying it by an int of 2 or more would overflow a 32-bit int.
40#define GB (1024 * 1024 * 1024)
43 printf("%serror: ", KRED); \
44 printf(__VA_ARGS__); \
46 printf("error: TEST FAILED\n%s", KNRM); \
140 if (
typeid(T).
name() ==
typeid(int8_t).
name()) {
141 return std::string{
"int8_t"};
143 if (
typeid(T).
name() ==
typeid(
size_t).
name()) {
144 return std::string{
"uint64_t"};
146 if (
typeid(T).
name() ==
typeid(int32_t).
name()) {
147 return std::string{
"int32_t"};
150 return std::string{
"int4"};
152 return std::string{
"unknown"};
166template <
class chunk_t>
169 return reinterpret_cast<chunk_t*
>(
reinterpret_cast<char*
>(scratchPtr) + (
static_cast<size_t>(
GB * startGB) & 0xFFFFFFFFFFFFF000));
177 return 1e3 * chunkSizeGB * (float)ntests /
result;
180template <
class chunk_t>
183 auto chunkCapacity = (
static_cast<size_t>(
GB * chunkSizeGB) & 0xFFFFFFFFFFFFF000) /
sizeof(chunk_t);
185 return chunkCapacity;
187 return (chunkCapacity % prime == 0) ? (chunkCapacity - 0x1000) : chunkCapacity;
194 if (
n == 0 ||
n == 1) {
197 for (int32_t
i = 2;
i <= sqrt(
n); ++
i) {
// Data-type names, defaulting to int8_t, int32_t and uint64_t. These are the same strings
// that the type-name return statements earlier in this file produce.
// NOTE(review): presumably the chunk element types to run the benchmark with -- confirm
// against the code that reads this member.
219 std::vector<std::string>
dtypes = {
"int8_t",
"int32_t",
"uint64_t"};
235template <
class chunk_t>
std::ostream & operator<<(std::ostream &os, Test test)
chunk_t * getCustomPtr(chunk_t *scratchPtr, float startGB)
float computeThroughput(Test test, float result, float chunkSizeGB, int32_t ntests)
size_t getBufferCapacity(float chunkSizeGB, int32_t prime)
bool is_prime(const int32_t n)
std::string getTestName(Mode mode, Test test, KernelConfig blocks)
void discardResult(const T &)
GLuint const GLchar * name
a couple of static helper functions to create timestamp values for CCDB queries or override obsolete ...
FIXME: do not use data model tables.
std::vector< std::string > dtypes
std::vector< std::pair< float, float > > testChunks
std::vector< KernelConfig > pools
std::vector< Test > tests
std::vector< Mode > modes
float freeMemoryFractionToAllocate
size_t nMaxThreadsPerDimension
std::vector< std::pair< float, float > > testChunks
int32_t getNKernelLaunches()
int32_t getStreamsPoolSize()
size_t nMaxThreadsPerBlock
std::vector< chunk_t * > partAddrOnHost