Project
Loading...
Searching...
No Matches
Utils.h
Go to the documentation of this file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
14
15#ifndef GPU_BENCHMARK_UTILS_H
16#define GPU_BENCHMARK_UTILS_H
17
18#if defined(__HIPCC__)
19#include "hip/hip_runtime.h"
20#endif
21
22#include <iostream>
23#include <sstream>
24#include <iomanip>
25#include <typeinfo>
26#include <boost/program_options.hpp>
27#include <vector>
28#include <string>
29#include <cmath>
30
31#define KNRM "\x1B[0m"
32#define KRED "\x1B[31m"
33#define KGRN "\x1B[32m"
34#define KYEL "\x1B[33m"
35#define configLU "\x1B[34m"
36#define KMAG "\x1B[35m"
37#define KCYN "\x1B[36m"
38#define KWHT "\x1B[37m"
39
40#define GB (1024 * 1024 * 1024)
41
// Print a printf-style error message in red, report the test as failed and
// terminate the process with EXIT_FAILURE.
//
// The statements are wrapped in do { ... } while (0) so that the macro expands
// to exactly one statement: without the wrapper, `if (cond) failed("...");`
// would guard only the first printf and unconditionally reach exit().
// Invoke it like a function call, followed by a semicolon.
#define failed(...)                         \
  do {                                      \
    printf("%serror: ", KRED);              \
    printf(__VA_ARGS__);                    \
    printf("\n");                           \
    printf("error: TEST FAILED\n%s", KNRM); \
    exit(EXIT_FAILURE);                     \
  } while (0)
48#endif
49
// Accepts any value by const reference and deliberately does nothing with it.
// Presumably used to mark a result as intentionally ignored at the call site.
template <class T>
void discardResult(const T& /*ignored*/)
{
  // Intentionally empty.
}
54
// Kinds of memory test; each one has a textual label, see operator<< below.
// NOTE(review): the three Random* enumerators were absent from this listing.
// Their names come from the symbol index of the same page; their declaration
// order is assumed to follow the order of the labels in operator<< below.
// Confirm against the upstream header.
enum class Test {
  Read,
  Write,
  Copy,
  RandomRead,
  RandomWrite,
  RandomCopy
};

// Write the human-readable label of `test` to `os` and return `os`.
// The switch intentionally has no default branch so the compiler can warn
// when a new enumerator is added without a label.
inline std::ostream& operator<<(std::ostream& os, Test test)
{
  switch (test) {
    case Test::Read:
      os << "read";
      break;
    case Test::Write:
      os << "write";
      break;
    case Test::Copy:
      os << "copy";
      break;
    case Test::RandomRead:
      os << "random read";
      break;
    case Test::RandomWrite:
      os << "random write";
      break;
    case Test::RandomCopy:
      os << "random copy";
      break;
  }
  return os;
}
88
// Benchmark execution modes; each one has a textual label, see operator<< below.
// NOTE(review): the enumerator list was absent from this listing. The names
// come from the symbol index of the same page and from their uses elsewhere in
// this file; the declaration order is assumed to follow the order of the
// labels in operator<< below. Confirm against the upstream header.
enum class Mode {
  Sequential,
  Concurrent,
  Distributed
};

// Write the human-readable label of `mode` to `os` and return `os`.
// The switch intentionally has no default branch so the compiler can warn
// when a new enumerator is added without a label.
inline std::ostream& operator<<(std::ostream& os, Mode mode)
{
  switch (mode) {
    case Mode::Sequential:
      os << "sequential";
      break;
    case Mode::Concurrent:
      os << "concurrent";
      break;
    case Mode::Distributed:
      os << "distributed";
      break;
  }
  return os;
}
110
// Kernel configuration selector; each value has a textual label, see operator<< below.
enum class KernelConfig {
  Single,
  Multi,
  All,
  Manual
};

// Write the human-readable label of `config` to `os` and return `os`.
// Every enumerator has its own case label; the switch intentionally has no
// default branch so the compiler can warn when a new enumerator is added.
inline std::ostream& operator<<(std::ostream& os, KernelConfig config)
{
  switch (config) {
    case KernelConfig::Single:
      os << "single";
      break;
    case KernelConfig::Multi:
      os << "multiple";
      break;
    case KernelConfig::All:
      os << "all";
      break;
    case KernelConfig::Manual:
      os << "manual";
      break;
  }
  return os;
}
136
137template <class T>
138inline std::string getType()
139{
140 if (typeid(T).name() == typeid(int8_t).name()) {
141 return std::string{"int8_t"};
142 }
143 if (typeid(T).name() == typeid(size_t).name()) {
144 return std::string{"uint64_t"};
145 }
146 if (typeid(T).name() == typeid(int32_t).name()) {
147 return std::string{"int32_t"};
148 }
149 if (typeid(T).name() == typeid(int4).name()) {
150 return std::string{"int4"};
151 }
152 return std::string{"unknown"};
153}
154
155inline std::string getTestName(Mode mode, Test test, KernelConfig blocks)
156{
157 std::string tname;
158 tname += (mode == Mode::Sequential) ? "seq_" : "conc_";
159 tname += (test == Test::Read) ? "read_" : (test == Test::Write) ? "write_"
160 : "copy_";
161 tname += (blocks == KernelConfig::Single) ? "SB" : "MB";
162 return tname;
163}
164
165// Return pointer to custom offset (GB)
166template <class chunk_t>
167inline chunk_t* getCustomPtr(chunk_t* scratchPtr, float startGB)
168{
169 return reinterpret_cast<chunk_t*>(reinterpret_cast<char*>(scratchPtr) + (static_cast<size_t>(GB * startGB) & 0xFFFFFFFFFFFFF000));
170}
171
172inline float computeThroughput(Test test, float result, float chunkSizeGB, int32_t ntests)
173{
174 // https://docs.nvidia.com/cuda/cuda-c-best-practices-guide/index.html
175 // Eff_bandwidth (GB/s) = (B_r + B_w) / (~1e9 * Time (s))
176
177 return 1e3 * chunkSizeGB * (float)ntests / result;
178}
179
180template <class chunk_t>
181inline size_t getBufferCapacity(float chunkSizeGB, int32_t prime)
182{
183 auto chunkCapacity = (static_cast<size_t>(GB * chunkSizeGB) & 0xFFFFFFFFFFFFF000) / sizeof(chunk_t);
184 if (!prime) {
185 return chunkCapacity;
186 } else {
187 return (chunkCapacity % prime == 0) ? (chunkCapacity - 0x1000) : chunkCapacity;
188 }
189}
190
// Return true if `n` is a prime number.
// Values below 2 (including all negative numbers) are not prime.
inline bool is_prime(const int32_t n)
{
  if (n < 2) {
    return false;
  }
  // Trial division up to sqrt(n). The bound is written as `i <= n / i`, which
  // is equivalent to i * i <= n for positive integers but cannot overflow and
  // avoids a floating-point sqrt on every iteration.
  for (int32_t i = 2; i <= n / i; ++i) {
    if (n % i == 0) {
      return false;
    }
  }
  return true;
}
207
208namespace o2
209{
210namespace benchmark
211{
213 benchmarkOpts() = default;
214
215 int32_t deviceId = 0;
216 std::vector<Test> tests = {Test::Read, Test::Write, Test::Copy};
218 std::vector<KernelConfig> pools = {KernelConfig::Single, KernelConfig::Multi};
219 std::vector<std::string> dtypes = {"int8_t", "int32_t", "uint64_t"};
220 std::vector<std::pair<float, float>> testChunks;
221 float chunkReservedGB = 1.f;
224 int32_t numThreads = -1;
225 int32_t numBlocks = -1;
226 int32_t kernelLaunches = 1;
227 int32_t nTests = 1;
228 bool raw = false;
229 int32_t streams = 8;
230 int32_t prime = 0;
231 std::string outFileName = "benchmark_result";
232 bool dumpChunks = false;
233};
234
// State of a benchmark run for element type chunk_t: scratch buffer bookkeeping
// plus the iteration and stream counts.
// NOTE(review): this listing has gaps in its line numbering, so some members
// of the struct are not shown here; only the visible ones are described.
235template <class chunk_t>
236struct gpuState {
 // Number of partitions of chunkReservedGB gigabytes that fit in scratchSize
 // bytes; the quotient is truncated by the conversion to int32_t.
237 int32_t getMaxChunks()
238 {
239 return (double)scratchSize / (chunkReservedGB * GB);
240 }
241
 // Returns `iterations`.
242 int32_t getNKernelLaunches() { return iterations; }
 // Returns `streams`.
243 int32_t getStreamsPoolSize() { return streams; }
244
245 // Configuration
247 int32_t iterations;
248 int32_t streams;
249
250 float chunkReservedGB; // Size of each partition (GB)
251
252 // General containers and state
253 chunk_t* scratchPtr; // Pointer to scratch buffer
254 size_t scratchSize; // Size of scratch area (B)
255 std::vector<chunk_t*> partAddrOnHost; // Pointers to scratch partitions on host vector
256 std::vector<std::pair<float, float>> testChunks; // Vector of definitions for arbitrary chunks
257
258 // Static info
262};
263
264} // namespace benchmark
265} // namespace o2
int32_t i
std::ostream & operator<<(std::ostream &os, Test test)
Definition Utils.h:64
chunk_t * getCustomPtr(chunk_t *scratchPtr, float startGB)
Definition Utils.h:167
#define GB
Definition Utils.h:40
Mode
Definition Utils.h:89
@ Concurrent
@ Distributed
@ Sequential
float computeThroughput(Test test, float result, float chunkSizeGB, int32_t ntests)
Definition Utils.h:172
std::string getType()
Definition Utils.h:138
size_t getBufferCapacity(float chunkSizeGB, int32_t prime)
Definition Utils.h:181
bool is_prime(const int32_t n)
Definition Utils.h:191
Test
Definition Utils.h:55
@ RandomCopy
@ RandomRead
@ RandomWrite
std::string getTestName(Mode mode, Test test, KernelConfig blocks)
Definition Utils.h:155
KernelConfig
Definition Utils.h:111
void discardResult(const T &)
Definition Utils.h:51
GLdouble n
Definition glcorearb.h:1982
GLenum mode
Definition glcorearb.h:266
GLuint64EXT * result
Definition glcorearb.h:5662
GLuint const GLchar * name
Definition glcorearb.h:781
a couple of static helper functions to create timestamp values for CCDB queries or override obsolete ...
FIXME: do not use data model tables.
std::vector< std::string > dtypes
Definition Utils.h:219
std::vector< std::pair< float, float > > testChunks
Definition Utils.h:220
std::vector< KernelConfig > pools
Definition Utils.h:218
std::vector< Test > tests
Definition Utils.h:216
std::vector< Mode > modes
Definition Utils.h:217
chunk_t * scratchPtr
Definition Utils.h:253
size_t nMaxThreadsPerDimension
Definition Utils.h:246
std::vector< std::pair< float, float > > testChunks
Definition Utils.h:256
int32_t getNKernelLaunches()
Definition Utils.h:242
int32_t getStreamsPoolSize()
Definition Utils.h:243
size_t nMaxThreadsPerBlock
Definition Utils.h:261
int32_t getMaxChunks()
Definition Utils.h:237
std::vector< chunk_t * > partAddrOnHost
Definition Utils.h:255