Project
Loading...
Searching...
No Matches
Kernels.h
Go to the documentation of this file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
14
15#ifndef GPU_BENCHMARK_KERNELS_H
16#define GPU_BENCHMARK_KERNELS_H
17
18#include "Utils.h"
19#include <vector>
20#include <iostream>
21#include <iomanip>
22#include <memory>
23#include <chrono>
24
25namespace o2
26{
27namespace benchmark
28{
29
30template <class chunk_t>
31class GPUbenchmark final
32{
33 public:
34 GPUbenchmark() = delete; // need for a configuration
35 GPUbenchmark(benchmarkOpts& opts) : mOptions{opts}
36 {
37 }
38 virtual ~GPUbenchmark() = default;
39 template <typename... T>
40 float measure(void (GPUbenchmark::*)(T...), const char*, T&&... args);
41
42 // Single stream (sequential kernels) execution
43 template <typename... T>
44 float runSequential(void (*kernel)(chunk_t*, size_t, T...),
45 std::pair<float, float>& chunkRanges,
46 int32_t nLaunches,
47 int32_t dimGrid,
48 int32_t dimBlock,
49 T&... args);
50
51 // Multi-streams asynchronous executions
52 template <typename... T>
53 std::vector<float> runConcurrent(void (*kernel)(chunk_t*, size_t, T...),
54 std::vector<std::pair<float, float>>& chunkRanges,
55 int32_t nLaunches,
56 int32_t dimStreams,
57 int32_t nBlocks,
58 int32_t nThreads,
59 T&... args);
60
61 // Single stream executions on all chunks at a time by same kernel
62 template <typename... T>
63 float runDistributed(void (*kernel)(chunk_t**, size_t*, T...),
64 std::vector<std::pair<float, float>>& chunkRanges,
65 int32_t nLaunches,
66 size_t nBlocks,
67 int32_t nThreads,
68 T&... args);
69
70 // Main interface
71 void globalInit(); // Allocate scratch buffers and compute runtime parameters
72 void run(); // Execute all specified callbacks
73 void globalFinalize(); // Cleanup
74 void printDevices(); // Dump info
75
76 // Initializations/Finalizations of tests. Not to be measured, in principle used for report
79
80 // Kernel calling wrapper
82
83 private:
84 gpuState<chunk_t> mState;
85 benchmarkOpts mOptions;
86};
87
88} // namespace benchmark
89} // namespace o2
90#endif
Mode
Definition Utils.h:89
Test
Definition Utils.h:55
KernelConfig
Definition Utils.h:111
GPUbenchmark(benchmarkOpts &opts)
Definition Kernels.h:35
virtual ~GPUbenchmark()=default
void runTest(Test, Mode, KernelConfig)
float runDistributed(void(*kernel)(chunk_t **, size_t *, T...), std::vector< std::pair< float, float > > &chunkRanges, int32_t nLaunches, size_t nBlocks, int32_t nThreads, T &... args)
float measure(void(GPUbenchmark::*)(T...), const char *, T &&... args)
std::vector< float > runConcurrent(void(*kernel)(chunk_t *, size_t, T...), std::vector< std::pair< float, float > > &chunkRanges, int32_t nLaunches, int32_t dimStreams, int32_t nBlocks, int32_t nThreads, T &... args)
float runSequential(void(*kernel)(chunk_t *, size_t, T...), std::pair< float, float > &chunkRanges, int32_t nLaunches, int32_t dimGrid, int32_t dimBlock, T &... args)
a couple of static helper functions to create timestamp values for CCDB queries or override obsolete ...