d0/d92/Kernels_8h_source.html

// Copyright 2019-2020 CERN and copyright holders of ALICE O2.

// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.

// All rights not expressly granted are reserved.

//

// This software is distributed under the terms of the GNU General Public

// License v3 (GPL Version 3), copied verbatim in the file "COPYING".

//

// In applying this license CERN does not waive the privileges and immunities

// granted to it by virtue of its status as an Intergovernmental Organization

// or submit itself to any jurisdiction.


#ifndef GPU_BENCHMARK_KERNELS_H

#define GPU_BENCHMARK_KERNELS_H


#include "Utils.h"

#include <vector>

#include <iostream>

#include <iomanip>

#include <memory>

#include <chrono>


namespace o2

{


namespace benchmark

{


template <class chunk_t>


class GPUbenchmark final

{

 public:

  GPUbenchmark() = delete; // need for a configuration


  GPUbenchmark(benchmarkOpts& opts) : mOptions{opts}

  {

  }


  virtual ~GPUbenchmark() = default;

  template <typename... T>

  float measure(void (GPUbenchmark::*)(T...), const char*, T&&... args);


  // Single stream (sequential kernels) execution

  template <typename... T>

  float runSequential(void (*kernel)(chunk_t*, size_t, T...),

                      std::pair<float, float>& chunkRanges,

                      int32_t nLaunches,

                      int32_t dimGrid,

                      int32_t dimBlock,

                      T&... args);


  // Multi-streams asynchronous executions

  template <typename... T>

  std::vector<float> runConcurrent(void (*kernel)(chunk_t*, size_t, T...),

                                   std::vector<std::pair<float, float>>& chunkRanges,

                                   int32_t nLaunches,

                                   int32_t dimStreams,

                                   int32_t nBlocks,

                                   int32_t nThreads,

                                   T&... args);


  // Single stream executions on all chunks at a time by same kernel

  template <typename... T>

  float runDistributed(void (*kernel)(chunk_t**, size_t*, T...),

                       std::vector<std::pair<float, float>>& chunkRanges,

                       int32_t nLaunches,

                       size_t nBlocks,

                       int32_t nThreads,

                       T&... args);


  // Main interface

  void globalInit();     // Allocate scratch buffers and compute runtime parameters

  void run();            // Execute all specified callbacks

  void globalFinalize(); // Cleanup

  void printDevices();   // Dump info


  // Initializations/Finalizations of tests. Not to be measured, in principle used for report

  void initTest(Test);

  void finalizeTest(Test);


  // Kernel calling wrapper

  void runTest(Test, Mode, KernelConfig);


 private:

  gpuState<chunk_t> mState;

  benchmarkOpts mOptions;

};


} // namespace benchmark


} // namespace o2

#endif

Utils.h

Mode
Mode
Definition Utils.h:89

Test
Test
Definition Utils.h:55

KernelConfig
KernelConfig
Definition Utils.h:111

char

o2::benchmark::GPUbenchmark
Definition Kernels.h:32

o2::benchmark::GPUbenchmark::printDevices
void printDevices()

o2::benchmark::GPUbenchmark::GPUbenchmark
GPUbenchmark(benchmarkOpts &opts)
Definition Kernels.h:35

o2::benchmark::GPUbenchmark::finalizeTest
void finalizeTest(Test)

o2::benchmark::GPUbenchmark::~GPUbenchmark
virtual ~GPUbenchmark()=default

o2::benchmark::GPUbenchmark::globalFinalize
void globalFinalize()

o2::benchmark::GPUbenchmark::runTest
void runTest(Test, Mode, KernelConfig)

o2::benchmark::GPUbenchmark::runDistributed
float runDistributed(void(*kernel)(chunk_t **, size_t *, T...), std::vector< std::pair< float, float > > &chunkRanges, int32_t nLaunches, size_t nBlocks, int32_t nThreads, T &... args)

o2::benchmark::GPUbenchmark::measure
float measure(void(GPUbenchmark::*)(T...), const char *, T &&... args)

o2::benchmark::GPUbenchmark::globalInit
void globalInit()

o2::benchmark::GPUbenchmark::runConcurrent
std::vector< float > runConcurrent(void(*kernel)(chunk_t *, size_t, T...), std::vector< std::pair< float, float > > &chunkRanges, int32_t nLaunches, int32_t dimStreams, int32_t nBlocks, int32_t nThreads, T &... args)

o2::benchmark::GPUbenchmark::initTest
void initTest(Test)

o2::benchmark::GPUbenchmark::GPUbenchmark
GPUbenchmark()=delete

o2::benchmark::GPUbenchmark::run
void run()

o2::benchmark::GPUbenchmark::runSequential
float runSequential(void(*kernel)(chunk_t *, size_t, T...), std::pair< float, float > &chunkRanges, int32_t nLaunches, int32_t dimGrid, int32_t dimBlock, T &... args)

o2
a couple of static helper functions to create timestamp values for CCDB queries or override obsolete ...
Definition BitstreamReader.h:24

o2::benchmark::benchmarkOpts
Definition Utils.h:212

o2::benchmark::gpuState
Definition Utils.h:236