// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
// All rights not expressly granted are reserved.
//
// This software is distributed under the terms of the GNU General Public
// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
//
// In applying this license CERN does not waive the privileges and immunities
// granted to it by virtue of its status as an Intergovernmental Organization
// or submit itself to any jurisdiction.

/// \file GPUReconstructionProcessing.cxx

#include "GPUReconstructionProcessing.h"
#include "GPUReconstructionThreading.h" // assumed include: provides the TBB task arena wrapper (mThreading) used below
#include "GPUDefParametersLoad.inc"
#include "GPUReconstructionKernelIncludes.h"
#include "GPUSettings.h"
#include "GPULogging.h"

#include <algorithm>
#include <atomic>

using namespace o2::gpu;

GPUReconstructionProcessing::GPUReconstructionProcessing(const GPUSettingsDeviceBackend& cfg) : GPUReconstruction(cfg)
{
  if (mMaster == nullptr) {
    mParCPU = new GPUDefParameters(o2::gpu::internal::GPUDefParametersLoad());
    mParDevice = new GPUDefParameters(*mParCPU); // assumption: device parameters start as a copy of the host defaults until a backend overrides them
  } else {
    GPUReconstructionProcessing* master = static_cast<GPUReconstructionProcessing*>(mMaster); // assumption: mMaster always points to another GPUReconstructionProcessing
    mParCPU = master->mParCPU;
    mParDevice = master->mParDevice;
  }
}

GPUReconstructionProcessing::~GPUReconstructionProcessing()
{
  if (mMaster == nullptr) {
    delete mParCPU;
    delete mParDevice;
  }
}

// Number of host threads a kernel is allowed to use, derived from the in-kernel parallelism settings.
int32_t GPUReconstructionProcessing::getNKernelHostThreads(bool splitCores)
{
  int32_t nThreads = 0;
  if (GetProcessingSettings().inKernelParallel == 2 && mNActiveThreadsOuterLoop) {
    if (splitCores) {
      nThreads = mMaxHostThreads / mNActiveThreadsOuterLoop; // assumption: share the host threads evenly among the active outer-loop threads
    } else {
      nThreads = mMaxHostThreads;
    }
    nThreads = std::max(1, nThreads);
  } else {
    nThreads = GetProcessingSettings().inKernelParallel ? mMaxHostThreads : 1;
  }
  return nThreads;
}
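
// Worked example (illustrative only, under the even-split assumption noted above): with
// mMaxHostThreads == 16, inKernelParallel == 2 and mNActiveThreadsOuterLoop == 4, splitCores == true
// yields 16 / 4 = 4 threads per kernel, while splitCores == false lets each kernel use all 16.
// With inKernelParallel == 0 the result is 1, i.e. kernels run single-threaded on the host.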

// Sets how many host threads the kernels may use; n < 0 selects all available host threads.
void GPUReconstructionProcessing::SetNActiveThreads(int32_t n)
{
  mActiveHostKernelThreads = std::max(1, n < 0 ? mMaxHostThreads : std::min(n, mMaxHostThreads));
  mThreading->activeThreads = std::make_unique<tbb::task_arena>(mActiveHostKernelThreads);
  if (GetProcessingSettings().debugLevel >= 3) {
    GPUInfo("Set number of active parallel kernel threads on host to %d (%d requested)", mActiveHostKernelThreads, n);
  }
}

void GPUReconstructionProcessing::runParallelOuterLoop(bool doGPU, uint32_t nThreads, std::function<void(uint32_t)> lambda)
{
  uint32_t nThreadsAdjusted = SetAndGetNActiveThreadsOuterLoop(!doGPU, nThreads);
  if (nThreadsAdjusted > 1) {
    tbb::task_arena(nThreadsAdjusted).execute([&] {
      tbb::parallel_for<uint32_t>(0, nThreads, lambda, tbb::simple_partitioner());
    });
  } else {
    for (uint32_t i = 0; i < nThreads; i++) {
      lambda(i);
    }
  }
}
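
// Usage sketch (illustrative only; "processSector" and NSectors are hypothetical, not part of this
// file): a caller with NSectors independent work items could write
//
//   runParallelOuterLoop(doGPU, NSectors, [&](uint32_t iSector) {
//     processSector(iSector); // runs sequentially, or inside a tbb::task_arena sized by
//                             // SetAndGetNActiveThreadsOuterLoop(!doGPU, NSectors)
//   });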

uint32_t GPUReconstructionProcessing::SetAndGetNActiveThreadsOuterLoop(bool condition, uint32_t max)
{
  if (condition && GetProcessingSettings().inKernelParallel != 1) {
    mNActiveThreadsOuterLoop = GetProcessingSettings().inKernelParallel == 2 ? std::min<uint32_t>(max, mMaxHostThreads) : mMaxHostThreads;
  } else {
    mNActiveThreadsOuterLoop = 1;
  }
  if (GetProcessingSettings().debugLevel >= 5) {
    printf("Running %d threads in outer loop\n", mNActiveThreadsOuterLoop);
  }
  return mNActiveThreadsOuterLoop;
}
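
// Resulting outer-loop thread count, summarizing the branches above:
//   inKernelParallel == 1: always 1 (all host threads are reserved for in-kernel parallelism)
//   inKernelParallel == 2: min(max, mMaxHostThreads) if condition holds, otherwise 1
//   inKernelParallel == 0: mMaxHostThreads if condition holds, otherwise 1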

std::atomic_flag GPUReconstructionProcessing::mTimerFlag = ATOMIC_FLAG_INIT;

GPUReconstructionProcessing::timerMeta* GPUReconstructionProcessing::insertTimer(uint32_t id, std::string&& name, int32_t J, int32_t num, int32_t type, RecoStep step)
{
  while (mTimerFlag.test_and_set()) {
  }
  if (mTimers.size() <= id) {
    mTimers.resize(id + 1);
  }
  if (mTimers[id] == nullptr) {
    if (J >= 0) {
      name += std::to_string(J);
    }
    mTimers[id].reset(new timerMeta{std::unique_ptr<HighResTimer[]>{new HighResTimer[num]}, name, num, type, 1u, step, (size_t)0});
  } else {
    mTimers[id]->count++;
  }
  timerMeta* retVal = mTimers[id].get();
  mTimerFlag.clear();
  return retVal;
}
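
// mTimerFlag implements a minimal std::atomic_flag spin lock around mTimers; the generic pattern
// (sketch, independent of this class) is:
//
//   static std::atomic_flag lock = ATOMIC_FLAG_INIT;
//   while (lock.test_and_set()) { // busy-wait until the previous holder calls clear()
//   }
//   /* ... access the shared container ... */
//   lock.clear();                 // release the lock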

GPUReconstructionProcessing::timerMeta* GPUReconstructionProcessing::getTimerById(uint32_t id, bool increment)
{
  timerMeta* retVal = nullptr;
  while (mTimerFlag.test_and_set()) {
  }
  if (mTimers.size() > id && mTimers[id]) {
    retVal = mTimers[id].get();
    retVal->count += increment;
  }
  mTimerFlag.clear();
  return retVal;
}

uint32_t GPUReconstructionProcessing::getNextTimerId()
{
  static std::atomic<uint32_t> id{0};
  return id.fetch_add(1);
}

std::unique_ptr<GPUReconstructionProcessing::threadContext> GPUReconstructionProcessing::GetThreadContext()
{
  return std::make_unique<threadContext>();
}


const std::vector<std::string> GPUReconstructionProcessing::mKernelNames = {
#define GPUCA_KRNL(x_class, ...) GPUCA_M_STR(GPUCA_M_KRNL_NAME(x_class)),
#include "GPUReconstructionKernelList.h"
#undef GPUCA_KRNL
};

#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types, x_num)      \
  template <>                                                                           \
  uint32_t GPUReconstructionProcessing::GetKernelNum<GPUCA_M_KRNL_TEMPLATE(x_class)>()  \
  {                                                                                     \
    return x_num;                                                                       \
  }                                                                                     \
  template <>                                                                           \
  const char* GPUReconstructionProcessing::GetKernelName<GPUCA_M_KRNL_TEMPLATE(x_class)>() \
  {                                                                                     \
    return GPUCA_M_STR(GPUCA_M_KRNL_NAME(x_class));                                     \
  }
#include "GPUReconstructionKernelList.h"
#undef GPUCA_KRNL
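
// For a hypothetical list entry GPUCA_KRNL(GPUExampleKernel, ..., 7) the X-macro above would expand
// to specializations along the lines of (illustrative only, macro-name mangling omitted):
//
//   template <> uint32_t GPUReconstructionProcessing::GetKernelNum<GPUExampleKernel>() { return 7; }
//   template <> const char* GPUReconstructionProcessing::GetKernelName<GPUExampleKernel>() { return "GPUExampleKernel"; }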