Project
Loading...
Searching...
No Matches
GPUReconstructionProcessing.cxx
Go to the documentation of this file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
14
17#include "GPUDefParametersLoad.inc"
18
19using namespace o2::gpu;
20
22{
23 if (mMaster == nullptr) {
24 mParCPU = new GPUDefParameters(o2::gpu::internal::GPUDefParametersLoad());
26 } else {
28 mParCPU = master->mParCPU;
29 mParDevice = master->mParDevice;
30 }
31}
32
34{
35 if (mMaster == nullptr) {
36 delete mParCPU;
37 delete mParDevice;
38 }
39}
40
42{
43 int32_t nThreads = 0;
44 if (mProcessingSettings.inKernelParallel == 2 && mNActiveThreadsOuterLoop) {
45 if (splitCores) {
48 } else {
49 nThreads = mMaxHostThreads;
50 }
51 nThreads = std::max(1, nThreads);
52 } else {
53 nThreads = mProcessingSettings.inKernelParallel ? mMaxHostThreads : 1;
54 }
55 return nThreads;
56}
57
59{
60 mActiveHostKernelThreads = std::max(1, n < 0 ? mMaxHostThreads : std::min(n, mMaxHostThreads));
61 mThreading->activeThreads = std::make_unique<tbb::task_arena>(mActiveHostKernelThreads);
62 if (mProcessingSettings.debugLevel >= 3) {
63 GPUInfo("Set number of active parallel kernels threads on host to %d (%d requested)", mActiveHostKernelThreads, n);
64 }
65}
66
67void GPUReconstructionProcessing::runParallelOuterLoop(bool doGPU, uint32_t nThreads, std::function<void(uint32_t)> lambda)
68{
69 uint32_t nThreadsAdjusted = SetAndGetNActiveThreadsOuterLoop(!doGPU, nThreads);
70 if (nThreadsAdjusted > 1) {
71 tbb::task_arena(nThreadsAdjusted).execute([&] {
72 tbb::parallel_for<uint32_t>(0, nThreads, lambda, tbb::simple_partitioner());
73 });
74 } else {
75 for (uint32_t i = 0; i < nThreads; i++) {
76 lambda(i);
77 }
78 }
79}
80
82{
83 if (condition && mProcessingSettings.inKernelParallel != 1) {
84 mNActiveThreadsOuterLoop = mProcessingSettings.inKernelParallel == 2 ? std::min<uint32_t>(max, mMaxHostThreads) : mMaxHostThreads;
85 } else {
87 }
88 if (mProcessingSettings.debugLevel >= 5) {
89 printf("Running %d threads in outer loop\n", mNActiveThreadsOuterLoop);
90 }
92}
93
// Flag used as a busy-wait lock around accesses to mTimers in insertTimer() and getTimerById();
// it starts in the cleared (unlocked) state.
94std::atomic_flag GPUReconstructionProcessing::mTimerFlag = ATOMIC_FLAG_INIT;
95
96GPUReconstructionProcessing::timerMeta* GPUReconstructionProcessing::insertTimer(uint32_t id, std::string&& name, int32_t J, int32_t num, int32_t type, RecoStep step)
97{
98 while (mTimerFlag.test_and_set()) {
99 }
100 if (mTimers.size() <= id) {
101 mTimers.resize(id + 1);
102 }
103 if (mTimers[id] == nullptr) {
104 if (J >= 0) {
105 name += std::to_string(J);
106 }
107 mTimers[id].reset(new timerMeta{std::unique_ptr<HighResTimer[]>{new HighResTimer[num]}, name, num, type, 1u, step, (size_t)0});
108 } else {
109 mTimers[id]->count++;
110 }
111 timerMeta* retVal = mTimers[id].get();
112 mTimerFlag.clear();
113 return retVal;
114}
115
116GPUReconstructionProcessing::timerMeta* GPUReconstructionProcessing::getTimerById(uint32_t id, bool increment)
117{
118 timerMeta* retVal = nullptr;
119 while (mTimerFlag.test_and_set()) {
120 }
121 if (mTimers.size() > id && mTimers[id]) {
122 retVal = mTimers[id].get();
123 retVal->count += increment;
124 }
125 mTimerFlag.clear();
126 return retVal;
127}
128
129uint32_t GPUReconstructionProcessing::getNextTimerId()
130{
131 static std::atomic<uint32_t> id{0};
132 return id.fetch_add(1);
133}
134
135std::unique_ptr<gpu_reconstruction_kernels::threadContext> GPUReconstructionProcessing::GetThreadContext()
136{
137 return std::make_unique<gpu_reconstruction_kernels::threadContext>();
138}
139
142
// Table of kernel name strings. GPUCA_KRNL is temporarily defined so that each use of it in
// GPUReconstructionKernelList.h expands to one initializer entry,
// GPUCA_M_STR(GPUCA_M_KRNL_NAME(x_class)), followed by a comma; all other macro arguments
// are ignored here. The macro is undefined again right after the include.
143const std::vector<std::string> GPUReconstructionProcessing::mKernelNames = {
144#define GPUCA_KRNL(x_class, ...) GPUCA_M_STR(GPUCA_M_KRNL_NAME(x_class)),
145#include "GPUReconstructionKernelList.h"
146#undef GPUCA_KRNL
147};
148
// Generates two explicit template specializations for each use of GPUCA_KRNL in
// GPUReconstructionKernelList.h:
//  - GetKernelNum<...>()  returns the x_num argument of that entry;
//  - GetKernelName<...>() returns GPUCA_M_STR(GPUCA_M_KRNL_NAME(x_class)) as a const char*.
// The x_attributes, x_arguments, x_forward and x_types arguments are not used by this expansion.
// The macro is undefined again right after the include.
149#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types, x_num) \
150 template <> \
151 uint32_t GPUReconstructionProcessing::GetKernelNum<GPUCA_M_KRNL_TEMPLATE(x_class)>() \
152 { \
153 return x_num; \
154 } \
155 template <> \
156 const char* GPUReconstructionProcessing::GetKernelName<GPUCA_M_KRNL_TEMPLATE(x_class)>() \
157 { \
158 return GPUCA_M_STR(GPUCA_M_KRNL_NAME(x_class)); \
159 }
160#include "GPUReconstructionKernelList.h"
161#undef GPUCA_KRNL
int32_t i
int32_t retVal
double num
virtual std::unique_ptr< gpu_reconstruction_kernels::threadContext > GetThreadContext() override
void runParallelOuterLoop(bool doGPU, uint32_t nThreads, std::function< void(uint32_t)> lambda)
std::vector< std::unique_ptr< timerMeta > > mTimers
static const std::vector< std::string > mKernelNames
GPUReconstructionProcessing(const GPUSettingsDeviceBackend &cfg)
uint32_t SetAndGetNActiveThreadsOuterLoop(bool condition, uint32_t max)
GPUSettingsProcessing mProcessingSettings
std::shared_ptr< GPUReconstructionThreading > mThreading
GLdouble n
Definition glcorearb.h:1982
GLuint const GLchar * name
Definition glcorearb.h:781
GLint GLint GLsizei GLint GLenum GLenum type
Definition glcorearb.h:275
GLuint id
Definition glcorearb.h:650
value_T step
Definition TrackUtils.h:42
std::string to_string(gsl::span< T, Size > span)
Definition common.h:52
constexpr size_t max