GPUTPCNNClusterizerHost.cxx
// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
// All rights not expressly granted are reserved.
//
// This software is distributed under the terms of the GNU General Public
// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
//
// In applying this license CERN does not waive the privileges and immunities
// granted to it by virtue of its status as an Intergovernmental Organization
// or submit itself to any jurisdiction.
/// \file GPUTPCNNClusterizerHost.cxx

#include "GPUTPCNNClusterizerHost.h"
#include "GPUTPCNNClusterizer.h"
#include "GPUSettings.h"
#include "GPUReconstruction.h"
#include "GPUCommonLogger.h"         // LOG
#include "CommonUtils/StringUtils.h" // o2::utils::Str::tokenize
#include <atomic>
#include <cmath>

#ifdef GPUCA_HAS_ONNX
#include <onnxruntime_cxx_api.h>
#endif

using namespace o2::gpu;
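// Resolves the classification/regression model paths from the settings (optionally from
// a local CCDB download folder) and fills the ONNX Runtime options map that is passed to
// each model via initOptions().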
void GPUTPCNNClusterizerHost::init(const GPUSettingsProcessingNNclusterizer& settings)
{
  std::string class_model_path = settings.nnClassificationPath, reg_model_path = settings.nnRegressionPath;
  std::vector<std::string> reg_model_paths_local;
  std::vector<std::string> evalMode = o2::utils::Str::tokenize(settings.nnEvalMode, ':');
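  // nnEvalMode is a ':'-separated pair selecting the networks, e.g. "c1:r1" or "c2:r2":
  // the first token picks the classification model; a second token of "r2" additionally
  // enables the second-stage regression model.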

  if (settings.nnLoadFromCCDB) {
    reg_model_path = settings.nnLocalFolder + "/net_regression_c1.onnx"; // Must match NeuralNetworkClusterizer.cxx, otherwise the networks may be loaded from the wrong place
    if (evalMode[0] == "c1") {
      class_model_path = settings.nnLocalFolder + "/net_classification_c1.onnx";
    } else if (evalMode[0] == "c2") {
      class_model_path = settings.nnLocalFolder + "/net_classification_c2.onnx";
    }

    if (evalMode[1] == "r2") {
      reg_model_path += ":" + settings.nnLocalFolder + "/net_regression_c2.onnx";
    }
  }
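  // Session options handed to ONNX Runtime. Numeric settings are serialized with
  // std::to_string since the options map is string-to-string; the same map is reused for
  // every model, with "model-path" and "onnx-environment-name" overwritten per network.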
  mOrtOptions = {
    {"model-path", class_model_path},
    {"device-type", settings.nnInferenceDevice},
    {"allocate-device-memory", std::to_string(settings.nnInferenceAllocateDevMem)},
    {"intra-op-num-threads", std::to_string(settings.nnInferenceIntraOpNumThreads)},
    {"inter-op-num-threads", std::to_string(settings.nnInferenceInterOpNumThreads)},
    {"enable-optimizations", std::to_string(settings.nnInferenceEnableOrtOptimization)},
    {"enable-profiling", std::to_string(settings.nnInferenceOrtProfiling)},
    {"profiling-output-path", settings.nnInferenceOrtProfilingPath},
    {"logging-level", std::to_string(settings.nnInferenceVerbosity)},
    {"onnx-environment-name", "c1"}};

  mModelClass.initOptions(mOrtOptions);
  mModelsUsed[0] = true;

  reg_model_paths_local = o2::utils::Str::tokenize(reg_model_path, ':');

  if (!settings.nnClusterizerUseCfRegression) {
    if (reg_model_paths_local.size() == 1) {
      mOrtOptions["model-path"] = reg_model_paths_local[0];
      mOrtOptions["onnx-environment-name"] = "r1";
      mModelReg1.initOptions(mOrtOptions);
      mModelsUsed[1] = true;
    } else {
      mOrtOptions["model-path"] = reg_model_paths_local[0];
      mOrtOptions["onnx-environment-name"] = "r1";
      mModelReg1.initOptions(mOrtOptions);
      mModelsUsed[1] = true;
      mOrtOptions["model-path"] = reg_model_paths_local[1];
      mOrtOptions["onnx-environment-name"] = "r2";
      mModelReg2.initOptions(mOrtOptions);
      mModelsUsed[2] = true;
    }
  }
}
void GPUTPCNNClusterizerHost::initClusterizer(const GPUSettingsProcessingNNclusterizer& settings, GPUTPCNNClusterizer& clustererNN)
{
  clustererNN.mNnClusterizerUseCfRegression = settings.nnClusterizerUseCfRegression;
  clustererNN.mNnClusterizerSizeInputRow = settings.nnClusterizerSizeInputRow;
  clustererNN.mNnClusterizerSizeInputPad = settings.nnClusterizerSizeInputPad;
  clustererNN.mNnClusterizerSizeInputTime = settings.nnClusterizerSizeInputTime;
  clustererNN.mNnClusterizerAddIndexData = settings.nnClusterizerAddIndexData;
  clustererNN.mNnClusterizerElementSize = ((2 * settings.nnClusterizerSizeInputRow + 1) * (2 * settings.nnClusterizerSizeInputPad + 1) * (2 * settings.nnClusterizerSizeInputTime + 1)) + (settings.nnClusterizerAddIndexData ? 3 : 0);
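  // One input element is the flattened (2r+1) x (2p+1) x (2t+1) window in row, pad and
  // time around a candidate, optionally followed by 3 index entries. For example, with
  // half-sizes of 3 in all three dimensions: 7 * 7 * 7 = 343 values, plus 3 = 346.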
  clustererNN.mNnClusterizerBatchedMode = settings.nnClusterizerBatchedMode;
  clustererNN.mNnClusterizerBoundaryFillValue = settings.nnClusterizerBoundaryFillValue;
  clustererNN.mNnSigmoidTrafoClassThreshold = settings.nnSigmoidTrafoClassThreshold;
  if (clustererNN.mNnSigmoidTrafoClassThreshold) {
    clustererNN.mNnClassThreshold = (float)std::log(settings.nnClassThreshold / (1.f - settings.nnClassThreshold));
  } else {
    clustererNN.mNnClassThreshold = settings.nnClassThreshold;
  }
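  // log(t / (1 - t)) is the logit (inverse sigmoid) of the threshold t: comparing the raw
  // network output against it is equivalent to comparing sigmoid(output) against t, while
  // avoiding a sigmoid evaluation per candidate. E.g. t = 0.5 yields a cut at 0.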
  if (settings.nnClusterizerVerbosity < 0) {
    clustererNN.mNnClusterizerVerbosity = settings.nnInferenceVerbosity;
  } else {
    clustererNN.mNnClusterizerVerbosity = settings.nnClusterizerVerbosity;
  }
  clustererNN.mNnInferenceInputDType = settings.nnInferenceInputDType.find("32") != std::string::npos;
  clustererNN.mNnInferenceOutputDType = settings.nnInferenceOutputDType.find("32") != std::string::npos;
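  // The dtype flags are true when the configured type name contains "32" (e.g. "FP32"),
  // false otherwise (e.g. a 16-bit type).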

  if (!settings.nnClusterizerUseCfRegression) {
    if (mModelClass.getNumOutputNodes()[0][1] == 1 || !mModelReg2.isInitialized()) {
      // (body elided in the original listing)
    } else {
      // (body elided in the original listing)
    }
  }
}

// MockedOrtAllocator implementation to be able to use volatile assignment
struct MockedOrtAllocator : OrtAllocator {
  MockedOrtAllocator(GPUReconstruction* = nullptr, OrtMemoryInfo* = nullptr);
  ~MockedOrtAllocator();

  void* Alloc(size_t size);
  void Free(void* p);
  const OrtMemoryInfo* Info() const;
  void* Reserve(size_t size);
  size_t NumAllocations() const;
  size_t NumReserveAllocations() const;

  void LeakCheck();

 private:
  MockedOrtAllocator(const MockedOrtAllocator&) = delete;
  MockedOrtAllocator& operator=(const MockedOrtAllocator&) = delete;

  std::atomic<size_t> memory_inuse{0};
  std::atomic<size_t> num_allocations{0};
  std::atomic<size_t> num_reserve_allocations{0};
  OrtMemoryInfo* memory_info;
  GPUReconstruction* rec = nullptr;
};
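// OrtAllocator is a plain C struct of function pointers. The constructor binds those
// pointers to the member functions via captureless lambdas, so ONNX Runtime can drive
// this allocator through its C API.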
MockedOrtAllocator::MockedOrtAllocator(GPUReconstruction* r, OrtMemoryInfo* info)
{
  OrtAllocator::version = ORT_API_VERSION;
  OrtAllocator::Alloc = [](OrtAllocator* this_, size_t size) { return static_cast<MockedOrtAllocator*>(this_)->Alloc(size); };
  OrtAllocator::Free = [](OrtAllocator* this_, void* p) { static_cast<MockedOrtAllocator*>(this_)->Free(p); };
  OrtAllocator::Info = [](const OrtAllocator* this_) { return static_cast<const MockedOrtAllocator*>(this_)->Info(); };
  OrtAllocator::Reserve = [](OrtAllocator* this_, size_t size) { return static_cast<MockedOrtAllocator*>(this_)->Reserve(size); };
  rec = r;
  memory_info = info;
}

MockedOrtAllocator::~MockedOrtAllocator()
{
  // Ort::GetApi().ReleaseMemoryInfo(memory_info);
  (void)0; // Suppress warning for empty destructor
}

void* MockedOrtAllocator::Alloc(size_t size)
{
  // LOG(info) << "(ORT) Allocating volatile memory of size " << size << " bytes";
  return rec->AllocateVolatileDeviceMemory(size);
}

void* MockedOrtAllocator::Reserve(size_t size)
{
  // LOG(info) << "(ORT) Reserving volatile memory of size " << size << " bytes";
  return rec->AllocateVolatileDeviceMemory(size);
}

void MockedOrtAllocator::Free(void* p)
{
  // LOG(info) << "(ORT) Freeing volatile memory " << p;
  // (remaining body elided in the original listing)
}

const OrtMemoryInfo* MockedOrtAllocator::Info() const
{
  return memory_info;
}

size_t MockedOrtAllocator::NumAllocations() const
{
  return num_allocations.load();
}

size_t MockedOrtAllocator::NumReserveAllocations() const
{
  return num_reserve_allocations.load();
}

void MockedOrtAllocator::LeakCheck()
{
  if (memory_inuse.load()) {
    LOG(warning) << "memory leak!!!";
  }
}
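// Registers the mocked allocator with the ORT environment so that device allocations
// requested by ONNX Runtime are served from GPUReconstruction's volatile memory.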
void GPUTPCNNClusterizerHost::volatileOrtAllocator(Ort::Env* env, Ort::MemoryInfo* memInfo, GPUReconstruction* rec, bool recreate)
{
  mMockedAlloc = std::make_shared<MockedOrtAllocator>(rec, (OrtMemoryInfo*)(*memInfo));
  if (recreate) {
    Ort::ThrowOnError(Ort::GetApi().UnregisterAllocator((OrtEnv*)(*env), (OrtMemoryInfo*)(*memInfo)));
  }
  Ort::ThrowOnError(Ort::GetApi().RegisterAllocator((OrtEnv*)(*env), mMockedAlloc.get()));
  memInfo = (Ort::MemoryInfo*)mMockedAlloc->Info(); // NB: updates only the local pointer copy; callers retrieve the new memory info via the accessor below
}

// (signature elided in the original listing; the name below is a reconstruction, not the
// original: a host-class accessor for the mocked allocator's memory info)
const OrtMemoryInfo* GPUTPCNNClusterizerHost::getMockedMemoryInfo()
{
  return mMockedAlloc->Info();
}