27#include <onnxruntime_cxx_api.h>
34 std::string class_model_path = settings.nnClassificationPath, reg_model_path = settings.nnRegressionPath;
35 std::vector<std::string> reg_model_paths_local;
38 if (settings.nnLoadFromCCDB) {
39 reg_model_path = settings.nnLocalFolder +
"/net_regression_c1.onnx";
40 if (evalMode[0] ==
"c1") {
41 class_model_path = settings.nnLocalFolder +
"/net_classification_c1.onnx";
42 }
else if (evalMode[0] ==
"c2") {
43 class_model_path = settings.nnLocalFolder +
"/net_classification_c2.onnx";
46 if (evalMode[1] ==
"r2") {
47 reg_model_path +=
":" + settings.nnLocalFolder +
"/net_regression_c2.onnx";
52 {
"model-path", class_model_path},
53 {
"device-type", settings.nnInferenceDevice},
54 {
"allocate-device-memory",
std::to_string(settings.nnInferenceAllocateDevMem)},
55 {
"intra-op-num-threads",
std::to_string(settings.nnInferenceIntraOpNumThreads)},
56 {
"inter-op-num-threads",
std::to_string(settings.nnInferenceInterOpNumThreads)},
57 {
"enable-optimizations",
std::to_string(settings.nnInferenceEnableOrtOptimization)},
58 {
"deterministic-compute",
std::to_string(useDeterministicMode ? 1 : settings.nnInferenceUseDeterministicCompute)},
59 {
"enable-profiling",
std::to_string(settings.nnInferenceOrtProfiling)},
60 {
"profiling-output-path", settings.nnInferenceOrtProfilingPath},
62 {
"onnx-environment-name",
"c1"}};
69 if (!settings.nnClusterizerUseCfRegression) {
70 if (reg_model_paths_local.size() == 1) {
71 mOrtOptions[
"model-path"] = reg_model_paths_local[0];
76 mOrtOptions[
"model-path"] = reg_model_paths_local[0];
80 mOrtOptions[
"model-path"] = reg_model_paths_local[1];
116 clustererNN.
mNnClassThreshold = (float)std::log(settings.nnClassThreshold / (1.f - settings.nnClassThreshold));
120 if (settings.nnClusterizerVerbosity < 0) {
128 if (!settings.nnClusterizerUseCfRegression) {
176 void* Alloc(
size_t size);
178 const OrtMemoryInfo* Info()
const;
179 void* Reserve(
size_t size);
180 size_t NumAllocations()
const;
181 size_t NumReserveAllocations()
const;
189 std::atomic<size_t> memory_inuse{0};
190 std::atomic<size_t> num_allocations{0};
191 std::atomic<size_t> num_reserve_allocations{0};
192 OrtMemoryInfo* mMemoryInfoInternal;
198 OrtAllocator::version = ORT_API_VERSION;
204 mMemoryInfoInternal = info;
215 LOG(info) <<
"(ORT) Allocating direct memory of size " <<
size <<
" bytes";
221 LOG(info) <<
"(ORT) Reserving direct memory of size " <<
size <<
" bytes";
232 return mMemoryInfoInternal;
237 return num_allocations.load();
242 return num_reserve_allocations.load();
247 if (memory_inuse.load()) {
248 LOG(warning) <<
"memory leak!!!";
254 mMockedAlloc = std::make_shared<MockedOrtAllocator>(
rec, (OrtMemoryInfo*)(*memInfo));
256 Ort::ThrowOnError(Ort::GetApi().UnregisterAllocator((OrtEnv*)(*env), (OrtMemoryInfo*)(*memInfo)));
258 Ort::ThrowOnError(Ort::GetApi().RegisterAllocator((OrtEnv*)(*env),
mMockedAlloc.get()));
o2::ml::OrtModel mModelReg1
o2::ml::OrtModel mModelClass
const OrtMemoryInfo * getMockedMemoryInfo()
o2::ml::OrtModel mModelReg2
void init(const GPUSettingsProcessingNNclusterizer &, bool=false)
std::unordered_map< std::string, std::string > mOrtOptions
MockedOrtAllocator * getMockedAllocator()
std::shared_ptr< MockedOrtAllocator > mMockedAlloc
std::vector< bool > mModelsUsed
void directOrtAllocator(Ort::Env *, Ort::MemoryInfo *, GPUReconstruction *, bool=false)
void initClusterizer(const GPUSettingsProcessingNNclusterizer &, GPUTPCNNClusterizer &, int32_t=-1, int32_t=-1)
int32_t mNnClusterizerSizeInputTime
uint32_t mNnClusterizerRowTimeSize
int32_t mNnClusterizerSizeInputRow
int32_t mNnClusterizerSizeInputPad
uint32_t mNnClusterizerFullPadSize
int32_t mNnClusterizerBatchedMode
int32_t mNnClusterizerElementSize
int32_t mNnClusterizerModelReg1NumOutputNodes
int32_t mNnClusterizerVerbosity
int32_t maxAllowedTimebin
int32_t mNnClusterizerUseCfRegression
int32_t mNnClusterizerModelClassNumOutputNodes
int32_t mNnClusterizerBoundaryFillValue
int8_t mNnClusterizerAddIndexData
int8_t mNnSigmoidTrafoClassThreshold
int32_t mNnInferenceInputDType
uint32_t mNnClusterizerRowTimeSizeFull
int32_t mNnInferenceOutputDType
uint32_t mNnClusterizerFullRowSize
int32_t mNnClusterizerModelReg2NumOutputNodes
int32_t mNnClusterizerChargeArraySize
uint32_t mNnClusterizerFullTimeSize
int8_t mNnClusterizerSetDeconvolutionFlags
uint32_t mNnClusterizerPadTimeSize
int8_t mNnClusterizerUseClassification
void initOptions(std::unordered_map< std::string, std::string > optionsMap)
std::vector< std::vector< int64_t > > getNumOutputNodes() const
#define TPC_MAX_FRAGMENT_LEN_GPU
typedef void(APIENTRYP PFNGLCULLFACEPROC)(GLenum mode)
std::string to_string(gsl::span< T, Size > span)
void * Alloc(size_t size)
size_t NumAllocations() const
const OrtMemoryInfo * Info() const
MockedOrtAllocator(GPUReconstruction *=nullptr, OrtMemoryInfo *=nullptr)
void * Reserve(size_t size)
size_t NumReserveAllocations() const
static std::vector< std::string > tokenize(const std::string &src, char delim, bool trimToken=true, bool skipEmpty=true)
LOG(info)<< "Compressed in "<< sw.CpuTime()<< " s"