// Capture-less helper lambda that renders a byte count as human-readable text of the
// form "<bytes> bytes (<MB> MB)", with MB = bytes / (1024 * 1024) printed to three
// decimals. Its declared return type is const char*.
// NOTE(review): the declaration of `buf`, the return statement and the closing brace
// are not visible in this excerpt. Presumably the lambda returns `buf`; since the
// result is streamed into LOG by the callers, `buf` must then outlive the call —
// confirm its storage duration.
71 auto fmt = [](
size_t bytes) ->
const char* {
// Convert the byte count to MB (1 MB = 1024 * 1024 bytes here).
73 double mb = (double)bytes / (1024.0 * 1024.0);
// Bounded formatting into buf (at most sizeof(buf) bytes written); n holds snprintf's
// return value. How n is used afterwards is not visible here.
74 int n = snprintf(
buf,
sizeof(
buf),
"%zu bytes (%.3f MB)", bytes, mb);
// Size in bytes of each buffer: element count times the size of one element.
// The elems* counts are computed outside this excerpt. These sz* values are only
// used below for the diagnostic log output.
// Cluster flags: one int8_t per element.
92 size_t szClusterFlags = elemsClusterFlags *
sizeof(int8_t);
// Input buffers: Float16_t elements for the 16-bit variant, float for the 32-bit one.
93 size_t szInput16 = elemsInput16 *
sizeof(OrtDataType::Float16_t);
94 size_t szInput32 = elemsInput32 *
sizeof(float);
// Model probability buffers (16-bit and 32-bit variants).
95 size_t szProb16 = elemsProb16 *
sizeof(OrtDataType::Float16_t);
96 size_t szProb32 = elemsProb32 *
sizeof(float);
// Reg1 / Reg2 output buffers, 16-bit variants.
97 size_t szReg1_16 = elemsReg1_16 *
sizeof(OrtDataType::Float16_t);
98 size_t szReg2_16 = elemsReg2_16 *
sizeof(OrtDataType::Float16_t);
// Reg1 / Reg2 output buffers, 32-bit variants.
99 size_t szReg1_32 = elemsReg1_32 *
sizeof(float);
100 size_t szReg2_32 = elemsReg2_32 *
sizeof(float);
// Output class buffer: one int32_t per element.
101 size_t szOutputDataClass = elemsOutputDataClass *
sizeof(int32_t);
// Header line of the diagnostic dump: identifies this clusterizer instance by its
// address (this) and reports its memory ID, device ID and sector.
103 LOG(info) <<
"(NNCLUS, GPUTPCNNClusterizer, this=" <<
this <<
") Pointers set for clusterizer with memoryID " <<
mMemoryId <<
" deviceID " <<
mDeviceId <<
" and sector " <<
mISector;
// One log line per buffer: pointer value, element count and formatted byte size.
// The parenthesised "(= ...)" text in each message names the expression the element
// count is expected to equal; the actual elems* computation is outside this excerpt.
// Output class buffer.
104 LOG(info) <<
"(NNCLUS, GPUTPCNNClusterizer, this=" <<
this <<
") mOutputDataClass pointer: " <<
mOutputDataClass
105 <<
" | elements=" << elemsOutputDataClass <<
" (= mNnClusterizerTotalClusters)"
106 <<
" | " <<
fmt(szOutputDataClass);
// Cluster flags buffer. The pointer is cast to const void* before streaming.
// NOTE(review): presumably mClusterFlags is an int8_t* (its byte size is computed with
// sizeof(int8_t)), which a stream could otherwise print as a character string rather
// than an address — confirm against the member declaration.
107 LOG(info) <<
"(NNCLUS, GPUTPCNNClusterizer, this=" <<
this <<
") mClusterFlags pointer: " <<
static_cast<const void*
>(
mClusterFlags)
108 <<
" | elements=" << elemsClusterFlags <<
" (= 2 * mNnClusterizerBatchedMode)"
109 <<
" | " <<
fmt(szClusterFlags);
// Log lines for the four 16-bit (_16) buffers. Each reports the pointer value, the
// element count with the expression it is expected to equal, and the byte size
// rendered by fmt().
// 16-bit input buffer.
110 LOG(info) <<
"(NNCLUS, GPUTPCNNClusterizer, this=" <<
this <<
") mInputData_16 pointer: " <<
mInputData_16
111 <<
" | elements=" << elemsInput16 <<
" (= mNnClusterizerBatchedMode * mNnClusterizerElementSize)"
112 <<
" | " <<
fmt(szInput16);
// 16-bit model probabilities buffer.
113 LOG(info) <<
"(NNCLUS, GPUTPCNNClusterizer, this=" <<
this <<
") mModelProbabilities_16 pointer: " <<
mModelProbabilities_16
114 <<
" | elements=" << elemsProb16 <<
" (= mNnClusterizerBatchedMode * mNnClusterizerModelClassNumOutputNodes)"
115 <<
" | " <<
fmt(szProb16);
// 16-bit Reg1 output buffer.
116 LOG(info) <<
"(NNCLUS, GPUTPCNNClusterizer, this=" <<
this <<
") mOutputDataReg1_16 pointer: " <<
mOutputDataReg1_16
117 <<
" | elements=" << elemsReg1_16 <<
" (= mNnClusterizerBatchedMode * mNnClusterizerModelReg1NumOutputNodes)"
118 <<
" | " <<
fmt(szReg1_16);
// 16-bit Reg2 output buffer.
119 LOG(info) <<
"(NNCLUS, GPUTPCNNClusterizer, this=" <<
this <<
") mOutputDataReg2_16 pointer: " <<
mOutputDataReg2_16
120 <<
" | elements=" << elemsReg2_16 <<
" (= mNnClusterizerBatchedMode * mNnClusterizerModelReg2NumOutputNodes)"
121 <<
" | " <<
fmt(szReg2_16);
// Log lines for the four 32-bit (_32) buffers, in the same layout as the other
// per-buffer lines: pointer value, element count with the expression it is expected
// to equal, and the byte size rendered by fmt().
// 32-bit input buffer.
122 LOG(info) <<
"(NNCLUS, GPUTPCNNClusterizer, this=" <<
this <<
") mInputData_32 pointer: " <<
mInputData_32
123 <<
" | elements=" << elemsInput32 <<
" (= mNnClusterizerBatchedMode * mNnClusterizerElementSize)"
124 <<
" | " <<
fmt(szInput32);
// 32-bit model probabilities buffer.
125 LOG(info) <<
"(NNCLUS, GPUTPCNNClusterizer, this=" <<
this <<
") mModelProbabilities_32 pointer: " <<
mModelProbabilities_32
126 <<
" | elements=" << elemsProb32 <<
" (= mNnClusterizerBatchedMode * mNnClusterizerModelClassNumOutputNodes)"
127 <<
" | " <<
fmt(szProb32);
// 32-bit Reg1 output buffer.
128 LOG(info) <<
"(NNCLUS, GPUTPCNNClusterizer, this=" <<
this <<
") mOutputDataReg1_32 pointer: " <<
mOutputDataReg1_32
129 <<
" | elements=" << elemsReg1_32 <<
" (= mNnClusterizerBatchedMode * mNnClusterizerModelReg1NumOutputNodes)"
130 <<
" | " <<
fmt(szReg1_32);
// 32-bit Reg2 output buffer.
131 LOG(info) <<
"(NNCLUS, GPUTPCNNClusterizer, this=" <<
this <<
") mOutputDataReg2_32 pointer: " <<
mOutputDataReg2_32
132 <<
" | elements=" << elemsReg2_32 <<
" (= mNnClusterizerBatchedMode * mNnClusterizerModelReg2NumOutputNodes)"
133 <<
" | " <<
fmt(szReg2_32);
// Total number of bytes between startMem and mem, taken as the difference of the two
// addresses after conversion to uintptr_t.
// NOTE(review): assumes mem is not below startMem; the subtraction is unsigned and
// would wrap otherwise. Both pointers are set outside this excerpt.
136 size_t allocatedBytes =
static_cast<size_t>(
reinterpret_cast<uintptr_t
>(mem) -
reinterpret_cast<uintptr_t
>(startMem));
// Same amount expressed in MB (1 MB = 1024 * 1024 bytes here).
137 double allocatedMB =
static_cast<double>(allocatedBytes) / (1024.0 * 1024.0);
// Format the summary into allocMsg (declared outside this excerpt), bounded by
// sizeof(allocMsg). nn holds snprintf's return value; its use is not visible here.
// The (size_t) cast on allocatedBytes is redundant, as the variable is already size_t.
140 int nn = snprintf(allocMsg,
sizeof(allocMsg),
141 "(NNCLUS, GPUTPCNNClusterizer, this=%p) Total scratch allocation in setIOPointers: %zu bytes (%.3f MB)",
142 (
void*)
this, (
size_t)allocatedBytes, allocatedMB);
// Emit the formatted summary line.
144 LOG(info) << allocMsg;
static void computePointerWithAlignment(T *&basePtr, S *&objPtr, size_t nEntries=1)
void AllocateAndInitializeLate()
int16_t RegisterMemoryAllocation(T *proc, void *(T::*setPtr)(void *), int32_t type, const char *name="", const GPUMemoryReuse &re=GPUMemoryReuse())
const GPUSettingsProcessing & GetProcessingSettings() const
float * mOutputDataReg2_32
OrtDataType::Float16_t * mInputData_16
void * setIOPointers(void *)
int32_t mNnClusterizerBatchedMode
int32_t mNnClusterizerTotalClusters
int32_t mNnClusterizerElementSize
int32_t mNnClusterizerModelReg1NumOutputNodes
int32_t mNnClusterizerVerbosity
OrtDataType::Float16_t * mOutputDataReg2_16
float * mModelProbabilities_32
int32_t mNnClusterizerUseCfRegression
int32_t mNnClusterizerModelClassNumOutputNodes
void SetMaxData(const GPUTrackingInOutPointers &)
int32_t mNnInferenceInputDType
int32_t * mOutputDataClass
int32_t mNnInferenceOutputDType
float * mOutputDataReg1_32
void InitializeProcessor()
int32_t mNnClusterizerModelReg2NumOutputNodes
OrtDataType::Float16_t * mModelProbabilities_16
OrtDataType::Float16_t * mOutputDataReg1_16
void RegisterMemoryAllocation()
typedef void(APIENTRYP PFNGLCULLFACEPROC)(GLenum mode)
GLenum GLuint GLenum GLsizei const GLchar * buf
LOG(info)<< "Compressed in "<< sw.CpuTime()<< " s"