71      auto fmt = [](
size_t bytes) -> 
const char* {
 
   73        double mb = (double)bytes / (1024.0 * 1024.0);
 
   74        int n = snprintf(
buf, 
sizeof(
buf), 
"%zu bytes (%.3f MB)", bytes, mb);
 
   92      size_t szClusterFlags = elemsClusterFlags * 
sizeof(int8_t);
 
   93      size_t szInput16 = elemsInput16 * 
sizeof(OrtDataType::Float16_t);
 
   94      size_t szInput32 = elemsInput32 * 
sizeof(float);
 
   95      size_t szProb16 = elemsProb16 * 
sizeof(OrtDataType::Float16_t);
 
   96      size_t szProb32 = elemsProb32 * 
sizeof(float);
 
   97      size_t szReg1_16 = elemsReg1_16 * 
sizeof(OrtDataType::Float16_t);
 
   98      size_t szReg2_16 = elemsReg2_16 * 
sizeof(OrtDataType::Float16_t);
 
   99      size_t szReg1_32 = elemsReg1_32 * 
sizeof(float);
 
  100      size_t szReg2_32 = elemsReg2_32 * 
sizeof(float);
 
  101      size_t szOutputDataClass = elemsOutputDataClass * 
sizeof(int32_t);
 
  103      LOG(info) << 
"(NNCLUS, GPUTPCNNClusterizer, this=" << 
this << 
") Pointers set for clusterizer with memoryID " << 
mMemoryId << 
" deviceID " << 
mDeviceId << 
" and sector " << 
mISector;
 
  104      LOG(info) << 
"(NNCLUS, GPUTPCNNClusterizer, this=" << 
this << 
") mOutputDataClass pointer: " << 
mOutputDataClass 
  105                << 
" | elements=" << elemsOutputDataClass << 
" (= mNnClusterizerTotalClusters)" 
  106                << 
" | " << 
fmt(szOutputDataClass);
 
  107      LOG(info) << 
"(NNCLUS, GPUTPCNNClusterizer, this=" << 
this << 
") mClusterFlags pointer: " << 
static_cast<const void*
>(
mClusterFlags)
 
  108                << 
" | elements=" << elemsClusterFlags << 
" (= 2 * mNnClusterizerBatchedMode)" 
  109                << 
" | " << 
fmt(szClusterFlags);
 
  110      LOG(info) << 
"(NNCLUS, GPUTPCNNClusterizer, this=" << 
this << 
") mInputData_16 pointer: " << 
mInputData_16 
  111                << 
" | elements=" << elemsInput16 << 
" (= mNnClusterizerBatchedMode * mNnClusterizerElementSize)" 
  112                << 
" | " << 
fmt(szInput16);
 
  113      LOG(info) << 
"(NNCLUS, GPUTPCNNClusterizer, this=" << 
this << 
") mModelProbabilities_16 pointer: " << 
mModelProbabilities_16 
  114                << 
" | elements=" << elemsProb16 << 
" (= mNnClusterizerBatchedMode * mNnClusterizerModelClassNumOutputNodes)" 
  115                << 
" | " << 
fmt(szProb16);
 
  116      LOG(info) << 
"(NNCLUS, GPUTPCNNClusterizer, this=" << 
this << 
") mOutputDataReg1_16 pointer: " << 
mOutputDataReg1_16 
  117                << 
" | elements=" << elemsReg1_16 << 
" (= mNnClusterizerBatchedMode * mNnClusterizerModelReg1NumOutputNodes)" 
  118                << 
" | " << 
fmt(szReg1_16);
 
  119      LOG(info) << 
"(NNCLUS, GPUTPCNNClusterizer, this=" << 
this << 
") mOutputDataReg2_16 pointer: " << 
mOutputDataReg2_16 
  120                << 
" | elements=" << elemsReg2_16 << 
" (= mNnClusterizerBatchedMode * mNnClusterizerModelReg2NumOutputNodes)" 
  121                << 
" | " << 
fmt(szReg2_16);
 
  122      LOG(info) << 
"(NNCLUS, GPUTPCNNClusterizer, this=" << 
this << 
") mInputData_32 pointer: " << 
mInputData_32 
  123                << 
" | elements=" << elemsInput32 << 
" (= mNnClusterizerBatchedMode * mNnClusterizerElementSize)" 
  124                << 
" | " << 
fmt(szInput32);
 
  125      LOG(info) << 
"(NNCLUS, GPUTPCNNClusterizer, this=" << 
this << 
") mModelProbabilities_32 pointer: " << 
mModelProbabilities_32 
  126                << 
" | elements=" << elemsProb32 << 
" (= mNnClusterizerBatchedMode * mNnClusterizerModelClassNumOutputNodes)" 
  127                << 
" | " << 
fmt(szProb32);
 
  128      LOG(info) << 
"(NNCLUS, GPUTPCNNClusterizer, this=" << 
this << 
") mOutputDataReg1_32 pointer: " << 
mOutputDataReg1_32 
  129                << 
" | elements=" << elemsReg1_32 << 
" (= mNnClusterizerBatchedMode * mNnClusterizerModelReg1NumOutputNodes)" 
  130                << 
" | " << 
fmt(szReg1_32);
 
  131      LOG(info) << 
"(NNCLUS, GPUTPCNNClusterizer, this=" << 
this << 
") mOutputDataReg2_32 pointer: " << 
mOutputDataReg2_32 
  132                << 
" | elements=" << elemsReg2_32 << 
" (= mNnClusterizerBatchedMode * mNnClusterizerModelReg2NumOutputNodes)" 
  133                << 
" | " << 
fmt(szReg2_32);
 
  136    size_t allocatedBytes = 
static_cast<size_t>(
reinterpret_cast<uintptr_t
>(mem) - 
reinterpret_cast<uintptr_t
>(startMem));
 
  137    double allocatedMB = 
static_cast<double>(allocatedBytes) / (1024.0 * 1024.0);
 
  140      int nn = snprintf(allocMsg, 
sizeof(allocMsg),
 
  141                        "(NNCLUS, GPUTPCNNClusterizer, this=%p) Total scratch allocation in setIOPointers: %zu bytes (%.3f MB)",
 
  142                        (
void*)
this, (
size_t)allocatedBytes, allocatedMB);
 
  144      LOG(info) << allocMsg;
 
 
static void computePointerWithAlignment(T *&basePtr, S *&objPtr, size_t nEntries=1)
void AllocateAndInitializeLate()
int16_t RegisterMemoryAllocation(T *proc, void *(T::*setPtr)(void *), int32_t type, const char *name="", const GPUMemoryReuse &re=GPUMemoryReuse())
const GPUSettingsProcessing & GetProcessingSettings() const
float * mOutputDataReg2_32
OrtDataType::Float16_t * mInputData_16
void * setIOPointers(void *)
int32_t mNnClusterizerBatchedMode
int32_t mNnClusterizerTotalClusters
int32_t mNnClusterizerElementSize
int32_t mNnClusterizerModelReg1NumOutputNodes
int32_t mNnClusterizerVerbosity
OrtDataType::Float16_t * mOutputDataReg2_16
float * mModelProbabilities_32
int32_t mNnClusterizerUseCfRegression
int32_t mNnClusterizerModelClassNumOutputNodes
void SetMaxData(const GPUTrackingInOutPointers &)
int32_t mNnInferenceInputDType
int32_t * mOutputDataClass
int32_t mNnInferenceOutputDType
float * mOutputDataReg1_32
void InitializeProcessor()
int32_t mNnClusterizerModelReg2NumOutputNodes
OrtDataType::Float16_t * mModelProbabilities_16
OrtDataType::Float16_t * mOutputDataReg1_16
void RegisterMemoryAllocation()
typedef void(APIENTRYP PFNGLCULLFACEPROC)(GLenum mode)
GLenum GLuint GLenum GLsizei const GLchar * buf
LOG(info)<< "Compressed in "<< sw.CpuTime()<< " s"