Project
Loading...
Searching...
No Matches
GPUTPCNNClusterizer.cxx
Go to the documentation of this file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
14
15#include "GPUReconstruction.h"
17#include "GPUTPCNNClusterizer.h"
18#include "GPUSettings.h"
19#include "GPUCommonLogger.h"
20
21using namespace o2::gpu;
22
24
26
28{
29 // Keep track of the start address to compute how much memory we assign
30 void* startMem = mem;
36 }
38
42 }
46 }
49 }
50 }
54 }
58 }
61 }
62 }
63 }
64 }
67 }
68
// Render a byte count as "<bytes> bytes (<MB> MB)" for the size log lines.
//
// Returns a pointer to a per-thread scratch buffer. The text is overwritten by
// the next fmt() call on the same thread, so the result must be consumed
// (streamed/copied) before fmt() is called again -- i.e. use at most one fmt()
// call per LOG statement.
auto fmt = [](size_t bytes) -> const char* {
  // thread_local instead of a plain static: a single shared static buffer would
  // be a data race if two threads formatted sizes at the same time.
  static thread_local char buf[64];
  const double mb = static_cast<double>(bytes) / (1024.0 * 1024.0);
  // snprintf truncates to sizeof(buf) and always NUL-terminates; the longest
  // output for a 64-bit size_t (~50 characters) fits in the buffer.
  snprintf(buf, sizeof(buf), "%zu bytes (%.3f MB)", bytes, mb);
  return buf;
};
78
79 // Element counts (number of array entries, not bytes)
80 size_t elemsClusterFlags = (mClusterFlags && mNnClusterizerBatchedMode > 0) ? (size_t)2 * mNnClusterizerBatchedMode : 0;
89 size_t elemsOutputDataClass = (mOutputDataClass && mNnClusterizerTotalClusters > 0) ? (size_t)mNnClusterizerTotalClusters : 0;
90
91 // Byte sizes
92 size_t szClusterFlags = elemsClusterFlags * sizeof(int8_t);
93 size_t szInput16 = elemsInput16 * sizeof(OrtDataType::Float16_t);
94 size_t szInput32 = elemsInput32 * sizeof(float);
95 size_t szProb16 = elemsProb16 * sizeof(OrtDataType::Float16_t);
96 size_t szProb32 = elemsProb32 * sizeof(float);
97 size_t szReg1_16 = elemsReg1_16 * sizeof(OrtDataType::Float16_t);
98 size_t szReg2_16 = elemsReg2_16 * sizeof(OrtDataType::Float16_t);
99 size_t szReg1_32 = elemsReg1_32 * sizeof(float);
100 size_t szReg2_32 = elemsReg2_32 * sizeof(float);
101 size_t szOutputDataClass = elemsOutputDataClass * sizeof(int32_t);
102
103 LOG(info) << "(NNCLUS, GPUTPCNNClusterizer, this=" << this << ") Pointers set for clusterizer with memoryID " << mMemoryId << " deviceID " << mDeviceId << " and sector " << mISector;
104 LOG(info) << "(NNCLUS, GPUTPCNNClusterizer, this=" << this << ") mOutputDataClass pointer: " << mOutputDataClass
105 << " | elements=" << elemsOutputDataClass << " (= mNnClusterizerTotalClusters)"
106 << " | " << fmt(szOutputDataClass);
107 LOG(info) << "(NNCLUS, GPUTPCNNClusterizer, this=" << this << ") mClusterFlags pointer: " << static_cast<const void*>(mClusterFlags)
108 << " | elements=" << elemsClusterFlags << " (= 2 * mNnClusterizerBatchedMode)"
109 << " | " << fmt(szClusterFlags);
110 LOG(info) << "(NNCLUS, GPUTPCNNClusterizer, this=" << this << ") mInputData_16 pointer: " << mInputData_16
111 << " | elements=" << elemsInput16 << " (= mNnClusterizerBatchedMode * mNnClusterizerElementSize)"
112 << " | " << fmt(szInput16);
113 LOG(info) << "(NNCLUS, GPUTPCNNClusterizer, this=" << this << ") mModelProbabilities_16 pointer: " << mModelProbabilities_16
114 << " | elements=" << elemsProb16 << " (= mNnClusterizerBatchedMode * mNnClusterizerModelClassNumOutputNodes)"
115 << " | " << fmt(szProb16);
116 LOG(info) << "(NNCLUS, GPUTPCNNClusterizer, this=" << this << ") mOutputDataReg1_16 pointer: " << mOutputDataReg1_16
117 << " | elements=" << elemsReg1_16 << " (= mNnClusterizerBatchedMode * mNnClusterizerModelReg1NumOutputNodes)"
118 << " | " << fmt(szReg1_16);
119 LOG(info) << "(NNCLUS, GPUTPCNNClusterizer, this=" << this << ") mOutputDataReg2_16 pointer: " << mOutputDataReg2_16
120 << " | elements=" << elemsReg2_16 << " (= mNnClusterizerBatchedMode * mNnClusterizerModelReg2NumOutputNodes)"
121 << " | " << fmt(szReg2_16);
122 LOG(info) << "(NNCLUS, GPUTPCNNClusterizer, this=" << this << ") mInputData_32 pointer: " << mInputData_32
123 << " | elements=" << elemsInput32 << " (= mNnClusterizerBatchedMode * mNnClusterizerElementSize)"
124 << " | " << fmt(szInput32);
125 LOG(info) << "(NNCLUS, GPUTPCNNClusterizer, this=" << this << ") mModelProbabilities_32 pointer: " << mModelProbabilities_32
126 << " | elements=" << elemsProb32 << " (= mNnClusterizerBatchedMode * mNnClusterizerModelClassNumOutputNodes)"
127 << " | " << fmt(szProb32);
128 LOG(info) << "(NNCLUS, GPUTPCNNClusterizer, this=" << this << ") mOutputDataReg1_32 pointer: " << mOutputDataReg1_32
129 << " | elements=" << elemsReg1_32 << " (= mNnClusterizerBatchedMode * mNnClusterizerModelReg1NumOutputNodes)"
130 << " | " << fmt(szReg1_32);
131 LOG(info) << "(NNCLUS, GPUTPCNNClusterizer, this=" << this << ") mOutputDataReg2_32 pointer: " << mOutputDataReg2_32
132 << " | elements=" << elemsReg2_32 << " (= mNnClusterizerBatchedMode * mNnClusterizerModelReg2NumOutputNodes)"
133 << " | " << fmt(szReg2_32);
134 }
135 // Compute allocated bytes (difference between advanced pointer and start pointer)
136 size_t allocatedBytes = static_cast<size_t>(reinterpret_cast<uintptr_t>(mem) - reinterpret_cast<uintptr_t>(startMem));
137 double allocatedMB = static_cast<double>(allocatedBytes) / (1024.0 * 1024.0);
138 {
139 char allocMsg[256];
140 int nn = snprintf(allocMsg, sizeof(allocMsg),
141 "(NNCLUS, GPUTPCNNClusterizer, this=%p) Total scratch allocation in setIOPointers: %zu bytes (%.3f MB)",
142 (void*)this, (size_t)allocatedBytes, allocatedMB);
143 (void)nn;
144 LOG(info) << allocMsg;
145 }
146 }
147
148 return mem;
149}
150
GPUReconstruction * mRec
static void computePointerWithAlignment(T *&basePtr, S *&objPtr, size_t nEntries=1)
int16_t RegisterMemoryAllocation(T *proc, void *(T::*setPtr)(void *), int32_t type, const char *name="", const GPUMemoryReuse &re=GPUMemoryReuse())
const GPUSettingsProcessing & GetProcessingSettings() const
OrtDataType::Float16_t * mInputData_16
OrtDataType::Float16_t * mOutputDataReg2_16
void SetMaxData(const GPUTrackingInOutPointers &)
OrtDataType::Float16_t * mModelProbabilities_16
OrtDataType::Float16_t * mOutputDataReg1_16
GLdouble n
Definition glcorearb.h:1982
typedef void(APIENTRYP PFNGLCULLFACEPROC)(GLenum mode)
GLenum GLuint GLenum GLsizei const GLchar * buf
Definition glcorearb.h:2514
LOG(info)<< "Compressed in "<< sw.CpuTime()<< " s"