Project
Loading...
Searching...
No Matches
GPUReconstructionCUDAGenRTC.cxx
Go to the documentation of this file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
14
15#define GPUCA_GPUCODE_HOSTONLY
17#include "GPUParamRTC.h"
18#include "GPUDefMacros.h"
19#include <unistd.h>
20#include "Framework/SHA1.h"
21#include <sys/stat.h>
22#include <fcntl.h>
23#include <filesystem>
24
25#include <oneapi/tbb.h>
26using namespace o2::gpu;
27
29QGET_LD_BINARY_SYMBOLS(GPUReconstructionCUDArtc_src);
30QGET_LD_BINARY_SYMBOLS(GPUReconstructionCUDArtc_command);
31QGET_LD_BINARY_SYMBOLS(GPUReconstructionCUDArtc_command_arch);
32
33int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile)
34{
35 std::string rtcparam = std::string("#define GPUCA_RTC_CODE\n") +
36 std::string(mProcessingSettings.rtc.optSpecialCode ? "#define GPUCA_RTC_SPECIAL_CODE(...) __VA_ARGS__\n" : "#define GPUCA_RTC_SPECIAL_CODE(...)\n") +
37 GPUParamRTC::generateRTCCode(param(), mProcessingSettings.rtc.optConstexpr);
38 if (filename == "") {
39 filename = "/tmp/o2cagpu_rtc_";
40 }
41 filename += std::to_string(getpid());
42 filename += "_";
43 filename += std::to_string(rand());
44
45 std::vector<std::string> kernels;
46 getRTCKernelCalls(kernels);
47 std::string kernelsall;
48 for (uint32_t i = 0; i < kernels.size(); i++) {
49 kernelsall += kernels[i] + "\n";
50 }
51
52 std::string baseCommand = (mProcessingSettings.RTCprependCommand != "" ? (mProcessingSettings.RTCprependCommand + " ") : "");
53 baseCommand += (getenv("O2_GPU_RTC_OVERRIDE_CMD") ? std::string(getenv("O2_GPU_RTC_OVERRIDE_CMD")) : std::string(_binary_GPUReconstructionCUDArtc_command_start, _binary_GPUReconstructionCUDArtc_command_len));
54 baseCommand += std::string(" ") + (mProcessingSettings.RTCoverrideArchitecture != "" ? mProcessingSettings.RTCoverrideArchitecture : std::string(_binary_GPUReconstructionCUDArtc_command_arch_start, _binary_GPUReconstructionCUDArtc_command_arch_len));
55
56 char shasource[21], shaparam[21], shacmd[21], shakernels[21];
57 if (mProcessingSettings.rtc.cacheOutput) {
58 o2::framework::internal::SHA1(shasource, _binary_GPUReconstructionCUDArtc_src_start, _binary_GPUReconstructionCUDArtc_src_len);
59 o2::framework::internal::SHA1(shaparam, rtcparam.c_str(), rtcparam.size());
60 o2::framework::internal::SHA1(shacmd, baseCommand.c_str(), baseCommand.size());
61 o2::framework::internal::SHA1(shakernels, kernelsall.c_str(), kernelsall.size());
62 }
63
64 nCompile = mProcessingSettings.rtc.compilePerKernel ? kernels.size() : 1;
65 bool cacheLoaded = false;
66 int32_t fd = 0;
67 if (mProcessingSettings.rtc.cacheOutput) {
68 if (mProcessingSettings.RTCcacheFolder != ".") {
69 std::filesystem::create_directories(mProcessingSettings.RTCcacheFolder);
70 }
71 if (mProcessingSettings.rtc.cacheMutex) {
72 mode_t mask = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH;
73 fd = open((mProcessingSettings.RTCcacheFolder + "/cache.lock").c_str(), O_RDWR | O_CREAT | O_CLOEXEC, mask);
74 if (fd == -1) {
75 throw std::runtime_error("Error opening rtc cache mutex lock file");
76 }
77 fchmod(fd, mask);
78 if (lockf(fd, F_LOCK, 0)) {
79 throw std::runtime_error("Error locking rtc cache mutex file");
80 }
81 }
82
83 FILE* fp = fopen((mProcessingSettings.RTCcacheFolder + "/rtc.cuda.cache").c_str(), "rb");
84 char sharead[20];
85 if (fp) {
86 size_t len;
87 while (true) {
88 if (fread(sharead, 1, 20, fp) != 20) {
89 throw std::runtime_error("Cache file corrupt");
90 }
91 if (!mProcessingSettings.rtc.ignoreCacheValid && memcmp(sharead, shasource, 20)) {
92 GPUInfo("Cache file content outdated (source)");
93 break;
94 }
95 if (fread(sharead, 1, 20, fp) != 20) {
96 throw std::runtime_error("Cache file corrupt");
97 }
98 if (!mProcessingSettings.rtc.ignoreCacheValid && memcmp(sharead, shaparam, 20)) {
99 GPUInfo("Cache file content outdated (param)");
100 break;
101 }
102 if (fread(sharead, 1, 20, fp) != 20) {
103 throw std::runtime_error("Cache file corrupt");
104 }
105 if (!mProcessingSettings.rtc.ignoreCacheValid && memcmp(sharead, shacmd, 20)) {
106 GPUInfo("Cache file content outdated (commandline)");
107 break;
108 }
109 if (fread(sharead, 1, 20, fp) != 20) {
110 throw std::runtime_error("Cache file corrupt");
111 }
112 if (!mProcessingSettings.rtc.ignoreCacheValid && memcmp(sharead, shakernels, 20)) {
113 GPUInfo("Cache file content outdated (kernel definitions)");
114 break;
115 }
116 GPUSettingsProcessingRTC cachedSettings;
117 static_assert(std::is_trivially_copyable_v<GPUSettingsProcessingRTC> == true, "GPUSettingsProcessingRTC must be POD");
118 if (fread(&cachedSettings, sizeof(cachedSettings), 1, fp) != 1) {
119 throw std::runtime_error("Cache file corrupt");
120 }
121 if (!mProcessingSettings.rtc.ignoreCacheValid && !(cachedSettings == mProcessingSettings.rtc)) {
122 GPUInfo("Cache file content outdated (rtc parameters)");
123 break;
124 }
125 std::vector<char> buffer;
126 for (uint32_t i = 0; i < nCompile; i++) {
127 if (fread(&len, sizeof(len), 1, fp) != 1) {
128 throw std::runtime_error("Cache file corrupt");
129 }
130 buffer.resize(len);
131 if (fread(buffer.data(), 1, len, fp) != len) {
132 throw std::runtime_error("Cache file corrupt");
133 }
134 FILE* fp2 = fopen((filename + "_" + std::to_string(i) + mRtcBinExtension).c_str(), "w+b");
135 if (fp2 == nullptr) {
136 throw std::runtime_error("Cannot open tmp file");
137 }
138 if (fwrite(buffer.data(), 1, len, fp2) != len) {
139 throw std::runtime_error("Error writing file");
140 }
141 fclose(fp2);
142 }
143 GPUInfo("Using RTC cache file");
144 cacheLoaded = true;
145 break;
146 };
147 fclose(fp);
148 }
149 }
150 if (!cacheLoaded) {
151 if (mProcessingSettings.debugLevel >= 0) {
152 GPUInfo("Starting CUDA RTC Compilation");
153 }
154 HighResTimer rtcTimer;
155 rtcTimer.ResetStart();
156 tbb::parallel_for<uint32_t>(0, nCompile, [&](auto i) {
157 if (mProcessingSettings.debugLevel >= 3) {
158 printf("Compiling %s\n", (filename + "_" + std::to_string(i) + mRtcSrcExtension).c_str());
159 }
160 FILE* fp = fopen((filename + "_" + std::to_string(i) + mRtcSrcExtension).c_str(), "w+b");
161 if (fp == nullptr) {
162 throw std::runtime_error("Error opening file");
163 }
164
165 std::string kernel = "extern \"C\" {";
166 kernel += mProcessingSettings.rtc.compilePerKernel ? kernels[i] : kernelsall;
167 kernel += "}";
168
169 if (fwrite(rtcparam.c_str(), 1, rtcparam.size(), fp) != rtcparam.size() ||
170 fwrite(_binary_GPUReconstructionCUDArtc_src_start, 1, _binary_GPUReconstructionCUDArtc_src_len, fp) != _binary_GPUReconstructionCUDArtc_src_len ||
171 fwrite(kernel.c_str(), 1, kernel.size(), fp) != kernel.size()) {
172 throw std::runtime_error("Error writing file");
173 }
174 fclose(fp);
175 std::string command = baseCommand;
176 command += " -c " + filename + "_" + std::to_string(i) + mRtcSrcExtension + " -o " + filename + "_" + std::to_string(i) + mRtcBinExtension;
177 if (mProcessingSettings.debugLevel < 0) {
178 command += " &> /dev/null";
179 } else if (mProcessingSettings.debugLevel < 2) {
180 command += " > /dev/null";
181 }
182 if (mProcessingSettings.debugLevel >= 3) {
183 printf("Running command %s\n", command.c_str());
184 }
185 if (system(command.c_str())) {
186 if (mProcessingSettings.debugLevel >= 3) {
187 printf("Source code file: %s", filename.c_str());
188 }
189 throw std::runtime_error("Error during CUDA compilation");
190 } // clang-format off
191 }, tbb::simple_partitioner()); // clang-format on
192 if (mProcessingSettings.debugLevel >= 0) {
193 GPUInfo("RTC Compilation finished (%f seconds)", rtcTimer.GetCurrentElapsedTime());
194 }
195 if (mProcessingSettings.rtc.cacheOutput) {
196 FILE* fp = fopen((mProcessingSettings.RTCcacheFolder + "/rtc.cuda.cache").c_str(), "w+b");
197 if (fp == nullptr) {
198 throw std::runtime_error("Cannot open cache file for writing");
199 }
200 GPUInfo("Storing RTC compilation result in cache file");
201
202 if (fwrite(shasource, 1, 20, fp) != 20 ||
203 fwrite(shaparam, 1, 20, fp) != 20 ||
204 fwrite(shacmd, 1, 20, fp) != 20 ||
205 fwrite(shakernels, 1, 20, fp) != 20 ||
206 fwrite(&mProcessingSettings.rtc, sizeof(mProcessingSettings.rtc), 1, fp) != 1) {
207 throw std::runtime_error("Error writing cache file");
208 }
209
210 std::vector<char> buffer;
211 for (uint32_t i = 0; i < nCompile; i++) {
212 FILE* fp2 = fopen((filename + "_" + std::to_string(i) + mRtcBinExtension).c_str(), "rb");
213 if (fp2 == nullptr) {
214 throw std::runtime_error("Cannot open cuda module file");
215 }
216 fseek(fp2, 0, SEEK_END);
217 size_t size = ftell(fp2);
218 buffer.resize(size);
219 fseek(fp2, 0, SEEK_SET);
220 if (fread(buffer.data(), 1, size, fp2) != size) {
221 throw std::runtime_error("Error reading cuda module file");
222 }
223 fclose(fp2);
224
225 if (fwrite(&size, sizeof(size), 1, fp) != 1 ||
226 fwrite(buffer.data(), 1, size, fp) != size) {
227 throw std::runtime_error("Error writing cache file");
228 }
229 }
230 fclose(fp);
231 }
232 }
233 if (mProcessingSettings.rtc.cacheOutput && mProcessingSettings.rtc.cacheMutex) {
234 if (lockf(fd, F_ULOCK, 0)) {
235 throw std::runtime_error("Error unlocking RTC cache mutex file");
236 }
237 close(fd);
238 }
239
240 return 0;
241}
int32_t i
double GetCurrentElapsedTime(bool reset=false)
Definition timer.cxx:110
void ResetStart()
Definition timer.cxx:63
void getRTCKernelCalls(std::vector< std::string > &kernels)
GPUSettingsProcessing mProcessingSettings
GLsizei const GLchar *const * string
Definition glcorearb.h:809
GLuint buffer
Definition glcorearb.h:655
GLsizeiptr size
Definition glcorearb.h:659
GLenum GLenum GLsizei len
Definition glcorearb.h:4232
GLenum GLfloat param
Definition glcorearb.h:271
GLint GLuint mask
Definition glcorearb.h:291
Polygon< T > close(Polygon< T > polygon)
Definition Polygon.h:126
Defining DataPointCompositeObject explicitly as copiable.
std::string to_string(gsl::span< T, Size > span)
Definition common.h:52
std::string filename()
#define QGET_LD_BINARY_SYMBOLS(filename)