15#define GPUCA_GPUCODE_HOSTONLY
25#include <oneapi/tbb.h>
33int32_t GPUReconstructionCUDA::genRTC(std::string&
filename, uint32_t& nCompile)
35 std::string rtcparam = std::string(
"#define GPUCA_RTC_CODE\n") +
36 std::string(
mProcessingSettings.rtc.optSpecialCode ?
"#define GPUCA_RTC_SPECIAL_CODE(...) __VA_ARGS__\n" :
"#define GPUCA_RTC_SPECIAL_CODE(...)\n") +
45 std::vector<std::string> kernels;
47 std::string kernelsall;
48 for (uint32_t
i = 0;
i < kernels.size();
i++) {
49 kernelsall += kernels[
i] +
"\n";
53 baseCommand += (getenv(
"O2_GPU_RTC_OVERRIDE_CMD") ? std::string(getenv(
"O2_GPU_RTC_OVERRIDE_CMD")) :
std::
string(_binary_GPUReconstructionCUDArtc_command_start, _binary_GPUReconstructionCUDArtc_command_len));
54 baseCommand += std::string(
" ") + (
mProcessingSettings.RTCoverrideArchitecture !=
"" ?
mProcessingSettings.RTCoverrideArchitecture : std::string(_binary_GPUReconstructionCUDArtc_command_arch_start, _binary_GPUReconstructionCUDArtc_command_arch_len));
56 char shasource[21], shaparam[21], shacmd[21], shakernels[21];
58 o2::framework::internal::SHA1(shasource, _binary_GPUReconstructionCUDArtc_src_start, _binary_GPUReconstructionCUDArtc_src_len);
59 o2::framework::internal::SHA1(shaparam, rtcparam.c_str(), rtcparam.size());
60 o2::framework::internal::SHA1(shacmd, baseCommand.c_str(), baseCommand.size());
61 o2::framework::internal::SHA1(shakernels, kernelsall.c_str(), kernelsall.size());
65 bool cacheLoaded =
false;
72 mode_t
mask = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH;
75 throw std::runtime_error(
"Error opening rtc cache mutex lock file");
78 if (lockf(fd, F_LOCK, 0)) {
79 throw std::runtime_error(
"Error locking rtc cache mutex file");
88 if (fread(sharead, 1, 20, fp) != 20) {
89 throw std::runtime_error(
"Cache file corrupt");
92 GPUInfo(
"Cache file content outdated (source)");
95 if (fread(sharead, 1, 20, fp) != 20) {
96 throw std::runtime_error(
"Cache file corrupt");
99 GPUInfo(
"Cache file content outdated (param)");
102 if (fread(sharead, 1, 20, fp) != 20) {
103 throw std::runtime_error(
"Cache file corrupt");
106 GPUInfo(
"Cache file content outdated (commandline)");
109 if (fread(sharead, 1, 20, fp) != 20) {
110 throw std::runtime_error(
"Cache file corrupt");
113 GPUInfo(
"Cache file content outdated (kernel definitions)");
116 GPUSettingsProcessingRTC cachedSettings;
117 static_assert(std::is_trivially_copyable_v<GPUSettingsProcessingRTC> ==
true,
"GPUSettingsProcessingRTC must be POD");
118 if (fread(&cachedSettings,
sizeof(cachedSettings), 1, fp) != 1) {
119 throw std::runtime_error(
"Cache file corrupt");
122 GPUInfo(
"Cache file content outdated (rtc parameters)");
126 for (uint32_t
i = 0;
i < nCompile;
i++) {
127 if (fread(&
len,
sizeof(
len), 1, fp) != 1) {
128 throw std::runtime_error(
"Cache file corrupt");
132 throw std::runtime_error(
"Cache file corrupt");
135 if (fp2 ==
nullptr) {
136 throw std::runtime_error(
"Cannot open tmp file");
139 throw std::runtime_error(
"Error writing file");
143 GPUInfo(
"Using RTC cache file");
152 GPUInfo(
"Starting CUDA RTC Compilation");
156 tbb::parallel_for<uint32_t>(0, nCompile, [&](
auto i) {
158 printf(
"Compiling %s\n", (filename +
"_" + std::to_string(i) + mRtcSrcExtension).c_str());
162 throw std::runtime_error(
"Error opening file");
165 std::string kernel =
"extern \"C\" {";
169 if (fwrite(rtcparam.c_str(), 1, rtcparam.size(), fp) != rtcparam.size() ||
170 fwrite(_binary_GPUReconstructionCUDArtc_src_start, 1, _binary_GPUReconstructionCUDArtc_src_len, fp) != _binary_GPUReconstructionCUDArtc_src_len ||
171 fwrite(kernel.c_str(), 1, kernel.size(), fp) != kernel.size()) {
172 throw std::runtime_error(
"Error writing file");
175 std::string command = baseCommand;
178 command +=
" &> /dev/null";
180 command +=
" > /dev/null";
183 printf(
"Running command %s\n", command.c_str());
185 if (system(command.c_str())) {
187 printf(
"Source code file: %s",
filename.c_str());
189 throw std::runtime_error(
"Error during CUDA compilation");
191 }, tbb::simple_partitioner());
198 throw std::runtime_error(
"Cannot open cache file for writing");
200 GPUInfo(
"Storing RTC compilation result in cache file");
202 if (fwrite(shasource, 1, 20, fp) != 20 ||
203 fwrite(shaparam, 1, 20, fp) != 20 ||
204 fwrite(shacmd, 1, 20, fp) != 20 ||
205 fwrite(shakernels, 1, 20, fp) != 20 ||
207 throw std::runtime_error(
"Error writing cache file");
211 for (uint32_t
i = 0;
i < nCompile;
i++) {
213 if (fp2 ==
nullptr) {
214 throw std::runtime_error(
"Cannot open cuda module file");
216 fseek(fp2, 0, SEEK_END);
217 size_t size = ftell(fp2);
219 fseek(fp2, 0, SEEK_SET);
221 throw std::runtime_error(
"Error reading cuda module file");
225 if (fwrite(&
size,
sizeof(
size), 1, fp) != 1 ||
227 throw std::runtime_error(
"Error writing cache file");
234 if (lockf(fd, F_ULOCK, 0)) {
235 throw std::runtime_error(
"Error unlocking RTC cache mutex file");
double GetCurrentElapsedTime(bool reset=false)
void getRTCKernelCalls(std::vector< std::string > &kernels)
GPUSettingsProcessing mProcessingSettings
GLsizei const GLchar *const * string
GLenum GLenum GLsizei len
Polygon< T > close(Polygon< T > polygon)
Defining DataPointCompositeObject explicitly as copiable.
std::string to_string(gsl::span< T, Size > span)
#define QGET_LD_BINARY_SYMBOLS(filename)