Project
Loading...
Searching...
No Matches
GPUReconstructionDebug.cxx
Go to the documentation of this file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
14
15#include "GPUReconstruction.h"
16#include "GPULogging.h"
17#include "GPUSettings.h"
18
19#include <csignal>
20#include <functional>
21#include <unordered_map>
22#include <mutex>
23#include <filesystem>
24#include <chrono>
25#include <format>
26
27using namespace o2::gpu;
28
// Members of debugInternal, the state object that GPUReconstruction::mDebugData points to.
// NOTE(review): the line that opens this struct is not part of this listing.
// Handler body that globalCallback forwards every received signal to.
30 std::function<void(int32_t, siginfo_t*, void*)> signalCallback;
// Pending debug dump action; the code that runs it resets it to nullptr afterwards.
31 std::function<void()> debugCallback = nullptr;
// When set, calling it registers the debug handler again for a signal that was handed back to its previous action.
32 std::function<void()> reinstallCallback = nullptr;
// Signal number -> signal action that was in place before the debug handler was installed.
33 std::unordered_map<int32_t, struct sigaction> oldActions;
// Incremented each time debugCallback is executed.
34 size_t debugCount = 0;
// Plain static function with the (signal, siginfo_t*, ucontext) signature; it only forwards to signalCallback.
// NOTE(review): presumably this is what gets registered via sigaction - the registering line is not visible here, confirm.
35 static void globalCallback(int32_t signal, siginfo_t* info, void* ucontext)
36 {
// mDebugData is dereferenced without a check, so it must be allocated whenever this can run.
37 GPUReconstruction::mDebugData->signalCallback(signal, info, ucontext);
38 }
39};
40
// Definition of the static mDebugData member; it starts out empty and is filled with a debugInternal by the handler setup code in this file.
41std::unique_ptr<GPUReconstruction::debugInternal> GPUReconstruction::mDebugData;
42
// Sets up the debug-on-failure machinery; does nothing unless the debugOnFailure processing setting is non-zero.
// NOTE(review): the signature line of this function is not part of this listing.
44{
45 if (GetProcessingSettings().debugOnFailure) {
// Function-local mutex guards the one-time allocation of the shared mDebugData.
46 static std::mutex initMutex;
47 {
48 std::lock_guard<std::mutex> guard(initMutex);
// A second setup attempt is reported through GPUFatal.
49 if (mDebugData) {
50 GPUFatal("Error handlers for debug dumps already set, cannot set them again");
51 }
52 mDebugData = std::make_unique<debugInternal>();
53 }
54 mDebugEnabled = true;
// Signal handlers are only installed when bit 0 or bit 1 of debugOnFailure is set.
55 if ((GetProcessingSettings().debugOnFailure & 1) || (GetProcessingSettings().debugOnFailure & 2)) {
56 struct sigaction sa, oldsa;
57 memset(&sa, 0, sizeof(sa));
// NOTE(review): the listing jumps from line 57 to 59 here, one statement is missing - presumably the assignment of the handler function to sa; confirm against the original file.
59 sa.sa_flags = SA_SIGINFO;
// Bit i of the mask selects signal number i. A setting of (uint32_t)-1 selects the default set SIGINT, SIGABRT, SIGBUS, SIGTERM, SIGSEGV.
60 uint32_t mask = GetProcessingSettings().debugOnFailureSignalMask == (uint32_t)-1 ? ((1 << SIGINT) | (1 << SIGABRT) | (1 << SIGBUS) | (1 << SIGTERM) | (1 << SIGSEGV)) : GetProcessingSettings().debugOnFailureSignalMask;
61 if (mask) {
62 for (uint32_t i = 0; i < sizeof(mask) * 8; i++) {
// NOTE(review): `1 << i` is a signed int shift; for i == 31 it shifts into the sign bit - `1u << i` would avoid that.
63 if (mask & (1 << i)) {
64 if (sigaction(i, &sa, &oldsa)) {
65 GPUFatal("Error installing signal handler for error dump on signal %d", i);
66 }
// Remember the previously active action so it can be restored or forwarded to later.
67 mDebugData->oldActions.emplace(i, oldsa);
68 }
69 }
70 }
71
// NOTE(review): signalCallback is assigned only after the sigaction calls above; a signal delivered in between would reach an empty std::function - confirm whether the order matters.
// The lambda keeps a reference to oldActions and its own copy of sa (myAction).
72 mDebugData->signalCallback = [this, &oldActions = mDebugData->oldActions, myAction = std::move(sa)](int32_t signal, siginfo_t* info, void* ucontext) {
// Serializes invocations of this callback.
// NOTE(review): this locks a mutex and calls a std::function on the signal path; presumably accepted as best effort - confirm.
73 static std::mutex callbackMutex;
74 std::lock_guard<std::mutex> guard(callbackMutex);
// Run the pending debug callback, if any, and count the execution.
75 if (mDebugData->debugCallback) {
76 GPUInfo("Running debug callback for signal %d", signal);
77 mDebugData->debugCallback();
78 mDebugData->debugCount++;
79 }
// The callback is dropped afterwards, so it runs at most once per registration.
80 mDebugData->debugCallback = nullptr;
// Unless forwarding is disabled, put the previous action back and raise the signal again so that action handles it.
81 if (!GetProcessingSettings().debugOnFailureNoForwardSignal) {
82 sigaction(signal, &oldActions[signal], nullptr);
83 raise(signal);
// Only reached if raise() returns: store a closure that registers the debug handler for this signal again.
84 mDebugData->reinstallCallback = [signal, myAction]() { sigaction(signal, &myAction, nullptr); };
85 }
86 };
87 }
88 }
89}
90
// Restores the signal actions saved in mDebugData->oldActions and clears mDebugEnabled.
// NOTE(review): the signature line of this function is not part of this listing.
92{
// Nothing to undo if the debug machinery was never enabled on this instance.
93 if (!mDebugEnabled) {
94 return;
95 }
96 if (mDebugData) {
// it.first is the signal number, it.second the action that was active before the debug handler was installed.
97 for (auto& it : mDebugData->oldActions) {
98 if (sigaction(it.first, &it.second, nullptr)) {
99 GPUFatal("Error restoring signal handler for signal %d", it.first);
100 }
101 }
102 }
// mDebugData itself and the entries of oldActions are left in place here.
103 mDebugEnabled = false;
104}
105
106void GPUReconstruction::setDebugDumpCallback(std::function<void()>&& callback)
107{
108 if (mMaster) {
109 if (mDebugData->reinstallCallback) {
110 mDebugData->reinstallCallback();
111 mDebugData->reinstallCallback = nullptr;
112 }
113 mMaster->setDebugDumpCallback(std::move(callback));
114 } else if (mDebugEnabled && mDebugData) {
115 mDebugData->debugCallback = callback;
116 }
117}
118
119std::string GPUReconstruction::getDebugFolder(const std::string& prefix)
120{
121 const std::filesystem::path target_dir = GetProcessingSettings().debugOnFailureDirectory;
122
123 std::size_t total_size = 0;
124 std::size_t subfolder_count = 0;
125
126 if (!std::filesystem::exists(target_dir) || !std::filesystem::is_directory(target_dir)) {
127 GPUError("Invalid debugOnFailureDirectory %s", GetProcessingSettings().debugOnFailureDirectory.c_str());
128 return "";
129 }
130
131 for (const auto& entry : std::filesystem::directory_iterator(target_dir)) {
132 if (entry.is_directory()) {
133 subfolder_count++;
134
135 for (const auto& subentry : std::filesystem::directory_iterator(entry.path())) {
136 if (subentry.is_regular_file()) {
137 std::error_code ec;
138 auto size = std::filesystem::file_size(subentry.path(), ec);
139 if (!ec) {
140 total_size += size;
141 }
142 }
143 }
144 }
145 }
146
147 if ((GetProcessingSettings().debugOnFailureMaxFiles && subfolder_count >= GetProcessingSettings().debugOnFailureMaxFiles) || (GetProcessingSettings().debugOnFailureMaxSize && (total_size >> 30) >= GetProcessingSettings().debugOnFailureMaxSize)) {
148 GPUError("Cannot store debug dump files, target storage exceeded: %zu dumps, %zu bytes", subfolder_count, total_size);
149 return "";
150 }
151
152 auto currentTime = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now());
153 std::ostringstream dateTime;
154 dateTime << std::put_time(std::localtime(&currentTime), "%Y-%m-%d_%H-%M-%S");
155
156 int32_t attempt = 0;
157 std::string outname;
158 while (true) {
159 if (attempt++ >= 512) {
160 GPUError("Error creating debug dump folder");
161 return "";
162 }
163
164 outname = GetProcessingSettings().debugOnFailureDirectory + "/debug_" + prefix + (prefix == "" ? "" : "_") + dateTime.str() + "_" + std::to_string(attempt);
165 std::error_code ec;
166 bool created = std::filesystem::create_directory(outname, ec);
167 if (!ec && created) {
168 break;
169 }
170 }
171
172 GPUInfo("Debug dump to %s", outname.c_str());
173 return outname;
174}
175
// Runs the registered debug callback on request; the result tells whether a callback was executed by this instance.
// NOTE(review): the signature line is not part of this listing; the delegation below suggests this is GPUReconstruction::triggerDebugDump returning bool - confirm.
177{
// Instances with a master delegate to it and pass its result through.
178 if (mMaster) {
179 return mMaster->triggerDebugDump();
180 } else if (mDebugEnabled && mDebugData && mDebugData->debugCallback) {
181 GPUInfo("Running triggered debug callback");
182 mDebugData->debugCallback();
183 mDebugData->debugCount++;
// Clear the callback after it ran, so it is not executed a second time for the same registration.
184 mDebugData->debugCallback = nullptr;
185 return true;
186 }
// Debugging not enabled, no debug state, or no callback registered.
187 return false;
188}
int32_t i
std::string getDebugFolder(const std::string &prefix="")
void setDebugDumpCallback(std::function< void()> &&callback=std::function< void()>(nullptr))
static std::unique_ptr< debugInternal > mDebugData
const GPUSettingsProcessing & GetProcessingSettings() const
GLuint entry
Definition glcorearb.h:5735
GLsizeiptr size
Definition glcorearb.h:659
typedef void(APIENTRYP PFNGLCULLFACEPROC)(GLenum mode)
GLint GLuint mask
Definition glcorearb.h:291
std::string to_string(gsl::span< T, Size > span)
Definition common.h:52
std::function< void(int32_t, siginfo_t *, void *)> signalCallback
static void globalCallback(int32_t signal, siginfo_t *info, void *ucontext)
std::unordered_map< int32_t, struct sigaction > oldActions