standalone.cxx
// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
// All rights not expressly granted are reserved.
//
// This software is distributed under the terms of the GNU General Public
// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
//
// In applying this license CERN does not waive the privileges and immunities
// granted to it by virtue of its status as an Intergovernmental Organization
// or submit itself to any jurisdiction.

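// Standalone benchmark and test driver for the O2 GPU TPC tracking chain:
// parses qconfig options, sets up one or more GPUReconstruction instances
// (optionally a second instance for the asynchronous phase or a double
// pipeline), loads dumped events, and runs the reconstruction chains.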
15#include "utils/qconfig.h"
16#include "GPUReconstruction.h"
19#include "GPUChainTracking.h"
20#include "GPUChainTrackingGetters.inc"
21#include "GPUTPCDef.h"
22#include "GPUQA.h"
23#include "GPUParam.h"
25#include "genEvents.h"
26
27#include "TPCFastTransform.h"
29#include "GPUTPCGMMergedTrack.h"
30#include "GPUSettings.h"
31#include "GPUConstantMem.h"
32
33#include "GPUO2DataTypes.h"
34#include "GPUChainITS.h"
35
37
38#include <iostream>
39#include <fstream>
40#include <cstdio>
41#include <cstring>
42#include <chrono>
43#include <tuple>
44#include <algorithm>
45#include <thread>
46#include <future>
47#include <atomic>
48#include <vector>
49
50#ifndef _WIN32
51#include <unistd.h>
52#include <sched.h>
53#include <csignal>
54#include <sys/types.h>
55#include <sys/wait.h>
56#include <sys/select.h>
57#include <cfenv>
58#include <clocale>
59#include <sys/stat.h>
60#endif
61#include "utils/timer.h"
63#include "utils/vecpod.h"
64
65using namespace o2::gpu;
66
67// #define BROKEN_EVENTS
68
69namespace o2::gpu
70{
71extern GPUSettingsStandalone configStandalone;
72}
73
GPUReconstruction *rec, *recAsync, *recPipeline;
GPUChainTracking *chainTracking, *chainTrackingAsync, *chainTrackingPipeline;
GPUChainITS *chainITS, *chainITSAsync, *chainITSPipeline;
int32_t nEventsInDirectory = 0;
uint32_t syncAsyncDecodedClusters = 0;
std::string eventsDir;
void unique_ptr_aligned_delete(char* v)
{
  operator delete(v, std::align_val_t(GPUCA_BUFFER_ALIGNMENT));
}
std::unique_ptr<char, void (*)(char*)> outputmemory(nullptr, unique_ptr_aligned_delete);
std::unique_ptr<char, void (*)(char*)> outputmemoryPipeline(nullptr, unique_ptr_aligned_delete);
std::unique_ptr<char, void (*)(char*)> inputmemory(nullptr, unique_ptr_aligned_delete);
std::unique_ptr<GPUDisplayFrontendInterface> eventDisplay;
std::unique_ptr<GPUReconstructionTimeframe> tf;
std::atomic<uint32_t> nIteration, nIterationEnd;

std::vector<GPUTrackingInOutPointers> ioPtrEvents;
std::vector<GPUChainTracking::InOutMemory> ioMemEvents;
int32_t ReadConfiguration(int argc, char** argv)
{
  int32_t qcRet = qConfigParse(argc, (const char**)argv);
  if (qcRet) {
    if (qcRet != qConfig::qcrHelp) {
      printf("Error parsing command line parameters\n");
    }
    return 1;
  }
  if (configStandalone.printSettings > 1) {
    printf("Config Dump before ReadConfiguration\n");
    qConfigPrint();
  }
  if (configStandalone.proc.debugLevel == -1) {
    configStandalone.proc.debugLevel = 0;
  }
#ifndef _WIN32
  setlocale(LC_ALL, "en_US.utf-8");
  setlocale(LC_NUMERIC, "en_US.utf-8");
  if (configStandalone.cpuAffinity != -1) {
    cpu_set_t mask;
    CPU_ZERO(&mask);
    CPU_SET(configStandalone.cpuAffinity, &mask);

    printf("Setting affinity to restrict to CPU core %d\n", configStandalone.cpuAffinity);
    if (0 != sched_setaffinity(0, sizeof(mask), &mask)) {
      printf("Error setting CPU affinity\n");
      return 1;
    }
  }
  if (configStandalone.fifoScheduler) {
    printf("Setting FIFO scheduler\n");
    sched_param param;
    sched_getparam(0, &param);
    param.sched_priority = 1;
    if (0 != sched_setscheduler(0, SCHED_FIFO, &param)) {
      printf("Error setting scheduler\n");
      return 1;
    }
  }
#ifdef __FAST_MATH__
  if (configStandalone.fpe == 1) {
#else
  if (configStandalone.fpe) {
#endif
    feenableexcept(FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW);
  }
  if (configStandalone.flushDenormals) {
    disable_denormals();
  }

#else
  if (configStandalone.cpuAffinity != -1) {
    printf("Affinity setting not supported on Windows\n");
    return 1;
  }
  if (configStandalone.fifoScheduler) {
    printf("FIFO Scheduler setting not supported on Windows\n");
    return 1;
  }
  if (configStandalone.fpe == 1) {
    printf("FPE not supported on Windows\n");
    return 1;
  }
#endif
#ifndef GPUCA_TPC_GEOMETRY_O2
#error Why was configStandalone.rec.tpc.mergerReadFromTrackerDirectly = 0 needed?
  configStandalone.proc.inKernelParallel = false;
  configStandalone.proc.createO2Output = 0;
  if (configStandalone.rundEdx == -1) {
    configStandalone.rundEdx = 0;
  }
#endif
#ifndef GPUCA_BUILD_QA
  if (configStandalone.proc.runQA || configStandalone.eventGenerator) {
    printf("QA not enabled in build\n");
    return 1;
  }
#endif
  if (configStandalone.proc.doublePipeline && configStandalone.testSyncAsync) {
    printf("Cannot run asynchronous processing with double pipeline\n");
    return 1;
  }
  if (configStandalone.proc.doublePipeline && (configStandalone.runs < 4 || !configStandalone.outputcontrolmem)) {
    printf("Double pipeline mode needs at least 4 runs per event and external output. To cycle through multiple events, use --preloadEvents and --runs n for n iterations round-robin\n");
    return 1;
  }
  if (configStandalone.TF.bunchSim && configStandalone.TF.nMerge) {
    printf("Cannot run --MERGE and --SIMBUNCHES together\n");
    return 1;
  }
  if (configStandalone.TF.bunchSim > 1) {
    configStandalone.TF.timeFrameLen = 1.e9 * configStandalone.TF.bunchSim / configStandalone.TF.interactionRate;
  }
  if (configStandalone.TF.nMerge) {
    double len = configStandalone.TF.nMerge - 1;
    if (configStandalone.TF.randomizeDistance) {
      len += 0.5;
    }
    if (configStandalone.TF.shiftFirstEvent) {
      len += 0.5;
    }
  }
  if (configStandalone.QA.inputHistogramsOnly && configStandalone.QA.compareInputs.size() == 0) {
    printf("Can only produce QA pdf output when input files are specified!\n");
    return 1;
  }
  if (configStandalone.QA.enableLocalOutput && !configStandalone.QA.inputHistogramsOnly && configStandalone.QA.output == "" && configStandalone.QA.plotsDir != "") {
    configStandalone.QA.output = configStandalone.QA.plotsDir + "/output.root";
  }
  if (configStandalone.QA.inputHistogramsOnly) {
    configStandalone.rundEdx = false;
    configStandalone.noEvents = true;
  }
  if (configStandalone.QA.dumpToROOT) {
    configStandalone.proc.outputSharedClusterMap = true;
  }
  if (configStandalone.eventDisplay) {
    configStandalone.noprompt = 1;
  }
  if (configStandalone.proc.debugLevel >= 4) {
    if (configStandalone.proc.inKernelParallel) {
      configStandalone.proc.inKernelParallel = 1;
    } else {
      configStandalone.proc.nHostThreads = 1;
    }
  }
  if (configStandalone.setO2Settings) {
    if (configStandalone.runGPU && configStandalone.proc.debugLevel <= 1) {
      if (!(configStandalone.inputcontrolmem && configStandalone.outputcontrolmem)) {
        printf("setO2Settings requires the usage of --inputMemory and --outputMemory as in O2\n");
        return 1;
      }
      configStandalone.proc.forceHostMemoryPoolSize = 1024 * 1024 * 1024;
    }
    configStandalone.rec.tpc.trackReferenceX = 83;
    configStandalone.proc.outputSharedClusterMap = 1;
    configStandalone.proc.clearO2OutputFromGPU = 1;
    configStandalone.QA.clusterRejectionHistograms = 1;
    configStandalone.proc.tpcIncreasedMinClustersPerRow = 500000;
    configStandalone.proc.ignoreNonFatalGPUErrors = 1;
    // TODO: rundEdx=1
    // GPU_proc.qcRunFraction=$TPC_TRACKING_QC_RUN_FRACTION;"
    // [[ $CTFINPUT == 1 ]] && GPU_CONFIG_KEY+="GPU_proc.tpcInputWithClusterRejection=1;"
    // double pipeline / rtc
  }

  if (configStandalone.outputcontrolmem) {
    bool forceEmptyMemory = getenv("LD_PRELOAD") && strstr(getenv("LD_PRELOAD"), "valgrind") != nullptr;
    outputmemory.reset((char*)operator new(configStandalone.outputcontrolmem, std::align_val_t(GPUCA_BUFFER_ALIGNMENT)));
    if (forceEmptyMemory) {
      printf("Valgrind detected, emptying GPU output memory to avoid false positive undefined reads\n");
      memset(outputmemory.get(), 0, configStandalone.outputcontrolmem);
    }
    if (configStandalone.proc.doublePipeline) {
      outputmemoryPipeline.reset((char*)operator new(configStandalone.outputcontrolmem, std::align_val_t(GPUCA_BUFFER_ALIGNMENT)));
      if (forceEmptyMemory) {
        memset(outputmemoryPipeline.get(), 0, configStandalone.outputcontrolmem);
      }
    }
  }
  if (configStandalone.inputcontrolmem) {
    inputmemory.reset((char*)operator new(configStandalone.inputcontrolmem, std::align_val_t(GPUCA_BUFFER_ALIGNMENT)));
  }

  configStandalone.proc.showOutputStat = true;

  if (configStandalone.runGPU && configStandalone.gpuType == "AUTO") {
    if (GPUReconstruction::CheckInstanceAvailable(GPUReconstruction::DeviceType::CUDA, configStandalone.proc.debugLevel >= 2)) {
      configStandalone.gpuType = "CUDA";
    } else if (GPUReconstruction::CheckInstanceAvailable(GPUReconstruction::DeviceType::HIP, configStandalone.proc.debugLevel >= 2)) {
      configStandalone.gpuType = "HIP";
    } else if (GPUReconstruction::CheckInstanceAvailable(GPUReconstruction::DeviceType::OCL, configStandalone.proc.debugLevel >= 2)) {
      configStandalone.gpuType = "OCL";
    } else {
      if (configStandalone.runGPU > 1 && configStandalone.runGPUforce) {
        printf("No GPU backend / device found, running on CPU is disabled due to runGPUforce\n");
        return 1;
      }
      configStandalone.runGPU = false;
      configStandalone.gpuType = "CPU";
    }
  }

  if (configStandalone.printSettings) {
    configStandalone.proc.printSettings = true;
  }
  if (configStandalone.printSettings > 1) {
    printf("Config Dump after ReadConfiguration\n");
    qConfigPrint();
  }

  return (0);
}

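// Propagate the parsed configuration into the GPUReconstruction instance(s):
// read the per-event-directory settings, assemble the GRP, rec, and processing
// settings plus the set of reco steps with their inputs/outputs, and
// initialize the library.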
int32_t SetupReconstruction()
{
  if (!configStandalone.eventGenerator) {
    if (configStandalone.noEvents) {
      eventsDir = "NON_EXISTING";
      configStandalone.rundEdx = false;
    } else if (rec->ReadSettings(eventsDir.c_str())) {
      printf("Error reading event config file\n");
      return 1;
    }
    const char* tmptext = configStandalone.noEvents ? "Using default event settings, no event dir loaded" : "Read event settings from dir ";
    printf("%s%s (solenoidBz: %f, constBz %d, maxTimeBin %d)\n", tmptext, configStandalone.noEvents ? "" : eventsDir.c_str(), rec->GetGRPSettings().solenoidBzNominalGPU, (int32_t)rec->GetGRPSettings().constBz, rec->GetGRPSettings().grpContinuousMaxTimeBin);
    if (configStandalone.testSyncAsync) {
      recAsync->ReadSettings(eventsDir.c_str());
    }
    if (configStandalone.proc.doublePipeline) {
      recPipeline->ReadSettings(eventsDir.c_str());
    }
  }

  if (configStandalone.testSyncAsync) {
    chainTrackingAsync->SetQAFromForeignChain(chainTracking);
  }

  GPUSettingsGRP grp = rec->GetGRPSettings();
  GPUSettingsRec recSet;
  GPUSettingsProcessing procSet;
  recSet = configStandalone.rec;
  procSet = configStandalone.proc;
  GPURecoStepConfiguration steps;

  if (configStandalone.solenoidBzNominalGPU != -1e6f) {
    grp.solenoidBzNominalGPU = configStandalone.solenoidBzNominalGPU;
  }
  if (configStandalone.constBz) {
    grp.constBz = true;
  }
  if (configStandalone.TF.nMerge || configStandalone.TF.bunchSim) {
    if (grp.grpContinuousMaxTimeBin) {
      printf("ERROR: requested to overlay continuous data - not supported\n");
      return 1;
    }
    if (!configStandalone.cont) {
      printf("Continuous mode forced\n");
      configStandalone.cont = true;
    }
  }
  if (configStandalone.setMaxTimeBin != -2) {
    grp.grpContinuousMaxTimeBin = configStandalone.setMaxTimeBin;
  } else if (configStandalone.cont && grp.grpContinuousMaxTimeBin == 0) {
    grp.grpContinuousMaxTimeBin = -1;
  }
  if (grp.grpContinuousMaxTimeBin < -1 && !configStandalone.noEvents) {
    printf("Invalid maxTimeBin %d\n", grp.grpContinuousMaxTimeBin);
    return 1;
  }
  if (rec->GetDeviceType() == GPUReconstruction::DeviceType::CPU) {
    printf("Standalone Test Framework for CA Tracker - Using CPU\n");
  } else {
    printf("Standalone Test Framework for CA Tracker - Using GPU\n");
  }

  configStandalone.proc.forceMemoryPoolSize = (configStandalone.proc.forceMemoryPoolSize == 1 && configStandalone.eventDisplay) ? 2 : configStandalone.proc.forceMemoryPoolSize;
  if (configStandalone.eventDisplay) {
    eventDisplay.reset(GPUDisplayFrontendInterface::getFrontend(configStandalone.display.displayFrontend.c_str()));
    if (eventDisplay.get() == nullptr) {
      throw std::runtime_error("Requested display not available");
    }
    printf("Enabling event display (%s backend)\n", eventDisplay->frontendName());
    procSet.eventDisplay = eventDisplay.get();
    if (!configStandalone.QA.noMC) {
      procSet.runMC = true;
    }
  }

  if (procSet.runQA && !configStandalone.QA.noMC) {
    procSet.runMC = true;
  }

  steps.steps = gpudatatypes::RecoStep::AllRecoSteps;
  if (configStandalone.runTRD != -1) {
    steps.steps.setBits(gpudatatypes::RecoStep::TRDTracking, configStandalone.runTRD > 0);
  } else if (chainTracking->GetTRDGeometry() == nullptr) {
    steps.steps.setBits(gpudatatypes::RecoStep::TRDTracking, false);
  }
  if (configStandalone.runCompression != -1) {
    steps.steps.setBits(gpudatatypes::RecoStep::TPCCompression, configStandalone.runCompression > 0);
  }
  if (configStandalone.runTransformation != -1) {
    steps.steps.setBits(gpudatatypes::RecoStep::TPCConversion, configStandalone.runTransformation > 0);
  }
  steps.steps.setBits(gpudatatypes::RecoStep::Refit, configStandalone.runRefit);
  if (!configStandalone.runMerger) {
    steps.steps.setBits(gpudatatypes::RecoStep::TPCMerging, false);
    steps.steps.setBits(gpudatatypes::RecoStep::TRDTracking, false);
    steps.steps.setBits(gpudatatypes::RecoStep::TPCdEdx, false);
    steps.steps.setBits(gpudatatypes::RecoStep::TPCCompression, false);
    steps.steps.setBits(gpudatatypes::RecoStep::Refit, false);
  }

  if (configStandalone.TF.bunchSim || configStandalone.TF.nMerge) {
    steps.steps.setBits(gpudatatypes::RecoStep::TRDTracking, false);
  }
  steps.inputs.set(gpudatatypes::InOutType::TPCClusters, gpudatatypes::InOutType::TRDTracklets);
  steps.steps.setBits(gpudatatypes::RecoStep::TPCDecompression, false);
  steps.inputs.setBits(gpudatatypes::InOutType::TPCCompressedClusters, false);
  if (grp.doCompClusterDecode) {
    steps.inputs.setBits(gpudatatypes::InOutType::TPCCompressedClusters, true);
    steps.inputs.setBits(gpudatatypes::InOutType::TPCClusters, false);
    steps.steps.setBits(gpudatatypes::RecoStep::TPCCompression, false);
    steps.steps.setBits(gpudatatypes::RecoStep::TPCClusterFinding, false);
    steps.steps.setBits(gpudatatypes::RecoStep::TPCDecompression, true);
    steps.outputs.setBits(gpudatatypes::InOutType::TPCCompressedClusters, false);
  } else if (grp.needsClusterer) {
    steps.inputs.setBits(gpudatatypes::InOutType::TPCRaw, true);
    steps.inputs.setBits(gpudatatypes::InOutType::TPCClusters, false);
  } else {
    steps.steps.setBits(gpudatatypes::RecoStep::TPCClusterFinding, false);
  }

  // Set settings for synchronous
  GPUChainTracking::ApplySyncSettings(procSet, recSet, steps.steps, configStandalone.testSyncAsync || configStandalone.testSync, configStandalone.rundEdx);
  int32_t runAsyncQA = procSet.runQA && !configStandalone.testSyncAsyncQcInSync ? procSet.runQA : 0;
  if (configStandalone.testSyncAsync) {
    procSet.eventDisplay = nullptr;
    if (!configStandalone.testSyncAsyncQcInSync) {
      procSet.runQA = false;
    }
  }

  // Apply --recoSteps flag last so it takes precedence
  // E.g. ApplySyncSettings might enable TPCdEdx, which might not be needed if only the clusterizer was requested
  if (configStandalone.recoSteps >= 0) {
    steps.steps &= configStandalone.recoSteps;
  }
  if (configStandalone.recoStepsGPU >= 0) {
    steps.stepsGPUMask &= configStandalone.recoStepsGPU;
  }

  steps.outputs.clear();
  steps.outputs.setBits(gpudatatypes::InOutType::TPCMergedTracks, steps.steps.isSet(gpudatatypes::RecoStep::TPCMerging));
  steps.outputs.setBits(gpudatatypes::InOutType::TPCCompressedClusters, steps.steps.isSet(gpudatatypes::RecoStep::TPCCompression));
  steps.outputs.setBits(gpudatatypes::InOutType::TRDTracks, steps.steps.isSet(gpudatatypes::RecoStep::TRDTracking));
  steps.outputs.setBits(gpudatatypes::InOutType::TPCClusters, steps.steps.isSet(gpudatatypes::RecoStep::TPCClusterFinding));

  if (steps.steps.isSet(gpudatatypes::RecoStep::TRDTracking)) {
    if (procSet.createO2Output && !procSet.trdTrackModelO2) {
      procSet.createO2Output = 1; // Must not be 2, to make sure TPC GPU tracks are still available for TRD
    }
  }

  rec->SetSettings(&grp, &recSet, &procSet, &steps);
  if (configStandalone.proc.doublePipeline) {
    recPipeline->SetSettings(&grp, &recSet, &procSet, &steps);
  }
  if (configStandalone.testSyncAsync) { // TODO: Add --async mode / flag
    // Set settings for asynchronous
    steps.steps.setBits(gpudatatypes::RecoStep::TPCDecompression, true);
    steps.steps.setBits(gpudatatypes::RecoStep::TPCdEdx, true);
    steps.steps.setBits(gpudatatypes::RecoStep::TPCCompression, false);
    steps.steps.setBits(gpudatatypes::RecoStep::TPCClusterFinding, false);
    steps.inputs.setBits(gpudatatypes::InOutType::TPCRaw, false);
    steps.inputs.setBits(gpudatatypes::InOutType::TPCClusters, false);
    steps.inputs.setBits(gpudatatypes::InOutType::TPCCompressedClusters, true);
    steps.outputs.setBits(gpudatatypes::InOutType::TPCCompressedClusters, false);
    procSet.runMC = false;
    procSet.runQA = runAsyncQA;
    procSet.eventDisplay = eventDisplay.get();
    procSet.runCompressionStatistics = 0;
    if (recSet.tpc.rejectionStrategy >= GPUSettings::RejectionStrategyB) {
      procSet.tpcInputWithClusterRejection = 1;
    }
    recSet.tpc.disableRefitAttachment = 0xFF;
    recSet.maxTrackQPtB5 = CAMath::Min(recSet.maxTrackQPtB5, recSet.tpc.rejectQPtB5);
    GPUChainTracking::ApplySyncSettings(procSet, recSet, steps.steps, false, configStandalone.rundEdx);
    recAsync->SetSettings(&grp, &recSet, &procSet, &steps);
  }

  if (configStandalone.outputcontrolmem) {
    rec->SetOutputControl(outputmemory.get(), configStandalone.outputcontrolmem);
    if (configStandalone.proc.doublePipeline) {
      recPipeline->SetOutputControl(outputmemoryPipeline.get(), configStandalone.outputcontrolmem);
    }
  }

  o2::base::Propagator* prop = nullptr;
  prop = o2::base::Propagator::Instance(true);
  prop->setGPUField(&rec->GetParam().polynomialField);
  prop->setNominalBz(rec->GetParam().bzkG);
  prop->setMatLUT(chainTracking->GetMatLUT());
  chainTracking->SetO2Propagator(prop);
  if (chainTrackingAsync) {
    chainTrackingAsync->SetO2Propagator(prop);
  }
  if (chainTrackingPipeline) {
    chainTrackingPipeline->SetO2Propagator(prop);
  }
  procSet.o2PropagatorUseGPUField = true;

  if (rec->Init()) {
    printf("Error initializing GPUReconstruction!\n");
    return 1;
  }
  if (configStandalone.outputcontrolmem && rec->IsGPU()) {
    if (rec->registerMemoryForGPU(outputmemory.get(), configStandalone.outputcontrolmem) || (configStandalone.proc.doublePipeline && recPipeline->registerMemoryForGPU(outputmemoryPipeline.get(), configStandalone.outputcontrolmem))) {
      printf("ERROR registering memory for the GPU!!!\n");
      return 1;
    }
  }
  if (configStandalone.inputcontrolmem && rec->IsGPU()) {
    if (rec->registerMemoryForGPU(inputmemory.get(), configStandalone.inputcontrolmem)) {
      printf("ERROR registering input memory for the GPU!!!\n");
      return 1;
    }
  }
  if (configStandalone.proc.debugLevel >= 4) {
  }
  return (0);
}

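// Read dumped event n from eventsDir into chainTracking->mIOPtrs, including the
// matching MC dump when QA or the event display needs labels.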
int32_t ReadEvent(int32_t n)
{
  if (configStandalone.inputcontrolmem && !configStandalone.preloadEvents) {
    rec->SetInputControl(inputmemory.get(), configStandalone.inputcontrolmem);
  }
  int32_t r = chainTracking->ReadData((eventsDir + GPUCA_EVDUMP_FILE "." + std::to_string(n) + ".dump").c_str());
  if (r) {
    return r;
  }
#if defined(GPUCA_TPC_GEOMETRY_O2) && defined(GPUCA_BUILD_QA) && !defined(GPUCA_O2_LIB)
  if ((configStandalone.proc.runQA || configStandalone.eventDisplay) && !configStandalone.QA.noMC) {
    if (chainTracking->GetQA()->ReadO2MCData((eventsDir + "mc." + std::to_string(n) + ".dump").c_str()) &&
        chainTracking->GetQA()->ReadO2MCData((eventsDir + "mc.0.dump").c_str()) && configStandalone.proc.runQA) {
      throw std::runtime_error("Error reading O2 MC dump");
    }
  }
#endif
  if (chainTracking->mIOPtrs.clustersNative && (configStandalone.TF.bunchSim || configStandalone.TF.nMerge || !configStandalone.runTransformation)) {
    if (configStandalone.proc.debugLevel >= 2) {
      printf("Converting Native to Legacy ClusterData for overlaying - WARNING: No raw clusters produced - Compression etc will not run!!!\n");
    }
    chainTracking->ConvertNativeToClusterDataLegacy();
  }
  return 0;
}

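// Load event iEvent into preload slot x: build a simulated time frame or merged
// event if requested, optionally apply ZS encoding/filtering, convert cluster
// formats, and stash the I/O pointers and memory in ioPtrEvents/ioMemEvents.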
int32_t LoadEvent(int32_t iEvent, int32_t x)
{
  if (configStandalone.TF.bunchSim) {
    if (tf->LoadCreateTimeFrame(iEvent)) {
      return 1;
    }
  } else if (configStandalone.TF.nMerge) {
    if (tf->LoadMergedEvents(iEvent)) {
      return 1;
    }
  } else {
    if (ReadEvent(iEvent)) {
      return 1;
    }
  }
  bool encodeZS = configStandalone.encodeZS == -1 ? (chainTracking->mIOPtrs.tpcPackedDigits && !chainTracking->mIOPtrs.tpcZS) : (bool)configStandalone.encodeZS;
  bool zsFilter = configStandalone.zsFilter == -1 ? (!encodeZS && chainTracking->mIOPtrs.tpcPackedDigits && !chainTracking->mIOPtrs.tpcZS) : (bool)configStandalone.zsFilter;
  if (encodeZS || zsFilter) {
    if (!chainTracking->mIOPtrs.tpcPackedDigits) {
      printf("Need digit input to run ZS\n");
      return 1;
    }
    if (zsFilter) {
    }
    if (encodeZS) {
    }
  }
  if (!configStandalone.runTransformation) {
    chainTracking->mIOPtrs.clustersNative = nullptr;
  } else {
    for (int32_t i = 0; i < chainTracking->NSECTORS; i++) {
      if (chainTracking->mIOPtrs.rawClusters[i]) {
        if (configStandalone.proc.debugLevel >= 2) {
          printf("Converting Legacy Raw Cluster to Native\n");
        }
        chainTracking->ConvertRun2RawToNative();
        break;
      }
    }
  }

  if (configStandalone.stripDumpedEvents) {
    if (chainTracking->mIOPtrs.tpcZS) {
      chainTracking->mIOPtrs.tpcPackedDigits = nullptr;
    }
  }

  if (configStandalone.runTransformation && !chainTracking->mIOPtrs.clustersNative) {
    printf("Need cluster native data for on-the-fly TPC transform\n");
    return 1;
  }

  ioPtrEvents[x] = chainTracking->mIOPtrs;
  ioMemEvents[x] = std::move(chainTracking->mIOMem);
  return 0;
}

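// Count the good tracks of the last reconstruction and accumulate track/cluster
// totals for the final statistics output.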
void OutputStat(GPUChainTracking* t, int64_t* nTracksTotal = nullptr, int64_t* nClustersTotal = nullptr)
{
  int32_t nTracks = 0;
  if (t->GetProcessingSettings().createO2Output) {
    nTracks += t->mIOPtrs.nOutputTracksTPCO2;
  } else {
    for (uint32_t k = 0; k < t->mIOPtrs.nMergedTracks; k++) {
      if (t->mIOPtrs.mergedTracks[k].OK()) {
        nTracks++;
      }
    }
  }
  if (nTracksTotal && nClustersTotal) {
    *nTracksTotal += nTracks;
    *nClustersTotal += t->mIOPtrs.nMergedTrackHits;
  }
}

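// Run the reconstruction chain for the requested number of iterations on one
// GPUReconstruction instance; in double-pipeline mode two threads share the
// global iteration counters nIteration/nIterationEnd.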
int32_t RunBenchmark(GPUReconstruction* recUse, GPUChainTracking* chainTrackingUse, int32_t runs, int32_t iEvent, int64_t* nTracksTotal, int64_t* nClustersTotal, int32_t threadId = 0, HighResTimer* timerPipeline = nullptr)
{
  int32_t iRun = 0, iteration = 0;
  while ((iteration = nIteration.fetch_add(1)) < runs) {
    if (configStandalone.runs > 1) {
      printf("Run %d (thread %d)\n", iteration + 1, threadId);
    }
    recUse->SetResetTimers(iRun < configStandalone.runsInit);
    if (configStandalone.outputcontrolmem) {
      recUse->SetOutputControl(threadId ? outputmemoryPipeline.get() : outputmemory.get(), configStandalone.outputcontrolmem);
    }

    if (configStandalone.testSyncAsync) {
      printf("Running synchronous phase\n");
    }
    const GPUTrackingInOutPointers& ioPtrs = ioPtrEvents[!configStandalone.preloadEvents ? 0 : configStandalone.proc.doublePipeline ? (iteration % ioPtrEvents.size()) : (iEvent - configStandalone.StartEvent)];
    chainTrackingUse->mIOPtrs = ioPtrs;
    if (iteration == (configStandalone.proc.doublePipeline ? 2 : (configStandalone.runs - 1))) {
      if (configStandalone.proc.doublePipeline && timerPipeline) {
        timerPipeline->Start();
      }
      if (configStandalone.controlProfiler) {
      }
    }
    int32_t tmpRetVal = recUse->RunChains();
    int32_t iterationEnd = nIterationEnd.fetch_add(1);
    if (iterationEnd == configStandalone.runs - 1) {
      if (configStandalone.proc.doublePipeline && timerPipeline) {
        timerPipeline->Stop();
      }
      if (configStandalone.controlProfiler) {
      }
    }

    if (tmpRetVal == 0 || tmpRetVal == 2) {
      OutputStat(chainTrackingUse, iRun == 0 ? nTracksTotal : nullptr, iRun == 0 ? nClustersTotal : nullptr);
    }

    if (tmpRetVal == 0 && configStandalone.testSyncAsync) {

      printf("Running asynchronous phase from %'u compressed clusters\n", syncAsyncDecodedClusters);

      chainTrackingAsync->mIOPtrs = ioPtrs;
      for (int32_t i = 0; i < chainTracking->NSECTORS; i++) {
        chainTrackingAsync->mIOPtrs.rawClusters[i] = nullptr;
        chainTrackingAsync->mIOPtrs.nRawClusters[i] = 0;
        chainTrackingAsync->mIOPtrs.clusterData[i] = nullptr;
        chainTrackingAsync->mIOPtrs.nClusterData[i] = 0;
      }
      recAsync->SetResetTimers(iRun < configStandalone.runsInit);
      tmpRetVal = recAsync->RunChains();
      if (tmpRetVal == 0 || tmpRetVal == 2) {
        OutputStat(chainTrackingAsync, nullptr, nullptr);
      }
      recAsync->ClearAllocatedMemory();
    }
    if (!configStandalone.proc.doublePipeline) {
      recUse->ClearAllocatedMemory();
    }

    if (tmpRetVal == 2) {
      configStandalone.continueOnError = 0; // Forced exit from event display loop
      configStandalone.noprompt = 1;
    }
    if (tmpRetVal == 3 && configStandalone.proc.ignoreNonFatalGPUErrors) {
      printf("GPU Standalone Benchmark: Non-FATAL GPU error occurred, ignoring\n");
    } else if (tmpRetVal && !configStandalone.continueOnError) {
      if (tmpRetVal != 2) {
        printf("GPU Standalone Benchmark: Error occurred\n");
      }
      return 1;
    }
    iRun++;
  }
  if (configStandalone.proc.doublePipeline) {
    recUse->ClearAllocatedMemory();
  }
  nIteration.store(runs);
  return 0;
}

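// Entry point: create the GPUReconstruction instance(s) and chains, set up the
// reconstruction, load or generate events, and run the benchmark loops.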
int32_t main(int argc, char** argv)
{
  std::unique_ptr<GPUReconstruction> recUnique, recUniqueAsync, recUniquePipeline;

  if (ReadConfiguration(argc, argv)) {
    return 1;
  }
  eventsDir = std::string(configStandalone.absoluteEventsDir ? "" : "events/") + configStandalone.eventsDir + "/";

  GPUSettingsDeviceBackend deviceSet;
  deviceSet.deviceType = configStandalone.runGPU ? gpudatatypes::GetDeviceType(configStandalone.gpuType.c_str()) : gpudatatypes::DeviceType::CPU;
  deviceSet.forceDeviceType = configStandalone.runGPUforce;
  deviceSet.master = nullptr;
  recUnique.reset(GPUReconstruction::CreateInstance(deviceSet));
  rec = recUnique.get();
  deviceSet.master = rec;
  if (configStandalone.testSyncAsync) {
    recUniqueAsync.reset(GPUReconstruction::CreateInstance(deviceSet));
    recAsync = recUniqueAsync.get();
  }
  if (configStandalone.proc.doublePipeline) {
    recUniquePipeline.reset(GPUReconstruction::CreateInstance(deviceSet));
    recPipeline = recUniquePipeline.get();
  }
  if (rec == nullptr || (configStandalone.testSyncAsync && recAsync == nullptr)) {
    printf("Error initializing GPUReconstruction\n");
    return 1;
  }
  rec->SetDebugLevelTmp(configStandalone.proc.debugLevel);
  chainTracking = rec->AddChain<GPUChainTracking>();
  if (configStandalone.testSyncAsync) {
    if (configStandalone.proc.debugLevel >= 3) {
      recAsync->SetDebugLevelTmp(configStandalone.proc.debugLevel);
    }
    chainTrackingAsync = recAsync->AddChain<GPUChainTracking>();
  }
  if (configStandalone.proc.doublePipeline) {
    if (configStandalone.proc.debugLevel >= 3) {
      recPipeline->SetDebugLevelTmp(configStandalone.proc.debugLevel);
    }
    chainTrackingPipeline = recPipeline->AddChain<GPUChainTracking>();
    chainITSPipeline = recPipeline->AddChain<GPUChainITS>();
  }
  if (!configStandalone.proc.doublePipeline) {
    chainITS = rec->AddChain<GPUChainITS>();
    if (configStandalone.testSyncAsync) {
      chainITSAsync = recAsync->AddChain<GPUChainITS>();
    }
  }

  if (SetupReconstruction()) {
    return 1;
  }

  std::unique_ptr<std::thread> pipelineThread;
  if (configStandalone.proc.doublePipeline) {
    pipelineThread.reset(new std::thread([]() { rec->RunPipelineWorker(); }));
  }

  if (configStandalone.seed == -1) {
    std::random_device rd;
    configStandalone.seed = (int32_t)rd();
    printf("Using random seed %d\n", configStandalone.seed);
  }

  srand(configStandalone.seed);

  if (!configStandalone.noEvents) {
    while (true) {
      std::ifstream in;
      in.open((eventsDir + GPUCA_EVDUMP_FILE "." + std::to_string(nEventsInDirectory) + ".dump").c_str(), std::ifstream::binary);
      if (in.fail()) {
        break;
      }
      in.close();
      nEventsInDirectory++;
    }
  }

  if (configStandalone.TF.bunchSim || configStandalone.TF.nMerge) {
    tf.reset(new GPUReconstructionTimeframe(chainTracking, ReadEvent, nEventsInDirectory));
  }

  if (configStandalone.eventGenerator) {
    genEvents::RunEventGenerator(chainTracking, eventsDir);
    return 0;
  }

  int32_t nEvents = configStandalone.nEvents;
  if (configStandalone.TF.bunchSim) {
    nEvents = configStandalone.nEvents > 0 ? configStandalone.nEvents : 1;
  } else {
    if (nEvents == -1 || nEvents > nEventsInDirectory) {
      if (nEvents >= 0) {
        printf("Only %d events available in directory %s (%d events requested)\n", nEventsInDirectory, eventsDir.c_str(), nEvents);
      }
      nEvents = nEventsInDirectory;
    }
    if (nEvents == 0 && !configStandalone.noEvents) {
      printf("No event data found in event folder\n");
    }
    if (configStandalone.TF.nMerge > 1) {
      nEvents /= configStandalone.TF.nMerge;
    }
  }

  ioPtrEvents.resize(configStandalone.preloadEvents ? (nEvents - configStandalone.StartEvent) : 1);
  ioMemEvents.resize(configStandalone.preloadEvents ? (nEvents - configStandalone.StartEvent) : 1);
  if (configStandalone.preloadEvents) {
    printf("Preloading events%s", configStandalone.proc.debugLevel >= 2 ? "\n" : "");
    fflush(stdout);
    for (int32_t i = 0; i < nEvents - configStandalone.StartEvent; i++) {
      LoadEvent(configStandalone.StartEvent + i, i);
      printf(configStandalone.proc.debugLevel >= 2 ? "Loading event %d\n" : " %d", i + configStandalone.StartEvent);
      fflush(stdout);
    }
    printf("\n");
  }

  for (int32_t iRunOuter = 0; iRunOuter < configStandalone.runs2; iRunOuter++) {
    if (configStandalone.QA.inputHistogramsOnly) {
      chainTracking->ForceInitQA();
      break;
    }
    if (configStandalone.runs2 > 1) {
      printf("\nRUN2: %d\n", iRunOuter);
    }
    int64_t nTracksTotal = 0;
    int64_t nClustersTotal = 0;
    int32_t nEventsProcessed = 0;

    if (configStandalone.noEvents) {
      nEvents = 1;
      configStandalone.StartEvent = 0;
      chainTracking->ClearIOPointers();
    }

    for (int32_t iEvent = configStandalone.StartEvent; iEvent < nEvents; iEvent++) {
      if (iEvent != configStandalone.StartEvent) {
        printf("\n");
      }
      if (!configStandalone.noEvents && !configStandalone.preloadEvents) {
        HighResTimer timerLoad;
        timerLoad.Start();
        if (LoadEvent(iEvent, 0)) {
          goto breakrun;
        }
        if (configStandalone.dumpEvents) {
          char fname[1024];
          snprintf(fname, 1024, "event.%d.dump", nEventsProcessed);
          chainTracking->DumpData(fname);
          if (nEventsProcessed == 0) {
            rec->DumpSettings();
          }
        }

        if (configStandalone.overrideMaxTimebin && chainTracking->mIOPtrs.clustersNative) {
          GPUSettingsGRP grp = rec->GetGRPSettings();
          if (grp.grpContinuousMaxTimeBin == 0) {
            printf("Cannot override max time bin for non-continuous data!\n");
          } else {
            grp.grpContinuousMaxTimeBin = GPUReconstructionConvert::GetMaxTimeBin(*chainTracking->mIOPtrs.clustersNative);
            printf("Max time bin set to %d\n", grp.grpContinuousMaxTimeBin);
            rec->UpdateSettings(&grp);
            if (recAsync) {
              recAsync->UpdateSettings(&grp);
            }
            if (recPipeline) {
              recPipeline->UpdateSettings(&grp);
            }
          }
        }
        printf("Loading time: %'d us\n", (int32_t)(1000000 * timerLoad.GetCurrentElapsedTime()));
      }

      nIteration.store(0);
      nIterationEnd.store(0);
      double pipelineWalltime = 1.;
      if (configStandalone.noEvents) {
        printf("No processing, no events loaded\n");
      } else if (configStandalone.proc.doublePipeline) {
        printf(configStandalone.preloadEvents ? "Processing Events %d to %d in Pipeline\n" : "Processing Event %d in Pipeline %d times\n", iEvent, configStandalone.preloadEvents ? std::min(iEvent + configStandalone.runs - 1, nEvents - 1) : configStandalone.runs);
        HighResTimer timerPipeline;
        if (configStandalone.proc.debugLevel < 2 && (RunBenchmark(rec, chainTracking, 1, iEvent, &nTracksTotal, &nClustersTotal) || RunBenchmark(recPipeline, chainTrackingPipeline, 2, iEvent, &nTracksTotal, &nClustersTotal))) {
          goto breakrun;
        }
        auto pipeline1 = std::async(std::launch::async, RunBenchmark, rec, chainTracking, configStandalone.runs, iEvent, &nTracksTotal, &nClustersTotal, 0, &timerPipeline);
        auto pipeline2 = std::async(std::launch::async, RunBenchmark, recPipeline, chainTrackingPipeline, configStandalone.runs, iEvent, &nTracksTotal, &nClustersTotal, 1, &timerPipeline);
        if (pipeline1.get() || pipeline2.get()) {
          goto breakrun;
        }
        pipelineWalltime = timerPipeline.GetElapsedTime() / (configStandalone.runs - 2);
        printf("Pipeline wall time: %f, %d iterations, %f per event\n", timerPipeline.GetElapsedTime(), configStandalone.runs - 2, pipelineWalltime);
      } else {
        printf("Processing Event %d\n", iEvent);
        if (RunBenchmark(rec, chainTracking, configStandalone.runs, iEvent, &nTracksTotal, &nClustersTotal)) {
          goto breakrun;
        }
      }
      nEventsProcessed++;

      if (configStandalone.timeFrameTime) {
        double nClusters = chainTracking->GetProcessors()->tpcMerger.NMaxClusters();
        if (nClusters > 0) {
          const int32_t nOrbits = 32;
          const double colRate = 50000;
          const double orbitRate = 11245;
          const double nClsPerTF = 755851. * nOrbits * colRate / orbitRate;
          double timePerTF = (configStandalone.proc.doublePipeline ? pipelineWalltime : ((configStandalone.proc.debugLevel ? rec->GetStatKernelTime() : rec->GetStatWallTime()) / 1000000.)) * nClsPerTF / nClusters;
          const double nGPUsReq = timePerTF * orbitRate / nOrbits;
          char stat[1024];
          snprintf(stat, 1024, "Sync phase: %.2f sec per %d orbit TF, %.1f GPUs required", timePerTF, nOrbits, nGPUsReq);
          if (configStandalone.testSyncAsync) {
            timePerTF = (configStandalone.proc.debugLevel ? recAsync->GetStatKernelTime() : recAsync->GetStatWallTime()) / 1000000. * nClsPerTF / nClusters;
            snprintf(stat + strlen(stat), 1024 - strlen(stat), " - Async phase: %f sec per TF", timePerTF);
          }
          printf("%s (Measured %s time - Extrapolated from %d clusters to %d)\n", stat, configStandalone.proc.debugLevel ? "kernel" : "wall", (int32_t)nClusters, (int32_t)nClsPerTF);
        }
      }
      if (configStandalone.testSyncAsync && chainTrackingAsync->mIOPtrs.clustersNative) {
        uint32_t rejected = chainTracking->mIOPtrs.clustersNative->nClustersTotal - chainTrackingAsync->mIOPtrs.clustersNative->nClustersTotal;
        float rejectionPercentage = (rejected)*100.f / chainTracking->mIOPtrs.clustersNative->nClustersTotal;
        printf("Cluster Rejection: Sync: %'u, Compressed %'u, Async %'u, Rejected %'u (%7.2f%%)\n", chainTracking->mIOPtrs.clustersNative->nClustersTotal, syncAsyncDecodedClusters, chainTrackingAsync->mIOPtrs.clustersNative->nClustersTotal, rejected, rejectionPercentage);
      }

      if (configStandalone.preloadEvents && configStandalone.proc.doublePipeline) {
        break;
      }
    }
    if (nEventsProcessed > 1) {
      printf("Total: %ld clusters, %ld tracks\n", nClustersTotal, nTracksTotal);
    }
  }

breakrun:
  if (rec->GetProcessingSettings().memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_GLOBAL) {
    rec->PrintMemoryMax();
  }

#ifndef _WIN32
  if (configStandalone.proc.runQA && configStandalone.fpe) {
    fedisableexcept(FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW);
  }
#endif

  if (configStandalone.proc.doublePipeline) {
    rec->TerminatePipelineWorker();
    pipelineThread->join();
  }

  rec->Finalize();
  if (configStandalone.testSyncAsync) {
    recAsync->Finalize();
  }
  if (configStandalone.outputcontrolmem && rec->IsGPU()) {
    if (rec->unregisterMemoryForGPU(outputmemory.get()) || (configStandalone.proc.doublePipeline && recPipeline->unregisterMemoryForGPU(outputmemoryPipeline.get()))) {
      printf("Error unregistering memory\n");
    }
  }
  rec->Exit();

  if (!configStandalone.noprompt) {
    printf("Press a key to exit!\n");
    getchar();
  }
  return (0);
}