standalone.cxx

// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
// All rights not expressly granted are reserved.
//
// This software is distributed under the terms of the GNU General Public
// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
//
// In applying this license CERN does not waive the privileges and immunities
// granted to it by virtue of its status as an Intergovernmental Organization
// or submit itself to any jurisdiction.

/// \file standalone.cxx
/// \author David Rohr

#include "utils/qconfig.h"
#include "GPUReconstruction.h"
#include "GPUReconstructionTimeframe.h"
#include "GPUReconstructionConvert.h"
#include "GPUChainTracking.h"
#include "GPUChainTrackingGetters.inc"
#include "GPUTPCDef.h"
#include "GPUQA.h"
#include "GPUParam.h"
#include "GPUDisplayFrontendInterface.h"
#include "genEvents.h"

#include "TPCFastTransform.h"
#include "CorrectionMapsHelper.h"
#include "GPUTPCGMMergedTrack.h"
#include "GPUSettings.h"
#include "GPUConstantMem.h"

#include "GPUO2DataTypes.h"
#include "GPUChainITS.h"

#include "DetectorsBase/Propagator.h"

#include <iostream>
#include <fstream>
#include <cstdio>
#include <cstring>
#include <chrono>
#include <tuple>
#include <algorithm>
#include <thread>
#include <future>
#include <atomic>
#include <vector>
#include <random>

#ifndef _WIN32
#include <unistd.h>
#include <sched.h>
#include <csignal>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/select.h>
#include <cfenv>
#include <clocale>
#include <sys/stat.h>
#endif
#include "utils/timer.h"
#include "utils/qmaths_helpers.h"
#include "utils/vecpod.h"

using namespace o2::gpu;

// #define BROKEN_EVENTS

namespace o2::gpu
{
extern GPUSettingsStandalone configStandalone;
}

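// Reconstruction instances, tracking chains, and I/O buffers shared by the helpers below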
GPUReconstruction *rec, *recAsync, *recPipeline;
GPUChainTracking *chainTracking, *chainTrackingAsync, *chainTrackingPipeline;
GPUChainITS *chainITS, *chainITSAsync, *chainITSPipeline;

// Deleter matching the aligned operator new used for the I/O memory buffers
void unique_ptr_aligned_delete(char* v)
{
  operator delete(v, std::align_val_t(GPUCA_BUFFER_ALIGNMENT));
}
std::unique_ptr<char, void (*)(char*)> outputmemory(nullptr, unique_ptr_aligned_delete), outputmemoryPipeline(nullptr, unique_ptr_aligned_delete), inputmemory(nullptr, unique_ptr_aligned_delete);
std::unique_ptr<GPUDisplayFrontendInterface> eventDisplay;
std::unique_ptr<GPUReconstructionTimeframe> tf;
int32_t nEventsInDirectory = 0;
std::atomic<uint32_t> nIteration, nIterationEnd;

std::vector<GPUTrackingInOutPointers> ioPtrEvents;
std::vector<GPUChainTracking::InOutMemory> ioMemEvents;

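// Parse the command line via qConfig, validate option combinations, and apply
// platform-specific process settings (CPU affinity, FIFO scheduler, FP exceptions).
// Returns nonzero on error.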
int32_t ReadConfiguration(int argc, char** argv)
{
  int32_t qcRet = qConfigParse(argc, (const char**)argv);
  if (qcRet) {
    if (qcRet != qConfig::qcrHelp) {
      printf("Error parsing command line parameters\n");
    }
    return 1;
  }
  if (configStandalone.printSettings > 1) {
    printf("Config Dump before ReadConfiguration\n");
    qConfigPrint();
  }
  if (configStandalone.proc.debugLevel == -1) {
    configStandalone.proc.debugLevel = 0;
  }
#ifndef _WIN32
  setlocale(LC_ALL, "en_US.utf-8");
  setlocale(LC_NUMERIC, "en_US.utf-8");
  if (configStandalone.cpuAffinity != -1) {
    cpu_set_t mask;
    CPU_ZERO(&mask);
    CPU_SET(configStandalone.cpuAffinity, &mask);

    printf("Setting affinity to restrict on CPU core %d\n", configStandalone.cpuAffinity);
    if (0 != sched_setaffinity(0, sizeof(mask), &mask)) {
      printf("Error setting CPU affinity\n");
      return 1;
    }
  }
  if (configStandalone.fifoScheduler) {
    printf("Setting FIFO scheduler\n");
    sched_param param;
    sched_getparam(0, &param);
    param.sched_priority = 1;
    if (0 != sched_setscheduler(0, SCHED_FIFO, &param)) {
      printf("Error setting scheduler\n");
      return 1;
    }
  }
#ifdef __FAST_MATH__
  if (configStandalone.fpe == 1) {
#else
  if (configStandalone.fpe) {
#endif
    feenableexcept(FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW);
  }
  if (configStandalone.flushDenormals) {
    disable_denormals();
  }

#else
  if (configStandalone.cpuAffinity != -1) {
    printf("Affinity setting not supported on Windows\n");
    return 1;
  }
  if (configStandalone.fifoScheduler) {
    printf("FIFO Scheduler setting not supported on Windows\n");
    return 1;
  }
  if (configStandalone.fpe == 1) {
    printf("FPE not supported on Windows\n");
    return 1;
  }
#endif
#ifndef GPUCA_TPC_GEOMETRY_O2
#error Why was configStandalone.rec.tpc.mergerReadFromTrackerDirectly = 0 needed?
  configStandalone.proc.inKernelParallel = false;
  configStandalone.proc.createO2Output = 0;
  if (configStandalone.rundEdx == -1) {
    configStandalone.rundEdx = 0;
  }
#endif
#ifndef GPUCA_BUILD_QA
  if (configStandalone.proc.runQA || configStandalone.eventGenerator) {
    printf("QA not enabled in build\n");
    return 1;
  }
#endif
  if (configStandalone.proc.doublePipeline && configStandalone.testSyncAsync) {
    printf("Cannot run asynchronous processing with double pipeline\n");
    return 1;
  }
  if (configStandalone.proc.doublePipeline && (configStandalone.runs < 4 || !configStandalone.outputcontrolmem)) {
    printf("Double pipeline mode needs at least 3 runs per event and external output. To cycle through multiple events, use --preloadEvents and --runs n for n iterations round-robin\n");
    return 1;
  }
  if (configStandalone.TF.bunchSim && configStandalone.TF.nMerge) {
    printf("Cannot run --MERGE and --SIMBUNCHES together\n");
    return 1;
  }
  if (configStandalone.TF.bunchSim > 1) {
    configStandalone.TF.timeFrameLen = 1.e9 * configStandalone.TF.bunchSim / configStandalone.TF.interactionRate;
  }
  if (configStandalone.TF.nMerge) {
    double len = configStandalone.TF.nMerge - 1;
    if (configStandalone.TF.randomizeDistance) {
      len += 0.5;
    }
    if (configStandalone.TF.shiftFirstEvent) {
      len += 0.5;
    }
    // Assumption (restored line): derive the time frame length from the merged-event spacing
    configStandalone.TF.timeFrameLen = (len * configStandalone.TF.averageDistance / GPUReconstructionTimeframe::TPCZ + 1) * GPUReconstructionTimeframe::DRIFT_TIME;
  }
  if (configStandalone.QA.inputHistogramsOnly && configStandalone.QA.compareInputs.size() == 0) {
    printf("Can only produce QA pdf output when input files are specified!\n");
    return 1;
  }
  if (configStandalone.QA.inputHistogramsOnly) {
    configStandalone.rundEdx = false;
  }
  if (configStandalone.QA.dumpToROOT) {
    configStandalone.proc.outputSharedClusterMap = true;
  }
  if (configStandalone.eventDisplay) {
    configStandalone.noprompt = 1;
  }
  if (configStandalone.proc.debugLevel >= 4) {
    if (configStandalone.proc.inKernelParallel) {
      configStandalone.proc.inKernelParallel = 1;
    } else {
      configStandalone.proc.nHostThreads = 1;
    }
  }
  if (configStandalone.setO2Settings) {
    if (!(configStandalone.inputcontrolmem && configStandalone.outputcontrolmem)) {
      printf("setO2Settings requires the usage of --inputMemory and --outputMemory as in O2\n");
      return 1;
    }
    if (configStandalone.runGPU) {
      configStandalone.proc.forceHostMemoryPoolSize = 1024 * 1024 * 1024;
    }
    configStandalone.rec.tpc.nWaysOuter = 1;
    configStandalone.rec.tpc.trackReferenceX = 83;
    configStandalone.proc.outputSharedClusterMap = 1;
  }

  if (configStandalone.outputcontrolmem) {
    bool forceEmptyMemory = getenv("LD_PRELOAD") && strstr(getenv("LD_PRELOAD"), "valgrind") != nullptr;
    outputmemory.reset((char*)operator new(configStandalone.outputcontrolmem, std::align_val_t(GPUCA_BUFFER_ALIGNMENT)));
    if (forceEmptyMemory) {
      printf("Valgrind detected, emptying GPU output memory to avoid false positive undefined reads\n");
      memset(outputmemory.get(), 0, configStandalone.outputcontrolmem);
    }
    if (configStandalone.proc.doublePipeline) {
      outputmemoryPipeline.reset((char*)operator new(configStandalone.outputcontrolmem, std::align_val_t(GPUCA_BUFFER_ALIGNMENT)));
      if (forceEmptyMemory) {
        memset(outputmemoryPipeline.get(), 0, configStandalone.outputcontrolmem);
      }
    }
  }
  if (configStandalone.inputcontrolmem) {
    inputmemory.reset((char*)operator new(configStandalone.inputcontrolmem, std::align_val_t(GPUCA_BUFFER_ALIGNMENT)));
  }

  configStandalone.proc.showOutputStat = true;

  if (configStandalone.runGPU && configStandalone.gpuType == "AUTO") {
    if (GPUReconstruction::CheckInstanceAvailable(GPUReconstruction::DeviceType::CUDA, configStandalone.proc.debugLevel >= 2)) {
      configStandalone.gpuType = "CUDA";
    } else if (GPUReconstruction::CheckInstanceAvailable(GPUReconstruction::DeviceType::HIP, configStandalone.proc.debugLevel >= 2)) {
      configStandalone.gpuType = "HIP";
    } else if (GPUReconstruction::CheckInstanceAvailable(GPUReconstruction::DeviceType::OCL, configStandalone.proc.debugLevel >= 2)) {
      configStandalone.gpuType = "OCL";
    } else {
      if (configStandalone.runGPU > 1 && configStandalone.runGPUforce) {
        printf("No GPU backend / device found, running on CPU is disabled due to runGPUforce\n");
        return 1;
      }
      configStandalone.runGPU = false;
      configStandalone.gpuType = "CPU";
    }
  }

  if (configStandalone.printSettings) {
    configStandalone.proc.printSettings = true;
  }
  if (configStandalone.printSettings > 1) {
    printf("Config Dump after ReadConfiguration\n");
    qConfigPrint();
  }

  return (0);
}

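// Configure GRP, reconstruction, processing, and reco-step settings for all
// GPUReconstruction instances and initialize them. Returns nonzero on error.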
int32_t SetupReconstruction()
{
  if (!configStandalone.eventGenerator) {
    char filename[256];
    snprintf(filename, 256, "events/%s/", configStandalone.eventsDir);
    if (configStandalone.noEvents) {
      configStandalone.eventsDir = "NON_EXISTING";
      configStandalone.rundEdx = false;
    } else if (rec->ReadSettings(filename)) {
      printf("Error reading event config file\n");
      return 1;
    }
    printf("Read event settings from dir %s (solenoidBz: %f, home-made events %d, constBz %d, maxTimeBin %d)\n", filename, rec->GetGRPSettings().solenoidBzNominalGPU, (int32_t)rec->GetGRPSettings().homemadeEvents, (int32_t)rec->GetGRPSettings().constBz, rec->GetGRPSettings().grpContinuousMaxTimeBin);
    if (configStandalone.testSyncAsync) {
      recAsync->ReadSettings(filename);
    }
    if (configStandalone.proc.doublePipeline) {
      recPipeline->ReadSettings(filename);
    }
  }

  chainTracking->mConfigDisplay = &configStandalone.display;
  chainTracking->mConfigQA = &configStandalone.QA;

  GPUSettingsGRP grp = rec->GetGRPSettings();
  GPUSettingsRec recSet;
  GPUSettingsProcessing procSet;
  recSet = configStandalone.rec;
  procSet = configStandalone.proc;
  GPURecoStepConfiguration steps;

  if (configStandalone.eventGenerator) {
    grp.homemadeEvents = true;
  }
  if (configStandalone.solenoidBzNominalGPU != -1e6f) {
    grp.solenoidBzNominalGPU = configStandalone.solenoidBzNominalGPU;
  }
  if (configStandalone.constBz) {
    grp.constBz = true;
  }
  if (configStandalone.TF.nMerge || configStandalone.TF.bunchSim) {
    if (grp.grpContinuousMaxTimeBin) {
      printf("ERROR: requested to overlay continuous data - not supported\n");
      return 1;
    }
    if (!configStandalone.cont) {
      printf("Continuous mode forced\n");
      configStandalone.cont = true;
    }
    if (chainTracking->GetTPCTransformHelper()) {
      // Assumption (restored line): derive the continuous max time bin from the overlay time frame length
      grp.grpContinuousMaxTimeBin = configStandalone.TF.timeFrameLen * GPUReconstructionTimeframe::TPCZ / GPUReconstructionTimeframe::DRIFT_TIME;
    }
  }
  if (configStandalone.cont && grp.grpContinuousMaxTimeBin == 0) {
    grp.grpContinuousMaxTimeBin = -1; // continuous mode with no limit set: use the default maximum
  }
  if (rec->GetDeviceType() == GPUReconstruction::DeviceType::CPU) {
    printf("Standalone Test Framework for CA Tracker - Using CPU\n");
  } else {
    printf("Standalone Test Framework for CA Tracker - Using GPU\n");
  }

  configStandalone.proc.forceMemoryPoolSize = (configStandalone.proc.forceMemoryPoolSize == 1 && configStandalone.eventDisplay) ? 2 : configStandalone.proc.forceMemoryPoolSize;
  if (configStandalone.eventDisplay) {
    eventDisplay.reset(GPUDisplayFrontendInterface::getFrontend(configStandalone.display.displayFrontend.c_str()));
    if (eventDisplay.get() == nullptr) {
      throw std::runtime_error("Requested display not available");
    }
    printf("Enabling event display (%s backend)\n", eventDisplay->frontendName());
    procSet.eventDisplay = eventDisplay.get();
    if (!configStandalone.QA.noMC) {
      procSet.runMC = true;
    }
  }

  if (procSet.runQA && !configStandalone.QA.noMC) {
    procSet.runMC = true;
  }

  steps.steps = GPUDataTypes::RecoStep::AllRecoSteps;
  if (configStandalone.runTRD != -1) {
    steps.steps.setBits(GPUDataTypes::RecoStep::TRDTracking, configStandalone.runTRD > 0);
  } else if (chainTracking->GetTRDGeometry() == nullptr) {
    steps.steps.setBits(GPUDataTypes::RecoStep::TRDTracking, false);
  }
  if (configStandalone.rundEdx != -1) {
    steps.steps.setBits(GPUDataTypes::RecoStep::TPCdEdx, configStandalone.rundEdx > 0);
  }
  if (configStandalone.runCompression != -1) {
    steps.steps.setBits(GPUDataTypes::RecoStep::TPCCompression, configStandalone.runCompression > 0);
  }
  if (configStandalone.runTransformation != -1) {
    steps.steps.setBits(GPUDataTypes::RecoStep::TPCConversion, configStandalone.runTransformation > 0);
  }
  steps.steps.setBits(GPUDataTypes::RecoStep::Refit, configStandalone.runRefit);
  if (!configStandalone.runMerger) {
    steps.steps.setBits(GPUDataTypes::RecoStep::TPCMerging, false);
    steps.steps.setBits(GPUDataTypes::RecoStep::TRDTracking, false);
    steps.steps.setBits(GPUDataTypes::RecoStep::TPCdEdx, false);
    steps.steps.setBits(GPUDataTypes::RecoStep::TPCCompression, false);
    steps.steps.setBits(GPUDataTypes::RecoStep::Refit, false);
  }

  if (configStandalone.TF.bunchSim || configStandalone.TF.nMerge) {
    steps.steps.setBits(GPUDataTypes::RecoStep::TRDTracking, false);
  }
  steps.inputs.set(GPUDataTypes::InOutType::TPCClusters, GPUDataTypes::InOutType::TRDTracklets);
  steps.steps.setBits(GPUDataTypes::RecoStep::TPCDecompression, false);
  steps.inputs.setBits(GPUDataTypes::InOutType::TPCCompressedClusters, false);
  if (grp.doCompClusterDecode) {
    steps.inputs.setBits(GPUDataTypes::InOutType::TPCCompressedClusters, true);
    steps.inputs.setBits(GPUDataTypes::InOutType::TPCClusters, false);
    steps.steps.setBits(GPUDataTypes::RecoStep::TPCCompression, false);
    steps.steps.setBits(GPUDataTypes::RecoStep::TPCClusterFinding, false);
    steps.steps.setBits(GPUDataTypes::RecoStep::TPCDecompression, true);
    steps.outputs.setBits(GPUDataTypes::InOutType::TPCCompressedClusters, false);
  } else if (grp.needsClusterer) {
    steps.inputs.setBits(GPUDataTypes::InOutType::TPCRaw, true);
    steps.inputs.setBits(GPUDataTypes::InOutType::TPCClusters, false);
  } else {
    steps.steps.setBits(GPUDataTypes::RecoStep::TPCClusterFinding, false);
  }

  if (configStandalone.recoSteps >= 0) {
    steps.steps &= configStandalone.recoSteps;
  }
  if (configStandalone.recoStepsGPU >= 0) {
    steps.stepsGPUMask &= configStandalone.recoStepsGPU;
  }

  steps.outputs.clear();
  steps.outputs.setBits(GPUDataTypes::InOutType::TPCMergedTracks, steps.steps.isSet(GPUDataTypes::RecoStep::TPCMerging));
  steps.outputs.setBits(GPUDataTypes::InOutType::TPCCompressedClusters, steps.steps.isSet(GPUDataTypes::RecoStep::TPCCompression));
  steps.outputs.setBits(GPUDataTypes::InOutType::TRDTracks, steps.steps.isSet(GPUDataTypes::RecoStep::TRDTracking));
  steps.outputs.setBits(GPUDataTypes::InOutType::TPCClusters, steps.steps.isSet(GPUDataTypes::RecoStep::TPCClusterFinding));

  if (steps.steps.isSet(GPUDataTypes::RecoStep::TRDTracking)) {
    if (recSet.tpc.nWays > 1) {
      recSet.tpc.nWaysOuter = 1;
    }
    if (procSet.createO2Output && !procSet.trdTrackModelO2) {
      procSet.createO2Output = 1; // Must not be 2, to make sure TPC GPU tracks are still available for TRD
    }
  }

  if (configStandalone.testSyncAsync || configStandalone.testSync) {
    // Set settings for synchronous
    if (configStandalone.rundEdx == -1) {
      steps.steps.setBits(GPUDataTypes::RecoStep::TPCdEdx, 0);
    }
    recSet.useMatLUT = false;
    if (configStandalone.testSyncAsync) {
      procSet.eventDisplay = nullptr;
    }
  }
  if (configStandalone.proc.rtc.optSpecialCode == -1) {
    configStandalone.proc.rtc.optSpecialCode = configStandalone.testSyncAsync || configStandalone.testSync;
  }

  rec->SetSettings(&grp, &recSet, &procSet, &steps);
  if (configStandalone.proc.doublePipeline) {
    recPipeline->SetSettings(&grp, &recSet, &procSet, &steps);
  }
  if (configStandalone.testSyncAsync) {
    // Set settings for asynchronous
    steps.steps.setBits(GPUDataTypes::RecoStep::TPCDecompression, true);
    steps.steps.setBits(GPUDataTypes::RecoStep::TPCdEdx, true);
    steps.steps.setBits(GPUDataTypes::RecoStep::TPCCompression, false);
    steps.steps.setBits(GPUDataTypes::RecoStep::TPCClusterFinding, false);
    steps.inputs.setBits(GPUDataTypes::InOutType::TPCRaw, false);
    steps.inputs.setBits(GPUDataTypes::InOutType::TPCClusters, false);
    steps.inputs.setBits(GPUDataTypes::InOutType::TPCCompressedClusters, true);
    steps.outputs.setBits(GPUDataTypes::InOutType::TPCCompressedClusters, false);
    procSet.runMC = false;
    procSet.runQA = false;
    procSet.eventDisplay = eventDisplay.get();
    procSet.runCompressionStatistics = 0;
    procSet.rtc.optSpecialCode = 0;
    if (recSet.tpc.rejectionStrategy >= GPUSettings::RejectionStrategyB) {
      procSet.tpcInputWithClusterRejection = 1;
    }
    recSet.tpc.disableRefitAttachment = 0xFF;
    recSet.tpc.looperInterpolationInExtraPass = 0;
    recSet.maxTrackQPtB5 = CAMath::Min(recSet.maxTrackQPtB5, recSet.tpc.rejectQPtB5);
    recSet.useMatLUT = true;
    recAsync->SetSettings(&grp, &recSet, &procSet, &steps);
  }

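  // Route the reconstruction output into the externally allocated buffers (--outputMemory)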
  if (configStandalone.outputcontrolmem) {
    rec->SetOutputControl(outputmemory.get(), configStandalone.outputcontrolmem);
    if (configStandalone.proc.doublePipeline) {
      recPipeline->SetOutputControl(outputmemoryPipeline.get(), configStandalone.outputcontrolmem);
    }
  }

  o2::base::Propagator* prop = nullptr;
  prop = o2::base::Propagator::Instance(true);
  prop->setGPUField(&rec->GetParam().polynomialField);
  prop->setNominalBz(rec->GetParam().bzkG);
  prop->setMatLUT(chainTracking->GetMatLUT());
  chainTracking->SetO2Propagator(prop);
  if (chainTrackingAsync) {
    chainTrackingAsync->SetO2Propagator(prop);
  }
  if (chainTrackingPipeline) {
    chainTrackingPipeline->SetO2Propagator(prop);
  }
  procSet.o2PropagatorUseGPUField = true;

  if (rec->Init()) {
    printf("Error initializing GPUReconstruction!\n");
    return 1;
  }
  if (configStandalone.outputcontrolmem && rec->IsGPU()) {
    if (rec->registerMemoryForGPU(outputmemory.get(), configStandalone.outputcontrolmem) || (configStandalone.proc.doublePipeline && recPipeline->registerMemoryForGPU(outputmemoryPipeline.get(), configStandalone.outputcontrolmem))) {
      printf("ERROR registering memory for the GPU!!!\n");
      return 1;
    }
  }
  if (configStandalone.inputcontrolmem && rec->IsGPU()) {
    if (rec->registerMemoryForGPU(inputmemory.get(), configStandalone.inputcontrolmem)) {
      printf("ERROR registering input memory for the GPU!!!\n");
      return 1;
    }
  }
  if (configStandalone.proc.debugLevel >= 4) {
    rec->PrintKernelOccupancies();
  }
  return (0);
}

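// Read one dumped event (and, for O2 QA builds, its MC labels) from the events directory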
int32_t ReadEvent(int32_t n)
{
  char filename[256];
  snprintf(filename, 256, "events/%s/" GPUCA_EVDUMP_FILE ".%d.dump", configStandalone.eventsDir, n);
  if (configStandalone.inputcontrolmem && !configStandalone.preloadEvents) {
    rec->SetInputControl(inputmemory.get(), configStandalone.inputcontrolmem);
  }
  int32_t r = chainTracking->ReadData(filename);
  if (r) {
    return r;
  }
#if defined(GPUCA_TPC_GEOMETRY_O2) && defined(GPUCA_BUILD_QA) && !defined(GPUCA_O2_LIB)
  if ((configStandalone.proc.runQA || configStandalone.eventDisplay) && !configStandalone.QA.noMC) {
    chainTracking->ForceInitQA();
    snprintf(filename, 256, "events/%s/mc.%d.dump", configStandalone.eventsDir, n);
    if (chainTracking->GetQA()->ReadO2MCData(filename)) {
      snprintf(filename, 256, "events/%s/mc.%d.dump", configStandalone.eventsDir, 0);
      if (chainTracking->GetQA()->ReadO2MCData(filename) && configStandalone.proc.runQA) {
        throw std::runtime_error("Error reading O2 MC dump");
      }
    }
  }
#endif
  if (chainTracking->mIOPtrs.clustersNative && (configStandalone.TF.bunchSim || configStandalone.TF.nMerge || !configStandalone.runTransformation)) {
    if (configStandalone.proc.debugLevel >= 2) {
      printf("Converting Native to Legacy ClusterData for overlaying - WARNING: No raw clusters produced - Compression etc will not run!!!\n");
    }
    chainTracking->ConvertNativeToClusterDataLegacy();
  }
  return 0;
}

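// Load event iEvent into slot x of ioPtrEvents/ioMemEvents: from a dump file, an overlay
// of merged events, or a simulated time frame, with optional TPC zero-suppression encoding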
int32_t LoadEvent(int32_t iEvent, int32_t x)
{
  if (configStandalone.TF.bunchSim) {
    if (tf->LoadCreateTimeFrame(iEvent)) {
      return 1;
    }
  } else if (configStandalone.TF.nMerge) {
    if (tf->LoadMergedEvents(iEvent)) {
      return 1;
    }
  } else {
    if (ReadEvent(iEvent)) {
      return 1;
    }
  }
  bool encodeZS = configStandalone.encodeZS == -1 ? (chainTracking->mIOPtrs.tpcPackedDigits && !chainTracking->mIOPtrs.tpcZS) : (bool)configStandalone.encodeZS;
  bool zsFilter = configStandalone.zsFilter == -1 ? (!encodeZS && chainTracking->mIOPtrs.tpcPackedDigits && !chainTracking->mIOPtrs.tpcZS) : (bool)configStandalone.zsFilter;
  if (encodeZS || zsFilter) {
    if (!chainTracking->mIOPtrs.tpcPackedDigits) {
      printf("Need digit input to run ZS\n");
      return 1;
    }
    if (zsFilter) {
      chainTracking->ConvertZSFilter(configStandalone.zs12bit); // assumption: argument restored from the zs12bit option
    }
    if (encodeZS) {
      chainTracking->ConvertZSEncoder(configStandalone.zsVersion); // assumption: argument restored from the zsVersion option
    }
  }
  if (!configStandalone.runTransformation) {
    chainTracking->mIOPtrs.clustersNative = nullptr;
  } else {
    for (int32_t i = 0; i < chainTracking->NSECTORS; i++) {
      if (chainTracking->mIOPtrs.rawClusters[i]) {
        if (configStandalone.proc.debugLevel >= 2) {
          printf("Converting Legacy Raw Cluster to Native\n");
        }
        chainTracking->ConvertRun2RawToNative();
        break;
      }
    }
  }

  if (configStandalone.stripDumpedEvents) {
    if (chainTracking->mIOPtrs.tpcZS) {
      chainTracking->mIOPtrs.tpcPackedDigits = nullptr;
    }
  }

  if (configStandalone.runTransformation && !chainTracking->mIOPtrs.clustersNative) {
    printf("Need cluster native data for on-the-fly TPC transform\n");
    return 1;
  }

  ioPtrEvents[x] = chainTracking->mIOPtrs;
  ioMemEvents[x] = std::move(chainTracking->mIOMem);
  return 0;
}

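// Count the good reconstructed tracks of a chain and accumulate track/cluster totals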
void OutputStat(GPUChainTracking* t, int64_t* nTracksTotal = nullptr, int64_t* nClustersTotal = nullptr)
{
  int32_t nTracks = 0;
  if (t->GetProcessingSettings().createO2Output) {
    nTracks += t->mIOPtrs.nOutputTracksTPCO2;
  } else {
    for (uint32_t k = 0; k < t->mIOPtrs.nMergedTracks; k++) {
      if (t->mIOPtrs.mergedTracks[k].OK()) {
        nTracks++;
      }
    }
  }
  if (nTracksTotal && nClustersTotal) {
    *nTracksTotal += nTracks;
    *nClustersTotal += t->mIOPtrs.nMergedTrackHits;
  }
}

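// Run the reconstruction chains of one GPUReconstruction instance for the requested number
// of iterations; in double-pipeline mode this runs once per pipeline thread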
int32_t RunBenchmark(GPUReconstruction* recUse, GPUChainTracking* chainTrackingUse, int32_t runs, int32_t iEvent, int64_t* nTracksTotal, int64_t* nClustersTotal, int32_t threadId = 0, HighResTimer* timerPipeline = nullptr)
{
  int32_t iRun = 0, iteration = 0;
  while ((iteration = nIteration.fetch_add(1)) < runs) {
    if (configStandalone.runs > 1) {
      printf("Run %d (thread %d)\n", iteration + 1, threadId);
    }
    recUse->SetResetTimers(iRun < configStandalone.runsInit);
    if (configStandalone.outputcontrolmem) {
      recUse->SetOutputControl(threadId ? outputmemoryPipeline.get() : outputmemory.get(), configStandalone.outputcontrolmem);
    }

    if (configStandalone.testSyncAsync) {
      printf("Running synchronous phase\n");
    }
    const GPUTrackingInOutPointers& ioPtrs = ioPtrEvents[!configStandalone.preloadEvents ? 0 : configStandalone.proc.doublePipeline ? (iteration % ioPtrEvents.size()) : (iEvent - configStandalone.StartEvent)];
    chainTrackingUse->mIOPtrs = ioPtrs;
    if (iteration == (configStandalone.proc.doublePipeline ? 2 : (configStandalone.runs - 1))) {
      if (configStandalone.proc.doublePipeline && timerPipeline) {
        timerPipeline->Start();
      }
      if (configStandalone.controlProfiler) {
        recUse->startGPUProfiling(); // assumption: restored profiler start call
      }
    }
    int32_t tmpRetVal = recUse->RunChains();
    int32_t iterationEnd = nIterationEnd.fetch_add(1);
    if (iterationEnd == configStandalone.runs - 1) {
      if (configStandalone.proc.doublePipeline && timerPipeline) {
        timerPipeline->Stop();
      }
      if (configStandalone.controlProfiler) {
        recUse->endGPUProfiling(); // assumption: restored profiler stop call
      }
    }

    if (tmpRetVal == 0 || tmpRetVal == 2) {
      OutputStat(chainTrackingUse, iRun == 0 ? nTracksTotal : nullptr, iRun == 0 ? nClustersTotal : nullptr);
      if (configStandalone.memoryStat) {
        recUse->PrintMemoryStatistics();
      } else if (configStandalone.proc.debugLevel >= 2) {
        recUse->PrintMemoryOverview();
      }
    }

    if (tmpRetVal == 0 && configStandalone.testSyncAsync) {
      if (configStandalone.testSyncAsync) {
        printf("Running asynchronous phase\n");
      }

      // Assumption (restored lines): copy the compressed clusters aside and strip all other
      // inputs, so that the asynchronous phase runs from the compressed clusters only
      vecpod<char> compressedTmpMem(chainTracking->mIOPtrs.tpcCompressedClusters->totalDataSize);
      memcpy(compressedTmpMem.data(), (const void*)chainTracking->mIOPtrs.tpcCompressedClusters, chainTracking->mIOPtrs.tpcCompressedClusters->totalDataSize);

      chainTrackingAsync->mIOPtrs = ioPtrs;
      chainTrackingAsync->mIOPtrs.tpcCompressedClusters = (const o2::tpc::CompressedClustersFlat*)compressedTmpMem.data();
      chainTrackingAsync->mIOPtrs.tpcZS = nullptr;
      chainTrackingAsync->mIOPtrs.tpcPackedDigits = nullptr;
      chainTrackingAsync->mIOPtrs.mcInfosTPC = nullptr;
      chainTrackingAsync->mIOPtrs.nMCInfosTPC = 0;
      chainTrackingAsync->mIOPtrs.mcLabelsTPC = nullptr;
      chainTrackingAsync->mIOPtrs.nMCLabelsTPC = 0;
      for (int32_t i = 0; i < chainTracking->NSECTORS; i++) {
        chainTrackingAsync->mIOPtrs.clusterData[i] = nullptr;
        chainTrackingAsync->mIOPtrs.nClusterData[i] = 0;
        chainTrackingAsync->mIOPtrs.rawClusters[i] = nullptr;
        chainTrackingAsync->mIOPtrs.nRawClusters[i] = 0;
      }
      chainTrackingAsync->mIOPtrs.clustersNative = nullptr;
      recAsync->SetResetTimers(iRun < configStandalone.runsInit);
      tmpRetVal = recAsync->RunChains();
      if (tmpRetVal == 0 || tmpRetVal == 2) {
        OutputStat(chainTrackingAsync, nullptr, nullptr);
        if (configStandalone.memoryStat) {
          recAsync->PrintMemoryStatistics();
        }
      }
      recAsync->ClearAllocatedMemory();
    }
    if (!configStandalone.proc.doublePipeline) {
      recUse->ClearAllocatedMemory();
    }

    if (tmpRetVal == 2) {
      configStandalone.continueOnError = 0; // Forced exit from event display loop
      configStandalone.noprompt = 1;
    }
    if (tmpRetVal == 3 && configStandalone.proc.ignoreNonFatalGPUErrors) {
      printf("Non-FATAL GPU error occurred, ignoring\n");
    } else if (tmpRetVal && !configStandalone.continueOnError) {
      if (tmpRetVal != 2) {
        printf("Error occurred\n");
      }
      return 1;
    }
    iRun++;
  }
  if (configStandalone.proc.doublePipeline) {
    recUse->ClearAllocatedMemory();
  }
  nIteration.store(runs);
  return 0;
}

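// Entry point: create the GPUReconstruction instances and chains, load or generate events,
// and run the benchmark loops. A hedged usage sketch, using only options quoted in the
// messages above (the "./ca" binary name is an assumption, not confirmed by this file):
//   ./ca --preloadEvents --runs 4 --outputMemory 1000000000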
int32_t main(int argc, char** argv)
{
  std::unique_ptr<GPUReconstruction> recUnique, recUniqueAsync, recUniquePipeline;

  if (ReadConfiguration(argc, argv)) {
    return 1;
  }

  GPUSettingsDeviceBackend deviceSet;
  deviceSet.deviceType = configStandalone.runGPU ? GPUDataTypes::GetDeviceType(configStandalone.gpuType.c_str()) : GPUDataTypes::DeviceType::CPU;
  deviceSet.forceDeviceType = configStandalone.runGPUforce;
  deviceSet.master = nullptr;
  recUnique.reset(GPUReconstruction::CreateInstance(deviceSet));
  rec = recUnique.get();
  deviceSet.master = rec;
  if (configStandalone.testSyncAsync) {
    recUniqueAsync.reset(GPUReconstruction::CreateInstance(deviceSet));
    recAsync = recUniqueAsync.get();
  }
  if (configStandalone.proc.doublePipeline) {
    recUniquePipeline.reset(GPUReconstruction::CreateInstance(deviceSet));
    recPipeline = recUniquePipeline.get();
  }
  if (rec == nullptr || (configStandalone.testSyncAsync && recAsync == nullptr)) {
    printf("Error initializing GPUReconstruction\n");
    return 1;
  }
  rec->SetDebugLevelTmp(configStandalone.proc.debugLevel);
  chainTracking = rec->AddChain<GPUChainTracking>();
  if (configStandalone.testSyncAsync) {
    if (configStandalone.proc.debugLevel >= 3) {
      recAsync->SetDebugLevelTmp(configStandalone.proc.debugLevel);
    }
    chainTrackingAsync = recAsync->AddChain<GPUChainTracking>();
    chainTrackingAsync->SetQAFromForeignChain(chainTracking);
  }
  if (configStandalone.proc.doublePipeline) {
    if (configStandalone.proc.debugLevel >= 3) {
      recPipeline->SetDebugLevelTmp(configStandalone.proc.debugLevel);
    }
    chainTrackingPipeline = recPipeline->AddChain<GPUChainTracking>();
    chainTrackingPipeline->SetQAFromForeignChain(chainTracking);
  }
  if (!configStandalone.proc.doublePipeline) {
    chainITS = rec->AddChain<GPUChainITS>(0);
    if (configStandalone.testSyncAsync) {
      chainITSAsync = recAsync->AddChain<GPUChainITS>(0);
    }
  }

  if (SetupReconstruction()) {
    return 1;
  }

  std::unique_ptr<std::thread> pipelineThread;
  if (configStandalone.proc.doublePipeline) {
    pipelineThread.reset(new std::thread([]() { rec->RunPipelineWorker(); }));
  }

  if (configStandalone.seed == -1) {
    std::random_device rd;
    configStandalone.seed = (int32_t)rd();
    printf("Using random seed %d\n", configStandalone.seed);
  }

  srand(configStandalone.seed);

  for (nEventsInDirectory = 0; true; nEventsInDirectory++) {
    std::ifstream in;
    char filename[256];
    snprintf(filename, 256, "events/%s/" GPUCA_EVDUMP_FILE ".%d.dump", configStandalone.eventsDir, nEventsInDirectory);
    in.open(filename, std::ifstream::binary);
    if (in.fail()) {
      break;
    }
    in.close();
  }

  if (configStandalone.TF.bunchSim || configStandalone.TF.nMerge) {
    tf.reset(new GPUReconstructionTimeframe(chainTracking, ReadEvent, nEventsInDirectory));
  }

  if (configStandalone.eventGenerator) {
    genEvents::RunEventGenerator(chainTracking);
    return 0;
  }

  int32_t nEvents = configStandalone.nEvents;
  if (configStandalone.TF.bunchSim) {
    nEvents = configStandalone.nEvents > 0 ? configStandalone.nEvents : 1;
  } else {
    if (nEvents == -1 || nEvents > nEventsInDirectory) {
      if (nEvents >= 0) {
        printf("Only %d events available in directory %s (%d events requested)\n", nEventsInDirectory, configStandalone.eventsDir, nEvents);
      }
      nEvents = nEventsInDirectory;
    }
    if (configStandalone.TF.nMerge > 1) {
      nEvents /= configStandalone.TF.nMerge;
    }
  }

  ioPtrEvents.resize(configStandalone.preloadEvents ? (nEvents - configStandalone.StartEvent) : 1);
  ioMemEvents.resize(configStandalone.preloadEvents ? (nEvents - configStandalone.StartEvent) : 1);
  if (configStandalone.preloadEvents) {
    printf("Preloading events%s", configStandalone.proc.debugLevel >= 2 ? "\n" : "");
    fflush(stdout);
    for (int32_t i = 0; i < nEvents - configStandalone.StartEvent; i++) {
      LoadEvent(configStandalone.StartEvent + i, i);
      if (configStandalone.proc.debugLevel >= 2) {
        printf("Loading event %d\n", i);
      } else {
        printf(" %d", i);
      }
      fflush(stdout);
    }
    printf("\n");
  }

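  // Outer benchmark loop: process the full event set configStandalone.runs2 times and accumulate track and cluster totals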
  for (int32_t iRunOuter = 0; iRunOuter < configStandalone.runs2; iRunOuter++) {
    if (configStandalone.QA.inputHistogramsOnly) {
      chainTracking->ForceInitQA();
      break;
    }
    if (configStandalone.runs2 > 1) {
      printf("RUN2: %d\n", iRunOuter);
    }
    int64_t nTracksTotal = 0;
    int64_t nClustersTotal = 0;
    int32_t nEventsProcessed = 0;

    if (configStandalone.noEvents) {
      nEvents = 1;
      configStandalone.StartEvent = 0;
      chainTracking->ClearIOPointers();
    }

    for (int32_t iEvent = configStandalone.StartEvent; iEvent < nEvents; iEvent++) {
      if (iEvent != configStandalone.StartEvent) {
        printf("\n");
      }
      if (configStandalone.noEvents == false && !configStandalone.preloadEvents) {
        HighResTimer timerLoad;
        timerLoad.Start();
        if (LoadEvent(iEvent, 0)) {
          goto breakrun;
        }
        if (configStandalone.dumpEvents) {
          char fname[1024];
          snprintf(fname, 1024, "event.%d.dump", nEventsProcessed);
          chainTracking->DumpData(fname);
          if (nEventsProcessed == 0) {
            rec->DumpSettings();
          }
        }

        // Assumption (restored lines): optionally recompute the continuous max time bin from the loaded input
        if (configStandalone.overrideMaxTimebin && (chainTracking->mIOPtrs.clustersNative || chainTracking->mIOPtrs.tpcPackedDigits || chainTracking->mIOPtrs.tpcZS)) {
          GPUSettingsGRP grp = rec->GetGRPSettings();
          if (grp.grpContinuousMaxTimeBin == 0) {
            printf("Cannot override max time bin for non-continuous data!\n");
          } else {
            grp.grpContinuousMaxTimeBin = chainTracking->mIOPtrs.tpcZS ? GPUReconstructionConvert::GetMaxTimeBin(*chainTracking->mIOPtrs.tpcZS) : chainTracking->mIOPtrs.tpcPackedDigits ? GPUReconstructionConvert::GetMaxTimeBin(*chainTracking->mIOPtrs.tpcPackedDigits) : GPUReconstructionConvert::GetMaxTimeBin(*chainTracking->mIOPtrs.clustersNative);
            printf("Max time bin set to %d\n", grp.grpContinuousMaxTimeBin);
            rec->UpdateSettings(&grp);
            if (recAsync) {
              recAsync->UpdateSettings(&grp);
            }
            if (recPipeline) {
              recPipeline->UpdateSettings(&grp);
            }
          }
        }
        printf("Loading time: %'d us\n", (int32_t)(1000000 * timerLoad.GetCurrentElapsedTime()));
      }
      printf("Processing Event %d\n", iEvent);

      nIteration.store(0);
      nIterationEnd.store(0);
      double pipelineWalltime = 1.;
      if (configStandalone.proc.doublePipeline) {
        HighResTimer timerPipeline;
        if (RunBenchmark(rec, chainTracking, 1, iEvent, &nTracksTotal, &nClustersTotal) || RunBenchmark(recPipeline, chainTrackingPipeline, 2, iEvent, &nTracksTotal, &nClustersTotal)) {
          goto breakrun;
        }
        auto pipeline1 = std::async(std::launch::async, RunBenchmark, rec, chainTracking, configStandalone.runs, iEvent, &nTracksTotal, &nClustersTotal, 0, &timerPipeline);
        auto pipeline2 = std::async(std::launch::async, RunBenchmark, recPipeline, chainTrackingPipeline, configStandalone.runs, iEvent, &nTracksTotal, &nClustersTotal, 1, &timerPipeline);
        if (pipeline1.get() || pipeline2.get()) {
          goto breakrun;
        }
        pipelineWalltime = timerPipeline.GetElapsedTime() / (configStandalone.runs - 2);
        printf("Pipeline wall time: %f, %d iterations, %f per event\n", timerPipeline.GetElapsedTime(), configStandalone.runs - 2, pipelineWalltime);
      } else {
        if (RunBenchmark(rec, chainTracking, configStandalone.runs, iEvent, &nTracksTotal, &nClustersTotal)) {
          goto breakrun;
        }
      }
      nEventsProcessed++;

      if (configStandalone.timeFrameTime) {
        double nClusters = chainTracking->GetProcessors()->tpcMerger.NMaxClusters();
        if (nClusters > 0) {
          const int32_t nOrbits = 32;
          const double colRate = 50000;
          const double orbitRate = 11245;
          const double nClsPerTF = 755851. * nOrbits * colRate / orbitRate;
          double timePerTF = (configStandalone.proc.doublePipeline ? pipelineWalltime : ((configStandalone.proc.debugLevel ? rec->GetStatKernelTime() : rec->GetStatWallTime()) / 1000000.)) * nClsPerTF / nClusters;
          const double nGPUsReq = timePerTF * orbitRate / nOrbits;
          char stat[1024];
          snprintf(stat, 1024, "Sync phase: %.2f sec per %d orbit TF, %.1f GPUs required", timePerTF, nOrbits, nGPUsReq);
          if (configStandalone.testSyncAsync) {
            timePerTF = (configStandalone.proc.debugLevel ? recAsync->GetStatKernelTime() : recAsync->GetStatWallTime()) / 1000000. * nClsPerTF / nClusters;
            snprintf(stat + strlen(stat), 1024 - strlen(stat), " - Async phase: %f sec per TF", timePerTF);
          }
          printf("%s (Measured %s time - Extrapolated from %d clusters to %d)\n", stat, configStandalone.proc.debugLevel ? "kernel" : "wall", (int32_t)nClusters, (int32_t)nClsPerTF);
        }
      }

      if (configStandalone.preloadEvents && configStandalone.proc.doublePipeline) {
        break;
      }
    }
    if (nEventsProcessed > 1) {
      printf("Total: %ld clusters, %ld tracks\n", nClustersTotal, nTracksTotal);
    }
  }

breakrun:
  if (rec->GetProcessingSettings().memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_GLOBAL) {
    rec->ClearAllocatedMemory();
  }

#ifndef _WIN32
  if (configStandalone.proc.runQA && configStandalone.fpe) {
    fedisableexcept(FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW);
  }
#endif

  if (configStandalone.proc.doublePipeline) {
    rec->TerminatePipelineWorker();
    pipelineThread->join();
  }

  rec->Finalize();
  if (configStandalone.outputcontrolmem && rec->IsGPU()) {
    if (rec->unregisterMemoryForGPU(outputmemory.get()) || (configStandalone.proc.doublePipeline && recPipeline->unregisterMemoryForGPU(outputmemoryPipeline.get()))) {
      printf("Error unregistering memory\n");
    }
  }
  rec->Exit();

  if (!configStandalone.noprompt) {
    printf("Press a key to exit!\n");
    getchar();
  }
  return (0);
}