standalone.cxx
// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
// All rights not expressly granted are reserved.
//
// This software is distributed under the terms of the GNU General Public
// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
//
// In applying this license CERN does not waive the privileges and immunities
// granted to it by virtue of its status as an Intergovernmental Organization
// or submit itself to any jurisdiction.

/// \file standalone.cxx

#include "utils/qconfig.h"
#include "GPUReconstruction.h"
#include "GPUReconstructionTimeframe.h"
#include "GPUChainTracking.h"
#include "GPUTPCDef.h"
#include "GPUQA.h"
#include "GPUDisplayFrontendInterface.h"
#include "genEvents.h"

#include <iostream>
#include <fstream>
#include <cstdio>
#include <cstring>
#include <chrono>
#include <tuple>
#include <algorithm>
#include <thread>
#include <future>
#include <atomic>
#include <random>

#ifndef _WIN32
#include <unistd.h>
#include <sched.h>
#include <csignal>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/select.h>
#include <cfenv>
#include <clocale>
#include <sys/stat.h>
#endif
#include "utils/timer.h"
#include "utils/vecpod.h"

#include "TPCFastTransform.h"
#include "GPUTPCGMMergedTrack.h"
#include "GPUSettings.h"
#include <vector>

#include "GPUO2DataTypes.h"
#include "GPUChainITS.h"

using namespace o2::gpu;

// #define BROKEN_EVENTS

namespace o2::gpu
{
extern GPUSettingsStandalone configStandalone;
}
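
// Global state of the benchmark: the primary GPUReconstruction instance and
// its chains, optional second instances for the synchronous/asynchronous
// comparison (testSyncAsync) and for the double-pipeline mode, the externally
// controlled input/output buffers, and the preloaded per-event I/O pointers.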
GPUReconstruction *rec, *recAsync, *recPipeline;
GPUChainTracking *chainTracking, *chainTrackingAsync, *chainTrackingPipeline;
GPUChainITS *chainITS, *chainITSAsync, *chainITSPipeline;
void unique_ptr_aligned_delete(char* v)
{
  operator delete(v GPUCA_OPERATOR_NEW_ALIGNMENT);
}
std::unique_ptr<char, void (*)(char*)> outputmemory(nullptr, unique_ptr_aligned_delete), outputmemoryPipeline(nullptr, unique_ptr_aligned_delete), inputmemory(nullptr, unique_ptr_aligned_delete);
std::unique_ptr<GPUDisplayFrontendInterface> eventDisplay;
std::unique_ptr<GPUReconstructionTimeframe> tf;
int32_t nEventsInDirectory = 0;
std::atomic<uint32_t> nIteration, nIterationEnd;

std::vector<GPUTrackingInOutPointers> ioPtrEvents;
std::vector<GPUChainTracking::InOutMemory> ioMemEvents;

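// Parse the command line via qConfig and sanity-check / normalize the
// resulting settings: CPU affinity, FIFO scheduling and floating-point
// exceptions (Linux only), consistency checks between options, allocation of
// the external input/output buffers, and auto-detection of the GPU backend.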
int32_t ReadConfiguration(int argc, char** argv)
{
  int32_t qcRet = qConfigParse(argc, (const char**)argv);
  if (qcRet) {
    if (qcRet != qConfig::qcrHelp) {
      printf("Error parsing command line parameters\n");
    }
    return 1;
  }
  if (configStandalone.printSettings > 1) {
    printf("Config Dump before ReadConfiguration\n");
    qConfigPrint();
  }
  if (configStandalone.proc.debugLevel == -1) {
    configStandalone.proc.debugLevel = 0;
  }
#ifndef _WIN32
  setlocale(LC_ALL, "en_US.utf-8");
  setlocale(LC_NUMERIC, "en_US.utf-8");
  if (configStandalone.cpuAffinity != -1) {
    cpu_set_t mask;
    CPU_ZERO(&mask);
    CPU_SET(configStandalone.cpuAffinity, &mask);

    printf("Setting affinity to restrict to CPU core %d\n", configStandalone.cpuAffinity);
    if (0 != sched_setaffinity(0, sizeof(mask), &mask)) {
      printf("Error setting CPU affinity\n");
      return 1;
    }
  }
  if (configStandalone.fifoScheduler) {
    printf("Setting FIFO scheduler\n");
    sched_param param;
    sched_getparam(0, &param);
    param.sched_priority = 1;
    if (0 != sched_setscheduler(0, SCHED_FIFO, &param)) {
      printf("Error setting scheduler\n");
      return 1;
    }
  }
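  // Trapping FP exceptions is only meaningful where the compiler does not
  // reorder FP math; with fast-math enabled the traps are armed only when
  // explicitly requested (fpe == 1).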
#ifdef __FAST_MATH__
  if (configStandalone.fpe == 1) {
#else
  if (configStandalone.fpe) {
#endif
    feenableexcept(FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW);
  }
  if (configStandalone.flushDenormals) {
    disable_denormals();
  }

#else
  if (configStandalone.cpuAffinity != -1) {
    printf("Affinity setting not supported on Windows\n");
    return 1;
  }
  if (configStandalone.fifoScheduler) {
    printf("FIFO Scheduler setting not supported on Windows\n");
    return 1;
  }
  if (configStandalone.fpe == 1) {
    printf("FPE not supported on Windows\n");
    return 1;
  }
#endif
#ifndef GPUCA_TPC_GEOMETRY_O2
#error Why was configStandalone.rec.tpc.mergerReadFromTrackerDirectly = 0 needed?
  configStandalone.proc.inKernelParallel = false;
  configStandalone.proc.createO2Output = 0;
  if (configStandalone.rundEdx == -1) {
    configStandalone.rundEdx = 0;
  }
#endif
#ifndef GPUCA_BUILD_QA
  if (configStandalone.proc.runQA || configStandalone.eventGenerator) {
    printf("QA not enabled in build\n");
    return 1;
  }
#endif
  if (configStandalone.proc.doublePipeline && configStandalone.testSyncAsync) {
    printf("Cannot run asynchronous processing with double pipeline\n");
    return 1;
  }
  if (configStandalone.proc.doublePipeline && (configStandalone.runs < 4 || !configStandalone.outputcontrolmem)) {
    printf("Double pipeline mode needs at least 3 runs per event and external output. To cycle through multiple events, use --preloadEvents and --runs n for n iterations round-robin\n");
    return 1;
  }
  if (configStandalone.TF.bunchSim && configStandalone.TF.nMerge) {
    printf("Cannot run --MERGE and --SIMBUNCHES together\n");
    return 1;
  }
  if (configStandalone.TF.bunchSim > 1) {
    configStandalone.TF.timeFrameLen = 1.e9 * configStandalone.TF.bunchSim / configStandalone.TF.interactionRate;
  }
  if (configStandalone.TF.nMerge) {
    double len = configStandalone.TF.nMerge - 1;
    if (configStandalone.TF.randomizeDistance) {
      len += 0.5;
    }
    if (configStandalone.TF.shiftFirstEvent) {
      len += 0.5;
    }
  }
  if (configStandalone.QA.inputHistogramsOnly && configStandalone.QA.compareInputs.size() == 0) {
    printf("Can only produce QA pdf output when input files are specified!\n");
    return 1;
  }
  if (configStandalone.QA.inputHistogramsOnly) {
    configStandalone.rundEdx = false;
  }
  if (configStandalone.QA.dumpToROOT) {
    configStandalone.proc.outputSharedClusterMap = true;
  }
  if (configStandalone.eventDisplay) {
    configStandalone.noprompt = 1;
  }
  if (configStandalone.proc.debugLevel >= 4) {
    if (configStandalone.proc.inKernelParallel) {
      configStandalone.proc.inKernelParallel = 1;
    } else {
      configStandalone.proc.nHostThreads = 1;
    }
  }
  if (configStandalone.setO2Settings) {
    if (!(configStandalone.inputcontrolmem && configStandalone.outputcontrolmem)) {
      printf("setO2Settings requires the usage of --inputMemory and --outputMemory as in O2\n");
      return 1;
    }
    if (configStandalone.runGPU) {
      configStandalone.proc.forceHostMemoryPoolSize = 1024 * 1024 * 1024;
    }
    configStandalone.rec.tpc.nWaysOuter = 1;
    configStandalone.rec.tpc.trackReferenceX = 83;
    configStandalone.proc.outputSharedClusterMap = 1;
  }

  if (configStandalone.outputcontrolmem) {
    bool forceEmptyMemory = getenv("LD_PRELOAD") && strstr(getenv("LD_PRELOAD"), "valgrind") != nullptr;
    outputmemory.reset((char*)operator new(configStandalone.outputcontrolmem GPUCA_OPERATOR_NEW_ALIGNMENT));
    if (forceEmptyMemory) {
      printf("Valgrind detected, emptying GPU output memory to avoid false positive undefined reads\n");
      memset(outputmemory.get(), 0, configStandalone.outputcontrolmem);
    }
    if (configStandalone.proc.doublePipeline) {
      outputmemoryPipeline.reset((char*)operator new(configStandalone.outputcontrolmem GPUCA_OPERATOR_NEW_ALIGNMENT));
      if (forceEmptyMemory) {
        memset(outputmemoryPipeline.get(), 0, configStandalone.outputcontrolmem);
      }
    }
  }
  if (configStandalone.inputcontrolmem) {
    inputmemory.reset((char*)operator new(configStandalone.inputcontrolmem GPUCA_OPERATOR_NEW_ALIGNMENT));
  }

  configStandalone.proc.showOutputStat = true;

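  // Probe the available GPU backends in order of preference (CUDA, then HIP,
  // then OpenCL) and fall back to the CPU unless a GPU was explicitly forced.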
  if (configStandalone.runGPU && configStandalone.gpuType == "AUTO") {
    if (GPUReconstruction::CheckInstanceAvailable(GPUReconstruction::DeviceType::CUDA, configStandalone.proc.debugLevel >= 2)) {
      configStandalone.gpuType = "CUDA";
    } else if (GPUReconstruction::CheckInstanceAvailable(GPUReconstruction::DeviceType::HIP, configStandalone.proc.debugLevel >= 2)) {
      configStandalone.gpuType = "HIP";
    } else if (GPUReconstruction::CheckInstanceAvailable(GPUReconstruction::DeviceType::OCL, configStandalone.proc.debugLevel >= 2)) {
      configStandalone.gpuType = "OCL";
    } else {
      if (configStandalone.runGPU > 1 && configStandalone.runGPUforce) {
        printf("No GPU backend / device found, running on CPU is disabled due to runGPUforce\n");
        return 1;
      }
      configStandalone.runGPU = false;
      configStandalone.gpuType = "CPU";
    }
  }

  if (configStandalone.printSettings) {
    configStandalone.proc.printSettings = true;
  }
  if (configStandalone.printSettings > 1) {
    printf("Config Dump after ReadConfiguration\n");
    qConfigPrint();
  }

  return (0);
}

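// Translate the standalone configuration into GRP / reconstruction /
// processing settings, select the reco steps with their inputs and outputs,
// and initialize the GPUReconstruction instance(s), including the optional
// second instances for the sync/async test and the double pipeline.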
int32_t SetupReconstruction()
{
  if (!configStandalone.eventGenerator) {
    char filename[256];
    snprintf(filename, 256, "events/%s/", configStandalone.eventsDir);
    if (configStandalone.noEvents) {
      configStandalone.eventsDir = "NON_EXISTING";
      configStandalone.rundEdx = false;
    } else if (rec->ReadSettings(filename)) {
      printf("Error reading event config file\n");
      return 1;
    }
    printf("Read event settings from dir %s (solenoidBz: %f, home-made events %d, constBz %d, maxTimeBin %d)\n", filename, rec->GetGRPSettings().solenoidBzNominalGPU, (int32_t)rec->GetGRPSettings().homemadeEvents, (int32_t)rec->GetGRPSettings().constBz, rec->GetGRPSettings().grpContinuousMaxTimeBin);
    if (configStandalone.testSyncAsync) {
      recAsync->ReadSettings(filename);
    }
    if (configStandalone.proc.doublePipeline) {
      recPipeline->ReadSettings(filename);
    }
  }

  chainTracking->mConfigDisplay = &configStandalone.display;
  chainTracking->mConfigQA = &configStandalone.QA;

  GPUSettingsGRP grp = rec->GetGRPSettings();
  GPUSettingsRec recSet;
  GPUSettingsProcessing procSet;
  recSet = configStandalone.rec;
  procSet = configStandalone.proc;
  GPURecoStepConfiguration steps;

  if (configStandalone.eventGenerator) {
    grp.homemadeEvents = true;
  }
  if (configStandalone.solenoidBzNominalGPU != -1e6f) {
    grp.solenoidBzNominalGPU = configStandalone.solenoidBzNominalGPU;
  }
  if (configStandalone.constBz) {
    grp.constBz = true;
  }
  if (configStandalone.TF.nMerge || configStandalone.TF.bunchSim) {
    if (grp.grpContinuousMaxTimeBin) {
      printf("ERROR: requested to overlay continuous data - not supported\n");
      return 1;
    }
    if (!configStandalone.cont) {
      printf("Continuous mode forced\n");
      configStandalone.cont = true;
    }
  }
  if (configStandalone.cont && grp.grpContinuousMaxTimeBin == 0) {
    grp.grpContinuousMaxTimeBin = -1; // continuous mode with auto-detected length
  }
  if (rec->GetDeviceType() == GPUReconstruction::DeviceType::CPU) {
    printf("Standalone Test Framework for CA Tracker - Using CPU\n");
  } else {
    printf("Standalone Test Framework for CA Tracker - Using GPU\n");
  }

  configStandalone.proc.forceMemoryPoolSize = (configStandalone.proc.forceMemoryPoolSize == 1 && configStandalone.eventDisplay) ? 2 : configStandalone.proc.forceMemoryPoolSize;
  if (configStandalone.eventDisplay) {
    eventDisplay.reset(GPUDisplayFrontendInterface::getFrontend(configStandalone.display.displayFrontend.c_str()));
    if (eventDisplay.get() == nullptr) {
      throw std::runtime_error("Requested display not available");
    }
    printf("Enabling event display (%s backend)\n", eventDisplay->frontendName());
    procSet.eventDisplay = eventDisplay.get();
    if (!configStandalone.QA.noMC) {
      procSet.runMC = true;
    }
  }

  if (procSet.runQA && !configStandalone.QA.noMC) {
    procSet.runMC = true;
  }

  steps.steps = GPUDataTypes::RecoStep::AllRecoSteps;
  if (configStandalone.runTRD != -1) {
    steps.steps.setBits(GPUDataTypes::RecoStep::TRDTracking, configStandalone.runTRD > 0);
  } else if (chainTracking->GetTRDGeometry() == nullptr) {
    steps.steps.setBits(GPUDataTypes::RecoStep::TRDTracking, false);
  }
  if (configStandalone.rundEdx != -1) {
    steps.steps.setBits(GPUDataTypes::RecoStep::TPCdEdx, configStandalone.rundEdx > 0);
  }
  if (configStandalone.runCompression != -1) {
    steps.steps.setBits(GPUDataTypes::RecoStep::TPCCompression, configStandalone.runCompression > 0);
  }
  if (configStandalone.runTransformation != -1) {
    steps.steps.setBits(GPUDataTypes::RecoStep::TPCConversion, configStandalone.runTransformation > 0);
  }
  steps.steps.setBits(GPUDataTypes::RecoStep::Refit, configStandalone.runRefit);
  if (!configStandalone.runMerger) {
    steps.steps.setBits(GPUDataTypes::RecoStep::TPCMerging, false);
    steps.steps.setBits(GPUDataTypes::RecoStep::TRDTracking, false);
    steps.steps.setBits(GPUDataTypes::RecoStep::TPCdEdx, false);
    steps.steps.setBits(GPUDataTypes::RecoStep::TPCCompression, false);
    steps.steps.setBits(GPUDataTypes::RecoStep::Refit, false);
  }

  if (configStandalone.TF.bunchSim || configStandalone.TF.nMerge) {
    steps.steps.setBits(GPUDataTypes::RecoStep::TRDTracking, false);
  }
  steps.inputs.set(GPUDataTypes::InOutType::TPCClusters, GPUDataTypes::InOutType::TRDTracklets);
  steps.steps.setBits(GPUDataTypes::RecoStep::TPCDecompression, false);
  steps.inputs.setBits(GPUDataTypes::InOutType::TPCCompressedClusters, false);
  if (grp.doCompClusterDecode) {
    steps.inputs.setBits(GPUDataTypes::InOutType::TPCCompressedClusters, true);
    steps.inputs.setBits(GPUDataTypes::InOutType::TPCClusters, false);
    steps.steps.setBits(GPUDataTypes::RecoStep::TPCCompression, false);
    steps.steps.setBits(GPUDataTypes::RecoStep::TPCClusterFinding, false);
    steps.steps.setBits(GPUDataTypes::RecoStep::TPCDecompression, true);
    steps.outputs.setBits(GPUDataTypes::InOutType::TPCCompressedClusters, false);
  } else if (grp.needsClusterer) {
    steps.inputs.setBits(GPUDataTypes::InOutType::TPCRaw, true);
    steps.inputs.setBits(GPUDataTypes::InOutType::TPCClusters, false);
  } else {
    steps.steps.setBits(GPUDataTypes::RecoStep::TPCClusterFinding, false);
  }

  if (configStandalone.recoSteps >= 0) {
    steps.steps &= configStandalone.recoSteps;
  }
  if (configStandalone.recoStepsGPU >= 0) {
    steps.stepsGPUMask &= configStandalone.recoStepsGPU;
  }

  steps.outputs.clear();
  steps.outputs.setBits(GPUDataTypes::InOutType::TPCMergedTracks, steps.steps.isSet(GPUDataTypes::RecoStep::TPCMerging));
  steps.outputs.setBits(GPUDataTypes::InOutType::TPCCompressedClusters, steps.steps.isSet(GPUDataTypes::RecoStep::TPCCompression));
  steps.outputs.setBits(GPUDataTypes::InOutType::TRDTracks, steps.steps.isSet(GPUDataTypes::RecoStep::TRDTracking));
  steps.outputs.setBits(GPUDataTypes::InOutType::TPCClusters, steps.steps.isSet(GPUDataTypes::RecoStep::TPCClusterFinding));

  if (steps.steps.isSet(GPUDataTypes::RecoStep::TRDTracking)) {
    if (recSet.tpc.nWays > 1) {
      recSet.tpc.nWaysOuter = 1;
    }
    if (procSet.createO2Output && !procSet.trdTrackModelO2) {
      procSet.createO2Output = 1; // Must not be 2, to make sure TPC GPU tracks are still available for TRD
    }
  }

  if (configStandalone.testSyncAsync || configStandalone.testSync) {
    // Set settings for synchronous
    if (configStandalone.rundEdx == -1) {
      steps.steps.setBits(GPUDataTypes::RecoStep::TPCdEdx, 0);
    }
    recSet.useMatLUT = false;
    if (configStandalone.testSyncAsync) {
      procSet.eventDisplay = nullptr;
    }
  }
  if (configStandalone.proc.rtc.optSpecialCode == -1) {
    configStandalone.proc.rtc.optSpecialCode = configStandalone.testSyncAsync || configStandalone.testSync;
  }

  rec->SetSettings(&grp, &recSet, &procSet, &steps);
  if (configStandalone.proc.doublePipeline) {
    recPipeline->SetSettings(&grp, &recSet, &procSet, &steps);
  }
  if (configStandalone.testSyncAsync) {
    // Set settings for asynchronous
    steps.steps.setBits(GPUDataTypes::RecoStep::TPCDecompression, true);
    steps.steps.setBits(GPUDataTypes::RecoStep::TPCdEdx, true);
    steps.steps.setBits(GPUDataTypes::RecoStep::TPCCompression, false);
    steps.steps.setBits(GPUDataTypes::RecoStep::TPCClusterFinding, false);
    steps.inputs.setBits(GPUDataTypes::InOutType::TPCRaw, false);
    steps.inputs.setBits(GPUDataTypes::InOutType::TPCClusters, false);
    steps.inputs.setBits(GPUDataTypes::InOutType::TPCCompressedClusters, true);
    steps.outputs.setBits(GPUDataTypes::InOutType::TPCCompressedClusters, false);
    procSet.runMC = false;
    procSet.runQA = false;
    procSet.eventDisplay = eventDisplay.get();
    procSet.runCompressionStatistics = 0;
    procSet.rtc.optSpecialCode = 0;
    if (recSet.tpc.rejectionStrategy >= GPUSettings::RejectionStrategyB) {
      procSet.tpcInputWithClusterRejection = 1;
    }
    recSet.tpc.disableRefitAttachment = 0xFF;
    recSet.tpc.looperInterpolationInExtraPass = 0;
    recSet.maxTrackQPtB5 = CAMath::Min(recSet.maxTrackQPtB5, recSet.tpc.rejectQPtB5);
    recSet.useMatLUT = true;
    recAsync->SetSettings(&grp, &recSet, &procSet, &steps);
  }

  if (configStandalone.outputcontrolmem) {
    rec->SetOutputControl(outputmemory.get(), configStandalone.outputcontrolmem);
    if (configStandalone.proc.doublePipeline) {
      recPipeline->SetOutputControl(outputmemoryPipeline.get(), configStandalone.outputcontrolmem);
    }
  }

  o2::base::Propagator* prop = nullptr;
  prop = o2::base::Propagator::Instance(true);
  prop->setGPUField(&rec->GetParam().polynomialField);
  prop->setNominalBz(rec->GetParam().bzkG);
  prop->setMatLUT(chainTracking->GetMatLUT());
  chainTracking->SetO2Propagator(prop);
  if (chainTrackingAsync) {
    chainTrackingAsync->SetO2Propagator(prop);
  }
  if (chainTrackingPipeline) {
    chainTrackingPipeline->SetO2Propagator(prop);
  }
  procSet.o2PropagatorUseGPUField = true;

  if (rec->Init()) {
    printf("Error initializing GPUReconstruction!\n");
    return 1;
  }
  if (configStandalone.outputcontrolmem && rec->IsGPU()) {
    if (rec->registerMemoryForGPU(outputmemory.get(), configStandalone.outputcontrolmem) || (configStandalone.proc.doublePipeline && recPipeline->registerMemoryForGPU(outputmemoryPipeline.get(), configStandalone.outputcontrolmem))) {
      printf("ERROR registering memory for the GPU!!!\n");
      return 1;
    }
  }
  if (configStandalone.inputcontrolmem && rec->IsGPU()) {
    if (rec->registerMemoryForGPU(inputmemory.get(), configStandalone.inputcontrolmem)) {
      printf("ERROR registering input memory for the GPU!!!\n");
      return 1;
    }
  }
  if (configStandalone.proc.debugLevel >= 4) {
  }
  return (0);
}

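// Read one dumped event (GPUCA_EVDUMP_FILE.<n>.dump) from the events
// directory into the tracking chain, plus the matching MC dump when QA or
// the event display needs Monte Carlo information.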
int32_t ReadEvent(int32_t n)
{
  char filename[256];
  snprintf(filename, 256, "events/%s/" GPUCA_EVDUMP_FILE ".%d.dump", configStandalone.eventsDir, n);
  if (configStandalone.inputcontrolmem && !configStandalone.preloadEvents) {
    rec->SetInputControl(inputmemory.get(), configStandalone.inputcontrolmem);
  }
  int32_t r = chainTracking->ReadData(filename);
  if (r) {
    return r;
  }
#if defined(GPUCA_TPC_GEOMETRY_O2) && defined(GPUCA_BUILD_QA) && !defined(GPUCA_O2_LIB)
  if ((configStandalone.proc.runQA || configStandalone.eventDisplay) && !configStandalone.QA.noMC) {
    snprintf(filename, 256, "events/%s/mc.%d.dump", configStandalone.eventsDir, n);
    if (chainTracking->GetQA()->ReadO2MCData(filename)) {
      snprintf(filename, 256, "events/%s/mc.%d.dump", configStandalone.eventsDir, 0);
      if (chainTracking->GetQA()->ReadO2MCData(filename)) {
        throw std::runtime_error("Error reading O2 MC dump");
      }
    }
  }
#endif
  if (chainTracking->mIOPtrs.clustersNative && (configStandalone.TF.bunchSim || configStandalone.TF.nMerge || !configStandalone.runTransformation)) {
    if (configStandalone.proc.debugLevel >= 2) {
      printf("Converting Native to Legacy ClusterData for overlaying - WARNING: No raw clusters produced - Compression etc will not run!!!\n");
    }
    chainTracking->ConvertNativeToClusterDataLegacy();
  }
  return 0;
}

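// Load event number iEvent into I/O slot x: either synthesize a time frame
// (bunchSim), overlay several dumped events (nMerge), or read a single dump.
// Decides whether TPC zero-suppression encoding/filtering applies (digit
// input required) and whether legacy raw clusters must be converted to the
// native format, before the pointers are stashed in ioPtrEvents/ioMemEvents.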
int32_t LoadEvent(int32_t iEvent, int32_t x)
{
  if (configStandalone.TF.bunchSim) {
    if (tf->LoadCreateTimeFrame(iEvent)) {
      return 1;
    }
  } else if (configStandalone.TF.nMerge) {
    if (tf->LoadMergedEvents(iEvent)) {
      return 1;
    }
  } else {
    if (ReadEvent(iEvent)) {
      return 1;
    }
  }
  bool encodeZS = configStandalone.encodeZS == -1 ? (chainTracking->mIOPtrs.tpcPackedDigits && !chainTracking->mIOPtrs.tpcZS) : (bool)configStandalone.encodeZS;
  bool zsFilter = configStandalone.zsFilter == -1 ? (!encodeZS && chainTracking->mIOPtrs.tpcPackedDigits && !chainTracking->mIOPtrs.tpcZS) : (bool)configStandalone.zsFilter;
  if (encodeZS || zsFilter) {
    if (!chainTracking->mIOPtrs.tpcPackedDigits) {
      printf("Need digit input to run ZS\n");
      return 1;
    }
    if (zsFilter) {
    }
    if (encodeZS) {
    }
  }
  if (!configStandalone.runTransformation) {
  } else {
    for (int32_t i = 0; i < chainTracking->NSECTORS; i++) {
      if (chainTracking->mIOPtrs.rawClusters[i]) {
        if (configStandalone.proc.debugLevel >= 2) {
          printf("Converting Legacy Raw Cluster to Native\n");
        }
        break;
      }
    }
  }

  if (configStandalone.stripDumpedEvents) {
  }

  if (configStandalone.runTransformation && !chainTracking->mIOPtrs.clustersNative) {
    printf("Need cluster native data for on-the-fly TPC transform\n");
    return 1;
  }

  ioPtrEvents[x] = chainTracking->mIOPtrs;
  ioMemEvents[x] = std::move(chainTracking->mIOMem);
  return 0;
}

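// Count the accepted tracks of a run (O2 output tracks, or merged tracks
// flagged OK) and accumulate track/cluster totals for the final summary.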
void OutputStat(GPUChainTracking* t, int64_t* nTracksTotal = nullptr, int64_t* nClustersTotal = nullptr)
{
  int32_t nTracks = 0;
  if (t->GetProcessingSettings().createO2Output) {
    nTracks += t->mIOPtrs.nOutputTracksTPCO2;
  } else {
    for (uint32_t k = 0; k < t->mIOPtrs.nMergedTracks; k++) {
      if (t->mIOPtrs.mergedTracks[k].OK()) {
        nTracks++;
      }
    }
  }
  if (nTracksTotal && nClustersTotal) {
    *nTracksTotal += nTracks;
    *nClustersTotal += t->mIOPtrs.nMergedTrackHits;
  }
}

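// Run the tracking chain up to 'runs' times, fetching iteration numbers from
// the shared atomic counter so that the two threads of the double-pipeline
// mode interleave on one iteration sequence. The first runsInit iterations of
// each thread reset the timers; the pipeline timer is started at iteration 2
// and stopped after the last iteration, so it measures runs-2 iterations and
// excludes warm-up and tail - hence double-pipeline mode requires extra runs.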
int32_t RunBenchmark(GPUReconstruction* recUse, GPUChainTracking* chainTrackingUse, int32_t runs, int32_t iEvent, int64_t* nTracksTotal, int64_t* nClustersTotal, int32_t threadId = 0, HighResTimer* timerPipeline = nullptr)
{
  int32_t iRun = 0, iteration = 0;
  while ((iteration = nIteration.fetch_add(1)) < runs) {
    if (configStandalone.runs > 1) {
      printf("Run %d (thread %d)\n", iteration + 1, threadId);
    }
    recUse->SetResetTimers(iRun < configStandalone.runsInit);
    if (configStandalone.outputcontrolmem) {
      recUse->SetOutputControl(threadId ? outputmemoryPipeline.get() : outputmemory.get(), configStandalone.outputcontrolmem);
    }

    if (configStandalone.testSyncAsync) {
      printf("Running synchronous phase\n");
    }
    const GPUTrackingInOutPointers& ioPtrs = ioPtrEvents[!configStandalone.preloadEvents ? 0 : configStandalone.proc.doublePipeline ? (iteration % ioPtrEvents.size()) : (iEvent - configStandalone.StartEvent)];
    chainTrackingUse->mIOPtrs = ioPtrs;
    if (iteration == (configStandalone.proc.doublePipeline ? 2 : (configStandalone.runs - 1))) {
      if (configStandalone.proc.doublePipeline && timerPipeline) {
        timerPipeline->Start();
      }
      if (configStandalone.controlProfiler) {
      }
    }
    int32_t tmpRetVal = recUse->RunChains();
    int32_t iterationEnd = nIterationEnd.fetch_add(1);
    if (iterationEnd == configStandalone.runs - 1) {
      if (configStandalone.proc.doublePipeline && timerPipeline) {
        timerPipeline->Stop();
      }
      if (configStandalone.controlProfiler) {
      }
    }

    if (tmpRetVal == 0 || tmpRetVal == 2) {
      OutputStat(chainTrackingUse, iRun == 0 ? nTracksTotal : nullptr, iRun == 0 ? nClustersTotal : nullptr);
      if (configStandalone.memoryStat) {
        recUse->PrintMemoryStatistics();
      } else if (configStandalone.proc.debugLevel >= 2) {
        recUse->PrintMemoryOverview();
      }
    }

    if (tmpRetVal == 0 && configStandalone.testSyncAsync) {
      if (configStandalone.testSyncAsync) {
        printf("Running asynchronous phase\n");
      }

      // The async phase consumes only the compressed clusters produced by the
      // sync phase; digit, native-cluster, and MC inputs are cleared.
      chainTrackingAsync->mIOPtrs = ioPtrs;
      chainTrackingAsync->mIOPtrs.tpcPackedDigits = nullptr;
      chainTrackingAsync->mIOPtrs.tpcZS = nullptr;
      chainTrackingAsync->mIOPtrs.clustersNative = nullptr;
      chainTrackingAsync->mIOPtrs.mcLabelsTPC = nullptr;
      chainTrackingAsync->mIOPtrs.mcInfosTPC = nullptr;
      chainTrackingAsync->mIOPtrs.mcInfosTPCCol = nullptr;
      chainTrackingAsync->mIOPtrs.tpcCompressedClusters = chainTracking->mIOPtrs.tpcCompressedClusters;
      for (int32_t i = 0; i < chainTracking->NSECTORS; i++) {
        chainTrackingAsync->mIOPtrs.rawClusters[i] = nullptr;
        chainTrackingAsync->mIOPtrs.clusterData[i] = nullptr;
      }
      recAsync->SetResetTimers(iRun < configStandalone.runsInit);
      tmpRetVal = recAsync->RunChains();
      if (tmpRetVal == 0 || tmpRetVal == 2) {
        OutputStat(chainTrackingAsync, nullptr, nullptr);
        if (configStandalone.memoryStat) {
          recAsync->PrintMemoryStatistics();
        }
      }
      recAsync->ClearAllocatedMemory();
    }
    if (!configStandalone.proc.doublePipeline) {
      recUse->ClearAllocatedMemory();
    }

    if (tmpRetVal == 2) {
      configStandalone.continueOnError = 0; // Forced exit from event display loop
      configStandalone.noprompt = 1;
    }
    if (tmpRetVal == 3 && configStandalone.proc.ignoreNonFatalGPUErrors) {
      printf("Non-FATAL GPU error occurred, ignoring\n");
    } else if (tmpRetVal && !configStandalone.continueOnError) {
      if (tmpRetVal != 2) {
        printf("Error occurred\n");
      }
      return 1;
    }
    iRun++;
  }
  if (configStandalone.proc.doublePipeline) {
    recUse->ClearAllocatedMemory();
  }
  nIteration.store(runs);
  return 0;
}

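// Entry point: parse the configuration, create the GPUReconstruction
// instance(s) and their tracking/ITS chains, count the event dumps available
// in the events directory, optionally preload them, and run the benchmark
// loops. Illustrative invocation (a sketch only - the authoritative option
// spellings are the qConfig definitions in utils/qconfig.h; --runs and
// --preloadEvents appear verbatim in the messages above):
//   ./standalone --runs 4 --preloadEvents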
int32_t main(int argc, char** argv)
{
  std::unique_ptr<GPUReconstruction> recUnique, recUniqueAsync, recUniquePipeline;

  if (ReadConfiguration(argc, argv)) {
    return 1;
  }

  GPUSettingsDeviceBackend deviceSet;
  deviceSet.deviceType = configStandalone.runGPU ? GPUDataTypes::GetDeviceType(configStandalone.gpuType.c_str()) : GPUDataTypes::DeviceType::CPU;
  deviceSet.forceDeviceType = configStandalone.runGPUforce;
  deviceSet.master = nullptr;
  recUnique.reset(GPUReconstruction::CreateInstance(deviceSet));
  rec = recUnique.get();
  deviceSet.master = rec;
  if (configStandalone.testSyncAsync) {
    recUniqueAsync.reset(GPUReconstruction::CreateInstance(deviceSet));
    recAsync = recUniqueAsync.get();
  }
  if (configStandalone.proc.doublePipeline) {
    recUniquePipeline.reset(GPUReconstruction::CreateInstance(deviceSet));
    recPipeline = recUniquePipeline.get();
  }
  if (rec == nullptr || (configStandalone.testSyncAsync && recAsync == nullptr)) {
    printf("Error initializing GPUReconstruction\n");
    return 1;
  }
  rec->SetDebugLevelTmp(configStandalone.proc.debugLevel);
  chainTracking = rec->AddChain<GPUChainTracking>();
  if (configStandalone.testSyncAsync) {
    if (configStandalone.proc.debugLevel >= 3) {
      recAsync->SetDebugLevelTmp(configStandalone.proc.debugLevel);
    }
    chainTrackingAsync = recAsync->AddChain<GPUChainTracking>();
    chainTrackingAsync->SetQAFromForeignChain(chainTracking);
  }
  if (configStandalone.proc.doublePipeline) {
    if (configStandalone.proc.debugLevel >= 3) {
      recPipeline->SetDebugLevelTmp(configStandalone.proc.debugLevel);
    }
    chainTrackingPipeline = recPipeline->AddChain<GPUChainTracking>();
    chainTrackingPipeline->SetQAFromForeignChain(chainTracking);
  }
  if (!configStandalone.proc.doublePipeline) {
    chainITS = rec->AddChain<GPUChainITS>();
    if (configStandalone.testSyncAsync) {
      chainITSAsync = recAsync->AddChain<GPUChainITS>();
    }
  }

  if (SetupReconstruction()) {
    return 1;
  }

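  // In double-pipeline mode a second thread runs the pipeline worker loop of
  // the primary instance; it is terminated and joined at the end of main.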
  std::unique_ptr<std::thread> pipelineThread;
  if (configStandalone.proc.doublePipeline) {
    pipelineThread.reset(new std::thread([]() { rec->RunPipelineWorker(); }));
  }

  if (configStandalone.seed == -1) {
    std::random_device rd;
    configStandalone.seed = (int32_t)rd();
    printf("Using random seed %d\n", configStandalone.seed);
  }

  srand(configStandalone.seed);

  for (nEventsInDirectory = 0; true; nEventsInDirectory++) {
    std::ifstream in;
    char filename[256];
    snprintf(filename, 256, "events/%s/" GPUCA_EVDUMP_FILE ".%d.dump", configStandalone.eventsDir, nEventsInDirectory);
    in.open(filename, std::ifstream::binary);
    if (in.fail()) {
      break;
    }
    in.close();
  }

  if (configStandalone.TF.bunchSim || configStandalone.TF.nMerge) {
    tf.reset(new GPUReconstructionTimeframe(chainTracking, ReadEvent, nEventsInDirectory));
  }

  if (configStandalone.eventGenerator) {
    genEvents::RunEventGenerator(chainTracking);
    return 0;
  }

  int32_t nEvents = configStandalone.nEvents;
  if (configStandalone.TF.bunchSim) {
    nEvents = configStandalone.nEvents > 0 ? configStandalone.nEvents : 1;
  } else {
    if (nEvents == -1 || nEvents > nEventsInDirectory) {
      if (nEvents >= 0) {
        printf("Only %d events available in directory %s (%d events requested)\n", nEventsInDirectory, configStandalone.eventsDir, nEvents);
      }
      nEvents = nEventsInDirectory;
    }
    if (configStandalone.TF.nMerge > 1) {
      nEvents /= configStandalone.TF.nMerge;
    }
  }

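  // Preloading reads all requested events into ioPtrEvents/ioMemEvents up
  // front, so that the benchmark loop measures reconstruction time without
  // event I/O.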
  ioPtrEvents.resize(configStandalone.preloadEvents ? (nEvents - configStandalone.StartEvent) : 1);
  ioMemEvents.resize(configStandalone.preloadEvents ? (nEvents - configStandalone.StartEvent) : 1);
  if (configStandalone.preloadEvents) {
    printf("Preloading events%s", configStandalone.proc.debugLevel >= 2 ? "\n" : "");
    fflush(stdout);
    for (int32_t i = 0; i < nEvents - configStandalone.StartEvent; i++) {
      LoadEvent(configStandalone.StartEvent + i, i);
      if (configStandalone.proc.debugLevel >= 2) {
        printf("Loading event %d\n", i);
      } else {
        printf(" %d", i);
      }
      fflush(stdout);
    }
    printf("\n");
  }

  for (int32_t iRunOuter = 0; iRunOuter < configStandalone.runs2; iRunOuter++) {
    if (configStandalone.QA.inputHistogramsOnly) {
      break;
    }
    if (configStandalone.runs2 > 1) {
      printf("RUN2: %d\n", iRunOuter);
    }
    int64_t nTracksTotal = 0;
    int64_t nClustersTotal = 0;
    int32_t nEventsProcessed = 0;

    if (configStandalone.noEvents) {
      nEvents = 1;
      configStandalone.StartEvent = 0;
    }

    for (int32_t iEvent = configStandalone.StartEvent; iEvent < nEvents; iEvent++) {
      if (iEvent != configStandalone.StartEvent) {
        printf("\n");
      }
      if (configStandalone.noEvents == false && !configStandalone.preloadEvents) {
        HighResTimer timerLoad;
        timerLoad.Start();
        if (LoadEvent(iEvent, 0)) {
          goto breakrun;
        }
        if (configStandalone.dumpEvents) {
          char fname[1024];
          snprintf(fname, 1024, "event.%d.dump", nEventsProcessed);
          chainTracking->DumpData(fname);
          if (nEventsProcessed == 0) {
            rec->DumpSettings();
          }
        }

        if (configStandalone.overrideMaxTimebin) {
          GPUSettingsGRP grp = rec->GetGRPSettings();
          if (grp.grpContinuousMaxTimeBin == 0) {
            printf("Cannot override max time bin for non-continuous data!\n");
          } else {
            grp.grpContinuousMaxTimeBin = GPUReconstructionConvert::GetMaxTimeBin(*chainTracking->mIOPtrs.clustersNative) + 1;
            printf("Max time bin set to %d\n", grp.grpContinuousMaxTimeBin);
            rec->UpdateSettings(&grp);
            if (recAsync) {
              recAsync->UpdateSettings(&grp);
            }
            if (recPipeline) {
              recPipeline->UpdateSettings(&grp);
            }
          }
        }
        printf("Loading time: %'d us\n", (int32_t)(1000000 * timerLoad.GetCurrentElapsedTime()));
      }
      printf("Processing Event %d\n", iEvent);

      nIteration.store(0);
      nIterationEnd.store(0);
      double pipelineWalltime = 1.;
      if (configStandalone.proc.doublePipeline) {
        HighResTimer timerPipeline;
        if (RunBenchmark(rec, chainTracking, 1, iEvent, &nTracksTotal, &nClustersTotal) || RunBenchmark(recPipeline, chainTrackingPipeline, 2, iEvent, &nTracksTotal, &nClustersTotal)) {
          goto breakrun;
        }
        auto pipeline1 = std::async(std::launch::async, RunBenchmark, rec, chainTracking, configStandalone.runs, iEvent, &nTracksTotal, &nClustersTotal, 0, &timerPipeline);
        auto pipeline2 = std::async(std::launch::async, RunBenchmark, recPipeline, chainTrackingPipeline, configStandalone.runs, iEvent, &nTracksTotal, &nClustersTotal, 1, &timerPipeline);
        if (pipeline1.get() || pipeline2.get()) {
          goto breakrun;
        }
        pipelineWalltime = timerPipeline.GetElapsedTime() / (configStandalone.runs - 2);
        printf("Pipeline wall time: %f, %d iterations, %f per event\n", timerPipeline.GetElapsedTime(), configStandalone.runs - 2, pipelineWalltime);
      } else {
        if (RunBenchmark(rec, chainTracking, configStandalone.runs, iEvent, &nTracksTotal, &nClustersTotal)) {
          goto breakrun;
        }
      }
      nEventsProcessed++;

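      // Extrapolate the measured time to a full time frame: with nOrbits
      // orbits per TF, colRate Hz interaction rate, and orbitRate Hz LHC orbit
      // frequency, a TF spans nOrbits / orbitRate seconds and thus contains
      // nOrbits * colRate / orbitRate collisions, at an assumed 755851 TPC
      // clusters each. nGPUsReq then follows as time-per-TF divided by the TF
      // wall duration.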
      if (configStandalone.timeFrameTime) {
        double nClusters = chainTracking->GetTPCMerger().NMaxClusters();
        if (nClusters > 0) {
          const int32_t nOrbits = 32;
          const double colRate = 50000;
          const double orbitRate = 11245;
          const double nClsPerTF = 755851. * nOrbits * colRate / orbitRate;
          double timePerTF = (configStandalone.proc.doublePipeline ? pipelineWalltime : ((configStandalone.proc.debugLevel ? rec->GetStatKernelTime() : rec->GetStatWallTime()) / 1000000.)) * nClsPerTF / nClusters;
          const double nGPUsReq = timePerTF * orbitRate / nOrbits;
          char stat[1024];
          snprintf(stat, 1024, "Sync phase: %.2f sec per %d orbit TF, %.1f GPUs required", timePerTF, nOrbits, nGPUsReq);
          if (configStandalone.testSyncAsync) {
            timePerTF = (configStandalone.proc.debugLevel ? recAsync->GetStatKernelTime() : recAsync->GetStatWallTime()) / 1000000. * nClsPerTF / nClusters;
            snprintf(stat + strlen(stat), 1024 - strlen(stat), " - Async phase: %f sec per TF", timePerTF);
          }
          printf("%s (Measured %s time - Extrapolated from %d clusters to %d)\n", stat, configStandalone.proc.debugLevel ? "kernel" : "wall", (int32_t)nClusters, (int32_t)nClsPerTF);
        }
      }

      if (configStandalone.preloadEvents && configStandalone.proc.doublePipeline) {
        break;
      }
    }
    if (nEventsProcessed > 1) {
      printf("Total: %ld clusters, %ld tracks\n", nClustersTotal, nTracksTotal);
    }
  }

breakrun:
  if (rec->GetProcessingSettings().memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_GLOBAL) {
    rec->ClearAllocatedMemory();
  }

#ifndef _WIN32
  if (configStandalone.proc.runQA && configStandalone.fpe) {
    fedisableexcept(FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW);
  }
#endif

  if (configStandalone.proc.doublePipeline) {
    rec->TerminatePipelineWorker();
    pipelineThread->join();
  }

  rec->Finalize();
  if (configStandalone.outputcontrolmem && rec->IsGPU()) {
    if (rec->unregisterMemoryForGPU(outputmemory.get()) || (configStandalone.proc.doublePipeline && recPipeline->unregisterMemoryForGPU(outputmemoryPipeline.get()))) {
      printf("Error unregistering memory\n");
    }
  }
  rec->Exit();

  if (!configStandalone.noprompt) {
    printf("Press a key to exit!\n");
    getchar();
  }
  return (0);
}