standalone.cxx
// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
// All rights not expressly granted are reserved.
//
// This software is distributed under the terms of the GNU General Public
// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
//
// In applying this license CERN does not waive the privileges and immunities
// granted to it by virtue of its status as an Intergovernmental Organization
// or submit itself to any jurisdiction.

/// \file standalone.cxx
/// \author David Rohr

#include "utils/qconfig.h"
#include "GPUReconstruction.h"
#include "GPUReconstructionTimeframe.h"
#include "GPUReconstructionConvert.h"
#include "GPUChainTracking.h"
#include "GPUTPCDef.h"
#include "GPUQA.h"
#include "GPUDisplayFrontendInterface.h"
#include "genEvents.h"

#include <iostream>
#include <fstream>
#include <cstdio>
#include <cstring>
#include <chrono>
#include <tuple>
#include <algorithm>
#include <thread>
#include <future>
#include <atomic>
#include <random>

#ifndef _WIN32
#include <unistd.h>
#include <sched.h>
#include <csignal>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/select.h>
#include <cfenv>
#include <clocale>
#include <sys/stat.h>
#endif
#include "utils/timer.h"
#include "utils/qmaths_helpers.h"
#include "utils/vecpod.h"

#include "TPCFastTransform.h"
#include "CorrectionMapsHelper.h"
#include "GPUTPCGMMergedTrack.h"
#include "GPUSettings.h"
#include <vector>
#if not(defined(__ARM_NEON) or defined(__aarch64__)) // ARM doesn't have SSE
#include <xmmintrin.h>
#endif

#include "GPUO2DataTypes.h"
#include "GPUChainITS.h"

using namespace o2::gpu;

// #define BROKEN_EVENTS

namespace o2::gpu
{
extern GPUSettingsStandalone configStandalone;
}

GPUReconstruction *rec, *recAsync, *recPipeline;
GPUChainTracking *chainTracking, *chainTrackingAsync, *chainTrackingPipeline;
GPUChainITS *chainITS, *chainITSAsync, *chainITSPipeline;
void unique_ptr_aligned_delete(char* v)
{
  operator delete(v GPUCA_OPERATOR_NEW_ALIGNMENT);
}
std::unique_ptr<char, void (*)(char*)> outputmemory(nullptr, unique_ptr_aligned_delete), outputmemoryPipeline(nullptr, unique_ptr_aligned_delete), inputmemory(nullptr, unique_ptr_aligned_delete);
std::unique_ptr<GPUDisplayFrontendInterface> eventDisplay;
std::unique_ptr<GPUReconstructionTimeframe> tf;
int32_t nEventsInDirectory = 0;
std::atomic<uint32_t> nIteration, nIterationEnd;

std::vector<GPUTrackingInOutPointers> ioPtrEvents;
std::vector<GPUChainTracking::InOutMemory> ioMemEvents;

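// The benchmark keeps up to three GPUReconstruction instances side by side:
// rec, plus recAsync when configStandalone.testSyncAsync is set and
// recPipeline for configStandalone.proc.doublePipeline, each with its own
// tracking and ITS chain. ioPtrEvents / ioMemEvents cache preloaded events so
// repeated benchmark runs do not have to touch the disk again.

// SetCPUAndOSSettings below enables flush-to-zero (MXCSR bit 0x8000) and
// denormals-are-zero (MXCSR bit 0x0040) on x86, so that a subnormal result
// such as 1e-40f is treated as 0.0f instead of triggering slow microcoded
// arithmetic; on ARM there is no SSE control register, hence the guard.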
void SetCPUAndOSSettings()
{
#if not(defined(__ARM_NEON) or defined(__aarch64__)) // ARM doesn't have SSE
#ifdef FE_DFL_DISABLE_SSE_DENORMS_ENV // Flush and load denormals to zero in any case
  fesetenv(FE_DFL_DISABLE_SSE_DENORMS_ENV);
#else
#ifndef _MM_FLUSH_ZERO_ON
#define _MM_FLUSH_ZERO_ON 0x8000
#endif
#ifndef _MM_DENORMALS_ZERO_ON
#define _MM_DENORMALS_ZERO_ON 0x0040
#endif
  _mm_setcsr(_mm_getcsr() | (_MM_FLUSH_ZERO_ON | _MM_DENORMALS_ZERO_ON));
#endif
#endif // ARM
}

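// ReadConfiguration parses the command line via qConfigParse and applies the
// OS-level tuning requested by the user: CPU pinning (sched_setaffinity), the
// SCHED_FIFO real-time scheduler, trapping of floating-point exceptions
// (feenableexcept), and denormal flushing. It also rejects option combinations
// that cannot work together, e.g. the double pipeline without external output
// memory, before any reconstruction instance is created.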
int32_t ReadConfiguration(int argc, char** argv)
{
  int32_t qcRet = qConfigParse(argc, (const char**)argv);
  if (qcRet) {
    if (qcRet != qConfig::qcrHelp) {
      printf("Error parsing command line parameters\n");
    }
    return 1;
  }
  if (configStandalone.printSettings > 1) {
    printf("Config Dump before ReadConfiguration\n");
    qConfigPrint();
  }
  if (configStandalone.proc.debugLevel == -1) {
    configStandalone.proc.debugLevel = 0;
  }
#ifndef _WIN32
  setlocale(LC_ALL, "en_US.utf-8");
  setlocale(LC_NUMERIC, "en_US.utf-8");
  if (configStandalone.cpuAffinity != -1) {
    cpu_set_t mask;
    CPU_ZERO(&mask);
    CPU_SET(configStandalone.cpuAffinity, &mask);

    printf("Setting affinity to restrict to CPU core %d\n", configStandalone.cpuAffinity);
    if (0 != sched_setaffinity(0, sizeof(mask), &mask)) {
      printf("Error setting CPU affinity\n");
      return 1;
    }
  }
  if (configStandalone.fifoScheduler) {
    printf("Setting FIFO scheduler\n");
    sched_param param;
    sched_getparam(0, &param);
    param.sched_priority = 1;
    if (0 != sched_setscheduler(0, SCHED_FIFO, &param)) {
      printf("Error setting scheduler\n");
      return 1;
    }
  }
  if (configStandalone.fpe) {
    feenableexcept(FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW);
  }
  if (configStandalone.flushDenormals) {
    disable_denormals();
  }

#else
  if (configStandalone.cpuAffinity != -1) {
    printf("Affinity setting not supported on Windows\n");
    return 1;
  }
  if (configStandalone.fifoScheduler) {
    printf("FIFO Scheduler setting not supported on Windows\n");
    return 1;
  }
  if (configStandalone.fpe) {
    printf("FPE not supported on Windows\n");
    return 1;
  }
#endif
#ifndef GPUCA_TPC_GEOMETRY_O2
#error Why was configStandalone.rec.tpc.mergerReadFromTrackerDirectly = 0 needed?
  configStandalone.proc.inKernelParallel = false;
  configStandalone.proc.createO2Output = 0;
  if (configStandalone.rundEdx == -1) {
    configStandalone.rundEdx = 0;
  }
#endif
#ifndef GPUCA_BUILD_QA
  if (configStandalone.proc.runQA || configStandalone.eventGenerator) {
    printf("QA not enabled in build\n");
    return 1;
  }
#endif
  if (configStandalone.proc.doublePipeline && configStandalone.testSyncAsync) {
    printf("Cannot run asynchronous processing with double pipeline\n");
    return 1;
  }
  if (configStandalone.proc.doublePipeline && (configStandalone.runs < 4 || !configStandalone.outputcontrolmem)) {
    printf("Double pipeline mode needs at least 3 runs per event and external output. To cycle through multiple events, use --preloadEvents and --runs n for n iterations round-robin\n");
    return 1;
  }
  if (configStandalone.TF.bunchSim && configStandalone.TF.nMerge) {
    printf("Cannot run --MERGE and --SIMBUNCHES together\n");
    return 1;
  }
  if (configStandalone.TF.bunchSim > 1) {
    configStandalone.TF.timeFrameLen = 1.e9 * configStandalone.TF.bunchSim / configStandalone.TF.interactionRate;
  }
  if (configStandalone.TF.nMerge) {
    double len = configStandalone.TF.nMerge - 1;
    if (configStandalone.TF.randomizeDistance) {
      len += 0.5;
    }
    if (configStandalone.TF.shiftFirstEvent) {
      len += 0.5;
    }
  }
  if (configStandalone.QA.inputHistogramsOnly && configStandalone.QA.compareInputs.size() == 0) {
    printf("Can only produce QA pdf output when input files are specified!\n");
    return 1;
  }
  if (configStandalone.QA.inputHistogramsOnly) {
    configStandalone.rundEdx = false;
  }
  if (configStandalone.QA.dumpToROOT) {
    configStandalone.proc.outputSharedClusterMap = true;
  }
  if (configStandalone.eventDisplay) {
    configStandalone.noprompt = 1;
  }
  if (configStandalone.proc.debugLevel >= 4) {
    if (configStandalone.proc.inKernelParallel) {
      configStandalone.proc.inKernelParallel = 1;
    } else {
      configStandalone.proc.nHostThreads = 1;
    }
  }
  if (configStandalone.setO2Settings) {
    if (configStandalone.runGPU) {
      configStandalone.proc.forceHostMemoryPoolSize = 1024 * 1024 * 1024;
    }
    configStandalone.rec.tpc.nWaysOuter = 1;
    configStandalone.rec.tpc.trackReferenceX = 83;
    configStandalone.proc.outputSharedClusterMap = 1;
  }

  if (configStandalone.outputcontrolmem) {
    bool forceEmptyMemory = getenv("LD_PRELOAD") && strstr(getenv("LD_PRELOAD"), "valgrind") != nullptr;
    outputmemory.reset((char*)operator new(configStandalone.outputcontrolmem GPUCA_OPERATOR_NEW_ALIGNMENT));
    if (forceEmptyMemory) {
      printf("Valgrind detected, emptying GPU output memory to avoid false positive undefined reads\n");
      memset(outputmemory.get(), 0, configStandalone.outputcontrolmem);
    }
    if (configStandalone.proc.doublePipeline) {
      outputmemoryPipeline.reset((char*)operator new(configStandalone.outputcontrolmem GPUCA_OPERATOR_NEW_ALIGNMENT));
      if (forceEmptyMemory) {
        memset(outputmemoryPipeline.get(), 0, configStandalone.outputcontrolmem);
      }
    }
  }
  if (configStandalone.inputcontrolmem) {
    inputmemory.reset((char*)operator new(configStandalone.inputcontrolmem GPUCA_OPERATOR_NEW_ALIGNMENT));
  }

  configStandalone.proc.showOutputStat = true;

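  // Backend auto-detection: probe the available GPU runtimes in order of
  // preference (CUDA, then HIP, then OpenCL) and fall back to the CPU
  // implementation unless runGPUforce forbids it.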
  if (configStandalone.runGPU && configStandalone.gpuType == "AUTO") {
    if (GPUReconstruction::CheckInstanceAvailable(GPUReconstruction::DeviceType::CUDA, configStandalone.proc.debugLevel >= 2)) {
      configStandalone.gpuType = "CUDA";
    } else if (GPUReconstruction::CheckInstanceAvailable(GPUReconstruction::DeviceType::HIP, configStandalone.proc.debugLevel >= 2)) {
      configStandalone.gpuType = "HIP";
    } else if (GPUReconstruction::CheckInstanceAvailable(GPUReconstruction::DeviceType::OCL, configStandalone.proc.debugLevel >= 2)) {
      configStandalone.gpuType = "OCL";
    } else {
      if (configStandalone.runGPU > 1 && configStandalone.runGPUforce) {
        printf("No GPU backend / device found, running on CPU is disabled due to runGPUforce\n");
        return 1;
      }
      configStandalone.runGPU = false;
      configStandalone.gpuType = "CPU";
    }
  }

  if (configStandalone.printSettings) {
    configStandalone.proc.printSettings = true;
  }
  if (configStandalone.printSettings > 1) {
    printf("Config Dump after ReadConfiguration\n");
    qConfigPrint();
  }

  return (0);
}
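
// Example invocation (a sketch: the binary name and the --eventsDir spelling
// are assumptions, while --runs and --preloadEvents appear verbatim in the
// hints printed by this function):
//   ./standalone --eventsDir pp --preloadEvents --runs 10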

int32_t SetupReconstruction()
{
  if (!configStandalone.eventGenerator) {
    char filename[256];
    snprintf(filename, 256, "events/%s/", configStandalone.eventsDir);
    if (configStandalone.noEvents) {
      configStandalone.eventsDir = "NON_EXISTING";
      configStandalone.rundEdx = false;
    } else if (rec->ReadSettings(filename)) {
      printf("Error reading event config file\n");
      return 1;
    }
    printf("Read event settings from dir %s (solenoidBz: %f, home-made events %d, constBz %d, maxTimeBin %d)\n", filename, rec->GetGRPSettings().solenoidBzNominalGPU, (int32_t)rec->GetGRPSettings().homemadeEvents, (int32_t)rec->GetGRPSettings().constBz, rec->GetGRPSettings().grpContinuousMaxTimeBin);
    if (configStandalone.testSyncAsync) {
      recAsync->ReadSettings(filename);
    }
    if (configStandalone.proc.doublePipeline) {
      recPipeline->ReadSettings(filename);
    }
  }

  chainTracking->mConfigDisplay = &configStandalone.display;
  chainTracking->mConfigQA = &configStandalone.QA;

  GPUSettingsGRP grp = rec->GetGRPSettings();
  GPUSettingsRec recSet;
  GPUSettingsProcessing procSet;
  recSet = configStandalone.rec;
  procSet = configStandalone.proc;
  GPURecoStepConfiguration steps;

  if (configStandalone.eventGenerator) {
    grp.homemadeEvents = true;
  }
  if (configStandalone.solenoidBzNominalGPU != -1e6f) {
    grp.solenoidBzNominalGPU = configStandalone.solenoidBzNominalGPU;
  }
  if (configStandalone.constBz) {
    grp.constBz = true;
  }
  if (configStandalone.TF.nMerge || configStandalone.TF.bunchSim) {
    if (grp.grpContinuousMaxTimeBin) {
      printf("ERROR: requested to overlay continuous data - not supported\n");
      return 1;
    }
    if (!configStandalone.cont) {
      printf("Continuous mode forced\n");
      configStandalone.cont = true;
    }
    if (chainTracking->GetTPCTransformHelper()) {
      grp.grpContinuousMaxTimeBin = configStandalone.TF.timeFrameLen * ((double)GPUReconstructionTimeframe::TPCZ / (double)GPUReconstructionTimeframe::DRIFT_TIME);
    }
  }
  if (configStandalone.cont && grp.grpContinuousMaxTimeBin == 0) {
    grp.grpContinuousMaxTimeBin = -1;
  }
  if (rec->GetDeviceType() == GPUReconstruction::DeviceType::CPU) {
    printf("Standalone Test Framework for CA Tracker - Using CPU\n");
  } else {
    printf("Standalone Test Framework for CA Tracker - Using GPU\n");
  }

  configStandalone.proc.forceMemoryPoolSize = (configStandalone.proc.forceMemoryPoolSize == 1 && configStandalone.eventDisplay) ? 2 : configStandalone.proc.forceMemoryPoolSize;
  if (configStandalone.eventDisplay) {
    eventDisplay.reset(GPUDisplayFrontendInterface::getFrontend(configStandalone.display.displayFrontend.c_str()));
    if (eventDisplay.get() == nullptr) {
      throw std::runtime_error("Requested display not available");
    }
    printf("Enabling event display (%s backend)\n", eventDisplay->frontendName());
    procSet.eventDisplay = eventDisplay.get();
    if (!configStandalone.QA.noMC) {
      procSet.runMC = true;
    }
  }

  if (procSet.runQA && !configStandalone.QA.noMC) {
    procSet.runMC = true;
  }

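  // The reconstruction workflow is assembled as bitfields: steps.steps selects
  // the reco steps to run, steps.inputs / steps.outputs declare the data types
  // consumed and produced, and steps.stepsGPUMask restricts which steps may
  // run on the device; the individual runXXX options override single bits.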
  steps.steps = GPUDataTypes::RecoStep::AllRecoSteps;
  if (configStandalone.runTRD != -1) {
    steps.steps.setBits(GPUDataTypes::RecoStep::TRDTracking, configStandalone.runTRD > 0);
  } else if (chainTracking->GetTRDGeometry() == nullptr) {
    steps.steps.setBits(GPUDataTypes::RecoStep::TRDTracking, false);
  }
  if (configStandalone.rundEdx != -1) {
    steps.steps.setBits(GPUDataTypes::RecoStep::TPCdEdx, configStandalone.rundEdx > 0);
  }
  if (configStandalone.runCompression != -1) {
    steps.steps.setBits(GPUDataTypes::RecoStep::TPCCompression, configStandalone.runCompression > 0);
  }
  if (configStandalone.runTransformation != -1) {
    steps.steps.setBits(GPUDataTypes::RecoStep::TPCConversion, configStandalone.runTransformation > 0);
  }
  steps.steps.setBits(GPUDataTypes::RecoStep::Refit, configStandalone.runRefit);
  if (!configStandalone.runMerger) {
    steps.steps.setBits(GPUDataTypes::RecoStep::TPCMerging, false);
    steps.steps.setBits(GPUDataTypes::RecoStep::TRDTracking, false);
    steps.steps.setBits(GPUDataTypes::RecoStep::TPCdEdx, false);
    steps.steps.setBits(GPUDataTypes::RecoStep::TPCCompression, false);
    steps.steps.setBits(GPUDataTypes::RecoStep::Refit, false);
  }

  if (configStandalone.TF.bunchSim || configStandalone.TF.nMerge) {
    steps.steps.setBits(GPUDataTypes::RecoStep::TRDTracking, false);
  }
  steps.inputs.set(GPUDataTypes::InOutType::TPCClusters, GPUDataTypes::InOutType::TRDTracklets);
  steps.steps.setBits(GPUDataTypes::RecoStep::TPCDecompression, false);
  steps.inputs.setBits(GPUDataTypes::InOutType::TPCCompressedClusters, false);
  if (grp.doCompClusterDecode) {
    steps.inputs.setBits(GPUDataTypes::InOutType::TPCCompressedClusters, true);
    steps.inputs.setBits(GPUDataTypes::InOutType::TPCClusters, false);
    steps.steps.setBits(GPUDataTypes::RecoStep::TPCCompression, false);
    steps.steps.setBits(GPUDataTypes::RecoStep::TPCClusterFinding, false);
    steps.steps.setBits(GPUDataTypes::RecoStep::TPCDecompression, true);
    steps.outputs.setBits(GPUDataTypes::InOutType::TPCCompressedClusters, false);
  } else if (grp.needsClusterer) {
    steps.inputs.setBits(GPUDataTypes::InOutType::TPCRaw, true);
    steps.inputs.setBits(GPUDataTypes::InOutType::TPCClusters, false);
  } else {
    steps.steps.setBits(GPUDataTypes::RecoStep::TPCClusterFinding, false);
  }

  if (configStandalone.recoSteps >= 0) {
    steps.steps &= configStandalone.recoSteps;
  }
  if (configStandalone.recoStepsGPU >= 0) {
    steps.stepsGPUMask &= configStandalone.recoStepsGPU;
  }

  steps.outputs.clear();
  steps.outputs.setBits(GPUDataTypes::InOutType::TPCSectorTracks, false);
  steps.outputs.setBits(GPUDataTypes::InOutType::TPCMergedTracks, steps.steps.isSet(GPUDataTypes::RecoStep::TPCMerging));
  steps.outputs.setBits(GPUDataTypes::InOutType::TPCCompressedClusters, steps.steps.isSet(GPUDataTypes::RecoStep::TPCCompression));
  steps.outputs.setBits(GPUDataTypes::InOutType::TRDTracks, steps.steps.isSet(GPUDataTypes::RecoStep::TRDTracking));
  steps.outputs.setBits(GPUDataTypes::InOutType::TPCClusters, steps.steps.isSet(GPUDataTypes::RecoStep::TPCClusterFinding));

  if (steps.steps.isSet(GPUDataTypes::RecoStep::TRDTracking)) {
    if (recSet.tpc.nWays > 1) {
      recSet.tpc.nWaysOuter = 1;
    }
    if (procSet.createO2Output && !procSet.trdTrackModelO2) {
      procSet.createO2Output = 1; // Must not be 2, to make sure TPC GPU tracks are still available for TRD
    }
  }

  if (configStandalone.testSyncAsync || configStandalone.testSync) {
    // Set settings for synchronous
    if (configStandalone.rundEdx == -1) {
      steps.steps.setBits(GPUDataTypes::RecoStep::TPCdEdx, 0);
    }
    recSet.useMatLUT = false;
    if (configStandalone.testSyncAsync) {
      procSet.eventDisplay = nullptr;
    }
  }
  if (configStandalone.proc.rtc.optSpecialCode == -1) {
    configStandalone.proc.rtc.optSpecialCode = configStandalone.testSyncAsync || configStandalone.testSync;
  }

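  // Apply the settings to all instances. For testSyncAsync the async instance
  // afterwards gets "asynchronous phase" overrides: it consumes the compressed
  // clusters produced by the synchronous phase (TPCDecompression on,
  // TPCClusterFinding / TPCCompression off) and re-enables dEdx and the
  // material LUT.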
  rec->SetSettings(&grp, &recSet, &procSet, &steps);
  if (configStandalone.proc.doublePipeline) {
    recPipeline->SetSettings(&grp, &recSet, &procSet, &steps);
  }
  if (configStandalone.testSyncAsync) {
    // Set settings for asynchronous
    steps.steps.setBits(GPUDataTypes::RecoStep::TPCDecompression, true);
    steps.steps.setBits(GPUDataTypes::RecoStep::TPCdEdx, true);
    steps.steps.setBits(GPUDataTypes::RecoStep::TPCCompression, false);
    steps.steps.setBits(GPUDataTypes::RecoStep::TPCClusterFinding, false);
    steps.inputs.setBits(GPUDataTypes::InOutType::TPCRaw, false);
    steps.inputs.setBits(GPUDataTypes::InOutType::TPCClusters, false);
    steps.inputs.setBits(GPUDataTypes::InOutType::TPCCompressedClusters, true);
    steps.outputs.setBits(GPUDataTypes::InOutType::TPCCompressedClusters, false);
    procSet.runMC = false;
    procSet.runQA = false;
    procSet.eventDisplay = eventDisplay.get();
    procSet.runCompressionStatistics = 0;
    procSet.rtc.optSpecialCode = 0;
    if (recSet.tpc.rejectionStrategy >= GPUSettings::RejectionStrategyB) {
      procSet.tpcInputWithClusterRejection = 1;
    }
    recSet.tpc.disableRefitAttachment = 0xFF;
    recSet.tpc.looperInterpolationInExtraPass = 0;
    recSet.maxTrackQPtB5 = CAMath::Min(recSet.maxTrackQPtB5, recSet.tpc.rejectQPtB5);
    recSet.useMatLUT = true;
    recAsync->SetSettings(&grp, &recSet, &procSet, &steps);
  }

  if (configStandalone.outputcontrolmem) {
    rec->SetOutputControl(outputmemory.get(), configStandalone.outputcontrolmem);
    if (configStandalone.proc.doublePipeline) {
      recPipeline->SetOutputControl(outputmemoryPipeline.get(), configStandalone.outputcontrolmem);
    }
  }

  o2::base::Propagator* prop = nullptr;
  prop = o2::base::Propagator::Instance(true);
  prop->setGPUField(&rec->GetParam().polynomialField);
  prop->setNominalBz(rec->GetParam().bzkG);
  prop->setMatLUT(chainTracking->GetMatLUT());
  chainTracking->SetO2Propagator(prop);
  if (chainTrackingAsync) {
    chainTrackingAsync->SetO2Propagator(prop);
  }
  if (chainTrackingPipeline) {
    chainTrackingPipeline->SetO2Propagator(prop);
  }
  procSet.o2PropagatorUseGPUField = true;

  if (rec->Init()) {
    printf("Error initializing GPUReconstruction!\n");
    return 1;
  }
  if (configStandalone.outputcontrolmem && rec->IsGPU()) {
    if (rec->registerMemoryForGPU(outputmemory.get(), configStandalone.outputcontrolmem) || (configStandalone.proc.doublePipeline && recPipeline->registerMemoryForGPU(outputmemoryPipeline.get(), configStandalone.outputcontrolmem))) {
      printf("ERROR registering memory for the GPU!!!\n");
      return 1;
    }
  }
  if (configStandalone.inputcontrolmem && rec->IsGPU()) {
    if (rec->registerMemoryForGPU(inputmemory.get(), configStandalone.inputcontrolmem)) {
      printf("ERROR registering input memory for the GPU!!!\n");
      return 1;
    }
  }
  if (configStandalone.proc.debugLevel >= 4) {
  }
  return (0);
}

int32_t ReadEvent(int32_t n)
{
  char filename[256];
  snprintf(filename, 256, "events/%s/" GPUCA_EVDUMP_FILE ".%d.dump", configStandalone.eventsDir, n);
  if (configStandalone.inputcontrolmem && !configStandalone.preloadEvents) {
    rec->SetInputControl(inputmemory.get(), configStandalone.inputcontrolmem);
  }
  int32_t r = chainTracking->ReadData(filename);
  if (r) {
    return r;
  }
#if defined(GPUCA_TPC_GEOMETRY_O2) && defined(GPUCA_BUILD_QA) && !defined(GPUCA_O2_LIB)
  if ((configStandalone.proc.runQA || configStandalone.eventDisplay) && !configStandalone.QA.noMC) {
    chainTracking->ForceInitQA();
    snprintf(filename, 256, "events/%s/mc.%d.dump", configStandalone.eventsDir, n);
    if (chainTracking->GetQA()->ReadO2MCData(filename)) {
      snprintf(filename, 256, "events/%s/mc.%d.dump", configStandalone.eventsDir, 0);
      if (chainTracking->GetQA()->ReadO2MCData(filename)) {
        throw std::runtime_error("Error reading O2 MC dump");
      }
    }
  }
#endif
  if (chainTracking->mIOPtrs.clustersNative && (configStandalone.TF.bunchSim || configStandalone.TF.nMerge || !configStandalone.runTransformation)) {
    if (configStandalone.proc.debugLevel >= 2) {
      printf("Converting Native to Legacy ClusterData for overlaying - WARNING: No raw clusters produced - Compression etc will not run!!!\n");
    }
    chainTracking->ConvertNativeToClusterData();
  }
  return 0;
}
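
// LoadEvent fills slot x of the ioPtrEvents / ioMemEvents caches, either from
// an event dump on disk (ReadEvent) or synthesized by GPUReconstructionTimeframe
// as a merged or bunch-simulated time frame, and optionally encodes or filters
// zero-suppressed TPC data when the input consists of raw digits.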
int32_t LoadEvent(int32_t iEvent, int32_t x)
{
  if (configStandalone.TF.bunchSim) {
    if (tf->LoadCreateTimeFrame(iEvent)) {
      return 1;
    }
  } else if (configStandalone.TF.nMerge) {
    if (tf->LoadMergedEvents(iEvent)) {
      return 1;
    }
  } else {
    if (ReadEvent(iEvent)) {
      return 1;
    }
  }
  bool encodeZS = configStandalone.encodeZS == -1 ? (chainTracking->mIOPtrs.tpcPackedDigits && !chainTracking->mIOPtrs.tpcZS) : (bool)configStandalone.encodeZS;
  bool zsFilter = configStandalone.zsFilter == -1 ? (!encodeZS && chainTracking->mIOPtrs.tpcPackedDigits && !chainTracking->mIOPtrs.tpcZS) : (bool)configStandalone.zsFilter;
  if (encodeZS || zsFilter) {
    if (!chainTracking->mIOPtrs.tpcPackedDigits) {
      printf("Need digit input to run ZS\n");
      return 1;
    }
    if (zsFilter) {
      chainTracking->ConvertZSFilter(configStandalone.zs12bit);
    }
    if (encodeZS) {
      chainTracking->ConvertZSEncoder(configStandalone.zs12bit);
    }
  }
  if (!configStandalone.runTransformation) {
    chainTracking->mIOPtrs.clustersNative = nullptr;
  } else {
    for (int32_t i = 0; i < chainTracking->NSECTORS; i++) {
      if (chainTracking->mIOPtrs.rawClusters[i]) {
        if (configStandalone.proc.debugLevel >= 2) {
          printf("Converting Legacy Raw Cluster to Native\n");
        }
        chainTracking->ConvertRun2RawToNative();
        break;
      }
    }
  }

  if (configStandalone.stripDumpedEvents) {
    if (chainTracking->mIOPtrs.tpcZS) {
      chainTracking->mIOPtrs.tpcPackedDigits = nullptr;
    }
  }

  if (configStandalone.runTransformation && !chainTracking->mIOPtrs.clustersNative) {
    printf("Need cluster native data for on-the-fly TPC transform\n");
    return 1;
  }

  ioPtrEvents[x] = chainTracking->mIOPtrs;
  ioMemEvents[x] = std::move(chainTracking->mIOMem);
  return 0;
}

void OutputStat(GPUChainTracking* t, int64_t* nTracksTotal = nullptr, int64_t* nClustersTotal = nullptr)
{
  int32_t nTracks = 0;
  if (t->GetProcessingSettings().createO2Output) {
    nTracks += t->mIOPtrs.nOutputTracksTPCO2;
  } else {
    for (uint32_t k = 0; k < t->mIOPtrs.nMergedTracks; k++) {
      if (t->mIOPtrs.mergedTracks[k].OK()) {
        nTracks++;
      }
    }
  }
  if (nTracksTotal && nClustersTotal) {
    *nTracksTotal += nTracks;
    *nClustersTotal += t->mIOPtrs.nMergedTrackHits;
  }
}
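
// RunBenchmark executes the reconstruction chain repeatedly. The iteration
// counter nIteration is a global atomic so that in double-pipeline mode the
// two worker threads share a single budget of `runs` iterations between them;
// nIterationEnd counts completed iterations to close the timing window.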
int32_t RunBenchmark(GPUReconstruction* recUse, GPUChainTracking* chainTrackingUse, int32_t runs, int32_t iEvent, int64_t* nTracksTotal, int64_t* nClustersTotal, int32_t threadId = 0, HighResTimer* timerPipeline = nullptr)
{
  int32_t iRun = 0, iteration = 0;
  while ((iteration = nIteration.fetch_add(1)) < runs) {
    if (configStandalone.runs > 1) {
      printf("Run %d (thread %d)\n", iteration + 1, threadId);
    }
    recUse->SetResetTimers(iRun < configStandalone.runsInit);
    if (configStandalone.outputcontrolmem) {
      recUse->SetOutputControl(threadId ? outputmemoryPipeline.get() : outputmemory.get(), configStandalone.outputcontrolmem);
    }

    if (configStandalone.testSyncAsync) {
      printf("Running synchronous phase\n");
    }
    const GPUTrackingInOutPointers& ioPtrs = ioPtrEvents[!configStandalone.preloadEvents ? 0 : configStandalone.proc.doublePipeline ? (iteration % ioPtrEvents.size()) : (iEvent - configStandalone.StartEvent)];
    chainTrackingUse->mIOPtrs = ioPtrs;
    if (iteration == (configStandalone.proc.doublePipeline ? 2 : (configStandalone.runs - 1))) {
      if (configStandalone.proc.doublePipeline && timerPipeline) {
        timerPipeline->Start();
      }
      if (configStandalone.controlProfiler) {
      }
    }
    int32_t tmpRetVal = recUse->RunChains();
    int32_t iterationEnd = nIterationEnd.fetch_add(1);
    if (iterationEnd == configStandalone.runs - 1) {
      if (configStandalone.proc.doublePipeline && timerPipeline) {
        timerPipeline->Stop();
      }
      if (configStandalone.controlProfiler) {
      }
    }

    if (tmpRetVal == 0 || tmpRetVal == 2) {
      OutputStat(chainTrackingUse, iRun == 0 ? nTracksTotal : nullptr, iRun == 0 ? nClustersTotal : nullptr);
      if (configStandalone.memoryStat) {
        recUse->PrintMemoryStatistics();
      } else if (configStandalone.proc.debugLevel >= 2) {
        recUse->PrintMemoryOverview();
      }
    }

    if (tmpRetVal == 0 && configStandalone.testSyncAsync) {
      if (configStandalone.testSyncAsync) {
        printf("Running asynchronous phase\n");
      }

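      // For the asynchronous pass the compressed-cluster output of the
      // synchronous pass is copied into a scratch buffer and becomes the only
      // TPC input: raw, digit, cluster and MC pointers are detached below so
      // the async chain sees exactly what a later offline pass would see.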
      vecpod<char> compressedTmpMem(chainTracking->mIOPtrs.tpcCompressedClusters->totalDataSize);
      memcpy(compressedTmpMem.data(), (const void*)chainTracking->mIOPtrs.tpcCompressedClusters, chainTracking->mIOPtrs.tpcCompressedClusters->totalDataSize);

      chainTrackingAsync->mIOPtrs = ioPtrs;
      chainTrackingAsync->mIOPtrs.tpcCompressedClusters = (o2::tpc::CompressedClustersFlat*)compressedTmpMem.data();
      chainTrackingAsync->mIOPtrs.tpcZS = nullptr;
      chainTrackingAsync->mIOPtrs.tpcPackedDigits = nullptr;
      chainTrackingAsync->mIOPtrs.mcInfosTPC = nullptr;
      chainTrackingAsync->mIOPtrs.nMCInfosTPC = 0;
      chainTrackingAsync->mIOPtrs.mcInfosTPCCol = nullptr;
      chainTrackingAsync->mIOPtrs.nMCInfosTPCCol = 0;
      chainTrackingAsync->mIOPtrs.mcLabelsTPC = nullptr;
      chainTrackingAsync->mIOPtrs.nMCLabelsTPC = 0;
      for (int32_t i = 0; i < chainTracking->NSECTORS; i++) {
        chainTrackingAsync->mIOPtrs.clusterData[i] = nullptr;
        chainTrackingAsync->mIOPtrs.nClusterData[i] = 0;
        chainTrackingAsync->mIOPtrs.rawClusters[i] = nullptr;
        chainTrackingAsync->mIOPtrs.nRawClusters[i] = 0;
      }
      chainTrackingAsync->mIOPtrs.clustersNative = nullptr;
      recAsync->SetResetTimers(iRun < configStandalone.runsInit);
      tmpRetVal = recAsync->RunChains();
      if (tmpRetVal == 0 || tmpRetVal == 2) {
        OutputStat(chainTrackingAsync, nullptr, nullptr);
        if (configStandalone.memoryStat) {
          recAsync->PrintMemoryStatistics();
        }
      }
      recAsync->ClearAllocatedMemory();
    }
    if (!configStandalone.proc.doublePipeline) {
      recUse->ClearAllocatedMemory();
    }

    if (tmpRetVal == 2) {
      configStandalone.continueOnError = 0; // Forced exit from event display loop
      configStandalone.noprompt = 1;
    }
    if (tmpRetVal == 3 && configStandalone.proc.ignoreNonFatalGPUErrors) {
      printf("Non-FATAL GPU error occurred, ignoring\n");
    } else if (tmpRetVal && !configStandalone.continueOnError) {
      if (tmpRetVal != 2) {
        printf("Error occurred\n");
      }
      return 1;
    }
    iRun++;
  }
  if (configStandalone.proc.doublePipeline) {
    recUse->ClearAllocatedMemory();
  }
  nIteration.store(runs);
  return 0;
}
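
// main wires everything together: ReadConfiguration, creation of the
// reconstruction instances (the first one acting as master for the async and
// pipeline instances), SetupReconstruction, optional event preloading, and the
// outer benchmark loops. In double-pipeline mode a dedicated worker thread
// runs rec->RunPipelineWorker() until it is terminated during shutdown.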
int32_t main(int argc, char** argv)
{
  std::unique_ptr<GPUReconstruction> recUnique, recUniqueAsync, recUniquePipeline;

  SetCPUAndOSSettings();

  if (ReadConfiguration(argc, argv)) {
    return 1;
  }

  GPUSettingsDeviceBackend deviceSet;
  deviceSet.deviceType = configStandalone.runGPU ? GPUDataTypes::GetDeviceType(configStandalone.gpuType.c_str()) : GPUDataTypes::DeviceType::CPU;
  deviceSet.forceDeviceType = configStandalone.runGPUforce;
  deviceSet.master = nullptr;
  recUnique.reset(GPUReconstruction::CreateInstance(deviceSet));
  rec = recUnique.get();
  deviceSet.master = rec;
  if (configStandalone.testSyncAsync) {
    recUniqueAsync.reset(GPUReconstruction::CreateInstance(deviceSet));
    recAsync = recUniqueAsync.get();
  }
  if (configStandalone.proc.doublePipeline) {
    recUniquePipeline.reset(GPUReconstruction::CreateInstance(deviceSet));
    recPipeline = recUniquePipeline.get();
  }
  if (rec == nullptr || (configStandalone.testSyncAsync && recAsync == nullptr)) {
    printf("Error initializing GPUReconstruction\n");
    return 1;
  }
  rec->SetDebugLevelTmp(configStandalone.proc.debugLevel);
  chainTracking = rec->AddChain<GPUChainTracking>();
  if (configStandalone.testSyncAsync) {
    if (configStandalone.proc.debugLevel >= 3) {
      recAsync->SetDebugLevelTmp(configStandalone.proc.debugLevel);
    }
    chainTrackingAsync = recAsync->AddChain<GPUChainTracking>();
    chainTrackingAsync->SetQAFromForeignChain(chainTracking);
  }
  if (configStandalone.proc.doublePipeline) {
    if (configStandalone.proc.debugLevel >= 3) {
      recPipeline->SetDebugLevelTmp(configStandalone.proc.debugLevel);
    }
    chainTrackingPipeline = recPipeline->AddChain<GPUChainTracking>();
    chainTrackingPipeline->SetQAFromForeignChain(chainTracking);
  }
  if (!configStandalone.proc.doublePipeline) {
    chainITS = rec->AddChain<GPUChainITS>();
    if (configStandalone.testSyncAsync) {
      chainITSAsync = recAsync->AddChain<GPUChainITS>();
    }
  }

  if (SetupReconstruction()) {
    return 1;
  }

  std::unique_ptr<std::thread> pipelineThread;
  if (configStandalone.proc.doublePipeline) {
    pipelineThread.reset(new std::thread([]() { rec->RunPipelineWorker(); }));
  }

  if (configStandalone.seed == -1) {
    std::random_device rd;
    configStandalone.seed = (int32_t)rd();
    printf("Using random seed %d\n", configStandalone.seed);
  }

  srand(configStandalone.seed);

  for (nEventsInDirectory = 0; true; nEventsInDirectory++) {
    std::ifstream in;
    char filename[256];
    snprintf(filename, 256, "events/%s/" GPUCA_EVDUMP_FILE ".%d.dump", configStandalone.eventsDir, nEventsInDirectory);
    in.open(filename, std::ifstream::binary);
    if (in.fail()) {
      break;
    }
    in.close();
  }

  if (configStandalone.TF.bunchSim || configStandalone.TF.nMerge) {
    tf.reset(new GPUReconstructionTimeframe(chainTracking, ReadEvent, nEventsInDirectory));
  }

  if (configStandalone.eventGenerator) {
    genEvents::RunEventGenerator(chainTracking);
    return 0;
  }

  int32_t nEvents = configStandalone.nEvents;
  if (configStandalone.TF.bunchSim) {
    nEvents = configStandalone.nEvents > 0 ? configStandalone.nEvents : 1;
  } else {
    if (nEvents == -1 || nEvents > nEventsInDirectory) {
      if (nEvents >= 0) {
        printf("Only %d events available in directory %s (%d events requested)\n", nEventsInDirectory, configStandalone.eventsDir, nEvents);
      }
      nEvents = nEventsInDirectory;
    }
    if (configStandalone.TF.nMerge > 1) {
      nEvents /= configStandalone.TF.nMerge;
    }
  }

  ioPtrEvents.resize(configStandalone.preloadEvents ? (nEvents - configStandalone.StartEvent) : 1);
  ioMemEvents.resize(configStandalone.preloadEvents ? (nEvents - configStandalone.StartEvent) : 1);
  if (configStandalone.preloadEvents) {
    printf("Preloading events%s", configStandalone.proc.debugLevel >= 2 ? "\n" : "");
    fflush(stdout);
    for (int32_t i = 0; i < nEvents - configStandalone.StartEvent; i++) {
      LoadEvent(configStandalone.StartEvent + i, i);
      if (configStandalone.proc.debugLevel >= 2) {
        printf("Loading event %d\n", i);
      } else {
        printf(" %d", i);
      }
      fflush(stdout);
    }
    printf("\n");
  }

  for (int32_t iRunOuter = 0; iRunOuter < configStandalone.runs2; iRunOuter++) {
    if (configStandalone.QA.inputHistogramsOnly) {
      chainTracking->ForceInitQA();
      break;
    }
    if (configStandalone.runs2 > 1) {
      printf("RUN2: %d\n", iRunOuter);
    }
    int64_t nTracksTotal = 0;
    int64_t nClustersTotal = 0;
    int32_t nEventsProcessed = 0;

    if (configStandalone.noEvents) {
      nEvents = 1;
      configStandalone.StartEvent = 0;
      chainTracking->ClearIOPointers();
    }

    for (int32_t iEvent = configStandalone.StartEvent; iEvent < nEvents; iEvent++) {
      if (iEvent != configStandalone.StartEvent) {
        printf("\n");
      }
      if (configStandalone.noEvents == false && !configStandalone.preloadEvents) {
        HighResTimer timerLoad;
        timerLoad.Start();
        if (LoadEvent(iEvent, 0)) {
          goto breakrun;
        }
        if (configStandalone.dumpEvents) {
          char fname[1024];
          snprintf(fname, 1024, "event.%d.dump", nEventsProcessed);
          chainTracking->DumpData(fname);
          if (nEventsProcessed == 0) {
            rec->DumpSettings();
          }
        }

        if (configStandalone.overrideMaxTimebin && chainTracking->mIOPtrs.clustersNative) {
          GPUSettingsGRP grp = rec->GetGRPSettings();
          if (grp.grpContinuousMaxTimeBin == 0) {
            printf("Cannot override max time bin for non-continuous data!\n");
          } else {
            grp.grpContinuousMaxTimeBin = GPUReconstructionConvert::GetMaxTimeBin(*chainTracking->mIOPtrs.clustersNative);
            printf("Max time bin set to %d\n", grp.grpContinuousMaxTimeBin);
            rec->UpdateSettings(&grp);
            if (recAsync) {
              recAsync->UpdateSettings(&grp);
            }
            if (recPipeline) {
              recPipeline->UpdateSettings(&grp);
            }
          }
        }
        printf("Loading time: %'d us\n", (int32_t)(1000000 * timerLoad.GetCurrentElapsedTime()));
      }
      printf("Processing Event %d\n", iEvent);

      nIteration.store(0);
      nIterationEnd.store(0);
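
      // Double-pipeline timing: iterations 0 and 1 serve as warm-up of the two
      // pipelines, the wall-time window is opened at iteration 2
      // (timerPipeline->Start() in RunBenchmark) and closed after the last
      // iteration, hence the average below divides by runs - 2.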
      double pipelineWalltime = 1.;
      if (configStandalone.proc.doublePipeline) {
        HighResTimer timerPipeline;
        if (RunBenchmark(rec, chainTracking, 1, iEvent, &nTracksTotal, &nClustersTotal) || RunBenchmark(recPipeline, chainTrackingPipeline, 2, iEvent, &nTracksTotal, &nClustersTotal)) {
          goto breakrun;
        }
        auto pipeline1 = std::async(std::launch::async, RunBenchmark, rec, chainTracking, configStandalone.runs, iEvent, &nTracksTotal, &nClustersTotal, 0, &timerPipeline);
        auto pipeline2 = std::async(std::launch::async, RunBenchmark, recPipeline, chainTrackingPipeline, configStandalone.runs, iEvent, &nTracksTotal, &nClustersTotal, 1, &timerPipeline);
        if (pipeline1.get() || pipeline2.get()) {
          goto breakrun;
        }
        pipelineWalltime = timerPipeline.GetElapsedTime() / (configStandalone.runs - 2);
        printf("Pipeline wall time: %f, %d iterations, %f per event\n", timerPipeline.GetElapsedTime(), configStandalone.runs - 2, pipelineWalltime);
      } else {
        if (RunBenchmark(rec, chainTracking, configStandalone.runs, iEvent, &nTracksTotal, &nClustersTotal)) {
          goto breakrun;
        }
      }
      nEventsProcessed++;

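      // Time-frame cost extrapolation: nClsPerTF scales a 755851-clusters-per-
      // collision reference by the number of collisions in a 32-orbit TF
      // (colRate / orbitRate collisions per orbit, with the LHC orbit
      // frequency of 11245 Hz); timePerTF rescales the measured kernel or wall
      // time from the actual cluster count to that TF size, and nGPUsReq =
      // timePerTF * orbitRate / nOrbits is the ratio of processing time to the
      // TF duration, i.e. the number of GPUs needed to keep up with data
      // taking.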
      if (configStandalone.timeFrameTime) {
        double nClusters = chainTracking->GetTPCMerger().NMaxClusters();
        if (nClusters > 0) {
          const int32_t nOrbits = 32;
          const double colRate = 50000;
          const double orbitRate = 11245;
          const double nClsPerTF = 755851. * nOrbits * colRate / orbitRate;
          double timePerTF = (configStandalone.proc.doublePipeline ? pipelineWalltime : ((configStandalone.proc.debugLevel ? rec->GetStatKernelTime() : rec->GetStatWallTime()) / 1000000.)) * nClsPerTF / nClusters;
          const double nGPUsReq = timePerTF * orbitRate / nOrbits;
          char stat[1024];
          snprintf(stat, 1024, "Sync phase: %.2f sec per %d orbit TF, %.1f GPUs required", timePerTF, nOrbits, nGPUsReq);
          if (configStandalone.testSyncAsync) {
            timePerTF = (configStandalone.proc.debugLevel ? recAsync->GetStatKernelTime() : recAsync->GetStatWallTime()) / 1000000. * nClsPerTF / nClusters;
            snprintf(stat + strlen(stat), 1024 - strlen(stat), " - Async phase: %f sec per TF", timePerTF);
          }
          printf("%s (Measured %s time - Extrapolated from %d clusters to %d)\n", stat, configStandalone.proc.debugLevel ? "kernel" : "wall", (int32_t)nClusters, (int32_t)nClsPerTF);
        }
      }

      if (configStandalone.preloadEvents && configStandalone.proc.doublePipeline) {
        break;
      }
    }
    if (nEventsProcessed > 1) {
      printf("Total: %ld clusters, %ld tracks\n", nClustersTotal, nTracksTotal);
    }
  }

breakrun:
  if (rec->GetProcessingSettings().memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_GLOBAL) {
    rec->PrintMemoryMax();
  }

#ifndef _WIN32
  if (configStandalone.proc.runQA && configStandalone.fpe) {
    fedisableexcept(FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW);
  }
#endif

  if (configStandalone.proc.doublePipeline) {
    rec->TerminatePipelineWorker();
    pipelineThread->join();
  }

  rec->Finalize();
  if (configStandalone.outputcontrolmem && rec->IsGPU()) {
    if (rec->unregisterMemoryForGPU(outputmemory.get()) || (configStandalone.proc.doublePipeline && recPipeline->unregisterMemoryForGPU(outputmemoryPipeline.get()))) {
      printf("Error unregistering memory\n");
    }
  }
  rec->Exit();

  if (!configStandalone.noprompt) {
    printf("Press a key to exit!\n");
    getchar();
  }
  return (0);
}