#include <condition_variable>
struct GPUReconstructionPipelineQueue {
  // ...
  std::condition_variable c;
  // ...
};

struct GPUReconstructionPipelineContext {
  std::queue<GPUReconstructionPipelineQueue*> queue;
  // ...
  std::condition_variable cond;
  // ...
};

static ptrdiff_t ptrDiff(void* a, void* b) { return (char*)a - (char*)b; }
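// Helper used throughout this file: pointer arithmetic on void* is not legal
// C++, so both operands are cast to char* to obtain a byte distance, e.g.
// ptrDiff(p + 16, p) == 16 for char* p.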

throw std::invalid_argument("device type of master and slave GPUReconstruction does not match");
// ...
throw std::invalid_argument("Cannot be slave to a slave");
mROOTDump = GPUROOTDumpCore::getAndCreate();
GPUError("GPU Reconstruction not properly deinitialized!");
if (vertexerTraits) {
return std::max<int32_t>(0, tbb::this_task_arena::current_thread_index());
throw std::runtime_error("Must not call init on slave!");
for (uint32_t i = 0; i < mSlaves.size(); i++) {
  // ...
  GPUError("Error initializing slave (before device init)");
for (uint32_t i = 0; i < mSlaves.size(); i++) {
  // ...
  GPUError("Error initializing slave (device init)");
  // ...
  GPUError("Error initializing slave (permanent memory)");
for (uint32_t i = 0; i < mSlaves.size(); i++) {
  // ...
  GPUError("Error initializing slave (after device init)");

static uint32_t getDefaultNThreads()
{
  const char* tbbEnv = getenv("TBB_NUM_THREADS");
  uint32_t tbbNum = tbbEnv ? atoi(tbbEnv) : 0;
  // ...
  const char* ompEnv = getenv("OMP_NUM_THREADS");
  uint32_t ompNum = ompEnv ? atoi(ompEnv) : 0;
  // ...
  return tbb::info::default_concurrency();
}
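// Thread-count resolution (the elided branches presumably return tbbNum or
// ompNum when the respective variable is set): TBB_NUM_THREADS takes
// precedence, then the deprecated OMP_NUM_THREADS, and finally TBB's own
// tbb::info::default_concurrency(), i.e. the hardware concurrency.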

printf("\nConfig Dump %s\n", mMaster ? "Slave" : "Master");
for (uint32_t i = 0; i < mChains.size(); i++) {

#ifndef GPUCA_DETERMINISTIC_MODE
  GPUError("WARNING: deterministicGPUReconstruction needs GPUCA_DETERMINISTIC_MODE to be fully deterministic; without it only most concurrency-induced indeterminism is removed, while floating point effects remain!");
if (param().rec.tpc.looperInterpolationInExtraPass == -1) {
  param().rec.tpc.looperInterpolationInExtraPass = 0;
#ifdef GPUCA_DETERMINISTIC_MODE
  GPUError("WARNING: compiled with GPUCA_DETERMINISTIC_MODE but deterministicGPUReconstruction is not set; only compile-time determinism and deterministic math are enforced, not full determinism!");

GPUFatal("Must not use both nHostThreads and ompThreads at the same time!");
// ...
GPUWarning("You are using the deprecated ompThreads option, please switch to nHostThreads!");
mThreading = std::make_shared<GPUReconstructionThreading>();
mThreading->control = std::make_unique<tbb::global_control>(tbb::global_control::max_allowed_parallelism, mMaxHostThreads);
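// tbb::global_control caps TBB parallelism process-wide for as long as the
// object lives, which is why it is stored in mThreading rather than created
// on the stack. A minimal sketch of the same mechanism:
//   tbb::global_control gc(tbb::global_control::max_allowed_parallelism, 4);
//   tbb::parallel_for(0, 100, [](int) { /* runs on at most 4 threads */ });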

GPUError("Double pipeline mode requires exactly one chain, and that chain must support it");
// ...
GPUError("Not allowed to create optimized RTC code with more than one GPUReconstruction instance");
for (uint32_t i = 0; i < mChains.size(); i++) {
  mChains[i]->RegisterPermanentMemoryAndProcessors();
  size_t memPrimary, memPageLocked;
  mChains[i]->MemorySize(memPrimary, memPageLocked);
  // ...
  memPageLocked = memPrimary;
for (uint32_t i = 0; i < mChains.size(); i++) {
  mChains[i]->RegisterGPUProcessors();
for (uint32_t i = 0; i < mChains.size(); i++) {
for (uint32_t i = 0; i < mChains.size(); i++) {
for (uint32_t i = 0; i < mSlaves.size(); i++) {
  // ...
  GPUError("Error exiting slave");

auto& re = it->second;
if (proc == nullptr || re.proc == proc) {
  // ...
  resMain.mOverrideSize = 0;
  for (uint32_t i = 0; i < re.res.size(); i++) {
    // ...
    resMain.mOverrideSize = std::max<size_t>(resMain.mOverrideSize, ptrDiff(res.SetPointers((void*)1), (char*)1));
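// Size probing without allocating: SetPointers((void*)1) lays out all of the
// resource's sub-pointers starting from the fake base address 1 and returns
// the one-past-the-end pointer, so the distance to (char*)1 is the byte size
// the resource would occupy. The same trick recurs below, sometimes written
// as "(size_t)res->SetPointers((void*)1) - 1" instead of ptrDiff.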
throw std::bad_alloc();
// ...
it->second.res.emplace_back(retVal);

GPUInfo("Allocating memory %p", (void*)proc);
// ...
GPUInfo("Allocating memory done");
// ...
GPUInfo("Allocating Permanent Memory");
GPUError("Must not allocate permanent memory while volatile chunks are allocated");
throw std::bad_alloc();
// ...
GPUInfo("Permanent Memory Done");

if (res->mReuse >= 0) {
  // ...
  if (ptr == nullptr) {
    GPUError("Invalid reuse ptr (%s)", res->mName);
    throw std::bad_alloc();
  }
  // ...
  throw std::bad_alloc();
std::cout << "Reused (" << device << ") " << res->mName << ": " << retVal << "\n";
if (memorypool == nullptr) {
  GPUError("Cannot allocate memory from uninitialized pool");
  throw std::bad_alloc();
}
retVal = ptrDiff((res->*setPtr)((char*)1), (char*)(1));
memorypoolend = (void*)((char*)memorypoolend - GPUProcessor::getAlignmentMod<GPUCA_MEMALIGN>(memorypoolend));
if (retVal < res->mOverrideSize) {
  // ...
}
retVal += GPUProcessor::getAlignment<GPUCA_MEMALIGN>(retVal);
memorypoolend = (char*)memorypoolend - retVal;
memorypool = (char*)((res->*setPtr)(ptr));
// ...
if (retVal < res->mOverrideSize) {
  // ...
  memorypool = (char*)ptr + res->mOverrideSize;
}
memorypool = (void*)((char*)memorypool + GPUProcessor::getAlignment<GPUCA_MEMALIGN>(memorypool));
if (memorypoolend ? (memorypool > memorypoolend) : ((size_t)ptrDiff(memorypool, memorybase) > memorysize)) {
  std::cerr << "Memory pool size exceeded (" << device << ") (" << res->mName << ": " << (memorypoolend ? (memorysize + ptrDiff(memorypool, memorypoolend)) : ptrDiff(memorypool, memorybase)) << " > " << memorysize << ")\n";
  throw std::bad_alloc();
}
std::cout << "Allocated (" << device << ") " << res->mName << ": " << retVal << " - available: " << (memorypoolend ? ptrDiff(memorypoolend, memorypool) : (memorysize - ptrDiff(memorypool, memorybase))) << "\n";

if (res->mPtrDevice && res->mReuse < 0) {
  // ...
}
res->mSize = std::max((size_t)res->SetPointers((void*)1) - 1, res->mOverrideSize);
if (res->mReuse >= 0) {
  // ...
  GPUError("Invalid reuse, insufficient size: %ld < %ld", (int64_t)mMemoryResources[res->mReuse].mSize, (int64_t)res->mSize);
  throw std::bad_alloc();
}
// ...
res->mPtr = GPUProcessor::alignPointer<GPUCA_BUFFER_ALIGNMENT>(res->mPtrDevice);
res->SetPointers(res->mPtr);
std::cout << (res->mReuse >= 0 ? "Reused " : "Allocated ") << res->mName << ": " << res->mSize << "\n";
GPUError("Got buffer with insufficient alignment");
throw std::bad_alloc();

if (res->mPtr != nullptr) {
  GPUError("Double allocation! (%s)", res->mName);
  throw std::bad_alloc();
}
res->mSize = std::max((size_t)res->SetPointers((void*)1) - 1, res->mOverrideSize);
res->mPtr = control->allocator(CAMath::nextMultipleOf<GPUCA_BUFFER_ALIGNMENT>(res->mSize));
res->mSize = std::max<size_t>(ptrDiff(res->SetPointers(res->mPtr), res->mPtr), res->mOverrideSize);
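// External-allocator path: measure an upper bound with the fake-base trick,
// request that many bytes (rounded up to the buffer alignment) from the
// user-provided control->allocator callback, then re-run SetPointers on the
// real base to obtain the final, alignment-accurate size.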
std::cout << "Allocated (from callback) " << res->mName << ": " << res->mSize << "\n";

void* dummy = nullptr;
// ...
GPUError("Got buffer with insufficient alignment");
throw std::bad_alloc();

if (res->mProcessor->mLinkedProcessor == nullptr) {
  GPUError("Device Processor not set (%s)", res->mName);
  throw std::bad_alloc();
}
// ...
GPUError("Must not allocate non-stacked device memory while volatile chunks are allocated");
throw std::bad_alloc();
} else if (size != res->mSize) {
  GPUError("Inconsistent device memory allocation (%s: device %lu vs %lu)", res->mName, size, res->mSize);
  throw std::bad_alloc();
}
// ...
GPUError("Got buffer with insufficient alignment");
throw std::bad_alloc();

return res->mReuse >= 0 ? 0 : res->mSize;

throw std::runtime_error("Requested invalid memory type for direct allocation");
// ...
GPUError("Must not allocate direct memory while volatile chunks are allocated");
throw std::bad_alloc();
poolend = (char*)poolend - size;
poolend = (char*)poolend - GPUProcessor::getAlignmentMod<GPUCA_MEMALIGN>(poolend);
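// Direct allocations come off the tail of the pool: poolend is first moved
// down by the requested size and then rounded down to GPUCA_MEMALIGN via the
// getAlignmentMod correction, so the returned block is aligned and the check
// below catches collisions with the upward-growing pool.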
if (pool > poolend) {
  GPUError("Insufficient unmanaged memory: missing %ld bytes", ptrDiff(pool, poolend));
  throw std::bad_alloc();
}
// ...
throw std::bad_alloc();

size_t size = ptrDiff(res->SetPointers(basePtr), basePtr);
if (basePtr && size > std::max(res->mSize, res->mOverrideSize)) {
  std::cerr << "Updated pointers exceed available memory size: " << size << " > " << std::max(res->mSize, res->mOverrideSize) << " - host - " << res->mName << "\n";
  throw std::bad_alloc();
}

size_t size = ptrDiff(res->SetDevicePointers(basePtr), basePtr);
if (basePtr && size > std::max(res->mSize, res->mOverrideSize)) {
  std::cerr << "Updated pointers exceed available memory size: " << size << " > " << std::max(res->mSize, res->mOverrideSize) << " - GPU - " << res->mName << "\n";
  throw std::bad_alloc();
}

std::cout << "Freeing " << res->mName << ": size " << res->mSize << " (reused " << res->mReuse << ")\n";
// ...
res->mPtrDevice = nullptr;

GPUFatal("Trying to pop memory state from empty stack");
if (res->mReuse < 0) {
  // ...
  res->mPtrDevice = nullptr;

throw std::runtime_error("temporary memory stack already blocked");
// ...
throw std::runtime_error("cannot unblock while there is stacked memory");

printf("Memory Allocation: Host %'13zd / %'13zu (Permanent %'13zd, Data %'13zd, Scratch %'13zd), Device %'13zd / %'13zu (Permanent %'13zd, Data %'13zd, Scratch %'13zd) %zu chunks\n",
       ptrDiff(mHostMemoryPool, mHostMemoryBase) + ptrDiff((char*)mHostMemoryBase + mHostMemorySize, mHostMemoryPoolEnd),
       mHostMemorySize, ptrDiff(mHostMemoryPermanent, mHostMemoryBase), ptrDiff(mHostMemoryPool, mHostMemoryPermanent), ptrDiff((char*)mHostMemoryBase + mHostMemorySize, mHostMemoryPoolEnd),
       ptrDiff(mDeviceMemoryPool, mDeviceMemoryBase) + ptrDiff((char*)mDeviceMemoryBase + mDeviceMemorySize, mDeviceMemoryPoolEnd),
       mDeviceMemorySize, ptrDiff(mDeviceMemoryPermanent, mDeviceMemoryBase), ptrDiff(mDeviceMemoryPool, mDeviceMemoryPermanent), ptrDiff((char*)mDeviceMemoryBase + mDeviceMemorySize, mDeviceMemoryPoolEnd),
       /* chunk count elided in this excerpt */);
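// Pool layout summarized by this printout: permanent allocations sit at the
// base ([base, permanent)), data follows ([permanent, pool)), and scratch is
// taken from the top ((poolend, base + size]); "used" is therefore the front
// consumption plus the back consumption, printed for host and device alike.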

std::map<std::string, std::array<size_t, 3>> sizes;
// ...
if (res.mReuse >= 0) {
// ...
if (res.mPtrDevice) {

printf("%59s CPU / %9s GPU\n", "", "");
for (auto it = sizes.begin(); it != sizes.end(); it++) {
  printf("Allocation %30s %s: Size %'14zu / %'14zu\n", it->first.c_str(), it->second[2] ? "P" : " ", it->second[0], it->second[1]);
for (uint32_t i = 0; i < mChains.size(); i++) {
  mChains[i]->PrintMemoryStatistics();

template <class T>
constexpr static inline int32_t getStepNum(T step, bool validCheck, int32_t N, const char* err = "Invalid step num")
{
  static_assert(sizeof(step) == sizeof(uint32_t), "Invalid step enum size");
  int32_t retVal = 8 * sizeof(uint32_t) - 1 - CAMath::Clz((uint32_t)step);
  if ((uint32_t)step == 0 || retVal >= N) {
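// The step enums are one-hot bit masks, so the step index is log2 of the
// value, computed branch-free as 31 - Clz(step). A quick sanity check:
//   (uint32_t)step == (1u << 5)  =>  retVal == 5
// The elided body of the if presumably reports err when validCheck is set,
// rejecting step == 0 and indices outside [0, N).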

throw std::runtime_error("Invalid General Step");
// ...
throw std::invalid_argument("Cannot start double pipeline mode");

GPUInfo("Pipeline worker started");
// ...
bool terminate = false;
while (!terminate) {
  // ...
  GPUReconstructionPipelineQueue* q;
  // ...
  q->retVal = q->chain->RunChain();
  // ...
  std::lock_guard<std::mutex> lk(q->m);
  // ...
}
GPUInfo("Pipeline worker ended");

std::unique_ptr<GPUReconstructionPipelineQueue> qu(new GPUReconstructionPipelineQueue);
GPUReconstructionPipelineQueue* q = qu.get();
q->chain = terminate ? nullptr : mChains[0].get();
q->op = terminate ? 1 : 0;
std::unique_lock<std::mutex> lkdone(q->m);
// ...
throw std::runtime_error("Must not enqueue work after termination request");
// ...
q->c.wait(lkdone, [&q]() { return q->done; });
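// The predicate overload of condition_variable::wait is equivalent to
//   while (!q->done) { q->c.wait(lkdone); }
// and thus immune to spurious wakeups; the worker sets q->done and notifies
// under q->m, which is why lkdone locks that same mutex.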
return mChains[0]->FinalizePipelinedProcessing();

for (uint32_t i = 0; i < mChains.size(); i++) {
for (uint32_t i = 0; i < mChains.size(); i++) {

throw std::runtime_error("GPU Backend Failure");
// ...
f += "settings.dump";
for (uint32_t i = 0; i < mChains.size(); i++) {
// ...
f += "settings.dump";
for (uint32_t i = 0; i < mChains.size(); i++) {

GPUError("Cannot update settings while initialized");
throw std::runtime_error("Settings updated while initialized");

mAlloc = [&r](size_t n) { return (char*)r->AllocateVolatileDeviceMemory(n); };
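// This lambda adapts AllocateVolatileDeviceMemory to the generic
// std::function<void*(size_t)> allocator slot, presumably so that Thrust's
// temporary buffers are served from the volatile pool and released in bulk
// by ReturnVolatileDeviceMemory().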