|
| | ~GPUReconstructionDeviceBase () override |
| |
| const GPUParam * | DeviceParam () const |
| |
| | ~GPUReconstructionCPU () override |
| |
| template<class S , int32_t I = 0> |
| krnlProperties | getKernelProperties (int gpu=-1) |
| |
| template<class T , int32_t I = 0, typename... Args> |
| void | runKernelBackend (const krnlSetupTime &_xyz, const Args &... args) |
| |
| int32_t | GPUStuck () |
| |
| void | ResetDeviceProcessorTypes () |
| |
| int32_t | RunChains () override |
| |
| void | UpdateParamOccupancyMap (const uint32_t *mapHost, const uint32_t *mapGPU, uint32_t occupancyTotal, int32_t stream=-1) |
| |
| template<> |
| void | runKernelBackend (const krnlSetupTime &_xyz, void *const &ptr, uint64_t const &size) |
| |
| template<class S , int32_t I> |
| GPUReconstructionProcessing::krnlProperties | getKernelProperties (int gpu) |
| |
| | KernelInterface (const Args &... args) |
| |
| | ~GPUReconstructionProcessing () override |
| |
| int32_t | getNKernelHostThreads (bool splitCores) |
| |
| uint32_t | getNActiveThreadsOuterLoop () const |
| |
| void | SetNActiveThreadsOuterLoop (uint32_t f) |
| |
| uint32_t | SetAndGetNActiveThreadsOuterLoop (bool condition, uint32_t max) |
| |
| void | runParallelOuterLoop (bool doGPU, uint32_t nThreads, std::function< void(uint32_t)> lambda) |
| |
| void | SetNActiveThreads (int32_t n) |
| |
| const std::string & | GetKernelName (int32_t i) const |
| |
| auto & | getRecoStepTimer (RecoStep step) |
| |
| HighResTimer & | getGeneralStepTimer (GeneralStep step) |
| |
| template<class T > |
| void | AddGPUEvents (T *&events) |
| |
| virtual std::unique_ptr< threadContext > | GetThreadContext () override |
| |
| const GPUDefParameters & | getGPUParameters (bool doGPU) const override |
| |
| virtual | ~GPUReconstruction () |
| |
| | GPUReconstruction (const GPUReconstruction &)=delete |
| |
| GPUReconstruction & | operator= (const GPUReconstruction &)=delete |
| |
| template<class T , typename... Args> |
| T * | AddChain (Args... args) |
| |
| int32_t | Init () |
| |
| int32_t | Finalize () |
| |
| int32_t | Exit () |
| |
| void | DumpSettings (const char *dir="") |
| |
| int32_t | ReadSettings (const char *dir="") |
| |
| void | PrepareEvent () |
| |
| uint32_t | getNEventsProcessed () |
| |
| uint32_t | getNEventsProcessedInStat () |
| |
| int32_t | registerMemoryForGPU (const void *ptr, size_t size) |
| |
| int32_t | unregisterMemoryForGPU (const void *ptr) |
| |
| virtual void * | getGPUPointer (void *ptr) |
| |
| virtual void | startGPUProfiling () |
| |
| virtual void | endGPUProfiling () |
| |
| int32_t | GPUChkErrA (const int64_t error, const char *file, int32_t line, bool failOnError) |
| |
| int32_t | CheckErrorCodes (bool cpuOnly=false, bool forceShowErrors=false, std::vector< std::array< uint32_t, 4 > > *fillErrors=nullptr) |
| |
| void | RunPipelineWorker () |
| |
| void | TerminatePipelineWorker () |
| |
| GPUMemoryResource & | Res (int16_t num) |
| |
| template<class T > |
| int16_t | RegisterMemoryAllocation (T *proc, void *(T::*setPtr)(void *), int32_t type, const char *name="", const GPUMemoryReuse &re=GPUMemoryReuse()) |
| |
| size_t | AllocateMemoryResources () |
| |
| size_t | AllocateRegisteredMemory (GPUProcessor *proc, bool resetCustom=false) |
| |
| size_t | AllocateRegisteredMemory (int16_t res, GPUOutputControl *control=nullptr) |
| |
| void | AllocateRegisteredForeignMemory (int16_t res, GPUReconstruction *rec, GPUOutputControl *control=nullptr) |
| |
| void * | AllocateDirectMemory (size_t size, int32_t type) |
| |
| void * | AllocateVolatileDeviceMemory (size_t size) |
| |
| void * | AllocateVolatileMemory (size_t size, bool device) |
| |
| void | MakeFutureDeviceMemoryAllocationsVolatile () |
| |
| void | FreeRegisteredMemory (GPUProcessor *proc, bool freeCustom=false, bool freePermanent=false) |
| |
| void | FreeRegisteredMemory (int16_t res) |
| |
| void | ClearAllocatedMemory (bool clearOutputs=true) |
| |
| void | ReturnVolatileDeviceMemory () |
| |
| void | ReturnVolatileMemory () |
| |
| ThrustVolatileAllocator | getThrustVolatileDeviceAllocator () |
| |
| void | PushNonPersistentMemory (uint64_t tag) |
| |
| void | PopNonPersistentMemory (RecoStep step, uint64_t tag, const GPUProcessor *proc=nullptr) |
| |
| void | BlockStackedMemory (GPUReconstruction *rec) |
| |
| void | UnblockStackedMemory () |
| |
| void | ResetRegisteredMemoryPointers (GPUProcessor *proc) |
| |
| void | ResetRegisteredMemoryPointers (int16_t res) |
| |
| void | ComputeReuseMax (GPUProcessor *proc) |
| |
| void | PrintMemoryStatistics () |
| |
| void | PrintMemoryOverview () |
| |
| void | PrintMemoryMax () |
| |
| void | SetMemoryExternalInput (int16_t res, void *ptr) |
| |
| GPUMemorySizeScalers * | MemoryScalers () |
| |
| virtual void | GetITSTraits (std::unique_ptr< o2::its::TrackerTraits< 7 > > *trackerTraits, std::unique_ptr< o2::its::VertexerTraits< 7 > > *vertexerTraits, std::unique_ptr< o2::its::TimeFrame< 7 > > *timeFrame) |
| |
| bool | slavesExist () |
| |
| int | slaveId () |
| |
| DeviceType | GetDeviceType () const |
| |
| bool | IsGPU () const |
| |
| const GPUParam & | GetParam () const |
| |
| const GPUConstantMem & | GetConstantMem () const |
| |
| const GPUTrackingInOutPointers | GetIOPtrs () const |
| |
| const GPUSettingsGRP & | GetGRPSettings () const |
| |
| const GPUSettingsDeviceBackend & | GetDeviceBackendSettings () const |
| |
| const GPUSettingsProcessing & | GetProcessingSettings () const |
| |
| const GPUCalibObjectsConst & | GetCalib () const |
| |
| bool | IsInitialized () const |
| |
| void | SetSettings (float solenoidBzNominalGPU, const GPURecoStepConfiguration *workflow=nullptr) |
| |
| void | SetSettings (const GPUSettingsGRP *grp, const GPUSettingsRec *rec=nullptr, const GPUSettingsProcessing *proc=nullptr, const GPURecoStepConfiguration *workflow=nullptr) |
| |
| void | SetResetTimers (bool reset) |
| |
| void | SetDebugLevelTmp (int32_t level) |
| |
| void | UpdateSettings (const GPUSettingsGRP *g, const GPUSettingsProcessing *p=nullptr, const GPUSettingsRecDynamic *d=nullptr) |
| |
| void | UpdateDynamicSettings (const GPUSettingsRecDynamic *d) |
| |
| void | SetOutputControl (const GPUOutputControl &v) |
| |
| void | SetOutputControl (void *ptr, size_t size) |
| |
| void | SetInputControl (void *ptr, size_t size) |
| |
| GPUOutputControl & | OutputControl () |
| |
| uint32_t | NStreams () const |
| |
| const void * | DeviceMemoryBase () const |
| |
| RecoStepField | GetRecoSteps () const |
| |
| RecoStepField | GetRecoStepsGPU () const |
| |
| InOutTypeField | GetRecoStepsInputs () const |
| |
| InOutTypeField | GetRecoStepsOutputs () const |
| |
| int32_t | getRecoStepNum (RecoStep step, bool validCheck=true) |
| |
| int32_t | getGeneralStepNum (GeneralStep step, bool validCheck=true) |
| |
| void | setErrorCodeOutput (std::vector< std::array< uint32_t, 4 > > *v) |
| |
| std::vector< std::array< uint32_t, 4 > > * | getErrorCodeOutput () |
| |
| template<class T > |
| void | RegisterGPUProcessor (T *proc, bool deviceSlave) |
| |
| template<class T > |
| void | SetupGPUProcessor (T *proc, bool allocate) |
| |
| void | RegisterGPUDeviceProcessor (GPUProcessor *proc, GPUProcessor *slaveProcessor) |
| |
| void | ConstructGPUProcessor (GPUProcessor *proc) |
| |
| virtual void | PrintKernelOccupancies () |
| |
| double | GetStatKernelTime () |
| |
| double | GetStatWallTime () |
| |
| void | setDebugDumpCallback (std::function< void()> &&callback=std::function< void()>(nullptr)) |
| |
| bool | triggerDebugDump () |
| |
| std::string | getDebugFolder (const std::string &prefix="") |
| |
| int32_t | GetMaxBackendThreads () const |
| |
|
| | GPUReconstructionDeviceBase (const GPUSettingsDeviceBackend &cfg, size_t sizeCheck) |
| |
| int32_t | InitDevice () override |
| |
| virtual int32_t | InitDevice_Runtime ()=0 |
| |
| int32_t | ExitDevice () override |
| |
| virtual int32_t | ExitDevice_Runtime ()=0 |
| |
| virtual int32_t | GPUChkErrInternal (const int64_t error, const char *file, int32_t line) const override=0 |
| |
| int32_t | registerMemoryForGPU_internal (const void *ptr, size_t size) override |
| |
| int32_t | unregisterMemoryForGPU_internal (const void *ptr) override |
| |
| void | unregisterRemainingRegisteredMemory () |
| |
| int32_t | GPUDebug (const char *state="UNKNOWN", int32_t stream=-1, bool force=false) override=0 |
| |
| size_t | TransferMemoryInternal (GPUMemoryResource *res, int32_t stream, deviceEvent *ev, deviceEvent *evList, int32_t nEvents, bool toGPU, const void *src, void *dst) override |
| |
| size_t | GPUMemCpy (void *dst, const void *src, size_t size, int32_t stream, int32_t toGPU, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1) override=0 |
| |
| size_t | GPUMemCpyAlways (bool onGpu, void *dst, const void *src, size_t size, int32_t stream, int32_t toGPU, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1) override |
| |
| size_t | WriteToConstantMemory (size_t offset, const void *src, size_t size, int32_t stream=-1, deviceEvent *ev=nullptr) override=0 |
| |
| int32_t | GetGlobalLock (void *&pLock) |
| |
| void | ReleaseGlobalLock (void *sem) |
| |
| void | runConstantRegistrators () |
| |
| | GPUReconstructionCPU (const GPUSettingsDeviceBackend &cfg) |
| |
| virtual void | SynchronizeStream (int32_t stream) |
| |
| virtual void | SynchronizeEvents (deviceEvent *evList, int32_t nEvents=1) |
| |
| virtual void | StreamWaitForEvents (int32_t stream, deviceEvent *evList, int32_t nEvents=1) |
| |
| virtual bool | IsEventDone (deviceEvent *evList, int32_t nEvents=1) |
| |
| virtual void | RecordMarker (deviceEvent *ev, int32_t stream) |
| |
| virtual void | SynchronizeGPU () |
| |
| virtual void | ReleaseEvent (deviceEvent ev) |
| |
| size_t | TransferMemoryResourceToGPU (GPUMemoryResource *res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1) |
| |
| size_t | TransferMemoryResourceToHost (GPUMemoryResource *res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1) |
| |
| size_t | TransferMemoryResourcesToGPU (GPUProcessor *proc, int32_t stream=-1, bool all=false) |
| |
| size_t | TransferMemoryResourcesToHost (GPUProcessor *proc, int32_t stream=-1, bool all=false) |
| |
| size_t | TransferMemoryResourceLinkToGPU (int16_t res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1) |
| |
| size_t | TransferMemoryResourceLinkToHost (int16_t res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1) |
| |
| virtual void | SetONNXGPUStream (Ort::SessionOptions &, int32_t, int32_t *) |
| |
| int32_t | GetThread () |
| |
| virtual int32_t | DoStuckProtection (int32_t stream, deviceEvent event) |
| |
| virtual void | runKernelVirtual (const int num, const void *args) |
| |
| | GPUReconstructionProcessing (const GPUSettingsDeviceBackend &cfg) |
| |
| template<class T , int32_t I = 0> |
| HighResTimer & | getKernelTimer (RecoStep step, int32_t num=0, size_t addMemorySize=0, bool increment=true) |
| |
| template<class T , int32_t J = -1> |
| HighResTimer & | getTimer (const char *name, int32_t num=-1) |
| |
| void | AllocateRegisteredMemoryInternal (GPUMemoryResource *res, GPUOutputControl *control, GPUReconstruction *recPool) |
| |
| void | FreeRegisteredMemory (GPUMemoryResource *res) |
| |
| | GPUReconstruction (const GPUSettingsDeviceBackend &cfg) |
| |
| int32_t | InitPhaseBeforeDevice () |
| |
| int32_t | InitPhasePermanentMemory () |
| |
| int32_t | InitPhaseAfterDevice () |
| |
| void | WriteConstantParams () |
| |
| void | UpdateMaxMemoryUsed () |
| |
| int32_t | EnqueuePipeline (bool terminate=false) |
| |
| GPUChain * | GetNextChainInQueue () |
| |
| size_t | AllocateRegisteredMemoryHelper (GPUMemoryResource *res, void *&ptr, void *&memorypool, void *memorybase, size_t memorysize, void *(GPUMemoryResource::*SetPointers)(void *), void *&memorypoolend, const char *device) |
| |
| size_t | AllocateRegisteredPermanentMemory () |
| |
| template<class T , class S > |
| uint32_t | DumpData (FILE *fp, const T *const *entries, const S *num, InOutPointerType type) |
| |
| template<class T , class S > |
| size_t | ReadData (FILE *fp, const T **entries, S *num, std::unique_ptr< T[]> *mem, InOutPointerType type, T **nonConstPtrs=nullptr) |
| |
| template<class T > |
| T * | AllocateIOMemoryHelper (size_t n, const T *&ptr, std::unique_ptr< T[]> &u) |
| |
| int16_t | RegisterMemoryAllocationHelper (GPUProcessor *proc, void *(GPUProcessor::*setPtr)(void *), int32_t type, const char *name, const GPUMemoryReuse &re) |
| |
| template<class T > |
| void | DumpFlatObjectToFile (const T *obj, const char *file) |
| |
| template<class T > |
| std::unique_ptr< T > | ReadFlatObjectFromFile (const char *file) |
| |
| template<class T > |
| void | DumpStructToFile (const T *obj, const char *file) |
| |
| template<class T > |
| std::unique_ptr< T > | ReadStructFromFile (const char *file) |
| |
| template<class T > |
| int32_t | ReadStructFromFile (const char *file, T *obj) |
| |
| virtual RecoStepField | AvailableGPURecoSteps () |
| |
| virtual bool | CanQueryMaxMemory () |
| |
| GPUConstantMem * | processors () |
| |
| const GPUConstantMem * | processors () const |
| |
| GPUParam & | param () |
| |
| void | debugInit () |
| |
| void | debugExit () |
| |
|
| using | deviceEvent = gpu_reconstruction_kernels::deviceEvent |
| |
| using | threadContext = gpu_reconstruction_kernels::threadContext |
| |
| enum | InOutPointerType : uint32_t { CLUSTER_DATA = 0, SECTOR_OUT_TRACK = 1, SECTOR_OUT_CLUSTER = 2, MC_LABEL_TPC = 3, MC_INFO_TPC = 4, MERGED_TRACK = 5, MERGED_TRACK_HIT = 6, TRD_TRACK = 7, TRD_TRACKLET = 8, RAW_CLUSTERS = 9, CLUSTERS_NATIVE = 10, TRD_TRACKLET_MC = 11, TPC_COMPRESSED_CL = 12, TPC_DIGIT = 13, TPC_ZS = 14, CLUSTER_NATIVE_MC = 15, TPC_DIGIT_MC = 16, TRD_SPACEPOINT = 17, TRD_TRIGGERRECORDS = 18, TF_SETTINGS = 19 } |
| |
| enum class | krnlDeviceType : int32_t { CPU = 0, Device = 1, Auto = -1 } |
| |
| using | GeometryType = GPUDataTypes::GeometryType |
| |
| using | DeviceType = GPUDataTypes::DeviceType |
| |
| using | RecoStep = GPUDataTypes::RecoStep |
| |
| using | GeneralStep = GPUDataTypes::GeneralStep |
| |
| using | RecoStepField = GPUDataTypes::RecoStepField |
| |
| using | InOutTypeField = GPUDataTypes::InOutTypeField |
| |
| template<class T , int32_t I> |
| static const char * | GetKernelName () |
| |
| template<class T , int32_t I = 0> |
| static uint32_t | GetKernelNum () |
| |
| static DeviceType | GetDeviceType (const char *type) |
| |
| static uint32_t | getNIOTypeMultiplicity (InOutPointerType type) |
| |
| static GPUReconstruction * | CreateInstance (const GPUSettingsDeviceBackend &cfg) |
| |
| static GPUReconstruction * | CreateInstance (DeviceType type=DeviceType::CPU, bool forceType=true, GPUReconstruction *master=nullptr) |
| |
| static GPUReconstruction * | CreateInstance (int32_t type, bool forceType, GPUReconstruction *master=nullptr) |
| |
| static GPUReconstruction * | CreateInstance (const char *type, bool forceType, GPUReconstruction *master=nullptr) |
| |
| static bool | CheckInstanceAvailable (DeviceType type, bool verbose) |
| |
| static int32_t | getHostThreadIndex () |
| |
| std::shared_ptr< GPUReconstructionThreading > | mThreading |
| |
| static constexpr krnlRunRange | krnlRunRangeNone {0} |
| |
| static constexpr krnlEvent | krnlEventNone = krnlEvent{nullptr, nullptr, 0} |
| |
| static constexpr uint32_t | NSECTORS = GPUCA_NSECTORS |
| |
| static constexpr const char *const | GEOMETRY_TYPE_NAMES [] = {"INVALID", "ALIROOT", "O2"} |
| |
| static constexpr GeometryType | geometryType = GeometryType::ALIROOT |
| |
| static constexpr const char *const | IOTYPENAMES [] |
| |
| static const std::vector< std::string > | mKernelNames |
| |
| static std::shared_ptr< LibraryLoader > | sLibCUDA |
| |
| static std::shared_ptr< LibraryLoader > | sLibHIP |
| |
| static std::shared_ptr< LibraryLoader > | sLibOCL |
| |
| static std::unique_ptr< debugInternal > | mDebugData |
| |