Project
Loading...
Searching...
No Matches
GPUChainTrackingTransformation.cxx
Go to the documentation of this file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
14
15#include "GPUChainTracking.h"
16#include "GPULogging.h"
17#include "GPUO2DataTypes.h"
19#include "GPUTPCClusterData.h"
22#include "AliHLTTPCRawCluster.h"
23
26#include "utils/strtag.h"
27
28using namespace o2::gpu;
29using namespace o2::tpc;
30
31bool GPUChainTracking::NeedTPCClustersOnGPU()
32{
33 return (mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCConversion) || (mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCSectorTracking) || (mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCMerging) || (mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCCompression);
34}
35
// [NOTE(review): doxygen-HTML scrape. The integer fused to the start of each
// line is the scraper's line number, not source; several original lines are
// missing entirely, including the signature — presumably
// int32_t GPUChainTracking::ConvertNativeToClusterData() given the file name
// and the int returns — TODO confirm against the repository. Code left
// byte-identical; only comments added.]
37{
// Acquire the per-thread device context for the duration of this step.
39 const auto& threadContext = GetThreadContext();
// True if the TPC conversion step itself runs on the GPU.
40 bool doGPU = GetRecoStepsGPU() & RecoStep::TPCConversion;
// `convert` is declared on a line missing from this scrape; convertShadow is
// the GPU-side converter when on GPU, otherwise it aliases the host converter.
42 GPUTPCConvert& convertShadow = doGPU ? processorsShadow()->tpcConverter : convert;
43
44 bool transferClusters = false;
// When running on GPU but cluster finding did NOT run on GPU (clusters exist
// only on the host) and a later GPU step needs them, stage the native clusters
// into GPU memory now, asynchronously on stream 0.
45 if (mRec->IsGPU() && !(mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCClusterFinding) && NeedTPCClustersOnGPU()) {
46 mInputsHost->mNClusterNative = mInputsShadow->mNClusterNative = mIOPtrs.clustersNative->nClustersTotal;
47 AllocateRegisteredMemory(mInputsHost->mResourceClusterNativeBuffer);
48 processorsShadow()->ioPtrs.clustersNative = mInputsShadow->mPclusterNativeAccess;
// Publish the updated ioPtrs (now pointing at the device-side access struct)
// to GPU constant memory.
49 WriteToConstantMemory(RecoStep::TPCConversion, (char*)&processors()->ioPtrs - (char*)processors(), &processorsShadow()->ioPtrs, sizeof(processorsShadow()->ioPtrs), 0);
// Host copy of the access struct mirrors the input but is re-pointed at the
// GPU linear buffer before its internal offset pointers are recomputed.
50 *mInputsHost->mPclusterNativeAccess = *mIOPtrs.clustersNative;
51 mInputsHost->mPclusterNativeAccess->clustersLinear = mInputsShadow->mPclusterNativeBuffer;
52 mInputsHost->mPclusterNativeAccess->setOffsetPtrs();
// Copy the flat cluster array, then the access struct, to the GPU (stream 0).
53 GPUMemCpy(RecoStep::TPCConversion, mInputsShadow->mPclusterNativeBuffer, mIOPtrs.clustersNative->clustersLinear, sizeof(mIOPtrs.clustersNative->clustersLinear[0]) * mIOPtrs.clustersNative->nClustersTotal, 0, true);
54 TransferMemoryResourceLinkToGPU(RecoStep::TPCConversion, mInputsHost->mResourceClusterNativeAccess, 0);
55 transferClusters = true;
56 }
// Without the early-transform option the conversion kernel is skipped; the
// transformation happens on the fly during sector data creation / refit.
57 if (!param().par.earlyTpcTransform) {
58 if (GetProcessingSettings().debugLevel >= 3) {
59 GPUInfo("Early transform inactive, skipping TPC Early transformation kernel, transformed on the fly during sector data creation / refit");
60 }
61 if (transferClusters) {
62 SynchronizeStream(0); // TODO: Synchronize implicitly with next step
63 }
64 return 0;
65 }
// Early-transform path: point each sector's cluster pointer into the (shadow)
// converter's cluster buffer using the per-sector offsets.
67 for (uint32_t i = 0; i < NSECTORS; i++) {
68 convert.mMemory->clusters[i] = convertShadow.mClusters + mIOPtrs.clustersNative->clusterOffset[i][0];
69 }
70
// Upload converter state, run the conversion kernel over all sector rows, and
// fetch the results back to the host.
71 WriteToConstantMemory(RecoStep::TPCConversion, (char*)&processors()->tpcConverter - (char*)processors(), &convertShadow, sizeof(convertShadow), 0);
72 TransferMemoryResourcesToGPU(RecoStep::TPCConversion, &convert, 0);
73 runKernel<GPUTPCConvertKernel>(GetGridBlk(NSECTORS * GPUCA_ROW_COUNT, 0));
74 TransferMemoryResourcesToHost(RecoStep::TPCConversion, &convert, 0);
76
// Per-sector output hookup — the loop body lines are missing from this scrape.
77 for (uint32_t i = 0; i < NSECTORS; i++) {
80 }
// Release the non-persistent memory tagged "TPCTRANS" (the matching Push call
// is on a line missing from this scrape — presumably at the function start).
81 mRec->PopNonPersistentMemory(RecoStep::TPCConversion, qStr2Tag("TPCTRANS"));
82 return 0;
83}
84
// [NOTE(review): signature line missing from this scrape; presumably a
// legacy cluster-data conversion method (a static helper
// ConvertNativeToClusterData appears in this file's cross references) — TODO
// confirm. `tmp` is declared on a missing line. Code left byte-identical.]
86{
// If the working access struct is not the input itself, copy the input into it.
88 if (tmp != mIOPtrs.clustersNative) {
89 *tmp = *mIOPtrs.clustersNative;
90 }
// Per-sector post-processing; the loop's leading statements (scrape lines
// 93-95) are missing, including the condition guarded by the closing braces.
92 for (uint32_t i = 0; i < NSECTORS; i++) {
94 if (GetProcessingSettings().registerStandaloneInputMemory) {
// Registration of standalone input memory failed — abort hard.
96 throw std::runtime_error("Error registering memory for GPU");
97 }
98 }
99 }
// The native-cluster input has been consumed: drop the pointer and free the
// owned buffer.
100 mIOPtrs.clustersNative = nullptr;
101 mIOMem.clustersNative.reset(nullptr);
102}
103
// [NOTE(review): signature line missing from this scrape; presumably the
// member wrapper around the static ConvertRun2RawToNative helper listed in the
// cross references — TODO confirm. Code left byte-identical.]
105{
// The raw/legacy per-sector inputs have been converted to the native format
// (conversion call is on a line missing from this scrape): drop the pointers
// and free the owned buffers.
107 for (uint32_t i = 0; i < NSECTORS; i++) {
108 mIOPtrs.rawClusters[i] = nullptr;
110 mIOMem.rawClusters[i].reset(nullptr);
111 mIOPtrs.clusterData[i] = nullptr;
113 mIOMem.clusterData[i].reset(nullptr);
114 }
// Register the freshly produced native-cluster buffer for GPU access when
// running with standalone input memory.
116 if (GetProcessingSettings().registerStandaloneInputMemory) {
117 if (mRec->registerMemoryForGPU(mIOMem.clustersNative.get(), mIOMem.clusterNativeAccess->nClustersTotal * sizeof(*mIOMem.clusterNativeAccess->clustersLinear))) {
118 throw std::runtime_error("Error registering memory for GPU");
119 }
120 }
121}
122
// [NOTE(review): signature line missing from this scrape; presumably a
// ZS-encoding conversion method (cf. RunZSEncoder/RunZSEncoderCreateMeta in
// the cross references) — TODO confirm. Code left byte-identical.]
124{
// Walk every ZS page of every endpoint of every sector and register its
// memory for GPU access; the actual registerMemoryForGPU call (scrape line
// 135) is missing from this listing.
131 if (GetProcessingSettings().registerStandaloneInputMemory) {
132 for (uint32_t i = 0; i < NSECTORS; i++) {
133 for (uint32_t j = 0; j < GPUTrackingInOutZS::NENDPOINTS; j++) {
134 for (uint32_t k = 0; k < mIOPtrs.tpcZS->sector[i].count[j]; k++) {
136 throw std::runtime_error("Error registering memory for GPU");
137 }
138 }
139 }
140 }
141 }
142}
143
148
// [NOTE(review): signature line missing from this scrape — presumably
// int32_t GPUChainTracking::ForwardTPCDigits() given the log message and the
// int return — TODO confirm. Converts packed TPC digits above the zero-
// suppression threshold directly into native clusters, bypassing the GPU
// clusterizer. Code left byte-identical; only comments added.]
150{
// Digit forwarding replaces cluster finding, so it conflicts with running the
// clusterizer on the GPU.
151 if (GetRecoStepsGPU() & RecoStep::TPCClusterFinding) {
152 throw std::runtime_error("Cannot forward TPC digits with Clusterizer on GPU");
153 }
// Per-(sector,row) staging buffers for the produced clusters.
154 std::vector<ClusterNative> tmp[NSECTORS][GPUCA_ROW_COUNT];
155 uint32_t nTotal = 0;
156 const float zsThreshold = param().rec.tpc.zsThreshold;
// Pass 1: turn every digit at/above the ZS threshold into a ClusterNative.
// (`c` is declared on scrape line 161, missing from this listing.)
157 for (int32_t i = 0; i < NSECTORS; i++) {
158 for (uint32_t j = 0; j < mIOPtrs.tpcPackedDigits->nTPCDigits[i]; j++) {
159 const auto& d = mIOPtrs.tpcPackedDigits->tpcDigits[i][j];
160 if (d.getChargeFloat() >= zsThreshold) {
162 c.setTimeFlags(d.getTimeStamp(), 0);
163 c.setPad(d.getPad());
// A forwarded digit has no measured widths; use unit sigmas.
164 c.setSigmaTime(1);
165 c.setSigmaPad(1);
166 c.qTot = c.qMax = d.getChargeFloat();
167 tmp[i][d.getRow()].emplace_back(c);
168 nTotal++;
169 }
170 }
171 }
// Pass 2: flatten the staged clusters into one contiguous buffer in
// (sector, row) order and record the per-row counts; nTotal is reused as the
// running write offset.
172 mIOMem.clustersNative.reset(new ClusterNative[nTotal]);
173 nTotal = 0;
174 mClusterNativeAccess->clustersLinear = mIOMem.clustersNative.get();
175 for (int32_t i = 0; i < NSECTORS; i++) {
176 for (int32_t j = 0; j < GPUCA_ROW_COUNT; j++) {
177 mClusterNativeAccess->nClusters[i][j] = tmp[i][j].size();
178 memcpy(&mIOMem.clustersNative[nTotal], tmp[i][j].data(), tmp[i][j].size() * sizeof(*mClusterNativeAccess->clustersLinear));
179 nTotal += tmp[i][j].size();
180 }
181 }
// Recompute the access struct's internal offset pointers; the digits input is
// consumed and cleared (scrape line 184, presumably publishing the access
// struct to mIOPtrs, is missing from this listing).
182 mClusterNativeAccess->setOffsetPtrs();
183 mIOPtrs.tpcPackedDigits = nullptr;
185 GPUInfo("Forwarded %u TPC clusters", nTotal);
// Feed the cluster count into the memory scalers for later allocations.
186 mRec->MemoryScalers()->nTPCHits = nTotal;
187 return 0;
188}
Class of a TPC cluster in TPC-native coordinates (row, time)
int32_t i
#define GPUCA_ROW_COUNT
uint32_t j
Definition RawData.h:0
uint32_t c
Definition RawData.h:2
uint32_t version
Definition RawData.h:8
std::unique_ptr< o2::tpc::ClusterNativeAccess > mClusterNativeAccess
std::unique_ptr< GPUTrackingInputProvider > mInputsHost
GPUTrackingInOutPointers & mIOPtrs
struct o2::gpu::GPUChainTracking::InOutMemory mIOMem
std::unique_ptr< GPUTrackingInputProvider > mInputsShadow
void TransferMemoryResourceLinkToGPU(RecoStep step, int16_t res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
Definition GPUChain.h:119
void GPUMemCpy(RecoStep step, void *dst, const void *src, size_t size, int32_t stream, int32_t toGPU, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
Definition GPUChain.h:123
krnlExec GetGridBlk(uint32_t nBlocks, int32_t stream, GPUReconstruction::krnlDeviceType d=GPUReconstruction::krnlDeviceType::Auto, GPUCA_RECO_STEP st=GPUCA_RECO_STEP::NoRecoStep)
Definition GPUChain.cxx:32
GPUReconstruction::RecoStepField GetRecoStepsGPU() const
Definition GPUChain.h:68
virtual std::unique_ptr< gpu_reconstruction_kernels::threadContext > GetThreadContext()
Definition GPUChain.h:104
void WriteToConstantMemory(RecoStep step, size_t offset, const void *src, size_t size, int32_t stream=-1, deviceEvent *ev=nullptr)
Definition GPUChain.h:122
size_t AllocateRegisteredMemory(GPUProcessor *proc)
Definition GPUChain.h:209
GPUConstantMem * processors()
Definition GPUChain.h:80
GPUParam & param()
Definition GPUChain.h:83
void SetupGPUProcessor(T *proc, bool allocate)
Definition GPUChain.h:212
const GPUSettingsProcessing & GetProcessingSettings() const
Definition GPUChain.h:72
void SynchronizeStream(int32_t stream)
Definition GPUChain.h:85
GPUReconstructionCPU * mRec
Definition GPUChain.h:75
GPUConstantMem * processorsShadow()
Definition GPUChain.h:81
static constexpr int32_t NSECTORS
Definition GPUChain.h:54
void TransferMemoryResourcesToHost(RecoStep step, GPUProcessor *proc, int32_t stream=-1, bool all=false)
Definition GPUChain.h:118
GPUReconstruction * rec()
Definition GPUChain.h:62
void TransferMemoryResourcesToGPU(RecoStep step, GPUProcessor *proc, int32_t stream=-1, bool all=false)
Definition GPUChain.h:117
static void RunZSEncoder(const S &in, std::unique_ptr< uint64_t[]> *outBuffer, uint32_t *outSizes, o2::raw::RawFileWriter *raw, const o2::InteractionRecord *ir, const GPUParam &param, int32_t version, bool verify, float threshold=0.f, bool padding=false, std::function< void(std::vector< o2::tpc::Digit > &)> digitsFilter=nullptr)
static void ConvertNativeToClusterData(o2::tpc::ClusterNativeAccess *native, std::unique_ptr< GPUTPCClusterData[]> *clusters, uint32_t *nClusters, const TPCFastTransform *transform, int32_t continuousMaxTimeBin=0)
static void RunZSFilter(std::unique_ptr< o2::tpc::Digit[]> *buffers, const o2::tpc::Digit *const *ptrs, size_t *nsb, const size_t *ns, const GPUParam &param, bool zs12bit, float threshold)
static void RunZSEncoderCreateMeta(const uint64_t *buffer, const uint32_t *sizes, void **ptrs, GPUTrackingInOutZS *out)
static void ConvertRun2RawToNative(o2::tpc::ClusterNativeAccess &native, std::unique_ptr< o2::tpc::ClusterNative[]> &nativeBuffer, const AliHLTTPCRawCluster **rawClusters, uint32_t *nRawClusters)
void PopNonPersistentMemory(RecoStep step, uint64_t tag)
RecoStepField GetRecoStepsGPU() const
void PushNonPersistentMemory(uint64_t tag)
GPUMemorySizeScalers * MemoryScalers()
int32_t registerMemoryForGPU(const void *ptr, size_t size)
GPUTPCClusterData * mClusters
GLenum GLfloat param
Definition glcorearb.h:271
Global TPC definitions and constants.
Definition SimTraits.h:167
constexpr T qStr2Tag(const char *str)
Definition strtag.h:22
std::unique_ptr< o2::tpc::Digit[]> tpcDigits[NSECTORS]
std::unique_ptr< GPUTrackingInOutDigits > digitMap
std::unique_ptr< GPUTPCClusterData[]> clusterData[NSECTORS]
std::unique_ptr< o2::tpc::ClusterNative[]> clustersNative
std::unique_ptr< uint64_t[]> tpcZSpages
std::unique_ptr< GPUTrackingInOutZS > tpcZSmeta
std::unique_ptr< o2::tpc::ClusterNativeAccess > clusterNativeAccess
std::unique_ptr< AliHLTTPCRawCluster[]> rawClusters[NSECTORS]
std::unique_ptr< GPUTrackingInOutZS::GPUTrackingInOutZSMeta > tpcZSmeta2
GPUTrackingInOutPointers ioPtrs
const o2::tpc::Digit * tpcDigits[NSECTORS]
const o2::tpc::ClusterNativeAccess * clustersNative
const GPUSettingsTF * settingsTF
const GPUTrackingInOutZS * tpcZS
const AliHLTTPCRawCluster * rawClusters[NSECTORS]
const GPUTPCClusterData * clusterData[NSECTORS]
const GPUTrackingInOutDigits * tpcPackedDigits
GPUTrackingInOutZSSector sector[NSECTORS]
static constexpr uint32_t NENDPOINTS
unsigned int clusterOffset[constants::MAXSECTOR][constants::MAXGLOBALPADROW]
const ClusterNative * clustersLinear
static constexpr size_t TPC_ZS_PAGE_SIZE
std::vector< uint64_t > convert(gsl::span< const uint64_t > page)
o2::InteractionRecord ir(0, 0)