Project
Loading...
Searching...
No Matches
GPUChainTrackingTransformation.cxx
Go to the documentation of this file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
14
15#include "GPUChainTracking.h"
16#include "GPULogging.h"
17#include "GPUO2DataTypes.h"
19#include "GPUTPCClusterData.h"
22#include "GPUTPCConvert.h"
23#include "AliHLTTPCRawCluster.h"
24#include "GPUConstantMem.h"
25#include "GPUTPCConvertKernel.h"
26
30#include "utils/strtag.h"
31
32using namespace o2::gpu;
33using namespace o2::tpc;
34
// Presumably reports whether the TPC cluster-native input must be resident in GPU memory
// for subsequent reconstruction steps (it is consulted before the explicit upload in the
// conversion routine below).
// NOTE(review): Doxygen scrape — the body (original line 37, the return expression) is
// elided here, so the actual condition cannot be confirmed from this view.
35bool GPUChainTracking::NeedTPCClustersOnGPU()
36{
38}
39
// NOTE(review): the function signature (original line 40) is elided by the Doxygen scrape;
// from the body this is the TPC cluster conversion step — presumably
// GPUChainTracking::ConvertNativeToClusterData() — confirm against the repository.
// It optionally uploads the cluster-native input to the GPU, then (only when the early
// TPC transform is enabled) runs GPUTPCConvertKernel to produce per-sector cluster data.
// Returns 0 on success.
// NOTE(review): original lines 42, 45, 70, 79 and 82-83 are also elided below.
41{
43 const auto& threadContext = GetThreadContext();
// doGPU: true when the TPCConversion reco step runs on the GPU.
44 bool doGPU = GetRecoStepsGPU() & RecoStep::TPCConversion;
// Use the device-side (shadow) converter processor on GPU, the host processor otherwise.
// NOTE(review): the declaration of `convert` (original line 45) is elided in this scrape.
46 GPUTPCConvert& convertShadow = doGPU ? processorsShadow()->tpcConverter : convert;
47
48 bool transferClusters = false;
// If clusters were NOT produced by the GPU cluster finder but are needed on the GPU,
// upload the flat cluster-native buffer and its access structure explicitly.
49 if (mRec->IsGPU() && !(mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCClusterFinding) && NeedTPCClustersOnGPU()) {
50 mInputsHost->mNClusterNative = mInputsShadow->mNClusterNative = mIOPtrs.clustersNative->nClustersTotal;
51 AllocateRegisteredMemory(mInputsHost->mResourceClusterNativeBuffer);
52 processorsShadow()->ioPtrs.clustersNative = mInputsShadow->mPclusterNativeAccess;
// Point the device-side ioPtrs at the device access structure (offset-based write into constant memory).
53 WriteToConstantMemory(RecoStep::TPCConversion, (char*)&processors()->ioPtrs - (char*)processors(), &processorsShadow()->ioPtrs, sizeof(processorsShadow()->ioPtrs), 0);
54 *mInputsHost->mPclusterNativeAccess = *mIOPtrs.clustersNative;
// Rewire the host copy of the access struct to the GPU linear buffer before uploading it.
55 mInputsHost->mPclusterNativeAccess->clustersLinear = mInputsShadow->mPclusterNativeBuffer;
56 mInputsHost->mPclusterNativeAccess->setOffsetPtrs();
// Copy the flat cluster array to the GPU on stream 0.
57 GPUMemCpy(RecoStep::TPCConversion, mInputsShadow->mPclusterNativeBuffer, mIOPtrs.clustersNative->clustersLinear, sizeof(mIOPtrs.clustersNative->clustersLinear[0]) * mIOPtrs.clustersNative->nClustersTotal, 0, true);
58 TransferMemoryResourceLinkToGPU(RecoStep::TPCConversion, mInputsHost->mResourceClusterNativeAccess, 0);
59 transferClusters = true;
60 }
// Without the early TPC transform, the conversion kernel is skipped entirely; the
// transformation then happens on the fly during sector data creation / refit.
61 if (!param().par.earlyTpcTransform) {
62 if (GetProcessingSettings().debugLevel >= 3) {
63 GPUInfo("Early transform inactive, skipping TPC Early transformation kernel, transformed on the fly during sector data creation / refit");
64 }
65 if (transferClusters) {
66 SynchronizeStream(0); // TODO: Synchronize implicitly with next step
67 }
68 return 0;
69 }
// Set each sector's output cluster pointer into the (shadow) converter buffer, using the
// per-sector offsets of the cluster-native input.
71 for (uint32_t i = 0; i < NSECTORS; i++) {
72 convert.mMemory->clusters[i] = convertShadow.mClusters + mIOPtrs.clustersNative->clusterOffset[i][0];
73 }
74
75 WriteToConstantMemory(RecoStep::TPCConversion, (char*)&processors()->tpcConverter - (char*)processors(), &convertShadow, sizeof(convertShadow), 0);
76 TransferMemoryResourcesToGPU(RecoStep::TPCConversion, &convert, 0);
// Launch the conversion kernel: one block per TPC row across all sectors, on stream 0.
77 runKernel<GPUTPCConvertKernel>(GetGridBlk(NSECTORS * GPUCA_ROW_COUNT, 0));
78 TransferMemoryResourcesToHost(RecoStep::TPCConversion, &convert, 0);
80
// NOTE(review): loop body (original lines 82-83) is elided in this scrape — presumably it
// publishes the per-sector conversion results into mIOPtrs; confirm upstream.
81 for (uint32_t i = 0; i < NSECTORS; i++) {
84 }
// Release the scratch memory pushed for this step (tag "TPCTRANS").
85 mRec->PopNonPersistentMemory(RecoStep::TPCConversion, qStr2Tag("TPCTRANS"));
86 return 0;
87}
88
// NOTE(review): the function signature (original line 89) is elided by the Doxygen scrape.
// From the visible body this copies the external cluster-native access structure into an
// owned one, does per-sector work (elided), and then releases the cluster-native input —
// possibly the legacy/host conversion path; confirm against the repository.
// NOTE(review): original lines 91, 95, 97 and 99 are elided below; `tmp` is declared on
// one of them (presumably the owned ClusterNativeAccess).
90{
// Copy the input access struct only if it is not already the owned instance.
92 if (tmp != mIOPtrs.clustersNative) {
93 *tmp = *mIOPtrs.clustersNative;
94 }
// Per-sector loop; the elided statements presumably convert and register per-sector data.
96 for (uint32_t i = 0; i < NSECTORS; i++) {
98 if (GetProcessingSettings().registerStandaloneInputMemory) {
// Failing to register/pin input memory for the GPU is fatal.
100 throw std::runtime_error("Error registering memory for GPU");
101 }
102 }
103 }
// The cluster-native input has been consumed: drop the pointer and free the owned buffer.
104 mIOPtrs.clustersNative = nullptr;
105 mIOMem.clustersNative.reset(nullptr);
106}
107
// NOTE(review): the function signature (original line 108) is elided by the Doxygen scrape.
// From the visible body this releases the Run-2 raw-cluster and legacy cluster-data inputs
// (presumably after converting them to the cluster-native format on the elided line 110 —
// cf. the static ConvertRun2RawToNative helper in this file's cross references) and then
// optionally registers the resulting buffer for GPU access; confirm against the repository.
109{
// NOTE(review): original lines 110, 113, 116 and 119 are elided below.
111 for (uint32_t i = 0; i < NSECTORS; i++) {
// Raw clusters and legacy cluster data are no longer needed once converted; release both
// the non-owning pointers and the owning buffers.
112 mIOPtrs.rawClusters[i] = nullptr;
114 mIOMem.rawClusters[i].reset(nullptr);
115 mIOPtrs.clusterData[i] = nullptr;
117 mIOMem.clusterData[i].reset(nullptr);
118 }
// Optionally pin the newly created cluster-native buffer so the GPU can access it directly;
// failure to register is fatal.
120 if (GetProcessingSettings().registerStandaloneInputMemory) {
121 if (mRec->registerMemoryForGPU(mIOMem.clustersNative.get(), mIOMem.clusterNativeAccess->nClustersTotal * sizeof(*mIOMem.clusterNativeAccess->clustersLinear))) {
122 throw std::runtime_error("Error registering memory for GPU");
123 }
124 }
125}
126
// NOTE(review): the function signature (original line 127) is elided by the Doxygen scrape,
// as is most of the body (original lines 129-134 and 139). From what remains, this walks
// the zero-suppressed TPC data (sector x endpoint x page count) and registers each page
// region for GPU access — presumably the ZS-encoder conversion path; confirm upstream.
128{
135 if (GetProcessingSettings().registerStandaloneInputMemory) {
136 for (uint32_t i = 0; i < NSECTORS; i++) {
137 for (uint32_t j = 0; j < GPUTrackingInOutZS::NENDPOINTS; j++) {
// Iterate the ZS page regions of endpoint j in sector i; the registerMemoryForGPU call
// (original line 139) is elided here — on failure it reaches the throw below.
138 for (uint32_t k = 0; k < mIOPtrs.tpcZS->sector[i].count[j]; k++) {
140 throw std::runtime_error("Error registering memory for GPU");
141 }
142 }
143 }
144 }
145 }
146}
147
152
// NOTE(review): the function signature (original line 153) is elided by the Doxygen scrape;
// the log message below suggests this is GPUChainTracking::ForwardTPCDigits() — confirm.
// Converts packed TPC digits above the zero-suppression threshold directly into
// ClusterNative objects (bypassing the clusterizer), builds the flat cluster-native
// buffer plus its access structure, and drops the digit input. Returns 0 on success.
154{
// Forwarding digits host-side is incompatible with running the cluster finder on the GPU.
155 if (GetRecoStepsGPU() & RecoStep::TPCClusterFinding) {
156 throw std::runtime_error("Cannot forward TPC digits with Clusterizer on GPU");
157 }
// First pass: bucket qualifying digits per sector and row, counting the total.
158 std::vector<ClusterNative> tmp[NSECTORS][GPUCA_ROW_COUNT];
159 uint32_t nTotal = 0;
160 const float zsThreshold = param().rec.tpc.zsThreshold;
161 for (int32_t i = 0; i < NSECTORS; i++) {
162 for (uint32_t j = 0; j < mIOPtrs.tpcPackedDigits->nTPCDigits[i]; j++) {
163 const auto& d = mIOPtrs.tpcPackedDigits->tpcDigits[i][j];
// Only digits at or above the zero-suppression threshold are forwarded.
164 if (d.getChargeFloat() >= zsThreshold) {
// NOTE(review): the declaration of `c` (original line 165, presumably `ClusterNative c;`)
// is elided by the scrape.
166 c.setTimeFlags(d.getTimeStamp(), 0);
167 c.setPad(d.getPad());
// Digits carry no shape information: use unit sigmas as placeholders.
168 c.setSigmaTime(1);
169 c.setSigmaPad(1);
170 c.qTot = c.qMax = d.getChargeFloat();
171 tmp[i][d.getRow()].emplace_back(c);
172 nTotal++;
173 }
174 }
175 }
// Second pass: flatten the per-row buckets into one contiguous ClusterNative array.
176 mIOMem.clustersNative.reset(new ClusterNative[nTotal]);
177 nTotal = 0;
178 mClusterNativeAccess->clustersLinear = mIOMem.clustersNative.get();
179 for (int32_t i = 0; i < NSECTORS; i++) {
180 for (int32_t j = 0; j < GPUCA_ROW_COUNT; j++) {
181 mClusterNativeAccess->nClusters[i][j] = tmp[i][j].size();
182 memcpy(&mIOMem.clustersNative[nTotal], tmp[i][j].data(), tmp[i][j].size() * sizeof(*mClusterNativeAccess->clustersLinear));
183 nTotal += tmp[i][j].size();
184 }
185 }
// Recompute the per-sector/row offset pointers over the new linear buffer.
186 mClusterNativeAccess->setOffsetPtrs();
// The digit input is consumed; clusters-native becomes the active input (the assignment on
// original line 188 is elided by the scrape).
187 mIOPtrs.tpcPackedDigits = nullptr;
189 GPUInfo("Forwarded %u TPC clusters", nTotal);
// Seed the memory scalers with the forwarded cluster count for later allocations.
190 mRec->MemoryScalers()->nTPCHits = nTotal;
191 return 0;
192}
Class of a TPC cluster in TPC-native coordinates (row, time)
int32_t i
#define GPUCA_ROW_COUNT
uint32_t j
Definition RawData.h:0
uint32_t c
Definition RawData.h:2
uint32_t version
Definition RawData.h:8
Definitions of TPC Zero Suppression Data Headers.
std::unique_ptr< o2::tpc::ClusterNativeAccess > mClusterNativeAccess
std::unique_ptr< GPUTrackingInputProvider > mInputsHost
GPUTrackingInOutPointers & mIOPtrs
struct o2::gpu::GPUChainTracking::InOutMemory mIOMem
std::unique_ptr< GPUTrackingInputProvider > mInputsShadow
void TransferMemoryResourceLinkToGPU(RecoStep step, int16_t res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
Definition GPUChain.h:123
void GPUMemCpy(RecoStep step, void *dst, const void *src, size_t size, int32_t stream, int32_t toGPU, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
Definition GPUChain.h:127
GPUReconstruction::RecoStepField GetRecoStepsGPU() const
Definition GPUChain.h:71
void WriteToConstantMemory(RecoStep step, size_t offset, const void *src, size_t size, int32_t stream=-1, deviceEvent *ev=nullptr)
Definition GPUChain.h:126
size_t AllocateRegisteredMemory(GPUProcessor *proc)
Definition GPUChain.h:216
virtual std::unique_ptr< GPUReconstructionProcessing::threadContext > GetThreadContext()
Definition GPUChain.h:108
GPUConstantMem * processors()
Definition GPUChain.h:83
GPUParam & param()
Definition GPUChain.h:86
void SetupGPUProcessor(T *proc, bool allocate)
Definition GPUChain.h:219
const GPUSettingsProcessing & GetProcessingSettings() const
Definition GPUChain.h:75
void SynchronizeStream(int32_t stream)
Definition GPUChain.h:88
GPUReconstructionCPU * mRec
Definition GPUChain.h:78
GPUConstantMem * processorsShadow()
Definition GPUChain.h:84
static constexpr int32_t NSECTORS
Definition GPUChain.h:57
void TransferMemoryResourcesToHost(RecoStep step, GPUProcessor *proc, int32_t stream=-1, bool all=false)
Definition GPUChain.h:122
krnlExec GetGridBlk(uint32_t nBlocks, int32_t stream, GPUReconstruction::krnlDeviceType d=GPUReconstruction::krnlDeviceType::Auto, GPUDataTypes::RecoStep st=GPUDataTypes::RecoStep::NoRecoStep)
Definition GPUChain.cxx:32
GPUReconstruction * rec()
Definition GPUChain.h:65
void TransferMemoryResourcesToGPU(RecoStep step, GPUProcessor *proc, int32_t stream=-1, bool all=false)
Definition GPUChain.h:121
static void RunZSEncoder(const S &in, std::unique_ptr< uint64_t[]> *outBuffer, uint32_t *outSizes, o2::raw::RawFileWriter *raw, const o2::InteractionRecord *ir, const GPUParam &param, int32_t version, bool verify, float threshold=0.f, bool padding=false, std::function< void(std::vector< o2::tpc::Digit > &)> digitsFilter=nullptr)
static void ConvertNativeToClusterData(o2::tpc::ClusterNativeAccess *native, std::unique_ptr< GPUTPCClusterData[]> *clusters, uint32_t *nClusters, const TPCFastTransform *transform, int32_t continuousMaxTimeBin=0)
static void RunZSFilter(std::unique_ptr< o2::tpc::Digit[]> *buffers, const o2::tpc::Digit *const *ptrs, size_t *nsb, const size_t *ns, const GPUParam &param, bool zs12bit, float threshold)
static void RunZSEncoderCreateMeta(const uint64_t *buffer, const uint32_t *sizes, void **ptrs, GPUTrackingInOutZS *out)
static void ConvertRun2RawToNative(o2::tpc::ClusterNativeAccess &native, std::unique_ptr< o2::tpc::ClusterNative[]> &nativeBuffer, const AliHLTTPCRawCluster **rawClusters, uint32_t *nRawClusters)
void PopNonPersistentMemory(RecoStep step, uint64_t tag)
RecoStepField GetRecoStepsGPU() const
void PushNonPersistentMemory(uint64_t tag)
GPUMemorySizeScalers * MemoryScalers()
int32_t registerMemoryForGPU(const void *ptr, size_t size)
GPUTPCClusterData * mClusters
GLenum GLfloat param
Definition glcorearb.h:271
Global TPC definitions and constants.
Definition SimTraits.h:167
constexpr T qStr2Tag(const char *str)
Definition strtag.h:22
std::unique_ptr< o2::tpc::Digit[]> tpcDigits[NSECTORS]
std::unique_ptr< GPUTrackingInOutDigits > digitMap
std::unique_ptr< GPUTPCClusterData[]> clusterData[NSECTORS]
std::unique_ptr< o2::tpc::ClusterNative[]> clustersNative
std::unique_ptr< uint64_t[]> tpcZSpages
std::unique_ptr< GPUTrackingInOutZS > tpcZSmeta
std::unique_ptr< o2::tpc::ClusterNativeAccess > clusterNativeAccess
std::unique_ptr< AliHLTTPCRawCluster[]> rawClusters[NSECTORS]
std::unique_ptr< GPUTrackingInOutZS::GPUTrackingInOutZSMeta > tpcZSmeta2
GPUTrackingInOutPointers ioPtrs
const o2::tpc::Digit * tpcDigits[NSECTORS]
const o2::tpc::ClusterNativeAccess * clustersNative
const GPUSettingsTF * settingsTF
const GPUTrackingInOutZS * tpcZS
const AliHLTTPCRawCluster * rawClusters[NSECTORS]
const GPUTPCClusterData * clusterData[NSECTORS]
const GPUTrackingInOutDigits * tpcPackedDigits
GPUTrackingInOutZSSector sector[NSECTORS]
static constexpr uint32_t NENDPOINTS
unsigned int clusterOffset[constants::MAXSECTOR][constants::MAXGLOBALPADROW]
const ClusterNative * clustersLinear
static constexpr size_t TPC_ZS_PAGE_SIZE
std::vector< uint64_t > convert(gsl::span< const uint64_t > page)
o2::InteractionRecord ir(0, 0)