dd/dc2/GPUChainTrackingCompression_8cxx_source.html

// Copyright 2019-2020 CERN and copyright holders of ALICE O2.

// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.

// All rights not expressly granted are reserved.

//

// This software is distributed under the terms of the GNU General Public

// License v3 (GPL Version 3), copied verbatim in the file "COPYING".

//

// In applying this license CERN does not waive the privileges and immunities

// granted to it by virtue of its status as an Intergovernmental Organization

// or submit itself to any jurisdiction.


#include "GPUChainTracking.h"

#include "GPUChainTrackingDebug.h"

#include "GPULogging.h"

#include "GPUO2DataTypes.h"

#include "GPUTrackingInputProvider.h"

#include "GPUTPCCFChainContext.h"

#include "TPCClusterDecompressor.h"

#include "GPUDefParametersRuntime.h"

#include "GPUConstantMem.h" // TODO: Try to get rid of as many GPUConstantMem includes as possible!

#include "GPUTPCCompressionKernels.h"

#include "GPUTPCDecompressionKernels.h"

#include "utils/strtag.h"


#include <numeric>


using namespace o2::gpu;

using namespace o2::tpc;


int32_t GPUChainTracking::RunTPCCompression()

{

  mRec->PushNonPersistentMemory(qStr2Tag("TPCCOMPR"));

  RecoStep myStep = RecoStep::TPCCompression;

  bool doGPU = GetRecoStepsGPU() & RecoStep::TPCCompression;

  int32_t gatherMode = mRec->GetProcessingSettings().tpcCompressionGatherMode == -1 ? mRec->getGPUParameters(doGPU).par_COMP_GATHER_MODE : mRec->GetProcessingSettings().tpcCompressionGatherMode;

  GPUTPCCompression& Compressor = processors()->tpcCompressor;

  GPUTPCCompression& CompressorShadow = doGPU ? processorsShadow()->tpcCompressor : Compressor;

  const auto& threadContext = GetThreadContext();

  if (mPipelineFinalizationCtx && GetProcessingSettings().doublePipelineClusterizer) {

    RecordMarker(&mEvents->single, 0);

  }


  if (gatherMode == 3) {

    mRec->MakeFutureDeviceMemoryAllocationsVolatile();

  }

  SetupGPUProcessor(&Compressor, true);

  new (Compressor.mMemory) GPUTPCCompression::memory;

  WriteToConstantMemory(myStep, (char*)&processors()->tpcCompressor - (char*)processors(), &CompressorShadow, sizeof(CompressorShadow), 0);

  TransferMemoryResourcesToGPU(myStep, &Compressor, 0);

  runKernel<GPUMemClean16>(GetGridAutoStep(0, RecoStep::TPCCompression), CompressorShadow.mClusterStatus, Compressor.mMaxClusters * sizeof(CompressorShadow.mClusterStatus[0]));

  runKernel<GPUTPCCompressionKernels, GPUTPCCompressionKernels::step0attached>(GetGridAuto(0));

  runKernel<GPUTPCCompressionKernels, GPUTPCCompressionKernels::step1unattached>(GetGridAuto(0));

  TransferMemoryResourcesToHost(myStep, &Compressor, 0);

#ifdef GPUCA_TPC_GEOMETRY_O2

  if (mPipelineFinalizationCtx && GetProcessingSettings().doublePipelineClusterizer) {

    SynchronizeEventAndRelease(mEvents->single);

    ((GPUChainTracking*)GetNextChainInQueue())->RunTPCClusterizer_prepare(false);

    ((GPUChainTracking*)GetNextChainInQueue())->mCFContext->ptrClusterNativeSave = processorsShadow()->ioPtrs.clustersNative;

  }

#endif

  SynchronizeStream(0);

  o2::tpc::CompressedClusters* O = Compressor.mOutput;

  memset((void*)O, 0, sizeof(*O));

  O->nTracks = Compressor.mMemory->nStoredTracks;

  O->nAttachedClusters = Compressor.mMemory->nStoredAttachedClusters;

  O->nUnattachedClusters = Compressor.mMemory->nStoredUnattachedClusters;

  O->nAttachedClustersReduced = O->nAttachedClusters - O->nTracks;

  O->nSliceRows = NSECTORS * GPUCA_ROW_COUNT;

  O->nComppressionModes = param().rec.tpc.compressionTypeMask;

  O->solenoidBz = param().bzkG;

  O->maxTimeBin = param().continuousMaxTimeBin;

  size_t outputSize = AllocateRegisteredMemory(Compressor.mMemoryResOutputHost, mSubOutputControls[GPUTrackingOutputs::getIndex(&GPUTrackingOutputs::compressedClusters)]);

  Compressor.mOutputFlat->set(outputSize, *Compressor.mOutput);

  char* hostFlatPtr = (char*)Compressor.mOutput->qTotU; // First array as allocated in GPUTPCCompression::SetPointersCompressedClusters

  size_t copySize = 0;

  if (gatherMode == 3) {

    CompressorShadow.mOutputA = Compressor.mOutput;

    copySize = AllocateRegisteredMemory(Compressor.mMemoryResOutputGPU); // We overwrite Compressor.mOutput with the allocated output pointers on the GPU

  }

  const o2::tpc::CompressedClustersPtrs* P = nullptr;

  HighResTimer* gatherTimer = nullptr;

  int32_t outputStream = 0;

  if (GetProcessingSettings().doublePipeline) {

    SynchronizeStream(OutputStream()); // Synchronize output copies running in parallel from memory that might be released, only the following async copy from stacked memory is safe after the chain finishes.

    outputStream = OutputStream();

  }

  if (gatherMode >= 2) {

    if (gatherMode == 2) {

      void* devicePtr = mRec->getGPUPointer(Compressor.mOutputFlat);

      if (devicePtr != Compressor.mOutputFlat) {

        CompressedClustersPtrs& ptrs = *Compressor.mOutput; // We need to update the ptrs with the gpu-mapped version of the host address space

        for (uint32_t i = 0; i < sizeof(ptrs) / sizeof(void*); i++) {

          reinterpret_cast<char**>(&ptrs)[i] = reinterpret_cast<char**>(&ptrs)[i] + (reinterpret_cast<char*>(devicePtr) - reinterpret_cast<char*>(Compressor.mOutputFlat));

        }

      }

    }

    TransferMemoryResourcesToGPU(myStep, &Compressor, outputStream);

    constexpr uint32_t nBlocksDefault = 2;

    constexpr uint32_t nBlocksMulti = 1 + 2 * 200;

    int32_t gatherModeKernel = mRec->GetProcessingSettings().tpcCompressionGatherModeKernel == -1 ? mRec->getGPUParameters(doGPU).par_COMP_GATHER_KERNEL : mRec->GetProcessingSettings().tpcCompressionGatherMode;

    switch (gatherModeKernel) {

      case 0:

        runKernel<GPUTPCCompressionGatherKernels, GPUTPCCompressionGatherKernels::unbuffered>(GetGridBlkStep(nBlocksDefault, outputStream, RecoStep::TPCCompression));

        getKernelTimer<GPUTPCCompressionGatherKernels, GPUTPCCompressionGatherKernels::unbuffered>(RecoStep::TPCCompression, 0, outputSize, false);

        break;

      case 1:

        runKernel<GPUTPCCompressionGatherKernels, GPUTPCCompressionGatherKernels::buffered32>(GetGridBlkStep(nBlocksDefault, outputStream, RecoStep::TPCCompression));

        getKernelTimer<GPUTPCCompressionGatherKernels, GPUTPCCompressionGatherKernels::buffered32>(RecoStep::TPCCompression, 0, outputSize, false);

        break;

      case 2:

        runKernel<GPUTPCCompressionGatherKernels, GPUTPCCompressionGatherKernels::buffered64>(GetGridBlkStep(nBlocksDefault, outputStream, RecoStep::TPCCompression));

        getKernelTimer<GPUTPCCompressionGatherKernels, GPUTPCCompressionGatherKernels::buffered64>(RecoStep::TPCCompression, 0, outputSize, false);

        break;

      case 3:

        runKernel<GPUTPCCompressionGatherKernels, GPUTPCCompressionGatherKernels::buffered128>(GetGridBlkStep(nBlocksDefault, outputStream, RecoStep::TPCCompression));

        getKernelTimer<GPUTPCCompressionGatherKernels, GPUTPCCompressionGatherKernels::buffered128>(RecoStep::TPCCompression, 0, outputSize, false);

        break;

      case 4:

        static_assert((nBlocksMulti & 1) && nBlocksMulti >= 3);

        runKernel<GPUTPCCompressionGatherKernels, GPUTPCCompressionGatherKernels::multiBlock>(GetGridBlkStep(nBlocksMulti, outputStream, RecoStep::TPCCompression));

        getKernelTimer<GPUTPCCompressionGatherKernels, GPUTPCCompressionGatherKernels::multiBlock>(RecoStep::TPCCompression, 0, outputSize, false);

        break;

      default:

        GPUError("Invalid compression kernel %d selected.", (int32_t)gatherModeKernel);

        return 1;

    }

    if (gatherMode == 3) {

      RecordMarker(&mEvents->stream[outputStream], outputStream);

      char* deviceFlatPts = (char*)Compressor.mOutput->qTotU;

      if (GetProcessingSettings().doublePipeline) {

        const size_t blockSize = CAMath::nextMultipleOf<1024>(copySize / 30);

        const uint32_t n = (copySize + blockSize - 1) / blockSize;

        for (uint32_t i = 0; i < n; i++) {

          GPUMemCpy(myStep, hostFlatPtr + i * blockSize, deviceFlatPts + i * blockSize, CAMath::Min(blockSize, copySize - i * blockSize), outputStream, false);

        }

      } else {

        GPUMemCpy(myStep, hostFlatPtr, deviceFlatPts, copySize, outputStream, false);

      }

    }

  } else {

    int8_t direction = 0;

    if (gatherMode == 0) {

      P = &CompressorShadow.mPtrs;

    } else if (gatherMode == 1) {

      P = &Compressor.mPtrs;

      direction = -1;

      gatherTimer = &getTimer<GPUTPCCompressionKernels>("GPUTPCCompression_GatherOnCPU", 0);

      gatherTimer->Start();

    }

    GPUMemCpyAlways(myStep, O->nSliceRowClusters, P->nSliceRowClusters, NSECTORS * GPUCA_ROW_COUNT * sizeof(O->nSliceRowClusters[0]), outputStream, direction);

    GPUMemCpyAlways(myStep, O->nTrackClusters, P->nTrackClusters, O->nTracks * sizeof(O->nTrackClusters[0]), outputStream, direction);

    SynchronizeStream(outputStream);

    uint32_t offset = 0;

    for (uint32_t i = 0; i < NSECTORS; i++) {

      for (uint32_t j = 0; j < GPUCA_ROW_COUNT; j++) {

        uint32_t srcOffset = mIOPtrs.clustersNative->clusterOffset[i][j] * Compressor.mMaxClusterFactorBase1024 / 1024;

        GPUMemCpyAlways(myStep, O->qTotU + offset, P->qTotU + srcOffset, O->nSliceRowClusters[i * GPUCA_ROW_COUNT + j] * sizeof(O->qTotU[0]), outputStream, direction);

        GPUMemCpyAlways(myStep, O->qMaxU + offset, P->qMaxU + srcOffset, O->nSliceRowClusters[i * GPUCA_ROW_COUNT + j] * sizeof(O->qMaxU[0]), outputStream, direction);

        GPUMemCpyAlways(myStep, O->flagsU + offset, P->flagsU + srcOffset, O->nSliceRowClusters[i * GPUCA_ROW_COUNT + j] * sizeof(O->flagsU[0]), outputStream, direction);

        GPUMemCpyAlways(myStep, O->padDiffU + offset, P->padDiffU + srcOffset, O->nSliceRowClusters[i * GPUCA_ROW_COUNT + j] * sizeof(O->padDiffU[0]), outputStream, direction);

        GPUMemCpyAlways(myStep, O->timeDiffU + offset, P->timeDiffU + srcOffset, O->nSliceRowClusters[i * GPUCA_ROW_COUNT + j] * sizeof(O->timeDiffU[0]), outputStream, direction);

        GPUMemCpyAlways(myStep, O->sigmaPadU + offset, P->sigmaPadU + srcOffset, O->nSliceRowClusters[i * GPUCA_ROW_COUNT + j] * sizeof(O->sigmaPadU[0]), outputStream, direction);

        GPUMemCpyAlways(myStep, O->sigmaTimeU + offset, P->sigmaTimeU + srcOffset, O->nSliceRowClusters[i * GPUCA_ROW_COUNT + j] * sizeof(O->sigmaTimeU[0]), outputStream, direction);

        offset += O->nSliceRowClusters[i * GPUCA_ROW_COUNT + j];

      }

    }

    offset = 0;

    for (uint32_t i = 0; i < O->nTracks; i++) {

      GPUMemCpyAlways(myStep, O->qTotA + offset, P->qTotA + Compressor.mAttachedClusterFirstIndex[i], O->nTrackClusters[i] * sizeof(O->qTotA[0]), outputStream, direction);

      GPUMemCpyAlways(myStep, O->qMaxA + offset, P->qMaxA + Compressor.mAttachedClusterFirstIndex[i], O->nTrackClusters[i] * sizeof(O->qMaxA[0]), outputStream, direction);

      GPUMemCpyAlways(myStep, O->flagsA + offset, P->flagsA + Compressor.mAttachedClusterFirstIndex[i], O->nTrackClusters[i] * sizeof(O->flagsA[0]), outputStream, direction);

      GPUMemCpyAlways(myStep, O->sigmaPadA + offset, P->sigmaPadA + Compressor.mAttachedClusterFirstIndex[i], O->nTrackClusters[i] * sizeof(O->sigmaPadA[0]), outputStream, direction);

      GPUMemCpyAlways(myStep, O->sigmaTimeA + offset, P->sigmaTimeA + Compressor.mAttachedClusterFirstIndex[i], O->nTrackClusters[i] * sizeof(O->sigmaTimeA[0]), outputStream, direction);


      // First index stored with track

      GPUMemCpyAlways(myStep, O->rowDiffA + offset - i, P->rowDiffA + Compressor.mAttachedClusterFirstIndex[i] + 1, (O->nTrackClusters[i] - 1) * sizeof(O->rowDiffA[0]), outputStream, direction);

      GPUMemCpyAlways(myStep, O->sliceLegDiffA + offset - i, P->sliceLegDiffA + Compressor.mAttachedClusterFirstIndex[i] + 1, (O->nTrackClusters[i] - 1) * sizeof(O->sliceLegDiffA[0]), outputStream, direction);

      GPUMemCpyAlways(myStep, O->padResA + offset - i, P->padResA + Compressor.mAttachedClusterFirstIndex[i] + 1, (O->nTrackClusters[i] - 1) * sizeof(O->padResA[0]), outputStream, direction);

      GPUMemCpyAlways(myStep, O->timeResA + offset - i, P->timeResA + Compressor.mAttachedClusterFirstIndex[i] + 1, (O->nTrackClusters[i] - 1) * sizeof(O->timeResA[0]), outputStream, direction);

      offset += O->nTrackClusters[i];

    }

    GPUMemCpyAlways(myStep, O->qPtA, P->qPtA, O->nTracks * sizeof(O->qPtA[0]), outputStream, direction);

    GPUMemCpyAlways(myStep, O->rowA, P->rowA, O->nTracks * sizeof(O->rowA[0]), outputStream, direction);

    GPUMemCpyAlways(myStep, O->sliceA, P->sliceA, O->nTracks * sizeof(O->sliceA[0]), outputStream, direction);

    GPUMemCpyAlways(myStep, O->timeA, P->timeA, O->nTracks * sizeof(O->timeA[0]), outputStream, direction);

    GPUMemCpyAlways(myStep, O->padA, P->padA, O->nTracks * sizeof(O->padA[0]), outputStream, direction);

  }

  if (gatherMode == 1) {

    gatherTimer->Stop();

  }

  mIOPtrs.tpcCompressedClusters = Compressor.mOutputFlat;

  if (gatherMode == 3) {

    SynchronizeEventAndRelease(mEvents->stream[outputStream]);

    mRec->ReturnVolatileDeviceMemory();

  }


  if (mPipelineFinalizationCtx == nullptr) {

    SynchronizeStream(outputStream);

  } else {

    ((GPUChainTracking*)GetNextChainInQueue())->mRec->BlockStackedMemory(mRec);

  }

  mRec->PopNonPersistentMemory(RecoStep::TPCCompression, qStr2Tag("TPCCOMPR"));

  if (GetProcessingSettings().deterministicGPUReconstruction) {

    SynchronizeGPU();

    DebugSortCompressedClusters(Compressor.mOutputFlat);

  }

  DoDebugAndDump(RecoStep::TPCCompression, GPUChainTrackingDebugFlags::TPCCompressedClusters, Compressor, &GPUTPCCompression::DumpCompressedClusters, *mDebugFile);

  return 0;

}


int32_t GPUChainTracking::RunTPCDecompression()

{

  const bool needFullFiltering = GetProcessingSettings().tpcApplyCFCutsAtDecoding || (GetProcessingSettings().tpcApplyClusterFilterOnCPU > 0);

  const bool runTimeBinCutFiltering = param().tpcCutTimeBin > 0;

  if (needFullFiltering && !GetProcessingSettings().tpcUseOldCPUDecoding) {

    GPUFatal("tpcApplyCFCutsAtDecoding, tpcApplyClusterFilterOnCPU and tpcCutTimeBin currently require tpcUseOldCPUDecoding");

  }


  if (GetProcessingSettings().tpcUseOldCPUDecoding) {

    const bool runFiltering = needFullFiltering || runTimeBinCutFiltering;

    const auto& threadContext = GetThreadContext();

    TPCClusterDecompressor decomp;

    auto allocatorFinal = [this](size_t size) {

      this->mInputsHost->mNClusterNative = this->mInputsShadow->mNClusterNative = size;

      this->AllocateRegisteredMemory(this->mInputsHost->mResourceClusterNativeOutput, this->mSubOutputControls[GPUTrackingOutputs::getIndex(&GPUTrackingOutputs::clustersNative)]);

      return this->mInputsHost->mPclusterNativeOutput;

    };

    std::unique_ptr<ClusterNative[]> tmpBuffer;

    auto allocatorTmp = [&tmpBuffer](size_t size) {

      return ((tmpBuffer = std::make_unique<ClusterNative[]>(size))).get();

    };

    auto& decompressTimer = getTimer<TPCClusterDecompressor>("TPCDecompression", 0);

    auto allocatorUse = runFiltering ? std::function<ClusterNative*(size_t)>{allocatorTmp} : std::function<ClusterNative*(size_t)>{allocatorFinal};

    decompressTimer.Start();

    if (decomp.decompress(mIOPtrs.tpcCompressedClusters, *mClusterNativeAccess, allocatorUse, param(), GetProcessingSettings().deterministicGPUReconstruction)) {

      GPUError("Error decompressing clusters");

      return 1;

    }

    if (runFiltering) {

      RunTPCClusterFilter(mClusterNativeAccess.get(), allocatorFinal, GetProcessingSettings().tpcApplyCFCutsAtDecoding);

    }

    decompressTimer.Stop();

    mIOPtrs.clustersNative = mClusterNativeAccess.get();

    if (mRec->IsGPU()) {

      AllocateRegisteredMemory(mInputsHost->mResourceClusterNativeBuffer);

      processorsShadow()->ioPtrs.clustersNative = mInputsShadow->mPclusterNativeAccess;

      WriteToConstantMemory(RecoStep::TPCDecompression, (char*)&processors()->ioPtrs - (char*)processors(), &processorsShadow()->ioPtrs, sizeof(processorsShadow()->ioPtrs), 0);

      *mInputsHost->mPclusterNativeAccess = *mIOPtrs.clustersNative;

      mInputsHost->mPclusterNativeAccess->clustersLinear = mInputsShadow->mPclusterNativeBuffer;

      mInputsHost->mPclusterNativeAccess->setOffsetPtrs();

      GPUMemCpy(RecoStep::TPCDecompression, mInputsShadow->mPclusterNativeBuffer, mIOPtrs.clustersNative->clustersLinear, sizeof(mIOPtrs.clustersNative->clustersLinear[0]) * mIOPtrs.clustersNative->nClustersTotal, 0, true);

      TransferMemoryResourceLinkToGPU(RecoStep::TPCDecompression, mInputsHost->mResourceClusterNativeAccess, 0);

      SynchronizeStream(0);

    }

  } else {

    mRec->PushNonPersistentMemory(qStr2Tag("TPCDCMPR"));

    RecoStep myStep = RecoStep::TPCDecompression;

    bool doGPU = GetRecoStepsGPU() & RecoStep::TPCDecompression;

    GPUTPCDecompression& Decompressor = processors()->tpcDecompressor;

    GPUTPCDecompression& DecompressorShadow = doGPU ? processorsShadow()->tpcDecompressor : Decompressor;

    const auto& threadContext = GetThreadContext();

    CompressedClusters cmprClsHost = *mIOPtrs.tpcCompressedClusters;

    CompressedClusters& inputGPU = Decompressor.mInputGPU;

    CompressedClusters& inputGPUShadow = DecompressorShadow.mInputGPU;


    if (cmprClsHost.nTracks && cmprClsHost.solenoidBz != -1e6f && cmprClsHost.solenoidBz != param().bzkG) {

      throw std::runtime_error("Configured solenoid Bz does not match value used for track model encoding");

    }

    if (cmprClsHost.nTracks && cmprClsHost.maxTimeBin != -1e6 && cmprClsHost.maxTimeBin != param().continuousMaxTimeBin) {

      throw std::runtime_error("Configured max time bin does not match value used for track model encoding");

    }


    int32_t inputStream = 0;

    int32_t unattachedStream = mRec->NStreams() - 1;

    inputGPU = cmprClsHost;

    SetupGPUProcessor(&Decompressor, true);

    WriteToConstantMemory(myStep, (char*)&processors()->tpcDecompressor - (char*)processors(), &DecompressorShadow, sizeof(DecompressorShadow), inputStream);

    inputGPU = cmprClsHost;


    bool toGPU = true;

    runKernel<GPUMemClean16>({GetGridAutoStep(inputStream, RecoStep::TPCDecompression), krnlRunRangeNone, &mEvents->init}, DecompressorShadow.mNativeClustersIndex, NSECTORS * GPUCA_ROW_COUNT * sizeof(DecompressorShadow.mNativeClustersIndex[0]));

    int32_t nStreams = doGPU ? mRec->NStreams() - 1 : 1;

    if (cmprClsHost.nAttachedClusters != 0) {

      std::exclusive_scan(cmprClsHost.nTrackClusters, cmprClsHost.nTrackClusters + cmprClsHost.nTracks, Decompressor.mAttachedClustersOffsets, 0u); // computing clusters offsets for first kernel

      for (int32_t iStream = 0; iStream < nStreams; iStream++) {

        uint32_t startTrack = cmprClsHost.nTracks / nStreams * iStream;

        uint32_t endTrack = cmprClsHost.nTracks / nStreams * (iStream + 1) + (iStream < nStreams - 1 ? 0 : cmprClsHost.nTracks % nStreams); // index of last track (excluded from computation)

        uint32_t numTracks = endTrack - startTrack;

        uint32_t* offsets = Decompressor.mAttachedClustersOffsets;

        uint32_t numClusters = (endTrack == cmprClsHost.nTracks ? offsets[endTrack - 1] + cmprClsHost.nTrackClusters[endTrack - 1] : offsets[endTrack]) - offsets[startTrack];

        uint32_t numClustersRed = numClusters - numTracks;

        GPUMemCpy(myStep, DecompressorShadow.mAttachedClustersOffsets + startTrack, Decompressor.mAttachedClustersOffsets + startTrack, numTracks * sizeof(Decompressor.mAttachedClustersOffsets[0]), iStream, toGPU);

        GPUMemCpy(myStep, inputGPUShadow.nTrackClusters + startTrack, cmprClsHost.nTrackClusters + startTrack, numTracks * sizeof(cmprClsHost.nTrackClusters[0]), iStream, toGPU);

        GPUMemCpy(myStep, inputGPUShadow.qTotA + offsets[startTrack], cmprClsHost.qTotA + offsets[startTrack], numClusters * sizeof(cmprClsHost.qTotA[0]), iStream, toGPU);

        GPUMemCpy(myStep, inputGPUShadow.qMaxA + offsets[startTrack], cmprClsHost.qMaxA + offsets[startTrack], numClusters * sizeof(cmprClsHost.qMaxA[0]), iStream, toGPU);

        GPUMemCpy(myStep, inputGPUShadow.flagsA + offsets[startTrack], cmprClsHost.flagsA + offsets[startTrack], numClusters * sizeof(cmprClsHost.flagsA[0]), iStream, toGPU);

        GPUMemCpy(myStep, inputGPUShadow.rowDiffA + offsets[startTrack] - startTrack, cmprClsHost.rowDiffA + offsets[startTrack] - startTrack, numClustersRed * sizeof(cmprClsHost.rowDiffA[0]), iStream, toGPU);

        GPUMemCpy(myStep, inputGPUShadow.sliceLegDiffA + offsets[startTrack] - startTrack, cmprClsHost.sliceLegDiffA + offsets[startTrack] - startTrack, numClustersRed * sizeof(cmprClsHost.sliceLegDiffA[0]), iStream, toGPU);

        GPUMemCpy(myStep, inputGPUShadow.padResA + offsets[startTrack] - startTrack, cmprClsHost.padResA + offsets[startTrack] - startTrack, numClustersRed * sizeof(cmprClsHost.padResA[0]), iStream, toGPU);

        GPUMemCpy(myStep, inputGPUShadow.timeResA + offsets[startTrack] - startTrack, cmprClsHost.timeResA + offsets[startTrack] - startTrack, numClustersRed * sizeof(cmprClsHost.timeResA[0]), iStream, toGPU);

        GPUMemCpy(myStep, inputGPUShadow.sigmaPadA + offsets[startTrack], cmprClsHost.sigmaPadA + offsets[startTrack], numClusters * sizeof(cmprClsHost.sigmaPadA[0]), iStream, toGPU);

        GPUMemCpy(myStep, inputGPUShadow.sigmaTimeA + offsets[startTrack], cmprClsHost.sigmaTimeA + offsets[startTrack], numClusters * sizeof(cmprClsHost.sigmaTimeA[0]), iStream, toGPU);

        GPUMemCpy(myStep, inputGPUShadow.qPtA + startTrack, cmprClsHost.qPtA + startTrack, numTracks * sizeof(cmprClsHost.qPtA[0]), iStream, toGPU);

        GPUMemCpy(myStep, inputGPUShadow.rowA + startTrack, cmprClsHost.rowA + startTrack, numTracks * sizeof(cmprClsHost.rowA[0]), iStream, toGPU);

        GPUMemCpy(myStep, inputGPUShadow.sliceA + startTrack, cmprClsHost.sliceA + startTrack, numTracks * sizeof(cmprClsHost.sliceA[0]), iStream, toGPU);

        GPUMemCpy(myStep, inputGPUShadow.timeA + startTrack, cmprClsHost.timeA + startTrack, numTracks * sizeof(cmprClsHost.timeA[0]), iStream, toGPU);

        GPUMemCpy(myStep, inputGPUShadow.padA + startTrack, cmprClsHost.padA + startTrack, numTracks * sizeof(cmprClsHost.padA[0]), iStream, toGPU);

        runKernel<GPUTPCDecompressionKernels, GPUTPCDecompressionKernels::step0attached>({GetGridAuto(iStream), krnlRunRangeNone, {&mEvents->stream[iStream], &mEvents->init}}, startTrack, endTrack);

      }

    }

    GPUMemCpy(myStep, inputGPUShadow.nSliceRowClusters, cmprClsHost.nSliceRowClusters, NSECTORS * GPUCA_ROW_COUNT * sizeof(cmprClsHost.nSliceRowClusters[0]), unattachedStream, toGPU);

    GPUMemCpy(myStep, inputGPUShadow.qTotU, cmprClsHost.qTotU, cmprClsHost.nUnattachedClusters * sizeof(cmprClsHost.qTotU[0]), unattachedStream, toGPU);

    GPUMemCpy(myStep, inputGPUShadow.qMaxU, cmprClsHost.qMaxU, cmprClsHost.nUnattachedClusters * sizeof(cmprClsHost.qMaxU[0]), unattachedStream, toGPU);

    GPUMemCpy(myStep, inputGPUShadow.flagsU, cmprClsHost.flagsU, cmprClsHost.nUnattachedClusters * sizeof(cmprClsHost.flagsU[0]), unattachedStream, toGPU);

    GPUMemCpy(myStep, inputGPUShadow.padDiffU, cmprClsHost.padDiffU, cmprClsHost.nUnattachedClusters * sizeof(cmprClsHost.padDiffU[0]), unattachedStream, toGPU);

    GPUMemCpy(myStep, inputGPUShadow.timeDiffU, cmprClsHost.timeDiffU, cmprClsHost.nUnattachedClusters * sizeof(cmprClsHost.timeDiffU[0]), unattachedStream, toGPU);

    GPUMemCpy(myStep, inputGPUShadow.sigmaPadU, cmprClsHost.sigmaPadU, cmprClsHost.nUnattachedClusters * sizeof(cmprClsHost.sigmaPadU[0]), unattachedStream, toGPU);

    GPUMemCpy(myStep, inputGPUShadow.sigmaTimeU, cmprClsHost.sigmaTimeU, cmprClsHost.nUnattachedClusters * sizeof(cmprClsHost.sigmaTimeU[0]), unattachedStream, toGPU);


    TransferMemoryResourceLinkToHost(RecoStep::TPCDecompression, Decompressor.mResourceTmpIndexes, inputStream, nullptr, mEvents->stream, nStreams);

    SynchronizeStream(inputStream);

    uint32_t offset = 0;

    uint32_t decodedAttachedClusters = 0;

    for (uint32_t i = 0; i < NSECTORS; i++) {

      for (uint32_t j = 0; j < GPUCA_ROW_COUNT; j++) {

        uint32_t linearIndex = i * GPUCA_ROW_COUNT + j;

        uint32_t unattachedOffset = (linearIndex >= cmprClsHost.nSliceRows) ? 0 : cmprClsHost.nSliceRowClusters[linearIndex];

        (mClusterNativeAccess->nClusters)[i][j] = Decompressor.mNativeClustersIndex[linearIndex] + unattachedOffset;

        Decompressor.mUnattachedClustersOffsets[linearIndex] = offset;

        offset += unattachedOffset;

        decodedAttachedClusters += Decompressor.mNativeClustersIndex[linearIndex];

      }

    }

    TransferMemoryResourceLinkToGPU(RecoStep::TPCDecompression, Decompressor.mResourceTmpClustersOffsets, inputStream);

    if (decodedAttachedClusters != cmprClsHost.nAttachedClusters) {

      GPUWarning("%u / %u clusters failed track model decoding (%f %%)", cmprClsHost.nAttachedClusters - decodedAttachedClusters, cmprClsHost.nAttachedClusters, 100.f * (float)(cmprClsHost.nAttachedClusters - decodedAttachedClusters) / (float)cmprClsHost.nAttachedClusters);

    }

    if (runTimeBinCutFiltering) { // If filtering, allocate a temporary buffer and cluster native access in decompressor context

      Decompressor.mNClusterNativeBeforeFiltering = DecompressorShadow.mNClusterNativeBeforeFiltering = decodedAttachedClusters + cmprClsHost.nUnattachedClusters;

      AllocateRegisteredMemory(Decompressor.mResourceTmpBufferBeforeFiltering);

      AllocateRegisteredMemory(Decompressor.mResourceClusterNativeAccess);

      mClusterNativeAccess->clustersLinear = DecompressorShadow.mNativeClustersBuffer;

      mClusterNativeAccess->setOffsetPtrs();

      *Decompressor.mClusterNativeAccess = *mClusterNativeAccess;

      WriteToConstantMemory(myStep, (char*)&processors()->tpcDecompressor - (char*)processors(), &DecompressorShadow, sizeof(DecompressorShadow), inputStream);

      TransferMemoryResourceLinkToGPU(RecoStep::TPCDecompression, Decompressor.mResourceClusterNativeAccess, inputStream, &mEvents->single);

    } else { // If not filtering, directly allocate the final buffers

      mInputsHost->mNClusterNative = mInputsShadow->mNClusterNative = cmprClsHost.nAttachedClusters + cmprClsHost.nUnattachedClusters;

      AllocateRegisteredMemory(mInputsHost->mResourceClusterNativeOutput, mSubOutputControls[GPUTrackingOutputs::getIndex(&GPUTrackingOutputs::clustersNative)]);

      AllocateRegisteredMemory(mInputsHost->mResourceClusterNativeBuffer);

      DecompressorShadow.mNativeClustersBuffer = mInputsShadow->mPclusterNativeBuffer;

      Decompressor.mNativeClustersBuffer = mInputsHost->mPclusterNativeOutput;

      DecompressorShadow.mClusterNativeAccess = mInputsShadow->mPclusterNativeAccess;

      Decompressor.mClusterNativeAccess = mInputsHost->mPclusterNativeAccess;

      WriteToConstantMemory(myStep, (char*)&processors()->tpcDecompressor - (char*)processors(), &DecompressorShadow, sizeof(DecompressorShadow), inputStream);

      if (doGPU) {

        mClusterNativeAccess->clustersLinear = mInputsShadow->mPclusterNativeBuffer;

        mClusterNativeAccess->setOffsetPtrs();

        *mInputsHost->mPclusterNativeAccess = *mClusterNativeAccess;

        processorsShadow()->ioPtrs.clustersNative = mInputsShadow->mPclusterNativeAccess;

        WriteToConstantMemory(RecoStep::TPCDecompression, (char*)&processors()->ioPtrs - (char*)processors(), &processorsShadow()->ioPtrs, sizeof(processorsShadow()->ioPtrs), inputStream);

        TransferMemoryResourceLinkToGPU(RecoStep::TPCDecompression, mInputsHost->mResourceClusterNativeAccess, inputStream, &mEvents->single);

      }

      mIOPtrs.clustersNative = mClusterNativeAccess.get();

      mClusterNativeAccess->clustersLinear = mInputsHost->mPclusterNativeOutput;

      mClusterNativeAccess->setOffsetPtrs();

      *mInputsHost->mPclusterNativeAccess = *mClusterNativeAccess;

    }


    uint32_t batchSize = doGPU ? 6 : NSECTORS;

    for (uint32_t iSector = 0; iSector < NSECTORS; iSector = iSector + batchSize) {

      int32_t iStream = (iSector / batchSize) % mRec->NStreams();

      runKernel<GPUTPCDecompressionKernels, GPUTPCDecompressionKernels::step1unattached>({GetGridAuto(iStream), krnlRunRangeNone, {nullptr, &mEvents->single}}, iSector, batchSize);

      uint32_t copySize = std::accumulate(mClusterNativeAccess->nClustersSector + iSector, mClusterNativeAccess->nClustersSector + iSector + batchSize, 0u);

      if (!runTimeBinCutFiltering) {

        GPUMemCpy(RecoStep::TPCDecompression, mInputsHost->mPclusterNativeOutput + mClusterNativeAccess->clusterOffset[iSector][0], DecompressorShadow.mNativeClustersBuffer + mClusterNativeAccess->clusterOffset[iSector][0], sizeof(Decompressor.mNativeClustersBuffer[0]) * copySize, iStream, false);

      }

    }

    SynchronizeGPU();


    if (runTimeBinCutFiltering) { // If filtering is applied, count how many clusters will remain after filtering and allocate final buffers accordingly

      AllocateRegisteredMemory(Decompressor.mResourceNClusterPerSectorRow);

      WriteToConstantMemory(myStep, (char*)&processors()->tpcDecompressor - (char*)processors(), &DecompressorShadow, sizeof(DecompressorShadow), unattachedStream);

      runKernel<GPUMemClean16>({GetGridAutoStep(unattachedStream, RecoStep::TPCDecompression), krnlRunRangeNone}, DecompressorShadow.mNClusterPerSectorRow, NSECTORS * GPUCA_ROW_COUNT * sizeof(DecompressorShadow.mNClusterPerSectorRow[0]));

      runKernel<GPUTPCDecompressionUtilKernels, GPUTPCDecompressionUtilKernels::countFilteredClusters>(GetGridAutoStep(unattachedStream, RecoStep::TPCDecompression));

      TransferMemoryResourceLinkToHost(RecoStep::TPCDecompression, Decompressor.mResourceNClusterPerSectorRow, unattachedStream);

      SynchronizeStream(unattachedStream);

      uint32_t nClustersFinal = std::accumulate(Decompressor.mNClusterPerSectorRow, Decompressor.mNClusterPerSectorRow + inputGPU.nSliceRows, 0u);

      mInputsHost->mNClusterNative = mInputsShadow->mNClusterNative = nClustersFinal;

      AllocateRegisteredMemory(mInputsHost->mResourceClusterNativeOutput, mSubOutputControls[GPUTrackingOutputs::getIndex(&GPUTrackingOutputs::clustersNative)]);

      AllocateRegisteredMemory(mInputsHost->mResourceClusterNativeBuffer);

      DecompressorShadow.mNativeClustersBuffer = mInputsShadow->mPclusterNativeBuffer;

      Decompressor.mNativeClustersBuffer = mInputsHost->mPclusterNativeOutput;

      WriteToConstantMemory(myStep, (char*)&processors()->tpcDecompressor - (char*)processors(), &DecompressorShadow, sizeof(DecompressorShadow), unattachedStream);

      for (uint32_t i = 0; i < NSECTORS; i++) {

        for (uint32_t j = 0; j < GPUCA_ROW_COUNT; j++) {

          mClusterNativeAccess->nClusters[i][j] = Decompressor.mNClusterPerSectorRow[i * GPUCA_ROW_COUNT + j];

        }

      }

      if (doGPU) {

        mClusterNativeAccess->clustersLinear = mInputsShadow->mPclusterNativeBuffer;

        mClusterNativeAccess->setOffsetPtrs();

        *mInputsHost->mPclusterNativeAccess = *mClusterNativeAccess;

        processorsShadow()->ioPtrs.clustersNative = mInputsShadow->mPclusterNativeAccess;

        WriteToConstantMemory(RecoStep::TPCDecompression, (char*)&processors()->ioPtrs - (char*)processors(), &processorsShadow()->ioPtrs, sizeof(processorsShadow()->ioPtrs), unattachedStream);

        TransferMemoryResourceLinkToGPU(RecoStep::TPCDecompression, mInputsHost->mResourceClusterNativeAccess, unattachedStream);

      }

      mIOPtrs.clustersNative = mClusterNativeAccess.get();

      mClusterNativeAccess->clustersLinear = mInputsHost->mPclusterNativeOutput;

      mClusterNativeAccess->setOffsetPtrs();

      runKernel<GPUTPCDecompressionUtilKernels, GPUTPCDecompressionUtilKernels::storeFilteredClusters>(GetGridAutoStep(unattachedStream, RecoStep::TPCDecompression));

      GPUMemCpy(RecoStep::TPCDecompression, mInputsHost->mPclusterNativeOutput, DecompressorShadow.mNativeClustersBuffer, sizeof(Decompressor.mNativeClustersBuffer[0]) * nClustersFinal, unattachedStream, false);

      SynchronizeStream(unattachedStream);

    }

    if (GetProcessingSettings().deterministicGPUReconstruction || GetProcessingSettings().debugLevel >= 4) {

      runKernel<GPUTPCDecompressionUtilKernels, GPUTPCDecompressionUtilKernels::sortPerSectorRow>(GetGridAutoStep(unattachedStream, RecoStep::TPCDecompression));

      const ClusterNativeAccess* decoded = mIOPtrs.clustersNative;

      if (doGPU) {

        for (uint32_t i = 0; i < NSECTORS; i++) {

          for (uint32_t j = 0; j < GPUCA_ROW_COUNT; j++) {

            ClusterNative* begin = mInputsHost->mPclusterNativeOutput + decoded->clusterOffset[i][j];

            ClusterNative* end = begin + decoded->nClusters[i][j];

            std::sort(begin, end);

          }

        }

      }

      SynchronizeStream(unattachedStream);

    }

    mRec->PopNonPersistentMemory(RecoStep::TPCDecompression, qStr2Tag("TPCDCMPR"));

  }

  DoDebugDump(GPUChainTrackingDebugFlags::TPCDecompressedClusters, &GPUChainTracking::DumpClusters, *mDebugFile, mIOPtrs.clustersNative);

  return 0;

}


GPUChainTrackingDebug.h

GPUChainTracking.h

i
int32_t i
Definition GPUCommonAlgorithm.h:436

GPUConstantMem.h

GPULogging.h

GPUO2DataTypes.h

GPUTPCCFChainContext.h

GPUTPCCompressionKernels.h

GPUTPCDecompressionKernels.h

GPUCA_ROW_COUNT
#define GPUCA_ROW_COUNT
Definition GPUTPCGeometry.h:23

GPUTrackingInputProvider.h

j
uint32_t j
Definition RawData.h:0

TPCClusterDecompressor.h

HighResTimer
Definition timer.h:21

HighResTimer::Start
void Start()
Definition timer.cxx:57

HighResTimer::Stop
void Stop()
Definition timer.cxx:69

o2::gpu::GPUChainTracking
Definition GPUChainTracking.h:71

o2::gpu::GPUChainTracking::DebugSortCompressedClusters
static void DebugSortCompressedClusters(o2::tpc::CompressedClustersFlat *cls)
Definition GPUChainTrackingDebugAndProfiling.cxx:353

o2::gpu::GPUChainTracking::RunTPCCompression
int32_t RunTPCCompression()
Definition GPUChainTrackingCompression.cxx:33

o2::gpu::GPUChainTracking::mClusterNativeAccess
std::unique_ptr< o2::tpc::ClusterNativeAccess > mClusterNativeAccess
Definition GPUChainTracking.h:269

o2::gpu::GPUChainTracking::mEvents
eventStruct * mEvents
Definition GPUChainTracking.h:288

o2::gpu::GPUChainTracking::DumpClusters
static void DumpClusters(std::ostream &out, const o2::tpc::ClusterNativeAccess *clusters)
Definition GPUChainTrackingDebugAndProfiling.cxx:338

o2::gpu::GPUChainTracking::mInputsHost
std::unique_ptr< GPUTrackingInputProvider > mInputsHost
Definition GPUChainTracking.h:247

o2::gpu::GPUChainTracking::mSubOutputControls
std::array< GPUOutputControl *, GPUTrackingOutputs::count()> mSubOutputControls
Definition GPUChainTracking.h:270

o2::gpu::GPUChainTracking::mDebugFile
std::unique_ptr< std::ofstream > mDebugFile
Definition GPUChainTracking.h:285

o2::gpu::GPUChainTracking::mCFContext
std::unique_ptr< GPUTPCCFChainContext > mCFContext
Definition GPUChainTracking.h:274

o2::gpu::GPUChainTracking::RunTPCDecompression
int32_t RunTPCDecompression()
Definition GPUChainTrackingCompression.cxx:214

o2::gpu::GPUChainTracking::mIOPtrs
GPUTrackingInOutPointers & mIOPtrs
Definition GPUChainTracking.h:91

o2::gpu::GPUChainTracking::mInputsShadow
std::unique_ptr< GPUTrackingInputProvider > mInputsShadow
Definition GPUChainTracking.h:248

o2::gpu::GPUChain::RecordMarker
void RecordMarker(deviceEvent *ev, int32_t stream)
Definition GPUChain.h:108

o2::gpu::GPUChain::TransferMemoryResourceLinkToGPU
void TransferMemoryResourceLinkToGPU(RecoStep step, int16_t res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
Definition GPUChain.h:124

o2::gpu::GPUChain::GPUMemCpyAlways
void GPUMemCpyAlways(RecoStep step, void *dst, const void *src, size_t size, int32_t stream, int32_t toGPU, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
Definition GPUChain.h:129

o2::gpu::GPUChain::GPUMemCpy
void GPUMemCpy(RecoStep step, void *dst, const void *src, size_t size, int32_t stream, int32_t toGPU, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
Definition GPUChain.h:128

o2::gpu::GPUChain::DoDebugAndDump
bool DoDebugAndDump(RecoStep step, uint32_t mask, T &processor, S T::*func, Args &&... args)
Definition GPUChain.h:230

o2::gpu::GPUChain::SynchronizeGPU
void SynchronizeGPU()
Definition GPUChain.h:110

o2::gpu::GPUChain::GetRecoStepsGPU
GPUReconstruction::RecoStepField GetRecoStepsGPU() const
Definition GPUChain.h:72

o2::gpu::GPUChain::WriteToConstantMemory
void WriteToConstantMemory(RecoStep step, size_t offset, const void *src, size_t size, int32_t stream=-1, deviceEvent *ev=nullptr)
Definition GPUChain.h:127

o2::gpu::GPUChain::GetNextChainInQueue
GPUChain * GetNextChainInQueue()
Definition GPUChain.h:225

o2::gpu::GPUChain::GetGridBlkStep
krnlExec GetGridBlkStep(uint32_t nBlocks, int32_t stream, GPUDataTypes::RecoStep st=GPUDataTypes::RecoStep::NoRecoStep)
Definition GPUChain.cxx:37

o2::gpu::GPUChain::AllocateRegisteredMemory
size_t AllocateRegisteredMemory(GPUProcessor *proc)
Definition GPUChain.h:217

o2::gpu::GPUChain::GetThreadContext
virtual std::unique_ptr< GPUReconstructionProcessing::threadContext > GetThreadContext()
Definition GPUChain.h:109

o2::gpu::GPUChain::processors
GPUConstantMem * processors()
Definition GPUChain.h:84

o2::gpu::GPUChain::krnlRunRangeNone
static constexpr krnlRunRange krnlRunRangeNone
Definition GPUChain.h:41

o2::gpu::GPUChain::DoDebugDump
bool DoDebugDump(uint32_t mask, std::function< void(Args &...)> func, Args &... args)
Definition GPUChainTrackingDebug.h:69

o2::gpu::GPUChain::param
GPUParam & param()
Definition GPUChain.h:87

o2::gpu::GPUChain::SetupGPUProcessor
void SetupGPUProcessor(T *proc, bool allocate)
Definition GPUChain.h:220

o2::gpu::GPUChain::GetProcessingSettings
const GPUSettingsProcessing & GetProcessingSettings() const
Definition GPUChain.h:76

o2::gpu::GPUChain::SynchronizeStream
void SynchronizeStream(int32_t stream)
Definition GPUChain.h:89

o2::gpu::GPUChain::mRec
GPUReconstructionCPU * mRec
Definition GPUChain.h:79

o2::gpu::GPUChain::processorsShadow
GPUConstantMem * processorsShadow()
Definition GPUChain.h:85

o2::gpu::GPUChain::GetGridAutoStep
krnlExec GetGridAutoStep(int32_t stream, GPUDataTypes::RecoStep st=GPUDataTypes::RecoStep::NoRecoStep)
Definition GPUChain.cxx:47

o2::gpu::GPUChain::NSECTORS
static constexpr int32_t NSECTORS
Definition GPUChain.h:58

o2::gpu::GPUChain::TransferMemoryResourceLinkToHost
void TransferMemoryResourceLinkToHost(RecoStep step, int16_t res, int32_t stream=-1, deviceEvent *ev=nullptr, deviceEvent *evList=nullptr, int32_t nEvents=1)
Definition GPUChain.h:125

o2::gpu::GPUChain::TransferMemoryResourcesToHost
void TransferMemoryResourcesToHost(RecoStep step, GPUProcessor *proc, int32_t stream=-1, bool all=false)
Definition GPUChain.h:123

o2::gpu::GPUChain::GetGridAuto
krnlExec GetGridAuto(int32_t stream, GPUReconstruction::krnlDeviceType d=GPUReconstruction::krnlDeviceType::Auto, GPUDataTypes::RecoStep st=GPUDataTypes::RecoStep::NoRecoStep)
Definition GPUChain.cxx:42

o2::gpu::GPUChain::SynchronizeEventAndRelease
void SynchronizeEventAndRelease(deviceEvent &ev, bool doGPU=true)
Definition GPUChain.h:92

o2::gpu::GPUChain::TransferMemoryResourcesToGPU
void TransferMemoryResourcesToGPU(RecoStep step, GPUProcessor *proc, int32_t stream=-1, bool all=false)
Definition GPUChain.h:122

o2::gpu::GPUDataTypes::RecoStep
RecoStep
Definition GPUDataTypes.h:123

o2::gpu::GPUReconstructionProcessing::getGPUParameters
const GPUDefParameters & getGPUParameters(bool doGPU) const override
Definition GPUReconstructionProcessing.h:178

o2::gpu::GPUReconstruction::getGPUPointer
virtual void * getGPUPointer(void *ptr)
Definition GPUReconstruction.h:153

o2::gpu::GPUReconstruction::PopNonPersistentMemory
void PopNonPersistentMemory(RecoStep step, uint64_t tag)
Definition GPUReconstruction.cxx:880

o2::gpu::GPUReconstruction::MakeFutureDeviceMemoryAllocationsVolatile
void MakeFutureDeviceMemoryAllocationsVolatile()
Definition GPUReconstruction.cxx:795

o2::gpu::GPUReconstruction::IsGPU
bool IsGPU() const
Definition GPUReconstruction.h:200

o2::gpu::GPUReconstruction::ReturnVolatileDeviceMemory
void ReturnVolatileDeviceMemory()
Definition GPUReconstruction.cxx:801

o2::gpu::GPUReconstruction::NStreams
uint32_t NStreams() const
Definition GPUReconstruction.h:219

o2::gpu::GPUReconstruction::PushNonPersistentMemory
void PushNonPersistentMemory(uint64_t tag)
Definition GPUReconstruction.cxx:875

o2::gpu::GPUReconstruction::BlockStackedMemory
void BlockStackedMemory(GPUReconstruction *rec)
Definition GPUReconstruction.cxx:912

o2::gpu::GPUReconstruction::GetProcessingSettings
const GPUSettingsProcessing & GetProcessingSettings() const
Definition GPUReconstruction.h:206

o2::gpu::GPUTPCCompression
Definition GPUTPCCompression.h:29

o2::gpu::GPUTPCCompression::mMemoryResOutputHost
int16_t mMemoryResOutputHost
Definition GPUTPCCompression.h:93

o2::gpu::GPUTPCCompression::mMemoryResOutputGPU
int16_t mMemoryResOutputGPU
Definition GPUTPCCompression.h:94

o2::gpu::GPUTPCCompression::mMaxClusters
uint32_t mMaxClusters
Definition GPUTPCCompression.h:83

o2::gpu::GPUTPCCompression::mAttachedClusterFirstIndex
uint32_t * mAttachedClusterFirstIndex
Definition GPUTPCCompression.h:79

o2::gpu::GPUTPCCompression::mClusterStatus
uint8_t * mClusterStatus
Definition GPUTPCCompression.h:80

o2::gpu::GPUTPCCompression::DumpCompressedClusters
void DumpCompressedClusters(std::ostream &out)
Definition GPUTPCCompression.cxx:134

o2::gpu::GPUTPCCompression::mOutput
o2::tpc::CompressedClusters * mOutput
Definition GPUTPCCompression.h:74

o2::gpu::GPUTPCCompression::mOutputA
o2::tpc::CompressedClusters * mOutputA
Definition GPUTPCCompression.h:75

o2::gpu::GPUTPCCompression::mPtrs
o2::tpc::CompressedClustersPtrs mPtrs
Definition GPUTPCCompression.h:73

o2::gpu::GPUTPCCompression::mMemory
memory * mMemory
Definition GPUTPCCompression.h:78

o2::gpu::GPUTPCCompression::mOutputFlat
o2::tpc::CompressedClustersFlat * mOutputFlat
Definition GPUTPCCompression.h:76

o2::gpu::GPUTPCCompression::mMaxClusterFactorBase1024
size_t mMaxClusterFactorBase1024
Definition GPUTPCCompression.h:86

o2::gpu::GPUTPCDecompression
Definition GPUTPCDecompression.h:29

o2::gpu::GPUTPCDecompression::mNClusterNativeBeforeFiltering
uint32_t mNClusterNativeBeforeFiltering
Definition GPUTPCDecompression.h:56

o2::gpu::GPUTPCDecompression::mNClusterPerSectorRow
uint32_t * mNClusterPerSectorRow
Definition GPUTPCDecompression.h:60

o2::gpu::GPUTPCDecompression::mInputGPU
o2::tpc::CompressedClusters mInputGPU
Definition GPUTPCDecompression.h:53

o2::gpu::GPUTPCDecompression::mUnattachedClustersOffsets
uint32_t * mUnattachedClustersOffsets
Definition GPUTPCDecompression.h:58

o2::gpu::GPUTPCDecompression::mResourceNClusterPerSectorRow
int16_t mResourceNClusterPerSectorRow
Definition GPUTPCDecompression.h:73

o2::gpu::GPUTPCDecompression::mResourceClusterNativeAccess
int16_t mResourceClusterNativeAccess
Definition GPUTPCDecompression.h:72

o2::gpu::GPUTPCDecompression::mNativeClustersIndex
uint32_t * mNativeClustersIndex
Definition GPUTPCDecompression.h:57

o2::gpu::GPUTPCDecompression::mAttachedClustersOffsets
uint32_t * mAttachedClustersOffsets
Definition GPUTPCDecompression.h:59

o2::gpu::GPUTPCDecompression::mResourceTmpIndexes
int16_t mResourceTmpIndexes
Definition GPUTPCDecompression.h:69

o2::gpu::GPUTPCDecompression::mResourceTmpClustersOffsets
int16_t mResourceTmpClustersOffsets
Definition GPUTPCDecompression.h:70

o2::gpu::GPUTPCDecompression::mResourceTmpBufferBeforeFiltering
int16_t mResourceTmpBufferBeforeFiltering
Definition GPUTPCDecompression.h:71

o2::gpu::GPUTPCDecompression::mNativeClustersBuffer
o2::tpc::ClusterNative * mNativeClustersBuffer
Definition GPUTPCDecompression.h:62

o2::gpu::GPUTPCDecompression::mClusterNativeAccess
o2::tpc::ClusterNativeAccess * mClusterNativeAccess
Definition GPUTPCDecompression.h:63

o2::gpu::TPCClusterDecompressor
Definition TPCClusterDecompressor.h:33

o2::gpu::TPCClusterDecompressor::decompress
static int32_t decompress(const o2::tpc::CompressedClustersFlat *clustersCompressed, o2::tpc::ClusterNativeAccess &clustersNative, std::function< o2::tpc::ClusterNative *(size_t)> allocator, const GPUParam &param, bool deterministicRec)

n
GLdouble n
Definition glcorearb.h:1982

size
GLsizeiptr size
Definition glcorearb.h:659

offsets
GLuint GLsizei const GLuint const GLintptr * offsets
Definition glcorearb.h:2595

end
GLuint GLuint end
Definition glcorearb.h:469

offset
GLintptr offset
Definition glcorearb.h:660

param
GLenum GLfloat param
Definition glcorearb.h:271

o2::gpu
Definition TrackTRD.h:35

o2::gpu::TPCCompressedClusters
@ TPCCompressedClusters
Definition GPUChainTrackingDebug.h:41

o2::gpu::TPCDecompressedClusters
@ TPCDecompressedClusters
Definition GPUChainTrackingDebug.h:42

o2::tpc
Global TPC definitions and constants.
Definition SimTraits.h:167

o2::tpc::begin
Enum< T >::Iterator begin(Enum< T >)
Definition Defs.h:173

strtag.h

qStr2Tag
constexpr T qStr2Tag(const char *str)
Definition strtag.h:22

P
Definition test_Concepts.cxx:27

o2::gpu::GPUChainTracking::eventStruct::stream
deviceEvent stream[GPUCA_MAX_STREAMS]
Definition GPUChainTracking.h:214

o2::gpu::GPUChainTracking::eventStruct::single
deviceEvent single
Definition GPUChainTracking.h:216

o2::gpu::GPUChainTracking::eventStruct::init
deviceEvent init
Definition GPUChainTracking.h:215

o2::gpu::GPUConstantMem::tpcDecompressor
GPUTPCDecompression tpcDecompressor
Definition GPUConstantMem.h:47

o2::gpu::GPUConstantMem::ioPtrs
GPUTrackingInOutPointers ioPtrs
Definition GPUConstantMem.h:53

o2::gpu::GPUConstantMem::tpcCompressor
GPUTPCCompression tpcCompressor
Definition GPUConstantMem.h:46

o2::gpu::GPUTPCCompression::memory
Definition GPUTPCCompression.h:65

o2::gpu::GPUTPCCompression::memory::nStoredAttachedClusters
uint32_t nStoredAttachedClusters
Definition GPUTPCCompression.h:67

o2::gpu::GPUTPCCompression::memory::nStoredTracks
uint32_t nStoredTracks
Definition GPUTPCCompression.h:66

o2::gpu::GPUTPCCompression::memory::nStoredUnattachedClusters
uint32_t nStoredUnattachedClusters
Definition GPUTPCCompression.h:68

o2::gpu::GPUTrackingInOutPointers::clustersNative
const o2::tpc::ClusterNativeAccess * clustersNative
Definition GPUDataTypes.h:226

o2::gpu::GPUTrackingInOutPointers::tpcCompressedClusters
const o2::tpc::CompressedClustersFlat * tpcCompressedClusters
Definition GPUDataTypes.h:249

o2::gpu::GPUTrackingOutputs::clustersNative
GPUOutputControl clustersNative
Definition GPUOutputControl.h:68

o2::gpu::GPUTrackingOutputs::getIndex
size_t getIndex(const GPUOutputControl &v)
Definition GPUOutputControl.h:80

o2::gpu::GPUTrackingOutputs::compressedClusters
GPUOutputControl compressedClusters
Definition GPUOutputControl.h:67

o2::gpu::internal::GPUParam_t::bzkG
float bzkG
Definition GPUParam.h:55

o2::gpu::internal::GPUParam_t::rec
T rec
Definition GPUParam.h:52

o2::gpu::internal::GPUParam_t::tpcCutTimeBin
int32_t tpcCutTimeBin
Definition GPUParam.h:61

o2::gpu::internal::GPUParam_t::continuousMaxTimeBin
int32_t continuousMaxTimeBin
Definition GPUParam.h:60

o2::tpc::ClusterNativeAccess
Definition ClusterNative.h:174

o2::tpc::ClusterNativeAccess::nClusters
unsigned int nClusters[constants::MAXSECTOR][constants::MAXGLOBALPADROW]
Definition ClusterNative.h:178

o2::tpc::ClusterNativeAccess::nClustersTotal
unsigned int nClustersTotal
Definition ClusterNative.h:181

o2::tpc::ClusterNativeAccess::clusterOffset
unsigned int clusterOffset[constants::MAXSECTOR][constants::MAXGLOBALPADROW]
Definition ClusterNative.h:180

o2::tpc::ClusterNativeAccess::clustersLinear
const ClusterNative * clustersLinear
Definition ClusterNative.h:175

o2::tpc::ClusterNative
Definition ClusterNative.h:54

o2::tpc::CompressedClustersCounters::nUnattachedClusters
unsigned int nUnattachedClusters
Definition CompressedClusters.h:29

o2::tpc::CompressedClustersCounters::nTracks
unsigned int nTracks
Definition CompressedClusters.h:27

o2::tpc::CompressedClustersCounters::nComppressionModes
unsigned char nComppressionModes
Definition CompressedClusters.h:32

o2::tpc::CompressedClustersCounters::nSliceRows
unsigned int nSliceRows
Definition CompressedClusters.h:31

o2::tpc::CompressedClustersCounters::maxTimeBin
int maxTimeBin
Definition CompressedClusters.h:34

o2::tpc::CompressedClustersCounters::solenoidBz
float solenoidBz
Definition CompressedClusters.h:33

o2::tpc::CompressedClustersCounters::nAttachedClusters
unsigned int nAttachedClusters
Definition CompressedClusters.h:28

o2::tpc::CompressedClustersCounters::nAttachedClustersReduced
unsigned int nAttachedClustersReduced
Definition CompressedClusters.h:30

o2::tpc::CompressedClustersFlat::set
void set(size_t bufferSize, const CompressedClusters &v)
Definition CompressedClusters.cxx:39

o2::tpc::CompressedClustersPtrs_x::nTrackClusters
TSHORT nTrackClusters
Definition CompressedClusters.h:65

o2::tpc::CompressedClustersPtrs_x::sigmaPadA
TCHAR sigmaPadA
Definition CompressedClusters.h:48

o2::tpc::CompressedClustersPtrs_x::qMaxU
TSHORT qMaxU
Definition CompressedClusters.h:58

o2::tpc::CompressedClustersPtrs_x::qTotU
TSHORT qTotU
Definition CompressedClusters.h:57

o2::tpc::CompressedClustersPtrs_x::rowA
TCHAR rowA
Definition CompressedClusters.h:52

o2::tpc::CompressedClustersPtrs_x::timeA
TINT timeA
Definition CompressedClusters.h:54

o2::tpc::CompressedClustersPtrs_x::rowDiffA
TCHAR rowDiffA
Definition CompressedClusters.h:44

o2::tpc::CompressedClustersPtrs_x::padDiffU
TSHORT padDiffU
Definition CompressedClusters.h:60

o2::tpc::CompressedClustersPtrs_x::sigmaPadU
TCHAR sigmaPadU
Definition CompressedClusters.h:62

o2::tpc::CompressedClustersPtrs_x::padA
TSHORT padA
Definition CompressedClusters.h:55

o2::tpc::CompressedClustersPtrs_x::timeResA
TINT timeResA
Definition CompressedClusters.h:47

o2::tpc::CompressedClustersPtrs_x::padResA
TSHORT padResA
Definition CompressedClusters.h:46

o2::tpc::CompressedClustersPtrs_x::sliceA
TCHAR sliceA
Definition CompressedClusters.h:53

o2::tpc::CompressedClustersPtrs_x::qPtA
TCHAR qPtA
Definition CompressedClusters.h:51

o2::tpc::CompressedClustersPtrs_x::sigmaTimeU
TCHAR sigmaTimeU
Definition CompressedClusters.h:63

o2::tpc::CompressedClustersPtrs_x::qTotA
TSHORT qTotA
Definition CompressedClusters.h:41

o2::tpc::CompressedClustersPtrs_x::flagsA
TCHAR flagsA
Definition CompressedClusters.h:43

o2::tpc::CompressedClustersPtrs_x::sigmaTimeA
TCHAR sigmaTimeA
Definition CompressedClusters.h:49

o2::tpc::CompressedClustersPtrs_x::timeDiffU
TINT timeDiffU
Definition CompressedClusters.h:61

o2::tpc::CompressedClustersPtrs_x::sliceLegDiffA
TCHAR sliceLegDiffA
Definition CompressedClusters.h:45

o2::tpc::CompressedClustersPtrs_x::nSliceRowClusters
TINT nSliceRowClusters
Definition CompressedClusters.h:66

o2::tpc::CompressedClustersPtrs_x::qMaxA
TSHORT qMaxA
Definition CompressedClusters.h:42

o2::tpc::CompressedClustersPtrs_x::flagsU
TCHAR flagsU
Definition CompressedClusters.h:59

o2::tpc::CompressedClustersPtrs
Definition CompressedClusters.h:71

o2::tpc::CompressedClusters
Definition CompressedClusters.h:79