dataSamplingParallel.cxx
// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
// All rights not expressly granted are reserved.
//
// This software is distributed under the terms of the GNU General Public
// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
//
// In applying this license CERN does not waive the privileges and immunities
// granted to it by virtue of its status as an Intergovernmental Organization
// or submit itself to any jurisdiction.

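/// \file dataSamplingParallel.cxx
/// \brief Example workflow: parallel data producers and processing stages whose data
///        are sampled by the Data Sampling infrastructure and verified by a QC task.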
#include "DataSampling/DataSampling.h"

#include <thread>

using namespace o2::framework;
using namespace o2::utilities;
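// DPL customization hooks: these have to be defined before Framework/runDataProcessing.h
// is included, so that Data Sampling can adjust the completion and channel configuration
// policies (the dispatcher consumes any data immediately).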
void customize(std::vector<CompletionPolicy>& policies)
{
  DataSampling::CustomizeInfrastructure(policies);
}

void customize(std::vector<ChannelConfigurationPolicy>& policies)
{
  DataSampling::CustomizeInfrastructure(policies);
}

#include "Framework/InputSpec.h"
#include "Framework/DataProcessorSpec.h"
#include "Framework/DataRefUtils.h"
#include "Framework/DataSpecUtils.h"
#include "Framework/ParallelContext.h"
#include "Framework/Logger.h"
#include "Framework/runDataProcessing.h"

#include <cassert>
#include <chrono>
#include <cmath>
#include <cstdlib>
#include <iostream>
#include <stdexcept>

#include <boost/algorithm/string.hpp>

using namespace o2::framework;
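// A toy cluster structure used as the message payload throughout this example.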
struct FakeCluster {
  float x;
  float y;
  float z;
  float q;
};

size_t parallelSize = 4;
size_t collectionChunkSize = 1000;

void someDataProducerAlgorithm(ProcessingContext& ctx);
void someProcessingStageAlgorithm(ProcessingContext& ctx);
void someSinkAlgorithm(ProcessingContext& ctx);

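// defineDataProcessing() builds the workflow: parallelSize data producers, matching
// processing stages, a sink merging all processed outputs, a QC task fed by Data
// Sampling, and a dummy producer. parallel() clones the producer spec parallelSize
// times; the amend callback gives each clone's output a sub-specification equal to
// its index.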
WorkflowSpec defineDataProcessing(ConfigContext const&)
{
  auto dataProducers = parallel(
    DataProcessorSpec{
      "dataProducer",
      Inputs{},
      {OutputSpec{"TPC", "CLUSTERS"}},
      AlgorithmSpec{(AlgorithmSpec::ProcessCallback)someDataProducerAlgorithm}},
    parallelSize,
    [](DataProcessorSpec& spec, size_t index) {
      DataSpecUtils::updateMatchingSubspec(spec.outputs[0], index);
    });

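  // The processing stage is cloned in the same way; here both the input and the output
  // of clone i are pinned to sub-specification i, so each stage reads from exactly one
  // producer.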
  auto processingStages = parallel(
    DataProcessorSpec{
      "processingStage",
      Inputs{
        {"dataTPC", "TPC", "CLUSTERS"}},
      Outputs{
        {"TPC", "CLUSTERS_P"}},
      AlgorithmSpec{(AlgorithmSpec::ProcessCallback)someProcessingStageAlgorithm}},
    parallelSize,
    [](DataProcessorSpec& spec, size_t index) {
      DataSpecUtils::updateMatchingSubspec(spec.inputs[0], index);
      DataSpecUtils::updateMatchingSubspec(spec.outputs[0], index);
    });

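  // mergeInputs() expands a single InputSpec into parallelSize entries, again keyed by
  // sub-specification, so the sink subscribes to every processing stage at once.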
  auto inputsSink = mergeInputs(
    {"dataTPC-proc", "TPC", "CLUSTERS_P"},
    parallelSize,
    [](InputSpec& input, size_t index) {
      DataSpecUtils::updateMatchingSubspec(input, index);
    });

  DataProcessorSpec sink{
    "sink",
    inputsSink,
    Outputs{},
    AlgorithmSpec{(AlgorithmSpec::ProcessCallback)someSinkAlgorithm}};

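  // The QC task is not wired to the workflow directly; it subscribes to the Data
  // Sampling outputs (origin "DS") declared in the configuration file and cross-checks
  // the sampled raw clusters against the sampled processed ones.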
  // clang-format off
  DataProcessorSpec simpleQcTask{
    "simpleQcTask",
    Inputs{
      { "TPC_CLUSTERS_S",   { "DS", "simpleQcTask0" } },
      { "TPC_CLUSTERS_P_S", { "DS", "simpleQcTask1" } }
    },
    Outputs{},
    AlgorithmSpec{
      (AlgorithmSpec::ProcessCallback) [](ProcessingContext& ctx) {
        auto inputDataTpc = reinterpret_cast<const FakeCluster*>(ctx.inputs().get("TPC_CLUSTERS_S").payload);
        auto inputDataTpcProcessed = reinterpret_cast<const FakeCluster*>(ctx.inputs().get(
          "TPC_CLUSTERS_P_S").payload);

        auto ref = ctx.inputs().get("TPC_CLUSTERS_S");
        const auto* header = DataRefUtils::getHeader<DataHeader*>(ref);
        (void)header; // not used further in this example

        bool dataGood = true;
        for (size_t j = 0; j < DataRefUtils::getPayloadSize(ref) / sizeof(FakeCluster); ++j) {
          float diff = std::abs(-inputDataTpc[j].x - inputDataTpcProcessed[j].x) +
                       std::abs(2 * inputDataTpc[j].y - inputDataTpcProcessed[j].y) +
                       std::abs(inputDataTpc[j].z * inputDataTpc[j].q - inputDataTpcProcessed[j].z) +
                       std::abs(inputDataTpc[j].q - inputDataTpcProcessed[j].q);
          if (diff > 1) {
            dataGood = false;
            break;
          }
        }

        LOG(info) << "simpleQcTask - received data is " << (dataGood ? "correct" : "wrong");
      }
    }
  };

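  // A dummy producer that only declares two additional outputs (TST/HISTOS and
  // TST/STRING); its process callback is left empty here.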
  DataProcessorSpec dummyProducer{
    "dummy",
    Inputs{},
    Outputs{
      { {"tsthistos"}, "TST", "HISTOS", 0 },
      { {"tststring"}, "TST", "STRING", 0 }
    },
    AlgorithmSpec{(AlgorithmSpec::ProcessCallback) [](ProcessingContext&) {}}
  };

  WorkflowSpec specs;
  specs.swap(dataProducers);
  specs.insert(std::end(specs), std::begin(processingStages), std::end(processingStages));
  specs.push_back(sink);
  specs.push_back(simpleQcTask);
  specs.push_back(dummyProducer);

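  // The Data Sampling policies are read from a JSON file installed under $O2_ROOT;
  // GenerateInfrastructure() appends the dispatcher devices that feed simpleQcTask
  // to the workflow.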
  const char* o2Root = getenv("O2_ROOT");
  if (o2Root == nullptr) {
    throw std::runtime_error("The O2_ROOT environment variable is not set, probably the O2 environment has not been loaded.");
  }
  std::string configurationSource = std::string("json:/") + o2Root + "/share/etc/exampleDataSamplingConfig.json";
  DataSampling::GenerateInfrastructure(specs, configurationSource);
  return specs;
}
// clang-format on

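// Each producer replica asks the ParallelContext service for its index and uses it as
// the sub-specification of the messages it creates.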
void someDataProducerAlgorithm(ProcessingContext& ctx)
{
  size_t index = ctx.services().get<ParallelContext>().index1D();
  std::this_thread::sleep_for(std::chrono::seconds(1));
  // Creates a new message of size collectionChunkSize which
  // has "TPC" as data origin and "CLUSTERS" as data description.
  auto& tpcClusters = ctx.outputs().make<FakeCluster>(
    Output{"TPC", "CLUSTERS", static_cast<o2::header::DataHeader::SubSpecificationType>(index)},
    collectionChunkSize);
  int i = 0;

  for (auto& cluster : tpcClusters) {
    assert(i < collectionChunkSize);
    cluster.x = index;
    cluster.y = i;
    cluster.z = i;
    cluster.q = rand() % 1000;
    i++;
  }
}

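// The processing stage applies a simple transformation to every cluster
// (x -> -x, y -> 2y, z -> z*q, q -> q); simpleQcTask checks exactly this relation
// on the sampled data.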
void someProcessingStageAlgorithm(ProcessingContext& ctx)
{
  size_t index = ctx.services().get<ParallelContext>().index1D();

  const FakeCluster* inputDataTpc = reinterpret_cast<const FakeCluster*>(ctx.inputs().get("dataTPC").payload);
  auto& processedTpcClusters = ctx.outputs().make<FakeCluster>(
    Output{"TPC", "CLUSTERS_P", static_cast<o2::header::DataHeader::SubSpecificationType>(index)},
    collectionChunkSize);

  int i = 0;
  for (auto& cluster : processedTpcClusters) {
    assert(i < collectionChunkSize);
    cluster.x = -inputDataTpc[i].x;
    cluster.y = 2 * inputDataTpc[i].y;
    cluster.z = inputDataTpc[i].z * inputDataTpc[i].q;
    cluster.q = inputDataTpc[i].q;
    i++;
  }
}

void someSinkAlgorithm(ProcessingContext& ctx)
{
  const FakeCluster* inputDataTpc = reinterpret_cast<const FakeCluster*>(ctx.inputs().get("dataTPC-proc").payload);
  (void)inputDataTpc; // the sink only receives the merged data; it does not inspect it
}

Referenced declarations:

WorkflowSpec defineDataProcessing(ConfigContext const&)
  This function hooks up the workflow specifications into the DPL driver.
void customize(std::vector<CompletionPolicy>& policies)
void someDataProducerAlgorithm(ProcessingContext& ctx)
void someProcessingStageAlgorithm(ProcessingContext& ctx)
void someSinkAlgorithm(ProcessingContext& ctx)
size_t parallelSize
size_t collectionChunkSize

DataSampling/DataSampling.h
  Definition of O2 Data Sampling, v1.0.
static void DataSampling::CustomizeInfrastructure(std::vector<framework::CompletionPolicy>&)
  Configures the dispatcher to consume any data immediately.
static void DataSampling::GenerateInfrastructure(framework::WorkflowSpec& workflow, const std::string& policiesSource, size_t threads = 1, const std::string& host = "")
  Generates the data sampling infrastructure.

WorkflowSpec parallel(DataProcessorSpec original, size_t maxIndex, std::function<void(DataProcessorSpec&, size_t id)> amendCallback)
Inputs mergeInputs(InputSpec original, size_t maxIndex, std::function<void(InputSpec&, size_t)> amendCallback)
std::vector<DataProcessorSpec> WorkflowSpec
std::vector<InputSpec> Inputs
std::vector<OutputSpec> Outputs
std::function<void(ProcessingContext&)> AlgorithmSpec::ProcessCallback

InputRecord& ProcessingContext::inputs()
  The inputs associated with this processing context.
DataAllocator& ProcessingContext::outputs()
  The data allocator is used to allocate memory for the output data.
ServiceRegistryRef ProcessingContext::services()
  The services registry associated with this processing context.
decltype(auto) DataAllocator::make(const Output& spec, Args... args)
decltype(auto) InputRecord::get(R binding, int part = 0) const
static o2::header::DataHeader::PayloadSizeType DataRefUtils::getPayloadSize(const DataRef& ref)
static void DataSpecUtils::updateMatchingSubspec(InputSpec& in, header::DataHeader::SubSpecificationType subSpec)
o2::header::DataHeader
  The main header struct.
o2::header::DataHeader::SubSpecificationType: uint32_t