Project
Loading...
Searching...
No Matches
test_ParallelPipeline.cxx
Go to the documentation of this file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
12#include "Framework/InputSpec.h"
24#include <fairmq/Device.h>
25#include <algorithm>
26#include <memory>
27#include <unordered_map>
28
29// customize clusterers and cluster decoders to process immediately what comes in
30void customize(std::vector<o2::framework::CompletionPolicy>& policies)
31{
32 // we customize the pipeline processors to consume data as it comes
35 policies.push_back(CompletionPolicyHelpers::defineByName("consumer", CompletionPolicy::CompletionOp::Consume));
36}
38
// Check a test condition and log a fatal error if it does not hold.
// Wrapped in do { } while (false) so the macro expands to a single statement
// and composes safely with if/else: the original bare-`if` form would silently
// bind a following `else` to the macro's hidden `if` (dangling-else hazard).
#define ASSERT_ERROR(condition)                                     \
  do {                                                              \
    if (!(condition)) {                                             \
      LOG(fatal) << R"(Test condition ")" #condition R"(" failed)"; \
    }                                                               \
  } while (false)
43
using namespace o2::framework;

// number of parallel pipeline instances each template processor is cloned into
size_t nPipelines = 4;
// number of trigger cycles the producer emits before requesting shutdown
// NOTE(review): nParallelChannels (used below, listed in the symbol index) is
// declared on a line missing from this extraction — confirm against the
// original source file.
size_t nRolls = 1;
50
/// Build the test workflow: a two-stage processor pipeline cloned into
/// nPipelines parallel instances via parallelPipeline(), fed by a "trigger"
/// producer serving one sub-channel per subspec, and checked by a final
/// "consumer" that verifies routing and channel bindings.
std::vector<DataProcessorSpec> defineDataProcessing(ConfigContext const&)
{
  // define a template workflow with processors to be executed in a pipeline
  std::vector<DataProcessorSpec> workflowSpecs{
    {"processor1",
     Inputs{
       {"input", "TST", "TRIGGER", 0, Lifetime::Timeframe}},
     Outputs{
       {{"output"}, "TST", "PREPROC", 0, Lifetime::Timeframe}},
     // NOTE(review): the AlgorithmSpec line opening the processing lambda
     // (introducing parameter `ctx`) is missing from this extraction —
     // confirm against the original source before compiling.
       for (auto const& input : ctx.inputs()) {
         auto const& parallelContext = ctx.services().get<ParallelContext>();
         LOG(debug) << "instance " << parallelContext.index1D() << " of " << parallelContext.index1DSize() << ": "
                    << *input.spec << ": " << *((int*)input.payload);
         auto const* dataheader = DataRefUtils::getHeader<o2::header::DataHeader*>(input);
         // forward on the PREPROC channel with the same subspec as the input
         auto& data = ctx.outputs().make<int>(Output{"TST", "PREPROC", dataheader->subSpecification});
         // the trigger wrote the target pipeline index into the payload;
         // it must match the index of this parallel instance
         ASSERT_ERROR(ctx.inputs().get<int>(input.spec->binding.c_str()) == parallelContext.index1D());
         data = parallelContext.index1D();
       }
     }}},
    {"processor2",
     Inputs{
       {"input", "TST", "PREPROC", 0, Lifetime::Timeframe}},
     Outputs{
       {{"output"}, "TST", "DATA", 0, Lifetime::Timeframe},
       {{"metadt"}, "TST", "META", 0, Lifetime::Timeframe}},
     // NOTE(review): the AlgorithmSpec line opening the processing lambda
     // (introducing parameter `ctx`) is missing from this extraction as well.
       for (auto const& input : ctx.inputs()) {
         auto const& parallelContext = ctx.services().get<ParallelContext>();
         LOG(debug) << "instance " << parallelContext.index1D() << " of " << parallelContext.index1DSize() << ": "
                    << *input.spec << ": " << *((int*)input.payload);
         ASSERT_ERROR(ctx.inputs().get<int>(input.spec->binding.c_str()) == parallelContext.index1D());
         auto const* dataheader = DataRefUtils::getHeader<o2::header::DataHeader*>(input);
         // TODO: there is a bug in the API for using OutputRef, returns an rvalue which can not be bound to
         // lvalue reference
         // forward the pipeline index on DATA and publish the subspec itself
         // on META so the consumer can cross-check both
         auto& data = ctx.outputs().make<int>(Output{"TST", "DATA", dataheader->subSpecification});
         data = ctx.inputs().get<int>(input.spec->binding.c_str());
         auto& meta = ctx.outputs().make<int>(Output{"TST", "META", dataheader->subSpecification});
         meta = dataheader->subSpecification;
       }
     }}},
  };

  // create parallel pipelines from the template workflow, the number of parallel channel is defined by
  // nParallelChannels and is distributed among the pipelines
  // subspecs are distinct single-bit values: 0x1, 0x2, 0x4, ...
  std::vector<o2::header::DataHeader::SubSpecificationType> subspecs(nParallelChannels);
  std::generate(subspecs.begin(), subspecs.end(), [counter = std::make_shared<int>(0)]() { return 0x1 << (*counter)++; });
  // correspondence between the subspec and the instance which serves this particular subspec
  // this is checked in the final consumer
  auto checkMap = std::make_shared<std::unordered_map<o2::header::DataHeader::SubSpecificationType, int>>();
  {
    // assign subspecs to pipeline instances round-robin
    size_t pipeline = 0;
    for (auto const& subspec : subspecs) {
      (*checkMap)[subspec] = pipeline;
      pipeline++;
      if (pipeline >= nPipelines) {
        pipeline = 0;
      }
    }
  }
  // clone the template processors into nPipelines instances, each serving the
  // subset of subspecs selected by the two accessor callbacks
  workflowSpecs = parallelPipeline(
    workflowSpecs, nPipelines,
    [&subspecs]() { return subspecs.size(); },
    [&subspecs](size_t index) { return subspecs[index]; });

  // define a producer process with outputs for all subspecs
  auto producerOutputs = [&subspecs]() {
    Outputs outputs;
    for (auto const& subspec : subspecs) {
      outputs.emplace_back("TST", "TRIGGER", subspec, Lifetime::Timeframe);
    }
    return outputs;
  };

  workflowSpecs.emplace_back(DataProcessorSpec{
    "trigger",
    Inputs{},
    producerOutputs(),
    AlgorithmSpec{[subspecs, counter = std::make_shared<int>(0)](ProcessingContext& ctx) {
      if (*counter < nRolls) {
        // how many subspecs each pipeline serves: the remainder of the
        // division is spread over the leading pipelines, one extra each
        size_t pipeline = 0;
        size_t channels = subspecs.size();
        std::vector<size_t> multiplicities(nPipelines);
        for (pipeline = 0; pipeline < nPipelines; pipeline++) {
          multiplicities[pipeline] = channels / (nPipelines - pipeline) + ((channels % (nPipelines - pipeline)) > 0 ? 1 : 0);
          channels -= multiplicities[pipeline];
        }
        // emit one message per subspec; the int payload carries the index of
        // the pipeline instance expected to serve that subspec, walking the
        // pipelines round-robin and consuming their multiplicities
        size_t index = 0;
        auto end = subspecs.size();
        for (pipeline = 0; index < end; index++) {
          if (multiplicities[pipeline] == 0) {
            continue;
          }
          ctx.outputs().make<int>(Output{"TST", "TRIGGER", subspecs[index]}) = pipeline;
          multiplicities[pipeline++]--;
          if (pipeline >= nPipelines) {
            pipeline = 0;
          }
        }
        ASSERT_ERROR(index == subspecs.size());
        (*counter)++;
      }
      if (*counter == nRolls) {
        // all rolls produced: propagate end-of-stream and quit this device
        ctx.services().get<ControlService>().endOfStream();
        ctx.services().get<ControlService>().readyToQuit(QuitRequest::Me);
      }
    }}});

  // the final consumer
  // map of bindings is used to check the channel names, note that the object is captured by
  // reference in mergeInputs which is a helper executed at construction of DataProcessorSpec,
  // while the AlgorithmSpec stores a lambda to be called later on, and the object must be
  // passed by copy or move in order to have a valid object upon invocation
  std::unordered_map<o2::header::DataHeader::SubSpecificationType, std::string> bindings;
  workflowSpecs.emplace_back(DataProcessorSpec{
    "consumer",
    mergeInputs({{"datain", "TST", "DATA", 0, Lifetime::Timeframe},
                 {"metain", "TST", "META", 0, Lifetime::Timeframe}},
                subspecs.size(),
                [&subspecs, &bindings](InputSpec& input, size_t index) {
                  // make the binding unique per replicated input ("datain0", ...)
                  input.binding += std::to_string(index);
                  // remember which binding name serves which subspec (datain only)
                  if (input.binding.compare(0, 6, "datain") == 0) {
                    bindings[subspecs[index]] = input.binding;
                  }
                }),
    Outputs(),
    AlgorithmSpec{adaptStateful([checkMap, bindings = std::move(bindings)](CallbackService& callbacks) {
      callbacks.set<CallbackService::Id::EndOfStream>([checkMap](EndOfStreamContext& ctx) {
        for (auto const& [subspec, pipeline] : *checkMap) {
          // we require all checks to be invalidated
          ASSERT_ERROR(pipeline == -1);
        }
        checkMap->clear();
      });
      callbacks.set<CallbackService::Id::Stop>([checkMap]() {
        // end-of-stream must have run and cleared the map before Stop
        ASSERT_ERROR(checkMap->size() == 0);
      });
      return adaptStateless([checkMap, bindings = std::move(bindings)](InputRecord& inputs) {
        bool haveDataIn = false;
        size_t index = 0;
        for (auto const& input : inputs) {
          if (!DataRefUtils::isValid(input)) {
            continue;
          }
          LOG(info) << "consuming : " << *input.spec << ": " << *((int*)input.payload);
          auto const* dataheader = DataRefUtils::getHeader<o2::header::DataHeader*>(input);
          if (input.spec->binding.compare(0, 6, "datain") == 0) {
            if (input.spec->binding != bindings.at(dataheader->subSpecification)) {
              LOG(error) << "data with subspec " << dataheader->subSpecification << " at unexpected binding " << input.spec->binding << ", expected " << bindings.at(dataheader->subSpecification);
            }
            haveDataIn = true;
            // the payload must be the pipeline index recorded for this subspec
            ASSERT_ERROR(checkMap->at(dataheader->subSpecification) == inputs.get<int>(input.spec->binding.c_str()));
            // keep a backup before invalidating; the backup is used in the check below, which can throw and therefore
            // must be after invalidation
            auto pipeline = checkMap->at(dataheader->subSpecification);
            // invalidate, we check in the end of stream callback that all are invalidated
            (*checkMap)[dataheader->subSpecification] = -1;
            // check if we can access channels by binding
            if (inputs.isValid(bindings.at(dataheader->subSpecification))) {
              ASSERT_ERROR(inputs.get<int>(bindings.at(dataheader->subSpecification)) == pipeline);
            }
          }
        }
        // we require each input cycle to have data on datain channel
        ASSERT_ERROR(haveDataIn);
      });
    })}});

  return workflowSpecs;
}
std::ostringstream debug
The input API of the Data Processing Layer This class holds the inputs which are valid for processing...
bool isValid(std::string const &s) const
Helper method to be used to check if a given part of the InputRecord is present.
decltype(auto) get(R binding, int part=0) const
GLuint GLuint end
Definition glcorearb.h:469
GLuint index
Definition glcorearb.h:781
GLboolean * data
Definition glcorearb.h:298
GLuint counter
Definition glcorearb.h:3987
Defining PrimaryVertex explicitly as messageable.
Definition TFIDInfo.h:20
WorkflowSpec parallelPipeline(const WorkflowSpec &specs, size_t nPipelines, std::function< size_t()> getNumberOfSubspecs, std::function< size_t(size_t)> getSubSpec)
Inputs mergeInputs(InputSpec original, size_t maxIndex, std::function< void(InputSpec &, size_t)> amendCallback)
AlgorithmSpec::ProcessCallback adaptStateless(LAMBDA l)
std::vector< InputSpec > Inputs
std::vector< OutputSpec > Outputs
AlgorithmSpec::InitCallback adaptStateful(LAMBDA l)
std::string to_string(gsl::span< T, Size > span)
Definition common.h:52
Helper class which holds commonly used policies.
static CompletionPolicy defineByName(std::string const &name, CompletionPolicy::CompletionOp op)
static bool isValid(DataRef const &ref)
static void updateMatchingSubspec(InputSpec &in, header::DataHeader::SubSpecificationType subSpec)
std::string binding
A mnemonic name for the input spec.
Definition InputSpec.h:66
the main header struct
Definition DataHeader.h:618
size_t nPipelines
size_t nParallelChannels
#define ASSERT_ERROR(condition)
size_t nRolls
std::vector< DataProcessorSpec > defineDataProcessing(ConfigContext const &)
This function hooks up the the workflow specifications into the DPL driver.
void customize(std::vector< o2::framework::CompletionPolicy > &policies)
LOG(info)<< "Compressed in "<< sw.CpuTime()<< " s"
std::vector< ChannelData > channels