34#define protected public
38#if __has_include(<ROOT/RDF/Utils.hxx>)
39#include <ROOT/RDF/Utils.hxx>
41#include <ROOT/RDFUtils.hxx>
44#include <ROOT/TSeq.hxx>
45#include <ROOT/RDataFrame.hxx>
64 return "antidiagonal";
68 return "uppertriangular";
70 return "stricly-uppertriangular";
72 throw std::runtime_error(
"Unknown BlockCombinationRule");
75std::vector<std::pair<ULong64_t, ULong64_t>>
77 std::unique_ptr<RDataFrame>&
right)
79 std::vector<std::pair<ULong64_t, ULong64_t>> ranges;
80 fLeftCount = *
left->Count();
81 fRightCount = *
right->Count();
82 ranges.reserve(fLeftCount);
83 for (ULong64_t
i = 0;
i < fLeftCount; ++
i) {
84 ranges.emplace_back(std::make_pair<ULong64_t, ULong64_t>(fRightCount *
i, fRightCount * (
i + 1)));
89std::vector<std::pair<ULong64_t, ULong64_t>>
91 std::unique_ptr<RDataFrame>&
right)
93 auto leftCount = *
left->Count();
94 auto rightCount = *
right->Count();
95 if (leftCount != rightCount) {
96 throw std::runtime_error(
"Union can be performed only with two datasources which have the same amount of entries");
98 std::vector<std::pair<ULong64_t, ULong64_t>> ranges;
100 auto nSlots = std::min(
left->GetLoopManager()->GetNSlots(),
right->GetLoopManager()->GetNSlots());
102 auto deltaSize = rightCount / nSlots;
104 for (;
i < (nSlots - 1); ++
i) {
105 ranges.emplace_back(std::pair<ULong64_t, ULong64_t>(deltaSize *
i, deltaSize * (
i + 1)));
107 ranges.emplace_back(std::pair<ULong64_t, ULong64_t>(
i * deltaSize, rightCount));
119 std::unique_ptr<RCombinedDSIndex> inIndex,
120 std::string inLeftPrefix, std::string inRightPrefix)
125 fRight{inRight.get()},
126 fLeftDF{
std::make_unique<RDataFrame>(
std::move(inLeft))},
127 fRightDF{
std::make_unique<RDataFrame>(
std::move(inRight))},
128 fLeftPrefix{inLeftPrefix},
129 fRightPrefix{inRightPrefix},
130 fIndex{
std::move(inIndex)}
132 fColumnNames.reserve(fLeft->GetColumnNames().size() + fRight->GetColumnNames().size());
133 for (
auto&
c : fLeft->GetColumnNames()) {
134 fColumnNames.push_back(fLeftPrefix +
c);
136 for (
auto&
c : fRight->GetColumnNames()) {
137 fColumnNames.push_back(fRightPrefix +
c);
152 auto entryRanges(std::move(fEntryRanges));
158 if (colName.compare(0, fLeftPrefix.size(), fLeftPrefix) == 0) {
159 colName.remove_prefix(fLeftPrefix.size());
160 return fLeft->GetTypeName(colName);
162 if (colName.compare(0, fRightPrefix.size(), fRightPrefix) == 0) {
163 colName.remove_prefix(fRightPrefix.size());
164 return fRight->GetTypeName(colName);
166 std::string dummy(
"Column not found: ");
167 dummy += colName.data();
168 throw std::runtime_error(dummy);
173 if (colName.compare(0, fLeftPrefix.size(), fLeftPrefix) == 0) {
174 colName.remove_prefix(fLeftPrefix.size());
175 return fLeft->HasColumn(colName);
177 if (colName.compare(0, fRightPrefix.size(), fRightPrefix) == 0) {
178 colName.remove_prefix(fRightPrefix.size());
179 return fRight->HasColumn(colName);
186 std::pair<ULong64_t, ULong64_t> association = fIndex->GetAssociatedEntries(
entry);
187 fLeft->SetEntry(slot, association.first);
188 fRight->SetEntry(slot, association.second);
194 std::pair<ULong64_t, ULong64_t> association = fIndex->GetAssociatedEntries(
entry);
195 fLeft->InitSlot(slot, association.first);
196 fRight->InitSlot(slot, association.second);
201 assert(0U == fNSlots &&
"Setting the number of slots even if the number of slots is different from zero.");
204 fLeft->SetNSlots(nSlots);
205 fRight->SetNSlots(nSlots);
211 if (colName.compare(0, fLeftPrefix.size(), fLeftPrefix) == 0) {
212 colName.remove_prefix(fLeftPrefix.size());
213 return fLeft->GetColumnReadersImpl(colName, info);
215 if (colName.compare(0, fRightPrefix.size(), fRightPrefix) == 0) {
216 colName.remove_prefix(fRightPrefix.size());
217 return fRight->GetColumnReadersImpl(colName, info);
225 fEntryRanges = fIndex->BuildIndex(fLeftDF, fRightDF);
228 fRight->Initialize();
236 std::unique_ptr<RCombinedDSIndex>
index,
237 std::string leftPrefix, std::string rightPrefix)
239 ROOT::RDataFrame tdf(std::make_unique<RCombinedDS>(std::move(
left), std::move(
right), std::move(
index), leftPrefix, rightPrefix));
244 std::string leftPrefix, std::string rightPrefix)
246 ROOT::RDataFrame tdf(std::make_unique<RCombinedDS>(std::move(
left), std::move(
right), std::move(std::make_unique<RCombinedDSCrossJoinIndex>()), leftPrefix, rightPrefix));
251 std::string indexColumnName,
252 std::string leftPrefix, std::string rightPrefix)
259 std::string indexColumnName,
260 std::string leftPrefix, std::string rightPrefix)
267 std::string leftPrefix, std::string rightPrefix)
269 ROOT::RDataFrame tdf(std::make_unique<RCombinedDS>(std::move(
left), std::move(
right), std::move(std::make_unique<RCombinedDSFriendIndex>()), leftPrefix, rightPrefix));
#define O2_BUILTIN_UNREACHABLE
std::vector< std::pair< ULong64_t, ULong64_t > > BuildIndex(std::unique_ptr< RDataFrame > &left, std::unique_ptr< RDataFrame > &right) final
std::vector< std::pair< ULong64_t, ULong64_t > > BuildIndex(std::unique_ptr< RDataFrame > &left, std::unique_ptr< RDataFrame > &right) final
bool HasColumn(std::string_view colName) const override
~RCombinedDS() override
Destructor.
RCombinedDS(std::unique_ptr< RDataSource > left, std::unique_ptr< RDataSource > right, std::unique_ptr< RCombinedDSIndex > index=std::make_unique< RCombinedDSFriendIndex >(), std::string leftPrefix=std::string{"left_"}, std::string rightPrefix=std::string{"right_"})
bool SetEntry(unsigned int slot, ULong64_t entry) override
std::string GetTypeName(std::string_view colName) const override
std::vector< std::pair< ULong64_t, ULong64_t > > GetEntryRanges() override
void InitSlot(unsigned int slot, ULong64_t firstEntry) override
void SetNSlots(unsigned int nSlots) override
std::vector< void * > GetColumnReadersImpl(std::string_view colName, const std::type_info &info) override
This should never be called, since a template overload of GetColumnReaders() is provided.
const std::vector< std::string > & GetColumnNames() const override
void Initialize() override
GLint GLint GLsizei GLint GLenum GLenum type
RDataFrame MakeColumnIndexedDataFrame(std::unique_ptr< RDataSource > left, std::unique_ptr< RDataSource >, std::string indexColName, std::string leftPrefix="left_", std::string rightPrefix="right_")
RDataFrame MakeBlockAntiDataFrame(std::unique_ptr< RDataSource > left, std::unique_ptr< RDataSource > right, std::string indexColumnName, std::string leftPrefix="left_", std::string rightPrefix="right_")
RDataFrame MakeCombinedDataFrame(std::unique_ptr< RDataSource > left, std::unique_ptr< RDataSource >, std::unique_ptr< RCombinedDSIndex > index, std::string leftPrefix="left_", std::string rightPrefix="right_")
Factory method to create an Apache Arrow RDataFrame.
RDataFrame MakeFriendDataFrame(std::unique_ptr< RDataSource > left, std::unique_ptr< RDataSource > right, std::string leftPrefix="left_", std::string rightPrefix="right_")
RDataFrame MakeCrossProductDataFrame(std::unique_ptr< RDataSource > left, std::unique_ptr< RDataSource >, std::string leftPrefix="left_", std::string rightPrefix="right_")
Defining DataPointCompositeObject explicitly as copyable.
static char const * combinationRuleAsString(BlockCombinationRule ruleType)