15#include "arrow/table.h"
16#include "gandiva/tree_expr_builder.h"
21#include <unordered_map>
30static const std::array<std::string, BasicOp::Conditional + 1> basicOperationsMap = {
41 "less_than_or_equal_to",
43 "greater_than_or_equal_to",
65 std::stack<NodeRecord>
path;
66 auto local_index =
index;
69 while (!
path.empty()) {
71 top.node_ptr->index = local_index;
73 if (
top.node_ptr->condition !=
nullptr) {
80 if (
top.node_ptr->left !=
nullptr) {
81 path.emplace(
top.node_ptr->left.get(), 0);
83 if (
top.node_ptr->right !=
nullptr) {
84 path.emplace(
top.node_ptr->right.get(), 0);
94struct LiteralNodeHelper {
101struct BindingNodeHelper {
102 DatumSpec operator()(BindingNode
const& node)
const
104 return DatumSpec{node.name, node.hash, node.type};
109 ColumnOperationSpec operator()(OpNode
const& node)
const
111 return ColumnOperationSpec{node.op};
115struct PlaceholderNodeHelper {
116 DatumSpec operator()(PlaceholderNode
const& node)
const
118 return DatumSpec{node.value, node.type};
122struct ParameterNodeHelper {
123 DatumSpec operator()(ParameterNode
const& node)
const
125 return DatumSpec{node.value, node.type};
134 return arrow::uint8();
136 return arrow::int8();
138 return arrow::int16();
140 return arrow::uint16();
142 return arrow::int32();
144 return arrow::uint32();
146 return arrow::int64();
148 return arrow::uint64();
150 return arrow::float32();
152 return arrow::float64();
154 return arrow::boolean();
178 return (lhs.datum == rhs.datum) && (lhs.type == rhs.type);
187 [&os](
auto&& arg) { os << arg; },
190 [&os](
size_t&& arg) { os << arg; },
191 [&os](std::string&& arg) { os << arg; },
199 auto updateNode = [&](
Node* node) {
200 if (node->self.index() == 3) {
201 std::get_if<3>(&node->self)->reset(context);
234 return "unsupported";
242 std::stack<NodeRecord>
path;
243 auto isLeaf = [](
Node const*
const node) {
244 return ((node->left ==
nullptr) && (node->right ==
nullptr));
247 auto processLeaf = [](
Node const*
const node) {
250 [lh = LiteralNodeHelper{}](
LiteralNode const& node) {
return lh(node); },
251 [bh = BindingNodeHelper{}](
BindingNode const& node) {
return bh(node); },
252 [ph = PlaceholderNodeHelper{}](
PlaceholderNode const& node) {
return ph(node); },
253 [pr = ParameterNodeHelper{}](
ParameterNode const& node) {
return pr(node); },
263 while (!
path.empty()) {
278 auto*
left =
top.node_ptr->left.get();
279 bool leftLeaf = isLeaf(
left);
282 operationSpec.left = processLeaf(
left);
289 if (
top.node_ptr->right !=
nullptr) {
292 bool rightLeaf =
true;
293 if (
right !=
nullptr) {
294 rightLeaf = isLeaf(
right);
297 auto isUnary =
false;
298 if (
top.node_ptr->right ==
nullptr) {
303 operationSpec.right = processLeaf(
right);
310 decltype(
left) condition =
nullptr;
311 if (
top.node_ptr->condition !=
nullptr) {
312 condition =
top.node_ptr->condition.get();
314 bool condleaf = condition !=
nullptr ? isLeaf(condition) :
true;
316 if (condition !=
nullptr) {
318 operationSpec.condition = processLeaf(condition);
327 OperationSpecs.push_back(std::move(operationSpec));
331 if (!isUnary && !rightLeaf) {
335 path.emplace(condition, ci);
340 std::vector<atype::type> resultTypes;
341 resultTypes.resize(OperationSpecs.size());
345 if (
left.datum.index() == 0) {
346 throw runtime_error(
"Malformed operation spec: empty left datum");
350 if (
left.datum.index() == 1) {
351 left.type = resultTypes[std::get<size_t>(
left.datum)];
354 if (
right.datum.index() == 1) {
355 right.type = resultTypes[std::get<size_t>(
right.datum)];
359 auto t2 =
right.type;
361 if (
right.datum.index() == 0) {
362 if (
t1 == atype::DOUBLE) {
363 return atype::DOUBLE;
372 auto isIntType = [](
auto t) {
373 return (t == atype::UINT8) || (t == atype::INT8) || (t == atype::UINT16) || (t == atype::INT16) || (t == atype::UINT32) || (t == atype::INT32) || (t == atype::UINT64) || (t == atype::INT64);
377 if (t2 == atype::FLOAT) {
380 if (t2 == atype::DOUBLE) {
381 return atype::DOUBLE;
390 if (
t1 == atype::FLOAT) {
394 if (t2 == atype::DOUBLE) {
395 return atype::DOUBLE;
398 if (
t1 == atype::DOUBLE) {
399 return atype::DOUBLE;
404 for (
auto it = OperationSpecs.rbegin(); it != OperationSpecs.rend(); ++it) {
405 auto type = inferResultType(it->left, it->right);
406 if (it->type == atype::NA) {
410 it->result.type = it->type;
411 resultTypes[std::get<size_t>(it->result.datum)] = it->type;
414 return OperationSpecs;
419 return gandiva::TreeExprBuilder::MakeCondition(std::move(node));
424 return gandiva::TreeExprBuilder::MakeExpression(std::move(node), std::move(
result));
427std::shared_ptr<gandiva::Filter>
430 std::shared_ptr<gandiva::Filter>
filter;
431 auto s = gandiva::Filter::Make(Schema,
435 throw runtime_error_f(
"Failed to create filter: %s", s.ToString().c_str());
440std::shared_ptr<gandiva::Filter>
441 createFilter(gandiva::SchemaPtr
const& Schema, gandiva::ConditionPtr condition)
443 std::shared_ptr<gandiva::Filter>
filter;
444 auto s = gandiva::Filter::Make(Schema,
448 throw runtime_error_f(
"Failed to create filter: %s", s.ToString().c_str());
453std::shared_ptr<gandiva::Projector>
456 std::shared_ptr<gandiva::Projector> projector;
457 auto s = gandiva::Projector::Make(Schema,
461 throw runtime_error_f(
"Failed to create projector: %s", s.ToString().c_str());
466std::shared_ptr<gandiva::Projector>
473 std::shared_ptr<arrow::Schema> schema,
474 std::vector<std::shared_ptr<arrow::Field>>
const& fields)
476 std::vector<gandiva::ExpressionPtr> expressions;
478 for (
size_t ci = 0; ci < nColumns; ++ci) {
479 expressions.push_back(
487 std::shared_ptr<gandiva::Projector> projector;
488 auto s = gandiva::Projector::Make(
501 auto s = gandiva::SelectionVector::MakeInt64(table->num_rows(),
502 arrow::default_memory_pool(),
505 throw runtime_error_f(
"Cannot allocate selection vector %s", s.ToString().c_str());
507 if (table->num_rows() == 0) {
510 arrow::TableBatchReader reader(*table);
511 std::shared_ptr<arrow::RecordBatch> batch;
513 s = reader.ReadNext(&batch);
515 throw runtime_error_f(
"Cannot read batches from table %s", s.ToString().c_str());
517 if (batch ==
nullptr) {
520 s =
gfilter->Evaluate(*batch, selection);
535auto createProjection(std::shared_ptr<arrow::Table>
const& table, std::shared_ptr<gandiva::Projector>
const& gprojector)
537 arrow::TableBatchReader reader(*table);
538 std::shared_ptr<arrow::RecordBatch> batch;
539 std::shared_ptr<arrow::ArrayVector>
v;
541 auto s = reader.ReadNext(&batch);
543 throw runtime_error_f(
"Cannot read batches from table %s", s.ToString().c_str());
545 if (batch ==
nullptr) {
548 s = gprojector->Evaluate(*batch, arrow::default_memory_pool(),
v.get());
550 throw runtime_error_f(
"Cannot apply projector %s", s.ToString().c_str());
557 gandiva::SchemaPtr
const& Schema)
559 std::vector<gandiva::NodePtr> opNodes;
560 opNodes.resize(opSpecs.size());
561 std::fill(opNodes.begin(), opNodes.end(),
nullptr);
562 std::unordered_map<std::string, gandiva::NodePtr> fieldNodes;
563 std::unordered_map<size_t, gandiva::NodePtr> subtrees;
565 auto datumNode = [Schema, &opNodes, &fieldNodes](
DatumSpec const& spec) {
566 if (spec.datum.index() == 0) {
567 return gandiva::NodePtr(
nullptr);
569 if (spec.datum.index() == 1) {
570 return opNodes[std::get<size_t>(spec.datum)];
573 if (spec.datum.index() == 2) {
574 auto content = std::get<LiteralNode::var_t>(spec.datum);
575 switch (content.index()) {
577 return gandiva::TreeExprBuilder::MakeLiteral(
static_cast<int32_t
>(std::get<int>(content)));
579 return gandiva::TreeExprBuilder::MakeLiteral(std::get<bool>(content));
581 return gandiva::TreeExprBuilder::MakeLiteral(std::get<float>(content));
583 return gandiva::TreeExprBuilder::MakeLiteral(std::get<double>(content));
585 return gandiva::TreeExprBuilder::MakeLiteral(std::get<uint8_t>(content));
587 return gandiva::TreeExprBuilder::MakeLiteral(std::get<int64_t>(content));
589 return gandiva::TreeExprBuilder::MakeLiteral(std::get<int16_t>(content));
591 return gandiva::TreeExprBuilder::MakeLiteral(std::get<uint16_t>(content));
593 return gandiva::TreeExprBuilder::MakeLiteral(std::get<int8_t>(content));
595 return gandiva::TreeExprBuilder::MakeLiteral(std::get<uint32_t>(content));
597 return gandiva::TreeExprBuilder::MakeLiteral(std::get<uint64_t>(content));
603 if (spec.datum.index() == 3) {
604 auto name = std::get<std::string>(spec.datum);
606 if (
lookup != fieldNodes.end()) {
609 auto field = Schema->GetFieldByName(
name);
610 if (field ==
nullptr) {
613 auto node = gandiva::TreeExprBuilder::MakeField(field);
614 fieldNodes.insert({
name, node});
620 gandiva::NodePtr
tree =
nullptr;
621 for (
auto it = opSpecs.rbegin(); it != opSpecs.rend(); ++it) {
622 auto leftNode = datumNode(it->left);
623 auto rightNode = datumNode(it->right);
624 auto condNode = datumNode(it->condition);
626 auto insertUpcastNode = [&](gandiva::NodePtr node, atype::type t) {
634 auto insertEqualizeUpcastNode = [&](gandiva::NodePtr& node1, gandiva::NodePtr& node2, atype::type
t1, atype::type t2) {
638 }
else if (
t1 > t2) {
644 gandiva::NodePtr temp_node;
648 temp_node = gandiva::TreeExprBuilder::MakeOr({leftNode, rightNode});
651 temp_node = gandiva::TreeExprBuilder::MakeAnd({leftNode, rightNode});
654 temp_node = gandiva::TreeExprBuilder::MakeIf(condNode, leftNode, rightNode,
concreteArrowType(it->type));
658 if (it->type != atype::BOOL) {
659 leftNode = insertUpcastNode(leftNode, it->left.type);
660 rightNode = insertUpcastNode(rightNode, it->right.type);
662 insertEqualizeUpcastNode(leftNode, rightNode, it->left.type, it->right.type);
664 temp_node = gandiva::TreeExprBuilder::MakeFunction(basicOperationsMap[it->op], {leftNode, rightNode},
concreteArrowType(it->type));
666 leftNode = insertUpcastNode(leftNode, it->left.type);
667 temp_node = gandiva::TreeExprBuilder::MakeFunction(basicOperationsMap[it->op], {leftNode},
concreteArrowType(it->type));
671 if (it->index == 0) {
674 auto subtree = subtrees.find(it->index);
675 if (subtree == subtrees.end()) {
676 subtrees.insert({it->index, temp_node});
678 subtree->second = temp_node;
681 opNodes[std::get<size_t>(it->result.datum)] = temp_node;
689 std::set<uint32_t> opHashes;
690 for (
auto const& spec : specs) {
691 if (spec.left.datum.index() == 3) {
692 opHashes.insert(spec.left.hash);
694 if (spec.right.datum.index() == 3) {
695 opHashes.insert(spec.right.hash);
699 return std::includes(hashes.begin(), hashes.end(),
700 opHashes.begin(), opHashes.end());
705 if (eInfos.empty()) {
709 for (
auto& info : eInfos) {
713 if (info.tree !=
nullptr) {
714 info.tree = gandiva::TreeExprBuilder::MakeAnd({info.tree,
tree});
724 if (info.
tree !=
nullptr && info.
filter ==
nullptr) {
#define O2_BUILTIN_UNREACHABLE
uint8_t lookup(const char input) noexcept
GLdouble GLdouble GLdouble GLdouble top
GLuint const GLchar * name
GLint GLint GLsizei GLint GLenum GLenum type
GLint GLint GLint GLint GLint GLint GLint GLbitfield GLenum filter
GLsizei const GLchar *const * path
GLuint GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat t1
std::shared_ptr< gandiva::SelectionVector > Selection
std::shared_ptr< arrow::DataType > concreteArrowType(atype::type type)
std::shared_ptr< gandiva::Filter > createFilter(gandiva::SchemaPtr const &Schema, gandiva::ConditionPtr condition)
Function to create gandiva filter from gandiva condition.
gandiva::ExpressionPtr makeExpression(gandiva::NodePtr node, gandiva::FieldPtr result)
Function to create gandiva projecting expression from generic gandiva expression tree.
std::shared_ptr< gandiva::Projector > createProjectorHelper(size_t nColumns, expressions::Projector *projectors, std::shared_ptr< arrow::Schema > schema, std::vector< std::shared_ptr< arrow::Field > > const &fields)
gandiva::Selection createSelection(std::shared_ptr< arrow::Table > const &table, Filter const &expression)
Function for creating gandiva selection from our internal filter tree.
bool operator==(DatumSpec const &lhs, DatumSpec const &rhs)
auto createProjection(std::shared_ptr< arrow::Table > const &table, std::shared_ptr< gandiva::Projector > const &gprojector)
const char * stringType(atype::type t)
std::vector< ColumnOperationSpec > Operations
void updateExpressionInfos(expressions::Filter const &filter, std::vector< ExpressionInfo > &eInfos)
Function for attaching gandiva filters to compatible task inputs.
Operations createOperations(Filter const &expression)
Function to create an internal operation sequence from a filter tree.
std::ostream & operator<<(std::ostream &os, DatumSpec const &spec)
gandiva::ConditionPtr makeCondition(gandiva::NodePtr node)
Function to create gandiva condition expression from generic gandiva expression tree.
bool isTableCompatible(std::set< uint32_t > const &hashes, Operations const &specs)
Function to check compatibility of a given arrow schema with operation sequence.
gandiva::NodePtr createExpressionTree(Operations const &opSpecs, gandiva::SchemaPtr const &Schema)
Function to create gandiva expression tree from operation sequence.
void updatePlaceholders(Filter &filter, InitContext &context)
Update placeholder nodes from context.
std::string upcastTo(atype::type f)
std::shared_ptr< gandiva::Projector > createProjector(gandiva::SchemaPtr const &Schema, Operations const &opSpecs, gandiva::FieldPtr result)
Function to create gandiva projector from operation sequence.
void updateFilterInfo(ExpressionInfo &info, std::shared_ptr< arrow::Table > &table)
void walk(Node *head, L const &pred)
Tree-walker helper.
Defining PrimaryVertex explicitly as messageable.
RuntimeErrorRef runtime_error(const char *)
RuntimeErrorRef runtime_error_f(const char *,...)
gandiva::Selection selection
gandiva::FilterPtr filter
An expression tree node corresponding to a column binding.
A struct, containing the root of the expression tree.
size_t designateSubtrees(Node *node, size_t index=0)
std::unique_ptr< Node > node
An expression tree node corresponding to a literal value.
LiteralValue::stored_type var_t
An expression tree node corresponding to binary or unary operation.
A placeholder node for parameters taken from an array.
A placeholder node for simple type configurable.
From https://en.cppreference.com/w/cpp/utility/variant/visit.
std::unique_ptr< TTree > tree((TTree *) flIn.Get(std::string(o2::base::NameConf::CTFTREENAME).c_str()))