15#include "arrow/table.h"
16#include "gandiva/tree_expr_builder.h"
21#include <unordered_map>
30static const std::array<std::string, BasicOp::Conditional + 1> basicOperationsMap = {
41 "less_than_or_equal_to",
43 "greater_than_or_equal_to",
65 std::stack<NodeRecord>
path;
66 auto local_index =
index;
69 while (!
path.empty()) {
71 top.node_ptr->index = local_index;
73 if (
top.node_ptr->condition !=
nullptr) {
80 if (
top.node_ptr->left !=
nullptr) {
81 path.emplace(
top.node_ptr->left.get(), 0);
83 if (
top.node_ptr->right !=
nullptr) {
84 path.emplace(
top.node_ptr->right.get(), 0);
94struct LiteralNodeHelper {
101struct BindingNodeHelper {
102 DatumSpec operator()(BindingNode
const& node)
const
104 return DatumSpec{node.name, node.hash, node.type};
109 ColumnOperationSpec operator()(OpNode
const& node)
const
111 return ColumnOperationSpec{node.op};
115struct PlaceholderNodeHelper {
116 DatumSpec operator()(PlaceholderNode
const& node)
const
118 return DatumSpec{node.value, node.type};
127 return arrow::uint8();
129 return arrow::int8();
131 return arrow::int16();
133 return arrow::uint16();
135 return arrow::int32();
137 return arrow::uint32();
139 return arrow::int64();
141 return arrow::uint64();
143 return arrow::float32();
145 return arrow::float64();
147 return arrow::boolean();
171 return (lhs.datum == rhs.datum) && (lhs.type == rhs.type);
180 [&os](
auto&& arg) { os << arg; },
183 [&os](
size_t&& arg) { os << arg; },
184 [&os](std::string&& arg) { os << arg; },
192 std::stack<NodeRecord>
path;
197 auto updateNode = [&](
Node* node) {
198 if (node->self.index() == 3) {
199 std::get_if<3>(&node->self)->reset(context);
204 while (!
path.empty()) {
206 updateNode(
top.node_ptr);
208 auto* leftp =
top.node_ptr->left.get();
209 auto* rightp =
top.node_ptr->right.get();
210 auto* condp =
top.node_ptr->condition.get();
213 if (leftp !=
nullptr) {
214 path.emplace(leftp, 0);
216 if (rightp !=
nullptr) {
217 path.emplace(rightp, 0);
219 if (condp !=
nullptr) {
220 path.emplace(condp, 0);
251 return "unsupported";
259 std::stack<NodeRecord>
path;
260 auto isLeaf = [](
Node const*
const node) {
261 return ((node->left ==
nullptr) && (node->right ==
nullptr));
264 auto processLeaf = [](
Node const*
const node) {
267 [lh = LiteralNodeHelper{}](
LiteralNode const& node) {
return lh(node); },
268 [bh = BindingNodeHelper{}](
BindingNode const& node) {
return bh(node); },
269 [ph = PlaceholderNodeHelper{}](
PlaceholderNode const& node) {
return ph(node); },
279 while (!
path.empty()) {
294 auto*
left =
top.node_ptr->left.get();
295 bool leftLeaf = isLeaf(
left);
298 operationSpec.left = processLeaf(
left);
305 if (
top.node_ptr->right !=
nullptr) {
308 bool rightLeaf =
true;
309 if (
right !=
nullptr) {
310 rightLeaf = isLeaf(
right);
313 auto isUnary =
false;
314 if (
top.node_ptr->right ==
nullptr) {
319 operationSpec.right = processLeaf(
right);
326 decltype(
left) condition =
nullptr;
327 if (
top.node_ptr->condition !=
nullptr) {
328 condition =
top.node_ptr->condition.get();
330 bool condleaf = condition !=
nullptr ? isLeaf(condition) :
true;
332 if (condition !=
nullptr) {
334 operationSpec.condition = processLeaf(condition);
343 OperationSpecs.push_back(std::move(operationSpec));
347 if (!isUnary && !rightLeaf) {
351 path.emplace(condition, ci);
356 std::vector<atype::type> resultTypes;
357 resultTypes.resize(OperationSpecs.size());
361 if (
left.datum.index() == 0) {
362 throw runtime_error(
"Malformed operation spec: empty left datum");
366 if (
left.datum.index() == 1) {
367 left.type = resultTypes[std::get<size_t>(
left.datum)];
370 if (
right.datum.index() == 1) {
371 right.type = resultTypes[std::get<size_t>(
right.datum)];
375 auto t2 =
right.type;
377 if (
right.datum.index() == 0) {
378 if (
t1 == atype::DOUBLE) {
379 return atype::DOUBLE;
388 auto isIntType = [](
auto t) {
389 return (t == atype::UINT8) || (t == atype::INT8) || (t == atype::UINT16) || (t == atype::INT16) || (t == atype::UINT32) || (t == atype::INT32) || (t == atype::UINT64) || (t == atype::INT64);
393 if (t2 == atype::FLOAT) {
396 if (t2 == atype::DOUBLE) {
397 return atype::DOUBLE;
406 if (
t1 == atype::FLOAT) {
410 if (t2 == atype::DOUBLE) {
411 return atype::DOUBLE;
414 if (
t1 == atype::DOUBLE) {
415 return atype::DOUBLE;
420 for (
auto it = OperationSpecs.rbegin(); it != OperationSpecs.rend(); ++it) {
421 auto type = inferResultType(it->left, it->right);
422 if (it->type == atype::NA) {
426 it->result.type = it->type;
427 resultTypes[std::get<size_t>(it->result.datum)] = it->type;
430 return OperationSpecs;
435 return gandiva::TreeExprBuilder::MakeCondition(std::move(node));
440 return gandiva::TreeExprBuilder::MakeExpression(std::move(node), std::move(
result));
443std::shared_ptr<gandiva::Filter>
446 std::shared_ptr<gandiva::Filter>
filter;
447 auto s = gandiva::Filter::Make(Schema,
451 throw runtime_error_f(
"Failed to create filter: %s", s.ToString().c_str());
456std::shared_ptr<gandiva::Filter>
457 createFilter(gandiva::SchemaPtr
const& Schema, gandiva::ConditionPtr condition)
459 std::shared_ptr<gandiva::Filter>
filter;
460 auto s = gandiva::Filter::Make(Schema,
464 throw runtime_error_f(
"Failed to create filter: %s", s.ToString().c_str());
469std::shared_ptr<gandiva::Projector>
472 std::shared_ptr<gandiva::Projector> projector;
473 auto s = gandiva::Projector::Make(Schema,
477 throw runtime_error_f(
"Failed to create projector: %s", s.ToString().c_str());
482std::shared_ptr<gandiva::Projector>
489 std::shared_ptr<arrow::Schema> schema,
490 std::vector<std::shared_ptr<arrow::Field>>
const& fields)
492 std::vector<gandiva::ExpressionPtr> expressions;
494 for (
size_t ci = 0; ci < nColumns; ++ci) {
495 expressions.push_back(
503 std::shared_ptr<gandiva::Projector> projector;
504 auto s = gandiva::Projector::Make(
517 auto s = gandiva::SelectionVector::MakeInt64(table->num_rows(),
518 arrow::default_memory_pool(),
521 throw runtime_error_f(
"Cannot allocate selection vector %s", s.ToString().c_str());
523 if (table->num_rows() == 0) {
526 arrow::TableBatchReader reader(*table);
527 std::shared_ptr<arrow::RecordBatch> batch;
529 s = reader.ReadNext(&batch);
531 throw runtime_error_f(
"Cannot read batches from table %s", s.ToString().c_str());
533 if (batch ==
nullptr) {
536 s =
gfilter->Evaluate(*batch, selection);
551auto createProjection(std::shared_ptr<arrow::Table>
const& table, std::shared_ptr<gandiva::Projector>
const& gprojector)
553 arrow::TableBatchReader reader(*table);
554 std::shared_ptr<arrow::RecordBatch> batch;
555 std::shared_ptr<arrow::ArrayVector>
v;
557 auto s = reader.ReadNext(&batch);
559 throw runtime_error_f(
"Cannot read batches from table %s", s.ToString().c_str());
561 if (batch ==
nullptr) {
564 s = gprojector->Evaluate(*batch, arrow::default_memory_pool(),
v.get());
566 throw runtime_error_f(
"Cannot apply projector %s", s.ToString().c_str());
573 gandiva::SchemaPtr
const& Schema)
575 std::vector<gandiva::NodePtr> opNodes;
576 opNodes.resize(opSpecs.size());
577 std::fill(opNodes.begin(), opNodes.end(),
nullptr);
578 std::unordered_map<std::string, gandiva::NodePtr> fieldNodes;
579 std::unordered_map<size_t, gandiva::NodePtr> subtrees;
581 auto datumNode = [Schema, &opNodes, &fieldNodes](
DatumSpec const& spec) {
582 if (spec.datum.index() == 0) {
583 return gandiva::NodePtr(
nullptr);
585 if (spec.datum.index() == 1) {
586 return opNodes[std::get<size_t>(spec.datum)];
589 if (spec.datum.index() == 2) {
590 auto content = std::get<LiteralNode::var_t>(spec.datum);
591 switch (content.index()) {
593 return gandiva::TreeExprBuilder::MakeLiteral(
static_cast<int32_t
>(std::get<int>(content)));
595 return gandiva::TreeExprBuilder::MakeLiteral(std::get<bool>(content));
597 return gandiva::TreeExprBuilder::MakeLiteral(std::get<float>(content));
599 return gandiva::TreeExprBuilder::MakeLiteral(std::get<double>(content));
601 return gandiva::TreeExprBuilder::MakeLiteral(std::get<uint8_t>(content));
603 return gandiva::TreeExprBuilder::MakeLiteral(std::get<int64_t>(content));
605 return gandiva::TreeExprBuilder::MakeLiteral(std::get<int16_t>(content));
607 return gandiva::TreeExprBuilder::MakeLiteral(std::get<uint16_t>(content));
609 return gandiva::TreeExprBuilder::MakeLiteral(std::get<int8_t>(content));
611 return gandiva::TreeExprBuilder::MakeLiteral(std::get<uint32_t>(content));
613 return gandiva::TreeExprBuilder::MakeLiteral(std::get<uint64_t>(content));
619 if (spec.datum.index() == 3) {
620 auto name = std::get<std::string>(spec.datum);
622 if (
lookup != fieldNodes.end()) {
625 auto field = Schema->GetFieldByName(
name);
626 if (field ==
nullptr) {
629 auto node = gandiva::TreeExprBuilder::MakeField(field);
630 fieldNodes.insert({
name, node});
636 gandiva::NodePtr
tree =
nullptr;
637 for (
auto it = opSpecs.rbegin(); it != opSpecs.rend(); ++it) {
638 auto leftNode = datumNode(it->left);
639 auto rightNode = datumNode(it->right);
640 auto condNode = datumNode(it->condition);
642 auto insertUpcastNode = [&](gandiva::NodePtr node, atype::type t) {
650 auto insertEqualizeUpcastNode = [&](gandiva::NodePtr& node1, gandiva::NodePtr& node2, atype::type
t1, atype::type t2) {
654 }
else if (
t1 > t2) {
660 gandiva::NodePtr temp_node;
664 temp_node = gandiva::TreeExprBuilder::MakeOr({leftNode, rightNode});
667 temp_node = gandiva::TreeExprBuilder::MakeAnd({leftNode, rightNode});
670 temp_node = gandiva::TreeExprBuilder::MakeIf(condNode, leftNode, rightNode,
concreteArrowType(it->type));
674 if (it->type != atype::BOOL) {
675 leftNode = insertUpcastNode(leftNode, it->left.type);
676 rightNode = insertUpcastNode(rightNode, it->right.type);
678 insertEqualizeUpcastNode(leftNode, rightNode, it->left.type, it->right.type);
680 temp_node = gandiva::TreeExprBuilder::MakeFunction(basicOperationsMap[it->op], {leftNode, rightNode},
concreteArrowType(it->type));
682 leftNode = insertUpcastNode(leftNode, it->left.type);
683 temp_node = gandiva::TreeExprBuilder::MakeFunction(basicOperationsMap[it->op], {leftNode},
concreteArrowType(it->type));
687 if (it->index == 0) {
690 auto subtree = subtrees.find(it->index);
691 if (subtree == subtrees.end()) {
692 subtrees.insert({it->index, temp_node});
694 subtree->second = temp_node;
697 opNodes[std::get<size_t>(it->result.datum)] = temp_node;
705 std::set<uint32_t> opHashes;
706 for (
auto const& spec : specs) {
707 if (spec.left.datum.index() == 3) {
708 opHashes.insert(spec.left.hash);
710 if (spec.right.datum.index() == 3) {
711 opHashes.insert(spec.right.hash);
715 return std::includes(hashes.begin(), hashes.end(),
716 opHashes.begin(), opHashes.end());
721 if (eInfos.empty()) {
725 for (
auto& info : eInfos) {
729 if (info.tree !=
nullptr) {
730 info.tree = gandiva::TreeExprBuilder::MakeAnd({info.tree,
tree});
740 if (info.
tree !=
nullptr && info.
filter ==
nullptr) {
#define O2_BUILTIN_UNREACHABLE
uint8_t lookup(const char input) noexcept
GLdouble GLdouble GLdouble GLdouble top
GLuint const GLchar * name
GLint GLint GLsizei GLint GLenum GLenum type
GLint GLint GLint GLint GLint GLint GLint GLbitfield GLenum filter
GLsizei const GLchar *const * path
GLuint GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat t1
std::shared_ptr< gandiva::SelectionVector > Selection
std::shared_ptr< arrow::DataType > concreteArrowType(atype::type type)
std::shared_ptr< gandiva::Filter > createFilter(gandiva::SchemaPtr const &Schema, gandiva::ConditionPtr condition)
Function to create gandiva filter from gandiva condition.
gandiva::ExpressionPtr makeExpression(gandiva::NodePtr node, gandiva::FieldPtr result)
Function to create gandiva projecting expression from generic gandiva expression tree.
std::shared_ptr< gandiva::Projector > createProjectorHelper(size_t nColumns, expressions::Projector *projectors, std::shared_ptr< arrow::Schema > schema, std::vector< std::shared_ptr< arrow::Field > > const &fields)
gandiva::Selection createSelection(std::shared_ptr< arrow::Table > const &table, Filter const &expression)
Function for creating gandiva selection from our internal filter tree.
bool operator==(DatumSpec const &lhs, DatumSpec const &rhs)
auto createProjection(std::shared_ptr< arrow::Table > const &table, std::shared_ptr< gandiva::Projector > const &gprojector)
const char * stringType(atype::type t)
std::vector< ColumnOperationSpec > Operations
void updateExpressionInfos(expressions::Filter const &filter, std::vector< ExpressionInfo > &eInfos)
Function for attaching gandiva filters to compatible task inputs.
Operations createOperations(Filter const &expression)
Function to create an internal operation sequence from a filter tree.
std::ostream & operator<<(std::ostream &os, DatumSpec const &spec)
gandiva::ConditionPtr makeCondition(gandiva::NodePtr node)
Function to create gandiva condition expression from generic gandiva expression tree.
bool isTableCompatible(std::set< uint32_t > const &hashes, Operations const &specs)
Function to check compatibility of a given arrow schema with operation sequence.
gandiva::NodePtr createExpressionTree(Operations const &opSpecs, gandiva::SchemaPtr const &Schema)
Function to create gandiva expression tree from operation sequence.
void updatePlaceholders(Filter &filter, InitContext &context)
Update placeholder nodes from context.
std::string upcastTo(atype::type f)
std::shared_ptr< gandiva::Projector > createProjector(gandiva::SchemaPtr const &Schema, Operations const &opSpecs, gandiva::FieldPtr result)
Function to create gandiva projector from operation sequence.
void updateFilterInfo(ExpressionInfo &info, std::shared_ptr< arrow::Table > &table)
Defining PrimaryVertex explicitly as messageable.
RuntimeErrorRef runtime_error(const char *)
RuntimeErrorRef runtime_error_f(const char *,...)
gandiva::Selection selection
gandiva::FilterPtr filter
An expression tree node corresponding to a column binding.
A struct, containing the root of the expression tree.
size_t designateSubtrees(Node *node, size_t index=0)
std::unique_ptr< Node > node
An expression tree node corresponding to a literal value.
LiteralValue::stored_type var_t
An expression tree node corresponding to binary or unary operation.
A placeholder node for simple type configurable.
From https://en.cppreference.com/w/cpp/utility/variant/visit.
std::unique_ptr< TTree > tree((TTree *) flIn.Get(std::string(o2::base::NameConf::CTFTREENAME).c_str()))