15#include "arrow/table.h"
16#include "gandiva/tree_expr_builder.h"
21#include <unordered_map>
34constexpr std::array<std::string_view, BasicOp::Conditional + 1>
mapping{
67constexpr std::array<std::string_view, 8>
cfgtypes{
115 "less_than_or_equal_to",
117 "greater_than_or_equal_to",
139 std::stack<NodeRecord>
path;
140 auto local_index =
index;
143 while (!
path.empty()) {
145 top.node_ptr->index = local_index;
147 if (
top.node_ptr->condition !=
nullptr) {
154 if (
top.node_ptr->left !=
nullptr) {
155 path.emplace(
top.node_ptr->left.get(), 0);
157 if (
top.node_ptr->right !=
nullptr) {
158 path.emplace(
top.node_ptr->right.get(), 0);
178template <is_literal_like T>
179constexpr inline auto makeDatum(T
const& node)
181 return DatumSpec{node.value, node.
type};
184template <is_binding T>
185constexpr inline auto makeDatum(T
const& node)
187 return DatumSpec{node.name, node.hash, node.type};
191constexpr inline auto makeOp(T
const&,
size_t const&)
196template <is_operation T>
197constexpr inline auto makeOp(T
const& node,
size_t const&
index)
199 return ColumnOperationSpec{node.
op,
index};
202template <is_conditional T>
203constexpr inline auto makeOp(T
const&,
size_t const&
index)
212 return arrow::uint8();
214 return arrow::int8();
216 return arrow::int16();
218 return arrow::uint16();
220 return arrow::int32();
222 return arrow::uint32();
224 return arrow::int64();
226 return arrow::uint64();
228 return arrow::float32();
230 return arrow::float64();
232 return arrow::boolean();
260 [&os](
auto&& arg) { os << arg; },
263 [&os](
size_t&& arg) { os << arg; },
264 [&os](std::string&& arg) { os << arg; },
276 if (node->self.index() == 3) {
277 std::get_if<3>(&node->self)->reset(context);
308 return "unsupported";
316 std::stack<NodeRecord>
path;
317 auto isLeaf = [](
Node const*
const node) {
318 return ((node->left ==
nullptr) && (node->right ==
nullptr));
321 auto processLeaf = [](
Node const*
const node) {
332 while (!
path.empty()) {
338 [&](
auto const&
n) {
return makeOp(
n,
top.node_ptr->index); },
344 auto*
left =
top.node_ptr->left.get();
345 bool leftLeaf = isLeaf(
left);
348 operationSpec.left = processLeaf(
left);
355 if (
top.node_ptr->right !=
nullptr) {
358 bool rightLeaf =
true;
359 if (
right !=
nullptr) {
360 rightLeaf = isLeaf(
right);
363 auto isUnary =
false;
364 if (
top.node_ptr->right ==
nullptr) {
369 operationSpec.right = processLeaf(
right);
376 decltype(
left) condition =
nullptr;
377 if (
top.node_ptr->condition !=
nullptr) {
378 condition =
top.node_ptr->condition.get();
380 bool condleaf = condition !=
nullptr ? isLeaf(condition) :
true;
382 if (condition !=
nullptr) {
384 operationSpec.condition = processLeaf(condition);
393 OperationSpecs.push_back(std::move(operationSpec));
397 if (!isUnary && !rightLeaf) {
401 path.emplace(condition, ci);
406 std::vector<atype::type> resultTypes;
407 resultTypes.resize(OperationSpecs.size());
411 if (
left.datum.index() == 0) {
412 throw runtime_error(
"Malformed operation spec: empty left datum");
416 if (
left.datum.index() == 1) {
417 left.type = resultTypes[std::get<size_t>(
left.datum)];
420 if (
right.datum.index() == 1) {
421 right.type = resultTypes[std::get<size_t>(
right.datum)];
425 auto t2 =
right.type;
427 if (
right.datum.index() == 0) {
428 if (
t1 == atype::DOUBLE) {
429 return atype::DOUBLE;
438 auto isIntType = [](
auto t) {
439 return (t == atype::UINT8) || (t == atype::INT8) || (t == atype::UINT16) || (t == atype::INT16) || (t == atype::UINT32) || (t == atype::INT32) || (t == atype::UINT64) || (t == atype::INT64);
442 auto isBitwiseOp = [](
auto o) {
447 if (t2 == atype::FLOAT && !isBitwiseOp(
op)) {
450 if (t2 == atype::DOUBLE && !isBitwiseOp(
op)) {
451 return atype::DOUBLE;
460 if (
t1 == atype::FLOAT) {
461 if (isIntType(t2) && !isBitwiseOp(
op)) {
464 if (t2 == atype::DOUBLE) {
465 return atype::DOUBLE;
468 if (
t1 == atype::DOUBLE) {
469 return atype::DOUBLE;
472 if (isIntType(
t1) && isBitwiseOp(
op)) {
475 if (isIntType(t2) && isBitwiseOp(
op)) {
482 for (
auto it = OperationSpecs.rbegin(); it != OperationSpecs.rend(); ++it) {
483 auto type = inferResultType(it->op, it->left, it->right);
484 if (it->type == atype::NA) {
488 it->result.type = it->type;
489 resultTypes[std::get<size_t>(it->result.datum)] = it->type;
492 return OperationSpecs;
497 return gandiva::TreeExprBuilder::MakeCondition(std::move(node));
502 return gandiva::TreeExprBuilder::MakeExpression(std::move(node), std::move(
result));
505std::shared_ptr<gandiva::Filter>
508 std::shared_ptr<gandiva::Filter>
filter;
509 auto s = gandiva::Filter::Make(Schema,
513 throw runtime_error_f(
"Failed to create filter: %s", s.ToString().c_str());
518std::shared_ptr<gandiva::Filter>
519 createFilter(gandiva::SchemaPtr
const& Schema, gandiva::ConditionPtr condition)
521 std::shared_ptr<gandiva::Filter>
filter;
522 auto s = gandiva::Filter::Make(Schema,
526 throw runtime_error_f(
"Failed to create filter: %s", s.ToString().c_str());
531std::shared_ptr<gandiva::Projector>
534 std::shared_ptr<gandiva::Projector> projector;
535 auto s = gandiva::Projector::Make(Schema,
539 throw runtime_error_f(
"Failed to create projector: %s", s.ToString().c_str());
544std::shared_ptr<gandiva::Projector>
551 std::shared_ptr<arrow::Schema> schema,
552 std::vector<std::shared_ptr<arrow::Field>>
const& fields)
554 std::vector<gandiva::ExpressionPtr> expressions;
556 for (
size_t ci = 0; ci < nColumns; ++ci) {
557 expressions.push_back(
565 std::shared_ptr<gandiva::Projector> projector;
566 auto s = gandiva::Projector::Make(
579 auto s = gandiva::SelectionVector::MakeInt64(table->num_rows(),
580 arrow::default_memory_pool(),
583 throw runtime_error_f(
"Cannot allocate selection vector %s", s.ToString().c_str());
585 if (table->num_rows() == 0) {
588 arrow::TableBatchReader reader(*table);
589 std::shared_ptr<arrow::RecordBatch> batch;
591 s = reader.ReadNext(&batch);
593 throw runtime_error_f(
"Cannot read batches from table %s", s.ToString().c_str());
595 if (batch ==
nullptr) {
598 s =
gfilter->Evaluate(*batch, selection);
613auto createProjection(std::shared_ptr<arrow::Table>
const& table, std::shared_ptr<gandiva::Projector>
const& gprojector)
615 arrow::TableBatchReader reader(*table);
616 std::shared_ptr<arrow::RecordBatch> batch;
617 std::shared_ptr<arrow::ArrayVector>
v;
619 auto s = reader.ReadNext(&batch);
621 throw runtime_error_f(
"Cannot read batches from table %s", s.ToString().c_str());
623 if (batch ==
nullptr) {
626 s = gprojector->Evaluate(*batch, arrow::default_memory_pool(),
v.get());
628 throw runtime_error_f(
"Cannot apply projector %s", s.ToString().c_str());
635 gandiva::SchemaPtr
const& Schema)
637 std::vector<gandiva::NodePtr> opNodes;
638 opNodes.resize(opSpecs.size());
639 std::fill(opNodes.begin(), opNodes.end(),
nullptr);
640 std::unordered_map<std::string, gandiva::NodePtr> fieldNodes;
641 std::unordered_map<size_t, gandiva::NodePtr> subtrees;
643 auto datumNode = [Schema, &opNodes, &fieldNodes](
DatumSpec const& spec) {
644 if (spec.datum.index() == 0) {
645 return gandiva::NodePtr(
nullptr);
647 if (spec.datum.index() == 1) {
648 return opNodes[std::get<size_t>(spec.datum)];
651 if (spec.datum.index() == 2) {
652 auto content = std::get<LiteralNode::var_t>(spec.datum);
653 switch (content.index()) {
655 return gandiva::TreeExprBuilder::MakeLiteral(
static_cast<int32_t
>(std::get<int>(content)));
657 return gandiva::TreeExprBuilder::MakeLiteral(std::get<bool>(content));
659 return gandiva::TreeExprBuilder::MakeLiteral(std::get<float>(content));
661 return gandiva::TreeExprBuilder::MakeLiteral(std::get<double>(content));
663 return gandiva::TreeExprBuilder::MakeLiteral(std::get<uint8_t>(content));
665 return gandiva::TreeExprBuilder::MakeLiteral(std::get<int64_t>(content));
667 return gandiva::TreeExprBuilder::MakeLiteral(std::get<int16_t>(content));
669 return gandiva::TreeExprBuilder::MakeLiteral(std::get<uint16_t>(content));
671 return gandiva::TreeExprBuilder::MakeLiteral(std::get<int8_t>(content));
673 return gandiva::TreeExprBuilder::MakeLiteral(std::get<uint32_t>(content));
675 return gandiva::TreeExprBuilder::MakeLiteral(std::get<uint64_t>(content));
681 if (spec.datum.index() == 3) {
682 auto name = std::get<std::string>(spec.datum);
684 if (
lookup != fieldNodes.end()) {
687 auto field = Schema->GetFieldByName(
name);
688 if (field ==
nullptr) {
691 auto node = gandiva::TreeExprBuilder::MakeField(field);
692 fieldNodes.insert({
name, node});
698 auto insertUpcastNode = [](gandiva::NodePtr node, atype::type
t0, atype::type t) {
706 auto insertEqualizeUpcastNode = [](gandiva::NodePtr& node1, gandiva::NodePtr& node2, atype::type
t1, atype::type t2) {
710 }
else if (
t1 > t2) {
716 auto isBitwiseOp = [](
auto o) {
720 gandiva::NodePtr
tree =
nullptr;
721 for (
auto it = opSpecs.rbegin(); it != opSpecs.rend(); ++it) {
722 auto leftNode = datumNode(it->left);
723 auto rightNode = datumNode(it->right);
724 auto condNode = datumNode(it->condition);
726 gandiva::NodePtr temp_node;
730 temp_node = gandiva::TreeExprBuilder::MakeOr({leftNode, rightNode});
733 temp_node = gandiva::TreeExprBuilder::MakeAnd({leftNode, rightNode});
736 temp_node = gandiva::TreeExprBuilder::MakeIf(condNode, leftNode, rightNode,
concreteArrowType(it->type));
740 if (it->type != atype::BOOL && !isBitwiseOp(it->op)) {
741 leftNode = insertUpcastNode(leftNode, it->type, it->left.type);
742 rightNode = insertUpcastNode(rightNode, it->type, it->right.type);
744 insertEqualizeUpcastNode(leftNode, rightNode, it->left.type, it->right.type);
748 if (!isBitwiseOp(it->op)) {
749 leftNode = insertUpcastNode(leftNode, it->type, it->left.type);
755 if (it->index == 0) {
758 auto subtree = subtrees.find(it->index);
759 if (subtree == subtrees.end()) {
760 subtrees.insert({it->index, temp_node});
762 subtree->second = temp_node;
765 opNodes[std::get<size_t>(it->result.datum)] = temp_node;
773 std::set<uint32_t> opHashes;
774 for (
auto const& spec : specs) {
775 if (spec.left.datum.index() == 3) {
776 opHashes.insert(spec.left.hash);
778 if (spec.right.datum.index() == 3) {
779 opHashes.insert(spec.right.hash);
783 return std::includes(hashes.begin(), hashes.end(),
784 opHashes.begin(), opHashes.end());
789 if (eInfos.empty()) {
793 for (
auto& info : eInfos) {
797 if (info.tree !=
nullptr) {
798 info.tree = gandiva::TreeExprBuilder::MakeAnd({info.tree,
tree});
808 if (info.
tree !=
nullptr && info.
filter ==
nullptr) {
818Tokenizer::Tokenizer(std::string
const& input)
982 bool isFloat =
false;
983 bool isUnsigned =
false;
1053 throw runtime_error_f(
"Unexpected token after expression: %s", tk.TokenStr.c_str());
1184 std::string binding =
id;
1190 auto pos = binding.rfind(
':');
1191 binding.erase(0,
pos + 1);
1192 binding[0] = std::toupper(binding[0]);
1193 binding.insert(binding.begin(),
'f');
1198 if (
id ==
"ifnode") {
1206 }
else if (args == 1) {
1208 }
else if (args == 2) {
1218 }
else if (
id ==
"ncfg") {
1229 }
else if (args == 1) {
1232 }
else if (args == 2) {
1246 switch (std::distance(
cfgtypes.begin(), locate)) {
1248 return std::make_unique<Node>(
1250 static_cast<uint16_t
>(std::stoi(
value)),
1253 return std::make_unique<Node>(
1255 static_cast<int16_t
>(std::stoi(
value)),
1258 return std::make_unique<Node>(
1260 static_cast<uint32_t
>(std::stoi(
value)),
1263 return std::make_unique<Node>(
1265 static_cast<int32_t
>(std::stoi(
value)),
1268 return std::make_unique<Node>(
1270 static_cast<uint64_t
>(std::stoll(
value)),
1273 return std::make_unique<Node>(
1275 static_cast<int64_t
>(std::stol(
value)),
1278 return std::make_unique<Node>(
1283 return std::make_unique<Node>(
1298 }
else if (args == 1) {
1307 node->right =
nullptr;
1345 if (locate ==
mapping.end()) {
#define O2_BUILTIN_UNREACHABLE
uint8_t lookup(const char input) noexcept
constexpr uint32_t runtime_hash(char const *str)
GLdouble GLdouble GLdouble GLdouble top
GLuint const GLchar * name
GLsizei GLsizei GLchar * source
GLsizei const GLfloat * value
GLint GLint GLsizei GLint GLenum GLenum type
typedef void(APIENTRYP PFNGLCULLFACEPROC)(GLenum mode)
GLint GLint GLint GLint GLint GLint GLint GLbitfield GLenum filter
GLsizei const GLchar *const * path
GLuint GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat t0
GLuint GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat t1
std::shared_ptr< gandiva::SelectionVector > Selection
constexpr float PIQuarter
std::shared_ptr< arrow::DataType > concreteArrowType(atype::type type)
std::shared_ptr< gandiva::Filter > createFilter(gandiva::SchemaPtr const &Schema, gandiva::ConditionPtr condition)
Function to create gandiva filter from gandiva condition.
constexpr std::array< std::string_view, 8 > cfgtypes
gandiva::ExpressionPtr makeExpression(gandiva::NodePtr node, gandiva::FieldPtr result)
Function to create gandiva projecting expression from generic gandiva expression tree.
std::shared_ptr< gandiva::Projector > createProjectorHelper(size_t nColumns, expressions::Projector *projectors, std::shared_ptr< arrow::Schema > schema, std::vector< std::shared_ptr< arrow::Field > > const &fields)
constexpr std::array< const char *, BasicOp::Conditional+1 > basicOperationsMap
gandiva::Selection createSelection(std::shared_ptr< arrow::Table > const &table, Filter const &expression)
Function for creating gandiva selection from our internal filter tree.
auto createProjection(std::shared_ptr< arrow::Table > const &table, std::shared_ptr< gandiva::Projector > const &gprojector)
const char * stringType(atype::type t)
std::vector< ColumnOperationSpec > Operations
void updateExpressionInfos(expressions::Filter const &filter, std::vector< ExpressionInfo > &eInfos)
Function for attaching gandiva filters to compatible task inputs.
constexpr auto makeDatum(T const &)
constexpr std::array< std::string_view, BasicOp::Conditional+1 > mapping
a map between BasicOp and tokens in string expressions
Operations createOperations(Filter const &expression)
Function to create an internal operation sequence from a filter tree.
std::ostream & operator<<(std::ostream &os, DatumSpec const &spec)
gandiva::ConditionPtr makeCondition(gandiva::NodePtr node)
Function to create gandiva condition expression from generic gandiva expression tree.
bool isTableCompatible(std::set< uint32_t > const &hashes, Operations const &specs)
Function to check compatibility of a given arrow schema with operation sequence.
void unknownParameterUsed(const char *name)
void walk(Node *head, L &&pred)
Tree-walker helper.
gandiva::NodePtr createExpressionTree(Operations const &opSpecs, gandiva::SchemaPtr const &Schema)
Function to create gandiva expression tree from operation sequence.
constexpr auto makeOp(T const &, size_t const &)
void updatePlaceholders(Filter &filter, InitContext &context)
Update placeholder nodes from context.
constexpr std::array< float, 9 > mathConstantsValues
values of math constants to substitute
std::string upcastTo(atype::type f)
std::shared_ptr< gandiva::Projector > createProjector(gandiva::SchemaPtr const &Schema, Operations const &opSpecs, gandiva::FieldPtr result)
Function to create gandiva projector from operation sequence.
constexpr std::array< std::string_view, 9 > mathConstants
math constants to recognize in string expressions
void updateFilterInfo(ExpressionInfo &info, std::shared_ptr< arrow::Table > &table)
Defining PrimaryVertex explicitly as messageable.
RuntimeErrorRef runtime_error(const char *)
RuntimeErrorRef runtime_error_f(const char *,...)
gandiva::Selection selection
gandiva::FilterPtr filter
An expression tree node corresponding to a column binding.
A struct, containing the root of the expression tree.
size_t designateSubtrees(Node *node, size_t index=0)
std::unique_ptr< Node > node
An expression tree node corresponding to a literal value.
LiteralValue::stored_type var_t
An expression tree node corresponding to binary or unary operation.
static std::unique_ptr< Node > parseBase(Tokenizer &tk)
static std::unique_ptr< Node > parseTier8(Tokenizer &tk)
static std::unique_ptr< Node > parseTier3(Tokenizer &tk)
static std::unique_ptr< Node > parseTier1(Tokenizer &tk)
static std::unique_ptr< Node > parseTier6(Tokenizer &tk)
static std::unique_ptr< Node > parseTier7(Tokenizer &tk)
static std::unique_ptr< Node > parsePrimary(Tokenizer &tk)
static std::unique_ptr< Node > parseTier4(Tokenizer &tk)
static std::unique_ptr< Node > parseTier2(Tokenizer &tk)
static Node parse(std::string const &input)
static OpNode opFromToken(std::string const &token)
static std::unique_ptr< Node > parseTier5(Tokenizer &tk)
A placeholder node for simple type configurable.
void reset(std::string const &input)
std::variant< uint32_t, int32_t, uint64_t, int64_t > IntegerValue
std::string::iterator current
std::variant< float, double > FloatValue
std::string IdentifierStr
From https://en.cppreference.com/w/cpp/utility/variant/visit.
std::unique_ptr< TTree > tree((TTree *) flIn.Get(std::string(o2::base::NameConf::CTFTREENAME).c_str()))