Project
Loading...
Searching...
No Matches
Expressions.cxx
Go to the documentation of this file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
15#include "arrow/table.h"
16#include "gandiva/tree_expr_builder.h"
17#include <algorithm>
18#include <iostream>
19#include <set>
20#include <stack>
21#include <unordered_map>
23
24using namespace o2::framework;
25
27{
28void unknownParameterUsed(const char* name)
29{
30 runtime_error_f("Unknown parameter used in expression: %s", name);
31}
32
34constexpr std::array<std::string_view, BasicOp::Conditional + 1> mapping{
35 "&&",
36 "||",
37 "+",
38 "-",
39 "/",
40 "*",
41 "&",
42 "|",
43 "^",
44 "<",
45 "<=",
46 ">",
47 ">=",
48 "==",
49 "!=",
50 "natan2",
51 "npow",
52 "nsqrt",
53 "nexp",
54 "nlog",
55 "nlog10",
56 "nsin",
57 "ncos",
58 "ntan",
59 "nasin",
60 "nacos",
61 "natan",
62 "nabs",
63 "nround",
64 "nbitwise_not",
65 "ifnode"};
66
67constexpr std::array<std::string_view, 8> cfgtypes{
68 "uint16_t", // 0
69 "int16_t", // 1
70 "uint32_t", // 2
71 "int32_t", // 3
72 "uint64_t", // 4
73 "int64_t", // 5
74 "float", // 6
75 "double" // 7
76};
77
79constexpr std::array<std::string_view, 9> mathConstants{
80 "Almost0",
81 "Epsilon",
82 "Almost1",
83 "VeryBig",
84 "PI",
85 "TwoPI",
86 "PIHalf",
87 "PIThird",
88 "PIQuarter"};
89
101
104constexpr std::array<const char*, BasicOp::Conditional + 1> basicOperationsMap = {
105 "and",
106 "or",
107 "add",
108 "subtract",
109 "divide",
110 "multiply",
111 "bitwise_and",
112 "bitwise_or",
113 "bitwise_xor",
114 "less_than",
115 "less_than_or_equal_to",
116 "greater_than",
117 "greater_than_or_equal_to",
118 "equal",
119 "not_equal",
120 "atan2f",
121 "powerf",
122 "sqrtf",
123 "expf",
124 "logf",
125 "log10f",
126 "sinf",
127 "cosf",
128 "tanf",
129 "asinf",
130 "acosf",
131 "atanf",
132 "absf",
133 "round",
134 "bitwise_not",
135 "if"};
136
138{
139 std::stack<NodeRecord> path;
140 auto local_index = index;
141 path.emplace(node, 0);
142
143 while (!path.empty()) {
144 auto top = path.top();
145 top.node_ptr->index = local_index;
146 path.pop();
147 if (top.node_ptr->condition != nullptr) {
148 // start new subtrees
149 index = designateSubtrees(top.node_ptr->left.get(), local_index + 1);
150 index = designateSubtrees(top.node_ptr->condition.get(), index + 1);
151 index = designateSubtrees(top.node_ptr->right.get(), index + 1);
152 } else {
153 // continue current subtree
154 if (top.node_ptr->left != nullptr) {
155 path.emplace(top.node_ptr->left.get(), 0);
156 }
157 if (top.node_ptr->right != nullptr) {
158 path.emplace(top.node_ptr->right.get(), 0);
159 }
160 }
161 }
162
163 return index;
164}
165
167{
168 node = std::make_unique<Node>(Parser::parse(input));
170}
171
172template <typename T>
173constexpr inline auto makeDatum(T const&)
174{
175 return DatumSpec{};
176}
177
178template <is_literal_like T>
179constexpr inline auto makeDatum(T const& node)
180{
181 return DatumSpec{node.value, node.type};
182}
183
184template <is_binding T>
185constexpr inline auto makeDatum(T const& node)
186{
187 return DatumSpec{node.name, node.hash, node.type};
188}
189
190template <typename T>
191constexpr inline auto makeOp(T const&, size_t const&)
192{
193 return ColumnOperationSpec{};
194}
195
196template <is_operation T>
197constexpr inline auto makeOp(T const& node, size_t const& index)
198{
199 return ColumnOperationSpec{node.op, index};
200}
201
202template <is_conditional T>
203constexpr inline auto makeOp(T const&, size_t const& index)
204{
205 return ColumnOperationSpec{BasicOp::Conditional, index};
206}
207
208std::shared_ptr<arrow::DataType> concreteArrowType(atype::type type)
209{
210 switch (type) {
211 case atype::UINT8:
212 return arrow::uint8();
213 case atype::INT8:
214 return arrow::int8();
215 case atype::INT16:
216 return arrow::int16();
217 case atype::UINT16:
218 return arrow::uint16();
219 case atype::INT32:
220 return arrow::int32();
221 case atype::UINT32:
222 return arrow::uint32();
223 case atype::INT64:
224 return arrow::int64();
225 case atype::UINT64:
226 return arrow::uint64();
227 case atype::FLOAT:
228 return arrow::float32();
229 case atype::DOUBLE:
230 return arrow::float64();
231 case atype::BOOL:
232 return arrow::boolean();
233 default:
234 return nullptr;
235 }
236}
237
238std::string upcastTo(atype::type f)
239{
240 switch (f) {
241 case atype::INT32:
242 return "castINT";
243 case atype::INT64:
244 return "castBIGINT";
245 case atype::FLOAT:
246 return "castFLOAT4";
247 case atype::DOUBLE:
248 return "castFLOAT8";
249 default:
250 throw runtime_error_f("Do not know how to cast to %s", stringType(f));
251 }
252}
253
254std::ostream& operator<<(std::ostream& os, DatumSpec const& spec)
255{
256 std::visit(
258 [&os](LiteralNode::var_t&& arg) {
259 std::visit(
260 [&os](auto&& arg) { os << arg; },
261 arg);
262 },
263 [&os](size_t&& arg) { os << arg; },
264 [&os](std::string&& arg) { os << arg; },
265 [](auto&&) {}},
266 spec.datum);
267 return os;
268}
269
271{
272 if (filter.node == nullptr && !filter.input.empty()) {
273 filter.parse();
274 }
275 expressions::walk(filter.node.get(), [&](Node* node) {
276 if (node->self.index() == 3) {
277 std::get_if<3>(&node->self)->reset(context);
278 }
279 });
280}
281
282const char* stringType(atype::type t)
283{
284 switch (t) {
285 case atype::BOOL:
286 return "bool";
287 case atype::DOUBLE:
288 return "double";
289 case atype::FLOAT:
290 return "float";
291 case atype::INT8:
292 return "int8";
293 case atype::INT16:
294 return "int16";
295 case atype::INT32:
296 return "int32";
297 case atype::INT64:
298 return "int64";
299 case atype::UINT8:
300 return "uint8";
301 case atype::UINT16:
302 return "uint16";
303 case atype::UINT32:
304 return "uint32";
305 case atype::UINT64:
306 return "uint64";
307 default:
308 return "unsupported";
309 }
311}
312
314{
315 Operations OperationSpecs;
316 std::stack<NodeRecord> path;
317 auto isLeaf = [](Node const* const node) {
318 return ((node->left == nullptr) && (node->right == nullptr));
319 };
320
321 auto processLeaf = [](Node const* const node) {
322 return std::visit(
323 [](auto const& n) { return makeDatum(n); },
324 node->self);
325 };
326
327 size_t index = 0;
328 // insert the top node into stack
329 path.emplace(expression.node.get(), index++);
330
331 // while the stack is not empty
332 while (!path.empty()) {
333 auto top = path.top();
334
335 // create operation spec, pop the node and add its children
336 auto operationSpec =
337 std::visit(
338 [&](auto const& n) { return makeOp(n, top.node_ptr->index); },
339 top.node_ptr->self);
340
341 operationSpec.result = DatumSpec{top.index, operationSpec.type};
342 path.pop();
343
344 auto* left = top.node_ptr->left.get();
345 bool leftLeaf = isLeaf(left);
346 size_t li = 0;
347 if (leftLeaf) {
348 operationSpec.left = processLeaf(left);
349 } else {
350 li = index;
351 operationSpec.left = DatumSpec{index++, atype::NA};
352 }
353
354 decltype(left) right = nullptr;
355 if (top.node_ptr->right != nullptr) {
356 right = top.node_ptr->right.get();
357 }
358 bool rightLeaf = true;
359 if (right != nullptr) {
360 rightLeaf = isLeaf(right);
361 }
362 size_t ri = 0;
363 auto isUnary = false;
364 if (top.node_ptr->right == nullptr) {
365 operationSpec.right = DatumSpec{};
366 isUnary = true;
367 } else {
368 if (rightLeaf) {
369 operationSpec.right = processLeaf(right);
370 } else {
371 ri = index;
372 operationSpec.right = DatumSpec{index++, atype::NA};
373 }
374 }
375
376 decltype(left) condition = nullptr;
377 if (top.node_ptr->condition != nullptr) {
378 condition = top.node_ptr->condition.get();
379 }
380 bool condleaf = condition != nullptr ? isLeaf(condition) : true;
381 size_t ci = 0;
382 if (condition != nullptr) {
383 if (condleaf) {
384 operationSpec.condition = processLeaf(condition);
385 } else {
386 ci = index;
387 operationSpec.condition = DatumSpec{index++, atype::BOOL};
388 }
389 } else {
390 operationSpec.condition = DatumSpec{};
391 }
392
393 OperationSpecs.push_back(std::move(operationSpec));
394 if (!leftLeaf) {
395 path.emplace(left, li);
396 }
397 if (!isUnary && !rightLeaf) {
398 path.emplace(right, ri);
399 }
400 if (!condleaf) {
401 path.emplace(condition, ci);
402 }
403 }
404 // at this stage the operations vector is created, but the field types are
405 // only set for the logical operations and leaf nodes
406 std::vector<atype::type> resultTypes;
407 resultTypes.resize(OperationSpecs.size());
408
409 auto inferResultType = [&resultTypes](BasicOp op, DatumSpec& left, DatumSpec& right) {
410 // if the left datum is monostate (error)
411 if (left.datum.index() == 0) {
412 throw runtime_error("Malformed operation spec: empty left datum");
413 }
414
415 // check if the datums are references
416 if (left.datum.index() == 1) {
417 left.type = resultTypes[std::get<size_t>(left.datum)];
418 }
419
420 if (right.datum.index() == 1) {
421 right.type = resultTypes[std::get<size_t>(right.datum)];
422 }
423
424 auto t1 = left.type;
425 auto t2 = right.type;
426 // if the right datum is monostate (unary op)
427 if (right.datum.index() == 0) {
428 if (t1 == atype::DOUBLE) {
429 return atype::DOUBLE;
430 }
431 return atype::FLOAT;
432 }
433
434 if (t1 == t2) {
435 return t1;
436 }
437
438 auto isIntType = [](auto t) {
439 return (t == atype::UINT8) || (t == atype::INT8) || (t == atype::UINT16) || (t == atype::INT16) || (t == atype::UINT32) || (t == atype::INT32) || (t == atype::UINT64) || (t == atype::INT64);
440 };
441
442 auto isBitwiseOp = [](auto o) {
444 };
445
446 if (isIntType(t1)) {
447 if (t2 == atype::FLOAT && !isBitwiseOp(op)) {
448 return atype::FLOAT;
449 }
450 if (t2 == atype::DOUBLE && !isBitwiseOp(op)) {
451 return atype::DOUBLE;
452 }
453 if (isIntType(t2)) {
454 if (t1 > t2) {
455 return t1;
456 }
457 return t2;
458 }
459 }
460 if (t1 == atype::FLOAT) {
461 if (isIntType(t2) && !isBitwiseOp(op)) {
462 return atype::FLOAT;
463 }
464 if (t2 == atype::DOUBLE) {
465 return atype::DOUBLE;
466 }
467 }
468 if (t1 == atype::DOUBLE) {
469 return atype::DOUBLE;
470 }
471
472 if (isIntType(t1) && isBitwiseOp(op)) {
473 return t1;
474 }
475 if (isIntType(t2) && isBitwiseOp(op)) {
476 return t2;
477 }
478
479 throw runtime_error_f("Invalid combination of argument types %s and %s", stringType(t1), stringType(t2));
480 };
481
482 for (auto it = OperationSpecs.rbegin(); it != OperationSpecs.rend(); ++it) {
483 auto type = inferResultType(it->op, it->left, it->right);
484 if (it->type == atype::NA) {
485 it->type = type;
486 }
487
488 it->result.type = it->type;
489 resultTypes[std::get<size_t>(it->result.datum)] = it->type;
490 }
491
492 return OperationSpecs;
493}
494
495gandiva::ConditionPtr makeCondition(gandiva::NodePtr node)
496{
497 return gandiva::TreeExprBuilder::MakeCondition(std::move(node));
498}
499
500gandiva::ExpressionPtr makeExpression(gandiva::NodePtr node, gandiva::FieldPtr result)
501{
502 return gandiva::TreeExprBuilder::MakeExpression(std::move(node), std::move(result));
503}
504
505std::shared_ptr<gandiva::Filter>
506 createFilter(gandiva::SchemaPtr const& Schema, Operations const& opSpecs)
507{
508 std::shared_ptr<gandiva::Filter> filter;
509 auto s = gandiva::Filter::Make(Schema,
510 makeCondition(createExpressionTree(opSpecs, Schema)),
511 &filter);
512 if (!s.ok()) {
513 throw runtime_error_f("Failed to create filter: %s", s.ToString().c_str());
514 }
515 return filter;
516}
517
518std::shared_ptr<gandiva::Filter>
519 createFilter(gandiva::SchemaPtr const& Schema, gandiva::ConditionPtr condition)
520{
521 std::shared_ptr<gandiva::Filter> filter;
522 auto s = gandiva::Filter::Make(Schema,
523 condition,
524 &filter);
525 if (!s.ok()) {
526 throw runtime_error_f("Failed to create filter: %s", s.ToString().c_str());
527 }
528 return filter;
529}
530
531std::shared_ptr<gandiva::Projector>
532 createProjector(gandiva::SchemaPtr const& Schema, Operations const& opSpecs, gandiva::FieldPtr result)
533{
534 std::shared_ptr<gandiva::Projector> projector;
535 auto s = gandiva::Projector::Make(Schema,
536 {makeExpression(createExpressionTree(opSpecs, Schema), std::move(result))},
537 &projector);
538 if (!s.ok()) {
539 throw runtime_error_f("Failed to create projector: %s", s.ToString().c_str());
540 }
541 return projector;
542}
543
544std::shared_ptr<gandiva::Projector>
545 createProjector(gandiva::SchemaPtr const& Schema, Projector&& p, gandiva::FieldPtr result)
546{
547 return createProjector(Schema, createOperations(p), std::move(result));
548}
549
550std::shared_ptr<gandiva::Projector> createProjectorHelper(size_t nColumns, expressions::Projector* projectors,
551 std::shared_ptr<arrow::Schema> schema,
552 std::vector<std::shared_ptr<arrow::Field>> const& fields)
553{
554 std::vector<gandiva::ExpressionPtr> expressions;
555
556 for (size_t ci = 0; ci < nColumns; ++ci) {
557 expressions.push_back(
561 schema),
562 fields[ci]));
563 }
564
565 std::shared_ptr<gandiva::Projector> projector;
566 auto s = gandiva::Projector::Make(
567 schema,
568 expressions,
569 &projector);
570 if (s.ok()) {
571 return projector;
572 }
573 throw o2::framework::runtime_error_f("Failed to create projector: %s", s.ToString().c_str());
574}
575
576gandiva::Selection createSelection(std::shared_ptr<arrow::Table> const& table, std::shared_ptr<gandiva::Filter> const& gfilter)
577{
578 gandiva::Selection selection;
579 auto s = gandiva::SelectionVector::MakeInt64(table->num_rows(),
580 arrow::default_memory_pool(),
581 &selection);
582 if (!s.ok()) {
583 throw runtime_error_f("Cannot allocate selection vector %s", s.ToString().c_str());
584 }
585 if (table->num_rows() == 0) {
586 return selection;
587 }
588 arrow::TableBatchReader reader(*table);
589 std::shared_ptr<arrow::RecordBatch> batch;
590 while (true) {
591 s = reader.ReadNext(&batch);
592 if (!s.ok()) {
593 throw runtime_error_f("Cannot read batches from table %s", s.ToString().c_str());
594 }
595 if (batch == nullptr) {
596 break;
597 }
598 s = gfilter->Evaluate(*batch, selection);
599 if (!s.ok()) {
600 throw runtime_error_f("Cannot apply filter %s", s.ToString().c_str());
601 }
602 }
603
604 return selection;
605}
606
607gandiva::Selection createSelection(std::shared_ptr<arrow::Table> const& table,
608 Filter const& expression)
609{
610 return createSelection(table, createFilter(table->schema(), createOperations(std::move(expression))));
611}
612
613auto createProjection(std::shared_ptr<arrow::Table> const& table, std::shared_ptr<gandiva::Projector> const& gprojector)
614{
615 arrow::TableBatchReader reader(*table);
616 std::shared_ptr<arrow::RecordBatch> batch;
617 std::shared_ptr<arrow::ArrayVector> v;
618 while (true) {
619 auto s = reader.ReadNext(&batch);
620 if (!s.ok()) {
621 throw runtime_error_f("Cannot read batches from table %s", s.ToString().c_str());
622 }
623 if (batch == nullptr) {
624 break;
625 }
626 s = gprojector->Evaluate(*batch, arrow::default_memory_pool(), v.get());
627 if (!s.ok()) {
628 throw runtime_error_f("Cannot apply projector %s", s.ToString().c_str());
629 }
630 }
631 return v;
632}
633
634gandiva::NodePtr createExpressionTree(Operations const& opSpecs,
635 gandiva::SchemaPtr const& Schema)
636{
637 std::vector<gandiva::NodePtr> opNodes;
638 opNodes.resize(opSpecs.size());
639 std::fill(opNodes.begin(), opNodes.end(), nullptr);
640 std::unordered_map<std::string, gandiva::NodePtr> fieldNodes;
641 std::unordered_map<size_t, gandiva::NodePtr> subtrees;
642
643 auto datumNode = [Schema, &opNodes, &fieldNodes](DatumSpec const& spec) {
644 if (spec.datum.index() == 0) {
645 return gandiva::NodePtr(nullptr);
646 }
647 if (spec.datum.index() == 1) {
648 return opNodes[std::get<size_t>(spec.datum)];
649 }
650
651 if (spec.datum.index() == 2) {
652 auto content = std::get<LiteralNode::var_t>(spec.datum);
653 switch (content.index()) {
654 case 0: // int
655 return gandiva::TreeExprBuilder::MakeLiteral(static_cast<int32_t>(std::get<int>(content)));
656 case 1: // bool
657 return gandiva::TreeExprBuilder::MakeLiteral(std::get<bool>(content));
658 case 2: // float
659 return gandiva::TreeExprBuilder::MakeLiteral(std::get<float>(content));
660 case 3: // double
661 return gandiva::TreeExprBuilder::MakeLiteral(std::get<double>(content));
662 case 4: // uint8
663 return gandiva::TreeExprBuilder::MakeLiteral(std::get<uint8_t>(content));
664 case 5: // int64
665 return gandiva::TreeExprBuilder::MakeLiteral(std::get<int64_t>(content));
666 case 6: // int16
667 return gandiva::TreeExprBuilder::MakeLiteral(std::get<int16_t>(content));
668 case 7: // uint16
669 return gandiva::TreeExprBuilder::MakeLiteral(std::get<uint16_t>(content));
670 case 8: // int8
671 return gandiva::TreeExprBuilder::MakeLiteral(std::get<int8_t>(content));
672 case 9: // uint32
673 return gandiva::TreeExprBuilder::MakeLiteral(std::get<uint32_t>(content));
674 case 10: // uint64
675 return gandiva::TreeExprBuilder::MakeLiteral(std::get<uint64_t>(content));
676 default:
677 throw runtime_error("Malformed LiteralNode");
678 }
679 }
680
681 if (spec.datum.index() == 3) {
682 auto name = std::get<std::string>(spec.datum);
683 auto lookup = fieldNodes.find(name);
684 if (lookup != fieldNodes.end()) {
685 return lookup->second;
686 }
687 auto field = Schema->GetFieldByName(name);
688 if (field == nullptr) {
689 throw runtime_error_f("Cannot find field \"%s\"", name.c_str());
690 }
691 auto node = gandiva::TreeExprBuilder::MakeField(field);
692 fieldNodes.insert({name, node});
693 return node;
694 }
695 throw runtime_error("Malformed DatumSpec");
696 };
697
698 auto insertUpcastNode = [](gandiva::NodePtr node, atype::type t0, atype::type t) {
699 if (t != t0) {
700 auto upcast = gandiva::TreeExprBuilder::MakeFunction(upcastTo(t0), {node}, concreteArrowType(t0));
701 node = upcast;
702 }
703 return node;
704 };
705
706 auto insertEqualizeUpcastNode = [](gandiva::NodePtr& node1, gandiva::NodePtr& node2, atype::type t1, atype::type t2) {
707 if (t2 > t1) {
708 auto upcast = gandiva::TreeExprBuilder::MakeFunction(upcastTo(t2), {node1}, concreteArrowType(t2));
709 node1 = upcast;
710 } else if (t1 > t2) {
711 auto upcast = gandiva::TreeExprBuilder::MakeFunction(upcastTo(t1), {node2}, concreteArrowType(t1));
712 node2 = upcast;
713 }
714 };
715
716 auto isBitwiseOp = [](auto o) {
718 };
719
720 gandiva::NodePtr tree = nullptr;
721 for (auto it = opSpecs.rbegin(); it != opSpecs.rend(); ++it) {
722 auto leftNode = datumNode(it->left);
723 auto rightNode = datumNode(it->right);
724 auto condNode = datumNode(it->condition);
725
726 gandiva::NodePtr temp_node;
727
728 switch (it->op) {
730 temp_node = gandiva::TreeExprBuilder::MakeOr({leftNode, rightNode});
731 break;
733 temp_node = gandiva::TreeExprBuilder::MakeAnd({leftNode, rightNode});
734 break;
736 temp_node = gandiva::TreeExprBuilder::MakeIf(condNode, leftNode, rightNode, concreteArrowType(it->type));
737 break;
738 default:
739 if (it->op < BasicOp::Sqrt) {
740 if (it->type != atype::BOOL && !isBitwiseOp(it->op)) {
741 leftNode = insertUpcastNode(leftNode, it->type, it->left.type);
742 rightNode = insertUpcastNode(rightNode, it->type, it->right.type);
743 } else if (it->op == BasicOp::Equal || it->op == BasicOp::NotEqual) {
744 insertEqualizeUpcastNode(leftNode, rightNode, it->left.type, it->right.type);
745 }
746 temp_node = gandiva::TreeExprBuilder::MakeFunction(basicOperationsMap[it->op], {leftNode, rightNode}, concreteArrowType(it->type));
747 } else {
748 if (!isBitwiseOp(it->op)) {
749 leftNode = insertUpcastNode(leftNode, it->type, it->left.type);
750 }
751 temp_node = gandiva::TreeExprBuilder::MakeFunction(basicOperationsMap[it->op], {leftNode}, concreteArrowType(it->type));
752 }
753 break;
754 }
755 if (it->index == 0) {
756 tree = temp_node;
757 } else {
758 auto subtree = subtrees.find(it->index);
759 if (subtree == subtrees.end()) {
760 subtrees.insert({it->index, temp_node});
761 } else {
762 subtree->second = temp_node;
763 }
764 }
765 opNodes[std::get<size_t>(it->result.datum)] = temp_node;
766 }
767
768 return tree;
769}
770
771bool isTableCompatible(std::set<uint32_t> const& hashes, Operations const& specs)
772{
773 std::set<uint32_t> opHashes;
774 for (auto const& spec : specs) {
775 if (spec.left.datum.index() == 3) {
776 opHashes.insert(spec.left.hash);
777 }
778 if (spec.right.datum.index() == 3) {
779 opHashes.insert(spec.right.hash);
780 }
781 }
782
783 return std::includes(hashes.begin(), hashes.end(),
784 opHashes.begin(), opHashes.end());
785}
786
787void updateExpressionInfos(expressions::Filter const& filter, std::vector<ExpressionInfo>& eInfos)
788{
789 if (eInfos.empty()) {
790 throw runtime_error("Empty expression info vector.");
791 }
793 for (auto& info : eInfos) {
794 if (isTableCompatible(info.hashes, ops)) {
795 auto tree = createExpressionTree(ops, info.schema);
797 if (info.tree != nullptr) {
798 info.tree = gandiva::TreeExprBuilder::MakeAnd({info.tree, tree});
799 } else {
800 info.tree = tree;
801 }
802 }
803 }
804}
805
806void updateFilterInfo(ExpressionInfo& info, std::shared_ptr<arrow::Table>& table)
807{
808 if (info.tree != nullptr && info.filter == nullptr) {
810 }
811 if (info.tree != nullptr && info.filter != nullptr && info.resetSelection == true) {
813 info.resetSelection = false;
814 }
815}
816
818Tokenizer::Tokenizer(std::string const& input)
819 : source{input},
820 IdentifierStr{""},
821 StrValue{""},
822 IntegerValue{0},
823 FloatValue{0.f}
824{
825 LastChar = ' ';
826 if (!source.empty()) {
827 source.erase(std::remove_if(source.begin(), source.end(), ::isspace), source.end()); // strip whitespaces
828 source.erase(std::remove(source.begin(), source.end(), '\"'), source.end()); // strip quotes
829 }
830 current = source.begin();
831}
832
833void Tokenizer::reset(std::string const& input)
834{
835 LastChar = ' ';
836 IdentifierStr = "";
837 StrValue = "";
838 IntegerValue = 0;
839 FloatValue = 0.f;
840 source = input;
841 if (!source.empty()) {
842 source.erase(std::remove_if(source.begin(), source.end(), ::isspace), source.end()); // strip whitespaces
843 source.erase(std::remove(source.begin(), source.end(), '\"'), source.end()); // strip quotes
844 }
845 current = source.begin();
847}
848
850{
851 // skip initial space
852 if (isspace(LastChar)) {
853 pop();
854 }
855 // logical or bitwise OR
856 if (LastChar == '|') {
858 if (peek() == '|') {
859 pop();
861 pop();
864 return currentToken;
865 } else {
866 pop();
869 return currentToken;
870 }
871 }
872 // logical or bitwise AND
873 if (LastChar == '&') {
875 if (peek() == '&') {
876 pop();
878 pop();
881 return currentToken;
882 } else {
883 pop();
886 return currentToken;
887 }
888 }
889 // less than or less or equal than
890 if (LastChar == '<') {
892 if (peek() == '=') {
893 pop();
895 pop();
898 return currentToken;
899 } else {
900 pop();
903 return currentToken;
904 }
905 }
906 // greater than or greater or equal than
907 if (LastChar == '>') {
909 if (peek() == '=') {
910 pop();
912 pop();
915 return currentToken;
916 } else {
917 pop();
920 return currentToken;
921 }
922 }
923 // equal or error
924 if (LastChar == '=') {
926 if (peek() == '=') {
927 pop();
929 pop();
932 return currentToken;
933 } else {
934 pop();
937 return currentToken;
938 }
939 }
940 // not equal or error
941 if (LastChar == '!') {
943 if (peek() == '=') {
944 pop();
946 pop();
949 return currentToken;
950 } else {
951 pop();
954 return currentToken;
955 }
956 }
957 // unambiguous single-character binary operations: addition, multiplication, subtraction, division, bitwise XOR
958 if (LastChar == '+' || LastChar == '*' || (LastChar == '-' && (currentToken != Token::BinaryOp && currentToken != '(' && currentToken != Token::Unexpected)) || LastChar == '/' || LastChar == '^') {
960 pop();
963 return currentToken;
964 }
965 // identifier: column, function, constant
966 if (isalpha(LastChar)) {
967 // identifier
969 pop();
970 while (isalnum(LastChar) || (LastChar == '_') || (LastChar == ':')) {
972 pop();
973 }
976 return currentToken;
977 }
978 // number: integer, unsigned integer or float
979 if (isdigit(LastChar) || LastChar == '.' || (LastChar == '-' && isdigit(peek()))) {
980 // number
981 StrValue = "";
982 bool isFloat = false;
983 bool isUnsigned = false;
984 do {
986 pop();
987 } while (isdigit(LastChar) || LastChar == '.');
988 if (LastChar == 'f') {
989 isFloat = true;
990 pop();
991 }
992 if (LastChar == 'u') {
993 isUnsigned = true;
994 pop();
995 }
996 if (std::find(StrValue.begin(), StrValue.end(), '.') == StrValue.end() && !isFloat) {
997 if (!isUnsigned) {
998 IntegerValue = atoi(StrValue.c_str());
999 } else {
1000 IntegerValue = static_cast<unsigned int>(atoi(StrValue.c_str()));
1001 }
1004 return currentToken;
1005 }
1006 if (isFloat) {
1007 FloatValue = strtof(StrValue.c_str(), nullptr);
1008 } else {
1009 FloatValue = strtod(StrValue.c_str(), nullptr);
1010 }
1013 return currentToken;
1014 }
1015 // end-of-line
1016 if (LastChar == '\0') {
1019 return currentToken;
1020 }
1021 // generic character
1024 pop();
1025 return currentToken;
1026}
1027
1029{
1030 if (current != source.end()) {
1031 LastChar = *current;
1032 ++current;
1033 } else {
1034 LastChar = '\0';
1035 }
1036}
1037
1039{
1040 if (current != source.end()) {
1041 return *current;
1042 } else {
1043 return '\0';
1044 }
1045}
1046
1047Node Parser::parse(std::string const& input)
1048{
1049 auto tk = Tokenizer(input);
1050 tk.nextToken();
1051 auto node = parsePrimary(tk);
1052 if (tk.currentToken != Token::EoL) {
1053 throw runtime_error_f("Unexpected token after expression: %s", tk.TokenStr.c_str());
1054 }
1055 return *node.get();
1056}
1057
1058std::unique_ptr<Node> Parser::parsePrimary(Tokenizer& tk)
1059{
1060 auto root = parseTier1(tk);
1061 while (tk.TokenStr == "||") {
1062 auto opnode = std::make_unique<Node>(OpNode{BasicOp::LogicalOr}, std::move(root), LiteralNode{-1});
1063 root.swap(opnode);
1064 tk.nextToken();
1065 root->right = parseTier1(tk);
1066 }
1067 return root;
1068}
1069
1070std::unique_ptr<Node> Parser::parseTier1(Tokenizer& tk)
1071{
1072 auto root = parseTier2(tk);
1073 while (tk.TokenStr == "&&") {
1074 auto opnode = std::make_unique<Node>(OpNode{BasicOp::LogicalAnd}, std::move(root), LiteralNode{-1});
1075 root.swap(opnode);
1076 tk.nextToken();
1077 root->right = parseTier2(tk);
1078 }
1079 return root;
1080}
1081
1082std::unique_ptr<Node> Parser::parseTier2(Tokenizer& tk)
1083{
1084 auto root = parseTier3(tk);
1085 while (tk.TokenStr == "|") {
1086 auto opnode = std::make_unique<Node>(OpNode{BasicOp::BitwiseOr}, std::move(root), LiteralNode{-1});
1087 root.swap(opnode);
1088 tk.nextToken();
1089 root->right = parseTier3(tk);
1090 }
1091 return root;
1092}
1093
1094std::unique_ptr<Node> Parser::parseTier3(Tokenizer& tk)
1095{
1096 auto root = parseTier4(tk);
1097 while (tk.TokenStr == "^") {
1098 auto opnode = std::make_unique<Node>(OpNode{BasicOp::BitwiseXor}, std::move(root), LiteralNode{-1});
1099 root.swap(opnode);
1100 tk.nextToken();
1101 root->right = parseTier4(tk);
1102 }
1103 return root;
1104}
1105
1106std::unique_ptr<Node> Parser::parseTier4(Tokenizer& tk)
1107{
1108 auto root = parseTier5(tk);
1109 while (tk.TokenStr == "&") {
1110 auto opnode = std::make_unique<Node>(OpNode{BasicOp::BitwiseAnd}, std::move(root), LiteralNode{-1});
1111 root.swap(opnode);
1112 tk.nextToken();
1113 root->right = parseTier5(tk);
1114 }
1115 return root;
1116}
1117
1118std::unique_ptr<Node> Parser::parseTier5(Tokenizer& tk)
1119{
1120 auto root = parseTier6(tk);
1121 while (tk.TokenStr == "==" || tk.TokenStr == "!=") {
1122 auto opnode = std::make_unique<Node>(opFromToken(tk.TokenStr), std::move(root), LiteralNode{-1});
1123 root.swap(opnode);
1124 tk.nextToken();
1125 root->right = parseTier6(tk);
1126 }
1127 return root;
1128}
1129
1130std::unique_ptr<Node> Parser::parseTier6(Tokenizer& tk)
1131{
1132 auto root = parseTier7(tk);
1133 while (tk.TokenStr == "<" || tk.TokenStr == "<=" || tk.TokenStr == "=>" || tk.TokenStr == ">") {
1134 auto opnode = std::make_unique<Node>(opFromToken(tk.TokenStr), std::move(root), LiteralNode{-1});
1135 root.swap(opnode);
1136 tk.nextToken();
1137 root->right = parseTier7(tk);
1138 }
1139 return root;
1140}
1141
1142std::unique_ptr<Node> Parser::parseTier7(Tokenizer& tk)
1143{
1144 auto root = parseTier8(tk);
1145 while (tk.TokenStr == "+" || tk.TokenStr == "-") {
1146 auto opnode = std::make_unique<Node>(opFromToken(tk.TokenStr), std::move(root), LiteralNode{-1});
1147 root.swap(opnode);
1148 tk.nextToken();
1149 root->right = parseTier8(tk);
1150 }
1151 return root;
1152}
1153
1154std::unique_ptr<Node> Parser::parseTier8(Tokenizer& tk)
1155{
1156 auto root = parseBase(tk);
1157 while (tk.TokenStr == "*" || tk.TokenStr == "/") {
1158 auto opnode = std::make_unique<Node>(opFromToken(tk.TokenStr), std::move(root), LiteralNode{-1});
1159 root.swap(opnode);
1160 tk.nextToken();
1161 root->right = parseBase(tk);
1162 }
1163 return root;
1164}
1165
1166std::unique_ptr<Node> Parser::parseBase(Tokenizer& tk)
1167{
1168 // parentheses
1169 if (tk.currentToken == '(') {
1170 tk.nextToken();
1171 auto node = parsePrimary(tk);
1172 if (tk.currentToken != ')') {
1173 throw runtime_error_f("Expected \")\" got %s", tk.TokenStr.c_str());
1174 }
1175 tk.nextToken();
1176 return node;
1177 }
1178
1179 // identifier or function call
1180 if (tk.currentToken == Token::Identifier) {
1181 std::string id = tk.IdentifierStr;
1182 tk.nextToken();
1183 if (tk.currentToken != '(') { // binding node or a constant
1184 std::string binding = id;
1185 auto posc = std::find(mathConstants.begin(), mathConstants.end(), id);
1186 if (posc != mathConstants.end()) { // constant
1187 return std::make_unique<Node>(LiteralNode{mathConstantsValues[std::distance(mathConstants.begin(), posc)]});
1188 }
1189 // binding node
1190 auto pos = binding.rfind(':');
1191 binding.erase(0, pos + 1);
1192 binding[0] = std::toupper(binding[0]);
1193 binding.insert(binding.begin(), 'f');
1194 return std::make_unique<Node>(BindingNode{runtime_hash(id.c_str()), atype::FLOAT}, binding);
1195 }
1196
1197 // function call
1198 if (id == "ifnode") { // conditional, 3 args
1199 auto node = std::make_unique<Node>(ConditionalNode{}, LiteralNode{-1}, LiteralNode{-1}, LiteralNode{-1});
1200 int args = 0;
1201 while (tk.currentToken != ')') {
1202 do {
1203 tk.nextToken();
1204 if (args == 0) {
1205 node->condition = parsePrimary(tk);
1206 } else if (args == 1) {
1207 node->left = parsePrimary(tk);
1208 } else if (args == 2) {
1209 node->right = parsePrimary(tk);
1210 } else {
1211 throw runtime_error_f("Extra argument in a conditional: %s", tk.TokenStr.c_str());
1212 }
1213 ++args;
1214 } while (tk.currentToken == ',');
1215 }
1216 tk.nextToken();
1217 return node;
1218 } else if (id == "ncfg") { // configurable placeholder, 3 args none of them can be expressions
1219 int args = 0;
1220 std::string type;
1221 std::string value;
1222 std::string path;
1223 while (tk.currentToken != ')') {
1224 do {
1225 tk.nextToken();
1226 if (args == 0) { // type
1227 type = tk.TokenStr;
1228 tk.nextToken();
1229 } else if (args == 1) { // value
1230 value = tk.TokenStr;
1231 tk.nextToken();
1232 } else if (args == 2) { // path
1233 path = tk.TokenStr;
1234 tk.nextToken();
1235 } else {
1236 throw runtime_error_f("Extra argument in configurable: %s", tk.TokenStr.c_str());
1237 }
1238 ++args;
1239 } while (tk.currentToken == ',');
1240 }
1241 tk.nextToken();
1242 auto locate = std::find(cfgtypes.begin(), cfgtypes.end(), type);
1243 if (locate == cfgtypes.end()) {
1244 throw runtime_error_f("Unsupported type in configurable: %s", type.c_str());
1245 }
1246 switch (std::distance(cfgtypes.begin(), locate)) {
1247 case 0:
1248 return std::make_unique<Node>(
1250 static_cast<uint16_t>(std::stoi(value)),
1251 std::move(path)));
1252 case 1:
1253 return std::make_unique<Node>(
1255 static_cast<int16_t>(std::stoi(value)),
1256 std::move(path)));
1257 case 2:
1258 return std::make_unique<Node>(
1260 static_cast<uint32_t>(std::stoi(value)),
1261 std::move(path)));
1262 case 3:
1263 return std::make_unique<Node>(
1265 static_cast<int32_t>(std::stoi(value)),
1266 std::move(path)));
1267 case 4:
1268 return std::make_unique<Node>(
1270 static_cast<uint64_t>(std::stoll(value)),
1271 std::move(path)));
1272 case 5:
1273 return std::make_unique<Node>(
1275 static_cast<int64_t>(std::stol(value)),
1276 std::move(path)));
1277 case 6:
1278 return std::make_unique<Node>(
1280 std::stof(value),
1281 std::move(path)));
1282 case 7:
1283 return std::make_unique<Node>(
1285 std::stod(value),
1286 std::move(path)));
1287 default:
1288 throw runtime_error_f("Unsupported type in configurable: %s", type.c_str());
1289 }
1290 } else { // normal function
1291 auto node = std::make_unique<Node>(opFromToken(id), LiteralNode{-1}, LiteralNode{-1});
1292 int args = 0;
1293 while (tk.currentToken != ')') {
1294 do {
1295 tk.nextToken();
1296 if (args == 0) {
1297 node->left = parsePrimary(tk);
1298 } else if (args == 1) {
1299 node->right = parsePrimary(tk);
1300 } else {
1301 throw runtime_error_f("Extra argument in a function call: %s", tk.TokenStr.c_str());
1302 }
1303 ++args;
1304 } while (tk.currentToken == ',');
1305 }
1306 if (args == 1) {
1307 node->right = nullptr;
1308 }
1309 tk.nextToken();
1310 return node;
1311 }
1312 }
1313
1314 // number
1315 if (tk.currentToken == Token::FloatNumber) {
1316 tk.nextToken();
1317 switch (tk.FloatValue.index()) {
1318 case 0:
1319 return std::make_unique<Node>(LiteralNode{get<0>(tk.FloatValue)});
1320 case 1:
1321 return std::make_unique<Node>(LiteralNode{get<1>(tk.FloatValue)});
1322 }
1323 }
1325 tk.nextToken();
1326 switch (tk.IntegerValue.index()) {
1327 case 0:
1328 return std::make_unique<Node>(LiteralNode{get<0>(tk.IntegerValue)});
1329 case 1:
1330 return std::make_unique<Node>(LiteralNode{get<1>(tk.IntegerValue)});
1331 case 2:
1332 return std::make_unique<Node>(LiteralNode{get<2>(tk.IntegerValue)});
1333 case 3:
1334 return std::make_unique<Node>(LiteralNode{get<3>(tk.IntegerValue)});
1335 }
1336 }
1337
1338 // error
1339 throw runtime_error_f("Unexpected token %s in operand", tk.TokenStr.c_str());
1340}
1341
1342OpNode Parser::opFromToken(std::string const& token)
1343{
1344 auto locate = std::find(mapping.begin(), mapping.end(), token);
1345 if (locate == mapping.end()) {
1346 throw runtime_error_f("No operation \"%s\" defined", token.c_str());
1347 }
1348 return OpNode{static_cast<BasicOp>(std::distance(mapping.begin(), locate))};
1349}
1350
1351} // namespace o2::framework::expressions
#define O2_BUILTIN_UNREACHABLE
uint8_t lookup(const char input) noexcept
uint32_t op
bool o
useful math constants
uint16_t pos
Definition RawData.h:3
uint32_t gfilter
Definition RawData.h:6
constexpr uint32_t runtime_hash(char const *str)
GLdouble n
Definition glcorearb.h:1982
GLuint64EXT * result
Definition glcorearb.h:5662
const GLdouble * v
Definition glcorearb.h:832
GLuint index
Definition glcorearb.h:781
GLdouble GLdouble GLdouble GLdouble top
Definition glcorearb.h:4077
GLuint const GLchar * name
Definition glcorearb.h:781
GLdouble GLdouble right
Definition glcorearb.h:4077
GLdouble f
Definition glcorearb.h:310
GLsizei GLsizei GLchar * source
Definition glcorearb.h:798
GLsizei const GLfloat * value
Definition glcorearb.h:819
GLint GLint GLsizei GLint GLenum GLenum type
Definition glcorearb.h:275
GLint left
Definition glcorearb.h:1979
typedef void(APIENTRYP PFNGLCULLFACEPROC)(GLenum mode)
GLint GLint GLint GLint GLint GLint GLint GLbitfield GLenum filter
Definition glcorearb.h:1308
GLsizei const GLchar *const * path
Definition glcorearb.h:3591
GLuint GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat t0
Definition glcorearb.h:5034
GLuint id
Definition glcorearb.h:650
GLuint GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat t1
Definition glcorearb.h:5034
std::shared_ptr< gandiva::SelectionVector > Selection
Definition Expressions.h:46
constexpr float Almost0
constexpr float Epsilon
constexpr float TwoPI
constexpr float PI
constexpr float PIQuarter
constexpr float PIHalf
constexpr float VeryBig
constexpr float Almost1
constexpr float PIThird
std::shared_ptr< arrow::DataType > concreteArrowType(atype::type type)
std::shared_ptr< gandiva::Filter > createFilter(gandiva::SchemaPtr const &Schema, gandiva::ConditionPtr condition)
Function to create gandiva filter from gandiva condition.
constexpr std::array< std::string_view, 8 > cfgtypes
gandiva::ExpressionPtr makeExpression(gandiva::NodePtr node, gandiva::FieldPtr result)
Function to create gandiva projecting expression from generic gandiva expression tree.
std::shared_ptr< gandiva::Projector > createProjectorHelper(size_t nColumns, expressions::Projector *projectors, std::shared_ptr< arrow::Schema > schema, std::vector< std::shared_ptr< arrow::Field > > const &fields)
constexpr std::array< const char *, BasicOp::Conditional+1 > basicOperationsMap
gandiva::Selection createSelection(std::shared_ptr< arrow::Table > const &table, Filter const &expression)
Function for creating gandiva selection from our internal filter tree.
auto createProjection(std::shared_ptr< arrow::Table > const &table, std::shared_ptr< gandiva::Projector > const &gprojector)
const char * stringType(atype::type t)
std::vector< ColumnOperationSpec > Operations
void updateExpressionInfos(expressions::Filter const &filter, std::vector< ExpressionInfo > &eInfos)
Function for attaching gandiva filters to to compatible task inputs.
constexpr auto makeDatum(T const &)
constexpr std::array< std::string_view, BasicOp::Conditional+1 > mapping
a map between BasicOp and tokens in string expressions
Operations createOperations(Filter const &expression)
Function to create an internal operation sequence from a filter tree.
std::ostream & operator<<(std::ostream &os, DatumSpec const &spec)
gandiva::ConditionPtr makeCondition(gandiva::NodePtr node)
Function to create gandiva condition expression from generic gandiva expression tree.
bool isTableCompatible(std::set< uint32_t > const &hashes, Operations const &specs)
Function to check compatibility of a given arrow schema with operation sequence.
void unknownParameterUsed(const char *name)
void walk(Node *head, L &&pred)
Tree-walker helper.
gandiva::NodePtr createExpressionTree(Operations const &opSpecs, gandiva::SchemaPtr const &Schema)
Function to create gandiva expression tree from operation sequence.
constexpr auto makeOp(T const &, size_t const &)
void updatePlaceholders(Filter &filter, InitContext &context)
Update placeholder nodes from context.
constexpr std::array< float, 9 > mathConstantsValues
values of math constants to substiture
std::string upcastTo(atype::type f)
std::shared_ptr< gandiva::Projector > createProjector(gandiva::SchemaPtr const &Schema, Operations const &opSpecs, gandiva::FieldPtr result)
Function to create gandiva projector from operation sequence.
constexpr std::array< std::string_view, 9 > mathConstants
math constants to recognize in string expressions
void updateFilterInfo(ExpressionInfo &info, std::shared_ptr< arrow::Table > &table)
Defining PrimaryVertex explicitly as messageable.
Definition TFIDInfo.h:20
RuntimeErrorRef runtime_error(const char *)
RuntimeErrorRef runtime_error_f(const char *,...)
gandiva::Selection selection
Definition Expressions.h:65
gandiva::FilterPtr filter
Definition Expressions.h:64
gandiva::NodePtr tree
Definition Expressions.h:63
An expression tree node corresponding to a column binding.
A struct, containing the root of the expression tree.
size_t designateSubtrees(Node *node, size_t index=0)
std::unique_ptr< Node > node
An expression tree node corresponding to a literal value.
LiteralValue::stored_type var_t
An expression tree node corresponding to binary or unary operation.
static std::unique_ptr< Node > parseBase(Tokenizer &tk)
static std::unique_ptr< Node > parseTier8(Tokenizer &tk)
static std::unique_ptr< Node > parseTier3(Tokenizer &tk)
static std::unique_ptr< Node > parseTier1(Tokenizer &tk)
static std::unique_ptr< Node > parseTier6(Tokenizer &tk)
static std::unique_ptr< Node > parseTier7(Tokenizer &tk)
static std::unique_ptr< Node > parsePrimary(Tokenizer &tk)
static std::unique_ptr< Node > parseTier4(Tokenizer &tk)
static std::unique_ptr< Node > parseTier2(Tokenizer &tk)
static Node parse(std::string const &input)
static OpNode opFromToken(std::string const &token)
static std::unique_ptr< Node > parseTier5(Tokenizer &tk)
A placeholder node for simple type configurable.
void reset(std::string const &input)
std::variant< uint32_t, int32_t, uint64_t, int64_t > IntegerValue
std::variant< float, double > FloatValue
From https://en.cppreference.com/w/cpp/utility/variant/visit.
std::unique_ptr< TTree > tree((TTree *) flIn.Get(std::string(o2::base::NameConf::CTFTREENAME).c_str()))