Project
Loading...
Searching...
No Matches
TableTreeHelpers.h
Go to the documentation of this file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11#ifndef O2_FRAMEWORK_TABLETREEHELPERS_H_
12#define O2_FRAMEWORK_TABLETREEHELPERS_H_
13
14#include <arrow/record_batch.h>
15#include "TFile.h"
16#include "TTreeReader.h"
17#include "TTreeReaderValue.h"
18#include "TTreeReaderArray.h"
19#include "TableBuilder.h"
20#include <arrow/dataset/file_base.h>
21#include <memory>
22
23// =============================================================================
24namespace o2::framework
25{
26// -----------------------------------------------------------------------------
27// TableToTree saves the contents of a given arrow::Table into
28// a TTree
29// ColumnToBranch is used by TableToTree
30//
31// To write the contents of a table ta to a tree named treename on file f do:
32// . TableToTree t2t(ta, f, treename);
33// . t2t.addAllBranches();
34// OR t2t.addBranch(column, field), ...;
35// . auto tr = t2t.process();
36//
37// .............................................................................
38// -----------------------------------------------------------------------------
39// TreeToTable fills the contents of a given TTree into an arrow::Table
40// BranchToColumn is used by TreeToTable
41//
42// To copy the contents of a tree tr to a table ta do:
43// . TreeToTable t2t;
44// . t2t.addAllColumns(tr);
45// OR
46// t2t.addAllColumns(tr, {columnname1, columnname2, ...});
47// . t2t.fill(tr); auto ta = t2t.finalize();
48//
49// .............................................................................
51 EDataType type;
52 char suffix[3];
53 int size;
54};
55
// Conversion helpers between ROOT (EDataType) and Arrow (arrow::Type::type)
// type identifiers. Only the declarations are visible here.
// NOTE(review): both use a deduced (`auto`) return type without a trailing
// return type, so a translation unit can only call them after it has seen the
// definition — confirm that every user of this header also sees it.
// Presumably maps a ROOT element type plus an element count (`size`) to the
// matching Arrow data type — verify against the definition.
56auto arrowTypeFromROOT(EDataType type, int size);
// Presumably the reverse lookup: Arrow type id to ROOT type information —
// verify against the definition.
57auto basicROOTTypeFromArrow(arrow::Type::type id);
58
// BranchToColumn: reader that turns one TBranch into one Arrow column.
// NOTE(review): the class-head line (listing line 59) is missing from this
// extract; the class name is taken from the constructor below.
60{
61 public:
// @param branch   ROOT branch to read from
// @param VLA      presumably "variable-length array" flag — confirm
// @param name     column name
// @param type     ROOT element type of the branch
// @param listSize number of elements per entry (member default is 1)
// @param pool     Arrow memory pool handed to the builders — presumably; confirm
62 BranchToColumn(TBranch* branch, bool VLA, std::string name, EDataType type, int listSize, arrow::MemoryPool* pool);
63 // BranchToColumn(TBranch* branch, TBranch* sizeBranch, std::string name, EDataType type, arrow::MemoryPool* pool);
64 ~BranchToColumn() = default;
// Accessor for the associated branch (presumably returns mBranch — confirm).
65 TBranch* branch();
66
// Reads the branch using the caller-supplied TBuffer and returns the resulting
// column data together with its Arrow field description.
67 std::pair<std::shared_ptr<arrow::ChunkedArray>, std::shared_ptr<arrow::Field>> read(TBuffer* buffer);
68
69 private:
// Raw, non-owning pointers (mBranch, mValueBuilder, mPool): nothing in this
// class declaration releases them.
70 TBranch* mBranch = nullptr;
71 bool mVLA = false;
72 std::string mColumnName;
// NOTE(review): mType has no in-class initializer, unlike the other members.
73 EDataType mType;
74 std::shared_ptr<arrow::DataType> mArrowType;
75 arrow::ArrayBuilder* mValueBuilder = nullptr;
76 std::unique_ptr<arrow::ArrayBuilder> mListBuilder = nullptr;
77 int mListSize = 1;
78 std::unique_ptr<arrow::ArrayBuilder> mBuilder = nullptr;
79 arrow::MemoryPool* mPool = nullptr;
80};
81
// ColumnToBranch: writer-side helper that binds one Arrow column to a branch
// of a TTree.
// NOTE(review): the class-head line (listing line 82) and listing lines 86-87
// are missing from this extract; the class name is taken from the constructor.
83{
84 public:
// @param tree   tree that receives the branch
// @param column Arrow column supplying the data
// @param field  Arrow field describing the column
85 ColumnToBranch(TTree* tree, std::shared_ptr<arrow::ChunkedArray> const& column, std::shared_ptr<arrow::Field> const& field);
// Positions the helper at the row index pointed to by `pos` — presumably so the
// branch address refers to that row's data; confirm against the definition.
88 void at(const int64_t* pos);
// Returns the stored mFieldSize.
89 [[nodiscard]] int fieldSize() const { return mFieldSize; }
// Returns mColumn->length() converted to int.
// NOTE(review): row counts elsewhere in this header are int64_t (see
// TableToTree::mRows); confirm the length always fits in an int.
90 [[nodiscard]] int columnEntries() const { return mColumn->length(); }
// Returns a pointer into mBranchName; it stays valid only while this object
// lives and mBranchName is not modified.
91 [[nodiscard]] char const* branchName() const { return mBranchName.c_str(); }
92
93 private:
94 void accessChunk();
95 void nextChunk();
96
97 std::string mBranchName;
98 TBranch* mBranch = nullptr;
99 TBranch* mSizeBranch = nullptr;
// Raw pointer although the constructor receives a shared_ptr.
// NOTE(review): confirm the column outlives this object.
100 arrow::ChunkedArray* mColumn = nullptr;
101 int64_t mFirstIndex = 0;
102 int mCurrentChunk = 0;
103 int mListSize = 1;
104 ROOTTypeInfo mElementType;
// NOTE(review): mFieldType has no in-class initializer.
105 arrow::Type::type mFieldType;
106 std::vector<uint8_t> cache;
107 std::shared_ptr<arrow::Array> mCurrentArray = nullptr;
108 int64_t mChunkLength = 0;
109 int mFieldSize = 0;
110};
111
// TableToTree: writes an arrow::Table into a TTree, using one ColumnToBranch
// per column (see mColumnReaders).
// NOTE(review): the class-head line (listing line 112) is missing from this
// extract; the class name is taken from the constructor below.
113{
114 public:
// @param table    table to be written
// @param file     ROOT file associated with the tree
// @param treename name of the tree
115 TableToTree(std::shared_ptr<arrow::Table> const& table, TFile* file, const char* treename);
116
// Returns the tree; presumably fills it from the registered branches first —
// confirm against the definition.
117 std::shared_ptr<TTree> process();
// Registers a single column/field pair as a branch.
118 void addBranch(std::shared_ptr<arrow::ChunkedArray> const& column, std::shared_ptr<arrow::Field> const& field);
// Presumably calls addBranch for every column of the table — confirm.
119 void addAllBranches();
120
121 private:
// NOTE(review): raw pointer with no in-class initializer, unlike the other
// members; presumably set by the constructor — confirm, and confirm the table
// outlives this object.
122 arrow::Table* mTable;
123 int64_t mRows = 0;
124 std::shared_ptr<TTree> mTree;
125 std::vector<std::unique_ptr<ColumnToBranch>> mColumnReaders;
126};
127
// TreeToTable: builds an arrow::Table from a TTree, using one BranchToColumn
// per branch (see mBranchReaders).
// NOTE(review): the class-head line (listing line 128) is missing from this
// extract; the class name is taken from the constructor below.
129{
130 public:
// @param pool Arrow memory pool; defaults to arrow::default_memory_pool().
// NOTE(review): callable with a single argument and not marked `explicit`.
131 TreeToTable(arrow::MemoryPool* pool = arrow::default_memory_pool());
// Sets the table label (presumably stored in mTableLabel — confirm).
132 void setLabel(const char* label);
// Registers readers for the branches of `tree`; `names` defaults to an empty
// list. Presumably an empty list means "all branches" — confirm.
133 void addAllColumns(TTree* tree, std::vector<std::string>&& names = {});
// Reads the data of the given tree — presumably through the registered readers.
134 void fill(TTree*);
// Returns the resulting table.
135 std::shared_ptr<arrow::Table> finalize();
136
137 private:
// NOTE(review): no in-class initializer; presumably set by the constructor.
138 arrow::MemoryPool* mArrowMemoryPool;
139 std::vector<std::unique_ptr<BranchToColumn>> mBranchReaders;
140 std::string mTableLabel;
141 std::shared_ptr<arrow::Table> mTable;
142
// Internal helper that creates the reader for a single branch — presumably
// appended to mBranchReaders; confirm against the definition.
143 void addReader(TBranch* branch, std::string const& name, bool VLA);
144};
145
// FragmentToBatch: produces an arrow::RecordBatch from an Arrow dataset file
// fragment.
// NOTE(review): the class-head line (listing line 146) is missing from this
// extract; the class name is taken from the constructor below.
147{
148 public:
// @param pool Arrow memory pool; defaults to arrow::default_memory_pool().
// NOTE(review): callable with a single argument and not marked `explicit`.
149 FragmentToBatch(arrow::MemoryPool* pool = arrow::default_memory_pool());
// Sets the label (presumably stored in mTableLabel — confirm).
150 void setLabel(const char* label);
// Takes the fragment to read, the dataset schema and the file format.
// All three shared_ptr arguments are passed by value.
151 void fill(std::shared_ptr<arrow::dataset::FileFragment>, std::shared_ptr<arrow::Schema> dataSetSchema, std::shared_ptr<arrow::dataset::FileFormat>);
// Returns the resulting record batch (presumably mRecordBatch — confirm).
152 std::shared_ptr<arrow::RecordBatch> finalize();
153
154 private:
155 arrow::MemoryPool* mArrowMemoryPool = nullptr;
156 std::string mTableLabel;
157 std::shared_ptr<arrow::RecordBatch> mRecordBatch;
158};
159
160// -----------------------------------------------------------------------------
161} // namespace o2::framework
162
163// =============================================================================
164#endif // O2_FRAMEWORK_TABLETREEHELPERS_H_
uint16_t pos
Definition RawData.h:3
std::pair< std::shared_ptr< arrow::ChunkedArray >, std::shared_ptr< arrow::Field > > read(TBuffer *buffer)
ColumnToBranch(ColumnToBranch &&other)=delete
ColumnToBranch(ColumnToBranch const &other)=delete
void at(const int64_t *pos)
char const * branchName() const
void fill(std::shared_ptr< arrow::dataset::FileFragment >, std::shared_ptr< arrow::Schema > dataSetSchema, std::shared_ptr< arrow::dataset::FileFormat >)
std::shared_ptr< arrow::RecordBatch > finalize()
void setLabel(const char *label)
void addBranch(std::shared_ptr< arrow::ChunkedArray > const &column, std::shared_ptr< arrow::Field > const &field)
std::shared_ptr< TTree > process()
void addAllColumns(TTree *tree, std::vector< std::string > &&names={})
void setLabel(const char *label)
std::shared_ptr< arrow::Table > finalize()
GLuint buffer
Definition glcorearb.h:655
GLsizeiptr size
Definition glcorearb.h:659
GLuint const GLchar * name
Definition glcorearb.h:781
GLint GLint GLsizei GLint GLenum GLenum type
Definition glcorearb.h:275
GLuint GLsizei const GLchar * label
Definition glcorearb.h:2519
Defining PrimaryVertex explicitly as messageable.
Definition TFIDInfo.h:20
auto basicROOTTypeFromArrow(arrow::Type::type id)
auto arrowTypeFromROOT(EDataType type, int size)
VectorOfTObjectPtrs other
std::unique_ptr< TTree > tree((TTree *) flIn.Get(std::string(o2::base::NameConf::CTFTREENAME).c_str()))