Project
Loading...
Searching...
No Matches
TableTreeHelpers.h
Go to the documentation of this file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11#ifndef O2_FRAMEWORK_TABLETREEHELPERS_H_
12#define O2_FRAMEWORK_TABLETREEHELPERS_H_
13
14#include <arrow/buffer.h>
15#include <arrow/io/interfaces.h>
16#include <arrow/record_batch.h>
17#include "TFile.h"
18#include "TTreeReader.h"
19#include "TTreeReaderValue.h"
20#include "TTreeReaderArray.h"
21#include "TableBuilder.h"
22#include <arrow/dataset/file_base.h>
23#include <memory>
24
25// =============================================================================
26namespace o2::framework
27{
28// -----------------------------------------------------------------------------
29// TableToTree saves the contents of a given arrow::Table into
30// a TTree.
31// ColumnToBranch is used by TableToTree.
32//
33// To write the contents of a table ta to a tree tr on file f do:
34// . TableToTree t2t(ta, f, treename);
35// . t2t.addAllBranches();
36// OR t2t.addBranch(column, field), ...;
37// . t2t.process();
38//
40 EDataType type;
41 char suffix[3];
42 int size;
43};
44
// Declaration only: the return type is deduced (auto) and no body is visible
// here, so callers can use this function only where its definition has
// already been seen by the compiler.
// NOTE(review): presumably maps a ROOT EDataType plus an element count (size)
// to the matching Arrow data type - confirm against the definition.
45auto arrowTypeFromROOT(EDataType type, int size);
// Declaration only: the return type is deduced (auto) and no body is visible
// here, so callers can use this function only where its definition has
// already been seen by the compiler.
// NOTE(review): presumably returns the ROOT type description for the given
// Arrow type id - confirm against the definition.
46auto basicROOTTypeFromArrow(arrow::Type::type id);
47
49{
50 public:
51 ColumnToBranch(TTree* tree, std::shared_ptr<arrow::ChunkedArray> const& column, std::shared_ptr<arrow::Field> const& field);
54 void at(const int64_t* pos);
// Returns the current value of mFieldSize (initialised to 0 in the member
// declaration; where it is assigned is not visible in this header).
55 [[nodiscard]] int fieldSize() const { return mFieldSize; }
// Returns mColumn->length() converted to int.
// mColumn is dereferenced without a check and defaults to nullptr in its
// member declaration, so it must have been set before this is called.
// NOTE(review): arrow::ChunkedArray::length() is documented as returning
// int64_t, so the int return type may narrow for very large columns -
// confirm whether int64_t was intended.
56 [[nodiscard]] int columnEntries() const { return mColumn->length(); }
// Returns the branch name as a C string. The pointer refers to mBranchName's
// internal storage, so it is only usable while this object is alive and
// mBranchName has not been modified.
57 [[nodiscard]] char const* branchName() const { return mBranchName.c_str(); }
58
59 private:
60 void accessChunk();
61 void nextChunk();
62
63 std::string mBranchName;
64 TBranch* mBranch = nullptr;
65 TBranch* mSizeBranch = nullptr;
66 arrow::ChunkedArray* mColumn = nullptr;
67 int64_t mFirstIndex = 0;
68 int mCurrentChunk = 0;
69 int mListSize = 1;
70 ROOTTypeInfo mElementType;
71 arrow::Type::type mFieldType;
72 std::vector<uint8_t> cache;
73 std::shared_ptr<arrow::Array> mCurrentArray = nullptr;
74 int64_t mChunkLength = 0;
75 int mFieldSize = 0;
76};
77
79{
80 public:
81 TableToTree(std::shared_ptr<arrow::Table> const& table, TFile* file, const char* treename);
82
83 std::shared_ptr<TTree> process();
84 void addBranch(std::shared_ptr<arrow::ChunkedArray> const& column, std::shared_ptr<arrow::Field> const& field);
85 void addAllBranches();
86
87 private:
88 arrow::Table* mTable;
89 int64_t mRows = 0;
90 std::shared_ptr<TTree> mTree;
91 std::vector<std::unique_ptr<ColumnToBranch>> mColumnReaders;
92};
93
95{
96 public:
97 // The function to be used to create the required stream.
98 using StreamerCreator = std::function<std::shared_ptr<arrow::io::OutputStream>(std::shared_ptr<arrow::dataset::FileFragment>, const std::shared_ptr<arrow::ResizableBuffer>& buffer)>;
99
100 FragmentToBatch(StreamerCreator, std::shared_ptr<arrow::dataset::FileFragment>, arrow::MemoryPool* pool = arrow::default_memory_pool());
101 void setLabel(const char* label);
102 void fill(std::shared_ptr<arrow::Schema> dataSetSchema, std::shared_ptr<arrow::dataset::FileFormat>);
103 std::shared_ptr<arrow::RecordBatch> finalize();
104
// Returns the result of calling the stored StreamerCreator (mCreator) with
// mFragment and the supplied buffer.
// Note: buffer is received by value here, whereas StreamerCreator takes it
// by const reference.
105 std::shared_ptr<arrow::io::OutputStream> streamer(std::shared_ptr<arrow::ResizableBuffer> buffer)
106 {
107 return mCreator(mFragment, buffer);
108 }
109
110 private:
111 std::shared_ptr<arrow::dataset::FileFragment> mFragment;
112 arrow::MemoryPool* mArrowMemoryPool = nullptr;
113 std::string mTableLabel;
114 std::shared_ptr<arrow::RecordBatch> mRecordBatch;
115 StreamerCreator mCreator;
116};
117
118// -----------------------------------------------------------------------------
119} // namespace o2::framework
120
121// =============================================================================
122#endif // O2_FRAMEWORK_TABLETREEHELPERS_H_
uint16_t pos
Definition RawData.h:3
ColumnToBranch(ColumnToBranch &&other)=delete
ColumnToBranch(ColumnToBranch const &other)=delete
void at(const int64_t *pos)
char const * branchName() const
std::shared_ptr< arrow::RecordBatch > finalize()
std::function< std::shared_ptr< arrow::io::OutputStream >(std::shared_ptr< arrow::dataset::FileFragment >, const std::shared_ptr< arrow::ResizableBuffer > &buffer)> StreamerCreator
void setLabel(const char *label)
std::shared_ptr< arrow::io::OutputStream > streamer(std::shared_ptr< arrow::ResizableBuffer > buffer)
void fill(std::shared_ptr< arrow::Schema > dataSetSchema, std::shared_ptr< arrow::dataset::FileFormat >)
void addBranch(std::shared_ptr< arrow::ChunkedArray > const &column, std::shared_ptr< arrow::Field > const &field)
std::shared_ptr< TTree > process()
GLuint buffer
Definition glcorearb.h:655
GLsizeiptr size
Definition glcorearb.h:659
GLint GLint GLsizei GLint GLenum GLenum type
Definition glcorearb.h:275
GLuint GLsizei const GLchar * label
Definition glcorearb.h:2519
Defining PrimaryVertex explicitly as messageable.
Definition TFIDInfo.h:20
auto basicROOTTypeFromArrow(arrow::Type::type id)
auto arrowTypeFromROOT(EDataType type, int size)
VectorOfTObjectPtrs other
std::unique_ptr< TTree > tree((TTree *) flIn.Get(std::string(o2::base::NameConf::CTFTREENAME).c_str()))