Project
Loading...
Searching...
No Matches
RootArrowFilesystem.h
Go to the documentation of this file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11#ifndef O2_FRAMEWORK_ROOT_ARROW_FILESYSTEM_H_
12#define O2_FRAMEWORK_ROOT_ARROW_FILESYSTEM_H_
13
14#include <TBufferFile.h>
15#include <arrow/buffer.h>
16#include <arrow/dataset/dataset.h>
17#include <arrow/dataset/type_fwd.h>
18#include <arrow/dataset/file_base.h>
19#include <arrow/filesystem/type_fwd.h>
20#include <arrow/type_fwd.h>
21#include <memory>
22#include <utility>
23
24class TFile;
25class TBufferFile;
26class TDirectoryFile;
27
28namespace o2::framework
29{
30
32 RootObjectHandler(void* p, std::shared_ptr<arrow::dataset::FileFormat> f)
33 : payload(p), format(std::move(f))
34 {
35 }
36
37 ~RootObjectHandler() noexcept(false);
38
39 template <typename T>
40 std::unique_ptr<T> GetObjectAsOwner()
41 {
42 auto* p = payload;
43 payload = nullptr;
44 return std::unique_ptr<T>((T*)p);
45 }
46 std::shared_ptr<arrow::dataset::FileFormat> format;
47
48 private:
49 void* payload = nullptr;
50};
51
52// This is to avoid having to reimplement a bunch of unsupported methods
53// for all the possible virtual filesystems we can invent on top of ROOT
54// data structures.
56{
57 public:
58 // Dummy implementations, so that derived filesystems are not forced to provide them.
59 arrow::Result<arrow::fs::FileInfo> GetFileInfo(const std::string& path) override;
60 arrow::Result<arrow::fs::FileInfoVector> GetFileInfo(const arrow::fs::FileSelector& select) override;
61
62 bool Equals(const FileSystem& other) const override
63 {
64 return this->type_name() == other.type_name();
65 }
66
67 virtual std::shared_ptr<RootObjectHandler> GetObjectHandler(arrow::dataset::FileSource source) = 0;
68 virtual bool CheckSupport(arrow::dataset::FileSource source) = 0;
69
70 arrow::Status CreateDir(const std::string& path, bool recursive) override;
71
72 arrow::Status DeleteDir(const std::string& path) override;
73
74 arrow::Status CopyFile(const std::string& src, const std::string& dest) override;
75
76 arrow::Status Move(const std::string& src, const std::string& dest) override;
77
78 arrow::Status DeleteDirContents(const std::string& path, bool missing_dir_ok) override;
79
80 arrow::Status DeleteRootDirContents() override;
81
82 arrow::Status DeleteFile(const std::string& path) override;
83
84 arrow::Result<std::shared_ptr<arrow::io::InputStream>> OpenInputStream(const std::string& path) override;
85
86 arrow::Result<std::shared_ptr<arrow::io::RandomAccessFile>> OpenInputFile(const std::string& path) override;
87
88 arrow::Result<std::shared_ptr<arrow::io::OutputStream>> OpenOutputStream(
89 const std::string& path,
90 const std::shared_ptr<const arrow::KeyValueMetadata>& metadata) override;
91
92 arrow::Result<std::shared_ptr<arrow::io::OutputStream>> OpenAppendStream(
93 const std::string& path,
94 const std::shared_ptr<const arrow::KeyValueMetadata>& metadata) override;
95};
96
97struct RootArrowFactory final {
98 std::function<std::shared_ptr<arrow::dataset::FileWriteOptions>()> options = nullptr;
99 std::function<std::shared_ptr<arrow::dataset::FileFormat>()> format = nullptr;
100 // Builds an output streamer which is able to read from the source fragment
101 // in a deferred way.
102 std::function<std::shared_ptr<arrow::io::OutputStream>(std::shared_ptr<arrow::dataset::FileFragment>, const std::shared_ptr<arrow::ResizableBuffer>& buffer)> deferredOutputStreamer = nullptr;
103};
104
108
109// A registry for all the possible ways of encoding a table in a TFile
111 // The unique name of this capability
112 std::string name = "unknown";
113 // Convert a logical filename to an actual object to be read
114 // This can be used, e.g. to read an RNTuple stored in
115 // a flat directory structure in a TFile vs a TTree stored inside
116 // a TDirectory (e.g. /DF_1000/o2tracks).
117 std::function<std::string(std::string)> lfn2objectPath;
118 // Given a TFile, return the object which this capability supports.
119 // Use a void* in order not to expose the kind of object to the
120 // generic reading code. This is also where we load the plugin
121 // which will be used for the actual creation.
122 std::function<void*(std::shared_ptr<arrow::fs::FileSystem> fs, std::string const& path)> getHandle;
123 // Whether or not this actually supports reading an object of the given class
124 std::function<bool(char const*)> checkSupport;
125
126 // This must be implemented to load the actual RootArrowFactory plugin which
127 // implements this capability. This way the detection of the file format
128 // (via getHandle) does not need to know about the actual code which performs
129 // the serialization (and might depend on e.g. RNTuple).
130 std::function<RootArrowFactory&()> factory;
131};
132
136
137// This acts as registry of all the capabilities (i.e. the ability to
138// associate a given object in a root file to the serialization plugin) and
139// the factory (i.e. the serialization plugin)
141 std::vector<RootObjectReadingCapability> capabilities;
142};
143
145{
146 public:
147 arrow::Result<arrow::fs::FileInfo> GetFileInfo(const std::string& path) override;
148
149 TFileFileSystem(TDirectoryFile* f, size_t readahead, RootObjectReadingFactory&);
150
151 ~TFileFileSystem() override;
152
153 std::string type_name() const override
154 {
155 return "TDirectoryFile";
156 }
157
158 std::shared_ptr<RootObjectHandler> GetObjectHandler(arrow::dataset::FileSource source) override;
159 bool CheckSupport(arrow::dataset::FileSource source) override;
160 virtual std::shared_ptr<VirtualRootFileSystemBase> GetSubFilesystem(arrow::dataset::FileSource source);
161
162 arrow::Result<std::shared_ptr<arrow::io::OutputStream>> OpenOutputStream(
163 const std::string& path,
164 const std::shared_ptr<const arrow::KeyValueMetadata>& metadata) override;
165
166 // We can go back to the TFile in case this is needed.
167 TDirectoryFile* GetFile()
168 {
169 return mFile;
170 }
171
172 private:
173 TDirectoryFile* mFile;
174 RootObjectReadingFactory& mObjectFactory;
175};
176
178{
179 public:
181
182 arrow::Result<arrow::fs::FileInfo> GetFileInfo(const std::string& path) override;
183 std::string type_name() const override
184 {
185 return "tbufferfile";
186 }
187
188 bool CheckSupport(arrow::dataset::FileSource source) override;
189 std::shared_ptr<RootObjectHandler> GetObjectHandler(arrow::dataset::FileSource source) override;
191 {
192 return mBuffer;
193 }
194
195 private:
196 TBufferFile* mBuffer;
197 std::shared_ptr<VirtualRootFileSystemBase> mFilesystem;
198 RootObjectReadingFactory& mObjectFactory;
199};
200
201// An Arrow output stream which allows writing to a TDirectoryFile.
202// This will point to the location of the file itself. You can
203// specify the location of the actual object inside it by passing the
204// associated path to the Write() API.
206{
207 public:
208 TDirectoryFileOutputStream(TDirectoryFile*);
209
210 arrow::Status Close() override;
211
212 arrow::Result<int64_t> Tell() const override;
213
214 arrow::Status Write(const void* data, int64_t nbytes) override;
215
216 bool closed() const override;
217
218 TDirectoryFile* GetDirectory()
219 {
220 return mDirectory;
221 }
222
223 private:
224 TDirectoryFile* mDirectory;
225};
226
227} // namespace o2::framework
228
229#endif // O2_FRAMEWORK_ROOT_ARROW_FILESYSTEM_H_
arrow::Result< arrow::fs::FileInfo > GetFileInfo(const std::string &path) override
std::shared_ptr< RootObjectHandler > GetObjectHandler(arrow::dataset::FileSource source) override
bool CheckSupport(arrow::dataset::FileSource source) override
std::string type_name() const override
arrow::Result< int64_t > Tell() const override
std::string type_name() const override
virtual std::shared_ptr< VirtualRootFileSystemBase > GetSubFilesystem(arrow::dataset::FileSource source)
bool CheckSupport(arrow::dataset::FileSource source) override
arrow::Result< arrow::fs::FileInfo > GetFileInfo(const std::string &path) override
arrow::Result< std::shared_ptr< arrow::io::OutputStream > > OpenOutputStream(const std::string &path, const std::shared_ptr< const arrow::KeyValueMetadata > &metadata) override
std::shared_ptr< RootObjectHandler > GetObjectHandler(arrow::dataset::FileSource source) override
arrow::Status DeleteDir(const std::string &path) override
virtual std::shared_ptr< RootObjectHandler > GetObjectHandler(arrow::dataset::FileSource source)=0
arrow::Status Move(const std::string &src, const std::string &dest) override
arrow::Status DeleteFile(const std::string &path) override
bool Equals(const FileSystem &other) const override
arrow::Status DeleteDirContents(const std::string &path, bool missing_dir_ok) override
arrow::Status CreateDir(const std::string &path, bool recursive) override
arrow::Status CopyFile(const std::string &src, const std::string &dest) override
arrow::Result< arrow::fs::FileInfo > GetFileInfo(const std::string &path) override
arrow::Result< std::shared_ptr< arrow::io::OutputStream > > OpenOutputStream(const std::string &path, const std::shared_ptr< const arrow::KeyValueMetadata > &metadata) override
arrow::Result< std::shared_ptr< arrow::io::InputStream > > OpenInputStream(const std::string &path) override
arrow::Result< std::shared_ptr< arrow::io::OutputStream > > OpenAppendStream(const std::string &path, const std::shared_ptr< const arrow::KeyValueMetadata > &metadata) override
virtual bool CheckSupport(arrow::dataset::FileSource source)=0
arrow::Result< std::shared_ptr< arrow::io::RandomAccessFile > > OpenInputFile(const std::string &path) override
GLenum src
Definition glcorearb.h:1767
GLuint buffer
Definition glcorearb.h:655
GLuint const GLchar * name
Definition glcorearb.h:781
GLdouble f
Definition glcorearb.h:310
GLsizei GLsizei GLchar * source
Definition glcorearb.h:798
GLboolean * data
Definition glcorearb.h:298
GLsizei const GLchar *const * path
Definition glcorearb.h:3591
GLint GLint GLsizei GLint GLenum format
Definition glcorearb.h:275
Defining PrimaryVertex explicitly as messageable.
Definition TFIDInfo.h:20
std::vector< InputSpec > select(char const *matcher="")
Defining DataPointCompositeObject explicitly as copiable.
virtual RootArrowFactory * create()=0
std::function< std::shared_ptr< arrow::dataset::FileWriteOptions >()> options
std::function< std::shared_ptr< arrow::io::OutputStream >(std::shared_ptr< arrow::dataset::FileFragment >, const std::shared_ptr< arrow::ResizableBuffer > &buffer)> deferredOutputStreamer
RootObjectHandler(void *p, std::shared_ptr< arrow::dataset::FileFormat > f)
std::unique_ptr< T > GetObjectAsOwner()
std::shared_ptr< arrow::dataset::FileFormat > format
virtual RootObjectReadingCapability * create()=0
std::function< RootArrowFactory &()> factory
std::function< bool(char const *)> checkSupport
std::function< std::string(std::string)> lfn2objectPath
std::function< void *(std::shared_ptr< arrow::fs::FileSystem > fs, std::string const &path)> getHandle
std::vector< RootObjectReadingCapability > capabilities
VectorOfTObjectPtrs other
ctfTree Write()