Project
Loading...
Searching...
No Matches
DataInputDirector.h
Go to the documentation of this file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11#ifndef O2_FRAMEWORK_DATAINPUTDIRECTOR_H_
12#define O2_FRAMEWORK_DATAINPUTDIRECTOR_H_
13
14#include "TFile.h"
15
19
20#include <arrow/filesystem/filesystem.h>
21#include <arrow/dataset/dataset.h>
22
23#include <regex>
24#include <vector>
25#include "rapidjson/fwd.h"
26
28{
29class Monitoring;
30}
31
32namespace o2::framework
33{
34
36 std::string fileName;
38 std::vector<uint64_t> listOfTimeFrameNumbers;
39 std::vector<bool> alreadyRead;
40};
41
42FileNameHolder makeFileNameHolder(std::string fileName);
43
45 o2::monitoring::Monitoring* monitoring = nullptr;
47 std::string parentFileReplacement = "";
48 std::vector<std::pair<std::string, int>> parentLevelToOrigin = {};
49 // Optional registry of pre-opened TFiles (keyed by name) used to bypass
50 // TFile::Open for testing with in-memory TMemFile instances.
51 std::vector<std::pair<std::string, TFile*>> openFiles = {};
52
53 int levelForOrigin(std::string_view origin) const
54 {
55 for (auto& [o, level] : parentLevelToOrigin) {
56 if (o == origin) {
57 return level;
58 }
59 }
60 return -1;
61 }
62};
63
65{
69
70 public:
71 std::string tablename = "";
72 std::string treename = "";
73 std::shared_ptr<data_matcher::DataDescriptorMatcher> matcher;
74
75 DataInputDescriptor(bool alienSupport, int level, DataInputDirectorContext& context);
77
78 void printOut() const;
79
80 // setters
81 void setInputfilesFile(std::string dffn) { minputfilesFile = dffn; }
82 void setInputfilesFile(std::string* dffnptr) { minputfilesFilePtr = dffnptr; }
83 void setFilenamesRegex(std::string fn) { mFilenameRegex = fn; }
84 void setFilenamesRegex(std::string* fnptr) { mFilenameRegexPtr = fnptr; }
85
86 void setDefaultInputfiles(std::vector<FileNameHolder> difnptr) { mdefaultFilenamesPtr = difnptr; }
87
89 int fillInputfiles();
90 bool setFile(int counter, int wantedParentLevel, std::string_view wantedOrigin);
91
92 // getters
93 std::string getInputfilesFilename() const;
94 std::string getFilenamesRegexString() const;
95 std::regex getFilenamesRegex();
96 int getNumberInputfiles() { return mfilenames.size(); }
97 int getNumberTimeFrames() const { return mtotalNumberTimeFrames; }
98 int findDFNumber(int file, std::string dfName);
99
100 uint64_t getTimeFrameNumber(int counter, int numTF, int wantedParentLevel, std::string_view wantedOrigin);
101 arrow::dataset::FileSource getFileFolder(int counter, int numTF, int wantedParentLevel, std::string_view wantedOrigin);
102 // Open the current file to populate the parent map, then return the parent descriptor and
103 // the TF index within it that corresponds to numTF at this level. Returns {nullptr, -1} on failure.
104 std::pair<std::shared_ptr<DataInputDescriptor>, int> navigateToLevel(int counter, int numTF, int wantedParentLevel, std::string_view wantedOrigin);
105 std::shared_ptr<DataInputDescriptor> getParentFile(int counter, int numTF, std::string treename, int wantedParentLevel, std::string_view wantedOrigin);
108
109 bool readTree(DataAllocator& outputs, header::DataHeader dh, int counter, int numTF, std::string treename, size_t& totalSizeCompressed, size_t& totalSizeUncompressed);
110
111 void printFileOpening();
112 void printFileStatistics();
113 void closeInputFile();
114 bool isAlienSupportOn() { return mAlienSupport; }
115
116 private:
118 std::string minputfilesFile;
119 std::string* minputfilesFilePtr = nullptr;
120 std::string mFilenameRegex;
121 std::string* mFilenameRegexPtr = nullptr;
122 std::vector<FileNameHolder> mfilenames;
123 std::vector<FileNameHolder> mdefaultFilenamesPtr;
124 std::shared_ptr<arrow::fs::FileSystem> mCurrentFilesystem;
125 int mCurrentFileID = -1;
126 bool mAlienSupport = false;
127
128 DataInputDirectorContext& mContext;
129 TMap* mParentFileMap = nullptr;
130 std::shared_ptr<DataInputDescriptor> mParentFile = nullptr;
131 int mLevel = 0; // level of parent files
132
133 int mtotalNumberTimeFrames = 0;
134
135 uint64_t mIOTime = 0;
136 uint64_t mCurrentFileStartedAt = 0;
137};
138
140{
144
145 public:
146 DataInputDirector(std::vector<std::string> inputFiles, DataInputDirectorContext&& context);
148
149 void reset();
150 void printOut();
151 bool atEnd(int counter);
152
153 // setters
154 void setInputfilesFile(std::string iffn) { minputfilesFile = iffn; }
155 void setFilenamesRegex(std::string dfn) { mFilenameRegex = dfn; }
156 bool readJson(std::string const& fnjson);
157 void closeInputFiles();
158
159 // getters
161 int getNumberInputDescriptors() { return mdataInputDescriptors.size(); }
163
164 bool readTree(DataAllocator& outputs, header::DataHeader dh, int counter, int numTF, size_t& totalSizeCompressed, size_t& totalSizeUncompressed, bool wasAOD);
165 uint64_t getTimeFrameNumber(header::DataHeader dh, int counter, int numTF);
166 arrow::dataset::FileSource getFileFolder(header::DataHeader dh, int counter, int numTF);
168
171
173
174 private:
176 std::string minputfilesFile;
177 std::string* const minputfilesFilePtr = &minputfilesFile;
178 std::string mFilenameRegex;
179 std::string* const mFilenameRegexPtr = &mFilenameRegex;
180 std::shared_ptr<DataInputDescriptor> mdefaultDataInputDescriptor = nullptr;
181 std::vector<FileNameHolder> mdefaultInputFiles;
182 std::vector<DataInputDescriptor> mdataInputDescriptors;
183
184 bool mDebugMode = false;
185 bool mAlienSupport = false;
186
187 bool readJsonDocument(rapidjson::Document* doc);
188 bool isValid();
189};
190
191} // namespace o2::framework
192
193#endif // O2_FRAMEWORK_DATAINPUTDIRECTOR_H_
header::DataOrigin origin
o2::monitoring::Monitoring Monitoring
uint64_t getTimeFrameNumber(int counter, int numTF, int wantedParentLevel, std::string_view wantedOrigin)
std::shared_ptr< DataInputDescriptor > getParentFile(int counter, int numTF, std::string treename, int wantedParentLevel, std::string_view wantedOrigin)
bool readTree(DataAllocator &outputs, header::DataHeader dh, int counter, int numTF, std::string treename, size_t &totalSizeCompressed, size_t &totalSizeUncompressed)
arrow::dataset::FileSource getFileFolder(int counter, int numTF, int wantedParentLevel, std::string_view wantedOrigin)
void setDefaultInputfiles(std::vector< FileNameHolder > difnptr)
std::shared_ptr< data_matcher::DataDescriptorMatcher > matcher
void setInputfilesFile(std::string dffn)
void setInputfilesFile(std::string *dffnptr)
void setFilenamesRegex(std::string *fnptr)
void addFileNameHolder(FileNameHolder fn)
bool setFile(int counter, int wantedParentLevel, std::string_view wantedOrigin)
int findDFNumber(int file, std::string dfName)
DataInputDescriptor(DataInputDescriptor const &)=default
std::pair< std::shared_ptr< DataInputDescriptor >, int > navigateToLevel(int counter, int numTF, int wantedParentLevel, std::string_view wantedOrigin)
DataInputDescriptor * getDataInputDescriptor(header::DataHeader dh)
arrow::dataset::FileSource getFileFolder(header::DataHeader dh, int counter, int numTF)
void setInputfilesFile(std::string iffn)
int getTimeFramesInFile(header::DataHeader dh, int counter)
uint64_t getTimeFrameNumber(header::DataHeader dh, int counter, int numTF)
int getLevelForOrigin(header::DataOrigin origin) const
void setFilenamesRegex(std::string dfn)
bool readJson(std::string const &fnjson)
bool readTree(DataAllocator &outputs, header::DataHeader dh, int counter, int numTF, size_t &totalSizeCompressed, size_t &totalSizeUncompressed, bool wasAOD)
GLint level
Definition glcorearb.h:275
GLuint counter
Definition glcorearb.h:3987
Defining ITS Vertex explicitly as messageable.
Definition Cartesian.h:288
FileNameHolder makeFileNameHolder(std::string fileName)
o2::monitoring::Monitoring * monitoring
std::vector< std::pair< std::string, int > > parentLevelToOrigin
std::vector< std::pair< std::string, TFile * > > openFiles
int levelForOrigin(std::string_view origin) const
std::vector< uint64_t > listOfTimeFrameNumbers
the main header struct
Definition DataHeader.h:620