Project
Loading...
Searching...
No Matches
DataInputDirector.h
Go to the documentation of this file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11#ifndef O2_FRAMEWORK_DATAINPUTDIRECTOR_H_
12#define O2_FRAMEWORK_DATAINPUTDIRECTOR_H_
13
14#include "TFile.h"
15
19
20#include <arrow/filesystem/filesystem.h>
21#include <arrow/dataset/dataset.h>
22
23#include <regex>
24#include <vector>
25#include "rapidjson/fwd.h"
26
28{
29class Monitoring;
30}
31
32namespace o2::framework
33{
34
// Per-file bookkeeping record handed around as FileNameHolder* (see makeFileNameHolder below).
// NOTE(review): the opening line of this definition and listing line 37 are not visible
// in this listing, so at least one member may be missing from view.
// File name this record refers to.
36 std::string fileName;
// Time frame numbers stored as unsigned 64-bit values.
// NOTE(review): presumably the time frames found in `fileName` — confirm in the implementation.
38 std::vector<uint64_t> listOfTimeFrameNumbers;
// NOTE(review): presumably one flag per entry of listOfTimeFrameNumbers, set once that
// time frame has been read — confirm in the implementation.
39 std::vector<bool> alreadyRead;
40};
41
// Declaration only; the definition is not in this header.
// Takes the file name by value and returns a raw FileNameHolder pointer.
// NOTE(review): ownership of the returned pointer is not expressed in the type — confirm who releases it.
42FileNameHolder* makeFileNameHolder(std::string fileName);
43
// Data members of the shared context that DataInputDescriptor keeps a reference to (mContext).
// NOTE(review): the opening line of this definition and listing line 46 are not visible here.
// Non-owning-looking raw pointer to the monitoring object; null by default.
45 o2::monitoring::Monitoring* monitoring = nullptr;
// Empty by default. NOTE(review): presumably a rewrite rule applied to parent file paths — confirm format in the implementation.
47 std::string parentFileReplacement = "";
// (name, level) pairs searched linearly by levelForOrigin(); empty by default.
48 std::vector<std::pair<std::string, int>> parentLevelToOrigin = {};
49 // Optional registry of pre-opened TFiles (keyed by name) used to bypass
50 // TFile::Open for testing with in-memory TMemFile instances.
51 std::vector<std::pair<std::string, TFile*>> openFiles = {};
52
53 int levelForOrigin(std::string_view origin) const
54 {
55 for (auto& [o, level] : parentLevelToOrigin) {
56 if (o == origin) {
57 return level;
58 }
59 }
60 return -1;
61 }
62};
63
// DataInputDescriptor: describes one input source (file list, file-name regex, matcher)
// and declares the operations to open files and read trees from them.
// NOTE(review): the class header line and listing lines 66-68, 87, 105-106 and 116 are
// not visible in this listing; members declared on those lines are missing from view.
65{
69
70 public:
// Table and tree names; both start out empty.
71 std::string tablename = "";
72 std::string treename = "";
// Uniquely-owned matcher object; default-constructed (null) until assigned.
73 std::unique_ptr<data_matcher::DataDescriptorMatcher> matcher;
74
// `context` is taken by reference and a reference member (mContext) exists below.
// NOTE(review): presumably the context must outlive this object — confirm.
75 DataInputDescriptor(bool alienSupport, int level, DataInputDirectorContext& context);
76
77 void printOut();
78
79 // setters
// By-value overloads copy the string into the local member; pointer overloads only
// store the pointer (no copy), so the pointee must stay valid while it is used.
80 void setInputfilesFile(std::string dffn) { minputfilesFile = dffn; }
81 void setInputfilesFile(std::string* dffnptr) { minputfilesFilePtr = dffnptr; }
82 void setFilenamesRegex(std::string fn) { mFilenameRegex = fn; }
83 void setFilenamesRegex(std::string* fnptr) { mFilenameRegexPtr = fnptr; }
84
// Stores a pointer to an externally held list of default file holders (no copy).
85 void setDefaultInputfiles(std::vector<FileNameHolder*>* difnptr) { mdefaultFilenamesPtr = difnptr; }
86
88 int fillInputfiles();
89 bool setFile(int counter, int wantedParentLevel, std::string_view wantedOrigin);
90
91 // getters
92 std::string getInputfilesFilename();
93 std::string getFilenamesRegexString();
94 std::regex getFilenamesRegex();
// Returns mfilenames.size(), implicitly converted from size_t to int.
95 int getNumberInputfiles() { return mfilenames.size(); }
// Returns the current value of mtotalNumberTimeFrames.
96 int getNumberTimeFrames() { return mtotalNumberTimeFrames; }
97 int findDFNumber(int file, std::string dfName);
98
99 uint64_t getTimeFrameNumber(int counter, int numTF, int wantedParentLevel, std::string_view wantedOrigin);
100 arrow::dataset::FileSource getFileFolder(int counter, int numTF, int wantedParentLevel, std::string_view wantedOrigin);
101 // Open the current file to populate the parent map, then return the parent descriptor and
102 // the TF index within it that corresponds to numTF at this level. Returns {nullptr, -1} on failure.
103 std::pair<DataInputDescriptor*, int> navigateToLevel(int counter, int numTF, int wantedParentLevel, std::string_view wantedOrigin);
104 DataInputDescriptor* getParentFile(int counter, int numTF, std::string treename, int wantedParentLevel, std::string_view wantedOrigin);
107
// The two size_t references are in/out parameters.
// NOTE(review): presumably byte counters that the call adds to — confirm in the implementation.
108 bool readTree(DataAllocator& outputs, header::DataHeader dh, int counter, int numTF, std::string treename, size_t& totalSizeCompressed, size_t& totalSizeUncompressed);
109
110 void printFileOpening();
111 void printFileStatistics();
112 void closeInputFile();
// Returns the mAlienSupport flag.
113 bool isAlienSupportOn() { return mAlienSupport; }
114
115 private:
// Locally stored value and optional external pointer for the input-files file name.
117 std::string minputfilesFile = "";
118 std::string* minputfilesFilePtr = nullptr;
// Locally stored value and optional external pointer for the file-name regex.
119 std::string mFilenameRegex = "";
120 std::string* mFilenameRegexPtr = nullptr;
// Raw pointers to file holders. NOTE(review): ownership is not visible in this header.
121 std::vector<FileNameHolder*> mfilenames;
122 std::vector<FileNameHolder*>* mdefaultFilenamesPtr = nullptr;
123 std::shared_ptr<arrow::fs::FileSystem> mCurrentFilesystem;
// Starts at -1. NOTE(review): presumably meaning "no file selected yet" — confirm.
124 int mCurrentFileID = -1;
125 bool mAlienSupport = false;
126
127 DataInputDirectorContext& mContext;
128 TMap* mParentFileMap = nullptr;
129 DataInputDescriptor* mParentFile = nullptr;
130 int mLevel = 0; // level of parent files
131
132 int mtotalNumberTimeFrames = 0;
133
// NOTE(review): units of these two counters are not stated in this header — confirm.
134 uint64_t mIOTime = 0;
135 uint64_t mCurrentFileStartedAt = 0;
136};
137
// DataInputDirector: holds a collection of DataInputDescriptor pointers plus default
// input-file settings, and declares DataHeader-keyed read/lookup operations.
// NOTE(review): the class header line and listing lines 140-142, 146, 159, 161, 166,
// 168-169 and 172 are not visible in this listing; members declared there are missing from view.
139{
143
144 public:
// Takes the list of input file names by value and the context as an rvalue reference.
145 DataInputDirector(std::vector<std::string> inputFiles, DataInputDirectorContext&& context);
147
148 void reset();
149 void printOut();
150 bool atEnd(int counter);
151
152 // setters
// Both setters copy their argument into the corresponding string member.
153 void setInputfilesFile(std::string iffn) { minputfilesFile = iffn; }
154 void setFilenamesRegex(std::string dfn) { mFilenameRegex = dfn; }
155 bool readJson(std::string const& fnjson);
156 void closeInputFiles();
157
158 // getters
// Returns mdataInputDescriptors.size(), implicitly converted from size_t to int.
160 int getNumberInputDescriptors() { return mdataInputDescriptors.size(); }
162
// The two size_t references are in/out parameters.
// NOTE(review): presumably byte counters that the call adds to — confirm in the implementation.
163 bool readTree(DataAllocator& outputs, header::DataHeader dh, int counter, int numTF, size_t& totalSizeCompressed, size_t& totalSizeUncompressed);
164 uint64_t getTimeFrameNumber(header::DataHeader dh, int counter, int numTF);
165 arrow::dataset::FileSource getFileFolder(header::DataHeader dh, int counter, int numTF);
167
170
171 private:
// The *Ptr members are const pointers fixed to the address of the string member declared
// just above them, so they always refer to this object's own strings.
// NOTE(review): a compiler-generated copy would leave the copy's pointers aimed at its own
// members (they are re-initialized from the default member initializer only if the copy
// constructor does not copy them) — confirm copy/move behaviour is as intended.
173 std::string minputfilesFile;
174 std::string* const minputfilesFilePtr = &minputfilesFile;
175 std::string mFilenameRegex;
176 std::string* const mFilenameRegexPtr = &mFilenameRegex;
// Raw pointers. NOTE(review): ownership and cleanup are not visible in this header.
177 DataInputDescriptor* mdefaultDataInputDescriptor = nullptr;
178 std::vector<FileNameHolder*> mdefaultInputFiles;
179 std::vector<DataInputDescriptor*> mdataInputDescriptors;
180
181 bool mDebugMode = false;
182 bool mAlienSupport = false;
183
// Takes a pointer to a rapidjson document (only forward-declared via rapidjson/fwd.h here).
184 bool readJsonDocument(rapidjson::Document* doc);
185 bool isValid();
186};
187
188} // namespace o2::framework
189
190#endif // O2_FRAMEWORK_DATAINPUTDIRECTOR_H_
header::DataOrigin origin
o2::monitoring::Monitoring Monitoring
uint64_t getTimeFrameNumber(int counter, int numTF, int wantedParentLevel, std::string_view wantedOrigin)
void addFileNameHolder(FileNameHolder *fn)
bool readTree(DataAllocator &outputs, header::DataHeader dh, int counter, int numTF, std::string treename, size_t &totalSizeCompressed, size_t &totalSizeUncompressed)
arrow::dataset::FileSource getFileFolder(int counter, int numTF, int wantedParentLevel, std::string_view wantedOrigin)
std::unique_ptr< data_matcher::DataDescriptorMatcher > matcher
std::pair< DataInputDescriptor *, int > navigateToLevel(int counter, int numTF, int wantedParentLevel, std::string_view wantedOrigin)
void setDefaultInputfiles(std::vector< FileNameHolder * > *difnptr)
void setInputfilesFile(std::string dffn)
void setInputfilesFile(std::string *dffnptr)
void setFilenamesRegex(std::string *fnptr)
DataInputDescriptor * getParentFile(int counter, int numTF, std::string treename, int wantedParentLevel, std::string_view wantedOrigin)
bool setFile(int counter, int wantedParentLevel, std::string_view wantedOrigin)
int findDFNumber(int file, std::string dfName)
bool readTree(DataAllocator &outputs, header::DataHeader dh, int counter, int numTF, size_t &totalSizeCompressed, size_t &totalSizeUncompressed)
DataInputDescriptor * getDataInputDescriptor(header::DataHeader dh)
arrow::dataset::FileSource getFileFolder(header::DataHeader dh, int counter, int numTF)
void setInputfilesFile(std::string iffn)
int getTimeFramesInFile(header::DataHeader dh, int counter)
uint64_t getTimeFrameNumber(header::DataHeader dh, int counter, int numTF)
void setFilenamesRegex(std::string dfn)
bool readJson(std::string const &fnjson)
GLint level
Definition glcorearb.h:275
GLuint counter
Definition glcorearb.h:3987
Defining ITS Vertex explicitly as messageable.
Definition Cartesian.h:288
FileNameHolder * makeFileNameHolder(std::string fileName)
o2::monitoring::Monitoring * monitoring
std::vector< std::pair< std::string, int > > parentLevelToOrigin
std::vector< std::pair< std::string, TFile * > > openFiles
int levelForOrigin(std::string_view origin) const
std::vector< uint64_t > listOfTimeFrameNumbers
the main header struct
Definition DataHeader.h:620