Project
Loading...
Searching...
No Matches
DataInputDirector.h
Go to the documentation of this file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11#ifndef O2_FRAMEWORK_DATAINPUTDIRECTOR_H_
12#define O2_FRAMEWORK_DATAINPUTDIRECTOR_H_
13
14#include "TFile.h"
15
19
20#include <arrow/filesystem/filesystem.h>
21#include <arrow/dataset/dataset.h>
22
23#include <regex>
24#include "rapidjson/fwd.h"
25
27{
28class Monitoring;
29}
30
31namespace o2::framework
32{
33
35 std::string fileName;
37 std::vector<uint64_t> listOfTimeFrameNumbers;
38 std::vector<bool> alreadyRead;
39};
40FileNameHolder* makeFileNameHolder(std::string fileName);
41
43{
// Body of the DataInputDescriptor class (class name taken from the constructor below).
// NOTE(review): this listing was extracted from rendered documentation. The class
// header line and the lines numbered 44-46, 65, 80, 81, 85 and 90 are not visible
// here, so any members declared on those lines are not covered by these comments.
47
48 public:
// Table name and tree name; both default to the empty string.
49 std::string tablename = "";
50 std::string treename = "";
// Matcher object owned by this descriptor (unique_ptr).
// Presumably used to decide which data this descriptor serves - TODO confirm.
51 std::unique_ptr<data_matcher::DataDescriptorMatcher> matcher;
52
// Constructor. alienSupport and level are required; monitoring, allowedParentLevel
// and parentFileReplacement are optional (defaults: nullptr, 0, ""). The definition
// is not visible in this listing.
53 DataInputDescriptor(bool alienSupport, int level, o2::monitoring::Monitoring* monitoring = nullptr, int allowedParentLevel = 0, std::string parentFileReplacement = "");
54
55 void printOut();
56
// Each setter below comes in two overloads: the std::string overload copies the
// argument into this object's own member, while the pointer overload only stores
// the pointer (the pointed-to string is not copied).
// NOTE(review): which of the two (value or pointer) takes precedence is decided by
// the getters, whose definitions are not visible here.
57 // setters
58 void setInputfilesFile(std::string dffn) { minputfilesFile = dffn; }
59 void setInputfilesFile(std::string* dffnptr) { minputfilesFilePtr = dffnptr; }
60 void setFilenamesRegex(std::string fn) { mFilenameRegex = fn; }
61 void setFilenamesRegex(std::string* fnptr) { mFilenameRegexPtr = fnptr; }
62
// Stores a pointer to an externally held list of FileNameHolder pointers; the list
// itself is not copied.
63 void setDefaultInputfiles(std::vector<FileNameHolder*>* difnptr) { mdefaultFilenamesPtr = difnptr; }
64
66 int fillInputfiles();
67 bool setFile(int counter);
68
69 // getters
70 std::string getInputfilesFilename();
71 std::string getFilenamesRegexString();
72 std::regex getFilenamesRegex();
// Returns the number of entries in mfilenames; the container size is converted to int.
73 int getNumberInputfiles() { return mfilenames.size(); }
// Returns the current value of mtotalNumberTimeFrames.
74 int getNumberTimeFrames() { return mtotalNumberTimeFrames; }
75 int findDFNumber(int file, std::string dfName);
76
// The following accessors are addressed by (counter, numTF).
// Presumably counter is a file index and numTF a time-frame index within that
// file - TODO confirm against the implementation.
77 uint64_t getTimeFrameNumber(int counter, int numTF);
78 arrow::dataset::FileSource getFileFolder(int counter, int numTF);
79 DataInputDescriptor* getParentFile(int counter, int numTF, std::string treename);
82
// totalSizeCompressed and totalSizeUncompressed are taken by non-const reference,
// so the implementation is able to update the caller's variables.
83 bool readTree(DataAllocator& outputs, header::DataHeader dh, int counter, int numTF, std::string treename, size_t& totalSizeCompressed, size_t& totalSizeUncompressed);
84
86 void closeInputFile();
// Returns the mAlienSupport flag.
87 bool isAlienSupportOn() { return mAlienSupport; }
88
89 private:
// Value/pointer pairs written by the setter overloads above. The pointers start
// out as nullptr.
91 std::string minputfilesFile = "";
92 std::string* minputfilesFilePtr = nullptr;
93 std::string mFilenameRegex = "";
94 std::string* mFilenameRegexPtr = nullptr;
95 int mAllowedParentLevel = 0;
96 std::string mParentFileReplacement;
// NOTE(review): mfilenames holds raw FileNameHolder pointers; this listing does
// not show who is responsible for deleting them - TODO confirm ownership.
97 std::vector<FileNameHolder*> mfilenames;
98 std::vector<FileNameHolder*>* mdefaultFilenamesPtr = nullptr;
99 std::shared_ptr<arrow::fs::FileSystem> mCurrentFilesystem;
// Initialised to -1; presumably meaning "no file selected yet" - TODO confirm.
100 int mCurrentFileID = -1;
101 bool mAlienSupport = false;
102
103 o2::monitoring::Monitoring* mMonitoring = nullptr;
104
// Raw pointers, initially nullptr. Ownership is not expressed by the types -
// TODO confirm where (and whether) they are released.
105 TMap* mParentFileMap = nullptr;
106 DataInputDescriptor* mParentFile = nullptr;
107 int mLevel = 0; // level of parent files
108
109 int mtotalNumberTimeFrames = 0;
110
// Both counters start at 0. Presumably timing bookkeeping for file I/O (units are
// not visible here) - TODO confirm.
111 uint64_t mIOTime = 0;
112 uint64_t mCurrentFileStartedAt = 0;
113};
114
116{
// Body of the DataInputDirector class (class name taken from the constructors below).
// NOTE(review): this listing was extracted from rendered documentation. The class
// header line and the lines numbered 117-119, 122, 125, 138, 140, 145, 147 and 148
// are not visible here, so any members declared on those lines are not covered by
// these comments.
120
121 public:
// Two constructor overloads: one takes a single input file name, the other a list
// of input file names. monitoring, allowedParentLevel and parentFileReplacement are
// optional in both (defaults: nullptr, 0, ""). Definitions are not visible in this listing.
123 DataInputDirector(std::string inputFile, o2::monitoring::Monitoring* monitoring = nullptr, int allowedParentLevel = 0, std::string parentFileReplacement = "");
124 DataInputDirector(std::vector<std::string> inputFiles, o2::monitoring::Monitoring* monitoring = nullptr, int allowedParentLevel = 0, std::string parentFileReplacement = "");
126
127 void reset();
128 void printOut();
129 bool atEnd(int counter);
130
// The two inline setters copy their argument into the corresponding member.
131 // setters
132 void setInputfilesFile(std::string iffn) { minputfilesFile = iffn; }
133 void setFilenamesRegex(std::string dfn) { mFilenameRegex = dfn; }
// Takes the path/name of a JSON input by const reference; returns bool.
// Presumably true on success - TODO confirm.
134 bool readJson(std::string const& fnjson);
135 void closeInputFiles();
136
137 // getters
// Returns the number of entries in mdataInputDescriptors; the container size is
// converted to int.
139 int getNumberInputDescriptors() { return mdataInputDescriptors.size(); }
141
// These members take a header::DataHeader by value in addition to (counter, numTF).
// Presumably dh selects which input descriptor handles the request - TODO confirm.
// totalSizeCompressed and totalSizeUncompressed are non-const references, so the
// implementation is able to update the caller's variables.
142 bool readTree(DataAllocator& outputs, header::DataHeader dh, int counter, int numTF, size_t& totalSizeCompressed, size_t& totalSizeUncompressed);
143 uint64_t getTimeFrameNumber(header::DataHeader dh, int counter, int numTF);
144 arrow::dataset::FileSource getFileFolder(header::DataHeader dh, int counter, int numTF);
146
149
150 private:
// minputfilesFilePtr and mFilenameRegexPtr are const pointers initialised to the
// addresses of this object's own minputfilesFile and mFilenameRegex members.
// NOTE(review): an implicitly generated copy constructor would copy the pointer
// values, leaving the copy pointing at the source object's strings - confirm that
// objects of this class are never copied (special members are not visible here).
151 std::string minputfilesFile;
152 std::string* const minputfilesFilePtr = &minputfilesFile;
153 std::string mFilenameRegex;
154 int mAllowedParentLevel = 0;
155 std::string mParentFileReplacement;
156 std::string* const mFilenameRegexPtr = &mFilenameRegex;
// Raw pointers and containers of raw pointers. Ownership is not expressed by the
// types - TODO confirm where these objects are created and released.
157 DataInputDescriptor* mdefaultDataInputDescriptor = nullptr;
158 std::vector<FileNameHolder*> mdefaultInputFiles;
159 std::vector<DataInputDescriptor*> mdataInputDescriptors;
160
161 o2::monitoring::Monitoring* mMonitoring = nullptr;
162
// Both flags default to false.
163 bool mDebugMode = false;
164 bool mAlienSupport = false;
165
// Private helpers; definitions are not visible in this listing.
166 bool readJsonDocument(rapidjson::Document* doc);
167 bool isValid();
168};
169
170} // namespace o2::framework
171
172#endif // O2_FRAMEWORK_DATAINPUTDIRECTOR_H_
o2::monitoring::Monitoring Monitoring
void addFileNameHolder(FileNameHolder *fn)
bool readTree(DataAllocator &outputs, header::DataHeader dh, int counter, int numTF, std::string treename, size_t &totalSizeCompressed, size_t &totalSizeUncompressed)
std::unique_ptr< data_matcher::DataDescriptorMatcher > matcher
uint64_t getTimeFrameNumber(int counter, int numTF)
arrow::dataset::FileSource getFileFolder(int counter, int numTF)
void setDefaultInputfiles(std::vector< FileNameHolder * > *difnptr)
void setInputfilesFile(std::string dffn)
void setInputfilesFile(std::string *dffnptr)
void setFilenamesRegex(std::string *fnptr)
DataInputDescriptor * getParentFile(int counter, int numTF, std::string treename)
int findDFNumber(int file, std::string dfName)
bool readTree(DataAllocator &outputs, header::DataHeader dh, int counter, int numTF, size_t &totalSizeCompressed, size_t &totalSizeUncompressed)
DataInputDescriptor * getDataInputDescriptor(header::DataHeader dh)
arrow::dataset::FileSource getFileFolder(header::DataHeader dh, int counter, int numTF)
void setInputfilesFile(std::string iffn)
int getTimeFramesInFile(header::DataHeader dh, int counter)
uint64_t getTimeFrameNumber(header::DataHeader dh, int counter, int numTF)
void setFilenamesRegex(std::string dfn)
bool readJson(std::string const &fnjson)
GLuint counter
Definition glcorearb.h:3987
Defining PrimaryVertex explicitly as messageable.
Definition TFIDInfo.h:20
FileNameHolder * makeFileNameHolder(std::string fileName)
std::vector< uint64_t > listOfTimeFrameNumbers
the main header struct
Definition DataHeader.h:618