Project
Loading...
Searching...
No Matches
TreeMergerTool.cxx
Go to the documentation of this file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
// A utility to produce a single, globally merged TTree
// from multiple TTrees (each containing a subset of the branches).
// A typical example is TPC clusterization/digitization: clusters per TPC
// sector may sit in different files and we want to produce an aggregate TTree
// for further processing. The utility offers the option to either use the
// TFriend mechanism or to make a deep copy.
18
19#include <TTree.h>
20#include <TFile.h>
21#include <boost/program_options.hpp>
22#include <set>
23#include <vector>
24#include <iostream>
25
// Command-line configuration of the tool, filled by parseOptions().
struct Options {
  std::vector<std::string> infilenames; // all input files to be merged
  std::string treename;                 // name of the tree (assumed to be the same in all files)
  std::string outfilename;              // output file to be created with the merged tree
  bool asfriend{false};                 // true if merging is done via the friend mechanism
};
32
33// just to make a protected interface accessible
34class MyTTreeHelper : public TTree
35{
36 public:
37 TBranch* PublicBranchImp(const char* branchname, TClass* ptrClass, void* addobj, Int_t bufsize, Int_t splitlevel)
38 {
39 return BranchImp(branchname, ptrClass, addobj, bufsize, splitlevel);
40 }
41};
42
43bool parseOptions(int argc, char* argv[], Options& optvalues)
44{
45 namespace bpo = boost::program_options;
46 bpo::options_description options(
47 "A tool to create a single TTree from a list of TTrees (each in its own file).\nMerging is "
48 "done vertically - over branches - instead over entries (like in a TChain).\nIt corresponds to the TFriend mechanism but makes a deep copy\n"
49 "(unless the friend is asked).\n\n"
50 "Allowed options");
51
52 options.add_options()(
53 "infiles,i", bpo::value<std::vector<std::string>>(&optvalues.infilenames)->multitoken(), "All input files to be merged")(
54 "treename,t", bpo::value<std::string>(&optvalues.treename), "Name of tree (assumed same in all files).")(
55 "outfile,o", bpo::value<std::string>(&optvalues.outfilename)->default_value(""), "Outfile to be created with merged tree.")(
56 "asfriend", "If merging is done using the friend mechanism.");
57 options.add_options()("help,h", "Produce help message.");
58
59 bpo::variables_map vm;
60 try {
61 bpo::store(bpo::command_line_parser(argc, argv).options(options).run(), vm);
62 bpo::notify(vm);
63
64 // help
65 if (vm.count("help")) {
66 std::cout << options << std::endl;
67 return false;
68 }
69 if (vm.count("asfriend")) {
70 optvalues.asfriend = true;
71 }
72
73 } catch (const bpo::error& e) {
74 std::cerr << e.what() << "\n\n";
75 std::cerr << "Error parsing options; Available options:\n";
76 std::cerr << options << std::endl;
77 return false;
78 }
79 return true;
80}
81
82// Checks if all given files have a TTree of this name
83// and if all entries are the same
84// TODO: add more checks such as for non-overlapping branch names etc.
85bool checkFiles(std::vector<std::string> const& filenames, std::string const& treename)
86{
87 bool ok = true;
88 int entries = -1;
89 for (auto& f : filenames) {
90 TFile _tmpfile(f.c_str(), "OPEN");
91 auto tree = (TTree*)_tmpfile.Get(treename.c_str());
92 if (tree == nullptr) {
93 ok = false;
94 std::cerr << "File " << f << " doesn't have a tree of name " << treename;
95 } else {
96 if (entries == -1) {
97 entries = tree->GetEntries();
98 } else {
99 if (entries != tree->GetEntries()) {
100 std::cerr << "Trees have inconsistent number of entries ";
101 ok = false;
102 }
103 }
104 }
105 }
106 return ok;
107}
108
109// a helper function taken from TTree.cxx
110static char DataTypeToChar(EDataType datatype)
111{
112 // Return the leaflist 'char' for a given datatype.
113
114 switch (datatype) {
115 case kChar_t:
116 return 'B';
117 case kUChar_t:
118 return 'b';
119 case kBool_t:
120 return 'O';
121 case kShort_t:
122 return 'S';
123 case kUShort_t:
124 return 's';
125 case kCounter:
126 case kInt_t:
127 return 'I';
128 case kUInt_t:
129 return 'i';
130 case kDouble_t:
131 return 'D';
132 case kDouble32_t:
133 return 'd';
134 case kFloat_t:
135 return 'F';
136 case kFloat16_t:
137 return 'f';
138 case kLong_t:
139 return 'G';
140 case kULong_t:
141 return 'g';
142 case kchar:
143 return 0; // unsupported
144 case kLong64_t:
145 return 'L';
146 case kULong64_t:
147 return 'l';
148
149 case kCharStar:
150 return 'C';
151 case kBits:
152 return 0; //unsupported
153
154 case kOther_t:
155 case kNoType_t:
156 default:
157 return 0;
158 }
159 return 0;
160}
161
162void merge(Options const& options)
163{
164 if (options.asfriend) {
165 // open the output file
166 auto newfile = TFile::Open(options.outfilename.c_str(), "RECREATE");
167 auto newtree = new TTree(options.treename.c_str(), "");
168 // add remaining stuff as friend
169 for (int i = 0; i < options.infilenames.size(); ++i) {
170 newtree->AddFriend(options.treename.c_str(), options.infilenames[i].c_str());
171 }
172 newfile->Write();
173 newfile->Close();
174
175 // P. Canal suggests that this can be done in the following way to fix the branch names
176 // in the merged file and to keep only the final file:
177 //auto mainfile = TFile::Open(firsttreefilename, "UPDATE");
178 //auto friendfile = TFile::Open(secondtreefilename, "READ");
179 //auto friendtree = ffriendfile>Get<Tree>(secondtreename);
180 //mainfile->cd();
181 //auto friendcopy = friendtree->CloneTree(-1, "fast");
182 //auto maintree = mainfile->Get<TTree>(firsttreename);
183 //maintree->AddFriend(friendcopy);
184 //mainfile->Write();
185 } else {
186 // a deep copy solution
187
188 auto copyBranch = [](TTree* t, TBranch* br) -> bool {
189 // Get data from original branch and copy to new
190 // by using generic type/class information of old.
191 // We are using some internals of the TTree implementation. (Luckily these
192 // functions are not marked private ... so that we can still access them).
193 TClass* clptr = nullptr;
194 EDataType type;
195 if (br->GetExpectedType(clptr, type) == 0) {
196 char* data = nullptr;
197 TBranch* newbr = nullptr;
198 if (clptr != nullptr) {
199 newbr = ((MyTTreeHelper*)t)->PublicBranchImp(br->GetName(), clptr, &data, 32000, br->GetSplitLevel());
200 } else if (type != EDataType::kOther_t) {
201 TString varname;
202 varname.Form("%s/%c", br->GetName(), DataTypeToChar(type));
203 newbr = t->Branch(br->GetName(), &data, varname.Data());
204 } else {
205 std::cerr << "Could not retrieve class/type information. Branch " << br->GetName() << "cannot be copied.\n";
206 return false;
207 }
208 if (newbr) {
209 br->SetAddress(&data);
210 for (int e = 0; e < br->GetEntries(); ++e) {
211 auto size = br->GetEntry(e);
212 newbr->Fill();
213 }
214 br->ResetAddress();
215 br->DropBaskets("all");
216 return true;
217 // TODO: data is leaking? (but deleting it here causes a crash)
218 }
219 } // end good
220 return false;
221 };
222
223 TFile outfile(options.outfilename.c_str(), "RECREATE");
224 auto outtree = new TTree(options.treename.c_str(), options.treename.c_str());
225 // iterate over files and branches
226 for (auto filename : options.infilenames) {
227 TFile _tmp(filename.c_str(), "OPEN");
228 auto t = (TTree*)_tmp.Get(options.treename.c_str());
229 auto brlist = t->GetListOfBranches();
230 for (int i = 0; i < brlist->GetEntries(); ++i) {
231 auto br = (TBranch*)brlist->At(i);
232 if (!copyBranch(outtree, br)) {
233 std::cerr << "Error copying branch " << br->GetName() << "\n";
234 }
235 }
236 outtree->SetEntries(t->GetEntries());
237 }
238 outfile.Write();
239 outfile.Close();
240 }
241
242 // Note: There is/was also an elegant solution based on RDataFrames (snapshot) as discussed here:
243 // https://root-forum.cern.ch/t/make-a-new-ttree-from-a-deep-vertical-union-of-existing-ttrees/44250
244 // ... but this solution has problems since ROOT 6-24 since RDataFrame may change the internal type
245 // of std::vector<> using a non-default allocator which may cause problem when reading data back.
246}
247
248int main(int argc, char* argv[])
249{
250 Options optvalues;
251 if (!parseOptions(argc, argv, optvalues)) {
252 return 0;
253 }
254
255 auto ok = checkFiles(optvalues.infilenames, optvalues.treename);
256 if (!ok) {
257 return 1;
258 }
259
260 // merge files
261 merge(optvalues);
262
263 return 0;
264}
int32_t i
bool parseOptions(int argc, char *argv[], Options &optvalues)
void merge(Options const &options)
bool checkFiles(std::vector< std::string > const &filenames, std::string const &treename)
TBranch * PublicBranchImp(const char *branchname, TClass *ptrClass, void *addobj, Int_t bufsize, Int_t splitlevel)
GLsizeiptr size
Definition glcorearb.h:659
GLdouble f
Definition glcorearb.h:310
GLint GLint GLsizei GLint GLenum GLenum type
Definition glcorearb.h:275
GLboolean * data
Definition glcorearb.h:298
std::string filename()
std::vector< std::string > infilenames
std::string treename
std::string outfilename
#define main
std::unique_ptr< TTree > tree((TTree *) flIn.Get(std::string(o2::base::NameConf::CTFTREENAME).c_str()))