Project
Loading...
Searching...
No Matches
aodStrainer.cxx
Go to the documentation of this file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
12#include <map>
13#include <list>
14#include <fstream>
15#include <getopt.h>
16#include <sstream>
17#include <random>
18
19#include "TSystem.h"
20#include "TFile.h"
21#include "TTree.h"
22#include "TList.h"
23#include "TKey.h"
24#include "TDirectory.h"
25#include "TObjString.h"
26#include <TGrid.h>
27#include <TMap.h>
28#include <TLeaf.h>
29
30#include "aodMerger.h"
31#include <cinttypes>
32
33// AOD strainer with correct index rewriting: a strainer only for the table of interest
34int main(int argc, char* argv[])
35{
36 std::string inputAO2D("AO2D.root");
37 std::string outputFileName{"AO2D_strained.root"};
38 std::string tables{"O2bc,O2calotrigger,O2collision,O2fdd,O2ft0,O2fv0a"};
39 double downsampling = 1.0;
40 int verbosity = 2;
41 int exitCode = 0; // 0: success, >0: failure
42 int compression = 505;
43
44 std::random_device rd; // Seed generator
45 std::mt19937 gen(rd()); // Mersenne Twister generator
46 std::uniform_real_distribution<> dis(0.0, 1.0);
47
48 int option_index = 0;
49 static struct option long_options[] = {
50 {"input", required_argument, nullptr, 0},
51 {"output", required_argument, nullptr, 1},
52 {"verbosity", required_argument, nullptr, 2},
53 {"tables", required_argument, nullptr, 3},
54 {"downsampling", required_argument, nullptr, 4},
55 {"compression", required_argument, nullptr, 5},
56 {"help", no_argument, nullptr, 'h'},
57 {nullptr, 0, nullptr, 0}};
58
59 while (true) {
60 int c = getopt_long(argc, argv, "", long_options, &option_index);
61 if (c == -1) {
62 break;
63 } else if (c == 0) {
64 inputAO2D = optarg;
65 } else if (c == 1) {
66 outputFileName = optarg;
67 } else if (c == 2) {
68 verbosity = atoi(optarg);
69 } else if (c == 3) {
70 tables = optarg;
71 } else if (c == 4) {
72 downsampling = atof(optarg);
73 } else if (c == 5) {
74 compression = atoi(optarg);
75 } else if (c == 'h') {
76 printf("AO2D strainer tool. Options: \n");
77 printf(" --input <%s> Contains path to files to be merged. Default: %s\n", inputAO2D.c_str(), inputAO2D.c_str());
78 printf(" --output <%s> Target output ROOT file. Default: %s\n", outputFileName.c_str(), outputFileName.c_str());
79 printf(" --verbosity <flag> Verbosity of output (default: %d).\n", verbosity);
80 printf(" --tables <list of tables> Comma separated list of tables (default: %s).\n", tables.c_str());
81 printf(" --downsampling <downsample> Fraction of DF to be kept (default: %f)\n", downsampling);
82 printf(" --compression <root compression id> Compression algorithm / level to use (default: %d)\n", compression);
83 return -1;
84 } else {
85 return -2;
86 }
87 }
88
89 printf("AOD strainer started with:\n");
90 printf(" Input file: %s\n", inputAO2D.c_str());
91 printf(" Output file name: %s\n", outputFileName.c_str());
92 printf(" Tables to be kept: %s\n", tables.c_str());
93 printf(" Downsampling: %f\n", downsampling);
94
95 std::vector<std::string> listOfTables;
96 std::stringstream ss(tables);
97 std::string token;
98
99 while (std::getline(ss, token, ',')) {
100 listOfTables.push_back(token);
101 }
102
103 auto outputFile = TFile::Open(outputFileName.c_str(), "RECREATE", "", compression);
104 TDirectory* outputDir = nullptr;
105 TString line(inputAO2D.c_str());
106 if (line.BeginsWith("alien://") && !gGrid && !TGrid::Connect("alien:")) {
107 printf("Error: Could not connect to AliEn.\n");
108 return -1;
109 }
110 printf("Processing input file: %s\n", line.Data());
111 auto inputFile = TFile::Open(line);
112 if (!inputFile) {
113 printf("Error: Could not open input file %s.\n", line.Data());
114 return -1;
115 }
116
117 TList* keyList = inputFile->GetListOfKeys();
118 keyList->Sort();
119
120 for (auto key1 : *keyList) {
121 if (((TObjString*)key1)->GetString().EqualTo("metaData")) {
122 auto metaDataCurrentFile = (TMap*)inputFile->Get("metaData");
123 outputFile->cd();
124 metaDataCurrentFile->Write("metaData", TObject::kSingleKey);
125 }
126
127 if (((TObjString*)key1)->GetString().EqualTo("parentFiles")) {
128 auto parentFilesCurrentFile = (TMap*)inputFile->Get("parentFiles");
129 outputFile->cd();
130 parentFilesCurrentFile->Write("parentFiles", TObject::kSingleKey);
131 }
132
133 if (!((TObjString*)key1)->GetString().BeginsWith("DF_") || dis(gen) > downsampling) {
134 continue;
135 }
136
137 auto dfName = ((TObjString*)key1)->GetString().Data();
138 if (verbosity > 0) {
139 printf(" Processing folder %s\n", dfName);
140 }
141 outputDir = outputFile->mkdir(dfName);
142 auto folder = (TDirectoryFile*)inputFile->Get(dfName);
143 auto treeList = folder->GetListOfKeys();
144
145 treeList->Sort();
146
147 // purging keys from duplicates
148 for (auto i = 0; i < treeList->GetEntries(); ++i) {
149 TKey* ki = (TKey*)treeList->At(i);
150 for (int j = i + 1; j < treeList->GetEntries(); ++j) {
151 TKey* kj = (TKey*)treeList->At(j);
152 if (std::strcmp(ki->GetName(), kj->GetName()) == 0 && std::strcmp(ki->GetTitle(), kj->GetTitle()) == 0) {
153 if (ki->GetCycle() < kj->GetCycle()) {
154 printf(" *** FATAL *** we had ordered the keys, first cycle should be higher, please check");
155 exitCode = 5;
156 } else {
157 // key is a duplicate, let's remove it
158 treeList->Remove(kj);
159 j--;
160 }
161 } else {
162 // we changed key, since they are sorted, we won't have the same anymore
163 break;
164 }
165 }
166 }
167
168 std::list<std::string> foundTrees;
169
170 for (auto key2 : *treeList) {
171 auto treeName = ((TObjString*)key2)->GetString().Data();
172 bool found = (std::find(foundTrees.begin(), foundTrees.end(), treeName) != foundTrees.end());
173 if (found == true) {
174 printf(" ***WARNING*** Tree %s was already merged (even if we purged duplicated trees before, so this should not happen), skipping\n", treeName);
175 continue;
176 }
177 bool foundTable = false;
178 for (auto const& table : listOfTables) {
179 if (table == removeVersionSuffix(treeName)) {
180 foundTrees.push_back(treeName);
181 foundTable = true;
182 break;
183 }
184 }
185 if (!foundTable) {
186 if (verbosity > 2) {
187 printf(" Skipping tree %s\n", treeName);
188 }
189 continue;
190 }
191
192 auto inputTree = (TTree*)inputFile->Get(Form("%s/%s", dfName, treeName));
193 if (verbosity > 1) {
194 printf(" Processing tree %s with %lld entries with total size %lld\n", treeName, inputTree->GetEntries(), inputTree->GetTotBytes());
195 }
196
197 outputDir->cd();
198 auto outputTree = inputTree->CloneTree(-1, "fast");
199 outputTree->Write();
200 }
201 }
202 // in case of failure, remove the incomplete file
203 if (exitCode != 0) {
204 printf("Removing incomplete output file %s.\n", outputFile->GetName());
205 gSystem->Unlink(outputFile->GetName());
206 } else {
207 outputFile->Close();
208 }
209 return exitCode;
210}
#define verbosity
default_random_engine gen(dev())
int32_t i
uint32_t j
Definition RawData.h:0
uint32_t c
Definition RawData.h:2
const char * removeVersionSuffix(const char *treeName)
Definition aodMerger.h:14
#define main
std::random_device rd