Project
Loading...
Searching...
No Matches
aodStrainer.cxx
Go to the documentation of this file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
12#include <map>
13#include <list>
14#include <fstream>
15#include <getopt.h>
16#include <sstream>
17#include <random>
18#include <algorithm>
19
20#include "TSystem.h"
21#include "TFile.h"
22#include "TTree.h"
23#include "TList.h"
24#include "TKey.h"
25#include "TDirectory.h"
26#include "TObjString.h"
27#include <TGrid.h>
28#include <TMap.h>
29#include <TLeaf.h>
30
31#include "aodMerger.h"
32#include <cinttypes>
33
34// AOD strainer with correct index rewriting: a strainer only for the table of interest
35int main(int argc, char* argv[])
36{
37 std::string inputAO2D("AO2D.root");
38 std::string outputFileName{"AO2D_strained.root"};
39 std::string tables{"O2bc,O2calotrigger,O2collision,O2fdd,O2ft0,O2fv0a"};
40 double downsampling = 1.0;
41 int verbosity = 2;
42 int exitCode = 0; // 0: success, >0: failure
43 int compression = 505;
44
45 std::random_device rd; // Seed generator
46 std::mt19937 gen(rd()); // Mersenne Twister generator
47 std::uniform_real_distribution<> dis(0.0, 1.0);
48
49 int option_index = 0;
50 static struct option long_options[] = {
51 {"input", required_argument, nullptr, 0},
52 {"output", required_argument, nullptr, 1},
53 {"verbosity", required_argument, nullptr, 2},
54 {"tables", required_argument, nullptr, 3},
55 {"downsampling", required_argument, nullptr, 4},
56 {"compression", required_argument, nullptr, 5},
57 {"help", no_argument, nullptr, 'h'},
58 {nullptr, 0, nullptr, 0}};
59
60 while (true) {
61 int c = getopt_long(argc, argv, "", long_options, &option_index);
62 if (c == -1) {
63 break;
64 } else if (c == 0) {
65 inputAO2D = optarg;
66 } else if (c == 1) {
67 outputFileName = optarg;
68 } else if (c == 2) {
69 verbosity = atoi(optarg);
70 } else if (c == 3) {
71 tables = optarg;
72 } else if (c == 4) {
73 downsampling = atof(optarg);
74 } else if (c == 5) {
75 compression = atoi(optarg);
76 } else if (c == 'h') {
77 printf("AO2D strainer tool. Options: \n");
78 printf(" --input <%s> Contains path to files to be merged. Default: %s\n", inputAO2D.c_str(), inputAO2D.c_str());
79 printf(" --output <%s> Target output ROOT file. Default: %s\n", outputFileName.c_str(), outputFileName.c_str());
80 printf(" --verbosity <flag> Verbosity of output (default: %d).\n", verbosity);
81 printf(" --tables <list of tables> Comma separated list of tables (default: %s).\n", tables.c_str());
82 printf(" --downsampling <downsample> Fraction of DF to be kept (default: %f)\n", downsampling);
83 printf(" --compression <root compression id> Compression algorithm / level to use (default: %d)\n", compression);
84 return -1;
85 } else {
86 return -2;
87 }
88 }
89
90 printf("AOD strainer started with:\n");
91 printf(" Input file: %s\n", inputAO2D.c_str());
92 printf(" Output file name: %s\n", outputFileName.c_str());
93 printf(" Tables to be kept: %s\n", tables.c_str());
94 printf(" Downsampling: %f\n", downsampling);
95
96 std::vector<std::string> listOfTables;
97 std::stringstream ss(tables);
98 std::string token;
99
100 while (std::getline(ss, token, ',')) {
101 listOfTables.push_back(token);
102 }
103
104 auto outputFile = TFile::Open(outputFileName.c_str(), "RECREATE", "", compression);
105 TDirectory* outputDir = nullptr;
106 TString line(inputAO2D.c_str());
107 if (line.BeginsWith("alien://") && !gGrid && !TGrid::Connect("alien:")) {
108 printf("Error: Could not connect to AliEn.\n");
109 return -1;
110 }
111 printf("Processing input file: %s\n", line.Data());
112 auto inputFile = TFile::Open(line);
113 if (!inputFile) {
114 printf("Error: Could not open input file %s.\n", line.Data());
115 return -1;
116 }
117
118 TList* keyList = inputFile->GetListOfKeys();
119 keyList->Sort();
120
121 for (auto key1 : *keyList) {
122 if (((TObjString*)key1)->GetString().EqualTo("metaData")) {
123 auto metaDataCurrentFile = (TMap*)inputFile->Get("metaData");
124 outputFile->cd();
125 metaDataCurrentFile->Write("metaData", TObject::kSingleKey);
126 }
127
128 if (((TObjString*)key1)->GetString().EqualTo("parentFiles")) {
129 auto parentFilesCurrentFile = (TMap*)inputFile->Get("parentFiles");
130 outputFile->cd();
131 parentFilesCurrentFile->Write("parentFiles", TObject::kSingleKey);
132 }
133
134 if (!((TObjString*)key1)->GetString().BeginsWith("DF_") || dis(gen) > downsampling) {
135 continue;
136 }
137
138 auto dfName = ((TObjString*)key1)->GetString().Data();
139 if (verbosity > 0) {
140 printf(" Processing folder %s\n", dfName);
141 }
142 outputDir = outputFile->mkdir(dfName);
143 auto folder = (TDirectoryFile*)inputFile->Get(dfName);
144 auto treeList = folder->GetListOfKeys();
145
146 treeList->Sort();
147
148 // purging keys from duplicates
149 for (auto i = 0; i < treeList->GetEntries(); ++i) {
150 TKey* ki = (TKey*)treeList->At(i);
151 for (int j = i + 1; j < treeList->GetEntries(); ++j) {
152 TKey* kj = (TKey*)treeList->At(j);
153 if (std::strcmp(ki->GetName(), kj->GetName()) == 0 && std::strcmp(ki->GetTitle(), kj->GetTitle()) == 0) {
154 if (ki->GetCycle() < kj->GetCycle()) {
155 printf(" *** FATAL *** we had ordered the keys, first cycle should be higher, please check");
156 exitCode = 5;
157 } else {
158 // key is a duplicate, let's remove it
159 treeList->Remove(kj);
160 j--;
161 }
162 } else {
163 // we changed key, since they are sorted, we won't have the same anymore
164 break;
165 }
166 }
167 }
168
169 std::list<std::string> foundTrees;
170
171 for (auto key2 : *treeList) {
172 auto treeName = ((TObjString*)key2)->GetString().Data();
173 bool found = (std::find(foundTrees.begin(), foundTrees.end(), treeName) != foundTrees.end());
174 if (found == true) {
175 printf(" ***WARNING*** Tree %s was already merged (even if we purged duplicated trees before, so this should not happen), skipping\n", treeName);
176 continue;
177 }
178 bool foundTable = false;
179 for (auto const& table : listOfTables) {
180 if (table == removeVersionSuffix(treeName)) {
181 foundTrees.push_back(treeName);
182 foundTable = true;
183 break;
184 }
185 }
186 if (!foundTable) {
187 if (verbosity > 2) {
188 printf(" Skipping tree %s\n", treeName);
189 }
190 continue;
191 }
192
193 auto inputTree = (TTree*)inputFile->Get(Form("%s/%s", dfName, treeName));
194 if (verbosity > 1) {
195 printf(" Processing tree %s with %lld entries with total size %lld\n", treeName, inputTree->GetEntries(), inputTree->GetTotBytes());
196 }
197
198 outputDir->cd();
199 auto outputTree = inputTree->CloneTree(-1, "fast");
200 outputTree->Write();
201 }
202 }
203 // in case of failure, remove the incomplete file
204 if (exitCode != 0) {
205 printf("Removing incomplete output file %s.\n", outputFile->GetName());
206 gSystem->Unlink(outputFile->GetName());
207 } else {
208 outputFile->Close();
209 }
210 return exitCode;
211}
#define verbosity
default_random_engine gen(dev())
int32_t i
uint32_t j
Definition RawData.h:0
uint32_t c
Definition RawData.h:2
const char * removeVersionSuffix(const char *treeName)
Definition aodMerger.h:14
#define main
std::random_device rd