Project
Loading...
Searching...
No Matches
aodThinner.cxx
Go to the documentation of this file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
12#include <unordered_map>
13#include <getopt.h>
14
15#include "TSystem.h"
16#include "TStopwatch.h"
17#include "TString.h"
18#include "TRegexp.h"
19#include "TFile.h"
20#include "TTree.h"
21#include "TBranch.h"
22#include "TList.h"
23#include "TKey.h"
24#include "TDirectory.h"
25#include "TObjString.h"
26#include "TGrid.h"
27#include "TMap.h"
28#include "TLeaf.h"
29
30#include "aodMerger.h"
31
32// AOD reduction tool
33// Designed for the 2022 pp data with specific selections:
34// - Remove all TPC only tracks, optionally keep TPC-only V0 tracks
35// - Remove all V0s which refer to any removed track
36// - Remove all cascade which refer to any removed V0
37// - Remove all ambiguous track entries which point to a track with collision
38// - Adjust all indices
39int main(int argc, char* argv[])
40{
41 std::string inputFileName("AO2D.root");
42 std::string outputFileName("AO2D_thinned.root");
43 int exitCode = 0; // 0: success, !=0: failure
44 bool bOverwrite = false;
45 int compression = 505;
46
47 int option_index = 1;
48
49 const char* const short_opts = "i:o:KOh";
50 static struct option long_options[] = {
51 {"input", required_argument, nullptr, 'i'},
52 {"output", required_argument, nullptr, 'o'},
53 {"overwrite", no_argument, nullptr, 'O'},
54 {"compression", required_argument, nullptr, 'c'},
55 {"help", no_argument, nullptr, 'h'},
56 {nullptr, 0, nullptr, 0}};
57
58 while (true) {
59 const auto opt = getopt_long(argc, argv, short_opts, long_options, &option_index);
60 if (opt == -1) {
61 break; // use defaults
62 }
63 switch (opt) {
64 case 'i':
65 inputFileName = optarg;
66 break;
67 case 'o':
68 outputFileName = optarg;
69 break;
70 case 'O':
71 bOverwrite = true;
72 printf("Overwriting existing output file if existing\n");
73 break;
74 case 'c':
75 compression = atoi(optarg);
76 break;
77 case 'h':
78 case '?':
79 default:
80 printf("AO2D thinning tool. Options: \n");
81 printf(" --input/-i <inputfile.root> Contains input file path to the file to be thinned. Default: %s\n", inputFileName.c_str());
82 printf(" --output/-o <outputfile.root> Target output ROOT file. Default: %s\n", outputFileName.c_str());
83 printf(" --compression/-c <compression id> ROOT compression algorithm / level. Default: %d\n", compression);
84 printf("\n");
85 printf(" Optional Arguments:\n");
86 printf(" --overwrite/-O Overwrite existing output file\n");
87 return -1;
88 }
89 }
90
91 printf("AOD reduction started with:\n");
92 printf(" Input file: %s\n", inputFileName.c_str());
93 printf(" Ouput file name: %s\n", outputFileName.c_str());
94
95 TStopwatch clock;
96 clock.Start(kTRUE);
97
98 auto outputFile = TFile::Open(outputFileName.c_str(), (bOverwrite) ? "RECREATE" : "CREATE", "", compression);
99 if (outputFile == nullptr) {
100 printf("Error: File %s exists or cannot be created!\n", outputFileName.c_str());
101 return 1;
102 }
103 TDirectory* outputDir = nullptr;
104
105 if (inputFileName.find("alien:") == 0) {
106 printf("Connecting to AliEn...");
107 TGrid::Connect("alien:");
108 }
109
110 auto inputFile = TFile::Open(inputFileName.c_str());
111 if (!inputFile) {
112 printf("Error: Could not open input file %s.\n", inputFileName.c_str());
113 return 1;
114 }
115
116 TList* keyList = inputFile->GetListOfKeys();
117 keyList->Sort();
118
119 for (auto key1 : *keyList) {
120 // Keep metaData
121 if (((TObjString*)key1)->GetString().EqualTo("metaData")) {
122 auto metaData = (TMap*)inputFile->Get("metaData");
123 outputFile->cd();
124 metaData->Write("metaData", TObject::kSingleKey);
125 }
126
127 // Keep parentFiles
128 if (((TObjString*)key1)->GetString().EqualTo("parentFiles")) {
129 auto parentFiles = (TMap*)inputFile->Get("parentFiles");
130 outputFile->cd();
131 parentFiles->Write("parentFiles", TObject::kSingleKey);
132 }
133
134 // Skip everything else, except 'DF_*'
135 if (!((TObjString*)key1)->GetString().BeginsWith("DF_")) {
136 continue;
137 }
138
139 auto dfName = ((TObjString*)key1)->GetString().Data();
140
141 printf(" Processing folder %s\n", dfName);
142 auto folder = (TDirectoryFile*)inputFile->Get(dfName);
143 auto treeList = folder->GetListOfKeys();
144 treeList->Sort();
145
146 // purging keys from duplicates
147 for (auto i = 0; i < treeList->GetEntries(); ++i) {
148 TKey* ki = (TKey*)treeList->At(i);
149 for (int j = i + 1; j < treeList->GetEntries(); ++j) {
150 TKey* kj = (TKey*)treeList->At(j);
151 if (std::strcmp(ki->GetName(), kj->GetName()) == 0 && std::strcmp(ki->GetTitle(), kj->GetTitle()) == 0) {
152 if (ki->GetCycle() < kj->GetCycle()) {
153 printf(" *** FATAL *** we had ordered the keys, first cycle should be higher, please check");
154 exitCode = 5;
155 } else {
156 // key is a duplicate, let's remove it
157 treeList->Remove(kj);
158 j--;
159 }
160 } else {
161 // we changed key, since they are sorted, we won't have the same anymore
162 break;
163 }
164 }
165 }
166
167 // Scan versions e.g. 001 or 002 ...
168 TString v0Name{"O2v0_???"}, trkExtraName{"O2trackextra*"};
169 TRegexp v0Re(v0Name, kTRUE), trkExtraRe(trkExtraName, kTRUE);
170 bool hasTrackQA{false};
171 TString trackQAName{"O2trackqa*"};
172 TRegexp trackQARe(trackQAName, kTRUE);
173 for (TObject* obj : *treeList) {
174 TString st = obj->GetName();
175 if (st.Index(v0Re) != kNPOS) {
176 v0Name = st;
177 } else if (st.Index(trkExtraRe) != kNPOS) {
178 trkExtraName = st;
179 } else if (st.Index(trackQARe) != kNPOS) {
180 hasTrackQA = true;
181 trackQAName = st;
182 }
183 }
184
185 // Certain order needed in order to populate vectors of skipped entries
186 auto v0Entry = (TObject*)treeList->FindObject(v0Name);
187 treeList->Remove(v0Entry);
188 treeList->AddFirst(v0Entry);
189
190 // Prepare maps for track skimming
191 auto trackExtraTree = (TTree*)inputFile->Get(Form("%s/%s", dfName, trkExtraName.Data()));
192 if (trackExtraTree == nullptr) {
193 printf("%s table not found\n", trkExtraName.Data());
194 exitCode = 6;
195 break;
196 }
197 auto track_iu = (TTree*)inputFile->Get(Form("%s/%s", dfName, "O2track_iu"));
198 if (track_iu == nullptr) {
199 printf("O2track_iu table not found\n");
200 exitCode = 7;
201 break;
202 }
203 auto v0s = (TTree*)inputFile->Get(Form("%s/%s", dfName, v0Name.Data()));
204 if (v0s == nullptr) {
205 printf("%s table not found\n", v0Name.Data());
206 exitCode = 8;
207 break;
208 }
209 // TrackQA
210 TTree* trackQA;
211 if (hasTrackQA && (trackQA = (TTree*)inputFile->Get(Form("%s/%s", dfName, trackQAName.Data()))) == nullptr) {
212 exitCode = 20;
213 break;
214 }
215
216 // We need to loop over the V0s once and flag the prong indices
217 int trackIdxPos = 0, trackIdxNeg = 0;
218 std::vector<bool> keepV0s(trackExtraTree->GetEntries(), false);
219 v0s->SetBranchAddress("fIndexTracks_Pos", &trackIdxPos);
220 v0s->SetBranchAddress("fIndexTracks_Neg", &trackIdxNeg);
221 auto nV0s = v0s->GetEntriesFast();
222 for (int i{0}; i < nV0s; ++i) {
223 v0s->GetEntry(i);
224 keepV0s[trackIdxPos] = true;
225 keepV0s[trackIdxNeg] = true;
226 }
227
228 // TrackQA
229 std::vector<bool> keepTrackQA;
230 if (hasTrackQA) {
231 keepTrackQA.assign(trackExtraTree->GetEntries(), false);
232 trackQA->SetBranchAddress("fIndexTracks", &trackIdxPos);
233 for (int i{0}; i < trackQA->GetEntries(); ++i) {
234 trackQA->GetEntry(i);
235 keepTrackQA[trackIdxPos] = true;
236 }
237 }
238
239 std::vector<int> acceptedTracks(trackExtraTree->GetEntries(), -1);
240 std::vector<bool> hasCollision(trackExtraTree->GetEntries(), false);
241
242 uint8_t tpcNClsFindable = 0;
243 bool bTPClsFindable = false;
244 uint8_t ITSClusterMap = 0;
245 UInt_t ITSClusterSizes = 0;
246 bool bITSClusterMap = false;
247 bool bITSClusterSizes = false;
248 uint8_t TRDPattern = 0;
249 bool bTRDPattern = false;
250 float_t TOFChi2 = 0;
251 bool bTOFChi2 = false;
252
253 // Test if track properties exist
254 TBranch* br = nullptr;
255 TIter next(trackExtraTree->GetListOfBranches());
256 while ((br = (TBranch*)next())) {
257 TString brName = br->GetName();
258 if (brName == "fTPCNClsFindable") {
259 trackExtraTree->SetBranchAddress("fTPCNClsFindable", &tpcNClsFindable);
260 bTPClsFindable = true;
261 } else if (brName == "fITSClusterMap") {
262 trackExtraTree->SetBranchAddress("fITSClusterMap", &ITSClusterMap);
263 bITSClusterMap = true;
264 } else if (brName == "fITSClusterSizes") {
265 trackExtraTree->SetBranchAddress("fITSClusterSizes", &ITSClusterSizes);
266 bITSClusterSizes = true;
267 } else if (brName == "fTRDPattern") {
268 trackExtraTree->SetBranchAddress("fTRDPattern", &TRDPattern);
269 bTRDPattern = true;
270 } else if (brName == "fTOFChi2") {
271 trackExtraTree->SetBranchAddress("fTOFChi2", &TOFChi2);
272 bTOFChi2 = true;
273 }
274 }
275
276 // Sanity-Check
277 // If any (%ITSClusterMap or %ITSClusterSizes) of these are not found, continuation is not possible, hence fataling
278 if (!bTPClsFindable || !bTRDPattern || !bTOFChi2 ||
279 (!bITSClusterMap && !bITSClusterSizes)) {
280 printf(" *** FATAL *** Branch detection failed in %s for trackextra.[(fITSClusterMap=%d,fITSClusterSizes=%d),fTPCNClsFindable=%d,fTRDPattern=%d,fTOFChi2=%d]\n", dfName, bITSClusterMap, bITSClusterSizes, bTPClsFindable, bTRDPattern, bTOFChi2);
281 exitCode = 10;
282 break;
283 }
284
285 int fIndexCollisions = 0;
286 track_iu->SetBranchAddress("fIndexCollisions", &fIndexCollisions);
287
288 // loop over all tracks
289 auto entries = trackExtraTree->GetEntries();
290 int counter = 0;
291 for (int i = 0; i < entries; i++) {
292 trackExtraTree->GetEntry(i);
293 track_iu->GetEntry(i);
294
295 // Flag collisions
296 hasCollision[i] = (fIndexCollisions >= 0);
297
298 // Remove TPC only tracks, if they are not assoc. to a V0
299 if (tpcNClsFindable > 0 && TRDPattern == 0 && TOFChi2 < -1. &&
300 (!bITSClusterMap || ITSClusterMap == 0) &&
301 (!bITSClusterSizes || ITSClusterSizes == 0) &&
302 (!hasTrackQA || !keepTrackQA[i]) &&
303 !keepV0s[i]) {
304 counter++;
305 } else {
306 acceptedTracks[i] = i - counter;
307 }
308 }
309
310 for (auto key2 : *treeList) {
311 TString treeName = ((TObjString*)key2)->GetString().Data();
312
313 // Connect trees but do not copy entries (using the clone function)
314 // NOTE Basket size etc. are copied in CloneTree()
315 if (outputDir == nullptr) {
316 outputDir = outputFile->mkdir(dfName);
317 printf("Writing to output folder %s\n", dfName);
318 }
319 outputDir->cd();
320
321 auto inputTree = (TTree*)inputFile->Get(Form("%s/%s", dfName, treeName.Data()));
322 printf(" Processing tree %s with %lld entries with total size %lld\n", treeName.Data(), inputTree->GetEntries(), inputTree->GetTotBytes());
323 auto outputTree = inputTree->CloneTree(0);
324 outputTree->SetAutoFlush(0);
325
326 std::vector<int*> indexList;
327 std::vector<char*> vlaPointers;
328 std::vector<int*> indexPointers;
329 TObjArray* branches = inputTree->GetListOfBranches();
330 for (int i = 0; i < branches->GetEntriesFast(); ++i) {
331 TBranch* br = (TBranch*)branches->UncheckedAt(i);
332 TString branchName(br->GetName());
333 TString tableName(getTableName(branchName, treeName.Data()));
334 // register index of track index ONLY
335 if (!tableName.EqualTo("O2track")) {
336 continue;
337 }
338 // detect VLA
339 if (((TLeaf*)br->GetListOfLeaves()->First())->GetLeafCount() != nullptr) {
340 printf(" *** FATAL ***: VLA detection is not supported\n");
341 exitCode = 9;
342 } else if (branchName.BeginsWith("fIndexSlice")) {
343 int* buffer = new int[2];
344 memset(buffer, 0, 2 * sizeof(buffer[0]));
345 vlaPointers.push_back(reinterpret_cast<char*>(buffer));
346 inputTree->SetBranchAddress(br->GetName(), buffer);
347 outputTree->SetBranchAddress(br->GetName(), buffer);
348
349 indexList.push_back(buffer);
350 indexList.push_back(buffer + 1);
351 } else if (branchName.BeginsWith("fIndex") && !branchName.EndsWith("_size")) {
352 int* buffer = new int;
353 *buffer = 0;
354 indexPointers.push_back(buffer);
355
356 inputTree->SetBranchAddress(br->GetName(), buffer);
357 outputTree->SetBranchAddress(br->GetName(), buffer);
358
359 indexList.push_back(buffer);
360 }
361 }
362
363 const bool processingTracked = treeName.BeginsWith("O2tracked");
364 const bool processingTrackQA = treeName.BeginsWith("O2trackqa");
365 const bool processingTracks = treeName.BeginsWith("O2track") && !processingTracked && !processingTrackQA; // matches any of the track tables and not tracked{v0s,cascase,3body} or trackqa;
366 const bool processingAmbiguousTracks = treeName.BeginsWith("O2ambiguoustrack");
367
368 auto entries = inputTree->GetEntries();
369 for (int i = 0; i < entries; i++) {
370 inputTree->GetEntry(i);
371 bool fillThisEntry = true;
372 // Special case for Tracks, TracksExtra, TracksCov
373 if (processingTracks) {
374 if (acceptedTracks[i] < 0) {
375 fillThisEntry = false;
376 }
377 } else {
378 // Other table than Tracks* --> reassign indices to Tracks
379 for (const auto& idx : indexList) {
380 int oldTrackIndex = *idx;
381
382 // if negative, the index is unassigned.
383 if (oldTrackIndex >= 0) {
384 if (acceptedTracks[oldTrackIndex] < 0) {
385 fillThisEntry = false;
386 } else {
387 *idx = acceptedTracks[oldTrackIndex];
388 }
389 }
390 }
391 }
392
393 // Keep only tracks which have no collision, see O2-3601
394 if (processingAmbiguousTracks) {
395 if (hasCollision[i]) {
396 fillThisEntry = false;
397 }
398 }
399
400 if (fillThisEntry) {
401 outputTree->Fill();
402 }
403 }
404
405 if (entries != outputTree->GetEntries()) {
406 printf(" Reduced from %lld to %lld entries\n", entries, outputTree->GetEntries());
407 // sanity check by hardcoding the trees for which we expect a reduction
408 const TString tableName{removeVersionSuffix(outputTree->GetName())};
409 static const std::array<TString, 4> checkNames{"O2track", "O2trackextra", "O2trackcov", "O2ambiguoustrack"}; // O2track -> O2track_iu; O2trackcov -> O2trackcov_iu
410 std::vector<bool> checks(checkNames.size(), false);
411 for (size_t i{0}; i < checkNames.size(); ++i) {
412 if (tableName.EqualTo(checkNames[i])) {
413 checks[i] = true;
414 }
415 }
416 if (std::none_of(checks.begin(), checks.end(), [](bool b) { return b; })) {
417 exitCode = 30;
418 printf(" -> Reduction is not expected for this tree!\n");
419 break;
420 }
421 }
422
423 delete inputTree;
424
425 for (auto& buffer : indexPointers) {
426 delete buffer;
427 }
428 for (auto& buffer : vlaPointers) {
429 delete[] buffer;
430 }
431
432 outputDir->cd();
433 outputTree->Write();
434 delete outputTree;
435 }
436 if (exitCode > 0) {
437 break;
438 }
439
440 outputDir = nullptr;
441 }
442 inputFile->Close();
443
444 outputFile->Write();
445 outputFile->Close();
446
447 // in case of failure, remove the incomplete file
448 if (exitCode != 0) {
449 printf("Removing incomplete output file %s.\n", outputFile->GetName());
450 gSystem->Unlink(outputFile->GetName());
451 return exitCode; // skip output below
452 }
453
454 clock.Stop();
455
456 // Report savings
457 auto sBefore = inputFile->GetSize();
458 auto sAfter = outputFile->GetSize();
459 if (sBefore <= 0 || sAfter <= 0) {
460 printf("Warning: Empty input or output file after thinning!\n");
461 exitCode = 9;
462 }
463 auto spaceSaving = (1 - ((double)sAfter) / ((double)sBefore)) * 100;
464 printf("Stats: After=%lld / Before=%lld Bytes ---> Saving %.1f%% diskspace!\n", sAfter, sBefore, spaceSaving);
465 printf("Timing: CPU=%.2f (s); Real=%.2f (s)\n", clock.CpuTime(), clock.RealTime());
466 printf("End of AOD thinning.\n");
467
468 return exitCode;
469}
int32_t i
uint32_t j
Definition RawData.h:0
const char * getTableName(const char *branchName, const char *treeName)
Definition aodMerger.h:26
const char * removeVersionSuffix(const char *treeName)
Definition aodMerger.h:14
benchmark::State & st
GLuint buffer
Definition glcorearb.h:655
GLboolean GLboolean GLboolean b
Definition glcorearb.h:1233
GLuint counter
Definition glcorearb.h:3987
#define main