Project
Loading...
Searching...
No Matches
aodThinner.cxx
Go to the documentation of this file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
12#include <unordered_map>
13#include <getopt.h>
14#include <algorithm>
15#include <cmath>
16
17#include "TSystem.h"
18#include "TStopwatch.h"
19#include "TString.h"
20#include "TRegexp.h"
21#include "TFile.h"
22#include "TTree.h"
23#include "TBranch.h"
24#include "TList.h"
25#include "TKey.h"
26#include "TDirectory.h"
27#include "TObjString.h"
28#include "TGrid.h"
29#include "TMap.h"
30#include "TLeaf.h"
31
32#include "aodMerger.h"
33
34// AOD reduction tool
35// Designed for the 2022 pp data with specific selections:
36// - Remove all TPC only tracks, optionally keep TPC-only V0 tracks
37// - Remove all V0s which refer to any removed track
38// - Remove all cascade which refer to any removed V0
39// - Remove all ambiguous track entries which point to a track with collision
40// - Adjust all indices
41int main(int argc, char* argv[])
42{
43 std::string inputFileName("AO2D.root");
44 std::string outputFileName("AO2D_thinned.root");
45 int exitCode = 0; // 0: success, !=0: failure
46 bool bOverwrite = false;
47 int compression = 505;
48
49 int option_index = 1;
50
51 const char* const short_opts = "i:o:KOh";
52 static struct option long_options[] = {
53 {"input", required_argument, nullptr, 'i'},
54 {"output", required_argument, nullptr, 'o'},
55 {"overwrite", no_argument, nullptr, 'O'},
56 {"compression", required_argument, nullptr, 'c'},
57 {"help", no_argument, nullptr, 'h'},
58 {nullptr, 0, nullptr, 0}};
59
60 while (true) {
61 const auto opt = getopt_long(argc, argv, short_opts, long_options, &option_index);
62 if (opt == -1) {
63 break; // use defaults
64 }
65 switch (opt) {
66 case 'i':
67 inputFileName = optarg;
68 break;
69 case 'o':
70 outputFileName = optarg;
71 break;
72 case 'O':
73 bOverwrite = true;
74 printf("Overwriting existing output file if existing\n");
75 break;
76 case 'c':
77 compression = atoi(optarg);
78 break;
79 case 'h':
80 case '?':
81 default:
82 printf("AO2D thinning tool. Options: \n");
83 printf(" --input/-i <inputfile.root> Contains input file path to the file to be thinned. Default: %s\n", inputFileName.c_str());
84 printf(" --output/-o <outputfile.root> Target output ROOT file. Default: %s\n", outputFileName.c_str());
85 printf(" --compression/-c <compression id> ROOT compression algorithm / level. Default: %d\n", compression);
86 printf("\n");
87 printf(" Optional Arguments:\n");
88 printf(" --overwrite/-O Overwrite existing output file\n");
89 return -1;
90 }
91 }
92
93 printf("AOD reduction started with:\n");
94 printf(" Input file: %s\n", inputFileName.c_str());
95 printf(" Ouput file name: %s\n", outputFileName.c_str());
96
97 TStopwatch clock;
98 clock.Start(kTRUE);
99
100 auto outputFile = TFile::Open(outputFileName.c_str(), (bOverwrite) ? "RECREATE" : "CREATE", "", compression);
101 if (outputFile == nullptr) {
102 printf("Error: File %s exists or cannot be created!\n", outputFileName.c_str());
103 return 1;
104 }
105 TDirectory* outputDir = nullptr;
106
107 if (inputFileName.find("alien:") == 0 && !gGrid) {
108 printf("Connecting to AliEn...");
109 TGrid::Connect("alien:");
110 }
111
112 auto inputFile = TFile::Open(inputFileName.c_str());
113 if (!inputFile) {
114 printf("Error: Could not open input file %s.\n", inputFileName.c_str());
115 return 1;
116 }
117
118 TList* keyList = inputFile->GetListOfKeys();
119 keyList->Sort();
120
121 for (auto key1 : *keyList) {
122 // Keep metaData
123 if (((TObjString*)key1)->GetString().EqualTo("metaData")) {
124 auto metaData = (TMap*)inputFile->Get("metaData");
125 outputFile->cd();
126 metaData->Write("metaData", TObject::kSingleKey);
127 }
128
129 // Keep parentFiles
130 if (((TObjString*)key1)->GetString().EqualTo("parentFiles")) {
131 auto parentFiles = (TMap*)inputFile->Get("parentFiles");
132 outputFile->cd();
133 parentFiles->Write("parentFiles", TObject::kSingleKey);
134 }
135
136 // Skip everything else, except 'DF_*'
137 if (!((TObjString*)key1)->GetString().BeginsWith("DF_")) {
138 continue;
139 }
140
141 auto dfName = ((TObjString*)key1)->GetString().Data();
142
143 printf(" Processing folder %s\n", dfName);
144 auto folder = (TDirectoryFile*)inputFile->Get(dfName);
145 auto treeList = folder->GetListOfKeys();
146 treeList->Sort();
147
148 // purging keys from duplicates
149 for (auto i = 0; i < treeList->GetEntries(); ++i) {
150 TKey* ki = (TKey*)treeList->At(i);
151 for (int j = i + 1; j < treeList->GetEntries(); ++j) {
152 TKey* kj = (TKey*)treeList->At(j);
153 if (std::strcmp(ki->GetName(), kj->GetName()) == 0 && std::strcmp(ki->GetTitle(), kj->GetTitle()) == 0) {
154 if (ki->GetCycle() < kj->GetCycle()) {
155 printf(" *** FATAL *** we had ordered the keys, first cycle should be higher, please check");
156 exitCode = 5;
157 } else {
158 // key is a duplicate, let's remove it
159 treeList->Remove(kj);
160 j--;
161 }
162 } else {
163 // we changed key, since they are sorted, we won't have the same anymore
164 break;
165 }
166 }
167 }
168
169 // Scan versions e.g. 001 or 002 ...
170 TString v0Name{"O2v0_???"}, trkExtraName{"O2trackextra*"};
171 TRegexp v0Re(v0Name, kTRUE), trkExtraRe(trkExtraName, kTRUE);
172 bool hasTrackQA{false};
173 TString trackQAName{"O2trackqa*"};
174 TRegexp trackQARe(trackQAName, kTRUE);
175 for (TObject* obj : *treeList) {
176 TString st = obj->GetName();
177 if (st.Index(v0Re) != kNPOS) {
178 v0Name = st;
179 } else if (st.Index(trkExtraRe) != kNPOS) {
180 trkExtraName = st;
181 } else if (st.Index(trackQARe) != kNPOS) {
182 hasTrackQA = true;
183 trackQAName = st;
184 }
185 }
186
187 // Certain order needed in order to populate vectors of skipped entries
188 auto v0Entry = (TObject*)treeList->FindObject(v0Name);
189 treeList->Remove(v0Entry);
190 treeList->AddFirst(v0Entry);
191
192 // Prepare maps for track skimming
193 auto trackExtraTree = (TTree*)inputFile->Get(Form("%s/%s", dfName, trkExtraName.Data()));
194 if (trackExtraTree == nullptr) {
195 printf("%s table not found\n", trkExtraName.Data());
196 exitCode = 6;
197 break;
198 }
199 auto track_iu = (TTree*)inputFile->Get(Form("%s/%s", dfName, "O2track_iu"));
200 if (track_iu == nullptr) {
201 printf("O2track_iu table not found\n");
202 exitCode = 7;
203 break;
204 }
205 auto v0s = (TTree*)inputFile->Get(Form("%s/%s", dfName, v0Name.Data()));
206 if (v0s == nullptr) {
207 printf("%s table not found\n", v0Name.Data());
208 exitCode = 8;
209 break;
210 }
211 // TrackQA
212 TTree* trackQA;
213 if (hasTrackQA && (trackQA = (TTree*)inputFile->Get(Form("%s/%s", dfName, trackQAName.Data()))) == nullptr) {
214 exitCode = 20;
215 break;
216 }
217
218 // We need to loop over the V0s once and flag the prong indices
219 int trackIdxPos = 0, trackIdxNeg = 0;
220 std::vector<bool> keepV0s(trackExtraTree->GetEntries(), false);
221 v0s->SetBranchAddress("fIndexTracks_Pos", &trackIdxPos);
222 v0s->SetBranchAddress("fIndexTracks_Neg", &trackIdxNeg);
223 auto nV0s = v0s->GetEntriesFast();
224 for (int i{0}; i < nV0s; ++i) {
225 v0s->GetEntry(i);
226 keepV0s[trackIdxPos] = true;
227 keepV0s[trackIdxNeg] = true;
228 }
229
230 // TrackQA
231 std::vector<bool> keepTrackQA;
232 if (hasTrackQA) {
233 keepTrackQA.assign(trackExtraTree->GetEntries(), false);
234 trackQA->SetBranchAddress("fIndexTracks", &trackIdxPos);
235 for (int i{0}; i < trackQA->GetEntries(); ++i) {
236 trackQA->GetEntry(i);
237 keepTrackQA[trackIdxPos] = true;
238 }
239 }
240
241 std::vector<int> acceptedTracks(trackExtraTree->GetEntries(), -1);
242 std::vector<bool> hasCollision(trackExtraTree->GetEntries(), false);
243
244 uint8_t tpcNClsFindable = 0;
245 bool bTPClsFindable = false;
246 uint8_t ITSClusterMap = 0;
247 UInt_t ITSClusterSizes = 0;
248 bool bITSClusterMap = false;
249 bool bITSClusterSizes = false;
250 uint8_t TRDPattern = 0;
251 bool bTRDPattern = false;
252 float_t TOFChi2 = 0;
253 bool bTOFChi2 = false;
254
255 // Test if track properties exist
256 TBranch* br = nullptr;
257 TIter next(trackExtraTree->GetListOfBranches());
258 while ((br = (TBranch*)next())) {
259 TString brName = br->GetName();
260 if (brName == "fTPCNClsFindable") {
261 trackExtraTree->SetBranchAddress("fTPCNClsFindable", &tpcNClsFindable);
262 bTPClsFindable = true;
263 } else if (brName == "fITSClusterMap") {
264 trackExtraTree->SetBranchAddress("fITSClusterMap", &ITSClusterMap);
265 bITSClusterMap = true;
266 } else if (brName == "fITSClusterSizes") {
267 trackExtraTree->SetBranchAddress("fITSClusterSizes", &ITSClusterSizes);
268 bITSClusterSizes = true;
269 } else if (brName == "fTRDPattern") {
270 trackExtraTree->SetBranchAddress("fTRDPattern", &TRDPattern);
271 bTRDPattern = true;
272 } else if (brName == "fTOFChi2") {
273 trackExtraTree->SetBranchAddress("fTOFChi2", &TOFChi2);
274 bTOFChi2 = true;
275 }
276 }
277
278 // Sanity-Check
279 // If any (%ITSClusterMap or %ITSClusterSizes) of these are not found, continuation is not possible, hence fataling
280 if (!bTPClsFindable || !bTRDPattern || !bTOFChi2 ||
281 (!bITSClusterMap && !bITSClusterSizes)) {
282 printf(" *** FATAL *** Branch detection failed in %s for trackextra.[(fITSClusterMap=%d,fITSClusterSizes=%d),fTPCNClsFindable=%d,fTRDPattern=%d,fTOFChi2=%d]\n", dfName, bITSClusterMap, bITSClusterSizes, bTPClsFindable, bTRDPattern, bTOFChi2);
283 exitCode = 10;
284 break;
285 }
286
287 int fIndexCollisions = 0;
288 track_iu->SetBranchAddress("fIndexCollisions", &fIndexCollisions);
289
290 // loop over all tracks
291 auto entries = trackExtraTree->GetEntries();
292 int counter = 0;
293 for (int i = 0; i < entries; i++) {
294 trackExtraTree->GetEntry(i);
295 track_iu->GetEntry(i);
296
297 // Flag collisions
298 hasCollision[i] = (fIndexCollisions >= 0);
299
300 // Remove TPC only tracks, if they are not assoc. to a V0
301 if (tpcNClsFindable > 0 && TRDPattern == 0 && TOFChi2 < -1. &&
302 (!bITSClusterMap || ITSClusterMap == 0) &&
303 (!bITSClusterSizes || ITSClusterSizes == 0) &&
304 (!hasTrackQA || !keepTrackQA[i]) &&
305 !keepV0s[i]) {
306 counter++;
307 } else {
308 acceptedTracks[i] = i - counter;
309 }
310 }
311
312 for (auto key2 : *treeList) {
313 TString treeName = ((TObjString*)key2)->GetString().Data();
314
315 // Connect trees but do not copy entries (using the clone function)
316 // NOTE Basket size etc. are copied in CloneTree()
317 if (outputDir == nullptr) {
318 outputDir = outputFile->mkdir(dfName);
319 printf("Writing to output folder %s\n", dfName);
320 }
321 outputDir->cd();
322
323 auto inputTree = (TTree*)inputFile->Get(Form("%s/%s", dfName, treeName.Data()));
324 printf(" Processing tree %s with %lld entries with total size %lld\n", treeName.Data(), inputTree->GetEntries(), inputTree->GetTotBytes());
325 auto outputTree = inputTree->CloneTree(0);
326 outputTree->SetAutoFlush(0);
327
328 std::vector<int*> indexList;
329 std::vector<char*> vlaPointers;
330 std::vector<int*> indexPointers;
331 TObjArray* branches = inputTree->GetListOfBranches();
332 for (int i = 0; i < branches->GetEntriesFast(); ++i) {
333 TBranch* br = (TBranch*)branches->UncheckedAt(i);
334 TString branchName(br->GetName());
335 TString tableName(getTableName(branchName, treeName.Data()));
336 // register index of track index ONLY
337 if (!tableName.EqualTo("O2track")) {
338 continue;
339 }
340 // detect VLA
341 if (((TLeaf*)br->GetListOfLeaves()->First())->GetLeafCount() != nullptr) {
342 printf(" *** FATAL ***: VLA detection is not supported\n");
343 exitCode = 9;
344 } else if (branchName.BeginsWith("fIndexSlice")) {
345 int* buffer = new int[2];
346 memset(buffer, 0, 2 * sizeof(buffer[0]));
347 vlaPointers.push_back(reinterpret_cast<char*>(buffer));
348 inputTree->SetBranchAddress(br->GetName(), buffer);
349 outputTree->SetBranchAddress(br->GetName(), buffer);
350
351 indexList.push_back(buffer);
352 indexList.push_back(buffer + 1);
353 } else if (branchName.BeginsWith("fIndex") && !branchName.EndsWith("_size")) {
354 int* buffer = new int;
355 *buffer = 0;
356 indexPointers.push_back(buffer);
357
358 inputTree->SetBranchAddress(br->GetName(), buffer);
359 outputTree->SetBranchAddress(br->GetName(), buffer);
360
361 indexList.push_back(buffer);
362 }
363 }
364
365 const bool processingTracked = treeName.BeginsWith("O2tracked");
366 const bool processingTrackQA = treeName.BeginsWith("O2trackqa");
367 const bool processingTracks = treeName.BeginsWith("O2track") && !processingTracked && !processingTrackQA; // matches any of the track tables and not tracked{v0s,cascase,3body} or trackqa;
368 const bool processingAmbiguousTracks = treeName.BeginsWith("O2ambiguoustrack");
369
370 auto entries = inputTree->GetEntries();
371 for (int i = 0; i < entries; i++) {
372 inputTree->GetEntry(i);
373 bool fillThisEntry = true;
374 // Special case for Tracks, TracksExtra, TracksCov
375 if (processingTracks) {
376 if (acceptedTracks[i] < 0) {
377 fillThisEntry = false;
378 }
379 } else {
380 // Other table than Tracks* --> reassign indices to Tracks
381 for (const auto& idx : indexList) {
382 int oldTrackIndex = *idx;
383
384 // if negative, the index is unassigned.
385 if (oldTrackIndex >= 0) {
386 if (acceptedTracks[oldTrackIndex] < 0) {
387 fillThisEntry = false;
388 } else {
389 *idx = acceptedTracks[oldTrackIndex];
390 }
391 }
392 }
393 }
394
395 // Keep only tracks which have no collision, see O2-3601
396 if (processingAmbiguousTracks) {
397 if (hasCollision[i]) {
398 fillThisEntry = false;
399 }
400 }
401
402 if (fillThisEntry) {
403 outputTree->Fill();
404 }
405 }
406
407 if (entries != outputTree->GetEntries()) {
408 printf(" Reduced from %lld to %lld entries\n", entries, outputTree->GetEntries());
409 // sanity check by hardcoding the trees for which we expect a reduction
410 const TString tableName{removeVersionSuffix(outputTree->GetName())};
411 static const std::array<TString, 4> checkNames{"O2track", "O2trackextra", "O2trackcov", "O2ambiguoustrack"}; // O2track -> O2track_iu; O2trackcov -> O2trackcov_iu
412 std::vector<bool> checks(checkNames.size(), false);
413 for (size_t i{0}; i < checkNames.size(); ++i) {
414 if (tableName.EqualTo(checkNames[i])) {
415 checks[i] = true;
416 }
417 }
418 if (std::none_of(checks.begin(), checks.end(), [](bool b) { return b; })) {
419 exitCode = 30;
420 printf(" -> Reduction is not expected for this tree!\n");
421 break;
422 }
423 }
424
425 delete inputTree;
426
427 for (auto& buffer : indexPointers) {
428 delete buffer;
429 }
430 for (auto& buffer : vlaPointers) {
431 delete[] buffer;
432 }
433
434 outputDir->cd();
435 outputTree->Write();
436 delete outputTree;
437 }
438 if (exitCode > 0) {
439 break;
440 }
441
442 outputDir = nullptr;
443 }
444 inputFile->Close();
445
446 outputFile->Write();
447 outputFile->Close();
448
449 // in case of failure, remove the incomplete file
450 if (exitCode != 0) {
451 printf("Removing incomplete output file %s.\n", outputFile->GetName());
452 gSystem->Unlink(outputFile->GetName());
453 return exitCode; // skip output below
454 }
455
456 clock.Stop();
457
458 // Report savings
459 auto sBefore = inputFile->GetSize();
460 auto sAfter = outputFile->GetSize();
461 if (sBefore <= 0 || sAfter <= 0) {
462 printf("Warning: Empty input or output file after thinning!\n");
463 exitCode = 9;
464 }
465 auto spaceSaving = (1 - ((double)sAfter) / ((double)sBefore)) * 100;
466 printf("Stats: After=%lld / Before=%lld Bytes ---> Saving %.1f%% diskspace!\n", sAfter, sBefore, spaceSaving);
467 printf("Timing: CPU=%.2f (s); Real=%.2f (s)\n", clock.CpuTime(), clock.RealTime());
468 printf("End of AOD thinning.\n");
469
470 return exitCode;
471}
int32_t i
uint32_t j
Definition RawData.h:0
const char * getTableName(const char *branchName, const char *treeName)
Definition aodMerger.h:26
const char * removeVersionSuffix(const char *treeName)
Definition aodMerger.h:14
benchmark::State & st
GLuint buffer
Definition glcorearb.h:655
GLboolean GLboolean GLboolean b
Definition glcorearb.h:1233
GLuint counter
Definition glcorearb.h:3987
#define main