Project
Loading...
Searching...
No Matches
fit.h
Go to the documentation of this file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
12#ifndef ALICEO2_MATHUTILS_MATHBASE_H_
13#define ALICEO2_MATHUTILS_MATHBASE_H_
14
17
18#include <cmath>
19#include <numeric>
20#include <algorithm>
21#include <vector>
22#include <array>
23#include <thread>
24
25#include "Rtypes.h"
26#include "TLinearFitter.h"
27#include "TVectorD.h"
28#include "TMath.h"
29#include "TF1.h"
30#include "Foption.h"
31#include "HFitInterface.h"
32#include "TFitResultPtr.h"
33#include "TFitResult.h"
34#include "Fit/Fitter.h"
35#include "Fit/BinData.h"
36#include "Math/WrappedMultiTF1.h"
37#include <Math/SMatrix.h>
38#include <Math/SVector.h>
39#include "Framework/Logger.h"
40
41namespace o2
42{
43namespace math_utils
44{
59template <typename T>
60TFitResultPtr fit(const size_t nBins, const T* arr, const T xMin, const T xMax, TF1& func, std::string_view option = "")
61{
62 Foption_t fitOption;
63 ROOT::Fit::FitOptionsMake(ROOT::Fit::EFitObjectType::kHistogram, option.data(), fitOption);
64
65 ROOT::Fit::DataRange range(xMin, xMax);
66 ROOT::Fit::DataOptions opt;
67 ROOT::Fit::BinData fitdata(opt, range);
68 fitdata.Initialize(nBins, 1);
69
70 // create an empty TFitResult
71 std::shared_ptr<TFitResult> tfr(new TFitResult());
72 // create the fitter from an empty fit result
73 // std::shared_ptr<ROOT::Fit::Fitter> fitter(new ROOT::Fit::Fitter(std::static_pointer_cast<ROOT::Fit::FitResult>(tfr) ) );
74 ROOT::Fit::Fitter fitter(tfr);
75 // ROOT::Fit::FitConfig & fitConfig = fitter->Config();
76
77 const double binWidth = double(xMax - xMin) / double(nBins);
78
79 for (Int_t ibin = 0; ibin < nBins; ibin++) {
80 const double x = double(xMin) + double(ibin + 0.5) * binWidth;
81 const double y = double(arr[ibin]);
82 const double ey = std::sqrt(y);
83 fitdata.Add(x, y, ey);
84 }
85
86 const int special = func.GetNumber();
87 const int npar = func.GetNpar();
88 bool linear = func.IsLinear();
89 if (special == 299 + npar) {
90 linear = kTRUE; // for polynomial functions
91 }
92 // do not use linear fitter in these case
93 if (fitOption.Bound || fitOption.Like || fitOption.Errors || fitOption.Gradient || fitOption.More || fitOption.User || fitOption.Integral || fitOption.Minuit) {
94 linear = kFALSE;
95 }
96
97 if (special != 0 && !fitOption.Bound && !linear) {
98 if (special == 100) {
99 ROOT::Fit::InitGaus(fitdata, &func); // gaussian
100 } else if (special == 400) {
101 ROOT::Fit::InitGaus(fitdata, &func); // landau (use the same)
102 } else if (special == 200) {
103 ROOT::Fit::InitExpo(fitdata, &func); // exponential
104 }
105 }
106
107 if ((linear || fitOption.Gradient)) {
108 fitter.SetFunction(ROOT::Math::WrappedMultiTF1(func));
109 } else {
110 fitter.SetFunction(static_cast<const ROOT::Math::IParamMultiFunction&>(ROOT::Math::WrappedMultiTF1(func)));
111 }
112
113 // standard least square fit
114 const bool fitok = fitter.Fit(fitdata, fitOption.ExecPolicy);
115 if (!fitok) {
116 LOGP(warning, "bad fit");
117 }
118
119 return TFitResultPtr(tfr);
120}
121
131
132template <typename T>
133bool medmadGaus(size_t nBins, const T* arr, const T xMin, const T xMax, std::array<double, 3>& param)
134{
135 int bStart = 0, bEnd = -1;
136 double sum = 0, binW = double(xMax - xMin) / nBins, medVal = xMin;
137 for (int i = 0; i < (int)nBins; i++) {
138 auto v = arr[i];
139 if (v) {
140 if (!sum) {
141 bStart = i;
142 }
143 sum += v;
144 bEnd = i;
145 }
146 }
147 if (bEnd < bStart) {
148 return false;
149 }
150 bEnd++;
151 double cum = 0, thresh = 0.5 * sum, frac0 = 0;
152 int bid = bStart, prevbid = bid;
153 while (bid < bEnd) {
154 if (arr[bid] > 0) {
155 cum += arr[bid];
156 if (cum > thresh) {
157 frac0 = 1. + (thresh - cum) / float(arr[bid]);
158 medVal = xMin + binW * (bid + frac0);
159 int bdiff = bid - prevbid - 1;
160 if (bdiff > 0) {
161 medVal -= bdiff * binW * 0.5; // account for the gap
162 bid -= bdiff / 2;
163 }
164 break;
165 }
166 prevbid = bid;
167 }
168 bid++;
169 }
170 cum = 0.;
171 double edgeL = frac0 + bid, edgeR = edgeL, dist = 0., wL = 0, wR = 0;
172 while (1) {
173 float amp = 0.;
174 int bL = edgeL, bR = edgeR; // left and right bins
175 if (edgeL > bStart) {
176 wL = edgeL - bL;
177 amp += arr[bL];
178 } else {
179 wL = 1.;
180 }
181 if (edgeR < bEnd) {
182 wR = 1. + bR - edgeR;
183 amp += arr[bR];
184 } else {
185 wR = 1.;
186 }
187 auto wdt = std::min(wL, wR);
188 if (wdt < 1e-5) {
189 wdt = std::max(wL, wR);
190 }
191 if (amp > 0) {
192 amp *= wdt;
193 cum += amp;
194 if (cum >= thresh) {
195 dist += wdt * (cum - thresh) / amp * 0.5;
196 break;
197 }
198 }
199 dist += wdt;
200 edgeL -= wdt;
201 edgeR += wdt;
202 }
203 constexpr double SQRT2PI = 2.5066283;
204 param[1] = medVal;
205 param[2] = dist * binW * 1.4826; // MAD -> sigma
206 param[0] = sum * binW / (param[2] * SQRT2PI);
207 return true;
208}
209
229// template <typename T>
230// Double_t fitGaus(const size_t nBins, const T *arr, const T xMin, const T xMax, std::vector<T>& param);
231template <typename T>
232Double_t fitGaus(const size_t nBins, const T* arr, const T xMin, const T xMax, std::vector<T>& param)
233{
234 static TLinearFitter fitter(3, "pol2");
235 static TMatrixD mat(3, 3);
236 static Double_t kTol = mat.GetTol();
237 fitter.StoreData(kFALSE);
238 fitter.ClearPoints();
239 TVectorD par(3);
240 TVectorD sigma(3);
241 TMatrixD A(3, 3);
242 TMatrixD b(3, 1);
243 T rms = TMath::RMS(nBins, arr);
244 T max = TMath::MaxElement(nBins, arr);
245 T binWidth = (xMax - xMin) / T(nBins);
246
247 Float_t meanCOG = 0;
248 Float_t rms2COG = 0;
249 Float_t sumCOG = 0;
250
251 Float_t entries = 0;
252 Int_t nfilled = 0;
253
254 param.resize(4);
255 param[0] = 0.;
256 param[1] = 0.;
257 param[2] = 0.;
258 param[3] = 0.;
259
260 for (size_t i = 0; i < nBins; i++) {
261 entries += arr[i];
262 if (arr[i] > 0) {
263 nfilled++;
264 }
265 }
266
267 // TODO: Check why this is needed
268 if (max < 4) {
269 return -4;
270 }
271 if (entries < 12) {
272 return -4;
273 }
274
275 if (rms < kTol) {
276 return -4;
277 }
278
279 param[3] = entries;
280
281 Int_t npoints = 0;
282 for (size_t ibin = 0; ibin < nBins; ibin++) {
283 Float_t entriesI = arr[ibin];
284 if (entriesI > 1) {
285 Double_t xcenter = xMin + (ibin + 0.5) * binWidth;
286 Double_t error = 1. / TMath::Sqrt(entriesI);
287 Double_t val = TMath::Log(Float_t(entriesI));
288 fitter.AddPoint(&xcenter, val, error);
289 if (npoints < 3) {
290 A(npoints, 0) = 1;
291 A(npoints, 1) = xcenter;
292 A(npoints, 2) = xcenter * xcenter;
293 b(npoints, 0) = val;
294 meanCOG += xcenter * entriesI;
295 rms2COG += xcenter * entriesI * xcenter;
296 sumCOG += entriesI;
297 }
298 npoints++;
299 }
300 }
301
302 Double_t chi2 = 0;
303 if (npoints >= 3) {
304 if (npoints == 3) {
305 // analytic calculation of the parameters for three points
306 A.Invert();
307 TMatrixD res(1, 3);
308 res.Mult(A, b);
309 par[0] = res(0, 0);
310 par[1] = res(0, 1);
311 par[2] = res(0, 2);
312 chi2 = -3.;
313 } else {
314 // use fitter for more than three points
315 fitter.Eval();
316 fitter.GetParameters(par);
317 fitter.GetCovarianceMatrix(mat);
318 chi2 = fitter.GetChisquare() / Double_t(npoints);
319 }
320 if (TMath::Abs(par[1]) < kTol) {
321 return -4;
322 }
323 if (TMath::Abs(par[2]) < kTol) {
324 return -4;
325 }
326 param[1] = T(par[1] / (-2. * par[2]));
327 param[2] = T(1. / TMath::Sqrt(TMath::Abs(-2. * par[2])));
328 Double_t lnparam0 = par[0] + par[1] * param[1] + par[2] * param[1] * param[1];
329 if (lnparam0 > 307) {
330 return -4;
331 }
332 param[0] = TMath::Exp(lnparam0);
333
334 return chi2;
335 }
336
337 if (npoints == 2) {
338 // use center of gravity for 2 points
339 meanCOG /= sumCOG;
340 rms2COG /= sumCOG;
341 param[0] = max;
342 param[1] = meanCOG;
343 param[2] = TMath::Sqrt(TMath::Abs(meanCOG * meanCOG - rms2COG));
344 chi2 = -2.;
345 }
346 if (npoints == 1) {
347 meanCOG /= sumCOG;
348 param[0] = max;
349 param[1] = meanCOG;
350 param[2] = binWidth / TMath::Sqrt(12);
351 chi2 = -1.;
352 }
353 return chi2;
354}
355
356// more optimal implementation of guassian fit via log-normal fit, appropriate for MT calls
357// Only bins with values above minVal will be accounted.
358// If applyMAD is true, the fit is done whithin the nSigmaMAD range of the preliminary estimate by MAD
359template <typename T>
360double fitGaus(size_t nBins, const T* arr, const T xMin, const T xMax, std::array<double, 3>& param,
361 ROOT::Math::SMatrix<double, 3, 3, ROOT::Math::MatRepSym<double, 3>>* covMat = nullptr,
362 int minVal = 2, bool applyMAD = true)
363{
364 double binW = double(xMax - xMin) / nBins, s0 = 0, s1 = 0, s2 = 0, s3 = 0, s4 = 0, sy0 = 0, sy1 = 0, sy2 = 0, syy = 0;
365 int np = 0;
366 int bStart = 0, bEnd = (int)nBins;
367 const float nSigmaMAD = 2.;
368 if (applyMAD) {
369 std::array<double, 3> madPar;
370 if (!medmadGaus(nBins, arr, xMin, xMax, madPar)) {
371 return -10;
372 }
373 bStart = std::max(bStart, int((madPar[1] - nSigmaMAD * madPar[2] - xMin) / binW));
374 bEnd = std::min(bEnd, 1 + int((madPar[1] + nSigmaMAD * madPar[2] - xMin) / binW));
375 }
376 float x = xMin + (bStart - 0.5) * binW;
377 for (int i = bStart; i < bEnd; i++) {
378 x += binW;
379 auto v = arr[i];
380 if (v < 0) {
381 throw std::runtime_error("Log-normal fit is possible only with non-negative data");
382 }
383 if (v < minVal) {
384 continue;
385 }
386 double y = std::log(v), err2i = v, err2iX = err2i, err2iY = err2i * y;
387 s0 += err2iX;
388 s1 += (err2iX *= x);
389 s2 += (err2iX *= x);
390 s3 += (err2iX *= x);
391 s4 += (err2iX *= x);
392 sy0 += err2iY;
393 syy += err2iY * y;
394 sy1 += (err2iY *= x);
395 sy2 += (err2iY *= x);
396 np++;
397 }
398 if (np < 1) {
399 return -10;
400 }
401 auto recover = [&param, binW, np, s0, s1, s2, sy0]() {
402 param[0] = std::exp(sy0 / s0); // recover center of gravity
403 param[1] = s1 / s0; // mean x;
404 param[2] = np == 1 ? binW / std::sqrt(12) : std::sqrt(std::abs(param[1] * param[1] - s2 / s0));
405 };
406 if (np < 3) {
407 recover();
408 return -np;
409 }
411 ROOT::Math::SVector<double, 3> v3{sy0, sy1, sy2};
412 m33(0, 0) = s0;
413 m33(1, 0) = s1;
414 m33(1, 1) = m33(2, 0) = s2;
415 m33(2, 1) = s3;
416 m33(2, 2) = s4;
417 int res = 0;
418 auto m33i = m33.Inverse(res);
419 if (res) {
420 recover();
421 LOG(error) << np << " points collected, matrix inversion failed " << m33;
422 return -10;
423 }
424 auto v = m33i * v3;
425 if (v(2) >= 0.) { // fit failed, use mean amd RMS
426 recover();
427 return -3;
428 }
429
430 double chi2 = v(0) * v(0) * s0 + v(1) * v(1) * s2 + v(2) * v(2) * s4 + syy +
431 2. * (v(0) * v(1) * s1 + v(0) * v(2) * s2 + v(1) * v(2) * s3 - v(0) * sy0 - v(1) * sy1 - v(2) * sy2);
432 param[1] = -0.5 * v(1) / v(2);
433 param[2] = 1. / std::sqrt(-2. * v(2));
434 param[0] = std::exp(v(0) - param[1] * param[1] * v(2));
435 if (std::isnan(param[0]) || std::isnan(param[1]) || std::isnan(param[2])) {
436 recover();
437 return -3;
438 }
439 if (covMat) {
440 // build jacobian of transformation from log-normal to normal params
442 j33(0, 0) = param[0];
443 j33(0, 1) = param[0] * param[1];
444 j33(0, 2) = j33(0, 1) * param[1];
445 j33(1, 1) = -0.5 / v(2);
446 j33(1, 2) = -param[1] / v(2);
447 j33(2, 2) = param[2] * j33(1, 1);
448 *covMat = ROOT::Math::Similarity(j33, m33i);
449 }
450 return np > 3 ? chi2 / (np - 3.) : 0.;
451}
452
458 double mCOG{0};
459 double mStdDev{0};
460 double mSum{0};
461};
462
470template <typename T>
471StatisticsData getStatisticsData(const T* arr, const size_t nBins, const double xMin, const double xMax)
472{
473 double mean = 0;
474 double rms2 = 0;
475 double sum = 0;
476 size_t npoints = 0;
477
478 double binWidth = (xMax - xMin) / (double)nBins;
479
481 // in case something went wrong the COG is the histogram lower limit
482 data.mCOG = xMin;
483
484 for (size_t ibin = 0; ibin < nBins; ++ibin) {
485 double entriesI = (double)arr[ibin];
486 double xcenter = xMin + (ibin + 0.5) * binWidth; // +0.5 to shift to bin centre
487 if (entriesI > 0) {
488 mean += xcenter * entriesI;
489 rms2 += xcenter * entriesI * xcenter;
490 sum += entriesI;
491 ++npoints;
492 }
493 }
494 if (sum == 0) {
495 return data;
496 }
497 mean /= sum;
498
499 data.mCOG = mean;
500 // exception in case of only one bin is filled
501 // set the standard deviation to bin width over sqrt(12)
502 rms2 /= sum;
503 if (npoints == 1) {
504 data.mStdDev = binWidth / std::sqrt(12.);
505 } else {
506 data.mStdDev = std::sqrt(std::abs(rms2 - mean * mean));
507 }
508
509 data.mSum = sum;
510
511 return data;
512}
513
519template <typename T, typename R = double>
520R median(std::vector<T> v)
521{
522 if (v.empty()) {
523 return R{};
524 }
525 auto n = v.size() / 2;
526 nth_element(v.begin(), v.begin() + n, v.end());
527 auto med = R{v[n]};
528 if (!(v.size() & 1)) { // If the set size is even
529 auto max_it = max_element(v.begin(), v.begin() + n);
530 med = R{(*max_it + med) / 2.0};
531 }
532 return med;
533}
534
539template <typename T>
540void SortData(std::vector<T> const& values, std::vector<size_t>& index)
541{
542 if (values.size() != index.size()) {
543 LOG(error) << "Vector with values must have same size as vector for indices";
544 return;
545 }
546 std::iota(index.begin(), index.end(), static_cast<size_t>(0));
547 std::sort(index.begin(), index.end(), [&](size_t a, size_t b) { return values[a] < values[b]; });
548}
549
569template <typename T>
570bool LTMUnbinned(const std::vector<T>& data, std::vector<size_t>& index, std::array<float, 7>& params, float fracKeep)
571{
572 int nPoints = data.size();
573 std::vector<float> w(2 * nPoints);
574 int nKeep = nPoints * fracKeep;
575 if (nKeep > nPoints) {
576 nKeep = nPoints;
577 }
578 if (nKeep < 2) {
579 return false;
580 }
581 // sort in increasing order
583 // build cumulants
584 double sum1 = 0.0;
585 double sum2 = 0.0;
586 for (int i = 0; i < nPoints; i++) {
587 double x = data[index[i]];
588 sum1 += x;
589 sum2 += x * x;
590 w[i] = sum1;
591 w[i + nPoints] = sum2;
592 }
593 double maxRMS = sum2 + 1e6;
594 params[0] = nKeep;
595 int limI = nPoints - nKeep + 1; // lowest possible bin to accept
596 for (int i = 0; i < limI; i++) {
597 const int limJ = i + nKeep - 1; // highest accepted bin
598 sum1 = static_cast<double>(w[limJ]) - static_cast<double>(i ? w[i - 1] : 0.);
599 sum2 = static_cast<double>(w[nPoints + limJ]) - static_cast<double>(i ? w[nPoints + i - 1] : 0.);
600 const double mean = sum1 / nKeep;
601 const double rms2 = sum2 / nKeep - mean * mean;
602 if (rms2 > maxRMS) {
603 continue;
604 }
605 maxRMS = rms2;
606 params[1] = mean;
607 params[2] = rms2;
608 params[5] = i;
609 params[6] = limJ;
610 }
611 //
612 if (params[2] < 0) {
613 LOG(error) << "Rounding error: RMS = " << params[2] << " < 0";
614 return false;
615 }
616 params[2] = std::sqrt(params[2]);
617 params[3] = params[2] / std::sqrt(params[0]); // error on mean
618 params[4] = params[3] / std::sqrt(2.0); // error on RMS
619 return true;
620}
621
625template <typename T>
626void Reorder(std::vector<T>& data, const std::vector<size_t>& index)
627{
628 // rearange data in order given by index
629 if (data.size() != index.size()) {
630 LOG(error) << "Reordering not possible if number of elements in index container different from the data container";
631 return;
632 }
633 std::vector<T> tmp(data);
634 for (size_t i = 0; i < data.size(); ++i) {
635 data[i] = tmp[index[i]];
636 }
637}
638
648template <typename T>
649bool LTMUnbinnedSig(const std::vector<T>& data, std::vector<size_t>& index, std::array<float, 7>& params, float fracKeepMin, float sigTgt, bool sorted = false)
650{
651 int nPoints = data.size();
652 std::vector<double> wx(nPoints);
653 std::vector<double> wx2(nPoints);
654
655 if (!sorted) {
656 // sort in increasing order
658 } else {
659 // array is already sorted
660 std::iota(index.begin(), index.end(), 0);
661 }
662 // build cumulants
663 double sum1 = 0.0;
664 double sum2 = 0.0;
665 for (int i = 0; i < nPoints; i++) {
666 double x = data[index[i]];
667 sum1 += x;
668 sum2 += x * x;
669 wx[i] = sum1;
670 wx2[i] = sum2;
671 }
672 int keepMax = nPoints;
673 int keepMin = fracKeepMin * nPoints;
674 if (keepMin > keepMax) {
675 keepMin = keepMax;
676 }
677 float sigTgt2 = sigTgt * sigTgt;
678 //
679 while (true) {
680 double maxRMS = wx2.back() + 1e6;
681 int keepN = (keepMax + keepMin) / 2;
682 if (keepN < 2) {
683 return false;
684 }
685 params[0] = keepN;
686 int limI = nPoints - keepN + 1;
687 for (int i = 0; i < limI; ++i) {
688 const int limJ = i + keepN - 1;
689 sum1 = wx[limJ] - (i ? wx[i - 1] : 0.);
690 sum2 = wx2[limJ] - (i ? wx2[i - 1] : 0.);
691 const double mean = sum1 / keepN;
692 const double rms2 = sum2 / keepN - mean * mean;
693 if (rms2 > maxRMS) {
694 continue;
695 }
696 maxRMS = rms2;
697 params[1] = mean;
698 params[2] = rms2;
699 params[5] = i;
700 params[6] = limJ;
701 }
702 if (maxRMS < sigTgt2) {
703 keepMin = keepN;
704 } else {
705 keepMax = keepN;
706 }
707 if (keepMin >= keepMax - 1) {
708 break;
709 }
710 }
711 params[2] = std::sqrt(params[2]);
712 params[3] = params[2] / std::sqrt(params[0]); // error on mean
713 params[4] = params[3] / std::sqrt(2.0); // error on RMS
714 return true;
715}
716
717//___________________________________________________________________
718template <typename T>
719T selKthMin(int k, int np, T* arr)
720{
721 // Returns the k th smallest value in the array. The input array will be rearranged
722 // to have this value in location arr[k] , with all smaller elements moved before it
723 // (in arbitrary order) and all larger elements after (also in arbitrary order).
724 // From Numerical Recipes in C++
725
726 int i, j, mid, ir = np - 1, l = 0;
727 T a;
728 for (;;) {
729 if (ir <= l + 1) {
730 if (ir == l + 1 && arr[ir] < arr[l]) {
731 std::swap(arr[l], arr[ir]);
732 }
733 return arr[k];
734 } else {
735 int mid = (l + ir) >> 1, i = l + 1;
736 std::swap(arr[mid], arr[i]);
737 if (arr[i] > arr[ir]) {
738 std::swap(arr[i], arr[ir]);
739 }
740 if (arr[l] > arr[ir]) {
741 std::swap(arr[l], arr[ir]);
742 }
743 if (arr[i] > arr[l]) {
744 std::swap(arr[i], arr[l]);
745 }
746 j = ir;
747 a = arr[l];
748 for (;;) {
749 do {
750 i++;
751 } while (arr[i] < a);
752 do {
753 j--;
754 } while (arr[j] > a);
755 if (j < i) {
756 break;
757 }
758 std::swap(arr[i], arr[j]);
759 }
760 arr[l] = arr[j];
761 arr[j] = a;
762 if (j >= k) {
763 ir = j - 1;
764 }
765 if (j <= k) {
766 l = i;
767 }
768 }
769 }
770}
771
772//___________________________________________________________________
773template <typename T>
774T MAD2Sigma(int np, T* y)
775{
776 // Sigma calculated from median absolute deviations, https://en.wikipedia.org/wiki/Median_absolute_deviation
777 // the input array is modified
778 if (np < 2) {
779 return 0;
780 }
781 int nph = np >> 1;
782 float median = (np & 0x1) ? selKthMin(nph, np, y) : 0.5 * (selKthMin(nph - 1, np, y) + selKthMin(nph, np, y));
783 // build abs differences to median
784 for (int i = np; i--;) {
785 y[i] = std::abs(y[i] - median);
786 }
787 // now get median of abs deviations
788 median = (np & 0x1) ? selKthMin(nph, np, y) : 0.5 * (selKthMin(nph - 1, np, y) + selKthMin(nph, np, y));
789 return median * 1.4826; // convert to Gaussian sigma
790}
791
795template <typename DataTimeType, typename DataTime>
796std::optional<std::pair<size_t, size_t>> findClosestIndices(const std::vector<DataTimeType>& timestamps, DataTime timestamp)
797{
798 if (timestamps.empty()) {
799 LOGP(warning, "Timestamp vector is empty!");
800 return std::nullopt;
801 }
802
803 if (timestamp <= timestamps.front()) {
804 return std::pair{0, 0};
805 } else if (timestamp >= timestamps.back()) {
806 return std::pair{timestamps.size() - 1, timestamps.size() - 1};
807 }
808
809 const auto it = std::lower_bound(timestamps.begin(), timestamps.end(), timestamp);
810 const size_t idx = std::distance(timestamps.begin(), it);
811 const auto prevTimestamp = timestamps[idx - 1];
812 const auto nextTimestamp = timestamps[idx];
813 return std::pair{(idx - 1), idx};
814}
815
817 RollingStats() = default;
818 RollingStats(const int nValues)
819 {
820 median.resize(nValues);
821 std.resize(nValues);
822 nPoints.resize(nValues);
823 closestDistanceL.resize(nValues);
824 closestDistanceR.resize(nValues);
825 }
826
827 std::vector<float> median;
828 std::vector<float> std;
829 std::vector<int> nPoints;
830 std::vector<float> closestDistanceL;
831 std::vector<float> closestDistanceR;
832
834};
835
845template <typename DataTimeType, typename DataType, typename DataTime>
846RollingStats getRollingStatistics(const DataTimeType& timeData, const DataType& data, const DataTime& times, const double deltaMax, const int mNthreads, const size_t minPoints = 4, const size_t nClosestPoints = 4)
847{
848 // output statistics
849 const size_t vecSize = times.size();
850 RollingStats stats(vecSize);
851
852 if (!std::is_sorted(timeData.begin(), timeData.end())) {
853 LOGP(error, "Input data is NOT sorted!");
854 return stats;
855 }
856
857 if (timeData.empty()) {
858 LOGP(error, "Input data is empty!");
859 return stats;
860 }
861
862 const size_t dataSize = data.size();
863 const size_t timeDataSize = timeData.size();
864 if (timeDataSize != dataSize) {
865 LOGP(error, "Input data has different sizes {}!={}", timeDataSize, dataSize);
866 return stats;
867 }
868
869 auto myThread = [&](int iThread) {
870 // data in given time window for median calculation
871 DataType window;
872 for (size_t i = iThread; i < vecSize; i += mNthreads) {
873 const double timeI = times[i];
874
875 // lower index
876 const double timeStampLower = timeI - deltaMax;
877 const auto lower = std::lower_bound(timeData.begin(), timeData.end(), timeStampLower);
878 size_t idxStart = std::distance(timeData.begin(), lower);
879
880 // upper index
881 const double timeStampUpper = timeI + deltaMax;
882 const auto upper = std::lower_bound(timeData.begin(), timeData.end(), timeStampUpper);
883 size_t idxEnd = std::distance(timeData.begin(), upper);
884
885 // closest data point
886 if (auto idxClosest = findClosestIndices(timeData, timeI)) {
887 auto [idxLeft, idxRight] = *idxClosest;
888 const auto closestL = std::abs(timeData[idxLeft] - timeI);
889 const auto closestR = std::abs(timeData[idxRight] - timeI);
890 stats.closestDistanceL[i] = closestL;
891 stats.closestDistanceR[i] = closestR;
892
893 // if no points are in the range use the n closest points - n from the left and n from the right
894 const size_t reqSize = idxEnd - idxStart;
895 if (reqSize < minPoints) {
896 // calculate weighted average
897 idxStart = (idxRight > nClosestPoints) ? (idxRight - nClosestPoints) : 0;
898 idxEnd = std::min(data.size(), idxRight + nClosestPoints);
899 constexpr float epsilon = 1e-6f;
900 double weightedSum = 0.0;
901 double weightTotal = 0.0;
902 for (size_t j = idxStart; j < idxEnd; ++j) {
903 const double dist = std::abs(timeI - timeData[j]);
904 const double weight = 1.0 / (dist + epsilon);
905 weightedSum += weight * data[j];
906 weightTotal += weight;
907 }
908 stats.median[i] = (weightTotal > 0.) ? (weightedSum / weightTotal) : 0.0f;
909 } else {
910 // calculate statistics
911 stats.nPoints[i] = reqSize;
912
913 if (idxStart >= data.size()) {
914 stats.median[i] = data.back();
915 continue;
916 }
917
918 if (reqSize <= 1) {
919 stats.median[i] = data[idxStart];
920 continue;
921 }
922
923 // calculate median
924 window.clear();
925 if (reqSize > window.capacity()) {
926 window.reserve(static_cast<size_t>(reqSize * 1.5));
927 }
928 window.insert(window.end(), data.begin() + idxStart, data.begin() + idxEnd);
929 const size_t middle = window.size() / 2;
930 std::nth_element(window.begin(), window.begin() + middle, window.end());
931 stats.median[i] = (window.size() % 2 == 1) ? window[middle] : ((window[middle - 1] + window[middle]) / 2.0);
932
933 // calculate the stdev
934 const float mean = std::accumulate(window.begin(), window.end(), 0.0f) / window.size();
935 std::transform(window.begin(), window.end(), window.begin(), [mean](const float val) { return val - mean; });
936 const float sqsum = std::inner_product(window.begin(), window.end(), window.begin(), 0.0f);
937 const float stdev = std::sqrt(sqsum / window.size());
938 stats.std[i] = stdev;
939 }
940 }
941 }
942 };
943
944 std::vector<std::thread> threads(mNthreads);
945 for (int i = 0; i < mNthreads; i++) {
946 threads[i] = std::thread(myThread, i);
947 }
948
949 for (auto& th : threads) {
950 th.join();
951 }
952 return stats;
953}
954
955} // namespace math_utils
956} // namespace o2
957#endif
std::vector< unsigned long > times
int32_t i
uint32_t j
Definition RawData.h:0
uint32_t res
Definition RawData.h:0
Definition A.h:16
float sum(float s, o2::dcs::DataPointValue v)
Definition dcs-ccdb.cxx:39
GLdouble n
Definition glcorearb.h:1982
GLint GLenum GLint x
Definition glcorearb.h:403
GLenum func
Definition glcorearb.h:778
const GLdouble * v
Definition glcorearb.h:832
GLenum GLsizei dataSize
Definition glcorearb.h:3994
GLuint GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat s1
Definition glcorearb.h:5034
GLuint index
Definition glcorearb.h:781
GLuint GLuint GLfloat weight
Definition glcorearb.h:5477
GLboolean GLboolean GLboolean b
Definition glcorearb.h:1233
GLenum GLint * range
Definition glcorearb.h:1899
GLenum const GLfloat * params
Definition glcorearb.h:272
GLenum GLsizei GLsizei GLint * values
Definition glcorearb.h:1576
GLboolean * data
Definition glcorearb.h:298
GLuint GLfloat * val
Definition glcorearb.h:1582
GLfloat GLfloat GLfloat GLfloat v3
Definition glcorearb.h:814
GLenum GLfloat param
Definition glcorearb.h:271
GLboolean GLboolean GLboolean GLboolean a
Definition glcorearb.h:1233
GLubyte GLubyte GLubyte GLubyte w
Definition glcorearb.h:852
GLuint GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat s0
Definition glcorearb.h:5034
bool medmadGaus(size_t nBins, const T *arr, const T xMin, const T xMax, std::array< double, 3 > &param)
Definition fit.h:133
void Reorder(std::vector< T > &data, const std::vector< size_t > &index)
Definition fit.h:626
TFitResultPtr fit(const size_t nBins, const T *arr, const T xMin, const T xMax, TF1 &func, std::string_view option="")
Definition fit.h:60
bool LTMUnbinnedSig(const std::vector< T > &data, std::vector< size_t > &index, std::array< float, 7 > &params, float fracKeepMin, float sigTgt, bool sorted=false)
Definition fit.h:649
void SortData(std::vector< T > const &values, std::vector< size_t > &index)
Definition fit.h:540
RollingStats getRollingStatistics(const DataTimeType &timeData, const DataType &data, const DataTime &times, const double deltaMax, const int mNthreads, const size_t minPoints=4, const size_t nClosestPoints=4)
calculates the rolling statistics of the input data
Definition fit.h:846
StatisticsData getStatisticsData(const T *arr, const size_t nBins, const double xMin, const double xMax)
Definition fit.h:471
T MAD2Sigma(int np, T *y)
Definition fit.h:774
const T y
Definition Utils.h:225
bool LTMUnbinned(const std::vector< T > &data, std::vector< size_t > &index, std::array< float, 7 > &params, float fracKeep)
Definition fit.h:570
T selKthMin(int k, int np, T *arr)
Definition fit.h:719
std::optional< std::pair< size_t, size_t > > findClosestIndices(const std::vector< DataTimeType > &timestamps, DataTime timestamp)
Definition fit.h:796
R median(std::vector< T > v)
Definition fit.h:520
Double_t fitGaus(const size_t nBins, const T *arr, const T xMin, const T xMax, std::vector< T > &param)
Definition fit.h:232
a couple of static helper functions to create timestamp values for CCDB queries or override obsolete ...
Defining DataPointCompositeObject explicitly as copiable.
std::vector< float > closestDistanceL
distance of closest point to the left
Definition fit.h:830
std::vector< float > std
std of rolling data
Definition fit.h:828
std::vector< int > nPoints
number of points used for the calculation
Definition fit.h:829
std::vector< float > closestDistanceR
distance of closest point to the right
Definition fit.h:831
RollingStats(const int nValues)
Definition fit.h:818
std::vector< float > median
median of rolling data
Definition fit.h:827
ClassDefNV(RollingStats, 1)
double mCOG
calculated centre of gravity
Definition fit.h:458
double mSum
sum of values
Definition fit.h:460
double mStdDev
standard deviation
Definition fit.h:459
constexpr size_t max
LOG(info)<< "Compressed in "<< sw.CpuTime()<< " s"
o2::InteractionRecord ir(0, 0)