d5/d71/fit_8h_source.html

// Copyright 2019-2020 CERN and copyright holders of ALICE O2.

// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.

// All rights not expressly granted are reserved.

//

// This software is distributed under the terms of the GNU General Public

// License v3 (GPL Version 3), copied verbatim in the file "COPYING".

//

// In applying this license CERN does not waive the privileges and immunities

// granted to it by virtue of its status as an Intergovernmental Organization

// or submit itself to any jurisdiction.


#ifndef ALICEO2_MATHUTILS_MATHBASE_H_

#define ALICEO2_MATHUTILS_MATHBASE_H_


#include <cmath>
#include <numeric>
#include <algorithm>
#include <string>
#include <vector>
#include <array>
#include <thread>

#include "Rtypes.h"
#include "TLinearFitter.h"
#include "TVectorD.h"
#include "TMath.h"
#include "TF1.h"
#include "Foption.h"
#include "HFitInterface.h"
#include "TFitResultPtr.h"
#include "TFitResult.h"
#include "Fit/Fitter.h"
#include "Fit/BinData.h"
#include "Math/WrappedMultiTF1.h"
#include <Math/SMatrix.h>
#include <Math/SVector.h>
#include "Framework/Logger.h"


namespace o2

{

namespace math_utils

{

/// \brief Fit a TF1 to binned data stored in a plain array, using the ROOT fitter.
///
/// The array is treated as a histogram of nBins equal-width bins spanning [xMin, xMax].
/// Every bin enters the fit as the point (bin centre, content) with the error sqrt(content).
/// For the predefined functions identified by TF1::GetNumber() (100: gaussian, 400: landau,
/// 200: exponential) the start parameters are estimated from the data, unless parameter
/// bounds are requested or the function is treated as linear.
///
/// \param nBins  number of bins in arr
/// \param arr    bin contents (nBins entries)
/// \param xMin   lower edge of the first bin
/// \param xMax   upper edge of the last bin
/// \param func   function to fit
/// \param option fit option string, parsed by ROOT::Fit::FitOptionsMake
/// \return the fit result; a warning is logged if the fitter reports a failure
template <typename T>
TFitResultPtr fit(const size_t nBins, const T* arr, const T xMin, const T xMax, TF1& func, std::string_view option = "")
{
  // FitOptionsMake expects a null-terminated C string, which std::string_view::data()
  // does not guarantee -> go via an owning std::string
  const std::string optionStr{option};
  Foption_t fitOption;
  ROOT::Fit::FitOptionsMake(ROOT::Fit::EFitObjectType::kHistogram, optionStr.c_str(), fitOption);

  ROOT::Fit::DataRange range(xMin, xMax);
  ROOT::Fit::DataOptions opt;
  ROOT::Fit::BinData fitdata(opt, range);
  fitdata.Initialize(nBins, 1);

  // create an empty TFitResult and the fitter which will fill it
  std::shared_ptr<TFitResult> tfr(new TFitResult());
  ROOT::Fit::Fitter fitter(tfr);

  const double binWidth = double(xMax - xMin) / double(nBins);

  // size_t index: avoids the signed/unsigned comparison against nBins
  for (size_t ibin = 0; ibin < nBins; ibin++) {
    const double x = double(xMin) + (double(ibin) + 0.5) * binWidth; // bin centre
    const double y = double(arr[ibin]);
    const double ey = std::sqrt(y); // NOTE(review): NaN for negative bin contents
    fitdata.Add(x, y, ey);
  }

  const int special = func.GetNumber();
  const int npar = func.GetNpar();
  bool linear = func.IsLinear();
  if (special == 299 + npar) {
    linear = kTRUE; // for polynomial functions
  }
  // do not use the linear fitter in these cases
  if (fitOption.Bound || fitOption.Like || fitOption.Errors || fitOption.Gradient || fitOption.More || fitOption.User || fitOption.Integral || fitOption.Minuit) {
    linear = kFALSE;
  }

  // estimate start parameters for the predefined functions
  if (special != 0 && !fitOption.Bound && !linear) {
    if (special == 100) {
      ROOT::Fit::InitGaus(fitdata, &func); // gaussian
    } else if (special == 400) {
      ROOT::Fit::InitGaus(fitdata, &func); // landau (use the same)
    } else if (special == 200) {
      ROOT::Fit::InitExpo(fitdata, &func); // exponential
    }
  }

  if ((linear || fitOption.Gradient)) {
    fitter.SetFunction(ROOT::Math::WrappedMultiTF1(func));
  } else {
    fitter.SetFunction(static_cast<const ROOT::Math::IParamMultiFunction&>(ROOT::Math::WrappedMultiTF1(func)));
  }

  // standard least square fit
  const bool fitok = fitter.Fit(fitdata, fitOption.ExecPolicy);
  if (!fitok) {
    LOGP(warning, "bad fit");
  }

  return TFitResultPtr(tfr);
}


/// \brief Estimate gaussian parameters of binned data from its median and median absolute deviation.
///
/// \param nBins number of bins in arr
/// \param arr   bin contents
/// \param xMin  lower edge of the first bin
/// \param xMax  upper edge of the last bin
/// \param param output: [0] amplitude, [1] median position, [2] sigma estimated as 1.4826 * MAD
/// \return false if no bin has a non-zero content (param is then left untouched), true otherwise
template <typename T>
bool medmadGaus(size_t nBins, const T* arr, const T xMin, const T xMax, std::array<double, 3>& param)
{
  const double binW = double(xMax - xMin) / nBins;
  double sum = 0;
  double medVal = xMin;
  int bStart = 0;
  int bEnd = -1;

  // find the first and the last non-empty bins and integrate the contents
  for (int ib = 0; ib < (int)nBins; ++ib) {
    const auto content = arr[ib];
    if (!content) {
      continue;
    }
    if (!sum) {
      bStart = ib;
    }
    sum += content;
    bEnd = ib;
  }
  if (bEnd < bStart) {
    return false; // nothing but empty bins
  }
  ++bEnd; // from now on: one past the last non-empty bin

  // locate the median: the position where the cumulative sum crosses half of the total
  const double thresh = 0.5 * sum;
  double cum = 0;
  double frac0 = 0; // fractional position of the median inside its bin
  int bid = bStart;
  int prevbid = bid; // last non-empty bin seen before the current one
  for (; bid < bEnd; ++bid) {
    if (!(arr[bid] > 0)) {
      continue;
    }
    cum += arr[bid];
    if (cum > thresh) {
      frac0 = 1. + (thresh - cum) / float(arr[bid]);
      medVal = xMin + binW * (bid + frac0);
      const int bdiff = bid - prevbid - 1; // number of empty bins preceding the median bin
      if (bdiff > 0) {
        medVal -= bdiff * binW * 0.5; // account for the gap
        bid -= bdiff / 2;
      }
      break;
    }
    prevbid = bid;
  }

  // grow a window symmetrically around the median (in units of bins) until the
  // accumulated content reaches half of the total: its half-width is the MAD
  cum = 0.;
  double edgeL = frac0 + bid;
  double edgeR = edgeL;
  double dist = 0.;
  double wL = 0;
  double wR = 0;
  for (;;) {
    float amp = 0.;
    const int bL = edgeL; // bin holding the left edge
    const int bR = edgeR; // bin holding the right edge
    if (edgeL > bStart) {
      wL = edgeL - bL;
      amp += arr[bL];
    } else {
      wL = 1.;
    }
    if (edgeR < bEnd) {
      wR = 1. + bR - edgeR;
      amp += arr[bR];
    } else {
      wR = 1.;
    }
    // step by the smaller distance to the next bin boundary, unless it is negligible
    auto wdt = std::min(wL, wR);
    if (wdt < 1e-5) {
      wdt = std::max(wL, wR);
    }
    if (amp > 0) {
      amp *= wdt;
      cum += amp;
      if (cum >= thresh) {
        dist += wdt * (cum - thresh) / amp * 0.5;
        break;
      }
    }
    dist += wdt;
    edgeL -= wdt;
    edgeR += wdt;
  }

  constexpr double kSqrt2Pi = 2.5066283;
  const double sigma = dist * binW * 1.4826; // MAD -> sigma
  param[1] = medVal;
  param[2] = sigma;
  param[0] = sum * binW / (sigma * kSqrt2Pi);
  return true;
}


// template <typename T>

// Double_t  fitGaus(const size_t nBins, const T *arr, const T xMin, const T xMax, std::vector<T>& param);

/// \brief Gaussian fit of binned data via a parabola fit to the logarithm of the bin contents.
///
/// Only bins with a content above 1 are used. With more than three such bins the pol2
/// TLinearFitter is used, with exactly three the parabola is calculated analytically, with
/// one or two the centre of gravity is used instead.
///
/// \param nBins number of bins in arr
/// \param arr   bin contents
/// \param xMin  lower edge of the first bin
/// \param xMax  upper edge of the last bin
/// \param param output, resized to 4: [0] amplitude, [1] mean, [2] sigma, [3] sum of all bin contents
/// \return chi2 divided by the number of used points for more than three points;
///         -3, -2, -1 if exactly three, two or one point(s) were used; 0 if no bin qualified;
///         -4 if the input is rejected (maximum < 4, sum < 12, RMS below tolerance) or the
///         fitted parabola is degenerate
///
/// NOTE(review): the fitter and the matrix are function-local statics shared between calls,
/// so concurrent calls are presumably not safe - confirm before using from several threads.
template <typename T>
Double_t fitGaus(const size_t nBins, const T* arr, const T xMin, const T xMax, std::vector<T>& param)
{
  static TLinearFitter fitter(3, "pol2");
  static TMatrixD mat(3, 3);
  static Double_t kTol = mat.GetTol();
  // reset the shared fitter from any previous call
  fitter.StoreData(kFALSE);
  fitter.ClearPoints();
  TVectorD par(3); // parabola coefficients of log(content)
  TVectorD sigma(3);
  TMatrixD A(3, 3); // system for the analytic three point solution
  TMatrixD b(3, 1);
  T rms = TMath::RMS(nBins, arr);
  T max = TMath::MaxElement(nBins, arr);
  T binWidth = (xMax - xMin) / T(nBins);

  // centre of gravity sums, accumulated for the first three used points only
  Float_t meanCOG = 0;
  Float_t rms2COG = 0;
  Float_t sumCOG = 0;

  Float_t entries = 0;
  Int_t nfilled = 0;

  param.resize(4);
  param[0] = 0.;
  param[1] = 0.;
  param[2] = 0.;
  param[3] = 0.;

  for (size_t i = 0; i < nBins; i++) {
    entries += arr[i];
    if (arr[i] > 0) {
      nfilled++;
    }
  }

  // TODO: Check why this is needed
  if (max < 4) {
    return -4;
  }
  if (entries < 12) {
    return -4;
  }

  if (rms < kTol) {
    return -4;
  }

  param[3] = entries;

  Int_t npoints = 0;
  for (size_t ibin = 0; ibin < nBins; ibin++) {
    Float_t entriesI = arr[ibin];
    if (entriesI > 1) {
      Double_t xcenter = xMin + (ibin + 0.5) * binWidth;
      // error of log(n) for an error sqrt(n) on n
      Double_t error = 1. / TMath::Sqrt(entriesI);
      Double_t val = TMath::Log(Float_t(entriesI));
      fitter.AddPoint(&xcenter, val, error);
      if (npoints < 3) {
        A(npoints, 0) = 1;
        A(npoints, 1) = xcenter;
        A(npoints, 2) = xcenter * xcenter;
        b(npoints, 0) = val;
        meanCOG += xcenter * entriesI;
        rms2COG += xcenter * entriesI * xcenter;
        sumCOG += entriesI;
      }
      npoints++;
    }
  }

  Double_t chi2 = 0;
  if (npoints >= 3) {
    if (npoints == 3) {
      // analytic calculation of the parameters for three points
      A.Invert();
      // NOTE(review): res is declared 1x3 while A * b is 3x1 - confirm that Mult tolerates this
      TMatrixD res(1, 3);
      res.Mult(A, b);
      par[0] = res(0, 0);
      par[1] = res(0, 1);
      par[2] = res(0, 2);
      chi2 = -3.;
    } else {
      // use fitter for more than three points
      fitter.Eval();
      fitter.GetParameters(par);
      fitter.GetCovarianceMatrix(mat);
      chi2 = fitter.GetChisquare() / Double_t(npoints);
    }
    // a vanishing linear or quadratic term is treated as a failed fit
    if (TMath::Abs(par[1]) < kTol) {
      return -4;
    }
    if (TMath::Abs(par[2]) < kTol) {
      return -4;
    }
    // vertex and width of the parabola give the mean and sigma of the gaussian
    param[1] = T(par[1] / (-2. * par[2]));
    param[2] = T(1. / TMath::Sqrt(TMath::Abs(-2. * par[2])));
    // logarithm of the amplitude: parabola evaluated at the mean
    Double_t lnparam0 = par[0] + par[1] * param[1] + par[2] * param[1] * param[1];
    if (lnparam0 > 307) {
      return -4;
    }
    param[0] = TMath::Exp(lnparam0);

    return chi2;
  }

  if (npoints == 2) {
    // use center of gravity for 2 points
    meanCOG /= sumCOG;
    rms2COG /= sumCOG;
    param[0] = max;
    param[1] = meanCOG;
    param[2] = TMath::Sqrt(TMath::Abs(meanCOG * meanCOG - rms2COG));
    chi2 = -2.;
  }
  if (npoints == 1) {
    // single point: its position as mean, bin width / sqrt(12) as sigma
    meanCOG /= sumCOG;
    param[0] = max;
    param[1] = meanCOG;
    param[2] = binWidth / TMath::Sqrt(12);
    chi2 = -1.;
  }
  return chi2;
}


// more optimal implementation of the gaussian fit via a log-normal fit, appropriate for MT calls
// Only bins with values of at least minVal are taken into account.
// If applyMAD is true, the fit is done within the nSigmaMAD range of the preliminary estimate by MAD

/// \brief Gaussian fit of binned data: weighted parabola fit to the logarithm of the bin contents,
///        solved analytically via the 3x3 normal equations.
///
/// \param nBins    number of bins in arr
/// \param arr      bin contents, must not be negative in the fitted range
/// \param xMin     lower edge of the first bin
/// \param xMax     upper edge of the last bin
/// \param param    output: [0] amplitude, [1] mean, [2] sigma
/// \param covMat   if not null, receives the covariance matrix of param
/// \param minVal   bins with a content below this value are skipped
/// \param applyMAD if true, only bins within 2 sigma (nSigmaMAD) around the medmadGaus estimate are used
/// \return chi2 per degree of freedom (0 for exactly three points) on success;
///         -np if fewer than three (np) points were available, -3 if the fit result is unusable
///         (in both cases param is filled from the weighted mean and RMS);
///         -10 if medmadGaus failed, no point was selected or the matrix inversion failed
/// \throws std::runtime_error if a negative bin content is met
template <typename T>
double fitGaus(size_t nBins, const T* arr, const T xMin, const T xMax, std::array<double, 3>& param,
               ROOT::Math::SMatrix<double, 3, 3, ROOT::Math::MatRepSym<double, 3>>* covMat = nullptr,
               int minVal = 2, bool applyMAD = true)
{
  // s_k: weighted sums of x^k, sy_k: weighted sums of y * x^k, syy: weighted sum of y^2, with y = log(content)
  double binW = double(xMax - xMin) / nBins, s0 = 0, s1 = 0, s2 = 0, s3 = 0, s4 = 0, sy0 = 0, sy1 = 0, sy2 = 0, syy = 0;
  int np = 0;
  int bStart = 0, bEnd = (int)nBins;
  const float nSigmaMAD = 2.;
  if (applyMAD) {
    // restrict the bin range to the vicinity of the median
    std::array<double, 3> madPar;
    if (!medmadGaus(nBins, arr, xMin, xMax, madPar)) {
      return -10;
    }
    bStart = std::max(bStart, int((madPar[1] - nSigmaMAD * madPar[2] - xMin) / binW));
    bEnd = std::min(bEnd, 1 + int((madPar[1] + nSigmaMAD * madPar[2] - xMin) / binW));
  }
  // start half a bin below the centre of bin bStart, the loop increments before use
  // NOTE(review): x is a float although all sums are accumulated in double - confirm this is intended
  float x = xMin + (bStart - 0.5) * binW;
  for (int i = bStart; i < bEnd; i++) {
    x += binW;
    auto v = arr[i];
    if (v < 0) {
      throw std::runtime_error("Log-normal fit is possible only with non-negative data");
    }
    if (v < minVal) {
      continue;
    }
    // every point enters with the weight v; err2iX and err2iY are multiplied by x in place
    // to produce the successive powers of x
    double y = std::log(v), err2i = v, err2iX = err2i, err2iY = err2i * y;
    s0 += err2iX;
    s1 += (err2iX *= x);
    s2 += (err2iX *= x);
    s3 += (err2iX *= x);
    s4 += (err2iX *= x);
    sy0 += err2iY;
    syy += err2iY * y;
    sy1 += (err2iY *= x);
    sy2 += (err2iY *= x);
    np++;
  }
  if (np < 1) {
    return -10;
  }
  // fallback estimate used whenever the parabola fit is impossible or fails
  auto recover = [&param, binW, np, s0, s1, s2, sy0]() {
    param[0] = std::exp(sy0 / s0); // exponent of the weighted mean of log(content)
    param[1] = s1 / s0;            // mean x;
    param[2] = np == 1 ? binW / std::sqrt(12) : std::sqrt(std::abs(param[1] * param[1] - s2 / s0));
  };
  if (np < 3) {
    recover();
    return -np;
  }
  // normal equations m33 * v = v3 for the parabola coefficients v
  ROOT::Math::SMatrix<double, 3, 3, ROOT::Math::MatRepSym<double, 3>> m33{};
  ROOT::Math::SVector<double, 3> v3{sy0, sy1, sy2};
  m33(0, 0) = s0;
  m33(1, 0) = s1;
  m33(1, 1) = m33(2, 0) = s2;
  m33(2, 1) = s3;
  m33(2, 2) = s4;
  int res = 0;
  auto m33i = m33.Inverse(res);
  if (res) {
    recover();
    LOG(error) << np << " points collected, matrix inversion failed " << m33;
    return -10;
  }
  auto v = m33i * v3;
  if (v(2) >= 0.) { // fit failed (parabola does not open downwards), use mean and RMS
    recover();
    return -3;
  }

  // weighted sum of squared residuals, expanded in terms of the accumulated sums
  double chi2 = v(0) * v(0) * s0 + v(1) * v(1) * s2 + v(2) * v(2) * s4 + syy +
                2. * (v(0) * v(1) * s1 + v(0) * v(2) * s2 + v(1) * v(2) * s3 - v(0) * sy0 - v(1) * sy1 - v(2) * sy2);
  // convert the parabola coefficients to the gaussian mean, sigma and amplitude
  param[1] = -0.5 * v(1) / v(2);
  param[2] = 1. / std::sqrt(-2. * v(2));
  param[0] = std::exp(v(0) - param[1] * param[1] * v(2));
  if (std::isnan(param[0]) || std::isnan(param[1]) || std::isnan(param[2])) {
    recover();
    return -3;
  }
  if (covMat) {
    // build jacobian of transformation from log-normal to normal params
    ROOT::Math::SMatrix<double, 3, 3, ROOT::Math::MatRepStd<double, 3, 3>> j33{};
    j33(0, 0) = param[0];
    j33(0, 1) = param[0] * param[1];
    j33(0, 2) = j33(0, 1) * param[1];
    j33(1, 1) = -0.5 / v(2);
    j33(1, 2) = -param[1] / v(2);
    j33(2, 2) = param[2] * j33(1, 1);
    *covMat = ROOT::Math::Similarity(j33, m33i);
  }
  return np > 3 ? chi2 / (np - 3.) : 0.;
}


/// Basic statistics of a binned distribution, all values default to zero
struct StatisticsData {
  double mCOG = 0.;    ///< calculated centre of gravity
  double mStdDev = 0.; ///< standard deviation
  double mSum = 0.;    ///< sum of values
};


/// \brief Centre of gravity, standard deviation and sum of a binned distribution.
///
/// Only bins with a positive content contribute.
///
/// \param arr   bin contents
/// \param nBins number of bins in arr
/// \param xMin  lower edge of the first bin
/// \param xMax  upper edge of the last bin
/// \return the statistics; if the sum of the contributing bins is zero, the centre of gravity
///         is set to xMin and the other members stay 0. If a single bin contributes, the
///         standard deviation is set to bin width / sqrt(12)
template <typename T>
StatisticsData getStatisticsData(const T* arr, const size_t nBins, const double xMin, const double xMax)
{
  const double binWidth = (xMax - xMin) / (double)nBins;

  StatisticsData result;
  // in case something went wrong the COG is the histogram lower limit
  result.mCOG = xMin;

  double sumW = 0;   // sum of contents
  double sumWX = 0;  // sum of content * x
  double sumWX2 = 0; // sum of content * x^2
  size_t nFilled = 0;
  for (size_t ibin = 0; ibin < nBins; ++ibin) {
    const double content = (double)arr[ibin];
    if (!(content > 0)) {
      continue;
    }
    const double xcenter = xMin + (ibin + 0.5) * binWidth; // +0.5 to shift to bin centre
    sumWX += xcenter * content;
    sumWX2 += xcenter * content * xcenter;
    sumW += content;
    ++nFilled;
  }
  if (sumW == 0) {
    return result;
  }

  const double mean = sumWX / sumW;
  const double meanX2 = sumWX2 / sumW;
  result.mCOG = mean;
  // a single filled bin has no measurable spread: use bin width over sqrt(12)
  result.mStdDev = (nFilled == 1) ? binWidth / std::sqrt(12.) : std::sqrt(std::abs(meanX2 - mean * mean));
  result.mSum = sumW;
  return result;
}


/// \brief Median of a set of values.
///
/// \tparam T element type
/// \tparam R type of the result (double by default)
/// \param v  values; taken by value since the selection reorders the elements
/// \return R{} for an empty input, the central element for an odd number of elements,
///         the average of the two central elements for an even number
template <typename T, typename R = double>
R median(std::vector<T> v)
{
  if (v.empty()) {
    return R{};
  }
  const auto n = v.size() / 2;
  std::nth_element(v.begin(), v.begin() + n, v.end());
  // explicit conversions: brace-initialisation R{...} is a narrowing conversion
  // (ill-formed) for e.g. T = int with R = double, or for R = float
  auto med = static_cast<R>(v[n]);
  if (!(v.size() & 1)) { // If the set size is even
    // after nth_element everything before position n is <= v[n], so the lower
    // central element is the maximum of that range
    const auto lower = *std::max_element(v.begin(), v.begin() + n);
    med = static_cast<R>((lower + med) / 2.0);
  }
  return med;
}


/// \brief Fill index with the permutation which sorts values in increasing order.
///
/// \param values values to be ordered (not modified)
/// \param index  output; must already have the same size as values, otherwise an error
///               is logged and index is left untouched
template <typename T>
void SortData(std::vector<T> const& values, std::vector<size_t>& index)
{
  if (values.size() == index.size()) {
    // start from the identity permutation and order it by the referenced values
    std::iota(index.begin(), index.end(), static_cast<size_t>(0));
    const auto byValue = [&values](size_t lhs, size_t rhs) { return values[lhs] < values[rhs]; };
    std::sort(index.begin(), index.end(), byValue);
    return;
  }
  LOG(error) << "Vector with values must have same size as vector for indices";
}


template <typename T>


bool LTMUnbinned(const std::vector<T>& data, std::vector<size_t>& index, std::array<float, 7>& params, float fracKeep)

{

  int nPoints = data.size();

  std::vector<float> w(2 * nPoints);

  int nKeep = nPoints * fracKeep;

  if (nKeep > nPoints) {

    nKeep = nPoints;

  }

  if (nKeep < 2) {

    return false;

  }

  // sort in increasing order

  SortData(data, index);

  // build cumulants

  double sum1 = 0.0;

  double sum2 = 0.0;

  for (int i = 0; i < nPoints; i++) {

    double x = data[index[i]];

    sum1 += x;

    sum2 += x * x;

    w[i] = sum1;

    w[i + nPoints] = sum2;

  }

  double maxRMS = sum2 + 1e6;

  params[0] = nKeep;

  int limI = nPoints - nKeep + 1; // lowest possible bin to accept

  for (int i = 0; i < limI; i++) {

    const int limJ = i + nKeep - 1; // highest accepted bin

    sum1 = static_cast<double>(w[limJ]) - static_cast<double>(i ? w[i - 1] : 0.);

    sum2 = static_cast<double>(w[nPoints + limJ]) - static_cast<double>(i ? w[nPoints + i - 1] : 0.);

    const double mean = sum1 / nKeep;

    const double rms2 = sum2 / nKeep - mean * mean;

    if (rms2 > maxRMS) {

      continue;

    }

    maxRMS = rms2;

    params[1] = mean;

    params[2] = rms2;

    params[5] = i;

    params[6] = limJ;

  }

  //

  if (params[2] < 0) {

    LOG(error) << "Rounding error: RMS = " << params[2] << " < 0";

    return false;

  }

  params[2] = std::sqrt(params[2]);

  params[3] = params[2] / std::sqrt(params[0]); // error on mean

  params[4] = params[3] / std::sqrt(2.0);       // error on RMS

  return true;

}


/// \brief Rearrange data in the order given by index: afterwards data[i] holds the former data[index[i]].
///
/// \param data  values to be rearranged in place
/// \param index source position for every element; must have the same size as data,
///              otherwise an error is logged and data is left untouched
template <typename T>
void Reorder(std::vector<T>& data, const std::vector<size_t>& index)
{
  if (data.size() != index.size()) {
    LOG(error) << "Reordering not possible if number of elements in index container different from the data container";
    return;
  }
  // work from a snapshot, since elements are overwritten while being picked
  const std::vector<T> original(data);
  auto target = data.begin();
  for (const size_t source : index) {
    *target = original[source];
    ++target;
  }
}


template <typename T>


bool LTMUnbinnedSig(const std::vector<T>& data, std::vector<size_t>& index, std::array<float, 7>& params, float fracKeepMin, float sigTgt, bool sorted = false)

{

  int nPoints = data.size();

  std::vector<double> wx(nPoints);

  std::vector<double> wx2(nPoints);


  if (!sorted) {

    // sort in increasing order

    SortData(data, index);

  } else {

    // array is already sorted

    std::iota(index.begin(), index.end(), 0);

  }

  // build cumulants

  double sum1 = 0.0;

  double sum2 = 0.0;

  for (int i = 0; i < nPoints; i++) {

    double x = data[index[i]];

    sum1 += x;

    sum2 += x * x;

    wx[i] = sum1;

    wx2[i] = sum2;

  }

  int keepMax = nPoints;

  int keepMin = fracKeepMin * nPoints;

  if (keepMin > keepMax) {

    keepMin = keepMax;

  }

  float sigTgt2 = sigTgt * sigTgt;

  //

  while (true) {

    double maxRMS = wx2.back() + 1e6;

    int keepN = (keepMax + keepMin) / 2;

    if (keepN < 2) {

      return false;

    }

    params[0] = keepN;

    int limI = nPoints - keepN + 1;

    for (int i = 0; i < limI; ++i) {

      const int limJ = i + keepN - 1;

      sum1 = wx[limJ] - (i ? wx[i - 1] : 0.);

      sum2 = wx2[limJ] - (i ? wx2[i - 1] : 0.);

      const double mean = sum1 / keepN;

      const double rms2 = sum2 / keepN - mean * mean;

      if (rms2 > maxRMS) {

        continue;

      }

      maxRMS = rms2;

      params[1] = mean;

      params[2] = rms2;

      params[5] = i;

      params[6] = limJ;

    }

    if (maxRMS < sigTgt2) {

      keepMin = keepN;

    } else {

      keepMax = keepN;

    }

    if (keepMin >= keepMax - 1) {

      break;

    }

  }

  params[2] = std::sqrt(params[2]);

  params[3] = params[2] / std::sqrt(params[0]); // error on mean

  params[4] = params[3] / std::sqrt(2.0);       // error on RMS

  return true;

}


//___________________________________________________________________

/// \brief Returns the k-th smallest value (k counted from 0) of an array.
///
/// The input array is rearranged to have this value in location arr[k], with all smaller
/// elements moved before it (in arbitrary order) and all larger elements after (also in
/// arbitrary order).
///
/// \param k   rank of the requested element, 0 <= k < np
/// \param np  number of elements in arr
/// \param arr array to select from, modified in place
/// \return the k-th smallest value, or T{} (array untouched) if k is outside [0, np)
template <typename T>
T selKthMin(int k, int np, T* arr)
{
  // an invalid rank would otherwise read outside of the array
  if (np <= 0 || k < 0 || k >= np) {
    return T{};
  }
  std::nth_element(arr, arr + k, arr + np);
  return arr[k];
}


//___________________________________________________________________

/// \brief Sigma calculated from the median absolute deviation,
///        https://en.wikipedia.org/wiki/Median_absolute_deviation
///
/// \param np number of elements in y
/// \param y  input values; the array is modified (it ends up holding the absolute
///           deviations from the median, partially ordered)
/// \return 1.4826 * MAD, i.e. the equivalent gaussian sigma; 0 if np < 2
template <typename T>
T MAD2Sigma(int np, T* y)
{
  if (np < 2) {
    return static_cast<T>(0);
  }
  const int nph = np >> 1;
  const bool isOdd = np & 0x1;
  // median of the current content of y; kept in double precision in order not to
  // truncate the result for T = double
  const auto medianOfY = [&]() -> double {
    if (isOdd) {
      return selKthMin(nph, np, y);
    }
    const double lower = selKthMin(nph - 1, np, y);
    const double upper = selKthMin(nph, np, y);
    return 0.5 * (lower + upper);
  };
  const double median = medianOfY();
  // build abs differences to median
  for (int i = np; i--;) {
    y[i] = std::abs(y[i] - median);
  }
  // now get median of abs deviations and convert it to Gaussian sigma
  return static_cast<T>(medianOfY() * 1.4826);
}


/// \brief Find the indices of the entries of a sorted vector which enclose a given time stamp.
///
/// \param timestamps time stamps; the search uses std::lower_bound, i.e. presumes increasing order
/// \param timestamp  time stamp to look up
/// \return std::nullopt (and a warning) for an empty vector; {0, 0} if timestamp is not above
///         the first entry; {last, last} if it is not below the last entry; otherwise
///         {i - 1, i}, with i being the first entry which is not smaller than timestamp
template <typename DataTimeType, typename DataTime>
std::optional<std::pair<size_t, size_t>> findClosestIndices(const std::vector<DataTimeType>& timestamps, DataTime timestamp)
{
  using IndexPair = std::pair<size_t, size_t>;

  if (timestamps.empty()) {
    LOGP(warning, "Timestamp vector is empty!");
    return std::nullopt;
  }

  // outside of the covered range: both indices point to the nearest edge
  if (timestamp <= timestamps.front()) {
    return IndexPair{0, 0};
  }
  if (timestamp >= timestamps.back()) {
    const size_t last = timestamps.size() - 1;
    return IndexPair{last, last};
  }

  // strictly inside: the first entry >= timestamp cannot be at position 0 here
  const size_t right = std::distance(timestamps.begin(), std::lower_bound(timestamps.begin(), timestamps.end(), timestamp));
  return IndexPair{right - 1, right};
}


/// Per-point output of the rolling statistics calculation: every vector holds one entry per requested point
struct RollingStats {
  RollingStats() = default;

  /// create the output vectors with nValues zero-initialised entries each
  RollingStats(const int nValues)
    : median(nValues), std(nValues), nPoints(nValues), closestDistanceL(nValues), closestDistanceR(nValues)
  {
  }

  std::vector<float> median;           ///< median of rolling data
  std::vector<float> std;              ///< std of rolling data
  std::vector<int> nPoints;            ///< number of points used for the calculation
  std::vector<float> closestDistanceL; ///< distance of closest point to the left
  std::vector<float> closestDistanceR; ///< distance of closest point to the right

  ClassDefNV(RollingStats, 1);
};


/// \brief Calculates the rolling statistics of the input data.
///
/// For every entry t of times the data points with time stamps in [t - deltaMax, t + deltaMax)
/// are collected and their median and standard deviation are stored. If fewer than minPoints
/// points are found, the inverse-distance weighted average of the points around the closest
/// data point is stored as median instead (std and nPoints then stay 0).
///
/// \param timeData       time stamps of the data points, must be sorted in increasing order
/// \param data           values of the data points, same size as timeData
/// \param times          time stamps for which the statistics are calculated
/// \param deltaMax       half width of the time window
/// \param mNthreads      number of threads to use; values below 1 are treated as 1
/// \param minPoints      minimum number of points in the window for the median calculation
/// \param nClosestPoints number of points taken on each side of the closest point in the fallback
/// \return statistics with one entry per element of times; all entries are 0 if the input is
///         rejected (not sorted, empty or of inconsistent size; an error is logged)
template <typename DataTimeType, typename DataType, typename DataTime>
RollingStats getRollingStatistics(const DataTimeType& timeData, const DataType& data, const DataTime& times, const double deltaMax, const int mNthreads, const size_t minPoints = 4, const size_t nClosestPoints = 4)
{
  // output statistics
  const size_t vecSize = times.size();
  RollingStats stats(vecSize);

  if (!std::is_sorted(timeData.begin(), timeData.end())) {
    LOGP(error, "Input data is NOT sorted!");
    return stats;
  }

  if (timeData.empty()) {
    LOGP(error, "Input data is empty!");
    return stats;
  }

  const size_t dataSize = data.size();
  const size_t timeDataSize = timeData.size();
  if (timeDataSize != dataSize) {
    LOGP(error, "Input data has different sizes {}!={}", timeDataSize, dataSize);
    return stats;
  }

  // a thread count of 0 would leave the output unfilled, a negative one would be
  // converted to a huge vector size below: always use at least one thread
  const int nThreads = std::max(1, mNthreads);

  // every thread processes the points iThread, iThread + nThreads, ... and writes
  // only to the corresponding output entries
  auto myThread = [&](int iThread) {
    // data in given time window for median calculation
    DataType window;
    for (size_t i = iThread; i < vecSize; i += nThreads) {
      const double timeI = times[i];

      // lower index
      const double timeStampLower = timeI - deltaMax;
      const auto lower = std::lower_bound(timeData.begin(), timeData.end(), timeStampLower);
      size_t idxStart = std::distance(timeData.begin(), lower);

      // upper index
      const double timeStampUpper = timeI + deltaMax;
      const auto upper = std::lower_bound(timeData.begin(), timeData.end(), timeStampUpper);
      size_t idxEnd = std::distance(timeData.begin(), upper);

      // closest data point
      if (auto idxClosest = findClosestIndices(timeData, timeI)) {
        auto [idxLeft, idxRight] = *idxClosest;
        const auto closestL = std::abs(timeData[idxLeft] - timeI);
        const auto closestR = std::abs(timeData[idxRight] - timeI);
        stats.closestDistanceL[i] = closestL;
        stats.closestDistanceR[i] = closestR;

        // if too few points are in the range use the n closest points - n from the left and n from the right
        const size_t reqSize = idxEnd - idxStart;
        if (reqSize < minPoints) {
          // calculate weighted average
          idxStart = (idxRight > nClosestPoints) ? (idxRight - nClosestPoints) : 0;
          idxEnd = std::min(data.size(), idxRight + nClosestPoints);
          constexpr float epsilon = 1e-6f; // avoids a division by zero for coinciding time stamps
          double weightedSum = 0.0;
          double weightTotal = 0.0;
          for (size_t j = idxStart; j < idxEnd; ++j) {
            const double dist = std::abs(timeI - timeData[j]);
            const double weight = 1.0 / (dist + epsilon);
            weightedSum += weight * data[j];
            weightTotal += weight;
          }
          stats.median[i] = (weightTotal > 0.) ? (weightedSum / weightTotal) : 0.0f;
        } else {
          // calculate statistics
          stats.nPoints[i] = reqSize;

          if (idxStart >= data.size()) {
            stats.median[i] = data.back();
            continue;
          }

          if (reqSize <= 1) {
            stats.median[i] = data[idxStart];
            continue;
          }

          // calculate median
          window.clear();
          if (reqSize > window.capacity()) {
            window.reserve(static_cast<size_t>(reqSize * 1.5));
          }
          window.insert(window.end(), data.begin() + idxStart, data.begin() + idxEnd);
          const size_t middle = window.size() / 2;
          std::nth_element(window.begin(), window.begin() + middle, window.end());
          stats.median[i] = (window.size() % 2 == 1) ? window[middle] : ((window[middle - 1] + window[middle]) / 2.0);

          // calculate the stdev
          const float mean = std::accumulate(window.begin(), window.end(), 0.0f) / window.size();
          std::transform(window.begin(), window.end(), window.begin(), [mean](const float val) { return val - mean; });
          const float sqsum = std::inner_product(window.begin(), window.end(), window.begin(), 0.0f);
          const float stdev = std::sqrt(sqsum / window.size());
          stats.std[i] = stdev;
        }
      }
    }
  };

  std::vector<std::thread> threads;
  threads.reserve(nThreads);
  for (int i = 0; i < nThreads; i++) {
    threads.emplace_back(myThread, i);
  }

  for (auto& th : threads) {
    th.join();
  }
  return stats;
}


} // namespace math_utils

} // namespace o2

#endif

times
std::vector< unsigned long > times
Definition CCDBFetcherTestWorkflow.cxx:27

i
int32_t i
Definition GPUCommonAlgorithm.h:436

Logger.h

j
uint32_t j
Definition RawData.h:0

res
uint32_t res
Definition RawData.h:0

A
Definition A.h:16

Float_t

ROOT::Math::MatRepSym
Definition GPUROOTCartesianFwd.h:30

ROOT::Math::SMatrix
Definition GPUROOTCartesianFwd.h:28

ROOT::Math::SVector
Definition GPUROOTSMatrixFwd.h:28

R

TF1

int

sum
float sum(float s, o2::dcs::DataPointValue v)
Definition dcs-ccdb.cxx:39

n
GLdouble n
Definition glcorearb.h:1982

x
GLint GLenum GLint x
Definition glcorearb.h:403

func
GLenum func
Definition glcorearb.h:778

v
const GLdouble * v
Definition glcorearb.h:832

dataSize
GLenum GLsizei dataSize
Definition glcorearb.h:3994

s1
GLuint GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat s1
Definition glcorearb.h:5034

index
GLuint index
Definition glcorearb.h:781

weight
GLuint GLuint GLfloat weight
Definition glcorearb.h:5477

b
GLboolean GLboolean GLboolean b
Definition glcorearb.h:1233

range
GLenum GLint * range
Definition glcorearb.h:1899

params
GLenum const GLfloat * params
Definition glcorearb.h:272

values
GLenum GLsizei GLsizei GLint * values
Definition glcorearb.h:1576

data
GLboolean * data
Definition glcorearb.h:298

val
GLuint GLfloat * val
Definition glcorearb.h:1582

v3
GLfloat GLfloat GLfloat GLfloat v3
Definition glcorearb.h:814

param
GLenum GLfloat param
Definition glcorearb.h:271

a
GLboolean GLboolean GLboolean GLboolean a
Definition glcorearb.h:1233

w
GLubyte GLubyte GLubyte GLubyte w
Definition glcorearb.h:852

s0
GLuint GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat s0
Definition glcorearb.h:5034

o2::math_utils::medmadGaus
bool medmadGaus(size_t nBins, const T *arr, const T xMin, const T xMax, std::array< double, 3 > &param)
Definition fit.h:133

o2::math_utils::Reorder
void Reorder(std::vector< T > &data, const std::vector< size_t > &index)
Definition fit.h:626

o2::math_utils::fit
TFitResultPtr fit(const size_t nBins, const T *arr, const T xMin, const T xMax, TF1 &func, std::string_view option="")
Definition fit.h:60

o2::math_utils::LTMUnbinnedSig
bool LTMUnbinnedSig(const std::vector< T > &data, std::vector< size_t > &index, std::array< float, 7 > &params, float fracKeepMin, float sigTgt, bool sorted=false)
Definition fit.h:649

o2::math_utils::SortData
void SortData(std::vector< T > const &values, std::vector< size_t > &index)
Definition fit.h:540

o2::math_utils::x
float x
Definition Utils.h:214

o2::math_utils::getRollingStatistics
RollingStats getRollingStatistics(const DataTimeType &timeData, const DataType &data, const DataTime &times, const double deltaMax, const int mNthreads, const size_t minPoints=4, const size_t nClosestPoints=4)
calculates the rolling statistics of the input data
Definition fit.h:846

o2::math_utils::getStatisticsData
StatisticsData getStatisticsData(const T *arr, const size_t nBins, const double xMin, const double xMax)
Definition fit.h:471

o2::math_utils::MAD2Sigma
T MAD2Sigma(int np, T *y)
Definition fit.h:774

o2::math_utils::y
const T y
Definition Utils.h:225

o2::math_utils::LTMUnbinned
bool LTMUnbinned(const std::vector< T > &data, std::vector< size_t > &index, std::array< float, 7 > &params, float fracKeep)
Definition fit.h:570

o2::math_utils::selKthMin
T selKthMin(int k, int np, T *arr)
Definition fit.h:719

o2::math_utils::findClosestIndices
std::optional< std::pair< size_t, size_t > > findClosestIndices(const std::vector< DataTimeType > &timestamps, DataTime timestamp)
Definition fit.h:796

o2::math_utils::median
R median(std::vector< T > v)
Definition fit.h:520

o2::math_utils::fitGaus
Double_t fitGaus(const size_t nBins, const T *arr, const T xMin, const T xMax, std::vector< T > &param)
Definition fit.h:232

o2
a couple of static helper functions to create timestamp values for CCDB queries or override obsolete ...
Definition BitstreamReader.h:24

std
Defining DataPointCompositeObject explicitly as copiable.
Definition CcdbObjectInfo.h:121

o2::math_utils::RollingStats
Definition fit.h:816

o2::math_utils::RollingStats::closestDistanceL
std::vector< float > closestDistanceL
distance of closest point to the left
Definition fit.h:830

o2::math_utils::RollingStats::std
std::vector< float > std
std of rolling data
Definition fit.h:828

o2::math_utils::RollingStats::nPoints
std::vector< int > nPoints
number of points used for the calculation
Definition fit.h:829

o2::math_utils::RollingStats::closestDistanceR
std::vector< float > closestDistanceR
distance of closest point to the right
Definition fit.h:831

o2::math_utils::RollingStats::RollingStats
RollingStats(const int nValues)
Definition fit.h:818

o2::math_utils::RollingStats::median
std::vector< float > median
median of rolling data
Definition fit.h:827

o2::math_utils::RollingStats::RollingStats
RollingStats()=default

o2::math_utils::RollingStats::ClassDefNV
ClassDefNV(RollingStats, 1)

o2::math_utils::StatisticsData
Definition fit.h:457

o2::math_utils::StatisticsData::mCOG
double mCOG
calculated centre of gravity
Definition fit.h:458

o2::math_utils::StatisticsData::mSum
double mSum
sum of values
Definition fit.h:460

o2::math_utils::StatisticsData::mStdDev
double mStdDev
standard deviation
Definition fit.h:459

max
constexpr size_t max
Definition test_Algorithm.cxx:49

LOG
LOG(info)<< "Compressed in "<< sw.CpuTime()<< " s"

ir
o2::InteractionRecord ir(0, 0)