d3/d4e/BandMatrixSolver_8h_source.html

// Copyright 2019-2020 CERN and copyright holders of ALICE O2.

// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.

// All rights not expressly granted are reserved.

//

// This software is distributed under the terms of the GNU General Public

// License v3 (GPL Version 3), copied verbatim in the file "COPYING".

//

// In applying this license CERN does not waive the privileges and immunities

// granted to it by virtue of its status as an Intergovernmental Organization

// or submit itself to any jurisdiction.


#ifndef ALICEO2_GPUCOMMON_TPCFASTTRANSFORMATION_BANDMATRIXSOLVER_H

#define ALICEO2_GPUCOMMON_TPCFASTTRANSFORMATION_BANDMATRIXSOLVER_H


#include "GPUCommonDef.h"

#include "GPUCommonRtypes.h"

#include <vector>

#include <cassert>

#include <cstdlib>

#include <algorithm>

#include <limits>


namespace o2

{

namespace gpu

{


/// Solver of linear equations whose matrix A is stored as a band of BandWidthT
/// values per row, together with a right-hand side B of Bdim columns.
///
/// Element (i, j) and element (j, i) of A map to the same storage slot, see A().
template <int32_t BandWidthT>
class BandMatrixSolver
{
 public:
  /// Constructor.
  /// \param N    number of rows of the system, must be positive
  /// \param Bdim number of columns of the right-hand side, must be positive
  ///
  /// Both storage arrays are created zero-filled.
  BandMatrixSolver(int32_t N, int32_t Bdim) : mN(N), mBdim(Bdim)
  {
    assert(N > 0 && Bdim > 0);
    const int32_t sizeA = mN * BandWidthT; // BandWidthT stored values per matrix row
    const int32_t sizeB = mN * mBdim;      // mBdim stored values per right-hand-side row
    mA.assign(sizeA, 0.);
    mB.assign(sizeB, 0.);
  }

  /// Debug tool: init arrays with NaN's.
  /// Makes sure that elements which were never set (for the matrix type 1)
  /// do not silently take part in the calculations.
  void initWithNaN()
  {
    const double nan = std::numeric_limits<double>::signaling_NaN();
    std::fill(mA.begin(), mA.end(), nan);
    std::fill(mB.begin(), mB.end(), nan);
  }

  /// Access to A elements.
  /// The indices are ordered internally, so A(i, j) and A(j, i) return the same reference.
  /// The distance |i - j| must be smaller than BandWidthT.
  double& A(int32_t i, int32_t j)
  {
    const int32_t row = std::min(i, j);
    const int32_t col = std::max(i, j);
    assert(row >= 0 && col < mN);
    const int32_t offset = col - row; // distance from the diagonal
    assert(offset < BandWidthT);
    return mA[row * BandWidthT + offset];
  }

  /// Access to B elements: row i, column j of the right-hand side.
  double& B(int32_t i, int32_t j)
  {
    assert(i >= 0 && i < mN && j >= 0 && j < mBdim);
    const int32_t index = i * mBdim + j;
    return mB[index];
  }

  /// Solve the equation.
  void solve();

  /// Solve an equation of a special type.
  void solveType1();

  /// Test the class functionality. Returns 1 when ok, 0 when not ok.
  /// The call is forwarded to the BandMatrixSolver<0> instantiation.
  static int32_t test(bool prn = 0)
  {
    return BandMatrixSolver<0>::test(prn);
  }

 private:
  /// Elimination step applied to nRows consecutive rows starting at AA / bb.
  template <int32_t nRows>
  void triangulateBlock(double AA[], double bb[]);

  /// Substitution step applied to nCols consecutive rows, walking upward from A / b.
  template <int32_t nCols>
  void dioganalizeBlock(double A[], double b[]);

  int32_t mN = 0;         ///< number of rows
  int32_t mBdim = 0;      ///< number of columns of the right-hand side
  std::vector<double> mA; ///< band storage of the matrix, BandWidthT values per row
  std::vector<double> mB; ///< right-hand side, mBdim values per row

  ClassDefNV(BandMatrixSolver, 0);
};


/// Explicit specialization of test() for BandWidthT == 0. It is only declared here
/// (the definition is not part of this header); the generic test() forwards to it.
template <>

int32_t BandMatrixSolver<0>::test(bool prn);


/// Elimination step for nRows consecutive rows.
///
/// \param AA first of the rows in the band storage of the matrix (BandWidthT values per row)
/// \param bb corresponding first row of the right-hand side (mBdim values per row)
///
/// Row r of the block is treated as having BandWidthT - r stored elements.
/// The diagonal element of every processed row is replaced by its reciprocal and
/// the remaining elements of the row are divided by the diagonal.
template <int32_t BandWidthT>
template <int32_t nRows>
inline void BandMatrixSolver<BandWidthT>::triangulateBlock(double AA[], double bb[])
{
  // Pass 1: update the matrix
  for (int32_t r = 0; r < nRows; ++r) {
    double* const pivotRow = AA + r * BandWidthT;
    const int32_t width = BandWidthT - r; // number of elements used in this row

    const double invPivot = 1. / pivotRow[0];
    pivotRow[0] = invPivot; // keep 1/diagonal instead of the diagonal

    for (int32_t i = 1; i < width; ++i) {
      // Row i below the pivot row: in band storage its column j sits at lowerRow[j]
      double* const lowerRow = pivotRow + i * (BandWidthT - 1);
      const double scaled = invPivot * pivotRow[i];
      for (int32_t j = i; j < width; ++j) {
        lowerRow[j] -= scaled * pivotRow[j]; // pivotRow[j] is still unscaled for j >= i
      }
      pivotRow[i] = scaled; // scale only after the element has been used above
    }
  }

  // Pass 2: apply the same elimination to every column of the right-hand side
  for (int32_t k = 0; k < mBdim; ++k) {
    for (int32_t r = 0; r < nRows; ++r) {
      const double* const pivotRow = AA + r * BandWidthT;
      double* const rhs = bb + r * mBdim;
      const int32_t width = BandWidthT - r;

      const double pivotRhs = rhs[k];
      for (int32_t i = 1; i < width; ++i) {
        rhs[mBdim * i + k] -= pivotRow[i] * pivotRhs;
      }
      rhs[k] *= pivotRow[0]; // pivotRow[0] holds 1/diagonal after pass 1
    }
  }
}


/// Substitution step for nCols consecutive rows, walking upward.
///
/// \param AA row of the band storage of the matrix the block starts at
/// \param bb corresponding row of the right-hand side
///
/// For every column of the right-hand side and every row of the block (the start
/// row first, then the rows above it), the value of that row is multiplied by the
/// matrix coefficient coupling it to a row above and subtracted from that row.
/// The c-th row of the block reaches BandWidthT - 1 - c rows upward.
/// Only the right-hand side is modified.
template <int32_t BandWidthT>
template <int32_t nCols>
inline void BandMatrixSolver<BandWidthT>::dioganalizeBlock(double AA[], double bb[])
{
  for (int32_t k = 0; k < mBdim; ++k) {
    for (int32_t c = 0; c < nCols; ++c) {
      const double* const rowA = AA - c * BandWidthT;
      double* const rowB = bb - c * mBdim;
      const int32_t reach = BandWidthT - c;

      const double value = rowB[k];
      for (int32_t up = 1; up < reach; ++up) {
        // element at distance `up` from the diagonal in the row `up` rows above
        const double coupling = rowA[BandWidthT * (-up) + up];
        rowB[-up * mBdim + k] -= coupling * value;
      }
    }
  }
}


/// Solve the equation.
///
/// Works in place on mA and mB in two passes: a top-down pass through
/// triangulateBlock() followed by a bottom-up pass through dioganalizeBlock().
/// Each pass treats the rows one by one and finishes with a single call that
/// covers the remaining BandWidthT rows.
///
/// Requires mN >= BandWidthT.
template <int32_t BandWidthT>
inline void BandMatrixSolver<BandWidthT>::solve()
{
  // The closing call of each pass spans BandWidthT rows; with fewer rows in the
  // system it would read and write outside mA and mB.
  assert(mN >= BandWidthT);

  const int32_t stepA = BandWidthT;
  const int32_t stepB = mBdim;

  // Upper Triangulization
  {
    int32_t k = 0;
    double* Ak = mA.data();
    double* bk = mB.data();
    for (; k < mN - BandWidthT; k += 1, Ak += stepA, bk += stepB) { // for each row k
      triangulateBlock<1>(Ak, bk);
    }
    // last m rows
    triangulateBlock<BandWidthT>(Ak, bk);
  }

  // Diagonalization
  {
    int32_t k = mN - 1;
    double* Ak = mA.data() + BandWidthT * k;
    double* bk = mB.data() + mBdim * k;
    for (; k > BandWidthT - 1; k -= 1, Ak -= stepA, bk -= stepB) { // for each row k
      dioganalizeBlock<1>(Ak, bk);
    }
    // first m rows
    dioganalizeBlock<BandWidthT>(Ak, bk);
  }
}


/// Solve an equation of a special type.
///
/// Same two-pass scheme as solve(), but the rows are processed in pairs
/// (triangulateBlock<2> / dioganalizeBlock<2>), again finishing each pass with a
/// single call that covers BandWidthT rows.
///
/// Requires mN >= BandWidthT and an even difference mN - BandWidthT.
template <int32_t BandWidthT>
inline void BandMatrixSolver<BandWidthT>::solveType1()
{
  // The closing call of each pass spans BandWidthT rows, so the system needs at
  // least that many rows. The loops advance by two rows: they only stop exactly
  // at the start of the closing block when mN - BandWidthT is even, otherwise the
  // closing call is shifted by one row and runs outside mA and mB.
  assert(mN >= BandWidthT);
  assert((mN - BandWidthT) % 2 == 0);

  const int32_t stepA = 2 * BandWidthT;
  const int32_t stepB = 2 * mBdim;

  // Upper Triangulization
  {
    int32_t k = 0;
    double* Ak = mA.data();
    double* bk = mB.data();
    for (; k < mN - BandWidthT; k += 2, Ak += stepA, bk += stepB) { // for each pair of rows k, k+1
      triangulateBlock<2>(Ak, bk);
    }
    // last m rows
    triangulateBlock<BandWidthT>(Ak, bk);
  }

  // Diagonalization
  {
    int32_t k = mN - 1;
    double* Ak = mA.data() + BandWidthT * k;
    double* bk = mB.data() + mBdim * k;
    for (; k > BandWidthT - 1; k -= 2, Ak -= stepA, bk -= stepB) { // for each pair of rows k, k-1
      dioganalizeBlock<2>(Ak, bk);
    }
    // first m rows
    dioganalizeBlock<BandWidthT>(Ak, bk);
  }
}


} // namespace gpu

} // namespace o2


#endif

i
int32_t i
Definition GPUCommonAlgorithm.h:436

GPUCommonDef.h

GPUCommonRtypes.h

bb
const int16_t bb
Definition GPUTPCGMMerger.cxx:1479

j
uint32_t j
Definition RawData.h:0

col
uint32_t col
Definition RawData.h:4

c
uint32_t c
Definition RawData.h:2

A
Definition A.h:16

o2::gpu::BandMatrixSolver
Definition BandMatrixSolver.h:56

o2::gpu::BandMatrixSolver::A
double & A(int32_t i, int32_t j)
access to A elements
Definition BandMatrixSolver.h:75

o2::gpu::BandMatrixSolver::B
double & B(int32_t i, int32_t j)
access to B elements
Definition BandMatrixSolver.h:85

o2::gpu::BandMatrixSolver::test
static int32_t test(bool prn=0)
Test the class functionality. Returns 1 when ok, 0 when not ok.
Definition BandMatrixSolver.h:98

o2::gpu::BandMatrixSolver::initWithNaN
void initWithNaN()
debug tool: init arrays with NaN's
Definition BandMatrixSolver.h:67

o2::gpu::BandMatrixSolver::BandMatrixSolver
BandMatrixSolver(int32_t N, int32_t Bdim)
Constructor.
Definition BandMatrixSolver.h:59

o2::gpu::BandMatrixSolver::solve
void solve()
solve the equation
Definition BandMatrixSolver.h:184

o2::gpu::BandMatrixSolver::solveType1
void solveType1()
solve an equation of a special type
Definition BandMatrixSolver.h:216

m
const GLfloat * m
Definition glcorearb.h:4066

b
GLboolean GLboolean GLboolean b
Definition glcorearb.h:1233

o2::its3::constants::pixelarray::nCols
constexpr int nCols
Definition SpecsV2.h:38

o2::its3::constants::pixelarray::nRows
constexpr int nRows
Definition SpecsV2.h:39

o2
a couple of static helper functions to create timestamp values for CCDB queries or override obsolete ...
Definition BitstreamReader.h:24

rows
std::vector< ReadoutWindowData > rows
Definition test_ctf_io_tof.cxx:42