24#ifndef ALICEO2_SMATRIX_GPU_H
25#define ALICEO2_SMATRIX_GPU_H
44#define GPU_STATIC_CHECK(expr, msg) \
50 (void)(Check<(expr) != 0>(&e)); \
53template <
typename T,
unsigned int N>
67 GPUhd() const
T& operator[](
unsigned int i) const;
69 GPUd() const
T& operator()(
unsigned int i) const;
70 GPUd()
T& operator()(
unsigned int i);
73 GPUd()
T apply(
unsigned int i) const;
83 GPUdi() static
unsigned int Dim() {
return N; }
89template <
class T,
unsigned int D>
95template <
class T,
unsigned int D>
96GPUdi() const
T* SVectorGPU<
T,
D>::begin()
const
101template <
class T,
unsigned int D>
104 return mArray + Dim();
107template <
class T,
unsigned int D>
110 return mArray + Dim();
112template <
class T,
unsigned int N>
119template <
class T,
unsigned int N>
120GPUhdi()
T& SVectorGPU<
T, N>::operator[](
unsigned int i)
125template <
class T,
unsigned int N>
131template <
class T,
unsigned int N>
132GPUdi()
T& SVectorGPU<
T, N>::operator()(
unsigned int i)
137template <
class T,
unsigned int N>
140 for (
unsigned int i = 0;
i < N; ++
i) {
145template <
class T,
unsigned int N>
146GPUd() SVectorGPU<
T, N>::SVectorGPU(const SVectorGPU<
T, N>&
rhs)
148 for (
unsigned int i = 0;
i < N; ++
i) {
149 mArray[
i] =
rhs.mArray[
i];
153template <
class T,
unsigned int D>
154GPUdi()
T SVectorGPU<
T,
D>::apply(
unsigned int i)
const
159template <
class T,
unsigned int D>
160GPUdi() const
T* SVectorGPU<
T,
D>::Array()
const
165template <
class T,
unsigned int D>
171template <
unsigned int I>
173 template <
class A,
class B,
class T>
182 template <
class A,
class B,
class T>
185 return lhs.apply(0) *
rhs.apply(0);
189template <
class T,
unsigned int D>
195template <
class T,
unsigned int D>
198 for (
unsigned int i = 0;
i <
D; ++
i) {
199 mArray[
i] -=
rhs.apply(
i);
204template <
class T,
unsigned int D>
207 for (
unsigned int i = 0;
i <
D; ++
i) {
208 mArray[
i] +=
rhs.apply(
i);
213template <
unsigned int I>
216 template <
class MatrixA,
class MatrixB>
217 static GPUdi() typename MatrixA::value_type
f(const MatrixA&
lhs,
219 const
unsigned int offset)
221 return lhs.apply(
offset / MatrixB::kCols * MatrixA::kCols + I) *
222 rhs.apply(MatrixB::kCols * I +
offset % MatrixB::kCols) +
226 template <
class MatrixA,
class MatrixB>
227 static GPUdi() typename MatrixA::value_type
g(const MatrixA&
lhs,
240 template <
class MatrixA,
class MatrixB>
241 static GPUdi() typename MatrixA::value_type
f(const MatrixA&
lhs,
243 const
unsigned int offset)
245 return lhs.apply(
offset / MatrixB::kCols * MatrixA::kCols) *
250 template <
class MatrixA,
class MatrixB>
251 static GPUdi() typename MatrixA::value_type
g(const MatrixA&
lhs,
253 unsigned int i,
unsigned int j)
259namespace row_offsets_utils
265template <
int I,
class IndexTuple,
int N>
268template <
int I,
int... Indices,
int N>
274template <
int N,
int... Indices>
283template <
int I0,
class F,
int... I>
290template <
int N,
int I0 = 0,
class F>
298template <
class T,
unsigned int D>
306 GPUhdi()
T& operator[](
unsigned int i) {
return mArray[off(
i)]; }
307 GPUdi()
T const& operator[](
unsigned int i)
const {
return mArray[off(
i)]; }
308 GPUdi()
T apply(
unsigned int i)
const {
return mArray[off(
i)]; }
323 for (
unsigned int i = 0;
i <
kSize; ++
i) {
324 mArray[
i] =
rhs.Array()[
i];
335 static constexpr int off0(
int i) {
return i == 0 ? 0 :
off0(
i - 1) +
i; }
341 static constexpr auto v = row_offsets_utils::make<D * D>(
off1);
345 static GPUdi() constexpr
unsigned int offset(
unsigned int i,
unsigned int j)
347 return off(
i *
D +
j);
355template <
class T,
unsigned int D1,
unsigned int D2 = D1>
363 return mArray[
i *
D2 +
j];
365 GPUdi()
T& operator()(
unsigned int i,
unsigned int j)
367 return mArray[
i *
D2 +
j];
369 GPUdi()
T& operator[](
unsigned int i) {
return mArray[
i]; }
371 GPUdi()
T apply(
unsigned int i)
const {
return mArray[
i]; }
378 for (
unsigned int i = 0;
i < kSize; ++
i) {
387 for (
unsigned int i = 0;
i < kSize; ++
i) {
388 rc = rc && (mArray[
i] ==
rhs[
i]);
409template <
class ExprType,
class T,
unsigned int D,
unsigned int D2 = 1,
class R1 = MatRepStdGPU<T, D, D2>>
418 return mRhs.apply(
i);
420 GPUdi()
T operator()(
unsigned int i,
unsigned j)
const
426 return mRhs.IsInUse(p);
438template <
class T,
unsigned int D1,
unsigned int D2 = D1,
class R = MatRepStdGPU<T, D1, D2>>
452 template <class
A, class R2>
456 template <class
A, class R2>
470 GPUd()
T& operator()(
unsigned int i,
unsigned int j);
472 template <typename Y, typename X>
481 GPUd()
T& operator[](
int j) {
return (*mMat)(mRow,
j); }
509 template <class
A, class R2>
520template <
class T,
unsigned int D1,
unsigned int D2,
class R,
typename Y,
typename X = Y>
521 requires(
sizeof(
typename X::traits_type::pos_type) != 0)
528template <
class T,
unsigned int D1,
unsigned int D2,
class R>
531 for (
unsigned int i = 0;
i < R::kSize; ++
i) {
535 for (
unsigned int i = 0;
i <
D1; ++
i) {
539 for (
unsigned int i = 0;
i <
D2; ++
i) {
545template <
class T,
unsigned int D1,
unsigned int D2,
class R>
551template <
class T,
unsigned int D1,
unsigned int D2,
class R>
558template <
class T,
unsigned int D1,
unsigned int D2,
class R>
564template <
class T,
unsigned int D1,
unsigned int D2,
class R>
567 return mRep.Array() + R::kSize;
570template <
class T,
unsigned int D1,
unsigned int D2,
class R>
573 return p ==
mRep.Array();
576template <
class T,
unsigned int D1,
unsigned int D2,
class A,
class R1,
class R2>
581 if (!
rhs.IsInUse(
lhs.begin())) {
583 for (
unsigned int i = 0;
i <
D1; ++
i) {
584 for (
unsigned int j = 0;
j <
D2; ++
j) {
592 for (
unsigned int i = 0;
i <
D1; ++
i) {
593 for (
unsigned int j = 0;
j <
D2; ++
j) {
599 for (
unsigned int i = 0;
i <
D1 *
D2; ++
i) {
600 lhs.mRep[
i] = tmp[
i];
606template <
class T,
unsigned int D1,
unsigned int D2,
class A>
612 if (!
rhs.IsInUse(
lhs.begin())) {
614 for (
unsigned int i = 0;
i <
D1; ++
i) {
616 for (
unsigned int j = 0;
j <=
i; ++
j) {
624 for (
unsigned int i = 0;
i <
D1; ++
i) {
625 for (
unsigned int j = 0;
j <=
i; ++
j) {
630 for (
unsigned int i = 0; i < MatRepSymGPU<T, D1>::kSize; ++
i) {
631 lhs.mRep.Array()[
i] = tmp[
i];
638template <
class T,
unsigned int D1,
unsigned int D2,
class A>
650 template <
class T,
unsigned int D,
class A,
class R>
654 for (
unsigned int i = 0;
i <
D; ++
i) {
655 for (
unsigned int j = 0;
j <=
i; ++
j) {
663 template <
class T,
unsigned int D,
class R>
667 for (
unsigned int i = 0;
i <
D; ++
i) {
668 for (
unsigned int j = 0;
j <=
i; ++
j) {
676template <
class T,
unsigned int D1,
unsigned int D2,
class R>
677template <
class A,
class R2>
678GPUdi()
SMatrixGPU<
T,
D1,
D2,
R>&
SMatrixGPU<
T,
D1,
D2,
R>::operator=(
const Expr<
A,
T,
D1,
D2, R2>&
rhs)
684template <
class T,
unsigned int D1,
unsigned int D2,
class R>
692template <
class T,
unsigned int D1,
unsigned int D2,
class R>
693template <
class A,
class R2>
699template <
class T,
unsigned int D1,
unsigned int D2,
class R>
705template <
class T,
unsigned int D1,
unsigned int D2,
class R>
706GPUdi()
T& SMatrixGPU<
T,
D1,
D2,
R>::operator()(
unsigned int i,
unsigned int j)
711template <
class T,
class R1,
class R2>
720template <
class MatrixA,
class MatrixB,
class T,
unsigned int D>
731 GPUdi() T operator()(
unsigned int i,
unsigned int j)
const
736 GPUdi() bool IsInUse(const T* p)
const
738 return lhs_.IsInUse(p) || rhs_.IsInUse(p);
746template <
class T,
unsigned int D1,
unsigned int D,
unsigned int D2,
class R1,
class R2>
747GPUdi()
Expr<
MatrixMulOpGPU<
SMatrixGPU<
T,
D1,
D,
R1>,
SMatrixGPU<
T,
D,
D2, R2>,
T,
D>,
T,
D1,
D2, typename
MultPolicyGPU<
T,
R1, R2>::RepType>
756template <
unsigned int D,
unsigned int N = D>
761 template <
class MatrixRep>
762 GPUd() static
bool Dinv(MatrixRep& rhs)
764 unsigned int work[N + 1] = {0};
765 typename MatrixRep::value_type det(0.0);
767 if (DfactMatrix(
rhs, det, work) != 0) {
771 int ifail = DfinvMatrix(
rhs, work);
803template <
unsigned int D,
unsigned int N>
807 typedef T value_type;
815 typedef int* pivIter;
818 mIter
x = xvec.begin();
820 pivIter piv = pivv.begin();
823 value_type temp1, temp2;
825 value_type lambda, sigma;
826 const value_type
alpha = .6404;
836 for (
i = 0;
i < nrow;
i++) {
846 for (
j = 1;
j < nrow;
j +=
s)
848 mjj =
rhs.Array() +
j * (
j - 1) / 2 +
j - 1;
851 ip =
rhs.Array() + (
j + 1) *
j / 2 +
j - 1;
852 for (
i =
j + 1;
i <= nrow; ip +=
i++) {
853 if (o2::gpu::GPUCommonMath::Abs(*ip) > lambda) {
854 lambda = o2::gpu::GPUCommonMath::Abs(*ip);
867 if (o2::gpu::GPUCommonMath::Abs(*mjj) >= lambda *
alpha) {
872 ip =
rhs.Array() + pivrow * (pivrow - 1) / 2 +
j - 1;
873 for (k =
j; k < pivrow; k++) {
874 if (o2::gpu::GPUCommonMath::Abs(*ip) > sigma) {
875 sigma = o2::gpu::GPUCommonMath::Abs(*ip);
880 if (o2::gpu::GPUCommonMath::Abs(*mjj) >=
alpha * lambda * (lambda / sigma)) {
883 }
else if (o2::gpu::GPUCommonMath::Abs(*(
rhs.Array() + pivrow * (pivrow - 1) / 2 + pivrow - 1)) >=
alpha * sigma) {
896 temp2 = *mjj = 1.0f / *mjj;
899 for (
i =
j + 1;
i <= nrow;
i++) {
900 temp1 = *(
rhs.Array() +
i * (
i - 1) / 2 +
j - 1) * temp2;
901 ip =
rhs.Array() +
i * (
i - 1) / 2 +
j;
902 for (k =
j + 1; k <=
i; k++) {
903 *ip -=
static_cast<T>(temp1 * *(
rhs.Array() + k * (k - 1) / 2 +
j - 1));
910 ip =
rhs.Array() + (
j + 1) *
j / 2 +
j - 1;
911 for (
i =
j + 1;
i <= nrow; ip +=
i++) {
912 *ip *=
static_cast<T>(temp2);
920 ip =
rhs.Array() + pivrow * (pivrow - 1) / 2 +
j;
921 for (
i =
j + 1;
i < pivrow;
i++, ip++) {
922 temp1 = *(
rhs.Array() +
i * (
i - 1) / 2 +
j - 1);
923 *(
rhs.Array() +
i * (
i - 1) / 2 +
j - 1) = *ip;
924 *ip =
static_cast<T>(temp1);
927 *mjj = *(
rhs.Array() + pivrow * (pivrow - 1) / 2 + pivrow - 1);
928 *(
rhs.Array() + pivrow * (pivrow - 1) / 2 + pivrow - 1) =
static_cast<T>(temp1);
929 ip =
rhs.Array() + (pivrow + 1) * pivrow / 2 +
j - 1;
930 iq = ip + pivrow -
j;
931 for (
i = pivrow + 1;
i <= nrow; ip +=
i, iq +=
i++) {
934 *ip =
static_cast<T>(temp1);
941 temp2 = *mjj = 1.0f / *mjj;
944 for (
i =
j + 1;
i <= nrow;
i++) {
945 temp1 = *(
rhs.Array() +
i * (
i - 1) / 2 +
j - 1) * temp2;
946 ip =
rhs.Array() +
i * (
i - 1) / 2 +
j;
947 for (k =
j + 1; k <=
i; k++) {
948 *ip -=
static_cast<T>(temp1 * *(
rhs.Array() + k * (k - 1) / 2 +
j - 1));
955 ip =
rhs.Array() + (
j + 1) *
j / 2 +
j - 1;
956 for (
i =
j + 1;
i <= nrow; ip +=
i++) {
957 *ip *=
static_cast<T>(temp2);
961 piv[
j - 1] = -pivrow;
964 if (
j + 1 != pivrow) {
967 ip =
rhs.Array() + pivrow * (pivrow - 1) / 2 +
j + 1;
968 for (
i =
j + 2;
i < pivrow;
i++, ip++) {
969 temp1 = *(
rhs.Array() +
i * (
i - 1) / 2 +
j);
970 *(
rhs.Array() +
i * (
i - 1) / 2 +
j) = *ip;
971 *ip =
static_cast<T>(temp1);
973 temp1 = *(mjj +
j + 1);
975 *(
rhs.Array() + pivrow * (pivrow - 1) / 2 + pivrow - 1);
976 *(
rhs.Array() + pivrow * (pivrow - 1) / 2 + pivrow - 1) =
static_cast<T>(temp1);
978 *(mjj +
j) = *(
rhs.Array() + pivrow * (pivrow - 1) / 2 +
j - 1);
979 *(
rhs.Array() + pivrow * (pivrow - 1) / 2 +
j - 1) =
static_cast<T>(temp1);
980 ip =
rhs.Array() + (pivrow + 1) * pivrow / 2 +
j;
981 iq = ip + pivrow - (
j + 1);
982 for (
i = pivrow + 1;
i <= nrow; ip +=
i, iq +=
i++) {
985 *ip =
static_cast<T>(temp1);
989 temp2 = *mjj * *(mjj +
j + 1) - *(mjj +
j) * *(mjj +
j);
991 LOGF(error,
"SymMatrix::bunch_invert: error in pivot choice");
997 *mjj =
static_cast<T>(*(mjj +
j + 1) * temp2);
998 *(mjj +
j + 1) =
static_cast<T>(temp1 * temp2);
999 *(mjj +
j) =
static_cast<T>(-*(mjj +
j) * temp2);
1004 for (
i =
j + 2;
i <= nrow;
i++) {
1005 ip =
rhs.Array() +
i * (
i - 1) / 2 +
j - 1;
1006 temp1 = *ip * *mjj + *(ip + 1) * *(mjj +
j);
1009 temp2 = *ip * *(mjj +
j) + *(ip + 1) * *(mjj +
j + 1);
1012 for (k =
j + 2; k <=
i; k++) {
1013 ip =
rhs.Array() +
i * (
i - 1) / 2 + k - 1;
1014 iq =
rhs.Array() + k * (k - 1) / 2 +
j - 1;
1015 *ip -=
static_cast<T>(temp1 * *iq + temp2 * *(iq + 1));
1021 for (
i =
j + 2;
i <= nrow;
i++) {
1022 ip =
rhs.Array() +
i * (
i - 1) / 2 +
j - 1;
1023 temp1 = *ip * *mjj + *(ip + 1) * *(mjj +
j);
1026 *(ip + 1) = *ip * *(mjj +
j) + *(ip + 1) * *(mjj +
j + 1);
1029 *ip =
static_cast<T>(temp1);
1038 mjj =
rhs.Array() +
j * (
j - 1) / 2 +
j - 1;
1049 for (
j = nrow;
j >= 1;
j -=
s)
1051 mjj =
rhs.Array() +
j * (
j - 1) / 2 +
j - 1;
1056 ip =
rhs.Array() + (
j + 1) *
j / 2 +
j - 1;
1057 for (
i = 0;
i < nrow -
j; ip += 1 +
j +
i++) {
1060 for (
i =
j + 1;
i <= nrow;
i++) {
1062 ip =
rhs.Array() +
i * (
i - 1) / 2 +
j;
1063 for (k = 0; k <=
i -
j - 1; k++) {
1064 temp2 += *ip++ *
x[k];
1066 for (ip +=
i - 1; k < nrow -
j; ip += 1 +
j + k++) {
1067 temp2 += *ip *
x[k];
1069 *(
rhs.Array() +
i * (
i - 1) / 2 +
j - 1) =
static_cast<T>(-temp2);
1072 ip =
rhs.Array() + (
j + 1) *
j / 2 +
j - 1;
1073 for (k = 0; k < nrow -
j; ip += 1 +
j + k++) {
1074 temp2 +=
x[k] * *ip;
1076 *mjj -=
static_cast<T>(temp2);
1080 if (piv[
j - 1] != 0) {
1081 LOGF(error,
"error in piv %lf \n",
static_cast<T>(piv[
j - 1]));
1085 ip =
rhs.Array() + (
j + 1) *
j / 2 +
j - 1;
1086 for (
i = 0;
i < nrow -
j; ip += 1 +
j +
i++) {
1089 for (
i =
j + 1;
i <= nrow;
i++) {
1091 ip =
rhs.Array() +
i * (
i - 1) / 2 +
j;
1092 for (k = 0; k <=
i -
j - 1; k++) {
1093 temp2 += *ip++ *
x[k];
1095 for (ip +=
i - 1; k < nrow -
j; ip += 1 +
j + k++) {
1096 temp2 += *ip *
x[k];
1098 *(
rhs.Array() +
i * (
i - 1) / 2 +
j - 1) =
static_cast<T>(-temp2);
1101 ip =
rhs.Array() + (
j + 1) *
j / 2 +
j - 1;
1102 for (k = 0; k < nrow -
j; ip += 1 +
j + k++) {
1103 temp2 +=
x[k] * *ip;
1105 *mjj -=
static_cast<T>(temp2);
1107 ip =
rhs.Array() + (
j + 1) *
j / 2 +
j - 2;
1108 for (
i =
j + 1;
i <= nrow; ip +=
i++) {
1109 temp2 += *ip * *(ip + 1);
1111 *(mjj - 1) -=
static_cast<T>(temp2);
1112 ip =
rhs.Array() + (
j + 1) *
j / 2 +
j - 2;
1113 for (
i = 0;
i < nrow -
j; ip += 1 +
j +
i++) {
1116 for (
i =
j + 1;
i <= nrow;
i++) {
1118 ip =
rhs.Array() +
i * (
i - 1) / 2 +
j;
1119 for (k = 0; k <=
i -
j - 1; k++) {
1120 temp2 += *ip++ *
x[k];
1122 for (ip +=
i - 1; k < nrow -
j; ip += 1 +
j + k++) {
1123 temp2 += *ip *
x[k];
1125 *(
rhs.Array() +
i * (
i - 1) / 2 +
j - 2) =
static_cast<T>(-temp2);
1128 ip =
rhs.Array() + (
j + 1) *
j / 2 +
j - 2;
1129 for (k = 0; k < nrow -
j; ip += 1 +
j + k++) {
1130 temp2 +=
x[k] * *ip;
1132 *(mjj -
j) -=
static_cast<T>(temp2);
1139 pivrow = (piv[
j - 1] == 0) ? -piv[
j - 2] : piv[
j - 1];
1140 ip =
rhs.Array() + pivrow * (pivrow - 1) / 2 +
j;
1141 for (
i =
j + 1;
i < pivrow;
i++, ip++) {
1142 temp1 = *(
rhs.Array() +
i * (
i - 1) / 2 +
j - 1);
1143 *(
rhs.Array() +
i * (
i - 1) / 2 +
j - 1) = *ip;
1144 *ip =
static_cast<T>(temp1);
1147 *mjj = *(
rhs.Array() + pivrow * (pivrow - 1) / 2 + pivrow - 1);
1148 *(
rhs.Array() + pivrow * (pivrow - 1) / 2 + pivrow - 1) =
static_cast<T>(temp1);
1151 *(mjj - 1) = *(
rhs.Array() + pivrow * (pivrow - 1) / 2 +
j - 2);
1152 *(
rhs.Array() + pivrow * (pivrow - 1) / 2 +
j - 2) =
static_cast<T>(temp1);
1155 ip =
rhs.Array() + (pivrow + 1) * pivrow / 2 +
j - 1;
1156 iq = ip + pivrow -
j;
1157 for (
i = pivrow + 1;
i <= nrow; ip +=
i, iq +=
i++) {
1160 *ip =
static_cast<T>(temp1);
1168template <
unsigned int D,
unsigned int n>
1179 typedef T value_type;
1182 value_type g1 = 1.0e-19, g2 = 1.0e19;
1185 value_type s11, s12;
1188 const value_type epsilon = 0.0;
1194 int normal = 0, imposs = -1;
1195 int jrange = 0, jover = 1, junder = -1;
1200 mIter mj = rhs.Array();
1202 for (
unsigned int j = 1;
j <=
n;
j++) {
1204 p = (o2::gpu::GPUCommonMath::Abs(*mjj));
1206 mIter mij = mj +
n +
j - 1;
1207 for (
unsigned int i =
j + 1;
i <=
n;
i++) {
1208 q = (o2::gpu::GPUCommonMath::Abs(*(mij)));
1226 mIter mkl =
rhs.Array() + (k - 1) *
n;
1227 for (
unsigned int l = 1; l <=
n; l++) {
1230 *(mkl++) =
static_cast<T>(
tf);
1233 ir[nxch] = (((
j) << 12) + (k));
1244 t = (o2::gpu::GPUCommonMath::Abs(det));
1247 if (jfail == jrange) {
1250 }
else if (t > g2) {
1252 if (jfail == jrange) {
1258 mIter mkjp = mk +
j;
1260 for (k =
j + 1; k <=
n; k++) {
1264 mIter mik =
rhs.Array() + k - 1;
1265 mIter mijp =
rhs.Array() +
j;
1268 for (
unsigned int i = 1;
i <
j;
i++) {
1269 s11 += (*mik) * (*(mji++));
1270 s12 += (*mijp) * (*(mki++));
1276 *(mjk++) =
static_cast<T>(-s11 * (*mjj));
1277 *(mkjp) =
static_cast<T>(-(((*(mjj + 1))) * ((*(mkjp - 1))) + (s12)));
1285 if (nxch % 2 == 1) {
1288 if (jfail != jrange) {
1295template <
unsigned int D,
unsigned int n>
1297GPUdi()
int Inverter<D,
n>::DfinvMatrix(MatRepStdGPU<T, D,
n>& rhs,
unsigned int*
ir)
1300 typedef T value_type;
1306 value_type s31, s32;
1307 value_type s33, s34;
1309 mIter m11 =
rhs.Array();
1310 mIter m12 = m11 + 1;
1311 mIter m21 = m11 +
n;
1312 mIter m22 = m12 +
n;
1313 *m21 = -(*m22) * (*m11) * (*m21);
1316 mIter mi =
rhs.Array() + 2 *
n;
1317 mIter mii =
rhs.Array() + 2 *
n + 2;
1318 mIter mimim =
rhs.Array() +
n + 1;
1319 for (
unsigned int i = 3;
i <=
n;
i++) {
1320 unsigned int im2 =
i - 2;
1321 mIter mj =
rhs.Array();
1322 mIter mji = mj +
i - 1;
1324 for (
unsigned int j = 1;
j <= im2;
j++) {
1327 mIter mkj = mj +
j - 1;
1328 mIter mik = mi +
j - 1;
1329 mIter mjkp = mj +
j;
1330 mIter mkpi = mj +
n +
i - 1;
1331 for (
unsigned int k =
j; k <= im2; k++) {
1332 s31 += (*mkj) * (*(mik++));
1333 s32 += (*(mjkp++)) * (*mkpi);
1337 *mij =
static_cast<T>(-(*mii) * (((*(mij -
n))) * ((*(mii - 1))) + (s31)));
1338 *mji =
static_cast<T>(-s32);
1343 *(mii - 1) = -(*mii) * (*mimim) * (*(mii - 1));
1344 *(mimim + 1) = -(*(mimim + 1));
1350 mIter mi =
rhs.Array();
1351 mIter mii =
rhs.Array();
1352 for (
unsigned int i = 1;
i <
n;
i++) {
1353 unsigned int ni =
n -
i;
1356 for (
unsigned j = 1;
j <=
i;
j++) {
1358 mIter mikj = mi +
n +
j - 1;
1359 mIter miik = mii + 1;
1360 mIter min_end = mi +
n;
1361 for (; miik < min_end;) {
1362 s33 += (*mikj) * (*(miik++));
1365 *(mij++) =
static_cast<T>(s33);
1367 for (
unsigned j = 1;
j <= ni;
j++) {
1369 mIter miik = mii +
j;
1370 mIter mikij = mii +
j *
n +
j;
1371 for (
unsigned int k =
j; k <= ni; k++) {
1372 s34 += *mikij * (*(miik++));
1380 unsigned int nxch =
ir[
n];
1384 for (
unsigned int mm = 1;
mm <= nxch;
mm++) {
1385 unsigned int k = nxch -
mm + 1;
1389 mIter mki =
rhs.Array() +
i - 1;
1390 mIter mkj =
rhs.Array() +
j - 1;
1391 for (k = 1; k <=
n; k++) {
1402template <
class T,
unsigned int D1,
unsigned int D2,
class R>
1403GPUdi() bool SMatrixGPU<T, D1, D2,
R>::Invert()
1406 return Inverter<D1, D2>::Dinv((*this).mRep);
1409template <
class T,
unsigned int D1,
unsigned int D2,
class R>
1417template <
class T,
unsigned int D1,
unsigned int D2,
class R>
1420 for (
unsigned int i = 0;
i < R::kSize; ++
i) {
1421 mRep.Array()[
i] *=
rhs;
1426template <
class T,
unsigned int D1,
unsigned int D2,
class R>
1430 return operator=(*
this*
rhs);
1433template <
class T,
unsigned int D1,
unsigned int D2,
class R>
1434template <
class A,
class R2>
1435GPUdi()
SMatrixGPU<T, D1, D2,
R>&
SMatrixGPU<T, D1, D2,
R>::operator*=(const
Expr<
A, T, D1, D2, R2>& rhs)
1437 return operator=(*
this*
rhs);
1440template <
class T,
unsigned int D1,
unsigned int D2,
class R>
1449template <
class T,
unsigned int D1,
unsigned int D2>
1454template <
class Matrix,
class T,
unsigned int D1,
unsigned int D2 = D1>
1464 return mRhs.apply((
i %
D1) *
D2 +
i /
D1);
1467 GPUdi() T operator()(
unsigned int i,
unsigned j)
const
1474 return mRhs.IsInUse(p);
1481template <
class T,
unsigned int D1,
unsigned int D2,
class R>
1485 for (
unsigned int i = 0;
i <
D1; ++
i) {
1486 const unsigned int rpos =
i *
D2;
1487 for (
unsigned int j = 0;
j <
D2; ++
j) {
1488 tmp[
i] +=
rhs.apply(rpos +
j) *
lhs.apply(
j);
1494template <
class T,
unsigned int D1,
unsigned int D2,
class R>
1495GPUdi()
Expr<
TransposeOpGPU<
SMatrixGPU<T, D1, D2,
R>, T, D1, D2>, T, D2, D1, typename
TranspPolicyGPU<T, D1, D2,
R>::RepType> Transpose(const
SMatrixGPU<T, D1, D2,
R>& rhs)
1501template <
class T,
unsigned int D1,
unsigned int D2,
class R>
1502GPUdi()
SMatrixGPU<T, D1, D1,
MatRepSymGPU<T, D1>> Similarity(const
SMatrixGPU<T, D1, D2,
R>& lhs, const
SMatrixGPU<T, D2, D2,
MatRepSymGPU<T, D2>>& rhs)
#define GPU_STATIC_CHECK(expr, msg)
GPUdi() T apply(unsigned int i) const
GPUdDefault() ~Expr()=default
GPUd() Expr(const ExprType &rhs)
GPUdi() bool IsInUse(const T *p) const
GPUd() static bool Dinv(MatrixRep &rhs)
InvertBunchKaufman(rhs, ifail)
GPUd() static bool Dinv(MatRepSymGPU< T
SMatReprStd starting port here.
GPUdi() const T *Array() const
GPUdi() T apply(unsigned int i) const
GPUdDefault() MatRepStdGPU()=default
GPUdi() const T &operator[](unsigned int i) const
GPUdi() T &operator()(unsigned int i
GPUdi() T &operator[](unsigned int i)
static GPUdi() const expr unsigned int offset(unsigned int i
static GPUdi() int off(int i)
static constexpr int off0(int i)
GPUdDefault() MatRepSymGPU()=default
static constexpr int off2(int i, int j)
GPUdi() T apply(unsigned int i) const
GPUdi() T const &operator[](unsigned int i) const
GPUdi() const T *Array() const
GPUhdi() T &operator[](unsigned int i)
static constexpr int off1(int i)
GPUdi() T apply(unsigned int i) const
GPUd() MatrixMulOpGPU(const MatrixA &lhs
GPUdDefault() ~MatrixMulOpGPU()=default
GPUdi() bool IsInUse(const T *p) const
GPUd() T &operator[](int j)
GPUd() SMatrixRowGPUconst(const SMatrixGPU< T
GPUd() const T &operator[](int j) const
R & operator=(const M &rhs)
GPUd() SMatrixGPU(SMatrixIdentity)
GPUd() T apply(unsigned int i) const
GPUd() SMatrixRowGPUconst operator[](unsigned int i) const
GPUdi() SMatrixGPU(SMatrixNoInit)
GPUd() const T *Array() const
GPUd() SMatrixRowGPU operator[](unsigned int i)
GPUdDefault() SMatrixGPU()=default
GPUdi() static unsigned int Dim()
GPUdDefault() ~TransposeOpGPU()=default
GPUdi() T apply(unsigned int i) const
GPUd() TransposeOpGPU(const Matrix &rhs)
GPUdi() bool IsInUse(const T *p) const
GLfloat GLfloat GLfloat alpha
GLint GLint GLsizei GLint GLenum GLenum type
constexpr auto do_make(F f, indices< I... >) -> gpu::gpustd::array< int, sizeof...(I)>
constexpr auto make(F f) -> gpu::gpustd::array< int, N >
return((dphi > 0 &&dphi< constants::math::PI)||dphi< -constants::math::PI) ? true GPUhdi() const expr bool okForPhiMax(T phiMax
D const SVectorGPU< T, D > & rhs
TranspPolicyGPU< T, D1, D2, R >::RepType Transpose(const SMatrixGPU< T, D1, D2, R > &rhs)
const SMatrixGPU< T, D1, D2, R > D2
const SMatrixGPU< T, D1, D2, R > D1
T Dot(const SVector< T, D > &lhs, const SVector< T, D > &rhs)
std::unique_ptr< GPUReconstructionTimeframe > tf
GPUd() static void Evaluate(SMatrixGPU< T
MatRepStdGPU< T, N1, N2 > RepType
SMatrixGPU starting port here.
MatRepSymGPU< T, D1 > RepType
MatRepStdGPU< T, N2, N1 > RepType
make_indices_impl< I+1, indices< Indices..., I >, N >::type type
indices< Indices... > type
o2::InteractionRecord ir(0, 0)