#ifndef ALICEO2_SMATRIX_GPU_H
#define ALICEO2_SMATRIX_GPU_H

#ifndef GPUCA_GPUCODE_DEVICE

#define GPU_STATIC_CHECK(expr, msg) \
  (void)(Check<(expr) != 0>(&e));   \
template <typename T, unsigned int N>
class SVectorGPU
{
 public:
  GPUd() SVectorGPU();
  GPUd() SVectorGPU(const SVectorGPU<T, N>& rhs);
  GPUhd() const T& operator[](unsigned int i) const;
  GPUhd() T& operator[](unsigned int i);
  GPUd() const T& operator()(unsigned int i) const;
  GPUd() T& operator()(unsigned int i);
  GPUd() const T* Array() const;
  GPUd() T* Array();
  GPUd() T apply(unsigned int i) const;
  GPUd() const T* begin() const;
  GPUd() T* begin();
  GPUd() const T* end() const;
  GPUd() T* end();
  GPUd() SVectorGPU<T, N>& operator-=(const SVectorGPU<T, N>& rhs);
  GPUd() SVectorGPU<T, N>& operator+=(const SVectorGPU<T, N>& rhs);
  GPUdi() static unsigned int Dim() { return N; }

 private:
  T mArray[N];
};
template <class T, unsigned int D>
GPUdi() T* SVectorGPU<T, D>::begin()
{
  return mArray;
}

template <class T, unsigned int D>
GPUdi() const T* SVectorGPU<T, D>::begin() const
{
  return mArray;
}

template <class T, unsigned int D>
GPUdi() T* SVectorGPU<T, D>::end()
{
  return mArray + Dim();
}

template <class T, unsigned int D>
GPUdi() const T* SVectorGPU<T, D>::end() const
{
  return mArray + Dim();
}
template <class T, unsigned int N>
GPUhdi() const T& SVectorGPU<T, N>::operator[](unsigned int i) const
{
  return mArray[i];
}

template <class T, unsigned int N>
GPUhdi() T& SVectorGPU<T, N>::operator[](unsigned int i)
{
  return mArray[i];
}

template <class T, unsigned int N>
GPUdi() const T& SVectorGPU<T, N>::operator()(unsigned int i) const
{
  return mArray[i];
}

template <class T, unsigned int N>
GPUdi() T& SVectorGPU<T, N>::operator()(unsigned int i)
{
  return mArray[i];
}

template <class T, unsigned int N>
GPUd() SVectorGPU<T, N>::SVectorGPU()
{
  for (unsigned int i = 0; i < N; ++i) {
    mArray[i] = 0;
  }
}

template <class T, unsigned int N>
GPUd() SVectorGPU<T, N>::SVectorGPU(const SVectorGPU<T, N>& rhs)
{
  for (unsigned int i = 0; i < N; ++i) {
    mArray[i] = rhs.mArray[i];
  }
}
template <class T, unsigned int D>
GPUdi() T SVectorGPU<T, D>::apply(unsigned int i) const
{
  return mArray[i];
}

template <class T, unsigned int D>
GPUdi() const T* SVectorGPU<T, D>::Array() const
{
  return mArray;
}

template <class T, unsigned int D>
GPUdi() T* SVectorGPU<T, D>::Array()
{
  return mArray;
}
// compile-time unrolled dot product between two vector expressions
template <unsigned int I>
struct meta_dot {
  template <class A, class B, class T>
  static GPUdi() T f(const A& lhs, const B& rhs, const T& x)
  {
    return lhs.apply(I) * rhs.apply(I) + meta_dot<I - 1>::f(lhs, rhs, x);
  }
};

template <>
struct meta_dot<0> {
  template <class A, class B, class T>
  static GPUdi() T f(const A& lhs, const B& rhs, const T& x)
  {
    return lhs.apply(0) * rhs.apply(0);
  }
};
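// Added usage sketch (not from the original header): meta_dot unrolls the dot
// product at compile time via template recursion; the Dot() wrapper below is the
// intended entry point. Values and dimensions here are illustrative only.
//
//   SVectorGPU<float, 3> a, b;
//   a[0] = 1.f; a[1] = 2.f; a[2] = 3.f;
//   b[0] = 4.f; b[1] = 5.f; b[2] = 6.f;
//   float d = meta_dot<2>::f(a, b, 0.f); // 1*4 + 2*5 + 3*6 = 32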
template <class T, unsigned int D>
GPUdi() T Dot(const SVectorGPU<T, D>& lhs, const SVectorGPU<T, D>& rhs)
{
  return meta_dot<D - 1>::f(lhs, rhs, T());
}

template <class T, unsigned int D>
GPUdi() SVectorGPU<T, D>& SVectorGPU<T, D>::operator-=(const SVectorGPU<T, D>& rhs)
{
  for (unsigned int i = 0; i < D; ++i) {
    mArray[i] -= rhs.apply(i);
  }
  return *this;
}

template <class T, unsigned int D>
GPUdi() SVectorGPU<T, D>& SVectorGPU<T, D>::operator+=(const SVectorGPU<T, D>& rhs)
{
  for (unsigned int i = 0; i < D; ++i) {
    mArray[i] += rhs.apply(i);
  }
  return *this;
}
// compile-time unrolled row-by-column product used by MatrixMulOpGPU
template <unsigned int I>
struct meta_matrix_dot {
  template <class MatrixA, class MatrixB>
  static GPUdi() typename MatrixA::value_type f(const MatrixA& lhs,
                                                const MatrixB& rhs,
                                                const unsigned int offset)
  {
    return lhs.apply(offset / MatrixB::kCols * MatrixA::kCols + I) *
             rhs.apply(MatrixB::kCols * I + offset % MatrixB::kCols) +
           meta_matrix_dot<I - 1>::f(lhs, rhs, offset);
  }

  // used for (i, j) style element access
  template <class MatrixA, class MatrixB>
  static GPUdi() typename MatrixA::value_type g(const MatrixA& lhs,
                                                const MatrixB& rhs,
                                                unsigned int i, unsigned int j)
  {
    return lhs(i, I) * rhs(I, j) + meta_matrix_dot<I - 1>::g(lhs, rhs, i, j);
  }
};

template <>
struct meta_matrix_dot<0> {
  template <class MatrixA, class MatrixB>
  static GPUdi() typename MatrixA::value_type f(const MatrixA& lhs,
                                                const MatrixB& rhs,
                                                const unsigned int offset)
  {
    return lhs.apply(offset / MatrixB::kCols * MatrixA::kCols) *
           rhs.apply(offset % MatrixB::kCols);
  }

  template <class MatrixA, class MatrixB>
  static GPUdi() typename MatrixA::value_type g(const MatrixA& lhs,
                                                const MatrixB& rhs,
                                                unsigned int i, unsigned int j)
  {
    return lhs(i, 0) * rhs(0, j);
  }
};
namespace row_offsets_utils
{
// compile-time generation of an integer index pack
template <int...>
struct indices {
};

template <int I, class IndexTuple, int N>
struct make_indices_impl;

template <int I, int... Indices, int N>
struct make_indices_impl<I, indices<Indices...>, N> {
  typedef typename make_indices_impl<I + 1, indices<Indices..., I>, N>::type type;
};

template <int N, int... Indices>
struct make_indices_impl<N, indices<Indices...>, N> {
  typedef indices<Indices...> type;
};

template <int N>
struct make_indices : make_indices_impl<0, indices<>, N> {
};

template <int I0, class F, int... I>
constexpr auto do_make(F f, indices<I...>) -> std::array<int, sizeof...(I)>
{
  std::array<int, sizeof...(I)> retarr = {f(I0 + I)...};
  return retarr;
}

template <int N, int I0 = 0, class F>
constexpr auto make(F f) -> std::array<int, N>
{
  return do_make<I0>(f, typename make_indices<N>::type());
}
} // namespace row_offsets_utils
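// Added illustration (not from the original header): row_offsets_utils::make<N>(f)
// builds a constexpr std::array<int, N> holding f(0) ... f(N-1); MatRepSymGPU
// below uses it to precompute its index-remapping table. A standalone sketch:
//
//   constexpr int square(int i) { return i * i; }
//   constexpr auto squares = row_offsets_utils::make<4>(square); // {0, 1, 4, 9}
//   static_assert(squares[3] == 9, "table is available at compile time");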
// MatRepSymGPU: symmetric matrix representation in lower-triangular packed storage
template <class T, unsigned int D>
class MatRepSymGPU
{
 public:
  GPUdDefault() MatRepSymGPU() = default;
  GPUhdi() T& operator[](unsigned int i) { return mArray[off(i)]; }
  GPUdi() T const& operator[](unsigned int i) const { return mArray[off(i)]; }
  GPUdi() T apply(unsigned int i) const { return mArray[off(i)]; }
  GPUdi() T* Array() { return mArray; }
  GPUdi() const T* Array() const { return mArray; }

  // assignment from another symmetric representation copies the packed array
  template <class R>
  GPUdi() MatRepSymGPU<T, D>& operator=(const R& rhs)
  {
    for (unsigned int i = 0; i < kSize; ++i) {
      mArray[i] = rhs.Array()[i];
    }
    return *this;
  }

  enum {
    kRows = D,               // rows
    kCols = D,               // columns
    kSize = D * (D + 1) / 2  // size of a packed symmetric matrix
  };

  static constexpr int off0(int i) { return i == 0 ? 0 : off0(i - 1) + i; }
  static constexpr int off2(int i, int j) { return j < i ? off0(i) + j : off0(j) + i; }
  static constexpr int off1(int i) { return off2(i / D, i % D); }

  static GPUdi() int off(int i)
  {
    static constexpr auto v = row_offsets_utils::make<D * D>(off1);
    return v[i];
  }

  static GPUdi() constexpr unsigned int offset(unsigned int i, unsigned int j)
  {
    return off(i * D + j);
  }

 private:
  T mArray[kSize];
};
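// Added worked example (not from the original header): off0(i) = 1 + 2 + ... + i
// = i * (i + 1) / 2 is where row i starts in lower-triangular packed storage, so a
// symmetric D x D matrix keeps only D * (D + 1) / 2 values. For D = 3 the layout is
//   (0,0) -> 0, (1,0) -> 1, (1,1) -> 2, (2,0) -> 3, (2,1) -> 4, (2,2) -> 5,
// and offset(i, j) maps both (i, j) and (j, i) to the same packed slot.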
// MatRepStdGPU: standard (dense, row-major) matrix representation
template <class T, unsigned int D1, unsigned int D2 = D1>
class MatRepStdGPU
{
 public:
  GPUdDefault() MatRepStdGPU() = default;
  GPUdi() const T& operator()(unsigned int i, unsigned int j) const
  {
    return mArray[i * D2 + j];
  }
  GPUdi() T& operator()(unsigned int i, unsigned int j)
  {
    return mArray[i * D2 + j];
  }
  GPUdi() T& operator[](unsigned int i) { return mArray[i]; }
  GPUdi() const T& operator[](unsigned int i) const { return mArray[i]; }
  GPUdi() T apply(unsigned int i) const { return mArray[i]; }
  GPUdi() T* Array() { return mArray; }
  GPUdi() const T* Array() const { return mArray; }

  template <class R>
  GPUdi() MatRepStdGPU<T, D1, D2>& operator=(const R& rhs)
  {
    for (unsigned int i = 0; i < kSize; ++i) {
      mArray[i] = rhs[i];
    }
    return *this;
  }

  template <class R>
  GPUdi() bool operator==(const R& rhs) const
  {
    bool rc = true;
    for (unsigned int i = 0; i < kSize; ++i) {
      rc = rc && (mArray[i] == rhs[i]);
    }
    return rc;
  }

  enum {
    kRows = D1,      // rows
    kCols = D2,      // columns
    kSize = D1 * D2  // rows * columns
  };

 private:
  T mArray[kSize];
};
// Expr: wrapper that lets operation objects take part in expression-template evaluation
template <class ExprType, class T, unsigned int D, unsigned int D2 = 1, class R1 = MatRepStdGPU<T, D, D2>>
class Expr
{
 public:
  GPUd() Expr(const ExprType& rhs) : mRhs(rhs) {}
  GPUdDefault() ~Expr() = default;
  GPUdi() T apply(unsigned int i) const { return mRhs.apply(i); }
  GPUdi() T operator()(unsigned int i, unsigned j) const { return mRhs(i, j); }
  GPUdi() bool IsInUse(const T* p) const { return mRhs.IsInUse(p); }

 private:
  ExprType mRhs;
};
// SMatrixGPU: GPU port of ROOT's SMatrix, parameterized on the element type,
// the two dimensions and the storage representation R
template <class T, unsigned int D1, unsigned int D2 = D1, class R = MatRepStdGPU<T, D1, D2>>
class SMatrixGPU
{
 public:
  typedef T value_type;

  GPUdDefault() SMatrixGPU() = default;
  GPUdi() SMatrixGPU(SMatrixNoInit) {}
  GPUd() SMatrixGPU(SMatrixIdentity);
  GPUd() SMatrixGPU(const SMatrixGPU<T, D1, D2, R>& rhs);
  template <class A, class R2>
  GPUd() SMatrixGPU(const Expr<A, T, D1, D2, R2>& rhs);
  template <class A, class R2>
  GPUd() SMatrixGPU<T, D1, D2, R>& operator=(const Expr<A, T, D1, D2, R2>& rhs);

  GPUd() const T* Array() const;
  GPUd() T apply(unsigned int i) const;
  GPUd() const T* begin() const;
  GPUd() const T* end() const;
  GPUd() bool IsInUse(const T* p) const;
  GPUd() T& operator()(unsigned int i, unsigned int j);

  template <typename Y, typename X>
  friend X& operator<<(X& y, const SMatrixGPU& m);

  // row proxy returned by operator[]; element access forwards to operator()(row, column)
  class SMatrixRowGPU
  {
   public:
    GPUd() SMatrixRowGPU(SMatrixGPU<T, D1, D2, R>& rhs, unsigned int i) : mMat(&rhs), mRow(i) {}
    GPUd() T& operator[](int j) { return (*mMat)(mRow, j); }

   private:
    SMatrixGPU<T, D1, D2, R>* mMat;
    unsigned int mRow;
  };
  class SMatrixRowGPUconst;
  GPUd() SMatrixRowGPUconst operator[](unsigned int i) const;
  GPUd() SMatrixRowGPU operator[](unsigned int i);

  GPUd() bool Invert();
  GPUd() SMatrixGPU<T, D1, D2, R>& operator*=(const T& rhs);
  GPUd() SMatrixGPU<T, D1, D2, R>& operator*=(const SMatrixGPU<T, D1, D2, R>& rhs);
  template <class A, class R2>
  GPUd() SMatrixGPU<T, D1, D2, R>& operator*=(const Expr<A, T, D1, D2, R2>& rhs);

  R mRep;
};

// stream-like output, constrained to types that expose stream traits
template <class T, unsigned int D1, unsigned int D2, class R, typename Y, typename X = Y>
  requires(sizeof(typename X::traits_type::pos_type) != 0)
GPUd() X& operator<<(X& y, const SMatrixGPU<T, D1, D2, R>& m);
template <class T, unsigned int D1, unsigned int D2, class R>
GPUd() SMatrixGPU<T, D1, D2, R>::SMatrixGPU(SMatrixIdentity)
{
  for (unsigned int i = 0; i < R::kSize; ++i) {
    mRep.Array()[i] = 0;
  }
  if (D1 <= D2) {
    for (unsigned int i = 0; i < D1; ++i) {
      mRep[i * D2 + i] = 1;
    }
  } else {
    for (unsigned int i = 0; i < D2; ++i) {
      mRep[i * D2 + i] = 1;
    }
  }
}
template <class T, unsigned int D1, unsigned int D2, class R>
GPUd() T SMatrixGPU<T, D1, D2, R>::apply(unsigned int i) const
{
  return mRep[i];
}

template <class T, unsigned int D1, unsigned int D2, class R>
GPUd() const T* SMatrixGPU<T, D1, D2, R>::Array() const
{
  return mRep.Array();
}

template <class T, unsigned int D1, unsigned int D2, class R>
GPUd() const T* SMatrixGPU<T, D1, D2, R>::begin() const
{
  return mRep.Array();
}

template <class T, unsigned int D1, unsigned int D2, class R>
GPUd() const T* SMatrixGPU<T, D1, D2, R>::end() const
{
  return mRep.Array() + R::kSize;
}

template <class T, unsigned int D1, unsigned int D2, class R>
GPUd() bool SMatrixGPU<T, D1, D2, R>::IsInUse(const T* p) const
{
  return p == mRep.Array();
}
// assignment of a general expression to a general matrix: evaluate directly if
// the destination is not aliased by the expression, otherwise go through a buffer
template <class T, unsigned int D1, unsigned int D2, class A, class R1, class R2>
struct Assign {
  GPUd() static void Evaluate(SMatrixGPU<T, D1, D2, R1>& lhs, const Expr<A, T, D1, D2, R2>& rhs)
  {
    if (!rhs.IsInUse(lhs.begin())) {
      unsigned int l = 0;
      for (unsigned int i = 0; i < D1; ++i) {
        for (unsigned int j = 0; j < D2; ++j) {
          lhs.mRep[l] = rhs(i, j);
          l++;
        }
      }
    } else {
      // aliasing: evaluate into a temporary first
      T tmp[D1 * D2];
      unsigned int l = 0;
      for (unsigned int i = 0; i < D1; ++i) {
        for (unsigned int j = 0; j < D2; ++j) {
          tmp[l] = rhs(i, j);
          l++;
        }
      }
      for (unsigned int i = 0; i < D1 * D2; ++i) {
        lhs.mRep[i] = tmp[i];
      }
    }
  }
};
// specialization for a symmetric destination: only the lower-triangular packed
// elements are assigned
template <class T, unsigned int D1, unsigned int D2, class A>
struct Assign<T, D1, D2, A, MatRepSymGPU<T, D1>, MatRepSymGPU<T, D1>> {
  GPUd() static void Evaluate(SMatrixGPU<T, D1, D2, MatRepSymGPU<T, D1>>& lhs,
                              const Expr<A, T, D1, D2, MatRepSymGPU<T, D1>>& rhs)
  {
    if (!rhs.IsInUse(lhs.begin())) {
      unsigned int l = 0;
      for (unsigned int i = 0; i < D1; ++i) {
        for (unsigned int j = 0; j <= i; ++j) {
          lhs.mRep.Array()[l] = rhs(i, j);
          l++;
        }
      }
    } else {
      T tmp[MatRepSymGPU<T, D1>::kSize];
      unsigned int l = 0;
      for (unsigned int i = 0; i < D1; ++i) {
        for (unsigned int j = 0; j <= i; ++j) {
          tmp[l] = rhs(i, j);
          l++;
        }
      }
      for (unsigned int i = 0; i < MatRepSymGPU<T, D1>::kSize; ++i) {
        lhs.mRep.Array()[i] = tmp[i];
      }
    }
  }
};
// a general expression cannot be assigned to a symmetric matrix: trigger a
// compile-time failure through GPU_STATIC_CHECK
template <class T, unsigned int D1, unsigned int D2, class A>
struct Assign<T, D1, D2, A, MatRepSymGPU<T, D1>, MatRepStdGPU<T, D1, D2>> {
  GPUd() static void Evaluate(SMatrixGPU<T, D1, D2, MatRepSymGPU<T, D1>>&,
                              const Expr<A, T, D1, D2, MatRepStdGPU<T, D1, D2>>&)
  {
    GPU_STATIC_CHECK(0 == 1, Check_symmetric_matrices_equivalence);
  }
};

// force the assignment of a general object into a symmetric matrix,
// keeping only its lower-triangular part
struct AssignSym {
  // from a general expression
  template <class T, unsigned int D, class A, class R>
  GPUd() static void Evaluate(SMatrixGPU<T, D, D, MatRepSymGPU<T, D>>& lhs, const Expr<A, T, D, D, R>& rhs)
  {
    unsigned int l = 0;
    for (unsigned int i = 0; i < D; ++i) {
      for (unsigned int j = 0; j <= i; ++j) {
        lhs.mRep.Array()[l] = rhs(i, j);
        l++;
      }
    }
  }

  // from a general matrix
  template <class T, unsigned int D, class R>
  GPUd() static void Evaluate(SMatrixGPU<T, D, D, MatRepSymGPU<T, D>>& lhs, const SMatrixGPU<T, D, D, R>& rhs)
  {
    unsigned int l = 0;
    for (unsigned int i = 0; i < D; ++i) {
      for (unsigned int j = 0; j <= i; ++j) {
        lhs.mRep.Array()[l] = rhs(i, j);
        l++;
      }
    }
  }
};
template <class T, unsigned int D1, unsigned int D2, class R>
template <class A, class R2>
GPUdi() SMatrixGPU<T, D1, D2, R>& SMatrixGPU<T, D1, D2, R>::operator=(const Expr<A, T, D1, D2, R2>& rhs)
{
  Assign<T, D1, D2, A, R, R2>::Evaluate(*this, rhs);
  return *this;
}
template <class T, unsigned int D1, unsigned int D2, class R>
GPUd() SMatrixGPU<T, D1, D2, R>::SMatrixGPU(const SMatrixGPU<T, D1, D2, R>& rhs)
{
  mRep = rhs.mRep;
}

template <class T, unsigned int D1, unsigned int D2, class R>
template <class A, class R2>
GPUd() SMatrixGPU<T, D1, D2, R>::SMatrixGPU(const Expr<A, T, D1, D2, R2>& rhs)
{
  operator=(rhs);
}

template <class T, unsigned int D1, unsigned int D2, class R>
GPUd() typename SMatrixGPU<T, D1, D2, R>::SMatrixRowGPU SMatrixGPU<T, D1, D2, R>::operator[](unsigned int i)
{
  return SMatrixRowGPU(*this, i);
}

template <class T, unsigned int D1, unsigned int D2, class R>
GPUdi() T& SMatrixGPU<T, D1, D2, R>::operator()(unsigned int i, unsigned int j)
{
  return mRep(i, j);
}

// trait deciding the representation of a matrix product: products are stored
// as standard (non-symmetric) matrices
template <class T, class R1, class R2>
struct MultPolicyGPU {
  enum {
    N1 = R1::kRows,
    N2 = R2::kCols
  };
  typedef MatRepStdGPU<T, N1, N2> RepType;
};
// MatrixMulOpGPU: lazy matrix-matrix multiplication, evaluated element by
// element through meta_matrix_dot when the expression is assigned
template <class MatrixA, class MatrixB, class T, unsigned int D>
class MatrixMulOpGPU
{
 public:
  GPUd() MatrixMulOpGPU(const MatrixA& lhs, const MatrixB& rhs) : lhs_(lhs), rhs_(rhs) {}
  GPUdDefault() ~MatrixMulOpGPU() = default;
  GPUdi() T apply(unsigned int i) const
  {
    return meta_matrix_dot<D - 1>::f(lhs_, rhs_, i);
  }
  GPUdi() T operator()(unsigned int i, unsigned int j) const
  {
    return meta_matrix_dot<D - 1>::g(lhs_, rhs_, i, j);
  }
  GPUdi() bool IsInUse(const T* p) const
  {
    return lhs_.IsInUse(p) || rhs_.IsInUse(p);
  }

 private:
  const MatrixA& lhs_;
  const MatrixB& rhs_;
};

template <class T, unsigned int D1, unsigned int D, unsigned int D2, class R1, class R2>
GPUdi() Expr<MatrixMulOpGPU<SMatrixGPU<T, D1, D, R1>, SMatrixGPU<T, D, D2, R2>, T, D>, T, D1, D2, typename MultPolicyGPU<T, R1, R2>::RepType>
  operator*(const SMatrixGPU<T, D1, D, R1>& lhs, const SMatrixGPU<T, D, D2, R2>& rhs)
{
  typedef MatrixMulOpGPU<SMatrixGPU<T, D1, D, R1>, SMatrixGPU<T, D, D2, R2>, T, D> MatMulOp;
  return Expr<MatMulOp, T, D1, D2, typename MultPolicyGPU<T, R1, R2>::RepType>(MatMulOp(lhs, rhs));
}
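// Added usage sketch (not from the original header): operator* only builds a lazy
// Expr wrapping MatrixMulOpGPU; the product is evaluated element by element when
// it is assigned to an SMatrixGPU. Dimensions are carried in the template
// parameters, so incompatible shapes fail to compile.
//
//   SMatrixGPU<float, 3, 4> a;
//   SMatrixGPU<float, 4, 2> b;
//   SMatrixGPU<float, 3, 2> c = a * b; // evaluated on assignment via Assign<>::Evaluate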
// Inverter: in-place inversion helpers. The general (MatRepStdGPU) path uses a
// triangular factorization (DfactMatrix) followed by DfinvMatrix; a MatRepSymGPU
// overload of Dinv uses the Bunch-Kaufman decomposition (InvertBunchKaufman).
template <unsigned int D, unsigned int N = D>
class Inverter
{
 public:
  template <class MatrixRep>
  GPUd() static bool Dinv(MatrixRep& rhs)
  {
    unsigned int work[N + 1] = {0};
    typename MatrixRep::value_type det(0.0);
    if (DfactMatrix(rhs, det, work) != 0) {
      return false;
    }
    int ifail = DfinvMatrix(rhs, work);
    return ifail == 0;
  }
  // the Dinv overload for MatRepSymGPU plus the InvertBunchKaufman, DfactMatrix
  // and DfinvMatrix workers are declared here as well; their definitions follow
};
// Bunch-Kaufman in-place inversion of a symmetric matrix in packed (MatRepSymGPU) storage
template <unsigned int D, unsigned int N>
  typedef T value_type;
  typedef int* pivIter;
  mIter x = xvec.begin();
  pivIter piv = pivv.begin();
  value_type temp1, temp2;
  value_type lambda, sigma;
  const value_type alpha = .6404;
  for (i = 0; i < nrow; i++) {
  for (j = 1; j < nrow; j += s)
    mjj = rhs.Array() + j * (j - 1) / 2 + j - 1;
    ip = rhs.Array() + (j + 1) * j / 2 + j - 1;
    for (i = j + 1; i <= nrow; ip += i++) {
      if (o2::gpu::GPUCommonMath::Abs(*ip) > lambda) {
        lambda = o2::gpu::GPUCommonMath::Abs(*ip);
    if (o2::gpu::GPUCommonMath::Abs(*mjj) >= lambda * alpha) {
    ip = rhs.Array() + pivrow * (pivrow - 1) / 2 + j - 1;
    for (k = j; k < pivrow; k++) {
      if (o2::gpu::GPUCommonMath::Abs(*ip) > sigma) {
        sigma = o2::gpu::GPUCommonMath::Abs(*ip);
    if (o2::gpu::GPUCommonMath::Abs(*mjj) >= alpha * lambda * (lambda / sigma)) {
    } else if (o2::gpu::GPUCommonMath::Abs(*(rhs.Array() + pivrow * (pivrow - 1) / 2 + pivrow - 1)) >= alpha * sigma) {
    temp2 = *mjj = 1.0f / *mjj;
    for (i = j + 1; i <= nrow; i++) {
      temp1 = *(rhs.Array() + i * (i - 1) / 2 + j - 1) * temp2;
      ip = rhs.Array() + i * (i - 1) / 2 + j;
      for (k = j + 1; k <= i; k++) {
        *ip -= static_cast<T>(temp1 * *(rhs.Array() + k * (k - 1) / 2 + j - 1));
    ip = rhs.Array() + (j + 1) * j / 2 + j - 1;
    for (i = j + 1; i <= nrow; ip += i++) {
      *ip *= static_cast<T>(temp2);
    ip = rhs.Array() + pivrow * (pivrow - 1) / 2 + j;
    for (i = j + 1; i < pivrow; i++, ip++) {
      temp1 = *(rhs.Array() + i * (i - 1) / 2 + j - 1);
      *(rhs.Array() + i * (i - 1) / 2 + j - 1) = *ip;
      *ip = static_cast<T>(temp1);
    *mjj = *(rhs.Array() + pivrow * (pivrow - 1) / 2 + pivrow - 1);
    *(rhs.Array() + pivrow * (pivrow - 1) / 2 + pivrow - 1) = static_cast<T>(temp1);
    ip = rhs.Array() + (pivrow + 1) * pivrow / 2 + j - 1;
    iq = ip + pivrow - j;
    for (i = pivrow + 1; i <= nrow; ip += i, iq += i++) {
      *ip = static_cast<T>(temp1);
    temp2 = *mjj = 1.0f / *mjj;
    for (i = j + 1; i <= nrow; i++) {
      temp1 = *(rhs.Array() + i * (i - 1) / 2 + j - 1) * temp2;
      ip = rhs.Array() + i * (i - 1) / 2 + j;
      for (k = j + 1; k <= i; k++) {
        *ip -= static_cast<T>(temp1 * *(rhs.Array() + k * (k - 1) / 2 + j - 1));
    ip = rhs.Array() + (j + 1) * j / 2 + j - 1;
    for (i = j + 1; i <= nrow; ip += i++) {
      *ip *= static_cast<T>(temp2);
    piv[j - 1] = -pivrow;
    if (j + 1 != pivrow) {
      ip = rhs.Array() + pivrow * (pivrow - 1) / 2 + j + 1;
      for (i = j + 2; i < pivrow; i++, ip++) {
        temp1 = *(rhs.Array() + i * (i - 1) / 2 + j);
        *(rhs.Array() + i * (i - 1) / 2 + j) = *ip;
        *ip = static_cast<T>(temp1);
      temp1 = *(mjj + j + 1);
      *(rhs.Array() + pivrow * (pivrow - 1) / 2 + pivrow - 1);
      *(rhs.Array() + pivrow * (pivrow - 1) / 2 + pivrow - 1) = static_cast<T>(temp1);
      *(mjj + j) = *(rhs.Array() + pivrow * (pivrow - 1) / 2 + j - 1);
      *(rhs.Array() + pivrow * (pivrow - 1) / 2 + j - 1) = static_cast<T>(temp1);
      ip = rhs.Array() + (pivrow + 1) * pivrow / 2 + j;
      iq = ip + pivrow - (j + 1);
      for (i = pivrow + 1; i <= nrow; ip += i, iq += i++) {
        *ip = static_cast<T>(temp1);
    temp2 = *mjj * *(mjj + j + 1) - *(mjj + j) * *(mjj + j);
    LOGF(error, "SymMatrix::bunch_invert: error in pivot choice");
    *mjj = static_cast<T>(*(mjj + j + 1) * temp2);
    *(mjj + j + 1) = static_cast<T>(temp1 * temp2);
    *(mjj + j) = static_cast<T>(-*(mjj + j) * temp2);
    for (i = j + 2; i <= nrow; i++) {
      ip = rhs.Array() + i * (i - 1) / 2 + j - 1;
      temp1 = *ip * *mjj + *(ip + 1) * *(mjj + j);
      temp2 = *ip * *(mjj + j) + *(ip + 1) * *(mjj + j + 1);
      for (k = j + 2; k <= i; k++) {
        ip = rhs.Array() + i * (i - 1) / 2 + k - 1;
        iq = rhs.Array() + k * (k - 1) / 2 + j - 1;
        *ip -= static_cast<T>(temp1 * *iq + temp2 * *(iq + 1));
    for (i = j + 2; i <= nrow; i++) {
      ip = rhs.Array() + i * (i - 1) / 2 + j - 1;
      temp1 = *ip * *mjj + *(ip + 1) * *(mjj + j);
      *(ip + 1) = *ip * *(mjj + j) + *(ip + 1) * *(mjj + j + 1);
      *ip = static_cast<T>(temp1);
    mjj = rhs.Array() + j * (j - 1) / 2 + j - 1;
  for (j = nrow; j >= 1; j -= s)
    mjj = rhs.Array() + j * (j - 1) / 2 + j - 1;
    ip = rhs.Array() + (j + 1) * j / 2 + j - 1;
    for (i = 0; i < nrow - j; ip += 1 + j + i++) {
    for (i = j + 1; i <= nrow; i++) {
      ip = rhs.Array() + i * (i - 1) / 2 + j;
      for (k = 0; k <= i - j - 1; k++) {
        temp2 += *ip++ * x[k];
      for (ip += i - 1; k < nrow - j; ip += 1 + j + k++) {
        temp2 += *ip * x[k];
      *(rhs.Array() + i * (i - 1) / 2 + j - 1) = static_cast<T>(-temp2);
    ip = rhs.Array() + (j + 1) * j / 2 + j - 1;
    for (k = 0; k < nrow - j; ip += 1 + j + k++) {
      temp2 += x[k] * *ip;
    *mjj -= static_cast<T>(temp2);
    if (piv[j - 1] != 0) {
      LOGF(error, "error in piv %lf \n", static_cast<T>(piv[j - 1]));
    ip = rhs.Array() + (j + 1) * j / 2 + j - 1;
    for (i = 0; i < nrow - j; ip += 1 + j + i++) {
    for (i = j + 1; i <= nrow; i++) {
      ip = rhs.Array() + i * (i - 1) / 2 + j;
      for (k = 0; k <= i - j - 1; k++) {
        temp2 += *ip++ * x[k];
      for (ip += i - 1; k < nrow - j; ip += 1 + j + k++) {
        temp2 += *ip * x[k];
      *(rhs.Array() + i * (i - 1) / 2 + j - 1) = static_cast<T>(-temp2);
    ip = rhs.Array() + (j + 1) * j / 2 + j - 1;
    for (k = 0; k < nrow - j; ip += 1 + j + k++) {
      temp2 += x[k] * *ip;
    *mjj -= static_cast<T>(temp2);
    ip = rhs.Array() + (j + 1) * j / 2 + j - 2;
    for (i = j + 1; i <= nrow; ip += i++) {
      temp2 += *ip * *(ip + 1);
    *(mjj - 1) -= static_cast<T>(temp2);
    ip = rhs.Array() + (j + 1) * j / 2 + j - 2;
    for (i = 0; i < nrow - j; ip += 1 + j + i++) {
    for (i = j + 1; i <= nrow; i++) {
      ip = rhs.Array() + i * (i - 1) / 2 + j;
      for (k = 0; k <= i - j - 1; k++) {
        temp2 += *ip++ * x[k];
      for (ip += i - 1; k < nrow - j; ip += 1 + j + k++) {
        temp2 += *ip * x[k];
      *(rhs.Array() + i * (i - 1) / 2 + j - 2) = static_cast<T>(-temp2);
    ip = rhs.Array() + (j + 1) * j / 2 + j - 2;
    for (k = 0; k < nrow - j; ip += 1 + j + k++) {
      temp2 += x[k] * *ip;
    *(mjj - j) -= static_cast<T>(temp2);
    pivrow = (piv[j - 1] == 0) ? -piv[j - 2] : piv[j - 1];
    ip = rhs.Array() + pivrow * (pivrow - 1) / 2 + j;
    for (i = j + 1; i < pivrow; i++, ip++) {
      temp1 = *(rhs.Array() + i * (i - 1) / 2 + j - 1);
      *(rhs.Array() + i * (i - 1) / 2 + j - 1) = *ip;
      *ip = static_cast<T>(temp1);
    *mjj = *(rhs.Array() + pivrow * (pivrow - 1) / 2 + pivrow - 1);
    *(rhs.Array() + pivrow * (pivrow - 1) / 2 + pivrow - 1) = static_cast<T>(temp1);
    *(mjj - 1) = *(rhs.Array() + pivrow * (pivrow - 1) / 2 + j - 2);
    *(rhs.Array() + pivrow * (pivrow - 1) / 2 + j - 2) = static_cast<T>(temp1);
    ip = rhs.Array() + (pivrow + 1) * pivrow / 2 + j - 1;
    iq = ip + pivrow - j;
    for (i = pivrow + 1; i <= nrow; ip += i, iq += i++) {
      *ip = static_cast<T>(temp1);
// DfactMatrix: triangular factorization of a general (MatRepStdGPU) matrix with
// pivoting, accumulating the determinant and the exchange bookkeeping in ir
template <unsigned int D, unsigned int n>
  typedef T value_type;
  value_type g1 = 1.0e-19, g2 = 1.0e19;
  value_type s11, s12;
  const value_type epsilon = 0.0;
  int normal = 0, imposs = -1;
  int jrange = 0, jover = 1, junder = -1;
  mIter mj = rhs.Array();
  for (unsigned int j = 1; j <= n; j++) {
    p = (o2::gpu::GPUCommonMath::Abs(*mjj));
    mIter mij = mj + n + j - 1;
    for (unsigned int i = j + 1; i <= n; i++) {
      q = (o2::gpu::GPUCommonMath::Abs(*(mij)));
      mIter mkl = rhs.Array() + (k - 1) * n;
      for (unsigned int l = 1; l <= n; l++) {
        *(mkl++) = static_cast<T>(tf);
      ir[nxch] = (((j) << 12) + (k));
    t = (o2::gpu::GPUCommonMath::Abs(det));
    if (jfail == jrange) {
    } else if (t > g2) {
      if (jfail == jrange) {
    mIter mkjp = mk + j;
    for (k = j + 1; k <= n; k++) {
      mIter mik = rhs.Array() + k - 1;
      mIter mijp = rhs.Array() + j;
      for (unsigned int i = 1; i < j; i++) {
        s11 += (*mik) * (*(mji++));
        s12 += (*mijp) * (*(mki++));
      *(mjk++) = static_cast<T>(-s11 * (*mjj));
      *(mkjp) = static_cast<T>(-(((*(mjj + 1))) * ((*(mkjp - 1))) + (s12)));
  if (nxch % 2 == 1) {
  if (jfail != jrange) {
// DfinvMatrix: back-substitution producing the inverse from the factorized
// matrix, undoing the row exchanges recorded in ir
template <unsigned int D, unsigned int n>
GPUdi() int Inverter<D, n>::DfinvMatrix(MatRepStdGPU<T, D, n>& rhs, unsigned int* ir)
{
  typedef T value_type;
  value_type s31, s32;
  value_type s33, s34;
  mIter m11 = rhs.Array();
  mIter m12 = m11 + 1;
  mIter m21 = m11 + n;
  mIter m22 = m12 + n;
  *m21 = -(*m22) * (*m11) * (*m21);
  mIter mi = rhs.Array() + 2 * n;
  mIter mii = rhs.Array() + 2 * n + 2;
  mIter mimim = rhs.Array() + n + 1;
  for (unsigned int i = 3; i <= n; i++) {
    unsigned int im2 = i - 2;
    mIter mj = rhs.Array();
    mIter mji = mj + i - 1;
    for (unsigned int j = 1; j <= im2; j++) {
      mIter mkj = mj + j - 1;
      mIter mik = mi + j - 1;
      mIter mjkp = mj + j;
      mIter mkpi = mj + n + i - 1;
      for (unsigned int k = j; k <= im2; k++) {
        s31 += (*mkj) * (*(mik++));
        s32 += (*(mjkp++)) * (*mkpi);
      *mij = static_cast<T>(-(*mii) * (((*(mij - n))) * ((*(mii - 1))) + (s31)));
      *mji = static_cast<T>(-s32);
    *(mii - 1) = -(*mii) * (*mimim) * (*(mii - 1));
    *(mimim + 1) = -(*(mimim + 1));
  mIter mi = rhs.Array();
  mIter mii = rhs.Array();
  for (unsigned int i = 1; i < n; i++) {
    unsigned int ni = n - i;
    for (unsigned j = 1; j <= i; j++) {
      mIter mikj = mi + n + j - 1;
      mIter miik = mii + 1;
      mIter min_end = mi + n;
      for (; miik < min_end;) {
        s33 += (*mikj) * (*(miik++));
      *(mij++) = static_cast<T>(s33);
    for (unsigned j = 1; j <= ni; j++) {
      mIter miik = mii + j;
      mIter mikij = mii + j * n + j;
      for (unsigned int k = j; k <= ni; k++) {
        s34 += *mikij * (*(miik++));
  unsigned int nxch = ir[n];
  for (unsigned int mm = 1; mm <= nxch; mm++) {
    unsigned int k = nxch - mm + 1;
    mIter mki = rhs.Array() + i - 1;
    mIter mkj = rhs.Array() + j - 1;
    for (k = 1; k <= n; k++) {
template <class T, unsigned int D1, unsigned int D2, class R>
GPUdi() bool SMatrixGPU<T, D1, D2, R>::Invert()
{
  return Inverter<D1, D2>::Dinv((*this).mRep);
}
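// Added usage sketch (not from the original header): Invert() works in place and
// reports failure for a singular matrix; MatRepSymGPU dispatches to the
// Bunch-Kaufman path, MatRepStdGPU to DfactMatrix/DfinvMatrix.
//
//   SMatrixGPU<float, 5, 5, MatRepSymGPU<float, 5>> cov; // e.g. a track covariance
//   // ... fill cov ...
//   if (!cov.Invert()) {
//     // handle the singular case
//   }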
template <class T, unsigned int D1, unsigned int D2, class R>
GPUdi() SMatrixGPU<T, D1, D2, R>& SMatrixGPU<T, D1, D2, R>::operator*=(const T& rhs)
{
  for (unsigned int i = 0; i < R::kSize; ++i) {
    mRep.Array()[i] *= rhs;
  }
  return *this;
}

template <class T, unsigned int D1, unsigned int D2, class R>
GPUdi() SMatrixGPU<T, D1, D2, R>& SMatrixGPU<T, D1, D2, R>::operator*=(const SMatrixGPU<T, D1, D2, R>& rhs)
{
  return operator=(*this * rhs);
}

template <class T, unsigned int D1, unsigned int D2, class R>
template <class A, class R2>
GPUdi() SMatrixGPU<T, D1, D2, R>& SMatrixGPU<T, D1, D2, R>::operator*=(const Expr<A, T, D1, D2, R2>& rhs)
{
  return operator=(*this * rhs);
}
// TranspPolicyGPU: representation of the transposed matrix; transposing a
// symmetric matrix keeps the symmetric representation
template <class T, unsigned int D1, unsigned int D2, class R>
struct TranspPolicyGPU {
  enum {
    N1 = R::kRows,
    N2 = R::kCols
  };
  typedef MatRepStdGPU<T, N2, N1> RepType;
};

template <class T, unsigned int D1, unsigned int D2>
struct TranspPolicyGPU<T, D1, D2, MatRepSymGPU<T, D1>> {
  typedef MatRepSymGPU<T, D1> RepType;
};

// TransposeOpGPU: lazy transposition of a matrix expression
template <class Matrix, class T, unsigned int D1, unsigned int D2 = D1>
class TransposeOpGPU
{
 public:
  GPUd() TransposeOpGPU(const Matrix& rhs) : mRhs(rhs) {}
  GPUdDefault() ~TransposeOpGPU() = default;
  GPUdi() T apply(unsigned int i) const
  {
    return mRhs.apply((i % D1) * D2 + i / D1);
  }
  GPUdi() T operator()(unsigned int i, unsigned j) const
  {
    return mRhs(j, i);
  }
  GPUdi() bool IsInUse(const T* p) const
  {
    return mRhs.IsInUse(p);
  }

 private:
  const Matrix& mRhs;
};
// matrix-vector product: returns a D1-vector
template <class T, unsigned int D1, unsigned int D2, class R>
GPUdi() SVectorGPU<T, D1> operator*(const SMatrixGPU<T, D1, D2, R>& rhs, const SVectorGPU<T, D2>& lhs)
{
  SVectorGPU<T, D1> tmp;
  for (unsigned int i = 0; i < D1; ++i) {
    const unsigned int rpos = i * D2;
    for (unsigned int j = 0; j < D2; ++j) {
      tmp[i] += rhs.apply(rpos + j) * lhs.apply(j);
    }
  }
  return tmp;
}
// returns the transpose of rhs as a lazy expression
template <class T, unsigned int D1, unsigned int D2, class R>
GPUdi() Expr<TransposeOpGPU<SMatrixGPU<T, D1, D2, R>, T, D1, D2>, T, D2, D1, typename TranspPolicyGPU<T, D1, D2, R>::RepType>
  Transpose(const SMatrixGPU<T, D1, D2, R>& rhs)
{
  typedef TransposeOpGPU<SMatrixGPU<T, D1, D2, R>, T, D1, D2> MatTrOp;
  return Expr<MatTrOp, T, D2, D1, typename TranspPolicyGPU<T, D1, D2, R>::RepType>(MatTrOp(rhs));
}

// Similarity(lhs, rhs) = lhs * rhs * lhs^T, returned as a symmetric matrix
template <class T, unsigned int D1, unsigned int D2, class R>
GPUdi() SMatrixGPU<T, D1, D1, MatRepSymGPU<T, D1>> Similarity(const SMatrixGPU<T, D1, D2, R>& lhs, const SMatrixGPU<T, D2, D2, MatRepSymGPU<T, D2>>& rhs)
{
  SMatrixGPU<T, D1, D2, MatRepStdGPU<T, D1, D2>> tmp = lhs * rhs;
  SMatrixGPU<T, D1, D1, MatRepSymGPU<T, D1>> mret;
  AssignSym::Evaluate(mret, tmp * Transpose(lhs));
  return mret;
}
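// Added usage sketch (not from the original header): Similarity(J, C) evaluates
// J * C * J^T and stores the result in a symmetric matrix, the usual pattern for
// propagating a covariance matrix C through a Jacobian J.
//
//   SMatrixGPU<float, 5, 5> jacobian;
//   SMatrixGPU<float, 5, 5, MatRepSymGPU<float, 5>> cov;
//   auto propagated = Similarity(jacobian, cov); // SMatrixGPU<float, 5, 5, MatRepSymGPU<float, 5>>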