Project
Loading...
Searching...
No Matches
SMatrixGPU.h
Go to the documentation of this file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
23
24#ifndef ALICEO2_SMATRIX_GPU_H
25#define ALICEO2_SMATRIX_GPU_H
26
27#include "GPUCommonDef.h"
28#include "GPUCommonMath.h"
29#include "GPUCommonAlgorithm.h"
30#include "GPUCommonLogger.h"
31#ifndef GPUCA_GPUCODE_DEVICE
32#include <type_traits>
33#include <array>
34#endif
35
37{
38template <bool>
39struct Check {
40 GPUd() Check(void*) {}
41};
42template <>
43struct Check<false> {
44};
45
46#define GPU_STATIC_CHECK(expr, msg) \
47 { \
48 class ERROR_##msg \
49 { \
50 }; \
51 ERROR_##msg e; \
52 (void)(Check<(expr) != 0>(&e)); \
53 }
54
55template <typename T, unsigned int N>
57{
58 public:
59 typedef T value_type;
60 typedef T* iterator;
61 typedef const T* const_iterator;
62 GPUd() iterator begin();
64 GPUd() const_iterator begin() const;
68
69 GPUhd() const T& operator[](unsigned int i) const;
70 GPUhd() T& operator[](unsigned int i);
71 GPUd() const T& operator()(unsigned int i) const;
72 GPUd() T& operator()(unsigned int i);
73 GPUd() const T* Array() const;
74 GPUd() T* Array();
75 GPUd() T apply(unsigned int i) const;
76
77 // Operators
78 GPUd() SVectorGPU<T, N>& operator-=(const SVectorGPU<T, N>& rhs);
79 GPUd() SVectorGPU<T, N>& operator+=(const SVectorGPU<T, N>& rhs);
80
81 enum {
82 kSize = N
83 };
84
85 GPUdi() static unsigned int Dim() { return N; }
86
87 private:
88 T mArray[N];
89};
90
91template <class T, unsigned int D>
92GPUdi() T* SVectorGPU<T, D>::begin()
93{
94 return mArray;
95}
96
97template <class T, unsigned int D>
98GPUdi() const T* SVectorGPU<T, D>::begin() const
99{
100 return mArray;
101}
102
103template <class T, unsigned int D>
104GPUdi() T* SVectorGPU<T, D>::end()
105{
106 return mArray + Dim();
107}
108
109template <class T, unsigned int D>
110GPUdi() const T* SVectorGPU<T, D>::end() const
111{
112 return mArray + Dim();
113}
114template <class T, unsigned int N>
115
116GPUhdi() const T& SVectorGPU<T, N>::operator[](unsigned int i) const
117{
118 return mArray[i];
119}
120
121template <class T, unsigned int N>
122GPUhdi() T& SVectorGPU<T, N>::operator[](unsigned int i)
123{
124 return mArray[i];
125}
126
127template <class T, unsigned int N>
128GPUdi() const T& SVectorGPU<T, N>::operator()(unsigned int i) const
129{
130 return mArray[i];
131}
132
133template <class T, unsigned int N>
134GPUdi() T& SVectorGPU<T, N>::operator()(unsigned int i)
135{
136 return mArray[i];
137}
138
139template <class T, unsigned int N>
141{
142 for (unsigned int i = 0; i < N; ++i) {
143 mArray[i] = 0;
144 }
145}
146
147template <class T, unsigned int N>
148GPUd() SVectorGPU<T, N>::SVectorGPU(const SVectorGPU<T, N>& rhs)
149{
150 for (unsigned int i = 0; i < N; ++i) {
151 mArray[i] = rhs.mArray[i];
152 }
153}
154
155template <class T, unsigned int D>
156GPUdi() T SVectorGPU<T, D>::apply(unsigned int i) const
157{
158 return mArray[i];
159}
160
161template <class T, unsigned int D>
162GPUdi() const T* SVectorGPU<T, D>::Array() const
163{
164 return mArray;
165}
166
167template <class T, unsigned int D>
168GPUdi() T* SVectorGPU<T, D>::Array()
169{
170 return mArray;
171}
172
173template <unsigned int I>
174struct meta_dot {
175 template <class A, class B, class T>
176 static GPUdi() T f(const A& lhs, const B& rhs, const T& x)
177 {
178 return lhs.apply(I) * rhs.apply(I) + meta_dot<I - 1>::f(lhs, rhs, x);
179 }
180};
181
182template <>
183struct meta_dot<0> {
184 template <class A, class B, class T>
185 static GPUdi() T f(const A& lhs, const B& rhs, const T& /*x */)
186 {
187 return lhs.apply(0) * rhs.apply(0);
188 }
189};
190
191template <class T, unsigned int D>
192GPUdi() T Dot(const SVectorGPU<T, D>& lhs, const SVectorGPU<T, D>& rhs)
193{
194 return meta_dot<D - 1>::f(lhs, rhs, T());
195}
196
197template <class T, unsigned int D>
198GPUdi() SVectorGPU<T, D>& SVectorGPU<T, D>::operator-=(const SVectorGPU<T, D>& rhs)
199{
200 for (unsigned int i = 0; i < D; ++i) {
201 mArray[i] -= rhs.apply(i);
202 }
203 return *this;
204}
205
206template <class T, unsigned int D>
207GPUdi() SVectorGPU<T, D>& SVectorGPU<T, D>::operator+=(const SVectorGPU<T, D>& rhs)
208{
209 for (unsigned int i = 0; i < D; ++i) {
210 mArray[i] += rhs.apply(i);
211 }
212 return *this;
213}
214
215template <unsigned int I>
217
218 template <class MatrixA, class MatrixB>
219 static GPUdi() typename MatrixA::value_type f(const MatrixA& lhs,
220 const MatrixB& rhs,
221 const unsigned int offset)
222 {
223 return lhs.apply(offset / MatrixB::kCols * MatrixA::kCols + I) *
224 rhs.apply(MatrixB::kCols * I + offset % MatrixB::kCols) +
226 }
227
228 template <class MatrixA, class MatrixB>
229 static GPUdi() typename MatrixA::value_type g(const MatrixA& lhs,
230 const MatrixB& rhs,
231 unsigned int i,
232 unsigned int j)
233 {
234 return lhs(i, I) * rhs(I, j) +
236 }
237};
238
239template <>
241
242 template <class MatrixA, class MatrixB>
243 static GPUdi() typename MatrixA::value_type f(const MatrixA& lhs,
244 const MatrixB& rhs,
245 const unsigned int offset)
246 {
247 return lhs.apply(offset / MatrixB::kCols * MatrixA::kCols) *
248 rhs.apply(offset % MatrixB::kCols);
249 }
250
251 // multiplication using i and j
252 template <class MatrixA, class MatrixB>
253 static GPUdi() typename MatrixA::value_type g(const MatrixA& lhs,
254 const MatrixB& rhs,
255 unsigned int i, unsigned int j)
256 {
257 return lhs(i, 0) * rhs(0, j);
258 }
259};
260
/// Helpers to build, at compile time, an std::array of N ints by applying a
/// functor to each index — used to tabulate symmetric-matrix offsets.
namespace row_offsets_utils
{
/// Compile-time integer sequence holder.
template <int...>
struct indices {
};

// Builds indices<0, 1, ..., N-1> by structural recursion on I.
template <int I, class IndexTuple, int N>
struct make_indices_impl;

template <int I, int... Indices, int N>
struct make_indices_impl<I, indices<Indices...>, N> {
  typedef typename make_indices_impl<I + 1, indices<Indices..., I>,
                                     N>::type type;
};

template <int N, int... Indices>
struct make_indices_impl<N, indices<Indices...>, N> {
  typedef indices<Indices...> type;
};

template <int N>
struct make_indices : make_indices_impl<0, indices<>, N> {
};

/// Expands f(I0 + I) over the index pack into an std::array.
template <int I0, class F, int... I>
constexpr auto do_make(F f, indices<I...>) -> std::array<int, sizeof...(I)>
{
  std::array<int, sizeof...(I)> retarr = {f(I0 + I)...};
  return retarr;
}

/// Returns {f(I0), f(I0+1), ..., f(I0+N-1)} as an std::array<int, N>.
template <int N, int I0 = 0, class F>
constexpr auto make(F f) -> std::array<int, N>
{
  return do_make<I0>(f, typename make_indices<N>::type());
}
} // namespace row_offsets_utils
298
299// Symm representation
300template <class T, unsigned int D>
302{
303 public:
304 typedef T value_type;
306 GPUdi() T& operator()(unsigned int i, unsigned int j) { return mArray[offset(i, j)]; }
307 GPUdi() T const& operator()(unsigned int i, unsigned int j) const { return mArray[offset(i, j)]; }
308 GPUhdi() T& operator[](unsigned int i) { return mArray[off(i)]; }
309 GPUdi() T const& operator[](unsigned int i) const { return mArray[off(i)]; }
310 GPUdi() T apply(unsigned int i) const { return mArray[off(i)]; }
311 GPUdi() T* Array() { return mArray; }
312
313 GPUdi() const T* Array() const { return mArray; }
314
315 // assignment: only symmetric to symmetric allowed
316 template <class R>
317 GPUdi() MatRepSymGPU<T, D>& operator=(const R&)
318 {
319 GPU_STATIC_CHECK(0 == 1, Check_symmetric_matrices_equivalence);
320 return *this;
321 }
322
324 {
325 for (unsigned int i = 0; i < kSize; ++i) {
326 mArray[i] = rhs.Array()[i];
327 }
328 return *this;
329 }
330
331 enum {
332 kRows = D, // rows
333 kCols = D, // columns
334 kSize = D * (D + 1) / 2 // rows*columns
335 };
336
337 static constexpr int off0(int i) { return i == 0 ? 0 : off0(i - 1) + i; }
338 static constexpr int off2(int i, int j) { return j < i ? off0(i) + j : off0(j) + i; }
339 static constexpr int off1(int i) { return off2(i / D, i % D); }
340
341 static GPUdi() int off(int i)
342 {
343 static constexpr auto v = row_offsets_utils::make<D * D>(off1);
344 return v[i];
345 }
346
347 static GPUdi() constexpr unsigned int offset(unsigned int i, unsigned int j)
348 {
349 return off(i * D + j);
350 }
351
352 private:
353 T mArray[kSize];
354};
355
357template <class T, unsigned int D1, unsigned int D2 = D1>
359{
360 public:
361 typedef T value_type;
363 GPUdi() const T& operator()(unsigned int i, unsigned int j) const
364 {
365 return mArray[i * D2 + j];
366 }
367 GPUdi() T& operator()(unsigned int i, unsigned int j)
368 {
369 return mArray[i * D2 + j];
370 }
371 GPUdi() T& operator[](unsigned int i) { return mArray[i]; }
372 GPUdi() const T& operator[](unsigned int i) const { return mArray[i]; }
373 GPUdi() T apply(unsigned int i) const { return mArray[i]; }
374 GPUdi() T* Array() { return mArray; }
375 GPUdi() const T* Array() const { return mArray; }
376
377 template <class R>
378 GPUdi() MatRepStdGPU<T, D1, D2>& operator=(const R& rhs)
379 {
380 for (unsigned int i = 0; i < kSize; ++i) {
381 mArray[i] = rhs[i];
382 }
383 return *this;
384 }
385 template <class R>
386 GPUdi() bool operator==(const R& rhs) const
387 {
388 bool rc = true;
389 for (unsigned int i = 0; i < kSize; ++i) {
390 rc = rc && (mArray[i] == rhs[i]);
391 }
392 return rc;
393 }
394 enum {
395 kRows = D1, // rows
396 kCols = D2, // columns
397 kSize = D1 * D2 // rows*columns
398 };
399
400 private:
401 T mArray[kSize];
402};
403
// Tag types used to select special SMatrixGPU constructors.
// NOTE(review): the struct-name lines were dropped by the HTML scrape; the
// names are reconstructed from ROOT's SMatrixIdentity/SMatrixNoInit — confirm
// against upstream.
struct SMatrixIdentity {
};
struct SMatrixNoInit {
};
409
410// Expression utility to describe operations
411template <class ExprType, class T, unsigned int D, unsigned int D2 = 1, class R1 = MatRepStdGPU<T, D, D2>>
412class Expr
413{
414 public:
415 typedef T value_type;
416 GPUd() Expr(const ExprType& rhs) : mRhs(rhs) {} // NOLINT: False positive
417 GPUdDefault() ~Expr() = default;
418 GPUdi() T apply(unsigned int i) const
419 {
420 return mRhs.apply(i);
421 }
422 GPUdi() T operator()(unsigned int i, unsigned j) const
423 {
424 return mRhs(i, j);
425 }
426 GPUdi() bool IsInUse(const T* p) const
427 {
428 return mRhs.IsInUse(p);
429 }
430
431 enum {
433 kCols = D2
434 };
435
436 private:
437 ExprType mRhs;
438};
439
440template <class T, unsigned int D1, unsigned int D2 = D1, class R = MatRepStdGPU<T, D1, D2>>
442{
443 public:
444 typedef T value_type;
445 typedef R rep_type;
446 typedef T* iterator;
447 typedef const T* const_iterator;
448 GPUdDefault() SMatrixGPU() = default;
452 template <class R2>
454 template <class A, class R2>
456 template <class M>
457 GPUd() SMatrixGPU<T, D1, D2, R>& operator=(const M& rhs);
458 template <class A, class R2>
459 GPUd() SMatrixGPU<T, D1, D2, R>& operator=(const Expr<A, T, D1, D2, R2>& rhs);
460 enum {
461 kRows = D1, // rows
462 kCols = D2, // columns
463 kSize = D1 * D2 // rows*columns
464 };
465 // https://root.cern/doc/master/SMatrix_8icc_source.html#l00627
466 GPUd() T apply(unsigned int i) const { return mRep[i]; }
467 GPUd() const T* Array() const { return mRep.Array(); }
468 GPUd() T* Array() { return mRep.Array(); }
469 GPUd() iterator begin();
471 GPUd() const T& operator()(unsigned int i, unsigned int j) const;
472 GPUd() T& operator()(unsigned int i, unsigned int j);
473
474 template <typename Y, typename X>
475 GPUd() friend X& operator<<(Y& y, const SMatrixGPU&);
476
478 {
479 public:
480 GPUd() SMatrixRowGPU(SMatrixGPU<T, D1, D2, R>& rhs, unsigned int i) : mMat(&rhs), mRow(i)
481 {
482 }
483 GPUd() T& operator[](int j) { return (*mMat)(mRow, j); }
484 //
485 private:
487 unsigned int mRow;
488 };
489
491 {
492 public:
493 GPUd() SMatrixRowGPUconst(const SMatrixGPU<T, D1, D2, R>& rhs, unsigned int i) : mMat(&rhs), mRow(i)
494 {
495 }
496 //
497 GPUd() const T& operator[](int j) const { return (*mMat)(mRow, j); }
498 //
499 private:
500 const SMatrixGPU<T, D1, D2, R>* mMat;
501 unsigned int mRow;
502 };
503
504 GPUd() SMatrixRowGPUconst operator[](unsigned int i) const { return SMatrixRowGPUconst(*this, i); }
505 GPUd() SMatrixRowGPU operator[](unsigned int i) { return SMatrixRowGPU(*this, i); }
506 template <class R2>
507 GPUd() SMatrixGPU<T, D1, D2, R>& operator+=(const SMatrixGPU<T, D1, D2, R2>& rhs);
508 GPUd() SMatrixGPU<T, D1, D2, R>& operator*=(const T& rhs);
509 template <class R2>
510 GPUd() SMatrixGPU<T, D1, D2, R>& operator*=(const SMatrixGPU<T, D1, D2, R2>& rhs);
511 template <class A, class R2>
512 GPUd() SMatrixGPU<T, D1, D2, R>& operator*=(const Expr<A, T, D1, D2, R2>& rhs);
513
514 GPUd() bool Invert();
515 GPUd() bool IsInUse(const T* p) const;
516
517 public:
519};
520
521#ifndef __OPENCL__ // TODO: current C++ for OpenCL 2021 is at C++17, so no concepts. But we don't need this trick for OpenCL anyway, so we can just hide it.
522template <class T, unsigned int D1, unsigned int D2, class R, typename Y, typename X = Y>
523 requires(sizeof(typename X::traits_type::pos_type) != 0) // do not provide a template to fair::Logger, etc... (pos_type is a member type of all std::ostream classes)
524GPUd() X& operator<<(Y& y, const SMatrixGPU<T, D1, D2, R>&)
525{
526 return y;
527}
528#endif
529
530template <class T, unsigned int D1, unsigned int D2, class R>
532{
533 for (unsigned int i = 0; i < R::kSize; ++i) {
534 mRep.Array()[i] = 0;
535 }
536 if (D1 <= D2) {
537 for (unsigned int i = 0; i < D1; ++i) {
538 mRep[i * D2 + i] = 1;
539 }
540 } else {
541 for (unsigned int i = 0; i < D2; ++i) {
542 mRep[i * D2 + i] = 1;
543 }
544 }
545}
546
547template <class T, unsigned int D1, unsigned int D2, class R>
549{
550 mRep = rhs.mRep;
551}
552
553template <class T, unsigned int D1, unsigned int D2, class R>
554template <class R2>
556{
557 operator=(rhs);
558}
559
560template <class T, unsigned int D1, unsigned int D2, class R>
561GPUdi() T* SMatrixGPU<T, D1, D2, R>::begin()
562{
563 return mRep.Array();
564}
565
566template <class T, unsigned int D1, unsigned int D2, class R>
567GPUdi() T* SMatrixGPU<T, D1, D2, R>::end()
568{
569 return mRep.Array() + R::kSize;
570}
571
572template <class T, unsigned int D1, unsigned int D2, class R>
573GPUdi() bool SMatrixGPU<T, D1, D2, R>::IsInUse(const T* p) const
574{
575 return p == mRep.Array();
576}
577
578template <class T, unsigned int D1, unsigned int D2, class A, class R1, class R2>
579struct Assign {
580 // Evaluate the expression from general to general matrices.
581 GPUd() static void Evaluate(SMatrixGPU<T, D1, D2, R1>& lhs, const Expr<A, T, D1, D2, R2>& rhs)
582 {
583 if (!rhs.IsInUse(lhs.begin())) {
584 unsigned int l = 0;
585 for (unsigned int i = 0; i < D1; ++i) {
586 for (unsigned int j = 0; j < D2; ++j) {
587 lhs.mRep[l] = rhs(i, j);
588 l++;
589 }
590 }
591 } else {
592 T tmp[D1 * D2];
593 unsigned int l = 0;
594 for (unsigned int i = 0; i < D1; ++i) {
595 for (unsigned int j = 0; j < D2; ++j) {
596 tmp[l] = rhs(i, j);
597 l++;
598 }
599 }
600
601 for (unsigned int i = 0; i < D1 * D2; ++i) {
602 lhs.mRep[i] = tmp[i];
603 }
604 }
605 }
606};
607
608template <class T, unsigned int D1, unsigned int D2, class A>
610 // Evaluate the expression from symmetric to symmetric matrices.
611 GPUd() static void Evaluate(SMatrixGPU<T, D1, D2, MatRepSymGPU<T, D1>>& lhs,
612 const Expr<A, T, D1, D2, MatRepSymGPU<T, D1>>& rhs)
613 {
614 if (!rhs.IsInUse(lhs.begin())) {
615 unsigned int l = 0;
616 for (unsigned int i = 0; i < D1; ++i) {
617 // storage of symmetric matrix is in lower block
618 for (unsigned int j = 0; j <= i; ++j) {
619 lhs.mRep.Array()[l] = rhs(i, j);
620 l++;
621 }
622 }
623 } else {
625 unsigned int l = 0;
626 for (unsigned int i = 0; i < D1; ++i) {
627 for (unsigned int j = 0; j <= i; ++j) {
628 tmp[l] = rhs(i, j);
629 l++;
630 }
631 }
632 for (unsigned int i = 0; i < MatRepSymGPU<T, D1>::kSize; ++i) {
633 lhs.mRep.Array()[i] = tmp[i];
634 }
635 }
636 }
637};
638
639// avoid assigment from expression based on a general matrix to a symmetric matrix
640template <class T, unsigned int D1, unsigned int D2, class A>
642 GPUd() static void Evaluate(SMatrixGPU<T, D1, D2, MatRepSymGPU<T, D1>>&,
643 const Expr<A, T, D1, D2, MatRepStdGPU<T, D1, D2>>&)
644 {
645 GPU_STATIC_CHECK(0 == 1, Check_general_to_symmetric_matrix_assignment);
646 }
647};
648
649// Force Expression evaluation from general to symmetric
650struct AssignSym {
651 // assign a symmetric matrix from an expression
652 template <class T, unsigned int D, class A, class R>
653 GPUd() static void Evaluate(SMatrixGPU<T, D, D, MatRepSymGPU<T, D>>& lhs, const Expr<A, T, D, D, R>& rhs)
654 {
655 unsigned int l = 0;
656 for (unsigned int i = 0; i < D; ++i) {
657 for (unsigned int j = 0; j <= i; ++j) {
658 lhs.mRep.Array()[l] = rhs(i, j);
659 l++;
660 }
661 }
662 }
663
664 // assign the symmetric matrix from a general matrix
665 template <class T, unsigned int D, class R>
666 GPUd() static void Evaluate(SMatrixGPU<T, D, D, MatRepSymGPU<T, D>>& lhs, const SMatrixGPU<T, D, D, R>& rhs)
667 {
668 unsigned int l = 0;
669 for (unsigned int i = 0; i < D; ++i) {
670 for (unsigned int j = 0; j <= i; ++j) {
671 lhs.mRep.Array()[l] = rhs(i, j);
672 l++;
673 }
674 }
675 }
676};
677
678template <class T, unsigned int D1, unsigned int D2, class R>
679template <class A, class R2>
680GPUdi() SMatrixGPU<T, D1, D2, R>& SMatrixGPU<T, D1, D2, R>::operator=(const Expr<A, T, D1, D2, R2>& rhs)
681{
683 return *this;
684}
685
686template <class T, unsigned int D1, unsigned int D2, class R>
687template <class M>
688GPUdi() SMatrixGPU<T, D1, D2, R>& SMatrixGPU<T, D1, D2, R>::operator=(const M & rhs)
689{
690 mRep = rhs.mRep;
691 return *this;
692}
693
694template <class T, unsigned int D1, unsigned int D2, class R>
695template <class A, class R2>
696GPUdi() SMatrixGPU<T, D1, D2, R>::SMatrixGPU(const Expr<A, T, D1, D2, R2>& rhs)
697{
698 operator=(rhs);
699}
700
701template <class T, unsigned int D1, unsigned int D2, class R>
702GPUdi() const T& SMatrixGPU<T, D1, D2, R>::operator()(unsigned int i, unsigned int j) const
703{
704 return mRep(i, j);
705}
706
707template <class T, unsigned int D1, unsigned int D2, class R>
708GPUdi() T& SMatrixGPU<T, D1, D2, R>::operator()(unsigned int i, unsigned int j)
709{
710 return mRep(i, j);
711}
712
713template <class T, class R1, class R2>
715 enum {
716 N1 = R1::kRows,
717 N2 = R2::kCols
718 };
720};
721
722template <class MatrixA, class MatrixB, class T, unsigned int D>
724{
725 public:
726 GPUd() MatrixMulOpGPU(const MatrixA& lhs, const MatrixB& rhs) : lhs_(lhs), rhs_(rhs) {}
728 GPUdi() T apply(unsigned int i) const
729 {
730 return meta_matrix_dot<D - 1>::f(lhs_, rhs_, i);
731 }
732
733 GPUdi() T operator()(unsigned int i, unsigned int j) const
734 {
735 return meta_matrix_dot<D - 1>::g(lhs_, rhs_, i, j);
736 }
737
738 GPUdi() bool IsInUse(const T* p) const
739 {
740 return lhs_.IsInUse(p) || rhs_.IsInUse(p);
741 }
742
743 protected:
744 const MatrixA& lhs_;
745 const MatrixB& rhs_;
746};
747
748template <class T, unsigned int D1, unsigned int D, unsigned int D2, class R1, class R2>
749GPUdi() Expr<MatrixMulOpGPU<SMatrixGPU<T, D1, D, R1>, SMatrixGPU<T, D, D2, R2>, T, D>, T, D1, D2, typename MultPolicyGPU<T, R1, R2>::RepType>
750 operator*(const SMatrixGPU<T, D1, D, R1>& lhs, const SMatrixGPU<T, D, D2, R2>& rhs)
751{
753 return Expr<MatMulOp, T, D1, D2,
754 typename MultPolicyGPU<T, R1, R2>::RepType>(MatMulOp(lhs, rhs));
755}
756
758template <unsigned int D, unsigned int N = D>
760{
761 public:
762 // generic square matrix using LU factorization
763 template <class MatrixRep>
764 GPUd() static bool Dinv(MatrixRep& rhs)
765 {
766 unsigned int work[N + 1] = {0};
767 typename MatrixRep::value_type det(0.0);
768
769 if (DfactMatrix(rhs, det, work) != 0) {
770 return false;
771 }
772
773 int ifail = DfinvMatrix(rhs, work);
774 if (ifail == 0) {
775 return true;
776 }
777 return false;
778 }
779
780 // symmetric matrix inversion (Bunch-kaufman pivoting)
781 template <class T>
782 GPUd() static bool Dinv(MatRepSymGPU<T, D>& rhs)
783 {
784 int ifail{0};
786 if (!ifail) {
787 return true;
788 }
789 return false;
790 }
791
792 // LU Factorization method for inversion of general square matrices
793 template <class T>
794 GPUd() static int DfactMatrix(MatRepStdGPU<T, D, N>& rhs, T& det, unsigned int* work);
795
796 // LU inversion of general square matrices. To be called after DFactMatrix
797 template <class T>
798 GPUd() static int DfinvMatrix(MatRepStdGPU<T, D, N>& rhs, unsigned int* work);
799
800 // Bunch-Kaufman method for inversion of symmetric matrices
801 template <class T>
802 GPUd() static void InvertBunchKaufman(MatRepSymGPU<T, D>& rhs, int& ifail);
803};
804
805template <unsigned int D, unsigned int N>
806template <class T>
807GPUdi() void Inverter<D, N>::InvertBunchKaufman(MatRepSymGPU<T, D>& rhs, int& ifail)
808{
809 typedef T value_type;
810 int i, j, k, s;
811 int pivrow;
812 const int nrow = MatRepSymGPU<T, D>::kRows;
813
816
817 typedef int* pivIter;
818 typedef T* mIter;
819
820 mIter x = xvec.begin();
821 // x[i] is used as helper storage, needs to have at least size nrow.
822 pivIter piv = pivv.begin();
823 // piv[i] is used to store details of exchanges
824
825 value_type temp1, temp2;
826 mIter ip, mjj, iq;
827 value_type lambda, sigma;
828 const value_type alpha = .6404; // = (1+sqrt(17))/8
829 // LM (04/2009) remove this useless check (it is not in LAPACK) which fails inversion of
830 // a matrix with values < epsilon in the diagonal
831 //
832 // const double epsilon = 32*std::numeric_limits<T>::epsilon();
833 // whenever a sum of two doubles is below or equal to epsilon
834 // it is set to zero.
835 // this constant could be set to zero but then the algorithm
836 // doesn't neccessarily detect that a matrix is singular
837
838 for (i = 0; i < nrow; i++) {
839 piv[i] = i + 1;
840 }
841
842 ifail = 0;
843
844 // compute the factorization P*A*P^T = L * D * L^T
845 // L is unit lower triangular, D is direct sum of 1x1 and 2x2 matrices
846 // L and D^-1 are stored in A = *this, P is stored in piv[]
847
848 for (j = 1; j < nrow; j += s) // main loop over columns
849 {
850 mjj = rhs.Array() + j * (j - 1) / 2 + j - 1;
851 lambda = 0; // compute lambda = max of A(j+1:n,j)
852 pivrow = j + 1;
853 ip = rhs.Array() + (j + 1) * j / 2 + j - 1;
854 for (i = j + 1; i <= nrow; ip += i++) {
855 if (o2::gpu::GPUCommonMath::Abs(*ip) > lambda) {
856 lambda = o2::gpu::GPUCommonMath::Abs(*ip);
857 pivrow = i;
858 }
859 }
860
861 if (lambda == 0) {
862 if (*mjj == 0) {
863 ifail = 1;
864 return;
865 }
866 s = 1;
867 *mjj = 1.0f / *mjj;
868 } else {
869 if (o2::gpu::GPUCommonMath::Abs(*mjj) >= lambda * alpha) {
870 s = 1;
871 pivrow = j;
872 } else {
873 sigma = 0; // compute sigma = max A(pivrow, j:pivrow-1)
874 ip = rhs.Array() + pivrow * (pivrow - 1) / 2 + j - 1;
875 for (k = j; k < pivrow; k++) {
876 if (o2::gpu::GPUCommonMath::Abs(*ip) > sigma) {
877 sigma = o2::gpu::GPUCommonMath::Abs(*ip);
878 }
879 ip++;
880 }
881 // sigma cannot be zero because it is at least lambda which is not zero
882 if (o2::gpu::GPUCommonMath::Abs(*mjj) >= alpha * lambda * (lambda / sigma)) {
883 s = 1;
884 pivrow = j;
885 } else if (o2::gpu::GPUCommonMath::Abs(*(rhs.Array() + pivrow * (pivrow - 1) / 2 + pivrow - 1)) >= alpha * sigma) {
886 s = 1;
887 } else {
888 s = 2;
889 }
890 }
891 if (pivrow == j) // no permutation neccessary
892 {
893 piv[j - 1] = pivrow;
894 if (*mjj == 0) {
895 ifail = 1;
896 return;
897 }
898 temp2 = *mjj = 1.0f / *mjj; // invert D(j,j)
899
900 // update A(j+1:n, j+1,n)
901 for (i = j + 1; i <= nrow; i++) {
902 temp1 = *(rhs.Array() + i * (i - 1) / 2 + j - 1) * temp2;
903 ip = rhs.Array() + i * (i - 1) / 2 + j;
904 for (k = j + 1; k <= i; k++) {
905 *ip -= static_cast<T>(temp1 * *(rhs.Array() + k * (k - 1) / 2 + j - 1));
906 // if (o2::gpu::GPUCommonMath::Abs(*ip) <= epsilon)
907 // *ip=0;
908 ip++;
909 }
910 }
911 // update L
912 ip = rhs.Array() + (j + 1) * j / 2 + j - 1;
913 for (i = j + 1; i <= nrow; ip += i++) {
914 *ip *= static_cast<T>(temp2);
915 }
916 } else if (s == 1) // 1x1 pivot
917 {
918 piv[j - 1] = pivrow;
919
920 // interchange rows and columns j and pivrow in
921 // submatrix (j:n,j:n)
922 ip = rhs.Array() + pivrow * (pivrow - 1) / 2 + j;
923 for (i = j + 1; i < pivrow; i++, ip++) {
924 temp1 = *(rhs.Array() + i * (i - 1) / 2 + j - 1);
925 *(rhs.Array() + i * (i - 1) / 2 + j - 1) = *ip;
926 *ip = static_cast<T>(temp1);
927 }
928 temp1 = *mjj;
929 *mjj = *(rhs.Array() + pivrow * (pivrow - 1) / 2 + pivrow - 1);
930 *(rhs.Array() + pivrow * (pivrow - 1) / 2 + pivrow - 1) = static_cast<T>(temp1);
931 ip = rhs.Array() + (pivrow + 1) * pivrow / 2 + j - 1;
932 iq = ip + pivrow - j;
933 for (i = pivrow + 1; i <= nrow; ip += i, iq += i++) {
934 temp1 = *iq;
935 *iq = *ip;
936 *ip = static_cast<T>(temp1);
937 }
938
939 if (*mjj == 0) {
940 ifail = 1;
941 return;
942 }
943 temp2 = *mjj = 1.0f / *mjj; // invert D(j,j)
944
945 // update A(j+1:n, j+1:n)
946 for (i = j + 1; i <= nrow; i++) {
947 temp1 = *(rhs.Array() + i * (i - 1) / 2 + j - 1) * temp2;
948 ip = rhs.Array() + i * (i - 1) / 2 + j;
949 for (k = j + 1; k <= i; k++) {
950 *ip -= static_cast<T>(temp1 * *(rhs.Array() + k * (k - 1) / 2 + j - 1));
951 // if (o2::gpu::GPUCommonMath::Abs(*ip) <= epsilon)
952 // *ip=0;
953 ip++;
954 }
955 }
956 // update L
957 ip = rhs.Array() + (j + 1) * j / 2 + j - 1;
958 for (i = j + 1; i <= nrow; ip += i++) {
959 *ip *= static_cast<T>(temp2);
960 }
961 } else // s=2, ie use a 2x2 pivot
962 {
963 piv[j - 1] = -pivrow;
964 piv[j] = 0; // that means this is the second row of a 2x2 pivot
965
966 if (j + 1 != pivrow) {
967 // interchange rows and columns j+1 and pivrow in
968 // submatrix (j:n,j:n)
969 ip = rhs.Array() + pivrow * (pivrow - 1) / 2 + j + 1;
970 for (i = j + 2; i < pivrow; i++, ip++) {
971 temp1 = *(rhs.Array() + i * (i - 1) / 2 + j);
972 *(rhs.Array() + i * (i - 1) / 2 + j) = *ip;
973 *ip = static_cast<T>(temp1);
974 }
975 temp1 = *(mjj + j + 1);
976 *(mjj + j + 1) =
977 *(rhs.Array() + pivrow * (pivrow - 1) / 2 + pivrow - 1);
978 *(rhs.Array() + pivrow * (pivrow - 1) / 2 + pivrow - 1) = static_cast<T>(temp1);
979 temp1 = *(mjj + j);
980 *(mjj + j) = *(rhs.Array() + pivrow * (pivrow - 1) / 2 + j - 1);
981 *(rhs.Array() + pivrow * (pivrow - 1) / 2 + j - 1) = static_cast<T>(temp1);
982 ip = rhs.Array() + (pivrow + 1) * pivrow / 2 + j;
983 iq = ip + pivrow - (j + 1);
984 for (i = pivrow + 1; i <= nrow; ip += i, iq += i++) {
985 temp1 = *iq;
986 *iq = *ip;
987 *ip = static_cast<T>(temp1);
988 }
989 }
990 // invert D(j:j+1,j:j+1)
991 temp2 = *mjj * *(mjj + j + 1) - *(mjj + j) * *(mjj + j);
992 if (temp2 == 0) {
993 LOGF(error, "SymMatrix::bunch_invert: error in pivot choice");
994 }
995 temp2 = 1. / temp2;
996 // this quotient is guaranteed to exist by the choice
997 // of the pivot
998 temp1 = *mjj;
999 *mjj = static_cast<T>(*(mjj + j + 1) * temp2);
1000 *(mjj + j + 1) = static_cast<T>(temp1 * temp2);
1001 *(mjj + j) = static_cast<T>(-*(mjj + j) * temp2);
1002
1003 if (j < nrow - 1) // otherwise do nothing
1004 {
1005 // update A(j+2:n, j+2:n)
1006 for (i = j + 2; i <= nrow; i++) {
1007 ip = rhs.Array() + i * (i - 1) / 2 + j - 1;
1008 temp1 = *ip * *mjj + *(ip + 1) * *(mjj + j);
1009 // if (o2::gpu::GPUCommonMath::Abs(temp1 ) <= epsilon)
1010 // temp1 = 0;
1011 temp2 = *ip * *(mjj + j) + *(ip + 1) * *(mjj + j + 1);
1012 // if (o2::gpu::GPUCommonMath::Abs(temp2 ) <= epsilon)
1013 // temp2 = 0;
1014 for (k = j + 2; k <= i; k++) {
1015 ip = rhs.Array() + i * (i - 1) / 2 + k - 1;
1016 iq = rhs.Array() + k * (k - 1) / 2 + j - 1;
1017 *ip -= static_cast<T>(temp1 * *iq + temp2 * *(iq + 1));
1018 // if (o2::gpu::GPUCommonMath::Abs(*ip) <= epsilon)
1019 // *ip = 0;
1020 }
1021 }
1022 // update L
1023 for (i = j + 2; i <= nrow; i++) {
1024 ip = rhs.Array() + i * (i - 1) / 2 + j - 1;
1025 temp1 = *ip * *mjj + *(ip + 1) * *(mjj + j);
1026 // if (o2::gpu::GPUCommonMath::Abs(temp1) <= epsilon)
1027 // temp1 = 0;
1028 *(ip + 1) = *ip * *(mjj + j) + *(ip + 1) * *(mjj + j + 1);
1029 // if (o2::gpu::GPUCommonMath::Abs(*(ip+1)) <= epsilon)
1030 // *(ip+1) = 0;
1031 *ip = static_cast<T>(temp1);
1032 }
1033 }
1034 }
1035 }
1036 } // end of main loop over columns
1037
1038 if (j == nrow) // the the last pivot is 1x1
1039 {
1040 mjj = rhs.Array() + j * (j - 1) / 2 + j - 1;
1041 if (*mjj == 0) {
1042 ifail = 1;
1043 return;
1044 } else {
1045 *mjj = 1.0f / *mjj;
1046 }
1047 } // end of last pivot code
1048
1049 // computing the inverse from the factorization
1050
1051 for (j = nrow; j >= 1; j -= s) // loop over columns
1052 {
1053 mjj = rhs.Array() + j * (j - 1) / 2 + j - 1;
1054 if (piv[j - 1] > 0) // 1x1 pivot, compute column j of inverse
1055 {
1056 s = 1;
1057 if (j < nrow) {
1058 ip = rhs.Array() + (j + 1) * j / 2 + j - 1;
1059 for (i = 0; i < nrow - j; ip += 1 + j + i++) {
1060 x[i] = *ip;
1061 }
1062 for (i = j + 1; i <= nrow; i++) {
1063 temp2 = 0;
1064 ip = rhs.Array() + i * (i - 1) / 2 + j;
1065 for (k = 0; k <= i - j - 1; k++) {
1066 temp2 += *ip++ * x[k];
1067 }
1068 for (ip += i - 1; k < nrow - j; ip += 1 + j + k++) {
1069 temp2 += *ip * x[k];
1070 }
1071 *(rhs.Array() + i * (i - 1) / 2 + j - 1) = static_cast<T>(-temp2);
1072 }
1073 temp2 = 0;
1074 ip = rhs.Array() + (j + 1) * j / 2 + j - 1;
1075 for (k = 0; k < nrow - j; ip += 1 + j + k++) {
1076 temp2 += x[k] * *ip;
1077 }
1078 *mjj -= static_cast<T>(temp2);
1079 }
1080 } else // 2x2 pivot, compute columns j and j-1 of the inverse
1081 {
1082 if (piv[j - 1] != 0) {
1083 LOGF(error, "error in piv %lf \n", static_cast<T>(piv[j - 1]));
1084 }
1085 s = 2;
1086 if (j < nrow) {
1087 ip = rhs.Array() + (j + 1) * j / 2 + j - 1;
1088 for (i = 0; i < nrow - j; ip += 1 + j + i++) {
1089 x[i] = *ip;
1090 }
1091 for (i = j + 1; i <= nrow; i++) {
1092 temp2 = 0;
1093 ip = rhs.Array() + i * (i - 1) / 2 + j;
1094 for (k = 0; k <= i - j - 1; k++) {
1095 temp2 += *ip++ * x[k];
1096 }
1097 for (ip += i - 1; k < nrow - j; ip += 1 + j + k++) {
1098 temp2 += *ip * x[k];
1099 }
1100 *(rhs.Array() + i * (i - 1) / 2 + j - 1) = static_cast<T>(-temp2);
1101 }
1102 temp2 = 0;
1103 ip = rhs.Array() + (j + 1) * j / 2 + j - 1;
1104 for (k = 0; k < nrow - j; ip += 1 + j + k++) {
1105 temp2 += x[k] * *ip;
1106 }
1107 *mjj -= static_cast<T>(temp2);
1108 temp2 = 0;
1109 ip = rhs.Array() + (j + 1) * j / 2 + j - 2;
1110 for (i = j + 1; i <= nrow; ip += i++) {
1111 temp2 += *ip * *(ip + 1);
1112 }
1113 *(mjj - 1) -= static_cast<T>(temp2);
1114 ip = rhs.Array() + (j + 1) * j / 2 + j - 2;
1115 for (i = 0; i < nrow - j; ip += 1 + j + i++) {
1116 x[i] = *ip;
1117 }
1118 for (i = j + 1; i <= nrow; i++) {
1119 temp2 = 0;
1120 ip = rhs.Array() + i * (i - 1) / 2 + j;
1121 for (k = 0; k <= i - j - 1; k++) {
1122 temp2 += *ip++ * x[k];
1123 }
1124 for (ip += i - 1; k < nrow - j; ip += 1 + j + k++) {
1125 temp2 += *ip * x[k];
1126 }
1127 *(rhs.Array() + i * (i - 1) / 2 + j - 2) = static_cast<T>(-temp2);
1128 }
1129 temp2 = 0;
1130 ip = rhs.Array() + (j + 1) * j / 2 + j - 2;
1131 for (k = 0; k < nrow - j; ip += 1 + j + k++) {
1132 temp2 += x[k] * *ip;
1133 }
1134 *(mjj - j) -= static_cast<T>(temp2);
1135 }
1136 }
1137
1138 // interchange rows and columns j and piv[j-1]
1139 // or rows and columns j and -piv[j-2]
1140
1141 pivrow = (piv[j - 1] == 0) ? -piv[j - 2] : piv[j - 1];
1142 ip = rhs.Array() + pivrow * (pivrow - 1) / 2 + j;
1143 for (i = j + 1; i < pivrow; i++, ip++) {
1144 temp1 = *(rhs.Array() + i * (i - 1) / 2 + j - 1);
1145 *(rhs.Array() + i * (i - 1) / 2 + j - 1) = *ip;
1146 *ip = static_cast<T>(temp1);
1147 }
1148 temp1 = *mjj;
1149 *mjj = *(rhs.Array() + pivrow * (pivrow - 1) / 2 + pivrow - 1);
1150 *(rhs.Array() + pivrow * (pivrow - 1) / 2 + pivrow - 1) = static_cast<T>(temp1);
1151 if (s == 2) {
1152 temp1 = *(mjj - 1);
1153 *(mjj - 1) = *(rhs.Array() + pivrow * (pivrow - 1) / 2 + j - 2);
1154 *(rhs.Array() + pivrow * (pivrow - 1) / 2 + j - 2) = static_cast<T>(temp1);
1155 }
1156
1157 ip = rhs.Array() + (pivrow + 1) * pivrow / 2 + j - 1; // &A(i,j)
1158 iq = ip + pivrow - j;
1159 for (i = pivrow + 1; i <= nrow; ip += i, iq += i++) {
1160 temp1 = *iq;
1161 *iq = *ip;
1162 *ip = static_cast<T>(temp1);
1163 }
1164 } // end of loop over columns (in computing inverse from factorization)
1165
1166 return; // inversion successful
1167}
1168
// LU factorization
///
/// In-place triangular (LU) factorization of a square n x n matrix with
/// partial (row) pivoting, ported from the CERNLIB/CLHEP `dfact` routine.
///
/// \param rhs  matrix to factorize; overwritten with the LU factors.
///             Note: the diagonal is stored as the *reciprocal* of the pivot
///             (see `*mjj = 1.0f / *mjj` below), as DfinvMatrix expects.
/// \param det  output: determinant of the input matrix (0 on singularity or
///             on under-/overflow of the running product).
/// \param ir   output: row-interchange record. Each swap of rows j and k is
///             packed as (j << 12) + k into ir[1..nxch]; the number of swaps
///             is stored in ir[n]. Consumed by DfinvMatrix.
/// \return 0 on success, -1 if the matrix is not square or is singular.
template <unsigned int D, unsigned int n>
template <class T>
GPUdi() int Inverter<D, n>::DfactMatrix(MatRepStdGPU<T, D, n>& rhs, T& det, unsigned int* ir)
{
  // Only square representations can be factorized.
  if (D != n) {
    return -1;
  }

  int ifail, jfail;
  typedef T* mIter;

  typedef T value_type;

  value_type tf;
  // g1/g2: under-/overflow guards for the running determinant product.
  value_type g1 = 1.0e-19, g2 = 1.0e19;

  value_type p, q, t;
  value_type s11, s12;

  // LM (04.09) : remove useless check on epsilon and set it to zero
  const value_type epsilon = 0.0;
  // double epsilon = 8*std::numeric_limits<T>::epsilon();
  // could be set to zero (like it was before)
  // but then the algorithm often doesn't detect
  // that a matrix is singular

  // Status codes: ifail reports singularity, jfail reports det range issues.
  int normal = 0, imposs = -1;
  int jrange = 0, jover = 1, junder = -1;
  ifail = normal;
  jfail = jrange;
  int nxch = 0; // number of row interchanges performed
  det = 1.0;
  mIter mj = rhs.Array();  // start of row j
  mIter mjj = mj;          // diagonal element (j,j)
  for (unsigned int j = 1; j <= n; j++) {
    // Partial pivoting: find the row k >= j with the largest |A(i,j)|.
    unsigned int k = j;
    p = (o2::gpu::GPUCommonMath::Abs(*mjj));
    if (j != n) {
      mIter mij = mj + n + j - 1;
      for (unsigned int i = j + 1; i <= n; i++) {
        q = (o2::gpu::GPUCommonMath::Abs(*(mij)));
        if (q > p) {
          k = i;
          p = q;
        }
        mij += n;
      }
      if (k == j) {
        // Pivot stays on the diagonal; a zero pivot means singular.
        if (p <= epsilon) {
          det = 0;
          ifail = imposs;
          jfail = jrange;
          return ifail;
        }
        det = -det; // in this case the sign of the determinant
                    // must not change. So I change it twice.
      }
      // Swap rows j and k (a no-op swap when k == j, compensated above).
      mIter mjl = mj;
      mIter mkl = rhs.Array() + (k - 1) * n;
      for (unsigned int l = 1; l <= n; l++) {
        tf = *mjl;
        *(mjl++) = *mkl;
        *(mkl++) = static_cast<T>(tf);
      }
      nxch = nxch + 1; // this makes the determinant change its sign
      // Record the interchange packed into 12-bit fields: (j << 12) + k.
      ir[nxch] = (((j) << 12) + (k));
    } else {
      // Last column: only the singularity check remains.
      if (p <= epsilon) {
        det = 0.0;
        ifail = imposs;
        jfail = jrange;
        return ifail;
      }
    }
    // Accumulate the determinant and store the reciprocal pivot in place.
    det *= *mjj;
    *mjj = 1.0f / *mjj;
    // Clamp the running product on under-/overflow, remembering which.
    t = (o2::gpu::GPUCommonMath::Abs(det));
    if (t < g1) {
      det = 0.0;
      if (jfail == jrange) {
        jfail = junder;
      }
    } else if (t > g2) {
      det = 1.0;
      if (jfail == jrange) {
        jfail = jover;
      }
    }
    // Elimination step: update the trailing submatrix rows k = j+1..n.
    if (j != n) {
      mIter mk = mj + n;
      mIter mkjp = mk + j;
      mIter mjk = mj + j;
      for (k = j + 1; k <= n; k++) {
        s11 = -(*mjk);
        s12 = -(*mkjp);
        if (j != 1) {
          mIter mik = rhs.Array() + k - 1;
          mIter mijp = rhs.Array() + j;
          mIter mki = mk;
          mIter mji = mj;
          for (unsigned int i = 1; i < j; i++) {
            s11 += (*mik) * (*(mji++));
            s12 += (*mijp) * (*(mki++));
            mik += n;
            mijp += n;
          }
        }
        // cast to avoid warnings from double to float conversions
        *(mjk++) = static_cast<T>(-s11 * (*mjj));
        *(mkjp) = static_cast<T>(-(((*(mjj + 1))) * ((*(mkjp - 1))) + (s12)));
        mk += n;
        mkjp += n;
      }
    }
    mj += n;
    mjj += (n + 1);
  }
  // An odd number of row swaps flips the determinant's sign.
  if (nxch % 2 == 1) {
    det = -det;
  }
  // If the product ever left [g1, g2], the determinant is unreliable.
  if (jfail != jrange) {
    det = 0.0;
  }
  ir[n] = nxch; // total interchange count, read back by DfinvMatrix
  return 0;
}
1296
/// Computes the inverse of a square matrix from the LU factorization
/// produced by DfactMatrix (CERNLIB/CLHEP `dfinv` port): inverts the
/// triangular factors in place, multiplies them, then undoes the recorded
/// row interchanges.
///
/// \param rhs  LU-factorized matrix (diagonal holds reciprocal pivots);
///             overwritten with the inverse.
/// \param ir   interchange record from DfactMatrix: swaps packed as
///             (i << 12) + j in ir[1..nxch], count in ir[n].
/// \return 0 on success, -1 if the matrix is not square.
template <unsigned int D, unsigned int n>
template <class T>
GPUdi() int Inverter<D, n>::DfinvMatrix(MatRepStdGPU<T, D, n>& rhs, unsigned int* ir)
{
  typedef T* mIter;
  typedef T value_type;

  // Only square representations can be inverted.
  if (D != n) {
    return -1;
  }

  value_type s31, s32;
  value_type s33, s34;

  // Invert the triangular factors in place; start with the leading 2x2.
  mIter m11 = rhs.Array();
  mIter m12 = m11 + 1;
  mIter m21 = m11 + n;
  mIter m22 = m12 + n;
  *m21 = -(*m22) * (*m11) * (*m21);
  *m12 = -(*m12);
  if (n > 2) {
    // Extend the triangular inversion row by row for i = 3..n.
    mIter mi = rhs.Array() + 2 * n;
    mIter mii = rhs.Array() + 2 * n + 2;
    mIter mimim = rhs.Array() + n + 1;
    for (unsigned int i = 3; i <= n; i++) {
      unsigned int im2 = i - 2;
      mIter mj = rhs.Array();
      mIter mji = mj + i - 1;
      mIter mij = mi;
      for (unsigned int j = 1; j <= im2; j++) {
        s31 = 0.0;
        s32 = *mji;
        mIter mkj = mj + j - 1;
        mIter mik = mi + j - 1;
        mIter mjkp = mj + j;
        mIter mkpi = mj + n + i - 1;
        for (unsigned int k = j; k <= im2; k++) {
          s31 += (*mkj) * (*(mik++));
          s32 += (*(mjkp++)) * (*mkpi);
          mkj += n;
          mkpi += n;
        }
        *mij = static_cast<T>(-(*mii) * (((*(mij - n))) * ((*(mii - 1))) + (s31)));
        *mji = static_cast<T>(-s32);
        mj += n;
        mji += n;
        mij++;
      }
      *(mii - 1) = -(*mii) * (*mimim) * (*(mii - 1));
      *(mimim + 1) = -(*(mimim + 1));
      mi += n;
      mimim += (n + 1);
      mii += (n + 1);
    }
  }
  // Form the product of the inverted factors (U^-1 * L^-1) in place.
  mIter mi = rhs.Array();
  mIter mii = rhs.Array();
  for (unsigned int i = 1; i < n; i++) {
    unsigned int ni = n - i;
    mIter mij = mi;
    // int j;
    for (unsigned j = 1; j <= i; j++) {
      s33 = *mij;
      mIter mikj = mi + n + j - 1;
      mIter miik = mii + 1;
      mIter min_end = mi + n;
      for (; miik < min_end;) {
        s33 += (*mikj) * (*(miik++));
        mikj += n;
      }
      *(mij++) = static_cast<T>(s33);
    }
    for (unsigned j = 1; j <= ni; j++) {
      s34 = 0.0;
      mIter miik = mii + j;
      mIter mikij = mii + j * n + j;
      for (unsigned int k = j; k <= ni; k++) {
        s34 += *mikij * (*(miik++));
        mikij += n;
      }
      *(mii + j) = s34;
    }
    mi += n;
    mii += (n + 1);
  }
  // Undo the pivoting: apply the recorded interchanges in reverse order.
  unsigned int nxch = ir[n];
  if (nxch == 0) {
    return 0;
  }
  for (unsigned int mm = 1; mm <= nxch; mm++) {
    unsigned int k = nxch - mm + 1; // walk the record backwards
    int ij = ir[k];
    // Unpack the swap recorded as (i << 12) + j.
    int i = ij >> 12;
    int j = ij % 4096;
    // Swap columns i and j across all rows.
    mIter mki = rhs.Array() + i - 1;
    mIter mkj = rhs.Array() + j - 1;
    for (k = 1; k <= n; k++) {
      T ti = *mki;
      *mki = *mkj;
      *mkj = ti;
      mki += n;
      mkj += n;
    }
  }
  return 0;
}
1403
1404template <class T, unsigned int D1, unsigned int D2, class R>
1405GPUdi() bool SMatrixGPU<T, D1, D2, R>::Invert()
1406{
1407 GPU_STATIC_CHECK(D1 == D2, SMatrixGPU_not_square);
1408 return Inverter<D1, D2>::Dinv((*this).mRep);
1409}
1410
1411template <class T, unsigned int D1, unsigned int D2, class R>
1412template <class R2>
1413GPUdi() SMatrixGPU<T, D1, D2, R>& SMatrixGPU<T, D1, D2, R>::operator+=(const SMatrixGPU<T, D1, D2, R2>& rhs)
1414{
1415 mRep += rhs.mRep;
1416 return *this;
1417}
1418
1419template <class T, unsigned int D1, unsigned int D2, class R>
1420GPUdi() SMatrixGPU<T, D1, D2, R>& SMatrixGPU<T, D1, D2, R>::operator*=(const T & rhs)
1421{
1422 for (unsigned int i = 0; i < R::kSize; ++i) {
1423 mRep.Array()[i] *= rhs;
1424 }
1425 return *this;
1426}
1427
1428template <class T, unsigned int D1, unsigned int D2, class R>
1429template <class R2>
1430GPUdi() SMatrixGPU<T, D1, D2, R>& SMatrixGPU<T, D1, D2, R>::operator*=(const SMatrixGPU<T, D1, D2, R2>& rhs)
1431{
1432 return operator=(*this* rhs);
1433}
1434
1435template <class T, unsigned int D1, unsigned int D2, class R>
1436template <class A, class R2>
1437GPUdi() SMatrixGPU<T, D1, D2, R>& SMatrixGPU<T, D1, D2, R>::operator*=(const Expr<A, T, D1, D2, R2>& rhs)
1438{
1439 return operator=(*this* rhs);
1440}
1441
1442template <class T, unsigned int D1, unsigned int D2, class R>
1444 enum {
1445 N1 = R::kRows,
1446 N2 = R::kCols
1449};
1450
1451template <class T, unsigned int D1, unsigned int D2>
1455
1456template <class Matrix, class T, unsigned int D1, unsigned int D2 = D1>
1458{
1459 public:
1460 GPUd() TransposeOpGPU(const Matrix& rhs) : mRhs(rhs) {}
1461
1463
1464 GPUdi() T apply(unsigned int i) const
1465 {
1466 return mRhs.apply((i % D1) * D2 + i / D1);
1467 }
1468
1469 GPUdi() T operator()(unsigned int i, unsigned j) const
1470 {
1471 return mRhs(j, i);
1472 }
1473
1474 GPUdi() bool IsInUse(const T* p) const
1475 {
1476 return mRhs.IsInUse(p);
1477 }
1478
1479 protected:
1480 const Matrix& mRhs;
1481};
1482
1483template <class T, unsigned int D1, unsigned int D2, class R>
1484GPUdi() SVectorGPU<T, D1> operator*(const SMatrixGPU<T, D1, D2, R>& rhs, const SVectorGPU<T, D2>& lhs)
1485{
1487 for (unsigned int i = 0; i < D1; ++i) {
1488 const unsigned int rpos = i * D2;
1489 for (unsigned int j = 0; j < D2; ++j) {
1490 tmp[i] += rhs.apply(rpos + j) * lhs.apply(j);
1491 }
1492 }
1493 return tmp;
1494}
1495
1496template <class T, unsigned int D1, unsigned int D2, class R>
1497GPUdi() Expr<TransposeOpGPU<SMatrixGPU<T, D1, D2, R>, T, D1, D2>, T, D2, D1, typename TranspPolicyGPU<T, D1, D2, R>::RepType> Transpose(const SMatrixGPU<T, D1, D2, R>& rhs)
1498{
1499 typedef TransposeOpGPU<SMatrixGPU<T, D1, D2, R>, T, D1, D2> MatTrOp;
1501}
1502
1503template <class T, unsigned int D1, unsigned int D2, class R>
1504GPUdi() SMatrixGPU<T, D1, D1, MatRepSymGPU<T, D1>> Similarity(const SMatrixGPU<T, D1, D2, R>& lhs, const SMatrixGPU<T, D2, D2, MatRepSymGPU<T, D2>>& rhs)
1505{
1508 SMatrixSym mret;
1509 AssignSym::Evaluate(mret, tmp * Transpose(lhs));
1510 return mret;
1511}
1512} // namespace o2::math_utils::detail
1513#endif
int32_t i
#define GPUdi()
#define GPUhd()
#define GPUd()
uint32_t j
Definition RawData.h:0
#define GPU_STATIC_CHECK(expr, msg)
Definition SMatrixGPU.h:46
Definition A.h:16
Definition B.h:16
GPUdi() T apply(unsigned int i) const
Definition SMatrixGPU.h:418
GPUdDefault() ~Expr()=default
GPUd() Expr(const ExprType &rhs)
Definition SMatrixGPU.h:416
GPUdi() bool IsInUse(const T *p) const
Definition SMatrixGPU.h:426
GPUd() static bool Dinv(MatrixRep &rhs)
Definition SMatrixGPU.h:764
GPUd() static bool Dinv(MatRepSymGPU< T
SMatReprStd starting port here.
Definition SMatrixGPU.h:359
GPUdi() const T *Array() const
Definition SMatrixGPU.h:375
GPUdi() T apply(unsigned int i) const
Definition SMatrixGPU.h:373
GPUdDefault() MatRepStdGPU()=default
GPUdi() const T &operator[](unsigned int i) const
Definition SMatrixGPU.h:372
GPUdi() T &operator()(unsigned int i
GPUdi() T &operator[](unsigned int i)
Definition SMatrixGPU.h:371
static GPUdi() const expr unsigned int offset(unsigned int i
static GPUdi() int off(int i)
Definition SMatrixGPU.h:341
static constexpr int off0(int i)
Definition SMatrixGPU.h:337
GPUdDefault() MatRepSymGPU()=default
static constexpr int off2(int i, int j)
Definition SMatrixGPU.h:338
GPUdi() T apply(unsigned int i) const
Definition SMatrixGPU.h:310
GPUdi() T const &operator[](unsigned int i) const
Definition SMatrixGPU.h:309
GPUdi() const T *Array() const
Definition SMatrixGPU.h:313
GPUhdi() T &operator[](unsigned int i)
Definition SMatrixGPU.h:308
static constexpr int off1(int i)
Definition SMatrixGPU.h:339
GPUdi() T apply(unsigned int i) const
Definition SMatrixGPU.h:728
GPUd() MatrixMulOpGPU(const MatrixA &lhs
GPUdDefault() ~MatrixMulOpGPU()=default
GPUdi() bool IsInUse(const T *p) const
Definition SMatrixGPU.h:738
GPUd() SMatrixRowGPUconst(const SMatrixGPU< T
GPUd() SMatrixGPU(SMatrixIdentity)
GPUd() T apply(unsigned int i) const
Definition SMatrixGPU.h:466
GPUd() SMatrixRowGPUconst operator[](unsigned int i) const
Definition SMatrixGPU.h:504
GPUdi() SMatrixGPU(SMatrixNoInit)
Definition SMatrixGPU.h:449
GPUd() const T *Array() const
Definition SMatrixGPU.h:467
GPUd() SMatrixRowGPU operator[](unsigned int i)
Definition SMatrixGPU.h:505
GPUdDefault() SMatrixGPU()=default
GPUdi() static unsigned int Dim()
Definition SMatrixGPU.h:85
GPUdDefault() ~TransposeOpGPU()=default
GPUdi() T apply(unsigned int i) const
GPUd() TransposeOpGPU(const Matrix &rhs)
GPUdi() bool IsInUse(const T *p) const
GLdouble n
Definition glcorearb.h:1982
GLfloat GLfloat GLfloat alpha
Definition glcorearb.h:279
GLint GLenum GLint x
Definition glcorearb.h:403
GLuint GLuint end
Definition glcorearb.h:469
const GLdouble * v
Definition glcorearb.h:832
GLdouble f
Definition glcorearb.h:310
GLint GLint GLsizei GLint GLenum GLenum type
Definition glcorearb.h:275
GLintptr offset
Definition glcorearb.h:660
GLboolean GLboolean g
Definition glcorearb.h:1233
constexpr double mm
Definition SpecsV2.h:33
constexpr auto do_make(F f, indices< I... >) -> std::array< int, sizeof...(I)>
Definition SMatrixGPU.h:286
constexpr auto make(F f) -> std::array< int, N >
Definition SMatrixGPU.h:293
return((dphi > 0 &&dphi< constants::math::PI)||dphi< -constants::math::PI) ? true GPUhdi() const expr bool okForPhiMax(T phiMax
GPUd() SVectorGPU< T
D const SVectorGPU< T, D > & rhs
Definition SMatrixGPU.h:193
TranspPolicyGPU< T, D1, D2, R >::RepType Transpose(const SMatrixGPU< T, D1, D2, R > &rhs)
const SMatrixGPU< T, D1, D2, R > D2
Definition SMatrixGPU.h:531
const SMatrixGPU< T, D1, D2, R > D1
Definition SMatrixGPU.h:531
GPUdi() int nint(T x)
Definition basicMath.h:67
T Dot(const SVector< T, D > &lhs, const SVector< T, D > &rhs)
Definition Cartesian.h:257
std::unique_ptr< GPUReconstructionTimeframe > tf
GPUd() static void Evaluate(SMatrixGPU< T
MatRepStdGPU< T, N1, N2 > RepType
Definition SMatrixGPU.h:719
SMatrixGPU starting port here.
Definition SMatrixGPU.h:405
MatRepStdGPU< T, N2, N1 > RepType
static GPUdi() T f(const A &lhs
static GPUdi() T f(const A &lhs
static GPUdi() typename MatrixA
Definition SMatrixGPU.h:219
o2::InteractionRecord ir(0, 0)