// NOTE(review): the lines that followed here ("Project", "Loading...", "Searching...",
// "No Matches", "SMatrixGPU.h", "Go to the documentation of this file.") were Doxygen
// page-navigation residue from the HTML extraction, not part of the source file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
/// \file SMatrixGPU.h
/// \brief GPU-usable port of ROOT's SMatrix/SVector (reconstructed file header —
///        the original \file doc-comment block was lost in extraction; confirm upstream).
24#ifndef ALICEO2_SMATRIX_GPU_H
25#define ALICEO2_SMATRIX_GPU_H
26
27#include "GPUCommonDef.h"
28#include "GPUCommonArray.h"
29#include "GPUCommonMath.h"
30#include "GPUCommonAlgorithm.h"
31#include "GPUCommonLogger.h"
32#include "GPUCommonTypeTraits.h"
33
35{
36template <bool>
37struct Check {
38 GPUd() Check(void*) {}
39};
40template <>
41struct Check<false> {
42};
43
44#define GPU_STATIC_CHECK(expr, msg) \
45 { \
46 class ERROR_##msg \
47 { \
48 }; \
49 ERROR_##msg e; \
50 (void)(Check<(expr) != 0>(&e)); \
51 }
52
53template <typename T, unsigned int N>
55{
56 public:
57 typedef T value_type;
58 typedef T* iterator;
59 typedef const T* const_iterator;
60 GPUd() iterator begin();
62 GPUd() const_iterator begin() const;
66
67 GPUhd() const T& operator[](unsigned int i) const;
68 GPUhd() T& operator[](unsigned int i);
69 GPUd() const T& operator()(unsigned int i) const;
70 GPUd() T& operator()(unsigned int i);
71 GPUd() const T* Array() const;
72 GPUd() T* Array();
73 GPUd() T apply(unsigned int i) const;
74
75 // Operators
76 GPUd() SVectorGPU<T, N>& operator-=(const SVectorGPU<T, N>& rhs);
77 GPUd() SVectorGPU<T, N>& operator+=(const SVectorGPU<T, N>& rhs);
78
79 enum {
80 kSize = N
81 };
82
83 GPUdi() static unsigned int Dim() { return N; }
84
85 private:
86 T mArray[N];
87};
88
89template <class T, unsigned int D>
90GPUdi() T* SVectorGPU<T, D>::begin()
91{
92 return mArray;
93}
94
95template <class T, unsigned int D>
96GPUdi() const T* SVectorGPU<T, D>::begin() const
97{
98 return mArray;
99}
100
101template <class T, unsigned int D>
102GPUdi() T* SVectorGPU<T, D>::end()
103{
104 return mArray + Dim();
105}
106
107template <class T, unsigned int D>
108GPUdi() const T* SVectorGPU<T, D>::end() const
109{
110 return mArray + Dim();
111}
112template <class T, unsigned int N>
113
114GPUhdi() const T& SVectorGPU<T, N>::operator[](unsigned int i) const
115{
116 return mArray[i];
117}
118
119template <class T, unsigned int N>
120GPUhdi() T& SVectorGPU<T, N>::operator[](unsigned int i)
121{
122 return mArray[i];
123}
124
125template <class T, unsigned int N>
126GPUdi() const T& SVectorGPU<T, N>::operator()(unsigned int i) const
127{
128 return mArray[i];
129}
130
131template <class T, unsigned int N>
132GPUdi() T& SVectorGPU<T, N>::operator()(unsigned int i)
133{
134 return mArray[i];
135}
136
137template <class T, unsigned int N>
139{
140 for (unsigned int i = 0; i < N; ++i) {
141 mArray[i] = 0;
142 }
143}
144
145template <class T, unsigned int N>
146GPUd() SVectorGPU<T, N>::SVectorGPU(const SVectorGPU<T, N>& rhs)
147{
148 for (unsigned int i = 0; i < N; ++i) {
149 mArray[i] = rhs.mArray[i];
150 }
151}
152
153template <class T, unsigned int D>
154GPUdi() T SVectorGPU<T, D>::apply(unsigned int i) const
155{
156 return mArray[i];
157}
158
159template <class T, unsigned int D>
160GPUdi() const T* SVectorGPU<T, D>::Array() const
161{
162 return mArray;
163}
164
165template <class T, unsigned int D>
166GPUdi() T* SVectorGPU<T, D>::Array()
167{
168 return mArray;
169}
170
171template <unsigned int I>
172struct meta_dot {
173 template <class A, class B, class T>
174 static GPUdi() T f(const A& lhs, const B& rhs, const T& x)
175 {
176 return lhs.apply(I) * rhs.apply(I) + meta_dot<I - 1>::f(lhs, rhs, x);
177 }
178};
179
180template <>
181struct meta_dot<0> {
182 template <class A, class B, class T>
183 static GPUdi() T f(const A& lhs, const B& rhs, const T& /*x */)
184 {
185 return lhs.apply(0) * rhs.apply(0);
186 }
187};
188
189template <class T, unsigned int D>
190GPUdi() T Dot(const SVectorGPU<T, D>& lhs, const SVectorGPU<T, D>& rhs)
191{
192 return meta_dot<D - 1>::f(lhs, rhs, T());
193}
194
195template <class T, unsigned int D>
196GPUdi() SVectorGPU<T, D>& SVectorGPU<T, D>::operator-=(const SVectorGPU<T, D>& rhs)
197{
198 for (unsigned int i = 0; i < D; ++i) {
199 mArray[i] -= rhs.apply(i);
200 }
201 return *this;
202}
203
204template <class T, unsigned int D>
205GPUdi() SVectorGPU<T, D>& SVectorGPU<T, D>::operator+=(const SVectorGPU<T, D>& rhs)
206{
207 for (unsigned int i = 0; i < D; ++i) {
208 mArray[i] += rhs.apply(i);
209 }
210 return *this;
211}
212
213template <unsigned int I>
215
216 template <class MatrixA, class MatrixB>
217 static GPUdi() typename MatrixA::value_type f(const MatrixA& lhs,
218 const MatrixB& rhs,
219 const unsigned int offset)
220 {
221 return lhs.apply(offset / MatrixB::kCols * MatrixA::kCols + I) *
222 rhs.apply(MatrixB::kCols * I + offset % MatrixB::kCols) +
224 }
225
226 template <class MatrixA, class MatrixB>
227 static GPUdi() typename MatrixA::value_type g(const MatrixA& lhs,
228 const MatrixB& rhs,
229 unsigned int i,
230 unsigned int j)
231 {
232 return lhs(i, I) * rhs(I, j) +
234 }
235};
236
237template <>
239
240 template <class MatrixA, class MatrixB>
241 static GPUdi() typename MatrixA::value_type f(const MatrixA& lhs,
242 const MatrixB& rhs,
243 const unsigned int offset)
244 {
245 return lhs.apply(offset / MatrixB::kCols * MatrixA::kCols) *
246 rhs.apply(offset % MatrixB::kCols);
247 }
248
249 // multiplication using i and j
250 template <class MatrixA, class MatrixB>
251 static GPUdi() typename MatrixA::value_type g(const MatrixA& lhs,
252 const MatrixB& rhs,
253 unsigned int i, unsigned int j)
254 {
255 return lhs(i, 0) * rhs(0, j);
256 }
257};
258
259namespace row_offsets_utils
260{
261template <int...>
262struct indices {
263};
264
265template <int I, class IndexTuple, int N>
267
268template <int I, int... Indices, int N>
269struct make_indices_impl<I, indices<Indices...>, N> {
270 typedef typename make_indices_impl<I + 1, indices<Indices..., I>,
272};
273
274template <int N, int... Indices>
275struct make_indices_impl<N, indices<Indices...>, N> {
276 typedef indices<Indices...> type;
277};
278
279template <int N>
280struct make_indices : make_indices_impl<0, indices<>, N> {
281};
282
283template <int I0, class F, int... I>
284constexpr auto do_make(F f, indices<I...>) -> gpu::gpustd::array<int, sizeof...(I)>
285{
286 gpu::gpustd::array<int, sizeof...(I)> retarr = {f(I0 + I)...};
287 return retarr;
288}
289
290template <int N, int I0 = 0, class F>
292{
293 return do_make<I0>(f, typename make_indices<N>::type());
294}
295} // namespace row_offsets_utils
296
297// Symm representation
298template <class T, unsigned int D>
300{
301 public:
302 typedef T value_type;
304 GPUdi() T& operator()(unsigned int i, unsigned int j) { return mArray[offset(i, j)]; }
305 GPUdi() T const& operator()(unsigned int i, unsigned int j) const { return mArray[offset(i, j)]; }
306 GPUhdi() T& operator[](unsigned int i) { return mArray[off(i)]; }
307 GPUdi() T const& operator[](unsigned int i) const { return mArray[off(i)]; }
308 GPUdi() T apply(unsigned int i) const { return mArray[off(i)]; }
309 GPUdi() T* Array() { return mArray; }
310
311 GPUdi() const T* Array() const { return mArray; }
312
313 // assignment: only symmetric to symmetric allowed
314 template <class R>
315 GPUdi() MatRepSymGPU<T, D>& operator=(const R&)
316 {
317 GPU_STATIC_CHECK(0 == 1, Check_symmetric_matrices_equivalence);
318 return *this;
319 }
320
322 {
323 for (unsigned int i = 0; i < kSize; ++i) {
324 mArray[i] = rhs.Array()[i];
325 }
326 return *this;
327 }
328
329 enum {
330 kRows = D, // rows
331 kCols = D, // columns
332 kSize = D * (D + 1) / 2 // rows*columns
333 };
334
335 static constexpr int off0(int i) { return i == 0 ? 0 : off0(i - 1) + i; }
336 static constexpr int off2(int i, int j) { return j < i ? off0(i) + j : off0(j) + i; }
337 static constexpr int off1(int i) { return off2(i / D, i % D); }
338
339 static GPUdi() int off(int i)
340 {
341 static constexpr auto v = row_offsets_utils::make<D * D>(off1);
342 return v[i];
343 }
344
345 static GPUdi() constexpr unsigned int offset(unsigned int i, unsigned int j)
346 {
347 return off(i * D + j);
348 }
349
350 private:
351 T mArray[kSize];
352};
353
355template <class T, unsigned int D1, unsigned int D2 = D1>
357{
358 public:
359 typedef T value_type;
361 GPUdi() const T& operator()(unsigned int i, unsigned int j) const
362 {
363 return mArray[i * D2 + j];
364 }
365 GPUdi() T& operator()(unsigned int i, unsigned int j)
366 {
367 return mArray[i * D2 + j];
368 }
369 GPUdi() T& operator[](unsigned int i) { return mArray[i]; }
370 GPUdi() const T& operator[](unsigned int i) const { return mArray[i]; }
371 GPUdi() T apply(unsigned int i) const { return mArray[i]; }
372 GPUdi() T* Array() { return mArray; }
373 GPUdi() const T* Array() const { return mArray; }
374
375 template <class R>
376 GPUdi() MatRepStdGPU<T, D1, D2>& operator=(const R& rhs)
377 {
378 for (unsigned int i = 0; i < kSize; ++i) {
379 mArray[i] = rhs[i];
380 }
381 return *this;
382 }
383 template <class R>
384 GPUdi() bool operator==(const R& rhs) const
385 {
386 bool rc = true;
387 for (unsigned int i = 0; i < kSize; ++i) {
388 rc = rc && (mArray[i] == rhs[i]);
389 }
390 return rc;
391 }
392 enum {
393 kRows = D1, // rows
394 kCols = D2, // columns
395 kSize = D1 * D2 // rows*columns
396 };
397
398 private:
399 T mArray[kSize];
400};
401
// NOTE(review): only the closing braces of these two empty tag classes
// survived extraction. The names are reconstructed following ROOT's SMatrix
// tag types and the identity constructor defined below — confirm upstream.
class SMatrixIdentity
{
};
class SMatrixNoInit
{
};
407
408// Expression utility to describe operations
409template <class ExprType, class T, unsigned int D, unsigned int D2 = 1, class R1 = MatRepStdGPU<T, D, D2>>
410class Expr
411{
412 public:
413 typedef T value_type;
414 GPUd() Expr(const ExprType& rhs) : mRhs(rhs) {} // NOLINT: False positive
415 GPUdDefault() ~Expr() = default;
416 GPUdi() T apply(unsigned int i) const
417 {
418 return mRhs.apply(i);
419 }
420 GPUdi() T operator()(unsigned int i, unsigned j) const
421 {
422 return mRhs(i, j);
423 }
424 GPUdi() bool IsInUse(const T* p) const
425 {
426 return mRhs.IsInUse(p);
427 }
428
429 enum {
431 kCols = D2
432 };
433
434 private:
435 ExprType mRhs;
436};
437
438template <class T, unsigned int D1, unsigned int D2 = D1, class R = MatRepStdGPU<T, D1, D2>>
440{
441 public:
442 typedef T value_type;
443 typedef R rep_type;
444 typedef T* iterator;
445 typedef const T* const_iterator;
446 GPUdDefault() SMatrixGPU() = default;
450 template <class R2>
452 template <class A, class R2>
454 template <class M>
455 GPUd() SMatrixGPU<T, D1, D2, R>& operator=(const M& rhs);
456 template <class A, class R2>
457 GPUd() SMatrixGPU<T, D1, D2, R>& operator=(const Expr<A, T, D1, D2, R2>& rhs);
458 enum {
459 kRows = D1, // rows
460 kCols = D2, // columns
461 kSize = D1 * D2 // rows*columns
462 };
463 // https://root.cern/doc/master/SMatrix_8icc_source.html#l00627
464 GPUd() T apply(unsigned int i) const { return mRep[i]; }
465 GPUd() const T* Array() const { return mRep.Array(); }
466 GPUd() T* Array() { return mRep.Array(); }
467 GPUd() iterator begin();
469 GPUd() const T& operator()(unsigned int i, unsigned int j) const;
470 GPUd() T& operator()(unsigned int i, unsigned int j);
471
472 template <typename Y, typename X>
473 GPUd() friend X& operator<<(Y& y, const SMatrixGPU&);
474
476 {
477 public:
478 GPUd() SMatrixRowGPU(SMatrixGPU<T, D1, D2, R>& rhs, unsigned int i) : mMat(&rhs), mRow(i)
479 {
480 }
481 GPUd() T& operator[](int j) { return (*mMat)(mRow, j); }
482 //
483 private:
485 unsigned int mRow;
486 };
487
489 {
490 public:
491 GPUd() SMatrixRowGPUconst(const SMatrixGPU<T, D1, D2, R>& rhs, unsigned int i) : mMat(&rhs), mRow(i)
492 {
493 }
494 //
495 GPUd() const T& operator[](int j) const { return (*mMat)(mRow, j); }
496 //
497 private:
498 const SMatrixGPU<T, D1, D2, R>* mMat;
499 unsigned int mRow;
500 };
501
502 GPUd() SMatrixRowGPUconst operator[](unsigned int i) const { return SMatrixRowGPUconst(*this, i); }
503 GPUd() SMatrixRowGPU operator[](unsigned int i) { return SMatrixRowGPU(*this, i); }
504 template <class R2>
505 GPUd() SMatrixGPU<T, D1, D2, R>& operator+=(const SMatrixGPU<T, D1, D2, R2>& rhs);
506 GPUd() SMatrixGPU<T, D1, D2, R>& operator*=(const T& rhs);
507 template <class R2>
508 GPUd() SMatrixGPU<T, D1, D2, R>& operator*=(const SMatrixGPU<T, D1, D2, R2>& rhs);
509 template <class A, class R2>
510 GPUd() SMatrixGPU<T, D1, D2, R>& operator*=(const Expr<A, T, D1, D2, R2>& rhs);
511
512 GPUd() bool Invert();
513 GPUd() bool IsInUse(const T* p) const;
514
515 public:
517};
518
519#ifndef __OPENCL__ // TODO: current C++ for OpenCL 2021 is at C++17, so no concepts. But we don't need this trick for OpenCL anyway, so we can just hide it.
520template <class T, unsigned int D1, unsigned int D2, class R, typename Y, typename X = Y>
521 requires(sizeof(typename X::traits_type::pos_type) != 0) // do not provide a template to fair::Logger, etc... (pos_type is a member type of all std::ostream classes)
522GPUd() X& operator<<(Y& y, const SMatrixGPU<T, D1, D2, R>&)
523{
524 return y;
525}
526#endif
527
528template <class T, unsigned int D1, unsigned int D2, class R>
530{
531 for (unsigned int i = 0; i < R::kSize; ++i) {
532 mRep.Array()[i] = 0;
533 }
534 if (D1 <= D2) {
535 for (unsigned int i = 0; i < D1; ++i) {
536 mRep[i * D2 + i] = 1;
537 }
538 } else {
539 for (unsigned int i = 0; i < D2; ++i) {
540 mRep[i * D2 + i] = 1;
541 }
542 }
543}
544
545template <class T, unsigned int D1, unsigned int D2, class R>
547{
548 mRep = rhs.mRep;
549}
550
551template <class T, unsigned int D1, unsigned int D2, class R>
552template <class R2>
554{
555 operator=(rhs);
556}
557
558template <class T, unsigned int D1, unsigned int D2, class R>
559GPUdi() T* SMatrixGPU<T, D1, D2, R>::begin()
560{
561 return mRep.Array();
562}
563
564template <class T, unsigned int D1, unsigned int D2, class R>
565GPUdi() T* SMatrixGPU<T, D1, D2, R>::end()
566{
567 return mRep.Array() + R::kSize;
568}
569
570template <class T, unsigned int D1, unsigned int D2, class R>
571GPUdi() bool SMatrixGPU<T, D1, D2, R>::IsInUse(const T* p) const
572{
573 return p == mRep.Array();
574}
575
576template <class T, unsigned int D1, unsigned int D2, class A, class R1, class R2>
577struct Assign {
578 // Evaluate the expression from general to general matrices.
579 GPUd() static void Evaluate(SMatrixGPU<T, D1, D2, R1>& lhs, const Expr<A, T, D1, D2, R2>& rhs)
580 {
581 if (!rhs.IsInUse(lhs.begin())) {
582 unsigned int l = 0;
583 for (unsigned int i = 0; i < D1; ++i) {
584 for (unsigned int j = 0; j < D2; ++j) {
585 lhs.mRep[l] = rhs(i, j);
586 l++;
587 }
588 }
589 } else {
590 T tmp[D1 * D2];
591 unsigned int l = 0;
592 for (unsigned int i = 0; i < D1; ++i) {
593 for (unsigned int j = 0; j < D2; ++j) {
594 tmp[l] = rhs(i, j);
595 l++;
596 }
597 }
598
599 for (unsigned int i = 0; i < D1 * D2; ++i) {
600 lhs.mRep[i] = tmp[i];
601 }
602 }
603 }
604};
605
606template <class T, unsigned int D1, unsigned int D2, class A>
608 // Evaluate the expression from symmetric to symmetric matrices.
609 GPUd() static void Evaluate(SMatrixGPU<T, D1, D2, MatRepSymGPU<T, D1>>& lhs,
610 const Expr<A, T, D1, D2, MatRepSymGPU<T, D1>>& rhs)
611 {
612 if (!rhs.IsInUse(lhs.begin())) {
613 unsigned int l = 0;
614 for (unsigned int i = 0; i < D1; ++i) {
615 // storage of symmetric matrix is in lower block
616 for (unsigned int j = 0; j <= i; ++j) {
617 lhs.mRep.Array()[l] = rhs(i, j);
618 l++;
619 }
620 }
621 } else {
623 unsigned int l = 0;
624 for (unsigned int i = 0; i < D1; ++i) {
625 for (unsigned int j = 0; j <= i; ++j) {
626 tmp[l] = rhs(i, j);
627 l++;
628 }
629 }
630 for (unsigned int i = 0; i < MatRepSymGPU<T, D1>::kSize; ++i) {
631 lhs.mRep.Array()[i] = tmp[i];
632 }
633 }
634 }
635};
636
637// avoid assigment from expression based on a general matrix to a symmetric matrix
638template <class T, unsigned int D1, unsigned int D2, class A>
640 GPUd() static void Evaluate(SMatrixGPU<T, D1, D2, MatRepSymGPU<T, D1>>&,
641 const Expr<A, T, D1, D2, MatRepStdGPU<T, D1, D2>>&)
642 {
643 GPU_STATIC_CHECK(0 == 1, Check_general_to_symmetric_matrix_assignment);
644 }
645};
646
647// Force Expression evaluation from general to symmetric
648struct AssignSym {
649 // assign a symmetric matrix from an expression
650 template <class T, unsigned int D, class A, class R>
651 GPUd() static void Evaluate(SMatrixGPU<T, D, D, MatRepSymGPU<T, D>>& lhs, const Expr<A, T, D, D, R>& rhs)
652 {
653 unsigned int l = 0;
654 for (unsigned int i = 0; i < D; ++i) {
655 for (unsigned int j = 0; j <= i; ++j) {
656 lhs.mRep.Array()[l] = rhs(i, j);
657 l++;
658 }
659 }
660 }
661
662 // assign the symmetric matrix from a general matrix
663 template <class T, unsigned int D, class R>
664 GPUd() static void Evaluate(SMatrixGPU<T, D, D, MatRepSymGPU<T, D>>& lhs, const SMatrixGPU<T, D, D, R>& rhs)
665 {
666 unsigned int l = 0;
667 for (unsigned int i = 0; i < D; ++i) {
668 for (unsigned int j = 0; j <= i; ++j) {
669 lhs.mRep.Array()[l] = rhs(i, j);
670 l++;
671 }
672 }
673 }
674};
675
676template <class T, unsigned int D1, unsigned int D2, class R>
677template <class A, class R2>
678GPUdi() SMatrixGPU<T, D1, D2, R>& SMatrixGPU<T, D1, D2, R>::operator=(const Expr<A, T, D1, D2, R2>& rhs)
679{
681 return *this;
682}
683
684template <class T, unsigned int D1, unsigned int D2, class R>
685template <class M>
686GPUdi() SMatrixGPU<T, D1, D2, R>& SMatrixGPU<T, D1, D2, R>::operator=(const M & rhs)
687{
688 mRep = rhs.mRep;
689 return *this;
690}
691
692template <class T, unsigned int D1, unsigned int D2, class R>
693template <class A, class R2>
694GPUdi() SMatrixGPU<T, D1, D2, R>::SMatrixGPU(const Expr<A, T, D1, D2, R2>& rhs)
695{
696 operator=(rhs);
697}
698
699template <class T, unsigned int D1, unsigned int D2, class R>
700GPUdi() const T& SMatrixGPU<T, D1, D2, R>::operator()(unsigned int i, unsigned int j) const
701{
702 return mRep(i, j);
703}
704
705template <class T, unsigned int D1, unsigned int D2, class R>
706GPUdi() T& SMatrixGPU<T, D1, D2, R>::operator()(unsigned int i, unsigned int j)
707{
708 return mRep(i, j);
709}
710
711template <class T, class R1, class R2>
713 enum {
714 N1 = R1::kRows,
715 N2 = R2::kCols
716 };
718};
719
720template <class MatrixA, class MatrixB, class T, unsigned int D>
722{
723 public:
724 GPUd() MatrixMulOpGPU(const MatrixA& lhs, const MatrixB& rhs) : lhs_(lhs), rhs_(rhs) {}
726 GPUdi() T apply(unsigned int i) const
727 {
728 return meta_matrix_dot<D - 1>::f(lhs_, rhs_, i);
729 }
730
731 GPUdi() T operator()(unsigned int i, unsigned int j) const
732 {
733 return meta_matrix_dot<D - 1>::g(lhs_, rhs_, i, j);
734 }
735
736 GPUdi() bool IsInUse(const T* p) const
737 {
738 return lhs_.IsInUse(p) || rhs_.IsInUse(p);
739 }
740
741 protected:
742 const MatrixA& lhs_;
743 const MatrixB& rhs_;
744};
745
746template <class T, unsigned int D1, unsigned int D, unsigned int D2, class R1, class R2>
747GPUdi() Expr<MatrixMulOpGPU<SMatrixGPU<T, D1, D, R1>, SMatrixGPU<T, D, D2, R2>, T, D>, T, D1, D2, typename MultPolicyGPU<T, R1, R2>::RepType>
748 operator*(const SMatrixGPU<T, D1, D, R1>& lhs, const SMatrixGPU<T, D, D2, R2>& rhs)
749{
751 return Expr<MatMulOp, T, D1, D2,
752 typename MultPolicyGPU<T, R1, R2>::RepType>(MatMulOp(lhs, rhs));
753}
754
756template <unsigned int D, unsigned int N = D>
758{
759 public:
760 // generic square matrix using LU factorization
761 template <class MatrixRep>
762 GPUd() static bool Dinv(MatrixRep& rhs)
763 {
764 unsigned int work[N + 1] = {0};
765 typename MatrixRep::value_type det(0.0);
766
767 if (DfactMatrix(rhs, det, work) != 0) {
768 return false;
769 }
770
771 int ifail = DfinvMatrix(rhs, work);
772 if (ifail == 0) {
773 return true;
774 }
775 return false;
776 }
777
778 // symmetric matrix inversion (Bunch-kaufman pivoting)
779 template <class T>
780 GPUd() static bool Dinv(MatRepSymGPU<T, D>& rhs)
781 {
782 int ifail{0};
784 if (!ifail) {
785 return true;
786 }
787 return false;
788 }
789
790 // LU Factorization method for inversion of general square matrices
791 template <class T>
792 GPUd() static int DfactMatrix(MatRepStdGPU<T, D, N>& rhs, T& det, unsigned int* work);
793
794 // LU inversion of general square matrices. To be called after DFactMatrix
795 template <class T>
796 GPUd() static int DfinvMatrix(MatRepStdGPU<T, D, N>& rhs, unsigned int* work);
797
798 // Bunch-Kaufman method for inversion of symmetric matrices
799 template <class T>
800 GPUd() static void InvertBunchKaufman(MatRepSymGPU<T, D>& rhs, int& ifail);
801};
802
803template <unsigned int D, unsigned int N>
804template <class T>
805GPUdi() void Inverter<D, N>::InvertBunchKaufman(MatRepSymGPU<T, D>& rhs, int& ifail)
806{
807 typedef T value_type;
808 int i, j, k, s;
809 int pivrow;
810 const int nrow = MatRepSymGPU<T, D>::kRows;
811
814
815 typedef int* pivIter;
816 typedef T* mIter;
817
818 mIter x = xvec.begin();
819 // x[i] is used as helper storage, needs to have at least size nrow.
820 pivIter piv = pivv.begin();
821 // piv[i] is used to store details of exchanges
822
823 value_type temp1, temp2;
824 mIter ip, mjj, iq;
825 value_type lambda, sigma;
826 const value_type alpha = .6404; // = (1+sqrt(17))/8
827 // LM (04/2009) remove this useless check (it is not in LAPACK) which fails inversion of
828 // a matrix with values < epsilon in the diagonal
829 //
830 // const double epsilon = 32*std::numeric_limits<T>::epsilon();
831 // whenever a sum of two doubles is below or equal to epsilon
832 // it is set to zero.
833 // this constant could be set to zero but then the algorithm
834 // doesn't neccessarily detect that a matrix is singular
835
836 for (i = 0; i < nrow; i++) {
837 piv[i] = i + 1;
838 }
839
840 ifail = 0;
841
842 // compute the factorization P*A*P^T = L * D * L^T
843 // L is unit lower triangular, D is direct sum of 1x1 and 2x2 matrices
844 // L and D^-1 are stored in A = *this, P is stored in piv[]
845
846 for (j = 1; j < nrow; j += s) // main loop over columns
847 {
848 mjj = rhs.Array() + j * (j - 1) / 2 + j - 1;
849 lambda = 0; // compute lambda = max of A(j+1:n,j)
850 pivrow = j + 1;
851 ip = rhs.Array() + (j + 1) * j / 2 + j - 1;
852 for (i = j + 1; i <= nrow; ip += i++) {
853 if (o2::gpu::GPUCommonMath::Abs(*ip) > lambda) {
854 lambda = o2::gpu::GPUCommonMath::Abs(*ip);
855 pivrow = i;
856 }
857 }
858
859 if (lambda == 0) {
860 if (*mjj == 0) {
861 ifail = 1;
862 return;
863 }
864 s = 1;
865 *mjj = 1.0f / *mjj;
866 } else {
867 if (o2::gpu::GPUCommonMath::Abs(*mjj) >= lambda * alpha) {
868 s = 1;
869 pivrow = j;
870 } else {
871 sigma = 0; // compute sigma = max A(pivrow, j:pivrow-1)
872 ip = rhs.Array() + pivrow * (pivrow - 1) / 2 + j - 1;
873 for (k = j; k < pivrow; k++) {
874 if (o2::gpu::GPUCommonMath::Abs(*ip) > sigma) {
875 sigma = o2::gpu::GPUCommonMath::Abs(*ip);
876 }
877 ip++;
878 }
879 // sigma cannot be zero because it is at least lambda which is not zero
880 if (o2::gpu::GPUCommonMath::Abs(*mjj) >= alpha * lambda * (lambda / sigma)) {
881 s = 1;
882 pivrow = j;
883 } else if (o2::gpu::GPUCommonMath::Abs(*(rhs.Array() + pivrow * (pivrow - 1) / 2 + pivrow - 1)) >= alpha * sigma) {
884 s = 1;
885 } else {
886 s = 2;
887 }
888 }
889 if (pivrow == j) // no permutation neccessary
890 {
891 piv[j - 1] = pivrow;
892 if (*mjj == 0) {
893 ifail = 1;
894 return;
895 }
896 temp2 = *mjj = 1.0f / *mjj; // invert D(j,j)
897
898 // update A(j+1:n, j+1,n)
899 for (i = j + 1; i <= nrow; i++) {
900 temp1 = *(rhs.Array() + i * (i - 1) / 2 + j - 1) * temp2;
901 ip = rhs.Array() + i * (i - 1) / 2 + j;
902 for (k = j + 1; k <= i; k++) {
903 *ip -= static_cast<T>(temp1 * *(rhs.Array() + k * (k - 1) / 2 + j - 1));
904 // if (o2::gpu::GPUCommonMath::Abs(*ip) <= epsilon)
905 // *ip=0;
906 ip++;
907 }
908 }
909 // update L
910 ip = rhs.Array() + (j + 1) * j / 2 + j - 1;
911 for (i = j + 1; i <= nrow; ip += i++) {
912 *ip *= static_cast<T>(temp2);
913 }
914 } else if (s == 1) // 1x1 pivot
915 {
916 piv[j - 1] = pivrow;
917
918 // interchange rows and columns j and pivrow in
919 // submatrix (j:n,j:n)
920 ip = rhs.Array() + pivrow * (pivrow - 1) / 2 + j;
921 for (i = j + 1; i < pivrow; i++, ip++) {
922 temp1 = *(rhs.Array() + i * (i - 1) / 2 + j - 1);
923 *(rhs.Array() + i * (i - 1) / 2 + j - 1) = *ip;
924 *ip = static_cast<T>(temp1);
925 }
926 temp1 = *mjj;
927 *mjj = *(rhs.Array() + pivrow * (pivrow - 1) / 2 + pivrow - 1);
928 *(rhs.Array() + pivrow * (pivrow - 1) / 2 + pivrow - 1) = static_cast<T>(temp1);
929 ip = rhs.Array() + (pivrow + 1) * pivrow / 2 + j - 1;
930 iq = ip + pivrow - j;
931 for (i = pivrow + 1; i <= nrow; ip += i, iq += i++) {
932 temp1 = *iq;
933 *iq = *ip;
934 *ip = static_cast<T>(temp1);
935 }
936
937 if (*mjj == 0) {
938 ifail = 1;
939 return;
940 }
941 temp2 = *mjj = 1.0f / *mjj; // invert D(j,j)
942
943 // update A(j+1:n, j+1:n)
944 for (i = j + 1; i <= nrow; i++) {
945 temp1 = *(rhs.Array() + i * (i - 1) / 2 + j - 1) * temp2;
946 ip = rhs.Array() + i * (i - 1) / 2 + j;
947 for (k = j + 1; k <= i; k++) {
948 *ip -= static_cast<T>(temp1 * *(rhs.Array() + k * (k - 1) / 2 + j - 1));
949 // if (o2::gpu::GPUCommonMath::Abs(*ip) <= epsilon)
950 // *ip=0;
951 ip++;
952 }
953 }
954 // update L
955 ip = rhs.Array() + (j + 1) * j / 2 + j - 1;
956 for (i = j + 1; i <= nrow; ip += i++) {
957 *ip *= static_cast<T>(temp2);
958 }
959 } else // s=2, ie use a 2x2 pivot
960 {
961 piv[j - 1] = -pivrow;
962 piv[j] = 0; // that means this is the second row of a 2x2 pivot
963
964 if (j + 1 != pivrow) {
965 // interchange rows and columns j+1 and pivrow in
966 // submatrix (j:n,j:n)
967 ip = rhs.Array() + pivrow * (pivrow - 1) / 2 + j + 1;
968 for (i = j + 2; i < pivrow; i++, ip++) {
969 temp1 = *(rhs.Array() + i * (i - 1) / 2 + j);
970 *(rhs.Array() + i * (i - 1) / 2 + j) = *ip;
971 *ip = static_cast<T>(temp1);
972 }
973 temp1 = *(mjj + j + 1);
974 *(mjj + j + 1) =
975 *(rhs.Array() + pivrow * (pivrow - 1) / 2 + pivrow - 1);
976 *(rhs.Array() + pivrow * (pivrow - 1) / 2 + pivrow - 1) = static_cast<T>(temp1);
977 temp1 = *(mjj + j);
978 *(mjj + j) = *(rhs.Array() + pivrow * (pivrow - 1) / 2 + j - 1);
979 *(rhs.Array() + pivrow * (pivrow - 1) / 2 + j - 1) = static_cast<T>(temp1);
980 ip = rhs.Array() + (pivrow + 1) * pivrow / 2 + j;
981 iq = ip + pivrow - (j + 1);
982 for (i = pivrow + 1; i <= nrow; ip += i, iq += i++) {
983 temp1 = *iq;
984 *iq = *ip;
985 *ip = static_cast<T>(temp1);
986 }
987 }
988 // invert D(j:j+1,j:j+1)
989 temp2 = *mjj * *(mjj + j + 1) - *(mjj + j) * *(mjj + j);
990 if (temp2 == 0) {
991 LOGF(error, "SymMatrix::bunch_invert: error in pivot choice");
992 }
993 temp2 = 1. / temp2;
994 // this quotient is guaranteed to exist by the choice
995 // of the pivot
996 temp1 = *mjj;
997 *mjj = static_cast<T>(*(mjj + j + 1) * temp2);
998 *(mjj + j + 1) = static_cast<T>(temp1 * temp2);
999 *(mjj + j) = static_cast<T>(-*(mjj + j) * temp2);
1000
1001 if (j < nrow - 1) // otherwise do nothing
1002 {
1003 // update A(j+2:n, j+2:n)
1004 for (i = j + 2; i <= nrow; i++) {
1005 ip = rhs.Array() + i * (i - 1) / 2 + j - 1;
1006 temp1 = *ip * *mjj + *(ip + 1) * *(mjj + j);
1007 // if (o2::gpu::GPUCommonMath::Abs(temp1 ) <= epsilon)
1008 // temp1 = 0;
1009 temp2 = *ip * *(mjj + j) + *(ip + 1) * *(mjj + j + 1);
1010 // if (o2::gpu::GPUCommonMath::Abs(temp2 ) <= epsilon)
1011 // temp2 = 0;
1012 for (k = j + 2; k <= i; k++) {
1013 ip = rhs.Array() + i * (i - 1) / 2 + k - 1;
1014 iq = rhs.Array() + k * (k - 1) / 2 + j - 1;
1015 *ip -= static_cast<T>(temp1 * *iq + temp2 * *(iq + 1));
1016 // if (o2::gpu::GPUCommonMath::Abs(*ip) <= epsilon)
1017 // *ip = 0;
1018 }
1019 }
1020 // update L
1021 for (i = j + 2; i <= nrow; i++) {
1022 ip = rhs.Array() + i * (i - 1) / 2 + j - 1;
1023 temp1 = *ip * *mjj + *(ip + 1) * *(mjj + j);
1024 // if (o2::gpu::GPUCommonMath::Abs(temp1) <= epsilon)
1025 // temp1 = 0;
1026 *(ip + 1) = *ip * *(mjj + j) + *(ip + 1) * *(mjj + j + 1);
1027 // if (o2::gpu::GPUCommonMath::Abs(*(ip+1)) <= epsilon)
1028 // *(ip+1) = 0;
1029 *ip = static_cast<T>(temp1);
1030 }
1031 }
1032 }
1033 }
1034 } // end of main loop over columns
1035
1036 if (j == nrow) // the the last pivot is 1x1
1037 {
1038 mjj = rhs.Array() + j * (j - 1) / 2 + j - 1;
1039 if (*mjj == 0) {
1040 ifail = 1;
1041 return;
1042 } else {
1043 *mjj = 1.0f / *mjj;
1044 }
1045 } // end of last pivot code
1046
1047 // computing the inverse from the factorization
1048
1049 for (j = nrow; j >= 1; j -= s) // loop over columns
1050 {
1051 mjj = rhs.Array() + j * (j - 1) / 2 + j - 1;
1052 if (piv[j - 1] > 0) // 1x1 pivot, compute column j of inverse
1053 {
1054 s = 1;
1055 if (j < nrow) {
1056 ip = rhs.Array() + (j + 1) * j / 2 + j - 1;
1057 for (i = 0; i < nrow - j; ip += 1 + j + i++) {
1058 x[i] = *ip;
1059 }
1060 for (i = j + 1; i <= nrow; i++) {
1061 temp2 = 0;
1062 ip = rhs.Array() + i * (i - 1) / 2 + j;
1063 for (k = 0; k <= i - j - 1; k++) {
1064 temp2 += *ip++ * x[k];
1065 }
1066 for (ip += i - 1; k < nrow - j; ip += 1 + j + k++) {
1067 temp2 += *ip * x[k];
1068 }
1069 *(rhs.Array() + i * (i - 1) / 2 + j - 1) = static_cast<T>(-temp2);
1070 }
1071 temp2 = 0;
1072 ip = rhs.Array() + (j + 1) * j / 2 + j - 1;
1073 for (k = 0; k < nrow - j; ip += 1 + j + k++) {
1074 temp2 += x[k] * *ip;
1075 }
1076 *mjj -= static_cast<T>(temp2);
1077 }
1078 } else // 2x2 pivot, compute columns j and j-1 of the inverse
1079 {
1080 if (piv[j - 1] != 0) {
1081 LOGF(error, "error in piv %lf \n", static_cast<T>(piv[j - 1]));
1082 }
1083 s = 2;
1084 if (j < nrow) {
1085 ip = rhs.Array() + (j + 1) * j / 2 + j - 1;
1086 for (i = 0; i < nrow - j; ip += 1 + j + i++) {
1087 x[i] = *ip;
1088 }
1089 for (i = j + 1; i <= nrow; i++) {
1090 temp2 = 0;
1091 ip = rhs.Array() + i * (i - 1) / 2 + j;
1092 for (k = 0; k <= i - j - 1; k++) {
1093 temp2 += *ip++ * x[k];
1094 }
1095 for (ip += i - 1; k < nrow - j; ip += 1 + j + k++) {
1096 temp2 += *ip * x[k];
1097 }
1098 *(rhs.Array() + i * (i - 1) / 2 + j - 1) = static_cast<T>(-temp2);
1099 }
1100 temp2 = 0;
1101 ip = rhs.Array() + (j + 1) * j / 2 + j - 1;
1102 for (k = 0; k < nrow - j; ip += 1 + j + k++) {
1103 temp2 += x[k] * *ip;
1104 }
1105 *mjj -= static_cast<T>(temp2);
1106 temp2 = 0;
1107 ip = rhs.Array() + (j + 1) * j / 2 + j - 2;
1108 for (i = j + 1; i <= nrow; ip += i++) {
1109 temp2 += *ip * *(ip + 1);
1110 }
1111 *(mjj - 1) -= static_cast<T>(temp2);
1112 ip = rhs.Array() + (j + 1) * j / 2 + j - 2;
1113 for (i = 0; i < nrow - j; ip += 1 + j + i++) {
1114 x[i] = *ip;
1115 }
1116 for (i = j + 1; i <= nrow; i++) {
1117 temp2 = 0;
1118 ip = rhs.Array() + i * (i - 1) / 2 + j;
1119 for (k = 0; k <= i - j - 1; k++) {
1120 temp2 += *ip++ * x[k];
1121 }
1122 for (ip += i - 1; k < nrow - j; ip += 1 + j + k++) {
1123 temp2 += *ip * x[k];
1124 }
1125 *(rhs.Array() + i * (i - 1) / 2 + j - 2) = static_cast<T>(-temp2);
1126 }
1127 temp2 = 0;
1128 ip = rhs.Array() + (j + 1) * j / 2 + j - 2;
1129 for (k = 0; k < nrow - j; ip += 1 + j + k++) {
1130 temp2 += x[k] * *ip;
1131 }
1132 *(mjj - j) -= static_cast<T>(temp2);
1133 }
1134 }
1135
1136 // interchange rows and columns j and piv[j-1]
1137 // or rows and columns j and -piv[j-2]
1138
1139 pivrow = (piv[j - 1] == 0) ? -piv[j - 2] : piv[j - 1];
1140 ip = rhs.Array() + pivrow * (pivrow - 1) / 2 + j;
1141 for (i = j + 1; i < pivrow; i++, ip++) {
1142 temp1 = *(rhs.Array() + i * (i - 1) / 2 + j - 1);
1143 *(rhs.Array() + i * (i - 1) / 2 + j - 1) = *ip;
1144 *ip = static_cast<T>(temp1);
1145 }
1146 temp1 = *mjj;
1147 *mjj = *(rhs.Array() + pivrow * (pivrow - 1) / 2 + pivrow - 1);
1148 *(rhs.Array() + pivrow * (pivrow - 1) / 2 + pivrow - 1) = static_cast<T>(temp1);
1149 if (s == 2) {
1150 temp1 = *(mjj - 1);
1151 *(mjj - 1) = *(rhs.Array() + pivrow * (pivrow - 1) / 2 + j - 2);
1152 *(rhs.Array() + pivrow * (pivrow - 1) / 2 + j - 2) = static_cast<T>(temp1);
1153 }
1154
1155 ip = rhs.Array() + (pivrow + 1) * pivrow / 2 + j - 1; // &A(i,j)
1156 iq = ip + pivrow - j;
1157 for (i = pivrow + 1; i <= nrow; ip += i, iq += i++) {
1158 temp1 = *iq;
1159 *iq = *ip;
1160 *ip = static_cast<T>(temp1);
1161 }
1162 } // end of loop over columns (in computing inverse from factorization)
1163
1164 return; // inversion successful
1165}
1166
// LU factorization
// Triangular (LU) factorization with partial pivoting of a square n x n
// matrix, a port of the CERNLIB DFACT routine as used by CLHEP/ROOT SMatrix.
// On return `rhs` holds the packed L/U factors (the diagonal stores the
// RECIPROCAL pivots), `det` the determinant, and `ir` the row-interchange
// record consumed later by DfinvMatrix.
// Returns 0 on success, -1 if the representation is not square or if a
// (numerically) zero pivot is met, i.e. the matrix is singular.
template <unsigned int D, unsigned int n>
template <class T>
GPUdi() int Inverter<D, n>::DfactMatrix(MatRepStdGPU<T, D, n>& rhs, T& det, unsigned int* ir)
{
  // Factorization is only defined for square representations.
  if (D != n) {
    return -1;
  }

  int ifail, jfail;
  typedef T* mIter;

  typedef T value_type;

  value_type tf;
  // Under/overflow guards for the running determinant product.
  value_type g1 = 1.0e-19, g2 = 1.0e19;

  value_type p, q, t;
  value_type s11, s12;

  // LM (04.09) : remove useless check on epsilon and set it to zero
  const value_type epsilon = 0.0;
  // double epsilon = 8*std::numeric_limits<T>::epsilon();
  // could be set to zero (like it was before)
  // but then the algorithm often doesn't detect
  // that a matrix is singular

  // Status codes: ifail reports singularity, jfail det under/overflow.
  int normal = 0, imposs = -1;
  int jrange = 0, jover = 1, junder = -1;
  ifail = normal;
  jfail = jrange;
  int nxch = 0; // number of row interchanges performed (drives det sign)
  det = 1.0;
  mIter mj = rhs.Array(); // start of row j (row-major storage)
  mIter mjj = mj;         // diagonal element A(j,j)
  for (unsigned int j = 1; j <= n; j++) {
    unsigned int k = j;
    // Partial pivoting: find the largest |A(i,j)| in column j, rows j..n.
    p = (o2::gpu::GPUCommonMath::Abs(*mjj));
    if (j != n) {
      mIter mij = mj + n + j - 1;
      for (unsigned int i = j + 1; i <= n; i++) {
        q = (o2::gpu::GPUCommonMath::Abs(*(mij)));
        if (q > p) {
          k = i;
          p = q;
        }
        mij += n;
      }
      if (k == j) {
        if (p <= epsilon) {
          // Pivot is (numerically) zero: the matrix is singular.
          det = 0;
          ifail = imposs;
          jfail = jrange;
          return ifail;
        }
        det = -det; // in this case the sign of the determinant
        // must not change. So I change it twice.
      }
      // Swap rows j and k unconditionally (a self-swap when k == j,
      // compensated by the double sign flip above).
      mIter mjl = mj;
      mIter mkl = rhs.Array() + (k - 1) * n;
      for (unsigned int l = 1; l <= n; l++) {
        tf = *mjl;
        *(mjl++) = *mkl;
        *(mkl++) = static_cast<T>(tf);
      }
      nxch = nxch + 1; // this makes the determinant change its sign
      // Pack the interchange pair (j,k) into one word; DfinvMatrix decodes
      // it as i = ir[.] >> 12, j = ir[.] % 4096.
      ir[nxch] = (((j) << 12) + (k));
    } else {
      if (p <= epsilon) {
        // Last pivot is zero: singular.
        det = 0.0;
        ifail = imposs;
        jfail = jrange;
        return ifail;
      }
    }
    // Accumulate the determinant and store the reciprocal pivot in place.
    det *= *mjj;
    *mjj = 1.0f / *mjj;
    t = (o2::gpu::GPUCommonMath::Abs(det));
    // Clamp the running product to avoid under/overflow; remember which
    // direction it escaped in jfail (the det is then reported as 0).
    if (t < g1) {
      det = 0.0;
      if (jfail == jrange) {
        jfail = junder;
      }
    } else if (t > g2) {
      det = 1.0;
      if (jfail == jrange) {
        jfail = jover;
      }
    }
    // Elimination step: update the trailing submatrix using pivot row j.
    if (j != n) {
      mIter mk = mj + n;
      mIter mkjp = mk + j;
      mIter mjk = mj + j;
      for (k = j + 1; k <= n; k++) {
        s11 = -(*mjk);
        s12 = -(*mkjp);
        if (j != 1) {
          mIter mik = rhs.Array() + k - 1;
          mIter mijp = rhs.Array() + j;
          mIter mki = mk;
          mIter mji = mj;
          for (unsigned int i = 1; i < j; i++) {
            s11 += (*mik) * (*(mji++));
            s12 += (*mijp) * (*(mki++));
            mik += n;
            mijp += n;
          }
        }
        // cast to avoid warnings from double to float conversions
        *(mjk++) = static_cast<T>(-s11 * (*mjj));
        *(mkjp) = static_cast<T>(-(((*(mjj + 1))) * ((*(mkjp - 1))) + (s12)));
        mk += n;
        mkjp += n;
      }
    }
    // Advance to the next row / diagonal element.
    mj += n;
    mjj += (n + 1);
  }
  // An odd number of interchanges flips the determinant sign.
  if (nxch % 2 == 1) {
    det = -det;
  }
  if (jfail != jrange) {
    det = 0.0;
  }
  // Slot n of ir records the total interchange count for DfinvMatrix.
  ir[n] = nxch;
  return 0;
}
1294
// Compute the inverse from the triangular factorization produced by
// DfactMatrix — a port of the CERNLIB DFINV routine as used by CLHEP/ROOT
// SMatrix. The L and U factors are inverted in place, multiplied, and the
// row interchanges recorded in `ir` are undone (as column swaps, in
// reverse order). Returns 0 on success, -1 if the representation is not
// square. `rhs` must contain the output of a successful DfactMatrix call.
template <unsigned int D, unsigned int n>
template <class T>
GPUdi() int Inverter<D, n>::DfinvMatrix(MatRepStdGPU<T, D, n>& rhs, unsigned int* ir)
{
  typedef T* mIter;
  typedef T value_type;

  if (D != n) {
    return -1;
  }

  value_type s31, s32;
  value_type s33, s34;

  // Handle the leading 2x2 block explicitly (the diagonal already holds
  // the reciprocal pivots from DfactMatrix).
  mIter m11 = rhs.Array();
  mIter m12 = m11 + 1;
  mIter m21 = m11 + n;
  mIter m22 = m12 + n;
  *m21 = -(*m22) * (*m11) * (*m21);
  *m12 = -(*m12);
  // Invert the strictly triangular parts of L and U in place for the
  // remaining rows/columns (i >= 3).
  if (n > 2) {
    mIter mi = rhs.Array() + 2 * n;
    mIter mii = rhs.Array() + 2 * n + 2;
    mIter mimim = rhs.Array() + n + 1;
    for (unsigned int i = 3; i <= n; i++) {
      unsigned int im2 = i - 2;
      mIter mj = rhs.Array();
      mIter mji = mj + i - 1;
      mIter mij = mi;
      for (unsigned int j = 1; j <= im2; j++) {
        s31 = 0.0;
        s32 = *mji;
        mIter mkj = mj + j - 1;
        mIter mik = mi + j - 1;
        mIter mjkp = mj + j;
        mIter mkpi = mj + n + i - 1;
        for (unsigned int k = j; k <= im2; k++) {
          s31 += (*mkj) * (*(mik++));
          s32 += (*(mjkp++)) * (*mkpi);
          mkj += n;
          mkpi += n;
        }
        *mij = static_cast<T>(-(*mii) * (((*(mij - n))) * ((*(mii - 1))) + (s31)));
        *mji = static_cast<T>(-s32);
        mj += n;
        mji += n;
        mij++;
      }
      *(mii - 1) = -(*mii) * (*mimim) * (*(mii - 1));
      *(mimim + 1) = -(*(mimim + 1));
      mi += n;
      mimim += (n + 1);
      mii += (n + 1);
    }
  }
  // Multiply the inverted factors in place: rhs <- U^-1 * L^-1.
  mIter mi = rhs.Array();
  mIter mii = rhs.Array();
  for (unsigned int i = 1; i < n; i++) {
    unsigned int ni = n - i;
    mIter mij = mi;
    // Columns 1..i of row i.
    for (unsigned j = 1; j <= i; j++) {
      s33 = *mij;
      mIter mikj = mi + n + j - 1;
      mIter miik = mii + 1;
      mIter min_end = mi + n;
      for (; miik < min_end;) {
        s33 += (*mikj) * (*(miik++));
        mikj += n;
      }
      *(mij++) = static_cast<T>(s33);
    }
    // Columns i+1..n of row i.
    for (unsigned j = 1; j <= ni; j++) {
      s34 = 0.0;
      mIter miik = mii + j;
      mIter mikij = mii + j * n + j;
      for (unsigned int k = j; k <= ni; k++) {
        s34 += *mikij * (*(miik++));
        mikij += n;
      }
      *(mii + j) = s34;
    }
    mi += n;
    mii += (n + 1);
  }
  // Undo the row interchanges recorded by DfactMatrix, newest first.
  // ir[n] holds the interchange count; each entry packs (j << 12) + k.
  unsigned int nxch = ir[n];
  if (nxch == 0) {
    return 0;
  }
  for (unsigned int mm = 1; mm <= nxch; mm++) {
    unsigned int k = nxch - mm + 1;
    int ij = ir[k];
    int i = ij >> 12;   // first row of the recorded pair
    int j = ij % 4096;  // second row of the recorded pair
    // Row swaps of the factorization become column swaps of the inverse.
    mIter mki = rhs.Array() + i - 1;
    mIter mkj = rhs.Array() + j - 1;
    for (k = 1; k <= n; k++) {
      T ti = *mki;
      *mki = *mkj;
      *mkj = ti;
      mki += n;
      mkj += n;
    }
  }
  return 0;
}
1401
1402template <class T, unsigned int D1, unsigned int D2, class R>
1403GPUdi() bool SMatrixGPU<T, D1, D2, R>::Invert()
1404{
1405 GPU_STATIC_CHECK(D1 == D2, SMatrixGPU_not_square);
1406 return Inverter<D1, D2>::Dinv((*this).mRep);
1407}
1408
1409template <class T, unsigned int D1, unsigned int D2, class R>
1410template <class R2>
1411GPUdi() SMatrixGPU<T, D1, D2, R>& SMatrixGPU<T, D1, D2, R>::operator+=(const SMatrixGPU<T, D1, D2, R2>& rhs)
1412{
1413 mRep += rhs.mRep;
1414 return *this;
1415}
1416
1417template <class T, unsigned int D1, unsigned int D2, class R>
1418GPUdi() SMatrixGPU<T, D1, D2, R>& SMatrixGPU<T, D1, D2, R>::operator*=(const T & rhs)
1419{
1420 for (unsigned int i = 0; i < R::kSize; ++i) {
1421 mRep.Array()[i] *= rhs;
1422 }
1423 return *this;
1424}
1425
1426template <class T, unsigned int D1, unsigned int D2, class R>
1427template <class R2>
1428GPUdi() SMatrixGPU<T, D1, D2, R>& SMatrixGPU<T, D1, D2, R>::operator*=(const SMatrixGPU<T, D1, D2, R2>& rhs)
1429{
1430 return operator=(*this* rhs);
1431}
1432
1433template <class T, unsigned int D1, unsigned int D2, class R>
1434template <class A, class R2>
1435GPUdi() SMatrixGPU<T, D1, D2, R>& SMatrixGPU<T, D1, D2, R>::operator*=(const Expr<A, T, D1, D2, R2>& rhs)
1436{
1437 return operator=(*this* rhs);
1438}
1439
1440template <class T, unsigned int D1, unsigned int D2, class R>
1442 enum {
1443 N1 = R::kRows,
1444 N2 = R::kCols
1447};
1448
1449template <class T, unsigned int D1, unsigned int D2>
1453
1454template <class Matrix, class T, unsigned int D1, unsigned int D2 = D1>
1456{
1457 public:
1458 GPUd() TransposeOpGPU(const Matrix& rhs) : mRhs(rhs) {}
1459
1461
1462 GPUdi() T apply(unsigned int i) const
1463 {
1464 return mRhs.apply((i % D1) * D2 + i / D1);
1465 }
1466
1467 GPUdi() T operator()(unsigned int i, unsigned j) const
1468 {
1469 return mRhs(j, i);
1470 }
1471
1472 GPUdi() bool IsInUse(const T* p) const
1473 {
1474 return mRhs.IsInUse(p);
1475 }
1476
1477 protected:
1478 const Matrix& mRhs;
1479};
1480
1481template <class T, unsigned int D1, unsigned int D2, class R>
1482GPUdi() SVectorGPU<T, D1> operator*(const SMatrixGPU<T, D1, D2, R>& rhs, const SVectorGPU<T, D2>& lhs)
1483{
1485 for (unsigned int i = 0; i < D1; ++i) {
1486 const unsigned int rpos = i * D2;
1487 for (unsigned int j = 0; j < D2; ++j) {
1488 tmp[i] += rhs.apply(rpos + j) * lhs.apply(j);
1489 }
1490 }
1491 return tmp;
1492}
1493
1494template <class T, unsigned int D1, unsigned int D2, class R>
1495GPUdi() Expr<TransposeOpGPU<SMatrixGPU<T, D1, D2, R>, T, D1, D2>, T, D2, D1, typename TranspPolicyGPU<T, D1, D2, R>::RepType> Transpose(const SMatrixGPU<T, D1, D2, R>& rhs)
1496{
1497 typedef TransposeOpGPU<SMatrixGPU<T, D1, D2, R>, T, D1, D2> MatTrOp;
1499}
1500
1501template <class T, unsigned int D1, unsigned int D2, class R>
1502GPUdi() SMatrixGPU<T, D1, D1, MatRepSymGPU<T, D1>> Similarity(const SMatrixGPU<T, D1, D2, R>& lhs, const SMatrixGPU<T, D2, D2, MatRepSymGPU<T, D2>>& rhs)
1503{
1506 SMatrixSym mret;
1507 AssignSym::Evaluate(mret, tmp * Transpose(lhs));
1508 return mret;
1509}
1510} // namespace o2::math_utils::detail
1511#endif
int32_t i
#define GPUdi()
#define GPUhd()
#define GPUd()
uint32_t j
Definition RawData.h:0
#define GPU_STATIC_CHECK(expr, msg)
Definition SMatrixGPU.h:44
Definition A.h:16
Definition B.h:16
GPUdi() T apply(unsigned int i) const
Definition SMatrixGPU.h:416
GPUdDefault() ~Expr()=default
GPUd() Expr(const ExprType &rhs)
Definition SMatrixGPU.h:414
GPUdi() bool IsInUse(const T *p) const
Definition SMatrixGPU.h:424
GPUd() static bool Dinv(MatrixRep &rhs)
Definition SMatrixGPU.h:762
GPUd() static bool Dinv(MatRepSymGPU< T
SMatReprStd starting port here.
Definition SMatrixGPU.h:357
GPUdi() const T *Array() const
Definition SMatrixGPU.h:373
GPUdi() T apply(unsigned int i) const
Definition SMatrixGPU.h:371
GPUdDefault() MatRepStdGPU()=default
GPUdi() const T &operator[](unsigned int i) const
Definition SMatrixGPU.h:370
GPUdi() T &operator()(unsigned int i
GPUdi() T &operator[](unsigned int i)
Definition SMatrixGPU.h:369
static GPUdi() const expr unsigned int offset(unsigned int i
static GPUdi() int off(int i)
Definition SMatrixGPU.h:339
static constexpr int off0(int i)
Definition SMatrixGPU.h:335
GPUdDefault() MatRepSymGPU()=default
static constexpr int off2(int i, int j)
Definition SMatrixGPU.h:336
GPUdi() T apply(unsigned int i) const
Definition SMatrixGPU.h:308
GPUdi() T const &operator[](unsigned int i) const
Definition SMatrixGPU.h:307
GPUdi() const T *Array() const
Definition SMatrixGPU.h:311
GPUhdi() T &operator[](unsigned int i)
Definition SMatrixGPU.h:306
static constexpr int off1(int i)
Definition SMatrixGPU.h:337
GPUdi() T apply(unsigned int i) const
Definition SMatrixGPU.h:726
GPUd() MatrixMulOpGPU(const MatrixA &lhs
GPUdDefault() ~MatrixMulOpGPU()=default
GPUdi() bool IsInUse(const T *p) const
Definition SMatrixGPU.h:736
GPUd() SMatrixRowGPUconst(const SMatrixGPU< T
GPUd() SMatrixGPU(SMatrixIdentity)
GPUd() T apply(unsigned int i) const
Definition SMatrixGPU.h:464
GPUd() SMatrixRowGPUconst operator[](unsigned int i) const
Definition SMatrixGPU.h:502
GPUdi() SMatrixGPU(SMatrixNoInit)
Definition SMatrixGPU.h:447
GPUd() const T *Array() const
Definition SMatrixGPU.h:465
GPUd() SMatrixRowGPU operator[](unsigned int i)
Definition SMatrixGPU.h:503
GPUdDefault() SMatrixGPU()=default
GPUdi() static unsigned int Dim()
Definition SMatrixGPU.h:83
GPUdDefault() ~TransposeOpGPU()=default
GPUdi() T apply(unsigned int i) const
GPUd() TransposeOpGPU(const Matrix &rhs)
GPUdi() bool IsInUse(const T *p) const
GLdouble n
Definition glcorearb.h:1982
GLfloat GLfloat GLfloat alpha
Definition glcorearb.h:279
GLint GLenum GLint x
Definition glcorearb.h:403
GLuint GLuint end
Definition glcorearb.h:469
const GLdouble * v
Definition glcorearb.h:832
GLdouble f
Definition glcorearb.h:310
GLint GLint GLsizei GLint GLenum GLenum type
Definition glcorearb.h:275
GLintptr offset
Definition glcorearb.h:660
GLboolean GLboolean g
Definition glcorearb.h:1233
std::array< T, N > array
constexpr float mm
Definition SpecsV2.h:28
constexpr auto do_make(F f, indices< I... >) -> gpu::gpustd::array< int, sizeof...(I)>
Definition SMatrixGPU.h:284
constexpr auto make(F f) -> gpu::gpustd::array< int, N >
Definition SMatrixGPU.h:291
return((dphi > 0 &&dphi< constants::math::PI)||dphi< -constants::math::PI) ? true GPUhdi() const expr bool okForPhiMax(T phiMax
GPUd() SVectorGPU< T
D const SVectorGPU< T, D > & rhs
Definition SMatrixGPU.h:191
TranspPolicyGPU< T, D1, D2, R >::RepType Transpose(const SMatrixGPU< T, D1, D2, R > &rhs)
const SMatrixGPU< T, D1, D2, R > D2
Definition SMatrixGPU.h:529
const SMatrixGPU< T, D1, D2, R > D1
Definition SMatrixGPU.h:529
GPUdi() int nint(T x)
Definition basicMath.h:66
T Dot(const SVector< T, D > &lhs, const SVector< T, D > &rhs)
Definition Cartesian.h:257
std::unique_ptr< GPUReconstructionTimeframe > tf
GPUd() static void Evaluate(SMatrixGPU< T
MatRepStdGPU< T, N1, N2 > RepType
Definition SMatrixGPU.h:717
SMatrixGPU starting port here.
Definition SMatrixGPU.h:403
MatRepStdGPU< T, N2, N1 > RepType
static GPUdi() T f(const A &lhs
static GPUdi() T f(const A &lhs
static GPUdi() typename MatrixA
Definition SMatrixGPU.h:217
o2::InteractionRecord ir(0, 0)