Project
Loading...
Searching...
No Matches
BandMatrixSolver.cxx
Go to the documentation of this file.
1// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3// All rights not expressly granted are reserved.
4//
5// This software is distributed under the terms of the GNU General Public
6// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7//
8// In applying this license CERN does not waive the privileges and immunities
9// granted to it by virtue of its status as an Intergovernmental Organization
10// or submit itself to any jurisdiction.
11
16
17#include "BandMatrixSolver.h"
18#include "GPUCommonLogger.h"
19#include <random>
20#include <iomanip>
21#include <chrono>
22
23using namespace std;
24using namespace o2::gpu;
25
27
28template <>
29int32_t BandMatrixSolver<0>::test(bool prn)
30{
31 constexpr int32_t n = 30;
32 constexpr int32_t m = 6;
33 constexpr int32_t d = 3;
34
35 // std::random_device rd; // Will be used to obtain a seed for the random
36 std::mt19937 gen(1); // Standard mersenne_twister_engine seeded with 1
37 std::uniform_real_distribution<> uniform(-.999, .999);
38
39 double maxDiff = 0.;
40 double maxDiffType1 = 0.;
41 int32_t nTries = 10000;
42
43 auto tmpTime = std::chrono::high_resolution_clock::now();
44 auto duration = std::chrono::duration_cast<std::chrono::nanoseconds>(tmpTime - tmpTime);
45 auto durationMult = duration;
46
47 for (int32_t iter = 0; iter < nTries; iter++) {
48
49 double x[n][d];
50 double A[n][n];
51 double Atype1[n][n];
52
53 {
54 for (int32_t i = 0; i < n; i++) {
55 for (int32_t j = 0; j < d; j++) {
56 x[i][j] = 1. * uniform(gen);
57 }
58 }
59 for (int32_t i = 0; i < n; i++) {
60 A[i][i] = fabs(2. + uniform(gen));
61 }
62 for (int32_t i = 0; i < n; i++) {
63 for (int32_t j = i + 1; j < n; j++) {
64 if (j < i + m) {
65 A[i][j] = A[i][i] * A[j][j] * uniform(gen);
66 } else {
67 A[i][j] = 0;
68 }
69 A[j][i] = A[i][j];
70 }
71 }
72 for (int32_t i = 0; i < n; i++) {
73 A[i][i] = A[i][i] * A[i][i];
74 }
75 }
76
77 for (int32_t i = 0; i < n; i++) {
78 int32_t oddRow = ((i % 2) != 0);
79 for (int32_t j = i; j < n; j++) {
80 if (j < i + m - oddRow) {
81 Atype1[i][j] = A[i][j];
82 } else {
83 Atype1[i][j] = 0;
84 }
85 Atype1[j][i] = Atype1[i][j];
86 }
87 }
88
89 if (prn && iter == nTries - 1) {
90 LOG(info) << "Matrix A:";
91 for (int32_t i = 0; i < n; i++) {
92 LOG(info) << "";
93 for (int32_t j = 0; j < n; j++) {
94 LOG(info) << std::fixed << std::setw(5) << std::setprecision(2) << A[i][j] << " ";
95 }
96 }
97 LOG(info) << "";
98 LOG(info) << "\nMatrix A type 1:";
99 for (int32_t i = 0; i < n; i++) {
100 LOG(info) << "";
101 for (int32_t j = 0; j < n; j++) {
102 LOG(info) << std::fixed << std::setw(5) << std::setprecision(2) << Atype1[i][j] << " ";
103 }
104 }
105 LOG(info) << "";
106 }
107
108 double B[n][d];
109 for (int32_t i = 0; i < n; i++) {
110 for (int32_t k = 0; k < d; k++) {
111 B[i][k] = 0.;
112 }
113 for (int32_t j = 0; j < n; j++) {
114 for (int32_t k = 0; k < d; k++) {
115 B[i][k] += x[j][k] * A[i][j];
116 }
117 }
118 }
119
120 double Btype1[n][d];
121 auto startMult = std::chrono::high_resolution_clock::now();
122 for (int32_t i = 0; i < n; i++) {
123 for (int32_t k = 0; k < d; k++) {
124 Btype1[i][k] = 0.;
125 }
126 for (int32_t j = 0; j < n; j++) {
127 for (int32_t k = 0; k < d; k++) {
128 Btype1[i][k] += x[j][k] * Atype1[i][j];
129 }
130 }
131 }
132 auto stopMult = std::chrono::high_resolution_clock::now();
133 durationMult += std::chrono::duration_cast<std::chrono::nanoseconds>(stopMult - startMult);
134
135 BandMatrixSolver<m> band(n, d);
136 BandMatrixSolver<m> bandType1(n, d);
137
138 band.initWithNaN();
139 bandType1.initWithNaN();
140
141 for (int32_t i = 0; i < n; i++) {
142 for (int32_t k = 0; k < d; k++) {
143 band.B(i, k) = B[i][k];
144 bandType1.B(i, k) = Btype1[i][k];
145 }
146 int32_t oddRow = ((i % 2) != 0);
147 for (int32_t j = 0; j < m; j++) {
148 if (i + j < n && j < m) {
149 band.A(i, i + j) = A[i][i + j];
150 }
151 if (i + j < n && j < m - oddRow) {
152 bandType1.A(i, i + j) = Atype1[i][i + j];
153 }
154 }
155 }
156
157 band.solve();
158 auto start = std::chrono::high_resolution_clock::now();
159 bandType1.solveType1();
160 auto stop = std::chrono::high_resolution_clock::now();
161 duration += std::chrono::duration_cast<std::chrono::nanoseconds>(stop - start);
162
163 for (int32_t i = 0; i < n; i++) {
164 for (int32_t k = 0; k < d; k++) {
165 double t = fabs(x[i][k] - band.B(i, k));
166 double t1 = fabs(x[i][k] - bandType1.B(i, k));
167 if (!std::isfinite(t) || maxDiff < t) {
168 maxDiff = t;
169 }
170 if (!std::isfinite(t1) || maxDiffType1 < t1) {
171 maxDiffType1 = t1;
172 }
173 }
174 }
175 }
176
177 int32_t ok = (maxDiff < 1.e-6);
178
179 if (prn || !ok) {
180 LOG(info) << std::defaultfloat;
181 LOG(info) << "\n\n Band matrix. Overall max diff: " << maxDiff << "\n";
182 }
183
184 int32_t ok1 = (maxDiffType1 < 1.e-6);
185
186 if (prn || !ok1) {
187 LOG(info) << std::defaultfloat;
188 LOG(info) << "\n\n Band matrix of Type 1. Overall max diff: " << maxDiffType1 << "\n";
189 LOG(info) << " time " << duration.count() / nTries;
190 LOG(info) << " time multiplication " << durationMult.count() / nTries << " ns";
191 }
192
193 return ok && ok1;
194}
195
196template class o2::gpu::BandMatrixSolver<0>;
templateClassImp(o2::gpu::BandMatrixSolver)
Definition of BandMatrixSolver class.
default_random_engine gen(dev())
int32_t i
uint32_t j
Definition RawData.h:0
Definition A.h:16
A()
Definition A.cxx:15
Definition B.h:16
B()
Definition B.cxx:16
static int32_t test(bool prn=0)
Test the class functionality. Returns 1 when ok, 0 when not ok.
GLdouble n
Definition glcorearb.h:1982
GLint GLenum GLint x
Definition glcorearb.h:403
const GLfloat * m
Definition glcorearb.h:4066
GLuint start
Definition glcorearb.h:469
GLuint GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat t1
Definition glcorearb.h:5034
Defining DataPointCompositeObject explicitly as copiable.
LOG(info)<< "Compressed in "<< sw.CpuTime()<< " s"