31 constexpr int32_t
n = 30;
32 constexpr int32_t
m = 6;
33 constexpr int32_t d = 3;
37 std::uniform_real_distribution<> uniform(-.999, .999);
40 double maxDiffType1 = 0.;
41 int32_t nTries = 10000;
43 auto tmpTime = std::chrono::high_resolution_clock::now();
44 auto duration = std::chrono::duration_cast<std::chrono::nanoseconds>(tmpTime - tmpTime);
45 auto durationMult = duration;
47 for (int32_t iter = 0; iter < nTries; iter++) {
54 for (int32_t
i = 0;
i <
n;
i++) {
55 for (int32_t
j = 0;
j < d;
j++) {
56 x[
i][
j] = 1. * uniform(
gen);
59 for (int32_t
i = 0;
i <
n;
i++) {
60 A[
i][
i] = fabs(2. + uniform(
gen));
62 for (int32_t
i = 0;
i <
n;
i++) {
63 for (int32_t
j =
i + 1;
j <
n;
j++) {
72 for (int32_t
i = 0;
i <
n;
i++) {
77 for (int32_t
i = 0;
i <
n;
i++) {
78 int32_t oddRow = ((
i % 2) != 0);
79 for (int32_t
j =
i;
j <
n;
j++) {
80 if (
j <
i +
m - oddRow) {
81 Atype1[
i][
j] =
A[
i][
j];
85 Atype1[
j][
i] = Atype1[
i][
j];
89 if (prn && iter == nTries - 1) {
90 LOG(info) <<
"Matrix A:";
91 for (int32_t
i = 0;
i <
n;
i++) {
93 for (int32_t
j = 0;
j <
n;
j++) {
94 LOG(info) << std::fixed << std::setw(5) << std::setprecision(2) <<
A[
i][
j] <<
" ";
98 LOG(info) <<
"\nMatrix A type 1:";
99 for (int32_t
i = 0;
i <
n;
i++) {
101 for (int32_t
j = 0;
j <
n;
j++) {
102 LOG(info) << std::fixed << std::setw(5) << std::setprecision(2) << Atype1[
i][
j] <<
" ";
109 for (int32_t
i = 0;
i <
n;
i++) {
110 for (int32_t k = 0; k < d; k++) {
113 for (int32_t
j = 0;
j <
n;
j++) {
114 for (int32_t k = 0; k < d; k++) {
121 auto startMult = std::chrono::high_resolution_clock::now();
122 for (int32_t
i = 0;
i <
n;
i++) {
123 for (int32_t k = 0; k < d; k++) {
126 for (int32_t
j = 0;
j <
n;
j++) {
127 for (int32_t k = 0; k < d; k++) {
128 Btype1[
i][k] +=
x[
j][k] * Atype1[
i][
j];
132 auto stopMult = std::chrono::high_resolution_clock::now();
133 durationMult += std::chrono::duration_cast<std::chrono::nanoseconds>(stopMult - startMult);
139 bandType1.initWithNaN();
141 for (int32_t
i = 0;
i <
n;
i++) {
142 for (int32_t k = 0; k < d; k++) {
143 band.B(
i, k) =
B[
i][k];
144 bandType1.
B(
i, k) = Btype1[
i][k];
146 int32_t oddRow = ((
i % 2) != 0);
147 for (int32_t
j = 0;
j <
m;
j++) {
148 if (
i +
j <
n &&
j <
m) {
149 band.A(
i,
i +
j) =
A[
i][
i +
j];
151 if (
i +
j <
n &&
j <
m - oddRow) {
152 bandType1.
A(
i,
i +
j) = Atype1[
i][
i +
j];
158 auto start = std::chrono::high_resolution_clock::now();
159 bandType1.solveType1();
160 auto stop = std::chrono::high_resolution_clock::now();
161 duration += std::chrono::duration_cast<std::chrono::nanoseconds>(stop -
start);
163 for (int32_t
i = 0;
i <
n;
i++) {
164 for (int32_t k = 0; k < d; k++) {
165 double t = fabs(
x[
i][k] - band.B(
i, k));
166 double t1 = fabs(
x[
i][k] - bandType1.B(
i, k));
167 if (!std::isfinite(t) || maxDiff < t) {
170 if (!std::isfinite(
t1) || maxDiffType1 <
t1) {
177 int32_t ok = (maxDiff < 1.e-6);
180 LOG(info) << std::defaultfloat;
181 LOG(info) <<
"\n\n Band matrix. Overall max diff: " << maxDiff <<
"\n";
184 int32_t ok1 = (maxDiffType1 < 1.e-6);
187 LOG(info) << std::defaultfloat;
188 LOG(info) <<
"\n\n Band matrix of Type 1. Overall max diff: " << maxDiffType1 <<
"\n";
189 LOG(info) <<
" time " << duration.count() / nTries;
190 LOG(info) <<
" time multiplication " << durationMult.count() / nTries <<
" ns";
templateClassImp(o2::gpu::BandMatrixSolver)
Definition of BandMatrixSolver class.
default_random_engine gen(dev())
static int32_t test(bool prn=0)
Test the class functionality. Returns 1 when ok, 0 when not ok.
GLuint GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat t1
Defining DataPointCompositeObject explicitly as copiable.
LOG(info)<< "Compressed in "<< sw.CpuTime()<< " s"