helper_cusolver.h
4.06 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
/*
* Copyright 2015 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
#ifndef HELPER_CUSOLVER
#define HELPER_CUSOLVER
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <math.h>
#include <cuda_runtime.h>
#include "cusparse.h"
#define SWITCH_CHAR '-'
/*
 * Bundle of options for a test run. Per the field comments below, each
 * member corresponds to one command-line switch; switches are introduced
 * by SWITCH_CHAR ('-'). The code that parses the command line and fills in
 * this struct is not part of this header.
 */
struct testOpts {
/* path of the sparse matrix input file */
char *sparse_mat_filename; // by switch -F<filename>
/* NOTE(review): presumably the name of the routine/solver to test - confirm against the parser */
const char *testFunc; // by switch -R<name>
/* NOTE(review): presumably the name of the reordering scheme - confirm against the parser */
const char *reorder; // by switch -P<name>
/* NOTE(review): presumably a leading dimension, going by the name - confirm against the parser */
int lda; // by switch -lda<int>
};
/*
 * Infinity norm of a vector: the largest |x[k]| for k = 0 .. n-1.
 *
 *   n : number of entries read from x
 *   x : input values
 *
 * Returns 0 when n <= 0 (the loop body never runs).
 */
double vec_norminf(int n, const double *x)
{
    double largest = 0;
    int k = 0;
    while (k < n) {
        const double magnitude = fabs(x[k]);
        /* written as !(a > b) rather than (a <= b) so that comparisons
         * involving NaN select the new value, as a ternary on (a > b) would */
        if (!(largest > magnitude)) {
            largest = magnitude;
        }
        ++k;
    }
    return largest;
}
/*
 * |A|_inf = max( |A| * ones(n,1) ), i.e. the largest absolute row sum.
 *
 *   m   : number of rows
 *   n   : number of columns
 *   A   : matrix entries; element (i,j) is read from A[i + j*lda]
 *   lda : distance (in elements) between consecutive columns of A
 *
 * Returns 0 when m <= 0 or n <= 0.
 */
double mat_norminf(
    int m,
    int n,
    const double *A,
    int lda)
{
    double norminf = 0;
    for(int i = 0 ; i < m ; i++){
        double sum = 0.0;
        for(int j = 0 ; j < n ; j++){
            /* compute the offset in 64-bit arithmetic: j*lda overflows int
             * once the matrix holds more than INT_MAX elements */
            const long long offset = (long long)i + (long long)j * (long long)lda;
            sum += fabs(A[offset]);
        }
        norminf = (norminf > sum)? norminf : sum;
    }
    return norminf;
}
/*
 * |A|_inf = max( |A| * ones(n,1) ), i.e. the largest absolute row sum,
 * for a matrix held in CSR format.
 *
 *   m          : number of rows
 *   descrA     : descriptor; only its index base (0 or 1) is queried
 *   csrValA    : stored values
 *   csrRowPtrA : row i occupies positions csrRowPtrA[i] .. csrRowPtrA[i+1]-1
 *                (after subtracting the index base)
 *
 * n, nnzA and csrColIndA are accepted but not read: a row sum only needs
 * the stored values, not their column positions.
 */
double csr_mat_norminf(
    int m,
    int n,
    int nnzA,
    const cusparseMatDescr_t descrA,
    const double *csrValA,
    const int *csrRowPtrA,
    const int *csrColIndA)
{
    int base = 0;
    if (cusparseGetMatIndexBase(descrA) == CUSPARSE_INDEX_BASE_ONE) {
        base = 1;
    }

    double max_row_sum = 0;
    for (int row = 0; row < m; ++row) {
        const int first     = csrRowPtrA[row]     - base;
        const int past_last = csrRowPtrA[row + 1] - base;

        double row_sum = 0.0;
        for (int k = first; k < past_last; ++k) {
            row_sum += fabs(csrValA[k]);
        }

        /* !(a > b) instead of (a <= b) keeps the NaN behaviour of a
         * ternary on (a > b) */
        if (!(max_row_sum > row_sum)) {
            max_row_sum = row_sum;
        }
    }
    return max_row_sum;
}
/*
 * Print a CSR matrix to stdout, one stored entry per line, using 1-based
 * (MATLAB-style) row and column numbers regardless of the index base
 * recorded in descrA.
 *
 *   m, n, nnzA : dimensions and entry count, echoed in the first line
 *   descrA     : descriptor; only its index base (0 or 1) is queried
 *   csrValA, csrRowPtrA, csrColIndA : the CSR arrays
 */
void display_matrix(
    int m,
    int n,
    int nnzA,
    const cusparseMatDescr_t descrA,
    const double *csrValA,
    const int *csrRowPtrA,
    const int *csrColIndA)
{
    int base = 0;
    if (cusparseGetMatIndexBase(descrA) == CUSPARSE_INDEX_BASE_ONE) {
        base = 1;
    }

    printf("m = %d, n = %d, nnz = %d, matlab base-1\n", m, n, nnzA);

    for (int r = 0; r < m; ++r) {
        int k = csrRowPtrA[r] - base;
        const int stop = csrRowPtrA[r + 1] - base;
        while (k < stop) {
            /* normalise to 0-based first, then shift to 1-based for display */
            const int c = csrColIndA[k] - base;
            const double value = csrValA[k];
            printf("A(%d, %d) = %20.16E\n", r + 1, c + 1, value);
            ++k;
        }
    }
}
#if defined(_WIN32)
#if !defined(WIN32_LEAN_AND_MEAN)
#define WIN32_LEAN_AND_MEAN
#endif
#include <windows.h>
/*
 * Return a timestamp in seconds, intended for measuring elapsed time as the
 * difference of two calls.
 *
 * On the first call the performance-counter frequency is queried and its
 * reciprocal cached in a static. If the query reports failure (or a
 * non-positive frequency), every call falls back to GetTickCount(), which
 * is converted from milliseconds to seconds.
 *
 * The one-time setup uses plain static variables with no locking.
 */
double second (void)
{
    LARGE_INTEGER t;
    static double oofreq;               /* 1 / counter frequency */
    static int checkedForHighResTimer;
    static BOOL hasHighResTimer;
    if (!checkedForHighResTimer) {
        hasHighResTimer = QueryPerformanceFrequency (&t);
        /* only touch t.QuadPart when the query succeeded: on failure t may
         * not have been written, and a zero frequency would give 1/0 */
        if (hasHighResTimer && t.QuadPart > 0) {
            oofreq = 1.0 / (double)t.QuadPart;
        } else {
            hasHighResTimer = FALSE;
        }
        checkedForHighResTimer = 1;
    }
    if (hasHighResTimer) {
        QueryPerformanceCounter (&t);
        return (double)t.QuadPart * oofreq;
    } else {
        return (double)GetTickCount() / 1000.0;
    }
}
#elif defined(__linux) || defined(__QNX__)
#include <stddef.h>
#include <sys/time.h>
#include <sys/resource.h>
/*
 * Return the current gettimeofday() time as a double, in seconds, with the
 * microsecond field folded in as the fractional part. Intended for
 * measuring elapsed time as the difference of two calls.
 */
double second (void)
{
    struct timeval now;
    gettimeofday(&now, NULL);

    const double whole_seconds = (double)now.tv_sec;
    const double fraction      = (double)now.tv_usec / 1000000.0;
    return whole_seconds + fraction;
}
#elif defined(__APPLE__)
#include <stddef.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/types.h>
#include <sys/sysctl.h>
/*
 * Return the current gettimeofday() time in seconds as a double; the
 * microsecond field supplies the fractional part. Intended for measuring
 * elapsed time as the difference of two calls.
 */
double second (void)
{
    struct timeval stamp;
    double usec_as_sec;

    gettimeofday(&stamp, NULL);
    usec_as_sec = (double)stamp.tv_usec / 1000000.0;
    return (double)stamp.tv_sec + usec_as_sec;
}
#else
#error unsupported platform
#endif
#endif