32 #ifndef VSMC_MATH_CBLAS_HPP 33 #define VSMC_MATH_CBLAS_HPP 40 #if VSMC_USE_MKL_CBLAS 42 #define VSMC_CBLAS_INT MKL_INT 45 #ifndef VSMC_CBLAS_INT 46 #define VSMC_CBLAS_INT int 59 inline T
// Generic asum: visits n elements of x, with index j advancing by incx on
// each iteration, and adds std::fabs(x[j]) to the accumulator sum.
// NOTE(review): the declarations of sum and j and the return statement are
// not visible in this listing; presumably sum is returned -- confirm.
asum(std::size_t n,
const T *x, std::size_t incx)
63 for (std::size_t i = 0; i != n; ++i, j += incx)
64 sum += std::fabs(x[j]);
// Generic fallback whose parameter list (n, a, x, incx, y, incy) matches the
// axpy signature; the name line of the declaration is not visible here.
// The loop runs n iterations, advancing j by incx (index into x) and k by
// incy (index into y).
// NOTE(review): the loop body is not visible in this listing.
72 std::size_t n, T a,
const T *x, std::size_t incx, T *y, std::size_t incy)
76 for (std::size_t i = 0; i != n; ++i, j += incx, k += incy)
// Generic fallback whose parameter list (n, const x, incx, mutable y, incy)
// matches the copy signature; the name line of the declaration is not
// visible here.
// The loop runs n iterations, advancing j by incx and k by incy.
// NOTE(review): the loop body is not visible in this listing.
83 std::size_t n,
const T *x, std::size_t incx, T *y, std::size_t incy)
87 for (std::size_t i = 0; i != n; ++i, j += incx, k += incy)
// Generic fallback whose parameter list (n, const x, incx, const y, incy)
// matches the dot signature; the name line of the declaration is not visible
// here. Both input vectors are read-only.
// The loop runs n iterations, advancing j by incx and k by incy.
// NOTE(review): the accumulation statement and the return are not visible in
// this listing.
94 std::size_t n,
const T *x, std::size_t incx,
const T *y, std::size_t incy)
99 for (std::size_t i = 0; i != n; ++i, j += incx, k += incy)
// Generic nrm2: takes n, a read-only vector x and its stride incx, and
// returns a value of the element type T.
// NOTE(review): the function body is not visible in this listing.
106 template <
typename T>
107 inline T
nrm2(std::size_t n,
const T *x, std::size_t incx)
// Generic scal: takes a scalar a and a mutable vector x of n elements with
// stride incx; x is modified in place (it is the only non-const argument).
// The loop runs n iterations with index j advancing by incx.
// NOTE(review): the loop body is not visible in this listing.
113 template <
typename T>
114 inline void scal(std::size_t n, T a, T *x, std::size_t incx)
117 for (std::size_t i = 0; i != n; ++i, j += incx)
// Generic gemv fallback, templated on the element type T. The name line of
// the declaration (carrying order, trans and m) is not visible in this
// listing; the remaining parameters are n, alpha, A, lda, x, incx, beta, y
// and incy.
128 template <
typename T>
130 std::size_t n, T alpha,
const T *A, std::size_t lda,
const T *x,
131 std::size_t incx, T beta, T *y, std::size_t incy)
// nrow is the length of y and ncol the length of x: (m, n) when trans is
// NoTrans, (n, m) otherwise.
133 std::size_t nrow = trans ==
NoTrans ? m : n;
134 std::size_t ncol = trans ==
NoTrans ? n : m;
// Apply beta to y up front so the loops below only have to accumulate.
136 scal(nrow, beta, y, incy);
// First traversal: output element r is updated with alpha times the dot
// product of x and the unit-stride run of ncol elements of A that starts at
// offset r * lda.
// NOTE(review): the condition selecting between the two traversals is not
// visible in this listing.
141 for (std::size_t r = 0; r != nrow; ++r, k += incy)
142 y[k] += alpha * dot<T>(ncol, x, incx, A + r * lda, 1);
// Second traversal: for each c, l starts at c * lda and advances by one per
// r, so A is again read in unit-stride runs.
145 for (std::size_t c = 0; c != ncol; ++c, j += incx) {
147 std::size_t l = c * lda;
// alpha * x[j] is loop-invariant for the inner loop, so it is hoisted. It is
// held in T (not a hard-coded double) so this branch computes in the same
// type as the dot<T> branch above and does not narrow or widen T.
148 const T ax = alpha * x[j];
// NOTE(review): the inner loop body is not visible in this listing.
149 for (std::size_t r = 0; r != nrow; ++r, ++l, k += incy)
// The overloads from here on are compiled only when VSMC_CBLAS_INT is defined.
// asum (float): forwards to ::cblas_sasum and returns its result. n and incx
// are converted from std::size_t to VSMC_CBLAS_INT with static_cast; no range
// check is visible here.
159 #ifdef VSMC_CBLAS_INT 164 inline float asum(std::size_t n,
const float *x, std::size_t incx)
166 return ::cblas_sasum(
167 static_cast<VSMC_CBLAS_INT>(n), x, static_cast<VSMC_CBLAS_INT>(incx));
// asum (double): forwards to ::cblas_dasum and returns its result. n and incx
// are converted from std::size_t to VSMC_CBLAS_INT with static_cast; no range
// check is visible here.
170 inline double asum(std::size_t n,
const double *x, std::size_t incx)
172 return ::cblas_dasum(
173 static_cast<VSMC_CBLAS_INT>(n), x, static_cast<VSMC_CBLAS_INT>(incx));
// axpy (float): forwards to ::cblas_saxpy, passing a, x and y through
// unchanged. n, incx and incy are converted from std::size_t to
// VSMC_CBLAS_INT with static_cast; no range check is visible here.
176 inline void axpy(std::size_t n,
float a,
const float *x, std::size_t incx,
177 float *y, std::size_t incy)
179 ::cblas_saxpy(static_cast<VSMC_CBLAS_INT>(n), a, x,
180 static_cast<VSMC_CBLAS_INT>(incx), y,
181 static_cast<VSMC_CBLAS_INT>(incy));
// axpy (double): forwards to ::cblas_daxpy, passing a, x and y through
// unchanged. n, incx and incy are converted from std::size_t to
// VSMC_CBLAS_INT with static_cast; no range check is visible here.
184 inline void axpy(std::size_t n,
double a,
const double *x, std::size_t incx,
185 double *y, std::size_t incy)
187 ::cblas_daxpy(static_cast<VSMC_CBLAS_INT>(n), a, x,
188 static_cast<VSMC_CBLAS_INT>(incx), y,
189 static_cast<VSMC_CBLAS_INT>(incy));
// copy (float): forwards to ::cblas_scopy with x as the read-only argument
// and y as the mutable one. n, incx and incy are converted to VSMC_CBLAS_INT
// with static_cast; no range check is visible here.
// NOTE(review): the signature line declaring incy is not visible in this
// listing, although incy is used in the call.
192 inline void copy(std::size_t n,
const float *x, std::size_t incx,
float *y,
195 ::cblas_scopy(static_cast<VSMC_CBLAS_INT>(n), x,
196 static_cast<VSMC_CBLAS_INT>(incx), y,
197 static_cast<VSMC_CBLAS_INT>(incy));
// copy (double): forwards to ::cblas_dcopy with x as the read-only argument
// and y as the mutable one. n, incx and incy are converted to VSMC_CBLAS_INT
// with static_cast; no range check is visible here.
// NOTE(review): the signature line declaring incy is not visible in this
// listing, although incy is used in the call.
200 inline void copy(std::size_t n,
const double *x, std::size_t incx,
double *y,
203 ::cblas_dcopy(static_cast<VSMC_CBLAS_INT>(n), x,
204 static_cast<VSMC_CBLAS_INT>(incx), y,
205 static_cast<VSMC_CBLAS_INT>(incy));
// dot (float): forwards to ::cblas_sdot and returns its result. Both x and y
// are read-only. n, incx and incy are converted from std::size_t to
// VSMC_CBLAS_INT with static_cast; no range check is visible here.
208 inline float dot(std::size_t n,
const float *x, std::size_t incx,
209 const float *y, std::size_t incy)
211 return ::cblas_sdot(static_cast<VSMC_CBLAS_INT>(n), x,
212 static_cast<VSMC_CBLAS_INT>(incx), y,
213 static_cast<VSMC_CBLAS_INT>(incy));
// dot (double): forwards to ::cblas_ddot and returns its result. Both x and y
// are read-only. n, incx and incy are converted from std::size_t to
// VSMC_CBLAS_INT with static_cast; no range check is visible here.
216 inline double dot(std::size_t n,
const double *x, std::size_t incx,
217 const double *y, std::size_t incy)
219 return ::cblas_ddot(static_cast<VSMC_CBLAS_INT>(n), x,
220 static_cast<VSMC_CBLAS_INT>(incx), y,
221 static_cast<VSMC_CBLAS_INT>(incy));
// nrm2 (float): forwards to ::cblas_snrm2 and returns its result. n and incx
// are converted from std::size_t to VSMC_CBLAS_INT with static_cast; no range
// check is visible here.
224 inline float nrm2(std::size_t n,
const float *x, std::size_t incx)
226 return ::cblas_snrm2(
227 static_cast<VSMC_CBLAS_INT>(n), x, static_cast<VSMC_CBLAS_INT>(incx));
// nrm2 (double): forwards to ::cblas_dnrm2 and returns its result. n and incx
// are converted from std::size_t to VSMC_CBLAS_INT with static_cast; no range
// check is visible here.
230 inline double nrm2(std::size_t n,
const double *x, std::size_t incx)
232 return ::cblas_dnrm2(
233 static_cast<VSMC_CBLAS_INT>(n), x, static_cast<VSMC_CBLAS_INT>(incx));
// scal (float): forwards to ::cblas_sscal, which receives the mutable vector
// x and the scalar a. n and incx are converted from std::size_t to
// VSMC_CBLAS_INT with static_cast; no range check is visible here.
236 inline void scal(std::size_t n,
float a,
float *x, std::size_t incx)
238 ::cblas_sscal(static_cast<VSMC_CBLAS_INT>(n), a, x,
239 static_cast<VSMC_CBLAS_INT>(incx));
// scal (double): forwards to ::cblas_dscal, which receives the mutable vector
// x and the scalar a. n and incx are converted from std::size_t to
// VSMC_CBLAS_INT with static_cast; no range check is visible here.
242 inline void scal(std::size_t n,
double a,
double *x, std::size_t incx)
244 ::cblas_dscal(static_cast<VSMC_CBLAS_INT>(n), a, x,
245 static_cast<VSMC_CBLAS_INT>(incx));
// gemv (float): forwards to ::cblas_sgemv.
// NOTE(review): the name line of the declaration is not visible in this
// listing; order, trans and m are used below and presumably declared there.
249 std::size_t n,
float alpha,
const float *A, std::size_t lda,
250 const float *x, std::size_t incx,
float beta,
float *y, std::size_t incy)
// order maps to ::CblasRowMajor when it equals RowMajor and to
// ::CblasColMajor for any other value.
252 ::cblas_sgemv((order ==
RowMajor ? ::CblasRowMajor : ::CblasColMajor),
// trans maps to ::CblasNoTrans when it equals NoTrans and to ::CblasTrans
// for any other value.
253 (trans ==
NoTrans ? ::CblasNoTrans : ::CblasTrans),
// m, n, lda, incx and incy are converted from std::size_t to VSMC_CBLAS_INT
// with static_cast; no range check is visible here.
254 static_cast<VSMC_CBLAS_INT>(m), static_cast<VSMC_CBLAS_INT>(n), alpha,
255 A, static_cast<VSMC_CBLAS_INT>(lda), x,
256 static_cast<VSMC_CBLAS_INT>(incx), beta, y,
257 static_cast<VSMC_CBLAS_INT>(incy));
// gemv (double): forwards to ::cblas_dgemv.
// NOTE(review): the name line of the declaration and the line declaring incy
// are not visible in this listing; order, trans, m and incy are used below
// and presumably declared there.
261 std::size_t n,
double alpha,
const double *A, std::size_t lda,
262 const double *x, std::size_t incx,
double beta,
double *y,
// order maps to ::CblasRowMajor when it equals RowMajor and to
// ::CblasColMajor for any other value.
265 ::cblas_dgemv((order ==
RowMajor ? ::CblasRowMajor : ::CblasColMajor),
// trans maps to ::CblasNoTrans when it equals NoTrans and to ::CblasTrans
// for any other value.
266 (trans ==
NoTrans ? ::CblasNoTrans : ::CblasTrans),
// m, n, lda, incx and incy are converted from std::size_t to VSMC_CBLAS_INT
// with static_cast; no range check is visible here.
267 static_cast<VSMC_CBLAS_INT>(m), static_cast<VSMC_CBLAS_INT>(n), alpha,
268 A, static_cast<VSMC_CBLAS_INT>(lda), x,
269 static_cast<VSMC_CBLAS_INT>(incx), beta, y,
270 static_cast<VSMC_CBLAS_INT>(incy));
275 #endif // VSMC_CBLAS_INT 277 #endif // VSMC_MATH_CBLAS_HPP T dot(std::size_t n, const T *x, std::size_t incx, const T *y, std::size_t incy)
Computes a vector-vector dot product.
T asum(std::size_t n, const T *x, std::size_t incx)
Computes the sum of magnitudes of the vector elements.
T nrm2(std::size_t n, const T *x, std::size_t incx)
Computes the Euclidean norm of a vector.
void sqrt(std::size_t n, const float *a, float *y)
void copy(std::size_t n, const T *x, std::size_t incx, T *y, std::size_t incy)
Copies vector to another vector.
ColMajor: Data are stored column by column in memory.
void scal(std::size_t n, T a, T *x, std::size_t incx)
Computes the product of a vector by a scalar.
RowMajor: Data are stored row by row in memory.
MatrixTrans
Matrix Transpose.
void axpy(std::size_t n, T a, const T *x, std::size_t incx, T *y, std::size_t incy)
Computes a vector-scalar product and adds the result to a vector.
Trans: The matrix shall be transposed.
void gemv(MatrixOrder order, MatrixTrans trans, std::size_t m, std::size_t n, T alpha, const T *A, std::size_t lda, const T *x, std::size_t incx, T beta, T *y, std::size_t incy)
Computes a matrix-vector product using a general matrix.
NoTrans: The matrix shall not be transposed.