vSMC
vSMC: Scalable Monte Carlo
cblas.hpp
Go to the documentation of this file.
1 //============================================================================
2 // vSMC/include/vsmc/math/cblas.hpp
3 //----------------------------------------------------------------------------
4 // vSMC: Scalable Monte Carlo
5 //----------------------------------------------------------------------------
6 // Copyright (c) 2013-2015, Yan Zhou
7 // All rights reserved.
8 //
9 // Redistribution and use in source and binary forms, with or without
10 // modification, are permitted provided that the following conditions are met:
11 //
12 // Redistributions of source code must retain the above copyright notice,
13 // this list of conditions and the following disclaimer.
14 //
15 // Redistributions in binary form must reproduce the above copyright notice,
16 // this list of conditions and the following disclaimer in the documentation
17 // and/or other materials provided with the distribution.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS AS IS
20 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23 // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 // POSSIBILITY OF SUCH DAMAGE.
30 //============================================================================
31 
32 #ifndef VSMC_MATH_CBLAS_HPP
33 #define VSMC_MATH_CBLAS_HPP
34 
35 #include <vsmc/internal/config.h>
37 #include <cmath>
38 #include <numeric>
39 
40 #if VSMC_USE_MKL_CBLAS
41 #include <mkl.h>
42 #define VSMC_CBLAS_INT MKL_INT
43 #elif VSMC_HAS_CBLAS
44 #include <cblas.h>
45 #ifndef VSMC_CBLAS_INT
46 #define VSMC_CBLAS_INT int
47 #endif
48 #endif
49 
50 namespace vsmc
51 {
52 
56 
58 template <typename T>
59 inline T asum(std::size_t n, const T *x, std::size_t incx)
60 {
61  T sum = 0;
62  std::size_t j = 0;
63  for (std::size_t i = 0; i != n; ++i, j += incx)
64  sum += std::fabs(x[j]);
65 
66  return sum;
67 }
68 
70 template <typename T>
71 inline void axpy(
72  std::size_t n, T a, const T *x, std::size_t incx, T *y, std::size_t incy)
73 {
74  std::size_t j = 0;
75  std::size_t k = 0;
76  for (std::size_t i = 0; i != n; ++i, j += incx, k += incy)
77  y[k] += a * x[j];
78 }
79 
81 template <typename T>
82 inline void copy(
83  std::size_t n, const T *x, std::size_t incx, T *y, std::size_t incy)
84 {
85  std::size_t j = 0;
86  std::size_t k = 0;
87  for (std::size_t i = 0; i != n; ++i, j += incx, k += incy)
88  y[k] = x[j];
89 }
90 
92 template <typename T>
93 inline T dot(
94  std::size_t n, const T *x, std::size_t incx, const T *y, std::size_t incy)
95 {
96  T sum = 0;
97  std::size_t j = 0;
98  std::size_t k = 0;
99  for (std::size_t i = 0; i != n; ++i, j += incx, k += incy)
100  sum += x[j] * y[k];
101 
102  return sum;
103 }
104 
106 template <typename T>
107 inline T nrm2(std::size_t n, const T *x, std::size_t incx)
108 {
109  return std::sqrt(dot(n, x, incx, x, incx));
110 }
111 
113 template <typename T>
114 inline void scal(std::size_t n, T a, T *x, std::size_t incx)
115 {
116  std::size_t j = 0;
117  for (std::size_t i = 0; i != n; ++i, j += incx)
118  x[j] *= a;
119 }
120 
122 
126 
128 template <typename T>
129 inline void gemv(MatrixOrder order, MatrixTrans trans, std::size_t m,
130  std::size_t n, T alpha, const T *A, std::size_t lda, const T *x,
131  std::size_t incx, T beta, T *y, std::size_t incy)
132 {
133  std::size_t nrow = trans == NoTrans ? m : n;
134  std::size_t ncol = trans == NoTrans ? n : m;
135 
136  scal(nrow, beta, y, incy);
137 
138  if ((order == RowMajor && trans == NoTrans) ||
139  (order == ColMajor && trans == Trans)) {
140  std::size_t k = 0;
141  for (std::size_t r = 0; r != nrow; ++r, k += incy)
142  y[k] += alpha * dot<T>(ncol, x, incx, A + r * lda, 1);
143  } else {
144  std::size_t j = 0;
145  for (std::size_t c = 0; c != ncol; ++c, j += incx) {
146  std::size_t k = 0;
147  std::size_t l = c * lda;
148  const double ax = alpha * x[j];
149  for (std::size_t r = 0; r != nrow; ++r, ++l, k += incy)
150  y[k] += ax * A[l];
151  }
152  }
153 }
154 
156 
157 } // namespace vsmc
158 
159 #ifdef VSMC_CBLAS_INT
160 
161 namespace vsmc
162 {
163 
164 inline float asum(std::size_t n, const float *x, std::size_t incx)
165 {
166  return ::cblas_sasum(
167  static_cast<VSMC_CBLAS_INT>(n), x, static_cast<VSMC_CBLAS_INT>(incx));
168 }
169 
170 inline double asum(std::size_t n, const double *x, std::size_t incx)
171 {
172  return ::cblas_dasum(
173  static_cast<VSMC_CBLAS_INT>(n), x, static_cast<VSMC_CBLAS_INT>(incx));
174 }
175 
176 inline void axpy(std::size_t n, float a, const float *x, std::size_t incx,
177  float *y, std::size_t incy)
178 {
179  ::cblas_saxpy(static_cast<VSMC_CBLAS_INT>(n), a, x,
180  static_cast<VSMC_CBLAS_INT>(incx), y,
181  static_cast<VSMC_CBLAS_INT>(incy));
182 }
183 
184 inline void axpy(std::size_t n, double a, const double *x, std::size_t incx,
185  double *y, std::size_t incy)
186 {
187  ::cblas_daxpy(static_cast<VSMC_CBLAS_INT>(n), a, x,
188  static_cast<VSMC_CBLAS_INT>(incx), y,
189  static_cast<VSMC_CBLAS_INT>(incy));
190 }
191 
192 inline void copy(std::size_t n, const float *x, std::size_t incx, float *y,
193  std::size_t incy)
194 {
195  ::cblas_scopy(static_cast<VSMC_CBLAS_INT>(n), x,
196  static_cast<VSMC_CBLAS_INT>(incx), y,
197  static_cast<VSMC_CBLAS_INT>(incy));
198 }
199 
200 inline void copy(std::size_t n, const double *x, std::size_t incx, double *y,
201  std::size_t incy)
202 {
203  ::cblas_dcopy(static_cast<VSMC_CBLAS_INT>(n), x,
204  static_cast<VSMC_CBLAS_INT>(incx), y,
205  static_cast<VSMC_CBLAS_INT>(incy));
206 }
207 
208 inline float dot(std::size_t n, const float *x, std::size_t incx,
209  const float *y, std::size_t incy)
210 {
211  return ::cblas_sdot(static_cast<VSMC_CBLAS_INT>(n), x,
212  static_cast<VSMC_CBLAS_INT>(incx), y,
213  static_cast<VSMC_CBLAS_INT>(incy));
214 }
215 
216 inline double dot(std::size_t n, const double *x, std::size_t incx,
217  const double *y, std::size_t incy)
218 {
219  return ::cblas_ddot(static_cast<VSMC_CBLAS_INT>(n), x,
220  static_cast<VSMC_CBLAS_INT>(incx), y,
221  static_cast<VSMC_CBLAS_INT>(incy));
222 }
223 
224 inline float nrm2(std::size_t n, const float *x, std::size_t incx)
225 {
226  return ::cblas_snrm2(
227  static_cast<VSMC_CBLAS_INT>(n), x, static_cast<VSMC_CBLAS_INT>(incx));
228 }
229 
230 inline double nrm2(std::size_t n, const double *x, std::size_t incx)
231 {
232  return ::cblas_dnrm2(
233  static_cast<VSMC_CBLAS_INT>(n), x, static_cast<VSMC_CBLAS_INT>(incx));
234 }
235 
236 inline void scal(std::size_t n, float a, float *x, std::size_t incx)
237 {
238  ::cblas_sscal(static_cast<VSMC_CBLAS_INT>(n), a, x,
239  static_cast<VSMC_CBLAS_INT>(incx));
240 }
241 
242 inline void scal(std::size_t n, double a, double *x, std::size_t incx)
243 {
244  ::cblas_dscal(static_cast<VSMC_CBLAS_INT>(n), a, x,
245  static_cast<VSMC_CBLAS_INT>(incx));
246 }
247 
248 inline void gemv(MatrixOrder order, MatrixTrans trans, std::size_t m,
249  std::size_t n, float alpha, const float *A, std::size_t lda,
250  const float *x, std::size_t incx, float beta, float *y, std::size_t incy)
251 {
252  ::cblas_sgemv((order == RowMajor ? ::CblasRowMajor : ::CblasColMajor),
253  (trans == NoTrans ? ::CblasNoTrans : ::CblasTrans),
254  static_cast<VSMC_CBLAS_INT>(m), static_cast<VSMC_CBLAS_INT>(n), alpha,
255  A, static_cast<VSMC_CBLAS_INT>(lda), x,
256  static_cast<VSMC_CBLAS_INT>(incx), beta, y,
257  static_cast<VSMC_CBLAS_INT>(incy));
258 }
259 
260 inline void gemv(MatrixOrder order, MatrixTrans trans, std::size_t m,
261  std::size_t n, double alpha, const double *A, std::size_t lda,
262  const double *x, std::size_t incx, double beta, double *y,
263  std::size_t incy)
264 {
265  ::cblas_dgemv((order == RowMajor ? ::CblasRowMajor : ::CblasColMajor),
266  (trans == NoTrans ? ::CblasNoTrans : ::CblasTrans),
267  static_cast<VSMC_CBLAS_INT>(m), static_cast<VSMC_CBLAS_INT>(n), alpha,
268  A, static_cast<VSMC_CBLAS_INT>(lda), x,
269  static_cast<VSMC_CBLAS_INT>(incx), beta, y,
270  static_cast<VSMC_CBLAS_INT>(incy));
271 }
272 
273 } // namespace vsmc
274 
275 #endif // VSMC_CBLAS_INT
276 
277 #endif // VSMC_MATH_CBLAS_HPP
T dot(std::size_t n, const T *x, std::size_t incx, const T *y, std::size_t incy)
Computes a vector-vector dot product.
Definition: cblas.hpp:93
Definition: monitor.hpp:49
T asum(std::size_t n, const T *x, std::size_t incx)
Computes the sum of magnitudes of the vector elements.
Definition: cblas.hpp:59
T nrm2(std::size_t n, const T *x, std::size_t incx)
Computes the Euclidean norm of a vector.
Definition: cblas.hpp:107
void sqrt(std::size_t n, const float *a, float *y)
Definition: vmath.hpp:129
void copy(std::size_t n, const T *x, std::size_t incx, T *y, std::size_t incy)
Copies vector to another vector.
Definition: cblas.hpp:82
Data are stored column by column in memory.
Definition: defines.hpp:55
void scal(std::size_t n, T a, T *x, std::size_t incx)
Computes the product of a vector by a scalar.
Definition: cblas.hpp:114
Data are stored row by row in memory.
Definition: defines.hpp:54
MatrixTrans
Matrix Transpose.
Definition: defines.hpp:60
MatrixOrder
Matrix order.
Definition: defines.hpp:53
void axpy(std::size_t n, T a, const T *x, std::size_t incx, T *y, std::size_t incy)
Computes a vector-scalar product and adds the result to a vector.
Definition: cblas.hpp:71
The matrix shall be transposed.
Definition: defines.hpp:62
void gemv(MatrixOrder order, MatrixTrans trans, std::size_t m, std::size_t n, T alpha, const T *A, std::size_t lda, const T *x, std::size_t incx, T beta, T *y, std::size_t incy)
Computes a matrix-vector product using a general matrix.
Definition: cblas.hpp:129
The matrix shall not be transposed.
Definition: defines.hpp:61