vSMC
vSMC: Scalable Monte Carlo
simd.hpp
Go to the documentation of this file.
1 //============================================================================
2 // vSMC/include/vsmc/utility/simd.hpp
3 //----------------------------------------------------------------------------
4 // vSMC: Scalable Monte Carlo
5 //----------------------------------------------------------------------------
6 // Copyright (c) 2013-2015, Yan Zhou
7 // All rights reserved.
8 //
9 // Redistribution and use in source and binary forms, with or without
10 // modification, are permitted provided that the following conditions are met:
11 //
12 // Redistributions of source code must retain the above copyright notice,
13 // this list of conditions and the following disclaimer.
14 //
15 // Redistributions in binary form must reproduce the above copyright notice,
16 // this list of conditions and the following disclaimer in the documentation
17 // and/or other materials provided with the distribution.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23 // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 // POSSIBILITY OF SUCH DAMAGE.
30 //============================================================================
31 
32 #ifndef VSMC_UTILITY_SIMD_HPP
33 #define VSMC_UTILITY_SIMD_HPP
34 
35 #include <vsmc/internal/common.hpp>
36 
// Defines, for one binary operator of an integer SIMD wrapper, the compound
// assignment and the mixed vector/scalar overloads.
//
//   Type   - wrapper type, spelled in terms of the template parameter `T`
//   CType  - scalar operand type (also spelled in terms of `T`)
//   op     - operator token placed between the two operands
//   bin    - name of the binary operator function (e.g. `operator-`)
//   assign - name of the compound assignment function (e.g. `operator-=`)
//
// A `Type op Type` overload must already be visible where the macro is
// expanded. Scalar operands are broadcast with `Type::set1` and then combined
// with `op`, so every generated overload applies the operator it is named
// after.
#define VSMC_DEFINE_UTILITY_SIMD_INTEGER_BINARY_OP( \
    Type, CType, op, bin, assign) \
    template <typename T> \
    inline Type &assign(Type &a, const Type &b) \
    { \
        a = a op b; \
        \
        return a; \
    } \
    \
    template <typename T> \
    inline Type bin(const Type &a, CType b) \
    { \
        Type x; \
        x.set1(b); \
        \
        return a op x; \
    } \
    \
    template <typename T> \
    inline Type bin(CType a, const Type &b) \
    { \
        Type x; \
        x.set1(a); \
        \
        return x op b; \
    } \
    \
    template <typename T> \
    inline Type &assign(Type &a, CType b) \
    { \
        a = a op b; \
        \
        return a; \
    }
72 
// Defines, for one binary operator of a floating point SIMD wrapper, the
// compound assignment and the mixed vector/scalar overloads.
//
//   Type   - wrapper type (non-template)
//   CType  - scalar operand type
//   op     - operator token placed between the two operands
//   bin    - name of the binary operator function (e.g. `operator*`)
//   assign - name of the compound assignment function (e.g. `operator*=`)
//
// A `Type op Type` overload must already be visible where the macro is
// expanded. Scalar operands are broadcast with `Type::set1` and then combined
// with `op`, so every generated overload applies the operator it is named
// after.
#define VSMC_DEFINE_UTILITY_SIMD_REAL_BINARY_OP(Type, CType, op, bin, assign) \
    inline Type &assign(Type &a, const Type &b) \
    { \
        a = a op b; \
        \
        return a; \
    } \
    \
    inline Type bin(const Type &a, CType b) \
    { \
        Type x; \
        x.set1(b); \
        \
        return a op x; \
    } \
    \
    inline Type bin(CType a, const Type &b) \
    { \
        Type x; \
        x.set1(a); \
        \
        return x op b; \
    } \
    \
    inline Type &assign(Type &a, CType b) \
    { \
        a = a op b; \
        \
        return a; \
    }
103 
104 #if VSMC_HAS_SSE2
105 #include <emmintrin.h>
106 
107 namespace vsmc
108 {
109 
112 template <typename IntType = __m128i>
113 class M128I
114 {
115  public:
116  using value_type = IntType;
117 
118  M128I() = default;
119 
120  M128I(const __m128i &value) : value_(value) {}
121 
122  template <typename T>
123  M128I(const M128I<T> &other)
124  : value_(other.value())
125  {
126  }
127 
128  template <typename T>
129  M128I<IntType> &operator=(const M128I<T> &other)
130  {
131  value_ = other.value();
132 
133  return *this;
134  }
135 
136  static constexpr std::size_t size()
137  {
138  return sizeof(__m128i) / sizeof(IntType);
139  }
140 
141  __m128i &value() { return value_; }
142  const __m128i &value() const { return value_; }
143 
144  __m128i *data() { return &value_; }
145  const __m128i *data() const { return &value_; }
146 
147  template <typename T>
148  void load_a(const T *mem)
149  {
150  value_ = _mm_load_si128(reinterpret_cast<const __m128i *>(mem));
151  }
152 
153  template <typename T>
154  void load_u(const T *mem)
155  {
156  value_ = _mm_loadu_si128(reinterpret_cast<const __m128i *>(mem));
157  }
158 
159  template <typename T>
160  void load(const T *mem)
161  {
162  reinterpret_cast<std::uintptr_t>(mem) % 16 == 0 ? load_a(mem) :
163  load_u(mem);
164  }
165 
166  template <typename T>
167  void store_a(T *mem) const
168  {
169  _mm_store_si128(reinterpret_cast<__m128i *>(mem), value_);
170  }
171 
172  template <typename T>
173  void store_u(T *mem) const
174  {
175  _mm_storeu_si128(reinterpret_cast<__m128i *>(mem), value_);
176  }
177 
178  template <typename T>
179  void store(T *mem) const
180  {
181  reinterpret_cast<std::uintptr_t>(mem) % 16 == 0 ? store_a(mem) :
182  store_u(mem);
183  }
184 
185  void set0() { value_ = _mm_setzero_si128(); }
186 
187  template <typename T>
188  void set1(T n)
189  {
190  value_ = set1(n, std::integral_constant<std::size_t, sizeof(T)>());
191  }
192 
193  template <typename T>
194  void set(T e1, T e0)
195  {
196  value_ = _mm_set_epi64x(
197  static_cast<VSMC_INT64>(e1), static_cast<VSMC_INT64>(e0));
198  }
199 
200  template <typename T>
201  void set(T e3, T e2, T e1, T e0)
202  {
203  value_ = _mm_set_epi32(static_cast<int>(e3), static_cast<int>(e2),
204  static_cast<int>(e1), static_cast<int>(e0));
205  }
206 
207  template <typename T>
208  void set(T e7, T e6, T e5, T e4, T e3, T e2, T e1, T e0)
209  {
210  value_ = _mm_set_epi16(static_cast<short>(e7), static_cast<short>(e6),
211  static_cast<short>(e5), static_cast<short>(e4),
212  static_cast<short>(e3), static_cast<short>(e2),
213  static_cast<short>(e1), static_cast<short>(e0));
214  }
215 
216  template <typename T>
217  void set(T e15, T e14, T e13, T e12, T e11, T e10, T e9, T e8, T e7, T e6,
218  T e5, T e4, T e3, T e2, T e1, T e0)
219  {
220  value_ = _mm_set_epi8(static_cast<char>(e15), static_cast<char>(e14),
221  static_cast<char>(e13), static_cast<char>(e12),
222  static_cast<char>(e11), static_cast<char>(e10),
223  static_cast<char>(e9), static_cast<char>(e8),
224  static_cast<char>(e7), static_cast<char>(e6),
225  static_cast<char>(e5), static_cast<char>(e4),
226  static_cast<char>(e3), static_cast<char>(e2),
227  static_cast<char>(e1), static_cast<char>(e0));
228  }
229 
230  private:
231  __m128i value_;
232 
233  template <typename T>
234  __m128i set1(T n, std::integral_constant<std::size_t, sizeof(std::int8_t)>)
235  {
236  return _mm_set1_epi8(static_cast<char>(n));
237  }
238 
239  template <typename T>
240  __m128i set1(
241  T n, std::integral_constant<std::size_t, sizeof(std::int16_t)>)
242  {
243  return _mm_set1_epi16(static_cast<short>(n));
244  }
245 
246  template <typename T>
247  __m128i set1(
248  T n, std::integral_constant<std::size_t, sizeof(std::int32_t)>)
249  {
250  return _mm_set1_epi32(static_cast<int>(n));
251  }
252 
253  template <typename T>
254  __m128i set1(
255  T n, std::integral_constant<std::size_t, sizeof(std::int64_t)>)
256  {
257  return _mm_set1_epi64x(static_cast<VSMC_INT64>(n));
258  }
259 }; // class M128I
260 
261 namespace internal
262 {
263 
264 template <typename T>
265 inline M128I<T> m128i_add(const M128I<T> &a, const M128I<T> &b,
266  std::integral_constant<std::size_t, sizeof(std::int8_t)>)
267 {
268  return M128I<T>(_mm_add_epi8(a.value(), b.value()));
269 }
270 
271 template <typename T>
272 inline M128I<T> m128i_add(const M128I<T> &a, const M128I<T> &b,
273  std::integral_constant<std::size_t, sizeof(std::int16_t)>)
274 {
275  return M128I<T>(_mm_add_epi16(a.value(), b.value()));
276 }
277 
278 template <typename T>
279 inline M128I<T> m128i_add(const M128I<T> &a, const M128I<T> &b,
280  std::integral_constant<std::size_t, sizeof(std::int32_t)>)
281 {
282  return M128I<T>(_mm_add_epi32(a.value(), b.value()));
283 }
284 
285 template <typename T>
286 inline M128I<T> m128i_add(const M128I<T> &a, const M128I<T> &b,
287  std::integral_constant<std::size_t, sizeof(std::int64_t)>)
288 {
289  return M128I<T>(_mm_add_epi64(a.value(), b.value()));
290 }
291 
292 template <typename T>
293 inline M128I<T> m128i_sub(const M128I<T> &a, const M128I<T> &b,
294  std::integral_constant<std::size_t, sizeof(std::int8_t)>)
295 {
296  return M128I<T>(_mm_sub_epi8(a.value(), b.value()));
297 }
298 
299 template <typename T>
300 inline M128I<T> m128i_sub(const M128I<T> &a, const M128I<T> &b,
301  std::integral_constant<std::size_t, sizeof(std::int16_t)>)
302 {
303  return M128I<T>(_mm_sub_epi16(a.value(), b.value()));
304 }
305 
306 template <typename T>
307 inline M128I<T> m128i_sub(const M128I<T> &a, const M128I<T> &b,
308  std::integral_constant<std::size_t, sizeof(std::int32_t)>)
309 {
310  return M128I<T>(_mm_sub_epi32(a.value(), b.value()));
311 }
312 
313 template <typename T>
314 inline M128I<T> m128i_sub(const M128I<T> &a, const M128I<T> &b,
315  std::integral_constant<std::size_t, sizeof(std::int64_t)>)
316 {
317  return M128I<T>(_mm_sub_epi64(a.value(), b.value()));
318 }
319 
320 template <typename T>
321 inline M128I<T> m128i_slli(const M128I<T> &a, int imm8,
322  std::integral_constant<std::size_t, sizeof(std::int8_t)>)
323 {
324  return M128I<T>(_mm_slli_epi8(a.value(), imm8));
325 }
326 
327 template <typename T>
328 inline M128I<T> m128i_slli(const M128I<T> &a, int imm8,
329  std::integral_constant<std::size_t, sizeof(std::int16_t)>)
330 {
331  return M128I<T>(_mm_slli_epi16(a.value(), imm8));
332 }
333 
334 template <typename T>
335 inline M128I<T> m128i_slli(const M128I<T> &a, int imm8,
336  std::integral_constant<std::size_t, sizeof(std::int32_t)>)
337 {
338  return M128I<T>(_mm_slli_epi32(a.value(), imm8));
339 }
340 
341 template <typename T>
342 inline M128I<T> m128i_slli(const M128I<T> &a, int imm8,
343  std::integral_constant<std::size_t, sizeof(std::int64_t)>)
344 {
345  return M128I<T>(_mm_slli_epi64(a.value(), imm8));
346 }
347 
348 template <typename T>
349 inline M128I<T> m128i_srli(const M128I<T> &a, int imm8,
350  std::integral_constant<std::size_t, sizeof(std::int8_t)>)
351 {
352  return M128I<T>(_mm_srli_epi8(a.value(), imm8));
353 }
354 
355 template <typename T>
356 inline M128I<T> m128i_srli(const M128I<T> &a, int imm8,
357  std::integral_constant<std::size_t, sizeof(std::int16_t)>)
358 {
359  return M128I<T>(_mm_srli_epi16(a.value(), imm8));
360 }
361 
362 template <typename T>
363 inline M128I<T> m128i_srli(const M128I<T> &a, int imm8,
364  std::integral_constant<std::size_t, sizeof(std::int32_t)>)
365 {
366  return M128I<T>(_mm_srli_epi32(a.value(), imm8));
367 }
368 
369 template <typename T>
370 inline M128I<T> m128i_srli(const M128I<T> &a, int imm8,
371  std::integral_constant<std::size_t, sizeof(std::int64_t)>)
372 {
373  return M128I<T>(_mm_srli_epi64(a.value(), imm8));
374 }
375 
376 } // namespace internal
377 
378 template <typename T>
379 inline bool operator==(const M128I<T> &a, const M128I<T> &b)
380 {
381  std::array<std::uint64_t, 2> sa;
382  std::array<std::uint64_t, 2> sb;
383  a.store_u(sa.data());
384  b.store_u(sb.data());
385 
386  return sa == sb;
387 }
388 
389 template <typename T>
390 inline bool operator!=(const M128I<T> &a, const M128I<T> &b)
391 {
392  return !(a == b);
393 }
394 
395 template <typename CharT, typename Traits, typename T>
396 inline std::basic_ostream<CharT, Traits> &operator<<(
397  std::basic_ostream<CharT, Traits> &os, const M128I<T> &a)
398 {
399  if (!os.good())
400  return os;
401 
402  std::array<T, M128I<T>::size()> sa;
403  a.store_u(sa.data());
404  os << sa;
405 
406  return os;
407 }
408 
409 template <typename CharT, typename Traits, typename T>
410 inline std::basic_istream<CharT, Traits> &operator>>(
411  std::basic_istream<CharT, Traits> &is, M128I<T> &a)
412 {
413  if (!is.good())
414  return is;
415 
416  std::array<T, M128I<T>::size()> sa;
417  is >> sa;
418 
419  if (is.good())
420  a.load_u(sa.data());
421 
422  return is;
423 }
424 
425 template <typename T>
426 inline M128I<T> operator+(const M128I<T> &a, const M128I<T> &b)
427 {
428  return internal::m128i_add(
429  a, b, std::integral_constant<std::size_t, sizeof(T)>());
430 }
431 
432 template <typename T>
433 inline M128I<T> operator-(const M128I<T> &a, const M128I<T> &b)
434 {
435  return internal::m128i_sub(
436  a, b, std::integral_constant<std::size_t, sizeof(T)>());
437 }
438 
439 template <typename T>
440 inline M128I<T> operator&(const M128I<T> &a, const M128I<T> &b)
441 {
442  return M128I<T>(_mm_and_si128(a.value(), b.value()));
443 }
444 
445 template <typename T>
446 inline M128I<T> operator|(const M128I<T> &a, const M128I<T> &b)
447 {
448  return M128I<T>(_mm_or_si128(a.value(), b.value()));
449 }
450 
451 template <typename T>
452 inline M128I<T> operator^(const M128I<T> &a, const M128I<T> &b) {
453  return M128I<T>(_mm_xor_si128(a.value(), b.value()));
454 }
455 
456 template <typename T>
457 inline M128I<T> operator<<(const M128I<T> &a, int imm8)
458 {
459  return internal::m128i_slli(
460  a, imm8, std::integral_constant<std::size_t, sizeof(T)>());
461 }
462 
463 template <typename T>
464 inline M128I<T> operator<<=(M128I<T> &a, int imm8)
465 {
466  a = a << imm8;
467 
468  return a;
469 }
470 
471 template <typename T>
472 inline M128I<T> operator>>(const M128I<T> &a, int imm8)
473 {
474  return internal::m128i_srli(
475  a, imm8, std::integral_constant<std::size_t, sizeof(T)>());
476 }
477 
478 template <typename T>
479 inline M128I<T> operator>>=(M128I<T> &a, int imm8)
480 {
481  a = a << imm8;
482 
483  return a;
484 }
485 
487  M128I<T>, T, +, operator+, operator+=)
489  M128I<T>, T, -, operator-, operator-=)
491  M128I<T>, T, &, operator&, operator&=)
493  M128I<T>, T, |, operator|, operator|=)
495  M128I<T>, T, ^, operator^, operator^=)
496 
497 class M128
500 {
501  public:
502  M128() = default;
503 
504  M128(const __m128 &value) : value_(value) {}
505 
506  static constexpr std::size_t size() { return 4; }
507 
508  __m128 &value() { return value_; }
509  const __m128 &value() const { return value_; }
510 
511  __m128 *data() { return &value_; }
512  const __m128 *data() const { return &value_; }
513 
514  template <typename T>
515  void load_a(const T *mem)
516  {
517  value_ = _mm_load_ps(reinterpret_cast<const float *>(mem));
518  }
519 
520  template <typename T>
521  void load_u(const T *mem)
522  {
523  value_ = _mm_loadu_ps(reinterpret_cast<const float *>(mem));
524  }
525 
526  template <typename T>
527  void load(const T *mem)
528  {
529  reinterpret_cast<std::uintptr_t>(mem) % 16 == 0 ? load_a(mem) :
530  load_u(mem);
531  }
532 
533  template <typename T>
534  void store_a(T *mem) const
535  {
536  _mm_store_ps(reinterpret_cast<float *>(mem), value_);
537  }
538 
539  template <typename T>
540  void store_u(T *mem) const
541  {
542  _mm_storeu_ps(reinterpret_cast<float *>(mem), value_);
543  }
544 
545  template <typename T>
546  void store(T *mem) const
547  {
548  reinterpret_cast<std::uintptr_t>(mem) % 16 == 0 ? store_a(mem) :
549  store_u(mem);
550  }
551 
552  void set0() { value_ = _mm_setzero_ps(); }
553 
554  void set1(float e) { value_ = _mm_set1_ps(e); }
555 
556  void set(float e3, float e2, float e1, float e0)
557  {
558  value_ = _mm_set_ps(e3, e2, e1, e0);
559  }
560 
561  private:
562  __m128 value_;
563 }; // class M128
564 
565 inline bool operator==(const M128 &a, const M128 &b)
566 {
567  std::array<float, 4> sa;
568  std::array<float, 4> sb;
569  a.store_u(sa.data());
570  b.store_u(sb.data());
571 
572  return sa == sb;
573 }
574 
575 inline bool operator!=(const M128 &a, const M128 &b) { return !(a == b); }
576 
577 template <typename CharT, typename Traits>
578 inline std::basic_ostream<CharT, Traits> &operator<<(
579  std::basic_ostream<CharT, Traits> &os, const M128 &a)
580 {
581  if (!os.good())
582  return os;
583 
584  std::array<float, 4> sa;
585  a.store_u(sa.data());
586  os << sa;
587 
588  return os;
589 }
590 
591 template <typename CharT, typename Traits>
592 inline std::basic_istream<CharT, Traits> &operator>>(
593  std::basic_istream<CharT, Traits> &is, M128 &a)
594 {
595  if (!is.good())
596  return is;
597 
598  std::array<float, 4> sa;
599  is >> sa;
600 
601  if (is.good())
602  a.load_u(sa.data());
603 
604  return is;
605 }
606 
607 inline M128 operator+(const M128 &a, const M128 &b)
608 {
609  return M128(_mm_add_ps(a.value(), b.value()));
610 }
611 
612 inline M128 operator-(const M128 &a, const M128 &b)
613 {
614  return M128(_mm_sub_ps(a.value(), b.value()));
615 }
616 
617 inline M128 operator*(const M128 &a, const M128 &b)
618 {
619  return M128(_mm_mul_ps(a.value(), b.value()));
620 }
621 
622 inline M128 operator/(const M128 &a, const M128 &b)
623 {
624  return M128(_mm_div_ps(a.value(), b.value()));
625 }
626 
627 VSMC_DEFINE_UTILITY_SIMD_REAL_BINARY_OP(M128, float, +, operator+, operator+=)
628 VSMC_DEFINE_UTILITY_SIMD_REAL_BINARY_OP(M128, float, -, operator-, operator-=)
629 VSMC_DEFINE_UTILITY_SIMD_REAL_BINARY_OP(M128, float, *, operator*, operator*=)
630 VSMC_DEFINE_UTILITY_SIMD_REAL_BINARY_OP(M128, float, /, operator/, operator/=)
631 
632 class M128D
635 {
636  public:
637  M128D() = default;
638 
639  M128D(const __m128d &value) : value_(value) {}
640 
641  static constexpr std::size_t size() { return 2; }
642 
643  __m128d &value() { return value_; }
644  const __m128d &value() const { return value_; }
645 
646  __m128d *data() { return &value_; }
647  const __m128d *data() const { return &value_; }
648 
649  template <typename T>
650  void load_a(const T *mem)
651  {
652  value_ = _mm_load_pd(reinterpret_cast<const double *>(mem));
653  }
654 
655  template <typename T>
656  void load_u(const T *mem)
657  {
658  value_ = _mm_loadu_pd(reinterpret_cast<const double *>(mem));
659  }
660 
661  template <typename T>
662  void load(const T *mem)
663  {
664  reinterpret_cast<std::uintptr_t>(mem) % 16 == 0 ? load_a(mem) :
665  load_u(mem);
666  }
667 
668  template <typename T>
669  void store_a(T *mem) const
670  {
671  _mm_store_pd(reinterpret_cast<double *>(mem), value_);
672  }
673 
674  template <typename T>
675  void store_u(T *mem) const
676  {
677  _mm_storeu_pd(reinterpret_cast<double *>(mem), value_);
678  }
679 
680  template <typename T>
681  void store(T *mem) const
682  {
683  reinterpret_cast<std::uintptr_t>(mem) % 16 == 0 ? store_a(mem) :
684  store_u(mem);
685  }
686 
687  void set0() { value_ = _mm_setzero_pd(); }
688 
689  void set1(double e) { value_ = _mm_set1_pd(e); }
690 
691  void set(double e1, double e0) { value_ = _mm_set_pd(e1, e0); }
692 
693  private:
694  __m128d value_;
695 }; // class M128D
696 
697 inline bool operator==(const M128D &a, const M128D &b)
698 {
699  std::array<double, 2> sa;
700  std::array<double, 2> sb;
701  a.store_u(sa.data());
702  b.store_u(sb.data());
703 
704  return sa == sb;
705 }
706 
707 inline bool operator!=(const M128D &a, const M128D &b) { return !(a == b); }
708 
709 template <typename CharT, typename Traits>
710 inline std::basic_ostream<CharT, Traits> &operator<<(
711  std::basic_ostream<CharT, Traits> &os, const M128D &a)
712 {
713  if (!os.good())
714  return os;
715 
716  std::array<double, 2> sa;
717  a.store_u(sa.data());
718  os << sa;
719 
720  return os;
721 }
722 
723 template <typename CharT, typename Traits>
724 inline std::basic_istream<CharT, Traits> &operator>>(
725  std::basic_istream<CharT, Traits> &is, M128D &a)
726 {
727  if (!is.good())
728  return is;
729 
730  std::array<double, 2> sa;
731  is >> sa;
732 
733  if (is.good())
734  a.load_u(sa.data());
735 
736  return is;
737 }
738 
739 inline M128D operator+(const M128D &a, const M128D &b)
740 {
741  return M128D(_mm_add_pd(a.value(), b.value()));
742 }
743 
744 inline M128D operator-(const M128D &a, const M128D &b)
745 {
746  return M128D(_mm_sub_pd(a.value(), b.value()));
747 }
748 
749 inline M128D operator*(const M128D &a, const M128D &b)
750 {
751  return M128D(_mm_mul_pd(a.value(), b.value()));
752 }
753 
754 inline M128D operator/(const M128D &a, const M128D &b)
755 {
756  return M128D(_mm_div_pd(a.value(), b.value()));
757 }
758 
760  M128D, double, +, operator+, operator+=)
762  M128D, double, -, operator-, operator-=)
764  M128D, double, *, operator*, operator*=)
766  M128D, double, /, operator/, operator/=)
767 
768 namespace internal
769 {
770 
771 template <typename RealType>
772 class M128TypeTrait;
773 
774 template <>
775 class M128TypeTrait<float>
776 {
777  public:
778  using type = M128;
779 };
780 
781 template <>
782 class M128TypeTrait<double>
783 {
784  public:
785  using type = M128D;
786 };
787 
788 } // namespace vsmc::internal
789 
791 template <typename T>
792 using M128Type = typename std::conditional<std::is_integral<T>::value,
793  M128I<T>, typename internal::M128TypeTrait<T>::type>::type;
794 
795 #endif // VSMC_HAS_SSE2
796 
797 #if VSMC_HAS_AVX2
798 #include <immintrin.h>
799 
802 template <typename IntType = __m256i>
803 class M256I
804 {
805  public:
806  using value_type = IntType;
807 
808  M256I() = default;
809 
810  M256I(const __m256i &value) : value_(value) {}
811 
812  template <typename T>
813  M256I(const M256I<T> &other)
814  : value_(other.value())
815  {
816  }
817 
818  template <typename T>
819  M256I<IntType> &operator=(const M256I<T> &other)
820  {
821  value_ = other.value_;
822 
823  return *this;
824  }
825 
826  static constexpr std::size_t size()
827  {
828  return sizeof(__m256i) / sizeof(IntType);
829  }
830 
831  __m256i &value() { return value_; }
832  const __m256i &value() const { return value_; }
833 
834  __m256i *data() { return &value_; }
835  const __m256i *data() const { return &value_; }
836 
837  template <typename T>
838  void load_a(const T *mem)
839  {
840  value_ = _mm256_load_si256(reinterpret_cast<const __m256i *>(mem));
841  }
842 
843  template <typename T>
844  void load_u(const T *mem)
845  {
846  value_ = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(mem));
847  }
848 
849  template <typename T>
850  void load(const T *mem)
851  {
852  reinterpret_cast<std::uintptr_t>(mem) % 32 == 0 ? load_a(mem) :
853  load_u(mem);
854  }
855 
856  template <typename T>
857  void store_a(T *mem) const
858  {
859  _mm256_store_si256(reinterpret_cast<__m256i *>(mem), value_);
860  }
861 
862  template <typename T>
863  void store_u(T *mem) const
864  {
865  _mm256_storeu_si256(reinterpret_cast<__m256i *>(mem), value_);
866  }
867 
868  template <typename T>
869  void store(T *mem) const
870  {
871  reinterpret_cast<std::uintptr_t>(mem) % 32 == 0 ? store_a(mem) :
872  store_u(mem);
873  }
874 
875  void set0() { value_ = _mm256_setzero_si256(); }
876 
877  template <typename T>
878  void set1(T n)
879  {
880  value_ = set1(n, std::integral_constant<std::size_t, sizeof(T)>());
881  }
882 
883  template <typename T>
884  void set(T e3, T e2, T e1, T e0)
885  {
886  value_ = _mm256_set_epi64x(static_cast<VSMC_INT64>(e3),
887  static_cast<VSMC_INT64>(e2), static_cast<VSMC_INT64>(e1),
888  static_cast<VSMC_INT64>(e0));
889  }
890 
891  template <typename T>
892  void set(T e7, T e6, T e5, T e4, T e3, T e2, T e1, T e0)
893  {
894  value_ = _mm256_set_epi32(static_cast<int>(e7), static_cast<int>(e6),
895  static_cast<int>(e5), static_cast<int>(e4), static_cast<int>(e3),
896  static_cast<int>(e2), static_cast<int>(e1), static_cast<int>(e0));
897  }
898 
899  template <typename T>
900  void set(T e15, T e14, T e13, T e12, T e11, T e10, T e9, T e8, T e7, T e6,
901  T e5, T e4, T e3, T e2, T e1, T e0)
902  {
903  value_ =
904  _mm256_set_epi16(static_cast<short>(e15), static_cast<short>(e14),
905  static_cast<short>(e13), static_cast<short>(e12),
906  static_cast<short>(e11), static_cast<short>(e10),
907  static_cast<short>(e9), static_cast<short>(e8),
908  static_cast<short>(e7), static_cast<short>(e6),
909  static_cast<short>(e5), static_cast<short>(e4),
910  static_cast<short>(e3), static_cast<short>(e2),
911  static_cast<short>(e1), static_cast<short>(e0));
912  }
913 
914  template <typename T>
915  void set(T e31, T e30, T e29, T e28, T e27, T e26, T e25, T e24, T e23,
916  T e22, T e21, T e20, T e19, T e18, T e17, T e16, T e15, T e14, T e13,
917  T e12, T e11, T e10, T e9, T e8, T e7, T e6, T e5, T e4, T e3, T e2,
918  T e1, T e0)
919  {
920  value_ =
921  _mm256_set_epi8(static_cast<char>(e31), static_cast<char>(e30),
922  static_cast<char>(e29), static_cast<char>(e28),
923  static_cast<char>(e27), static_cast<char>(e26),
924  static_cast<char>(e25), static_cast<char>(e24),
925  static_cast<char>(e23), static_cast<char>(e22),
926  static_cast<char>(e21), static_cast<char>(e20),
927  static_cast<char>(e19), static_cast<char>(e18),
928  static_cast<char>(e17), static_cast<char>(e16),
929  static_cast<char>(e15), static_cast<char>(e14),
930  static_cast<char>(e13), static_cast<char>(e12),
931  static_cast<char>(e11), static_cast<char>(e10),
932  static_cast<char>(e9), static_cast<char>(e8),
933  static_cast<char>(e7), static_cast<char>(e6),
934  static_cast<char>(e5), static_cast<char>(e4),
935  static_cast<char>(e3), static_cast<char>(e2),
936  static_cast<char>(e1), static_cast<char>(e0));
937  }
938 
939  private:
940  __m256i value_;
941 
942  template <typename T>
943  __m256i set1(T n, std::integral_constant<std::size_t, sizeof(std::int8_t)>)
944  {
945  return _mm256_set1_epi8(static_cast<char>(n));
946  }
947 
948  template <typename T>
949  __m256i set1(
950  T n, std::integral_constant<std::size_t, sizeof(std::int16_t)>)
951  {
952  return _mm256_set1_epi16(static_cast<short>(n));
953  }
954 
955  template <typename T>
956  __m256i set1(
957  T n, std::integral_constant<std::size_t, sizeof(std::int32_t)>)
958  {
959  return _mm256_set1_epi32(static_cast<int>(n));
960  }
961 
962  template <typename T>
963  __m256i set1(
964  T n, std::integral_constant<std::size_t, sizeof(std::int64_t)>)
965  {
966  return _mm256_set1_epi64x(static_cast<long long>(n));
967  }
968 }; // class M256I
969 
970 namespace internal
971 {
972 
973 template <typename T>
974 inline M256I<T> m256i_add(const M256I<T> &a, const M256I<T> &b,
975  std::integral_constant<std::size_t, sizeof(std::int8_t)>)
976 {
977  return M256I<T>(_mm256_add_epi8(a.value(), b.value()));
978 }
979 
980 template <typename T>
981 inline M256I<T> m256i_add(const M256I<T> &a, const M256I<T> &b,
982  std::integral_constant<std::size_t, sizeof(std::int16_t)>)
983 {
984  return M256I<T>(_mm256_add_epi16(a.value(), b.value()));
985 }
986 
987 template <typename T>
988 inline M256I<T> m256i_add(const M256I<T> &a, const M256I<T> &b,
989  std::integral_constant<std::size_t, sizeof(std::int32_t)>)
990 {
991  return M256I<T>(_mm256_add_epi32(a.value(), b.value()));
992 }
993 
994 template <typename T>
995 inline M256I<T> m256i_add(const M256I<T> &a, const M256I<T> &b,
996  std::integral_constant<std::size_t, sizeof(std::int64_t)>)
997 {
998  return M256I<T>(_mm256_add_epi64(a.value(), b.value()));
999 }
1000 
1001 template <typename T>
1002 inline M256I<T> m256i_sub(const M256I<T> &a, const M256I<T> &b,
1003  std::integral_constant<std::size_t, sizeof(std::int8_t)>)
1004 {
1005  return M256I<T>(_mm256_sub_epi8(a.value(), b.value()));
1006 }
1007 
1008 template <typename T>
1009 inline M256I<T> m256i_sub(const M256I<T> &a, const M256I<T> &b,
1010  std::integral_constant<std::size_t, sizeof(std::int16_t)>)
1011 {
1012  return M256I<T>(_mm256_sub_epi16(a.value(), b.value()));
1013 }
1014 
1015 template <typename T>
1016 inline M256I<T> m256i_sub(const M256I<T> &a, const M256I<T> &b,
1017  std::integral_constant<std::size_t, sizeof(std::int32_t)>)
1018 {
1019  return M256I<T>(_mm256_sub_epi32(a.value(), b.value()));
1020 }
1021 
1022 template <typename T>
1023 inline M256I<T> m256i_sub(const M256I<T> &a, const M256I<T> &b,
1024  std::integral_constant<std::size_t, sizeof(std::int64_t)>)
1025 {
1026  return M256I<T>(_mm256_sub_epi64(a.value(), b.value()));
1027 }
1028 
1029 template <typename T>
1030 inline M256I<T> m256i_slli(const M256I<T> &a, int imm8,
1031  std::integral_constant<std::size_t, sizeof(std::int8_t)>)
1032 {
1033  return M256I<T>(_mm256_slli_epi8(a.value(), imm8));
1034 }
1035 
1036 template <typename T>
1037 inline M256I<T> m256i_slli(const M256I<T> &a, int imm8,
1038  std::integral_constant<std::size_t, sizeof(std::int16_t)>)
1039 {
1040  return M256I<T>(_mm256_slli_epi16(a.value(), imm8));
1041 }
1042 
1043 template <typename T>
1044 inline M256I<T> m256i_slli(const M256I<T> &a, int imm8,
1045  std::integral_constant<std::size_t, sizeof(std::int32_t)>)
1046 {
1047  return M256I<T>(_mm256_slli_epi32(a.value(), imm8));
1048 }
1049 
1050 template <typename T>
1051 inline M256I<T> m256i_slli(const M256I<T> &a, int imm8,
1052  std::integral_constant<std::size_t, sizeof(std::int64_t)>)
1053 {
1054  return M256I<T>(_mm256_slli_epi64(a.value(), imm8));
1055 }
1056 
1057 template <typename T>
1058 inline M256I<T> m256i_srli(const M256I<T> &a, int imm8,
1059  std::integral_constant<std::size_t, sizeof(std::int8_t)>)
1060 {
1061  return M256I<T>(_mm256_srli_epi8(a.value(), imm8));
1062 }
1063 
1064 template <typename T>
1065 inline M256I<T> m256i_srli(const M256I<T> &a, int imm8,
1066  std::integral_constant<std::size_t, sizeof(std::int16_t)>)
1067 {
1068  return M256I<T>(_mm256_srli_epi16(a.value(), imm8));
1069 }
1070 
1071 template <typename T>
1072 inline M256I<T> m256i_srli(const M256I<T> &a, int imm8,
1073  std::integral_constant<std::size_t, sizeof(std::int32_t)>)
1074 {
1075  return M256I<T>(_mm256_srli_epi32(a.value(), imm8));
1076 }
1077 
1078 template <typename T>
1079 inline M256I<T> m256i_srli(const M256I<T> &a, int imm8,
1080  std::integral_constant<std::size_t, sizeof(std::int64_t)>)
1081 {
1082  return M256I<T>(_mm256_srli_epi64(a.value(), imm8));
1083 }
1084 
1085 } // namespace vsmc::internal
1086 
1087 template <typename T>
1088 inline bool operator==(const M256I<T> &a, const M256I<T> &b)
1089 {
1090  std::array<std::uint64_t, 4> sa;
1091  std::array<std::uint64_t, 4> sb;
1092  a.store_u(sa.data());
1093  b.store_u(sb.data());
1094 
1095  return sa == sb;
1096 }
1097 
1098 template <typename T>
1099 inline bool operator!=(const M256I<T> &a, const M256I<T> &b)
1100 {
1101  return !(a == b);
1102 }
1103 
1104 template <typename CharT, typename Traits, typename T>
1105 inline std::basic_ostream<CharT, Traits> &operator<<(
1106  std::basic_ostream<CharT, Traits> &os, const M256I<T> &a)
1107 {
1108  if (!os.good())
1109  return os;
1110 
1111  std::array<T, M256I<T>::size()> sa;
1112  a.store_u(sa.data());
1113  os << sa;
1114 
1115  return os;
1116 }
1117 
1118 template <typename CharT, typename Traits, typename T>
1119 inline std::basic_istream<CharT, Traits> &operator>>(
1120  std::basic_istream<CharT, Traits> &is, M256I<T> &a)
1121 {
1122  if (!is.good())
1123  return is;
1124 
1125  std::array<T, M256I<T>::size()> sa;
1126  is >> sa;
1127 
1128  if (is.good())
1129  a.load_u(sa.data());
1130 
1131  return is;
1132 }
1133 
1134 template <typename T>
1135 inline M256I<T> operator+(const M256I<T> &a, const M256I<T> &b)
1136 {
1137  return internal::m256i_add(
1138  a, b, std::integral_constant<std::size_t, sizeof(T)>());
1139 }
1140 
1141 template <typename T>
1142 inline M256I<T> operator-(const M256I<T> &a, const M256I<T> &b)
1143 {
1144  return internal::m256i_sub(
1145  a, b, std::integral_constant<std::size_t, sizeof(T)>());
1146 }
1147 
1148 template <typename T>
1149 inline M256I<T> operator&(const M256I<T> &a, const M256I<T> &b)
1150 {
1151  return M256I<T>(_mm256_and_si256(a.value(), b.value()));
1152 }
1153 
1154 template <typename T>
1155 inline M256I<T> operator|(const M256I<T> &a, const M256I<T> &b)
1156 {
1157  return M256I<T>(_mm256_or_si256(a.value(), b.value()));
1158 }
1159 
1160 template <typename T>
1161 inline M256I<T> operator^(const M256I<T> &a, const M256I<T> &b) {
1162  return M256I<T>(_mm256_xor_si256(a.value(), b.value()));
1163 }
1164 
1165 template <typename T>
1166 inline M256I<T> operator<<(const M256I<T> &a, int imm8)
1167 {
1168  return internal::m256i_slli(
1169  a, imm8, std::integral_constant<std::size_t, sizeof(T)>());
1170 }
1171 
1172 template <typename T>
1173 inline M256I<T> operator<<=(M256I<T> &a, int imm8)
1174 {
1175  a = a << imm8;
1176 
1177  return a;
1178 }
1179 
1180 template <typename T>
1181 inline M256I<T> operator>>(const M256I<T> &a, int imm8)
1182 {
1183  return internal::m256i_srli(
1184  a, imm8, std::integral_constant<std::size_t, sizeof(T)>());
1185 }
1186 
1187 template <typename T>
1188 inline M256I<T> operator>>=(M256I<T> &a, int imm8)
1189 {
1190  a = a << imm8;
1191 
1192  return a;
1193 }
1194 
1196  M256I<T>, T, +, operator+, operator+=)
1198  M256I<T>, T, -, operator-, operator-=)
1200  M256I<T>, T, &, operator&, operator&=)
1202  M256I<T>, T, |, operator|, operator|=)
1204  M256I<T>, T, ^, operator^, operator^=)
1205 
1206 class M256
1209 {
1210  public:
1211  M256() = default;
1212 
1213  M256(const __m256 &value) : value_(value) {}
1214 
1215  static constexpr std::size_t size() { return 8; }
1216 
1217  __m256 &value() { return value_; }
1218  const __m256 &value() const { return value_; }
1219 
1220  __m256 *data() { return &value_; }
1221  const __m256 *data() const { return &value_; }
1222 
1223  template <typename T>
1224  void load_a(const T *mem)
1225  {
1226  value_ = _mm256_load_ps(reinterpret_cast<const float *>(mem));
1227  }
1228 
1229  template <typename T>
1230  void load_u(const T *mem)
1231  {
1232  value_ = _mm256_loadu_ps(reinterpret_cast<const float *>(mem));
1233  }
1234 
1235  template <typename T>
1236  void load(const T *mem)
1237  {
1238  reinterpret_cast<std::uintptr_t>(mem) % 32 == 0 ? load_a(mem) :
1239  load_u(mem);
1240  }
1241 
1242  template <typename T>
1243  void store_a(T *mem) const
1244  {
1245  _mm256_store_ps(reinterpret_cast<float *>(mem), value_);
1246  }
1247 
1248  template <typename T>
1249  void store_u(T *mem) const
1250  {
1251  _mm256_storeu_ps(reinterpret_cast<float *>(mem), value_);
1252  }
1253 
1254  template <typename T>
1255  void store(T *mem) const
1256  {
1257  reinterpret_cast<std::uintptr_t>(mem) % 32 == 0 ? store_a(mem) :
1258  store_u(mem);
1259  }
1260 
1261  void set0() { value_ = _mm256_setzero_ps(); }
1262 
1263  void set1(float e) { value_ = _mm256_set1_ps(e); }
1264 
1265  void set(float e7, float e6, float e5, float e4, float e3, float e2,
1266  float e1, float e0)
1267  {
1268  value_ = _mm256_set_ps(e7, e6, e5, e4, e3, e2, e1, e0);
1269  }
1270 
1271  private:
1272  __m256 value_;
1273 }; // class M256
1274 
1275 inline bool operator==(const M256 &a, const M256 &b)
1276 {
1277  std::array<float, 8> sa;
1278  std::array<float, 8> sb;
1279  a.store_u(sa.data());
1280  b.store_u(sb.data());
1281 
1282  return sa == sb;
1283 }
1284 
1285 inline bool operator!=(const M256 &a, const M256 &b) { return !(a == b); }
1286 
1287 template <typename CharT, typename Traits>
1288 inline std::basic_ostream<CharT, Traits> &operator<<(
1289  std::basic_ostream<CharT, Traits> &os, const M256 &a)
1290 {
1291  if (!os.good())
1292  return os;
1293 
1294  std::array<float, 8> sa;
1295  a.store_u(sa.data());
1296  os << sa;
1297 
1298  return os;
1299 }
1300 
1301 template <typename CharT, typename Traits>
1302 inline std::basic_istream<CharT, Traits> &operator>>(
1303  std::basic_istream<CharT, Traits> &is, M256 &a)
1304 {
1305  if (!is.good())
1306  return is;
1307 
1308  std::array<float, 8> sa;
1309  is >> sa;
1310 
1311  if (is.good())
1312  a.load_u(sa.data());
1313 
1314  return is;
1315 }
1316 
1317 inline M256 operator+(const M256 &a, const M256 &b)
1318 {
1319  return M256(_mm256_add_ps(a.value(), b.value()));
1320 }
1321 
1322 inline M256 operator-(const M256 &a, const M256 &b)
1323 {
1324  return M256(_mm256_sub_ps(a.value(), b.value()));
1325 }
1326 
1327 inline M256 operator*(const M256 &a, const M256 &b)
1328 {
1329  return M256(_mm256_mul_ps(a.value(), b.value()));
1330 }
1331 
1332 inline M256 operator/(const M256 &a, const M256 &b)
1333 {
1334  return M256(_mm256_div_ps(a.value(), b.value()));
1335 }
1336 
1337 VSMC_DEFINE_UTILITY_SIMD_REAL_BINARY_OP(M256, float, +, operator+, operator+=)
1338 VSMC_DEFINE_UTILITY_SIMD_REAL_BINARY_OP(M256, float, -, operator-, operator-=)
1339 VSMC_DEFINE_UTILITY_SIMD_REAL_BINARY_OP(M256, float, *, operator*, operator*=)
1340 VSMC_DEFINE_UTILITY_SIMD_REAL_BINARY_OP(M256, float, /, operator/, operator/=)
1341 
1342 class M256D
1345 {
1346  public:
1347  M256D() = default;
1348 
1349  M256D(const __m256d &value) : value_(value) {}
1350 
1351  static constexpr std::size_t size() { return 4; }
1352 
1353  __m256d &value() { return value_; }
1354  const __m256d &value() const { return value_; }
1355 
1356  __m256d *data() { return &value_; }
1357  const __m256d *data() const { return &value_; }
1358 
1359  template <typename T>
1360  void load_a(const T *mem)
1361  {
1362  value_ = _mm256_load_pd(reinterpret_cast<const double *>(mem));
1363  }
1364 
1365  template <typename T>
1366  void load_u(const T *mem)
1367  {
1368  value_ = _mm256_loadu_pd(reinterpret_cast<const double *>(mem));
1369  }
1370 
1371  template <typename T>
1372  void load(const T *mem)
1373  {
1374  reinterpret_cast<std::uintptr_t>(mem) % 32 == 0 ? load_a(mem) :
1375  load_u(mem);
1376  }
1377 
1378  template <typename T>
1379  void store_a(T *mem) const
1380  {
1381  _mm256_store_pd(reinterpret_cast<double *>(mem), value_);
1382  }
1383 
1384  template <typename T>
1385  void store_u(T *mem) const
1386  {
1387  _mm256_storeu_pd(reinterpret_cast<double *>(mem), value_);
1388  }
1389 
1390  template <typename T>
1391  void store(T *mem) const
1392  {
1393  reinterpret_cast<std::uintptr_t>(mem) % 32 == 0 ? store_a(mem) :
1394  store_u(mem);
1395  }
1396 
1397  void set0() { value_ = _mm256_setzero_pd(); }
1398 
1399  void set1(double e) { value_ = _mm256_set1_pd(e); }
1400 
1401  void set(double e3, double e2, double e1, double e0)
1402  {
1403  value_ = _mm256_set_pd(e3, e2, e1, e0);
1404  }
1405 
1406  private:
1407  __m256d value_;
1408 }; // class M256D
1409 
1410 inline bool operator==(const M256D &a, const M256D &b)
1411 {
1412  std::array<double, 4> sa;
1413  std::array<double, 4> sb;
1414  a.store_u(sa.data());
1415  b.store_u(sb.data());
1416 
1417  return sa == sb;
1418 }
1419 
1420 inline bool operator!=(const M256D &a, const M256D &b) { return !(a == b); }
1421 
1422 template <typename CharT, typename Traits>
1423 inline std::basic_ostream<CharT, Traits> &operator<<(
1424  std::basic_ostream<CharT, Traits> &os, const M256D &a)
1425 {
1426  if (!os.good())
1427  return os;
1428 
1429  std::array<double, 4> sa;
1430  a.store_u(sa.data());
1431  os << sa;
1432 
1433  return os;
1434 }
1435 
1436 template <typename CharT, typename Traits>
1437 inline std::basic_istream<CharT, Traits> &operator>>(
1438  std::basic_istream<CharT, Traits> &is, M256D &a)
1439 {
1440  if (!is.good())
1441  return is;
1442 
1443  std::array<double, 4> sa;
1444  is >> sa;
1445 
1446  if (is.good())
1447  a.load_u(sa.data());
1448 
1449  return is;
1450 }
1451 
1452 inline M256D operator+(const M256D &a, const M256D &b)
1453 {
1454  return M256D(_mm256_add_pd(a.value(), b.value()));
1455 }
1456 
1457 inline M256D operator-(const M256D &a, const M256D &b)
1458 {
1459  return M256D(_mm256_sub_pd(a.value(), b.value()));
1460 }
1461 
1462 inline M256D operator*(const M256D &a, const M256D &b)
1463 {
1464  return M256D(_mm256_mul_pd(a.value(), b.value()));
1465 }
1466 
1467 inline M256D operator/(const M256D &a, const M256D &b)
1468 {
1469  return M256D(_mm256_div_pd(a.value(), b.value()));
1470 }
1471 
1473  M256D, double, +, operator+, operator+=)
1475  M256D, double, -, operator-, operator-=)
1477  M256D, double, *, operator*, operator*=)
1479  M256D, double, /, operator/, operator/=)
1480 
1481 namespace internal
1482 {
1483 
1484 template <typename RealType>
1485 class M256TypeTrait;
1486 
1487 template <>
1488 class M256TypeTrait<float>
1489 {
1490  public:
1491  using type = M256;
1492 };
1493 
1494 template <>
1495 class M256TypeTrait<double>
1496 {
1497  public:
1498  using type = M256D;
1499 };
1500 
1501 } // namespace vsmc::internal
1502 
1504 template <typename T>
1505 using M256Type = typename std::conditional<std::is_integral<T>::value,
1506  M256I<T>, typename internal::M256TypeTrait<T>::type>::type;
1507 
1508 #endif // VSMC_HAS_AVX2
1509 
1510 } // namespace vsmc
1511 
1512 #endif // VSMC_UTILITY_SIMD_HPP
Definition: monitor.hpp:49
std::basic_ostream< CharT, Traits > & operator<<(std::basic_ostream< CharT, Traits > &os, const Sampler< T > &sampler)
Definition: sampler.hpp:929
#define VSMC_DEFINE_UTILITY_SIMD_INTEGER_BINARY_OP( Type, CType, op, bin, assign)
Definition: simd.hpp:37
bool operator==(const MKLBase< MKLPtr, Derived > &ptr1, const MKLBase< MKLPtr, Derived > &ptr2)
Comparison of equality of two MKLBase objects.
Definition: mkl.hpp:152
std::basic_istream< CharT, Traits > & operator>>(std::basic_istream< CharT, Traits > &is, std::array< T, N > &ary)
Definition: common.hpp:139
bool operator!=(const MKLBase< MKLPtr, Derived > &ptr1, const MKLBase< MKLPtr, Derived > &ptr2)
Comparison of inequality of two MKLBase objects.
Definition: mkl.hpp:161
#define VSMC_DEFINE_UTILITY_SIMD_REAL_BINARY_OP(Type, CType, op, bin, assign)
Definition: simd.hpp:73