32 #ifndef VSMC_UTILITY_SIMD_HPP 33 #define VSMC_UTILITY_SIMD_HPP 37 #define VSMC_DEFINE_UTILITY_SIMD_INTEGER_BINARY_OP( \ 38 Type, CType, op, bin, assign) \ 39 template <typename T> \ 40 inline Type &assign(Type &a, const Type &b) \ 47 template <typename T> \ 48 inline Type bin(const Type &a, CType b) \ 56 template <typename T> \ 57 inline Type bin(CType a, const Type &b) \ 65 template <typename T> \ 66 inline Type &assign(Type &a, CType b) \ 73 #define VSMC_DEFINE_UTILITY_SIMD_REAL_BINARY_OP(Type, CType, op, bin, assign) \ 74 inline Type &assign(Type &a, const Type &b) \ 81 inline Type bin(const Type &a, CType b) \ 89 inline Type bin(CType a, const Type &b) \ 97 inline Type &assign(Type &a, CType b) \ 105 #include <emmintrin.h> 112 template <
typename IntType = __m128i>
122 template <
typename T>
127 template <
typename T>
130 value_ = other.
value();
/// \brief Number of IntType elements that fit in one __m128i, i.e.
/// sizeof(__m128i) / sizeof(IntType); usable in constant expressions.
135 static constexpr std::size_t
size()
137 return sizeof(__m128i) /
sizeof(IntType);
/// \brief Read-only reference to the wrapped __m128i.
141 const __m128i &
value()
const {
return value_; }
/// \brief Pointer to the wrapped __m128i through which it can be modified.
143 __m128i *
data() {
return &value_; }
/// \brief Pointer to the wrapped __m128i for read-only access.
144 const __m128i *
data()
const {
return &value_; }
146 template <
typename T>
149 value_ = _mm_load_si128(reinterpret_cast<const __m128i *>(mem));
152 template <
typename T>
155 value_ = _mm_loadu_si128(reinterpret_cast<const __m128i *>(mem));
158 template <
typename T>
161 reinterpret_cast<std::uintptr_t
>(mem) % 16 == 0 ?
load_a(mem) :
165 template <
typename T>
168 _mm_store_si128(reinterpret_cast<__m128i *>(mem), value_);
171 template <
typename T>
174 _mm_storeu_si128(reinterpret_cast<__m128i *>(mem), value_);
177 template <
typename T>
180 reinterpret_cast<std::uintptr_t
>(mem) % 16 == 0 ?
store_a(mem) :
/// \brief Overwrite the wrapped __m128i with the result of _mm_setzero_si128().
184 void set0() { value_ = _mm_setzero_si128(); }
186 template <
typename T>
189 value_ =
set1(n, std::integral_constant<std::size_t,
sizeof(T)>());
192 template <
typename T>
195 value_ = _mm_set_epi64x(
196 static_cast<VSMC_INT64>(e1), static_cast<VSMC_INT64>(e0));
199 template <
typename T>
200 void set(T e3, T e2, T e1, T e0)
202 value_ = _mm_set_epi32(static_cast<int>(e3), static_cast<int>(e2),
203 static_cast<int>(e1), static_cast<int>(e0));
206 template <
typename T>
207 void set(T e7, T e6, T e5, T e4, T e3, T e2, T e1, T e0)
209 value_ = _mm_set_epi16(static_cast<short>(e7), static_cast<short>(e6),
210 static_cast<short>(e5), static_cast<short>(e4),
211 static_cast<short>(e3), static_cast<short>(e2),
212 static_cast<short>(e1), static_cast<short>(e0));
215 template <
typename T>
216 void set(T e15, T e14, T e13, T e12, T e11, T e10, T e9, T e8, T e7, T e6,
217 T e5, T e4, T e3, T e2, T e1, T e0)
219 value_ = _mm_set_epi8(static_cast<char>(e15), static_cast<char>(e14),
220 static_cast<char>(e13), static_cast<char>(e12),
221 static_cast<char>(e11), static_cast<char>(e10),
222 static_cast<char>(e9), static_cast<char>(e8),
223 static_cast<char>(e7), static_cast<char>(e6),
224 static_cast<char>(e5), static_cast<char>(e4),
225 static_cast<char>(e3), static_cast<char>(e2),
226 static_cast<char>(e1), static_cast<char>(e0));
232 template <
typename T>
233 __m128i
set1(T n, std::integral_constant<std::size_t,
sizeof(std::int8_t)>)
235 return _mm_set1_epi8(static_cast<char>(n));
238 template <
typename T>
240 T n, std::integral_constant<std::size_t,
sizeof(std::int16_t)>)
242 return _mm_set1_epi16(static_cast<short>(n));
245 template <
typename T>
247 T n, std::integral_constant<std::size_t,
sizeof(std::int32_t)>)
249 return _mm_set1_epi32(static_cast<int>(n));
252 template <
typename T>
254 T n, std::integral_constant<std::size_t,
sizeof(std::int64_t)>)
256 return _mm_set1_epi64x(static_cast<VSMC_INT64>(n));
263 template <
typename T>
265 std::integral_constant<std::size_t,
sizeof(std::int8_t)>)
270 template <
typename T>
272 std::integral_constant<std::size_t,
sizeof(std::int16_t)>)
277 template <
typename T>
279 std::integral_constant<std::size_t,
sizeof(std::int32_t)>)
284 template <
typename T>
286 std::integral_constant<std::size_t,
sizeof(std::int64_t)>)
291 template <
typename T>
293 std::integral_constant<std::size_t,
sizeof(std::int8_t)>)
298 template <
typename T>
300 std::integral_constant<std::size_t,
sizeof(std::int16_t)>)
305 template <
typename T>
307 std::integral_constant<std::size_t,
sizeof(std::int32_t)>)
312 template <
typename T>
314 std::integral_constant<std::size_t,
sizeof(std::int64_t)>)
319 template <
typename T>
321 std::integral_constant<std::size_t,
sizeof(std::int8_t)>)
326 template <
typename T>
328 std::integral_constant<std::size_t,
sizeof(std::int16_t)>)
333 template <
typename T>
335 std::integral_constant<std::size_t,
sizeof(std::int32_t)>)
340 template <
typename T>
342 std::integral_constant<std::size_t,
sizeof(std::int64_t)>)
347 template <
typename T>
349 std::integral_constant<std::size_t,
sizeof(std::int8_t)>)
354 template <
typename T>
356 std::integral_constant<std::size_t,
sizeof(std::int16_t)>)
361 template <
typename T>
363 std::integral_constant<std::size_t,
sizeof(std::int32_t)>)
368 template <
typename T>
370 std::integral_constant<std::size_t,
sizeof(std::int64_t)>)
377 template <
typename T>
380 std::array<std::uint64_t, 2> sa;
381 std::array<std::uint64_t, 2> sb;
388 template <
typename T>
394 template <
typename CharT,
typename Traits,
typename T>
396 std::basic_ostream<CharT, Traits> &os,
const M128I<T> &a)
401 std::array<T, M128I<T>::size()> sa;
408 template <
typename CharT,
typename Traits,
typename T>
410 std::basic_istream<CharT, Traits> &is,
M128I<T> &a)
415 std::array<T, M128I<T>::size()> sa;
424 template <
typename T>
428 a, b, std::integral_constant<std::size_t,
sizeof(T)>());
431 template <
typename T>
435 a, b, std::integral_constant<std::size_t,
sizeof(T)>());
438 template <
typename T>
444 template <
typename T>
450 template <
typename T>
456 template <
typename T>
457 inline M128I<T> operator<<(const M128I<T> &a,
int imm8)
460 a, imm8, std::integral_constant<std::size_t,
sizeof(T)>());
463 template <
typename T>
471 template <
typename T>
475 a, imm8, std::integral_constant<std::size_t,
sizeof(T)>());
478 template <
typename T>
489 M128I<T>, T, -, operator-, operator-=)
491 M128I<T>, T, &, operator&, operator&=)
493 M128I<T>, T, |, operator|, operator|=)
495 M128I<T>, T, ^, operator^, operator^=)
/// \brief Number of float elements in the vector; fixed at 4.
506 static constexpr std::size_t
size() {
return 4; }
/// \brief Read-only reference to the wrapped __m128.
509 const __m128 &
value()
const {
return value_; }
/// \brief Pointer to the wrapped __m128 through which it can be modified.
511 __m128 *
data() {
return &value_; }
/// \brief Pointer to the wrapped __m128 for read-only access.
512 const __m128 *
data()
const {
return &value_; }
514 template <
typename T>
517 value_ = _mm_load_ps(reinterpret_cast<const float *>(mem));
520 template <
typename T>
523 value_ = _mm_loadu_ps(reinterpret_cast<const float *>(mem));
526 template <
typename T>
529 reinterpret_cast<std::uintptr_t
>(mem) % 16 == 0 ?
load_a(mem) :
533 template <
typename T>
536 _mm_store_ps(reinterpret_cast<float *>(mem), value_);
539 template <
typename T>
542 _mm_storeu_ps(reinterpret_cast<float *>(mem), value_);
545 template <
typename T>
548 reinterpret_cast<std::uintptr_t
>(mem) % 16 == 0 ?
store_a(mem) :
/// \brief Overwrite the wrapped __m128 with the result of _mm_setzero_ps().
552 void set0() { value_ = _mm_setzero_ps(); }
/// \brief Overwrite the wrapped __m128 with _mm_set1_ps(e), i.e. the single
/// value e replicated by the intrinsic.
554 void set1(
float e) { value_ = _mm_set1_ps(e); }
/// \brief Set the four elements individually. The arguments are forwarded to
/// _mm_set_ps unchanged and in the same order (e3 first, e0 last).
556 void set(
float e3,
float e2,
float e1,
float e0)
558 value_ = _mm_set_ps(e3, e2, e1, e0);
567 std::array<float, 4> sa;
568 std::array<float, 4> sb;
577 template <
typename CharT,
typename Traits>
579 std::basic_ostream<CharT, Traits> &os,
const M128 &a)
584 std::array<float, 4> sa;
591 template <
typename CharT,
typename Traits>
593 std::basic_istream<CharT, Traits> &is,
M128 &a)
598 std::array<float, 4> sa;
/// \brief Number of double elements in the vector; fixed at 2.
641 static constexpr std::size_t
size() {
return 2; }
/// \brief Read-only reference to the wrapped __m128d.
644 const __m128d &
value()
const {
return value_; }
/// \brief Pointer to the wrapped __m128d through which it can be modified.
646 __m128d *
data() {
return &value_; }
/// \brief Pointer to the wrapped __m128d for read-only access.
647 const __m128d *
data()
const {
return &value_; }
649 template <
typename T>
652 value_ = _mm_load_pd(reinterpret_cast<const double *>(mem));
655 template <
typename T>
658 value_ = _mm_loadu_pd(reinterpret_cast<const double *>(mem));
661 template <
typename T>
664 reinterpret_cast<std::uintptr_t
>(mem) % 16 == 0 ?
load_a(mem) :
668 template <
typename T>
671 _mm_store_pd(reinterpret_cast<double *>(mem), value_);
674 template <
typename T>
677 _mm_storeu_pd(reinterpret_cast<double *>(mem), value_);
680 template <
typename T>
683 reinterpret_cast<std::uintptr_t
>(mem) % 16 == 0 ?
store_a(mem) :
/// \brief Overwrite the wrapped __m128d with the result of _mm_setzero_pd().
687 void set0() { value_ = _mm_setzero_pd(); }
/// \brief Overwrite the wrapped __m128d with _mm_set1_pd(e), i.e. the single
/// value e replicated by the intrinsic.
689 void set1(
double e) { value_ = _mm_set1_pd(e); }
/// \brief Set the two elements individually. The arguments are forwarded to
/// _mm_set_pd unchanged and in the same order (e1 first, e0 last).
691 void set(
double e1,
double e0) { value_ = _mm_set_pd(e1, e0); }
699 std::array<double, 2> sa;
700 std::array<double, 2> sb;
709 template <
typename CharT,
typename Traits>
711 std::basic_ostream<CharT, Traits> &os,
const M128D &a)
716 std::array<double, 2> sa;
723 template <
typename CharT,
typename Traits>
725 std::basic_istream<CharT, Traits> &is,
M128D &a)
730 std::array<double, 2> sa;
760 M128D,
double, +,
operator+,
operator+=)
762 M128D,
double, -, operator-, operator-=)
764 M128D,
double, *, operator*, operator*=)
766 M128D,
double, /, operator/, operator/=)
771 template <
typename RealType>
792 template <
typename T>
793 using M128Type =
typename std::conditional<std::is_integral<T>::value,
796 #endif // VSMC_HAS_SSE2 799 #include <immintrin.h> 803 template <
typename IntType = __m256i>
813 template <
typename T>
/// \brief Converting assignment from an M256I with a different element type.
///
/// The source vector is read through the public value() accessor, exactly as
/// the M128I counterpart in this file does. M256I<T> and M256I<IntType> are
/// unrelated classes whenever T differs from IntType, so reaching into
/// other.value_ directly relies on that member being accessible from another
/// specialization (NOTE(review): the declaration of value_ is not visible in
/// this listing; it is presumably private, in which case the direct access
/// fails to compile as soon as the template is instantiated with T != IntType).
818 template <
typename T>
821 value_ = other.value();
/// \brief Number of IntType elements that fit in one __m256i, i.e.
/// sizeof(__m256i) / sizeof(IntType); usable in constant expressions.
826 static constexpr std::size_t
size()
828 return sizeof(__m256i) /
sizeof(IntType);
/// \brief Read-only reference to the wrapped __m256i.
832 const __m256i &
value()
const {
return value_; }
/// \brief Pointer to the wrapped __m256i through which it can be modified.
834 __m256i *
data() {
return &value_; }
/// \brief Pointer to the wrapped __m256i for read-only access.
835 const __m256i *
data()
const {
return &value_; }
837 template <
typename T>
840 value_ = _mm256_load_si256(reinterpret_cast<const __m256i *>(mem));
843 template <
typename T>
846 value_ = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(mem));
849 template <
typename T>
852 reinterpret_cast<std::uintptr_t
>(mem) % 32 == 0 ?
load_a(mem) :
856 template <
typename T>
859 _mm256_store_si256(reinterpret_cast<__m256i *>(mem), value_);
862 template <
typename T>
865 _mm256_storeu_si256(reinterpret_cast<__m256i *>(mem), value_);
868 template <
typename T>
871 reinterpret_cast<std::uintptr_t
>(mem) % 32 == 0 ?
store_a(mem) :
/// \brief Overwrite the wrapped __m256i with the result of _mm256_setzero_si256().
875 void set0() { value_ = _mm256_setzero_si256(); }
877 template <
typename T>
880 value_ =
set1(n, std::integral_constant<std::size_t,
sizeof(T)>());
883 template <
typename T>
884 void set(T e3, T e2, T e1, T e0)
886 value_ = _mm256_set_epi64x(static_cast<VSMC_INT64>(e3),
887 static_cast<VSMC_INT64>(e2), static_cast<VSMC_INT64>(e1),
888 static_cast<VSMC_INT64>(e0));
891 template <
typename T>
892 void set(T e7, T e6, T e5, T e4, T e3, T e2, T e1, T e0)
894 value_ = _mm256_set_epi32(static_cast<int>(e7), static_cast<int>(e6),
895 static_cast<int>(e5), static_cast<int>(e4), static_cast<int>(e3),
896 static_cast<int>(e2), static_cast<int>(e1), static_cast<int>(e0));
899 template <
typename T>
900 void set(T e15, T e14, T e13, T e12, T e11, T e10, T e9, T e8, T e7, T e6,
901 T e5, T e4, T e3, T e2, T e1, T e0)
904 _mm256_set_epi16(static_cast<short>(e15), static_cast<short>(e14),
905 static_cast<short>(e13), static_cast<short>(e12),
906 static_cast<short>(e11), static_cast<short>(e10),
907 static_cast<short>(e9), static_cast<short>(e8),
908 static_cast<short>(e7), static_cast<short>(e6),
909 static_cast<short>(e5), static_cast<short>(e4),
910 static_cast<short>(e3), static_cast<short>(e2),
911 static_cast<short>(e1), static_cast<short>(e0));
914 template <
typename T>
915 void set(T e31, T e30, T e29, T e28, T e27, T e26, T e25, T e24, T e23,
916 T e22, T e21, T e20, T e19, T e18, T e17, T e16, T e15, T e14, T e13,
917 T e12, T e11, T e10, T e9, T e8, T e7, T e6, T e5, T e4, T e3, T e2,
921 _mm256_set_epi8(static_cast<char>(e31), static_cast<char>(e30),
922 static_cast<char>(e29), static_cast<char>(e28),
923 static_cast<char>(e27), static_cast<char>(e26),
924 static_cast<char>(e25), static_cast<char>(e24),
925 static_cast<char>(e23), static_cast<char>(e22),
926 static_cast<char>(e21), static_cast<char>(e20),
927 static_cast<char>(e19), static_cast<char>(e18),
928 static_cast<char>(e17), static_cast<char>(e16),
929 static_cast<char>(e15), static_cast<char>(e14),
930 static_cast<char>(e13), static_cast<char>(e12),
931 static_cast<char>(e11), static_cast<char>(e10),
932 static_cast<char>(e9), static_cast<char>(e8),
933 static_cast<char>(e7), static_cast<char>(e6),
934 static_cast<char>(e5), static_cast<char>(e4),
935 static_cast<char>(e3), static_cast<char>(e2),
936 static_cast<char>(e1), static_cast<char>(e0));
942 template <
typename T>
943 __m256i
set1(T n, std::integral_constant<std::size_t,
sizeof(std::int8_t)>)
945 return _mm256_set1_epi8(static_cast<char>(n));
948 template <
typename T>
950 T n, std::integral_constant<std::size_t,
sizeof(std::int16_t)>)
952 return _mm256_set1_epi16(static_cast<short>(n));
955 template <
typename T>
957 T n, std::integral_constant<std::size_t,
sizeof(std::int32_t)>)
959 return _mm256_set1_epi32(static_cast<int>(n));
/// \brief Overload selected for 8-byte element types (tag
/// std::integral_constant<std::size_t, sizeof(std::int64_t)>): returns
/// _mm256_set1_epi64x applied to n.
///
/// The cast goes through VSMC_INT64, the 64-bit integer alias every other
/// 64-bit intrinsic call in this file uses (the 128-bit epi64x calls and
/// M256I's _mm256_set_epi64x), rather than a hard-coded long long.
/// NOTE(review): the line carrying this overload's name is missing from the
/// listing; it is presumably the 64-bit member of the set1 overload set.
962 template <
typename T>
964 T n, std::integral_constant<std::size_t,
sizeof(std::int64_t)>)
966 return _mm256_set1_epi64x(static_cast<VSMC_INT64>(n));
973 template <
typename T>
975 std::integral_constant<std::size_t,
sizeof(std::int8_t)>)
980 template <
typename T>
982 std::integral_constant<std::size_t,
sizeof(std::int16_t)>)
987 template <
typename T>
989 std::integral_constant<std::size_t,
sizeof(std::int32_t)>)
994 template <
typename T>
996 std::integral_constant<std::size_t,
sizeof(std::int64_t)>)
1001 template <
typename T>
1003 std::integral_constant<std::size_t,
sizeof(std::int8_t)>)
1008 template <
typename T>
1010 std::integral_constant<std::size_t,
sizeof(std::int16_t)>)
1015 template <
typename T>
1017 std::integral_constant<std::size_t,
sizeof(std::int32_t)>)
1022 template <
typename T>
1024 std::integral_constant<std::size_t,
sizeof(std::int64_t)>)
1029 template <
typename T>
1031 std::integral_constant<std::size_t,
sizeof(std::int8_t)>)
1036 template <
typename T>
1038 std::integral_constant<std::size_t,
sizeof(std::int16_t)>)
1043 template <
typename T>
1045 std::integral_constant<std::size_t,
sizeof(std::int32_t)>)
1050 template <
typename T>
1052 std::integral_constant<std::size_t,
sizeof(std::int64_t)>)
1057 template <
typename T>
1059 std::integral_constant<std::size_t,
sizeof(std::int8_t)>)
1064 template <
typename T>
1066 std::integral_constant<std::size_t,
sizeof(std::int16_t)>)
1071 template <
typename T>
1073 std::integral_constant<std::size_t,
sizeof(std::int32_t)>)
1078 template <
typename T>
1080 std::integral_constant<std::size_t,
sizeof(std::int64_t)>)
1087 template <
typename T>
1090 std::array<std::uint64_t, 4> sa;
1091 std::array<std::uint64_t, 4> sb;
1098 template <
typename T>
1104 template <
typename CharT,
typename Traits,
typename T>
1106 std::basic_ostream<CharT, Traits> &os,
const M256I<T> &a)
1111 std::array<T, M256I<T>::size()> sa;
1118 template <
typename CharT,
typename Traits,
typename T>
1120 std::basic_istream<CharT, Traits> &is,
M256I<T> &a)
1125 std::array<T, M256I<T>::size()> sa;
1134 template <
typename T>
1138 a, b, std::integral_constant<std::size_t,
sizeof(T)>());
1141 template <
typename T>
1145 a, b, std::integral_constant<std::size_t,
sizeof(T)>());
1148 template <
typename T>
1154 template <
typename T>
1160 template <
typename T>
1166 template <
typename T>
1167 inline M256I<T> operator<<(const M256I<T> &a,
int imm8)
1170 a, imm8, std::integral_constant<std::size_t,
sizeof(T)>());
1173 template <
typename T>
1181 template <
typename T>
1185 a, imm8, std::integral_constant<std::size_t,
sizeof(T)>());
1188 template <
typename T>
/// \brief Number of float elements in the vector; fixed at 8.
1216 static constexpr std::size_t
size() {
return 8; }
/// \brief Read-only reference to the wrapped __m256.
1219 const __m256 &
value()
const {
return value_; }
/// \brief Pointer to the wrapped __m256 for read-only access.
1222 const __m256 *
data()
const {
return &value_; }
1224 template <
typename T>
1227 value_ = _mm256_load_ps(reinterpret_cast<const float *>(mem));
1230 template <
typename T>
1233 value_ = _mm256_loadu_ps(reinterpret_cast<const float *>(mem));
1236 template <
typename T>
1239 reinterpret_cast<std::uintptr_t
>(mem) % 32 == 0 ?
load_a(mem) :
1243 template <
typename T>
1246 _mm256_store_ps(reinterpret_cast<float *>(mem), value_);
1249 template <
typename T>
1252 _mm256_storeu_ps(reinterpret_cast<float *>(mem), value_);
1255 template <
typename T>
1258 reinterpret_cast<std::uintptr_t
>(mem) % 32 == 0 ?
store_a(mem) :
/// \brief Overwrite the wrapped __m256 with the result of _mm256_setzero_ps().
1262 void set0() { value_ = _mm256_setzero_ps(); }
/// \brief Overwrite the wrapped __m256 with _mm256_set1_ps(e), i.e. the single
/// value e replicated by the intrinsic.
1264 void set1(
float e) { value_ = _mm256_set1_ps(e); }
/// \brief Set the eight elements individually. The arguments are forwarded to
/// _mm256_set_ps unchanged and in the same order (e7 first, e0 last).
1266 void set(
float e7,
float e6,
float e5,
float e4,
float e3,
float e2,
1269 value_ = _mm256_set_ps(e7, e6, e5, e4, e3, e2, e1, e0);
1278 std::array<float, 8> sa;
1279 std::array<float, 8> sb;
1288 template <
typename CharT,
typename Traits>
1290 std::basic_ostream<CharT, Traits> &os,
const M256 &a)
1295 std::array<float, 8> sa;
1302 template <
typename CharT,
typename Traits>
1304 std::basic_istream<CharT, Traits> &is,
M256 &a)
1309 std::array<float, 8> sa;
/// \brief Number of double elements in the vector; fixed at 4.
1352 static constexpr std::size_t
size() {
return 4; }
/// \brief Read-only reference to the wrapped __m256d.
1355 const __m256d &
value()
const {
return value_; }
/// \brief Pointer to the wrapped __m256d through which it can be modified.
1357 __m256d *
data() {
return &value_; }
/// \brief Pointer to the wrapped __m256d for read-only access.
1358 const __m256d *
data()
const {
return &value_; }
1360 template <
typename T>
1363 value_ = _mm256_load_pd(reinterpret_cast<const double *>(mem));
1366 template <
typename T>
1369 value_ = _mm256_loadu_pd(reinterpret_cast<const double *>(mem));
1372 template <
typename T>
1375 reinterpret_cast<std::uintptr_t
>(mem) % 32 == 0 ?
load_a(mem) :
1379 template <
typename T>
1382 _mm256_store_pd(reinterpret_cast<double *>(mem), value_);
1385 template <
typename T>
1388 _mm256_storeu_pd(reinterpret_cast<double *>(mem), value_);
1391 template <
typename T>
1394 reinterpret_cast<std::uintptr_t
>(mem) % 32 == 0 ?
store_a(mem) :
/// \brief Overwrite the wrapped __m256d with the result of _mm256_setzero_pd().
1398 void set0() { value_ = _mm256_setzero_pd(); }
/// \brief Overwrite the wrapped __m256d with _mm256_set1_pd(e), i.e. the single
/// value e replicated by the intrinsic.
1400 void set1(
double e) { value_ = _mm256_set1_pd(e); }
/// \brief Set the four elements individually. The arguments are forwarded to
/// _mm256_set_pd unchanged and in the same order (e3 first, e0 last).
1402 void set(
double e3,
double e2,
double e1,
double e0)
1404 value_ = _mm256_set_pd(e3, e2, e1, e0);
1413 std::array<double, 4> sa;
1414 std::array<double, 4> sb;
1423 template <
typename CharT,
typename Traits>
1425 std::basic_ostream<CharT, Traits> &os,
const M256D &a)
1430 std::array<double, 4> sa;
1437 template <
typename CharT,
typename Traits>
1439 std::basic_istream<CharT, Traits> &is,
M256D &a)
1444 std::array<double, 4> sa;
1474 M256D,
double, +,
operator+,
operator+=)
1476 M256D,
double, -, operator-, operator-=)
1478 M256D,
double, *, operator*, operator*=)
1480 M256D,
double, /, operator/, operator/=)
1485 template <
typename RealType>
1506 template <
typename T>
1507 using M256Type =
typename std::conditional<std::is_integral<T>::value,
1510 #endif // VSMC_HAS_AVX2 1514 #endif // VSMC_UTILITY_SIMD_HPP
const __m256i * data() const
M256I(const __m256i &value)
M128I< T > operator>>=(M128I< T > &a, int imm8)
const __m128 & value() const
const __m128i & value() const
void load_u(const T *mem)
M128I< T > m128i_srli(const M128I< T > &a, int imm8, std::integral_constant< std::size_t, sizeof(std::int8_t)>)
M256I< T > m256i_add(const M256I< T > &a, const M256I< T > &b, std::integral_constant< std::size_t, sizeof(std::int8_t)>)
void load_a(const T *mem)
SingleParticle< T > operator-(const SingleParticle< T > &sp, IntType n)
Using __m256i as integer vector.
void store_a(T *mem) const
M128I< T > m128i_slli(const M128I< T > &a, int imm8, std::integral_constant< std::size_t, sizeof(std::int8_t)>)
void store_a(T *mem) const
void store_a(T *mem) const
M128 operator/(const M128 &a, const M128 &b)
void load_u(const T *mem)
const __m256 & value() const
M256I< T > m256i_srli(const M256I< T > &a, int imm8, std::integral_constant< std::size_t, sizeof(std::int8_t)>)
Using __m128i as integer vector.
const __m128 * data() const
static constexpr std::size_t size()
M128I< T > m128i_add(const M128I< T > &a, const M128I< T > &b, std::integral_constant< std::size_t, sizeof(std::int8_t)>)
void store_u(T *mem) const
const __m128d * data() const
M256I< T > m256i_slli(const M256I< T > &a, int imm8, std::integral_constant< std::size_t, sizeof(std::int8_t)>)
void load_a(const T *mem)
M256(const __m256 &value)
M256I< T > m256i_slli(const M256I< T > &a, int imm8, std::integral_constant< std::size_t, sizeof(std::int64_t)>)
M128I< T > m128i_sub(const M128I< T > &a, const M128I< T > &b, std::integral_constant< std::size_t, sizeof(std::int64_t)>)
bool operator!=(const SingleParticle< T > &sp1, const SingleParticle< T > &sp2)
void load_u(const T *mem)
M128I(const __m128i &value)
void store_u(T *mem) const
void load_a(const T *mem)
M128I< T > operator&(const M128I< T > &a, const M128I< T > &b)
void load_u(const T *mem)
static constexpr std::size_t size()
M128 operator*(const M128 &a, const M128 &b)
static constexpr std::size_t size()
void load_u(const T *mem)
void store_u(T *mem) const
const __m256d & value() const
const __m256i & value() const
void store_u(T *mem) const
const __m128i * data() const
std::basic_ostream< CharT, Traits > & operator<<(std::basic_ostream< CharT, Traits > &os, const Sampler< T > &sampler)
static constexpr std::size_t size()
M256D(const __m256d &value)
bool operator==(const SingleParticle< T > &sp1, const SingleParticle< T > &sp2)
M128I< T > m128i_slli(const M128I< T > &a, int imm8, std::integral_constant< std::size_t, sizeof(std::int64_t)>)
void load_u(const T *mem)
static constexpr std::size_t size()
M128I< IntType > & operator=(const M128I< T > &other)
M128I(const M128I< T > &other)
M128I< T > operator^(const M128I< T > &a, const M128I< T > &b)
M256I< T > m256i_add(const M256I< T > &a, const M256I< T > &b, std::integral_constant< std::size_t, sizeof(std::int64_t)>)
typename std::conditional< std::is_integral< T >::value, M128I< T >, typename internal::M128TypeTrait< T >::type >::type M128Type
floating point SSE2 type
void store_a(T *mem) const
void store_u(T *mem) const
M256I< T > m256i_srli(const M256I< T > &a, int imm8, std::integral_constant< std::size_t, sizeof(std::int64_t)>)
void store_u(T *mem) const
const __m256 * data() const
M128I< T > m128i_sub(const M128I< T > &a, const M128I< T > &b, std::integral_constant< std::size_t, sizeof(std::int8_t)>)
#define VSMC_DEFINE_UTILITY_SIMD_INTEGER_BINARY_OP( Type, CType, op, bin, assign)
M256I< T > m256i_sub(const M256I< T > &a, const M256I< T > &b, std::integral_constant< std::size_t, sizeof(std::int8_t)>)
M128I< T > m128i_srli(const M128I< T > &a, int imm8, std::integral_constant< std::size_t, sizeof(std::int64_t)>)
M128(const __m128 &value)
typename std::conditional< std::is_integral< T >::value, M256I< T >, typename internal::M256TypeTrait< T >::type >::type M256Type
floating point AVX2 type
void load_a(const T *mem)
M128D(const __m128d &value)
static constexpr std::size_t size()
M128I< T > m128i_add(const M128I< T > &a, const M128I< T > &b, std::integral_constant< std::size_t, sizeof(std::int64_t)>)
const __m256d * data() const
M256I(const M256I< T > &other)
M256I< T > m256i_sub(const M256I< T > &a, const M256I< T > &b, std::integral_constant< std::size_t, sizeof(std::int64_t)>)
M256I< IntType > & operator=(const M256I< T > &other)
void store_a(T *mem) const
void store_a(T *mem) const
std::basic_istream< CharT, Traits > & operator>>(std::basic_istream< CharT, Traits > &is, std::array< T, N > &ary)
void load_a(const T *mem)
void load_a(const T *mem)
#define VSMC_DEFINE_UTILITY_SIMD_REAL_BINARY_OP(Type, CType, op, bin, assign)
const __m128d & value() const
M128I< T > operator|(const M128I< T > &a, const M128I< T > &b)
SingleParticle< T > operator+(const SingleParticle< T > &sp, IntType n)