32 #ifndef VSMC_RNG_THREEFRY_HPP 33 #define VSMC_RNG_THREEFRY_HPP 38 #define VSMC_STATIC_ASSERT_RNG_THREEFRY_RESULT_TYPE(ResultType, SIMD) \ 39 VSMC_STATIC_ASSERT(((sizeof(ResultType) == sizeof(std::uint32_t) && \ 40 std::is_unsigned<ResultType>::value) || \ 41 (sizeof(ResultType) == sizeof(std::uint64_t) && \ 42 std::is_unsigned<ResultType>::value)), \ 43 "**ThreefryGenerator" #SIMD \ 44 "** USED WITH ResultType OTHER THAN UNSIGNED 32/64 BITS INTEGER") 46 #define VSMC_STATIC_ASSERT_RNG_THREEFRY_SIZE(K, SIMD) \ 47 VSMC_STATIC_ASSERT((K == 2 || K == 4), \ 48 "**Threefry" #SIMD "** USED WITH SIZE OTHER THAN 2 OR 4") 50 #define VSMC_STATIC_ASSERT_RNG_THREEFRY(SIMD) \ 51 VSMC_STATIC_ASSERT_RNG_THREEFRY_RESULT_TYPE(ResultType, SIMD); \ 52 VSMC_STATIC_ASSERT_RNG_THREEFRY_SIZE(K, SIMD); 54 #define VSMC_DEFINE_RNG_THREEFRY_ROTATE_CONSTANT(T, K, N, I, val) \ 56 class ThreefryRotateConstant<T, K, N, I> \ 57 : public std::integral_constant<int, val> \ 59 }; // class ThreefryRotateConstant 63 #ifndef VSMC_RNG_THREEFRY_VECTOR_LENGTH 64 #define VSMC_RNG_THREEFRY_VECTOR_LENGTH 4 69 #ifndef VSMC_RNG_THREEFRY_ROUNDS 70 #define VSMC_RNG_THREEFRY_ROUNDS 20 82 template <
typename T,
template <
typename>
class SIMD>
89 :
public std::integral_constant<std::uint32_t, UINT32_C(0x1BD11BDA)>
95 :
public std::integral_constant<std::uint64_t,
96 UINT64_C(0x1BD11BDAA9FC1A22)>
100 template <
typename, std::
size_t, std::
size_t, std::
size_t>
103 template <
typename T,
template <
typename>
class SIMD, std::size_t K,
104 std::size_t N, std::size_t I>
164 template <typename T, std::
size_t K>
168 static void eval(
const std::array<T, K> &key, std::array<T, K + 1> &par)
171 par_xor<0>(key, par, std::integral_constant<bool, 0 < K>());
175 template <std::
size_t>
177 const std::array<T, K> &, std::array<T, K + 1> &, std::false_type)
181 template <std::
size_t N>
183 const std::array<T, K> &key, std::array<T, K + 1> &par, std::true_type)
185 std::get<N>(par) = std::get<N>(key);
186 par.back() ^= std::get<N>(key);
187 par_xor<N + 1>(key, par, std::integral_constant<bool, N + 1 < K>());
191 template <
typename T>
196 template <
typename T,
template <
typename>
class SIMD>
201 template <
typename T,
int R>
211 template <
typename T, std::
size_t K, std::
size_t N,
bool = (N > 0)>
215 static void eval(std::array<T, K> &) {}
218 template <
typename T, std::
size_t N>
222 static void eval(std::array<T, 2> &state)
224 std::get<0>(state) += std::get<1>(state);
227 0>::value>::eval(std::get<1>(state));
228 std::get<1>(state) ^= std::get<0>(state);
232 static constexpr std::size_t r_ = (N - 1) % 8;
235 template <
typename T, std::
size_t N>
239 static void eval(std::array<T, 4> &state)
241 std::get<0>(state) += std::get<i0_>(state);
242 std::get<i0_>(state) =
244 0>::value>::eval(std::get<i0_>(state));
245 std::get<i0_>(state) ^= std::get<0>(state);
247 std::get<2>(state) += std::get<i2_>(state);
248 std::get<i2_>(state) =
250 1>::value>::eval(std::get<i2_>(state));
251 std::get<i2_>(state) ^= std::get<2>(state);
255 static constexpr std::size_t i0_ = N % 2 ? 1 : 3;
256 static constexpr std::size_t i2_ = N % 2 ? 3 : 1;
257 static constexpr std::size_t r_ = (N - 1) % 8;
260 template <
typename T, std::
size_t Inc>
262 :
public std::integral_constant<T, static_cast<T>(Inc)>
266 template <
typename T,
template <
typename>
class SIMD, std::size_t Inc>
271 template <
typename T, std::
size_t K, std::
size_t N,
bool = (N % 4 == 0)>
275 static void eval(std::array<T, K> &,
const std::array<T, K + 1> &) {}
278 template <
typename T, std::
size_t N>
282 static void eval(std::array<T, 2> &state,
const std::array<T, 3> &par)
284 std::get<0>(state) += std::get<i0_>(par);
285 std::get<1>(state) += std::get<i1_>(par);
290 static constexpr std::size_t inc_ = N / 4;
291 static constexpr std::size_t i0_ = (inc_ + 0) % 3;
292 static constexpr std::size_t i1_ = (inc_ + 1) % 3;
295 template <
typename T, std::
size_t N>
299 static void eval(std::array<T, 4> &state,
const std::array<T, 5> &par)
301 std::get<0>(state) += std::get<i0_>(par);
302 std::get<1>(state) += std::get<i1_>(par);
303 std::get<2>(state) += std::get<i2_>(par);
304 std::get<3>(state) += std::get<i3_>(par);
309 static constexpr std::size_t inc_ = N / 4;
310 static constexpr std::size_t i0_ = (inc_ + 0) % 5;
311 static constexpr std::size_t i1_ = (inc_ + 1) % 5;
312 static constexpr std::size_t i2_ = (inc_ + 2) % 5;
313 static constexpr std::size_t i3_ = (inc_ + 3) % 5;
331 static constexpr std::size_t
size() {
return K; }
336 std::array<result_type, K> &buffer)
const 338 std::array<result_type, K + 1> par;
342 generate<0>(buffer, par, std::true_type());
348 const std::size_t m = n / size();
349 std::array<result_type, K + 1> par;
353 for (std::size_t i = 0; i != m; ++i)
354 generate<0>(s[i], par, std::true_type());
360 template <std::
size_t>
361 void generate(std::array<result_type, K> &,
362 const std::array<result_type, K + 1> &, std::false_type)
const 366 template <std::
size_t N>
367 void generate(std::array<result_type, K> &state,
368 const std::array<result_type, K + 1> &par, std::true_type)
const 373 state, par, std::integral_constant < bool, N<Rounds>());
413 template <
typename ResultType, std::
size_t K>
414 class ThreefryParPackSSE2
417 static void eval(
const std::array<ResultType, K + 1> &p,
418 std::array<M128I<ResultType>, K + 1> &par)
420 pack<0>(p, par, std::integral_constant<bool, 0 < K + 1>());
424 template <std::
size_t>
425 static void pack(
const std::array<ResultType, K + 1> &,
426 std::array<M128I<ResultType>, K + 1> &, std::false_type)
430 template <std::
size_t N>
431 static void pack(
const std::array<ResultType, K + 1> &p,
432 std::array<M128I<ResultType>, K + 1> &par, std::true_type)
434 std::get<N>(par).set1(std::get<N>(p));
435 pack<N + 1>(p, par, std::integral_constant<bool, N + 1 < K + 1>());
439 template <
typename ResultType, std::
size_t K>
440 class ThreefryCtrPackSSE2
443 static void eval(std::array<ResultType, K> &ctr,
444 std::array<M128I<ResultType>, K> &state)
446 std::array<std::array<ResultType, K>, M128I<ResultType>::size()>
449 pack<0>(ctr_block, state, std::integral_constant<bool, 0 < K>());
453 template <std::
size_t N>
454 static void pack(
const std::array<std::array<ResultType, K>,
455 M128I<ResultType>::size()> &,
456 std::array<M128I<ResultType>, K> &, std::false_type)
460 template <std::
size_t N>
461 static void pack(
const std::array<std::array<ResultType, K>,
462 M128I<ResultType>::size()> &ctr_block,
463 std::array<M128I<ResultType>, K> &state, std::true_type)
465 set<N>(ctr_block, state,
466 std::integral_constant<std::size_t, sizeof(ResultType)>());
468 ctr_block, state, std::integral_constant<bool, N + 1 < K>());
471 template <std::
size_t N>
472 static void set(
const std::array<std::array<ResultType, K>,
473 M128I<ResultType>::size()> &ctr_block,
474 std::array<M128I<ResultType>, K> &state,
475 std::integral_constant<std::size_t, 4>)
477 std::get<N>(state).
set(std::get<N>(std::get<0>(ctr_block)),
478 std::get<N>(std::get<1>(ctr_block)),
479 std::get<N>(std::get<2>(ctr_block)),
480 std::get<N>(std::get<3>(ctr_block)));
483 template <std::
size_t N>
484 static void set(
const std::array<std::array<ResultType, K>,
485 M128I<ResultType>::size()> &ctr_block,
486 std::array<M128I<ResultType>, K> &state,
487 std::integral_constant<std::size_t, 8>)
489 std::get<N>(state).
set(std::get<N>(std::get<0>(ctr_block)),
490 std::get<N>(std::get<1>(ctr_block)));
500 class ThreefryGeneratorSSE2
503 using result_type = ResultType;
504 using ctr_type = std::array<ResultType, K>;
505 using key_type = std::array<ResultType, K>;
509 static constexpr std::size_t size()
511 return K * M128I<ResultType>::size();
514 void reset(
const key_type &) {}
516 void operator()(ctr_type &ctr,
const key_type &key,
517 std::array<result_type, K * M128I<ResultType>::size()> &buffer)
520 std::array<M128I<ResultType>, K> state;
521 std::array<ResultType, size()> result;
524 std::array<result_type, K + 1> p;
525 std::array<M128I<ResultType>, K + 1> par;
527 internal::ThreefryParPackSSE2<ResultType, K>::eval(p, par);
528 internal::ThreefryCtrPackSSE2<ResultType, K>::eval(ctr, buf.state);
529 generate<0>(buf.state, par, std::true_type());
533 std::size_t operator()(
534 ctr_type &,
const key_type &, std::size_t, result_type *)
const 540 template <std::
size_t>
541 void generate(std::array<M128I<ResultType>, K> &,
542 const std::array<M128I<ResultType>, K + 1> &, std::false_type)
546 template <std::
size_t N>
547 void generate(std::array<M128I<ResultType>, K> &state,
548 const std::array<M128I<ResultType>, K + 1> &par, std::true_type)
553 state, par, std::integral_constant < bool, N<Rounds>());
561 using ThreefryEngineSSE2 =
566 using Threefry2x32SSE2 = ThreefryEngineSSE2<std::uint32_t, 2>;
570 using Threefry4x32SSE2 = ThreefryEngineSSE2<std::uint32_t, 4>;
574 using Threefry2x64SSE2 = ThreefryEngineSSE2<std::uint64_t, 2>;
578 using Threefry4x64SSE2 = ThreefryEngineSSE2<std::uint64_t, 4>;
582 using ThreefrySSE2 = ThreefryEngineSSE2<std::uint32_t>;
586 using ThreefrySSE2_64 = ThreefryEngineSSE2<std::uint64_t>;
588 #endif // VSMC_HAS_SSE2 595 template <
typename ResultType, std::
size_t K>
596 class ThreefryParPackAVX2
599 static void eval(
const std::array<ResultType, K + 1> &p,
600 std::array<M256I<ResultType>, K + 1> &par)
602 pack<0>(p, par, std::integral_constant<bool, 0 < K + 1>());
606 template <std::
size_t>
607 static void pack(
const std::array<ResultType, K + 1> &,
608 std::array<M256I<ResultType>, K + 1> &, std::false_type)
612 template <std::
size_t N>
613 static void pack(
const std::array<ResultType, K + 1> &p,
614 std::array<M256I<ResultType>, K + 1> &par, std::true_type)
616 std::get<N>(par).set1(std::get<N>(p));
617 pack<N + 1>(p, par, std::integral_constant<bool, N + 1 < K + 1>());
621 template <
typename ResultType, std::
size_t K>
622 class ThreefryCtrPackAVX2
625 static void eval(std::array<ResultType, K> &ctr,
626 std::array<M256I<ResultType>, K> &state)
628 std::array<std::array<ResultType, K>, M256I<ResultType>::size()>
631 pack<0>(ctr_block, state, std::integral_constant<bool, 0 < K>());
635 template <std::
size_t N>
636 static void pack(
const std::array<std::array<ResultType, K>,
637 M256I<ResultType>::size()> &,
638 std::array<M256I<ResultType>, K> &, std::false_type)
642 template <std::
size_t N>
643 static void pack(
const std::array<std::array<ResultType, K>,
644 M256I<ResultType>::size()> &ctr_block,
645 std::array<M256I<ResultType>, K> &state, std::true_type)
647 set<N>(ctr_block, state,
648 std::integral_constant<std::size_t, sizeof(ResultType)>());
650 ctr_block, state, std::integral_constant<bool, N + 1 < K>());
653 template <std::
size_t N>
654 static void set(
const std::array<std::array<ResultType, K>,
655 M256I<ResultType>::size()> &ctr_block,
656 std::array<M256I<ResultType>, K> &state,
657 std::integral_constant<std::size_t, 4>)
659 std::get<N>(state).
set(std::get<N>(std::get<0>(ctr_block)),
660 std::get<N>(std::get<1>(ctr_block)),
661 std::get<N>(std::get<2>(ctr_block)),
662 std::get<N>(std::get<3>(ctr_block)),
663 std::get<N>(std::get<4>(ctr_block)),
664 std::get<N>(std::get<5>(ctr_block)),
665 std::get<N>(std::get<6>(ctr_block)),
666 std::get<N>(std::get<7>(ctr_block)));
669 template <std::
size_t N>
670 static void set(
const std::array<std::array<ResultType, K>,
671 M256I<ResultType>::size()> &ctr_block,
672 std::array<M256I<ResultType>, K> &state,
673 std::integral_constant<std::size_t, 8>)
675 std::get<N>(state).
set(std::get<N>(std::get<0>(ctr_block)),
676 std::get<N>(std::get<1>(ctr_block)),
677 std::get<N>(std::get<2>(ctr_block)),
678 std::get<N>(std::get<3>(ctr_block)));
688 class ThreefryGeneratorAVX2
691 using result_type = ResultType;
692 using ctr_type = std::array<ResultType, K>;
693 using key_type = std::array<ResultType, K>;
697 static constexpr std::size_t size()
699 return K * M256I<ResultType>::size();
702 void reset(
const key_type &) {}
704 void operator()(ctr_type &ctr,
const key_type &key,
705 std::array<result_type, K * M256I<ResultType>::size()> &buffer)
708 std::array<M256I<ResultType>, K> state;
709 std::array<ResultType, size()> result;
712 std::array<result_type, K + 1> p;
713 std::array<M256I<ResultType>, K + 1> par;
715 internal::ThreefryParPackAVX2<ResultType, K>::eval(p, par);
716 internal::ThreefryCtrPackAVX2<ResultType, K>::eval(ctr, buf.state);
717 generate<0>(buf.state, par, std::true_type());
721 std::size_t operator()(
722 ctr_type &,
const key_type &, std::size_t, result_type *)
const 728 template <std::
size_t>
729 void generate(std::array<M256I<ResultType>, K> &,
730 const std::array<M256I<ResultType>, K + 1> &, std::false_type)
734 template <std::
size_t N>
735 void generate(std::array<M256I<ResultType>, K> &state,
736 const std::array<M256I<ResultType>, K + 1> &par, std::true_type)
741 state, par, std::integral_constant < bool, N<Rounds>());
749 using ThreefryEngineAVX2 =
754 using Threefry2x32AVX2 = ThreefryEngineAVX2<std::uint32_t, 2>;
758 using Threefry4x32AVX2 = ThreefryEngineAVX2<std::uint32_t, 4>;
762 using Threefry2x64AVX2 = ThreefryEngineAVX2<std::uint64_t, 2>;
766 using Threefry4x64AVX2 = ThreefryEngineAVX2<std::uint64_t, 4>;
770 using ThreefryAVX2 = ThreefryEngineAVX2<std::uint32_t>;
774 using ThreefryAVX2_64 = ThreefryEngineAVX2<std::uint64_t>;
776 #endif // VSMC_HAS_AVX2 780 #endif // VSMC_RNG_THREEFRY_HPP static void eval(std::array< T, K > &, const std::array< T, K+1 > &)
ThreefryGeneratorGeneric()
#define VSMC_RNG_THREEFRY_ROUNDS
ThreefryGenerator default rounds.
void increment(std::array< T, K > &ctr)
Increment a counter by one.
static T eval(const T &x)
Counter based RNG engine.
static void eval(const std::array< T, K > &key, std::array< T, K+1 > &par)
static constexpr std::size_t size()
void reset(const key_type &)
#define VSMC_RNG_THREEFRY_VECTOR_LENGTH
ThreefryGenerator default vector length.
static void eval(std::array< T, K > &)
std::array< ResultType, K > ctr_type
void operator()(ctr_type &ctr, const key_type &key, std::array< result_type, K > &buffer) const
std::array< ResultType, K > key_type
static void eval(std::array< T, 4 > &state)
static void eval(std::array< T, 2 > &state, const std::array< T, 3 > &par)
static void eval(std::array< T, 2 > &state)
#define VSMC_STATIC_ASSERT_RNG_THREEFRY(SIMD)
#define VSMC_DEFINE_RNG_THREEFRY_ROTATE_CONSTANT(T, K, N, I, val)
static void eval(std::array< T, 4 > &state, const std::array< T, 5 > &par)
std::size_t operator()(ctr_type &ctr, const key_type &key, std::size_t n, result_type *r) const