vSMC
vSMC: Scalable Monte Carlo
threefry.hpp
Go to the documentation of this file.
1 //============================================================================
2 // vSMC/include/vsmc/rng/threefry.hpp
3 //----------------------------------------------------------------------------
4 // vSMC: Scalable Monte Carlo
5 //----------------------------------------------------------------------------
6 // Copyright (c) 2013-2015, Yan Zhou
7 // All rights reserved.
8 //
9 // Redistribution and use in source and binary forms, with or without
10 // modification, are permitted provided that the following conditions are met:
11 //
12 // Redistributions of source code must retain the above copyright notice,
13 // this list of conditions and the following disclaimer.
14 //
15 // Redistributions in binary form must reproduce the above copyright notice,
16 // this list of conditions and the following disclaimer in the documentation
17 // and/or other materials provided with the distribution.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS AS IS
20 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23 // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 // POSSIBILITY OF SUCH DAMAGE.
30 //============================================================================
31 
32 #ifndef VSMC_RNG_THREEFRY_HPP
33 #define VSMC_RNG_THREEFRY_HPP
34 
36 #include <vsmc/rng/counter.hpp>
37 
/// Static assertion that `ResultType` is an unsigned type whose size equals
/// that of `std::uint32_t` or `std::uint64_t`. `SIMD` is stringified into the
/// diagnostic message.
#define VSMC_STATIC_ASSERT_RNG_THREEFRY_RESULT_TYPE(ResultType, SIMD) \
    VSMC_STATIC_ASSERT( \
        (std::is_unsigned<ResultType>::value && \
            (sizeof(ResultType) == sizeof(std::uint32_t) || \
                sizeof(ResultType) == sizeof(std::uint64_t))), \
        "**ThreefryGenerator" #SIMD \
        "** USED WITH ResultType OTHER THAN UNSIGNED 32/64 BITS INTEGER")

/// Static assertion that the vector length `K` is 2 or 4.
#define VSMC_STATIC_ASSERT_RNG_THREEFRY_SIZE(K, SIMD) \
    VSMC_STATIC_ASSERT((K == 2 || K == 4), \
        "**Threefry" #SIMD "** USED WITH SIZE OTHER THAN 2 OR 4")

/// Runs both checks above. It expands in a scope where the names `ResultType`
/// and `K` are visible (it refers to them literally).
#define VSMC_STATIC_ASSERT_RNG_THREEFRY(SIMD) \
    VSMC_STATIC_ASSERT_RNG_THREEFRY_RESULT_TYPE(ResultType, SIMD); \
    VSMC_STATIC_ASSERT_RNG_THREEFRY_SIZE(K, SIMD);

/// Defines the full specialization `ThreefryRotateConstant<T, K, N, I>` as an
/// `std::integral_constant<int, val>`.
#define VSMC_DEFINE_RNG_THREEFRY_ROTATE_CONSTANT(T, K, N, I, val) \
    template <> \
    class ThreefryRotateConstant<T, K, N, I> \
        : public std::integral_constant<int, val> \
    { \
    }; // class ThreefryRotateConstant
60 
/// \brief ThreefryGenerator default vector length
/// (may be overridden by defining the macro before this point)
#if !defined(VSMC_RNG_THREEFRY_VECTOR_LENGTH)
#define VSMC_RNG_THREEFRY_VECTOR_LENGTH 4
#endif

/// \brief ThreefryGenerator default rounds
/// (may be overridden by defining the macro before this point)
#if !defined(VSMC_RNG_THREEFRY_ROUNDS)
#define VSMC_RNG_THREEFRY_ROUNDS 20
#endif
72 
73 namespace vsmc
74 {
75 
76 namespace internal
77 {
78 
/// Constant that seeds the extra (last) word of the Threefry key schedule;
/// see its use in `ThreefryInitPar::eval`. Only the 32- and 64-bit unsigned
/// specializations below provide a value.
template <typename>
class ThreefryKSConstant;

/// A wrapper type `SIMD<T>` uses the same constant as its element type `T`.
template <typename T, template <typename> class SIMD>
class ThreefryKSConstant<SIMD<T>> : public ThreefryKSConstant<T>
{
}; // class ThreefryKSConstant

/// 32-bit constant.
template <>
class ThreefryKSConstant<std::uint32_t>
    : public std::integral_constant<std::uint32_t, UINT32_C(0x1BD11BDA)>
{
}; // class ThreefryKSConstant

/// 64-bit constant.
template <>
class ThreefryKSConstant<std::uint64_t>
    : public std::integral_constant<std::uint64_t,
          UINT64_C(0x1BD11BDAA9FC1A22)>
{
}; // class ThreefryKSConstant
99 
/// Rotation amount lookup, indexed by (word type, vector length K, round
/// index N modulo 8, pair index I). The primary template is only declared;
/// values are supplied by full specializations generated with
/// VSMC_DEFINE_RNG_THREEFRY_ROTATE_CONSTANT.
template <typename, std::size_t, std::size_t, std::size_t>
class ThreefryRotateConstant;

/// A wrapper type `SIMD<T>` uses the same rotation amounts as its element
/// type `T`.
template <typename T, template <typename> class SIMD, std::size_t K,
    std::size_t N, std::size_t I>
class ThreefryRotateConstant<SIMD<T>, K, N, I>
    : public ThreefryRotateConstant<T, K, N, I>
{
}; // class ThreefryRotateConstant
109 
112 VSMC_DEFINE_RNG_THREEFRY_ROTATE_CONSTANT(std::uint32_t, 2, 2, 0, 26)
114 VSMC_DEFINE_RNG_THREEFRY_ROTATE_CONSTANT(std::uint32_t, 2, 4, 0, 17)
115 VSMC_DEFINE_RNG_THREEFRY_ROTATE_CONSTANT(std::uint32_t, 2, 5, 0, 29)
116 VSMC_DEFINE_RNG_THREEFRY_ROTATE_CONSTANT(std::uint32_t, 2, 6, 0, 16)
117 VSMC_DEFINE_RNG_THREEFRY_ROTATE_CONSTANT(std::uint32_t, 2, 7, 0, 24)
118 
119 VSMC_DEFINE_RNG_THREEFRY_ROTATE_CONSTANT(std::uint32_t, 4, 0, 0, 10)
120 VSMC_DEFINE_RNG_THREEFRY_ROTATE_CONSTANT(std::uint32_t, 4, 1, 0, 11)
121 VSMC_DEFINE_RNG_THREEFRY_ROTATE_CONSTANT(std::uint32_t, 4, 2, 0, 13)
122 VSMC_DEFINE_RNG_THREEFRY_ROTATE_CONSTANT(std::uint32_t, 4, 3, 0, 23)
124 VSMC_DEFINE_RNG_THREEFRY_ROTATE_CONSTANT(std::uint32_t, 4, 5, 0, 17)
125 VSMC_DEFINE_RNG_THREEFRY_ROTATE_CONSTANT(std::uint32_t, 4, 6, 0, 25)
126 VSMC_DEFINE_RNG_THREEFRY_ROTATE_CONSTANT(std::uint32_t, 4, 7, 0, 18)
127 
128 VSMC_DEFINE_RNG_THREEFRY_ROTATE_CONSTANT(std::uint32_t, 4, 0, 1, 26)
129 VSMC_DEFINE_RNG_THREEFRY_ROTATE_CONSTANT(std::uint32_t, 4, 1, 1, 21)
130 VSMC_DEFINE_RNG_THREEFRY_ROTATE_CONSTANT(std::uint32_t, 4, 2, 1, 27)
132 VSMC_DEFINE_RNG_THREEFRY_ROTATE_CONSTANT(std::uint32_t, 4, 4, 1, 20)
133 VSMC_DEFINE_RNG_THREEFRY_ROTATE_CONSTANT(std::uint32_t, 4, 5, 1, 11)
134 VSMC_DEFINE_RNG_THREEFRY_ROTATE_CONSTANT(std::uint32_t, 4, 6, 1, 10)
135 VSMC_DEFINE_RNG_THREEFRY_ROTATE_CONSTANT(std::uint32_t, 4, 7, 1, 20)
136 
138 VSMC_DEFINE_RNG_THREEFRY_ROTATE_CONSTANT(std::uint64_t, 2, 1, 0, 42)
139 VSMC_DEFINE_RNG_THREEFRY_ROTATE_CONSTANT(std::uint64_t, 2, 2, 0, 12)
140 VSMC_DEFINE_RNG_THREEFRY_ROTATE_CONSTANT(std::uint64_t, 2, 3, 0, 31)
141 VSMC_DEFINE_RNG_THREEFRY_ROTATE_CONSTANT(std::uint64_t, 2, 4, 0, 16)
142 VSMC_DEFINE_RNG_THREEFRY_ROTATE_CONSTANT(std::uint64_t, 2, 5, 0, 32)
143 VSMC_DEFINE_RNG_THREEFRY_ROTATE_CONSTANT(std::uint64_t, 2, 6, 0, 24)
144 VSMC_DEFINE_RNG_THREEFRY_ROTATE_CONSTANT(std::uint64_t, 2, 7, 0, 21)
145 
146 VSMC_DEFINE_RNG_THREEFRY_ROTATE_CONSTANT(std::uint64_t, 4, 0, 0, 14)
147 VSMC_DEFINE_RNG_THREEFRY_ROTATE_CONSTANT(std::uint64_t, 4, 1, 0, 52)
148 VSMC_DEFINE_RNG_THREEFRY_ROTATE_CONSTANT(std::uint64_t, 4, 2, 0, 23)
150 VSMC_DEFINE_RNG_THREEFRY_ROTATE_CONSTANT(std::uint64_t, 4, 4, 0, 25)
151 VSMC_DEFINE_RNG_THREEFRY_ROTATE_CONSTANT(std::uint64_t, 4, 5, 0, 46)
152 VSMC_DEFINE_RNG_THREEFRY_ROTATE_CONSTANT(std::uint64_t, 4, 6, 0, 58)
153 VSMC_DEFINE_RNG_THREEFRY_ROTATE_CONSTANT(std::uint64_t, 4, 7, 0, 32)
154 
155 VSMC_DEFINE_RNG_THREEFRY_ROTATE_CONSTANT(std::uint64_t, 4, 0, 1, 16)
156 VSMC_DEFINE_RNG_THREEFRY_ROTATE_CONSTANT(std::uint64_t, 4, 1, 1, 57)
157 VSMC_DEFINE_RNG_THREEFRY_ROTATE_CONSTANT(std::uint64_t, 4, 2, 1, 40)
158 VSMC_DEFINE_RNG_THREEFRY_ROTATE_CONSTANT(std::uint64_t, 4, 3, 1, 37)
159 VSMC_DEFINE_RNG_THREEFRY_ROTATE_CONSTANT(std::uint64_t, 4, 4, 1, 33)
160 VSMC_DEFINE_RNG_THREEFRY_ROTATE_CONSTANT(std::uint64_t, 4, 5, 1, 12)
161 VSMC_DEFINE_RNG_THREEFRY_ROTATE_CONSTANT(std::uint64_t, 4, 6, 1, 22)
162 VSMC_DEFINE_RNG_THREEFRY_ROTATE_CONSTANT(std::uint64_t, 4, 7, 1, 32)
163 
164 template <typename T, std::size_t K>
166 {
167  public:
168  static void eval(const std::array<T, K> &key, std::array<T, K + 1> &par)
169  {
170  par.back() = ThreefryKSConstant<T>::value;
171  par_xor<0>(key, par, std::integral_constant<bool, 0 < K>());
172  }
173 
174  private:
175  template <std::size_t>
176  static void par_xor(
177  const std::array<T, K> &, std::array<T, K + 1> &, std::false_type)
178  {
179  }
180 
181  template <std::size_t N>
182  static void par_xor(
183  const std::array<T, K> &key, std::array<T, K + 1> &par, std::true_type)
184  {
185  std::get<N>(par) = std::get<N>(key);
186  par.back() ^= std::get<N>(key);
187  par_xor<N + 1>(key, par, std::integral_constant<bool, N + 1 < K>());
188  }
189 }; // class ThreefryInitPar
190 
/// Width of `T` expressed in bits, computed as eight times `sizeof(T)`.
template <typename T>
class ThreefryRotateBits
    : public std::integral_constant<int, static_cast<int>(8 * sizeof(T))>
{
}; // class ThreefryRotateBits

/// For a wrapper type `SIMD<T>` the width is that of the element type `T`,
/// not of the wrapper itself.
template <typename T, template <typename> class SIMD>
class ThreefryRotateBits<SIMD<T>> : public ThreefryRotateBits<T>
{
}; // class ThreefryRotateBits
200 
201 template <typename T, int R>
203 {
204  public:
205  static T eval(const T &x)
206  {
207  return (x << R) | (x >> (ThreefryRotateBits<T>::value - R));
208  }
209 }; // class ThreefryRotateImpl
210 
/// One Threefry mixing round applied to a K-word state.
///
/// The last (defaulted) parameter is `N > 0`. This primary template is the
/// fallback and leaves the state unchanged; it is selected for round 0 and
/// for any K without a dedicated specialization.
template <typename T, std::size_t K, std::size_t N, bool = (N > 0)>
class ThreefryRotate
{
    public:
    static void eval(std::array<T, K> &) {}
}; // class ThreefryRotate
217 
218 template <typename T, std::size_t N>
219 class ThreefryRotate<T, 2, N, true>
220 {
221  public:
222  static void eval(std::array<T, 2> &state)
223  {
224  std::get<0>(state) += std::get<1>(state);
225  std::get<1>(state) =
227  0>::value>::eval(std::get<1>(state));
228  std::get<1>(state) ^= std::get<0>(state);
229  }
230 
231  private:
232  static constexpr std::size_t r_ = (N - 1) % 8;
233 }; // class ThreefryRotate
234 
235 template <typename T, std::size_t N>
236 class ThreefryRotate<T, 4, N, true>
237 {
238  public:
239  static void eval(std::array<T, 4> &state)
240  {
241  std::get<0>(state) += std::get<i0_>(state);
242  std::get<i0_>(state) =
244  0>::value>::eval(std::get<i0_>(state));
245  std::get<i0_>(state) ^= std::get<0>(state);
246 
247  std::get<2>(state) += std::get<i2_>(state);
248  std::get<i2_>(state) =
250  1>::value>::eval(std::get<i2_>(state));
251  std::get<i2_>(state) ^= std::get<2>(state);
252  }
253 
254  private:
255  static constexpr std::size_t i0_ = N % 2 ? 1 : 3;
256  static constexpr std::size_t i2_ = N % 2 ? 3 : 1;
257  static constexpr std::size_t r_ = (N - 1) % 8;
258 }; // class ThreefryRotate
259 
/// The key-injection counter `Inc` converted to the word type `T`.
template <typename T, std::size_t Inc>
class ThreefryInsertKeyInc
    : public std::integral_constant<T, static_cast<T>(Inc)>
{
}; // class ThreefryInsertKeyInc

/// For a wrapper type `SIMD<T>` the increment is a scalar of the element
/// type `T`.
template <typename T, template <typename> class SIMD, std::size_t Inc>
class ThreefryInsertKeyInc<SIMD<T>, Inc> : public ThreefryInsertKeyInc<T, Inc>
{
}; // class ThreefryInsertKeyInc
270 
/// Key injection step for round N of a K-word state.
///
/// The last (defaulted) parameter is `N % 4 == 0`. This primary template is
/// the fallback and does nothing; it is selected on rounds that are not a
/// multiple of four and for any K without a dedicated specialization.
template <typename T, std::size_t K, std::size_t N, bool = (N % 4 == 0)>
class ThreefryInsertKey
{
    public:
    static void eval(std::array<T, K> &, const std::array<T, K + 1> &) {}
}; // class ThreefryInsertKey
277 
278 template <typename T, std::size_t N>
279 class ThreefryInsertKey<T, 2, N, true>
280 {
281  public:
282  static void eval(std::array<T, 2> &state, const std::array<T, 3> &par)
283  {
284  std::get<0>(state) += std::get<i0_>(par);
285  std::get<1>(state) += std::get<i1_>(par);
286  std::get<1>(state) += ThreefryInsertKeyInc<T, inc_>::value;
287  }
288 
289  private:
290  static constexpr std::size_t inc_ = N / 4;
291  static constexpr std::size_t i0_ = (inc_ + 0) % 3;
292  static constexpr std::size_t i1_ = (inc_ + 1) % 3;
293 }; // class ThreefryInsertKey
294 
295 template <typename T, std::size_t N>
296 class ThreefryInsertKey<T, 4, N, true>
297 {
298  public:
299  static void eval(std::array<T, 4> &state, const std::array<T, 5> &par)
300  {
301  std::get<0>(state) += std::get<i0_>(par);
302  std::get<1>(state) += std::get<i1_>(par);
303  std::get<2>(state) += std::get<i2_>(par);
304  std::get<3>(state) += std::get<i3_>(par);
305  std::get<3>(state) += ThreefryInsertKeyInc<T, inc_>::value;
306  }
307 
308  private:
309  static constexpr std::size_t inc_ = N / 4;
310  static constexpr std::size_t i0_ = (inc_ + 0) % 5;
311  static constexpr std::size_t i1_ = (inc_ + 1) % 5;
312  static constexpr std::size_t i2_ = (inc_ + 2) % 5;
313  static constexpr std::size_t i3_ = (inc_ + 3) % 5;
314 }; // class ThreefryInsertKey
315 
316 } // namespace vsmc::internal
317 
320 template <typename ResultType, std::size_t K = VSMC_RNG_THREEFRY_VECTOR_LENGTH,
321  std::size_t Rounds = VSMC_RNG_THREEFRY_ROUNDS>
323 {
324  public:
325  using result_type = ResultType;
326  using ctr_type = std::array<ResultType, K>;
327  using key_type = std::array<ResultType, K>;
328 
330 
331  static constexpr std::size_t size() { return K; }
332 
333  void reset(const key_type &) {}
334 
335  void operator()(ctr_type &ctr, const key_type &key,
336  std::array<result_type, K> &buffer) const
337  {
338  std::array<result_type, K + 1> par;
340  increment(ctr);
341  buffer = ctr;
342  generate<0>(buffer, par, std::true_type());
343  }
344 
345  std::size_t operator()(ctr_type &ctr, const key_type &key, std::size_t n,
346  result_type *r) const
347  {
348  const std::size_t m = n / size();
349  std::array<result_type, K + 1> par;
351  ctr_type *s = reinterpret_cast<ctr_type *>(r);
352  increment(ctr, m, s);
353  for (std::size_t i = 0; i != m; ++i)
354  generate<0>(s[i], par, std::true_type());
355 
356  return m * size();
357  }
358 
359  private:
360  template <std::size_t>
361  void generate(std::array<result_type, K> &,
362  const std::array<result_type, K + 1> &, std::false_type) const
363  {
364  }
365 
366  template <std::size_t N>
367  void generate(std::array<result_type, K> &state,
368  const std::array<result_type, K + 1> &par, std::true_type) const
369  {
372  generate<N + 1>(
373  state, par, std::integral_constant < bool, N<Rounds>());
374  }
375 }; // class ThreefryGeneratorGeneric
376 
379 template <typename ResultType, std::size_t K = VSMC_RNG_THREEFRY_VECTOR_LENGTH,
380  std::size_t Rounds = VSMC_RNG_THREEFRY_ROUNDS>
381 using ThreefryEngine =
383 
387 
391 
395 
399 
403 
407 
408 #if VSMC_HAS_SSE2
409 
410 namespace internal
411 {
412 
413 template <typename ResultType, std::size_t K>
414 class ThreefryParPackSSE2
415 {
416  public:
417  static void eval(const std::array<ResultType, K + 1> &p,
418  std::array<M128I<ResultType>, K + 1> &par)
419  {
420  pack<0>(p, par, std::integral_constant<bool, 0 < K + 1>());
421  }
422 
423  private:
424  template <std::size_t>
425  static void pack(const std::array<ResultType, K + 1> &,
426  std::array<M128I<ResultType>, K + 1> &, std::false_type)
427  {
428  }
429 
430  template <std::size_t N>
431  static void pack(const std::array<ResultType, K + 1> &p,
432  std::array<M128I<ResultType>, K + 1> &par, std::true_type)
433  {
434  std::get<N>(par).set1(std::get<N>(p));
435  pack<N + 1>(p, par, std::integral_constant<bool, N + 1 < K + 1>());
436  }
437 }; // class ThreefryParPackSSE2
438 
439 template <typename ResultType, std::size_t K>
440 class ThreefryCtrPackSSE2
441 {
442  public:
443  static void eval(std::array<ResultType, K> &ctr,
444  std::array<M128I<ResultType>, K> &state)
445  {
446  std::array<std::array<ResultType, K>, M128I<ResultType>::size()>
447  ctr_block;
448  increment(ctr, ctr_block);
449  pack<0>(ctr_block, state, std::integral_constant<bool, 0 < K>());
450  }
451 
452  private:
453  template <std::size_t N>
454  static void pack(const std::array<std::array<ResultType, K>,
455  M128I<ResultType>::size()> &,
456  std::array<M128I<ResultType>, K> &, std::false_type)
457  {
458  }
459 
460  template <std::size_t N>
461  static void pack(const std::array<std::array<ResultType, K>,
462  M128I<ResultType>::size()> &ctr_block,
463  std::array<M128I<ResultType>, K> &state, std::true_type)
464  {
465  set<N>(ctr_block, state,
466  std::integral_constant<std::size_t, sizeof(ResultType)>());
467  pack<N + 1>(
468  ctr_block, state, std::integral_constant<bool, N + 1 < K>());
469  }
470 
471  template <std::size_t N>
472  static void set(const std::array<std::array<ResultType, K>,
473  M128I<ResultType>::size()> &ctr_block,
474  std::array<M128I<ResultType>, K> &state,
475  std::integral_constant<std::size_t, 4>)
476  {
477  std::get<N>(state).set(std::get<N>(std::get<0>(ctr_block)),
478  std::get<N>(std::get<1>(ctr_block)),
479  std::get<N>(std::get<2>(ctr_block)),
480  std::get<N>(std::get<3>(ctr_block)));
481  }
482 
483  template <std::size_t N>
484  static void set(const std::array<std::array<ResultType, K>,
485  M128I<ResultType>::size()> &ctr_block,
486  std::array<M128I<ResultType>, K> &state,
487  std::integral_constant<std::size_t, 8>)
488  {
489  std::get<N>(state).set(std::get<N>(std::get<0>(ctr_block)),
490  std::get<N>(std::get<1>(ctr_block)));
491  }
492 }; // class ThreefryCtrPackSSE2
493 
494 } // namespace vsmc::internal
495 
498 template <typename ResultType, std::size_t K = VSMC_RNG_THREEFRY_VECTOR_LENGTH,
499  std::size_t Rounds = VSMC_RNG_THREEFRY_ROUNDS>
500 class ThreefryGeneratorSSE2
501 {
502  public:
503  using result_type = ResultType;
504  using ctr_type = std::array<ResultType, K>;
505  using key_type = std::array<ResultType, K>;
506 
507  ThreefryGeneratorSSE2() { VSMC_STATIC_ASSERT_RNG_THREEFRY(SSE2); }
508 
509  static constexpr std::size_t size()
510  {
511  return K * M128I<ResultType>::size();
512  }
513 
514  void reset(const key_type &) {}
515 
516  void operator()(ctr_type &ctr, const key_type &key,
517  std::array<result_type, K * M128I<ResultType>::size()> &buffer)
518  {
519  union {
520  std::array<M128I<ResultType>, K> state;
521  std::array<ResultType, size()> result;
522  } buf;
523 
524  std::array<result_type, K + 1> p;
525  std::array<M128I<ResultType>, K + 1> par;
527  internal::ThreefryParPackSSE2<ResultType, K>::eval(p, par);
528  internal::ThreefryCtrPackSSE2<ResultType, K>::eval(ctr, buf.state);
529  generate<0>(buf.state, par, std::true_type());
530  buffer = buf.result;
531  }
532 
533  std::size_t operator()(
534  ctr_type &, const key_type &, std::size_t, result_type *) const
535  {
536  return 0;
537  }
538 
539  private:
540  template <std::size_t>
541  void generate(std::array<M128I<ResultType>, K> &,
542  const std::array<M128I<ResultType>, K + 1> &, std::false_type)
543  {
544  }
545 
546  template <std::size_t N>
547  void generate(std::array<M128I<ResultType>, K> &state,
548  const std::array<M128I<ResultType>, K + 1> &par, std::true_type)
549  {
550  internal::ThreefryRotate<M128I<ResultType>, K, N>::eval(state);
551  internal::ThreefryInsertKey<M128I<ResultType>, K, N>::eval(state, par);
552  generate<N + 1>(
553  state, par, std::integral_constant < bool, N<Rounds>());
554  }
555 }; // class ThreefryGeneratorSSE2
556 
559 template <typename ResultType, std::size_t K = VSMC_RNG_THREEFRY_VECTOR_LENGTH,
560  std::size_t Rounds = VSMC_RNG_THREEFRY_ROUNDS>
561 using ThreefryEngineSSE2 =
563 
566 using Threefry2x32SSE2 = ThreefryEngineSSE2<std::uint32_t, 2>;
567 
570 using Threefry4x32SSE2 = ThreefryEngineSSE2<std::uint32_t, 4>;
571 
574 using Threefry2x64SSE2 = ThreefryEngineSSE2<std::uint64_t, 2>;
575 
578 using Threefry4x64SSE2 = ThreefryEngineSSE2<std::uint64_t, 4>;
579 
582 using ThreefrySSE2 = ThreefryEngineSSE2<std::uint32_t>;
583 
586 using ThreefrySSE2_64 = ThreefryEngineSSE2<std::uint64_t>;
587 
588 #endif // VSMC_HAS_SSE2
589 
590 #if VSMC_HAS_AVX2
591 
592 namespace internal
593 {
594 
595 template <typename ResultType, std::size_t K>
596 class ThreefryParPackAVX2
597 {
598  public:
599  static void eval(const std::array<ResultType, K + 1> &p,
600  std::array<M256I<ResultType>, K + 1> &par)
601  {
602  pack<0>(p, par, std::integral_constant<bool, 0 < K + 1>());
603  }
604 
605  private:
606  template <std::size_t>
607  static void pack(const std::array<ResultType, K + 1> &,
608  std::array<M256I<ResultType>, K + 1> &, std::false_type)
609  {
610  }
611 
612  template <std::size_t N>
613  static void pack(const std::array<ResultType, K + 1> &p,
614  std::array<M256I<ResultType>, K + 1> &par, std::true_type)
615  {
616  std::get<N>(par).set1(std::get<N>(p));
617  pack<N + 1>(p, par, std::integral_constant<bool, N + 1 < K + 1>());
618  }
619 }; // class ThreefryParPackAVX2
620 
621 template <typename ResultType, std::size_t K>
622 class ThreefryCtrPackAVX2
623 {
624  public:
625  static void eval(std::array<ResultType, K> &ctr,
626  std::array<M256I<ResultType>, K> &state)
627  {
628  std::array<std::array<ResultType, K>, M256I<ResultType>::size()>
629  ctr_block;
630  increment(ctr, ctr_block);
631  pack<0>(ctr_block, state, std::integral_constant<bool, 0 < K>());
632  }
633 
634  private:
635  template <std::size_t N>
636  static void pack(const std::array<std::array<ResultType, K>,
637  M256I<ResultType>::size()> &,
638  std::array<M256I<ResultType>, K> &, std::false_type)
639  {
640  }
641 
642  template <std::size_t N>
643  static void pack(const std::array<std::array<ResultType, K>,
644  M256I<ResultType>::size()> &ctr_block,
645  std::array<M256I<ResultType>, K> &state, std::true_type)
646  {
647  set<N>(ctr_block, state,
648  std::integral_constant<std::size_t, sizeof(ResultType)>());
649  pack<N + 1>(
650  ctr_block, state, std::integral_constant<bool, N + 1 < K>());
651  }
652 
653  template <std::size_t N>
654  static void set(const std::array<std::array<ResultType, K>,
655  M256I<ResultType>::size()> &ctr_block,
656  std::array<M256I<ResultType>, K> &state,
657  std::integral_constant<std::size_t, 4>)
658  {
659  std::get<N>(state).set(std::get<N>(std::get<0>(ctr_block)),
660  std::get<N>(std::get<1>(ctr_block)),
661  std::get<N>(std::get<2>(ctr_block)),
662  std::get<N>(std::get<3>(ctr_block)),
663  std::get<N>(std::get<4>(ctr_block)),
664  std::get<N>(std::get<5>(ctr_block)),
665  std::get<N>(std::get<6>(ctr_block)),
666  std::get<N>(std::get<7>(ctr_block)));
667  }
668 
669  template <std::size_t N>
670  static void set(const std::array<std::array<ResultType, K>,
671  M256I<ResultType>::size()> &ctr_block,
672  std::array<M256I<ResultType>, K> &state,
673  std::integral_constant<std::size_t, 8>)
674  {
675  std::get<N>(state).set(std::get<N>(std::get<0>(ctr_block)),
676  std::get<N>(std::get<1>(ctr_block)),
677  std::get<N>(std::get<2>(ctr_block)),
678  std::get<N>(std::get<3>(ctr_block)));
679  }
680 }; // class ThreefryCtrPackAVX2
681 
682 } // namespace vsmc::internal
683 
686 template <typename ResultType, std::size_t K = VSMC_RNG_THREEFRY_VECTOR_LENGTH,
687  std::size_t Rounds = VSMC_RNG_THREEFRY_ROUNDS>
688 class ThreefryGeneratorAVX2
689 {
690  public:
691  using result_type = ResultType;
692  using ctr_type = std::array<ResultType, K>;
693  using key_type = std::array<ResultType, K>;
694 
695  ThreefryGeneratorAVX2() { VSMC_STATIC_ASSERT_RNG_THREEFRY(AVX2); }
696 
697  static constexpr std::size_t size()
698  {
699  return K * M256I<ResultType>::size();
700  }
701 
702  void reset(const key_type &) {}
703 
704  void operator()(ctr_type &ctr, const key_type &key,
705  std::array<result_type, K * M256I<ResultType>::size()> &buffer)
706  {
707  union {
708  std::array<M256I<ResultType>, K> state;
709  std::array<ResultType, size()> result;
710  } buf;
711 
712  std::array<result_type, K + 1> p;
713  std::array<M256I<ResultType>, K + 1> par;
715  internal::ThreefryParPackAVX2<ResultType, K>::eval(p, par);
716  internal::ThreefryCtrPackAVX2<ResultType, K>::eval(ctr, buf.state);
717  generate<0>(buf.state, par, std::true_type());
718  buffer = buf.result;
719  }
720 
721  std::size_t operator()(
722  ctr_type &, const key_type &, std::size_t, result_type *) const
723  {
724  return 0;
725  }
726 
727  private:
728  template <std::size_t>
729  void generate(std::array<M256I<ResultType>, K> &,
730  const std::array<M256I<ResultType>, K + 1> &, std::false_type)
731  {
732  }
733 
734  template <std::size_t N>
735  void generate(std::array<M256I<ResultType>, K> &state,
736  const std::array<M256I<ResultType>, K + 1> &par, std::true_type)
737  {
738  internal::ThreefryRotate<M256I<ResultType>, K, N>::eval(state);
739  internal::ThreefryInsertKey<M256I<ResultType>, K, N>::eval(state, par);
740  generate<N + 1>(
741  state, par, std::integral_constant < bool, N<Rounds>());
742  }
743 }; // class ThreefryGeneratorAVX2
744 
747 template <typename ResultType, std::size_t K = VSMC_RNG_THREEFRY_VECTOR_LENGTH,
748  std::size_t Rounds = VSMC_RNG_THREEFRY_ROUNDS>
749 using ThreefryEngineAVX2 =
751 
754 using Threefry2x32AVX2 = ThreefryEngineAVX2<std::uint32_t, 2>;
755 
758 using Threefry4x32AVX2 = ThreefryEngineAVX2<std::uint32_t, 4>;
759 
762 using Threefry2x64AVX2 = ThreefryEngineAVX2<std::uint64_t, 2>;
763 
766 using Threefry4x64AVX2 = ThreefryEngineAVX2<std::uint64_t, 4>;
767 
770 using ThreefryAVX2 = ThreefryEngineAVX2<std::uint32_t>;
771 
774 using ThreefryAVX2_64 = ThreefryEngineAVX2<std::uint64_t>;
775 
776 #endif // VSMC_HAS_AVX2
777 
778 } // namespace vsmc
779 
780 #endif // VSMC_RNG_THREEFRY_HPP
static void eval(std::array< T, K > &, const std::array< T, K+1 > &)
Definition: threefry.hpp:275
Definition: monitor.hpp:49
#define VSMC_RNG_THREEFRY_ROUNDS
ThreefryGenerator default rounds.
Definition: threefry.hpp:70
uint uint32_t
Definition: opencl.h:39
void increment(std::array< T, K > &ctr)
Increment a counter by one.
Definition: counter.hpp:62
ulong uint64_t
Definition: opencl.h:40
STL namespace.
Counter based RNG engine.
Definition: counter.hpp:290
static void eval(const std::array< T, K > &key, std::array< T, K+1 > &par)
Definition: threefry.hpp:168
static constexpr std::size_t size()
Definition: threefry.hpp:331
void reset(const key_type &)
Definition: threefry.hpp:333
Threefry RNG generator.
Definition: threefry.hpp:322
#define VSMC_RNG_THREEFRY_VECTOR_LENGTH
ThreefryGenerator default vector length.
Definition: threefry.hpp:64
static void eval(std::array< T, K > &)
Definition: threefry.hpp:215
std::array< ResultType, K > ctr_type
Definition: threefry.hpp:326
void operator()(ctr_type &ctr, const key_type &key, std::array< result_type, K > &buffer) const
Definition: threefry.hpp:335
std::array< ResultType, K > key_type
Definition: threefry.hpp:327
static void eval(std::array< T, 4 > &state)
Definition: threefry.hpp:239
static void eval(std::array< T, 2 > &state, const std::array< T, 3 > &par)
Definition: threefry.hpp:282
static void eval(std::array< T, 2 > &state)
Definition: threefry.hpp:222
#define VSMC_STATIC_ASSERT_RNG_THREEFRY(SIMD)
Definition: threefry.hpp:50
#define VSMC_DEFINE_RNG_THREEFRY_ROTATE_CONSTANT(T, K, N, I, val)
Definition: threefry.hpp:54
static void eval(std::array< T, 4 > &state, const std::array< T, 5 > &par)
Definition: threefry.hpp:299
std::size_t operator()(ctr_type &ctr, const key_type &key, std::size_t n, result_type *r) const
Definition: threefry.hpp:345