vSMC  v3.0.0
Scalable Monte Carlo
aes_ni.hpp
Go to the documentation of this file.
1 //============================================================================
2 // vSMC/include/vsmc/rng/aes_ni.hpp
3 //----------------------------------------------------------------------------
4 // vSMC: Scalable Monte Carlo
5 //----------------------------------------------------------------------------
6 // Copyright (c) 2013-2016, Yan Zhou
7 // All rights reserved.
8 //
9 // Redistribution and use in source and binary forms, with or without
10 // modification, are permitted provided that the following conditions are met:
11 //
12 // Redistributions of source code must retain the above copyright notice,
13 // this list of conditions and the following disclaimer.
14 //
15 // Redistributions in binary form must reproduce the above copyright notice,
16 // this list of conditions and the following disclaimer in the documentation
17 // and/or other materials provided with the distribution.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS AS IS
20 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23 // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 // POSSIBILITY OF SUCH DAMAGE.
30 //============================================================================
31 
32 #ifndef VSMC_RNG_AES_NI_HPP
33 #define VSMC_RNG_AES_NI_HPP
34 
36 #include <vsmc/rng/counter.hpp>
37 #include <wmmintrin.h>
38 
39 #ifdef VSMC_GCC
40 #if __GNUC__ >= 6
41 #pragma GCC diagnostic push
42 #pragma GCC diagnostic ignored "-Wignored-attributes"
43 #endif
44 #endif
45 
// Helper macro: emits the explicit specialization AESKeyGenAssist<N>, whose
// body returns _mm_aeskeygenassist_si128(xmm, val). It therefore binds the
// template index N to the constant `val` passed as the intrinsic's second
// argument.
46 #define VSMC_DEFINE_RNG_AES_KEY_GEN_ASSIST(N, val) \
47  template <> \
48  inline __m128i AESKeyGenAssist<N>(const __m128i &xmm) \
49  { \
50  return _mm_aeskeygenassist_si128(xmm, val); \
51  }
52 
// AES-128 default rounds. Each default below is only defined when the macro
// has not already been defined before this point.
54 #ifndef VSMC_RNG_AES128_ROUNDS
55 #define VSMC_RNG_AES128_ROUNDS 10
56 #endif
57 
// AES-192 default rounds.
59 #ifndef VSMC_RNG_AES192_ROUNDS
60 #define VSMC_RNG_AES192_ROUNDS 12
61 #endif
62 
// AES-256 default rounds.
64 #ifndef VSMC_RNG_AES256_ROUNDS
65 #define VSMC_RNG_AES256_ROUNDS 14
66 #endif
67 
// ARSEngine default rounds.
70 #ifndef VSMC_RNG_ARS_ROUNDS
71 #define VSMC_RNG_ARS_ROUNDS 5
72 #endif
73 
// AESEngine default blocks (number of 128-bit blocks processed per call).
76 #ifndef VSMC_RNG_AES_NI_BLOCKS
77 #define VSMC_RNG_AES_NI_BLOCKS 8
78 #endif
79 
80 namespace vsmc
81 {
82 
85 template <typename KeySeqType, std::size_t Rounds, std::size_t Blocks>
87 {
88  static_assert(
89  Rounds != 0, "**AESNIGenerator** USED WITH ROUNDS EQUAL TO ZERO");
90 
91  static_assert(
92  Blocks != 0, "**AESNIGenerator** USED WITH Blocks EQUAL TO ZERO");
93 
94  public:
95  using ctr_type = std::array<std::uint64_t, 2>;
96  using key_type = typename KeySeqType::key_type;
97 
// Size in bytes of one generated buffer: Blocks objects of type __m128i.
98  static constexpr std::size_t size() { return Blocks * sizeof(__m128i); }
99 
// Re-key the generator by forwarding `key` to the key sequence object.
100  void reset(const key_type &key) { key_seq_.reset(key); }
101 
// Encrypt the single counter value `ctr` and store the result in `buffer`.
// `ctr` is taken by const reference and is not advanced.
102  void enc(const ctr_type &ctr, ctr_type &buffer) const
103  {
// One storage area viewed two ways: as a one-element __m128i array for the
// cipher rounds, and as a ctr_type for loading the input / reading the output.
104  union {
105  std::array<__m128i, 1> state;
106  ctr_type result;
107  } buf;
108 
// rk_tmp is scratch space offered to the key sequence; rk is bound to the
// array the key sequence returns (which may or may not be rk_tmp).
109  std::array<__m128i, Rounds + 1> rk_tmp;
110  const std::array<__m128i, Rounds + 1> &rk = key_seq_(rk_tmp);
111 
112  buf.result = ctr;
113  enc(buf.state, rk);
114  buffer = buf.result;
115  }
116 
// Generate one buffer of size() bytes, delivered as an array of ResultType.
//   ctr    - counter, passed by non-const reference to increment()
//   buffer - receives size() / sizeof(ResultType) values
117  template <typename ResultType>
118  void operator()(ctr_type &ctr,
119  std::array<ResultType, size() / sizeof(ResultType)> &buffer) const
120  {
// The same bytes are viewed as cipher state, as Blocks counters, and as the
// output values.
121  union {
122  std::array<__m128i, Blocks> state;
123  std::array<ctr_type, Blocks> ctr_block;
124  std::array<ResultType, size() / sizeof(ResultType)> result;
125  } buf;
126 
127  std::array<__m128i, Rounds + 1> rk_tmp;
128  const std::array<__m128i, Rounds + 1> &rk = key_seq_(rk_tmp);
129 
// NOTE(review): this two-argument increment() is not defined in this listing;
// presumably it fills ctr_block with Blocks successive counters and advances
// ctr - confirm in counter.hpp.
130  increment(ctr, buf.ctr_block);
131  enc(buf.state, rk);
132  buffer = buf.result;
133  }
134 
// Generate `n` consecutive buffers into buffer[0] .. buffer[n - 1].
//   ctr    - counter, passed by non-const reference to increment() once per
//            buffer
//   n      - number of buffers to fill; nothing is written when n == 0
//   buffer - pointer to at least n output arrays
135  template <typename ResultType>
136  void operator()(ctr_type &ctr, std::size_t n,
137  std::array<ResultType, size() / sizeof(ResultType)> *buffer) const
138  {
139  union {
140  std::array<__m128i, Blocks> state;
141  std::array<ctr_type, Blocks> ctr_block;
142  std::array<ResultType, size() / sizeof(ResultType)> result;
143  } buf;
144 
// The round keys are obtained once, outside the loop, and reused for every
// buffer.
145  std::array<__m128i, Rounds + 1> rk_tmp;
146  const std::array<__m128i, Rounds + 1> &rk = key_seq_(rk_tmp);
147 
148  for (std::size_t i = 0; i != n; ++i) {
149  increment(ctr, buf.ctr_block);
150  enc(buf.state, rk);
151  buffer[i] = buf.result;
152  }
153  }
154 
155  friend bool operator==(
158  {
159  return gen1.key_seq_ == gen2.key_seq_;
160  }
161 
162  friend bool operator!=(
165  {
166  return !(gen1 == gen2);
167  }
168 
169  template <typename CharT, typename Traits>
170  friend std::basic_ostream<CharT, Traits> &operator<<(
171  std::basic_ostream<CharT, Traits> &os,
173  {
174  if (!os)
175  return os;
176 
177  os << gen.key_seq_;
178 
179  return os;
180  }
181 
182  template <typename CharT, typename Traits>
183  friend std::basic_istream<CharT, Traits> &operator>>(
184  std::basic_istream<CharT, Traits> &is,
186  {
187  if (!is)
188  return is;
189 
191  is >> std::ws >> gen_tmp.key_seq_;
192 
193  if (is)
194  gen = std::move(gen_tmp);
195 
196  return is;
197  }
198 
199  private:
200  KeySeqType key_seq_;
201 
// Apply the whole cipher to the K blocks in `state`, in place, using the Rp1
// round keys in `rk`: an initial XOR with rk[0], the middle rounds with
// rk[1] .. rk[Rp1 - 2] (skipped at compile time unless 2 < Rp1), and the final
// round with rk[Rp1 - 1].
202  template <std::size_t K, std::size_t Rp1>
203  void enc(std::array<__m128i, K> &state,
204  const std::array<__m128i, Rp1> &rk) const
205  {
206  enc_first(state, rk);
207  enc_round<1>(state, rk, std::integral_constant<bool, 2 < Rp1>());
208  enc_last(state, rk);
209  }
210 
// Initial step: XOR every block of `state` with the first round key rk[0].
// Entry point; starts the compile-time recursion at block index 0.
211  template <std::size_t K, std::size_t Rp1>
212  void enc_first(std::array<__m128i, K> &state,
213  const std::array<__m128i, Rp1> &rk) const
214  {
215  enc_first<0>(state, rk, std::true_type());
216  }
217 
// Recursion terminator: selected once the block index has reached K.
218  template <std::size_t, std::size_t K, std::size_t Rp1>
219  void enc_first(std::array<__m128i, K> &, const std::array<__m128i, Rp1> &,
220  std::false_type) const
221  {
222  }
223 
// Recursive step: process block B, then continue with B + 1 while B + 1 < K.
224  template <std::size_t B, std::size_t K, std::size_t Rp1>
225  void enc_first(std::array<__m128i, K> &state,
226  const std::array<__m128i, Rp1> &rk, std::true_type) const
227  {
228  std::get<B>(state) =
229  _mm_xor_si128(std::get<B>(state), std::get<0>(rk));
230  enc_first<B + 1>(state, rk, std::integral_constant<bool, B + 1 < K>());
231  }
232 
// Middle rounds, unrolled at compile time over the round index N.
// Recursion terminator: selected when no middle round remains.
233  template <std::size_t, std::size_t K, std::size_t Rp1>
234  void enc_round(std::array<__m128i, K> &, const std::array<__m128i, Rp1> &,
235  std::false_type) const
236  {
237  }
238 
// Recursive step: apply round N to every block, then continue with round
// N + 1 as long as N + 2 < Rp1, so the last key rk[Rp1 - 1] is never consumed
// here (enc_last uses it).
239  template <std::size_t N, std::size_t K, std::size_t Rp1>
240  void enc_round(std::array<__m128i, K> &state,
241  const std::array<__m128i, Rp1> &rk, std::true_type) const
242  {
243  enc_round_block<0, N>(state, rk, std::true_type());
244  enc_round<N + 1>(
245  state, rk, std::integral_constant<bool, N + 2 < Rp1>());
246  }
247 
// One middle round (index N) applied across all blocks, unrolled at compile
// time over the block index B.
// Recursion terminator: selected once the block index has reached K.
248  template <std::size_t, std::size_t, std::size_t K, std::size_t Rp1>
249  void enc_round_block(std::array<__m128i, K> &,
250  const std::array<__m128i, Rp1> &, std::false_type) const
251  {
252  }
253 
// Recursive step: run _mm_aesenc_si128 on block B with round key rk[N], then
// continue with block B + 1 while B + 1 < K.
254  template <std::size_t B, std::size_t N, std::size_t K, std::size_t Rp1>
255  void enc_round_block(std::array<__m128i, K> &state,
256  const std::array<__m128i, Rp1> &rk, std::true_type) const
257 
258  {
259  std::get<B>(state) =
260  _mm_aesenc_si128(std::get<B>(state), std::get<N>(rk));
261  enc_round_block<B + 1, N>(
262  state, rk, std::integral_constant<bool, B + 1 < K>());
263  }
264 
// Final round: apply _mm_aesenclast_si128 with the last round key
// rk[Rp1 - 1] to every block of `state`.
// Entry point; starts the compile-time recursion at block index 0.
265  template <std::size_t K, std::size_t Rp1>
266  void enc_last(std::array<__m128i, K> &state,
267  const std::array<__m128i, Rp1> &rk) const
268  {
269  enc_last<0>(state, rk, std::true_type());
270  }
271 
// Recursion terminator: selected once the block index has reached K.
272  template <std::size_t, std::size_t K, std::size_t Rp1>
273  void enc_last(std::array<__m128i, K> &, const std::array<__m128i, Rp1> &,
274  std::false_type) const
275  {
276  }
277 
// Recursive step: process block B, then continue with B + 1 while B + 1 < K.
278  template <std::size_t B, std::size_t K, std::size_t Rp1>
279  void enc_last(std::array<__m128i, K> &state,
280  const std::array<__m128i, Rp1> &rk, std::true_type) const
281  {
282  std::get<B>(state) =
283  _mm_aesenclast_si128(std::get<B>(state), std::get<Rp1 - 1>(rk));
284  enc_last<B + 1>(state, rk, std::integral_constant<bool, B + 1 < K>());
285  }
286 }; // class AESNIGenerator
287 
290 template <typename ResultType, typename KeySeqType, std::size_t Rounds,
291  std::size_t Blocks>
292 using AESNIEngine =
294 
295 namespace internal
296 {
297 
// Primary template of the key-generation-assist helper; only declared here.
// The VSMC_DEFINE_RNG_AES_KEY_GEN_ASSIST macro produces explicit
// specializations AESKeyGenAssist<N>, one per template index N.
298 template <std::size_t>
299 inline __m128i AESKeyGenAssist(const __m128i &);
300 
557 
558 template <std::size_t>
560 
// Specialization for index 0: exposes the 64-bit constant as ::value through
// std::integral_constant.
// NOTE(review): presumably these are the values reached through
// Constants::weyl<I> in ARSKeySeqImpl - the connecting alias is not visible in
// this listing; confirm.
561 template <>
562 class ARSWeylConstant<0> : public std::integral_constant<std::uint64_t,
563  UINT64_C(0x9E3779B97F4A7C15)>
564 {
565 }; // class ARSWeylConstant
566 
// Specialization for index 1.
567 template <>
568 class ARSWeylConstant<1> : public std::integral_constant<std::uint64_t,
569  UINT64_C(0xBB67AE8584CAA73B)>
570 {
571 }; // class ARSWeylConstant
572 
573 } // namespace internal
574 
578 {
579  public:
581  template <std::size_t I>
583 }; // class ARSConstants
584 
585 namespace internal
586 {
587 
588 template <std::size_t Rounds, typename KeySeqGenerator>
590 {
591  public:
592  using key_type = typename KeySeqGenerator::key_type;
593 
// Recompute and store all Rounds + 1 round keys for `key`. A fresh
// KeySeqGenerator is constructed for each call, so no generator state is
// carried over between resets.
594  void reset(const key_type &key)
595  {
596  KeySeqGenerator generator;
597  generator(key, key_seq_);
598  }
599 
// Return the round keys stored by reset(). The array argument is unnamed and
// unused: the stored sequence is returned by reference, so nothing is written
// to the caller's scratch array.
600  const std::array<__m128i, Rounds + 1> &operator()(
601  std::array<__m128i, Rounds + 1> &) const
602  {
603  return key_seq_;
604  }
605 
608  {
609  alignas(16) std::array<std::uint64_t, 2 * (Rounds + 1)> ks1;
610  alignas(16) std::array<std::uint64_t, 2 * (Rounds + 1)> ks2;
611  std::memcpy(ks1.data(), seq1.key_seq_.data(), 16 * (Rounds + 1));
612  std::memcpy(ks2.data(), seq2.key_seq_.data(), 16 * (Rounds + 1));
613 
614  return ks1 == ks2;
615  }
616 
619  {
620  return !(seq1 == seq2);
621  }
622 
623  template <typename CharT, typename Traits>
624  friend std::basic_ostream<CharT, Traits> &operator<<(
625  std::basic_ostream<CharT, Traits> &os,
627  {
628  if (!os)
629  return os;
630 
631  alignas(16) std::array<std::uint64_t, 2 * (Rounds + 1)> ks;
632  std::memcpy(ks.data(), seq.key_seq_.data(), 16 * (Rounds + 1));
633  os << ks;
634 
635  return os;
636  }
637 
638  template <typename CharT, typename Traits>
639  friend std::basic_istream<CharT, Traits> &operator>>(
640  std::basic_istream<CharT, Traits> &is,
642  {
643  if (!is)
644  return is;
645 
646  alignas(16) std::array<std::uint64_t, 2 * (Rounds + 1)> ks;
647  is >> ks;
648  if (is)
649  std::memcpy(seq.key_seq_.data(), ks.data(), 16 * (Rounds + 1));
650 
651  return is;
652  }
653 
654  private:
655  std::array<__m128i, Rounds + 1> key_seq_;
656 }; // class AESKeySeq
657 
659 {
660  public:
661  using key_type = std::array<std::uint64_t, 2>;
662 
// Fill `rk` with Rp1 round keys derived from the 128-bit `key`.
// rk[0] is the key itself (loaded unaligned); rk[1] .. rk[Rp1 - 1] are
// produced by generate_seq, which is skipped at compile time when Rp1 == 1.
663  template <std::size_t Rp1>
664  void operator()(const key_type &key, std::array<__m128i, Rp1> &rk)
665  {
666  xmm1_ = _mm_loadu_si128(reinterpret_cast<const __m128i *>(key.data()));
667  std::get<0>(rk) = xmm1_;
668  generate_seq<1>(rk, std::integral_constant<bool, 1 < Rp1>());
669  }
670 
671  private:
672  __m128i xmm1_;
673  __m128i xmm2_;
674  __m128i xmm3_;
675 
// Compile-time loop over the round-key index N.
// Recursion terminator: selected once N has reached Rp1.
676  template <std::size_t, std::size_t Rp1>
677  void generate_seq(std::array<__m128i, Rp1> &, std::false_type)
678  {
679  }
680 
// Recursive step: derive round key N from the previous one held in xmm1_,
// store it in rk[N], then continue with N + 1 while N + 1 < Rp1.
// The AESKeyGenAssist specialization is selected by N % 256.
681  template <std::size_t N, std::size_t Rp1>
682  void generate_seq(std::array<__m128i, Rp1> &rk, std::true_type)
683  {
684  xmm2_ = AESKeyGenAssist<N % 256>(xmm1_);
685  expand_key();
686  std::get<N>(rk) = xmm1_;
687  generate_seq<N + 1>(rk, std::integral_constant<bool, N + 1 < Rp1>());
688  }
689 
// Combine the current round key (xmm1_) with the key-gen-assist output
// (xmm2_) to form the next round key, which is left in xmm1_. xmm3_ is
// scratch. The trailing comments give the SSE instruction each intrinsic
// corresponds to.
690  void expand_key()
691  {
692  xmm2_ = _mm_shuffle_epi32(xmm2_, 0xFF); // pshufd xmm2, xmm2, 0xFF
693  xmm3_ = _mm_slli_si128(xmm1_, 0x04); // pslldq xmm3, 0x04
694  xmm1_ = _mm_xor_si128(xmm1_, xmm3_); // pxor xmm1, xmm3
695  xmm3_ = _mm_slli_si128(xmm3_, 0x04); // pslldq xmm3, 0x04
696  xmm1_ = _mm_xor_si128(xmm1_, xmm3_); // pxor xmm1, xmm3
697  xmm3_ = _mm_slli_si128(xmm3_, 0x04); // pslldq xmm3, 0x04
698  xmm1_ = _mm_xor_si128(xmm1_, xmm3_); // pxor xmm1, xmm3
699  xmm1_ = _mm_xor_si128(xmm1_, xmm2_); // pxor xmm1, xmm2
700  }
701 }; // class AES128KeySeq
702 
704 {
705  public:
706  using key_type = std::array<std::uint64_t, 3>;
707 
// Fill `rk` with Rp1 round keys derived from the 192-bit `key` (three 64-bit
// words).
708  template <std::size_t Rp1>
709  void operator()(const key_type &key, std::array<__m128i, Rp1> &rk)
710  {
// xmm1_ takes the first two key words; xmm7_ is loaded from {0, key[2]}.
711  std::array<std::uint64_t, 2> tmp = {{0, std::get<2>(key)}};
712  xmm1_ = _mm_loadu_si128(reinterpret_cast<const __m128i *>(key.data()));
713  xmm7_ = _mm_loadu_si128(reinterpret_cast<const __m128i *>(tmp.data()));
714  std::get<0>(rk) = xmm1_;
715  std::get<1>(rk) = xmm7_;
716 
717  xmm3_ = _mm_setzero_si128();
718  xmm6_ = _mm_setzero_si128();
719  xmm4_ = _mm_shuffle_epi32(xmm7_, 0x4F); // pshufd xmm4, xmm7, 0x4F
720 
// Key material is produced in 24-byte steps, which do not line up with the
// 16-byte round keys, so it is written to a byte buffer first and copied into
// rk afterwards. The buffer has 16 spare bytes because the helpers' 16-byte
// stores may extend past Rp1 * 16 (see the storage notes in generate_key and
// complete_key).
721  std::array<unsigned char, Rp1 * 16 + 16> rk_tmp;
722  generate_seq<1, Rp1>(
723  rk_tmp.data(), std::integral_constant<bool, 24 < Rp1 * 16>());
724  copy_key(
725  rk, rk_tmp.data(), std::integral_constant<bool, 24 < Rp1 * 16>());
726  }
727 
728  private:
729  __m128i xmm1_;
730  __m128i xmm2_;
731  __m128i xmm3_;
732  __m128i xmm4_;
733  __m128i xmm5_;
734  __m128i xmm6_;
735  __m128i xmm7_;
736 
// Compile-time loop over 24-byte steps of key material, indexed by N.
// Recursion terminator: selected when step N would start at or beyond
// Rp1 * 16 bytes.
737  template <std::size_t, std::size_t>
738  void generate_seq(unsigned char *, std::false_type)
739  {
740  }
741 
// Recursive step: write the first 16 bytes of step N at offset N * 24, write
// the remainder at offset N * 24 + 16 only if that offset is still below
// Rp1 * 16, then continue with step N + 1 if it starts below Rp1 * 16.
742  template <std::size_t N, std::size_t Rp1>
743  void generate_seq(unsigned char *rk_ptr, std::true_type)
744  {
745  generate_key<N>(rk_ptr);
746  complete_key<N>(
747  rk_ptr, std::integral_constant<bool, N * 24 + 16 < Rp1 * 16>());
748  generate_seq<N + 1, Rp1>(
749  rk_ptr, std::integral_constant<bool, N * 24 + 24 < Rp1 * 16>());
750  }
751 
// Produce the first 16 bytes of step N: run key-gen-assist on xmm4_, update
// xmm1_ via generate_key_expansion(), and store xmm1_ (unaligned) at byte
// offset N * 24 of rk_ptr.
752  template <std::size_t N>
753  void generate_key(unsigned char *rk_ptr)
754  {
755  // In entry, N * 24 < Rp1 * 16
756  // Required Storage: N * 24 + 16;
757 
758  xmm2_ = AESKeyGenAssist<N % 256>(xmm4_);
759  generate_key_expansion();
760  _mm_storeu_si128(reinterpret_cast<__m128i *>(rk_ptr + N * 24), xmm1_);
761  }
762 
// Second part of step N.
// No-op overload: selected when offset N * 24 + 16 is not below Rp1 * 16.
763  template <std::size_t>
764  void complete_key(unsigned char *, std::false_type)
765  {
766  }
767 
// Update xmm4_/xmm7_ via complete_key_expansion() and store xmm7_ (unaligned,
// 16 bytes) at byte offset N * 24 + 16 of rk_ptr.
768  template <std::size_t N>
769  void complete_key(unsigned char *rk_ptr, std::true_type)
770  {
771  // In entry, N * 24 + 16 < Rp1 * 16
772  // Required storage: N * 24 + 32
773 
774  complete_key_expansion();
775  _mm_storeu_si128(
776  reinterpret_cast<__m128i *>(rk_ptr + N * 24 + 16), xmm7_);
777  }
778 
// Update xmm1_ from its previous value, the running state in xmm3_, and the
// key-gen-assist output in xmm2_. The trailing comments give the SSE
// instruction each intrinsic corresponds to; note that the two shufps use
// different immediates (0x10, then 0x8C).
779  void generate_key_expansion()
780  {
781  xmm2_ = _mm_shuffle_epi32(xmm2_, 0xFF); // pshufd xmm2, xmm2, 0xFF
782  xmm3_ = _mm_castps_si128( // shufps xmm3, xmm1, 0x10
783  _mm_shuffle_ps(
784  _mm_castsi128_ps(xmm3_), _mm_castsi128_ps(xmm1_), 0x10));
785  xmm1_ = _mm_xor_si128(xmm1_, xmm3_); // pxor xmm1, xmm3
786  xmm3_ = _mm_castps_si128(_mm_shuffle_ps( // shufps xmm3, xmm1, 0x8C
787  _mm_castsi128_ps(xmm3_), _mm_castsi128_ps(xmm1_), 0x8C));
788  xmm1_ = _mm_xor_si128(xmm1_, xmm3_); // pxor xmm1, xmm3
789  xmm1_ = _mm_xor_si128(xmm1_, xmm2_); // pxor xmm1, xmm2
790  }
791 
// Update xmm4_ from its previous value and the freshly generated xmm1_, then
// derive xmm7_ (the value complete_key stores) from xmm4_. xmm5_ and xmm6_
// are scratch/running state. The trailing comments give the SSE instruction
// each intrinsic corresponds to.
792  void complete_key_expansion()
793  {
794  xmm5_ = xmm4_; // movdqa xmm5, xmm4
795  xmm5_ = _mm_slli_si128(xmm5_, 0x04); // pslldq xmm5, 0x04
796  xmm6_ = _mm_castps_si128(_mm_shuffle_ps( // shufps xmm6, xmm1, 0xF0
797  _mm_castsi128_ps(xmm6_), _mm_castsi128_ps(xmm1_), 0xF0));
798  xmm6_ = _mm_xor_si128(xmm6_, xmm5_); // pxor xmm6, xmm5
799  xmm4_ = _mm_xor_si128(xmm4_, xmm6_); // pxor xmm4, xmm6
800  xmm7_ = _mm_shuffle_epi32(xmm4_, 0x0E); // pshufd xmm7, xmm4, 0x0E
801  }
802 
// Transfer the generated key bytes from the temporary buffer into `rk`.
// No-op overload: selected when Rp1 * 16 does not exceed 24, i.e. nothing was
// generated.
803  template <std::size_t Rp1>
804  void copy_key(
805  std::array<__m128i, Rp1> &, const unsigned char *, std::false_type)
806  {
807  }
808 
// Copy bytes [24, Rp1 * 16) of rk_ptr onto the same byte range of rk. The
// first 24 bytes of rk are left as they were.
809  template <std::size_t Rp1>
810  void copy_key(std::array<__m128i, Rp1> &rk, const unsigned char *rk_ptr,
811  std::true_type)
812  {
813  unsigned char *dst = reinterpret_cast<unsigned char *>(rk.data());
814  std::memcpy(dst + 24, rk_ptr + 24, Rp1 * 16 - 24);
815  }
816 }; // class AES192KeySeq
817 
819 {
820  public:
821  using key_type = std::array<std::uint64_t, 4>;
822 
// Fill `rk` with Rp1 round keys derived from the 256-bit `key` (four 64-bit
// words). rk[0] and rk[1] are the low and high 128 bits of the key (loaded
// unaligned); rk[2] .. rk[Rp1 - 1] come from generate_seq, which is skipped at
// compile time unless 2 < Rp1.
823  template <std::size_t Rp1>
824  void operator()(const key_type &key, std::array<__m128i, Rp1> &rk)
825  {
826  xmm1_ = _mm_loadu_si128(reinterpret_cast<const __m128i *>(key.data()));
827  xmm3_ =
828  _mm_loadu_si128(reinterpret_cast<const __m128i *>(key.data() + 2));
829  std::get<0>(rk) = xmm1_;
830  std::get<1>(rk) = xmm3_;
831  generate_seq<2>(rk, std::integral_constant<bool, 2 < Rp1>());
832  }
833 
834  private:
835  __m128i xmm1_;
836  __m128i xmm2_;
837  __m128i xmm3_;
838  __m128i xmm4_;
839 
// Compile-time loop over the round-key index N.
// Recursion terminator: selected once N has reached Rp1.
840  template <std::size_t, std::size_t Rp1>
841  void generate_seq(std::array<__m128i, Rp1> &, std::false_type)
842  {
843  }
844 
// Recursive step: produce rk[N] with the even-index or odd-index variant of
// generate_key (chosen by N % 2 == 0), then continue with N + 1 while
// N + 1 < Rp1.
845  template <std::size_t N, std::size_t Rp1>
846  void generate_seq(std::array<__m128i, Rp1> &rk, std::true_type)
847  {
848  generate_key<N>(rk, std::integral_constant<bool, N % 2 == 0>());
849  generate_seq<N + 1>(rk, std::integral_constant<bool, N + 1 < Rp1>());
850  }
851 
// Even N: key-gen-assist on xmm3_ with the specialization selected by
// (N / 2) % 256, update xmm1_, and store it as rk[N].
852  template <std::size_t N, std::size_t Rp1>
853  void generate_key(std::array<__m128i, Rp1> &rk, std::true_type)
854  {
855  xmm2_ = AESKeyGenAssist<(N / 2) % 256>(xmm3_);
856  expand_key(std::true_type());
857  std::get<N>(rk) = xmm1_;
858  }
859 
// Odd N: key-gen-assist on xmm1_ with a zero second argument (called directly,
// not through AESKeyGenAssist), update xmm3_, and store it as rk[N].
860  template <std::size_t N, std::size_t Rp1>
861  void generate_key(std::array<__m128i, Rp1> &rk, std::false_type)
862  {
863  xmm4_ = _mm_aeskeygenassist_si128(xmm1_, 0);
864  expand_key(std::false_type());
865  std::get<N>(rk) = xmm3_;
866  }
867 
// Even-index update: fold the key-gen-assist output (xmm2_) into xmm1_.
// xmm4_ is used as scratch. The trailing comments give the SSE instruction
// each intrinsic corresponds to.
868  void expand_key(std::true_type)
869  {
870  xmm2_ = _mm_shuffle_epi32(xmm2_, 0xFF); // pshufd xmm2, xmm2, 0xFF
871  xmm4_ = _mm_slli_si128(xmm1_, 0x04); // pslldq xmm4, 0x04
872  xmm1_ = _mm_xor_si128(xmm1_, xmm4_); // pxor xmm1, xmm4
873  xmm4_ = _mm_slli_si128(xmm4_, 0x04); // pslldq xmm4, 0x04
874  xmm1_ = _mm_xor_si128(xmm1_, xmm4_); // pxor xmm1, xmm4
875  xmm4_ = _mm_slli_si128(xmm4_, 0x04); // pslldq xmm4, 0x04
876  xmm1_ = _mm_xor_si128(xmm1_, xmm4_); // pxor xmm1, xmm4
877  xmm1_ = _mm_xor_si128(xmm1_, xmm2_); // pxor xmm1, xmm2
878  }
879 
// Odd-index update: fold the key-gen-assist output (held in xmm4_ on entry,
// shuffled with 0xAA into xmm2_) into xmm3_. xmm4_ is then reused as scratch.
// The trailing comments give the SSE instruction each intrinsic corresponds
// to.
880  void expand_key(std::false_type)
881  {
882  xmm2_ = _mm_shuffle_epi32(xmm4_, 0xAA); // pshufd xmm2, xmm4, 0xAA
883  xmm4_ = _mm_slli_si128(xmm3_, 0x04); // pslldq xmm4, 0x04
884  xmm3_ = _mm_xor_si128(xmm3_, xmm4_); // pxor xmm3, xmm4
885  xmm4_ = _mm_slli_si128(xmm4_, 0x04); // pslldq xmm4, 0x04
886  xmm3_ = _mm_xor_si128(xmm3_, xmm4_); // pxor xmm3, xmm4
887  xmm4_ = _mm_slli_si128(xmm4_, 0x04); // pslldq xmm4, 0x04
888  xmm3_ = _mm_xor_si128(xmm3_, xmm4_); // pxor xmm3, xmm4
889  xmm3_ = _mm_xor_si128(xmm3_, xmm2_); // pxor xmm3, xmm2
890  }
891 }; // class AES256KeySeq
892 
893 template <typename Constants>
895 {
896  template <std::size_t I>
897  using weyl = typename Constants::template weyl<I>;
898 
899  public:
900  using key_type = std::array<std::uint64_t, 2>;
901 
// Default constructor: the stored key starts as all zero bits.
902  ARSKeySeqImpl() : key_(_mm_setzero_si128()) {}
903 
// Store the 128-bit `key` (unaligned load). Round keys are not computed here;
// operator() derives them on each call.
904  void reset(const key_type &key)
905  {
906  key_ = _mm_loadu_si128(reinterpret_cast<const __m128i *>(key.data()));
907  }
908 
// Compute Rp1 round keys into the caller-provided array `rk` and return a
// reference to it. rk[0] is the stored key; each following key is the
// previous one plus the increment w, which packs weyl<0>::value and
// weyl<1>::value into one 128-bit value.
909  template <std::size_t Rp1>
910  const std::array<__m128i, Rp1> &operator()(
911  std::array<__m128i, Rp1> &rk) const
912  {
913  std::array<std::uint64_t, 2> tmp = {{weyl<0>::value, weyl<1>::value}};
914  __m128i w =
915  _mm_loadu_si128(reinterpret_cast<const __m128i *>(tmp.data()));
916  std::get<0>(rk) = key_;
917  generate<1>(rk, w, std::integral_constant<bool, 1 < Rp1>());
918 
919  return rk;
920  }
921 
// Two key sequences are equal when their stored keys are equal. The __m128i
// values are written out to 16-byte-aligned key_type arrays (the alignment
// matches the aligned store _mm_store_si128) and those arrays are compared.
922  friend bool operator==(
923  const ARSKeySeqImpl &seq1, const ARSKeySeqImpl &seq2)
924  {
925  alignas(16) key_type k1;
926  alignas(16) key_type k2;
927  _mm_store_si128(reinterpret_cast<__m128i *>(k1.data()), seq1.key_);
928  _mm_store_si128(reinterpret_cast<__m128i *>(k2.data()), seq2.key_);
929 
930  return k1 == k2;
931  }
932 
// Inequality, defined as the negation of operator==.
933  friend bool operator!=(
934  const ARSKeySeqImpl &seq1, const ARSKeySeqImpl &seq2)
935  {
936  return !(seq1 == seq2);
937  }
938 
// Write the stored key to `os` as a key_type array. Nothing is written when
// the stream is already in a failed state. Relies on a stream inserter for
// std::array that is declared outside this listing.
939  template <typename CharT, typename Traits>
940  friend std::basic_ostream<CharT, Traits> &operator<<(
941  std::basic_ostream<CharT, Traits> &os, const ARSKeySeqImpl &seq)
942  {
943  if (!os)
944  return os;
945 
946  alignas(16) key_type k;
947  _mm_store_si128(reinterpret_cast<__m128i *>(k.data()), seq.key_);
948  os << k;
949 
950  return os;
951  }
952 
// Read a key_type array from `is` and, only if the read succeeded, replace
// the stored key with it. `seq` is left unchanged when the stream was already
// failed or the extraction fails. Relies on a stream extractor for std::array
// that is declared outside this listing.
953  template <typename CharT, typename Traits>
954  friend std::basic_istream<CharT, Traits> &operator>>(
955  std::basic_istream<CharT, Traits> &is, ARSKeySeqImpl &seq)
956  {
957  if (!is)
958  return is;
959 
960  alignas(16) key_type k = {{0}};
961  is >> k;
962  if (is) {
963  seq.key_ =
964  _mm_load_si128(reinterpret_cast<const __m128i *>(k.data()));
965  }
966 
967  return is;
968  }
969 
970  private:
// Compile-time loop over the round-key index N.
// Recursion terminator: selected once N has reached Rp1.
971  template <std::size_t, std::size_t Rp1>
972  void generate(
973  std::array<__m128i, Rp1> &, const __m128i &, std::false_type) const
974  {
975  }
976 
// Recursive step: rk[N] = rk[N - 1] + w, computed with _mm_add_epi64 (the two
// 64-bit halves are added independently), then continue with N + 1 while
// N + 1 < Rp1.
977  template <std::size_t N, std::size_t Rp1>
978  void generate(
979  std::array<__m128i, Rp1> &rk, const __m128i &w, std::true_type) const
980  {
981  std::get<N>(rk) = _mm_add_epi64(std::get<N - 1>(rk), w);
982  generate<N + 1>(rk, w, std::integral_constant<bool, N + 1 < Rp1>());
983  }
984 
985  private:
986  __m128i key_;
987 }; // class ARSKeySeqImpl
988 
989 } // namespace vsmc::internal
990 
993 template <std::size_t Rounds>
994 using AES128KeySeq =
996 
999 template <std::size_t Rounds>
1000 using AES192KeySeq =
1002 
1005 template <std::size_t Rounds>
1006 using AES256KeySeq =
1008 
1019 template <typename Constants = ARSConstants>
1021 
1024 template <typename ResultType, std::size_t Rounds = VSMC_RNG_AES128_ROUNDS,
1025  std::size_t Blocks = VSMC_RNG_AES_NI_BLOCKS>
1026 using AES128Engine =
1028 
1031 template <typename ResultType, std::size_t Rounds = VSMC_RNG_AES192_ROUNDS,
1032  std::size_t Blocks = VSMC_RNG_AES_NI_BLOCKS>
1033 using AES192Engine =
1035 
1038 template <typename ResultType, std::size_t Rounds = VSMC_RNG_AES256_ROUNDS,
1039  std::size_t Blocks = VSMC_RNG_AES_NI_BLOCKS>
1040 using AES256Engine =
1042 
1045 template <typename ResultType, std::size_t Rounds = VSMC_RNG_ARS_ROUNDS,
1046  std::size_t Blocks = VSMC_RNG_AES_NI_BLOCKS,
1047  typename Constants = ARSConstants>
1048 using ARSEngine =
1050 
1054 
1058 
1062 
1066 
1070 
1074 
1078 
1082 
1085 
1089 
1093 
1097 
1101 
1105 
1109 
1113 
1117 
1121 
1125 
1129 
1133 
1137 
1141 
1145 
1149 
1153 
1157 
1161 
1165 
1169 
1174 
1179 
1184 
1189 
1194 
1199 
1204 
1209 
1214 
1219 
1220 } // namespace vsmc
1221 
1222 #ifdef VSMC_GCC
1223 #if __GNUC__ >= 6
1224 #pragma GCC diagnostic pop
1225 #endif
1226 #endif
1227 
1228 #endif // VSMC_RNG_AES_NI_HPP
#define UINT64_C(x)
Definition: opencl.h:44
Definition: monitor.hpp:48
void enc(const ctr_type &ctr, ctr_type &buffer) const
Definition: aes_ni.hpp:102
void operator()(const key_type &key, std::array< __m128i, Rp1 > &rk)
Definition: aes_ni.hpp:709
#define VSMC_RNG_AES128_ROUNDS
AES-128 default rounds.
Definition: aes_ni.hpp:55
friend std::basic_istream< CharT, Traits > & operator>>(std::basic_istream< CharT, Traits > &is, AESNIGenerator< KeySeqType, Rounds, Blocks > &gen)
Definition: aes_ni.hpp:183
const std::array< __m128i, Rounds+1 > & operator()(std::array< __m128i, Rounds+1 > &) const
Definition: aes_ni.hpp:600
std::array< std::uint64_t, 2 > ctr_type
Definition: aes_ni.hpp:95
#define VSMC_DEFINE_RNG_AES_KEY_GEN_ASSIST(N, val)
Definition: aes_ni.hpp:46
void increment(std::array< T, K > &ctr)
Increment a counter by one.
Definition: counter.hpp:62
__m128i AESKeyGenAssist(const __m128i &)
ulong uint64_t
Definition: opencl.h:42
friend bool operator==(const AESKeySeq< Rounds, KeySeqGenerator > &seq1, const AESKeySeq< Rounds, KeySeqGenerator > &seq2)
Definition: aes_ni.hpp:606
STL namespace.
Counter based RNG engine.
Definition: counter.hpp:187
void operator()(const key_type &key, std::array< __m128i, Rp1 > &rk)
Definition: aes_ni.hpp:664
#define VSMC_RNG_AES192_ROUNDS
AES-192 default rounds.
Definition: aes_ni.hpp:60
void operator()(ctr_type &ctr, std::size_t n, std::array< ResultType, size()/sizeof(ResultType)> *buffer) const
Definition: aes_ni.hpp:136
Default ARS constants.
Definition: aes_ni.hpp:577
const std::array< __m128i, Rp1 > & operator()(std::array< __m128i, Rp1 > &rk) const
Definition: aes_ni.hpp:910
friend std::basic_ostream< CharT, Traits > & operator<<(std::basic_ostream< CharT, Traits > &os, const AESNIGenerator< KeySeqType, Rounds, Blocks > &gen)
Definition: aes_ni.hpp:170
friend std::basic_ostream< CharT, Traits > & operator<<(std::basic_ostream< CharT, Traits > &os, const ARSKeySeqImpl &seq)
Definition: aes_ni.hpp:940
#define VSMC_RNG_AES256_ROUNDS
AES-256 default rounds.
Definition: aes_ni.hpp:65
friend bool operator!=(const AESKeySeq< Rounds, KeySeqGenerator > &seq1, const AESKeySeq< Rounds, KeySeqGenerator > &seq2)
Definition: aes_ni.hpp:617
std::array< std::uint64_t, 4 > key_type
Definition: aes_ni.hpp:821
void reset(const key_type &key)
Definition: aes_ni.hpp:100
#define VSMC_RNG_ARS_ROUNDS
ARSEngine default rounds.
Definition: aes_ni.hpp:71
friend std::basic_ostream< CharT, Traits > & operator<<(std::basic_ostream< CharT, Traits > &os, const AESKeySeq< Rounds, KeySeqGenerator > &seq)
Definition: aes_ni.hpp:624
friend bool operator==(const AESNIGenerator< KeySeqType, Rounds, Blocks > &gen1, const AESNIGenerator< KeySeqType, Rounds, Blocks > &gen2)
Definition: aes_ni.hpp:155
void reset(const key_type &key)
Definition: aes_ni.hpp:904
std::array< std::uint64_t, 3 > key_type
Definition: aes_ni.hpp:706
friend bool operator!=(const AESNIGenerator< KeySeqType, Rounds, Blocks > &gen1, const AESNIGenerator< KeySeqType, Rounds, Blocks > &gen2)
Definition: aes_ni.hpp:162
void reset(const key_type &key)
Definition: aes_ni.hpp:594
void operator()(ctr_type &ctr, std::array< ResultType, size()/sizeof(ResultType)> &buffer) const
Definition: aes_ni.hpp:118
friend bool operator!=(const ARSKeySeqImpl &seq1, const ARSKeySeqImpl &seq2)
Definition: aes_ni.hpp:933
typename KeySeqType::key_type key_type
Definition: aes_ni.hpp:96
friend bool operator==(const ARSKeySeqImpl &seq1, const ARSKeySeqImpl &seq2)
Definition: aes_ni.hpp:922
void operator()(const key_type &key, std::array< __m128i, Rp1 > &rk)
Definition: aes_ni.hpp:824
typename KeySeqGenerator::key_type key_type
Definition: aes_ni.hpp:592
static constexpr std::size_t size()
Definition: aes_ni.hpp:98
#define VSMC_RNG_AES_NI_BLOCKS
AESEngine default blocks.
Definition: aes_ni.hpp:77
std::array< std::uint64_t, 2 > key_type
Definition: aes_ni.hpp:661
friend std::basic_istream< CharT, Traits > & operator>>(std::basic_istream< CharT, Traits > &is, AESKeySeq< Rounds, KeySeqGenerator > &seq)
Definition: aes_ni.hpp:639
friend std::basic_istream< CharT, Traits > & operator>>(std::basic_istream< CharT, Traits > &is, ARSKeySeqImpl &seq)
Definition: aes_ni.hpp:954
std::array< std::uint64_t, 2 > key_type
Definition: aes_ni.hpp:900
RNG generator using AES-NI instructions.
Definition: aes_ni.hpp:86