vSMC
vSMC: Scalable Monte Carlo
aes.hpp
Go to the documentation of this file.
1 //============================================================================
2 // vSMC/include/vsmc/rng/aes.hpp
3 //----------------------------------------------------------------------------
4 // vSMC: Scalable Monte Carlo
5 //----------------------------------------------------------------------------
6 // Copyright (c) 2013-2016, Yan Zhou
7 // All rights reserved.
8 //
9 // Redistribution and use in source and binary forms, with or without
10 // modification, are permitted provided that the following conditions are met:
11 //
12 // Redistributions of source code must retain the above copyright notice,
13 // this list of conditions and the following disclaimer.
14 //
15 // Redistributions in binary form must reproduce the above copyright notice,
16 // this list of conditions and the following disclaimer in the documentation
17 // and/or other materials provided with the distribution.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23 // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 // POSSIBILITY OF SUCH DAMAGE.
30 //============================================================================
31 
32 #ifndef VSMC_RNG_AES_HPP
33 #define VSMC_RNG_AES_HPP
34 
36 #include <vsmc/rng/aes_ni.hpp>
37 
// Expands to an explicit specialization `AESKeyGenAssist<N>` that returns
// `_mm_aeskeygenassist_si128(xmm, val)`. `val` is pasted textually as the
// second argument of the intrinsic.
38 #define VSMC_DEFINE_RNG_AES_KEY_GEN_ASSIST(N, val) \
39  template <> \
40  inline __m128i AESKeyGenAssist<N>(__m128i xmm) \
41  { \
42  return _mm_aeskeygenassist_si128(xmm, val); \
43  }
44 
47 #ifndef VSMC_RNG_AES_BLOCKS
48 #define VSMC_RNG_AES_BLOCKS 4
49 #endif
50 
51 namespace vsmc
52 {
53 
54 namespace internal
55 {
56 
// Primary template, declared only. The key generators below call
// AESKeyGenAssist<N>(...) with a compile-time index N; the specializations are
// presumably produced with VSMC_DEFINE_RNG_AES_KEY_GEN_ASSIST (their
// definitions are not visible in this listing).
57 template <std::size_t>
58 inline __m128i AESKeyGenAssist(__m128i) ;
59 
316 
318 {
319  public:
// Entry point for initializing one slot of the key schedule.
// Forwards to init<Offset, N>, tag-dispatching on whether N < Rp1, so that a
// slot index outside the schedule array selects the no-op overload instead of
// an out-of-range std::get<N>.
//   key : user key, as an array of T
//   ks  : key schedule being built
//   xmm : register-like member of the caller that receives the loaded value
320  template <std::size_t Offset, std::size_t N, typename T,
321  std::size_t KeySize, std::size_t Rp1>
322  static void eval(const std::array<T, KeySize> &key,
323  std::array<__m128i, Rp1> &ks, __m128i &xmm)
324  {
325  init<Offset, N>(key, ks, xmm, std::integral_constant < bool, N<Rp1>());
326  }
327 
328  private:
// No-op overload, selected by eval() when N >= Rp1 (std::false_type tag):
// nothing is loaded and neither the schedule nor the register is touched.
329  template <std::size_t, std::size_t, typename T, std::size_t KeySize,
330  std::size_t Rp1>
331  static void init(const std::array<T, KeySize> &,
332  std::array<__m128i, Rp1> &, __m128i &, std::false_type)
333  {
334  }
335 
// Active overload, selected by eval() when N < Rp1 (std::true_type tag).
// Loads one __m128i worth of key material starting at element `Offset` of
// `key` and writes it both to schedule slot N and to `xmm`.
//
// `Offset` is counted in elements of T, not bytes. Callers pass 0 for the
// first 128 key bits and 16 / sizeof(T) for the second 128 bits of a 256-bit
// key. The offset must be applied to the load address; otherwise every call
// reads the first 128 bits and the upper half of a 256-bit key never enters
// the schedule.
336  template <std::size_t Offset, std::size_t N, typename T,
337  std::size_t KeySize, std::size_t Rp1>
338  static void init(const std::array<T, KeySize> &key,
339  std::array<__m128i, Rp1> &ks, __m128i &xmm, std::true_type)
340  {
341  M128I<> tmp;
342  tmp.load(key.data() + Offset);
343  std::get<N>(ks) = xmm = tmp.value();
344  }
345 }; // class AESKeyInit
346 
347 template <typename T, std::size_t Rounds, typename KeySeqGenerator>
349 {
350  public:
351  using key_type = typename KeySeqGenerator::key_type;
352 
// Rebuilds the cached round-key sequence from `key`: a fresh KeySeqGenerator
// is default-constructed and invoked as generator(key, key_seq_), which fills
// key_seq_ (Rounds + 1 entries).
353  void reset(const key_type &key)
354  {
355  KeySeqGenerator generator;
356  generator(key, key_seq_);
357  }
358 
// Copies the cached round keys into `rk`. The key argument is unnamed and
// ignored; the sequence returned is whatever the last reset() produced.
// NOTE(review): the signature line (listing line 360) is missing from this
// listing; the cross-reference at the end of the page gives it as
// `void operator()(const key_type &, std::array<M128I<U>, Rounds+1> &rk) const`.
359  template <typename U>
361  const key_type &, std::array<M128I<U>, Rounds + 1> &rk) const
362  {
363  rk = key_seq_;
364  }
365 
366  private:
367  std::array<M128I<>, Rounds + 1> key_seq_;
368 }; // class AESKeySeq
369 
370 template <typename T>
372 {
373  public:
374  using key_type = std::array<T, 16 / sizeof(T)>;
375 
// Builds the full key schedule for a 16-byte key into `key_seq`.
// Slot 0 is initialized from the key (also kept in xmm1_), slots 1..Rp1-1 are
// produced by generate_seq, and the finished __m128i array is copied bytewise
// into the caller's M128I<> array.
// NOTE(review): the signature line (listing line 377) is missing from this
// listing; the cross-reference at the end of the page gives it as
// `void operator()(const key_type &key, std::array<M128I<>, Rp1> &key_seq)`.
376  template <std::size_t Rp1>
378  const key_type &key, std::array<M128I<>, Rp1> &key_seq)
379  {
380  std::array<__m128i, Rp1> ks;
381  AESKeyInit::eval<0, 0>(key, ks, xmm1_);
382  generate_seq<1>(ks, std::integral_constant<bool, 1 < Rp1>());
383  std::memcpy(key_seq.data(), ks.data(), sizeof(__m128i) * Rp1);
384  }
385 
386  private:
387  __m128i xmm1_;
388  __m128i xmm2_;
389  __m128i xmm3_;
390 
// Compile-time recursion over schedule slots, terminated by tag dispatch.
// This overload (std::false_type) is the terminator: the slot index has
// reached Rp1, so there is nothing left to generate.
391  template <std::size_t, std::size_t Rp1>
392  void generate_seq(std::array<__m128i, Rp1> &, std::false_type)
393  {
394  }
395 
// Generates slot N from the previous round key held in xmm1_:
// AESKeyGenAssist<N> produces the assist word in xmm2_, expand_key() folds it
// into xmm1_, and the result is stored in ks[N]. Recurses with N + 1 while
// N + 1 < Rp1.
396  template <std::size_t N, std::size_t Rp1>
397  void generate_seq(std::array<__m128i, Rp1> &ks, std::true_type)
398  {
399  xmm2_ = AESKeyGenAssist<N>(xmm1_);
400  expand_key();
401  std::get<N>(ks) = xmm1_;
402  generate_seq<N + 1>(ks, std::integral_constant<bool, N + 1 < Rp1>());
403  }
404 
// One round of the 128-bit key expansion, operating on the members
// xmm1_ (previous round key in, next round key out), xmm2_ (key-gen-assist
// result in) and xmm3_ (scratch).
// xmm1_ is XORed with itself shifted left by 4, 8 and 12 bytes (a running XOR
// of its 32-bit words), then with the broadcast top word of xmm2_.
// The trailing comments give the corresponding instruction for each step.
405  void expand_key()
406  {
407  xmm2_ = _mm_shuffle_epi32(xmm2_, 0xFF); // pshufd xmm2, xmm2, 0xFF
408  xmm3_ = _mm_slli_si128(xmm1_, 0x04); // movdqa xmm3, xmm1; pslldq xmm3, 0x04
409  xmm1_ = _mm_xor_si128(xmm1_, xmm3_); // pxor xmm1, xmm3
410  xmm3_ = _mm_slli_si128(xmm3_, 0x04); // pslldq xmm3, 0x04
411  xmm1_ = _mm_xor_si128(xmm1_, xmm3_); // pxor xmm1, xmm3
412  xmm3_ = _mm_slli_si128(xmm3_, 0x04); // pslldq xmm3, 0x04
413  xmm1_ = _mm_xor_si128(xmm1_, xmm3_); // pxor xmm1, xmm3
414  xmm1_ = _mm_xor_si128(xmm1_, xmm2_); // pxor xmm1, xmm2
415  }
416 }; // class AES128KeySeq
417 
418 template <typename T>
420 {
421  public:
422  using key_type = std::array<T, 24 / sizeof(T)>;
423 
// Builds the full key schedule for a 24-byte key into `key_seq`.
// NOTE(review): the signature line (listing line 425) is missing from this
// listing; the cross-reference at the end of the page gives it as
// `void operator()(const key_type &key, std::array<M128I<>, Rp1> &key_seq)`.
424  template <std::size_t Rp1>
426  const key_type &key, std::array<M128I<>, Rp1> &key_seq)
427  {
428  std::array<__m128i, Rp1> ks;
429 
// Re-pack the 24 key bytes as three 64-bit words so the key can be fed to
// AESKeyInit in two 128-bit pieces.
430  std::array<std::uint64_t, 3> key_tmp;
431  std::memcpy(key_tmp.data(), key.data(), 24);
// First piece: key bytes 0..15 -> ks[0] and xmm1_.
432  AESKeyInit::eval<0, 0>(key_tmp, ks, xmm1_);
// Second piece: key bytes 16..23 in the low half, zero in the high half
// -> ks[1] and xmm7_.
433  std::get<0>(key_tmp) = std::get<2>(key_tmp);
434  std::get<1>(key_tmp) = 0;
435  AESKeyInit::eval<0, 1>(key_tmp, ks, xmm7_);
436 
437  xmm3_ = _mm_setzero_si128();
438  xmm6_ = _mm_setzero_si128();
439  xmm4_ = _mm_shuffle_epi32(xmm7_, 0x4F); // pshufd xmm4, xmm7, 0x4F
440 
// The expansion emits 24 bytes per step, which does not align with 16-byte
// schedule slots, so it is written to a flat byte buffer first. The buffer
// has 16 spare bytes because the last 16-byte store may run past Rp1 * 16.
441  std::array<unsigned char, Rp1 * 16 + 16> ks_tmp;
442  generate_seq<1, Rp1>(
443  ks_tmp.data(), std::integral_constant<bool, 24 < Rp1 * 16>());
// Bytes 0..23 of ks already hold the raw key; only bytes from offset 24 on
// are taken from the buffer.
444  copy_key(
445  ks, ks_tmp.data(), std::integral_constant<bool, 24 < Rp1 * 16>());
446 
447  std::memcpy(key_seq.data(), ks.data(), sizeof(__m128i) * Rp1);
448  }
449 
450  private:
451  __m128i xmm1_;
452  __m128i xmm2_;
453  __m128i xmm3_;
454  __m128i xmm4_;
455  __m128i xmm5_;
456  __m128i xmm6_;
457  __m128i xmm7_;
458 
// Terminator of the compile-time recursion (std::false_type): the next
// 24-byte step would start at or beyond Rp1 * 16, so nothing is generated.
459  template <std::size_t, std::size_t>
460  void generate_seq(unsigned char *, std::false_type)
461  {
462  }
463 
// Step N of the expansion, writing into the flat byte buffer `ks_ptr`.
// generate_key<N> stores 16 bytes at offset N * 24; complete_key<N> stores a
// further 16 bytes at offset N * 24 + 16, but only if that offset is still
// inside the schedule (N * 24 + 16 < Rp1 * 16). Recurses with N + 1 while
// the next step's start offset, (N + 1) * 24, is below Rp1 * 16.
464  template <std::size_t N, std::size_t Rp1>
465  void generate_seq(unsigned char *ks_ptr, std::true_type)
466  {
467  generate_key<N>(ks_ptr);
468  complete_key<N>(
469  ks_ptr, std::integral_constant<bool, N * 24 + 16 < Rp1 * 16>());
470  generate_seq<N + 1, Rp1>(
471  ks_ptr, std::integral_constant<bool, N * 24 + 24 < Rp1 * 16>());
472  }
473 
// First half of step N: derives the assist word from xmm4_ with
// AESKeyGenAssist<N>, updates xmm1_ via generate_key_expansion(), and stores
// xmm1_ (16 bytes, unaligned store) at byte offset N * 24 of `ks_ptr`.
474  template <std::size_t N>
475  void generate_key(unsigned char *ks_ptr)
476  {
477  // In entry, N * 24 < Rp1 * 16
478  // Required Storage: N * 24 + 16;
479 
480  xmm2_ = AESKeyGenAssist<N>(xmm4_);
481  generate_key_expansion();
482  _mm_storeu_si128(reinterpret_cast<__m128i *>(ks_ptr + N * 24), xmm1_);
483  }
484 
// No-op overload (std::false_type): offset N * 24 + 16 is not below
// Rp1 * 16, so the second half of step N is not needed.
485  template <std::size_t>
486  void complete_key(unsigned char *, std::false_type)
487  {
488  }
489 
// Second half of step N: updates xmm4_/xmm7_ via complete_key_expansion()
// and stores xmm7_ (16 bytes, unaligned store) at byte offset N * 24 + 16.
// Only the low 8 of those bytes belong to this step; the next step's
// generate_key store at (N + 1) * 24 overwrites the upper 8.
490  template <std::size_t N>
491  void complete_key(unsigned char *ks_ptr, std::true_type)
492  {
493  // In entry, N * 24 + 16 < Rp1 * 16
494  // Required storage: N * 24 + 32
495 
496  complete_key_expansion();
497  _mm_storeu_si128(
498  reinterpret_cast<__m128i *>(ks_ptr + N * 24 + 16), xmm7_);
499  }
500 
// Updates xmm1_ (the first 128 bits of the current 192-bit key state) from
// the assist word in xmm2_, using xmm3_ as scratch. xmm3_ carries state
// between calls (it is zeroed once by the caller before the first step), so
// the statement order here must not be changed.
// The trailing comments give the corresponding instruction for each step.
501  void generate_key_expansion()
502  {
503  xmm2_ = _mm_shuffle_epi32(xmm2_, 0xFF); // pshufd xmm2, xmm2, 0xFF
504  xmm3_ = _mm_castps_si128(_mm_shuffle_ps( // shufps xmm3, xmm1, 0x10
505  _mm_castsi128_ps(xmm3_), _mm_castsi128_ps(xmm1_), 0x10));
506  xmm1_ = _mm_xor_si128(xmm1_, xmm3_); // pxor xmm1, xmm3
507  xmm3_ = _mm_castps_si128(_mm_shuffle_ps( // shufps xmm3, xmm1, 0x8C
508  _mm_castsi128_ps(xmm3_), _mm_castsi128_ps(xmm1_), 0x8C));
509  xmm1_ = _mm_xor_si128(xmm1_, xmm3_); // pxor xmm1, xmm3
510  xmm1_ = _mm_xor_si128(xmm1_, xmm2_); // pxor xmm1, xmm2
511  }
512 
// Updates xmm4_ (the remaining 64 bits of the 192-bit key state) from the
// freshly updated xmm1_, and leaves the value to be stored in xmm7_.
// xmm5_ is scratch; xmm6_ carries state between calls (zeroed once by the
// caller before the first step), so the statement order must be preserved.
// The trailing comments give the corresponding instruction for each step.
513  void complete_key_expansion()
514  {
515  xmm5_ = _mm_load_si128(&xmm4_); // movdqa xmm5, xmm4
516  xmm5_ = _mm_slli_si128(xmm5_, 0x04); // pslldq xmm5, 0x04
517  xmm6_ = _mm_castps_si128(_mm_shuffle_ps( // shufps xmm6, xmm1, 0xF0
518  _mm_castsi128_ps(xmm6_), _mm_castsi128_ps(xmm1_), 0xF0));
519  xmm6_ = _mm_xor_si128(xmm6_, xmm5_); // pxor xmm6, xmm5
520  xmm4_ = _mm_xor_si128(xmm4_, xmm6_); // pxor xmm4, xmm6
521  xmm7_ = _mm_shuffle_epi32(xmm4_, 0x0E); // pshufd xmm7, xmm4, 0x0E
522  }
523 
// No-op overload (std::false_type): the schedule is not longer than the
// 24 key bytes, so there are no generated bytes to copy.
524  template <std::size_t Rp1>
525  void copy_key(std::array<__m128i, Rp1> &, const unsigned char *,
526  std::false_type)
527  {
528  }
529 
// Copies the generated part of the schedule from the flat byte buffer into
// `ks`, viewed as raw bytes. The first 24 bytes of `ks` are left untouched
// and everything from byte 24 up to Rp1 * 16 is taken from `ks_ptr`.
530  template <std::size_t Rp1>
531  void copy_key(std::array<__m128i, Rp1> &ks, const unsigned char *ks_ptr,
532  std::true_type)
533  {
534  unsigned char *dst = reinterpret_cast<unsigned char *>(ks.data());
535  std::memcpy(dst + 24, ks_ptr + 24, Rp1 * 16 - 24);
536  }
537 }; // class AES192KeySeq
538 
539 template <typename T>
541 {
542  public:
543  using key_type = std::array<T, 32 / sizeof(T)>;
544 
// Builds the full key schedule for a 32-byte key into `key_seq`.
// Slot 0 / xmm1_ is initialized from the start of the key. Slot 1 / xmm3_ is
// requested with an element offset of 16 / sizeof(T), i.e. byte 16 of the key.
// NOTE(review): this relies on AESKeyInit applying its Offset parameter to
// the load address - confirm against AESKeyInit::init.
// Slots 2..Rp1-1 are produced by generate_seq, and the finished __m128i array
// is then copied bytewise into the caller's M128I<> array.
// NOTE(review): the signature line (listing line 546) is missing from this
// listing; the cross-reference at the end of the page gives it as
// `void operator()(const key_type &key, std::array<M128I<>, Rp1> &key_seq)`.
545  template <std::size_t Rp1>
547  const key_type &key, std::array<M128I<>, Rp1> &key_seq)
548  {
549  std::array<__m128i, Rp1> ks;
550  AESKeyInit::eval<0, 0>(key, ks, xmm1_);
551  AESKeyInit::eval<16 / sizeof(T), 1>(key, ks, xmm3_);
552  generate_seq<2>(ks, std::integral_constant<bool, 2 < Rp1>());
553  std::memcpy(key_seq.data(), ks.data(), sizeof(__m128i) * Rp1);
554  }
555 
556  private:
557  __m128i xmm1_;
558  __m128i xmm2_;
559  __m128i xmm3_;
560  __m128i xmm4_;
561 
// Terminator of the compile-time recursion (std::false_type): the slot
// index has reached Rp1.
562  template <std::size_t, std::size_t Rp1>
563  void generate_seq(std::array<__m128i, Rp1> &, std::false_type)
564  {
565  }
566 
// Generates slot N and recurses with N + 1 while N + 1 < Rp1. Even and odd
// slots use different expansion steps, selected by tag dispatch on
// N % 2 == 0.
567  template <std::size_t N, std::size_t Rp1>
568  void generate_seq(std::array<__m128i, Rp1> &ks, std::true_type)
569  {
570  generate_key<N>(ks, std::integral_constant<bool, N % 2 == 0>());
571  generate_seq<N + 1>(ks, std::integral_constant<bool, N + 1 < Rp1>());
572  }
573 
// Even slot N (std::true_type): the assist word comes from
// AESKeyGenAssist<N / 2> applied to xmm3_ (the most recent odd-slot key);
// expand_key(true_type) updates xmm1_, which becomes ks[N].
574  template <std::size_t N, std::size_t Rp1>
575  void generate_key(std::array<__m128i, Rp1> &ks, std::true_type)
576  {
577  xmm2_ = AESKeyGenAssist<N / 2>(xmm3_);
578  expand_key(std::true_type());
579  std::get<N>(ks) = xmm1_;
580  }
581 
// Odd slot N (std::false_type): the assist word is computed from xmm1_ (the
// most recent even-slot key) with an immediate of 0;
// expand_key(false_type) updates xmm3_, which becomes ks[N].
582  template <std::size_t N, std::size_t Rp1>
583  void generate_key(std::array<__m128i, Rp1> &ks, std::false_type)
584  {
585  xmm4_ = _mm_aeskeygenassist_si128(xmm1_, 0);
586  expand_key(std::false_type());
587  std::get<N>(ks) = xmm3_;
588  }
589 
// Even-slot expansion: updates xmm1_ in place. xmm1_ is XORed with itself
// shifted left by 4, 8 and 12 bytes (xmm4_ is scratch), then with the
// broadcast top word (selector 0xFF) of the assist result in xmm2_.
// The trailing comments give the corresponding instruction for each step.
590  void expand_key(std::true_type)
591  {
592  xmm2_ = _mm_shuffle_epi32(xmm2_, 0xFF); // pshufd xmm2, xmm2, 0xFF
593  xmm4_ = _mm_slli_si128(xmm1_, 0x04); // movdqa xmm4, xmm1; pslldq xmm4, 0x04
594  xmm1_ = _mm_xor_si128(xmm1_, xmm4_); // pxor xmm1, xmm4
595  xmm4_ = _mm_slli_si128(xmm4_, 0x04); // pslldq xmm4, 0x04
596  xmm1_ = _mm_xor_si128(xmm1_, xmm4_); // pxor xmm1, xmm4
597  xmm4_ = _mm_slli_si128(xmm4_, 0x04); // pslldq xmm4, 0x04
598  xmm1_ = _mm_xor_si128(xmm1_, xmm4_); // pxor xmm1, xmm4
599  xmm1_ = _mm_xor_si128(xmm1_, xmm2_); // pxor xmm1, xmm2
600  }
601 
// Odd-slot expansion: same shape, but updates xmm3_ in place and broadcasts
// word 2 (selector 0xAA) of the assist result, which the caller left in
// xmm4_. xmm4_ is read first and only then reused as scratch, so the first
// two statements must stay in this order.
602  void expand_key(std::false_type)
603  {
604  xmm2_ = _mm_shuffle_epi32(xmm4_, 0xAA); // pshufd xmm2, xmm4, 0xAA
605  xmm4_ = _mm_slli_si128(xmm3_, 0x04); // movdqa xmm4, xmm3; pslldq xmm4, 0x04
606  xmm3_ = _mm_xor_si128(xmm3_, xmm4_); // pxor xmm3, xmm4
607  xmm4_ = _mm_slli_si128(xmm4_, 0x04); // pslldq xmm4, 0x04
608  xmm3_ = _mm_xor_si128(xmm3_, xmm4_); // pxor xmm3, xmm4
609  xmm4_ = _mm_slli_si128(xmm4_, 0x04); // pslldq xmm4, 0x04
610  xmm3_ = _mm_xor_si128(xmm3_, xmm4_); // pxor xmm3, xmm4
611  xmm3_ = _mm_xor_si128(xmm3_, xmm2_); // pxor xmm3, xmm2
612  }
613 }; // class AES256KeySeq
614 
615 } // namespace vsmc::internal
616 
619 template <typename ResultType, std::size_t Rounds>
620 using AES128KeySeq = internal::AESKeySeq<ResultType, Rounds,
622 
625 template <typename ResultType, std::size_t Rounds>
626 using AES192KeySeq = internal::AESKeySeq<ResultType, Rounds,
628 
631 template <typename ResultType, std::size_t Rounds>
632 using AES256KeySeq = internal::AESKeySeq<ResultType, Rounds,
634 
637 template <typename ResultType, std::size_t Blocks = VSMC_RNG_AES_BLOCKS>
638 using AES128Engine =
640 
643 template <typename ResultType, std::size_t Blocks = VSMC_RNG_AES_BLOCKS>
644 using AES192Engine =
646 
649 template <typename ResultType, std::size_t Blocks = VSMC_RNG_AES_BLOCKS>
650 using AES256Engine =
652 
656 
660 
664 
668 
672 
676 
680 
684 
688 
692 
696 
700 
704 
708 
712 
716 
720 
724 
728 
732 
736 
740 
744 
748 
752 
756 
760 
764 
768 
772 
773 } // namespace vsmc
774 
775 #endif // VSMC_RNG_AES_HPP
void operator()(const key_type &key, std::array< M128I<>, Rp1 > &key_seq)
Definition: aes.hpp:425
Definition: monitor.hpp:49
#define VSMC_DEFINE_RNG_AES_KEY_GEN_ASSIST(N, val)
Definition: aes.hpp:38
std::array< T, 24/sizeof(T)> key_type
Definition: aes.hpp:422
std::array< T, 32/sizeof(T)> key_type
Definition: aes.hpp:543
void reset(const key_type &key)
Definition: aes.hpp:353
typename KeySeqGenerator::key_type key_type
Definition: aes.hpp:351
Counter based RNG engine.
Definition: counter.hpp:289
__m128i & value()
Definition: simd.hpp:140
static void eval(const std::array< T, KeySize > &key, std::array< __m128i, Rp1 > &ks, __m128i &xmm)
Definition: aes.hpp:322
void operator()(const key_type &key, std::array< M128I<>, Rp1 > &key_seq)
Definition: aes.hpp:546
std::array< T, 16/sizeof(T)> key_type
Definition: aes.hpp:374
void load(const T *mem)
Definition: simd.hpp:159
void operator()(const key_type &key, std::array< M128I<>, Rp1 > &key_seq)
Definition: aes.hpp:377
__m128i AESKeyGenAssist(__m128i)
void operator()(const key_type &, std::array< M128I< U >, Rounds+1 > &rk) const
Definition: aes.hpp:360