vSMC
vSMC: Scalable Monte Carlo
aes.hpp
Go to the documentation of this file.
1 //============================================================================
2 // vSMC/include/vsmc/rng/aes.hpp
3 //----------------------------------------------------------------------------
4 // vSMC: Scalable Monte Carlo
5 //----------------------------------------------------------------------------
6 // Copyright (c) 2013-2015, Yan Zhou
7 // All rights reserved.
8 //
9 // Redistribution and use in source and binary forms, with or without
10 // modification, are permitted provided that the following conditions are met:
11 //
12 // Redistributions of source code must retain the above copyright notice,
13 // this list of conditions and the following disclaimer.
14 //
15 // Redistributions in binary form must reproduce the above copyright notice,
16 // this list of conditions and the following disclaimer in the documentation
17 // and/or other materials provided with the distribution.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS AS IS
20 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23 // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 // POSSIBILITY OF SUCH DAMAGE.
30 //============================================================================
31 
32 #ifndef VSMC_RNG_AES_HPP
33 #define VSMC_RNG_AES_HPP
34 
36 #include <vsmc/rng/aes_ni.hpp>
37 
// Defines the explicit specialization AESRoundConstant<N>, derived from
// std::integral_constant<int, val>, so that AESRoundConstant<N>::value == val.
// The key-sequence generators in this file pass that value as the immediate
// argument of _mm_aeskeygenassist_si128.
38 #define VSMC_DEFINE_RNG_AES_ROUND_CONSTANT(N, val) \
39  template <> \
40  class AESRoundConstant<N> : public std::integral_constant<int, val> \
41  { \
42  }; // class AESRoundConstant
43 
46 #ifndef VSMC_RNG_AES_BLOCKS
47 #define VSMC_RNG_AES_BLOCKS 4
48 #endif
49 
50 namespace vsmc
51 {
52 
53 namespace internal
54 {
55 
56 template <std::size_t N>
58 
315 
317 {
318  public:
// Initialise slot N of the round-key array `ks` (and the register `xmm`)
// from `key`. The work is forwarded to one of the private init overloads,
// selected at compile time by the tag std::integral_constant<bool, N < Rp1>:
// when N is not a valid index of `ks` the no-op overload is chosen.
319  template <std::size_t Offset, std::size_t N, typename T,
320  std::size_t KeySize, std::size_t Rp1>
321  static void eval(const std::array<T, KeySize> &key,
322  std::array<__m128i, Rp1> &ks, __m128i &xmm)
323  {
     // `N<Rp1>` below is the expression (N < Rp1); the '>' after Rp1 closes
     // the template argument list of std::integral_constant.
324  init<Offset, N>(key, ks, xmm, std::integral_constant < bool, N<Rp1>());
325  }
326 
327  private:
// Overload selected by eval when N < Rp1 is false (std::false_type tag):
// `ks` has no slot N, so nothing is loaded and neither `ks` nor the register
// argument is modified.
328  template <std::size_t, std::size_t, typename T, std::size_t KeySize,
329  std::size_t Rp1>
330  static void init(const std::array<T, KeySize> &,
331  std::array<__m128i, Rp1> &, __m128i &, std::false_type)
332  {
333  }
334 
// Overload selected by eval when N < Rp1 (std::true_type tag).
//
// Loads 128 bits of key material starting at element `Offset` of `key` and
// stores them both in `xmm` and in slot N of `ks`.
//
// Offset is counted in elements of T, not bytes: the 256-bit generator asks
// for its second 16 key bytes with Offset = 16 / sizeof(T). Previously Offset
// was ignored and the load always started at key.data(), so that call
// received the first 16 bytes again.
//
// NOTE(review): assumes M128I<>::load accepts a pointer to T and reads 16
// bytes from it; the caller must ensure key holds 16 bytes from Offset on.
335  template <std::size_t Offset, std::size_t N, typename T,
336  std::size_t KeySize, std::size_t Rp1>
337  static void init(const std::array<T, KeySize> &key,
338  std::array<__m128i, Rp1> &ks, __m128i &xmm, std::true_type)
339  {
340  M128I<> tmp;
341  tmp.load(key.data() + Offset);
342  std::get<N>(ks) = xmm = tmp.value();
343  }
344 }; // class AESKeyInit
345 
346 template <typename T, std::size_t Rounds, typename KeySeqGenerator>
348 {
349  public:
350  using key_type = typename KeySeqGenerator::key_type;
351 
// Rebuild the stored round-key sequence for a new key: a default-constructed
// KeySeqGenerator is invoked with (key, key_seq_), which overwrites the
// Rounds + 1 entries held in key_seq_.
352  void reset(const key_type &key)
353  {
354  KeySeqGenerator generator;
355  generator(key, key_seq_);
356  }
357 
358  template <typename U>
360  const key_type &, std::array<M128I<U>, Rounds + 1> &rk) const
361  {
362  rk = key_seq_;
363  }
364 
365  private:
366  std::array<M128I<>, Rounds + 1> key_seq_;
367 }; // class AESKeySeq
368 
369 template <typename T>
371 {
372  public:
373  using key_type = std::array<T, 16 / sizeof(T)>;
374 
// Generate Rp1 round keys from a 16-byte key and write them to key_seq.
//
// Slot 0 is the key itself (loaded into ks[0] and xmm1_ by AESKeyInit);
// slots 1 .. Rp1 - 1 are produced by generate_seq, which is skipped entirely
// when Rp1 <= 1. The result is copied out with memcpy.
// NOTE(review): the memcpy assumes M128I<> has the same size and layout as
// __m128i - confirm against the M128I definition.
375  template <std::size_t Rp1>
376  void operator()(const key_type &key, std::array<M128I<>, Rp1> &key_seq)
377  {
378  std::array<__m128i, Rp1> ks;
379  AESKeyInit::eval<0, 0>(key, ks, xmm1_);
380  generate_seq<1>(ks, std::integral_constant<bool, 1 < Rp1>());
381  std::memcpy(key_seq.data(), ks.data(), sizeof(__m128i) * Rp1);
382  }
383 
384  private:
385  __m128i xmm1_;
386  __m128i xmm2_;
387  __m128i xmm3_;
388 
// Recursion terminator: chosen when the requested index is not less than
// Rp1 (std::false_type tag). Does nothing.
389  template <std::size_t, std::size_t Rp1>
390  void generate_seq(std::array<__m128i, Rp1> &, std::false_type)
391  {
392  }
393 
// Produce round key N and recurse to N + 1 while N + 1 < Rp1.
//
// On entry xmm1_ holds round key N - 1. The key-generation assist result for
// round constant AESRoundConstant<N>::value goes into xmm2_, expand_key()
// folds it into xmm1_, and the updated xmm1_ is stored as ks[N].
394  template <std::size_t N, std::size_t Rp1>
395  void generate_seq(std::array<__m128i, Rp1> &ks, std::true_type)
396  {
397  xmm2_ = _mm_aeskeygenassist_si128(xmm1_, AESRoundConstant<N>::value);
398  expand_key();
399  std::get<N>(ks) = xmm1_;
400  generate_seq<N + 1>(ks, std::integral_constant<bool, N + 1 < Rp1>());
401  }
402 
// Turn the previous round key in xmm1_ into the next one, using the assist
// value in xmm2_.
//
// Net effect, with o = xmm1_ on entry and byte-wise left shifts:
//   xmm1_ = o ^ (o << 4) ^ (o << 8) ^ (o << 12) ^ broadcast(xmm2_ lane 3)
// xmm3_ is scratch. Each statement consumes the result of the previous one,
// so the order must not be changed. Trailing comments give the matching
// instruction.
403  void expand_key()
404  {
405  xmm2_ = _mm_shuffle_epi32(xmm2_, 0xFF); // pshufd xmm2, xmm2, 0xFF
406  xmm3_ = _mm_slli_si128(xmm1_, 0x04); // movdqa xmm3, xmm1; pslldq xmm3, 0x04
407  xmm1_ = _mm_xor_si128(xmm1_, xmm3_); // pxor xmm1, xmm3
408  xmm3_ = _mm_slli_si128(xmm3_, 0x04); // pslldq xmm3, 0x04
409  xmm1_ = _mm_xor_si128(xmm1_, xmm3_); // pxor xmm1, xmm3
410  xmm3_ = _mm_slli_si128(xmm3_, 0x04); // pslldq xmm3, 0x04
411  xmm1_ = _mm_xor_si128(xmm1_, xmm3_); // pxor xmm1, xmm3
412  xmm1_ = _mm_xor_si128(xmm1_, xmm2_); // pxor xmm1, xmm2
413  }
414 }; // class AES128KeySeq
415 
416 template <typename T>
418 {
419  public:
420  using key_type = std::array<T, 24 / sizeof(T)>;
421 
// Generate Rp1 round keys from a 24-byte key and write them to key_seq.
//
// The 24 key bytes do not fill a whole number of 16-byte round keys, so the
// schedule is produced in a byte buffer (ks_tmp) in 24-byte steps and then
// copied into the 16-byte slots of ks.
// NOTE(review): the final memcpy assumes M128I<> has the same size and layout
// as __m128i - confirm against the M128I definition.
422  template <std::size_t Rp1>
423  void operator()(const key_type &key, std::array<M128I<>, Rp1> &key_seq)
424  {
425  std::array<__m128i, Rp1> ks;
426 
     // Repack the key as three 64-bit words so that the pieces can be
     // rearranged below.
427  std::array<std::uint64_t, 3> key_tmp;
428  std::memcpy(key_tmp.data(), key.data(), 24);
     // ks[0] and xmm1_ receive the first 16 key bytes.
429  AESKeyInit::eval<0, 0>(key_tmp, ks, xmm1_);
     // Move the last 8 key bytes to the front and clear the next word, so
     // ks[1] and xmm7_ receive those 8 bytes followed by 8 zero bytes.
430  std::get<0>(key_tmp) = std::get<2>(key_tmp);
431  std::get<1>(key_tmp) = 0;
432  AESKeyInit::eval<0, 1>(key_tmp, ks, xmm7_);
433 
434  xmm3_ = _mm_setzero_si128();
435  xmm6_ = _mm_setzero_si128();
436  xmm4_ = _mm_shuffle_epi32(xmm7_, 0x4F); // pshufd xmm4, xmm7, 0x4F
437 
     // One extra 16-byte slot of slack: the generator stores whole 16-byte
     // registers at 24-byte strides.
438  std::array<unsigned char, Rp1 * 16 + 16> ks_tmp;
439  generate_seq<1, Rp1>(
440  ks_tmp.data(), std::integral_constant<bool, 24 < Rp1 * 16>());
     // Only bytes from offset 24 onwards are taken from ks_tmp; the first 24
     // bytes of ks were already set by the two AESKeyInit calls above.
441  copy_key(
442  ks, ks_tmp.data(), std::integral_constant<bool, 24 < Rp1 * 16>());
443 
444  std::memcpy(key_seq.data(), ks.data(), sizeof(__m128i) * Rp1);
445  }
446 
447  private:
448  __m128i xmm1_;
449  __m128i xmm2_;
450  __m128i xmm3_;
451  __m128i xmm4_;
452  __m128i xmm5_;
453  __m128i xmm6_;
454  __m128i xmm7_;
455 
// Recursion terminator (std::false_type tag): the next 24-byte step would
// start at or beyond Rp1 * 16 bytes, so nothing is generated.
456  template <std::size_t, std::size_t>
457  void generate_seq(unsigned char *, std::false_type)
458  {
459  }
460 
// Generate the N-th 24-byte step of the key schedule into ks_ptr and recurse.
//
// generate_key<N> stores 16 bytes at byte offset N * 24. complete_key<N>
// stores a further 16 bytes at N * 24 + 16, but only if that offset is still
// below Rp1 * 16. The recursion continues while (N + 1) * 24 < Rp1 * 16.
// The store made by step N + 1 starts at N * 24 + 24 and therefore overwrites
// the upper 8 bytes written by complete_key<N>.
461  template <std::size_t N, std::size_t Rp1>
462  void generate_seq(unsigned char *ks_ptr, std::true_type)
463  {
464  generate_key<N>(ks_ptr);
465  complete_key<N>(
466  ks_ptr, std::integral_constant<bool, N * 24 + 16 < Rp1 * 16>());
467  generate_seq<N + 1, Rp1>(
468  ks_ptr, std::integral_constant<bool, N * 24 + 24 < Rp1 * 16>());
469  }
470 
// First half of step N: compute the assist value from xmm4_ with round
// constant AESRoundConstant<N>::value, update xmm1_ through
// generate_key_expansion(), and store xmm1_ (16 bytes, unaligned store) at
// byte offset N * 24 of ks_ptr.
471  template <std::size_t N>
472  void generate_key(unsigned char *ks_ptr)
473  {
474  // In entry, N * 24 < Rp1 * 16
475  // Required Storage: N * 24 + 16;
476 
477  xmm2_ = _mm_aeskeygenassist_si128(xmm4_, AESRoundConstant<N>::value);
478  generate_key_expansion();
479  _mm_storeu_si128(reinterpret_cast<__m128i *>(ks_ptr + N * 24), xmm1_);
480  }
481 
// No-op overload (std::false_type tag): chosen when N * 24 + 16 is not below
// Rp1 * 16, i.e. the second half of step N is not needed.
482  template <std::size_t>
483  void complete_key(unsigned char *, std::false_type)
484  {
485  }
486 
// Second half of step N: update xmm4_ and xmm7_ through
// complete_key_expansion() and store xmm7_ (16 bytes, unaligned store) at
// byte offset N * 24 + 16 of ks_ptr.
487  template <std::size_t N>
488  void complete_key(unsigned char *ks_ptr, std::true_type)
489  {
490  // In entry, N * 24 + 16 < Rp1 * 16
491  // Required storage: N * 24 + 32
492 
493  complete_key_expansion();
494  _mm_storeu_si128(
495  reinterpret_cast<__m128i *>(ks_ptr + N * 24 + 16), xmm7_);
496  }
497 
// Update xmm1_ in place from its previous value, the assist value in xmm2_
// and the running state in xmm3_ (which is also updated). Each statement
// consumes the result of the previous one, so the order must not be changed.
// Trailing comments give the matching instruction.
498  void generate_key_expansion()
499  {
500  xmm2_ = _mm_shuffle_epi32(xmm2_, 0xFF); // pshufd xmm2, xmm2, 0xFF
501  xmm3_ = _mm_castps_si128(_mm_shuffle_ps( // shufps xmm3, xmm1, 0x10
502  _mm_castsi128_ps(xmm3_), _mm_castsi128_ps(xmm1_), 0x10));
503  xmm1_ = _mm_xor_si128(xmm1_, xmm3_); // pxor xmm1, xmm3
504  xmm3_ = _mm_castps_si128(_mm_shuffle_ps( // shufps xmm3, xmm1, 0x8C
505  _mm_castsi128_ps(xmm3_), _mm_castsi128_ps(xmm1_), 0x8C));
506  xmm1_ = _mm_xor_si128(xmm1_, xmm3_); // pxor xmm1, xmm3
507  xmm1_ = _mm_xor_si128(xmm1_, xmm2_); // pxor xmm1, xmm2
508  }
509 
// Update xmm4_ from its previous value, xmm1_ and the running state in xmm6_,
// then derive xmm7_ from the new xmm4_. xmm5_ is scratch. Each statement
// consumes the result of the previous one, so the order must not be changed.
// Trailing comments give the matching instruction.
510  void complete_key_expansion()
511  {
512  xmm5_ = _mm_load_si128(&xmm4_); // movdqa xmm5, xmm4
513  xmm5_ = _mm_slli_si128(xmm5_, 0x04); // pslldq xmm5, 0x04
514  xmm6_ = _mm_castps_si128(_mm_shuffle_ps( // shufps xmm6, xmm1, 0xF0
515  _mm_castsi128_ps(xmm6_), _mm_castsi128_ps(xmm1_), 0xF0));
516  xmm6_ = _mm_xor_si128(xmm6_, xmm5_); // pxor xmm6, xmm5
517  xmm4_ = _mm_xor_si128(xmm4_, xmm6_); // pxor xmm4, xmm6
518  xmm7_ = _mm_shuffle_epi32(xmm4_, 0x0E); // pshufd xmm7, xmm4, 0x0E
519  }
520 
// No-op overload (std::false_type tag): chosen when Rp1 * 16 <= 24, in which
// case no generated bytes need to be copied.
521  template <std::size_t Rp1>
522  void copy_key(
523  std::array<__m128i, Rp1> &, const unsigned char *, std::false_type)
524  {
525  }
526 
// Copy the generated part of the schedule into ks: bytes [24, Rp1 * 16) of
// ks_ptr go to the same byte offsets of ks. The first 24 bytes of ks are left
// untouched.
527  template <std::size_t Rp1>
528  void copy_key(std::array<__m128i, Rp1> &ks, const unsigned char *ks_ptr,
529  std::true_type)
530  {
531  unsigned char *dst = reinterpret_cast<unsigned char *>(ks.data());
532  std::memcpy(dst + 24, ks_ptr + 24, Rp1 * 16 - 24);
533  }
534 }; // class AES192KeySeq
535 
536 template <typename T>
538 {
539  public:
540  using key_type = std::array<T, 32 / sizeof(T)>;
541 
// Generate Rp1 round keys from a 32-byte key and write them to key_seq.
//
// Slots 0 and 1 are initialised through AESKeyInit (also setting xmm1_ and
// xmm3_); slots 2 .. Rp1 - 1 are produced by generate_seq, which is skipped
// when Rp1 <= 2.
// NOTE(review): the memcpy assumes M128I<> has the same size and layout as
// __m128i - confirm against the M128I definition.
542  template <std::size_t Rp1>
543  void operator()(const key_type &key, std::array<M128I<>, Rp1> &key_seq)
544  {
545  std::array<__m128i, Rp1> ks;
546  AESKeyInit::eval<0, 0>(key, ks, xmm1_);
     // Offset 16 / sizeof(T) is the element index at which the second 16
     // bytes of key begin.
     // NOTE(review): this relies on AESKeyInit::init applying Offset when it
     // loads from key.
547  AESKeyInit::eval<16 / sizeof(T), 1>(key, ks, xmm3_);
548  generate_seq<2>(ks, std::integral_constant<bool, 2 < Rp1>());
549  std::memcpy(key_seq.data(), ks.data(), sizeof(__m128i) * Rp1);
550  }
551 
552  private:
553  __m128i xmm1_;
554  __m128i xmm2_;
555  __m128i xmm3_;
556  __m128i xmm4_;
557 
// Recursion terminator: chosen when the requested index is not less than
// Rp1 (std::false_type tag). Does nothing.
558  template <std::size_t, std::size_t Rp1>
559  void generate_seq(std::array<__m128i, Rp1> &, std::false_type)
560  {
561  }
562 
// Produce round key N and recurse to N + 1 while N + 1 < Rp1.
// Even and odd N use different generate_key overloads, selected by the tag
// std::integral_constant<bool, N % 2 == 0>.
563  template <std::size_t N, std::size_t Rp1>
564  void generate_seq(std::array<__m128i, Rp1> &ks, std::true_type)
565  {
566  generate_key<N>(ks, std::integral_constant<bool, N % 2 == 0>());
567  generate_seq<N + 1>(ks, std::integral_constant<bool, N + 1 < Rp1>());
568  }
569 
// Even N (std::true_type tag): the assist value is computed from xmm3_ with
// round constant AESRoundConstant<N / 2>::value, expand_key(true_type) folds
// it into xmm1_, and the updated xmm1_ is stored as ks[N].
570  template <std::size_t N, std::size_t Rp1>
571  void generate_key(std::array<__m128i, Rp1> &ks, std::true_type)
572  {
573  xmm2_ =
574  _mm_aeskeygenassist_si128(xmm3_, AESRoundConstant<N / 2>::value);
575  expand_key(std::true_type());
576  std::get<N>(ks) = xmm1_;
577  }
578 
// Odd N (std::false_type tag): the assist value is computed from xmm1_ with
// a zero round constant, expand_key(false_type) folds it into xmm3_, and the
// updated xmm3_ is stored as ks[N].
579  template <std::size_t N, std::size_t Rp1>
580  void generate_key(std::array<__m128i, Rp1> &ks, std::false_type)
581  {
582  xmm4_ = _mm_aeskeygenassist_si128(xmm1_, 0);
583  expand_key(std::false_type());
584  std::get<N>(ks) = xmm3_;
585  }
586 
// Even-step update of xmm1_, using the assist value in xmm2_.
//
// Net effect, with o = xmm1_ on entry and byte-wise left shifts:
//   xmm1_ = o ^ (o << 4) ^ (o << 8) ^ (o << 12) ^ broadcast(xmm2_ lane 3)
// xmm4_ is scratch. Each statement consumes the result of the previous one,
// so the order must not be changed. Trailing comments give the matching
// instruction.
587  void expand_key(std::true_type)
588  {
589  xmm2_ = _mm_shuffle_epi32(xmm2_, 0xFF); // pshufd xmm2, xmm2, 0xFF
590  xmm4_ = _mm_slli_si128(xmm1_, 0x04); // movdqa xmm4, xmm1; pslldq xmm4, 0x04
591  xmm1_ = _mm_xor_si128(xmm1_, xmm4_); // pxor xmm1, xmm4
592  xmm4_ = _mm_slli_si128(xmm4_, 0x04); // pslldq xmm4, 0x04
593  xmm1_ = _mm_xor_si128(xmm1_, xmm4_); // pxor xmm1, xmm4
594  xmm4_ = _mm_slli_si128(xmm4_, 0x04); // pslldq xmm4, 0x04
595  xmm1_ = _mm_xor_si128(xmm1_, xmm4_); // pxor xmm1, xmm4
596  xmm1_ = _mm_xor_si128(xmm1_, xmm2_); // pxor xmm1, xmm2
597  }
598 
// Odd-step update of xmm3_, using the assist value left in xmm4_ by the
// caller.
//
// Net effect, with o = xmm3_ on entry and byte-wise left shifts:
//   xmm3_ = o ^ (o << 4) ^ (o << 8) ^ (o << 12) ^ broadcast(xmm4_ lane 2)
// The assist value is moved into xmm2_ first because xmm4_ is then reused as
// scratch. Each statement consumes the result of the previous one, so the
// order must not be changed. Trailing comments give the matching instruction.
599  void expand_key(std::false_type)
600  {
601  xmm2_ = _mm_shuffle_epi32(xmm4_, 0xAA); // pshufd xmm2, xmm4, 0xAA
602  xmm4_ = _mm_slli_si128(xmm3_, 0x04); // movdqa xmm4, xmm3; pslldq xmm4, 0x04
603  xmm3_ = _mm_xor_si128(xmm3_, xmm4_); // pxor xmm3, xmm4
604  xmm4_ = _mm_slli_si128(xmm4_, 0x04); // pslldq xmm4, 0x04
605  xmm3_ = _mm_xor_si128(xmm3_, xmm4_); // pxor xmm3, xmm4
606  xmm4_ = _mm_slli_si128(xmm4_, 0x04); // pslldq xmm4, 0x04
607  xmm3_ = _mm_xor_si128(xmm3_, xmm4_); // pxor xmm3, xmm4
608  xmm3_ = _mm_xor_si128(xmm3_, xmm2_); // pxor xmm3, xmm2
609  }
610 }; // class AES256KeySeq
611 
612 } // namespace vsmc::internal
613 
616 template <typename ResultType, std::size_t Rounds>
617 using AES128KeySeq = internal::AESKeySeq<ResultType, Rounds,
619 
622 template <typename ResultType, std::size_t Rounds>
623 using AES192KeySeq = internal::AESKeySeq<ResultType, Rounds,
625 
628 template <typename ResultType, std::size_t Rounds>
629 using AES256KeySeq = internal::AESKeySeq<ResultType, Rounds,
631 
634 template <typename ResultType, std::size_t Blocks = VSMC_RNG_AES_BLOCKS>
635 using AES128Engine =
637 
640 template <typename ResultType, std::size_t Blocks = VSMC_RNG_AES_BLOCKS>
641 using AES192Engine =
643 
646 template <typename ResultType, std::size_t Blocks = VSMC_RNG_AES_BLOCKS>
647 using AES256Engine =
649 
653 
657 
661 
665 
669 
673 
677 
681 
685 
689 
693 
697 
701 
705 
709 
713 
717 
721 
725 
729 
733 
737 
741 
745 
749 
753 
757 
761 
765 
769 
770 } // namespace vsmc
771 
772 #endif // VSMC_RNG_AES_HPP
void operator()(const key_type &key, std::array< M128I<>, Rp1 > &key_seq)
Definition: aes.hpp:423
Definition: monitor.hpp:49
std::array< T, 24/sizeof(T)> key_type
Definition: aes.hpp:420
std::array< T, 32/sizeof(T)> key_type
Definition: aes.hpp:540
void reset(const key_type &key)
Definition: aes.hpp:352
typename KeySeqGenerator::key_type key_type
Definition: aes.hpp:350
Counter based RNG engine.
Definition: counter.hpp:290
#define VSMC_DEFINE_RNG_AES_ROUND_CONSTANT(N, val)
Definition: aes.hpp:38
static void eval(const std::array< T, KeySize > &key, std::array< __m128i, Rp1 > &ks, __m128i &xmm)
Definition: aes.hpp:321
void operator()(const key_type &key, std::array< M128I<>, Rp1 > &key_seq)
Definition: aes.hpp:543
std::array< T, 16/sizeof(T)> key_type
Definition: aes.hpp:373
void operator()(const key_type &key, std::array< M128I<>, Rp1 > &key_seq)
Definition: aes.hpp:376
void operator()(const key_type &, std::array< M128I< U >, Rounds+1 > &rk) const
Definition: aes.hpp:359