32 #ifndef VSMC_RNG_AES_HPP    33 #define VSMC_RNG_AES_HPP    38 #define VSMC_DEFINE_RNG_AES_ROUND_CONSTANT(N, val)                            \    40     class AESRoundConstant<N> : public std::integral_constant<int, val>       \    42     }; // class AESRoundConstant    46 #ifndef VSMC_RNG_AES_BLOCKS    47 #define VSMC_RNG_AES_BLOCKS 4    56 template <std::
size_t N>
   // Entry point for key initialisation. Forwards (key, ks, xmm) to
   // init<Offset, N> together with a compile-time boolean that is true exactly
   // when N < Rp1, which selects the std::true_type or std::false_type overload
   // of init.
   319     template <std::size_t Offset, std::size_t N, 
typename T,
   320         std::size_t KeySize, std::size_t Rp1>
   321     static void eval(
const std::array<T, KeySize> &key,
   322         std::array<__m128i, Rp1> &ks, __m128i &xmm)
   // Note: `N<Rp1` below is the comparison N < Rp1 (oddly spaced), not a
   // template argument list.
   324         init<Offset, N>(key, ks, xmm, std::integral_constant < bool, N<Rp1>());
   // init overload taken when eval's N < Rp1 test is false (std::false_type
   // tag). Every parameter is unnamed, so nothing can be read from the key or
   // written to the round-key array / register through this overload.
   // NOTE(review): the body is not visible here; presumably empty - confirm.
   328     template <std::size_t, std::size_t, 
typename T, std::size_t KeySize,
   330     static void init(
const std::array<T, KeySize> &,
   331         std::array<__m128i, Rp1> &, __m128i &, std::false_type)
   // init overload taken when eval's N < Rp1 test is true (std::true_type tag).
   // Loads from the start of the key array into `tmp` and writes the loaded
   // value to both ks[N] and the caller's register `xmm`.
   // NOTE(review): the declaration of `tmp` is not visible here, so its type and
   // the number of bytes load() reads cannot be confirmed from this excerpt.
   335     template <std::size_t Offset, std::size_t N, 
typename T,
   336         std::size_t KeySize, std::size_t Rp1>
   337     static void init(
const std::array<T, KeySize> &key,
   338         std::array<__m128i, Rp1> &ks, __m128i &xmm, std::true_type)
   341         tmp.load(key.data());
   // Chained assignment: xmm receives tmp.value(), then ks[N] receives xmm.
   342         std::get<N>(ks) = xmm = tmp.value();
   // Class template parameterised on a type T, a round count Rounds and a
   // KeySeqGenerator policy type (the class name line is not visible here).
   346 template <
typename T, std::
size_t Rounds, 
typename KeySeqGenerator>
   // The key type is whatever the generator policy declares.
   350     using key_type = 
typename KeySeqGenerator::key_type;
   // A default-constructed generator is invoked with (key, key_seq_).
   // NOTE(review): presumably this fills key_seq_ from key; the enclosing
   // function's signature is not visible here - confirm.
   354         KeySeqGenerator generator;
   355         generator(key, key_seq_);
   // Member template with an unnamed key parameter and an array `rk` of
   // Rounds + 1 M128I<U> elements (function name and body not visible here).
   358     template <
typename U>
   360         const key_type &, std::array<M128I<U>, Rounds + 1> &rk)
 const   366     std::array<M128I<>, Rounds + 1> key_seq_; // Rounds + 1 M128I<> entries, the array handed to the generator above
   // Class template over T (class name line not visible here), followed by a
   // member template over Rp1, the number of __m128i entries to produce.
   // NOTE(review): the member's signature line is missing; the body uses `key`
   // and `key_seq`, so it is presumably the generator's operator() - confirm.
   369 template <
typename T>
   375     template <std::
size_t Rp1>
   // Local scratch array for the Rp1 entries.
   378         std::array<__m128i, Rp1> ks;
   // eval<0, 0> dispatches to init with N = 0, which (when 0 < Rp1) writes the
   // value loaded from key into ks[0] and into xmm1_.
   379         AESKeyInit::eval<0, 0>(key, ks, xmm1_);
   // Fill entries from index 1 on; the std::false_type overload is chosen when
   // Rp1 <= 1.
   380         generate_seq<1>(ks, std::integral_constant<bool, 1 < Rp1>());
   // Copy all Rp1 * sizeof(__m128i) bytes of the scratch array into key_seq.
   381         std::memcpy(key_seq.data(), ks.data(), 
sizeof(__m128i) * Rp1);
   // generate_seq overload for the std::false_type tag, i.e. when the caller's
   // "index < Rp1" test is false. The array parameter is unnamed, so this
   // overload cannot write to it.
   // NOTE(review): body not visible here; presumably empty - confirm.
   389     template <std::
size_t, std::
size_t Rp1>
   390     void generate_seq(std::array<__m128i, Rp1> &, std::false_type)
   // generate_seq overload for the std::true_type tag: stores the current
   // xmm1_ into ks[N], then continues with index N + 1, passing a tag that is
   // true while N + 1 < Rp1 (compile-time recursion over the array indices).
   // NOTE(review): xmm1_ is presumably updated for index N before the store;
   // no such statement is visible in this excerpt - confirm.
   394     template <std::
size_t N, std::
size_t Rp1>
   395     void generate_seq(std::array<__m128i, Rp1> &ks, std::true_type)
   399         std::get<N>(ks) = xmm1_;
   400         generate_seq<N + 1>(ks, std::integral_constant<bool, N + 1 < Rp1>());
   // Update of xmm1_ from xmm2_ (enclosing function's signature not visible
   // here). Lanes are 32-bit, lane 0 being the least significant. After the
   // three shift/XOR steps lane i of xmm1_ holds the XOR of its original lanes
   // 0..i; the broadcast word from xmm2_ is then XORed into every lane.
   // xmm3_ is used as scratch and is overwritten.
   405         xmm2_ = _mm_shuffle_epi32(xmm2_, 0xFF); // broadcast lane 3 of xmm2_ to all four lanes
   406         xmm3_ = _mm_slli_si128(xmm1_, 0x04);    // scratch = xmm1_ shifted left by 4 bytes (zero fill)
   407         xmm1_ = _mm_xor_si128(xmm1_, xmm3_);    // lane i ^= original lane i-1
   408         xmm3_ = _mm_slli_si128(xmm3_, 0x04);    // scratch shifted a further 4 bytes
   409         xmm1_ = _mm_xor_si128(xmm1_, xmm3_);    // lane i ^= original lane i-2
   410         xmm3_ = _mm_slli_si128(xmm3_, 0x04);    // scratch shifted a further 4 bytes
   411         xmm1_ = _mm_xor_si128(xmm1_, xmm3_);    // lane i ^= original lane i-3
   412         xmm1_ = _mm_xor_si128(xmm1_, xmm2_);    // fold the broadcast word into every lane
   // Class template over T (class name line not visible here), followed by a
   // member template over Rp1 that builds a key sequence from a key whose first
   // 24 bytes are used.
   // NOTE(review): the member's signature line is missing; the body uses `key`
   // and `key_seq`, so it is presumably the generator's operator() - confirm.
   416 template <
typename T>
   422     template <std::
size_t Rp1>
   425         std::array<__m128i, Rp1> ks;
   // Work on a 24-byte copy of the key held as three 64-bit words.
   427         std::array<std::uint64_t, 3> key_tmp;
   428         std::memcpy(key_tmp.data(), key.data(), 24);
   // N = 0: writes the value loaded from the start of key_tmp into ks[0] and
   // xmm1_ (when 0 < Rp1).
   429         AESKeyInit::eval<0, 0>(key_tmp, ks, xmm1_);
   // Move the third 64-bit word to the front and zero the second word, then
   // load again with N = 1: writes ks[1] and xmm7_ (when 1 < Rp1).
   430         std::get<0>(key_tmp) = std::get<2>(key_tmp);
   431         std::get<1>(key_tmp) = 0;
   432         AESKeyInit::eval<0, 1>(key_tmp, ks, xmm7_);
   // Clear the two registers that the expansion helpers shuffle from.
   434         xmm3_ = _mm_setzero_si128();
   435         xmm6_ = _mm_setzero_si128();
   436         xmm4_ = _mm_shuffle_epi32(xmm7_, 0x4F); // result lanes (0..3) = xmm7_ lanes (3, 3, 0, 1)
   // Byte buffer with 16 bytes of slack beyond Rp1 * 16; generate_seq writes
   // into it at a 24-byte stride.
   438         std::array<unsigned char, Rp1 * 16 + 16> ks_tmp;
   439         generate_seq<1, Rp1>(
   440             ks_tmp.data(), std::integral_constant<bool, 24 < Rp1 * 16>());
   // NOTE(review): the callee name for the argument list below is on a line not
   // visible here; the (array, const byte pointer, tag) arguments match
   // copy_key - confirm.
   442             ks, ks_tmp.data(), std::integral_constant<bool, 24 < Rp1 * 16>());
   // Copy all Rp1 * sizeof(__m128i) bytes of ks into key_seq.
   444         std::memcpy(key_seq.data(), ks.data(), 
sizeof(__m128i) * Rp1);
   // generate_seq overload for the std::false_type tag. The pointer parameter
   // is unnamed, so this overload cannot write through it.
   // NOTE(review): body not visible here; presumably empty - confirm.
   456     template <std::
size_t, std::
size_t>
   457     void generate_seq(
unsigned char *, std::false_type)
   // generate_seq overload for the std::true_type tag: handles 24-byte chunk N
   // of the byte buffer and then continues with chunk N + 1.
   461     template <std::
size_t N, std::
size_t Rp1>
   462     void generate_seq(
unsigned char *ks_ptr, std::true_type)
   // First 16 bytes of chunk N.
   464         generate_key<N>(ks_ptr);
   // NOTE(review): the callee name for the argument list below is on a line not
   // visible here; presumably complete_key<N>, guarded by whether offset
   // N * 24 + 16 is still below Rp1 * 16 - confirm.
   466             ks_ptr, std::integral_constant<bool, N * 24 + 16 < Rp1 * 16>());
   // Continue only while the next chunk's start offset is below Rp1 * 16.
   467         generate_seq<N + 1, Rp1>(
   468             ks_ptr, std::integral_constant<bool, N * 24 + 24 < Rp1 * 16>());
   // Runs generate_key_expansion() and stores the resulting xmm1_ (16 bytes,
   // unaligned store) at byte offset N * 24 of the buffer.
   // NOTE(review): statements between the signature and the expansion call are
   // not visible here (xmm2_ is presumably prepared there) - confirm.
   471     template <std::
size_t N>
   472     void generate_key(
unsigned char *ks_ptr)
   478         generate_key_expansion();
   479         _mm_storeu_si128(reinterpret_cast<__m128i *>(ks_ptr + N * 24), xmm1_);
   // complete_key overload for the std::false_type tag. The pointer parameter
   // is unnamed, so this overload cannot write through it.
   // NOTE(review): body not visible here; presumably empty - confirm.
   482     template <std::
size_t>
   483     void complete_key(
unsigned char *, std::false_type)
   // complete_key overload for the std::true_type tag: runs
   // complete_key_expansion() and passes xmm7_ together with the address at
   // byte offset N * 24 + 16 (directly after the 16 bytes generate_key<N>
   // stores) to a call whose name is not visible here.
   // NOTE(review): presumably an unaligned 16-byte store like the one in
   // generate_key - confirm.
   487     template <std::
size_t N>
   488     void complete_key(
unsigned char *ks_ptr, std::true_type)
   493         complete_key_expansion();
   495             reinterpret_cast<__m128i *>(ks_ptr + N * 24 + 16), xmm7_);
   // Updates xmm1_ from xmm2_, using xmm3_ as scratch (all three are
   // overwritten). Lanes are 32-bit, lane 0 least significant. For
   // _mm_shuffle_ps(a, b, imm) the two low result lanes come from a and the two
   // high result lanes from b.
   498     void generate_key_expansion()
   500         xmm2_ = _mm_shuffle_epi32(xmm2_, 0xFF);  // broadcast lane 3 of xmm2_ to all four lanes
   501         xmm3_ = _mm_castps_si128(_mm_shuffle_ps( // xmm3_ = (xmm3_[0], xmm3_[0], xmm1_[1], xmm1_[0])
   502             _mm_castsi128_ps(xmm3_), _mm_castsi128_ps(xmm1_), 0x10));
   503         xmm1_ = _mm_xor_si128(xmm1_, xmm3_);     // first XOR pass into xmm1_
   504         xmm3_ = _mm_castps_si128(_mm_shuffle_ps( // xmm3_ = (xmm3_[0], xmm3_[3], xmm1_[0], xmm1_[2])
   505             _mm_castsi128_ps(xmm3_), _mm_castsi128_ps(xmm1_), 0x8C));
   506         xmm1_ = _mm_xor_si128(xmm1_, xmm3_); // second XOR pass into xmm1_
   507         xmm1_ = _mm_xor_si128(xmm1_, xmm2_); // fold the broadcast word into every lane
   // Updates xmm4_ from xmm1_ and derives xmm7_ from the new xmm4_; xmm5_ and
   // xmm6_ are scratch and are overwritten. Lanes are 32-bit, lane 0 least
   // significant.
   510     void complete_key_expansion()
   512         xmm5_ = _mm_load_si128(&xmm4_);          // xmm5_ = copy of xmm4_
   513         xmm5_ = _mm_slli_si128(xmm5_, 0x04);     // shift the copy left by 4 bytes (zero fill)
   514         xmm6_ = _mm_castps_si128(_mm_shuffle_ps( // xmm6_ = (xmm6_[0], xmm6_[0], xmm1_[3], xmm1_[3])
   515             _mm_castsi128_ps(xmm6_), _mm_castsi128_ps(xmm1_), 0xF0));
   516         xmm6_ = _mm_xor_si128(xmm6_, xmm5_);    // combine with the shifted copy
   517         xmm4_ = _mm_xor_si128(xmm4_, xmm6_);    // fold into xmm4_
   518         xmm7_ = _mm_shuffle_epi32(xmm4_, 0x0E); // xmm7_ lanes (0..3) = xmm4_ lanes (2, 3, 0, 0)
   // Overload for the std::false_type tag taking an unnamed array reference and
   // an unnamed const byte pointer, so it can neither read nor write through
   // them.
   // NOTE(review): the function-name line is not visible here; the parameter
   // list mirrors copy_key(..., std::true_type), so this is presumably its
   // counterpart - confirm.
   521     template <std::
size_t Rp1>
   523         std::array<__m128i, Rp1> &, 
const unsigned char *, std::false_type)
   // Copies bytes [24, Rp1 * 16) from the byte buffer ks_ptr into the same byte
   // offsets of ks, viewed as raw bytes. The first 24 bytes of ks are not
   // touched by this copy.
   // NOTE(review): the remaining parameter(s) of the signature are not visible
   // here (presumably the std::true_type tag) - confirm.
   527     template <std::
size_t Rp1>
   528     void copy_key(std::array<__m128i, Rp1> &ks, 
const unsigned char *ks_ptr,
   // Byte view of the destination array.
   531         unsigned char *dst = 
reinterpret_cast<unsigned char *
>(ks.data());
   532         std::memcpy(dst + 24, ks_ptr + 24, Rp1 * 16 - 24);
   // Class template over T (class name line not visible here), followed by a
   // member template over Rp1, the number of __m128i entries to produce.
   // NOTE(review): the member's signature line is missing; the body uses `key`
   // and `key_seq`, so it is presumably the generator's operator() - confirm.
   536 template <
typename T>
   542     template <std::
size_t Rp1>
   545         std::array<__m128i, Rp1> ks;
   // N = 0: writes the value loaded from the start of key into ks[0] and xmm1_
   // (when 0 < Rp1).
   546         AESKeyInit::eval<0, 0>(key, ks, xmm1_);
   // Generation starts at index 2.
   // NOTE(review): ks[1] and xmm3_ are presumably initialised by a statement
   // not visible here - confirm.
   548         generate_seq<2>(ks, std::integral_constant<bool, 2 < Rp1>());
   // Copy all Rp1 * sizeof(__m128i) bytes of ks into key_seq.
   549         std::memcpy(key_seq.data(), ks.data(), 
sizeof(__m128i) * Rp1);
   // generate_seq overload for the std::false_type tag, i.e. when the caller's
   // "index < Rp1" test is false. The array parameter is unnamed, so this
   // overload cannot write to it.
   // NOTE(review): body not visible here; presumably empty - confirm.
   558     template <std::
size_t, std::
size_t Rp1>
   559     void generate_seq(std::array<__m128i, Rp1> &, std::false_type)
   // generate_seq overload for the std::true_type tag: produces entry N, then
   // continues with N + 1 while N + 1 < Rp1. Even N selects the
   // generate_key(..., std::true_type) overload, odd N the std::false_type one.
   563     template <std::
size_t N, std::
size_t Rp1>
   564     void generate_seq(std::array<__m128i, Rp1> &ks, std::true_type)
   566         generate_key<N>(ks, std::integral_constant<bool, N % 2 == 0>());
   567         generate_seq<N + 1>(ks, std::integral_constant<bool, N + 1 < Rp1>());
   // Even-index case (std::true_type tag): runs expand_key(std::true_type),
   // which updates xmm1_, and stores the new xmm1_ into ks[N].
   // NOTE(review): statements between the signature and the expand_key call
   // are not visible here (xmm2_ is presumably prepared there) - confirm.
   570     template <std::
size_t N, std::
size_t Rp1>
   571     void generate_key(std::array<__m128i, Rp1> &ks, std::true_type)
   575         expand_key(std::true_type());
   576         std::get<N>(ks) = xmm1_;
   // Odd-index case (std::false_type tag): computes the key-generation assist
   // of xmm1_ with a zero round constant into xmm4_, runs
   // expand_key(std::false_type), which updates xmm3_, and stores the new xmm3_
   // into ks[N].
   579     template <std::
size_t N, std::
size_t Rp1>
   580     void generate_key(std::array<__m128i, Rp1> &ks, std::false_type)
   582         xmm4_ = _mm_aeskeygenassist_si128(xmm1_, 0);
   583         expand_key(std::false_type());
   584         std::get<N>(ks) = xmm3_;
   // Updates xmm1_ from xmm2_, using xmm4_ as scratch (overwritten). Lanes are
   // 32-bit, lane 0 least significant. After the three shift/XOR steps lane i
   // of xmm1_ holds the XOR of its original lanes 0..i; the broadcast word from
   // xmm2_ is then XORed into every lane.
   587     void expand_key(std::true_type)
   589         xmm2_ = _mm_shuffle_epi32(xmm2_, 0xFF); // broadcast lane 3 of xmm2_ to all four lanes
   590         xmm4_ = _mm_slli_si128(xmm1_, 0x04);    // scratch = xmm1_ shifted left by 4 bytes (zero fill)
   591         xmm1_ = _mm_xor_si128(xmm1_, xmm4_);    // lane i ^= original lane i-1
   592         xmm4_ = _mm_slli_si128(xmm4_, 0x04);    // scratch shifted a further 4 bytes
   593         xmm1_ = _mm_xor_si128(xmm1_, xmm4_);    // lane i ^= original lane i-2
   594         xmm4_ = _mm_slli_si128(xmm4_, 0x04);    // scratch shifted a further 4 bytes
   595         xmm1_ = _mm_xor_si128(xmm1_, xmm4_);    // lane i ^= original lane i-3
   596         xmm1_ = _mm_xor_si128(xmm1_, xmm2_);    // fold the broadcast word into every lane
   // Updates xmm3_ from xmm4_. Lanes are 32-bit, lane 0 least significant.
   // Lane 2 of the incoming xmm4_ is broadcast into xmm2_ first, after which
   // xmm4_ is reused as scratch. After the three shift/XOR steps lane i of
   // xmm3_ holds the XOR of its original lanes 0..i; the broadcast word is then
   // XORed into every lane.
   599     void expand_key(std::false_type)
   601         xmm2_ = _mm_shuffle_epi32(xmm4_, 0xAA); // broadcast lane 2 of xmm4_ to all four lanes
   602         xmm4_ = _mm_slli_si128(xmm3_, 0x04);    // scratch = xmm3_ shifted left by 4 bytes (zero fill)
   603         xmm3_ = _mm_xor_si128(xmm3_, xmm4_);    // lane i ^= original lane i-1
   604         xmm4_ = _mm_slli_si128(xmm4_, 0x04);    // scratch shifted a further 4 bytes
   605         xmm3_ = _mm_xor_si128(xmm3_, xmm4_);    // lane i ^= original lane i-2
   606         xmm4_ = _mm_slli_si128(xmm4_, 0x04);    // scratch shifted a further 4 bytes
   607         xmm3_ = _mm_xor_si128(xmm3_, xmm4_);    // lane i ^= original lane i-3
   608         xmm3_ = _mm_xor_si128(xmm3_, xmm2_);    // fold the broadcast word into every lane
   616 template <
typename ResultType, std::
size_t Rounds>
   622 template <
typename ResultType, std::
size_t Rounds>
   628 template <
typename ResultType, std::
size_t Rounds>
   634 template <
typename ResultType, std::
size_t Blocks = VSMC_RNG_AES_BLOCKS>
   640 template <
typename ResultType, std::
size_t Blocks = VSMC_RNG_AES_BLOCKS>
   646 template <
typename ResultType, std::
size_t Blocks = VSMC_RNG_AES_BLOCKS>
   772 #endif // VSMC_RNG_AES_HPP void operator()(const key_type &key, std::array< M128I<>, Rp1 > &key_seq)
 
std::array< T, 24/sizeof(T)> key_type
 
std::array< T, 32/sizeof(T)> key_type
 
void reset(const key_type &key)
 
typename KeySeqGenerator::key_type key_type
 
Counter based RNG engine. 
 
#define VSMC_DEFINE_RNG_AES_ROUND_CONSTANT(N, val)                                                    
 
static void eval(const std::array< T, KeySize > &key, std::array< __m128i, Rp1 > &ks, __m128i &xmm)
 
void operator()(const key_type &key, std::array< M128I<>, Rp1 > &key_seq)
 
std::array< T, 16/sizeof(T)> key_type
 
void operator()(const key_type &key, std::array< M128I<>, Rp1 > &key_seq)
 
void operator()(const key_type &, std::array< M128I< U >, Rounds+1 > &rk) const