32 #ifndef VSMC_RNG_AES_HPP    33 #define VSMC_RNG_AES_HPP    38 #define VSMC_DEFINE_RNG_AES_KEY_GEN_ASSIST(N, val)                            \    40     inline __m128i AESKeyGenAssist<N>(__m128i xmm)                    \    42         return _mm_aeskeygenassist_si128(xmm, val);                           \    47 #ifndef VSMC_RNG_AES_BLOCKS    48 #define VSMC_RNG_AES_BLOCKS 4    57 template <std::
size_t>
   320     template <std::size_t Offset, std::size_t N, 
typename T,
   321         std::size_t KeySize, std::size_t Rp1>
   322     static void eval(
const std::array<T, KeySize> &key,
   323         std::array<__m128i, Rp1> &ks, __m128i &xmm) 
   325         init<Offset, N>(key, ks, xmm, std::integral_constant < bool, N<Rp1>());
   329     template <std::size_t, std::size_t, 
typename T, std::size_t KeySize,
   331     static void init(
const std::array<T, KeySize> &,
   332         std::array<__m128i, Rp1> &, __m128i &, std::false_type) 
   336     template <std::size_t Offset, std::size_t N, 
typename T,
   337         std::size_t KeySize, std::size_t Rp1>
   338     static void init(
const std::array<T, KeySize> &key,
   339         std::array<__m128i, Rp1> &ks, __m128i &xmm, std::true_type) 
   342         tmp.
load(key.data());
   343         std::get<N>(ks) = xmm = tmp.
value();
   347 template <
typename T, std::
size_t Rounds, 
typename KeySeqGenerator>
   351     using key_type = 
typename KeySeqGenerator::key_type;
   355         KeySeqGenerator generator;
   356         generator(key, key_seq_);
   359     template <
typename U>
   367     std::array<M128I<>, Rounds + 1> key_seq_;
   370 template <
typename T>
   376     template <std::
size_t Rp1>
   380         std::array<__m128i, Rp1> ks;
   381         AESKeyInit::eval<0, 0>(key, ks, xmm1_);
   382         generate_seq<1>(ks, std::integral_constant<bool, 1 < Rp1>());
   383         std::memcpy(key_seq.data(), ks.data(), 
sizeof(__m128i) * Rp1);
   391     template <std::
size_t, std::
size_t Rp1>
   392     void generate_seq(std::array<__m128i, Rp1> &, std::false_type) 
   396     template <std::
size_t N, std::
size_t Rp1>
   397     void generate_seq(std::array<__m128i, Rp1> &ks, std::true_type) 
   399         xmm2_ = AESKeyGenAssist<N>(xmm1_);
   401         std::get<N>(ks) = xmm1_;
   402         generate_seq<N + 1>(ks, std::integral_constant<bool, N + 1 < Rp1>());
   407         xmm2_ = _mm_shuffle_epi32(xmm2_, 0xFF); 
   408         xmm3_ = _mm_slli_si128(xmm1_, 0x04);    
   409         xmm1_ = _mm_xor_si128(xmm1_, xmm3_);    
   410         xmm3_ = _mm_slli_si128(xmm3_, 0x04);    
   411         xmm1_ = _mm_xor_si128(xmm1_, xmm3_);    
   412         xmm3_ = _mm_slli_si128(xmm3_, 0x04);    
   413         xmm1_ = _mm_xor_si128(xmm1_, xmm3_);    
   414         xmm1_ = _mm_xor_si128(xmm1_, xmm2_);    
   418 template <
typename T>
   424     template <std::
size_t Rp1>
   428         std::array<__m128i, Rp1> ks;
   430         std::array<std::uint64_t, 3> key_tmp;
   431         std::memcpy(key_tmp.data(), key.data(), 24);
   432         AESKeyInit::eval<0, 0>(key_tmp, ks, xmm1_);
   433         std::get<0>(key_tmp) = std::get<2>(key_tmp);
   434         std::get<1>(key_tmp) = 0;
   435         AESKeyInit::eval<0, 1>(key_tmp, ks, xmm7_);
   437         xmm3_ = _mm_setzero_si128();
   438         xmm6_ = _mm_setzero_si128();
   439         xmm4_ = _mm_shuffle_epi32(xmm7_, 0x4F); 
   441         std::array<unsigned char, Rp1 * 16 + 16> ks_tmp;
   442         generate_seq<1, Rp1>(
   443             ks_tmp.data(), std::integral_constant<bool, 24 < Rp1 * 16>());
   445             ks, ks_tmp.data(), std::integral_constant<bool, 24 < Rp1 * 16>());
   447         std::memcpy(key_seq.data(), ks.data(), 
sizeof(__m128i) * Rp1);
   459     template <std::
size_t, std::
size_t>
   460     void generate_seq(
unsigned char *, std::false_type) 
   464     template <std::
size_t N, std::
size_t Rp1>
   465     void generate_seq(
unsigned char *ks_ptr, std::true_type) 
   467         generate_key<N>(ks_ptr);
   469             ks_ptr, std::integral_constant<bool, N * 24 + 16 < Rp1 * 16>());
   470         generate_seq<N + 1, Rp1>(
   471             ks_ptr, std::integral_constant<bool, N * 24 + 24 < Rp1 * 16>());
   474     template <std::
size_t N>
   475     void generate_key(
unsigned char *ks_ptr) 
   480         xmm2_ = AESKeyGenAssist<N>(xmm4_);
   481         generate_key_expansion();
   482         _mm_storeu_si128(reinterpret_cast<__m128i *>(ks_ptr + N * 24), xmm1_);
   485     template <std::
size_t>
   486     void complete_key(
unsigned char *, std::false_type) 
   490     template <std::
size_t N>
   491     void complete_key(
unsigned char *ks_ptr, std::true_type) 
   496         complete_key_expansion();
   498             reinterpret_cast<__m128i *>(ks_ptr + N * 24 + 16), xmm7_);
   501     void generate_key_expansion() 
   503         xmm2_ = _mm_shuffle_epi32(xmm2_, 0xFF);  
   504         xmm3_ = _mm_castps_si128(_mm_shuffle_ps( 
   505             _mm_castsi128_ps(xmm3_), _mm_castsi128_ps(xmm1_), 0x10));
   506         xmm1_ = _mm_xor_si128(xmm1_, xmm3_);     
   507         xmm3_ = _mm_castps_si128(_mm_shuffle_ps( 
   508             _mm_castsi128_ps(xmm3_), _mm_castsi128_ps(xmm1_), 0x8C));
   509         xmm1_ = _mm_xor_si128(xmm1_, xmm3_); 
   510         xmm1_ = _mm_xor_si128(xmm1_, xmm2_); 
   513     void complete_key_expansion() 
   515         xmm5_ = _mm_load_si128(&xmm4_);          
   516         xmm5_ = _mm_slli_si128(xmm5_, 0x04);     
   517         xmm6_ = _mm_castps_si128(_mm_shuffle_ps( 
   518             _mm_castsi128_ps(xmm6_), _mm_castsi128_ps(xmm1_), 0xF0));
   519         xmm6_ = _mm_xor_si128(xmm6_, xmm5_);    
   520         xmm4_ = _mm_xor_si128(xmm4_, xmm6_);    
   521         xmm7_ = _mm_shuffle_epi32(xmm4_, 0x0E); 
   524     template <std::
size_t Rp1>
   525     void copy_key(std::array<__m128i, Rp1> &, 
const unsigned char *,
   530     template <std::
size_t Rp1>
   531     void copy_key(std::array<__m128i, Rp1> &ks, 
const unsigned char *ks_ptr,
   534         unsigned char *dst = 
reinterpret_cast<unsigned char *
>(ks.data());
   535         std::memcpy(dst + 24, ks_ptr + 24, Rp1 * 16 - 24);
   539 template <
typename T>
   545     template <std::
size_t Rp1>
   549         std::array<__m128i, Rp1> ks;
   550         AESKeyInit::eval<0, 0>(key, ks, xmm1_);
   552         generate_seq<2>(ks, std::integral_constant<bool, 2 < Rp1>());
   553         std::memcpy(key_seq.data(), ks.data(), 
sizeof(__m128i) * Rp1);
   562     template <std::
size_t, std::
size_t Rp1>
   563     void generate_seq(std::array<__m128i, Rp1> &, std::false_type) 
   567     template <std::
size_t N, std::
size_t Rp1>
   568     void generate_seq(std::array<__m128i, Rp1> &ks, std::true_type) 
   570         generate_key<N>(ks, std::integral_constant<bool, N % 2 == 0>());
   571         generate_seq<N + 1>(ks, std::integral_constant<bool, N + 1 < Rp1>());
   574     template <std::
size_t N, std::
size_t Rp1>
   575     void generate_key(std::array<__m128i, Rp1> &ks, std::true_type) 
   577         xmm2_ = AESKeyGenAssist<N / 2>(xmm3_);
   578         expand_key(std::true_type());
   579         std::get<N>(ks) = xmm1_;
   582     template <std::
size_t N, std::
size_t Rp1>
   583     void generate_key(std::array<__m128i, Rp1> &ks, std::false_type) 
   585         xmm4_ = _mm_aeskeygenassist_si128(xmm1_, 0);
   586         expand_key(std::false_type());
   587         std::get<N>(ks) = xmm3_;
   590     void expand_key(std::true_type) 
   592         xmm2_ = _mm_shuffle_epi32(xmm2_, 0xFF); 
   593         xmm4_ = _mm_slli_si128(xmm1_, 0x04);    
   594         xmm1_ = _mm_xor_si128(xmm1_, xmm4_);    
   595         xmm4_ = _mm_slli_si128(xmm4_, 0x04);    
   596         xmm1_ = _mm_xor_si128(xmm1_, xmm4_);    
   597         xmm4_ = _mm_slli_si128(xmm4_, 0x04);    
   598         xmm1_ = _mm_xor_si128(xmm1_, xmm4_);    
   599         xmm1_ = _mm_xor_si128(xmm1_, xmm2_);    
   602     void expand_key(std::false_type) 
   604         xmm2_ = _mm_shuffle_epi32(xmm4_, 0xAA); 
   605         xmm4_ = _mm_slli_si128(xmm3_, 0x04);    
   606         xmm3_ = _mm_xor_si128(xmm3_, xmm4_);    
   607         xmm4_ = _mm_slli_si128(xmm4_, 0x04);    
   608         xmm3_ = _mm_xor_si128(xmm3_, xmm4_);    
   609         xmm4_ = _mm_slli_si128(xmm4_, 0x04);    
   610         xmm3_ = _mm_xor_si128(xmm3_, xmm4_);    
   611         xmm3_ = _mm_xor_si128(xmm3_, xmm2_);    
   619 template <
typename ResultType, std::
size_t Rounds>
   625 template <
typename ResultType, std::
size_t Rounds>
   631 template <
typename ResultType, std::
size_t Rounds>
   637 template <
typename ResultType, std::
size_t Blocks = VSMC_RNG_AES_BLOCKS>
   643 template <
typename ResultType, std::
size_t Blocks = VSMC_RNG_AES_BLOCKS>
   649 template <
typename ResultType, std::
size_t Blocks = VSMC_RNG_AES_BLOCKS>
   775 #endif // VSMC_RNG_AES_HPP void operator()(const key_type &key, std::array< M128I<>, Rp1 > &key_seq)
 
#define VSMC_DEFINE_RNG_AES_KEY_GEN_ASSIST(N, val)                                                    
 
std::array< T, 24/sizeof(T)> key_type
 
std::array< T, 32/sizeof(T)> key_type
 
void reset(const key_type &key)
 
typename KeySeqGenerator::key_type key_type
 
Counter based RNG engine. 
 
static void eval(const std::array< T, KeySize > &key, std::array< __m128i, Rp1 > &ks, __m128i &xmm)
 
void operator()(const key_type &key, std::array< M128I<>, Rp1 > &key_seq)
 
std::array< T, 16/sizeof(T)> key_type
 
void operator()(const key_type &key, std::array< M128I<>, Rp1 > &key_seq)
 
__m128i AESKeyGenAssist(__m128i)
 
void operator()(const key_type &, std::array< M128I< U >, Rounds+1 > &rk) const