32 #ifndef VSMC_RNG_AES_HPP 33 #define VSMC_RNG_AES_HPP 38 #define VSMC_DEFINE_RNG_AES_KEY_GEN_ASSIST(N, val) \ 40 inline __m128i AESKeyGenAssist<N>(__m128i xmm) \ 42 return _mm_aeskeygenassist_si128(xmm, val); \ 47 #ifndef VSMC_RNG_AES_BLOCKS 48 #define VSMC_RNG_AES_BLOCKS 4 57 template <std::
size_t>
// Entry point for placing key material into slot N of the round-key array
// `ks`. It forwards to init<Offset, N> and selects the overload with a
// compile-time tag that is std::true_type only when N < Rp1, i.e. only when
// slot N actually exists in `ks`.
// NOTE(review): `Offset` is forwarded unchanged; how init uses it is not
// visible in this excerpt.
320 template <std::size_t Offset, std::size_t N,
typename T,
321 std::size_t KeySize, std::size_t Rp1>
322 static void eval(
const std::array<T, KeySize> &key,
323 std::array<__m128i, Rp1> &ks, __m128i &xmm)
// Tag dispatch on the bounds check N < Rp1.
325 init<Offset, N>(key, ks, xmm, std::integral_constant < bool, N<Rp1>());
// init overload selected by a std::false_type tag. All parameters are
// unnamed, so this overload cannot read the key or write to the round-key
// array or the register argument.
// NOTE(review): the end of the template header and the function body are not
// visible in this excerpt; the body is presumably empty -- confirm.
329 template <std::size_t, std::size_t,
typename T, std::size_t KeySize,
331 static void init(
const std::array<T, KeySize> &,
332 std::array<__m128i, Rp1> &, __m128i &, std::false_type)
// init overload selected by a std::true_type tag. Loads from the start of
// the key storage and writes the loaded value to both slot N of `ks` and the
// caller's register `xmm`.
// NOTE(review): the declaration of `tmp` is not visible in this excerpt, and
// `Offset` is not used by the statements shown here -- confirm against the
// full source.
336 template <std::size_t Offset, std::size_t N,
typename T,
337 std::size_t KeySize, std::size_t Rp1>
338 static void init(
const std::array<T, KeySize> &key,
339 std::array<__m128i, Rp1> &ks, __m128i &xmm, std::true_type)
// Read the key bytes through tmp's load().
342 tmp.
load(key.data());
// Chained assignment: the same value goes to xmm and to ks[N].
343 std::get<N>(ks) = xmm = tmp.
value();
// Fragments of a class template parameterised on a value type T, a round
// count, and a key-schedule policy type KeySeqGenerator.
// NOTE(review): the class header and most member declarations are not
// visible in this excerpt.
347 template <
typename T, std::
size_t Rounds,
typename KeySeqGenerator>
// The key type is whatever the key-schedule policy declares.
351 using key_type =
typename KeySeqGenerator::key_type;
// A default-constructed generator is invoked with the key and the stored
// round-key array, which it fills.
355 KeySeqGenerator generator;
356 generator(key, key_seq_);
359 template <
typename U>
// Storage for the round keys: Rounds + 1 entries.
367 std::array<M128I<>, Rounds + 1> key_seq_;
// Key-schedule generator that keeps its running state in xmm1_.
// NOTE(review): the class header, key_type alias and the signature of the
// call operator are not visible in this excerpt.
370 template <
typename T>
376 template <std::
size_t Rp1>
// Work on a local array of raw __m128i values.
380 std::array<__m128i, Rp1> ks;
// Slot 0 and xmm1_ are initialised from the key.
381 AESKeyInit::eval<0, 0>(key, ks, xmm1_);
// Slots 1 .. Rp1 - 1 are produced recursively; the tag stops the recursion
// immediately when there is only one slot.
382 generate_seq<1>(ks, std::integral_constant<bool, 1 < Rp1>());
// Copy the raw bytes of all Rp1 round keys into the caller's array.
383 std::memcpy(key_seq.data(), ks.data(),
sizeof(__m128i) * Rp1);
// Recursion terminator: chosen when the tag is std::false_type, i.e. when
// the next slot index is not below Rp1. The array parameter is unnamed.
// NOTE(review): body not visible in this excerpt; presumably empty.
391 template <std::
size_t, std::
size_t Rp1>
392 void generate_seq(std::array<__m128i, Rp1> &, std::false_type)
// Recursive step producing round key N, then continuing with N + 1 while
// N + 1 < Rp1.
396 template <std::
size_t N, std::
size_t Rp1>
397 void generate_seq(std::array<__m128i, Rp1> &ks, std::true_type)
// Key-generation assist on the current state, specialised on the index N.
399 xmm2_ = AESKeyGenAssist<N>(xmm1_);
// NOTE(review): one source line between the assist call and the store is
// missing from this excerpt; presumably it invokes the expansion helper that
// updates xmm1_ -- confirm.
401 std::get<N>(ks) = xmm1_;
402 generate_seq<N + 1>(ks, std::integral_constant<bool, N + 1 < Rp1>());
// Body of the expansion step that advances xmm1_ to the next round key.
// NOTE(review): the function signature is not visible in this excerpt.
// Broadcast 32-bit lane 3 of the assist result to all four lanes.
407 xmm2_ = _mm_shuffle_epi32(xmm2_, 0xFF);
// Three byte-shift-by-4 / XOR rounds. xmm3_ is always a further shift of the
// previous xmm3_, so the net effect is
//   xmm1_ ^= (old << 4 bytes) ^ (old << 8 bytes) ^ (old << 12 bytes),
// i.e. each 32-bit lane becomes the XOR of itself and all lower lanes.
408 xmm3_ = _mm_slli_si128(xmm1_, 0x04);
409 xmm1_ = _mm_xor_si128(xmm1_, xmm3_);
410 xmm3_ = _mm_slli_si128(xmm3_, 0x04);
411 xmm1_ = _mm_xor_si128(xmm1_, xmm3_);
412 xmm3_ = _mm_slli_si128(xmm3_, 0x04);
413 xmm1_ = _mm_xor_si128(xmm1_, xmm3_);
// Fold in the broadcast assist word.
414 xmm1_ = _mm_xor_si128(xmm1_, xmm2_);
// Key-schedule generator for a 24-byte key (24 bytes are copied from the key
// below). The expanded key is first written into a byte buffer at 24-byte
// strides and then copied into the 16-byte round-key slots.
// NOTE(review): the class header, key_type alias and the signature of the
// call operator are not visible in this excerpt.
418 template <
typename T>
424 template <std::
size_t Rp1>
428 std::array<__m128i, Rp1> ks;
// Re-pack the 24 key bytes as three 64-bit words.
430 std::array<std::uint64_t, 3> key_tmp;
431 std::memcpy(key_tmp.data(), key.data(), 24);
// Slot 0 and xmm1_ are initialised from the start of key_tmp.
432 AESKeyInit::eval<0, 0>(key_tmp, ks, xmm1_);
// Move the third word to the front and zero the second word, then initialise
// slot 1 and xmm7_ from the start of key_tmp.
// NOTE(review): this presumably leaves the last 8 key bytes in the low half
// of xmm7_ and zeros in the high half -- depends on how many bytes the
// loader reads; confirm.
433 std::get<0>(key_tmp) = std::get<2>(key_tmp);
434 std::get<1>(key_tmp) = 0;
435 AESKeyInit::eval<0, 1>(key_tmp, ks, xmm7_);
// Scratch registers start at zero.
437 xmm3_ = _mm_setzero_si128();
438 xmm6_ = _mm_setzero_si128();
// 0x4F selects source lanes (3, 3, 0, 1): lanes 0 and 1 of xmm7_ end up in
// lanes 2 and 3 of xmm4_, and lanes 0 and 1 of xmm4_ both take lane 3.
439 xmm4_ = _mm_shuffle_epi32(xmm7_, 0x4F);
// Byte buffer for the expanded key; it is 16 bytes larger than the Rp1
// round keys.
441 std::array<unsigned char, Rp1 * 16 + 16> ks_tmp;
// Fill the buffer starting with chunk 1; skipped entirely when the round
// keys occupy 24 bytes or fewer.
442 generate_seq<1, Rp1>(
443 ks_tmp.data(), std::integral_constant<bool, 24 < Rp1 * 16>());
// NOTE(review): the name of the call these arguments belong to is on a line
// missing from this excerpt; the argument list matches copy_key -- confirm.
445 ks, ks_tmp.data(), std::integral_constant<bool, 24 < Rp1 * 16>());
// Copy the raw bytes of all Rp1 round keys into the caller's array.
447 std::memcpy(key_seq.data(), ks.data(),
sizeof(__m128i) * Rp1);
// Recursion terminator: chosen when the tag is std::false_type. The buffer
// pointer is unnamed.
// NOTE(review): body not visible in this excerpt; presumably empty.
459 template <std::
size_t, std::
size_t>
460 void generate_seq(
unsigned char *, std::false_type)
// Recursive step for 24-byte chunk N of the byte buffer at ks_ptr.
464 template <std::
size_t N, std::
size_t Rp1>
465 void generate_seq(
unsigned char *ks_ptr, std::true_type)
// Produce and store the first 16 bytes of chunk N.
467 generate_key<N>(ks_ptr);
// NOTE(review): the name of the call these arguments belong to is on a line
// missing from this excerpt; presumably complete_key<N>. Its tag is true
// only when offset N * 24 + 16 is still below the Rp1 * 16 bytes of round
// keys.
469 ks_ptr, std::integral_constant<bool, N * 24 + 16 < Rp1 * 16>());
// Continue with chunk N + 1 while its start offset is below Rp1 * 16.
470 generate_seq<N + 1, Rp1>(
471 ks_ptr, std::integral_constant<bool, N * 24 + 24 < Rp1 * 16>());
// Computes the first 16 bytes of chunk N and stores them at byte offset
// N * 24 of the buffer.
474 template <std::
size_t N>
475 void generate_key(
unsigned char *ks_ptr)
// Key-generation assist on xmm4_, specialised on the index N.
480 xmm2_ = AESKeyGenAssist<N>(xmm4_);
481 generate_key_expansion();
// Unaligned store: N * 24 is not generally a multiple of 16.
482 _mm_storeu_si128(reinterpret_cast<__m128i *>(ks_ptr + N * 24), xmm1_);
// Overload chosen when the tag is std::false_type. The buffer pointer is
// unnamed.
// NOTE(review): body not visible in this excerpt; presumably empty.
485 template <std::
size_t>
486 void complete_key(
unsigned char *, std::false_type)
// Computes the trailing part of chunk N and writes xmm7_ at byte offset
// N * 24 + 16 of the buffer.
490 template <std::
size_t N>
491 void complete_key(
unsigned char *ks_ptr, std::true_type)
496 complete_key_expansion();
// NOTE(review): the name of the store call these arguments belong to is on a
// line missing from this excerpt; presumably an unaligned 128-bit store.
498 reinterpret_cast<__m128i *>(ks_ptr + N * 24 + 16), xmm7_);
// Advances xmm1_ using xmm3_ as scratch and the assist result in xmm2_.
// With lane 0 of xmm3_ equal to zero, the two shuffle/XOR rounds turn each
// 32-bit lane of xmm1_ into the XOR of itself and all lower lanes.
501 void generate_key_expansion()
// Broadcast 32-bit lane 3 of the assist result to all four lanes.
503 xmm2_ = _mm_shuffle_epi32(xmm2_, 0xFF);
// 0x10: result lanes are (xmm3_[0], xmm3_[0], xmm1_[1], xmm1_[0]).
504 xmm3_ = _mm_castps_si128(_mm_shuffle_ps(
505 _mm_castsi128_ps(xmm3_), _mm_castsi128_ps(xmm1_), 0x10));
506 xmm1_ = _mm_xor_si128(xmm1_, xmm3_);
// 0x8C: result lanes are (xmm3_[0], xmm3_[3], xmm1_[0], xmm1_[2]).
507 xmm3_ = _mm_castps_si128(_mm_shuffle_ps(
508 _mm_castsi128_ps(xmm3_), _mm_castsi128_ps(xmm1_), 0x8C));
509 xmm1_ = _mm_xor_si128(xmm1_, xmm3_);
// Fold in the broadcast assist word.
510 xmm1_ = _mm_xor_si128(xmm1_, xmm2_);
// Updates xmm4_ from xmm1_ and the previous xmm4_, then derives xmm7_ from
// the upper half of the new xmm4_.
513 void complete_key_expansion()
// xmm5_ = xmm4_ shifted left by 4 bytes.
515 xmm5_ = _mm_load_si128(&xmm4_);
516 xmm5_ = _mm_slli_si128(xmm5_, 0x04);
// 0xF0: result lanes are (xmm6_[0], xmm6_[0], xmm1_[3], xmm1_[3]).
517 xmm6_ = _mm_castps_si128(_mm_shuffle_ps(
518 _mm_castsi128_ps(xmm6_), _mm_castsi128_ps(xmm1_), 0xF0));
519 xmm6_ = _mm_xor_si128(xmm6_, xmm5_);
520 xmm4_ = _mm_xor_si128(xmm4_, xmm6_);
// 0x0E selects source lanes (2, 3, 0, 0): the upper 64 bits of xmm4_ move to
// the lower 64 bits of xmm7_.
521 xmm7_ = _mm_shuffle_epi32(xmm4_, 0x0E);
// copy_key overload whose visible parameters are all unnamed, so it cannot
// touch either buffer.
// NOTE(review): the final parameter and the body are on lines missing from
// this excerpt; presumably a std::false_type tag and an empty body.
524 template <std::
size_t Rp1>
525 void copy_key(std::array<__m128i, Rp1> &,
const unsigned char *,
// Copies the expanded key from the byte buffer into the round-key array,
// skipping the first 24 bytes of both. Bytes 0..23 of `ks` are left as they
// were.
// NOTE(review): the final parameter is on a line missing from this excerpt;
// presumably a std::true_type tag.
530 template <std::
size_t Rp1>
531 void copy_key(std::array<__m128i, Rp1> &ks,
const unsigned char *ks_ptr,
// View the round-key array as raw bytes so a 24-byte offset can be applied.
534 unsigned char *dst =
reinterpret_cast<unsigned char *
>(ks.data());
// Rp1 * 16 is the total size of the round keys in bytes.
535 std::memcpy(dst + 24, ks_ptr + 24, Rp1 * 16 - 24);
// Key-schedule generator that alternates between two state registers
// (xmm1_ and xmm3_) and starts generating at slot 2.
// NOTE(review): the class header, key_type alias and the signature of the
// call operator are not visible in this excerpt.
539 template <
typename T>
545 template <std::
size_t Rp1>
549 std::array<__m128i, Rp1> ks;
// Slot 0 and xmm1_ are initialised from the key.
550 AESKeyInit::eval<0, 0>(key, ks, xmm1_);
// NOTE(review): one source line is missing here; presumably it initialises
// slot 1 and xmm3_ from the second half of the key -- confirm.
// Slots 2 .. Rp1 - 1 are produced recursively.
552 generate_seq<2>(ks, std::integral_constant<bool, 2 < Rp1>());
// Copy the raw bytes of all Rp1 round keys into the caller's array.
553 std::memcpy(key_seq.data(), ks.data(),
sizeof(__m128i) * Rp1);
// Recursion terminator: chosen when the tag is std::false_type, i.e. when
// the next slot index is not below Rp1. The array parameter is unnamed.
// NOTE(review): body not visible in this excerpt; presumably empty.
562 template <std::
size_t, std::
size_t Rp1>
563 void generate_seq(std::array<__m128i, Rp1> &, std::false_type)
// Recursive step producing round key N, then continuing with N + 1 while
// N + 1 < Rp1.
567 template <std::
size_t N, std::
size_t Rp1>
568 void generate_seq(std::array<__m128i, Rp1> &ks, std::true_type)
// Even and odd slots use different generate_key overloads.
570 generate_key<N>(ks, std::integral_constant<bool, N % 2 == 0>());
571 generate_seq<N + 1>(ks, std::integral_constant<bool, N + 1 < Rp1>());
// Overload for the std::true_type tag (the caller passes N % 2 == 0).
// Advances xmm1_ and stores it as round key N.
574 template <std::
size_t N, std::
size_t Rp1>
575 void generate_key(std::array<__m128i, Rp1> &ks, std::true_type)
// The assist index advances once per pair of slots (N / 2) and is applied
// to the other state register, xmm3_.
577 xmm2_ = AESKeyGenAssist<N / 2>(xmm3_);
578 expand_key(std::true_type());
579 std::get<N>(ks) = xmm1_;
// Overload for the std::false_type tag (the caller passes N % 2 == 0, so
// this handles odd N). Advances xmm3_ and stores it as round key N.
582 template <std::
size_t N, std::
size_t Rp1>
583 void generate_key(std::array<__m128i, Rp1> &ks, std::false_type)
// Assist with an immediate of 0, applied to xmm1_; the result goes to xmm4_.
585 xmm4_ = _mm_aeskeygenassist_si128(xmm1_, 0);
586 expand_key(std::false_type());
587 std::get<N>(ks) = xmm3_;
// Advances xmm1_ using the assist result in xmm2_; xmm4_ is scratch.
590 void expand_key(std::true_type)
// Broadcast 32-bit lane 3 of the assist result to all four lanes.
592 xmm2_ = _mm_shuffle_epi32(xmm2_, 0xFF);
// Three byte-shift-by-4 / XOR rounds: each 32-bit lane of xmm1_ becomes the
// XOR of itself and all lower lanes.
593 xmm4_ = _mm_slli_si128(xmm1_, 0x04);
594 xmm1_ = _mm_xor_si128(xmm1_, xmm4_);
595 xmm4_ = _mm_slli_si128(xmm4_, 0x04);
596 xmm1_ = _mm_xor_si128(xmm1_, xmm4_);
597 xmm4_ = _mm_slli_si128(xmm4_, 0x04);
598 xmm1_ = _mm_xor_si128(xmm1_, xmm4_);
// Fold in the broadcast assist word.
599 xmm1_ = _mm_xor_si128(xmm1_, xmm2_);
// Advances xmm3_ using the assist result held in xmm4_; xmm4_ is then
// reused as scratch.
602 void expand_key(std::false_type)
// Broadcast 32-bit lane 2 of the assist result to all four lanes. This must
// happen before xmm4_ is overwritten below.
604 xmm2_ = _mm_shuffle_epi32(xmm4_, 0xAA);
// Three byte-shift-by-4 / XOR rounds: each 32-bit lane of xmm3_ becomes the
// XOR of itself and all lower lanes.
605 xmm4_ = _mm_slli_si128(xmm3_, 0x04);
606 xmm3_ = _mm_xor_si128(xmm3_, xmm4_);
607 xmm4_ = _mm_slli_si128(xmm4_, 0x04);
608 xmm3_ = _mm_xor_si128(xmm3_, xmm4_);
609 xmm4_ = _mm_slli_si128(xmm4_, 0x04);
610 xmm3_ = _mm_xor_si128(xmm3_, xmm4_);
// Fold in the broadcast assist word.
611 xmm3_ = _mm_xor_si128(xmm3_, xmm2_);
619 template <
typename ResultType, std::
size_t Rounds>
625 template <
typename ResultType, std::
size_t Rounds>
631 template <
typename ResultType, std::
size_t Rounds>
637 template <
typename ResultType, std::
size_t Blocks = VSMC_RNG_AES_BLOCKS>
643 template <
typename ResultType, std::
size_t Blocks = VSMC_RNG_AES_BLOCKS>
649 template <
typename ResultType, std::
size_t Blocks = VSMC_RNG_AES_BLOCKS>
775 #endif // VSMC_RNG_AES_HPP void operator()(const key_type &key, std::array< M128I<>, Rp1 > &key_seq)
#define VSMC_DEFINE_RNG_AES_KEY_GEN_ASSIST(N, val)
std::array< T, 24/sizeof(T)> key_type
std::array< T, 32/sizeof(T)> key_type
void reset(const key_type &key)
typename KeySeqGenerator::key_type key_type
Counter based RNG engine.
static void eval(const std::array< T, KeySize > &key, std::array< __m128i, Rp1 > &ks, __m128i &xmm)
void operator()(const key_type &key, std::array< M128I<>, Rp1 > &key_seq)
std::array< T, 16/sizeof(T)> key_type
void operator()(const key_type &key, std::array< M128I<>, Rp1 > &key_seq)
__m128i AESKeyGenAssist(__m128i)
void operator()(const key_type &, std::array< M128I< U >, Rounds+1 > &rk) const