32 #ifndef VSMC_RNG_AES_HPP 33 #define VSMC_RNG_AES_HPP 38 #define VSMC_DEFINE_RNG_AES_ROUND_CONSTANT(N, val) \ 40 class AESRoundConstant<N> : public std::integral_constant<int, val> \ 42 }; // class AESRoundConstant 46 #ifndef VSMC_RNG_AES_BLOCKS 47 #define VSMC_RNG_AES_BLOCKS 4 56 template <std::
size_t N>
// Entry point of the key-initialisation helper. Forwards all arguments to one
// of the two init() overloads, chosen at compile time by whether N < Rp1,
// i.e. whether index N is a valid slot of the Rp1-element array `ks`.
319 template <std::size_t Offset, std::size_t N,
typename T,
320 std::size_t KeySize, std::size_t Rp1>
321 static void eval(
const std::array<T, KeySize> &key,
322 std::array<__m128i, Rp1> &ks, __m128i &xmm)
// Tag dispatch: the last argument is std::true_type when N < Rp1 and
// std::false_type otherwise.
324 init<Offset, N>(key, ks, xmm, std::integral_constant < bool, N<Rp1>());
// init() overload selected by a std::false_type tag. Every parameter is
// unnamed, so none of the arguments can be used by this overload.
// NOTE(review): the body is not visible in this listing; presumably empty —
// confirm.
328 template <std::size_t, std::size_t,
typename T, std::size_t KeySize,
330 static void init(
const std::array<T, KeySize> &,
331 std::array<__m128i, Rp1> &, __m128i &, std::false_type)
// init() overload selected by a std::true_type tag.
// Loads from the beginning of `key`'s storage into `tmp`, then writes the
// resulting value to both slot N of `ks` and the caller's `xmm`.
// NOTE(review): the declaration of `tmp` is not visible in this listing, and
// the Offset template parameter is not referenced by the visible lines —
// confirm whether the load is meant to start at an offset into `key`.
335 template <std::size_t Offset, std::size_t N,
typename T,
336 std::size_t KeySize, std::size_t Rp1>
337 static void init(
const std::array<T, KeySize> &key,
338 std::array<__m128i, Rp1> &ks, __m128i &xmm, std::true_type)
341 tmp.load(key.data());
// Chained assignment: `xmm` and ks[N] end up holding the same value.
342 std::get<N>(ks) = xmm = tmp.value();
// Fragments of a class template parameterised on T, a round count `Rounds`
// and a KeySeqGenerator type. (The class header line and several member
// signatures are not visible in this listing.)
346 template <
typename T, std::
size_t Rounds,
typename KeySeqGenerator>
// The key type is whatever the generator declares.
350 using key_type =
typename KeySeqGenerator::key_type;
// A default-constructed generator is invoked with `key` and key_seq_.
// Presumably this fills key_seq_ from the key — confirm against the
// generator's operator().
354 KeySeqGenerator generator;
355 generator(key, key_seq_);
// Const member template taking an unnamed key and an output array `rk` of
// Rounds + 1 elements. NOTE(review): its name line and body are not visible
// here.
358 template <
typename U>
360 const key_type &, std::array<M128I<U>, Rounds + 1> &rk)
// Member storage: Rounds + 1 elements of M128I<>.
const 366 std::array<M128I<>, Rounds + 1> key_seq_;
// Generator body: builds Rp1 values in a local __m128i array and copies them
// byte-for-byte into `key_seq`. (The class header and the function signature
// line are not visible in this listing.)
369 template <
typename T>
375 template <std::
size_t Rp1>
// Local working array, one __m128i per output element.
378 std::array<__m128i, Rp1> ks;
// Slot 0 is loaded from the key; xmm1_ receives the same value.
379 AESKeyInit::eval<0, 0>(key, ks, xmm1_);
// Compile-time recursion starting at slot 1; the true_type overload is taken
// only when 1 < Rp1.
380 generate_seq<1>(ks, std::integral_constant<bool, 1 < Rp1>());
// Raw copy of all Rp1 elements into the output array.
381 std::memcpy(key_seq.data(), ks.data(),
sizeof(__m128i) * Rp1);
// generate_seq() overload selected by a std::false_type tag. The index
// template parameter and the array argument are unnamed, hence unused.
// NOTE(review): body not visible; presumably the empty end of the
// recursion — confirm.
389 template <std::
size_t, std::
size_t Rp1>
390 void generate_seq(std::array<__m128i, Rp1> &, std::false_type)
// Recursive step for slot N: stores the current xmm1_ into ks[N], then
// recurses for slot N + 1 for as long as N + 1 < Rp1.
// NOTE(review): the source lines between the signature and the store are not
// visible here; presumably they update xmm1_ for this slot first — confirm.
394 template <std::
size_t N, std::
size_t Rp1>
395 void generate_seq(std::array<__m128i, Rp1> &ks, std::true_type)
399 std::get<N>(ks) = xmm1_;
400 generate_seq<N + 1>(ks, std::integral_constant<bool, N + 1 < Rp1>());
// Register-level update of xmm1_ using xmm2_ and scratch register xmm3_.
// (The enclosing function's header is not visible in this listing.)
// Broadcast 32-bit lane 3 of xmm2_ to all four lanes.
405 xmm2_ = _mm_shuffle_epi32(xmm2_, 0xFF);
// The next six lines compute
//   xmm1_ ^= (x << 4 bytes) ^ (x << 8 bytes) ^ (x << 12 bytes)
// where x is the value of xmm1_ on entry; xmm3_ carries the progressively
// shifted copy. Net effect: each 32-bit lane k becomes the XOR of the
// original lanes 0..k.
406 xmm3_ = _mm_slli_si128(xmm1_, 0x04);
407 xmm1_ = _mm_xor_si128(xmm1_, xmm3_);
408 xmm3_ = _mm_slli_si128(xmm3_, 0x04);
409 xmm1_ = _mm_xor_si128(xmm1_, xmm3_);
410 xmm3_ = _mm_slli_si128(xmm3_, 0x04);
411 xmm1_ = _mm_xor_si128(xmm1_, xmm3_);
// Finally fold the broadcast word into every lane.
412 xmm1_ = _mm_xor_si128(xmm1_, xmm2_);
// Generator body for a key that is read as 24 bytes: seeds the working
// registers from the key, produces the remaining output through a byte
// buffer, and copies the Rp1 results into `key_seq`. (The class header and
// the function signature line are not visible in this listing.)
416 template <
typename T>
422 template <std::
size_t Rp1>
425 std::array<__m128i, Rp1> ks;
// Copy 24 bytes of the key into three 64-bit words.
427 std::array<std::uint64_t, 3> key_tmp;
428 std::memcpy(key_tmp.data(), key.data(), 24);
// Slot 0 and xmm1_ are loaded from the start of key_tmp.
429 AESKeyInit::eval<0, 0>(key_tmp, ks, xmm1_);
// Move the third word to the front and zero the second, so that the next
// load from the start of key_tmp sees {word 2, 0}.
430 std::get<0>(key_tmp) = std::get<2>(key_tmp);
431 std::get<1>(key_tmp) = 0;
// Slot 1 and xmm7_ are loaded from the rearranged key_tmp.
432 AESKeyInit::eval<0, 1>(key_tmp, ks, xmm7_);
// Clear the scratch registers used by the expansion helpers.
434 xmm3_ = _mm_setzero_si128();
435 xmm6_ = _mm_setzero_si128();
// xmm4_ lanes (low to high) = xmm7_ lanes 3, 3, 0, 1.
436 xmm4_ = _mm_shuffle_epi32(xmm7_, 0x4F);
// Byte buffer 16 bytes larger than the Rp1 * 16 bytes of `ks`.
438 std::array<unsigned char, Rp1 * 16 + 16> ks_tmp;
// Fill the buffer by compile-time recursion; enabled only when
// 24 < Rp1 * 16.
439 generate_seq<1, Rp1>(
440 ks_tmp.data(), std::integral_constant<bool, 24 < Rp1 * 16>());
// NOTE(review): the callee name for the next argument list is on a line not
// visible here; the arguments match copy_key(ks, ptr, tag) — confirm.
442 ks, ks_tmp.data(), std::integral_constant<bool, 24 < Rp1 * 16>());
// Raw copy of all Rp1 elements into the output array.
444 std::memcpy(key_seq.data(), ks.data(),
sizeof(__m128i) * Rp1);
// generate_seq() overload selected by a std::false_type tag; both template
// parameters and the pointer argument are unnamed, hence unused.
// NOTE(review): body not visible; presumably the empty end of the
// recursion — confirm.
456 template <std::
size_t, std::
size_t>
457 void generate_seq(
unsigned char *, std::false_type)
// Recursive step N over the byte buffer `ks_ptr`. All bounds are expressed
// in bytes against the Rp1 * 16 bytes occupied by the __m128i array.
461 template <std::
size_t N, std::
size_t Rp1>
462 void generate_seq(
unsigned char *ks_ptr, std::true_type)
// Writes 16 bytes at byte offset N * 24 (see generate_key).
464 generate_key<N>(ks_ptr);
// NOTE(review): the callee name for the next argument list is on a line not
// visible here; the arguments match complete_key<N>(ptr, tag), which writes
// at offset N * 24 + 16 — confirm. Enabled only when N * 24 + 16 < Rp1 * 16.
466 ks_ptr, std::integral_constant<bool, N * 24 + 16 < Rp1 * 16>());
// Recurse to step N + 1 while its first write offset, (N + 1) * 24, is
// still below Rp1 * 16.
467 generate_seq<N + 1, Rp1>(
468 ks_ptr, std::integral_constant<bool, N * 24 + 24 < Rp1 * 16>());
// Step N, first part: runs generate_key_expansion() and stores the resulting
// xmm1_ (16 bytes, unaligned store) at byte offset N * 24 of `ks_ptr`.
// NOTE(review): the source lines between the signature and the expansion
// call are not visible here; presumably they prepare xmm2_ — confirm.
471 template <std::
size_t N>
472 void generate_key(
unsigned char *ks_ptr)
478 generate_key_expansion();
479 _mm_storeu_si128(reinterpret_cast<__m128i *>(ks_ptr + N * 24), xmm1_);
// complete_key() overload selected by a std::false_type tag; the index and
// the pointer argument are unnamed, hence unused.
// NOTE(review): body not visible; presumably empty — confirm.
482 template <std::
size_t>
483 void complete_key(
unsigned char *, std::false_type)
// Step N, second part: runs complete_key_expansion() and then passes the
// address ks_ptr + N * 24 + 16 together with xmm7_ to a store.
// NOTE(review): the lines before the expansion call and the name of the
// final callee are not visible here; presumably _mm_storeu_si128, as in
// generate_key — confirm.
487 template <std::
size_t N>
488 void complete_key(
unsigned char *ks_ptr, std::true_type)
493 complete_key_expansion();
495 reinterpret_cast<__m128i *>(ks_ptr + N * 24 + 16), xmm7_);
// Updates xmm1_ in place from xmm2_ and the carry register xmm3_.
// _mm_shuffle_ps(a, b, imm) takes result lanes 0-1 from `a` and lanes 2-3
// from `b`, each selected by a 2-bit field of imm.
498 void generate_key_expansion()
// Broadcast 32-bit lane 3 of xmm2_ to all four lanes.
500 xmm2_ = _mm_shuffle_epi32(xmm2_, 0xFF);
// xmm3_ lanes (low to high) = xmm3_[0], xmm3_[0], xmm1_[1], xmm1_[0].
501 xmm3_ = _mm_castps_si128(_mm_shuffle_ps(
502 _mm_castsi128_ps(xmm3_), _mm_castsi128_ps(xmm1_), 0x10));
503 xmm1_ = _mm_xor_si128(xmm1_, xmm3_);
// xmm3_ lanes (low to high) = xmm3_[0], xmm3_[3], xmm1_[0], xmm1_[2],
// using the xmm1_ just updated above.
504 xmm3_ = _mm_castps_si128(_mm_shuffle_ps(
505 _mm_castsi128_ps(xmm3_), _mm_castsi128_ps(xmm1_), 0x8C));
506 xmm1_ = _mm_xor_si128(xmm1_, xmm3_);
// Fold the broadcast word into every lane.
507 xmm1_ = _mm_xor_si128(xmm1_, xmm2_);
// Updates xmm4_ from xmm1_ and the carry register xmm6_, then derives xmm7_
// from the new xmm4_. xmm5_ is scratch.
510 void complete_key_expansion()
// xmm5_ = xmm4_ shifted left by 4 bytes.
512 xmm5_ = _mm_load_si128(&xmm4_);
513 xmm5_ = _mm_slli_si128(xmm5_, 0x04);
// xmm6_ lanes (low to high) = xmm6_[0], xmm6_[0], xmm1_[3], xmm1_[3].
514 xmm6_ = _mm_castps_si128(_mm_shuffle_ps(
515 _mm_castsi128_ps(xmm6_), _mm_castsi128_ps(xmm1_), 0xF0));
516 xmm6_ = _mm_xor_si128(xmm6_, xmm5_);
517 xmm4_ = _mm_xor_si128(xmm4_, xmm6_);
// xmm7_ lanes (low to high) = xmm4_ lanes 2, 3, 0, 0, i.e. the upper 64 bits
// of xmm4_ moved into the lower 64 bits.
518 xmm7_ = _mm_shuffle_epi32(xmm4_, 0x0E);
// Overload selected by a std::false_type tag; both the array and the pointer
// argument are unnamed, hence unused.
// NOTE(review): the function-name line and the body are not visible in this
// listing; presumably the no-op counterpart of copy_key — confirm.
521 template <std::
size_t Rp1>
523 std::array<__m128i, Rp1> &,
const unsigned char *, std::false_type)
// Copies the tail of the byte buffer into the __m128i array: Rp1 * 16 - 24
// bytes, starting at byte offset 24 in both source and destination. The
// first 24 bytes of `ks` are left untouched by this function.
// NOTE(review): the remainder of the parameter list is on lines not visible
// here.
527 template <std::
size_t Rp1>
528 void copy_key(std::array<__m128i, Rp1> &ks,
const unsigned char *ks_ptr,
// View the destination array as raw bytes so byte offsets can be applied.
531 unsigned char *dst =
reinterpret_cast<unsigned char *
>(ks.data());
532 std::memcpy(dst + 24, ks_ptr + 24, Rp1 * 16 - 24);
// Generator body: seeds slot 0 from the key, generates slots from index 2
// upwards by compile-time recursion, and copies the Rp1 results into
// `key_seq`. (The class header and the function signature line are not
// visible in this listing.)
536 template <
typename T>
542 template <std::
size_t Rp1>
545 std::array<__m128i, Rp1> ks;
// Slot 0 and xmm1_ are loaded from the start of the key.
546 AESKeyInit::eval<0, 0>(key, ks, xmm1_);
// NOTE(review): one source line between these two calls is not visible
// here; presumably it initialises slot 1 — confirm.
// Recursion starts at slot 2 and is enabled only when 2 < Rp1.
548 generate_seq<2>(ks, std::integral_constant<bool, 2 < Rp1>());
// Raw copy of all Rp1 elements into the output array.
549 std::memcpy(key_seq.data(), ks.data(),
sizeof(__m128i) * Rp1);
// generate_seq() overload selected by a std::false_type tag; the index and
// the array argument are unnamed, hence unused.
// NOTE(review): body not visible; presumably the empty end of the
// recursion — confirm.
558 template <std::
size_t, std::
size_t Rp1>
559 void generate_seq(std::array<__m128i, Rp1> &, std::false_type)
// Recursive step for slot N: produces ks[N] via generate_key, then recurses
// for slot N + 1 for as long as N + 1 < Rp1.
563 template <std::
size_t N, std::
size_t Rp1>
564 void generate_seq(std::array<__m128i, Rp1> &ks, std::true_type)
// Even N selects the std::true_type overload of generate_key, odd N the
// std::false_type overload.
566 generate_key<N>(ks, std::integral_constant<bool, N % 2 == 0>());
567 generate_seq<N + 1>(ks, std::integral_constant<bool, N + 1 < Rp1>());
// generate_key() overload for the std::true_type tag (even N, per the
// dispatch in generate_seq): runs expand_key(std::true_type), which updates
// xmm1_, and stores xmm1_ into ks[N].
// NOTE(review): the source lines between the signature and the expand_key
// call are not visible here; presumably they prepare xmm2_ — confirm.
570 template <std::
size_t N, std::
size_t Rp1>
571 void generate_key(std::array<__m128i, Rp1> &ks, std::true_type)
575 expand_key(std::true_type());
576 std::get<N>(ks) = xmm1_;
// generate_key() overload for the std::false_type tag (odd N, per the
// dispatch in generate_seq): derives xmm4_ from xmm1_, runs
// expand_key(std::false_type), which updates xmm3_, and stores xmm3_ into
// ks[N].
579 template <std::
size_t N, std::
size_t Rp1>
580 void generate_key(std::array<__m128i, Rp1> &ks, std::false_type)
// Key-generation assist on xmm1_ with a round-constant immediate of 0.
582 xmm4_ = _mm_aeskeygenassist_si128(xmm1_, 0);
583 expand_key(std::false_type());
584 std::get<N>(ks) = xmm3_;
// Updates xmm1_ in place from xmm2_; xmm4_ is used as scratch and is
// overwritten.
587 void expand_key(std::true_type)
// Broadcast 32-bit lane 3 of xmm2_ to all four lanes.
589 xmm2_ = _mm_shuffle_epi32(xmm2_, 0xFF);
// The next six lines compute
//   xmm1_ ^= (x << 4 bytes) ^ (x << 8 bytes) ^ (x << 12 bytes)
// where x is the value of xmm1_ on entry, so each 32-bit lane k becomes the
// XOR of the original lanes 0..k.
590 xmm4_ = _mm_slli_si128(xmm1_, 0x04);
591 xmm1_ = _mm_xor_si128(xmm1_, xmm4_);
592 xmm4_ = _mm_slli_si128(xmm4_, 0x04);
593 xmm1_ = _mm_xor_si128(xmm1_, xmm4_);
594 xmm4_ = _mm_slli_si128(xmm4_, 0x04);
595 xmm1_ = _mm_xor_si128(xmm1_, xmm4_);
// Fold the broadcast word into every lane.
596 xmm1_ = _mm_xor_si128(xmm1_, xmm2_);
// Updates xmm3_ in place from xmm4_; xmm2_ and xmm4_ are overwritten.
599 void expand_key(std::false_type)
// Broadcast 32-bit lane 2 of xmm4_ to all four lanes of xmm2_. This must
// happen before xmm4_ is reused as scratch below.
601 xmm2_ = _mm_shuffle_epi32(xmm4_, 0xAA);
// The next six lines compute
//   xmm3_ ^= (y << 4 bytes) ^ (y << 8 bytes) ^ (y << 12 bytes)
// where y is the value of xmm3_ on entry, so each 32-bit lane k becomes the
// XOR of the original lanes 0..k.
602 xmm4_ = _mm_slli_si128(xmm3_, 0x04);
603 xmm3_ = _mm_xor_si128(xmm3_, xmm4_);
604 xmm4_ = _mm_slli_si128(xmm4_, 0x04);
605 xmm3_ = _mm_xor_si128(xmm3_, xmm4_);
606 xmm4_ = _mm_slli_si128(xmm4_, 0x04);
607 xmm3_ = _mm_xor_si128(xmm3_, xmm4_);
// Fold the broadcast word into every lane.
608 xmm3_ = _mm_xor_si128(xmm3_, xmm2_);
616 template <
typename ResultType, std::
size_t Rounds>
622 template <
typename ResultType, std::
size_t Rounds>
628 template <
typename ResultType, std::
size_t Rounds>
634 template <
typename ResultType, std::
size_t Blocks = VSMC_RNG_AES_BLOCKS>
640 template <
typename ResultType, std::
size_t Blocks = VSMC_RNG_AES_BLOCKS>
646 template <
typename ResultType, std::
size_t Blocks = VSMC_RNG_AES_BLOCKS>
772 #endif // VSMC_RNG_AES_HPP void operator()(const key_type &key, std::array< M128I<>, Rp1 > &key_seq)
std::array< T, 24/sizeof(T)> key_type
std::array< T, 32/sizeof(T)> key_type
void reset(const key_type &key)
typename KeySeqGenerator::key_type key_type
Counter based RNG engine.
#define VSMC_DEFINE_RNG_AES_ROUND_CONSTANT(N, val)
static void eval(const std::array< T, KeySize > &key, std::array< __m128i, Rp1 > &ks, __m128i &xmm)
void operator()(const key_type &key, std::array< M128I<>, Rp1 > &key_seq)
std::array< T, 16/sizeof(T)> key_type
void operator()(const key_type &key, std::array< M128I<>, Rp1 > &key_seq)
void operator()(const key_type &, std::array< M128I< U >, Rounds+1 > &rk) const