32 #ifndef VSMC_RNG_AES_NI_HPP 33 #define VSMC_RNG_AES_NI_HPP 37 #include <wmmintrin.h> 41 #pragma GCC diagnostic push 42 #pragma GCC diagnostic ignored "-Wignored-attributes" 46 #define VSMC_DEFINE_RNG_AES_KEY_GEN_ASSIST(N, val) \ 48 inline __m128i AESKeyGenAssist<N>(const __m128i &xmm) \ 50 return _mm_aeskeygenassist_si128(xmm, val); \ 54 #ifndef VSMC_RNG_AES128_ROUNDS 55 #define VSMC_RNG_AES128_ROUNDS 10 59 #ifndef VSMC_RNG_AES192_ROUNDS 60 #define VSMC_RNG_AES192_ROUNDS 12 64 #ifndef VSMC_RNG_AES256_ROUNDS 65 #define VSMC_RNG_AES256_ROUNDS 14 70 #ifndef VSMC_RNG_ARS_ROUNDS 71 #define VSMC_RNG_ARS_ROUNDS 5 76 #ifndef VSMC_RNG_AES_NI_BLOCKS 77 #define VSMC_RNG_AES_NI_BLOCKS 8 85 template <
typename KeySeqType, std::
size_t Rounds, std::
size_t Blocks>
89 Rounds != 0,
"**AESNIGenerator** USED WITH ROUNDS EQUAL TO ZERO");
92 Blocks != 0,
"**AESNIGenerator** USED WITH Blocks EQUAL TO ZERO");
96 using key_type =
typename KeySeqType::key_type;
// Number of bytes covered by one batch of Blocks 128-bit values:
// Blocks * sizeof(__m128i). Evaluated at compile time; the operator()
// overloads in this class use it to size their result arrays as
// size() / sizeof(ResultType).
98 static constexpr std::size_t
size() {
return Blocks *
sizeof(__m128i); }
105 std::array<__m128i, 1> state;
109 std::array<__m128i, Rounds + 1> rk_tmp;
110 const std::array<__m128i, Rounds + 1> &rk = key_seq_(rk_tmp);
117 template <
typename ResultType>
119 std::array<ResultType,
size() /
sizeof(ResultType)> &buffer)
const 122 std::array<__m128i, Blocks> state;
123 std::array<ctr_type, Blocks> ctr_block;
124 std::array<ResultType,
size() /
sizeof(ResultType)> result;
127 std::array<__m128i, Rounds + 1> rk_tmp;
128 const std::array<__m128i, Rounds + 1> &rk = key_seq_(rk_tmp);
135 template <
typename ResultType>
137 std::array<ResultType,
size() /
sizeof(ResultType)> *buffer)
const 140 std::array<__m128i, Blocks> state;
141 std::array<ctr_type, Blocks> ctr_block;
142 std::array<ResultType,
size() /
sizeof(ResultType)> result;
145 std::array<__m128i, Rounds + 1> rk_tmp;
146 const std::array<__m128i, Rounds + 1> &rk = key_seq_(rk_tmp);
148 for (std::size_t i = 0; i != n; ++i) {
151 buffer[i] = buf.result;
159 return gen1.key_seq_ == gen2.key_seq_;
166 return !(gen1 == gen2);
169 template <
typename CharT,
typename Traits>
171 std::basic_ostream<CharT, Traits> &os,
182 template <
typename CharT,
typename Traits>
184 std::basic_istream<CharT, Traits> &is,
191 is >> std::ws >> gen_tmp.key_seq_;
194 gen = std::move(gen_tmp);
202 template <std::
size_t K, std::
size_t Rp1>
203 void enc(std::array<__m128i, K> &state,
204 const std::array<__m128i, Rp1> &rk)
const 206 enc_first(state, rk);
207 enc_round<1>(state, rk, std::integral_constant<bool, 2 < Rp1>());
// Entry point of the first encryption step over all K state blocks.
// It only starts the compile-time recursion: the indexed overload is
// called with block index 0 and std::true_type, and that overload advances
// to B + 1 for as long as B + 1 < K.
// NOTE(review): passing std::true_type unconditionally presumes K >= 1 --
// confirm against the Blocks != 0 static_assert of the enclosing class.
211 template <std::
size_t K, std::
size_t Rp1>
212 void enc_first(std::array<__m128i, K> &state,
213 const std::array<__m128i, Rp1> &rk)
const 215 enc_first<0>(state, rk, std::true_type());
218 template <std::
size_t, std::
size_t K, std::
size_t Rp1>
219 void enc_first(std::array<__m128i, K> &,
const std::array<__m128i, Rp1> &,
220 std::false_type)
const 224 template <std::
size_t B, std::
size_t K, std::
size_t Rp1>
225 void enc_first(std::array<__m128i, K> &state,
226 const std::array<__m128i, Rp1> &rk, std::true_type)
const 229 _mm_xor_si128(std::get<B>(state), std::get<0>(rk));
230 enc_first<B + 1>(state, rk, std::integral_constant<bool, B + 1 < K>());
233 template <std::
size_t, std::
size_t K, std::
size_t Rp1>
234 void enc_round(std::array<__m128i, K> &,
const std::array<__m128i, Rp1> &,
235 std::false_type)
const 239 template <std::
size_t N, std::
size_t K, std::
size_t Rp1>
240 void enc_round(std::array<__m128i, K> &state,
241 const std::array<__m128i, Rp1> &rk, std::true_type)
const 243 enc_round_block<0, N>(state, rk, std::true_type());
245 state, rk, std::integral_constant<bool, N + 2 < Rp1>());
248 template <std::
size_t, std::
size_t, std::
size_t K, std::
size_t Rp1>
249 void enc_round_block(std::array<__m128i, K> &,
250 const std::array<__m128i, Rp1> &, std::false_type)
const 254 template <std::
size_t B, std::
size_t N, std::
size_t K, std::
size_t Rp1>
255 void enc_round_block(std::array<__m128i, K> &state,
256 const std::array<__m128i, Rp1> &rk, std::true_type)
const 260 _mm_aesenc_si128(std::get<B>(state), std::get<N>(rk));
261 enc_round_block<B + 1, N>(
262 state, rk, std::integral_constant<bool, B + 1 < K>());
// Entry point of the final encryption step over all K state blocks.
// It only starts the compile-time recursion: the indexed overload is
// called with block index 0 and std::true_type; that overload calls
// _mm_aesenclast_si128 on block B with the last round key, rk[Rp1 - 1],
// and advances to B + 1 for as long as B + 1 < K.
// NOTE(review): passing std::true_type unconditionally presumes K >= 1 --
// confirm against the Blocks != 0 static_assert of the enclosing class.
265 template <std::
size_t K, std::
size_t Rp1>
266 void enc_last(std::array<__m128i, K> &state,
267 const std::array<__m128i, Rp1> &rk)
const 269 enc_last<0>(state, rk, std::true_type());
272 template <std::
size_t, std::
size_t K, std::
size_t Rp1>
273 void enc_last(std::array<__m128i, K> &,
const std::array<__m128i, Rp1> &,
274 std::false_type)
const 278 template <std::
size_t B, std::
size_t K, std::
size_t Rp1>
279 void enc_last(std::array<__m128i, K> &state,
280 const std::array<__m128i, Rp1> &rk, std::true_type)
const 283 _mm_aesenclast_si128(std::get<B>(state), std::get<Rp1 - 1>(rk));
284 enc_last<B + 1>(state, rk, std::integral_constant<bool, B + 1 < K>());
290 template <
typename ResultType,
typename KeySeqType, std::size_t Rounds,
298 template <std::
size_t>
558 template <
std::
size_t>
569 UINT64_C(0xBB67AE8584CAA73B)>
581 template <std::
size_t I>
588 template <std::
size_t Rounds,
typename KeySeqGenerator>
592 using key_type =
typename KeySeqGenerator::key_type;
596 KeySeqGenerator generator;
597 generator(key, key_seq_);
601 std::array<__m128i, Rounds + 1> &)
const 611 std::memcpy(ks1.data(), seq1.key_seq_.data(), 16 * (Rounds + 1));
612 std::memcpy(ks2.data(), seq2.key_seq_.data(), 16 * (Rounds + 1));
620 return !(seq1 == seq2);
623 template <
typename CharT,
typename Traits>
625 std::basic_ostream<CharT, Traits> &os,
632 std::memcpy(ks.data(), seq.key_seq_.data(), 16 * (Rounds + 1));
638 template <
typename CharT,
typename Traits>
640 std::basic_istream<CharT, Traits> &is,
649 std::memcpy(seq.key_seq_.data(), ks.data(), 16 * (Rounds + 1));
655 std::array<__m128i, Rounds + 1> key_seq_;
663 template <std::
size_t Rp1>
666 xmm1_ = _mm_loadu_si128(reinterpret_cast<const __m128i *>(key.data()));
667 std::get<0>(rk) = xmm1_;
668 generate_seq<1>(rk, std::integral_constant<bool, 1 < Rp1>());
676 template <std::
size_t, std::
size_t Rp1>
677 void generate_seq(std::array<__m128i, Rp1> &, std::false_type)
681 template <std::
size_t N, std::
size_t Rp1>
682 void generate_seq(std::array<__m128i, Rp1> &rk, std::true_type)
684 xmm2_ = AESKeyGenAssist<N % 256>(xmm1_);
686 std::get<N>(rk) = xmm1_;
687 generate_seq<N + 1>(rk, std::integral_constant<bool, N + 1 < Rp1>());
692 xmm2_ = _mm_shuffle_epi32(xmm2_, 0xFF);
693 xmm3_ = _mm_slli_si128(xmm1_, 0x04);
694 xmm1_ = _mm_xor_si128(xmm1_, xmm3_);
695 xmm3_ = _mm_slli_si128(xmm3_, 0x04);
696 xmm1_ = _mm_xor_si128(xmm1_, xmm3_);
697 xmm3_ = _mm_slli_si128(xmm3_, 0x04);
698 xmm1_ = _mm_xor_si128(xmm1_, xmm3_);
699 xmm1_ = _mm_xor_si128(xmm1_, xmm2_);
708 template <std::
size_t Rp1>
711 std::array<std::uint64_t, 2> tmp = {{0, std::get<2>(key)}};
712 xmm1_ = _mm_loadu_si128(reinterpret_cast<const __m128i *>(key.data()));
713 xmm7_ = _mm_loadu_si128(reinterpret_cast<const __m128i *>(tmp.data()));
714 std::get<0>(rk) = xmm1_;
715 std::get<1>(rk) = xmm7_;
717 xmm3_ = _mm_setzero_si128();
718 xmm6_ = _mm_setzero_si128();
719 xmm4_ = _mm_shuffle_epi32(xmm7_, 0x4F);
721 std::array<unsigned char, Rp1 * 16 + 16> rk_tmp;
722 generate_seq<1, Rp1>(
723 rk_tmp.data(), std::integral_constant<bool, 24 < Rp1 * 16>());
725 rk, rk_tmp.data(), std::integral_constant<bool, 24 < Rp1 * 16>());
737 template <std::
size_t, std::
size_t>
738 void generate_seq(
unsigned char *, std::false_type)
742 template <std::
size_t N, std::
size_t Rp1>
743 void generate_seq(
unsigned char *rk_ptr, std::true_type)
745 generate_key<N>(rk_ptr);
747 rk_ptr, std::integral_constant<bool, N * 24 + 16 < Rp1 * 16>());
748 generate_seq<N + 1, Rp1>(
749 rk_ptr, std::integral_constant<bool, N * 24 + 24 < Rp1 * 16>());
752 template <std::
size_t N>
753 void generate_key(
unsigned char *rk_ptr)
758 xmm2_ = AESKeyGenAssist<N % 256>(xmm4_);
759 generate_key_expansion();
760 _mm_storeu_si128(reinterpret_cast<__m128i *>(rk_ptr + N * 24), xmm1_);
763 template <std::
size_t>
764 void complete_key(
unsigned char *, std::false_type)
768 template <std::
size_t N>
769 void complete_key(
unsigned char *rk_ptr, std::true_type)
774 complete_key_expansion();
776 reinterpret_cast<__m128i *>(rk_ptr + N * 24 + 16), xmm7_);
779 void generate_key_expansion()
781 xmm2_ = _mm_shuffle_epi32(xmm2_, 0xFF);
782 xmm3_ = _mm_castps_si128(
784 _mm_castsi128_ps(xmm3_), _mm_castsi128_ps(xmm1_), 0x10));
785 xmm1_ = _mm_xor_si128(xmm1_, xmm3_);
786 xmm3_ = _mm_castps_si128(_mm_shuffle_ps(
787 _mm_castsi128_ps(xmm3_), _mm_castsi128_ps(xmm1_), 0x8C));
788 xmm1_ = _mm_xor_si128(xmm1_, xmm3_);
789 xmm1_ = _mm_xor_si128(xmm1_, xmm2_);
792 void complete_key_expansion()
795 xmm5_ = _mm_slli_si128(xmm5_, 0x04);
796 xmm6_ = _mm_castps_si128(_mm_shuffle_ps(
797 _mm_castsi128_ps(xmm6_), _mm_castsi128_ps(xmm1_), 0xF0));
798 xmm6_ = _mm_xor_si128(xmm6_, xmm5_);
799 xmm4_ = _mm_xor_si128(xmm4_, xmm6_);
800 xmm7_ = _mm_shuffle_epi32(xmm4_, 0x0E);
803 template <std::
size_t Rp1>
805 std::array<__m128i, Rp1> &,
const unsigned char *, std::false_type)
809 template <std::
size_t Rp1>
810 void copy_key(std::array<__m128i, Rp1> &rk,
const unsigned char *rk_ptr,
813 unsigned char *dst =
reinterpret_cast<unsigned char *
>(rk.data());
814 std::memcpy(dst + 24, rk_ptr + 24, Rp1 * 16 - 24);
823 template <std::
size_t Rp1>
826 xmm1_ = _mm_loadu_si128(reinterpret_cast<const __m128i *>(key.data()));
828 _mm_loadu_si128(reinterpret_cast<const __m128i *>(key.data() + 2));
829 std::get<0>(rk) = xmm1_;
830 std::get<1>(rk) = xmm3_;
831 generate_seq<2>(rk, std::integral_constant<bool, 2 < Rp1>());
840 template <std::
size_t, std::
size_t Rp1>
841 void generate_seq(std::array<__m128i, Rp1> &, std::false_type)
// Recursive step of the round-key generation: produces rk[N], then moves
// on to N + 1.
//
// N   - index of the round key produced by this step.
// Rp1 - number of entries in rk.
// rk  - round-key array being filled in place.
//
// The second argument is a dispatch tag; this overload is the
// std::true_type case, i.e. the one chosen while the index is below Rp1.
845 template <std::
size_t N, std::
size_t Rp1>
846 void generate_seq(std::array<__m128i, Rp1> &rk, std::true_type)
// Even N selects the std::true_type generate_key overload, odd N the
// std::false_type one.
848 generate_key<N>(rk, std::integral_constant<bool, N % 2 == 0>());
// Continue while N + 1 < Rp1; otherwise the std::false_type generate_seq
// overload is selected.
849 generate_seq<N + 1>(rk, std::integral_constant<bool, N + 1 < Rp1>());
852 template <std::
size_t N, std::
size_t Rp1>
853 void generate_key(std::array<__m128i, Rp1> &rk, std::true_type)
856 expand_key(std::true_type());
857 std::get<N>(rk) = xmm1_;
// Produces round key N for the std::false_type case. The caller's tag is
// N % 2 == 0, so this overload handles odd N.
//
// N   - index of the round key to store.
// Rp1 - number of entries in rk.
// rk  - round-key array; only element N is written here.
860 template <std::
size_t N, std::
size_t Rp1>
861 void generate_key(std::array<__m128i, Rp1> &rk, std::false_type)
// Key-generation assist on xmm1_ with an immediate (round constant) of 0;
// the result is left in xmm4_ for expand_key(std::false_type) to consume.
863 xmm4_ = _mm_aeskeygenassist_si128(xmm1_, 0);
// Updates xmm3_ in place from xmm4_ (also overwrites xmm2_ and xmm4_).
864 expand_key(std::false_type());
865 std::get<N>(rk) = xmm3_;
// Key-expansion update of xmm1_ (std::true_type variant).
// Expects the key-generation-assist value in xmm2_ on entry; uses xmm4_ as
// scratch. On exit xmm1_ holds the updated value, and xmm2_ and xmm4_ have
// been overwritten.
868 void expand_key(std::true_type)
// Broadcast 32-bit element 3 of xmm2_ to all four elements.
870 xmm2_ = _mm_shuffle_epi32(xmm2_, 0xFF);
// Three rounds of "shift left by 4 bytes, XOR in": afterwards element i of
// xmm1_ is the XOR of its original elements 0..i.
871 xmm4_ = _mm_slli_si128(xmm1_, 0x04);
872 xmm1_ = _mm_xor_si128(xmm1_, xmm4_);
873 xmm4_ = _mm_slli_si128(xmm4_, 0x04);
874 xmm1_ = _mm_xor_si128(xmm1_, xmm4_);
875 xmm4_ = _mm_slli_si128(xmm4_, 0x04);
876 xmm1_ = _mm_xor_si128(xmm1_, xmm4_);
// Mix the broadcast assist word into every element.
877 xmm1_ = _mm_xor_si128(xmm1_, xmm2_);
// Key-expansion update of xmm3_ (std::false_type variant).
// Expects the key-generation-assist value in xmm4_ on entry. On exit xmm3_
// holds the updated value, and xmm2_ and xmm4_ have been overwritten.
880 void expand_key(std::false_type)
// Broadcast 32-bit element 2 of xmm4_ to all four elements. This must
// happen first: xmm4_ is reused as scratch on the very next line.
882 xmm2_ = _mm_shuffle_epi32(xmm4_, 0xAA);
// Three rounds of "shift left by 4 bytes, XOR in": afterwards element i of
// xmm3_ is the XOR of its original elements 0..i.
883 xmm4_ = _mm_slli_si128(xmm3_, 0x04);
884 xmm3_ = _mm_xor_si128(xmm3_, xmm4_);
885 xmm4_ = _mm_slli_si128(xmm4_, 0x04);
886 xmm3_ = _mm_xor_si128(xmm3_, xmm4_);
887 xmm4_ = _mm_slli_si128(xmm4_, 0x04);
888 xmm3_ = _mm_xor_si128(xmm3_, xmm4_);
// Mix the broadcast assist word into every element.
889 xmm3_ = _mm_xor_si128(xmm3_, xmm2_);
893 template <
typename Constants>
896 template <std::
size_t I>
897 using weyl =
typename Constants::template weyl<I>;
906 key_ = _mm_loadu_si128(reinterpret_cast<const __m128i *>(key.data()));
909 template <std::
size_t Rp1>
911 std::array<__m128i, Rp1> &rk)
const 913 std::array<std::uint64_t, 2> tmp = {{weyl<0>::value, weyl<1>::value}};
915 _mm_loadu_si128(reinterpret_cast<const __m128i *>(tmp.data()));
916 std::get<0>(rk) = key_;
917 generate<1>(rk, w, std::integral_constant<bool, 1 < Rp1>());
927 _mm_store_si128(reinterpret_cast<__m128i *>(k1.data()), seq1.key_);
928 _mm_store_si128(reinterpret_cast<__m128i *>(k2.data()), seq2.key_);
936 return !(seq1 == seq2);
939 template <
typename CharT,
typename Traits>
941 std::basic_ostream<CharT, Traits> &os,
const ARSKeySeqImpl &seq)
947 _mm_store_si128(reinterpret_cast<__m128i *>(k.data()), seq.key_);
953 template <
typename CharT,
typename Traits>
964 _mm_load_si128(reinterpret_cast<const __m128i *>(k.data()));
971 template <std::
size_t, std::
size_t Rp1>
973 std::array<__m128i, Rp1> &,
const __m128i &, std::false_type)
const 977 template <std::
size_t N, std::
size_t Rp1>
979 std::array<__m128i, Rp1> &rk,
const __m128i &w, std::true_type)
const 981 std::get<N>(rk) = _mm_add_epi64(std::get<N - 1>(rk), w);
982 generate<N + 1>(rk, w, std::integral_constant<bool, N + 1 < Rp1>());
993 template <std::
size_t Rounds>
999 template <std::
size_t Rounds>
1005 template <std::
size_t Rounds>
1019 template <
typename Constants = ARSConstants>
1224 #pragma GCC diagnostic pop 1228 #endif // VSMC_RNG_AES_NI_HPP
void enc(const ctr_type &ctr, ctr_type &buffer) const
void operator()(const key_type &key, std::array< __m128i, Rp1 > &rk)
#define VSMC_RNG_AES128_ROUNDS
AES-128 default rounds.
friend std::basic_istream< CharT, Traits > & operator>>(std::basic_istream< CharT, Traits > &is, AESNIGenerator< KeySeqType, Rounds, Blocks > &gen)
const std::array< __m128i, Rounds+1 > & operator()(std::array< __m128i, Rounds+1 > &) const
std::array< std::uint64_t, 2 > ctr_type
#define VSMC_DEFINE_RNG_AES_KEY_GEN_ASSIST(N, val)
void increment(std::array< T, K > &ctr)
Increment a counter by one.
__m128i AESKeyGenAssist(const __m128i &)
friend bool operator==(const AESKeySeq< Rounds, KeySeqGenerator > &seq1, const AESKeySeq< Rounds, KeySeqGenerator > &seq2)
Counter based RNG engine.
void operator()(const key_type &key, std::array< __m128i, Rp1 > &rk)
#define VSMC_RNG_AES192_ROUNDS
AES-192 default rounds.
void operator()(ctr_type &ctr, std::size_t n, std::array< ResultType, size()/sizeof(ResultType)> *buffer) const
const std::array< __m128i, Rp1 > & operator()(std::array< __m128i, Rp1 > &rk) const
friend std::basic_ostream< CharT, Traits > & operator<<(std::basic_ostream< CharT, Traits > &os, const AESNIGenerator< KeySeqType, Rounds, Blocks > &gen)
friend std::basic_ostream< CharT, Traits > & operator<<(std::basic_ostream< CharT, Traits > &os, const ARSKeySeqImpl &seq)
#define VSMC_RNG_AES256_ROUNDS
AES-256 default rounds.
friend bool operator!=(const AESKeySeq< Rounds, KeySeqGenerator > &seq1, const AESKeySeq< Rounds, KeySeqGenerator > &seq2)
std::array< std::uint64_t, 4 > key_type
void reset(const key_type &key)
#define VSMC_RNG_ARS_ROUNDS
ARSEngine default rounds.
friend std::basic_ostream< CharT, Traits > & operator<<(std::basic_ostream< CharT, Traits > &os, const AESKeySeq< Rounds, KeySeqGenerator > &seq)
friend bool operator==(const AESNIGenerator< KeySeqType, Rounds, Blocks > &gen1, const AESNIGenerator< KeySeqType, Rounds, Blocks > &gen2)
void reset(const key_type &key)
std::array< std::uint64_t, 3 > key_type
friend bool operator!=(const AESNIGenerator< KeySeqType, Rounds, Blocks > &gen1, const AESNIGenerator< KeySeqType, Rounds, Blocks > &gen2)
void reset(const key_type &key)
void operator()(ctr_type &ctr, std::array< ResultType, size()/sizeof(ResultType)> &buffer) const
friend bool operator!=(const ARSKeySeqImpl &seq1, const ARSKeySeqImpl &seq2)
typename KeySeqType::key_type key_type
friend bool operator==(const ARSKeySeqImpl &seq1, const ARSKeySeqImpl &seq2)
void operator()(const key_type &key, std::array< __m128i, Rp1 > &rk)
typename KeySeqGenerator::key_type key_type
static constexpr std::size_t size()
#define VSMC_RNG_AES_NI_BLOCKS
AESEngine default blocks.
std::array< std::uint64_t, 2 > key_type
friend std::basic_istream< CharT, Traits > & operator>>(std::basic_istream< CharT, Traits > &is, AESKeySeq< Rounds, KeySeqGenerator > &seq)
friend std::basic_istream< CharT, Traits > & operator>>(std::basic_istream< CharT, Traits > &is, ARSKeySeqImpl &seq)
std::array< std::uint64_t, 2 > key_type
RNG generator using AES-NI instructions.