QEMU with hacks to log DMA reads & writes (see jcs.org/2018/11/12/vfio)

target/arm: Convert aes and sm4 to gvec helpers

With this conversion, we will be able to use the same helpers
with SVE. In particular, pass 3 vector parameters for the
3-operand operations; for AdvSIMD the destination register
is also an input.

This also fixes a bug in which we failed to clear the high bits
of the SVE register after an AdvSIMD operation.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20200514212831.31248-2-richard.henderson@linaro.org
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>

Authored by Richard Henderson and committed by Peter Maydell.
a04b68e1 fc417e5b

+138 -67
+52 -20
target/arm/crypto_helper.c
··· 13 13 14 14 #include "cpu.h" 15 15 #include "exec/helper-proto.h" 16 + #include "tcg/tcg-gvec-desc.h" 16 17 #include "crypto/aes.h" 18 + #include "vec_internal.h" 17 19 18 20 union CRYPTO_STATE { 19 21 uint8_t bytes[16]; ··· 29 31 #define CR_ST_WORD(state, i) (state.words[i]) 30 32 #endif 31 33 32 - void HELPER(crypto_aese)(void *vd, void *vm, uint32_t decrypt) 34 + static void do_crypto_aese(uint64_t *rd, uint64_t *rn, 35 + uint64_t *rm, bool decrypt) 33 36 { 34 37 static uint8_t const * const sbox[2] = { AES_sbox, AES_isbox }; 35 38 static uint8_t const * const shift[2] = { AES_shifts, AES_ishifts }; 36 - uint64_t *rd = vd; 37 - uint64_t *rm = vm; 38 39 union CRYPTO_STATE rk = { .l = { rm[0], rm[1] } }; 39 - union CRYPTO_STATE st = { .l = { rd[0], rd[1] } }; 40 + union CRYPTO_STATE st = { .l = { rn[0], rn[1] } }; 40 41 int i; 41 - 42 - assert(decrypt < 2); 43 42 44 43 /* xor state vector with round key */ 45 44 rk.l[0] ^= st.l[0]; ··· 54 53 rd[1] = st.l[1]; 55 54 } 56 55 57 - void HELPER(crypto_aesmc)(void *vd, void *vm, uint32_t decrypt) 56 + void HELPER(crypto_aese)(void *vd, void *vn, void *vm, uint32_t desc) 57 + { 58 + intptr_t i, opr_sz = simd_oprsz(desc); 59 + bool decrypt = simd_data(desc); 60 + 61 + for (i = 0; i < opr_sz; i += 16) { 62 + do_crypto_aese(vd + i, vn + i, vm + i, decrypt); 63 + } 64 + clear_tail(vd, opr_sz, simd_maxsz(desc)); 65 + } 66 + 67 + static void do_crypto_aesmc(uint64_t *rd, uint64_t *rm, bool decrypt) 58 68 { 59 69 static uint32_t const mc[][256] = { { 60 70 /* MixColumns lookup table */ ··· 190 200 0xbe805d9f, 0xb58d5491, 0xa89a4f83, 0xa397468d, 191 201 } }; 192 202 193 - uint64_t *rd = vd; 194 - uint64_t *rm = vm; 195 203 union CRYPTO_STATE st = { .l = { rm[0], rm[1] } }; 196 204 int i; 197 - 198 - assert(decrypt < 2); 199 205 200 206 for (i = 0; i < 16; i += 4) { 201 207 CR_ST_WORD(st, i >> 2) = ··· 207 213 208 214 rd[0] = st.l[0]; 209 215 rd[1] = st.l[1]; 216 + } 217 + 218 + void HELPER(crypto_aesmc)(void *vd, void *vm, 
uint32_t desc) 219 + { 220 + intptr_t i, opr_sz = simd_oprsz(desc); 221 + bool decrypt = simd_data(desc); 222 + 223 + for (i = 0; i < opr_sz; i += 16) { 224 + do_crypto_aesmc(vd + i, vm + i, decrypt); 225 + } 226 + clear_tail(vd, opr_sz, simd_maxsz(desc)); 210 227 } 211 228 212 229 /* ··· 638 655 0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48, 639 656 }; 640 657 641 - void HELPER(crypto_sm4e)(void *vd, void *vn) 658 + static void do_crypto_sm4e(uint64_t *rd, uint64_t *rn, uint64_t *rm) 642 659 { 643 - uint64_t *rd = vd; 644 - uint64_t *rn = vn; 645 - union CRYPTO_STATE d = { .l = { rd[0], rd[1] } }; 646 - union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; 660 + union CRYPTO_STATE d = { .l = { rn[0], rn[1] } }; 661 + union CRYPTO_STATE n = { .l = { rm[0], rm[1] } }; 647 662 uint32_t t, i; 648 663 649 664 for (i = 0; i < 4; i++) { ··· 665 680 rd[1] = d.l[1]; 666 681 } 667 682 668 - void HELPER(crypto_sm4ekey)(void *vd, void *vn, void* vm) 683 + void HELPER(crypto_sm4e)(void *vd, void *vn, void *vm, uint32_t desc) 684 + { 685 + intptr_t i, opr_sz = simd_oprsz(desc); 686 + 687 + for (i = 0; i < opr_sz; i += 16) { 688 + do_crypto_sm4e(vd + i, vn + i, vm + i); 689 + } 690 + clear_tail(vd, opr_sz, simd_maxsz(desc)); 691 + } 692 + 693 + static void do_crypto_sm4ekey(uint64_t *rd, uint64_t *rn, uint64_t *rm) 669 694 { 670 - uint64_t *rd = vd; 671 - uint64_t *rn = vn; 672 - uint64_t *rm = vm; 673 695 union CRYPTO_STATE d; 674 696 union CRYPTO_STATE n = { .l = { rn[0], rn[1] } }; 675 697 union CRYPTO_STATE m = { .l = { rm[0], rm[1] } }; ··· 693 715 rd[0] = d.l[0]; 694 716 rd[1] = d.l[1]; 695 717 } 718 + 719 + void HELPER(crypto_sm4ekey)(void *vd, void *vn, void* vm, uint32_t desc) 720 + { 721 + intptr_t i, opr_sz = simd_oprsz(desc); 722 + 723 + for (i = 0; i < opr_sz; i += 16) { 724 + do_crypto_sm4ekey(vd + i, vn + i, vm + i); 725 + } 726 + clear_tail(vd, opr_sz, simd_maxsz(desc)); 727 + }
+3 -3
target/arm/helper.h
··· 510 510 DEF_HELPER_FLAGS_2(neon_qzip16, TCG_CALL_NO_RWG, void, ptr, ptr) 511 511 DEF_HELPER_FLAGS_2(neon_qzip32, TCG_CALL_NO_RWG, void, ptr, ptr) 512 512 513 - DEF_HELPER_FLAGS_3(crypto_aese, TCG_CALL_NO_RWG, void, ptr, ptr, i32) 513 + DEF_HELPER_FLAGS_4(crypto_aese, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) 514 514 DEF_HELPER_FLAGS_3(crypto_aesmc, TCG_CALL_NO_RWG, void, ptr, ptr, i32) 515 515 516 516 DEF_HELPER_FLAGS_4(crypto_sha1_3reg, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) ··· 531 531 DEF_HELPER_FLAGS_3(crypto_sm3partw1, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) 532 532 DEF_HELPER_FLAGS_3(crypto_sm3partw2, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) 533 533 534 - DEF_HELPER_FLAGS_2(crypto_sm4e, TCG_CALL_NO_RWG, void, ptr, ptr) 535 - DEF_HELPER_FLAGS_3(crypto_sm4ekey, TCG_CALL_NO_RWG, void, ptr, ptr, ptr) 534 + DEF_HELPER_FLAGS_4(crypto_sm4e, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) 535 + DEF_HELPER_FLAGS_4(crypto_sm4ekey, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) 536 536 537 537 DEF_HELPER_FLAGS_3(crc32, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32) 538 538 DEF_HELPER_FLAGS_3(crc32c, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
+35 -20
target/arm/translate-a64.c
··· 571 571 is_q ? 16 : 8, vec_full_reg_size(s)); 572 572 } 573 573 574 + /* Expand a 2-operand operation using an out-of-line helper. */ 575 + static void gen_gvec_op2_ool(DisasContext *s, bool is_q, int rd, 576 + int rn, int data, gen_helper_gvec_2 *fn) 577 + { 578 + tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd), 579 + vec_full_reg_offset(s, rn), 580 + is_q ? 16 : 8, vec_full_reg_size(s), data, fn); 581 + } 582 + 574 583 /* Expand a 3-operand operation using an out-of-line helper. */ 575 584 static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd, 576 585 int rn, int rm, int data, gen_helper_gvec_3 *fn) ··· 13403 13412 int rn = extract32(insn, 5, 5); 13404 13413 int rd = extract32(insn, 0, 5); 13405 13414 int decrypt; 13406 - TCGv_ptr tcg_rd_ptr, tcg_rn_ptr; 13407 - TCGv_i32 tcg_decrypt; 13408 - CryptoThreeOpIntFn *genfn; 13415 + gen_helper_gvec_2 *genfn2 = NULL; 13416 + gen_helper_gvec_3 *genfn3 = NULL; 13409 13417 13410 13418 if (!dc_isar_feature(aa64_aes, s) || size != 0) { 13411 13419 unallocated_encoding(s); ··· 13415 13423 switch (opcode) { 13416 13424 case 0x4: /* AESE */ 13417 13425 decrypt = 0; 13418 - genfn = gen_helper_crypto_aese; 13426 + genfn3 = gen_helper_crypto_aese; 13419 13427 break; 13420 13428 case 0x6: /* AESMC */ 13421 13429 decrypt = 0; 13422 - genfn = gen_helper_crypto_aesmc; 13430 + genfn2 = gen_helper_crypto_aesmc; 13423 13431 break; 13424 13432 case 0x5: /* AESD */ 13425 13433 decrypt = 1; 13426 - genfn = gen_helper_crypto_aese; 13434 + genfn3 = gen_helper_crypto_aese; 13427 13435 break; 13428 13436 case 0x7: /* AESIMC */ 13429 13437 decrypt = 1; 13430 - genfn = gen_helper_crypto_aesmc; 13438 + genfn2 = gen_helper_crypto_aesmc; 13431 13439 break; 13432 13440 default: 13433 13441 unallocated_encoding(s); ··· 13437 13445 if (!fp_access_check(s)) { 13438 13446 return; 13439 13447 } 13440 - 13441 - tcg_rd_ptr = vec_full_reg_ptr(s, rd); 13442 - tcg_rn_ptr = vec_full_reg_ptr(s, rn); 13443 - tcg_decrypt = tcg_const_i32(decrypt); 13444 
- 13445 - genfn(tcg_rd_ptr, tcg_rn_ptr, tcg_decrypt); 13446 - 13447 - tcg_temp_free_ptr(tcg_rd_ptr); 13448 - tcg_temp_free_ptr(tcg_rn_ptr); 13449 - tcg_temp_free_i32(tcg_decrypt); 13448 + if (genfn2) { 13449 + gen_gvec_op2_ool(s, true, rd, rn, decrypt, genfn2); 13450 + } else { 13451 + gen_gvec_op3_ool(s, true, rd, rd, rn, decrypt, genfn3); 13452 + } 13450 13453 } 13451 13454 13452 13455 /* Crypto three-reg SHA ··· 13595 13598 int rn = extract32(insn, 5, 5); 13596 13599 int rd = extract32(insn, 0, 5); 13597 13600 bool feature; 13598 - CryptoThreeOpFn *genfn; 13601 + CryptoThreeOpFn *genfn = NULL; 13602 + gen_helper_gvec_3 *oolfn = NULL; 13599 13603 13600 13604 if (o == 0) { 13601 13605 switch (opcode) { ··· 13630 13634 break; 13631 13635 case 2: /* SM4EKEY */ 13632 13636 feature = dc_isar_feature(aa64_sm4, s); 13633 - genfn = gen_helper_crypto_sm4ekey; 13637 + oolfn = gen_helper_crypto_sm4ekey; 13634 13638 break; 13635 13639 default: 13636 13640 unallocated_encoding(s); ··· 13647 13651 return; 13648 13652 } 13649 13653 13654 + if (oolfn) { 13655 + gen_gvec_op3_ool(s, true, rd, rn, rm, 0, oolfn); 13656 + return; 13657 + } 13658 + 13650 13659 if (genfn) { 13651 13660 TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr; 13652 13661 ··· 13699 13708 TCGv_ptr tcg_rd_ptr, tcg_rn_ptr; 13700 13709 bool feature; 13701 13710 CryptoTwoOpFn *genfn; 13711 + gen_helper_gvec_3 *oolfn = NULL; 13702 13712 13703 13713 switch (opcode) { 13704 13714 case 0: /* SHA512SU0 */ ··· 13707 13717 break; 13708 13718 case 1: /* SM4E */ 13709 13719 feature = dc_isar_feature(aa64_sm4, s); 13710 - genfn = gen_helper_crypto_sm4e; 13720 + oolfn = gen_helper_crypto_sm4e; 13711 13721 break; 13712 13722 default: 13713 13723 unallocated_encoding(s); ··· 13720 13730 } 13721 13731 13722 13732 if (!fp_access_check(s)) { 13733 + return; 13734 + } 13735 + 13736 + if (oolfn) { 13737 + gen_gvec_op3_ool(s, true, rd, rd, rn, 0, oolfn); 13723 13738 return; 13724 13739 } 13725 13740
+14 -13
target/arm/translate.c
··· 6350 6350 if (!dc_isar_feature(aa32_aes, s) || ((rm | rd) & 1)) { 6351 6351 return 1; 6352 6352 } 6353 - ptr1 = vfp_reg_ptr(true, rd); 6354 - ptr2 = vfp_reg_ptr(true, rm); 6355 - 6356 - /* Bit 6 is the lowest opcode bit; it distinguishes between 6357 - * encryption (AESE/AESMC) and decryption (AESD/AESIMC) 6358 - */ 6359 - tmp3 = tcg_const_i32(extract32(insn, 6, 1)); 6360 - 6353 + /* 6354 + * Bit 6 is the lowest opcode bit; it distinguishes 6355 + * between encryption (AESE/AESMC) and decryption 6356 + * (AESD/AESIMC). 6357 + */ 6361 6358 if (op == NEON_2RM_AESE) { 6362 - gen_helper_crypto_aese(ptr1, ptr2, tmp3); 6359 + tcg_gen_gvec_3_ool(vfp_reg_offset(true, rd), 6360 + vfp_reg_offset(true, rd), 6361 + vfp_reg_offset(true, rm), 6362 + 16, 16, extract32(insn, 6, 1), 6363 + gen_helper_crypto_aese); 6363 6364 } else { 6364 - gen_helper_crypto_aesmc(ptr1, ptr2, tmp3); 6365 + tcg_gen_gvec_2_ool(vfp_reg_offset(true, rd), 6366 + vfp_reg_offset(true, rm), 6367 + 16, 16, extract32(insn, 6, 1), 6368 + gen_helper_crypto_aesmc); 6365 6369 } 6366 - tcg_temp_free_ptr(ptr1); 6367 - tcg_temp_free_ptr(ptr2); 6368 - tcg_temp_free_i32(tmp3); 6369 6370 break; 6370 6371 case NEON_2RM_SHA1H: 6371 6372 if (!dc_isar_feature(aa32_sha1, s) || ((rm | rd) & 1)) {
+1 -11
target/arm/vec_helper.c
··· 22 22 #include "exec/helper-proto.h" 23 23 #include "tcg/tcg-gvec-desc.h" 24 24 #include "fpu/softfloat.h" 25 - 25 + #include "vec_internal.h" 26 26 27 27 /* Note that vector data is stored in host-endian 64-bit chunks, 28 28 so addressing units smaller than that needs a host-endian fixup. */ ··· 35 35 #define H2(x) (x) 36 36 #define H4(x) (x) 37 37 #endif 38 - 39 - static void clear_tail(void *vd, uintptr_t opr_sz, uintptr_t max_sz) 40 - { 41 - uint64_t *d = vd + opr_sz; 42 - uintptr_t i; 43 - 44 - for (i = opr_sz; i < max_sz; i += 8) { 45 - *d++ = 0; 46 - } 47 - } 48 38 49 39 /* Signed saturating rounding doubling multiply-accumulate high half, 16-bit */ 50 40 static int16_t inl_qrdmlah_s16(int16_t src1, int16_t src2,
+33
target/arm/vec_internal.h
··· 1 + /* 2 + * ARM AdvSIMD / SVE Vector Helpers 3 + * 4 + * Copyright (c) 2020 Linaro 5 + * 6 + * This library is free software; you can redistribute it and/or 7 + * modify it under the terms of the GNU Lesser General Public 8 + * License as published by the Free Software Foundation; either 9 + * version 2 of the License, or (at your option) any later version. 10 + * 11 + * This library is distributed in the hope that it will be useful, 12 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 + * Lesser General Public License for more details. 15 + * 16 + * You should have received a copy of the GNU Lesser General Public 17 + * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18 + */ 19 + 20 + #ifndef TARGET_ARM_VEC_INTERNALS_H 21 + #define TARGET_ARM_VEC_INTERNALS_H 22 + 23 + static inline void clear_tail(void *vd, uintptr_t opr_sz, uintptr_t max_sz) 24 + { 25 + uint64_t *d = vd + opr_sz; 26 + uintptr_t i; 27 + 28 + for (i = opr_sz; i < max_sz; i += 8) { 29 + *d++ = 0; 30 + } 31 + } 32 + 33 + #endif /* TARGET_ARM_VEC_INTERNALS_H */