qemu with hax to log dma reads & writes jcs.org/2018/11/12/vfio

target/arm: Convert Neon VSWP to decodetree

Convert the Neon VSWP insn to decodetree. Since the new implementation
doesn't have to share a pass-loop with the other 2-reg-misc operations,
we can implement the swap with 64-bit accesses rather than 32-bit ones
(which brings us into line with the pseudocode and is more efficient).

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20200616170844.13318-20-peter.maydell@linaro.org

+44 -4
+2
target/arm/neon-dp.decode
··· 488 488 VABS_F 1111 001 11 . 11 .. 01 .... 0 1110 . . 0 .... @2misc 489 489 VNEG_F 1111 001 11 . 11 .. 01 .... 0 1111 . . 0 .... @2misc 490 490 491 + VSWP 1111 001 11 . 11 .. 10 .... 0 0000 . . 0 .... @2misc 492 + 491 493 VUZP 1111 001 11 . 11 .. 10 .... 0 0010 . . 0 .... @2misc 492 494 VZIP 1111 001 11 . 11 .. 10 .... 0 0011 . . 0 .... @2misc 493 495
+41
target/arm/translate-neon.inc.c
··· 3927 3927 DO_VCVT(VCVTPS, FPROUNDING_POSINF, true) 3928 3928 DO_VCVT(VCVTMU, FPROUNDING_NEGINF, false) 3929 3929 DO_VCVT(VCVTMS, FPROUNDING_NEGINF, true) 3930 + 3931 + static bool trans_VSWP(DisasContext *s, arg_2misc *a) 3932 + { 3933 + TCGv_i64 rm, rd; 3934 + int pass; 3935 + 3936 + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 3937 + return false; 3938 + } 3939 + 3940 + /* UNDEF accesses to D16-D31 if they don't exist. */ 3941 + if (!dc_isar_feature(aa32_simd_r32, s) && 3942 + ((a->vd | a->vm) & 0x10)) { 3943 + return false; 3944 + } 3945 + 3946 + if (a->size != 0) { 3947 + return false; 3948 + } 3949 + 3950 + if ((a->vd | a->vm) & a->q) { 3951 + return false; 3952 + } 3953 + 3954 + if (!vfp_access_check(s)) { 3955 + return true; 3956 + } 3957 + 3958 + rm = tcg_temp_new_i64(); 3959 + rd = tcg_temp_new_i64(); 3960 + for (pass = 0; pass < (a->q ? 2 : 1); pass++) { 3961 + neon_load_reg64(rm, a->vm + pass); 3962 + neon_load_reg64(rd, a->vd + pass); 3963 + neon_store_reg64(rm, a->vd + pass); 3964 + neon_store_reg64(rd, a->vm + pass); 3965 + } 3966 + tcg_temp_free_i64(rm); 3967 + tcg_temp_free_i64(rd); 3968 + 3969 + return true; 3970 + }
+1 -4
target/arm/translate.c
··· 4944 4944 case NEON_2RM_VCVTPS: 4945 4945 case NEON_2RM_VCVTMU: 4946 4946 case NEON_2RM_VCVTMS: 4947 + case NEON_2RM_VSWP: 4947 4948 /* handled by decodetree */ 4948 4949 return 1; 4949 4950 case NEON_2RM_VTRN: ··· 4965 4966 for (pass = 0; pass < (q ? 4 : 2); pass++) { 4966 4967 tmp = neon_load_reg(rm, pass); 4967 4968 switch (op) { 4968 - case NEON_2RM_VSWP: 4969 - tmp2 = neon_load_reg(rd, pass); 4970 - neon_store_reg(rm, pass, tmp2); 4971 - break; 4972 4969 case NEON_2RM_VTRN: 4973 4970 tmp2 = neon_load_reg(rd, pass); 4974 4971 switch (size) {