
Merge remote-tracking branch 'remotes/cohuck/tags/s390x-20190311' into staging

s390x update:
- clean up LowCore definition
- first part of vector instruction support for tcg

# gpg: Signature made Mon 11 Mar 2019 08:59:02 GMT
# gpg: using RSA key C3D0D66DC3624FF6A8C018CEDECF6B93C6F02FAF
# gpg: issuer "cohuck@redhat.com"
# gpg: Good signature from "Cornelia Huck <conny@cornelia-huck.de>" [unknown]
# gpg: aka "Cornelia Huck <huckc@linux.vnet.ibm.com>" [full]
# gpg: aka "Cornelia Huck <cornelia.huck@de.ibm.com>" [full]
# gpg: aka "Cornelia Huck <cohuck@kernel.org>" [unknown]
# gpg: aka "Cornelia Huck <cohuck@redhat.com>" [unknown]
# Primary key fingerprint: C3D0 D66D C362 4FF6 A8C0 18CE DECF 6B93 C6F0 2FAF

* remotes/cohuck/tags/s390x-20190311: (33 commits)
s390x/tcg: Implement VECTOR UNPACK *
s390x/tcg: Implement VECTOR STORE WITH LENGTH
s390x/tcg: Implement VECTOR STORE MULTIPLE
s390x/tcg: Implement VECTOR STORE ELEMENT
s390x/tcg: Implement VECTOR STORE
s390x/tcg: Provide probe_write_access helper
s390x/tcg: Implement VECTOR SIGN EXTEND TO DOUBLEWORD
s390x/tcg: Implement VECTOR SELECT
s390x/tcg: Implement VECTOR SCATTER ELEMENT
s390x/tcg: Implement VECTOR REPLICATE IMMEDIATE
s390x/tcg: Implement VECTOR REPLICATE
s390x/tcg: Implement VECTOR PERMUTE DOUBLEWORD IMMEDIATE
s390x/tcg: Implement VECTOR PERMUTE
s390x/tcg: Implement VECTOR PACK *
s390x/tcg: Implement VECTOR MERGE (HIGH|LOW)
s390x/tcg: Implement VECTOR LOAD WITH LENGTH
s390x/tcg: Implement VECTOR LOAD VR FROM GRS DISJOINT
s390x/tcg: Implement VECTOR LOAD VR ELEMENT FROM GR
s390x/tcg: Implement VECTOR LOAD TO BLOCK BOUNDARY
s390x/tcg: Implement VECTOR LOAD MULTIPLE
...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>

+1455 -40
target/s390x/Makefile.objs (+1)

···
  obj-y += cpu.o cpu_models.o cpu_features.o gdbstub.o interrupt.o helper.o
  obj-$(CONFIG_TCG) += translate.o cc_helper.o excp_helper.o fpu_helper.o
  obj-$(CONFIG_TCG) += int_helper.o mem_helper.o misc_helper.o crypto_helper.o
+ obj-$(CONFIG_TCG) += vec_helper.o
  obj-$(CONFIG_SOFTMMU) += machine.o ioinst.o arch_dump.o mmu_helper.o diag.o
  obj-$(CONFIG_SOFTMMU) += sigp.o
  obj-$(CONFIG_KVM) += kvm.o
target/s390x/cpu.h (+7)

···
  /* PSW defines */
  #undef PSW_MASK_PER
  #undef PSW_MASK_UNUSED_2
+ #undef PSW_MASK_UNUSED_3
  #undef PSW_MASK_DAT
  #undef PSW_MASK_IO
  #undef PSW_MASK_EXT
···
  #define PSW_MASK_PER            0x4000000000000000ULL
  #define PSW_MASK_UNUSED_2       0x2000000000000000ULL
+ #define PSW_MASK_UNUSED_3       0x1000000000000000ULL
  #define PSW_MASK_DAT            0x0400000000000000ULL
  #define PSW_MASK_IO             0x0200000000000000ULL
  #define PSW_MASK_EXT            0x0100000000000000ULL
···
  /* we'll use some unused PSW positions to store CR flags in tb flags */
  #define FLAG_MASK_AFP           (PSW_MASK_UNUSED_2 >> FLAG_MASK_PSW_SHIFT)
+ #define FLAG_MASK_VECTOR        (PSW_MASK_UNUSED_3 >> FLAG_MASK_PSW_SHIFT)

  /* Control register 0 bits */
  #define CR0_LOWPROT             0x0000000010000000ULL
  #define CR0_SECONDARY           0x0000000004000000ULL
  #define CR0_EDAT                0x0000000000800000ULL
  #define CR0_AFP                 0x0000000000040000ULL
+ #define CR0_VECTOR              0x0000000000020000ULL
  #define CR0_EMERGENCY_SIGNAL_SC 0x0000000000004000ULL
  #define CR0_EXTERNAL_CALL_SC    0x0000000000002000ULL
  #define CR0_CKC_SC              0x0000000000000800ULL
···
      *flags = (env->psw.mask >> FLAG_MASK_PSW_SHIFT) & FLAG_MASK_PSW;
      if (env->cregs[0] & CR0_AFP) {
          *flags |= FLAG_MASK_AFP;
+     }
+     if (env->cregs[0] & CR0_VECTOR) {
+         *flags |= FLAG_MASK_VECTOR;
      }
  }
target/s390x/helper.h (+21)

···
  DEF_HELPER_5(msa, i32, env, i32, i32, i32, i32)
  DEF_HELPER_FLAGS_1(stpt, TCG_CALL_NO_RWG, i64, env)
  DEF_HELPER_FLAGS_1(stck, TCG_CALL_NO_RWG_SE, i64, env)
+ DEF_HELPER_FLAGS_3(probe_write_access, TCG_CALL_NO_WG, void, env, i64, i64)
+
+ /* === Vector Support Instructions === */
+ DEF_HELPER_FLAGS_4(vll, TCG_CALL_NO_WG, void, env, ptr, i64, i64)
+ DEF_HELPER_FLAGS_4(gvec_vpk16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
+ DEF_HELPER_FLAGS_4(gvec_vpk32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
+ DEF_HELPER_FLAGS_4(gvec_vpk64, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
+ DEF_HELPER_FLAGS_4(gvec_vpks16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
+ DEF_HELPER_FLAGS_4(gvec_vpks32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
+ DEF_HELPER_FLAGS_4(gvec_vpks64, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
+ DEF_HELPER_5(gvec_vpks_cc16, void, ptr, cptr, cptr, env, i32)
+ DEF_HELPER_5(gvec_vpks_cc32, void, ptr, cptr, cptr, env, i32)
+ DEF_HELPER_5(gvec_vpks_cc64, void, ptr, cptr, cptr, env, i32)
+ DEF_HELPER_FLAGS_4(gvec_vpkls16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
+ DEF_HELPER_FLAGS_4(gvec_vpkls32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
+ DEF_HELPER_FLAGS_4(gvec_vpkls64, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
+ DEF_HELPER_5(gvec_vpkls_cc16, void, ptr, cptr, cptr, env, i32)
+ DEF_HELPER_5(gvec_vpkls_cc32, void, ptr, cptr, cptr, env, i32)
+ DEF_HELPER_5(gvec_vpkls_cc64, void, ptr, cptr, cptr, env, i32)
+ DEF_HELPER_FLAGS_5(gvec_vperm, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32)
+ DEF_HELPER_FLAGS_4(vstl, TCG_CALL_NO_WG, void, env, cptr, i64, i64)

  #ifndef CONFIG_USER_ONLY
  DEF_HELPER_3(servc, i32, env, i64, i64)
target/s390x/insn-data.def (+82)

···
  D(0xb93e, KIMD, RRE, MSA, 0, 0, 0, 0, msa, 0, S390_FEAT_TYPE_KIMD)
  D(0xb93f, KLMD, RRE, MSA, 0, 0, 0, 0, msa, 0, S390_FEAT_TYPE_KLMD)

+ /* === Vector Support Instructions === */
+
+ /* VECTOR GATHER ELEMENT */
+ E(0xe713, VGEF, VRV, V, la2, 0, 0, 0, vge, 0, ES_32, IF_VEC)
+ E(0xe712, VGEG, VRV, V, la2, 0, 0, 0, vge, 0, ES_64, IF_VEC)
+ /* VECTOR GENERATE BYTE MASK */
+ F(0xe744, VGBM, VRI_a, V, 0, 0, 0, 0, vgbm, 0, IF_VEC)
+ /* VECTOR GENERATE MASK */
+ F(0xe746, VGM, VRI_b, V, 0, 0, 0, 0, vgm, 0, IF_VEC)
+ /* VECTOR LOAD */
+ F(0xe706, VL, VRX, V, la2, 0, 0, 0, vl, 0, IF_VEC)
+ F(0xe756, VLR, VRR_a, V, 0, 0, 0, 0, vlr, 0, IF_VEC)
+ /* VECTOR LOAD AND REPLICATE */
+ F(0xe705, VLREP, VRX, V, la2, 0, 0, 0, vlrep, 0, IF_VEC)
+ /* VECTOR LOAD ELEMENT */
+ E(0xe700, VLEB, VRX, V, la2, 0, 0, 0, vle, 0, ES_8, IF_VEC)
+ E(0xe701, VLEH, VRX, V, la2, 0, 0, 0, vle, 0, ES_16, IF_VEC)
+ E(0xe703, VLEF, VRX, V, la2, 0, 0, 0, vle, 0, ES_32, IF_VEC)
+ E(0xe702, VLEG, VRX, V, la2, 0, 0, 0, vle, 0, ES_64, IF_VEC)
+ /* VECTOR LOAD ELEMENT IMMEDIATE */
+ E(0xe740, VLEIB, VRI_a, V, 0, 0, 0, 0, vlei, 0, ES_8, IF_VEC)
+ E(0xe741, VLEIH, VRI_a, V, 0, 0, 0, 0, vlei, 0, ES_16, IF_VEC)
+ E(0xe743, VLEIF, VRI_a, V, 0, 0, 0, 0, vlei, 0, ES_32, IF_VEC)
+ E(0xe742, VLEIG, VRI_a, V, 0, 0, 0, 0, vlei, 0, ES_64, IF_VEC)
+ /* VECTOR LOAD GR FROM VR ELEMENT */
+ F(0xe721, VLGV, VRS_c, V, la2, 0, r1, 0, vlgv, 0, IF_VEC)
+ /* VECTOR LOAD LOGICAL ELEMENT AND ZERO */
+ F(0xe704, VLLEZ, VRX, V, la2, 0, 0, 0, vllez, 0, IF_VEC)
+ /* VECTOR LOAD MULTIPLE */
+ F(0xe736, VLM, VRS_a, V, la2, 0, 0, 0, vlm, 0, IF_VEC)
+ /* VECTOR LOAD TO BLOCK BOUNDARY */
+ F(0xe707, VLBB, VRX, V, la2, 0, 0, 0, vlbb, 0, IF_VEC)
+ /* VECTOR LOAD VR ELEMENT FROM GR */
+ F(0xe722, VLVG, VRS_b, V, la2, r3, 0, 0, vlvg, 0, IF_VEC)
+ /* VECTOR LOAD VR FROM GRS DISJOINT */
+ F(0xe762, VLVGP, VRR_f, V, r2, r3, 0, 0, vlvgp, 0, IF_VEC)
+ /* VECTOR LOAD WITH LENGTH */
+ F(0xe737, VLL, VRS_b, V, la2, r3_32u, 0, 0, vll, 0, IF_VEC)
+ /* VECTOR MERGE HIGH */
+ F(0xe761, VMRH, VRR_c, V, 0, 0, 0, 0, vmr, 0, IF_VEC)
+ /* VECTOR MERGE LOW */
+ F(0xe760, VMRL, VRR_c, V, 0, 0, 0, 0, vmr, 0, IF_VEC)
+ /* VECTOR PACK */
+ F(0xe794, VPK, VRR_c, V, 0, 0, 0, 0, vpk, 0, IF_VEC)
+ /* VECTOR PACK SATURATE */
+ F(0xe797, VPKS, VRR_b, V, 0, 0, 0, 0, vpk, 0, IF_VEC)
+ /* VECTOR PACK LOGICAL SATURATE */
+ F(0xe795, VPKLS, VRR_b, V, 0, 0, 0, 0, vpk, 0, IF_VEC)
+ /* VECTOR PERMUTE */
+ F(0xe78c, VPERM, VRR_e, V, 0, 0, 0, 0, vperm, 0, IF_VEC)
+ /* VECTOR PERMUTE DOUBLEWORD IMMEDIATE */
+ F(0xe784, VPDI, VRR_c, V, 0, 0, 0, 0, vpdi, 0, IF_VEC)
+ /* VECTOR REPLICATE */
+ F(0xe74d, VREP, VRI_c, V, 0, 0, 0, 0, vrep, 0, IF_VEC)
+ /* VECTOR REPLICATE IMMEDIATE */
+ F(0xe745, VREPI, VRI_a, V, 0, 0, 0, 0, vrepi, 0, IF_VEC)
+ /* VECTOR SCATTER ELEMENT */
+ E(0xe71b, VSCEF, VRV, V, la2, 0, 0, 0, vsce, 0, ES_32, IF_VEC)
+ E(0xe71a, VSCEG, VRV, V, la2, 0, 0, 0, vsce, 0, ES_64, IF_VEC)
+ /* VECTOR SELECT */
+ F(0xe78d, VSEL, VRR_e, V, 0, 0, 0, 0, vsel, 0, IF_VEC)
+ /* VECTOR SIGN EXTEND TO DOUBLEWORD */
+ F(0xe75f, VSEG, VRR_a, V, 0, 0, 0, 0, vseg, 0, IF_VEC)
+ /* VECTOR STORE */
+ F(0xe70e, VST, VRX, V, la2, 0, 0, 0, vst, 0, IF_VEC)
+ /* VECTOR STORE ELEMENT */
+ E(0xe708, VSTEB, VRX, V, la2, 0, 0, 0, vste, 0, ES_8, IF_VEC)
+ E(0xe709, VSTEH, VRX, V, la2, 0, 0, 0, vste, 0, ES_16, IF_VEC)
+ E(0xe70b, VSTEF, VRX, V, la2, 0, 0, 0, vste, 0, ES_32, IF_VEC)
+ E(0xe70a, VSTEG, VRX, V, la2, 0, 0, 0, vste, 0, ES_64, IF_VEC)
+ /* VECTOR STORE MULTIPLE */
+ F(0xe73e, VSTM, VRS_a, V, la2, 0, 0, 0, vstm, 0, IF_VEC)
+ /* VECTOR STORE WITH LENGTH */
+ F(0xe73f, VSTL, VRS_b, V, la2, r3_32u, 0, 0, vstl, 0, IF_VEC)
+ /* VECTOR UNPACK HIGH */
+ F(0xe7d7, VUPH, VRR_a, V, 0, 0, 0, 0, vup, 0, IF_VEC)
+ /* VECTOR UNPACK LOGICAL HIGH */
+ F(0xe7d5, VUPLH, VRR_a, V, 0, 0, 0, 0, vup, 0, IF_VEC)
+ /* VECTOR UNPACK LOW */
+ F(0xe7d6, VUPL, VRR_a, V, 0, 0, 0, 0, vup, 0, IF_VEC)
+ /* VECTOR UNPACK LOGICAL LOW */
+ F(0xe7d4, VUPLL, VRR_a, V, 0, 0, 0, 0, vup, 0, IF_VEC)
+
  #ifndef CONFIG_USER_ONLY
  /* COMPARE AND SWAP AND PURGE */
  E(0xb250, CSP, RRE, Z, r1_32u, ra2, r1_P, 0, csp, 0, MO_TEUL, IF_PRIV)
target/s390x/insn-format.def (+25)

···
  F3(SS_f,  BD(1,16,20), L(2,8,8), BD(2,32,36))
  F2(SSE,   BD(1,16,20), BD(2,32,36))
  F3(SSF,   BD(1,16,20), BD(2,32,36), R(3,8))
+ F3(VRI_a, V(1,8), I(2,16,16), M(3,32))
+ F4(VRI_b, V(1,8), I(2,16,8), I(3,24,8), M(4,32))
+ F4(VRI_c, V(1,8), V(3,12), I(2,16,16), M(4,32))
+ F5(VRI_d, V(1,8), V(2,12), V(3,16), I(4,24,8), M(5,32))
+ F5(VRI_e, V(1,8), V(2,12), I(3,16,12), M(5,28), M(4,32))
+ F5(VRI_f, V(1,8), V(2,12), V(3,16), M(5,24), I(4,28,8))
+ F5(VRI_g, V(1,8), V(2,12), I(4,16,8), M(5,24), I(3,28,8))
+ F3(VRI_h, V(1,8), I(2,16,16), I(3,32,4))
+ F4(VRI_i, V(1,8), R(2,12), M(4,24), I(3,28,8))
+ F5(VRR_a, V(1,8), V(2,12), M(5,24), M(4,28), M(3,32))
+ F5(VRR_b, V(1,8), V(2,12), V(3,16), M(5,24), M(4,32))
+ F6(VRR_c, V(1,8), V(2,12), V(3,16), M(6,24), M(5,28), M(4,32))
+ F6(VRR_d, V(1,8), V(2,12), V(3,16), M(5,20), M(6,24), V(4,32))
+ F6(VRR_e, V(1,8), V(2,12), V(3,16), M(6,20), M(5,28), V(4,32))
+ F3(VRR_f, V(1,8), R(2,12), R(3,16))
+ F1(VRR_g, V(1,12))
+ F3(VRR_h, V(1,12), V(2,16), M(3,24))
+ F3(VRR_i, R(1,8), V(2,12), M(3,24))
+ F4(VRS_a, V(1,8), V(3,12), BD(2,16,20), M(4,32))
+ F4(VRS_b, V(1,8), R(3,12), BD(2,16,20), M(4,32))
+ F4(VRS_c, R(1,8), V(3,12), BD(2,16,20), M(4,32))
+ F3(VRS_d, R(3,12), BD(2,16,20), V(1,32))
+ F4(VRV,   V(1,8), V(2,12), BD(2,16,20), M(3,32))
+ F3(VRX,   V(1,8), BXD(2), M(3,32))
+ F3(VSI,   I(3,8,8), BD(2,16,20), V(1,32))
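A note on the V(N,B) descriptors above: they carry field type 3, meaning the 4-bit field at position B holds only the low bits of a 5-bit vector register number; the missing most-significant bit lives in the instruction's RXB byte (bits 36-39, one bit per possible vector operand position). A minimal stand-alone sketch of that recombination, mirroring the `case 3` decoding added to translate.c further down (the function name is ours, for illustration only):

    #include <assert.h>
    #include <stdint.h>

    /*
     * Recombine a 4-bit vector register field with its RXB extension bit.
     * 'insn' is the 6-byte instruction left-aligned in a 64-bit value, so
     * instruction bit 36 is bit (63 - 36) counted from the LSB.
     */
    static unsigned vec_reg_number(uint64_t insn, unsigned field_beg)
    {
        unsigned rxb_bit;

        switch (field_beg) {    /* operand position -> RXB bit 36..39 */
        case 8:  rxb_bit = 36; break;
        case 12: rxb_bit = 37; break;
        case 16: rxb_bit = 38; break;
        case 32: rxb_bit = 39; break;
        default: assert(0);
        }
        return ((insn >> (63 - field_beg - 3)) & 0xf)   /* low 4 bits */
             | (((insn >> (63 - rxb_bit)) & 1) << 4);   /* MSB from RXB */
    }

With this, e.g. a VRX instruction naming register 17 (binary 10001) encodes 0x1 in the 4-bit field at position 8 and sets RXB bit 36.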
target/s390x/internal.h (+4 -39)

···
      PSW program_new_psw;                 /* 0x1d0 */
      PSW mcck_new_psw;                    /* 0x1e0 */
      PSW io_new_psw;                      /* 0x1f0 */
-     PSW return_psw;                      /* 0x200 */
-     uint8_t irb[64];                     /* 0x210 */
-     uint64_t sync_enter_timer;           /* 0x250 */
-     uint64_t async_enter_timer;          /* 0x258 */
-     uint64_t exit_timer;                 /* 0x260 */
-     uint64_t last_update_timer;          /* 0x268 */
-     uint64_t user_timer;                 /* 0x270 */
-     uint64_t system_timer;               /* 0x278 */
-     uint64_t last_update_clock;          /* 0x280 */
-     uint64_t steal_clock;                /* 0x288 */
-     PSW return_mcck_psw;                 /* 0x290 */
-     uint8_t pad9[0xc00 - 0x2a0];         /* 0x2a0 */
-     /* System info area */
-     uint64_t save_area[16];              /* 0xc00 */
-     uint8_t pad10[0xd40 - 0xc80];        /* 0xc80 */
-     uint64_t kernel_stack;               /* 0xd40 */
-     uint64_t thread_info;                /* 0xd48 */
-     uint64_t async_stack;                /* 0xd50 */
-     uint64_t kernel_asce;                /* 0xd58 */
-     uint64_t user_asce;                  /* 0xd60 */
-     uint64_t panic_stack;                /* 0xd68 */
-     uint64_t user_exec_asce;             /* 0xd70 */
-     uint8_t pad11[0xdc0 - 0xd78];        /* 0xd78 */
-
-     /* SMP info area: defined by DJB */
-     uint64_t clock_comparator;           /* 0xdc0 */
-     uint64_t ext_call_fast;              /* 0xdc8 */
-     uint64_t percpu_offset;              /* 0xdd0 */
-     uint64_t current_task;               /* 0xdd8 */
-     uint32_t softirq_pending;            /* 0xde0 */
-     uint32_t pad_0x0de4;                 /* 0xde4 */
-     uint64_t int_clock;                  /* 0xde8 */
-     uint8_t pad12[0xe00 - 0xdf0];        /* 0xdf0 */
-
-     /* 0xe00 is used as indicator for dump tools */
-     /* whether the kernel died with panic() or not */
-     uint32_t panic_magic;                /* 0xe00 */
-
-     uint8_t pad13[0x11b0 - 0xe04];       /* 0xe04 */
+     uint8_t pad13[0x11b0 - 0x200];       /* 0x200 */

      uint64_t mcesad;                     /* 0x11B0 */
···
      uint8_t pad18[0x2000 - 0x1400];      /* 0x1400 */
  } QEMU_PACKED LowCore;
+ QEMU_BUILD_BUG_ON(sizeof(LowCore) != 8192);
  #endif /* CONFIG_USER_ONLY */

  #define MAX_ILEN 6
···
  /* mem_helper.c */
  target_ulong mmu_real2abs(CPUS390XState *env, target_ulong raddr);
+ void probe_write_access(CPUS390XState *env, uint64_t addr, uint64_t len,
+                         uintptr_t ra);

  /* mmu_helper.c */
target/s390x/mem_helper.c (+26)

···
      return convert_unicode(env, r1, r2, m3, GETPC(),
                             decode_utf32, encode_utf16);
  }
+
+ void probe_write_access(CPUS390XState *env, uint64_t addr, uint64_t len,
+                         uintptr_t ra)
+ {
+ #ifdef CONFIG_USER_ONLY
+     if (!h2g_valid(addr) || !h2g_valid(addr + len - 1) ||
+         page_check_range(addr, len, PAGE_WRITE) < 0) {
+         s390_program_interrupt(env, PGM_ADDRESSING, ILEN_AUTO, ra);
+     }
+ #else
+     /* test the actual access, not just any access to the page due to LAP */
+     while (len) {
+         const uint64_t pagelen = -(addr | TARGET_PAGE_MASK);
+         const uint64_t curlen = MIN(pagelen, len);
+
+         probe_write(env, addr, curlen, cpu_mmu_index(env, false), ra);
+         addr = wrap_address(env, addr + curlen);
+         len -= curlen;
+     }
+ #endif
+ }
+
+ void HELPER(probe_write_access)(CPUS390XState *env, uint64_t addr, uint64_t len)
+ {
+     probe_write_access(env, addr, len, GETPC());
+ }
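The page-length computation above is a standard trick: OR-ing TARGET_PAGE_MASK into the address sets every bit above the in-page offset, so negating the result (mod 2^64) yields exactly the number of bytes from addr to the next page boundary, and the loop probes each touched page once. A stand-alone check of the identity, assuming 4 KiB pages for the demo:

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SIZE 0x1000ull
    #define PAGE_MASK (~(PAGE_SIZE - 1))    /* plays the role of TARGET_PAGE_MASK */

    int main(void)
    {
        uint64_t addrs[] = { 0x0, 0x1, 0xfff, 0x1000, 0x12345678 };

        for (size_t i = 0; i < sizeof(addrs) / sizeof(addrs[0]); i++) {
            uint64_t addr = addrs[i];
            uint64_t pagelen = -(addr | PAGE_MASK);
            /* bytes until the next page boundary, computed the obvious way */
            assert(pagelen == PAGE_SIZE - (addr & (PAGE_SIZE - 1)));
            printf("addr=0x%llx -> %llu bytes left in page\n",
                   (unsigned long long)addr, (unsigned long long)pagelen);
        }
        return 0;
    }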
target/s390x/translate.c (+60 -1)

···
  #include "disas/disas.h"
  #include "exec/exec-all.h"
  #include "tcg-op.h"
+ #include "tcg-op-gvec.h"
  #include "qemu/log.h"
  #include "qemu/host-utils.h"
  #include "exec/cpu_ldst.h"
···
  #define F3(N, X1, X2, X3) F0(N)
  #define F4(N, X1, X2, X3, X4) F0(N)
  #define F5(N, X1, X2, X3, X4, X5) F0(N)
+ #define F6(N, X1, X2, X3, X4, X5, X6) F0(N)

  typedef enum {
  #include "insn-format.def"
···
  #undef F3
  #undef F4
  #undef F5
+ #undef F6

  /* Define a structure to hold the decoded fields.  We'll store each inside
     an array indexed by an enum.  In order to conserve memory, we'll arrange
···
      FLD_O_m1,
      FLD_O_m3,
      FLD_O_m4,
+     FLD_O_m5,
+     FLD_O_m6,
      FLD_O_b1,
      FLD_O_b2,
      FLD_O_b4,
···
      FLD_O_i2,
      FLD_O_i3,
      FLD_O_i4,
-     FLD_O_i5
+     FLD_O_i5,
+     FLD_O_v1,
+     FLD_O_v2,
+     FLD_O_v3,
+     FLD_O_v4,
  };

  enum DisasFieldIndexC {
···
      FLD_C_m1 = 0,
      FLD_C_b1 = 0,
      FLD_C_i1 = 0,
+     FLD_C_v1 = 0,

      FLD_C_r2 = 1,
      FLD_C_b2 = 1,
···
      FLD_C_r3 = 2,
      FLD_C_m3 = 2,
      FLD_C_i3 = 2,
+     FLD_C_v3 = 2,

      FLD_C_m4 = 3,
      FLD_C_b4 = 3,
      FLD_C_i4 = 3,
      FLD_C_l1 = 3,
+     FLD_C_v4 = 3,

      FLD_C_i5 = 4,
      FLD_C_d1 = 4,
+     FLD_C_m5 = 4,

      FLD_C_d2 = 5,
+     FLD_C_m6 = 5,

      FLD_C_d4 = 6,
      FLD_C_x2 = 6,
      FLD_C_l2 = 6,
+     FLD_C_v2 = 6,

      NUM_C_FIELD = 7
  };
···
  #define R(N, B)       {  B,  4, 0, FLD_C_r##N, FLD_O_r##N }
  #define M(N, B)       {  B,  4, 0, FLD_C_m##N, FLD_O_m##N }
+ #define V(N, B)       {  B,  4, 3, FLD_C_v##N, FLD_O_v##N }
  #define BD(N, BB, BD) { BB,  4, 0, FLD_C_b##N, FLD_O_b##N }, \
                        { BD, 12, 0, FLD_C_d##N, FLD_O_d##N }
  #define BXD(N)        { 16,  4, 0, FLD_C_b##N, FLD_O_b##N }, \
···
  #define F3(N, X1, X2, X3) { { X1, X2, X3 } },
  #define F4(N, X1, X2, X3, X4) { { X1, X2, X3, X4 } },
  #define F5(N, X1, X2, X3, X4, X5) { { X1, X2, X3, X4, X5 } },
+ #define F6(N, X1, X2, X3, X4, X5, X6) { { X1, X2, X3, X4, X5, X6 } },

  static const DisasFormatInfo format_info[] = {
  #include "insn-format.def"
···
  #undef F3
  #undef F4
  #undef F5
+ #undef F6
  #undef R
  #undef M
+ #undef V
  #undef BD
  #undef BXD
  #undef BDL
···
  #define IF_BFP      0x0008  /* binary floating point instruction */
  #define IF_DFP      0x0010  /* decimal floating point instruction */
  #define IF_PRIV     0x0020  /* privileged instruction */
+ #define IF_VEC      0x0040  /* vector instruction */

  struct DisasInsn {
      unsigned opc:16;
···
  }
  #endif

+ #include "translate_vx.inc.c"
+
  /* ====================================================================== */
  /* The "Cc OUTput" generators.  Given the generated output (and in some cases
     the original inputs), update the various cc data structures in order to
···
  }
  #define SPEC_in2_r3_sr32 0

+ static void in2_r3_32u(DisasContext *s, DisasFields *f, DisasOps *o)
+ {
+     o->in2 = tcg_temp_new_i64();
+     tcg_gen_ext32u_i64(o->in2, regs[get_field(f, r3)]);
+ }
+ #define SPEC_in2_r3_32u 0
+
  static void in2_r2_32s(DisasContext *s, DisasFields *f, DisasOps *o)
  {
      o->in2 = tcg_temp_new_i64();
···
      case 2: /* dl+dh split, signed 20 bit. */
          r = ((int8_t)r << 12) | (r >> 8);
          break;
+     case 3: /* MSB stored in RXB */
+         g_assert(f->size == 4);
+         switch (f->beg) {
+         case 8:
+             r |= extract64(insn, 63 - 36, 1) << 4;
+             break;
+         case 12:
+             r |= extract64(insn, 63 - 37, 1) << 4;
+             break;
+         case 16:
+             r |= extract64(insn, 63 - 38, 1) << 4;
+             break;
+         case 32:
+             r |= extract64(insn, 63 - 39, 1) << 4;
+             break;
+         default:
+             g_assert_not_reached();
+         }
+         break;
      default:
          abort();
      }
···
          if (insn->flags & IF_DFP) {
              dxc = 3;
          }
+         if (insn->flags & IF_VEC) {
+             dxc = 0xfe;
+         }
          if (dxc) {
              gen_data_exception(dxc);
              return DISAS_NORETURN;
          }
      }
+
+     /* if vector instructions not enabled, executing them is forbidden */
+     if (insn->flags & IF_VEC) {
+         if (!(s->base.tb->flags & FLAG_MASK_VECTOR)) {
+             gen_data_exception(0xfe);
+             return DISAS_NORETURN;
+         }
+     }
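Two separate gates now guard vector instructions at translation time: inside the existing AFP-disabled block, IF_VEC selects DXC 0xfe, and even with AFP on, a cleared CR0.VECTOR (mirrored into tb->flags as FLAG_MASK_VECTOR) raises the same data exception. A condensed stand-alone model of the combined check (the flag bit values here are stand-ins, not QEMU's real assignments):

    #include <stdint.h>
    #include <stdio.h>

    #define FLAG_MASK_AFP    (1u << 0)      /* stand-in bit positions */
    #define FLAG_MASK_VECTOR (1u << 1)

    /* Returns the DXC to raise as a data exception, or 0 if execution is OK. */
    static unsigned vector_insn_gate(uint32_t tb_flags)
    {
        if (!(tb_flags & FLAG_MASK_AFP)) {
            return 0xfe;    /* AFP disabled: vector insns raise DXC 0xfe */
        }
        if (!(tb_flags & FLAG_MASK_VECTOR)) {
            return 0xfe;    /* CR0.VECTOR off: also DXC 0xfe */
        }
        return 0;           /* both enabled: execute */
    }

    int main(void)
    {
        printf("%#x\n", vector_insn_gate(0));                                /* 0xfe */
        printf("%#x\n", vector_insn_gate(FLAG_MASK_AFP));                    /* 0xfe */
        printf("%#x\n", vector_insn_gate(FLAG_MASK_AFP | FLAG_MASK_VECTOR)); /* 0 */
        return 0;
    }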
target/s390x/translate_vx.inc.c (new file, +935)

/*
 * QEMU TCG support -- s390x vector instruction translation functions
 *
 * Copyright (C) 2019 Red Hat Inc
 *
 * Authors:
 *   David Hildenbrand <david@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

/*
 * For most instructions that use the same element size for reads and
 * writes, we can use real gvec vector expansion, which potentially uses
 * real host vector instructions. As they only work up to 64 bit elements,
 * 128 bit elements (vector is a single element) have to be handled
 * differently. Operations that are too complicated to encode via TCG ops
 * are handled via gvec ool (out-of-line) handlers.
 *
 * As soon as instructions use different element sizes for reads and writes
 * or access elements "out of their element scope" we expand them manually
 * in fancy loops, as gvec expansion does not deal with actual element
 * numbers and does also not support access to other elements.
 *
 * 128 bit elements:
 *  As we only have i32/i64, such elements have to be loaded into two
 *  i64 values and can then be processed e.g. by tcg_gen_add2_i64.
 *
 * Sizes:
 *  On s390x, the operand size (oprsz) and the maximum size (maxsz) are
 *  always 16 (128 bit). What gvec code calls "vece", s390x calls "es",
 *  a.k.a. "element size". These values nicely map to MO_8 ... MO_64. Only
 *  128 bit element size has to be treated in a special way (MO_64 + 1).
 *  We will use ES_* instead of MO_* for this reason in this file.
 *
 * CC handling:
 *  As gvec ool-helpers can currently not return values (besides via
 *  pointers like vectors or cpu_env), whenever we have to set the CC and
 *  can't conclude the value from the result vector, we will directly
 *  set it in "env->cc_op" and mark it as static via set_cc_static().
 *  Whenever this is done, the helper writes globals (cc_op).
 */

#define NUM_VEC_ELEMENT_BYTES(es) (1 << (es))
#define NUM_VEC_ELEMENTS(es) (16 / NUM_VEC_ELEMENT_BYTES(es))
#define NUM_VEC_ELEMENT_BITS(es) (NUM_VEC_ELEMENT_BYTES(es) * BITS_PER_BYTE)

#define ES_8    MO_8
#define ES_16   MO_16
#define ES_32   MO_32
#define ES_64   MO_64
#define ES_128  4

static inline bool valid_vec_element(uint8_t enr, TCGMemOp es)
{
    return !(enr & ~(NUM_VEC_ELEMENTS(es) - 1));
}

static void read_vec_element_i64(TCGv_i64 dst, uint8_t reg, uint8_t enr,
                                 TCGMemOp memop)
{
    const int offs = vec_reg_offset(reg, enr, memop & MO_SIZE);

    switch (memop) {
    case ES_8:
        tcg_gen_ld8u_i64(dst, cpu_env, offs);
        break;
    case ES_16:
        tcg_gen_ld16u_i64(dst, cpu_env, offs);
        break;
    case ES_32:
        tcg_gen_ld32u_i64(dst, cpu_env, offs);
        break;
    case ES_8 | MO_SIGN:
        tcg_gen_ld8s_i64(dst, cpu_env, offs);
        break;
    case ES_16 | MO_SIGN:
        tcg_gen_ld16s_i64(dst, cpu_env, offs);
        break;
    case ES_32 | MO_SIGN:
        tcg_gen_ld32s_i64(dst, cpu_env, offs);
        break;
    case ES_64:
    case ES_64 | MO_SIGN:
        tcg_gen_ld_i64(dst, cpu_env, offs);
        break;
    default:
        g_assert_not_reached();
    }
}

static void write_vec_element_i64(TCGv_i64 src, int reg, uint8_t enr,
                                  TCGMemOp memop)
{
    const int offs = vec_reg_offset(reg, enr, memop & MO_SIZE);

    switch (memop) {
    case ES_8:
        tcg_gen_st8_i64(src, cpu_env, offs);
        break;
    case ES_16:
        tcg_gen_st16_i64(src, cpu_env, offs);
        break;
    case ES_32:
        tcg_gen_st32_i64(src, cpu_env, offs);
        break;
    case ES_64:
        tcg_gen_st_i64(src, cpu_env, offs);
        break;
    default:
        g_assert_not_reached();
    }
}

static void get_vec_element_ptr_i64(TCGv_ptr ptr, uint8_t reg, TCGv_i64 enr,
                                    uint8_t es)
{
    TCGv_i64 tmp = tcg_temp_new_i64();

    /* mask off invalid parts from the element nr */
    tcg_gen_andi_i64(tmp, enr, NUM_VEC_ELEMENTS(es) - 1);

    /* convert it to an element offset relative to cpu_env (vec_reg_offset()) */
    tcg_gen_shli_i64(tmp, tmp, es);
#ifndef HOST_WORDS_BIGENDIAN
    tcg_gen_xori_i64(tmp, tmp, 8 - NUM_VEC_ELEMENT_BYTES(es));
#endif
    tcg_gen_addi_i64(tmp, tmp, vec_full_reg_offset(reg));

    /* generate the final ptr by adding cpu_env */
    tcg_gen_trunc_i64_ptr(ptr, tmp);
    tcg_gen_add_ptr(ptr, ptr, cpu_env);

    tcg_temp_free_i64(tmp);
}

#define gen_gvec_3_ool(v1, v2, v3, data, fn)                               \
    tcg_gen_gvec_3_ool(vec_full_reg_offset(v1), vec_full_reg_offset(v2),   \
                       vec_full_reg_offset(v3), 16, 16, data, fn)
#define gen_gvec_3_ptr(v1, v2, v3, ptr, data, fn)                          \
    tcg_gen_gvec_3_ptr(vec_full_reg_offset(v1), vec_full_reg_offset(v2),   \
                       vec_full_reg_offset(v3), ptr, 16, 16, data, fn)
#define gen_gvec_4(v1, v2, v3, v4, gen)                                    \
    tcg_gen_gvec_4(vec_full_reg_offset(v1), vec_full_reg_offset(v2),       \
                   vec_full_reg_offset(v3), vec_full_reg_offset(v4),       \
                   16, 16, gen)
#define gen_gvec_4_ool(v1, v2, v3, v4, data, fn)                           \
    tcg_gen_gvec_4_ool(vec_full_reg_offset(v1), vec_full_reg_offset(v2),   \
                       vec_full_reg_offset(v3), vec_full_reg_offset(v4),   \
                       16, 16, data, fn)
#define gen_gvec_dup_i64(es, v1, c)                                        \
    tcg_gen_gvec_dup_i64(es, vec_full_reg_offset(v1), 16, 16, c)
#define gen_gvec_mov(v1, v2)                                               \
    tcg_gen_gvec_mov(0, vec_full_reg_offset(v1), vec_full_reg_offset(v2), 16, \
                     16)
#define gen_gvec_dup64i(v1, c)                                             \
    tcg_gen_gvec_dup64i(vec_full_reg_offset(v1), 16, 16, c)

static void gen_gvec_dupi(uint8_t es, uint8_t reg, uint64_t c)
{
    switch (es) {
    case ES_8:
        tcg_gen_gvec_dup8i(vec_full_reg_offset(reg), 16, 16, c);
        break;
    case ES_16:
        tcg_gen_gvec_dup16i(vec_full_reg_offset(reg), 16, 16, c);
        break;
    case ES_32:
        tcg_gen_gvec_dup32i(vec_full_reg_offset(reg), 16, 16, c);
        break;
    case ES_64:
        gen_gvec_dup64i(reg, c);
        break;
    default:
        g_assert_not_reached();
    }
}

static void zero_vec(uint8_t reg)
{
    tcg_gen_gvec_dup8i(vec_full_reg_offset(reg), 16, 16, 0);
}

static DisasJumpType op_vge(DisasContext *s, DisasOps *o)
{
    const uint8_t es = s->insn->data;
    const uint8_t enr = get_field(s->fields, m3);
    TCGv_i64 tmp;

    if (!valid_vec_element(enr, es)) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tmp = tcg_temp_new_i64();
    read_vec_element_i64(tmp, get_field(s->fields, v2), enr, es);
    tcg_gen_add_i64(o->addr1, o->addr1, tmp);
    gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 0);

    tcg_gen_qemu_ld_i64(tmp, o->addr1, get_mem_index(s), MO_TE | es);
    write_vec_element_i64(tmp, get_field(s->fields, v1), enr, es);
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}

static uint64_t generate_byte_mask(uint8_t mask)
{
    uint64_t r = 0;
    int i;

    for (i = 0; i < 8; i++) {
        if ((mask >> i) & 1) {
            r |= 0xffull << (i * 8);
        }
    }
    return r;
}

static DisasJumpType op_vgbm(DisasContext *s, DisasOps *o)
{
    const uint16_t i2 = get_field(s->fields, i2);

    if (i2 == (i2 & 0xff) * 0x0101) {
        /*
         * Masks for both 64 bit elements of the vector are the same.
         * Trust tcg to produce a good constant loading.
         */
        gen_gvec_dup64i(get_field(s->fields, v1),
                        generate_byte_mask(i2 & 0xff));
    } else {
        TCGv_i64 t = tcg_temp_new_i64();

        tcg_gen_movi_i64(t, generate_byte_mask(i2 >> 8));
        write_vec_element_i64(t, get_field(s->fields, v1), 0, ES_64);
        tcg_gen_movi_i64(t, generate_byte_mask(i2));
        write_vec_element_i64(t, get_field(s->fields, v1), 1, ES_64);
        tcg_temp_free_i64(t);
    }
    return DISAS_NEXT;
}

static DisasJumpType op_vgm(DisasContext *s, DisasOps *o)
{
    const uint8_t es = get_field(s->fields, m4);
    const uint8_t bits = NUM_VEC_ELEMENT_BITS(es);
    const uint8_t i2 = get_field(s->fields, i2) & (bits - 1);
    const uint8_t i3 = get_field(s->fields, i3) & (bits - 1);
    uint64_t mask = 0;
    int i;

    if (es > ES_64) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    /* generate the mask - take care of wrapping */
    for (i = i2; ; i = (i + 1) % bits) {
        mask |= 1ull << (bits - i - 1);
        if (i == i3) {
            break;
        }
    }

    gen_gvec_dupi(es, get_field(s->fields, v1), mask);
    return DISAS_NEXT;
}

static DisasJumpType op_vl(DisasContext *s, DisasOps *o)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t1 = tcg_temp_new_i64();

    tcg_gen_qemu_ld_i64(t0, o->addr1, get_mem_index(s), MO_TEQ);
    gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
    tcg_gen_qemu_ld_i64(t1, o->addr1, get_mem_index(s), MO_TEQ);
    write_vec_element_i64(t0, get_field(s->fields, v1), 0, ES_64);
    write_vec_element_i64(t1, get_field(s->fields, v1), 1, ES_64);
    tcg_temp_free_i64(t0);
    tcg_temp_free_i64(t1);
    return DISAS_NEXT;
}

static DisasJumpType op_vlr(DisasContext *s, DisasOps *o)
{
    gen_gvec_mov(get_field(s->fields, v1), get_field(s->fields, v2));
    return DISAS_NEXT;
}

static DisasJumpType op_vlrep(DisasContext *s, DisasOps *o)
{
    const uint8_t es = get_field(s->fields, m3);
    TCGv_i64 tmp;

    if (es > ES_64) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tmp = tcg_temp_new_i64();
    tcg_gen_qemu_ld_i64(tmp, o->addr1, get_mem_index(s), MO_TE | es);
    gen_gvec_dup_i64(es, get_field(s->fields, v1), tmp);
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}

static DisasJumpType op_vle(DisasContext *s, DisasOps *o)
{
    const uint8_t es = s->insn->data;
    const uint8_t enr = get_field(s->fields, m3);
    TCGv_i64 tmp;

    if (!valid_vec_element(enr, es)) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tmp = tcg_temp_new_i64();
    tcg_gen_qemu_ld_i64(tmp, o->addr1, get_mem_index(s), MO_TE | es);
    write_vec_element_i64(tmp, get_field(s->fields, v1), enr, es);
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}

static DisasJumpType op_vlei(DisasContext *s, DisasOps *o)
{
    const uint8_t es = s->insn->data;
    const uint8_t enr = get_field(s->fields, m3);
    TCGv_i64 tmp;

    if (!valid_vec_element(enr, es)) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tmp = tcg_const_i64((int16_t)get_field(s->fields, i2));
    write_vec_element_i64(tmp, get_field(s->fields, v1), enr, es);
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}

static DisasJumpType op_vlgv(DisasContext *s, DisasOps *o)
{
    const uint8_t es = get_field(s->fields, m4);
    TCGv_ptr ptr;

    if (es > ES_64) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    /* fast path if we don't need the register content */
    if (!get_field(s->fields, b2)) {
        uint8_t enr = get_field(s->fields, d2) & (NUM_VEC_ELEMENTS(es) - 1);

        read_vec_element_i64(o->out, get_field(s->fields, v3), enr, es);
        return DISAS_NEXT;
    }

    ptr = tcg_temp_new_ptr();
    get_vec_element_ptr_i64(ptr, get_field(s->fields, v3), o->addr1, es);
    switch (es) {
    case ES_8:
        tcg_gen_ld8u_i64(o->out, ptr, 0);
        break;
    case ES_16:
        tcg_gen_ld16u_i64(o->out, ptr, 0);
        break;
    case ES_32:
        tcg_gen_ld32u_i64(o->out, ptr, 0);
        break;
    case ES_64:
        tcg_gen_ld_i64(o->out, ptr, 0);
        break;
    default:
        g_assert_not_reached();
    }
    tcg_temp_free_ptr(ptr);

    return DISAS_NEXT;
}

static DisasJumpType op_vllez(DisasContext *s, DisasOps *o)
{
    uint8_t es = get_field(s->fields, m3);
    uint8_t enr;
    TCGv_i64 t;

    switch (es) {
    /* rightmost sub-element of leftmost doubleword */
    case ES_8:
        enr = 7;
        break;
    case ES_16:
        enr = 3;
        break;
    case ES_32:
        enr = 1;
        break;
    case ES_64:
        enr = 0;
        break;
    /* leftmost sub-element of leftmost doubleword */
    case 6:
        if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
            es = ES_32;
            enr = 0;
            break;
        }
        /* fallthrough */
    default:
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    t = tcg_temp_new_i64();
    tcg_gen_qemu_ld_i64(t, o->addr1, get_mem_index(s), MO_TE | es);
    zero_vec(get_field(s->fields, v1));
    write_vec_element_i64(t, get_field(s->fields, v1), enr, es);
    tcg_temp_free_i64(t);
    return DISAS_NEXT;
}

static DisasJumpType op_vlm(DisasContext *s, DisasOps *o)
{
    const uint8_t v3 = get_field(s->fields, v3);
    uint8_t v1 = get_field(s->fields, v1);
    TCGv_i64 t0, t1;

    if (v3 < v1 || (v3 - v1 + 1) > 16) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    /*
     * Check for possible access exceptions by trying to load the last
     * element. The first element is checked by the first load below.
     */
    t0 = tcg_temp_new_i64();
    t1 = tcg_temp_new_i64();
    gen_addi_and_wrap_i64(s, t0, o->addr1, (v3 - v1) * 16 + 8);
    tcg_gen_qemu_ld_i64(t0, t0, get_mem_index(s), MO_TEQ);

    for (;; v1++) {
        tcg_gen_qemu_ld_i64(t1, o->addr1, get_mem_index(s), MO_TEQ);
        write_vec_element_i64(t1, v1, 0, ES_64);
        if (v1 == v3) {
            break;
        }
        gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
        tcg_gen_qemu_ld_i64(t1, o->addr1, get_mem_index(s), MO_TEQ);
        write_vec_element_i64(t1, v1, 1, ES_64);
        gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
    }

    /* Store the last element, loaded first */
    write_vec_element_i64(t0, v1, 1, ES_64);

    tcg_temp_free_i64(t0);
    tcg_temp_free_i64(t1);
    return DISAS_NEXT;
}

static DisasJumpType op_vlbb(DisasContext *s, DisasOps *o)
{
    const int64_t block_size = (1ull << (get_field(s->fields, m3) + 6));
    const int v1_offs = vec_full_reg_offset(get_field(s->fields, v1));
    TCGv_ptr a0;
    TCGv_i64 bytes;

    if (get_field(s->fields, m3) > 6) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    bytes = tcg_temp_new_i64();
    a0 = tcg_temp_new_ptr();
    /* calculate the number of bytes until the next block boundary */
    tcg_gen_ori_i64(bytes, o->addr1, -block_size);
    tcg_gen_neg_i64(bytes, bytes);

    tcg_gen_addi_ptr(a0, cpu_env, v1_offs);
    gen_helper_vll(cpu_env, a0, o->addr1, bytes);
    tcg_temp_free_i64(bytes);
    tcg_temp_free_ptr(a0);
    return DISAS_NEXT;
}

static DisasJumpType op_vlvg(DisasContext *s, DisasOps *o)
{
    const uint8_t es = get_field(s->fields, m4);
    TCGv_ptr ptr;

    if (es > ES_64) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    /* fast path if we don't need the register content */
    if (!get_field(s->fields, b2)) {
        uint8_t enr = get_field(s->fields, d2) & (NUM_VEC_ELEMENTS(es) - 1);

        write_vec_element_i64(o->in2, get_field(s->fields, v1), enr, es);
        return DISAS_NEXT;
    }

    ptr = tcg_temp_new_ptr();
    get_vec_element_ptr_i64(ptr, get_field(s->fields, v1), o->addr1, es);
    switch (es) {
    case ES_8:
        tcg_gen_st8_i64(o->in2, ptr, 0);
        break;
    case ES_16:
        tcg_gen_st16_i64(o->in2, ptr, 0);
        break;
    case ES_32:
        tcg_gen_st32_i64(o->in2, ptr, 0);
        break;
    case ES_64:
        tcg_gen_st_i64(o->in2, ptr, 0);
        break;
    default:
        g_assert_not_reached();
    }
    tcg_temp_free_ptr(ptr);

    return DISAS_NEXT;
}

static DisasJumpType op_vlvgp(DisasContext *s, DisasOps *o)
{
    write_vec_element_i64(o->in1, get_field(s->fields, v1), 0, ES_64);
    write_vec_element_i64(o->in2, get_field(s->fields, v1), 1, ES_64);
    return DISAS_NEXT;
}

static DisasJumpType op_vll(DisasContext *s, DisasOps *o)
{
    const int v1_offs = vec_full_reg_offset(get_field(s->fields, v1));
    TCGv_ptr a0 = tcg_temp_new_ptr();

    /* convert highest index into an actual length */
    tcg_gen_addi_i64(o->in2, o->in2, 1);
    tcg_gen_addi_ptr(a0, cpu_env, v1_offs);
    gen_helper_vll(cpu_env, a0, o->addr1, o->in2);
    tcg_temp_free_ptr(a0);
    return DISAS_NEXT;
}

static DisasJumpType op_vmr(DisasContext *s, DisasOps *o)
{
    const uint8_t v1 = get_field(s->fields, v1);
    const uint8_t v2 = get_field(s->fields, v2);
    const uint8_t v3 = get_field(s->fields, v3);
    const uint8_t es = get_field(s->fields, m4);
    int dst_idx, src_idx;
    TCGv_i64 tmp;

    if (es > ES_64) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tmp = tcg_temp_new_i64();
    if (s->fields->op2 == 0x61) {
        /* iterate backwards to avoid overwriting data we might need later */
        for (dst_idx = NUM_VEC_ELEMENTS(es) - 1; dst_idx >= 0; dst_idx--) {
            src_idx = dst_idx / 2;
            if (dst_idx % 2 == 0) {
                read_vec_element_i64(tmp, v2, src_idx, es);
            } else {
                read_vec_element_i64(tmp, v3, src_idx, es);
            }
            write_vec_element_i64(tmp, v1, dst_idx, es);
        }
    } else {
        /* iterate forward to avoid overwriting data we might need later */
        for (dst_idx = 0; dst_idx < NUM_VEC_ELEMENTS(es); dst_idx++) {
            src_idx = (dst_idx + NUM_VEC_ELEMENTS(es)) / 2;
            if (dst_idx % 2 == 0) {
                read_vec_element_i64(tmp, v2, src_idx, es);
            } else {
                read_vec_element_i64(tmp, v3, src_idx, es);
            }
            write_vec_element_i64(tmp, v1, dst_idx, es);
        }
    }
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}

static DisasJumpType op_vpk(DisasContext *s, DisasOps *o)
{
    const uint8_t v1 = get_field(s->fields, v1);
    const uint8_t v2 = get_field(s->fields, v2);
    const uint8_t v3 = get_field(s->fields, v3);
    const uint8_t es = get_field(s->fields, m4);
    static gen_helper_gvec_3 * const vpk[3] = {
        gen_helper_gvec_vpk16,
        gen_helper_gvec_vpk32,
        gen_helper_gvec_vpk64,
    };
    static gen_helper_gvec_3 * const vpks[3] = {
        gen_helper_gvec_vpks16,
        gen_helper_gvec_vpks32,
        gen_helper_gvec_vpks64,
    };
    static gen_helper_gvec_3_ptr * const vpks_cc[3] = {
        gen_helper_gvec_vpks_cc16,
        gen_helper_gvec_vpks_cc32,
        gen_helper_gvec_vpks_cc64,
    };
    static gen_helper_gvec_3 * const vpkls[3] = {
        gen_helper_gvec_vpkls16,
        gen_helper_gvec_vpkls32,
        gen_helper_gvec_vpkls64,
    };
    static gen_helper_gvec_3_ptr * const vpkls_cc[3] = {
        gen_helper_gvec_vpkls_cc16,
        gen_helper_gvec_vpkls_cc32,
        gen_helper_gvec_vpkls_cc64,
    };

    if (es == ES_8 || es > ES_64) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    switch (s->fields->op2) {
    case 0x97:
        if (get_field(s->fields, m5) & 0x1) {
            gen_gvec_3_ptr(v1, v2, v3, cpu_env, 0, vpks_cc[es - 1]);
            set_cc_static(s);
        } else {
            gen_gvec_3_ool(v1, v2, v3, 0, vpks[es - 1]);
        }
        break;
    case 0x95:
        if (get_field(s->fields, m5) & 0x1) {
            gen_gvec_3_ptr(v1, v2, v3, cpu_env, 0, vpkls_cc[es - 1]);
            set_cc_static(s);
        } else {
            gen_gvec_3_ool(v1, v2, v3, 0, vpkls[es - 1]);
        }
        break;
    case 0x94:
        /* If sources and destination don't overlap -> fast path */
        if (v1 != v2 && v1 != v3) {
            const uint8_t src_es = get_field(s->fields, m4);
            const uint8_t dst_es = src_es - 1;
            TCGv_i64 tmp = tcg_temp_new_i64();
            int dst_idx, src_idx;

            for (dst_idx = 0; dst_idx < NUM_VEC_ELEMENTS(dst_es); dst_idx++) {
                src_idx = dst_idx;
                if (src_idx < NUM_VEC_ELEMENTS(src_es)) {
                    read_vec_element_i64(tmp, v2, src_idx, src_es);
                } else {
                    src_idx -= NUM_VEC_ELEMENTS(src_es);
                    read_vec_element_i64(tmp, v3, src_idx, src_es);
                }
                write_vec_element_i64(tmp, v1, dst_idx, dst_es);
            }
            tcg_temp_free_i64(tmp);
        } else {
            gen_gvec_3_ool(v1, v2, v3, 0, vpk[es - 1]);
        }
        break;
    default:
        g_assert_not_reached();
    }
    return DISAS_NEXT;
}

static DisasJumpType op_vperm(DisasContext *s, DisasOps *o)
{
    gen_gvec_4_ool(get_field(s->fields, v1), get_field(s->fields, v2),
                   get_field(s->fields, v3), get_field(s->fields, v4),
                   0, gen_helper_gvec_vperm);
    return DISAS_NEXT;
}

static DisasJumpType op_vpdi(DisasContext *s, DisasOps *o)
{
    const uint8_t i2 = extract32(get_field(s->fields, m4), 2, 1);
    const uint8_t i3 = extract32(get_field(s->fields, m4), 0, 1);
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t1 = tcg_temp_new_i64();

    read_vec_element_i64(t0, get_field(s->fields, v2), i2, ES_64);
    read_vec_element_i64(t1, get_field(s->fields, v3), i3, ES_64);
    write_vec_element_i64(t0, get_field(s->fields, v1), 0, ES_64);
    write_vec_element_i64(t1, get_field(s->fields, v1), 1, ES_64);
    tcg_temp_free_i64(t0);
    tcg_temp_free_i64(t1);
    return DISAS_NEXT;
}

static DisasJumpType op_vrep(DisasContext *s, DisasOps *o)
{
    const uint8_t enr = get_field(s->fields, i2);
    const uint8_t es = get_field(s->fields, m4);

    if (es > ES_64 || !valid_vec_element(enr, es)) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tcg_gen_gvec_dup_mem(es, vec_full_reg_offset(get_field(s->fields, v1)),
                         vec_reg_offset(get_field(s->fields, v3), enr, es),
                         16, 16);
    return DISAS_NEXT;
}

static DisasJumpType op_vrepi(DisasContext *s, DisasOps *o)
{
    const int64_t data = (int16_t)get_field(s->fields, i2);
    const uint8_t es = get_field(s->fields, m3);

    if (es > ES_64) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    gen_gvec_dupi(es, get_field(s->fields, v1), data);
    return DISAS_NEXT;
}

static DisasJumpType op_vsce(DisasContext *s, DisasOps *o)
{
    const uint8_t es = s->insn->data;
    const uint8_t enr = get_field(s->fields, m3);
    TCGv_i64 tmp;

    if (!valid_vec_element(enr, es)) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tmp = tcg_temp_new_i64();
    read_vec_element_i64(tmp, get_field(s->fields, v2), enr, es);
    tcg_gen_add_i64(o->addr1, o->addr1, tmp);
    gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 0);

    read_vec_element_i64(tmp, get_field(s->fields, v1), enr, es);
    tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_TE | es);
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}

static void gen_sel_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b, TCGv_i64 c)
{
    TCGv_i64 t = tcg_temp_new_i64();

    /* bit in c not set -> copy bit from b */
    tcg_gen_andc_i64(t, b, c);
    /* bit in c set -> copy bit from a */
    tcg_gen_and_i64(d, a, c);
    /* merge the results */
    tcg_gen_or_i64(d, d, t);
    tcg_temp_free_i64(t);
}

static void gen_sel_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b,
                        TCGv_vec c)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);

    tcg_gen_andc_vec(vece, t, b, c);
    tcg_gen_and_vec(vece, d, a, c);
    tcg_gen_or_vec(vece, d, d, t);
    tcg_temp_free_vec(t);
}

static DisasJumpType op_vsel(DisasContext *s, DisasOps *o)
{
    static const GVecGen4 gvec_op = {
        .fni8 = gen_sel_i64,
        .fniv = gen_sel_vec,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    gen_gvec_4(get_field(s->fields, v1), get_field(s->fields, v2),
               get_field(s->fields, v3), get_field(s->fields, v4), &gvec_op);
    return DISAS_NEXT;
}

static DisasJumpType op_vseg(DisasContext *s, DisasOps *o)
{
    const uint8_t es = get_field(s->fields, m3);
    int idx1, idx2;
    TCGv_i64 tmp;

    switch (es) {
    case ES_8:
        idx1 = 7;
        idx2 = 15;
        break;
    case ES_16:
        idx1 = 3;
        idx2 = 7;
        break;
    case ES_32:
        idx1 = 1;
        idx2 = 3;
        break;
    default:
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tmp = tcg_temp_new_i64();
    read_vec_element_i64(tmp, get_field(s->fields, v2), idx1, es | MO_SIGN);
    write_vec_element_i64(tmp, get_field(s->fields, v1), 0, ES_64);
    read_vec_element_i64(tmp, get_field(s->fields, v2), idx2, es | MO_SIGN);
    write_vec_element_i64(tmp, get_field(s->fields, v1), 1, ES_64);
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}

static DisasJumpType op_vst(DisasContext *s, DisasOps *o)
{
    TCGv_i64 tmp = tcg_const_i64(16);

    /* Probe write access before actually modifying memory */
    gen_helper_probe_write_access(cpu_env, o->addr1, tmp);

    read_vec_element_i64(tmp, get_field(s->fields, v1), 0, ES_64);
    tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_TEQ);
    gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
    read_vec_element_i64(tmp, get_field(s->fields, v1), 1, ES_64);
    tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_TEQ);
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}

static DisasJumpType op_vste(DisasContext *s, DisasOps *o)
{
    const uint8_t es = s->insn->data;
    const uint8_t enr = get_field(s->fields, m3);
    TCGv_i64 tmp;

    if (!valid_vec_element(enr, es)) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tmp = tcg_temp_new_i64();
    read_vec_element_i64(tmp, get_field(s->fields, v1), enr, es);
    tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_TE | es);
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}

static DisasJumpType op_vstm(DisasContext *s, DisasOps *o)
{
    const uint8_t v3 = get_field(s->fields, v3);
    uint8_t v1 = get_field(s->fields, v1);
    TCGv_i64 tmp;

    if (v3 < v1 || (v3 - v1 + 1) > 16) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    /* Probe write access before actually modifying memory */
    tmp = tcg_const_i64((v3 - v1 + 1) * 16);
    gen_helper_probe_write_access(cpu_env, o->addr1, tmp);

    for (;; v1++) {
        read_vec_element_i64(tmp, v1, 0, ES_64);
        tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_TEQ);
        gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
        read_vec_element_i64(tmp, v1, 1, ES_64);
        tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_TEQ);
        if (v1 == v3) {
            break;
        }
        gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
    }
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}

static DisasJumpType op_vstl(DisasContext *s, DisasOps *o)
{
    const int v1_offs = vec_full_reg_offset(get_field(s->fields, v1));
    TCGv_ptr a0 = tcg_temp_new_ptr();

    /* convert highest index into an actual length */
    tcg_gen_addi_i64(o->in2, o->in2, 1);
    tcg_gen_addi_ptr(a0, cpu_env, v1_offs);
    gen_helper_vstl(cpu_env, a0, o->addr1, o->in2);
    tcg_temp_free_ptr(a0);
    return DISAS_NEXT;
}

static DisasJumpType op_vup(DisasContext *s, DisasOps *o)
{
    const bool logical = s->fields->op2 == 0xd4 || s->fields->op2 == 0xd5;
    const uint8_t v1 = get_field(s->fields, v1);
    const uint8_t v2 = get_field(s->fields, v2);
    const uint8_t src_es = get_field(s->fields, m3);
    const uint8_t dst_es = src_es + 1;
    int dst_idx, src_idx;
    TCGv_i64 tmp;

    if (src_es > ES_32) {
        gen_program_exception(s, PGM_SPECIFICATION);
        return DISAS_NORETURN;
    }

    tmp = tcg_temp_new_i64();
    if (s->fields->op2 == 0xd7 || s->fields->op2 == 0xd5) {
        /* iterate backwards to avoid overwriting data we might need later */
        for (dst_idx = NUM_VEC_ELEMENTS(dst_es) - 1; dst_idx >= 0; dst_idx--) {
            src_idx = dst_idx;
            read_vec_element_i64(tmp, v2, src_idx,
                                 src_es | (logical ? 0 : MO_SIGN));
            write_vec_element_i64(tmp, v1, dst_idx, dst_es);
        }
    } else {
        /* iterate forward to avoid overwriting data we might need later */
        for (dst_idx = 0; dst_idx < NUM_VEC_ELEMENTS(dst_es); dst_idx++) {
            src_idx = dst_idx + NUM_VEC_ELEMENTS(src_es) / 2;
            read_vec_element_i64(tmp, v2, src_idx,
                                 src_es | (logical ? 0 : MO_SIGN));
            write_vec_element_i64(tmp, v1, dst_idx, dst_es);
        }
    }
    tcg_temp_free_i64(tmp);
    return DISAS_NEXT;
}
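For VGBM, generate_byte_mask() widens one mask bit into one result byte, and the translator takes the single-dup fast path whenever both vector halves receive the same 8-bit pattern (i2 == (i2 & 0xff) * 0x0101). A quick host-side check of the expansion, using a stand-alone copy of the helper above:

    #include <assert.h>
    #include <stdint.h>

    static uint64_t generate_byte_mask(uint8_t mask)
    {
        uint64_t r = 0;

        for (int i = 0; i < 8; i++) {
            if ((mask >> i) & 1) {
                r |= 0xffull << (i * 8);
            }
        }
        return r;
    }

    int main(void)
    {
        assert(generate_byte_mask(0x00) == 0x0000000000000000ull);
        assert(generate_byte_mask(0xff) == 0xffffffffffffffffull);
        assert(generate_byte_mask(0x81) == 0xff000000000000ffull);
        /* i2 == 0x8181 replicates 0x81 into both halves -> one dup64i */
        assert((0x8181 & 0xff) * 0x0101 == 0x8181);
        return 0;
    }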
target/s390x/vec.h (new file, +101)

/*
 * QEMU TCG support -- s390x vector utilities
 *
 * Copyright (C) 2019 Red Hat Inc
 *
 * Authors:
 *   David Hildenbrand <david@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */
#ifndef S390X_VEC_H
#define S390X_VEC_H

typedef union S390Vector {
    uint64_t doubleword[2];
    uint32_t word[4];
    uint16_t halfword[8];
    uint8_t byte[16];
} S390Vector;

/*
 * Each vector is stored as two 64bit host values. So when talking about
 * byte/halfword/word numbers, we have to take care of proper translation
 * between element numbers.
 *
 * Big Endian (target/possible host)
 * B:  [ 0][ 1][ 2][ 3][ 4][ 5][ 6][ 7] - [ 8][ 9][10][11][12][13][14][15]
 * HW: [     0][     1][     2][     3] - [     4][     5][     6][     7]
 * W:  [             0][             1] - [             2][             3]
 * DW: [                             0] - [                             1]
 *
 * Little Endian (possible host)
 * B:  [ 7][ 6][ 5][ 4][ 3][ 2][ 1][ 0] - [15][14][13][12][11][10][ 9][ 8]
 * HW: [     3][     2][     1][     0] - [     7][     6][     5][     4]
 * W:  [             1][             0] - [             3][             2]
 * DW: [                             0] - [                             1]
 */
#ifndef HOST_WORDS_BIGENDIAN
#define H1(x)  ((x) ^ 7)
#define H2(x)  ((x) ^ 3)
#define H4(x)  ((x) ^ 1)
#else
#define H1(x)  (x)
#define H2(x)  (x)
#define H4(x)  (x)
#endif

static inline uint8_t s390_vec_read_element8(const S390Vector *v, uint8_t enr)
{
    g_assert(enr < 16);
    return v->byte[H1(enr)];
}

static inline uint16_t s390_vec_read_element16(const S390Vector *v, uint8_t enr)
{
    g_assert(enr < 8);
    return v->halfword[H2(enr)];
}

static inline uint32_t s390_vec_read_element32(const S390Vector *v, uint8_t enr)
{
    g_assert(enr < 4);
    return v->word[H4(enr)];
}

static inline uint64_t s390_vec_read_element64(const S390Vector *v, uint8_t enr)
{
    g_assert(enr < 2);
    return v->doubleword[enr];
}

static inline void s390_vec_write_element8(S390Vector *v, uint8_t enr,
                                           uint8_t data)
{
    g_assert(enr < 16);
    v->byte[H1(enr)] = data;
}

static inline void s390_vec_write_element16(S390Vector *v, uint8_t enr,
                                            uint16_t data)
{
    g_assert(enr < 8);
    v->halfword[H2(enr)] = data;
}

static inline void s390_vec_write_element32(S390Vector *v, uint8_t enr,
                                            uint32_t data)
{
    g_assert(enr < 4);
    v->word[H4(enr)] = data;
}

static inline void s390_vec_write_element64(S390Vector *v, uint8_t enr,
                                            uint64_t data)
{
    g_assert(enr < 2);
    v->doubleword[enr] = data;
}

#endif /* S390X_VEC_H */
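The H1/H2/H4 macros make the element accessors target-ordered on any host: on a little-endian host, XOR-ing the index flips the position within each 64-bit half, so element 0 always means the big-endian leftmost element. A small demo of the byte case, assuming a little-endian host (the union mirrors S390Vector):

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    typedef union {
        uint64_t doubleword[2];
        uint8_t byte[16];
    } S390VectorLike;

    int main(void)
    {
        S390VectorLike v;

        memset(&v, 0, sizeof(v));
        v.byte[0 ^ 7] = 0xab;    /* s390_vec_write_element8(&v, 0, 0xab) on LE */
        /* Element 0 is the big-endian leftmost byte: the MSB of doubleword[0]. */
        assert(v.doubleword[0] == 0xab00000000000000ull);
        return 0;
    }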
target/s390x/vec_helper.c (new file, +193)

/*
 * QEMU TCG support -- s390x vector support instructions
 *
 * Copyright (C) 2019 Red Hat Inc
 *
 * Authors:
 *   David Hildenbrand <david@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "cpu.h"
#include "internal.h"
#include "vec.h"
#include "tcg/tcg.h"
#include "tcg/tcg-gvec-desc.h"
#include "exec/helper-proto.h"
#include "exec/cpu_ldst.h"
#include "exec/exec-all.h"

void HELPER(vll)(CPUS390XState *env, void *v1, uint64_t addr, uint64_t bytes)
{
    if (likely(bytes >= 16)) {
        uint64_t t0, t1;

        t0 = cpu_ldq_data_ra(env, addr, GETPC());
        addr = wrap_address(env, addr + 8);
        t1 = cpu_ldq_data_ra(env, addr, GETPC());
        s390_vec_write_element64(v1, 0, t0);
        s390_vec_write_element64(v1, 1, t1);
    } else {
        S390Vector tmp = {};
        int i;

        for (i = 0; i < bytes; i++) {
            uint8_t byte = cpu_ldub_data_ra(env, addr, GETPC());

            s390_vec_write_element8(&tmp, i, byte);
            addr = wrap_address(env, addr + 1);
        }
        *(S390Vector *)v1 = tmp;
    }
}

#define DEF_VPK_HFN(BITS, TBITS)                                           \
typedef uint##TBITS##_t (*vpk##BITS##_fn)(uint##BITS##_t, int *);          \
static int vpk##BITS##_hfn(S390Vector *v1, const S390Vector *v2,           \
                           const S390Vector *v3, vpk##BITS##_fn fn)        \
{                                                                          \
    int i, saturated = 0;                                                  \
    S390Vector tmp;                                                        \
                                                                           \
    for (i = 0; i < (128 / TBITS); i++) {                                  \
        uint##BITS##_t src;                                                \
                                                                           \
        if (i < (128 / BITS)) {                                            \
            src = s390_vec_read_element##BITS(v2, i);                      \
        } else {                                                           \
            src = s390_vec_read_element##BITS(v3, i - (128 / BITS));       \
        }                                                                  \
        s390_vec_write_element##TBITS(&tmp, i, fn(src, &saturated));       \
    }                                                                      \
    *v1 = tmp;                                                             \
    return saturated;                                                      \
}
DEF_VPK_HFN(64, 32)
DEF_VPK_HFN(32, 16)
DEF_VPK_HFN(16, 8)

#define DEF_VPK(BITS, TBITS)                                               \
static uint##TBITS##_t vpk##BITS##e(uint##BITS##_t src, int *saturated)    \
{                                                                          \
    return src;                                                            \
}                                                                          \
void HELPER(gvec_vpk##BITS)(void *v1, const void *v2, const void *v3,      \
                            uint32_t desc)                                 \
{                                                                          \
    vpk##BITS##_hfn(v1, v2, v3, vpk##BITS##e);                             \
}
DEF_VPK(64, 32)
DEF_VPK(32, 16)
DEF_VPK(16, 8)

#define DEF_VPKS(BITS, TBITS)                                              \
static uint##TBITS##_t vpks##BITS##e(uint##BITS##_t src, int *saturated)   \
{                                                                          \
    if ((int##BITS##_t)src > INT##TBITS##_MAX) {                           \
        (*saturated)++;                                                    \
        return INT##TBITS##_MAX;                                           \
    } else if ((int##BITS##_t)src < INT##TBITS##_MIN) {                    \
        (*saturated)++;                                                    \
        return INT##TBITS##_MIN;                                           \
    }                                                                      \
    return src;                                                            \
}                                                                          \
void HELPER(gvec_vpks##BITS)(void *v1, const void *v2, const void *v3,     \
                             uint32_t desc)                                \
{                                                                          \
    vpk##BITS##_hfn(v1, v2, v3, vpks##BITS##e);                            \
}                                                                          \
void HELPER(gvec_vpks_cc##BITS)(void *v1, const void *v2, const void *v3,  \
                                CPUS390XState *env, uint32_t desc)         \
{                                                                          \
    int saturated = vpk##BITS##_hfn(v1, v2, v3, vpks##BITS##e);            \
                                                                           \
    if (saturated == (128 / TBITS)) {                                      \
        env->cc_op = 3;                                                    \
    } else if (saturated) {                                                \
        env->cc_op = 1;                                                    \
    } else {                                                               \
        env->cc_op = 0;                                                    \
    }                                                                      \
}
DEF_VPKS(64, 32)
DEF_VPKS(32, 16)
DEF_VPKS(16, 8)

#define DEF_VPKLS(BITS, TBITS)                                             \
static uint##TBITS##_t vpkls##BITS##e(uint##BITS##_t src, int *saturated)  \
{                                                                          \
    if (src > UINT##TBITS##_MAX) {                                         \
        (*saturated)++;                                                    \
        return UINT##TBITS##_MAX;                                          \
    }                                                                      \
    return src;                                                            \
}                                                                          \
void HELPER(gvec_vpkls##BITS)(void *v1, const void *v2, const void *v3,    \
                              uint32_t desc)                               \
{                                                                          \
    vpk##BITS##_hfn(v1, v2, v3, vpkls##BITS##e);                           \
}                                                                          \
void HELPER(gvec_vpkls_cc##BITS)(void *v1, const void *v2, const void *v3, \
                                 CPUS390XState *env, uint32_t desc)        \
{                                                                          \
    int saturated = vpk##BITS##_hfn(v1, v2, v3, vpkls##BITS##e);           \
                                                                           \
    if (saturated == (128 / TBITS)) {                                      \
        env->cc_op = 3;                                                    \
    } else if (saturated) {                                                \
        env->cc_op = 1;                                                    \
    } else {                                                               \
        env->cc_op = 0;                                                    \
    }                                                                      \
}
DEF_VPKLS(64, 32)
DEF_VPKLS(32, 16)
DEF_VPKLS(16, 8)

void HELPER(gvec_vperm)(void *v1, const void *v2, const void *v3,
                        const void *v4, uint32_t desc)
{
    S390Vector tmp;
    int i;

    for (i = 0; i < 16; i++) {
        const uint8_t selector = s390_vec_read_element8(v4, i) & 0x1f;
        uint8_t byte;

        if (selector < 16) {
            byte = s390_vec_read_element8(v2, selector);
        } else {
            byte = s390_vec_read_element8(v3, selector - 16);
        }
        s390_vec_write_element8(&tmp, i, byte);
    }
    *(S390Vector *)v1 = tmp;
}

void HELPER(vstl)(CPUS390XState *env, const void *v1, uint64_t addr,
                  uint64_t bytes)
{
    /* Probe write access before actually modifying memory */
    probe_write_access(env, addr, bytes, GETPC());

    if (likely(bytes >= 16)) {
        cpu_stq_data_ra(env, addr, s390_vec_read_element64(v1, 0), GETPC());
        addr = wrap_address(env, addr + 8);
        cpu_stq_data_ra(env, addr, s390_vec_read_element64(v1, 1), GETPC());
    } else {
        int i;

        for (i = 0; i < bytes; i++) {
            uint8_t byte = s390_vec_read_element8(v1, i);

            cpu_stb_data_ra(env, addr, byte, GETPC());
            addr = wrap_address(env, addr + 1);
        }
    }
}
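The DEF_VPKS expansion clamps each source element into the narrower signed range and counts how many elements saturated; the *_cc variants then derive the condition code from that count (0 = none, 1 = some, 3 = all). A stand-alone copy of the 32-to-16 element function with a few sample values:

    #include <assert.h>
    #include <stdint.h>

    /* Signed 32 -> 16 pack element, as in the DEF_VPKS(32, 16) expansion. */
    static uint16_t vpks32e(uint32_t src, int *saturated)
    {
        if ((int32_t)src > INT16_MAX) {
            (*saturated)++;
            return INT16_MAX;
        } else if ((int32_t)src < INT16_MIN) {
            (*saturated)++;
            return INT16_MIN;
        }
        return src;
    }

    int main(void)
    {
        int sat = 0;

        assert(vpks32e(0x00001234, &sat) == 0x1234 && sat == 0);  /* fits */
        assert(vpks32e(0x00012345, &sat) == 0x7fff && sat == 1);  /* clamped high */
        assert(vpks32e((uint32_t)-0x12345, &sat) == 0x8000 && sat == 2); /* low */
        return 0;
    }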