qemu with hax to log dma reads & writes jcs.org/2018/11/12/vfio

target/arm: Implement vector shifted FCVT for fp16

While we have some of the scalar paths for FCVT for fp16,
we failed to decode the fp16 version of these instructions.

Cc: qemu-stable@nongnu.org
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20180502221552.3873-3-richard.henderson@linaro.org
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
(cherry picked from commit d0ba8e74acd299b092786ffc30b306638d395a9e)
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>

Authored by Richard Henderson and committed by Michael Roth.
0aaf1cca 994b0cf9

Diffstat: +45 -18
target/arm/translate-a64.c
··· 7202 7202 bool is_q, bool is_u, 7203 7203 int immh, int immb, int rn, int rd) 7204 7204 { 7205 - bool is_double = extract32(immh, 3, 1); 7206 7205 int immhb = immh << 3 | immb; 7207 - int fracbits = (is_double ? 128 : 64) - immhb; 7208 - int pass; 7206 + int pass, size, fracbits; 7209 7207 TCGv_ptr tcg_fpstatus; 7210 7208 TCGv_i32 tcg_rmode, tcg_shift; 7211 7209 7212 - if (!extract32(immh, 2, 2)) { 7213 - unallocated_encoding(s); 7214 - return; 7215 - } 7216 - 7217 - if (!is_scalar && !is_q && is_double) { 7210 + if (immh & 0x8) { 7211 + size = MO_64; 7212 + if (!is_scalar && !is_q) { 7213 + unallocated_encoding(s); 7214 + return; 7215 + } 7216 + } else if (immh & 0x4) { 7217 + size = MO_32; 7218 + } else if (immh & 0x2) { 7219 + size = MO_16; 7220 + if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { 7221 + unallocated_encoding(s); 7222 + return; 7223 + } 7224 + } else { 7225 + /* Should have split out AdvSIMD modified immediate earlier. */ 7226 + assert(immh == 1); 7218 7227 unallocated_encoding(s); 7219 7228 return; 7220 7229 } ··· 7226 7235 assert(!(is_scalar && is_q)); 7227 7236 7228 7237 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(FPROUNDING_ZERO)); 7229 - tcg_fpstatus = get_fpstatus_ptr(false); 7238 + tcg_fpstatus = get_fpstatus_ptr(size == MO_16); 7230 7239 gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus); 7240 + fracbits = (16 << size) - immhb; 7231 7241 tcg_shift = tcg_const_i32(fracbits); 7232 7242 7233 - if (is_double) { 7243 + if (size == MO_64) { 7234 7244 int maxpass = is_scalar ? 1 : 2; 7235 7245 7236 7246 for (pass = 0; pass < maxpass; pass++) { ··· 7247 7257 } 7248 7258 clear_vec_high(s, is_q, rd); 7249 7259 } else { 7250 - int maxpass = is_scalar ? 1 : is_q ? 4 : 2; 7251 - for (pass = 0; pass < maxpass; pass++) { 7252 - TCGv_i32 tcg_op = tcg_temp_new_i32(); 7260 + void (*fn)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr); 7261 + int maxpass = is_scalar ? 
1 : ((8 << is_q) >> size); 7253 7262 7254 - read_vec_element_i32(s, tcg_op, rn, pass, MO_32); 7263 + switch (size) { 7264 + case MO_16: 7255 7265 if (is_u) { 7256 - gen_helper_vfp_touls(tcg_op, tcg_op, tcg_shift, tcg_fpstatus); 7266 + fn = gen_helper_vfp_toulh; 7267 + } else { 7268 + fn = gen_helper_vfp_toslh; 7269 + } 7270 + break; 7271 + case MO_32: 7272 + if (is_u) { 7273 + fn = gen_helper_vfp_touls; 7257 7274 } else { 7258 - gen_helper_vfp_tosls(tcg_op, tcg_op, tcg_shift, tcg_fpstatus); 7275 + fn = gen_helper_vfp_tosls; 7259 7276 } 7277 + break; 7278 + default: 7279 + g_assert_not_reached(); 7280 + } 7281 + 7282 + for (pass = 0; pass < maxpass; pass++) { 7283 + TCGv_i32 tcg_op = tcg_temp_new_i32(); 7284 + 7285 + read_vec_element_i32(s, tcg_op, rn, pass, size); 7286 + fn(tcg_op, tcg_op, tcg_shift, tcg_fpstatus); 7260 7287 if (is_scalar) { 7261 7288 write_fp_sreg(s, rd, tcg_op); 7262 7289 } else { 7263 - write_vec_element_i32(s, tcg_op, rd, pass, MO_32); 7290 + write_vec_element_i32(s, tcg_op, rd, pass, size); 7264 7291 } 7265 7292 tcg_temp_free_i32(tcg_op); 7266 7293 }