target/i386: fix fscale handling of rounding precision

QEMU with hacks to log DMA reads & writes (jcs.org/2018/11/12/vfio)

The fscale implementation uses floatx80_scalbn for the final scaling
operation. floatx80_scalbn ends up rounding the result using the
dynamic rounding precision configured for the FPU. But only a limited
set of x87 floating-point instructions are supposed to respect the
dynamic rounding precision, and fscale is not in that set. Fix the
implementation to save and restore the rounding precision around the
call to floatx80_scalbn.

Signed-off-by: Joseph Myers <joseph@codesourcery.com>
Message-Id: <alpine.DEB.2.21.2005070045430.18350@digraph.polyomino.org.uk>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

Authored by Joseph Myers and committed by Paolo Bonzini 5 years ago · c535d687 c1c5fb8f

+16

2 changed files

expand all

target/i386/fpu_helper.c

tests/tcg/i386/test-i386-fscale.c

target/i386/fpu_helper.c

··· 1001 1001 } 1002 1002 } else { 1003 1003 int n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status); 1004 + signed char save = env->fp_status.floatx80_rounding_precision; 1005 + env->fp_status.floatx80_rounding_precision = 80; 1004 1006 ST0 = floatx80_scalbn(ST0, n, &env->fp_status); 1007 + env->fp_status.floatx80_rounding_precision = save; 1005 1008 } 1006 1009 } 1007 1010

+13

tests/tcg/i386/test-i386-fscale.c

··· 8 8 long double ld; 9 9 }; 10 10 11 + volatile long double ld_third = 1.0L / 3.0L; 12 + volatile long double ld_four_thirds = 4.0L / 3.0L; 11 13 volatile union u ld_invalid_1 = { .s = { 1, 1234 } }; 12 14 volatile union u ld_invalid_2 = { .s = { 0, 1234 } }; 13 15 volatile union u ld_invalid_3 = { .s = { 0, 0x7fff } }; ··· 89 91 "0" (-1.0L), "u" (-__builtin_infl())); 90 92 if (ld_res != -0.0L || __builtin_copysignl(1.0L, ld_res) != -1.0L) { 91 93 printf("FAIL: fscale finite down inf\n"); 94 + ret = 1; 95 + } 96 + /* Set round-to-nearest with single-precision rounding. */ 97 + cw = cw & ~0xf00; 98 + __asm__ volatile ("fldcw %0" : : "m" (cw)); 99 + __asm__ volatile ("fscale" : "=t" (ld_res) : 100 + "0" (ld_third), "u" (2.0L)); 101 + cw = cw | 0x300; 102 + __asm__ volatile ("fldcw %0" : : "m" (cw)); 103 + if (ld_res != ld_four_thirds) { 104 + printf("FAIL: fscale single-precision\n"); 92 105 ret = 1; 93 106 } 94 107 return ret;