qemu with hax to log dma reads & writes jcs.org/2018/11/12/vfio

target/arm/arch_dump: Add SVE notes

When dumping a guest with dump-guest-memory also dump the SVE
registers if they are in use.

Signed-off-by: Andrew Jones <drjones@redhat.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20200120101832.18781-1-drjones@redhat.com
[PMM: fixed checkpatch nits]
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>

Authored by Andrew Jones and committed by Peter Maydell.
538baab2 acab923d

+148 -26
+1
include/elf.h
··· 1650 1650 #define NT_ARM_HW_BREAK 0x402 /* ARM hardware breakpoint registers */ 1651 1651 #define NT_ARM_HW_WATCH 0x403 /* ARM hardware watchpoint registers */ 1652 1652 #define NT_ARM_SYSTEM_CALL 0x404 /* ARM system call number */ 1653 + #define NT_ARM_SVE 0x405 /* ARM Scalable Vector Extension regs */ 1653 1654 1654 1655 /* 1655 1656 * Physical entry point into the kernel.
+122 -2
target/arm/arch_dump.c
··· 62 62 63 63 QEMU_BUILD_BUG_ON(sizeof(struct aarch64_user_vfp_state) != 528); 64 64 65 + /* struct user_sve_header from arch/arm64/include/uapi/asm/ptrace.h */ 66 + struct aarch64_user_sve_header { 67 + uint32_t size; 68 + uint32_t max_size; 69 + uint16_t vl; 70 + uint16_t max_vl; 71 + uint16_t flags; 72 + uint16_t reserved; 73 + } QEMU_PACKED; 74 + 65 75 struct aarch64_note { 66 76 Elf64_Nhdr hdr; 67 77 char name[8]; /* align_up(sizeof("CORE"), 4) */ 68 78 union { 69 79 struct aarch64_elf_prstatus prstatus; 70 80 struct aarch64_user_vfp_state vfp; 81 + struct aarch64_user_sve_header sve; 71 82 }; 72 83 } QEMU_PACKED; 73 84 ··· 76 87 (AARCH64_NOTE_HEADER_SIZE + sizeof(struct aarch64_elf_prstatus)) 77 88 #define AARCH64_PRFPREG_NOTE_SIZE \ 78 89 (AARCH64_NOTE_HEADER_SIZE + sizeof(struct aarch64_user_vfp_state)) 90 + #define AARCH64_SVE_NOTE_SIZE(env) \ 91 + (AARCH64_NOTE_HEADER_SIZE + sve_size(env)) 79 92 80 93 static void aarch64_note_init(struct aarch64_note *note, DumpState *s, 81 94 const char *name, Elf64_Word namesz, ··· 128 141 return 0; 129 142 } 130 143 144 + #ifdef TARGET_AARCH64 145 + static off_t sve_zreg_offset(uint32_t vq, int n) 146 + { 147 + off_t off = sizeof(struct aarch64_user_sve_header); 148 + return ROUND_UP(off, 16) + vq * 16 * n; 149 + } 150 + 151 + static off_t sve_preg_offset(uint32_t vq, int n) 152 + { 153 + return sve_zreg_offset(vq, 32) + vq * 16 / 8 * n; 154 + } 155 + 156 + static off_t sve_fpsr_offset(uint32_t vq) 157 + { 158 + off_t off = sve_preg_offset(vq, 17); 159 + return ROUND_UP(off, 16); 160 + } 161 + 162 + static off_t sve_fpcr_offset(uint32_t vq) 163 + { 164 + return sve_fpsr_offset(vq) + sizeof(uint32_t); 165 + } 166 + 167 + static uint32_t sve_current_vq(CPUARMState *env) 168 + { 169 + return sve_zcr_len_for_el(env, arm_current_el(env)) + 1; 170 + } 171 + 172 + static size_t sve_size_vq(uint32_t vq) 173 + { 174 + off_t off = sve_fpcr_offset(vq) + sizeof(uint32_t); 175 + return ROUND_UP(off, 16); 176 + } 177 + 178 + 
static size_t sve_size(CPUARMState *env) 179 + { 180 + return sve_size_vq(sve_current_vq(env)); 181 + } 182 + 183 + static int aarch64_write_elf64_sve(WriteCoreDumpFunction f, 184 + CPUARMState *env, int cpuid, 185 + DumpState *s) 186 + { 187 + struct aarch64_note *note; 188 + ARMCPU *cpu = env_archcpu(env); 189 + uint32_t vq = sve_current_vq(env); 190 + uint64_t tmp[ARM_MAX_VQ * 2], *r; 191 + uint32_t fpr; 192 + uint8_t *buf; 193 + int ret, i; 194 + 195 + note = g_malloc0(AARCH64_SVE_NOTE_SIZE(env)); 196 + buf = (uint8_t *)&note->sve; 197 + 198 + aarch64_note_init(note, s, "LINUX", 6, NT_ARM_SVE, sve_size_vq(vq)); 199 + 200 + note->sve.size = cpu_to_dump32(s, sve_size_vq(vq)); 201 + note->sve.max_size = cpu_to_dump32(s, sve_size_vq(cpu->sve_max_vq)); 202 + note->sve.vl = cpu_to_dump16(s, vq * 16); 203 + note->sve.max_vl = cpu_to_dump16(s, cpu->sve_max_vq * 16); 204 + note->sve.flags = cpu_to_dump16(s, 1); 205 + 206 + for (i = 0; i < 32; ++i) { 207 + r = sve_bswap64(tmp, &env->vfp.zregs[i].d[0], vq * 2); 208 + memcpy(&buf[sve_zreg_offset(vq, i)], r, vq * 16); 209 + } 210 + 211 + for (i = 0; i < 17; ++i) { 212 + r = sve_bswap64(tmp, r = &env->vfp.pregs[i].p[0], 213 + DIV_ROUND_UP(vq * 2, 8)); 214 + memcpy(&buf[sve_preg_offset(vq, i)], r, vq * 16 / 8); 215 + } 216 + 217 + fpr = cpu_to_dump32(s, vfp_get_fpsr(env)); 218 + memcpy(&buf[sve_fpsr_offset(vq)], &fpr, sizeof(uint32_t)); 219 + 220 + fpr = cpu_to_dump32(s, vfp_get_fpcr(env)); 221 + memcpy(&buf[sve_fpcr_offset(vq)], &fpr, sizeof(uint32_t)); 222 + 223 + ret = f(note, AARCH64_SVE_NOTE_SIZE(env), s); 224 + g_free(note); 225 + 226 + if (ret < 0) { 227 + return -1; 228 + } 229 + 230 + return 0; 231 + } 232 + #endif 233 + 131 234 int arm_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, 132 235 int cpuid, void *opaque) 133 236 { 134 237 struct aarch64_note note; 135 - CPUARMState *env = &ARM_CPU(cs)->env; 238 + ARMCPU *cpu = ARM_CPU(cs); 239 + CPUARMState *env = &cpu->env; 136 240 DumpState *s = opaque; 137 
241 uint64_t pstate, sp; 138 242 int ret, i; ··· 163 267 return -1; 164 268 } 165 269 166 - return aarch64_write_elf64_prfpreg(f, env, cpuid, s); 270 + ret = aarch64_write_elf64_prfpreg(f, env, cpuid, s); 271 + if (ret) { 272 + return ret; 273 + } 274 + 275 + #ifdef TARGET_AARCH64 276 + if (cpu_isar_feature(aa64_sve, cpu)) { 277 + ret = aarch64_write_elf64_sve(f, env, cpuid, s); 278 + } 279 + #endif 280 + 281 + return ret; 167 282 } 168 283 169 284 /* struct pt_regs from arch/arm/include/asm/ptrace.h */ ··· 335 450 if (class == ELFCLASS64) { 336 451 note_size = AARCH64_PRSTATUS_NOTE_SIZE; 337 452 note_size += AARCH64_PRFPREG_NOTE_SIZE; 453 + #ifdef TARGET_AARCH64 454 + if (cpu_isar_feature(aa64_sve, cpu)) { 455 + note_size += AARCH64_SVE_NOTE_SIZE(env); 456 + } 457 + #endif 338 458 } else { 339 459 note_size = ARM_PRSTATUS_NOTE_SIZE; 340 460 if (arm_feature(env, ARM_FEATURE_VFP)) {
+25
target/arm/cpu.h
··· 980 980 void aarch64_sve_change_el(CPUARMState *env, int old_el, 981 981 int new_el, bool el0_a64); 982 982 void aarch64_add_sve_properties(Object *obj); 983 + 984 + /* 985 + * SVE registers are encoded in KVM's memory in an endianness-invariant format. 986 + * The byte at offset i from the start of the in-memory representation contains 987 + * the bits [(7 + 8 * i) : (8 * i)] of the register value. As this means the 988 + * lowest offsets are stored in the lowest memory addresses, then that nearly 989 + * matches QEMU's representation, which is to use an array of host-endian 990 + * uint64_t's, where the lower offsets are at the lower indices. To complete 991 + * the translation we just need to byte swap the uint64_t's on big-endian hosts. 992 + */ 993 + static inline uint64_t *sve_bswap64(uint64_t *dst, uint64_t *src, int nr) 994 + { 995 + #ifdef HOST_WORDS_BIGENDIAN 996 + int i; 997 + 998 + for (i = 0; i < nr; ++i) { 999 + dst[i] = bswap64(src[i]); 1000 + } 1001 + 1002 + return dst; 1003 + #else 1004 + return src; 1005 + #endif 1006 + } 1007 + 983 1008 #else 984 1009 static inline void aarch64_sve_narrow_vq(CPUARMState *env, unsigned vq) { } 985 1010 static inline void aarch64_sve_change_el(CPUARMState *env, int o,
-24
target/arm/kvm64.c
··· 877 877 } 878 878 879 879 /* 880 - * SVE registers are encoded in KVM's memory in an endianness-invariant format. 881 - * The byte at offset i from the start of the in-memory representation contains 882 - * the bits [(7 + 8 * i) : (8 * i)] of the register value. As this means the 883 - * lowest offsets are stored in the lowest memory addresses, then that nearly 884 - * matches QEMU's representation, which is to use an array of host-endian 885 - * uint64_t's, where the lower offsets are at the lower indices. To complete 886 - * the translation we just need to byte swap the uint64_t's on big-endian hosts. 887 - */ 888 - static uint64_t *sve_bswap64(uint64_t *dst, uint64_t *src, int nr) 889 - { 890 - #ifdef HOST_WORDS_BIGENDIAN 891 - int i; 892 - 893 - for (i = 0; i < nr; ++i) { 894 - dst[i] = bswap64(src[i]); 895 - } 896 - 897 - return dst; 898 - #else 899 - return src; 900 - #endif 901 - } 902 - 903 - /* 904 880 * KVM SVE registers come in slices where ZREGs have a slice size of 2048 bits 905 881 * and PREGS and the FFR have a slice size of 256 bits. However we simply hard 906 882 * code the slice index to zero for now as it's unlikely we'll need more than