qemu with hax to log dma reads & writes jcs.org/2018/11/12/vfio

target/ppc: Manage external HPT via virtual hypervisor

The pseries machine type implements the behaviour of a PAPR compliant
hypervisor, without actually executing such a hypervisor on the virtual
CPU. To do this we need some hooks in the CPU code to make hypervisor
facilities get redirected to the machine instead of emulated internally.

For hypercalls this is managed through the cpu->vhyp field, which points
to a QOM interface with a method implementing the hypercall.

For the hashed page table (HPT) - also a hypervisor resource - we use an
older hack. CPUPPCState has an 'external_htab' field which, when non-NULL,
indicates that the HPT is stored in QEMU memory, rather than within the
guest's address space.

For consistency - and to make some future extensions easier - this merges
the external HPT mechanism into the vhyp mechanism. Methods are added
to vhyp for the basic operations the core hash MMU code needs: map_hptes()
and unmap_hptes() for reading the HPT, store_hpte() for updating it, and
hpt_mask() for retrieving its size.

To match this, the pseries machine now sets these vhyp fields in its
existing vhyp class, rather than reaching into the cpu object to set the
external_htab field.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>

+125 -88
+60
hw/ppc/spapr.c
··· 1053 1053 spapr->htab_fd = -1; 1054 1054 } 1055 1055 1056 + static hwaddr spapr_hpt_mask(PPCVirtualHypervisor *vhyp) 1057 + { 1058 + sPAPRMachineState *spapr = SPAPR_MACHINE(vhyp); 1059 + 1060 + return HTAB_SIZE(spapr) / HASH_PTEG_SIZE_64 - 1; 1061 + } 1062 + 1063 + static const ppc_hash_pte64_t *spapr_map_hptes(PPCVirtualHypervisor *vhyp, 1064 + hwaddr ptex, int n) 1065 + { 1066 + sPAPRMachineState *spapr = SPAPR_MACHINE(vhyp); 1067 + hwaddr pte_offset = ptex * HASH_PTE_SIZE_64; 1068 + 1069 + if (!spapr->htab) { 1070 + /* 1071 + * HTAB is controlled by KVM. Fetch into temporary buffer 1072 + */ 1073 + ppc_hash_pte64_t *hptes = g_malloc(n * HASH_PTE_SIZE_64); 1074 + kvmppc_read_hptes(hptes, ptex, n); 1075 + return hptes; 1076 + } 1077 + 1078 + /* 1079 + * HTAB is controlled by QEMU. Just point to the internally 1080 + * accessible PTEG. 1081 + */ 1082 + return (const ppc_hash_pte64_t *)(spapr->htab + pte_offset); 1083 + } 1084 + 1085 + static void spapr_unmap_hptes(PPCVirtualHypervisor *vhyp, 1086 + const ppc_hash_pte64_t *hptes, 1087 + hwaddr ptex, int n) 1088 + { 1089 + sPAPRMachineState *spapr = SPAPR_MACHINE(vhyp); 1090 + 1091 + if (!spapr->htab) { 1092 + g_free((void *)hptes); 1093 + } 1094 + 1095 + /* Nothing to do for qemu managed HPT */ 1096 + } 1097 + 1098 + static void spapr_store_hpte(PPCVirtualHypervisor *vhyp, hwaddr ptex, 1099 + uint64_t pte0, uint64_t pte1) 1100 + { 1101 + sPAPRMachineState *spapr = SPAPR_MACHINE(vhyp); 1102 + hwaddr offset = ptex * HASH_PTE_SIZE_64; 1103 + 1104 + if (!spapr->htab) { 1105 + kvmppc_write_hpte(ptex, pte0, pte1); 1106 + } else { 1107 + stq_p(spapr->htab + offset, pte0); 1108 + stq_p(spapr->htab + offset + HASH_PTE_SIZE_64 / 2, pte1); 1109 + } 1110 + } 1111 + 1056 1112 static int spapr_hpt_shift_for_ramsize(uint64_t ramsize) 1057 1113 { 1058 1114 int shift; ··· 2913 2969 nc->nmi_monitor_handler = spapr_nmi; 2914 2970 smc->phb_placement = spapr_phb_placement; 2915 2971 vhc->hypercall = emulate_spapr_hypercall; 2972 + 
vhc->hpt_mask = spapr_hpt_mask; 2973 + vhc->map_hptes = spapr_map_hptes; 2974 + vhc->unmap_hptes = spapr_unmap_hptes; 2975 + vhc->store_hpte = spapr_store_hpte; 2916 2976 } 2917 2977 2918 2978 static const TypeInfo spapr_machine_info = {
+15 -2
hw/ppc/spapr_cpu_core.c
··· 13 13 #include "hw/boards.h" 14 14 #include "qapi/error.h" 15 15 #include "sysemu/cpus.h" 16 + #include "sysemu/kvm.h" 16 17 #include "target/ppc/kvm_ppc.h" 17 18 #include "hw/ppc/ppc.h" 18 19 #include "target/ppc/mmu-hash64.h" 19 20 #include "sysemu/numa.h" 21 + #include "qemu/error-report.h" 20 22 21 23 static void spapr_cpu_reset(void *opaque) 22 24 { ··· 34 36 35 37 env->spr[SPR_HIOR] = 0; 36 38 37 - ppc_hash64_set_external_hpt(cpu, spapr->htab, spapr->htab_shift, 38 - &error_fatal); 39 + /* 40 + * This is a hack for the benefit of KVM PR - it abuses the SDR1 41 + * slot in kvm_sregs to communicate the userspace address of the 42 + * HPT 43 + */ 44 + if (kvm_enabled()) { 45 + env->spr[SPR_SDR1] = (target_ulong)(uintptr_t)spapr->htab 46 + | (spapr->htab_shift - 18); 47 + if (kvmppc_put_books_sregs(cpu) < 0) { 48 + error_report("Unable to update SDR1 in KVM"); 49 + exit(1); 50 + } 51 + } 39 52 } 40 53 41 54 static void spapr_cpu_destroy(PowerPCCPU *cpu)
+1 -2
hw/ppc/spapr_hcall.c
··· 326 326 static target_ulong h_read(PowerPCCPU *cpu, sPAPRMachineState *spapr, 327 327 target_ulong opcode, target_ulong *args) 328 328 { 329 - CPUPPCState *env = &cpu->env; 330 329 target_ulong flags = args[0]; 331 330 target_ulong ptex = args[1]; 332 331 uint8_t *hpte; ··· 342 341 n_entries = 4; 343 342 } 344 343 345 - hpte = env->external_htab + (ptex * HASH_PTE_SIZE_64); 344 + hpte = spapr->htab + (ptex * HASH_PTE_SIZE_64); 346 345 347 346 for (i = 0, ridx = 0; i < n_entries; i++) { 348 347 args[ridx++] = ldq_p(hpte);
+8 -2
target/ppc/cpu.h
··· 999 999 #endif 1000 1000 /* segment registers */ 1001 1001 target_ulong sr[32]; 1002 - /* externally stored hash table */ 1003 - uint8_t *external_htab; 1004 1002 /* BATs */ 1005 1003 uint32_t nb_BATs; 1006 1004 target_ulong DBAT[2][8]; ··· 1208 1206 struct PPCVirtualHypervisorClass { 1209 1207 InterfaceClass parent; 1210 1208 void (*hypercall)(PPCVirtualHypervisor *vhyp, PowerPCCPU *cpu); 1209 + hwaddr (*hpt_mask)(PPCVirtualHypervisor *vhyp); 1210 + const ppc_hash_pte64_t *(*map_hptes)(PPCVirtualHypervisor *vhyp, 1211 + hwaddr ptex, int n); 1212 + void (*unmap_hptes)(PPCVirtualHypervisor *vhyp, 1213 + const ppc_hash_pte64_t *hptes, 1214 + hwaddr ptex, int n); 1215 + void (*store_hpte)(PPCVirtualHypervisor *vhyp, hwaddr ptex, 1216 + uint64_t pte0, uint64_t pte1); 1211 1217 }; 1212 1218 1213 1219 #define TYPE_PPC_VIRTUAL_HYPERVISOR "ppc-virtual-hypervisor"
+1 -1
target/ppc/kvm.c
··· 1251 1251 return ret; 1252 1252 } 1253 1253 1254 - if (!env->external_htab) { 1254 + if (!cpu->vhyp) { 1255 1255 ppc_store_sdr1(env, sregs.u.s.sdr1); 1256 1256 } 1257 1257
+2 -2
target/ppc/machine.c
··· 76 76 qemu_get_betls(f, &env->pb[i]); 77 77 for (i = 0; i < 1024; i++) 78 78 qemu_get_betls(f, &env->spr[i]); 79 - if (!env->external_htab) { 79 + if (!cpu->vhyp) { 80 80 ppc_store_sdr1(env, sdr1); 81 81 } 82 82 qemu_get_be32s(f, &env->vscr); ··· 228 228 env->IBAT[1][i+4] = env->spr[SPR_IBAT4U + 2*i + 1]; 229 229 } 230 230 231 - if (!env->external_htab) { 231 + if (!cpu->vhyp) { 232 232 ppc_store_sdr1(env, env->spr[SPR_SDR1]); 233 233 } 234 234
-8
target/ppc/mmu-hash32.h
··· 80 80 static inline target_ulong ppc_hash32_load_hpte0(PowerPCCPU *cpu, 81 81 hwaddr pte_offset) 82 82 { 83 - CPUPPCState *env = &cpu->env; 84 83 target_ulong base = ppc_hash32_hpt_base(cpu); 85 84 86 - assert(!env->external_htab); /* Not supported on 32-bit for now */ 87 85 return ldl_phys(CPU(cpu)->as, base + pte_offset); 88 86 } 89 87 ··· 91 89 hwaddr pte_offset) 92 90 { 93 91 target_ulong base = ppc_hash32_hpt_base(cpu); 94 - CPUPPCState *env = &cpu->env; 95 92 96 - assert(!env->external_htab); /* Not supported on 32-bit for now */ 97 93 return ldl_phys(CPU(cpu)->as, base + pte_offset + HASH_PTE_SIZE_32 / 2); 98 94 } 99 95 100 96 static inline void ppc_hash32_store_hpte0(PowerPCCPU *cpu, 101 97 hwaddr pte_offset, target_ulong pte0) 102 98 { 103 - CPUPPCState *env = &cpu->env; 104 99 target_ulong base = ppc_hash32_hpt_base(cpu); 105 100 106 - assert(!env->external_htab); /* Not supported on 32-bit for now */ 107 101 stl_phys(CPU(cpu)->as, base + pte_offset, pte0); 108 102 } 109 103 110 104 static inline void ppc_hash32_store_hpte1(PowerPCCPU *cpu, 111 105 hwaddr pte_offset, target_ulong pte1) 112 106 { 113 - CPUPPCState *env = &cpu->env; 114 107 target_ulong base = ppc_hash32_hpt_base(cpu); 115 108 116 - assert(!env->external_htab); /* Not supported on 32-bit for now */ 117 109 stl_phys(CPU(cpu)->as, base + pte_offset + HASH_PTE_SIZE_32 / 2, pte1); 118 110 } 119 111
+31 -68
target/ppc/mmu-hash64.c
··· 38 38 #endif 39 39 40 40 /* 41 - * Used to indicate that a CPU has its hash page table (HPT) managed 42 - * within the host kernel 43 - */ 44 - #define MMU_HASH64_KVM_MANAGED_HPT ((void *)-1) 45 - 46 - /* 47 41 * SLB handling 48 42 */ 49 43 ··· 313 307 env->spr[SPR_SDR1] = value; 314 308 } 315 309 316 - void ppc_hash64_set_external_hpt(PowerPCCPU *cpu, void *hpt, int shift, 317 - Error **errp) 318 - { 319 - CPUPPCState *env = &cpu->env; 320 - Error *local_err = NULL; 321 - 322 - if (hpt) { 323 - env->external_htab = hpt; 324 - } else { 325 - env->external_htab = MMU_HASH64_KVM_MANAGED_HPT; 326 - } 327 - ppc_hash64_set_sdr1(cpu, (target_ulong)(uintptr_t)hpt | (shift - 18), 328 - &local_err); 329 - if (local_err) { 330 - error_propagate(errp, local_err); 331 - return; 332 - } 333 - 334 - if (kvm_enabled()) { 335 - if (kvmppc_put_books_sregs(cpu) < 0) { 336 - error_setg(errp, "Unable to update SDR1 in KVM"); 337 - } 338 - } 339 - } 340 - 341 310 static int ppc_hash64_pte_prot(PowerPCCPU *cpu, 342 311 ppc_slb_t *slb, ppc_hash_pte64_t pte) 343 312 { ··· 429 398 const ppc_hash_pte64_t *ppc_hash64_map_hptes(PowerPCCPU *cpu, 430 399 hwaddr ptex, int n) 431 400 { 432 - ppc_hash_pte64_t *hptes = NULL; 433 401 hwaddr pte_offset = ptex * HASH_PTE_SIZE_64; 402 + hwaddr base = ppc_hash64_hpt_base(cpu); 403 + hwaddr plen = n * HASH_PTE_SIZE_64; 404 + const ppc_hash_pte64_t *hptes; 434 405 435 - if (cpu->env.external_htab == MMU_HASH64_KVM_MANAGED_HPT) { 436 - /* 437 - * HTAB is controlled by KVM. Fetch into temporary buffer 438 - */ 439 - hptes = g_malloc(HASH_PTEG_SIZE_64); 440 - kvmppc_read_hptes(hptes, ptex, n); 441 - } else if (cpu->env.external_htab) { 442 - /* 443 - * HTAB is controlled by QEMU. Just point to the internally 444 - * accessible PTEG. 
445 - */ 446 - hptes = (ppc_hash_pte64_t *)(cpu->env.external_htab + pte_offset); 447 - } else if (ppc_hash64_hpt_base(cpu)) { 448 - hwaddr base = ppc_hash64_hpt_base(cpu); 449 - hwaddr plen = n * HASH_PTE_SIZE_64; 450 - hptes = address_space_map(CPU(cpu)->as, base + pte_offset, 451 - &plen, false); 452 - if (plen < (n * HASH_PTE_SIZE_64)) { 453 - hw_error("%s: Unable to map all requested HPTEs\n", __func__); 454 - } 406 + if (cpu->vhyp) { 407 + PPCVirtualHypervisorClass *vhc = 408 + PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); 409 + return vhc->map_hptes(cpu->vhyp, ptex, n); 410 + } 411 + 412 + if (!base) { 413 + return NULL; 414 + } 415 + 416 + hptes = address_space_map(CPU(cpu)->as, base + pte_offset, &plen, false); 417 + if (plen < (n * HASH_PTE_SIZE_64)) { 418 + hw_error("%s: Unable to map all requested HPTEs\n", __func__); 455 419 } 456 420 return hptes; 457 421 } ··· 459 423 void ppc_hash64_unmap_hptes(PowerPCCPU *cpu, const ppc_hash_pte64_t *hptes, 460 424 hwaddr ptex, int n) 461 425 { 462 - if (cpu->env.external_htab == MMU_HASH64_KVM_MANAGED_HPT) { 463 - g_free((void *)hptes); 464 - } else if (!cpu->env.external_htab) { 465 - address_space_unmap(CPU(cpu)->as, (void *)hptes, n * HASH_PTE_SIZE_64, 466 - false, n * HASH_PTE_SIZE_64); 426 + if (cpu->vhyp) { 427 + PPCVirtualHypervisorClass *vhc = 428 + PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); 429 + vhc->unmap_hptes(cpu->vhyp, hptes, ptex, n); 430 + return; 467 431 } 432 + 433 + address_space_unmap(CPU(cpu)->as, (void *)hptes, n * HASH_PTE_SIZE_64, 434 + false, n * HASH_PTE_SIZE_64); 468 435 } 469 436 470 437 static unsigned hpte_page_shift(const struct ppc_one_seg_page_size *sps, ··· 916 883 void ppc_hash64_store_hpte(PowerPCCPU *cpu, hwaddr ptex, 917 884 uint64_t pte0, uint64_t pte1) 918 885 { 919 - CPUPPCState *env = &cpu->env; 886 + hwaddr base = ppc_hash64_hpt_base(cpu); 920 887 hwaddr offset = ptex * HASH_PTE_SIZE_64; 921 888 922 - if (env->external_htab == MMU_HASH64_KVM_MANAGED_HPT) { 923 - 
kvmppc_write_hpte(ptex, pte0, pte1); 889 + if (cpu->vhyp) { 890 + PPCVirtualHypervisorClass *vhc = 891 + PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); 892 + vhc->store_hpte(cpu->vhyp, ptex, pte0, pte1); 924 893 return; 925 894 } 926 895 927 - if (env->external_htab) { 928 - stq_p(env->external_htab + offset, pte0); 929 - stq_p(env->external_htab + offset + HASH_PTE_SIZE_64 / 2, pte1); 930 - } else { 931 - hwaddr base = ppc_hash64_hpt_base(cpu); 932 - stq_phys(CPU(cpu)->as, base + offset, pte0); 933 - stq_phys(CPU(cpu)->as, base + offset + HASH_PTE_SIZE_64 / 2, pte1); 934 - } 896 + stq_phys(CPU(cpu)->as, base + offset, pte0); 897 + stq_phys(CPU(cpu)->as, base + offset + HASH_PTE_SIZE_64 / 2, pte1); 935 898 } 936 899 937 900 void ppc_hash64_tlb_flush_hpte(PowerPCCPU *cpu, target_ulong ptex,
+5 -2
target/ppc/mmu-hash64.h
··· 101 101 102 102 static inline hwaddr ppc_hash64_hpt_mask(PowerPCCPU *cpu) 103 103 { 104 + if (cpu->vhyp) { 105 + PPCVirtualHypervisorClass *vhc = 106 + PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); 107 + return vhc->hpt_mask(cpu->vhyp); 108 + } 104 109 return (1ULL << ((cpu->env.spr[SPR_SDR1] & SDR_64_HTABSIZE) + 18 - 7)) - 1; 105 110 } 106 111 107 112 void ppc_hash64_set_sdr1(PowerPCCPU *cpu, target_ulong value, 108 113 Error **errp); 109 - void ppc_hash64_set_external_hpt(PowerPCCPU *cpu, void *hpt, int shift, 110 - Error **errp); 111 114 112 115 struct ppc_hash_pte64 { 113 116 uint64_t pte0, pte1;
+2 -1
target/ppc/mmu_helper.c
··· 2001 2001 /* Special registers manipulation */ 2002 2002 void ppc_store_sdr1(CPUPPCState *env, target_ulong value) 2003 2003 { 2004 + PowerPCCPU *cpu = ppc_env_get_cpu(env); 2004 2005 qemu_log_mask(CPU_LOG_MMU, "%s: " TARGET_FMT_lx "\n", __func__, value); 2005 - assert(!env->external_htab); 2006 + assert(!cpu->vhyp); 2006 2007 #if defined(TARGET_PPC64) 2007 2008 if (env->mmu_model & POWERPC_MMU_64) { 2008 2009 PowerPCCPU *cpu = ppc_env_get_cpu(env);