QEMU with hacks to log DMA reads & writes — see jcs.org/2018/11/12/vfio

KVM: add support for AMD nested live migration

Support for nested guest live migration is part of Linux 5.8; add the
corresponding code to QEMU. The migration format consists of a few
flags plus an opaque 4k blob.

The blob is in VMCB format (the control area represents the L1 VMCB
control fields, the save area represents the pre-vmentry state; KVM does
not use the host save area since the AMD manual allows that) but QEMU
does not really care about that. However, the flags need to be
copied to hflags/hflags2 and back.

In addition, support for retrieving and setting the AMD nested virtualization
state allows the L1 guest to be reset while running a nested guest, but
a small bug in CPU reset needs to be fixed for that to work.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

+70 -9
+1
target/i386/cpu.c
··· 5968 5968 /* init to reset state */ 5969 5969 5970 5970 env->hflags2 |= HF2_GIF_MASK; 5971 + env->hflags &= ~HF_GUEST_MASK; 5971 5972 5972 5973 cpu_x86_update_cr0(env, 0x60000010); 5973 5974 env->a20_mask = ~0x0;
+5
target/i386/cpu.h
··· 2118 2118 return env->features[FEAT_1_ECX] & CPUID_EXT_VMX; 2119 2119 } 2120 2120 2121 + static inline bool cpu_has_svm(CPUX86State *env) 2122 + { 2123 + return env->features[FEAT_8000_0001_ECX] & CPUID_EXT3_SVM; 2124 + } 2125 + 2121 2126 /* 2122 2127 * In order for a vCPU to enter VMX operation it must have CR4.VMXE set. 2123 2128 * Since it was set, CR4.VMXE must remain set as long as vCPU is in
+34 -8
target/i386/kvm.c
··· 1840 1840 if (max_nested_state_len > 0) { 1841 1841 assert(max_nested_state_len >= offsetof(struct kvm_nested_state, data)); 1842 1842 1843 - if (cpu_has_vmx(env)) { 1843 + if (cpu_has_vmx(env) || cpu_has_svm(env)) { 1844 1844 struct kvm_vmx_nested_state_hdr *vmx_hdr; 1845 1845 1846 1846 env->nested_state = g_malloc0(max_nested_state_len); 1847 1847 env->nested_state->size = max_nested_state_len; 1848 1848 env->nested_state->format = KVM_STATE_NESTED_FORMAT_VMX; 1849 1849 1850 - vmx_hdr = &env->nested_state->hdr.vmx; 1851 - vmx_hdr->vmxon_pa = -1ull; 1852 - vmx_hdr->vmcs12_pa = -1ull; 1850 + if (cpu_has_vmx(env)) { 1851 + vmx_hdr = &env->nested_state->hdr.vmx; 1852 + vmx_hdr->vmxon_pa = -1ull; 1853 + vmx_hdr->vmcs12_pa = -1ull; 1854 + } 1853 1855 } 1854 1856 } 1855 1857 ··· 3873 3875 return 0; 3874 3876 } 3875 3877 3878 + /* 3879 + * Copy flags that are affected by reset from env->hflags and env->hflags2. 3880 + */ 3881 + if (env->hflags & HF_GUEST_MASK) { 3882 + env->nested_state->flags |= KVM_STATE_NESTED_GUEST_MODE; 3883 + } else { 3884 + env->nested_state->flags &= ~KVM_STATE_NESTED_GUEST_MODE; 3885 + } 3886 + if (env->hflags2 & HF2_GIF_MASK) { 3887 + env->nested_state->flags |= KVM_STATE_NESTED_GIF_SET; 3888 + } else { 3889 + env->nested_state->flags &= ~KVM_STATE_NESTED_GIF_SET; 3890 + } 3891 + 3876 3892 assert(env->nested_state->size <= max_nested_state_len); 3877 3893 return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_NESTED_STATE, env->nested_state); 3878 3894 } ··· 3901 3917 return ret; 3902 3918 } 3903 3919 3920 + /* 3921 + * Copy flags that are affected by reset to env->hflags and env->hflags2. 
3922 + */ 3904 3923 if (env->nested_state->flags & KVM_STATE_NESTED_GUEST_MODE) { 3905 3924 env->hflags |= HF_GUEST_MASK; 3906 3925 } else { 3907 3926 env->hflags &= ~HF_GUEST_MASK; 3927 + } 3928 + if (env->nested_state->flags & KVM_STATE_NESTED_GIF_SET) { 3929 + env->hflags2 |= HF2_GIF_MASK; 3930 + } else { 3931 + env->hflags2 &= ~HF2_GIF_MASK; 3908 3932 } 3909 3933 3910 3934 return ret; ··· 3916 3940 int ret; 3917 3941 3918 3942 assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu)); 3943 + 3944 + /* must be before kvm_put_nested_state so that EFER.SVME is set */ 3945 + ret = kvm_put_sregs(x86_cpu); 3946 + if (ret < 0) { 3947 + return ret; 3948 + } 3919 3949 3920 3950 if (level >= KVM_PUT_RESET_STATE) { 3921 3951 ret = kvm_put_nested_state(x86_cpu); ··· 3947 3977 return ret; 3948 3978 } 3949 3979 ret = kvm_put_xcrs(x86_cpu); 3950 - if (ret < 0) { 3951 - return ret; 3952 - } 3953 - ret = kvm_put_sregs(x86_cpu); 3954 3980 if (ret < 0) { 3955 3981 return ret; 3956 3982 }
+30 -1
target/i386/machine.c
··· 1071 1071 } 1072 1072 }; 1073 1073 1074 + static bool svm_nested_state_needed(void *opaque) 1075 + { 1076 + struct kvm_nested_state *nested_state = opaque; 1077 + 1078 + /* 1079 + * HF_GUEST_MASK and HF2_GIF_MASK are already serialized 1080 + * via hflags and hflags2, all that's left is the opaque 1081 + * nested state blob. 1082 + */ 1083 + return (nested_state->format == KVM_STATE_NESTED_FORMAT_SVM && 1084 + nested_state->size > offsetof(struct kvm_nested_state, data)); 1085 + } 1086 + 1087 + static const VMStateDescription vmstate_svm_nested_state = { 1088 + .name = "cpu/kvm_nested_state/svm", 1089 + .version_id = 1, 1090 + .minimum_version_id = 1, 1091 + .needed = svm_nested_state_needed, 1092 + .fields = (VMStateField[]) { 1093 + VMSTATE_U64(hdr.svm.vmcb_pa, struct kvm_nested_state), 1094 + VMSTATE_UINT8_ARRAY(data.svm[0].vmcb12, 1095 + struct kvm_nested_state, 1096 + KVM_STATE_NESTED_SVM_VMCB_SIZE), 1097 + VMSTATE_END_OF_LIST() 1098 + } 1099 + }; 1100 + 1074 1101 static bool nested_state_needed(void *opaque) 1075 1102 { 1076 1103 X86CPU *cpu = opaque; 1077 1104 CPUX86State *env = &cpu->env; 1078 1105 1079 1106 return (env->nested_state && 1080 - vmx_nested_state_needed(env->nested_state)); 1107 + (vmx_nested_state_needed(env->nested_state) || 1108 + svm_nested_state_needed(env->nested_state))); 1081 1109 } 1082 1110 1083 1111 static int nested_state_post_load(void *opaque, int version_id) ··· 1139 1167 }, 1140 1168 .subsections = (const VMStateDescription*[]) { 1141 1169 &vmstate_vmx_nested_state, 1170 + &vmstate_svm_nested_state, 1142 1171 NULL 1143 1172 } 1144 1173 };