qemu with hax to log dma reads & writes jcs.org/2018/11/12/vfio

WHPX: TSC get and set should be dependent on VM state

Currently, TSC is set as part of the VM runtime state. Setting TSC at
runtime is heavy and can additionally have side effects on guests,
which are not very resilient to variances in the TSC. This patch uses
the VM state to determine whether to set TSC or not. It also makes some
minor enhancements to getting TSC values so that this, too, considers
the VM state.

Additionally, while setting the TSC, the partition is suspended to
reduce the variance in the TSC value across vCPUs.

Signed-off-by: Sunil Muthuswamy <sunilmut@microsoft.com>
Message-Id: <SN4PR2101MB08804D23439166E81FF151F7C0EA0@SN4PR2101MB0880.namprd21.prod.outlook.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

authored by

Sunil Muthuswamy and committed by
Paolo Bonzini
6785e767 27f08ea1

+110 -9
+7
include/sysemu/whpx.h
··· 35 35 36 36 #endif /* CONFIG_WHPX */ 37 37 38 + /* state subset only touched by the VCPU itself during runtime */ 39 + #define WHPX_SET_RUNTIME_STATE 1 40 + /* state subset modified during VCPU reset */ 41 + #define WHPX_SET_RESET_STATE 2 42 + /* full state set, modified during initialization or on vmload */ 43 + #define WHPX_SET_FULL_STATE 3 44 + 38 45 #endif /* QEMU_WHPX_H */
+9
target/i386/whp-dispatch.h
··· 23 23 X(HRESULT, WHvGetVirtualProcessorRegisters, (WHV_PARTITION_HANDLE Partition, UINT32 VpIndex, const WHV_REGISTER_NAME* RegisterNames, UINT32 RegisterCount, WHV_REGISTER_VALUE* RegisterValues)) \ 24 24 X(HRESULT, WHvSetVirtualProcessorRegisters, (WHV_PARTITION_HANDLE Partition, UINT32 VpIndex, const WHV_REGISTER_NAME* RegisterNames, UINT32 RegisterCount, const WHV_REGISTER_VALUE* RegisterValues)) \ 25 25 26 + /* 27 + * These are supplemental functions that may not be present 28 + * on all versions and are not critical for basic functionality. 29 + */ 30 + #define LIST_WINHVPLATFORM_FUNCTIONS_SUPPLEMENTAL(X) \ 31 + X(HRESULT, WHvSuspendPartitionTime, (WHV_PARTITION_HANDLE Partition)) \ 26 32 27 33 #define LIST_WINHVEMULATION_FUNCTIONS(X) \ 28 34 X(HRESULT, WHvEmulatorCreateEmulator, (const WHV_EMULATOR_CALLBACKS* Callbacks, WHV_EMULATOR_HANDLE* Emulator)) \ ··· 40 46 /* Define function typedef */ 41 47 LIST_WINHVPLATFORM_FUNCTIONS(WHP_DEFINE_TYPE) 42 48 LIST_WINHVEMULATION_FUNCTIONS(WHP_DEFINE_TYPE) 49 + LIST_WINHVPLATFORM_FUNCTIONS_SUPPLEMENTAL(WHP_DEFINE_TYPE) 43 50 44 51 struct WHPDispatch { 45 52 LIST_WINHVPLATFORM_FUNCTIONS(WHP_DECLARE_MEMBER) 46 53 LIST_WINHVEMULATION_FUNCTIONS(WHP_DECLARE_MEMBER) 54 + LIST_WINHVPLATFORM_FUNCTIONS_SUPPLEMENTAL(WHP_DECLARE_MEMBER) 47 55 }; 48 56 49 57 extern struct WHPDispatch whp_dispatch; ··· 53 61 typedef enum WHPFunctionList { 54 62 WINHV_PLATFORM_FNS_DEFAULT, 55 63 WINHV_EMULATION_FNS_DEFAULT, 64 + WINHV_PLATFORM_FNS_SUPPLEMENTAL 56 65 } WHPFunctionList; 57 66 58 67 #endif /* WHP_DISPATCH_H */
+94 -9
target/i386/whpx-all.c
··· 114 114 WHvX64RegisterXmmControlStatus, 115 115 116 116 /* X64 MSRs */ 117 - WHvX64RegisterTsc, 118 117 WHvX64RegisterEfer, 119 118 #ifdef TARGET_X86_64 120 119 WHvX64RegisterKernelGsBase, ··· 215 214 return qs; 216 215 } 217 216 218 - static void whpx_set_registers(CPUState *cpu) 217 + static int whpx_set_tsc(CPUState *cpu) 218 + { 219 + struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr); 220 + WHV_REGISTER_NAME tsc_reg = WHvX64RegisterTsc; 221 + WHV_REGISTER_VALUE tsc_val; 222 + HRESULT hr; 223 + struct whpx_state *whpx = &whpx_global; 224 + 225 + /* 226 + * Suspend the partition prior to setting the TSC to reduce the variance 227 + * in TSC across vCPUs. When the first vCPU runs post suspend, the 228 + * partition is automatically resumed. 229 + */ 230 + if (whp_dispatch.WHvSuspendPartitionTime) { 231 + 232 + /* 233 + * Unable to suspend partition while setting TSC is not a fatal 234 + * error. It just increases the likelihood of TSC variance between 235 + * vCPUs and some guest OS are able to handle that just fine. 236 + */ 237 + hr = whp_dispatch.WHvSuspendPartitionTime(whpx->partition); 238 + if (FAILED(hr)) { 239 + warn_report("WHPX: Failed to suspend partition, hr=%08lx", hr); 240 + } 241 + } 242 + 243 + tsc_val.Reg64 = env->tsc; 244 + hr = whp_dispatch.WHvSetVirtualProcessorRegisters( 245 + whpx->partition, cpu->cpu_index, &tsc_reg, 1, &tsc_val); 246 + if (FAILED(hr)) { 247 + error_report("WHPX: Failed to set TSC, hr=%08lx", hr); 248 + return -1; 249 + } 250 + 251 + return 0; 252 + } 253 + 254 + static void whpx_set_registers(CPUState *cpu, int level) 219 255 { 220 256 struct whpx_state *whpx = &whpx_global; 221 257 struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu); ··· 229 265 int v86, r86; 230 266 231 267 assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu)); 268 + 269 + /* 270 + * Following MSRs have side effects on the guest or are too heavy for 271 + * runtime. Limit them to full state update. 
272 + */ 273 + if (level >= WHPX_SET_RESET_STATE) { 274 + whpx_set_tsc(cpu); 275 + } 232 276 233 277 memset(&vcxt, 0, sizeof(struct whpx_register_set)); 234 278 ··· 330 374 idx += 1; 331 375 332 376 /* MSRs */ 333 - assert(whpx_register_names[idx] == WHvX64RegisterTsc); 334 - vcxt.values[idx++].Reg64 = env->tsc; 335 377 assert(whpx_register_names[idx] == WHvX64RegisterEfer); 336 378 vcxt.values[idx++].Reg64 = env->efer; 337 379 #ifdef TARGET_X86_64 ··· 379 421 return; 380 422 } 381 423 424 + static int whpx_get_tsc(CPUState *cpu) 425 + { 426 + struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr); 427 + WHV_REGISTER_NAME tsc_reg = WHvX64RegisterTsc; 428 + WHV_REGISTER_VALUE tsc_val; 429 + HRESULT hr; 430 + struct whpx_state *whpx = &whpx_global; 431 + 432 + hr = whp_dispatch.WHvGetVirtualProcessorRegisters( 433 + whpx->partition, cpu->cpu_index, &tsc_reg, 1, &tsc_val); 434 + if (FAILED(hr)) { 435 + error_report("WHPX: Failed to get TSC, hr=%08lx", hr); 436 + return -1; 437 + } 438 + 439 + env->tsc = tsc_val.Reg64; 440 + return 0; 441 + } 442 + 382 443 static void whpx_get_registers(CPUState *cpu) 383 444 { 384 445 struct whpx_state *whpx = &whpx_global; ··· 393 454 int i; 394 455 395 456 assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu)); 457 + 458 + if (!env->tsc_valid) { 459 + whpx_get_tsc(cpu); 460 + env->tsc_valid = !runstate_is_running(); 461 + } 396 462 397 463 hr = whp_dispatch.WHvGetVirtualProcessorRegisters( 398 464 whpx->partition, cpu->cpu_index, ··· 492 558 idx += 1; 493 559 494 560 /* MSRs */ 495 - assert(whpx_register_names[idx] == WHvX64RegisterTsc); 496 - env->tsc = vcxt.values[idx++].Reg64; 497 561 assert(whpx_register_names[idx] == WHvX64RegisterEfer); 498 562 env->efer = vcxt.values[idx++].Reg64; 499 563 #ifdef TARGET_X86_64 ··· 896 960 897 961 do { 898 962 if (cpu->vcpu_dirty) { 899 - whpx_set_registers(cpu); 963 + whpx_set_registers(cpu, WHPX_SET_RUNTIME_STATE); 900 964 cpu->vcpu_dirty = false; 901 965 } 902 966 ··· 1074 1138 static void 
do_whpx_cpu_synchronize_post_reset(CPUState *cpu, 1075 1139 run_on_cpu_data arg) 1076 1140 { 1077 - whpx_set_registers(cpu); 1141 + whpx_set_registers(cpu, WHPX_SET_RESET_STATE); 1078 1142 cpu->vcpu_dirty = false; 1079 1143 } 1080 1144 1081 1145 static void do_whpx_cpu_synchronize_post_init(CPUState *cpu, 1082 1146 run_on_cpu_data arg) 1083 1147 { 1084 - whpx_set_registers(cpu); 1148 + whpx_set_registers(cpu, WHPX_SET_FULL_STATE); 1085 1149 cpu->vcpu_dirty = false; 1086 1150 } 1087 1151 ··· 1122 1186 */ 1123 1187 1124 1188 static Error *whpx_migration_blocker; 1189 + 1190 + static void whpx_cpu_update_state(void *opaque, int running, RunState state) 1191 + { 1192 + CPUX86State *env = opaque; 1193 + 1194 + if (running) { 1195 + env->tsc_valid = false; 1196 + } 1197 + } 1125 1198 1126 1199 int whpx_init_vcpu(CPUState *cpu) 1127 1200 { ··· 1178 1251 1179 1252 cpu->vcpu_dirty = true; 1180 1253 cpu->hax_vcpu = (struct hax_vcpu_state *)vcpu; 1254 + qemu_add_vm_change_state_handler(whpx_cpu_update_state, cpu->env_ptr); 1181 1255 1182 1256 return 0; 1183 1257 } ··· 1367 1441 1368 1442 #define WINHV_PLATFORM_DLL "WinHvPlatform.dll" 1369 1443 #define WINHV_EMULATION_DLL "WinHvEmulation.dll" 1444 + #define WHP_LOAD_FIELD_OPTIONAL(return_type, function_name, signature) \ 1445 + whp_dispatch.function_name = \ 1446 + (function_name ## _t)GetProcAddress(hLib, #function_name); \ 1447 + 1370 1448 #define WHP_LOAD_FIELD(return_type, function_name, signature) \ 1371 1449 whp_dispatch.function_name = \ 1372 1450 (function_name ## _t)GetProcAddress(hLib, #function_name); \ ··· 1394 1472 WHP_LOAD_LIB(WINHV_EMULATION_DLL, hLib) 1395 1473 LIST_WINHVEMULATION_FUNCTIONS(WHP_LOAD_FIELD) 1396 1474 break; 1475 + 1476 + case WINHV_PLATFORM_FNS_SUPPLEMENTAL: 1477 + WHP_LOAD_LIB(WINHV_PLATFORM_DLL, hLib) 1478 + LIST_WINHVPLATFORM_FUNCTIONS_SUPPLEMENTAL(WHP_LOAD_FIELD_OPTIONAL) 1479 + break; 1397 1480 } 1398 1481 1399 1482 *handle = hLib; ··· 1554 1637 goto error; 1555 1638 } 1556 1639 1640 + 
assert(load_whp_dispatch_fns(&hWinHvPlatform, 1641 + WINHV_PLATFORM_FNS_SUPPLEMENTAL)); 1557 1642 whp_dispatch_initialized = true; 1558 1643 1559 1644 return true;