QEMU with HAX to log DMA reads & writes — see jcs.org/2018/11/12/vfio

Merge remote-tracking branch 'remotes/ehabkost/tags/x86-next-pull-request' into staging

x86 queue, 2018-03-12

* Intel Processor Trace support
* KVM_HINTS_DEDICATED

# gpg: Signature made Mon 12 Mar 2018 19:58:39 GMT
# gpg: using RSA key 2807936F984DC5A6
# gpg: Good signature from "Eduardo Habkost <ehabkost@redhat.com>"
# Primary key fingerprint: 5A32 2FD5 ABC4 D3DB ACCF D1AA 2807 936F 984D C5A6

* remotes/ehabkost/tags/x86-next-pull-request:
i386: Add support to get/set/migrate Intel Processor Trace feature
i386: Add Intel Processor Trace feature support
target-i386: add KVM_HINTS_DEDICATED performance hint

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>

+232 -2
+90 -2
target/i386/cpu.c
··· 173 173 #define L2_ITLB_4K_ASSOC 4 174 174 #define L2_ITLB_4K_ENTRIES 512 175 175 176 - 176 + /* CPUID Leaf 0x14 constants: */ 177 + #define INTEL_PT_MAX_SUBLEAF 0x1 178 + /* 179 + * bit[00]: IA32_RTIT_CTL.CR3 filter can be set to 1 and IA32_RTIT_CR3_MATCH 180 + * MSR can be accessed; 181 + * bit[01]: Support Configurable PSB and Cycle-Accurate Mode; 182 + * bit[02]: Support IP Filtering, TraceStop filtering, and preservation 183 + * of Intel PT MSRs across warm reset; 184 + * bit[03]: Support MTC timing packet and suppression of COFI-based packets; 185 + */ 186 + #define INTEL_PT_MINIMAL_EBX 0xf 187 + /* 188 + * bit[00]: Tracing can be enabled with IA32_RTIT_CTL.ToPA = 1 and 189 + * IA32_RTIT_OUTPUT_BASE and IA32_RTIT_OUTPUT_MASK_PTRS MSRs can be 190 + * accessed; 191 + * bit[01]: ToPA tables can hold any number of output entries, up to the 192 + * maximum allowed by the MaskOrTableOffset field of 193 + * IA32_RTIT_OUTPUT_MASK_PTRS; 194 + * bit[02]: Support Single-Range Output scheme; 195 + */ 196 + #define INTEL_PT_MINIMAL_ECX 0x7 197 + #define INTEL_PT_ADDR_RANGES_NUM 0x2 /* Number of configurable address ranges */ 198 + #define INTEL_PT_ADDR_RANGES_NUM_MASK 0x3 199 + #define INTEL_PT_MTC_BITMAP (0x0249 << 16) /* Support ART(0,3,6,9) */ 200 + #define INTEL_PT_CYCLE_BITMAP 0x1fff /* Support 0,2^(0~11) */ 201 + #define INTEL_PT_PSB_BITMAP (0x003f << 16) /* Support 2K,4K,8K,16K,32K,64K */ 177 202 178 203 static void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1, 179 204 uint32_t vendor2, uint32_t vendor3) ··· 359 384 .cpuid_eax = KVM_CPUID_FEATURES, .cpuid_reg = R_EAX, 360 385 .tcg_features = TCG_KVM_FEATURES, 361 386 }, 387 + [FEAT_KVM_HINTS] = { 388 + .feat_names = { 389 + "kvm-hint-dedicated", NULL, NULL, NULL, 390 + NULL, NULL, NULL, NULL, 391 + NULL, NULL, NULL, NULL, 392 + NULL, NULL, NULL, NULL, 393 + NULL, NULL, NULL, NULL, 394 + NULL, NULL, NULL, NULL, 395 + NULL, NULL, NULL, NULL, 396 + NULL, NULL, NULL, NULL, 397 + }, 398 + .cpuid_eax = 
KVM_CPUID_FEATURES, .cpuid_reg = R_EDX, 399 + .tcg_features = TCG_KVM_FEATURES, 400 + }, 362 401 [FEAT_HYPERV_EAX] = { 363 402 .feat_names = { 364 403 NULL /* hv_msr_vp_runtime_access */, NULL /* hv_msr_time_refcount_access */, ··· 428 467 NULL, NULL, "mpx", NULL, 429 468 "avx512f", "avx512dq", "rdseed", "adx", 430 469 "smap", "avx512ifma", "pcommit", "clflushopt", 431 - "clwb", NULL, "avx512pf", "avx512er", 470 + "clwb", "intel-pt", "avx512pf", "avx512er", 432 471 "avx512cd", "sha-ni", "avx512bw", "avx512vl", 433 472 }, 434 473 .cpuid_eax = 7, ··· 3453 3492 } 3454 3493 break; 3455 3494 } 3495 + case 0x14: { 3496 + /* Intel Processor Trace Enumeration */ 3497 + *eax = 0; 3498 + *ebx = 0; 3499 + *ecx = 0; 3500 + *edx = 0; 3501 + if (!(env->features[FEAT_7_0_EBX] & CPUID_7_0_EBX_INTEL_PT) || 3502 + !kvm_enabled()) { 3503 + break; 3504 + } 3505 + 3506 + if (count == 0) { 3507 + *eax = INTEL_PT_MAX_SUBLEAF; 3508 + *ebx = INTEL_PT_MINIMAL_EBX; 3509 + *ecx = INTEL_PT_MINIMAL_ECX; 3510 + } else if (count == 1) { 3511 + *eax = INTEL_PT_MTC_BITMAP | INTEL_PT_ADDR_RANGES_NUM; 3512 + *ebx = INTEL_PT_PSB_BITMAP | INTEL_PT_CYCLE_BITMAP; 3513 + } 3514 + break; 3515 + } 3456 3516 case 0x40000000: 3457 3517 /* 3458 3518 * CPUID code in kvm_arch_init_vcpu() ignores stuff ··· 4079 4139 env->features[w] &= host_feat; 4080 4140 cpu->filtered_features[w] = requested_features & ~env->features[w]; 4081 4141 if (cpu->filtered_features[w]) { 4142 + rv = 1; 4143 + } 4144 + } 4145 + 4146 + if ((env->features[FEAT_7_0_EBX] & CPUID_7_0_EBX_INTEL_PT) && 4147 + kvm_enabled()) { 4148 + KVMState *s = CPU(cpu)->kvm_state; 4149 + uint32_t eax_0 = kvm_arch_get_supported_cpuid(s, 0x14, 0, R_EAX); 4150 + uint32_t ebx_0 = kvm_arch_get_supported_cpuid(s, 0x14, 0, R_EBX); 4151 + uint32_t ecx_0 = kvm_arch_get_supported_cpuid(s, 0x14, 0, R_ECX); 4152 + uint32_t eax_1 = kvm_arch_get_supported_cpuid(s, 0x14, 1, R_EAX); 4153 + uint32_t ebx_1 = kvm_arch_get_supported_cpuid(s, 0x14, 1, R_EBX); 4154 + 4155 + if 
(!eax_0 || 4156 + ((ebx_0 & INTEL_PT_MINIMAL_EBX) != INTEL_PT_MINIMAL_EBX) || 4157 + ((ecx_0 & INTEL_PT_MINIMAL_ECX) != INTEL_PT_MINIMAL_ECX) || 4158 + ((eax_1 & INTEL_PT_MTC_BITMAP) != INTEL_PT_MTC_BITMAP) || 4159 + ((eax_1 & INTEL_PT_ADDR_RANGES_NUM_MASK) < 4160 + INTEL_PT_ADDR_RANGES_NUM) || 4161 + ((ebx_1 & (INTEL_PT_PSB_BITMAP | INTEL_PT_CYCLE_BITMAP)) != 4162 + (INTEL_PT_PSB_BITMAP | INTEL_PT_CYCLE_BITMAP))) { 4163 + /* 4164 + * Processor Trace capabilities aren't configurable, so if the 4165 + * host can't emulate the capabilities we report on 4166 + * cpu_x86_cpuid(), intel-pt can't be enabled on the current host. 4167 + */ 4168 + env->features[FEAT_7_0_EBX] &= ~CPUID_7_0_EBX_INTEL_PT; 4169 + cpu->filtered_features[FEAT_7_0_EBX] |= CPUID_7_0_EBX_INTEL_PT; 4082 4170 rv = 1; 4083 4171 } 4084 4172 }
+26
target/i386/cpu.h
··· 415 415 #define MSR_MC0_ADDR 0x402 416 416 #define MSR_MC0_MISC 0x403 417 417 418 + #define MSR_IA32_RTIT_OUTPUT_BASE 0x560 419 + #define MSR_IA32_RTIT_OUTPUT_MASK 0x561 420 + #define MSR_IA32_RTIT_CTL 0x570 421 + #define MSR_IA32_RTIT_STATUS 0x571 422 + #define MSR_IA32_RTIT_CR3_MATCH 0x572 423 + #define MSR_IA32_RTIT_ADDR0_A 0x580 424 + #define MSR_IA32_RTIT_ADDR0_B 0x581 425 + #define MSR_IA32_RTIT_ADDR1_A 0x582 426 + #define MSR_IA32_RTIT_ADDR1_B 0x583 427 + #define MSR_IA32_RTIT_ADDR2_A 0x584 428 + #define MSR_IA32_RTIT_ADDR2_B 0x585 429 + #define MSR_IA32_RTIT_ADDR3_A 0x586 430 + #define MSR_IA32_RTIT_ADDR3_B 0x587 431 + #define MAX_RTIT_ADDRS 8 432 + 418 433 #define MSR_EFER 0xc0000080 419 434 420 435 #define MSR_EFER_SCE (1 << 0) ··· 471 486 FEAT_8000_0008_EBX, /* CPUID[8000_0008].EBX */ 472 487 FEAT_C000_0001_EDX, /* CPUID[C000_0001].EDX */ 473 488 FEAT_KVM, /* CPUID[4000_0001].EAX (KVM_CPUID_FEATURES) */ 489 + FEAT_KVM_HINTS, /* CPUID[4000_0001].EDX */ 474 490 FEAT_HYPERV_EAX, /* CPUID[4000_0003].EAX */ 475 491 FEAT_HYPERV_EBX, /* CPUID[4000_0003].EBX */ 476 492 FEAT_HYPERV_EDX, /* CPUID[4000_0003].EDX */ ··· 640 656 #define CPUID_7_0_EBX_PCOMMIT (1U << 22) /* Persistent Commit */ 641 657 #define CPUID_7_0_EBX_CLFLUSHOPT (1U << 23) /* Flush a Cache Line Optimized */ 642 658 #define CPUID_7_0_EBX_CLWB (1U << 24) /* Cache Line Write Back */ 659 + #define CPUID_7_0_EBX_INTEL_PT (1U << 25) /* Intel Processor Trace */ 643 660 #define CPUID_7_0_EBX_AVX512PF (1U << 26) /* AVX-512 Prefetch */ 644 661 #define CPUID_7_0_EBX_AVX512ER (1U << 27) /* AVX-512 Exponential and Reciprocal */ 645 662 #define CPUID_7_0_EBX_AVX512CD (1U << 28) /* AVX-512 Conflict Detection */ ··· 665 682 #define CPUID_7_0_EDX_AVX512_4VNNIW (1U << 2) /* AVX512 Neural Network Instructions */ 666 683 #define CPUID_7_0_EDX_AVX512_4FMAPS (1U << 3) /* AVX512 Multiply Accumulation Single Precision */ 667 684 #define CPUID_7_0_EDX_SPEC_CTRL (1U << 26) /* Speculation Control */ 685 + 686 + #define 
KVM_HINTS_DEDICATED (1U << 0) 668 687 669 688 #define CPUID_8000_0008_EBX_IBPB (1U << 12) /* Indirect Branch Prediction Barrier */ 670 689 ··· 1152 1171 uint64_t msr_hv_synic_sint[HV_SINT_COUNT]; 1153 1172 uint64_t msr_hv_stimer_config[HV_STIMER_COUNT]; 1154 1173 uint64_t msr_hv_stimer_count[HV_STIMER_COUNT]; 1174 + 1175 + uint64_t msr_rtit_ctrl; 1176 + uint64_t msr_rtit_status; 1177 + uint64_t msr_rtit_output_base; 1178 + uint64_t msr_rtit_output_mask; 1179 + uint64_t msr_rtit_cr3_match; 1180 + uint64_t msr_rtit_addrs[MAX_RTIT_ADDRS]; 1155 1181 1156 1182 /* exception/interrupt handling */ 1157 1183 int error_code;
+78
target/i386/kvm.c
··· 383 383 if (!kvm_irqchip_in_kernel()) { 384 384 ret &= ~(1U << KVM_FEATURE_PV_UNHALT); 385 385 } 386 + } else if (function == KVM_CPUID_FEATURES && reg == R_EDX) { 387 + ret |= KVM_HINTS_DEDICATED; 388 + found = 1; 386 389 } 387 390 388 391 /* fallback for older kernels */ ··· 801 804 c = &cpuid_data.entries[cpuid_i++]; 802 805 c->function = KVM_CPUID_FEATURES | kvm_base; 803 806 c->eax = env->features[FEAT_KVM]; 807 + c->edx = env->features[FEAT_KVM_HINTS]; 804 808 } 805 809 806 810 cpu_x86_cpuid(env, 0, 0, &limit, &unused, &unused, &unused); ··· 865 869 c = &cpuid_data.entries[cpuid_i++]; 866 870 } 867 871 break; 872 + case 0x14: { 873 + uint32_t times; 874 + 875 + c->function = i; 876 + c->index = 0; 877 + c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX; 878 + cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx); 879 + times = c->eax; 880 + 881 + for (j = 1; j <= times; ++j) { 882 + if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { 883 + fprintf(stderr, "cpuid_data is full, no space for " 884 + "cpuid(eax:0x14,ecx:0x%x)\n", j); 885 + abort(); 886 + } 887 + c = &cpuid_data.entries[cpuid_i++]; 888 + c->function = i; 889 + c->index = j; 890 + c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX; 891 + cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx); 892 + } 893 + break; 894 + } 868 895 default: 869 896 c->function = i; 870 897 c->flags = 0; ··· 1788 1815 kvm_msr_entry_add(cpu, MSR_MTRRphysMask(i), mask); 1789 1816 } 1790 1817 } 1818 + if (env->features[FEAT_7_0_EBX] & CPUID_7_0_EBX_INTEL_PT) { 1819 + int addr_num = kvm_arch_get_supported_cpuid(kvm_state, 1820 + 0x14, 1, R_EAX) & 0x7; 1821 + 1822 + kvm_msr_entry_add(cpu, MSR_IA32_RTIT_CTL, 1823 + env->msr_rtit_ctrl); 1824 + kvm_msr_entry_add(cpu, MSR_IA32_RTIT_STATUS, 1825 + env->msr_rtit_status); 1826 + kvm_msr_entry_add(cpu, MSR_IA32_RTIT_OUTPUT_BASE, 1827 + env->msr_rtit_output_base); 1828 + kvm_msr_entry_add(cpu, MSR_IA32_RTIT_OUTPUT_MASK, 1829 + env->msr_rtit_output_mask); 1830 + kvm_msr_entry_add(cpu, 
MSR_IA32_RTIT_CR3_MATCH, 1831 + env->msr_rtit_cr3_match); 1832 + for (i = 0; i < addr_num; i++) { 1833 + kvm_msr_entry_add(cpu, MSR_IA32_RTIT_ADDR0_A + i, 1834 + env->msr_rtit_addrs[i]); 1835 + } 1836 + } 1791 1837 1792 1838 /* Note: MSR_IA32_FEATURE_CONTROL is written separately, see 1793 1839 * kvm_put_msr_feature_control. */ ··· 2101 2147 } 2102 2148 } 2103 2149 2150 + if (env->features[FEAT_7_0_EBX] & CPUID_7_0_EBX_INTEL_PT) { 2151 + int addr_num = 2152 + kvm_arch_get_supported_cpuid(kvm_state, 0x14, 1, R_EAX) & 0x7; 2153 + 2154 + kvm_msr_entry_add(cpu, MSR_IA32_RTIT_CTL, 0); 2155 + kvm_msr_entry_add(cpu, MSR_IA32_RTIT_STATUS, 0); 2156 + kvm_msr_entry_add(cpu, MSR_IA32_RTIT_OUTPUT_BASE, 0); 2157 + kvm_msr_entry_add(cpu, MSR_IA32_RTIT_OUTPUT_MASK, 0); 2158 + kvm_msr_entry_add(cpu, MSR_IA32_RTIT_CR3_MATCH, 0); 2159 + for (i = 0; i < addr_num; i++) { 2160 + kvm_msr_entry_add(cpu, MSR_IA32_RTIT_ADDR0_A + i, 0); 2161 + } 2162 + } 2163 + 2104 2164 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, cpu->kvm_msr_buf); 2105 2165 if (ret < 0) { 2106 2166 return ret; ··· 2340 2400 break; 2341 2401 case MSR_IA32_SPEC_CTRL: 2342 2402 env->spec_ctrl = msrs[i].data; 2403 + break; 2404 + case MSR_IA32_RTIT_CTL: 2405 + env->msr_rtit_ctrl = msrs[i].data; 2406 + break; 2407 + case MSR_IA32_RTIT_STATUS: 2408 + env->msr_rtit_status = msrs[i].data; 2409 + break; 2410 + case MSR_IA32_RTIT_OUTPUT_BASE: 2411 + env->msr_rtit_output_base = msrs[i].data; 2412 + break; 2413 + case MSR_IA32_RTIT_OUTPUT_MASK: 2414 + env->msr_rtit_output_mask = msrs[i].data; 2415 + break; 2416 + case MSR_IA32_RTIT_CR3_MATCH: 2417 + env->msr_rtit_cr3_match = msrs[i].data; 2418 + break; 2419 + case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B: 2420 + env->msr_rtit_addrs[index - MSR_IA32_RTIT_ADDR0_A] = msrs[i].data; 2343 2421 break; 2344 2422 } 2345 2423 }
+38
target/i386/machine.c
··· 837 837 } 838 838 }; 839 839 840 + static bool intel_pt_enable_needed(void *opaque) 841 + { 842 + X86CPU *cpu = opaque; 843 + CPUX86State *env = &cpu->env; 844 + int i; 845 + 846 + if (env->msr_rtit_ctrl || env->msr_rtit_status || 847 + env->msr_rtit_output_base || env->msr_rtit_output_mask || 848 + env->msr_rtit_cr3_match) { 849 + return true; 850 + } 851 + 852 + for (i = 0; i < MAX_RTIT_ADDRS; i++) { 853 + if (env->msr_rtit_addrs[i]) { 854 + return true; 855 + } 856 + } 857 + 858 + return false; 859 + } 860 + 861 + static const VMStateDescription vmstate_msr_intel_pt = { 862 + .name = "cpu/intel_pt", 863 + .version_id = 1, 864 + .minimum_version_id = 1, 865 + .needed = intel_pt_enable_needed, 866 + .fields = (VMStateField[]) { 867 + VMSTATE_UINT64(env.msr_rtit_ctrl, X86CPU), 868 + VMSTATE_UINT64(env.msr_rtit_status, X86CPU), 869 + VMSTATE_UINT64(env.msr_rtit_output_base, X86CPU), 870 + VMSTATE_UINT64(env.msr_rtit_output_mask, X86CPU), 871 + VMSTATE_UINT64(env.msr_rtit_cr3_match, X86CPU), 872 + VMSTATE_UINT64_ARRAY(env.msr_rtit_addrs, X86CPU, MAX_RTIT_ADDRS), 873 + VMSTATE_END_OF_LIST() 874 + } 875 + }; 876 + 840 877 VMStateDescription vmstate_x86_cpu = { 841 878 .name = "cpu", 842 879 .version_id = 12, ··· 957 994 #endif 958 995 &vmstate_spec_ctrl, 959 996 &vmstate_mcg_ext_ctl, 997 + &vmstate_msr_intel_pt, 960 998 NULL 961 999 } 962 1000 };