qemu with hax to log dma reads & writes jcs.org/2018/11/12/vfio

Merge remote-tracking branch 'remotes/bonzini/tags/for-upstream' into staging

* microvm docs and fixes (Sergio, Liam)
* New processor features for Intel errata (myself, Pawan)
* Kconfig fixes (myself, Thomas)
* Revert mc146818rtc change (myself)
* Deprecate scsi-disk (myself)
* RTC fix (myself, Marcelo)

# gpg: Signature made Tue 19 Nov 2019 09:03:49 GMT
# gpg: using RSA key BFFBD25F78C7AE83
# gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>" [full]
# gpg: aka "Paolo Bonzini <pbonzini@redhat.com>" [full]
# Primary key fingerprint: 46F5 9FBD 57D6 12E7 BFD4 E2F7 7E15 100C CD36 69B1
# Subkey fingerprint: F133 3857 4B66 2389 866C 7682 BFFB D25F 78C7 AE83

* remotes/bonzini/tags/for-upstream:
mc146818rtc: fix timer interrupt reinjection again
Revert "mc146818rtc: fix timer interrupt reinjection"
scsi: deprecate scsi-disk
hw/i386: Move save_tsc_khz from PCMachineClass to X86MachineClass
docs/microvm.rst: add instructions for shutting down the guest
docs/microvm.rst: fix alignment in "Limitations"
vfio: vfio-pci requires EDID
hw/i386: Fix compiler warning when CONFIG_IDE_ISA is disabled
target/i386: Export TAA_NO bit to guests
target/i386: add PSCHANGE_NO bit for the ARCH_CAPABILITIES MSR
microvm: fix memory leak in microvm_fix_kernel_cmdline
scripts: Detect git worktrees for get_maintainer.pl --git

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>

+103 -59
+24 -3
docs/microvm.rst
··· 33 33 34 34 Currently, microvm does *not* support the following features: 35 35 36 - - PCI-only devices. 37 - - Hotplug of any kind. 38 - - Live migration across QEMU versions. 36 + - PCI-only devices. 37 + - Hotplug of any kind. 38 + - Live migration across QEMU versions. 39 39 40 40 41 41 Using the microvm machine type ··· 106 106 -device virtio-blk-device,drive=test \ 107 107 -netdev tap,id=tap0,script=no,downscript=no \ 108 108 -device virtio-net-device,netdev=tap0 109 + 110 + 111 + Triggering a guest-initiated shut down 112 + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 113 + 114 + As the microvm machine type includes just a small set of system 115 + devices, some x86 mechanisms for rebooting or shutting down the 116 + system, like sending a key sequence to the keyboard or writing to an 117 + ACPI register, don't have any effect in the VM. 118 + 119 + The recommended way to trigger a guest-initiated shut down is by 120 + generating a ``triple-fault``, which will cause the VM to initiate a 121 + reboot. Additionally, if the ``-no-reboot`` argument is present in the 122 + command line, QEMU will detect this event and terminate its own 123 + execution gracefully. 124 + 125 + Linux does support this mechanism, but by default it will only be used 126 + after other options have been tried and failed, causing the reboot to 127 + be delayed by a small number of seconds. It's possible to instruct it 128 + to try the triple-fault mechanism first, by adding ``reboot=t`` to the 129 + kernel's command line.
+2
hw/i386/microvm.c
··· 331 331 332 332 fw_cfg_modify_i32(x86ms->fw_cfg, FW_CFG_CMDLINE_SIZE, strlen(cmdline) + 1); 333 333 fw_cfg_modify_string(x86ms->fw_cfg, FW_CFG_CMDLINE_DATA, cmdline); 334 + 335 + g_free(cmdline); 334 336 } 335 337 336 338 static void microvm_machine_state_init(MachineState *machine)
-1
hw/i386/pc.c
··· 2195 2195 /* BIOS ACPI tables: 128K. Other BIOS datastructures: less than 4K reported 2196 2196 * to be used at the moment, 32K should be enough for a while. */ 2197 2197 pcmc->acpi_data_size = 0x20000 + 0x8000; 2198 - pcmc->save_tsc_khz = true; 2199 2198 pcmc->linuxboot_dma_enabled = true; 2200 2199 pcmc->pvh_enabled = true; 2201 2200 assert(!mc->get_hotplug_handler);
+4 -4
hw/i386/pc_piix.c
··· 78 78 X86MachineState *x86ms = X86_MACHINE(machine); 79 79 MemoryRegion *system_memory = get_system_memory(); 80 80 MemoryRegion *system_io = get_system_io(); 81 - int i; 82 81 PCIBus *pci_bus; 83 82 ISABus *isa_bus; 84 83 PCII440FXState *i440fx_state; ··· 253 252 } 254 253 #ifdef CONFIG_IDE_ISA 255 254 else { 256 - for(i = 0; i < MAX_IDE_BUS; i++) { 255 + int i; 256 + for (i = 0; i < MAX_IDE_BUS; i++) { 257 257 ISADevice *dev; 258 258 char busname[] = "ide.0"; 259 259 dev = isa_ide_init(isa_bus, ide_iobase[i], ide_iobase2[i], ··· 567 567 568 568 static void pc_i440fx_2_5_machine_options(MachineClass *m) 569 569 { 570 - PCMachineClass *pcmc = PC_MACHINE_CLASS(m); 570 + X86MachineClass *x86mc = X86_MACHINE_CLASS(m); 571 571 572 572 pc_i440fx_2_6_machine_options(m); 573 - pcmc->save_tsc_khz = false; 573 + x86mc->save_tsc_khz = false; 574 574 m->legacy_fw_cfg_order = 1; 575 575 compat_props_add(m->compat_props, hw_compat_2_5, hw_compat_2_5_len); 576 576 compat_props_add(m->compat_props, pc_compat_2_5, pc_compat_2_5_len);
+2 -2
hw/i386/pc_q35.c
··· 508 508 509 509 static void pc_q35_2_5_machine_options(MachineClass *m) 510 510 { 511 - PCMachineClass *pcmc = PC_MACHINE_CLASS(m); 511 + X86MachineClass *x86mc = X86_MACHINE_CLASS(m); 512 512 513 513 pc_q35_2_6_machine_options(m); 514 - pcmc->save_tsc_khz = false; 514 + x86mc->save_tsc_khz = false; 515 515 m->legacy_fw_cfg_order = 1; 516 516 compat_props_add(m->compat_props, hw_compat_2_5, hw_compat_2_5_len); 517 517 compat_props_add(m->compat_props, pc_compat_2_5, pc_compat_2_5_len);
+1
hw/i386/x86.c
··· 763 763 mc->get_default_cpu_node_id = x86_get_default_cpu_node_id; 764 764 mc->possible_cpu_arch_ids = x86_possible_cpu_arch_ids; 765 765 x86mc->compat_apic_id_mode = false; 766 + x86mc->save_tsc_khz = true; 766 767 nc->nmi_monitor_handler = x86_nmi; 767 768 768 769 object_class_property_add(oc, X86_MACHINE_MAX_RAM_BELOW_4G, "size",
+40 -39
hw/rtc/mc146818rtc.c
··· 168 168 * is just due to period adjustment. 169 169 */ 170 170 static void 171 - periodic_timer_update(RTCState *s, int64_t current_time, uint32_t old_period) 171 + periodic_timer_update(RTCState *s, int64_t current_time, uint32_t old_period, bool period_change) 172 172 { 173 173 uint32_t period; 174 174 int64_t cur_clock, next_irq_clock, lost_clock = 0; 175 175 176 176 period = rtc_periodic_clock_ticks(s); 177 + s->period = period; 177 178 178 179 if (!period) { 179 180 s->irq_coalesced = 0; ··· 189 190 * if the periodic timer's update is due to period re-configuration, 190 191 * we should count the clock since last interrupt. 191 192 */ 192 - if (old_period) { 193 + if (old_period && period_change) { 193 194 int64_t last_periodic_clock, next_periodic_clock; 194 195 195 196 next_periodic_clock = muldiv64(s->next_periodic_time, ··· 197 198 last_periodic_clock = next_periodic_clock - old_period; 198 199 lost_clock = cur_clock - last_periodic_clock; 199 200 assert(lost_clock >= 0); 201 + } 200 202 203 + /* 204 + * s->irq_coalesced can change for two reasons: 205 + * 206 + * a) if one or more periodic timer interrupts have been lost, 207 + * lost_clock will be more that a period. 208 + * 209 + * b) when the period may be reconfigured, we expect the OS to 210 + * treat delayed tick as the new period. So, when switching 211 + * from a shorter to a longer period, scale down the missing, 212 + * because the OS will treat past delayed ticks as longer 213 + * (leftovers are put back into lost_clock). When switching 214 + * to a shorter period, scale up the missing ticks since the 215 + * OS handler will treat past delayed ticks as shorter. 
216 + */ 217 + if (s->lost_tick_policy == LOST_TICK_POLICY_SLEW) { 218 + uint32_t old_irq_coalesced = s->irq_coalesced; 219 + 220 + lost_clock += old_irq_coalesced * old_period; 221 + s->irq_coalesced = lost_clock / s->period; 222 + lost_clock %= s->period; 223 + if (old_irq_coalesced != s->irq_coalesced || 224 + old_period != s->period) { 225 + DPRINTF_C("cmos: coalesced irqs scaled from %d to %d, " 226 + "period scaled from %d to %d\n", old_irq_coalesced, 227 + s->irq_coalesced, old_period, s->period); 228 + rtc_coalesced_timer_update(s); 229 + } 230 + } else { 201 231 /* 202 - * s->irq_coalesced can change for two reasons: 203 - * 204 - * a) if one or more periodic timer interrupts have been lost, 205 - * lost_clock will be more that a period. 206 - * 207 - * b) when the period may be reconfigured, we expect the OS to 208 - * treat delayed tick as the new period. So, when switching 209 - * from a shorter to a longer period, scale down the missing, 210 - * because the OS will treat past delayed ticks as longer 211 - * (leftovers are put back into lost_clock). When switching 212 - * to a shorter period, scale up the missing ticks since the 213 - * OS handler will treat past delayed ticks as shorter. 232 + * no way to compensate the interrupt if LOST_TICK_POLICY_SLEW 233 + * is not used, we should make the time progress anyway. 
214 234 */ 215 - if (s->lost_tick_policy == LOST_TICK_POLICY_SLEW) { 216 - uint32_t old_irq_coalesced = s->irq_coalesced; 217 - 218 - s->period = period; 219 - lost_clock += old_irq_coalesced * old_period; 220 - s->irq_coalesced = lost_clock / s->period; 221 - lost_clock %= s->period; 222 - if (old_irq_coalesced != s->irq_coalesced || 223 - old_period != s->period) { 224 - DPRINTF_C("cmos: coalesced irqs scaled from %d to %d, " 225 - "period scaled from %d to %d\n", old_irq_coalesced, 226 - s->irq_coalesced, old_period, s->period); 227 - rtc_coalesced_timer_update(s); 228 - } 229 - } else { 230 - /* 231 - * no way to compensate the interrupt if LOST_TICK_POLICY_SLEW 232 - * is not used, we should make the time progress anyway. 233 - */ 234 - lost_clock = MIN(lost_clock, period); 235 - } 235 + lost_clock = MIN(lost_clock, period); 236 236 } 237 237 238 238 assert(lost_clock >= 0 && lost_clock <= period); ··· 246 246 { 247 247 RTCState *s = opaque; 248 248 249 - periodic_timer_update(s, s->next_periodic_time, 0); 249 + periodic_timer_update(s, s->next_periodic_time, s->period, false); 250 250 s->cmos_data[RTC_REG_C] |= REG_C_PF; 251 251 if (s->cmos_data[RTC_REG_B] & REG_B_PIE) { 252 252 s->cmos_data[RTC_REG_C] |= REG_C_IRQF; ··· 512 512 513 513 if (update_periodic_timer) { 514 514 periodic_timer_update(s, qemu_clock_get_ns(rtc_clock), 515 - old_period); 515 + old_period, true); 516 516 } 517 517 518 518 check_update_timer(s); ··· 551 551 552 552 if (update_periodic_timer) { 553 553 periodic_timer_update(s, qemu_clock_get_ns(rtc_clock), 554 - old_period); 554 + old_period, true); 555 555 } 556 556 557 557 check_update_timer(s); ··· 795 795 s->offset = 0; 796 796 check_update_timer(s); 797 797 } 798 + s->period = rtc_periodic_clock_ticks(s); 798 799 799 800 /* The periodic timer is deterministic in record/replay mode, 800 801 * so there is no need to update it after loading the vmstate. 
··· 804 805 uint64_t now = qemu_clock_get_ns(rtc_clock); 805 806 if (now < s->next_periodic_time || 806 807 now > (s->next_periodic_time + get_max_clock_jump())) { 807 - periodic_timer_update(s, qemu_clock_get_ns(rtc_clock), 0); 808 + periodic_timer_update(s, qemu_clock_get_ns(rtc_clock), s->period, false); 808 809 } 809 810 } 810 811
+11 -1
hw/scsi/scsi-bus.c
··· 254 254 char *name; 255 255 DeviceState *dev; 256 256 Error *err = NULL; 257 + DriveInfo *dinfo; 257 258 258 - driver = blk_is_sg(blk) ? "scsi-generic" : "scsi-disk"; 259 + if (blk_is_sg(blk)) { 260 + driver = "scsi-generic"; 261 + } else { 262 + dinfo = blk_legacy_dinfo(blk); 263 + if (dinfo && dinfo->media_cd) { 264 + driver = "scsi-cd"; 265 + } else { 266 + driver = "scsi-hd"; 267 + } 268 + } 259 269 dev = qdev_create(&bus->qbus, driver); 260 270 name = g_strdup_printf("legacy[%d]", unit); 261 271 object_property_add_child(OBJECT(bus), name, OBJECT(dev), NULL);
+3
hw/scsi/scsi-disk.c
··· 2481 2481 DriveInfo *dinfo; 2482 2482 Error *local_err = NULL; 2483 2483 2484 + warn_report("'scsi-disk' is deprecated, " 2485 + "please use 'scsi-hd' or 'scsi-cd' instead"); 2486 + 2484 2487 if (!dev->conf.blk) { 2485 2488 scsi_realize(dev, &local_err); 2486 2489 assert(local_err);
-2
include/hw/i386/pc.h
··· 116 116 bool enforce_aligned_dimm; 117 117 bool broken_reserved_end; 118 118 119 - /* TSC rate migration: */ 120 - bool save_tsc_khz; 121 119 /* generate legacy CPU hotplug AML */ 122 120 bool legacy_cpu_hotplug; 123 121
+2
include/hw/i386/x86.h
··· 30 30 31 31 /*< public >*/ 32 32 33 + /* TSC rate migration: */ 34 + bool save_tsc_khz; 33 35 /* Enables contiguous-apic-ID mode */ 34 36 bool compat_apic_id_mode; 35 37 } X86MachineClass;
+5
qemu-deprecated.texi
··· 259 259 The 'ide-drive' device is deprecated. Users should use 'ide-hd' or 260 260 'ide-cd' as appropriate to get an IDE hard disk or CD-ROM as needed. 261 261 262 + @subsection scsi-disk (since 4.2) 263 + 264 + The 'scsi-disk' device is deprecated. Users should use 'scsi-hd' or 265 + 'scsi-cd' as appropriate to get a SCSI hard disk or CD-ROM as needed. 266 + 262 267 @section System emulator machines 263 268 264 269 @subsection pc-0.12, pc-0.13, pc-0.14 and pc-0.15 (since 4.0)
+1 -1
scripts/get_maintainer.pl
··· 81 81 82 82 my %VCS_cmds_git = ( 83 83 "execute_cmd" => \&git_execute_cmd, 84 - "available" => '(which("git") ne "") && (-d ".git")', 84 + "available" => '(which("git") ne "") && (-e ".git")', 85 85 "find_signers_cmd" => 86 86 "git log --no-color --follow --since=\$email_git_since " . 87 87 '--format="GitCommit: %H%n' .
+2 -2
target/i386/cpu.c
··· 1204 1204 .type = MSR_FEATURE_WORD, 1205 1205 .feat_names = { 1206 1206 "rdctl-no", "ibrs-all", "rsba", "skip-l1dfl-vmentry", 1207 - "ssb-no", "mds-no", NULL, NULL, 1208 - NULL, NULL, NULL, NULL, 1207 + "ssb-no", "mds-no", "pschange-mc-no", NULL, 1208 + "taa-no", NULL, NULL, NULL, 1209 1209 NULL, NULL, NULL, NULL, 1210 1210 NULL, NULL, NULL, NULL, 1211 1211 NULL, NULL, NULL, NULL,
+2 -2
target/i386/machine.c
··· 988 988 X86CPU *cpu = opaque; 989 989 CPUX86State *env = &cpu->env; 990 990 MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); 991 - PCMachineClass *pcmc = PC_MACHINE_CLASS(mc); 992 - return env->tsc_khz && pcmc->save_tsc_khz; 991 + X86MachineClass *x86mc = X86_MACHINE_CLASS(mc); 992 + return env->tsc_khz && x86mc->save_tsc_khz; 993 993 } 994 994 995 995 static const VMStateDescription vmstate_tsc_khz = {
+4 -2
tests/qemu-iotests/051.pc.out
··· 167 167 168 168 Testing: -drive if=none,id=disk -device lsi53c895a -device scsi-disk,drive=disk 169 169 QEMU X.Y.Z monitor - type 'help' for more information 170 - (qemu) QEMU_PROG: -device scsi-disk,drive=disk: Device needs media, but drive is empty 170 + (qemu) QEMU_PROG: -device scsi-disk,drive=disk: warning: 'scsi-disk' is deprecated, please use 'scsi-hd' or 'scsi-cd' instead 171 + QEMU_PROG: -device scsi-disk,drive=disk: Device needs media, but drive is empty 171 172 172 173 Testing: -drive if=none,id=disk -device lsi53c895a -device scsi-hd,drive=disk 173 174 QEMU X.Y.Z monitor - type 'help' for more information ··· 238 239 239 240 Testing: -drive file=TEST_DIR/t.qcow2,if=none,id=disk,readonly=on -device lsi53c895a -device scsi-disk,drive=disk 240 241 QEMU X.Y.Z monitor - type 'help' for more information 241 - (qemu) quit 242 + (qemu) QEMU_PROG: -device scsi-disk,drive=disk: warning: 'scsi-disk' is deprecated, please use 'scsi-hd' or 'scsi-cd' instead 243 + quit 242 244 243 245 Testing: -drive file=TEST_DIR/t.qcow2,if=none,id=disk,readonly=on -device lsi53c895a -device scsi-hd,drive=disk 244 246 QEMU X.Y.Z monitor - type 'help' for more information