qemu with hacks to log DMA reads & writes — jcs.org/2018/11/12/vfio

Merge remote-tracking branch 'remotes/awilliam/tags/vfio-update-20180206.0' into staging

VFIO updates 2018-02-06

- SPAPR in-kernel TCE acceleration (Alexey Kardashevskiy)

- MSI-X relocation (Alex Williamson)

- Add missing platform mutex init (Eric Auger)

- Redundant variable cleanup (Alexey Kardashevskiy)

- Option to disable GeForce quirks (Alex Williamson)

# gpg: Signature made Tue 06 Feb 2018 18:21:22 GMT
# gpg: using RSA key 239B9B6E3BB08B22
# gpg: Good signature from "Alex Williamson <alex.williamson@redhat.com>"
# gpg: aka "Alex Williamson <alex@shazbot.org>"
# gpg: aka "Alex Williamson <alwillia@redhat.com>"
# gpg: aka "Alex Williamson <alex.l.williamson@gmail.com>"
# Primary key fingerprint: 42F6 C04E 540B D1A9 9E7B 8A90 239B 9B6E 3BB0 8B22

* remotes/awilliam/tags/vfio-update-20180206.0:
vfio/pci: Add option to disable GeForce quirks
vfio/common: Remove redundant copy of local variable
hw/vfio/platform: Init the interrupt mutex
vfio/pci: Allow relocating MSI-X MMIO
qapi: Create DEFINE_PROP_OFF_AUTO_PCIBAR
vfio/pci: Emulate BARs
vfio/pci: Add base BAR MemoryRegion
vfio/pci: Fixup VFIOMSIXInfo comment
spapr/iommu: Enable in-kernel TCE acceleration via VFIO KVM device
vfio/spapr: Use iommu memory region's get_attr()
memory/iommu: Add get_attr()

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>

+320 -31
+11
hw/core/qdev-properties.c
··· 1317 1317 .name = "link", 1318 1318 .create = create_link_property, 1319 1319 }; 1320 + 1321 + /* --- OffAutoPCIBAR off/auto/bar0/bar1/bar2/bar3/bar4/bar5 --- */ 1322 + 1323 + const PropertyInfo qdev_prop_off_auto_pcibar = { 1324 + .name = "OffAutoPCIBAR", 1325 + .description = "off/auto/bar0/bar1/bar2/bar3/bar4/bar5", 1326 + .enum_table = &OffAutoPCIBAR_lookup, 1327 + .get = get_enum, 1328 + .set = set_enum, 1329 + .set_default_value = set_default_value_enum, 1330 + };
+18
hw/ppc/spapr_iommu.c
··· 160 160 return 1ULL << tcet->page_shift; 161 161 } 162 162 163 + static int spapr_tce_get_attr(IOMMUMemoryRegion *iommu, 164 + enum IOMMUMemoryRegionAttr attr, void *data) 165 + { 166 + sPAPRTCETable *tcet = container_of(iommu, sPAPRTCETable, iommu); 167 + 168 + if (attr == IOMMU_ATTR_SPAPR_TCE_FD && kvmppc_has_cap_spapr_vfio()) { 169 + *(int *) data = tcet->fd; 170 + return 0; 171 + } 172 + 173 + return -EINVAL; 174 + } 175 + 163 176 static void spapr_tce_notify_flag_changed(IOMMUMemoryRegion *iommu, 164 177 IOMMUNotifierFlag old, 165 178 IOMMUNotifierFlag new) ··· 283 296 g_assert(need_vfio != tcet->need_vfio); 284 297 285 298 tcet->need_vfio = need_vfio; 299 + 300 + if (!need_vfio || (tcet->fd != -1 && kvmppc_has_cap_spapr_vfio())) { 301 + return; 302 + } 286 303 287 304 oldtable = tcet->table; 288 305 ··· 643 660 imrc->translate = spapr_tce_translate_iommu; 644 661 imrc->get_min_page_size = spapr_tce_get_min_page_size; 645 662 imrc->notify_flag_changed = spapr_tce_notify_flag_changed; 663 + imrc->get_attr = spapr_tce_get_attr; 646 664 } 647 665 648 666 static const TypeInfo spapr_iommu_memory_region_info = {
+27 -1
hw/vfio/common.c
··· 435 435 end = int128_get64(int128_sub(llend, int128_one())); 436 436 437 437 if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) { 438 - VFIOHostDMAWindow *hostwin; 439 438 hwaddr pgsize = 0; 440 439 441 440 /* For now intersections are not allowed, we may relax this later */ ··· 457 456 vfio_host_win_add(container, section->offset_within_address_space, 458 457 section->offset_within_address_space + 459 458 int128_get64(section->size) - 1, pgsize); 459 + #ifdef CONFIG_KVM 460 + if (kvm_enabled()) { 461 + VFIOGroup *group; 462 + IOMMUMemoryRegion *iommu_mr = IOMMU_MEMORY_REGION(section->mr); 463 + struct kvm_vfio_spapr_tce param; 464 + struct kvm_device_attr attr = { 465 + .group = KVM_DEV_VFIO_GROUP, 466 + .attr = KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE, 467 + .addr = (uint64_t)(unsigned long)&param, 468 + }; 469 + 470 + if (!memory_region_iommu_get_attr(iommu_mr, IOMMU_ATTR_SPAPR_TCE_FD, 471 + &param.tablefd)) { 472 + QLIST_FOREACH(group, &container->group_list, container_next) { 473 + param.groupfd = group->fd; 474 + if (ioctl(vfio_kvm_device_fd, KVM_SET_DEVICE_ATTR, &attr)) { 475 + error_report("vfio: failed to setup fd %d " 476 + "for a group with fd %d: %s", 477 + param.tablefd, param.groupfd, 478 + strerror(errno)); 479 + return; 480 + } 481 + trace_vfio_spapr_group_attach(param.groupfd, param.tablefd); 482 + } 483 + } 484 + } 485 + #endif 460 486 } 461 487 462 488 hostwin_found = false;
+6 -3
hw/vfio/pci-quirks.c
··· 542 542 VFIOQuirk *quirk; 543 543 VFIONvidia3d0Quirk *data; 544 544 545 - if (!vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID) || 545 + if (vdev->no_geforce_quirks || 546 + !vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID) || 546 547 !vdev->bars[1].region.size) { 547 548 return; 548 549 } ··· 660 661 VFIONvidiaBAR5Quirk *bar5; 661 662 VFIOConfigWindowQuirk *window; 662 663 663 - if (!vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID) || 664 + if (vdev->no_geforce_quirks || 665 + !vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID) || 664 666 !vdev->vga || nr != 5 || !vdev->bars[5].ioport) { 665 667 return; 666 668 } ··· 754 756 VFIOQuirk *quirk; 755 757 VFIOConfigMirrorQuirk *mirror; 756 758 757 - if (!vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID) || 759 + if (vdev->no_geforce_quirks || 760 + !vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID) || 758 761 !vfio_is_vga(vdev) || nr != 0) { 759 762 return; 760 763 }
+170 -25
hw/vfio/pci.c
··· 1087 1087 { 1088 1088 VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev); 1089 1089 VFIORegion *region = &vdev->bars[bar].region; 1090 - MemoryRegion *mmap_mr, *mr; 1090 + MemoryRegion *mmap_mr, *region_mr, *base_mr; 1091 1091 PCIIORegion *r; 1092 1092 pcibus_t bar_addr; 1093 1093 uint64_t size = region->size; ··· 1100 1100 1101 1101 r = &pdev->io_regions[bar]; 1102 1102 bar_addr = r->addr; 1103 - mr = region->mem; 1103 + base_mr = vdev->bars[bar].mr; 1104 + region_mr = region->mem; 1104 1105 mmap_mr = &region->mmaps[0].mem; 1105 1106 1106 1107 /* If BAR is mapped and page aligned, update to fill PAGE_SIZE */ ··· 1111 1112 1112 1113 memory_region_transaction_begin(); 1113 1114 1114 - memory_region_set_size(mr, size); 1115 + if (vdev->bars[bar].size < size) { 1116 + memory_region_set_size(base_mr, size); 1117 + } 1118 + memory_region_set_size(region_mr, size); 1115 1119 memory_region_set_size(mmap_mr, size); 1116 - if (size != region->size && memory_region_is_mapped(mr)) { 1117 - memory_region_del_subregion(r->address_space, mr); 1120 + if (size != vdev->bars[bar].size && memory_region_is_mapped(base_mr)) { 1121 + memory_region_del_subregion(r->address_space, base_mr); 1118 1122 memory_region_add_subregion_overlap(r->address_space, 1119 - bar_addr, mr, 0); 1123 + bar_addr, base_mr, 0); 1120 1124 } 1121 1125 1122 1126 memory_region_transaction_commit(); ··· 1218 1222 1219 1223 for (bar = 0; bar < PCI_ROM_SLOT; bar++) { 1220 1224 if (old_addr[bar] != pdev->io_regions[bar].addr && 1221 - pdev->io_regions[bar].size > 0 && 1222 - pdev->io_regions[bar].size < qemu_real_host_page_size) { 1225 + vdev->bars[bar].region.size > 0 && 1226 + vdev->bars[bar].region.size < qemu_real_host_page_size) { 1223 1227 vfio_sub_page_bar_update_mapping(pdev, bar); 1224 1228 } 1225 1229 } ··· 1352 1356 } 1353 1357 } 1354 1358 1359 + static void vfio_pci_relocate_msix(VFIOPCIDevice *vdev, Error **errp) 1360 + { 1361 + int target_bar = -1; 1362 + size_t msix_sz; 1363 + 1364 + if 
(!vdev->msix || vdev->msix_relo == OFF_AUTOPCIBAR_OFF) { 1365 + return; 1366 + } 1367 + 1368 + /* The actual minimum size of MSI-X structures */ 1369 + msix_sz = (vdev->msix->entries * PCI_MSIX_ENTRY_SIZE) + 1370 + (QEMU_ALIGN_UP(vdev->msix->entries, 64) / 8); 1371 + /* Round up to host pages, we don't want to share a page */ 1372 + msix_sz = REAL_HOST_PAGE_ALIGN(msix_sz); 1373 + /* PCI BARs must be a power of 2 */ 1374 + msix_sz = pow2ceil(msix_sz); 1375 + 1376 + if (vdev->msix_relo == OFF_AUTOPCIBAR_AUTO) { 1377 + /* 1378 + * TODO: Lookup table for known devices. 1379 + * 1380 + * Logically we might use an algorithm here to select the BAR adding 1381 + * the least additional MMIO space, but we cannot programatically 1382 + * predict the driver dependency on BAR ordering or sizing, therefore 1383 + * 'auto' becomes a lookup for combinations reported to work. 1384 + */ 1385 + if (target_bar < 0) { 1386 + error_setg(errp, "No automatic MSI-X relocation available for " 1387 + "device %04x:%04x", vdev->vendor_id, vdev->device_id); 1388 + return; 1389 + } 1390 + } else { 1391 + target_bar = (int)(vdev->msix_relo - OFF_AUTOPCIBAR_BAR0); 1392 + } 1393 + 1394 + /* I/O port BARs cannot host MSI-X structures */ 1395 + if (vdev->bars[target_bar].ioport) { 1396 + error_setg(errp, "Invalid MSI-X relocation BAR %d, " 1397 + "I/O port BAR", target_bar); 1398 + return; 1399 + } 1400 + 1401 + /* Cannot use a BAR in the "shadow" of a 64-bit BAR */ 1402 + if (!vdev->bars[target_bar].size && 1403 + target_bar > 0 && vdev->bars[target_bar - 1].mem64) { 1404 + error_setg(errp, "Invalid MSI-X relocation BAR %d, " 1405 + "consumed by 64-bit BAR %d", target_bar, target_bar - 1); 1406 + return; 1407 + } 1408 + 1409 + /* 2GB max size for 32-bit BARs, cannot double if already > 1G */ 1410 + if (vdev->bars[target_bar].size > (1 * 1024 * 1024 * 1024) && 1411 + !vdev->bars[target_bar].mem64) { 1412 + error_setg(errp, "Invalid MSI-X relocation BAR %d, " 1413 + "no space to extend 32-bit BAR", 
target_bar); 1414 + return; 1415 + } 1416 + 1417 + /* 1418 + * If adding a new BAR, test if we can make it 64bit. We make it 1419 + * prefetchable since QEMU MSI-X emulation has no read side effects 1420 + * and doing so makes mapping more flexible. 1421 + */ 1422 + if (!vdev->bars[target_bar].size) { 1423 + if (target_bar < (PCI_ROM_SLOT - 1) && 1424 + !vdev->bars[target_bar + 1].size) { 1425 + vdev->bars[target_bar].mem64 = true; 1426 + vdev->bars[target_bar].type = PCI_BASE_ADDRESS_MEM_TYPE_64; 1427 + } 1428 + vdev->bars[target_bar].type |= PCI_BASE_ADDRESS_MEM_PREFETCH; 1429 + vdev->bars[target_bar].size = msix_sz; 1430 + vdev->msix->table_offset = 0; 1431 + } else { 1432 + vdev->bars[target_bar].size = MAX(vdev->bars[target_bar].size * 2, 1433 + msix_sz * 2); 1434 + /* 1435 + * Due to above size calc, MSI-X always starts halfway into the BAR, 1436 + * which will always be a separate host page. 1437 + */ 1438 + vdev->msix->table_offset = vdev->bars[target_bar].size / 2; 1439 + } 1440 + 1441 + vdev->msix->table_bar = target_bar; 1442 + vdev->msix->pba_bar = target_bar; 1443 + /* Requires 8-byte alignment, but PCI_MSIX_ENTRY_SIZE guarantees that */ 1444 + vdev->msix->pba_offset = vdev->msix->table_offset + 1445 + (vdev->msix->entries * PCI_MSIX_ENTRY_SIZE); 1446 + 1447 + trace_vfio_msix_relo(vdev->vbasedev.name, 1448 + vdev->msix->table_bar, vdev->msix->table_offset); 1449 + } 1450 + 1355 1451 /* 1356 1452 * We don't have any control over how pci_add_capability() inserts 1357 1453 * capabilities into the chain. 
In order to setup MSI-X we need a ··· 1430 1526 vdev->msix = msix; 1431 1527 1432 1528 vfio_pci_fixup_msix_region(vdev); 1529 + 1530 + vfio_pci_relocate_msix(vdev, errp); 1433 1531 } 1434 1532 1435 1533 static int vfio_msix_setup(VFIOPCIDevice *vdev, int pos, Error **errp) ··· 1440 1538 vdev->msix->pending = g_malloc0(BITS_TO_LONGS(vdev->msix->entries) * 1441 1539 sizeof(unsigned long)); 1442 1540 ret = msix_init(&vdev->pdev, vdev->msix->entries, 1443 - vdev->bars[vdev->msix->table_bar].region.mem, 1541 + vdev->bars[vdev->msix->table_bar].mr, 1444 1542 vdev->msix->table_bar, vdev->msix->table_offset, 1445 - vdev->bars[vdev->msix->pba_bar].region.mem, 1543 + vdev->bars[vdev->msix->pba_bar].mr, 1446 1544 vdev->msix->pba_bar, vdev->msix->pba_offset, pos, 1447 1545 &err); 1448 1546 if (ret < 0) { ··· 1482 1580 1483 1581 if (vdev->msix) { 1484 1582 msix_uninit(&vdev->pdev, 1485 - vdev->bars[vdev->msix->table_bar].region.mem, 1486 - vdev->bars[vdev->msix->pba_bar].region.mem); 1583 + vdev->bars[vdev->msix->table_bar].mr, 1584 + vdev->bars[vdev->msix->pba_bar].mr); 1487 1585 g_free(vdev->msix->pending); 1488 1586 } 1489 1587 } ··· 1500 1598 } 1501 1599 } 1502 1600 1503 - static void vfio_bar_setup(VFIOPCIDevice *vdev, int nr) 1601 + static void vfio_bar_prepare(VFIOPCIDevice *vdev, int nr) 1504 1602 { 1505 1603 VFIOBAR *bar = &vdev->bars[nr]; 1506 1604 1507 1605 uint32_t pci_bar; 1508 - uint8_t type; 1509 1606 int ret; 1510 1607 1511 1608 /* Skip both unimplemented BARs and the upper half of 64bit BARS. */ ··· 1524 1621 pci_bar = le32_to_cpu(pci_bar); 1525 1622 bar->ioport = (pci_bar & PCI_BASE_ADDRESS_SPACE_IO); 1526 1623 bar->mem64 = bar->ioport ? 0 : (pci_bar & PCI_BASE_ADDRESS_MEM_TYPE_64); 1527 - type = pci_bar & (bar->ioport ? ~PCI_BASE_ADDRESS_IO_MASK : 1528 - ~PCI_BASE_ADDRESS_MEM_MASK); 1624 + bar->type = pci_bar & (bar->ioport ? 
~PCI_BASE_ADDRESS_IO_MASK : 1625 + ~PCI_BASE_ADDRESS_MEM_MASK); 1626 + bar->size = bar->region.size; 1627 + } 1628 + 1629 + static void vfio_bars_prepare(VFIOPCIDevice *vdev) 1630 + { 1631 + int i; 1529 1632 1530 - if (vfio_region_mmap(&bar->region)) { 1531 - error_report("Failed to mmap %s BAR %d. Performance may be slow", 1532 - vdev->vbasedev.name, nr); 1633 + for (i = 0; i < PCI_ROM_SLOT; i++) { 1634 + vfio_bar_prepare(vdev, i); 1635 + } 1636 + } 1637 + 1638 + static void vfio_bar_register(VFIOPCIDevice *vdev, int nr) 1639 + { 1640 + VFIOBAR *bar = &vdev->bars[nr]; 1641 + char *name; 1642 + 1643 + if (!bar->size) { 1644 + return; 1533 1645 } 1534 1646 1535 - pci_register_bar(&vdev->pdev, nr, type, bar->region.mem); 1647 + bar->mr = g_new0(MemoryRegion, 1); 1648 + name = g_strdup_printf("%s base BAR %d", vdev->vbasedev.name, nr); 1649 + memory_region_init_io(bar->mr, OBJECT(vdev), NULL, NULL, name, bar->size); 1650 + g_free(name); 1651 + 1652 + if (bar->region.size) { 1653 + memory_region_add_subregion(bar->mr, 0, bar->region.mem); 1654 + 1655 + if (vfio_region_mmap(&bar->region)) { 1656 + error_report("Failed to mmap %s BAR %d. 
Performance may be slow", 1657 + vdev->vbasedev.name, nr); 1658 + } 1659 + } 1660 + 1661 + pci_register_bar(&vdev->pdev, nr, bar->type, bar->mr); 1536 1662 } 1537 1663 1538 - static void vfio_bars_setup(VFIOPCIDevice *vdev) 1664 + static void vfio_bars_register(VFIOPCIDevice *vdev) 1539 1665 { 1540 1666 int i; 1541 1667 1542 1668 for (i = 0; i < PCI_ROM_SLOT; i++) { 1543 - vfio_bar_setup(vdev, i); 1669 + vfio_bar_register(vdev, i); 1544 1670 } 1545 1671 } 1546 1672 ··· 1549 1675 int i; 1550 1676 1551 1677 for (i = 0; i < PCI_ROM_SLOT; i++) { 1678 + VFIOBAR *bar = &vdev->bars[i]; 1679 + 1552 1680 vfio_bar_quirk_exit(vdev, i); 1553 - vfio_region_exit(&vdev->bars[i].region); 1681 + vfio_region_exit(&bar->region); 1682 + if (bar->region.size) { 1683 + memory_region_del_subregion(bar->mr, bar->region.mem); 1684 + } 1554 1685 } 1555 1686 1556 1687 if (vdev->vga) { ··· 1564 1695 int i; 1565 1696 1566 1697 for (i = 0; i < PCI_ROM_SLOT; i++) { 1698 + VFIOBAR *bar = &vdev->bars[i]; 1699 + 1567 1700 vfio_bar_quirk_finalize(vdev, i); 1568 - vfio_region_finalize(&vdev->bars[i].region); 1701 + vfio_region_finalize(&bar->region); 1702 + if (bar->size) { 1703 + object_unparent(OBJECT(bar->mr)); 1704 + g_free(bar->mr); 1705 + } 1569 1706 } 1570 1707 1571 1708 if (vdev->vga) { ··· 2734 2871 2735 2872 /* QEMU can choose to expose the ROM or not */ 2736 2873 memset(vdev->emulated_config_bits + PCI_ROM_ADDRESS, 0xff, 4); 2874 + /* QEMU can also add or extend BARs */ 2875 + memset(vdev->emulated_config_bits + PCI_BASE_ADDRESS_0, 0xff, 6 * 4); 2737 2876 2738 2877 /* 2739 2878 * The PCI spec reserves vendor ID 0xffff as an invalid value. 
The ··· 2804 2943 2805 2944 vfio_pci_size_rom(vdev); 2806 2945 2946 + vfio_bars_prepare(vdev); 2947 + 2807 2948 vfio_msix_early_setup(vdev, &err); 2808 2949 if (err) { 2809 2950 error_propagate(errp, err); 2810 2951 goto error; 2811 2952 } 2812 2953 2813 - vfio_bars_setup(vdev); 2954 + vfio_bars_register(vdev); 2814 2955 2815 2956 ret = vfio_add_capabilities(vdev, errp); 2816 2957 if (ret) { ··· 2989 3130 DEFINE_PROP_BOOL("x-no-kvm-intx", VFIOPCIDevice, no_kvm_intx, false), 2990 3131 DEFINE_PROP_BOOL("x-no-kvm-msi", VFIOPCIDevice, no_kvm_msi, false), 2991 3132 DEFINE_PROP_BOOL("x-no-kvm-msix", VFIOPCIDevice, no_kvm_msix, false), 3133 + DEFINE_PROP_BOOL("x-no-geforce-quirks", VFIOPCIDevice, 3134 + no_geforce_quirks, false), 2992 3135 DEFINE_PROP_UINT32("x-pci-vendor-id", VFIOPCIDevice, vendor_id, PCI_ANY_ID), 2993 3136 DEFINE_PROP_UINT32("x-pci-device-id", VFIOPCIDevice, device_id, PCI_ANY_ID), 2994 3137 DEFINE_PROP_UINT32("x-pci-sub-vendor-id", VFIOPCIDevice, ··· 2999 3142 DEFINE_PROP_UNSIGNED_NODEFAULT("x-nv-gpudirect-clique", VFIOPCIDevice, 3000 3143 nv_gpudirect_clique, 3001 3144 qdev_prop_nv_gpudirect_clique, uint8_t), 3145 + DEFINE_PROP_OFF_AUTO_PCIBAR("x-msix-relocation", VFIOPCIDevice, msix_relo, 3146 + OFF_AUTOPCIBAR_OFF), 3002 3147 /* 3003 3148 * TODO - support passed fds... is this necessary? 3004 3149 * DEFINE_PROP_STRING("vfiofd", VFIOPCIDevice, vfiofd_name),
+6 -1
hw/vfio/pci.h
··· 33 33 34 34 typedef struct VFIOBAR { 35 35 VFIORegion region; 36 + MemoryRegion *mr; 37 + size_t size; 38 + uint8_t type; 36 39 bool ioport; 37 40 bool mem64; 38 41 QLIST_HEAD(, VFIOQuirk) quirks; ··· 86 89 VFIO_INT_MSIX = 3, 87 90 }; 88 91 89 - /* Cache of MSI-X setup plus extra mmap and memory region for split BAR map */ 92 + /* Cache of MSI-X setup */ 90 93 typedef struct VFIOMSIXInfo { 91 94 uint8_t table_bar; 92 95 uint8_t pba_bar; ··· 132 135 (1 << VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT) 133 136 int32_t bootindex; 134 137 uint32_t igd_gms; 138 + OffAutoPCIBAR msix_relo; 135 139 uint8_t pm_cap; 136 140 uint8_t nv_gpudirect_clique; 137 141 bool pci_aer; ··· 142 146 bool no_kvm_intx; 143 147 bool no_kvm_msi; 144 148 bool no_kvm_msix; 149 + bool no_geforce_quirks; 145 150 } VFIOPCIDevice; 146 151 147 152 uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len);
+2
hw/vfio/platform.c
··· 643 643 vbasedev->dev = dev; 644 644 vbasedev->ops = &vfio_platform_ops; 645 645 646 + qemu_mutex_init(&vdev->intp_mutex); 647 + 646 648 trace_vfio_platform_realize(vbasedev->sysfsdev ? 647 649 vbasedev->sysfsdev : vbasedev->name, 648 650 vdev->compat);
+3
hw/vfio/trace-events
··· 16 16 vfio_msix_pba_enable(const char *name) " (%s)" 17 17 vfio_msix_disable(const char *name) " (%s)" 18 18 vfio_msix_fixup(const char *name, int bar, uint64_t start, uint64_t end) " (%s) MSI-X region %d mmap fixup [0x%"PRIx64" - 0x%"PRIx64"]" 19 + vfio_msix_relo_cost(const char *name, int bar, uint64_t cost) " (%s) BAR %d cost 0x%"PRIx64"" 20 + vfio_msix_relo(const char *name, int bar, uint64_t offset) " (%s) BAR %d offset 0x%"PRIx64"" 19 21 vfio_msi_enable(const char *name, int nr_vectors) " (%s) Enabled %d MSI vectors" 20 22 vfio_msi_disable(const char *name) " (%s)" 21 23 vfio_pci_load_rom(const char *name, unsigned long size, unsigned long offset, unsigned long flags) "Device %s ROM:\n size: 0x%lx, offset: 0x%lx, flags: 0x%lx" ··· 123 125 vfio_prereg_unregister(uint64_t va, uint64_t size, int ret) "va=0x%"PRIx64" size=0x%"PRIx64" ret=%d" 124 126 vfio_spapr_create_window(int ps, uint64_t ws, uint64_t off) "pageshift=0x%x winsize=0x%"PRIx64" offset=0x%"PRIx64 125 127 vfio_spapr_remove_window(uint64_t off) "offset=0x%"PRIx64 128 + vfio_spapr_group_attach(int groupfd, int tablefd) "Attached groupfd %d to liobn fd %d"
+22
include/exec/memory.h
··· 190 190 const MemoryRegionMmio old_mmio; 191 191 }; 192 192 193 + enum IOMMUMemoryRegionAttr { 194 + IOMMU_ATTR_SPAPR_TCE_FD 195 + }; 196 + 193 197 typedef struct IOMMUMemoryRegionClass { 194 198 /* private */ 195 199 struct DeviceClass parent_class; ··· 210 214 IOMMUNotifierFlag new_flags); 211 215 /* Set this up to provide customized IOMMU replay function */ 212 216 void (*replay)(IOMMUMemoryRegion *iommu, IOMMUNotifier *notifier); 217 + 218 + /* Get IOMMU misc attributes */ 219 + int (*get_attr)(IOMMUMemoryRegion *iommu, enum IOMMUMemoryRegionAttr, 220 + void *data); 213 221 } IOMMUMemoryRegionClass; 214 222 215 223 typedef struct CoalescedMemoryRange CoalescedMemoryRange; ··· 925 933 */ 926 934 void memory_region_unregister_iommu_notifier(MemoryRegion *mr, 927 935 IOMMUNotifier *n); 936 + 937 + /** 938 + * memory_region_iommu_get_attr: return an IOMMU attr if get_attr() is 939 + * defined on the IOMMU. 940 + * 941 + * Returns 0 if succeded, error code otherwise. 942 + * 943 + * @iommu_mr: the memory region 944 + * @attr: the requested attribute 945 + * @data: a pointer to the requested attribute data 946 + */ 947 + int memory_region_iommu_get_attr(IOMMUMemoryRegion *iommu_mr, 948 + enum IOMMUMemoryRegionAttr attr, 949 + void *data); 928 950 929 951 /** 930 952 * memory_region_name: get a memory region's name
+4
include/hw/qdev-properties.h
··· 34 34 extern const PropertyInfo qdev_prop_uuid; 35 35 extern const PropertyInfo qdev_prop_arraylen; 36 36 extern const PropertyInfo qdev_prop_link; 37 + extern const PropertyInfo qdev_prop_off_auto_pcibar; 37 38 38 39 #define DEFINE_PROP(_name, _state, _field, _prop, _type) { \ 39 40 .name = (_name), \ ··· 214 215 DEFINE_PROP(_n, _s, _f, qdev_prop_pci_host_devaddr, PCIHostDeviceAddress) 215 216 #define DEFINE_PROP_MEMORY_REGION(_n, _s, _f) \ 216 217 DEFINE_PROP(_n, _s, _f, qdev_prop_ptr, MemoryRegion *) 218 + #define DEFINE_PROP_OFF_AUTO_PCIBAR(_n, _s, _f, _d) \ 219 + DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_off_auto_pcibar, \ 220 + OffAutoPCIBAR) 217 221 218 222 #define DEFINE_PROP_UUID(_name, _state, _field) { \ 219 223 .name = (_name), \
+13
memory.c
··· 1922 1922 } 1923 1923 } 1924 1924 1925 + int memory_region_iommu_get_attr(IOMMUMemoryRegion *iommu_mr, 1926 + enum IOMMUMemoryRegionAttr attr, 1927 + void *data) 1928 + { 1929 + IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_GET_CLASS(iommu_mr); 1930 + 1931 + if (!imrc->get_attr) { 1932 + return -EINVAL; 1933 + } 1934 + 1935 + return imrc->get_attr(iommu_mr, attr, data); 1936 + } 1937 + 1925 1938 void memory_region_set_log(MemoryRegion *mr, bool log, unsigned client) 1926 1939 { 1927 1940 uint8_t mask = 1 << client;
+26
qapi/common.json
··· 100 100 { 'alternate': 'StrOrNull', 101 101 'data': { 's': 'str', 102 102 'n': 'null' } } 103 + 104 + ## 105 + # @OffAutoPCIBAR: 106 + # 107 + # An enumeration of options for specifying a PCI BAR 108 + # 109 + # @off: The specified feature is disabled 110 + # 111 + # @auto: The PCI BAR for the feature is automatically selected 112 + # 113 + # @bar0: PCI BAR0 is used for the feature 114 + # 115 + # @bar1: PCI BAR1 is used for the feature 116 + # 117 + # @bar2: PCI BAR2 is used for the feature 118 + # 119 + # @bar3: PCI BAR3 is used for the feature 120 + # 121 + # @bar4: PCI BAR4 is used for the feature 122 + # 123 + # @bar5: PCI BAR5 is used for the feature 124 + # 125 + # Since: 2.12 126 + ## 127 + { 'enum': 'OffAutoPCIBAR', 128 + 'data': [ 'off', 'auto', 'bar0', 'bar1', 'bar2', 'bar3', 'bar4', 'bar5' ] }
+6 -1
target/ppc/kvm.c
··· 137 137 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE); 138 138 cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64); 139 139 cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE); 140 - cap_spapr_vfio = false; 140 + cap_spapr_vfio = kvm_vm_check_extension(s, KVM_CAP_SPAPR_TCE_VFIO); 141 141 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG); 142 142 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR); 143 143 cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR); ··· 2512 2512 int kvmppc_get_cap_safe_indirect_branch(void) 2513 2513 { 2514 2514 return cap_ppc_safe_indirect_branch; 2515 + } 2516 + 2517 + bool kvmppc_has_cap_spapr_vfio(void) 2518 + { 2519 + return cap_spapr_vfio; 2515 2520 } 2516 2521 2517 2522 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
+6
target/ppc/kvm_ppc.h
··· 46 46 int kvmppc_remove_spapr_tce(void *table, int pfd, uint32_t window_size); 47 47 int kvmppc_reset_htab(int shift_hint); 48 48 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift); 49 + bool kvmppc_has_cap_spapr_vfio(void); 49 50 #endif /* !CONFIG_USER_ONLY */ 50 51 bool kvmppc_has_cap_epr(void); 51 52 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function); ··· 230 231 static inline bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path) 231 232 { 232 233 return true; 234 + } 235 + 236 + static inline bool kvmppc_has_cap_spapr_vfio(void) 237 + { 238 + return false; 233 239 } 234 240 235 241 #endif /* !CONFIG_USER_ONLY */