qemu with hax to log dma reads & writes jcs.org/2018/11/12/vfio

pci-assign: Remove

Legacy PCI device assignment was removed from Linux in 4.12,
and had already been deprecated there two years earlier. We can
remove it from QEMU as well.

The ROM loading code was shared with Xen PCI passthrough, so move
it to hw/xen.

Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

+6 -1931
+2 -10
docs/qdev-device-use.txt
··· 366 366 === Host Device Assignment === 367 367 368 368 QEMU supports assigning host PCI devices (qemu-kvm only at this time) 369 - and host USB devices. 370 - 371 - The old way to assign a host PCI device is 372 - 373 - -pcidevice host=ADDR,dma=none,id=ID 374 - 375 - The new way is 376 - 377 - -device pci-assign,host=ADDR,iommu=IOMMU,id=ID 369 + and host USB devices. PCI devices can only be assigned with -device: 378 370 379 - The old dma=none becomes iommu=off with -device. 371 + -device vfio-pci,host=ADDR,id=ID 380 372 381 373 The old way to assign a host USB device is 382 374
-1
hw/i386/Makefile.objs
··· 8 8 9 9 obj-y += kvmvapic.o 10 10 obj-y += acpi-build.o 11 - obj-y += pci-assign-load-rom.o
+1 -1
hw/i386/kvm/Makefile.objs
··· 1 - obj-y += clock.o apic.o i8259.o ioapic.o i8254.o pci-assign.o 1 + obj-y += clock.o apic.o i8259.o ioapic.o i8254.o
-1887
hw/i386/kvm/pci-assign.c
··· 1 - /* 2 - * Copyright (c) 2007, Neocleus Corporation. 3 - * 4 - * This work is licensed under the terms of the GNU GPL, version 2. See 5 - * the COPYING file in the top-level directory. 6 - * 7 - * 8 - * Assign a PCI device from the host to a guest VM. 9 - * 10 - * This implementation uses the classic device assignment interface of KVM 11 - * and is only available on x86 hosts. It is expected to be obsoleted by VFIO 12 - * based device assignment. 13 - * 14 - * Adapted for KVM (qemu-kvm) by Qumranet. QEMU version was based on qemu-kvm 15 - * revision 4144fe9d48. See its repository for the history. 16 - * 17 - * Copyright (c) 2007, Neocleus, Alex Novik (alex@neocleus.com) 18 - * Copyright (c) 2007, Neocleus, Guy Zana (guy@neocleus.com) 19 - * Copyright (C) 2008, Qumranet, Amit Shah (amit.shah@qumranet.com) 20 - * Copyright (C) 2008, Red Hat, Amit Shah (amit.shah@redhat.com) 21 - * Copyright (C) 2008, IBM, Muli Ben-Yehuda (muli@il.ibm.com) 22 - */ 23 - 24 - #include "qemu/osdep.h" 25 - #include <linux/kvm.h> 26 - #include "qapi/error.h" 27 - #include "hw/hw.h" 28 - #include "hw/i386/pc.h" 29 - #include "qemu/error-report.h" 30 - #include "ui/console.h" 31 - #include "hw/loader.h" 32 - #include "monitor/monitor.h" 33 - #include "qemu/range.h" 34 - #include "sysemu/sysemu.h" 35 - #include "hw/pci/pci.h" 36 - #include "hw/pci/msi.h" 37 - #include "kvm_i386.h" 38 - #include "hw/pci/pci-assign.h" 39 - 40 - /* From linux/ioport.h */ 41 - #define IORESOURCE_IO 0x00000100 /* Resource type */ 42 - #define IORESOURCE_MEM 0x00000200 43 - #define IORESOURCE_IRQ 0x00000400 44 - #define IORESOURCE_DMA 0x00000800 45 - #define IORESOURCE_PREFETCH 0x00002000 /* No side effects */ 46 - #define IORESOURCE_MEM_64 0x00100000 47 - 48 - typedef struct PCIRegion { 49 - int type; /* Memory or port I/O */ 50 - int valid; 51 - uint64_t base_addr; 52 - uint64_t size; /* size of the region */ 53 - int resource_fd; 54 - } PCIRegion; 55 - 56 - typedef struct PCIDevRegions { 57 - uint8_t bus, 
dev, func; /* Bus inside domain, device and function */ 58 - int irq; /* IRQ number */ 59 - uint16_t region_number; /* number of active regions */ 60 - 61 - /* Port I/O or MMIO Regions */ 62 - PCIRegion regions[PCI_NUM_REGIONS - 1]; 63 - int config_fd; 64 - } PCIDevRegions; 65 - 66 - typedef struct AssignedDevRegion { 67 - MemoryRegion container; 68 - MemoryRegion real_iomem; 69 - union { 70 - uint8_t *r_virtbase; /* mmapped access address for memory regions */ 71 - uint32_t r_baseport; /* the base guest port for I/O regions */ 72 - } u; 73 - pcibus_t e_size; /* emulated size of region in bytes */ 74 - pcibus_t r_size; /* real size of region in bytes */ 75 - PCIRegion *region; 76 - } AssignedDevRegion; 77 - 78 - #define ASSIGNED_DEVICE_PREFER_MSI_BIT 0 79 - #define ASSIGNED_DEVICE_SHARE_INTX_BIT 1 80 - 81 - #define ASSIGNED_DEVICE_PREFER_MSI_MASK (1 << ASSIGNED_DEVICE_PREFER_MSI_BIT) 82 - #define ASSIGNED_DEVICE_SHARE_INTX_MASK (1 << ASSIGNED_DEVICE_SHARE_INTX_BIT) 83 - 84 - typedef struct MSIXTableEntry { 85 - uint32_t addr_lo; 86 - uint32_t addr_hi; 87 - uint32_t data; 88 - uint32_t ctrl; 89 - } MSIXTableEntry; 90 - 91 - typedef enum AssignedIRQType { 92 - ASSIGNED_IRQ_NONE = 0, 93 - ASSIGNED_IRQ_INTX_HOST_INTX, 94 - ASSIGNED_IRQ_INTX_HOST_MSI, 95 - ASSIGNED_IRQ_MSI, 96 - ASSIGNED_IRQ_MSIX 97 - } AssignedIRQType; 98 - 99 - typedef struct AssignedDevice { 100 - PCIDevice dev; 101 - PCIHostDeviceAddress host; 102 - uint32_t dev_id; 103 - uint32_t features; 104 - int intpin; 105 - AssignedDevRegion v_addrs[PCI_NUM_REGIONS - 1]; 106 - PCIDevRegions real_device; 107 - PCIINTxRoute intx_route; 108 - AssignedIRQType assigned_irq_type; 109 - struct { 110 - #define ASSIGNED_DEVICE_CAP_MSI (1 << 0) 111 - #define ASSIGNED_DEVICE_CAP_MSIX (1 << 1) 112 - uint32_t available; 113 - #define ASSIGNED_DEVICE_MSI_ENABLED (1 << 0) 114 - #define ASSIGNED_DEVICE_MSIX_ENABLED (1 << 1) 115 - #define ASSIGNED_DEVICE_MSIX_MASKED (1 << 2) 116 - uint32_t state; 117 - } cap; 118 - uint8_t 
emulate_config_read[PCI_CONFIG_SPACE_SIZE]; 119 - uint8_t emulate_config_write[PCI_CONFIG_SPACE_SIZE]; 120 - int msi_virq_nr; 121 - int *msi_virq; 122 - MSIXTableEntry *msix_table; 123 - hwaddr msix_table_addr; 124 - uint16_t msix_table_size; 125 - uint16_t msix_max; 126 - MemoryRegion mmio; 127 - char *configfd_name; 128 - int32_t bootindex; 129 - } AssignedDevice; 130 - 131 - #define TYPE_PCI_ASSIGN "kvm-pci-assign" 132 - #define PCI_ASSIGN(obj) OBJECT_CHECK(AssignedDevice, (obj), TYPE_PCI_ASSIGN) 133 - 134 - static void assigned_dev_update_irq_routing(PCIDevice *dev); 135 - 136 - static void assigned_dev_load_option_rom(AssignedDevice *dev); 137 - 138 - static void assigned_dev_unregister_msix_mmio(AssignedDevice *dev); 139 - 140 - static uint64_t assigned_dev_ioport_rw(AssignedDevRegion *dev_region, 141 - hwaddr addr, int size, 142 - uint64_t *data) 143 - { 144 - uint64_t val = 0; 145 - int fd = dev_region->region->resource_fd; 146 - 147 - if (data) { 148 - DEBUG("pwrite data=%" PRIx64 ", size=%d, e_phys=" TARGET_FMT_plx 149 - ", addr="TARGET_FMT_plx"\n", *data, size, addr, addr); 150 - if (pwrite(fd, data, size, addr) != size) { 151 - error_report("%s - pwrite failed %s", __func__, strerror(errno)); 152 - } 153 - } else { 154 - if (pread(fd, &val, size, addr) != size) { 155 - error_report("%s - pread failed %s", __func__, strerror(errno)); 156 - val = (1UL << (size * 8)) - 1; 157 - } 158 - DEBUG("pread val=%" PRIx64 ", size=%d, e_phys=" TARGET_FMT_plx 159 - ", addr=" TARGET_FMT_plx "\n", val, size, addr, addr); 160 - } 161 - return val; 162 - } 163 - 164 - static void assigned_dev_ioport_write(void *opaque, hwaddr addr, 165 - uint64_t data, unsigned size) 166 - { 167 - assigned_dev_ioport_rw(opaque, addr, size, &data); 168 - } 169 - 170 - static uint64_t assigned_dev_ioport_read(void *opaque, 171 - hwaddr addr, unsigned size) 172 - { 173 - return assigned_dev_ioport_rw(opaque, addr, size, NULL); 174 - } 175 - 176 - static uint32_t slow_bar_readb(void *opaque, 
hwaddr addr) 177 - { 178 - AssignedDevRegion *d = opaque; 179 - uint8_t *in = d->u.r_virtbase + addr; 180 - uint32_t r; 181 - 182 - r = *in; 183 - DEBUG("addr=0x" TARGET_FMT_plx " val=0x%08x\n", addr, r); 184 - 185 - return r; 186 - } 187 - 188 - static uint32_t slow_bar_readw(void *opaque, hwaddr addr) 189 - { 190 - AssignedDevRegion *d = opaque; 191 - uint16_t *in = (uint16_t *)(d->u.r_virtbase + addr); 192 - uint32_t r; 193 - 194 - r = *in; 195 - DEBUG("addr=0x" TARGET_FMT_plx " val=0x%08x\n", addr, r); 196 - 197 - return r; 198 - } 199 - 200 - static uint32_t slow_bar_readl(void *opaque, hwaddr addr) 201 - { 202 - AssignedDevRegion *d = opaque; 203 - uint32_t *in = (uint32_t *)(d->u.r_virtbase + addr); 204 - uint32_t r; 205 - 206 - r = *in; 207 - DEBUG("addr=0x" TARGET_FMT_plx " val=0x%08x\n", addr, r); 208 - 209 - return r; 210 - } 211 - 212 - static void slow_bar_writeb(void *opaque, hwaddr addr, uint32_t val) 213 - { 214 - AssignedDevRegion *d = opaque; 215 - uint8_t *out = d->u.r_virtbase + addr; 216 - 217 - DEBUG("addr=0x" TARGET_FMT_plx " val=0x%02x\n", addr, val); 218 - *out = val; 219 - } 220 - 221 - static void slow_bar_writew(void *opaque, hwaddr addr, uint32_t val) 222 - { 223 - AssignedDevRegion *d = opaque; 224 - uint16_t *out = (uint16_t *)(d->u.r_virtbase + addr); 225 - 226 - DEBUG("addr=0x" TARGET_FMT_plx " val=0x%04x\n", addr, val); 227 - *out = val; 228 - } 229 - 230 - static void slow_bar_writel(void *opaque, hwaddr addr, uint32_t val) 231 - { 232 - AssignedDevRegion *d = opaque; 233 - uint32_t *out = (uint32_t *)(d->u.r_virtbase + addr); 234 - 235 - DEBUG("addr=0x" TARGET_FMT_plx " val=0x%08x\n", addr, val); 236 - *out = val; 237 - } 238 - 239 - static const MemoryRegionOps slow_bar_ops = { 240 - .old_mmio = { 241 - .read = { slow_bar_readb, slow_bar_readw, slow_bar_readl, }, 242 - .write = { slow_bar_writeb, slow_bar_writew, slow_bar_writel, }, 243 - }, 244 - .endianness = DEVICE_NATIVE_ENDIAN, 245 - }; 246 - 247 - static void 
assigned_dev_iomem_setup(PCIDevice *pci_dev, int region_num, 248 - pcibus_t e_size) 249 - { 250 - AssignedDevice *r_dev = PCI_ASSIGN(pci_dev); 251 - AssignedDevRegion *region = &r_dev->v_addrs[region_num]; 252 - PCIRegion *real_region = &r_dev->real_device.regions[region_num]; 253 - 254 - if (e_size > 0) { 255 - memory_region_init(&region->container, OBJECT(pci_dev), 256 - "assigned-dev-container", e_size); 257 - memory_region_add_subregion(&region->container, 0, &region->real_iomem); 258 - 259 - /* deal with MSI-X MMIO page */ 260 - if (real_region->base_addr <= r_dev->msix_table_addr && 261 - real_region->base_addr + real_region->size > 262 - r_dev->msix_table_addr) { 263 - uint64_t offset = r_dev->msix_table_addr - real_region->base_addr; 264 - 265 - memory_region_add_subregion_overlap(&region->container, 266 - offset, 267 - &r_dev->mmio, 268 - 1); 269 - } 270 - } 271 - } 272 - 273 - static const MemoryRegionOps assigned_dev_ioport_ops = { 274 - .read = assigned_dev_ioport_read, 275 - .write = assigned_dev_ioport_write, 276 - .endianness = DEVICE_NATIVE_ENDIAN, 277 - }; 278 - 279 - static void assigned_dev_ioport_setup(PCIDevice *pci_dev, int region_num, 280 - pcibus_t size) 281 - { 282 - AssignedDevice *r_dev = PCI_ASSIGN(pci_dev); 283 - AssignedDevRegion *region = &r_dev->v_addrs[region_num]; 284 - 285 - region->e_size = size; 286 - memory_region_init(&region->container, OBJECT(pci_dev), 287 - "assigned-dev-container", size); 288 - memory_region_init_io(&region->real_iomem, OBJECT(pci_dev), 289 - &assigned_dev_ioport_ops, r_dev->v_addrs + region_num, 290 - "assigned-dev-iomem", size); 291 - memory_region_add_subregion(&region->container, 0, &region->real_iomem); 292 - } 293 - 294 - static uint32_t assigned_dev_pci_read(PCIDevice *d, int pos, int len) 295 - { 296 - AssignedDevice *pci_dev = PCI_ASSIGN(d); 297 - uint32_t val; 298 - ssize_t ret; 299 - int fd = pci_dev->real_device.config_fd; 300 - 301 - again: 302 - ret = pread(fd, &val, len, pos); 303 - if (ret 
!= len) { 304 - if ((ret < 0) && (errno == EINTR || errno == EAGAIN)) { 305 - goto again; 306 - } 307 - 308 - hw_error("pci read failed, ret = %zd errno = %d\n", ret, errno); 309 - } 310 - 311 - return val; 312 - } 313 - 314 - static uint8_t assigned_dev_pci_read_byte(PCIDevice *d, int pos) 315 - { 316 - return (uint8_t)assigned_dev_pci_read(d, pos, 1); 317 - } 318 - 319 - static void assigned_dev_pci_write(PCIDevice *d, int pos, uint32_t val, int len) 320 - { 321 - AssignedDevice *pci_dev = PCI_ASSIGN(d); 322 - ssize_t ret; 323 - int fd = pci_dev->real_device.config_fd; 324 - 325 - again: 326 - ret = pwrite(fd, &val, len, pos); 327 - if (ret != len) { 328 - if ((ret < 0) && (errno == EINTR || errno == EAGAIN)) { 329 - goto again; 330 - } 331 - 332 - hw_error("pci write failed, ret = %zd errno = %d\n", ret, errno); 333 - } 334 - } 335 - 336 - static void assigned_dev_emulate_config_read(AssignedDevice *dev, 337 - uint32_t offset, uint32_t len) 338 - { 339 - memset(dev->emulate_config_read + offset, 0xff, len); 340 - } 341 - 342 - static void assigned_dev_direct_config_read(AssignedDevice *dev, 343 - uint32_t offset, uint32_t len) 344 - { 345 - memset(dev->emulate_config_read + offset, 0, len); 346 - } 347 - 348 - static void assigned_dev_direct_config_write(AssignedDevice *dev, 349 - uint32_t offset, uint32_t len) 350 - { 351 - memset(dev->emulate_config_write + offset, 0, len); 352 - } 353 - 354 - static uint8_t pci_find_cap_offset(PCIDevice *d, uint8_t cap, uint8_t start) 355 - { 356 - int id; 357 - int max_cap = 48; 358 - int pos = start ? 
start : PCI_CAPABILITY_LIST; 359 - int status; 360 - 361 - status = assigned_dev_pci_read_byte(d, PCI_STATUS); 362 - if ((status & PCI_STATUS_CAP_LIST) == 0) { 363 - return 0; 364 - } 365 - 366 - while (max_cap--) { 367 - pos = assigned_dev_pci_read_byte(d, pos); 368 - if (pos < 0x40) { 369 - break; 370 - } 371 - 372 - pos &= ~3; 373 - id = assigned_dev_pci_read_byte(d, pos + PCI_CAP_LIST_ID); 374 - 375 - if (id == 0xff) { 376 - break; 377 - } 378 - if (id == cap) { 379 - return pos; 380 - } 381 - 382 - pos += PCI_CAP_LIST_NEXT; 383 - } 384 - return 0; 385 - } 386 - 387 - static void assigned_dev_register_regions(PCIRegion *io_regions, 388 - unsigned long regions_num, 389 - AssignedDevice *pci_dev, 390 - Error **errp) 391 - { 392 - uint32_t i; 393 - PCIRegion *cur_region = io_regions; 394 - 395 - for (i = 0; i < regions_num; i++, cur_region++) { 396 - if (!cur_region->valid) { 397 - continue; 398 - } 399 - 400 - /* handle memory io regions */ 401 - if (cur_region->type & IORESOURCE_MEM) { 402 - int t = PCI_BASE_ADDRESS_SPACE_MEMORY; 403 - if (cur_region->type & IORESOURCE_PREFETCH) { 404 - t |= PCI_BASE_ADDRESS_MEM_PREFETCH; 405 - } 406 - if (cur_region->type & IORESOURCE_MEM_64) { 407 - t |= PCI_BASE_ADDRESS_MEM_TYPE_64; 408 - } 409 - 410 - /* map physical memory */ 411 - pci_dev->v_addrs[i].u.r_virtbase = mmap(NULL, cur_region->size, 412 - PROT_WRITE | PROT_READ, 413 - MAP_SHARED, 414 - cur_region->resource_fd, 415 - (off_t)0); 416 - 417 - if (pci_dev->v_addrs[i].u.r_virtbase == MAP_FAILED) { 418 - pci_dev->v_addrs[i].u.r_virtbase = NULL; 419 - error_setg_errno(errp, errno, "Couldn't mmap 0x%" PRIx64 "!", 420 - cur_region->base_addr); 421 - return; 422 - } 423 - 424 - pci_dev->v_addrs[i].r_size = cur_region->size; 425 - pci_dev->v_addrs[i].e_size = 0; 426 - 427 - /* add offset */ 428 - pci_dev->v_addrs[i].u.r_virtbase += 429 - (cur_region->base_addr & 0xFFF); 430 - 431 - if (cur_region->size & 0xFFF) { 432 - error_report("PCI region %d at address 0x%" PRIx64 " 
has " 433 - "size 0x%" PRIx64 ", which is not a multiple of " 434 - "4K. You might experience some performance hit " 435 - "due to that.", 436 - i, cur_region->base_addr, cur_region->size); 437 - memory_region_init_io(&pci_dev->v_addrs[i].real_iomem, 438 - OBJECT(pci_dev), &slow_bar_ops, 439 - &pci_dev->v_addrs[i], 440 - "assigned-dev-slow-bar", 441 - cur_region->size); 442 - } else { 443 - void *virtbase = pci_dev->v_addrs[i].u.r_virtbase; 444 - char name[32]; 445 - snprintf(name, sizeof(name), "%s.bar%d", 446 - object_get_typename(OBJECT(pci_dev)), i); 447 - memory_region_init_ram_ptr(&pci_dev->v_addrs[i].real_iomem, 448 - OBJECT(pci_dev), name, 449 - cur_region->size, virtbase); 450 - vmstate_register_ram(&pci_dev->v_addrs[i].real_iomem, 451 - &pci_dev->dev.qdev); 452 - } 453 - 454 - assigned_dev_iomem_setup(&pci_dev->dev, i, cur_region->size); 455 - pci_register_bar((PCIDevice *) pci_dev, i, t, 456 - &pci_dev->v_addrs[i].container); 457 - continue; 458 - } else { 459 - /* handle port io regions */ 460 - uint32_t val; 461 - int ret; 462 - 463 - /* Test kernel support for ioport resource read/write. Old 464 - * kernels return EIO. 
New kernels only allow 1/2/4 byte reads 465 - * so should return EINVAL for a 3 byte read */ 466 - ret = pread(pci_dev->v_addrs[i].region->resource_fd, &val, 3, 0); 467 - if (ret >= 0) { 468 - error_report("Unexpected return from I/O port read: %d", ret); 469 - abort(); 470 - } else if (errno != EINVAL) { 471 - error_report("Kernel doesn't support ioport resource " 472 - "access, hiding this region."); 473 - close(pci_dev->v_addrs[i].region->resource_fd); 474 - cur_region->valid = 0; 475 - continue; 476 - } 477 - 478 - pci_dev->v_addrs[i].u.r_baseport = cur_region->base_addr; 479 - pci_dev->v_addrs[i].r_size = cur_region->size; 480 - pci_dev->v_addrs[i].e_size = 0; 481 - 482 - assigned_dev_ioport_setup(&pci_dev->dev, i, cur_region->size); 483 - pci_register_bar((PCIDevice *) pci_dev, i, 484 - PCI_BASE_ADDRESS_SPACE_IO, 485 - &pci_dev->v_addrs[i].container); 486 - } 487 - } 488 - 489 - /* success */ 490 - } 491 - 492 - static void get_real_id(const char *devpath, const char *idname, uint16_t *val, 493 - Error **errp) 494 - { 495 - FILE *f; 496 - char name[128]; 497 - long id; 498 - 499 - snprintf(name, sizeof(name), "%s%s", devpath, idname); 500 - f = fopen(name, "r"); 501 - if (f == NULL) { 502 - error_setg_file_open(errp, errno, name); 503 - return; 504 - } 505 - if (fscanf(f, "%li\n", &id) == 1) { 506 - *val = id; 507 - } else { 508 - error_setg(errp, "Failed to parse contents of '%s'", name); 509 - } 510 - fclose(f); 511 - } 512 - 513 - static void get_real_vendor_id(const char *devpath, uint16_t *val, 514 - Error **errp) 515 - { 516 - get_real_id(devpath, "vendor", val, errp); 517 - } 518 - 519 - static void get_real_device_id(const char *devpath, uint16_t *val, 520 - Error **errp) 521 - { 522 - get_real_id(devpath, "device", val, errp); 523 - } 524 - 525 - static void get_real_device(AssignedDevice *pci_dev, Error **errp) 526 - { 527 - char dir[128], name[128]; 528 - int fd, r = 0; 529 - FILE *f; 530 - uint64_t start, end, size, flags; 531 - uint16_t id; 532 - 
PCIRegion *rp; 533 - PCIDevRegions *dev = &pci_dev->real_device; 534 - Error *local_err = NULL; 535 - 536 - dev->region_number = 0; 537 - 538 - snprintf(dir, sizeof(dir), "/sys/bus/pci/devices/%04x:%02x:%02x.%x/", 539 - pci_dev->host.domain, pci_dev->host.bus, 540 - pci_dev->host.slot, pci_dev->host.function); 541 - 542 - snprintf(name, sizeof(name), "%sconfig", dir); 543 - 544 - if (pci_dev->configfd_name && *pci_dev->configfd_name) { 545 - dev->config_fd = monitor_fd_param(cur_mon, pci_dev->configfd_name, 546 - &local_err); 547 - if (local_err) { 548 - error_propagate(errp, local_err); 549 - return; 550 - } 551 - } else { 552 - dev->config_fd = open(name, O_RDWR); 553 - 554 - if (dev->config_fd == -1) { 555 - error_setg_file_open(errp, errno, name); 556 - return; 557 - } 558 - } 559 - again: 560 - r = read(dev->config_fd, pci_dev->dev.config, 561 - pci_config_size(&pci_dev->dev)); 562 - if (r < 0) { 563 - if (errno == EINTR || errno == EAGAIN) { 564 - goto again; 565 - } 566 - error_setg_errno(errp, errno, "read(\"%s\")", 567 - (pci_dev->configfd_name && *pci_dev->configfd_name) ? 568 - pci_dev->configfd_name : name); 569 - return; 570 - } 571 - 572 - /* Restore or clear multifunction, this is always controlled by qemu */ 573 - if (pci_dev->dev.cap_present & QEMU_PCI_CAP_MULTIFUNCTION) { 574 - pci_dev->dev.config[PCI_HEADER_TYPE] |= PCI_HEADER_TYPE_MULTI_FUNCTION; 575 - } else { 576 - pci_dev->dev.config[PCI_HEADER_TYPE] &= ~PCI_HEADER_TYPE_MULTI_FUNCTION; 577 - } 578 - 579 - /* Clear host resource mapping info. If we choose not to register a 580 - * BAR, such as might be the case with the option ROM, we can get 581 - * confusing, unwritable, residual addresses from the host here. 
*/ 582 - memset(&pci_dev->dev.config[PCI_BASE_ADDRESS_0], 0, 24); 583 - memset(&pci_dev->dev.config[PCI_ROM_ADDRESS], 0, 4); 584 - 585 - snprintf(name, sizeof(name), "%sresource", dir); 586 - 587 - f = fopen(name, "r"); 588 - if (f == NULL) { 589 - error_setg_file_open(errp, errno, name); 590 - return; 591 - } 592 - 593 - for (r = 0; r < PCI_ROM_SLOT; r++) { 594 - if (fscanf(f, "%" SCNi64 " %" SCNi64 " %" SCNi64 "\n", 595 - &start, &end, &flags) != 3) { 596 - break; 597 - } 598 - 599 - rp = dev->regions + r; 600 - rp->valid = 0; 601 - rp->resource_fd = -1; 602 - size = end - start + 1; 603 - flags &= IORESOURCE_IO | IORESOURCE_MEM | IORESOURCE_PREFETCH 604 - | IORESOURCE_MEM_64; 605 - if (size == 0 || (flags & ~IORESOURCE_PREFETCH) == 0) { 606 - continue; 607 - } 608 - if (flags & IORESOURCE_MEM) { 609 - flags &= ~IORESOURCE_IO; 610 - } else { 611 - flags &= ~IORESOURCE_PREFETCH; 612 - } 613 - snprintf(name, sizeof(name), "%sresource%d", dir, r); 614 - fd = open(name, O_RDWR); 615 - if (fd == -1) { 616 - continue; 617 - } 618 - rp->resource_fd = fd; 619 - 620 - rp->type = flags; 621 - rp->valid = 1; 622 - rp->base_addr = start; 623 - rp->size = size; 624 - pci_dev->v_addrs[r].region = rp; 625 - DEBUG("region %d size %" PRIu64 " start 0x%" PRIx64 626 - " type %d resource_fd %d\n", 627 - r, rp->size, start, rp->type, rp->resource_fd); 628 - } 629 - 630 - fclose(f); 631 - 632 - /* read and fill vendor ID */ 633 - get_real_vendor_id(dir, &id, &local_err); 634 - if (local_err) { 635 - error_propagate(errp, local_err); 636 - return; 637 - } 638 - pci_dev->dev.config[0] = id & 0xff; 639 - pci_dev->dev.config[1] = (id & 0xff00) >> 8; 640 - 641 - /* read and fill device ID */ 642 - get_real_device_id(dir, &id, &local_err); 643 - if (local_err) { 644 - error_propagate(errp, local_err); 645 - return; 646 - } 647 - pci_dev->dev.config[2] = id & 0xff; 648 - pci_dev->dev.config[3] = (id & 0xff00) >> 8; 649 - 650 - pci_word_test_and_clear_mask(pci_dev->emulate_config_write + 
PCI_COMMAND, 651 - PCI_COMMAND_MASTER | PCI_COMMAND_INTX_DISABLE); 652 - 653 - dev->region_number = r; 654 - } 655 - 656 - static void free_msi_virqs(AssignedDevice *dev) 657 - { 658 - int i; 659 - 660 - for (i = 0; i < dev->msi_virq_nr; i++) { 661 - if (dev->msi_virq[i] >= 0) { 662 - kvm_irqchip_release_virq(kvm_state, dev->msi_virq[i]); 663 - dev->msi_virq[i] = -1; 664 - } 665 - } 666 - g_free(dev->msi_virq); 667 - dev->msi_virq = NULL; 668 - dev->msi_virq_nr = 0; 669 - } 670 - 671 - static void free_assigned_device(AssignedDevice *dev) 672 - { 673 - int i; 674 - 675 - if (dev->cap.available & ASSIGNED_DEVICE_CAP_MSIX) { 676 - assigned_dev_unregister_msix_mmio(dev); 677 - } 678 - for (i = 0; i < dev->real_device.region_number; i++) { 679 - PCIRegion *pci_region = &dev->real_device.regions[i]; 680 - AssignedDevRegion *region = &dev->v_addrs[i]; 681 - 682 - if (!pci_region->valid) { 683 - continue; 684 - } 685 - if (pci_region->type & IORESOURCE_IO) { 686 - if (region->u.r_baseport) { 687 - memory_region_del_subregion(&region->container, 688 - &region->real_iomem); 689 - } 690 - } else if (pci_region->type & IORESOURCE_MEM) { 691 - if (region->u.r_virtbase) { 692 - memory_region_del_subregion(&region->container, 693 - &region->real_iomem); 694 - 695 - /* Remove MSI-X table subregion */ 696 - if (pci_region->base_addr <= dev->msix_table_addr && 697 - pci_region->base_addr + pci_region->size > 698 - dev->msix_table_addr) { 699 - memory_region_del_subregion(&region->container, 700 - &dev->mmio); 701 - } 702 - if (munmap(region->u.r_virtbase, 703 - (pci_region->size + 0xFFF) & 0xFFFFF000)) { 704 - error_report("Failed to unmap assigned device region: %s", 705 - strerror(errno)); 706 - } 707 - } 708 - } 709 - if (pci_region->resource_fd >= 0) { 710 - close(pci_region->resource_fd); 711 - } 712 - } 713 - 714 - if (dev->real_device.config_fd >= 0) { 715 - close(dev->real_device.config_fd); 716 - } 717 - 718 - free_msi_virqs(dev); 719 - } 720 - 721 - /* This function tries 
to determine the cause of the PCI assignment failure. It 722 - * always returns the cause as a dynamically allocated, human readable string. 723 - * If the function fails to determine the cause for any internal reason, then 724 - * the returned string will state that fact. 725 - */ 726 - static char *assign_failed_examine(const AssignedDevice *dev) 727 - { 728 - char name[PATH_MAX], dir[PATH_MAX], driver[PATH_MAX] = {}, *ns; 729 - uint16_t vendor_id, device_id; 730 - int r; 731 - Error *local_err = NULL; 732 - 733 - snprintf(dir, sizeof(dir), "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/", 734 - dev->host.domain, dev->host.bus, dev->host.slot, 735 - dev->host.function); 736 - 737 - snprintf(name, sizeof(name), "%sdriver", dir); 738 - 739 - r = readlink(name, driver, sizeof(driver)); 740 - if ((r <= 0) || r >= sizeof(driver)) { 741 - goto fail; 742 - } 743 - 744 - driver[r] = 0; 745 - ns = strrchr(driver, '/'); 746 - if (!ns) { 747 - goto fail; 748 - } 749 - 750 - ns++; 751 - 752 - if ((get_real_vendor_id(dir, &vendor_id, &local_err), local_err) || 753 - (get_real_device_id(dir, &device_id, &local_err), local_err)) { 754 - /* We're already analyzing an assignment error, so we suppress this 755 - * one just like the others above. 
756 - */ 757 - error_free(local_err); 758 - goto fail; 759 - } 760 - 761 - return g_strdup_printf( 762 - "*** The driver '%s' is occupying your device %04x:%02x:%02x.%x.\n" 763 - "***\n" 764 - "*** You can try the following commands to free it:\n" 765 - "***\n" 766 - "*** $ echo \"%04x %04x\" > /sys/bus/pci/drivers/pci-stub/new_id\n" 767 - "*** $ echo \"%04x:%02x:%02x.%x\" > /sys/bus/pci/drivers/%s/unbind\n" 768 - "*** $ echo \"%04x:%02x:%02x.%x\" > /sys/bus/pci/drivers/" 769 - "pci-stub/bind\n" 770 - "*** $ echo \"%04x %04x\" > /sys/bus/pci/drivers/pci-stub/remove_id\n" 771 - "***\n", 772 - ns, dev->host.domain, dev->host.bus, dev->host.slot, 773 - dev->host.function, vendor_id, device_id, 774 - dev->host.domain, dev->host.bus, dev->host.slot, dev->host.function, 775 - ns, dev->host.domain, dev->host.bus, dev->host.slot, 776 - dev->host.function, vendor_id, device_id); 777 - 778 - fail: 779 - return g_strdup("Couldn't find out why.\n"); 780 - } 781 - 782 - static void assign_device(AssignedDevice *dev, Error **errp) 783 - { 784 - uint32_t flags = KVM_DEV_ASSIGN_ENABLE_IOMMU; 785 - int r; 786 - 787 - /* Only pass non-zero PCI segment to capable module */ 788 - if (!kvm_check_extension(kvm_state, KVM_CAP_PCI_SEGMENT) && 789 - dev->host.domain) { 790 - error_setg(errp, "Can't assign device inside non-zero PCI segment " 791 - "as this KVM module doesn't support it."); 792 - return; 793 - } 794 - 795 - if (!kvm_check_extension(kvm_state, KVM_CAP_IOMMU)) { 796 - error_setg(errp, "No IOMMU found. 
Unable to assign device \"%s\"", 797 - dev->dev.qdev.id); 798 - return; 799 - } 800 - 801 - if (dev->features & ASSIGNED_DEVICE_SHARE_INTX_MASK && 802 - kvm_has_intx_set_mask()) { 803 - flags |= KVM_DEV_ASSIGN_PCI_2_3; 804 - } 805 - 806 - r = kvm_device_pci_assign(kvm_state, &dev->host, flags, &dev->dev_id); 807 - if (r < 0) { 808 - switch (r) { 809 - case -EBUSY: { 810 - char *cause; 811 - 812 - cause = assign_failed_examine(dev); 813 - error_setg_errno(errp, -r, "Failed to assign device \"%s\"", 814 - dev->dev.qdev.id); 815 - error_append_hint(errp, "%s", cause); 816 - g_free(cause); 817 - break; 818 - } 819 - default: 820 - error_setg_errno(errp, -r, "Failed to assign device \"%s\"", 821 - dev->dev.qdev.id); 822 - break; 823 - } 824 - } 825 - } 826 - 827 - static int verify_irqchip_in_kernel(Error **errp) 828 - { 829 - if (kvm_irqchip_in_kernel()) { 830 - return -1; 831 - } 832 - error_setg(errp, "pci-assign requires KVM with in-kernel irqchip enabled"); 833 - return 0; 834 - } 835 - 836 - static int assign_intx(AssignedDevice *dev, Error **errp) 837 - { 838 - AssignedIRQType new_type; 839 - PCIINTxRoute intx_route; 840 - bool intx_host_msi; 841 - int r; 842 - 843 - /* Interrupt PIN 0 means don't use INTx */ 844 - if (assigned_dev_pci_read_byte(&dev->dev, PCI_INTERRUPT_PIN) == 0) { 845 - pci_device_set_intx_routing_notifier(&dev->dev, NULL); 846 - return 0; 847 - } 848 - 849 - if (verify_irqchip_in_kernel(errp) < 0) { 850 - return -ENOTSUP; 851 - } 852 - 853 - pci_device_set_intx_routing_notifier(&dev->dev, 854 - assigned_dev_update_irq_routing); 855 - 856 - intx_route = pci_device_route_intx_to_irq(&dev->dev, dev->intpin); 857 - assert(intx_route.mode != PCI_INTX_INVERTED); 858 - 859 - if (!pci_intx_route_changed(&dev->intx_route, &intx_route)) { 860 - return 0; 861 - } 862 - 863 - switch (dev->assigned_irq_type) { 864 - case ASSIGNED_IRQ_INTX_HOST_INTX: 865 - case ASSIGNED_IRQ_INTX_HOST_MSI: 866 - intx_host_msi = dev->assigned_irq_type == 
ASSIGNED_IRQ_INTX_HOST_MSI; 867 - r = kvm_device_intx_deassign(kvm_state, dev->dev_id, intx_host_msi); 868 - break; 869 - case ASSIGNED_IRQ_MSI: 870 - r = kvm_device_msi_deassign(kvm_state, dev->dev_id); 871 - break; 872 - case ASSIGNED_IRQ_MSIX: 873 - r = kvm_device_msix_deassign(kvm_state, dev->dev_id); 874 - break; 875 - default: 876 - r = 0; 877 - break; 878 - } 879 - if (r) { 880 - perror("assign_intx: deassignment of previous interrupt failed"); 881 - } 882 - dev->assigned_irq_type = ASSIGNED_IRQ_NONE; 883 - 884 - if (intx_route.mode == PCI_INTX_DISABLED) { 885 - dev->intx_route = intx_route; 886 - return 0; 887 - } 888 - 889 - retry: 890 - if (dev->features & ASSIGNED_DEVICE_PREFER_MSI_MASK && 891 - dev->cap.available & ASSIGNED_DEVICE_CAP_MSI) { 892 - intx_host_msi = true; 893 - new_type = ASSIGNED_IRQ_INTX_HOST_MSI; 894 - } else { 895 - intx_host_msi = false; 896 - new_type = ASSIGNED_IRQ_INTX_HOST_INTX; 897 - } 898 - 899 - r = kvm_device_intx_assign(kvm_state, dev->dev_id, intx_host_msi, 900 - intx_route.irq); 901 - if (r < 0) { 902 - if (r == -EIO && !(dev->features & ASSIGNED_DEVICE_PREFER_MSI_MASK) && 903 - dev->cap.available & ASSIGNED_DEVICE_CAP_MSI) { 904 - /* Retry with host-side MSI. There might be an IRQ conflict and 905 - * either the kernel or the device doesn't support sharing. 
*/ 906 - error_report("Host-side INTx sharing not supported, " 907 - "using MSI instead"); 908 - error_printf("Some devices do not work properly in this mode.\n"); 909 - dev->features |= ASSIGNED_DEVICE_PREFER_MSI_MASK; 910 - goto retry; 911 - } 912 - error_setg_errno(errp, -r, "Failed to assign irq for \"%s\"", 913 - dev->dev.qdev.id); 914 - error_append_hint(errp, "Perhaps you are assigning a device " 915 - "that shares an IRQ with another device?\n"); 916 - return r; 917 - } 918 - 919 - dev->intx_route = intx_route; 920 - dev->assigned_irq_type = new_type; 921 - return r; 922 - } 923 - 924 - static void deassign_device(AssignedDevice *dev) 925 - { 926 - int r; 927 - 928 - r = kvm_device_pci_deassign(kvm_state, dev->dev_id); 929 - assert(r == 0); 930 - } 931 - 932 - /* The pci config space got updated. Check if irq numbers have changed 933 - * for our devices 934 - */ 935 - static void assigned_dev_update_irq_routing(PCIDevice *dev) 936 - { 937 - AssignedDevice *assigned_dev = PCI_ASSIGN(dev); 938 - Error *err = NULL; 939 - int r; 940 - 941 - r = assign_intx(assigned_dev, &err); 942 - if (r < 0) { 943 - error_report_err(err); 944 - err = NULL; 945 - qdev_unplug(&dev->qdev, &err); 946 - assert(!err); 947 - } 948 - } 949 - 950 - static void assigned_dev_update_msi(PCIDevice *pci_dev) 951 - { 952 - AssignedDevice *assigned_dev = PCI_ASSIGN(pci_dev); 953 - uint8_t ctrl_byte = pci_get_byte(pci_dev->config + pci_dev->msi_cap + 954 - PCI_MSI_FLAGS); 955 - int r; 956 - 957 - /* Some guests gratuitously disable MSI even if they're not using it, 958 - * try to catch this by only deassigning irqs if the guest is using 959 - * MSI or intends to start. 
*/ 960 - if (assigned_dev->assigned_irq_type == ASSIGNED_IRQ_MSI || 961 - (ctrl_byte & PCI_MSI_FLAGS_ENABLE)) { 962 - r = kvm_device_msi_deassign(kvm_state, assigned_dev->dev_id); 963 - /* -ENXIO means no assigned irq */ 964 - if (r && r != -ENXIO) { 965 - perror("assigned_dev_update_msi: deassign irq"); 966 - } 967 - 968 - free_msi_virqs(assigned_dev); 969 - 970 - assigned_dev->assigned_irq_type = ASSIGNED_IRQ_NONE; 971 - pci_device_set_intx_routing_notifier(pci_dev, NULL); 972 - } 973 - 974 - if (ctrl_byte & PCI_MSI_FLAGS_ENABLE) { 975 - int virq; 976 - 977 - virq = kvm_irqchip_add_msi_route(kvm_state, 0, pci_dev); 978 - if (virq < 0) { 979 - perror("assigned_dev_update_msi: kvm_irqchip_add_msi_route"); 980 - return; 981 - } 982 - 983 - assigned_dev->msi_virq = g_malloc(sizeof(*assigned_dev->msi_virq)); 984 - assigned_dev->msi_virq_nr = 1; 985 - assigned_dev->msi_virq[0] = virq; 986 - if (kvm_device_msi_assign(kvm_state, assigned_dev->dev_id, virq) < 0) { 987 - perror("assigned_dev_update_msi: kvm_device_msi_assign"); 988 - } 989 - 990 - assigned_dev->intx_route.mode = PCI_INTX_DISABLED; 991 - assigned_dev->intx_route.irq = -1; 992 - assigned_dev->assigned_irq_type = ASSIGNED_IRQ_MSI; 993 - } else { 994 - Error *local_err = NULL; 995 - 996 - assign_intx(assigned_dev, &local_err); 997 - if (local_err) { 998 - error_report_err(local_err); 999 - } 1000 - } 1001 - } 1002 - 1003 - static void assigned_dev_update_msi_msg(PCIDevice *pci_dev) 1004 - { 1005 - AssignedDevice *assigned_dev = PCI_ASSIGN(pci_dev); 1006 - uint8_t ctrl_byte = pci_get_byte(pci_dev->config + pci_dev->msi_cap + 1007 - PCI_MSI_FLAGS); 1008 - 1009 - if (assigned_dev->assigned_irq_type != ASSIGNED_IRQ_MSI || 1010 - !(ctrl_byte & PCI_MSI_FLAGS_ENABLE)) { 1011 - return; 1012 - } 1013 - 1014 - kvm_irqchip_update_msi_route(kvm_state, assigned_dev->msi_virq[0], 1015 - msi_get_message(pci_dev, 0), pci_dev); 1016 - kvm_irqchip_commit_routes(kvm_state); 1017 - } 1018 - 1019 - static bool 
assigned_dev_msix_masked(MSIXTableEntry *entry) 1020 - { 1021 - return (entry->ctrl & cpu_to_le32(0x1)) != 0; 1022 - } 1023 - 1024 - /* 1025 - * When MSI-X is first enabled the vector table typically has all the 1026 - * vectors masked, so we can't use that as the obvious test to figure out 1027 - * how many vectors to initially enable. Instead we look at the data field 1028 - * because this is what worked for pci-assign for a long time. This makes 1029 - * sure the physical MSI-X state tracks the guest's view, which is important 1030 - * for some VF/PF and PF/fw communication channels. 1031 - */ 1032 - static bool assigned_dev_msix_skipped(MSIXTableEntry *entry) 1033 - { 1034 - return !entry->data; 1035 - } 1036 - 1037 - static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev) 1038 - { 1039 - AssignedDevice *adev = PCI_ASSIGN(pci_dev); 1040 - uint16_t entries_nr = 0; 1041 - int i, r = 0; 1042 - MSIXTableEntry *entry = adev->msix_table; 1043 - 1044 - /* Get the usable entry number for allocating */ 1045 - for (i = 0; i < adev->msix_max; i++, entry++) { 1046 - if (assigned_dev_msix_skipped(entry)) { 1047 - continue; 1048 - } 1049 - entries_nr++; 1050 - } 1051 - 1052 - DEBUG("MSI-X entries: %d\n", entries_nr); 1053 - 1054 - /* It's valid to enable MSI-X with all entries masked */ 1055 - if (!entries_nr) { 1056 - return 0; 1057 - } 1058 - 1059 - r = kvm_device_msix_init_vectors(kvm_state, adev->dev_id, entries_nr); 1060 - if (r != 0) { 1061 - error_report("fail to set MSI-X entry number for MSIX! 
%s", 1062 - strerror(-r)); 1063 - return r; 1064 - } 1065 - 1066 - free_msi_virqs(adev); 1067 - 1068 - adev->msi_virq_nr = adev->msix_max; 1069 - adev->msi_virq = g_malloc(adev->msix_max * sizeof(*adev->msi_virq)); 1070 - 1071 - entry = adev->msix_table; 1072 - for (i = 0; i < adev->msix_max; i++, entry++) { 1073 - adev->msi_virq[i] = -1; 1074 - 1075 - if (assigned_dev_msix_skipped(entry)) { 1076 - continue; 1077 - } 1078 - 1079 - r = kvm_irqchip_add_msi_route(kvm_state, i, pci_dev); 1080 - if (r < 0) { 1081 - return r; 1082 - } 1083 - adev->msi_virq[i] = r; 1084 - 1085 - DEBUG("MSI-X vector %d, gsi %d, addr %08x_%08x, data %08x\n", i, 1086 - r, entry->addr_hi, entry->addr_lo, entry->data); 1087 - 1088 - r = kvm_device_msix_set_vector(kvm_state, adev->dev_id, i, 1089 - adev->msi_virq[i]); 1090 - if (r) { 1091 - error_report("fail to set MSI-X entry! %s", strerror(-r)); 1092 - break; 1093 - } 1094 - } 1095 - 1096 - return r; 1097 - } 1098 - 1099 - static void assigned_dev_update_msix(PCIDevice *pci_dev) 1100 - { 1101 - AssignedDevice *assigned_dev = PCI_ASSIGN(pci_dev); 1102 - uint16_t ctrl_word = pci_get_word(pci_dev->config + pci_dev->msix_cap + 1103 - PCI_MSIX_FLAGS); 1104 - int r; 1105 - 1106 - /* Some guests gratuitously disable MSIX even if they're not using it, 1107 - * try to catch this by only deassigning irqs if the guest is using 1108 - * MSIX or intends to start. 
*/ 1109 - if ((assigned_dev->assigned_irq_type == ASSIGNED_IRQ_MSIX) || 1110 - (ctrl_word & PCI_MSIX_FLAGS_ENABLE)) { 1111 - r = kvm_device_msix_deassign(kvm_state, assigned_dev->dev_id); 1112 - /* -ENXIO means no assigned irq */ 1113 - if (r && r != -ENXIO) { 1114 - perror("assigned_dev_update_msix: deassign irq"); 1115 - } 1116 - 1117 - free_msi_virqs(assigned_dev); 1118 - 1119 - assigned_dev->assigned_irq_type = ASSIGNED_IRQ_NONE; 1120 - pci_device_set_intx_routing_notifier(pci_dev, NULL); 1121 - } 1122 - 1123 - if (ctrl_word & PCI_MSIX_FLAGS_ENABLE) { 1124 - if (assigned_dev_update_msix_mmio(pci_dev) < 0) { 1125 - perror("assigned_dev_update_msix_mmio"); 1126 - return; 1127 - } 1128 - 1129 - if (assigned_dev->msi_virq_nr > 0) { 1130 - if (kvm_device_msix_assign(kvm_state, assigned_dev->dev_id) < 0) { 1131 - perror("assigned_dev_enable_msix: assign irq"); 1132 - return; 1133 - } 1134 - } 1135 - assigned_dev->intx_route.mode = PCI_INTX_DISABLED; 1136 - assigned_dev->intx_route.irq = -1; 1137 - assigned_dev->assigned_irq_type = ASSIGNED_IRQ_MSIX; 1138 - } else { 1139 - Error *local_err = NULL; 1140 - 1141 - assign_intx(assigned_dev, &local_err); 1142 - if (local_err) { 1143 - error_report_err(local_err); 1144 - } 1145 - } 1146 - } 1147 - 1148 - static uint32_t assigned_dev_pci_read_config(PCIDevice *pci_dev, 1149 - uint32_t address, int len) 1150 - { 1151 - AssignedDevice *assigned_dev = PCI_ASSIGN(pci_dev); 1152 - uint32_t virt_val = pci_default_read_config(pci_dev, address, len); 1153 - uint32_t real_val, emulate_mask, full_emulation_mask; 1154 - 1155 - emulate_mask = 0; 1156 - memcpy(&emulate_mask, assigned_dev->emulate_config_read + address, len); 1157 - emulate_mask = le32_to_cpu(emulate_mask); 1158 - 1159 - full_emulation_mask = 0xffffffff >> (32 - len * 8); 1160 - 1161 - if (emulate_mask != full_emulation_mask) { 1162 - real_val = assigned_dev_pci_read(pci_dev, address, len); 1163 - return (virt_val & emulate_mask) | (real_val & ~emulate_mask); 1164 - } 
else { 1165 - return virt_val; 1166 - } 1167 - } 1168 - 1169 - static void assigned_dev_pci_write_config(PCIDevice *pci_dev, uint32_t address, 1170 - uint32_t val, int len) 1171 - { 1172 - AssignedDevice *assigned_dev = PCI_ASSIGN(pci_dev); 1173 - uint16_t old_cmd = pci_get_word(pci_dev->config + PCI_COMMAND); 1174 - uint32_t emulate_mask, full_emulation_mask; 1175 - int ret; 1176 - 1177 - pci_default_write_config(pci_dev, address, val, len); 1178 - 1179 - if (kvm_has_intx_set_mask() && 1180 - range_covers_byte(address, len, PCI_COMMAND + 1)) { 1181 - bool intx_masked = (pci_get_word(pci_dev->config + PCI_COMMAND) & 1182 - PCI_COMMAND_INTX_DISABLE); 1183 - 1184 - if (intx_masked != !!(old_cmd & PCI_COMMAND_INTX_DISABLE)) { 1185 - ret = kvm_device_intx_set_mask(kvm_state, assigned_dev->dev_id, 1186 - intx_masked); 1187 - if (ret) { 1188 - perror("assigned_dev_pci_write_config: set intx mask"); 1189 - } 1190 - } 1191 - } 1192 - if (assigned_dev->cap.available & ASSIGNED_DEVICE_CAP_MSI) { 1193 - if (range_covers_byte(address, len, 1194 - pci_dev->msi_cap + PCI_MSI_FLAGS)) { 1195 - assigned_dev_update_msi(pci_dev); 1196 - } else if (ranges_overlap(address, len, /* 32bit MSI only */ 1197 - pci_dev->msi_cap + PCI_MSI_ADDRESS_LO, 6)) { 1198 - assigned_dev_update_msi_msg(pci_dev); 1199 - } 1200 - } 1201 - if (assigned_dev->cap.available & ASSIGNED_DEVICE_CAP_MSIX) { 1202 - if (range_covers_byte(address, len, 1203 - pci_dev->msix_cap + PCI_MSIX_FLAGS + 1)) { 1204 - assigned_dev_update_msix(pci_dev); 1205 - } 1206 - } 1207 - 1208 - emulate_mask = 0; 1209 - memcpy(&emulate_mask, assigned_dev->emulate_config_write + address, len); 1210 - emulate_mask = le32_to_cpu(emulate_mask); 1211 - 1212 - full_emulation_mask = 0xffffffff >> (32 - len * 8); 1213 - 1214 - if (emulate_mask != full_emulation_mask) { 1215 - if (emulate_mask) { 1216 - val &= ~emulate_mask; 1217 - val |= assigned_dev_pci_read(pci_dev, address, len) & emulate_mask; 1218 - } 1219 - assigned_dev_pci_write(pci_dev, 
address, val, len); 1220 - } 1221 - } 1222 - 1223 - static void assigned_dev_setup_cap_read(AssignedDevice *dev, uint32_t offset, 1224 - uint32_t len) 1225 - { 1226 - assigned_dev_direct_config_read(dev, offset, len); 1227 - assigned_dev_emulate_config_read(dev, offset + PCI_CAP_LIST_NEXT, 1); 1228 - } 1229 - 1230 - static int assigned_device_pci_cap_init(PCIDevice *pci_dev, Error **errp) 1231 - { 1232 - AssignedDevice *dev = PCI_ASSIGN(pci_dev); 1233 - PCIRegion *pci_region = dev->real_device.regions; 1234 - int ret, pos; 1235 - 1236 - /* Clear initial capabilities pointer and status copied from hw */ 1237 - pci_set_byte(pci_dev->config + PCI_CAPABILITY_LIST, 0); 1238 - pci_set_word(pci_dev->config + PCI_STATUS, 1239 - pci_get_word(pci_dev->config + PCI_STATUS) & 1240 - ~PCI_STATUS_CAP_LIST); 1241 - 1242 - /* Expose MSI capability 1243 - * MSI capability is the 1st capability in capability config */ 1244 - pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_MSI, 0); 1245 - if (pos != 0 && kvm_check_extension(kvm_state, KVM_CAP_ASSIGN_DEV_IRQ)) { 1246 - if (verify_irqchip_in_kernel(errp) < 0) { 1247 - return -ENOTSUP; 1248 - } 1249 - dev->dev.cap_present |= QEMU_PCI_CAP_MSI; 1250 - dev->cap.available |= ASSIGNED_DEVICE_CAP_MSI; 1251 - /* Only 32-bit/no-mask currently supported */ 1252 - ret = pci_add_capability(pci_dev, PCI_CAP_ID_MSI, pos, 10, 1253 - errp); 1254 - if (ret < 0) { 1255 - return ret; 1256 - } 1257 - pci_dev->msi_cap = pos; 1258 - 1259 - pci_set_word(pci_dev->config + pos + PCI_MSI_FLAGS, 1260 - pci_get_word(pci_dev->config + pos + PCI_MSI_FLAGS) & 1261 - PCI_MSI_FLAGS_QMASK); 1262 - pci_set_long(pci_dev->config + pos + PCI_MSI_ADDRESS_LO, 0); 1263 - pci_set_word(pci_dev->config + pos + PCI_MSI_DATA_32, 0); 1264 - 1265 - /* Set writable fields */ 1266 - pci_set_word(pci_dev->wmask + pos + PCI_MSI_FLAGS, 1267 - PCI_MSI_FLAGS_QSIZE | PCI_MSI_FLAGS_ENABLE); 1268 - pci_set_long(pci_dev->wmask + pos + PCI_MSI_ADDRESS_LO, 0xfffffffc); 1269 - 
pci_set_word(pci_dev->wmask + pos + PCI_MSI_DATA_32, 0xffff); 1270 - } 1271 - /* Expose MSI-X capability */ 1272 - pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_MSIX, 0); 1273 - if (pos != 0 && kvm_device_msix_supported(kvm_state)) { 1274 - int bar_nr; 1275 - uint32_t msix_table_entry; 1276 - uint16_t msix_max; 1277 - 1278 - if (verify_irqchip_in_kernel(errp) < 0) { 1279 - return -ENOTSUP; 1280 - } 1281 - dev->dev.cap_present |= QEMU_PCI_CAP_MSIX; 1282 - dev->cap.available |= ASSIGNED_DEVICE_CAP_MSIX; 1283 - ret = pci_add_capability(pci_dev, PCI_CAP_ID_MSIX, pos, 12, 1284 - errp); 1285 - if (ret < 0) { 1286 - return ret; 1287 - } 1288 - pci_dev->msix_cap = pos; 1289 - 1290 - msix_max = (pci_get_word(pci_dev->config + pos + PCI_MSIX_FLAGS) & 1291 - PCI_MSIX_FLAGS_QSIZE) + 1; 1292 - msix_max = MIN(msix_max, KVM_MAX_MSIX_PER_DEV); 1293 - pci_set_word(pci_dev->config + pos + PCI_MSIX_FLAGS, msix_max - 1); 1294 - 1295 - /* Only enable and function mask bits are writable */ 1296 - pci_set_word(pci_dev->wmask + pos + PCI_MSIX_FLAGS, 1297 - PCI_MSIX_FLAGS_ENABLE | PCI_MSIX_FLAGS_MASKALL); 1298 - 1299 - msix_table_entry = pci_get_long(pci_dev->config + pos + PCI_MSIX_TABLE); 1300 - bar_nr = msix_table_entry & PCI_MSIX_FLAGS_BIRMASK; 1301 - msix_table_entry &= ~PCI_MSIX_FLAGS_BIRMASK; 1302 - dev->msix_table_addr = pci_region[bar_nr].base_addr + msix_table_entry; 1303 - dev->msix_table_size = msix_max * sizeof(MSIXTableEntry); 1304 - dev->msix_max = msix_max; 1305 - } 1306 - 1307 - /* Minimal PM support, nothing writable, device appears to NAK changes */ 1308 - pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_PM, 0); 1309 - if (pos) { 1310 - uint16_t pmc; 1311 - 1312 - ret = pci_add_capability(pci_dev, PCI_CAP_ID_PM, pos, PCI_PM_SIZEOF, 1313 - errp); 1314 - if (ret < 0) { 1315 - return ret; 1316 - } 1317 - 1318 - assigned_dev_setup_cap_read(dev, pos, PCI_PM_SIZEOF); 1319 - 1320 - pmc = pci_get_word(pci_dev->config + pos + PCI_CAP_FLAGS); 1321 - pmc &= (PCI_PM_CAP_VER_MASK | 
PCI_PM_CAP_DSI); 1322 - pci_set_word(pci_dev->config + pos + PCI_CAP_FLAGS, pmc); 1323 - 1324 - /* assign_device will bring the device up to D0, so we don't need 1325 - * to worry about doing that ourselves here. */ 1326 - pci_set_word(pci_dev->config + pos + PCI_PM_CTRL, 1327 - PCI_PM_CTRL_NO_SOFT_RESET); 1328 - 1329 - pci_set_byte(pci_dev->config + pos + PCI_PM_PPB_EXTENSIONS, 0); 1330 - pci_set_byte(pci_dev->config + pos + PCI_PM_DATA_REGISTER, 0); 1331 - } 1332 - 1333 - pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_EXP, 0); 1334 - if (pos) { 1335 - uint8_t version, size = 0; 1336 - uint16_t type, devctl, lnksta; 1337 - uint32_t devcap, lnkcap; 1338 - 1339 - version = pci_get_byte(pci_dev->config + pos + PCI_EXP_FLAGS); 1340 - version &= PCI_EXP_FLAGS_VERS; 1341 - if (version == 1) { 1342 - size = 0x14; 1343 - } else if (version == 2) { 1344 - /* 1345 - * Check for non-std size, accept reduced size to 0x34, 1346 - * which is what bcm5761 implemented, violating the 1347 - * PCIe v3.0 spec that regs should exist and be read as 0, 1348 - * not optionally provided and shorten the struct size. 
1349 - */ 1350 - size = MIN(0x3c, PCI_CONFIG_SPACE_SIZE - pos); 1351 - if (size < 0x34) { 1352 - error_setg(errp, "Invalid size PCIe cap-id 0x%x", 1353 - PCI_CAP_ID_EXP); 1354 - return -EINVAL; 1355 - } else if (size != 0x3c) { 1356 - warn_report("%s: PCIe cap-id 0x%x has " 1357 - "non-standard size 0x%x; std size should be 0x3c", 1358 - __func__, PCI_CAP_ID_EXP, size); 1359 - } 1360 - } else if (version == 0) { 1361 - uint16_t vid, did; 1362 - vid = pci_get_word(pci_dev->config + PCI_VENDOR_ID); 1363 - did = pci_get_word(pci_dev->config + PCI_DEVICE_ID); 1364 - if (vid == PCI_VENDOR_ID_INTEL && did == 0x10ed) { 1365 - /* 1366 - * quirk for Intel 82599 VF with invalid PCIe capability 1367 - * version, should really be version 2 (same as PF) 1368 - */ 1369 - size = 0x3c; 1370 - } 1371 - } 1372 - 1373 - if (size == 0) { 1374 - error_setg(errp, "Unsupported PCI express capability version %d", 1375 - version); 1376 - return -EINVAL; 1377 - } 1378 - 1379 - ret = pci_add_capability(pci_dev, PCI_CAP_ID_EXP, pos, size, 1380 - errp); 1381 - if (ret < 0) { 1382 - return ret; 1383 - } 1384 - 1385 - assigned_dev_setup_cap_read(dev, pos, size); 1386 - 1387 - type = pci_get_word(pci_dev->config + pos + PCI_EXP_FLAGS); 1388 - type = (type & PCI_EXP_FLAGS_TYPE) >> 4; 1389 - if (type != PCI_EXP_TYPE_ENDPOINT && 1390 - type != PCI_EXP_TYPE_LEG_END && type != PCI_EXP_TYPE_RC_END) { 1391 - error_setg(errp, "Device assignment only supports endpoint " 1392 - "assignment, device type %d", type); 1393 - return -EINVAL; 1394 - } 1395 - 1396 - /* capabilities, pass existing read-only copy 1397 - * PCI_EXP_FLAGS_IRQ: updated by hardware, should be direct read */ 1398 - 1399 - /* device capabilities: hide FLR */ 1400 - devcap = pci_get_long(pci_dev->config + pos + PCI_EXP_DEVCAP); 1401 - devcap &= ~PCI_EXP_DEVCAP_FLR; 1402 - pci_set_long(pci_dev->config + pos + PCI_EXP_DEVCAP, devcap); 1403 - 1404 - /* device control: clear all error reporting enable bits, leaving 1405 - * only a few host 
values. Note, these are 1406 - * all writable, but not passed to hw. 1407 - */ 1408 - devctl = pci_get_word(pci_dev->config + pos + PCI_EXP_DEVCTL); 1409 - devctl = (devctl & (PCI_EXP_DEVCTL_READRQ | PCI_EXP_DEVCTL_PAYLOAD)) | 1410 - PCI_EXP_DEVCTL_RELAX_EN | PCI_EXP_DEVCTL_NOSNOOP_EN; 1411 - pci_set_word(pci_dev->config + pos + PCI_EXP_DEVCTL, devctl); 1412 - devctl = PCI_EXP_DEVCTL_BCR_FLR | PCI_EXP_DEVCTL_AUX_PME; 1413 - pci_set_word(pci_dev->wmask + pos + PCI_EXP_DEVCTL, ~devctl); 1414 - 1415 - /* Clear device status */ 1416 - pci_set_word(pci_dev->config + pos + PCI_EXP_DEVSTA, 0); 1417 - 1418 - /* Link capabilities, expose links and latencues, clear reporting */ 1419 - lnkcap = pci_get_long(pci_dev->config + pos + PCI_EXP_LNKCAP); 1420 - lnkcap &= (PCI_EXP_LNKCAP_SLS | PCI_EXP_LNKCAP_MLW | 1421 - PCI_EXP_LNKCAP_ASPMS | PCI_EXP_LNKCAP_L0SEL | 1422 - PCI_EXP_LNKCAP_L1EL); 1423 - pci_set_long(pci_dev->config + pos + PCI_EXP_LNKCAP, lnkcap); 1424 - 1425 - /* Link control, pass existing read-only copy. Should be writable? 
*/ 1426 - 1427 - /* Link status, only expose current speed and width */ 1428 - lnksta = pci_get_word(pci_dev->config + pos + PCI_EXP_LNKSTA); 1429 - lnksta &= (PCI_EXP_LNKSTA_CLS | PCI_EXP_LNKSTA_NLW); 1430 - pci_set_word(pci_dev->config + pos + PCI_EXP_LNKSTA, lnksta); 1431 - 1432 - if (version >= 2) { 1433 - /* Slot capabilities, control, status - not needed for endpoints */ 1434 - pci_set_long(pci_dev->config + pos + PCI_EXP_SLTCAP, 0); 1435 - pci_set_word(pci_dev->config + pos + PCI_EXP_SLTCTL, 0); 1436 - pci_set_word(pci_dev->config + pos + PCI_EXP_SLTSTA, 0); 1437 - 1438 - /* Root control, capabilities, status - not needed for endpoints */ 1439 - pci_set_word(pci_dev->config + pos + PCI_EXP_RTCTL, 0); 1440 - pci_set_word(pci_dev->config + pos + PCI_EXP_RTCAP, 0); 1441 - pci_set_long(pci_dev->config + pos + PCI_EXP_RTSTA, 0); 1442 - 1443 - /* Device capabilities/control 2, pass existing read-only copy */ 1444 - /* Link control 2, pass existing read-only copy */ 1445 - } 1446 - } 1447 - 1448 - pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_PCIX, 0); 1449 - if (pos) { 1450 - uint16_t cmd; 1451 - uint32_t status; 1452 - 1453 - /* Only expose the minimum, 8 byte capability */ 1454 - ret = pci_add_capability(pci_dev, PCI_CAP_ID_PCIX, pos, 8, 1455 - errp); 1456 - if (ret < 0) { 1457 - return ret; 1458 - } 1459 - 1460 - assigned_dev_setup_cap_read(dev, pos, 8); 1461 - 1462 - /* Command register, clear upper bits, including extended modes */ 1463 - cmd = pci_get_word(pci_dev->config + pos + PCI_X_CMD); 1464 - cmd &= (PCI_X_CMD_DPERR_E | PCI_X_CMD_ERO | PCI_X_CMD_MAX_READ | 1465 - PCI_X_CMD_MAX_SPLIT); 1466 - pci_set_word(pci_dev->config + pos + PCI_X_CMD, cmd); 1467 - 1468 - /* Status register, update with emulated PCI bus location, clear 1469 - * error bits, leave the rest. 
*/ 1470 - status = pci_get_long(pci_dev->config + pos + PCI_X_STATUS); 1471 - status &= ~(PCI_X_STATUS_BUS | PCI_X_STATUS_DEVFN); 1472 - status |= pci_get_bdf(pci_dev); 1473 - status &= ~(PCI_X_STATUS_SPL_DISC | PCI_X_STATUS_UNX_SPL | 1474 - PCI_X_STATUS_SPL_ERR); 1475 - pci_set_long(pci_dev->config + pos + PCI_X_STATUS, status); 1476 - } 1477 - 1478 - pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_VPD, 0); 1479 - if (pos) { 1480 - /* Direct R/W passthrough */ 1481 - ret = pci_add_capability(pci_dev, PCI_CAP_ID_VPD, pos, 8, 1482 - errp); 1483 - if (ret < 0) { 1484 - return ret; 1485 - } 1486 - 1487 - assigned_dev_setup_cap_read(dev, pos, 8); 1488 - 1489 - /* direct write for cap content */ 1490 - assigned_dev_direct_config_write(dev, pos + 2, 6); 1491 - } 1492 - 1493 - /* Devices can have multiple vendor capabilities, get them all */ 1494 - for (pos = 0; (pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_VNDR, pos)); 1495 - pos += PCI_CAP_LIST_NEXT) { 1496 - uint8_t len = pci_get_byte(pci_dev->config + pos + PCI_CAP_FLAGS); 1497 - /* Direct R/W passthrough */ 1498 - ret = pci_add_capability(pci_dev, PCI_CAP_ID_VNDR, pos, len, 1499 - errp); 1500 - if (ret < 0) { 1501 - return ret; 1502 - } 1503 - 1504 - assigned_dev_setup_cap_read(dev, pos, len); 1505 - 1506 - /* direct write for cap content */ 1507 - assigned_dev_direct_config_write(dev, pos + 2, len - 2); 1508 - } 1509 - 1510 - /* If real and virtual capability list status bits differ, virtualize the 1511 - * access. 
*/ 1512 - if ((pci_get_word(pci_dev->config + PCI_STATUS) & PCI_STATUS_CAP_LIST) != 1513 - (assigned_dev_pci_read_byte(pci_dev, PCI_STATUS) & 1514 - PCI_STATUS_CAP_LIST)) { 1515 - dev->emulate_config_read[PCI_STATUS] |= PCI_STATUS_CAP_LIST; 1516 - } 1517 - 1518 - return 0; 1519 - } 1520 - 1521 - static uint64_t 1522 - assigned_dev_msix_mmio_read(void *opaque, hwaddr addr, 1523 - unsigned size) 1524 - { 1525 - AssignedDevice *adev = opaque; 1526 - uint64_t val; 1527 - 1528 - memcpy(&val, (void *)((uint8_t *)adev->msix_table + addr), size); 1529 - 1530 - return val; 1531 - } 1532 - 1533 - static void assigned_dev_msix_mmio_write(void *opaque, hwaddr addr, 1534 - uint64_t val, unsigned size) 1535 - { 1536 - AssignedDevice *adev = opaque; 1537 - PCIDevice *pdev = &adev->dev; 1538 - uint16_t ctrl; 1539 - MSIXTableEntry orig; 1540 - int i = addr >> 4; 1541 - 1542 - if (i >= adev->msix_max) { 1543 - return; /* Drop write */ 1544 - } 1545 - 1546 - ctrl = pci_get_word(pdev->config + pdev->msix_cap + PCI_MSIX_FLAGS); 1547 - 1548 - DEBUG("write to MSI-X table offset 0x%lx, val 0x%lx\n", addr, val); 1549 - 1550 - if (ctrl & PCI_MSIX_FLAGS_ENABLE) { 1551 - orig = adev->msix_table[i]; 1552 - } 1553 - 1554 - memcpy((uint8_t *)adev->msix_table + addr, &val, size); 1555 - 1556 - if (ctrl & PCI_MSIX_FLAGS_ENABLE) { 1557 - MSIXTableEntry *entry = &adev->msix_table[i]; 1558 - 1559 - if (!assigned_dev_msix_masked(&orig) && 1560 - assigned_dev_msix_masked(entry)) { 1561 - /* 1562 - * Vector masked, disable it 1563 - * 1564 - * XXX It's not clear if we can or should actually attempt 1565 - * to mask or disable the interrupt. KVM doesn't have 1566 - * support for pending bits and kvm_assign_set_msix_entry 1567 - * doesn't modify the device hardware mask. Interrupts 1568 - * while masked are simply not injected to the guest, so 1569 - * are lost. Can we get away with always injecting an 1570 - * interrupt on unmask? 
1571 - */ 1572 - } else if (assigned_dev_msix_masked(&orig) && 1573 - !assigned_dev_msix_masked(entry)) { 1574 - /* Vector unmasked */ 1575 - if (i >= adev->msi_virq_nr || adev->msi_virq[i] < 0) { 1576 - /* Previously unassigned vector, start from scratch */ 1577 - assigned_dev_update_msix(pdev); 1578 - return; 1579 - } else { 1580 - /* Update an existing, previously masked vector */ 1581 - MSIMessage msg; 1582 - int ret; 1583 - 1584 - msg.address = entry->addr_lo | 1585 - ((uint64_t)entry->addr_hi << 32); 1586 - msg.data = entry->data; 1587 - 1588 - ret = kvm_irqchip_update_msi_route(kvm_state, 1589 - adev->msi_virq[i], msg, 1590 - pdev); 1591 - if (ret) { 1592 - error_report("Error updating irq routing entry (%d)", ret); 1593 - } 1594 - kvm_irqchip_commit_routes(kvm_state); 1595 - } 1596 - } 1597 - } 1598 - } 1599 - 1600 - static const MemoryRegionOps assigned_dev_msix_mmio_ops = { 1601 - .read = assigned_dev_msix_mmio_read, 1602 - .write = assigned_dev_msix_mmio_write, 1603 - .endianness = DEVICE_NATIVE_ENDIAN, 1604 - .valid = { 1605 - .min_access_size = 4, 1606 - .max_access_size = 8, 1607 - }, 1608 - .impl = { 1609 - .min_access_size = 4, 1610 - .max_access_size = 8, 1611 - }, 1612 - }; 1613 - 1614 - static void assigned_dev_msix_reset(AssignedDevice *dev) 1615 - { 1616 - MSIXTableEntry *entry; 1617 - int i; 1618 - 1619 - if (!dev->msix_table) { 1620 - return; 1621 - } 1622 - 1623 - memset(dev->msix_table, 0, dev->msix_table_size); 1624 - 1625 - for (i = 0, entry = dev->msix_table; i < dev->msix_max; i++, entry++) { 1626 - entry->ctrl = cpu_to_le32(0x1); /* Masked */ 1627 - } 1628 - } 1629 - 1630 - static void assigned_dev_register_msix_mmio(AssignedDevice *dev, Error **errp) 1631 - { 1632 - dev->msix_table = mmap(NULL, dev->msix_table_size, PROT_READ | PROT_WRITE, 1633 - MAP_ANONYMOUS | MAP_PRIVATE, 0, 0); 1634 - if (dev->msix_table == MAP_FAILED) { 1635 - error_setg_errno(errp, errno, "failed to allocate msix_table"); 1636 - dev->msix_table = NULL; 1637 - 
return; 1638 - } 1639 - dev->dev.msix_table = (uint8_t *)dev->msix_table; 1640 - 1641 - assigned_dev_msix_reset(dev); 1642 - 1643 - memory_region_init_io(&dev->mmio, OBJECT(dev), &assigned_dev_msix_mmio_ops, 1644 - dev, "assigned-dev-msix", dev->msix_table_size); 1645 - } 1646 - 1647 - static void assigned_dev_unregister_msix_mmio(AssignedDevice *dev) 1648 - { 1649 - if (!dev->msix_table) { 1650 - return; 1651 - } 1652 - 1653 - if (munmap(dev->msix_table, dev->msix_table_size) == -1) { 1654 - error_report("error unmapping msix_table! %s", strerror(errno)); 1655 - } 1656 - dev->msix_table = NULL; 1657 - dev->dev.msix_table = NULL; 1658 - } 1659 - 1660 - static const VMStateDescription vmstate_assigned_device = { 1661 - .name = "pci-assign", 1662 - .unmigratable = 1, 1663 - }; 1664 - 1665 - static void reset_assigned_device(DeviceState *dev) 1666 - { 1667 - PCIDevice *pci_dev = PCI_DEVICE(dev); 1668 - AssignedDevice *adev = PCI_ASSIGN(pci_dev); 1669 - char reset_file[64]; 1670 - const char reset[] = "1"; 1671 - int fd, ret; 1672 - 1673 - /* 1674 - * If a guest is reset without being shutdown, MSI/MSI-X can still 1675 - * be running. We want to return the device to a known state on 1676 - * reset, so disable those here. We especially do not want MSI-X 1677 - * enabled since it lives in MMIO space, which is about to get 1678 - * disabled. 
1679 - */ 1680 - if (adev->assigned_irq_type == ASSIGNED_IRQ_MSIX) { 1681 - uint16_t ctrl = pci_get_word(pci_dev->config + 1682 - pci_dev->msix_cap + PCI_MSIX_FLAGS); 1683 - 1684 - pci_set_word(pci_dev->config + pci_dev->msix_cap + PCI_MSIX_FLAGS, 1685 - ctrl & ~PCI_MSIX_FLAGS_ENABLE); 1686 - assigned_dev_update_msix(pci_dev); 1687 - } else if (adev->assigned_irq_type == ASSIGNED_IRQ_MSI) { 1688 - uint8_t ctrl = pci_get_byte(pci_dev->config + 1689 - pci_dev->msi_cap + PCI_MSI_FLAGS); 1690 - 1691 - pci_set_byte(pci_dev->config + pci_dev->msi_cap + PCI_MSI_FLAGS, 1692 - ctrl & ~PCI_MSI_FLAGS_ENABLE); 1693 - assigned_dev_update_msi(pci_dev); 1694 - } 1695 - 1696 - snprintf(reset_file, sizeof(reset_file), 1697 - "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/reset", 1698 - adev->host.domain, adev->host.bus, adev->host.slot, 1699 - adev->host.function); 1700 - 1701 - /* 1702 - * Issue a device reset via pci-sysfs. Note that we use write(2) here 1703 - * and ignore the return value because some kernels have a bug that 1704 - * returns 0 rather than bytes written on success, sending us into an 1705 - * infinite retry loop using other write mechanisms. 1706 - */ 1707 - fd = open(reset_file, O_WRONLY); 1708 - if (fd != -1) { 1709 - ret = write(fd, reset, strlen(reset)); 1710 - (void)ret; 1711 - close(fd); 1712 - } 1713 - 1714 - /* 1715 - * When a 0 is written to the bus master register, the device is logically 1716 - * disconnected from the PCI bus. This avoids further DMA transfers. 
1717 - */ 1718 - assigned_dev_pci_write_config(pci_dev, PCI_COMMAND, 0, 1); 1719 - } 1720 - 1721 - static void assigned_realize(struct PCIDevice *pci_dev, Error **errp) 1722 - { 1723 - AssignedDevice *dev = PCI_ASSIGN(pci_dev); 1724 - uint8_t e_intx; 1725 - int r; 1726 - Error *local_err = NULL; 1727 - 1728 - if (!kvm_enabled()) { 1729 - error_setg(&local_err, "pci-assign requires KVM support"); 1730 - goto exit_with_error; 1731 - } 1732 - 1733 - if (!dev->host.domain && !dev->host.bus && !dev->host.slot && 1734 - !dev->host.function) { 1735 - error_setg(&local_err, "no host device specified"); 1736 - goto exit_with_error; 1737 - } 1738 - 1739 - /* 1740 - * Set up basic config space access control. Will be further refined during 1741 - * device initialization. 1742 - */ 1743 - assigned_dev_emulate_config_read(dev, 0, PCI_CONFIG_SPACE_SIZE); 1744 - assigned_dev_direct_config_read(dev, PCI_STATUS, 2); 1745 - assigned_dev_direct_config_read(dev, PCI_REVISION_ID, 1); 1746 - assigned_dev_direct_config_read(dev, PCI_CLASS_PROG, 3); 1747 - assigned_dev_direct_config_read(dev, PCI_CACHE_LINE_SIZE, 1); 1748 - assigned_dev_direct_config_read(dev, PCI_LATENCY_TIMER, 1); 1749 - assigned_dev_direct_config_read(dev, PCI_BIST, 1); 1750 - assigned_dev_direct_config_read(dev, PCI_CARDBUS_CIS, 4); 1751 - assigned_dev_direct_config_read(dev, PCI_SUBSYSTEM_VENDOR_ID, 2); 1752 - assigned_dev_direct_config_read(dev, PCI_SUBSYSTEM_ID, 2); 1753 - assigned_dev_direct_config_read(dev, PCI_CAPABILITY_LIST + 1, 7); 1754 - assigned_dev_direct_config_read(dev, PCI_MIN_GNT, 1); 1755 - assigned_dev_direct_config_read(dev, PCI_MAX_LAT, 1); 1756 - memcpy(dev->emulate_config_write, dev->emulate_config_read, 1757 - sizeof(dev->emulate_config_read)); 1758 - 1759 - get_real_device(dev, &local_err); 1760 - if (local_err) { 1761 - goto out; 1762 - } 1763 - 1764 - if (assigned_device_pci_cap_init(pci_dev, &local_err) < 0) { 1765 - goto out; 1766 - } 1767 - 1768 - /* intercept MSI-X entry page in the MMIO 
*/ 1769 - if (dev->cap.available & ASSIGNED_DEVICE_CAP_MSIX) { 1770 - assigned_dev_register_msix_mmio(dev, &local_err); 1771 - if (local_err) { 1772 - goto out; 1773 - } 1774 - } 1775 - 1776 - /* handle real device's MMIO/PIO BARs */ 1777 - assigned_dev_register_regions(dev->real_device.regions, 1778 - dev->real_device.region_number, dev, 1779 - &local_err); 1780 - if (local_err) { 1781 - goto out; 1782 - } 1783 - 1784 - /* handle interrupt routing */ 1785 - e_intx = dev->dev.config[PCI_INTERRUPT_PIN] - 1; 1786 - dev->intpin = e_intx; 1787 - dev->intx_route.mode = PCI_INTX_DISABLED; 1788 - dev->intx_route.irq = -1; 1789 - 1790 - /* assign device to guest */ 1791 - assign_device(dev, &local_err); 1792 - if (local_err) { 1793 - goto out; 1794 - } 1795 - 1796 - /* assign legacy INTx to the device */ 1797 - r = assign_intx(dev, &local_err); 1798 - if (r < 0) { 1799 - goto assigned_out; 1800 - } 1801 - 1802 - assigned_dev_load_option_rom(dev); 1803 - 1804 - return; 1805 - 1806 - assigned_out: 1807 - deassign_device(dev); 1808 - 1809 - out: 1810 - free_assigned_device(dev); 1811 - 1812 - exit_with_error: 1813 - assert(local_err); 1814 - error_propagate(errp, local_err); 1815 - } 1816 - 1817 - static void assigned_exitfn(struct PCIDevice *pci_dev) 1818 - { 1819 - AssignedDevice *dev = PCI_ASSIGN(pci_dev); 1820 - 1821 - deassign_device(dev); 1822 - free_assigned_device(dev); 1823 - } 1824 - 1825 - static void assigned_dev_instance_init(Object *obj) 1826 - { 1827 - PCIDevice *pci_dev = PCI_DEVICE(obj); 1828 - AssignedDevice *d = PCI_ASSIGN(pci_dev); 1829 - 1830 - device_add_bootindex_property(obj, &d->bootindex, 1831 - "bootindex", NULL, 1832 - &pci_dev->qdev, NULL); 1833 - } 1834 - 1835 - static Property assigned_dev_properties[] = { 1836 - DEFINE_PROP_PCI_HOST_DEVADDR("host", AssignedDevice, host), 1837 - DEFINE_PROP_BIT("prefer_msi", AssignedDevice, features, 1838 - ASSIGNED_DEVICE_PREFER_MSI_BIT, false), 1839 - DEFINE_PROP_BIT("share_intx", AssignedDevice, features, 
1840 - ASSIGNED_DEVICE_SHARE_INTX_BIT, true), 1841 - DEFINE_PROP_STRING("configfd", AssignedDevice, configfd_name), 1842 - DEFINE_PROP_END_OF_LIST(), 1843 - }; 1844 - 1845 - static void assign_class_init(ObjectClass *klass, void *data) 1846 - { 1847 - PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); 1848 - DeviceClass *dc = DEVICE_CLASS(klass); 1849 - 1850 - k->realize = assigned_realize; 1851 - k->exit = assigned_exitfn; 1852 - k->config_read = assigned_dev_pci_read_config; 1853 - k->config_write = assigned_dev_pci_write_config; 1854 - dc->props = assigned_dev_properties; 1855 - dc->vmsd = &vmstate_assigned_device; 1856 - dc->reset = reset_assigned_device; 1857 - set_bit(DEVICE_CATEGORY_MISC, dc->categories); 1858 - dc->desc = "KVM-based PCI passthrough"; 1859 - } 1860 - 1861 - static const TypeInfo assign_info = { 1862 - .name = TYPE_PCI_ASSIGN, 1863 - .parent = TYPE_PCI_DEVICE, 1864 - .instance_size = sizeof(AssignedDevice), 1865 - .class_init = assign_class_init, 1866 - .instance_init = assigned_dev_instance_init, 1867 - .interfaces = (InterfaceInfo[]) { 1868 - { INTERFACE_CONVENTIONAL_PCI_DEVICE }, 1869 - { }, 1870 - }, 1871 - }; 1872 - 1873 - static void assign_register_types(void) 1874 - { 1875 - type_register_static(&assign_info); 1876 - } 1877 - 1878 - type_init(assign_register_types) 1879 - 1880 - static void assigned_dev_load_option_rom(AssignedDevice *dev) 1881 - { 1882 - int size = 0; 1883 - 1884 - pci_assign_dev_load_option_rom(&dev->dev, OBJECT(dev), &size, 1885 - dev->host.domain, dev->host.bus, 1886 - dev->host.slot, dev->host.function); 1887 - }
+2 -2
hw/i386/pci-assign-load-rom.c hw/xen/xen_pt_load_rom.c
··· 12 12 #include "qemu/range.h" 13 13 #include "sysemu/sysemu.h" 14 14 #include "hw/pci/pci.h" 15 - #include "hw/pci/pci-assign.h" 15 + #include "xen_pt.h" 16 16 17 17 /* 18 18 * Scan the assigned devices for the devices that have an option ROM, and then ··· 80 80 fseek(fp, 0, SEEK_SET); 81 81 val = 0; 82 82 if (!fwrite(&val, 1, 1, fp)) { 83 - DEBUG("%s\n", "Failed to disable pci-sysfs rom file"); 83 + XEN_PT_WARN(dev, "%s\n", "Failed to disable pci-sysfs rom file"); 84 84 } 85 85 fclose(fp); 86 86
+1
hw/xen/Makefile.objs
··· 3 3 4 4 obj-$(CONFIG_XEN_PCI_PASSTHROUGH) += xen-host-pci-device.o 5 5 obj-$(CONFIG_XEN_PCI_PASSTHROUGH) += xen_pt.o xen_pt_config_init.o xen_pt_graphics.o xen_pt_msi.o 6 + obj-$(CONFIG_XEN_PCI_PASSTHROUGH) += xen_pt_load_rom.o
-27
include/hw/pci/pci-assign.h
··· 1 - /* 2 - * This work is licensed under the terms of the GNU GPL, version 2. See 3 - * the COPYING file in the top-level directory. 4 - * 5 - * Just split from hw/i386/kvm/pci-assign.c. 6 - */ 7 - #ifndef PCI_ASSIGN_H 8 - #define PCI_ASSIGN_H 9 - 10 - #include "hw/pci/pci.h" 11 - 12 - //#define DEVICE_ASSIGNMENT_DEBUG 13 - 14 - #ifdef DEVICE_ASSIGNMENT_DEBUG 15 - #define DEBUG(fmt, ...) \ 16 - do { \ 17 - fprintf(stderr, "%s: " fmt, __func__ , __VA_ARGS__); \ 18 - } while (0) 19 - #else 20 - #define DEBUG(fmt, ...) 21 - #endif 22 - 23 - void *pci_assign_dev_load_option_rom(PCIDevice *dev, struct Object *owner, 24 - int *size, unsigned int domain, 25 - unsigned int bus, unsigned int slot, 26 - unsigned int function); 27 - #endif /* PCI_ASSIGN_H */
-1
qdev-monitor.c
··· 46 46 static const QDevAlias qdev_alias_table[] = { 47 47 { "e1000", "e1000-82540em" }, 48 48 { "ich9-ahci", "ahci" }, 49 - { "kvm-pci-assign", "pci-assign" }, 50 49 { "lsi53c895a", "lsi" }, 51 50 { "virtio-9p-ccw", "virtio-9p", QEMU_ARCH_S390X }, 52 51 { "virtio-9p-pci", "virtio-9p", QEMU_ARCH_ALL & ~QEMU_ARCH_S390X },
-2
scripts/device-crash-test
··· 102 102 {'device':'ivshmem', 'expected':True}, # You must specify either 'shm' or 'chardev' 103 103 {'device':'ivshmem-doorbell', 'expected':True}, # You must specify a 'chardev' 104 104 {'device':'ivshmem-plain', 'expected':True}, # You must specify a 'memdev' 105 - {'device':'kvm-pci-assign', 'expected':True}, # no host device specified 106 105 {'device':'loader', 'expected':True}, # please include valid arguments 107 106 {'device':'nand', 'expected':True}, # Unsupported NAND block size 0x1 108 107 {'device':'nvdimm', 'expected':True}, # 'memdev' property is not set ··· 165 164 166 165 # KVM-specific devices shouldn't be tried without accel=kvm: 167 166 {'accel':'(?!kvm).*', 'device':'kvmclock', 'expected':True}, 168 - {'accel':'(?!kvm).*', 'device':'kvm-pci-assign', 'expected':True}, 169 167 170 168 # xen-specific machines and devices: 171 169 {'accel':'(?!xen).*', 'machine':'xen.*', 'expected':True},