qemu with hax to log dma reads & writes jcs.org/2018/11/12/vfio

Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging

virtio,vhost,pci,pc: features, cleanups

SRAT tables for DIMM devices
new virtio net flags for speed/duplex
post-copy migration support in vhost
cleanups in pci

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

# gpg: Signature made Tue 20 Mar 2018 14:40:43 GMT
# gpg: using RSA key 281F0DB8D28D5469
# gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>"
# gpg: aka "Michael S. Tsirkin <mst@redhat.com>"
# Primary key fingerprint: 0270 606B 6F3C DF3D 0B17 0970 C350 3912 AFBE 8E67
# Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA 8A0D 281F 0DB8 D28D 5469

* remotes/mst/tags/for_upstream: (51 commits)
postcopy shared docs
libvhost-user: Claim support for postcopy
postcopy: Allow shared memory
vhost: Huge page align and merge
vhost+postcopy: Wire up POSTCOPY_END notify
vhost-user: Add VHOST_USER_POSTCOPY_END message
libvhost-user: mprotect & madvises for postcopy
vhost+postcopy: Call wakeups
vhost+postcopy: Add vhost waker
postcopy: postcopy_notify_shared_wake
postcopy: helper for waking shared
vhost+postcopy: Resolve client address
postcopy-ram: add a stub for postcopy_request_shared_page
vhost+postcopy: Helper to send requests to source for shared pages
vhost+postcopy: Stash RAMBlock and offset
vhost+postcopy: Send address back to qemu
libvhost-user+postcopy: Register new regions with the ufd
migration/ram: ramblock_recv_bitmap_test_byte_offset
postcopy+vhost-user: Split set_mem_table for postcopy
vhost+postcopy: Transmit 'listen' to slave
...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>

# Conflicts:
# scripts/update-linux-headers.sh

+3763 -532
+3 -1
Makefile
··· 777 777 ifdef INSTALL_BLOBS 778 778 BLOBS=bios.bin bios-256k.bin sgabios.bin vgabios.bin vgabios-cirrus.bin \ 779 779 vgabios-stdvga.bin vgabios-vmware.bin vgabios-qxl.bin vgabios-virtio.bin \ 780 - acpi-dsdt.aml \ 781 780 ppc_rom.bin openbios-sparc32 openbios-sparc64 openbios-ppc QEMU,tcx.bin QEMU,cgthree.bin \ 782 781 pxe-e1000.rom pxe-eepro100.rom pxe-ne2k_pci.rom \ 783 782 pxe-pcnet.rom pxe-rtl8139.rom pxe-virtio.rom \ ··· 1047 1046 1048 1047 include $(SRC_PATH)/tests/docker/Makefile.include 1049 1048 include $(SRC_PATH)/tests/vm/Makefile.include 1049 + 1050 + printgen: 1051 + @echo $(GENERATED_FILES) 1050 1052 1051 1053 .PHONY: help 1052 1054 help:
+300 -2
contrib/libvhost-user/libvhost-user.c
··· 26 26 #include <sys/socket.h> 27 27 #include <sys/eventfd.h> 28 28 #include <sys/mman.h> 29 + #include "qemu/compiler.h" 30 + 31 + #if defined(__linux__) 32 + #include <sys/syscall.h> 33 + #include <fcntl.h> 34 + #include <sys/ioctl.h> 29 35 #include <linux/vhost.h> 30 36 31 - #include "qemu/compiler.h" 37 + #ifdef __NR_userfaultfd 38 + #include <linux/userfaultfd.h> 39 + #endif 40 + 41 + #endif 42 + 32 43 #include "qemu/atomic.h" 33 44 34 45 #include "libvhost-user.h" ··· 86 97 REQ(VHOST_USER_SET_VRING_ENDIAN), 87 98 REQ(VHOST_USER_GET_CONFIG), 88 99 REQ(VHOST_USER_SET_CONFIG), 100 + REQ(VHOST_USER_POSTCOPY_ADVISE), 101 + REQ(VHOST_USER_POSTCOPY_LISTEN), 102 + REQ(VHOST_USER_POSTCOPY_END), 89 103 REQ(VHOST_USER_MAX), 90 104 }; 91 105 #undef REQ ··· 171 185 } 172 186 } 173 187 188 + /* A test to see if we have userfault available */ 189 + static bool 190 + have_userfault(void) 191 + { 192 + #if defined(__linux__) && defined(__NR_userfaultfd) &&\ 193 + defined(UFFD_FEATURE_MISSING_SHMEM) &&\ 194 + defined(UFFD_FEATURE_MISSING_HUGETLBFS) 195 + /* Now test the kernel we're running on really has the features */ 196 + int ufd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); 197 + struct uffdio_api api_struct; 198 + if (ufd < 0) { 199 + return false; 200 + } 201 + 202 + api_struct.api = UFFD_API; 203 + api_struct.features = UFFD_FEATURE_MISSING_SHMEM | 204 + UFFD_FEATURE_MISSING_HUGETLBFS; 205 + if (ioctl(ufd, UFFDIO_API, &api_struct)) { 206 + close(ufd); 207 + return false; 208 + } 209 + close(ufd); 210 + return true; 211 + 212 + #else 213 + return false; 214 + #endif 215 + } 216 + 174 217 static bool 175 218 vu_message_read(VuDev *dev, int conn_fd, VhostUserMsg *vmsg) 176 219 { ··· 245 288 { 246 289 int rc; 247 290 uint8_t *p = (uint8_t *)vmsg; 291 + char control[CMSG_SPACE(VHOST_MEMORY_MAX_NREGIONS * sizeof(int))] = { }; 292 + struct iovec iov = { 293 + .iov_base = (char *)vmsg, 294 + .iov_len = VHOST_USER_HDR_SIZE, 295 + }; 296 + struct msghdr msg = { 297 + 
.msg_iov = &iov, 298 + .msg_iovlen = 1, 299 + .msg_control = control, 300 + }; 301 + struct cmsghdr *cmsg; 302 + 303 + memset(control, 0, sizeof(control)); 304 + assert(vmsg->fd_num <= VHOST_MEMORY_MAX_NREGIONS); 305 + if (vmsg->fd_num > 0) { 306 + size_t fdsize = vmsg->fd_num * sizeof(int); 307 + msg.msg_controllen = CMSG_SPACE(fdsize); 308 + cmsg = CMSG_FIRSTHDR(&msg); 309 + cmsg->cmsg_len = CMSG_LEN(fdsize); 310 + cmsg->cmsg_level = SOL_SOCKET; 311 + cmsg->cmsg_type = SCM_RIGHTS; 312 + memcpy(CMSG_DATA(cmsg), vmsg->fds, fdsize); 313 + } else { 314 + msg.msg_controllen = 0; 315 + } 248 316 249 317 /* Set the version in the flags when sending the reply */ 250 318 vmsg->flags &= ~VHOST_USER_VERSION_MASK; ··· 252 320 vmsg->flags |= VHOST_USER_REPLY_MASK; 253 321 254 322 do { 255 - rc = write(conn_fd, p, VHOST_USER_HDR_SIZE); 323 + rc = sendmsg(conn_fd, &msg, 0); 256 324 } while (rc < 0 && (errno == EINTR || errno == EAGAIN)); 257 325 258 326 do { ··· 345 413 } 346 414 347 415 vmsg->size = sizeof(vmsg->payload.u64); 416 + vmsg->fd_num = 0; 348 417 349 418 DPRINT("Sending back to guest u64: 0x%016"PRIx64"\n", vmsg->payload.u64); 350 419 ··· 410 479 } 411 480 412 481 static bool 482 + vu_set_mem_table_exec_postcopy(VuDev *dev, VhostUserMsg *vmsg) 483 + { 484 + int i; 485 + VhostUserMemory *memory = &vmsg->payload.memory; 486 + dev->nregions = memory->nregions; 487 + 488 + DPRINT("Nregions: %d\n", memory->nregions); 489 + for (i = 0; i < dev->nregions; i++) { 490 + void *mmap_addr; 491 + VhostUserMemoryRegion *msg_region = &memory->regions[i]; 492 + VuDevRegion *dev_region = &dev->regions[i]; 493 + 494 + DPRINT("Region %d\n", i); 495 + DPRINT(" guest_phys_addr: 0x%016"PRIx64"\n", 496 + msg_region->guest_phys_addr); 497 + DPRINT(" memory_size: 0x%016"PRIx64"\n", 498 + msg_region->memory_size); 499 + DPRINT(" userspace_addr 0x%016"PRIx64"\n", 500 + msg_region->userspace_addr); 501 + DPRINT(" mmap_offset 0x%016"PRIx64"\n", 502 + msg_region->mmap_offset); 503 + 504 + 
dev_region->gpa = msg_region->guest_phys_addr; 505 + dev_region->size = msg_region->memory_size; 506 + dev_region->qva = msg_region->userspace_addr; 507 + dev_region->mmap_offset = msg_region->mmap_offset; 508 + 509 + /* We don't use offset argument of mmap() since the 510 + * mapped address has to be page aligned, and we use huge 511 + * pages. 512 + * In postcopy we're using PROT_NONE here to catch anyone 513 + * accessing it before we userfault 514 + */ 515 + mmap_addr = mmap(0, dev_region->size + dev_region->mmap_offset, 516 + PROT_NONE, MAP_SHARED, 517 + vmsg->fds[i], 0); 518 + 519 + if (mmap_addr == MAP_FAILED) { 520 + vu_panic(dev, "region mmap error: %s", strerror(errno)); 521 + } else { 522 + dev_region->mmap_addr = (uint64_t)(uintptr_t)mmap_addr; 523 + DPRINT(" mmap_addr: 0x%016"PRIx64"\n", 524 + dev_region->mmap_addr); 525 + } 526 + 527 + /* Return the address to QEMU so that it can translate the ufd 528 + * fault addresses back. 529 + */ 530 + msg_region->userspace_addr = (uintptr_t)(mmap_addr + 531 + dev_region->mmap_offset); 532 + close(vmsg->fds[i]); 533 + } 534 + 535 + /* Send the message back to qemu with the addresses filled in */ 536 + vmsg->fd_num = 0; 537 + if (!vu_message_write(dev, dev->sock, vmsg)) { 538 + vu_panic(dev, "failed to respond to set-mem-table for postcopy"); 539 + return false; 540 + } 541 + 542 + /* Wait for QEMU to confirm that it's registered the handler for the 543 + * faults. 544 + */ 545 + if (!vu_message_read(dev, dev->sock, vmsg) || 546 + vmsg->size != sizeof(vmsg->payload.u64) || 547 + vmsg->payload.u64 != 0) { 548 + vu_panic(dev, "failed to receive valid ack for postcopy set-mem-table"); 549 + return false; 550 + } 551 + 552 + /* OK, now we can go and register the memory and generate faults */ 553 + for (i = 0; i < dev->nregions; i++) { 554 + VuDevRegion *dev_region = &dev->regions[i]; 555 + int ret; 556 + #ifdef UFFDIO_REGISTER 557 + /* We should already have an open ufd. Mark each memory 558 + * range as ufd. 
559 + * Discard any mapping we have here; note I can't use MADV_REMOVE 560 + * or fallocate to make the hole since I don't want to lose 561 + * data that's already arrived in the shared process. 562 + * TODO: How to do hugepage 563 + */ 564 + ret = madvise((void *)dev_region->mmap_addr, 565 + dev_region->size + dev_region->mmap_offset, 566 + MADV_DONTNEED); 567 + if (ret) { 568 + fprintf(stderr, 569 + "%s: Failed to madvise(DONTNEED) region %d: %s\n", 570 + __func__, i, strerror(errno)); 571 + } 572 + /* Turn off transparent hugepages so we dont get lose wakeups 573 + * in neighbouring pages. 574 + * TODO: Turn this backon later. 575 + */ 576 + ret = madvise((void *)dev_region->mmap_addr, 577 + dev_region->size + dev_region->mmap_offset, 578 + MADV_NOHUGEPAGE); 579 + if (ret) { 580 + /* Note: This can happen legally on kernels that are configured 581 + * without madvise'able hugepages 582 + */ 583 + fprintf(stderr, 584 + "%s: Failed to madvise(NOHUGEPAGE) region %d: %s\n", 585 + __func__, i, strerror(errno)); 586 + } 587 + struct uffdio_register reg_struct; 588 + reg_struct.range.start = (uintptr_t)dev_region->mmap_addr; 589 + reg_struct.range.len = dev_region->size + dev_region->mmap_offset; 590 + reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING; 591 + 592 + if (ioctl(dev->postcopy_ufd, UFFDIO_REGISTER, &reg_struct)) { 593 + vu_panic(dev, "%s: Failed to userfault region %d " 594 + "@%p + size:%zx offset: %zx: (ufd=%d)%s\n", 595 + __func__, i, 596 + dev_region->mmap_addr, 597 + dev_region->size, dev_region->mmap_offset, 598 + dev->postcopy_ufd, strerror(errno)); 599 + return false; 600 + } 601 + if (!(reg_struct.ioctls & ((__u64)1 << _UFFDIO_COPY))) { 602 + vu_panic(dev, "%s Region (%d) doesn't support COPY", 603 + __func__, i); 604 + return false; 605 + } 606 + DPRINT("%s: region %d: Registered userfault for %llx + %llx\n", 607 + __func__, i, reg_struct.range.start, reg_struct.range.len); 608 + /* Now it's registered we can let the client at it */ 609 + if 
(mprotect((void *)dev_region->mmap_addr, 610 + dev_region->size + dev_region->mmap_offset, 611 + PROT_READ | PROT_WRITE)) { 612 + vu_panic(dev, "failed to mprotect region %d for postcopy (%s)", 613 + i, strerror(errno)); 614 + return false; 615 + } 616 + /* TODO: Stash 'zero' support flags somewhere */ 617 + #endif 618 + } 619 + 620 + return false; 621 + } 622 + 623 + static bool 413 624 vu_set_mem_table_exec(VuDev *dev, VhostUserMsg *vmsg) 414 625 { 415 626 int i; ··· 424 635 } 425 636 } 426 637 dev->nregions = memory->nregions; 638 + 639 + if (dev->postcopy_listening) { 640 + return vu_set_mem_table_exec_postcopy(dev, vmsg); 641 + } 427 642 428 643 DPRINT("Nregions: %d\n", memory->nregions); 429 644 for (i = 0; i < dev->nregions; i++) { ··· 500 715 dev->log_size = log_mmap_size; 501 716 502 717 vmsg->size = sizeof(vmsg->payload.u64); 718 + vmsg->fd_num = 0; 503 719 504 720 return true; 505 721 } ··· 752 968 uint64_t features = 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD | 753 969 1ULL << VHOST_USER_PROTOCOL_F_SLAVE_REQ; 754 970 971 + if (have_userfault()) { 972 + features |= 1ULL << VHOST_USER_PROTOCOL_F_PAGEFAULT; 973 + } 974 + 755 975 if (dev->iface->get_protocol_features) { 756 976 features |= dev->iface->get_protocol_features(dev); 757 977 } 758 978 759 979 vmsg->payload.u64 = features; 760 980 vmsg->size = sizeof(vmsg->payload.u64); 981 + vmsg->fd_num = 0; 761 982 762 983 return true; 763 984 } ··· 857 1078 } 858 1079 859 1080 static bool 1081 + vu_set_postcopy_advise(VuDev *dev, VhostUserMsg *vmsg) 1082 + { 1083 + dev->postcopy_ufd = -1; 1084 + #ifdef UFFDIO_API 1085 + struct uffdio_api api_struct; 1086 + 1087 + dev->postcopy_ufd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); 1088 + vmsg->size = 0; 1089 + #endif 1090 + 1091 + if (dev->postcopy_ufd == -1) { 1092 + vu_panic(dev, "Userfaultfd not available: %s", strerror(errno)); 1093 + goto out; 1094 + } 1095 + 1096 + #ifdef UFFDIO_API 1097 + api_struct.api = UFFD_API; 1098 + api_struct.features = 0; 1099 + 
if (ioctl(dev->postcopy_ufd, UFFDIO_API, &api_struct)) { 1100 + vu_panic(dev, "Failed UFFDIO_API: %s", strerror(errno)); 1101 + close(dev->postcopy_ufd); 1102 + dev->postcopy_ufd = -1; 1103 + goto out; 1104 + } 1105 + /* TODO: Stash feature flags somewhere */ 1106 + #endif 1107 + 1108 + out: 1109 + /* Return a ufd to the QEMU */ 1110 + vmsg->fd_num = 1; 1111 + vmsg->fds[0] = dev->postcopy_ufd; 1112 + return true; /* = send a reply */ 1113 + } 1114 + 1115 + static bool 1116 + vu_set_postcopy_listen(VuDev *dev, VhostUserMsg *vmsg) 1117 + { 1118 + vmsg->payload.u64 = -1; 1119 + vmsg->size = sizeof(vmsg->payload.u64); 1120 + 1121 + if (dev->nregions) { 1122 + vu_panic(dev, "Regions already registered at postcopy-listen"); 1123 + return true; 1124 + } 1125 + dev->postcopy_listening = true; 1126 + 1127 + vmsg->flags = VHOST_USER_VERSION | VHOST_USER_REPLY_MASK; 1128 + vmsg->payload.u64 = 0; /* Success */ 1129 + return true; 1130 + } 1131 + 1132 + static bool 1133 + vu_set_postcopy_end(VuDev *dev, VhostUserMsg *vmsg) 1134 + { 1135 + DPRINT("%s: Entry\n", __func__); 1136 + dev->postcopy_listening = false; 1137 + if (dev->postcopy_ufd > 0) { 1138 + close(dev->postcopy_ufd); 1139 + dev->postcopy_ufd = -1; 1140 + DPRINT("%s: Done close\n", __func__); 1141 + } 1142 + 1143 + vmsg->fd_num = 0; 1144 + vmsg->payload.u64 = 0; 1145 + vmsg->size = sizeof(vmsg->payload.u64); 1146 + vmsg->flags = VHOST_USER_VERSION | VHOST_USER_REPLY_MASK; 1147 + DPRINT("%s: exit\n", __func__); 1148 + return true; 1149 + } 1150 + 1151 + static bool 860 1152 vu_process_message(VuDev *dev, VhostUserMsg *vmsg) 861 1153 { 862 1154 int do_reply = 0; ··· 927 1219 return vu_set_config(dev, vmsg); 928 1220 case VHOST_USER_NONE: 929 1221 break; 1222 + case VHOST_USER_POSTCOPY_ADVISE: 1223 + return vu_set_postcopy_advise(dev, vmsg); 1224 + case VHOST_USER_POSTCOPY_LISTEN: 1225 + return vu_set_postcopy_listen(dev, vmsg); 1226 + case VHOST_USER_POSTCOPY_END: 1227 + return vu_set_postcopy_end(dev, vmsg); 930 1228 
default: 931 1229 vmsg_close_fds(vmsg); 932 1230 vu_panic(dev, "Unhandled request: %d", vmsg->request);
+11
contrib/libvhost-user/libvhost-user.h
··· 48 48 VHOST_USER_PROTOCOL_F_NET_MTU = 4, 49 49 VHOST_USER_PROTOCOL_F_SLAVE_REQ = 5, 50 50 VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6, 51 + VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7, 52 + VHOST_USER_PROTOCOL_F_PAGEFAULT = 8, 51 53 52 54 VHOST_USER_PROTOCOL_F_MAX 53 55 }; ··· 81 83 VHOST_USER_SET_VRING_ENDIAN = 23, 82 84 VHOST_USER_GET_CONFIG = 24, 83 85 VHOST_USER_SET_CONFIG = 25, 86 + VHOST_USER_CREATE_CRYPTO_SESSION = 26, 87 + VHOST_USER_CLOSE_CRYPTO_SESSION = 27, 88 + VHOST_USER_POSTCOPY_ADVISE = 28, 89 + VHOST_USER_POSTCOPY_LISTEN = 29, 90 + VHOST_USER_POSTCOPY_END = 30, 84 91 VHOST_USER_MAX 85 92 } VhostUserRequest; 86 93 ··· 277 284 * re-initialize */ 278 285 vu_panic_cb panic; 279 286 const VuDevIface *iface; 287 + 288 + /* Postcopy data */ 289 + int postcopy_ufd; 290 + bool postcopy_listening; 280 291 }; 281 292 282 293 typedef struct VuVirtqElement {
+41
docs/devel/migration.rst
··· 577 577 hugepages works well, however 1GB hugepages are likely to be problematic 578 578 since it takes ~1 second to transfer a 1GB hugepage across a 10Gbps link, 579 579 and until the full page is transferred the destination thread is blocked. 580 + 581 + Postcopy with shared memory 582 + --------------------------- 583 + 584 + Postcopy migration with shared memory needs explicit support from the other 585 + processes that share memory and from QEMU. There are restrictions on the type of 586 + memory that userfault can support shared. 587 + 588 + The Linux kernel userfault support works on `/dev/shm` memory and on `hugetlbfs` 589 + (although the kernel doesn't provide an equivalent to `madvise(MADV_DONTNEED)` 590 + for hugetlbfs which may be a problem in some configurations). 591 + 592 + The vhost-user code in QEMU supports clients that have Postcopy support, 593 + and the `vhost-user-bridge` (in `tests/`) and the DPDK package have changes 594 + to support postcopy. 595 + 596 + The client needs to open a userfaultfd and register the areas 597 + of memory that it maps with userfault. The client must then pass the 598 + userfaultfd back to QEMU together with a mapping table that allows 599 + fault addresses in the clients address space to be converted back to 600 + RAMBlock/offsets. The client's userfaultfd is added to the postcopy 601 + fault-thread and page requests are made on behalf of the client by QEMU. 602 + QEMU performs 'wake' operations on the client's userfaultfd to allow it 603 + to continue after a page has arrived. 604 + 605 + .. 
note:: 606 + There are two future improvements that would be nice: 607 + a) Some way to make QEMU ignorant of the addresses in the clients 608 + address space 609 + b) Avoiding the need for QEMU to perform ufd-wake calls after the 610 + pages have arrived 611 + 612 + Retro-fitting postcopy to existing clients is possible: 613 + a) A mechanism is needed for the registration with userfault as above, 614 + and the registration needs to be coordinated with the phases of 615 + postcopy. In vhost-user extra messages are added to the existing 616 + control channel. 617 + b) Any thread that can block due to guest memory accesses must be 618 + identified and the implication understood; for example if the 619 + guest memory access is made while holding a lock then all other 620 + threads waiting for that lock will also be blocked.
+52
docs/interop/vhost-user.txt
··· 290 290 the source. No further update must be done before rings are 291 291 restarted. 292 292 293 + In postcopy migration the slave is started before all the memory has been 294 + received from the source host, and care must be taken to avoid accessing pages 295 + that have yet to be received. The slave opens a 'userfault'-fd and registers 296 + the memory with it; this fd is then passed back over to the master. 297 + The master services requests on the userfaultfd for pages that are accessed 298 + and when the page is available it performs WAKE ioctl's on the userfaultfd 299 + to wake the stalled slave. The client indicates support for this via the 300 + VHOST_USER_PROTOCOL_F_PAGEFAULT feature. 301 + 293 302 Memory access 294 303 ------------- 295 304 ··· 369 378 #define VHOST_USER_PROTOCOL_F_SLAVE_REQ 5 370 379 #define VHOST_USER_PROTOCOL_F_CROSS_ENDIAN 6 371 380 #define VHOST_USER_PROTOCOL_F_CRYPTO_SESSION 7 381 + #define VHOST_USER_PROTOCOL_F_PAGEFAULT 8 372 382 373 383 Master message types 374 384 -------------------- ··· 445 455 Id: 5 446 456 Equivalent ioctl: VHOST_SET_MEM_TABLE 447 457 Master payload: memory regions description 458 + Slave payload: (postcopy only) memory regions description 448 459 449 460 Sets the memory map regions on the slave so it can translate the vring 450 461 addresses. In the ancillary data there is an array of file descriptors 451 462 for each memory mapped region. The size and ordering of the fds matches 452 463 the number and ordering of memory regions. 464 + 465 + When VHOST_USER_POSTCOPY_LISTEN has been received, SET_MEM_TABLE replies with 466 + the bases of the memory mapped regions to the master. The slave must 467 + have mmap'd the regions but not yet accessed them and should not yet generate 468 + a userfault event. Note NEED_REPLY_MASK is not set in this case. 
469 + QEMU will then reply back to the list of mappings with an empty 470 + VHOST_USER_SET_MEM_TABLE as an acknowledgment; only upon reception of this 471 + message may the guest start accessing the memory and generating faults. 453 472 454 473 * VHOST_USER_SET_LOG_BASE 455 474 ··· 688 707 This request should be sent only when VHOST_USER_PROTOCOL_F_CRYPTO_SESSION 689 708 feature has been successfully negotiated. 690 709 It's a required feature for crypto devices. 710 + 711 + * VHOST_USER_POSTCOPY_ADVISE 712 + Id: 28 713 + Master payload: N/A 714 + Slave payload: userfault fd 715 + 716 + When VHOST_USER_PROTOCOL_F_PAGEFAULT is supported, the 717 + master advises slave that a migration with postcopy enabled is underway, 718 + the slave must open a userfaultfd for later use. 719 + Note that at this stage the migration is still in precopy mode. 720 + 721 + * VHOST_USER_POSTCOPY_LISTEN 722 + Id: 29 723 + Master payload: N/A 724 + 725 + Master advises slave that a transition to postcopy mode has happened. 726 + The slave must ensure that shared memory is registered with userfaultfd 727 + to cause faulting of non-present pages. 728 + 729 + This is always sent sometime after a VHOST_USER_POSTCOPY_ADVISE, and 730 + thus only when VHOST_USER_PROTOCOL_F_PAGEFAULT is supported. 731 + 732 + * VHOST_USER_POSTCOPY_END 733 + Id: 30 734 + Slave payload: u64 735 + 736 + Master advises that postcopy migration has now completed. The 737 + slave must disable the userfaultfd. The response is an acknowledgement 738 + only. 739 + When VHOST_USER_PROTOCOL_F_PAGEFAULT is supported, this message 740 + is sent at the end of the migration, after VHOST_USER_POSTCOPY_LISTEN 741 + was previously sent. 742 + The value returned is an error indication; 0 is success. 691 743 692 744 Slave message types 693 745 -------------------
+72 -14
exec.c
··· 99 99 */ 100 100 #define RAM_RESIZEABLE (1 << 2) 101 101 102 + /* UFFDIO_ZEROPAGE is available on this RAMBlock to atomically 103 + * zero the page and wake waiting processes. 104 + * (Set during postcopy) 105 + */ 106 + #define RAM_UF_ZEROPAGE (1 << 3) 102 107 #endif 103 108 104 109 #ifdef TARGET_PAGE_BITS_VARY ··· 1790 1795 return rb->flags & RAM_SHARED; 1791 1796 } 1792 1797 1798 + /* Note: Only set at the start of postcopy */ 1799 + bool qemu_ram_is_uf_zeroable(RAMBlock *rb) 1800 + { 1801 + return rb->flags & RAM_UF_ZEROPAGE; 1802 + } 1803 + 1804 + void qemu_ram_set_uf_zeroable(RAMBlock *rb) 1805 + { 1806 + rb->flags |= RAM_UF_ZEROPAGE; 1807 + } 1808 + 1793 1809 /* Called with iothread lock held. */ 1794 1810 void qemu_ram_set_idstr(RAMBlock *new_block, const char *name, DeviceState *dev) 1795 1811 { ··· 2320 2336 return ramblock_ptr(block, addr); 2321 2337 } 2322 2338 2339 + /* Return the offset of a hostpointer within a ramblock */ 2340 + ram_addr_t qemu_ram_block_host_offset(RAMBlock *rb, void *host) 2341 + { 2342 + ram_addr_t res = (uint8_t *)host - (uint8_t *)rb->host; 2343 + assert((uintptr_t)host >= (uintptr_t)rb->host); 2344 + assert(res < rb->max_length); 2345 + 2346 + return res; 2347 + } 2348 + 2323 2349 /* 2324 2350 * Translates a host ptr back to a RAMBlock, a ram_addr and an offset 2325 2351 * in that RAMBlock. ··· 3744 3770 } 3745 3771 3746 3772 if ((start + length) <= rb->used_length) { 3773 + bool need_madvise, need_fallocate; 3747 3774 uint8_t *host_endaddr = host_startaddr + length; 3748 3775 if ((uintptr_t)host_endaddr & (rb->page_size - 1)) { 3749 3776 error_report("ram_block_discard_range: Unaligned end address: %p", ··· 3753 3780 3754 3781 errno = ENOTSUP; /* If we are missing MADVISE etc */ 3755 3782 3756 - if (rb->page_size == qemu_host_page_size) { 3757 - #if defined(CONFIG_MADVISE) 3758 - /* Note: We need the madvise MADV_DONTNEED behaviour of definitely 3759 - * freeing the page. 
3760 - */ 3761 - ret = madvise(host_startaddr, length, MADV_DONTNEED); 3762 - #endif 3763 - } else { 3764 - /* Huge page case - unfortunately it can't do DONTNEED, but 3765 - * it can do the equivalent by FALLOC_FL_PUNCH_HOLE in the 3766 - * huge page file. 3783 + /* The logic here is messy; 3784 + * madvise DONTNEED fails for hugepages 3785 + * fallocate works on hugepages and shmem 3786 + */ 3787 + need_madvise = (rb->page_size == qemu_host_page_size); 3788 + need_fallocate = rb->fd != -1; 3789 + if (need_fallocate) { 3790 + /* For a file, this causes the area of the file to be zero'd 3791 + * if read, and for hugetlbfs also causes it to be unmapped 3792 + * so a userfault will trigger. 3767 3793 */ 3768 3794 #ifdef CONFIG_FALLOCATE_PUNCH_HOLE 3769 3795 ret = fallocate(rb->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 3770 3796 start, length); 3797 + if (ret) { 3798 + ret = -errno; 3799 + error_report("ram_block_discard_range: Failed to fallocate " 3800 + "%s:%" PRIx64 " +%zx (%d)", 3801 + rb->idstr, start, length, ret); 3802 + goto err; 3803 + } 3804 + #else 3805 + ret = -ENOSYS; 3806 + error_report("ram_block_discard_range: fallocate not available/file" 3807 + "%s:%" PRIx64 " +%zx (%d)", 3808 + rb->idstr, start, length, ret); 3809 + goto err; 3771 3810 #endif 3772 3811 } 3773 - if (ret) { 3774 - ret = -errno; 3775 - error_report("ram_block_discard_range: Failed to discard range " 3812 + if (need_madvise) { 3813 + /* For normal RAM this causes it to be unmapped, 3814 + * for shared memory it causes the local mapping to disappear 3815 + * and to fall back on the file contents (which we just 3816 + * fallocate'd away). 
3817 + */ 3818 + #if defined(CONFIG_MADVISE) 3819 + ret = madvise(host_startaddr, length, MADV_DONTNEED); 3820 + if (ret) { 3821 + ret = -errno; 3822 + error_report("ram_block_discard_range: Failed to discard range " 3823 + "%s:%" PRIx64 " +%zx (%d)", 3824 + rb->idstr, start, length, ret); 3825 + goto err; 3826 + } 3827 + #else 3828 + ret = -ENOSYS; 3829 + error_report("ram_block_discard_range: MADVISE not available" 3776 3830 "%s:%" PRIx64 " +%zx (%d)", 3777 3831 rb->idstr, start, length, ret); 3832 + goto err; 3833 + #endif 3778 3834 } 3835 + trace_ram_block_discard_range(rb->idstr, host_startaddr, length, 3836 + need_madvise, need_fallocate, ret); 3779 3837 } else { 3780 3838 error_report("ram_block_discard_range: Overrun block '%s' (%" PRIu64 3781 3839 "/%zx/" RAM_ADDR_FMT")",
+11 -3
hmp.c
··· 2423 2423 switch (value->type) { 2424 2424 case MEMORY_DEVICE_INFO_KIND_DIMM: 2425 2425 di = value->u.dimm.data; 2426 + break; 2426 2427 2428 + case MEMORY_DEVICE_INFO_KIND_NVDIMM: 2429 + di = value->u.nvdimm.data; 2430 + break; 2431 + 2432 + default: 2433 + di = NULL; 2434 + break; 2435 + } 2436 + 2437 + if (di) { 2427 2438 monitor_printf(mon, "Memory device [%s]: \"%s\"\n", 2428 2439 MemoryDeviceInfoKind_str(value->type), 2429 2440 di->id ? di->id : ""); ··· 2436 2447 di->hotplugged ? "true" : "false"); 2437 2448 monitor_printf(mon, " hotpluggable: %s\n", 2438 2449 di->hotpluggable ? "true" : "false"); 2439 - break; 2440 - default: 2441 - break; 2442 2450 } 2443 2451 } 2444 2452 }
+140
hw/acpi/aml-build.c
··· 258 258 } 259 259 } 260 260 261 + /* Generic Address Structure (GAS) 262 + * ACPI 2.0/3.0: 5.2.3.1 Generic Address Structure 263 + * 2.0 compat note: 264 + * @access_width must be 0, see ACPI 2.0:Table 5-1 265 + */ 266 + void build_append_gas(GArray *table, AmlAddressSpace as, 267 + uint8_t bit_width, uint8_t bit_offset, 268 + uint8_t access_width, uint64_t address) 269 + { 270 + build_append_int_noprefix(table, as, 1); 271 + build_append_int_noprefix(table, bit_width, 1); 272 + build_append_int_noprefix(table, bit_offset, 1); 273 + build_append_int_noprefix(table, access_width, 1); 274 + build_append_int_noprefix(table, address, 8); 275 + } 276 + 261 277 /* 262 278 * Build NAME(XXXX, 0x00000000) where 0x00000000 is encoded as a dword, 263 279 * and return the offset to 0x00000000 for runtime patching. ··· 1662 1678 "SLIT", 1663 1679 table_data->len - slit_start, 1, NULL, NULL); 1664 1680 } 1681 + 1682 + /* build rev1/rev3/rev5.1 FADT */ 1683 + void build_fadt(GArray *tbl, BIOSLinker *linker, const AcpiFadtData *f, 1684 + const char *oem_id, const char *oem_table_id) 1685 + { 1686 + int off; 1687 + int fadt_start = tbl->len; 1688 + 1689 + acpi_data_push(tbl, sizeof(AcpiTableHeader)); 1690 + 1691 + /* FACS address to be filled by Guest linker at runtime */ 1692 + off = tbl->len; 1693 + build_append_int_noprefix(tbl, 0, 4); /* FIRMWARE_CTRL */ 1694 + if (f->facs_tbl_offset) { /* don't patch if not supported by platform */ 1695 + bios_linker_loader_add_pointer(linker, 1696 + ACPI_BUILD_TABLE_FILE, off, 4, 1697 + ACPI_BUILD_TABLE_FILE, *f->facs_tbl_offset); 1698 + } 1699 + 1700 + /* DSDT address to be filled by Guest linker at runtime */ 1701 + off = tbl->len; 1702 + build_append_int_noprefix(tbl, 0, 4); /* DSDT */ 1703 + if (f->dsdt_tbl_offset) { /* don't patch if not supported by platform */ 1704 + bios_linker_loader_add_pointer(linker, 1705 + ACPI_BUILD_TABLE_FILE, off, 4, 1706 + ACPI_BUILD_TABLE_FILE, *f->dsdt_tbl_offset); 1707 + } 1708 + 1709 + /* ACPI1.0: 
INT_MODEL, ACPI2.0+: Reserved */ 1710 + build_append_int_noprefix(tbl, f->int_model /* Multiple APIC */, 1); 1711 + /* Preferred_PM_Profile */ 1712 + build_append_int_noprefix(tbl, 0 /* Unspecified */, 1); 1713 + build_append_int_noprefix(tbl, f->sci_int, 2); /* SCI_INT */ 1714 + build_append_int_noprefix(tbl, f->smi_cmd, 4); /* SMI_CMD */ 1715 + build_append_int_noprefix(tbl, f->acpi_enable_cmd, 1); /* ACPI_ENABLE */ 1716 + build_append_int_noprefix(tbl, f->acpi_disable_cmd, 1); /* ACPI_DISABLE */ 1717 + build_append_int_noprefix(tbl, 0 /* not supported */, 1); /* S4BIOS_REQ */ 1718 + /* ACPI1.0: Reserved, ACPI2.0+: PSTATE_CNT */ 1719 + build_append_int_noprefix(tbl, 0, 1); 1720 + build_append_int_noprefix(tbl, f->pm1a_evt.address, 4); /* PM1a_EVT_BLK */ 1721 + build_append_int_noprefix(tbl, 0, 4); /* PM1b_EVT_BLK */ 1722 + build_append_int_noprefix(tbl, f->pm1a_cnt.address, 4); /* PM1a_CNT_BLK */ 1723 + build_append_int_noprefix(tbl, 0, 4); /* PM1b_CNT_BLK */ 1724 + build_append_int_noprefix(tbl, 0, 4); /* PM2_CNT_BLK */ 1725 + build_append_int_noprefix(tbl, f->pm_tmr.address, 4); /* PM_TMR_BLK */ 1726 + build_append_int_noprefix(tbl, f->gpe0_blk.address, 4); /* GPE0_BLK */ 1727 + build_append_int_noprefix(tbl, 0, 4); /* GPE1_BLK */ 1728 + /* PM1_EVT_LEN */ 1729 + build_append_int_noprefix(tbl, f->pm1a_evt.bit_width / 8, 1); 1730 + /* PM1_CNT_LEN */ 1731 + build_append_int_noprefix(tbl, f->pm1a_cnt.bit_width / 8, 1); 1732 + build_append_int_noprefix(tbl, 0, 1); /* PM2_CNT_LEN */ 1733 + build_append_int_noprefix(tbl, f->pm_tmr.bit_width / 8, 1); /* PM_TMR_LEN */ 1734 + /* GPE0_BLK_LEN */ 1735 + build_append_int_noprefix(tbl, f->gpe0_blk.bit_width / 8, 1); 1736 + build_append_int_noprefix(tbl, 0, 1); /* GPE1_BLK_LEN */ 1737 + build_append_int_noprefix(tbl, 0, 1); /* GPE1_BASE */ 1738 + build_append_int_noprefix(tbl, 0, 1); /* CST_CNT */ 1739 + build_append_int_noprefix(tbl, f->plvl2_lat, 2); /* P_LVL2_LAT */ 1740 + build_append_int_noprefix(tbl, f->plvl3_lat, 2); 
/* P_LVL3_LAT */ 1741 + build_append_int_noprefix(tbl, 0, 2); /* FLUSH_SIZE */ 1742 + build_append_int_noprefix(tbl, 0, 2); /* FLUSH_STRIDE */ 1743 + build_append_int_noprefix(tbl, 0, 1); /* DUTY_OFFSET */ 1744 + build_append_int_noprefix(tbl, 0, 1); /* DUTY_WIDTH */ 1745 + build_append_int_noprefix(tbl, 0, 1); /* DAY_ALRM */ 1746 + build_append_int_noprefix(tbl, 0, 1); /* MON_ALRM */ 1747 + build_append_int_noprefix(tbl, f->rtc_century, 1); /* CENTURY */ 1748 + build_append_int_noprefix(tbl, 0, 2); /* IAPC_BOOT_ARCH */ 1749 + build_append_int_noprefix(tbl, 0, 1); /* Reserved */ 1750 + build_append_int_noprefix(tbl, f->flags, 4); /* Flags */ 1751 + 1752 + if (f->rev == 1) { 1753 + goto build_hdr; 1754 + } 1755 + 1756 + build_append_gas_from_struct(tbl, &f->reset_reg); /* RESET_REG */ 1757 + build_append_int_noprefix(tbl, f->reset_val, 1); /* RESET_VALUE */ 1758 + /* Since ACPI 5.1 */ 1759 + if ((f->rev >= 6) || ((f->rev == 5) && f->minor_ver > 0)) { 1760 + build_append_int_noprefix(tbl, f->arm_boot_arch, 2); /* ARM_BOOT_ARCH */ 1761 + /* FADT Minor Version */ 1762 + build_append_int_noprefix(tbl, f->minor_ver, 1); 1763 + } else { 1764 + build_append_int_noprefix(tbl, 0, 3); /* Reserved upto ACPI 5.0 */ 1765 + } 1766 + build_append_int_noprefix(tbl, 0, 8); /* X_FIRMWARE_CTRL */ 1767 + 1768 + /* XDSDT address to be filled by Guest linker at runtime */ 1769 + off = tbl->len; 1770 + build_append_int_noprefix(tbl, 0, 8); /* X_DSDT */ 1771 + if (f->xdsdt_tbl_offset) { 1772 + bios_linker_loader_add_pointer(linker, 1773 + ACPI_BUILD_TABLE_FILE, off, 8, 1774 + ACPI_BUILD_TABLE_FILE, *f->xdsdt_tbl_offset); 1775 + } 1776 + 1777 + build_append_gas_from_struct(tbl, &f->pm1a_evt); /* X_PM1a_EVT_BLK */ 1778 + /* X_PM1b_EVT_BLK */ 1779 + build_append_gas(tbl, AML_AS_SYSTEM_MEMORY, 0 , 0, 0, 0); 1780 + build_append_gas_from_struct(tbl, &f->pm1a_cnt); /* X_PM1a_CNT_BLK */ 1781 + /* X_PM1b_CNT_BLK */ 1782 + build_append_gas(tbl, AML_AS_SYSTEM_MEMORY, 0 , 0, 0, 0); 1783 + /* 
X_PM2_CNT_BLK */ 1784 + build_append_gas(tbl, AML_AS_SYSTEM_MEMORY, 0 , 0, 0, 0); 1785 + build_append_gas_from_struct(tbl, &f->pm_tmr); /* X_PM_TMR_BLK */ 1786 + build_append_gas_from_struct(tbl, &f->gpe0_blk); /* X_GPE0_BLK */ 1787 + build_append_gas(tbl, AML_AS_SYSTEM_MEMORY, 0 , 0, 0, 0); /* X_GPE1_BLK */ 1788 + 1789 + if (f->rev <= 4) { 1790 + goto build_hdr; 1791 + } 1792 + 1793 + /* SLEEP_CONTROL_REG */ 1794 + build_append_gas(tbl, AML_AS_SYSTEM_MEMORY, 0 , 0, 0, 0); 1795 + /* SLEEP_STATUS_REG */ 1796 + build_append_gas(tbl, AML_AS_SYSTEM_MEMORY, 0 , 0, 0, 0); 1797 + 1798 + /* TODO: extra fields need to be added to support revisions above rev5 */ 1799 + assert(f->rev == 5); 1800 + 1801 + build_hdr: 1802 + build_header(linker, tbl, (void *)(tbl->data + fadt_start), 1803 + "FACP", tbl->len - fadt_start, f->rev, oem_id, oem_table_id); 1804 + }
+15 -24
hw/arm/virt-acpi-build.c
··· 651 651 } 652 652 653 653 /* FADT */ 654 - static void build_fadt(GArray *table_data, BIOSLinker *linker, 655 - VirtMachineState *vms, unsigned dsdt_tbl_offset) 654 + static void build_fadt_rev5(GArray *table_data, BIOSLinker *linker, 655 + VirtMachineState *vms, unsigned dsdt_tbl_offset) 656 656 { 657 - int fadt_start = table_data->len; 658 - AcpiFadtDescriptorRev5_1 *fadt = acpi_data_push(table_data, sizeof(*fadt)); 659 - unsigned xdsdt_entry_offset = (char *)&fadt->x_dsdt - table_data->data; 660 - uint16_t bootflags; 657 + /* ACPI v5.1 */ 658 + AcpiFadtData fadt = { 659 + .rev = 5, 660 + .minor_ver = 1, 661 + .flags = 1 << ACPI_FADT_F_HW_REDUCED_ACPI, 662 + .xdsdt_tbl_offset = &dsdt_tbl_offset, 663 + }; 661 664 662 665 switch (vms->psci_conduit) { 663 666 case QEMU_PSCI_CONDUIT_DISABLED: 664 - bootflags = 0; 667 + fadt.arm_boot_arch = 0; 665 668 break; 666 669 case QEMU_PSCI_CONDUIT_HVC: 667 - bootflags = ACPI_FADT_ARM_PSCI_COMPLIANT | ACPI_FADT_ARM_PSCI_USE_HVC; 670 + fadt.arm_boot_arch = ACPI_FADT_ARM_PSCI_COMPLIANT | 671 + ACPI_FADT_ARM_PSCI_USE_HVC; 668 672 break; 669 673 case QEMU_PSCI_CONDUIT_SMC: 670 - bootflags = ACPI_FADT_ARM_PSCI_COMPLIANT; 674 + fadt.arm_boot_arch = ACPI_FADT_ARM_PSCI_COMPLIANT; 671 675 break; 672 676 default: 673 677 g_assert_not_reached(); 674 678 } 675 679 676 - /* Hardware Reduced = 1 and use PSCI 0.2+ */ 677 - fadt->flags = cpu_to_le32(1 << ACPI_FADT_F_HW_REDUCED_ACPI); 678 - fadt->arm_boot_flags = cpu_to_le16(bootflags); 679 - 680 - /* ACPI v5.1 (fadt->revision.fadt->minor_revision) */ 681 - fadt->minor_revision = 0x1; 682 - 683 - /* DSDT address to be filled by Guest linker */ 684 - bios_linker_loader_add_pointer(linker, 685 - ACPI_BUILD_TABLE_FILE, xdsdt_entry_offset, sizeof(fadt->x_dsdt), 686 - ACPI_BUILD_TABLE_FILE, dsdt_tbl_offset); 687 - 688 - build_header(linker, table_data, (void *)(table_data->data + fadt_start), 689 - "FACP", table_data->len - fadt_start, 5, NULL, NULL); 680 + build_fadt(table_data, linker, &fadt, 
NULL, NULL); 690 681 } 691 682 692 683 /* DSDT */ ··· 761 752 762 753 /* FADT MADT GTDT MCFG SPCR pointed to by RSDT */ 763 754 acpi_add_table(table_offsets, tables_blob); 764 - build_fadt(tables_blob, tables->linker, vms, dsdt); 755 + build_fadt_rev5(tables_blob, tables->linker, vms, dsdt); 765 756 766 757 acpi_add_table(table_offsets, tables_blob); 767 758 build_madt(tables_blob, tables->linker, vms);
+115 -137
hw/i386/acpi-build.c
··· 91 91 } AcpiMcfgInfo; 92 92 93 93 typedef struct AcpiPmInfo { 94 - bool force_rev1_fadt; 95 94 bool s3_disabled; 96 95 bool s4_disabled; 97 96 bool pcihp_bridge_en; 98 97 uint8_t s4_val; 99 - uint16_t sci_int; 100 - uint8_t acpi_enable_cmd; 101 - uint8_t acpi_disable_cmd; 102 - uint32_t gpe0_blk; 103 - uint32_t gpe0_blk_len; 104 - uint32_t io_base; 98 + AcpiFadtData fadt; 105 99 uint16_t cpu_hp_io_base; 106 100 uint16_t pcihp_io_base; 107 101 uint16_t pcihp_io_len; ··· 124 118 bool pcihp_bridge_en; 125 119 } AcpiBuildPciBusHotplugState; 126 120 121 + static void init_common_fadt_data(Object *o, AcpiFadtData *data) 122 + { 123 + uint32_t io = object_property_get_uint(o, ACPI_PM_PROP_PM_IO_BASE, NULL); 124 + AmlAddressSpace as = AML_AS_SYSTEM_IO; 125 + AcpiFadtData fadt = { 126 + .rev = 3, 127 + .flags = 128 + (1 << ACPI_FADT_F_WBINVD) | 129 + (1 << ACPI_FADT_F_PROC_C1) | 130 + (1 << ACPI_FADT_F_SLP_BUTTON) | 131 + (1 << ACPI_FADT_F_RTC_S4) | 132 + (1 << ACPI_FADT_F_USE_PLATFORM_CLOCK) | 133 + /* APIC destination mode ("Flat Logical") has an upper limit of 8 134 + * CPUs for more than 8 CPUs, "Clustered Logical" mode has to be 135 + * used 136 + */ 137 + ((max_cpus > 8) ? 
(1 << ACPI_FADT_F_FORCE_APIC_CLUSTER_MODEL) : 0), 138 + .int_model = 1 /* Multiple APIC */, 139 + .rtc_century = RTC_CENTURY, 140 + .plvl2_lat = 0xfff /* C2 state not supported */, 141 + .plvl3_lat = 0xfff /* C3 state not supported */, 142 + .smi_cmd = ACPI_PORT_SMI_CMD, 143 + .sci_int = object_property_get_uint(o, ACPI_PM_PROP_SCI_INT, NULL), 144 + .acpi_enable_cmd = 145 + object_property_get_uint(o, ACPI_PM_PROP_ACPI_ENABLE_CMD, NULL), 146 + .acpi_disable_cmd = 147 + object_property_get_uint(o, ACPI_PM_PROP_ACPI_DISABLE_CMD, NULL), 148 + .pm1a_evt = { .space_id = as, .bit_width = 4 * 8, .address = io }, 149 + .pm1a_cnt = { .space_id = as, .bit_width = 2 * 8, 150 + .address = io + 0x04 }, 151 + .pm_tmr = { .space_id = as, .bit_width = 4 * 8, .address = io + 0x08 }, 152 + .gpe0_blk = { .space_id = as, .bit_width = 153 + object_property_get_uint(o, ACPI_PM_PROP_GPE0_BLK_LEN, NULL) * 8, 154 + .address = object_property_get_uint(o, ACPI_PM_PROP_GPE0_BLK, NULL) 155 + }, 156 + }; 157 + *data = fadt; 158 + } 159 + 127 160 static void acpi_get_pm_info(AcpiPmInfo *pm) 128 161 { 129 162 Object *piix = piix4_pm_find(); 130 163 Object *lpc = ich9_lpc_find(); 131 - Object *obj = NULL; 164 + Object *obj = piix ? 
piix : lpc; 132 165 QObject *o; 133 - 134 - pm->force_rev1_fadt = false; 135 166 pm->cpu_hp_io_base = 0; 136 167 pm->pcihp_io_base = 0; 137 168 pm->pcihp_io_len = 0; 169 + 170 + init_common_fadt_data(obj, &pm->fadt); 138 171 if (piix) { 139 172 /* w2k requires FADT(rev1) or it won't boot, keep PC compatible */ 140 - pm->force_rev1_fadt = true; 141 - obj = piix; 173 + pm->fadt.rev = 1; 142 174 pm->cpu_hp_io_base = PIIX4_CPU_HOTPLUG_IO_BASE; 143 175 pm->pcihp_io_base = 144 176 object_property_get_uint(obj, ACPI_PCIHP_IO_BASE_PROP, NULL); ··· 146 178 object_property_get_uint(obj, ACPI_PCIHP_IO_LEN_PROP, NULL); 147 179 } 148 180 if (lpc) { 149 - obj = lpc; 181 + struct AcpiGenericAddress r = { .space_id = AML_AS_SYSTEM_IO, 182 + .bit_width = 8, .address = ICH9_RST_CNT_IOPORT }; 183 + pm->fadt.reset_reg = r; 184 + pm->fadt.reset_val = 0xf; 185 + pm->fadt.flags |= 1 << ACPI_FADT_F_RESET_REG_SUP; 150 186 pm->cpu_hp_io_base = ICH9_CPU_HOTPLUG_IO_BASE; 151 187 } 152 188 assert(obj); 189 + 190 + /* The above need not be conditional on machine type because the reset port 191 + * happens to be the same on PIIX (pc) and ICH9 (q35). 
*/ 192 + QEMU_BUILD_BUG_ON(ICH9_RST_CNT_IOPORT != RCR_IOPORT); 153 193 154 194 /* Fill in optional s3/s4 related properties */ 155 195 o = object_property_get_qobject(obj, ACPI_PM_PROP_S3_DISABLED, NULL); ··· 174 214 } 175 215 qobject_decref(o); 176 216 177 - /* Fill in mandatory properties */ 178 - pm->sci_int = object_property_get_uint(obj, ACPI_PM_PROP_SCI_INT, NULL); 179 - 180 - pm->acpi_enable_cmd = object_property_get_uint(obj, 181 - ACPI_PM_PROP_ACPI_ENABLE_CMD, 182 - NULL); 183 - pm->acpi_disable_cmd = 184 - object_property_get_uint(obj, 185 - ACPI_PM_PROP_ACPI_DISABLE_CMD, 186 - NULL); 187 - pm->io_base = object_property_get_uint(obj, ACPI_PM_PROP_PM_IO_BASE, 188 - NULL); 189 - pm->gpe0_blk = object_property_get_uint(obj, ACPI_PM_PROP_GPE0_BLK, 190 - NULL); 191 - pm->gpe0_blk_len = object_property_get_uint(obj, ACPI_PM_PROP_GPE0_BLK_LEN, 192 - NULL); 193 217 pm->pcihp_bridge_en = 194 218 object_property_get_bool(obj, "acpi-pci-hotplug-with-bridge-support", 195 219 NULL); ··· 257 281 NULL)); 258 282 } 259 283 260 - #define ACPI_PORT_SMI_CMD 0x00b2 /* TODO: this is APM_CNT_IOPORT */ 261 - 262 284 static void acpi_align_size(GArray *blob, unsigned align) 263 285 { 264 286 /* Align size to multiple of given size. 
This reduces the chance ··· 274 296 AcpiFacsDescriptorRev1 *facs = acpi_data_push(table_data, sizeof *facs); 275 297 memcpy(&facs->signature, "FACS", 4); 276 298 facs->length = cpu_to_le32(sizeof(*facs)); 277 - } 278 - 279 - /* Load chipset information in FADT */ 280 - static void fadt_setup(AcpiFadtDescriptorRev3 *fadt, AcpiPmInfo *pm) 281 - { 282 - fadt->model = 1; 283 - fadt->reserved1 = 0; 284 - fadt->sci_int = cpu_to_le16(pm->sci_int); 285 - fadt->smi_cmd = cpu_to_le32(ACPI_PORT_SMI_CMD); 286 - fadt->acpi_enable = pm->acpi_enable_cmd; 287 - fadt->acpi_disable = pm->acpi_disable_cmd; 288 - /* EVT, CNT, TMR offset matches hw/acpi/core.c */ 289 - fadt->pm1a_evt_blk = cpu_to_le32(pm->io_base); 290 - fadt->pm1a_cnt_blk = cpu_to_le32(pm->io_base + 0x04); 291 - fadt->pm_tmr_blk = cpu_to_le32(pm->io_base + 0x08); 292 - fadt->gpe0_blk = cpu_to_le32(pm->gpe0_blk); 293 - /* EVT, CNT, TMR length matches hw/acpi/core.c */ 294 - fadt->pm1_evt_len = 4; 295 - fadt->pm1_cnt_len = 2; 296 - fadt->pm_tmr_len = 4; 297 - fadt->gpe0_blk_len = pm->gpe0_blk_len; 298 - fadt->plvl2_lat = cpu_to_le16(0xfff); /* C2 state not supported */ 299 - fadt->plvl3_lat = cpu_to_le16(0xfff); /* C3 state not supported */ 300 - fadt->flags = cpu_to_le32((1 << ACPI_FADT_F_WBINVD) | 301 - (1 << ACPI_FADT_F_PROC_C1) | 302 - (1 << ACPI_FADT_F_SLP_BUTTON) | 303 - (1 << ACPI_FADT_F_RTC_S4)); 304 - fadt->flags |= cpu_to_le32(1 << ACPI_FADT_F_USE_PLATFORM_CLOCK); 305 - /* APIC destination mode ("Flat Logical") has an upper limit of 8 CPUs 306 - * For more than 8 CPUs, "Clustered Logical" mode has to be used 307 - */ 308 - if (max_cpus > 8) { 309 - fadt->flags |= cpu_to_le32(1 << ACPI_FADT_F_FORCE_APIC_CLUSTER_MODEL); 310 - } 311 - fadt->century = RTC_CENTURY; 312 - if (pm->force_rev1_fadt) { 313 - return; 314 - } 315 - 316 - fadt->flags |= cpu_to_le32(1 << ACPI_FADT_F_RESET_REG_SUP); 317 - fadt->reset_value = 0xf; 318 - fadt->reset_register.space_id = AML_SYSTEM_IO; 319 - fadt->reset_register.bit_width = 8; 
320 - fadt->reset_register.address = cpu_to_le64(ICH9_RST_CNT_IOPORT); 321 - /* The above need not be conditional on machine type because the reset port 322 - * happens to be the same on PIIX (pc) and ICH9 (q35). */ 323 - QEMU_BUILD_BUG_ON(ICH9_RST_CNT_IOPORT != RCR_IOPORT); 324 - 325 - fadt->xpm1a_event_block.space_id = AML_SYSTEM_IO; 326 - fadt->xpm1a_event_block.bit_width = fadt->pm1_evt_len * 8; 327 - fadt->xpm1a_event_block.address = cpu_to_le64(pm->io_base); 328 - 329 - fadt->xpm1a_control_block.space_id = AML_SYSTEM_IO; 330 - fadt->xpm1a_control_block.bit_width = fadt->pm1_cnt_len * 8; 331 - fadt->xpm1a_control_block.address = cpu_to_le64(pm->io_base + 0x4); 332 - 333 - fadt->xpm_timer_block.space_id = AML_SYSTEM_IO; 334 - fadt->xpm_timer_block.bit_width = fadt->pm_tmr_len * 8; 335 - fadt->xpm_timer_block.address = cpu_to_le64(pm->io_base + 0x8); 336 - 337 - fadt->xgpe0_block.space_id = AML_SYSTEM_IO; 338 - fadt->xgpe0_block.bit_width = pm->gpe0_blk_len * 8; 339 - fadt->xgpe0_block.address = cpu_to_le64(pm->gpe0_blk); 340 - } 341 - 342 - 343 - /* FADT */ 344 - static void 345 - build_fadt(GArray *table_data, BIOSLinker *linker, AcpiPmInfo *pm, 346 - unsigned facs_tbl_offset, unsigned dsdt_tbl_offset, 347 - const char *oem_id, const char *oem_table_id) 348 - { 349 - AcpiFadtDescriptorRev3 *fadt = acpi_data_push(table_data, sizeof(*fadt)); 350 - unsigned fw_ctrl_offset = (char *)&fadt->firmware_ctrl - table_data->data; 351 - unsigned dsdt_entry_offset = (char *)&fadt->dsdt - table_data->data; 352 - unsigned xdsdt_entry_offset = (char *)&fadt->x_dsdt - table_data->data; 353 - int fadt_size = sizeof(*fadt); 354 - int rev = 3; 355 - 356 - /* FACS address to be filled by Guest linker */ 357 - bios_linker_loader_add_pointer(linker, 358 - ACPI_BUILD_TABLE_FILE, fw_ctrl_offset, sizeof(fadt->firmware_ctrl), 359 - ACPI_BUILD_TABLE_FILE, facs_tbl_offset); 360 - 361 - /* DSDT address to be filled by Guest linker */ 362 - fadt_setup(fadt, pm); 363 - 
bios_linker_loader_add_pointer(linker, 364 - ACPI_BUILD_TABLE_FILE, dsdt_entry_offset, sizeof(fadt->dsdt), 365 - ACPI_BUILD_TABLE_FILE, dsdt_tbl_offset); 366 - if (pm->force_rev1_fadt) { 367 - rev = 1; 368 - fadt_size = offsetof(typeof(*fadt), reset_register); 369 - } else { 370 - bios_linker_loader_add_pointer(linker, 371 - ACPI_BUILD_TABLE_FILE, xdsdt_entry_offset, sizeof(fadt->x_dsdt), 372 - ACPI_BUILD_TABLE_FILE, dsdt_tbl_offset); 373 - } 374 - 375 - build_header(linker, table_data, 376 - (void *)fadt, "FACP", fadt_size, rev, oem_id, oem_table_id); 377 299 } 378 300 379 301 void pc_madt_cpu_entry(AcpiDeviceIf *adev, int uid, ··· 2053 1975 aml_append(dev, aml_name_decl("_STA", aml_int(0xB))); 2054 1976 crs = aml_resource_template(); 2055 1977 aml_append(crs, 2056 - aml_io(AML_DECODE16, pm->gpe0_blk, pm->gpe0_blk, 1, pm->gpe0_blk_len) 1978 + aml_io( 1979 + AML_DECODE16, 1980 + pm->fadt.gpe0_blk.address, 1981 + pm->fadt.gpe0_blk.address, 1982 + 1, 1983 + pm->fadt.gpe0_blk.bit_width / 8) 2057 1984 ); 2058 1985 aml_append(dev, aml_name_decl("_CRS", crs)); 2059 1986 aml_append(scope, dev); ··· 2323 2250 #define HOLE_640K_START (640 * 1024) 2324 2251 #define HOLE_640K_END (1024 * 1024) 2325 2252 2253 + static void build_srat_hotpluggable_memory(GArray *table_data, uint64_t base, 2254 + uint64_t len, int default_node) 2255 + { 2256 + MemoryDeviceInfoList *info_list = qmp_pc_dimm_device_list(); 2257 + MemoryDeviceInfoList *info; 2258 + MemoryDeviceInfo *mi; 2259 + PCDIMMDeviceInfo *di; 2260 + uint64_t end = base + len, cur, size; 2261 + bool is_nvdimm; 2262 + AcpiSratMemoryAffinity *numamem; 2263 + MemoryAffinityFlags flags; 2264 + 2265 + for (cur = base, info = info_list; 2266 + cur < end; 2267 + cur += size, info = info->next) { 2268 + numamem = acpi_data_push(table_data, sizeof *numamem); 2269 + 2270 + if (!info) { 2271 + build_srat_memory(numamem, cur, end - cur, default_node, 2272 + MEM_AFFINITY_HOTPLUGGABLE | MEM_AFFINITY_ENABLED); 2273 + break; 2274 + } 2275 + 
2276 + mi = info->value; 2277 + is_nvdimm = (mi->type == MEMORY_DEVICE_INFO_KIND_NVDIMM); 2278 + di = !is_nvdimm ? mi->u.dimm.data : mi->u.nvdimm.data; 2279 + 2280 + if (cur < di->addr) { 2281 + build_srat_memory(numamem, cur, di->addr - cur, default_node, 2282 + MEM_AFFINITY_HOTPLUGGABLE | MEM_AFFINITY_ENABLED); 2283 + numamem = acpi_data_push(table_data, sizeof *numamem); 2284 + } 2285 + 2286 + size = di->size; 2287 + 2288 + flags = MEM_AFFINITY_ENABLED; 2289 + if (di->hotpluggable) { 2290 + flags |= MEM_AFFINITY_HOTPLUGGABLE; 2291 + } 2292 + if (is_nvdimm) { 2293 + flags |= MEM_AFFINITY_NON_VOLATILE; 2294 + } 2295 + 2296 + build_srat_memory(numamem, di->addr, size, di->node, flags); 2297 + } 2298 + 2299 + qapi_free_MemoryDeviceInfoList(info_list); 2300 + } 2301 + 2326 2302 static void 2327 2303 build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) 2328 2304 { ··· 2434 2410 * providing _PXM method if necessary. 2435 2411 */ 2436 2412 if (hotplugabble_address_space_size) { 2437 - numamem = acpi_data_push(table_data, sizeof *numamem); 2438 - build_srat_memory(numamem, pcms->hotplug_memory.base, 2439 - hotplugabble_address_space_size, pcms->numa_nodes - 1, 2440 - MEM_AFFINITY_HOTPLUGGABLE | MEM_AFFINITY_ENABLED); 2413 + build_srat_hotpluggable_memory(table_data, pcms->hotplug_memory.base, 2414 + hotplugabble_address_space_size, 2415 + pcms->numa_nodes - 1); 2441 2416 } 2442 2417 2443 2418 build_header(linker, table_data, ··· 2700 2675 /* ACPI tables pointed to by RSDT */ 2701 2676 fadt = tables_blob->len; 2702 2677 acpi_add_table(table_offsets, tables_blob); 2703 - build_fadt(tables_blob, tables->linker, &pm, facs, dsdt, 2678 + pm.fadt.facs_tbl_offset = &facs; 2679 + pm.fadt.dsdt_tbl_offset = &dsdt; 2680 + pm.fadt.xdsdt_tbl_offset = &dsdt; 2681 + build_fadt(tables_blob, tables->linker, &pm.fadt, 2704 2682 slic_oem.id, slic_oem.table_id); 2705 2683 aml_len += tables_blob->len - fadt; 2706 2684
-1
hw/isa/apm.c
··· 34 34 #endif 35 35 36 36 /* fixed I/O location */ 37 - #define APM_CNT_IOPORT 0xb2 38 37 #define APM_STS_IOPORT 0xb3 39 38 40 39 static void apm_ioport_writeb(void *opaque, hwaddr addr, uint64_t val,
+52 -39
hw/mem/pc-dimm.c
··· 20 20 21 21 #include "qemu/osdep.h" 22 22 #include "hw/mem/pc-dimm.h" 23 + #include "hw/mem/nvdimm.h" 23 24 #include "qapi/error.h" 24 25 #include "qemu/config-file.h" 25 26 #include "qapi/visitor.h" ··· 162 163 return pc_existing_dimms_capacity(&error_abort); 163 164 } 164 165 165 - int qmp_pc_dimm_device_list(Object *obj, void *opaque) 166 - { 167 - MemoryDeviceInfoList ***prev = opaque; 168 - 169 - if (object_dynamic_cast(obj, TYPE_PC_DIMM)) { 170 - DeviceState *dev = DEVICE(obj); 171 - 172 - if (dev->realized) { 173 - MemoryDeviceInfoList *elem = g_new0(MemoryDeviceInfoList, 1); 174 - MemoryDeviceInfo *info = g_new0(MemoryDeviceInfo, 1); 175 - PCDIMMDeviceInfo *di = g_new0(PCDIMMDeviceInfo, 1); 176 - DeviceClass *dc = DEVICE_GET_CLASS(obj); 177 - PCDIMMDevice *dimm = PC_DIMM(obj); 178 - 179 - if (dev->id) { 180 - di->has_id = true; 181 - di->id = g_strdup(dev->id); 182 - } 183 - di->hotplugged = dev->hotplugged; 184 - di->hotpluggable = dc->hotpluggable; 185 - di->addr = dimm->addr; 186 - di->slot = dimm->slot; 187 - di->node = dimm->node; 188 - di->size = object_property_get_uint(OBJECT(dimm), PC_DIMM_SIZE_PROP, 189 - NULL); 190 - di->memdev = object_get_canonical_path(OBJECT(dimm->hostmem)); 191 - 192 - info->u.dimm.data = di; 193 - elem->value = info; 194 - elem->next = NULL; 195 - **prev = elem; 196 - *prev = &elem->next; 197 - } 198 - } 199 - 200 - object_child_foreach(obj, qmp_pc_dimm_device_list, opaque); 201 - return 0; 202 - } 203 - 204 166 static int pc_dimm_slot2bitmap(Object *obj, void *opaque) 205 167 { 206 168 unsigned long *bitmap = opaque; ··· 274 236 275 237 object_child_foreach(obj, pc_dimm_built_list, opaque); 276 238 return 0; 239 + } 240 + 241 + MemoryDeviceInfoList *qmp_pc_dimm_device_list(void) 242 + { 243 + GSList *dimms = NULL, *item; 244 + MemoryDeviceInfoList *list = NULL, *prev = NULL; 245 + 246 + object_child_foreach(qdev_get_machine(), pc_dimm_built_list, &dimms); 247 + 248 + for (item = dimms; item; item = g_slist_next(item)) 
{ 249 + PCDIMMDevice *dimm = PC_DIMM(item->data); 250 + Object *obj = OBJECT(dimm); 251 + MemoryDeviceInfoList *elem = g_new0(MemoryDeviceInfoList, 1); 252 + MemoryDeviceInfo *info = g_new0(MemoryDeviceInfo, 1); 253 + PCDIMMDeviceInfo *di = g_new0(PCDIMMDeviceInfo, 1); 254 + bool is_nvdimm = object_dynamic_cast(obj, TYPE_NVDIMM); 255 + DeviceClass *dc = DEVICE_GET_CLASS(obj); 256 + DeviceState *dev = DEVICE(obj); 257 + 258 + if (dev->id) { 259 + di->has_id = true; 260 + di->id = g_strdup(dev->id); 261 + } 262 + di->hotplugged = dev->hotplugged; 263 + di->hotpluggable = dc->hotpluggable; 264 + di->addr = dimm->addr; 265 + di->slot = dimm->slot; 266 + di->node = dimm->node; 267 + di->size = object_property_get_uint(obj, PC_DIMM_SIZE_PROP, NULL); 268 + di->memdev = object_get_canonical_path(OBJECT(dimm->hostmem)); 269 + 270 + if (!is_nvdimm) { 271 + info->u.dimm.data = di; 272 + info->type = MEMORY_DEVICE_INFO_KIND_DIMM; 273 + } else { 274 + info->u.nvdimm.data = di; 275 + info->type = MEMORY_DEVICE_INFO_KIND_NVDIMM; 276 + } 277 + elem->value = info; 278 + elem->next = NULL; 279 + if (prev) { 280 + prev->next = elem; 281 + } else { 282 + list = elem; 283 + } 284 + prev = elem; 285 + } 286 + 287 + g_slist_free(dimms); 288 + 289 + return list; 277 290 } 278 291 279 292 uint64_t pc_dimm_get_free_addr(uint64_t address_space_start,
+54 -27
hw/net/virtio-net.c
··· 26 26 #include "qapi/qapi-events-net.h" 27 27 #include "hw/virtio/virtio-access.h" 28 28 #include "migration/misc.h" 29 + #include "standard-headers/linux/ethtool.h" 29 30 30 31 #define VIRTIO_NET_VM_VERSION 11 31 32 ··· 48 49 (offsetof(container, field) + sizeof(((container *)0)->field)) 49 50 50 51 typedef struct VirtIOFeature { 51 - uint32_t flags; 52 + uint64_t flags; 52 53 size_t end; 53 54 } VirtIOFeature; 54 55 55 56 static VirtIOFeature feature_sizes[] = { 56 - {.flags = 1 << VIRTIO_NET_F_MAC, 57 + {.flags = 1ULL << VIRTIO_NET_F_MAC, 57 58 .end = endof(struct virtio_net_config, mac)}, 58 - {.flags = 1 << VIRTIO_NET_F_STATUS, 59 + {.flags = 1ULL << VIRTIO_NET_F_STATUS, 59 60 .end = endof(struct virtio_net_config, status)}, 60 - {.flags = 1 << VIRTIO_NET_F_MQ, 61 + {.flags = 1ULL << VIRTIO_NET_F_MQ, 61 62 .end = endof(struct virtio_net_config, max_virtqueue_pairs)}, 62 - {.flags = 1 << VIRTIO_NET_F_MTU, 63 + {.flags = 1ULL << VIRTIO_NET_F_MTU, 63 64 .end = endof(struct virtio_net_config, mtu)}, 65 + {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX, 66 + .end = endof(struct virtio_net_config, duplex)}, 64 67 {} 65 68 }; 66 69 ··· 89 92 virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queues); 90 93 virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu); 91 94 memcpy(netcfg.mac, n->mac, ETH_ALEN); 95 + virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed); 96 + netcfg.duplex = n->net_conf.duplex; 92 97 memcpy(config, &netcfg, n->config_size); 93 98 } 94 99 ··· 1938 1943 int i; 1939 1944 1940 1945 if (n->net_conf.mtu) { 1941 - n->host_features |= (0x1 << VIRTIO_NET_F_MTU); 1946 + n->host_features |= (1ULL << VIRTIO_NET_F_MTU); 1947 + } 1948 + 1949 + if (n->net_conf.duplex_str) { 1950 + if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) { 1951 + n->net_conf.duplex = DUPLEX_HALF; 1952 + } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) { 1953 + n->net_conf.duplex = DUPLEX_FULL; 1954 + } else { 1955 + error_setg(errp, "'duplex' must be 'half' or 'full'"); 
1956 + } 1957 + n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX); 1958 + } else { 1959 + n->net_conf.duplex = DUPLEX_UNKNOWN; 1960 + } 1961 + 1962 + if (n->net_conf.speed < SPEED_UNKNOWN) { 1963 + error_setg(errp, "'speed' must be between 0 and INT_MAX"); 1964 + } else if (n->net_conf.speed >= 0) { 1965 + n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX); 1942 1966 } 1943 1967 1944 1968 virtio_net_set_config_size(n, n->host_features); ··· 2109 2133 }; 2110 2134 2111 2135 static Property virtio_net_properties[] = { 2112 - DEFINE_PROP_BIT("csum", VirtIONet, host_features, VIRTIO_NET_F_CSUM, true), 2113 - DEFINE_PROP_BIT("guest_csum", VirtIONet, host_features, 2136 + DEFINE_PROP_BIT64("csum", VirtIONet, host_features, 2137 + VIRTIO_NET_F_CSUM, true), 2138 + DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features, 2114 2139 VIRTIO_NET_F_GUEST_CSUM, true), 2115 - DEFINE_PROP_BIT("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true), 2116 - DEFINE_PROP_BIT("guest_tso4", VirtIONet, host_features, 2140 + DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true), 2141 + DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features, 2117 2142 VIRTIO_NET_F_GUEST_TSO4, true), 2118 - DEFINE_PROP_BIT("guest_tso6", VirtIONet, host_features, 2143 + DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features, 2119 2144 VIRTIO_NET_F_GUEST_TSO6, true), 2120 - DEFINE_PROP_BIT("guest_ecn", VirtIONet, host_features, 2145 + DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features, 2121 2146 VIRTIO_NET_F_GUEST_ECN, true), 2122 - DEFINE_PROP_BIT("guest_ufo", VirtIONet, host_features, 2147 + DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features, 2123 2148 VIRTIO_NET_F_GUEST_UFO, true), 2124 - DEFINE_PROP_BIT("guest_announce", VirtIONet, host_features, 2149 + DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features, 2125 2150 VIRTIO_NET_F_GUEST_ANNOUNCE, true), 2126 - DEFINE_PROP_BIT("host_tso4", VirtIONet, host_features, 2151 + DEFINE_PROP_BIT64("host_tso4", 
VirtIONet, host_features, 2127 2152 VIRTIO_NET_F_HOST_TSO4, true), 2128 - DEFINE_PROP_BIT("host_tso6", VirtIONet, host_features, 2153 + DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features, 2129 2154 VIRTIO_NET_F_HOST_TSO6, true), 2130 - DEFINE_PROP_BIT("host_ecn", VirtIONet, host_features, 2155 + DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features, 2131 2156 VIRTIO_NET_F_HOST_ECN, true), 2132 - DEFINE_PROP_BIT("host_ufo", VirtIONet, host_features, 2157 + DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features, 2133 2158 VIRTIO_NET_F_HOST_UFO, true), 2134 - DEFINE_PROP_BIT("mrg_rxbuf", VirtIONet, host_features, 2159 + DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features, 2135 2160 VIRTIO_NET_F_MRG_RXBUF, true), 2136 - DEFINE_PROP_BIT("status", VirtIONet, host_features, 2161 + DEFINE_PROP_BIT64("status", VirtIONet, host_features, 2137 2162 VIRTIO_NET_F_STATUS, true), 2138 - DEFINE_PROP_BIT("ctrl_vq", VirtIONet, host_features, 2163 + DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features, 2139 2164 VIRTIO_NET_F_CTRL_VQ, true), 2140 - DEFINE_PROP_BIT("ctrl_rx", VirtIONet, host_features, 2165 + DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features, 2141 2166 VIRTIO_NET_F_CTRL_RX, true), 2142 - DEFINE_PROP_BIT("ctrl_vlan", VirtIONet, host_features, 2167 + DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features, 2143 2168 VIRTIO_NET_F_CTRL_VLAN, true), 2144 - DEFINE_PROP_BIT("ctrl_rx_extra", VirtIONet, host_features, 2169 + DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features, 2145 2170 VIRTIO_NET_F_CTRL_RX_EXTRA, true), 2146 - DEFINE_PROP_BIT("ctrl_mac_addr", VirtIONet, host_features, 2171 + DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features, 2147 2172 VIRTIO_NET_F_CTRL_MAC_ADDR, true), 2148 - DEFINE_PROP_BIT("ctrl_guest_offloads", VirtIONet, host_features, 2173 + DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features, 2149 2174 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true), 2150 - DEFINE_PROP_BIT("mq", VirtIONet, host_features, 
VIRTIO_NET_F_MQ, false), 2175 + DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false), 2151 2176 DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf), 2152 2177 DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer, 2153 2178 TX_TIMER_INTERVAL), ··· 2160 2185 DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0), 2161 2186 DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend, 2162 2187 true), 2188 + DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN), 2189 + DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str), 2163 2190 DEFINE_PROP_END_OF_LIST(), 2164 2191 }; 2165 2192
-14
hw/pci/pci.c
··· 2048 2048 } 2049 2049 } 2050 2050 2051 - static void pci_default_realize(PCIDevice *dev, Error **errp) 2052 - { 2053 - PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(dev); 2054 - 2055 - if (pc->init) { 2056 - if (pc->init(dev) < 0) { 2057 - error_setg(errp, "Device initialization failed"); 2058 - return; 2059 - } 2060 - } 2061 - } 2062 - 2063 2051 PCIDevice *pci_create_multifunction(PCIBus *bus, int devfn, bool multifunction, 2064 2052 const char *name) 2065 2053 { ··· 2532 2520 static void pci_device_class_init(ObjectClass *klass, void *data) 2533 2521 { 2534 2522 DeviceClass *k = DEVICE_CLASS(klass); 2535 - PCIDeviceClass *pc = PCI_DEVICE_CLASS(klass); 2536 2523 2537 2524 k->realize = pci_qdev_realize; 2538 2525 k->unrealize = pci_qdev_unrealize; 2539 2526 k->bus_type = TYPE_PCI_BUS; 2540 2527 k->props = pci_props; 2541 - pc->realize = pci_default_realize; 2542 2528 } 2543 2529 2544 2530 static void pci_device_class_base_init(ObjectClass *klass, void *data)
+1 -2
hw/ppc/spapr.c
··· 722 722 } 723 723 724 724 if (hotplug_lmb_start) { 725 - MemoryDeviceInfoList **prev = &dimms; 726 - qmp_pc_dimm_device_list(qdev_get_machine(), &prev); 725 + dimms = qmp_pc_dimm_device_list(); 727 726 } 728 727 729 728 /* ibm,dynamic-memory */
+15 -1
hw/virtio/trace-events
··· 3 3 # hw/virtio/vhost.c 4 4 vhost_commit(bool started, bool changed) "Started: %d Changed: %d" 5 5 vhost_region_add_section(const char *name, uint64_t gpa, uint64_t size, uint64_t host) "%s: 0x%"PRIx64"+0x%"PRIx64" @ 0x%"PRIx64 6 - vhost_region_add_section_abut(const char *name, uint64_t new_size) "%s: 0x%"PRIx64 6 + vhost_region_add_section_merge(const char *name, uint64_t new_size, uint64_t gpa, uint64_t owr) "%s: size: 0x%"PRIx64 " gpa: 0x%"PRIx64 " owr: 0x%"PRIx64 7 + vhost_region_add_section_aligned(const char *name, uint64_t gpa, uint64_t size, uint64_t host) "%s: 0x%"PRIx64"+0x%"PRIx64" @ 0x%"PRIx64 7 8 vhost_section(const char *name, int r) "%s:%d" 9 + 10 + # hw/virtio/vhost-user.c 11 + vhost_user_postcopy_end_entry(void) "" 12 + vhost_user_postcopy_end_exit(void) "" 13 + vhost_user_postcopy_fault_handler(const char *name, uint64_t fault_address, int nregions) "%s: @0x%"PRIx64" nregions:%d" 14 + vhost_user_postcopy_fault_handler_loop(int i, uint64_t client_base, uint64_t size) "%d: client 0x%"PRIx64" +0x%"PRIx64 15 + vhost_user_postcopy_fault_handler_found(int i, uint64_t region_offset, uint64_t rb_offset) "%d: region_offset: 0x%"PRIx64" rb_offset:0x%"PRIx64 16 + vhost_user_postcopy_listen(void) "" 17 + vhost_user_set_mem_table_postcopy(uint64_t client_addr, uint64_t qhva, int reply_i, int region_i) "client:0x%"PRIx64" for hva: 0x%"PRIx64" reply %d region %d" 18 + vhost_user_set_mem_table_withfd(int index, const char *name, uint64_t memory_size, uint64_t guest_phys_addr, uint64_t userspace_addr, uint64_t offset) "%d:%s: size:0x%"PRIx64" GPA:0x%"PRIx64" QVA/userspace:0x%"PRIx64" RB offset:0x%"PRIx64 19 + vhost_user_postcopy_waker(const char *rb, uint64_t rb_offset) "%s + 0x%"PRIx64 20 + vhost_user_postcopy_waker_found(uint64_t client_addr) "0x%"PRIx64 21 + vhost_user_postcopy_waker_nomatch(const char *rb, uint64_t rb_offset) "%s + 0x%"PRIx64 8 22 9 23 # hw/virtio/virtio.c 10 24 virtqueue_alloc_element(void *elem, size_t sz, unsigned in_num, unsigned 
out_num) "elem %p size %zd in_num %u out_num %u"
+407 -4
hw/virtio/vhost-user.c
··· 18 18 #include "qemu/error-report.h" 19 19 #include "qemu/sockets.h" 20 20 #include "sysemu/cryptodev.h" 21 + #include "migration/migration.h" 22 + #include "migration/postcopy-ram.h" 23 + #include "trace.h" 21 24 22 25 #include <sys/ioctl.h> 23 26 #include <sys/socket.h> 24 27 #include <sys/un.h> 25 28 #include <linux/vhost.h> 29 + #include <linux/userfaultfd.h> 26 30 27 31 #define VHOST_MEMORY_MAX_NREGIONS 8 28 32 #define VHOST_USER_F_PROTOCOL_FEATURES 30 ··· 41 45 VHOST_USER_PROTOCOL_F_SLAVE_REQ = 5, 42 46 VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6, 43 47 VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7, 44 - 48 + VHOST_USER_PROTOCOL_F_PAGEFAULT = 8, 45 49 VHOST_USER_PROTOCOL_F_MAX 46 50 }; 47 51 ··· 76 80 VHOST_USER_SET_CONFIG = 25, 77 81 VHOST_USER_CREATE_CRYPTO_SESSION = 26, 78 82 VHOST_USER_CLOSE_CRYPTO_SESSION = 27, 83 + VHOST_USER_POSTCOPY_ADVISE = 28, 84 + VHOST_USER_POSTCOPY_LISTEN = 29, 85 + VHOST_USER_POSTCOPY_END = 30, 79 86 VHOST_USER_MAX 80 87 } VhostUserRequest; 81 88 ··· 164 171 #define VHOST_USER_VERSION (0x1) 165 172 166 173 struct vhost_user { 174 + struct vhost_dev *dev; 167 175 CharBackend *chr; 168 176 int slave_fd; 177 + NotifierWithReturn postcopy_notifier; 178 + struct PostCopyFD postcopy_fd; 179 + uint64_t postcopy_client_bases[VHOST_MEMORY_MAX_NREGIONS]; 180 + /* Length of the region_rb and region_rb_offset arrays */ 181 + size_t region_rb_len; 182 + /* RAMBlock associated with a given region */ 183 + RAMBlock **region_rb; 184 + /* The offset from the start of the RAMBlock to the start of the 185 + * vhost region. 
186 + */ 187 + ram_addr_t *region_rb_offset; 188 + 189 + /* True once we've entered postcopy_listen */ 190 + bool postcopy_listen; 169 191 }; 170 192 171 193 static bool ioeventfd_enabled(void) ··· 330 352 return 0; 331 353 } 332 354 355 + static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev, 356 + struct vhost_memory *mem) 357 + { 358 + struct vhost_user *u = dev->opaque; 359 + int fds[VHOST_MEMORY_MAX_NREGIONS]; 360 + int i, fd; 361 + size_t fd_num = 0; 362 + bool reply_supported = virtio_has_feature(dev->protocol_features, 363 + VHOST_USER_PROTOCOL_F_REPLY_ACK); 364 + VhostUserMsg msg_reply; 365 + int region_i, msg_i; 366 + 367 + VhostUserMsg msg = { 368 + .hdr.request = VHOST_USER_SET_MEM_TABLE, 369 + .hdr.flags = VHOST_USER_VERSION, 370 + }; 371 + 372 + if (reply_supported) { 373 + msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; 374 + } 375 + 376 + if (u->region_rb_len < dev->mem->nregions) { 377 + u->region_rb = g_renew(RAMBlock*, u->region_rb, dev->mem->nregions); 378 + u->region_rb_offset = g_renew(ram_addr_t, u->region_rb_offset, 379 + dev->mem->nregions); 380 + memset(&(u->region_rb[u->region_rb_len]), '\0', 381 + sizeof(RAMBlock *) * (dev->mem->nregions - u->region_rb_len)); 382 + memset(&(u->region_rb_offset[u->region_rb_len]), '\0', 383 + sizeof(ram_addr_t) * (dev->mem->nregions - u->region_rb_len)); 384 + u->region_rb_len = dev->mem->nregions; 385 + } 386 + 387 + for (i = 0; i < dev->mem->nregions; ++i) { 388 + struct vhost_memory_region *reg = dev->mem->regions + i; 389 + ram_addr_t offset; 390 + MemoryRegion *mr; 391 + 392 + assert((uintptr_t)reg->userspace_addr == reg->userspace_addr); 393 + mr = memory_region_from_host((void *)(uintptr_t)reg->userspace_addr, 394 + &offset); 395 + fd = memory_region_get_fd(mr); 396 + if (fd > 0) { 397 + trace_vhost_user_set_mem_table_withfd(fd_num, mr->name, 398 + reg->memory_size, 399 + reg->guest_phys_addr, 400 + reg->userspace_addr, offset); 401 + u->region_rb_offset[i] = offset; 402 + u->region_rb[i] 
= mr->ram_block; 403 + msg.payload.memory.regions[fd_num].userspace_addr = 404 + reg->userspace_addr; 405 + msg.payload.memory.regions[fd_num].memory_size = reg->memory_size; 406 + msg.payload.memory.regions[fd_num].guest_phys_addr = 407 + reg->guest_phys_addr; 408 + msg.payload.memory.regions[fd_num].mmap_offset = offset; 409 + assert(fd_num < VHOST_MEMORY_MAX_NREGIONS); 410 + fds[fd_num++] = fd; 411 + } else { 412 + u->region_rb_offset[i] = 0; 413 + u->region_rb[i] = NULL; 414 + } 415 + } 416 + 417 + msg.payload.memory.nregions = fd_num; 418 + 419 + if (!fd_num) { 420 + error_report("Failed initializing vhost-user memory map, " 421 + "consider using -object memory-backend-file share=on"); 422 + return -1; 423 + } 424 + 425 + msg.hdr.size = sizeof(msg.payload.memory.nregions); 426 + msg.hdr.size += sizeof(msg.payload.memory.padding); 427 + msg.hdr.size += fd_num * sizeof(VhostUserMemoryRegion); 428 + 429 + if (vhost_user_write(dev, &msg, fds, fd_num) < 0) { 430 + return -1; 431 + } 432 + 433 + if (vhost_user_read(dev, &msg_reply) < 0) { 434 + return -1; 435 + } 436 + 437 + if (msg_reply.hdr.request != VHOST_USER_SET_MEM_TABLE) { 438 + error_report("%s: Received unexpected msg type." 439 + "Expected %d received %d", __func__, 440 + VHOST_USER_SET_MEM_TABLE, msg_reply.hdr.request); 441 + return -1; 442 + } 443 + /* We're using the same structure, just reusing one of the 444 + * fields, so it should be the same size. 
445 + */ 446 + if (msg_reply.hdr.size != msg.hdr.size) { 447 + error_report("%s: Unexpected size for postcopy reply " 448 + "%d vs %d", __func__, msg_reply.hdr.size, msg.hdr.size); 449 + return -1; 450 + } 451 + 452 + memset(u->postcopy_client_bases, 0, 453 + sizeof(uint64_t) * VHOST_MEMORY_MAX_NREGIONS); 454 + 455 + /* They're in the same order as the regions that were sent 456 + * but some of the regions were skipped (above) if they 457 + * didn't have fd's 458 + */ 459 + for (msg_i = 0, region_i = 0; 460 + region_i < dev->mem->nregions; 461 + region_i++) { 462 + if (msg_i < fd_num && 463 + msg_reply.payload.memory.regions[msg_i].guest_phys_addr == 464 + dev->mem->regions[region_i].guest_phys_addr) { 465 + u->postcopy_client_bases[region_i] = 466 + msg_reply.payload.memory.regions[msg_i].userspace_addr; 467 + trace_vhost_user_set_mem_table_postcopy( 468 + msg_reply.payload.memory.regions[msg_i].userspace_addr, 469 + msg.payload.memory.regions[msg_i].userspace_addr, 470 + msg_i, region_i); 471 + msg_i++; 472 + } 473 + } 474 + if (msg_i != fd_num) { 475 + error_report("%s: postcopy reply not fully consumed " 476 + "%d vs %zd", 477 + __func__, msg_i, fd_num); 478 + return -1; 479 + } 480 + /* Now we've registered this with the postcopy code, we ack to the client, 481 + * because now we're in the position to be able to deal with any faults 482 + * it generates. 
483 + */ 484 + /* TODO: Use this for failure cases as well with a bad value */ 485 + msg.hdr.size = sizeof(msg.payload.u64); 486 + msg.payload.u64 = 0; /* OK */ 487 + if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 488 + return -1; 489 + } 490 + 491 + if (reply_supported) { 492 + return process_message_reply(dev, &msg); 493 + } 494 + 495 + return 0; 496 + } 497 + 333 498 static int vhost_user_set_mem_table(struct vhost_dev *dev, 334 499 struct vhost_memory *mem) 335 500 { 501 + struct vhost_user *u = dev->opaque; 336 502 int fds[VHOST_MEMORY_MAX_NREGIONS]; 337 503 int i, fd; 338 504 size_t fd_num = 0; 505 + bool do_postcopy = u->postcopy_listen && u->postcopy_fd.handler; 339 506 bool reply_supported = virtio_has_feature(dev->protocol_features, 340 - VHOST_USER_PROTOCOL_F_REPLY_ACK); 507 + VHOST_USER_PROTOCOL_F_REPLY_ACK) && 508 + !do_postcopy; 509 + 510 + if (do_postcopy) { 511 + /* Postcopy has enough differences that it's best done in it's own 512 + * version 513 + */ 514 + return vhost_user_set_mem_table_postcopy(dev, mem); 515 + } 341 516 342 517 VhostUserMsg msg = { 343 518 .hdr.request = VHOST_USER_SET_MEM_TABLE, ··· 362 537 error_report("Failed preparing vhost-user memory table msg"); 363 538 return -1; 364 539 } 365 - msg.payload.memory.regions[fd_num].userspace_addr = reg->userspace_addr; 540 + msg.payload.memory.regions[fd_num].userspace_addr = 541 + reg->userspace_addr; 366 542 msg.payload.memory.regions[fd_num].memory_size = reg->memory_size; 367 - msg.payload.memory.regions[fd_num].guest_phys_addr = reg->guest_phys_addr; 543 + msg.payload.memory.regions[fd_num].guest_phys_addr = 544 + reg->guest_phys_addr; 368 545 msg.payload.memory.regions[fd_num].mmap_offset = offset; 369 546 fds[fd_num++] = fd; 370 547 } ··· 791 968 return ret; 792 969 } 793 970 971 + /* 972 + * Called back from the postcopy fault thread when a fault is received on our 973 + * ufd. 
974 + * TODO: This is Linux specific 975 + */ 976 + static int vhost_user_postcopy_fault_handler(struct PostCopyFD *pcfd, 977 + void *ufd) 978 + { 979 + struct vhost_dev *dev = pcfd->data; 980 + struct vhost_user *u = dev->opaque; 981 + struct uffd_msg *msg = ufd; 982 + uint64_t faultaddr = msg->arg.pagefault.address; 983 + RAMBlock *rb = NULL; 984 + uint64_t rb_offset; 985 + int i; 986 + 987 + trace_vhost_user_postcopy_fault_handler(pcfd->idstr, faultaddr, 988 + dev->mem->nregions); 989 + for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) { 990 + trace_vhost_user_postcopy_fault_handler_loop(i, 991 + u->postcopy_client_bases[i], dev->mem->regions[i].memory_size); 992 + if (faultaddr >= u->postcopy_client_bases[i]) { 993 + /* Ofset of the fault address in the vhost region */ 994 + uint64_t region_offset = faultaddr - u->postcopy_client_bases[i]; 995 + if (region_offset < dev->mem->regions[i].memory_size) { 996 + rb_offset = region_offset + u->region_rb_offset[i]; 997 + trace_vhost_user_postcopy_fault_handler_found(i, 998 + region_offset, rb_offset); 999 + rb = u->region_rb[i]; 1000 + return postcopy_request_shared_page(pcfd, rb, faultaddr, 1001 + rb_offset); 1002 + } 1003 + } 1004 + } 1005 + error_report("%s: Failed to find region for fault %" PRIx64, 1006 + __func__, faultaddr); 1007 + return -1; 1008 + } 1009 + 1010 + static int vhost_user_postcopy_waker(struct PostCopyFD *pcfd, RAMBlock *rb, 1011 + uint64_t offset) 1012 + { 1013 + struct vhost_dev *dev = pcfd->data; 1014 + struct vhost_user *u = dev->opaque; 1015 + int i; 1016 + 1017 + trace_vhost_user_postcopy_waker(qemu_ram_get_idstr(rb), offset); 1018 + 1019 + if (!u) { 1020 + return 0; 1021 + } 1022 + /* Translate the offset into an address in the clients address space */ 1023 + for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) { 1024 + if (u->region_rb[i] == rb && 1025 + offset >= u->region_rb_offset[i] && 1026 + offset < (u->region_rb_offset[i] + 1027 + 
dev->mem->regions[i].memory_size)) { 1028 + uint64_t client_addr = (offset - u->region_rb_offset[i]) + 1029 + u->postcopy_client_bases[i]; 1030 + trace_vhost_user_postcopy_waker_found(client_addr); 1031 + return postcopy_wake_shared(pcfd, client_addr, rb); 1032 + } 1033 + } 1034 + 1035 + trace_vhost_user_postcopy_waker_nomatch(qemu_ram_get_idstr(rb), offset); 1036 + return 0; 1037 + } 1038 + 1039 + /* 1040 + * Called at the start of an inbound postcopy on reception of the 1041 + * 'advise' command. 1042 + */ 1043 + static int vhost_user_postcopy_advise(struct vhost_dev *dev, Error **errp) 1044 + { 1045 + struct vhost_user *u = dev->opaque; 1046 + CharBackend *chr = u->chr; 1047 + int ufd; 1048 + VhostUserMsg msg = { 1049 + .hdr.request = VHOST_USER_POSTCOPY_ADVISE, 1050 + .hdr.flags = VHOST_USER_VERSION, 1051 + }; 1052 + 1053 + if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 1054 + error_setg(errp, "Failed to send postcopy_advise to vhost"); 1055 + return -1; 1056 + } 1057 + 1058 + if (vhost_user_read(dev, &msg) < 0) { 1059 + error_setg(errp, "Failed to get postcopy_advise reply from vhost"); 1060 + return -1; 1061 + } 1062 + 1063 + if (msg.hdr.request != VHOST_USER_POSTCOPY_ADVISE) { 1064 + error_setg(errp, "Unexpected msg type. 
Expected %d received %d", 1065 + VHOST_USER_POSTCOPY_ADVISE, msg.hdr.request); 1066 + return -1; 1067 + } 1068 + 1069 + if (msg.hdr.size) { 1070 + error_setg(errp, "Received bad msg size."); 1071 + return -1; 1072 + } 1073 + ufd = qemu_chr_fe_get_msgfd(chr); 1074 + if (ufd < 0) { 1075 + error_setg(errp, "%s: Failed to get ufd", __func__); 1076 + return -1; 1077 + } 1078 + fcntl(ufd, F_SETFL, O_NONBLOCK); 1079 + 1080 + /* register ufd with userfault thread */ 1081 + u->postcopy_fd.fd = ufd; 1082 + u->postcopy_fd.data = dev; 1083 + u->postcopy_fd.handler = vhost_user_postcopy_fault_handler; 1084 + u->postcopy_fd.waker = vhost_user_postcopy_waker; 1085 + u->postcopy_fd.idstr = "vhost-user"; /* Need to find unique name */ 1086 + postcopy_register_shared_ufd(&u->postcopy_fd); 1087 + return 0; 1088 + } 1089 + 1090 + /* 1091 + * Called at the switch to postcopy on reception of the 'listen' command. 1092 + */ 1093 + static int vhost_user_postcopy_listen(struct vhost_dev *dev, Error **errp) 1094 + { 1095 + struct vhost_user *u = dev->opaque; 1096 + int ret; 1097 + VhostUserMsg msg = { 1098 + .hdr.request = VHOST_USER_POSTCOPY_LISTEN, 1099 + .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, 1100 + }; 1101 + u->postcopy_listen = true; 1102 + trace_vhost_user_postcopy_listen(); 1103 + if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 1104 + error_setg(errp, "Failed to send postcopy_listen to vhost"); 1105 + return -1; 1106 + } 1107 + 1108 + ret = process_message_reply(dev, &msg); 1109 + if (ret) { 1110 + error_setg(errp, "Failed to receive reply to postcopy_listen"); 1111 + return ret; 1112 + } 1113 + 1114 + return 0; 1115 + } 1116 + 1117 + /* 1118 + * Called at the end of postcopy 1119 + */ 1120 + static int vhost_user_postcopy_end(struct vhost_dev *dev, Error **errp) 1121 + { 1122 + VhostUserMsg msg = { 1123 + .hdr.request = VHOST_USER_POSTCOPY_END, 1124 + .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, 1125 + }; 1126 + int ret; 1127 + struct 
vhost_user *u = dev->opaque; 1128 + 1129 + trace_vhost_user_postcopy_end_entry(); 1130 + if (vhost_user_write(dev, &msg, NULL, 0) < 0) { 1131 + error_setg(errp, "Failed to send postcopy_end to vhost"); 1132 + return -1; 1133 + } 1134 + 1135 + ret = process_message_reply(dev, &msg); 1136 + if (ret) { 1137 + error_setg(errp, "Failed to receive reply to postcopy_end"); 1138 + return ret; 1139 + } 1140 + postcopy_unregister_shared_ufd(&u->postcopy_fd); 1141 + u->postcopy_fd.handler = NULL; 1142 + 1143 + trace_vhost_user_postcopy_end_exit(); 1144 + 1145 + return 0; 1146 + } 1147 + 1148 + static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier, 1149 + void *opaque) 1150 + { 1151 + struct PostcopyNotifyData *pnd = opaque; 1152 + struct vhost_user *u = container_of(notifier, struct vhost_user, 1153 + postcopy_notifier); 1154 + struct vhost_dev *dev = u->dev; 1155 + 1156 + switch (pnd->reason) { 1157 + case POSTCOPY_NOTIFY_PROBE: 1158 + if (!virtio_has_feature(dev->protocol_features, 1159 + VHOST_USER_PROTOCOL_F_PAGEFAULT)) { 1160 + /* TODO: Get the device name into this error somehow */ 1161 + error_setg(pnd->errp, 1162 + "vhost-user backend not capable of postcopy"); 1163 + return -ENOENT; 1164 + } 1165 + break; 1166 + 1167 + case POSTCOPY_NOTIFY_INBOUND_ADVISE: 1168 + return vhost_user_postcopy_advise(dev, pnd->errp); 1169 + 1170 + case POSTCOPY_NOTIFY_INBOUND_LISTEN: 1171 + return vhost_user_postcopy_listen(dev, pnd->errp); 1172 + 1173 + case POSTCOPY_NOTIFY_INBOUND_END: 1174 + return vhost_user_postcopy_end(dev, pnd->errp); 1175 + 1176 + default: 1177 + /* We ignore notifications we don't know */ 1178 + break; 1179 + } 1180 + 1181 + return 0; 1182 + } 1183 + 794 1184 static int vhost_user_init(struct vhost_dev *dev, void *opaque) 795 1185 { 796 1186 uint64_t features, protocol_features; ··· 802 1192 u = g_new0(struct vhost_user, 1); 803 1193 u->chr = opaque; 804 1194 u->slave_fd = -1; 1195 + u->dev = dev; 805 1196 dev->opaque = u; 806 1197 807 1198 err = 
vhost_user_get_features(dev, &features); ··· 858 1249 return err; 859 1250 } 860 1251 1252 + u->postcopy_notifier.notify = vhost_user_postcopy_notifier; 1253 + postcopy_add_notifier(&u->postcopy_notifier); 1254 + 861 1255 return 0; 862 1256 } 863 1257 ··· 868 1262 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); 869 1263 870 1264 u = dev->opaque; 1265 + if (u->postcopy_notifier.notify) { 1266 + postcopy_remove_notifier(&u->postcopy_notifier); 1267 + u->postcopy_notifier.notify = NULL; 1268 + } 871 1269 if (u->slave_fd >= 0) { 872 1270 qemu_set_fd_handler(u->slave_fd, NULL, NULL, NULL); 873 1271 close(u->slave_fd); 874 1272 u->slave_fd = -1; 875 1273 } 1274 + g_free(u->region_rb); 1275 + u->region_rb = NULL; 1276 + g_free(u->region_rb_offset); 1277 + u->region_rb_offset = NULL; 1278 + u->region_rb_len = 0; 876 1279 g_free(u); 877 1280 dev->opaque = 0; 878 1281
+56 -10
hw/virtio/vhost.c
··· 522 522 uint64_t mrs_gpa = section->offset_within_address_space; 523 523 uintptr_t mrs_host = (uintptr_t)memory_region_get_ram_ptr(section->mr) + 524 524 section->offset_within_region; 525 + RAMBlock *mrs_rb = section->mr->ram_block; 526 + size_t mrs_page = qemu_ram_pagesize(mrs_rb); 525 527 526 528 trace_vhost_region_add_section(section->mr->name, mrs_gpa, mrs_size, 527 529 mrs_host); 528 530 531 + /* Round the section to it's page size */ 532 + /* First align the start down to a page boundary */ 533 + uint64_t alignage = mrs_host & (mrs_page - 1); 534 + if (alignage) { 535 + mrs_host -= alignage; 536 + mrs_size += alignage; 537 + mrs_gpa -= alignage; 538 + } 539 + /* Now align the size up to a page boundary */ 540 + alignage = mrs_size & (mrs_page - 1); 541 + if (alignage) { 542 + mrs_size += mrs_page - alignage; 543 + } 544 + trace_vhost_region_add_section_aligned(section->mr->name, mrs_gpa, mrs_size, 545 + mrs_host); 546 + 529 547 if (dev->n_tmp_sections) { 530 548 /* Since we already have at least one section, lets see if 531 549 * this extends it; since we're scanning in order, we only ··· 542 560 prev_sec->offset_within_region; 543 561 uint64_t prev_host_end = range_get_last(prev_host_start, prev_size); 544 562 545 - if (prev_gpa_end + 1 == mrs_gpa && 546 - prev_host_end + 1 == mrs_host && 547 - section->mr == prev_sec->mr && 548 - (!dev->vhost_ops->vhost_backend_can_merge || 549 - dev->vhost_ops->vhost_backend_can_merge(dev, 563 + if (mrs_gpa <= (prev_gpa_end + 1)) { 564 + /* OK, looks like overlapping/intersecting - it's possible that 565 + * the rounding to page sizes has made them overlap, but they should 566 + * match up in the same RAMBlock if they do. 
567 + */ 568 + if (mrs_gpa < prev_gpa_start) { 569 + error_report("%s:Section rounded to %"PRIx64 570 + " prior to previous %"PRIx64, 571 + __func__, mrs_gpa, prev_gpa_start); 572 + /* A way to cleanly fail here would be better */ 573 + return; 574 + } 575 + /* Offset from the start of the previous GPA to this GPA */ 576 + size_t offset = mrs_gpa - prev_gpa_start; 577 + 578 + if (prev_host_start + offset == mrs_host && 579 + section->mr == prev_sec->mr && 580 + (!dev->vhost_ops->vhost_backend_can_merge || 581 + dev->vhost_ops->vhost_backend_can_merge(dev, 550 582 mrs_host, mrs_size, 551 583 prev_host_start, prev_size))) { 552 - /* The two sections abut */ 553 - need_add = false; 554 - prev_sec->size = int128_add(prev_sec->size, section->size); 555 - trace_vhost_region_add_section_abut(section->mr->name, 556 - mrs_size + prev_size); 584 + uint64_t max_end = MAX(prev_host_end, mrs_host + mrs_size); 585 + need_add = false; 586 + prev_sec->offset_within_address_space = 587 + MIN(prev_gpa_start, mrs_gpa); 588 + prev_sec->offset_within_region = 589 + MIN(prev_host_start, mrs_host) - 590 + (uintptr_t)memory_region_get_ram_ptr(prev_sec->mr); 591 + prev_sec->size = int128_make64(max_end - MIN(prev_host_start, 592 + mrs_host)); 593 + trace_vhost_region_add_section_merge(section->mr->name, 594 + int128_get64(prev_sec->size), 595 + prev_sec->offset_within_address_space, 596 + prev_sec->offset_within_region); 597 + } else { 598 + error_report("%s: Overlapping but not coherent sections " 599 + "at %"PRIx64, 600 + __func__, mrs_gpa); 601 + return; 602 + } 557 603 } 558 604 } 559 605
+4
include/exec/cpu-common.h
··· 68 68 RAMBlock *qemu_ram_block_by_name(const char *name); 69 69 RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset, 70 70 ram_addr_t *offset); 71 + ram_addr_t qemu_ram_block_host_offset(RAMBlock *rb, void *host); 71 72 void qemu_ram_set_idstr(RAMBlock *block, const char *name, DeviceState *dev); 72 73 void qemu_ram_unset_idstr(RAMBlock *block); 73 74 const char *qemu_ram_get_idstr(RAMBlock *rb); 74 75 bool qemu_ram_is_shared(RAMBlock *rb); 76 + bool qemu_ram_is_uf_zeroable(RAMBlock *rb); 77 + void qemu_ram_set_uf_zeroable(RAMBlock *rb); 78 + 75 79 size_t qemu_ram_pagesize(RAMBlock *block); 76 80 size_t qemu_ram_pagesize_largest(void); 77 81
+31 -103
include/hw/acpi/acpi-defs.h
··· 40 40 ACPI_FADT_F_LOW_POWER_S0_IDLE_CAPABLE, 41 41 }; 42 42 43 - /* 44 - * ACPI 2.0 Generic Address Space definition. 45 - */ 46 - struct Acpi20GenericAddress { 47 - uint8_t address_space_id; 48 - uint8_t register_bit_width; 49 - uint8_t register_bit_offset; 50 - uint8_t reserved; 51 - uint64_t address; 52 - } QEMU_PACKED; 53 - typedef struct Acpi20GenericAddress Acpi20GenericAddress; 54 - 55 43 struct AcpiRsdpDescriptor { /* Root System Descriptor Pointer */ 56 44 uint64_t signature; /* ACPI signature, contains "RSD PTR " */ 57 45 uint8_t checksum; /* To make sum of struct == 0 */ ··· 87 75 } QEMU_PACKED; 88 76 typedef struct AcpiTableHeader AcpiTableHeader; 89 77 90 - /* 91 - * ACPI Fixed ACPI Description Table (FADT) 92 - */ 93 - #define ACPI_FADT_COMMON_DEF /* FADT common definition */ \ 94 - ACPI_TABLE_HEADER_DEF /* ACPI common table header */ \ 95 - uint32_t firmware_ctrl; /* Physical address of FACS */ \ 96 - uint32_t dsdt; /* Physical address of DSDT */ \ 97 - uint8_t model; /* System Interrupt Model */ \ 98 - uint8_t reserved1; /* Reserved */ \ 99 - uint16_t sci_int; /* System vector of SCI interrupt */ \ 100 - uint32_t smi_cmd; /* Port address of SMI command port */ \ 101 - uint8_t acpi_enable; /* Value to write to smi_cmd to enable ACPI */ \ 102 - uint8_t acpi_disable; /* Value to write to smi_cmd to disable ACPI */ \ 103 - /* Value to write to SMI CMD to enter S4BIOS state */ \ 104 - uint8_t S4bios_req; \ 105 - uint8_t reserved2; /* Reserved - must be zero */ \ 106 - /* Port address of Power Mgt 1a acpi_event Reg Blk */ \ 107 - uint32_t pm1a_evt_blk; \ 108 - /* Port address of Power Mgt 1b acpi_event Reg Blk */ \ 109 - uint32_t pm1b_evt_blk; \ 110 - uint32_t pm1a_cnt_blk; /* Port address of Power Mgt 1a Control Reg Blk */ \ 111 - uint32_t pm1b_cnt_blk; /* Port address of Power Mgt 1b Control Reg Blk */ \ 112 - uint32_t pm2_cnt_blk; /* Port address of Power Mgt 2 Control Reg Blk */ \ 113 - uint32_t pm_tmr_blk; /* Port address of Power Mgt Timer Ctrl 
Reg Blk */ \ 114 - /* Port addr of General Purpose acpi_event 0 Reg Blk */ \ 115 - uint32_t gpe0_blk; \ 116 - /* Port addr of General Purpose acpi_event 1 Reg Blk */ \ 117 - uint32_t gpe1_blk; \ 118 - uint8_t pm1_evt_len; /* Byte length of ports at pm1_x_evt_blk */ \ 119 - uint8_t pm1_cnt_len; /* Byte length of ports at pm1_x_cnt_blk */ \ 120 - uint8_t pm2_cnt_len; /* Byte Length of ports at pm2_cnt_blk */ \ 121 - uint8_t pm_tmr_len; /* Byte Length of ports at pm_tm_blk */ \ 122 - uint8_t gpe0_blk_len; /* Byte Length of ports at gpe0_blk */ \ 123 - uint8_t gpe1_blk_len; /* Byte Length of ports at gpe1_blk */ \ 124 - uint8_t gpe1_base; /* Offset in gpe model where gpe1 events start */ \ 125 - uint8_t reserved3; /* Reserved */ \ 126 - uint16_t plvl2_lat; /* Worst case HW latency to enter/exit C2 state */ \ 127 - uint16_t plvl3_lat; /* Worst case HW latency to enter/exit C3 state */ \ 128 - uint16_t flush_size; /* Size of area read to flush caches */ \ 129 - uint16_t flush_stride; /* Stride used in flushing caches */ \ 130 - uint8_t duty_offset; /* Bit location of duty cycle field in p_cnt reg */ \ 131 - uint8_t duty_width; /* Bit width of duty cycle field in p_cnt reg */ \ 132 - uint8_t day_alrm; /* Index to day-of-month alarm in RTC CMOS RAM */ \ 133 - uint8_t mon_alrm; /* Index to month-of-year alarm in RTC CMOS RAM */ \ 134 - uint8_t century; /* Index to century in RTC CMOS RAM */ \ 135 - /* IA-PC Boot Architecture Flags (see below for individual flags) */ \ 136 - uint16_t boot_flags; \ 137 - uint8_t reserved; /* Reserved, must be zero */ \ 138 - /* Miscellaneous flag bits (see below for individual flags) */ \ 139 - uint32_t flags; \ 140 - /* 64-bit address of the Reset register */ \ 141 - struct AcpiGenericAddress reset_register; \ 142 - /* Value to write to the reset_register port to reset the system */ \ 143 - uint8_t reset_value; \ 144 - /* ARM-Specific Boot Flags (see below for individual flags) (ACPI 5.1) */ \ 145 - uint16_t arm_boot_flags; \ 146 - uint8_t 
minor_revision; /* FADT Minor Revision (ACPI 5.1) */ \ 147 - uint64_t x_facs; /* 64-bit physical address of FACS */ \ 148 - uint64_t x_dsdt; /* 64-bit physical address of DSDT */ \ 149 - /* 64-bit Extended Power Mgt 1a Event Reg Blk address */ \ 150 - struct AcpiGenericAddress xpm1a_event_block; \ 151 - /* 64-bit Extended Power Mgt 1b Event Reg Blk address */ \ 152 - struct AcpiGenericAddress xpm1b_event_block; \ 153 - /* 64-bit Extended Power Mgt 1a Control Reg Blk address */ \ 154 - struct AcpiGenericAddress xpm1a_control_block; \ 155 - /* 64-bit Extended Power Mgt 1b Control Reg Blk address */ \ 156 - struct AcpiGenericAddress xpm1b_control_block; \ 157 - /* 64-bit Extended Power Mgt 2 Control Reg Blk address */ \ 158 - struct AcpiGenericAddress xpm2_control_block; \ 159 - /* 64-bit Extended Power Mgt Timer Ctrl Reg Blk address */ \ 160 - struct AcpiGenericAddress xpm_timer_block; \ 161 - /* 64-bit Extended General Purpose Event 0 Reg Blk address */ \ 162 - struct AcpiGenericAddress xgpe0_block; \ 163 - /* 64-bit Extended General Purpose Event 1 Reg Blk address */ \ 164 - struct AcpiGenericAddress xgpe1_block; \ 165 - 166 78 struct AcpiGenericAddress { 167 79 uint8_t space_id; /* Address space where struct or register exists */ 168 80 uint8_t bit_width; /* Size in bits of given register */ 169 81 uint8_t bit_offset; /* Bit offset within the register */ 170 - uint8_t access_width; /* Minimum Access size (ACPI 3.0) */ 82 + uint8_t access_width; /* ACPI 3.0: Minimum Access size (ACPI 3.0), 83 + ACPI 2.0: Reserved, Table 5-1 */ 171 84 uint64_t address; /* 64-bit address of struct or register */ 172 85 } QEMU_PACKED; 173 86 174 - struct AcpiFadtDescriptorRev3 { 175 - ACPI_FADT_COMMON_DEF 176 - } QEMU_PACKED; 177 - typedef struct AcpiFadtDescriptorRev3 AcpiFadtDescriptorRev3; 178 - 179 - struct AcpiFadtDescriptorRev5_1 { 180 - ACPI_FADT_COMMON_DEF 181 - /* 64-bit Sleep Control register (ACPI 5.0) */ 182 - struct AcpiGenericAddress sleep_control; 183 - /* 64-bit Sleep 
Status register (ACPI 5.0) */ 184 - struct AcpiGenericAddress sleep_status; 185 - } QEMU_PACKED; 87 + typedef struct AcpiFadtData { 88 + struct AcpiGenericAddress pm1a_cnt; /* PM1a_CNT_BLK */ 89 + struct AcpiGenericAddress pm1a_evt; /* PM1a_EVT_BLK */ 90 + struct AcpiGenericAddress pm_tmr; /* PM_TMR_BLK */ 91 + struct AcpiGenericAddress gpe0_blk; /* GPE0_BLK */ 92 + struct AcpiGenericAddress reset_reg; /* RESET_REG */ 93 + uint8_t reset_val; /* RESET_VALUE */ 94 + uint8_t rev; /* Revision */ 95 + uint32_t flags; /* Flags */ 96 + uint32_t smi_cmd; /* SMI_CMD */ 97 + uint16_t sci_int; /* SCI_INT */ 98 + uint8_t int_model; /* INT_MODEL */ 99 + uint8_t acpi_enable_cmd; /* ACPI_ENABLE */ 100 + uint8_t acpi_disable_cmd; /* ACPI_DISABLE */ 101 + uint8_t rtc_century; /* CENTURY */ 102 + uint16_t plvl2_lat; /* P_LVL2_LAT */ 103 + uint16_t plvl3_lat; /* P_LVL3_LAT */ 104 + uint16_t arm_boot_arch; /* ARM_BOOT_ARCH */ 105 + uint8_t minor_ver; /* FADT Minor Version */ 186 106 187 - typedef struct AcpiFadtDescriptorRev5_1 AcpiFadtDescriptorRev5_1; 107 + /* 108 + * respective tables offsets within ACPI_BUILD_TABLE_FILE, 109 + * NULL if table doesn't exist (in that case field's value 110 + * won't be patched by linker and will be kept set to 0) 111 + */ 112 + unsigned *facs_tbl_offset; /* FACS offset in */ 113 + unsigned *dsdt_tbl_offset; 114 + unsigned *xdsdt_tbl_offset; 115 + } AcpiFadtData; 188 116 189 117 #define ACPI_FADT_ARM_PSCI_COMPLIANT (1 << 0) 190 118 #define ACPI_FADT_ARM_PSCI_USE_HVC (1 << 1) ··· 456 384 struct Acpi20Hpet { 457 385 ACPI_TABLE_HEADER_DEF /* ACPI common table header */ 458 386 uint32_t timer_block_id; 459 - Acpi20GenericAddress addr; 387 + struct AcpiGenericAddress addr; 460 388 uint8_t hpet_number; 461 389 uint16_t min_tick; 462 390 uint8_t page_protect;
+23
include/hw/acpi/aml-build.h
··· 78 78 } AmlUpdateRule; 79 79 80 80 typedef enum { 81 + AML_AS_SYSTEM_MEMORY = 0X00, 82 + AML_AS_SYSTEM_IO = 0X01, 83 + AML_AS_PCI_CONFIG = 0X02, 84 + AML_AS_EMBEDDED_CTRL = 0X03, 85 + AML_AS_SMBUS = 0X04, 86 + AML_AS_FFH = 0X7F, 87 + } AmlAddressSpace; 88 + 89 + typedef enum { 81 90 AML_SYSTEM_MEMORY = 0X00, 82 91 AML_SYSTEM_IO = 0X01, 83 92 AML_PCI_CONFIG = 0X02, ··· 389 398 build_append_named_dword(GArray *array, const char *name_format, ...) 390 399 GCC_FMT_ATTR(2, 3); 391 400 401 + void build_append_gas(GArray *table, AmlAddressSpace as, 402 + uint8_t bit_width, uint8_t bit_offset, 403 + uint8_t access_width, uint64_t address); 404 + 405 + static inline void 406 + build_append_gas_from_struct(GArray *table, const struct AcpiGenericAddress *s) 407 + { 408 + build_append_gas(table, s->space_id, s->bit_width, s->bit_offset, 409 + s->access_width, s->address); 410 + } 411 + 392 412 void build_srat_memory(AcpiSratMemoryAffinity *numamem, uint64_t base, 393 413 uint64_t len, int node, MemoryAffinityFlags flags); 394 414 395 415 void build_slit(GArray *table_data, BIOSLinker *linker); 416 + 417 + void build_fadt(GArray *tbl, BIOSLinker *linker, const AcpiFadtData *f, 418 + const char *oem_id, const char *oem_table_id); 396 419 #endif
+3
include/hw/isa/apm.h
··· 5 5 #include "hw/hw.h" 6 6 #include "exec/memory.h" 7 7 8 + #define APM_CNT_IOPORT 0xb2 9 + #define ACPI_PORT_SMI_CMD APM_CNT_IOPORT 10 + 8 11 typedef void (*apm_ctrl_changed_t)(uint32_t val, void *arg); 9 12 10 13 typedef struct APMState {
+1 -1
include/hw/mem/pc-dimm.h
··· 93 93 94 94 int pc_dimm_get_free_slot(const int *hint, int max_slots, Error **errp); 95 95 96 - int qmp_pc_dimm_device_list(Object *obj, void *opaque); 96 + MemoryDeviceInfoList *qmp_pc_dimm_device_list(void); 97 97 uint64_t pc_existing_dimms_capacity(Error **errp); 98 98 uint64_t get_plugged_memory_size(void); 99 99 void pc_dimm_memory_plug(DeviceState *dev, MemoryHotplugState *hpms,
-1
include/hw/pci/pci.h
··· 217 217 DeviceClass parent_class; 218 218 219 219 void (*realize)(PCIDevice *dev, Error **errp); 220 - int (*init)(PCIDevice *dev);/* TODO convert to realize() and remove */ 221 220 PCIUnregisterFunc *exit; 222 221 PCIConfigReadFunc *config_read; 223 222 PCIConfigWriteFunc *config_write;
+4 -1
include/hw/virtio/virtio-net.h
··· 38 38 uint16_t rx_queue_size; 39 39 uint16_t tx_queue_size; 40 40 uint16_t mtu; 41 + int32_t speed; 42 + char *duplex_str; 43 + uint8_t duplex; 41 44 } virtio_net_conf; 42 45 43 46 /* Maximum packet size we can receive from tap device: header + 64k */ ··· 67 70 uint32_t has_vnet_hdr; 68 71 size_t host_hdr_len; 69 72 size_t guest_hdr_len; 70 - uint32_t host_features; 73 + uint64_t host_features; 71 74 uint8_t has_ufo; 72 75 uint32_t mergeable_rx_bufs; 73 76 uint8_t promisc;
+1821
include/standard-headers/linux/ethtool.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ 2 + /* 3 + * ethtool.h: Defines for Linux ethtool. 4 + * 5 + * Copyright (C) 1998 David S. Miller (davem@redhat.com) 6 + * Copyright 2001 Jeff Garzik <jgarzik@pobox.com> 7 + * Portions Copyright 2001 Sun Microsystems (thockin@sun.com) 8 + * Portions Copyright 2002 Intel (eli.kupermann@intel.com, 9 + * christopher.leech@intel.com, 10 + * scott.feldman@intel.com) 11 + * Portions Copyright (C) Sun Microsystems 2008 12 + */ 13 + 14 + #ifndef _LINUX_ETHTOOL_H 15 + #define _LINUX_ETHTOOL_H 16 + 17 + #include "net/eth.h" 18 + 19 + #include "standard-headers/linux/kernel.h" 20 + #include "standard-headers/linux/types.h" 21 + #include "standard-headers/linux/if_ether.h" 22 + 23 + #include <limits.h> /* for INT_MAX */ 24 + 25 + /* All structures exposed to userland should be defined such that they 26 + * have the same layout for 32-bit and 64-bit userland. 27 + */ 28 + 29 + /** 30 + * struct ethtool_cmd - DEPRECATED, link control and status 31 + * This structure is DEPRECATED, please use struct ethtool_link_settings. 32 + * @cmd: Command number = %ETHTOOL_GSET or %ETHTOOL_SSET 33 + * @supported: Bitmask of %SUPPORTED_* flags for the link modes, 34 + * physical connectors and other link features for which the 35 + * interface supports autonegotiation or auto-detection. 36 + * Read-only. 37 + * @advertising: Bitmask of %ADVERTISED_* flags for the link modes, 38 + * physical connectors and other link features that are 39 + * advertised through autonegotiation or enabled for 40 + * auto-detection. 41 + * @speed: Low bits of the speed, 1Mb units, 0 to INT_MAX or SPEED_UNKNOWN 42 + * @duplex: Duplex mode; one of %DUPLEX_* 43 + * @port: Physical connector type; one of %PORT_* 44 + * @phy_address: MDIO address of PHY (transceiver); 0 or 255 if not 45 + * applicable. For clause 45 PHYs this is the PRTAD. 
46 + * @transceiver: Historically used to distinguish different possible 47 + * PHY types, but not in a consistent way. Deprecated. 48 + * @autoneg: Enable/disable autonegotiation and auto-detection; 49 + * either %AUTONEG_DISABLE or %AUTONEG_ENABLE 50 + * @mdio_support: Bitmask of %ETH_MDIO_SUPPORTS_* flags for the MDIO 51 + * protocols supported by the interface; 0 if unknown. 52 + * Read-only. 53 + * @maxtxpkt: Historically used to report TX IRQ coalescing; now 54 + * obsoleted by &struct ethtool_coalesce. Read-only; deprecated. 55 + * @maxrxpkt: Historically used to report RX IRQ coalescing; now 56 + * obsoleted by &struct ethtool_coalesce. Read-only; deprecated. 57 + * @speed_hi: High bits of the speed, 1Mb units, 0 to INT_MAX or SPEED_UNKNOWN 58 + * @eth_tp_mdix: Ethernet twisted-pair MDI(-X) status; one of 59 + * %ETH_TP_MDI_*. If the status is unknown or not applicable, the 60 + * value will be %ETH_TP_MDI_INVALID. Read-only. 61 + * @eth_tp_mdix_ctrl: Ethernet twisted pair MDI(-X) control; one of 62 + * %ETH_TP_MDI_*. If MDI(-X) control is not implemented, reads 63 + * yield %ETH_TP_MDI_INVALID and writes may be ignored or rejected. 64 + * When written successfully, the link should be renegotiated if 65 + * necessary. 66 + * @lp_advertising: Bitmask of %ADVERTISED_* flags for the link modes 67 + * and other link features that the link partner advertised 68 + * through autonegotiation; 0 if unknown or not applicable. 69 + * Read-only. 70 + * 71 + * The link speed in Mbps is split between @speed and @speed_hi. Use 72 + * the ethtool_cmd_speed() and ethtool_cmd_speed_set() functions to 73 + * access it. 74 + * 75 + * If autonegotiation is disabled, the speed and @duplex represent the 76 + * fixed link mode and are writable if the driver supports multiple 77 + * link modes. 
If it is enabled then they are read-only; if the link 78 + * is up they represent the negotiated link mode; if the link is down, 79 + * the speed is 0, %SPEED_UNKNOWN or the highest enabled speed and 80 + * @duplex is %DUPLEX_UNKNOWN or the best enabled duplex mode. 81 + * 82 + * Some hardware interfaces may have multiple PHYs and/or physical 83 + * connectors fitted or do not allow the driver to detect which are 84 + * fitted. For these interfaces @port and/or @phy_address may be 85 + * writable, possibly dependent on @autoneg being %AUTONEG_DISABLE. 86 + * Otherwise, attempts to write different values may be ignored or 87 + * rejected. 88 + * 89 + * Users should assume that all fields not marked read-only are 90 + * writable and subject to validation by the driver. They should use 91 + * %ETHTOOL_GSET to get the current values before making specific 92 + * changes and then applying them with %ETHTOOL_SSET. 93 + * 94 + * Drivers that implement set_settings() should validate all fields 95 + * other than @cmd that are not described as read-only or deprecated, 96 + * and must ignore all fields described as read-only. 97 + * 98 + * Deprecated fields should be ignored by both users and drivers. 
/* Legacy link-settings request/response; superseded in later kernels but
 * still part of the ABI.  Layout must stay exactly as the kernel defines it.
 * The full kernel-doc for the fields precedes this hunk; deprecated fields
 * (transceiver, maxtxpkt, maxrxpkt) should be ignored by users and drivers.
 */
struct ethtool_cmd {
    uint32_t cmd;
    uint32_t supported;
    uint32_t advertising;
    uint16_t speed;             /* low 16 bits of link speed, 1Mb units */
    uint8_t duplex;
    uint8_t port;
    uint8_t phy_address;
    uint8_t transceiver;        /* deprecated */
    uint8_t autoneg;            /* AUTONEG_DISABLE or AUTONEG_ENABLE */
    uint8_t mdio_support;       /* ETH_MDIO_SUPPORTS_* bitmask; read-only */
    uint32_t maxtxpkt;          /* obsoleted by struct ethtool_coalesce */
    uint32_t maxrxpkt;          /* obsoleted by struct ethtool_coalesce */
    uint16_t speed_hi;          /* high 16 bits of link speed, 1Mb units */
    uint8_t eth_tp_mdix;        /* ETH_TP_MDI_*; read-only */
    uint8_t eth_tp_mdix_ctrl;   /* ETH_TP_MDI_*; may be ignored/rejected */
    uint32_t lp_advertising;    /* link partner ADVERTISED_*; read-only */
    uint32_t reserved[2];
};

/* Store a 32-bit speed (Mb/s) into the split speed/speed_hi fields.
 * Always use this pair of helpers rather than touching the fields directly.
 */
static inline void ethtool_cmd_speed_set(struct ethtool_cmd *ep,
                                         uint32_t speed)
{
    ep->speed = (uint16_t)(speed & 0xFFFF);
    ep->speed_hi = (uint16_t)(speed >> 16);
}

/* Reassemble the 32-bit speed from speed/speed_hi.
 *
 * speed_hi must be widened to uint32_t before shifting: a bare uint16_t
 * promotes to (signed) int, so "speed_hi << 16" with bit 15 set would
 * shift into the sign bit — undefined behaviour in C.
 */
static inline uint32_t ethtool_cmd_speed(const struct ethtool_cmd *ep)
{
    return ((uint32_t)ep->speed_hi << 16) | ep->speed;
}

/* Device supports clause 22 register access to PHY or peripherals using
 * the interface defined in "standard-headers/linux/mii.h".  This should not
 * be set if there are known to be no such peripherals present or if the
 * driver only emulates clause 22 registers for compatibility.
 */
#define ETH_MDIO_SUPPORTS_C22 1

/* Device supports clause 45 register access to PHY or peripherals using
 * the interface defined in "standard-headers/linux/mii.h" and
 * <linux/mdio.h>.  This should not be set if there are known to be no
 * such peripherals present.
 */
#define ETH_MDIO_SUPPORTS_C45 2

#define ETHTOOL_FWVERS_LEN 32
#define ETHTOOL_BUSINFO_LEN 32
#define ETHTOOL_EROMVERS_LEN 32
156 + * @version: Driver version string; may be an empty string 157 + * @fw_version: Firmware version string; may be an empty string 158 + * @erom_version: Expansion ROM version string; may be an empty string 159 + * @bus_info: Device bus address. This should match the dev_name() 160 + * string for the underlying bus device, if there is one. May be 161 + * an empty string. 162 + * @n_priv_flags: Number of flags valid for %ETHTOOL_GPFLAGS and 163 + * %ETHTOOL_SPFLAGS commands; also the number of strings in the 164 + * %ETH_SS_PRIV_FLAGS set 165 + * @n_stats: Number of uint64_t statistics returned by the %ETHTOOL_GSTATS 166 + * command; also the number of strings in the %ETH_SS_STATS set 167 + * @testinfo_len: Number of results returned by the %ETHTOOL_TEST 168 + * command; also the number of strings in the %ETH_SS_TEST set 169 + * @eedump_len: Size of EEPROM accessible through the %ETHTOOL_GEEPROM 170 + * and %ETHTOOL_SEEPROM commands, in bytes 171 + * @regdump_len: Size of register dump returned by the %ETHTOOL_GREGS 172 + * command, in bytes 173 + * 174 + * Users can use the %ETHTOOL_GSSET_INFO command to get the number of 175 + * strings in any string set (from Linux 2.6.34). 176 + * 177 + * Drivers should set at most @driver, @version, @fw_version and 178 + * @bus_info in their get_drvinfo() implementation. The ethtool 179 + * core fills in the other fields using other driver operations. 
180 + */ 181 + struct ethtool_drvinfo { 182 + uint32_t cmd; 183 + char driver[32]; 184 + char version[32]; 185 + char fw_version[ETHTOOL_FWVERS_LEN]; 186 + char bus_info[ETHTOOL_BUSINFO_LEN]; 187 + char erom_version[ETHTOOL_EROMVERS_LEN]; 188 + char reserved2[12]; 189 + uint32_t n_priv_flags; 190 + uint32_t n_stats; 191 + uint32_t testinfo_len; 192 + uint32_t eedump_len; 193 + uint32_t regdump_len; 194 + }; 195 + 196 + #define SOPASS_MAX 6 197 + 198 + /** 199 + * struct ethtool_wolinfo - Wake-On-Lan configuration 200 + * @cmd: Command number = %ETHTOOL_GWOL or %ETHTOOL_SWOL 201 + * @supported: Bitmask of %WAKE_* flags for supported Wake-On-Lan modes. 202 + * Read-only. 203 + * @wolopts: Bitmask of %WAKE_* flags for enabled Wake-On-Lan modes. 204 + * @sopass: SecureOn(tm) password; meaningful only if %WAKE_MAGICSECURE 205 + * is set in @wolopts. 206 + */ 207 + struct ethtool_wolinfo { 208 + uint32_t cmd; 209 + uint32_t supported; 210 + uint32_t wolopts; 211 + uint8_t sopass[SOPASS_MAX]; 212 + }; 213 + 214 + /* for passing single values */ 215 + struct ethtool_value { 216 + uint32_t cmd; 217 + uint32_t data; 218 + }; 219 + 220 + enum tunable_id { 221 + ETHTOOL_ID_UNSPEC, 222 + ETHTOOL_RX_COPYBREAK, 223 + ETHTOOL_TX_COPYBREAK, 224 + /* 225 + * Add your fresh new tubale attribute above and remember to update 226 + * tunable_strings[] in net/core/ethtool.c 227 + */ 228 + __ETHTOOL_TUNABLE_COUNT, 229 + }; 230 + 231 + enum tunable_type_id { 232 + ETHTOOL_TUNABLE_UNSPEC, 233 + ETHTOOL_TUNABLE_U8, 234 + ETHTOOL_TUNABLE_U16, 235 + ETHTOOL_TUNABLE_U32, 236 + ETHTOOL_TUNABLE_U64, 237 + ETHTOOL_TUNABLE_STRING, 238 + ETHTOOL_TUNABLE_S8, 239 + ETHTOOL_TUNABLE_S16, 240 + ETHTOOL_TUNABLE_S32, 241 + ETHTOOL_TUNABLE_S64, 242 + }; 243 + 244 + struct ethtool_tunable { 245 + uint32_t cmd; 246 + uint32_t id; 247 + uint32_t type_id; 248 + uint32_t len; 249 + void *data[0]; 250 + }; 251 + 252 + #define DOWNSHIFT_DEV_DEFAULT_COUNT 0xff 253 + #define DOWNSHIFT_DEV_DISABLE 0 254 + 255 + enum 
phy_tunable_id { 256 + ETHTOOL_PHY_ID_UNSPEC, 257 + ETHTOOL_PHY_DOWNSHIFT, 258 + /* 259 + * Add your fresh new phy tunable attribute above and remember to update 260 + * phy_tunable_strings[] in net/core/ethtool.c 261 + */ 262 + __ETHTOOL_PHY_TUNABLE_COUNT, 263 + }; 264 + 265 + /** 266 + * struct ethtool_regs - hardware register dump 267 + * @cmd: Command number = %ETHTOOL_GREGS 268 + * @version: Dump format version. This is driver-specific and may 269 + * distinguish different chips/revisions. Drivers must use new 270 + * version numbers whenever the dump format changes in an 271 + * incompatible way. 272 + * @len: On entry, the real length of @data. On return, the number of 273 + * bytes used. 274 + * @data: Buffer for the register dump 275 + * 276 + * Users should use %ETHTOOL_GDRVINFO to find the maximum length of 277 + * a register dump for the interface. They must allocate the buffer 278 + * immediately following this structure. 279 + */ 280 + struct ethtool_regs { 281 + uint32_t cmd; 282 + uint32_t version; 283 + uint32_t len; 284 + uint8_t data[0]; 285 + }; 286 + 287 + /** 288 + * struct ethtool_eeprom - EEPROM dump 289 + * @cmd: Command number = %ETHTOOL_GEEPROM, %ETHTOOL_GMODULEEEPROM or 290 + * %ETHTOOL_SEEPROM 291 + * @magic: A 'magic cookie' value to guard against accidental changes. 292 + * The value passed in to %ETHTOOL_SEEPROM must match the value 293 + * returned by %ETHTOOL_GEEPROM for the same device. This is 294 + * unused when @cmd is %ETHTOOL_GMODULEEEPROM. 295 + * @offset: Offset within the EEPROM to begin reading/writing, in bytes 296 + * @len: On entry, number of bytes to read/write. On successful 297 + * return, number of bytes actually read/written. In case of 298 + * error, this may indicate at what point the error occurred. 299 + * @data: Buffer to read/write from 300 + * 301 + * Users may use %ETHTOOL_GDRVINFO or %ETHTOOL_GMODULEINFO to find 302 + * the length of an on-board or module EEPROM, respectively. 
/**
 * struct ethtool_eeprom - EEPROM dump
 * @cmd: Command number = %ETHTOOL_GEEPROM, %ETHTOOL_GMODULEEEPROM or
 *	%ETHTOOL_SEEPROM
 * @magic: A 'magic cookie' value guarding against accidental changes.
 *	For %ETHTOOL_SEEPROM it must match the value %ETHTOOL_GEEPROM
 *	returned for the same device; unused for %ETHTOOL_GMODULEEEPROM.
 * @offset: Byte offset within the EEPROM to begin reading/writing
 * @len: On entry, number of bytes to read/write.  On successful return,
 *	bytes actually transferred; on error it may indicate where the
 *	failure occurred.
 * @data: Buffer to read/write from
 *
 * Users may use %ETHTOOL_GDRVINFO or %ETHTOOL_GMODULEINFO to find the
 * length of an on-board or module EEPROM, respectively, and must allocate
 * the buffer immediately following this structure.
 */
struct ethtool_eeprom {
    uint32_t cmd;
    uint32_t magic;
    uint32_t offset;
    uint32_t len;
    uint8_t data[0];
};

/**
 * struct ethtool_eee - Energy Efficient Ethernet information
 * @cmd: ETHTOOL_{G,S}EEE
 * @supported: Mask of %SUPPORTED_* flags for the speed/duplex combinations
 *	for which there is EEE support
 * @advertised: Mask of %ADVERTISED_* flags advertised as EEE capable
 * @lp_advertised: Mask of %ADVERTISED_* flags the link partner advertised
 *	as EEE capable
 * @eee_active: Result of the EEE auto negotiation
 * @eee_enabled: EEE configured mode (enabled/disabled)
 * @tx_lpi_enabled: Whether the interface should assert its TX LPI, given
 *	that EEE was negotiated
 * @tx_lpi_timer: Microseconds the interface delays before asserting its
 *	TX LPI after reaching idle; effective only when EEE was negotiated
 *	and @tx_lpi_enabled was set
 */
struct ethtool_eee {
    uint32_t cmd;
    uint32_t supported;
    uint32_t advertised;
    uint32_t lp_advertised;
    uint32_t eee_active;
    uint32_t eee_enabled;
    uint32_t tx_lpi_enabled;
    uint32_t tx_lpi_timer;
    uint32_t reserved[2];
};
/**
 * struct ethtool_modinfo - plugin module eeprom information
 * @cmd: %ETHTOOL_GMODULEINFO
 * @type: Standard the module information conforms to, %ETH_MODULE_SFF_xxxx
 * @eeprom_len: Length of the eeprom
 *
 * Returned so callers can size memory correctly for a subsequent
 * %ETHTOOL_GMODULEEEPROM call; @type indicates the eeprom data format.
 */
struct ethtool_modinfo {
    uint32_t cmd;
    uint32_t type;
    uint32_t eeprom_len;
    uint32_t reserved[8];
};
/**
 * struct ethtool_coalesce - coalescing parameters for IRQs and stats updates
 * @cmd: ETHTOOL_{G,S}COALESCE
 * @rx_coalesce_usecs: Usecs to delay an RX interrupt after a packet arrives
 * @rx_max_coalesced_frames: Maximum packets to receive before an RX interrupt
 * @rx_coalesce_usecs_irq: As @rx_coalesce_usecs, but while an IRQ is being
 *	serviced by the host
 * @rx_max_coalesced_frames_irq: As @rx_max_coalesced_frames, but while an
 *	IRQ is being serviced by the host
 * @tx_coalesce_usecs: Usecs to delay a TX interrupt after a packet is sent
 * @tx_max_coalesced_frames: Maximum packets to send before a TX interrupt
 * @tx_coalesce_usecs_irq: As @tx_coalesce_usecs, but while an IRQ is being
 *	serviced by the host
 * @tx_max_coalesced_frames_irq: As @tx_max_coalesced_frames, but while an
 *	IRQ is being serviced by the host
 * @stats_block_coalesce_usecs: Usecs to delay in-memory statistics block
 *	updates; ignored by drivers without such a block.  Must not be zero.
 * @use_adaptive_rx_coalesce: Enable adaptive RX coalescing
 * @use_adaptive_tx_coalesce: Enable adaptive TX coalescing
 * @pkt_rate_low: Threshold for low packet rate (packets per second)
 * @rx_coalesce_usecs_low: RX delay used when the rate is below @pkt_rate_low
 * @rx_max_coalesced_frames_low: Maximum number of packets to be received
 *	before an RX interrupt, when the rate is below @pkt_rate_low
 * @tx_coalesce_usecs_low: TX delay used when the rate is below @pkt_rate_low
 * @tx_max_coalesced_frames_low: Maximum number of packets to be sent before
 *	a TX interrupt, when the rate is below @pkt_rate_low
 * @pkt_rate_high: Threshold for high packet rate (packets per second)
 * @rx_coalesce_usecs_high: RX delay used when the rate is above @pkt_rate_high
 * @rx_max_coalesced_frames_high: Maximum number of packets to be received
 *	before an RX interrupt, when the rate is above @pkt_rate_high
 * @tx_coalesce_usecs_high: TX delay used when the rate is above @pkt_rate_high
 * @tx_max_coalesced_frames_high: Maximum number of packets to be sent before
 *	a TX interrupt, when the rate is above @pkt_rate_high
 * @rate_sample_interval: How often to sample the packet rate for adaptive
 *	coalescing, in seconds.  Must not be zero.
 *
 * Each (usecs, max_frames) pair specifies that interrupts be coalesced until
 *	(usecs > 0 && time_since_first_completion >= usecs) ||
 *	(max_frames > 0 && completed_frames >= max_frames)
 *
 * Setting both usecs and max_frames to zero is illegal, as it would never
 * generate interrupts; to disable coalescing set usecs = 0, max_frames = 1.
 * Some implementations ignore max_frames and use only the usecs condition;
 * that behaviour is deprecated — drivers for hardware that cannot count
 * completions should validate that max_frames == !rx_usecs.
 *
 * Adaptive RX/TX coalescing improves latency at low packet rates and
 * throughput at high rates; some drivers implement only one direction, and
 * anything not implemented is silently ignored.  Between @pkt_rate_low and
 * @pkt_rate_high (both in packets per second) the normal {rx,tx}_*
 * parameters are used.
 */
struct ethtool_coalesce {
    uint32_t cmd;
    uint32_t rx_coalesce_usecs;
    uint32_t rx_max_coalesced_frames;
    uint32_t rx_coalesce_usecs_irq;
    uint32_t rx_max_coalesced_frames_irq;
    uint32_t tx_coalesce_usecs;
    uint32_t tx_max_coalesced_frames;
    uint32_t tx_coalesce_usecs_irq;
    uint32_t tx_max_coalesced_frames_irq;
    uint32_t stats_block_coalesce_usecs;
    uint32_t use_adaptive_rx_coalesce;
    uint32_t use_adaptive_tx_coalesce;
    uint32_t pkt_rate_low;
    uint32_t rx_coalesce_usecs_low;
    uint32_t rx_max_coalesced_frames_low;
    uint32_t tx_coalesce_usecs_low;
    uint32_t tx_max_coalesced_frames_low;
    uint32_t pkt_rate_high;
    uint32_t rx_coalesce_usecs_high;
    uint32_t rx_max_coalesced_frames_high;
    uint32_t tx_coalesce_usecs_high;
    uint32_t tx_max_coalesced_frames_high;
    uint32_t rate_sample_interval;
};
/**
 * struct ethtool_ringparam - RX/TX ring parameters
 * @cmd: Command number = %ETHTOOL_GRINGPARAM or %ETHTOOL_SRINGPARAM
 * @rx_max_pending: Maximum supported pending entries per RX ring.  Read-only.
 * @rx_mini_max_pending: Maximum supported pending entries per RX mini
 *	ring.  Read-only.
 * @rx_jumbo_max_pending: Maximum supported pending entries per RX jumbo
 *	ring.  Read-only.
 * @tx_max_pending: Maximum supported pending entries per TX ring.  Read-only.
 * @rx_pending: Current maximum number of pending entries per RX ring
 * @rx_mini_pending: Current maximum pending entries per RX mini ring
 * @rx_jumbo_pending: Current maximum pending entries per RX jumbo ring
 * @tx_pending: Current maximum pending entries per TX ring
 *
 * Interfaces without separate RX mini and/or jumbo rings report 0 in
 * @rx_mini_max_pending and/or @rx_jumbo_max_pending.  There may also be
 * driver-dependent minimum values for the number of entries per ring.
 */
struct ethtool_ringparam {
    uint32_t cmd;
    uint32_t rx_max_pending;
    uint32_t rx_mini_max_pending;
    uint32_t rx_jumbo_max_pending;
    uint32_t tx_max_pending;
    uint32_t rx_pending;
    uint32_t rx_mini_pending;
    uint32_t rx_jumbo_pending;
    uint32_t tx_pending;
};
/**
 * struct ethtool_channels - configuring number of network channels
 * @cmd: ETHTOOL_{G,S}CHANNELS
 * @max_rx: Maximum number of receive channels the driver supports.  Read-only.
 * @max_tx: Maximum number of transmit channels the driver supports.  Read-only.
 * @max_other: Maximum number of other channels the driver supports.  Read-only.
 * @max_combined: Maximum number of combined channels (sets of RX, TX or
 *	other queues) the driver supports.  Read-only.
 * @rx_count: Valid values are in the range 1 to @max_rx
 * @tx_count: Valid values are in the range 1 to @max_tx
 * @other_count: Valid values are in the range 1 to @max_other
 * @combined_count: Valid values are in the range 1 to @max_combined
 *
 * This can be used to configure RX, TX and other channels.
 */
struct ethtool_channels {
    uint32_t cmd;
    uint32_t max_rx;
    uint32_t max_tx;
    uint32_t max_other;
    uint32_t max_combined;
    uint32_t rx_count;
    uint32_t tx_count;
    uint32_t other_count;
    uint32_t combined_count;
};
/**
 * struct ethtool_pauseparam - Ethernet pause (flow control) parameters
 * @cmd: Command number = %ETHTOOL_GPAUSEPARAM or %ETHTOOL_SPAUSEPARAM
 * @autoneg: Flag to enable autonegotiation of pause frame use
 * @rx_pause: Flag to enable reception of pause frames
 * @tx_pause: Flag to enable transmission of pause frames
 *
 * Drivers should reject a non-zero setting of @autoneg when
 * autonegotiation is disabled (or not supported) for the link.
 *
 * If the link is autonegotiated, drivers should use
 * mii_advertise_flowctrl() or similar code to set the advertised pause
 * frame capabilities based on @rx_pause and @tx_pause, even if @autoneg
 * is zero.  They should also allow the advertised capabilities to be
 * controlled directly through the advertising field of
 * &struct ethtool_cmd.
 *
 * If @autoneg is non-zero, the MAC sends and/or receives pause frames
 * according to the result of autonegotiation; otherwise it is configured
 * directly from @rx_pause and @tx_pause.
 */
struct ethtool_pauseparam {
    uint32_t cmd;
    uint32_t autoneg;
    uint32_t rx_pause;
    uint32_t tx_pause;
};

#define ETH_GSTRING_LEN 32

/**
 * enum ethtool_stringset - string set ID
 * @ETH_SS_TEST: Self-test result names, for use with %ETHTOOL_TEST
 * @ETH_SS_STATS: Statistic names, for use with %ETHTOOL_GSTATS
 * @ETH_SS_PRIV_FLAGS: Driver private flag names, for use with
 *	%ETHTOOL_GPFLAGS and %ETHTOOL_SPFLAGS
 * @ETH_SS_NTUPLE_FILTERS: Previously used with %ETHTOOL_GRXNTUPLE;
 *	now deprecated
 * @ETH_SS_FEATURES: Device feature names
 * @ETH_SS_RSS_HASH_FUNCS: RSS hash function names
 * @ETH_SS_TUNABLES: Tunable names
 * @ETH_SS_PHY_STATS: Statistic names, for use with %ETHTOOL_GPHYSTATS
 * @ETH_SS_PHY_TUNABLES: PHY tunable names
 */
enum ethtool_stringset {
    ETH_SS_TEST = 0,
    ETH_SS_STATS,
    ETH_SS_PRIV_FLAGS,
    ETH_SS_NTUPLE_FILTERS,
    ETH_SS_FEATURES,
    ETH_SS_RSS_HASH_FUNCS,
    ETH_SS_TUNABLES,
    ETH_SS_PHY_STATS,
    ETH_SS_PHY_TUNABLES,
};
/**
 * struct ethtool_gstrings - string set for data tagging
 * @cmd: Command number = %ETHTOOL_GSTRINGS
 * @string_set: String set ID; one of &enum ethtool_stringset
 * @len: On return, the number of strings in the string set
 * @data: Buffer for strings; each string is null-padded to
 *	%ETH_GSTRING_LEN bytes
 *
 * Users must use %ETHTOOL_GSSET_INFO to find the number of strings and
 * must allocate a suitably sized buffer immediately following this
 * structure.
 */
struct ethtool_gstrings {
    uint32_t cmd;
    uint32_t string_set;
    uint32_t len;
    uint8_t data[0];
};

/**
 * struct ethtool_sset_info - string set information
 * @cmd: Command number = %ETHTOOL_GSSET_INFO
 * @sset_mask: On entry, a bitmask of string sets to query, with bits
 *	numbered according to &enum ethtool_stringset.  On return, a
 *	bitmask of the queried sets that are supported.
 * @data: Buffer for string set sizes; on return it holds the size of
 *	each queried-and-supported set, in order of ID.
 *
 * Example: the user passes in @sset_mask = 0x7 (sets 0, 1, 2) and on
 * return @sset_mask == 0x6 (sets 1, 2).  Then @data[0] contains the size
 * of set 1 and @data[1] the size of set 2.
 *
 * Users must allocate a buffer of the appropriate size (4 * number of
 * sets queried) immediately following this structure.
 */
struct ethtool_sset_info {
    uint32_t cmd;
    uint32_t reserved;
    uint64_t sset_mask;
    uint32_t data[0];
};

/**
 * enum ethtool_test_flags - flags definition of ethtool_test
 * @ETH_TEST_FL_OFFLINE: if set perform online and offline tests,
 *	otherwise only online tests
 * @ETH_TEST_FL_FAILED: driver sets this flag if the test fails
 * @ETH_TEST_FL_EXTERNAL_LB: application requests an external loopback test
 * @ETH_TEST_FL_EXTERNAL_LB_DONE: driver performed the external loopback test
 */
enum ethtool_test_flags {
    ETH_TEST_FL_OFFLINE = (1 << 0),
    ETH_TEST_FL_FAILED = (1 << 1),
    ETH_TEST_FL_EXTERNAL_LB = (1 << 2),
    ETH_TEST_FL_EXTERNAL_LB_DONE = (1 << 3),
};
/**
 * struct ethtool_test - device self-test invocation
 * @cmd: Command number = %ETHTOOL_TEST
 * @flags: Bitmask of &enum ethtool_test_flags; some flags may be set by
 *	the user on entry, others by the driver on return
 * @len: On return, the number of test results
 * @data: Array of test results
 *
 * Users must use %ETHTOOL_GSSET_INFO or %ETHTOOL_GDRVINFO to find the
 * number of test results that will be returned, and must allocate a
 * buffer of the appropriate size (8 * number of results) immediately
 * following this structure.
 */
struct ethtool_test {
    uint32_t cmd;
    uint32_t flags;
    uint32_t reserved;
    uint32_t len;
    uint64_t data[0];
};

/**
 * struct ethtool_stats - device-specific statistics
 * @cmd: Command number = %ETHTOOL_GSTATS
 * @n_stats: On return, the number of statistics
 * @data: Array of statistics
 *
 * Users must use %ETHTOOL_GSSET_INFO or %ETHTOOL_GDRVINFO to find the
 * number of statistics that will be returned, and must allocate a buffer
 * of the appropriate size (8 * number of statistics) immediately
 * following this structure.
 */
struct ethtool_stats {
    uint32_t cmd;
    uint32_t n_stats;
    uint64_t data[0];
};

/**
 * struct ethtool_perm_addr - permanent hardware address
 * @cmd: Command number = %ETHTOOL_GPERMADDR
 * @size: On entry, the size of the buffer; on return, the size of the
 *	address.  The command fails if the buffer is too small.
 * @data: Buffer for the address
 *
 * Users must allocate the buffer immediately following this structure;
 * a buffer of %MAX_ADDR_LEN bytes is sufficient for any address type.
 */
struct ethtool_perm_addr {
    uint32_t cmd;
    uint32_t size;
    uint8_t data[0];
};
/* Boolean flags controlling per-interface behavior characteristics.
 * When reading, a set flag indicates that the behavior is enabled or
 * present; when writing, it indicates whether the driver should turn the
 * behavior on (set) or off (clear).
 *
 * Some behaviors may be read-only (unconditionally absent or present).
 * If so, return EINVAL from the set-flags operation when the requested
 * flag differs from the read-only value.
 */
enum ethtool_flags {
    ETH_FLAG_TXVLAN = (1 << 7),     /* TX VLAN offload enabled */
    ETH_FLAG_RXVLAN = (1 << 8),     /* RX VLAN offload enabled */
    ETH_FLAG_LRO = (1 << 15),       /* LRO is enabled */
    ETH_FLAG_NTUPLE = (1 << 27),    /* N-tuple filters enabled */
    ETH_FLAG_RXHASH = (1 << 28),
};

/* The following structures support RX network flow classification and RX
 * n-tuple configuration.  Note, all multibyte fields (e.g. ip4src,
 * ip4dst, psrc, pdst, spi) are expected to be in network byte order.
 */

/**
 * struct ethtool_tcpip4_spec - flow specification for TCP/IPv4 etc.
 * @ip4src: Source host
 * @ip4dst: Destination host
 * @psrc: Source port
 * @pdst: Destination port
 * @tos: Type-of-service
 *
 * This can be used to specify a TCP/IPv4, UDP/IPv4 or SCTP/IPv4 flow.
 */
struct ethtool_tcpip4_spec {
    uint32_t ip4src;
    uint32_t ip4dst;
    uint16_t psrc;
    uint16_t pdst;
    uint8_t tos;
};
/**
 * struct ethtool_ah_espip4_spec - flow specification for IPsec/IPv4
 * @ip4src: Source host
 * @ip4dst: Destination host
 * @spi: Security parameters index
 * @tos: Type-of-service
 *
 * This can be used to specify an IPsec transport or tunnel over IPv4.
 */
struct ethtool_ah_espip4_spec {
    uint32_t ip4src;
    uint32_t ip4dst;
    uint32_t spi;
    uint8_t tos;
};

#define ETH_RX_NFC_IP4 1

/**
 * struct ethtool_usrip4_spec - general flow specification for IPv4
 * @ip4src: Source host
 * @ip4dst: Destination host
 * @l4_4_bytes: First 4 bytes of transport (layer 4) header
 * @tos: Type-of-service
 * @ip_ver: Value must be %ETH_RX_NFC_IP4; mask must be 0
 * @proto: Transport protocol number; mask must be 0
 */
struct ethtool_usrip4_spec {
    uint32_t ip4src;
    uint32_t ip4dst;
    uint32_t l4_4_bytes;
    uint8_t tos;
    uint8_t ip_ver;
    uint8_t proto;
};

/**
 * struct ethtool_tcpip6_spec - flow specification for TCP/IPv6 etc.
 * @ip6src: Source host
 * @ip6dst: Destination host
 * @psrc: Source port
 * @pdst: Destination port
 * @tclass: Traffic Class
 *
 * This can be used to specify a TCP/IPv6, UDP/IPv6 or SCTP/IPv6 flow.
 */
struct ethtool_tcpip6_spec {
    uint32_t ip6src[4];
    uint32_t ip6dst[4];
    uint16_t psrc;
    uint16_t pdst;
    uint8_t tclass;
};
798 + */ 799 + struct ethtool_ah_espip6_spec { 800 + uint32_t ip6src[4]; 801 + uint32_t ip6dst[4]; 802 + uint32_t spi; 803 + uint8_t tclass; 804 + }; 805 + 806 + /** 807 + * struct ethtool_usrip6_spec - general flow specification for IPv6 808 + * @ip6src: Source host 809 + * @ip6dst: Destination host 810 + * @l4_4_bytes: First 4 bytes of transport (layer 4) header 811 + * @tclass: Traffic Class 812 + * @l4_proto: Transport protocol number (nexthdr after any Extension Headers) 813 + */ 814 + struct ethtool_usrip6_spec { 815 + uint32_t ip6src[4]; 816 + uint32_t ip6dst[4]; 817 + uint32_t l4_4_bytes; 818 + uint8_t tclass; 819 + uint8_t l4_proto; 820 + }; 821 + 822 + union ethtool_flow_union { 823 + struct ethtool_tcpip4_spec tcp_ip4_spec; 824 + struct ethtool_tcpip4_spec udp_ip4_spec; 825 + struct ethtool_tcpip4_spec sctp_ip4_spec; 826 + struct ethtool_ah_espip4_spec ah_ip4_spec; 827 + struct ethtool_ah_espip4_spec esp_ip4_spec; 828 + struct ethtool_usrip4_spec usr_ip4_spec; 829 + struct ethtool_tcpip6_spec tcp_ip6_spec; 830 + struct ethtool_tcpip6_spec udp_ip6_spec; 831 + struct ethtool_tcpip6_spec sctp_ip6_spec; 832 + struct ethtool_ah_espip6_spec ah_ip6_spec; 833 + struct ethtool_ah_espip6_spec esp_ip6_spec; 834 + struct ethtool_usrip6_spec usr_ip6_spec; 835 + struct eth_header ether_spec; 836 + uint8_t hdata[52]; 837 + }; 838 + 839 + /** 840 + * struct ethtool_flow_ext - additional RX flow fields 841 + * @h_dest: destination MAC address 842 + * @vlan_etype: VLAN EtherType 843 + * @vlan_tci: VLAN tag control information 844 + * @data: user defined data 845 + * 846 + * Note, @vlan_etype, @vlan_tci, and @data are only valid if %FLOW_EXT 847 + * is set in &struct ethtool_rx_flow_spec @flow_type. 848 + * @h_dest is valid if %FLOW_MAC_EXT is set. 
849 + */ 850 + struct ethtool_flow_ext { 851 + uint8_t padding[2]; 852 + unsigned char h_dest[ETH_ALEN]; 853 + uint16_t vlan_etype; 854 + uint16_t vlan_tci; 855 + uint32_t data[2]; 856 + }; 857 + 858 + /** 859 + * struct ethtool_rx_flow_spec - classification rule for RX flows 860 + * @flow_type: Type of match to perform, e.g. %TCP_V4_FLOW 861 + * @h_u: Flow fields to match (dependent on @flow_type) 862 + * @h_ext: Additional fields to match 863 + * @m_u: Masks for flow field bits to be matched 864 + * @m_ext: Masks for additional field bits to be matched 865 + * Note, all additional fields must be ignored unless @flow_type 866 + * includes the %FLOW_EXT or %FLOW_MAC_EXT flag 867 + * (see &struct ethtool_flow_ext description). 868 + * @ring_cookie: RX ring/queue index to deliver to, or %RX_CLS_FLOW_DISC 869 + * if packets should be discarded 870 + * @location: Location of rule in the table. Locations must be 871 + * numbered such that a flow matching multiple rules will be 872 + * classified according to the first (lowest numbered) rule. 873 + */ 874 + struct ethtool_rx_flow_spec { 875 + uint32_t flow_type; 876 + union ethtool_flow_union h_u; 877 + struct ethtool_flow_ext h_ext; 878 + union ethtool_flow_union m_u; 879 + struct ethtool_flow_ext m_ext; 880 + uint64_t ring_cookie; 881 + uint32_t location; 882 + }; 883 + 884 + /* How rings are layed out when accessing virtual functions or 885 + * offloaded queues is device specific. To allow users to do flow 886 + * steering and specify these queues the ring cookie is partitioned 887 + * into a 32bit queue index with an 8 bit virtual function id. 888 + * This also leaves the 3bytes for further specifiers. It is possible 889 + * future devices may support more than 256 virtual functions if 890 + * devices start supporting PCIe w/ARI. However at the moment I 891 + * do not know of any devices that support this so I do not reserve 892 + * space for this at this time. 
/* If a future patch consumes the next byte of the ring cookie it should
 * be aware of this possibility.
 */
#define ETHTOOL_RX_FLOW_SPEC_RING 0x00000000FFFFFFFFLL
#define ETHTOOL_RX_FLOW_SPEC_RING_VF 0x000000FF00000000LL
#define ETHTOOL_RX_FLOW_SPEC_RING_VF_OFF 32

/* Extract the 32-bit queue index from a flow-spec ring cookie.
 * Note: no semicolon after the closing brace — a stray file-scope ';'
 * is an empty declaration and rejected by pedantic C compilers.
 */
static inline uint64_t ethtool_get_flow_spec_ring(uint64_t ring_cookie)
{
    return ETHTOOL_RX_FLOW_SPEC_RING & ring_cookie;
}

/* Extract the 8-bit virtual function id from a flow-spec ring cookie. */
static inline uint64_t ethtool_get_flow_spec_ring_vf(uint64_t ring_cookie)
{
    return (ETHTOOL_RX_FLOW_SPEC_RING_VF & ring_cookie) >>
           ETHTOOL_RX_FLOW_SPEC_RING_VF_OFF;
}
932 + * 933 + * For %ETHTOOL_GRXCLSRULE, @fs.@location specifies the location of an 934 + * existing rule on entry and @fs contains the rule on return. 935 + * 936 + * For %ETHTOOL_GRXCLSRLALL, @rule_cnt specifies the array size of the 937 + * user buffer for @rule_locs on entry. On return, @data is the size 938 + * of the rule table, @rule_cnt is the number of defined rules, and 939 + * @rule_locs contains the locations of the defined rules. Drivers 940 + * must use the second parameter to get_rxnfc() instead of @rule_locs. 941 + * 942 + * For %ETHTOOL_SRXCLSRLINS, @fs specifies the rule to add or update. 943 + * @fs.@location either specifies the location to use or is a special 944 + * location value with %RX_CLS_LOC_SPECIAL flag set. On return, 945 + * @fs.@location is the actual rule location. 946 + * 947 + * For %ETHTOOL_SRXCLSRLDEL, @fs.@location specifies the location of an 948 + * existing rule on entry. 949 + * 950 + * A driver supporting the special location values for 951 + * %ETHTOOL_SRXCLSRLINS may add the rule at any suitable unused 952 + * location, and may remove a rule at a later location (lower 953 + * priority) that matches exactly the same set of flows. The special 954 + * values are %RX_CLS_LOC_ANY, selecting any location; 955 + * %RX_CLS_LOC_FIRST, selecting the first suitable location (maximum 956 + * priority); and %RX_CLS_LOC_LAST, selecting the last suitable 957 + * location (minimum priority). Additional special values may be 958 + * defined in future and drivers must return -%EINVAL for any 959 + * unrecognised value. 
960 + */ 961 + struct ethtool_rxnfc { 962 + uint32_t cmd; 963 + uint32_t flow_type; 964 + uint64_t data; 965 + struct ethtool_rx_flow_spec fs; 966 + uint32_t rule_cnt; 967 + uint32_t rule_locs[0]; 968 + }; 969 + 970 + 971 + /** 972 + * struct ethtool_rxfh_indir - command to get or set RX flow hash indirection 973 + * @cmd: Specific command number - %ETHTOOL_GRXFHINDIR or %ETHTOOL_SRXFHINDIR 974 + * @size: On entry, the array size of the user buffer, which may be zero. 975 + * On return from %ETHTOOL_GRXFHINDIR, the array size of the hardware 976 + * indirection table. 977 + * @ring_index: RX ring/queue index for each hash value 978 + * 979 + * For %ETHTOOL_GRXFHINDIR, a @size of zero means that only the size 980 + * should be returned. For %ETHTOOL_SRXFHINDIR, a @size of zero means 981 + * the table should be reset to default values. This last feature 982 + * is not supported by the original implementations. 983 + */ 984 + struct ethtool_rxfh_indir { 985 + uint32_t cmd; 986 + uint32_t size; 987 + uint32_t ring_index[0]; 988 + }; 989 + 990 + /** 991 + * struct ethtool_rxfh - command to get/set RX flow hash indir or/and hash key. 992 + * @cmd: Specific command number - %ETHTOOL_GRSSH or %ETHTOOL_SRSSH 993 + * @rss_context: RSS context identifier. 994 + * @indir_size: On entry, the array size of the user buffer for the 995 + * indirection table, which may be zero, or (for %ETHTOOL_SRSSH), 996 + * %ETH_RXFH_INDIR_NO_CHANGE. On return from %ETHTOOL_GRSSH, 997 + * the array size of the hardware indirection table. 998 + * @key_size: On entry, the array size of the user buffer for the hash key, 999 + * which may be zero. On return from %ETHTOOL_GRSSH, the size of the 1000 + * hardware hash key. 1001 + * @hfunc: Defines the current RSS hash function used by HW (or to be set to). 1002 + * Valid values are one of the %ETH_RSS_HASH_*. 1003 + * @rsvd: Reserved for future extensions. 
1004 + * @rss_config: RX ring/queue index for each hash value i.e., indirection table 1005 + * of @indir_size uint32_t elements, followed by hash key of @key_size 1006 + * bytes. 1007 + * 1008 + * For %ETHTOOL_GRSSH, a @indir_size and key_size of zero means that only the 1009 + * size should be returned. For %ETHTOOL_SRSSH, an @indir_size of 1010 + * %ETH_RXFH_INDIR_NO_CHANGE means that indir table setting is not requested 1011 + * and a @indir_size of zero means the indir table should be reset to default 1012 + * values. An hfunc of zero means that hash function setting is not requested. 1013 + */ 1014 + struct ethtool_rxfh { 1015 + uint32_t cmd; 1016 + uint32_t rss_context; 1017 + uint32_t indir_size; 1018 + uint32_t key_size; 1019 + uint8_t hfunc; 1020 + uint8_t rsvd8[3]; 1021 + uint32_t rsvd32; 1022 + uint32_t rss_config[0]; 1023 + }; 1024 + #define ETH_RXFH_INDIR_NO_CHANGE 0xffffffff 1025 + 1026 + /** 1027 + * struct ethtool_rx_ntuple_flow_spec - specification for RX flow filter 1028 + * @flow_type: Type of match to perform, e.g. %TCP_V4_FLOW 1029 + * @h_u: Flow field values to match (dependent on @flow_type) 1030 + * @m_u: Masks for flow field value bits to be ignored 1031 + * @vlan_tag: VLAN tag to match 1032 + * @vlan_tag_mask: Mask for VLAN tag bits to be ignored 1033 + * @data: Driver-dependent data to match 1034 + * @data_mask: Mask for driver-dependent data bits to be ignored 1035 + * @action: RX ring/queue index to deliver to (non-negative) or other action 1036 + * (negative, e.g. %ETHTOOL_RXNTUPLE_ACTION_DROP) 1037 + * 1038 + * For flow types %TCP_V4_FLOW, %UDP_V4_FLOW and %SCTP_V4_FLOW, where 1039 + * a field value and mask are both zero this is treated as if all mask 1040 + * bits are set i.e. the field is ignored. 
1041 + */ 1042 + struct ethtool_rx_ntuple_flow_spec { 1043 + uint32_t flow_type; 1044 + union { 1045 + struct ethtool_tcpip4_spec tcp_ip4_spec; 1046 + struct ethtool_tcpip4_spec udp_ip4_spec; 1047 + struct ethtool_tcpip4_spec sctp_ip4_spec; 1048 + struct ethtool_ah_espip4_spec ah_ip4_spec; 1049 + struct ethtool_ah_espip4_spec esp_ip4_spec; 1050 + struct ethtool_usrip4_spec usr_ip4_spec; 1051 + struct eth_header ether_spec; 1052 + uint8_t hdata[72]; 1053 + } h_u, m_u; 1054 + 1055 + uint16_t vlan_tag; 1056 + uint16_t vlan_tag_mask; 1057 + uint64_t data; 1058 + uint64_t data_mask; 1059 + 1060 + int32_t action; 1061 + #define ETHTOOL_RXNTUPLE_ACTION_DROP (-1) /* drop packet */ 1062 + #define ETHTOOL_RXNTUPLE_ACTION_CLEAR (-2) /* clear filter */ 1063 + }; 1064 + 1065 + /** 1066 + * struct ethtool_rx_ntuple - command to set or clear RX flow filter 1067 + * @cmd: Command number - %ETHTOOL_SRXNTUPLE 1068 + * @fs: Flow filter specification 1069 + */ 1070 + struct ethtool_rx_ntuple { 1071 + uint32_t cmd; 1072 + struct ethtool_rx_ntuple_flow_spec fs; 1073 + }; 1074 + 1075 + #define ETHTOOL_FLASH_MAX_FILENAME 128 1076 + enum ethtool_flash_op_type { 1077 + ETHTOOL_FLASH_ALL_REGIONS = 0, 1078 + }; 1079 + 1080 + /* for passing firmware flashing related parameters */ 1081 + struct ethtool_flash { 1082 + uint32_t cmd; 1083 + uint32_t region; 1084 + char data[ETHTOOL_FLASH_MAX_FILENAME]; 1085 + }; 1086 + 1087 + /** 1088 + * struct ethtool_dump - used for retrieving, setting device dump 1089 + * @cmd: Command number - %ETHTOOL_GET_DUMP_FLAG, %ETHTOOL_GET_DUMP_DATA, or 1090 + * %ETHTOOL_SET_DUMP 1091 + * @version: FW version of the dump, filled in by driver 1092 + * @flag: driver dependent flag for dump setting, filled in by driver during 1093 + * get and filled in by ethtool for set operation. 1094 + * flag must be initialized by macro ETH_FW_DUMP_DISABLE value when 1095 + * firmware dump is disabled. 
1096 + * @len: length of dump data, used as the length of the user buffer on entry to 1097 + * %ETHTOOL_GET_DUMP_DATA and this is returned as dump length by driver 1098 + * for %ETHTOOL_GET_DUMP_FLAG command 1099 + * @data: data collected for get dump data operation 1100 + */ 1101 + struct ethtool_dump { 1102 + uint32_t cmd; 1103 + uint32_t version; 1104 + uint32_t flag; 1105 + uint32_t len; 1106 + uint8_t data[0]; 1107 + }; 1108 + 1109 + #define ETH_FW_DUMP_DISABLE 0 1110 + 1111 + /* for returning and changing feature sets */ 1112 + 1113 + /** 1114 + * struct ethtool_get_features_block - block with state of 32 features 1115 + * @available: mask of changeable features 1116 + * @requested: mask of features requested to be enabled if possible 1117 + * @active: mask of currently enabled features 1118 + * @never_changed: mask of features not changeable for any device 1119 + */ 1120 + struct ethtool_get_features_block { 1121 + uint32_t available; 1122 + uint32_t requested; 1123 + uint32_t active; 1124 + uint32_t never_changed; 1125 + }; 1126 + 1127 + /** 1128 + * struct ethtool_gfeatures - command to get state of device's features 1129 + * @cmd: command number = %ETHTOOL_GFEATURES 1130 + * @size: On entry, the number of elements in the features[] array; 1131 + * on return, the number of elements in features[] needed to hold 1132 + * all features 1133 + * @features: state of features 1134 + */ 1135 + struct ethtool_gfeatures { 1136 + uint32_t cmd; 1137 + uint32_t size; 1138 + struct ethtool_get_features_block features[0]; 1139 + }; 1140 + 1141 + /** 1142 + * struct ethtool_set_features_block - block with request for 32 features 1143 + * @valid: mask of features to be changed 1144 + * @requested: values of features to be changed 1145 + */ 1146 + struct ethtool_set_features_block { 1147 + uint32_t valid; 1148 + uint32_t requested; 1149 + }; 1150 + 1151 + /** 1152 + * struct ethtool_sfeatures - command to request change in device's features 1153 + * @cmd: command number = 
%ETHTOOL_SFEATURES 1154 + * @size: array size of the features[] array 1155 + * @features: feature change masks 1156 + */ 1157 + struct ethtool_sfeatures { 1158 + uint32_t cmd; 1159 + uint32_t size; 1160 + struct ethtool_set_features_block features[0]; 1161 + }; 1162 + 1163 + /** 1164 + * struct ethtool_ts_info - holds a device's timestamping and PHC association 1165 + * @cmd: command number = %ETHTOOL_GET_TS_INFO 1166 + * @so_timestamping: bit mask of the sum of the supported SO_TIMESTAMPING flags 1167 + * @phc_index: device index of the associated PHC, or -1 if there is none 1168 + * @tx_types: bit mask of the supported hwtstamp_tx_types enumeration values 1169 + * @rx_filters: bit mask of the supported hwtstamp_rx_filters enumeration values 1170 + * 1171 + * The bits in the 'tx_types' and 'rx_filters' fields correspond to 1172 + * the 'hwtstamp_tx_types' and 'hwtstamp_rx_filters' enumeration values, 1173 + * respectively. For example, if the device supports HWTSTAMP_TX_ON, 1174 + * then (1 << HWTSTAMP_TX_ON) in 'tx_types' will be set. 1175 + * 1176 + * Drivers should only report the filters they actually support without 1177 + * upscaling in the SIOCSHWTSTAMP ioctl. If the SIOCSHWSTAMP request for 1178 + * HWTSTAMP_FILTER_V1_SYNC is supported by HWTSTAMP_FILTER_V1_EVENT, then the 1179 + * driver should only report HWTSTAMP_FILTER_V1_EVENT in this op. 1180 + */ 1181 + struct ethtool_ts_info { 1182 + uint32_t cmd; 1183 + uint32_t so_timestamping; 1184 + int32_t phc_index; 1185 + uint32_t tx_types; 1186 + uint32_t tx_reserved[3]; 1187 + uint32_t rx_filters; 1188 + uint32_t rx_reserved[3]; 1189 + }; 1190 + 1191 + /* 1192 + * %ETHTOOL_SFEATURES changes features present in features[].valid to the 1193 + * values of corresponding bits in features[].requested. Bits in .requested 1194 + * not set in .valid or not changeable are ignored. 
1195 + * 1196 + * Returns %EINVAL when .valid contains undefined or never-changeable bits 1197 + * or size is not equal to required number of features words (32-bit blocks). 1198 + * Returns >= 0 if request was completed; bits set in the value mean: 1199 + * %ETHTOOL_F_UNSUPPORTED - there were bits set in .valid that are not 1200 + * changeable (not present in %ETHTOOL_GFEATURES' features[].available) 1201 + * those bits were ignored. 1202 + * %ETHTOOL_F_WISH - some or all changes requested were recorded but the 1203 + * resulting state of bits masked by .valid is not equal to .requested. 1204 + * Probably there are other device-specific constraints on some features 1205 + * in the set. When %ETHTOOL_F_UNSUPPORTED is set, .valid is considered 1206 + * here as though ignored bits were cleared. 1207 + * %ETHTOOL_F_COMPAT - some or all changes requested were made by calling 1208 + * compatibility functions. Requested offload state cannot be properly 1209 + * managed by kernel. 1210 + * 1211 + * Meaning of bits in the masks are obtained by %ETHTOOL_GSSET_INFO (number of 1212 + * bits in the arrays - always multiple of 32) and %ETHTOOL_GSTRINGS commands 1213 + * for ETH_SS_FEATURES string set. First entry in the table corresponds to least 1214 + * significant bit in features[0] fields. Empty strings mark undefined features. 1215 + */ 1216 + enum ethtool_sfeatures_retval_bits { 1217 + ETHTOOL_F_UNSUPPORTED__BIT, 1218 + ETHTOOL_F_WISH__BIT, 1219 + ETHTOOL_F_COMPAT__BIT, 1220 + }; 1221 + 1222 + #define ETHTOOL_F_UNSUPPORTED (1 << ETHTOOL_F_UNSUPPORTED__BIT) 1223 + #define ETHTOOL_F_WISH (1 << ETHTOOL_F_WISH__BIT) 1224 + #define ETHTOOL_F_COMPAT (1 << ETHTOOL_F_COMPAT__BIT) 1225 + 1226 + #define MAX_NUM_QUEUE 4096 1227 + 1228 + /** 1229 + * struct ethtool_per_queue_op - apply sub command to the queues in mask. 
1230 + * @cmd: ETHTOOL_PERQUEUE 1231 + * @sub_command: the sub command which apply to each queues 1232 + * @queue_mask: Bitmap of the queues which sub command apply to 1233 + * @data: A complete command structure following for each of the queues addressed 1234 + */ 1235 + struct ethtool_per_queue_op { 1236 + uint32_t cmd; 1237 + uint32_t sub_command; 1238 + uint32_t queue_mask[__KERNEL_DIV_ROUND_UP(MAX_NUM_QUEUE, 32)]; 1239 + char data[]; 1240 + }; 1241 + 1242 + /** 1243 + * struct ethtool_fecparam - Ethernet forward error correction(fec) parameters 1244 + * @cmd: Command number = %ETHTOOL_GFECPARAM or %ETHTOOL_SFECPARAM 1245 + * @active_fec: FEC mode which is active on porte 1246 + * @fec: Bitmask of supported/configured FEC modes 1247 + * @rsvd: Reserved for future extensions. i.e FEC bypass feature. 1248 + * 1249 + * Drivers should reject a non-zero setting of @autoneg when 1250 + * autoneogotiation is disabled (or not supported) for the link. 1251 + * 1252 + */ 1253 + struct ethtool_fecparam { 1254 + uint32_t cmd; 1255 + /* bitmask of FEC modes */ 1256 + uint32_t active_fec; 1257 + uint32_t fec; 1258 + uint32_t reserved; 1259 + }; 1260 + 1261 + /** 1262 + * enum ethtool_fec_config_bits - flags definition of ethtool_fec_configuration 1263 + * @ETHTOOL_FEC_NONE: FEC mode configuration is not supported 1264 + * @ETHTOOL_FEC_AUTO: Default/Best FEC mode provided by driver 1265 + * @ETHTOOL_FEC_OFF: No FEC Mode 1266 + * @ETHTOOL_FEC_RS: Reed-Solomon Forward Error Detection mode 1267 + * @ETHTOOL_FEC_BASER: Base-R/Reed-Solomon Forward Error Detection mode 1268 + */ 1269 + enum ethtool_fec_config_bits { 1270 + ETHTOOL_FEC_NONE_BIT, 1271 + ETHTOOL_FEC_AUTO_BIT, 1272 + ETHTOOL_FEC_OFF_BIT, 1273 + ETHTOOL_FEC_RS_BIT, 1274 + ETHTOOL_FEC_BASER_BIT, 1275 + }; 1276 + 1277 + #define ETHTOOL_FEC_NONE (1 << ETHTOOL_FEC_NONE_BIT) 1278 + #define ETHTOOL_FEC_AUTO (1 << ETHTOOL_FEC_AUTO_BIT) 1279 + #define ETHTOOL_FEC_OFF (1 << ETHTOOL_FEC_OFF_BIT) 1280 + #define ETHTOOL_FEC_RS (1 
<< ETHTOOL_FEC_RS_BIT) 1281 + #define ETHTOOL_FEC_BASER (1 << ETHTOOL_FEC_BASER_BIT) 1282 + 1283 + /* CMDs currently supported */ 1284 + #define ETHTOOL_GSET 0x00000001 /* DEPRECATED, Get settings. 1285 + * Please use ETHTOOL_GLINKSETTINGS 1286 + */ 1287 + #define ETHTOOL_SSET 0x00000002 /* DEPRECATED, Set settings. 1288 + * Please use ETHTOOL_SLINKSETTINGS 1289 + */ 1290 + #define ETHTOOL_GDRVINFO 0x00000003 /* Get driver info. */ 1291 + #define ETHTOOL_GREGS 0x00000004 /* Get NIC registers. */ 1292 + #define ETHTOOL_GWOL 0x00000005 /* Get wake-on-lan options. */ 1293 + #define ETHTOOL_SWOL 0x00000006 /* Set wake-on-lan options. */ 1294 + #define ETHTOOL_GMSGLVL 0x00000007 /* Get driver message level */ 1295 + #define ETHTOOL_SMSGLVL 0x00000008 /* Set driver msg level. */ 1296 + #define ETHTOOL_NWAY_RST 0x00000009 /* Restart autonegotiation. */ 1297 + /* Get link status for host, i.e. whether the interface *and* the 1298 + * physical port (if there is one) are up (ethtool_value). */ 1299 + #define ETHTOOL_GLINK 0x0000000a 1300 + #define ETHTOOL_GEEPROM 0x0000000b /* Get EEPROM data */ 1301 + #define ETHTOOL_SEEPROM 0x0000000c /* Set EEPROM data. */ 1302 + #define ETHTOOL_GCOALESCE 0x0000000e /* Get coalesce config */ 1303 + #define ETHTOOL_SCOALESCE 0x0000000f /* Set coalesce config. */ 1304 + #define ETHTOOL_GRINGPARAM 0x00000010 /* Get ring parameters */ 1305 + #define ETHTOOL_SRINGPARAM 0x00000011 /* Set ring parameters. */ 1306 + #define ETHTOOL_GPAUSEPARAM 0x00000012 /* Get pause parameters */ 1307 + #define ETHTOOL_SPAUSEPARAM 0x00000013 /* Set pause parameters. 
*/ 1308 + #define ETHTOOL_GRXCSUM 0x00000014 /* Get RX hw csum enable (ethtool_value) */ 1309 + #define ETHTOOL_SRXCSUM 0x00000015 /* Set RX hw csum enable (ethtool_value) */ 1310 + #define ETHTOOL_GTXCSUM 0x00000016 /* Get TX hw csum enable (ethtool_value) */ 1311 + #define ETHTOOL_STXCSUM 0x00000017 /* Set TX hw csum enable (ethtool_value) */ 1312 + #define ETHTOOL_GSG 0x00000018 /* Get scatter-gather enable 1313 + * (ethtool_value) */ 1314 + #define ETHTOOL_SSG 0x00000019 /* Set scatter-gather enable 1315 + * (ethtool_value). */ 1316 + #define ETHTOOL_TEST 0x0000001a /* execute NIC self-test. */ 1317 + #define ETHTOOL_GSTRINGS 0x0000001b /* get specified string set */ 1318 + #define ETHTOOL_PHYS_ID 0x0000001c /* identify the NIC */ 1319 + #define ETHTOOL_GSTATS 0x0000001d /* get NIC-specific statistics */ 1320 + #define ETHTOOL_GTSO 0x0000001e /* Get TSO enable (ethtool_value) */ 1321 + #define ETHTOOL_STSO 0x0000001f /* Set TSO enable (ethtool_value) */ 1322 + #define ETHTOOL_GPERMADDR 0x00000020 /* Get permanent hardware address */ 1323 + #define ETHTOOL_GUFO 0x00000021 /* Get UFO enable (ethtool_value) */ 1324 + #define ETHTOOL_SUFO 0x00000022 /* Set UFO enable (ethtool_value) */ 1325 + #define ETHTOOL_GGSO 0x00000023 /* Get GSO enable (ethtool_value) */ 1326 + #define ETHTOOL_SGSO 0x00000024 /* Set GSO enable (ethtool_value) */ 1327 + #define ETHTOOL_GFLAGS 0x00000025 /* Get flags bitmap(ethtool_value) */ 1328 + #define ETHTOOL_SFLAGS 0x00000026 /* Set flags bitmap(ethtool_value) */ 1329 + #define ETHTOOL_GPFLAGS 0x00000027 /* Get driver-private flags bitmap */ 1330 + #define ETHTOOL_SPFLAGS 0x00000028 /* Set driver-private flags bitmap */ 1331 + 1332 + #define ETHTOOL_GRXFH 0x00000029 /* Get RX flow hash configuration */ 1333 + #define ETHTOOL_SRXFH 0x0000002a /* Set RX flow hash configuration */ 1334 + #define ETHTOOL_GGRO 0x0000002b /* Get GRO enable (ethtool_value) */ 1335 + #define ETHTOOL_SGRO 0x0000002c /* Set GRO enable (ethtool_value) */ 1336 + 
#define ETHTOOL_GRXRINGS 0x0000002d /* Get RX rings available for LB */ 1337 + #define ETHTOOL_GRXCLSRLCNT 0x0000002e /* Get RX class rule count */ 1338 + #define ETHTOOL_GRXCLSRULE 0x0000002f /* Get RX classification rule */ 1339 + #define ETHTOOL_GRXCLSRLALL 0x00000030 /* Get all RX classification rule */ 1340 + #define ETHTOOL_SRXCLSRLDEL 0x00000031 /* Delete RX classification rule */ 1341 + #define ETHTOOL_SRXCLSRLINS 0x00000032 /* Insert RX classification rule */ 1342 + #define ETHTOOL_FLASHDEV 0x00000033 /* Flash firmware to device */ 1343 + #define ETHTOOL_RESET 0x00000034 /* Reset hardware */ 1344 + #define ETHTOOL_SRXNTUPLE 0x00000035 /* Add an n-tuple filter to device */ 1345 + #define ETHTOOL_GRXNTUPLE 0x00000036 /* deprecated */ 1346 + #define ETHTOOL_GSSET_INFO 0x00000037 /* Get string set info */ 1347 + #define ETHTOOL_GRXFHINDIR 0x00000038 /* Get RX flow hash indir'n table */ 1348 + #define ETHTOOL_SRXFHINDIR 0x00000039 /* Set RX flow hash indir'n table */ 1349 + 1350 + #define ETHTOOL_GFEATURES 0x0000003a /* Get device offload settings */ 1351 + #define ETHTOOL_SFEATURES 0x0000003b /* Change device offload settings */ 1352 + #define ETHTOOL_GCHANNELS 0x0000003c /* Get no of channels */ 1353 + #define ETHTOOL_SCHANNELS 0x0000003d /* Set no of channels */ 1354 + #define ETHTOOL_SET_DUMP 0x0000003e /* Set dump settings */ 1355 + #define ETHTOOL_GET_DUMP_FLAG 0x0000003f /* Get dump settings */ 1356 + #define ETHTOOL_GET_DUMP_DATA 0x00000040 /* Get dump data */ 1357 + #define ETHTOOL_GET_TS_INFO 0x00000041 /* Get time stamping and PHC info */ 1358 + #define ETHTOOL_GMODULEINFO 0x00000042 /* Get plug-in module information */ 1359 + #define ETHTOOL_GMODULEEEPROM 0x00000043 /* Get plug-in module eeprom */ 1360 + #define ETHTOOL_GEEE 0x00000044 /* Get EEE settings */ 1361 + #define ETHTOOL_SEEE 0x00000045 /* Set EEE settings */ 1362 + 1363 + #define ETHTOOL_GRSSH 0x00000046 /* Get RX flow hash configuration */ 1364 + #define ETHTOOL_SRSSH 0x00000047 /* Set 
RX flow hash configuration */ 1365 + #define ETHTOOL_GTUNABLE 0x00000048 /* Get tunable configuration */ 1366 + #define ETHTOOL_STUNABLE 0x00000049 /* Set tunable configuration */ 1367 + #define ETHTOOL_GPHYSTATS 0x0000004a /* get PHY-specific statistics */ 1368 + 1369 + #define ETHTOOL_PERQUEUE 0x0000004b /* Set per queue options */ 1370 + 1371 + #define ETHTOOL_GLINKSETTINGS 0x0000004c /* Get ethtool_link_settings */ 1372 + #define ETHTOOL_SLINKSETTINGS 0x0000004d /* Set ethtool_link_settings */ 1373 + #define ETHTOOL_PHY_GTUNABLE 0x0000004e /* Get PHY tunable configuration */ 1374 + #define ETHTOOL_PHY_STUNABLE 0x0000004f /* Set PHY tunable configuration */ 1375 + #define ETHTOOL_GFECPARAM 0x00000050 /* Get FEC settings */ 1376 + #define ETHTOOL_SFECPARAM 0x00000051 /* Set FEC settings */ 1377 + 1378 + /* compatibility with older code */ 1379 + #define SPARC_ETH_GSET ETHTOOL_GSET 1380 + #define SPARC_ETH_SSET ETHTOOL_SSET 1381 + 1382 + /* Link mode bit indices */ 1383 + enum ethtool_link_mode_bit_indices { 1384 + ETHTOOL_LINK_MODE_10baseT_Half_BIT = 0, 1385 + ETHTOOL_LINK_MODE_10baseT_Full_BIT = 1, 1386 + ETHTOOL_LINK_MODE_100baseT_Half_BIT = 2, 1387 + ETHTOOL_LINK_MODE_100baseT_Full_BIT = 3, 1388 + ETHTOOL_LINK_MODE_1000baseT_Half_BIT = 4, 1389 + ETHTOOL_LINK_MODE_1000baseT_Full_BIT = 5, 1390 + ETHTOOL_LINK_MODE_Autoneg_BIT = 6, 1391 + ETHTOOL_LINK_MODE_TP_BIT = 7, 1392 + ETHTOOL_LINK_MODE_AUI_BIT = 8, 1393 + ETHTOOL_LINK_MODE_MII_BIT = 9, 1394 + ETHTOOL_LINK_MODE_FIBRE_BIT = 10, 1395 + ETHTOOL_LINK_MODE_BNC_BIT = 11, 1396 + ETHTOOL_LINK_MODE_10000baseT_Full_BIT = 12, 1397 + ETHTOOL_LINK_MODE_Pause_BIT = 13, 1398 + ETHTOOL_LINK_MODE_Asym_Pause_BIT = 14, 1399 + ETHTOOL_LINK_MODE_2500baseX_Full_BIT = 15, 1400 + ETHTOOL_LINK_MODE_Backplane_BIT = 16, 1401 + ETHTOOL_LINK_MODE_1000baseKX_Full_BIT = 17, 1402 + ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT = 18, 1403 + ETHTOOL_LINK_MODE_10000baseKR_Full_BIT = 19, 1404 + ETHTOOL_LINK_MODE_10000baseR_FEC_BIT = 20, 1405 + 
ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT = 21, 1406 + ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT = 22, 1407 + ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT = 23, 1408 + ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT = 24, 1409 + ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT = 25, 1410 + ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT = 26, 1411 + ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT = 27, 1412 + ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT = 28, 1413 + ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT = 29, 1414 + ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT = 30, 1415 + ETHTOOL_LINK_MODE_25000baseCR_Full_BIT = 31, 1416 + ETHTOOL_LINK_MODE_25000baseKR_Full_BIT = 32, 1417 + ETHTOOL_LINK_MODE_25000baseSR_Full_BIT = 33, 1418 + ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT = 34, 1419 + ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT = 35, 1420 + ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT = 36, 1421 + ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT = 37, 1422 + ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT = 38, 1423 + ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT = 39, 1424 + ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT = 40, 1425 + ETHTOOL_LINK_MODE_1000baseX_Full_BIT = 41, 1426 + ETHTOOL_LINK_MODE_10000baseCR_Full_BIT = 42, 1427 + ETHTOOL_LINK_MODE_10000baseSR_Full_BIT = 43, 1428 + ETHTOOL_LINK_MODE_10000baseLR_Full_BIT = 44, 1429 + ETHTOOL_LINK_MODE_10000baseLRM_Full_BIT = 45, 1430 + ETHTOOL_LINK_MODE_10000baseER_Full_BIT = 46, 1431 + ETHTOOL_LINK_MODE_2500baseT_Full_BIT = 47, 1432 + ETHTOOL_LINK_MODE_5000baseT_Full_BIT = 48, 1433 + 1434 + ETHTOOL_LINK_MODE_FEC_NONE_BIT = 49, 1435 + ETHTOOL_LINK_MODE_FEC_RS_BIT = 50, 1436 + ETHTOOL_LINK_MODE_FEC_BASER_BIT = 51, 1437 + 1438 + /* Last allowed bit for __ETHTOOL_LINK_MODE_LEGACY_MASK is bit 1439 + * 31. Please do NOT define any SUPPORTED_* or ADVERTISED_* 1440 + * macro for bits > 31. The only way to use indices > 31 is to 1441 + * use the new ETHTOOL_GLINKSETTINGS/ETHTOOL_SLINKSETTINGS API. 
1442 + */ 1443 + 1444 + __ETHTOOL_LINK_MODE_LAST 1445 + = ETHTOOL_LINK_MODE_FEC_BASER_BIT, 1446 + }; 1447 + 1448 + #define __ETHTOOL_LINK_MODE_LEGACY_MASK(base_name) \ 1449 + (1UL << (ETHTOOL_LINK_MODE_ ## base_name ## _BIT)) 1450 + 1451 + /* DEPRECATED macros. Please migrate to 1452 + * ETHTOOL_GLINKSETTINGS/ETHTOOL_SLINKSETTINGS API. Please do NOT 1453 + * define any new SUPPORTED_* macro for bits > 31. 1454 + */ 1455 + #define SUPPORTED_10baseT_Half __ETHTOOL_LINK_MODE_LEGACY_MASK(10baseT_Half) 1456 + #define SUPPORTED_10baseT_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(10baseT_Full) 1457 + #define SUPPORTED_100baseT_Half __ETHTOOL_LINK_MODE_LEGACY_MASK(100baseT_Half) 1458 + #define SUPPORTED_100baseT_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(100baseT_Full) 1459 + #define SUPPORTED_1000baseT_Half __ETHTOOL_LINK_MODE_LEGACY_MASK(1000baseT_Half) 1460 + #define SUPPORTED_1000baseT_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(1000baseT_Full) 1461 + #define SUPPORTED_Autoneg __ETHTOOL_LINK_MODE_LEGACY_MASK(Autoneg) 1462 + #define SUPPORTED_TP __ETHTOOL_LINK_MODE_LEGACY_MASK(TP) 1463 + #define SUPPORTED_AUI __ETHTOOL_LINK_MODE_LEGACY_MASK(AUI) 1464 + #define SUPPORTED_MII __ETHTOOL_LINK_MODE_LEGACY_MASK(MII) 1465 + #define SUPPORTED_FIBRE __ETHTOOL_LINK_MODE_LEGACY_MASK(FIBRE) 1466 + #define SUPPORTED_BNC __ETHTOOL_LINK_MODE_LEGACY_MASK(BNC) 1467 + #define SUPPORTED_10000baseT_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(10000baseT_Full) 1468 + #define SUPPORTED_Pause __ETHTOOL_LINK_MODE_LEGACY_MASK(Pause) 1469 + #define SUPPORTED_Asym_Pause __ETHTOOL_LINK_MODE_LEGACY_MASK(Asym_Pause) 1470 + #define SUPPORTED_2500baseX_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(2500baseX_Full) 1471 + #define SUPPORTED_Backplane __ETHTOOL_LINK_MODE_LEGACY_MASK(Backplane) 1472 + #define SUPPORTED_1000baseKX_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(1000baseKX_Full) 1473 + #define SUPPORTED_10000baseKX4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(10000baseKX4_Full) 1474 + #define SUPPORTED_10000baseKR_Full 
__ETHTOOL_LINK_MODE_LEGACY_MASK(10000baseKR_Full) 1475 + #define SUPPORTED_10000baseR_FEC __ETHTOOL_LINK_MODE_LEGACY_MASK(10000baseR_FEC) 1476 + #define SUPPORTED_20000baseMLD2_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(20000baseMLD2_Full) 1477 + #define SUPPORTED_20000baseKR2_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(20000baseKR2_Full) 1478 + #define SUPPORTED_40000baseKR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(40000baseKR4_Full) 1479 + #define SUPPORTED_40000baseCR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(40000baseCR4_Full) 1480 + #define SUPPORTED_40000baseSR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(40000baseSR4_Full) 1481 + #define SUPPORTED_40000baseLR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(40000baseLR4_Full) 1482 + #define SUPPORTED_56000baseKR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(56000baseKR4_Full) 1483 + #define SUPPORTED_56000baseCR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(56000baseCR4_Full) 1484 + #define SUPPORTED_56000baseSR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(56000baseSR4_Full) 1485 + #define SUPPORTED_56000baseLR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(56000baseLR4_Full) 1486 + /* Please do not define any new SUPPORTED_* macro for bits > 31, see 1487 + * notice above. 1488 + */ 1489 + 1490 + /* 1491 + * DEPRECATED macros. Please migrate to 1492 + * ETHTOOL_GLINKSETTINGS/ETHTOOL_SLINKSETTINGS API. Please do NOT 1493 + * define any new ADERTISE_* macro for bits > 31. 
1494 + */ 1495 + #define ADVERTISED_10baseT_Half __ETHTOOL_LINK_MODE_LEGACY_MASK(10baseT_Half) 1496 + #define ADVERTISED_10baseT_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(10baseT_Full) 1497 + #define ADVERTISED_100baseT_Half __ETHTOOL_LINK_MODE_LEGACY_MASK(100baseT_Half) 1498 + #define ADVERTISED_100baseT_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(100baseT_Full) 1499 + #define ADVERTISED_1000baseT_Half __ETHTOOL_LINK_MODE_LEGACY_MASK(1000baseT_Half) 1500 + #define ADVERTISED_1000baseT_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(1000baseT_Full) 1501 + #define ADVERTISED_Autoneg __ETHTOOL_LINK_MODE_LEGACY_MASK(Autoneg) 1502 + #define ADVERTISED_TP __ETHTOOL_LINK_MODE_LEGACY_MASK(TP) 1503 + #define ADVERTISED_AUI __ETHTOOL_LINK_MODE_LEGACY_MASK(AUI) 1504 + #define ADVERTISED_MII __ETHTOOL_LINK_MODE_LEGACY_MASK(MII) 1505 + #define ADVERTISED_FIBRE __ETHTOOL_LINK_MODE_LEGACY_MASK(FIBRE) 1506 + #define ADVERTISED_BNC __ETHTOOL_LINK_MODE_LEGACY_MASK(BNC) 1507 + #define ADVERTISED_10000baseT_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(10000baseT_Full) 1508 + #define ADVERTISED_Pause __ETHTOOL_LINK_MODE_LEGACY_MASK(Pause) 1509 + #define ADVERTISED_Asym_Pause __ETHTOOL_LINK_MODE_LEGACY_MASK(Asym_Pause) 1510 + #define ADVERTISED_2500baseX_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(2500baseX_Full) 1511 + #define ADVERTISED_Backplane __ETHTOOL_LINK_MODE_LEGACY_MASK(Backplane) 1512 + #define ADVERTISED_1000baseKX_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(1000baseKX_Full) 1513 + #define ADVERTISED_10000baseKX4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(10000baseKX4_Full) 1514 + #define ADVERTISED_10000baseKR_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(10000baseKR_Full) 1515 + #define ADVERTISED_10000baseR_FEC __ETHTOOL_LINK_MODE_LEGACY_MASK(10000baseR_FEC) 1516 + #define ADVERTISED_20000baseMLD2_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(20000baseMLD2_Full) 1517 + #define ADVERTISED_20000baseKR2_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(20000baseKR2_Full) 1518 + #define ADVERTISED_40000baseKR4_Full 
__ETHTOOL_LINK_MODE_LEGACY_MASK(40000baseKR4_Full) 1519 + #define ADVERTISED_40000baseCR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(40000baseCR4_Full) 1520 + #define ADVERTISED_40000baseSR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(40000baseSR4_Full) 1521 + #define ADVERTISED_40000baseLR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(40000baseLR4_Full) 1522 + #define ADVERTISED_56000baseKR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(56000baseKR4_Full) 1523 + #define ADVERTISED_56000baseCR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(56000baseCR4_Full) 1524 + #define ADVERTISED_56000baseSR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(56000baseSR4_Full) 1525 + #define ADVERTISED_56000baseLR4_Full __ETHTOOL_LINK_MODE_LEGACY_MASK(56000baseLR4_Full) 1526 + /* Please do not define any new ADVERTISED_* macro for bits > 31, see 1527 + * notice above. 1528 + */ 1529 + 1530 + /* The following are all involved in forcing a particular link 1531 + * mode for the device for setting things. When getting the 1532 + * devices settings, these indicate the current mode and whether 1533 + * it was forced up into this mode or autonegotiated. 1534 + */ 1535 + 1536 + /* The forced speed, in units of 1Mb. All values 0 to INT_MAX are legal. 1537 + * Update drivers/net/phy/phy.c:phy_speed_to_str() and 1538 + * drivers/net/bonding/bond_3ad.c:__get_link_speed() when adding new values. 1539 + */ 1540 + #define SPEED_10 10 1541 + #define SPEED_100 100 1542 + #define SPEED_1000 1000 1543 + #define SPEED_2500 2500 1544 + #define SPEED_5000 5000 1545 + #define SPEED_10000 10000 1546 + #define SPEED_14000 14000 1547 + #define SPEED_20000 20000 1548 + #define SPEED_25000 25000 1549 + #define SPEED_40000 40000 1550 + #define SPEED_50000 50000 1551 + #define SPEED_56000 56000 1552 + #define SPEED_100000 100000 1553 + 1554 + #define SPEED_UNKNOWN -1 1555 + 1556 + static inline int ethtool_validate_speed(uint32_t speed) 1557 + { 1558 + return speed <= INT_MAX || speed == SPEED_UNKNOWN; 1559 + } 1560 + 1561 + /* Duplex, half or full. 
*/ 1562 + #define DUPLEX_HALF 0x00 1563 + #define DUPLEX_FULL 0x01 1564 + #define DUPLEX_UNKNOWN 0xff 1565 + 1566 + static inline int ethtool_validate_duplex(uint8_t duplex) 1567 + { 1568 + switch (duplex) { 1569 + case DUPLEX_HALF: 1570 + case DUPLEX_FULL: 1571 + case DUPLEX_UNKNOWN: 1572 + return 1; 1573 + } 1574 + 1575 + return 0; 1576 + } 1577 + 1578 + /* Which connector port. */ 1579 + #define PORT_TP 0x00 1580 + #define PORT_AUI 0x01 1581 + #define PORT_MII 0x02 1582 + #define PORT_FIBRE 0x03 1583 + #define PORT_BNC 0x04 1584 + #define PORT_DA 0x05 1585 + #define PORT_NONE 0xef 1586 + #define PORT_OTHER 0xff 1587 + 1588 + /* Which transceiver to use. */ 1589 + #define XCVR_INTERNAL 0x00 /* PHY and MAC are in the same package */ 1590 + #define XCVR_EXTERNAL 0x01 /* PHY and MAC are in different packages */ 1591 + #define XCVR_DUMMY1 0x02 1592 + #define XCVR_DUMMY2 0x03 1593 + #define XCVR_DUMMY3 0x04 1594 + 1595 + /* Enable or disable autonegotiation. */ 1596 + #define AUTONEG_DISABLE 0x00 1597 + #define AUTONEG_ENABLE 0x01 1598 + 1599 + /* MDI or MDI-X status/control - if MDI/MDI_X/AUTO is set then 1600 + * the driver is required to renegotiate link 1601 + */ 1602 + #define ETH_TP_MDI_INVALID 0x00 /* status: unknown; control: unsupported */ 1603 + #define ETH_TP_MDI 0x01 /* status: MDI; control: force MDI */ 1604 + #define ETH_TP_MDI_X 0x02 /* status: MDI-X; control: force MDI-X */ 1605 + #define ETH_TP_MDI_AUTO 0x03 /* control: auto-select */ 1606 + 1607 + /* Wake-On-Lan options. 
*/ 1608 + #define WAKE_PHY (1 << 0) 1609 + #define WAKE_UCAST (1 << 1) 1610 + #define WAKE_MCAST (1 << 2) 1611 + #define WAKE_BCAST (1 << 3) 1612 + #define WAKE_ARP (1 << 4) 1613 + #define WAKE_MAGIC (1 << 5) 1614 + #define WAKE_MAGICSECURE (1 << 6) /* only meaningful if WAKE_MAGIC */ 1615 + 1616 + /* L2-L4 network traffic flow types */ 1617 + #define TCP_V4_FLOW 0x01 /* hash or spec (tcp_ip4_spec) */ 1618 + #define UDP_V4_FLOW 0x02 /* hash or spec (udp_ip4_spec) */ 1619 + #define SCTP_V4_FLOW 0x03 /* hash or spec (sctp_ip4_spec) */ 1620 + #define AH_ESP_V4_FLOW 0x04 /* hash only */ 1621 + #define TCP_V6_FLOW 0x05 /* hash or spec (tcp_ip6_spec; nfc only) */ 1622 + #define UDP_V6_FLOW 0x06 /* hash or spec (udp_ip6_spec; nfc only) */ 1623 + #define SCTP_V6_FLOW 0x07 /* hash or spec (sctp_ip6_spec; nfc only) */ 1624 + #define AH_ESP_V6_FLOW 0x08 /* hash only */ 1625 + #define AH_V4_FLOW 0x09 /* hash or spec (ah_ip4_spec) */ 1626 + #define ESP_V4_FLOW 0x0a /* hash or spec (esp_ip4_spec) */ 1627 + #define AH_V6_FLOW 0x0b /* hash or spec (ah_ip6_spec; nfc only) */ 1628 + #define ESP_V6_FLOW 0x0c /* hash or spec (esp_ip6_spec; nfc only) */ 1629 + #define IPV4_USER_FLOW 0x0d /* spec only (usr_ip4_spec) */ 1630 + #define IP_USER_FLOW IPV4_USER_FLOW 1631 + #define IPV6_USER_FLOW 0x0e /* spec only (usr_ip6_spec; nfc only) */ 1632 + #define IPV4_FLOW 0x10 /* hash only */ 1633 + #define IPV6_FLOW 0x11 /* hash only */ 1634 + #define ETHER_FLOW 0x12 /* spec only (ether_spec) */ 1635 + /* Flag to enable additional fields in struct ethtool_rx_flow_spec */ 1636 + #define FLOW_EXT 0x80000000 1637 + #define FLOW_MAC_EXT 0x40000000 1638 + 1639 + /* L3-L4 network traffic flow hash options */ 1640 + #define RXH_L2DA (1 << 1) 1641 + #define RXH_VLAN (1 << 2) 1642 + #define RXH_L3_PROTO (1 << 3) 1643 + #define RXH_IP_SRC (1 << 4) 1644 + #define RXH_IP_DST (1 << 5) 1645 + #define RXH_L4_B_0_1 (1 << 6) /* src port in case of TCP/UDP/SCTP */ 1646 + #define RXH_L4_B_2_3 (1 << 7) /* dst port in 
case of TCP/UDP/SCTP */ 1647 + #define RXH_DISCARD (1 << 31) 1648 + 1649 + #define RX_CLS_FLOW_DISC 0xffffffffffffffffULL 1650 + 1651 + /* Special RX classification rule insert location values */ 1652 + #define RX_CLS_LOC_SPECIAL 0x80000000 /* flag */ 1653 + #define RX_CLS_LOC_ANY 0xffffffff 1654 + #define RX_CLS_LOC_FIRST 0xfffffffe 1655 + #define RX_CLS_LOC_LAST 0xfffffffd 1656 + 1657 + /* EEPROM Standards for plug in modules */ 1658 + #define ETH_MODULE_SFF_8079 0x1 1659 + #define ETH_MODULE_SFF_8079_LEN 256 1660 + #define ETH_MODULE_SFF_8472 0x2 1661 + #define ETH_MODULE_SFF_8472_LEN 512 1662 + #define ETH_MODULE_SFF_8636 0x3 1663 + #define ETH_MODULE_SFF_8636_LEN 256 1664 + #define ETH_MODULE_SFF_8436 0x4 1665 + #define ETH_MODULE_SFF_8436_LEN 256 1666 + 1667 + /* Reset flags */ 1668 + /* The reset() operation must clear the flags for the components which 1669 + * were actually reset. On successful return, the flags indicate the 1670 + * components which were not reset, either because they do not exist 1671 + * in the hardware or because they cannot be reset independently. The 1672 + * driver must never reset any components that were not requested. 1673 + */ 1674 + enum ethtool_reset_flags { 1675 + /* These flags represent components dedicated to the interface 1676 + * the command is addressed to. Shift any flag left by 1677 + * ETH_RESET_SHARED_SHIFT to reset a shared component of the 1678 + * same type. 
1679 + */ 1680 + ETH_RESET_MGMT = 1 << 0, /* Management processor */ 1681 + ETH_RESET_IRQ = 1 << 1, /* Interrupt requester */ 1682 + ETH_RESET_DMA = 1 << 2, /* DMA engine */ 1683 + ETH_RESET_FILTER = 1 << 3, /* Filtering/flow direction */ 1684 + ETH_RESET_OFFLOAD = 1 << 4, /* Protocol offload */ 1685 + ETH_RESET_MAC = 1 << 5, /* Media access controller */ 1686 + ETH_RESET_PHY = 1 << 6, /* Transceiver/PHY */ 1687 + ETH_RESET_RAM = 1 << 7, /* RAM shared between 1688 + * multiple components */ 1689 + ETH_RESET_AP = 1 << 8, /* Application processor */ 1690 + 1691 + ETH_RESET_DEDICATED = 0x0000ffff, /* All components dedicated to 1692 + * this interface */ 1693 + ETH_RESET_ALL = 0xffffffff, /* All components used by this 1694 + * interface, even if shared */ 1695 + }; 1696 + #define ETH_RESET_SHARED_SHIFT 16 1697 + 1698 + 1699 + /** 1700 + * struct ethtool_link_settings - link control and status 1701 + * 1702 + * IMPORTANT, Backward compatibility notice: When implementing new 1703 + * user-space tools, please first try %ETHTOOL_GLINKSETTINGS, and 1704 + * if it succeeds use %ETHTOOL_SLINKSETTINGS to change link 1705 + * settings; do not use %ETHTOOL_SSET if %ETHTOOL_GLINKSETTINGS 1706 + * succeeded: stick to %ETHTOOL_GLINKSETTINGS/%SLINKSETTINGS in 1707 + * that case. Conversely, if %ETHTOOL_GLINKSETTINGS fails, use 1708 + * %ETHTOOL_GSET to query and %ETHTOOL_SSET to change link 1709 + * settings; do not use %ETHTOOL_SLINKSETTINGS if 1710 + * %ETHTOOL_GLINKSETTINGS failed: stick to 1711 + * %ETHTOOL_GSET/%ETHTOOL_SSET in that case. 1712 + * 1713 + * @cmd: Command number = %ETHTOOL_GLINKSETTINGS or %ETHTOOL_SLINKSETTINGS 1714 + * @speed: Link speed (Mbps) 1715 + * @duplex: Duplex mode; one of %DUPLEX_* 1716 + * @port: Physical connector type; one of %PORT_* 1717 + * @phy_address: MDIO address of PHY (transceiver); 0 or 255 if not 1718 + * applicable. For clause 45 PHYs this is the PRTAD. 
1719 + * @autoneg: Enable/disable autonegotiation and auto-detection; 1720 + * either %AUTONEG_DISABLE or %AUTONEG_ENABLE 1721 + * @mdio_support: Bitmask of %ETH_MDIO_SUPPORTS_* flags for the MDIO 1722 + * protocols supported by the interface; 0 if unknown. 1723 + * Read-only. 1724 + * @eth_tp_mdix: Ethernet twisted-pair MDI(-X) status; one of 1725 + * %ETH_TP_MDI_*. If the status is unknown or not applicable, the 1726 + * value will be %ETH_TP_MDI_INVALID. Read-only. 1727 + * @eth_tp_mdix_ctrl: Ethernet twisted pair MDI(-X) control; one of 1728 + * %ETH_TP_MDI_*. If MDI(-X) control is not implemented, reads 1729 + * yield %ETH_TP_MDI_INVALID and writes may be ignored or rejected. 1730 + * When written successfully, the link should be renegotiated if 1731 + * necessary. 1732 + * @link_mode_masks_nwords: Number of 32-bit words for each of the 1733 + * supported, advertising, lp_advertising link mode bitmaps. For 1734 + * %ETHTOOL_GLINKSETTINGS: on entry, number of words passed by user 1735 + * (>= 0); on return, if handshake in progress, negative if 1736 + * request size unsupported by kernel: absolute value indicates 1737 + * kernel expected size and all the other fields but cmd 1738 + * are 0; otherwise (handshake completed), strictly positive 1739 + * to indicate size used by kernel and cmd field stays 1740 + * %ETHTOOL_GLINKSETTINGS, all other fields populated by driver. For 1741 + * %ETHTOOL_SLINKSETTINGS: must be valid on entry, ie. a positive 1742 + * value returned previously by %ETHTOOL_GLINKSETTINGS, otherwise 1743 + * refused. For drivers: ignore this field (use kernel's 1744 + * __ETHTOOL_LINK_MODE_MASK_NBITS instead), any change to it will 1745 + * be overwritten by kernel. 1746 + * @supported: Bitmap with each bit meaning given by 1747 + * %ethtool_link_mode_bit_indices for the link modes, physical 1748 + * connectors and other link features for which the interface 1749 + * supports autonegotiation or auto-detection. Read-only. 
1750 + * @advertising: Bitmap with each bit meaning given by 1751 + * %ethtool_link_mode_bit_indices for the link modes, physical 1752 + * connectors and other link features that are advertised through 1753 + * autonegotiation or enabled for auto-detection. 1754 + * @lp_advertising: Bitmap with each bit meaning given by 1755 + * %ethtool_link_mode_bit_indices for the link modes, and other 1756 + * link features that the link partner advertised through 1757 + * autonegotiation; 0 if unknown or not applicable. Read-only. 1758 + * @transceiver: Used to distinguish different possible PHY types, 1759 + * reported consistently by PHYLIB. Read-only. 1760 + * 1761 + * If autonegotiation is disabled, the speed and @duplex represent the 1762 + * fixed link mode and are writable if the driver supports multiple 1763 + * link modes. If it is enabled then they are read-only; if the link 1764 + * is up they represent the negotiated link mode; if the link is down, 1765 + * the speed is 0, %SPEED_UNKNOWN or the highest enabled speed and 1766 + * @duplex is %DUPLEX_UNKNOWN or the best enabled duplex mode. 1767 + * 1768 + * Some hardware interfaces may have multiple PHYs and/or physical 1769 + * connectors fitted or do not allow the driver to detect which are 1770 + * fitted. For these interfaces @port and/or @phy_address may be 1771 + * writable, possibly dependent on @autoneg being %AUTONEG_DISABLE. 1772 + * Otherwise, attempts to write different values may be ignored or 1773 + * rejected. 1774 + * 1775 + * Deprecated %ethtool_cmd fields transceiver, maxtxpkt and maxrxpkt 1776 + * are not available in %ethtool_link_settings. Until all drivers are 1777 + * converted to ignore them or to the new %ethtool_link_settings API, 1778 + * for both queries and changes, users should always try 1779 + * %ETHTOOL_GLINKSETTINGS first, and if it fails with -ENOTSUPP stick 1780 + * only to %ETHTOOL_GSET and %ETHTOOL_SSET consistently. 
If it 1781 + * succeeds, then users should stick to %ETHTOOL_GLINKSETTINGS and 1782 + * %ETHTOOL_SLINKSETTINGS (which would support drivers implementing 1783 + * either %ethtool_cmd or %ethtool_link_settings). 1784 + * 1785 + * Users should assume that all fields not marked read-only are 1786 + * writable and subject to validation by the driver. They should use 1787 + * %ETHTOOL_GLINKSETTINGS to get the current values before making specific 1788 + * changes and then applying them with %ETHTOOL_SLINKSETTINGS. 1789 + * 1790 + * Drivers that implement %get_link_ksettings and/or 1791 + * %set_link_ksettings should ignore the @cmd 1792 + * and @link_mode_masks_nwords fields (any change to them overwritten 1793 + * by kernel), and rely only on kernel's internal 1794 + * %__ETHTOOL_LINK_MODE_MASK_NBITS and 1795 + * %ethtool_link_mode_mask_t. Drivers that implement 1796 + * %set_link_ksettings() should validate all fields other than @cmd 1797 + * and @link_mode_masks_nwords that are not described as read-only or 1798 + * deprecated, and must ignore all fields described as read-only. 1799 + */ 1800 + struct ethtool_link_settings { 1801 + uint32_t cmd; 1802 + uint32_t speed; 1803 + uint8_t duplex; 1804 + uint8_t port; 1805 + uint8_t phy_address; 1806 + uint8_t autoneg; 1807 + uint8_t mdio_support; 1808 + uint8_t eth_tp_mdix; 1809 + uint8_t eth_tp_mdix_ctrl; 1810 + int8_t link_mode_masks_nwords; 1811 + uint8_t transceiver; 1812 + uint8_t reserved1[3]; 1813 + uint32_t reserved[7]; 1814 + uint32_t link_mode_masks[0]; 1815 + /* layout of link_mode_masks fields: 1816 + * uint32_t map_supported[link_mode_masks_nwords]; 1817 + * uint32_t map_advertising[link_mode_masks_nwords]; 1818 + * uint32_t map_lp_advertising[link_mode_masks_nwords]; 1819 + */ 1820 + }; 1821 + #endif /* _LINUX_ETHTOOL_H */
+15
include/standard-headers/linux/kernel.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ 2 + #ifndef _LINUX_KERNEL_H 3 + #define _LINUX_KERNEL_H 4 + 5 + #include "standard-headers/linux/sysinfo.h" 6 + 7 + /* 8 + * 'kernel.h' contains some often-used function prototypes etc 9 + */ 10 + #define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (typeof(x))(a) - 1) 11 + #define __ALIGN_KERNEL_MASK(x, mask) (((x) + (mask)) & ~(mask)) 12 + 13 + #define __KERNEL_DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d)) 14 + 15 + #endif /* _LINUX_KERNEL_H */
+25
include/standard-headers/linux/sysinfo.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ 2 + #ifndef _LINUX_SYSINFO_H 3 + #define _LINUX_SYSINFO_H 4 + 5 + #include "standard-headers/linux/types.h" 6 + 7 + #define SI_LOAD_SHIFT 16 8 + struct sysinfo { 9 + long uptime; /* Seconds since boot */ 10 + unsigned long loads[3]; /* 1, 5, and 15 minute load averages */ 11 + unsigned long totalram; /* Total usable main memory size */ 12 + unsigned long freeram; /* Available memory size */ 13 + unsigned long sharedram; /* Amount of shared memory */ 14 + unsigned long bufferram; /* Memory used by buffers */ 15 + unsigned long totalswap; /* Total swap space size */ 16 + unsigned long freeswap; /* swap space still available */ 17 + uint16_t procs; /* Number of current processes */ 18 + uint16_t pad; /* Explicit padding for m68k */ 19 + unsigned long totalhigh; /* Total high memory size */ 20 + unsigned long freehigh; /* Available high memory size */ 21 + uint32_t mem_unit; /* Memory unit size in bytes */ 22 + char _f[20-2*sizeof(unsigned long)-sizeof(uint32_t)]; /* Padding: libc5 uses this.. */ 23 + }; 24 + 25 + #endif /* _LINUX_SYSINFO_H */
+6
migration/migration.c
··· 155 155 if (!once) { 156 156 mis_current.state = MIGRATION_STATUS_NONE; 157 157 memset(&mis_current, 0, sizeof(MigrationIncomingState)); 158 + mis_current.postcopy_remote_fds = g_array_new(FALSE, TRUE, 159 + sizeof(struct PostCopyFD)); 158 160 qemu_mutex_init(&mis_current.rp_mutex); 159 161 qemu_event_init(&mis_current.main_thread_load_event, false); 160 162 ··· 179 181 if (mis->from_src_file) { 180 182 qemu_fclose(mis->from_src_file); 181 183 mis->from_src_file = NULL; 184 + } 185 + if (mis->postcopy_remote_fds) { 186 + g_array_free(mis->postcopy_remote_fds, TRUE); 187 + mis->postcopy_remote_fds = NULL; 182 188 } 183 189 184 190 qemu_event_reset(&mis->main_thread_load_event);
+4
migration/migration.h
··· 49 49 int userfault_event_fd; 50 50 QEMUFile *to_src_file; 51 51 QemuMutex rp_mutex; /* We send replies from multiple threads */ 52 + /* RAMBlock of last request sent to source */ 53 + RAMBlock *last_rb; 52 54 void *postcopy_tmp_page; 53 55 void *postcopy_tmp_zero_page; 56 + /* PostCopyFD's for external userfaultfds & handlers of shared memory */ 57 + GArray *postcopy_remote_fds; 54 58 55 59 QEMUBH *bh; 56 60
+292 -64
migration/postcopy-ram.c
··· 23 23 #include "savevm.h" 24 24 #include "postcopy-ram.h" 25 25 #include "ram.h" 26 + #include "qapi/error.h" 27 + #include "qemu/notify.h" 26 28 #include "sysemu/sysemu.h" 27 29 #include "sysemu/balloon.h" 28 30 #include "qemu/error-report.h" ··· 45 47 unsigned int nsentcmds; 46 48 }; 47 49 50 + static NotifierWithReturnList postcopy_notifier_list; 51 + 52 + void postcopy_infrastructure_init(void) 53 + { 54 + notifier_with_return_list_init(&postcopy_notifier_list); 55 + } 56 + 57 + void postcopy_add_notifier(NotifierWithReturn *nn) 58 + { 59 + notifier_with_return_list_add(&postcopy_notifier_list, nn); 60 + } 61 + 62 + void postcopy_remove_notifier(NotifierWithReturn *n) 63 + { 64 + notifier_with_return_remove(n); 65 + } 66 + 67 + int postcopy_notify(enum PostcopyNotifyReason reason, Error **errp) 68 + { 69 + struct PostcopyNotifyData pnd; 70 + pnd.reason = reason; 71 + pnd.errp = errp; 72 + 73 + return notifier_with_return_list_notify(&postcopy_notifier_list, 74 + &pnd); 75 + } 76 + 48 77 /* Postcopy needs to detect accesses to pages that haven't yet been copied 49 78 * across, and efficiently map new pages in, the techniques for doing this 50 79 * are target OS specific. 
··· 186 215 RAMBlock *rb = qemu_ram_block_by_name(block_name); 187 216 size_t pagesize = qemu_ram_pagesize(rb); 188 217 189 - if (qemu_ram_is_shared(rb)) { 190 - error_report("Postcopy on shared RAM (%s) is not yet supported", 191 - block_name); 192 - return 1; 193 - } 194 - 195 218 if (length % pagesize) { 196 219 error_report("Postcopy requires RAM blocks to be a page size multiple," 197 220 " block %s is 0x" RAM_ADDR_FMT " bytes with a " ··· 215 238 struct uffdio_register reg_struct; 216 239 struct uffdio_range range_struct; 217 240 uint64_t feature_mask; 241 + Error *local_err = NULL; 218 242 219 243 if (qemu_target_page_size() > pagesize) { 220 244 error_report("Target page size bigger than host page size"); ··· 225 249 if (ufd == -1) { 226 250 error_report("%s: userfaultfd not available: %s", __func__, 227 251 strerror(errno)); 252 + goto out; 253 + } 254 + 255 + /* Give devices a chance to object */ 256 + if (postcopy_notify(POSTCOPY_NOTIFY_PROBE, &local_err)) { 257 + error_report_err(local_err); 228 258 goto out; 229 259 } 230 260 ··· 377 407 trace_postcopy_ram_incoming_cleanup_entry(); 378 408 379 409 if (mis->have_fault_thread) { 410 + Error *local_err = NULL; 411 + 412 + if (postcopy_notify(POSTCOPY_NOTIFY_INBOUND_END, &local_err)) { 413 + error_report_err(local_err); 414 + return -1; 415 + } 416 + 380 417 if (qemu_ram_foreach_block(cleanup_range, mis)) { 381 418 return -1; 382 419 } ··· 481 518 error_report("%s userfault: Region doesn't support COPY", __func__); 482 519 return -1; 483 520 } 521 + if (reg_struct.ioctls & ((__u64)1 << _UFFDIO_ZEROPAGE)) { 522 + RAMBlock *rb = qemu_ram_block_by_name(block_name); 523 + qemu_ram_set_uf_zeroable(rb); 524 + } 484 525 485 526 return 0; 486 527 } 487 528 529 + int postcopy_wake_shared(struct PostCopyFD *pcfd, 530 + uint64_t client_addr, 531 + RAMBlock *rb) 532 + { 533 + size_t pagesize = qemu_ram_pagesize(rb); 534 + struct uffdio_range range; 535 + int ret; 536 + trace_postcopy_wake_shared(client_addr, 
qemu_ram_get_idstr(rb)); 537 + range.start = client_addr & ~(pagesize - 1); 538 + range.len = pagesize; 539 + ret = ioctl(pcfd->fd, UFFDIO_WAKE, &range); 540 + if (ret) { 541 + error_report("%s: Failed to wake: %zx in %s (%s)", 542 + __func__, (size_t)client_addr, qemu_ram_get_idstr(rb), 543 + strerror(errno)); 544 + } 545 + return ret; 546 + } 547 + 548 + /* 549 + * Callback from shared fault handlers to ask for a page, 550 + * the page must be specified by a RAMBlock and an offset in that rb 551 + * Note: Only for use by shared fault handlers (in fault thread) 552 + */ 553 + int postcopy_request_shared_page(struct PostCopyFD *pcfd, RAMBlock *rb, 554 + uint64_t client_addr, uint64_t rb_offset) 555 + { 556 + size_t pagesize = qemu_ram_pagesize(rb); 557 + uint64_t aligned_rbo = rb_offset & ~(pagesize - 1); 558 + MigrationIncomingState *mis = migration_incoming_get_current(); 559 + 560 + trace_postcopy_request_shared_page(pcfd->idstr, qemu_ram_get_idstr(rb), 561 + rb_offset); 562 + if (ramblock_recv_bitmap_test_byte_offset(rb, aligned_rbo)) { 563 + trace_postcopy_request_shared_page_present(pcfd->idstr, 564 + qemu_ram_get_idstr(rb), rb_offset); 565 + return postcopy_wake_shared(pcfd, client_addr, rb); 566 + } 567 + if (rb != mis->last_rb) { 568 + mis->last_rb = rb; 569 + migrate_send_rp_req_pages(mis, qemu_ram_get_idstr(rb), 570 + aligned_rbo, pagesize); 571 + } else { 572 + /* Save some space */ 573 + migrate_send_rp_req_pages(mis, NULL, aligned_rbo, pagesize); 574 + } 575 + return 0; 576 + } 577 + 488 578 /* 489 579 * Handle faults detected by the USERFAULT markings 490 580 */ ··· 493 583 MigrationIncomingState *mis = opaque; 494 584 struct uffd_msg msg; 495 585 int ret; 586 + size_t index; 496 587 RAMBlock *rb = NULL; 497 - RAMBlock *last_rb = NULL; /* last RAMBlock we sent part of */ 498 588 499 589 trace_postcopy_ram_fault_thread_entry(); 590 + mis->last_rb = NULL; /* last RAMBlock we sent part of */ 500 591 qemu_sem_post(&mis->fault_thread_sem); 501 592 593 + 
struct pollfd *pfd; 594 + size_t pfd_len = 2 + mis->postcopy_remote_fds->len; 595 + 596 + pfd = g_new0(struct pollfd, pfd_len); 597 + 598 + pfd[0].fd = mis->userfault_fd; 599 + pfd[0].events = POLLIN; 600 + pfd[1].fd = mis->userfault_event_fd; 601 + pfd[1].events = POLLIN; /* Waiting for eventfd to go positive */ 602 + trace_postcopy_ram_fault_thread_fds_core(pfd[0].fd, pfd[1].fd); 603 + for (index = 0; index < mis->postcopy_remote_fds->len; index++) { 604 + struct PostCopyFD *pcfd = &g_array_index(mis->postcopy_remote_fds, 605 + struct PostCopyFD, index); 606 + pfd[2 + index].fd = pcfd->fd; 607 + pfd[2 + index].events = POLLIN; 608 + trace_postcopy_ram_fault_thread_fds_extra(2 + index, pcfd->idstr, 609 + pcfd->fd); 610 + } 611 + 502 612 while (true) { 503 613 ram_addr_t rb_offset; 504 - struct pollfd pfd[2]; 614 + int poll_result; 505 615 506 616 /* 507 617 * We're mainly waiting for the kernel to give us a faulting HVA, 508 618 * however we can be told to quit via userfault_quit_fd which is 509 619 * an eventfd 510 620 */ 511 - pfd[0].fd = mis->userfault_fd; 512 - pfd[0].events = POLLIN; 513 - pfd[0].revents = 0; 514 - pfd[1].fd = mis->userfault_event_fd; 515 - pfd[1].events = POLLIN; /* Waiting for eventfd to go positive */ 516 - pfd[1].revents = 0; 517 621 518 - if (poll(pfd, 2, -1 /* Wait forever */) == -1) { 622 + poll_result = poll(pfd, pfd_len, -1 /* Wait forever */); 623 + if (poll_result == -1) { 519 624 error_report("%s: userfault poll: %s", __func__, strerror(errno)); 520 625 break; 521 626 } ··· 535 640 } 536 641 } 537 642 538 - ret = read(mis->userfault_fd, &msg, sizeof(msg)); 539 - if (ret != sizeof(msg)) { 540 - if (errno == EAGAIN) { 541 - /* 542 - * if a wake up happens on the other thread just after 543 - * the poll, there is nothing to read. 
544 - */ 545 - continue; 643 + if (pfd[0].revents) { 644 + poll_result--; 645 + ret = read(mis->userfault_fd, &msg, sizeof(msg)); 646 + if (ret != sizeof(msg)) { 647 + if (errno == EAGAIN) { 648 + /* 649 + * if a wake up happens on the other thread just after 650 + * the poll, there is nothing to read. 651 + */ 652 + continue; 653 + } 654 + if (ret < 0) { 655 + error_report("%s: Failed to read full userfault " 656 + "message: %s", 657 + __func__, strerror(errno)); 658 + break; 659 + } else { 660 + error_report("%s: Read %d bytes from userfaultfd " 661 + "expected %zd", 662 + __func__, ret, sizeof(msg)); 663 + break; /* Lost alignment, don't know what we'd read next */ 664 + } 546 665 } 547 - if (ret < 0) { 548 - error_report("%s: Failed to read full userfault message: %s", 549 - __func__, strerror(errno)); 666 + if (msg.event != UFFD_EVENT_PAGEFAULT) { 667 + error_report("%s: Read unexpected event %ud from userfaultfd", 668 + __func__, msg.event); 669 + continue; /* It's not a page fault, shouldn't happen */ 670 + } 671 + 672 + rb = qemu_ram_block_from_host( 673 + (void *)(uintptr_t)msg.arg.pagefault.address, 674 + true, &rb_offset); 675 + if (!rb) { 676 + error_report("postcopy_ram_fault_thread: Fault outside guest: %" 677 + PRIx64, (uint64_t)msg.arg.pagefault.address); 550 678 break; 679 + } 680 + 681 + rb_offset &= ~(qemu_ram_pagesize(rb) - 1); 682 + trace_postcopy_ram_fault_thread_request(msg.arg.pagefault.address, 683 + qemu_ram_get_idstr(rb), 684 + rb_offset); 685 + /* 686 + * Send the request to the source - we want to request one 687 + * of our host page sizes (which is >= TPS) 688 + */ 689 + if (rb != mis->last_rb) { 690 + mis->last_rb = rb; 691 + migrate_send_rp_req_pages(mis, qemu_ram_get_idstr(rb), 692 + rb_offset, qemu_ram_pagesize(rb)); 551 693 } else { 552 - error_report("%s: Read %d bytes from userfaultfd expected %zd", 553 - __func__, ret, sizeof(msg)); 554 - break; /* Lost alignment, don't know what we'd read next */ 694 + /* Save some space */ 
695 + migrate_send_rp_req_pages(mis, NULL, 696 + rb_offset, qemu_ram_pagesize(rb)); 555 697 } 556 698 } 557 - if (msg.event != UFFD_EVENT_PAGEFAULT) { 558 - error_report("%s: Read unexpected event %ud from userfaultfd", 559 - __func__, msg.event); 560 - continue; /* It's not a page fault, shouldn't happen */ 561 - } 562 699 563 - rb = qemu_ram_block_from_host( 564 - (void *)(uintptr_t)msg.arg.pagefault.address, 565 - true, &rb_offset); 566 - if (!rb) { 567 - error_report("postcopy_ram_fault_thread: Fault outside guest: %" 568 - PRIx64, (uint64_t)msg.arg.pagefault.address); 569 - break; 570 - } 700 + /* Now handle any requests from external processes on shared memory */ 701 + /* TODO: May need to handle devices deregistering during postcopy */ 702 + for (index = 2; index < pfd_len && poll_result; index++) { 703 + if (pfd[index].revents) { 704 + struct PostCopyFD *pcfd = 705 + &g_array_index(mis->postcopy_remote_fds, 706 + struct PostCopyFD, index - 2); 571 707 572 - rb_offset &= ~(qemu_ram_pagesize(rb) - 1); 573 - trace_postcopy_ram_fault_thread_request(msg.arg.pagefault.address, 574 - qemu_ram_get_idstr(rb), 575 - rb_offset); 708 + poll_result--; 709 + if (pfd[index].revents & POLLERR) { 710 + error_report("%s: POLLERR on poll %zd fd=%d", 711 + __func__, index, pcfd->fd); 712 + pfd[index].events = 0; 713 + continue; 714 + } 576 715 577 - /* 578 - * Send the request to the source - we want to request one 579 - * of our host page sizes (which is >= TPS) 580 - */ 581 - if (rb != last_rb) { 582 - last_rb = rb; 583 - migrate_send_rp_req_pages(mis, qemu_ram_get_idstr(rb), 584 - rb_offset, qemu_ram_pagesize(rb)); 585 - } else { 586 - /* Save some space */ 587 - migrate_send_rp_req_pages(mis, NULL, 588 - rb_offset, qemu_ram_pagesize(rb)); 716 + ret = read(pcfd->fd, &msg, sizeof(msg)); 717 + if (ret != sizeof(msg)) { 718 + if (errno == EAGAIN) { 719 + /* 720 + * if a wake up happens on the other thread just after 721 + * the poll, there is nothing to read. 
722 + */ 723 + continue; 724 + } 725 + if (ret < 0) { 726 + error_report("%s: Failed to read full userfault " 727 + "message: %s (shared) revents=%d", 728 + __func__, strerror(errno), 729 + pfd[index].revents); 730 + /*TODO: Could just disable this sharer */ 731 + break; 732 + } else { 733 + error_report("%s: Read %d bytes from userfaultfd " 734 + "expected %zd (shared)", 735 + __func__, ret, sizeof(msg)); 736 + /*TODO: Could just disable this sharer */ 737 + break; /*Lost alignment,don't know what we'd read next*/ 738 + } 739 + } 740 + if (msg.event != UFFD_EVENT_PAGEFAULT) { 741 + error_report("%s: Read unexpected event %ud " 742 + "from userfaultfd (shared)", 743 + __func__, msg.event); 744 + continue; /* It's not a page fault, shouldn't happen */ 745 + } 746 + /* Call the device handler registered with us */ 747 + ret = pcfd->handler(pcfd, &msg); 748 + if (ret) { 749 + error_report("%s: Failed to resolve shared fault on %zd/%s", 750 + __func__, index, pcfd->idstr); 751 + /* TODO: Fail? Disable this sharer? 
*/ 752 + } 753 + } 589 754 } 590 755 } 591 756 trace_postcopy_ram_fault_thread_exit(); ··· 667 832 return ret; 668 833 } 669 834 835 + int postcopy_notify_shared_wake(RAMBlock *rb, uint64_t offset) 836 + { 837 + int i; 838 + MigrationIncomingState *mis = migration_incoming_get_current(); 839 + GArray *pcrfds = mis->postcopy_remote_fds; 840 + 841 + for (i = 0; i < pcrfds->len; i++) { 842 + struct PostCopyFD *cur = &g_array_index(pcrfds, struct PostCopyFD, i); 843 + int ret = cur->waker(cur, rb, offset); 844 + if (ret) { 845 + return ret; 846 + } 847 + } 848 + return 0; 849 + } 850 + 670 851 /* 671 852 * Place a host page (from) at (host) atomically 672 853 * returns 0 on success ··· 690 871 } 691 872 692 873 trace_postcopy_place_page(host); 693 - return 0; 874 + return postcopy_notify_shared_wake(rb, 875 + qemu_ram_block_host_offset(rb, host)); 694 876 } 695 877 696 878 /* ··· 700 882 int postcopy_place_page_zero(MigrationIncomingState *mis, void *host, 701 883 RAMBlock *rb) 702 884 { 885 + size_t pagesize = qemu_ram_pagesize(rb); 703 886 trace_postcopy_place_page_zero(host); 704 887 705 - if (qemu_ram_pagesize(rb) == getpagesize()) { 706 - if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, NULL, getpagesize(), 707 - rb)) { 888 + /* Normal RAMBlocks can zero a page using UFFDIO_ZEROPAGE 889 + * but it's not available for everything (e.g. 
hugetlbpages) 890 + */ 891 + if (qemu_ram_is_uf_zeroable(rb)) { 892 + if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, NULL, pagesize, rb)) { 708 893 int e = errno; 709 894 error_report("%s: %s zero host: %p", 710 895 __func__, strerror(e), host); 711 896 712 897 return -e; 713 898 } 899 + return postcopy_notify_shared_wake(rb, 900 + qemu_ram_block_host_offset(rb, 901 + host)); 714 902 } else { 715 903 /* The kernel can't use UFFDIO_ZEROPAGE for hugepages */ 716 904 if (!mis->postcopy_tmp_zero_page) { ··· 730 918 return postcopy_place_page(mis, host, mis->postcopy_tmp_zero_page, 731 919 rb); 732 920 } 733 - 734 - return 0; 735 921 } 736 922 737 923 /* ··· 784 970 return -1; 785 971 } 786 972 973 + int postcopy_request_shared_page(struct PostCopyFD *pcfd, RAMBlock *rb, 974 + uint64_t client_addr, uint64_t rb_offset) 975 + { 976 + assert(0); 977 + return -1; 978 + } 979 + 787 980 int postcopy_ram_enable_notify(MigrationIncomingState *mis) 788 981 { 789 982 assert(0); ··· 810 1003 return NULL; 811 1004 } 812 1005 1006 + int postcopy_wake_shared(struct PostCopyFD *pcfd, 1007 + uint64_t client_addr, 1008 + RAMBlock *rb) 1009 + { 1010 + assert(0); 1011 + return -1; 1012 + } 813 1013 #endif 814 1014 815 1015 /* ------------------------------------------------------------------------- */ ··· 927 1127 { 928 1128 return atomic_xchg(&incoming_postcopy_state, new_state); 929 1129 } 1130 + 1131 + /* Register a handler for external shared memory postcopy 1132 + * called on the destination. 
1133 + */ 1134 + void postcopy_register_shared_ufd(struct PostCopyFD *pcfd) 1135 + { 1136 + MigrationIncomingState *mis = migration_incoming_get_current(); 1137 + 1138 + mis->postcopy_remote_fds = g_array_append_val(mis->postcopy_remote_fds, 1139 + *pcfd); 1140 + } 1141 + 1142 + /* Unregister a handler for external shared memory postcopy 1143 + */ 1144 + void postcopy_unregister_shared_ufd(struct PostCopyFD *pcfd) 1145 + { 1146 + guint i; 1147 + MigrationIncomingState *mis = migration_incoming_get_current(); 1148 + GArray *pcrfds = mis->postcopy_remote_fds; 1149 + 1150 + for (i = 0; i < pcrfds->len; i++) { 1151 + struct PostCopyFD *cur = &g_array_index(pcrfds, struct PostCopyFD, i); 1152 + if (cur->fd == pcfd->fd) { 1153 + mis->postcopy_remote_fds = g_array_remove_index(pcrfds, i); 1154 + return; 1155 + } 1156 + } 1157 + }
+73
migration/postcopy-ram.h
··· 116 116 117 117 void postcopy_fault_thread_notify(MigrationIncomingState *mis); 118 118 119 + /* 120 + * To be called once at the start before any device initialisation 121 + */ 122 + void postcopy_infrastructure_init(void); 123 + 124 + /* Add a notifier to a list to be called when checking whether the devices 125 + * can support postcopy. 126 + * It's data is a *PostcopyNotifyData 127 + * It should return 0 if OK, or a negative value on failure. 128 + * On failure it must set the data->errp to an error. 129 + * 130 + */ 131 + enum PostcopyNotifyReason { 132 + POSTCOPY_NOTIFY_PROBE = 0, 133 + POSTCOPY_NOTIFY_INBOUND_ADVISE, 134 + POSTCOPY_NOTIFY_INBOUND_LISTEN, 135 + POSTCOPY_NOTIFY_INBOUND_END, 136 + }; 137 + 138 + struct PostcopyNotifyData { 139 + enum PostcopyNotifyReason reason; 140 + Error **errp; 141 + }; 142 + 143 + void postcopy_add_notifier(NotifierWithReturn *nn); 144 + void postcopy_remove_notifier(NotifierWithReturn *n); 145 + /* Call the notifier list set by postcopy_add_start_notifier */ 146 + int postcopy_notify(enum PostcopyNotifyReason reason, Error **errp); 147 + 148 + struct PostCopyFD; 149 + 150 + /* ufd is a pointer to the struct uffd_msg *TODO: more Portable! */ 151 + typedef int (*pcfdhandler)(struct PostCopyFD *pcfd, void *ufd); 152 + /* Notification to wake, either on place or on reception of 153 + * a fault on something that's already arrived (race) 154 + */ 155 + typedef int (*pcfdwake)(struct PostCopyFD *pcfd, RAMBlock *rb, uint64_t offset); 156 + 157 + struct PostCopyFD { 158 + int fd; 159 + /* Data to pass to handler */ 160 + void *data; 161 + /* Handler to be called whenever we get a poll event */ 162 + pcfdhandler handler; 163 + /* Notification to wake shared client */ 164 + pcfdwake waker; 165 + /* A string to use in error messages */ 166 + const char *idstr; 167 + }; 168 + 169 + /* Register a userfaultfd owned by an external process for 170 + * shared memory. 
171 + */ 172 + void postcopy_register_shared_ufd(struct PostCopyFD *pcfd); 173 + void postcopy_unregister_shared_ufd(struct PostCopyFD *pcfd); 174 + /* Call each of the shared 'waker's registerd telling them of 175 + * availability of a block. 176 + */ 177 + int postcopy_notify_shared_wake(RAMBlock *rb, uint64_t offset); 178 + /* postcopy_wake_shared: Notify a client ufd that a page is available 179 + * 180 + * Returns 0 on success 181 + * 182 + * @pcfd: Structure with fd, handler and name as above 183 + * @client_addr: Address in the client program, not QEMU 184 + * @rb: The RAMBlock the page is in 185 + */ 186 + int postcopy_wake_shared(struct PostCopyFD *pcfd, uint64_t client_addr, 187 + RAMBlock *rb); 188 + /* Callback from shared fault handlers to ask for a page */ 189 + int postcopy_request_shared_page(struct PostCopyFD *pcfd, RAMBlock *rb, 190 + uint64_t client_addr, uint64_t offset); 191 + 119 192 #endif
+5
migration/ram.c
··· 169 169 rb->receivedmap); 170 170 } 171 171 172 + bool ramblock_recv_bitmap_test_byte_offset(RAMBlock *rb, uint64_t byte_offset) 173 + { 174 + return test_bit(byte_offset >> TARGET_PAGE_BITS, rb->receivedmap); 175 + } 176 + 172 177 void ramblock_recv_bitmap_set(RAMBlock *rb, void *host_addr) 173 178 { 174 179 set_bit_atomic(ramblock_recv_bitmap_offset(host_addr, rb), rb->receivedmap);
+1
migration/ram.h
··· 60 60 void ram_handle_compressed(void *host, uint8_t ch, uint64_t size); 61 61 62 62 int ramblock_recv_bitmap_test(RAMBlock *rb, void *host_addr); 63 + bool ramblock_recv_bitmap_test_byte_offset(RAMBlock *rb, uint64_t byte_offset); 63 64 void ramblock_recv_bitmap_set(RAMBlock *rb, void *host_addr); 64 65 void ramblock_recv_bitmap_set_range(RAMBlock *rb, void *host_addr, size_t nr); 65 66
+13
migration/savevm.c
··· 1395 1395 { 1396 1396 PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_ADVISE); 1397 1397 uint64_t remote_pagesize_summary, local_pagesize_summary, remote_tps; 1398 + Error *local_err = NULL; 1398 1399 1399 1400 trace_loadvm_postcopy_handle_advise(); 1400 1401 if (ps != POSTCOPY_INCOMING_NONE) { ··· 1457 1458 */ 1458 1459 error_report("Postcopy needs matching target page sizes (s=%d d=%zd)", 1459 1460 (int)remote_tps, qemu_target_page_size()); 1461 + return -1; 1462 + } 1463 + 1464 + if (postcopy_notify(POSTCOPY_NOTIFY_INBOUND_ADVISE, &local_err)) { 1465 + error_report_err(local_err); 1460 1466 return -1; 1461 1467 } 1462 1468 ··· 1621 1627 { 1622 1628 PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_LISTENING); 1623 1629 trace_loadvm_postcopy_handle_listen(); 1630 + Error *local_err = NULL; 1631 + 1624 1632 if (ps != POSTCOPY_INCOMING_ADVISE && ps != POSTCOPY_INCOMING_DISCARD) { 1625 1633 error_report("CMD_POSTCOPY_LISTEN in wrong postcopy state (%d)", ps); 1626 1634 return -1; ··· 1644 1652 if (postcopy_ram_enable_notify(mis)) { 1645 1653 return -1; 1646 1654 } 1655 + } 1656 + 1657 + if (postcopy_notify(POSTCOPY_NOTIFY_INBOUND_LISTEN, &local_err)) { 1658 + error_report_err(local_err); 1659 + return -1; 1647 1660 } 1648 1661 1649 1662 if (mis->have_listen_thread) {
+6
migration/trace-events
··· 190 190 postcopy_ram_enable_notify(void) "" 191 191 postcopy_ram_fault_thread_entry(void) "" 192 192 postcopy_ram_fault_thread_exit(void) "" 193 + postcopy_ram_fault_thread_fds_core(int baseufd, int quitfd) "ufd: %d quitfd: %d" 194 + postcopy_ram_fault_thread_fds_extra(size_t index, const char *name, int fd) "%zd/%s: %d" 193 195 postcopy_ram_fault_thread_quit(void) "" 194 196 postcopy_ram_fault_thread_request(uint64_t hostaddr, const char *ramblock, size_t offset) "Request for HVA=0x%" PRIx64 " rb=%s offset=0x%zx" 195 197 postcopy_ram_incoming_cleanup_closeuf(void) "" 196 198 postcopy_ram_incoming_cleanup_entry(void) "" 197 199 postcopy_ram_incoming_cleanup_exit(void) "" 198 200 postcopy_ram_incoming_cleanup_join(void) "" 201 + postcopy_request_shared_page(const char *sharer, const char *rb, uint64_t rb_offset) "for %s in %s offset 0x%"PRIx64 202 + postcopy_request_shared_page_present(const char *sharer, const char *rb, uint64_t rb_offset) "%s already %s offset 0x%"PRIx64 203 + postcopy_wake_shared(uint64_t client_addr, const char *rb) "at 0x%"PRIx64" in %s" 204 + 199 205 save_xbzrle_page_skipping(void) "" 200 206 save_xbzrle_page_overflow(void) "" 201 207 ram_save_iterate_big_wait(uint64_t milliconds, int iterations) "big wait: %" PRIu64 " milliseconds, %d iterations"
+14 -9
numa.c
··· 520 520 521 521 static void numa_stat_memory_devices(NumaNodeMem node_mem[]) 522 522 { 523 - MemoryDeviceInfoList *info_list = NULL; 524 - MemoryDeviceInfoList **prev = &info_list; 523 + MemoryDeviceInfoList *info_list = qmp_pc_dimm_device_list(); 525 524 MemoryDeviceInfoList *info; 526 525 PCDIMMDeviceInfo *pcdimm_info; 527 526 528 - qmp_pc_dimm_device_list(qdev_get_machine(), &prev); 529 527 for (info = info_list; info; info = info->next) { 530 528 MemoryDeviceInfo *value = info->value; 531 529 532 530 if (value) { 533 531 switch (value->type) { 534 - case MEMORY_DEVICE_INFO_KIND_DIMM: { 532 + case MEMORY_DEVICE_INFO_KIND_DIMM: 535 533 pcdimm_info = value->u.dimm.data; 534 + break; 535 + 536 + case MEMORY_DEVICE_INFO_KIND_NVDIMM: 537 + pcdimm_info = value->u.nvdimm.data; 538 + break; 539 + 540 + default: 541 + pcdimm_info = NULL; 542 + break; 543 + } 544 + 545 + if (pcdimm_info) { 536 546 node_mem[pcdimm_info->node].node_mem += pcdimm_info->size; 537 547 if (pcdimm_info->hotpluggable && pcdimm_info->hotplugged) { 538 548 node_mem[pcdimm_info->node].node_plugged_mem += 539 549 pcdimm_info->size; 540 550 } 541 - break; 542 - } 543 - 544 - default: 545 - break; 546 551 } 547 552 } 548 553 }
pc-bios/acpi-dsdt.aml

This is a binary file and will not be displayed.

+5 -1
qapi/misc.json
··· 2878 2878 # 2879 2879 # Since: 2.1 2880 2880 ## 2881 - { 'union': 'MemoryDeviceInfo', 'data': {'dimm': 'PCDIMMDeviceInfo'} } 2881 + { 'union': 'MemoryDeviceInfo', 2882 + 'data': { 'dimm': 'PCDIMMDeviceInfo', 2883 + 'nvdimm': 'PCDIMMDeviceInfo' 2884 + } 2885 + } 2882 2886 2883 2887 ## 2884 2888 # @query-memory-devices:
+1 -6
qmp.c
··· 731 731 732 732 MemoryDeviceInfoList *qmp_query_memory_devices(Error **errp) 733 733 { 734 - MemoryDeviceInfoList *head = NULL; 735 - MemoryDeviceInfoList **prev = &head; 736 - 737 - qmp_pc_dimm_device_list(qdev_get_machine(), &prev); 738 - 739 - return head; 734 + return qmp_pc_dimm_device_list(); 740 735 } 741 736 742 737 ACPIOSTInfoList *qmp_query_acpi_ospm_status(Error **errp)
+10 -1
scripts/update-linux-headers.sh
··· 40 40 -e 'sys/' \ 41 41 -e 'pvrdma_verbs' \ 42 42 -e 'drm.h' \ 43 + -e 'limits' \ 44 + -e 'linux/kernel' \ 45 + -e 'linux/sysinfo' \ 43 46 > /dev/null 44 47 then 45 48 echo "Unexpected #include in input file $f". ··· 62 65 -e '/sys\/ioctl.h/d' \ 63 66 -e 's/SW_MAX/SW_MAX_/' \ 64 67 -e 's/atomic_t/int/' \ 68 + -e 's/__kernel_long_t/long/' \ 69 + -e 's/__kernel_ulong_t/unsigned long/' \ 70 + -e 's/struct ethhdr/struct eth_header/' \ 71 + -e '/\#define _LINUX_ETHTOOL_H/a \\n\#include "net/eth.h"' \ 65 72 "$f" > "$to/$header"; 66 73 } 67 74 ··· 151 158 mkdir -p "$output/include/standard-headers/linux" 152 159 for i in "$tmpdir"/include/linux/*virtio*.h "$tmpdir/include/linux/input.h" \ 153 160 "$tmpdir/include/linux/input-event-codes.h" \ 154 - "$tmpdir/include/linux/pci_regs.h"; do 161 + "$tmpdir/include/linux/pci_regs.h" \ 162 + "$tmpdir/include/linux/ethtool.h" "$tmpdir/include/linux/kernel.h" \ 163 + "$tmpdir/include/linux/sysinfo.h"; do 155 164 cp_portable "$i" "$output/include/standard-headers/linux" 156 165 done 157 166 mkdir -p "$output/include/standard-headers/drm"
+2 -2
stubs/qmp_pc_dimm.c
··· 2 2 #include "qom/object.h" 3 3 #include "hw/mem/pc-dimm.h" 4 4 5 - int qmp_pc_dimm_device_list(Object *obj, void *opaque) 5 + MemoryDeviceInfoList *qmp_pc_dimm_device_list(void) 6 6 { 7 - return 0; 7 + return NULL; 8 8 } 9 9 10 10 uint64_t get_plugged_memory_size(void)
tests/acpi-test-data/pc/APIC.dimmpxm

This is a binary file and will not be displayed.

tests/acpi-test-data/pc/DSDT.dimmpxm

This is a binary file and will not be displayed.

tests/acpi-test-data/pc/NFIT.dimmpxm

This is a binary file and will not be displayed.

tests/acpi-test-data/pc/SRAT.dimmpxm

This is a binary file and will not be displayed.

tests/acpi-test-data/pc/SSDT.dimmpxm

This is a binary file and will not be displayed.

tests/acpi-test-data/q35/APIC.dimmpxm

This is a binary file and will not be displayed.

tests/acpi-test-data/q35/DSDT.dimmpxm

This is a binary file and will not be displayed.

tests/acpi-test-data/q35/NFIT.dimmpxm

This is a binary file and will not be displayed.

tests/acpi-test-data/q35/SRAT.dimmpxm

This is a binary file and will not be displayed.

tests/acpi-test-data/q35/SSDT.dimmpxm

This is a binary file and will not be displayed.

+55 -63
tests/bios-tables-test.c
··· 29 29 uint32_t rsdp_addr; 30 30 AcpiRsdpDescriptor rsdp_table; 31 31 AcpiRsdtDescriptorRev1 rsdt_table; 32 - AcpiFadtDescriptorRev3 fadt_table; 32 + uint32_t dsdt_addr; 33 + uint32_t facs_addr; 33 34 AcpiFacsDescriptorRev1 facs_table; 34 35 uint32_t *rsdt_tables_addr; 35 36 int rsdt_tables_nr; ··· 127 128 data->rsdt_tables_nr = tables_nr; 128 129 } 129 130 130 - static void test_acpi_fadt_table(test_data *data) 131 + static void fadt_fetch_facs_and_dsdt_ptrs(test_data *data) 131 132 { 132 - AcpiFadtDescriptorRev3 *fadt_table = &data->fadt_table; 133 133 uint32_t addr; 134 + AcpiTableHeader hdr; 134 135 135 136 /* FADT table comes first */ 136 137 addr = le32_to_cpu(data->rsdt_tables_addr[0]); 137 - ACPI_READ_TABLE_HEADER(fadt_table, addr); 138 - 139 - ACPI_READ_FIELD(fadt_table->firmware_ctrl, addr); 140 - ACPI_READ_FIELD(fadt_table->dsdt, addr); 141 - ACPI_READ_FIELD(fadt_table->model, addr); 142 - ACPI_READ_FIELD(fadt_table->reserved1, addr); 143 - ACPI_READ_FIELD(fadt_table->sci_int, addr); 144 - ACPI_READ_FIELD(fadt_table->smi_cmd, addr); 145 - ACPI_READ_FIELD(fadt_table->acpi_enable, addr); 146 - ACPI_READ_FIELD(fadt_table->acpi_disable, addr); 147 - ACPI_READ_FIELD(fadt_table->S4bios_req, addr); 148 - ACPI_READ_FIELD(fadt_table->reserved2, addr); 149 - ACPI_READ_FIELD(fadt_table->pm1a_evt_blk, addr); 150 - ACPI_READ_FIELD(fadt_table->pm1b_evt_blk, addr); 151 - ACPI_READ_FIELD(fadt_table->pm1a_cnt_blk, addr); 152 - ACPI_READ_FIELD(fadt_table->pm1b_cnt_blk, addr); 153 - ACPI_READ_FIELD(fadt_table->pm2_cnt_blk, addr); 154 - ACPI_READ_FIELD(fadt_table->pm_tmr_blk, addr); 155 - ACPI_READ_FIELD(fadt_table->gpe0_blk, addr); 156 - ACPI_READ_FIELD(fadt_table->gpe1_blk, addr); 157 - ACPI_READ_FIELD(fadt_table->pm1_evt_len, addr); 158 - ACPI_READ_FIELD(fadt_table->pm1_cnt_len, addr); 159 - ACPI_READ_FIELD(fadt_table->pm2_cnt_len, addr); 160 - ACPI_READ_FIELD(fadt_table->pm_tmr_len, addr); 161 - ACPI_READ_FIELD(fadt_table->gpe0_blk_len, addr); 162 - 
ACPI_READ_FIELD(fadt_table->gpe1_blk_len, addr); 163 - ACPI_READ_FIELD(fadt_table->gpe1_base, addr); 164 - ACPI_READ_FIELD(fadt_table->reserved3, addr); 165 - ACPI_READ_FIELD(fadt_table->plvl2_lat, addr); 166 - ACPI_READ_FIELD(fadt_table->plvl3_lat, addr); 167 - ACPI_READ_FIELD(fadt_table->flush_size, addr); 168 - ACPI_READ_FIELD(fadt_table->flush_stride, addr); 169 - ACPI_READ_FIELD(fadt_table->duty_offset, addr); 170 - ACPI_READ_FIELD(fadt_table->duty_width, addr); 171 - ACPI_READ_FIELD(fadt_table->day_alrm, addr); 172 - ACPI_READ_FIELD(fadt_table->mon_alrm, addr); 173 - ACPI_READ_FIELD(fadt_table->century, addr); 174 - ACPI_READ_FIELD(fadt_table->boot_flags, addr); 175 - ACPI_READ_FIELD(fadt_table->reserved, addr); 176 - ACPI_READ_FIELD(fadt_table->flags, addr); 177 - ACPI_READ_GENERIC_ADDRESS(fadt_table->reset_register, addr); 178 - ACPI_READ_FIELD(fadt_table->reset_value, addr); 179 - ACPI_READ_FIELD(fadt_table->arm_boot_flags, addr); 180 - ACPI_READ_FIELD(fadt_table->minor_revision, addr); 181 - ACPI_READ_FIELD(fadt_table->x_facs, addr); 182 - ACPI_READ_FIELD(fadt_table->x_dsdt, addr); 183 - ACPI_READ_GENERIC_ADDRESS(fadt_table->xpm1a_event_block, addr); 184 - ACPI_READ_GENERIC_ADDRESS(fadt_table->xpm1b_event_block, addr); 185 - ACPI_READ_GENERIC_ADDRESS(fadt_table->xpm1a_control_block, addr); 186 - ACPI_READ_GENERIC_ADDRESS(fadt_table->xpm1b_control_block, addr); 187 - ACPI_READ_GENERIC_ADDRESS(fadt_table->xpm2_control_block, addr); 188 - ACPI_READ_GENERIC_ADDRESS(fadt_table->xpm_timer_block, addr); 189 - ACPI_READ_GENERIC_ADDRESS(fadt_table->xgpe0_block, addr); 190 - ACPI_READ_GENERIC_ADDRESS(fadt_table->xgpe1_block, addr); 138 + ACPI_READ_TABLE_HEADER(&hdr, addr); 139 + ACPI_ASSERT_CMP(hdr.signature, "FACP"); 191 140 192 - ACPI_ASSERT_CMP(fadt_table->signature, "FACP"); 193 - g_assert(!acpi_calc_checksum((uint8_t *)fadt_table, 194 - le32_to_cpu(fadt_table->length))); 141 + ACPI_READ_FIELD(data->facs_addr, addr); 142 + ACPI_READ_FIELD(data->dsdt_addr, 
addr); 195 143 } 196 144 197 145 static void sanitize_fadt_ptrs(test_data *data) ··· 206 154 continue; 207 155 } 208 156 157 + /* check original FADT checksum before sanitizing table */ 158 + g_assert(!(uint8_t)( 159 + acpi_calc_checksum((uint8_t *)sdt, sizeof(AcpiTableHeader)) + 160 + acpi_calc_checksum((uint8_t *)sdt->aml, sdt->aml_len) 161 + )); 162 + 209 163 /* sdt->aml field offset := spec offset - header size */ 210 164 memset(sdt->aml + 0, 0, 4); /* sanitize FIRMWARE_CTRL(36) ptr */ 211 165 memset(sdt->aml + 4, 0, 4); /* sanitize DSDT(40) ptr */ ··· 226 180 static void test_acpi_facs_table(test_data *data) 227 181 { 228 182 AcpiFacsDescriptorRev1 *facs_table = &data->facs_table; 229 - uint32_t addr = le32_to_cpu(data->fadt_table.firmware_ctrl); 183 + uint32_t addr = le32_to_cpu(data->facs_addr); 230 184 231 185 ACPI_READ_FIELD(facs_table->signature, addr); 232 186 ACPI_READ_FIELD(facs_table->length, addr); ··· 265 219 static void test_acpi_dsdt_table(test_data *data) 266 220 { 267 221 AcpiSdtTable dsdt_table; 268 - uint32_t addr = le32_to_cpu(data->fadt_table.dsdt); 222 + uint32_t addr = le32_to_cpu(data->dsdt_addr); 269 223 270 224 fetch_table(&dsdt_table, addr); 271 225 ACPI_ASSERT_CMP(dsdt_table.header.signature, "DSDT"); ··· 674 628 test_acpi_rsdp_address(data); 675 629 test_acpi_rsdp_table(data); 676 630 test_acpi_rsdt_table(data); 677 - test_acpi_fadt_table(data); 631 + fadt_fetch_facs_and_dsdt_ptrs(data); 678 632 test_acpi_facs_table(data); 679 633 test_acpi_dsdt_table(data); 680 634 fetch_rsdt_referenced_tables(data); ··· 869 823 free_test_data(&data); 870 824 } 871 825 826 + static void test_acpi_tcg_dimm_pxm(const char *machine) 827 + { 828 + test_data data; 829 + 830 + memset(&data, 0, sizeof(data)); 831 + data.machine = machine; 832 + data.variant = ".dimmpxm"; 833 + test_acpi_one(" -machine nvdimm=on" 834 + " -smp 4,sockets=4" 835 + " -m 128M,slots=3,maxmem=1G" 836 + " -numa node,mem=32M,nodeid=0" 837 + " -numa node,mem=32M,nodeid=1" 838 + " 
-numa node,mem=32M,nodeid=2" 839 + " -numa node,mem=32M,nodeid=3" 840 + " -numa cpu,node-id=0,socket-id=0" 841 + " -numa cpu,node-id=1,socket-id=1" 842 + " -numa cpu,node-id=2,socket-id=2" 843 + " -numa cpu,node-id=3,socket-id=3" 844 + " -object memory-backend-ram,id=ram0,size=128M" 845 + " -object memory-backend-ram,id=nvm0,size=128M" 846 + " -device pc-dimm,id=dimm0,memdev=ram0,node=1" 847 + " -device nvdimm,id=dimm1,memdev=nvm0,node=2", 848 + &data); 849 + free_test_data(&data); 850 + } 851 + 852 + static void test_acpi_q35_tcg_dimm_pxm(void) 853 + { 854 + test_acpi_tcg_dimm_pxm(MACHINE_Q35); 855 + } 856 + 857 + static void test_acpi_piix4_tcg_dimm_pxm(void) 858 + { 859 + test_acpi_tcg_dimm_pxm(MACHINE_PC); 860 + } 861 + 872 862 int main(int argc, char *argv[]) 873 863 { 874 864 const char *arch = qtest_get_arch(); ··· 893 883 qtest_add_func("acpi/q35/memhp", test_acpi_q35_tcg_memhp); 894 884 qtest_add_func("acpi/piix4/numamem", test_acpi_piix4_tcg_numamem); 895 885 qtest_add_func("acpi/q35/numamem", test_acpi_q35_tcg_numamem); 886 + qtest_add_func("acpi/piix4/dimmpxm", test_acpi_piix4_tcg_dimm_pxm); 887 + qtest_add_func("acpi/q35/dimmpxm", test_acpi_q35_tcg_dimm_pxm); 896 888 } 897 889 ret = g_test_run(); 898 890 boot_sector_cleanup(disk);
+2 -1
trace-events
··· 58 58 dma_blk_cb(void *dbs, int ret) "dbs=%p ret=%d" 59 59 dma_map_wait(void *dbs) "dbs=%p" 60 60 61 - # # exec.c 61 + # exec.c 62 62 find_ram_offset(uint64_t size, uint64_t offset) "size: 0x%" PRIx64 " @ 0x%" PRIx64 63 63 find_ram_offset_loop(uint64_t size, uint64_t candidate, uint64_t offset, uint64_t next, uint64_t mingap) "trying size: 0x%" PRIx64 " @ 0x%" PRIx64 ", offset: 0x%" PRIx64" next: 0x%" PRIx64 " mingap: 0x%" PRIx64 64 + ram_block_discard_range(const char *rbname, void *hva, size_t length, bool need_madvise, bool need_fallocate, int ret) "%s@%p + 0x%zx: madvise: %d fallocate: %d ret: %d" 64 65 65 66 # memory.c 66 67 memory_region_ops_read(int cpu_index, void *mr, uint64_t addr, uint64_t value, unsigned size) "cpu %d mr %p addr 0x%"PRIx64" value 0x%"PRIx64" size %u"
+2
vl.c
··· 94 94 #include "audio/audio.h" 95 95 #include "sysemu/cpus.h" 96 96 #include "migration/colo.h" 97 + #include "migration/postcopy-ram.h" 97 98 #include "sysemu/kvm.h" 98 99 #include "sysemu/hax.h" 99 100 #include "qapi/qobject-input-visitor.h" ··· 3101 3102 module_call_init(MODULE_INIT_OPTS); 3102 3103 3103 3104 runstate_init(); 3105 + postcopy_infrastructure_init(); 3104 3106 3105 3107 if (qcrypto_init(&err) < 0) { 3106 3108 error_reportf_err(err, "cannot initialize crypto: ");