qemu with hax to log dma reads & writes jcs.org/2018/11/12/vfio

Linux headers: update

Update against Linux 5.8-rc1.

Signed-off-by: Cornelia Huck <cohuck@redhat.com>

+818 -33
+15 -2
include/standard-headers/asm-x86/kvm_para.h
··· 31 31 #define KVM_FEATURE_PV_SEND_IPI 11 32 32 #define KVM_FEATURE_POLL_CONTROL 12 33 33 #define KVM_FEATURE_PV_SCHED_YIELD 13 34 + #define KVM_FEATURE_ASYNC_PF_INT 14 34 35 35 36 #define KVM_HINTS_REALTIME 0 36 37 ··· 50 51 #define MSR_KVM_STEAL_TIME 0x4b564d03 51 52 #define MSR_KVM_PV_EOI_EN 0x4b564d04 52 53 #define MSR_KVM_POLL_CONTROL 0x4b564d05 54 + #define MSR_KVM_ASYNC_PF_INT 0x4b564d06 55 + #define MSR_KVM_ASYNC_PF_ACK 0x4b564d07 53 56 54 57 struct kvm_steal_time { 55 58 uint64_t steal; ··· 81 84 #define KVM_ASYNC_PF_ENABLED (1 << 0) 82 85 #define KVM_ASYNC_PF_SEND_ALWAYS (1 << 1) 83 86 #define KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT (1 << 2) 87 + #define KVM_ASYNC_PF_DELIVERY_AS_INT (1 << 3) 88 + 89 + /* MSR_KVM_ASYNC_PF_INT */ 90 + #define KVM_ASYNC_PF_VEC_MASK GENMASK(7, 0) 91 + 84 92 85 93 /* Operations for KVM_HC_MMU_OP */ 86 94 #define KVM_MMU_OP_WRITE_PTE 1 ··· 112 120 #define KVM_PV_REASON_PAGE_READY 2 113 121 114 122 struct kvm_vcpu_pv_apf_data { 115 - uint32_t reason; 116 - uint8_t pad[60]; 123 + /* Used for 'page not present' events delivered via #PF */ 124 + uint32_t flags; 125 + 126 + /* Used for 'page ready' events delivered via interrupt notification */ 127 + uint32_t token; 128 + 129 + uint8_t pad[56]; 117 130 uint32_t enabled; 118 131 }; 119 132
+126 -14
include/standard-headers/drm/drm_fourcc.h
··· 353 353 * a platform-dependent stride. On top of that the memory can apply 354 354 * platform-depending swizzling of some higher address bits into bit6. 355 355 * 356 - * This format is highly platforms specific and not useful for cross-driver 357 - * sharing. It exists since on a given platform it does uniquely identify the 358 - * layout in a simple way for i915-specific userspace. 356 + * Note that this layout is only accurate on intel gen 8+ or valleyview chipsets. 357 + * On earlier platforms the is highly platforms specific and not useful for 358 + * cross-driver sharing. It exists since on a given platform it does uniquely 359 + * identify the layout in a simple way for i915-specific userspace, which 360 + * facilitated conversion of userspace to modifiers. Additionally the exact 361 + * format on some really old platforms is not known. 359 362 */ 360 363 #define I915_FORMAT_MOD_X_TILED fourcc_mod_code(INTEL, 1) 361 364 ··· 368 371 * memory can apply platform-depending swizzling of some higher address bits 369 372 * into bit6. 370 373 * 371 - * This format is highly platforms specific and not useful for cross-driver 372 - * sharing. It exists since on a given platform it does uniquely identify the 373 - * layout in a simple way for i915-specific userspace. 374 + * Note that this layout is only accurate on intel gen 8+ or valleyview chipsets. 375 + * On earlier platforms the is highly platforms specific and not useful for 376 + * cross-driver sharing. It exists since on a given platform it does uniquely 377 + * identify the layout in a simple way for i915-specific userspace, which 378 + * facilitated conversion of userspace to modifiers. Additionally the exact 379 + * format on some really old platforms is not known. 
374 380 */ 375 381 #define I915_FORMAT_MOD_Y_TILED fourcc_mod_code(INTEL, 2) 376 382 ··· 520 526 #define DRM_FORMAT_MOD_NVIDIA_TEGRA_TILED fourcc_mod_code(NVIDIA, 1) 521 527 522 528 /* 523 - * 16Bx2 Block Linear layout, used by desktop GPUs, and Tegra K1 and later 529 + * Generalized Block Linear layout, used by desktop GPUs starting with NV50/G80, 530 + * and Tegra GPUs starting with Tegra K1. 531 + * 532 + * Pixels are arranged in Groups of Bytes (GOBs). GOB size and layout varies 533 + * based on the architecture generation. GOBs themselves are then arranged in 534 + * 3D blocks, with the block dimensions (in terms of GOBs) always being a power 535 + * of two, and hence expressible as their log2 equivalent (E.g., "2" represents 536 + * a block depth or height of "4"). 537 + * 538 + * Chapter 20 "Pixel Memory Formats" of the Tegra X1 TRM describes this format 539 + * in full detail. 540 + * 541 + * Macro 542 + * Bits Param Description 543 + * ---- ----- ----------------------------------------------------------------- 544 + * 545 + * 3:0 h log2(height) of each block, in GOBs. Placed here for 546 + * compatibility with the existing 547 + * DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK()-based modifiers. 548 + * 549 + * 4:4 - Must be 1, to indicate block-linear layout. Necessary for 550 + * compatibility with the existing 551 + * DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK()-based modifiers. 552 + * 553 + * 8:5 - Reserved (To support 3D-surfaces with variable log2(depth) block 554 + * size). Must be zero. 555 + * 556 + * Note there is no log2(width) parameter. Some portions of the 557 + * hardware support a block width of two gobs, but it is impractical 558 + * to use due to lack of support elsewhere, and has no known 559 + * benefits. 560 + * 561 + * 11:9 - Reserved (To support 2D-array textures with variable array stride 562 + * in blocks, specified via log2(tile width in blocks)). Must be 563 + * zero. 564 + * 565 + * 19:12 k Page Kind. 
This value directly maps to a field in the page 566 + * tables of all GPUs >= NV50. It affects the exact layout of bits 567 + * in memory and can be derived from the tuple 568 + * 569 + * (format, GPU model, compression type, samples per pixel) 570 + * 571 + * Where compression type is defined below. If GPU model were 572 + * implied by the format modifier, format, or memory buffer, page 573 + * kind would not need to be included in the modifier itself, but 574 + * since the modifier should define the layout of the associated 575 + * memory buffer independent from any device or other context, it 576 + * must be included here. 577 + * 578 + * 21:20 g GOB Height and Page Kind Generation. The height of a GOB changed 579 + * starting with Fermi GPUs. Additionally, the mapping between page 580 + * kind and bit layout has changed at various points. 581 + * 582 + * 0 = Gob Height 8, Fermi - Volta, Tegra K1+ Page Kind mapping 583 + * 1 = Gob Height 4, G80 - GT2XX Page Kind mapping 584 + * 2 = Gob Height 8, Turing+ Page Kind mapping 585 + * 3 = Reserved for future use. 586 + * 587 + * 22:22 s Sector layout. On Tegra GPUs prior to Xavier, there is a further 588 + * bit remapping step that occurs at an even lower level than the 589 + * page kind and block linear swizzles. This causes the layout of 590 + * surfaces mapped in those SOC's GPUs to be incompatible with the 591 + * equivalent mapping on other GPUs in the same system. 592 + * 593 + * 0 = Tegra K1 - Tegra Parker/TX2 Layout. 594 + * 1 = Desktop GPU and Tegra Xavier+ Layout 595 + * 596 + * 25:23 c Lossless Framebuffer Compression type. 
597 + * 598 + * 0 = none 599 + * 1 = ROP/3D, layout 1, exact compression format implied by Page 600 + * Kind field 601 + * 2 = ROP/3D, layout 2, exact compression format implied by Page 602 + * Kind field 603 + * 3 = CDE horizontal 604 + * 4 = CDE vertical 605 + * 5 = Reserved for future use 606 + * 6 = Reserved for future use 607 + * 7 = Reserved for future use 608 + * 609 + * 55:25 - Reserved for future use. Must be zero. 610 + */ 611 + #define DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(c, s, g, k, h) \ 612 + fourcc_mod_code(NVIDIA, (0x10 | \ 613 + ((h) & 0xf) | \ 614 + (((k) & 0xff) << 12) | \ 615 + (((g) & 0x3) << 20) | \ 616 + (((s) & 0x1) << 22) | \ 617 + (((c) & 0x7) << 23))) 618 + 619 + /* To grandfather in prior block linear format modifiers to the above layout, 620 + * the page kind "0", which corresponds to "pitch/linear" and hence is unusable 621 + * with block-linear layouts, is remapped within drivers to the value 0xfe, 622 + * which corresponds to the "generic" kind used for simple single-sample 623 + * uncompressed color formats on Fermi - Volta GPUs. 624 + */ 625 + static inline uint64_t 626 + drm_fourcc_canonicalize_nvidia_format_mod(uint64_t modifier) 627 + { 628 + if (!(modifier & 0x10) || (modifier & (0xff << 12))) 629 + return modifier; 630 + else 631 + return modifier | (0xfe << 12); 632 + } 633 + 634 + /* 635 + * 16Bx2 Block Linear layout, used by Tegra K1 and later 524 636 * 525 637 * Pixels are arranged in 64x8 Groups Of Bytes (GOBs). GOBs are then stacked 526 638 * vertically by a power of 2 (1 to 32 GOBs) to form a block. ··· 541 653 * in full detail. 
542 654 */ 543 655 #define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(v) \ 544 - fourcc_mod_code(NVIDIA, 0x10 | ((v) & 0xf)) 656 + DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 0, 0, 0, (v)) 545 657 546 658 #define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_ONE_GOB \ 547 - fourcc_mod_code(NVIDIA, 0x10) 659 + DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(0) 548 660 #define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_TWO_GOB \ 549 - fourcc_mod_code(NVIDIA, 0x11) 661 + DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(1) 550 662 #define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_FOUR_GOB \ 551 - fourcc_mod_code(NVIDIA, 0x12) 663 + DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(2) 552 664 #define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_EIGHT_GOB \ 553 - fourcc_mod_code(NVIDIA, 0x13) 665 + DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(3) 554 666 #define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_SIXTEEN_GOB \ 555 - fourcc_mod_code(NVIDIA, 0x14) 667 + DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(4) 556 668 #define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_THIRTYTWO_GOB \ 557 - fourcc_mod_code(NVIDIA, 0x15) 669 + DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(5) 558 670 559 671 /* 560 672 * Some Broadcom modifiers take parameters, for example the number of
+15 -1
include/standard-headers/linux/ethtool.h
··· 1666 1666 return 0; 1667 1667 } 1668 1668 1669 + #define MASTER_SLAVE_CFG_UNSUPPORTED 0 1670 + #define MASTER_SLAVE_CFG_UNKNOWN 1 1671 + #define MASTER_SLAVE_CFG_MASTER_PREFERRED 2 1672 + #define MASTER_SLAVE_CFG_SLAVE_PREFERRED 3 1673 + #define MASTER_SLAVE_CFG_MASTER_FORCE 4 1674 + #define MASTER_SLAVE_CFG_SLAVE_FORCE 5 1675 + #define MASTER_SLAVE_STATE_UNSUPPORTED 0 1676 + #define MASTER_SLAVE_STATE_UNKNOWN 1 1677 + #define MASTER_SLAVE_STATE_MASTER 2 1678 + #define MASTER_SLAVE_STATE_SLAVE 3 1679 + #define MASTER_SLAVE_STATE_ERR 4 1680 + 1669 1681 /* Which connector port. */ 1670 1682 #define PORT_TP 0x00 1671 1683 #define PORT_AUI 0x01 ··· 1904 1916 uint8_t eth_tp_mdix_ctrl; 1905 1917 int8_t link_mode_masks_nwords; 1906 1918 uint8_t transceiver; 1907 - uint8_t reserved1[3]; 1919 + uint8_t master_slave_cfg; 1920 + uint8_t master_slave_state; 1921 + uint8_t reserved1[1]; 1908 1922 uint32_t reserved[7]; 1909 1923 uint32_t link_mode_masks[0]; 1910 1924 /* layout of link_mode_masks fields:
+1
include/standard-headers/linux/virtio_ids.h
··· 44 44 #define VIRTIO_ID_VSOCK 19 /* virtio vsock transport */ 45 45 #define VIRTIO_ID_CRYPTO 20 /* virtio crypto */ 46 46 #define VIRTIO_ID_IOMMU 23 /* virtio IOMMU */ 47 + #define VIRTIO_ID_MEM 24 /* virtio mem */ 47 48 #define VIRTIO_ID_FS 26 /* virtio filesystem */ 48 49 #define VIRTIO_ID_PMEM 27 /* virtio pmem */ 49 50 #define VIRTIO_ID_MAC80211_HWSIM 29 /* virtio mac80211-hwsim */
+211
include/standard-headers/linux/virtio_mem.h
··· 1 + /* SPDX-License-Identifier: BSD-3-Clause */ 2 + /* 3 + * Virtio Mem Device 4 + * 5 + * Copyright Red Hat, Inc. 2020 6 + * 7 + * Authors: 8 + * David Hildenbrand <david@redhat.com> 9 + * 10 + * This header is BSD licensed so anyone can use the definitions 11 + * to implement compatible drivers/servers: 12 + * 13 + * Redistribution and use in source and binary forms, with or without 14 + * modification, are permitted provided that the following conditions 15 + * are met: 16 + * 1. Redistributions of source code must retain the above copyright 17 + * notice, this list of conditions and the following disclaimer. 18 + * 2. Redistributions in binary form must reproduce the above copyright 19 + * notice, this list of conditions and the following disclaimer in the 20 + * documentation and/or other materials provided with the distribution. 21 + * 3. Neither the name of IBM nor the names of its contributors 22 + * may be used to endorse or promote products derived from this software 23 + * without specific prior written permission. 24 + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 25 + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 26 + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 27 + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL IBM OR 28 + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 29 + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 30 + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 31 + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 32 + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 33 + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 34 + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 + * SUCH DAMAGE. 
36 + */ 37 + 38 + #ifndef _LINUX_VIRTIO_MEM_H 39 + #define _LINUX_VIRTIO_MEM_H 40 + 41 + #include "standard-headers/linux/types.h" 42 + #include "standard-headers/linux/virtio_types.h" 43 + #include "standard-headers/linux/virtio_ids.h" 44 + #include "standard-headers/linux/virtio_config.h" 45 + 46 + /* 47 + * Each virtio-mem device manages a dedicated region in physical address 48 + * space. Each device can belong to a single NUMA node, multiple devices 49 + * for a single NUMA node are possible. A virtio-mem device is like a 50 + * "resizable DIMM" consisting of small memory blocks that can be plugged 51 + * or unplugged. The device driver is responsible for (un)plugging memory 52 + * blocks on demand. 53 + * 54 + * Virtio-mem devices can only operate on their assigned memory region in 55 + * order to (un)plug memory. A device cannot (un)plug memory belonging to 56 + * other devices. 57 + * 58 + * The "region_size" corresponds to the maximum amount of memory that can 59 + * be provided by a device. The "size" corresponds to the amount of memory 60 + * that is currently plugged. "requested_size" corresponds to a request 61 + * from the device to the device driver to (un)plug blocks. The 62 + * device driver should try to (un)plug blocks in order to reach the 63 + * "requested_size". It is impossible to plug more memory than requested. 64 + * 65 + * The "usable_region_size" represents the memory region that can actually 66 + * be used to (un)plug memory. It is always at least as big as the 67 + * "requested_size" and will grow dynamically. It will only shrink when 68 + * explicitly triggered (VIRTIO_MEM_REQ_UNPLUG). 69 + * 70 + * There are no guarantees what will happen if unplugged memory is 71 + * read/written. Such memory should, in general, not be touched. E.g., 72 + * even writing might succeed, but the values will simply be discarded at 73 + * random points in time. 
74 + * 75 + * It can happen that the device cannot process a request, because it is 76 + * busy. The device driver has to retry later. 77 + * 78 + * Usually, during system resets all memory will get unplugged, so the 79 + * device driver can start with a clean state. However, in specific 80 + * scenarios (if the device is busy) it can happen that the device still 81 + * has memory plugged. The device driver can request to unplug all memory 82 + * (VIRTIO_MEM_REQ_UNPLUG) - which might take a while to succeed if the 83 + * device is busy. 84 + */ 85 + 86 + /* --- virtio-mem: feature bits --- */ 87 + 88 + /* node_id is an ACPI PXM and is valid */ 89 + #define VIRTIO_MEM_F_ACPI_PXM 0 90 + 91 + 92 + /* --- virtio-mem: guest -> host requests --- */ 93 + 94 + /* request to plug memory blocks */ 95 + #define VIRTIO_MEM_REQ_PLUG 0 96 + /* request to unplug memory blocks */ 97 + #define VIRTIO_MEM_REQ_UNPLUG 1 98 + /* request to unplug all blocks and shrink the usable size */ 99 + #define VIRTIO_MEM_REQ_UNPLUG_ALL 2 100 + /* request information about the plugged state of memory blocks */ 101 + #define VIRTIO_MEM_REQ_STATE 3 102 + 103 + struct virtio_mem_req_plug { 104 + __virtio64 addr; 105 + __virtio16 nb_blocks; 106 + __virtio16 padding[3]; 107 + }; 108 + 109 + struct virtio_mem_req_unplug { 110 + __virtio64 addr; 111 + __virtio16 nb_blocks; 112 + __virtio16 padding[3]; 113 + }; 114 + 115 + struct virtio_mem_req_state { 116 + __virtio64 addr; 117 + __virtio16 nb_blocks; 118 + __virtio16 padding[3]; 119 + }; 120 + 121 + struct virtio_mem_req { 122 + __virtio16 type; 123 + __virtio16 padding[3]; 124 + 125 + union { 126 + struct virtio_mem_req_plug plug; 127 + struct virtio_mem_req_unplug unplug; 128 + struct virtio_mem_req_state state; 129 + } u; 130 + }; 131 + 132 + 133 + /* --- virtio-mem: host -> guest response --- */ 134 + 135 + /* 136 + * Request processed successfully, applicable for 137 + * - VIRTIO_MEM_REQ_PLUG 138 + * - VIRTIO_MEM_REQ_UNPLUG 139 + * - 
VIRTIO_MEM_REQ_UNPLUG_ALL 140 + * - VIRTIO_MEM_REQ_STATE 141 + */ 142 + #define VIRTIO_MEM_RESP_ACK 0 143 + /* 144 + * Request denied - e.g. trying to plug more than requested, applicable for 145 + * - VIRTIO_MEM_REQ_PLUG 146 + */ 147 + #define VIRTIO_MEM_RESP_NACK 1 148 + /* 149 + * Request cannot be processed right now, try again later, applicable for 150 + * - VIRTIO_MEM_REQ_PLUG 151 + * - VIRTIO_MEM_REQ_UNPLUG 152 + * - VIRTIO_MEM_REQ_UNPLUG_ALL 153 + */ 154 + #define VIRTIO_MEM_RESP_BUSY 2 155 + /* 156 + * Error in request (e.g. addresses/alignment), applicable for 157 + * - VIRTIO_MEM_REQ_PLUG 158 + * - VIRTIO_MEM_REQ_UNPLUG 159 + * - VIRTIO_MEM_REQ_STATE 160 + */ 161 + #define VIRTIO_MEM_RESP_ERROR 3 162 + 163 + 164 + /* State of memory blocks is "plugged" */ 165 + #define VIRTIO_MEM_STATE_PLUGGED 0 166 + /* State of memory blocks is "unplugged" */ 167 + #define VIRTIO_MEM_STATE_UNPLUGGED 1 168 + /* State of memory blocks is "mixed" */ 169 + #define VIRTIO_MEM_STATE_MIXED 2 170 + 171 + struct virtio_mem_resp_state { 172 + __virtio16 state; 173 + }; 174 + 175 + struct virtio_mem_resp { 176 + __virtio16 type; 177 + __virtio16 padding[3]; 178 + 179 + union { 180 + struct virtio_mem_resp_state state; 181 + } u; 182 + }; 183 + 184 + /* --- virtio-mem: configuration --- */ 185 + 186 + struct virtio_mem_config { 187 + /* Block size and alignment. Cannot change. */ 188 + uint64_t block_size; 189 + /* Valid with VIRTIO_MEM_F_ACPI_PXM. Cannot change. */ 190 + uint16_t node_id; 191 + uint8_t padding[6]; 192 + /* Start address of the memory region. Cannot change. */ 193 + uint64_t addr; 194 + /* Region size (maximum). Cannot change. */ 195 + uint64_t region_size; 196 + /* 197 + * Currently usable region size. Can grow up to region_size. Can 198 + * shrink due to VIRTIO_MEM_REQ_UNPLUG_ALL (in which case no config 199 + * update will be sent). 200 + */ 201 + uint64_t usable_region_size; 202 + /* 203 + * Currently used size. 
Changes due to plug/unplug requests, but no 204 + * config updates will be sent. 205 + */ 206 + uint64_t plugged_size; 207 + /* Requested size. New plug requests cannot exceed it. Can change. */ 208 + uint64_t requested_size; 209 + }; 210 + 211 + #endif /* _LINUX_VIRTIO_MEM_H */
+38 -10
include/standard-headers/linux/virtio_ring.h
··· 84 84 * at the end of the used ring. Guest should ignore the used->flags field. */ 85 85 #define VIRTIO_RING_F_EVENT_IDX 29 86 86 87 + /* Alignment requirements for vring elements. 88 + * When using pre-virtio 1.0 layout, these fall out naturally. 89 + */ 90 + #define VRING_AVAIL_ALIGN_SIZE 2 91 + #define VRING_USED_ALIGN_SIZE 4 92 + #define VRING_DESC_ALIGN_SIZE 16 93 + 87 94 /* Virtio ring descriptors: 16 bytes. These can chain together via "next". */ 88 95 struct vring_desc { 89 96 /* Address (guest-physical). */ ··· 110 117 __virtio32 len; 111 118 }; 112 119 120 + typedef struct vring_used_elem __attribute__((aligned(VRING_USED_ALIGN_SIZE))) 121 + vring_used_elem_t; 122 + 113 123 struct vring_used { 114 124 __virtio16 flags; 115 125 __virtio16 idx; 116 - struct vring_used_elem ring[]; 126 + vring_used_elem_t ring[]; 117 127 }; 118 128 129 + /* 130 + * The ring element addresses are passed between components with different 131 + * alignments assumptions. Thus, we might need to decrease the compiler-selected 132 + * alignment, and so must use a typedef to make sure the aligned attribute 133 + * actually takes hold: 134 + * 135 + * https://gcc.gnu.org/onlinedocs//gcc/Common-Type-Attributes.html#Common-Type-Attributes 136 + * 137 + * When used on a struct, or struct member, the aligned attribute can only 138 + * increase the alignment; in order to decrease it, the packed attribute must 139 + * be specified as well. When used as part of a typedef, the aligned attribute 140 + * can both increase and decrease alignment, and specifying the packed 141 + * attribute generates a warning. 
142 + */ 143 + typedef struct vring_desc __attribute__((aligned(VRING_DESC_ALIGN_SIZE))) 144 + vring_desc_t; 145 + typedef struct vring_avail __attribute__((aligned(VRING_AVAIL_ALIGN_SIZE))) 146 + vring_avail_t; 147 + typedef struct vring_used __attribute__((aligned(VRING_USED_ALIGN_SIZE))) 148 + vring_used_t; 149 + 119 150 struct vring { 120 151 unsigned int num; 121 152 122 - struct vring_desc *desc; 153 + vring_desc_t *desc; 123 154 124 - struct vring_avail *avail; 155 + vring_avail_t *avail; 125 156 126 - struct vring_used *used; 157 + vring_used_t *used; 127 158 }; 128 159 129 - /* Alignment requirements for vring elements. 130 - * When using pre-virtio 1.0 layout, these fall out naturally. 131 - */ 132 - #define VRING_AVAIL_ALIGN_SIZE 2 133 - #define VRING_USED_ALIGN_SIZE 4 134 - #define VRING_DESC_ALIGN_SIZE 16 160 + #ifndef VIRTIO_RING_NO_LEGACY 135 161 136 162 /* The standard layout for the ring is a continuous chunk of memory which looks 137 163 * like this. We assume num is a power of 2. ··· 178 204 + align - 1) & ~(align - 1)) 179 205 + sizeof(__virtio16) * 3 + sizeof(struct vring_used_elem) * num; 180 206 } 207 + 208 + #endif /* VIRTIO_RING_NO_LEGACY */ 181 209 182 210 /* The following is used with USED_EVENT_IDX and AVAIL_EVENT_IDX */ 183 211 /* Assuming a given event_idx value from the other side, if
+8
linux-headers/asm-arm64/mman.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ 2 + #ifndef __ASM_MMAN_H 3 + #define __ASM_MMAN_H 4 + 1 5 #include <asm-generic/mman.h> 6 + 7 + #define PROT_BTI 0x10 /* BTI guarded page */ 8 + 9 + #endif /* ! _UAPI__ASM_MMAN_H */
+3 -1
linux-headers/asm-generic/unistd.h
··· 855 855 __SYSCALL(__NR_openat2, sys_openat2) 856 856 #define __NR_pidfd_getfd 438 857 857 __SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd) 858 + #define __NR_faccessat2 439 859 + __SYSCALL(__NR_faccessat2, sys_faccessat2) 858 860 859 861 #undef __NR_syscalls 860 - #define __NR_syscalls 439 862 + #define __NR_syscalls 440 861 863 862 864 /* 863 865 * 32 bit systems traditionally used different
+1
linux-headers/asm-mips/unistd_n32.h
··· 367 367 #define __NR_clone3 (__NR_Linux + 435) 368 368 #define __NR_openat2 (__NR_Linux + 437) 369 369 #define __NR_pidfd_getfd (__NR_Linux + 438) 370 + #define __NR_faccessat2 (__NR_Linux + 439) 370 371 371 372 372 373 #endif /* _ASM_MIPS_UNISTD_N32_H */
+1
linux-headers/asm-mips/unistd_n64.h
··· 343 343 #define __NR_clone3 (__NR_Linux + 435) 344 344 #define __NR_openat2 (__NR_Linux + 437) 345 345 #define __NR_pidfd_getfd (__NR_Linux + 438) 346 + #define __NR_faccessat2 (__NR_Linux + 439) 346 347 347 348 348 349 #endif /* _ASM_MIPS_UNISTD_N64_H */
+1
linux-headers/asm-mips/unistd_o32.h
··· 413 413 #define __NR_clone3 (__NR_Linux + 435) 414 414 #define __NR_openat2 (__NR_Linux + 437) 415 415 #define __NR_pidfd_getfd (__NR_Linux + 438) 416 + #define __NR_faccessat2 (__NR_Linux + 439) 416 417 417 418 418 419 #endif /* _ASM_MIPS_UNISTD_O32_H */
+1
linux-headers/asm-powerpc/unistd_32.h
··· 420 420 #define __NR_clone3 435 421 421 #define __NR_openat2 437 422 422 #define __NR_pidfd_getfd 438 423 + #define __NR_faccessat2 439 423 424 424 425 425 426 #endif /* _ASM_POWERPC_UNISTD_32_H */
+1
linux-headers/asm-powerpc/unistd_64.h
··· 392 392 #define __NR_clone3 435 393 393 #define __NR_openat2 437 394 394 #define __NR_pidfd_getfd 438 395 + #define __NR_faccessat2 439 395 396 396 397 397 398 #endif /* _ASM_POWERPC_UNISTD_64_H */
+1
linux-headers/asm-s390/unistd_32.h
··· 410 410 #define __NR_clone3 435 411 411 #define __NR_openat2 437 412 412 #define __NR_pidfd_getfd 438 413 + #define __NR_faccessat2 439 413 414 414 415 #endif /* _ASM_S390_UNISTD_32_H */
+1
linux-headers/asm-s390/unistd_64.h
··· 358 358 #define __NR_clone3 435 359 359 #define __NR_openat2 437 360 360 #define __NR_pidfd_getfd 438 361 + #define __NR_faccessat2 439 361 362 362 363 #endif /* _ASM_S390_UNISTD_64_H */
+19 -1
linux-headers/asm-x86/kvm.h
··· 385 385 #define KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT (1 << 4) 386 386 387 387 #define KVM_STATE_NESTED_FORMAT_VMX 0 388 - #define KVM_STATE_NESTED_FORMAT_SVM 1 /* unused */ 388 + #define KVM_STATE_NESTED_FORMAT_SVM 1 389 389 390 390 #define KVM_STATE_NESTED_GUEST_MODE 0x00000001 391 391 #define KVM_STATE_NESTED_RUN_PENDING 0x00000002 392 392 #define KVM_STATE_NESTED_EVMCS 0x00000004 393 393 #define KVM_STATE_NESTED_MTF_PENDING 0x00000008 394 + #define KVM_STATE_NESTED_GIF_SET 0x00000100 394 395 395 396 #define KVM_STATE_NESTED_SMM_GUEST_MODE 0x00000001 396 397 #define KVM_STATE_NESTED_SMM_VMXON 0x00000002 397 398 398 399 #define KVM_STATE_NESTED_VMX_VMCS_SIZE 0x1000 399 400 401 + #define KVM_STATE_NESTED_SVM_VMCB_SIZE 0x1000 402 + 403 + #define KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE 0x00000001 404 + 400 405 struct kvm_vmx_nested_state_data { 401 406 __u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE]; 402 407 __u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE]; 403 408 }; 404 409 405 410 struct kvm_vmx_nested_state_hdr { 411 + __u32 flags; 406 412 __u64 vmxon_pa; 407 413 __u64 vmcs12_pa; 414 + __u64 preemption_timer_deadline; 408 415 409 416 struct { 410 417 __u16 flags; 411 418 } smm; 412 419 }; 413 420 421 + struct kvm_svm_nested_state_data { 422 + /* Save area only used if KVM_STATE_NESTED_RUN_PENDING. */ 423 + __u8 vmcb12[KVM_STATE_NESTED_SVM_VMCB_SIZE]; 424 + }; 425 + 426 + struct kvm_svm_nested_state_hdr { 427 + __u64 vmcb_pa; 428 + }; 429 + 414 430 /* for KVM_CAP_NESTED_STATE */ 415 431 struct kvm_nested_state { 416 432 __u16 flags; ··· 419 435 420 436 union { 421 437 struct kvm_vmx_nested_state_hdr vmx; 438 + struct kvm_svm_nested_state_hdr svm; 422 439 423 440 /* Pad the header to 128 bytes. */ 424 441 __u8 pad[120]; ··· 431 448 */ 432 449 union { 433 450 struct kvm_vmx_nested_state_data vmx[0]; 451 + struct kvm_svm_nested_state_data svm[0]; 434 452 } data; 435 453 }; 436 454
+9 -2
linux-headers/asm-x86/unistd.h
··· 2 2 #ifndef _ASM_X86_UNISTD_H 3 3 #define _ASM_X86_UNISTD_H 4 4 5 - /* x32 syscall flag bit */ 6 - #define __X32_SYSCALL_BIT 0x40000000UL 5 + /* 6 + * x32 syscall flag bit. Some user programs expect syscall NR macros 7 + * and __X32_SYSCALL_BIT to have type int, even though syscall numbers 8 + * are, for practical purposes, unsigned long. 9 + * 10 + * Fortunately, expressions like (nr & ~__X32_SYSCALL_BIT) do the right 11 + * thing regardless. 12 + */ 13 + #define __X32_SYSCALL_BIT 0x40000000 7 14 8 15 # ifdef __i386__ 9 16 # include <asm/unistd_32.h>
+1
linux-headers/asm-x86/unistd_32.h
··· 428 428 #define __NR_clone3 435 429 429 #define __NR_openat2 437 430 430 #define __NR_pidfd_getfd 438 431 + #define __NR_faccessat2 439 431 432 432 433 433 434 #endif /* _ASM_X86_UNISTD_32_H */
+1
linux-headers/asm-x86/unistd_64.h
··· 350 350 #define __NR_clone3 435 351 351 #define __NR_openat2 437 352 352 #define __NR_pidfd_getfd 438 353 + #define __NR_faccessat2 439 353 354 354 355 355 356 #endif /* _ASM_X86_UNISTD_64_H */
+1
linux-headers/asm-x86/unistd_x32.h
··· 303 303 #define __NR_clone3 (__X32_SYSCALL_BIT + 435) 304 304 #define __NR_openat2 (__X32_SYSCALL_BIT + 437) 305 305 #define __NR_pidfd_getfd (__X32_SYSCALL_BIT + 438) 306 + #define __NR_faccessat2 (__X32_SYSCALL_BIT + 439) 306 307 #define __NR_rt_sigaction (__X32_SYSCALL_BIT + 512) 307 308 #define __NR_rt_sigreturn (__X32_SYSCALL_BIT + 513) 308 309 #define __NR_ioctl (__X32_SYSCALL_BIT + 514)
+16 -2
linux-headers/linux/kvm.h
··· 116 116 * ACPI gsi notion of irq. 117 117 * For IA-64 (APIC model) IOAPIC0: irq 0-23; IOAPIC1: irq 24-47.. 118 118 * For X86 (standard AT mode) PIC0/1: irq 0-15. IOAPIC0: 0-23.. 119 - * For ARM: See Documentation/virt/kvm/api.txt 119 + * For ARM: See Documentation/virt/kvm/api.rst 120 120 */ 121 121 union { 122 122 __u32 irq; ··· 188 188 struct kvm_hyperv_exit { 189 189 #define KVM_EXIT_HYPERV_SYNIC 1 190 190 #define KVM_EXIT_HYPERV_HCALL 2 191 + #define KVM_EXIT_HYPERV_SYNDBG 3 191 192 __u32 type; 193 + __u32 pad1; 192 194 union { 193 195 struct { 194 196 __u32 msr; 197 + __u32 pad2; 195 198 __u64 control; 196 199 __u64 evt_page; 197 200 __u64 msg_page; ··· 201 204 __u64 result; 202 205 __u64 params[2]; 203 206 } hcall; 207 + struct { 208 + __u32 msr; 209 + __u32 pad2; 210 + __u64 control; 211 + __u64 status; 212 + __u64 send_page; 213 + __u64 recv_page; 214 + __u64 pending_page; 215 + } syndbg; 204 216 } u; 205 217 }; 206 218 ··· 1017 1029 #define KVM_CAP_S390_VCPU_RESETS 179 1018 1030 #define KVM_CAP_S390_PROTECTED 180 1019 1031 #define KVM_CAP_PPC_SECURE_GUEST 181 1032 + #define KVM_CAP_HALT_POLL 182 1033 + #define KVM_CAP_ASYNC_PF_INT 183 1020 1034 1021 1035 #ifdef KVM_CAP_IRQ_ROUTING 1022 1036 ··· 1107 1121 * 1108 1122 * KVM_IRQFD_FLAG_RESAMPLE indicates resamplefd is valid and specifies 1109 1123 * the irqfd to operate in resampling mode for level triggered interrupt 1110 - * emulation. See Documentation/virt/kvm/api.txt. 1124 + * emulation. See Documentation/virt/kvm/api.rst. 1111 1125 */ 1112 1126 #define KVM_IRQFD_FLAG_RESAMPLE (1 << 1) 1113 1127
+2
linux-headers/linux/psp-sev.h
··· 83 83 __u32 guest_count; /* Out */ 84 84 } __attribute__((packed)); 85 85 86 + #define SEV_STATUS_FLAGS_CONFIG_ES 0x0100 87 + 86 88 /** 87 89 * struct sev_user_data_pek_csr - PEK_CSR command parameters 88 90 *
+322
linux-headers/linux/vfio.h
··· 305 305 #define VFIO_REGION_TYPE_PCI_VENDOR_MASK (0xffff) 306 306 #define VFIO_REGION_TYPE_GFX (1) 307 307 #define VFIO_REGION_TYPE_CCW (2) 308 + #define VFIO_REGION_TYPE_MIGRATION (3) 308 309 309 310 /* sub-types for VFIO_REGION_TYPE_PCI_* */ 310 311 ··· 378 379 379 380 /* sub-types for VFIO_REGION_TYPE_CCW */ 380 381 #define VFIO_REGION_SUBTYPE_CCW_ASYNC_CMD (1) 382 + #define VFIO_REGION_SUBTYPE_CCW_SCHIB (2) 383 + #define VFIO_REGION_SUBTYPE_CCW_CRW (3) 384 + 385 + /* sub-types for VFIO_REGION_TYPE_MIGRATION */ 386 + #define VFIO_REGION_SUBTYPE_MIGRATION (1) 387 + 388 + /* 389 + * The structure vfio_device_migration_info is placed at the 0th offset of 390 + * the VFIO_REGION_SUBTYPE_MIGRATION region to get and set VFIO device related 391 + * migration information. Field accesses from this structure are only supported 392 + * at their native width and alignment. Otherwise, the result is undefined and 393 + * vendor drivers should return an error. 394 + * 395 + * device_state: (read/write) 396 + * - The user application writes to this field to inform the vendor driver 397 + * about the device state to be transitioned to. 398 + * - The vendor driver should take the necessary actions to change the 399 + * device state. After successful transition to a given state, the 400 + * vendor driver should return success on write(device_state, state) 401 + * system call. If the device state transition fails, the vendor driver 402 + * should return an appropriate -errno for the fault condition. 403 + * - On the user application side, if the device state transition fails, 404 + * that is, if write(device_state, state) returns an error, read 405 + * device_state again to determine the current state of the device from 406 + * the vendor driver. 407 + * - The vendor driver should return previous state of the device unless 408 + * the vendor driver has encountered an internal error, in which case 409 + * the vendor driver may report the device_state VFIO_DEVICE_STATE_ERROR. 
410 + * - The user application must use the device reset ioctl to recover the 411 + * device from VFIO_DEVICE_STATE_ERROR state. If the device is 412 + * indicated to be in a valid device state by reading device_state, the 413 + * user application may attempt to transition the device to any valid 414 + * state reachable from the current state or terminate itself. 415 + * 416 + * device_state consists of 3 bits: 417 + * - If bit 0 is set, it indicates the _RUNNING state. If bit 0 is clear, 418 + * it indicates the _STOP state. When the device state is changed to 419 + * _STOP, driver should stop the device before write() returns. 420 + * - If bit 1 is set, it indicates the _SAVING state, which means that the 421 + * driver should start gathering device state information that will be 422 + * provided to the VFIO user application to save the device's state. 423 + * - If bit 2 is set, it indicates the _RESUMING state, which means that 424 + * the driver should prepare to resume the device. Data provided through 425 + * the migration region should be used to resume the device. 426 + * Bits 3 - 31 are reserved for future use. To preserve them, the user 427 + * application should perform a read-modify-write operation on this 428 + * field when modifying the specified bits. 429 + * 430 + * +------- _RESUMING 431 + * |+------ _SAVING 432 + * ||+----- _RUNNING 433 + * ||| 434 + * 000b => Device Stopped, not saving or resuming 435 + * 001b => Device running, which is the default state 436 + * 010b => Stop the device & save the device state, stop-and-copy state 437 + * 011b => Device running and save the device state, pre-copy state 438 + * 100b => Device stopped and the device state is resuming 439 + * 101b => Invalid state 440 + * 110b => Error state 441 + * 111b => Invalid state 442 + * 443 + * State transitions: 444 + * 445 + * _RESUMING _RUNNING Pre-copy Stop-and-copy _STOP 446 + * (100b) (001b) (011b) (010b) (000b) 447 + * 0. 
Running or default state
448 + * |
449 + *
450 + * 1. Normal Shutdown (optional)
451 + * |------------------------------------->|
452 + *
453 + * 2. Save the state or suspend
454 + * |------------------------->|---------->|
455 + *
456 + * 3. Save the state during live migration
457 + * |----------->|------------>|---------->|
458 + *
459 + * 4. Resuming
460 + * |<---------|
461 + *
462 + * 5. Resumed
463 + * |--------->|
464 + *
465 + * 0. Default state of VFIO device is _RUNNING when the user application starts.
466 + * 1. During normal shutdown of the user application, the user application may
467 + * optionally change the VFIO device state from _RUNNING to _STOP. This
468 + * transition is optional. The vendor driver must support this transition but
469 + * must not require it.
470 + * 2. When the user application saves state or suspends the application, the
471 + * device state transitions from _RUNNING to stop-and-copy and then to _STOP.
472 + * On state transition from _RUNNING to stop-and-copy, driver must stop the
473 + * device, save the device state and send it to the application through the
474 + * migration region. The sequence to be followed for such transition is given
475 + * below.
476 + * 3. In live migration of user application, the state transitions from _RUNNING
477 + * to pre-copy, to stop-and-copy, and to _STOP.
478 + * On state transition from _RUNNING to pre-copy, the driver should start
479 + * gathering the device state while the application is still running and send
480 + * the device state data to application through the migration region.
481 + * On state transition from pre-copy to stop-and-copy, the driver must stop
482 + * the device, save the device state and send it to the user application
483 + * through the migration region.
484 + * Vendor drivers must support the pre-copy state even for implementations
485 + * where no data is provided to the user before the stop-and-copy state. 
The 486 + * user must not be required to consume all migration data before the device 487 + * transitions to a new state, including the stop-and-copy state. 488 + * The sequence to be followed for above two transitions is given below. 489 + * 4. To start the resuming phase, the device state should be transitioned from 490 + * the _RUNNING to the _RESUMING state. 491 + * In the _RESUMING state, the driver should use the device state data 492 + * received through the migration region to resume the device. 493 + * 5. After providing saved device data to the driver, the application should 494 + * change the state from _RESUMING to _RUNNING. 495 + * 496 + * reserved: 497 + * Reads on this field return zero and writes are ignored. 498 + * 499 + * pending_bytes: (read only) 500 + * The number of pending bytes still to be migrated from the vendor driver. 501 + * 502 + * data_offset: (read only) 503 + * The user application should read data_offset field from the migration 504 + * region. The user application should read the device data from this 505 + * offset within the migration region during the _SAVING state or write 506 + * the device data during the _RESUMING state. See below for details of 507 + * sequence to be followed. 508 + * 509 + * data_size: (read/write) 510 + * The user application should read data_size to get the size in bytes of 511 + * the data copied in the migration region during the _SAVING state and 512 + * write the size in bytes of the data copied in the migration region 513 + * during the _RESUMING state. 
514 + * 515 + * The format of the migration region is as follows: 516 + * ------------------------------------------------------------------ 517 + * |vfio_device_migration_info| data section | 518 + * | | /////////////////////////////// | 519 + * ------------------------------------------------------------------ 520 + * ^ ^ 521 + * offset 0-trapped part data_offset 522 + * 523 + * The structure vfio_device_migration_info is always followed by the data 524 + * section in the region, so data_offset will always be nonzero. The offset 525 + * from where the data is copied is decided by the kernel driver. The data 526 + * section can be trapped, mmapped, or partitioned, depending on how the kernel 527 + * driver defines the data section. The data section partition can be defined 528 + * as mapped by the sparse mmap capability. If mmapped, data_offset must be 529 + * page aligned, whereas initial section which contains the 530 + * vfio_device_migration_info structure, might not end at the offset, which is 531 + * page aligned. The user is not required to access through mmap regardless 532 + * of the capabilities of the region mmap. 533 + * The vendor driver should determine whether and how to partition the data 534 + * section. The vendor driver should return data_offset accordingly. 535 + * 536 + * The sequence to be followed while in pre-copy state and stop-and-copy state 537 + * is as follows: 538 + * a. Read pending_bytes, indicating the start of a new iteration to get device 539 + * data. Repeated read on pending_bytes at this stage should have no side 540 + * effects. 541 + * If pending_bytes == 0, the user application should not iterate to get data 542 + * for that device. 543 + * If pending_bytes > 0, perform the following steps. 544 + * b. Read data_offset, indicating that the vendor driver should make data 545 + * available through the data section. 
The vendor driver should return this 546 + * read operation only after data is available from (region + data_offset) 547 + * to (region + data_offset + data_size). 548 + * c. Read data_size, which is the amount of data in bytes available through 549 + * the migration region. 550 + * Read on data_offset and data_size should return the offset and size of 551 + * the current buffer if the user application reads data_offset and 552 + * data_size more than once here. 553 + * d. Read data_size bytes of data from (region + data_offset) from the 554 + * migration region. 555 + * e. Process the data. 556 + * f. Read pending_bytes, which indicates that the data from the previous 557 + * iteration has been read. If pending_bytes > 0, go to step b. 558 + * 559 + * The user application can transition from the _SAVING|_RUNNING 560 + * (pre-copy state) to the _SAVING (stop-and-copy) state regardless of the 561 + * number of pending bytes. The user application should iterate in _SAVING 562 + * (stop-and-copy) until pending_bytes is 0. 563 + * 564 + * The sequence to be followed while _RESUMING device state is as follows: 565 + * While data for this device is available, repeat the following steps: 566 + * a. Read data_offset from where the user application should write data. 567 + * b. Write migration data starting at the migration region + data_offset for 568 + * the length determined by data_size from the migration source. 569 + * c. Write data_size, which indicates to the vendor driver that data is 570 + * written in the migration region. Vendor driver must return this write 571 + * operations on consuming data. Vendor driver should apply the 572 + * user-provided migration region data to the device resume state. 573 + * 574 + * If an error occurs during the above sequences, the vendor driver can return 575 + * an error code for next read() or write() operation, which will terminate the 576 + * loop. 
The user application should then take the next necessary action, for
577 + * example, failing migration or terminating the user application.
578 + *
579 + * For the user application, data is opaque. The user application should write
580 + * data in the same order as the data is received and the data should be of
581 + * same transaction size at the source.
582 + */
583 +
584 + struct vfio_device_migration_info {
585 + __u32 device_state; /* VFIO device state */
586 + #define VFIO_DEVICE_STATE_STOP (0)
587 + #define VFIO_DEVICE_STATE_RUNNING (1 << 0)
588 + #define VFIO_DEVICE_STATE_SAVING (1 << 1)
589 + #define VFIO_DEVICE_STATE_RESUMING (1 << 2)
590 + #define VFIO_DEVICE_STATE_MASK (VFIO_DEVICE_STATE_RUNNING | \
591 + VFIO_DEVICE_STATE_SAVING | \
592 + VFIO_DEVICE_STATE_RESUMING)
593 +
594 + #define VFIO_DEVICE_STATE_VALID(state) \
595 + (state & VFIO_DEVICE_STATE_RESUMING ? \
596 + (state & VFIO_DEVICE_STATE_MASK) == VFIO_DEVICE_STATE_RESUMING : 1)
597 +
598 + #define VFIO_DEVICE_STATE_IS_ERROR(state) \
599 + ((state & VFIO_DEVICE_STATE_MASK) == (VFIO_DEVICE_STATE_SAVING | \
600 + VFIO_DEVICE_STATE_RESUMING))
601 +
602 + #define VFIO_DEVICE_STATE_SET_ERROR(state) \
603 + ((state & ~VFIO_DEVICE_STATE_MASK) | VFIO_DEVICE_STATE_SAVING | \
604 + VFIO_DEVICE_STATE_RESUMING)
605 +
606 + __u32 reserved;
607 + __u64 pending_bytes;
608 + __u64 data_offset;
609 + __u64 data_size;
610 + };
381 611
382 612 /*
383 613 * The MSIX mappable capability informs that MSIX data of a BAR can be mmapped
··· 577 807
578 808 enum {
579 809 VFIO_CCW_IO_IRQ_INDEX,
810 + VFIO_CCW_CRW_IRQ_INDEX,
580 811 VFIO_CCW_NUM_IRQS
581 812 };
582 813
··· 785 1016 struct vfio_iova_range iova_ranges[];
786 1017 };
787 1018
1019 + /*
1020 + * The migration capability allows to report supported features for migration.
1021 + *
1022 + * The structures below define version 1 of this capability. 
1023 + * 1024 + * The existence of this capability indicates that IOMMU kernel driver supports 1025 + * dirty page logging. 1026 + * 1027 + * pgsize_bitmap: Kernel driver returns bitmap of supported page sizes for dirty 1028 + * page logging. 1029 + * max_dirty_bitmap_size: Kernel driver returns maximum supported dirty bitmap 1030 + * size in bytes that can be used by user applications when getting the dirty 1031 + * bitmap. 1032 + */ 1033 + #define VFIO_IOMMU_TYPE1_INFO_CAP_MIGRATION 1 1034 + 1035 + struct vfio_iommu_type1_info_cap_migration { 1036 + struct vfio_info_cap_header header; 1037 + __u32 flags; 1038 + __u64 pgsize_bitmap; 1039 + __u64 max_dirty_bitmap_size; /* in bytes */ 1040 + }; 1041 + 788 1042 #define VFIO_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12) 789 1043 790 1044 /** ··· 805 1059 806 1060 #define VFIO_IOMMU_MAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 13) 807 1061 1062 + struct vfio_bitmap { 1063 + __u64 pgsize; /* page size for bitmap in bytes */ 1064 + __u64 size; /* in bytes */ 1065 + __u64 *data; /* one bit per page */ 1066 + }; 1067 + 808 1068 /** 809 1069 * VFIO_IOMMU_UNMAP_DMA - _IOWR(VFIO_TYPE, VFIO_BASE + 14, 810 1070 * struct vfio_dma_unmap) ··· 814 1074 * field. No guarantee is made to the user that arbitrary unmaps of iova 815 1075 * or size different from those used in the original mapping call will 816 1076 * succeed. 1077 + * VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP should be set to get the dirty bitmap 1078 + * before unmapping IO virtual addresses. When this flag is set, the user must 1079 + * provide a struct vfio_bitmap in data[]. User must provide zero-allocated 1080 + * memory via vfio_bitmap.data and its size in the vfio_bitmap.size field. 1081 + * A bit in the bitmap represents one page, of user provided page size in 1082 + * vfio_bitmap.pgsize field, consecutively starting from iova offset. Bit set 1083 + * indicates that the page at that offset from iova is dirty. 
A Bitmap of the 1084 + * pages in the range of unmapped size is returned in the user-provided 1085 + * vfio_bitmap.data. 817 1086 */ 818 1087 struct vfio_iommu_type1_dma_unmap { 819 1088 __u32 argsz; 820 1089 __u32 flags; 1090 + #define VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP (1 << 0) 821 1091 __u64 iova; /* IO virtual address */ 822 1092 __u64 size; /* Size of mapping (bytes) */ 1093 + __u8 data[]; 823 1094 }; 824 1095 825 1096 #define VFIO_IOMMU_UNMAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 14) ··· 830 1101 */ 831 1102 #define VFIO_IOMMU_ENABLE _IO(VFIO_TYPE, VFIO_BASE + 15) 832 1103 #define VFIO_IOMMU_DISABLE _IO(VFIO_TYPE, VFIO_BASE + 16) 1104 + 1105 + /** 1106 + * VFIO_IOMMU_DIRTY_PAGES - _IOWR(VFIO_TYPE, VFIO_BASE + 17, 1107 + * struct vfio_iommu_type1_dirty_bitmap) 1108 + * IOCTL is used for dirty pages logging. 1109 + * Caller should set flag depending on which operation to perform, details as 1110 + * below: 1111 + * 1112 + * Calling the IOCTL with VFIO_IOMMU_DIRTY_PAGES_FLAG_START flag set, instructs 1113 + * the IOMMU driver to log pages that are dirtied or potentially dirtied by 1114 + * the device; designed to be used when a migration is in progress. Dirty pages 1115 + * are logged until logging is disabled by user application by calling the IOCTL 1116 + * with VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP flag. 1117 + * 1118 + * Calling the IOCTL with VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP flag set, instructs 1119 + * the IOMMU driver to stop logging dirtied pages. 1120 + * 1121 + * Calling the IOCTL with VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP flag set 1122 + * returns the dirty pages bitmap for IOMMU container for a given IOVA range. 1123 + * The user must specify the IOVA range and the pgsize through the structure 1124 + * vfio_iommu_type1_dirty_bitmap_get in the data[] portion. This interface 1125 + * supports getting a bitmap of the smallest supported pgsize only and can be 1126 + * modified in future to get a bitmap of any specified supported pgsize. 
The 1127 + * user must provide a zeroed memory area for the bitmap memory and specify its 1128 + * size in bitmap.size. One bit is used to represent one page consecutively 1129 + * starting from iova offset. The user should provide page size in bitmap.pgsize 1130 + * field. A bit set in the bitmap indicates that the page at that offset from 1131 + * iova is dirty. The caller must set argsz to a value including the size of 1132 + * structure vfio_iommu_type1_dirty_bitmap_get, but excluding the size of the 1133 + * actual bitmap. If dirty pages logging is not enabled, an error will be 1134 + * returned. 1135 + * 1136 + * Only one of the flags _START, _STOP and _GET may be specified at a time. 1137 + * 1138 + */ 1139 + struct vfio_iommu_type1_dirty_bitmap { 1140 + __u32 argsz; 1141 + __u32 flags; 1142 + #define VFIO_IOMMU_DIRTY_PAGES_FLAG_START (1 << 0) 1143 + #define VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP (1 << 1) 1144 + #define VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP (1 << 2) 1145 + __u8 data[]; 1146 + }; 1147 + 1148 + struct vfio_iommu_type1_dirty_bitmap_get { 1149 + __u64 iova; /* IO virtual address */ 1150 + __u64 size; /* Size of iova range */ 1151 + struct vfio_bitmap bitmap; 1152 + }; 1153 + 1154 + #define VFIO_IOMMU_DIRTY_PAGES _IO(VFIO_TYPE, VFIO_BASE + 17) 833 1155 834 1156 /* -------- Additional API for SPAPR TCE (Server POWERPC) IOMMU -------- */ 835 1157
+19
linux-headers/linux/vfio_ccw.h
··· 34 34 __u32 ret_code; 35 35 } __attribute__((packed)); 36 36 37 + /* 38 + * Used for processing commands that read the subchannel-information block 39 + * Reading this region triggers a stsch() to hardware 40 + * Note: this is controlled by a capability 41 + */ 42 + struct ccw_schib_region { 43 + #define SCHIB_AREA_SIZE 52 44 + __u8 schib_area[SCHIB_AREA_SIZE]; 45 + } __attribute__((packed)); 46 + 47 + /* 48 + * Used for returning a Channel Report Word to userspace. 49 + * Note: this is controlled by a capability 50 + */ 51 + struct ccw_crw_region { 52 + __u32 crw; 53 + __u32 pad; 54 + } __attribute__((packed)); 55 + 37 56 #endif
+4
linux-headers/linux/vhost.h
··· 15 15 #include <linux/types.h> 16 16 #include <linux/ioctl.h> 17 17 18 + #define VHOST_FILE_UNBIND -1 19 + 18 20 /* ioctls */ 19 21 20 22 #define VHOST_VIRTIO 0xAF ··· 140 142 /* Get the max ring size. */ 141 143 #define VHOST_VDPA_GET_VRING_NUM _IOR(VHOST_VIRTIO, 0x76, __u16) 142 144 145 + /* Set event fd for config interrupt*/ 146 + #define VHOST_VDPA_SET_CONFIG_CALL _IOW(VHOST_VIRTIO, 0x77, int) 143 147 #endif