qemu with hax to log dma reads & writes jcs.org/2018/11/12/vfio
at master 664 lines 18 kB view raw
1/* 2 * Vhost User library 3 * 4 * Copyright (c) 2016 Red Hat, Inc. 5 * 6 * Authors: 7 * Victor Kaplansky <victork@redhat.com> 8 * Marc-André Lureau <mlureau@redhat.com> 9 * 10 * This work is licensed under the terms of the GNU GPL, version 2 or 11 * later. See the COPYING file in the top-level directory. 12 */ 13 14#ifndef LIBVHOST_USER_H 15#define LIBVHOST_USER_H 16 17#include <stdint.h> 18#include <stdbool.h> 19#include <stddef.h> 20#include <sys/poll.h> 21#include <linux/vhost.h> 22#include <pthread.h> 23#include "standard-headers/linux/virtio_ring.h" 24 25/* Based on qemu/hw/virtio/vhost-user.c */ 26#define VHOST_USER_F_PROTOCOL_FEATURES 30 27#define VHOST_LOG_PAGE 4096 28 29#define VIRTQUEUE_MAX_SIZE 1024 30 31#define VHOST_MEMORY_BASELINE_NREGIONS 8 32 33/* 34 * Set a reasonable maximum number of ram slots, which will be supported by 35 * any architecture. 36 */ 37#define VHOST_USER_MAX_RAM_SLOTS 32 38 39typedef enum VhostSetConfigType { 40 VHOST_SET_CONFIG_TYPE_MASTER = 0, 41 VHOST_SET_CONFIG_TYPE_MIGRATION = 1, 42} VhostSetConfigType; 43 44/* 45 * Maximum size of virtio device config space 46 */ 47#define VHOST_USER_MAX_CONFIG_SIZE 256 48 49enum VhostUserProtocolFeature { 50 VHOST_USER_PROTOCOL_F_MQ = 0, 51 VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1, 52 VHOST_USER_PROTOCOL_F_RARP = 2, 53 VHOST_USER_PROTOCOL_F_REPLY_ACK = 3, 54 VHOST_USER_PROTOCOL_F_NET_MTU = 4, 55 VHOST_USER_PROTOCOL_F_SLAVE_REQ = 5, 56 VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6, 57 VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7, 58 VHOST_USER_PROTOCOL_F_PAGEFAULT = 8, 59 VHOST_USER_PROTOCOL_F_CONFIG = 9, 60 VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD = 10, 61 VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11, 62 VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12, 63 VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS = 14, 64 VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS = 15, 65 66 VHOST_USER_PROTOCOL_F_MAX 67}; 68 69#define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1) 70 71typedef enum VhostUserRequest { 72 VHOST_USER_NONE = 0, 73 VHOST_USER_GET_FEATURES = 1, 74 VHOST_USER_SET_FEATURES = 2, 75 VHOST_USER_SET_OWNER = 3, 76 VHOST_USER_RESET_OWNER = 4, 77 VHOST_USER_SET_MEM_TABLE = 5, 78 VHOST_USER_SET_LOG_BASE = 6, 79 VHOST_USER_SET_LOG_FD = 7, 80 VHOST_USER_SET_VRING_NUM = 8, 81 VHOST_USER_SET_VRING_ADDR = 9, 82 VHOST_USER_SET_VRING_BASE = 10, 83 VHOST_USER_GET_VRING_BASE = 11, 84 VHOST_USER_SET_VRING_KICK = 12, 85 VHOST_USER_SET_VRING_CALL = 13, 86 VHOST_USER_SET_VRING_ERR = 14, 87 VHOST_USER_GET_PROTOCOL_FEATURES = 15, 88 VHOST_USER_SET_PROTOCOL_FEATURES = 16, 89 VHOST_USER_GET_QUEUE_NUM = 17, 90 VHOST_USER_SET_VRING_ENABLE = 18, 91 VHOST_USER_SEND_RARP = 19, 92 VHOST_USER_NET_SET_MTU = 20, 93 VHOST_USER_SET_SLAVE_REQ_FD = 21, 94 VHOST_USER_IOTLB_MSG = 22, 95 VHOST_USER_SET_VRING_ENDIAN = 23, 96 VHOST_USER_GET_CONFIG = 24, 97 VHOST_USER_SET_CONFIG = 25, 98 VHOST_USER_CREATE_CRYPTO_SESSION = 26, 99 VHOST_USER_CLOSE_CRYPTO_SESSION = 27, 100 VHOST_USER_POSTCOPY_ADVISE = 28, 101 VHOST_USER_POSTCOPY_LISTEN = 29, 102 VHOST_USER_POSTCOPY_END = 30, 103 VHOST_USER_GET_INFLIGHT_FD = 31, 104 VHOST_USER_SET_INFLIGHT_FD = 32, 105 VHOST_USER_GPU_SET_SOCKET = 33, 106 VHOST_USER_VRING_KICK = 35, 107 VHOST_USER_GET_MAX_MEM_SLOTS = 36, 108 VHOST_USER_ADD_MEM_REG = 37, 109 VHOST_USER_REM_MEM_REG = 38, 110 VHOST_USER_MAX 111} VhostUserRequest; 112 113typedef enum VhostUserSlaveRequest { 114 VHOST_USER_SLAVE_NONE = 0, 115 VHOST_USER_SLAVE_IOTLB_MSG = 1, 116 VHOST_USER_SLAVE_CONFIG_CHANGE_MSG = 2, 117 VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3, 118 VHOST_USER_SLAVE_VRING_CALL = 4, 119 VHOST_USER_SLAVE_VRING_ERR = 5, 120 VHOST_USER_SLAVE_MAX 121} VhostUserSlaveRequest; 122 123typedef struct VhostUserMemoryRegion { 124 uint64_t guest_phys_addr; 125 uint64_t memory_size; 126 uint64_t userspace_addr; 127 uint64_t mmap_offset; 128} VhostUserMemoryRegion; 129 130typedef struct VhostUserMemory { 131 uint32_t nregions; 132 uint32_t padding; 133 VhostUserMemoryRegion regions[VHOST_MEMORY_BASELINE_NREGIONS]; 134} VhostUserMemory; 135 136typedef struct VhostUserMemRegMsg { 137 uint32_t padding; 138 VhostUserMemoryRegion region; 139} VhostUserMemRegMsg; 140 141typedef struct VhostUserLog { 142 uint64_t mmap_size; 143 uint64_t mmap_offset; 144} VhostUserLog; 145 146typedef struct VhostUserConfig { 147 uint32_t offset; 148 uint32_t size; 149 uint32_t flags; 150 uint8_t region[VHOST_USER_MAX_CONFIG_SIZE]; 151} VhostUserConfig; 152 153static VhostUserConfig c __attribute__ ((unused)); 154#define VHOST_USER_CONFIG_HDR_SIZE (sizeof(c.offset) \ 155 + sizeof(c.size) \ 156 + sizeof(c.flags)) 157 158typedef struct VhostUserVringArea { 159 uint64_t u64; 160 uint64_t size; 161 uint64_t offset; 162} VhostUserVringArea; 163 164typedef struct VhostUserInflight { 165 uint64_t mmap_size; 166 uint64_t mmap_offset; 167 uint16_t num_queues; 168 uint16_t queue_size; 169} VhostUserInflight; 170 171#if defined(_WIN32) && (defined(__x86_64__) || defined(__i386__)) 172# define VU_PACKED __attribute__((gcc_struct, packed)) 173#else 174# define VU_PACKED __attribute__((packed)) 175#endif 176 177typedef struct VhostUserMsg { 178 int request; 179 180#define VHOST_USER_VERSION_MASK (0x3) 181#define VHOST_USER_REPLY_MASK (0x1 << 2) 182#define VHOST_USER_NEED_REPLY_MASK (0x1 << 3) 183 uint32_t flags; 184 uint32_t size; /* the following payload size */ 185 186 union { 187#define VHOST_USER_VRING_IDX_MASK (0xff) 188#define VHOST_USER_VRING_NOFD_MASK (0x1 << 8) 189 uint64_t u64; 190 struct vhost_vring_state state; 191 struct vhost_vring_addr addr; 192 VhostUserMemory memory; 193 VhostUserMemRegMsg memreg; 194 VhostUserLog log; 195 VhostUserConfig config; 196 VhostUserVringArea area; 197 VhostUserInflight inflight; 198 } payload; 199 200 int fds[VHOST_MEMORY_BASELINE_NREGIONS]; 201 int fd_num; 202 uint8_t *data; 203} VU_PACKED VhostUserMsg; 204 205typedef struct VuDevRegion { 206 /* Guest Physical address. */ 207 uint64_t gpa; 208 /* Memory region size. */ 209 uint64_t size; 210 /* QEMU virtual address (userspace). */ 211 uint64_t qva; 212 /* Starting offset in our mmaped space. */ 213 uint64_t mmap_offset; 214 /* Start address of mmaped space. */ 215 uint64_t mmap_addr; 216} VuDevRegion; 217 218typedef struct VuDev VuDev; 219 220typedef uint64_t (*vu_get_features_cb) (VuDev *dev); 221typedef void (*vu_set_features_cb) (VuDev *dev, uint64_t features); 222typedef int (*vu_process_msg_cb) (VuDev *dev, VhostUserMsg *vmsg, 223 int *do_reply); 224typedef void (*vu_queue_set_started_cb) (VuDev *dev, int qidx, bool started); 225typedef bool (*vu_queue_is_processed_in_order_cb) (VuDev *dev, int qidx); 226typedef int (*vu_get_config_cb) (VuDev *dev, uint8_t *config, uint32_t len); 227typedef int (*vu_set_config_cb) (VuDev *dev, const uint8_t *data, 228 uint32_t offset, uint32_t size, 229 uint32_t flags); 230 231typedef struct VuDevIface { 232 /* called by VHOST_USER_GET_FEATURES to get the features bitmask */ 233 vu_get_features_cb get_features; 234 /* enable vhost implementation features */ 235 vu_set_features_cb set_features; 236 /* get the protocol feature bitmask from the underlying vhost 237 * implementation */ 238 vu_get_features_cb get_protocol_features; 239 /* enable protocol features in the underlying vhost implementation. */ 240 vu_set_features_cb set_protocol_features; 241 /* process_msg is called for each vhost-user message received */ 242 /* skip libvhost-user processing if return value != 0 */ 243 vu_process_msg_cb process_msg; 244 /* tells when queues can be processed */ 245 vu_queue_set_started_cb queue_set_started; 246 /* 247 * If the queue is processed in order, in which case it will be 248 * resumed to vring.used->idx. This can help to support resuming 249 * on unmanaged exit/crash. 250 */ 251 vu_queue_is_processed_in_order_cb queue_is_processed_in_order; 252 /* get the config space of the device */ 253 vu_get_config_cb get_config; 254 /* set the config space of the device */ 255 vu_set_config_cb set_config; 256} VuDevIface; 257 258typedef void (*vu_queue_handler_cb) (VuDev *dev, int qidx); 259 260typedef struct VuRing { 261 unsigned int num; 262 struct vring_desc *desc; 263 struct vring_avail *avail; 264 struct vring_used *used; 265 uint64_t log_guest_addr; 266 uint32_t flags; 267} VuRing; 268 269typedef struct VuDescStateSplit { 270 /* Indicate whether this descriptor is inflight or not. 271 * Only available for head-descriptor. */ 272 uint8_t inflight; 273 274 /* Padding */ 275 uint8_t padding[5]; 276 277 /* Maintain a list for the last batch of used descriptors. 278 * Only available when batching is used for submitting */ 279 uint16_t next; 280 281 /* Used to preserve the order of fetching available descriptors. 282 * Only available for head-descriptor. */ 283 uint64_t counter; 284} VuDescStateSplit; 285 286typedef struct VuVirtqInflight { 287 /* The feature flags of this region. Now it's initialized to 0. */ 288 uint64_t features; 289 290 /* The version of this region. It's 1 currently. 291 * Zero value indicates a vm reset happened. */ 292 uint16_t version; 293 294 /* The size of VuDescStateSplit array. It's equal to the virtqueue 295 * size. Slave could get it from queue size field of VhostUserInflight. */ 296 uint16_t desc_num; 297 298 /* The head of list that track the last batch of used descriptors. */ 299 uint16_t last_batch_head; 300 301 /* Storing the idx value of used ring */ 302 uint16_t used_idx; 303 304 /* Used to track the state of each descriptor in descriptor table */ 305 VuDescStateSplit desc[]; 306} VuVirtqInflight; 307 308typedef struct VuVirtqInflightDesc { 309 uint16_t index; 310 uint64_t counter; 311} VuVirtqInflightDesc; 312 313typedef struct VuVirtq { 314 VuRing vring; 315 316 VuVirtqInflight *inflight; 317 318 VuVirtqInflightDesc *resubmit_list; 319 320 uint16_t resubmit_num; 321 322 uint64_t counter; 323 324 /* Next head to pop */ 325 uint16_t last_avail_idx; 326 327 /* Last avail_idx read from VQ. */ 328 uint16_t shadow_avail_idx; 329 330 uint16_t used_idx; 331 332 /* Last used index value we have signalled on */ 333 uint16_t signalled_used; 334 335 /* Last used index value we have signalled on */ 336 bool signalled_used_valid; 337 338 /* Notification enabled? */ 339 bool notification; 340 341 int inuse; 342 343 vu_queue_handler_cb handler; 344 345 int call_fd; 346 int kick_fd; 347 int err_fd; 348 unsigned int enable; 349 bool started; 350 351 /* Guest addresses of our ring */ 352 struct vhost_vring_addr vra; 353} VuVirtq; 354 355enum VuWatchCondtion { 356 VU_WATCH_IN = POLLIN, 357 VU_WATCH_OUT = POLLOUT, 358 VU_WATCH_PRI = POLLPRI, 359 VU_WATCH_ERR = POLLERR, 360 VU_WATCH_HUP = POLLHUP, 361}; 362 363typedef void (*vu_panic_cb) (VuDev *dev, const char *err); 364typedef void (*vu_watch_cb) (VuDev *dev, int condition, void *data); 365typedef void (*vu_set_watch_cb) (VuDev *dev, int fd, int condition, 366 vu_watch_cb cb, void *data); 367typedef void (*vu_remove_watch_cb) (VuDev *dev, int fd); 368 369typedef struct VuDevInflightInfo { 370 int fd; 371 void *addr; 372 uint64_t size; 373} VuDevInflightInfo; 374 375struct VuDev { 376 int sock; 377 uint32_t nregions; 378 VuDevRegion regions[VHOST_USER_MAX_RAM_SLOTS]; 379 VuVirtq *vq; 380 VuDevInflightInfo inflight_info; 381 int log_call_fd; 382 /* Must be held while using slave_fd */ 383 pthread_mutex_t slave_mutex; 384 int slave_fd; 385 uint64_t log_size; 386 uint8_t *log_table; 387 uint64_t features; 388 uint64_t protocol_features; 389 bool broken; 390 uint16_t max_queues; 391 392 /* @set_watch: add or update the given fd to the watch set, 393 * call cb when condition is met */ 394 vu_set_watch_cb set_watch; 395 396 /* @remove_watch: remove the given fd from the watch set */ 397 vu_remove_watch_cb remove_watch; 398 399 /* @panic: encountered an unrecoverable error, you may try to 400 * re-initialize */ 401 vu_panic_cb panic; 402 const VuDevIface *iface; 403 404 /* Postcopy data */ 405 int postcopy_ufd; 406 bool postcopy_listening; 407}; 408 409typedef struct VuVirtqElement { 410 unsigned int index; 411 unsigned int out_num; 412 unsigned int in_num; 413 struct iovec *in_sg; 414 struct iovec *out_sg; 415} VuVirtqElement; 416 417/** 418 * vu_init: 419 * @dev: a VuDev context 420 * @max_queues: maximum number of virtqueues 421 * @socket: the socket connected to vhost-user master 422 * @panic: a panic callback 423 * @set_watch: a set_watch callback 424 * @remove_watch: a remove_watch callback 425 * @iface: a VuDevIface structure with vhost-user device callbacks 426 * 427 * Intializes a VuDev vhost-user context. 428 * 429 * Returns: true on success, false on failure. 430 **/ 431bool vu_init(VuDev *dev, 432 uint16_t max_queues, 433 int socket, 434 vu_panic_cb panic, 435 vu_set_watch_cb set_watch, 436 vu_remove_watch_cb remove_watch, 437 const VuDevIface *iface); 438 439 440/** 441 * vu_deinit: 442 * @dev: a VuDev context 443 * 444 * Cleans up the VuDev context 445 */ 446void vu_deinit(VuDev *dev); 447 448/** 449 * vu_dispatch: 450 * @dev: a VuDev context 451 * 452 * Process one vhost-user message. 453 * 454 * Returns: TRUE on success, FALSE on failure. 455 */ 456bool vu_dispatch(VuDev *dev); 457 458/** 459 * vu_gpa_to_va: 460 * @dev: a VuDev context 461 * @plen: guest memory size 462 * @guest_addr: guest address 463 * 464 * Translate a guest address to a pointer. Returns NULL on failure. 465 */ 466void *vu_gpa_to_va(VuDev *dev, uint64_t *plen, uint64_t guest_addr); 467 468/** 469 * vu_get_queue: 470 * @dev: a VuDev context 471 * @qidx: queue index 472 * 473 * Returns the queue number @qidx. 474 */ 475VuVirtq *vu_get_queue(VuDev *dev, int qidx); 476 477/** 478 * vu_set_queue_handler: 479 * @dev: a VuDev context 480 * @vq: a VuVirtq queue 481 * @handler: the queue handler callback 482 * 483 * Set the queue handler. This function may be called several times 484 * for the same queue. If called with NULL @handler, the handler is 485 * removed. 486 */ 487void vu_set_queue_handler(VuDev *dev, VuVirtq *vq, 488 vu_queue_handler_cb handler); 489 490/** 491 * vu_set_queue_host_notifier: 492 * @dev: a VuDev context 493 * @vq: a VuVirtq queue 494 * @fd: a file descriptor 495 * @size: host page size 496 * @offset: notifier offset in @fd file 497 * 498 * Set queue's host notifier. This function may be called several 499 * times for the same queue. If called with -1 @fd, the notifier 500 * is removed. 501 */ 502bool vu_set_queue_host_notifier(VuDev *dev, VuVirtq *vq, int fd, 503 int size, int offset); 504 505/** 506 * vu_queue_set_notification: 507 * @dev: a VuDev context 508 * @vq: a VuVirtq queue 509 * @enable: state 510 * 511 * Set whether the queue notifies (via event index or interrupt) 512 */ 513void vu_queue_set_notification(VuDev *dev, VuVirtq *vq, int enable); 514 515/** 516 * vu_queue_enabled: 517 * @dev: a VuDev context 518 * @vq: a VuVirtq queue 519 * 520 * Returns: whether the queue is enabled. 521 */ 522bool vu_queue_enabled(VuDev *dev, VuVirtq *vq); 523 524/** 525 * vu_queue_started: 526 * @dev: a VuDev context 527 * @vq: a VuVirtq queue 528 * 529 * Returns: whether the queue is started. 530 */ 531bool vu_queue_started(const VuDev *dev, const VuVirtq *vq); 532 533/** 534 * vu_queue_empty: 535 * @dev: a VuDev context 536 * @vq: a VuVirtq queue 537 * 538 * Returns: true if the queue is empty or not ready. 539 */ 540bool vu_queue_empty(VuDev *dev, VuVirtq *vq); 541 542/** 543 * vu_queue_notify: 544 * @dev: a VuDev context 545 * @vq: a VuVirtq queue 546 * 547 * Request to notify the queue via callfd (skipped if unnecessary) 548 */ 549void vu_queue_notify(VuDev *dev, VuVirtq *vq); 550 551/** 552 * vu_queue_notify_sync: 553 * @dev: a VuDev context 554 * @vq: a VuVirtq queue 555 * 556 * Request to notify the queue via callfd (skipped if unnecessary) 557 * or sync message if possible. 558 */ 559void vu_queue_notify_sync(VuDev *dev, VuVirtq *vq); 560 561/** 562 * vu_queue_pop: 563 * @dev: a VuDev context 564 * @vq: a VuVirtq queue 565 * @sz: the size of struct to return (must be >= VuVirtqElement) 566 * 567 * Returns: a VuVirtqElement filled from the queue or NULL. The 568 * returned element must be free()-d by the caller. 569 */ 570void *vu_queue_pop(VuDev *dev, VuVirtq *vq, size_t sz); 571 572 573/** 574 * vu_queue_unpop: 575 * @dev: a VuDev context 576 * @vq: a VuVirtq queue 577 * @elem: The #VuVirtqElement 578 * @len: number of bytes written 579 * 580 * Pretend the most recent element wasn't popped from the virtqueue. The next 581 * call to vu_queue_pop() will refetch the element. 582 */ 583void vu_queue_unpop(VuDev *dev, VuVirtq *vq, VuVirtqElement *elem, 584 size_t len); 585 586/** 587 * vu_queue_rewind: 588 * @dev: a VuDev context 589 * @vq: a VuVirtq queue 590 * @num: number of elements to push back 591 * 592 * Pretend that elements weren't popped from the virtqueue. The next 593 * virtqueue_pop() will refetch the oldest element. 594 * 595 * Returns: true on success, false if @num is greater than the number of in use 596 * elements. 597 */ 598bool vu_queue_rewind(VuDev *dev, VuVirtq *vq, unsigned int num); 599 600/** 601 * vu_queue_fill: 602 * @dev: a VuDev context 603 * @vq: a VuVirtq queue 604 * @elem: a VuVirtqElement 605 * @len: length in bytes to write 606 * @idx: optional offset for the used ring index (0 in general) 607 * 608 * Fill the used ring with @elem element. 609 */ 610void vu_queue_fill(VuDev *dev, VuVirtq *vq, 611 const VuVirtqElement *elem, 612 unsigned int len, unsigned int idx); 613 614/** 615 * vu_queue_push: 616 * @dev: a VuDev context 617 * @vq: a VuVirtq queue 618 * @elem: a VuVirtqElement 619 * @len: length in bytes to write 620 * 621 * Helper that combines vu_queue_fill() with a vu_queue_flush(). 622 */ 623void vu_queue_push(VuDev *dev, VuVirtq *vq, 624 const VuVirtqElement *elem, unsigned int len); 625 626/** 627 * vu_queue_flush: 628 * @dev: a VuDev context 629 * @vq: a VuVirtq queue 630 * @num: number of elements to flush 631 * 632 * Mark the last number of elements as done (used.idx is updated by 633 * num elements). 634*/ 635void vu_queue_flush(VuDev *dev, VuVirtq *vq, unsigned int num); 636 637/** 638 * vu_queue_get_avail_bytes: 639 * @dev: a VuDev context 640 * @vq: a VuVirtq queue 641 * @in_bytes: in bytes 642 * @out_bytes: out bytes 643 * @max_in_bytes: stop counting after max_in_bytes 644 * @max_out_bytes: stop counting after max_out_bytes 645 * 646 * Count the number of available bytes, up to max_in_bytes/max_out_bytes. 647 */ 648void vu_queue_get_avail_bytes(VuDev *vdev, VuVirtq *vq, unsigned int *in_bytes, 649 unsigned int *out_bytes, 650 unsigned max_in_bytes, unsigned max_out_bytes); 651 652/** 653 * vu_queue_avail_bytes: 654 * @dev: a VuDev context 655 * @vq: a VuVirtq queue 656 * @in_bytes: expected in bytes 657 * @out_bytes: expected out bytes 658 * 659 * Returns: true if in_bytes <= in_total && out_bytes <= out_total 660 */ 661bool vu_queue_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int in_bytes, 662 unsigned int out_bytes); 663 664#endif /* LIBVHOST_USER_H */