qemu with hax to log dma reads & writes jcs.org/2018/11/12/vfio

vmbus: vmbus implementation

Add the VMBus infrastructure -- bus, devices, root bridge, vmbus state
machine, vmbus channel interactions, etc.

VMBus is a collection of technologies. At its lowest layer, it's a message
passing and signaling mechanism, allowing efficient passing of messages to and
from guest VMs. A layer higher, it's a mechanism for defining channels of
communication, where each channel is tagged with a type (which implies a
protocol) and an instance ID. A layer higher than that, it's a bus driver,
serving as the basis of device enumeration within a VM, where a channel can
optionally be exposed as a paravirtual device. When a server-side (paravirtual
back-end) component wishes to offer a channel to a guest VM, it does so by
specifying a channel type, a mode, and an instance ID. VMBus then exposes this
in the guest.

More information about VMBus can be found in the file
vmbuskernelmodeclientlibapi.h in Microsoft's WDK.

TODO:
- split into smaller palatable pieces
- more comments
- check and handle corner cases

Kudos to Evgeny Yakovlev (formerly eyakovlev@virtuozzo.com) and Andrey
Smetanin (formerly asmetanin@virtuozzo.com) for research and
prototyping.

Signed-off-by: Roman Kagan <rkagan@virtuozzo.com>
Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
Signed-off-by: Jon Doron <arilou@gmail.com>
Message-Id: <20200424123444.3481728-4-arilou@gmail.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

authored by

Jon Doron and committed by
Paolo Bonzini
0d71f708 973b1fbd

+2956
+1
Makefile.objs
··· 150 150 trace-events-subdirs += hw/char 151 151 trace-events-subdirs += hw/dma 152 152 trace-events-subdirs += hw/hppa 153 + trace-events-subdirs += hw/hyperv 153 154 trace-events-subdirs += hw/i2c 154 155 trace-events-subdirs += hw/i386 155 156 trace-events-subdirs += hw/i386/xen
+5
hw/hyperv/Kconfig
··· 6 6 bool 7 7 default y if TEST_DEVICES 8 8 depends on HYPERV 9 + 10 + config VMBUS 11 + bool 12 + default y 13 + depends on HYPERV
+1
hw/hyperv/Makefile.objs
··· 1 1 obj-y += hyperv.o 2 2 obj-$(CONFIG_HYPERV_TESTDEV) += hyperv_testdev.o 3 + obj-$(CONFIG_VMBUS) += vmbus.o
+18
hw/hyperv/trace-events
··· 1 + # vmbus 2 + vmbus_recv_message(uint32_t type, uint32_t size) "type %d size %d" 3 + vmbus_signal_event(void) "" 4 + vmbus_channel_notify_guest(uint32_t chan_id) "channel #%d" 5 + vmbus_post_msg(uint32_t type, uint32_t size) "type %d size %d" 6 + vmbus_msg_cb(int status) "message status %d" 7 + vmbus_process_incoming_message(uint32_t message_type) "type %d" 8 + vmbus_initiate_contact(uint16_t major, uint16_t minor, uint32_t vcpu, uint64_t monitor_page1, uint64_t monitor_page2, uint64_t interrupt_page) "version %d.%d target vp %d mon pages 0x%"PRIx64",0x%"PRIx64" int page 0x%"PRIx64 9 + vmbus_send_offer(uint32_t chan_id, void *dev) "channel #%d dev %p" 10 + vmbus_terminate_offers(void) "" 11 + vmbus_gpadl_header(uint32_t gpadl_id, uint16_t num_gfns) "gpadl #%d gfns %d" 12 + vmbus_gpadl_body(uint32_t gpadl_id) "gpadl #%d" 13 + vmbus_gpadl_created(uint32_t gpadl_id) "gpadl #%d" 14 + vmbus_gpadl_teardown(uint32_t gpadl_id) "gpadl #%d" 15 + vmbus_gpadl_torndown(uint32_t gpadl_id) "gpadl #%d" 16 + vmbus_open_channel(uint32_t chan_id, uint32_t gpadl_id, uint32_t target_vp) "channel #%d gpadl #%d target vp %d" 17 + vmbus_channel_open(uint32_t chan_id, uint32_t status) "channel #%d status %d" 18 + vmbus_close_channel(uint32_t chan_id) "channel #%d"
+2672
hw/hyperv/vmbus.c
··· 1 + /* 2 + * QEMU Hyper-V VMBus 3 + * 4 + * Copyright (c) 2017-2018 Virtuozzo International GmbH. 5 + * 6 + * This work is licensed under the terms of the GNU GPL, version 2 or later. 7 + * See the COPYING file in the top-level directory. 8 + */ 9 + 10 + #include "qemu/osdep.h" 11 + #include "qemu/error-report.h" 12 + #include "qemu/main-loop.h" 13 + #include "qapi/error.h" 14 + #include "migration/vmstate.h" 15 + #include "hw/qdev-properties.h" 16 + #include "hw/hyperv/hyperv.h" 17 + #include "hw/hyperv/vmbus.h" 18 + #include "hw/hyperv/vmbus-bridge.h" 19 + #include "hw/sysbus.h" 20 + #include "cpu.h" 21 + #include "trace.h" 22 + 23 + #define TYPE_VMBUS "vmbus" 24 + #define VMBUS(obj) OBJECT_CHECK(VMBus, (obj), TYPE_VMBUS) 25 + 26 + enum { 27 + VMGPADL_INIT, 28 + VMGPADL_ALIVE, 29 + VMGPADL_TEARINGDOWN, 30 + VMGPADL_TORNDOWN, 31 + }; 32 + 33 + struct VMBusGpadl { 34 + /* GPADL id */ 35 + uint32_t id; 36 + /* associated channel id (rudimentary?) */ 37 + uint32_t child_relid; 38 + 39 + /* number of pages in the GPADL as declared in GPADL_HEADER message */ 40 + uint32_t num_gfns; 41 + /* 42 + * Due to limited message size, GPADL may not fit fully in a single 43 + * GPADL_HEADER message, and is further populated using GPADL_BODY 44 + * messages. @seen_gfns is the number of pages seen so far; once it 45 + * reaches @num_gfns, the GPADL is ready to use. 46 + */ 47 + uint32_t seen_gfns; 48 + /* array of GFNs (of size @num_gfns once allocated) */ 49 + uint64_t *gfns; 50 + 51 + uint8_t state; 52 + 53 + QTAILQ_ENTRY(VMBusGpadl) link; 54 + VMBus *vmbus; 55 + unsigned refcount; 56 + }; 57 + 58 + /* 59 + * Wrap sequential read from / write to GPADL. 60 + */ 61 + typedef struct GpadlIter { 62 + VMBusGpadl *gpadl; 63 + AddressSpace *as; 64 + DMADirection dir; 65 + /* offset into GPADL where the next i/o will be performed */ 66 + uint32_t off; 67 + /* 68 + * Cached mapping of the currently accessed page, up to page boundary. 69 + * Updated lazily on i/o. 
70 + * Note: MemoryRegionCache can not be used here because pages in the GPADL 71 + * are non-contiguous and may belong to different memory regions. 72 + */ 73 + void *map; 74 + /* offset after last i/o (i.e. not affected by seek) */ 75 + uint32_t last_off; 76 + /* 77 + * Indicator that the iterator is active and may have a cached mapping. 78 + * Allows to enforce bracketing of all i/o (which may create cached 79 + * mappings) and thus exclude mapping leaks. 80 + */ 81 + bool active; 82 + } GpadlIter; 83 + 84 + /* 85 + * Ring buffer. There are two of them, sitting in the same GPADL, for each 86 + * channel. 87 + * Each ring buffer consists of a set of pages, with the first page containing 88 + * the ring buffer header, and the remaining pages being for data packets. 89 + */ 90 + typedef struct VMBusRingBufCommon { 91 + AddressSpace *as; 92 + /* GPA of the ring buffer header */ 93 + dma_addr_t rb_addr; 94 + /* start and length of the ring buffer data area within GPADL */ 95 + uint32_t base; 96 + uint32_t len; 97 + 98 + GpadlIter iter; 99 + } VMBusRingBufCommon; 100 + 101 + typedef struct VMBusSendRingBuf { 102 + VMBusRingBufCommon common; 103 + /* current write index, to be committed at the end of send */ 104 + uint32_t wr_idx; 105 + /* write index at the start of send */ 106 + uint32_t last_wr_idx; 107 + /* space to be requested from the guest */ 108 + uint32_t wanted; 109 + /* space reserved for planned sends */ 110 + uint32_t reserved; 111 + /* last seen read index */ 112 + uint32_t last_seen_rd_idx; 113 + } VMBusSendRingBuf; 114 + 115 + typedef struct VMBusRecvRingBuf { 116 + VMBusRingBufCommon common; 117 + /* current read index, to be committed at the end of receive */ 118 + uint32_t rd_idx; 119 + /* read index at the start of receive */ 120 + uint32_t last_rd_idx; 121 + /* last seen write index */ 122 + uint32_t last_seen_wr_idx; 123 + } VMBusRecvRingBuf; 124 + 125 + 126 + enum { 127 + VMOFFER_INIT, 128 + VMOFFER_SENDING, 129 + VMOFFER_SENT, 130 + }; 131 + 
132 + enum { 133 + VMCHAN_INIT, 134 + VMCHAN_OPENING, 135 + VMCHAN_OPEN, 136 + }; 137 + 138 + struct VMBusChannel { 139 + VMBusDevice *dev; 140 + 141 + /* channel id */ 142 + uint32_t id; 143 + /* 144 + * subchannel index within the device; subchannel #0 is "primary" and 145 + * always exists 146 + */ 147 + uint16_t subchan_idx; 148 + uint32_t open_id; 149 + /* VP_INDEX of the vCPU to notify with (synthetic) interrupts */ 150 + uint32_t target_vp; 151 + /* GPADL id to use for the ring buffers */ 152 + uint32_t ringbuf_gpadl; 153 + /* start (in pages) of the send ring buffer within @ringbuf_gpadl */ 154 + uint32_t ringbuf_send_offset; 155 + 156 + uint8_t offer_state; 157 + uint8_t state; 158 + bool is_open; 159 + 160 + /* main device worker; copied from the device class */ 161 + VMBusChannelNotifyCb notify_cb; 162 + /* 163 + * guest->host notifications, either sent directly or dispatched via 164 + * interrupt page (older VMBus) 165 + */ 166 + EventNotifier notifier; 167 + 168 + VMBus *vmbus; 169 + /* 170 + * SINT route to signal with host->guest notifications; may be shared with 171 + * the main VMBus SINT route 172 + */ 173 + HvSintRoute *notify_route; 174 + VMBusGpadl *gpadl; 175 + 176 + VMBusSendRingBuf send_ringbuf; 177 + VMBusRecvRingBuf recv_ringbuf; 178 + 179 + QTAILQ_ENTRY(VMBusChannel) link; 180 + }; 181 + 182 + /* 183 + * Hyper-V spec mandates that every message port has 16 buffers, which means 184 + * that the guest can post up to this many messages without blocking. 185 + * Therefore a queue for incoming messages has to be provided. 186 + * For outgoing (i.e. host->guest) messages there's no queue; the VMBus just 187 + * doesn't transition to a new state until the message is known to have been 188 + * successfully delivered to the respective SynIC message slot. 189 + */ 190 + #define HV_MSG_QUEUE_LEN 16 191 + 192 + /* Hyper-V devices never use channel #0. Must be something special. 
*/ 193 + #define VMBUS_FIRST_CHANID 1 194 + /* Each channel occupies one bit within a single event page sint slot. */ 195 + #define VMBUS_CHANID_COUNT (HV_EVENT_FLAGS_COUNT - VMBUS_FIRST_CHANID) 196 + /* Leave a few connection numbers for other purposes. */ 197 + #define VMBUS_CHAN_CONNECTION_OFFSET 16 198 + 199 + /* 200 + * Since the success or failure of sending a message is reported 201 + * asynchronously, the VMBus state machine has effectively two entry points: 202 + * vmbus_run and vmbus_msg_cb (the latter is called when the host->guest 203 + * message delivery status becomes known). Both are run as oneshot BHs on the 204 + * main aio context, ensuring serialization. 205 + */ 206 + enum { 207 + VMBUS_LISTEN, 208 + VMBUS_HANDSHAKE, 209 + VMBUS_OFFER, 210 + VMBUS_CREATE_GPADL, 211 + VMBUS_TEARDOWN_GPADL, 212 + VMBUS_OPEN_CHANNEL, 213 + VMBUS_UNLOAD, 214 + VMBUS_STATE_MAX 215 + }; 216 + 217 + struct VMBus { 218 + BusState parent; 219 + 220 + uint8_t state; 221 + /* protection against recursive aio_poll (see vmbus_run) */ 222 + bool in_progress; 223 + /* whether there's a message being delivered to the guest */ 224 + bool msg_in_progress; 225 + uint32_t version; 226 + /* VP_INDEX of the vCPU to send messages and interrupts to */ 227 + uint32_t target_vp; 228 + HvSintRoute *sint_route; 229 + /* 230 + * interrupt page for older protocol versions; newer ones use SynIC event 231 + * flags directly 232 + */ 233 + hwaddr int_page_gpa; 234 + 235 + DECLARE_BITMAP(chanid_bitmap, VMBUS_CHANID_COUNT); 236 + 237 + /* incoming message queue */ 238 + struct hyperv_post_message_input rx_queue[HV_MSG_QUEUE_LEN]; 239 + uint8_t rx_queue_head; 240 + uint8_t rx_queue_size; 241 + QemuMutex rx_queue_lock; 242 + 243 + QTAILQ_HEAD(, VMBusGpadl) gpadl_list; 244 + QTAILQ_HEAD(, VMBusChannel) channel_list; 245 + 246 + /* 247 + * guest->host notifications for older VMBus, to be dispatched via 248 + * interrupt page 249 + */ 250 + EventNotifier notifier; 251 + }; 252 + 253 + static bool 
gpadl_full(VMBusGpadl *gpadl) 254 + { 255 + return gpadl->seen_gfns == gpadl->num_gfns; 256 + } 257 + 258 + static VMBusGpadl *create_gpadl(VMBus *vmbus, uint32_t id, 259 + uint32_t child_relid, uint32_t num_gfns) 260 + { 261 + VMBusGpadl *gpadl = g_new0(VMBusGpadl, 1); 262 + 263 + gpadl->id = id; 264 + gpadl->child_relid = child_relid; 265 + gpadl->num_gfns = num_gfns; 266 + gpadl->gfns = g_new(uint64_t, num_gfns); 267 + QTAILQ_INSERT_HEAD(&vmbus->gpadl_list, gpadl, link); 268 + gpadl->vmbus = vmbus; 269 + gpadl->refcount = 1; 270 + return gpadl; 271 + } 272 + 273 + static void free_gpadl(VMBusGpadl *gpadl) 274 + { 275 + QTAILQ_REMOVE(&gpadl->vmbus->gpadl_list, gpadl, link); 276 + g_free(gpadl->gfns); 277 + g_free(gpadl); 278 + } 279 + 280 + static VMBusGpadl *find_gpadl(VMBus *vmbus, uint32_t gpadl_id) 281 + { 282 + VMBusGpadl *gpadl; 283 + QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) { 284 + if (gpadl->id == gpadl_id) { 285 + return gpadl; 286 + } 287 + } 288 + return NULL; 289 + } 290 + 291 + VMBusGpadl *vmbus_get_gpadl(VMBusChannel *chan, uint32_t gpadl_id) 292 + { 293 + VMBusGpadl *gpadl = find_gpadl(chan->vmbus, gpadl_id); 294 + if (!gpadl || !gpadl_full(gpadl)) { 295 + return NULL; 296 + } 297 + gpadl->refcount++; 298 + return gpadl; 299 + } 300 + 301 + void vmbus_put_gpadl(VMBusGpadl *gpadl) 302 + { 303 + if (!gpadl) { 304 + return; 305 + } 306 + if (--gpadl->refcount) { 307 + return; 308 + } 309 + free_gpadl(gpadl); 310 + } 311 + 312 + uint32_t vmbus_gpadl_len(VMBusGpadl *gpadl) 313 + { 314 + return gpadl->num_gfns * TARGET_PAGE_SIZE; 315 + } 316 + 317 + static void gpadl_iter_init(GpadlIter *iter, VMBusGpadl *gpadl, 318 + AddressSpace *as, DMADirection dir) 319 + { 320 + iter->gpadl = gpadl; 321 + iter->as = as; 322 + iter->dir = dir; 323 + iter->active = false; 324 + } 325 + 326 + static inline void gpadl_iter_cache_unmap(GpadlIter *iter) 327 + { 328 + uint32_t map_start_in_page = (uintptr_t)iter->map & ~TARGET_PAGE_MASK; 329 + uint32_t 
io_end_in_page = ((iter->last_off - 1) & ~TARGET_PAGE_MASK) + 1; 330 + 331 + /* mapping is only done to do non-zero amount of i/o */ 332 + assert(iter->last_off > 0); 333 + assert(map_start_in_page < io_end_in_page); 334 + 335 + dma_memory_unmap(iter->as, iter->map, TARGET_PAGE_SIZE - map_start_in_page, 336 + iter->dir, io_end_in_page - map_start_in_page); 337 + } 338 + 339 + /* 340 + * Copy exactly @len bytes between the GPADL pointed to by @iter and @buf. 341 + * The direction of the copy is determined by @iter->dir. 342 + * The caller must ensure the operation overflows neither @buf nor the GPADL 343 + * (there's an assert for the latter). 344 + * Reuse the currently mapped page in the GPADL if possible. 345 + */ 346 + static ssize_t gpadl_iter_io(GpadlIter *iter, void *buf, uint32_t len) 347 + { 348 + ssize_t ret = len; 349 + 350 + assert(iter->active); 351 + 352 + while (len) { 353 + uint32_t off_in_page = iter->off & ~TARGET_PAGE_MASK; 354 + uint32_t pgleft = TARGET_PAGE_SIZE - off_in_page; 355 + uint32_t cplen = MIN(pgleft, len); 356 + void *p; 357 + 358 + /* try to reuse the cached mapping */ 359 + if (iter->map) { 360 + uint32_t map_start_in_page = 361 + (uintptr_t)iter->map & ~TARGET_PAGE_MASK; 362 + uint32_t off_base = iter->off & ~TARGET_PAGE_MASK; 363 + uint32_t mapped_base = (iter->last_off - 1) & ~TARGET_PAGE_MASK; 364 + if (off_base != mapped_base || off_in_page < map_start_in_page) { 365 + gpadl_iter_cache_unmap(iter); 366 + iter->map = NULL; 367 + } 368 + } 369 + 370 + if (!iter->map) { 371 + dma_addr_t maddr; 372 + dma_addr_t mlen = pgleft; 373 + uint32_t idx = iter->off >> TARGET_PAGE_BITS; 374 + assert(idx < iter->gpadl->num_gfns); 375 + 376 + maddr = (iter->gpadl->gfns[idx] << TARGET_PAGE_BITS) | off_in_page; 377 + 378 + iter->map = dma_memory_map(iter->as, maddr, &mlen, iter->dir); 379 + if (mlen != pgleft) { 380 + dma_memory_unmap(iter->as, iter->map, mlen, iter->dir, 0); 381 + iter->map = NULL; 382 + return -EFAULT; 383 + } 384 + } 385 + 
386 + p = (void *)(((uintptr_t)iter->map & TARGET_PAGE_MASK) | off_in_page); 387 + if (iter->dir == DMA_DIRECTION_FROM_DEVICE) { 388 + memcpy(p, buf, cplen); 389 + } else { 390 + memcpy(buf, p, cplen); 391 + } 392 + 393 + buf += cplen; 394 + len -= cplen; 395 + iter->off += cplen; 396 + iter->last_off = iter->off; 397 + } 398 + 399 + return ret; 400 + } 401 + 402 + /* 403 + * Position the iterator @iter at new offset @new_off. 404 + * The cached mapping is left alone here; gpadl_iter_io() drops it lazily if 405 + * it turns out to be unusable at the new offset. 406 + */ 407 + static inline void gpadl_iter_seek(GpadlIter *iter, uint32_t new_off) 408 + { 409 + assert(iter->active); 410 + iter->off = new_off; 411 + } 412 + 413 + /* 414 + * Start a series of i/o on the GPADL. 415 + * After this i/o and seek operations on @iter become legal. 416 + */ 417 + static inline void gpadl_iter_start_io(GpadlIter *iter) 418 + { 419 + assert(!iter->active); 420 + /* mapping is cached lazily on i/o */ 421 + iter->map = NULL; 422 + iter->active = true; 423 + } 424 + 425 + /* 426 + * End the earlier started series of i/o on the GPADL and release the cached 427 + * mapping if any. 
428 + */ 429 + static inline void gpadl_iter_end_io(GpadlIter *iter) 430 + { 431 + assert(iter->active); 432 + 433 + if (iter->map) { 434 + gpadl_iter_cache_unmap(iter); 435 + } 436 + 437 + iter->active = false; 438 + } 439 + 440 + static void vmbus_resched(VMBus *vmbus); 441 + static void vmbus_msg_cb(void *data, int status); 442 + 443 + ssize_t vmbus_iov_to_gpadl(VMBusChannel *chan, VMBusGpadl *gpadl, uint32_t off, 444 + const struct iovec *iov, size_t iov_cnt) 445 + { 446 + GpadlIter iter; 447 + size_t i; 448 + ssize_t ret = 0; 449 + 450 + gpadl_iter_init(&iter, gpadl, chan->dev->dma_as, 451 + DMA_DIRECTION_FROM_DEVICE); 452 + gpadl_iter_start_io(&iter); 453 + gpadl_iter_seek(&iter, off); 454 + for (i = 0; i < iov_cnt; i++) { 455 + ret = gpadl_iter_io(&iter, iov[i].iov_base, iov[i].iov_len); 456 + if (ret < 0) { 457 + goto out; 458 + } 459 + } 460 + out: 461 + gpadl_iter_end_io(&iter); 462 + return ret; 463 + } 464 + 465 + int vmbus_map_sgl(VMBusChanReq *req, DMADirection dir, struct iovec *iov, 466 + unsigned iov_cnt, size_t len, size_t off) 467 + { 468 + int ret_cnt = 0, ret; 469 + unsigned i; 470 + QEMUSGList *sgl = &req->sgl; 471 + ScatterGatherEntry *sg = sgl->sg; 472 + 473 + for (i = 0; i < sgl->nsg; i++) { 474 + if (sg[i].len > off) { 475 + break; 476 + } 477 + off -= sg[i].len; 478 + } 479 + for (; len && i < sgl->nsg; i++) { 480 + dma_addr_t mlen = MIN(sg[i].len - off, len); 481 + dma_addr_t addr = sg[i].base + off; 482 + len -= mlen; 483 + off = 0; 484 + 485 + for (; mlen; ret_cnt++) { 486 + dma_addr_t l = mlen; 487 + dma_addr_t a = addr; 488 + 489 + if (ret_cnt == iov_cnt) { 490 + ret = -ENOBUFS; 491 + goto err; 492 + } 493 + 494 + iov[ret_cnt].iov_base = dma_memory_map(sgl->as, a, &l, dir); 495 + if (!l) { 496 + ret = -EFAULT; 497 + goto err; 498 + } 499 + iov[ret_cnt].iov_len = l; 500 + addr += l; 501 + mlen -= l; 502 + } 503 + } 504 + 505 + return ret_cnt; 506 + err: 507 + vmbus_unmap_sgl(req, dir, iov, ret_cnt, 0); 508 + return ret; 509 + } 510 + 
511 + void vmbus_unmap_sgl(VMBusChanReq *req, DMADirection dir, struct iovec *iov, 512 + unsigned iov_cnt, size_t accessed) 513 + { 514 + QEMUSGList *sgl = &req->sgl; 515 + unsigned i; 516 + 517 + for (i = 0; i < iov_cnt; i++) { 518 + size_t acsd = MIN(accessed, iov[i].iov_len); 519 + dma_memory_unmap(sgl->as, iov[i].iov_base, iov[i].iov_len, dir, acsd); 520 + accessed -= acsd; 521 + } 522 + } 523 + 524 + static const VMStateDescription vmstate_gpadl = { 525 + .name = "vmbus/gpadl", 526 + .version_id = 0, 527 + .minimum_version_id = 0, 528 + .fields = (VMStateField[]) { 529 + VMSTATE_UINT32(id, VMBusGpadl), 530 + VMSTATE_UINT32(child_relid, VMBusGpadl), 531 + VMSTATE_UINT32(num_gfns, VMBusGpadl), 532 + VMSTATE_UINT32(seen_gfns, VMBusGpadl), 533 + VMSTATE_VARRAY_UINT32_ALLOC(gfns, VMBusGpadl, num_gfns, 0, 534 + vmstate_info_uint64, uint64_t), 535 + VMSTATE_UINT8(state, VMBusGpadl), 536 + VMSTATE_END_OF_LIST() 537 + } 538 + }; 539 + 540 + /* 541 + * Wrap the index into a ring buffer of @len bytes. 542 + * @idx is assumed not to exceed twice the size of the ringbuffer, so only 543 + * single wraparound is considered. 544 + */ 545 + static inline uint32_t rb_idx_wrap(uint32_t idx, uint32_t len) 546 + { 547 + if (idx >= len) { 548 + idx -= len; 549 + } 550 + return idx; 551 + } 552 + 553 + /* 554 + * Circular difference between two indices into a ring buffer of @len bytes. 555 + * @allow_catchup - whether @idx1 may catch up @idx2; e.g. read index may catch 556 + * up write index but not vice versa. 
557 + */ 558 + static inline uint32_t rb_idx_delta(uint32_t idx1, uint32_t idx2, uint32_t len, 559 + bool allow_catchup) 560 + { 561 + return rb_idx_wrap(idx2 + len - idx1 - !allow_catchup, len); 562 + } 563 + 564 + static vmbus_ring_buffer *ringbuf_map_hdr(VMBusRingBufCommon *ringbuf) 565 + { 566 + vmbus_ring_buffer *rb; 567 + dma_addr_t mlen = sizeof(*rb); 568 + 569 + rb = dma_memory_map(ringbuf->as, ringbuf->rb_addr, &mlen, 570 + DMA_DIRECTION_FROM_DEVICE); 571 + if (mlen != sizeof(*rb)) { 572 + dma_memory_unmap(ringbuf->as, rb, mlen, 573 + DMA_DIRECTION_FROM_DEVICE, 0); 574 + return NULL; 575 + } 576 + return rb; 577 + } 578 + 579 + static void ringbuf_unmap_hdr(VMBusRingBufCommon *ringbuf, 580 + vmbus_ring_buffer *rb, bool dirty) 581 + { 582 + assert(rb); 583 + 584 + dma_memory_unmap(ringbuf->as, rb, sizeof(*rb), DMA_DIRECTION_FROM_DEVICE, 585 + dirty ? sizeof(*rb) : 0); 586 + } 587 + 588 + static void ringbuf_init_common(VMBusRingBufCommon *ringbuf, VMBusGpadl *gpadl, 589 + AddressSpace *as, DMADirection dir, 590 + uint32_t begin, uint32_t end) 591 + { 592 + ringbuf->as = as; 593 + ringbuf->rb_addr = gpadl->gfns[begin] << TARGET_PAGE_BITS; 594 + ringbuf->base = (begin + 1) << TARGET_PAGE_BITS; 595 + ringbuf->len = (end - begin - 1) << TARGET_PAGE_BITS; 596 + gpadl_iter_init(&ringbuf->iter, gpadl, as, dir); 597 + } 598 + 599 + static int ringbufs_init(VMBusChannel *chan) 600 + { 601 + vmbus_ring_buffer *rb; 602 + VMBusSendRingBuf *send_ringbuf = &chan->send_ringbuf; 603 + VMBusRecvRingBuf *recv_ringbuf = &chan->recv_ringbuf; 604 + 605 + if (chan->ringbuf_send_offset <= 1 || 606 + chan->gpadl->num_gfns <= chan->ringbuf_send_offset + 1) { 607 + return -EINVAL; 608 + } 609 + 610 + ringbuf_init_common(&recv_ringbuf->common, chan->gpadl, chan->dev->dma_as, 611 + DMA_DIRECTION_TO_DEVICE, 0, chan->ringbuf_send_offset); 612 + ringbuf_init_common(&send_ringbuf->common, chan->gpadl, chan->dev->dma_as, 613 + DMA_DIRECTION_FROM_DEVICE, chan->ringbuf_send_offset, 614 + 
chan->gpadl->num_gfns); 615 + send_ringbuf->wanted = 0; 616 + send_ringbuf->reserved = 0; 617 + 618 + rb = ringbuf_map_hdr(&recv_ringbuf->common); 619 + if (!rb) { 620 + return -EFAULT; 621 + } 622 + recv_ringbuf->rd_idx = recv_ringbuf->last_rd_idx = rb->read_index; 623 + ringbuf_unmap_hdr(&recv_ringbuf->common, rb, false); 624 + 625 + rb = ringbuf_map_hdr(&send_ringbuf->common); 626 + if (!rb) { 627 + return -EFAULT; 628 + } 629 + send_ringbuf->wr_idx = send_ringbuf->last_wr_idx = rb->write_index; 630 + send_ringbuf->last_seen_rd_idx = rb->read_index; 631 + rb->feature_bits |= VMBUS_RING_BUFFER_FEAT_PENDING_SZ; 632 + ringbuf_unmap_hdr(&send_ringbuf->common, rb, true); 633 + 634 + if (recv_ringbuf->rd_idx >= recv_ringbuf->common.len || 635 + send_ringbuf->wr_idx >= send_ringbuf->common.len) { 636 + return -EOVERFLOW; 637 + } 638 + 639 + return 0; 640 + } 641 + 642 + /* 643 + * Perform io between the GPADL-backed ringbuffer @ringbuf and @buf, wrapping 644 + * around if needed. 645 + * @len is assumed not to exceed the size of the ringbuffer, so only single 646 + * wraparound is considered. 647 + */ 648 + static ssize_t ringbuf_io(VMBusRingBufCommon *ringbuf, void *buf, uint32_t len) 649 + { 650 + ssize_t ret1 = 0, ret2 = 0; 651 + uint32_t remain = ringbuf->len + ringbuf->base - ringbuf->iter.off; 652 + 653 + if (len >= remain) { 654 + ret1 = gpadl_iter_io(&ringbuf->iter, buf, remain); 655 + if (ret1 < 0) { 656 + return ret1; 657 + } 658 + gpadl_iter_seek(&ringbuf->iter, ringbuf->base); 659 + buf += remain; 660 + len -= remain; 661 + } 662 + ret2 = gpadl_iter_io(&ringbuf->iter, buf, len); 663 + if (ret2 < 0) { 664 + return ret2; 665 + } 666 + return ret1 + ret2; 667 + } 668 + 669 + /* 670 + * Position the circular iterator within @ringbuf to offset @new_off, wrapping 671 + * around if needed. 672 + * @new_off is assumed not to exceed twice the size of the ringbuffer, so only 673 + * single wraparound is considered. 
674 + */ 675 + static inline void ringbuf_seek(VMBusRingBufCommon *ringbuf, uint32_t new_off) 676 + { 677 + gpadl_iter_seek(&ringbuf->iter, 678 + ringbuf->base + rb_idx_wrap(new_off, ringbuf->len)); 679 + } 680 + 681 + static inline uint32_t ringbuf_tell(VMBusRingBufCommon *ringbuf) 682 + { 683 + return ringbuf->iter.off - ringbuf->base; 684 + } 685 + 686 + static inline void ringbuf_start_io(VMBusRingBufCommon *ringbuf) 687 + { 688 + gpadl_iter_start_io(&ringbuf->iter); 689 + } 690 + 691 + static inline void ringbuf_end_io(VMBusRingBufCommon *ringbuf) 692 + { 693 + gpadl_iter_end_io(&ringbuf->iter); 694 + } 695 + 696 + VMBusDevice *vmbus_channel_device(VMBusChannel *chan) 697 + { 698 + return chan->dev; 699 + } 700 + 701 + VMBusChannel *vmbus_device_channel(VMBusDevice *dev, uint32_t chan_idx) 702 + { 703 + if (chan_idx >= dev->num_channels) { 704 + return NULL; 705 + } 706 + return &dev->channels[chan_idx]; 707 + } 708 + 709 + uint32_t vmbus_channel_idx(VMBusChannel *chan) 710 + { 711 + return chan - chan->dev->channels; 712 + } 713 + 714 + void vmbus_channel_notify_host(VMBusChannel *chan) 715 + { 716 + event_notifier_set(&chan->notifier); 717 + } 718 + 719 + bool vmbus_channel_is_open(VMBusChannel *chan) 720 + { 721 + return chan->is_open; 722 + } 723 + 724 + /* 725 + * Notify the guest side about the data to work on in the channel ring buffer. 726 + * The notification is done by signaling a dedicated per-channel SynIC event 727 + * flag (more recent guests) or setting a bit in the interrupt page and firing 728 + * the VMBus SINT (older guests). 
729 + */ 730 + static int vmbus_channel_notify_guest(VMBusChannel *chan) 731 + { 732 + int res = 0; 733 + unsigned long *int_map, mask; 734 + unsigned idx; 735 + hwaddr addr = chan->vmbus->int_page_gpa; 736 + hwaddr len = TARGET_PAGE_SIZE / 2, dirty = 0; 737 + 738 + trace_vmbus_channel_notify_guest(chan->id); 739 + 740 + if (!addr) { 741 + return hyperv_set_event_flag(chan->notify_route, chan->id); 742 + } 743 + 744 + int_map = cpu_physical_memory_map(addr, &len, 1); 745 + if (len != TARGET_PAGE_SIZE / 2) { 746 + res = -ENXIO; 747 + goto unmap; 748 + } 749 + 750 + idx = BIT_WORD(chan->id); 751 + mask = BIT_MASK(chan->id); 752 + if ((atomic_fetch_or(&int_map[idx], mask) & mask) != mask) { 753 + res = hyperv_sint_route_set_sint(chan->notify_route); 754 + dirty = len; 755 + } 756 + 757 + unmap: 758 + cpu_physical_memory_unmap(int_map, len, 1, dirty); 759 + return res; 760 + } 761 + 762 + #define VMBUS_PKT_TRAILER sizeof(uint64_t) 763 + 764 + static uint32_t vmbus_pkt_hdr_set_offsets(vmbus_packet_hdr *hdr, 765 + uint32_t desclen, uint32_t msglen) 766 + { 767 + hdr->offset_qwords = sizeof(*hdr) / sizeof(uint64_t) + 768 + DIV_ROUND_UP(desclen, sizeof(uint64_t)); 769 + hdr->len_qwords = hdr->offset_qwords + 770 + DIV_ROUND_UP(msglen, sizeof(uint64_t)); 771 + return hdr->len_qwords * sizeof(uint64_t) + VMBUS_PKT_TRAILER; 772 + } 773 + 774 + /* 775 + * Simplified ring buffer operation with paired barriers annotations in the 776 + * producer and consumer loops: 777 + * 778 + * producer * consumer 779 + * ~~~~~~~~ * ~~~~~~~~ 780 + * write pending_send_sz * read write_index 781 + * smp_mb [A] * smp_mb [C] 782 + * read read_index * read packet 783 + * smp_mb [B] * read/write out-of-band data 784 + * read/write out-of-band data * smp_mb [B] 785 + * write packet * write read_index 786 + * smp_mb [C] * smp_mb [A] 787 + * write write_index * read pending_send_sz 788 + * smp_wmb [D] * smp_rmb [D] 789 + * write pending_send_sz * read write_index 790 + * ... * ... 
791 + */ 792 + 793 + static inline uint32_t ringbuf_send_avail(VMBusSendRingBuf *ringbuf) 794 + { 795 + /* don't trust guest data */ 796 + if (ringbuf->last_seen_rd_idx >= ringbuf->common.len) { 797 + return 0; 798 + } 799 + return rb_idx_delta(ringbuf->wr_idx, ringbuf->last_seen_rd_idx, 800 + ringbuf->common.len, false); 801 + } 802 + 803 + static ssize_t ringbuf_send_update_idx(VMBusChannel *chan) 804 + { 805 + VMBusSendRingBuf *ringbuf = &chan->send_ringbuf; 806 + vmbus_ring_buffer *rb; 807 + uint32_t written; 808 + 809 + written = rb_idx_delta(ringbuf->last_wr_idx, ringbuf->wr_idx, 810 + ringbuf->common.len, true); 811 + if (!written) { 812 + return 0; 813 + } 814 + 815 + rb = ringbuf_map_hdr(&ringbuf->common); 816 + if (!rb) { 817 + return -EFAULT; 818 + } 819 + 820 + ringbuf->reserved -= written; 821 + 822 + /* prevent reorder with the data operation and packet write */ 823 + smp_mb(); /* barrier pair [C] */ 824 + rb->write_index = ringbuf->wr_idx; 825 + 826 + /* 827 + * If the producer earlier indicated that it wants to be notified when the 828 + * consumer frees certain amount of space in the ring buffer, that amount 829 + * is reduced by the size of the completed write. 830 + */ 831 + if (ringbuf->wanted) { 832 + /* otherwise reservation would fail */ 833 + assert(ringbuf->wanted < written); 834 + ringbuf->wanted -= written; 835 + /* prevent reorder with write_index write */ 836 + smp_wmb(); /* barrier pair [D] */ 837 + rb->pending_send_sz = ringbuf->wanted; 838 + } 839 + 840 + /* prevent reorder with write_index or pending_send_sz write */ 841 + smp_mb(); /* barrier pair [A] */ 842 + ringbuf->last_seen_rd_idx = rb->read_index; 843 + 844 + /* 845 + * The consumer may have missed the reduction of pending_send_sz and skip 846 + * notification, so re-check the blocking condition, and, if it's no longer 847 + * true, ensure processing another iteration by simulating consumer's 848 + * notification. 
849 + */ 850 + if (ringbuf_send_avail(ringbuf) >= ringbuf->wanted) { 851 + vmbus_channel_notify_host(chan); 852 + } 853 + 854 + /* skip notification by consumer's request */ 855 + if (rb->interrupt_mask) { 856 + goto out; 857 + } 858 + 859 + /* 860 + * The consumer hasn't caught up with the producer's previous state so it's 861 + * not blocked. 862 + * (last_seen_rd_idx comes from the guest but it's safe to use w/o 863 + * validation here as it only affects notification.) 864 + */ 865 + if (rb_idx_delta(ringbuf->last_seen_rd_idx, ringbuf->wr_idx, 866 + ringbuf->common.len, true) > written) { 867 + goto out; 868 + } 869 + 870 + vmbus_channel_notify_guest(chan); 871 + out: 872 + ringbuf_unmap_hdr(&ringbuf->common, rb, true); 873 + ringbuf->last_wr_idx = ringbuf->wr_idx; 874 + return written; 875 + } 876 + 877 + int vmbus_channel_reserve(VMBusChannel *chan, 878 + uint32_t desclen, uint32_t msglen) 879 + { 880 + VMBusSendRingBuf *ringbuf = &chan->send_ringbuf; 881 + vmbus_ring_buffer *rb = NULL; 882 + vmbus_packet_hdr hdr; 883 + uint32_t needed = ringbuf->reserved + 884 + vmbus_pkt_hdr_set_offsets(&hdr, desclen, msglen); 885 + 886 + /* avoid touching the guest memory if possible */ 887 + if (likely(needed <= ringbuf_send_avail(ringbuf))) { 888 + goto success; 889 + } 890 + 891 + rb = ringbuf_map_hdr(&ringbuf->common); 892 + if (!rb) { 893 + return -EFAULT; 894 + } 895 + 896 + /* fetch read index from guest memory and try again */ 897 + ringbuf->last_seen_rd_idx = rb->read_index; 898 + 899 + if (likely(needed <= ringbuf_send_avail(ringbuf))) { 900 + goto success; 901 + } 902 + 903 + rb->pending_send_sz = needed; 904 + 905 + /* 906 + * The consumer may have made progress and freed up some space before 907 + * seeing updated pending_send_sz, so re-read read_index (preventing 908 + * reorder with the pending_send_sz write) and try again. 
909 + */ 910 + smp_mb(); /* barrier pair [A] */ 911 + ringbuf->last_seen_rd_idx = rb->read_index; 912 + 913 + if (needed > ringbuf_send_avail(ringbuf)) { 914 + goto out; 915 + } 916 + 917 + success: 918 + ringbuf->reserved = needed; 919 + needed = 0; 920 + 921 + /* clear pending_send_sz if it was set */ 922 + if (ringbuf->wanted) { 923 + if (!rb) { 924 + rb = ringbuf_map_hdr(&ringbuf->common); 925 + if (!rb) { 926 + /* failure to clear pending_send_sz is non-fatal */ 927 + goto out; 928 + } 929 + } 930 + 931 + rb->pending_send_sz = 0; 932 + } 933 + 934 + /* prevent reorder of the following data operation with read_index read */ 935 + smp_mb(); /* barrier pair [B] */ 936 + 937 + out: 938 + if (rb) { 939 + ringbuf_unmap_hdr(&ringbuf->common, rb, ringbuf->wanted == needed); 940 + } 941 + ringbuf->wanted = needed; 942 + return needed ? -ENOSPC : 0; 943 + } 944 + 945 + ssize_t vmbus_channel_send(VMBusChannel *chan, uint16_t pkt_type, 946 + void *desc, uint32_t desclen, 947 + void *msg, uint32_t msglen, 948 + bool need_comp, uint64_t transaction_id) 949 + { 950 + ssize_t ret = 0; 951 + vmbus_packet_hdr hdr; 952 + uint32_t totlen; 953 + VMBusSendRingBuf *ringbuf = &chan->send_ringbuf; 954 + 955 + if (!vmbus_channel_is_open(chan)) { 956 + return -EINVAL; 957 + } 958 + 959 + totlen = vmbus_pkt_hdr_set_offsets(&hdr, desclen, msglen); 960 + hdr.type = pkt_type; 961 + hdr.flags = need_comp ? 
VMBUS_PACKET_FLAG_REQUEST_COMPLETION : 0; 962 + hdr.transaction_id = transaction_id; 963 + 964 + assert(totlen <= ringbuf->reserved); 965 + 966 + ringbuf_start_io(&ringbuf->common); 967 + ringbuf_seek(&ringbuf->common, ringbuf->wr_idx); 968 + ret = ringbuf_io(&ringbuf->common, &hdr, sizeof(hdr)); 969 + if (ret < 0) { 970 + goto out; 971 + } 972 + if (desclen) { 973 + assert(desc); 974 + ret = ringbuf_io(&ringbuf->common, desc, desclen); 975 + if (ret < 0) { 976 + goto out; 977 + } 978 + ringbuf_seek(&ringbuf->common, 979 + ringbuf->wr_idx + hdr.offset_qwords * sizeof(uint64_t)); 980 + } 981 + ret = ringbuf_io(&ringbuf->common, msg, msglen); 982 + if (ret < 0) { 983 + goto out; 984 + } 985 + ringbuf_seek(&ringbuf->common, ringbuf->wr_idx + totlen); 986 + ringbuf->wr_idx = ringbuf_tell(&ringbuf->common); 987 + ret = 0; 988 + out: 989 + ringbuf_end_io(&ringbuf->common); 990 + if (ret) { 991 + return ret; 992 + } 993 + return ringbuf_send_update_idx(chan); 994 + } 995 + 996 + ssize_t vmbus_channel_send_completion(VMBusChanReq *req, 997 + void *msg, uint32_t msglen) 998 + { 999 + assert(req->need_comp); 1000 + return vmbus_channel_send(req->chan, VMBUS_PACKET_COMP, NULL, 0, 1001 + msg, msglen, false, req->transaction_id); 1002 + } 1003 + /* Read a GPA-direct descriptor of @len bytes from @ringbuf and fill @sgl from its ranges; returns 0 or a negative errno. */ 1004 + static int sgl_from_gpa_ranges(QEMUSGList *sgl, VMBusDevice *dev, 1005 + VMBusRingBufCommon *ringbuf, uint32_t len) 1006 + { 1007 + int ret; 1008 + vmbus_pkt_gpa_direct hdr; 1009 + hwaddr curaddr = 0; 1010 + hwaddr curlen = 0; 1011 + int num; 1012 + 1013 + if (len < sizeof(hdr)) { 1014 + return -EIO; 1015 + } 1016 + ret = ringbuf_io(ringbuf, &hdr, sizeof(hdr)); 1017 + if (ret < 0) { 1018 + return ret; 1019 + } 1020 + len -= sizeof(hdr); 1021 + /* rangecount comes from the guest: reject a count the descriptor cannot hold before the unsigned subtraction below can wrap */ 1022 + if (hdr.rangecount > len / sizeof(vmbus_gpa_range)) { return -EIO; } num = (len - hdr.rangecount * sizeof(vmbus_gpa_range)) / sizeof(uint64_t); 1023 + if (num < 0) { 1024 + return -EIO; 1025 + } 1026 + qemu_sglist_init(sgl, DEVICE(dev), num, ringbuf->as); 1027 + 1028 + for (; hdr.rangecount; hdr.rangecount--) { 1029 + vmbus_gpa_range range; 1030 + 
1031 + if (len < sizeof(range)) { 1032 + goto eio; 1033 + } 1034 + ret = ringbuf_io(ringbuf, &range, sizeof(range)); 1035 + if (ret < 0) { 1036 + goto err; 1037 + } 1038 + len -= sizeof(range); 1039 + 1040 + if (range.byte_offset & TARGET_PAGE_MASK) { 1041 + goto eio; 1042 + } 1043 + 1044 + for (; range.byte_count; range.byte_offset = 0) { 1045 + uint64_t paddr; 1046 + uint32_t plen = MIN(range.byte_count, 1047 + TARGET_PAGE_SIZE - range.byte_offset); 1048 + 1049 + if (len < sizeof(uint64_t)) { 1050 + goto eio; 1051 + } 1052 + ret = ringbuf_io(ringbuf, &paddr, sizeof(paddr)); 1053 + if (ret < 0) { 1054 + goto err; 1055 + } 1056 + len -= sizeof(uint64_t); 1057 + paddr <<= TARGET_PAGE_BITS; 1058 + paddr |= range.byte_offset; 1059 + range.byte_count -= plen; 1060 + 1061 + if (curaddr + curlen == paddr) { 1062 + /* consecutive fragments - join */ 1063 + curlen += plen; 1064 + } else { 1065 + if (curlen) { 1066 + qemu_sglist_add(sgl, curaddr, curlen); 1067 + } 1068 + 1069 + curaddr = paddr; 1070 + curlen = plen; 1071 + } 1072 + } 1073 + } 1074 + 1075 + if (curlen) { 1076 + qemu_sglist_add(sgl, curaddr, curlen); 1077 + } 1078 + 1079 + return 0; 1080 + eio: 1081 + ret = -EIO; 1082 + err: 1083 + qemu_sglist_destroy(sgl); 1084 + return ret; 1085 + } 1086 + 1087 + static VMBusChanReq *vmbus_alloc_req(VMBusChannel *chan, 1088 + uint32_t size, uint16_t pkt_type, 1089 + uint32_t msglen, uint64_t transaction_id, 1090 + bool need_comp) 1091 + { 1092 + VMBusChanReq *req; 1093 + uint32_t msgoff = QEMU_ALIGN_UP(size, __alignof__(*req->msg)); 1094 + uint32_t totlen = msgoff + msglen; 1095 + 1096 + req = g_malloc0(totlen); 1097 + req->chan = chan; 1098 + req->pkt_type = pkt_type; 1099 + req->msg = (void *)req + msgoff; 1100 + req->msglen = msglen; 1101 + req->transaction_id = transaction_id; 1102 + req->need_comp = need_comp; 1103 + return req; 1104 + } 1105 + 1106 + int vmbus_channel_recv_start(VMBusChannel *chan) 1107 + { 1108 + VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf; 1109 
+ vmbus_ring_buffer *rb; 1110 + 1111 + rb = ringbuf_map_hdr(&ringbuf->common); 1112 + if (!rb) { 1113 + return -EFAULT; 1114 + } 1115 + ringbuf->last_seen_wr_idx = rb->write_index; 1116 + ringbuf_unmap_hdr(&ringbuf->common, rb, false); 1117 + 1118 + if (ringbuf->last_seen_wr_idx >= ringbuf->common.len) { 1119 + return -EOVERFLOW; 1120 + } 1121 + 1122 + /* prevent reorder of the following data operation with write_index read */ 1123 + smp_mb(); /* barrier pair [C] */ 1124 + return 0; 1125 + } 1126 + 1127 + void *vmbus_channel_recv_peek(VMBusChannel *chan, uint32_t size) 1128 + { 1129 + VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf; 1130 + vmbus_packet_hdr hdr = {}; 1131 + VMBusChanReq *req; 1132 + uint32_t avail; 1133 + uint32_t totlen, pktlen, msglen, msgoff, desclen; 1134 + 1135 + assert(size >= sizeof(*req)); 1136 + 1137 + /* safe as last_seen_wr_idx is validated in vmbus_channel_recv_start */ 1138 + avail = rb_idx_delta(ringbuf->rd_idx, ringbuf->last_seen_wr_idx, 1139 + ringbuf->common.len, true); 1140 + if (avail < sizeof(hdr)) { 1141 + return NULL; 1142 + } 1143 + 1144 + ringbuf_seek(&ringbuf->common, ringbuf->rd_idx); 1145 + if (ringbuf_io(&ringbuf->common, &hdr, sizeof(hdr)) < 0) { 1146 + return NULL; 1147 + } 1148 + 1149 + pktlen = hdr.len_qwords * sizeof(uint64_t); 1150 + totlen = pktlen + VMBUS_PKT_TRAILER; 1151 + if (totlen > avail) { 1152 + return NULL; 1153 + } 1154 + 1155 + msgoff = hdr.offset_qwords * sizeof(uint64_t); 1156 + if (msgoff > pktlen || msgoff < sizeof(hdr)) { 1157 + error_report("%s: malformed packet: %u %u", __func__, msgoff, pktlen); 1158 + return NULL; 1159 + } 1160 + 1161 + msglen = pktlen - msgoff; 1162 + 1163 + req = vmbus_alloc_req(chan, size, hdr.type, msglen, hdr.transaction_id, 1164 + hdr.flags & VMBUS_PACKET_FLAG_REQUEST_COMPLETION); 1165 + 1166 + switch (hdr.type) { 1167 + case VMBUS_PACKET_DATA_USING_GPA_DIRECT: 1168 + desclen = msgoff - sizeof(hdr); 1169 + if (sgl_from_gpa_ranges(&req->sgl, chan->dev, &ringbuf->common, 
1170 + desclen) < 0) { 1171 + error_report("%s: failed to convert GPA ranges to SGL", __func__); 1172 + goto free_req; 1173 + } 1174 + break; 1175 + case VMBUS_PACKET_DATA_INBAND: 1176 + case VMBUS_PACKET_COMP: 1177 + break; 1178 + default: 1179 + error_report("%s: unexpected msg type: %x", __func__, hdr.type); 1180 + goto free_req; 1181 + } 1182 + 1183 + ringbuf_seek(&ringbuf->common, ringbuf->rd_idx + msgoff); 1184 + if (ringbuf_io(&ringbuf->common, req->msg, msglen) < 0) { 1185 + goto free_req; 1186 + } 1187 + ringbuf_seek(&ringbuf->common, ringbuf->rd_idx + totlen); 1188 + 1189 + return req; 1190 + free_req: 1191 + vmbus_free_req(req); 1192 + return NULL; 1193 + } 1194 + 1195 + void vmbus_channel_recv_pop(VMBusChannel *chan) 1196 + { 1197 + VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf; 1198 + ringbuf->rd_idx = ringbuf_tell(&ringbuf->common); 1199 + } 1200 + 1201 + ssize_t vmbus_channel_recv_done(VMBusChannel *chan) 1202 + { 1203 + VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf; 1204 + vmbus_ring_buffer *rb; 1205 + uint32_t read; 1206 + 1207 + read = rb_idx_delta(ringbuf->last_rd_idx, ringbuf->rd_idx, 1208 + ringbuf->common.len, true); 1209 + if (!read) { 1210 + return 0; 1211 + } 1212 + 1213 + rb = ringbuf_map_hdr(&ringbuf->common); 1214 + if (!rb) { 1215 + return -EFAULT; 1216 + } 1217 + 1218 + /* prevent reorder with the data operation and packet read */ 1219 + smp_mb(); /* barrier pair [B] */ 1220 + rb->read_index = ringbuf->rd_idx; 1221 + 1222 + /* prevent reorder of the following pending_send_sz read */ 1223 + smp_mb(); /* barrier pair [A] */ 1224 + 1225 + if (rb->interrupt_mask) { 1226 + goto out; 1227 + } 1228 + 1229 + if (rb->feature_bits & VMBUS_RING_BUFFER_FEAT_PENDING_SZ) { 1230 + uint32_t wr_idx, wr_avail; 1231 + uint32_t wanted = rb->pending_send_sz; 1232 + 1233 + if (!wanted) { 1234 + goto out; 1235 + } 1236 + 1237 + /* prevent reorder with pending_send_sz read */ 1238 + smp_rmb(); /* barrier pair [D] */ 1239 + wr_idx = rb->write_index; 1240 + 
1241 + wr_avail = rb_idx_delta(wr_idx, ringbuf->rd_idx, ringbuf->common.len, 1242 + true); 1243 + 1244 + /* the producer wasn't blocked on the consumer state */ 1245 + if (wr_avail >= read + wanted) { 1246 + goto out; 1247 + } 1248 + /* there's not enough space for the producer to make progress */ 1249 + if (wr_avail < wanted) { 1250 + goto out; 1251 + } 1252 + } 1253 + 1254 + vmbus_channel_notify_guest(chan); 1255 + out: 1256 + ringbuf_unmap_hdr(&ringbuf->common, rb, true); 1257 + ringbuf->last_rd_idx = ringbuf->rd_idx; 1258 + return read; 1259 + } 1260 + 1261 + void vmbus_free_req(void *req) 1262 + { 1263 + VMBusChanReq *r = req; 1264 + 1265 + if (!req) { 1266 + return; 1267 + } 1268 + 1269 + if (r->sgl.dev) { 1270 + qemu_sglist_destroy(&r->sgl); 1271 + } 1272 + g_free(req); 1273 + } 1274 + 1275 + static void channel_event_cb(EventNotifier *e) 1276 + { 1277 + VMBusChannel *chan = container_of(e, VMBusChannel, notifier); 1278 + if (event_notifier_test_and_clear(e)) { 1279 + /* 1280 + * All receives are supposed to happen within the device worker, so 1281 + * bracket it with ringbuf_start/end_io on the receive ringbuffer, and 1282 + * potentially reuse the cached mapping throughout the worker. 1283 + * Can't do this for sends as they may happen outside the device 1284 + * worker. 1285 + */ 1286 + VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf; 1287 + ringbuf_start_io(&ringbuf->common); 1288 + chan->notify_cb(chan); 1289 + ringbuf_end_io(&ringbuf->common); 1290 + 1291 + } 1292 + } 1293 + 1294 + static int alloc_chan_id(VMBus *vmbus) 1295 + { 1296 + int ret; 1297 + 1298 + ret = find_next_zero_bit(vmbus->chanid_bitmap, VMBUS_CHANID_COUNT, 0); 1299 + if (ret == VMBUS_CHANID_COUNT) { 1300 + return -ENOMEM; 1301 + } 1302 + return ret + VMBUS_FIRST_CHANID; 1303 + } 1304 + 1305 + static int register_chan_id(VMBusChannel *chan) 1306 + { 1307 + return test_and_set_bit(chan->id - VMBUS_FIRST_CHANID, 1308 + chan->vmbus->chanid_bitmap) ? 
-EEXIST : 0; 1309 + } 1310 + 1311 + static void unregister_chan_id(VMBusChannel *chan) 1312 + { 1313 + clear_bit(chan->id - VMBUS_FIRST_CHANID, chan->vmbus->chanid_bitmap); 1314 + } 1315 + 1316 + static uint32_t chan_connection_id(VMBusChannel *chan) 1317 + { 1318 + return VMBUS_CHAN_CONNECTION_OFFSET + chan->id; 1319 + } 1320 + 1321 + static void init_channel(VMBus *vmbus, VMBusDevice *dev, VMBusDeviceClass *vdc, 1322 + VMBusChannel *chan, uint16_t idx, Error **errp) 1323 + { 1324 + int res; 1325 + 1326 + chan->dev = dev; 1327 + chan->notify_cb = vdc->chan_notify_cb; 1328 + chan->subchan_idx = idx; 1329 + chan->vmbus = vmbus; 1330 + 1331 + res = alloc_chan_id(vmbus); 1332 + if (res < 0) { 1333 + error_setg(errp, "no spare channel id"); 1334 + return; 1335 + } 1336 + chan->id = res; 1337 + register_chan_id(chan); 1338 + 1339 + /* 1340 + * The guest drivers depend on the device subchannels (idx #1+) to be 1341 + * offered after the primary channel (idx #0) of that device. To ensure 1342 + * that, record the channels on the channel list in the order they appear 1343 + * within the device. 1344 + */ 1345 + QTAILQ_INSERT_TAIL(&vmbus->channel_list, chan, link); 1346 + } 1347 + 1348 + static void deinit_channel(VMBusChannel *chan) 1349 + { 1350 + assert(chan->state == VMCHAN_INIT); 1351 + QTAILQ_REMOVE(&chan->vmbus->channel_list, chan, link); 1352 + unregister_chan_id(chan); 1353 + } 1354 + 1355 + static void create_channels(VMBus *vmbus, VMBusDevice *dev, Error **errp) 1356 + { 1357 + uint16_t i; 1358 + VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(dev); 1359 + Error *err = NULL; 1360 + 1361 + dev->num_channels = vdc->num_channels ? 
vdc->num_channels(dev) : 1; 1362 + if (dev->num_channels < 1) { 1363 + error_setg(&err, "invalid #channels: %u", dev->num_channels); 1364 + goto error_out; 1365 + } 1366 + 1367 + dev->channels = g_new0(VMBusChannel, dev->num_channels); 1368 + for (i = 0; i < dev->num_channels; i++) { 1369 + init_channel(vmbus, dev, vdc, &dev->channels[i], i, &err); 1370 + if (err) { 1371 + goto err_init; 1372 + } 1373 + } 1374 + 1375 + return; 1376 + 1377 + err_init: 1378 + while (i--) { 1379 + deinit_channel(&dev->channels[i]); 1380 + } /* every channel set up so far has been torn down again; release the array allocated above so it is not leaked */ g_free(dev->channels); dev->channels = NULL; 1381 + error_out: 1382 + error_propagate(errp, err); 1383 + } 1384 + 1385 + static void free_channels(VMBusDevice *dev) 1386 + { 1387 + uint16_t i; 1388 + for (i = 0; i < dev->num_channels; i++) { 1389 + deinit_channel(&dev->channels[i]); 1390 + } 1391 + g_free(dev->channels); 1392 + } 1393 + 1394 + static HvSintRoute *make_sint_route(VMBus *vmbus, uint32_t vp_index) 1395 + { 1396 + VMBusChannel *chan; 1397 + 1398 + if (vp_index == vmbus->target_vp) { 1399 + hyperv_sint_route_ref(vmbus->sint_route); 1400 + return vmbus->sint_route; 1401 + } 1402 + 1403 + QTAILQ_FOREACH(chan, &vmbus->channel_list, link) { 1404 + if (chan->target_vp == vp_index && vmbus_channel_is_open(chan)) { 1405 + hyperv_sint_route_ref(chan->notify_route); 1406 + return chan->notify_route; 1407 + } 1408 + } 1409 + 1410 + return hyperv_sint_route_new(vp_index, VMBUS_SINT, NULL, NULL); 1411 + } 1412 + 1413 + static void open_channel(VMBusChannel *chan) 1414 + { 1415 + VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(chan->dev); 1416 + 1417 + chan->gpadl = vmbus_get_gpadl(chan, chan->ringbuf_gpadl); 1418 + if (!chan->gpadl) { 1419 + return; 1420 + } 1421 + 1422 + if (ringbufs_init(chan)) { 1423 + goto put_gpadl; 1424 + } 1425 + 1426 + if (event_notifier_init(&chan->notifier, 0)) { 1427 + goto put_gpadl; 1428 + } 1429 + 1430 + event_notifier_set_handler(&chan->notifier, channel_event_cb); 1431 + 1432 + if (hyperv_set_event_flag_handler(chan_connection_id(chan), 1433 + &chan->notifier)) 
{ 1434 + goto cleanup_notifier; 1435 + } 1436 + 1437 + chan->notify_route = make_sint_route(chan->vmbus, chan->target_vp); 1438 + if (!chan->notify_route) { 1439 + goto clear_event_flag_handler; 1440 + } 1441 + 1442 + if (vdc->open_channel && vdc->open_channel(chan)) { 1443 + goto unref_sint_route; 1444 + } 1445 + 1446 + chan->is_open = true; 1447 + return; 1448 + 1449 + unref_sint_route: 1450 + hyperv_sint_route_unref(chan->notify_route); 1451 + clear_event_flag_handler: 1452 + hyperv_set_event_flag_handler(chan_connection_id(chan), NULL); 1453 + cleanup_notifier: 1454 + event_notifier_set_handler(&chan->notifier, NULL); 1455 + event_notifier_cleanup(&chan->notifier); 1456 + put_gpadl: 1457 + vmbus_put_gpadl(chan->gpadl); 1458 + } 1459 + 1460 + static void close_channel(VMBusChannel *chan) 1461 + { 1462 + VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(chan->dev); 1463 + 1464 + if (!chan->is_open) { 1465 + return; 1466 + } 1467 + 1468 + if (vdc->close_channel) { 1469 + vdc->close_channel(chan); 1470 + } 1471 + 1472 + hyperv_sint_route_unref(chan->notify_route); 1473 + hyperv_set_event_flag_handler(chan_connection_id(chan), NULL); 1474 + event_notifier_set_handler(&chan->notifier, NULL); 1475 + event_notifier_cleanup(&chan->notifier); 1476 + vmbus_put_gpadl(chan->gpadl); 1477 + chan->is_open = false; 1478 + } 1479 + 1480 + static int channel_post_load(void *opaque, int version_id) 1481 + { 1482 + VMBusChannel *chan = opaque; 1483 + 1484 + return register_chan_id(chan); 1485 + } 1486 + 1487 + static const VMStateDescription vmstate_channel = { 1488 + .name = "vmbus/channel", 1489 + .version_id = 0, 1490 + .minimum_version_id = 0, 1491 + .post_load = channel_post_load, 1492 + .fields = (VMStateField[]) { 1493 + VMSTATE_UINT32(id, VMBusChannel), 1494 + VMSTATE_UINT16(subchan_idx, VMBusChannel), 1495 + VMSTATE_UINT32(open_id, VMBusChannel), 1496 + VMSTATE_UINT32(target_vp, VMBusChannel), 1497 + VMSTATE_UINT32(ringbuf_gpadl, VMBusChannel), 1498 + 
VMSTATE_UINT32(ringbuf_send_offset, VMBusChannel), 1499 + VMSTATE_UINT8(offer_state, VMBusChannel), 1500 + VMSTATE_UINT8(state, VMBusChannel), 1501 + VMSTATE_END_OF_LIST() 1502 + } 1503 + }; 1504 + 1505 + static VMBusChannel *find_channel(VMBus *vmbus, uint32_t id) 1506 + { 1507 + VMBusChannel *chan; 1508 + QTAILQ_FOREACH(chan, &vmbus->channel_list, link) { 1509 + if (chan->id == id) { 1510 + return chan; 1511 + } 1512 + } 1513 + return NULL; 1514 + } 1515 + 1516 + static int enqueue_incoming_message(VMBus *vmbus, 1517 + const struct hyperv_post_message_input *msg) 1518 + { 1519 + int ret = 0; 1520 + uint8_t idx, prev_size; 1521 + 1522 + qemu_mutex_lock(&vmbus->rx_queue_lock); 1523 + 1524 + if (vmbus->rx_queue_size == HV_MSG_QUEUE_LEN) { 1525 + ret = -ENOBUFS; 1526 + goto out; 1527 + } 1528 + 1529 + prev_size = vmbus->rx_queue_size; 1530 + idx = (vmbus->rx_queue_head + vmbus->rx_queue_size) % HV_MSG_QUEUE_LEN; 1531 + memcpy(&vmbus->rx_queue[idx], msg, sizeof(*msg)); 1532 + vmbus->rx_queue_size++; 1533 + 1534 + /* only need to resched if the queue was empty before */ 1535 + if (!prev_size) { 1536 + vmbus_resched(vmbus); 1537 + } 1538 + out: 1539 + qemu_mutex_unlock(&vmbus->rx_queue_lock); 1540 + return ret; 1541 + } 1542 + 1543 + static uint16_t vmbus_recv_message(const struct hyperv_post_message_input *msg, 1544 + void *data) 1545 + { 1546 + VMBus *vmbus = data; 1547 + struct vmbus_message_header *vmbus_msg; 1548 + 1549 + if (msg->message_type != HV_MESSAGE_VMBUS) { 1550 + return HV_STATUS_INVALID_HYPERCALL_INPUT; 1551 + } 1552 + 1553 + if (msg->payload_size < sizeof(struct vmbus_message_header)) { 1554 + return HV_STATUS_INVALID_HYPERCALL_INPUT; 1555 + } 1556 + 1557 + vmbus_msg = (struct vmbus_message_header *)msg->payload; 1558 + 1559 + trace_vmbus_recv_message(vmbus_msg->message_type, msg->payload_size); 1560 + 1561 + if (vmbus_msg->message_type == VMBUS_MSG_INVALID || 1562 + vmbus_msg->message_type >= VMBUS_MSG_COUNT) { 1563 + error_report("vmbus: unknown 
message type %#x", 1564 + vmbus_msg->message_type); 1565 + return HV_STATUS_INVALID_HYPERCALL_INPUT; 1566 + } 1567 + 1568 + if (enqueue_incoming_message(vmbus, msg)) { 1569 + return HV_STATUS_INSUFFICIENT_BUFFERS; 1570 + } 1571 + return HV_STATUS_SUCCESS; 1572 + } 1573 + 1574 + static bool vmbus_initialized(VMBus *vmbus) 1575 + { 1576 + return vmbus->version > 0 && vmbus->version <= VMBUS_VERSION_CURRENT; 1577 + } 1578 + 1579 + static void vmbus_reset_all(VMBus *vmbus) 1580 + { 1581 + qbus_reset_all(BUS(vmbus)); 1582 + } 1583 + 1584 + static void post_msg(VMBus *vmbus, void *msgdata, uint32_t msglen) 1585 + { 1586 + int ret; 1587 + struct hyperv_message msg = { 1588 + .header.message_type = HV_MESSAGE_VMBUS, 1589 + }; 1590 + 1591 + assert(!vmbus->msg_in_progress); 1592 + assert(msglen <= sizeof(msg.payload)); 1593 + assert(msglen >= sizeof(struct vmbus_message_header)); 1594 + 1595 + vmbus->msg_in_progress = true; 1596 + 1597 + trace_vmbus_post_msg(((struct vmbus_message_header *)msgdata)->message_type, 1598 + msglen); 1599 + 1600 + memcpy(msg.payload, msgdata, msglen); 1601 + msg.header.payload_size = ROUND_UP(msglen, VMBUS_MESSAGE_SIZE_ALIGN); 1602 + 1603 + ret = hyperv_post_msg(vmbus->sint_route, &msg); 1604 + if (ret == 0 || ret == -EAGAIN) { 1605 + return; 1606 + } 1607 + 1608 + error_report("message delivery fatal failure: %d; aborting vmbus", ret); 1609 + vmbus_reset_all(vmbus); 1610 + } 1611 + 1612 + static int vmbus_init(VMBus *vmbus) 1613 + { 1614 + if (vmbus->target_vp != (uint32_t)-1) { 1615 + vmbus->sint_route = hyperv_sint_route_new(vmbus->target_vp, VMBUS_SINT, 1616 + vmbus_msg_cb, vmbus); 1617 + if (!vmbus->sint_route) { 1618 + error_report("failed to set up SINT route"); 1619 + return -ENOMEM; 1620 + } 1621 + } 1622 + return 0; 1623 + } 1624 + 1625 + static void vmbus_deinit(VMBus *vmbus) 1626 + { 1627 + VMBusGpadl *gpadl, *tmp_gpadl; 1628 + VMBusChannel *chan; 1629 + 1630 + QTAILQ_FOREACH_SAFE(gpadl, &vmbus->gpadl_list, link, tmp_gpadl) { 1631 + 
if (gpadl->state == VMGPADL_TORNDOWN) { 1632 + continue; 1633 + } 1634 + vmbus_put_gpadl(gpadl); 1635 + } 1636 + 1637 + QTAILQ_FOREACH(chan, &vmbus->channel_list, link) { 1638 + chan->offer_state = VMOFFER_INIT; 1639 + } 1640 + 1641 + hyperv_sint_route_unref(vmbus->sint_route); 1642 + vmbus->sint_route = NULL; 1643 + vmbus->int_page_gpa = 0; 1644 + vmbus->target_vp = (uint32_t)-1; 1645 + vmbus->version = 0; 1646 + vmbus->state = VMBUS_LISTEN; 1647 + vmbus->msg_in_progress = false; 1648 + } 1649 + 1650 + static void handle_initiate_contact(VMBus *vmbus, 1651 + vmbus_message_initiate_contact *msg, 1652 + uint32_t msglen) 1653 + { 1654 + if (msglen < sizeof(*msg)) { 1655 + return; 1656 + } 1657 + 1658 + trace_vmbus_initiate_contact(msg->version_requested >> 16, 1659 + msg->version_requested & 0xffff, 1660 + msg->target_vcpu, msg->monitor_page1, 1661 + msg->monitor_page2, msg->interrupt_page); 1662 + 1663 + /* 1664 + * Reset vmbus on INITIATE_CONTACT regardless of its previous state. 1665 + * Useful, in particular, with vmbus-aware BIOS which can't shut vmbus down 1666 + * before handing over to OS loader. 
1667 + */ 1668 + vmbus_reset_all(vmbus); 1669 + 1670 + vmbus->target_vp = msg->target_vcpu; 1671 + vmbus->version = msg->version_requested; 1672 + if (vmbus->version < VMBUS_VERSION_WIN8) { 1673 + /* linux passes interrupt page even when it doesn't need it */ 1674 + vmbus->int_page_gpa = msg->interrupt_page; 1675 + } 1676 + vmbus->state = VMBUS_HANDSHAKE; 1677 + 1678 + if (vmbus_init(vmbus)) { 1679 + error_report("failed to init vmbus; aborting"); 1680 + vmbus_deinit(vmbus); 1681 + return; 1682 + } 1683 + } 1684 + 1685 + static void send_handshake(VMBus *vmbus) 1686 + { 1687 + struct vmbus_message_version_response msg = { 1688 + .header.message_type = VMBUS_MSG_VERSION_RESPONSE, 1689 + .version_supported = vmbus_initialized(vmbus), 1690 + }; 1691 + 1692 + post_msg(vmbus, &msg, sizeof(msg)); 1693 + } 1694 + 1695 + static void handle_request_offers(VMBus *vmbus, void *msgdata, uint32_t msglen) 1696 + { 1697 + VMBusChannel *chan; 1698 + 1699 + if (!vmbus_initialized(vmbus)) { 1700 + return; 1701 + } 1702 + 1703 + QTAILQ_FOREACH(chan, &vmbus->channel_list, link) { 1704 + if (chan->offer_state == VMOFFER_INIT) { 1705 + chan->offer_state = VMOFFER_SENDING; 1706 + break; 1707 + } 1708 + } 1709 + 1710 + vmbus->state = VMBUS_OFFER; 1711 + } 1712 + 1713 + static void send_offer(VMBus *vmbus) 1714 + { 1715 + VMBusChannel *chan; 1716 + struct vmbus_message_header alloffers_msg = { 1717 + .message_type = VMBUS_MSG_ALLOFFERS_DELIVERED, 1718 + }; 1719 + 1720 + QTAILQ_FOREACH(chan, &vmbus->channel_list, link) { 1721 + if (chan->offer_state == VMOFFER_SENDING) { 1722 + VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(chan->dev); 1723 + /* Hyper-V wants LE GUIDs */ 1724 + QemuUUID classid = qemu_uuid_bswap(vdc->classid); 1725 + QemuUUID instanceid = qemu_uuid_bswap(chan->dev->instanceid); 1726 + struct vmbus_message_offer_channel msg = { 1727 + .header.message_type = VMBUS_MSG_OFFERCHANNEL, 1728 + .child_relid = chan->id, 1729 + .connection_id = chan_connection_id(chan), 1730 + 
.channel_flags = vdc->channel_flags, 1731 + .mmio_size_mb = vdc->mmio_size_mb, 1732 + .sub_channel_index = vmbus_channel_idx(chan), 1733 + .interrupt_flags = VMBUS_OFFER_INTERRUPT_DEDICATED, 1734 + }; 1735 + 1736 + memcpy(msg.type_uuid, &classid, sizeof(classid)); 1737 + memcpy(msg.instance_uuid, &instanceid, sizeof(instanceid)); 1738 + 1739 + trace_vmbus_send_offer(chan->id, chan->dev); 1740 + 1741 + post_msg(vmbus, &msg, sizeof(msg)); 1742 + return; 1743 + } 1744 + } 1745 + 1746 + /* no more offers, send terminator message */ 1747 + trace_vmbus_terminate_offers(); 1748 + post_msg(vmbus, &alloffers_msg, sizeof(alloffers_msg)); 1749 + } 1750 + 1751 + static bool complete_offer(VMBus *vmbus) 1752 + { 1753 + VMBusChannel *chan; 1754 + 1755 + QTAILQ_FOREACH(chan, &vmbus->channel_list, link) { 1756 + if (chan->offer_state == VMOFFER_SENDING) { 1757 + chan->offer_state = VMOFFER_SENT; 1758 + goto next_offer; 1759 + } 1760 + } 1761 + /* 1762 + * no transitioning channels found so this is completing the terminator 1763 + * message, and vmbus can move to the next state 1764 + */ 1765 + return true; 1766 + 1767 + next_offer: 1768 + /* try to mark another channel for offering */ 1769 + QTAILQ_FOREACH(chan, &vmbus->channel_list, link) { 1770 + if (chan->offer_state == VMOFFER_INIT) { 1771 + chan->offer_state = VMOFFER_SENDING; 1772 + break; 1773 + } 1774 + } 1775 + /* 1776 + * if an offer has been sent there are more offers or the terminator yet to 1777 + * send, so no state transition for vmbus 1778 + */ 1779 + return false; 1780 + } 1781 + 1782 + 1783 + static void handle_gpadl_header(VMBus *vmbus, vmbus_message_gpadl_header *msg, 1784 + uint32_t msglen) 1785 + { 1786 + VMBusGpadl *gpadl; 1787 + uint32_t num_gfns, i; 1788 + 1789 + /* must include at least one gpa range */ 1790 + if (msglen < sizeof(*msg) + sizeof(msg->range[0]) || 1791 + !vmbus_initialized(vmbus)) { 1792 + return; 1793 + } 1794 + 1795 + num_gfns = (msg->range_buflen - msg->rangecount * 
sizeof(msg->range[0])) / 1796 + sizeof(msg->range[0].pfn_array[0]); 1797 + 1798 + trace_vmbus_gpadl_header(msg->gpadl_id, num_gfns); 1799 + 1800 + /* 1801 + * In theory the GPADL_HEADER message can define a GPADL with multiple GPA 1802 + * ranges each with arbitrary size and alignment. However in practice only 1803 + * single-range page-aligned GPADLs have been observed so just ignore 1804 + * anything else and simplify things greatly. 1805 + */ 1806 + if (msg->rangecount != 1 || msg->range[0].byte_offset || 1807 + (msg->range[0].byte_count != (num_gfns << TARGET_PAGE_BITS))) { 1808 + return; 1809 + } 1810 + 1811 + /* ignore requests to create already existing GPADLs */ 1812 + if (find_gpadl(vmbus, msg->gpadl_id)) { 1813 + return; 1814 + } 1815 + 1816 + gpadl = create_gpadl(vmbus, msg->gpadl_id, msg->child_relid, num_gfns); 1817 + 1818 + for (i = 0; i < num_gfns && 1819 + (void *)&msg->range[0].pfn_array[i + 1] <= (void *)msg + msglen; 1820 + i++) { 1821 + gpadl->gfns[gpadl->seen_gfns++] = msg->range[0].pfn_array[i]; 1822 + } 1823 + 1824 + if (gpadl_full(gpadl)) { 1825 + vmbus->state = VMBUS_CREATE_GPADL; 1826 + } 1827 + } 1828 + /* Handle a GPADL_BODY message: append the frame numbers it carries to the GPADL named by msg->gpadl_id, and request the "created" reply once the GPADL is complete. */ 1829 + static void handle_gpadl_body(VMBus *vmbus, vmbus_message_gpadl_body *msg, 1830 + uint32_t msglen) 1831 + { 1832 + VMBusGpadl *gpadl; 1833 + uint32_t num_gfns_left, i; 1834 + 1835 + if (msglen < sizeof(*msg) || !vmbus_initialized(vmbus)) { 1836 + return; 1837 + } 1838 + 1839 + trace_vmbus_gpadl_body(msg->gpadl_id); 1840 + 1841 + gpadl = find_gpadl(vmbus, msg->gpadl_id); 1842 + if (!gpadl) { 1843 + return; 1844 + } 1845 + 1846 + num_gfns_left = gpadl->num_gfns - gpadl->seen_gfns; 1847 + if (!num_gfns_left) { /* the guest sent GPADL_BODY for a GPADL that is already complete: ignore the message instead of aborting on guest-controlled input */ return; } 1848 + 1849 + for (i = 0; i < num_gfns_left && 1850 + (void *)&msg->pfn_array[i + 1] <= (void *)msg + msglen; i++) { 1851 + gpadl->gfns[gpadl->seen_gfns++] = msg->pfn_array[i]; 1852 + } 1853 + 1854 + if (gpadl_full(gpadl)) { 1855 + vmbus->state = VMBUS_CREATE_GPADL; 1856 + } 1857 + } 1858 + 1859 + static void 
send_create_gpadl(VMBus *vmbus) 1860 + { 1861 + VMBusGpadl *gpadl; 1862 + 1863 + QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) { 1864 + if (gpadl_full(gpadl) && gpadl->state == VMGPADL_INIT) { 1865 + struct vmbus_message_gpadl_created msg = { 1866 + .header.message_type = VMBUS_MSG_GPADL_CREATED, 1867 + .gpadl_id = gpadl->id, 1868 + .child_relid = gpadl->child_relid, 1869 + }; 1870 + 1871 + trace_vmbus_gpadl_created(gpadl->id); 1872 + post_msg(vmbus, &msg, sizeof(msg)); 1873 + return; 1874 + } 1875 + } 1876 + 1877 + assert(false); 1878 + } 1879 + 1880 + static bool complete_create_gpadl(VMBus *vmbus) 1881 + { 1882 + VMBusGpadl *gpadl; 1883 + 1884 + QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) { 1885 + if (gpadl_full(gpadl) && gpadl->state == VMGPADL_INIT) { 1886 + gpadl->state = VMGPADL_ALIVE; 1887 + 1888 + return true; 1889 + } 1890 + } 1891 + 1892 + assert(false); 1893 + return false; 1894 + } 1895 + 1896 + static void handle_gpadl_teardown(VMBus *vmbus, 1897 + vmbus_message_gpadl_teardown *msg, 1898 + uint32_t msglen) 1899 + { 1900 + VMBusGpadl *gpadl; 1901 + 1902 + if (msglen < sizeof(*msg) || !vmbus_initialized(vmbus)) { 1903 + return; 1904 + } 1905 + 1906 + trace_vmbus_gpadl_teardown(msg->gpadl_id); 1907 + 1908 + gpadl = find_gpadl(vmbus, msg->gpadl_id); 1909 + if (!gpadl || gpadl->state == VMGPADL_TORNDOWN) { 1910 + return; 1911 + } 1912 + 1913 + gpadl->state = VMGPADL_TEARINGDOWN; 1914 + vmbus->state = VMBUS_TEARDOWN_GPADL; 1915 + } 1916 + 1917 + static void send_teardown_gpadl(VMBus *vmbus) 1918 + { 1919 + VMBusGpadl *gpadl; 1920 + 1921 + QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) { 1922 + if (gpadl->state == VMGPADL_TEARINGDOWN) { 1923 + struct vmbus_message_gpadl_torndown msg = { 1924 + .header.message_type = VMBUS_MSG_GPADL_TORNDOWN, 1925 + .gpadl_id = gpadl->id, 1926 + }; 1927 + 1928 + trace_vmbus_gpadl_torndown(gpadl->id); 1929 + post_msg(vmbus, &msg, sizeof(msg)); 1930 + return; 1931 + } 1932 + } 1933 + 1934 + assert(false); 1935 + } 1936 + 
1937 + static bool complete_teardown_gpadl(VMBus *vmbus) 1938 + { 1939 + VMBusGpadl *gpadl; 1940 + 1941 + QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) { 1942 + if (gpadl->state == VMGPADL_TEARINGDOWN) { 1943 + gpadl->state = VMGPADL_TORNDOWN; 1944 + vmbus_put_gpadl(gpadl); 1945 + return true; 1946 + } 1947 + } 1948 + 1949 + assert(false); 1950 + return false; 1951 + } 1952 + 1953 + static void handle_open_channel(VMBus *vmbus, vmbus_message_open_channel *msg, 1954 + uint32_t msglen) 1955 + { 1956 + VMBusChannel *chan; 1957 + 1958 + if (msglen < sizeof(*msg) || !vmbus_initialized(vmbus)) { 1959 + return; 1960 + } 1961 + 1962 + trace_vmbus_open_channel(msg->child_relid, msg->ring_buffer_gpadl_id, 1963 + msg->target_vp); 1964 + chan = find_channel(vmbus, msg->child_relid); 1965 + if (!chan || chan->state != VMCHAN_INIT) { 1966 + return; 1967 + } 1968 + 1969 + chan->ringbuf_gpadl = msg->ring_buffer_gpadl_id; 1970 + chan->ringbuf_send_offset = msg->ring_buffer_offset; 1971 + chan->target_vp = msg->target_vp; 1972 + chan->open_id = msg->open_id; 1973 + 1974 + open_channel(chan); 1975 + 1976 + chan->state = VMCHAN_OPENING; 1977 + vmbus->state = VMBUS_OPEN_CHANNEL; 1978 + } 1979 + 1980 + static void send_open_channel(VMBus *vmbus) 1981 + { 1982 + VMBusChannel *chan; 1983 + 1984 + QTAILQ_FOREACH(chan, &vmbus->channel_list, link) { 1985 + if (chan->state == VMCHAN_OPENING) { 1986 + struct vmbus_message_open_result msg = { 1987 + .header.message_type = VMBUS_MSG_OPENCHANNEL_RESULT, 1988 + .child_relid = chan->id, 1989 + .open_id = chan->open_id, 1990 + .status = !vmbus_channel_is_open(chan), 1991 + }; 1992 + 1993 + trace_vmbus_channel_open(chan->id, msg.status); 1994 + post_msg(vmbus, &msg, sizeof(msg)); 1995 + return; 1996 + } 1997 + } 1998 + 1999 + assert(false); 2000 + } 2001 + 2002 + static bool complete_open_channel(VMBus *vmbus) 2003 + { 2004 + VMBusChannel *chan; 2005 + 2006 + QTAILQ_FOREACH(chan, &vmbus->channel_list, link) { 2007 + if (chan->state == 
VMCHAN_OPENING) { 2008 + if (vmbus_channel_is_open(chan)) { 2009 + chan->state = VMCHAN_OPEN; 2010 + /* 2011 + * simulate guest notification of ringbuffer space made 2012 + * available, for the channel protocols where the host 2013 + * initiates the communication 2014 + */ 2015 + vmbus_channel_notify_host(chan); 2016 + } else { 2017 + chan->state = VMCHAN_INIT; 2018 + } 2019 + return true; 2020 + } 2021 + } 2022 + 2023 + assert(false); 2024 + return false; 2025 + } 2026 + 2027 + static void vdev_reset_on_close(VMBusDevice *vdev) 2028 + { 2029 + uint16_t i; 2030 + 2031 + for (i = 0; i < vdev->num_channels; i++) { 2032 + if (vmbus_channel_is_open(&vdev->channels[i])) { 2033 + return; 2034 + } 2035 + } 2036 + 2037 + /* all channels closed -- reset device */ 2038 + qdev_reset_all(DEVICE(vdev)); 2039 + } 2040 + 2041 + static void handle_close_channel(VMBus *vmbus, vmbus_message_close_channel *msg, 2042 + uint32_t msglen) 2043 + { 2044 + VMBusChannel *chan; 2045 + 2046 + if (msglen < sizeof(*msg) || !vmbus_initialized(vmbus)) { 2047 + return; 2048 + } 2049 + 2050 + trace_vmbus_close_channel(msg->child_relid); 2051 + 2052 + chan = find_channel(vmbus, msg->child_relid); 2053 + if (!chan) { 2054 + return; 2055 + } 2056 + 2057 + close_channel(chan); 2058 + chan->state = VMCHAN_INIT; 2059 + 2060 + vdev_reset_on_close(chan->dev); 2061 + } 2062 + 2063 + static void handle_unload(VMBus *vmbus, void *msg, uint32_t msglen) 2064 + { 2065 + vmbus->state = VMBUS_UNLOAD; 2066 + } 2067 + 2068 + static void send_unload(VMBus *vmbus) 2069 + { 2070 + vmbus_message_header msg = { 2071 + .message_type = VMBUS_MSG_UNLOAD_RESPONSE, 2072 + }; 2073 + 2074 + qemu_mutex_lock(&vmbus->rx_queue_lock); 2075 + vmbus->rx_queue_size = 0; 2076 + qemu_mutex_unlock(&vmbus->rx_queue_lock); 2077 + 2078 + post_msg(vmbus, &msg, sizeof(msg)); 2079 + return; 2080 + } 2081 + 2082 + static bool complete_unload(VMBus *vmbus) 2083 + { 2084 + vmbus_reset_all(vmbus); 2085 + return true; 2086 + } 2087 + 2088 + static 
void process_message(VMBus *vmbus) 2089 + { 2090 + struct hyperv_post_message_input *hv_msg; 2091 + struct vmbus_message_header *msg; 2092 + void *msgdata; 2093 + uint32_t msglen; 2094 + 2095 + qemu_mutex_lock(&vmbus->rx_queue_lock); 2096 + 2097 + if (!vmbus->rx_queue_size) { 2098 + goto unlock; 2099 + } 2100 + 2101 + hv_msg = &vmbus->rx_queue[vmbus->rx_queue_head]; 2102 + msglen = hv_msg->payload_size; 2103 + if (msglen < sizeof(*msg)) { 2104 + goto out; 2105 + } 2106 + msgdata = hv_msg->payload; 2107 + msg = (struct vmbus_message_header *)msgdata; 2108 + 2109 + trace_vmbus_process_incoming_message(msg->message_type); 2110 + 2111 + switch (msg->message_type) { 2112 + case VMBUS_MSG_INITIATE_CONTACT: 2113 + handle_initiate_contact(vmbus, msgdata, msglen); 2114 + break; 2115 + case VMBUS_MSG_REQUESTOFFERS: 2116 + handle_request_offers(vmbus, msgdata, msglen); 2117 + break; 2118 + case VMBUS_MSG_GPADL_HEADER: 2119 + handle_gpadl_header(vmbus, msgdata, msglen); 2120 + break; 2121 + case VMBUS_MSG_GPADL_BODY: 2122 + handle_gpadl_body(vmbus, msgdata, msglen); 2123 + break; 2124 + case VMBUS_MSG_GPADL_TEARDOWN: 2125 + handle_gpadl_teardown(vmbus, msgdata, msglen); 2126 + break; 2127 + case VMBUS_MSG_OPENCHANNEL: 2128 + handle_open_channel(vmbus, msgdata, msglen); 2129 + break; 2130 + case VMBUS_MSG_CLOSECHANNEL: 2131 + handle_close_channel(vmbus, msgdata, msglen); 2132 + break; 2133 + case VMBUS_MSG_UNLOAD: 2134 + handle_unload(vmbus, msgdata, msglen); 2135 + break; 2136 + default: 2137 + error_report("unknown message type %#x", msg->message_type); 2138 + break; 2139 + } 2140 + 2141 + out: 2142 + vmbus->rx_queue_size--; 2143 + vmbus->rx_queue_head++; 2144 + vmbus->rx_queue_head %= HV_MSG_QUEUE_LEN; 2145 + 2146 + vmbus_resched(vmbus); 2147 + unlock: 2148 + qemu_mutex_unlock(&vmbus->rx_queue_lock); 2149 + } 2150 + 2151 + static const struct { 2152 + void (*run)(VMBus *vmbus); 2153 + bool (*complete)(VMBus *vmbus); 2154 + } state_runner[] = { 2155 + [VMBUS_LISTEN] = 
{process_message, NULL}, 2156 + [VMBUS_HANDSHAKE] = {send_handshake, NULL}, 2157 + [VMBUS_OFFER] = {send_offer, complete_offer}, 2158 + [VMBUS_CREATE_GPADL] = {send_create_gpadl, complete_create_gpadl}, 2159 + [VMBUS_TEARDOWN_GPADL] = {send_teardown_gpadl, complete_teardown_gpadl}, 2160 + [VMBUS_OPEN_CHANNEL] = {send_open_channel, complete_open_channel}, 2161 + [VMBUS_UNLOAD] = {send_unload, complete_unload}, 2162 + }; 2163 + 2164 + static void vmbus_do_run(VMBus *vmbus) 2165 + { 2166 + if (vmbus->msg_in_progress) { 2167 + return; 2168 + } 2169 + 2170 + assert(vmbus->state < VMBUS_STATE_MAX); 2171 + assert(state_runner[vmbus->state].run); 2172 + state_runner[vmbus->state].run(vmbus); 2173 + } 2174 + 2175 + static void vmbus_run(void *opaque) 2176 + { 2177 + VMBus *vmbus = opaque; 2178 + 2179 + /* make sure no recursion happens (e.g. due to recursive aio_poll()) */ 2180 + if (vmbus->in_progress) { 2181 + return; 2182 + } 2183 + 2184 + vmbus->in_progress = true; 2185 + /* 2186 + * FIXME: if vmbus_resched() is called from within vmbus_do_run(), it 2187 + * should go *after* the code that can result in aio_poll; otherwise 2188 + * reschedules can be missed. No idea how to enforce that. 
2189 + */ 2190 + vmbus_do_run(vmbus); 2191 + vmbus->in_progress = false; 2192 + } 2193 + 2194 + static void vmbus_msg_cb(void *data, int status) 2195 + { 2196 + VMBus *vmbus = data; 2197 + bool (*complete)(VMBus *vmbus); 2198 + 2199 + assert(vmbus->msg_in_progress); 2200 + 2201 + trace_vmbus_msg_cb(status); 2202 + 2203 + if (status == -EAGAIN) { 2204 + goto out; 2205 + } 2206 + if (status) { 2207 + error_report("message delivery fatal failure: %d; aborting vmbus", 2208 + status); 2209 + vmbus_reset_all(vmbus); 2210 + return; 2211 + } 2212 + 2213 + assert(vmbus->state < VMBUS_STATE_MAX); 2214 + complete = state_runner[vmbus->state].complete; 2215 + if (!complete || complete(vmbus)) { 2216 + vmbus->state = VMBUS_LISTEN; 2217 + } 2218 + out: 2219 + vmbus->msg_in_progress = false; 2220 + vmbus_resched(vmbus); 2221 + } 2222 + 2223 + static void vmbus_resched(VMBus *vmbus) 2224 + { 2225 + aio_bh_schedule_oneshot(qemu_get_aio_context(), vmbus_run, vmbus); 2226 + } 2227 + 2228 + static void vmbus_signal_event(EventNotifier *e) 2229 + { 2230 + VMBusChannel *chan; 2231 + VMBus *vmbus = container_of(e, VMBus, notifier); 2232 + unsigned long *int_map; 2233 + hwaddr addr, len; 2234 + bool is_dirty = false; 2235 + 2236 + if (!event_notifier_test_and_clear(e)) { 2237 + return; 2238 + } 2239 + 2240 + trace_vmbus_signal_event(); 2241 + 2242 + if (!vmbus->int_page_gpa) { 2243 + return; 2244 + } 2245 + 2246 + addr = vmbus->int_page_gpa + TARGET_PAGE_SIZE / 2; 2247 + len = TARGET_PAGE_SIZE / 2; 2248 + int_map = cpu_physical_memory_map(addr, &len, 1); 2249 + if (len != TARGET_PAGE_SIZE / 2) { 2250 + goto unmap; 2251 + } 2252 + 2253 + QTAILQ_FOREACH(chan, &vmbus->channel_list, link) { 2254 + if (bitmap_test_and_clear_atomic(int_map, chan->id, 1)) { 2255 + if (!vmbus_channel_is_open(chan)) { 2256 + continue; 2257 + } 2258 + vmbus_channel_notify_host(chan); 2259 + is_dirty = true; 2260 + } 2261 + } 2262 + 2263 + unmap: 2264 + cpu_physical_memory_unmap(int_map, len, 1, is_dirty); 2265 + } 
2266 + 2267 + static void vmbus_dev_realize(DeviceState *dev, Error **errp) 2268 + { 2269 + VMBusDevice *vdev = VMBUS_DEVICE(dev); 2270 + VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(vdev); 2271 + VMBus *vmbus = VMBUS(qdev_get_parent_bus(dev)); 2272 + BusChild *child; 2273 + Error *err = NULL; 2274 + char idstr[UUID_FMT_LEN + 1]; 2275 + 2276 + assert(!qemu_uuid_is_null(&vdev->instanceid)); 2277 + 2278 + /* Check for instance id collision for this class id */ 2279 + QTAILQ_FOREACH(child, &BUS(vmbus)->children, sibling) { 2280 + VMBusDevice *child_dev = VMBUS_DEVICE(child->child); 2281 + 2282 + if (child_dev == vdev) { 2283 + continue; 2284 + } 2285 + 2286 + if (qemu_uuid_is_equal(&child_dev->instanceid, &vdev->instanceid)) { 2287 + qemu_uuid_unparse(&vdev->instanceid, idstr); 2288 + error_setg(&err, "duplicate vmbus device instance id %s", idstr); 2289 + goto error_out; 2290 + } 2291 + } 2292 + 2293 + vdev->dma_as = &address_space_memory; 2294 + 2295 + create_channels(vmbus, vdev, &err); 2296 + if (err) { 2297 + goto error_out; 2298 + } 2299 + 2300 + if (vdc->vmdev_realize) { 2301 + vdc->vmdev_realize(vdev, &err); 2302 + if (err) { 2303 + goto err_vdc_realize; 2304 + } 2305 + } 2306 + return; 2307 + 2308 + err_vdc_realize: 2309 + free_channels(vdev); 2310 + error_out: 2311 + error_propagate(errp, err); 2312 + } 2313 + 2314 + static void vmbus_dev_reset(DeviceState *dev) 2315 + { 2316 + uint16_t i; 2317 + VMBusDevice *vdev = VMBUS_DEVICE(dev); 2318 + VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(vdev); 2319 + 2320 + if (vdev->channels) { 2321 + for (i = 0; i < vdev->num_channels; i++) { 2322 + VMBusChannel *chan = &vdev->channels[i]; 2323 + close_channel(chan); 2324 + chan->state = VMCHAN_INIT; 2325 + } 2326 + } 2327 + 2328 + if (vdc->vmdev_reset) { 2329 + vdc->vmdev_reset(vdev); 2330 + } 2331 + } 2332 + 2333 + static void vmbus_dev_unrealize(DeviceState *dev) 2334 + { 2335 + VMBusDevice *vdev = VMBUS_DEVICE(dev); 2336 + VMBusDeviceClass *vdc = 
VMBUS_DEVICE_GET_CLASS(vdev); 2337 + 2338 + if (vdc->vmdev_unrealize) { 2339 + vdc->vmdev_unrealize(vdev); 2340 + } 2341 + free_channels(vdev); 2342 + } 2343 + 2344 + static void vmbus_dev_class_init(ObjectClass *klass, void *data) 2345 + { 2346 + DeviceClass *kdev = DEVICE_CLASS(klass); 2347 + kdev->bus_type = TYPE_VMBUS; 2348 + kdev->realize = vmbus_dev_realize; 2349 + kdev->unrealize = vmbus_dev_unrealize; 2350 + kdev->reset = vmbus_dev_reset; 2351 + } 2352 + 2353 + static Property vmbus_dev_instanceid = 2354 + DEFINE_PROP_UUID("instanceid", VMBusDevice, instanceid); 2355 + 2356 + static void vmbus_dev_instance_init(Object *obj) 2357 + { 2358 + VMBusDevice *vdev = VMBUS_DEVICE(obj); 2359 + VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(vdev); 2360 + 2361 + if (!qemu_uuid_is_null(&vdc->instanceid)) { 2362 + /* Class wants to only have a single instance with a fixed UUID */ 2363 + vdev->instanceid = vdc->instanceid; 2364 + } else { 2365 + qdev_property_add_static(DEVICE(vdev), &vmbus_dev_instanceid); 2366 + } 2367 + } 2368 + 2369 + const VMStateDescription vmstate_vmbus_dev = { 2370 + .name = TYPE_VMBUS_DEVICE, 2371 + .version_id = 0, 2372 + .minimum_version_id = 0, 2373 + .fields = (VMStateField[]) { 2374 + VMSTATE_UINT8_ARRAY(instanceid.data, VMBusDevice, 16), 2375 + VMSTATE_UINT16(num_channels, VMBusDevice), 2376 + VMSTATE_STRUCT_VARRAY_POINTER_UINT16(channels, VMBusDevice, 2377 + num_channels, vmstate_channel, 2378 + VMBusChannel), 2379 + VMSTATE_END_OF_LIST() 2380 + } 2381 + }; 2382 + 2383 + /* vmbus generic device base */ 2384 + static const TypeInfo vmbus_dev_type_info = { 2385 + .name = TYPE_VMBUS_DEVICE, 2386 + .parent = TYPE_DEVICE, 2387 + .abstract = true, 2388 + .instance_size = sizeof(VMBusDevice), 2389 + .class_size = sizeof(VMBusDeviceClass), 2390 + .class_init = vmbus_dev_class_init, 2391 + .instance_init = vmbus_dev_instance_init, 2392 + }; 2393 + 2394 + static void vmbus_realize(BusState *bus, Error **errp) 2395 + { 2396 + int ret = 0; 2397 + 
Error *local_err = NULL; 2398 + VMBus *vmbus = VMBUS(bus); 2399 + 2400 + qemu_mutex_init(&vmbus->rx_queue_lock); 2401 + 2402 + QTAILQ_INIT(&vmbus->gpadl_list); 2403 + QTAILQ_INIT(&vmbus->channel_list); 2404 + 2405 + ret = hyperv_set_msg_handler(VMBUS_MESSAGE_CONNECTION_ID, 2406 + vmbus_recv_message, vmbus); 2407 + if (ret != 0) { 2408 + error_setg(&local_err, "hyperv set message handler failed: %d", ret); 2409 + goto error_out; 2410 + } 2411 + 2412 + ret = event_notifier_init(&vmbus->notifier, 0); 2413 + if (ret != 0) { 2414 + error_setg(&local_err, "event notifier failed to init with %d", ret); 2415 + goto remove_msg_handler; 2416 + } 2417 + 2418 + event_notifier_set_handler(&vmbus->notifier, vmbus_signal_event); 2419 + ret = hyperv_set_event_flag_handler(VMBUS_EVENT_CONNECTION_ID, 2420 + &vmbus->notifier); 2421 + if (ret != 0) { 2422 + error_setg(&local_err, "hyperv set event handler failed with %d", ret); 2423 + goto clear_event_notifier; 2424 + } 2425 + 2426 + return; 2427 + 2428 + clear_event_notifier: 2429 + event_notifier_cleanup(&vmbus->notifier); 2430 + remove_msg_handler: 2431 + hyperv_set_msg_handler(VMBUS_MESSAGE_CONNECTION_ID, NULL, NULL); 2432 + error_out: 2433 + qemu_mutex_destroy(&vmbus->rx_queue_lock); 2434 + error_propagate(errp, local_err); 2435 + } 2436 + 2437 + static void vmbus_unrealize(BusState *bus) 2438 + { 2439 + VMBus *vmbus = VMBUS(bus); 2440 + 2441 + hyperv_set_msg_handler(VMBUS_MESSAGE_CONNECTION_ID, NULL, NULL); 2442 + hyperv_set_event_flag_handler(VMBUS_EVENT_CONNECTION_ID, NULL); 2443 + event_notifier_cleanup(&vmbus->notifier); 2444 + 2445 + qemu_mutex_destroy(&vmbus->rx_queue_lock); 2446 + } 2447 + 2448 + static void vmbus_reset(BusState *bus) 2449 + { 2450 + vmbus_deinit(VMBUS(bus)); 2451 + } 2452 + 2453 + static char *vmbus_get_dev_path(DeviceState *dev) 2454 + { 2455 + BusState *bus = qdev_get_parent_bus(dev); 2456 + return qdev_get_dev_path(bus->parent); 2457 + } 2458 + 2459 + static char *vmbus_get_fw_dev_path(DeviceState 
*dev) 2460 + { 2461 + VMBusDevice *vdev = VMBUS_DEVICE(dev); 2462 + char uuid[UUID_FMT_LEN + 1]; 2463 + 2464 + qemu_uuid_unparse(&vdev->instanceid, uuid); 2465 + return g_strdup_printf("%s@%s", qdev_fw_name(dev), uuid); 2466 + } 2467 + 2468 + static void vmbus_class_init(ObjectClass *klass, void *data) 2469 + { 2470 + BusClass *k = BUS_CLASS(klass); 2471 + 2472 + k->get_dev_path = vmbus_get_dev_path; 2473 + k->get_fw_dev_path = vmbus_get_fw_dev_path; 2474 + k->realize = vmbus_realize; 2475 + k->unrealize = vmbus_unrealize; 2476 + k->reset = vmbus_reset; 2477 + } 2478 + 2479 + static int vmbus_pre_load(void *opaque) 2480 + { 2481 + VMBusChannel *chan; 2482 + VMBus *vmbus = VMBUS(opaque); 2483 + 2484 + /* 2485 + * channel IDs allocated by the source will come in the migration stream 2486 + * for each channel, so clean up the ones allocated at realize 2487 + */ 2488 + QTAILQ_FOREACH(chan, &vmbus->channel_list, link) { 2489 + unregister_chan_id(chan); 2490 + } 2491 + 2492 + return 0; 2493 + } 2494 + static int vmbus_post_load(void *opaque, int version_id) 2495 + { 2496 + int ret; 2497 + VMBus *vmbus = VMBUS(opaque); 2498 + VMBusGpadl *gpadl; 2499 + VMBusChannel *chan; 2500 + 2501 + ret = vmbus_init(vmbus); 2502 + if (ret) { 2503 + return ret; 2504 + } 2505 + 2506 + QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) { 2507 + gpadl->vmbus = vmbus; 2508 + gpadl->refcount = 1; 2509 + } 2510 + 2511 + /* 2512 + * reopening channels depends on initialized vmbus so it's done here 2513 + * instead of channel_post_load() 2514 + */ 2515 + QTAILQ_FOREACH(chan, &vmbus->channel_list, link) { 2516 + 2517 + if (chan->state == VMCHAN_OPENING || chan->state == VMCHAN_OPEN) { 2518 + open_channel(chan); 2519 + } 2520 + 2521 + if (chan->state != VMCHAN_OPEN) { 2522 + continue; 2523 + } 2524 + 2525 + if (!vmbus_channel_is_open(chan)) { 2526 + /* reopen failed, abort loading */ 2527 + return -1; 2528 + } 2529 + 2530 + /* resume processing on the guest side if it missed the notification */ 2531 
+ hyperv_sint_route_set_sint(chan->notify_route); 2532 + /* ditto on the host side */ 2533 + vmbus_channel_notify_host(chan); 2534 + } 2535 + 2536 + vmbus_resched(vmbus); 2537 + return 0; 2538 + } 2539 + 2540 + static const VMStateDescription vmstate_post_message_input = { 2541 + .name = "vmbus/hyperv_post_message_input", 2542 + .version_id = 0, 2543 + .minimum_version_id = 0, 2544 + .fields = (VMStateField[]) { 2545 + /* 2546 + * skip connection_id and message_type as they are validated before 2547 + * queueing and ignored on dequeueing 2548 + */ 2549 + VMSTATE_UINT32(payload_size, struct hyperv_post_message_input), 2550 + VMSTATE_UINT8_ARRAY(payload, struct hyperv_post_message_input, 2551 + HV_MESSAGE_PAYLOAD_SIZE), 2552 + VMSTATE_END_OF_LIST() 2553 + } 2554 + }; 2555 + 2556 + static bool vmbus_rx_queue_needed(void *opaque) 2557 + { 2558 + VMBus *vmbus = VMBUS(opaque); 2559 + return vmbus->rx_queue_size; 2560 + } 2561 + 2562 + static const VMStateDescription vmstate_rx_queue = { 2563 + .name = "vmbus/rx_queue", 2564 + .version_id = 0, 2565 + .minimum_version_id = 0, 2566 + .needed = vmbus_rx_queue_needed, 2567 + .fields = (VMStateField[]) { 2568 + VMSTATE_UINT8(rx_queue_head, VMBus), 2569 + VMSTATE_UINT8(rx_queue_size, VMBus), 2570 + VMSTATE_STRUCT_ARRAY(rx_queue, VMBus, 2571 + HV_MSG_QUEUE_LEN, 0, 2572 + vmstate_post_message_input, 2573 + struct hyperv_post_message_input), 2574 + VMSTATE_END_OF_LIST() 2575 + } 2576 + }; 2577 + 2578 + static const VMStateDescription vmstate_vmbus = { 2579 + .name = TYPE_VMBUS, 2580 + .version_id = 0, 2581 + .minimum_version_id = 0, 2582 + .pre_load = vmbus_pre_load, 2583 + .post_load = vmbus_post_load, 2584 + .fields = (VMStateField[]) { 2585 + VMSTATE_UINT8(state, VMBus), 2586 + VMSTATE_UINT32(version, VMBus), 2587 + VMSTATE_UINT32(target_vp, VMBus), 2588 + VMSTATE_UINT64(int_page_gpa, VMBus), 2589 + VMSTATE_QTAILQ_V(gpadl_list, VMBus, 0, 2590 + vmstate_gpadl, VMBusGpadl, link), 2591 + VMSTATE_END_OF_LIST() 2592 + }, 2593 + 
.subsections = (const VMStateDescription * []) { 2594 + &vmstate_rx_queue, 2595 + NULL 2596 + } 2597 + }; 2598 + 2599 + static const TypeInfo vmbus_type_info = { 2600 + .name = TYPE_VMBUS, 2601 + .parent = TYPE_BUS, 2602 + .instance_size = sizeof(VMBus), 2603 + .class_init = vmbus_class_init, 2604 + }; 2605 + 2606 + static void vmbus_bridge_realize(DeviceState *dev, Error **errp) 2607 + { 2608 + VMBusBridge *bridge = VMBUS_BRIDGE(dev); 2609 + 2610 + /* 2611 + * here there's at least one vmbus bridge that is being realized, so 2612 + * vmbus_bridge_find can only return NULL if it's not unique 2613 + */ 2614 + if (!vmbus_bridge_find()) { 2615 + error_setg(errp, "there can be at most one %s in the system", 2616 + TYPE_VMBUS_BRIDGE); 2617 + return; 2618 + } 2619 + 2620 + if (!hyperv_is_synic_enabled()) { 2621 + error_report("VMBus requires usable Hyper-V SynIC and VP_INDEX"); 2622 + return; 2623 + } 2624 + 2625 + bridge->bus = VMBUS(qbus_create(TYPE_VMBUS, dev, "vmbus")); 2626 + } 2627 + 2628 + static char *vmbus_bridge_ofw_unit_address(const SysBusDevice *dev) 2629 + { 2630 + /* there can be only one VMBus */ 2631 + return g_strdup("0"); 2632 + } 2633 + 2634 + static const VMStateDescription vmstate_vmbus_bridge = { 2635 + .name = TYPE_VMBUS_BRIDGE, 2636 + .version_id = 0, 2637 + .minimum_version_id = 0, 2638 + .fields = (VMStateField[]) { 2639 + VMSTATE_STRUCT_POINTER(bus, VMBusBridge, vmstate_vmbus, VMBus), 2640 + VMSTATE_END_OF_LIST() 2641 + }, 2642 + }; 2643 + 2644 + static void vmbus_bridge_class_init(ObjectClass *klass, void *data) 2645 + { 2646 + DeviceClass *k = DEVICE_CLASS(klass); 2647 + SysBusDeviceClass *sk = SYS_BUS_DEVICE_CLASS(klass); 2648 + 2649 + k->realize = vmbus_bridge_realize; 2650 + k->fw_name = "vmbus"; 2651 + sk->explicit_ofw_unit_address = vmbus_bridge_ofw_unit_address; 2652 + set_bit(DEVICE_CATEGORY_BRIDGE, k->categories); 2653 + k->vmsd = &vmstate_vmbus_bridge; 2654 + /* override SysBusDevice's default */ 2655 + k->user_creatable = true; 
2656 + } 2657 + 2658 + static const TypeInfo vmbus_bridge_type_info = { 2659 + .name = TYPE_VMBUS_BRIDGE, 2660 + .parent = TYPE_SYS_BUS_DEVICE, 2661 + .instance_size = sizeof(VMBusBridge), 2662 + .class_init = vmbus_bridge_class_init, 2663 + }; 2664 + 2665 + static void vmbus_register_types(void) 2666 + { 2667 + type_register_static(&vmbus_bridge_type_info); 2668 + type_register_static(&vmbus_dev_type_info); 2669 + type_register_static(&vmbus_type_info); 2670 + } 2671 + 2672 + type_init(vmbus_register_types)
+32
include/hw/hyperv/vmbus-bridge.h
··· 1 + /* 2 + * QEMU Hyper-V VMBus root bridge 3 + * 4 + * Copyright (c) 2017-2018 Virtuozzo International GmbH. 5 + * 6 + * This work is licensed under the terms of the GNU GPL, version 2 or later. 7 + * See the COPYING file in the top-level directory. 8 + */ 9 + 10 + #ifndef HW_HYPERV_VMBUS_BRIDGE_H 11 + #define HW_HYPERV_VMBUS_BRIDGE_H 12 + 13 + #include "hw/sysbus.h" 14 + 15 + #define TYPE_VMBUS_BRIDGE "vmbus-bridge" 16 + 17 + typedef struct VMBus VMBus; 18 + 19 + typedef struct VMBusBridge { 20 + SysBusDevice parent_obj; 21 + 22 + VMBus *bus; 23 + } VMBusBridge; 24 + 25 + #define VMBUS_BRIDGE(obj) OBJECT_CHECK(VMBusBridge, (obj), TYPE_VMBUS_BRIDGE) 26 + 27 + static inline VMBusBridge *vmbus_bridge_find(void) 28 + { 29 + return VMBUS_BRIDGE(object_resolve_path_type("", TYPE_VMBUS_BRIDGE, NULL)); 30 + } 31 + 32 + #endif
+227
include/hw/hyperv/vmbus.h
··· 1 + /* 2 + * QEMU Hyper-V VMBus 3 + * 4 + * Copyright (c) 2017-2018 Virtuozzo International GmbH. 5 + * 6 + * This work is licensed under the terms of the GNU GPL, version 2 or later. 7 + * See the COPYING file in the top-level directory. 8 + */ 9 + 10 + #ifndef HW_HYPERV_VMBUS_H 11 + #define HW_HYPERV_VMBUS_H 12 + 13 + #include "sysemu/sysemu.h" 14 + #include "sysemu/dma.h" 15 + #include "hw/qdev-core.h" 16 + #include "migration/vmstate.h" 17 + #include "hw/hyperv/vmbus-proto.h" 18 + #include "qemu/uuid.h" 19 + 20 + #define TYPE_VMBUS_DEVICE "vmbus-dev" 21 + 22 + #define VMBUS_DEVICE(obj) \ 23 + OBJECT_CHECK(VMBusDevice, (obj), TYPE_VMBUS_DEVICE) 24 + #define VMBUS_DEVICE_CLASS(klass) \ 25 + OBJECT_CLASS_CHECK(VMBusDeviceClass, (klass), TYPE_VMBUS_DEVICE) 26 + #define VMBUS_DEVICE_GET_CLASS(obj) \ 27 + OBJECT_GET_CLASS(VMBusDeviceClass, (obj), TYPE_VMBUS_DEVICE) 28 + 29 + /* 30 + * Object wrapping a GPADL -- GPA Descriptor List -- an array of guest physical 31 + * pages, to be used for various buffers shared between the host and the guest. 32 + */ 33 + typedef struct VMBusGpadl VMBusGpadl; 34 + /* 35 + * VMBus channel -- a pair of ring buffers for either direction, placed within 36 + * one GPADL, and the associated notification means. 37 + */ 38 + typedef struct VMBusChannel VMBusChannel; 39 + /* 40 + * Base class for VMBus devices. Includes one or more channels. Identified by 41 + * class GUID and instance GUID. 
42 + */ 43 + typedef struct VMBusDevice VMBusDevice; 44 + 45 + typedef void(*VMBusChannelNotifyCb)(struct VMBusChannel *chan); 46 + 47 + typedef struct VMBusDeviceClass { 48 + DeviceClass parent; 49 + 50 + QemuUUID classid; 51 + QemuUUID instanceid; /* Fixed UUID for singleton devices */ 52 + uint16_t channel_flags; 53 + uint16_t mmio_size_mb; 54 + 55 + /* Extentions to standard device callbacks */ 56 + void (*vmdev_realize)(VMBusDevice *vdev, Error **errp); 57 + void (*vmdev_unrealize)(VMBusDevice *vdev); 58 + void (*vmdev_reset)(VMBusDevice *vdev); 59 + /* 60 + * Calculate the number of channels based on the device properties. Called 61 + * at realize time. 62 + **/ 63 + uint16_t (*num_channels)(VMBusDevice *vdev); 64 + /* 65 + * Device-specific actions to complete the otherwise successful process of 66 + * opening a channel. 67 + * Return 0 on success, -errno on failure. 68 + */ 69 + int (*open_channel)(VMBusChannel *chan); 70 + /* 71 + * Device-specific actions to perform before closing a channel. 72 + */ 73 + void (*close_channel)(VMBusChannel *chan); 74 + /* 75 + * Main device worker; invoked in response to notifications from either 76 + * side, when there's work to do with the data in the channel ring buffers. 77 + */ 78 + VMBusChannelNotifyCb chan_notify_cb; 79 + } VMBusDeviceClass; 80 + 81 + struct VMBusDevice { 82 + DeviceState parent; 83 + QemuUUID instanceid; 84 + uint16_t num_channels; 85 + VMBusChannel *channels; 86 + AddressSpace *dma_as; 87 + }; 88 + 89 + extern const VMStateDescription vmstate_vmbus_dev; 90 + 91 + /* 92 + * A unit of work parsed out of a message in the receive (i.e. guest->host) 93 + * ring buffer of a channel. It's supposed to be subclassed (through 94 + * embedding) by the specific devices. 
95 + */ 96 + typedef struct VMBusChanReq { 97 + VMBusChannel *chan; 98 + uint16_t pkt_type; 99 + uint32_t msglen; 100 + void *msg; 101 + uint64_t transaction_id; 102 + bool need_comp; 103 + QEMUSGList sgl; 104 + } VMBusChanReq; 105 + 106 + VMBusDevice *vmbus_channel_device(VMBusChannel *chan); 107 + VMBusChannel *vmbus_device_channel(VMBusDevice *dev, uint32_t chan_idx); 108 + uint32_t vmbus_channel_idx(VMBusChannel *chan); 109 + bool vmbus_channel_is_open(VMBusChannel *chan); 110 + 111 + /* 112 + * Notify (on guest's behalf) the host side of the channel that there's data in 113 + * the ringbuffer to process. 114 + */ 115 + void vmbus_channel_notify_host(VMBusChannel *chan); 116 + 117 + /* 118 + * Reserve space for a packet in the send (i.e. host->guest) ringbuffer. If 119 + * there isn't enough room, indicate that to the guest, to be notified when it 120 + * becomes available. 121 + * Return 0 on success, negative errno on failure. 122 + * The ringbuffer indices are NOT updated, the requested space indicator may. 123 + */ 124 + int vmbus_channel_reserve(VMBusChannel *chan, 125 + uint32_t desclen, uint32_t msglen); 126 + 127 + /* 128 + * Send a packet to the guest. The space for the packet MUST be reserved 129 + * first. 130 + * Return total number of bytes placed in the send ringbuffer on success, 131 + * negative errno on failure. 132 + * The ringbuffer indices are updated on success, and the guest is signaled if 133 + * needed. 134 + */ 135 + ssize_t vmbus_channel_send(VMBusChannel *chan, uint16_t pkt_type, 136 + void *desc, uint32_t desclen, 137 + void *msg, uint32_t msglen, 138 + bool need_comp, uint64_t transaction_id); 139 + 140 + /* 141 + * Prepare to fetch a batch of packets from the receive ring buffer. 142 + * Return 0 on success, negative errno on failure. 143 + */ 144 + int vmbus_channel_recv_start(VMBusChannel *chan); 145 + 146 + /* 147 + * Shortcut for a common case of sending a simple completion packet with no 148 + * auxiliary descriptors. 
149 + */ 150 + ssize_t vmbus_channel_send_completion(VMBusChanReq *req, 151 + void *msg, uint32_t msglen); 152 + 153 + /* 154 + * Peek at the receive (i.e. guest->host) ring buffer and extract a unit of 155 + * work (a device-specific subclass of VMBusChanReq) from a packet if there's 156 + * one. 157 + * Return an allocated buffer, containing the request of @size with filled 158 + * VMBusChanReq at the beginning, followed by the message payload, or NULL on 159 + * failure. 160 + * The ringbuffer indices are NOT updated, nor is the private copy of the read 161 + * index. 162 + */ 163 + void *vmbus_channel_recv_peek(VMBusChannel *chan, uint32_t size); 164 + 165 + /* 166 + * Update the private copy of the read index once the preceding peek is deemed 167 + * successful. 168 + * The ringbuffer indices are NOT updated. 169 + */ 170 + void vmbus_channel_recv_pop(VMBusChannel *chan); 171 + 172 + /* 173 + * Propagate the private copy of the read index into the receive ring buffer, 174 + * and thus complete the reception of a series of packets. Notify guest if 175 + * needed. 176 + * Return the number of bytes popped off the receive ring buffer by the 177 + * preceding recv_peek/recv_pop calls on success, negative errno on failure. 178 + */ 179 + ssize_t vmbus_channel_recv_done(VMBusChannel *chan); 180 + 181 + /* 182 + * Free the request allocated by vmbus_channel_recv_peek, together with its 183 + * fields. 184 + */ 185 + void vmbus_free_req(void *req); 186 + 187 + /* 188 + * Find and reference a GPADL by @gpadl_id. 189 + * If not found return NULL. 190 + */ 191 + VMBusGpadl *vmbus_get_gpadl(VMBusChannel *chan, uint32_t gpadl_id); 192 + 193 + /* 194 + * Unreference @gpadl. If the reference count drops to zero, free it. 195 + * @gpadl may be NULL, in which case nothing is done. 196 + */ 197 + void vmbus_put_gpadl(VMBusGpadl *gpadl); 198 + 199 + /* 200 + * Calculate total length in bytes of @gpadl. 201 + * @gpadl must be valid. 
202 + */ 203 + uint32_t vmbus_gpadl_len(VMBusGpadl *gpadl); 204 + 205 + /* 206 + * Copy data from @iov to @gpadl at offset @off. 207 + * Return the number of bytes copied, or a negative status on failure. 208 + */ 209 + ssize_t vmbus_iov_to_gpadl(VMBusChannel *chan, VMBusGpadl *gpadl, uint32_t off, 210 + const struct iovec *iov, size_t iov_cnt); 211 + 212 + /* 213 + * Map SGList contained in the request @req, at offset @off and no more than 214 + * @len bytes, for io in direction @dir, and populate @iov with the mapped 215 + * iovecs. 216 + * Return the number of iovecs mapped, or negative status on failure. 217 + */ 218 + int vmbus_map_sgl(VMBusChanReq *req, DMADirection dir, struct iovec *iov, 219 + unsigned iov_cnt, size_t len, size_t off); 220 + 221 + /* 222 + * Unmap *iov mapped with vmbus_map_sgl, marking the number of bytes @accessed. 223 + */ 224 + void vmbus_unmap_sgl(VMBusChanReq *req, DMADirection dir, struct iovec *iov, 225 + unsigned iov_cnt, size_t accessed); 226 + 227 + #endif