qemu with hax to log dma reads & writes jcs.org/2018/11/12/vfio

virtio-balloon: Provide an interface for free page reporting

Add support for free page reporting. The idea is to function very similarly
to how the balloon works, in that we basically end up madvising the page as
not being used. However, we don't really need to bother with any deflate-type
logic, since the page will be faulted back into the guest when it is
read or written to.

This provides a new way of letting the guest proactively report free
pages to the hypervisor, so the hypervisor can reuse them. This is in contrast
to inflate/deflate, which is triggered explicitly by the hypervisor.

Acked-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Alexander Duyck <alexander.h.duyck@linux.intel.com>
Message-Id: <20200527041407.12700.73735.stgit@localhost.localdomain>

authored by

Alexander Duyck and committed by
Michael S. Tsirkin
91b86719 7483cbba

+73 -1
+72
hw/virtio/virtio-balloon.c
··· 321 321 balloon_stats_change_timer(s, 0); 322 322 } 323 323 324 + static void virtio_balloon_handle_report(VirtIODevice *vdev, VirtQueue *vq) 325 + { 326 + VirtIOBalloon *dev = VIRTIO_BALLOON(vdev); 327 + VirtQueueElement *elem; 328 + 329 + while ((elem = virtqueue_pop(vq, sizeof(VirtQueueElement)))) { 330 + unsigned int i; 331 + 332 + /* 333 + * When we discard the page it has the effect of removing the page 334 + * from the hypervisor itself and causing it to be zeroed when it 335 + * is returned to us. So we must not discard the page if it is 336 + * accessible by another device or process, or if the guest is 337 + * expecting it to retain a non-zero value. 338 + */ 339 + if (qemu_balloon_is_inhibited() || dev->poison_val) { 340 + goto skip_element; 341 + } 342 + 343 + for (i = 0; i < elem->in_num; i++) { 344 + void *addr = elem->in_sg[i].iov_base; 345 + size_t size = elem->in_sg[i].iov_len; 346 + ram_addr_t ram_offset; 347 + RAMBlock *rb; 348 + 349 + /* 350 + * There is no need to check the memory section to see if 351 + * it is ram/readonly/romd like there is for handle_output 352 + * below. If the region is not meant to be written to then 353 + * address_space_map will have allocated a bounce buffer 354 + * and it will be freed in address_space_unmap and trigger 355 + * an unassigned_mem_write before failing to copy over the 356 + * buffer. If more than one bad descriptor is provided it 357 + * will return NULL after the first bounce buffer and fail 358 + * to map any resources. 359 + */ 360 + rb = qemu_ram_block_from_host(addr, false, &ram_offset); 361 + if (!rb) { 362 + trace_virtio_balloon_bad_addr(elem->in_addr[i]); 363 + continue; 364 + } 365 + 366 + /* 367 + * For now we will simply ignore unaligned memory regions, or 368 + * regions that overrun the end of the RAMBlock. 
369 + */ 370 + if (!QEMU_IS_ALIGNED(ram_offset | size, qemu_ram_pagesize(rb)) || 371 + (ram_offset + size) > qemu_ram_get_used_length(rb)) { 372 + continue; 373 + } 374 + 375 + ram_block_discard_range(rb, ram_offset, size); 376 + } 377 + 378 + skip_element: 379 + virtqueue_push(vq, elem, 0); 380 + virtio_notify(vdev, vq); 381 + g_free(elem); 382 + } 383 + } 384 + 324 385 static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq) 325 386 { 326 387 VirtIOBalloon *s = VIRTIO_BALLOON(vdev); ··· 835 896 s->free_page_bh = aio_bh_new(iothread_get_aio_context(s->iothread), 836 897 virtio_ballloon_get_free_page_hints, s); 837 898 } 899 + 900 + if (virtio_has_feature(s->host_features, VIRTIO_BALLOON_F_REPORTING)) { 901 + s->reporting_vq = virtio_add_queue(vdev, 32, 902 + virtio_balloon_handle_report); 903 + } 904 + 838 905 reset_stats(s); 839 906 } 840 907 ··· 857 924 virtio_delete_queue(s->svq); 858 925 if (s->free_page_vq) { 859 926 virtio_delete_queue(s->free_page_vq); 927 + } 928 + if (s->reporting_vq) { 929 + virtio_delete_queue(s->reporting_vq); 860 930 } 861 931 virtio_cleanup(vdev); 862 932 } ··· 945 1015 VIRTIO_BALLOON_F_FREE_PAGE_HINT, false), 946 1016 DEFINE_PROP_BIT("page-poison", VirtIOBalloon, host_features, 947 1017 VIRTIO_BALLOON_F_PAGE_POISON, true), 1018 + DEFINE_PROP_BIT("free-page-reporting", VirtIOBalloon, host_features, 1019 + VIRTIO_BALLOON_F_REPORTING, false), 948 1020 /* QEMU 4.0 accidentally changed the config size even when free-page-hint 949 1021 * is disabled, resulting in QEMU 3.1 migration incompatibility. This 950 1022 * property retains this quirk for QEMU 4.1 machine types.
+1 -1
include/hw/virtio/virtio-balloon.h
··· 42 42 43 43 typedef struct VirtIOBalloon { 44 44 VirtIODevice parent_obj; 45 - VirtQueue *ivq, *dvq, *svq, *free_page_vq; 45 + VirtQueue *ivq, *dvq, *svq, *free_page_vq, *reporting_vq; 46 46 uint32_t free_page_report_status; 47 47 uint32_t num_pages; 48 48 uint32_t actual;