qemu with hax to log dma reads & writes jcs.org/2018/11/12/vfio

mem: add share parameter to memory-backend-ram

Currently only a file-backed memory backend can
be created with a "share" flag, in order to allow
sharing guest RAM with other processes on the host.

Add the "share" flag also to RAM Memory Backend
in order to allow remapping parts of the guest RAM
to different host virtual addresses. This is needed
by the RDMA devices in order to remap non-contiguous
QEMU virtual addresses to a contiguous virtual address range.

Moved the "share" flag to the HostMemoryBackend base class,
modified phys_mem_alloc to take the new parameter,
and added a new interface, memory_region_init_ram_shared_nomigrate.

There are no functional changes if the new flag is not used.

Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>

+94 -50
+1 -24
backends/hostmem-file.c
··· 31 31 struct HostMemoryBackendFile { 32 32 HostMemoryBackend parent_obj; 33 33 34 - bool share; 35 34 bool discard_data; 36 35 char *mem_path; 37 36 uint64_t align; ··· 59 58 path = object_get_canonical_path(OBJECT(backend)); 60 59 memory_region_init_ram_from_file(&backend->mr, OBJECT(backend), 61 60 path, 62 - backend->size, fb->align, fb->share, 61 + backend->size, fb->align, backend->share, 63 62 fb->mem_path, errp); 64 63 g_free(path); 65 64 } ··· 86 85 fb->mem_path = g_strdup(str); 87 86 } 88 87 89 - static bool file_memory_backend_get_share(Object *o, Error **errp) 90 - { 91 - HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(o); 92 - 93 - return fb->share; 94 - } 95 - 96 - static void file_memory_backend_set_share(Object *o, bool value, Error **errp) 97 - { 98 - HostMemoryBackend *backend = MEMORY_BACKEND(o); 99 - HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(o); 100 - 101 - if (host_memory_backend_mr_inited(backend)) { 102 - error_setg(errp, "cannot change property value"); 103 - return; 104 - } 105 - fb->share = value; 106 - } 107 - 108 88 static bool file_memory_backend_get_discard_data(Object *o, Error **errp) 109 89 { 110 90 return MEMORY_BACKEND_FILE(o)->discard_data; ··· 171 151 bc->alloc = file_backend_memory_alloc; 172 152 oc->unparent = file_backend_unparent; 173 153 174 - object_class_property_add_bool(oc, "share", 175 - file_memory_backend_get_share, file_memory_backend_set_share, 176 - &error_abort); 177 154 object_class_property_add_bool(oc, "discard-data", 178 155 file_memory_backend_get_discard_data, file_memory_backend_set_discard_data, 179 156 &error_abort);
+2 -2
backends/hostmem-ram.c
··· 28 28 } 29 29 30 30 path = object_get_canonical_path_component(OBJECT(backend)); 31 - memory_region_init_ram_nomigrate(&backend->mr, OBJECT(backend), path, 32 - backend->size, errp); 31 + memory_region_init_ram_shared_nomigrate(&backend->mr, OBJECT(backend), path, 32 + backend->size, backend->share, errp); 33 33 g_free(path); 34 34 } 35 35
+21
backends/hostmem.c
··· 368 368 backend->id = g_strdup(str); 369 369 } 370 370 371 + static bool host_memory_backend_get_share(Object *o, Error **errp) 372 + { 373 + HostMemoryBackend *backend = MEMORY_BACKEND(o); 374 + 375 + return backend->share; 376 + } 377 + 378 + static void host_memory_backend_set_share(Object *o, bool value, Error **errp) 379 + { 380 + HostMemoryBackend *backend = MEMORY_BACKEND(o); 381 + 382 + if (host_memory_backend_mr_inited(backend)) { 383 + error_setg(errp, "cannot change property value"); 384 + return; 385 + } 386 + backend->share = value; 387 + } 388 + 371 389 static void 372 390 host_memory_backend_class_init(ObjectClass *oc, void *data) 373 391 { ··· 398 416 host_memory_backend_get_policy, 399 417 host_memory_backend_set_policy, &error_abort); 400 418 object_class_property_add_str(oc, "id", get_id, set_id, &error_abort); 419 + object_class_property_add_bool(oc, "share", 420 + host_memory_backend_get_share, host_memory_backend_set_share, 421 + &error_abort); 401 422 } 402 423 403 424 static void host_memory_backend_finalize(Object *o)
+15 -11
exec.c
··· 1285 1285 uint16_t section); 1286 1286 static subpage_t *subpage_init(FlatView *fv, hwaddr base); 1287 1287 1288 - static void *(*phys_mem_alloc)(size_t size, uint64_t *align) = 1288 + static void *(*phys_mem_alloc)(size_t size, uint64_t *align, bool shared) = 1289 1289 qemu_anon_ram_alloc; 1290 1290 1291 1291 /* ··· 1293 1293 * Accelerators with unusual needs may need this. Hopefully, we can 1294 1294 * get rid of it eventually. 1295 1295 */ 1296 - void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align)) 1296 + void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align, bool shared)) 1297 1297 { 1298 1298 phys_mem_alloc = alloc; 1299 1299 } ··· 1921 1921 } 1922 1922 } 1923 1923 1924 - static void ram_block_add(RAMBlock *new_block, Error **errp) 1924 + static void ram_block_add(RAMBlock *new_block, Error **errp, bool shared) 1925 1925 { 1926 1926 RAMBlock *block; 1927 1927 RAMBlock *last_block = NULL; ··· 1944 1944 } 1945 1945 } else { 1946 1946 new_block->host = phys_mem_alloc(new_block->max_length, 1947 - &new_block->mr->align); 1947 + &new_block->mr->align, shared); 1948 1948 if (!new_block->host) { 1949 1949 error_setg_errno(errp, errno, 1950 1950 "cannot set up guest memory '%s'", ··· 2049 2049 return NULL; 2050 2050 } 2051 2051 2052 - ram_block_add(new_block, &local_err); 2052 + ram_block_add(new_block, &local_err, share); 2053 2053 if (local_err) { 2054 2054 g_free(new_block); 2055 2055 error_propagate(errp, local_err); ··· 2091 2091 void (*resized)(const char*, 2092 2092 uint64_t length, 2093 2093 void *host), 2094 - void *host, bool resizeable, 2094 + void *host, bool resizeable, bool share, 2095 2095 MemoryRegion *mr, Error **errp) 2096 2096 { 2097 2097 RAMBlock *new_block; ··· 2114 2114 if (resizeable) { 2115 2115 new_block->flags |= RAM_RESIZEABLE; 2116 2116 } 2117 - ram_block_add(new_block, &local_err); 2117 + ram_block_add(new_block, &local_err, share); 2118 2118 if (local_err) { 2119 2119 g_free(new_block); 2120 2120 
error_propagate(errp, local_err); ··· 2126 2126 RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host, 2127 2127 MemoryRegion *mr, Error **errp) 2128 2128 { 2129 - return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp); 2129 + return qemu_ram_alloc_internal(size, size, NULL, host, false, 2130 + false, mr, errp); 2130 2131 } 2131 2132 2132 - RAMBlock *qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp) 2133 + RAMBlock *qemu_ram_alloc(ram_addr_t size, bool share, 2134 + MemoryRegion *mr, Error **errp) 2133 2135 { 2134 - return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp); 2136 + return qemu_ram_alloc_internal(size, size, NULL, NULL, false, 2137 + share, mr, errp); 2135 2138 } 2136 2139 2137 2140 RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz, ··· 2140 2143 void *host), 2141 2144 MemoryRegion *mr, Error **errp) 2142 2145 { 2143 - return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp); 2146 + return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, 2147 + false, mr, errp); 2144 2148 } 2145 2149 2146 2150 static void reclaim_ramblock(RAMBlock *block)
+23
include/exec/memory.h
··· 436 436 Error **errp); 437 437 438 438 /** 439 + * memory_region_init_ram_shared_nomigrate: Initialize RAM memory region. 440 + * Accesses into the region will 441 + * modify memory directly. 442 + * 443 + * @mr: the #MemoryRegion to be initialized. 444 + * @owner: the object that tracks the region's reference count 445 + * @name: Region name, becomes part of RAMBlock name used in migration stream 446 + * must be unique within any device 447 + * @size: size of the region. 448 + * @share: allow remapping RAM to different addresses 449 + * @errp: pointer to Error*, to store an error if it happens. 450 + * 451 + * Note that this function is similar to memory_region_init_ram_nomigrate. 452 + * The only difference is part of the RAM region can be remapped. 453 + */ 454 + void memory_region_init_ram_shared_nomigrate(MemoryRegion *mr, 455 + struct Object *owner, 456 + const char *name, 457 + uint64_t size, 458 + bool share, 459 + Error **errp); 460 + 461 + /** 439 462 * memory_region_init_resizeable_ram: Initialize memory region with resizeable 440 463 * RAM. Accesses into the region will 441 464 * modify memory directly. Only an initial
+2 -1
include/exec/ram_addr.h
··· 80 80 Error **errp); 81 81 RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host, 82 82 MemoryRegion *mr, Error **errp); 83 - RAMBlock *qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp); 83 + RAMBlock *qemu_ram_alloc(ram_addr_t size, bool share, MemoryRegion *mr, 84 + Error **errp); 84 85 RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t max_size, 85 86 void (*resized)(const char*, 86 87 uint64_t length,
+1 -1
include/qemu/osdep.h
··· 255 255 int qemu_daemon(int nochdir, int noclose); 256 256 void *qemu_try_memalign(size_t alignment, size_t size); 257 257 void *qemu_memalign(size_t alignment, size_t size); 258 - void *qemu_anon_ram_alloc(size_t size, uint64_t *align); 258 + void *qemu_anon_ram_alloc(size_t size, uint64_t *align, bool shared); 259 259 void qemu_vfree(void *ptr); 260 260 void qemu_anon_ram_free(void *ptr, size_t size); 261 261
+1 -1
include/sysemu/hostmem.h
··· 54 54 char *id; 55 55 uint64_t size; 56 56 bool merge, dump; 57 - bool prealloc, force_prealloc, is_mapped; 57 + bool prealloc, force_prealloc, is_mapped, share; 58 58 DECLARE_BITMAP(host_nodes, MAX_NODES + 1); 59 59 HostMemPolicy policy; 60 60
+1 -1
include/sysemu/kvm.h
··· 248 248 249 249 /* interface with exec.c */ 250 250 251 - void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align)); 251 + void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align, bool shared)); 252 252 253 253 /* internal API */ 254 254
+13 -3
memory.c
··· 1539 1539 uint64_t size, 1540 1540 Error **errp) 1541 1541 { 1542 + memory_region_init_ram_shared_nomigrate(mr, owner, name, size, false, errp); 1543 + } 1544 + 1545 + void memory_region_init_ram_shared_nomigrate(MemoryRegion *mr, 1546 + Object *owner, 1547 + const char *name, 1548 + uint64_t size, 1549 + bool share, 1550 + Error **errp) 1551 + { 1542 1552 memory_region_init(mr, owner, name, size); 1543 1553 mr->ram = true; 1544 1554 mr->terminates = true; 1545 1555 mr->destructor = memory_region_destructor_ram; 1546 - mr->ram_block = qemu_ram_alloc(size, mr, errp); 1556 + mr->ram_block = qemu_ram_alloc(size, share, mr, errp); 1547 1557 mr->dirty_log_mask = tcg_enabled() ? (1 << DIRTY_MEMORY_CODE) : 0; 1548 1558 } 1549 1559 ··· 1654 1664 mr->readonly = true; 1655 1665 mr->terminates = true; 1656 1666 mr->destructor = memory_region_destructor_ram; 1657 - mr->ram_block = qemu_ram_alloc(size, mr, errp); 1667 + mr->ram_block = qemu_ram_alloc(size, false, mr, errp); 1658 1668 mr->dirty_log_mask = tcg_enabled() ? (1 << DIRTY_MEMORY_CODE) : 0; 1659 1669 } 1660 1670 ··· 1673 1683 mr->terminates = true; 1674 1684 mr->rom_device = true; 1675 1685 mr->destructor = memory_region_destructor_ram; 1676 - mr->ram_block = qemu_ram_alloc(size, mr, errp); 1686 + mr->ram_block = qemu_ram_alloc(size, false, mr, errp); 1677 1687 } 1678 1688 1679 1689 void memory_region_init_iommu(void *_iommu_mr,
+9 -1
qemu-options.hx
··· 3975 3975 region is marked as private to QEMU, or shared. The latter allows 3976 3976 a co-operating external process to access the QEMU memory region. 3977 3977 3978 + The @option{share} is also required for pvrdma devices due to 3979 + limitations in the RDMA API provided by Linux. 3980 + 3981 + Setting share=on might affect the ability to configure NUMA 3982 + bindings for the memory backend under some circumstances, see 3983 + Documentation/vm/numa_memory_policy.txt on the Linux kernel 3984 + source tree for additional details. 3985 + 3978 3986 Setting the @option{discard-data} boolean option to @var{on} 3979 3987 indicates that file contents can be destroyed when QEMU exits, 3980 3988 to avoid unnecessarily flushing data to the backing file. Note ··· 4017 4025 the device DAX /dev/dax0.0 requires 2M alignment rather than 4K. In 4018 4026 such cases, users can specify the required alignment via this option. 4019 4027 4020 - @item -object memory-backend-ram,id=@var{id},merge=@var{on|off},dump=@var{on|off},prealloc=@var{on|off},size=@var{size},host-nodes=@var{host-nodes},policy=@var{default|preferred|bind|interleave} 4028 + @item -object memory-backend-ram,id=@var{id},merge=@var{on|off},dump=@var{on|off},share=@var{on|off},prealloc=@var{on|off},size=@var{size},host-nodes=@var{host-nodes},policy=@var{default|preferred|bind|interleave} 4021 4029 4022 4030 Creates a memory backend object, which can be used to back the guest RAM. 4023 4031 Memory backend objects offer more control than the @option{-m} option that is
+2 -2
target/s390x/kvm.c
··· 144 144 145 145 static int active_cmma; 146 146 147 - static void *legacy_s390_alloc(size_t size, uint64_t *align); 147 + static void *legacy_s390_alloc(size_t size, uint64_t *align, bool shared); 148 148 149 149 static int kvm_s390_query_mem_limit(uint64_t *memory_limit) 150 150 { ··· 752 752 * to grow. We also have to use MAP parameters that avoid 753 753 * read-only mapping of guest pages. 754 754 */ 755 - static void *legacy_s390_alloc(size_t size, uint64_t *align) 755 + static void *legacy_s390_alloc(size_t size, uint64_t *align, bool shared) 756 756 { 757 757 void *mem; 758 758
+2 -2
util/oslib-posix.c
··· 127 127 } 128 128 129 129 /* alloc shared memory pages */ 130 - void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment) 130 + void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment, bool shared) 131 131 { 132 132 size_t align = QEMU_VMALLOC_ALIGN; 133 - void *ptr = qemu_ram_mmap(-1, size, align, false); 133 + void *ptr = qemu_ram_mmap(-1, size, align, shared); 134 134 135 135 if (ptr == MAP_FAILED) { 136 136 return NULL;
+1 -1
util/oslib-win32.c
··· 67 67 return qemu_oom_check(qemu_try_memalign(alignment, size)); 68 68 } 69 69 70 - void *qemu_anon_ram_alloc(size_t size, uint64_t *align) 70 + void *qemu_anon_ram_alloc(size_t size, uint64_t *align, bool shared) 71 71 { 72 72 void *ptr; 73 73