QEMU with hacks to log DMA reads & writes; see jcs.org/2018/11/12/vfio

Merge remote-tracking branch 'remotes/dgilbert/tags/pull-migration-20190514b' into staging

Migration pull 2019-05-14

Small fixes/cleanups
One HMP/monitor fix

# gpg: Signature made Tue 14 May 2019 19:03:53 BST
# gpg: using RSA key 45F5C71B4A0CB7FB977A9FA90516331EBC5BFDE7
# gpg: Good signature from "Dr. David Alan Gilbert (RH2) <dgilbert@redhat.com>" [full]
# Primary key fingerprint: 45F5 C71B 4A0C B7FB 977A 9FA9 0516 331E BC5B FDE7

* remotes/dgilbert/tags/pull-migration-20190514b:
monitor: Call mon_get_cpu() only once at hmp_gva2gpa()
migration/ram.c: fix typos in comments
migration: Fix use-after-free during process exit
migration/savevm: wrap into qemu_loadvm_state_header()
migration/savevm: load_header before load_setup
migration/savevm: remove duplicate check of migration_is_blocked
migration: update comments of migration bitmap
migration/ram.c: start of migration_bitmap_sync_range is always 0
qemu-option.hx: Update missed parameter for colo-compare
migration/colo.h: Remove obsolete codes
migration/colo.c: Remove redundant input parameter
migration: savevm: fix error code with migration blockers
vmstate: check subsection_found is enough
migration: remove not used field xfer_limit
migration: not necessary to check ops again
migration: comment VMSTATE_UNUSED*() properly

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>

+105 -75
+1 -3
include/migration/colo.h
··· 22 22 COLO_EVENT_FAILOVER, 23 23 }; 24 24 25 - void colo_info_init(void); 26 - 27 25 void migrate_start_colo_process(MigrationState *s); 28 26 bool migration_in_colo_state(void); 29 27 ··· 37 35 COLOMode get_colo_mode(void); 38 36 39 37 /* failover */ 40 - void colo_do_failover(MigrationState *s); 38 + void colo_do_failover(void); 41 39 42 40 void colo_checkpoint_notify(void *opaque); 43 41 #endif
+14
include/migration/vmstate.h
··· 1035 1035 #define VMSTATE_BUFFER_UNSAFE(_field, _state, _version, _size) \ 1036 1036 VMSTATE_BUFFER_UNSAFE_INFO(_field, _state, _version, vmstate_info_buffer, _size) 1037 1037 1038 + /* 1039 + * These VMSTATE_UNUSED*() macros can be used to fill in the holes 1040 + * when some of the vmstate fields are obsolete to be compatible with 1041 + * migrations between new/old binaries. 1042 + * 1043 + * CAUTION: when using any of the VMSTATE_UNUSED*() macros please be 1044 + * sure that the size passed in is the size that was actually *sent* 1045 + * rather than the size of the *structure*. One example is the 1046 + * boolean type - the size of the structure can vary depending on the 1047 + * definition of boolean, however the size we actually sent is always 1048 + * 1 byte (please refer to implementation of VMSTATE_BOOL_V and 1049 + * vmstate_info_bool). So here we should always pass in size==1 1050 + * rather than size==sizeof(bool). 1051 + */ 1038 1052 #define VMSTATE_UNUSED_V(_v, _size) \ 1039 1053 VMSTATE_UNUSED_BUFFER(NULL, _v, _size) 1040 1054
+1 -1
migration/colo-failover.c
··· 39 39 return; 40 40 } 41 41 42 - colo_do_failover(NULL); 42 + colo_do_failover(); 43 43 } 44 44 45 45 void failover_request_active(Error **errp)
+1 -1
migration/colo.c
··· 193 193 } 194 194 } 195 195 196 - void colo_do_failover(MigrationState *s) 196 + void colo_do_failover(void) 197 197 { 198 198 /* Make sure VM stopped while failover happened. */ 199 199 if (!colo_runstate_is_stopped()) {
+20 -6
migration/migration.c
··· 1495 1495 } 1496 1496 } 1497 1497 1498 - static void migrate_fd_cleanup(void *opaque) 1498 + static void migrate_fd_cleanup(MigrationState *s) 1499 1499 { 1500 - MigrationState *s = opaque; 1501 - 1502 1500 qemu_bh_delete(s->cleanup_bh); 1503 1501 s->cleanup_bh = NULL; 1504 1502 ··· 1541 1539 } 1542 1540 notifier_list_notify(&migration_state_notifiers, s); 1543 1541 block_cleanup_parameters(s); 1542 + } 1543 + 1544 + static void migrate_fd_cleanup_schedule(MigrationState *s) 1545 + { 1546 + /* 1547 + * Ref the state for bh, because it may be called when 1548 + * there're already no other refs 1549 + */ 1550 + object_ref(OBJECT(s)); 1551 + qemu_bh_schedule(s->cleanup_bh); 1552 + } 1553 + 1554 + static void migrate_fd_cleanup_bh(void *opaque) 1555 + { 1556 + MigrationState *s = opaque; 1557 + migrate_fd_cleanup(s); 1558 + object_unref(OBJECT(s)); 1544 1559 } 1545 1560 1546 1561 void migrate_set_error(MigrationState *s, const Error *error) ··· 1681 1696 * locks. 1682 1697 */ 1683 1698 s->bytes_xfer = 0; 1684 - s->xfer_limit = 0; 1685 1699 s->cleanup_bh = 0; 1686 1700 s->to_dst_file = NULL; 1687 1701 s->state = MIGRATION_STATUS_NONE; ··· 3144 3158 error_report("%s: Unknown ending state %d", __func__, s->state); 3145 3159 break; 3146 3160 } 3147 - qemu_bh_schedule(s->cleanup_bh); 3161 + migrate_fd_cleanup_schedule(s); 3148 3162 qemu_mutex_unlock_iothread(); 3149 3163 } 3150 3164 ··· 3279 3293 bool resume = s->state == MIGRATION_STATUS_POSTCOPY_PAUSED; 3280 3294 3281 3295 s->expected_downtime = s->parameters.downtime_limit; 3282 - s->cleanup_bh = qemu_bh_new(migrate_fd_cleanup, s); 3296 + s->cleanup_bh = qemu_bh_new(migrate_fd_cleanup_bh, s); 3283 3297 if (error_in) { 3284 3298 migrate_fd_error(s, error_in); 3285 3299 migrate_fd_cleanup(s);
-1
migration/migration.h
··· 117 117 118 118 /*< public >*/ 119 119 size_t bytes_xfer; 120 - size_t xfer_limit; 121 120 QemuThread thread; 122 121 QEMUBH *cleanup_bh; 123 122 QEMUFile *to_dst_file;
+10 -12
migration/ram.c
··· 917 917 * - to make easier to know what to free at the end of migration 918 918 * 919 919 * This way we always know who is the owner of each "pages" struct, 920 - * and we don't need any loocking. It belongs to the migration thread 920 + * and we don't need any locking. It belongs to the migration thread 921 921 * or to the channel thread. Switching is safe because the migration 922 922 * thread is using the channel mutex when changing it, and the channel 923 923 * have to had finish with its own, otherwise pending_job can't be ··· 1630 1630 /** 1631 1631 * migration_bitmap_find_dirty: find the next dirty page from start 1632 1632 * 1633 - * Called with rcu_read_lock() to protect migration_bitmap 1634 - * 1635 - * Returns the byte offset within memory region of the start of a dirty page 1633 + * Returns the page offset within memory region of the start of a dirty page 1636 1634 * 1637 1635 * @rs: current RAM state 1638 1636 * @rb: RAMBlock where to search for dirty pages ··· 1681 1679 } 1682 1680 1683 1681 static void migration_bitmap_sync_range(RAMState *rs, RAMBlock *rb, 1684 - ram_addr_t start, ram_addr_t length) 1682 + ram_addr_t length) 1685 1683 { 1686 1684 rs->migration_dirty_pages += 1687 - cpu_physical_memory_sync_dirty_bitmap(rb, start, length, 1685 + cpu_physical_memory_sync_dirty_bitmap(rb, 0, length, 1688 1686 &rs->num_dirty_pages_period); 1689 1687 } 1690 1688 ··· 1773 1771 qemu_mutex_lock(&rs->bitmap_mutex); 1774 1772 rcu_read_lock(); 1775 1773 RAMBLOCK_FOREACH_NOT_IGNORED(block) { 1776 - migration_bitmap_sync_range(rs, block, 0, block->used_length); 1774 + migration_bitmap_sync_range(rs, block, block->used_length); 1777 1775 } 1778 1776 ram_counters.remaining = ram_bytes_remaining(); 1779 1777 rcu_read_unlock(); ··· 2146 2144 * find_dirty_block: find the next dirty page and update any state 2147 2145 * associated with the search process. 
2148 2146 * 2149 - * Returns if a page is found 2147 + * Returns true if a page is found 2150 2148 * 2151 2149 * @rs: current RAM state 2152 2150 * @pss: data about the state of the current dirty page scan ··· 2242 2240 * 2243 2241 * Skips pages that are already sent (!dirty) 2244 2242 * 2245 - * Returns if a queued page is found 2243 + * Returns true if a queued page is found 2246 2244 * 2247 2245 * @rs: current RAM state 2248 2246 * @pss: data about the state of the current dirty page scan ··· 2681 2679 RAMBlock *block; 2682 2680 2683 2681 /* caller have hold iothread lock or is in a bh, so there is 2684 - * no writing race against this migration_bitmap 2682 + * no writing race against the migration bitmap 2685 2683 */ 2686 2684 memory_global_dirty_log_stop(); 2687 2685 ··· 3449 3447 3450 3448 /* we want to check in the 1st loop, just in case it was the 1st time 3451 3449 and we had to sync the dirty bitmap. 3452 - qemu_get_clock_ns() is a bit expensive, so we only check each some 3450 + qemu_clock_get_ns() is a bit expensive, so we only check each some 3453 3451 iterations 3454 3452 */ 3455 3453 if ((i & 63) == 0) { ··· 4196 4194 memory_global_dirty_log_sync(); 4197 4195 rcu_read_lock(); 4198 4196 RAMBLOCK_FOREACH_NOT_IGNORED(block) { 4199 - migration_bitmap_sync_range(ram_state, block, 0, block->used_length); 4197 + migration_bitmap_sync_range(ram_state, block, block->used_length); 4200 4198 } 4201 4199 rcu_read_unlock(); 4202 4200
+47 -42
migration/savevm.c
··· 1157 1157 if (!se->ops || !se->ops->save_live_iterate) { 1158 1158 continue; 1159 1159 } 1160 - if (se->ops && se->ops->is_active) { 1161 - if (!se->ops->is_active(se->opaque)) { 1162 - continue; 1163 - } 1160 + if (se->ops->is_active && 1161 + !se->ops->is_active(se->opaque)) { 1162 + continue; 1164 1163 } 1165 - if (se->ops && se->ops->is_active_iterate) { 1166 - if (!se->ops->is_active_iterate(se->opaque)) { 1167 - continue; 1168 - } 1164 + if (se->ops->is_active_iterate && 1165 + !se->ops->is_active_iterate(se->opaque)) { 1166 + continue; 1169 1167 } 1170 1168 /* 1171 1169 * In the postcopy phase, any device that doesn't know how to ··· 1417 1415 ms->state == MIGRATION_STATUS_CANCELLING || 1418 1416 ms->state == MIGRATION_STATUS_COLO) { 1419 1417 error_setg(errp, QERR_MIGRATION_ACTIVE); 1420 - return -EINVAL; 1421 - } 1422 - 1423 - if (migration_is_blocked(errp)) { 1424 1418 return -EINVAL; 1425 1419 } 1426 1420 ··· 2268 2262 return 0; 2269 2263 } 2270 2264 2265 + static int qemu_loadvm_state_header(QEMUFile *f) 2266 + { 2267 + unsigned int v; 2268 + int ret; 2269 + 2270 + v = qemu_get_be32(f); 2271 + if (v != QEMU_VM_FILE_MAGIC) { 2272 + error_report("Not a migration stream"); 2273 + return -EINVAL; 2274 + } 2275 + 2276 + v = qemu_get_be32(f); 2277 + if (v == QEMU_VM_FILE_VERSION_COMPAT) { 2278 + error_report("SaveVM v2 format is obsolete and don't work anymore"); 2279 + return -ENOTSUP; 2280 + } 2281 + if (v != QEMU_VM_FILE_VERSION) { 2282 + error_report("Unsupported migration stream version"); 2283 + return -ENOTSUP; 2284 + } 2285 + 2286 + if (migrate_get_current()->send_configuration) { 2287 + if (qemu_get_byte(f) != QEMU_VM_CONFIGURATION) { 2288 + error_report("Configuration section missing"); 2289 + qemu_loadvm_state_cleanup(); 2290 + return -EINVAL; 2291 + } 2292 + ret = vmstate_load_state(f, &vmstate_configuration, &savevm_state, 0); 2293 + 2294 + if (ret) { 2295 + qemu_loadvm_state_cleanup(); 2296 + return ret; 2297 + } 2298 + } 2299 + return 0; 
2300 + } 2301 + 2271 2302 static int qemu_loadvm_state_setup(QEMUFile *f) 2272 2303 { 2273 2304 SaveStateEntry *se; ··· 2416 2447 { 2417 2448 MigrationIncomingState *mis = migration_incoming_get_current(); 2418 2449 Error *local_err = NULL; 2419 - unsigned int v; 2420 2450 int ret; 2421 2451 2422 2452 if (qemu_savevm_state_blocked(&local_err)) { ··· 2424 2454 return -EINVAL; 2425 2455 } 2426 2456 2427 - v = qemu_get_be32(f); 2428 - if (v != QEMU_VM_FILE_MAGIC) { 2429 - error_report("Not a migration stream"); 2430 - return -EINVAL; 2431 - } 2432 - 2433 - v = qemu_get_be32(f); 2434 - if (v == QEMU_VM_FILE_VERSION_COMPAT) { 2435 - error_report("SaveVM v2 format is obsolete and don't work anymore"); 2436 - return -ENOTSUP; 2437 - } 2438 - if (v != QEMU_VM_FILE_VERSION) { 2439 - error_report("Unsupported migration stream version"); 2440 - return -ENOTSUP; 2457 + ret = qemu_loadvm_state_header(f); 2458 + if (ret) { 2459 + return ret; 2441 2460 } 2442 2461 2443 2462 if (qemu_loadvm_state_setup(f) != 0) { 2444 2463 return -EINVAL; 2445 - } 2446 - 2447 - if (migrate_get_current()->send_configuration) { 2448 - if (qemu_get_byte(f) != QEMU_VM_CONFIGURATION) { 2449 - error_report("Configuration section missing"); 2450 - qemu_loadvm_state_cleanup(); 2451 - return -EINVAL; 2452 - } 2453 - ret = vmstate_load_state(f, &vmstate_configuration, &savevm_state, 0); 2454 - 2455 - if (ret) { 2456 - qemu_loadvm_state_cleanup(); 2457 - return ret; 2458 - } 2459 2464 } 2460 2465 2461 2466 cpu_synchronize_all_pre_loadvm(); ··· 2544 2549 AioContext *aio_context; 2545 2550 2546 2551 if (migration_is_blocked(errp)) { 2547 - return false; 2552 + return ret; 2548 2553 } 2549 2554 2550 2555 if (!replay_can_snapshot()) {
+4 -4
migration/vmstate.c
··· 496 496 void *opaque, QJSON *vmdesc) 497 497 { 498 498 const VMStateDescription **sub = vmsd->subsections; 499 - bool subsection_found = false; 499 + bool vmdesc_has_subsections = false; 500 500 int ret = 0; 501 501 502 502 trace_vmstate_subsection_save_top(vmsd->name); ··· 508 508 trace_vmstate_subsection_save_loop(vmsd->name, vmsdsub->name); 509 509 if (vmdesc) { 510 510 /* Only create subsection array when we have any */ 511 - if (!subsection_found) { 511 + if (!vmdesc_has_subsections) { 512 512 json_start_array(vmdesc, "subsections"); 513 - subsection_found = true; 513 + vmdesc_has_subsections = true; 514 514 } 515 515 516 516 json_start_object(vmdesc, NULL); ··· 533 533 sub++; 534 534 } 535 535 536 - if (vmdesc && subsection_found) { 536 + if (vmdesc_has_subsections) { 537 537 json_end_array(vmdesc); 538 538 } 539 539
+1 -2
monitor.c
··· 1685 1685 return; 1686 1686 } 1687 1687 1688 - gpa = cpu_get_phys_page_attrs_debug(mon_get_cpu(), 1689 - addr & TARGET_PAGE_MASK, &attrs); 1688 + gpa = cpu_get_phys_page_attrs_debug(cs, addr & TARGET_PAGE_MASK, &attrs); 1690 1689 if (gpa == -1) { 1691 1690 monitor_printf(mon, "Unmapped\n"); 1692 1691 } else {
+6 -3
qemu-options.hx
··· 4425 4425 The file format is libpcap, so it can be analyzed with tools such as tcpdump 4426 4426 or Wireshark. 4427 4427 4428 - @item -object colo-compare,id=@var{id},primary_in=@var{chardevid},secondary_in=@var{chardevid},outdev=@var{chardevid}[,vnet_hdr_support] 4428 + @item -object colo-compare,id=@var{id},primary_in=@var{chardevid},secondary_in=@var{chardevid},outdev=@var{chardevid},iothread=@var{id}[,vnet_hdr_support] 4429 4429 4430 4430 Colo-compare gets packet from primary_in@var{chardevid} and secondary_in@var{chardevid}, than compare primary packet with 4431 4431 secondary packet. If the packets are same, we will output primary 4432 4432 packet to outdev@var{chardevid}, else we will notify colo-frame 4433 4433 do checkpoint and send primary packet to outdev@var{chardevid}. 4434 - if it has the vnet_hdr_support flag, colo compare will send/recv packet with vnet_hdr_len. 4434 + In order to improve efficiency, we need to put the task of comparison 4435 + in another thread. If it has the vnet_hdr_support flag, colo compare 4436 + will send/recv packet with vnet_hdr_len. 4435 4437 4436 4438 we must use it with the help of filter-mirror and filter-redirector. 
4437 4439 ··· 4446 4448 -chardev socket,id=compare0-0,host=3.3.3.3,port=9001 4447 4449 -chardev socket,id=compare_out,host=3.3.3.3,port=9005,server,nowait 4448 4450 -chardev socket,id=compare_out0,host=3.3.3.3,port=9005 4451 + -object iothread,id=iothread1 4449 4452 -object filter-mirror,id=m0,netdev=hn0,queue=tx,outdev=mirror0 4450 4453 -object filter-redirector,netdev=hn0,id=redire0,queue=rx,indev=compare_out 4451 4454 -object filter-redirector,netdev=hn0,id=redire1,queue=rx,outdev=compare0 4452 - -object colo-compare,id=comp0,primary_in=compare0-0,secondary_in=compare1,outdev=compare_out0 4455 + -object colo-compare,id=comp0,primary_in=compare0-0,secondary_in=compare1,outdev=compare_out0,iothread=iothread1 4453 4456 4454 4457 secondary: 4455 4458 -netdev tap,id=hn0,vhost=off,script=/etc/qemu-ifup,down script=/etc/qemu-ifdown