qemu with hax to log dma reads & writes jcs.org/2018/11/12/vfio

migration: add postcopy migration of dirty bitmaps

Postcopy migration of dirty bitmaps. Only named dirty bitmaps are migrated.

If the destination QEMU already contains a dirty bitmap with the same name
as a migrated bitmap (for the same node), then the migration proceeds if
their granularities are the same; otherwise an error is generated.

If the destination QEMU doesn't contain such a bitmap, it will be created.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: 20180313180320.339796-12-vsementsov@virtuozzo.com
[Changed '+' to '*' as per list discussion. --js]
Signed-off-by: John Snow <jsnow@redhat.com>

authored by

Vladimir Sementsov-Ogievskiy and committed by
John Snow
b35ebdf0 16b0fd32

+775
+3
include/migration/misc.h
··· 56 56 bool migration_in_postcopy_after_devices(MigrationState *); 57 57 void migration_global_dump(Monitor *mon); 58 58 59 + /* migration/block-dirty-bitmap.c */ 60 + void dirty_bitmap_mig_init(void); 61 + 59 62 #endif
+1
migration/Makefile.objs
··· 6 6 common-obj-y += qemu-file-channel.o 7 7 common-obj-y += xbzrle.o postcopy-ram.o 8 8 common-obj-y += qjson.o 9 + common-obj-y += block-dirty-bitmap.o 9 10 10 11 common-obj-$(CONFIG_RDMA) += rdma.o 11 12
+746
migration/block-dirty-bitmap.c
··· 1 + /* 2 + * Block dirty bitmap postcopy migration 3 + * 4 + * Copyright IBM, Corp. 2009 5 + * Copyright (c) 2016-2017 Virtuozzo International GmbH. All rights reserved. 6 + * 7 + * Authors: 8 + * Liran Schour <lirans@il.ibm.com> 9 + * Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> 10 + * 11 + * This work is licensed under the terms of the GNU GPL, version 2. See 12 + * the COPYING file in the top-level directory. 13 + * This file is derived from migration/block.c, so it's author and IBM copyright 14 + * are here, although content is quite different. 15 + * 16 + * Contributions after 2012-01-13 are licensed under the terms of the 17 + * GNU GPL, version 2 or (at your option) any later version. 18 + * 19 + * *** 20 + * 21 + * Here postcopy migration of dirty bitmaps is realized. Only QMP-addressable 22 + * bitmaps are migrated. 23 + * 24 + * Bitmap migration implies creating bitmap with the same name and granularity 25 + * in destination QEMU. If the bitmap with the same name (for the same node) 26 + * already exists on destination an error will be generated. 27 + * 28 + * format of migration: 29 + * 30 + * # Header (shared for different chunk types) 31 + * 1, 2 or 4 bytes: flags (see qemu_{put,put}_flags) 32 + * [ 1 byte: node name size ] \ flags & DEVICE_NAME 33 + * [ n bytes: node name ] / 34 + * [ 1 byte: bitmap name size ] \ flags & BITMAP_NAME 35 + * [ n bytes: bitmap name ] / 36 + * 37 + * # Start of bitmap migration (flags & START) 38 + * header 39 + * be64: granularity 40 + * 1 byte: bitmap flags (corresponds to BdrvDirtyBitmap) 41 + * bit 0 - bitmap is enabled 42 + * bit 1 - bitmap is persistent 43 + * bit 2 - bitmap is autoloading 44 + * bits 3-7 - reserved, must be zero 45 + * 46 + * # Complete of bitmap migration (flags & COMPLETE) 47 + * header 48 + * 49 + * # Data chunk of bitmap migration 50 + * header 51 + * be64: start sector 52 + * be32: number of sectors 53 + * [ be64: buffer size ] \ ! 
(flags & ZEROES) 54 + * [ n bytes: buffer ] / 55 + * 56 + * The last chunk in stream should contain flags & EOS. The chunk may skip 57 + * device and/or bitmap names, assuming them to be the same with the previous 58 + * chunk. 59 + */ 60 + 61 + #include "qemu/osdep.h" 62 + #include "block/block.h" 63 + #include "block/block_int.h" 64 + #include "sysemu/block-backend.h" 65 + #include "qemu/main-loop.h" 66 + #include "qemu/error-report.h" 67 + #include "migration/misc.h" 68 + #include "migration/migration.h" 69 + #include "migration/qemu-file.h" 70 + #include "migration/vmstate.h" 71 + #include "migration/register.h" 72 + #include "qemu/hbitmap.h" 73 + #include "sysemu/sysemu.h" 74 + #include "qemu/cutils.h" 75 + #include "qapi/error.h" 76 + #include "trace.h" 77 + 78 + #define CHUNK_SIZE (1 << 10) 79 + 80 + /* Flags occupy one, two or four bytes (Big Endian). The size is determined as 81 + * follows: 82 + * in first (most significant) byte bit 8 is clear --> one byte 83 + * in first byte bit 8 is set --> two or four bytes, depending on second 84 + * byte: 85 + * | in second byte bit 8 is clear --> two bytes 86 + * | in second byte bit 8 is set --> four bytes 87 + */ 88 + #define DIRTY_BITMAP_MIG_FLAG_EOS 0x01 89 + #define DIRTY_BITMAP_MIG_FLAG_ZEROES 0x02 90 + #define DIRTY_BITMAP_MIG_FLAG_BITMAP_NAME 0x04 91 + #define DIRTY_BITMAP_MIG_FLAG_DEVICE_NAME 0x08 92 + #define DIRTY_BITMAP_MIG_FLAG_START 0x10 93 + #define DIRTY_BITMAP_MIG_FLAG_COMPLETE 0x20 94 + #define DIRTY_BITMAP_MIG_FLAG_BITS 0x40 95 + 96 + #define DIRTY_BITMAP_MIG_EXTRA_FLAGS 0x80 97 + 98 + #define DIRTY_BITMAP_MIG_START_FLAG_ENABLED 0x01 99 + #define DIRTY_BITMAP_MIG_START_FLAG_PERSISTENT 0x02 100 + /* 0x04 was "AUTOLOAD" flags on elder versions, no it is ignored */ 101 + #define DIRTY_BITMAP_MIG_START_FLAG_RESERVED_MASK 0xf8 102 + 103 + typedef struct DirtyBitmapMigBitmapState { 104 + /* Written during setup phase. 
*/ 105 + BlockDriverState *bs; 106 + const char *node_name; 107 + BdrvDirtyBitmap *bitmap; 108 + uint64_t total_sectors; 109 + uint64_t sectors_per_chunk; 110 + QSIMPLEQ_ENTRY(DirtyBitmapMigBitmapState) entry; 111 + uint8_t flags; 112 + 113 + /* For bulk phase. */ 114 + bool bulk_completed; 115 + uint64_t cur_sector; 116 + } DirtyBitmapMigBitmapState; 117 + 118 + typedef struct DirtyBitmapMigState { 119 + QSIMPLEQ_HEAD(dbms_list, DirtyBitmapMigBitmapState) dbms_list; 120 + 121 + bool bulk_completed; 122 + bool no_bitmaps; 123 + 124 + /* for send_bitmap_bits() */ 125 + BlockDriverState *prev_bs; 126 + BdrvDirtyBitmap *prev_bitmap; 127 + } DirtyBitmapMigState; 128 + 129 + typedef struct DirtyBitmapLoadState { 130 + uint32_t flags; 131 + char node_name[256]; 132 + char bitmap_name[256]; 133 + BlockDriverState *bs; 134 + BdrvDirtyBitmap *bitmap; 135 + } DirtyBitmapLoadState; 136 + 137 + static DirtyBitmapMigState dirty_bitmap_mig_state; 138 + 139 + typedef struct DirtyBitmapLoadBitmapState { 140 + BlockDriverState *bs; 141 + BdrvDirtyBitmap *bitmap; 142 + bool migrated; 143 + } DirtyBitmapLoadBitmapState; 144 + static GSList *enabled_bitmaps; 145 + QemuMutex finish_lock; 146 + 147 + void init_dirty_bitmap_incoming_migration(void) 148 + { 149 + qemu_mutex_init(&finish_lock); 150 + } 151 + 152 + static uint32_t qemu_get_bitmap_flags(QEMUFile *f) 153 + { 154 + uint8_t flags = qemu_get_byte(f); 155 + if (flags & DIRTY_BITMAP_MIG_EXTRA_FLAGS) { 156 + flags = flags << 8 | qemu_get_byte(f); 157 + if (flags & DIRTY_BITMAP_MIG_EXTRA_FLAGS) { 158 + flags = flags << 16 | qemu_get_be16(f); 159 + } 160 + } 161 + 162 + return flags; 163 + } 164 + 165 + static void qemu_put_bitmap_flags(QEMUFile *f, uint32_t flags) 166 + { 167 + /* The code currently do not send flags more than one byte */ 168 + assert(!(flags & (0xffffff00 | DIRTY_BITMAP_MIG_EXTRA_FLAGS))); 169 + 170 + qemu_put_byte(f, flags); 171 + } 172 + 173 + static void send_bitmap_header(QEMUFile *f, DirtyBitmapMigBitmapState 
*dbms, 174 + uint32_t additional_flags) 175 + { 176 + BlockDriverState *bs = dbms->bs; 177 + BdrvDirtyBitmap *bitmap = dbms->bitmap; 178 + uint32_t flags = additional_flags; 179 + trace_send_bitmap_header_enter(); 180 + 181 + if (bs != dirty_bitmap_mig_state.prev_bs) { 182 + dirty_bitmap_mig_state.prev_bs = bs; 183 + flags |= DIRTY_BITMAP_MIG_FLAG_DEVICE_NAME; 184 + } 185 + 186 + if (bitmap != dirty_bitmap_mig_state.prev_bitmap) { 187 + dirty_bitmap_mig_state.prev_bitmap = bitmap; 188 + flags |= DIRTY_BITMAP_MIG_FLAG_BITMAP_NAME; 189 + } 190 + 191 + qemu_put_bitmap_flags(f, flags); 192 + 193 + if (flags & DIRTY_BITMAP_MIG_FLAG_DEVICE_NAME) { 194 + qemu_put_counted_string(f, dbms->node_name); 195 + } 196 + 197 + if (flags & DIRTY_BITMAP_MIG_FLAG_BITMAP_NAME) { 198 + qemu_put_counted_string(f, bdrv_dirty_bitmap_name(bitmap)); 199 + } 200 + } 201 + 202 + static void send_bitmap_start(QEMUFile *f, DirtyBitmapMigBitmapState *dbms) 203 + { 204 + send_bitmap_header(f, dbms, DIRTY_BITMAP_MIG_FLAG_START); 205 + qemu_put_be32(f, bdrv_dirty_bitmap_granularity(dbms->bitmap)); 206 + qemu_put_byte(f, dbms->flags); 207 + } 208 + 209 + static void send_bitmap_complete(QEMUFile *f, DirtyBitmapMigBitmapState *dbms) 210 + { 211 + send_bitmap_header(f, dbms, DIRTY_BITMAP_MIG_FLAG_COMPLETE); 212 + } 213 + 214 + static void send_bitmap_bits(QEMUFile *f, DirtyBitmapMigBitmapState *dbms, 215 + uint64_t start_sector, uint32_t nr_sectors) 216 + { 217 + /* align for buffer_is_zero() */ 218 + uint64_t align = 4 * sizeof(long); 219 + uint64_t unaligned_size = 220 + bdrv_dirty_bitmap_serialization_size( 221 + dbms->bitmap, start_sector << BDRV_SECTOR_BITS, 222 + (uint64_t)nr_sectors << BDRV_SECTOR_BITS); 223 + uint64_t buf_size = QEMU_ALIGN_UP(unaligned_size, align); 224 + uint8_t *buf = g_malloc0(buf_size); 225 + uint32_t flags = DIRTY_BITMAP_MIG_FLAG_BITS; 226 + 227 + bdrv_dirty_bitmap_serialize_part( 228 + dbms->bitmap, buf, start_sector << BDRV_SECTOR_BITS, 229 + (uint64_t)nr_sectors << 
BDRV_SECTOR_BITS); 230 + 231 + if (buffer_is_zero(buf, buf_size)) { 232 + g_free(buf); 233 + buf = NULL; 234 + flags |= DIRTY_BITMAP_MIG_FLAG_ZEROES; 235 + } 236 + 237 + trace_send_bitmap_bits(flags, start_sector, nr_sectors, buf_size); 238 + 239 + send_bitmap_header(f, dbms, flags); 240 + 241 + qemu_put_be64(f, start_sector); 242 + qemu_put_be32(f, nr_sectors); 243 + 244 + /* if a block is zero we need to flush here since the network 245 + * bandwidth is now a lot higher than the storage device bandwidth. 246 + * thus if we queue zero blocks we slow down the migration. */ 247 + if (flags & DIRTY_BITMAP_MIG_FLAG_ZEROES) { 248 + qemu_fflush(f); 249 + } else { 250 + qemu_put_be64(f, buf_size); 251 + qemu_put_buffer(f, buf, buf_size); 252 + } 253 + 254 + g_free(buf); 255 + } 256 + 257 + /* Called with iothread lock taken. */ 258 + static void dirty_bitmap_mig_cleanup(void) 259 + { 260 + DirtyBitmapMigBitmapState *dbms; 261 + 262 + while ((dbms = QSIMPLEQ_FIRST(&dirty_bitmap_mig_state.dbms_list)) != NULL) { 263 + QSIMPLEQ_REMOVE_HEAD(&dirty_bitmap_mig_state.dbms_list, entry); 264 + bdrv_dirty_bitmap_set_qmp_locked(dbms->bitmap, false); 265 + bdrv_unref(dbms->bs); 266 + g_free(dbms); 267 + } 268 + } 269 + 270 + /* Called with iothread lock taken. 
*/ 271 + static int init_dirty_bitmap_migration(void) 272 + { 273 + BlockDriverState *bs; 274 + BdrvDirtyBitmap *bitmap; 275 + DirtyBitmapMigBitmapState *dbms; 276 + BdrvNextIterator it; 277 + 278 + dirty_bitmap_mig_state.bulk_completed = false; 279 + dirty_bitmap_mig_state.prev_bs = NULL; 280 + dirty_bitmap_mig_state.prev_bitmap = NULL; 281 + dirty_bitmap_mig_state.no_bitmaps = false; 282 + 283 + for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { 284 + const char *drive_name = bdrv_get_device_or_node_name(bs); 285 + 286 + /* skip automatically inserted nodes */ 287 + while (bs && bs->drv && bs->implicit) { 288 + bs = backing_bs(bs); 289 + } 290 + 291 + for (bitmap = bdrv_dirty_bitmap_next(bs, NULL); bitmap; 292 + bitmap = bdrv_dirty_bitmap_next(bs, bitmap)) 293 + { 294 + if (!bdrv_dirty_bitmap_name(bitmap)) { 295 + continue; 296 + } 297 + 298 + if (drive_name == NULL) { 299 + error_report("Found bitmap '%s' in unnamed node %p. It can't " 300 + "be migrated", bdrv_dirty_bitmap_name(bitmap), bs); 301 + goto fail; 302 + } 303 + 304 + if (bdrv_dirty_bitmap_frozen(bitmap)) { 305 + error_report("Can't migrate frozen dirty bitmap: '%s", 306 + bdrv_dirty_bitmap_name(bitmap)); 307 + goto fail; 308 + } 309 + 310 + if (bdrv_dirty_bitmap_qmp_locked(bitmap)) { 311 + error_report("Can't migrate locked dirty bitmap: '%s", 312 + bdrv_dirty_bitmap_name(bitmap)); 313 + goto fail; 314 + } 315 + 316 + bdrv_ref(bs); 317 + bdrv_dirty_bitmap_set_qmp_locked(bitmap, true); 318 + 319 + dbms = g_new0(DirtyBitmapMigBitmapState, 1); 320 + dbms->bs = bs; 321 + dbms->node_name = drive_name; 322 + dbms->bitmap = bitmap; 323 + dbms->total_sectors = bdrv_nb_sectors(bs); 324 + dbms->sectors_per_chunk = CHUNK_SIZE * 8 * 325 + bdrv_dirty_bitmap_granularity(bitmap) >> BDRV_SECTOR_BITS; 326 + if (bdrv_dirty_bitmap_enabled(bitmap)) { 327 + dbms->flags |= DIRTY_BITMAP_MIG_START_FLAG_ENABLED; 328 + } 329 + if (bdrv_dirty_bitmap_get_persistance(bitmap)) { 330 + dbms->flags |= 
DIRTY_BITMAP_MIG_START_FLAG_PERSISTENT; 331 + } 332 + 333 + QSIMPLEQ_INSERT_TAIL(&dirty_bitmap_mig_state.dbms_list, 334 + dbms, entry); 335 + } 336 + } 337 + 338 + /* unset persistance here, to not roll back it */ 339 + QSIMPLEQ_FOREACH(dbms, &dirty_bitmap_mig_state.dbms_list, entry) { 340 + bdrv_dirty_bitmap_set_persistance(dbms->bitmap, false); 341 + } 342 + 343 + if (QSIMPLEQ_EMPTY(&dirty_bitmap_mig_state.dbms_list)) { 344 + dirty_bitmap_mig_state.no_bitmaps = true; 345 + } 346 + 347 + return 0; 348 + 349 + fail: 350 + dirty_bitmap_mig_cleanup(); 351 + 352 + return -1; 353 + } 354 + 355 + /* Called with no lock taken. */ 356 + static void bulk_phase_send_chunk(QEMUFile *f, DirtyBitmapMigBitmapState *dbms) 357 + { 358 + uint32_t nr_sectors = MIN(dbms->total_sectors - dbms->cur_sector, 359 + dbms->sectors_per_chunk); 360 + 361 + send_bitmap_bits(f, dbms, dbms->cur_sector, nr_sectors); 362 + 363 + dbms->cur_sector += nr_sectors; 364 + if (dbms->cur_sector >= dbms->total_sectors) { 365 + dbms->bulk_completed = true; 366 + } 367 + } 368 + 369 + /* Called with no lock taken. 
*/ 370 + static void bulk_phase(QEMUFile *f, bool limit) 371 + { 372 + DirtyBitmapMigBitmapState *dbms; 373 + 374 + QSIMPLEQ_FOREACH(dbms, &dirty_bitmap_mig_state.dbms_list, entry) { 375 + while (!dbms->bulk_completed) { 376 + bulk_phase_send_chunk(f, dbms); 377 + if (limit && qemu_file_rate_limit(f)) { 378 + return; 379 + } 380 + } 381 + } 382 + 383 + dirty_bitmap_mig_state.bulk_completed = true; 384 + } 385 + 386 + /* for SaveVMHandlers */ 387 + static void dirty_bitmap_save_cleanup(void *opaque) 388 + { 389 + dirty_bitmap_mig_cleanup(); 390 + } 391 + 392 + static int dirty_bitmap_save_iterate(QEMUFile *f, void *opaque) 393 + { 394 + trace_dirty_bitmap_save_iterate(migration_in_postcopy()); 395 + 396 + if (migration_in_postcopy() && !dirty_bitmap_mig_state.bulk_completed) { 397 + bulk_phase(f, true); 398 + } 399 + 400 + qemu_put_bitmap_flags(f, DIRTY_BITMAP_MIG_FLAG_EOS); 401 + 402 + return dirty_bitmap_mig_state.bulk_completed; 403 + } 404 + 405 + /* Called with iothread lock taken. */ 406 + 407 + static int dirty_bitmap_save_complete(QEMUFile *f, void *opaque) 408 + { 409 + DirtyBitmapMigBitmapState *dbms; 410 + trace_dirty_bitmap_save_complete_enter(); 411 + 412 + if (!dirty_bitmap_mig_state.bulk_completed) { 413 + bulk_phase(f, false); 414 + } 415 + 416 + QSIMPLEQ_FOREACH(dbms, &dirty_bitmap_mig_state.dbms_list, entry) { 417 + send_bitmap_complete(f, dbms); 418 + } 419 + 420 + qemu_put_bitmap_flags(f, DIRTY_BITMAP_MIG_FLAG_EOS); 421 + 422 + trace_dirty_bitmap_save_complete_finish(); 423 + 424 + dirty_bitmap_mig_cleanup(); 425 + return 0; 426 + } 427 + 428 + static void dirty_bitmap_save_pending(QEMUFile *f, void *opaque, 429 + uint64_t max_size, 430 + uint64_t *res_precopy_only, 431 + uint64_t *res_compatible, 432 + uint64_t *res_postcopy_only) 433 + { 434 + DirtyBitmapMigBitmapState *dbms; 435 + uint64_t pending = 0; 436 + 437 + qemu_mutex_lock_iothread(); 438 + 439 + QSIMPLEQ_FOREACH(dbms, &dirty_bitmap_mig_state.dbms_list, entry) { 440 + uint64_t gran = 
bdrv_dirty_bitmap_granularity(dbms->bitmap); 441 + uint64_t sectors = dbms->bulk_completed ? 0 : 442 + dbms->total_sectors - dbms->cur_sector; 443 + 444 + pending += DIV_ROUND_UP(sectors * BDRV_SECTOR_SIZE, gran); 445 + } 446 + 447 + qemu_mutex_unlock_iothread(); 448 + 449 + trace_dirty_bitmap_save_pending(pending, max_size); 450 + 451 + *res_postcopy_only += pending; 452 + } 453 + 454 + /* First occurrence of this bitmap. It should be created if doesn't exist */ 455 + static int dirty_bitmap_load_start(QEMUFile *f, DirtyBitmapLoadState *s) 456 + { 457 + Error *local_err = NULL; 458 + uint32_t granularity = qemu_get_be32(f); 459 + uint8_t flags = qemu_get_byte(f); 460 + 461 + if (s->bitmap) { 462 + error_report("Bitmap with the same name ('%s') already exists on " 463 + "destination", bdrv_dirty_bitmap_name(s->bitmap)); 464 + return -EINVAL; 465 + } else { 466 + s->bitmap = bdrv_create_dirty_bitmap(s->bs, granularity, 467 + s->bitmap_name, &local_err); 468 + if (!s->bitmap) { 469 + error_report_err(local_err); 470 + return -EINVAL; 471 + } 472 + } 473 + 474 + if (flags & DIRTY_BITMAP_MIG_START_FLAG_RESERVED_MASK) { 475 + error_report("Unknown flags in migrated dirty bitmap header: %x", 476 + flags); 477 + return -EINVAL; 478 + } 479 + 480 + if (flags & DIRTY_BITMAP_MIG_START_FLAG_PERSISTENT) { 481 + bdrv_dirty_bitmap_set_persistance(s->bitmap, true); 482 + } 483 + 484 + bdrv_disable_dirty_bitmap(s->bitmap); 485 + if (flags & DIRTY_BITMAP_MIG_START_FLAG_ENABLED) { 486 + DirtyBitmapLoadBitmapState *b; 487 + 488 + bdrv_dirty_bitmap_create_successor(s->bs, s->bitmap, &local_err); 489 + if (local_err) { 490 + error_report_err(local_err); 491 + return -EINVAL; 492 + } 493 + 494 + b = g_new(DirtyBitmapLoadBitmapState, 1); 495 + b->bs = s->bs; 496 + b->bitmap = s->bitmap; 497 + b->migrated = false; 498 + enabled_bitmaps = g_slist_prepend(enabled_bitmaps, b); 499 + } 500 + 501 + return 0; 502 + } 503 + 504 + void dirty_bitmap_mig_before_vm_start(void) 505 + { 506 + GSList 
*item; 507 + 508 + qemu_mutex_lock(&finish_lock); 509 + 510 + for (item = enabled_bitmaps; item; item = g_slist_next(item)) { 511 + DirtyBitmapLoadBitmapState *b = item->data; 512 + 513 + if (b->migrated) { 514 + bdrv_enable_dirty_bitmap(b->bitmap); 515 + } else { 516 + bdrv_dirty_bitmap_enable_successor(b->bitmap); 517 + } 518 + 519 + g_free(b); 520 + } 521 + 522 + g_slist_free(enabled_bitmaps); 523 + enabled_bitmaps = NULL; 524 + 525 + qemu_mutex_unlock(&finish_lock); 526 + } 527 + 528 + static void dirty_bitmap_load_complete(QEMUFile *f, DirtyBitmapLoadState *s) 529 + { 530 + GSList *item; 531 + trace_dirty_bitmap_load_complete(); 532 + bdrv_dirty_bitmap_deserialize_finish(s->bitmap); 533 + 534 + qemu_mutex_lock(&finish_lock); 535 + 536 + for (item = enabled_bitmaps; item; item = g_slist_next(item)) { 537 + DirtyBitmapLoadBitmapState *b = item->data; 538 + 539 + if (b->bitmap == s->bitmap) { 540 + b->migrated = true; 541 + break; 542 + } 543 + } 544 + 545 + if (bdrv_dirty_bitmap_frozen(s->bitmap)) { 546 + bdrv_dirty_bitmap_lock(s->bitmap); 547 + if (enabled_bitmaps == NULL) { 548 + /* in postcopy */ 549 + bdrv_reclaim_dirty_bitmap_locked(s->bs, s->bitmap, &error_abort); 550 + bdrv_enable_dirty_bitmap(s->bitmap); 551 + } else { 552 + /* target not started, successor must be empty */ 553 + int64_t count = bdrv_get_dirty_count(s->bitmap); 554 + BdrvDirtyBitmap *ret = bdrv_reclaim_dirty_bitmap_locked(s->bs, 555 + s->bitmap, 556 + NULL); 557 + /* bdrv_reclaim_dirty_bitmap can fail only on no successor (it 558 + * must be) or on merge fail, but merge can't fail when second 559 + * bitmap is empty 560 + */ 561 + assert(ret == s->bitmap && 562 + count == bdrv_get_dirty_count(s->bitmap)); 563 + } 564 + bdrv_dirty_bitmap_unlock(s->bitmap); 565 + } 566 + 567 + qemu_mutex_unlock(&finish_lock); 568 + } 569 + 570 + static int dirty_bitmap_load_bits(QEMUFile *f, DirtyBitmapLoadState *s) 571 + { 572 + uint64_t first_byte = qemu_get_be64(f) << BDRV_SECTOR_BITS; 573 + uint64_t 
nr_bytes = (uint64_t)qemu_get_be32(f) << BDRV_SECTOR_BITS; 574 + trace_dirty_bitmap_load_bits_enter(first_byte >> BDRV_SECTOR_BITS, 575 + nr_bytes >> BDRV_SECTOR_BITS); 576 + 577 + if (s->flags & DIRTY_BITMAP_MIG_FLAG_ZEROES) { 578 + trace_dirty_bitmap_load_bits_zeroes(); 579 + bdrv_dirty_bitmap_deserialize_zeroes(s->bitmap, first_byte, nr_bytes, 580 + false); 581 + } else { 582 + size_t ret; 583 + uint8_t *buf; 584 + uint64_t buf_size = qemu_get_be64(f); 585 + uint64_t needed_size = 586 + bdrv_dirty_bitmap_serialization_size(s->bitmap, 587 + first_byte, nr_bytes); 588 + 589 + if (needed_size > buf_size || 590 + buf_size > QEMU_ALIGN_UP(needed_size, 4 * sizeof(long)) 591 + /* Here used same alignment as in send_bitmap_bits */ 592 + ) { 593 + error_report("Migrated bitmap granularity doesn't " 594 + "match the destination bitmap '%s' granularity", 595 + bdrv_dirty_bitmap_name(s->bitmap)); 596 + return -EINVAL; 597 + } 598 + 599 + buf = g_malloc(buf_size); 600 + ret = qemu_get_buffer(f, buf, buf_size); 601 + if (ret != buf_size) { 602 + error_report("Failed to read bitmap bits"); 603 + return -EIO; 604 + } 605 + 606 + bdrv_dirty_bitmap_deserialize_part(s->bitmap, buf, first_byte, nr_bytes, 607 + false); 608 + g_free(buf); 609 + } 610 + 611 + return 0; 612 + } 613 + 614 + static int dirty_bitmap_load_header(QEMUFile *f, DirtyBitmapLoadState *s) 615 + { 616 + Error *local_err = NULL; 617 + bool nothing; 618 + s->flags = qemu_get_bitmap_flags(f); 619 + trace_dirty_bitmap_load_header(s->flags); 620 + 621 + nothing = s->flags == (s->flags & DIRTY_BITMAP_MIG_FLAG_EOS); 622 + 623 + if (s->flags & DIRTY_BITMAP_MIG_FLAG_DEVICE_NAME) { 624 + if (!qemu_get_counted_string(f, s->node_name)) { 625 + error_report("Unable to read node name string"); 626 + return -EINVAL; 627 + } 628 + s->bs = bdrv_lookup_bs(s->node_name, s->node_name, &local_err); 629 + if (!s->bs) { 630 + error_report_err(local_err); 631 + return -EINVAL; 632 + } 633 + } else if (!s->bs && !nothing) { 634 + 
error_report("Error: block device name is not set"); 635 + return -EINVAL; 636 + } 637 + 638 + if (s->flags & DIRTY_BITMAP_MIG_FLAG_BITMAP_NAME) { 639 + if (!qemu_get_counted_string(f, s->bitmap_name)) { 640 + error_report("Unable to read bitmap name string"); 641 + return -EINVAL; 642 + } 643 + s->bitmap = bdrv_find_dirty_bitmap(s->bs, s->bitmap_name); 644 + 645 + /* bitmap may be NULL here, it wouldn't be an error if it is the 646 + * first occurrence of the bitmap */ 647 + if (!s->bitmap && !(s->flags & DIRTY_BITMAP_MIG_FLAG_START)) { 648 + error_report("Error: unknown dirty bitmap " 649 + "'%s' for block device '%s'", 650 + s->bitmap_name, s->node_name); 651 + return -EINVAL; 652 + } 653 + } else if (!s->bitmap && !nothing) { 654 + error_report("Error: block device name is not set"); 655 + return -EINVAL; 656 + } 657 + 658 + return 0; 659 + } 660 + 661 + static int dirty_bitmap_load(QEMUFile *f, void *opaque, int version_id) 662 + { 663 + static DirtyBitmapLoadState s; 664 + int ret = 0; 665 + 666 + trace_dirty_bitmap_load_enter(); 667 + 668 + if (version_id != 1) { 669 + return -EINVAL; 670 + } 671 + 672 + do { 673 + ret = dirty_bitmap_load_header(f, &s); 674 + 675 + if (s.flags & DIRTY_BITMAP_MIG_FLAG_START) { 676 + ret = dirty_bitmap_load_start(f, &s); 677 + } else if (s.flags & DIRTY_BITMAP_MIG_FLAG_COMPLETE) { 678 + dirty_bitmap_load_complete(f, &s); 679 + } else if (s.flags & DIRTY_BITMAP_MIG_FLAG_BITS) { 680 + ret = dirty_bitmap_load_bits(f, &s); 681 + } 682 + 683 + if (!ret) { 684 + ret = qemu_file_get_error(f); 685 + } 686 + 687 + if (ret) { 688 + return ret; 689 + } 690 + } while (!(s.flags & DIRTY_BITMAP_MIG_FLAG_EOS)); 691 + 692 + trace_dirty_bitmap_load_success(); 693 + return 0; 694 + } 695 + 696 + static int dirty_bitmap_save_setup(QEMUFile *f, void *opaque) 697 + { 698 + DirtyBitmapMigBitmapState *dbms = NULL; 699 + if (init_dirty_bitmap_migration() < 0) { 700 + return -1; 701 + } 702 + 703 + QSIMPLEQ_FOREACH(dbms, 
&dirty_bitmap_mig_state.dbms_list, entry) { 704 + send_bitmap_start(f, dbms); 705 + } 706 + qemu_put_bitmap_flags(f, DIRTY_BITMAP_MIG_FLAG_EOS); 707 + 708 + return 0; 709 + } 710 + 711 + static bool dirty_bitmap_is_active(void *opaque) 712 + { 713 + return migrate_dirty_bitmaps() && !dirty_bitmap_mig_state.no_bitmaps; 714 + } 715 + 716 + static bool dirty_bitmap_is_active_iterate(void *opaque) 717 + { 718 + return dirty_bitmap_is_active(opaque) && !runstate_is_running(); 719 + } 720 + 721 + static bool dirty_bitmap_has_postcopy(void *opaque) 722 + { 723 + return true; 724 + } 725 + 726 + static SaveVMHandlers savevm_dirty_bitmap_handlers = { 727 + .save_setup = dirty_bitmap_save_setup, 728 + .save_live_complete_postcopy = dirty_bitmap_save_complete, 729 + .save_live_complete_precopy = dirty_bitmap_save_complete, 730 + .has_postcopy = dirty_bitmap_has_postcopy, 731 + .save_live_pending = dirty_bitmap_save_pending, 732 + .save_live_iterate = dirty_bitmap_save_iterate, 733 + .is_active_iterate = dirty_bitmap_is_active_iterate, 734 + .load_state = dirty_bitmap_load, 735 + .save_cleanup = dirty_bitmap_save_cleanup, 736 + .is_active = dirty_bitmap_is_active, 737 + }; 738 + 739 + void dirty_bitmap_mig_init(void) 740 + { 741 + QSIMPLEQ_INIT(&dirty_bitmap_mig_state.dbms_list); 742 + 743 + register_savevm_live(NULL, "dirty-bitmap", 0, 1, 744 + &savevm_dirty_bitmap_handlers, 745 + &dirty_bitmap_mig_state); 746 + }
+5
migration/migration.c
··· 157 157 memset(&mis_current, 0, sizeof(MigrationIncomingState)); 158 158 qemu_mutex_init(&mis_current.rp_mutex); 159 159 qemu_event_init(&mis_current.main_thread_load_event, false); 160 + 161 + init_dirty_bitmap_incoming_migration(); 162 + 160 163 once = true; 161 164 } 162 165 return &mis_current; ··· 319 322 /* If global state section was not received or we are in running 320 323 state, we need to obey autostart. Any other state is set with 321 324 runstate_set. */ 325 + 326 + dirty_bitmap_mig_before_vm_start(); 322 327 323 328 if (!global_state_received() || 324 329 global_state_get_runstate() == RUN_STATE_RUNNING) {
+3
migration/migration.h
··· 235 235 int migrate_send_rp_req_pages(MigrationIncomingState *mis, const char* rbname, 236 236 ram_addr_t start, size_t len); 237 237 238 + void dirty_bitmap_mig_before_vm_start(void); 239 + void init_dirty_bitmap_incoming_migration(void); 240 + 238 241 #endif
+2
migration/savevm.c
··· 1693 1693 1694 1694 trace_loadvm_postcopy_handle_run_vmstart(); 1695 1695 1696 + dirty_bitmap_mig_before_vm_start(); 1697 + 1696 1698 if (autostart) { 1697 1699 /* Hold onto your hats, starting the CPU */ 1698 1700 vm_start();
+14
migration/trace-events
··· 227 227 colo_send_message(const char *msg) "Send '%s' message" 228 228 colo_receive_message(const char *msg) "Receive '%s' message" 229 229 colo_failover_set_state(const char *new_state) "new state %s" 230 + 231 + # migration/block-dirty-bitmap.c 232 + send_bitmap_header_enter(void) "" 233 + send_bitmap_bits(uint32_t flags, uint64_t start_sector, uint32_t nr_sectors, uint64_t data_size) "flags: 0x%x, start_sector: %" PRIu64 ", nr_sectors: %" PRIu32 ", data_size: %" PRIu64 234 + dirty_bitmap_save_iterate(int in_postcopy) "in postcopy: %d" 235 + dirty_bitmap_save_complete_enter(void) "" 236 + dirty_bitmap_save_complete_finish(void) "" 237 + dirty_bitmap_save_pending(uint64_t pending, uint64_t max_size) "pending %" PRIu64 " max: %" PRIu64 238 + dirty_bitmap_load_complete(void) "" 239 + dirty_bitmap_load_bits_enter(uint64_t first_sector, uint32_t nr_sectors) "chunk: %" PRIu64 " %" PRIu32 240 + dirty_bitmap_load_bits_zeroes(void) "" 241 + dirty_bitmap_load_header(uint32_t flags) "flags 0x%x" 242 + dirty_bitmap_load_enter(void) "" 243 + dirty_bitmap_load_success(void) ""
+1
vl.c
··· 4502 4502 4503 4503 blk_mig_init(); 4504 4504 ram_mig_init(); 4505 + dirty_bitmap_mig_init(); 4505 4506 4506 4507 /* If the currently selected machine wishes to override the units-per-bus 4507 4508 * property of its default HBA interface type, do so now. */