qemu with hax to log dma reads & writes jcs.org/2018/11/12/vfio

block: introduce backup-top filter driver

The backup-top filter catches write operations and performs copy-before-write
operations.

The driver will be used in backup instead of write-notifiers.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Message-id: 20191001131409.14202-5-vsementsov@virtuozzo.com
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>

authored by

Vladimir Sementsov-Ogievskiy and committed by
Max Reitz
7df7868b 0f4b02b7

+323
+1
block/Makefile.objs
··· 42 42 block-obj-y += crypto.o 43 43 44 44 block-obj-y += aio_task.o 45 + block-obj-y += backup-top.o 45 46 46 47 common-obj-y += stream.o 47 48
+281
block/backup-top.c
··· 1 + /* 2 + * backup-top filter driver 3 + * 4 + * The driver performs Copy-Before-Write (CBW) operation: it is injected above 5 + * some node, and before each write it copies _old_ data to the target node. 6 + * 7 + * Copyright (c) 2018-2019 Virtuozzo International GmbH. 8 + * 9 + * Author: 10 + * Sementsov-Ogievskiy Vladimir <vsementsov@virtuozzo.com> 11 + * 12 + * This program is free software; you can redistribute it and/or modify 13 + * it under the terms of the GNU General Public License as published by 14 + * the Free Software Foundation; either version 2 of the License, or 15 + * (at your option) any later version. 16 + * 17 + * This program is distributed in the hope that it will be useful, 18 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 19 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 20 + * GNU General Public License for more details. 21 + * 22 + * You should have received a copy of the GNU General Public License 23 + * along with this program. If not, see <http://www.gnu.org/licenses/>. 24 + */ 25 + 26 + #include "qemu/osdep.h" 27 + 28 + #include "sysemu/block-backend.h" 29 + #include "qemu/cutils.h" 30 + #include "qapi/error.h" 31 + #include "block/block_int.h" 32 + #include "block/qdict.h" 33 + #include "block/block-copy.h" 34 + 35 + #include "block/backup-top.h" 36 + 37 + typedef struct BDRVBackupTopState { 38 + BlockCopyState *bcs; 39 + BdrvChild *target; 40 + bool active; 41 + } BDRVBackupTopState; 42 + 43 + static coroutine_fn int backup_top_co_preadv( 44 + BlockDriverState *bs, uint64_t offset, uint64_t bytes, 45 + QEMUIOVector *qiov, int flags) 46 + { 47 + return bdrv_co_preadv(bs->backing, offset, bytes, qiov, flags); 48 + } 49 + 50 + static coroutine_fn int backup_top_cbw(BlockDriverState *bs, uint64_t offset, 51 + uint64_t bytes) 52 + { 53 + /* 54 + * Here we'd like to use block_copy(), but block-copy need to be moved to 55 + * use BdrvChildren to correctly use it in backup-top filter. It's a TODO. 
56 + */ 57 + 58 + abort(); 59 + } 60 + 61 + static int coroutine_fn backup_top_co_pdiscard(BlockDriverState *bs, 62 + int64_t offset, int bytes) 63 + { 64 + int ret = backup_top_cbw(bs, offset, bytes); 65 + if (ret < 0) { 66 + return ret; 67 + } 68 + 69 + return bdrv_co_pdiscard(bs->backing, offset, bytes); 70 + } 71 + 72 + static int coroutine_fn backup_top_co_pwrite_zeroes(BlockDriverState *bs, 73 + int64_t offset, int bytes, BdrvRequestFlags flags) 74 + { 75 + int ret = backup_top_cbw(bs, offset, bytes); 76 + if (ret < 0) { 77 + return ret; 78 + } 79 + 80 + return bdrv_co_pwrite_zeroes(bs->backing, offset, bytes, flags); 81 + } 82 + 83 + static coroutine_fn int backup_top_co_pwritev(BlockDriverState *bs, 84 + uint64_t offset, 85 + uint64_t bytes, 86 + QEMUIOVector *qiov, int flags) 87 + { 88 + if (!(flags & BDRV_REQ_WRITE_UNCHANGED)) { 89 + int ret = backup_top_cbw(bs, offset, bytes); 90 + if (ret < 0) { 91 + return ret; 92 + } 93 + } 94 + 95 + return bdrv_co_pwritev(bs->backing, offset, bytes, qiov, flags); 96 + } 97 + 98 + static int coroutine_fn backup_top_co_flush(BlockDriverState *bs) 99 + { 100 + if (!bs->backing) { 101 + return 0; 102 + } 103 + 104 + return bdrv_co_flush(bs->backing->bs); 105 + } 106 + 107 + static void backup_top_refresh_filename(BlockDriverState *bs) 108 + { 109 + if (bs->backing == NULL) { 110 + /* 111 + * we can be here after failed bdrv_attach_child in 112 + * bdrv_set_backing_hd 113 + */ 114 + return; 115 + } 116 + pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), 117 + bs->backing->bs->filename); 118 + } 119 + 120 + static void backup_top_child_perm(BlockDriverState *bs, BdrvChild *c, 121 + const BdrvChildRole *role, 122 + BlockReopenQueue *reopen_queue, 123 + uint64_t perm, uint64_t shared, 124 + uint64_t *nperm, uint64_t *nshared) 125 + { 126 + BDRVBackupTopState *s = bs->opaque; 127 + 128 + if (!s->active) { 129 + /* 130 + * The filter node may be in process of bdrv_append(), which firstly do 131 + * bdrv_set_backing_hd() 
and then bdrv_replace_node(). This means that 132 + * we can't unshare BLK_PERM_WRITE during bdrv_append() operation. So, 133 + * let's require nothing during bdrv_append() and refresh permissions 134 + * after it (see bdrv_backup_top_append()). 135 + */ 136 + *nperm = 0; 137 + *nshared = BLK_PERM_ALL; 138 + return; 139 + } 140 + 141 + if (role == &child_file) { 142 + /* 143 + * Target child 144 + * 145 + * Share write to target (child_file), to not interfere 146 + * with guest writes to its disk which may be in target backing chain. 147 + */ 148 + *nshared = BLK_PERM_ALL; 149 + *nperm = BLK_PERM_WRITE; 150 + } else { 151 + /* Source child */ 152 + bdrv_filter_default_perms(bs, c, role, reopen_queue, perm, shared, 153 + nperm, nshared); 154 + 155 + if (perm & BLK_PERM_WRITE) { 156 + *nperm = *nperm | BLK_PERM_CONSISTENT_READ; 157 + } 158 + *nshared &= ~BLK_PERM_WRITE; 159 + } 160 + } 161 + 162 + BlockDriver bdrv_backup_top_filter = { 163 + .format_name = "backup-top", 164 + .instance_size = sizeof(BDRVBackupTopState), 165 + 166 + .bdrv_co_preadv = backup_top_co_preadv, 167 + .bdrv_co_pwritev = backup_top_co_pwritev, 168 + .bdrv_co_pwrite_zeroes = backup_top_co_pwrite_zeroes, 169 + .bdrv_co_pdiscard = backup_top_co_pdiscard, 170 + .bdrv_co_flush = backup_top_co_flush, 171 + 172 + .bdrv_co_block_status = bdrv_co_block_status_from_backing, 173 + 174 + .bdrv_refresh_filename = backup_top_refresh_filename, 175 + 176 + .bdrv_child_perm = backup_top_child_perm, 177 + 178 + .is_filter = true, 179 + }; 180 + 181 + BlockDriverState *bdrv_backup_top_append(BlockDriverState *source, 182 + BlockDriverState *target, 183 + const char *filter_node_name, 184 + uint64_t cluster_size, 185 + BdrvRequestFlags write_flags, 186 + BlockCopyState **bcs, 187 + Error **errp) 188 + { 189 + Error *local_err = NULL; 190 + BDRVBackupTopState *state; 191 + BlockDriverState *top = bdrv_new_open_driver(&bdrv_backup_top_filter, 192 + filter_node_name, 193 + BDRV_O_RDWR, errp); 194 + 195 + if (!top) 
{ 196 + return NULL; 197 + } 198 + 199 + top->total_sectors = source->total_sectors; 200 + top->opaque = state = g_new0(BDRVBackupTopState, 1); 201 + 202 + bdrv_ref(target); 203 + state->target = bdrv_attach_child(top, target, "target", &child_file, errp); 204 + if (!state->target) { 205 + bdrv_unref(target); 206 + bdrv_unref(top); 207 + return NULL; 208 + } 209 + 210 + bdrv_drained_begin(source); 211 + 212 + bdrv_ref(top); 213 + bdrv_append(top, source, &local_err); 214 + if (local_err) { 215 + error_prepend(&local_err, "Cannot append backup-top filter: "); 216 + goto append_failed; 217 + } 218 + 219 + /* 220 + * bdrv_append() finished successfully, now we can require permissions 221 + * we want. 222 + */ 223 + state->active = true; 224 + bdrv_child_refresh_perms(top, top->backing, &local_err); 225 + if (local_err) { 226 + error_prepend(&local_err, 227 + "Cannot set permissions for backup-top filter: "); 228 + goto failed_after_append; 229 + } 230 + 231 + /* 232 + * TODO: Create block-copy-state here (which will utilize @cluster_size and 233 + * @write_flags parameters which are unused now). For this, block-copy 234 + * should be refactored to use BdrvChildren. 
235 + */ 236 + state->bcs = NULL; 237 + if (!state->bcs) { 238 + error_setg(&local_err, "Cannot create block-copy-state"); 239 + goto failed_after_append; 240 + } 241 + *bcs = state->bcs; 242 + 243 + bdrv_drained_end(source); 244 + 245 + return top; 246 + 247 + failed_after_append: 248 + state->active = false; 249 + bdrv_backup_top_drop(top); 250 + 251 + append_failed: 252 + bdrv_drained_end(source); 253 + bdrv_unref_child(top, state->target); 254 + bdrv_unref(top); 255 + error_propagate(errp, local_err); 256 + 257 + return NULL; 258 + } 259 + 260 + void bdrv_backup_top_drop(BlockDriverState *bs) 261 + { 262 + BDRVBackupTopState *s = bs->opaque; 263 + AioContext *aio_context = bdrv_get_aio_context(bs); 264 + 265 + block_copy_state_free(s->bcs); 266 + 267 + aio_context_acquire(aio_context); 268 + 269 + bdrv_drained_begin(bs); 270 + 271 + s->active = false; 272 + bdrv_child_refresh_perms(bs, bs->backing, &error_abort); 273 + bdrv_replace_node(bs, backing_bs(bs), &error_abort); 274 + bdrv_set_backing_hd(bs, NULL, &error_abort); 275 + 276 + bdrv_drained_end(bs); 277 + 278 + bdrv_unref(bs); 279 + 280 + aio_context_release(aio_context); 281 + }
+41
block/backup-top.h
··· 1 + /* 2 + * backup-top filter driver 3 + * 4 + * The driver performs Copy-Before-Write (CBW) operation: it is injected above 5 + * some node, and before each write it copies _old_ data to the target node. 6 + * 7 + * Copyright (c) 2018-2019 Virtuozzo International GmbH. 8 + * 9 + * Author: 10 + * Sementsov-Ogievskiy Vladimir <vsementsov@virtuozzo.com> 11 + * 12 + * This program is free software; you can redistribute it and/or modify 13 + * it under the terms of the GNU General Public License as published by 14 + * the Free Software Foundation; either version 2 of the License, or 15 + * (at your option) any later version. 16 + * 17 + * This program is distributed in the hope that it will be useful, 18 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 19 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 20 + * GNU General Public License for more details. 21 + * 22 + * You should have received a copy of the GNU General Public License 23 + * along with this program. If not, see <http://www.gnu.org/licenses/>. 24 + */ 25 + 26 + #ifndef BACKUP_TOP_H 27 + #define BACKUP_TOP_H 28 + 29 + #include "block/block_int.h" 30 + #include "block/block-copy.h" 31 + 32 + BlockDriverState *bdrv_backup_top_append(BlockDriverState *source, 33 + BlockDriverState *target, 34 + const char *filter_node_name, 35 + uint64_t cluster_size, 36 + BdrvRequestFlags write_flags, 37 + BlockCopyState **bcs, 38 + Error **errp); 39 + void bdrv_backup_top_drop(BlockDriverState *bs); 40 + 41 + #endif /* BACKUP_TOP_H */