QEMU with hacks to log DMA reads & writes (see jcs.org/2018/11/12/vfio)

Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging

Block layer patches:

- Fix slow pre-zeroing in qemu-img convert
- Test case for block job pausing on I/O errors

# gpg: Signature made Tue 26 Mar 2019 15:28:00 GMT
# gpg: using RSA key 7F09B272C88F2FD6
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>" [full]
# Primary key fingerprint: DC3D EB15 9A9A F95D 3D74 56FE 7F09 B272 C88F 2FD6

* remotes/kevin/tags/for-upstream:
  qemu-io: Add write -n for BDRV_REQ_NO_FALLBACK
  qemu-img: Use BDRV_REQ_NO_FALLBACK for pre-zeroing
  file-posix: Support BDRV_REQ_NO_FALLBACK for zero writes
  block: Advertise BDRV_REQ_NO_FALLBACK in filter drivers
  block: Add BDRV_REQ_NO_FALLBACK
  block: Remove error messages in bdrv_make_zero()
  iotests: add 248: test resume mirror after auto pause on ENOSPC

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>

+133 -24
+1 -1
block/blkdebug.c
··· 401 401 bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED | 402 402 (BDRV_REQ_FUA & bs->file->bs->supported_write_flags); 403 403 bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED | 404 - ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP) & 404 + ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) & 405 405 bs->file->bs->supported_zero_flags); 406 406 ret = -EINVAL; 407 407
+3 -4
block/copy-on-read.c
··· 34 34 } 35 35 36 36 bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED | 37 - (BDRV_REQ_FUA & 38 - bs->file->bs->supported_write_flags); 37 + (BDRV_REQ_FUA & bs->file->bs->supported_write_flags); 39 38 40 39 bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED | 41 - ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP) & 42 - bs->file->bs->supported_zero_flags); 40 + ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) & 41 + bs->file->bs->supported_zero_flags); 43 42 44 43 return 0; 45 44 }
+16 -8
block/file-posix.c
··· 652 652 } 653 653 #endif 654 654 655 - bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP; 655 + bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK; 656 656 ret = 0; 657 657 fail: 658 658 if (filename && (bdrv_flags & BDRV_O_TEMPORARY)) { ··· 1500 1500 } 1501 1501 1502 1502 #ifdef BLKZEROOUT 1503 - do { 1504 - uint64_t range[2] = { aiocb->aio_offset, aiocb->aio_nbytes }; 1505 - if (ioctl(aiocb->aio_fildes, BLKZEROOUT, range) == 0) { 1506 - return 0; 1507 - } 1508 - } while (errno == EINTR); 1503 + /* The BLKZEROOUT implementation in the kernel doesn't set 1504 + * BLKDEV_ZERO_NOFALLBACK, so we can't call this if we have to avoid slow 1505 + * fallbacks. */ 1506 + if (!(aiocb->aio_type & QEMU_AIO_NO_FALLBACK)) { 1507 + do { 1508 + uint64_t range[2] = { aiocb->aio_offset, aiocb->aio_nbytes }; 1509 + if (ioctl(aiocb->aio_fildes, BLKZEROOUT, range) == 0) { 1510 + return 0; 1511 + } 1512 + } while (errno == EINTR); 1509 1513 1510 - ret = translate_err(-errno); 1514 + ret = translate_err(-errno); 1515 + } 1511 1516 #endif 1512 1517 1513 1518 if (ret == -ENOTSUP) { ··· 2658 2663 2659 2664 if (blkdev) { 2660 2665 acb.aio_type |= QEMU_AIO_BLKDEV; 2666 + } 2667 + if (flags & BDRV_REQ_NO_FALLBACK) { 2668 + acb.aio_type |= QEMU_AIO_NO_FALLBACK; 2661 2669 } 2662 2670 2663 2671 if (flags & BDRV_REQ_MAY_UNMAP) {
+11 -5
block/io.c
··· 909 909 } 910 910 ret = bdrv_block_status(bs, offset, bytes, &bytes, NULL, NULL); 911 911 if (ret < 0) { 912 - error_report("error getting block status at offset %" PRId64 ": %s", 913 - offset, strerror(-ret)); 914 912 return ret; 915 913 } 916 914 if (ret & BDRV_BLOCK_ZERO) { ··· 919 917 } 920 918 ret = bdrv_pwrite_zeroes(child, offset, bytes, flags); 921 919 if (ret < 0) { 922 - error_report("error writing zeroes at offset %" PRId64 ": %s", 923 - offset, strerror(-ret)); 924 920 return ret; 925 921 } 926 922 offset += bytes; ··· 1019 1015 unsigned int nb_sectors; 1020 1016 1021 1017 assert(!(flags & ~BDRV_REQ_MASK)); 1018 + assert(!(flags & BDRV_REQ_NO_FALLBACK)); 1022 1019 1023 1020 if (!drv) { 1024 1021 return -ENOMEDIUM; ··· 1065 1062 int ret; 1066 1063 1067 1064 assert(!(flags & ~BDRV_REQ_MASK)); 1065 + assert(!(flags & BDRV_REQ_NO_FALLBACK)); 1068 1066 1069 1067 if (!drv) { 1070 1068 return -ENOMEDIUM; ··· 1471 1469 return -ENOMEDIUM; 1472 1470 } 1473 1471 1472 + if ((flags & ~bs->supported_zero_flags) & BDRV_REQ_NO_FALLBACK) { 1473 + return -ENOTSUP; 1474 + } 1475 + 1474 1476 assert(alignment % bs->bl.request_alignment == 0); 1475 1477 head = offset % alignment; 1476 1478 tail = (offset + bytes) % alignment; ··· 1514 1516 assert(!bs->supported_zero_flags); 1515 1517 } 1516 1518 1517 - if (ret == -ENOTSUP) { 1519 + if (ret == -ENOTSUP && !(flags & BDRV_REQ_NO_FALLBACK)) { 1518 1520 /* Fall back to bounce buffer if write zeroes is unsupported */ 1519 1521 BdrvRequestFlags write_flags = flags & ~BDRV_REQ_ZERO_WRITE; 1520 1522 ··· 2952 2954 { 2953 2955 BdrvTrackedRequest req; 2954 2956 int ret; 2957 + 2958 + /* TODO We can support BDRV_REQ_NO_FALLBACK here */ 2959 + assert(!(read_flags & BDRV_REQ_NO_FALLBACK)); 2960 + assert(!(write_flags & BDRV_REQ_NO_FALLBACK)); 2955 2961 2956 2962 if (!dst || !dst->bs) { 2957 2963 return -ENOMEDIUM;
+2 -1
block/mirror.c
··· 1548 1548 } 1549 1549 mirror_top_bs->total_sectors = bs->total_sectors; 1550 1550 mirror_top_bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED; 1551 - mirror_top_bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED; 1551 + mirror_top_bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED | 1552 + BDRV_REQ_NO_FALLBACK; 1552 1553 bs_opaque = g_new0(MirrorBDSOpaque, 1); 1553 1554 mirror_top_bs->opaque = bs_opaque; 1554 1555 bdrv_set_aio_context(mirror_top_bs, bdrv_get_aio_context(bs));
+1 -1
block/raw-format.c
··· 434 434 bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED | 435 435 (BDRV_REQ_FUA & bs->file->bs->supported_write_flags); 436 436 bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED | 437 - ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP) & 437 + ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) & 438 438 bs->file->bs->supported_zero_flags); 439 439 440 440 if (bs->probed && !bdrv_is_read_only(bs)) {
+6 -1
include/block/block.h
··· 83 83 */ 84 84 BDRV_REQ_SERIALISING = 0x80, 85 85 86 + /* Execute the request only if the operation can be offloaded or otherwise 87 + * be executed efficiently, but return an error instead of using a slow 88 + * fallback. */ 89 + BDRV_REQ_NO_FALLBACK = 0x100, 90 + 86 91 /* Mask of valid flags */ 87 - BDRV_REQ_MASK = 0xff, 92 + BDRV_REQ_MASK = 0x1ff, 88 93 } BdrvRequestFlags; 89 94 90 95 typedef struct BlockSizes {
+1
include/block/raw-aio.h
··· 40 40 /* AIO flags */ 41 41 #define QEMU_AIO_MISALIGNED 0x1000 42 42 #define QEMU_AIO_BLKDEV 0x2000 43 + #define QEMU_AIO_NO_FALLBACK 0x4000 43 44 44 45 45 46 /* linux-aio.c - Linux native implementation */
+1 -1
qemu-img.c
··· 1932 1932 if (!s->has_zero_init && !s->target_has_backing && 1933 1933 bdrv_can_write_zeroes_with_unmap(blk_bs(s->target))) 1934 1934 { 1935 - ret = blk_make_zero(s->target, BDRV_REQ_MAY_UNMAP); 1935 + ret = blk_make_zero(s->target, BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK); 1936 1936 if (ret == 0) { 1937 1937 s->has_zero_init = true; 1938 1938 }
+11 -2
qemu-io-cmds.c
··· 946 946 " -b, -- write to the VM state rather than the virtual disk\n" 947 947 " -c, -- write compressed data with blk_write_compressed\n" 948 948 " -f, -- use Force Unit Access semantics\n" 949 + " -n, -- with -z, don't allow slow fallback\n" 949 950 " -p, -- ignored for backwards compatibility\n" 950 951 " -P, -- use different pattern to fill file\n" 951 952 " -C, -- report statistics in a machine parsable format\n" ··· 964 965 .perm = BLK_PERM_WRITE, 965 966 .argmin = 2, 966 967 .argmax = -1, 967 - .args = "[-bcCfquz] [-P pattern] off len", 968 + .args = "[-bcCfnquz] [-P pattern] off len", 968 969 .oneline = "writes a number of bytes at a specified offset", 969 970 .help = write_help, 970 971 }; ··· 983 984 int64_t total = 0; 984 985 int pattern = 0xcd; 985 986 986 - while ((c = getopt(argc, argv, "bcCfpP:quz")) != -1) { 987 + while ((c = getopt(argc, argv, "bcCfnpP:quz")) != -1) { 987 988 switch (c) { 988 989 case 'b': 989 990 bflag = true; ··· 997 998 case 'f': 998 999 flags |= BDRV_REQ_FUA; 999 1000 break; 1001 + case 'n': 1002 + flags |= BDRV_REQ_NO_FALLBACK; 1003 + break; 1000 1004 case 'p': 1001 1005 /* Ignored for backwards compatibility */ 1002 1006 break; ··· 1034 1038 1035 1039 if ((flags & BDRV_REQ_FUA) && (bflag || cflag)) { 1036 1040 printf("-f and -b or -c cannot be specified at the same time\n"); 1041 + return -EINVAL; 1042 + } 1043 + 1044 + if ((flags & BDRV_REQ_NO_FALLBACK) && !zflag) { 1045 + printf("-n requires -z to be specified\n"); 1037 1046 return -EINVAL; 1038 1047 } 1039 1048
+71
tests/qemu-iotests/248
··· 1 + #!/usr/bin/env python 2 + # 3 + # Test resume mirror after auto pause on ENOSPC 4 + # 5 + # Copyright (c) 2019 Virtuozzo International GmbH. All rights reserved. 6 + # 7 + # This program is free software; you can redistribute it and/or modify 8 + # it under the terms of the GNU General Public License as published by 9 + # the Free Software Foundation; either version 2 of the License, or 10 + # (at your option) any later version. 11 + # 12 + # This program is distributed in the hope that it will be useful, 13 + # but WITHOUT ANY WARRANTY; without even the implied warranty of 14 + # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 + # GNU General Public License for more details. 16 + # 17 + # You should have received a copy of the GNU General Public License 18 + # along with this program. If not, see <http://www.gnu.org/licenses/>. 19 + # 20 + 21 + import iotests 22 + from iotests import qemu_img_create, qemu_io, file_path, filter_qmp_testfiles 23 + 24 + iotests.verify_image_format(supported_fmts=['qcow2']) 25 + 26 + source, target = file_path('source', 'target') 27 + size = 5 * 1024 * 1024 28 + limit = 2 * 1024 * 1024 29 + 30 + qemu_img_create('-f', iotests.imgfmt, source, str(size)) 31 + qemu_img_create('-f', iotests.imgfmt, target, str(size)) 32 + qemu_io('-c', 'write 0 {}'.format(size), source) 33 + 34 + # raw format don't like empty files 35 + qemu_io('-c', 'write 0 {}'.format(size), target) 36 + 37 + vm = iotests.VM().add_drive(source) 38 + vm.launch() 39 + 40 + blockdev_opts = { 41 + 'driver': iotests.imgfmt, 42 + 'node-name': 'target', 43 + 'file': { 44 + 'driver': 'raw', 45 + 'size': limit, 46 + 'file': { 47 + 'driver': 'file', 48 + 'filename': target 49 + } 50 + } 51 + } 52 + vm.qmp_log('blockdev-add', filters=[filter_qmp_testfiles], **blockdev_opts) 53 + 54 + vm.qmp_log('blockdev-mirror', device='drive0', sync='full', target='target', 55 + on_target_error='enospc') 56 + 57 + vm.event_wait('JOB_STATUS_CHANGE', timeout=3.0, 58 + 
match={'data': {'status': 'paused'}}) 59 + 60 + # drop other cached events, to not interfere with further wait for 'running' 61 + vm.get_qmp_events() 62 + 63 + del blockdev_opts['file']['size'] 64 + vm.qmp_log('x-blockdev-reopen', filters=[filter_qmp_testfiles], 65 + **blockdev_opts) 66 + 67 + vm.qmp_log('block-job-resume', device='drive0') 68 + vm.event_wait('JOB_STATUS_CHANGE', timeout=1.0, 69 + match={'data': {'status': 'running'}}) 70 + 71 + vm.shutdown()
+8
tests/qemu-iotests/248.out
··· 1 + {"execute": "blockdev-add", "arguments": {"driver": "qcow2", "file": {"driver": "raw", "file": {"driver": "file", "filename": "TEST_DIR/PID-target"}, "size": 2097152}, "node-name": "target"}} 2 + {"return": {}} 3 + {"execute": "blockdev-mirror", "arguments": {"device": "drive0", "on-target-error": "enospc", "sync": "full", "target": "target"}} 4 + {"return": {}} 5 + {"execute": "x-blockdev-reopen", "arguments": {"driver": "qcow2", "file": {"driver": "raw", "file": {"driver": "file", "filename": "TEST_DIR/PID-target"}}, "node-name": "target"}} 6 + {"return": {}} 7 + {"execute": "block-job-resume", "arguments": {"device": "drive0"}} 8 + {"return": {}}
+1
tests/qemu-iotests/group
··· 246 246 245 rw auto 247 247 246 rw auto quick 248 248 247 rw auto quick 249 + 248 rw auto quick