qemu with hax to log dma reads & writes jcs.org/2018/11/12/vfio

Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging

Block layer patches

# gpg: Signature made Tue 26 Sep 2017 14:52:32 BST
# gpg: using RSA key 0x7F09B272C88F2FD6
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>"
# Primary key fingerprint: DC3D EB15 9A9A F95D 3D74 56FE 7F09 B272 C88F 2FD6

* remotes/kevin/tags/for-upstream: (24 commits)
block/qcow2-bitmap: fix use of uninitialized pointer
qemu-iotests: add shrinking image test
qcow2: add shrink image support
qcow2: add qcow2_cache_discard
qemu-img: add --shrink flag for resize
iotests: fix 181: enable postcopy-ram capability on target
qemu-iotests: Test change-backing-file command
block: Fix permissions after bdrv_reopen()
block: reopen: Queue children after their parents
block: Base permissions on rw state after reopen
block: Add reopen queue to bdrv_check_perm()
block: Add reopen_queue to bdrv_child_perm()
qemu-io: Drop write permissions before read-only reopen
block: Clean up some bad code in the vvfat driver
block/throttle-groups.c: allocate RestartData on the heap
throttle: Assert that bkt->max is valid in throttle_compute_wait()
iotests: Print full path of bad output if mismatch
iotests: use virtio aliases for 067
iotests: use -ccw on s390x for 051
iotests: use -ccw on s390x for 040, 139, and 182
...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>

+1708 -895
+5 -1
Makefile
··· 209 209 DOCS=qemu-doc.html qemu-doc.txt qemu.1 qemu-img.1 qemu-nbd.8 qemu-ga.8 210 210 DOCS+=docs/interop/qemu-qmp-ref.html docs/interop/qemu-qmp-ref.txt docs/interop/qemu-qmp-ref.7 211 211 DOCS+=docs/interop/qemu-ga-ref.html docs/interop/qemu-ga-ref.txt docs/interop/qemu-ga-ref.7 212 + DOCS+=docs/qemu-block-drivers.7 212 213 ifdef CONFIG_VIRTFS 213 214 DOCS+=fsdev/virtfs-proxy-helper.1 214 215 endif ··· 532 533 rm -f docs/interop/qemu-qmp-ref.txt docs/interop/qemu-ga-ref.txt 533 534 rm -f docs/interop/qemu-qmp-ref.pdf docs/interop/qemu-ga-ref.pdf 534 535 rm -f docs/interop/qemu-qmp-ref.html docs/interop/qemu-ga-ref.html 536 + rm -f docs/qemu-block-drivers.7 535 537 for d in $(TARGET_DIRS); do \ 536 538 rm -rf $$d || exit 1 ; \ 537 539 done ··· 576 578 $(INSTALL_DATA) qemu.1 "$(DESTDIR)$(mandir)/man1" 577 579 $(INSTALL_DIR) "$(DESTDIR)$(mandir)/man7" 578 580 $(INSTALL_DATA) docs/interop/qemu-qmp-ref.7 "$(DESTDIR)$(mandir)/man7" 581 + $(INSTALL_DATA) docs/qemu-block-drivers.7 "$(DESTDIR)$(mandir)/man7" 579 582 ifneq ($(TOOLS),) 580 583 $(INSTALL_DATA) qemu-img.1 "$(DESTDIR)$(mandir)/man1" 581 584 $(INSTALL_DIR) "$(DESTDIR)$(mandir)/man8" ··· 721 724 fsdev/virtfs-proxy-helper.1: fsdev/virtfs-proxy-helper.texi 722 725 qemu-nbd.8: qemu-nbd.texi qemu-option-trace.texi 723 726 qemu-ga.8: qemu-ga.texi 727 + docs/qemu-block-drivers.7: docs/qemu-block-drivers.texi 724 728 725 729 html: qemu-doc.html docs/interop/qemu-qmp-ref.html docs/interop/qemu-ga-ref.html 726 730 info: qemu-doc.info docs/interop/qemu-qmp-ref.info docs/interop/qemu-ga-ref.info ··· 730 734 qemu-doc.html qemu-doc.info qemu-doc.pdf qemu-doc.txt: \ 731 735 qemu-img.texi qemu-nbd.texi qemu-options.texi qemu-option-trace.texi \ 732 736 qemu-monitor.texi qemu-img-cmds.texi qemu-ga.texi \ 733 - qemu-monitor-info.texi 737 + qemu-monitor-info.texi docs/qemu-block-drivers.texi 734 738 735 739 docs/interop/qemu-ga-ref.dvi docs/interop/qemu-ga-ref.html \ 736 740 docs/interop/qemu-ga-ref.info 
docs/interop/qemu-ga-ref.pdf \
+143 -48
block.c
··· 239 239 return bs->read_only; 240 240 } 241 241 242 - /* Returns whether the image file can be written to right now */ 243 - bool bdrv_is_writable(BlockDriverState *bs) 244 - { 245 - return !bdrv_is_read_only(bs) && !(bs->open_flags & BDRV_O_INACTIVE); 246 - } 247 - 248 242 int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only, 249 243 bool ignore_allow_rdw, Error **errp) 250 244 { ··· 1531 1525 return 0; 1532 1526 } 1533 1527 1534 - static int bdrv_child_check_perm(BdrvChild *c, uint64_t perm, uint64_t shared, 1528 + static int bdrv_child_check_perm(BdrvChild *c, BlockReopenQueue *q, 1529 + uint64_t perm, uint64_t shared, 1535 1530 GSList *ignore_children, Error **errp); 1536 1531 static void bdrv_child_abort_perm_update(BdrvChild *c); 1537 1532 static void bdrv_child_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared); 1538 1533 1534 + typedef struct BlockReopenQueueEntry { 1535 + bool prepared; 1536 + BDRVReopenState state; 1537 + QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry; 1538 + } BlockReopenQueueEntry; 1539 + 1540 + /* 1541 + * Return the flags that @bs will have after the reopens in @q have 1542 + * successfully completed. If @q is NULL (or @bs is not contained in @q), 1543 + * return the current flags. 1544 + */ 1545 + static int bdrv_reopen_get_flags(BlockReopenQueue *q, BlockDriverState *bs) 1546 + { 1547 + BlockReopenQueueEntry *entry; 1548 + 1549 + if (q != NULL) { 1550 + QSIMPLEQ_FOREACH(entry, q, entry) { 1551 + if (entry->state.bs == bs) { 1552 + return entry->state.flags; 1553 + } 1554 + } 1555 + } 1556 + 1557 + return bs->open_flags; 1558 + } 1559 + 1560 + /* Returns whether the image file can be written to after the reopen queue @q 1561 + * has been successfully applied, or right now if @q is NULL. 
*/ 1562 + static bool bdrv_is_writable(BlockDriverState *bs, BlockReopenQueue *q) 1563 + { 1564 + int flags = bdrv_reopen_get_flags(q, bs); 1565 + 1566 + return (flags & (BDRV_O_RDWR | BDRV_O_INACTIVE)) == BDRV_O_RDWR; 1567 + } 1568 + 1539 1569 static void bdrv_child_perm(BlockDriverState *bs, BlockDriverState *child_bs, 1540 - BdrvChild *c, 1541 - const BdrvChildRole *role, 1570 + BdrvChild *c, const BdrvChildRole *role, 1571 + BlockReopenQueue *reopen_queue, 1542 1572 uint64_t parent_perm, uint64_t parent_shared, 1543 1573 uint64_t *nperm, uint64_t *nshared) 1544 1574 { 1545 1575 if (bs->drv && bs->drv->bdrv_child_perm) { 1546 - bs->drv->bdrv_child_perm(bs, c, role, 1576 + bs->drv->bdrv_child_perm(bs, c, role, reopen_queue, 1547 1577 parent_perm, parent_shared, 1548 1578 nperm, nshared); 1549 1579 } 1580 + /* TODO Take force_share from reopen_queue */ 1550 1581 if (child_bs && child_bs->force_share) { 1551 1582 *nshared = BLK_PERM_ALL; 1552 1583 } ··· 1561 1592 * A call to this function must always be followed by a call to bdrv_set_perm() 1562 1593 * or bdrv_abort_perm_update(). 
1563 1594 */ 1564 - static int bdrv_check_perm(BlockDriverState *bs, uint64_t cumulative_perms, 1595 + static int bdrv_check_perm(BlockDriverState *bs, BlockReopenQueue *q, 1596 + uint64_t cumulative_perms, 1565 1597 uint64_t cumulative_shared_perms, 1566 1598 GSList *ignore_children, Error **errp) 1567 1599 { ··· 1571 1603 1572 1604 /* Write permissions never work with read-only images */ 1573 1605 if ((cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) && 1574 - !bdrv_is_writable(bs)) 1606 + !bdrv_is_writable(bs, q)) 1575 1607 { 1576 1608 error_setg(errp, "Block node is read-only"); 1577 1609 return -EPERM; ··· 1596 1628 /* Check all children */ 1597 1629 QLIST_FOREACH(c, &bs->children, next) { 1598 1630 uint64_t cur_perm, cur_shared; 1599 - bdrv_child_perm(bs, c->bs, c, c->role, 1631 + bdrv_child_perm(bs, c->bs, c, c->role, q, 1600 1632 cumulative_perms, cumulative_shared_perms, 1601 1633 &cur_perm, &cur_shared); 1602 - ret = bdrv_child_check_perm(c, cur_perm, cur_shared, ignore_children, 1603 - errp); 1634 + ret = bdrv_child_check_perm(c, q, cur_perm, cur_shared, 1635 + ignore_children, errp); 1604 1636 if (ret < 0) { 1605 1637 return ret; 1606 1638 } ··· 1658 1690 /* Update all children */ 1659 1691 QLIST_FOREACH(c, &bs->children, next) { 1660 1692 uint64_t cur_perm, cur_shared; 1661 - bdrv_child_perm(bs, c->bs, c, c->role, 1693 + bdrv_child_perm(bs, c->bs, c, c->role, NULL, 1662 1694 cumulative_perms, cumulative_shared_perms, 1663 1695 &cur_perm, &cur_shared); 1664 1696 bdrv_child_set_perm(c, cur_perm, cur_shared); ··· 1726 1758 * 1727 1759 * Needs to be followed by a call to either bdrv_set_perm() or 1728 1760 * bdrv_abort_perm_update(). 
*/ 1729 - static int bdrv_check_update_perm(BlockDriverState *bs, uint64_t new_used_perm, 1761 + static int bdrv_check_update_perm(BlockDriverState *bs, BlockReopenQueue *q, 1762 + uint64_t new_used_perm, 1730 1763 uint64_t new_shared_perm, 1731 1764 GSList *ignore_children, Error **errp) 1732 1765 { ··· 1768 1801 cumulative_shared_perms &= c->shared_perm; 1769 1802 } 1770 1803 1771 - return bdrv_check_perm(bs, cumulative_perms, cumulative_shared_perms, 1804 + return bdrv_check_perm(bs, q, cumulative_perms, cumulative_shared_perms, 1772 1805 ignore_children, errp); 1773 1806 } 1774 1807 1775 1808 /* Needs to be followed by a call to either bdrv_child_set_perm() or 1776 1809 * bdrv_child_abort_perm_update(). */ 1777 - static int bdrv_child_check_perm(BdrvChild *c, uint64_t perm, uint64_t shared, 1810 + static int bdrv_child_check_perm(BdrvChild *c, BlockReopenQueue *q, 1811 + uint64_t perm, uint64_t shared, 1778 1812 GSList *ignore_children, Error **errp) 1779 1813 { 1780 1814 int ret; 1781 1815 1782 1816 ignore_children = g_slist_prepend(g_slist_copy(ignore_children), c); 1783 - ret = bdrv_check_update_perm(c->bs, perm, shared, ignore_children, errp); 1817 + ret = bdrv_check_update_perm(c->bs, q, perm, shared, ignore_children, errp); 1784 1818 g_slist_free(ignore_children); 1785 1819 1786 1820 return ret; ··· 1808 1842 { 1809 1843 int ret; 1810 1844 1811 - ret = bdrv_child_check_perm(c, perm, shared, NULL, errp); 1845 + ret = bdrv_child_check_perm(c, NULL, perm, shared, NULL, errp); 1812 1846 if (ret < 0) { 1813 1847 bdrv_child_abort_perm_update(c); 1814 1848 return ret; ··· 1827 1861 1828 1862 void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c, 1829 1863 const BdrvChildRole *role, 1864 + BlockReopenQueue *reopen_queue, 1830 1865 uint64_t perm, uint64_t shared, 1831 1866 uint64_t *nperm, uint64_t *nshared) 1832 1867 { ··· 1844 1879 1845 1880 void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c, 1846 1881 const BdrvChildRole *role, 1882 + 
BlockReopenQueue *reopen_queue, 1847 1883 uint64_t perm, uint64_t shared, 1848 1884 uint64_t *nperm, uint64_t *nshared) 1849 1885 { ··· 1853 1889 if (!backing) { 1854 1890 /* Apart from the modifications below, the same permissions are 1855 1891 * forwarded and left alone as for filters */ 1856 - bdrv_filter_default_perms(bs, c, role, perm, shared, &perm, &shared); 1892 + bdrv_filter_default_perms(bs, c, role, reopen_queue, perm, shared, 1893 + &perm, &shared); 1857 1894 1858 1895 /* Format drivers may touch metadata even if the guest doesn't write */ 1859 - if (bdrv_is_writable(bs)) { 1896 + if (bdrv_is_writable(bs, reopen_queue)) { 1860 1897 perm |= BLK_PERM_WRITE | BLK_PERM_RESIZE; 1861 1898 } 1862 1899 ··· 1945 1982 * because we're just taking a parent away, so we're loosening 1946 1983 * restrictions. */ 1947 1984 bdrv_get_cumulative_perm(old_bs, &perm, &shared_perm); 1948 - bdrv_check_perm(old_bs, perm, shared_perm, NULL, &error_abort); 1985 + bdrv_check_perm(old_bs, NULL, perm, shared_perm, NULL, &error_abort); 1949 1986 bdrv_set_perm(old_bs, perm, shared_perm); 1950 1987 } 1951 1988 ··· 1964 2001 BdrvChild *child; 1965 2002 int ret; 1966 2003 1967 - ret = bdrv_check_update_perm(child_bs, perm, shared_perm, NULL, errp); 2004 + ret = bdrv_check_update_perm(child_bs, NULL, perm, shared_perm, NULL, errp); 1968 2005 if (ret < 0) { 1969 2006 bdrv_abort_perm_update(child_bs); 1970 2007 return NULL; ··· 1999 2036 2000 2037 assert(parent_bs->drv); 2001 2038 assert(bdrv_get_aio_context(parent_bs) == bdrv_get_aio_context(child_bs)); 2002 - bdrv_child_perm(parent_bs, child_bs, NULL, child_role, 2039 + bdrv_child_perm(parent_bs, child_bs, NULL, child_role, NULL, 2003 2040 perm, shared_perm, &perm, &shared_perm); 2004 2041 2005 2042 child = bdrv_root_attach_child(child_bs, child_name, child_role, ··· 2633 2670 NULL, errp); 2634 2671 } 2635 2672 2636 - typedef struct BlockReopenQueueEntry { 2637 - bool prepared; 2638 - BDRVReopenState state; 2639 - 
QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry; 2640 - } BlockReopenQueueEntry; 2641 - 2642 2673 /* 2643 2674 * Adds a BlockDriverState to a simple queue for an atomic, transactional 2644 2675 * reopen of multiple devices. ··· 2737 2768 flags |= BDRV_O_ALLOW_RDWR; 2738 2769 } 2739 2770 2771 + if (!bs_entry) { 2772 + bs_entry = g_new0(BlockReopenQueueEntry, 1); 2773 + QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry); 2774 + } else { 2775 + QDECREF(bs_entry->state.options); 2776 + QDECREF(bs_entry->state.explicit_options); 2777 + } 2778 + 2779 + bs_entry->state.bs = bs; 2780 + bs_entry->state.options = options; 2781 + bs_entry->state.explicit_options = explicit_options; 2782 + bs_entry->state.flags = flags; 2783 + 2784 + /* This needs to be overwritten in bdrv_reopen_prepare() */ 2785 + bs_entry->state.perm = UINT64_MAX; 2786 + bs_entry->state.shared_perm = 0; 2787 + 2740 2788 QLIST_FOREACH(child, &bs->children, next) { 2741 2789 QDict *new_child_options; 2742 2790 char *child_key_dot; ··· 2755 2803 bdrv_reopen_queue_child(bs_queue, child->bs, new_child_options, 0, 2756 2804 child->role, options, flags); 2757 2805 } 2758 - 2759 - if (!bs_entry) { 2760 - bs_entry = g_new0(BlockReopenQueueEntry, 1); 2761 - QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry); 2762 - } else { 2763 - QDECREF(bs_entry->state.options); 2764 - QDECREF(bs_entry->state.explicit_options); 2765 - } 2766 - 2767 - bs_entry->state.bs = bs; 2768 - bs_entry->state.options = options; 2769 - bs_entry->state.explicit_options = explicit_options; 2770 - bs_entry->state.flags = flags; 2771 2806 2772 2807 return bs_queue; 2773 2808 } ··· 2856 2891 return ret; 2857 2892 } 2858 2893 2894 + static BlockReopenQueueEntry *find_parent_in_reopen_queue(BlockReopenQueue *q, 2895 + BdrvChild *c) 2896 + { 2897 + BlockReopenQueueEntry *entry; 2898 + 2899 + QSIMPLEQ_FOREACH(entry, q, entry) { 2900 + BlockDriverState *bs = entry->state.bs; 2901 + BdrvChild *child; 2902 + 2903 + QLIST_FOREACH(child, &bs->children, next) { 2904 + 
if (child == c) { 2905 + return entry; 2906 + } 2907 + } 2908 + } 2909 + 2910 + return NULL; 2911 + } 2912 + 2913 + static void bdrv_reopen_perm(BlockReopenQueue *q, BlockDriverState *bs, 2914 + uint64_t *perm, uint64_t *shared) 2915 + { 2916 + BdrvChild *c; 2917 + BlockReopenQueueEntry *parent; 2918 + uint64_t cumulative_perms = 0; 2919 + uint64_t cumulative_shared_perms = BLK_PERM_ALL; 2920 + 2921 + QLIST_FOREACH(c, &bs->parents, next_parent) { 2922 + parent = find_parent_in_reopen_queue(q, c); 2923 + if (!parent) { 2924 + cumulative_perms |= c->perm; 2925 + cumulative_shared_perms &= c->shared_perm; 2926 + } else { 2927 + uint64_t nperm, nshared; 2928 + 2929 + bdrv_child_perm(parent->state.bs, bs, c, c->role, q, 2930 + parent->state.perm, parent->state.shared_perm, 2931 + &nperm, &nshared); 2932 + 2933 + cumulative_perms |= nperm; 2934 + cumulative_shared_perms &= nshared; 2935 + } 2936 + } 2937 + *perm = cumulative_perms; 2938 + *shared = cumulative_shared_perms; 2939 + } 2859 2940 2860 2941 /* 2861 2942 * Prepares a BlockDriverState for reopen. 
All changes are staged in the ··· 2921 3002 goto error; 2922 3003 } 2923 3004 3005 + /* Calculate required permissions after reopening */ 3006 + bdrv_reopen_perm(queue, reopen_state->bs, 3007 + &reopen_state->perm, &reopen_state->shared_perm); 2924 3008 2925 3009 ret = bdrv_flush(reopen_state->bs); 2926 3010 if (ret) { ··· 2976 3060 } while ((entry = qdict_next(reopen_state->options, entry))); 2977 3061 } 2978 3062 3063 + ret = bdrv_check_perm(reopen_state->bs, queue, reopen_state->perm, 3064 + reopen_state->shared_perm, NULL, errp); 3065 + if (ret < 0) { 3066 + goto error; 3067 + } 3068 + 2979 3069 ret = 0; 2980 3070 2981 3071 error: ··· 3016 3106 3017 3107 bdrv_refresh_limits(bs, NULL); 3018 3108 3109 + bdrv_set_perm(reopen_state->bs, reopen_state->perm, 3110 + reopen_state->shared_perm); 3111 + 3019 3112 new_can_write = 3020 3113 !bdrv_is_read_only(bs) && !(bdrv_get_flags(bs) & BDRV_O_INACTIVE); 3021 3114 if (!old_can_write && new_can_write && drv->bdrv_reopen_bitmaps_rw) { ··· 3049 3142 } 3050 3143 3051 3144 QDECREF(reopen_state->explicit_options); 3145 + 3146 + bdrv_abort_perm_update(reopen_state->bs); 3052 3147 } 3053 3148 3054 3149 ··· 3179 3274 3180 3275 /* Check whether the required permissions can be granted on @to, ignoring 3181 3276 * all BdrvChild in @list so that they can't block themselves. 
*/ 3182 - ret = bdrv_check_update_perm(to, perm, shared, list, errp); 3277 + ret = bdrv_check_update_perm(to, NULL, perm, shared, list, errp); 3183 3278 if (ret < 0) { 3184 3279 bdrv_abort_perm_update(to); 3185 3280 goto out; ··· 4049 4144 4050 4145 /* Update permissions, they may differ for inactive nodes */ 4051 4146 bdrv_get_cumulative_perm(bs, &perm, &shared_perm); 4052 - ret = bdrv_check_perm(bs, perm, shared_perm, NULL, &local_err); 4147 + ret = bdrv_check_perm(bs, NULL, perm, shared_perm, NULL, &local_err); 4053 4148 if (ret < 0) { 4054 4149 bs->open_flags |= BDRV_O_INACTIVE; 4055 4150 error_propagate(errp, local_err); ··· 4116 4211 4117 4212 /* Update permissions, they may differ for inactive nodes */ 4118 4213 bdrv_get_cumulative_perm(bs, &perm, &shared_perm); 4119 - bdrv_check_perm(bs, perm, shared_perm, NULL, &error_abort); 4214 + bdrv_check_perm(bs, NULL, perm, shared_perm, NULL, &error_abort); 4120 4215 bdrv_set_perm(bs, perm, shared_perm); 4121 4216 } 4122 4217
+1
block/commit.c
··· 257 257 258 258 static void bdrv_commit_top_child_perm(BlockDriverState *bs, BdrvChild *c, 259 259 const BdrvChildRole *role, 260 + BlockReopenQueue *reopen_queue, 260 261 uint64_t perm, uint64_t shared, 261 262 uint64_t *nperm, uint64_t *nshared) 262 263 {
+10
block/file-posix.c
··· 2730 2730 ret = -ENOSPC; 2731 2731 } 2732 2732 2733 + if (!ret && total_size) { 2734 + uint8_t buf[BDRV_SECTOR_SIZE] = { 0 }; 2735 + int64_t zero_size = MIN(BDRV_SECTOR_SIZE, total_size); 2736 + if (lseek(fd, 0, SEEK_SET) == -1) { 2737 + ret = -errno; 2738 + } else { 2739 + ret = qemu_write_full(fd, buf, zero_size); 2740 + ret = ret == zero_size ? 0 : -errno; 2741 + } 2742 + } 2733 2743 qemu_close(fd); 2734 2744 return ret; 2735 2745 }
+1
block/mirror.c
··· 1084 1084 1085 1085 static void bdrv_mirror_top_child_perm(BlockDriverState *bs, BdrvChild *c, 1086 1086 const BdrvChildRole *role, 1087 + BlockReopenQueue *reopen_queue, 1087 1088 uint64_t perm, uint64_t shared, 1088 1089 uint64_t *nperm, uint64_t *nshared) 1089 1090 {
+1 -1
block/qcow2-bitmap.c
··· 602 602 goto fail; 603 603 } 604 604 605 - bm = g_new(Qcow2Bitmap, 1); 605 + bm = g_new0(Qcow2Bitmap, 1); 606 606 bm->table.offset = e->bitmap_table_offset; 607 607 bm->table.size = e->bitmap_table_size; 608 608 bm->flags = e->flags;
+26
block/qcow2-cache.c
··· 411 411 assert(c->entries[i].offset != 0); 412 412 c->entries[i].dirty = true; 413 413 } 414 + 415 + void *qcow2_cache_is_table_offset(BlockDriverState *bs, Qcow2Cache *c, 416 + uint64_t offset) 417 + { 418 + int i; 419 + 420 + for (i = 0; i < c->size; i++) { 421 + if (c->entries[i].offset == offset) { 422 + return qcow2_cache_get_table_addr(bs, c, i); 423 + } 424 + } 425 + return NULL; 426 + } 427 + 428 + void qcow2_cache_discard(BlockDriverState *bs, Qcow2Cache *c, void *table) 429 + { 430 + int i = qcow2_cache_get_table_idx(bs, c, table); 431 + 432 + assert(c->entries[i].ref == 0); 433 + 434 + c->entries[i].offset = 0; 435 + c->entries[i].lru_counter = 0; 436 + c->entries[i].dirty = false; 437 + 438 + qcow2_cache_table_release(bs, c, i, 1); 439 + }
+50
block/qcow2-cluster.c
··· 32 32 #include "qemu/bswap.h" 33 33 #include "trace.h" 34 34 35 + int qcow2_shrink_l1_table(BlockDriverState *bs, uint64_t exact_size) 36 + { 37 + BDRVQcow2State *s = bs->opaque; 38 + int new_l1_size, i, ret; 39 + 40 + if (exact_size >= s->l1_size) { 41 + return 0; 42 + } 43 + 44 + new_l1_size = exact_size; 45 + 46 + #ifdef DEBUG_ALLOC2 47 + fprintf(stderr, "shrink l1_table from %d to %d\n", s->l1_size, new_l1_size); 48 + #endif 49 + 50 + BLKDBG_EVENT(bs->file, BLKDBG_L1_SHRINK_WRITE_TABLE); 51 + ret = bdrv_pwrite_zeroes(bs->file, s->l1_table_offset + 52 + new_l1_size * sizeof(uint64_t), 53 + (s->l1_size - new_l1_size) * sizeof(uint64_t), 0); 54 + if (ret < 0) { 55 + goto fail; 56 + } 57 + 58 + ret = bdrv_flush(bs->file->bs); 59 + if (ret < 0) { 60 + goto fail; 61 + } 62 + 63 + BLKDBG_EVENT(bs->file, BLKDBG_L1_SHRINK_FREE_L2_CLUSTERS); 64 + for (i = s->l1_size - 1; i > new_l1_size - 1; i--) { 65 + if ((s->l1_table[i] & L1E_OFFSET_MASK) == 0) { 66 + continue; 67 + } 68 + qcow2_free_clusters(bs, s->l1_table[i] & L1E_OFFSET_MASK, 69 + s->cluster_size, QCOW2_DISCARD_ALWAYS); 70 + s->l1_table[i] = 0; 71 + } 72 + return 0; 73 + 74 + fail: 75 + /* 76 + * If the write in the l1_table failed the image may contain a partially 77 + * overwritten l1_table. In this case it would be better to clear the 78 + * l1_table in memory to avoid possible image corruption. 79 + */ 80 + memset(s->l1_table + new_l1_size, 0, 81 + (s->l1_size - new_l1_size) * sizeof(uint64_t)); 82 + return ret; 83 + } 84 + 35 85 int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size, 36 86 bool exact_size) 37 87 {
+138 -2
block/qcow2-refcount.c
··· 29 29 #include "block/qcow2.h" 30 30 #include "qemu/range.h" 31 31 #include "qemu/bswap.h" 32 + #include "qemu/cutils.h" 32 33 33 34 static int64_t alloc_clusters_noref(BlockDriverState *bs, uint64_t size); 34 35 static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs, ··· 861 862 } 862 863 s->set_refcount(refcount_block, block_index, refcount); 863 864 864 - if (refcount == 0 && s->discard_passthrough[type]) { 865 - update_refcount_discard(bs, cluster_offset, s->cluster_size); 865 + if (refcount == 0) { 866 + void *table; 867 + 868 + table = qcow2_cache_is_table_offset(bs, s->refcount_block_cache, 869 + offset); 870 + if (table != NULL) { 871 + qcow2_cache_put(bs, s->refcount_block_cache, &refcount_block); 872 + qcow2_cache_discard(bs, s->refcount_block_cache, table); 873 + } 874 + 875 + table = qcow2_cache_is_table_offset(bs, s->l2_table_cache, offset); 876 + if (table != NULL) { 877 + qcow2_cache_discard(bs, s->l2_table_cache, table); 878 + } 879 + 880 + if (s->discard_passthrough[type]) { 881 + update_refcount_discard(bs, cluster_offset, s->cluster_size); 882 + } 866 883 } 867 884 } 868 885 ··· 3045 3062 qemu_vfree(new_refblock); 3046 3063 return ret; 3047 3064 } 3065 + 3066 + static int qcow2_discard_refcount_block(BlockDriverState *bs, 3067 + uint64_t discard_block_offs) 3068 + { 3069 + BDRVQcow2State *s = bs->opaque; 3070 + uint64_t refblock_offs = get_refblock_offset(s, discard_block_offs); 3071 + uint64_t cluster_index = discard_block_offs >> s->cluster_bits; 3072 + uint32_t block_index = cluster_index & (s->refcount_block_size - 1); 3073 + void *refblock; 3074 + int ret; 3075 + 3076 + assert(discard_block_offs != 0); 3077 + 3078 + ret = qcow2_cache_get(bs, s->refcount_block_cache, refblock_offs, 3079 + &refblock); 3080 + if (ret < 0) { 3081 + return ret; 3082 + } 3083 + 3084 + if (s->get_refcount(refblock, block_index) != 1) { 3085 + qcow2_signal_corruption(bs, true, -1, -1, "Invalid refcount:" 3086 + " refblock offset %#" PRIx64 3087 
+ ", reftable index %u" 3088 + ", block offset %#" PRIx64 3089 + ", refcount %#" PRIx64, 3090 + refblock_offs, 3091 + offset_to_reftable_index(s, discard_block_offs), 3092 + discard_block_offs, 3093 + s->get_refcount(refblock, block_index)); 3094 + qcow2_cache_put(bs, s->refcount_block_cache, &refblock); 3095 + return -EINVAL; 3096 + } 3097 + s->set_refcount(refblock, block_index, 0); 3098 + 3099 + qcow2_cache_entry_mark_dirty(bs, s->refcount_block_cache, refblock); 3100 + 3101 + qcow2_cache_put(bs, s->refcount_block_cache, &refblock); 3102 + 3103 + if (cluster_index < s->free_cluster_index) { 3104 + s->free_cluster_index = cluster_index; 3105 + } 3106 + 3107 + refblock = qcow2_cache_is_table_offset(bs, s->refcount_block_cache, 3108 + discard_block_offs); 3109 + if (refblock) { 3110 + /* discard refblock from the cache if refblock is cached */ 3111 + qcow2_cache_discard(bs, s->refcount_block_cache, refblock); 3112 + } 3113 + update_refcount_discard(bs, discard_block_offs, s->cluster_size); 3114 + 3115 + return 0; 3116 + } 3117 + 3118 + int qcow2_shrink_reftable(BlockDriverState *bs) 3119 + { 3120 + BDRVQcow2State *s = bs->opaque; 3121 + uint64_t *reftable_tmp = 3122 + g_malloc(s->refcount_table_size * sizeof(uint64_t)); 3123 + int i, ret; 3124 + 3125 + for (i = 0; i < s->refcount_table_size; i++) { 3126 + int64_t refblock_offs = s->refcount_table[i] & REFT_OFFSET_MASK; 3127 + void *refblock; 3128 + bool unused_block; 3129 + 3130 + if (refblock_offs == 0) { 3131 + reftable_tmp[i] = 0; 3132 + continue; 3133 + } 3134 + ret = qcow2_cache_get(bs, s->refcount_block_cache, refblock_offs, 3135 + &refblock); 3136 + if (ret < 0) { 3137 + goto out; 3138 + } 3139 + 3140 + /* the refblock has own reference */ 3141 + if (i == offset_to_reftable_index(s, refblock_offs)) { 3142 + uint64_t block_index = (refblock_offs >> s->cluster_bits) & 3143 + (s->refcount_block_size - 1); 3144 + uint64_t refcount = s->get_refcount(refblock, block_index); 3145 + 3146 + s->set_refcount(refblock, 
block_index, 0); 3147 + 3148 + unused_block = buffer_is_zero(refblock, s->cluster_size); 3149 + 3150 + s->set_refcount(refblock, block_index, refcount); 3151 + } else { 3152 + unused_block = buffer_is_zero(refblock, s->cluster_size); 3153 + } 3154 + qcow2_cache_put(bs, s->refcount_block_cache, &refblock); 3155 + 3156 + reftable_tmp[i] = unused_block ? 0 : cpu_to_be64(s->refcount_table[i]); 3157 + } 3158 + 3159 + ret = bdrv_pwrite_sync(bs->file, s->refcount_table_offset, reftable_tmp, 3160 + s->refcount_table_size * sizeof(uint64_t)); 3161 + /* 3162 + * If the write in the reftable failed the image may contain a partially 3163 + * overwritten reftable. In this case it would be better to clear the 3164 + * reftable in memory to avoid possible image corruption. 3165 + */ 3166 + for (i = 0; i < s->refcount_table_size; i++) { 3167 + if (s->refcount_table[i] && !reftable_tmp[i]) { 3168 + if (ret == 0) { 3169 + ret = qcow2_discard_refcount_block(bs, s->refcount_table[i] & 3170 + REFT_OFFSET_MASK); 3171 + } 3172 + s->refcount_table[i] = 0; 3173 + } 3174 + } 3175 + 3176 + if (!s->cache_discards) { 3177 + qcow2_process_discards(bs, ret); 3178 + } 3179 + 3180 + out: 3181 + g_free(reftable_tmp); 3182 + return ret; 3183 + }
+34 -9
block/qcow2.c
··· 3104 3104 } 3105 3105 3106 3106 old_length = bs->total_sectors * 512; 3107 + new_l1_size = size_to_l1(s, offset); 3107 3108 3108 - /* shrinking is currently not supported */ 3109 3109 if (offset < old_length) { 3110 - error_setg(errp, "qcow2 doesn't support shrinking images yet"); 3111 - return -ENOTSUP; 3112 - } 3110 + if (prealloc != PREALLOC_MODE_OFF) { 3111 + error_setg(errp, 3112 + "Preallocation can't be used for shrinking an image"); 3113 + return -EINVAL; 3114 + } 3115 + 3116 + ret = qcow2_cluster_discard(bs, ROUND_UP(offset, s->cluster_size), 3117 + old_length - ROUND_UP(offset, 3118 + s->cluster_size), 3119 + QCOW2_DISCARD_ALWAYS, true); 3120 + if (ret < 0) { 3121 + error_setg_errno(errp, -ret, "Failed to discard cropped clusters"); 3122 + return ret; 3123 + } 3124 + 3125 + ret = qcow2_shrink_l1_table(bs, new_l1_size); 3126 + if (ret < 0) { 3127 + error_setg_errno(errp, -ret, 3128 + "Failed to reduce the number of L2 tables"); 3129 + return ret; 3130 + } 3113 3131 3114 - new_l1_size = size_to_l1(s, offset); 3115 - ret = qcow2_grow_l1_table(bs, new_l1_size, true); 3116 - if (ret < 0) { 3117 - error_setg_errno(errp, -ret, "Failed to grow the L1 table"); 3118 - return ret; 3132 + ret = qcow2_shrink_reftable(bs); 3133 + if (ret < 0) { 3134 + error_setg_errno(errp, -ret, 3135 + "Failed to discard unused refblocks"); 3136 + return ret; 3137 + } 3138 + } else { 3139 + ret = qcow2_grow_l1_table(bs, new_l1_size, true); 3140 + if (ret < 0) { 3141 + error_setg_errno(errp, -ret, "Failed to grow the L1 table"); 3142 + return ret; 3143 + } 3119 3144 } 3120 3145 3121 3146 switch (prealloc) {
+17
block/qcow2.h
··· 521 521 return r1 > r2 ? r1 - r2 : r2 - r1; 522 522 } 523 523 524 + static inline 525 + uint32_t offset_to_reftable_index(BDRVQcow2State *s, uint64_t offset) 526 + { 527 + return offset >> (s->refcount_block_bits + s->cluster_bits); 528 + } 529 + 530 + static inline uint64_t get_refblock_offset(BDRVQcow2State *s, uint64_t offset) 531 + { 532 + uint32_t index = offset_to_reftable_index(s, offset); 533 + return s->refcount_table[index] & REFT_OFFSET_MASK; 534 + } 535 + 524 536 /* qcow2.c functions */ 525 537 int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov, 526 538 int64_t sector_num, int nb_sectors); ··· 584 596 int qcow2_change_refcount_order(BlockDriverState *bs, int refcount_order, 585 597 BlockDriverAmendStatusCB *status_cb, 586 598 void *cb_opaque, Error **errp); 599 + int qcow2_shrink_reftable(BlockDriverState *bs); 587 600 588 601 /* qcow2-cluster.c functions */ 589 602 int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size, 590 603 bool exact_size); 604 + int qcow2_shrink_l1_table(BlockDriverState *bs, uint64_t max_size); 591 605 int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index); 592 606 int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset); 593 607 int qcow2_encrypt_sectors(BDRVQcow2State *s, int64_t sector_num, ··· 649 663 int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset, 650 664 void **table); 651 665 void qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table); 666 + void *qcow2_cache_is_table_offset(BlockDriverState *bs, Qcow2Cache *c, 667 + uint64_t offset); 668 + void qcow2_cache_discard(BlockDriverState *bs, Qcow2Cache *c, void *table); 652 669 653 670 /* qcow2-bitmap.c functions */ 654 671 int qcow2_check_bitmaps_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
+1
block/replication.c
··· 157 157 158 158 static void replication_child_perm(BlockDriverState *bs, BdrvChild *c, 159 159 const BdrvChildRole *role, 160 + BlockReopenQueue *reopen_queue, 160 161 uint64_t perm, uint64_t shared, 161 162 uint64_t *nperm, uint64_t *nshared) 162 163 {
+7 -5
block/throttle-groups.c
··· 403 403 schedule_next_request(tgm, is_write); 404 404 qemu_mutex_unlock(&tg->lock); 405 405 } 406 + 407 + g_free(data); 406 408 } 407 409 408 410 static void throttle_group_restart_queue(ThrottleGroupMember *tgm, bool is_write) 409 411 { 410 412 Coroutine *co; 411 - RestartData rd = { 412 - .tgm = tgm, 413 - .is_write = is_write 414 - }; 413 + RestartData *rd = g_new0(RestartData, 1); 415 414 416 - co = qemu_coroutine_create(throttle_group_restart_queue_entry, &rd); 415 + rd->tgm = tgm; 416 + rd->is_write = is_write; 417 + 418 + co = qemu_coroutine_create(throttle_group_restart_queue_entry, rd); 417 419 aio_co_enter(tgm->aio_context, co); 418 420 } 419 421
+3 -24
block/vvfat.c
··· 57 57 58 58 static void checkpoint(void); 59 59 60 - #ifdef __MINGW32__ 61 - void nonono(const char* file, int line, const char* msg) { 62 - fprintf(stderr, "Nonono! %s:%d %s\n", file, line, msg); 63 - exit(-5); 64 - } 65 - #undef assert 66 - #define assert(a) do {if (!(a)) nonono(__FILE__, __LINE__, #a);}while(0) 67 - #endif 68 - 69 60 #else 70 61 71 62 #define DLOG(a) ··· 3211 3202 3212 3203 static void vvfat_child_perm(BlockDriverState *bs, BdrvChild *c, 3213 3204 const BdrvChildRole *role, 3205 + BlockReopenQueue *reopen_queue, 3214 3206 uint64_t perm, uint64_t shared, 3215 3207 uint64_t *nperm, uint64_t *nshared) 3216 3208 { ··· 3270 3262 block_init(bdrv_vvfat_init); 3271 3263 3272 3264 #ifdef DEBUG 3273 - static void checkpoint(void) { 3265 + static void checkpoint(void) 3266 + { 3274 3267 assert(((mapping_t*)array_get(&(vvv->mapping), 0))->end == 2); 3275 3268 check1(vvv); 3276 3269 check2(vvv); 3277 3270 assert(!vvv->current_mapping || vvv->current_fd || (vvv->current_mapping->mode & MODE_DIRECTORY)); 3278 - #if 0 3279 - if (((direntry_t*)vvv->directory.pointer)[1].attributes != 0xf) 3280 - fprintf(stderr, "Nonono!\n"); 3281 - mapping_t* mapping; 3282 - direntry_t* direntry; 3283 - assert(vvv->mapping.size >= vvv->mapping.item_size * vvv->mapping.next); 3284 - assert(vvv->directory.size >= vvv->directory.item_size * vvv->directory.next); 3285 - if (vvv->mapping.next<47) 3286 - return; 3287 - assert((mapping = array_get(&(vvv->mapping), 47))); 3288 - assert(mapping->dir_index < vvv->directory.next); 3289 - direntry = array_get(&(vvv->directory), mapping->dir_index); 3290 - assert(!memcmp(direntry->name, "USB H ", 11) || direntry->name[0]==0); 3291 - #endif 3292 3271 } 3293 3272 #endif
+804
docs/qemu-block-drivers.texi
··· 1 + @c man begin SYNOPSIS 2 + QEMU block driver reference manual 3 + @c man end 4 + 5 + @c man begin DESCRIPTION 6 + 7 + @node disk_images_formats 8 + @subsection Disk image file formats 9 + 10 + QEMU supports many image file formats that can be used with VMs as well as with 11 + any of the tools (like @code{qemu-img}). This includes the preferred formats 12 + raw and qcow2 as well as formats that are supported for compatibility with 13 + older QEMU versions or other hypervisors. 14 + 15 + Depending on the image format, different options can be passed to 16 + @code{qemu-img create} and @code{qemu-img convert} using the @code{-o} option. 17 + This section describes each format and the options that are supported for it. 18 + 19 + @table @option 20 + @item raw 21 + 22 + Raw disk image format. This format has the advantage of 23 + being simple and easily exportable to all other emulators. If your 24 + file system supports @emph{holes} (for example in ext2 or ext3 on 25 + Linux or NTFS on Windows), then only the written sectors will reserve 26 + space. Use @code{qemu-img info} to know the real size used by the 27 + image or @code{ls -ls} on Unix/Linux. 28 + 29 + Supported options: 30 + @table @code 31 + @item preallocation 32 + Preallocation mode (allowed values: @code{off}, @code{falloc}, @code{full}). 33 + @code{falloc} mode preallocates space for image by calling posix_fallocate(). 34 + @code{full} mode preallocates space for image by writing zeros to underlying 35 + storage. 36 + @end table 37 + 38 + @item qcow2 39 + QEMU image format, the most versatile format. Use it to have smaller 40 + images (useful if your filesystem does not support holes, for example 41 + on Windows), zlib based compression and support of multiple VM 42 + snapshots. 43 + 44 + Supported options: 45 + @table @code 46 + @item compat 47 + Determines the qcow2 version to use. @code{compat=0.10} uses the 48 + traditional image format that can be read by any QEMU since 0.10. 
49 + @code{compat=1.1} enables image format extensions that only QEMU 1.1 and 50 + newer understand (this is the default). Amongst others, this includes 51 + zero clusters, which allow efficient copy-on-read for sparse images. 52 + 53 + @item backing_file 54 + File name of a base image (see @option{create} subcommand) 55 + @item backing_fmt 56 + Image format of the base image 57 + @item encryption 58 + This option is deprecated and equivalent to @code{encrypt.format=aes} 59 + 60 + @item encrypt.format 61 + 62 + If this is set to @code{luks}, it requests that the qcow2 payload (not 63 + qcow2 header) be encrypted using the LUKS format. The passphrase to 64 + use to unlock the LUKS key slot is given by the @code{encrypt.key-secret} 65 + parameter. LUKS encryption parameters can be tuned with the other 66 + @code{encrypt.*} parameters. 67 + 68 + If this is set to @code{aes}, the image is encrypted with 128-bit AES-CBC. 69 + The encryption key is given by the @code{encrypt.key-secret} parameter. 70 + This encryption format is considered to be flawed by modern cryptography 71 + standards, suffering from a number of design problems: 72 + 73 + @itemize @minus 74 + @item The AES-CBC cipher is used with predictable initialization vectors based 75 + on the sector number. This makes it vulnerable to chosen plaintext attacks 76 + which can reveal the existence of encrypted data. 77 + @item The user passphrase is directly used as the encryption key. A poorly 78 + chosen or short passphrase will compromise the security of the encryption. 79 + @item In the event of the passphrase being compromised there is no way to 80 + change the passphrase to protect data in any qcow images. The files must 81 + be cloned, using a different encryption passphrase in the new file. The 82 + original file must then be securely erased using a program like shred, 83 + though even this is ineffective with many modern storage technologies. 
84 + @end itemize 85 + 86 + The use of this is no longer supported in system emulators. Support only 87 + remains in the command line utilities, for the purposes of data liberation 88 + and interoperability with old versions of QEMU. The @code{luks} format 89 + should be used instead. 90 + 91 + @item encrypt.key-secret 92 + 93 + Provides the ID of a @code{secret} object that contains the passphrase 94 + (@code{encrypt.format=luks}) or encryption key (@code{encrypt.format=aes}). 95 + 96 + @item encrypt.cipher-alg 97 + 98 + Name of the cipher algorithm and key length. Currently defaults 99 + to @code{aes-256}. Only used when @code{encrypt.format=luks}. 100 + 101 + @item encrypt.cipher-mode 102 + 103 + Name of the encryption mode to use. Currently defaults to @code{xts}. 104 + Only used when @code{encrypt.format=luks}. 105 + 106 + @item encrypt.ivgen-alg 107 + 108 + Name of the initialization vector generator algorithm. Currently defaults 109 + to @code{plain64}. Only used when @code{encrypt.format=luks}. 110 + 111 + @item encrypt.ivgen-hash-alg 112 + 113 + Name of the hash algorithm to use with the initialization vector generator 114 + (if required). Defaults to @code{sha256}. Only used when @code{encrypt.format=luks}. 115 + 116 + @item encrypt.hash-alg 117 + 118 + Name of the hash algorithm to use for the PBKDF algorithm. 119 + Defaults to @code{sha256}. Only used when @code{encrypt.format=luks}. 120 + 121 + @item encrypt.iter-time 122 + 123 + Amount of time, in milliseconds, to use for PBKDF algorithm per key slot. 124 + Defaults to @code{2000}. Only used when @code{encrypt.format=luks}. 125 + 126 + @item cluster_size 127 + Changes the qcow2 cluster size (must be between 512 and 2M). Smaller cluster 128 + sizes can improve the image file size whereas larger cluster sizes generally 129 + provide better performance. 130 + 131 + @item preallocation 132 + Preallocation mode (allowed values: @code{off}, @code{metadata}, @code{falloc}, 133 + @code{full}). 
An image with preallocated metadata is initially larger but can 134 + improve performance when the image needs to grow. @code{falloc} and @code{full} 135 + preallocations are like the same options of @code{raw} format, but also set up 136 + metadata. 137 + 138 + @item lazy_refcounts 139 + If this option is set to @code{on}, reference count updates are postponed with 140 + the goal of avoiding metadata I/O and improving performance. This is 141 + particularly interesting with @option{cache=writethrough} which doesn't batch 142 + metadata updates. The tradeoff is that after a host crash, the reference count 143 + tables must be rebuilt, i.e. on the next open an (automatic) @code{qemu-img 144 + check -r all} is required, which may take some time. 145 + 146 + This option can only be enabled if @code{compat=1.1} is specified. 147 + 148 + @item nocow 149 + If this option is set to @code{on}, it will turn off COW of the file. It's only 150 + valid on btrfs and has no effect on other file systems. 151 + 152 + Btrfs has low performance when hosting a VM image file, even more when the guest 153 + on the VM is also using btrfs as its file system. Turning off COW is a way to mitigate 154 + this bad performance. Generally there are two ways to turn off COW on btrfs: 155 + a) Disable it by mounting with nodatacow, then all newly created files will be 156 + NOCOW. b) For an empty file, add the NOCOW file attribute. That's what this option 157 + does. 158 + 159 + Note: this option is only valid for new or empty files. If there is an existing 160 + file which is COW and has data blocks already, it cannot be changed to NOCOW 161 + by setting @code{nocow=on}. One can issue @code{lsattr filename} to check if 162 + the NOCOW flag is set or not (Capital 'C' is NOCOW flag). 163 + 164 + @end table 165 + 166 + @item qed 167 + Old QEMU image format with support for backing files and compact image files 168 + (when your filesystem or transport medium does not support holes). 
169 + 170 + When converting QED images to qcow2, you might want to consider using the 171 + @code{lazy_refcounts=on} option to get a more QED-like behaviour. 172 + 173 + Supported options: 174 + @table @code 175 + @item backing_file 176 + File name of a base image (see @option{create} subcommand). 177 + @item backing_fmt 178 + Image file format of backing file (optional). Useful if the format cannot be 179 + autodetected because it has no header, like some vhd/vpc files. 180 + @item cluster_size 181 + Changes the cluster size (must be power-of-2 between 4K and 64K). Smaller 182 + cluster sizes can improve the image file size whereas larger cluster sizes 183 + generally provide better performance. 184 + @item table_size 185 + Changes the number of clusters per L1/L2 table (must be power-of-2 between 1 186 + and 16). There is normally no need to change this value but this option can be 187 + used for performance benchmarking. 188 + @end table 189 + 190 + @item qcow 191 + Old QEMU image format with support for backing files, compact image files, 192 + encryption and compression. 193 + 194 + Supported options: 195 + @table @code 196 + @item backing_file 197 + File name of a base image (see @option{create} subcommand) 198 + @item encryption 199 + This option is deprecated and equivalent to @code{encrypt.format=aes} 200 + 201 + @item encrypt.format 202 + If this is set to @code{aes}, the image is encrypted with 128-bit AES-CBC. 203 + The encryption key is given by the @code{encrypt.key-secret} parameter. 204 + This encryption format is considered to be flawed by modern cryptography 205 + standards, suffering from a number of design problems enumerated previously 206 + against the @code{qcow2} image format. 207 + 208 + The use of this is no longer supported in system emulators. Support only 209 + remains in the command line utilities, for the purposes of data liberation 210 + and interoperability with old versions of QEMU. 
211 + 212 + Users requiring native encryption should use the @code{qcow2} format 213 + instead with @code{encrypt.format=luks}. 214 + 215 + @item encrypt.key-secret 216 + 217 + Provides the ID of a @code{secret} object that contains the encryption 218 + key (@code{encrypt.format=aes}). 219 + 220 + @end table 221 + 222 + @item luks 223 + 224 + LUKS v1 encryption format, compatible with Linux dm-crypt/cryptsetup. 225 + 226 + Supported options: 227 + @table @code 228 + 229 + @item key-secret 230 + 231 + Provides the ID of a @code{secret} object that contains the passphrase. 232 + 233 + @item cipher-alg 234 + 235 + Name of the cipher algorithm and key length. Currently defaults 236 + to @code{aes-256}. 237 + 238 + @item cipher-mode 239 + 240 + Name of the encryption mode to use. Currently defaults to @code{xts}. 241 + 242 + @item ivgen-alg 243 + 244 + Name of the initialization vector generator algorithm. Currently defaults 245 + to @code{plain64}. 246 + 247 + @item ivgen-hash-alg 248 + 249 + Name of the hash algorithm to use with the initialization vector generator 250 + (if required). Defaults to @code{sha256}. 251 + 252 + @item hash-alg 253 + 254 + Name of the hash algorithm to use for the PBKDF algorithm. 255 + Defaults to @code{sha256}. 256 + 257 + @item iter-time 258 + 259 + Amount of time, in milliseconds, to use for PBKDF algorithm per key slot. 260 + Defaults to @code{2000}. 261 + 262 + @end table 263 + 264 + @item vdi 265 + VirtualBox 1.1 compatible image format. 266 + Supported options: 267 + @table @code 268 + @item static 269 + If this option is set to @code{on}, the image is created with metadata 270 + preallocation. 271 + @end table 272 + 273 + @item vmdk 274 + VMware 3 and 4 compatible image format. 275 + 276 + Supported options: 277 + @table @code 278 + @item backing_file 279 + File name of a base image (see @option{create} subcommand). 
280 + @item compat6 281 + Create a VMDK version 6 image (instead of version 4) 282 + @item hwversion 283 + Specify vmdk virtual hardware version. Compat6 flag cannot be enabled 284 + if hwversion is specified. 285 + @item subformat 286 + Specifies which VMDK subformat to use. Valid options are 287 + @code{monolithicSparse} (default), 288 + @code{monolithicFlat}, 289 + @code{twoGbMaxExtentSparse}, 290 + @code{twoGbMaxExtentFlat} and 291 + @code{streamOptimized}. 292 + @end table 293 + 294 + @item vpc 295 + VirtualPC compatible image format (VHD). 296 + Supported options: 297 + @table @code 298 + @item subformat 299 + Specifies which VHD subformat to use. Valid options are 300 + @code{dynamic} (default) and @code{fixed}. 301 + @end table 302 + 303 + @item VHDX 304 + Hyper-V compatible image format (VHDX). 305 + Supported options: 306 + @table @code 307 + @item subformat 308 + Specifies which VHDX subformat to use. Valid options are 309 + @code{dynamic} (default) and @code{fixed}. 310 + @item block_state_zero 311 + Force use of payload blocks of type 'ZERO'. Can be set to @code{on} (default) 312 + or @code{off}. When set to @code{off}, new blocks will be created as 313 + @code{PAYLOAD_BLOCK_NOT_PRESENT}, which means parsers are free to return 314 + arbitrary data for those blocks. Do not set to @code{off} when using 315 + @code{qemu-img convert} with @code{subformat=dynamic}. 316 + @item block_size 317 + Block size; min 1 MB, max 256 MB. 0 means auto-calculate based on image size. 318 + @item log_size 319 + Log size; min 1 MB. 320 + @end table 321 + @end table 322 + 323 + @subsubsection Read-only formats 324 + More disk image file formats are supported in a read-only mode. 325 + @table @option 326 + @item bochs 327 + Bochs images of @code{growing} type. 328 + @item cloop 329 + Linux Compressed Loop image, useful only to reuse directly compressed 330 + CD-ROM images present for example in the Knoppix CD-ROMs. 331 + @item dmg 332 + Apple disk image. 
333 + @item parallels 334 + Parallels disk image format. 335 + @end table 336 + 337 + 338 + @node host_drives 339 + @subsection Using host drives 340 + 341 + In addition to disk image files, QEMU can directly access host 342 + devices. We describe here the usage for QEMU version >= 0.8.3. 343 + 344 + @subsubsection Linux 345 + 346 + On Linux, you can directly use the host device filename instead of a 347 + disk image filename provided you have enough privileges to access 348 + it. For example, use @file{/dev/cdrom} to access the CDROM. 349 + 350 + @table @code 351 + @item CD 352 + You can specify a CDROM device even if no CDROM is loaded. QEMU has 353 + specific code to detect CDROM insertion or removal. CDROM ejection by 354 + the guest OS is supported. Currently only data CDs are supported. 355 + @item Floppy 356 + You can specify a floppy device even if no floppy is loaded. Floppy 357 + removal is currently not detected accurately (if you change floppy 358 + without doing floppy access while the floppy is not loaded, the guest 359 + OS will think that the same floppy is loaded). 360 + Use of the host's floppy device is deprecated, and support for it will 361 + be removed in a future release. 362 + @item Hard disks 363 + Hard disks can be used. Normally you must specify the whole disk 364 + (@file{/dev/hdb} instead of @file{/dev/hdb1}) so that the guest OS can 365 + see it as a partitioned disk. WARNING: unless you know what you are doing, it 366 + is better to only make READ-ONLY accesses to the hard disk otherwise 367 + you may corrupt your host data (use the @option{-snapshot} command 368 + line option or modify the device permissions accordingly). 369 + @end table 370 + 371 + @subsubsection Windows 372 + 373 + @table @code 374 + @item CD 375 + The preferred syntax is the drive letter (e.g. @file{d:}). The 376 + alternate syntax @file{\\.\d:} is supported. @file{/dev/cdrom} is 377 + supported as an alias to the first CDROM drive. 
378 + 379 + Currently there is no specific code to handle removable media, so it 380 + is better to use the @code{change} or @code{eject} monitor commands to 381 + change or eject media. 382 + @item Hard disks 383 + Hard disks can be used with the syntax: @file{\\.\PhysicalDrive@var{N}} 384 + where @var{N} is the drive number (0 is the first hard disk). 385 + 386 + WARNING: unless you know what you are doing, it is better to only make 387 + READ-ONLY accesses to the hard disk otherwise you may corrupt your 388 + host data (use the @option{-snapshot} command line option so that the 389 + modifications are written in a temporary file). 390 + @end table 391 + 392 + 393 + @subsubsection Mac OS X 394 + 395 + @file{/dev/cdrom} is an alias to the first CDROM. 396 + 397 + Currently there is no specific code to handle removable media, so it 398 + is better to use the @code{change} or @code{eject} monitor commands to 399 + change or eject media. 400 + 401 + @node disk_images_fat_images 402 + @subsection Virtual FAT disk images 403 + 404 + QEMU can automatically create a virtual FAT disk image from a 405 + directory tree. In order to use it, just type: 406 + 407 + @example 408 + qemu-system-i386 linux.img -hdb fat:/my_directory 409 + @end example 410 + 411 + Then you can access all the files in the @file{/my_directory} 412 + directory without having to copy them in a disk image or to export 413 + them via SAMBA or NFS. The default access is @emph{read-only}. 
414 + 415 + Floppies can be emulated with the @code{:floppy:} option: 416 + 417 + @example 418 + qemu-system-i386 linux.img -fda fat:floppy:/my_directory 419 + @end example 420 + 421 + Read/write support is available for testing (beta stage) with the 422 + @code{:rw:} option: 423 + 424 + @example 425 + qemu-system-i386 linux.img -fda fat:floppy:rw:/my_directory 426 + @end example 427 + 428 + What you should @emph{never} do: 429 + @itemize 430 + @item use non-ASCII filenames ; 431 + @item use "-snapshot" together with ":rw:" ; 432 + @item expect it to work when loadvm'ing ; 433 + @item write to the FAT directory on the host system while accessing it with the guest system. 434 + @end itemize 435 + 436 + @node disk_images_nbd 437 + @subsection NBD access 438 + 439 + QEMU can directly access block devices exported using the Network Block Device 440 + protocol. 441 + 442 + @example 443 + qemu-system-i386 linux.img -hdb nbd://my_nbd_server.mydomain.org:1024/ 444 + @end example 445 + 446 + If the NBD server is located on the same host, you can use a unix socket instead 447 + of an inet socket: 448 + 449 + @example 450 + qemu-system-i386 linux.img -hdb nbd+unix://?socket=/tmp/my_socket 451 + @end example 452 + 453 + In this case, the block device must be exported using qemu-nbd: 454 + 455 + @example 456 + qemu-nbd --socket=/tmp/my_socket my_disk.qcow2 457 + @end example 458 + 459 + The use of qemu-nbd allows sharing of a disk between several guests: 460 + @example 461 + qemu-nbd --socket=/tmp/my_socket --share=2 my_disk.qcow2 462 + @end example 463 + 464 + @noindent 465 + and then you can use it with two guests: 466 + @example 467 + qemu-system-i386 linux1.img -hdb nbd+unix://?socket=/tmp/my_socket 468 + qemu-system-i386 linux2.img -hdb nbd+unix://?socket=/tmp/my_socket 469 + @end example 470 + 471 + If the nbd-server uses named exports (supported since NBD 2.9.18, or with QEMU's 472 + own embedded NBD server), you must specify an export name in the URI: 473 + 
@example 474 + qemu-system-i386 -cdrom nbd://localhost/debian-500-ppc-netinst 475 + qemu-system-i386 -cdrom nbd://localhost/openSUSE-11.1-ppc-netinst 476 + @end example 477 + 478 + The URI syntax for NBD is supported since QEMU 1.3. An alternative syntax is 479 + also available. Here are some examples of the older syntax: 480 + @example 481 + qemu-system-i386 linux.img -hdb nbd:my_nbd_server.mydomain.org:1024 482 + qemu-system-i386 linux2.img -hdb nbd:unix:/tmp/my_socket 483 + qemu-system-i386 -cdrom nbd:localhost:10809:exportname=debian-500-ppc-netinst 484 + @end example 485 + 486 + @node disk_images_sheepdog 487 + @subsection Sheepdog disk images 488 + 489 + Sheepdog is a distributed storage system for QEMU. It provides highly 490 + available block level storage volumes that can be attached to 491 + QEMU-based virtual machines. 492 + 493 + You can create a Sheepdog disk image with the command: 494 + @example 495 + qemu-img create sheepdog:///@var{image} @var{size} 496 + @end example 497 + where @var{image} is the Sheepdog image name and @var{size} is its 498 + size. 499 + 500 + To import the existing @var{filename} to Sheepdog, you can use a 501 + convert command. 502 + @example 503 + qemu-img convert @var{filename} sheepdog:///@var{image} 504 + @end example 505 + 506 + You can boot from the Sheepdog disk image with the command: 507 + @example 508 + qemu-system-i386 sheepdog:///@var{image} 509 + @end example 510 + 511 + You can also create a snapshot of the Sheepdog image like qcow2. 512 + @example 513 + qemu-img snapshot -c @var{tag} sheepdog:///@var{image} 514 + @end example 515 + where @var{tag} is a tag name of the newly created snapshot. 516 + 517 + To boot from the Sheepdog snapshot, specify the tag name of the 518 + snapshot. 519 + @example 520 + qemu-system-i386 sheepdog:///@var{image}#@var{tag} 521 + @end example 522 + 523 + You can create a cloned image from the existing snapshot. 
524 + @example 525 + qemu-img create -b sheepdog:///@var{base}#@var{tag} sheepdog:///@var{image} 526 + @end example 527 + where @var{base} is an image name of the source snapshot and @var{tag} 528 + is its tag name. 529 + 530 + You can use a unix socket instead of an inet socket: 531 + 532 + @example 533 + qemu-system-i386 sheepdog+unix:///@var{image}?socket=@var{path} 534 + @end example 535 + 536 + If the Sheepdog daemon doesn't run on the local host, you need to 537 + specify one of the Sheepdog servers to connect to. 538 + @example 539 + qemu-img create sheepdog://@var{hostname}:@var{port}/@var{image} @var{size} 540 + qemu-system-i386 sheepdog://@var{hostname}:@var{port}/@var{image} 541 + @end example 542 + 543 + @node disk_images_iscsi 544 + @subsection iSCSI LUNs 545 + 546 + iSCSI is a popular protocol used to access SCSI devices across a computer 547 + network. 548 + 549 + There are two different ways iSCSI devices can be used by QEMU. 550 + 551 + The first method is to mount the iSCSI LUN on the host, and make it appear as 552 + any other ordinary SCSI device on the host and then to access this device as a 553 + /dev/sd device from QEMU. How to do this differs between host OSes. 554 + 555 + The second method involves using the iSCSI initiator that is built into 556 + QEMU. This provides a mechanism that works the same way regardless of which 557 + host OS you are running QEMU on. This section will describe this second method 558 + of using iSCSI together with QEMU. 559 + 560 + In QEMU, iSCSI devices are described using special iSCSI URLs 561 + 562 + @example 563 + URL syntax: 564 + iscsi://[<username>[%<password>]@@]<host>[:<port>]/<target-iqn-name>/<lun> 565 + @end example 566 + 567 + Username and password are optional and only used if your target is set up 568 + using CHAP authentication for access control. 
569 + Alternatively the username and password can also be set via environment 570 + variables so that they do not show up in the process list 571 + 572 + @example 573 + export LIBISCSI_CHAP_USERNAME=<username> 574 + export LIBISCSI_CHAP_PASSWORD=<password> 575 + iscsi://<host>/<target-iqn-name>/<lun> 576 + @end example 577 + 578 + Various session related parameters can be set via special options, either 579 + in a configuration file provided via '-readconfig' or directly on the 580 + command line. 581 + 582 + If the initiator-name is not specified qemu will use a default name 583 + of 'iqn.2008-11.org.linux-kvm[:<uuid>]' where <uuid> is the UUID of the 584 + virtual machine. If the UUID is not specified qemu will use 585 + 'iqn.2008-11.org.linux-kvm[:<name>]' where <name> is the name of the 586 + virtual machine. 587 + 588 + @example 589 + Setting a specific initiator name to use when logging in to the target 590 + -iscsi initiator-name=iqn.qemu.test:my-initiator 591 + @end example 592 + 593 + @example 594 + Controlling which type of header digest to negotiate with the target 595 + -iscsi header-digest=CRC32C|CRC32C-NONE|NONE-CRC32C|NONE 596 + @end example 597 + 598 + These can also be set via a configuration file 599 + @example 600 + [iscsi] 601 + user = "CHAP username" 602 + password = "CHAP password" 603 + initiator-name = "iqn.qemu.test:my-initiator" 604 + # header digest is one of CRC32C|CRC32C-NONE|NONE-CRC32C|NONE 605 + header-digest = "CRC32C" 606 + @end example 607 + 608 + 609 + Setting the target name allows different options for different targets 610 + @example 611 + [iscsi "iqn.target.name"] 612 + user = "CHAP username" 613 + password = "CHAP password" 614 + initiator-name = "iqn.qemu.test:my-initiator" 615 + # header digest is one of CRC32C|CRC32C-NONE|NONE-CRC32C|NONE 616 + header-digest = "CRC32C" 617 + @end example 618 + 619 + 620 + How to use a configuration file to set iSCSI configuration options: 621 + @example 622 + cat >iscsi.conf <<EOF 623 + 
[iscsi] 624 + user = "me" 625 + password = "my password" 626 + initiator-name = "iqn.qemu.test:my-initiator" 627 + header-digest = "CRC32C" 628 + EOF 629 + 630 + qemu-system-i386 -drive file=iscsi://127.0.0.1/iqn.qemu.test/1 \ 631 + -readconfig iscsi.conf 632 + @end example 633 + 634 + 635 + How to set up a simple iSCSI target on loopback and access it via QEMU: 636 + @example 637 + This example shows how to set up an iSCSI target with one CDROM and one DISK 638 + using the Linux STGT software target. This target is available on Red Hat based 639 + systems as the package 'scsi-target-utils'. 640 + 641 + tgtd --iscsi portal=127.0.0.1:3260 642 + tgtadm --lld iscsi --op new --mode target --tid 1 -T iqn.qemu.test 643 + tgtadm --lld iscsi --mode logicalunit --op new --tid 1 --lun 1 \ 644 + -b /IMAGES/disk.img --device-type=disk 645 + tgtadm --lld iscsi --mode logicalunit --op new --tid 1 --lun 2 \ 646 + -b /IMAGES/cd.iso --device-type=cd 647 + tgtadm --lld iscsi --op bind --mode target --tid 1 -I ALL 648 + 649 + qemu-system-i386 -iscsi initiator-name=iqn.qemu.test:my-initiator \ 650 + -boot d -drive file=iscsi://127.0.0.1/iqn.qemu.test/1 \ 651 + -cdrom iscsi://127.0.0.1/iqn.qemu.test/2 652 + @end example 653 + 654 + @node disk_images_gluster 655 + @subsection GlusterFS disk images 656 + 657 + GlusterFS is a user space distributed file system. 658 + 659 + You can boot from the GlusterFS disk image with the command: 660 + @example 661 + URI: 662 + qemu-system-x86_64 -drive file=gluster[+@var{type}]://[@var{host}[:@var{port}]]/@var{volume}/@var{path} 663 + [?socket=...][,file.debug=9][,file.logfile=...] 664 + 665 + JSON: 666 + qemu-system-x86_64 'json:@{"driver":"qcow2", 667 + "file":@{"driver":"gluster", 668 + "volume":"testvol","path":"a.img","debug":9,"logfile":"...", 669 + "server":[@{"type":"tcp","host":"...","port":"..."@}, 670 + @{"type":"unix","socket":"..."@}]@}@}' 671 + @end example 672 + 673 + @var{gluster} is the protocol. 
674 + 675 + @var{type} specifies the transport type used to connect to the gluster 676 + management daemon (glusterd). Valid transport types are 677 + tcp and unix. In the URI form, if a transport type isn't specified, 678 + then the tcp type is assumed. 679 + 680 + @var{host} specifies the server where the volume file specification for 681 + the given volume resides. This can be either a hostname or an IPv4 address. 682 + If the transport type is unix, then the @var{host} field should not be specified. 683 + Instead, the @var{socket} field needs to be populated with the path to the unix domain 684 + socket. 685 + 686 + @var{port} is the port number on which glusterd is listening. This is optional 687 + and if not specified, it defaults to port 24007. If the transport type is unix, 688 + then @var{port} should not be specified. 689 + 690 + @var{volume} is the name of the gluster volume which contains the disk image. 691 + 692 + @var{path} is the path to the actual disk image that resides on the gluster volume. 693 + 694 + @var{debug} is the logging level of the gluster protocol driver. Debug levels 695 + are 0-9, with 9 being the most verbose, and 0 representing no debugging output. 696 + The default level is 4. The current logging levels defined in the gluster source 697 + are 0 - None, 1 - Emergency, 2 - Alert, 3 - Critical, 4 - Error, 5 - Warning, 698 + 6 - Notice, 7 - Info, 8 - Debug, 9 - Trace. 699 + 700 + @var{logfile} is a command-line option specifying the log file path; it directs 701 + logging to the specified file and also helps in persisting the gfapi logs. The 702 + default is stderr. 
703 + 704 + 705 + 706 + 707 + You can create a GlusterFS disk image with the command: 708 + @example 709 + qemu-img create gluster://@var{host}/@var{volume}/@var{path} @var{size} 710 + @end example 711 + 712 + Examples 713 + @example 714 + qemu-system-x86_64 -drive file=gluster://1.2.3.4/testvol/a.img 715 + qemu-system-x86_64 -drive file=gluster+tcp://1.2.3.4/testvol/a.img 716 + qemu-system-x86_64 -drive file=gluster+tcp://1.2.3.4:24007/testvol/dir/a.img 717 + qemu-system-x86_64 -drive file=gluster+tcp://[1:2:3:4:5:6:7:8]/testvol/dir/a.img 718 + qemu-system-x86_64 -drive file=gluster+tcp://[1:2:3:4:5:6:7:8]:24007/testvol/dir/a.img 719 + qemu-system-x86_64 -drive file=gluster+tcp://server.domain.com:24007/testvol/dir/a.img 720 + qemu-system-x86_64 -drive file=gluster+unix:///testvol/dir/a.img?socket=/tmp/glusterd.socket 721 + qemu-system-x86_64 -drive file=gluster+rdma://1.2.3.4:24007/testvol/a.img 722 + qemu-system-x86_64 -drive file=gluster://1.2.3.4/testvol/a.img,file.debug=9,file.logfile=/var/log/qemu-gluster.log 723 + qemu-system-x86_64 'json:@{"driver":"qcow2", 724 + "file":@{"driver":"gluster", 725 + "volume":"testvol","path":"a.img", 726 + "debug":9,"logfile":"/var/log/qemu-gluster.log", 727 + "server":[@{"type":"tcp","host":"1.2.3.4","port":24007@}, 728 + @{"type":"unix","socket":"/var/run/glusterd.socket"@}]@}@}' 729 + qemu-system-x86_64 -drive driver=qcow2,file.driver=gluster,file.volume=testvol,file.path=/path/a.img, 730 + file.debug=9,file.logfile=/var/log/qemu-gluster.log, 731 + file.server.0.type=tcp,file.server.0.host=1.2.3.4,file.server.0.port=24007, 732 + file.server.1.type=unix,file.server.1.socket=/var/run/glusterd.socket 733 + @end example 734 + 735 + @node disk_images_ssh 736 + @subsection Secure Shell (ssh) disk images 737 + 738 + You can access disk images located on a remote ssh server 739 + by using the ssh protocol: 740 + 741 + @example 742 + qemu-system-x86_64 -drive 
file=ssh://[@var{user}@@]@var{server}[:@var{port}]/@var{path}[?host_key_check=@var{host_key_check}] 743 + @end example 744 + 745 + Alternative syntax using properties: 746 + 747 + @example 748 + qemu-system-x86_64 -drive file.driver=ssh[,file.user=@var{user}],file.host=@var{server}[,file.port=@var{port}],file.path=@var{path}[,file.host_key_check=@var{host_key_check}] 749 + @end example 750 + 751 + @var{ssh} is the protocol. 752 + 753 + @var{user} is the remote user. If not specified, then the local 754 + username is tried. 755 + 756 + @var{server} specifies the remote ssh server. Any ssh server can be 757 + used, but it must implement the sftp-server protocol. Most Unix/Linux 758 + systems should work without requiring any extra configuration. 759 + 760 + @var{port} is the port number on which sshd is listening. By default 761 + the standard ssh port (22) is used. 762 + 763 + @var{path} is the path to the disk image. 764 + 765 + The optional @var{host_key_check} parameter controls how the remote 766 + host's key is checked. The default is @code{yes} which means to use 767 + the local @file{.ssh/known_hosts} file. Setting this to @code{no} 768 + turns off known-hosts checking. Or you can check that the host key 769 + matches a specific fingerprint: 770 + @code{host_key_check=md5:78:45:8e:14:57:4f:d5:45:83:0a:0e:f3:49:82:c9:c8} 771 + (@code{sha1:} can also be used as a prefix, but note that OpenSSH 772 + tools only use MD5 to print fingerprints). 773 + 774 + Currently authentication must be done using ssh-agent. Other 775 + authentication methods may be supported in future. 776 + 777 + Note: Many ssh servers do not support an @code{fsync}-style operation. 778 + The ssh driver cannot guarantee that disk flush requests are 779 + obeyed, and this causes a risk of disk corruption if the remote 780 + server or network goes down during writes. 
The driver will 781 + print a warning when @code{fsync} is not supported: 782 + 783 + warning: ssh server @code{ssh.example.com:22} does not support fsync 784 + 785 + With sufficiently new versions of libssh2 and OpenSSH, @code{fsync} is 786 + supported. 787 + 788 + @c man end 789 + 790 + @ignore 791 + 792 + @setfilename qemu-block-drivers 793 + @settitle QEMU block drivers reference 794 + 795 + @c man begin SEEALSO 796 + The HTML documentation of QEMU for more precise information and Linux 797 + user mode emulator invocation. 798 + @c man end 799 + 800 + @c man begin AUTHOR 801 + Fabrice Bellard and the QEMU Project developers 802 + @c man end 803 + 804 + @end ignore
+1 -1
include/block/block.h
··· 166 166 typedef struct BDRVReopenState { 167 167 BlockDriverState *bs; 168 168 int flags; 169 + uint64_t perm, shared_perm; 169 170 QDict *options; 170 171 QDict *explicit_options; 171 172 void *opaque; ··· 435 436 int64_t offset, int64_t bytes, int64_t *pnum); 436 437 437 438 bool bdrv_is_read_only(BlockDriverState *bs); 438 - bool bdrv_is_writable(BlockDriverState *bs); 439 439 int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only, 440 440 bool ignore_allow_rdw, Error **errp); 441 441 int bdrv_set_read_only(BlockDriverState *bs, bool read_only, Error **errp);
+7
include/block/block_int.h
··· 411 411 * 412 412 * If @c is NULL, return the permissions for attaching a new child for the 413 413 * given @role. 414 + * 415 + * If @reopen_queue is non-NULL, don't return the currently needed 416 + * permissions, but those that will be needed after applying the 417 + * @reopen_queue. 414 418 */ 415 419 void (*bdrv_child_perm)(BlockDriverState *bs, BdrvChild *c, 416 420 const BdrvChildRole *role, 421 + BlockReopenQueue *reopen_queue, 417 422 uint64_t parent_perm, uint64_t parent_shared, 418 423 uint64_t *nperm, uint64_t *nshared); 419 424 ··· 983 988 * all children */ 984 989 void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c, 985 990 const BdrvChildRole *role, 991 + BlockReopenQueue *reopen_queue, 986 992 uint64_t perm, uint64_t shared, 987 993 uint64_t *nperm, uint64_t *nshared); 988 994 ··· 992 998 * CONSISTENT_READ and doesn't share WRITE. */ 993 999 void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c, 994 1000 const BdrvChildRole *role, 1001 + BlockReopenQueue *reopen_queue, 995 1002 uint64_t perm, uint64_t shared, 996 1003 uint64_t *nperm, uint64_t *nshared); 997 1004
+7 -1
qapi/block-core.json
··· 2533 2533 # 2534 2534 # Trigger events supported by blkdebug. 2535 2535 # 2536 + # @l1_shrink_write_table: write zeros to the l1 table to shrink image. 2537 + # (since 2.11) 2538 + # 2539 + # @l1_shrink_free_l2_clusters: discard the l2 tables. (since 2.11) 2540 + # 2536 2541 # Since: 2.9 2537 2542 ## 2538 2543 { 'enum': 'BlkdebugEvent', 'prefix': 'BLKDBG', ··· 2549 2554 'cluster_alloc_bytes', 'cluster_free', 'flush_to_os', 2550 2555 'flush_to_disk', 'pwritev_rmw_head', 'pwritev_rmw_after_head', 2551 2556 'pwritev_rmw_tail', 'pwritev_rmw_after_tail', 'pwritev', 2552 - 'pwritev_zero', 'pwritev_done', 'empty_image_prepare' ] } 2557 + 'pwritev_zero', 'pwritev_done', 'empty_image_prepare', 2558 + 'l1_shrink_write_table', 'l1_shrink_free_l2_clusters' ] } 2553 2559 2554 2560 ## 2555 2561 # @BlkdebugInjectErrorOptions:
+1 -780
qemu-doc.texi
··· 490 490 491 491 @include qemu-nbd.texi 492 492 493 - @node disk_images_formats 494 - @subsection Disk image file formats 495 - 496 - QEMU supports many image file formats that can be used with VMs as well as with 497 - any of the tools (like @code{qemu-img}). This includes the preferred formats 498 - raw and qcow2 as well as formats that are supported for compatibility with 499 - older QEMU versions or other hypervisors. 500 - 501 - Depending on the image format, different options can be passed to 502 - @code{qemu-img create} and @code{qemu-img convert} using the @code{-o} option. 503 - This section describes each format and the options that are supported for it. 504 - 505 - @table @option 506 - @item raw 507 - 508 - Raw disk image format. This format has the advantage of 509 - being simple and easily exportable to all other emulators. If your 510 - file system supports @emph{holes} (for example in ext2 or ext3 on 511 - Linux or NTFS on Windows), then only the written sectors will reserve 512 - space. Use @code{qemu-img info} to know the real size used by the 513 - image or @code{ls -ls} on Unix/Linux. 514 - 515 - Supported options: 516 - @table @code 517 - @item preallocation 518 - Preallocation mode (allowed values: @code{off}, @code{falloc}, @code{full}). 519 - @code{falloc} mode preallocates space for image by calling posix_fallocate(). 520 - @code{full} mode preallocates space for image by writing zeros to underlying 521 - storage. 522 - @end table 523 - 524 - @item qcow2 525 - QEMU image format, the most versatile format. Use it to have smaller 526 - images (useful if your filesystem does not supports holes, for example 527 - on Windows), zlib based compression and support of multiple VM 528 - snapshots. 529 - 530 - Supported options: 531 - @table @code 532 - @item compat 533 - Determines the qcow2 version to use. @code{compat=0.10} uses the 534 - traditional image format that can be read by any QEMU since 0.10. 
535 - @code{compat=1.1} enables image format extensions that only QEMU 1.1 and 536 - newer understand (this is the default). Amongst others, this includes 537 - zero clusters, which allow efficient copy-on-read for sparse images. 538 - 539 - @item backing_file 540 - File name of a base image (see @option{create} subcommand) 541 - @item backing_fmt 542 - Image format of the base image 543 - @item encryption 544 - This option is deprecated and equivalent to @code{encrypt.format=aes} 545 - 546 - @item encrypt.format 547 - 548 - If this is set to @code{luks}, it requests that the qcow2 payload (not 549 - qcow2 header) be encrypted using the LUKS format. The passphrase to 550 - use to unlock the LUKS key slot is given by the @code{encrypt.key-secret} 551 - parameter. LUKS encryption parameters can be tuned with the other 552 - @code{encrypt.*} parameters. 553 - 554 - If this is set to @code{aes}, the image is encrypted with 128-bit AES-CBC. 555 - The encryption key is given by the @code{encrypt.key-secret} parameter. 556 - This encryption format is considered to be flawed by modern cryptography 557 - standards, suffering from a number of design problems: 558 - 559 - @itemize @minus 560 - @item The AES-CBC cipher is used with predictable initialization vectors based 561 - on the sector number. This makes it vulnerable to chosen plaintext attacks 562 - which can reveal the existence of encrypted data. 563 - @item The user passphrase is directly used as the encryption key. A poorly 564 - chosen or short passphrase will compromise the security of the encryption. 565 - @item In the event of the passphrase being compromised there is no way to 566 - change the passphrase to protect data in any qcow images. The files must 567 - be cloned, using a different encryption passphrase in the new file. The 568 - original file must then be securely erased using a program like shred, 569 - though even this is ineffective with many modern storage technologies. 
570 - @end itemize 571 - 572 - The use of this is no longer supported in system emulators. Support only 573 - remains in the command line utilities, for the purposes of data liberation 574 - and interoperability with old versions of QEMU. The @code{luks} format 575 - should be used instead. 576 - 577 - @item encrypt.key-secret 578 - 579 - Provides the ID of a @code{secret} object that contains the passphrase 580 - (@code{encrypt.format=luks}) or encryption key (@code{encrypt.format=aes}). 581 - 582 - @item encrypt.cipher-alg 583 - 584 - Name of the cipher algorithm and key length. Currently defaults 585 - to @code{aes-256}. Only used when @code{encrypt.format=luks}. 586 - 587 - @item encrypt.cipher-mode 588 - 589 - Name of the encryption mode to use. Currently defaults to @code{xts}. 590 - Only used when @code{encrypt.format=luks}. 591 - 592 - @item encrypt.ivgen-alg 593 - 594 - Name of the initialization vector generator algorithm. Currently defaults 595 - to @code{plain64}. Only used when @code{encrypt.format=luks}. 596 - 597 - @item encrypt.ivgen-hash-alg 598 - 599 - Name of the hash algorithm to use with the initialization vector generator 600 - (if required). Defaults to @code{sha256}. Only used when @code{encrypt.format=luks}. 601 - 602 - @item encrypt.hash-alg 603 - 604 - Name of the hash algorithm to use for PBKDF algorithm 605 - Defaults to @code{sha256}. Only used when @code{encrypt.format=luks}. 606 - 607 - @item encrypt.iter-time 608 - 609 - Amount of time, in milliseconds, to use for PBKDF algorithm per key slot. 610 - Defaults to @code{2000}. Only used when @code{encrypt.format=luks}. 611 - 612 - @item cluster_size 613 - Changes the qcow2 cluster size (must be between 512 and 2M). Smaller cluster 614 - sizes can improve the image file size whereas larger cluster sizes generally 615 - provide better performance. 616 - 617 - @item preallocation 618 - Preallocation mode (allowed values: @code{off}, @code{metadata}, @code{falloc}, 619 - @code{full}). 
An image with preallocated metadata is initially larger but can 620 - improve performance when the image needs to grow. @code{falloc} and @code{full} 621 - preallocations are like the same options of @code{raw} format, but sets up 622 - metadata also. 623 - 624 - @item lazy_refcounts 625 - If this option is set to @code{on}, reference count updates are postponed with 626 - the goal of avoiding metadata I/O and improving performance. This is 627 - particularly interesting with @option{cache=writethrough} which doesn't batch 628 - metadata updates. The tradeoff is that after a host crash, the reference count 629 - tables must be rebuilt, i.e. on the next open an (automatic) @code{qemu-img 630 - check -r all} is required, which may take some time. 631 - 632 - This option can only be enabled if @code{compat=1.1} is specified. 633 - 634 - @item nocow 635 - If this option is set to @code{on}, it will turn off COW of the file. It's only 636 - valid on btrfs, no effect on other file systems. 637 - 638 - Btrfs has low performance when hosting a VM image file, even more when the guest 639 - on the VM also using btrfs as file system. Turning off COW is a way to mitigate 640 - this bad performance. Generally there are two ways to turn off COW on btrfs: 641 - a) Disable it by mounting with nodatacow, then all newly created files will be 642 - NOCOW. b) For an empty file, add the NOCOW file attribute. That's what this option 643 - does. 644 - 645 - Note: this option is only valid to new or empty files. If there is an existing 646 - file which is COW and has data blocks already, it couldn't be changed to NOCOW 647 - by setting @code{nocow=on}. One can issue @code{lsattr filename} to check if 648 - the NOCOW flag is set or not (Capital 'C' is NOCOW flag). 649 - 650 - @end table 651 - 652 - @item qed 653 - Old QEMU image format with support for backing files and compact image files 654 - (when your filesystem or transport medium does not support holes). 
655 - 656 - When converting QED images to qcow2, you might want to consider using the 657 - @code{lazy_refcounts=on} option to get a more QED-like behaviour. 658 - 659 - Supported options: 660 - @table @code 661 - @item backing_file 662 - File name of a base image (see @option{create} subcommand). 663 - @item backing_fmt 664 - Image file format of backing file (optional). Useful if the format cannot be 665 - autodetected because it has no header, like some vhd/vpc files. 666 - @item cluster_size 667 - Changes the cluster size (must be power-of-2 between 4K and 64K). Smaller 668 - cluster sizes can improve the image file size whereas larger cluster sizes 669 - generally provide better performance. 670 - @item table_size 671 - Changes the number of clusters per L1/L2 table (must be power-of-2 between 1 672 - and 16). There is normally no need to change this value but this option can be 673 - used for performance benchmarking. 674 - @end table 675 - 676 - @item qcow 677 - Old QEMU image format with support for backing files, compact image files, 678 - encryption and compression. 679 - 680 - Supported options: 681 - @table @code 682 - @item backing_file 683 - File name of a base image (see @option{create} subcommand) 684 - @item encryption 685 - This option is deprecated and equivalent to @code{encrypt.format=aes} 686 - 687 - @item encrypt.format 688 - If this is set to @code{aes}, the image is encrypted with 128-bit AES-CBC. 689 - The encryption key is given by the @code{encrypt.key-secret} parameter. 690 - This encryption format is considered to be flawed by modern cryptography 691 - standards, suffering from a number of design problems enumerated previously 692 - against the @code{qcow2} image format. 693 - 694 - The use of this is no longer supported in system emulators. Support only 695 - remains in the command line utilities, for the purposes of data liberation 696 - and interoperability with old versions of QEMU. 
697 - 698 - Users requiring native encryption should use the @code{qcow2} format 699 - instead with @code{encrypt.format=luks}. 700 - 701 - @item encrypt.key-secret 702 - 703 - Provides the ID of a @code{secret} object that contains the encryption 704 - key (@code{encrypt.format=aes}). 705 - 706 - @end table 707 - 708 - @item luks 709 - 710 - LUKS v1 encryption format, compatible with Linux dm-crypt/cryptsetup 711 - 712 - Supported options: 713 - @table @code 714 - 715 - @item key-secret 716 - 717 - Provides the ID of a @code{secret} object that contains the passphrase. 718 - 719 - @item cipher-alg 720 - 721 - Name of the cipher algorithm and key length. Currently defaults 722 - to @code{aes-256}. 723 - 724 - @item cipher-mode 725 - 726 - Name of the encryption mode to use. Currently defaults to @code{xts}. 727 - 728 - @item ivgen-alg 729 - 730 - Name of the initialization vector generator algorithm. Currently defaults 731 - to @code{plain64}. 732 - 733 - @item ivgen-hash-alg 734 - 735 - Name of the hash algorithm to use with the initialization vector generator 736 - (if required). Defaults to @code{sha256}. 737 - 738 - @item hash-alg 739 - 740 - Name of the hash algorithm to use for PBKDF algorithm 741 - Defaults to @code{sha256}. 742 - 743 - @item iter-time 744 - 745 - Amount of time, in milliseconds, to use for PBKDF algorithm per key slot. 746 - Defaults to @code{2000}. 747 - 748 - @end table 749 - 750 - @item vdi 751 - VirtualBox 1.1 compatible image format. 752 - Supported options: 753 - @table @code 754 - @item static 755 - If this option is set to @code{on}, the image is created with metadata 756 - preallocation. 757 - @end table 758 - 759 - @item vmdk 760 - VMware 3 and 4 compatible image format. 761 - 762 - Supported options: 763 - @table @code 764 - @item backing_file 765 - File name of a base image (see @option{create} subcommand). 
766 - @item compat6 767 - Create a VMDK version 6 image (instead of version 4) 768 - @item hwversion 769 - Specify vmdk virtual hardware version. Compat6 flag cannot be enabled 770 - if hwversion is specified. 771 - @item subformat 772 - Specifies which VMDK subformat to use. Valid options are 773 - @code{monolithicSparse} (default), 774 - @code{monolithicFlat}, 775 - @code{twoGbMaxExtentSparse}, 776 - @code{twoGbMaxExtentFlat} and 777 - @code{streamOptimized}. 778 - @end table 779 - 780 - @item vpc 781 - VirtualPC compatible image format (VHD). 782 - Supported options: 783 - @table @code 784 - @item subformat 785 - Specifies which VHD subformat to use. Valid options are 786 - @code{dynamic} (default) and @code{fixed}. 787 - @end table 788 - 789 - @item VHDX 790 - Hyper-V compatible image format (VHDX). 791 - Supported options: 792 - @table @code 793 - @item subformat 794 - Specifies which VHDX subformat to use. Valid options are 795 - @code{dynamic} (default) and @code{fixed}. 796 - @item block_state_zero 797 - Force use of payload blocks of type 'ZERO'. Can be set to @code{on} (default) 798 - or @code{off}. When set to @code{off}, new blocks will be created as 799 - @code{PAYLOAD_BLOCK_NOT_PRESENT}, which means parsers are free to return 800 - arbitrary data for those blocks. Do not set to @code{off} when using 801 - @code{qemu-img convert} with @code{subformat=dynamic}. 802 - @item block_size 803 - Block size; min 1 MB, max 256 MB. 0 means auto-calculate based on image size. 804 - @item log_size 805 - Log size; min 1 MB. 806 - @end table 807 - @end table 808 - 809 - @subsubsection Read-only formats 810 - More disk image file formats are supported in a read-only mode. 811 - @table @option 812 - @item bochs 813 - Bochs images of @code{growing} type. 814 - @item cloop 815 - Linux Compressed Loop image, useful only to reuse directly compressed 816 - CD-ROM images present for example in the Knoppix CD-ROMs. 817 - @item dmg 818 - Apple disk image. 
819 - @item parallels 820 - Parallels disk image format. 821 - @end table 822 - 823 - 824 - @node host_drives 825 - @subsection Using host drives 826 - 827 - In addition to disk image files, QEMU can directly access host 828 - devices. We describe here the usage for QEMU version >= 0.8.3. 829 - 830 - @subsubsection Linux 831 - 832 - On Linux, you can directly use the host device filename instead of a 833 - disk image filename provided you have enough privileges to access 834 - it. For example, use @file{/dev/cdrom} to access to the CDROM. 835 - 836 - @table @code 837 - @item CD 838 - You can specify a CDROM device even if no CDROM is loaded. QEMU has 839 - specific code to detect CDROM insertion or removal. CDROM ejection by 840 - the guest OS is supported. Currently only data CDs are supported. 841 - @item Floppy 842 - You can specify a floppy device even if no floppy is loaded. Floppy 843 - removal is currently not detected accurately (if you change floppy 844 - without doing floppy access while the floppy is not loaded, the guest 845 - OS will think that the same floppy is loaded). 846 - Use of the host's floppy device is deprecated, and support for it will 847 - be removed in a future release. 848 - @item Hard disks 849 - Hard disks can be used. Normally you must specify the whole disk 850 - (@file{/dev/hdb} instead of @file{/dev/hdb1}) so that the guest OS can 851 - see it as a partitioned disk. WARNING: unless you know what you do, it 852 - is better to only make READ-ONLY accesses to the hard disk otherwise 853 - you may corrupt your host data (use the @option{-snapshot} command 854 - line option or modify the device permissions accordingly). 855 - @end table 856 - 857 - @subsubsection Windows 858 - 859 - @table @code 860 - @item CD 861 - The preferred syntax is the drive letter (e.g. @file{d:}). The 862 - alternate syntax @file{\\.\d:} is supported. @file{/dev/cdrom} is 863 - supported as an alias to the first CDROM drive. 
864 - 865 - Currently there is no specific code to handle removable media, so it 866 - is better to use the @code{change} or @code{eject} monitor commands to 867 - change or eject media. 868 - @item Hard disks 869 - Hard disks can be used with the syntax: @file{\\.\PhysicalDrive@var{N}} 870 - where @var{N} is the drive number (0 is the first hard disk). 871 - 872 - WARNING: unless you know what you do, it is better to only make 873 - READ-ONLY accesses to the hard disk otherwise you may corrupt your 874 - host data (use the @option{-snapshot} command line so that the 875 - modifications are written in a temporary file). 876 - @end table 877 - 878 - 879 - @subsubsection Mac OS X 880 - 881 - @file{/dev/cdrom} is an alias to the first CDROM. 882 - 883 - Currently there is no specific code to handle removable media, so it 884 - is better to use the @code{change} or @code{eject} monitor commands to 885 - change or eject media. 886 - 887 - @node disk_images_fat_images 888 - @subsection Virtual FAT disk images 889 - 890 - QEMU can automatically create a virtual FAT disk image from a 891 - directory tree. In order to use it, just type: 892 - 893 - @example 894 - qemu-system-i386 linux.img -hdb fat:/my_directory 895 - @end example 896 - 897 - Then you access access to all the files in the @file{/my_directory} 898 - directory without having to copy them in a disk image or to export 899 - them via SAMBA or NFS. The default access is @emph{read-only}. 
900 - 901 - Floppies can be emulated with the @code{:floppy:} option: 902 - 903 - @example 904 - qemu-system-i386 linux.img -fda fat:floppy:/my_directory 905 - @end example 906 - 907 - A read/write support is available for testing (beta stage) with the 908 - @code{:rw:} option: 909 - 910 - @example 911 - qemu-system-i386 linux.img -fda fat:floppy:rw:/my_directory 912 - @end example 913 - 914 - What you should @emph{never} do: 915 - @itemize 916 - @item use non-ASCII filenames ; 917 - @item use "-snapshot" together with ":rw:" ; 918 - @item expect it to work when loadvm'ing ; 919 - @item write to the FAT directory on the host system while accessing it with the guest system. 920 - @end itemize 921 - 922 - @node disk_images_nbd 923 - @subsection NBD access 924 - 925 - QEMU can access directly to block device exported using the Network Block Device 926 - protocol. 927 - 928 - @example 929 - qemu-system-i386 linux.img -hdb nbd://my_nbd_server.mydomain.org:1024/ 930 - @end example 931 - 932 - If the NBD server is located on the same host, you can use an unix socket instead 933 - of an inet socket: 934 - 935 - @example 936 - qemu-system-i386 linux.img -hdb nbd+unix://?socket=/tmp/my_socket 937 - @end example 938 - 939 - In this case, the block device must be exported using qemu-nbd: 940 - 941 - @example 942 - qemu-nbd --socket=/tmp/my_socket my_disk.qcow2 943 - @end example 944 - 945 - The use of qemu-nbd allows sharing of a disk between several guests: 946 - @example 947 - qemu-nbd --socket=/tmp/my_socket --share=2 my_disk.qcow2 948 - @end example 949 - 950 - @noindent 951 - and then you can use it with two guests: 952 - @example 953 - qemu-system-i386 linux1.img -hdb nbd+unix://?socket=/tmp/my_socket 954 - qemu-system-i386 linux2.img -hdb nbd+unix://?socket=/tmp/my_socket 955 - @end example 956 - 957 - If the nbd-server uses named exports (supported since NBD 2.9.18, or with QEMU's 958 - own embedded NBD server), you must specify an export name in the URI: 959 - 
@example 960 - qemu-system-i386 -cdrom nbd://localhost/debian-500-ppc-netinst 961 - qemu-system-i386 -cdrom nbd://localhost/openSUSE-11.1-ppc-netinst 962 - @end example 963 - 964 - The URI syntax for NBD is supported since QEMU 1.3. An alternative syntax is 965 - also available. Here are some example of the older syntax: 966 - @example 967 - qemu-system-i386 linux.img -hdb nbd:my_nbd_server.mydomain.org:1024 968 - qemu-system-i386 linux2.img -hdb nbd:unix:/tmp/my_socket 969 - qemu-system-i386 -cdrom nbd:localhost:10809:exportname=debian-500-ppc-netinst 970 - @end example 971 - 972 - @node disk_images_sheepdog 973 - @subsection Sheepdog disk images 974 - 975 - Sheepdog is a distributed storage system for QEMU. It provides highly 976 - available block level storage volumes that can be attached to 977 - QEMU-based virtual machines. 978 - 979 - You can create a Sheepdog disk image with the command: 980 - @example 981 - qemu-img create sheepdog:///@var{image} @var{size} 982 - @end example 983 - where @var{image} is the Sheepdog image name and @var{size} is its 984 - size. 985 - 986 - To import the existing @var{filename} to Sheepdog, you can use a 987 - convert command. 988 - @example 989 - qemu-img convert @var{filename} sheepdog:///@var{image} 990 - @end example 991 - 992 - You can boot from the Sheepdog disk image with the command: 993 - @example 994 - qemu-system-i386 sheepdog:///@var{image} 995 - @end example 996 - 997 - You can also create a snapshot of the Sheepdog image like qcow2. 998 - @example 999 - qemu-img snapshot -c @var{tag} sheepdog:///@var{image} 1000 - @end example 1001 - where @var{tag} is a tag name of the newly created snapshot. 1002 - 1003 - To boot from the Sheepdog snapshot, specify the tag name of the 1004 - snapshot. 1005 - @example 1006 - qemu-system-i386 sheepdog:///@var{image}#@var{tag} 1007 - @end example 1008 - 1009 - You can create a cloned image from the existing snapshot. 
1010 - @example 1011 - qemu-img create -b sheepdog:///@var{base}#@var{tag} sheepdog:///@var{image} 1012 - @end example 1013 - where @var{base} is a image name of the source snapshot and @var{tag} 1014 - is its tag name. 1015 - 1016 - You can use an unix socket instead of an inet socket: 1017 - 1018 - @example 1019 - qemu-system-i386 sheepdog+unix:///@var{image}?socket=@var{path} 1020 - @end example 1021 - 1022 - If the Sheepdog daemon doesn't run on the local host, you need to 1023 - specify one of the Sheepdog servers to connect to. 1024 - @example 1025 - qemu-img create sheepdog://@var{hostname}:@var{port}/@var{image} @var{size} 1026 - qemu-system-i386 sheepdog://@var{hostname}:@var{port}/@var{image} 1027 - @end example 1028 - 1029 - @node disk_images_iscsi 1030 - @subsection iSCSI LUNs 1031 - 1032 - iSCSI is a popular protocol used to access SCSI devices across a computer 1033 - network. 1034 - 1035 - There are two different ways iSCSI devices can be used by QEMU. 1036 - 1037 - The first method is to mount the iSCSI LUN on the host, and make it appear as 1038 - any other ordinary SCSI device on the host and then to access this device as a 1039 - /dev/sd device from QEMU. How to do this differs between host OSes. 1040 - 1041 - The second method involves using the iSCSI initiator that is built into 1042 - QEMU. This provides a mechanism that works the same way regardless of which 1043 - host OS you are running QEMU on. This section will describe this second method 1044 - of using iSCSI together with QEMU. 1045 - 1046 - In QEMU, iSCSI devices are described using special iSCSI URLs 1047 - 1048 - @example 1049 - URL syntax: 1050 - iscsi://[<username>[%<password>]@@]<host>[:<port>]/<target-iqn-name>/<lun> 1051 - @end example 1052 - 1053 - Username and password are optional and only used if your target is set up 1054 - using CHAP authentication for access control. 
1055 - Alternatively the username and password can also be set via environment 1056 - variables to have these not show up in the process list 1057 - 1058 - @example 1059 - export LIBISCSI_CHAP_USERNAME=<username> 1060 - export LIBISCSI_CHAP_PASSWORD=<password> 1061 - iscsi://<host>/<target-iqn-name>/<lun> 1062 - @end example 1063 - 1064 - Various session related parameters can be set via special options, either 1065 - in a configuration file provided via '-readconfig' or directly on the 1066 - command line. 1067 - 1068 - If the initiator-name is not specified qemu will use a default name 1069 - of 'iqn.2008-11.org.linux-kvm[:<uuid>'] where <uuid> is the UUID of the 1070 - virtual machine. If the UUID is not specified qemu will use 1071 - 'iqn.2008-11.org.linux-kvm[:<name>'] where <name> is the name of the 1072 - virtual machine. 1073 - 1074 - @example 1075 - Setting a specific initiator name to use when logging in to the target 1076 - -iscsi initiator-name=iqn.qemu.test:my-initiator 1077 - @end example 1078 - 1079 - @example 1080 - Controlling which type of header digest to negotiate with the target 1081 - -iscsi header-digest=CRC32C|CRC32C-NONE|NONE-CRC32C|NONE 1082 - @end example 1083 - 1084 - These can also be set via a configuration file 1085 - @example 1086 - [iscsi] 1087 - user = "CHAP username" 1088 - password = "CHAP password" 1089 - initiator-name = "iqn.qemu.test:my-initiator" 1090 - # header digest is one of CRC32C|CRC32C-NONE|NONE-CRC32C|NONE 1091 - header-digest = "CRC32C" 1092 - @end example 1093 - 1094 - 1095 - Setting the target name allows different options for different targets 1096 - @example 1097 - [iscsi "iqn.target.name"] 1098 - user = "CHAP username" 1099 - password = "CHAP password" 1100 - initiator-name = "iqn.qemu.test:my-initiator" 1101 - # header digest is one of CRC32C|CRC32C-NONE|NONE-CRC32C|NONE 1102 - header-digest = "CRC32C" 1103 - @end example 1104 - 1105 - 1106 - Howto use a configuration file to set iSCSI configuration options: 
1107 - @example 1108 - cat >iscsi.conf <<EOF 1109 - [iscsi] 1110 - user = "me" 1111 - password = "my password" 1112 - initiator-name = "iqn.qemu.test:my-initiator" 1113 - header-digest = "CRC32C" 1114 - EOF 1115 - 1116 - qemu-system-i386 -drive file=iscsi://127.0.0.1/iqn.qemu.test/1 \ 1117 - -readconfig iscsi.conf 1118 - @end example 1119 - 1120 - 1121 - Howto set up a simple iSCSI target on loopback and accessing it via QEMU: 1122 - @example 1123 - This example shows how to set up an iSCSI target with one CDROM and one DISK 1124 - using the Linux STGT software target. This target is available on Red Hat based 1125 - systems as the package 'scsi-target-utils'. 1126 - 1127 - tgtd --iscsi portal=127.0.0.1:3260 1128 - tgtadm --lld iscsi --op new --mode target --tid 1 -T iqn.qemu.test 1129 - tgtadm --lld iscsi --mode logicalunit --op new --tid 1 --lun 1 \ 1130 - -b /IMAGES/disk.img --device-type=disk 1131 - tgtadm --lld iscsi --mode logicalunit --op new --tid 1 --lun 2 \ 1132 - -b /IMAGES/cd.iso --device-type=cd 1133 - tgtadm --lld iscsi --op bind --mode target --tid 1 -I ALL 1134 - 1135 - qemu-system-i386 -iscsi initiator-name=iqn.qemu.test:my-initiator \ 1136 - -boot d -drive file=iscsi://127.0.0.1/iqn.qemu.test/1 \ 1137 - -cdrom iscsi://127.0.0.1/iqn.qemu.test/2 1138 - @end example 1139 - 1140 - @node disk_images_gluster 1141 - @subsection GlusterFS disk images 1142 - 1143 - GlusterFS is a user space distributed file system. 1144 - 1145 - You can boot from the GlusterFS disk image with the command: 1146 - @example 1147 - URI: 1148 - qemu-system-x86_64 -drive file=gluster[+@var{type}]://[@var{host}[:@var{port}]]/@var{volume}/@var{path} 1149 - [?socket=...][,file.debug=9][,file.logfile=...] 
1150 - 1151 - JSON: 1152 - qemu-system-x86_64 'json:@{"driver":"qcow2", 1153 - "file":@{"driver":"gluster", 1154 - "volume":"testvol","path":"a.img","debug":9,"logfile":"...", 1155 - "server":[@{"type":"tcp","host":"...","port":"..."@}, 1156 - @{"type":"unix","socket":"..."@}]@}@}' 1157 - @end example 1158 - 1159 - @var{gluster} is the protocol. 1160 - 1161 - @var{type} specifies the transport type used to connect to gluster 1162 - management daemon (glusterd). Valid transport types are 1163 - tcp and unix. In the URI form, if a transport type isn't specified, 1164 - then tcp type is assumed. 1165 - 1166 - @var{host} specifies the server where the volume file specification for 1167 - the given volume resides. This can be either a hostname or an ipv4 address. 1168 - If transport type is unix, then @var{host} field should not be specified. 1169 - Instead @var{socket} field needs to be populated with the path to unix domain 1170 - socket. 1171 - 1172 - @var{port} is the port number on which glusterd is listening. This is optional 1173 - and if not specified, it defaults to port 24007. If the transport type is unix, 1174 - then @var{port} should not be specified. 1175 - 1176 - @var{volume} is the name of the gluster volume which contains the disk image. 1177 - 1178 - @var{path} is the path to the actual disk image that resides on gluster volume. 1179 - 1180 - @var{debug} is the logging level of the gluster protocol driver. Debug levels 1181 - are 0-9, with 9 being the most verbose, and 0 representing no debugging output. 1182 - The default level is 4. The current logging levels defined in the gluster source 1183 - are 0 - None, 1 - Emergency, 2 - Alert, 3 - Critical, 4 - Error, 5 - Warning, 1184 - 6 - Notice, 7 - Info, 8 - Debug, 9 - Trace 1185 - 1186 - @var{logfile} is a commandline option to mention log file path which helps in 1187 - logging to the specified file and also help in persisting the gfapi logs. The 1188 - default is stderr. 
1189 - 1190 - 1191 - 1192 - 1193 - You can create a GlusterFS disk image with the command: 1194 - @example 1195 - qemu-img create gluster://@var{host}/@var{volume}/@var{path} @var{size} 1196 - @end example 1197 - 1198 - Examples 1199 - @example 1200 - qemu-system-x86_64 -drive file=gluster://1.2.3.4/testvol/a.img 1201 - qemu-system-x86_64 -drive file=gluster+tcp://1.2.3.4/testvol/a.img 1202 - qemu-system-x86_64 -drive file=gluster+tcp://1.2.3.4:24007/testvol/dir/a.img 1203 - qemu-system-x86_64 -drive file=gluster+tcp://[1:2:3:4:5:6:7:8]/testvol/dir/a.img 1204 - qemu-system-x86_64 -drive file=gluster+tcp://[1:2:3:4:5:6:7:8]:24007/testvol/dir/a.img 1205 - qemu-system-x86_64 -drive file=gluster+tcp://server.domain.com:24007/testvol/dir/a.img 1206 - qemu-system-x86_64 -drive file=gluster+unix:///testvol/dir/a.img?socket=/tmp/glusterd.socket 1207 - qemu-system-x86_64 -drive file=gluster+rdma://1.2.3.4:24007/testvol/a.img 1208 - qemu-system-x86_64 -drive file=gluster://1.2.3.4/testvol/a.img,file.debug=9,file.logfile=/var/log/qemu-gluster.log 1209 - qemu-system-x86_64 'json:@{"driver":"qcow2", 1210 - "file":@{"driver":"gluster", 1211 - "volume":"testvol","path":"a.img", 1212 - "debug":9,"logfile":"/var/log/qemu-gluster.log", 1213 - "server":[@{"type":"tcp","host":"1.2.3.4","port":24007@}, 1214 - @{"type":"unix","socket":"/var/run/glusterd.socket"@}]@}@}' 1215 - qemu-system-x86_64 -drive driver=qcow2,file.driver=gluster,file.volume=testvol,file.path=/path/a.img, 1216 - file.debug=9,file.logfile=/var/log/qemu-gluster.log, 1217 - file.server.0.type=tcp,file.server.0.host=1.2.3.4,file.server.0.port=24007, 1218 - file.server.1.type=unix,file.server.1.socket=/var/run/glusterd.socket 1219 - @end example 1220 - 1221 - @node disk_images_ssh 1222 - @subsection Secure Shell (ssh) disk images 1223 - 1224 - You can access disk images located on a remote ssh server 1225 - by using the ssh protocol: 1226 - 1227 - @example 1228 - qemu-system-x86_64 -drive 
file=ssh://[@var{user}@@]@var{server}[:@var{port}]/@var{path}[?host_key_check=@var{host_key_check}] 1229 - @end example 1230 - 1231 - Alternative syntax using properties: 1232 - 1233 - @example 1234 - qemu-system-x86_64 -drive file.driver=ssh[,file.user=@var{user}],file.host=@var{server}[,file.port=@var{port}],file.path=@var{path}[,file.host_key_check=@var{host_key_check}] 1235 - @end example 1236 - 1237 - @var{ssh} is the protocol. 1238 - 1239 - @var{user} is the remote user. If not specified, then the local 1240 - username is tried. 1241 - 1242 - @var{server} specifies the remote ssh server. Any ssh server can be 1243 - used, but it must implement the sftp-server protocol. Most Unix/Linux 1244 - systems should work without requiring any extra configuration. 1245 - 1246 - @var{port} is the port number on which sshd is listening. By default 1247 - the standard ssh port (22) is used. 1248 - 1249 - @var{path} is the path to the disk image. 1250 - 1251 - The optional @var{host_key_check} parameter controls how the remote 1252 - host's key is checked. The default is @code{yes} which means to use 1253 - the local @file{.ssh/known_hosts} file. Setting this to @code{no} 1254 - turns off known-hosts checking. Or you can check that the host key 1255 - matches a specific fingerprint: 1256 - @code{host_key_check=md5:78:45:8e:14:57:4f:d5:45:83:0a:0e:f3:49:82:c9:c8} 1257 - (@code{sha1:} can also be used as a prefix, but note that OpenSSH 1258 - tools only use MD5 to print fingerprints). 1259 - 1260 - Currently authentication must be done using ssh-agent. Other 1261 - authentication methods may be supported in future. 1262 - 1263 - Note: Many ssh servers do not support an @code{fsync}-style operation. 1264 - The ssh driver cannot guarantee that disk flush requests are 1265 - obeyed, and this causes a risk of disk corruption if the remote 1266 - server or network goes down during writes. 
The driver will 1267 - print a warning when @code{fsync} is not supported: 1268 - 1269 - warning: ssh server @code{ssh.example.com:22} does not support fsync 1270 - 1271 - With sufficiently new versions of libssh2 and OpenSSH, @code{fsync} is 1272 - supported. 493 + @include docs/qemu-block-drivers.texi 1273 494 1274 495 @node pcsys_network 1275 496 @section Network emulation
+2 -2
qemu-img-cmds.hx
··· 89 89 ETEXI 90 90 91 91 DEF("resize", img_resize, 92 - "resize [--object objectdef] [--image-opts] [-q] filename [+ | -]size") 92 + "resize [--object objectdef] [--image-opts] [-q] [--shrink] filename [+ | -]size") 93 93 STEXI 94 - @item resize [--object @var{objectdef}] [--image-opts] [-q] @var{filename} [+ | -]@var{size} 94 + @item resize [--object @var{objectdef}] [--image-opts] [-q] [--shrink] @var{filename} [+ | -]@var{size} 95 95 ETEXI 96 96 97 97 STEXI
+23
qemu-img.c
··· 64 64 OPTION_TARGET_IMAGE_OPTS = 263, 65 65 OPTION_SIZE = 264, 66 66 OPTION_PREALLOCATION = 265, 67 + OPTION_SHRINK = 266, 67 68 }; 68 69 69 70 typedef enum OutputFormat { ··· 3436 3437 }, 3437 3438 }; 3438 3439 bool image_opts = false; 3440 + bool shrink = false; 3439 3441 3440 3442 /* Remove size from argv manually so that negative numbers are not treated 3441 3443 * as options by getopt. */ ··· 3454 3456 {"object", required_argument, 0, OPTION_OBJECT}, 3455 3457 {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS}, 3456 3458 {"preallocation", required_argument, 0, OPTION_PREALLOCATION}, 3459 + {"shrink", no_argument, 0, OPTION_SHRINK}, 3457 3460 {0, 0, 0, 0} 3458 3461 }; 3459 3462 c = getopt_long(argc, argv, ":f:hq", ··· 3495 3498 error_report("Invalid preallocation mode '%s'", optarg); 3496 3499 return 1; 3497 3500 } 3501 + break; 3502 + case OPTION_SHRINK: 3503 + shrink = true; 3498 3504 break; 3499 3505 } 3500 3506 } ··· 3567 3573 error_report("Preallocation can only be used for growing images"); 3568 3574 ret = -1; 3569 3575 goto out; 3576 + } 3577 + 3578 + if (total_size < current_size && !shrink) { 3579 + warn_report("Shrinking an image will delete all data beyond the " 3580 + "shrunken image's end. Before performing such an " 3581 + "operation, make sure there is no important data there."); 3582 + 3583 + if (g_strcmp0(bdrv_get_format_name(blk_bs(blk)), "raw") != 0) { 3584 + error_report( 3585 + "Use the --shrink option to perform a shrink operation."); 3586 + ret = -1; 3587 + goto out; 3588 + } else { 3589 + warn_report("Using the --shrink option will suppress this message. " 3590 + "Note that future versions of qemu-img may refuse to " 3591 + "shrink images without this option."); 3592 + } 3570 3593 } 3571 3594 3572 3595 ret = blk_truncate(blk, total_size, prealloc, &err);
+14 -1
qemu-img.texi
··· 244 244 this case. @var{backing_file} will never be modified unless you use the 245 245 @code{commit} monitor command (or qemu-img commit). 246 246 247 + If a relative path name is given, the backing file is looked up relative to 248 + the directory containing @var{filename}. 249 + 247 250 Note that a given backing file will be opened to check that it is valid. Use 248 251 the @code{-u} option to enable unsafe backing file mode, which means that the 249 252 image will be created even if the associated backing file cannot be opened. A ··· 342 345 created as a copy on write image of the specified base image; the 343 346 @var{backing_file} should have the same content as the input's base image, 344 347 however the path, image format, etc may differ. 348 + 349 + If a relative path name is given, the backing file is looked up relative to 350 + the directory containing @var{output_filename}. 345 351 346 352 If the @code{-n} option is specified, the target volume creation will be 347 353 skipped. This is useful for formats such as @code{rbd} if the target ··· 490 496 string), then the image is rebased onto no backing file (i.e. it will exist 491 497 independently of any backing file). 492 498 499 + If a relative path name is given, the backing file is looked up relative to 500 + the directory containing @var{filename}. 501 + 493 502 @var{cache} specifies the cache mode to be used for @var{filename}, whereas 494 503 @var{src_cache} specifies the cache mode for reading backing files. 495 504 ··· 536 545 At this point, @code{modified.img} can be discarded, since 537 546 @code{base.img + diff.qcow2} contains the same information. 538 547 539 - @item resize [--preallocation=@var{prealloc}] @var{filename} [+ | -]@var{size} 548 + @item resize [--shrink] [--preallocation=@var{prealloc}] @var{filename} [+ | -]@var{size} 540 549 541 550 Change the disk image as if it had been created with @var{size}. 
542 551 543 552 Before using this command to shrink a disk image, you MUST use file system and 544 553 partitioning tools inside the VM to reduce allocated file systems and partition 545 554 sizes accordingly. Failure to do so will result in data loss! 555 + 556 + When shrinking images, the @code{--shrink} option must be given. This informs 557 + qemu-img that the user acknowledges all loss of data beyond the truncated 558 + image's end. 546 559 547 560 After using this command to grow a disk image, you must use file system and 548 561 partitioning tools inside the VM to actually begin using the new space on the
+12
qemu-io-cmds.c
··· 2010 2010 return 0; 2011 2011 } 2012 2012 2013 + if (!(flags & BDRV_O_RDWR)) { 2014 + uint64_t orig_perm, orig_shared_perm; 2015 + 2016 + bdrv_drain(bs); 2017 + 2018 + blk_get_perm(blk, &orig_perm, &orig_shared_perm); 2019 + blk_set_perm(blk, 2020 + orig_perm & ~(BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED), 2021 + orig_shared_perm, 2022 + &error_abort); 2023 + } 2024 + 2013 2025 qopts = qemu_opts_find(&reopen_opts, NULL); 2014 2026 opts = qopts ? qemu_opts_to_qdict(qopts, NULL) : NULL; 2015 2027 qemu_opts_reset(&reopen_opts);
+5 -1
tests/qemu-iotests/040
··· 82 82 qemu_io('-f', 'raw', '-c', 'write -P 0xab 0 524288', backing_img) 83 83 qemu_io('-f', iotests.imgfmt, '-c', 'write -P 0xef 524288 524288', mid_img) 84 84 self.vm = iotests.VM().add_drive(test_img, "node-name=top,backing.node-name=mid,backing.backing.node-name=base", interface="none") 85 - self.vm.add_device("virtio-scsi-pci") 85 + if iotests.qemu_default_machine == 's390-ccw-virtio': 86 + self.vm.add_device("virtio-scsi-ccw") 87 + else: 88 + self.vm.add_device("virtio-scsi-pci") 89 + 86 90 self.vm.add_device("scsi-hd,id=scsi0,drive=drive0") 87 91 self.vm.launch() 88 92
+11 -1
tests/qemu-iotests/051
··· 103 103 echo === Device without drive === 104 104 echo 105 105 106 - run_qemu -device virtio-scsi-pci -device scsi-hd 106 + case "$QEMU_DEFAULT_MACHINE" in 107 + s390-ccw-virtio) 108 + virtio_scsi=virtio-scsi-ccw 109 + ;; 110 + *) 111 + virtio_scsi=virtio-scsi-pci 112 + ;; 113 + esac 114 + 115 + run_qemu -device $virtio_scsi -device scsi-hd | 116 + sed -e "s/$virtio_scsi/VIRTIO_SCSI/" 107 117 108 118 echo 109 119 echo === Overriding backing file ===
+1 -1
tests/qemu-iotests/051.out
··· 49 49 50 50 === Device without drive === 51 51 52 - Testing: -device virtio-scsi-pci -device scsi-hd 52 + Testing: -device VIRTIO_SCSI -device scsi-hd 53 53 QEMU X.Y.Z monitor - type 'help' for more information 54 54 (qemu) QEMU_PROG: -device scsi-hd: drive property not set 55 55
+1 -1
tests/qemu-iotests/051.pc.out
··· 49 49 50 50 === Device without drive === 51 51 52 - Testing: -device virtio-scsi-pci -device scsi-hd 52 + Testing: -device VIRTIO_SCSI -device scsi-hd 53 53 QEMU X.Y.Z monitor - type 'help' for more information 54 54 (qemu) QEMU_PROG: -device scsi-hd: drive property not set 55 55
+1 -1
tests/qemu-iotests/067
··· 141 141 echo === Empty drive with -device and device_del === 142 142 echo 143 143 144 - run_qemu -device virtio-scsi-pci -device scsi-cd,id=cd0 <<EOF 144 + run_qemu -device virtio-scsi -device scsi-cd,id=cd0 <<EOF 145 145 { "execute": "qmp_capabilities" } 146 146 { "execute": "query-block" } 147 147 { "execute": "device_del", "arguments": { "id": "cd0" } }
+1 -1
tests/qemu-iotests/067.out
··· 419 419 420 420 === Empty drive with -device and device_del === 421 421 422 - Testing: -device virtio-scsi-pci -device scsi-cd,id=cd0 422 + Testing: -device virtio-scsi -device scsi-cd,id=cd0 423 423 { 424 424 QMP_VERSION 425 425 }
+2 -2
tests/qemu-iotests/102
··· 54 54 $QEMU_IO -c 'write 0 64k' "$TEST_IMG" | _filter_qemu_io 55 55 # Remove data cluster from image (first cluster: image header, second: reftable, 56 56 # third: refblock, fourth: L1 table, fifth: L2 table) 57 - $QEMU_IMG resize -f raw "$TEST_IMG" $((5 * 64 * 1024)) 57 + $QEMU_IMG resize -f raw --shrink "$TEST_IMG" $((5 * 64 * 1024)) 58 58 59 59 $QEMU_IO -c map "$TEST_IMG" 60 60 $QEMU_IMG map "$TEST_IMG" ··· 69 69 70 70 qemu_comm_method=monitor _launch_qemu -drive if=none,file="$TEST_IMG",id=drv0 71 71 72 - $QEMU_IMG resize -f raw "$TEST_IMG" $((5 * 64 * 1024)) 72 + $QEMU_IMG resize -f raw --shrink "$TEST_IMG" $((5 * 64 * 1024)) 73 73 74 74 _send_qemu_cmd $QEMU_HANDLE 'qemu-io drv0 map' 'allocated' \ 75 75 | sed -e 's/^(qemu).*qemu-io drv0 map...$/(qemu) qemu-io drv0 map/'
+1 -1
tests/qemu-iotests/106
··· 83 83 for growth_mode in falloc full off; do 84 84 echo 85 85 echo "--- growth_mode=$growth_mode ---" 86 - $QEMU_IMG resize -f "$IMGFMT" --preallocation=$growth_mode "$TEST_IMG" -${GROWTH_SIZE}K 86 + $QEMU_IMG resize -f "$IMGFMT" --shrink --preallocation=$growth_mode "$TEST_IMG" -${GROWTH_SIZE}K 87 87 done 88 88 89 89 # success, all done
+10 -2
tests/qemu-iotests/139
··· 25 25 26 26 base_img = os.path.join(iotests.test_dir, 'base.img') 27 27 new_img = os.path.join(iotests.test_dir, 'new.img') 28 + if iotests.qemu_default_machine == 's390-ccw-virtio': 29 + default_virtio_blk = 'virtio-blk-ccw' 30 + else: 31 + default_virtio_blk = 'virtio-blk-pci' 28 32 29 33 class TestBlockdevDel(iotests.QMPTestCase): 30 34 31 35 def setUp(self): 32 36 iotests.qemu_img('create', '-f', iotests.imgfmt, base_img, '1M') 33 37 self.vm = iotests.VM() 34 - self.vm.add_device("virtio-scsi-pci,id=virtio-scsi") 38 + if iotests.qemu_default_machine == 's390-ccw-virtio': 39 + self.vm.add_device("virtio-scsi-ccw,id=virtio-scsi") 40 + else: 41 + self.vm.add_device("virtio-scsi-pci,id=virtio-scsi") 42 + 35 43 self.vm.launch() 36 44 37 45 def tearDown(self): ··· 87 95 self.checkBlockDriverState(node, expect_error) 88 96 89 97 # Add a device model 90 - def addDeviceModel(self, device, backend, driver = 'virtio-blk-pci'): 98 + def addDeviceModel(self, device, backend, driver = default_virtio_blk): 91 99 result = self.vm.qmp('device_add', id = device, 92 100 driver = driver, drive = backend) 93 101 self.assert_qmp(result, 'return', {})
+170
tests/qemu-iotests/163
#!/usr/bin/env python
#
# Tests for shrinking images
#
# Copyright (c) 2016-2017 Parallels International GmbH
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

import os, random, iotests, struct, qcow2
from iotests import qemu_img, qemu_io, image_size

# Image that gets shrunk, and a reference image created directly at the
# target size and filled with the same 0xff pattern.
test_img = os.path.join(iotests.test_dir, 'test.img')
check_img = os.path.join(iotests.test_dir, 'check.img')

def size_to_int(str):
    # Convert a size such as '16M' to bytes.  The last character selects a
    # power of 1024 (B, K, M, G, T); everything before it is the integer
    # count.  A suffix outside that list raises ValueError.
    suff = ['B', 'K', 'M', 'G', 'T']
    return int(str[:-1]) * 1024**suff.index(str[-1:])

class ShrinkBaseClass(iotests.QMPTestCase):
    # Defaults; subclasses override these and must also set cluster_size,
    # which setUp() reads for non-raw formats.
    image_len = '128M'
    shrink_size = '10M'
    chunk_size = '16M'
    refcount_bits = '16'

    def __qcow2_check(self, filename):
        # Inspect the qcow2 metadata of 'filename' directly: L1 entries
        # past the shrunken size must be zero, and every refcount block
        # still referenced by the reftable must contain a non-zero byte.
        entry_bits = 3
        entry_size = 1 << entry_bits
        l1_mask = 0x00fffffffffffe00

        def div_roundup(n, d):
            # Floor division keeps the result an int on Python 2 and 3;
            # it is used as a slice index below.
            return (n + d - 1) // d

        def split_by_n(data, n):
            # Yield the masked big-endian 64-bit value of each n-byte entry.
            for x in range(0, len(data), n):
                yield struct.unpack('>Q', data[x:x + n])[0] & l1_mask

        def check_l1_table(h, l1_data):
            l1_list = list(split_by_n(l1_data, entry_size))
            # One L1 entry addresses one L2 table.  An L2 table is a single
            # cluster of 8-byte entries, each mapping one cluster, so an L1
            # entry covers 2**(2 * cluster_bits - entry_bits) bytes.  The
            # exponent has to use entry_bits (3), not entry_size (8):
            # with entry_size the computed table size is 32 times too big
            # and the "unused" slice below ends up empty.
            real_l1_size = div_roundup(h.size,
                                       1 << (h.cluster_bits*2 - entry_bits))
            used, unused = l1_list[:real_l1_size], l1_list[real_l1_size:]

            self.assertTrue(len(used) != 0, "Verifying l1 table content")
            self.assertFalse(any(unused), "Verifying l1 table content")

        def check_reftable(fd, h, reftable):
            for offset in split_by_n(reftable, entry_size):
                if offset != 0:
                    fd.seek(offset)
                    cluster = fd.read(1 << h.cluster_bits)
                    # bytearray() iterates as integers on both Python 2
                    # and 3.  Iterating a Python 2 str yields one-character
                    # strings, which are always truthy, so any() on it
                    # could never fail.
                    self.assertTrue(any(bytearray(cluster)),
                                    "Verifying reftable content")

        with open(filename, "rb") as fd:
            h = qcow2.QcowHeader(fd)

            fd.seek(h.l1_table_offset)
            l1_table = fd.read(h.l1_size << entry_bits)

            fd.seek(h.refcount_table_offset)
            reftable = fd.read(h.refcount_table_clusters << h.cluster_bits)

            check_l1_table(h, l1_table)
            check_reftable(fd, h, reftable)

    def __raw_check(self, filename):
        # Raw images carry no metadata to inspect.
        pass

    # Format name -> metadata checker.  The values are plain functions, so
    # image_verify() passes self explicitly.
    image_check = {
        'qcow2' : __qcow2_check,
        'raw' : __raw_check
    }

    def setUp(self):
        # Create the image to shrink at image_len and the reference image
        # at shrink_size, then fill the reference image with 0xff.
        if iotests.imgfmt == 'raw':
            qemu_img('create', '-f', iotests.imgfmt, test_img, self.image_len)
            qemu_img('create', '-f', iotests.imgfmt, check_img,
                     self.shrink_size)
        else:
            qemu_img('create', '-f', iotests.imgfmt,
                     '-o', 'cluster_size=' + self.cluster_size +
                     ',refcount_bits=' + self.refcount_bits,
                     test_img, self.image_len)
            qemu_img('create', '-f', iotests.imgfmt,
                     '-o', 'cluster_size=%s'% self.cluster_size,
                     check_img, self.shrink_size)
        qemu_io('-c', 'write -P 0xff 0 ' + self.shrink_size, check_img)

    def tearDown(self):
        os.remove(test_img)
        os.remove(check_img)

    def image_verify(self):
        # Common post-shrink checks: same size as the reference image,
        # format-specific metadata check, and 'qemu-img check' for
        # non-raw formats.
        self.assertEqual(image_size(test_img), image_size(check_img),
                         "Verifying image size")
        self.image_check[iotests.imgfmt](self, test_img)

        if iotests.imgfmt == 'raw':
            return
        self.assertEqual(qemu_img('check', test_img), 0,
                         "Verifying image corruption")

    def test_empty_image(self):
        # Shrink an image that was never written to.
        qemu_img('resize', '-f', iotests.imgfmt, '--shrink', test_img,
                 self.shrink_size)

        # NOTE(review): unlike the 'write' in setUp(), this 'read' command
        # is given a single numeric argument; confirm whether an explicit
        # offset was intended.  Left unchanged so the comparison behaves
        # exactly as before.
        self.assertEqual(
            qemu_io('-c', 'read -P 0x00 %s'%self.shrink_size, test_img),
            qemu_io('-c', 'read -P 0x00 %s'%self.shrink_size, check_img),
            "Verifying image content")

        self.image_verify()

    def test_sequential_write(self):
        # Fill the whole image with 0xff in ascending chunk order, shrink
        # it, and compare against the reference image.
        for offs in range(0, size_to_int(self.image_len),
                          size_to_int(self.chunk_size)):
            qemu_io('-c', 'write -P 0xff %d %s' % (offs, self.chunk_size),
                    test_img)

        qemu_img('resize', '-f', iotests.imgfmt, '--shrink', test_img,
                 self.shrink_size)

        self.assertEqual(qemu_img("compare", test_img, check_img), 0,
                         "Verifying image content")

        self.image_verify()

    def test_random_write(self):
        # Same as test_sequential_write, but the chunks are written in a
        # shuffled order.  random.shuffle() needs a mutable sequence, so
        # the range is materialised as a list (required on Python 3).
        offs_list = list(range(0, size_to_int(self.image_len),
                               size_to_int(self.chunk_size)))
        random.shuffle(offs_list)
        for offs in offs_list:
            qemu_io('-c', 'write -P 0xff %d %s' % (offs, self.chunk_size),
                    test_img)

        qemu_img('resize', '-f', iotests.imgfmt, '--shrink', test_img,
                 self.shrink_size)

        self.assertEqual(qemu_img("compare", test_img, check_img), 0,
                         "Verifying image content")

        self.image_verify()

class TestShrink512(ShrinkBaseClass):
    image_len = '3M'
    shrink_size = '1M'
    chunk_size = '256K'
    cluster_size = '512'
    refcount_bits = '64'

class TestShrink64K(ShrinkBaseClass):
    cluster_size = '64K'

class TestShrink1M(ShrinkBaseClass):
    cluster_size = '1M'
    refcount_bits = '1'

# Drop the module-level name of the base class so that only the concrete
# subclasses above remain visible as test cases in this module.
ShrinkBaseClass = None

if __name__ == '__main__':
    iotests.main(supported_fmts=['raw', 'qcow2'])
+5
tests/qemu-iotests/163.out
··· 1 + ......... 2 + ---------------------------------------------------------------------- 3 + Ran 9 tests 4 + 5 + OK
+1 -1
tests/qemu-iotests/172
··· 56 56 done 57 57 fi 58 58 echo quit 59 - ) | $QEMU -nographic -monitor stdio -serial none "$@" 59 + ) | $QEMU -machine accel=qtest -nographic -monitor stdio -serial none "$@" 60 60 echo 61 61 } 62 62
+2
tests/qemu-iotests/181
··· 93 93 94 94 # Slow down migration so much that it definitely won't finish before we can 95 95 # switch to postcopy 96 + # Enable postcopy-ram capability both on source and destination 96 97 silent=yes 98 + _send_qemu_cmd $dest 'migrate_set_capability postcopy-ram on' "(qemu)" 97 99 _send_qemu_cmd $src 'migrate_set_speed 4k' "(qemu)" 98 100 _send_qemu_cmd $src 'migrate_set_capability postcopy-ram on' "(qemu)" 99 101 _send_qemu_cmd $src "migrate -d unix:${MIG_SOCKET}" "(qemu)"
-1
tests/qemu-iotests/181.out
··· 20 20 21 21 === Do some I/O on the destination === 22 22 23 - QEMU X.Y.Z monitor - type 'help' for more information 24 23 (qemu) qemu-io disk "read -P 0x55 0 64k" 25 24 read 65536/65536 bytes at offset 0 26 25 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+11 -2
tests/qemu-iotests/182
··· 45 45 46 46 size=32M 47 47 48 + case "$QEMU_DEFAULT_MACHINE" in 49 + s390-ccw-virtio) 50 + virtioblk=virtio-blk-ccw 51 + ;; 52 + *) 53 + virtioblk=virtio-blk-pci 54 + ;; 55 + esac 56 + 48 57 _make_test_img $size 49 58 50 59 echo "Starting QEMU" 51 60 _launch_qemu -drive file=$TEST_IMG,if=none,id=drive0,file.locking=on \ 52 - -device virtio-blk-pci,drive=drive0 61 + -device $virtioblk,drive=drive0 53 62 54 63 echo 55 64 echo "Starting a second QEMU using the same image should fail" 56 65 echo 'quit' | $QEMU -monitor stdio \ 57 66 -drive file=$TEST_IMG,if=none,id=drive0,file.locking=on \ 58 - -device virtio-blk-pci,drive=drive0 2>&1 | _filter_testdir 2>&1 | 67 + -device $virtioblk,drive=drive0 2>&1 | _filter_testdir 2>&1 | 59 68 _filter_qemu | 60 69 sed -e '/falling back to POSIX file/d' \ 61 70 -e '/locks can be lost unexpectedly/d'
+3 -3
tests/qemu-iotests/186
··· 56 56 done 57 57 fi 58 58 echo quit 59 - ) | $QEMU -S -nodefaults -display none -device virtio-scsi-pci -monitor stdio "$@" 2>&1 59 + ) | $QEMU -S -display none -device virtio-scsi-pci -monitor stdio "$@" 2>&1 60 60 echo 61 61 } 62 62 63 63 function check_info_block() 64 64 { 65 65 echo "info block" | 66 - QEMU_OPTIONS="" do_run_qemu "$@" | _filter_win32 | _filter_hmp | 67 - _filter_qemu | _filter_generated_node_ids 66 + do_run_qemu "$@" | _filter_win32 | _filter_hmp | _filter_qemu | 67 + _filter_generated_node_ids 68 68 } 69 69 70 70
+1 -1
tests/qemu-iotests/187.out
··· 12 12 13 13 wrote 65536/65536 bytes at offset 0 14 14 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) 15 - write failed: Operation not permitted 15 + Block node is read-only 16 16 wrote 65536/65536 bytes at offset 0 17 17 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) 18 18 *** done
+92
tests/qemu-iotests/195
··· 1 + #!/bin/bash 2 + # 3 + # Test change-backing-file command 4 + # 5 + # Copyright (C) 2017 Red Hat, Inc. 6 + # 7 + # This program is free software; you can redistribute it and/or modify 8 + # it under the terms of the GNU General Public License as published by 9 + # the Free Software Foundation; either version 2 of the License, or 10 + # (at your option) any later version. 11 + # 12 + # This program is distributed in the hope that it will be useful, 13 + # but WITHOUT ANY WARRANTY; without even the implied warranty of 14 + # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 + # GNU General Public License for more details. 16 + # 17 + # You should have received a copy of the GNU General Public License 18 + # along with this program. If not, see <http://www.gnu.org/licenses/>. 19 + # 20 + 21 + # creator 22 + owner=kwolf@redhat.com 23 + 24 + seq=`basename $0` 25 + echo "QA output created by $seq" 26 + 27 + here=`pwd` 28 + status=1 # failure is the default! 29 + 30 + _cleanup() 31 + { 32 + _cleanup_test_img 33 + rm -f "$TEST_IMG.mid" 34 + } 35 + trap "_cleanup; exit \$status" 0 1 2 3 15 36 + 37 + # get standard environment, filters and checks 38 + . ./common.rc 39 + . 
./common.filter 40 + 41 + _supported_fmt qcow2 42 + _supported_proto file 43 + _supported_os Linux 44 + 45 + function do_run_qemu() 46 + { 47 + echo Testing: "$@" | _filter_imgfmt 48 + $QEMU -nographic -qmp-pretty stdio -serial none "$@" 49 + echo 50 + } 51 + 52 + function run_qemu() 53 + { 54 + do_run_qemu "$@" 2>&1 | _filter_testdir | _filter_qemu | _filter_qmp \ 55 + | _filter_qemu_io | _filter_generated_node_ids 56 + } 57 + 58 + size=64M 59 + TEST_IMG="$TEST_IMG.base" _make_test_img $size 60 + TEST_IMG="$TEST_IMG.mid" _make_test_img -b "$TEST_IMG.base" 61 + _make_test_img -b "$TEST_IMG.mid" 62 + 63 + echo 64 + echo "Change backing file of mid (opened read-only)" 65 + echo 66 + 67 + run_qemu -drive if=none,file="$TEST_IMG",backing.node-name=mid <<EOF 68 + {"execute":"qmp_capabilities"} 69 + {"execute":"change-backing-file", "arguments":{"device":"none0","image-node-name":"mid","backing-file":"/dev/null"}} 70 + {"execute":"quit"} 71 + EOF 72 + 73 + TEST_IMG="$TEST_IMG.mid" _img_info 74 + 75 + echo 76 + echo "Change backing file of top (opened writable)" 77 + echo 78 + 79 + TEST_IMG="$TEST_IMG.mid" _make_test_img -b "$TEST_IMG.base" 80 + 81 + run_qemu -drive if=none,file="$TEST_IMG",node-name=top <<EOF 82 + {"execute":"qmp_capabilities"} 83 + {"execute":"change-backing-file", "arguments":{"device":"none0","image-node-name":"top","backing-file":"/dev/null"}} 84 + {"execute":"quit"} 85 + EOF 86 + 87 + _img_info 88 + 89 + # success, all done 90 + echo "*** done" 91 + rm -f $seq.full 92 + status=0
+78
tests/qemu-iotests/195.out
··· 1 + QA output created by 195 2 + Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=67108864 3 + Formatting 'TEST_DIR/t.IMGFMT.mid', fmt=IMGFMT size=67108864 backing_file=TEST_DIR/t.IMGFMT.base 4 + Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 backing_file=TEST_DIR/t.IMGFMT.mid 5 + 6 + Change backing file of mid (opened read-only) 7 + 8 + Testing: -drive if=none,file=TEST_DIR/t.IMGFMT,backing.node-name=mid 9 + { 10 + QMP_VERSION 11 + } 12 + { 13 + "return": { 14 + } 15 + } 16 + { 17 + "return": { 18 + } 19 + } 20 + { 21 + "return": { 22 + } 23 + } 24 + { 25 + "timestamp": { 26 + "seconds": TIMESTAMP, 27 + "microseconds": TIMESTAMP 28 + }, 29 + "event": "SHUTDOWN", 30 + "data": { 31 + "guest": false 32 + } 33 + } 34 + 35 + image: TEST_DIR/t.IMGFMT.mid 36 + file format: IMGFMT 37 + virtual size: 64M (67108864 bytes) 38 + cluster_size: 65536 39 + backing file: /dev/null 40 + backing file format: IMGFMT 41 + 42 + Change backing file of top (opened writable) 43 + 44 + Formatting 'TEST_DIR/t.IMGFMT.mid', fmt=IMGFMT size=67108864 backing_file=TEST_DIR/t.IMGFMT.base 45 + Testing: -drive if=none,file=TEST_DIR/t.IMGFMT,node-name=top 46 + { 47 + QMP_VERSION 48 + } 49 + { 50 + "return": { 51 + } 52 + } 53 + { 54 + "return": { 55 + } 56 + } 57 + { 58 + "return": { 59 + } 60 + } 61 + { 62 + "timestamp": { 63 + "seconds": TIMESTAMP, 64 + "microseconds": TIMESTAMP 65 + }, 66 + "event": "SHUTDOWN", 67 + "data": { 68 + "guest": false 69 + } 70 + } 71 + 72 + image: TEST_DIR/t.IMGFMT 73 + file format: IMGFMT 74 + virtual size: 64M (67108864 bytes) 75 + cluster_size: 65536 76 + backing file: /dev/null 77 + backing file format: IMGFMT 78 + *** done
+1 -1
tests/qemu-iotests/check
··· 353 353 else 354 354 echo " - output mismatch (see $seq.out.bad)" 355 355 mv $tmp.out $seq.out.bad 356 - $diff -w "$reference" $seq.out.bad 356 + $diff -w "$reference" $(realpath $seq.out.bad) 357 357 err=true 358 358 fi 359 359 fi
+2
tests/qemu-iotests/group
··· 166 166 159 rw auto quick 167 167 160 rw auto quick 168 168 162 auto quick 169 + 163 rw auto quick 169 170 165 rw auto quick 170 171 170 rw auto quick 171 172 171 rw auto quick ··· 189 190 190 rw auto quick 190 191 192 rw auto quick 191 192 194 rw auto migration quick 193 + 195 rw auto quick
+1
util/throttle.c
··· 124 124 /* If the main bucket is not full yet we still have to check the 125 125 * burst bucket in order to enforce the burst limit */ 126 126 if (bkt->burst_length > 1) { 127 + assert(bkt->max > 0); /* see throttle_is_valid() */ 127 128 extra = bkt->burst_level - burst_bucket_size; 128 129 if (extra > 0) { 129 130 return throttle_do_compute_wait(bkt->max, extra);