qemu with hax to log dma reads & writes jcs.org/2018/11/12/vfio

vhost-user-blk: add discard/write zeroes features support

Linux commit 1f23816b8 ("virtio_blk: add discard and write zeroes support")
added support for these features to the guest kernel. This patch enables the
same features in the vhost-user-blk driver, and extends the example test
utility to handle the DISCARD and WRITE ZEROES commands.

Signed-off-by: Changpeng Liu <changpeng.liu@intel.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

Authored by Changpeng Liu and committed by Michael S. Tsirkin
caa1ee43 a56de056

+159 -33
+107 -33
contrib/vhost-user-blk/vhost-user-blk.c
··· 63 63 return len; 64 64 } 65 65 66 + static size_t vub_iov_to_buf(const struct iovec *iov, 67 + const unsigned int iov_cnt, void *buf) 68 + { 69 + size_t len; 70 + unsigned int i; 71 + 72 + len = 0; 73 + for (i = 0; i < iov_cnt; i++) { 74 + memcpy(buf + len, iov[i].iov_base, iov[i].iov_len); 75 + len += iov[i].iov_len; 76 + } 77 + return len; 78 + } 79 + 66 80 static void vub_panic_cb(VuDev *vu_dev, const char *buf) 67 81 { 68 82 VugDev *gdev; ··· 161 175 return rc; 162 176 } 163 177 178 + static int 179 + vub_discard_write_zeroes(VubReq *req, struct iovec *iov, uint32_t iovcnt, 180 + uint32_t type) 181 + { 182 + struct virtio_blk_discard_write_zeroes *desc; 183 + ssize_t size; 184 + void *buf; 185 + 186 + size = vub_iov_size(iov, iovcnt); 187 + if (size != sizeof(*desc)) { 188 + fprintf(stderr, "Invalid size %ld, expect %ld\n", size, sizeof(*desc)); 189 + return -1; 190 + } 191 + buf = g_new0(char, size); 192 + vub_iov_to_buf(iov, iovcnt, buf); 193 + 194 + #if defined(__linux__) && defined(BLKDISCARD) && defined(BLKZEROOUT) 195 + VubDev *vdev_blk = req->vdev_blk; 196 + desc = (struct virtio_blk_discard_write_zeroes *)buf; 197 + uint64_t range[2] = { le64toh(desc->sector) << 9, 198 + le32toh(desc->num_sectors) << 9 }; 199 + if (type == VIRTIO_BLK_T_DISCARD) { 200 + if (ioctl(vdev_blk->blk_fd, BLKDISCARD, range) == 0) { 201 + g_free(buf); 202 + return 0; 203 + } 204 + } else if (type == VIRTIO_BLK_T_WRITE_ZEROES) { 205 + if (ioctl(vdev_blk->blk_fd, BLKZEROOUT, range) == 0) { 206 + g_free(buf); 207 + return 0; 208 + } 209 + } 210 + #endif 211 + 212 + g_free(buf); 213 + return -1; 214 + } 215 + 164 216 static void 165 217 vub_flush(VubReq *req) 166 218 { ··· 216 268 in_num--; 217 269 218 270 type = le32toh(req->out->type); 219 - switch (type & ~(VIRTIO_BLK_T_OUT | VIRTIO_BLK_T_BARRIER)) { 220 - case VIRTIO_BLK_T_IN: { 221 - ssize_t ret = 0; 222 - bool is_write = type & VIRTIO_BLK_T_OUT; 223 - req->sector_num = le64toh(req->out->sector); 224 - if (is_write) { 225 - 
ret = vub_writev(req, &elem->out_sg[1], out_num); 226 - } else { 227 - ret = vub_readv(req, &elem->in_sg[0], in_num); 228 - } 229 - if (ret >= 0) { 230 - req->in->status = VIRTIO_BLK_S_OK; 231 - } else { 232 - req->in->status = VIRTIO_BLK_S_IOERR; 233 - } 234 - vub_req_complete(req); 235 - break; 271 + switch (type & ~VIRTIO_BLK_T_BARRIER) { 272 + case VIRTIO_BLK_T_IN: 273 + case VIRTIO_BLK_T_OUT: { 274 + ssize_t ret = 0; 275 + bool is_write = type & VIRTIO_BLK_T_OUT; 276 + req->sector_num = le64toh(req->out->sector); 277 + if (is_write) { 278 + ret = vub_writev(req, &elem->out_sg[1], out_num); 279 + } else { 280 + ret = vub_readv(req, &elem->in_sg[0], in_num); 236 281 } 237 - case VIRTIO_BLK_T_FLUSH: { 238 - vub_flush(req); 282 + if (ret >= 0) { 239 283 req->in->status = VIRTIO_BLK_S_OK; 240 - vub_req_complete(req); 241 - break; 284 + } else { 285 + req->in->status = VIRTIO_BLK_S_IOERR; 242 286 } 243 - case VIRTIO_BLK_T_GET_ID: { 244 - size_t size = MIN(vub_iov_size(&elem->in_sg[0], in_num), 245 - VIRTIO_BLK_ID_BYTES); 246 - snprintf(elem->in_sg[0].iov_base, size, "%s", "vhost_user_blk"); 287 + vub_req_complete(req); 288 + break; 289 + } 290 + case VIRTIO_BLK_T_FLUSH: 291 + vub_flush(req); 292 + req->in->status = VIRTIO_BLK_S_OK; 293 + vub_req_complete(req); 294 + break; 295 + case VIRTIO_BLK_T_GET_ID: { 296 + size_t size = MIN(vub_iov_size(&elem->in_sg[0], in_num), 297 + VIRTIO_BLK_ID_BYTES); 298 + snprintf(elem->in_sg[0].iov_base, size, "%s", "vhost_user_blk"); 299 + req->in->status = VIRTIO_BLK_S_OK; 300 + req->size = elem->in_sg[0].iov_len; 301 + vub_req_complete(req); 302 + break; 303 + } 304 + case VIRTIO_BLK_T_DISCARD: 305 + case VIRTIO_BLK_T_WRITE_ZEROES: { 306 + int rc; 307 + rc = vub_discard_write_zeroes(req, &elem->out_sg[1], out_num, type); 308 + if (rc == 0) { 247 309 req->in->status = VIRTIO_BLK_S_OK; 248 - req->size = elem->in_sg[0].iov_len; 249 - vub_req_complete(req); 250 - break; 251 - } 252 - default: { 253 - req->in->status = 
VIRTIO_BLK_S_UNSUPP; 254 - vub_req_complete(req); 255 - break; 310 + } else { 311 + req->in->status = VIRTIO_BLK_S_IOERR; 256 312 } 313 + vub_req_complete(req); 314 + break; 315 + } 316 + default: 317 + req->in->status = VIRTIO_BLK_S_UNSUPP; 318 + vub_req_complete(req); 319 + break; 257 320 } 258 321 259 322 return 0; ··· 317 380 1ull << VIRTIO_BLK_F_TOPOLOGY | 318 381 1ull << VIRTIO_BLK_F_BLK_SIZE | 319 382 1ull << VIRTIO_BLK_F_FLUSH | 383 + #if defined(__linux__) && defined(BLKDISCARD) && defined(BLKZEROOUT) 384 + 1ull << VIRTIO_BLK_F_DISCARD | 385 + 1ull << VIRTIO_BLK_F_WRITE_ZEROES | 386 + #endif 320 387 1ull << VIRTIO_BLK_F_CONFIG_WCE | 321 388 1ull << VIRTIO_F_VERSION_1 | 322 389 1ull << VHOST_USER_F_PROTOCOL_FEATURES; ··· 478 545 config->min_io_size = 1; 479 546 config->opt_io_size = 1; 480 547 config->num_queues = 1; 548 + #if defined(__linux__) && defined(BLKDISCARD) && defined(BLKZEROOUT) 549 + config->max_discard_sectors = 32768; 550 + config->max_discard_seg = 1; 551 + config->discard_sector_alignment = config->blk_size >> 9; 552 + config->max_write_zeroes_sectors = 32768; 553 + config->max_write_zeroes_seg = 1; 554 + #endif 481 555 } 482 556 483 557 static VubDev *
+4
hw/block/vhost-user-blk.c
··· 38 38 VIRTIO_BLK_F_RO, 39 39 VIRTIO_BLK_F_FLUSH, 40 40 VIRTIO_BLK_F_CONFIG_WCE, 41 + VIRTIO_BLK_F_DISCARD, 42 + VIRTIO_BLK_F_WRITE_ZEROES, 41 43 VIRTIO_F_VERSION_1, 42 44 VIRTIO_RING_F_INDIRECT_DESC, 43 45 VIRTIO_RING_F_EVENT_IDX, ··· 204 206 virtio_add_feature(&features, VIRTIO_BLK_F_BLK_SIZE); 205 207 virtio_add_feature(&features, VIRTIO_BLK_F_FLUSH); 206 208 virtio_add_feature(&features, VIRTIO_BLK_F_RO); 209 + virtio_add_feature(&features, VIRTIO_BLK_F_DISCARD); 210 + virtio_add_feature(&features, VIRTIO_BLK_F_WRITE_ZEROES); 207 211 208 212 if (s->config_wce) { 209 213 virtio_add_feature(&features, VIRTIO_BLK_F_CONFIG_WCE);
+48
include/standard-headers/linux/virtio_blk.h
··· 38 38 #define VIRTIO_BLK_F_BLK_SIZE 6 /* Block size of disk is available*/ 39 39 #define VIRTIO_BLK_F_TOPOLOGY 10 /* Topology information is available */ 40 40 #define VIRTIO_BLK_F_MQ 12 /* support more than one vq */ 41 + #define VIRTIO_BLK_F_DISCARD 13 /* DISCARD is supported */ 42 + #define VIRTIO_BLK_F_WRITE_ZEROES 14 /* WRITE ZEROES is supported */ 41 43 42 44 /* Legacy feature bits */ 43 45 #ifndef VIRTIO_BLK_NO_LEGACY ··· 84 86 85 87 /* number of vqs, only available when VIRTIO_BLK_F_MQ is set */ 86 88 uint16_t num_queues; 89 + 90 + /* the next 3 entries are guarded by VIRTIO_BLK_F_DISCARD */ 91 + /* 92 + * The maximum discard sectors (in 512-byte sectors) for 93 + * one segment. 94 + */ 95 + uint32_t max_discard_sectors; 96 + /* 97 + * The maximum number of discard segments in a 98 + * discard command. 99 + */ 100 + uint32_t max_discard_seg; 101 + /* Discard commands must be aligned to this number of sectors. */ 102 + uint32_t discard_sector_alignment; 103 + 104 + /* the next 3 entries are guarded by VIRTIO_BLK_F_WRITE_ZEROES */ 105 + /* 106 + * The maximum number of write zeroes sectors (in 512-byte sectors) in 107 + * one segment. 108 + */ 109 + uint32_t max_write_zeroes_sectors; 110 + /* 111 + * The maximum number of segments in a write zeroes 112 + * command. 113 + */ 114 + uint32_t max_write_zeroes_seg; 115 + /* 116 + * Set if a VIRTIO_BLK_T_WRITE_ZEROES request may result in the 117 + * deallocation of one or more of the sectors. 118 + */ 119 + uint8_t write_zeroes_may_unmap; 120 + 121 + uint8_t unused1[3]; 87 122 } QEMU_PACKED; 88 123 89 124 /* ··· 135 170 __virtio32 ioprio; 136 171 /* Sector (ie. 512 byte offset) */ 137 172 __virtio64 sector; 173 + }; 174 + 175 + /* Unmap this range (only valid for write zeroes command) */ 176 + #define VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP 0x00000001 177 + 178 + /* Discard/write zeroes range for each request. 
*/ 179 + struct virtio_blk_discard_write_zeroes { 180 + /* discard/write zeroes start sector */ 181 + uint64_t sector; 182 + /* number of discard/write zeroes sectors */ 183 + uint32_t num_sectors; 184 + /* flags for this range */ 185 + uint32_t flags; 138 186 }; 139 187 140 188 #ifndef VIRTIO_BLK_NO_LEGACY