qemu with hax to log dma reads & writes jcs.org/2018/11/12/vfio
at master 587 lines 16 kB view raw
1/* 2 * QEMU Block driver for Veritas HyperScale (VxHS) 3 * 4 * Copyright (c) 2017 Veritas Technologies LLC. 5 * 6 * This work is licensed under the terms of the GNU GPL, version 2 or later. 7 * See the COPYING file in the top-level directory. 8 * 9 */ 10 11#include "qemu/osdep.h" 12#include <qnio/qnio_api.h> 13#include <sys/param.h> 14#include "block/block_int.h" 15#include "block/qdict.h" 16#include "qapi/qmp/qerror.h" 17#include "qapi/qmp/qdict.h" 18#include "qapi/qmp/qstring.h" 19#include "trace.h" 20#include "qemu/module.h" 21#include "qemu/uri.h" 22#include "qapi/error.h" 23#include "qemu/uuid.h" 24#include "crypto/tlscredsx509.h" 25#include "sysemu/replay.h" 26 27#define VXHS_OPT_FILENAME "filename" 28#define VXHS_OPT_VDISK_ID "vdisk-id" 29#define VXHS_OPT_SERVER "server" 30#define VXHS_OPT_HOST "host" 31#define VXHS_OPT_PORT "port" 32 33/* Only accessed under QEMU global mutex */ 34static uint32_t vxhs_ref; 35 36typedef enum { 37 VDISK_AIO_READ, 38 VDISK_AIO_WRITE, 39} VDISKAIOCmd; 40 41/* 42 * HyperScale AIO callbacks structure 43 */ 44typedef struct VXHSAIOCB { 45 BlockAIOCB common; 46 int err; 47} VXHSAIOCB; 48 49typedef struct VXHSvDiskHostsInfo { 50 void *dev_handle; /* Device handle */ 51 char *host; /* Host name or IP */ 52 int port; /* Host's port number */ 53} VXHSvDiskHostsInfo; 54 55/* 56 * Structure per vDisk maintained for state 57 */ 58typedef struct BDRVVXHSState { 59 VXHSvDiskHostsInfo vdisk_hostinfo; /* Per host info */ 60 char *vdisk_guid; 61 char *tlscredsid; /* tlscredsid */ 62} BDRVVXHSState; 63 64static void vxhs_complete_aio_bh(void *opaque) 65{ 66 VXHSAIOCB *acb = opaque; 67 BlockCompletionFunc *cb = acb->common.cb; 68 void *cb_opaque = acb->common.opaque; 69 int ret = 0; 70 71 if (acb->err != 0) { 72 trace_vxhs_complete_aio(acb, acb->err); 73 ret = (-EIO); 74 } 75 76 qemu_aio_unref(acb); 77 cb(cb_opaque, ret); 78} 79 80/* 81 * Called from a libqnio thread 82 */ 83static void vxhs_iio_callback(void *ctx, uint32_t opcode, uint32_t error) 84{ 85 VXHSAIOCB *acb = NULL; 86 87 switch (opcode) { 88 case IRP_READ_REQUEST: 89 case IRP_WRITE_REQUEST: 90 91 /* 92 * ctx is VXHSAIOCB* 93 * ctx is NULL if error is QNIOERROR_CHANNEL_HUP 94 */ 95 if (ctx) { 96 acb = ctx; 97 } else { 98 trace_vxhs_iio_callback(error); 99 goto out; 100 } 101 102 if (error) { 103 if (!acb->err) { 104 acb->err = error; 105 } 106 trace_vxhs_iio_callback(error); 107 } 108 109 replay_bh_schedule_oneshot_event(bdrv_get_aio_context(acb->common.bs), 110 vxhs_complete_aio_bh, acb); 111 break; 112 113 default: 114 if (error == QNIOERROR_HUP) { 115 /* 116 * Channel failed, spontaneous notification, 117 * not in response to I/O 118 */ 119 trace_vxhs_iio_callback_chnfail(error, errno); 120 } else { 121 trace_vxhs_iio_callback_unknwn(opcode, error); 122 } 123 break; 124 } 125out: 126 return; 127} 128 129static QemuOptsList runtime_opts = { 130 .name = "vxhs", 131 .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head), 132 .desc = { 133 { 134 .name = VXHS_OPT_FILENAME, 135 .type = QEMU_OPT_STRING, 136 .help = "URI to the Veritas HyperScale image", 137 }, 138 { 139 .name = VXHS_OPT_VDISK_ID, 140 .type = QEMU_OPT_STRING, 141 .help = "UUID of the VxHS vdisk", 142 }, 143 { 144 .name = "tls-creds", 145 .type = QEMU_OPT_STRING, 146 .help = "ID of the TLS/SSL credentials to use", 147 }, 148 { /* end of list */ } 149 }, 150}; 151 152static QemuOptsList runtime_tcp_opts = { 153 .name = "vxhs_tcp", 154 .head = QTAILQ_HEAD_INITIALIZER(runtime_tcp_opts.head), 155 .desc = { 156 { 157 .name = VXHS_OPT_HOST, 158 .type = QEMU_OPT_STRING, 159 .help = "host address (ipv4 addresses)", 160 }, 161 { 162 .name = VXHS_OPT_PORT, 163 .type = QEMU_OPT_NUMBER, 164 .help = "port number on which VxHSD is listening (default 9999)", 165 .def_value_str = "9999" 166 }, 167 { /* end of list */ } 168 }, 169}; 170 171/* 172 * Parse incoming URI and populate *options with the host 173 * and device information 174 */ 175static int vxhs_parse_uri(const char *filename, QDict *options) 176{ 177 URI *uri = NULL; 178 char *port; 179 int ret = 0; 180 181 trace_vxhs_parse_uri_filename(filename); 182 uri = uri_parse(filename); 183 if (!uri || !uri->server || !uri->path) { 184 uri_free(uri); 185 return -EINVAL; 186 } 187 188 qdict_put_str(options, VXHS_OPT_SERVER ".host", uri->server); 189 190 if (uri->port) { 191 port = g_strdup_printf("%d", uri->port); 192 qdict_put_str(options, VXHS_OPT_SERVER ".port", port); 193 g_free(port); 194 } 195 196 qdict_put_str(options, "vdisk-id", uri->path); 197 198 trace_vxhs_parse_uri_hostinfo(uri->server, uri->port); 199 uri_free(uri); 200 201 return ret; 202} 203 204static void vxhs_parse_filename(const char *filename, QDict *options, 205 Error **errp) 206{ 207 if (qdict_haskey(options, "vdisk-id") || qdict_haskey(options, "server")) { 208 error_setg(errp, "vdisk-id/server and a file name may not be specified " 209 "at the same time"); 210 return; 211 } 212 213 if (strstr(filename, "://")) { 214 int ret = vxhs_parse_uri(filename, options); 215 if (ret < 0) { 216 error_setg(errp, "Invalid URI. URI should be of the form " 217 " vxhs://<host_ip>:<port>/<vdisk-id>"); 218 } 219 } 220} 221 222static void vxhs_refresh_limits(BlockDriverState *bs, Error **errp) 223{ 224 /* XXX Does VXHS support AIO on less than 512-byte alignment? */ 225 bs->bl.request_alignment = 512; 226} 227 228static int vxhs_init_and_ref(void) 229{ 230 if (vxhs_ref++ == 0) { 231 if (iio_init(QNIO_VERSION, vxhs_iio_callback)) { 232 return -ENODEV; 233 } 234 } 235 return 0; 236} 237 238static void vxhs_unref(void) 239{ 240 if (--vxhs_ref == 0) { 241 iio_fini(); 242 } 243} 244 245static void vxhs_get_tls_creds(const char *id, char **cacert, 246 char **key, char **cert, Error **errp) 247{ 248 Object *obj; 249 QCryptoTLSCreds *creds; 250 QCryptoTLSCredsX509 *creds_x509; 251 252 obj = object_resolve_path_component( 253 object_get_objects_root(), id); 254 255 if (!obj) { 256 error_setg(errp, "No TLS credentials with id '%s'", 257 id); 258 return; 259 } 260 261 creds_x509 = (QCryptoTLSCredsX509 *) 262 object_dynamic_cast(obj, TYPE_QCRYPTO_TLS_CREDS_X509); 263 264 if (!creds_x509) { 265 error_setg(errp, "Object with id '%s' is not TLS credentials", 266 id); 267 return; 268 } 269 270 creds = &creds_x509->parent_obj; 271 272 if (creds->endpoint != QCRYPTO_TLS_CREDS_ENDPOINT_CLIENT) { 273 error_setg(errp, 274 "Expecting TLS credentials with a client endpoint"); 275 return; 276 } 277 278 /* 279 * Get the cacert, client_cert and client_key file names. 280 */ 281 if (!creds->dir) { 282 error_setg(errp, "TLS object missing 'dir' property value"); 283 return; 284 } 285 286 *cacert = g_strdup_printf("%s/%s", creds->dir, 287 QCRYPTO_TLS_CREDS_X509_CA_CERT); 288 *cert = g_strdup_printf("%s/%s", creds->dir, 289 QCRYPTO_TLS_CREDS_X509_CLIENT_CERT); 290 *key = g_strdup_printf("%s/%s", creds->dir, 291 QCRYPTO_TLS_CREDS_X509_CLIENT_KEY); 292} 293 294static int vxhs_open(BlockDriverState *bs, QDict *options, 295 int bdrv_flags, Error **errp) 296{ 297 BDRVVXHSState *s = bs->opaque; 298 void *dev_handlep; 299 QDict *backing_options = NULL; 300 QemuOpts *opts = NULL; 301 QemuOpts *tcp_opts = NULL; 302 char *of_vsa_addr = NULL; 303 Error *local_err = NULL; 304 const char *vdisk_id_opt; 305 const char *server_host_opt; 306 int ret = 0; 307 char *cacert = NULL; 308 char *client_key = NULL; 309 char *client_cert = NULL; 310 311 ret = vxhs_init_and_ref(); 312 if (ret < 0) { 313 ret = -EINVAL; 314 goto out; 315 } 316 317 /* Create opts info from runtime_opts and runtime_tcp_opts list */ 318 opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort); 319 tcp_opts = qemu_opts_create(&runtime_tcp_opts, NULL, 0, &error_abort); 320 321 if (!qemu_opts_absorb_qdict(opts, options, errp)) { 322 ret = -EINVAL; 323 goto out; 324 } 325 326 /* vdisk-id is the disk UUID */ 327 vdisk_id_opt = qemu_opt_get(opts, VXHS_OPT_VDISK_ID); 328 if (!vdisk_id_opt) { 329 error_setg(errp, QERR_MISSING_PARAMETER, VXHS_OPT_VDISK_ID); 330 ret = -EINVAL; 331 goto out; 332 } 333 334 /* vdisk-id may contain a leading '/' */ 335 if (strlen(vdisk_id_opt) > UUID_FMT_LEN + 1) { 336 error_setg(errp, "vdisk-id cannot be more than %d characters", 337 UUID_FMT_LEN); 338 ret = -EINVAL; 339 goto out; 340 } 341 342 s->vdisk_guid = g_strdup(vdisk_id_opt); 343 trace_vxhs_open_vdiskid(vdisk_id_opt); 344 345 /* get the 'server.' arguments */ 346 qdict_extract_subqdict(options, &backing_options, VXHS_OPT_SERVER"."); 347 348 if (!qemu_opts_absorb_qdict(tcp_opts, backing_options, errp)) { 349 ret = -EINVAL; 350 goto out; 351 } 352 353 server_host_opt = qemu_opt_get(tcp_opts, VXHS_OPT_HOST); 354 if (!server_host_opt) { 355 error_setg(errp, QERR_MISSING_PARAMETER, 356 VXHS_OPT_SERVER"."VXHS_OPT_HOST); 357 ret = -EINVAL; 358 goto out; 359 } 360 361 if (strlen(server_host_opt) > MAXHOSTNAMELEN) { 362 error_setg(errp, "server.host cannot be more than %d characters", 363 MAXHOSTNAMELEN); 364 ret = -EINVAL; 365 goto out; 366 } 367 368 /* check if we got tls-creds via the --object argument */ 369 s->tlscredsid = g_strdup(qemu_opt_get(opts, "tls-creds")); 370 if (s->tlscredsid) { 371 vxhs_get_tls_creds(s->tlscredsid, &cacert, &client_key, 372 &client_cert, &local_err); 373 if (local_err != NULL) { 374 ret = -EINVAL; 375 goto out; 376 } 377 trace_vxhs_get_creds(cacert, client_key, client_cert); 378 } 379 380 s->vdisk_hostinfo.host = g_strdup(server_host_opt); 381 s->vdisk_hostinfo.port = g_ascii_strtoll(qemu_opt_get(tcp_opts, 382 VXHS_OPT_PORT), 383 NULL, 0); 384 385 trace_vxhs_open_hostinfo(s->vdisk_hostinfo.host, 386 s->vdisk_hostinfo.port); 387 388 of_vsa_addr = g_strdup_printf("of://%s:%d", 389 s->vdisk_hostinfo.host, 390 s->vdisk_hostinfo.port); 391 392 /* 393 * Open qnio channel to storage agent if not opened before 394 */ 395 dev_handlep = iio_open(of_vsa_addr, s->vdisk_guid, 0, 396 cacert, client_key, client_cert); 397 if (dev_handlep == NULL) { 398 trace_vxhs_open_iio_open(of_vsa_addr); 399 ret = -ENODEV; 400 goto out; 401 } 402 s->vdisk_hostinfo.dev_handle = dev_handlep; 403 404out: 405 g_free(of_vsa_addr); 406 qobject_unref(backing_options); 407 qemu_opts_del(tcp_opts); 408 qemu_opts_del(opts); 409 g_free(cacert); 410 g_free(client_key); 411 g_free(client_cert); 412 413 if (ret < 0) { 414 vxhs_unref(); 415 g_free(s->vdisk_hostinfo.host); 416 g_free(s->vdisk_guid); 417 g_free(s->tlscredsid); 418 s->vdisk_guid = NULL; 419 } 420 421 return ret; 422} 423 424static const AIOCBInfo vxhs_aiocb_info = { 425 .aiocb_size = sizeof(VXHSAIOCB) 426}; 427 428/* 429 * This allocates QEMU-VXHS callback for each IO 430 * and is passed to QNIO. When QNIO completes the work, 431 * it will be passed back through the callback. 432 */ 433static BlockAIOCB *vxhs_aio_rw(BlockDriverState *bs, uint64_t offset, 434 QEMUIOVector *qiov, uint64_t size, 435 BlockCompletionFunc *cb, void *opaque, 436 VDISKAIOCmd iodir) 437{ 438 VXHSAIOCB *acb = NULL; 439 BDRVVXHSState *s = bs->opaque; 440 int iio_flags = 0; 441 int ret = 0; 442 void *dev_handle = s->vdisk_hostinfo.dev_handle; 443 444 acb = qemu_aio_get(&vxhs_aiocb_info, bs, cb, opaque); 445 446 /* 447 * Initialize VXHSAIOCB. 448 */ 449 acb->err = 0; 450 451 iio_flags = IIO_FLAG_ASYNC; 452 453 switch (iodir) { 454 case VDISK_AIO_WRITE: 455 ret = iio_writev(dev_handle, acb, qiov->iov, qiov->niov, 456 offset, size, iio_flags); 457 break; 458 case VDISK_AIO_READ: 459 ret = iio_readv(dev_handle, acb, qiov->iov, qiov->niov, 460 offset, size, iio_flags); 461 break; 462 default: 463 trace_vxhs_aio_rw_invalid(iodir); 464 goto errout; 465 } 466 467 if (ret != 0) { 468 trace_vxhs_aio_rw_ioerr(s->vdisk_guid, iodir, size, offset, 469 acb, ret, errno); 470 goto errout; 471 } 472 return &acb->common; 473 474errout: 475 qemu_aio_unref(acb); 476 return NULL; 477} 478 479static BlockAIOCB *vxhs_aio_preadv(BlockDriverState *bs, 480 uint64_t offset, uint64_t bytes, 481 QEMUIOVector *qiov, int flags, 482 BlockCompletionFunc *cb, void *opaque) 483{ 484 return vxhs_aio_rw(bs, offset, qiov, bytes, cb, opaque, VDISK_AIO_READ); 485} 486 487static BlockAIOCB *vxhs_aio_pwritev(BlockDriverState *bs, 488 uint64_t offset, uint64_t bytes, 489 QEMUIOVector *qiov, int flags, 490 BlockCompletionFunc *cb, void *opaque) 491{ 492 return vxhs_aio_rw(bs, offset, qiov, bytes, cb, opaque, VDISK_AIO_WRITE); 493} 494 495static void vxhs_close(BlockDriverState *bs) 496{ 497 BDRVVXHSState *s = bs->opaque; 498 499 trace_vxhs_close(s->vdisk_guid); 500 501 g_free(s->vdisk_guid); 502 s->vdisk_guid = NULL; 503 504 /* 505 * Close vDisk device 506 */ 507 if (s->vdisk_hostinfo.dev_handle) { 508 iio_close(s->vdisk_hostinfo.dev_handle); 509 s->vdisk_hostinfo.dev_handle = NULL; 510 } 511 512 vxhs_unref(); 513 514 /* 515 * Free the dynamically allocated host string etc 516 */ 517 g_free(s->vdisk_hostinfo.host); 518 g_free(s->tlscredsid); 519 s->tlscredsid = NULL; 520 s->vdisk_hostinfo.host = NULL; 521 s->vdisk_hostinfo.port = 0; 522} 523 524static int64_t vxhs_get_vdisk_stat(BDRVVXHSState *s) 525{ 526 int64_t vdisk_size = -1; 527 int ret = 0; 528 void *dev_handle = s->vdisk_hostinfo.dev_handle; 529 530 ret = iio_ioctl(dev_handle, IOR_VDISK_STAT, &vdisk_size, 0); 531 if (ret < 0) { 532 trace_vxhs_get_vdisk_stat_err(s->vdisk_guid, ret, errno); 533 return -EIO; 534 } 535 536 trace_vxhs_get_vdisk_stat(s->vdisk_guid, vdisk_size); 537 return vdisk_size; 538} 539 540/* 541 * Returns the size of vDisk in bytes. This is required 542 * by QEMU block upper block layer so that it is visible 543 * to guest. 544 */ 545static int64_t vxhs_getlength(BlockDriverState *bs) 546{ 547 BDRVVXHSState *s = bs->opaque; 548 int64_t vdisk_size; 549 550 vdisk_size = vxhs_get_vdisk_stat(s); 551 if (vdisk_size < 0) { 552 return -EIO; 553 } 554 555 return vdisk_size; 556} 557 558static const char *const vxhs_strong_runtime_opts[] = { 559 VXHS_OPT_VDISK_ID, 560 "tls-creds", 561 VXHS_OPT_HOST, 562 VXHS_OPT_PORT, 563 VXHS_OPT_SERVER".", 564 565 NULL 566}; 567 568static BlockDriver bdrv_vxhs = { 569 .format_name = "vxhs", 570 .protocol_name = "vxhs", 571 .instance_size = sizeof(BDRVVXHSState), 572 .bdrv_file_open = vxhs_open, 573 .bdrv_parse_filename = vxhs_parse_filename, 574 .bdrv_refresh_limits = vxhs_refresh_limits, 575 .bdrv_close = vxhs_close, 576 .bdrv_getlength = vxhs_getlength, 577 .bdrv_aio_preadv = vxhs_aio_preadv, 578 .bdrv_aio_pwritev = vxhs_aio_pwritev, 579 .strong_runtime_opts = vxhs_strong_runtime_opts, 580}; 581 582static void bdrv_vxhs_init(void) 583{ 584 bdrv_register(&bdrv_vxhs); 585} 586 587block_init(bdrv_vxhs_init);