qemu with hax to log dma reads & writes
jcs.org/2018/11/12/vfio
1/*
2 * QEMU Block driver for Veritas HyperScale (VxHS)
3 *
4 * Copyright (c) 2017 Veritas Technologies LLC.
5 *
6 * This work is licensed under the terms of the GNU GPL, version 2 or later.
7 * See the COPYING file in the top-level directory.
8 *
9 */
10
11#include "qemu/osdep.h"
12#include <qnio/qnio_api.h>
13#include <sys/param.h>
14#include "block/block_int.h"
15#include "block/qdict.h"
16#include "qapi/qmp/qerror.h"
17#include "qapi/qmp/qdict.h"
18#include "qapi/qmp/qstring.h"
19#include "trace.h"
20#include "qemu/module.h"
21#include "qemu/uri.h"
22#include "qapi/error.h"
23#include "qemu/uuid.h"
24#include "crypto/tlscredsx509.h"
25#include "sysemu/replay.h"
26
/* Option key names used when building and absorbing the driver's options */
#define VXHS_OPT_FILENAME "filename" /* URI to the Veritas HyperScale image */
#define VXHS_OPT_VDISK_ID "vdisk-id" /* UUID of the VxHS vdisk */
#define VXHS_OPT_SERVER "server" /* prefix of the "server." sub-options */
#define VXHS_OPT_HOST "host" /* host sub-option of "server." */
#define VXHS_OPT_PORT "port" /* port sub-option of "server." */
32
/*
 * Reference count for the libqnio library: iio_init() is called when it
 * goes from 0 to 1 and iio_fini() when it drops back to 0.
 * Only accessed under QEMU global mutex.
 */
static uint32_t vxhs_ref;
35
/* Direction of an asynchronous request handed to vxhs_aio_rw() */
typedef enum {
    VDISK_AIO_READ,  /* submitted with iio_readv() */
    VDISK_AIO_WRITE, /* submitted with iio_writev() */
} VDISKAIOCmd;
40
/*
 * HyperScale AIO callbacks structure
 *
 * One instance is allocated per request in vxhs_aio_rw() and passed to
 * libqnio as the request context.
 */
typedef struct VXHSAIOCB {
    BlockAIOCB common; /* generic AIOCB; holds bs, cb and opaque */
    int err; /* first non-zero error reported for the request, 0 if none */
} VXHSAIOCB;
48
/* Connection details of the host serving a vdisk */
typedef struct VXHSvDiskHostsInfo {
    void *dev_handle; /* Device handle returned by iio_open() */
    char *host; /* Host name or IP; heap copy of the server.host option */
    int port; /* Host's port number */
} VXHSvDiskHostsInfo;
54
/*
 * Structure per vDisk maintained for state
 *
 * The string members are heap copies made in vxhs_open() and released in
 * vxhs_close().
 */
typedef struct BDRVVXHSState {
    VXHSvDiskHostsInfo vdisk_hostinfo; /* Per host info */
    char *vdisk_guid; /* copy of the vdisk-id option */
    char *tlscredsid; /* copy of the tls-creds option, NULL if not given */
} BDRVVXHSState;
63
64static void vxhs_complete_aio_bh(void *opaque)
65{
66 VXHSAIOCB *acb = opaque;
67 BlockCompletionFunc *cb = acb->common.cb;
68 void *cb_opaque = acb->common.opaque;
69 int ret = 0;
70
71 if (acb->err != 0) {
72 trace_vxhs_complete_aio(acb, acb->err);
73 ret = (-EIO);
74 }
75
76 qemu_aio_unref(acb);
77 cb(cb_opaque, ret);
78}
79
80/*
81 * Called from a libqnio thread
82 */
83static void vxhs_iio_callback(void *ctx, uint32_t opcode, uint32_t error)
84{
85 VXHSAIOCB *acb = NULL;
86
87 switch (opcode) {
88 case IRP_READ_REQUEST:
89 case IRP_WRITE_REQUEST:
90
91 /*
92 * ctx is VXHSAIOCB*
93 * ctx is NULL if error is QNIOERROR_CHANNEL_HUP
94 */
95 if (ctx) {
96 acb = ctx;
97 } else {
98 trace_vxhs_iio_callback(error);
99 goto out;
100 }
101
102 if (error) {
103 if (!acb->err) {
104 acb->err = error;
105 }
106 trace_vxhs_iio_callback(error);
107 }
108
109 replay_bh_schedule_oneshot_event(bdrv_get_aio_context(acb->common.bs),
110 vxhs_complete_aio_bh, acb);
111 break;
112
113 default:
114 if (error == QNIOERROR_HUP) {
115 /*
116 * Channel failed, spontaneous notification,
117 * not in response to I/O
118 */
119 trace_vxhs_iio_callback_chnfail(error, errno);
120 } else {
121 trace_vxhs_iio_callback_unknwn(opcode, error);
122 }
123 break;
124 }
125out:
126 return;
127}
128
/*
 * Top-level options of the driver; vxhs_open() absorbs them from the
 * options QDict.
 */
static QemuOptsList runtime_opts = {
    .name = "vxhs",
    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
    .desc = {
        {
            .name = VXHS_OPT_FILENAME,
            .type = QEMU_OPT_STRING,
            .help = "URI to the Veritas HyperScale image",
        },
        {
            .name = VXHS_OPT_VDISK_ID,
            .type = QEMU_OPT_STRING,
            .help = "UUID of the VxHS vdisk",
        },
        {
            .name = "tls-creds",
            .type = QEMU_OPT_STRING,
            .help = "ID of the TLS/SSL credentials to use",
        },
        { /* end of list */ }
    },
};
151
/*
 * Options found under the "server." prefix; vxhs_open() absorbs them from
 * the extracted sub-dictionary.  The port falls back to "9999" when it is
 * not given.
 */
static QemuOptsList runtime_tcp_opts = {
    .name = "vxhs_tcp",
    .head = QTAILQ_HEAD_INITIALIZER(runtime_tcp_opts.head),
    .desc = {
        {
            .name = VXHS_OPT_HOST,
            .type = QEMU_OPT_STRING,
            .help = "host address (ipv4 addresses)",
        },
        {
            .name = VXHS_OPT_PORT,
            .type = QEMU_OPT_NUMBER,
            .help = "port number on which VxHSD is listening (default 9999)",
            .def_value_str = "9999"
        },
        { /* end of list */ }
    },
};
170
171/*
172 * Parse incoming URI and populate *options with the host
173 * and device information
174 */
175static int vxhs_parse_uri(const char *filename, QDict *options)
176{
177 URI *uri = NULL;
178 char *port;
179 int ret = 0;
180
181 trace_vxhs_parse_uri_filename(filename);
182 uri = uri_parse(filename);
183 if (!uri || !uri->server || !uri->path) {
184 uri_free(uri);
185 return -EINVAL;
186 }
187
188 qdict_put_str(options, VXHS_OPT_SERVER ".host", uri->server);
189
190 if (uri->port) {
191 port = g_strdup_printf("%d", uri->port);
192 qdict_put_str(options, VXHS_OPT_SERVER ".port", port);
193 g_free(port);
194 }
195
196 qdict_put_str(options, "vdisk-id", uri->path);
197
198 trace_vxhs_parse_uri_hostinfo(uri->server, uri->port);
199 uri_free(uri);
200
201 return ret;
202}
203
204static void vxhs_parse_filename(const char *filename, QDict *options,
205 Error **errp)
206{
207 if (qdict_haskey(options, "vdisk-id") || qdict_haskey(options, "server")) {
208 error_setg(errp, "vdisk-id/server and a file name may not be specified "
209 "at the same time");
210 return;
211 }
212
213 if (strstr(filename, "://")) {
214 int ret = vxhs_parse_uri(filename, options);
215 if (ret < 0) {
216 error_setg(errp, "Invalid URI. URI should be of the form "
217 " vxhs://<host_ip>:<port>/<vdisk-id>");
218 }
219 }
220}
221
/*
 * Set the block limits of @bs: requests must be aligned to 512 bytes.
 * @errp is not used.
 */
static void vxhs_refresh_limits(BlockDriverState *bs, Error **errp)
{
    /* XXX Does VXHS support AIO on less than 512-byte alignment? */
    bs->bl.request_alignment = 512;
}
227
228static int vxhs_init_and_ref(void)
229{
230 if (vxhs_ref++ == 0) {
231 if (iio_init(QNIO_VERSION, vxhs_iio_callback)) {
232 return -ENODEV;
233 }
234 }
235 return 0;
236}
237
238static void vxhs_unref(void)
239{
240 if (--vxhs_ref == 0) {
241 iio_fini();
242 }
243}
244
245static void vxhs_get_tls_creds(const char *id, char **cacert,
246 char **key, char **cert, Error **errp)
247{
248 Object *obj;
249 QCryptoTLSCreds *creds;
250 QCryptoTLSCredsX509 *creds_x509;
251
252 obj = object_resolve_path_component(
253 object_get_objects_root(), id);
254
255 if (!obj) {
256 error_setg(errp, "No TLS credentials with id '%s'",
257 id);
258 return;
259 }
260
261 creds_x509 = (QCryptoTLSCredsX509 *)
262 object_dynamic_cast(obj, TYPE_QCRYPTO_TLS_CREDS_X509);
263
264 if (!creds_x509) {
265 error_setg(errp, "Object with id '%s' is not TLS credentials",
266 id);
267 return;
268 }
269
270 creds = &creds_x509->parent_obj;
271
272 if (creds->endpoint != QCRYPTO_TLS_CREDS_ENDPOINT_CLIENT) {
273 error_setg(errp,
274 "Expecting TLS credentials with a client endpoint");
275 return;
276 }
277
278 /*
279 * Get the cacert, client_cert and client_key file names.
280 */
281 if (!creds->dir) {
282 error_setg(errp, "TLS object missing 'dir' property value");
283 return;
284 }
285
286 *cacert = g_strdup_printf("%s/%s", creds->dir,
287 QCRYPTO_TLS_CREDS_X509_CA_CERT);
288 *cert = g_strdup_printf("%s/%s", creds->dir,
289 QCRYPTO_TLS_CREDS_X509_CLIENT_CERT);
290 *key = g_strdup_printf("%s/%s", creds->dir,
291 QCRYPTO_TLS_CREDS_X509_CLIENT_KEY);
292}
293
294static int vxhs_open(BlockDriverState *bs, QDict *options,
295 int bdrv_flags, Error **errp)
296{
297 BDRVVXHSState *s = bs->opaque;
298 void *dev_handlep;
299 QDict *backing_options = NULL;
300 QemuOpts *opts = NULL;
301 QemuOpts *tcp_opts = NULL;
302 char *of_vsa_addr = NULL;
303 Error *local_err = NULL;
304 const char *vdisk_id_opt;
305 const char *server_host_opt;
306 int ret = 0;
307 char *cacert = NULL;
308 char *client_key = NULL;
309 char *client_cert = NULL;
310
311 ret = vxhs_init_and_ref();
312 if (ret < 0) {
313 ret = -EINVAL;
314 goto out;
315 }
316
317 /* Create opts info from runtime_opts and runtime_tcp_opts list */
318 opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
319 tcp_opts = qemu_opts_create(&runtime_tcp_opts, NULL, 0, &error_abort);
320
321 if (!qemu_opts_absorb_qdict(opts, options, errp)) {
322 ret = -EINVAL;
323 goto out;
324 }
325
326 /* vdisk-id is the disk UUID */
327 vdisk_id_opt = qemu_opt_get(opts, VXHS_OPT_VDISK_ID);
328 if (!vdisk_id_opt) {
329 error_setg(errp, QERR_MISSING_PARAMETER, VXHS_OPT_VDISK_ID);
330 ret = -EINVAL;
331 goto out;
332 }
333
334 /* vdisk-id may contain a leading '/' */
335 if (strlen(vdisk_id_opt) > UUID_FMT_LEN + 1) {
336 error_setg(errp, "vdisk-id cannot be more than %d characters",
337 UUID_FMT_LEN);
338 ret = -EINVAL;
339 goto out;
340 }
341
342 s->vdisk_guid = g_strdup(vdisk_id_opt);
343 trace_vxhs_open_vdiskid(vdisk_id_opt);
344
345 /* get the 'server.' arguments */
346 qdict_extract_subqdict(options, &backing_options, VXHS_OPT_SERVER".");
347
348 if (!qemu_opts_absorb_qdict(tcp_opts, backing_options, errp)) {
349 ret = -EINVAL;
350 goto out;
351 }
352
353 server_host_opt = qemu_opt_get(tcp_opts, VXHS_OPT_HOST);
354 if (!server_host_opt) {
355 error_setg(errp, QERR_MISSING_PARAMETER,
356 VXHS_OPT_SERVER"."VXHS_OPT_HOST);
357 ret = -EINVAL;
358 goto out;
359 }
360
361 if (strlen(server_host_opt) > MAXHOSTNAMELEN) {
362 error_setg(errp, "server.host cannot be more than %d characters",
363 MAXHOSTNAMELEN);
364 ret = -EINVAL;
365 goto out;
366 }
367
368 /* check if we got tls-creds via the --object argument */
369 s->tlscredsid = g_strdup(qemu_opt_get(opts, "tls-creds"));
370 if (s->tlscredsid) {
371 vxhs_get_tls_creds(s->tlscredsid, &cacert, &client_key,
372 &client_cert, &local_err);
373 if (local_err != NULL) {
374 ret = -EINVAL;
375 goto out;
376 }
377 trace_vxhs_get_creds(cacert, client_key, client_cert);
378 }
379
380 s->vdisk_hostinfo.host = g_strdup(server_host_opt);
381 s->vdisk_hostinfo.port = g_ascii_strtoll(qemu_opt_get(tcp_opts,
382 VXHS_OPT_PORT),
383 NULL, 0);
384
385 trace_vxhs_open_hostinfo(s->vdisk_hostinfo.host,
386 s->vdisk_hostinfo.port);
387
388 of_vsa_addr = g_strdup_printf("of://%s:%d",
389 s->vdisk_hostinfo.host,
390 s->vdisk_hostinfo.port);
391
392 /*
393 * Open qnio channel to storage agent if not opened before
394 */
395 dev_handlep = iio_open(of_vsa_addr, s->vdisk_guid, 0,
396 cacert, client_key, client_cert);
397 if (dev_handlep == NULL) {
398 trace_vxhs_open_iio_open(of_vsa_addr);
399 ret = -ENODEV;
400 goto out;
401 }
402 s->vdisk_hostinfo.dev_handle = dev_handlep;
403
404out:
405 g_free(of_vsa_addr);
406 qobject_unref(backing_options);
407 qemu_opts_del(tcp_opts);
408 qemu_opts_del(opts);
409 g_free(cacert);
410 g_free(client_key);
411 g_free(client_cert);
412
413 if (ret < 0) {
414 vxhs_unref();
415 g_free(s->vdisk_hostinfo.host);
416 g_free(s->vdisk_guid);
417 g_free(s->tlscredsid);
418 s->vdisk_guid = NULL;
419 }
420
421 return ret;
422}
423
/* AIOCB description passed to qemu_aio_get(); sizes each AIOCB as a VXHSAIOCB */
static const AIOCBInfo vxhs_aiocb_info = {
    .aiocb_size = sizeof(VXHSAIOCB)
};
427
428/*
429 * This allocates QEMU-VXHS callback for each IO
430 * and is passed to QNIO. When QNIO completes the work,
431 * it will be passed back through the callback.
432 */
433static BlockAIOCB *vxhs_aio_rw(BlockDriverState *bs, uint64_t offset,
434 QEMUIOVector *qiov, uint64_t size,
435 BlockCompletionFunc *cb, void *opaque,
436 VDISKAIOCmd iodir)
437{
438 VXHSAIOCB *acb = NULL;
439 BDRVVXHSState *s = bs->opaque;
440 int iio_flags = 0;
441 int ret = 0;
442 void *dev_handle = s->vdisk_hostinfo.dev_handle;
443
444 acb = qemu_aio_get(&vxhs_aiocb_info, bs, cb, opaque);
445
446 /*
447 * Initialize VXHSAIOCB.
448 */
449 acb->err = 0;
450
451 iio_flags = IIO_FLAG_ASYNC;
452
453 switch (iodir) {
454 case VDISK_AIO_WRITE:
455 ret = iio_writev(dev_handle, acb, qiov->iov, qiov->niov,
456 offset, size, iio_flags);
457 break;
458 case VDISK_AIO_READ:
459 ret = iio_readv(dev_handle, acb, qiov->iov, qiov->niov,
460 offset, size, iio_flags);
461 break;
462 default:
463 trace_vxhs_aio_rw_invalid(iodir);
464 goto errout;
465 }
466
467 if (ret != 0) {
468 trace_vxhs_aio_rw_ioerr(s->vdisk_guid, iodir, size, offset,
469 acb, ret, errno);
470 goto errout;
471 }
472 return &acb->common;
473
474errout:
475 qemu_aio_unref(acb);
476 return NULL;
477}
478
/*
 * Asynchronous vectored read: forwards to vxhs_aio_rw() with
 * VDISK_AIO_READ.  @flags is not used.
 */
static BlockAIOCB *vxhs_aio_preadv(BlockDriverState *bs,
                                   uint64_t offset, uint64_t bytes,
                                   QEMUIOVector *qiov, int flags,
                                   BlockCompletionFunc *cb, void *opaque)
{
    return vxhs_aio_rw(bs, offset, qiov, bytes, cb, opaque, VDISK_AIO_READ);
}
486
/*
 * Asynchronous vectored write: forwards to vxhs_aio_rw() with
 * VDISK_AIO_WRITE.  @flags is not used.
 */
static BlockAIOCB *vxhs_aio_pwritev(BlockDriverState *bs,
                                    uint64_t offset, uint64_t bytes,
                                    QEMUIOVector *qiov, int flags,
                                    BlockCompletionFunc *cb, void *opaque)
{
    return vxhs_aio_rw(bs, offset, qiov, bytes, cb, opaque, VDISK_AIO_WRITE);
}
494
495static void vxhs_close(BlockDriverState *bs)
496{
497 BDRVVXHSState *s = bs->opaque;
498
499 trace_vxhs_close(s->vdisk_guid);
500
501 g_free(s->vdisk_guid);
502 s->vdisk_guid = NULL;
503
504 /*
505 * Close vDisk device
506 */
507 if (s->vdisk_hostinfo.dev_handle) {
508 iio_close(s->vdisk_hostinfo.dev_handle);
509 s->vdisk_hostinfo.dev_handle = NULL;
510 }
511
512 vxhs_unref();
513
514 /*
515 * Free the dynamically allocated host string etc
516 */
517 g_free(s->vdisk_hostinfo.host);
518 g_free(s->tlscredsid);
519 s->tlscredsid = NULL;
520 s->vdisk_hostinfo.host = NULL;
521 s->vdisk_hostinfo.port = 0;
522}
523
524static int64_t vxhs_get_vdisk_stat(BDRVVXHSState *s)
525{
526 int64_t vdisk_size = -1;
527 int ret = 0;
528 void *dev_handle = s->vdisk_hostinfo.dev_handle;
529
530 ret = iio_ioctl(dev_handle, IOR_VDISK_STAT, &vdisk_size, 0);
531 if (ret < 0) {
532 trace_vxhs_get_vdisk_stat_err(s->vdisk_guid, ret, errno);
533 return -EIO;
534 }
535
536 trace_vxhs_get_vdisk_stat(s->vdisk_guid, vdisk_size);
537 return vdisk_size;
538}
539
540/*
541 * Returns the size of vDisk in bytes. This is required
542 * by QEMU block upper block layer so that it is visible
543 * to guest.
544 */
545static int64_t vxhs_getlength(BlockDriverState *bs)
546{
547 BDRVVXHSState *s = bs->opaque;
548 int64_t vdisk_size;
549
550 vdisk_size = vxhs_get_vdisk_stat(s);
551 if (vdisk_size < 0) {
552 return -EIO;
553 }
554
555 return vdisk_size;
556}
557
/*
 * NULL-terminated list of option names installed as the driver's
 * strong_runtime_opts.  The trailing "server." entry is a prefix.
 */
static const char *const vxhs_strong_runtime_opts[] = {
    VXHS_OPT_VDISK_ID,
    "tls-creds",
    VXHS_OPT_HOST,
    VXHS_OPT_PORT,
    VXHS_OPT_SERVER".",

    NULL
};
567
/*
 * Block driver description for the "vxhs" protocol; registered by
 * bdrv_vxhs_init().
 */
static BlockDriver bdrv_vxhs = {
    .format_name = "vxhs",
    .protocol_name = "vxhs",
    .instance_size = sizeof(BDRVVXHSState), /* size of the per-vdisk state */
    .bdrv_file_open = vxhs_open,
    .bdrv_parse_filename = vxhs_parse_filename,
    .bdrv_refresh_limits = vxhs_refresh_limits,
    .bdrv_close = vxhs_close,
    .bdrv_getlength = vxhs_getlength,
    .bdrv_aio_preadv = vxhs_aio_preadv,
    .bdrv_aio_pwritev = vxhs_aio_pwritev,
    .strong_runtime_opts = vxhs_strong_runtime_opts,
};
581
/* Register the VxHS block driver with the block layer */
static void bdrv_vxhs_init(void)
{
    bdrv_register(&bdrv_vxhs);
}

/* Hook bdrv_vxhs_init() into the block-module initialisation */
block_init(bdrv_vxhs_init);