qemu with hax to log dma reads & writes jcs.org/2018/11/12/vfio
at master 432 lines 13 kB view raw
1/* 2 * netmap access for qemu 3 * 4 * Copyright (c) 2012-2013 Luigi Rizzo 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to deal 8 * in the Software without restriction, including without limitation the rights 9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 * copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 * THE SOFTWARE. 23 */ 24 25 26#include "qemu/osdep.h" 27#include <sys/ioctl.h> 28#include <net/if.h> 29#define NETMAP_WITH_LIBS 30#include <net/netmap.h> 31#include <net/netmap_user.h> 32 33#include "net/net.h" 34#include "net/tap.h" 35#include "clients.h" 36#include "sysemu/sysemu.h" 37#include "qemu/error-report.h" 38#include "qapi/error.h" 39#include "qemu/iov.h" 40#include "qemu/cutils.h" 41#include "qemu/main-loop.h" 42 43typedef struct NetmapState { 44 NetClientState nc; 45 struct nm_desc *nmd; 46 char ifname[IFNAMSIZ]; 47 struct netmap_ring *tx; 48 struct netmap_ring *rx; 49 bool read_poll; 50 bool write_poll; 51 struct iovec iov[IOV_MAX]; 52 int vnet_hdr_len; /* Current virtio-net header length. */ 53} NetmapState; 54 55#ifndef __FreeBSD__ 56#define pkt_copy bcopy 57#else 58/* A fast copy routine only for multiples of 64 bytes, non overlapped. */ 59static inline void 60pkt_copy(const void *_src, void *_dst, int l) 61{ 62 const uint64_t *src = _src; 63 uint64_t *dst = _dst; 64 if (unlikely(l >= 1024)) { 65 bcopy(src, dst, l); 66 return; 67 } 68 for (; l > 0; l -= 64) { 69 *dst++ = *src++; 70 *dst++ = *src++; 71 *dst++ = *src++; 72 *dst++ = *src++; 73 *dst++ = *src++; 74 *dst++ = *src++; 75 *dst++ = *src++; 76 *dst++ = *src++; 77 } 78} 79#endif /* __FreeBSD__ */ 80 81/* 82 * Open a netmap device. We assume there is only one queue 83 * (which is the case for the VALE bridge). 84 */ 85static struct nm_desc *netmap_open(const NetdevNetmapOptions *nm_opts, 86 Error **errp) 87{ 88 struct nm_desc *nmd; 89 struct nmreq req; 90 91 memset(&req, 0, sizeof(req)); 92 93 nmd = nm_open(nm_opts->ifname, &req, NETMAP_NO_TX_POLL, 94 NULL); 95 if (nmd == NULL) { 96 error_setg_errno(errp, errno, "Failed to nm_open() %s", 97 nm_opts->ifname); 98 return NULL; 99 } 100 101 return nmd; 102} 103 104static void netmap_send(void *opaque); 105static void netmap_writable(void *opaque); 106 107/* Set the event-loop handlers for the netmap backend. */ 108static void netmap_update_fd_handler(NetmapState *s) 109{ 110 qemu_set_fd_handler(s->nmd->fd, 111 s->read_poll ? netmap_send : NULL, 112 s->write_poll ? netmap_writable : NULL, 113 s); 114} 115 116/* Update the read handler. */ 117static void netmap_read_poll(NetmapState *s, bool enable) 118{ 119 if (s->read_poll != enable) { /* Do nothing if not changed. */ 120 s->read_poll = enable; 121 netmap_update_fd_handler(s); 122 } 123} 124 125/* Update the write handler. */ 126static void netmap_write_poll(NetmapState *s, bool enable) 127{ 128 if (s->write_poll != enable) { 129 s->write_poll = enable; 130 netmap_update_fd_handler(s); 131 } 132} 133 134static void netmap_poll(NetClientState *nc, bool enable) 135{ 136 NetmapState *s = DO_UPCAST(NetmapState, nc, nc); 137 138 if (s->read_poll != enable || s->write_poll != enable) { 139 s->write_poll = enable; 140 s->read_poll = enable; 141 netmap_update_fd_handler(s); 142 } 143} 144 145/* 146 * The fd_write() callback, invoked if the fd is marked as 147 * writable after a poll. Unregister the handler and flush any 148 * buffered packets. 149 */ 150static void netmap_writable(void *opaque) 151{ 152 NetmapState *s = opaque; 153 154 netmap_write_poll(s, false); 155 qemu_flush_queued_packets(&s->nc); 156} 157 158static ssize_t netmap_receive_iov(NetClientState *nc, 159 const struct iovec *iov, int iovcnt) 160{ 161 NetmapState *s = DO_UPCAST(NetmapState, nc, nc); 162 struct netmap_ring *ring = s->tx; 163 unsigned int tail = ring->tail; 164 ssize_t totlen = 0; 165 uint32_t last; 166 uint32_t idx; 167 uint8_t *dst; 168 int j; 169 uint32_t i; 170 171 last = i = ring->head; 172 173 if (nm_ring_space(ring) < iovcnt) { 174 /* Not enough netmap slots. Tell the kernel that we have seen the new 175 * available slots (so that it notifies us again when it has more 176 * ones), but without publishing any new slots to be processed 177 * (e.g., we don't advance ring->head). */ 178 ring->cur = tail; 179 netmap_write_poll(s, true); 180 return 0; 181 } 182 183 for (j = 0; j < iovcnt; j++) { 184 int iov_frag_size = iov[j].iov_len; 185 int offset = 0; 186 int nm_frag_size; 187 188 totlen += iov_frag_size; 189 190 /* Split each iovec fragment over more netmap slots, if 191 necessary. */ 192 while (iov_frag_size) { 193 nm_frag_size = MIN(iov_frag_size, ring->nr_buf_size); 194 195 if (unlikely(i == tail)) { 196 /* We ran out of netmap slots while splitting the 197 iovec fragments. */ 198 ring->cur = tail; 199 netmap_write_poll(s, true); 200 return 0; 201 } 202 203 idx = ring->slot[i].buf_idx; 204 dst = (uint8_t *)NETMAP_BUF(ring, idx); 205 206 ring->slot[i].len = nm_frag_size; 207 ring->slot[i].flags = NS_MOREFRAG; 208 pkt_copy(iov[j].iov_base + offset, dst, nm_frag_size); 209 210 last = i; 211 i = nm_ring_next(ring, i); 212 213 offset += nm_frag_size; 214 iov_frag_size -= nm_frag_size; 215 } 216 } 217 /* The last slot must not have NS_MOREFRAG set. */ 218 ring->slot[last].flags &= ~NS_MOREFRAG; 219 220 /* Now update ring->head and ring->cur to publish the new slots and 221 * the new wakeup point. */ 222 ring->head = ring->cur = i; 223 224 ioctl(s->nmd->fd, NIOCTXSYNC, NULL); 225 226 return totlen; 227} 228 229static ssize_t netmap_receive(NetClientState *nc, 230 const uint8_t *buf, size_t size) 231{ 232 struct iovec iov; 233 234 iov.iov_base = (void *)buf; 235 iov.iov_len = size; 236 237 return netmap_receive_iov(nc, &iov, 1); 238} 239 240/* Complete a previous send (backend --> guest) and enable the 241 fd_read callback. */ 242static void netmap_send_completed(NetClientState *nc, ssize_t len) 243{ 244 NetmapState *s = DO_UPCAST(NetmapState, nc, nc); 245 246 netmap_read_poll(s, true); 247} 248 249static void netmap_send(void *opaque) 250{ 251 NetmapState *s = opaque; 252 struct netmap_ring *ring = s->rx; 253 unsigned int tail = ring->tail; 254 255 /* Keep sending while there are available slots in the netmap 256 RX ring and the forwarding path towards the peer is open. */ 257 while (ring->head != tail) { 258 uint32_t i = ring->head; 259 uint32_t idx; 260 bool morefrag; 261 int iovcnt = 0; 262 int iovsize; 263 264 /* Get a (possibly multi-slot) packet. */ 265 do { 266 idx = ring->slot[i].buf_idx; 267 morefrag = (ring->slot[i].flags & NS_MOREFRAG); 268 s->iov[iovcnt].iov_base = (void *)NETMAP_BUF(ring, idx); 269 s->iov[iovcnt].iov_len = ring->slot[i].len; 270 iovcnt++; 271 i = nm_ring_next(ring, i); 272 } while (i != tail && morefrag); 273 274 /* Advance ring->cur to tell the kernel that we have seen the slots. */ 275 ring->cur = i; 276 277 if (unlikely(morefrag)) { 278 /* This is a truncated packet, so we can stop without releasing the 279 * incomplete slots by updating ring->head. We will hopefully 280 * re-read the complete packet the next time we are called. */ 281 break; 282 } 283 284 iovsize = qemu_sendv_packet_async(&s->nc, s->iov, iovcnt, 285 netmap_send_completed); 286 287 /* Release the slots to the kernel. */ 288 ring->head = i; 289 290 if (iovsize == 0) { 291 /* The peer does not receive anymore. Packet is queued, stop 292 * reading from the backend until netmap_send_completed(). */ 293 netmap_read_poll(s, false); 294 break; 295 } 296 } 297} 298 299/* Flush and close. */ 300static void netmap_cleanup(NetClientState *nc) 301{ 302 NetmapState *s = DO_UPCAST(NetmapState, nc, nc); 303 304 qemu_purge_queued_packets(nc); 305 306 netmap_poll(nc, false); 307 nm_close(s->nmd); 308 s->nmd = NULL; 309} 310 311/* Offloading manipulation support callbacks. */ 312static int netmap_fd_set_vnet_hdr_len(NetmapState *s, int len) 313{ 314 struct nmreq req; 315 316 /* Issue a NETMAP_BDG_VNET_HDR command to change the virtio-net header 317 * length for the netmap adapter associated to 's->ifname'. 318 */ 319 memset(&req, 0, sizeof(req)); 320 pstrcpy(req.nr_name, sizeof(req.nr_name), s->ifname); 321 req.nr_version = NETMAP_API; 322 req.nr_cmd = NETMAP_BDG_VNET_HDR; 323 req.nr_arg1 = len; 324 325 return ioctl(s->nmd->fd, NIOCREGIF, &req); 326} 327 328static bool netmap_has_vnet_hdr_len(NetClientState *nc, int len) 329{ 330 NetmapState *s = DO_UPCAST(NetmapState, nc, nc); 331 int prev_len = s->vnet_hdr_len; 332 333 /* Check that we can set the new length. */ 334 if (netmap_fd_set_vnet_hdr_len(s, len)) { 335 return false; 336 } 337 338 /* Restore the previous length. */ 339 if (netmap_fd_set_vnet_hdr_len(s, prev_len)) { 340 error_report("Failed to restore vnet-hdr length %d on %s: %s", 341 prev_len, s->ifname, strerror(errno)); 342 abort(); 343 } 344 345 return true; 346} 347 348/* A netmap interface that supports virtio-net headers always 349 * supports UFO, so we use this callback also for the has_ufo hook. */ 350static bool netmap_has_vnet_hdr(NetClientState *nc) 351{ 352 return netmap_has_vnet_hdr_len(nc, sizeof(struct virtio_net_hdr)); 353} 354 355static void netmap_using_vnet_hdr(NetClientState *nc, bool enable) 356{ 357} 358 359static void netmap_set_vnet_hdr_len(NetClientState *nc, int len) 360{ 361 NetmapState *s = DO_UPCAST(NetmapState, nc, nc); 362 int err; 363 364 err = netmap_fd_set_vnet_hdr_len(s, len); 365 if (err) { 366 error_report("Unable to set vnet-hdr length %d on %s: %s", 367 len, s->ifname, strerror(errno)); 368 } else { 369 /* Keep track of the current length. */ 370 s->vnet_hdr_len = len; 371 } 372} 373 374static void netmap_set_offload(NetClientState *nc, int csum, int tso4, int tso6, 375 int ecn, int ufo) 376{ 377 NetmapState *s = DO_UPCAST(NetmapState, nc, nc); 378 379 /* Setting a virtio-net header length greater than zero automatically 380 * enables the offloadings. */ 381 if (!s->vnet_hdr_len) { 382 netmap_set_vnet_hdr_len(nc, sizeof(struct virtio_net_hdr)); 383 } 384} 385 386/* NetClientInfo methods */ 387static NetClientInfo net_netmap_info = { 388 .type = NET_CLIENT_DRIVER_NETMAP, 389 .size = sizeof(NetmapState), 390 .receive = netmap_receive, 391 .receive_iov = netmap_receive_iov, 392 .poll = netmap_poll, 393 .cleanup = netmap_cleanup, 394 .has_ufo = netmap_has_vnet_hdr, 395 .has_vnet_hdr = netmap_has_vnet_hdr, 396 .has_vnet_hdr_len = netmap_has_vnet_hdr_len, 397 .using_vnet_hdr = netmap_using_vnet_hdr, 398 .set_offload = netmap_set_offload, 399 .set_vnet_hdr_len = netmap_set_vnet_hdr_len, 400}; 401 402/* The exported init function 403 * 404 * ... -net netmap,ifname="..." 405 */ 406int net_init_netmap(const Netdev *netdev, 407 const char *name, NetClientState *peer, Error **errp) 408{ 409 const NetdevNetmapOptions *netmap_opts = &netdev->u.netmap; 410 struct nm_desc *nmd; 411 NetClientState *nc; 412 Error *err = NULL; 413 NetmapState *s; 414 415 nmd = netmap_open(netmap_opts, &err); 416 if (err) { 417 error_propagate(errp, err); 418 return -1; 419 } 420 /* Create the object. */ 421 nc = qemu_new_net_client(&net_netmap_info, peer, "netmap", name); 422 s = DO_UPCAST(NetmapState, nc, nc); 423 s->nmd = nmd; 424 s->tx = NETMAP_TXRING(nmd->nifp, 0); 425 s->rx = NETMAP_RXRING(nmd->nifp, 0); 426 s->vnet_hdr_len = 0; 427 pstrcpy(s->ifname, sizeof(s->ifname), netmap_opts->ifname); 428 netmap_read_poll(s, true); /* Initially only poll for reads. */ 429 430 return 0; 431} 432