qemu with hax to log dma reads & writes jcs.org/2018/11/12/vfio

Merge remote-tracking branch 'remotes/stefanha/tags/block-pull-request' into staging

Pull request

# gpg: Signature made Wed 24 Jun 2020 11:01:57 BST
# gpg: using RSA key 8695A8BFD3F97CDAAC35775A9CA4ABB381AB73C8
# gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>" [full]
# gpg: aka "Stefan Hajnoczi <stefanha@gmail.com>" [full]
# Primary key fingerprint: 8695 A8BF D3F9 7CDA AC35 775A 9CA4 ABB3 81AB 73C8

* remotes/stefanha/tags/block-pull-request:
block/nvme: support nested aio_poll()
block/nvme: keep BDRVNVMeState pointer in NVMeQueuePair
block/nvme: clarify that free_req_queue is protected by q->lock
block/nvme: switch to a NVMeRequest freelist
block/nvme: don't access CQE after moving cq.head
block/nvme: drop tautologous assertion
block/nvme: poll queues without q->lock
check-block: enable iotests with SafeStack
configure: add flags to support SafeStack
coroutine: add check for SafeStack in sigaltstack
coroutine: support SafeStack in ucontext backend
minikconf: explicitly set encoding to UTF-8

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>

+283 -65
+158 -60
block/nvme.c
··· 33 33 #define NVME_QUEUE_SIZE 128 34 34 #define NVME_BAR_SIZE 8192 35 35 36 + /* 37 + * We have to leave one slot empty as that is the full queue case where 38 + * head == tail + 1. 39 + */ 40 + #define NVME_NUM_REQS (NVME_QUEUE_SIZE - 1) 41 + 42 + typedef struct BDRVNVMeState BDRVNVMeState; 43 + 36 44 typedef struct { 37 45 int32_t head, tail; 38 46 uint8_t *queue; ··· 47 55 int cid; 48 56 void *prp_list_page; 49 57 uint64_t prp_list_iova; 50 - bool busy; 58 + int free_req_next; /* q->reqs[] index of next free req */ 51 59 } NVMeRequest; 52 60 53 61 typedef struct { 54 - CoQueue free_req_queue; 55 62 QemuMutex lock; 56 63 64 + /* Read from I/O code path, initialized under BQL */ 65 + BDRVNVMeState *s; 66 + int index; 67 + 57 68 /* Fields protected by BQL */ 58 - int index; 59 69 uint8_t *prp_list_pages; 60 70 61 71 /* Fields protected by @lock */ 72 + CoQueue free_req_queue; 62 73 NVMeQueue sq, cq; 63 74 int cq_phase; 64 - NVMeRequest reqs[NVME_QUEUE_SIZE]; 65 - bool busy; 75 + int free_req_head; 76 + NVMeRequest reqs[NVME_NUM_REQS]; 66 77 int need_kick; 67 78 int inflight; 79 + 80 + /* Thread-safe, no lock necessary */ 81 + QEMUBH *completion_bh; 68 82 } NVMeQueuePair; 69 83 70 84 /* Memory mapped registers */ ··· 89 103 90 104 QEMU_BUILD_BUG_ON(offsetof(NVMeRegs, doorbells) != 0x1000); 91 105 92 - typedef struct { 106 + struct BDRVNVMeState { 93 107 AioContext *aio_context; 94 108 QEMUVFIOState *vfio; 95 109 NVMeRegs *regs; ··· 123 137 124 138 /* PCI address (required for nvme_refresh_filename()) */ 125 139 char *device; 126 - } BDRVNVMeState; 140 + }; 127 141 128 142 #define NVME_BLOCK_OPT_DEVICE "device" 129 143 #define NVME_BLOCK_OPT_NAMESPACE "namespace" 130 144 145 + static void nvme_process_completion_bh(void *opaque); 146 + 131 147 static QemuOptsList runtime_opts = { 132 148 .name = "nvme", 133 149 .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head), ··· 167 183 } 168 184 } 169 185 170 - static void nvme_free_queue_pair(BlockDriverState *bs, 
NVMeQueuePair *q) 186 + static void nvme_free_queue_pair(NVMeQueuePair *q) 171 187 { 188 + if (q->completion_bh) { 189 + qemu_bh_delete(q->completion_bh); 190 + } 172 191 qemu_vfree(q->prp_list_pages); 173 192 qemu_vfree(q->sq.queue); 174 193 qemu_vfree(q->cq.queue); ··· 198 217 uint64_t prp_list_iova; 199 218 200 219 qemu_mutex_init(&q->lock); 220 + q->s = s; 201 221 q->index = idx; 202 222 qemu_co_queue_init(&q->free_req_queue); 203 - q->prp_list_pages = qemu_blockalign0(bs, s->page_size * NVME_QUEUE_SIZE); 223 + q->prp_list_pages = qemu_blockalign0(bs, s->page_size * NVME_NUM_REQS); 224 + q->completion_bh = aio_bh_new(bdrv_get_aio_context(bs), 225 + nvme_process_completion_bh, q); 204 226 r = qemu_vfio_dma_map(s->vfio, q->prp_list_pages, 205 - s->page_size * NVME_QUEUE_SIZE, 227 + s->page_size * NVME_NUM_REQS, 206 228 false, &prp_list_iova); 207 229 if (r) { 208 230 goto fail; 209 231 } 210 - for (i = 0; i < NVME_QUEUE_SIZE; i++) { 232 + q->free_req_head = -1; 233 + for (i = 0; i < NVME_NUM_REQS; i++) { 211 234 NVMeRequest *req = &q->reqs[i]; 212 235 req->cid = i + 1; 236 + req->free_req_next = q->free_req_head; 237 + q->free_req_head = i; 213 238 req->prp_list_page = q->prp_list_pages + i * s->page_size; 214 239 req->prp_list_iova = prp_list_iova + i * s->page_size; 215 240 } 241 + 216 242 nvme_init_queue(bs, &q->sq, size, NVME_SQ_ENTRY_BYTES, &local_err); 217 243 if (local_err) { 218 244 error_propagate(errp, local_err); ··· 229 255 230 256 return q; 231 257 fail: 232 - nvme_free_queue_pair(bs, q); 258 + nvme_free_queue_pair(q); 233 259 return NULL; 234 260 } 235 261 236 262 /* With q->lock */ 237 - static void nvme_kick(BDRVNVMeState *s, NVMeQueuePair *q) 263 + static void nvme_kick(NVMeQueuePair *q) 238 264 { 265 + BDRVNVMeState *s = q->s; 266 + 239 267 if (s->plugged || !q->need_kick) { 240 268 return; 241 269 } ··· 254 282 */ 255 283 static NVMeRequest *nvme_get_free_req(NVMeQueuePair *q) 256 284 { 257 - int i; 258 - NVMeRequest *req = NULL; 285 + 
NVMeRequest *req; 259 286 260 287 qemu_mutex_lock(&q->lock); 261 - while (q->inflight + q->need_kick > NVME_QUEUE_SIZE - 2) { 262 - /* We have to leave one slot empty as that is the full queue case (head 263 - * == tail + 1). */ 288 + 289 + while (q->free_req_head == -1) { 264 290 if (qemu_in_coroutine()) { 265 291 trace_nvme_free_req_queue_wait(q); 266 292 qemu_co_queue_wait(&q->free_req_queue, &q->lock); ··· 269 295 return NULL; 270 296 } 271 297 } 272 - for (i = 0; i < NVME_QUEUE_SIZE; i++) { 273 - if (!q->reqs[i].busy) { 274 - q->reqs[i].busy = true; 275 - req = &q->reqs[i]; 276 - break; 277 - } 278 - } 279 - /* We have checked inflight and need_kick while holding q->lock, so one 280 - * free req must be available. */ 281 - assert(req); 298 + 299 + req = &q->reqs[q->free_req_head]; 300 + q->free_req_head = req->free_req_next; 301 + req->free_req_next = -1; 302 + 282 303 qemu_mutex_unlock(&q->lock); 283 304 return req; 284 305 } 285 306 307 + /* With q->lock */ 308 + static void nvme_put_free_req_locked(NVMeQueuePair *q, NVMeRequest *req) 309 + { 310 + req->free_req_next = q->free_req_head; 311 + q->free_req_head = req - q->reqs; 312 + } 313 + 314 + /* With q->lock */ 315 + static void nvme_wake_free_req_locked(NVMeQueuePair *q) 316 + { 317 + if (!qemu_co_queue_empty(&q->free_req_queue)) { 318 + replay_bh_schedule_oneshot_event(q->s->aio_context, 319 + nvme_free_req_queue_cb, q); 320 + } 321 + } 322 + 323 + /* Insert a request in the freelist and wake waiters */ 324 + static void nvme_put_free_req_and_wake(NVMeQueuePair *q, NVMeRequest *req) 325 + { 326 + qemu_mutex_lock(&q->lock); 327 + nvme_put_free_req_locked(q, req); 328 + nvme_wake_free_req_locked(q); 329 + qemu_mutex_unlock(&q->lock); 330 + } 331 + 286 332 static inline int nvme_translate_error(const NvmeCqe *c) 287 333 { 288 334 uint16_t status = (le16_to_cpu(c->status) >> 1) & 0xFF; ··· 306 352 } 307 353 308 354 /* With q->lock */ 309 - static bool nvme_process_completion(BDRVNVMeState *s, NVMeQueuePair 
*q) 355 + static bool nvme_process_completion(NVMeQueuePair *q) 310 356 { 357 + BDRVNVMeState *s = q->s; 311 358 bool progress = false; 312 359 NVMeRequest *preq; 313 360 NVMeRequest req; 314 361 NvmeCqe *c; 315 362 316 363 trace_nvme_process_completion(s, q->index, q->inflight); 317 - if (q->busy || s->plugged) { 318 - trace_nvme_process_completion_queue_busy(s, q->index); 364 + if (s->plugged) { 365 + trace_nvme_process_completion_queue_plugged(s, q->index); 319 366 return false; 320 367 } 321 - q->busy = true; 368 + 369 + /* 370 + * Support re-entrancy when a request cb() function invokes aio_poll(). 371 + * Pending completions must be visible to aio_poll() so that a cb() 372 + * function can wait for the completion of another request. 373 + * 374 + * The aio_poll() loop will execute our BH and we'll resume completion 375 + * processing there. 376 + */ 377 + qemu_bh_schedule(q->completion_bh); 378 + 322 379 assert(q->inflight >= 0); 323 380 while (q->inflight) { 381 + int ret; 324 382 int16_t cid; 383 + 325 384 c = (NvmeCqe *)&q->cq.queue[q->cq.head * NVME_CQ_ENTRY_BYTES]; 326 385 if ((le16_to_cpu(c->status) & 0x1) == q->cq_phase) { 327 386 break; 328 387 } 388 + ret = nvme_translate_error(c); 329 389 q->cq.head = (q->cq.head + 1) % NVME_QUEUE_SIZE; 330 390 if (!q->cq.head) { 331 391 q->cq_phase = !q->cq_phase; ··· 336 396 cid); 337 397 continue; 338 398 } 339 - assert(cid <= NVME_QUEUE_SIZE); 340 399 trace_nvme_complete_command(s, q->index, cid); 341 400 preq = &q->reqs[cid - 1]; 342 401 req = *preq; 343 402 assert(req.cid == cid); 344 403 assert(req.cb); 345 - preq->busy = false; 404 + nvme_put_free_req_locked(q, preq); 346 405 preq->cb = preq->opaque = NULL; 406 + q->inflight--; 347 407 qemu_mutex_unlock(&q->lock); 348 - req.cb(req.opaque, nvme_translate_error(c)); 408 + req.cb(req.opaque, ret); 349 409 qemu_mutex_lock(&q->lock); 350 - q->inflight--; 351 410 progress = true; 352 411 } 353 412 if (progress) { 354 413 /* Notify the device so it can post more 
completions. */ 355 414 smp_mb_release(); 356 415 *q->cq.doorbell = cpu_to_le32(q->cq.head); 357 - if (!qemu_co_queue_empty(&q->free_req_queue)) { 358 - replay_bh_schedule_oneshot_event(s->aio_context, 359 - nvme_free_req_queue_cb, q); 360 - } 416 + nvme_wake_free_req_locked(q); 361 417 } 362 - q->busy = false; 418 + 419 + qemu_bh_cancel(q->completion_bh); 420 + 363 421 return progress; 364 422 } 365 423 424 + static void nvme_process_completion_bh(void *opaque) 425 + { 426 + NVMeQueuePair *q = opaque; 427 + 428 + /* 429 + * We're being invoked because a nvme_process_completion() cb() function 430 + * called aio_poll(). The callback may be waiting for further completions 431 + * so notify the device that it has space to fill in more completions now. 432 + */ 433 + smp_mb_release(); 434 + *q->cq.doorbell = cpu_to_le32(q->cq.head); 435 + nvme_wake_free_req_locked(q); 436 + 437 + nvme_process_completion(q); 438 + } 439 + 366 440 static void nvme_trace_command(const NvmeCmd *cmd) 367 441 { 368 442 int i; ··· 374 448 } 375 449 } 376 450 377 - static void nvme_submit_command(BDRVNVMeState *s, NVMeQueuePair *q, 378 - NVMeRequest *req, 451 + static void nvme_submit_command(NVMeQueuePair *q, NVMeRequest *req, 379 452 NvmeCmd *cmd, BlockCompletionFunc cb, 380 453 void *opaque) 381 454 { ··· 384 457 req->opaque = opaque; 385 458 cmd->cid = cpu_to_le32(req->cid); 386 459 387 - trace_nvme_submit_command(s, q->index, req->cid); 460 + trace_nvme_submit_command(q->s, q->index, req->cid); 388 461 nvme_trace_command(cmd); 389 462 qemu_mutex_lock(&q->lock); 390 463 memcpy((uint8_t *)q->sq.queue + 391 464 q->sq.tail * NVME_SQ_ENTRY_BYTES, cmd, sizeof(*cmd)); 392 465 q->sq.tail = (q->sq.tail + 1) % NVME_QUEUE_SIZE; 393 466 q->need_kick++; 394 - nvme_kick(s, q); 395 - nvme_process_completion(s, q); 467 + nvme_kick(q); 468 + nvme_process_completion(q); 396 469 qemu_mutex_unlock(&q->lock); 397 470 } 398 471 ··· 407 480 NvmeCmd *cmd) 408 481 { 409 482 NVMeRequest *req; 410 - BDRVNVMeState 
*s = bs->opaque; 411 483 int ret = -EINPROGRESS; 412 484 req = nvme_get_free_req(q); 413 485 if (!req) { 414 486 return -EBUSY; 415 487 } 416 - nvme_submit_command(s, q, req, cmd, nvme_cmd_sync_cb, &ret); 488 + nvme_submit_command(q, req, cmd, nvme_cmd_sync_cb, &ret); 417 489 418 490 BDRV_POLL_WHILE(bs, ret == -EINPROGRESS); 419 491 return ret; ··· 512 584 513 585 for (i = 0; i < s->nr_queues; i++) { 514 586 NVMeQueuePair *q = s->queues[i]; 587 + const size_t cqe_offset = q->cq.head * NVME_CQ_ENTRY_BYTES; 588 + NvmeCqe *cqe = (NvmeCqe *)&q->cq.queue[cqe_offset]; 589 + 590 + /* 591 + * Do an early check for completions. q->lock isn't needed because 592 + * nvme_process_completion() only runs in the event loop thread and 593 + * cannot race with itself. 594 + */ 595 + if ((le16_to_cpu(cqe->status) & 0x1) == q->cq_phase) { 596 + continue; 597 + } 598 + 515 599 qemu_mutex_lock(&q->lock); 516 - while (nvme_process_completion(s, q)) { 600 + while (nvme_process_completion(q)) { 517 601 /* Keep polling */ 518 602 progress = true; 519 603 } ··· 551 635 }; 552 636 if (nvme_cmd_sync(bs, s->queues[0], &cmd)) { 553 637 error_setg(errp, "Failed to create io queue [%d]", n); 554 - nvme_free_queue_pair(bs, q); 638 + nvme_free_queue_pair(q); 555 639 return false; 556 640 } 557 641 cmd = (NvmeCmd) { ··· 562 646 }; 563 647 if (nvme_cmd_sync(bs, s->queues[0], &cmd)) { 564 648 error_setg(errp, "Failed to create io queue [%d]", n); 565 - nvme_free_queue_pair(bs, q); 649 + nvme_free_queue_pair(q); 566 650 return false; 567 651 } 568 652 s->queues = g_renew(NVMeQueuePair *, s->queues, n + 1); ··· 757 841 BDRVNVMeState *s = bs->opaque; 758 842 759 843 for (i = 0; i < s->nr_queues; ++i) { 760 - nvme_free_queue_pair(bs, s->queues[i]); 844 + nvme_free_queue_pair(s->queues[i]); 761 845 } 762 846 g_free(s->queues); 763 847 aio_set_event_notifier(bdrv_get_aio_context(bs), &s->irq_notifier, ··· 987 1071 r = nvme_cmd_map_qiov(bs, &cmd, req, qiov); 988 1072 qemu_co_mutex_unlock(&s->dma_map_lock); 
989 1073 if (r) { 990 - req->busy = false; 1074 + nvme_put_free_req_and_wake(ioq, req); 991 1075 return r; 992 1076 } 993 - nvme_submit_command(s, ioq, req, &cmd, nvme_rw_cb, &data); 1077 + nvme_submit_command(ioq, req, &cmd, nvme_rw_cb, &data); 994 1078 995 1079 data.co = qemu_coroutine_self(); 996 1080 while (data.ret == -EINPROGRESS) { ··· 1090 1174 assert(s->nr_queues > 1); 1091 1175 req = nvme_get_free_req(ioq); 1092 1176 assert(req); 1093 - nvme_submit_command(s, ioq, req, &cmd, nvme_rw_cb, &data); 1177 + nvme_submit_command(ioq, req, &cmd, nvme_rw_cb, &data); 1094 1178 1095 1179 data.co = qemu_coroutine_self(); 1096 1180 if (data.ret == -EINPROGRESS) { ··· 1143 1227 req = nvme_get_free_req(ioq); 1144 1228 assert(req); 1145 1229 1146 - nvme_submit_command(s, ioq, req, &cmd, nvme_rw_cb, &data); 1230 + nvme_submit_command(ioq, req, &cmd, nvme_rw_cb, &data); 1147 1231 1148 1232 data.co = qemu_coroutine_self(); 1149 1233 while (data.ret == -EINPROGRESS) { ··· 1204 1288 qemu_co_mutex_unlock(&s->dma_map_lock); 1205 1289 1206 1290 if (ret) { 1207 - req->busy = false; 1291 + nvme_put_free_req_and_wake(ioq, req); 1208 1292 goto out; 1209 1293 } 1210 1294 1211 1295 trace_nvme_dsm(s, offset, bytes); 1212 1296 1213 - nvme_submit_command(s, ioq, req, &cmd, nvme_rw_cb, &data); 1297 + nvme_submit_command(ioq, req, &cmd, nvme_rw_cb, &data); 1214 1298 1215 1299 data.co = qemu_coroutine_self(); 1216 1300 while (data.ret == -EINPROGRESS) { ··· 1262 1346 { 1263 1347 BDRVNVMeState *s = bs->opaque; 1264 1348 1349 + for (int i = 0; i < s->nr_queues; i++) { 1350 + NVMeQueuePair *q = s->queues[i]; 1351 + 1352 + qemu_bh_delete(q->completion_bh); 1353 + q->completion_bh = NULL; 1354 + } 1355 + 1265 1356 aio_set_event_notifier(bdrv_get_aio_context(bs), &s->irq_notifier, 1266 1357 false, NULL, NULL); 1267 1358 } ··· 1274 1365 s->aio_context = new_context; 1275 1366 aio_set_event_notifier(new_context, &s->irq_notifier, 1276 1367 false, nvme_handle_event, nvme_poll_cb); 1368 + 1369 + for 
(int i = 0; i < s->nr_queues; i++) { 1370 + NVMeQueuePair *q = s->queues[i]; 1371 + 1372 + q->completion_bh = 1373 + aio_bh_new(new_context, nvme_process_completion_bh, q); 1374 + } 1277 1375 } 1278 1376 1279 1377 static void nvme_aio_plug(BlockDriverState *bs) ··· 1292 1390 for (i = 1; i < s->nr_queues; i++) { 1293 1391 NVMeQueuePair *q = s->queues[i]; 1294 1392 qemu_mutex_lock(&q->lock); 1295 - nvme_kick(s, q); 1296 - nvme_process_completion(s, q); 1393 + nvme_kick(q); 1394 + nvme_process_completion(q); 1297 1395 qemu_mutex_unlock(&q->lock); 1298 1396 } 1299 1397 }
+1 -1
block/trace-events
··· 158 158 nvme_dma_flush_queue_wait(void *s) "s %p" 159 159 nvme_error(int cmd_specific, int sq_head, int sqid, int cid, int status) "cmd_specific %d sq_head %d sqid %d cid %d status 0x%x" 160 160 nvme_process_completion(void *s, int index, int inflight) "s %p queue %d inflight %d" 161 - nvme_process_completion_queue_busy(void *s, int index) "s %p queue %d" 161 + nvme_process_completion_queue_plugged(void *s, int index) "s %p queue %d" 162 162 nvme_complete_command(void *s, int index, int cid) "s %p queue %d cid %d" 163 163 nvme_submit_command(void *s, int index, int cid) "s %p queue %d cid %d" 164 164 nvme_submit_command_raw(int c0, int c1, int c2, int c3, int c4, int c5, int c6, int c7) "%02x %02x %02x %02x %02x %02x %02x %02x"
+73
configure
··· 307 307 libs_qga="" 308 308 debug_info="yes" 309 309 stack_protector="" 310 + safe_stack="" 310 311 use_containers="yes" 311 312 gdb_bin=$(command -v "gdb-multiarch" || command -v "gdb") 312 313 ··· 1287 1288 ;; 1288 1289 --disable-stack-protector) stack_protector="no" 1289 1290 ;; 1291 + --enable-safe-stack) safe_stack="yes" 1292 + ;; 1293 + --disable-safe-stack) safe_stack="no" 1294 + ;; 1290 1295 --disable-curses) curses="no" 1291 1296 ;; 1292 1297 --enable-curses) curses="yes" ··· 1829 1834 debug-tcg TCG debugging (default is disabled) 1830 1835 debug-info debugging information 1831 1836 sparse sparse checker 1837 + safe-stack SafeStack Stack Smash Protection. Depends on 1838 + clang/llvm >= 3.7 and requires coroutine backend ucontext. 1832 1839 1833 1840 gnutls GNUTLS cryptography support 1834 1841 nettle nettle cryptography support ··· 5573 5580 fi 5574 5581 fi 5575 5582 5583 + ################################################## 5584 + # SafeStack 5585 + 5586 + 5587 + if test "$safe_stack" = "yes"; then 5588 + cat > $TMPC << EOF 5589 + int main(int argc, char *argv[]) 5590 + { 5591 + #if ! __has_feature(safe_stack) 5592 + #error SafeStack Disabled 5593 + #endif 5594 + return 0; 5595 + } 5596 + EOF 5597 + flag="-fsanitize=safe-stack" 5598 + # Check that safe-stack is supported and enabled. 
5599 + if compile_prog "-Werror $flag" "$flag"; then 5600 + # Flag needed both at compilation and at linking 5601 + QEMU_CFLAGS="$QEMU_CFLAGS $flag" 5602 + QEMU_LDFLAGS="$QEMU_LDFLAGS $flag" 5603 + else 5604 + error_exit "SafeStack not supported by your compiler" 5605 + fi 5606 + if test "$coroutine" != "ucontext"; then 5607 + error_exit "SafeStack is only supported by the coroutine backend ucontext" 5608 + fi 5609 + else 5610 + cat > $TMPC << EOF 5611 + int main(int argc, char *argv[]) 5612 + { 5613 + #if defined(__has_feature) 5614 + #if __has_feature(safe_stack) 5615 + #error SafeStack Enabled 5616 + #endif 5617 + #endif 5618 + return 0; 5619 + } 5620 + EOF 5621 + if test "$safe_stack" = "no"; then 5622 + # Make sure that safe-stack is disabled 5623 + if ! compile_prog "-Werror" ""; then 5624 + # SafeStack was already enabled, try to explicitly remove the feature 5625 + flag="-fno-sanitize=safe-stack" 5626 + if ! compile_prog "-Werror $flag" "$flag"; then 5627 + error_exit "Configure cannot disable SafeStack" 5628 + fi 5629 + QEMU_CFLAGS="$QEMU_CFLAGS $flag" 5630 + QEMU_LDFLAGS="$QEMU_LDFLAGS $flag" 5631 + fi 5632 + else # "$safe_stack" = "" 5633 + # Set safe_stack to yes or no based on pre-existing flags 5634 + if compile_prog "-Werror" ""; then 5635 + safe_stack="no" 5636 + else 5637 + safe_stack="yes" 5638 + if test "$coroutine" != "ucontext"; then 5639 + error_exit "SafeStack is only supported by the coroutine backend ucontext" 5640 + fi 5641 + fi 5642 + fi 5643 + fi 5576 5644 5577 5645 ########################################## 5578 5646 # check if we have open_by_handle_at ··· 6765 6833 echo "strip binaries $strip_opt" 6766 6834 echo "profiler $profiler" 6767 6835 echo "static build $static" 6836 + echo "safe stack $safe_stack" 6768 6837 if test "$darwin" = "yes" ; then 6769 6838 echo "Cocoa support $cocoa" 6770 6839 fi ··· 8368 8437 8369 8438 if test "$ccache_cpp2" = "yes"; then 8370 8439 echo "export CCACHE_CPP2=y" >> $config_host_mak 8440 + fi 8441 + 
8442 + if test "$safe_stack" = "yes"; then 8443 + echo "CONFIG_SAFESTACK=y" >> $config_host_mak 8371 8444 fi 8372 8445 8373 8446 # If we're using a separate build tree, set it up now.
+5
include/qemu/coroutine_int.h
··· 28 28 #include "qemu/queue.h" 29 29 #include "qemu/coroutine.h" 30 30 31 + #ifdef CONFIG_SAFESTACK 32 + /* Pointer to the unsafe stack, defined by the compiler */ 33 + extern __thread void *__safestack_unsafe_stack_ptr; 34 + #endif 35 + 31 36 #define COROUTINE_STACK_SIZE (1 << 20) 32 37 33 38 typedef enum {
+3 -3
scripts/minikconf.py
··· 402 402 if incl_abs_fname in self.data.previously_included: 403 403 return 404 404 try: 405 - fp = open(incl_abs_fname, 'r') 405 + fp = open(incl_abs_fname, 'rt', encoding='utf-8') 406 406 except IOError as e: 407 407 raise KconfigParserError(self, 408 408 '%s: %s' % (e.strerror, include)) ··· 696 696 parser.do_assignment(name, value == 'y') 697 697 external_vars.add(name[7:]) 698 698 else: 699 - fp = open(arg, 'r') 699 + fp = open(arg, 'rt', encoding='utf-8') 700 700 parser.parse_file(fp) 701 701 fp.close() 702 702 ··· 705 705 if key not in external_vars and config[key]: 706 706 print ('CONFIG_%s=y' % key) 707 707 708 - deps = open(argv[2], 'w') 708 + deps = open(argv[2], 'wt', encoding='utf-8') 709 709 for fname in data.previously_included: 710 710 print ('%s: %s' % (argv[1], fname), file=deps) 711 711 deps.close()
+11 -1
tests/check-block.sh
··· 21 21 exit 0 22 22 fi 23 23 24 - if grep -q "CFLAGS.*-fsanitize" config-host.mak 2>/dev/null ; then 24 + # Disable tests with any sanitizer except for SafeStack 25 + CFLAGS=$( grep "CFLAGS.*-fsanitize" config-host.mak 2>/dev/null ) 26 + SANITIZE_FLAGS="" 27 + # Remove all occurrences of -fsanitize=safe-stack 28 + for i in ${CFLAGS}; do 29 + if [ "${i}" != "-fsanitize=safe-stack" ]; then 30 + SANITIZE_FLAGS="${SANITIZE_FLAGS} ${i}" 31 + fi 32 + done 33 + if echo ${SANITIZE_FLAGS} | grep -q "\-fsanitize" 2>/dev/null; then 34 + # Have a sanitize flag that is not allowed, stop 25 35 echo "Sanitizers are enabled ==> Not running the qemu-iotests." 26 36 exit 0 27 37 fi
+4
util/coroutine-sigaltstack.c
··· 30 30 #include "qemu-common.h" 31 31 #include "qemu/coroutine_int.h" 32 32 33 + #ifdef CONFIG_SAFESTACK 34 + #error "SafeStack is not compatible with code run in alternate signal stacks" 35 + #endif 36 + 33 37 typedef struct { 34 38 Coroutine base; 35 39 void *stack;
+28
util/coroutine-ucontext.c
··· 45 45 Coroutine base; 46 46 void *stack; 47 47 size_t stack_size; 48 + #ifdef CONFIG_SAFESTACK 49 + /* Need an unsafe stack for each coroutine */ 50 + void *unsafe_stack; 51 + size_t unsafe_stack_size; 52 + #endif 48 53 sigjmp_buf env; 49 54 50 55 void *tsan_co_fiber; ··· 179 184 co = g_malloc0(sizeof(*co)); 180 185 co->stack_size = COROUTINE_STACK_SIZE; 181 186 co->stack = qemu_alloc_stack(&co->stack_size); 187 + #ifdef CONFIG_SAFESTACK 188 + co->unsafe_stack_size = COROUTINE_STACK_SIZE; 189 + co->unsafe_stack = qemu_alloc_stack(&co->unsafe_stack_size); 190 + #endif 182 191 co->base.entry_arg = &old_env; /* stash away our jmp_buf */ 183 192 184 193 uc.uc_link = &old_uc; ··· 203 212 COROUTINE_YIELD, 204 213 &fake_stack_save, 205 214 co->stack, co->stack_size, co->tsan_co_fiber); 215 + 216 + #ifdef CONFIG_SAFESTACK 217 + /* 218 + * Before we swap the context, set the new unsafe stack 219 + * The unsafe stack grows just like the normal stack, so start from 220 + * the last usable location of the memory area. 221 + * NOTE: we don't have to re-set the usp afterwards because we are 222 + * coming back to this context through a siglongjmp. 223 + * The compiler already wrapped the corresponding sigsetjmp call with 224 + * code that saves the usp on the (safe) stack before the call, and 225 + * restores it right after (which is where we return with siglongjmp). 226 + */ 227 + void *usp = co->unsafe_stack + co->unsafe_stack_size; 228 + __safestack_unsafe_stack_ptr = usp; 229 + #endif 230 + 206 231 swapcontext(&old_uc, &uc); 207 232 } 208 233 ··· 235 260 #endif 236 261 237 262 qemu_free_stack(co->stack, co->stack_size); 263 + #ifdef CONFIG_SAFESTACK 264 + qemu_free_stack(co->unsafe_stack, co->unsafe_stack_size); 265 + #endif 238 266 g_free(co); 239 267 } 240 268