QEMU with hacks to log DMA reads and writes
jcs.org/2018/11/12/vfio
1/*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25#include "qemu/osdep.h"
26#include "qemu/main-loop.h"
27#include "qemu/timer.h"
28#include "qemu/lockable.h"
29#include "sysemu/replay.h"
30#include "sysemu/cpus.h"
31
32#ifdef CONFIG_POSIX
33#include <pthread.h>
34#endif
35
36#ifdef CONFIG_PPOLL
37#include <poll.h>
38#endif
39
40#ifdef CONFIG_PRCTL_PR_SET_TIMERSLACK
41#include <sys/prctl.h>
42#endif
43
44/***********************************************************/
45/* timers */
46
/* Per-clock-type state: the timer lists attached to the clock and
 * whether the clock is currently enabled.
 */
typedef struct QEMUClock {
    /* We rely on BQL to protect the timerlists */
    QLIST_HEAD(, QEMUTimerList) timerlists;

    /* Clock type; filled in by qemu_clock_init() */
    QEMUClockType type;
    /* Consulted before deadlines are computed or timers are run */
    bool enabled;
} QEMUClock;
54
/* Timer lists created by qemu_clock_init(), one slot per clock type. */
QEMUTimerListGroup main_loop_tlg;
/* Clock objects, indexed by QEMUClockType (see qemu_clock_ptr()). */
static QEMUClock qemu_clocks[QEMU_CLOCK_MAX];
57
/* A QEMUTimerList is a list of timers attached to a clock. More
 * than one QEMUTimerList can be attached to each clock, for instance
 * used by different AioContexts / threads. Each clock also has
 * a list of the QEMUTimerLists associated with it, in order that
 * reenabling the clock can call all the notifiers.
 */

struct QEMUTimerList {
    /* Clock this list is attached to */
    QEMUClock *clock;
    /* Held while active_timers is walked or modified */
    QemuMutex active_timers_lock;
    /* Head of the pending timers, kept sorted by expire_time */
    QEMUTimer *active_timers;
    /* Linkage in clock->timerlists */
    QLIST_ENTRY(QEMUTimerList) list;
    /* If set, timerlist_notify() calls this instead of qemu_notify_event() */
    QEMUTimerListNotifyCB *notify_cb;
    /* First argument handed to notify_cb */
    void *notify_opaque;

    /* lightweight method to mark the end of timerlist's running */
    QemuEvent timers_done_ev;
};
76
77/**
78 * qemu_clock_ptr:
79 * @type: type of clock
80 *
81 * Translate a clock type into a pointer to QEMUClock object.
82 *
83 * Returns: a pointer to the QEMUClock object
84 */
85static inline QEMUClock *qemu_clock_ptr(QEMUClockType type)
86{
87 return &qemu_clocks[type];
88}
89
90static bool timer_expired_ns(QEMUTimer *timer_head, int64_t current_time)
91{
92 return timer_head && (timer_head->expire_time <= current_time);
93}
94
95QEMUTimerList *timerlist_new(QEMUClockType type,
96 QEMUTimerListNotifyCB *cb,
97 void *opaque)
98{
99 QEMUTimerList *timer_list;
100 QEMUClock *clock = qemu_clock_ptr(type);
101
102 timer_list = g_malloc0(sizeof(QEMUTimerList));
103 qemu_event_init(&timer_list->timers_done_ev, true);
104 timer_list->clock = clock;
105 timer_list->notify_cb = cb;
106 timer_list->notify_opaque = opaque;
107 qemu_mutex_init(&timer_list->active_timers_lock);
108 QLIST_INSERT_HEAD(&clock->timerlists, timer_list, list);
109 return timer_list;
110}
111
112void timerlist_free(QEMUTimerList *timer_list)
113{
114 assert(!timerlist_has_timers(timer_list));
115 if (timer_list->clock) {
116 QLIST_REMOVE(timer_list, list);
117 }
118 qemu_mutex_destroy(&timer_list->active_timers_lock);
119 g_free(timer_list);
120}
121
122static void qemu_clock_init(QEMUClockType type, QEMUTimerListNotifyCB *notify_cb)
123{
124 QEMUClock *clock = qemu_clock_ptr(type);
125
126 /* Assert that the clock of type TYPE has not been initialized yet. */
127 assert(main_loop_tlg.tl[type] == NULL);
128
129 clock->type = type;
130 clock->enabled = (type == QEMU_CLOCK_VIRTUAL ? false : true);
131 QLIST_INIT(&clock->timerlists);
132 main_loop_tlg.tl[type] = timerlist_new(type, notify_cb, NULL);
133}
134
135bool qemu_clock_use_for_deadline(QEMUClockType type)
136{
137 return !(use_icount && (type == QEMU_CLOCK_VIRTUAL));
138}
139
140void qemu_clock_notify(QEMUClockType type)
141{
142 QEMUTimerList *timer_list;
143 QEMUClock *clock = qemu_clock_ptr(type);
144 QLIST_FOREACH(timer_list, &clock->timerlists, list) {
145 timerlist_notify(timer_list);
146 }
147}
148
149/* Disabling the clock will wait for related timerlists to stop
150 * executing qemu_run_timers. Thus, this functions should not
151 * be used from the callback of a timer that is based on @clock.
152 * Doing so would cause a deadlock.
153 *
154 * Caller should hold BQL.
155 */
156void qemu_clock_enable(QEMUClockType type, bool enabled)
157{
158 QEMUClock *clock = qemu_clock_ptr(type);
159 QEMUTimerList *tl;
160 bool old = clock->enabled;
161 clock->enabled = enabled;
162 if (enabled && !old) {
163 qemu_clock_notify(type);
164 } else if (!enabled && old) {
165 QLIST_FOREACH(tl, &clock->timerlists, list) {
166 qemu_event_wait(&tl->timers_done_ev);
167 }
168 }
169}
170
171bool timerlist_has_timers(QEMUTimerList *timer_list)
172{
173 return !!atomic_read(&timer_list->active_timers);
174}
175
176bool qemu_clock_has_timers(QEMUClockType type)
177{
178 return timerlist_has_timers(
179 main_loop_tlg.tl[type]);
180}
181
182bool timerlist_expired(QEMUTimerList *timer_list)
183{
184 int64_t expire_time;
185
186 if (!atomic_read(&timer_list->active_timers)) {
187 return false;
188 }
189
190 WITH_QEMU_LOCK_GUARD(&timer_list->active_timers_lock) {
191 if (!timer_list->active_timers) {
192 return false;
193 }
194 expire_time = timer_list->active_timers->expire_time;
195 }
196
197 return expire_time <= qemu_clock_get_ns(timer_list->clock->type);
198}
199
200bool qemu_clock_expired(QEMUClockType type)
201{
202 return timerlist_expired(
203 main_loop_tlg.tl[type]);
204}
205
206/*
207 * As above, but return -1 for no deadline, and do not cap to 2^32
208 * as we know the result is always positive.
209 */
210
211int64_t timerlist_deadline_ns(QEMUTimerList *timer_list)
212{
213 int64_t delta;
214 int64_t expire_time;
215
216 if (!atomic_read(&timer_list->active_timers)) {
217 return -1;
218 }
219
220 if (!timer_list->clock->enabled) {
221 return -1;
222 }
223
224 /* The active timers list may be modified before the caller uses our return
225 * value but ->notify_cb() is called when the deadline changes. Therefore
226 * the caller should notice the change and there is no race condition.
227 */
228 WITH_QEMU_LOCK_GUARD(&timer_list->active_timers_lock) {
229 if (!timer_list->active_timers) {
230 return -1;
231 }
232 expire_time = timer_list->active_timers->expire_time;
233 }
234
235 delta = expire_time - qemu_clock_get_ns(timer_list->clock->type);
236
237 if (delta <= 0) {
238 return 0;
239 }
240
241 return delta;
242}
243
244/* Calculate the soonest deadline across all timerlists attached
245 * to the clock. This is used for the icount timeout so we
246 * ignore whether or not the clock should be used in deadline
247 * calculations.
248 */
249int64_t qemu_clock_deadline_ns_all(QEMUClockType type, int attr_mask)
250{
251 int64_t deadline = -1;
252 int64_t delta;
253 int64_t expire_time;
254 QEMUTimer *ts;
255 QEMUTimerList *timer_list;
256 QEMUClock *clock = qemu_clock_ptr(type);
257
258 if (!clock->enabled) {
259 return -1;
260 }
261
262 QLIST_FOREACH(timer_list, &clock->timerlists, list) {
263 qemu_mutex_lock(&timer_list->active_timers_lock);
264 ts = timer_list->active_timers;
265 /* Skip all external timers */
266 while (ts && (ts->attributes & ~attr_mask)) {
267 ts = ts->next;
268 }
269 if (!ts) {
270 qemu_mutex_unlock(&timer_list->active_timers_lock);
271 continue;
272 }
273 expire_time = ts->expire_time;
274 qemu_mutex_unlock(&timer_list->active_timers_lock);
275
276 delta = expire_time - qemu_clock_get_ns(type);
277 if (delta <= 0) {
278 delta = 0;
279 }
280 deadline = qemu_soonest_timeout(deadline, delta);
281 }
282 return deadline;
283}
284
285QEMUClockType timerlist_get_clock(QEMUTimerList *timer_list)
286{
287 return timer_list->clock->type;
288}
289
290QEMUTimerList *qemu_clock_get_main_loop_timerlist(QEMUClockType type)
291{
292 return main_loop_tlg.tl[type];
293}
294
295void timerlist_notify(QEMUTimerList *timer_list)
296{
297 if (timer_list->notify_cb) {
298 timer_list->notify_cb(timer_list->notify_opaque, timer_list->clock->type);
299 } else {
300 qemu_notify_event();
301 }
302}
303
304/* Transition function to convert a nanosecond timeout to ms
305 * This is used where a system does not support ppoll
306 */
307int qemu_timeout_ns_to_ms(int64_t ns)
308{
309 int64_t ms;
310 if (ns < 0) {
311 return -1;
312 }
313
314 if (!ns) {
315 return 0;
316 }
317
318 /* Always round up, because it's better to wait too long than to wait too
319 * little and effectively busy-wait
320 */
321 ms = DIV_ROUND_UP(ns, SCALE_MS);
322
323 /* To avoid overflow problems, limit this to 2^31, i.e. approx 25 days */
324 return MIN(ms, INT32_MAX);
325}
326
327
328/* qemu implementation of g_poll which uses a nanosecond timeout but is
329 * otherwise identical to g_poll
330 */
331int qemu_poll_ns(GPollFD *fds, guint nfds, int64_t timeout)
332{
333#ifdef CONFIG_PPOLL
334 if (timeout < 0) {
335 return ppoll((struct pollfd *)fds, nfds, NULL, NULL);
336 } else {
337 struct timespec ts;
338 int64_t tvsec = timeout / 1000000000LL;
339 /* Avoid possibly overflowing and specifying a negative number of
340 * seconds, which would turn a very long timeout into a busy-wait.
341 */
342 if (tvsec > (int64_t)INT32_MAX) {
343 tvsec = INT32_MAX;
344 }
345 ts.tv_sec = tvsec;
346 ts.tv_nsec = timeout % 1000000000LL;
347 return ppoll((struct pollfd *)fds, nfds, &ts, NULL);
348 }
349#else
350 return g_poll(fds, nfds, qemu_timeout_ns_to_ms(timeout));
351#endif
352}
353
354
355void timer_init_full(QEMUTimer *ts,
356 QEMUTimerListGroup *timer_list_group, QEMUClockType type,
357 int scale, int attributes,
358 QEMUTimerCB *cb, void *opaque)
359{
360 if (!timer_list_group) {
361 timer_list_group = &main_loop_tlg;
362 }
363 ts->timer_list = timer_list_group->tl[type];
364 ts->cb = cb;
365 ts->opaque = opaque;
366 ts->scale = scale;
367 ts->attributes = attributes;
368 ts->expire_time = -1;
369}
370
/*
 * Detach @ts from its timer list. The timer must not be pending
 * (expire_time == -1), which is asserted.
 */
void timer_deinit(QEMUTimer *ts)
{
    assert(ts->expire_time == -1);
    ts->timer_list = NULL;
}
376
377static void timer_del_locked(QEMUTimerList *timer_list, QEMUTimer *ts)
378{
379 QEMUTimer **pt, *t;
380
381 ts->expire_time = -1;
382 pt = &timer_list->active_timers;
383 for(;;) {
384 t = *pt;
385 if (!t)
386 break;
387 if (t == ts) {
388 atomic_set(pt, t->next);
389 break;
390 }
391 pt = &t->next;
392 }
393}
394
395static bool timer_mod_ns_locked(QEMUTimerList *timer_list,
396 QEMUTimer *ts, int64_t expire_time)
397{
398 QEMUTimer **pt, *t;
399
400 /* add the timer in the sorted list */
401 pt = &timer_list->active_timers;
402 for (;;) {
403 t = *pt;
404 if (!timer_expired_ns(t, expire_time)) {
405 break;
406 }
407 pt = &t->next;
408 }
409 ts->expire_time = MAX(expire_time, 0);
410 ts->next = *pt;
411 atomic_set(pt, ts);
412
413 return pt == &timer_list->active_timers;
414}
415
416static void timerlist_rearm(QEMUTimerList *timer_list)
417{
418 /* Interrupt execution to force deadline recalculation. */
419 if (timer_list->clock->type == QEMU_CLOCK_VIRTUAL) {
420 qemu_start_warp_timer();
421 }
422 timerlist_notify(timer_list);
423}
424
425/* stop a timer, but do not dealloc it */
426void timer_del(QEMUTimer *ts)
427{
428 QEMUTimerList *timer_list = ts->timer_list;
429
430 if (timer_list) {
431 qemu_mutex_lock(&timer_list->active_timers_lock);
432 timer_del_locked(timer_list, ts);
433 qemu_mutex_unlock(&timer_list->active_timers_lock);
434 }
435}
436
437/* modify the current timer so that it will be fired when current_time
438 >= expire_time. The corresponding callback will be called. */
439void timer_mod_ns(QEMUTimer *ts, int64_t expire_time)
440{
441 QEMUTimerList *timer_list = ts->timer_list;
442 bool rearm;
443
444 qemu_mutex_lock(&timer_list->active_timers_lock);
445 timer_del_locked(timer_list, ts);
446 rearm = timer_mod_ns_locked(timer_list, ts, expire_time);
447 qemu_mutex_unlock(&timer_list->active_timers_lock);
448
449 if (rearm) {
450 timerlist_rearm(timer_list);
451 }
452}
453
454/* modify the current timer so that it will be fired when current_time
455 >= expire_time or the current deadline, whichever comes earlier.
456 The corresponding callback will be called. */
457void timer_mod_anticipate_ns(QEMUTimer *ts, int64_t expire_time)
458{
459 QEMUTimerList *timer_list = ts->timer_list;
460 bool rearm;
461
462 WITH_QEMU_LOCK_GUARD(&timer_list->active_timers_lock) {
463 if (ts->expire_time == -1 || ts->expire_time > expire_time) {
464 if (ts->expire_time != -1) {
465 timer_del_locked(timer_list, ts);
466 }
467 rearm = timer_mod_ns_locked(timer_list, ts, expire_time);
468 } else {
469 rearm = false;
470 }
471 }
472 if (rearm) {
473 timerlist_rearm(timer_list);
474 }
475}
476
477void timer_mod(QEMUTimer *ts, int64_t expire_time)
478{
479 timer_mod_ns(ts, expire_time * ts->scale);
480}
481
482void timer_mod_anticipate(QEMUTimer *ts, int64_t expire_time)
483{
484 timer_mod_anticipate_ns(ts, expire_time * ts->scale);
485}
486
487bool timer_pending(QEMUTimer *ts)
488{
489 return ts->expire_time >= 0;
490}
491
492bool timer_expired(QEMUTimer *timer_head, int64_t current_time)
493{
494 return timer_expired_ns(timer_head, current_time * timer_head->scale);
495}
496
/*
 * timerlist_run_timers:
 * @timer_list: the timer list to process
 *
 * Run the callbacks of all timers on @timer_list that have expired
 * according to the list's clock.
 *
 * Returns: true if at least one callback was invoked.
 */
bool timerlist_run_timers(QEMUTimerList *timer_list)
{
    QEMUTimer *ts;
    int64_t current_time;
    bool progress = false;
    QEMUTimerCB *cb;
    void *opaque;

    /* Lock-free fast path: nothing is queued. */
    if (!atomic_read(&timer_list->active_timers)) {
        return false;
    }

    /* Mark the list as running. qemu_clock_enable() waits on this event
     * when it disables the clock; the event is set again at "out".
     */
    qemu_event_reset(&timer_list->timers_done_ev);
    if (!timer_list->clock->enabled) {
        goto out;
    }

    /* The HOST and VIRTUAL_RT clocks take their replay checkpoint up front
     * and run no timers when replay_checkpoint() returns false. The VIRTUAL
     * clock is checkpointed per timer inside the loop below.
     */
    switch (timer_list->clock->type) {
    case QEMU_CLOCK_REALTIME:
        break;
    default:
    case QEMU_CLOCK_VIRTUAL:
        break;
    case QEMU_CLOCK_HOST:
        if (!replay_checkpoint(CHECKPOINT_CLOCK_HOST)) {
            goto out;
        }
        break;
    case QEMU_CLOCK_VIRTUAL_RT:
        if (!replay_checkpoint(CHECKPOINT_CLOCK_VIRTUAL_RT)) {
            goto out;
        }
        break;
    }

    /*
     * Extract expired timers from the active timers list and process them.
     *
     * In rr mode we need "filtered" checkpointing for the virtual clock:
     * a checkpoint is recorded/replayed before processing an expired
     * non-EXTERNAL timer, while EXTERNAL timers are processed without one.
     *
     * NOTE(review): replay_checkpoint() is called here for every expired
     * non-EXTERNAL timer, not just once per run; confirm this is intended,
     * since the clock value stays the same for the whole run.
     */
    current_time = qemu_clock_get_ns(timer_list->clock->type);
    qemu_mutex_lock(&timer_list->active_timers_lock);
    while ((ts = timer_list->active_timers)) {
        if (!timer_expired_ns(ts, current_time)) {
            /* No expired timers left. The checkpoint can be skipped
             * if no timers fired or they were all external.
             */
            break;
        }
        /* The virtual clock checkpoint is skipped for EXTERNAL timers,
         * presumably because these timers don't change guest state
         * directly (TODO confirm). For any other timer, stop processing
         * as soon as replay_checkpoint() returns false.
         */
        if (replay_mode != REPLAY_MODE_NONE
            && timer_list->clock->type == QEMU_CLOCK_VIRTUAL
            && !(ts->attributes & QEMU_TIMER_ATTR_EXTERNAL)
            && !replay_checkpoint(CHECKPOINT_CLOCK_VIRTUAL)) {
            qemu_mutex_unlock(&timer_list->active_timers_lock);
            goto out;
        }

        /* remove timer from the list before calling the callback */
        timer_list->active_timers = ts->next;
        ts->next = NULL;
        ts->expire_time = -1;
        /* Copy the callback and its argument while the lock is held. */
        cb = ts->cb;
        opaque = ts->opaque;

        /* run the callback (the timer list can be modified) */
        qemu_mutex_unlock(&timer_list->active_timers_lock);
        cb(opaque);
        qemu_mutex_lock(&timer_list->active_timers_lock);

        progress = true;
    }
    qemu_mutex_unlock(&timer_list->active_timers_lock);

out:
    /* Signal that this list is no longer running timers. */
    qemu_event_set(&timer_list->timers_done_ev);
    return progress;
}
583
584bool qemu_clock_run_timers(QEMUClockType type)
585{
586 return timerlist_run_timers(main_loop_tlg.tl[type]);
587}
588
589void timerlistgroup_init(QEMUTimerListGroup *tlg,
590 QEMUTimerListNotifyCB *cb, void *opaque)
591{
592 QEMUClockType type;
593 for (type = 0; type < QEMU_CLOCK_MAX; type++) {
594 tlg->tl[type] = timerlist_new(type, cb, opaque);
595 }
596}
597
598void timerlistgroup_deinit(QEMUTimerListGroup *tlg)
599{
600 QEMUClockType type;
601 for (type = 0; type < QEMU_CLOCK_MAX; type++) {
602 timerlist_free(tlg->tl[type]);
603 }
604}
605
606bool timerlistgroup_run_timers(QEMUTimerListGroup *tlg)
607{
608 QEMUClockType type;
609 bool progress = false;
610 for (type = 0; type < QEMU_CLOCK_MAX; type++) {
611 progress |= timerlist_run_timers(tlg->tl[type]);
612 }
613 return progress;
614}
615
616int64_t timerlistgroup_deadline_ns(QEMUTimerListGroup *tlg)
617{
618 int64_t deadline = -1;
619 QEMUClockType type;
620 for (type = 0; type < QEMU_CLOCK_MAX; type++) {
621 if (qemu_clock_use_for_deadline(type)) {
622 deadline = qemu_soonest_timeout(deadline,
623 timerlist_deadline_ns(tlg->tl[type]));
624 }
625 }
626 return deadline;
627}
628
629int64_t qemu_clock_get_ns(QEMUClockType type)
630{
631 switch (type) {
632 case QEMU_CLOCK_REALTIME:
633 return get_clock();
634 default:
635 case QEMU_CLOCK_VIRTUAL:
636 if (use_icount) {
637 return cpu_get_icount();
638 } else {
639 return cpu_get_clock();
640 }
641 case QEMU_CLOCK_HOST:
642 return REPLAY_CLOCK(REPLAY_CLOCK_HOST, get_clock_realtime());
643 case QEMU_CLOCK_VIRTUAL_RT:
644 return REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT, cpu_get_clock());
645 }
646}
647
648void init_clocks(QEMUTimerListNotifyCB *notify_cb)
649{
650 QEMUClockType type;
651 for (type = 0; type < QEMU_CLOCK_MAX; type++) {
652 qemu_clock_init(type, notify_cb);
653 }
654
655#ifdef CONFIG_PRCTL_PR_SET_TIMERSLACK
656 prctl(PR_SET_TIMERSLACK, 1, 0, 0, 0);
657#endif
658}
659
660uint64_t timer_expire_time_ns(QEMUTimer *ts)
661{
662 return timer_pending(ts) ? ts->expire_time : -1;
663}
664
665bool qemu_clock_run_all_timers(void)
666{
667 bool progress = false;
668 QEMUClockType type;
669
670 for (type = 0; type < QEMU_CLOCK_MAX; type++) {
671 if (qemu_clock_use_for_deadline(type)) {
672 progress |= qemu_clock_run_timers(type);
673 }
674 }
675
676 return progress;
677}