QEMU with hacks to log DMA reads and writes (see jcs.org/2018/11/12/vfio)

virtiofsd: add seccomp whitelist

Only allow system calls that are needed by virtiofsd. All other system
calls cause SIGSYS to be directed at the thread and the process will
coredump.

Restricting system calls reduces the kernel attack surface and limits
what the process can do when compromised.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
with additional entries by:
Signed-off-by: Ganesh Maharaj Mahalingam <ganesh.mahalingam@intel.com>
Signed-off-by: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com>
Signed-off-by: Misono Tomohiro <misono.tomohiro@jp.fujitsu.com>
Signed-off-by: piaojun <piaojun@huawei.com>
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Eric Ren <renzhen@linux.alibaba.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>

Authored by Stefan Hajnoczi and committed by Dr. David Alan Gilbert
4f8bde99 8e1d4ef2

+174 -3
+3 -2
Makefile
··· 327 327 vhost-user-json-y += contrib/vhost-user-gpu/50-qemu-gpu.json 328 328 endif 329 329 330 - ifdef CONFIG_LINUX 330 + ifeq ($(CONFIG_LINUX)$(CONFIG_SECCOMP),yy) 331 331 HELPERS-y += virtiofsd$(EXESUF) 332 332 vhost-user-json-y += tools/virtiofsd/50-qemu-virtiofsd.json 333 333 endif ··· 674 674 rdmacm-mux$(EXESUF): $(rdmacm-mux-obj-y) $(COMMON_LDADDS) 675 675 $(call LINK, $^) 676 676 677 - ifdef CONFIG_LINUX # relies on Linux-specific syscalls 677 + # relies on Linux-specific syscalls 678 + ifeq ($(CONFIG_LINUX)$(CONFIG_SECCOMP),yy) 678 679 virtiofsd$(EXESUF): $(virtiofsd-obj-y) libvhost-user.a $(COMMON_LDADDS) 679 680 $(call LINK, $^) 680 681 endif
+4 -1
tools/virtiofsd/Makefile.objs
··· 5 5 fuse_signals.o \ 6 6 fuse_virtio.o \ 7 7 helper.o \ 8 - passthrough_ll.o 8 + passthrough_ll.o \ 9 + seccomp.o 9 10 11 + seccomp.o-cflags := $(SECCOMP_CFLAGS) 12 + seccomp.o-libs := $(SECCOMP_LIBS)
+2
tools/virtiofsd/passthrough_ll.c
··· 59 59 #include <unistd.h> 60 60 61 61 #include "passthrough_helpers.h" 62 + #include "seccomp.h" 62 63 63 64 struct lo_map_elem { 64 65 union { ··· 2091 2092 { 2092 2093 setup_namespaces(lo, se); 2093 2094 setup_mounts(lo->source); 2095 + setup_seccomp(); 2094 2096 } 2095 2097 2096 2098 int main(int argc, char *argv[])
+151
tools/virtiofsd/seccomp.c
··· 1 + /* 2 + * Seccomp sandboxing for virtiofsd 3 + * 4 + * Copyright (C) 2019 Red Hat, Inc. 5 + * 6 + * SPDX-License-Identifier: GPL-2.0-or-later 7 + */ 8 + 9 + #include "qemu/osdep.h" 10 + #include "seccomp.h" 11 + #include "fuse_i.h" 12 + #include "fuse_log.h" 13 + #include <errno.h> 14 + #include <glib.h> 15 + #include <seccomp.h> 16 + #include <stdlib.h> 17 + 18 + /* Bodge for libseccomp 2.4.2 which broke ppoll */ 19 + #if !defined(__SNR_ppoll) && defined(__SNR_brk) 20 + #ifdef __NR_ppoll 21 + #define __SNR_ppoll __NR_ppoll 22 + #else 23 + #define __SNR_ppoll __PNR_ppoll 24 + #endif 25 + #endif 26 + 27 + static const int syscall_whitelist[] = { 28 + /* TODO ireg sem*() syscalls */ 29 + SCMP_SYS(brk), 30 + SCMP_SYS(capget), /* For CAP_FSETID */ 31 + SCMP_SYS(capset), 32 + SCMP_SYS(clock_gettime), 33 + SCMP_SYS(clone), 34 + #ifdef __NR_clone3 35 + SCMP_SYS(clone3), 36 + #endif 37 + SCMP_SYS(close), 38 + SCMP_SYS(copy_file_range), 39 + SCMP_SYS(dup), 40 + SCMP_SYS(eventfd2), 41 + SCMP_SYS(exit), 42 + SCMP_SYS(exit_group), 43 + SCMP_SYS(fallocate), 44 + SCMP_SYS(fchmodat), 45 + SCMP_SYS(fchownat), 46 + SCMP_SYS(fcntl), 47 + SCMP_SYS(fdatasync), 48 + SCMP_SYS(fgetxattr), 49 + SCMP_SYS(flistxattr), 50 + SCMP_SYS(flock), 51 + SCMP_SYS(fremovexattr), 52 + SCMP_SYS(fsetxattr), 53 + SCMP_SYS(fstat), 54 + SCMP_SYS(fstatfs), 55 + SCMP_SYS(fsync), 56 + SCMP_SYS(ftruncate), 57 + SCMP_SYS(futex), 58 + SCMP_SYS(getdents), 59 + SCMP_SYS(getdents64), 60 + SCMP_SYS(getegid), 61 + SCMP_SYS(geteuid), 62 + SCMP_SYS(getpid), 63 + SCMP_SYS(gettid), 64 + SCMP_SYS(gettimeofday), 65 + SCMP_SYS(linkat), 66 + SCMP_SYS(lseek), 67 + SCMP_SYS(madvise), 68 + SCMP_SYS(mkdirat), 69 + SCMP_SYS(mknodat), 70 + SCMP_SYS(mmap), 71 + SCMP_SYS(mprotect), 72 + SCMP_SYS(mremap), 73 + SCMP_SYS(munmap), 74 + SCMP_SYS(newfstatat), 75 + SCMP_SYS(open), 76 + SCMP_SYS(openat), 77 + SCMP_SYS(ppoll), 78 + SCMP_SYS(prctl), /* TODO restrict to just PR_SET_NAME? 
*/ 79 + SCMP_SYS(preadv), 80 + SCMP_SYS(pread64), 81 + SCMP_SYS(pwritev), 82 + SCMP_SYS(pwrite64), 83 + SCMP_SYS(read), 84 + SCMP_SYS(readlinkat), 85 + SCMP_SYS(recvmsg), 86 + SCMP_SYS(renameat), 87 + SCMP_SYS(renameat2), 88 + SCMP_SYS(rt_sigaction), 89 + SCMP_SYS(rt_sigprocmask), 90 + SCMP_SYS(rt_sigreturn), 91 + SCMP_SYS(sendmsg), 92 + SCMP_SYS(setresgid), 93 + SCMP_SYS(setresuid), 94 + #ifdef __NR_setresgid32 95 + SCMP_SYS(setresgid32), 96 + #endif 97 + #ifdef __NR_setresuid32 98 + SCMP_SYS(setresuid32), 99 + #endif 100 + SCMP_SYS(set_robust_list), 101 + SCMP_SYS(symlinkat), 102 + SCMP_SYS(time), /* Rarely needed, except on static builds */ 103 + SCMP_SYS(tgkill), 104 + SCMP_SYS(unlinkat), 105 + SCMP_SYS(utimensat), 106 + SCMP_SYS(write), 107 + SCMP_SYS(writev), 108 + }; 109 + 110 + void setup_seccomp(void) 111 + { 112 + scmp_filter_ctx ctx; 113 + size_t i; 114 + 115 + #ifdef SCMP_ACT_KILL_PROCESS 116 + ctx = seccomp_init(SCMP_ACT_KILL_PROCESS); 117 + /* Handle a newer libseccomp but an older kernel */ 118 + if (!ctx && errno == EOPNOTSUPP) { 119 + ctx = seccomp_init(SCMP_ACT_TRAP); 120 + } 121 + #else 122 + ctx = seccomp_init(SCMP_ACT_TRAP); 123 + #endif 124 + if (!ctx) { 125 + fuse_log(FUSE_LOG_ERR, "seccomp_init() failed\n"); 126 + exit(1); 127 + } 128 + 129 + for (i = 0; i < G_N_ELEMENTS(syscall_whitelist); i++) { 130 + if (seccomp_rule_add(ctx, SCMP_ACT_ALLOW, 131 + syscall_whitelist[i], 0) != 0) { 132 + fuse_log(FUSE_LOG_ERR, "seccomp_rule_add syscall %d", 133 + syscall_whitelist[i]); 134 + exit(1); 135 + } 136 + } 137 + 138 + /* libvhost-user calls this for post-copy migration, we don't need it */ 139 + if (seccomp_rule_add(ctx, SCMP_ACT_ERRNO(ENOSYS), 140 + SCMP_SYS(userfaultfd), 0) != 0) { 141 + fuse_log(FUSE_LOG_ERR, "seccomp_rule_add userfaultfd failed\n"); 142 + exit(1); 143 + } 144 + 145 + if (seccomp_load(ctx) < 0) { 146 + fuse_log(FUSE_LOG_ERR, "seccomp_load() failed\n"); 147 + exit(1); 148 + } 149 + 150 + seccomp_release(ctx); 151 + }
+14
tools/virtiofsd/seccomp.h
/*
 * Seccomp sandboxing for virtiofsd
 *
 * Copyright (C) 2019 Red Hat, Inc.
 *
 * SPDX-License-Identifier: GPL-2.0-or-later
 */

#ifndef VIRTIOFSD_SECCOMP_H
#define VIRTIOFSD_SECCOMP_H

/*
 * Entry point of the virtiofsd seccomp sandbox.
 * Takes no arguments and returns no value.
 */
void setup_seccomp(void);

#endif /* VIRTIOFSD_SECCOMP_H */