QEMU with hacks to log DMA reads & writes — jcs.org/2018/11/12/vfio

Merge remote-tracking branch 'remotes/dgilbert-gitlab/tags/pull-virtiofs-20200123b' into staging

virtiofsd first pull v2

Import our virtiofsd.
This pulls in the daemon that drives a file system connected to the
existing QEMU virtiofs device.
It's derived from upstream libfuse with lots of changes (and a lot
trimmed out).
The daemon lives in the newly created qemu/tools/virtiofsd

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>

v2:
drop the docs while we discuss where they should live,
and the manpage needs to be redone in anything but texi

# gpg: Signature made Thu 23 Jan 2020 16:45:18 GMT
# gpg: using RSA key 45F5C71B4A0CB7FB977A9FA90516331EBC5BFDE7
# gpg: Good signature from "Dr. David Alan Gilbert (RH2) <dgilbert@redhat.com>" [full]
# Primary key fingerprint: 45F5 C71B 4A0C B7FB 977A 9FA9 0516 331E BC5B FDE7

* remotes/dgilbert-gitlab/tags/pull-virtiofs-20200123b: (108 commits)
virtiofsd: add some options to the help message
virtiofsd: stop all queue threads on exit in virtio_loop()
virtiofsd/passthrough_ll: Pass errno to fuse_reply_err()
virtiofsd: Convert lo_destroy to take the lo->mutex lock itself
virtiofsd: add --thread-pool-size=NUM option
virtiofsd: fix lo_destroy() resource leaks
virtiofsd: prevent FUSE_INIT/FUSE_DESTROY races
virtiofsd: process requests in a thread pool
virtiofsd: use fuse_buf_writev to replace fuse_buf_write for better performance
virtiofsd: add definition of fuse_buf_writev()
virtiofsd: passthrough_ll: Use cache_readdir for directory open
virtiofsd: Fix data corruption with O_APPEND write in writeback mode
virtiofsd: Reset O_DIRECT flag during file open
virtiofsd: convert more fprintf and perror to use fuse log infra
virtiofsd: do not always set FUSE_FLOCK_LOCKS
virtiofsd: introduce inode refcount to prevent use-after-free
virtiofsd: passthrough_ll: fix refcounting on remove/rename
libvhost-user: Fix some memtable remap cases
virtiofsd: rename inode->refcount to inode->nlookup
virtiofsd: prevent races with lo_dirp_put()
...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>

+13886 -14
+1
.gitignore
··· 6 6 /config-target.* 7 7 /config.status 8 8 /config-temp 9 + /tools/virtiofsd/50-qemu-virtiofsd.json 9 10 /elf2dmp 10 11 /trace-events-all 11 12 /trace/generated-events.h
+8
MAINTAINERS
··· 1595 1595 T: git https://github.com/borntraeger/qemu.git s390-next 1596 1596 L: qemu-s390x@nongnu.org 1597 1597 1598 + virtiofs 1599 + M: Dr. David Alan Gilbert <dgilbert@redhat.com> 1600 + M: Stefan Hajnoczi <stefanha@redhat.com> 1601 + S: Supported 1602 + F: tools/virtiofsd/* 1603 + F: hw/virtio/vhost-user-fs* 1604 + F: include/hw/virtio/vhost-user-fs.h 1605 + 1598 1606 virtio-input 1599 1607 M: Gerd Hoffmann <kraxel@redhat.com> 1600 1608 S: Maintained
+12
Makefile
··· 327 327 vhost-user-json-y += contrib/vhost-user-gpu/50-qemu-gpu.json 328 328 endif 329 329 330 + ifeq ($(CONFIG_LINUX)$(CONFIG_SECCOMP)$(CONFIG_LIBCAP_NG),yyy) 331 + HELPERS-y += virtiofsd$(EXESUF) 332 + vhost-user-json-y += tools/virtiofsd/50-qemu-virtiofsd.json 333 + endif 334 + 330 335 # Sphinx does not allow building manuals into the same directory as 331 336 # the source files, so if we're doing an in-tree QEMU build we must 332 337 # build the manuals into a subdirectory (and then install them from ··· 431 436 elf2dmp-obj-y \ 432 437 ivshmem-client-obj-y \ 433 438 ivshmem-server-obj-y \ 439 + virtiofsd-obj-y \ 434 440 rdmacm-mux-obj-y \ 435 441 libvhost-user-obj-y \ 436 442 vhost-user-scsi-obj-y \ ··· 669 675 rdmacm-mux$(EXESUF): LIBS += "-libumad" 670 676 rdmacm-mux$(EXESUF): $(rdmacm-mux-obj-y) $(COMMON_LDADDS) 671 677 $(call LINK, $^) 678 + 679 + # relies on Linux-specific syscalls 680 + ifeq ($(CONFIG_LINUX)$(CONFIG_SECCOMP)$(CONFIG_LIBCAP_NG),yyy) 681 + virtiofsd$(EXESUF): $(virtiofsd-obj-y) libvhost-user.a $(COMMON_LDADDS) 682 + $(call LINK, $^) 683 + endif 672 684 673 685 vhost-user-gpu$(EXESUF): $(vhost-user-gpu-obj-y) $(libvhost-user-obj-y) libqemuutil.a libqemustub.a 674 686 $(call LINK, $^)
+1
Makefile.objs
··· 123 123 rdmacm-mux-obj-y = contrib/rdmacm-mux/ 124 124 vhost-user-input-obj-y = contrib/vhost-user-input/ 125 125 vhost-user-gpu-obj-y = contrib/vhost-user-gpu/ 126 + virtiofsd-obj-y = tools/virtiofsd/ 126 127 127 128 ###################################################################### 128 129 trace-events-subdirs =
+16
configure
··· 5197 5197 strchrnul=yes 5198 5198 fi 5199 5199 5200 + ######################################### 5201 + # check if we have st_atim 5202 + 5203 + st_atim=no 5204 + cat > $TMPC << EOF 5205 + #include <sys/stat.h> 5206 + #include <stddef.h> 5207 + int main(void) { return offsetof(struct stat, st_atim); } 5208 + EOF 5209 + if compile_prog "" "" ; then 5210 + st_atim=yes 5211 + fi 5212 + 5200 5213 ########################################## 5201 5214 # check if trace backend exists 5202 5215 ··· 6894 6907 fi 6895 6908 if test "$strchrnul" = "yes" ; then 6896 6909 echo "HAVE_STRCHRNUL=y" >> $config_host_mak 6910 + fi 6911 + if test "$st_atim" = "yes" ; then 6912 + echo "HAVE_STRUCT_STAT_ST_ATIM=y" >> $config_host_mak 6897 6913 fi 6898 6914 if test "$byteswap_h" = "yes" ; then 6899 6915 echo "CONFIG_BYTESWAP_H=y" >> $config_host_mak
+45 -12
contrib/libvhost-user/libvhost-user.c
··· 392 392 return vu_message_write(dev, conn_fd, vmsg); 393 393 } 394 394 395 + /* 396 + * Processes a reply on the slave channel. 397 + * Entered with slave_mutex held and releases it before exit. 398 + * Returns true on success. 399 + */ 395 400 static bool 396 401 vu_process_message_reply(VuDev *dev, const VhostUserMsg *vmsg) 397 402 { 398 403 VhostUserMsg msg_reply; 404 + bool result = false; 399 405 400 406 if ((vmsg->flags & VHOST_USER_NEED_REPLY_MASK) == 0) { 401 - return true; 407 + result = true; 408 + goto out; 402 409 } 403 410 404 411 if (!vu_message_read(dev, dev->slave_fd, &msg_reply)) { 405 - return false; 412 + goto out; 406 413 } 407 414 408 415 if (msg_reply.request != vmsg->request) { 409 416 DPRINT("Received unexpected msg type. Expected %d received %d", 410 417 vmsg->request, msg_reply.request); 411 - return false; 418 + goto out; 412 419 } 413 420 414 - return msg_reply.payload.u64 == 0; 421 + result = msg_reply.payload.u64 == 0; 422 + 423 + out: 424 + pthread_mutex_unlock(&dev->slave_mutex); 425 + return result; 415 426 } 416 427 417 428 /* Kick the log_call_fd if required. 
*/ ··· 554 565 } 555 566 556 567 static bool 568 + map_ring(VuDev *dev, VuVirtq *vq) 569 + { 570 + vq->vring.desc = qva_to_va(dev, vq->vra.desc_user_addr); 571 + vq->vring.used = qva_to_va(dev, vq->vra.used_user_addr); 572 + vq->vring.avail = qva_to_va(dev, vq->vra.avail_user_addr); 573 + 574 + DPRINT("Setting virtq addresses:\n"); 575 + DPRINT(" vring_desc at %p\n", vq->vring.desc); 576 + DPRINT(" vring_used at %p\n", vq->vring.used); 577 + DPRINT(" vring_avail at %p\n", vq->vring.avail); 578 + 579 + return !(vq->vring.desc && vq->vring.used && vq->vring.avail); 580 + } 581 + 582 + static bool 557 583 vu_set_mem_table_exec_postcopy(VuDev *dev, VhostUserMsg *vmsg) 558 584 { 559 585 int i; ··· 756 782 close(vmsg->fds[i]); 757 783 } 758 784 785 + for (i = 0; i < dev->max_queues; i++) { 786 + if (dev->vq[i].vring.desc) { 787 + if (map_ring(dev, &dev->vq[i])) { 788 + vu_panic(dev, "remaping queue %d during setmemtable", i); 789 + } 790 + } 791 + } 792 + 759 793 return false; 760 794 } 761 795 ··· 842 876 DPRINT(" avail_user_addr: 0x%016" PRIx64 "\n", vra->avail_user_addr); 843 877 DPRINT(" log_guest_addr: 0x%016" PRIx64 "\n", vra->log_guest_addr); 844 878 879 + vq->vra = *vra; 845 880 vq->vring.flags = vra->flags; 846 - vq->vring.desc = qva_to_va(dev, vra->desc_user_addr); 847 - vq->vring.used = qva_to_va(dev, vra->used_user_addr); 848 - vq->vring.avail = qva_to_va(dev, vra->avail_user_addr); 849 881 vq->vring.log_guest_addr = vra->log_guest_addr; 850 882 851 - DPRINT("Setting virtq addresses:\n"); 852 - DPRINT(" vring_desc at %p\n", vq->vring.desc); 853 - DPRINT(" vring_used at %p\n", vq->vring.used); 854 - DPRINT(" vring_avail at %p\n", vq->vring.avail); 855 883 856 - if (!(vq->vring.desc && vq->vring.used && vq->vring.avail)) { 884 + if (map_ring(dev, vq)) { 857 885 vu_panic(dev, "Invalid vring_addr message"); 858 886 return false; 859 887 } ··· 1105 1133 return false; 1106 1134 } 1107 1135 1136 + pthread_mutex_lock(&dev->slave_mutex); 1108 1137 if 
(!vu_message_write(dev, dev->slave_fd, &vmsg)) { 1138 + pthread_mutex_unlock(&dev->slave_mutex); 1109 1139 return false; 1110 1140 } 1111 1141 1142 + /* Also unlocks the slave_mutex */ 1112 1143 return vu_process_message_reply(dev, &vmsg); 1113 1144 } 1114 1145 ··· 1628 1659 close(dev->slave_fd); 1629 1660 dev->slave_fd = -1; 1630 1661 } 1662 + pthread_mutex_destroy(&dev->slave_mutex); 1631 1663 1632 1664 if (dev->sock != -1) { 1633 1665 close(dev->sock); ··· 1663 1695 dev->remove_watch = remove_watch; 1664 1696 dev->iface = iface; 1665 1697 dev->log_call_fd = -1; 1698 + pthread_mutex_init(&dev->slave_mutex, NULL); 1666 1699 dev->slave_fd = -1; 1667 1700 dev->max_queues = max_queues; 1668 1701
+6
contrib/libvhost-user/libvhost-user.h
··· 19 19 #include <stddef.h> 20 20 #include <sys/poll.h> 21 21 #include <linux/vhost.h> 22 + #include <pthread.h> 22 23 #include "standard-headers/linux/virtio_ring.h" 23 24 24 25 /* Based on qemu/hw/virtio/vhost-user.c */ ··· 326 327 int err_fd; 327 328 unsigned int enable; 328 329 bool started; 330 + 331 + /* Guest addresses of our ring */ 332 + struct vhost_vring_addr vra; 329 333 } VuVirtq; 330 334 331 335 enum VuWatchCondtion { ··· 355 359 VuVirtq *vq; 356 360 VuDevInflightInfo inflight_info; 357 361 int log_call_fd; 362 + /* Must be held while using slave_fd */ 363 + pthread_mutex_t slave_mutex; 358 364 int slave_fd; 359 365 uint64_t log_size; 360 366 uint8_t *log_table;
+3 -1
docs/interop/vhost-user.json
··· 31 31 # @rproc-serial: virtio remoteproc serial link 32 32 # @scsi: virtio scsi 33 33 # @vsock: virtio vsock transport 34 + # @fs: virtio fs (since 4.2) 34 35 # 35 36 # Since: 4.0 36 37 ## ··· 50 51 'rpmsg', 51 52 'rproc-serial', 52 53 'scsi', 53 - 'vsock' 54 + 'vsock', 55 + 'fs' 54 56 ] 55 57 } 56 58
+1 -1
hw/virtio/vhost-user.c
··· 1061 1061 fd[0]); 1062 1062 break; 1063 1063 default: 1064 - error_report("Received unexpected msg type."); 1064 + error_report("Received unexpected msg type: %d.", hdr.request); 1065 1065 ret = -EINVAL; 1066 1066 } 1067 1067
+891
include/standard-headers/linux/fuse.h
··· 1 + /* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */ 2 + /* 3 + This file defines the kernel interface of FUSE 4 + Copyright (C) 2001-2008 Miklos Szeredi <miklos@szeredi.hu> 5 + 6 + This program can be distributed under the terms of the GNU GPL. 7 + See the file COPYING. 8 + 9 + This -- and only this -- header file may also be distributed under 10 + the terms of the BSD Licence as follows: 11 + 12 + Copyright (C) 2001-2007 Miklos Szeredi. All rights reserved. 13 + 14 + Redistribution and use in source and binary forms, with or without 15 + modification, are permitted provided that the following conditions 16 + are met: 17 + 1. Redistributions of source code must retain the above copyright 18 + notice, this list of conditions and the following disclaimer. 19 + 2. Redistributions in binary form must reproduce the above copyright 20 + notice, this list of conditions and the following disclaimer in the 21 + documentation and/or other materials provided with the distribution. 22 + 23 + THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND 24 + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 + ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE 27 + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 + OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 + LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 + OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 + SUCH DAMAGE. 
34 + */ 35 + 36 + /* 37 + * This file defines the kernel interface of FUSE 38 + * 39 + * Protocol changelog: 40 + * 41 + * 7.1: 42 + * - add the following messages: 43 + * FUSE_SETATTR, FUSE_SYMLINK, FUSE_MKNOD, FUSE_MKDIR, FUSE_UNLINK, 44 + * FUSE_RMDIR, FUSE_RENAME, FUSE_LINK, FUSE_OPEN, FUSE_READ, FUSE_WRITE, 45 + * FUSE_RELEASE, FUSE_FSYNC, FUSE_FLUSH, FUSE_SETXATTR, FUSE_GETXATTR, 46 + * FUSE_LISTXATTR, FUSE_REMOVEXATTR, FUSE_OPENDIR, FUSE_READDIR, 47 + * FUSE_RELEASEDIR 48 + * - add padding to messages to accommodate 32-bit servers on 64-bit kernels 49 + * 50 + * 7.2: 51 + * - add FOPEN_DIRECT_IO and FOPEN_KEEP_CACHE flags 52 + * - add FUSE_FSYNCDIR message 53 + * 54 + * 7.3: 55 + * - add FUSE_ACCESS message 56 + * - add FUSE_CREATE message 57 + * - add filehandle to fuse_setattr_in 58 + * 59 + * 7.4: 60 + * - add frsize to fuse_kstatfs 61 + * - clean up request size limit checking 62 + * 63 + * 7.5: 64 + * - add flags and max_write to fuse_init_out 65 + * 66 + * 7.6: 67 + * - add max_readahead to fuse_init_in and fuse_init_out 68 + * 69 + * 7.7: 70 + * - add FUSE_INTERRUPT message 71 + * - add POSIX file lock support 72 + * 73 + * 7.8: 74 + * - add lock_owner and flags fields to fuse_release_in 75 + * - add FUSE_BMAP message 76 + * - add FUSE_DESTROY message 77 + * 78 + * 7.9: 79 + * - new fuse_getattr_in input argument of GETATTR 80 + * - add lk_flags in fuse_lk_in 81 + * - add lock_owner field to fuse_setattr_in, fuse_read_in and fuse_write_in 82 + * - add blksize field to fuse_attr 83 + * - add file flags field to fuse_read_in and fuse_write_in 84 + * - Add ATIME_NOW and MTIME_NOW flags to fuse_setattr_in 85 + * 86 + * 7.10 87 + * - add nonseekable open flag 88 + * 89 + * 7.11 90 + * - add IOCTL message 91 + * - add unsolicited notification support 92 + * - add POLL message and NOTIFY_POLL notification 93 + * 94 + * 7.12 95 + * - add umask flag to input argument of create, mknod and mkdir 96 + * - add notification messages for invalidation of inodes and 
97 + * directory entries 98 + * 99 + * 7.13 100 + * - make max number of background requests and congestion threshold 101 + * tunables 102 + * 103 + * 7.14 104 + * - add splice support to fuse device 105 + * 106 + * 7.15 107 + * - add store notify 108 + * - add retrieve notify 109 + * 110 + * 7.16 111 + * - add BATCH_FORGET request 112 + * - FUSE_IOCTL_UNRESTRICTED shall now return with array of 'struct 113 + * fuse_ioctl_iovec' instead of ambiguous 'struct iovec' 114 + * - add FUSE_IOCTL_32BIT flag 115 + * 116 + * 7.17 117 + * - add FUSE_FLOCK_LOCKS and FUSE_RELEASE_FLOCK_UNLOCK 118 + * 119 + * 7.18 120 + * - add FUSE_IOCTL_DIR flag 121 + * - add FUSE_NOTIFY_DELETE 122 + * 123 + * 7.19 124 + * - add FUSE_FALLOCATE 125 + * 126 + * 7.20 127 + * - add FUSE_AUTO_INVAL_DATA 128 + * 129 + * 7.21 130 + * - add FUSE_READDIRPLUS 131 + * - send the requested events in POLL request 132 + * 133 + * 7.22 134 + * - add FUSE_ASYNC_DIO 135 + * 136 + * 7.23 137 + * - add FUSE_WRITEBACK_CACHE 138 + * - add time_gran to fuse_init_out 139 + * - add reserved space to fuse_init_out 140 + * - add FATTR_CTIME 141 + * - add ctime and ctimensec to fuse_setattr_in 142 + * - add FUSE_RENAME2 request 143 + * - add FUSE_NO_OPEN_SUPPORT flag 144 + * 145 + * 7.24 146 + * - add FUSE_LSEEK for SEEK_HOLE and SEEK_DATA support 147 + * 148 + * 7.25 149 + * - add FUSE_PARALLEL_DIROPS 150 + * 151 + * 7.26 152 + * - add FUSE_HANDLE_KILLPRIV 153 + * - add FUSE_POSIX_ACL 154 + * 155 + * 7.27 156 + * - add FUSE_ABORT_ERROR 157 + * 158 + * 7.28 159 + * - add FUSE_COPY_FILE_RANGE 160 + * - add FOPEN_CACHE_DIR 161 + * - add FUSE_MAX_PAGES, add max_pages to init_out 162 + * - add FUSE_CACHE_SYMLINKS 163 + * 164 + * 7.29 165 + * - add FUSE_NO_OPENDIR_SUPPORT flag 166 + * 167 + * 7.30 168 + * - add FUSE_EXPLICIT_INVAL_DATA 169 + * - add FUSE_IOCTL_COMPAT_X32 170 + * 171 + * 7.31 172 + * - add FUSE_WRITE_KILL_PRIV flag 173 + * - add FUSE_SETUPMAPPING and FUSE_REMOVEMAPPING 174 + * - add map_alignment to 
fuse_init_out, add FUSE_MAP_ALIGNMENT flag 175 + */ 176 + 177 + #ifndef _LINUX_FUSE_H 178 + #define _LINUX_FUSE_H 179 + 180 + #include <stdint.h> 181 + 182 + /* 183 + * Version negotiation: 184 + * 185 + * Both the kernel and userspace send the version they support in the 186 + * INIT request and reply respectively. 187 + * 188 + * If the major versions match then both shall use the smallest 189 + * of the two minor versions for communication. 190 + * 191 + * If the kernel supports a larger major version, then userspace shall 192 + * reply with the major version it supports, ignore the rest of the 193 + * INIT message and expect a new INIT message from the kernel with a 194 + * matching major version. 195 + * 196 + * If the library supports a larger major version, then it shall fall 197 + * back to the major protocol version sent by the kernel for 198 + * communication and reply with that major version (and an arbitrary 199 + * supported minor version). 200 + */ 201 + 202 + /** Version number of this interface */ 203 + #define FUSE_KERNEL_VERSION 7 204 + 205 + /** Minor version number of this interface */ 206 + #define FUSE_KERNEL_MINOR_VERSION 31 207 + 208 + /** The node ID of the root inode */ 209 + #define FUSE_ROOT_ID 1 210 + 211 + /* Make sure all structures are padded to 64bit boundary, so 32bit 212 + userspace works under 64bit kernels */ 213 + 214 + struct fuse_attr { 215 + uint64_t ino; 216 + uint64_t size; 217 + uint64_t blocks; 218 + uint64_t atime; 219 + uint64_t mtime; 220 + uint64_t ctime; 221 + uint32_t atimensec; 222 + uint32_t mtimensec; 223 + uint32_t ctimensec; 224 + uint32_t mode; 225 + uint32_t nlink; 226 + uint32_t uid; 227 + uint32_t gid; 228 + uint32_t rdev; 229 + uint32_t blksize; 230 + uint32_t padding; 231 + }; 232 + 233 + struct fuse_kstatfs { 234 + uint64_t blocks; 235 + uint64_t bfree; 236 + uint64_t bavail; 237 + uint64_t files; 238 + uint64_t ffree; 239 + uint32_t bsize; 240 + uint32_t namelen; 241 + uint32_t frsize; 242 + uint32_t 
padding; 243 + uint32_t spare[6]; 244 + }; 245 + 246 + struct fuse_file_lock { 247 + uint64_t start; 248 + uint64_t end; 249 + uint32_t type; 250 + uint32_t pid; /* tgid */ 251 + }; 252 + 253 + /** 254 + * Bitmasks for fuse_setattr_in.valid 255 + */ 256 + #define FATTR_MODE (1 << 0) 257 + #define FATTR_UID (1 << 1) 258 + #define FATTR_GID (1 << 2) 259 + #define FATTR_SIZE (1 << 3) 260 + #define FATTR_ATIME (1 << 4) 261 + #define FATTR_MTIME (1 << 5) 262 + #define FATTR_FH (1 << 6) 263 + #define FATTR_ATIME_NOW (1 << 7) 264 + #define FATTR_MTIME_NOW (1 << 8) 265 + #define FATTR_LOCKOWNER (1 << 9) 266 + #define FATTR_CTIME (1 << 10) 267 + 268 + /** 269 + * Flags returned by the OPEN request 270 + * 271 + * FOPEN_DIRECT_IO: bypass page cache for this open file 272 + * FOPEN_KEEP_CACHE: don't invalidate the data cache on open 273 + * FOPEN_NONSEEKABLE: the file is not seekable 274 + * FOPEN_CACHE_DIR: allow caching this directory 275 + * FOPEN_STREAM: the file is stream-like (no file position at all) 276 + */ 277 + #define FOPEN_DIRECT_IO (1 << 0) 278 + #define FOPEN_KEEP_CACHE (1 << 1) 279 + #define FOPEN_NONSEEKABLE (1 << 2) 280 + #define FOPEN_CACHE_DIR (1 << 3) 281 + #define FOPEN_STREAM (1 << 4) 282 + 283 + /** 284 + * INIT request/reply flags 285 + * 286 + * FUSE_ASYNC_READ: asynchronous read requests 287 + * FUSE_POSIX_LOCKS: remote locking for POSIX file locks 288 + * FUSE_FILE_OPS: kernel sends file handle for fstat, etc... (not yet supported) 289 + * FUSE_ATOMIC_O_TRUNC: handles the O_TRUNC open flag in the filesystem 290 + * FUSE_EXPORT_SUPPORT: filesystem handles lookups of "." and ".." 
291 + * FUSE_BIG_WRITES: filesystem can handle write size larger than 4kB 292 + * FUSE_DONT_MASK: don't apply umask to file mode on create operations 293 + * FUSE_SPLICE_WRITE: kernel supports splice write on the device 294 + * FUSE_SPLICE_MOVE: kernel supports splice move on the device 295 + * FUSE_SPLICE_READ: kernel supports splice read on the device 296 + * FUSE_FLOCK_LOCKS: remote locking for BSD style file locks 297 + * FUSE_HAS_IOCTL_DIR: kernel supports ioctl on directories 298 + * FUSE_AUTO_INVAL_DATA: automatically invalidate cached pages 299 + * FUSE_DO_READDIRPLUS: do READDIRPLUS (READDIR+LOOKUP in one) 300 + * FUSE_READDIRPLUS_AUTO: adaptive readdirplus 301 + * FUSE_ASYNC_DIO: asynchronous direct I/O submission 302 + * FUSE_WRITEBACK_CACHE: use writeback cache for buffered writes 303 + * FUSE_NO_OPEN_SUPPORT: kernel supports zero-message opens 304 + * FUSE_PARALLEL_DIROPS: allow parallel lookups and readdir 305 + * FUSE_HANDLE_KILLPRIV: fs handles killing suid/sgid/cap on write/chown/trunc 306 + * FUSE_POSIX_ACL: filesystem supports posix acls 307 + * FUSE_ABORT_ERROR: reading the device after abort returns ECONNABORTED 308 + * FUSE_MAX_PAGES: init_out.max_pages contains the max number of req pages 309 + * FUSE_CACHE_SYMLINKS: cache READLINK responses 310 + * FUSE_NO_OPENDIR_SUPPORT: kernel supports zero-message opendir 311 + * FUSE_EXPLICIT_INVAL_DATA: only invalidate cached pages on explicit request 312 + * FUSE_MAP_ALIGNMENT: map_alignment field is valid 313 + */ 314 + #define FUSE_ASYNC_READ (1 << 0) 315 + #define FUSE_POSIX_LOCKS (1 << 1) 316 + #define FUSE_FILE_OPS (1 << 2) 317 + #define FUSE_ATOMIC_O_TRUNC (1 << 3) 318 + #define FUSE_EXPORT_SUPPORT (1 << 4) 319 + #define FUSE_BIG_WRITES (1 << 5) 320 + #define FUSE_DONT_MASK (1 << 6) 321 + #define FUSE_SPLICE_WRITE (1 << 7) 322 + #define FUSE_SPLICE_MOVE (1 << 8) 323 + #define FUSE_SPLICE_READ (1 << 9) 324 + #define FUSE_FLOCK_LOCKS (1 << 10) 325 + #define FUSE_HAS_IOCTL_DIR (1 << 11) 326 + 
#define FUSE_AUTO_INVAL_DATA (1 << 12) 327 + #define FUSE_DO_READDIRPLUS (1 << 13) 328 + #define FUSE_READDIRPLUS_AUTO (1 << 14) 329 + #define FUSE_ASYNC_DIO (1 << 15) 330 + #define FUSE_WRITEBACK_CACHE (1 << 16) 331 + #define FUSE_NO_OPEN_SUPPORT (1 << 17) 332 + #define FUSE_PARALLEL_DIROPS (1 << 18) 333 + #define FUSE_HANDLE_KILLPRIV (1 << 19) 334 + #define FUSE_POSIX_ACL (1 << 20) 335 + #define FUSE_ABORT_ERROR (1 << 21) 336 + #define FUSE_MAX_PAGES (1 << 22) 337 + #define FUSE_CACHE_SYMLINKS (1 << 23) 338 + #define FUSE_NO_OPENDIR_SUPPORT (1 << 24) 339 + #define FUSE_EXPLICIT_INVAL_DATA (1 << 25) 340 + #define FUSE_MAP_ALIGNMENT (1 << 26) 341 + 342 + /** 343 + * CUSE INIT request/reply flags 344 + * 345 + * CUSE_UNRESTRICTED_IOCTL: use unrestricted ioctl 346 + */ 347 + #define CUSE_UNRESTRICTED_IOCTL (1 << 0) 348 + 349 + /** 350 + * Release flags 351 + */ 352 + #define FUSE_RELEASE_FLUSH (1 << 0) 353 + #define FUSE_RELEASE_FLOCK_UNLOCK (1 << 1) 354 + 355 + /** 356 + * Getattr flags 357 + */ 358 + #define FUSE_GETATTR_FH (1 << 0) 359 + 360 + /** 361 + * Lock flags 362 + */ 363 + #define FUSE_LK_FLOCK (1 << 0) 364 + 365 + /** 366 + * WRITE flags 367 + * 368 + * FUSE_WRITE_CACHE: delayed write from page cache, file handle is guessed 369 + * FUSE_WRITE_LOCKOWNER: lock_owner field is valid 370 + * FUSE_WRITE_KILL_PRIV: kill suid and sgid bits 371 + */ 372 + #define FUSE_WRITE_CACHE (1 << 0) 373 + #define FUSE_WRITE_LOCKOWNER (1 << 1) 374 + #define FUSE_WRITE_KILL_PRIV (1 << 2) 375 + 376 + /** 377 + * Read flags 378 + */ 379 + #define FUSE_READ_LOCKOWNER (1 << 1) 380 + 381 + /** 382 + * Ioctl flags 383 + * 384 + * FUSE_IOCTL_COMPAT: 32bit compat ioctl on 64bit machine 385 + * FUSE_IOCTL_UNRESTRICTED: not restricted to well-formed ioctls, retry allowed 386 + * FUSE_IOCTL_RETRY: retry with new iovecs 387 + * FUSE_IOCTL_32BIT: 32bit ioctl 388 + * FUSE_IOCTL_DIR: is a directory 389 + * FUSE_IOCTL_COMPAT_X32: x32 compat ioctl on 64bit machine (64bit time_t) 390 + * 391 + 
* FUSE_IOCTL_MAX_IOV: maximum of in_iovecs + out_iovecs 392 + */ 393 + #define FUSE_IOCTL_COMPAT (1 << 0) 394 + #define FUSE_IOCTL_UNRESTRICTED (1 << 1) 395 + #define FUSE_IOCTL_RETRY (1 << 2) 396 + #define FUSE_IOCTL_32BIT (1 << 3) 397 + #define FUSE_IOCTL_DIR (1 << 4) 398 + #define FUSE_IOCTL_COMPAT_X32 (1 << 5) 399 + 400 + #define FUSE_IOCTL_MAX_IOV 256 401 + 402 + /** 403 + * Poll flags 404 + * 405 + * FUSE_POLL_SCHEDULE_NOTIFY: request poll notify 406 + */ 407 + #define FUSE_POLL_SCHEDULE_NOTIFY (1 << 0) 408 + 409 + /** 410 + * Fsync flags 411 + * 412 + * FUSE_FSYNC_FDATASYNC: Sync data only, not metadata 413 + */ 414 + #define FUSE_FSYNC_FDATASYNC (1 << 0) 415 + 416 + enum fuse_opcode { 417 + FUSE_LOOKUP = 1, 418 + FUSE_FORGET = 2, /* no reply */ 419 + FUSE_GETATTR = 3, 420 + FUSE_SETATTR = 4, 421 + FUSE_READLINK = 5, 422 + FUSE_SYMLINK = 6, 423 + FUSE_MKNOD = 8, 424 + FUSE_MKDIR = 9, 425 + FUSE_UNLINK = 10, 426 + FUSE_RMDIR = 11, 427 + FUSE_RENAME = 12, 428 + FUSE_LINK = 13, 429 + FUSE_OPEN = 14, 430 + FUSE_READ = 15, 431 + FUSE_WRITE = 16, 432 + FUSE_STATFS = 17, 433 + FUSE_RELEASE = 18, 434 + FUSE_FSYNC = 20, 435 + FUSE_SETXATTR = 21, 436 + FUSE_GETXATTR = 22, 437 + FUSE_LISTXATTR = 23, 438 + FUSE_REMOVEXATTR = 24, 439 + FUSE_FLUSH = 25, 440 + FUSE_INIT = 26, 441 + FUSE_OPENDIR = 27, 442 + FUSE_READDIR = 28, 443 + FUSE_RELEASEDIR = 29, 444 + FUSE_FSYNCDIR = 30, 445 + FUSE_GETLK = 31, 446 + FUSE_SETLK = 32, 447 + FUSE_SETLKW = 33, 448 + FUSE_ACCESS = 34, 449 + FUSE_CREATE = 35, 450 + FUSE_INTERRUPT = 36, 451 + FUSE_BMAP = 37, 452 + FUSE_DESTROY = 38, 453 + FUSE_IOCTL = 39, 454 + FUSE_POLL = 40, 455 + FUSE_NOTIFY_REPLY = 41, 456 + FUSE_BATCH_FORGET = 42, 457 + FUSE_FALLOCATE = 43, 458 + FUSE_READDIRPLUS = 44, 459 + FUSE_RENAME2 = 45, 460 + FUSE_LSEEK = 46, 461 + FUSE_COPY_FILE_RANGE = 47, 462 + FUSE_SETUPMAPPING = 48, 463 + FUSE_REMOVEMAPPING = 49, 464 + 465 + /* CUSE specific operations */ 466 + CUSE_INIT = 4096, 467 + 468 + /* Reserved opcodes: helpful to 
detect structure endian-ness */ 469 + CUSE_INIT_BSWAP_RESERVED = 1048576, /* CUSE_INIT << 8 */ 470 + FUSE_INIT_BSWAP_RESERVED = 436207616, /* FUSE_INIT << 24 */ 471 + }; 472 + 473 + enum fuse_notify_code { 474 + FUSE_NOTIFY_POLL = 1, 475 + FUSE_NOTIFY_INVAL_INODE = 2, 476 + FUSE_NOTIFY_INVAL_ENTRY = 3, 477 + FUSE_NOTIFY_STORE = 4, 478 + FUSE_NOTIFY_RETRIEVE = 5, 479 + FUSE_NOTIFY_DELETE = 6, 480 + FUSE_NOTIFY_CODE_MAX, 481 + }; 482 + 483 + /* The read buffer is required to be at least 8k, but may be much larger */ 484 + #define FUSE_MIN_READ_BUFFER 8192 485 + 486 + #define FUSE_COMPAT_ENTRY_OUT_SIZE 120 487 + 488 + struct fuse_entry_out { 489 + uint64_t nodeid; /* Inode ID */ 490 + uint64_t generation; /* Inode generation: nodeid:gen must 491 + be unique for the fs's lifetime */ 492 + uint64_t entry_valid; /* Cache timeout for the name */ 493 + uint64_t attr_valid; /* Cache timeout for the attributes */ 494 + uint32_t entry_valid_nsec; 495 + uint32_t attr_valid_nsec; 496 + struct fuse_attr attr; 497 + }; 498 + 499 + struct fuse_forget_in { 500 + uint64_t nlookup; 501 + }; 502 + 503 + struct fuse_forget_one { 504 + uint64_t nodeid; 505 + uint64_t nlookup; 506 + }; 507 + 508 + struct fuse_batch_forget_in { 509 + uint32_t count; 510 + uint32_t dummy; 511 + }; 512 + 513 + struct fuse_getattr_in { 514 + uint32_t getattr_flags; 515 + uint32_t dummy; 516 + uint64_t fh; 517 + }; 518 + 519 + #define FUSE_COMPAT_ATTR_OUT_SIZE 96 520 + 521 + struct fuse_attr_out { 522 + uint64_t attr_valid; /* Cache timeout for the attributes */ 523 + uint32_t attr_valid_nsec; 524 + uint32_t dummy; 525 + struct fuse_attr attr; 526 + }; 527 + 528 + #define FUSE_COMPAT_MKNOD_IN_SIZE 8 529 + 530 + struct fuse_mknod_in { 531 + uint32_t mode; 532 + uint32_t rdev; 533 + uint32_t umask; 534 + uint32_t padding; 535 + }; 536 + 537 + struct fuse_mkdir_in { 538 + uint32_t mode; 539 + uint32_t umask; 540 + }; 541 + 542 + struct fuse_rename_in { 543 + uint64_t newdir; 544 + }; 545 + 546 + struct 
fuse_rename2_in { 547 + uint64_t newdir; 548 + uint32_t flags; 549 + uint32_t padding; 550 + }; 551 + 552 + struct fuse_link_in { 553 + uint64_t oldnodeid; 554 + }; 555 + 556 + struct fuse_setattr_in { 557 + uint32_t valid; 558 + uint32_t padding; 559 + uint64_t fh; 560 + uint64_t size; 561 + uint64_t lock_owner; 562 + uint64_t atime; 563 + uint64_t mtime; 564 + uint64_t ctime; 565 + uint32_t atimensec; 566 + uint32_t mtimensec; 567 + uint32_t ctimensec; 568 + uint32_t mode; 569 + uint32_t unused4; 570 + uint32_t uid; 571 + uint32_t gid; 572 + uint32_t unused5; 573 + }; 574 + 575 + struct fuse_open_in { 576 + uint32_t flags; 577 + uint32_t unused; 578 + }; 579 + 580 + struct fuse_create_in { 581 + uint32_t flags; 582 + uint32_t mode; 583 + uint32_t umask; 584 + uint32_t padding; 585 + }; 586 + 587 + struct fuse_open_out { 588 + uint64_t fh; 589 + uint32_t open_flags; 590 + uint32_t padding; 591 + }; 592 + 593 + struct fuse_release_in { 594 + uint64_t fh; 595 + uint32_t flags; 596 + uint32_t release_flags; 597 + uint64_t lock_owner; 598 + }; 599 + 600 + struct fuse_flush_in { 601 + uint64_t fh; 602 + uint32_t unused; 603 + uint32_t padding; 604 + uint64_t lock_owner; 605 + }; 606 + 607 + struct fuse_read_in { 608 + uint64_t fh; 609 + uint64_t offset; 610 + uint32_t size; 611 + uint32_t read_flags; 612 + uint64_t lock_owner; 613 + uint32_t flags; 614 + uint32_t padding; 615 + }; 616 + 617 + #define FUSE_COMPAT_WRITE_IN_SIZE 24 618 + 619 + struct fuse_write_in { 620 + uint64_t fh; 621 + uint64_t offset; 622 + uint32_t size; 623 + uint32_t write_flags; 624 + uint64_t lock_owner; 625 + uint32_t flags; 626 + uint32_t padding; 627 + }; 628 + 629 + struct fuse_write_out { 630 + uint32_t size; 631 + uint32_t padding; 632 + }; 633 + 634 + #define FUSE_COMPAT_STATFS_SIZE 48 635 + 636 + struct fuse_statfs_out { 637 + struct fuse_kstatfs st; 638 + }; 639 + 640 + struct fuse_fsync_in { 641 + uint64_t fh; 642 + uint32_t fsync_flags; 643 + uint32_t padding; 644 + }; 645 + 646 + 
struct fuse_setxattr_in { 647 + uint32_t size; 648 + uint32_t flags; 649 + }; 650 + 651 + struct fuse_getxattr_in { 652 + uint32_t size; 653 + uint32_t padding; 654 + }; 655 + 656 + struct fuse_getxattr_out { 657 + uint32_t size; 658 + uint32_t padding; 659 + }; 660 + 661 + struct fuse_lk_in { 662 + uint64_t fh; 663 + uint64_t owner; 664 + struct fuse_file_lock lk; 665 + uint32_t lk_flags; 666 + uint32_t padding; 667 + }; 668 + 669 + struct fuse_lk_out { 670 + struct fuse_file_lock lk; 671 + }; 672 + 673 + struct fuse_access_in { 674 + uint32_t mask; 675 + uint32_t padding; 676 + }; 677 + 678 + struct fuse_init_in { 679 + uint32_t major; 680 + uint32_t minor; 681 + uint32_t max_readahead; 682 + uint32_t flags; 683 + }; 684 + 685 + #define FUSE_COMPAT_INIT_OUT_SIZE 8 686 + #define FUSE_COMPAT_22_INIT_OUT_SIZE 24 687 + 688 + struct fuse_init_out { 689 + uint32_t major; 690 + uint32_t minor; 691 + uint32_t max_readahead; 692 + uint32_t flags; 693 + uint16_t max_background; 694 + uint16_t congestion_threshold; 695 + uint32_t max_write; 696 + uint32_t time_gran; 697 + uint16_t max_pages; 698 + uint16_t map_alignment; 699 + uint32_t unused[8]; 700 + }; 701 + 702 + #define CUSE_INIT_INFO_MAX 4096 703 + 704 + struct cuse_init_in { 705 + uint32_t major; 706 + uint32_t minor; 707 + uint32_t unused; 708 + uint32_t flags; 709 + }; 710 + 711 + struct cuse_init_out { 712 + uint32_t major; 713 + uint32_t minor; 714 + uint32_t unused; 715 + uint32_t flags; 716 + uint32_t max_read; 717 + uint32_t max_write; 718 + uint32_t dev_major; /* chardev major */ 719 + uint32_t dev_minor; /* chardev minor */ 720 + uint32_t spare[10]; 721 + }; 722 + 723 + struct fuse_interrupt_in { 724 + uint64_t unique; 725 + }; 726 + 727 + struct fuse_bmap_in { 728 + uint64_t block; 729 + uint32_t blocksize; 730 + uint32_t padding; 731 + }; 732 + 733 + struct fuse_bmap_out { 734 + uint64_t block; 735 + }; 736 + 737 + struct fuse_ioctl_in { 738 + uint64_t fh; 739 + uint32_t flags; 740 + uint32_t cmd; 741 + 
uint64_t arg; 742 + uint32_t in_size; 743 + uint32_t out_size; 744 + }; 745 + 746 + struct fuse_ioctl_iovec { 747 + uint64_t base; 748 + uint64_t len; 749 + }; 750 + 751 + struct fuse_ioctl_out { 752 + int32_t result; 753 + uint32_t flags; 754 + uint32_t in_iovs; 755 + uint32_t out_iovs; 756 + }; 757 + 758 + struct fuse_poll_in { 759 + uint64_t fh; 760 + uint64_t kh; 761 + uint32_t flags; 762 + uint32_t events; 763 + }; 764 + 765 + struct fuse_poll_out { 766 + uint32_t revents; 767 + uint32_t padding; 768 + }; 769 + 770 + struct fuse_notify_poll_wakeup_out { 771 + uint64_t kh; 772 + }; 773 + 774 + struct fuse_fallocate_in { 775 + uint64_t fh; 776 + uint64_t offset; 777 + uint64_t length; 778 + uint32_t mode; 779 + uint32_t padding; 780 + }; 781 + 782 + struct fuse_in_header { 783 + uint32_t len; 784 + uint32_t opcode; 785 + uint64_t unique; 786 + uint64_t nodeid; 787 + uint32_t uid; 788 + uint32_t gid; 789 + uint32_t pid; 790 + uint32_t padding; 791 + }; 792 + 793 + struct fuse_out_header { 794 + uint32_t len; 795 + int32_t error; 796 + uint64_t unique; 797 + }; 798 + 799 + struct fuse_dirent { 800 + uint64_t ino; 801 + uint64_t off; 802 + uint32_t namelen; 803 + uint32_t type; 804 + char name[]; 805 + }; 806 + 807 + #define FUSE_NAME_OFFSET offsetof(struct fuse_dirent, name) 808 + #define FUSE_DIRENT_ALIGN(x) \ 809 + (((x) + sizeof(uint64_t) - 1) & ~(sizeof(uint64_t) - 1)) 810 + #define FUSE_DIRENT_SIZE(d) \ 811 + FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + (d)->namelen) 812 + 813 + struct fuse_direntplus { 814 + struct fuse_entry_out entry_out; 815 + struct fuse_dirent dirent; 816 + }; 817 + 818 + #define FUSE_NAME_OFFSET_DIRENTPLUS \ 819 + offsetof(struct fuse_direntplus, dirent.name) 820 + #define FUSE_DIRENTPLUS_SIZE(d) \ 821 + FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET_DIRENTPLUS + (d)->dirent.namelen) 822 + 823 + struct fuse_notify_inval_inode_out { 824 + uint64_t ino; 825 + int64_t off; 826 + int64_t len; 827 + }; 828 + 829 + struct fuse_notify_inval_entry_out { 830 + 
uint64_t parent; 831 + uint32_t namelen; 832 + uint32_t padding; 833 + }; 834 + 835 + struct fuse_notify_delete_out { 836 + uint64_t parent; 837 + uint64_t child; 838 + uint32_t namelen; 839 + uint32_t padding; 840 + }; 841 + 842 + struct fuse_notify_store_out { 843 + uint64_t nodeid; 844 + uint64_t offset; 845 + uint32_t size; 846 + uint32_t padding; 847 + }; 848 + 849 + struct fuse_notify_retrieve_out { 850 + uint64_t notify_unique; 851 + uint64_t nodeid; 852 + uint64_t offset; 853 + uint32_t size; 854 + uint32_t padding; 855 + }; 856 + 857 + /* Matches the size of fuse_write_in */ 858 + struct fuse_notify_retrieve_in { 859 + uint64_t dummy1; 860 + uint64_t offset; 861 + uint32_t size; 862 + uint32_t dummy2; 863 + uint64_t dummy3; 864 + uint64_t dummy4; 865 + }; 866 + 867 + /* Device ioctls: */ 868 + #define FUSE_DEV_IOC_CLONE _IOR(229, 0, uint32_t) 869 + 870 + struct fuse_lseek_in { 871 + uint64_t fh; 872 + uint64_t offset; 873 + uint32_t whence; 874 + uint32_t padding; 875 + }; 876 + 877 + struct fuse_lseek_out { 878 + uint64_t offset; 879 + }; 880 + 881 + struct fuse_copy_file_range_in { 882 + uint64_t fh_in; 883 + uint64_t off_in; 884 + uint64_t nodeid_out; 885 + uint64_t fh_out; 886 + uint64_t off_out; 887 + uint64_t len; 888 + uint64_t flags; 889 + }; 890 + 891 + #endif /* _LINUX_FUSE_H */
+1
scripts/update-linux-headers.sh
··· 186 186 mkdir -p "$output/include/standard-headers/linux" 187 187 for i in "$tmpdir"/include/linux/*virtio*.h \ 188 188 "$tmpdir/include/linux/qemu_fw_cfg.h" \ 189 + "$tmpdir/include/linux/fuse.h" \ 189 190 "$tmpdir/include/linux/input.h" \ 190 191 "$tmpdir/include/linux/input-event-codes.h" \ 191 192 "$tmpdir/include/linux/pci_regs.h" \
+5
tools/virtiofsd/50-qemu-virtiofsd.json.in
··· 1 + { 2 + "description": "QEMU virtiofsd vhost-user-fs", 3 + "type": "fs", 4 + "binary": "@libexecdir@/virtiofsd" 5 + }
+12
tools/virtiofsd/Makefile.objs
# Objects making up the virtiofsd vhost-user filesystem daemon
virtiofsd-obj-y = buffer.o \
                  fuse_opt.o \
                  fuse_log.o \
                  fuse_lowlevel.o \
                  fuse_signals.o \
                  fuse_virtio.o \
                  helper.o \
                  passthrough_ll.o \
                  seccomp.o

# seccomp.o uses the libseccomp flags probed at configure time
seccomp.o-cflags := $(SECCOMP_CFLAGS)
seccomp.o-libs := $(SECCOMP_LIBS)
+351
tools/virtiofsd/buffer.c
··· 1 + /* 2 + * FUSE: Filesystem in Userspace 3 + * Copyright (C) 2010 Miklos Szeredi <miklos@szeredi.hu> 4 + * 5 + * Functions for dealing with `struct fuse_buf` and `struct 6 + * fuse_bufvec`. 7 + * 8 + * This program can be distributed under the terms of the GNU LGPLv2. 9 + * See the file COPYING.LIB 10 + */ 11 + 12 + #include "qemu/osdep.h" 13 + #include "fuse_i.h" 14 + #include "fuse_lowlevel.h" 15 + #include <assert.h> 16 + #include <errno.h> 17 + #include <stdlib.h> 18 + #include <string.h> 19 + #include <unistd.h> 20 + 21 + size_t fuse_buf_size(const struct fuse_bufvec *bufv) 22 + { 23 + size_t i; 24 + size_t size = 0; 25 + 26 + for (i = 0; i < bufv->count; i++) { 27 + if (bufv->buf[i].size == SIZE_MAX) { 28 + size = SIZE_MAX; 29 + } else { 30 + size += bufv->buf[i].size; 31 + } 32 + } 33 + 34 + return size; 35 + } 36 + 37 + static ssize_t fuse_buf_writev(struct fuse_buf *out_buf, 38 + struct fuse_bufvec *in_buf) 39 + { 40 + ssize_t res, i, j; 41 + size_t iovcnt = in_buf->count; 42 + struct iovec *iov; 43 + int fd = out_buf->fd; 44 + 45 + iov = calloc(iovcnt, sizeof(struct iovec)); 46 + if (!iov) { 47 + return -ENOMEM; 48 + } 49 + 50 + for (i = 0, j = 0; i < iovcnt; i++) { 51 + /* Skip the buf with 0 size */ 52 + if (in_buf->buf[i].size) { 53 + iov[j].iov_base = in_buf->buf[i].mem; 54 + iov[j].iov_len = in_buf->buf[i].size; 55 + j++; 56 + } 57 + } 58 + 59 + if (out_buf->flags & FUSE_BUF_FD_SEEK) { 60 + res = pwritev(fd, iov, iovcnt, out_buf->pos); 61 + } else { 62 + res = writev(fd, iov, iovcnt); 63 + } 64 + 65 + if (res == -1) { 66 + res = -errno; 67 + } 68 + 69 + free(iov); 70 + return res; 71 + } 72 + 73 + static size_t min_size(size_t s1, size_t s2) 74 + { 75 + return s1 < s2 ? 
s1 : s2; 76 + } 77 + 78 + static ssize_t fuse_buf_write(const struct fuse_buf *dst, size_t dst_off, 79 + const struct fuse_buf *src, size_t src_off, 80 + size_t len) 81 + { 82 + ssize_t res = 0; 83 + size_t copied = 0; 84 + 85 + while (len) { 86 + if (dst->flags & FUSE_BUF_FD_SEEK) { 87 + res = pwrite(dst->fd, (char *)src->mem + src_off, len, 88 + dst->pos + dst_off); 89 + } else { 90 + res = write(dst->fd, (char *)src->mem + src_off, len); 91 + } 92 + if (res == -1) { 93 + if (!copied) { 94 + return -errno; 95 + } 96 + break; 97 + } 98 + if (res == 0) { 99 + break; 100 + } 101 + 102 + copied += res; 103 + if (!(dst->flags & FUSE_BUF_FD_RETRY)) { 104 + break; 105 + } 106 + 107 + src_off += res; 108 + dst_off += res; 109 + len -= res; 110 + } 111 + 112 + return copied; 113 + } 114 + 115 + static ssize_t fuse_buf_read(const struct fuse_buf *dst, size_t dst_off, 116 + const struct fuse_buf *src, size_t src_off, 117 + size_t len) 118 + { 119 + ssize_t res = 0; 120 + size_t copied = 0; 121 + 122 + while (len) { 123 + if (src->flags & FUSE_BUF_FD_SEEK) { 124 + res = pread(src->fd, (char *)dst->mem + dst_off, len, 125 + src->pos + src_off); 126 + } else { 127 + res = read(src->fd, (char *)dst->mem + dst_off, len); 128 + } 129 + if (res == -1) { 130 + if (!copied) { 131 + return -errno; 132 + } 133 + break; 134 + } 135 + if (res == 0) { 136 + break; 137 + } 138 + 139 + copied += res; 140 + if (!(src->flags & FUSE_BUF_FD_RETRY)) { 141 + break; 142 + } 143 + 144 + dst_off += res; 145 + src_off += res; 146 + len -= res; 147 + } 148 + 149 + return copied; 150 + } 151 + 152 + static ssize_t fuse_buf_fd_to_fd(const struct fuse_buf *dst, size_t dst_off, 153 + const struct fuse_buf *src, size_t src_off, 154 + size_t len) 155 + { 156 + char buf[4096]; 157 + struct fuse_buf tmp = { 158 + .size = sizeof(buf), 159 + .flags = 0, 160 + }; 161 + ssize_t res; 162 + size_t copied = 0; 163 + 164 + tmp.mem = buf; 165 + 166 + while (len) { 167 + size_t this_len = min_size(tmp.size, len); 168 
+ size_t read_len; 169 + 170 + res = fuse_buf_read(&tmp, 0, src, src_off, this_len); 171 + if (res < 0) { 172 + if (!copied) { 173 + return res; 174 + } 175 + break; 176 + } 177 + if (res == 0) { 178 + break; 179 + } 180 + 181 + read_len = res; 182 + res = fuse_buf_write(dst, dst_off, &tmp, 0, read_len); 183 + if (res < 0) { 184 + if (!copied) { 185 + return res; 186 + } 187 + break; 188 + } 189 + if (res == 0) { 190 + break; 191 + } 192 + 193 + copied += res; 194 + 195 + if (res < this_len) { 196 + break; 197 + } 198 + 199 + dst_off += res; 200 + src_off += res; 201 + len -= res; 202 + } 203 + 204 + return copied; 205 + } 206 + 207 + static ssize_t fuse_buf_copy_one(const struct fuse_buf *dst, size_t dst_off, 208 + const struct fuse_buf *src, size_t src_off, 209 + size_t len) 210 + { 211 + int src_is_fd = src->flags & FUSE_BUF_IS_FD; 212 + int dst_is_fd = dst->flags & FUSE_BUF_IS_FD; 213 + 214 + if (!src_is_fd && !dst_is_fd) { 215 + char *dstmem = (char *)dst->mem + dst_off; 216 + char *srcmem = (char *)src->mem + src_off; 217 + 218 + if (dstmem != srcmem) { 219 + if (dstmem + len <= srcmem || srcmem + len <= dstmem) { 220 + memcpy(dstmem, srcmem, len); 221 + } else { 222 + memmove(dstmem, srcmem, len); 223 + } 224 + } 225 + 226 + return len; 227 + } else if (!src_is_fd) { 228 + return fuse_buf_write(dst, dst_off, src, src_off, len); 229 + } else if (!dst_is_fd) { 230 + return fuse_buf_read(dst, dst_off, src, src_off, len); 231 + } else { 232 + return fuse_buf_fd_to_fd(dst, dst_off, src, src_off, len); 233 + } 234 + } 235 + 236 + static const struct fuse_buf *fuse_bufvec_current(struct fuse_bufvec *bufv) 237 + { 238 + if (bufv->idx < bufv->count) { 239 + return &bufv->buf[bufv->idx]; 240 + } else { 241 + return NULL; 242 + } 243 + } 244 + 245 + static int fuse_bufvec_advance(struct fuse_bufvec *bufv, size_t len) 246 + { 247 + const struct fuse_buf *buf = fuse_bufvec_current(bufv); 248 + 249 + bufv->off += len; 250 + assert(bufv->off <= buf->size); 251 + if 
(bufv->off == buf->size) { 252 + assert(bufv->idx < bufv->count); 253 + bufv->idx++; 254 + if (bufv->idx == bufv->count) { 255 + return 0; 256 + } 257 + bufv->off = 0; 258 + } 259 + return 1; 260 + } 261 + 262 + ssize_t fuse_buf_copy(struct fuse_bufvec *dstv, struct fuse_bufvec *srcv) 263 + { 264 + size_t copied = 0, i; 265 + 266 + if (dstv == srcv) { 267 + return fuse_buf_size(dstv); 268 + } 269 + 270 + /* 271 + * use writev to improve bandwidth when all the 272 + * src buffers already mapped by the daemon 273 + * process 274 + */ 275 + for (i = 0; i < srcv->count; i++) { 276 + if (srcv->buf[i].flags & FUSE_BUF_IS_FD) { 277 + break; 278 + } 279 + } 280 + if ((i == srcv->count) && (dstv->count == 1) && 281 + (dstv->idx == 0) && 282 + (dstv->buf[0].flags & FUSE_BUF_IS_FD)) { 283 + dstv->buf[0].pos += dstv->off; 284 + return fuse_buf_writev(&dstv->buf[0], srcv); 285 + } 286 + 287 + for (;;) { 288 + const struct fuse_buf *src = fuse_bufvec_current(srcv); 289 + const struct fuse_buf *dst = fuse_bufvec_current(dstv); 290 + size_t src_len; 291 + size_t dst_len; 292 + size_t len; 293 + ssize_t res; 294 + 295 + if (src == NULL || dst == NULL) { 296 + break; 297 + } 298 + 299 + src_len = src->size - srcv->off; 300 + dst_len = dst->size - dstv->off; 301 + len = min_size(src_len, dst_len); 302 + 303 + res = fuse_buf_copy_one(dst, dstv->off, src, srcv->off, len); 304 + if (res < 0) { 305 + if (!copied) { 306 + return res; 307 + } 308 + break; 309 + } 310 + copied += res; 311 + 312 + if (!fuse_bufvec_advance(srcv, res) || 313 + !fuse_bufvec_advance(dstv, res)) { 314 + break; 315 + } 316 + 317 + if (res < len) { 318 + break; 319 + } 320 + } 321 + 322 + return copied; 323 + } 324 + 325 + void *fuse_mbuf_iter_advance(struct fuse_mbuf_iter *iter, size_t len) 326 + { 327 + void *ptr; 328 + 329 + if (len > iter->size - iter->pos) { 330 + return NULL; 331 + } 332 + 333 + ptr = iter->mem + iter->pos; 334 + iter->pos += len; 335 + return ptr; 336 + } 337 + 338 + const char 
*fuse_mbuf_iter_advance_str(struct fuse_mbuf_iter *iter) 339 + { 340 + const char *str = iter->mem + iter->pos; 341 + size_t remaining = iter->size - iter->pos; 342 + size_t i; 343 + 344 + for (i = 0; i < remaining; i++) { 345 + if (str[i] == '\0') { 346 + iter->pos += i + 1; 347 + return str; 348 + } 349 + } 350 + return NULL; 351 + }
+1249
tools/virtiofsd/fuse.h
··· 1 + /* 2 + * FUSE: Filesystem in Userspace 3 + * Copyright (C) 2001-2007 Miklos Szeredi <miklos@szeredi.hu> 4 + * 5 + * This program can be distributed under the terms of the GNU LGPLv2. 6 + * See the file COPYING.LIB. 7 + */ 8 + 9 + #ifndef FUSE_H_ 10 + #define FUSE_H_ 11 + 12 + /* 13 + * 14 + * This file defines the library interface of FUSE 15 + * 16 + * IMPORTANT: you should define FUSE_USE_VERSION before including this header. 17 + */ 18 + 19 + #include "fuse_common.h" 20 + 21 + #include <fcntl.h> 22 + #include <sys/stat.h> 23 + #include <sys/statvfs.h> 24 + #include <sys/types.h> 25 + #include <sys/uio.h> 26 + #include <time.h> 27 + 28 + /* 29 + * Basic FUSE API 30 + */ 31 + 32 + /** Handle for a FUSE filesystem */ 33 + struct fuse; 34 + 35 + /** 36 + * Readdir flags, passed to ->readdir() 37 + */ 38 + enum fuse_readdir_flags { 39 + /** 40 + * "Plus" mode. 41 + * 42 + * The kernel wants to prefill the inode cache during readdir. The 43 + * filesystem may honour this by filling in the attributes and setting 44 + * FUSE_FILL_DIR_FLAGS for the filler function. The filesystem may also 45 + * just ignore this flag completely. 46 + */ 47 + FUSE_READDIR_PLUS = (1 << 0), 48 + }; 49 + 50 + enum fuse_fill_dir_flags { 51 + /** 52 + * "Plus" mode: all file attributes are valid 53 + * 54 + * The attributes are used by the kernel to prefill the inode cache 55 + * during a readdir. 56 + * 57 + * It is okay to set FUSE_FILL_DIR_PLUS if FUSE_READDIR_PLUS is not set 58 + * and vice versa. 59 + */ 60 + FUSE_FILL_DIR_PLUS = (1 << 1), 61 + }; 62 + 63 + /** 64 + * Function to add an entry in a readdir() operation 65 + * 66 + * The *off* parameter can be any non-zero value that enables the 67 + * filesystem to identify the current point in the directory 68 + * stream. It does not need to be the actual physical position. A 69 + * value of zero is reserved to indicate that seeking in directories 70 + * is not supported. 
71 + * 72 + * @param buf the buffer passed to the readdir() operation 73 + * @param name the file name of the directory entry 74 + * @param stat file attributes, can be NULL 75 + * @param off offset of the next entry or zero 76 + * @param flags fill flags 77 + * @return 1 if buffer is full, zero otherwise 78 + */ 79 + typedef int (*fuse_fill_dir_t)(void *buf, const char *name, 80 + const struct stat *stbuf, off_t off, 81 + enum fuse_fill_dir_flags flags); 82 + /** 83 + * Configuration of the high-level API 84 + * 85 + * This structure is initialized from the arguments passed to 86 + * fuse_new(), and then passed to the file system's init() handler 87 + * which should ensure that the configuration is compatible with the 88 + * file system implementation. 89 + */ 90 + struct fuse_config { 91 + /** 92 + * If `set_gid` is non-zero, the st_gid attribute of each file 93 + * is overwritten with the value of `gid`. 94 + */ 95 + int set_gid; 96 + unsigned int gid; 97 + 98 + /** 99 + * If `set_uid` is non-zero, the st_uid attribute of each file 100 + * is overwritten with the value of `uid`. 101 + */ 102 + int set_uid; 103 + unsigned int uid; 104 + 105 + /** 106 + * If `set_mode` is non-zero, the any permissions bits set in 107 + * `umask` are unset in the st_mode attribute of each file. 108 + */ 109 + int set_mode; 110 + unsigned int umask; 111 + 112 + /** 113 + * The timeout in seconds for which name lookups will be 114 + * cached. 115 + */ 116 + double entry_timeout; 117 + 118 + /** 119 + * The timeout in seconds for which a negative lookup will be 120 + * cached. This means, that if file did not exist (lookup 121 + * retuned ENOENT), the lookup will only be redone after the 122 + * timeout, and the file/directory will be assumed to not 123 + * exist until then. A value of zero means that negative 124 + * lookups are not cached. 
125 + */ 126 + double negative_timeout; 127 + 128 + /** 129 + * The timeout in seconds for which file/directory attributes 130 + * (as returned by e.g. the `getattr` handler) are cached. 131 + */ 132 + double attr_timeout; 133 + 134 + /** 135 + * Allow requests to be interrupted 136 + */ 137 + int intr; 138 + 139 + /** 140 + * Specify which signal number to send to the filesystem when 141 + * a request is interrupted. The default is hardcoded to 142 + * USR1. 143 + */ 144 + int intr_signal; 145 + 146 + /** 147 + * Normally, FUSE assigns inodes to paths only for as long as 148 + * the kernel is aware of them. With this option inodes are 149 + * instead remembered for at least this many seconds. This 150 + * will require more memory, but may be necessary when using 151 + * applications that make use of inode numbers. 152 + * 153 + * A number of -1 means that inodes will be remembered for the 154 + * entire life-time of the file-system process. 155 + */ 156 + int remember; 157 + 158 + /** 159 + * The default behavior is that if an open file is deleted, 160 + * the file is renamed to a hidden file (.fuse_hiddenXXX), and 161 + * only removed when the file is finally released. This 162 + * relieves the filesystem implementation of having to deal 163 + * with this problem. This option disables the hiding 164 + * behavior, and files are removed immediately in an unlink 165 + * operation (or in a rename operation which overwrites an 166 + * existing file). 167 + * 168 + * It is recommended that you not use the hard_remove 169 + * option. When hard_remove is set, the following libc 170 + * functions fail on unlinked files (returning errno of 171 + * ENOENT): read(2), write(2), fsync(2), close(2), f*xattr(2), 172 + * ftruncate(2), fstat(2), fchmod(2), fchown(2) 173 + */ 174 + int hard_remove; 175 + 176 + /** 177 + * Honor the st_ino field in the functions getattr() and 178 + * fill_dir(). 
This value is used to fill in the st_ino field 179 + * in the stat(2), lstat(2), fstat(2) functions and the d_ino 180 + * field in the readdir(2) function. The filesystem does not 181 + * have to guarantee uniqueness, however some applications 182 + * rely on this value being unique for the whole filesystem. 183 + * 184 + * Note that this does *not* affect the inode that libfuse 185 + * and the kernel use internally (also called the "nodeid"). 186 + */ 187 + int use_ino; 188 + 189 + /** 190 + * If use_ino option is not given, still try to fill in the 191 + * d_ino field in readdir(2). If the name was previously 192 + * looked up, and is still in the cache, the inode number 193 + * found there will be used. Otherwise it will be set to -1. 194 + * If use_ino option is given, this option is ignored. 195 + */ 196 + int readdir_ino; 197 + 198 + /** 199 + * This option disables the use of page cache (file content cache) 200 + * in the kernel for this filesystem. This has several affects: 201 + * 202 + * 1. Each read(2) or write(2) system call will initiate one 203 + * or more read or write operations, data will not be 204 + * cached in the kernel. 205 + * 206 + * 2. The return value of the read() and write() system calls 207 + * will correspond to the return values of the read and 208 + * write operations. This is useful for example if the 209 + * file size is not known in advance (before reading it). 210 + * 211 + * Internally, enabling this option causes fuse to set the 212 + * `direct_io` field of `struct fuse_file_info` - overwriting 213 + * any value that was put there by the file system. 214 + */ 215 + int direct_io; 216 + 217 + /** 218 + * This option disables flushing the cache of the file 219 + * contents on every open(2). This should only be enabled on 220 + * filesystems where the file data is never changed 221 + * externally (not through the mounted FUSE filesystem). 
Thus 222 + * it is not suitable for network filesystems and other 223 + * intermediate filesystems. 224 + * 225 + * NOTE: if this option is not specified (and neither 226 + * direct_io) data is still cached after the open(2), so a 227 + * read(2) system call will not always initiate a read 228 + * operation. 229 + * 230 + * Internally, enabling this option causes fuse to set the 231 + * `keep_cache` field of `struct fuse_file_info` - overwriting 232 + * any value that was put there by the file system. 233 + */ 234 + int kernel_cache; 235 + 236 + /** 237 + * This option is an alternative to `kernel_cache`. Instead of 238 + * unconditionally keeping cached data, the cached data is 239 + * invalidated on open(2) if if the modification time or the 240 + * size of the file has changed since it was last opened. 241 + */ 242 + int auto_cache; 243 + 244 + /** 245 + * The timeout in seconds for which file attributes are cached 246 + * for the purpose of checking if auto_cache should flush the 247 + * file data on open. 248 + */ 249 + int ac_attr_timeout_set; 250 + double ac_attr_timeout; 251 + 252 + /** 253 + * If this option is given the file-system handlers for the 254 + * following operations will not receive path information: 255 + * read, write, flush, release, fsync, readdir, releasedir, 256 + * fsyncdir, lock, ioctl and poll. 257 + * 258 + * For the truncate, getattr, chmod, chown and utimens 259 + * operations the path will be provided only if the struct 260 + * fuse_file_info argument is NULL. 261 + */ 262 + int nullpath_ok; 263 + 264 + /** 265 + * The remaining options are used by libfuse internally and 266 + * should not be touched. 267 + */ 268 + int show_help; 269 + char *modules; 270 + int debug; 271 + }; 272 + 273 + 274 + /** 275 + * The file system operations: 276 + * 277 + * Most of these should work very similarly to the well known UNIX 278 + * file system operations. 
A major exception is that instead of 279 + * returning an error in 'errno', the operation should return the 280 + * negated error value (-errno) directly. 281 + * 282 + * All methods are optional, but some are essential for a useful 283 + * filesystem (e.g. getattr). Open, flush, release, fsync, opendir, 284 + * releasedir, fsyncdir, access, create, truncate, lock, init and 285 + * destroy are special purpose methods, without which a full featured 286 + * filesystem can still be implemented. 287 + * 288 + * In general, all methods are expected to perform any necessary 289 + * permission checking. However, a filesystem may delegate this task 290 + * to the kernel by passing the `default_permissions` mount option to 291 + * `fuse_new()`. In this case, methods will only be called if 292 + * the kernel's permission check has succeeded. 293 + * 294 + * Almost all operations take a path which can be of any length. 295 + */ 296 + struct fuse_operations { 297 + /** 298 + * Get file attributes. 299 + * 300 + * Similar to stat(). The 'st_dev' and 'st_blksize' fields are 301 + * ignored. The 'st_ino' field is ignored except if the 'use_ino' 302 + * mount option is given. In that case it is passed to userspace, 303 + * but libfuse and the kernel will still assign a different 304 + * inode for internal use (called the "nodeid"). 305 + * 306 + * `fi` will always be NULL if the file is not currently open, but 307 + * may also be NULL if the file is open. 308 + */ 309 + int (*getattr)(const char *, struct stat *, struct fuse_file_info *fi); 310 + 311 + /** 312 + * Read the target of a symbolic link 313 + * 314 + * The buffer should be filled with a null terminated string. The 315 + * buffer size argument includes the space for the terminating 316 + * null character. If the linkname is too long to fit in the 317 + * buffer, it should be truncated. The return value should be 0 318 + * for success. 
319 + */ 320 + int (*readlink)(const char *, char *, size_t); 321 + 322 + /** 323 + * Create a file node 324 + * 325 + * This is called for creation of all non-directory, non-symlink 326 + * nodes. If the filesystem defines a create() method, then for 327 + * regular files that will be called instead. 328 + */ 329 + int (*mknod)(const char *, mode_t, dev_t); 330 + 331 + /** 332 + * Create a directory 333 + * 334 + * Note that the mode argument may not have the type specification 335 + * bits set, i.e. S_ISDIR(mode) can be false. To obtain the 336 + * correct directory type bits use mode|S_IFDIR 337 + */ 338 + int (*mkdir)(const char *, mode_t); 339 + 340 + /** Remove a file */ 341 + int (*unlink)(const char *); 342 + 343 + /** Remove a directory */ 344 + int (*rmdir)(const char *); 345 + 346 + /** Create a symbolic link */ 347 + int (*symlink)(const char *, const char *); 348 + 349 + /** 350 + * Rename a file 351 + * 352 + * *flags* may be `RENAME_EXCHANGE` or `RENAME_NOREPLACE`. If 353 + * RENAME_NOREPLACE is specified, the filesystem must not 354 + * overwrite *newname* if it exists and return an error 355 + * instead. If `RENAME_EXCHANGE` is specified, the filesystem 356 + * must atomically exchange the two files, i.e. both must 357 + * exist and neither may be deleted. 358 + */ 359 + int (*rename)(const char *, const char *, unsigned int flags); 360 + 361 + /** Create a hard link to a file */ 362 + int (*link)(const char *, const char *); 363 + 364 + /** 365 + * Change the permission bits of a file 366 + * 367 + * `fi` will always be NULL if the file is not currenlty open, but 368 + * may also be NULL if the file is open. 369 + */ 370 + int (*chmod)(const char *, mode_t, struct fuse_file_info *fi); 371 + 372 + /** 373 + * Change the owner and group of a file 374 + * 375 + * `fi` will always be NULL if the file is not currenlty open, but 376 + * may also be NULL if the file is open. 
377 + * 378 + * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is 379 + * expected to reset the setuid and setgid bits. 380 + */ 381 + int (*chown)(const char *, uid_t, gid_t, struct fuse_file_info *fi); 382 + 383 + /** 384 + * Change the size of a file 385 + * 386 + * `fi` will always be NULL if the file is not currenlty open, but 387 + * may also be NULL if the file is open. 388 + * 389 + * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is 390 + * expected to reset the setuid and setgid bits. 391 + */ 392 + int (*truncate)(const char *, off_t, struct fuse_file_info *fi); 393 + 394 + /** 395 + * Open a file 396 + * 397 + * Open flags are available in fi->flags. The following rules 398 + * apply. 399 + * 400 + * - Creation (O_CREAT, O_EXCL, O_NOCTTY) flags will be 401 + * filtered out / handled by the kernel. 402 + * 403 + * - Access modes (O_RDONLY, O_WRONLY, O_RDWR, O_EXEC, O_SEARCH) 404 + * should be used by the filesystem to check if the operation is 405 + * permitted. If the ``-o default_permissions`` mount option is 406 + * given, this check is already done by the kernel before calling 407 + * open() and may thus be omitted by the filesystem. 408 + * 409 + * - When writeback caching is enabled, the kernel may send 410 + * read requests even for files opened with O_WRONLY. The 411 + * filesystem should be prepared to handle this. 412 + * 413 + * - When writeback caching is disabled, the filesystem is 414 + * expected to properly handle the O_APPEND flag and ensure 415 + * that each write is appending to the end of the file. 416 + * 417 + * - When writeback caching is enabled, the kernel will 418 + * handle O_APPEND. However, unless all changes to the file 419 + * come through the kernel this will not work reliably. The 420 + * filesystem should thus either ignore the O_APPEND flag 421 + * (and let the kernel handle it), or return an error 422 + * (indicating that reliably O_APPEND is not available). 
423 + * 424 + * Filesystem may store an arbitrary file handle (pointer, 425 + * index, etc) in fi->fh, and use this in other all other file 426 + * operations (read, write, flush, release, fsync). 427 + * 428 + * Filesystem may also implement stateless file I/O and not store 429 + * anything in fi->fh. 430 + * 431 + * There are also some flags (direct_io, keep_cache) which the 432 + * filesystem may set in fi, to change the way the file is opened. 433 + * See fuse_file_info structure in <fuse_common.h> for more details. 434 + * 435 + * If this request is answered with an error code of ENOSYS 436 + * and FUSE_CAP_NO_OPEN_SUPPORT is set in 437 + * `fuse_conn_info.capable`, this is treated as success and 438 + * future calls to open will also succeed without being send 439 + * to the filesystem process. 440 + * 441 + */ 442 + int (*open)(const char *, struct fuse_file_info *); 443 + 444 + /** 445 + * Read data from an open file 446 + * 447 + * Read should return exactly the number of bytes requested except 448 + * on EOF or error, otherwise the rest of the data will be 449 + * substituted with zeroes. An exception to this is when the 450 + * 'direct_io' mount option is specified, in which case the return 451 + * value of the read system call will reflect the return value of 452 + * this operation. 453 + */ 454 + int (*read)(const char *, char *, size_t, off_t, struct fuse_file_info *); 455 + 456 + /** 457 + * Write data to an open file 458 + * 459 + * Write should return exactly the number of bytes requested 460 + * except on error. An exception to this is when the 'direct_io' 461 + * mount option is specified (see read operation). 462 + * 463 + * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is 464 + * expected to reset the setuid and setgid bits. 
465 + */ 466 + int (*write)(const char *, const char *, size_t, off_t, 467 + struct fuse_file_info *); 468 + 469 + /** 470 + * Get file system statistics 471 + * 472 + * The 'f_favail', 'f_fsid' and 'f_flag' fields are ignored 473 + */ 474 + int (*statfs)(const char *, struct statvfs *); 475 + 476 + /** 477 + * Possibly flush cached data 478 + * 479 + * BIG NOTE: This is not equivalent to fsync(). It's not a 480 + * request to sync dirty data. 481 + * 482 + * Flush is called on each close() of a file descriptor, as opposed to 483 + * release which is called on the close of the last file descriptor for 484 + * a file. Under Linux, errors returned by flush() will be passed to 485 + * userspace as errors from close(), so flush() is a good place to write 486 + * back any cached dirty data. However, many applications ignore errors 487 + * on close(), and on non-Linux systems, close() may succeed even if flush() 488 + * returns an error. For these reasons, filesystems should not assume 489 + * that errors returned by flush will ever be noticed or even 490 + * delivered. 491 + * 492 + * NOTE: The flush() method may be called more than once for each 493 + * open(). This happens if more than one file descriptor refers to an 494 + * open file handle, e.g. due to dup(), dup2() or fork() calls. It is 495 + * not possible to determine if a flush is final, so each flush should 496 + * be treated equally. Multiple write-flush sequences are relatively 497 + * rare, so this shouldn't be a problem. 498 + * 499 + * Filesystems shouldn't assume that flush will be called at any 500 + * particular point. It may be called more times than expected, or not 501 + * at all. 
502 + * 503 + * [close]: 504 + * http://pubs.opengroup.org/onlinepubs/9699919799/functions/close.html 505 + */ 506 + int (*flush)(const char *, struct fuse_file_info *); 507 + 508 + /** 509 + * Release an open file 510 + * 511 + * Release is called when there are no more references to an open 512 + * file: all file descriptors are closed and all memory mappings 513 + * are unmapped. 514 + * 515 + * For every open() call there will be exactly one release() call 516 + * with the same flags and file handle. It is possible to 517 + * have a file opened more than once, in which case only the last 518 + * release will mean, that no more reads/writes will happen on the 519 + * file. The return value of release is ignored. 520 + */ 521 + int (*release)(const char *, struct fuse_file_info *); 522 + 523 + /* 524 + * Synchronize file contents 525 + * 526 + * If the datasync parameter is non-zero, then only the user data 527 + * should be flushed, not the meta data. 528 + */ 529 + int (*fsync)(const char *, int, struct fuse_file_info *); 530 + 531 + /** Set extended attributes */ 532 + int (*setxattr)(const char *, const char *, const char *, size_t, int); 533 + 534 + /** Get extended attributes */ 535 + int (*getxattr)(const char *, const char *, char *, size_t); 536 + 537 + /** List extended attributes */ 538 + int (*listxattr)(const char *, char *, size_t); 539 + 540 + /** Remove extended attributes */ 541 + int (*removexattr)(const char *, const char *); 542 + 543 + /* 544 + * Open directory 545 + * 546 + * Unless the 'default_permissions' mount option is given, 547 + * this method should check if opendir is permitted for this 548 + * directory. Optionally opendir may also return an arbitrary 549 + * filehandle in the fuse_file_info structure, which will be 550 + * passed to readdir, releasedir and fsyncdir. 
551 + */ 552 + int (*opendir)(const char *, struct fuse_file_info *); 553 + 554 + /* 555 + * Read directory 556 + * 557 + * The filesystem may choose between two modes of operation: 558 + * 559 + * 1) The readdir implementation ignores the offset parameter, and 560 + * passes zero to the filler function's offset. The filler 561 + * function will not return '1' (unless an error happens), so the 562 + * whole directory is read in a single readdir operation. 563 + * 564 + * 2) The readdir implementation keeps track of the offsets of the 565 + * directory entries. It uses the offset parameter and always 566 + * passes non-zero offset to the filler function. When the buffer 567 + * is full (or an error happens) the filler function will return 568 + * '1'. 569 + */ 570 + int (*readdir)(const char *, void *, fuse_fill_dir_t, off_t, 571 + struct fuse_file_info *, enum fuse_readdir_flags); 572 + 573 + /** 574 + * Release directory 575 + */ 576 + int (*releasedir)(const char *, struct fuse_file_info *); 577 + 578 + /** 579 + * Synchronize directory contents 580 + * 581 + * If the datasync parameter is non-zero, then only the user data 582 + * should be flushed, not the meta data 583 + */ 584 + int (*fsyncdir)(const char *, int, struct fuse_file_info *); 585 + 586 + /** 587 + * Initialize filesystem 588 + * 589 + * The return value will be passed in the `private_data` field of 590 + * `struct fuse_context` to all file operations, and as a 591 + * parameter to the destroy() method. It overrides the initial 592 + * value provided to fuse_main() / fuse_new(). 593 + */ 594 + void *(*init)(struct fuse_conn_info *conn, struct fuse_config *cfg); 595 + 596 + /** 597 + * Clean up filesystem 598 + * 599 + * Called on filesystem exit. 600 + */ 601 + void (*destroy)(void *private_data); 602 + 603 + /** 604 + * Check file access permissions 605 + * 606 + * This will be called for the access() system call. 
If the 607 + * 'default_permissions' mount option is given, this method is not 608 + * called. 609 + * 610 + * This method is not called under Linux kernel versions 2.4.x 611 + */ 612 + int (*access)(const char *, int); 613 + 614 + /** 615 + * Create and open a file 616 + * 617 + * If the file does not exist, first create it with the specified 618 + * mode, and then open it. 619 + * 620 + * If this method is not implemented or under Linux kernel 621 + * versions earlier than 2.6.15, the mknod() and open() methods 622 + * will be called instead. 623 + */ 624 + int (*create)(const char *, mode_t, struct fuse_file_info *); 625 + 626 + /** 627 + * Perform POSIX file locking operation 628 + * 629 + * The cmd argument will be either F_GETLK, F_SETLK or F_SETLKW. 630 + * 631 + * For the meaning of fields in 'struct flock' see the man page 632 + * for fcntl(2). The l_whence field will always be set to 633 + * SEEK_SET. 634 + * 635 + * For checking lock ownership, the 'fuse_file_info->owner' 636 + * argument must be used. 637 + * 638 + * For F_GETLK operation, the library will first check currently 639 + * held locks, and if a conflicting lock is found it will return 640 + * information without calling this method. This ensures, that 641 + * for local locks the l_pid field is correctly filled in. The 642 + * results may not be accurate in case of race conditions and in 643 + * the presence of hard links, but it's unlikely that an 644 + * application would rely on accurate GETLK results in these 645 + * cases. If a conflicting lock is not found, this method will be 646 + * called, and the filesystem may fill out l_pid by a meaningful 647 + * value, or it may leave this field zero. 648 + * 649 + * For F_SETLK and F_SETLKW the l_pid field will be set to the pid 650 + * of the process performing the locking operation. 651 + * 652 + * Note: if this method is not implemented, the kernel will still 653 + * allow file locking to work locally. 
Hence it is only 654 + * interesting for network filesystems and similar. 655 + */ 656 + int (*lock)(const char *, struct fuse_file_info *, int cmd, struct flock *); 657 + 658 + /** 659 + * Change the access and modification times of a file with 660 + * nanosecond resolution 661 + * 662 + * This supersedes the old utime() interface. New applications 663 + * should use this. 664 + * 665 + * `fi` will always be NULL if the file is not currently open, but 666 + * may also be NULL if the file is open. 667 + * 668 + * See the utimensat(2) man page for details. 669 + */ 670 + int (*utimens)(const char *, const struct timespec tv[2], 671 + struct fuse_file_info *fi); 672 + 673 + /** 674 + * Map block index within file to block index within device 675 + * 676 + * Note: This makes sense only for block device backed filesystems 677 + * mounted with the 'blkdev' option 678 + */ 679 + int (*bmap)(const char *, size_t blocksize, uint64_t *idx); 680 + 681 + /** 682 + * Ioctl 683 + * 684 + * flags will have FUSE_IOCTL_COMPAT set for 32bit ioctls in 685 + * 64bit environment. The size and direction of data is 686 + * determined by _IOC_*() decoding of cmd. For _IOC_NONE, 687 + * data will be NULL, for _IOC_WRITE data is out area, for 688 + * _IOC_READ in area and if both are set in/out area. In all 689 + * non-NULL cases, the area is of _IOC_SIZE(cmd) bytes. 690 + * 691 + * If flags has FUSE_IOCTL_DIR then the fuse_file_info refers to a 692 + * directory file handle. 693 + * 694 + * Note : the unsigned long request submitted by the application 695 + * is truncated to 32 bits. 696 + */ 697 + int (*ioctl)(const char *, unsigned int cmd, void *arg, 698 + struct fuse_file_info *, unsigned int flags, void *data); 699 + 700 + /** 701 + * Poll for IO readiness events 702 + * 703 + * Note: If ph is non-NULL, the client should notify 704 + * when IO readiness events occur by calling 705 + * fuse_notify_poll() with the specified ph. 
706 + * 707 + * Regardless of the number of times poll with a non-NULL ph 708 + * is received, single notification is enough to clear all. 709 + * Notifying more times incurs overhead but doesn't harm 710 + * correctness. 711 + * 712 + * The callee is responsible for destroying ph with 713 + * fuse_pollhandle_destroy() when no longer in use. 714 + */ 715 + int (*poll)(const char *, struct fuse_file_info *, 716 + struct fuse_pollhandle *ph, unsigned *reventsp); 717 + 718 + /* 719 + * Write contents of buffer to an open file 720 + * 721 + * Similar to the write() method, but data is supplied in a 722 + * generic buffer. Use fuse_buf_copy() to transfer data to 723 + * the destination. 724 + * 725 + * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is 726 + * expected to reset the setuid and setgid bits. 727 + */ 728 + int (*write_buf)(const char *, struct fuse_bufvec *buf, off_t off, 729 + struct fuse_file_info *); 730 + 731 + /* 732 + * Store data from an open file in a buffer 733 + * 734 + * Similar to the read() method, but data is stored and 735 + * returned in a generic buffer. 736 + * 737 + * No actual copying of data has to take place, the source 738 + * file descriptor may simply be stored in the buffer for 739 + * later data transfer. 740 + * 741 + * The buffer must be allocated dynamically and stored at the 742 + * location pointed to by bufp. If the buffer contains memory 743 + * regions, they too must be allocated using malloc(). The 744 + * allocated memory will be freed by the caller. 745 + */ 746 + int (*read_buf)(const char *, struct fuse_bufvec **bufp, size_t size, 747 + off_t off, struct fuse_file_info *); 748 + /** 749 + * Perform BSD file locking operation 750 + * 751 + * The op argument will be either LOCK_SH, LOCK_EX or LOCK_UN 752 + * 753 + * Nonblocking requests will be indicated by ORing LOCK_NB to 754 + * the above operations 755 + * 756 + * For more information see the flock(2) manual page. 
757 + * 758 + * Additionally fi->owner will be set to a value unique to 759 + * this open file. This same value will be supplied to 760 + * ->release() when the file is released. 761 + * 762 + * Note: if this method is not implemented, the kernel will still 763 + * allow file locking to work locally. Hence it is only 764 + * interesting for network filesystems and similar. 765 + */ 766 + int (*flock)(const char *, struct fuse_file_info *, int op); 767 + 768 + /** 769 + * Allocates space for an open file 770 + * 771 + * This function ensures that required space is allocated for specified 772 + * file. If this function returns success then any subsequent write 773 + * request to specified range is guaranteed not to fail because of lack 774 + * of space on the file system media. 775 + */ 776 + int (*fallocate)(const char *, int, off_t, off_t, struct fuse_file_info *); 777 + 778 + /** 779 + * Copy a range of data from one file to another 780 + * 781 + * Performs an optimized copy between two file descriptors without the 782 + * additional cost of transferring data through the FUSE kernel module 783 + * to user space (glibc) and then back into the FUSE filesystem again. 784 + * 785 + * In case this method is not implemented, glibc falls back to reading 786 + * data from the source and writing to the destination. Effectively 787 + * doing an inefficient copy of the data. 788 + */ 789 + ssize_t (*copy_file_range)(const char *path_in, 790 + struct fuse_file_info *fi_in, off_t offset_in, 791 + const char *path_out, 792 + struct fuse_file_info *fi_out, off_t offset_out, 793 + size_t size, int flags); 794 + 795 + /** 796 + * Find next data or hole after the specified offset 797 + */ 798 + off_t (*lseek)(const char *, off_t off, int whence, 799 + struct fuse_file_info *); 800 + }; 801 + 802 + /* 803 + * Extra context that may be needed by some filesystems 804 + * 805 + * The uid, gid and pid fields are not filled in case of a writepage 806 + * operation. 
807 + */ 808 + struct fuse_context { 809 + /** Pointer to the fuse object */ 810 + struct fuse *fuse; 811 + 812 + /** User ID of the calling process */ 813 + uid_t uid; 814 + 815 + /** Group ID of the calling process */ 816 + gid_t gid; 817 + 818 + /** Process ID of the calling thread */ 819 + pid_t pid; 820 + 821 + /** Private filesystem data */ 822 + void *private_data; 823 + 824 + /** Umask of the calling process */ 825 + mode_t umask; 826 + }; 827 + 828 + /** 829 + * Main function of FUSE. 830 + * 831 + * This is for the lazy. This is all that has to be called from the 832 + * main() function. 833 + * 834 + * This function does the following: 835 + * - parses command line options, and handles --help and 836 + * --version 837 + * - installs signal handlers for INT, HUP, TERM and PIPE 838 + * - registers an exit handler to unmount the filesystem on program exit 839 + * - creates a fuse handle 840 + * - registers the operations 841 + * - calls either the single-threaded or the multi-threaded event loop 842 + * 843 + * Most file systems will have to parse some file-system specific 844 + * arguments before calling this function. It is recommended to do 845 + * this with fuse_opt_parse() and a processing function that passes 846 + * through any unknown options (this can also be achieved by just 847 + * passing NULL as the processing function). That way, the remaining 848 + * options can be passed directly to fuse_main(). 849 + * 850 + * fuse_main() accepts all options that can be passed to 851 + * fuse_parse_cmdline(), fuse_new(), or fuse_session_new(). 852 + * 853 + * Option parsing skips argv[0], which is assumed to contain the 854 + * program name. This element must always be present and is used to 855 + * construct a basic ``usage: `` message for the --help 856 + * output. argv[0] may also be set to the empty string. In this case 857 + * the usage message is suppressed. This can be used by file systems 858 + * to print their own usage line first. 
See hello.c for an example of 859 + * how to do this. 860 + * 861 + * Note: this is currently implemented as a macro. 862 + * 863 + * The following error codes may be returned from fuse_main(): 864 + * 1: Invalid option arguments 865 + * 2: No mount point specified 866 + * 3: FUSE setup failed 867 + * 4: Mounting failed 868 + * 5: Failed to daemonize (detach from session) 869 + * 6: Failed to set up signal handlers 870 + * 7: An error occurred during the life of the file system 871 + * 872 + * @param argc the argument counter passed to the main() function 873 + * @param argv the argument vector passed to the main() function 874 + * @param op the file system operation 875 + * @param private_data Initial value for the `private_data` 876 + * field of `struct fuse_context`. May be overridden by the 877 + * `struct fuse_operations.init` handler. 878 + * @return 0 on success, nonzero on failure 879 + * 880 + * Example usage, see hello.c 881 + */ 882 + /* 883 + * int fuse_main(int argc, char *argv[], const struct fuse_operations *op, 884 + * void *private_data); 885 + */ 886 + #define fuse_main(argc, argv, op, private_data) \ 887 + fuse_main_real(argc, argv, op, sizeof(*(op)), private_data) 888 + 889 + /* 890 + * More detailed API 891 + */ 892 + 893 + /** 894 + * Print available options (high- and low-level) to stdout. This is 895 + * not an exhaustive list, but includes only those options that may be 896 + * of interest to an end-user of a file system. 897 + * 898 + * The function looks at the argument vector only to determine if 899 + * there are additional modules to be loaded (module=foo option), 900 + * and attempts to call their help functions as well. 901 + * 902 + * @param args the argument vector. 903 + */ 904 + void fuse_lib_help(struct fuse_args *args); 905 + 906 + /** 907 + * Create a new FUSE filesystem. 
908 + * 909 + * This function accepts most file-system independent mount options 910 + * (like context, nodev, ro - see mount(8)), as well as the 911 + * FUSE-specific mount options from mount.fuse(8). 912 + * 913 + * If the --help option is specified, the function writes a help text 914 + * to stdout and returns NULL. 915 + * 916 + * Option parsing skips argv[0], which is assumed to contain the 917 + * program name. This element must always be present and is used to 918 + * construct a basic ``usage: `` message for the --help output. If 919 + * argv[0] is set to the empty string, no usage message is included in 920 + * the --help output. 921 + * 922 + * If an unknown option is passed in, an error message is written to 923 + * stderr and the function returns NULL. 924 + * 925 + * @param args argument vector 926 + * @param op the filesystem operations 927 + * @param op_size the size of the fuse_operations structure 928 + * @param private_data Initial value for the `private_data` 929 + * field of `struct fuse_context`. May be overridden by the 930 + * `struct fuse_operations.init` handler. 931 + * @return the created FUSE handle 932 + */ 933 + #if FUSE_USE_VERSION == 30 934 + struct fuse *fuse_new_30(struct fuse_args *args, 935 + const struct fuse_operations *op, size_t op_size, 936 + void *private_data); 937 + #define fuse_new(args, op, size, data) fuse_new_30(args, op, size, data) 938 + #else 939 + struct fuse *fuse_new(struct fuse_args *args, const struct fuse_operations *op, 940 + size_t op_size, void *private_data); 941 + #endif 942 + 943 + /** 944 + * Mount a FUSE file system. 945 + * 946 + * @param mountpoint the mount point path 947 + * @param f the FUSE handle 948 + * 949 + * @return 0 on success, -1 on failure. 950 + **/ 951 + int fuse_mount(struct fuse *f, const char *mountpoint); 952 + 953 + /** 954 + * Unmount a FUSE file system. 955 + * 956 + * See fuse_session_unmount() for additional information. 
957 + * 958 + * @param f the FUSE handle 959 + **/ 960 + void fuse_unmount(struct fuse *f); 961 + 962 + /** 963 + * Destroy the FUSE handle. 964 + * 965 + * NOTE: This function does not unmount the filesystem. If this is 966 + * needed, call fuse_unmount() before calling this function. 967 + * 968 + * @param f the FUSE handle 969 + */ 970 + void fuse_destroy(struct fuse *f); 971 + 972 + /** 973 + * FUSE event loop. 974 + * 975 + * Requests from the kernel are processed, and the appropriate 976 + * operations are called. 977 + * 978 + * For a description of the return value and the conditions when the 979 + * event loop exits, refer to the documentation of 980 + * fuse_session_loop(). 981 + * 982 + * @param f the FUSE handle 983 + * @return see fuse_session_loop() 984 + * 985 + * See also: fuse_loop_mt() 986 + */ 987 + int fuse_loop(struct fuse *f); 988 + 989 + /** 990 + * Flag session as terminated 991 + * 992 + * This function will cause any running event loops to exit on 993 + * the next opportunity. 994 + * 995 + * @param f the FUSE handle 996 + */ 997 + void fuse_exit(struct fuse *f); 998 + 999 + /** 1000 + * Get the current context 1001 + * 1002 + * The context is only valid for the duration of a filesystem 1003 + * operation, and thus must not be stored and used later. 1004 + * 1005 + * @return the context 1006 + */ 1007 + struct fuse_context *fuse_get_context(void); 1008 + 1009 + /** 1010 + * Get the current supplementary group IDs for the current request 1011 + * 1012 + * Similar to the getgroups(2) system call, except the return value is 1013 + * always the total number of group IDs, even if it is larger than the 1014 + * specified size. 1015 + * 1016 + * The current fuse kernel module in linux (as of 2.6.30) doesn't pass 1017 + * the group list to userspace, hence this function needs to parse 1018 + * "/proc/$TID/task/$TID/status" to get the group IDs. 1019 + * 1020 + * This feature may not be supported on all operating systems. 
In 1021 + * such a case this function will return -ENOSYS. 1022 + * 1023 + * @param size size of given array 1024 + * @param list array of group IDs to be filled in 1025 + * @return the total number of supplementary group IDs or -errno on failure 1026 + */ 1027 + int fuse_getgroups(int size, gid_t list[]); 1028 + 1029 + /** 1030 + * Check if the current request has already been interrupted 1031 + * 1032 + * @return 1 if the request has been interrupted, 0 otherwise 1033 + */ 1034 + int fuse_interrupted(void); 1035 + 1036 + /** 1037 + * Invalidates cache for the given path. 1038 + * 1039 + * This calls fuse_lowlevel_notify_inval_inode internally. 1040 + * 1041 + * @return 0 on successful invalidation, negative error value otherwise. 1042 + * This routine may return -ENOENT to indicate that there was 1043 + * no entry to be invalidated, e.g., because the path has not 1044 + * been seen before or has been forgotten; this should not be 1045 + * considered to be an error. 1046 + */ 1047 + int fuse_invalidate_path(struct fuse *f, const char *path); 1048 + 1049 + /** 1050 + * The real main function 1051 + * 1052 + * Do not call this directly, use fuse_main() 1053 + */ 1054 + int fuse_main_real(int argc, char *argv[], const struct fuse_operations *op, 1055 + size_t op_size, void *private_data); 1056 + 1057 + /** 1058 + * Start the cleanup thread when using option "remember". 1059 + * 1060 + * This is done automatically by fuse_loop_mt() 1061 + * @param fuse struct fuse pointer for fuse instance 1062 + * @return 0 on success and -1 on error 1063 + */ 1064 + int fuse_start_cleanup_thread(struct fuse *fuse); 1065 + 1066 + /** 1067 + * Stop the cleanup thread when using option "remember". 
1068 + * 1069 + * This is done automatically by fuse_loop_mt() 1070 + * @param fuse struct fuse pointer for fuse instance 1071 + */ 1072 + void fuse_stop_cleanup_thread(struct fuse *fuse); 1073 + 1074 + /** 1075 + * Iterate over cache removing stale entries 1076 + * use in conjunction with "-oremember" 1077 + * 1078 + * NOTE: This is already done for the standard sessions 1079 + * 1080 + * @param fuse struct fuse pointer for fuse instance 1081 + * @return the number of seconds until the next cleanup 1082 + */ 1083 + int fuse_clean_cache(struct fuse *fuse); 1084 + 1085 + /* 1086 + * Stacking API 1087 + */ 1088 + 1089 + /** 1090 + * Fuse filesystem object 1091 + * 1092 + * This opaque object represents a filesystem layer 1093 + */ 1094 + struct fuse_fs; 1095 + 1096 + /* 1097 + * These functions call the relevant filesystem operation, and return 1098 + * the result. 1099 + * 1100 + * If the operation is not defined, they return -ENOSYS, with the 1101 + * exception of fuse_fs_open, fuse_fs_release, fuse_fs_opendir, 1102 + * fuse_fs_releasedir and fuse_fs_statfs, which return 0. 
1103 + */ 1104 + 1105 + int fuse_fs_getattr(struct fuse_fs *fs, const char *path, struct stat *buf, 1106 + struct fuse_file_info *fi); 1107 + int fuse_fs_rename(struct fuse_fs *fs, const char *oldpath, const char *newpath, 1108 + unsigned int flags); 1109 + int fuse_fs_unlink(struct fuse_fs *fs, const char *path); 1110 + int fuse_fs_rmdir(struct fuse_fs *fs, const char *path); 1111 + int fuse_fs_symlink(struct fuse_fs *fs, const char *linkname, const char *path); 1112 + int fuse_fs_link(struct fuse_fs *fs, const char *oldpath, const char *newpath); 1113 + int fuse_fs_release(struct fuse_fs *fs, const char *path, 1114 + struct fuse_file_info *fi); 1115 + int fuse_fs_open(struct fuse_fs *fs, const char *path, 1116 + struct fuse_file_info *fi); 1117 + int fuse_fs_read(struct fuse_fs *fs, const char *path, char *buf, size_t size, 1118 + off_t off, struct fuse_file_info *fi); 1119 + int fuse_fs_read_buf(struct fuse_fs *fs, const char *path, 1120 + struct fuse_bufvec **bufp, size_t size, off_t off, 1121 + struct fuse_file_info *fi); 1122 + int fuse_fs_write(struct fuse_fs *fs, const char *path, const char *buf, 1123 + size_t size, off_t off, struct fuse_file_info *fi); 1124 + int fuse_fs_write_buf(struct fuse_fs *fs, const char *path, 1125 + struct fuse_bufvec *buf, off_t off, 1126 + struct fuse_file_info *fi); 1127 + int fuse_fs_fsync(struct fuse_fs *fs, const char *path, int datasync, 1128 + struct fuse_file_info *fi); 1129 + int fuse_fs_flush(struct fuse_fs *fs, const char *path, 1130 + struct fuse_file_info *fi); 1131 + int fuse_fs_statfs(struct fuse_fs *fs, const char *path, struct statvfs *buf); 1132 + int fuse_fs_opendir(struct fuse_fs *fs, const char *path, 1133 + struct fuse_file_info *fi); 1134 + int fuse_fs_readdir(struct fuse_fs *fs, const char *path, void *buf, 1135 + fuse_fill_dir_t filler, off_t off, 1136 + struct fuse_file_info *fi, enum fuse_readdir_flags flags); 1137 + int fuse_fs_fsyncdir(struct fuse_fs *fs, const char *path, int datasync, 1138 + 
struct fuse_file_info *fi); 1139 + int fuse_fs_releasedir(struct fuse_fs *fs, const char *path, 1140 + struct fuse_file_info *fi); 1141 + int fuse_fs_create(struct fuse_fs *fs, const char *path, mode_t mode, 1142 + struct fuse_file_info *fi); 1143 + int fuse_fs_lock(struct fuse_fs *fs, const char *path, 1144 + struct fuse_file_info *fi, int cmd, struct flock *lock); 1145 + int fuse_fs_flock(struct fuse_fs *fs, const char *path, 1146 + struct fuse_file_info *fi, int op); 1147 + int fuse_fs_chmod(struct fuse_fs *fs, const char *path, mode_t mode, 1148 + struct fuse_file_info *fi); 1149 + int fuse_fs_chown(struct fuse_fs *fs, const char *path, uid_t uid, gid_t gid, 1150 + struct fuse_file_info *fi); 1151 + int fuse_fs_truncate(struct fuse_fs *fs, const char *path, off_t size, 1152 + struct fuse_file_info *fi); 1153 + int fuse_fs_utimens(struct fuse_fs *fs, const char *path, 1154 + const struct timespec tv[2], struct fuse_file_info *fi); 1155 + int fuse_fs_access(struct fuse_fs *fs, const char *path, int mask); 1156 + int fuse_fs_readlink(struct fuse_fs *fs, const char *path, char *buf, 1157 + size_t len); 1158 + int fuse_fs_mknod(struct fuse_fs *fs, const char *path, mode_t mode, 1159 + dev_t rdev); 1160 + int fuse_fs_mkdir(struct fuse_fs *fs, const char *path, mode_t mode); 1161 + int fuse_fs_setxattr(struct fuse_fs *fs, const char *path, const char *name, 1162 + const char *value, size_t size, int flags); 1163 + int fuse_fs_getxattr(struct fuse_fs *fs, const char *path, const char *name, 1164 + char *value, size_t size); 1165 + int fuse_fs_listxattr(struct fuse_fs *fs, const char *path, char *list, 1166 + size_t size); 1167 + int fuse_fs_removexattr(struct fuse_fs *fs, const char *path, const char *name); 1168 + int fuse_fs_bmap(struct fuse_fs *fs, const char *path, size_t blocksize, 1169 + uint64_t *idx); 1170 + int fuse_fs_ioctl(struct fuse_fs *fs, const char *path, unsigned int cmd, 1171 + void *arg, struct fuse_file_info *fi, unsigned int flags, 1172 + void 
*data); 1173 + int fuse_fs_poll(struct fuse_fs *fs, const char *path, 1174 + struct fuse_file_info *fi, struct fuse_pollhandle *ph, 1175 + unsigned *reventsp); 1176 + int fuse_fs_fallocate(struct fuse_fs *fs, const char *path, int mode, 1177 + off_t offset, off_t length, struct fuse_file_info *fi); 1178 + ssize_t fuse_fs_copy_file_range(struct fuse_fs *fs, const char *path_in, 1179 + struct fuse_file_info *fi_in, off_t off_in, 1180 + const char *path_out, 1181 + struct fuse_file_info *fi_out, off_t off_out, 1182 + size_t len, int flags); 1183 + off_t fuse_fs_lseek(struct fuse_fs *fs, const char *path, off_t off, int whence, 1184 + struct fuse_file_info *fi); 1185 + void fuse_fs_init(struct fuse_fs *fs, struct fuse_conn_info *conn, 1186 + struct fuse_config *cfg); 1187 + void fuse_fs_destroy(struct fuse_fs *fs); 1188 + 1189 + int fuse_notify_poll(struct fuse_pollhandle *ph); 1190 + 1191 + /** 1192 + * Create a new fuse filesystem object 1193 + * 1194 + * This is usually called from the factory of a fuse module to create 1195 + * a new instance of a filesystem. 1196 + * 1197 + * @param op the filesystem operations 1198 + * @param op_size the size of the fuse_operations structure 1199 + * @param private_data Initial value for the `private_data` 1200 + * field of `struct fuse_context`. May be overridden by the 1201 + * `struct fuse_operations.init` handler. 1202 + * @return a new filesystem object 1203 + */ 1204 + struct fuse_fs *fuse_fs_new(const struct fuse_operations *op, size_t op_size, 1205 + void *private_data); 1206 + 1207 + /** 1208 + * Factory for creating filesystem objects 1209 + * 1210 + * The function may use and remove options from 'args' that belong 1211 + * to this module. 1212 + * 1213 + * For now the 'fs' vector always contains exactly one filesystem. 1214 + * This is the filesystem which will be below the newly created 1215 + * filesystem in the stack. 
1216 + * 1217 + * @param args the command line arguments 1218 + * @param fs NULL terminated filesystem object vector 1219 + * @return the new filesystem object 1220 + */ 1221 + typedef struct fuse_fs *(*fuse_module_factory_t)(struct fuse_args *args, 1222 + struct fuse_fs *fs[]); 1223 + /** 1224 + * Register filesystem module 1225 + * 1226 + * If the "-omodules=*name*_:..." option is present, filesystem 1227 + * objects are created and pushed onto the stack with the *factory_* 1228 + * function. 1229 + * 1230 + * @param name_ the name of this filesystem module 1231 + * @param factory_ the factory function for this filesystem module 1232 + */ 1233 + #define FUSE_REGISTER_MODULE(name_, factory_) \ 1234 + fuse_module_factory_t fuse_module_##name_##_factory = factory_ 1235 + 1236 + /** Get session from fuse object */ 1237 + struct fuse_session *fuse_get_session(struct fuse *f); 1238 + 1239 + /** 1240 + * Open a FUSE file descriptor and set up the mount for the given 1241 + * mountpoint and flags. 1242 + * 1243 + * @param mountpoint reference to the mount in the file system 1244 + * @param options mount options 1245 + * @return the FUSE file descriptor or -1 upon error 1246 + */ 1247 + int fuse_open_channel(const char *mountpoint, const char *options); 1248 + 1249 + #endif /* FUSE_H_ */
+816
tools/virtiofsd/fuse_common.h
/*
 * FUSE: Filesystem in Userspace
 * Copyright (C) 2001-2007 Miklos Szeredi <miklos@szeredi.hu>
 *
 * This program can be distributed under the terms of the GNU LGPLv2.
 * See the file COPYING.LIB.
 */

/** @file */

#if !defined(FUSE_H_) && !defined(FUSE_LOWLEVEL_H_)
#error \
    "Never include <fuse_common.h> directly; use <fuse.h> or <fuse_lowlevel.h> instead."
#endif

#ifndef FUSE_COMMON_H_
#define FUSE_COMMON_H_

#include "fuse_log.h"
#include "fuse_opt.h"
#include <stdint.h>
#include <sys/types.h>

/** Major version of FUSE library interface */
#define FUSE_MAJOR_VERSION 3

/** Minor version of FUSE library interface */
#define FUSE_MINOR_VERSION 2

#define FUSE_MAKE_VERSION(maj, min) ((maj) * 10 + (min))
#define FUSE_VERSION FUSE_MAKE_VERSION(FUSE_MAJOR_VERSION, FUSE_MINOR_VERSION)

/**
 * Information about an open file.
 *
 * File Handles are created by the open, opendir, and create methods and closed
 * by the release and releasedir methods. Multiple file handles may be
 * concurrently open for the same file. Generally, a client will create one
 * file handle per file descriptor, though in some cases multiple file
 * descriptors can share a single file handle.
 */
struct fuse_file_info {
    /** Open flags. Available in open() and release() */
    int flags;

    /*
     * In case of a write operation indicates if this was caused
     * by a delayed write from the page cache. If so, then the
     * context's pid, uid, and gid fields will not be valid, and
     * the *fh* value may not match the *fh* value that would
     * have been sent with the corresponding individual write
     * requests if write caching had been disabled.
     */
    unsigned int writepage:1;

    /** Can be filled in by open, to use direct I/O on this file. */
    unsigned int direct_io:1;

    /*
     * Can be filled in by open. It signals the kernel that any
     * currently cached file data (i.e., data that the filesystem
     * provided the last time the file was open) need not be
     * invalidated. Has no effect when set in other contexts (in
     * particular it does nothing when set by opendir()).
     */
    unsigned int keep_cache:1;

    /*
     * Indicates a flush operation. Set in flush operation, also
     * may be set in high-level lock operation and low-level release
     * operation.
     */
    unsigned int flush:1;

    /*
     * Can be filled in by open, to indicate that the file is not
     * seekable.
     */
    unsigned int nonseekable:1;

    /*
     * Indicates that flock locks for this file should be
     * released. If set, lock_owner shall contain a valid value.
     * May only be set in ->release().
     */
    unsigned int flock_release:1;

    /*
     * Can be filled in by opendir. It signals the kernel to
     * enable caching of entries returned by readdir(). Has no
     * effect when set in other contexts (in particular it does
     * nothing when set by open()).
     */
    unsigned int cache_readdir:1;

    /* Indicates that suid/sgid bits should be removed upon write */
    unsigned int kill_priv:1;


    /** Padding. Reserved for future use */
    unsigned int padding:24;
    unsigned int padding2:32;

    /*
     * File handle id. May be filled in by filesystem in create,
     * open, and opendir(). Available in most other file operations on the
     * same file handle.
     */
    uint64_t fh;

    /** Lock owner id. Available in locking operations and flush */
    uint64_t lock_owner;

    /*
     * Requested poll events. Available in ->poll. Only set on kernels
     * which support it. If unsupported, this field is set to zero.
     */
    uint32_t poll_events;
};

/*
 * Capability bits for 'fuse_conn_info.capable' and 'fuse_conn_info.want'
 */

/**
 * Indicates that the filesystem supports asynchronous read requests.
 *
 * If this capability is not requested/available, the kernel will
 * ensure that there is at most one pending read request per
 * file-handle at any time, and will attempt to order read requests by
 * increasing offset.
 *
 * This feature is enabled by default when supported by the kernel.
 */
#define FUSE_CAP_ASYNC_READ (1 << 0)

/**
 * Indicates that the filesystem supports "remote" locking.
 *
 * This feature is enabled by default when supported by the kernel,
 * and if getlk() and setlk() handlers are implemented.
 */
#define FUSE_CAP_POSIX_LOCKS (1 << 1)

/**
 * Indicates that the filesystem supports the O_TRUNC open flag. If
 * disabled, and an application specifies O_TRUNC, fuse first calls
 * truncate() and then open() with O_TRUNC filtered out.
 *
 * This feature is enabled by default when supported by the kernel.
 */
#define FUSE_CAP_ATOMIC_O_TRUNC (1 << 3)

/**
 * Indicates that the filesystem supports lookups of "." and "..".
 *
 * This feature is disabled by default.
 */
#define FUSE_CAP_EXPORT_SUPPORT (1 << 4)

/**
 * Indicates that the kernel should not apply the umask to the
 * file mode on create operations.
 *
 * This feature is disabled by default.
 */
#define FUSE_CAP_DONT_MASK (1 << 6)

/**
 * Indicates that libfuse should try to use splice() when writing to
 * the fuse device. This may improve performance.
 *
 * This feature is disabled by default.
 */
#define FUSE_CAP_SPLICE_WRITE (1 << 7)

/**
 * Indicates that libfuse should try to move pages instead of copying when
 * writing to / reading from the fuse device. This may improve performance.
 *
 * This feature is disabled by default.
 */
#define FUSE_CAP_SPLICE_MOVE (1 << 8)

/**
 * Indicates that libfuse should try to use splice() when reading from
 * the fuse device. This may improve performance.
 *
 * This feature is enabled by default when supported by the kernel and
 * if the filesystem implements a write_buf() handler.
 */
#define FUSE_CAP_SPLICE_READ (1 << 9)

/**
 * If set, the calls to flock(2) will be emulated using POSIX locks and must
 * then be handled by the filesystem's setlock() handler.
 *
 * If not set, flock(2) calls will be handled by the FUSE kernel module
 * internally (so any access that does not go through the kernel cannot be taken
 * into account).
 *
 * This feature is enabled by default when supported by the kernel and
 * if the filesystem implements a flock() handler.
 */
#define FUSE_CAP_FLOCK_LOCKS (1 << 10)

/**
 * Indicates that the filesystem supports ioctl's on directories.
 *
 * This feature is enabled by default when supported by the kernel.
 */
#define FUSE_CAP_IOCTL_DIR (1 << 11)

/**
 * Traditionally, while a file is open the FUSE kernel module only
 * asks the filesystem for an update of the file's attributes when a
 * client attempts to read beyond EOF. This is unsuitable for
 * e.g. network filesystems, where the file contents may change
 * without the kernel knowing about it.
 *
 * If this flag is set, FUSE will check the validity of the attributes
 * on every read. If the attributes are no longer valid (i.e., if the
 * *attr_timeout* passed to fuse_reply_attr() or set in `struct
 * fuse_entry_param` has passed), it will first issue a `getattr`
 * request. If the new mtime differs from the previous value, any
 * cached file *contents* will be invalidated as well.
 *
 * This flag should always be set when available. If all file changes
 * go through the kernel, *attr_timeout* should be set to a very large
 * number to avoid unnecessary getattr() calls.
 *
 * This feature is enabled by default when supported by the kernel.
 */
#define FUSE_CAP_AUTO_INVAL_DATA (1 << 12)

/**
 * Indicates that the filesystem supports readdirplus.
 *
 * This feature is enabled by default when supported by the kernel and if the
 * filesystem implements a readdirplus() handler.
 */
#define FUSE_CAP_READDIRPLUS (1 << 13)

/**
 * Indicates that the filesystem supports adaptive readdirplus.
 *
 * If FUSE_CAP_READDIRPLUS is not set, this flag has no effect.
 *
 * If FUSE_CAP_READDIRPLUS is set and this flag is not set, the kernel
 * will always issue readdirplus() requests to retrieve directory
 * contents.
 *
 * If FUSE_CAP_READDIRPLUS is set and this flag is set, the kernel
 * will issue both readdir() and readdirplus() requests, depending on
 * how much information is expected to be required.
 *
 * As of Linux 4.20, the algorithm is as follows: when userspace
 * starts to read directory entries, issue a READDIRPLUS request to
 * the filesystem. If any entry attributes have been looked up by the
 * time userspace requests the next batch of entries continue with
 * READDIRPLUS, otherwise switch to plain READDIR. This will result
 * in e.g. plain "ls" triggering READDIRPLUS first then READDIR after
 * that because it doesn't do lookups. "ls -l" should result in all
 * READDIRPLUS, except if dentries are already cached.
 *
 * This feature is enabled by default when supported by the kernel and
 * if the filesystem implements both a readdirplus() and a readdir()
 * handler.
 */
#define FUSE_CAP_READDIRPLUS_AUTO (1 << 14)

/**
 * Indicates that the filesystem supports asynchronous direct I/O submission.
 *
 * If this capability is not requested/available, the kernel will ensure that
 * there is at most one pending read and one pending write request per direct
 * I/O file-handle at any time.
 *
 * This feature is enabled by default when supported by the kernel.
 */
#define FUSE_CAP_ASYNC_DIO (1 << 15)

/**
 * Indicates that writeback caching should be enabled. This means that
 * individual write requests may be buffered and merged in the kernel
 * before they are sent to the filesystem.
 *
 * This feature is disabled by default.
 */
#define FUSE_CAP_WRITEBACK_CACHE (1 << 16)

/**
 * Indicates support for zero-message opens. If this flag is set in
 * the `capable` field of the `fuse_conn_info` structure, then the
 * filesystem may return `ENOSYS` from the open() handler to indicate
 * success. Further attempts to open files will be handled in the
 * kernel. (If this flag is not set, returning ENOSYS will be treated
 * as an error and signaled to the caller).
 *
 * Setting (or unsetting) this flag in the `want` field has *no
 * effect*.
 */
#define FUSE_CAP_NO_OPEN_SUPPORT (1 << 17)

/**
 * Indicates support for parallel directory operations. If this flag
 * is unset, the FUSE kernel module will ensure that lookup() and
 * readdir() requests are never issued concurrently for the same
 * directory.
 *
 * This feature is enabled by default when supported by the kernel.
 */
#define FUSE_CAP_PARALLEL_DIROPS (1 << 18)

/**
 * Indicates support for POSIX ACLs.
 *
 * If this feature is enabled, the kernel will cache and have
 * responsibility for enforcing ACLs. ACL will be stored as xattrs and
 * passed to userspace, which is responsible for updating the ACLs in
 * the filesystem, keeping the file mode in sync with the ACL, and
 * ensuring inheritance of default ACLs when new filesystem nodes are
 * created. Note that this requires that the file system is able to
 * parse and interpret the xattr representation of ACLs.
 *
 * Enabling this feature implicitly turns on the
 * ``default_permissions`` mount option (even if it was not passed to
 * mount(2)).
 *
 * This feature is disabled by default.
 */
#define FUSE_CAP_POSIX_ACL (1 << 19)

/**
 * Indicates that the filesystem is responsible for unsetting
 * setuid and setgid bits when a file is written, truncated, or
 * its owner is changed.
 *
 * This feature is enabled by default when supported by the kernel.
 */
#define FUSE_CAP_HANDLE_KILLPRIV (1 << 20)

/**
 * Indicates support for zero-message opendirs. If this flag is set in
 * the `capable` field of the `fuse_conn_info` structure, then the filesystem
 * may return `ENOSYS` from the opendir() handler to indicate success. Further
 * opendir and releasedir messages will be handled in the kernel. (If this
 * flag is not set, returning ENOSYS will be treated as an error and signalled
 * to the caller.)
 *
 * Setting (or unsetting) this flag in the `want` field has *no effect*.
 */
#define FUSE_CAP_NO_OPENDIR_SUPPORT (1 << 24)

/**
 * Ioctl flags
 *
 * FUSE_IOCTL_COMPAT: 32bit compat ioctl on 64bit machine
 * FUSE_IOCTL_UNRESTRICTED: not restricted to well-formed ioctls, retry allowed
 * FUSE_IOCTL_RETRY: retry with new iovecs
 * FUSE_IOCTL_DIR: is a directory
 *
 * FUSE_IOCTL_MAX_IOV: maximum of in_iovecs + out_iovecs
 */
#define FUSE_IOCTL_COMPAT (1 << 0)
#define FUSE_IOCTL_UNRESTRICTED (1 << 1)
#define FUSE_IOCTL_RETRY (1 << 2)
#define FUSE_IOCTL_DIR (1 << 4)

#define FUSE_IOCTL_MAX_IOV 256

/**
 * Connection information, passed to the ->init() method
 *
 * Some of the elements are read-write, these can be changed to
 * indicate the value requested by the filesystem. The requested
 * value must usually be smaller than the indicated value.
 */
struct fuse_conn_info {
    /**
     * Major version of the protocol (read-only)
     */
    unsigned proto_major;

    /**
     * Minor version of the protocol (read-only)
     */
    unsigned proto_minor;

    /**
     * Maximum size of the write buffer
     */
    unsigned max_write;

    /**
     * Maximum size of read requests. A value of zero indicates no
     * limit. However, even if the filesystem does not specify a
     * limit, the maximum size of read requests will still be
     * limited by the kernel.
     *
     * NOTE: For the time being, the maximum size of read requests
     * must be set both here *and* passed to fuse_session_new()
     * using the ``-o max_read=<n>`` mount option. At some point
     * in the future, specifying the mount option will no longer
     * be necessary.
     */
    unsigned max_read;

    /**
     * Maximum readahead
     */
    unsigned max_readahead;

    /**
     * Capability flags that the kernel supports (read-only)
     */
    unsigned capable;

    /**
     * Capability flags that the filesystem wants to enable.
     *
     * libfuse attempts to initialize this field with
     * reasonable default values before calling the init() handler.
     */
    unsigned want;

    /**
     * Maximum number of pending "background" requests. A
     * background request is any type of request for which the
     * total number is not limited by other means. As of kernel
     * 4.8, only two types of requests fall into this category:
     *
     *   1. Read-ahead requests
     *   2. Asynchronous direct I/O requests
     *
     * Read-ahead requests are generated (if max_readahead is
     * non-zero) by the kernel to preemptively fill its caches
     * when it anticipates that userspace will soon read more
     * data.
     *
     * Asynchronous direct I/O requests are generated if
     * FUSE_CAP_ASYNC_DIO is enabled and userspace submits a large
     * direct I/O request. In this case the kernel will internally
     * split it up into multiple smaller requests and submit them
     * to the filesystem concurrently.
     *
     * Note that the following requests are *not* background
     * requests: writeback requests (limited by the kernel's
     * flusher algorithm), regular (i.e., synchronous and
     * buffered) userspace read/write requests (limited to one per
     * thread), asynchronous read requests (Linux's io_submit(2)
     * call actually blocks, so these are also limited to one per
     * thread).
     */
    unsigned max_background;

    /**
     * Kernel congestion threshold parameter. If the number of pending
     * background requests exceeds this number, the FUSE kernel module will
     * mark the filesystem as "congested". This instructs the kernel to
     * expect that queued requests will take some time to complete, and to
     * adjust its algorithms accordingly (e.g. by putting a waiting thread
     * to sleep instead of using a busy-loop).
     */
    unsigned congestion_threshold;

    /**
     * When FUSE_CAP_WRITEBACK_CACHE is enabled, the kernel is responsible
     * for updating mtime and ctime when write requests are received. The
     * updated values are passed to the filesystem with setattr() requests.
     * However, if the filesystem does not support the full resolution of
     * the kernel timestamps (nanoseconds), the mtime and ctime values used
     * by kernel and filesystem will differ (and result in an apparent
     * change of times after a cache flush).
     *
     * To prevent this problem, this variable can be used to inform the
     * kernel about the timestamp granularity supported by the file-system.
     * The value should be power of 10. The default is 1, i.e. full
     * nano-second resolution. Filesystems supporting only second resolution
     * should set this to 1000000000.
     */
    unsigned time_gran;

    /**
     * For future use.
     */
    unsigned reserved[22];
};

struct fuse_session;
struct fuse_pollhandle;
struct fuse_conn_info_opts;

/**
 * This function parses several command-line options that can be used
 * to override elements of struct fuse_conn_info. The pointer returned
 * by this function should be passed to the
 * fuse_apply_conn_info_opts() method by the file system's init()
 * handler.
 *
 * Before using this function, think twice if you really want these
 * parameters to be adjustable from the command line. In most cases,
 * they should be determined by the file system internally.
 *
 * The following options are recognized:
 *
 *   -o max_write=N             sets conn->max_write
 *   -o max_readahead=N         sets conn->max_readahead
 *   -o max_background=N        sets conn->max_background
 *   -o congestion_threshold=N  sets conn->congestion_threshold
 *   -o async_read              sets FUSE_CAP_ASYNC_READ in conn->want
 *   -o sync_read               unsets FUSE_CAP_ASYNC_READ in conn->want
 *   -o atomic_o_trunc          sets FUSE_CAP_ATOMIC_O_TRUNC in conn->want
 *   -o no_remote_lock          Equivalent to
 *                              -o no_remote_flock,no_remote_posix_lock
 *   -o no_remote_flock         Unsets FUSE_CAP_FLOCK_LOCKS in conn->want
 *   -o no_remote_posix_lock    Unsets FUSE_CAP_POSIX_LOCKS in conn->want
 *   -o [no_]splice_write       (un-)sets FUSE_CAP_SPLICE_WRITE in conn->want
 *   -o [no_]splice_move        (un-)sets FUSE_CAP_SPLICE_MOVE in conn->want
 *   -o [no_]splice_read        (un-)sets FUSE_CAP_SPLICE_READ in conn->want
 *   -o [no_]auto_inval_data    (un-)sets FUSE_CAP_AUTO_INVAL_DATA in conn->want
 *   -o readdirplus=no          unsets FUSE_CAP_READDIRPLUS in conn->want
 *   -o readdirplus=yes         sets FUSE_CAP_READDIRPLUS and unsets
 *                              FUSE_CAP_READDIRPLUS_AUTO in conn->want
 *   -o readdirplus=auto        sets FUSE_CAP_READDIRPLUS and
 *                              FUSE_CAP_READDIRPLUS_AUTO in conn->want
 *   -o [no_]async_dio          (un-)sets FUSE_CAP_ASYNC_DIO in conn->want
 *   -o [no_]writeback_cache    (un-)sets FUSE_CAP_WRITEBACK_CACHE in conn->want
 *   -o time_gran=N             sets conn->time_gran
 *
 * Known options will be removed from *args*, unknown options will be
 * passed through unchanged.
 *
 * @param args argument vector (input+output)
 * @return parsed options
 **/
struct fuse_conn_info_opts *fuse_parse_conn_info_opts(struct fuse_args *args);

/**
 * This function applies the (parsed) parameters in *opts* to the
 * *conn* pointer. It may modify the following fields: want,
 * max_write, max_readahead, congestion_threshold, max_background,
 * time_gran. A field is only set (or unset) if the corresponding
 * option has been explicitly set.
 */
void fuse_apply_conn_info_opts(struct fuse_conn_info_opts *opts,
                               struct fuse_conn_info *conn);

/**
 * Go into the background
 *
 * @param foreground if true, stay in the foreground
 * @return 0 on success, -1 on failure
 */
int fuse_daemonize(int foreground);

/**
 * Get the version of the library
 *
 * @return the version
 */
int fuse_version(void);

/**
 * Get the full package version string of the library
 *
 * @return the package version
 */
const char *fuse_pkgversion(void);

/**
 * Destroy poll handle
 *
 * @param ph the poll handle
 */
void fuse_pollhandle_destroy(struct fuse_pollhandle *ph);

/*
 * Data buffer
 */

/**
 * Buffer flags
 */
enum fuse_buf_flags {
    /**
     * Buffer contains a file descriptor
     *
     * If this flag is set, the .fd field is valid, otherwise the
     * .mem field is valid.
     */
    FUSE_BUF_IS_FD = (1 << 1),

    /**
     * Seek on the file descriptor
     *
     * If this flag is set then the .pos field is valid and is
     * used to seek to the given offset before performing
     * operation on file descriptor.
     */
    FUSE_BUF_FD_SEEK = (1 << 2),

    /**
     * Retry operation on file descriptor
     *
     * If this flag is set then retry operation on file descriptor
     * until .size bytes have been copied or an error or EOF is
     * detected.
     */
    FUSE_BUF_FD_RETRY = (1 << 3),
};

/**
 * Single data buffer
 *
 * Generic data buffer for I/O, extended attributes, etc... Data may
 * be supplied as a memory pointer or as a file descriptor
 */
struct fuse_buf {
    /**
     * Size of data in bytes
     */
    size_t size;

    /**
     * Buffer flags
     */
    enum fuse_buf_flags flags;

    /**
     * Memory pointer
     *
     * Used unless FUSE_BUF_IS_FD flag is set.
     */
    void *mem;

    /**
     * File descriptor
     *
     * Used if FUSE_BUF_IS_FD flag is set.
     */
    int fd;

    /**
     * File position
     *
     * Used if FUSE_BUF_FD_SEEK flag is set.
     */
    off_t pos;
};

/**
 * Data buffer vector
 *
 * An array of data buffers, each containing a memory pointer or a
 * file descriptor.
 *
 * Allocate dynamically to add more than one buffer.
 */
struct fuse_bufvec {
    /**
     * Number of buffers in the array
     */
    size_t count;

    /**
     * Index of current buffer within the array
     */
    size_t idx;

    /**
     * Current offset within the current buffer
     */
    size_t off;

    /**
     * Array of buffers
     */
    struct fuse_buf buf[1];
};

/* Initialize bufvec with a single buffer of given size */
#define FUSE_BUFVEC_INIT(size__)                                      \
    ((struct fuse_bufvec){ /* .count= */ 1,                           \
                           /* .idx = */ 0,                            \
                           /* .off = */ 0, /* .buf = */               \
                           { /* [0] = */ {                            \
                               /* .size = */ (size__),                \
                               /* .flags = */ (enum fuse_buf_flags)0, \
                               /* .mem = */ NULL,                     \
                               /* .fd = */ -1,                        \
                               /* .pos = */ 0,                        \
                           } } })

/**
 * Get total size of data in a fuse buffer vector
 *
 * @param bufv buffer vector
 * @return size of data
 */
size_t fuse_buf_size(const struct fuse_bufvec *bufv);

/**
 * Copy data from one buffer vector to another
 *
 * @param dst destination buffer vector
 * @param src source buffer vector
 * @return actual number of bytes copied or -errno on error
 */
ssize_t fuse_buf_copy(struct fuse_bufvec *dst, struct fuse_bufvec *src);

/**
 * Memory buffer iterator
 *
 */
struct fuse_mbuf_iter {
    /**
     * Data pointer
     */
    void *mem;

    /**
     * Total length, in bytes
     */
    size_t size;

    /**
     * Offset from start of buffer
     */
    size_t pos;
};

/* Initialize memory buffer iterator from a fuse_buf */
#define FUSE_MBUF_ITER_INIT(fbuf) \
    ((struct fuse_mbuf_iter){     \
        .mem = fbuf->mem,         \
        .size = fbuf->size,       \
        .pos = 0,                 \
    })

/**
 * Consume bytes from a memory buffer iterator
 *
 * @param iter memory buffer iterator
 * @param len number of bytes to consume
 * @return pointer to start of consumed bytes or
 *         NULL if advancing beyond end of buffer
 */
void *fuse_mbuf_iter_advance(struct fuse_mbuf_iter *iter, size_t len);

/**
 * Consume a NUL-terminated string from a memory buffer iterator
 *
 * @param iter memory buffer iterator
 * @return pointer to the string or
 *         NULL if advancing beyond end of buffer or there is no NUL-terminator
 */
const char *fuse_mbuf_iter_advance_str(struct fuse_mbuf_iter *iter);

/*
 * Signal handling
 */
/**
 * Exit session on HUP, TERM and INT signals and ignore PIPE signal
 *
 * Stores session in a global variable. May only be called once per
 * process until fuse_remove_signal_handlers() is called.
 *
 * Once either of the POSIX signals arrives, the signal handler calls
 * fuse_session_exit().
 *
 * @param se the session to exit
 * @return 0 on success, -1 on failure
 *
 * See also:
 * fuse_remove_signal_handlers()
 */
int fuse_set_signal_handlers(struct fuse_session *se);

/**
 * Restore default signal handlers
 *
 * Resets global session. After this fuse_set_signal_handlers() may
 * be called again.
 *
 * @param se the same session as given in fuse_set_signal_handlers()
 *
 * See also:
 * fuse_set_signal_handlers()
 */
void fuse_remove_signal_handlers(struct fuse_session *se);

/*
 * Compatibility stuff
 */

#if !defined(FUSE_USE_VERSION) || FUSE_USE_VERSION < 30
#error only API version 30 or greater is supported
#endif


/*
 * This interface uses 64 bit off_t.
 *
 * On 32bit systems please add -D_FILE_OFFSET_BITS=64 to your compile flags!
 */

#if defined(__GNUC__) && \
    (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 6) && \
    !defined __cplusplus
_Static_assert(sizeof(off_t) == 8, "fuse: off_t must be 64bit");
#else
struct _fuse_off_t_must_be_64bit_dummy_struct {
    unsigned _fuse_off_t_must_be_64bit:((sizeof(off_t) == 8) ? 1 : -1);
};
#endif

#endif /* FUSE_COMMON_H_ */
+115
tools/virtiofsd/fuse_i.h
/*
 * FUSE: Filesystem in Userspace
 * Copyright (C) 2001-2007 Miklos Szeredi <miklos@szeredi.hu>
 *
 * This program can be distributed under the terms of the GNU LGPLv2.
 * See the file COPYING.LIB
 */

/*
 * Internal (library-private) declarations shared between the
 * virtiofsd fuse lowlevel implementation files.
 */

#ifndef FUSE_I_H
#define FUSE_I_H

#define FUSE_USE_VERSION 31
#include "fuse.h"
#include "fuse_lowlevel.h"

/* Opaque virtio device / queue state; presumably defined in the virtio
 * transport code (fuse_virtio) — only referenced by pointer here. */
struct fv_VuDev;
struct fv_QueueInfo;

/* State of a single in-flight FUSE request */
struct fuse_req {
    struct fuse_session *se; /* session this request belongs to */
    uint64_t unique;         /* request id, echoed back in the reply header */
    int ctr;                 /* reference count; freed when it drops to 0 */
    pthread_mutex_t lock;
    struct fuse_ctx ctx;
    struct fuse_chan *ch;    /* channel the request arrived on (may be NULL) */
    int interrupted;
    unsigned int ioctl_64bit:1;
    union {
        struct {
            uint64_t unique; /* NOTE(review): appears to be the id of the
                              * request being interrupted — confirm in
                              * fuse_lowlevel.c interrupt handling */
        } i;
        struct {
            fuse_interrupt_func_t func; /* interrupt callback */
            void *data;                 /* opaque argument for func */
        } ni;
    } u;
    /* circular doubly-linked list links (see list_*_req helpers) */
    struct fuse_req *next;
    struct fuse_req *prev;
};

/* Pending notify request awaiting its reply */
struct fuse_notify_req {
    uint64_t unique;
    void (*reply)(struct fuse_notify_req *, fuse_req_t, fuse_ino_t,
                  const void *, const struct fuse_buf *);
    struct fuse_notify_req *next;
    struct fuse_notify_req *prev;
};

/* Per-mount session state */
struct fuse_session {
    char *mountpoint;
    volatile int exited;           /* set when the session loop should stop */
    int fd;
    int debug;
    int deny_others;
    struct fuse_lowlevel_ops op;   /* filesystem operation callbacks */
    int got_init;                  /* FUSE_INIT has been received */
    struct cuse_data *cuse_data;
    void *userdata;
    uid_t owner;
    struct fuse_conn_info conn;    /* negotiated connection parameters */
    struct fuse_req list;          /* list head of outstanding requests */
    struct fuse_req interrupts;    /* list head of pending interrupts */
    pthread_mutex_t lock;          /* protects the request lists */
    pthread_rwlock_t init_rwlock;
    int got_destroy;               /* FUSE_DESTROY has been received */
    int broken_splice_nonblock;
    uint64_t notify_ctr;
    struct fuse_notify_req notify_list;
    size_t bufsize;
    int error;
    /* vhost-user transport state */
    char *vu_socket_path;          /* path of the vhost-user UNIX socket */
    int vu_listen_fd;
    int vu_socketfd;
    struct fv_VuDev *virtio_dev;
    int thread_pool_size;          /* number of request worker threads */
};

/* Channel a request was received on; refcounted via ctr */
struct fuse_chan {
    pthread_mutex_t lock;
    int ctr;
    int fd;
    struct fv_QueueInfo *qi; /* virtio queue this channel maps to */
};

/**
 * Filesystem module
 *
 * Filesystem modules are registered with the FUSE_REGISTER_MODULE()
 * macro.
 *
 */
struct fuse_module {
    char *name;
    fuse_module_factory_t factory;
    struct fuse_module *next;
    struct fusemod_so *so;
    int ctr;
};

/* Send a reply without freeing the request afterwards */
int fuse_send_reply_iov_nofree(fuse_req_t req, int error, struct iovec *iov,
                               int count);
/* Drop a reference to the request, destroying it at refcount zero */
void fuse_free_req(fuse_req_t req);

/* Dispatch one parsed request buffer to the filesystem callbacks */
void fuse_session_process_buf_int(struct fuse_session *se,
                                  struct fuse_bufvec *bufv,
                                  struct fuse_chan *ch);


#define FUSE_MAX_MAX_PAGES 256
#define FUSE_DEFAULT_MAX_PAGES_PER_REQ 32

/* room needed in buffer to accommodate header */
#define FUSE_BUFFER_HEADER_SIZE 0x1000

#endif
+41
tools/virtiofsd/fuse_log.c
··· 1 + /* 2 + * FUSE: Filesystem in Userspace 3 + * Copyright (C) 2019 Red Hat, Inc. 4 + * 5 + * Logging API. 6 + * 7 + * This program can be distributed under the terms of the GNU LGPLv2. 8 + * See the file COPYING.LIB 9 + */ 10 + 11 + #include "qemu/osdep.h" 12 + #include "fuse_log.h" 13 + 14 + #include <stdarg.h> 15 + #include <stdio.h> 16 + 17 + static void default_log_func(__attribute__((unused)) enum fuse_log_level level, 18 + const char *fmt, va_list ap) 19 + { 20 + vfprintf(stderr, fmt, ap); 21 + } 22 + 23 + static fuse_log_func_t log_func = default_log_func; 24 + 25 + void fuse_set_log_func(fuse_log_func_t func) 26 + { 27 + if (!func) { 28 + func = default_log_func; 29 + } 30 + 31 + log_func = func; 32 + } 33 + 34 + void fuse_log(enum fuse_log_level level, const char *fmt, ...) 35 + { 36 + va_list ap; 37 + 38 + va_start(ap, fmt); 39 + log_func(level, fmt, ap); 40 + va_end(ap); 41 + }
+74
tools/virtiofsd/fuse_log.h
/*
 * FUSE: Filesystem in Userspace
 * Copyright (C) 2019 Red Hat, Inc.
 *
 * This program can be distributed under the terms of the GNU LGPLv2.
 * See the file COPYING.LIB.
 */

#ifndef FUSE_LOG_H_
#define FUSE_LOG_H_

/** @file
 *
 * This file defines the logging interface of FUSE
 */

#include <stdarg.h>

/**
 * Log severity level
 *
 * These levels correspond to syslog(2) log levels since they are widely used.
 * FUSE_LOG_EMERG (0) is the most severe, FUSE_LOG_DEBUG (7) the least.
 */
enum fuse_log_level {
    FUSE_LOG_EMERG,
    FUSE_LOG_ALERT,
    FUSE_LOG_CRIT,
    FUSE_LOG_ERR,
    FUSE_LOG_WARNING,
    FUSE_LOG_NOTICE,
    FUSE_LOG_INFO,
    FUSE_LOG_DEBUG
};

/**
 * Log message handler function.
 *
 * This function must be thread-safe. It may be called from any libfuse
 * function, including fuse_parse_cmdline() and other functions invoked before
 * a FUSE filesystem is created.
 *
 * Install a custom log message handler function using fuse_set_log_func().
 *
 * @param level log severity level
 * @param fmt sprintf-style format string including newline
 * @param ap format string arguments
 */
typedef void (*fuse_log_func_t)(enum fuse_log_level level, const char *fmt,
                                va_list ap);

/**
 * Install a custom log handler function.
 *
 * Log messages are emitted by libfuse functions to report errors and debug
 * information. Messages are printed to stderr by default but this can be
 * overridden by installing a custom log message handler function.
 *
 * The log message handler function is global and affects all FUSE filesystems
 * created within this process.
 *
 * @param func a custom log message handler function or NULL to revert to
 *             the default
 */
void fuse_set_log_func(fuse_log_func_t func);

/**
 * Emit a log message
 *
 * @param level severity level (FUSE_LOG_ERR, FUSE_LOG_DEBUG, etc)
 * @param fmt sprintf-style format string including newline
 */
void fuse_log(enum fuse_log_level level, const char *fmt, ...);

#endif /* FUSE_LOG_H_ */
+2761
tools/virtiofsd/fuse_lowlevel.c
/*
 * FUSE: Filesystem in Userspace
 * Copyright (C) 2001-2007 Miklos Szeredi <miklos@szeredi.hu>
 *
 * Implementation of (most of) the low-level FUSE API. The session loop
 * functions are implemented in separate files.
 *
 * This program can be distributed under the terms of the GNU LGPLv2.
 * See the file COPYING.LIB
 */

#include "qemu/osdep.h"
#include "fuse_i.h"
#include "standard-headers/linux/fuse.h"
#include "fuse_misc.h"
#include "fuse_opt.h"
#include "fuse_virtio.h"

#include <assert.h>
#include <errno.h>
#include <glib.h>
#include <limits.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/file.h>
#include <unistd.h>

/* Number of worker threads used to process requests (see thread pool code) */
#define THREAD_POOL_SIZE 64

/* Largest representable file offset (2^63 - 1), used for open-ended locks */
#define OFFSET_MAX 0x7fffffffffffffffLL

struct fuse_pollhandle {
    uint64_t kh;              /* kernel poll handle identifier */
    struct fuse_session *se;  /* owning session */
};

/* Cached system page size, initialized once before main() runs */
static size_t pagesize;

static __attribute__((constructor)) void fuse_ll_init_pagesize(void)
{
    pagesize = getpagesize();
}

/* Fill a fuse_attr wire structure from a host struct stat. */
static void convert_stat(const struct stat *stbuf, struct fuse_attr *attr)
{
    *attr = (struct fuse_attr){
        .ino = stbuf->st_ino,
        .mode = stbuf->st_mode,
        .nlink = stbuf->st_nlink,
        .uid = stbuf->st_uid,
        .gid = stbuf->st_gid,
        .rdev = stbuf->st_rdev,
        .size = stbuf->st_size,
        .blksize = stbuf->st_blksize,
        .blocks = stbuf->st_blocks,
        .atime = stbuf->st_atime,
        .mtime = stbuf->st_mtime,
        .ctime = stbuf->st_ctime,
        .atimensec = ST_ATIM_NSEC(stbuf),
        .mtimensec = ST_MTIM_NSEC(stbuf),
        .ctimensec = ST_CTIM_NSEC(stbuf),
    };
}

/* Fill a host struct stat from a FUSE_SETATTR request payload. */
static void convert_attr(const struct fuse_setattr_in *attr, struct stat *stbuf)
{
    stbuf->st_mode = attr->mode;
    stbuf->st_uid = attr->uid;
    stbuf->st_gid = attr->gid;
    stbuf->st_size = attr->size;
    stbuf->st_atime = attr->atime;
    stbuf->st_mtime = attr->mtime;
    stbuf->st_ctime = attr->ctime;
    ST_ATIM_NSEC_SET(stbuf, attr->atimensec);
    ST_MTIM_NSEC_SET(stbuf, attr->mtimensec);
    ST_CTIM_NSEC_SET(stbuf, attr->ctimensec);
}

/* Total number of bytes covered by an iovec array. */
static size_t iov_length(const struct iovec *iov, size_t count)
{
    size_t seg;
    size_t ret = 0;

    for (seg = 0; seg < count; seg++) {
        ret += iov[seg].iov_len;
    }
    return ret;
}

/*
 * Intrusive doubly-linked list of in-flight requests; a request points
 * to itself when it is not on any list.
 */
static void list_init_req(struct fuse_req *req)
{
    req->next = req;
    req->prev = req;
}

static void list_del_req(struct fuse_req *req)
{
    struct fuse_req *prev = req->prev;
    struct fuse_req *next = req->next;
    prev->next = next;
    next->prev = prev;
}

static void list_add_req(struct fuse_req *req, struct fuse_req *next)
{
    struct fuse_req *prev = next->prev;
    req->next = next;
    req->prev = prev;
    prev->next = req;
    next->prev = req;
}

/* Final teardown of a request once its refcount has dropped to zero. */
static void destroy_req(fuse_req_t req)
{
    pthread_mutex_destroy(&req->lock);
    free(req);
}

/*
 * Drop a reference on a request: unlink it from the session's request
 * list under se->lock and destroy it when the counter reaches zero.
 */
void fuse_free_req(fuse_req_t req)
{
    int ctr;
    struct fuse_session *se = req->se;

    pthread_mutex_lock(&se->lock);
    req->u.ni.func = NULL;
    req->u.ni.data = NULL;
    list_del_req(req);
    ctr = --req->ctr;
    req->ch = NULL;
    pthread_mutex_unlock(&se->lock);
    if (!ctr) {
        destroy_req(req);
    }
}

/*
 * Allocate a zeroed request with refcount 1, bound to @se.
 * Returns NULL on allocation failure (logged).
 */
static struct fuse_req *fuse_ll_alloc_req(struct fuse_session *se)
{
    struct fuse_req *req;

    req = (struct fuse_req *)calloc(1, sizeof(struct fuse_req));
    if (req == NULL) {
        fuse_log(FUSE_LOG_ERR, "fuse: failed to allocate request\n");
    } else {
        req->se = se;
        req->ctr = 1;
        list_init_req(req);
        fuse_mutex_init(&req->lock);
    }

    return req;
}

/* Send data. If *ch* is NULL, send via session master fd */
static int fuse_send_msg(struct fuse_session *se, struct fuse_chan *ch,
                         struct iovec *iov, int count)
{
    /* iov[0] is always the fuse_out_header; patch in the total length */
    struct fuse_out_header *out = iov[0].iov_base;

    out->len = iov_length(iov, count);
    if (out->unique == 0) {
        fuse_log(FUSE_LOG_DEBUG, "NOTIFY: code=%d length=%u\n", out->error,
                 out->len);
    } else if (out->error) {
        fuse_log(FUSE_LOG_DEBUG,
                 "   unique: %llu, error: %i (%s), outsize: %i\n",
                 (unsigned long long)out->unique, out->error,
                 strerror(-out->error), out->len);
    } else {
        fuse_log(FUSE_LOG_DEBUG, "   unique: %llu, success, outsize: %i\n",
                 (unsigned long long)out->unique, out->len);
    }

    /* This daemon only supports the virtio transport */
    if (fuse_lowlevel_is_virtio(se)) {
        return virtio_send_msg(se, ch, iov, count);
    }

    abort(); /* virtio should have taken it before here */
    return 0;
}


/*
 * Build the fuse_out_header in iov[0] and transmit the reply without
 * releasing the request. @error must be 0 or a negative errno in
 * (-1000, 0]; anything else is clamped to -ERANGE.
 */
int fuse_send_reply_iov_nofree(fuse_req_t req, int error, struct iovec *iov,
                               int count)
{
    struct fuse_out_header out = {
        .unique = req->unique,
        .error = error,
    };

    if (error <= -1000 || error > 0) {
        fuse_log(FUSE_LOG_ERR, "fuse: bad error value: %i\n", error);
        error = -ERANGE;
    }

    iov[0].iov_base = &out;
    iov[0].iov_len = sizeof(struct fuse_out_header);

    return fuse_send_msg(req->se, req->ch, iov, count);
}

/* Send a reply and release the request (common completion path). */
static int send_reply_iov(fuse_req_t req, int error, struct iovec *iov,
                          int count)
{
    int res;

    res = fuse_send_reply_iov_nofree(req, error, iov, count);
    fuse_free_req(req);
    return res;
}

/* Reply with an optional single payload buffer (iov[0] left for header). */
static int send_reply(fuse_req_t req, int error, const void *arg,
                      size_t argsize)
{
    struct iovec iov[2];
    int count = 1;
    if (argsize) {
        iov[1].iov_base = (void *)arg;
        iov[1].iov_len = argsize;
        count++;
    }
    return send_reply_iov(req, error, iov, count);
}

/*
 * Reply with a caller-supplied iovec; copies it into a new array with
 * slot 0 reserved for the out header.
 */
int fuse_reply_iov(fuse_req_t req, const struct iovec *iov, int count)
{
    int res;
    struct iovec *padded_iov;

    padded_iov = malloc((count + 1) * sizeof(struct iovec));
    if (padded_iov == NULL) {
        return fuse_reply_err(req, ENOMEM);
    }

    memcpy(padded_iov + 1, iov, count * sizeof(struct iovec));
    count++;

    res = send_reply_iov(req, 0, padded_iov, count);
    free(padded_iov);

    return res;
}


/*
 * `buf` is allowed to be empty so that the proper size may be
 * allocated by the caller
 */
size_t fuse_add_direntry(fuse_req_t req, char *buf, size_t bufsize,
                         const char *name, const struct stat *stbuf, off_t off)
{
    (void)req;
    size_t namelen;
    size_t entlen;
    size_t entlen_padded;
    struct fuse_dirent *dirent;

    namelen = strlen(name);
    entlen = FUSE_NAME_OFFSET + namelen;
    entlen_padded = FUSE_DIRENT_ALIGN(entlen);

    /* Size-probe mode: report the space needed without writing */
    if ((buf == NULL) || (entlen_padded > bufsize)) {
        return entlen_padded;
    }

    dirent = (struct fuse_dirent *)buf;
    dirent->ino = stbuf->st_ino;
    dirent->off = off;
    dirent->namelen = namelen;
    dirent->type = (stbuf->st_mode & S_IFMT) >> 12;
    memcpy(dirent->name, name, namelen);
    /* Zero the alignment padding so no stack garbage leaks to the guest */
    memset(dirent->name + namelen, 0, entlen_padded - entlen);

    return entlen_padded;
}

/* Fill a fuse_kstatfs wire structure from a host struct statvfs. */
static void convert_statfs(const struct statvfs *stbuf,
                           struct fuse_kstatfs *kstatfs)
{
    *kstatfs = (struct fuse_kstatfs){
        .bsize = stbuf->f_bsize,
        .frsize = stbuf->f_frsize,
        .blocks = stbuf->f_blocks,
        .bfree = stbuf->f_bfree,
        .bavail = stbuf->f_bavail,
        .files = stbuf->f_files,
        .ffree = stbuf->f_ffree,
        .namelen = stbuf->f_namemax,
    };
}

static int send_reply_ok(fuse_req_t req, const void *arg, size_t argsize)
{
    return send_reply(req, 0, arg, argsize);
}

/* Reply with a (positive) errno value; 0 means success with no payload. */
int fuse_reply_err(fuse_req_t req, int err)
{
    return send_reply(req, -err, NULL, 0);
}

/* Complete a request that takes no reply (e.g. FUSE_FORGET). */
void fuse_reply_none(fuse_req_t req)
{
    fuse_free_req(req);
}

/* Split a floating-point timeout into whole seconds, clamped to [0, ULONG_MAX]. */
static unsigned long calc_timeout_sec(double t)
{
    if (t > (double)ULONG_MAX) {
        return ULONG_MAX;
    } else if (t < 0.0) {
        return 0;
    } else {
        return (unsigned long)t;
    }
}

/* Fractional part of a timeout in nanoseconds, clamped to [0, 999999999]. */
static unsigned int calc_timeout_nsec(double t)
{
    double f = t - (double)calc_timeout_sec(t);
    if (f < 0.0) {
        return 0;
    } else if (f >= 0.999999999) {
        return 999999999;
    } else {
        return (unsigned int)(f * 1.0e9);
    }
}

/* Encode a fuse_entry_param into the wire fuse_entry_out structure. */
static void fill_entry(struct fuse_entry_out *arg,
                       const struct fuse_entry_param *e)
{
    *arg = (struct fuse_entry_out){
        .nodeid = e->ino,
        .generation = e->generation,
        .entry_valid = calc_timeout_sec(e->entry_timeout),
        .entry_valid_nsec = calc_timeout_nsec(e->entry_timeout),
        .attr_valid = calc_timeout_sec(e->attr_timeout),
        .attr_valid_nsec = calc_timeout_nsec(e->attr_timeout),
    };
    convert_stat(&e->attr, &arg->attr);
}

/*
 * `buf` is allowed to be empty so that the proper size may be
 * allocated by the caller
 */
size_t fuse_add_direntry_plus(fuse_req_t req, char *buf, size_t bufsize,
                              const char *name,
                              const struct fuse_entry_param *e, off_t off)
{
    (void)req;
    size_t namelen;
    size_t entlen;
    size_t entlen_padded;

    namelen = strlen(name);
    entlen = FUSE_NAME_OFFSET_DIRENTPLUS + namelen;
    entlen_padded = FUSE_DIRENT_ALIGN(entlen);
    /* Size-probe mode: report the space needed without writing */
    if ((buf == NULL) || (entlen_padded > bufsize)) {
        return entlen_padded;
    }

    struct fuse_direntplus *dp = (struct fuse_direntplus *)buf;
    memset(&dp->entry_out, 0, sizeof(dp->entry_out));
    fill_entry(&dp->entry_out, e);

    struct fuse_dirent *dirent = &dp->dirent;
    *dirent = (struct fuse_dirent){
        .ino = e->attr.st_ino,
        .off = off,
        .namelen = namelen,
        .type = (e->attr.st_mode & S_IFMT) >> 12,
    };
    memcpy(dirent->name, name, namelen);
    /* Zero the alignment padding so no stack garbage leaks to the guest */
    memset(dirent->name + namelen, 0, entlen_padded - entlen);

    return entlen_padded;
}

/* Translate fuse_file_info flags into FOPEN_* wire flags. */
static void fill_open(struct fuse_open_out *arg, const struct fuse_file_info *f)
{
    arg->fh = f->fh;
    if (f->direct_io) {
        arg->open_flags |= FOPEN_DIRECT_IO;
    }
    if (f->keep_cache) {
        arg->open_flags |= FOPEN_KEEP_CACHE;
    }
    if (f->cache_readdir) {
        arg->open_flags |= FOPEN_CACHE_DIR;
    }
    if (f->nonseekable) {
        arg->open_flags |= FOPEN_NONSEEKABLE;
    }
}

/* Reply to LOOKUP/MKNOD/MKDIR/... with an entry descriptor. */
int fuse_reply_entry(fuse_req_t req, const struct fuse_entry_param *e)
{
    struct fuse_entry_out arg;
    size_t size = sizeof(arg);

    memset(&arg, 0, sizeof(arg));
    fill_entry(&arg, e);
    return send_reply_ok(req, &arg, size);
}

/* Reply to CREATE: an entry descriptor immediately followed by open info. */
int fuse_reply_create(fuse_req_t req, const struct fuse_entry_param *e,
                      const struct fuse_file_info *f)
{
    char buf[sizeof(struct fuse_entry_out) + sizeof(struct fuse_open_out)];
    size_t entrysize = sizeof(struct fuse_entry_out);
    struct fuse_entry_out *earg = (struct fuse_entry_out *)buf;
    struct fuse_open_out *oarg = (struct fuse_open_out *)(buf + entrysize);

    memset(buf, 0, sizeof(buf));
    fill_entry(earg, e);
    fill_open(oarg, f);
    return send_reply_ok(req, buf, entrysize + sizeof(struct fuse_open_out));
}

/* Reply to GETATTR/SETATTR with attributes and their validity timeout. */
int fuse_reply_attr(fuse_req_t req, const struct stat *attr,
                    double attr_timeout)
{
    struct fuse_attr_out arg;
    size_t size = sizeof(arg);

    memset(&arg, 0, sizeof(arg));
    arg.attr_valid = calc_timeout_sec(attr_timeout);
    arg.attr_valid_nsec = calc_timeout_nsec(attr_timeout);
    convert_stat(attr, &arg.attr);

    return send_reply_ok(req, &arg, size);
}

/* Reply to READLINK with the (unterminated) target string. */
int fuse_reply_readlink(fuse_req_t req, const char *linkname)
{
    return send_reply_ok(req, linkname, strlen(linkname));
}

/* Reply to OPEN/OPENDIR with a file handle and open flags. */
int fuse_reply_open(fuse_req_t req, const struct fuse_file_info *f)
{
    struct fuse_open_out arg;

    memset(&arg, 0, sizeof(arg));
    fill_open(&arg, f);
    return send_reply_ok(req, &arg, sizeof(arg));
}

/* Reply to WRITE with the number of bytes actually written. */
int fuse_reply_write(fuse_req_t req, size_t count)
{
    struct fuse_write_out arg;

    memset(&arg, 0, sizeof(arg));
    arg.size = count;

    return send_reply_ok(req, &arg, sizeof(arg));
}

/* Reply with a raw data buffer (e.g. READ results already in memory). */
int fuse_reply_buf(fuse_req_t req, const char *buf, size_t size)
{
    return send_reply_ok(req, buf, size);
}

/*
 * Send a reply whose payload is described by a fuse_bufvec, appending it
 * after the already-prepared header iovec entries.
 */
static int fuse_send_data_iov_fallback(struct fuse_session *se,
                                       struct fuse_chan *ch, struct iovec *iov,
                                       int iov_count, struct fuse_bufvec *buf,
                                       size_t len)
{
    /* Optimize common case */
    if (buf->count == 1 && buf->idx == 0 && buf->off == 0 &&
        !(buf->buf[0].flags & FUSE_BUF_IS_FD)) {
        /*
         * FIXME: also avoid memory copy if there are multiple buffers
         * but none of them contain an fd
         */

        iov[iov_count].iov_base = buf->buf[0].mem;
        iov[iov_count].iov_len = len;
        iov_count++;
        return fuse_send_msg(se, ch, iov, iov_count);
    }

    /* fd-backed buffer: let the virtio layer splice it */
    if (fuse_lowlevel_is_virtio(se) && buf->count == 1 &&
        buf->buf[0].flags == (FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK)) {
        return virtio_send_data_iov(se, ch, iov, iov_count, buf, len);
    }

    abort(); /* Will have taken vhost path */
    return 0;
}

static int fuse_send_data_iov(struct fuse_session *se, struct fuse_chan *ch,
                              struct iovec *iov, int iov_count,
                              struct fuse_bufvec *buf)
{
    size_t len = fuse_buf_size(buf);

    return fuse_send_data_iov_fallback(se, ch, iov, iov_count, buf, len);
}

/*
 * Reply with data from a bufvec. On transport success (res <= 0) the
 * request is freed; on failure the error is reported via fuse_reply_err.
 */
int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv)
{
    struct iovec iov[2];
    struct fuse_out_header out = {
        .unique = req->unique,
    };
    int res;

    iov[0].iov_base = &out;
    iov[0].iov_len = sizeof(struct fuse_out_header);

    res = fuse_send_data_iov(req->se, req->ch, iov, 1, bufv);
    if (res <= 0) {
        fuse_free_req(req);
        return res;
    } else {
        return fuse_reply_err(req, res);
    }
}

/* Reply to STATFS with filesystem statistics. */
int fuse_reply_statfs(fuse_req_t req, const struct statvfs *stbuf)
{
    struct fuse_statfs_out arg;
    size_t size = sizeof(arg);

    memset(&arg, 0, sizeof(arg));
    convert_statfs(stbuf, &arg.st);

    return send_reply_ok(req, &arg, size);
}

/* Reply to GETXATTR/LISTXATTR size probes with the required size. */
int fuse_reply_xattr(fuse_req_t req, size_t count)
{
    struct fuse_getxattr_out arg;

    memset(&arg, 0, sizeof(arg));
    arg.size = count;

    return send_reply_ok(req, &arg, sizeof(arg));
}

/* Reply to GETLK with lock state; l_len == 0 means "to end of file". */
int fuse_reply_lock(fuse_req_t req, const struct flock *lock)
{
    struct fuse_lk_out arg;

    memset(&arg, 0, sizeof(arg));
    arg.lk.type = lock->l_type;
    if (lock->l_type != F_UNLCK) {
        arg.lk.start = lock->l_start;
        if (lock->l_len == 0) {
            arg.lk.end = OFFSET_MAX;
        } else {
            arg.lk.end = lock->l_start + lock->l_len - 1;
        }
    }
    arg.lk.pid = lock->l_pid;
    return send_reply_ok(req, &arg, sizeof(arg));
}

/* Reply to BMAP with the mapped block index. */
int fuse_reply_bmap(fuse_req_t req, uint64_t idx)
{
    struct fuse_bmap_out arg;

    memset(&arg, 0, sizeof(arg));
    arg.block = idx;

    return send_reply_ok(req, &arg, sizeof(arg));
}

573 + static struct fuse_ioctl_iovec *fuse_ioctl_iovec_copy(const struct iovec *iov, 574 + size_t count) 575 + { 576 + struct fuse_ioctl_iovec *fiov; 577 + size_t i; 578 + 579 + fiov = malloc(sizeof(fiov[0]) * count); 580 + if (!fiov) { 581 + return NULL; 582 + } 583 + 584 + for (i = 0; i < count; i++) { 585 + fiov[i].base = (uintptr_t)iov[i].iov_base; 586 + fiov[i].len = iov[i].iov_len; 587 + } 588 + 589 + return fiov; 590 + } 591 + 592 + int fuse_reply_ioctl_retry(fuse_req_t req, const struct iovec *in_iov, 593 + size_t in_count, const struct iovec *out_iov, 594 + size_t out_count) 595 + { 596 + struct fuse_ioctl_out arg; 597 + struct fuse_ioctl_iovec *in_fiov = NULL; 598 + struct fuse_ioctl_iovec *out_fiov = NULL; 599 + struct iovec iov[4]; 600 + size_t count = 1; 601 + int res; 602 + 603 + memset(&arg, 0, sizeof(arg)); 604 + arg.flags |= FUSE_IOCTL_RETRY; 605 + arg.in_iovs = in_count; 606 + arg.out_iovs = out_count; 607 + iov[count].iov_base = &arg; 608 + iov[count].iov_len = sizeof(arg); 609 + count++; 610 + 611 + /* Can't handle non-compat 64bit ioctls on 32bit */ 612 + if (sizeof(void *) == 4 && req->ioctl_64bit) { 613 + res = fuse_reply_err(req, EINVAL); 614 + goto out; 615 + } 616 + 617 + if (in_count) { 618 + in_fiov = fuse_ioctl_iovec_copy(in_iov, in_count); 619 + if (!in_fiov) { 620 + goto enomem; 621 + } 622 + 623 + iov[count].iov_base = (void *)in_fiov; 624 + iov[count].iov_len = sizeof(in_fiov[0]) * in_count; 625 + count++; 626 + } 627 + if (out_count) { 628 + out_fiov = fuse_ioctl_iovec_copy(out_iov, out_count); 629 + if (!out_fiov) { 630 + goto enomem; 631 + } 632 + 633 + iov[count].iov_base = (void *)out_fiov; 634 + iov[count].iov_len = sizeof(out_fiov[0]) * out_count; 635 + count++; 636 + } 637 + 638 + res = send_reply_iov(req, 0, iov, count); 639 + out: 640 + free(in_fiov); 641 + free(out_fiov); 642 + 643 + return res; 644 + 645 + enomem: 646 + res = fuse_reply_err(req, ENOMEM); 647 + goto out; 648 + } 649 + 650 + int fuse_reply_ioctl(fuse_req_t 
req, int result, const void *buf, size_t size) 651 + { 652 + struct fuse_ioctl_out arg; 653 + struct iovec iov[3]; 654 + size_t count = 1; 655 + 656 + memset(&arg, 0, sizeof(arg)); 657 + arg.result = result; 658 + iov[count].iov_base = &arg; 659 + iov[count].iov_len = sizeof(arg); 660 + count++; 661 + 662 + if (size) { 663 + iov[count].iov_base = (char *)buf; 664 + iov[count].iov_len = size; 665 + count++; 666 + } 667 + 668 + return send_reply_iov(req, 0, iov, count); 669 + } 670 + 671 + int fuse_reply_ioctl_iov(fuse_req_t req, int result, const struct iovec *iov, 672 + int count) 673 + { 674 + struct iovec *padded_iov; 675 + struct fuse_ioctl_out arg; 676 + int res; 677 + 678 + padded_iov = malloc((count + 2) * sizeof(struct iovec)); 679 + if (padded_iov == NULL) { 680 + return fuse_reply_err(req, ENOMEM); 681 + } 682 + 683 + memset(&arg, 0, sizeof(arg)); 684 + arg.result = result; 685 + padded_iov[1].iov_base = &arg; 686 + padded_iov[1].iov_len = sizeof(arg); 687 + 688 + memcpy(&padded_iov[2], iov, count * sizeof(struct iovec)); 689 + 690 + res = send_reply_iov(req, 0, padded_iov, count + 2); 691 + free(padded_iov); 692 + 693 + return res; 694 + } 695 + 696 + int fuse_reply_poll(fuse_req_t req, unsigned revents) 697 + { 698 + struct fuse_poll_out arg; 699 + 700 + memset(&arg, 0, sizeof(arg)); 701 + arg.revents = revents; 702 + 703 + return send_reply_ok(req, &arg, sizeof(arg)); 704 + } 705 + 706 + int fuse_reply_lseek(fuse_req_t req, off_t off) 707 + { 708 + struct fuse_lseek_out arg; 709 + 710 + memset(&arg, 0, sizeof(arg)); 711 + arg.offset = off; 712 + 713 + return send_reply_ok(req, &arg, sizeof(arg)); 714 + } 715 + 716 + static void do_lookup(fuse_req_t req, fuse_ino_t nodeid, 717 + struct fuse_mbuf_iter *iter) 718 + { 719 + const char *name = fuse_mbuf_iter_advance_str(iter); 720 + if (!name) { 721 + fuse_reply_err(req, EINVAL); 722 + return; 723 + } 724 + 725 + if (req->se->op.lookup) { 726 + req->se->op.lookup(req, nodeid, name); 727 + } else { 728 + 
fuse_reply_err(req, ENOSYS); 729 + } 730 + } 731 + 732 + static void do_forget(fuse_req_t req, fuse_ino_t nodeid, 733 + struct fuse_mbuf_iter *iter) 734 + { 735 + struct fuse_forget_in *arg; 736 + 737 + arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); 738 + if (!arg) { 739 + fuse_reply_err(req, EINVAL); 740 + return; 741 + } 742 + 743 + if (req->se->op.forget) { 744 + req->se->op.forget(req, nodeid, arg->nlookup); 745 + } else { 746 + fuse_reply_none(req); 747 + } 748 + } 749 + 750 + static void do_batch_forget(fuse_req_t req, fuse_ino_t nodeid, 751 + struct fuse_mbuf_iter *iter) 752 + { 753 + struct fuse_batch_forget_in *arg; 754 + struct fuse_forget_data *forgets; 755 + size_t scount; 756 + 757 + (void)nodeid; 758 + 759 + arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); 760 + if (!arg) { 761 + fuse_reply_none(req); 762 + return; 763 + } 764 + 765 + /* 766 + * Prevent integer overflow. The compiler emits the following warning 767 + * unless we use the scount local variable: 768 + * 769 + * error: comparison is always false due to limited range of data type 770 + * [-Werror=type-limits] 771 + * 772 + * This may be true on 64-bit hosts but we need this check for 32-bit 773 + * hosts. 
774 + */ 775 + scount = arg->count; 776 + if (scount > SIZE_MAX / sizeof(forgets[0])) { 777 + fuse_reply_none(req); 778 + return; 779 + } 780 + 781 + forgets = fuse_mbuf_iter_advance(iter, arg->count * sizeof(forgets[0])); 782 + if (!forgets) { 783 + fuse_reply_none(req); 784 + return; 785 + } 786 + 787 + if (req->se->op.forget_multi) { 788 + req->se->op.forget_multi(req, arg->count, forgets); 789 + } else if (req->se->op.forget) { 790 + unsigned int i; 791 + 792 + for (i = 0; i < arg->count; i++) { 793 + struct fuse_req *dummy_req; 794 + 795 + dummy_req = fuse_ll_alloc_req(req->se); 796 + if (dummy_req == NULL) { 797 + break; 798 + } 799 + 800 + dummy_req->unique = req->unique; 801 + dummy_req->ctx = req->ctx; 802 + dummy_req->ch = NULL; 803 + 804 + req->se->op.forget(dummy_req, forgets[i].ino, forgets[i].nlookup); 805 + } 806 + fuse_reply_none(req); 807 + } else { 808 + fuse_reply_none(req); 809 + } 810 + } 811 + 812 + static void do_getattr(fuse_req_t req, fuse_ino_t nodeid, 813 + struct fuse_mbuf_iter *iter) 814 + { 815 + struct fuse_file_info *fip = NULL; 816 + struct fuse_file_info fi; 817 + 818 + struct fuse_getattr_in *arg; 819 + 820 + arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); 821 + if (!arg) { 822 + fuse_reply_err(req, EINVAL); 823 + return; 824 + } 825 + 826 + if (arg->getattr_flags & FUSE_GETATTR_FH) { 827 + memset(&fi, 0, sizeof(fi)); 828 + fi.fh = arg->fh; 829 + fip = &fi; 830 + } 831 + 832 + if (req->se->op.getattr) { 833 + req->se->op.getattr(req, nodeid, fip); 834 + } else { 835 + fuse_reply_err(req, ENOSYS); 836 + } 837 + } 838 + 839 + static void do_setattr(fuse_req_t req, fuse_ino_t nodeid, 840 + struct fuse_mbuf_iter *iter) 841 + { 842 + if (req->se->op.setattr) { 843 + struct fuse_setattr_in *arg; 844 + struct fuse_file_info *fi = NULL; 845 + struct fuse_file_info fi_store; 846 + struct stat stbuf; 847 + 848 + arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); 849 + if (!arg) { 850 + fuse_reply_err(req, EINVAL); 851 + return; 852 + } 853 
+ 854 + memset(&stbuf, 0, sizeof(stbuf)); 855 + convert_attr(arg, &stbuf); 856 + if (arg->valid & FATTR_FH) { 857 + arg->valid &= ~FATTR_FH; 858 + memset(&fi_store, 0, sizeof(fi_store)); 859 + fi = &fi_store; 860 + fi->fh = arg->fh; 861 + } 862 + arg->valid &= FUSE_SET_ATTR_MODE | FUSE_SET_ATTR_UID | 863 + FUSE_SET_ATTR_GID | FUSE_SET_ATTR_SIZE | 864 + FUSE_SET_ATTR_ATIME | FUSE_SET_ATTR_MTIME | 865 + FUSE_SET_ATTR_ATIME_NOW | FUSE_SET_ATTR_MTIME_NOW | 866 + FUSE_SET_ATTR_CTIME; 867 + 868 + req->se->op.setattr(req, nodeid, &stbuf, arg->valid, fi); 869 + } else { 870 + fuse_reply_err(req, ENOSYS); 871 + } 872 + } 873 + 874 + static void do_access(fuse_req_t req, fuse_ino_t nodeid, 875 + struct fuse_mbuf_iter *iter) 876 + { 877 + struct fuse_access_in *arg; 878 + 879 + arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); 880 + if (!arg) { 881 + fuse_reply_err(req, EINVAL); 882 + return; 883 + } 884 + 885 + if (req->se->op.access) { 886 + req->se->op.access(req, nodeid, arg->mask); 887 + } else { 888 + fuse_reply_err(req, ENOSYS); 889 + } 890 + } 891 + 892 + static void do_readlink(fuse_req_t req, fuse_ino_t nodeid, 893 + struct fuse_mbuf_iter *iter) 894 + { 895 + (void)iter; 896 + 897 + if (req->se->op.readlink) { 898 + req->se->op.readlink(req, nodeid); 899 + } else { 900 + fuse_reply_err(req, ENOSYS); 901 + } 902 + } 903 + 904 + static void do_mknod(fuse_req_t req, fuse_ino_t nodeid, 905 + struct fuse_mbuf_iter *iter) 906 + { 907 + struct fuse_mknod_in *arg; 908 + const char *name; 909 + 910 + arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); 911 + name = fuse_mbuf_iter_advance_str(iter); 912 + if (!arg || !name) { 913 + fuse_reply_err(req, EINVAL); 914 + return; 915 + } 916 + 917 + req->ctx.umask = arg->umask; 918 + 919 + if (req->se->op.mknod) { 920 + req->se->op.mknod(req, nodeid, name, arg->mode, arg->rdev); 921 + } else { 922 + fuse_reply_err(req, ENOSYS); 923 + } 924 + } 925 + 926 + static void do_mkdir(fuse_req_t req, fuse_ino_t nodeid, 927 + struct 
fuse_mbuf_iter *iter) 928 + { 929 + struct fuse_mkdir_in *arg; 930 + const char *name; 931 + 932 + arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); 933 + name = fuse_mbuf_iter_advance_str(iter); 934 + if (!arg || !name) { 935 + fuse_reply_err(req, EINVAL); 936 + return; 937 + } 938 + 939 + req->ctx.umask = arg->umask; 940 + 941 + if (req->se->op.mkdir) { 942 + req->se->op.mkdir(req, nodeid, name, arg->mode); 943 + } else { 944 + fuse_reply_err(req, ENOSYS); 945 + } 946 + } 947 + 948 + static void do_unlink(fuse_req_t req, fuse_ino_t nodeid, 949 + struct fuse_mbuf_iter *iter) 950 + { 951 + const char *name = fuse_mbuf_iter_advance_str(iter); 952 + 953 + if (!name) { 954 + fuse_reply_err(req, EINVAL); 955 + return; 956 + } 957 + 958 + if (req->se->op.unlink) { 959 + req->se->op.unlink(req, nodeid, name); 960 + } else { 961 + fuse_reply_err(req, ENOSYS); 962 + } 963 + } 964 + 965 + static void do_rmdir(fuse_req_t req, fuse_ino_t nodeid, 966 + struct fuse_mbuf_iter *iter) 967 + { 968 + const char *name = fuse_mbuf_iter_advance_str(iter); 969 + 970 + if (!name) { 971 + fuse_reply_err(req, EINVAL); 972 + return; 973 + } 974 + 975 + if (req->se->op.rmdir) { 976 + req->se->op.rmdir(req, nodeid, name); 977 + } else { 978 + fuse_reply_err(req, ENOSYS); 979 + } 980 + } 981 + 982 + static void do_symlink(fuse_req_t req, fuse_ino_t nodeid, 983 + struct fuse_mbuf_iter *iter) 984 + { 985 + const char *name = fuse_mbuf_iter_advance_str(iter); 986 + const char *linkname = fuse_mbuf_iter_advance_str(iter); 987 + 988 + if (!name || !linkname) { 989 + fuse_reply_err(req, EINVAL); 990 + return; 991 + } 992 + 993 + if (req->se->op.symlink) { 994 + req->se->op.symlink(req, linkname, nodeid, name); 995 + } else { 996 + fuse_reply_err(req, ENOSYS); 997 + } 998 + } 999 + 1000 + static void do_rename(fuse_req_t req, fuse_ino_t nodeid, 1001 + struct fuse_mbuf_iter *iter) 1002 + { 1003 + struct fuse_rename_in *arg; 1004 + const char *oldname; 1005 + const char *newname; 1006 + 1007 + arg = 
fuse_mbuf_iter_advance(iter, sizeof(*arg)); 1008 + oldname = fuse_mbuf_iter_advance_str(iter); 1009 + newname = fuse_mbuf_iter_advance_str(iter); 1010 + if (!arg || !oldname || !newname) { 1011 + fuse_reply_err(req, EINVAL); 1012 + return; 1013 + } 1014 + 1015 + if (req->se->op.rename) { 1016 + req->se->op.rename(req, nodeid, oldname, arg->newdir, newname, 0); 1017 + } else { 1018 + fuse_reply_err(req, ENOSYS); 1019 + } 1020 + } 1021 + 1022 + static void do_rename2(fuse_req_t req, fuse_ino_t nodeid, 1023 + struct fuse_mbuf_iter *iter) 1024 + { 1025 + struct fuse_rename2_in *arg; 1026 + const char *oldname; 1027 + const char *newname; 1028 + 1029 + arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); 1030 + oldname = fuse_mbuf_iter_advance_str(iter); 1031 + newname = fuse_mbuf_iter_advance_str(iter); 1032 + if (!arg || !oldname || !newname) { 1033 + fuse_reply_err(req, EINVAL); 1034 + return; 1035 + } 1036 + 1037 + if (req->se->op.rename) { 1038 + req->se->op.rename(req, nodeid, oldname, arg->newdir, newname, 1039 + arg->flags); 1040 + } else { 1041 + fuse_reply_err(req, ENOSYS); 1042 + } 1043 + } 1044 + 1045 + static void do_link(fuse_req_t req, fuse_ino_t nodeid, 1046 + struct fuse_mbuf_iter *iter) 1047 + { 1048 + struct fuse_link_in *arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); 1049 + const char *name = fuse_mbuf_iter_advance_str(iter); 1050 + 1051 + if (!arg || !name) { 1052 + fuse_reply_err(req, EINVAL); 1053 + return; 1054 + } 1055 + 1056 + if (req->se->op.link) { 1057 + req->se->op.link(req, arg->oldnodeid, nodeid, name); 1058 + } else { 1059 + fuse_reply_err(req, ENOSYS); 1060 + } 1061 + } 1062 + 1063 + static void do_create(fuse_req_t req, fuse_ino_t nodeid, 1064 + struct fuse_mbuf_iter *iter) 1065 + { 1066 + if (req->se->op.create) { 1067 + struct fuse_create_in *arg; 1068 + struct fuse_file_info fi; 1069 + const char *name; 1070 + 1071 + arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); 1072 + name = fuse_mbuf_iter_advance_str(iter); 1073 + if (!arg || 
!name) { 1074 + fuse_reply_err(req, EINVAL); 1075 + return; 1076 + } 1077 + 1078 + memset(&fi, 0, sizeof(fi)); 1079 + fi.flags = arg->flags; 1080 + 1081 + req->ctx.umask = arg->umask; 1082 + 1083 + req->se->op.create(req, nodeid, name, arg->mode, &fi); 1084 + } else { 1085 + fuse_reply_err(req, ENOSYS); 1086 + } 1087 + } 1088 + 1089 + static void do_open(fuse_req_t req, fuse_ino_t nodeid, 1090 + struct fuse_mbuf_iter *iter) 1091 + { 1092 + struct fuse_open_in *arg; 1093 + struct fuse_file_info fi; 1094 + 1095 + arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); 1096 + if (!arg) { 1097 + fuse_reply_err(req, EINVAL); 1098 + return; 1099 + } 1100 + 1101 + memset(&fi, 0, sizeof(fi)); 1102 + fi.flags = arg->flags; 1103 + 1104 + if (req->se->op.open) { 1105 + req->se->op.open(req, nodeid, &fi); 1106 + } else { 1107 + fuse_reply_open(req, &fi); 1108 + } 1109 + } 1110 + 1111 + static void do_read(fuse_req_t req, fuse_ino_t nodeid, 1112 + struct fuse_mbuf_iter *iter) 1113 + { 1114 + if (req->se->op.read) { 1115 + struct fuse_read_in *arg; 1116 + struct fuse_file_info fi; 1117 + 1118 + arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); 1119 + 1120 + memset(&fi, 0, sizeof(fi)); 1121 + fi.fh = arg->fh; 1122 + fi.lock_owner = arg->lock_owner; 1123 + fi.flags = arg->flags; 1124 + req->se->op.read(req, nodeid, arg->size, arg->offset, &fi); 1125 + } else { 1126 + fuse_reply_err(req, ENOSYS); 1127 + } 1128 + } 1129 + 1130 + static void do_write(fuse_req_t req, fuse_ino_t nodeid, 1131 + struct fuse_mbuf_iter *iter) 1132 + { 1133 + struct fuse_write_in *arg; 1134 + struct fuse_file_info fi; 1135 + const char *param; 1136 + 1137 + arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); 1138 + if (!arg) { 1139 + fuse_reply_err(req, EINVAL); 1140 + return; 1141 + } 1142 + 1143 + param = fuse_mbuf_iter_advance(iter, arg->size); 1144 + if (!param) { 1145 + fuse_reply_err(req, EINVAL); 1146 + return; 1147 + } 1148 + 1149 + memset(&fi, 0, sizeof(fi)); 1150 + fi.fh = arg->fh; 1151 + fi.writepage = 
(arg->write_flags & FUSE_WRITE_CACHE) != 0; 1152 + fi.kill_priv = !!(arg->write_flags & FUSE_WRITE_KILL_PRIV); 1153 + 1154 + fi.lock_owner = arg->lock_owner; 1155 + fi.flags = arg->flags; 1156 + 1157 + if (req->se->op.write) { 1158 + req->se->op.write(req, nodeid, param, arg->size, arg->offset, &fi); 1159 + } else { 1160 + fuse_reply_err(req, ENOSYS); 1161 + } 1162 + } 1163 + 1164 + static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, 1165 + struct fuse_mbuf_iter *iter, struct fuse_bufvec *ibufv) 1166 + { 1167 + struct fuse_session *se = req->se; 1168 + struct fuse_bufvec *pbufv = ibufv; 1169 + struct fuse_bufvec tmpbufv = { 1170 + .buf[0] = ibufv->buf[0], 1171 + .count = 1, 1172 + }; 1173 + struct fuse_write_in *arg; 1174 + size_t arg_size = sizeof(*arg); 1175 + struct fuse_file_info fi; 1176 + 1177 + memset(&fi, 0, sizeof(fi)); 1178 + 1179 + arg = fuse_mbuf_iter_advance(iter, arg_size); 1180 + if (!arg) { 1181 + fuse_reply_err(req, EINVAL); 1182 + return; 1183 + } 1184 + 1185 + fi.lock_owner = arg->lock_owner; 1186 + fi.flags = arg->flags; 1187 + fi.fh = arg->fh; 1188 + fi.writepage = !!(arg->write_flags & FUSE_WRITE_CACHE); 1189 + fi.kill_priv = !!(arg->write_flags & FUSE_WRITE_KILL_PRIV); 1190 + 1191 + if (ibufv->count == 1) { 1192 + assert(!(tmpbufv.buf[0].flags & FUSE_BUF_IS_FD)); 1193 + tmpbufv.buf[0].mem = ((char *)arg) + arg_size; 1194 + tmpbufv.buf[0].size -= sizeof(struct fuse_in_header) + arg_size; 1195 + pbufv = &tmpbufv; 1196 + } else { 1197 + /* 1198 + * Input bufv contains the headers in the first element 1199 + * and the data in the rest, we need to skip that first element 1200 + */ 1201 + ibufv->buf[0].size = 0; 1202 + } 1203 + 1204 + if (fuse_buf_size(pbufv) != arg->size) { 1205 + fuse_log(FUSE_LOG_ERR, 1206 + "fuse: do_write_buf: buffer size doesn't match arg->size\n"); 1207 + fuse_reply_err(req, EIO); 1208 + return; 1209 + } 1210 + 1211 + se->op.write_buf(req, nodeid, pbufv, arg->offset, &fi); 1212 + } 1213 + 1214 + static void 
/* Handle FUSE_FLUSH: forward to op.flush(), or ENOSYS if unimplemented */
do_flush(fuse_req_t req, fuse_ino_t nodeid,
         struct fuse_mbuf_iter *iter)
{
    struct fuse_flush_in *arg;
    struct fuse_file_info fi;

    arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
    if (!arg) {
        fuse_reply_err(req, EINVAL);
        return;
    }

    memset(&fi, 0, sizeof(fi));
    fi.fh = arg->fh;
    fi.flush = 1;
    fi.lock_owner = arg->lock_owner;

    if (req->se->op.flush) {
        req->se->op.flush(req, nodeid, &fi);
    } else {
        fuse_reply_err(req, ENOSYS);
    }
}

/*
 * Handle FUSE_RELEASE (last reference to an open file dropped).
 * Unimplemented op.release is not an error: reply success (0), since
 * there is nothing for the filesystem to tear down.
 */
static void do_release(fuse_req_t req, fuse_ino_t nodeid,
                       struct fuse_mbuf_iter *iter)
{
    struct fuse_release_in *arg;
    struct fuse_file_info fi;

    arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
    if (!arg) {
        fuse_reply_err(req, EINVAL);
        return;
    }

    memset(&fi, 0, sizeof(fi));
    fi.flags = arg->flags;
    fi.fh = arg->fh;
    fi.flush = (arg->release_flags & FUSE_RELEASE_FLUSH) ? 1 : 0;
    fi.lock_owner = arg->lock_owner;

    /* Client asks us to drop any flock() locks held on this handle */
    if (arg->release_flags & FUSE_RELEASE_FLOCK_UNLOCK) {
        fi.flock_release = 1;
    }

    if (req->se->op.release) {
        req->se->op.release(req, nodeid, &fi);
    } else {
        fuse_reply_err(req, 0);
    }
}

/* Handle FUSE_FSYNC; low bit of fsync_flags selects fdatasync semantics */
static void do_fsync(fuse_req_t req, fuse_ino_t nodeid,
                     struct fuse_mbuf_iter *iter)
{
    struct fuse_fsync_in *arg;
    struct fuse_file_info fi;
    int datasync;

    arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
    if (!arg) {
        fuse_reply_err(req, EINVAL);
        return;
    }
    datasync = arg->fsync_flags & 1;

    memset(&fi, 0, sizeof(fi));
    fi.fh = arg->fh;

    if (req->se->op.fsync) {
        /*
         * fh of all-ones means "no file handle": pass NULL so the handler
         * can distinguish.  NOTE(review): presumably used for syncing
         * without an open handle - confirm against the handler.
         */
        if (fi.fh == (uint64_t)-1) {
            req->se->op.fsync(req, nodeid, datasync, NULL);
        } else {
            req->se->op.fsync(req, nodeid, datasync, &fi);
        }
    } else {
        fuse_reply_err(req, ENOSYS);
    }
}

/*
 * Handle FUSE_OPENDIR.  When op.opendir is missing, reply success with an
 * empty fuse_file_info (directories can still be read without it).
 */
static void do_opendir(fuse_req_t req, fuse_ino_t nodeid,
                       struct fuse_mbuf_iter *iter)
{
    struct fuse_open_in *arg;
    struct fuse_file_info fi;

    arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
    if (!arg) {
        fuse_reply_err(req, EINVAL);
        return;
    }

    memset(&fi, 0, sizeof(fi));
    fi.flags = arg->flags;

    if (req->se->op.opendir) {
        req->se->op.opendir(req, nodeid, &fi);
    } else {
        fuse_reply_open(req, &fi);
    }
}

/* Handle FUSE_READDIR: plain directory listing (names only) */
static void do_readdir(fuse_req_t req, fuse_ino_t nodeid,
                       struct fuse_mbuf_iter *iter)
{
    struct fuse_read_in *arg;
    struct fuse_file_info fi;

    arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
    if (!arg) {
        fuse_reply_err(req, EINVAL);
        return;
    }

    memset(&fi, 0, sizeof(fi));
    fi.fh = arg->fh;

    if (req->se->op.readdir) {
        req->se->op.readdir(req, nodeid, arg->size, arg->offset, &fi);
    } else {
        fuse_reply_err(req, ENOSYS);
    }
}

/* Handle FUSE_READDIRPLUS: directory listing with full attributes */
static void do_readdirplus(fuse_req_t req, fuse_ino_t nodeid,
                           struct fuse_mbuf_iter *iter)
{
    struct fuse_read_in *arg;
    struct fuse_file_info fi;

    arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
    if (!arg) {
        fuse_reply_err(req, EINVAL);
        return;
    }

    memset(&fi, 0, sizeof(fi));
    fi.fh = arg->fh;

    if (req->se->op.readdirplus) {
        req->se->op.readdirplus(req, nodeid, arg->size, arg->offset, &fi);
    } else {
        fuse_reply_err(req, ENOSYS);
    }
}

/* Handle FUSE_RELEASEDIR; like do_release, missing handler replies 0 */
static void do_releasedir(fuse_req_t req, fuse_ino_t nodeid,
                          struct fuse_mbuf_iter *iter)
{
    struct fuse_release_in *arg;
    struct fuse_file_info fi;

    arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
    if (!arg) {
        fuse_reply_err(req, EINVAL);
        return;
    }

    memset(&fi, 0, sizeof(fi));
    fi.flags = arg->flags;
    fi.fh = arg->fh;

    if (req->se->op.releasedir) {
        req->se->op.releasedir(req, nodeid, &fi);
    } else {
        fuse_reply_err(req, 0);
    }
}

/* Handle FUSE_FSYNCDIR; low bit of fsync_flags selects datasync */
static void do_fsyncdir(fuse_req_t req, fuse_ino_t nodeid,
                        struct fuse_mbuf_iter *iter)
{
    struct fuse_fsync_in *arg;
    struct fuse_file_info fi;
    int datasync;

    arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
    if (!arg) {
        fuse_reply_err(req, EINVAL);
        return;
    }
    datasync = arg->fsync_flags & 1;

    memset(&fi, 0, sizeof(fi));
    fi.fh = arg->fh;

    if (req->se->op.fsyncdir) {
        req->se->op.fsyncdir(req, nodeid, datasync, &fi);
    } else {
        fuse_reply_err(req, ENOSYS);
    }
}

static void
/*
 * Handle FUSE_STATFS.  Without an op.statfs handler, reply with minimal
 * hard-coded defaults (255-byte names, 512-byte blocks).
 */
do_statfs(fuse_req_t req, fuse_ino_t nodeid,
          struct fuse_mbuf_iter *iter)
{
    (void)nodeid;
    (void)iter;

    if (req->se->op.statfs) {
        req->se->op.statfs(req, nodeid);
    } else {
        struct statvfs buf = {
            .f_namemax = 255,
            .f_bsize = 512,
        };
        fuse_reply_statfs(req, &buf);
    }
}

/*
 * Handle FUSE_SETXATTR.  Wire layout: fuse_setxattr_in header, then the
 * NUL-terminated attribute name, then arg->size bytes of value.
 */
static void do_setxattr(fuse_req_t req, fuse_ino_t nodeid,
                        struct fuse_mbuf_iter *iter)
{
    struct fuse_setxattr_in *arg;
    const char *name;
    const char *value;

    arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
    name = fuse_mbuf_iter_advance_str(iter);
    if (!arg || !name) {
        fuse_reply_err(req, EINVAL);
        return;
    }

    value = fuse_mbuf_iter_advance(iter, arg->size);
    if (!value) {
        fuse_reply_err(req, EINVAL);
        return;
    }

    if (req->se->op.setxattr) {
        req->se->op.setxattr(req, nodeid, name, value, arg->size, arg->flags);
    } else {
        fuse_reply_err(req, ENOSYS);
    }
}

/* Handle FUSE_GETXATTR: fuse_getxattr_in header followed by the name */
static void do_getxattr(fuse_req_t req, fuse_ino_t nodeid,
                        struct fuse_mbuf_iter *iter)
{
    struct fuse_getxattr_in *arg;
    const char *name;

    arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
    name = fuse_mbuf_iter_advance_str(iter);
    if (!arg || !name) {
        fuse_reply_err(req, EINVAL);
        return;
    }

    if (req->se->op.getxattr) {
        req->se->op.getxattr(req, nodeid, name, arg->size);
    } else {
        fuse_reply_err(req, ENOSYS);
    }
}

/* Handle FUSE_LISTXATTR (shares fuse_getxattr_in with GETXATTR) */
static void do_listxattr(fuse_req_t req, fuse_ino_t nodeid,
                         struct fuse_mbuf_iter *iter)
{
    struct fuse_getxattr_in *arg;

    arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
    if (!arg) {
        fuse_reply_err(req, EINVAL);
        return;
    }

    if (req->se->op.listxattr) {
        req->se->op.listxattr(req, nodeid, arg->size);
    } else {
        fuse_reply_err(req, ENOSYS);
    }
}

/* Handle FUSE_REMOVEXATTR: payload is just the NUL-terminated name */
static void do_removexattr(fuse_req_t req, fuse_ino_t nodeid,
                           struct fuse_mbuf_iter *iter)
{
    const char *name = fuse_mbuf_iter_advance_str(iter);

    if (!name) {
        fuse_reply_err(req, EINVAL);
        return;
    }

    if (req->se->op.removexattr) {
        req->se->op.removexattr(req, nodeid, name);
    } else {
        fuse_reply_err(req, ENOSYS);
    }
}

/*
 * Translate a FUSE wire-format lock description into a POSIX struct flock.
 * An end offset of OFFSET_MAX maps to l_len == 0 ("to end of file").
 */
static void convert_fuse_file_lock(struct fuse_file_lock *fl,
                                   struct flock *flock)
{
    memset(flock, 0, sizeof(struct flock));
    flock->l_type = fl->type;
    flock->l_whence = SEEK_SET;
    flock->l_start = fl->start;
    if (fl->end == OFFSET_MAX) {
        flock->l_len = 0;
    } else {
        flock->l_len = fl->end - fl->start + 1;
    }
    flock->l_pid = fl->pid;
}

/* Handle FUSE_GETLK: query a POSIX advisory lock */
static void do_getlk(fuse_req_t req, fuse_ino_t nodeid,
                     struct fuse_mbuf_iter *iter)
{
    struct fuse_lk_in *arg;
    struct fuse_file_info fi;
    struct flock flock;

    arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
    if (!arg) {
        fuse_reply_err(req, EINVAL);
        return;
    }

    memset(&fi, 0, sizeof(fi));
    fi.fh = arg->fh;
    fi.lock_owner = arg->owner;

    convert_fuse_file_lock(&arg->lk, &flock);
    if (req->se->op.getlk) {
        req->se->op.getlk(req, nodeid, &fi, &flock);
    } else {
        fuse_reply_err(req, ENOSYS);
    }
}

/*
 * Common body of FUSE_SETLK/FUSE_SETLKW ('sleep' selects blocking).
 * FUSE_LK_FLOCK requests route to op.flock() with LOCK_* flags; otherwise
 * the request is a POSIX lock routed to op.setlk().
 */
static void do_setlk_common(fuse_req_t req, fuse_ino_t nodeid,
                            struct fuse_mbuf_iter *iter, int sleep)
{
    struct fuse_lk_in *arg;
    struct fuse_file_info fi;
    struct flock flock;

    arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
    if (!arg) {
        fuse_reply_err(req, EINVAL);
        return;
    }

    memset(&fi, 0, sizeof(fi));
    fi.fh = arg->fh;
    fi.lock_owner = arg->owner;

    if (arg->lk_flags & FUSE_LK_FLOCK) {
        int op = 0;

        switch (arg->lk.type) {
        case F_RDLCK:
            op = LOCK_SH;
            break;
        case F_WRLCK:
            op = LOCK_EX;
            break;
        case F_UNLCK:
            op = LOCK_UN;
            break;
        }
        if (!sleep) {
            op |= LOCK_NB;
        }

        if (req->se->op.flock) {
            req->se->op.flock(req, nodeid, &fi, op);
        } else {
            fuse_reply_err(req, ENOSYS);
        }
    } else {
        convert_fuse_file_lock(&arg->lk, &flock);
        if (req->se->op.setlk) {
            req->se->op.setlk(req, nodeid, &fi, &flock, sleep);
        } else {
            fuse_reply_err(req, ENOSYS);
        }
    }
}

/* Handle FUSE_SETLK (non-blocking) */
static void do_setlk(fuse_req_t req, fuse_ino_t nodeid,
                     struct fuse_mbuf_iter *iter)
{
    do_setlk_common(req, nodeid, iter, 0);
}

/* Handle FUSE_SETLKW (blocking) */
static void do_setlkw(fuse_req_t req, fuse_ino_t nodeid,
                      struct fuse_mbuf_iter *iter)
{
    do_setlk_common(req, nodeid, iter, 1);
}

/*
 * Look for the in-flight request named by an INTERRUPT request (req holds
 * the target's unique id in u.i.unique) and fire its interrupt callback.
 * Returns 1 when the target was found (or the interrupt is already queued),
 * 0 otherwise.  Called with se->lock held; temporarily drops it to honour
 * the curr->lock-before-se->lock ordering.
 */
static int find_interrupted(struct fuse_session *se, struct fuse_req *req)
{
    struct fuse_req *curr;

    for (curr = se->list.next; curr != &se->list; curr = curr->next) {
        if (curr->unique == req->u.i.unique) {
            fuse_interrupt_func_t func;
            void *data;

            /* Pin curr so it survives while we juggle the locks */
            curr->ctr++;
            pthread_mutex_unlock(&se->lock);

            /* Ugh, ugly locking */
            pthread_mutex_lock(&curr->lock);
            pthread_mutex_lock(&se->lock);
            curr->interrupted = 1;
            func = curr->u.ni.func;
            data = curr->u.ni.data;
            pthread_mutex_unlock(&se->lock);
            if (func) {
                func(curr, data);
            }
            pthread_mutex_unlock(&curr->lock);

            pthread_mutex_lock(&se->lock);
            curr->ctr--;
            if (!curr->ctr) {
                destroy_req(curr);
            }

            return 1;
        }
    }
    /* Not found in flight: maybe an identical interrupt is already queued */
    for (curr = se->interrupts.next; curr != &se->interrupts;
         curr = curr->next) {
        if (curr->u.i.unique == req->u.i.unique) {
            return 1;
        }
    }
    return 0;
}

/*
 * Handle FUSE_INTERRUPT.  If the target request is currently running,
 * interrupt it and free this request; otherwise park it on se->interrupts
 * so check_interrupt() can match it when the target arrives.
 */
static void do_interrupt(fuse_req_t req, fuse_ino_t nodeid,
                         struct fuse_mbuf_iter *iter)
{
    struct fuse_interrupt_in *arg;
    struct fuse_session *se = req->se;

    (void)nodeid;

    arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
    if (!arg) {
        fuse_reply_err(req, EINVAL);
        return;
    }

    fuse_log(FUSE_LOG_DEBUG, "INTERRUPT: %llu\n",
             (unsigned long long)arg->unique);

    req->u.i.unique = arg->unique;

    pthread_mutex_lock(&se->lock);
    if (find_interrupted(se, req)) {
        destroy_req(req);
    } else {
        list_add_req(req, &se->interrupts);
    }
    pthread_mutex_unlock(&se->lock);
}

/*
 * Check whether a queued INTERRUPT targets 'req'.  If so, mark req
 * interrupted, consume the interrupt entry and return NULL.  Otherwise
 * return the next pending interrupt (unlinked) for the caller to answer,
 * or NULL if none.  Called with se->lock held.
 */
static struct fuse_req *check_interrupt(struct fuse_session *se,
                                        struct fuse_req *req)
{
    struct fuse_req *curr;

    for (curr = se->interrupts.next; curr != &se->interrupts;
         curr = curr->next) {
        if (curr->u.i.unique == req->unique) {
            req->interrupted = 1;
            list_del_req(curr);
            free(curr);
            return NULL;
        }
    }
    curr = se->interrupts.next;
    if (curr != &se->interrupts) {
        list_del_req(curr);
        list_init_req(curr);
        return curr;
    } else {
        return NULL;
    }
}

/* Handle FUSE_BMAP: map a file block to a device block */
static void do_bmap(fuse_req_t req, fuse_ino_t nodeid,
                    struct fuse_mbuf_iter *iter)
{
    struct fuse_bmap_in *arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));

    if (!arg) {
        fuse_reply_err(req, EINVAL);
        return;
    }

    if (req->se->op.bmap) {
        req->se->op.bmap(req, nodeid, arg->blocksize, arg->block);
    } else {
        fuse_reply_err(req, ENOSYS);
    }
}

/*
 * Handle FUSE_IOCTL.  Directory ioctls are refused with ENOTTY unless the
 * filesystem negotiated FUSE_CAP_IOCTL_DIR.  arg->in_size bytes of ioctl
 * input, if any, follow the header.
 */
static void do_ioctl(fuse_req_t req, fuse_ino_t nodeid,
                     struct fuse_mbuf_iter *iter)
{
    struct fuse_ioctl_in *arg;
    unsigned int flags;
    void *in_buf = NULL;
    struct fuse_file_info fi;

    arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
    if (!arg) {
        fuse_reply_err(req, EINVAL);
        return;
    }

    flags = arg->flags;
    if (flags & FUSE_IOCTL_DIR && !(req->se->conn.want & FUSE_CAP_IOCTL_DIR)) {
        fuse_reply_err(req, ENOTTY);
        return;
    }

    if (arg->in_size) {
        in_buf = fuse_mbuf_iter_advance(iter, arg->in_size);
        if (!in_buf) {
            fuse_reply_err(req, EINVAL);
            return;
        }
    }

    memset(&fi, 0, sizeof(fi));
    fi.fh = arg->fh;

    /* On a 32-bit daemon, remember when the client is a 64-bit process */
    if (sizeof(void *) == 4 && !(flags & FUSE_IOCTL_32BIT)) {
        req->ioctl_64bit = 1;
    }

    if (req->se->op.ioctl) {
        req->se->op.ioctl(req, nodeid, arg->cmd, (void *)(uintptr_t)arg->arg,
                          &fi, flags, in_buf, arg->in_size, arg->out_size);
    } else {
        fuse_reply_err(req, ENOSYS);
    }
}

/* Free a poll handle allocated by do_poll() */
void fuse_pollhandle_destroy(struct fuse_pollhandle *ph)
{
    free(ph);
}

/*
 * Handle FUSE_POLL.  When the client asks to be notified
 * (FUSE_POLL_SCHEDULE_NOTIFY), allocate a poll handle whose ownership
 * passes to op.poll(); it is later freed via fuse_pollhandle_destroy().
 */
static void do_poll(fuse_req_t req, fuse_ino_t nodeid,
                    struct fuse_mbuf_iter *iter)
{
    struct fuse_poll_in *arg;
    struct fuse_file_info fi;

    arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
    if (!arg) {
        fuse_reply_err(req, EINVAL);
        return;
    }

    memset(&fi, 0, sizeof(fi));
    fi.fh = arg->fh;
    fi.poll_events = arg->events;

    if (req->se->op.poll) {
        struct fuse_pollhandle *ph = NULL;

        if (arg->flags & FUSE_POLL_SCHEDULE_NOTIFY) {
            ph = malloc(sizeof(struct fuse_pollhandle));
            if (ph == NULL) {
                fuse_reply_err(req, ENOMEM);
                return;
            }
            ph->kh = arg->kh;
            ph->se = req->se;
        }

        req->se->op.poll(req, nodeid, &fi, ph);
    } else {
        fuse_reply_err(req, ENOSYS);
    }
}

/* Handle FUSE_FALLOCATE: preallocate/punch space in an open file */
static void do_fallocate(fuse_req_t req, fuse_ino_t nodeid,
                         struct fuse_mbuf_iter *iter)
{
    struct fuse_fallocate_in *arg;
    struct fuse_file_info fi;

    arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
    if (!arg) {
        fuse_reply_err(req, EINVAL);
        return;
    }

    memset(&fi, 0, sizeof(fi));
    fi.fh = arg->fh;

    if (req->se->op.fallocate) {
        req->se->op.fallocate(req, nodeid, arg->mode, arg->offset, arg->length,
                              &fi);
    } else {
        fuse_reply_err(req, ENOSYS);
    }
}

/*
 * Handle FUSE_COPY_FILE_RANGE.  nodeid_in comes from the request header;
 * the destination inode (arg->nodeid_out) travels in the payload.
 */
static void do_copy_file_range(fuse_req_t req, fuse_ino_t nodeid_in,
                               struct fuse_mbuf_iter *iter)
{
    struct fuse_copy_file_range_in *arg;
    struct fuse_file_info fi_in, fi_out;

    arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
    if (!arg) {
        fuse_reply_err(req, EINVAL);
        return;
    }

    memset(&fi_in, 0, sizeof(fi_in));
    fi_in.fh = arg->fh_in;

    memset(&fi_out, 0, sizeof(fi_out));
    fi_out.fh = arg->fh_out;


    if (req->se->op.copy_file_range) {
        req->se->op.copy_file_range(req, nodeid_in, arg->off_in, &fi_in,
                                    arg->nodeid_out, arg->off_out, &fi_out,
                                    arg->len, arg->flags);
    } else {
        fuse_reply_err(req, ENOSYS);
    }
}

/* Handle FUSE_LSEEK (SEEK_HOLE/SEEK_DATA style queries via op.lseek) */
static void do_lseek(fuse_req_t req, fuse_ino_t nodeid,
                     struct fuse_mbuf_iter *iter)
{
    struct fuse_lseek_in *arg;
    struct fuse_file_info fi;

    arg = fuse_mbuf_iter_advance(iter, sizeof(*arg));
    if (!arg) {
        fuse_reply_err(req, EINVAL);
        return;
    }
    memset(&fi, 0, sizeof(fi));
    fi.fh = arg->fh;

    if (req->se->op.lseek) {
        req->se->op.lseek(req, nodeid, arg->offset, arg->whence, &fi);
    } else {
        fuse_reply_err(req, ENOSYS);
    }
}

/*
 * Handle FUSE_INIT: negotiate protocol version, capabilities and buffer
 * sizes with the client, then report the result back.  This daemon
 * requires protocol 7.31 or newer (older versions get EPROTO).  Ordering
 * matters below: capabilities are gathered from arg->flags, defaults are
 * chosen, op.init() is given a chance to adjust se->conn, and only then
 * is the final fuse_init_out assembled.
 */
static void do_init(fuse_req_t req, fuse_ino_t nodeid,
                    struct fuse_mbuf_iter *iter)
{
    /* Fields up to max_readahead exist in every INIT version */
    size_t compat_size = offsetof(struct fuse_init_in, max_readahead);
    struct fuse_init_in *arg;
    struct fuse_init_out outarg;
    struct fuse_session *se = req->se;
    size_t bufsize = se->bufsize;
    size_t outargsize = sizeof(outarg);

    (void)nodeid;

    /* First consume the old fields... */
    arg = fuse_mbuf_iter_advance(iter, compat_size);
    if (!arg) {
        fuse_reply_err(req, EINVAL);
        return;
    }

    /* ...and now consume the new fields. */
    if (arg->major == 7 && arg->minor >= 6) {
        if (!fuse_mbuf_iter_advance(iter, sizeof(*arg) - compat_size)) {
            fuse_reply_err(req, EINVAL);
            return;
        }
    }

    fuse_log(FUSE_LOG_DEBUG, "INIT: %u.%u\n", arg->major, arg->minor);
    if (arg->major == 7 && arg->minor >= 6) {
        fuse_log(FUSE_LOG_DEBUG, "flags=0x%08x\n", arg->flags);
        fuse_log(FUSE_LOG_DEBUG, "max_readahead=0x%08x\n", arg->max_readahead);
    }
    se->conn.proto_major = arg->major;
    se->conn.proto_minor = arg->minor;
    se->conn.capable = 0;
    se->conn.want = 0;

    memset(&outarg, 0, sizeof(outarg));
    outarg.major = FUSE_KERNEL_VERSION;
    outarg.minor = FUSE_KERNEL_MINOR_VERSION;

    /* Anything older than 7.31 is rejected outright */
    if (arg->major < 7 || (arg->major == 7 && arg->minor < 31)) {
        fuse_log(FUSE_LOG_ERR, "fuse: unsupported protocol version: %u.%u\n",
                 arg->major, arg->minor);
        fuse_reply_err(req, EPROTO);
        return;
    }

    if (arg->major > 7) {
        /* Wait for a second INIT request with a 7.X version */
        send_reply_ok(req, &outarg, sizeof(outarg));
        return;
    }

    if (arg->max_readahead < se->conn.max_readahead) {
        se->conn.max_readahead = arg->max_readahead;
    }

    /* Record each capability the client advertises in arg->flags */
    if (arg->flags & FUSE_ASYNC_READ) {
        se->conn.capable |= FUSE_CAP_ASYNC_READ;
    }
    if (arg->flags & FUSE_POSIX_LOCKS) {
        se->conn.capable |= FUSE_CAP_POSIX_LOCKS;
    }
    if (arg->flags & FUSE_ATOMIC_O_TRUNC) {
        se->conn.capable |= FUSE_CAP_ATOMIC_O_TRUNC;
    }
    if (arg->flags & FUSE_EXPORT_SUPPORT) {
        se->conn.capable |= FUSE_CAP_EXPORT_SUPPORT;
    }
    if (arg->flags & FUSE_DONT_MASK) {
        se->conn.capable |= FUSE_CAP_DONT_MASK;
    }
    if (arg->flags & FUSE_FLOCK_LOCKS) {
        se->conn.capable |= FUSE_CAP_FLOCK_LOCKS;
    }
    if (arg->flags & FUSE_AUTO_INVAL_DATA) {
        se->conn.capable |= FUSE_CAP_AUTO_INVAL_DATA;
    }
    if (arg->flags & FUSE_DO_READDIRPLUS) {
        se->conn.capable |= FUSE_CAP_READDIRPLUS;
    }
    if (arg->flags & FUSE_READDIRPLUS_AUTO) {
        se->conn.capable |= FUSE_CAP_READDIRPLUS_AUTO;
    }
    if (arg->flags & FUSE_ASYNC_DIO) {
        se->conn.capable |= FUSE_CAP_ASYNC_DIO;
    }
    if (arg->flags & FUSE_WRITEBACK_CACHE) {
        se->conn.capable |= FUSE_CAP_WRITEBACK_CACHE;
    }
    if (arg->flags & FUSE_NO_OPEN_SUPPORT) {
        se->conn.capable |= FUSE_CAP_NO_OPEN_SUPPORT;
    }
    if (arg->flags & FUSE_PARALLEL_DIROPS) {
        se->conn.capable |= FUSE_CAP_PARALLEL_DIROPS;
    }
    if (arg->flags & FUSE_POSIX_ACL) {
        se->conn.capable |= FUSE_CAP_POSIX_ACL;
    }
    if (arg->flags & FUSE_HANDLE_KILLPRIV) {
        se->conn.capable |= FUSE_CAP_HANDLE_KILLPRIV;
    }
    if (arg->flags & FUSE_NO_OPENDIR_SUPPORT) {
        se->conn.capable |= FUSE_CAP_NO_OPENDIR_SUPPORT;
    }
    /* Without FUSE_MAX_PAGES the request buffer is capped at the default */
    if (!(arg->flags & FUSE_MAX_PAGES)) {
        size_t max_bufsize = FUSE_DEFAULT_MAX_PAGES_PER_REQ * getpagesize() +
                             FUSE_BUFFER_HEADER_SIZE;
        if (bufsize > max_bufsize) {
            bufsize = max_bufsize;
        }
    }
#ifdef HAVE_SPLICE
#ifdef HAVE_VMSPLICE
    se->conn.capable |= FUSE_CAP_SPLICE_WRITE | FUSE_CAP_SPLICE_MOVE;
#endif
    se->conn.capable |= FUSE_CAP_SPLICE_READ;
#endif
    se->conn.capable |= FUSE_CAP_IOCTL_DIR;

    /*
     * Default settings for modern filesystems.
     *
     * Most of these capabilities were disabled by default in
     * libfuse2 for backwards compatibility reasons. In libfuse3,
     * we can finally enable them by default (as long as they're
     * supported by the kernel).
     */
#define LL_SET_DEFAULT(cond, cap) \
    if ((cond) && (se->conn.capable & (cap))) \
        se->conn.want |= (cap)
    LL_SET_DEFAULT(1, FUSE_CAP_ASYNC_READ);
    LL_SET_DEFAULT(1, FUSE_CAP_PARALLEL_DIROPS);
    LL_SET_DEFAULT(1, FUSE_CAP_AUTO_INVAL_DATA);
    LL_SET_DEFAULT(1, FUSE_CAP_HANDLE_KILLPRIV);
    LL_SET_DEFAULT(1, FUSE_CAP_ASYNC_DIO);
    LL_SET_DEFAULT(1, FUSE_CAP_IOCTL_DIR);
    LL_SET_DEFAULT(1, FUSE_CAP_ATOMIC_O_TRUNC);
    LL_SET_DEFAULT(se->op.write_buf, FUSE_CAP_SPLICE_READ);
    LL_SET_DEFAULT(se->op.getlk && se->op.setlk, FUSE_CAP_POSIX_LOCKS);
    LL_SET_DEFAULT(se->op.flock, FUSE_CAP_FLOCK_LOCKS);
    LL_SET_DEFAULT(se->op.readdirplus, FUSE_CAP_READDIRPLUS);
    LL_SET_DEFAULT(se->op.readdirplus && se->op.readdir,
                   FUSE_CAP_READDIRPLUS_AUTO);
    se->conn.time_gran = 1;

    if (bufsize < FUSE_MIN_READ_BUFFER) {
        fuse_log(FUSE_LOG_ERR, "fuse: warning: buffer size too small: %zu\n",
                 bufsize);
        bufsize = FUSE_MIN_READ_BUFFER;
    }
    se->bufsize = bufsize;

    if (se->conn.max_write > bufsize - FUSE_BUFFER_HEADER_SIZE) {
        se->conn.max_write = bufsize - FUSE_BUFFER_HEADER_SIZE;
    }

    se->got_init = 1;
    se->got_destroy = 0;
    /* Let the filesystem adjust se->conn (want flags, limits, ...) */
    if (se->op.init) {
        se->op.init(se->userdata, &se->conn);
    }

    /* The filesystem must not ask for more than the client offered */
    if (se->conn.want & (~se->conn.capable)) {
        fuse_log(FUSE_LOG_ERR,
                 "fuse: error: filesystem requested capabilities "
                 "0x%x that are not supported by kernel, aborting.\n",
                 se->conn.want & (~se->conn.capable));
        fuse_reply_err(req, EPROTO);
        se->error = -EPROTO;
        fuse_session_exit(se);
        return;
    }

    if (se->conn.max_write < bufsize - FUSE_BUFFER_HEADER_SIZE) {
        se->bufsize = se->conn.max_write + FUSE_BUFFER_HEADER_SIZE;
    }
    if (arg->flags & FUSE_MAX_PAGES) {
        outarg.flags |= FUSE_MAX_PAGES;
        outarg.max_pages = (se->conn.max_write - 1) / getpagesize() + 1;
    }

    /*
     * Always enable big writes, this is superseded
     * by the max_write option
     */
    outarg.flags |= FUSE_BIG_WRITES;

    /* Mirror the negotiated 'want' capabilities into the reply flags */
    if (se->conn.want & FUSE_CAP_ASYNC_READ) {
        outarg.flags |= FUSE_ASYNC_READ;
    }
    if (se->conn.want & FUSE_CAP_PARALLEL_DIROPS) {
        outarg.flags |= FUSE_PARALLEL_DIROPS;
    }
    if (se->conn.want & FUSE_CAP_POSIX_LOCKS) {
        outarg.flags |= FUSE_POSIX_LOCKS;
    }
    if (se->conn.want & FUSE_CAP_ATOMIC_O_TRUNC) {
        outarg.flags |= FUSE_ATOMIC_O_TRUNC;
    }
    if (se->conn.want & FUSE_CAP_EXPORT_SUPPORT) {
        outarg.flags |= FUSE_EXPORT_SUPPORT;
    }
    if (se->conn.want & FUSE_CAP_DONT_MASK) {
        outarg.flags |= FUSE_DONT_MASK;
    }
    if (se->conn.want & FUSE_CAP_FLOCK_LOCKS) {
        outarg.flags |= FUSE_FLOCK_LOCKS;
    }
    if (se->conn.want & FUSE_CAP_AUTO_INVAL_DATA) {
        outarg.flags |= FUSE_AUTO_INVAL_DATA;
    }
    if (se->conn.want & FUSE_CAP_READDIRPLUS) {
        outarg.flags |= FUSE_DO_READDIRPLUS;
    }
    if (se->conn.want & FUSE_CAP_READDIRPLUS_AUTO) {
        outarg.flags |= FUSE_READDIRPLUS_AUTO;
    }
    if (se->conn.want & FUSE_CAP_ASYNC_DIO) {
        outarg.flags |= FUSE_ASYNC_DIO;
    }
    if (se->conn.want & FUSE_CAP_WRITEBACK_CACHE) {
        outarg.flags |= FUSE_WRITEBACK_CACHE;
    }
    if (se->conn.want & FUSE_CAP_POSIX_ACL) {
        outarg.flags |= FUSE_POSIX_ACL;
    }
    outarg.max_readahead = se->conn.max_readahead;
    outarg.max_write = se->conn.max_write;
    /* max_background is a 16-bit field on the wire */
    if (se->conn.max_background >= (1 << 16)) {
        se->conn.max_background = (1 << 16) - 1;
    }
    if (se->conn.congestion_threshold > se->conn.max_background) {
        se->conn.congestion_threshold = se->conn.max_background;
    }
    if (!se->conn.congestion_threshold) {
        se->conn.congestion_threshold = se->conn.max_background * 3 / 4;
    }

    outarg.max_background = se->conn.max_background;
    outarg.congestion_threshold = se->conn.congestion_threshold;
    outarg.time_gran = se->conn.time_gran;

    fuse_log(FUSE_LOG_DEBUG, "   INIT: %u.%u\n", outarg.major, outarg.minor);
    fuse_log(FUSE_LOG_DEBUG, "   flags=0x%08x\n", outarg.flags);
    fuse_log(FUSE_LOG_DEBUG, "   max_readahead=0x%08x\n", outarg.max_readahead);
    fuse_log(FUSE_LOG_DEBUG, "   max_write=0x%08x\n", outarg.max_write);
    fuse_log(FUSE_LOG_DEBUG, "   max_background=%i\n", outarg.max_background);
    fuse_log(FUSE_LOG_DEBUG, "   congestion_threshold=%i\n",
             outarg.congestion_threshold);
    fuse_log(FUSE_LOG_DEBUG, "   time_gran=%u\n", outarg.time_gran);

    send_reply_ok(req, &outarg, outargsize);
}

/*
 * Handle FUSE_DESTROY: flip the session out of the initialized state and
 * give the filesystem its op.destroy() callback before replying.
 */
static void do_destroy(fuse_req_t req, fuse_ino_t nodeid,
                       struct fuse_mbuf_iter *iter)
{
    struct fuse_session *se = req->se;

    (void)nodeid;
    (void)iter;

    se->got_destroy = 1;
    se->got_init = 0;
    if (se->op.destroy) {
        se->op.destroy(se->userdata);
    }

    send_reply_ok(req, NULL, 0);
}

/*
 * Send an unsolicited notification to the client.  iov[0] is reserved for
 * the fuse_out_header (notify_code travels in the 'error' field); callers
 * fill iov[1..].  Fails with -ENOTCONN before FUSE_INIT has completed.
 */
static int send_notify_iov(struct fuse_session *se, int notify_code,
                           struct iovec *iov, int count)
{
    struct fuse_out_header out = {
        .error = notify_code,
    };

    if (!se->got_init) {
        return -ENOTCONN;
    }

    iov[0].iov_base = &out;
    iov[0].iov_len = sizeof(struct fuse_out_header);

    return fuse_send_msg(se, NULL, iov, count);
}

/* Notify the client that a polled file (identified by ph->kh) is ready */
int fuse_lowlevel_notify_poll(struct fuse_pollhandle *ph)
{
    if (ph != NULL) {
        struct fuse_notify_poll_wakeup_out outarg = {
            .kh = ph->kh,
        };
        struct iovec iov[2];

        iov[1].iov_base = &outarg;
        iov[1].iov_len = sizeof(outarg);

        return send_notify_iov(ph->se, FUSE_NOTIFY_POLL, iov, 2);
    } else {
        return 0;
    }
}

/* Ask the client to invalidate cached data/attributes of an inode range */
int fuse_lowlevel_notify_inval_inode(struct fuse_session *se, fuse_ino_t ino,
                                     off_t off, off_t len)
{
    struct fuse_notify_inval_inode_out outarg = {
        .ino = ino,
        .off = off,
        .len = len,
    };
    struct iovec iov[2];

    if (!se) {
        return -EINVAL;
    }

    iov[1].iov_base = &outarg;
    iov[1].iov_len = sizeof(outarg);

    return send_notify_iov(se, FUSE_NOTIFY_INVAL_INODE, iov, 2);
}

/* Ask the client to invalidate a cached directory entry (parent, name) */
int fuse_lowlevel_notify_inval_entry(struct fuse_session *se, fuse_ino_t parent,
                                     const char *name, size_t namelen)
{
    struct fuse_notify_inval_entry_out outarg = {
        .parent = parent,
        .namelen = namelen,
    };
    struct iovec iov[3];

    if (!se) {
        return -EINVAL;
    }

    iov[1].iov_base = &outarg;
    iov[1].iov_len = sizeof(outarg);
    iov[2].iov_base = (void *)name;
    iov[2].iov_len = namelen + 1;   /* include the terminating NUL */

    return send_notify_iov(se, FUSE_NOTIFY_INVAL_ENTRY, iov, 3);
}

/* Tell the client that entry (parent, name) -> child has been deleted */
int fuse_lowlevel_notify_delete(struct fuse_session *se, fuse_ino_t parent,
                                fuse_ino_t child, const char *name,
                                size_t namelen)
{
    struct fuse_notify_delete_out outarg = {
        .parent = parent,
        .child = child,
        .namelen = namelen,
    };
    struct iovec iov[3];

    if (!se) {
        return -EINVAL;
    }

    iov[1].iov_base = &outarg;
    iov[1].iov_len = sizeof(outarg);
    iov[2].iov_base = (void *)name;
    iov[2].iov_len = namelen + 1;   /* include the terminating NUL */

    return send_notify_iov(se, FUSE_NOTIFY_DELETE, iov, 3);
}

/*
 * Push data into the client's page cache for inode 'ino' at 'offset'.
 * Builds the header itself (rather than via send_notify_iov) because the
 * payload goes out through fuse_send_data_iov.  Positive results from the
 * transport are normalized to negative error codes.
 */
int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino,
                               off_t offset, struct fuse_bufvec *bufv)
{
    struct fuse_out_header out = {
        .error = FUSE_NOTIFY_STORE,
    };
    struct fuse_notify_store_out outarg = {
        .nodeid = ino,
        .offset = offset,
        .size = fuse_buf_size(bufv),
    };
    struct iovec iov[3];
    int res;

    if (!se) {
        return -EINVAL;
    }

    iov[0].iov_base = &out;
    iov[0].iov_len = sizeof(out);
    iov[1].iov_base = &outarg;
    iov[1].iov_len = sizeof(outarg);

    res = fuse_send_data_iov(se, NULL, iov, 2, bufv);
    if (res > 0) {
        res = -res;
    }

    return res;
}

/* Return the filesystem's private userdata pointer for this request */
void *fuse_req_userdata(fuse_req_t req)
{
    return req->se->userdata;
}

/* Return the credentials/pid context the request was issued with */
const struct fuse_ctx *fuse_req_ctx(fuse_req_t req)
{
    return &req->ctx;
}

/*
 * Register an interrupt callback for 'req'.  If the request was already
 * interrupted, invoke the callback immediately (under req->lock).
 */
void fuse_req_interrupt_func(fuse_req_t req, fuse_interrupt_func_t func,
                             void *data)
{
    pthread_mutex_lock(&req->lock);
    pthread_mutex_lock(&req->se->lock);
    req->u.ni.func = func;
    req->u.ni.data = data;
    pthread_mutex_unlock(&req->se->lock);
    if (req->interrupted && func) {
        func(req, data);
    }
    pthread_mutex_unlock(&req->lock);
}

/* Return non-zero when the request has been interrupted by the client */
int fuse_req_interrupted(fuse_req_t req)
{
    int interrupted;

    pthread_mutex_lock(&req->se->lock);
    interrupted = req->interrupted;
    pthread_mutex_unlock(&req->se->lock);

    return interrupted;
}

/*
 * Dispatch table indexed by FUSE opcode: handler function plus a name
 * used for debug logging.  A NULL func (e.g. FUSE_NOTIFY_REPLY) makes
 * the dispatcher reply ENOSYS.
 */
static struct {
    void (*func)(fuse_req_t, fuse_ino_t, struct fuse_mbuf_iter *);
    const char *name;
} fuse_ll_ops[] = {
    [FUSE_LOOKUP] = { do_lookup, "LOOKUP" },
    [FUSE_FORGET] = { do_forget, "FORGET" },
    [FUSE_GETATTR] = { do_getattr, "GETATTR" },
    [FUSE_SETATTR] = { do_setattr, "SETATTR" },
    [FUSE_READLINK] = { do_readlink, "READLINK" },
    [FUSE_SYMLINK] = { do_symlink, "SYMLINK" },
    [FUSE_MKNOD] = { do_mknod, "MKNOD" },
    [FUSE_MKDIR] = { do_mkdir, "MKDIR" },
    [FUSE_UNLINK] = { do_unlink, "UNLINK" },
    [FUSE_RMDIR] = { do_rmdir, "RMDIR" },
    [FUSE_RENAME] = { do_rename, "RENAME" },
    [FUSE_LINK] = { do_link, "LINK" },
    [FUSE_OPEN] = { do_open, "OPEN" },
    [FUSE_READ] = { do_read, "READ" },
    [FUSE_WRITE] = { do_write, "WRITE" },
    [FUSE_STATFS] = { do_statfs, "STATFS" },
    [FUSE_RELEASE] = { do_release, "RELEASE" },
    [FUSE_FSYNC] = { do_fsync, "FSYNC" },
    [FUSE_SETXATTR] = { do_setxattr, "SETXATTR" },
    [FUSE_GETXATTR] = { do_getxattr, "GETXATTR" },
    [FUSE_LISTXATTR] = { do_listxattr, "LISTXATTR" },
    [FUSE_REMOVEXATTR] = { do_removexattr, "REMOVEXATTR" },
    [FUSE_FLUSH] = { do_flush, "FLUSH" },
    [FUSE_INIT] = { do_init, "INIT" },
    [FUSE_OPENDIR] = { do_opendir, "OPENDIR" },
    [FUSE_READDIR] = { do_readdir, "READDIR" },
    [FUSE_RELEASEDIR] = { do_releasedir, "RELEASEDIR" },
    [FUSE_FSYNCDIR] = { do_fsyncdir, "FSYNCDIR" },
    [FUSE_GETLK] = { do_getlk, "GETLK" },
    [FUSE_SETLK] = { do_setlk, "SETLK" },
    [FUSE_SETLKW] = { do_setlkw, "SETLKW" },
    [FUSE_ACCESS] = { do_access, "ACCESS" },
    [FUSE_CREATE] = { do_create, "CREATE" },
    [FUSE_INTERRUPT] = { do_interrupt, "INTERRUPT" },
    [FUSE_BMAP] = { do_bmap, "BMAP" },
    [FUSE_IOCTL] = { do_ioctl, "IOCTL" },
    [FUSE_POLL] = { do_poll, "POLL" },
    [FUSE_FALLOCATE] = { do_fallocate, "FALLOCATE" },
    [FUSE_DESTROY] = { do_destroy, "DESTROY" },
    [FUSE_NOTIFY_REPLY] = { NULL, "NOTIFY_REPLY" },
    [FUSE_BATCH_FORGET] = { do_batch_forget, "BATCH_FORGET" },
    [FUSE_READDIRPLUS] = { do_readdirplus, "READDIRPLUS" },
    [FUSE_RENAME2] = { do_rename2, "RENAME2" },
    [FUSE_COPY_FILE_RANGE] = { do_copy_file_range, "COPY_FILE_RANGE" },
    [FUSE_LSEEK] = { do_lseek, "LSEEK" },
};

#define FUSE_MAXOP (sizeof(fuse_ll_ops) / sizeof(fuse_ll_ops[0]))

/* Map an opcode to its debug name; "???" for unknown/unnamed opcodes */
static const char *opname(enum fuse_opcode opcode)
{
    if (opcode >= FUSE_MAXOP || !fuse_ll_ops[opcode].name) {
        return "???";
    } else {
        return fuse_ll_ops[opcode].name;
    }
}

/* Public entry point: wrap a single buffer in a bufvec and process it */
void fuse_session_process_buf(struct fuse_session *se,
                              const struct fuse_buf *buf)
{
    struct fuse_bufvec bufv = { .buf[0] = *buf, .count = 1 };
    fuse_session_process_buf_int(se, &bufv, NULL);
}

/*
 * Restriction:
 *   bufv is normally a single entry buffer, except for a write
 *   where (if it's in memory) then the bufv may be multiple entries,
 *   where the first entry contains all headers and subsequent entries
 *   contain data
 *   bufv shall not use any offsets etc to make the data anything
 *   other than contiguous starting from 0.
2388 + */ 2389 + void fuse_session_process_buf_int(struct fuse_session *se, 2390 + struct fuse_bufvec *bufv, 2391 + struct fuse_chan *ch) 2392 + { 2393 + const struct fuse_buf *buf = bufv->buf; 2394 + struct fuse_mbuf_iter iter = FUSE_MBUF_ITER_INIT(buf); 2395 + struct fuse_in_header *in; 2396 + struct fuse_req *req; 2397 + int err; 2398 + 2399 + /* The first buffer must be a memory buffer */ 2400 + assert(!(buf->flags & FUSE_BUF_IS_FD)); 2401 + 2402 + in = fuse_mbuf_iter_advance(&iter, sizeof(*in)); 2403 + assert(in); /* caller guarantees the input buffer is large enough */ 2404 + 2405 + fuse_log( 2406 + FUSE_LOG_DEBUG, 2407 + "unique: %llu, opcode: %s (%i), nodeid: %llu, insize: %zu, pid: %u\n", 2408 + (unsigned long long)in->unique, opname((enum fuse_opcode)in->opcode), 2409 + in->opcode, (unsigned long long)in->nodeid, buf->size, in->pid); 2410 + 2411 + req = fuse_ll_alloc_req(se); 2412 + if (req == NULL) { 2413 + struct fuse_out_header out = { 2414 + .unique = in->unique, 2415 + .error = -ENOMEM, 2416 + }; 2417 + struct iovec iov = { 2418 + .iov_base = &out, 2419 + .iov_len = sizeof(struct fuse_out_header), 2420 + }; 2421 + 2422 + fuse_send_msg(se, ch, &iov, 1); 2423 + return; 2424 + } 2425 + 2426 + req->unique = in->unique; 2427 + req->ctx.uid = in->uid; 2428 + req->ctx.gid = in->gid; 2429 + req->ctx.pid = in->pid; 2430 + req->ch = ch; 2431 + 2432 + /* 2433 + * INIT and DESTROY requests are serialized, all other request types 2434 + * run in parallel. This prevents races between FUSE_INIT and ordinary 2435 + * requests, FUSE_INIT and FUSE_INIT, FUSE_INIT and FUSE_DESTROY, and 2436 + * FUSE_DESTROY and FUSE_DESTROY. 
2437 + */ 2438 + if (in->opcode == FUSE_INIT || in->opcode == CUSE_INIT || 2439 + in->opcode == FUSE_DESTROY) { 2440 + pthread_rwlock_wrlock(&se->init_rwlock); 2441 + } else { 2442 + pthread_rwlock_rdlock(&se->init_rwlock); 2443 + } 2444 + 2445 + err = EIO; 2446 + if (!se->got_init) { 2447 + enum fuse_opcode expected; 2448 + 2449 + expected = se->cuse_data ? CUSE_INIT : FUSE_INIT; 2450 + if (in->opcode != expected) { 2451 + goto reply_err; 2452 + } 2453 + } else if (in->opcode == FUSE_INIT || in->opcode == CUSE_INIT) { 2454 + if (fuse_lowlevel_is_virtio(se)) { 2455 + /* 2456 + * TODO: This is after a hard reboot typically, we need to do 2457 + * a destroy, but we can't reply to this request yet so 2458 + * we can't use do_destroy 2459 + */ 2460 + fuse_log(FUSE_LOG_DEBUG, "%s: reinit\n", __func__); 2461 + se->got_destroy = 1; 2462 + se->got_init = 0; 2463 + if (se->op.destroy) { 2464 + se->op.destroy(se->userdata); 2465 + } 2466 + } else { 2467 + goto reply_err; 2468 + } 2469 + } 2470 + 2471 + err = EACCES; 2472 + /* Implement -o allow_root */ 2473 + if (se->deny_others && in->uid != se->owner && in->uid != 0 && 2474 + in->opcode != FUSE_INIT && in->opcode != FUSE_READ && 2475 + in->opcode != FUSE_WRITE && in->opcode != FUSE_FSYNC && 2476 + in->opcode != FUSE_RELEASE && in->opcode != FUSE_READDIR && 2477 + in->opcode != FUSE_FSYNCDIR && in->opcode != FUSE_RELEASEDIR && 2478 + in->opcode != FUSE_NOTIFY_REPLY && in->opcode != FUSE_READDIRPLUS) { 2479 + goto reply_err; 2480 + } 2481 + 2482 + err = ENOSYS; 2483 + if (in->opcode >= FUSE_MAXOP || !fuse_ll_ops[in->opcode].func) { 2484 + goto reply_err; 2485 + } 2486 + if (in->opcode != FUSE_INTERRUPT) { 2487 + struct fuse_req *intr; 2488 + pthread_mutex_lock(&se->lock); 2489 + intr = check_interrupt(se, req); 2490 + list_add_req(req, &se->list); 2491 + pthread_mutex_unlock(&se->lock); 2492 + if (intr) { 2493 + fuse_reply_err(intr, EAGAIN); 2494 + } 2495 + } 2496 + 2497 + if (in->opcode == FUSE_WRITE && se->op.write_buf) { 
2498 + do_write_buf(req, in->nodeid, &iter, bufv); 2499 + } else { 2500 + fuse_ll_ops[in->opcode].func(req, in->nodeid, &iter); 2501 + } 2502 + 2503 + pthread_rwlock_unlock(&se->init_rwlock); 2504 + return; 2505 + 2506 + reply_err: 2507 + fuse_reply_err(req, err); 2508 + pthread_rwlock_unlock(&se->init_rwlock); 2509 + } 2510 + 2511 + #define LL_OPTION(n, o, v) \ 2512 + { \ 2513 + n, offsetof(struct fuse_session, o), v \ 2514 + } 2515 + 2516 + static const struct fuse_opt fuse_ll_opts[] = { 2517 + LL_OPTION("debug", debug, 1), 2518 + LL_OPTION("-d", debug, 1), 2519 + LL_OPTION("--debug", debug, 1), 2520 + LL_OPTION("allow_root", deny_others, 1), 2521 + LL_OPTION("--socket-path=%s", vu_socket_path, 0), 2522 + LL_OPTION("--fd=%d", vu_listen_fd, 0), 2523 + LL_OPTION("--thread-pool-size=%d", thread_pool_size, 0), 2524 + FUSE_OPT_END 2525 + }; 2526 + 2527 + void fuse_lowlevel_version(void) 2528 + { 2529 + printf("using FUSE kernel interface version %i.%i\n", FUSE_KERNEL_VERSION, 2530 + FUSE_KERNEL_MINOR_VERSION); 2531 + } 2532 + 2533 + void fuse_lowlevel_help(void) 2534 + { 2535 + /* 2536 + * These are not all options, but the ones that are 2537 + * potentially of interest to an end-user 2538 + */ 2539 + printf( 2540 + " -o allow_root allow access by root\n" 2541 + " --socket-path=PATH path for the vhost-user socket\n" 2542 + " --fd=FDNUM fd number of vhost-user socket\n" 2543 + " --thread-pool-size=NUM thread pool size limit (default %d)\n", 2544 + THREAD_POOL_SIZE); 2545 + } 2546 + 2547 + void fuse_session_destroy(struct fuse_session *se) 2548 + { 2549 + if (se->got_init && !se->got_destroy) { 2550 + if (se->op.destroy) { 2551 + se->op.destroy(se->userdata); 2552 + } 2553 + } 2554 + pthread_rwlock_destroy(&se->init_rwlock); 2555 + pthread_mutex_destroy(&se->lock); 2556 + free(se->cuse_data); 2557 + if (se->fd != -1) { 2558 + close(se->fd); 2559 + } 2560 + 2561 + if (fuse_lowlevel_is_virtio(se)) { 2562 + virtio_session_close(se); 2563 + } 2564 + 2565 + 
free(se->vu_socket_path); 2566 + se->vu_socket_path = NULL; 2567 + 2568 + free(se); 2569 + } 2570 + 2571 + 2572 + struct fuse_session *fuse_session_new(struct fuse_args *args, 2573 + const struct fuse_lowlevel_ops *op, 2574 + size_t op_size, void *userdata) 2575 + { 2576 + struct fuse_session *se; 2577 + 2578 + if (sizeof(struct fuse_lowlevel_ops) < op_size) { 2579 + fuse_log( 2580 + FUSE_LOG_ERR, 2581 + "fuse: warning: library too old, some operations may not work\n"); 2582 + op_size = sizeof(struct fuse_lowlevel_ops); 2583 + } 2584 + 2585 + if (args->argc == 0) { 2586 + fuse_log(FUSE_LOG_ERR, 2587 + "fuse: empty argv passed to fuse_session_new().\n"); 2588 + return NULL; 2589 + } 2590 + 2591 + se = (struct fuse_session *)calloc(1, sizeof(struct fuse_session)); 2592 + if (se == NULL) { 2593 + fuse_log(FUSE_LOG_ERR, "fuse: failed to allocate fuse object\n"); 2594 + goto out1; 2595 + } 2596 + se->fd = -1; 2597 + se->vu_listen_fd = -1; 2598 + se->thread_pool_size = THREAD_POOL_SIZE; 2599 + se->conn.max_write = UINT_MAX; 2600 + se->conn.max_readahead = UINT_MAX; 2601 + 2602 + /* Parse options */ 2603 + if (fuse_opt_parse(args, se, fuse_ll_opts, NULL) == -1) { 2604 + goto out2; 2605 + } 2606 + if (args->argc == 1 && args->argv[0][0] == '-') { 2607 + fuse_log(FUSE_LOG_ERR, 2608 + "fuse: warning: argv[0] looks like an option, but " 2609 + "will be ignored\n"); 2610 + } else if (args->argc != 1) { 2611 + int i; 2612 + fuse_log(FUSE_LOG_ERR, "fuse: unknown option(s): `"); 2613 + for (i = 1; i < args->argc - 1; i++) { 2614 + fuse_log(FUSE_LOG_ERR, "%s ", args->argv[i]); 2615 + } 2616 + fuse_log(FUSE_LOG_ERR, "%s'\n", args->argv[i]); 2617 + goto out4; 2618 + } 2619 + 2620 + if (!se->vu_socket_path && se->vu_listen_fd < 0) { 2621 + fuse_log(FUSE_LOG_ERR, "fuse: missing --socket-path or --fd option\n"); 2622 + goto out4; 2623 + } 2624 + if (se->vu_socket_path && se->vu_listen_fd >= 0) { 2625 + fuse_log(FUSE_LOG_ERR, 2626 + "fuse: --socket-path and --fd cannot be given 
together\n"); 2627 + goto out4; 2628 + } 2629 + 2630 + se->bufsize = FUSE_MAX_MAX_PAGES * getpagesize() + FUSE_BUFFER_HEADER_SIZE; 2631 + 2632 + list_init_req(&se->list); 2633 + list_init_req(&se->interrupts); 2634 + fuse_mutex_init(&se->lock); 2635 + pthread_rwlock_init(&se->init_rwlock, NULL); 2636 + 2637 + memcpy(&se->op, op, op_size); 2638 + se->owner = getuid(); 2639 + se->userdata = userdata; 2640 + 2641 + return se; 2642 + 2643 + out4: 2644 + fuse_opt_free_args(args); 2645 + out2: 2646 + free(se); 2647 + out1: 2648 + return NULL; 2649 + } 2650 + 2651 + int fuse_session_mount(struct fuse_session *se) 2652 + { 2653 + return virtio_session_mount(se); 2654 + } 2655 + 2656 + int fuse_session_fd(struct fuse_session *se) 2657 + { 2658 + return se->fd; 2659 + } 2660 + 2661 + void fuse_session_unmount(struct fuse_session *se) 2662 + { 2663 + } 2664 + 2665 + int fuse_lowlevel_is_virtio(struct fuse_session *se) 2666 + { 2667 + return !!se->virtio_dev; 2668 + } 2669 + 2670 + #ifdef linux 2671 + int fuse_req_getgroups(fuse_req_t req, int size, gid_t list[]) 2672 + { 2673 + char *buf; 2674 + size_t bufsize = 1024; 2675 + char path[128]; 2676 + int ret; 2677 + int fd; 2678 + unsigned long pid = req->ctx.pid; 2679 + char *s; 2680 + 2681 + sprintf(path, "/proc/%lu/task/%lu/status", pid, pid); 2682 + 2683 + retry: 2684 + buf = malloc(bufsize); 2685 + if (buf == NULL) { 2686 + return -ENOMEM; 2687 + } 2688 + 2689 + ret = -EIO; 2690 + fd = open(path, O_RDONLY); 2691 + if (fd == -1) { 2692 + goto out_free; 2693 + } 2694 + 2695 + ret = read(fd, buf, bufsize); 2696 + close(fd); 2697 + if (ret < 0) { 2698 + ret = -EIO; 2699 + goto out_free; 2700 + } 2701 + 2702 + if ((size_t)ret == bufsize) { 2703 + free(buf); 2704 + bufsize *= 4; 2705 + goto retry; 2706 + } 2707 + 2708 + ret = -EIO; 2709 + s = strstr(buf, "\nGroups:"); 2710 + if (s == NULL) { 2711 + goto out_free; 2712 + } 2713 + 2714 + s += 8; 2715 + ret = 0; 2716 + while (1) { 2717 + char *end; 2718 + unsigned long val = 
strtoul(s, &end, 0); 2719 + if (end == s) { 2720 + break; 2721 + } 2722 + 2723 + s = end; 2724 + if (ret < size) { 2725 + list[ret] = val; 2726 + } 2727 + ret++; 2728 + } 2729 + 2730 + out_free: 2731 + free(buf); 2732 + return ret; 2733 + } 2734 + #else /* linux */ 2735 + /* 2736 + * This is currently not implemented on other than Linux... 2737 + */ 2738 + int fuse_req_getgroups(fuse_req_t req, int size, gid_t list[]) 2739 + { 2740 + (void)req; 2741 + (void)size; 2742 + (void)list; 2743 + return -ENOSYS; 2744 + } 2745 + #endif 2746 + 2747 + void fuse_session_exit(struct fuse_session *se) 2748 + { 2749 + se->exited = 1; 2750 + } 2751 + 2752 + void fuse_session_reset(struct fuse_session *se) 2753 + { 2754 + se->exited = 0; 2755 + se->error = 0; 2756 + } 2757 + 2758 + int fuse_session_exited(struct fuse_session *se) 2759 + { 2760 + return se->exited; 2761 + }
+1991
tools/virtiofsd/fuse_lowlevel.h
··· 1 + /* 2 + * FUSE: Filesystem in Userspace 3 + * Copyright (C) 2001-2007 Miklos Szeredi <miklos@szeredi.hu> 4 + * 5 + * This program can be distributed under the terms of the GNU LGPLv2. 6 + * See the file COPYING.LIB. 7 + */ 8 + 9 + #ifndef FUSE_LOWLEVEL_H_ 10 + #define FUSE_LOWLEVEL_H_ 11 + 12 + /** 13 + * @file 14 + * 15 + * Low level API 16 + * 17 + * IMPORTANT: you should define FUSE_USE_VERSION before including this 18 + * header. To use the newest API define it to 31 (recommended for any 19 + * new application). 20 + */ 21 + 22 + #ifndef FUSE_USE_VERSION 23 + #error FUSE_USE_VERSION not defined 24 + #endif 25 + 26 + #include "fuse_common.h" 27 + 28 + #include <fcntl.h> 29 + #include <sys/stat.h> 30 + #include <sys/statvfs.h> 31 + #include <sys/types.h> 32 + #include <sys/uio.h> 33 + #include <utime.h> 34 + 35 + /* 36 + * Miscellaneous definitions 37 + */ 38 + 39 + /** The node ID of the root inode */ 40 + #define FUSE_ROOT_ID 1 41 + 42 + /** Inode number type */ 43 + typedef uint64_t fuse_ino_t; 44 + 45 + /** Request pointer type */ 46 + typedef struct fuse_req *fuse_req_t; 47 + 48 + /** 49 + * Session 50 + * 51 + * This provides hooks for processing requests, and exiting 52 + */ 53 + struct fuse_session; 54 + 55 + /** Directory entry parameters supplied to fuse_reply_entry() */ 56 + struct fuse_entry_param { 57 + /** 58 + * Unique inode number 59 + * 60 + * In lookup, zero means negative entry (from version 2.5) 61 + * Returning ENOENT also means negative entry, but by setting zero 62 + * ino the kernel may cache negative entries for entry_timeout 63 + * seconds. 64 + */ 65 + fuse_ino_t ino; 66 + 67 + /** 68 + * Generation number for this entry. 69 + * 70 + * If the file system will be exported over NFS, the 71 + * ino/generation pairs need to be unique over the file 72 + * system's lifetime (rather than just the mount time). 
So if 73 + * the file system reuses an inode after it has been deleted, 74 + * it must assign a new, previously unused generation number 75 + * to the inode at the same time. 76 + * 77 + */ 78 + uint64_t generation; 79 + 80 + /** 81 + * Inode attributes. 82 + * 83 + * Even if attr_timeout == 0, attr must be correct. For example, 84 + * for open(), FUSE uses attr.st_size from lookup() to determine 85 + * how many bytes to request. If this value is not correct, 86 + * incorrect data will be returned. 87 + */ 88 + struct stat attr; 89 + 90 + /** 91 + * Validity timeout (in seconds) for inode attributes. If 92 + * attributes only change as a result of requests that come 93 + * through the kernel, this should be set to a very large 94 + * value. 95 + */ 96 + double attr_timeout; 97 + 98 + /** 99 + * Validity timeout (in seconds) for the name. If directory 100 + * entries are changed/deleted only as a result of requests 101 + * that come through the kernel, this should be set to a very 102 + * large value. 103 + */ 104 + double entry_timeout; 105 + }; 106 + 107 + /** 108 + * Additional context associated with requests. 109 + * 110 + * Note that the reported client uid, gid and pid may be zero in some 111 + * situations. For example, if the FUSE file system is running in a 112 + * PID or user namespace but then accessed from outside the namespace, 113 + * there is no valid uid/pid/gid that could be reported. 
114 + */ 115 + struct fuse_ctx { 116 + /** User ID of the calling process */ 117 + uid_t uid; 118 + 119 + /** Group ID of the calling process */ 120 + gid_t gid; 121 + 122 + /** Thread ID of the calling process */ 123 + pid_t pid; 124 + 125 + /** Umask of the calling process */ 126 + mode_t umask; 127 + }; 128 + 129 + struct fuse_forget_data { 130 + fuse_ino_t ino; 131 + uint64_t nlookup; 132 + }; 133 + 134 + /* 'to_set' flags in setattr */ 135 + #define FUSE_SET_ATTR_MODE (1 << 0) 136 + #define FUSE_SET_ATTR_UID (1 << 1) 137 + #define FUSE_SET_ATTR_GID (1 << 2) 138 + #define FUSE_SET_ATTR_SIZE (1 << 3) 139 + #define FUSE_SET_ATTR_ATIME (1 << 4) 140 + #define FUSE_SET_ATTR_MTIME (1 << 5) 141 + #define FUSE_SET_ATTR_ATIME_NOW (1 << 7) 142 + #define FUSE_SET_ATTR_MTIME_NOW (1 << 8) 143 + #define FUSE_SET_ATTR_CTIME (1 << 10) 144 + 145 + /* 146 + * Request methods and replies 147 + */ 148 + 149 + /** 150 + * Low level filesystem operations 151 + * 152 + * Most of the methods (with the exception of init and destroy) 153 + * receive a request handle (fuse_req_t) as their first argument. 154 + * This handle must be passed to one of the specified reply functions. 155 + * 156 + * This may be done inside the method invocation, or after the call 157 + * has returned. The request handle is valid until one of the reply 158 + * functions is called. 159 + * 160 + * Other pointer arguments (name, fuse_file_info, etc) are not valid 161 + * after the call has returned, so if they are needed later, their 162 + * contents have to be copied. 163 + * 164 + * In general, all methods are expected to perform any necessary 165 + * permission checking. However, a filesystem may delegate this task 166 + * to the kernel by passing the `default_permissions` mount option to 167 + * `fuse_session_new()`. In this case, methods will only be called if 168 + * the kernel's permission check has succeeded. 
169 + * 170 + * The filesystem sometimes needs to handle a return value of -ENOENT 171 + * from the reply function, which means, that the request was 172 + * interrupted, and the reply discarded. For example if 173 + * fuse_reply_open() return -ENOENT means, that the release method for 174 + * this file will not be called. 175 + */ 176 + struct fuse_lowlevel_ops { 177 + /** 178 + * Initialize filesystem 179 + * 180 + * This function is called when libfuse establishes 181 + * communication with the FUSE kernel module. The file system 182 + * should use this module to inspect and/or modify the 183 + * connection parameters provided in the `conn` structure. 184 + * 185 + * Note that some parameters may be overwritten by options 186 + * passed to fuse_session_new() which take precedence over the 187 + * values set in this handler. 188 + * 189 + * There's no reply to this function 190 + * 191 + * @param userdata the user data passed to fuse_session_new() 192 + */ 193 + void (*init)(void *userdata, struct fuse_conn_info *conn); 194 + 195 + /** 196 + * Clean up filesystem. 197 + * 198 + * Called on filesystem exit. When this method is called, the 199 + * connection to the kernel may be gone already, so that eg. calls 200 + * to fuse_lowlevel_notify_* will fail. 201 + * 202 + * There's no reply to this function 203 + * 204 + * @param userdata the user data passed to fuse_session_new() 205 + */ 206 + void (*destroy)(void *userdata); 207 + 208 + /** 209 + * Look up a directory entry by name and get its attributes. 210 + * 211 + * Valid replies: 212 + * fuse_reply_entry 213 + * fuse_reply_err 214 + * 215 + * @param req request handle 216 + * @param parent inode number of the parent directory 217 + * @param name the name to look up 218 + */ 219 + void (*lookup)(fuse_req_t req, fuse_ino_t parent, const char *name); 220 + 221 + /** 222 + * Forget about an inode 223 + * 224 + * This function is called when the kernel removes an inode 225 + * from its internal caches. 
226 + * 227 + * The inode's lookup count increases by one for every call to 228 + * fuse_reply_entry and fuse_reply_create. The nlookup parameter 229 + * indicates by how much the lookup count should be decreased. 230 + * 231 + * Inodes with a non-zero lookup count may receive request from 232 + * the kernel even after calls to unlink, rmdir or (when 233 + * overwriting an existing file) rename. Filesystems must handle 234 + * such requests properly and it is recommended to defer removal 235 + * of the inode until the lookup count reaches zero. Calls to 236 + * unlink, rmdir or rename will be followed closely by forget 237 + * unless the file or directory is open, in which case the 238 + * kernel issues forget only after the release or releasedir 239 + * calls. 240 + * 241 + * Note that if a file system will be exported over NFS the 242 + * inodes lifetime must extend even beyond forget. See the 243 + * generation field in struct fuse_entry_param above. 244 + * 245 + * On unmount the lookup count for all inodes implicitly drops 246 + * to zero. It is not guaranteed that the file system will 247 + * receive corresponding forget messages for the affected 248 + * inodes. 249 + * 250 + * Valid replies: 251 + * fuse_reply_none 252 + * 253 + * @param req request handle 254 + * @param ino the inode number 255 + * @param nlookup the number of lookups to forget 256 + */ 257 + void (*forget)(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup); 258 + 259 + /** 260 + * Get file attributes. 261 + * 262 + * If writeback caching is enabled, the kernel may have a 263 + * better idea of a file's length than the FUSE file system 264 + * (eg if there has been a write that extended the file size, 265 + * but that has not yet been passed to the filesystem.n 266 + * 267 + * In this case, the st_size value provided by the file system 268 + * will be ignored. 
269 + * 270 + * Valid replies: 271 + * fuse_reply_attr 272 + * fuse_reply_err 273 + * 274 + * @param req request handle 275 + * @param ino the inode number 276 + * @param fi for future use, currently always NULL 277 + */ 278 + void (*getattr)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi); 279 + 280 + /** 281 + * Set file attributes 282 + * 283 + * In the 'attr' argument only members indicated by the 'to_set' 284 + * bitmask contain valid values. Other members contain undefined 285 + * values. 286 + * 287 + * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is 288 + * expected to reset the setuid and setgid bits if the file 289 + * size or owner is being changed. 290 + * 291 + * If the setattr was invoked from the ftruncate() system call 292 + * under Linux kernel versions 2.6.15 or later, the fi->fh will 293 + * contain the value set by the open method or will be undefined 294 + * if the open method didn't set any value. Otherwise (not 295 + * ftruncate call, or kernel version earlier than 2.6.15) the fi 296 + * parameter will be NULL. 297 + * 298 + * Valid replies: 299 + * fuse_reply_attr 300 + * fuse_reply_err 301 + * 302 + * @param req request handle 303 + * @param ino the inode number 304 + * @param attr the attributes 305 + * @param to_set bit mask of attributes which should be set 306 + * @param fi file information, or NULL 307 + */ 308 + void (*setattr)(fuse_req_t req, fuse_ino_t ino, struct stat *attr, 309 + int to_set, struct fuse_file_info *fi); 310 + 311 + /** 312 + * Read symbolic link 313 + * 314 + * Valid replies: 315 + * fuse_reply_readlink 316 + * fuse_reply_err 317 + * 318 + * @param req request handle 319 + * @param ino the inode number 320 + */ 321 + void (*readlink)(fuse_req_t req, fuse_ino_t ino); 322 + 323 + /** 324 + * Create file node 325 + * 326 + * Create a regular file, character device, block device, fifo or 327 + * socket node. 
328 + * 329 + * Valid replies: 330 + * fuse_reply_entry 331 + * fuse_reply_err 332 + * 333 + * @param req request handle 334 + * @param parent inode number of the parent directory 335 + * @param name to create 336 + * @param mode file type and mode with which to create the new file 337 + * @param rdev the device number (only valid if created file is a device) 338 + */ 339 + void (*mknod)(fuse_req_t req, fuse_ino_t parent, const char *name, 340 + mode_t mode, dev_t rdev); 341 + 342 + /** 343 + * Create a directory 344 + * 345 + * Valid replies: 346 + * fuse_reply_entry 347 + * fuse_reply_err 348 + * 349 + * @param req request handle 350 + * @param parent inode number of the parent directory 351 + * @param name to create 352 + * @param mode with which to create the new file 353 + */ 354 + void (*mkdir)(fuse_req_t req, fuse_ino_t parent, const char *name, 355 + mode_t mode); 356 + 357 + /** 358 + * Remove a file 359 + * 360 + * If the file's inode's lookup count is non-zero, the file 361 + * system is expected to postpone any removal of the inode 362 + * until the lookup count reaches zero (see description of the 363 + * forget function). 364 + * 365 + * Valid replies: 366 + * fuse_reply_err 367 + * 368 + * @param req request handle 369 + * @param parent inode number of the parent directory 370 + * @param name to remove 371 + */ 372 + void (*unlink)(fuse_req_t req, fuse_ino_t parent, const char *name); 373 + 374 + /** 375 + * Remove a directory 376 + * 377 + * If the directory's inode's lookup count is non-zero, the 378 + * file system is expected to postpone any removal of the 379 + * inode until the lookup count reaches zero (see description 380 + * of the forget function). 
381 + * 382 + * Valid replies: 383 + * fuse_reply_err 384 + * 385 + * @param req request handle 386 + * @param parent inode number of the parent directory 387 + * @param name to remove 388 + */ 389 + void (*rmdir)(fuse_req_t req, fuse_ino_t parent, const char *name); 390 + 391 + /** 392 + * Create a symbolic link 393 + * 394 + * Valid replies: 395 + * fuse_reply_entry 396 + * fuse_reply_err 397 + * 398 + * @param req request handle 399 + * @param link the contents of the symbolic link 400 + * @param parent inode number of the parent directory 401 + * @param name to create 402 + */ 403 + void (*symlink)(fuse_req_t req, const char *link, fuse_ino_t parent, 404 + const char *name); 405 + 406 + /** 407 + * Rename a file 408 + * 409 + * If the target exists it should be atomically replaced. If 410 + * the target's inode's lookup count is non-zero, the file 411 + * system is expected to postpone any removal of the inode 412 + * until the lookup count reaches zero (see description of the 413 + * forget function). 414 + * 415 + * If this request is answered with an error code of ENOSYS, this is 416 + * treated as a permanent failure with error code EINVAL, i.e. all 417 + * future bmap requests will fail with EINVAL without being 418 + * send to the filesystem process. 419 + * 420 + * *flags* may be `RENAME_EXCHANGE` or `RENAME_NOREPLACE`. If 421 + * RENAME_NOREPLACE is specified, the filesystem must not 422 + * overwrite *newname* if it exists and return an error 423 + * instead. If `RENAME_EXCHANGE` is specified, the filesystem 424 + * must atomically exchange the two files, i.e. both must 425 + * exist and neither may be deleted. 
426 + * 427 + * Valid replies: 428 + * fuse_reply_err 429 + * 430 + * @param req request handle 431 + * @param parent inode number of the old parent directory 432 + * @param name old name 433 + * @param newparent inode number of the new parent directory 434 + * @param newname new name 435 + */ 436 + void (*rename)(fuse_req_t req, fuse_ino_t parent, const char *name, 437 + fuse_ino_t newparent, const char *newname, 438 + unsigned int flags); 439 + 440 + /** 441 + * Create a hard link 442 + * 443 + * Valid replies: 444 + * fuse_reply_entry 445 + * fuse_reply_err 446 + * 447 + * @param req request handle 448 + * @param ino the old inode number 449 + * @param newparent inode number of the new parent directory 450 + * @param newname new name to create 451 + */ 452 + void (*link)(fuse_req_t req, fuse_ino_t ino, fuse_ino_t newparent, 453 + const char *newname); 454 + 455 + /** 456 + * Open a file 457 + * 458 + * Open flags are available in fi->flags. The following rules 459 + * apply. 460 + * 461 + * - Creation (O_CREAT, O_EXCL, O_NOCTTY) flags will be 462 + * filtered out / handled by the kernel. 463 + * 464 + * - Access modes (O_RDONLY, O_WRONLY, O_RDWR) should be used 465 + * by the filesystem to check if the operation is 466 + * permitted. If the ``-o default_permissions`` mount 467 + * option is given, this check is already done by the 468 + * kernel before calling open() and may thus be omitted by 469 + * the filesystem. 470 + * 471 + * - When writeback caching is enabled, the kernel may send 472 + * read requests even for files opened with O_WRONLY. The 473 + * filesystem should be prepared to handle this. 474 + * 475 + * - When writeback caching is disabled, the filesystem is 476 + * expected to properly handle the O_APPEND flag and ensure 477 + * that each write is appending to the end of the file. 478 + * 479 + * - When writeback caching is enabled, the kernel will 480 + * handle O_APPEND. 
However, unless all changes to the file 481 + * come through the kernel this will not work reliably. The 482 + * filesystem should thus either ignore the O_APPEND flag 483 + * (and let the kernel handle it), or return an error 484 + * (indicating that reliably O_APPEND is not available). 485 + * 486 + * Filesystem may store an arbitrary file handle (pointer, 487 + * index, etc) in fi->fh, and use this in other all other file 488 + * operations (read, write, flush, release, fsync). 489 + * 490 + * Filesystem may also implement stateless file I/O and not store 491 + * anything in fi->fh. 492 + * 493 + * There are also some flags (direct_io, keep_cache) which the 494 + * filesystem may set in fi, to change the way the file is opened. 495 + * See fuse_file_info structure in <fuse_common.h> for more details. 496 + * 497 + * If this request is answered with an error code of ENOSYS 498 + * and FUSE_CAP_NO_OPEN_SUPPORT is set in 499 + * `fuse_conn_info.capable`, this is treated as success and 500 + * future calls to open and release will also succeed without being 501 + * sent to the filesystem process. 502 + * 503 + * Valid replies: 504 + * fuse_reply_open 505 + * fuse_reply_err 506 + * 507 + * @param req request handle 508 + * @param ino the inode number 509 + * @param fi file information 510 + */ 511 + void (*open)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi); 512 + 513 + /** 514 + * Read data 515 + * 516 + * Read should send exactly the number of bytes requested except 517 + * on EOF or error, otherwise the rest of the data will be 518 + * substituted with zeroes. An exception to this is when the file 519 + * has been opened in 'direct_io' mode, in which case the return 520 + * value of the read system call will reflect the return value of 521 + * this operation. 522 + * 523 + * fi->fh will contain the value set by the open method, or will 524 + * be undefined if the open method didn't set any value. 
525 + * 526 + * Valid replies: 527 + * fuse_reply_buf 528 + * fuse_reply_iov 529 + * fuse_reply_data 530 + * fuse_reply_err 531 + * 532 + * @param req request handle 533 + * @param ino the inode number 534 + * @param size number of bytes to read 535 + * @param off offset to read from 536 + * @param fi file information 537 + */ 538 + void (*read)(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, 539 + struct fuse_file_info *fi); 540 + 541 + /** 542 + * Write data 543 + * 544 + * Write should return exactly the number of bytes requested 545 + * except on error. An exception to this is when the file has 546 + * been opened in 'direct_io' mode, in which case the return value 547 + * of the write system call will reflect the return value of this 548 + * operation. 549 + * 550 + * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is 551 + * expected to reset the setuid and setgid bits. 552 + * 553 + * fi->fh will contain the value set by the open method, or will 554 + * be undefined if the open method didn't set any value. 555 + * 556 + * Valid replies: 557 + * fuse_reply_write 558 + * fuse_reply_err 559 + * 560 + * @param req request handle 561 + * @param ino the inode number 562 + * @param buf data to write 563 + * @param size number of bytes to write 564 + * @param off offset to write to 565 + * @param fi file information 566 + */ 567 + void (*write)(fuse_req_t req, fuse_ino_t ino, const char *buf, size_t size, 568 + off_t off, struct fuse_file_info *fi); 569 + 570 + /** 571 + * Flush method 572 + * 573 + * This is called on each close() of the opened file. 574 + * 575 + * Since file descriptors can be duplicated (dup, dup2, fork), for 576 + * one open call there may be many flush calls. 577 + * 578 + * Filesystems shouldn't assume that flush will always be called 579 + * after some writes, or that if will be called at all. 
580 + * 581 + * fi->fh will contain the value set by the open method, or will 582 + * be undefined if the open method didn't set any value. 583 + * 584 + * NOTE: the name of the method is misleading, since (unlike 585 + * fsync) the filesystem is not forced to flush pending writes. 586 + * One reason to flush data is if the filesystem wants to return 587 + * write errors during close. However, such use is non-portable 588 + * because POSIX does not require [close] to wait for delayed I/O to 589 + * complete. 590 + * 591 + * If the filesystem supports file locking operations (setlk, 592 + * getlk) it should remove all locks belonging to 'fi->owner'. 593 + * 594 + * If this request is answered with an error code of ENOSYS, 595 + * this is treated as success and future calls to flush() will 596 + * succeed automatically without being send to the filesystem 597 + * process. 598 + * 599 + * Valid replies: 600 + * fuse_reply_err 601 + * 602 + * @param req request handle 603 + * @param ino the inode number 604 + * @param fi file information 605 + * 606 + * [close]: 607 + * http://pubs.opengroup.org/onlinepubs/9699919799/functions/close.html 608 + */ 609 + void (*flush)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi); 610 + 611 + /** 612 + * Release an open file 613 + * 614 + * Release is called when there are no more references to an open 615 + * file: all file descriptors are closed and all memory mappings 616 + * are unmapped. 617 + * 618 + * For every open call there will be exactly one release call (unless 619 + * the filesystem is force-unmounted). 620 + * 621 + * The filesystem may reply with an error, but error values are 622 + * not returned to close() or munmap() which triggered the 623 + * release. 624 + * 625 + * fi->fh will contain the value set by the open method, or will 626 + * be undefined if the open method didn't set any value. 627 + * fi->flags will contain the same flags as for open. 
628 + * 629 + * Valid replies: 630 + * fuse_reply_err 631 + * 632 + * @param req request handle 633 + * @param ino the inode number 634 + * @param fi file information 635 + */ 636 + void (*release)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi); 637 + 638 + /** 639 + * Synchronize file contents 640 + * 641 + * If the datasync parameter is non-zero, then only the user data 642 + * should be flushed, not the meta data. 643 + * 644 + * If this request is answered with an error code of ENOSYS, 645 + * this is treated as success and future calls to fsync() will 646 + * succeed automatically without being sent to the filesystem 647 + * process. 648 + * 649 + * Valid replies: 650 + * fuse_reply_err 651 + * 652 + * @param req request handle 653 + * @param ino the inode number 654 + * @param datasync flag indicating if only data should be flushed 655 + * @param fi file information 656 + */ 657 + void (*fsync)(fuse_req_t req, fuse_ino_t ino, int datasync, 658 + struct fuse_file_info *fi); 659 + 660 + /** 661 + * Open a directory 662 + * 663 + * Filesystem may store an arbitrary file handle (pointer, index, 664 + * etc) in fi->fh, and use this in all other directory 665 + * stream operations (readdir, releasedir, fsyncdir). 666 + * 667 + * If this request is answered with an error code of ENOSYS and 668 + * FUSE_CAP_NO_OPENDIR_SUPPORT is set in `fuse_conn_info.capable`, 669 + * this is treated as success and future calls to opendir and 670 + * releasedir will also succeed without being sent to the filesystem 671 + * process. In addition, the kernel will cache readdir results 672 + * as if opendir returned FOPEN_KEEP_CACHE | FOPEN_CACHE_DIR. 
673 + * 674 + * Valid replies: 675 + * fuse_reply_open 676 + * fuse_reply_err 677 + * 678 + * @param req request handle 679 + * @param ino the inode number 680 + * @param fi file information 681 + */ 682 + void (*opendir)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi); 683 + 684 + /** 685 + * Read directory 686 + * 687 + * Send a buffer filled using fuse_add_direntry(), with size not 688 + * exceeding the requested size. Send an empty buffer on end of 689 + * stream. 690 + * 691 + * fi->fh will contain the value set by the opendir method, or 692 + * will be undefined if the opendir method didn't set any value. 693 + * 694 + * Returning a directory entry from readdir() does not affect 695 + * its lookup count. 696 + * 697 + * If off_t is non-zero, then it will correspond to one of the off_t 698 + * values that was previously returned by readdir() for the same 699 + * directory handle. In this case, readdir() should skip over entries 700 + * coming before the position defined by the off_t value. If entries 701 + * are added or removed while the directory handle is open, the filesystem 702 + * may still include the entries that have been removed, and may not 703 + * report the entries that have been created. However, addition or 704 + * removal of entries must never cause readdir() to skip over unrelated 705 + * entries or to report them more than once. This means 706 + * that off_t can not be a simple index that enumerates the entries 707 + * that have been returned but must contain sufficient information to 708 + * uniquely determine the next directory entry to return even when the 709 + * set of entries is changing. 710 + * 711 + * The function does not have to report the '.' and '..' 712 + * entries, but is allowed to do so. Note that, if readdir does 713 + * not return '.' or '..', they will not be implicitly returned, 714 + * and this behavior is observable by the caller. 
715 + * 716 + * Valid replies: 717 + * fuse_reply_buf 718 + * fuse_reply_data 719 + * fuse_reply_err 720 + * 721 + * @param req request handle 722 + * @param ino the inode number 723 + * @param size maximum number of bytes to send 724 + * @param off offset to continue reading the directory stream 725 + * @param fi file information 726 + */ 727 + void (*readdir)(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, 728 + struct fuse_file_info *fi); 729 + 730 + /** 731 + * Release an open directory 732 + * 733 + * For every opendir call there will be exactly one releasedir 734 + * call (unless the filesystem is force-unmounted). 735 + * 736 + * fi->fh will contain the value set by the opendir method, or 737 + * will be undefined if the opendir method didn't set any value. 738 + * 739 + * Valid replies: 740 + * fuse_reply_err 741 + * 742 + * @param req request handle 743 + * @param ino the inode number 744 + * @param fi file information 745 + */ 746 + void (*releasedir)(fuse_req_t req, fuse_ino_t ino, 747 + struct fuse_file_info *fi); 748 + 749 + /** 750 + * Synchronize directory contents 751 + * 752 + * If the datasync parameter is non-zero, then only the directory 753 + * contents should be flushed, not the meta data. 754 + * 755 + * fi->fh will contain the value set by the opendir method, or 756 + * will be undefined if the opendir method didn't set any value. 757 + * 758 + * If this request is answered with an error code of ENOSYS, 759 + * this is treated as success and future calls to fsyncdir() will 760 + * succeed automatically without being send to the filesystem 761 + * process. 
762 + * 763 + * Valid replies: 764 + * fuse_reply_err 765 + * 766 + * @param req request handle 767 + * @param ino the inode number 768 + * @param datasync flag indicating if only data should be flushed 769 + * @param fi file information 770 + */ 771 + void (*fsyncdir)(fuse_req_t req, fuse_ino_t ino, int datasync, 772 + struct fuse_file_info *fi); 773 + 774 + /** 775 + * Get file system statistics 776 + * 777 + * Valid replies: 778 + * fuse_reply_statfs 779 + * fuse_reply_err 780 + * 781 + * @param req request handle 782 + * @param ino the inode number, zero means "undefined" 783 + */ 784 + void (*statfs)(fuse_req_t req, fuse_ino_t ino); 785 + 786 + /** 787 + * Set an extended attribute 788 + * 789 + * If this request is answered with an error code of ENOSYS, this is 790 + * treated as a permanent failure with error code EOPNOTSUPP, i.e. all 791 + * future setxattr() requests will fail with EOPNOTSUPP without being 792 + * send to the filesystem process. 793 + * 794 + * Valid replies: 795 + * fuse_reply_err 796 + */ 797 + void (*setxattr)(fuse_req_t req, fuse_ino_t ino, const char *name, 798 + const char *value, size_t size, int flags); 799 + 800 + /** 801 + * Get an extended attribute 802 + * 803 + * If size is zero, the size of the value should be sent with 804 + * fuse_reply_xattr. 805 + * 806 + * If the size is non-zero, and the value fits in the buffer, the 807 + * value should be sent with fuse_reply_buf. 808 + * 809 + * If the size is too small for the value, the ERANGE error should 810 + * be sent. 811 + * 812 + * If this request is answered with an error code of ENOSYS, this is 813 + * treated as a permanent failure with error code EOPNOTSUPP, i.e. all 814 + * future getxattr() requests will fail with EOPNOTSUPP without being 815 + * send to the filesystem process. 
816 + * 817 + * Valid replies: 818 + * fuse_reply_buf 819 + * fuse_reply_data 820 + * fuse_reply_xattr 821 + * fuse_reply_err 822 + * 823 + * @param req request handle 824 + * @param ino the inode number 825 + * @param name of the extended attribute 826 + * @param size maximum size of the value to send 827 + */ 828 + void (*getxattr)(fuse_req_t req, fuse_ino_t ino, const char *name, 829 + size_t size); 830 + 831 + /** 832 + * List extended attribute names 833 + * 834 + * If size is zero, the total size of the attribute list should be 835 + * sent with fuse_reply_xattr. 836 + * 837 + * If the size is non-zero, and the null character separated 838 + * attribute list fits in the buffer, the list should be sent with 839 + * fuse_reply_buf. 840 + * 841 + * If the size is too small for the list, the ERANGE error should 842 + * be sent. 843 + * 844 + * If this request is answered with an error code of ENOSYS, this is 845 + * treated as a permanent failure with error code EOPNOTSUPP, i.e. all 846 + * future listxattr() requests will fail with EOPNOTSUPP without being 847 + * send to the filesystem process. 848 + * 849 + * Valid replies: 850 + * fuse_reply_buf 851 + * fuse_reply_data 852 + * fuse_reply_xattr 853 + * fuse_reply_err 854 + * 855 + * @param req request handle 856 + * @param ino the inode number 857 + * @param size maximum size of the list to send 858 + */ 859 + void (*listxattr)(fuse_req_t req, fuse_ino_t ino, size_t size); 860 + 861 + /** 862 + * Remove an extended attribute 863 + * 864 + * If this request is answered with an error code of ENOSYS, this is 865 + * treated as a permanent failure with error code EOPNOTSUPP, i.e. all 866 + * future removexattr() requests will fail with EOPNOTSUPP without being 867 + * send to the filesystem process. 
868 + * 869 + * Valid replies: 870 + * fuse_reply_err 871 + * 872 + * @param req request handle 873 + * @param ino the inode number 874 + * @param name of the extended attribute 875 + */ 876 + void (*removexattr)(fuse_req_t req, fuse_ino_t ino, const char *name); 877 + 878 + /** 879 + * Check file access permissions 880 + * 881 + * This will be called for the access() and chdir() system 882 + * calls. If the 'default_permissions' mount option is given, 883 + * this method is not called. 884 + * 885 + * This method is not called under Linux kernel versions 2.4.x 886 + * 887 + * If this request is answered with an error code of ENOSYS, this is 888 + * treated as a permanent success, i.e. this and all future access() 889 + * requests will succeed without being send to the filesystem process. 890 + * 891 + * Valid replies: 892 + * fuse_reply_err 893 + * 894 + * @param req request handle 895 + * @param ino the inode number 896 + * @param mask requested access mode 897 + */ 898 + void (*access)(fuse_req_t req, fuse_ino_t ino, int mask); 899 + 900 + /** 901 + * Create and open a file 902 + * 903 + * If the file does not exist, first create it with the specified 904 + * mode, and then open it. 905 + * 906 + * See the description of the open handler for more 907 + * information. 908 + * 909 + * If this method is not implemented or under Linux kernel 910 + * versions earlier than 2.6.15, the mknod() and open() methods 911 + * will be called instead. 912 + * 913 + * If this request is answered with an error code of ENOSYS, the handler 914 + * is treated as not implemented (i.e., for this and future requests the 915 + * mknod() and open() handlers will be called instead). 
916 + * 917 + * Valid replies: 918 + * fuse_reply_create 919 + * fuse_reply_err 920 + * 921 + * @param req request handle 922 + * @param parent inode number of the parent directory 923 + * @param name to create 924 + * @param mode file type and mode with which to create the new file 925 + * @param fi file information 926 + */ 927 + void (*create)(fuse_req_t req, fuse_ino_t parent, const char *name, 928 + mode_t mode, struct fuse_file_info *fi); 929 + 930 + /** 931 + * Test for a POSIX file lock 932 + * 933 + * Valid replies: 934 + * fuse_reply_lock 935 + * fuse_reply_err 936 + * 937 + * @param req request handle 938 + * @param ino the inode number 939 + * @param fi file information 940 + * @param lock the region/type to test 941 + */ 942 + void (*getlk)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, 943 + struct flock *lock); 944 + 945 + /** 946 + * Acquire, modify or release a POSIX file lock 947 + * 948 + * For POSIX threads (NPTL) there's a 1-1 relation between pid and 949 + * owner, but otherwise this is not always the case. For checking 950 + * lock ownership, 'fi->owner' must be used. The l_pid field in 951 + * 'struct flock' should only be used to fill in this field in 952 + * getlk(). 953 + * 954 + * Note: if the locking methods are not implemented, the kernel 955 + * will still allow file locking to work locally. Hence these are 956 + * only interesting for network filesystems and similar. 
957 + * 958 + * Valid replies: 959 + * fuse_reply_err 960 + * 961 + * @param req request handle 962 + * @param ino the inode number 963 + * @param fi file information 964 + * @param lock the region/type to set 965 + * @param sleep locking operation may sleep 966 + */ 967 + void (*setlk)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, 968 + struct flock *lock, int sleep); 969 + 970 + /** 971 + * Map block index within file to block index within device 972 + * 973 + * Note: This makes sense only for block device backed filesystems 974 + * mounted with the 'blkdev' option 975 + * 976 + * If this request is answered with an error code of ENOSYS, this is 977 + * treated as a permanent failure, i.e. all future bmap() requests will 978 + * fail with the same error code without being send to the filesystem 979 + * process. 980 + * 981 + * Valid replies: 982 + * fuse_reply_bmap 983 + * fuse_reply_err 984 + * 985 + * @param req request handle 986 + * @param ino the inode number 987 + * @param blocksize unit of block index 988 + * @param idx block index within file 989 + */ 990 + void (*bmap)(fuse_req_t req, fuse_ino_t ino, size_t blocksize, 991 + uint64_t idx); 992 + 993 + /** 994 + * Ioctl 995 + * 996 + * Note: For unrestricted ioctls (not allowed for FUSE 997 + * servers), data in and out areas can be discovered by giving 998 + * iovs and setting FUSE_IOCTL_RETRY in *flags*. For 999 + * restricted ioctls, kernel prepares in/out data area 1000 + * according to the information encoded in cmd. 
1001 + * 1002 + * Valid replies: 1003 + * fuse_reply_ioctl_retry 1004 + * fuse_reply_ioctl 1005 + * fuse_reply_ioctl_iov 1006 + * fuse_reply_err 1007 + * 1008 + * @param req request handle 1009 + * @param ino the inode number 1010 + * @param cmd ioctl command 1011 + * @param arg ioctl argument 1012 + * @param fi file information 1013 + * @param flags for FUSE_IOCTL_* flags 1014 + * @param in_buf data fetched from the caller 1015 + * @param in_bufsz number of fetched bytes 1016 + * @param out_bufsz maximum size of output data 1017 + * 1018 + * Note : the unsigned long request submitted by the application 1019 + * is truncated to 32 bits. 1020 + */ 1021 + void (*ioctl)(fuse_req_t req, fuse_ino_t ino, unsigned int cmd, void *arg, 1022 + struct fuse_file_info *fi, unsigned flags, const void *in_buf, 1023 + size_t in_bufsz, size_t out_bufsz); 1024 + 1025 + /** 1026 + * Poll for IO readiness 1027 + * 1028 + * Note: If ph is non-NULL, the client should notify 1029 + * when IO readiness events occur by calling 1030 + * fuse_lowlevel_notify_poll() with the specified ph. 1031 + * 1032 + * Regardless of the number of times poll with a non-NULL ph 1033 + * is received, single notification is enough to clear all. 1034 + * Notifying more times incurs overhead but doesn't harm 1035 + * correctness. 1036 + * 1037 + * The callee is responsible for destroying ph with 1038 + * fuse_pollhandle_destroy() when no longer in use. 1039 + * 1040 + * If this request is answered with an error code of ENOSYS, this is 1041 + * treated as success (with a kernel-defined default poll-mask) and 1042 + * future calls to poll() will succeed the same way without being sent 1043 + * to the filesystem process. 
1044 + * 1045 + * Valid replies: 1046 + * fuse_reply_poll 1047 + * fuse_reply_err 1048 + * 1049 + * @param req request handle 1050 + * @param ino the inode number 1051 + * @param fi file information 1052 + * @param ph poll handle to be used for notification 1053 + */ 1054 + void (*poll)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, 1055 + struct fuse_pollhandle *ph); 1056 + 1057 + /** 1058 + * Write data made available in a buffer 1059 + * 1060 + * This is a more generic version of the ->write() method. If 1061 + * FUSE_CAP_SPLICE_READ is set in fuse_conn_info.want and the 1062 + * kernel supports splicing from the fuse device, then the 1063 + * data will be made available in pipe for supporting zero 1064 + * copy data transfer. 1065 + * 1066 + * buf->count is guaranteed to be one (and thus buf->idx is 1067 + * always zero). The write_buf handler must ensure that 1068 + * bufv->off is correctly updated (reflecting the number of 1069 + * bytes read from bufv->buf[0]). 1070 + * 1071 + * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is 1072 + * expected to reset the setuid and setgid bits. 1073 + * 1074 + * Valid replies: 1075 + * fuse_reply_write 1076 + * fuse_reply_err 1077 + * 1078 + * @param req request handle 1079 + * @param ino the inode number 1080 + * @param bufv buffer containing the data 1081 + * @param off offset to write to 1082 + * @param fi file information 1083 + */ 1084 + void (*write_buf)(fuse_req_t req, fuse_ino_t ino, struct fuse_bufvec *bufv, 1085 + off_t off, struct fuse_file_info *fi); 1086 + 1087 + /** 1088 + * Forget about multiple inodes 1089 + * 1090 + * See description of the forget function for more 1091 + * information. 
1092 + * 1093 + * Valid replies: 1094 + * fuse_reply_none 1095 + * 1096 + * @param req request handle 1097 + */ 1098 + void (*forget_multi)(fuse_req_t req, size_t count, 1099 + struct fuse_forget_data *forgets); 1100 + 1101 + /** 1102 + * Acquire, modify or release a BSD file lock 1103 + * 1104 + * Note: if the locking methods are not implemented, the kernel 1105 + * will still allow file locking to work locally. Hence these are 1106 + * only interesting for network filesystems and similar. 1107 + * 1108 + * Valid replies: 1109 + * fuse_reply_err 1110 + * 1111 + * @param req request handle 1112 + * @param ino the inode number 1113 + * @param fi file information 1114 + * @param op the locking operation, see flock(2) 1115 + */ 1116 + void (*flock)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, 1117 + int op); 1118 + 1119 + /** 1120 + * Allocate requested space. If this function returns success then 1121 + * subsequent writes to the specified range shall not fail due to the lack 1122 + * of free space on the file system storage media. 1123 + * 1124 + * If this request is answered with an error code of ENOSYS, this is 1125 + * treated as a permanent failure with error code EOPNOTSUPP, i.e. all 1126 + * future fallocate() requests will fail with EOPNOTSUPP without being 1127 + * send to the filesystem process. 
1128 + * 1129 + * Valid replies: 1130 + * fuse_reply_err 1131 + * 1132 + * @param req request handle 1133 + * @param ino the inode number 1134 + * @param offset starting point for allocated region 1135 + * @param length size of allocated region 1136 + * @param mode determines the operation to be performed on the given range, 1137 + * see fallocate(2) 1138 + */ 1139 + void (*fallocate)(fuse_req_t req, fuse_ino_t ino, int mode, off_t offset, 1140 + off_t length, struct fuse_file_info *fi); 1141 + 1142 + /** 1143 + * Read directory with attributes 1144 + * 1145 + * Send a buffer filled using fuse_add_direntry_plus(), with size not 1146 + * exceeding the requested size. Send an empty buffer on end of 1147 + * stream. 1148 + * 1149 + * fi->fh will contain the value set by the opendir method, or 1150 + * will be undefined if the opendir method didn't set any value. 1151 + * 1152 + * In contrast to readdir() (which does not affect the lookup counts), 1153 + * the lookup count of every entry returned by readdirplus(), except "." 1154 + * and "..", is incremented by one. 1155 + * 1156 + * Valid replies: 1157 + * fuse_reply_buf 1158 + * fuse_reply_data 1159 + * fuse_reply_err 1160 + * 1161 + * @param req request handle 1162 + * @param ino the inode number 1163 + * @param size maximum number of bytes to send 1164 + * @param off offset to continue reading the directory stream 1165 + * @param fi file information 1166 + */ 1167 + void (*readdirplus)(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, 1168 + struct fuse_file_info *fi); 1169 + 1170 + /** 1171 + * Copy a range of data from one file to another 1172 + * 1173 + * Performs an optimized copy between two file descriptors without the 1174 + * additional cost of transferring data through the FUSE kernel module 1175 + * to user space (glibc) and then back into the FUSE filesystem again. 
1176 + * 1177 + * In case this method is not implemented, glibc falls back to reading 1178 + * data from the source and writing to the destination. Effectively 1179 + * doing an inefficient copy of the data. 1180 + * 1181 + * If this request is answered with an error code of ENOSYS, this is 1182 + * treated as a permanent failure with error code EOPNOTSUPP, i.e. all 1183 + * future copy_file_range() requests will fail with EOPNOTSUPP without 1184 + * being sent to the filesystem process. 1185 + * 1186 + * Valid replies: 1187 + * fuse_reply_write 1188 + * fuse_reply_err 1189 + * 1190 + * @param req request handle 1191 + * @param ino_in the inode number of the source file 1192 + * @param off_in starting point from where the data should be read 1193 + * @param fi_in file information of the source file 1194 + * @param ino_out the inode number of the destination file 1195 + * @param off_out starting point where the data should be written 1196 + * @param fi_out file information of the destination file 1197 + * @param len maximum size of the data to copy 1198 + * @param flags passed along with the copy_file_range() syscall 1199 + */ 1200 + void (*copy_file_range)(fuse_req_t req, fuse_ino_t ino_in, off_t off_in, 1201 + struct fuse_file_info *fi_in, fuse_ino_t ino_out, 1202 + off_t off_out, struct fuse_file_info *fi_out, 1203 + size_t len, int flags); 1204 + 1205 + /** 1206 + * Find next data or hole after the specified offset 1207 + * 1208 + * If this request is answered with an error code of ENOSYS, this is 1209 + * treated as a permanent failure, i.e. all future lseek() requests will 1210 + * fail with the same error code without being sent to the filesystem 1211 + * process. 
1212 + * 1213 + * Valid replies: 1214 + * fuse_reply_lseek 1215 + * fuse_reply_err 1216 + * 1217 + * @param req request handle 1218 + * @param ino the inode number 1219 + * @param off offset to start search from 1220 + * @param whence either SEEK_DATA or SEEK_HOLE 1221 + * @param fi file information 1222 + */ 1223 + void (*lseek)(fuse_req_t req, fuse_ino_t ino, off_t off, int whence, 1224 + struct fuse_file_info *fi); 1225 + }; 1226 + 1227 + /** 1228 + * Reply with an error code or success. 1229 + * 1230 + * Possible requests: 1231 + * all except forget 1232 + * 1233 + * Wherever possible, error codes should be chosen from the list of 1234 + * documented error conditions in the corresponding system calls 1235 + * manpage. 1236 + * 1237 + * An error code of ENOSYS is sometimes treated specially. This is 1238 + * indicated in the documentation of the affected handler functions. 1239 + * 1240 + * The following requests may be answered with a zero error code: 1241 + * unlink, rmdir, rename, flush, release, fsync, fsyncdir, setxattr, 1242 + * removexattr, setlk. 
1243 + * 1244 + * @param req request handle 1245 + * @param err the positive error value, or zero for success 1246 + * @return zero for success, -errno for failure to send reply 1247 + */ 1248 + int fuse_reply_err(fuse_req_t req, int err); 1249 + 1250 + /** 1251 + * Don't send reply 1252 + * 1253 + * Possible requests: 1254 + * forget 1255 + * forget_multi 1256 + * retrieve_reply 1257 + * 1258 + * @param req request handle 1259 + */ 1260 + void fuse_reply_none(fuse_req_t req); 1261 + 1262 + /** 1263 + * Reply with a directory entry 1264 + * 1265 + * Possible requests: 1266 + * lookup, mknod, mkdir, symlink, link 1267 + * 1268 + * Side effects: 1269 + * increments the lookup count on success 1270 + * 1271 + * @param req request handle 1272 + * @param e the entry parameters 1273 + * @return zero for success, -errno for failure to send reply 1274 + */ 1275 + int fuse_reply_entry(fuse_req_t req, const struct fuse_entry_param *e); 1276 + 1277 + /** 1278 + * Reply with a directory entry and open parameters 1279 + * 1280 + * currently the following members of 'fi' are used: 1281 + * fh, direct_io, keep_cache 1282 + * 1283 + * Possible requests: 1284 + * create 1285 + * 1286 + * Side effects: 1287 + * increments the lookup count on success 1288 + * 1289 + * @param req request handle 1290 + * @param e the entry parameters 1291 + * @param fi file information 1292 + * @return zero for success, -errno for failure to send reply 1293 + */ 1294 + int fuse_reply_create(fuse_req_t req, const struct fuse_entry_param *e, 1295 + const struct fuse_file_info *fi); 1296 + 1297 + /** 1298 + * Reply with attributes 1299 + * 1300 + * Possible requests: 1301 + * getattr, setattr 1302 + * 1303 + * @param req request handle 1304 + * @param attr the attributes 1305 + * @param attr_timeout validity timeout (in seconds) for the attributes 1306 + * @return zero for success, -errno for failure to send reply 1307 + */ 1308 + int fuse_reply_attr(fuse_req_t req, const struct stat *attr, 1309 + double 
attr_timeout); 1310 + 1311 + /** 1312 + * Reply with the contents of a symbolic link 1313 + * 1314 + * Possible requests: 1315 + * readlink 1316 + * 1317 + * @param req request handle 1318 + * @param link symbolic link contents 1319 + * @return zero for success, -errno for failure to send reply 1320 + */ 1321 + int fuse_reply_readlink(fuse_req_t req, const char *link); 1322 + 1323 + /** 1324 + * Reply with open parameters 1325 + * 1326 + * currently the following members of 'fi' are used: 1327 + * fh, direct_io, keep_cache 1328 + * 1329 + * Possible requests: 1330 + * open, opendir 1331 + * 1332 + * @param req request handle 1333 + * @param fi file information 1334 + * @return zero for success, -errno for failure to send reply 1335 + */ 1336 + int fuse_reply_open(fuse_req_t req, const struct fuse_file_info *fi); 1337 + 1338 + /** 1339 + * Reply with number of bytes written 1340 + * 1341 + * Possible requests: 1342 + * write 1343 + * 1344 + * @param req request handle 1345 + * @param count the number of bytes written 1346 + * @return zero for success, -errno for failure to send reply 1347 + */ 1348 + int fuse_reply_write(fuse_req_t req, size_t count); 1349 + 1350 + /** 1351 + * Reply with data 1352 + * 1353 + * Possible requests: 1354 + * read, readdir, getxattr, listxattr 1355 + * 1356 + * @param req request handle 1357 + * @param buf buffer containing data 1358 + * @param size the size of data in bytes 1359 + * @return zero for success, -errno for failure to send reply 1360 + */ 1361 + int fuse_reply_buf(fuse_req_t req, const char *buf, size_t size); 1362 + 1363 + /** 1364 + * Reply with data copied/moved from buffer(s) 1365 + * 1366 + * Possible requests: 1367 + * read, readdir, getxattr, listxattr 1368 + * 1369 + * Side effects: 1370 + * when used to return data from a readdirplus() (but not readdir()) 1371 + * call, increments the lookup count of each returned entry by one 1372 + * on success. 
1373 + * 1374 + * @param req request handle 1375 + * @param bufv buffer vector 1376 + * @return zero for success, -errno for failure to send reply 1377 + */ 1378 + int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv); 1379 + 1380 + /** 1381 + * Reply with data vector 1382 + * 1383 + * Possible requests: 1384 + * read, readdir, getxattr, listxattr 1385 + * 1386 + * @param req request handle 1387 + * @param iov the vector containing the data 1388 + * @param count the size of vector 1389 + * @return zero for success, -errno for failure to send reply 1390 + */ 1391 + int fuse_reply_iov(fuse_req_t req, const struct iovec *iov, int count); 1392 + 1393 + /** 1394 + * Reply with filesystem statistics 1395 + * 1396 + * Possible requests: 1397 + * statfs 1398 + * 1399 + * @param req request handle 1400 + * @param stbuf filesystem statistics 1401 + * @return zero for success, -errno for failure to send reply 1402 + */ 1403 + int fuse_reply_statfs(fuse_req_t req, const struct statvfs *stbuf); 1404 + 1405 + /** 1406 + * Reply with needed buffer size 1407 + * 1408 + * Possible requests: 1409 + * getxattr, listxattr 1410 + * 1411 + * @param req request handle 1412 + * @param count the buffer size needed in bytes 1413 + * @return zero for success, -errno for failure to send reply 1414 + */ 1415 + int fuse_reply_xattr(fuse_req_t req, size_t count); 1416 + 1417 + /** 1418 + * Reply with file lock information 1419 + * 1420 + * Possible requests: 1421 + * getlk 1422 + * 1423 + * @param req request handle 1424 + * @param lock the lock information 1425 + * @return zero for success, -errno for failure to send reply 1426 + */ 1427 + int fuse_reply_lock(fuse_req_t req, const struct flock *lock); 1428 + 1429 + /** 1430 + * Reply with block index 1431 + * 1432 + * Possible requests: 1433 + * bmap 1434 + * 1435 + * @param req request handle 1436 + * @param idx block index within device 1437 + * @return zero for success, -errno for failure to send reply 1438 + */ 1439 + int 
fuse_reply_bmap(fuse_req_t req, uint64_t idx); 1440 + 1441 + /* 1442 + * Filling a buffer in readdir 1443 + */ 1444 + 1445 + /** 1446 + * Add a directory entry to the buffer 1447 + * 1448 + * Buffer needs to be large enough to hold the entry. If it's not, 1449 + * then the entry is not filled in but the size of the entry is still 1450 + * returned. The caller can check this by comparing the bufsize 1451 + * parameter with the returned entry size. If the entry size is 1452 + * larger than the buffer size, the operation failed. 1453 + * 1454 + * From the 'stbuf' argument the st_ino field and bits 12-15 of the 1455 + * st_mode field are used. The other fields are ignored. 1456 + * 1457 + * *off* should be any non-zero value that the filesystem can use to 1458 + * identify the current point in the directory stream. It does not 1459 + * need to be the actual physical position. A value of zero is 1460 + * reserved to mean "from the beginning", and should therefore never 1461 + * be used (the first call to fuse_add_direntry should be passed the 1462 + * offset of the second directory entry). 1463 + * 1464 + * @param req request handle 1465 + * @param buf the point where the new entry will be added to the buffer 1466 + * @param bufsize remaining size of the buffer 1467 + * @param name the name of the entry 1468 + * @param stbuf the file attributes 1469 + * @param off the offset of the next entry 1470 + * @return the space needed for the entry 1471 + */ 1472 + size_t fuse_add_direntry(fuse_req_t req, char *buf, size_t bufsize, 1473 + const char *name, const struct stat *stbuf, off_t off); 1474 + 1475 + /** 1476 + * Add a directory entry to the buffer with the attributes 1477 + * 1478 + * See documentation of `fuse_add_direntry()` for more details. 
 *
 * @param req request handle
 * @param buf the point where the new entry will be added to the buffer
 * @param bufsize remaining size of the buffer
 * @param name the name of the entry
 * @param e the directory entry
 * @param off the offset of the next entry
 * @return the space needed for the entry
 */
size_t fuse_add_direntry_plus(fuse_req_t req, char *buf, size_t bufsize,
                              const char *name,
                              const struct fuse_entry_param *e, off_t off);

/**
 * Reply to ask for data fetch and output buffer preparation.  ioctl
 * will be retried with the specified input data fetched and output
 * buffer prepared.
 *
 * Possible requests:
 *   ioctl
 *
 * @param req request handle
 * @param in_iov iovec specifying data to fetch from the caller
 * @param in_count number of entries in in_iov
 * @param out_iov iovec specifying addresses to write output to
 * @param out_count number of entries in out_iov
 * @return zero for success, -errno for failure to send reply
 */
int fuse_reply_ioctl_retry(fuse_req_t req, const struct iovec *in_iov,
                           size_t in_count, const struct iovec *out_iov,
                           size_t out_count);

/**
 * Reply to finish ioctl
 *
 * Possible requests:
 *   ioctl
 *
 * @param req request handle
 * @param result result to be passed to the caller
 * @param buf buffer containing output data
 * @param size length of output data
 */
int fuse_reply_ioctl(fuse_req_t req, int result, const void *buf, size_t size);

/**
 * Reply to finish ioctl with iov buffer
 *
 * Possible requests:
 *   ioctl
 *
 * @param req request handle
 * @param result result to be passed to the caller
 * @param iov the vector containing the data
 * @param count the size of vector
 */
int fuse_reply_ioctl_iov(fuse_req_t req, int result, const struct iovec *iov,
                         int count);

/**
 * Reply with poll result event mask
 *
 * @param req request handle
 * @param revents poll result event mask
 */
int fuse_reply_poll(fuse_req_t req, unsigned revents);

/**
 * Reply with offset
 *
 * Possible requests:
 *   lseek
 *
 * @param req request handle
 * @param off offset of next data or hole
 * @return zero for success, -errno for failure to send reply
 */
int fuse_reply_lseek(fuse_req_t req, off_t off);

/*
 * Notification
 */

/**
 * Notify IO readiness event
 *
 * For more information, please read comment for poll operation.
 *
 * @param ph poll handle to notify IO readiness event for
 */
int fuse_lowlevel_notify_poll(struct fuse_pollhandle *ph);

/**
 * Notify to invalidate cache for an inode.
 *
 * Added in FUSE protocol version 7.12. If the kernel does not support
 * this (or a newer) version, the function will return -ENOSYS and do
 * nothing.
 *
 * If the filesystem has writeback caching enabled, invalidating an
 * inode will first trigger a writeback of all dirty pages. The call
 * will block until all writeback requests have completed and the
 * inode has been invalidated. It will, however, not wait for
 * completion of pending writeback requests that have been issued
 * before.
 *
 * If there are no dirty pages, this function will never block.
 *
 * @param se the session object
 * @param ino the inode number
 * @param off the offset in the inode where to start invalidating
 *            or negative to invalidate attributes only
 * @param len the amount of cache to invalidate or 0 for all
 * @return zero for success, -errno for failure
 */
int fuse_lowlevel_notify_inval_inode(struct fuse_session *se, fuse_ino_t ino,
                                     off_t off, off_t len);

/**
 * Notify to invalidate parent attributes and the dentry matching
 * parent/name
 *
 * To avoid a deadlock this function must not be called in the
 * execution path of a related filesystem operation or within any code
 * that could hold a lock that could be needed to execute such an
 * operation. As of kernel 4.18, a "related operation" is a lookup(),
 * symlink(), mknod(), mkdir(), unlink(), rename(), link() or create()
 * request for the parent, and a setattr(), unlink(), rmdir(),
 * rename(), setxattr(), removexattr(), readdir() or readdirplus()
 * request for the inode itself.
 *
 * When called correctly, this function will never block.
 *
 * Added in FUSE protocol version 7.12. If the kernel does not support
 * this (or a newer) version, the function will return -ENOSYS and do
 * nothing.
 *
 * @param se the session object
 * @param parent inode number
 * @param name file name
 * @param namelen strlen() of file name
 * @return zero for success, -errno for failure
 */
int fuse_lowlevel_notify_inval_entry(struct fuse_session *se, fuse_ino_t parent,
                                     const char *name, size_t namelen);

/**
 * This function behaves like fuse_lowlevel_notify_inval_entry() with
 * the following additional effect (at least as of Linux kernel 4.8):
 *
 * If the provided *child* inode matches the inode that is currently
 * associated with the cached dentry, and if there are any inotify
 * watches registered for the dentry, then the watchers are informed
 * that the dentry has been deleted.
 *
 * To avoid a deadlock this function must not be called while
 * executing a related filesystem operation or while holding a lock
 * that could be needed to execute such an operation (see the
 * description of fuse_lowlevel_notify_inval_entry() for more
 * details).
 *
 * When called correctly, this function will never block.
 *
 * Added in FUSE protocol version 7.18. If the kernel does not support
 * this (or a newer) version, the function will return -ENOSYS and do
 * nothing.
 *
 * @param se the session object
 * @param parent inode number
 * @param child inode number
 * @param name file name
 * @param namelen strlen() of file name
 * @return zero for success, -errno for failure
 */
int fuse_lowlevel_notify_delete(struct fuse_session *se, fuse_ino_t parent,
                                fuse_ino_t child, const char *name,
                                size_t namelen);

/**
 * Store data to the kernel buffers
 *
 * Synchronously store data in the kernel buffers belonging to the
 * given inode. The stored data is marked up-to-date (no read will be
 * performed against it, unless it's invalidated or evicted from the
 * cache).
 *
 * If the stored data overflows the current file size, then the size
 * is extended, similarly to a write(2) on the filesystem.
 *
 * If this function returns an error, then the store wasn't fully
 * completed, but it may have been partially completed.
 *
 * Added in FUSE protocol version 7.15. If the kernel does not support
 * this (or a newer) version, the function will return -ENOSYS and do
 * nothing.
 *
 * @param se the session object
 * @param ino the inode number
 * @param offset the starting offset into the file to store to
 * @param bufv buffer vector
 * @return zero for success, -errno for failure
 */
int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino,
                               off_t offset, struct fuse_bufvec *bufv);

/*
 * Utility functions
 */

/**
 * Get the userdata from the request
 *
 * @param req request handle
 * @return the user data passed to fuse_session_new()
 */
void *fuse_req_userdata(fuse_req_t req);

/**
 * Get the context from the request
 *
 * The pointer returned by this function will only be valid for the
 * request's lifetime
 *
 * @param req request handle
 * @return the context structure
 */
const struct fuse_ctx *fuse_req_ctx(fuse_req_t req);

/**
 * Get the current supplementary group IDs for the specified request
 *
 * Similar to the getgroups(2) system call, except the return value is
 * always the total number of group IDs, even if it is larger than the
 * specified size.
 *
 * The current fuse kernel module in linux (as of 2.6.30) doesn't pass
 * the group list to userspace, hence this function needs to parse
 * "/proc/$TID/task/$TID/status" to get the group IDs.
 *
 * This feature may not be supported on all operating systems.  In
 * such a case this function will return -ENOSYS.
 *
 * @param req request handle
 * @param size size of given array
 * @param list array of group IDs to be filled in
 * @return the total number of supplementary group IDs or -errno on failure
 */
int fuse_req_getgroups(fuse_req_t req, int size, gid_t list[]);

/**
 * Callback function for an interrupt
 *
 * @param req interrupted request
 * @param data user data
 */
typedef void (*fuse_interrupt_func_t)(fuse_req_t req, void *data);

/**
 * Register/unregister callback for an interrupt
 *
 * If an interrupt has already happened, then the callback function is
 * called from within this function, hence it's not possible for
 * interrupts to be lost.
 *
 * @param req request handle
 * @param func the callback function or NULL for unregister
 * @param data user data passed to the callback function
 */
void fuse_req_interrupt_func(fuse_req_t req, fuse_interrupt_func_t func,
                             void *data);

/**
 * Check if a request has already been interrupted
 *
 * @param req request handle
 * @return 1 if the request has been interrupted, 0 otherwise
 */
int fuse_req_interrupted(fuse_req_t req);

/**
 * Check if the session is connected via virtio
 *
 * @param se session object
 * @return 1 if the session is a virtio session
 */
int fuse_lowlevel_is_virtio(struct fuse_session *se);

/*
 * Inquiry functions
 */

/**
 * Print low-level version information to stdout.
 */
void fuse_lowlevel_version(void);

/**
 * Print available low-level options to stdout. This is not an
 * exhaustive list, but includes only those options that may be of
 * interest to an end-user of a file system.
 */
void fuse_lowlevel_help(void);

/**
 * Print available options for `fuse_parse_cmdline()`.
 */
void fuse_cmdline_help(void);

/*
 * Filesystem setup & teardown
 */

/* Options recognised by fuse_parse_cmdline() */
struct fuse_cmdline_opts {
    int foreground;
    int debug;
    int nodefault_subtype;
    int show_version;
    int show_help;
    int print_capabilities;
    int syslog;
    int log_level;
    unsigned int max_idle_threads;
};

/**
 * Utility function to parse common options for simple file systems
 * using the low-level API. A help text that describes the available
 * options can be printed with `fuse_cmdline_help`. A single
 * non-option argument is treated as the mountpoint. Multiple
 * non-option arguments will result in an error.
 *
 * If neither the -o subtype= nor the -o fsname= option is given, a new
 * subtype option will be added and set to the basename of the program
 * (the fsname will remain unset, and then defaults to "fuse").
 *
 * Known options will be removed from *args*, unknown options will
 * remain.
 *
 * @param args argument vector (input+output)
 * @param opts output argument for parsed options
 * @return 0 on success, -1 on failure
 */
int fuse_parse_cmdline(struct fuse_args *args, struct fuse_cmdline_opts *opts);

/**
 * Create a low level session.
 *
 * Returns a session structure suitable for passing to
 * fuse_session_mount() and fuse_session_loop().
 *
 * This function accepts most file-system independent mount options
 * (like context, nodev, ro - see mount(8)), as well as the general
 * fuse mount options listed in mount.fuse(8) (e.g. -o allow_root and
 * -o default_permissions, but not ``-o use_ino``). Instead of `-o
 * debug`, debugging may also be enabled with `-d` or `--debug`.
 *
 * If not all options are known, an error message is written to stderr
 * and the function returns NULL.
 *
 * Option parsing skips argv[0], which is assumed to contain the
 * program name. To prevent accidentally passing an option in
 * argv[0], this element must always be present (even if no options
 * are specified). It may be set to the empty string ('\0') if no
 * reasonable value can be provided.
 *
 * @param args argument vector
 * @param op the (low-level) filesystem operations
 * @param op_size sizeof(struct fuse_lowlevel_ops)
 * @param userdata user data
 *
 * @return the fuse session on success, NULL on failure
 **/
struct fuse_session *fuse_session_new(struct fuse_args *args,
                                      const struct fuse_lowlevel_ops *op,
                                      size_t op_size, void *userdata);

/**
 * Mount a FUSE file system.
 *
 * @param se session object
 *
 * @return 0 on success, -1 on failure.
 **/
int fuse_session_mount(struct fuse_session *se);

/**
 * Enter a single threaded, blocking event loop.
 *
 * When the event loop terminates because the connection to the FUSE
 * kernel module has been closed, this function returns zero. This
 * happens when the filesystem is unmounted regularly (by the
 * filesystem owner or root running the umount(8) or fusermount(1)
 * command), or if connection is explicitly severed by writing ``1``
 * to the ``abort`` file in ``/sys/fs/fuse/connections/NNN``. The only
 * way to distinguish between these two conditions is to check if the
 * filesystem is still mounted after the session loop returns.
 *
 * When some error occurs during request processing, the function
 * returns a negated errno(3) value.
 *
 * If the loop has been terminated because of a signal handler
 * installed by fuse_set_signal_handlers(), this function returns the
 * (positive) signal value that triggered the exit.
 *
 * @param se the session
 * @return 0, -errno, or a signal value
 */
int fuse_session_loop(struct fuse_session *se);

/**
 * Flag a session as terminated.
 *
 * This function is invoked by the POSIX signal handlers, when
 * registered using fuse_set_signal_handlers(). It will cause any
 * running event loops to terminate on the next opportunity.
 *
 * @param se the session
 */
void fuse_session_exit(struct fuse_session *se);

/**
 * Reset the terminated flag of a session
 *
 * @param se the session
 */
void fuse_session_reset(struct fuse_session *se);

/**
 * Query the terminated flag of a session
 *
 * @param se the session
 * @return 1 if exited, 0 if not exited
 */
int fuse_session_exited(struct fuse_session *se);

/**
 * Ensure that file system is unmounted.
 *
 * In regular operation, the file system is typically unmounted by the
 * user calling umount(8) or fusermount(1), which then terminates the
 * FUSE session loop. However, the session loop may also terminate as
 * a result of an explicit call to fuse_session_exit() (e.g. by a
 * signal handler installed by fuse_set_signal_handlers()). In this
 * case the filesystem remains mounted, but any attempt to access it
 * will block (while the filesystem process is still running) or give
 * an ESHUTDOWN error (after the filesystem process has terminated).
 *
 * If the communication channel with the FUSE kernel module is still
 * open (i.e., if the session loop was terminated by an explicit call
 * to fuse_session_exit()), this function will close it and unmount
 * the filesystem. If the communication channel has been closed by the
 * kernel, this method will do (almost) nothing.
 *
 * NOTE: The above semantics mean that if the connection to the kernel
 * is terminated via the ``/sys/fs/fuse/connections/NNN/abort`` file,
 * this method will *not* unmount the filesystem.
 *
 * @param se the session
 */
void fuse_session_unmount(struct fuse_session *se);

/**
 * Destroy a session
 *
 * @param se the session
 */
void fuse_session_destroy(struct fuse_session *se);

/*
 * Custom event loop support
 */

/**
 * Return file descriptor for communication with kernel.
 *
 * The file descriptor can be used to integrate FUSE with a custom event
 * loop. Whenever data is available for reading on the provided fd,
 * the event loop should call `fuse_session_receive_buf` followed by
 * `fuse_session_process_buf` to process the request.
 *
 * The returned file descriptor is valid until `fuse_session_unmount`
 * is called.
 *
 * @param se the session
 * @return a file descriptor
 */
int fuse_session_fd(struct fuse_session *se);

/**
 * Process a raw request supplied in a generic buffer
 *
 * The fuse_buf may contain a memory buffer or a pipe file descriptor.
 *
 * @param se the session
 * @param buf the fuse_buf containing the request
 */
void fuse_session_process_buf(struct fuse_session *se,
                              const struct fuse_buf *buf);

/**
 * Read a raw request from the kernel into the supplied buffer.
 *
 * Depending on file system options, system capabilities, and request
 * size the request is either read into a memory buffer or spliced
 * into a temporary pipe.
 *
 * @param se the session
 * @param buf the fuse_buf to store the request in
 * @return the actual size of the raw request, or -errno on error
 */
int fuse_session_receive_buf(struct fuse_session *se, struct fuse_buf *buf);

#endif /* FUSE_LOWLEVEL_H_ */
+60
tools/virtiofsd/fuse_misc.h
/*
 * FUSE: Filesystem in Userspace
 * Copyright (C) 2001-2007 Miklos Szeredi <miklos@szeredi.hu>
 *
 * This program can be distributed under the terms of the GNU LGPLv2.
 * See the file COPYING.LIB
 */

#include <pthread.h>
#include "config-host.h"

/*
 * Versioned symbols cannot be used in some cases because they
 * - confuse the dynamic linker in uClibc
 * - are not supported on MacOSX (in the MachO binary format)
 */
#if (!defined(__UCLIBC__) && !defined(__APPLE__))
#define FUSE_SYMVER(x) __asm__(x)
#else
#define FUSE_SYMVER(x)
#endif

#ifndef USE_UCLIBC
#define fuse_mutex_init(mut) pthread_mutex_init(mut, NULL)
#else
/* Is this hack still needed? */
static inline void fuse_mutex_init(pthread_mutex_t *mut)
{
    pthread_mutexattr_t attr;
    pthread_mutexattr_init(&attr);
    pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ADAPTIVE_NP);
    pthread_mutex_init(mut, &attr);
    pthread_mutexattr_destroy(&attr);
}
#endif

/*
 * Portable accessors for the nanosecond part of the struct stat time
 * fields; the field names differ between platforms.
 */
#ifdef HAVE_STRUCT_STAT_ST_ATIM
/* Linux */
#define ST_ATIM_NSEC(stbuf) ((stbuf)->st_atim.tv_nsec)
#define ST_CTIM_NSEC(stbuf) ((stbuf)->st_ctim.tv_nsec)
#define ST_MTIM_NSEC(stbuf) ((stbuf)->st_mtim.tv_nsec)
#define ST_ATIM_NSEC_SET(stbuf, val) (stbuf)->st_atim.tv_nsec = (val)
#define ST_CTIM_NSEC_SET(stbuf, val) (stbuf)->st_ctim.tv_nsec = (val)
#define ST_MTIM_NSEC_SET(stbuf, val) (stbuf)->st_mtim.tv_nsec = (val)
#elif defined(HAVE_STRUCT_STAT_ST_ATIMESPEC)
/* FreeBSD */
#define ST_ATIM_NSEC(stbuf) ((stbuf)->st_atimespec.tv_nsec)
#define ST_CTIM_NSEC(stbuf) ((stbuf)->st_ctimespec.tv_nsec)
#define ST_MTIM_NSEC(stbuf) ((stbuf)->st_mtimespec.tv_nsec)
#define ST_ATIM_NSEC_SET(stbuf, val) (stbuf)->st_atimespec.tv_nsec = (val)
#define ST_CTIM_NSEC_SET(stbuf, val) (stbuf)->st_ctimespec.tv_nsec = (val)
#define ST_MTIM_NSEC_SET(stbuf, val) (stbuf)->st_mtimespec.tv_nsec = (val)
#else
/* No sub-second timestamp support: read as 0, set as a no-op */
#define ST_ATIM_NSEC(stbuf) 0
#define ST_CTIM_NSEC(stbuf) 0
#define ST_MTIM_NSEC(stbuf) 0
#define ST_ATIM_NSEC_SET(stbuf, val) do { } while (0)
#define ST_CTIM_NSEC_SET(stbuf, val) do { } while (0)
#define ST_MTIM_NSEC_SET(stbuf, val) do { } while (0)
#endif
+450
tools/virtiofsd/fuse_opt.c
/*
 * FUSE: Filesystem in Userspace
 * Copyright (C) 2001-2007 Miklos Szeredi <miklos@szeredi.hu>
 *
 * Implementation of option parsing routines (dealing with `struct
 * fuse_args`).
 *
 * This program can be distributed under the terms of the GNU LGPLv2.
 * See the file COPYING.LIB
 */

#include "qemu/osdep.h"
#include "fuse_opt.h"
#include "fuse_i.h"
#include "fuse_misc.h"

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Parsing state shared by one fuse_opt_parse() invocation */
struct fuse_opt_context {
    void *data;                 /* user data handed through to 'proc' */
    const struct fuse_opt *opt; /* option template table */
    fuse_opt_proc_t proc;       /* user callback, may be NULL */
    int argctr;                 /* index of the argument being parsed */
    int argc;                   /* input argument count */
    char **argv;                /* input argument vector */
    struct fuse_args outargs;   /* kept (unconsumed) arguments */
    char *opts;                 /* accumulated "-o" option string */
    int nonopt;                 /* outargs.argc at the "--" separator, or 0 */
};

/* Free an argument vector previously built by this module */
void fuse_opt_free_args(struct fuse_args *args)
{
    if (args) {
        if (args->argv && args->allocated) {
            int i;
            for (i = 0; i < args->argc; i++) {
                free(args->argv[i]);
            }
            free(args->argv);
        }
        args->argc = 0;
        args->argv = NULL;
        args->allocated = 0;
    }
}

/* Log an out-of-memory condition; always returns -1 */
static int alloc_failed(void)
{
    fuse_log(FUSE_LOG_ERR, "fuse: memory allocation failed\n");
    return -1;
}

/* Append a copy of 'arg' to 'args', keeping the vector NULL terminated */
int fuse_opt_add_arg(struct fuse_args *args, const char *arg)
{
    char **newargv;
    char *newarg;

    assert(!args->argv || args->allocated);

    newarg = strdup(arg);
    if (!newarg) {
        return alloc_failed();
    }

    /* +2: one slot for the new entry, one for the NULL terminator */
    newargv = realloc(args->argv, (args->argc + 2) * sizeof(char *));
    if (!newargv) {
        free(newarg);
        return alloc_failed();
    }

    args->argv = newargv;
    args->allocated = 1;
    args->argv[args->argc++] = newarg;
    args->argv[args->argc] = NULL;
    return 0;
}

/* Insert a copy of 'arg' at index 'pos': append, then rotate into place */
static int fuse_opt_insert_arg_common(struct fuse_args *args, int pos,
                                      const char *arg)
{
    assert(pos <= args->argc);
    if (fuse_opt_add_arg(args, arg) == -1) {
        return -1;
    }

    if (pos != args->argc - 1) {
        char *newarg = args->argv[args->argc - 1];
        memmove(&args->argv[pos + 1], &args->argv[pos],
                sizeof(char *) * (args->argc - pos - 1));
        args->argv[pos] = newarg;
    }
    return 0;
}

int fuse_opt_insert_arg(struct fuse_args *args, int pos, const char *arg)
{
    return fuse_opt_insert_arg_common(args, pos, arg);
}

/* Advance to the next argument; fails if 'opt' was the last argument */
static int next_arg(struct fuse_opt_context *ctx, const char *opt)
{
    if (ctx->argctr + 1 >= ctx->argc) {
        fuse_log(FUSE_LOG_ERR, "fuse: missing argument after `%s'\n", opt);
        return -1;
    }
    ctx->argctr++;
    return 0;
}

/* Keep 'arg' in the output argument vector */
static int add_arg(struct fuse_opt_context *ctx, const char *arg)
{
    return fuse_opt_add_arg(&ctx->outargs, arg);
}

/*
 * Append 'opt' to the comma-separated list '*opts', reallocating it.
 * If 'esc' is non-zero, ',' and '\\' inside 'opt' are backslash-escaped.
 */
static int add_opt_common(char **opts, const char *opt, int esc)
{
    unsigned oldlen = *opts ? strlen(*opts) : 0;
    /* worst case: separator + every character escaped + NUL */
    char *d = realloc(*opts, oldlen + 1 + strlen(opt) * 2 + 1);

    if (!d) {
        return alloc_failed();
    }

    *opts = d;
    if (oldlen) {
        d += oldlen;
        *d++ = ',';
    }

    for (; *opt; opt++) {
        if (esc && (*opt == ',' || *opt == '\\')) {
            *d++ = '\\';
        }
        *d++ = *opt;
    }
    *d = '\0';

    return 0;
}

int fuse_opt_add_opt(char **opts, const char *opt)
{
    return add_opt_common(opts, opt, 0);
}

int fuse_opt_add_opt_escaped(char **opts, const char *opt)
{
    return add_opt_common(opts, opt, 1);
}

/* Keep 'opt' by appending it to the context's accumulated "-o" string */
static int add_opt(struct fuse_opt_context *ctx, const char *opt)
{
    return add_opt_common(&ctx->opts, opt, 1);
}

/*
 * Run the user callback for 'arg' with 'key', honouring the special
 * KEEP/DISCARD keys.  'iso' selects whether a kept argument goes into
 * the "-o" option string (1) or the output argument vector (0).
 */
static int call_proc(struct fuse_opt_context *ctx, const char *arg, int key,
                     int iso)
{
    if (key == FUSE_OPT_KEY_DISCARD) {
        return 0;
    }

    if (key != FUSE_OPT_KEY_KEEP && ctx->proc) {
        int res = ctx->proc(ctx->data, arg, key, &ctx->outargs);
        if (res == -1 || !res) {
            return res;
        }
    }
    /* callback returned 1 (or KEY_KEEP): keep the argument */
    if (iso) {
        return add_opt(ctx, arg);
    } else {
        return add_arg(ctx, arg);
    }
}

/*
 * Does 'arg' match template 't'?  For a parameterized template,
 * '*sepp' is set to the offset of the separator ('=' or ' ') within
 * the template; it is 0 for an exact match.  Returns 1 on match.
 */
static int match_template(const char *t, const char *arg, unsigned *sepp)
{
    int arglen = strlen(arg);
    const char *sep = strchr(t, '=');
    sep = sep ? sep : strchr(t, ' ');
    if (sep && (!sep[1] || sep[1] == '%')) {
        int tlen = sep - t;
        if (sep[0] == '=') {
            tlen++; /* "foo=" style: the '=' is part of the prefix */
        }
        if (arglen >= tlen && strncmp(arg, t, tlen) == 0) {
            *sepp = sep - t;
            return 1;
        }
    }
    if (strcmp(t, arg) == 0) {
        *sepp = 0;
        return 1;
    }
    return 0;
}

/* Find the first template in 'opt' that matches 'arg' */
static const struct fuse_opt *find_opt(const struct fuse_opt *opt,
                                       const char *arg, unsigned *sepp)
{
    for (; opt && opt->templ; opt++) {
        if (match_template(opt->templ, arg, sepp)) {
            return opt;
        }
    }
    return NULL;
}

int fuse_opt_match(const struct fuse_opt *opts, const char *opt)
{
    unsigned dummy;
    return find_opt(opts, opt, &dummy) ? 1 : 0;
}

/*
 * Store 'param' into '*var' according to the scanf-style 'format'.
 * "%s" allocates a fresh copy and frees the previous value.
 */
static int process_opt_param(void *var, const char *format, const char *param,
                             const char *arg)
{
    assert(format[0] == '%');
    if (format[1] == 's') {
        char **s = var;
        char *copy = strdup(param);
        if (!copy) {
            return alloc_failed();
        }

        free(*s);
        *s = copy;
    } else {
        if (sscanf(param, format, var) != 1) {
            fuse_log(FUSE_LOG_ERR, "fuse: invalid parameter in option `%s'\n",
                     arg);
            return -1;
        }
    }
    return 0;
}

/* Apply a matched option: invoke the user callback or set the variable */
static int process_opt(struct fuse_opt_context *ctx, const struct fuse_opt *opt,
                       unsigned sep, const char *arg, int iso)
{
    if (opt->offset == -1U) {
        if (call_proc(ctx, arg, opt->value, iso) == -1) {
            return -1;
        }
    } else {
        void *var = (char *)ctx->data + opt->offset;
        if (sep && opt->templ[sep + 1]) {
            const char *param = arg + sep;
            if (opt->templ[sep] == '=') {
                param++;
            }
            if (process_opt_param(var, opt->templ + sep + 1, param, arg) ==
                -1) {
                return -1;
            }
        } else {
            *(int *)var = opt->value;
        }
    }
    return 0;
}

/*
 * Handle a two-argument option ("-x param") by joining the option and
 * the following argument into a single "-xparam" string.
 */
static int process_opt_sep_arg(struct fuse_opt_context *ctx,
                               const struct fuse_opt *opt, unsigned sep,
                               const char *arg, int iso)
{
    int res;
    char *newarg;
    char *param;

    if (next_arg(ctx, arg) == -1) {
        return -1;
    }

    param = ctx->argv[ctx->argctr];
    newarg = malloc(sep + strlen(param) + 1);
    if (!newarg) {
        return alloc_failed();
    }

    memcpy(newarg, arg, sep);
    strcpy(newarg + sep, param);
    res = process_opt(ctx, opt, sep, newarg, iso);
    free(newarg);

    return res;
}

/* Process one option against every matching template in the table */
static int process_gopt(struct fuse_opt_context *ctx, const char *arg, int iso)
{
    unsigned sep;
    const struct fuse_opt *opt = find_opt(ctx->opt, arg, &sep);
    if (opt) {
        for (; opt; opt = find_opt(opt + 1, arg, &sep)) {
            int res;
            if (sep && opt->templ[sep] == ' ' && !arg[sep]) {
                res = process_opt_sep_arg(ctx, opt, sep, arg, iso);
            } else {
                res = process_opt(ctx, opt, sep, arg, iso);
            }
            if (res == -1) {
                return -1;
            }
        }
        return 0;
    } else {
        return call_proc(ctx, arg, FUSE_OPT_KEY_OPT, iso);
    }
}

/*
 * Split a comma-separated "-o" option list and process each element.
 * Unescaping ("\\" before a character, or a three-digit octal escape)
 * is done in place: each element is unescaped into the start of the
 * 'opts' buffer, which is modified and reused as scratch space.
 */
static int process_real_option_group(struct fuse_opt_context *ctx, char *opts)
{
    char *s = opts;
    char *d = s;
    int end = 0;

    while (!end) {
        if (*s == '\0') {
            end = 1;
        }
        if (*s == ',' || end) {
            int res;

            *d = '\0';
            res = process_gopt(ctx, opts, 1);
            if (res == -1) {
                return -1;
            }
            d = opts;
        } else {
            if (s[0] == '\\' && s[1] != '\0') {
                s++;
                /* "\NNN" with NNN octal in 000..377 -> one byte */
                if (s[0] >= '0' && s[0] <= '3' && s[1] >= '0' && s[1] <= '7' &&
                    s[2] >= '0' && s[2] <= '7') {
                    *d++ = (s[0] - '0') * 0100 + (s[1] - '0') * 0010 +
                           (s[2] - '0');
                    s += 2;
                } else {
                    *d++ = *s;
                }
            } else {
                *d++ = *s;
            }
        }
        s++;
    }

    return 0;
}

/* Like process_real_option_group(), but works on a private copy of 'opts' */
static int process_option_group(struct fuse_opt_context *ctx, const char *opts)
{
    int res;
    char *copy = strdup(opts);

    if (!copy) {
        fuse_log(FUSE_LOG_ERR, "fuse: memory allocation failed\n");
        return -1;
    }
    res = process_real_option_group(ctx, copy);
    free(copy);
    return res;
}

/* Dispatch a single command line argument */
static int process_one(struct fuse_opt_context *ctx, const char *arg)
{
    if (ctx->nonopt || arg[0] != '-') {
        return call_proc(ctx, arg, FUSE_OPT_KEY_NONOPT, 0);
    } else if (arg[1] == 'o') {
        if (arg[2]) {
            return process_option_group(ctx, arg + 2);
        } else {
            if (next_arg(ctx, arg) == -1) {
                return -1;
            }

            return process_option_group(ctx, ctx->argv[ctx->argctr]);
        }
    } else if (arg[1] == '-' && !arg[2]) {
        /* "--": everything after it is a non-option */
        if (add_arg(ctx, arg) == -1) {
            return -1;
        }
        ctx->nonopt = ctx->outargs.argc;
        return 0;
    } else {
        return process_gopt(ctx, arg, 0);
    }
}

/* Parse all arguments, then splice the accumulated "-o" list back in */
static int opt_parse(struct fuse_opt_context *ctx)
{
    if (ctx->argc) {
        if (add_arg(ctx, ctx->argv[0]) == -1) {
            return -1;
        }
    }

    for (ctx->argctr = 1; ctx->argctr < ctx->argc; ctx->argctr++) {
        if (process_one(ctx, ctx->argv[ctx->argctr]) == -1) {
            return -1;
        }
    }

    if (ctx->opts) {
        if (fuse_opt_insert_arg(&ctx->outargs, 1, "-o") == -1 ||
            fuse_opt_insert_arg(&ctx->outargs, 2, ctx->opts) == -1) {
            return -1;
        }
    }

    /* If option separator ("--") is the last argument, remove it */
    if (ctx->nonopt && ctx->nonopt == ctx->outargs.argc &&
        strcmp(ctx->outargs.argv[ctx->outargs.argc - 1], "--") == 0) {
        free(ctx->outargs.argv[ctx->outargs.argc - 1]);
        ctx->outargs.argv[--ctx->outargs.argc] = NULL;
    }

    return 0;
}

/*
 * Parse 'args' against the template table 'opts', invoking 'proc' for
 * keyed and unmatched options.  On success, 'args' is replaced by the
 * vector of arguments that were kept; on failure it is left unchanged.
 */
int fuse_opt_parse(struct fuse_args *args, void *data,
                   const struct fuse_opt opts[], fuse_opt_proc_t proc)
{
    int res;
    struct fuse_opt_context ctx = {
        .data = data,
        .opt = opts,
        .proc = proc,
    };

    if (!args || !args->argv || !args->argc) {
        return 0;
    }

    ctx.argc = args->argc;
    ctx.argv = args->argv;

    res = opt_parse(&ctx);
    if (res != -1) {
        /* swap: hand the kept arguments back, free the originals below */
        struct fuse_args tmp = *args;
        *args = ctx.outargs;
        ctx.outargs = tmp;
    }
    free(ctx.opts);
    fuse_opt_free_args(&ctx.outargs);
    return res;
}
+272
tools/virtiofsd/fuse_opt.h
/*
 * FUSE: Filesystem in Userspace
 * Copyright (C) 2001-2007 Miklos Szeredi <miklos@szeredi.hu>
 *
 * This program can be distributed under the terms of the GNU LGPLv2.
 * See the file COPYING.LIB.
 */

#ifndef FUSE_OPT_H_
#define FUSE_OPT_H_

/** @file
 *
 * This file defines the option parsing interface of FUSE
 */

/**
 * Option description
 *
 * Each entry describes a single option template and the action taken
 * when an argument matches it.  Several entries may match the same
 * argument, in which case each matching entry's action is performed.
 *
 * Possible actions on a match:
 *
 *  i)   an integer (int or unsigned) variable at 'offset' within the
 *       user-data struct is set to 'value'
 *
 *  ii)  the processing function is invoked with 'value' as the key
 *       (when 'offset' is -1)
 *
 *  iii) an integer (any width) or string (char *) variable at 'offset'
 *       is set from the option's parameter (when the template contains
 *       a printf-style format)
 *
 * 'offset' is normally either 'offsetof(struct foo, member)' (actions
 * i and iii) or -1 (action ii).  'offsetof()' comes from <stddef.h>.
 *
 * Template forms:
 *
 *  1) "-x", "-foo", "--foo", "--foo-bar", ... - match only themselves
 *     ("--" and anything starting with "-o" are invalid here)
 *  2) "foo", "foo-bar", ... - match "-ofoo", "-ofoo-bar" or the same
 *     name inside a comma-separated option list
 *  3) "bar=", "--foo=", ... - like 1) and 2) but taking a parameter
 *  4) "bar=%s", "--foo=%lu", ... - like 3) but the parameter is
 *     converted via the format (action iii)
 *  5) "-x " (trailing space) - matches "-xparam" or "-x param" given
 *     as two separate arguments
 *  6) "-x %s", ... - combination of 4) and 5)
 *
 * For a "%s" format, memory is allocated for the string (unlike with
 * scanf()).  Any previous non-NULL value stored at this location is
 * freed.
 */
struct fuse_opt {
    /** Matching template and optional parameter formatting */
    const char *templ;

    /**
     * Offset of variable within 'data' parameter of fuse_opt_parse()
     * or -1
     */
    unsigned long offset;

    /**
     * Value to set the variable to, or to be passed as 'key' to the
     * processing function. Ignored if template has a format
     */
    int value;
};

/**
 * Key option: on a match the processing function is called with the
 * given key.
 */
#define FUSE_OPT_KEY(templ, key) { templ, -1U, key }

/**
 * Terminator: an array of 'struct fuse_opt' must end with an entry
 * whose template is NULL.
 */
#define FUSE_OPT_END { NULL, 0, 0 }

/**
 * Argument list
 */
struct fuse_args {
    /** Argument count */
    int argc;

    /** Argument vector. NULL terminated */
    char **argv;

    /** Is 'argv' allocated? */
    int allocated;
};

/**
 * Initializer for 'struct fuse_args' (argv is borrowed, not copied)
 */
#define FUSE_ARGS_INIT(argc, argv) { argc, argv, 0 }

/**
 * Key passed to the processing function when an option matched no
 * template
 */
#define FUSE_OPT_KEY_OPT -1

/**
 * Key passed to the processing function for every non-option
 *
 * Non-options are arguments not starting with '-', plus everything
 * after the special '--' argument
 */
#define FUSE_OPT_KEY_NONOPT -2

/**
 * Special key for options to keep: the argument is not passed to the
 * processing function, but is treated as if that function returned 1
 */
#define FUSE_OPT_KEY_KEEP -3

/**
 * Special key for options to discard: the argument is not passed to
 * the processing function, but is treated as if that function
 * returned 0
 */
#define FUSE_OPT_KEY_DISCARD -4

/**
 * Processing function
 *
 * Called when
 *  - an option matched no 'struct fuse_opt'
 *  - an argument is a non-option
 *  - an option matched and its offset was -1
 *
 * 'arg' always holds the whole argument, parameter included; a
 * two-argument option ("-x foo") is first folded into the single-
 * argument form "-xfoo".  Options of the "-ofoo" form arrive without
 * the "-o" prefix.
 *
 * The return value decides whether the argument is copied into the
 * output vector or dropped.
 *
 * @param data is the user data passed to the fuse_opt_parse() function
 * @param arg is the whole argument or option
 * @param key determines why the processing function was called
 * @param outargs the current output argument list
 * @return -1 on error, 0 if arg is to be discarded, 1 if arg should be kept
 */
typedef int (*fuse_opt_proc_t)(void *data, const char *arg, int key,
                               struct fuse_args *outargs);

/**
 * Option parsing function
 *
 * A NULL 'args' is equivalent to an empty argument vector.
 * A NULL 'opts' is equivalent to an array holding only the end marker.
 * A NULL 'proc' is equivalent to a processing function that always
 * returns 1.
 *
 * @param args is the input and output argument list
 * @param data is the user data
 * @param opts is the option description array
 * @param proc is the processing function
 * @return -1 on error, 0 on success
 */
int fuse_opt_parse(struct fuse_args *args, void *data,
                   const struct fuse_opt opts[], fuse_opt_proc_t proc);

/**
 * Add an option to a comma separated option list
 *
 * @param opts is a pointer to an option list, may point to a NULL value
 * @param opt is the option to add
 * @return -1 on allocation error, 0 on success
 */
int fuse_opt_add_opt(char **opts, const char *opt);

/**
 * Add an option, escaping commas, to a comma separated option list
 *
 * @param opts is a pointer to an option list, may point to a NULL value
 * @param opt is the option to add
 * @return -1 on allocation error, 0 on success
 */
int fuse_opt_add_opt_escaped(char **opts, const char *opt);

/**
 * Add an argument to a NULL terminated argument vector
 *
 * @param args is the structure containing the current argument list
 * @param arg is the new argument to add
 * @return -1 on allocation error, 0 on success
 */
int fuse_opt_add_arg(struct fuse_args *args, const char *arg);

/**
 * Add an argument at the specified position in a NULL terminated
 * argument vector
 *
 * Useful for inserting options at the front of the array, before the
 * special '--' argument.
 *
 * @param args is the structure containing the current argument list
 * @param pos is the position at which to add the argument
 * @param arg is the new argument to add
 * @return -1 on allocation error, 0 on success
 */
int fuse_opt_insert_arg(struct fuse_args *args, int pos, const char *arg);

/**
 * Free the contents of argument list
 *
 * The structure itself is not freed
 *
 * @param args is the structure containing the argument list
 */
void fuse_opt_free_args(struct fuse_args *args);


/**
 * Check if an option matches
 *
 * @param opts is the option description array
 * @param opt is the option to match
 * @return 1 if a match is found, 0 if not
 */
int fuse_opt_match(const struct fuse_opt opts[], const char *opt);

#endif /* FUSE_OPT_H_ */
+98
tools/virtiofsd/fuse_signals.c
··· 1 + /* 2 + * FUSE: Filesystem in Userspace 3 + * Copyright (C) 2001-2007 Miklos Szeredi <miklos@szeredi.hu> 4 + * 5 + * Utility functions for setting signal handlers. 6 + * 7 + * This program can be distributed under the terms of the GNU LGPLv2. 8 + * See the file COPYING.LIB 9 + */ 10 + 11 + #include "qemu/osdep.h" 12 + #include "fuse_i.h" 13 + #include "fuse_lowlevel.h" 14 + 15 + #include <errno.h> 16 + #include <signal.h> 17 + #include <stdio.h> 18 + #include <stdlib.h> 19 + #include <string.h> 20 + 21 + static struct fuse_session *fuse_instance; 22 + 23 + static void exit_handler(int sig) 24 + { 25 + if (fuse_instance) { 26 + fuse_session_exit(fuse_instance); 27 + if (sig <= 0) { 28 + fuse_log(FUSE_LOG_ERR, "assertion error: signal value <= 0\n"); 29 + abort(); 30 + } 31 + fuse_instance->error = sig; 32 + } 33 + } 34 + 35 + static void do_nothing(int sig) 36 + { 37 + (void)sig; 38 + } 39 + 40 + static int set_one_signal_handler(int sig, void (*handler)(int), int remove) 41 + { 42 + struct sigaction sa; 43 + struct sigaction old_sa; 44 + 45 + memset(&sa, 0, sizeof(struct sigaction)); 46 + sa.sa_handler = remove ? SIG_DFL : handler; 47 + sigemptyset(&(sa.sa_mask)); 48 + sa.sa_flags = 0; 49 + 50 + if (sigaction(sig, NULL, &old_sa) == -1) { 51 + fuse_log(FUSE_LOG_ERR, "fuse: cannot get old signal handler: %s\n", 52 + strerror(errno)); 53 + return -1; 54 + } 55 + 56 + if (old_sa.sa_handler == (remove ? handler : SIG_DFL) && 57 + sigaction(sig, &sa, NULL) == -1) { 58 + fuse_log(FUSE_LOG_ERR, "fuse: cannot set signal handler: %s\n", 59 + strerror(errno)); 60 + return -1; 61 + } 62 + return 0; 63 + } 64 + 65 + int fuse_set_signal_handlers(struct fuse_session *se) 66 + { 67 + /* 68 + * If we used SIG_IGN instead of the do_nothing function, 69 + * then we would be unable to tell if we set SIG_IGN (and 70 + * thus should reset to SIG_DFL in fuse_remove_signal_handlers) 71 + * or if it was already set to SIG_IGN (and should be left 72 + * untouched. 
73 + */ 74 + if (set_one_signal_handler(SIGHUP, exit_handler, 0) == -1 || 75 + set_one_signal_handler(SIGINT, exit_handler, 0) == -1 || 76 + set_one_signal_handler(SIGTERM, exit_handler, 0) == -1 || 77 + set_one_signal_handler(SIGPIPE, do_nothing, 0) == -1) { 78 + return -1; 79 + } 80 + 81 + fuse_instance = se; 82 + return 0; 83 + } 84 + 85 + void fuse_remove_signal_handlers(struct fuse_session *se) 86 + { 87 + if (fuse_instance != se) { 88 + fuse_log(FUSE_LOG_ERR, 89 + "fuse: fuse_remove_signal_handlers: unknown session\n"); 90 + } else { 91 + fuse_instance = NULL; 92 + } 93 + 94 + set_one_signal_handler(SIGHUP, exit_handler, 1); 95 + set_one_signal_handler(SIGINT, exit_handler, 1); 96 + set_one_signal_handler(SIGTERM, exit_handler, 1); 97 + set_one_signal_handler(SIGPIPE, do_nothing, 1); 98 + }
+986
tools/virtiofsd/fuse_virtio.c
··· 1 + /* 2 + * virtio-fs glue for FUSE 3 + * Copyright (C) 2018 Red Hat, Inc. and/or its affiliates 4 + * 5 + * Authors: 6 + * Dave Gilbert <dgilbert@redhat.com> 7 + * 8 + * Implements the glue between libfuse and libvhost-user 9 + * 10 + * This program can be distributed under the terms of the GNU LGPLv2. 11 + * See the file COPYING.LIB 12 + */ 13 + 14 + #include "qemu/osdep.h" 15 + #include "qemu/iov.h" 16 + #include "qapi/error.h" 17 + #include "fuse_i.h" 18 + #include "standard-headers/linux/fuse.h" 19 + #include "fuse_misc.h" 20 + #include "fuse_opt.h" 21 + #include "fuse_virtio.h" 22 + 23 + #include <assert.h> 24 + #include <errno.h> 25 + #include <glib.h> 26 + #include <stdint.h> 27 + #include <stdio.h> 28 + #include <stdlib.h> 29 + #include <string.h> 30 + #include <sys/eventfd.h> 31 + #include <sys/socket.h> 32 + #include <sys/types.h> 33 + #include <sys/un.h> 34 + #include <unistd.h> 35 + 36 + #include "contrib/libvhost-user/libvhost-user.h" 37 + 38 + struct fv_VuDev; 39 + struct fv_QueueInfo { 40 + pthread_t thread; 41 + /* 42 + * This lock protects the VuVirtq preventing races between 43 + * fv_queue_thread() and fv_queue_worker(). 44 + */ 45 + pthread_mutex_t vq_lock; 46 + 47 + struct fv_VuDev *virtio_dev; 48 + 49 + /* Our queue index, corresponds to array position */ 50 + int qidx; 51 + int kick_fd; 52 + int kill_fd; /* For killing the thread */ 53 + }; 54 + 55 + /* A FUSE request */ 56 + typedef struct { 57 + VuVirtqElement elem; 58 + struct fuse_chan ch; 59 + 60 + /* Used to complete requests that involve no reply */ 61 + bool reply_sent; 62 + } FVRequest; 63 + 64 + /* 65 + * We pass the dev element into libvhost-user 66 + * and then use it to get back to the outer 67 + * container for other data. 68 + */ 69 + struct fv_VuDev { 70 + VuDev dev; 71 + struct fuse_session *se; 72 + 73 + /* 74 + * Either handle virtqueues or vhost-user protocol messages. 
Don't do 75 + * both at the same time since that could lead to race conditions if 76 + * virtqueues or memory tables change while another thread is accessing 77 + * them. 78 + * 79 + * The assumptions are: 80 + * 1. fv_queue_thread() reads/writes to virtqueues and only reads VuDev. 81 + * 2. virtio_loop() reads/writes virtqueues and VuDev. 82 + */ 83 + pthread_rwlock_t vu_dispatch_rwlock; 84 + 85 + /* 86 + * The following pair of fields are only accessed in the main 87 + * virtio_loop 88 + */ 89 + size_t nqueues; 90 + struct fv_QueueInfo **qi; 91 + }; 92 + 93 + /* From spec */ 94 + struct virtio_fs_config { 95 + char tag[36]; 96 + uint32_t num_queues; 97 + }; 98 + 99 + /* Callback from libvhost-user */ 100 + static uint64_t fv_get_features(VuDev *dev) 101 + { 102 + return 1ULL << VIRTIO_F_VERSION_1; 103 + } 104 + 105 + /* Callback from libvhost-user */ 106 + static void fv_set_features(VuDev *dev, uint64_t features) 107 + { 108 + } 109 + 110 + /* 111 + * Callback from libvhost-user if there's a new fd we're supposed to listen 112 + * to, typically a queue kick? 113 + */ 114 + static void fv_set_watch(VuDev *dev, int fd, int condition, vu_watch_cb cb, 115 + void *data) 116 + { 117 + fuse_log(FUSE_LOG_WARNING, "%s: TODO! fd=%d\n", __func__, fd); 118 + } 119 + 120 + /* 121 + * Callback from libvhost-user if we're no longer supposed to listen on an fd 122 + */ 123 + static void fv_remove_watch(VuDev *dev, int fd) 124 + { 125 + fuse_log(FUSE_LOG_WARNING, "%s: TODO! fd=%d\n", __func__, fd); 126 + } 127 + 128 + /* Callback from libvhost-user to panic */ 129 + static void fv_panic(VuDev *dev, const char *err) 130 + { 131 + fuse_log(FUSE_LOG_ERR, "%s: libvhost-user: %s\n", __func__, err); 132 + /* TODO: Allow reconnects?? 
*/ 133 + exit(EXIT_FAILURE); 134 + } 135 + 136 + /* 137 + * Copy from an iovec into a fuse_buf (memory only) 138 + * Caller must ensure there is space 139 + */ 140 + static void copy_from_iov(struct fuse_buf *buf, size_t out_num, 141 + const struct iovec *out_sg) 142 + { 143 + void *dest = buf->mem; 144 + 145 + while (out_num) { 146 + size_t onelen = out_sg->iov_len; 147 + memcpy(dest, out_sg->iov_base, onelen); 148 + dest += onelen; 149 + out_sg++; 150 + out_num--; 151 + } 152 + } 153 + 154 + /* 155 + * Copy from one iov to another, the given number of bytes 156 + * The caller must have checked sizes. 157 + */ 158 + static void copy_iov(struct iovec *src_iov, int src_count, 159 + struct iovec *dst_iov, int dst_count, size_t to_copy) 160 + { 161 + size_t dst_offset = 0; 162 + /* Outer loop copies 'src' elements */ 163 + while (to_copy) { 164 + assert(src_count); 165 + size_t src_len = src_iov[0].iov_len; 166 + size_t src_offset = 0; 167 + 168 + if (src_len > to_copy) { 169 + src_len = to_copy; 170 + } 171 + /* Inner loop copies contents of one 'src' to maybe multiple dst. */ 172 + while (src_len) { 173 + assert(dst_count); 174 + size_t dst_len = dst_iov[0].iov_len - dst_offset; 175 + if (dst_len > src_len) { 176 + dst_len = src_len; 177 + } 178 + 179 + memcpy(dst_iov[0].iov_base + dst_offset, 180 + src_iov[0].iov_base + src_offset, dst_len); 181 + src_len -= dst_len; 182 + to_copy -= dst_len; 183 + src_offset += dst_len; 184 + dst_offset += dst_len; 185 + 186 + assert(dst_offset <= dst_iov[0].iov_len); 187 + if (dst_offset == dst_iov[0].iov_len) { 188 + dst_offset = 0; 189 + dst_iov++; 190 + dst_count--; 191 + } 192 + } 193 + src_iov++; 194 + src_count--; 195 + } 196 + } 197 + 198 + /* 199 + * Called back by ll whenever it wants to send a reply/message back 200 + * The 1st element of the iov starts with the fuse_out_header 201 + * 'unique'==0 means it's a notify message. 
202 + */ 203 + int virtio_send_msg(struct fuse_session *se, struct fuse_chan *ch, 204 + struct iovec *iov, int count) 205 + { 206 + FVRequest *req = container_of(ch, FVRequest, ch); 207 + struct fv_QueueInfo *qi = ch->qi; 208 + VuDev *dev = &se->virtio_dev->dev; 209 + VuVirtq *q = vu_get_queue(dev, qi->qidx); 210 + VuVirtqElement *elem = &req->elem; 211 + int ret = 0; 212 + 213 + assert(count >= 1); 214 + assert(iov[0].iov_len >= sizeof(struct fuse_out_header)); 215 + 216 + struct fuse_out_header *out = iov[0].iov_base; 217 + /* TODO: Endianness! */ 218 + 219 + size_t tosend_len = iov_size(iov, count); 220 + 221 + /* unique == 0 is notification, which we don't support */ 222 + assert(out->unique); 223 + assert(!req->reply_sent); 224 + 225 + /* The 'in' part of the elem is to qemu */ 226 + unsigned int in_num = elem->in_num; 227 + struct iovec *in_sg = elem->in_sg; 228 + size_t in_len = iov_size(in_sg, in_num); 229 + fuse_log(FUSE_LOG_DEBUG, "%s: elem %d: with %d in desc of length %zd\n", 230 + __func__, elem->index, in_num, in_len); 231 + 232 + /* 233 + * The elem should have room for a 'fuse_out_header' (out from fuse) 234 + * plus the data based on the len in the header. 
235 + */ 236 + if (in_len < sizeof(struct fuse_out_header)) { 237 + fuse_log(FUSE_LOG_ERR, "%s: elem %d too short for out_header\n", 238 + __func__, elem->index); 239 + ret = -E2BIG; 240 + goto err; 241 + } 242 + if (in_len < tosend_len) { 243 + fuse_log(FUSE_LOG_ERR, "%s: elem %d too small for data len %zd\n", 244 + __func__, elem->index, tosend_len); 245 + ret = -E2BIG; 246 + goto err; 247 + } 248 + 249 + copy_iov(iov, count, in_sg, in_num, tosend_len); 250 + 251 + pthread_rwlock_rdlock(&qi->virtio_dev->vu_dispatch_rwlock); 252 + pthread_mutex_lock(&qi->vq_lock); 253 + vu_queue_push(dev, q, elem, tosend_len); 254 + vu_queue_notify(dev, q); 255 + pthread_mutex_unlock(&qi->vq_lock); 256 + pthread_rwlock_unlock(&qi->virtio_dev->vu_dispatch_rwlock); 257 + 258 + req->reply_sent = true; 259 + 260 + err: 261 + return ret; 262 + } 263 + 264 + /* 265 + * Callback from fuse_send_data_iov_* when it's virtio and the buffer 266 + * is a single FD with FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK 267 + * We need send the iov and then the buffer. 268 + * Return 0 on success 269 + */ 270 + int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, 271 + struct iovec *iov, int count, struct fuse_bufvec *buf, 272 + size_t len) 273 + { 274 + FVRequest *req = container_of(ch, FVRequest, ch); 275 + struct fv_QueueInfo *qi = ch->qi; 276 + VuDev *dev = &se->virtio_dev->dev; 277 + VuVirtq *q = vu_get_queue(dev, qi->qidx); 278 + VuVirtqElement *elem = &req->elem; 279 + int ret = 0; 280 + 281 + assert(count >= 1); 282 + assert(iov[0].iov_len >= sizeof(struct fuse_out_header)); 283 + 284 + struct fuse_out_header *out = iov[0].iov_base; 285 + /* TODO: Endianness! 
*/ 286 + 287 + size_t iov_len = iov_size(iov, count); 288 + size_t tosend_len = iov_len + len; 289 + 290 + out->len = tosend_len; 291 + 292 + fuse_log(FUSE_LOG_DEBUG, "%s: count=%d len=%zd iov_len=%zd\n", __func__, 293 + count, len, iov_len); 294 + 295 + /* unique == 0 is notification which we don't support */ 296 + assert(out->unique); 297 + 298 + assert(!req->reply_sent); 299 + 300 + /* The 'in' part of the elem is to qemu */ 301 + unsigned int in_num = elem->in_num; 302 + struct iovec *in_sg = elem->in_sg; 303 + size_t in_len = iov_size(in_sg, in_num); 304 + fuse_log(FUSE_LOG_DEBUG, "%s: elem %d: with %d in desc of length %zd\n", 305 + __func__, elem->index, in_num, in_len); 306 + 307 + /* 308 + * The elem should have room for a 'fuse_out_header' (out from fuse) 309 + * plus the data based on the len in the header. 310 + */ 311 + if (in_len < sizeof(struct fuse_out_header)) { 312 + fuse_log(FUSE_LOG_ERR, "%s: elem %d too short for out_header\n", 313 + __func__, elem->index); 314 + ret = E2BIG; 315 + goto err; 316 + } 317 + if (in_len < tosend_len) { 318 + fuse_log(FUSE_LOG_ERR, "%s: elem %d too small for data len %zd\n", 319 + __func__, elem->index, tosend_len); 320 + ret = E2BIG; 321 + goto err; 322 + } 323 + 324 + /* TODO: Limit to 'len' */ 325 + 326 + /* First copy the header data from iov->in_sg */ 327 + copy_iov(iov, count, in_sg, in_num, iov_len); 328 + 329 + /* 330 + * Build a copy of the the in_sg iov so we can skip bits in it, 331 + * including changing the offsets 332 + */ 333 + struct iovec *in_sg_cpy = calloc(sizeof(struct iovec), in_num); 334 + assert(in_sg_cpy); 335 + memcpy(in_sg_cpy, in_sg, sizeof(struct iovec) * in_num); 336 + /* These get updated as we skip */ 337 + struct iovec *in_sg_ptr = in_sg_cpy; 338 + int in_sg_cpy_count = in_num; 339 + 340 + /* skip over parts of in_sg that contained the header iov */ 341 + size_t skip_size = iov_len; 342 + 343 + size_t in_sg_left = 0; 344 + do { 345 + while (skip_size != 0 && in_sg_cpy_count) { 346 + 
if (skip_size >= in_sg_ptr[0].iov_len) { 347 + skip_size -= in_sg_ptr[0].iov_len; 348 + in_sg_ptr++; 349 + in_sg_cpy_count--; 350 + } else { 351 + in_sg_ptr[0].iov_len -= skip_size; 352 + in_sg_ptr[0].iov_base += skip_size; 353 + break; 354 + } 355 + } 356 + 357 + int i; 358 + for (i = 0, in_sg_left = 0; i < in_sg_cpy_count; i++) { 359 + in_sg_left += in_sg_ptr[i].iov_len; 360 + } 361 + fuse_log(FUSE_LOG_DEBUG, 362 + "%s: after skip skip_size=%zd in_sg_cpy_count=%d " 363 + "in_sg_left=%zd\n", 364 + __func__, skip_size, in_sg_cpy_count, in_sg_left); 365 + ret = preadv(buf->buf[0].fd, in_sg_ptr, in_sg_cpy_count, 366 + buf->buf[0].pos); 367 + 368 + if (ret == -1) { 369 + ret = errno; 370 + fuse_log(FUSE_LOG_DEBUG, "%s: preadv failed (%m) len=%zd\n", 371 + __func__, len); 372 + free(in_sg_cpy); 373 + goto err; 374 + } 375 + fuse_log(FUSE_LOG_DEBUG, "%s: preadv ret=%d len=%zd\n", __func__, 376 + ret, len); 377 + if (ret < len && ret) { 378 + fuse_log(FUSE_LOG_DEBUG, "%s: ret < len\n", __func__); 379 + /* Skip over this much next time around */ 380 + skip_size = ret; 381 + buf->buf[0].pos += ret; 382 + len -= ret; 383 + 384 + /* Lets do another read */ 385 + continue; 386 + } 387 + if (!ret) { 388 + /* EOF case? 
*/ 389 + fuse_log(FUSE_LOG_DEBUG, "%s: !ret in_sg_left=%zd\n", __func__, 390 + in_sg_left); 391 + break; 392 + } 393 + if (ret != len) { 394 + fuse_log(FUSE_LOG_DEBUG, "%s: ret!=len\n", __func__); 395 + ret = EIO; 396 + free(in_sg_cpy); 397 + goto err; 398 + } 399 + in_sg_left -= ret; 400 + len -= ret; 401 + } while (in_sg_left); 402 + free(in_sg_cpy); 403 + 404 + /* Need to fix out->len on EOF */ 405 + if (len) { 406 + struct fuse_out_header *out_sg = in_sg[0].iov_base; 407 + 408 + tosend_len -= len; 409 + out_sg->len = tosend_len; 410 + } 411 + 412 + ret = 0; 413 + 414 + pthread_rwlock_rdlock(&qi->virtio_dev->vu_dispatch_rwlock); 415 + pthread_mutex_lock(&qi->vq_lock); 416 + vu_queue_push(dev, q, elem, tosend_len); 417 + vu_queue_notify(dev, q); 418 + pthread_mutex_unlock(&qi->vq_lock); 419 + pthread_rwlock_unlock(&qi->virtio_dev->vu_dispatch_rwlock); 420 + 421 + err: 422 + if (ret == 0) { 423 + req->reply_sent = true; 424 + } 425 + 426 + return ret; 427 + } 428 + 429 + /* Process one FVRequest in a thread pool */ 430 + static void fv_queue_worker(gpointer data, gpointer user_data) 431 + { 432 + struct fv_QueueInfo *qi = user_data; 433 + struct fuse_session *se = qi->virtio_dev->se; 434 + struct VuDev *dev = &qi->virtio_dev->dev; 435 + FVRequest *req = data; 436 + VuVirtqElement *elem = &req->elem; 437 + struct fuse_buf fbuf = {}; 438 + bool allocated_bufv = false; 439 + struct fuse_bufvec bufv; 440 + struct fuse_bufvec *pbufv; 441 + 442 + assert(se->bufsize > sizeof(struct fuse_in_header)); 443 + 444 + /* 445 + * An element contains one request and the space to send our response 446 + * They're spread over multiple descriptors in a scatter/gather set 447 + * and we can't trust the guest to keep them still; so copy in/out. 
448 + */ 449 + fbuf.mem = malloc(se->bufsize); 450 + assert(fbuf.mem); 451 + 452 + fuse_mutex_init(&req->ch.lock); 453 + req->ch.fd = -1; 454 + req->ch.qi = qi; 455 + 456 + /* The 'out' part of the elem is from qemu */ 457 + unsigned int out_num = elem->out_num; 458 + struct iovec *out_sg = elem->out_sg; 459 + size_t out_len = iov_size(out_sg, out_num); 460 + fuse_log(FUSE_LOG_DEBUG, 461 + "%s: elem %d: with %d out desc of length %zd\n", 462 + __func__, elem->index, out_num, out_len); 463 + 464 + /* 465 + * The elem should contain a 'fuse_in_header' (in to fuse) 466 + * plus the data based on the len in the header. 467 + */ 468 + if (out_len < sizeof(struct fuse_in_header)) { 469 + fuse_log(FUSE_LOG_ERR, "%s: elem %d too short for in_header\n", 470 + __func__, elem->index); 471 + assert(0); /* TODO */ 472 + } 473 + if (out_len > se->bufsize) { 474 + fuse_log(FUSE_LOG_ERR, "%s: elem %d too large for buffer\n", __func__, 475 + elem->index); 476 + assert(0); /* TODO */ 477 + } 478 + /* Copy just the first element and look at it */ 479 + copy_from_iov(&fbuf, 1, out_sg); 480 + 481 + pbufv = NULL; /* Compiler thinks an unitialised path */ 482 + if (out_num > 2 && 483 + out_sg[0].iov_len == sizeof(struct fuse_in_header) && 484 + ((struct fuse_in_header *)fbuf.mem)->opcode == FUSE_WRITE && 485 + out_sg[1].iov_len == sizeof(struct fuse_write_in)) { 486 + /* 487 + * For a write we don't actually need to copy the 488 + * data, we can just do it straight out of guest memory 489 + * but we must still copy the headers in case the guest 490 + * was nasty and changed them while we were using them. 
491 + */ 492 + fuse_log(FUSE_LOG_DEBUG, "%s: Write special case\n", __func__); 493 + 494 + /* copy the fuse_write_in header afte rthe fuse_in_header */ 495 + fbuf.mem += out_sg->iov_len; 496 + copy_from_iov(&fbuf, 1, out_sg + 1); 497 + fbuf.mem -= out_sg->iov_len; 498 + fbuf.size = out_sg[0].iov_len + out_sg[1].iov_len; 499 + 500 + /* Allocate the bufv, with space for the rest of the iov */ 501 + pbufv = malloc(sizeof(struct fuse_bufvec) + 502 + sizeof(struct fuse_buf) * (out_num - 2)); 503 + if (!pbufv) { 504 + fuse_log(FUSE_LOG_ERR, "%s: pbufv malloc failed\n", 505 + __func__); 506 + goto out; 507 + } 508 + 509 + allocated_bufv = true; 510 + pbufv->count = 1; 511 + pbufv->buf[0] = fbuf; 512 + 513 + size_t iovindex, pbufvindex; 514 + iovindex = 2; /* 2 headers, separate iovs */ 515 + pbufvindex = 1; /* 2 headers, 1 fusebuf */ 516 + 517 + for (; iovindex < out_num; iovindex++, pbufvindex++) { 518 + pbufv->count++; 519 + pbufv->buf[pbufvindex].pos = ~0; /* Dummy */ 520 + pbufv->buf[pbufvindex].flags = 0; 521 + pbufv->buf[pbufvindex].mem = out_sg[iovindex].iov_base; 522 + pbufv->buf[pbufvindex].size = out_sg[iovindex].iov_len; 523 + } 524 + } else { 525 + /* Normal (non fast write) path */ 526 + 527 + /* Copy the rest of the buffer */ 528 + fbuf.mem += out_sg->iov_len; 529 + copy_from_iov(&fbuf, out_num - 1, out_sg + 1); 530 + fbuf.mem -= out_sg->iov_len; 531 + fbuf.size = out_len; 532 + 533 + /* TODO! 
Endianness of header */ 534 + 535 + /* TODO: Add checks for fuse_session_exited */ 536 + bufv.buf[0] = fbuf; 537 + bufv.count = 1; 538 + pbufv = &bufv; 539 + } 540 + pbufv->idx = 0; 541 + pbufv->off = 0; 542 + fuse_session_process_buf_int(se, pbufv, &req->ch); 543 + 544 + out: 545 + if (allocated_bufv) { 546 + free(pbufv); 547 + } 548 + 549 + /* If the request has no reply, still recycle the virtqueue element */ 550 + if (!req->reply_sent) { 551 + struct VuVirtq *q = vu_get_queue(dev, qi->qidx); 552 + 553 + fuse_log(FUSE_LOG_DEBUG, "%s: elem %d no reply sent\n", __func__, 554 + elem->index); 555 + 556 + pthread_rwlock_rdlock(&qi->virtio_dev->vu_dispatch_rwlock); 557 + pthread_mutex_lock(&qi->vq_lock); 558 + vu_queue_push(dev, q, elem, 0); 559 + vu_queue_notify(dev, q); 560 + pthread_mutex_unlock(&qi->vq_lock); 561 + pthread_rwlock_unlock(&qi->virtio_dev->vu_dispatch_rwlock); 562 + } 563 + 564 + pthread_mutex_destroy(&req->ch.lock); 565 + free(fbuf.mem); 566 + free(req); 567 + } 568 + 569 + /* Thread function for individual queues, created when a queue is 'started' */ 570 + static void *fv_queue_thread(void *opaque) 571 + { 572 + struct fv_QueueInfo *qi = opaque; 573 + struct VuDev *dev = &qi->virtio_dev->dev; 574 + struct VuVirtq *q = vu_get_queue(dev, qi->qidx); 575 + struct fuse_session *se = qi->virtio_dev->se; 576 + GThreadPool *pool; 577 + 578 + pool = g_thread_pool_new(fv_queue_worker, qi, se->thread_pool_size, TRUE, 579 + NULL); 580 + if (!pool) { 581 + fuse_log(FUSE_LOG_ERR, "%s: g_thread_pool_new failed\n", __func__); 582 + return NULL; 583 + } 584 + 585 + fuse_log(FUSE_LOG_INFO, "%s: Start for queue %d kick_fd %d\n", __func__, 586 + qi->qidx, qi->kick_fd); 587 + while (1) { 588 + struct pollfd pf[2]; 589 + int ret; 590 + 591 + pf[0].fd = qi->kick_fd; 592 + pf[0].events = POLLIN; 593 + pf[0].revents = 0; 594 + pf[1].fd = qi->kill_fd; 595 + pf[1].events = POLLIN; 596 + pf[1].revents = 0; 597 + 598 + fuse_log(FUSE_LOG_DEBUG, "%s: Waiting for Queue %d 
event\n", __func__, 599 + qi->qidx); 600 + int poll_res = ppoll(pf, 2, NULL, NULL); 601 + 602 + if (poll_res == -1) { 603 + if (errno == EINTR) { 604 + fuse_log(FUSE_LOG_INFO, "%s: ppoll interrupted, going around\n", 605 + __func__); 606 + continue; 607 + } 608 + fuse_log(FUSE_LOG_ERR, "fv_queue_thread ppoll: %m\n"); 609 + break; 610 + } 611 + assert(poll_res >= 1); 612 + if (pf[0].revents & (POLLERR | POLLHUP | POLLNVAL)) { 613 + fuse_log(FUSE_LOG_ERR, "%s: Unexpected poll revents %x Queue %d\n", 614 + __func__, pf[0].revents, qi->qidx); 615 + break; 616 + } 617 + if (pf[1].revents & (POLLERR | POLLHUP | POLLNVAL)) { 618 + fuse_log(FUSE_LOG_ERR, 619 + "%s: Unexpected poll revents %x Queue %d killfd\n", 620 + __func__, pf[1].revents, qi->qidx); 621 + break; 622 + } 623 + if (pf[1].revents) { 624 + fuse_log(FUSE_LOG_INFO, "%s: kill event on queue %d - quitting\n", 625 + __func__, qi->qidx); 626 + break; 627 + } 628 + assert(pf[0].revents & POLLIN); 629 + fuse_log(FUSE_LOG_DEBUG, "%s: Got queue event on Queue %d\n", __func__, 630 + qi->qidx); 631 + 632 + eventfd_t evalue; 633 + if (eventfd_read(qi->kick_fd, &evalue)) { 634 + fuse_log(FUSE_LOG_ERR, "Eventfd_read for queue: %m\n"); 635 + break; 636 + } 637 + /* Mutual exclusion with virtio_loop() */ 638 + ret = pthread_rwlock_rdlock(&qi->virtio_dev->vu_dispatch_rwlock); 639 + assert(ret == 0); /* there is no possible error case */ 640 + pthread_mutex_lock(&qi->vq_lock); 641 + /* out is from guest, in is too guest */ 642 + unsigned int in_bytes, out_bytes; 643 + vu_queue_get_avail_bytes(dev, q, &in_bytes, &out_bytes, ~0, ~0); 644 + 645 + fuse_log(FUSE_LOG_DEBUG, 646 + "%s: Queue %d gave evalue: %zx available: in: %u out: %u\n", 647 + __func__, qi->qidx, (size_t)evalue, in_bytes, out_bytes); 648 + 649 + while (1) { 650 + FVRequest *req = vu_queue_pop(dev, q, sizeof(FVRequest)); 651 + if (!req) { 652 + break; 653 + } 654 + 655 + req->reply_sent = false; 656 + 657 + g_thread_pool_push(pool, req, NULL); 658 + } 659 + 660 + 
pthread_mutex_unlock(&qi->vq_lock); 661 + pthread_rwlock_unlock(&qi->virtio_dev->vu_dispatch_rwlock); 662 + } 663 + 664 + g_thread_pool_free(pool, FALSE, TRUE); 665 + 666 + return NULL; 667 + } 668 + 669 + static void fv_queue_cleanup_thread(struct fv_VuDev *vud, int qidx) 670 + { 671 + int ret; 672 + struct fv_QueueInfo *ourqi; 673 + 674 + assert(qidx < vud->nqueues); 675 + ourqi = vud->qi[qidx]; 676 + 677 + /* Kill the thread */ 678 + if (eventfd_write(ourqi->kill_fd, 1)) { 679 + fuse_log(FUSE_LOG_ERR, "Eventfd_write for queue %d: %s\n", 680 + qidx, strerror(errno)); 681 + } 682 + ret = pthread_join(ourqi->thread, NULL); 683 + if (ret) { 684 + fuse_log(FUSE_LOG_ERR, "%s: Failed to join thread idx %d err %d\n", 685 + __func__, qidx, ret); 686 + } 687 + pthread_mutex_destroy(&ourqi->vq_lock); 688 + close(ourqi->kill_fd); 689 + ourqi->kick_fd = -1; 690 + free(vud->qi[qidx]); 691 + vud->qi[qidx] = NULL; 692 + } 693 + 694 + /* Callback from libvhost-user on start or stop of a queue */ 695 + static void fv_queue_set_started(VuDev *dev, int qidx, bool started) 696 + { 697 + struct fv_VuDev *vud = container_of(dev, struct fv_VuDev, dev); 698 + struct fv_QueueInfo *ourqi; 699 + 700 + fuse_log(FUSE_LOG_INFO, "%s: qidx=%d started=%d\n", __func__, qidx, 701 + started); 702 + assert(qidx >= 0); 703 + 704 + /* 705 + * Ignore additional request queues for now. passthrough_ll.c must be 706 + * audited for thread-safety issues first. It was written with a 707 + * well-behaved client in mind and may not protect against all types of 708 + * races yet. 
709 + */ 710 + if (qidx > 1) { 711 + fuse_log(FUSE_LOG_ERR, 712 + "%s: multiple request queues not yet implemented, please only " 713 + "configure 1 request queue\n", 714 + __func__); 715 + exit(EXIT_FAILURE); 716 + } 717 + 718 + if (started) { 719 + /* Fire up a thread to watch this queue */ 720 + if (qidx >= vud->nqueues) { 721 + vud->qi = realloc(vud->qi, (qidx + 1) * sizeof(vud->qi[0])); 722 + assert(vud->qi); 723 + memset(vud->qi + vud->nqueues, 0, 724 + sizeof(vud->qi[0]) * (1 + (qidx - vud->nqueues))); 725 + vud->nqueues = qidx + 1; 726 + } 727 + if (!vud->qi[qidx]) { 728 + vud->qi[qidx] = calloc(sizeof(struct fv_QueueInfo), 1); 729 + assert(vud->qi[qidx]); 730 + vud->qi[qidx]->virtio_dev = vud; 731 + vud->qi[qidx]->qidx = qidx; 732 + } else { 733 + /* Shouldn't have been started */ 734 + assert(vud->qi[qidx]->kick_fd == -1); 735 + } 736 + ourqi = vud->qi[qidx]; 737 + ourqi->kick_fd = dev->vq[qidx].kick_fd; 738 + 739 + ourqi->kill_fd = eventfd(0, EFD_CLOEXEC | EFD_SEMAPHORE); 740 + assert(ourqi->kill_fd != -1); 741 + pthread_mutex_init(&ourqi->vq_lock, NULL); 742 + 743 + if (pthread_create(&ourqi->thread, NULL, fv_queue_thread, ourqi)) { 744 + fuse_log(FUSE_LOG_ERR, "%s: Failed to create thread for queue %d\n", 745 + __func__, qidx); 746 + assert(0); 747 + } 748 + } else { 749 + fv_queue_cleanup_thread(vud, qidx); 750 + } 751 + } 752 + 753 + static bool fv_queue_order(VuDev *dev, int qidx) 754 + { 755 + return false; 756 + } 757 + 758 + static const VuDevIface fv_iface = { 759 + .get_features = fv_get_features, 760 + .set_features = fv_set_features, 761 + 762 + /* Don't need process message, we've not got any at vhost-user level */ 763 + .queue_set_started = fv_queue_set_started, 764 + 765 + .queue_is_processed_in_order = fv_queue_order, 766 + }; 767 + 768 + /* 769 + * Main loop; this mostly deals with events on the vhost-user 770 + * socket itself, and not actual fuse data. 
771 + */ 772 + int virtio_loop(struct fuse_session *se) 773 + { 774 + fuse_log(FUSE_LOG_INFO, "%s: Entry\n", __func__); 775 + 776 + while (!fuse_session_exited(se)) { 777 + struct pollfd pf[1]; 778 + bool ok; 779 + int ret; 780 + pf[0].fd = se->vu_socketfd; 781 + pf[0].events = POLLIN; 782 + pf[0].revents = 0; 783 + 784 + fuse_log(FUSE_LOG_DEBUG, "%s: Waiting for VU event\n", __func__); 785 + int poll_res = ppoll(pf, 1, NULL, NULL); 786 + 787 + if (poll_res == -1) { 788 + if (errno == EINTR) { 789 + fuse_log(FUSE_LOG_INFO, "%s: ppoll interrupted, going around\n", 790 + __func__); 791 + continue; 792 + } 793 + fuse_log(FUSE_LOG_ERR, "virtio_loop ppoll: %m\n"); 794 + break; 795 + } 796 + assert(poll_res == 1); 797 + if (pf[0].revents & (POLLERR | POLLHUP | POLLNVAL)) { 798 + fuse_log(FUSE_LOG_ERR, "%s: Unexpected poll revents %x\n", __func__, 799 + pf[0].revents); 800 + break; 801 + } 802 + assert(pf[0].revents & POLLIN); 803 + fuse_log(FUSE_LOG_DEBUG, "%s: Got VU event\n", __func__); 804 + /* Mutual exclusion with fv_queue_thread() */ 805 + ret = pthread_rwlock_wrlock(&se->virtio_dev->vu_dispatch_rwlock); 806 + assert(ret == 0); /* there is no possible error case */ 807 + 808 + ok = vu_dispatch(&se->virtio_dev->dev); 809 + 810 + pthread_rwlock_unlock(&se->virtio_dev->vu_dispatch_rwlock); 811 + 812 + if (!ok) { 813 + fuse_log(FUSE_LOG_ERR, "%s: vu_dispatch failed\n", __func__); 814 + break; 815 + } 816 + } 817 + 818 + /* 819 + * Make sure all fv_queue_thread()s quit on exit, as we're about to 820 + * free virtio dev and fuse session, no one should access them anymore. 
821 + */ 822 + for (int i = 0; i < se->virtio_dev->nqueues; i++) { 823 + if (!se->virtio_dev->qi[i]) { 824 + continue; 825 + } 826 + 827 + fuse_log(FUSE_LOG_INFO, "%s: Stopping queue %d thread\n", __func__, i); 828 + fv_queue_cleanup_thread(se->virtio_dev, i); 829 + } 830 + 831 + fuse_log(FUSE_LOG_INFO, "%s: Exit\n", __func__); 832 + 833 + return 0; 834 + } 835 + 836 + static void strreplace(char *s, char old, char new) 837 + { 838 + for (; *s; ++s) { 839 + if (*s == old) { 840 + *s = new; 841 + } 842 + } 843 + } 844 + 845 + static bool fv_socket_lock(struct fuse_session *se) 846 + { 847 + g_autofree gchar *sk_name = NULL; 848 + g_autofree gchar *pidfile = NULL; 849 + g_autofree gchar *dir = NULL; 850 + Error *local_err = NULL; 851 + 852 + dir = qemu_get_local_state_pathname("run/virtiofsd"); 853 + 854 + if (g_mkdir_with_parents(dir, S_IRWXU) < 0) { 855 + fuse_log(FUSE_LOG_ERR, "%s: Failed to create directory %s: %s", 856 + __func__, dir, strerror(errno)); 857 + return false; 858 + } 859 + 860 + sk_name = g_strdup(se->vu_socket_path); 861 + strreplace(sk_name, '/', '.'); 862 + pidfile = g_strdup_printf("%s/%s.pid", dir, sk_name); 863 + 864 + if (!qemu_write_pidfile(pidfile, &local_err)) { 865 + error_report_err(local_err); 866 + return false; 867 + } 868 + 869 + return true; 870 + } 871 + 872 + static int fv_create_listen_socket(struct fuse_session *se) 873 + { 874 + struct sockaddr_un un; 875 + mode_t old_umask; 876 + 877 + /* Nothing to do if fd is already initialized */ 878 + if (se->vu_listen_fd >= 0) { 879 + return 0; 880 + } 881 + 882 + if (strlen(se->vu_socket_path) >= sizeof(un.sun_path)) { 883 + fuse_log(FUSE_LOG_ERR, "Socket path too long\n"); 884 + return -1; 885 + } 886 + 887 + if (!strlen(se->vu_socket_path)) { 888 + fuse_log(FUSE_LOG_ERR, "Socket path is empty\n"); 889 + return -1; 890 + } 891 + 892 + /* Check the vu_socket_path is already used */ 893 + if (!fv_socket_lock(se)) { 894 + return -1; 895 + } 896 + 897 + /* 898 + * Create the Unix socket 
to communicate with qemu 899 + * based on QEMU's vhost-user-bridge 900 + */ 901 + unlink(se->vu_socket_path); 902 + strcpy(un.sun_path, se->vu_socket_path); 903 + size_t addr_len = sizeof(un); 904 + 905 + int listen_sock = socket(AF_UNIX, SOCK_STREAM, 0); 906 + if (listen_sock == -1) { 907 + fuse_log(FUSE_LOG_ERR, "vhost socket creation: %m\n"); 908 + return -1; 909 + } 910 + un.sun_family = AF_UNIX; 911 + 912 + /* 913 + * Unfortunately bind doesn't let you set the mask on the socket, 914 + * so set umask to 077 and restore it later. 915 + */ 916 + old_umask = umask(0077); 917 + if (bind(listen_sock, (struct sockaddr *)&un, addr_len) == -1) { 918 + fuse_log(FUSE_LOG_ERR, "vhost socket bind: %m\n"); 919 + umask(old_umask); 920 + return -1; 921 + } 922 + umask(old_umask); 923 + 924 + if (listen(listen_sock, 1) == -1) { 925 + fuse_log(FUSE_LOG_ERR, "vhost socket listen: %m\n"); 926 + return -1; 927 + } 928 + 929 + se->vu_listen_fd = listen_sock; 930 + return 0; 931 + } 932 + 933 + int virtio_session_mount(struct fuse_session *se) 934 + { 935 + int ret; 936 + 937 + ret = fv_create_listen_socket(se); 938 + if (ret < 0) { 939 + return ret; 940 + } 941 + 942 + se->fd = -1; 943 + 944 + fuse_log(FUSE_LOG_INFO, "%s: Waiting for vhost-user socket connection...\n", 945 + __func__); 946 + int data_sock = accept(se->vu_listen_fd, NULL, NULL); 947 + if (data_sock == -1) { 948 + fuse_log(FUSE_LOG_ERR, "vhost socket accept: %m\n"); 949 + close(se->vu_listen_fd); 950 + return -1; 951 + } 952 + close(se->vu_listen_fd); 953 + se->vu_listen_fd = -1; 954 + fuse_log(FUSE_LOG_INFO, "%s: Received vhost-user socket connection\n", 955 + __func__); 956 + 957 + /* TODO: Some cleanup/deallocation! 
*/ 958 + se->virtio_dev = calloc(sizeof(struct fv_VuDev), 1); 959 + if (!se->virtio_dev) { 960 + fuse_log(FUSE_LOG_ERR, "%s: virtio_dev calloc failed\n", __func__); 961 + close(data_sock); 962 + return -1; 963 + } 964 + 965 + se->vu_socketfd = data_sock; 966 + se->virtio_dev->se = se; 967 + pthread_rwlock_init(&se->virtio_dev->vu_dispatch_rwlock, NULL); 968 + vu_init(&se->virtio_dev->dev, 2, se->vu_socketfd, fv_panic, fv_set_watch, 969 + fv_remove_watch, &fv_iface); 970 + 971 + return 0; 972 + } 973 + 974 + void virtio_session_close(struct fuse_session *se) 975 + { 976 + close(se->vu_socketfd); 977 + 978 + if (!se->virtio_dev) { 979 + return; 980 + } 981 + 982 + free(se->virtio_dev->qi); 983 + pthread_rwlock_destroy(&se->virtio_dev->vu_dispatch_rwlock); 984 + free(se->virtio_dev); 985 + se->virtio_dev = NULL; 986 + }
+33
tools/virtiofsd/fuse_virtio.h
/*
 * virtio-fs glue for FUSE
 * Copyright (C) 2018 Red Hat, Inc. and/or its affiliates
 *
 * Authors:
 *   Dave Gilbert  <dgilbert@redhat.com>
 *
 * Implements the glue between libfuse and libvhost-user
 *
 * This program can be distributed under the terms of the GNU LGPLv2.
 * See the file COPYING.LIB
 */

#ifndef FUSE_VIRTIO_H
#define FUSE_VIRTIO_H

#include "fuse_i.h"

struct fuse_session;

/* Accept a vhost-user connection and set up virtio device state */
int virtio_session_mount(struct fuse_session *se);
/* Tear down the state created by virtio_session_mount() */
void virtio_session_close(struct fuse_session *se);
/* Main event loop; returns when the session exits */
int virtio_loop(struct fuse_session *se);


/* Send a reply built from an iovec back through the virtqueue */
int virtio_send_msg(struct fuse_session *se, struct fuse_chan *ch,
                    struct iovec *iov, int count);

/* Send a reply whose payload comes from a fuse_bufvec (e.g. read data) */
int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch,
                         struct iovec *iov, int count,
                         struct fuse_bufvec *buf, size_t len);

#endif
+349
tools/virtiofsd/helper.c
··· 1 + /* 2 + * FUSE: Filesystem in Userspace 3 + * Copyright (C) 2001-2007 Miklos Szeredi <miklos@szeredi.hu> 4 + * 5 + * Helper functions to create (simple) standalone programs. With the 6 + * aid of these functions it should be possible to create full FUSE 7 + * file system by implementing nothing but the request handlers. 8 + 9 + * This program can be distributed under the terms of the GNU LGPLv2. 10 + * See the file COPYING.LIB. 11 + */ 12 + 13 + #include "qemu/osdep.h" 14 + #include "fuse_i.h" 15 + #include "fuse_lowlevel.h" 16 + #include "fuse_misc.h" 17 + #include "fuse_opt.h" 18 + 19 + #include <errno.h> 20 + #include <limits.h> 21 + #include <stddef.h> 22 + #include <stdio.h> 23 + #include <stdlib.h> 24 + #include <string.h> 25 + #include <sys/param.h> 26 + #include <unistd.h> 27 + 28 + #define FUSE_HELPER_OPT(t, p) \ 29 + { \ 30 + t, offsetof(struct fuse_cmdline_opts, p), 1 \ 31 + } 32 + #define FUSE_HELPER_OPT_VALUE(t, p, v) \ 33 + { \ 34 + t, offsetof(struct fuse_cmdline_opts, p), v \ 35 + } 36 + 37 + static const struct fuse_opt fuse_helper_opts[] = { 38 + FUSE_HELPER_OPT("-h", show_help), 39 + FUSE_HELPER_OPT("--help", show_help), 40 + FUSE_HELPER_OPT("-V", show_version), 41 + FUSE_HELPER_OPT("--version", show_version), 42 + FUSE_HELPER_OPT("--print-capabilities", print_capabilities), 43 + FUSE_HELPER_OPT("-d", debug), 44 + FUSE_HELPER_OPT("debug", debug), 45 + FUSE_HELPER_OPT("-d", foreground), 46 + FUSE_HELPER_OPT("debug", foreground), 47 + FUSE_OPT_KEY("-d", FUSE_OPT_KEY_KEEP), 48 + FUSE_OPT_KEY("debug", FUSE_OPT_KEY_KEEP), 49 + FUSE_HELPER_OPT("-f", foreground), 50 + FUSE_HELPER_OPT_VALUE("--daemonize", foreground, 0), 51 + FUSE_HELPER_OPT("fsname=", nodefault_subtype), 52 + FUSE_OPT_KEY("fsname=", FUSE_OPT_KEY_KEEP), 53 + FUSE_HELPER_OPT("subtype=", nodefault_subtype), 54 + FUSE_OPT_KEY("subtype=", FUSE_OPT_KEY_KEEP), 55 + FUSE_HELPER_OPT("max_idle_threads=%u", max_idle_threads), 56 + FUSE_HELPER_OPT("--syslog", syslog), 57 + 
FUSE_HELPER_OPT_VALUE("log_level=debug", log_level, FUSE_LOG_DEBUG), 58 + FUSE_HELPER_OPT_VALUE("log_level=info", log_level, FUSE_LOG_INFO), 59 + FUSE_HELPER_OPT_VALUE("log_level=warn", log_level, FUSE_LOG_WARNING), 60 + FUSE_HELPER_OPT_VALUE("log_level=err", log_level, FUSE_LOG_ERR), 61 + FUSE_OPT_END 62 + }; 63 + 64 + struct fuse_conn_info_opts { 65 + int atomic_o_trunc; 66 + int no_remote_posix_lock; 67 + int no_remote_flock; 68 + int splice_write; 69 + int splice_move; 70 + int splice_read; 71 + int no_splice_write; 72 + int no_splice_move; 73 + int no_splice_read; 74 + int auto_inval_data; 75 + int no_auto_inval_data; 76 + int no_readdirplus; 77 + int no_readdirplus_auto; 78 + int async_dio; 79 + int no_async_dio; 80 + int writeback_cache; 81 + int no_writeback_cache; 82 + int async_read; 83 + int sync_read; 84 + unsigned max_write; 85 + unsigned max_readahead; 86 + unsigned max_background; 87 + unsigned congestion_threshold; 88 + unsigned time_gran; 89 + int set_max_write; 90 + int set_max_readahead; 91 + int set_max_background; 92 + int set_congestion_threshold; 93 + int set_time_gran; 94 + }; 95 + 96 + #define CONN_OPTION(t, p, v) \ 97 + { \ 98 + t, offsetof(struct fuse_conn_info_opts, p), v \ 99 + } 100 + static const struct fuse_opt conn_info_opt_spec[] = { 101 + CONN_OPTION("max_write=%u", max_write, 0), 102 + CONN_OPTION("max_write=", set_max_write, 1), 103 + CONN_OPTION("max_readahead=%u", max_readahead, 0), 104 + CONN_OPTION("max_readahead=", set_max_readahead, 1), 105 + CONN_OPTION("max_background=%u", max_background, 0), 106 + CONN_OPTION("max_background=", set_max_background, 1), 107 + CONN_OPTION("congestion_threshold=%u", congestion_threshold, 0), 108 + CONN_OPTION("congestion_threshold=", set_congestion_threshold, 1), 109 + CONN_OPTION("sync_read", sync_read, 1), 110 + CONN_OPTION("async_read", async_read, 1), 111 + CONN_OPTION("atomic_o_trunc", atomic_o_trunc, 1), 112 + CONN_OPTION("no_remote_lock", no_remote_posix_lock, 1), 113 + 
CONN_OPTION("no_remote_lock", no_remote_flock, 1), 114 + CONN_OPTION("no_remote_flock", no_remote_flock, 1), 115 + CONN_OPTION("no_remote_posix_lock", no_remote_posix_lock, 1), 116 + CONN_OPTION("splice_write", splice_write, 1), 117 + CONN_OPTION("no_splice_write", no_splice_write, 1), 118 + CONN_OPTION("splice_move", splice_move, 1), 119 + CONN_OPTION("no_splice_move", no_splice_move, 1), 120 + CONN_OPTION("splice_read", splice_read, 1), 121 + CONN_OPTION("no_splice_read", no_splice_read, 1), 122 + CONN_OPTION("auto_inval_data", auto_inval_data, 1), 123 + CONN_OPTION("no_auto_inval_data", no_auto_inval_data, 1), 124 + CONN_OPTION("readdirplus=no", no_readdirplus, 1), 125 + CONN_OPTION("readdirplus=yes", no_readdirplus, 0), 126 + CONN_OPTION("readdirplus=yes", no_readdirplus_auto, 1), 127 + CONN_OPTION("readdirplus=auto", no_readdirplus, 0), 128 + CONN_OPTION("readdirplus=auto", no_readdirplus_auto, 0), 129 + CONN_OPTION("async_dio", async_dio, 1), 130 + CONN_OPTION("no_async_dio", no_async_dio, 1), 131 + CONN_OPTION("writeback_cache", writeback_cache, 1), 132 + CONN_OPTION("no_writeback_cache", no_writeback_cache, 1), 133 + CONN_OPTION("time_gran=%u", time_gran, 0), 134 + CONN_OPTION("time_gran=", set_time_gran, 1), 135 + FUSE_OPT_END 136 + }; 137 + 138 + 139 + void fuse_cmdline_help(void) 140 + { 141 + printf(" -h --help print help\n" 142 + " -V --version print version\n" 143 + " --print-capabilities print vhost-user.json\n" 144 + " -d -o debug enable debug output (implies -f)\n" 145 + " --syslog log to syslog (default stderr)\n" 146 + " -f foreground operation\n" 147 + " --daemonize run in background\n" 148 + " -o cache=<mode> cache mode. 
could be one of \"auto, " 149 + "always, none\"\n" 150 + " default: auto\n" 151 + " -o flock|no_flock enable/disable flock\n" 152 + " default: no_flock\n" 153 + " -o log_level=<level> log level, default to \"info\"\n" 154 + " level could be one of \"debug, " 155 + "info, warn, err\"\n" 156 + " -o max_idle_threads the maximum number of idle worker " 157 + "threads\n" 158 + " allowed (default: 10)\n" 159 + " -o norace disable racy fallback\n" 160 + " default: false\n" 161 + " -o posix_lock|no_posix_lock\n" 162 + " enable/disable remote posix lock\n" 163 + " default: posix_lock\n" 164 + " -o readdirplus|no_readdirplus\n" 165 + " enable/disable readirplus\n" 166 + " default: readdirplus except with " 167 + "cache=none\n" 168 + " -o timeout=<number> I/O timeout (second)\n" 169 + " default: depends on cache= option.\n" 170 + " -o writeback|no_writeback enable/disable writeback cache\n" 171 + " default: no_writeback\n" 172 + " -o xattr|no_xattr enable/disable xattr\n" 173 + " default: no_xattr\n" 174 + ); 175 + } 176 + 177 + static int fuse_helper_opt_proc(void *data, const char *arg, int key, 178 + struct fuse_args *outargs) 179 + { 180 + (void)data; 181 + (void)outargs; 182 + 183 + switch (key) { 184 + case FUSE_OPT_KEY_NONOPT: 185 + fuse_log(FUSE_LOG_ERR, "fuse: invalid argument `%s'\n", arg); 186 + return -1; 187 + 188 + default: 189 + /* Pass through unknown options */ 190 + return 1; 191 + } 192 + } 193 + 194 + int fuse_parse_cmdline(struct fuse_args *args, struct fuse_cmdline_opts *opts) 195 + { 196 + memset(opts, 0, sizeof(struct fuse_cmdline_opts)); 197 + 198 + opts->max_idle_threads = 10; 199 + opts->foreground = 1; 200 + 201 + if (fuse_opt_parse(args, opts, fuse_helper_opts, fuse_helper_opt_proc) == 202 + -1) { 203 + return -1; 204 + } 205 + 206 + return 0; 207 + } 208 + 209 + 210 + int fuse_daemonize(int foreground) 211 + { 212 + int ret = 0, rett; 213 + if (!foreground) { 214 + int nullfd; 215 + int waiter[2]; 216 + char completed; 217 + 218 + if 
(pipe(waiter)) { 219 + fuse_log(FUSE_LOG_ERR, "fuse_daemonize: pipe: %s\n", 220 + strerror(errno)); 221 + return -1; 222 + } 223 + 224 + /* 225 + * demonize current process by forking it and killing the 226 + * parent. This makes current process as a child of 'init'. 227 + */ 228 + switch (fork()) { 229 + case -1: 230 + fuse_log(FUSE_LOG_ERR, "fuse_daemonize: fork: %s\n", 231 + strerror(errno)); 232 + return -1; 233 + case 0: 234 + break; 235 + default: 236 + _exit(read(waiter[0], &completed, 237 + sizeof(completed) != sizeof(completed))); 238 + } 239 + 240 + if (setsid() == -1) { 241 + fuse_log(FUSE_LOG_ERR, "fuse_daemonize: setsid: %s\n", 242 + strerror(errno)); 243 + return -1; 244 + } 245 + 246 + ret = chdir("/"); 247 + 248 + nullfd = open("/dev/null", O_RDWR, 0); 249 + if (nullfd != -1) { 250 + rett = dup2(nullfd, 0); 251 + if (!ret) { 252 + ret = rett; 253 + } 254 + rett = dup2(nullfd, 1); 255 + if (!ret) { 256 + ret = rett; 257 + } 258 + rett = dup2(nullfd, 2); 259 + if (!ret) { 260 + ret = rett; 261 + } 262 + if (nullfd > 2) { 263 + close(nullfd); 264 + } 265 + } 266 + 267 + /* Propagate completion of daemon initialization */ 268 + completed = 1; 269 + rett = write(waiter[1], &completed, sizeof(completed)); 270 + if (!ret) { 271 + ret = rett; 272 + } 273 + close(waiter[0]); 274 + close(waiter[1]); 275 + } else { 276 + ret = chdir("/"); 277 + } 278 + return ret; 279 + } 280 + 281 + void fuse_apply_conn_info_opts(struct fuse_conn_info_opts *opts, 282 + struct fuse_conn_info *conn) 283 + { 284 + if (opts->set_max_write) { 285 + conn->max_write = opts->max_write; 286 + } 287 + if (opts->set_max_background) { 288 + conn->max_background = opts->max_background; 289 + } 290 + if (opts->set_congestion_threshold) { 291 + conn->congestion_threshold = opts->congestion_threshold; 292 + } 293 + if (opts->set_time_gran) { 294 + conn->time_gran = opts->time_gran; 295 + } 296 + if (opts->set_max_readahead) { 297 + conn->max_readahead = opts->max_readahead; 298 + } 299 + 300 
+ #define LL_ENABLE(cond, cap) \ 301 + if (cond) \ 302 + conn->want |= (cap) 303 + #define LL_DISABLE(cond, cap) \ 304 + if (cond) \ 305 + conn->want &= ~(cap) 306 + 307 + LL_ENABLE(opts->splice_read, FUSE_CAP_SPLICE_READ); 308 + LL_DISABLE(opts->no_splice_read, FUSE_CAP_SPLICE_READ); 309 + 310 + LL_ENABLE(opts->splice_write, FUSE_CAP_SPLICE_WRITE); 311 + LL_DISABLE(opts->no_splice_write, FUSE_CAP_SPLICE_WRITE); 312 + 313 + LL_ENABLE(opts->splice_move, FUSE_CAP_SPLICE_MOVE); 314 + LL_DISABLE(opts->no_splice_move, FUSE_CAP_SPLICE_MOVE); 315 + 316 + LL_ENABLE(opts->auto_inval_data, FUSE_CAP_AUTO_INVAL_DATA); 317 + LL_DISABLE(opts->no_auto_inval_data, FUSE_CAP_AUTO_INVAL_DATA); 318 + 319 + LL_DISABLE(opts->no_readdirplus, FUSE_CAP_READDIRPLUS); 320 + LL_DISABLE(opts->no_readdirplus_auto, FUSE_CAP_READDIRPLUS_AUTO); 321 + 322 + LL_ENABLE(opts->async_dio, FUSE_CAP_ASYNC_DIO); 323 + LL_DISABLE(opts->no_async_dio, FUSE_CAP_ASYNC_DIO); 324 + 325 + LL_ENABLE(opts->writeback_cache, FUSE_CAP_WRITEBACK_CACHE); 326 + LL_DISABLE(opts->no_writeback_cache, FUSE_CAP_WRITEBACK_CACHE); 327 + 328 + LL_ENABLE(opts->async_read, FUSE_CAP_ASYNC_READ); 329 + LL_DISABLE(opts->sync_read, FUSE_CAP_ASYNC_READ); 330 + 331 + LL_DISABLE(opts->no_remote_posix_lock, FUSE_CAP_POSIX_LOCKS); 332 + LL_DISABLE(opts->no_remote_flock, FUSE_CAP_FLOCK_LOCKS); 333 + } 334 + 335 + struct fuse_conn_info_opts *fuse_parse_conn_info_opts(struct fuse_args *args) 336 + { 337 + struct fuse_conn_info_opts *opts; 338 + 339 + opts = calloc(1, sizeof(struct fuse_conn_info_opts)); 340 + if (opts == NULL) { 341 + fuse_log(FUSE_LOG_ERR, "calloc failed\n"); 342 + return NULL; 343 + } 344 + if (fuse_opt_parse(args, opts, conn_info_opt_spec, NULL) == -1) { 345 + free(opts); 346 + return NULL; 347 + } 348 + return opts; 349 + }
+51
tools/virtiofsd/passthrough_helpers.h
··· 1 + /* 2 + * FUSE: Filesystem in Userspace 3 + * 4 + * Redistribution and use in source and binary forms, with or without 5 + * modification, are permitted provided that the following conditions 6 + * are met: 7 + * 1. Redistributions of source code must retain the above copyright 8 + * notice, this list of conditions and the following disclaimer. 9 + * 2. Redistributions in binary form must reproduce the above copyright 10 + * notice, this list of conditions and the following disclaimer in the 11 + * documentation and/or other materials provided with the distribution. 12 + * 13 + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14 + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16 + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 17 + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18 + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19 + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20 + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21 + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22 + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23 + * SUCH DAMAGE 24 + */ 25 + 26 + /* 27 + * Creates files on the underlying file system in response to a FUSE_MKNOD 28 + * operation 29 + */ 30 + static int mknod_wrapper(int dirfd, const char *path, const char *link, 31 + int mode, dev_t rdev) 32 + { 33 + int res; 34 + 35 + if (S_ISREG(mode)) { 36 + res = openat(dirfd, path, O_CREAT | O_EXCL | O_WRONLY, mode); 37 + if (res >= 0) { 38 + res = close(res); 39 + } 40 + } else if (S_ISDIR(mode)) { 41 + res = mkdirat(dirfd, path, mode); 42 + } else if (S_ISLNK(mode) && link != NULL) { 43 + res = symlinkat(link, dirfd, path); 44 + } else if (S_ISFIFO(mode)) { 45 + 
res = mkfifoat(dirfd, path, mode); 46 + } else { 47 + res = mknodat(dirfd, path, mode, rdev); 48 + } 49 + 50 + return res; 51 + }
+3006
tools/virtiofsd/passthrough_ll.c
··· 1 + /* 2 + * FUSE: Filesystem in Userspace 3 + * Copyright (C) 2001-2007 Miklos Szeredi <miklos@szeredi.hu> 4 + * 5 + * This program can be distributed under the terms of the GNU GPLv2. 6 + * See the file COPYING. 7 + */ 8 + 9 + /* 10 + * 11 + * This file system mirrors the existing file system hierarchy of the 12 + * system, starting at the root file system. This is implemented by 13 + * just "passing through" all requests to the corresponding user-space 14 + * libc functions. In contrast to passthrough.c and passthrough_fh.c, 15 + * this implementation uses the low-level API. Its performance should 16 + * be the least bad among the three, but many operations are not 17 + * implemented. In particular, it is not possible to remove files (or 18 + * directories) because the code necessary to defer actual removal 19 + * until the file is not opened anymore would make the example much 20 + * more complicated. 21 + * 22 + * When writeback caching is enabled (-o writeback mount option), it 23 + * is only possible to write to files for which the mounting user has 24 + * read permissions. This is because the writeback cache requires the 25 + * kernel to be able to issue read requests for all files (which the 26 + * passthrough filesystem cannot satisfy if it can't read the file in 27 + * the underlying filesystem). 
28 + * 29 + * Compile with: 30 + * 31 + * gcc -Wall passthrough_ll.c `pkg-config fuse3 --cflags --libs` -o 32 + * passthrough_ll 33 + * 34 + * ## Source code ## 35 + * \include passthrough_ll.c 36 + */ 37 + 38 + #include "qemu/osdep.h" 39 + #include "qemu/timer.h" 40 + #include "fuse_virtio.h" 41 + #include "fuse_log.h" 42 + #include "fuse_lowlevel.h" 43 + #include <assert.h> 44 + #include <cap-ng.h> 45 + #include <dirent.h> 46 + #include <errno.h> 47 + #include <glib.h> 48 + #include <inttypes.h> 49 + #include <limits.h> 50 + #include <pthread.h> 51 + #include <stdbool.h> 52 + #include <stddef.h> 53 + #include <stdio.h> 54 + #include <stdlib.h> 55 + #include <string.h> 56 + #include <sys/file.h> 57 + #include <sys/mount.h> 58 + #include <sys/prctl.h> 59 + #include <sys/resource.h> 60 + #include <sys/syscall.h> 61 + #include <sys/types.h> 62 + #include <sys/wait.h> 63 + #include <sys/xattr.h> 64 + #include <syslog.h> 65 + #include <unistd.h> 66 + 67 + #include "passthrough_helpers.h" 68 + #include "seccomp.h" 69 + 70 + /* Keep track of inode posix locks for each owner. */ 71 + struct lo_inode_plock { 72 + uint64_t lock_owner; 73 + int fd; /* fd for OFD locks */ 74 + }; 75 + 76 + struct lo_map_elem { 77 + union { 78 + struct lo_inode *inode; 79 + struct lo_dirp *dirp; 80 + int fd; 81 + ssize_t freelist; 82 + }; 83 + bool in_use; 84 + }; 85 + 86 + /* Maps FUSE fh or ino values to internal objects */ 87 + struct lo_map { 88 + struct lo_map_elem *elems; 89 + size_t nelems; 90 + ssize_t freelist; 91 + }; 92 + 93 + struct lo_key { 94 + ino_t ino; 95 + dev_t dev; 96 + }; 97 + 98 + struct lo_inode { 99 + int fd; 100 + 101 + /* 102 + * Atomic reference count for this object. The nlookup field holds a 103 + * reference and release it when nlookup reaches 0. 104 + */ 105 + gint refcount; 106 + 107 + struct lo_key key; 108 + 109 + /* 110 + * This counter keeps the inode alive during the FUSE session. 
111 + * Incremented when the FUSE inode number is sent in a reply 112 + * (FUSE_LOOKUP, FUSE_READDIRPLUS, etc). Decremented when an inode is 113 + * released by requests like FUSE_FORGET, FUSE_RMDIR, FUSE_RENAME, etc. 114 + * 115 + * Note that this value is untrusted because the client can manipulate 116 + * it arbitrarily using FUSE_FORGET requests. 117 + * 118 + * Protected by lo->mutex. 119 + */ 120 + uint64_t nlookup; 121 + 122 + fuse_ino_t fuse_ino; 123 + pthread_mutex_t plock_mutex; 124 + GHashTable *posix_locks; /* protected by lo_inode->plock_mutex */ 125 + 126 + bool is_symlink; 127 + }; 128 + 129 + struct lo_cred { 130 + uid_t euid; 131 + gid_t egid; 132 + }; 133 + 134 + enum { 135 + CACHE_NONE, 136 + CACHE_AUTO, 137 + CACHE_ALWAYS, 138 + }; 139 + 140 + struct lo_data { 141 + pthread_mutex_t mutex; 142 + int debug; 143 + int norace; 144 + int writeback; 145 + int flock; 146 + int posix_lock; 147 + int xattr; 148 + char *source; 149 + double timeout; 150 + int cache; 151 + int timeout_set; 152 + int readdirplus_set; 153 + int readdirplus_clear; 154 + struct lo_inode root; 155 + GHashTable *inodes; /* protected by lo->mutex */ 156 + struct lo_map ino_map; /* protected by lo->mutex */ 157 + struct lo_map dirp_map; /* protected by lo->mutex */ 158 + struct lo_map fd_map; /* protected by lo->mutex */ 159 + 160 + /* An O_PATH file descriptor to /proc/self/fd/ */ 161 + int proc_self_fd; 162 + }; 163 + 164 + static const struct fuse_opt lo_opts[] = { 165 + { "writeback", offsetof(struct lo_data, writeback), 1 }, 166 + { "no_writeback", offsetof(struct lo_data, writeback), 0 }, 167 + { "source=%s", offsetof(struct lo_data, source), 0 }, 168 + { "flock", offsetof(struct lo_data, flock), 1 }, 169 + { "no_flock", offsetof(struct lo_data, flock), 0 }, 170 + { "posix_lock", offsetof(struct lo_data, posix_lock), 1 }, 171 + { "no_posix_lock", offsetof(struct lo_data, posix_lock), 0 }, 172 + { "xattr", offsetof(struct lo_data, xattr), 1 }, 173 + { "no_xattr", 
offsetof(struct lo_data, xattr), 0 }, 174 + { "timeout=%lf", offsetof(struct lo_data, timeout), 0 }, 175 + { "timeout=", offsetof(struct lo_data, timeout_set), 1 }, 176 + { "cache=none", offsetof(struct lo_data, cache), CACHE_NONE }, 177 + { "cache=auto", offsetof(struct lo_data, cache), CACHE_AUTO }, 178 + { "cache=always", offsetof(struct lo_data, cache), CACHE_ALWAYS }, 179 + { "norace", offsetof(struct lo_data, norace), 1 }, 180 + { "readdirplus", offsetof(struct lo_data, readdirplus_set), 1 }, 181 + { "no_readdirplus", offsetof(struct lo_data, readdirplus_clear), 1 }, 182 + FUSE_OPT_END 183 + }; 184 + static bool use_syslog = false; 185 + static int current_log_level; 186 + static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, 187 + uint64_t n); 188 + 189 + static struct { 190 + pthread_mutex_t mutex; 191 + void *saved; 192 + } cap; 193 + /* That we loaded cap-ng in the current thread from the saved */ 194 + static __thread bool cap_loaded = 0; 195 + 196 + static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st); 197 + 198 + static int is_dot_or_dotdot(const char *name) 199 + { 200 + return name[0] == '.' && 201 + (name[1] == '\0' || (name[1] == '.' && name[2] == '\0')); 202 + } 203 + 204 + /* Is `path` a single path component that is not "." or ".."? */ 205 + static int is_safe_path_component(const char *path) 206 + { 207 + if (strchr(path, '/')) { 208 + return 0; 209 + } 210 + 211 + return !is_dot_or_dotdot(path); 212 + } 213 + 214 + static struct lo_data *lo_data(fuse_req_t req) 215 + { 216 + return (struct lo_data *)fuse_req_userdata(req); 217 + } 218 + 219 + /* 220 + * Load capng's state from our saved state if the current thread 221 + * hadn't previously been loaded. 
222 + * returns 0 on success 223 + */ 224 + static int load_capng(void) 225 + { 226 + if (!cap_loaded) { 227 + pthread_mutex_lock(&cap.mutex); 228 + capng_restore_state(&cap.saved); 229 + /* 230 + * restore_state free's the saved copy 231 + * so make another. 232 + */ 233 + cap.saved = capng_save_state(); 234 + if (!cap.saved) { 235 + fuse_log(FUSE_LOG_ERR, "capng_save_state (thread)\n"); 236 + return -EINVAL; 237 + } 238 + pthread_mutex_unlock(&cap.mutex); 239 + 240 + /* 241 + * We want to use the loaded state for our pid, 242 + * not the original 243 + */ 244 + capng_setpid(syscall(SYS_gettid)); 245 + cap_loaded = true; 246 + } 247 + return 0; 248 + } 249 + 250 + /* 251 + * Helpers for dropping and regaining effective capabilities. Returns 0 252 + * on success, error otherwise 253 + */ 254 + static int drop_effective_cap(const char *cap_name, bool *cap_dropped) 255 + { 256 + int cap, ret; 257 + 258 + cap = capng_name_to_capability(cap_name); 259 + if (cap < 0) { 260 + ret = errno; 261 + fuse_log(FUSE_LOG_ERR, "capng_name_to_capability(%s) failed:%s\n", 262 + cap_name, strerror(errno)); 263 + goto out; 264 + } 265 + 266 + if (load_capng()) { 267 + ret = errno; 268 + fuse_log(FUSE_LOG_ERR, "load_capng() failed\n"); 269 + goto out; 270 + } 271 + 272 + /* We dont have this capability in effective set already. 
*/ 273 + if (!capng_have_capability(CAPNG_EFFECTIVE, cap)) { 274 + ret = 0; 275 + goto out; 276 + } 277 + 278 + if (capng_update(CAPNG_DROP, CAPNG_EFFECTIVE, cap)) { 279 + ret = errno; 280 + fuse_log(FUSE_LOG_ERR, "capng_update(DROP,) failed\n"); 281 + goto out; 282 + } 283 + 284 + if (capng_apply(CAPNG_SELECT_CAPS)) { 285 + ret = errno; 286 + fuse_log(FUSE_LOG_ERR, "drop:capng_apply() failed\n"); 287 + goto out; 288 + } 289 + 290 + ret = 0; 291 + if (cap_dropped) { 292 + *cap_dropped = true; 293 + } 294 + 295 + out: 296 + return ret; 297 + } 298 + 299 + static int gain_effective_cap(const char *cap_name) 300 + { 301 + int cap; 302 + int ret = 0; 303 + 304 + cap = capng_name_to_capability(cap_name); 305 + if (cap < 0) { 306 + ret = errno; 307 + fuse_log(FUSE_LOG_ERR, "capng_name_to_capability(%s) failed:%s\n", 308 + cap_name, strerror(errno)); 309 + goto out; 310 + } 311 + 312 + if (load_capng()) { 313 + ret = errno; 314 + fuse_log(FUSE_LOG_ERR, "load_capng() failed\n"); 315 + goto out; 316 + } 317 + 318 + if (capng_update(CAPNG_ADD, CAPNG_EFFECTIVE, cap)) { 319 + ret = errno; 320 + fuse_log(FUSE_LOG_ERR, "capng_update(ADD,) failed\n"); 321 + goto out; 322 + } 323 + 324 + if (capng_apply(CAPNG_SELECT_CAPS)) { 325 + ret = errno; 326 + fuse_log(FUSE_LOG_ERR, "gain:capng_apply() failed\n"); 327 + goto out; 328 + } 329 + ret = 0; 330 + 331 + out: 332 + return ret; 333 + } 334 + 335 + static void lo_map_init(struct lo_map *map) 336 + { 337 + map->elems = NULL; 338 + map->nelems = 0; 339 + map->freelist = -1; 340 + } 341 + 342 + static void lo_map_destroy(struct lo_map *map) 343 + { 344 + free(map->elems); 345 + } 346 + 347 + static int lo_map_grow(struct lo_map *map, size_t new_nelems) 348 + { 349 + struct lo_map_elem *new_elems; 350 + size_t i; 351 + 352 + if (new_nelems <= map->nelems) { 353 + return 1; 354 + } 355 + 356 + new_elems = realloc(map->elems, sizeof(map->elems[0]) * new_nelems); 357 + if (!new_elems) { 358 + return 0; 359 + } 360 + 361 + for (i = 
map->nelems; i < new_nelems; i++) { 362 + new_elems[i].freelist = i + 1; 363 + new_elems[i].in_use = false; 364 + } 365 + new_elems[new_nelems - 1].freelist = -1; 366 + 367 + map->elems = new_elems; 368 + map->freelist = map->nelems; 369 + map->nelems = new_nelems; 370 + return 1; 371 + } 372 + 373 + static struct lo_map_elem *lo_map_alloc_elem(struct lo_map *map) 374 + { 375 + struct lo_map_elem *elem; 376 + 377 + if (map->freelist == -1 && !lo_map_grow(map, map->nelems + 256)) { 378 + return NULL; 379 + } 380 + 381 + elem = &map->elems[map->freelist]; 382 + map->freelist = elem->freelist; 383 + 384 + elem->in_use = true; 385 + 386 + return elem; 387 + } 388 + 389 + static struct lo_map_elem *lo_map_reserve(struct lo_map *map, size_t key) 390 + { 391 + ssize_t *prev; 392 + 393 + if (!lo_map_grow(map, key + 1)) { 394 + return NULL; 395 + } 396 + 397 + for (prev = &map->freelist; *prev != -1; 398 + prev = &map->elems[*prev].freelist) { 399 + if (*prev == key) { 400 + struct lo_map_elem *elem = &map->elems[key]; 401 + 402 + *prev = elem->freelist; 403 + elem->in_use = true; 404 + return elem; 405 + } 406 + } 407 + return NULL; 408 + } 409 + 410 + static struct lo_map_elem *lo_map_get(struct lo_map *map, size_t key) 411 + { 412 + if (key >= map->nelems) { 413 + return NULL; 414 + } 415 + if (!map->elems[key].in_use) { 416 + return NULL; 417 + } 418 + return &map->elems[key]; 419 + } 420 + 421 + static void lo_map_remove(struct lo_map *map, size_t key) 422 + { 423 + struct lo_map_elem *elem; 424 + 425 + if (key >= map->nelems) { 426 + return; 427 + } 428 + 429 + elem = &map->elems[key]; 430 + if (!elem->in_use) { 431 + return; 432 + } 433 + 434 + elem->in_use = false; 435 + 436 + elem->freelist = map->freelist; 437 + map->freelist = key; 438 + } 439 + 440 + /* Assumes lo->mutex is held */ 441 + static ssize_t lo_add_fd_mapping(fuse_req_t req, int fd) 442 + { 443 + struct lo_map_elem *elem; 444 + 445 + elem = lo_map_alloc_elem(&lo_data(req)->fd_map); 446 + if (!elem) { 
447 + return -1; 448 + } 449 + 450 + elem->fd = fd; 451 + return elem - lo_data(req)->fd_map.elems; 452 + } 453 + 454 + /* Assumes lo->mutex is held */ 455 + static ssize_t lo_add_dirp_mapping(fuse_req_t req, struct lo_dirp *dirp) 456 + { 457 + struct lo_map_elem *elem; 458 + 459 + elem = lo_map_alloc_elem(&lo_data(req)->dirp_map); 460 + if (!elem) { 461 + return -1; 462 + } 463 + 464 + elem->dirp = dirp; 465 + return elem - lo_data(req)->dirp_map.elems; 466 + } 467 + 468 + /* Assumes lo->mutex is held */ 469 + static ssize_t lo_add_inode_mapping(fuse_req_t req, struct lo_inode *inode) 470 + { 471 + struct lo_map_elem *elem; 472 + 473 + elem = lo_map_alloc_elem(&lo_data(req)->ino_map); 474 + if (!elem) { 475 + return -1; 476 + } 477 + 478 + elem->inode = inode; 479 + return elem - lo_data(req)->ino_map.elems; 480 + } 481 + 482 + static void lo_inode_put(struct lo_data *lo, struct lo_inode **inodep) 483 + { 484 + struct lo_inode *inode = *inodep; 485 + 486 + if (!inode) { 487 + return; 488 + } 489 + 490 + *inodep = NULL; 491 + 492 + if (g_atomic_int_dec_and_test(&inode->refcount)) { 493 + close(inode->fd); 494 + free(inode); 495 + } 496 + } 497 + 498 + /* Caller must release refcount using lo_inode_put() */ 499 + static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino) 500 + { 501 + struct lo_data *lo = lo_data(req); 502 + struct lo_map_elem *elem; 503 + 504 + pthread_mutex_lock(&lo->mutex); 505 + elem = lo_map_get(&lo->ino_map, ino); 506 + if (elem) { 507 + g_atomic_int_inc(&elem->inode->refcount); 508 + } 509 + pthread_mutex_unlock(&lo->mutex); 510 + 511 + if (!elem) { 512 + return NULL; 513 + } 514 + 515 + return elem->inode; 516 + } 517 + 518 + /* 519 + * TODO Remove this helper and force callers to hold an inode refcount until 520 + * they are done with the fd. This will be done in a later patch to make 521 + * review easier. 
522 + */ 523 + static int lo_fd(fuse_req_t req, fuse_ino_t ino) 524 + { 525 + struct lo_inode *inode = lo_inode(req, ino); 526 + int fd; 527 + 528 + if (!inode) { 529 + return -1; 530 + } 531 + 532 + fd = inode->fd; 533 + lo_inode_put(lo_data(req), &inode); 534 + return fd; 535 + } 536 + 537 + static void lo_init(void *userdata, struct fuse_conn_info *conn) 538 + { 539 + struct lo_data *lo = (struct lo_data *)userdata; 540 + 541 + if (conn->capable & FUSE_CAP_EXPORT_SUPPORT) { 542 + conn->want |= FUSE_CAP_EXPORT_SUPPORT; 543 + } 544 + 545 + if (lo->writeback && conn->capable & FUSE_CAP_WRITEBACK_CACHE) { 546 + fuse_log(FUSE_LOG_DEBUG, "lo_init: activating writeback\n"); 547 + conn->want |= FUSE_CAP_WRITEBACK_CACHE; 548 + } 549 + if (conn->capable & FUSE_CAP_FLOCK_LOCKS) { 550 + if (lo->flock) { 551 + fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n"); 552 + conn->want |= FUSE_CAP_FLOCK_LOCKS; 553 + } else { 554 + fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling flock locks\n"); 555 + conn->want &= ~FUSE_CAP_FLOCK_LOCKS; 556 + } 557 + } 558 + 559 + if (conn->capable & FUSE_CAP_POSIX_LOCKS) { 560 + if (lo->posix_lock) { 561 + fuse_log(FUSE_LOG_DEBUG, "lo_init: activating posix locks\n"); 562 + conn->want |= FUSE_CAP_POSIX_LOCKS; 563 + } else { 564 + fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling posix locks\n"); 565 + conn->want &= ~FUSE_CAP_POSIX_LOCKS; 566 + } 567 + } 568 + 569 + if ((lo->cache == CACHE_NONE && !lo->readdirplus_set) || 570 + lo->readdirplus_clear) { 571 + fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling readdirplus\n"); 572 + conn->want &= ~FUSE_CAP_READDIRPLUS; 573 + } 574 + } 575 + 576 + static void lo_getattr(fuse_req_t req, fuse_ino_t ino, 577 + struct fuse_file_info *fi) 578 + { 579 + int res; 580 + struct stat buf; 581 + struct lo_data *lo = lo_data(req); 582 + 583 + (void)fi; 584 + 585 + res = 586 + fstatat(lo_fd(req, ino), "", &buf, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); 587 + if (res == -1) { 588 + return (void)fuse_reply_err(req, errno); 
589 + } 590 + 591 + fuse_reply_attr(req, &buf, lo->timeout); 592 + } 593 + 594 + /* 595 + * Increments parent->nlookup and caller must release refcount using 596 + * lo_inode_put(&parent). 597 + */ 598 + static int lo_parent_and_name(struct lo_data *lo, struct lo_inode *inode, 599 + char path[PATH_MAX], struct lo_inode **parent) 600 + { 601 + char procname[64]; 602 + char *last; 603 + struct stat stat; 604 + struct lo_inode *p; 605 + int retries = 2; 606 + int res; 607 + 608 + retry: 609 + sprintf(procname, "%i", inode->fd); 610 + 611 + res = readlinkat(lo->proc_self_fd, procname, path, PATH_MAX); 612 + if (res < 0) { 613 + fuse_log(FUSE_LOG_WARNING, "%s: readlink failed: %m\n", __func__); 614 + goto fail_noretry; 615 + } 616 + 617 + if (res >= PATH_MAX) { 618 + fuse_log(FUSE_LOG_WARNING, "%s: readlink overflowed\n", __func__); 619 + goto fail_noretry; 620 + } 621 + path[res] = '\0'; 622 + 623 + last = strrchr(path, '/'); 624 + if (last == NULL) { 625 + /* Shouldn't happen */ 626 + fuse_log( 627 + FUSE_LOG_WARNING, 628 + "%s: INTERNAL ERROR: bad path read from proc\n", __func__); 629 + goto fail_noretry; 630 + } 631 + if (last == path) { 632 + p = &lo->root; 633 + pthread_mutex_lock(&lo->mutex); 634 + p->nlookup++; 635 + g_atomic_int_inc(&p->refcount); 636 + pthread_mutex_unlock(&lo->mutex); 637 + } else { 638 + *last = '\0'; 639 + res = fstatat(AT_FDCWD, last == path ? 
"/" : path, &stat, 0); 640 + if (res == -1) { 641 + if (!retries) { 642 + fuse_log(FUSE_LOG_WARNING, 643 + "%s: failed to stat parent: %m\n", __func__); 644 + } 645 + goto fail; 646 + } 647 + p = lo_find(lo, &stat); 648 + if (p == NULL) { 649 + if (!retries) { 650 + fuse_log(FUSE_LOG_WARNING, 651 + "%s: failed to find parent\n", __func__); 652 + } 653 + goto fail; 654 + } 655 + } 656 + last++; 657 + res = fstatat(p->fd, last, &stat, AT_SYMLINK_NOFOLLOW); 658 + if (res == -1) { 659 + if (!retries) { 660 + fuse_log(FUSE_LOG_WARNING, 661 + "%s: failed to stat last\n", __func__); 662 + } 663 + goto fail_unref; 664 + } 665 + if (stat.st_dev != inode->key.dev || stat.st_ino != inode->key.ino) { 666 + if (!retries) { 667 + fuse_log(FUSE_LOG_WARNING, 668 + "%s: failed to match last\n", __func__); 669 + } 670 + goto fail_unref; 671 + } 672 + *parent = p; 673 + memmove(path, last, strlen(last) + 1); 674 + 675 + return 0; 676 + 677 + fail_unref: 678 + unref_inode_lolocked(lo, p, 1); 679 + lo_inode_put(lo, &p); 680 + fail: 681 + if (retries) { 682 + retries--; 683 + goto retry; 684 + } 685 + fail_noretry: 686 + errno = EIO; 687 + return -1; 688 + } 689 + 690 + static int utimensat_empty(struct lo_data *lo, struct lo_inode *inode, 691 + const struct timespec *tv) 692 + { 693 + int res; 694 + struct lo_inode *parent; 695 + char path[PATH_MAX]; 696 + 697 + if (inode->is_symlink) { 698 + res = utimensat(inode->fd, "", tv, AT_EMPTY_PATH); 699 + if (res == -1 && errno == EINVAL) { 700 + /* Sorry, no race free way to set times on symlink. 
*/ 701 + if (lo->norace) { 702 + errno = EPERM; 703 + } else { 704 + goto fallback; 705 + } 706 + } 707 + return res; 708 + } 709 + sprintf(path, "%i", inode->fd); 710 + 711 + return utimensat(lo->proc_self_fd, path, tv, 0); 712 + 713 + fallback: 714 + res = lo_parent_and_name(lo, inode, path, &parent); 715 + if (res != -1) { 716 + res = utimensat(parent->fd, path, tv, AT_SYMLINK_NOFOLLOW); 717 + unref_inode_lolocked(lo, parent, 1); 718 + lo_inode_put(lo, &parent); 719 + } 720 + 721 + return res; 722 + } 723 + 724 + static int lo_fi_fd(fuse_req_t req, struct fuse_file_info *fi) 725 + { 726 + struct lo_data *lo = lo_data(req); 727 + struct lo_map_elem *elem; 728 + 729 + pthread_mutex_lock(&lo->mutex); 730 + elem = lo_map_get(&lo->fd_map, fi->fh); 731 + pthread_mutex_unlock(&lo->mutex); 732 + 733 + if (!elem) { 734 + return -1; 735 + } 736 + 737 + return elem->fd; 738 + } 739 + 740 + static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, 741 + int valid, struct fuse_file_info *fi) 742 + { 743 + int saverr; 744 + char procname[64]; 745 + struct lo_data *lo = lo_data(req); 746 + struct lo_inode *inode; 747 + int ifd; 748 + int res; 749 + int fd; 750 + 751 + inode = lo_inode(req, ino); 752 + if (!inode) { 753 + fuse_reply_err(req, EBADF); 754 + return; 755 + } 756 + 757 + ifd = inode->fd; 758 + 759 + /* If fi->fh is invalid we'll report EBADF later */ 760 + if (fi) { 761 + fd = lo_fi_fd(req, fi); 762 + } 763 + 764 + if (valid & FUSE_SET_ATTR_MODE) { 765 + if (fi) { 766 + res = fchmod(fd, attr->st_mode); 767 + } else { 768 + sprintf(procname, "%i", ifd); 769 + res = fchmodat(lo->proc_self_fd, procname, attr->st_mode, 0); 770 + } 771 + if (res == -1) { 772 + goto out_err; 773 + } 774 + } 775 + if (valid & (FUSE_SET_ATTR_UID | FUSE_SET_ATTR_GID)) { 776 + uid_t uid = (valid & FUSE_SET_ATTR_UID) ? attr->st_uid : (uid_t)-1; 777 + gid_t gid = (valid & FUSE_SET_ATTR_GID) ? 
attr->st_gid : (gid_t)-1; 778 + 779 + res = fchownat(ifd, "", uid, gid, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); 780 + if (res == -1) { 781 + goto out_err; 782 + } 783 + } 784 + if (valid & FUSE_SET_ATTR_SIZE) { 785 + int truncfd; 786 + 787 + if (fi) { 788 + truncfd = fd; 789 + } else { 790 + sprintf(procname, "%i", ifd); 791 + truncfd = openat(lo->proc_self_fd, procname, O_RDWR); 792 + if (truncfd < 0) { 793 + goto out_err; 794 + } 795 + } 796 + 797 + res = ftruncate(truncfd, attr->st_size); 798 + if (!fi) { 799 + saverr = errno; 800 + close(truncfd); 801 + errno = saverr; 802 + } 803 + if (res == -1) { 804 + goto out_err; 805 + } 806 + } 807 + if (valid & (FUSE_SET_ATTR_ATIME | FUSE_SET_ATTR_MTIME)) { 808 + struct timespec tv[2]; 809 + 810 + tv[0].tv_sec = 0; 811 + tv[1].tv_sec = 0; 812 + tv[0].tv_nsec = UTIME_OMIT; 813 + tv[1].tv_nsec = UTIME_OMIT; 814 + 815 + if (valid & FUSE_SET_ATTR_ATIME_NOW) { 816 + tv[0].tv_nsec = UTIME_NOW; 817 + } else if (valid & FUSE_SET_ATTR_ATIME) { 818 + tv[0] = attr->st_atim; 819 + } 820 + 821 + if (valid & FUSE_SET_ATTR_MTIME_NOW) { 822 + tv[1].tv_nsec = UTIME_NOW; 823 + } else if (valid & FUSE_SET_ATTR_MTIME) { 824 + tv[1] = attr->st_mtim; 825 + } 826 + 827 + if (fi) { 828 + res = futimens(fd, tv); 829 + } else { 830 + res = utimensat_empty(lo, inode, tv); 831 + } 832 + if (res == -1) { 833 + goto out_err; 834 + } 835 + } 836 + lo_inode_put(lo, &inode); 837 + 838 + return lo_getattr(req, ino, fi); 839 + 840 + out_err: 841 + saverr = errno; 842 + lo_inode_put(lo, &inode); 843 + fuse_reply_err(req, saverr); 844 + } 845 + 846 + static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st) 847 + { 848 + struct lo_inode *p; 849 + struct lo_key key = { 850 + .ino = st->st_ino, 851 + .dev = st->st_dev, 852 + }; 853 + 854 + pthread_mutex_lock(&lo->mutex); 855 + p = g_hash_table_lookup(lo->inodes, &key); 856 + if (p) { 857 + assert(p->nlookup > 0); 858 + p->nlookup++; 859 + g_atomic_int_inc(&p->refcount); 860 + } 861 + 
pthread_mutex_unlock(&lo->mutex); 862 + 863 + return p; 864 + } 865 + 866 + /* value_destroy_func for posix_locks GHashTable */ 867 + static void posix_locks_value_destroy(gpointer data) 868 + { 869 + struct lo_inode_plock *plock = data; 870 + 871 + /* 872 + * We had used open() for locks and had only one fd. So 873 + * closing this fd should release all OFD locks. 874 + */ 875 + close(plock->fd); 876 + free(plock); 877 + } 878 + 879 + /* 880 + * Increments nlookup and caller must release refcount using 881 + * lo_inode_put(&parent). 882 + */ 883 + static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, 884 + struct fuse_entry_param *e) 885 + { 886 + int newfd; 887 + int res; 888 + int saverr; 889 + struct lo_data *lo = lo_data(req); 890 + struct lo_inode *inode = NULL; 891 + struct lo_inode *dir = lo_inode(req, parent); 892 + 893 + /* 894 + * name_to_handle_at() and open_by_handle_at() can reach here with fuse 895 + * mount point in guest, but we don't have its inode info in the 896 + * ino_map. 
897 + */ 898 + if (!dir) { 899 + return ENOENT; 900 + } 901 + 902 + memset(e, 0, sizeof(*e)); 903 + e->attr_timeout = lo->timeout; 904 + e->entry_timeout = lo->timeout; 905 + 906 + /* Do not allow escaping root directory */ 907 + if (dir == &lo->root && strcmp(name, "..") == 0) { 908 + name = "."; 909 + } 910 + 911 + newfd = openat(dir->fd, name, O_PATH | O_NOFOLLOW); 912 + if (newfd == -1) { 913 + goto out_err; 914 + } 915 + 916 + res = fstatat(newfd, "", &e->attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); 917 + if (res == -1) { 918 + goto out_err; 919 + } 920 + 921 + inode = lo_find(lo, &e->attr); 922 + if (inode) { 923 + close(newfd); 924 + newfd = -1; 925 + } else { 926 + inode = calloc(1, sizeof(struct lo_inode)); 927 + if (!inode) { 928 + goto out_err; 929 + } 930 + 931 + inode->is_symlink = S_ISLNK(e->attr.st_mode); 932 + 933 + /* 934 + * One for the caller and one for nlookup (released in 935 + * unref_inode_lolocked()) 936 + */ 937 + g_atomic_int_set(&inode->refcount, 2); 938 + 939 + inode->nlookup = 1; 940 + inode->fd = newfd; 941 + newfd = -1; 942 + inode->key.ino = e->attr.st_ino; 943 + inode->key.dev = e->attr.st_dev; 944 + pthread_mutex_init(&inode->plock_mutex, NULL); 945 + inode->posix_locks = g_hash_table_new_full( 946 + g_direct_hash, g_direct_equal, NULL, posix_locks_value_destroy); 947 + 948 + pthread_mutex_lock(&lo->mutex); 949 + inode->fuse_ino = lo_add_inode_mapping(req, inode); 950 + g_hash_table_insert(lo->inodes, &inode->key, inode); 951 + pthread_mutex_unlock(&lo->mutex); 952 + } 953 + e->ino = inode->fuse_ino; 954 + lo_inode_put(lo, &inode); 955 + lo_inode_put(lo, &dir); 956 + 957 + fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", (unsigned long long)parent, 958 + name, (unsigned long long)e->ino); 959 + 960 + return 0; 961 + 962 + out_err: 963 + saverr = errno; 964 + if (newfd != -1) { 965 + close(newfd); 966 + } 967 + lo_inode_put(lo, &inode); 968 + lo_inode_put(lo, &dir); 969 + return saverr; 970 + } 971 + 972 + static void 
lo_lookup(fuse_req_t req, fuse_ino_t parent, const char *name) 973 + { 974 + struct fuse_entry_param e; 975 + int err; 976 + 977 + fuse_log(FUSE_LOG_DEBUG, "lo_lookup(parent=%" PRIu64 ", name=%s)\n", parent, 978 + name); 979 + 980 + /* 981 + * Don't use is_safe_path_component(), allow "." and ".." for NFS export 982 + * support. 983 + */ 984 + if (strchr(name, '/')) { 985 + fuse_reply_err(req, EINVAL); 986 + return; 987 + } 988 + 989 + err = lo_do_lookup(req, parent, name, &e); 990 + if (err) { 991 + fuse_reply_err(req, err); 992 + } else { 993 + fuse_reply_entry(req, &e); 994 + } 995 + } 996 + 997 + /* 998 + * On some archs, setres*id is limited to 2^16 but they 999 + * provide setres*id32 variants that allow 2^32. 1000 + * Others just let setres*id do 2^32 anyway. 1001 + */ 1002 + #ifdef SYS_setresgid32 1003 + #define OURSYS_setresgid SYS_setresgid32 1004 + #else 1005 + #define OURSYS_setresgid SYS_setresgid 1006 + #endif 1007 + 1008 + #ifdef SYS_setresuid32 1009 + #define OURSYS_setresuid SYS_setresuid32 1010 + #else 1011 + #define OURSYS_setresuid SYS_setresuid 1012 + #endif 1013 + 1014 + /* 1015 + * Change to uid/gid of caller so that file is created with 1016 + * ownership of caller. 1017 + * TODO: What about selinux context? 
1018 + */ 1019 + static int lo_change_cred(fuse_req_t req, struct lo_cred *old) 1020 + { 1021 + int res; 1022 + 1023 + old->euid = geteuid(); 1024 + old->egid = getegid(); 1025 + 1026 + res = syscall(OURSYS_setresgid, -1, fuse_req_ctx(req)->gid, -1); 1027 + if (res == -1) { 1028 + return errno; 1029 + } 1030 + 1031 + res = syscall(OURSYS_setresuid, -1, fuse_req_ctx(req)->uid, -1); 1032 + if (res == -1) { 1033 + int errno_save = errno; 1034 + 1035 + syscall(OURSYS_setresgid, -1, old->egid, -1); 1036 + return errno_save; 1037 + } 1038 + 1039 + return 0; 1040 + } 1041 + 1042 + /* Regain Privileges */ 1043 + static void lo_restore_cred(struct lo_cred *old) 1044 + { 1045 + int res; 1046 + 1047 + res = syscall(OURSYS_setresuid, -1, old->euid, -1); 1048 + if (res == -1) { 1049 + fuse_log(FUSE_LOG_ERR, "seteuid(%u): %m\n", old->euid); 1050 + exit(1); 1051 + } 1052 + 1053 + res = syscall(OURSYS_setresgid, -1, old->egid, -1); 1054 + if (res == -1) { 1055 + fuse_log(FUSE_LOG_ERR, "setegid(%u): %m\n", old->egid); 1056 + exit(1); 1057 + } 1058 + } 1059 + 1060 + static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, 1061 + const char *name, mode_t mode, dev_t rdev, 1062 + const char *link) 1063 + { 1064 + int res; 1065 + int saverr; 1066 + struct lo_data *lo = lo_data(req); 1067 + struct lo_inode *dir; 1068 + struct fuse_entry_param e; 1069 + struct lo_cred old = {}; 1070 + 1071 + if (!is_safe_path_component(name)) { 1072 + fuse_reply_err(req, EINVAL); 1073 + return; 1074 + } 1075 + 1076 + dir = lo_inode(req, parent); 1077 + if (!dir) { 1078 + fuse_reply_err(req, EBADF); 1079 + return; 1080 + } 1081 + 1082 + saverr = ENOMEM; 1083 + 1084 + saverr = lo_change_cred(req, &old); 1085 + if (saverr) { 1086 + goto out; 1087 + } 1088 + 1089 + res = mknod_wrapper(dir->fd, name, link, mode, rdev); 1090 + 1091 + saverr = errno; 1092 + 1093 + lo_restore_cred(&old); 1094 + 1095 + if (res == -1) { 1096 + goto out; 1097 + } 1098 + 1099 + saverr = lo_do_lookup(req, parent, name, &e); 
1100 + if (saverr) { 1101 + goto out; 1102 + } 1103 + 1104 + fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", (unsigned long long)parent, 1105 + name, (unsigned long long)e.ino); 1106 + 1107 + fuse_reply_entry(req, &e); 1108 + lo_inode_put(lo, &dir); 1109 + return; 1110 + 1111 + out: 1112 + lo_inode_put(lo, &dir); 1113 + fuse_reply_err(req, saverr); 1114 + } 1115 + 1116 + static void lo_mknod(fuse_req_t req, fuse_ino_t parent, const char *name, 1117 + mode_t mode, dev_t rdev) 1118 + { 1119 + lo_mknod_symlink(req, parent, name, mode, rdev, NULL); 1120 + } 1121 + 1122 + static void lo_mkdir(fuse_req_t req, fuse_ino_t parent, const char *name, 1123 + mode_t mode) 1124 + { 1125 + lo_mknod_symlink(req, parent, name, S_IFDIR | mode, 0, NULL); 1126 + } 1127 + 1128 + static void lo_symlink(fuse_req_t req, const char *link, fuse_ino_t parent, 1129 + const char *name) 1130 + { 1131 + lo_mknod_symlink(req, parent, name, S_IFLNK, 0, link); 1132 + } 1133 + 1134 + static int linkat_empty_nofollow(struct lo_data *lo, struct lo_inode *inode, 1135 + int dfd, const char *name) 1136 + { 1137 + int res; 1138 + struct lo_inode *parent; 1139 + char path[PATH_MAX]; 1140 + 1141 + if (inode->is_symlink) { 1142 + res = linkat(inode->fd, "", dfd, name, AT_EMPTY_PATH); 1143 + if (res == -1 && (errno == ENOENT || errno == EINVAL)) { 1144 + /* Sorry, no race free way to hard-link a symlink. 
*/ 1145 + if (lo->norace) { 1146 + errno = EPERM; 1147 + } else { 1148 + goto fallback; 1149 + } 1150 + } 1151 + return res; 1152 + } 1153 + 1154 + sprintf(path, "%i", inode->fd); 1155 + 1156 + return linkat(lo->proc_self_fd, path, dfd, name, AT_SYMLINK_FOLLOW); 1157 + 1158 + fallback: 1159 + res = lo_parent_and_name(lo, inode, path, &parent); 1160 + if (res != -1) { 1161 + res = linkat(parent->fd, path, dfd, name, 0); 1162 + unref_inode_lolocked(lo, parent, 1); 1163 + lo_inode_put(lo, &parent); 1164 + } 1165 + 1166 + return res; 1167 + } 1168 + 1169 + static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, 1170 + const char *name) 1171 + { 1172 + int res; 1173 + struct lo_data *lo = lo_data(req); 1174 + struct lo_inode *parent_inode; 1175 + struct lo_inode *inode; 1176 + struct fuse_entry_param e; 1177 + int saverr; 1178 + 1179 + if (!is_safe_path_component(name)) { 1180 + fuse_reply_err(req, EINVAL); 1181 + return; 1182 + } 1183 + 1184 + parent_inode = lo_inode(req, parent); 1185 + inode = lo_inode(req, ino); 1186 + if (!parent_inode || !inode) { 1187 + errno = EBADF; 1188 + goto out_err; 1189 + } 1190 + 1191 + memset(&e, 0, sizeof(struct fuse_entry_param)); 1192 + e.attr_timeout = lo->timeout; 1193 + e.entry_timeout = lo->timeout; 1194 + 1195 + res = linkat_empty_nofollow(lo, inode, parent_inode->fd, name); 1196 + if (res == -1) { 1197 + goto out_err; 1198 + } 1199 + 1200 + res = fstatat(inode->fd, "", &e.attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); 1201 + if (res == -1) { 1202 + goto out_err; 1203 + } 1204 + 1205 + pthread_mutex_lock(&lo->mutex); 1206 + inode->nlookup++; 1207 + pthread_mutex_unlock(&lo->mutex); 1208 + e.ino = inode->fuse_ino; 1209 + 1210 + fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", (unsigned long long)parent, 1211 + name, (unsigned long long)e.ino); 1212 + 1213 + fuse_reply_entry(req, &e); 1214 + lo_inode_put(lo, &parent_inode); 1215 + lo_inode_put(lo, &inode); 1216 + return; 1217 + 1218 + out_err: 1219 + saverr = errno; 1220 
+ lo_inode_put(lo, &parent_inode); 1221 + lo_inode_put(lo, &inode); 1222 + fuse_reply_err(req, saverr); 1223 + } 1224 + 1225 + /* Increments nlookup and caller must release refcount using lo_inode_put() */ 1226 + static struct lo_inode *lookup_name(fuse_req_t req, fuse_ino_t parent, 1227 + const char *name) 1228 + { 1229 + int res; 1230 + struct stat attr; 1231 + 1232 + res = fstatat(lo_fd(req, parent), name, &attr, 1233 + AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); 1234 + if (res == -1) { 1235 + return NULL; 1236 + } 1237 + 1238 + return lo_find(lo_data(req), &attr); 1239 + } 1240 + 1241 + static void lo_rmdir(fuse_req_t req, fuse_ino_t parent, const char *name) 1242 + { 1243 + int res; 1244 + struct lo_inode *inode; 1245 + struct lo_data *lo = lo_data(req); 1246 + 1247 + if (!is_safe_path_component(name)) { 1248 + fuse_reply_err(req, EINVAL); 1249 + return; 1250 + } 1251 + 1252 + inode = lookup_name(req, parent, name); 1253 + if (!inode) { 1254 + fuse_reply_err(req, EIO); 1255 + return; 1256 + } 1257 + 1258 + res = unlinkat(lo_fd(req, parent), name, AT_REMOVEDIR); 1259 + 1260 + fuse_reply_err(req, res == -1 ? 
errno : 0); 1261 + unref_inode_lolocked(lo, inode, 1); 1262 + lo_inode_put(lo, &inode); 1263 + } 1264 + 1265 + static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, 1266 + fuse_ino_t newparent, const char *newname, 1267 + unsigned int flags) 1268 + { 1269 + int res; 1270 + struct lo_inode *parent_inode; 1271 + struct lo_inode *newparent_inode; 1272 + struct lo_inode *oldinode = NULL; 1273 + struct lo_inode *newinode = NULL; 1274 + struct lo_data *lo = lo_data(req); 1275 + 1276 + if (!is_safe_path_component(name) || !is_safe_path_component(newname)) { 1277 + fuse_reply_err(req, EINVAL); 1278 + return; 1279 + } 1280 + 1281 + parent_inode = lo_inode(req, parent); 1282 + newparent_inode = lo_inode(req, newparent); 1283 + if (!parent_inode || !newparent_inode) { 1284 + fuse_reply_err(req, EBADF); 1285 + goto out; 1286 + } 1287 + 1288 + oldinode = lookup_name(req, parent, name); 1289 + newinode = lookup_name(req, newparent, newname); 1290 + 1291 + if (!oldinode) { 1292 + fuse_reply_err(req, EIO); 1293 + goto out; 1294 + } 1295 + 1296 + if (flags) { 1297 + #ifndef SYS_renameat2 1298 + fuse_reply_err(req, EINVAL); 1299 + #else 1300 + res = syscall(SYS_renameat2, parent_inode->fd, name, 1301 + newparent_inode->fd, newname, flags); 1302 + if (res == -1 && errno == ENOSYS) { 1303 + fuse_reply_err(req, EINVAL); 1304 + } else { 1305 + fuse_reply_err(req, res == -1 ? errno : 0); 1306 + } 1307 + #endif 1308 + goto out; 1309 + } 1310 + 1311 + res = renameat(parent_inode->fd, name, newparent_inode->fd, newname); 1312 + 1313 + fuse_reply_err(req, res == -1 ? 
errno : 0); 1314 + out: 1315 + unref_inode_lolocked(lo, oldinode, 1); 1316 + unref_inode_lolocked(lo, newinode, 1); 1317 + lo_inode_put(lo, &oldinode); 1318 + lo_inode_put(lo, &newinode); 1319 + lo_inode_put(lo, &parent_inode); 1320 + lo_inode_put(lo, &newparent_inode); 1321 + } 1322 + 1323 + static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name) 1324 + { 1325 + int res; 1326 + struct lo_inode *inode; 1327 + struct lo_data *lo = lo_data(req); 1328 + 1329 + if (!is_safe_path_component(name)) { 1330 + fuse_reply_err(req, EINVAL); 1331 + return; 1332 + } 1333 + 1334 + inode = lookup_name(req, parent, name); 1335 + if (!inode) { 1336 + fuse_reply_err(req, EIO); 1337 + return; 1338 + } 1339 + 1340 + res = unlinkat(lo_fd(req, parent), name, 0); 1341 + 1342 + fuse_reply_err(req, res == -1 ? errno : 0); 1343 + unref_inode_lolocked(lo, inode, 1); 1344 + lo_inode_put(lo, &inode); 1345 + } 1346 + 1347 + /* To be called with lo->mutex held */ 1348 + static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n) 1349 + { 1350 + if (!inode) { 1351 + return; 1352 + } 1353 + 1354 + assert(inode->nlookup >= n); 1355 + inode->nlookup -= n; 1356 + if (!inode->nlookup) { 1357 + lo_map_remove(&lo->ino_map, inode->fuse_ino); 1358 + g_hash_table_remove(lo->inodes, &inode->key); 1359 + if (g_hash_table_size(inode->posix_locks)) { 1360 + fuse_log(FUSE_LOG_WARNING, "Hash table is not empty\n"); 1361 + } 1362 + g_hash_table_destroy(inode->posix_locks); 1363 + pthread_mutex_destroy(&inode->plock_mutex); 1364 + 1365 + /* Drop our refcount from lo_do_lookup() */ 1366 + lo_inode_put(lo, &inode); 1367 + } 1368 + } 1369 + 1370 + static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, 1371 + uint64_t n) 1372 + { 1373 + if (!inode) { 1374 + return; 1375 + } 1376 + 1377 + pthread_mutex_lock(&lo->mutex); 1378 + unref_inode(lo, inode, n); 1379 + pthread_mutex_unlock(&lo->mutex); 1380 + } 1381 + 1382 + static void lo_forget_one(fuse_req_t req, 
fuse_ino_t ino, uint64_t nlookup) 1383 + { 1384 + struct lo_data *lo = lo_data(req); 1385 + struct lo_inode *inode; 1386 + 1387 + inode = lo_inode(req, ino); 1388 + if (!inode) { 1389 + return; 1390 + } 1391 + 1392 + fuse_log(FUSE_LOG_DEBUG, " forget %lli %lli -%lli\n", 1393 + (unsigned long long)ino, (unsigned long long)inode->nlookup, 1394 + (unsigned long long)nlookup); 1395 + 1396 + unref_inode_lolocked(lo, inode, nlookup); 1397 + lo_inode_put(lo, &inode); 1398 + } 1399 + 1400 + static void lo_forget(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) 1401 + { 1402 + lo_forget_one(req, ino, nlookup); 1403 + fuse_reply_none(req); 1404 + } 1405 + 1406 + static void lo_forget_multi(fuse_req_t req, size_t count, 1407 + struct fuse_forget_data *forgets) 1408 + { 1409 + int i; 1410 + 1411 + for (i = 0; i < count; i++) { 1412 + lo_forget_one(req, forgets[i].ino, forgets[i].nlookup); 1413 + } 1414 + fuse_reply_none(req); 1415 + } 1416 + 1417 + static void lo_readlink(fuse_req_t req, fuse_ino_t ino) 1418 + { 1419 + char buf[PATH_MAX + 1]; 1420 + int res; 1421 + 1422 + res = readlinkat(lo_fd(req, ino), "", buf, sizeof(buf)); 1423 + if (res == -1) { 1424 + return (void)fuse_reply_err(req, errno); 1425 + } 1426 + 1427 + if (res == sizeof(buf)) { 1428 + return (void)fuse_reply_err(req, ENAMETOOLONG); 1429 + } 1430 + 1431 + buf[res] = '\0'; 1432 + 1433 + fuse_reply_readlink(req, buf); 1434 + } 1435 + 1436 + struct lo_dirp { 1437 + gint refcount; 1438 + DIR *dp; 1439 + struct dirent *entry; 1440 + off_t offset; 1441 + }; 1442 + 1443 + static void lo_dirp_put(struct lo_dirp **dp) 1444 + { 1445 + struct lo_dirp *d = *dp; 1446 + 1447 + if (!d) { 1448 + return; 1449 + } 1450 + *dp = NULL; 1451 + 1452 + if (g_atomic_int_dec_and_test(&d->refcount)) { 1453 + closedir(d->dp); 1454 + free(d); 1455 + } 1456 + } 1457 + 1458 + /* Call lo_dirp_put() on the return value when no longer needed */ 1459 + static struct lo_dirp *lo_dirp(fuse_req_t req, struct fuse_file_info *fi) 1460 + { 1461 + 
struct lo_data *lo = lo_data(req); 1462 + struct lo_map_elem *elem; 1463 + 1464 + pthread_mutex_lock(&lo->mutex); 1465 + elem = lo_map_get(&lo->dirp_map, fi->fh); 1466 + if (elem) { 1467 + g_atomic_int_inc(&elem->dirp->refcount); 1468 + } 1469 + pthread_mutex_unlock(&lo->mutex); 1470 + if (!elem) { 1471 + return NULL; 1472 + } 1473 + 1474 + return elem->dirp; 1475 + } 1476 + 1477 + static void lo_opendir(fuse_req_t req, fuse_ino_t ino, 1478 + struct fuse_file_info *fi) 1479 + { 1480 + int error = ENOMEM; 1481 + struct lo_data *lo = lo_data(req); 1482 + struct lo_dirp *d; 1483 + int fd; 1484 + ssize_t fh; 1485 + 1486 + d = calloc(1, sizeof(struct lo_dirp)); 1487 + if (d == NULL) { 1488 + goto out_err; 1489 + } 1490 + 1491 + fd = openat(lo_fd(req, ino), ".", O_RDONLY); 1492 + if (fd == -1) { 1493 + goto out_errno; 1494 + } 1495 + 1496 + d->dp = fdopendir(fd); 1497 + if (d->dp == NULL) { 1498 + goto out_errno; 1499 + } 1500 + 1501 + d->offset = 0; 1502 + d->entry = NULL; 1503 + 1504 + g_atomic_int_set(&d->refcount, 1); /* paired with lo_releasedir() */ 1505 + pthread_mutex_lock(&lo->mutex); 1506 + fh = lo_add_dirp_mapping(req, d); 1507 + pthread_mutex_unlock(&lo->mutex); 1508 + if (fh == -1) { 1509 + goto out_err; 1510 + } 1511 + 1512 + fi->fh = fh; 1513 + if (lo->cache == CACHE_ALWAYS) { 1514 + fi->cache_readdir = 1; 1515 + } 1516 + fuse_reply_open(req, fi); 1517 + return; 1518 + 1519 + out_errno: 1520 + error = errno; 1521 + out_err: 1522 + if (d) { 1523 + if (d->dp) { 1524 + closedir(d->dp); 1525 + } 1526 + if (fd != -1) { 1527 + close(fd); 1528 + } 1529 + free(d); 1530 + } 1531 + fuse_reply_err(req, error); 1532 + } 1533 + 1534 + static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, 1535 + off_t offset, struct fuse_file_info *fi, int plus) 1536 + { 1537 + struct lo_data *lo = lo_data(req); 1538 + struct lo_dirp *d = NULL; 1539 + struct lo_inode *dinode; 1540 + char *buf = NULL; 1541 + char *p; 1542 + size_t rem = size; 1543 + int err = EBADF; 1544 
+ 1545 + dinode = lo_inode(req, ino); 1546 + if (!dinode) { 1547 + goto error; 1548 + } 1549 + 1550 + d = lo_dirp(req, fi); 1551 + if (!d) { 1552 + goto error; 1553 + } 1554 + 1555 + err = ENOMEM; 1556 + buf = calloc(1, size); 1557 + if (!buf) { 1558 + goto error; 1559 + } 1560 + p = buf; 1561 + 1562 + if (offset != d->offset) { 1563 + seekdir(d->dp, offset); 1564 + d->entry = NULL; 1565 + d->offset = offset; 1566 + } 1567 + while (1) { 1568 + size_t entsize; 1569 + off_t nextoff; 1570 + const char *name; 1571 + 1572 + if (!d->entry) { 1573 + errno = 0; 1574 + d->entry = readdir(d->dp); 1575 + if (!d->entry) { 1576 + if (errno) { /* Error */ 1577 + err = errno; 1578 + goto error; 1579 + } else { /* End of stream */ 1580 + break; 1581 + } 1582 + } 1583 + } 1584 + nextoff = d->entry->d_off; 1585 + name = d->entry->d_name; 1586 + 1587 + fuse_ino_t entry_ino = 0; 1588 + struct fuse_entry_param e = (struct fuse_entry_param){ 1589 + .attr.st_ino = d->entry->d_ino, 1590 + .attr.st_mode = d->entry->d_type << 12, 1591 + }; 1592 + 1593 + /* Hide root's parent directory */ 1594 + if (dinode == &lo->root && strcmp(name, "..") == 0) { 1595 + e.attr.st_ino = lo->root.key.ino; 1596 + e.attr.st_mode = DT_DIR << 12; 1597 + } 1598 + 1599 + if (plus) { 1600 + if (!is_dot_or_dotdot(name)) { 1601 + err = lo_do_lookup(req, ino, name, &e); 1602 + if (err) { 1603 + goto error; 1604 + } 1605 + entry_ino = e.ino; 1606 + } 1607 + 1608 + entsize = fuse_add_direntry_plus(req, p, rem, name, &e, nextoff); 1609 + } else { 1610 + entsize = fuse_add_direntry(req, p, rem, name, &e.attr, nextoff); 1611 + } 1612 + if (entsize > rem) { 1613 + if (entry_ino != 0) { 1614 + lo_forget_one(req, entry_ino, 1); 1615 + } 1616 + break; 1617 + } 1618 + 1619 + p += entsize; 1620 + rem -= entsize; 1621 + 1622 + d->entry = NULL; 1623 + d->offset = nextoff; 1624 + } 1625 + 1626 + err = 0; 1627 + error: 1628 + lo_dirp_put(&d); 1629 + lo_inode_put(lo, &dinode); 1630 + 1631 + /* 1632 + * If there's an error, we can 
only signal it if we haven't stored 1633 + * any entries yet - otherwise we'd end up with wrong lookup 1634 + * counts for the entries that are already in the buffer. So we 1635 + * return what we've collected until that point. 1636 + */ 1637 + if (err && rem == size) { 1638 + fuse_reply_err(req, err); 1639 + } else { 1640 + fuse_reply_buf(req, buf, size - rem); 1641 + } 1642 + free(buf); 1643 + } 1644 + 1645 + static void lo_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, 1646 + off_t offset, struct fuse_file_info *fi) 1647 + { 1648 + lo_do_readdir(req, ino, size, offset, fi, 0); 1649 + } 1650 + 1651 + static void lo_readdirplus(fuse_req_t req, fuse_ino_t ino, size_t size, 1652 + off_t offset, struct fuse_file_info *fi) 1653 + { 1654 + lo_do_readdir(req, ino, size, offset, fi, 1); 1655 + } 1656 + 1657 + static void lo_releasedir(fuse_req_t req, fuse_ino_t ino, 1658 + struct fuse_file_info *fi) 1659 + { 1660 + struct lo_data *lo = lo_data(req); 1661 + struct lo_map_elem *elem; 1662 + struct lo_dirp *d; 1663 + 1664 + (void)ino; 1665 + 1666 + pthread_mutex_lock(&lo->mutex); 1667 + elem = lo_map_get(&lo->dirp_map, fi->fh); 1668 + if (!elem) { 1669 + pthread_mutex_unlock(&lo->mutex); 1670 + fuse_reply_err(req, EBADF); 1671 + return; 1672 + } 1673 + 1674 + d = elem->dirp; 1675 + lo_map_remove(&lo->dirp_map, fi->fh); 1676 + pthread_mutex_unlock(&lo->mutex); 1677 + 1678 + lo_dirp_put(&d); /* paired with lo_opendir() */ 1679 + 1680 + fuse_reply_err(req, 0); 1681 + } 1682 + 1683 + static void update_open_flags(int writeback, struct fuse_file_info *fi) 1684 + { 1685 + /* 1686 + * With writeback cache, kernel may send read requests even 1687 + * when userspace opened write-only 1688 + */ 1689 + if (writeback && (fi->flags & O_ACCMODE) == O_WRONLY) { 1690 + fi->flags &= ~O_ACCMODE; 1691 + fi->flags |= O_RDWR; 1692 + } 1693 + 1694 + /* 1695 + * With writeback cache, O_APPEND is handled by the kernel. 
1696 + * This breaks atomicity (since the file may change in the 1697 + * underlying filesystem, so that the kernel's idea of the 1698 + * end of the file isn't accurate anymore). In this example, 1699 + * we just accept that. A more rigorous filesystem may want 1700 + * to return an error here 1701 + */ 1702 + if (writeback && (fi->flags & O_APPEND)) { 1703 + fi->flags &= ~O_APPEND; 1704 + } 1705 + 1706 + /* 1707 + * O_DIRECT in guest should not necessarily mean bypassing page 1708 + * cache on host as well. If somebody needs that behavior, it 1709 + * probably should be a configuration knob in daemon. 1710 + */ 1711 + fi->flags &= ~O_DIRECT; 1712 + } 1713 + 1714 + static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, 1715 + mode_t mode, struct fuse_file_info *fi) 1716 + { 1717 + int fd; 1718 + struct lo_data *lo = lo_data(req); 1719 + struct lo_inode *parent_inode; 1720 + struct fuse_entry_param e; 1721 + int err; 1722 + struct lo_cred old = {}; 1723 + 1724 + fuse_log(FUSE_LOG_DEBUG, "lo_create(parent=%" PRIu64 ", name=%s)\n", parent, 1725 + name); 1726 + 1727 + if (!is_safe_path_component(name)) { 1728 + fuse_reply_err(req, EINVAL); 1729 + return; 1730 + } 1731 + 1732 + parent_inode = lo_inode(req, parent); 1733 + if (!parent_inode) { 1734 + fuse_reply_err(req, EBADF); 1735 + return; 1736 + } 1737 + 1738 + err = lo_change_cred(req, &old); 1739 + if (err) { 1740 + goto out; 1741 + } 1742 + 1743 + update_open_flags(lo->writeback, fi); 1744 + 1745 + fd = openat(parent_inode->fd, name, (fi->flags | O_CREAT) & ~O_NOFOLLOW, 1746 + mode); 1747 + err = fd == -1 ? 
errno : 0; 1748 + lo_restore_cred(&old); 1749 + 1750 + if (!err) { 1751 + ssize_t fh; 1752 + 1753 + pthread_mutex_lock(&lo->mutex); 1754 + fh = lo_add_fd_mapping(req, fd); 1755 + pthread_mutex_unlock(&lo->mutex); 1756 + if (fh == -1) { 1757 + close(fd); 1758 + err = ENOMEM; 1759 + goto out; 1760 + } 1761 + 1762 + fi->fh = fh; 1763 + err = lo_do_lookup(req, parent, name, &e); 1764 + } 1765 + if (lo->cache == CACHE_NONE) { 1766 + fi->direct_io = 1; 1767 + } else if (lo->cache == CACHE_ALWAYS) { 1768 + fi->keep_cache = 1; 1769 + } 1770 + 1771 + out: 1772 + lo_inode_put(lo, &parent_inode); 1773 + 1774 + if (err) { 1775 + fuse_reply_err(req, err); 1776 + } else { 1777 + fuse_reply_create(req, &e, fi); 1778 + } 1779 + } 1780 + 1781 + /* Should be called with inode->plock_mutex held */ 1782 + static struct lo_inode_plock *lookup_create_plock_ctx(struct lo_data *lo, 1783 + struct lo_inode *inode, 1784 + uint64_t lock_owner, 1785 + pid_t pid, int *err) 1786 + { 1787 + struct lo_inode_plock *plock; 1788 + char procname[64]; 1789 + int fd; 1790 + 1791 + plock = 1792 + g_hash_table_lookup(inode->posix_locks, GUINT_TO_POINTER(lock_owner)); 1793 + 1794 + if (plock) { 1795 + return plock; 1796 + } 1797 + 1798 + plock = malloc(sizeof(struct lo_inode_plock)); 1799 + if (!plock) { 1800 + *err = ENOMEM; 1801 + return NULL; 1802 + } 1803 + 1804 + /* Open another instance of file which can be used for ofd locks. */ 1805 + sprintf(procname, "%i", inode->fd); 1806 + 1807 + /* TODO: What if file is not writable? 
*/ 1808 + fd = openat(lo->proc_self_fd, procname, O_RDWR); 1809 + if (fd == -1) { 1810 + *err = errno; 1811 + free(plock); 1812 + return NULL; 1813 + } 1814 + 1815 + plock->lock_owner = lock_owner; 1816 + plock->fd = fd; 1817 + g_hash_table_insert(inode->posix_locks, GUINT_TO_POINTER(plock->lock_owner), 1818 + plock); 1819 + return plock; 1820 + } 1821 + 1822 + static void lo_getlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, 1823 + struct flock *lock) 1824 + { 1825 + struct lo_data *lo = lo_data(req); 1826 + struct lo_inode *inode; 1827 + struct lo_inode_plock *plock; 1828 + int ret, saverr = 0; 1829 + 1830 + fuse_log(FUSE_LOG_DEBUG, 1831 + "lo_getlk(ino=%" PRIu64 ", flags=%d)" 1832 + " owner=0x%lx, l_type=%d l_start=0x%lx" 1833 + " l_len=0x%lx\n", 1834 + ino, fi->flags, fi->lock_owner, lock->l_type, lock->l_start, 1835 + lock->l_len); 1836 + 1837 + inode = lo_inode(req, ino); 1838 + if (!inode) { 1839 + fuse_reply_err(req, EBADF); 1840 + return; 1841 + } 1842 + 1843 + pthread_mutex_lock(&inode->plock_mutex); 1844 + plock = 1845 + lookup_create_plock_ctx(lo, inode, fi->lock_owner, lock->l_pid, &ret); 1846 + if (!plock) { 1847 + saverr = ret; 1848 + goto out; 1849 + } 1850 + 1851 + ret = fcntl(plock->fd, F_OFD_GETLK, lock); 1852 + if (ret == -1) { 1853 + saverr = errno; 1854 + } 1855 + 1856 + out: 1857 + pthread_mutex_unlock(&inode->plock_mutex); 1858 + lo_inode_put(lo, &inode); 1859 + 1860 + if (saverr) { 1861 + fuse_reply_err(req, saverr); 1862 + } else { 1863 + fuse_reply_lock(req, lock); 1864 + } 1865 + } 1866 + 1867 + static void lo_setlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, 1868 + struct flock *lock, int sleep) 1869 + { 1870 + struct lo_data *lo = lo_data(req); 1871 + struct lo_inode *inode; 1872 + struct lo_inode_plock *plock; 1873 + int ret, saverr = 0; 1874 + 1875 + fuse_log(FUSE_LOG_DEBUG, 1876 + "lo_setlk(ino=%" PRIu64 ", flags=%d)" 1877 + " cmd=%d pid=%d owner=0x%lx sleep=%d l_whence=%d" 1878 + " l_start=0x%lx 
l_len=0x%lx\n", 1879 + ino, fi->flags, lock->l_type, lock->l_pid, fi->lock_owner, sleep, 1880 + lock->l_whence, lock->l_start, lock->l_len); 1881 + 1882 + if (sleep) { 1883 + fuse_reply_err(req, EOPNOTSUPP); 1884 + return; 1885 + } 1886 + 1887 + inode = lo_inode(req, ino); 1888 + if (!inode) { 1889 + fuse_reply_err(req, EBADF); 1890 + return; 1891 + } 1892 + 1893 + pthread_mutex_lock(&inode->plock_mutex); 1894 + plock = 1895 + lookup_create_plock_ctx(lo, inode, fi->lock_owner, lock->l_pid, &ret); 1896 + 1897 + if (!plock) { 1898 + saverr = ret; 1899 + goto out; 1900 + } 1901 + 1902 + /* TODO: Is it alright to modify flock? */ 1903 + lock->l_pid = 0; 1904 + ret = fcntl(plock->fd, F_OFD_SETLK, lock); 1905 + if (ret == -1) { 1906 + saverr = errno; 1907 + } 1908 + 1909 + out: 1910 + pthread_mutex_unlock(&inode->plock_mutex); 1911 + lo_inode_put(lo, &inode); 1912 + 1913 + fuse_reply_err(req, saverr); 1914 + } 1915 + 1916 + static void lo_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync, 1917 + struct fuse_file_info *fi) 1918 + { 1919 + int res; 1920 + struct lo_dirp *d; 1921 + int fd; 1922 + 1923 + (void)ino; 1924 + 1925 + d = lo_dirp(req, fi); 1926 + if (!d) { 1927 + fuse_reply_err(req, EBADF); 1928 + return; 1929 + } 1930 + 1931 + fd = dirfd(d->dp); 1932 + if (datasync) { 1933 + res = fdatasync(fd); 1934 + } else { 1935 + res = fsync(fd); 1936 + } 1937 + 1938 + lo_dirp_put(&d); 1939 + 1940 + fuse_reply_err(req, res == -1 ? 
errno : 0); 1941 + } 1942 + 1943 + static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) 1944 + { 1945 + int fd; 1946 + ssize_t fh; 1947 + char buf[64]; 1948 + struct lo_data *lo = lo_data(req); 1949 + 1950 + fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n", ino, 1951 + fi->flags); 1952 + 1953 + update_open_flags(lo->writeback, fi); 1954 + 1955 + sprintf(buf, "%i", lo_fd(req, ino)); 1956 + fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW); 1957 + if (fd == -1) { 1958 + return (void)fuse_reply_err(req, errno); 1959 + } 1960 + 1961 + pthread_mutex_lock(&lo->mutex); 1962 + fh = lo_add_fd_mapping(req, fd); 1963 + pthread_mutex_unlock(&lo->mutex); 1964 + if (fh == -1) { 1965 + close(fd); 1966 + fuse_reply_err(req, ENOMEM); 1967 + return; 1968 + } 1969 + 1970 + fi->fh = fh; 1971 + if (lo->cache == CACHE_NONE) { 1972 + fi->direct_io = 1; 1973 + } else if (lo->cache == CACHE_ALWAYS) { 1974 + fi->keep_cache = 1; 1975 + } 1976 + fuse_reply_open(req, fi); 1977 + } 1978 + 1979 + static void lo_release(fuse_req_t req, fuse_ino_t ino, 1980 + struct fuse_file_info *fi) 1981 + { 1982 + struct lo_data *lo = lo_data(req); 1983 + struct lo_map_elem *elem; 1984 + int fd = -1; 1985 + 1986 + (void)ino; 1987 + 1988 + pthread_mutex_lock(&lo->mutex); 1989 + elem = lo_map_get(&lo->fd_map, fi->fh); 1990 + if (elem) { 1991 + fd = elem->fd; 1992 + elem = NULL; 1993 + lo_map_remove(&lo->fd_map, fi->fh); 1994 + } 1995 + pthread_mutex_unlock(&lo->mutex); 1996 + 1997 + close(fd); 1998 + fuse_reply_err(req, 0); 1999 + } 2000 + 2001 + static void lo_flush(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) 2002 + { 2003 + int res; 2004 + (void)ino; 2005 + struct lo_inode *inode; 2006 + 2007 + inode = lo_inode(req, ino); 2008 + if (!inode) { 2009 + fuse_reply_err(req, EBADF); 2010 + return; 2011 + } 2012 + 2013 + /* An fd is going away. 
Cleanup associated posix locks */ 2014 + pthread_mutex_lock(&inode->plock_mutex); 2015 + g_hash_table_remove(inode->posix_locks, GUINT_TO_POINTER(fi->lock_owner)); 2016 + pthread_mutex_unlock(&inode->plock_mutex); 2017 + 2018 + res = close(dup(lo_fi_fd(req, fi))); 2019 + lo_inode_put(lo_data(req), &inode); 2020 + fuse_reply_err(req, res == -1 ? errno : 0); 2021 + } 2022 + 2023 + static void lo_fsync(fuse_req_t req, fuse_ino_t ino, int datasync, 2024 + struct fuse_file_info *fi) 2025 + { 2026 + int res; 2027 + int fd; 2028 + char *buf; 2029 + 2030 + fuse_log(FUSE_LOG_DEBUG, "lo_fsync(ino=%" PRIu64 ", fi=0x%p)\n", ino, 2031 + (void *)fi); 2032 + 2033 + if (!fi) { 2034 + struct lo_data *lo = lo_data(req); 2035 + 2036 + res = asprintf(&buf, "%i", lo_fd(req, ino)); 2037 + if (res == -1) { 2038 + return (void)fuse_reply_err(req, errno); 2039 + } 2040 + 2041 + fd = openat(lo->proc_self_fd, buf, O_RDWR); 2042 + free(buf); 2043 + if (fd == -1) { 2044 + return (void)fuse_reply_err(req, errno); 2045 + } 2046 + } else { 2047 + fd = lo_fi_fd(req, fi); 2048 + } 2049 + 2050 + if (datasync) { 2051 + res = fdatasync(fd); 2052 + } else { 2053 + res = fsync(fd); 2054 + } 2055 + if (!fi) { 2056 + close(fd); 2057 + } 2058 + fuse_reply_err(req, res == -1 ? 
errno : 0); 2059 + } 2060 + 2061 + static void lo_read(fuse_req_t req, fuse_ino_t ino, size_t size, off_t offset, 2062 + struct fuse_file_info *fi) 2063 + { 2064 + struct fuse_bufvec buf = FUSE_BUFVEC_INIT(size); 2065 + 2066 + fuse_log(FUSE_LOG_DEBUG, 2067 + "lo_read(ino=%" PRIu64 ", size=%zd, " 2068 + "off=%lu)\n", 2069 + ino, size, (unsigned long)offset); 2070 + 2071 + buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK; 2072 + buf.buf[0].fd = lo_fi_fd(req, fi); 2073 + buf.buf[0].pos = offset; 2074 + 2075 + fuse_reply_data(req, &buf); 2076 + } 2077 + 2078 + static void lo_write_buf(fuse_req_t req, fuse_ino_t ino, 2079 + struct fuse_bufvec *in_buf, off_t off, 2080 + struct fuse_file_info *fi) 2081 + { 2082 + (void)ino; 2083 + ssize_t res; 2084 + struct fuse_bufvec out_buf = FUSE_BUFVEC_INIT(fuse_buf_size(in_buf)); 2085 + bool cap_fsetid_dropped = false; 2086 + 2087 + out_buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK; 2088 + out_buf.buf[0].fd = lo_fi_fd(req, fi); 2089 + out_buf.buf[0].pos = off; 2090 + 2091 + fuse_log(FUSE_LOG_DEBUG, 2092 + "lo_write_buf(ino=%" PRIu64 ", size=%zd, off=%lu)\n", ino, 2093 + out_buf.buf[0].size, (unsigned long)off); 2094 + 2095 + /* 2096 + * If kill_priv is set, drop CAP_FSETID which should lead to kernel 2097 + * clearing setuid/setgid on file. 
2098 + */ 2099 + if (fi->kill_priv) { 2100 + res = drop_effective_cap("FSETID", &cap_fsetid_dropped); 2101 + if (res != 0) { 2102 + fuse_reply_err(req, res); 2103 + return; 2104 + } 2105 + } 2106 + 2107 + res = fuse_buf_copy(&out_buf, in_buf); 2108 + if (res < 0) { 2109 + fuse_reply_err(req, -res); 2110 + } else { 2111 + fuse_reply_write(req, (size_t)res); 2112 + } 2113 + 2114 + if (cap_fsetid_dropped) { 2115 + res = gain_effective_cap("FSETID"); 2116 + if (res) { 2117 + fuse_log(FUSE_LOG_ERR, "Failed to gain CAP_FSETID\n"); 2118 + } 2119 + } 2120 + } 2121 + 2122 + static void lo_statfs(fuse_req_t req, fuse_ino_t ino) 2123 + { 2124 + int res; 2125 + struct statvfs stbuf; 2126 + 2127 + res = fstatvfs(lo_fd(req, ino), &stbuf); 2128 + if (res == -1) { 2129 + fuse_reply_err(req, errno); 2130 + } else { 2131 + fuse_reply_statfs(req, &stbuf); 2132 + } 2133 + } 2134 + 2135 + static void lo_fallocate(fuse_req_t req, fuse_ino_t ino, int mode, off_t offset, 2136 + off_t length, struct fuse_file_info *fi) 2137 + { 2138 + int err = EOPNOTSUPP; 2139 + (void)ino; 2140 + 2141 + #ifdef CONFIG_FALLOCATE 2142 + err = fallocate(lo_fi_fd(req, fi), mode, offset, length); 2143 + if (err < 0) { 2144 + err = errno; 2145 + } 2146 + 2147 + #elif defined(CONFIG_POSIX_FALLOCATE) 2148 + if (mode) { 2149 + fuse_reply_err(req, EOPNOTSUPP); 2150 + return; 2151 + } 2152 + 2153 + err = posix_fallocate(lo_fi_fd(req, fi), offset, length); 2154 + #endif 2155 + 2156 + fuse_reply_err(req, err); 2157 + } 2158 + 2159 + static void lo_flock(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, 2160 + int op) 2161 + { 2162 + int res; 2163 + (void)ino; 2164 + 2165 + res = flock(lo_fi_fd(req, fi), op); 2166 + 2167 + fuse_reply_err(req, res == -1 ? 
errno : 0); 2168 + } 2169 + 2170 + static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, 2171 + size_t size) 2172 + { 2173 + struct lo_data *lo = lo_data(req); 2174 + char *value = NULL; 2175 + char procname[64]; 2176 + struct lo_inode *inode; 2177 + ssize_t ret; 2178 + int saverr; 2179 + int fd = -1; 2180 + 2181 + inode = lo_inode(req, ino); 2182 + if (!inode) { 2183 + fuse_reply_err(req, EBADF); 2184 + return; 2185 + } 2186 + 2187 + saverr = ENOSYS; 2188 + if (!lo_data(req)->xattr) { 2189 + goto out; 2190 + } 2191 + 2192 + fuse_log(FUSE_LOG_DEBUG, "lo_getxattr(ino=%" PRIu64 ", name=%s size=%zd)\n", 2193 + ino, name, size); 2194 + 2195 + if (inode->is_symlink) { 2196 + /* Sorry, no race free way to getxattr on symlink. */ 2197 + saverr = EPERM; 2198 + goto out; 2199 + } 2200 + 2201 + sprintf(procname, "%i", inode->fd); 2202 + fd = openat(lo->proc_self_fd, procname, O_RDONLY); 2203 + if (fd < 0) { 2204 + goto out_err; 2205 + } 2206 + 2207 + if (size) { 2208 + value = malloc(size); 2209 + if (!value) { 2210 + goto out_err; 2211 + } 2212 + 2213 + ret = fgetxattr(fd, name, value, size); 2214 + if (ret == -1) { 2215 + goto out_err; 2216 + } 2217 + saverr = 0; 2218 + if (ret == 0) { 2219 + goto out; 2220 + } 2221 + 2222 + fuse_reply_buf(req, value, ret); 2223 + } else { 2224 + ret = fgetxattr(fd, name, NULL, 0); 2225 + if (ret == -1) { 2226 + goto out_err; 2227 + } 2228 + 2229 + fuse_reply_xattr(req, ret); 2230 + } 2231 + out_free: 2232 + free(value); 2233 + 2234 + if (fd >= 0) { 2235 + close(fd); 2236 + } 2237 + 2238 + lo_inode_put(lo, &inode); 2239 + return; 2240 + 2241 + out_err: 2242 + saverr = errno; 2243 + out: 2244 + lo_inode_put(lo, &inode); 2245 + fuse_reply_err(req, saverr); 2246 + goto out_free; 2247 + } 2248 + 2249 + static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) 2250 + { 2251 + struct lo_data *lo = lo_data(req); 2252 + char *value = NULL; 2253 + char procname[64]; 2254 + struct lo_inode *inode; 2255 + ssize_t ret; 
2256 + int saverr; 2257 + int fd = -1; 2258 + 2259 + inode = lo_inode(req, ino); 2260 + if (!inode) { 2261 + fuse_reply_err(req, EBADF); 2262 + return; 2263 + } 2264 + 2265 + saverr = ENOSYS; 2266 + if (!lo_data(req)->xattr) { 2267 + goto out; 2268 + } 2269 + 2270 + fuse_log(FUSE_LOG_DEBUG, "lo_listxattr(ino=%" PRIu64 ", size=%zd)\n", ino, 2271 + size); 2272 + 2273 + if (inode->is_symlink) { 2274 + /* Sorry, no race free way to listxattr on symlink. */ 2275 + saverr = EPERM; 2276 + goto out; 2277 + } 2278 + 2279 + sprintf(procname, "%i", inode->fd); 2280 + fd = openat(lo->proc_self_fd, procname, O_RDONLY); 2281 + if (fd < 0) { 2282 + goto out_err; 2283 + } 2284 + 2285 + if (size) { 2286 + value = malloc(size); 2287 + if (!value) { 2288 + goto out_err; 2289 + } 2290 + 2291 + ret = flistxattr(fd, value, size); 2292 + if (ret == -1) { 2293 + goto out_err; 2294 + } 2295 + saverr = 0; 2296 + if (ret == 0) { 2297 + goto out; 2298 + } 2299 + 2300 + fuse_reply_buf(req, value, ret); 2301 + } else { 2302 + ret = flistxattr(fd, NULL, 0); 2303 + if (ret == -1) { 2304 + goto out_err; 2305 + } 2306 + 2307 + fuse_reply_xattr(req, ret); 2308 + } 2309 + out_free: 2310 + free(value); 2311 + 2312 + if (fd >= 0) { 2313 + close(fd); 2314 + } 2315 + 2316 + lo_inode_put(lo, &inode); 2317 + return; 2318 + 2319 + out_err: 2320 + saverr = errno; 2321 + out: 2322 + lo_inode_put(lo, &inode); 2323 + fuse_reply_err(req, saverr); 2324 + goto out_free; 2325 + } 2326 + 2327 + static void lo_setxattr(fuse_req_t req, fuse_ino_t ino, const char *name, 2328 + const char *value, size_t size, int flags) 2329 + { 2330 + char procname[64]; 2331 + struct lo_data *lo = lo_data(req); 2332 + struct lo_inode *inode; 2333 + ssize_t ret; 2334 + int saverr; 2335 + int fd = -1; 2336 + 2337 + inode = lo_inode(req, ino); 2338 + if (!inode) { 2339 + fuse_reply_err(req, EBADF); 2340 + return; 2341 + } 2342 + 2343 + saverr = ENOSYS; 2344 + if (!lo_data(req)->xattr) { 2345 + goto out; 2346 + } 2347 + 2348 + 
fuse_log(FUSE_LOG_DEBUG, "lo_setxattr(ino=%" PRIu64 2349 + ", name=%s value=%s size=%zd)\n", ino, name, value, size); 2350 + 2351 + if (inode->is_symlink) { 2352 + /* Sorry, no race free way to setxattr on symlink. */ 2353 + saverr = EPERM; 2354 + goto out; 2355 + } 2356 + 2357 + sprintf(procname, "%i", inode->fd); 2358 + fd = openat(lo->proc_self_fd, procname, O_RDWR); 2359 + if (fd < 0) { 2360 + saverr = errno; 2361 + goto out; 2362 + } 2363 + 2364 + ret = fsetxattr(fd, name, value, size, flags); 2365 + saverr = ret == -1 ? errno : 0; 2366 + 2367 + out: 2368 + if (fd >= 0) { 2369 + close(fd); 2370 + } 2371 + 2372 + lo_inode_put(lo, &inode); 2373 + fuse_reply_err(req, saverr); 2374 + } 2375 + 2376 + static void lo_removexattr(fuse_req_t req, fuse_ino_t ino, const char *name) 2377 + { 2378 + char procname[64]; 2379 + struct lo_data *lo = lo_data(req); 2380 + struct lo_inode *inode; 2381 + ssize_t ret; 2382 + int saverr; 2383 + int fd = -1; 2384 + 2385 + inode = lo_inode(req, ino); 2386 + if (!inode) { 2387 + fuse_reply_err(req, EBADF); 2388 + return; 2389 + } 2390 + 2391 + saverr = ENOSYS; 2392 + if (!lo_data(req)->xattr) { 2393 + goto out; 2394 + } 2395 + 2396 + fuse_log(FUSE_LOG_DEBUG, "lo_removexattr(ino=%" PRIu64 ", name=%s)\n", ino, 2397 + name); 2398 + 2399 + if (inode->is_symlink) { 2400 + /* Sorry, no race free way to setxattr on symlink. */ 2401 + saverr = EPERM; 2402 + goto out; 2403 + } 2404 + 2405 + sprintf(procname, "%i", inode->fd); 2406 + fd = openat(lo->proc_self_fd, procname, O_RDWR); 2407 + if (fd < 0) { 2408 + saverr = errno; 2409 + goto out; 2410 + } 2411 + 2412 + ret = fremovexattr(fd, name); 2413 + saverr = ret == -1 ? 
errno : 0; 2414 + 2415 + out: 2416 + if (fd >= 0) { 2417 + close(fd); 2418 + } 2419 + 2420 + lo_inode_put(lo, &inode); 2421 + fuse_reply_err(req, saverr); 2422 + } 2423 + 2424 + #ifdef HAVE_COPY_FILE_RANGE 2425 + static void lo_copy_file_range(fuse_req_t req, fuse_ino_t ino_in, off_t off_in, 2426 + struct fuse_file_info *fi_in, fuse_ino_t ino_out, 2427 + off_t off_out, struct fuse_file_info *fi_out, 2428 + size_t len, int flags) 2429 + { 2430 + int in_fd, out_fd; 2431 + ssize_t res; 2432 + 2433 + in_fd = lo_fi_fd(req, fi_in); 2434 + out_fd = lo_fi_fd(req, fi_out); 2435 + 2436 + fuse_log(FUSE_LOG_DEBUG, 2437 + "lo_copy_file_range(ino=%" PRIu64 "/fd=%d, " 2438 + "off=%lu, ino=%" PRIu64 "/fd=%d, " 2439 + "off=%lu, size=%zd, flags=0x%x)\n", 2440 + ino_in, in_fd, off_in, ino_out, out_fd, off_out, len, flags); 2441 + 2442 + res = copy_file_range(in_fd, &off_in, out_fd, &off_out, len, flags); 2443 + if (res < 0) { 2444 + fuse_reply_err(req, errno); 2445 + } else { 2446 + fuse_reply_write(req, res); 2447 + } 2448 + } 2449 + #endif 2450 + 2451 + static void lo_lseek(fuse_req_t req, fuse_ino_t ino, off_t off, int whence, 2452 + struct fuse_file_info *fi) 2453 + { 2454 + off_t res; 2455 + 2456 + (void)ino; 2457 + res = lseek(lo_fi_fd(req, fi), off, whence); 2458 + if (res != -1) { 2459 + fuse_reply_lseek(req, res); 2460 + } else { 2461 + fuse_reply_err(req, errno); 2462 + } 2463 + } 2464 + 2465 + static void lo_destroy(void *userdata) 2466 + { 2467 + struct lo_data *lo = (struct lo_data *)userdata; 2468 + 2469 + pthread_mutex_lock(&lo->mutex); 2470 + while (true) { 2471 + GHashTableIter iter; 2472 + gpointer key, value; 2473 + 2474 + g_hash_table_iter_init(&iter, lo->inodes); 2475 + if (!g_hash_table_iter_next(&iter, &key, &value)) { 2476 + break; 2477 + } 2478 + 2479 + struct lo_inode *inode = value; 2480 + unref_inode(lo, inode, inode->nlookup); 2481 + } 2482 + pthread_mutex_unlock(&lo->mutex); 2483 + } 2484 + 2485 + static struct fuse_lowlevel_ops lo_oper = { 2486 + .init 
= lo_init, 2487 + .lookup = lo_lookup, 2488 + .mkdir = lo_mkdir, 2489 + .mknod = lo_mknod, 2490 + .symlink = lo_symlink, 2491 + .link = lo_link, 2492 + .unlink = lo_unlink, 2493 + .rmdir = lo_rmdir, 2494 + .rename = lo_rename, 2495 + .forget = lo_forget, 2496 + .forget_multi = lo_forget_multi, 2497 + .getattr = lo_getattr, 2498 + .setattr = lo_setattr, 2499 + .readlink = lo_readlink, 2500 + .opendir = lo_opendir, 2501 + .readdir = lo_readdir, 2502 + .readdirplus = lo_readdirplus, 2503 + .releasedir = lo_releasedir, 2504 + .fsyncdir = lo_fsyncdir, 2505 + .create = lo_create, 2506 + .getlk = lo_getlk, 2507 + .setlk = lo_setlk, 2508 + .open = lo_open, 2509 + .release = lo_release, 2510 + .flush = lo_flush, 2511 + .fsync = lo_fsync, 2512 + .read = lo_read, 2513 + .write_buf = lo_write_buf, 2514 + .statfs = lo_statfs, 2515 + .fallocate = lo_fallocate, 2516 + .flock = lo_flock, 2517 + .getxattr = lo_getxattr, 2518 + .listxattr = lo_listxattr, 2519 + .setxattr = lo_setxattr, 2520 + .removexattr = lo_removexattr, 2521 + #ifdef HAVE_COPY_FILE_RANGE 2522 + .copy_file_range = lo_copy_file_range, 2523 + #endif 2524 + .lseek = lo_lseek, 2525 + .destroy = lo_destroy, 2526 + }; 2527 + 2528 + /* Print vhost-user.json backend program capabilities */ 2529 + static void print_capabilities(void) 2530 + { 2531 + printf("{\n"); 2532 + printf(" \"type\": \"fs\"\n"); 2533 + printf("}\n"); 2534 + } 2535 + 2536 + /* 2537 + * Move to a new mount, net, and pid namespaces to isolate this process. 2538 + */ 2539 + static void setup_namespaces(struct lo_data *lo, struct fuse_session *se) 2540 + { 2541 + pid_t child; 2542 + 2543 + /* 2544 + * Create a new pid namespace for *child* processes. We'll have to 2545 + * fork in order to enter the new pid namespace. A new mount namespace 2546 + * is also needed so that we can remount /proc for the new pid 2547 + * namespace. 2548 + * 2549 + * Our UNIX domain sockets have been created. 
Now we can move to 2550 + * an empty network namespace to prevent TCP/IP and other network 2551 + * activity in case this process is compromised. 2552 + */ 2553 + if (unshare(CLONE_NEWPID | CLONE_NEWNS | CLONE_NEWNET) != 0) { 2554 + fuse_log(FUSE_LOG_ERR, "unshare(CLONE_NEWPID | CLONE_NEWNS): %m\n"); 2555 + exit(1); 2556 + } 2557 + 2558 + child = fork(); 2559 + if (child < 0) { 2560 + fuse_log(FUSE_LOG_ERR, "fork() failed: %m\n"); 2561 + exit(1); 2562 + } 2563 + if (child > 0) { 2564 + pid_t waited; 2565 + int wstatus; 2566 + 2567 + /* The parent waits for the child */ 2568 + do { 2569 + waited = waitpid(child, &wstatus, 0); 2570 + } while (waited < 0 && errno == EINTR && !se->exited); 2571 + 2572 + /* We were terminated by a signal, see fuse_signals.c */ 2573 + if (se->exited) { 2574 + exit(0); 2575 + } 2576 + 2577 + if (WIFEXITED(wstatus)) { 2578 + exit(WEXITSTATUS(wstatus)); 2579 + } 2580 + 2581 + exit(1); 2582 + } 2583 + 2584 + /* Send us SIGTERM when the parent thread terminates, see prctl(2) */ 2585 + prctl(PR_SET_PDEATHSIG, SIGTERM); 2586 + 2587 + /* 2588 + * If the mounts have shared propagation then we want to opt out so our 2589 + * mount changes don't affect the parent mount namespace. 
2590 + */ 2591 + if (mount(NULL, "/", NULL, MS_REC | MS_SLAVE, NULL) < 0) { 2592 + fuse_log(FUSE_LOG_ERR, "mount(/, MS_REC|MS_SLAVE): %m\n"); 2593 + exit(1); 2594 + } 2595 + 2596 + /* The child must remount /proc to use the new pid namespace */ 2597 + if (mount("proc", "/proc", "proc", 2598 + MS_NODEV | MS_NOEXEC | MS_NOSUID | MS_RELATIME, NULL) < 0) { 2599 + fuse_log(FUSE_LOG_ERR, "mount(/proc): %m\n"); 2600 + exit(1); 2601 + } 2602 + 2603 + /* Now we can get our /proc/self/fd directory file descriptor */ 2604 + lo->proc_self_fd = open("/proc/self/fd", O_PATH); 2605 + if (lo->proc_self_fd == -1) { 2606 + fuse_log(FUSE_LOG_ERR, "open(/proc/self/fd, O_PATH): %m\n"); 2607 + exit(1); 2608 + } 2609 + } 2610 + 2611 + /* 2612 + * Capture the capability state, we'll need to restore this for individual 2613 + * threads later; see load_capng. 2614 + */ 2615 + static void setup_capng(void) 2616 + { 2617 + /* Note this accesses /proc so has to happen before the sandbox */ 2618 + if (capng_get_caps_process()) { 2619 + fuse_log(FUSE_LOG_ERR, "capng_get_caps_process\n"); 2620 + exit(1); 2621 + } 2622 + pthread_mutex_init(&cap.mutex, NULL); 2623 + pthread_mutex_lock(&cap.mutex); 2624 + cap.saved = capng_save_state(); 2625 + if (!cap.saved) { 2626 + fuse_log(FUSE_LOG_ERR, "capng_save_state\n"); 2627 + exit(1); 2628 + } 2629 + pthread_mutex_unlock(&cap.mutex); 2630 + } 2631 + 2632 + static void cleanup_capng(void) 2633 + { 2634 + free(cap.saved); 2635 + cap.saved = NULL; 2636 + pthread_mutex_destroy(&cap.mutex); 2637 + } 2638 + 2639 + 2640 + /* 2641 + * Make the source directory our root so symlinks cannot escape and no other 2642 + * files are accessible. Assumes unshare(CLONE_NEWNS) was already called. 
2643 + */ 2644 + static void setup_mounts(const char *source) 2645 + { 2646 + int oldroot; 2647 + int newroot; 2648 + 2649 + if (mount(source, source, NULL, MS_BIND, NULL) < 0) { 2650 + fuse_log(FUSE_LOG_ERR, "mount(%s, %s, MS_BIND): %m\n", source, source); 2651 + exit(1); 2652 + } 2653 + 2654 + /* This magic is based on lxc's lxc_pivot_root() */ 2655 + oldroot = open("/", O_DIRECTORY | O_RDONLY | O_CLOEXEC); 2656 + if (oldroot < 0) { 2657 + fuse_log(FUSE_LOG_ERR, "open(/): %m\n"); 2658 + exit(1); 2659 + } 2660 + 2661 + newroot = open(source, O_DIRECTORY | O_RDONLY | O_CLOEXEC); 2662 + if (newroot < 0) { 2663 + fuse_log(FUSE_LOG_ERR, "open(%s): %m\n", source); 2664 + exit(1); 2665 + } 2666 + 2667 + if (fchdir(newroot) < 0) { 2668 + fuse_log(FUSE_LOG_ERR, "fchdir(newroot): %m\n"); 2669 + exit(1); 2670 + } 2671 + 2672 + if (syscall(__NR_pivot_root, ".", ".") < 0) { 2673 + fuse_log(FUSE_LOG_ERR, "pivot_root(., .): %m\n"); 2674 + exit(1); 2675 + } 2676 + 2677 + if (fchdir(oldroot) < 0) { 2678 + fuse_log(FUSE_LOG_ERR, "fchdir(oldroot): %m\n"); 2679 + exit(1); 2680 + } 2681 + 2682 + if (mount("", ".", "", MS_SLAVE | MS_REC, NULL) < 0) { 2683 + fuse_log(FUSE_LOG_ERR, "mount(., MS_SLAVE | MS_REC): %m\n"); 2684 + exit(1); 2685 + } 2686 + 2687 + if (umount2(".", MNT_DETACH) < 0) { 2688 + fuse_log(FUSE_LOG_ERR, "umount2(., MNT_DETACH): %m\n"); 2689 + exit(1); 2690 + } 2691 + 2692 + if (fchdir(newroot) < 0) { 2693 + fuse_log(FUSE_LOG_ERR, "fchdir(newroot): %m\n"); 2694 + exit(1); 2695 + } 2696 + 2697 + close(newroot); 2698 + close(oldroot); 2699 + } 2700 + 2701 + /* 2702 + * Lock down this process to prevent access to other processes or files outside 2703 + * source directory. This reduces the impact of arbitrary code execution bugs. 
2704 + */ 2705 + static void setup_sandbox(struct lo_data *lo, struct fuse_session *se, 2706 + bool enable_syslog) 2707 + { 2708 + setup_namespaces(lo, se); 2709 + setup_mounts(lo->source); 2710 + setup_seccomp(enable_syslog); 2711 + } 2712 + 2713 + /* Raise the maximum number of open file descriptors */ 2714 + static void setup_nofile_rlimit(void) 2715 + { 2716 + const rlim_t max_fds = 1000000; 2717 + struct rlimit rlim; 2718 + 2719 + if (getrlimit(RLIMIT_NOFILE, &rlim) < 0) { 2720 + fuse_log(FUSE_LOG_ERR, "getrlimit(RLIMIT_NOFILE): %m\n"); 2721 + exit(1); 2722 + } 2723 + 2724 + if (rlim.rlim_cur >= max_fds) { 2725 + return; /* nothing to do */ 2726 + } 2727 + 2728 + rlim.rlim_cur = max_fds; 2729 + rlim.rlim_max = max_fds; 2730 + 2731 + if (setrlimit(RLIMIT_NOFILE, &rlim) < 0) { 2732 + /* Ignore SELinux denials */ 2733 + if (errno == EPERM) { 2734 + return; 2735 + } 2736 + 2737 + fuse_log(FUSE_LOG_ERR, "setrlimit(RLIMIT_NOFILE): %m\n"); 2738 + exit(1); 2739 + } 2740 + } 2741 + 2742 + static void log_func(enum fuse_log_level level, const char *fmt, va_list ap) 2743 + { 2744 + g_autofree char *localfmt = NULL; 2745 + 2746 + if (current_log_level < level) { 2747 + return; 2748 + } 2749 + 2750 + if (current_log_level == FUSE_LOG_DEBUG) { 2751 + if (!use_syslog) { 2752 + localfmt = g_strdup_printf("[%" PRId64 "] [ID: %08ld] %s", 2753 + get_clock(), syscall(__NR_gettid), fmt); 2754 + } else { 2755 + localfmt = g_strdup_printf("[ID: %08ld] %s", syscall(__NR_gettid), 2756 + fmt); 2757 + } 2758 + fmt = localfmt; 2759 + } 2760 + 2761 + if (use_syslog) { 2762 + int priority = LOG_ERR; 2763 + switch (level) { 2764 + case FUSE_LOG_EMERG: 2765 + priority = LOG_EMERG; 2766 + break; 2767 + case FUSE_LOG_ALERT: 2768 + priority = LOG_ALERT; 2769 + break; 2770 + case FUSE_LOG_CRIT: 2771 + priority = LOG_CRIT; 2772 + break; 2773 + case FUSE_LOG_ERR: 2774 + priority = LOG_ERR; 2775 + break; 2776 + case FUSE_LOG_WARNING: 2777 + priority = LOG_WARNING; 2778 + break; 2779 + case 
FUSE_LOG_NOTICE: 2780 + priority = LOG_NOTICE; 2781 + break; 2782 + case FUSE_LOG_INFO: 2783 + priority = LOG_INFO; 2784 + break; 2785 + case FUSE_LOG_DEBUG: 2786 + priority = LOG_DEBUG; 2787 + break; 2788 + } 2789 + vsyslog(priority, fmt, ap); 2790 + } else { 2791 + vfprintf(stderr, fmt, ap); 2792 + } 2793 + } 2794 + 2795 + static void setup_root(struct lo_data *lo, struct lo_inode *root) 2796 + { 2797 + int fd, res; 2798 + struct stat stat; 2799 + 2800 + fd = open("/", O_PATH); 2801 + if (fd == -1) { 2802 + fuse_log(FUSE_LOG_ERR, "open(%s, O_PATH): %m\n", lo->source); 2803 + exit(1); 2804 + } 2805 + 2806 + res = fstatat(fd, "", &stat, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); 2807 + if (res == -1) { 2808 + fuse_log(FUSE_LOG_ERR, "fstatat(%s): %m\n", lo->source); 2809 + exit(1); 2810 + } 2811 + 2812 + root->is_symlink = false; 2813 + root->fd = fd; 2814 + root->key.ino = stat.st_ino; 2815 + root->key.dev = stat.st_dev; 2816 + root->nlookup = 2; 2817 + g_atomic_int_set(&root->refcount, 2); 2818 + } 2819 + 2820 + static guint lo_key_hash(gconstpointer key) 2821 + { 2822 + const struct lo_key *lkey = key; 2823 + 2824 + return (guint)lkey->ino + (guint)lkey->dev; 2825 + } 2826 + 2827 + static gboolean lo_key_equal(gconstpointer a, gconstpointer b) 2828 + { 2829 + const struct lo_key *la = a; 2830 + const struct lo_key *lb = b; 2831 + 2832 + return la->ino == lb->ino && la->dev == lb->dev; 2833 + } 2834 + 2835 + static void fuse_lo_data_cleanup(struct lo_data *lo) 2836 + { 2837 + if (lo->inodes) { 2838 + g_hash_table_destroy(lo->inodes); 2839 + } 2840 + lo_map_destroy(&lo->fd_map); 2841 + lo_map_destroy(&lo->dirp_map); 2842 + lo_map_destroy(&lo->ino_map); 2843 + 2844 + if (lo->proc_self_fd >= 0) { 2845 + close(lo->proc_self_fd); 2846 + } 2847 + 2848 + if (lo->root.fd >= 0) { 2849 + close(lo->root.fd); 2850 + } 2851 + 2852 + free(lo->source); 2853 + } 2854 + 2855 + int main(int argc, char *argv[]) 2856 + { 2857 + struct fuse_args args = FUSE_ARGS_INIT(argc, argv); 2858 + 
struct fuse_session *se; 2859 + struct fuse_cmdline_opts opts; 2860 + struct lo_data lo = { 2861 + .debug = 0, 2862 + .writeback = 0, 2863 + .posix_lock = 1, 2864 + .proc_self_fd = -1, 2865 + }; 2866 + struct lo_map_elem *root_elem; 2867 + int ret = -1; 2868 + 2869 + /* Don't mask creation mode, kernel already did that */ 2870 + umask(0); 2871 + 2872 + pthread_mutex_init(&lo.mutex, NULL); 2873 + lo.inodes = g_hash_table_new(lo_key_hash, lo_key_equal); 2874 + lo.root.fd = -1; 2875 + lo.root.fuse_ino = FUSE_ROOT_ID; 2876 + lo.cache = CACHE_AUTO; 2877 + 2878 + /* 2879 + * Set up the ino map like this: 2880 + * [0] Reserved (will not be used) 2881 + * [1] Root inode 2882 + */ 2883 + lo_map_init(&lo.ino_map); 2884 + lo_map_reserve(&lo.ino_map, 0)->in_use = false; 2885 + root_elem = lo_map_reserve(&lo.ino_map, lo.root.fuse_ino); 2886 + root_elem->inode = &lo.root; 2887 + 2888 + lo_map_init(&lo.dirp_map); 2889 + lo_map_init(&lo.fd_map); 2890 + 2891 + if (fuse_parse_cmdline(&args, &opts) != 0) { 2892 + goto err_out1; 2893 + } 2894 + fuse_set_log_func(log_func); 2895 + use_syslog = opts.syslog; 2896 + if (use_syslog) { 2897 + openlog("virtiofsd", LOG_PID, LOG_DAEMON); 2898 + } 2899 + 2900 + if (opts.show_help) { 2901 + printf("usage: %s [options]\n\n", argv[0]); 2902 + fuse_cmdline_help(); 2903 + printf(" -o source=PATH shared directory tree\n"); 2904 + fuse_lowlevel_help(); 2905 + ret = 0; 2906 + goto err_out1; 2907 + } else if (opts.show_version) { 2908 + fuse_lowlevel_version(); 2909 + ret = 0; 2910 + goto err_out1; 2911 + } else if (opts.print_capabilities) { 2912 + print_capabilities(); 2913 + ret = 0; 2914 + goto err_out1; 2915 + } 2916 + 2917 + if (fuse_opt_parse(&args, &lo, lo_opts, NULL) == -1) { 2918 + goto err_out1; 2919 + } 2920 + 2921 + /* 2922 + * log_level is 0 if not configured via cmd options (0 is LOG_EMERG, 2923 + * and we don't use this log level). 
2924 + */ 2925 + if (opts.log_level != 0) { 2926 + current_log_level = opts.log_level; 2927 + } 2928 + lo.debug = opts.debug; 2929 + if (lo.debug) { 2930 + current_log_level = FUSE_LOG_DEBUG; 2931 + } 2932 + if (lo.source) { 2933 + struct stat stat; 2934 + int res; 2935 + 2936 + res = lstat(lo.source, &stat); 2937 + if (res == -1) { 2938 + fuse_log(FUSE_LOG_ERR, "failed to stat source (\"%s\"): %m\n", 2939 + lo.source); 2940 + exit(1); 2941 + } 2942 + if (!S_ISDIR(stat.st_mode)) { 2943 + fuse_log(FUSE_LOG_ERR, "source is not a directory\n"); 2944 + exit(1); 2945 + } 2946 + } else { 2947 + lo.source = strdup("/"); 2948 + } 2949 + if (!lo.timeout_set) { 2950 + switch (lo.cache) { 2951 + case CACHE_NONE: 2952 + lo.timeout = 0.0; 2953 + break; 2954 + 2955 + case CACHE_AUTO: 2956 + lo.timeout = 1.0; 2957 + break; 2958 + 2959 + case CACHE_ALWAYS: 2960 + lo.timeout = 86400.0; 2961 + break; 2962 + } 2963 + } else if (lo.timeout < 0) { 2964 + fuse_log(FUSE_LOG_ERR, "timeout is negative (%lf)\n", lo.timeout); 2965 + exit(1); 2966 + } 2967 + 2968 + se = fuse_session_new(&args, &lo_oper, sizeof(lo_oper), &lo); 2969 + if (se == NULL) { 2970 + goto err_out1; 2971 + } 2972 + 2973 + if (fuse_set_signal_handlers(se) != 0) { 2974 + goto err_out2; 2975 + } 2976 + 2977 + if (fuse_session_mount(se) != 0) { 2978 + goto err_out3; 2979 + } 2980 + 2981 + fuse_daemonize(opts.foreground); 2982 + 2983 + setup_nofile_rlimit(); 2984 + 2985 + /* Must be before sandbox since it wants /proc */ 2986 + setup_capng(); 2987 + 2988 + setup_sandbox(&lo, se, opts.syslog); 2989 + 2990 + setup_root(&lo, &lo.root); 2991 + /* Block until ctrl+c or fusermount -u */ 2992 + ret = virtio_loop(se); 2993 + 2994 + fuse_session_unmount(se); 2995 + cleanup_capng(); 2996 + err_out3: 2997 + fuse_remove_signal_handlers(se); 2998 + err_out2: 2999 + fuse_session_destroy(se); 3000 + err_out1: 3001 + fuse_opt_free_args(&args); 3002 + 3003 + fuse_lo_data_cleanup(&lo); 3004 + 3005 + return ret ? 1 : 0; 3006 + }
+165
tools/virtiofsd/seccomp.c
··· 1 + /* 2 + * Seccomp sandboxing for virtiofsd 3 + * 4 + * Copyright (C) 2019 Red Hat, Inc. 5 + * 6 + * SPDX-License-Identifier: GPL-2.0-or-later 7 + */ 8 + 9 + #include "qemu/osdep.h" 10 + #include "seccomp.h" 11 + #include "fuse_i.h" 12 + #include "fuse_log.h" 13 + #include <errno.h> 14 + #include <glib.h> 15 + #include <seccomp.h> 16 + #include <stdlib.h> 17 + 18 + /* Bodge for libseccomp 2.4.2 which broke ppoll */ 19 + #if !defined(__SNR_ppoll) && defined(__SNR_brk) 20 + #ifdef __NR_ppoll 21 + #define __SNR_ppoll __NR_ppoll 22 + #else 23 + #define __SNR_ppoll __PNR_ppoll 24 + #endif 25 + #endif 26 + 27 + static const int syscall_whitelist[] = { 28 + /* TODO ireg sem*() syscalls */ 29 + SCMP_SYS(brk), 30 + SCMP_SYS(capget), /* For CAP_FSETID */ 31 + SCMP_SYS(capset), 32 + SCMP_SYS(clock_gettime), 33 + SCMP_SYS(clone), 34 + #ifdef __NR_clone3 35 + SCMP_SYS(clone3), 36 + #endif 37 + SCMP_SYS(close), 38 + SCMP_SYS(copy_file_range), 39 + SCMP_SYS(dup), 40 + SCMP_SYS(eventfd2), 41 + SCMP_SYS(exit), 42 + SCMP_SYS(exit_group), 43 + SCMP_SYS(fallocate), 44 + SCMP_SYS(fchmodat), 45 + SCMP_SYS(fchownat), 46 + SCMP_SYS(fcntl), 47 + SCMP_SYS(fdatasync), 48 + SCMP_SYS(fgetxattr), 49 + SCMP_SYS(flistxattr), 50 + SCMP_SYS(flock), 51 + SCMP_SYS(fremovexattr), 52 + SCMP_SYS(fsetxattr), 53 + SCMP_SYS(fstat), 54 + SCMP_SYS(fstatfs), 55 + SCMP_SYS(fsync), 56 + SCMP_SYS(ftruncate), 57 + SCMP_SYS(futex), 58 + SCMP_SYS(getdents), 59 + SCMP_SYS(getdents64), 60 + SCMP_SYS(getegid), 61 + SCMP_SYS(geteuid), 62 + SCMP_SYS(getpid), 63 + SCMP_SYS(gettid), 64 + SCMP_SYS(gettimeofday), 65 + SCMP_SYS(linkat), 66 + SCMP_SYS(lseek), 67 + SCMP_SYS(madvise), 68 + SCMP_SYS(mkdirat), 69 + SCMP_SYS(mknodat), 70 + SCMP_SYS(mmap), 71 + SCMP_SYS(mprotect), 72 + SCMP_SYS(mremap), 73 + SCMP_SYS(munmap), 74 + SCMP_SYS(newfstatat), 75 + SCMP_SYS(open), 76 + SCMP_SYS(openat), 77 + SCMP_SYS(ppoll), 78 + SCMP_SYS(prctl), /* TODO restrict to just PR_SET_NAME? 
*/ 79 + SCMP_SYS(preadv), 80 + SCMP_SYS(pread64), 81 + SCMP_SYS(pwritev), 82 + SCMP_SYS(pwrite64), 83 + SCMP_SYS(read), 84 + SCMP_SYS(readlinkat), 85 + SCMP_SYS(recvmsg), 86 + SCMP_SYS(renameat), 87 + SCMP_SYS(renameat2), 88 + SCMP_SYS(rt_sigaction), 89 + SCMP_SYS(rt_sigprocmask), 90 + SCMP_SYS(rt_sigreturn), 91 + SCMP_SYS(sendmsg), 92 + SCMP_SYS(setresgid), 93 + SCMP_SYS(setresuid), 94 + #ifdef __NR_setresgid32 95 + SCMP_SYS(setresgid32), 96 + #endif 97 + #ifdef __NR_setresuid32 98 + SCMP_SYS(setresuid32), 99 + #endif 100 + SCMP_SYS(set_robust_list), 101 + SCMP_SYS(symlinkat), 102 + SCMP_SYS(time), /* Rarely needed, except on static builds */ 103 + SCMP_SYS(tgkill), 104 + SCMP_SYS(unlinkat), 105 + SCMP_SYS(utimensat), 106 + SCMP_SYS(write), 107 + SCMP_SYS(writev), 108 + }; 109 + 110 + /* Syscalls used when --syslog is enabled */ 111 + static const int syscall_whitelist_syslog[] = { 112 + SCMP_SYS(sendto), 113 + }; 114 + 115 + static void add_whitelist(scmp_filter_ctx ctx, const int syscalls[], size_t len) 116 + { 117 + size_t i; 118 + 119 + for (i = 0; i < len; i++) { 120 + if (seccomp_rule_add(ctx, SCMP_ACT_ALLOW, syscalls[i], 0) != 0) { 121 + fuse_log(FUSE_LOG_ERR, "seccomp_rule_add syscall %d failed\n", 122 + syscalls[i]); 123 + exit(1); 124 + } 125 + } 126 + } 127 + 128 + void setup_seccomp(bool enable_syslog) 129 + { 130 + scmp_filter_ctx ctx; 131 + 132 + #ifdef SCMP_ACT_KILL_PROCESS 133 + ctx = seccomp_init(SCMP_ACT_KILL_PROCESS); 134 + /* Handle a newer libseccomp but an older kernel */ 135 + if (!ctx && errno == EOPNOTSUPP) { 136 + ctx = seccomp_init(SCMP_ACT_TRAP); 137 + } 138 + #else 139 + ctx = seccomp_init(SCMP_ACT_TRAP); 140 + #endif 141 + if (!ctx) { 142 + fuse_log(FUSE_LOG_ERR, "seccomp_init() failed\n"); 143 + exit(1); 144 + } 145 + 146 + add_whitelist(ctx, syscall_whitelist, G_N_ELEMENTS(syscall_whitelist)); 147 + if (enable_syslog) { 148 + add_whitelist(ctx, syscall_whitelist_syslog, 149 + G_N_ELEMENTS(syscall_whitelist_syslog)); 150 + } 151 + 
152 + /* libvhost-user calls this for post-copy migration, we don't need it */ 153 + if (seccomp_rule_add(ctx, SCMP_ACT_ERRNO(ENOSYS), 154 + SCMP_SYS(userfaultfd), 0) != 0) { 155 + fuse_log(FUSE_LOG_ERR, "seccomp_rule_add userfaultfd failed\n"); 156 + exit(1); 157 + } 158 + 159 + if (seccomp_load(ctx) < 0) { 160 + fuse_log(FUSE_LOG_ERR, "seccomp_load() failed\n"); 161 + exit(1); 162 + } 163 + 164 + seccomp_release(ctx); 165 + }
+16
tools/virtiofsd/seccomp.h
/*
 * Seccomp sandboxing for virtiofsd
 *
 * Copyright (C) 2019 Red Hat, Inc.
 *
 * SPDX-License-Identifier: GPL-2.0-or-later
 */

#ifndef VIRTIOFSD_SECCOMP_H
#define VIRTIOFSD_SECCOMP_H

#include <stdbool.h>

/*
 * Install the process-wide seccomp filter (see seccomp.c): only whitelisted
 * syscalls are permitted, everything else kills (or traps) the process.
 * Pass enable_syslog=true to also allow the syscalls used by --syslog.
 * Exits the process on any setup failure.
 */
void setup_seccomp(bool enable_syslog);

#endif /* VIRTIOFSD_SECCOMP_H */