Commit 6ce8b2ce authored by Linus Torvalds
Browse files
Pull fuse updates from Miklos Szeredi:

 - Add passthrough mode for regular file I/O.

   This allows performing read and write (also via memory maps) on a
   backing file without incurring the overhead of roundtrips to
   userspace. For now this is only allowed for privileged servers, but
   this limitation will go away in the future (Amir Goldstein)

 - Fix interaction of direct I/O mode with memory maps (Bernd Schubert)

 - Export filesystem tags through sysfs for virtiofs (Stefan Hajnoczi)

 - Allow resending queued requests for server crash recovery (Zhao Chen)

 - Misc fixes and cleanups

* tag 'fuse-update-6.9' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse: (38 commits)
  fuse: get rid of ff->readdir.lock
  fuse: remove unneeded lock which protecting update of congestion_threshold
  fuse: Fix missing FOLL_PIN for direct-io
  fuse: remove an unnecessary if statement
  fuse: Track process write operations in both direct and writethrough modes
  fuse: Use the high bit of request ID for indicating resend requests
  fuse: Introduce a new notification type for resend pending requests
  fuse: add support for explicit export disabling
  fuse: __kuid_val/__kgid_val helpers in fuse_fill_attr_from_inode()
  fuse: fix typo for fuse_permission comment
  fuse: Convert fuse_writepage_locked to take a folio
  fuse: Remove fuse_writepage
  virtio_fs: remove duplicate check if queue is broken
  fuse: use FUSE_ROOT_ID in fuse_get_root_inode()
  fuse: don't unhash root
  fuse: fix root lookup with nonzero generation
  fuse: replace remaining make_bad_inode() with fuse_make_bad()
  virtiofs: drop __exit from virtio_fs_sysfs_exit()
  fuse: implement passthrough for mmap
  fuse: implement splice read/write passthrough
  ...
parents 68bf6bfd cdf6ac2a
Loading
Loading
Loading
Loading
+11 −0
Original line number Diff line number Diff line
What:		/sys/fs/virtiofs/<n>/tag
Date:		Feb 2024
Contact:	virtio-fs@lists.linux.dev
Description:
		[RO] The mount "tag" that can be used to mount this filesystem.

What:		/sys/fs/virtiofs/<n>/device
Date:		Feb 2024
Contact:	virtio-fs@lists.linux.dev
Description:
		Symlink to the virtio device that exports this filesystem.
+11 −0
Original line number Diff line number Diff line
@@ -52,3 +52,14 @@ config FUSE_DAX

	  If you want to allow mounting a Virtio Filesystem with the "dax"
	  option, answer Y.

config FUSE_PASSTHROUGH
	bool "FUSE passthrough operations support"
	default y
	depends on FUSE_FS
	select FS_STACK
	help
	  This allows bypassing the FUSE server by mapping specific FUSE
	  operations to be performed directly on a backing file.

	  If you want to allow passthrough operations, answer Y.
+2 −0
Original line number Diff line number Diff line
@@ -8,6 +8,8 @@ obj-$(CONFIG_CUSE) += cuse.o
obj-$(CONFIG_VIRTIO_FS) += virtiofs.o

fuse-y := dev.o dir.o file.o inode.o control.o xattr.o acl.o readdir.o ioctl.o
fuse-y += iomode.o
fuse-$(CONFIG_FUSE_DAX) += dax.o
fuse-$(CONFIG_FUSE_PASSTHROUGH) += passthrough.o

virtiofs-y := virtio_fs.o
+1 −5
Original line number Diff line number Diff line
@@ -174,11 +174,7 @@ static ssize_t fuse_conn_congestion_threshold_write(struct file *file,
	if (!fc)
		goto out;

	down_read(&fc->killsb);
	spin_lock(&fc->bg_lock);
	fc->congestion_threshold = val;
	spin_unlock(&fc->bg_lock);
	up_read(&fc->killsb);
	WRITE_ONCE(fc->congestion_threshold, val);
	fuse_conn_put(fc);
out:
	return ret;
+131 −25
Original line number Diff line number Diff line
@@ -1775,6 +1775,61 @@ static int fuse_notify_retrieve(struct fuse_conn *fc, unsigned int size,
	return err;
}

/*
 * Resend all requests that are on the processing queues.
 *
 * When a FUSE daemon panics and fails over, some in-flight requests may be
 * lost and never answered. As a result, applications awaiting replies would
 * become stuck forever. To address this, a notification can be used to
 * trigger resending of these requests to the FUSE daemon, ensuring they are
 * processed again.
 *
 * Please note that this strategy is applicable only to idempotent requests or
 * if the FUSE daemon takes careful measures to avoid processing duplicated
 * non-idempotent requests.
 *
 * Does nothing if @fc is no longer connected.
 */
static void fuse_resend(struct fuse_conn *fc)
{
	struct fuse_dev *fud;
	struct fuse_req *req, *next;
	struct fuse_iqueue *fiq = &fc->iq;
	LIST_HEAD(to_queue);
	unsigned int i;

	spin_lock(&fc->lock);
	if (!fc->connected) {
		spin_unlock(&fc->lock);
		return;
	}

	/* Collect every device's processing lists, preserving their order. */
	list_for_each_entry(fud, &fc->devices, entry) {
		struct fuse_pqueue *fpq = &fud->pq;

		spin_lock(&fpq->lock);
		for (i = 0; i < FUSE_PQ_HASH_SIZE; i++)
			list_splice_tail_init(&fpq->processing[i], &to_queue);
		spin_unlock(&fpq->lock);
	}
	spin_unlock(&fc->lock);

	list_for_each_entry_safe(req, next, &to_queue, list) {
		/*
		 * No lock is held here, so other bits in req->flags may be
		 * updated concurrently; use the atomic bitops rather than
		 * the non-atomic __set_bit() to avoid losing an update.
		 */
		set_bit(FR_PENDING, &req->flags);
		/*
		 * The request goes back to the pending list, so it must not
		 * look as if it had already been sent to the daemon.
		 */
		clear_bit(FR_SENT, &req->flags);
		/* mark the request as resend request */
		req->in.h.unique |= FUSE_UNIQUE_RESEND;
	}

	/*
	 * NOTE(review): the connection state is not rechecked under
	 * fiq->lock; if the connection can be aborted after fc->lock was
	 * dropped above, these requests may be queued on a dead connection.
	 * Confirm whether a connected check is needed here.
	 */
	spin_lock(&fiq->lock);
	/* iq and pq requests are both oldest to newest */
	list_splice(&to_queue, &fiq->pending);
	/* Per its name, this wakes readers and releases fiq->lock. */
	fiq->ops->wake_pending_and_unlock(fiq);
}

/*
 * Handler for the FUSE_NOTIFY_RESEND notification: requeue the requests that
 * are currently on the processing queues via fuse_resend().
 *
 * Always returns 0.
 */
static int fuse_notify_resend(struct fuse_conn *fc)
{
	fuse_resend(fc);
	return 0;
}

static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
		       unsigned int size, struct fuse_copy_state *cs)
{
@@ -1800,6 +1855,9 @@ static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
	case FUSE_NOTIFY_DELETE:
		return fuse_notify_delete(fc, size, cs);

	case FUSE_NOTIFY_RESEND:
		return fuse_notify_resend(fc);

	default:
		fuse_copy_finish(cs);
		return -EINVAL;
@@ -2251,17 +2309,14 @@ static int fuse_device_clone(struct fuse_conn *fc, struct file *new)
	return 0;
}

static long fuse_dev_ioctl(struct file *file, unsigned int cmd,
			   unsigned long arg)
static long fuse_dev_ioctl_clone(struct file *file, __u32 __user *argp)
{
	int res;
	int oldfd;
	struct fuse_dev *fud = NULL;
	struct fd f;

	switch (cmd) {
	case FUSE_DEV_IOC_CLONE:
		if (get_user(oldfd, (__u32 __user *)arg))
	if (get_user(oldfd, argp))
		return -EFAULT;

	f = fdget(oldfd);
@@ -2281,13 +2336,64 @@ static long fuse_dev_ioctl(struct file *file, unsigned int cmd,
		res = fuse_device_clone(fud->fc, file);
		mutex_unlock(&fuse_mutex);
	}

	fdput(f);
		break;
	return res;
}

/*
 * FUSE_DEV_IOC_BACKING_OPEN: copy a struct fuse_backing_map from userspace
 * and hand it to fuse_backing_open() for the connection behind @file.
 *
 * Returns -EPERM if @file has no fuse device attached, -EOPNOTSUPP if
 * CONFIG_FUSE_PASSTHROUGH is disabled, -EFAULT if @argp cannot be read,
 * otherwise the result of fuse_backing_open().
 */
static long fuse_dev_ioctl_backing_open(struct file *file,
					struct fuse_backing_map __user *argp)
{
	struct fuse_backing_map karg;
	struct fuse_dev *dev;

	/* The device lookup comes first so that -EPERM takes precedence. */
	dev = fuse_get_dev(file);
	if (!dev)
		return -EPERM;

	if (!IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
		return -EOPNOTSUPP;

	if (copy_from_user(&karg, argp, sizeof(karg)))
		return -EFAULT;

	return fuse_backing_open(dev->fc, &karg);
}

/*
 * FUSE_DEV_IOC_BACKING_CLOSE: read a backing id from userspace and pass it
 * to fuse_backing_close() for the connection behind @file.
 *
 * Returns -EPERM if @file has no fuse device attached, -EOPNOTSUPP if
 * CONFIG_FUSE_PASSTHROUGH is disabled, -EFAULT if @argp cannot be read,
 * otherwise the result of fuse_backing_close().
 */
static long fuse_dev_ioctl_backing_close(struct file *file, __u32 __user *argp)
{
	struct fuse_dev *dev;
	int id;

	/* The device lookup comes first so that -EPERM takes precedence. */
	dev = fuse_get_dev(file);
	if (!dev)
		return -EPERM;

	if (!IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
		return -EOPNOTSUPP;

	if (get_user(id, argp))
		return -EFAULT;

	return fuse_backing_close(dev->fc, id);
}

/*
 * ioctl entry point for the FUSE device file.
 *
 * Dispatches on @cmd to the matching per-command helper, passing @arg
 * reinterpreted as a userspace pointer.  Every case returns directly, so no
 * shared result variable is needed.
 *
 * Returns the helper's result, or -ENOTTY for an unrecognised @cmd.
 */
static long fuse_dev_ioctl(struct file *file, unsigned int cmd,
			   unsigned long arg)
{
	void __user *argp = (void __user *)arg;

	switch (cmd) {
	case FUSE_DEV_IOC_CLONE:
		return fuse_dev_ioctl_clone(file, argp);

	case FUSE_DEV_IOC_BACKING_OPEN:
		return fuse_dev_ioctl_backing_open(file, argp);

	case FUSE_DEV_IOC_BACKING_CLOSE:
		return fuse_dev_ioctl_backing_close(file, argp);

	default:
		return -ENOTTY;
	}
}

const struct file_operations fuse_dev_operations = {
Loading