Commit f4e8d802 authored by Linus Torvalds
Browse files
Pull vfs rw iterator updates from Christian Brauner:
 "The core fs signalfd, userfaultfd, and timerfd subsystems did still
  use f_op->read() instead of f_op->read_iter(). Convert them over since
  we should aim to get rid of f_op->read() at some point.

  Aside from that, io_uring and others want to mark files as FMODE_NOWAIT
  so they can make use of per-IO nonblocking hints to enable more
  efficient IO. Converting those users to f_op->read_iter() allows them
  to be marked with FMODE_NOWAIT"

* tag 'vfs-6.10.rw' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
  signalfd: convert to ->read_iter()
  userfaultfd: convert to ->read_iter()
  timerfd: convert to ->read_iter()
  new helper: copy_to_iter_full()
parents ef31ea6c 3a93daea
Loading
Loading
Loading
Loading
+28 −16
Original line number Diff line number Diff line
@@ -68,8 +68,7 @@ static __poll_t signalfd_poll(struct file *file, poll_table *wait)
/*
 * Copied from copy_siginfo_to_user() in kernel/signal.c
 */
static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo,
			     kernel_siginfo_t const *kinfo)
static int signalfd_copyinfo(struct iov_iter *to, kernel_siginfo_t const *kinfo)
{
	struct signalfd_siginfo new;

@@ -146,10 +145,10 @@ static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo,
		break;
	}

	if (copy_to_user(uinfo, &new, sizeof(struct signalfd_siginfo)))
	if (!copy_to_iter_full(&new, sizeof(struct signalfd_siginfo), to))
		return -EFAULT;

	return sizeof(*uinfo);
	return sizeof(struct signalfd_siginfo);
}

static ssize_t signalfd_dequeue(struct signalfd_ctx *ctx, kernel_siginfo_t *info,
@@ -199,28 +198,27 @@ static ssize_t signalfd_dequeue(struct signalfd_ctx *ctx, kernel_siginfo_t *info
 * error code. The "count" parameter must be at least the size of a
 * "struct signalfd_siginfo".
 */
static ssize_t signalfd_read(struct file *file, char __user *buf, size_t count,
			     loff_t *ppos)
static ssize_t signalfd_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	struct file *file = iocb->ki_filp;
	struct signalfd_ctx *ctx = file->private_data;
	struct signalfd_siginfo __user *siginfo;
	int nonblock = file->f_flags & O_NONBLOCK;
	size_t count = iov_iter_count(to);
	ssize_t ret, total = 0;
	kernel_siginfo_t info;
	bool nonblock;

	count /= sizeof(struct signalfd_siginfo);
	if (!count)
		return -EINVAL;

	siginfo = (struct signalfd_siginfo __user *) buf;
	nonblock = file->f_flags & O_NONBLOCK || iocb->ki_flags & IOCB_NOWAIT;
	do {
		ret = signalfd_dequeue(ctx, &info, nonblock);
		if (unlikely(ret <= 0))
			break;
		ret = signalfd_copyinfo(siginfo, &info);
		ret = signalfd_copyinfo(to, &info);
		if (ret < 0)
			break;
		siginfo++;
		total += ret;
		nonblock = 1;
	} while (--count);
@@ -246,7 +244,7 @@ static const struct file_operations signalfd_fops = {
#endif
	.release	= signalfd_release,
	.poll		= signalfd_poll,
	.read		= signalfd_read,
	.read_iter	= signalfd_read_iter,
	.llseek		= noop_llseek,
};

@@ -265,20 +263,34 @@ static int do_signalfd4(int ufd, sigset_t *mask, int flags)
	signotset(mask);

	if (ufd == -1) {
		struct file *file;

		ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
		if (!ctx)
			return -ENOMEM;

		ctx->sigmask = *mask;

		ufd = get_unused_fd_flags(flags & O_CLOEXEC);
		if (ufd < 0) {
			kfree(ctx);
			return ufd;
		}

		file = anon_inode_getfile("[signalfd]", &signalfd_fops, ctx,
				       O_RDWR | (flags & O_NONBLOCK));
		if (IS_ERR(file)) {
			/* Undo the fd reservation and the context allocation. */
			put_unused_fd(ufd);
			kfree(ctx);
			/*
			 * ufd passed the "ufd < 0" check above, so it is a
			 * non-negative descriptor number that has just been
			 * released. Returning it would look like success to
			 * the caller; report the error carried by the file
			 * pointer instead.
			 */
			return PTR_ERR(file);
		}
		file->f_mode |= FMODE_NOWAIT;

		/*
		 * When we call this, the initialization must be complete, since
		 * anon_inode_getfd() will install the fd.
		 */
		ufd = anon_inode_getfd("[signalfd]", &signalfd_fops, ctx,
				       O_RDWR | (flags & (O_CLOEXEC | O_NONBLOCK)));
		if (ufd < 0)
			kfree(ctx);
		fd_install(ufd, file);
	} else {
		struct fd f = fdget(ufd);
		if (!f.file)
+26 −10
Original line number Diff line number Diff line
@@ -262,17 +262,18 @@ static __poll_t timerfd_poll(struct file *file, poll_table *wait)
	return events;
}

static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count,
			    loff_t *ppos)
static ssize_t timerfd_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	struct file *file = iocb->ki_filp;
	struct timerfd_ctx *ctx = file->private_data;
	ssize_t res;
	u64 ticks = 0;

	if (count < sizeof(ticks))
	if (iov_iter_count(to) < sizeof(ticks))
		return -EINVAL;

	spin_lock_irq(&ctx->wqh.lock);
	if (file->f_flags & O_NONBLOCK)
	if (file->f_flags & O_NONBLOCK || iocb->ki_flags & IOCB_NOWAIT)
		res = -EAGAIN;
	else
		res = wait_event_interruptible_locked_irq(ctx->wqh, ctx->ticks);
@@ -312,8 +313,11 @@ static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count,
		ctx->ticks = 0;
	}
	spin_unlock_irq(&ctx->wqh.lock);
	if (ticks)
		res = put_user(ticks, (u64 __user *) buf) ? -EFAULT: sizeof(ticks);
	if (ticks) {
		res = copy_to_iter(&ticks, sizeof(ticks), to);
		if (!res)
			res = -EFAULT;
	}
	return res;
}

@@ -384,7 +388,7 @@ static long timerfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg
static const struct file_operations timerfd_fops = {
	.release	= timerfd_release,
	.poll		= timerfd_poll,
	.read		= timerfd_read,
	.read_iter	= timerfd_read_iter,
	.llseek		= noop_llseek,
	.show_fdinfo	= timerfd_show,
	.unlocked_ioctl	= timerfd_ioctl,
@@ -407,6 +411,7 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
{
	int ufd;
	struct timerfd_ctx *ctx;
	struct file *file;

	/* Check the TFD_* constants for consistency.  */
	BUILD_BUG_ON(TFD_CLOEXEC != O_CLOEXEC);
@@ -443,11 +448,22 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)

	ctx->moffs = ktime_mono_to_real(0);

	ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx,
	ufd = get_unused_fd_flags(flags & TFD_SHARED_FCNTL_FLAGS);
	if (ufd < 0) {
		kfree(ctx);
		return ufd;
	}

	file = anon_inode_getfile("[timerfd]", &timerfd_fops, ctx,
				    O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS));
	if (ufd < 0)
	if (IS_ERR(file)) {
		put_unused_fd(ufd);
		kfree(ctx);
		return PTR_ERR(file);
	}

	file->f_mode |= FMODE_NOWAIT;
	fd_install(ufd, file);
	return ufd;
}

+28 −16
Original line number Diff line number Diff line
@@ -31,6 +31,7 @@
#include <linux/hugetlb.h>
#include <linux/swapops.h>
#include <linux/miscdevice.h>
#include <linux/uio.h>

static int sysctl_unprivileged_userfaultfd __read_mostly;

@@ -282,7 +283,7 @@ static inline bool userfaultfd_huge_must_wait(struct userfaultfd_ctx *ctx,
/*
 * Verify the pagetables are still not ok after having registered into
 * the fault_pending_wqh to avoid userland having to UFFDIO_WAKE any
 * userfault that has already been resolved, if userfaultfd_read and
 * userfault that has already been resolved, if userfaultfd_read_iter and
 * UFFDIO_COPY|ZEROPAGE are being run simultaneously on two different
 * threads.
 */
@@ -1181,34 +1182,34 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,
	return ret;
}

/*
 * Read one or more pending userfaultfd messages into the caller's buffer.
 *
 * Messages are transferred one "struct uffd_msg" at a time for as long as
 * the destination still has room for a whole message.
 *
 * Returns the number of bytes copied so far if at least one message was
 * transferred. Otherwise returns:
 *   -EINVAL  if the context is not initialized, or the destination cannot
 *            hold even a single message;
 *   the negative value from userfaultfd_ctx_read() if fetching failed;
 *   -EFAULT  if copying the message to the destination failed.
 *
 * The first fetch is non-blocking when the file has O_NONBLOCK set (the
 * ->read_iter() form also honours IOCB_NOWAIT on the kiocb). Every later
 * fetch in the same call is forced to be non-blocking.
 */
static ssize_t userfaultfd_read(struct file *file, char __user *buf,
				size_t count, loff_t *ppos)
static ssize_t userfaultfd_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	struct file *file = iocb->ki_filp;
	struct userfaultfd_ctx *ctx = file->private_data;
	ssize_t _ret, ret = 0;
	struct uffd_msg msg;
	int no_wait = file->f_flags & O_NONBLOCK;
	struct inode *inode = file_inode(file);
	bool no_wait;

	if (!userfaultfd_is_initialized(ctx))
		return -EINVAL;

	no_wait = file->f_flags & O_NONBLOCK || iocb->ki_flags & IOCB_NOWAIT;
	for (;;) {
		/* Stop once the destination has no room for a whole message. */
		if (count < sizeof(msg))
		if (iov_iter_count(to) < sizeof(msg))
			return ret ? ret : -EINVAL;
		_ret = userfaultfd_ctx_read(ctx, no_wait, &msg, inode);
		if (_ret < 0)
			return ret ? ret : _ret;
		if (copy_to_user((__u64 __user *) buf, &msg, sizeof(msg)))
		/* _ret becomes non-zero when the full message was not copied. */
		_ret = !copy_to_iter_full(&msg, sizeof(msg), to);
		if (_ret)
			return ret ? ret : -EFAULT;
		ret += sizeof(msg);
		buf += sizeof(msg);
		count -= sizeof(msg);
		/*
		 * Allow to read more than one fault at time but only
		 * block if waiting for the very first one.
		 */
		no_wait = O_NONBLOCK;
		no_wait = true;
	}
}

@@ -2176,7 +2177,7 @@ static const struct file_operations userfaultfd_fops = {
#endif
	.release	= userfaultfd_release,
	.poll		= userfaultfd_poll,
	.read		= userfaultfd_read,
	.read_iter	= userfaultfd_read_iter,
	.unlocked_ioctl = userfaultfd_ioctl,
	.compat_ioctl	= compat_ptr_ioctl,
	.llseek		= noop_llseek,
@@ -2196,6 +2197,7 @@ static void init_once_userfaultfd_ctx(void *mem)
static int new_userfaultfd(int flags)
{
	struct userfaultfd_ctx *ctx;
	struct file *file;
	int fd;

	BUG_ON(!current->mm);
@@ -2219,16 +2221,26 @@ static int new_userfaultfd(int flags)
	init_rwsem(&ctx->map_changing_lock);
	atomic_set(&ctx->mmap_changing, 0);
	ctx->mm = current->mm;
	/* prevent the mm struct to be freed */
	mmgrab(ctx->mm);

	fd = get_unused_fd_flags(flags & UFFD_SHARED_FCNTL_FLAGS);
	if (fd < 0)
		goto err_out;

	/* Create a new inode so that the LSM can block the creation.  */
	fd = anon_inode_create_getfd("[userfaultfd]", &userfaultfd_fops, ctx,
	file = anon_inode_create_getfile("[userfaultfd]", &userfaultfd_fops, ctx,
			O_RDONLY | (flags & UFFD_SHARED_FCNTL_FLAGS), NULL);
	if (fd < 0) {
		mmdrop(ctx->mm);
		kmem_cache_free(userfaultfd_ctx_cachep, ctx);
	if (IS_ERR(file)) {
		put_unused_fd(fd);
		fd = PTR_ERR(file);
		goto err_out;
	}
	/* prevent the mm struct to be freed */
	mmgrab(ctx->mm);
	file->f_mode |= FMODE_NOWAIT;
	fd_install(fd, file);
	return fd;
err_out:
	kmem_cache_free(userfaultfd_ctx_cachep, ctx);
	return fd;
}

+10 −0
Original line number Diff line number Diff line
@@ -205,6 +205,16 @@ size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
	return 0;
}

/*
 * All-or-nothing variant of copy_to_iter().
 *
 * Tries to copy @bytes bytes from @addr into the iterator @i. If fewer
 * bytes than requested were copied, the iterator is reverted by the amount
 * that did get copied and false is returned. Returns true only when the
 * whole request was copied.
 */
static __always_inline __must_check
bool copy_to_iter_full(const void *addr, size_t bytes, struct iov_iter *i)
{
	const size_t done = copy_to_iter(addr, bytes, i);
	const bool complete = likely(done == bytes);

	/* Short copy: undo the partial advance of the iterator. */
	if (!complete)
		iov_iter_revert(i, done);

	return complete;
}

static __always_inline __must_check
bool copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i)
{
+1 −8
Original line number Diff line number Diff line
@@ -379,14 +379,7 @@ static inline bool udp_skb_is_linear(struct sk_buff *skb)
/*
 * Copy @len bytes of a linear skb, starting @off bytes into skb->data,
 * into the iterator @to.
 *
 * Returns 0 when all @len bytes were copied. On a short copy the iterator
 * is reverted by the number of bytes that were copied and -EFAULT is
 * returned.
 */
static inline int copy_linear_skb(struct sk_buff *skb, int len, int off,
				  struct iov_iter *to)
{
	int n;

	n = copy_to_iter(skb->data + off, len, to);
	if (n == len)
		return 0;

	/* Short copy: roll the iterator back before reporting the error. */
	iov_iter_revert(to, n);
	return -EFAULT;
	return copy_to_iter_full(skb->data + off, len, to) ? 0 : -EFAULT;
}

/*