Commit 6e3da40e authored by Jens Axboe
Browse files

Merge branch 'for-6.15/io_uring-epoll-wait' into for-6.15/io_uring-reg-vec

* for-6.15/io_uring-epoll-wait:
  io_uring/epoll: add support for IORING_OP_EPOLL_WAIT
  io_uring/epoll: remove CONFIG_EPOLL guards
  eventpoll: add epoll_sendevents() helper
  eventpoll: abstract out ep_try_send_events() helper
  eventpoll: abstract out parameter sanity checking
parents 78b6f6e9 19f7e942
Loading
Loading
Loading
Loading
+63 −24
Original line number Diff line number Diff line
@@ -1980,6 +1980,22 @@ static int ep_autoremove_wake_function(struct wait_queue_entry *wq_entry,
	return ret;
}

/*
 * Make a single attempt at handing ready events of @ep to user space.
 *
 * @ep:        eventpoll instance to take events from
 * @events:    user buffer the events are written to
 * @maxevents: capacity of @events, in entries
 *
 * Returns the result of ep_send_events() unchanged. A return of 0 means no
 * events were transferred; a caller that still has timeout left may simply
 * try again. When the result is positive, ep_suspend_napi_irqs() is invoked
 * on @ep before returning.
 */
static int ep_try_send_events(struct eventpoll *ep,
			      struct epoll_event __user *events, int maxevents)
{
	int nr_sent = ep_send_events(ep, events, maxevents);

	/* Nothing delivered (0) or an error (< 0): hand it straight back */
	if (nr_sent <= 0)
		return nr_sent;

	ep_suspend_napi_irqs(ep);
	return nr_sent;
}

/**
 * ep_poll - Retrieves ready events, and delivers them to the caller-supplied
 *           event buffer.
@@ -2031,18 +2047,10 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,

	while (1) {
		if (eavail) {
			/*
			 * Try to transfer events to user space. In case we get
			 * 0 events and there's still timeout left over, we go
			 * trying again in search of more luck.
			 */
			res = ep_send_events(ep, events, maxevents);
			if (res) {
				if (res > 0)
					ep_suspend_napi_irqs(ep);
			res = ep_try_send_events(ep, events, maxevents);
			if (res)
				return res;
		}
		}

		if (timed_out)
			return 0;
@@ -2445,6 +2453,47 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
	return do_epoll_ctl(epfd, op, fd, &epds, false);
}

/*
 * Sanity check the arguments shared by the epoll event retrieval paths.
 *
 * @file:      file that is expected to be an eventpoll file
 * @evs:       user buffer that will receive the events
 * @maxevents: number of entries @evs has room for
 *
 * Returns 0 when everything checks out, -EINVAL if @maxevents is outside
 * 1..EP_MAX_EVENTS or @file is not an eventpoll file, and -EFAULT if the
 * user buffer fails access_ok(). The checks run in that order, so the
 * first failing one decides the error code.
 */
static int ep_check_params(struct file *file, struct epoll_event __user *evs,
			   int maxevents)
{
	size_t buf_len;

	/* The number of events must be positive and within the upper bound */
	if (maxevents < 1 || maxevents > EP_MAX_EVENTS)
		return -EINVAL;

	/* The whole user-supplied area must pass the access check */
	buf_len = maxevents * sizeof(struct epoll_event);
	if (!access_ok(evs, buf_len))
		return -EFAULT;

	/* Finally, the file handed to us has to really be an eventpoll file */
	return is_file_epoll(file) ? 0 : -EINVAL;
}

/*
 * Non-blocking attempt to copy ready events of an eventpoll file to user
 * space.
 *
 * @file:      the eventpoll file
 * @events:    user buffer that receives the events
 * @maxevents: number of entries @events has room for
 *
 * Returns the error from ep_check_params() if the arguments are rejected,
 * 0 if no events appear to be available, and otherwise the result of
 * ep_try_send_events().
 */
int epoll_sendevents(struct file *file, struct epoll_event __user *events,
		     int maxevents)
{
	struct eventpoll *ep;
	int err = ep_check_params(file, events, maxevents);

	if (unlikely(err))
		return err;

	ep = file->private_data;

	/*
	 * This availability check is racy. That is acceptable: the caller
	 * is expected to retry based on poll readiness anyway.
	 */
	if (!ep_events_available(ep))
		return 0;

	return ep_try_send_events(ep, events, maxevents);
}

/*
 * Implement the event wait interface for the eventpoll file. It is the kernel
 * part of the user space epoll_wait(2).
@@ -2453,26 +2502,16 @@ static int do_epoll_wait(int epfd, struct epoll_event __user *events,
			 int maxevents, struct timespec64 *to)
{
	struct eventpoll *ep;

	/* The maximum number of event must be greater than zero */
	if (maxevents <= 0 || maxevents > EP_MAX_EVENTS)
		return -EINVAL;

	/* Verify that the area passed by the user is writeable */
	if (!access_ok(events, maxevents * sizeof(struct epoll_event)))
		return -EFAULT;
	int ret;

	/* Get the "struct file *" for the eventpoll file */
	CLASS(fd, f)(epfd);
	if (fd_empty(f))
		return -EBADF;

	/*
	 * We have to check that the file structure underneath the fd
	 * the user passed to us _is_ an eventpoll file.
	 */
	if (!is_file_epoll(fd_file(f)))
		return -EINVAL;
	ret = ep_check_params(fd_file(f), events, maxevents);
	if (unlikely(ret))
		return ret;

	/*
	 * At this point it is safe to assume that the "private_data" contains
+4 −0
Original line number Diff line number Diff line
@@ -25,6 +25,10 @@ struct file *get_epoll_tfile_raw_ptr(struct file *file, int tfd, unsigned long t
/* Used to release the epoll bits inside the "struct file" */
void eventpoll_release_file(struct file *file);

/*
 * Copy ready events to userspace.
 *
 * Does not wait: returns 0 when no events are currently available. The
 * arguments are validated first, so a negative error is returned for an
 * out-of-range @maxevents, an inaccessible @events buffer, or a @file
 * that is not an eventpoll file. Otherwise the result of the event
 * transfer is returned.
 */
int epoll_sendevents(struct file *file, struct epoll_event __user *events,
		     int maxevents);

/*
 * This is called from inside fs/file_table.c:__fput() to unlink files
 * from the eventpoll interface. We need to have this facility to cleanup
+1 −0
Original line number Diff line number Diff line
@@ -280,6 +280,7 @@ enum io_uring_op {
	IORING_OP_BIND,
	IORING_OP_LISTEN,
	IORING_OP_RECV_ZC,
	IORING_OP_EPOLL_WAIT,

	/* this goes last, obviously */
	IORING_OP_LAST,
+5 −4
Original line number Diff line number Diff line
@@ -11,10 +11,11 @@ obj-$(CONFIG_IO_URING) += io_uring.o opdef.o kbuf.o rsrc.o notif.o \
					eventfd.o uring_cmd.o openclose.o \
					sqpoll.o xattr.o nop.o fs.o splice.o \
					sync.o msg_ring.o advise.o openclose.o \
					epoll.o statx.o timeout.o fdinfo.o \
					cancel.o waitid.o register.o \
					truncate.o memmap.o alloc_cache.o
					statx.o timeout.o fdinfo.o cancel.o \
					waitid.o register.o truncate.o \
					memmap.o alloc_cache.o
obj-$(CONFIG_IO_URING_ZCRX)	+= zcrx.o
obj-$(CONFIG_IO_WQ)		+= io-wq.o
obj-$(CONFIG_FUTEX)		+= futex.o
obj-$(CONFIG_EPOLL)		+= epoll.o
obj-$(CONFIG_NET_RX_BUSY_POLL)	+= napi.o
+33 −2
Original line number Diff line number Diff line
@@ -12,7 +12,6 @@
#include "io_uring.h"
#include "epoll.h"

#if defined(CONFIG_EPOLL)
struct io_epoll {
	struct file			*file;
	int				epfd;
@@ -21,6 +20,12 @@ struct io_epoll {
	struct epoll_event		event;
};

/*
 * Per-request command data for IORING_OP_EPOLL_WAIT.
 *
 * NOTE(review): 'file' is the first member, as in struct io_epoll; this is
 * presumably required by io_kiocb_to_cmd() - confirm before reordering.
 */
struct io_epoll_wait {
	struct file			*file;
	/* Capacity of 'events' in entries; taken from sqe->len at prep time */
	int				maxevents;
	/* User buffer receiving the events; taken from sqe->addr at prep time */
	struct epoll_event __user	*events;
};

int io_epoll_ctl_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_epoll *epoll = io_kiocb_to_cmd(req, struct io_epoll);
@@ -58,4 +63,30 @@ int io_epoll_ctl(struct io_kiocb *req, unsigned int issue_flags)
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}
#endif

/*
 * Prepare an epoll wait request from its SQE.
 *
 * Fails with -EINVAL if any of the off, rw_flags, buf_index or
 * splice_fd_in SQE fields is non-zero. Otherwise the event count is taken
 * from sqe->len and the user event buffer from sqe->addr, and 0 is
 * returned. Range and buffer validation is not done here.
 */
int io_epoll_wait_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_epoll_wait *wait_cmd;

	wait_cmd = io_kiocb_to_cmd(req, struct io_epoll_wait);

	/* These SQE fields must be left zeroed for this opcode */
	if (sqe->off || sqe->rw_flags ||
	    sqe->buf_index || sqe->splice_fd_in)
		return -EINVAL;

	/* Snapshot each SQE value exactly once */
	wait_cmd->maxevents = READ_ONCE(sqe->len);
	wait_cmd->events = u64_to_user_ptr(READ_ONCE(sqe->addr));

	return 0;
}

/*
 * Issue handler for an epoll wait request.
 *
 * Asks epoll_sendevents() to copy events from req->file into the user
 * buffer recorded at prep time. If nothing was copied (0), -EAGAIN is
 * returned and no result is set on the request. Otherwise the value is
 * stored as the request result, with the request marked failed when it is
 * negative, and IOU_OK is returned.
 */
int io_epoll_wait(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_epoll_wait *wait_cmd;
	int nr_events;

	wait_cmd = io_kiocb_to_cmd(req, struct io_epoll_wait);
	nr_events = epoll_sendevents(req->file, wait_cmd->events,
				     wait_cmd->maxevents);

	/* No events ready right now */
	if (!nr_events)
		return -EAGAIN;

	if (nr_events < 0)
		req_set_fail(req);
	io_req_set_res(req, nr_events, 0);

	return IOU_OK;
}
Loading