Commit 49fffac9 authored by Linus Torvalds
Browse files

Merge tag 'for-6.16/io_uring-20250523' of git://git.kernel.dk/linux

Pull io_uring updates from Jens Axboe:

 - Avoid indirect function calls in io-wq for executing and freeing
   work.

   The design of io-wq is such that it can be a generic mechanism, but
   as it's just used by io_uring now, may as well avoid these indirect
   calls

 - Clean up registered buffers for networking

 - Add support for IORING_OP_PIPE. Pretty straightforward: it allows
   creating pipes with io_uring, which is particularly useful for having
   these be instantiated as direct descriptors

 - Clean up the coalescing support for registered buffers

 - Add support for multiple interface queues for zero-copy rx
   networking. As this feature was merged for 6.15 it supported just a
   single ifq per ring

 - Clean up the eventfd support

 - Add dma-buf support to zero-copy rx

 - Clean up and improve the request draining support

 - Clean up provided buffer support, most notably with an eye toward
   making the legacy support less intrusive

 - Minor fdinfo cleanups, dropping support for dumping what credentials
   are registered

 - Improve support for overflow CQE handling, getting rid of GFP_ATOMIC
   for allocating overflow entries where possible

 - Improve detection of cases where io-wq doesn't need to spawn a new
   worker

 - Various little cleanups

* tag 'for-6.16/io_uring-20250523' of git://git.kernel.dk/linux: (59 commits)
  io_uring/cmd: warn on reg buf imports by ineligible cmds
  io_uring/io-wq: only create a new worker if it can make progress
  io_uring/io-wq: ignore non-busy worker going to sleep
  io_uring/io-wq: move hash helpers to the top
  trace/io_uring: fix io_uring_local_work_run ctx documentation
  io_uring: finish IOU_OK -> IOU_COMPLETE transition
  io_uring: add new helpers for posting overflows
  io_uring: pass in struct io_big_cqe to io_alloc_ocqe()
  io_uring: make io_alloc_ocqe() take a struct io_cqe pointer
  io_uring: split alloc and add of overflow
  io_uring: open code io_req_cqe_overflow()
  io_uring/fdinfo: get rid of dumping credentials
  io_uring/fdinfo: only compile if CONFIG_PROC_FS is set
  io_uring/kbuf: unify legacy buf provision and removal
  io_uring/kbuf: refactor __io_remove_buffers
  io_uring/kbuf: don't compute size twice on prep
  io_uring/kbuf: drop extra vars in io_register_pbuf_ring
  io_uring/kbuf: use mem_is_zero()
  io_uring/kbuf: account ring io_buffer_list memory
  io_uring: drain based on allocates reqs
  ...
parents 6f59de9b 6faaf6e0
Loading
Loading
Loading
Loading
+6 −9
Original line number Diff line number Diff line
@@ -40,8 +40,6 @@ enum io_uring_cmd_flags {
	IO_URING_F_TASK_DEAD		= (1 << 13),
};

struct io_zcrx_ifq;

/*
 * Link node whose only member is a pointer to the next io_wq_work_node,
 * i.e. the building block of a singly linked chain of nodes.
 */
struct io_wq_work_node {
	struct io_wq_work_node *next;
};
@@ -343,7 +341,6 @@ struct io_ring_ctx {
		unsigned		cached_cq_tail;
		unsigned		cq_entries;
		struct io_ev_fd	__rcu	*io_ev_fd;
		unsigned		cq_extra;

		void			*cq_wait_arg;
		size_t			cq_wait_size;
@@ -394,7 +391,8 @@ struct io_ring_ctx {
	struct wait_queue_head		poll_wq;
	struct io_restriction		restrictions;

	struct io_zcrx_ifq		*ifq;
	/* Stores zcrx object pointers of type struct io_zcrx_ifq */
	struct xarray			zcrx_ctxs;

	u32			pers_next;
	struct xarray		personalities;
@@ -418,6 +416,7 @@ struct io_ring_ctx {

	struct callback_head		poll_wq_task_work;
	struct list_head		defer_list;
	unsigned			nr_drained;

	struct io_alloc_cache		msg_cache;
	spinlock_t			msg_lock;
@@ -436,6 +435,7 @@ struct io_ring_ctx {

	/* protected by ->completion_lock */
	unsigned			evfd_last_cq_tail;
	unsigned			nr_req_allocated;

	/*
	 * Protection for resize vs mmap races - both the mmap and resize
@@ -448,8 +448,6 @@ struct io_ring_ctx {
	struct io_mapped_region		ring_region;
	/* used for optimised request parameter and wait argument passing  */
	struct io_mapped_region		param_region;
	/* just one zcrx per ring for now, will move to io_zcrx_ifq eventually */
	struct io_mapped_region		zcrx_region;
};

/*
@@ -653,8 +651,7 @@ struct io_kiocb {
	u8				iopoll_completed;
	/*
	 * Can be either a fixed buffer index, or used with provided buffers.
	 * For the latter, before issue it points to the buffer group ID,
	 * and after selection it points to the buffer ID itself.
	 * For the latter, it points to the selected buffer ID.
	 */
	u16				buf_index;

@@ -713,7 +710,7 @@ struct io_kiocb {
	const struct cred		*creds;
	struct io_wq_work		work;

	struct {
	struct io_big_cqe {
		u64			extra1;
		u64			extra2;
	} big_cqe;
+1 −1
Original line number Diff line number Diff line
@@ -645,7 +645,7 @@ TRACE_EVENT(io_uring_short_write,
/*
 * io_uring_local_work_run - ran ring local task work
 *
 * @tctx:		pointer to a io_uring_ctx
 * @ctx:		pointer to an io_ring_ctx
 * @count:		how many functions it ran
 * @loops:		how many loops it ran
 *
+7 −1
Original line number Diff line number Diff line
@@ -73,6 +73,7 @@ struct io_uring_sqe {
		__u32		futex_flags;
		__u32		install_fd_flags;
		__u32		nop_flags;
		__u32		pipe_flags;
	};
	__u64	user_data;	/* data to be passed back at completion time */
	/* pack this to avoid bogus arm OABI complaints */
@@ -287,6 +288,7 @@ enum io_uring_op {
	IORING_OP_EPOLL_WAIT,
	IORING_OP_READV_FIXED,
	IORING_OP_WRITEV_FIXED,
	IORING_OP_PIPE,

	/* this goes last, obviously */
	IORING_OP_LAST,
@@ -992,12 +994,16 @@ struct io_uring_zcrx_offsets {
	__u64	__resv[2];
};

/*
 * Flag values for zero-copy rx (zcrx) areas.
 * NOTE(review): presumably passed in io_uring_zcrx_area_reg.flags - confirm.
 */
enum io_uring_zcrx_area_flags {
	/* presumably marks the area as dma-buf backed - confirm against zcrx.c */
	IORING_ZCRX_AREA_DMABUF		= 1,
};

/*
 * NOTE(review): this diff view has lost its +/- markers, so __resv1 and
 * dmabuf_fd both appear below. The file stat for this header (+7 -1) implies
 * one of the two is the removed line; presumably dmabuf_fd replaces __resv1
 * at the same offset - confirm against the real header before relying on
 * this layout.
 */
struct io_uring_zcrx_area_reg {
	__u64	addr;
	__u64	len;
	__u64	rq_area_token;
	__u32	flags;
	__u32	__resv1;
	__u32	dmabuf_fd;
	__u64	__resv2[2];
};

+4 −2
Original line number Diff line number Diff line
@@ -7,11 +7,11 @@ GCOV_PROFILE := y
endif

obj-$(CONFIG_IO_URING)		+= io_uring.o opdef.o kbuf.o rsrc.o notif.o \
					tctx.o filetable.o rw.o net.o poll.o \
					tctx.o filetable.o rw.o poll.o \
					eventfd.o uring_cmd.o openclose.o \
					sqpoll.o xattr.o nop.o fs.o splice.o \
					sync.o msg_ring.o advise.o openclose.o \
					statx.o timeout.o fdinfo.o cancel.o \
					statx.o timeout.o cancel.o \
					waitid.o register.o truncate.o \
					memmap.o alloc_cache.o
obj-$(CONFIG_IO_URING_ZCRX)	+= zcrx.o
@@ -19,3 +19,5 @@ obj-$(CONFIG_IO_WQ) += io-wq.o
obj-$(CONFIG_FUTEX)		+= futex.o
obj-$(CONFIG_EPOLL)		+= epoll.o
obj-$(CONFIG_NET_RX_BUSY_POLL)	+= napi.o
obj-$(CONFIG_NET) += net.o cmd_net.o
obj-$(CONFIG_PROC_FS) += fdinfo.o
+2 −2
Original line number Diff line number Diff line
@@ -58,7 +58,7 @@ int io_madvise(struct io_kiocb *req, unsigned int issue_flags)

	ret = do_madvise(current->mm, ma->addr, ma->len, ma->advice);
	io_req_set_res(req, ret, 0);
	return IOU_OK;
	return IOU_COMPLETE;
#else
	return -EOPNOTSUPP;
#endif
@@ -104,5 +104,5 @@ int io_fadvise(struct io_kiocb *req, unsigned int issue_flags)
	if (ret < 0)
		req_set_fail(req);
	io_req_set_res(req, ret, 0);
	return IOU_OK;
	return IOU_COMPLETE;
}
Loading