Commit dbae42cf authored by Linus Torvalds
Browse files

Merge tag 'io_uring-7.1-20260522' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux

Pull io_uring fixes from Jens Axboe:

 - Fix for an issue with IORING_OP_NOP when using injected results

 - Fix for an issue in IORING_OP_WAITID, where the info state was
   assumed cleared by the lower level syscall handler, but for some
   cases it is not. Just clear the data upfront, so that non-initialized
   data isn't copied back to userspace

 - Fix for a lockdep reported issue, where IORING_OP_BIND enters file
   create and hence hits mnt_want_write(), which creates a three part
   lockdep cycle between the super lock, io_uring's uring_lock, and the
   cred mutex

 - Fix a regression introduced in this cycle with how linked timeouts
   are deleted

 - Ensure that the ->opcode nospec indexing on the opcode issue side
   covers all the cases

* tag 'io_uring-7.1-20260522' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux:
  io_uring/nop: pass all errors to userspace
  io_uring/timeout: splice timed out link in timeout handler
  io_uring: propagate array_index_nospec opcode into req->opcode
  io_uring/waitid: clear waitid info before copying it to userspace
  io_uring/net: punt IORING_OP_BIND async if it needs file create
parents 558c3eca e97ff8b6
Loading
Loading
Loading
Loading
+4 −5
Original line number Diff line number Diff line
@@ -1738,10 +1738,9 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
	const struct io_issue_def *def;
	unsigned int sqe_flags;
	int personality;
	u8 opcode;

	req->ctx = ctx;
	req->opcode = opcode = READ_ONCE(sqe->opcode);
	req->opcode = READ_ONCE(sqe->opcode);
	/* same numerical values with corresponding REQ_F_*, safe to copy */
	sqe_flags = READ_ONCE(sqe->flags);
	req->flags = (__force io_req_flags_t) sqe_flags;
@@ -1751,13 +1750,13 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
	req->cancel_seq_set = false;
	req->async_data = NULL;

	if (unlikely(opcode >= IORING_OP_LAST)) {
	if (unlikely(req->opcode >= IORING_OP_LAST)) {
		req->opcode = 0;
		return io_init_fail_req(req, -EINVAL);
	}
	opcode = array_index_nospec(opcode, IORING_OP_LAST);
	req->opcode = array_index_nospec(req->opcode, IORING_OP_LAST);

	def = &io_issue_defs[opcode];
	def = &io_issue_defs[req->opcode];
	if (def->is_128 && !(ctx->flags & IORING_SETUP_SQE128)) {
		/*
		 * A 128b op on a non-128b SQ requires mixed SQE support as
+25 −1
Original line number Diff line number Diff line
@@ -4,6 +4,7 @@
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/net.h>
#include <linux/un.h>
#include <linux/compat.h>
#include <net/compat.h>
#include <linux/io_uring.h>
@@ -1799,11 +1800,29 @@ int io_connect(struct io_kiocb *req, unsigned int issue_flags)
	return IOU_COMPLETE;
}

/*
 * Tell whether a bind request could potentially reach filename_create().
 * That path ends up in mnt_want_write(), which grabs the fs percpu start
 * write sem and can trigger a lockdep warning.
 *
 * @io:       async msghdr whose ->addr holds the address to bind to
 * @addr_len: length of that address
 *
 * Returns non-zero only when the address family is AF_UNIX, @addr_len
 * extends past the start of sun_path, and the first byte of sun_path is
 * not NUL. Returns 0 in every other case.
 */
static int io_bind_file_create(const struct io_async_msghdr *io, int addr_len)
{
	/* Pointer reinterpretation only; nothing is read until the checks pass */
	const struct sockaddr_un *un_addr = (const struct sockaddr_un *) &io->addr;

	if (io->addr.ss_family == AF_UNIX &&
	    addr_len > offsetof(struct sockaddr_un, sun_path))
		return un_addr->sun_path[0] != '\0';

	return 0;
}

int io_bind_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_bind *bind = io_kiocb_to_cmd(req, struct io_bind);
	struct sockaddr __user *uaddr;
	struct io_async_msghdr *io;
	int ret;

	if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in)
		return -EINVAL;
@@ -1814,7 +1833,12 @@ int io_bind_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
	io = io_msg_alloc_async(req);
	if (unlikely(!io))
		return -ENOMEM;
	return move_addr_to_kernel(uaddr, bind->addr_len, &io->addr);
	ret = move_addr_to_kernel(uaddr, bind->addr_len, &io->addr);
	if (unlikely(ret))
		return ret;
	if (io_bind_file_create(io, bind->addr_len))
		req->flags |= REQ_F_FORCE_ASYNC;
	return 0;
}

int io_bind(struct io_kiocb *req, unsigned int issue_flags)
+2 −2
Original line number Diff line number Diff line
@@ -79,9 +79,9 @@ int io_nop(struct io_kiocb *req, unsigned int issue_flags)
	if (ret < 0)
		req_set_fail(req);
	if (nop->flags & IORING_NOP_CQE32)
		io_req_set_res32(req, nop->result, 0, nop->extra1, nop->extra2);
		io_req_set_res32(req, ret, 0, nop->extra1, nop->extra2);
	else
		io_req_set_res(req, nop->result, 0);
		io_req_set_res(req, ret, 0);
	if (nop->flags & IORING_NOP_TW) {
		req->io_task_work.func = io_req_task_complete;
		io_req_task_work_add(req);
+3 −1
Original line number Diff line number Diff line
@@ -417,9 +417,11 @@ static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
	 * done in io_req_task_link_timeout(), if needed.
	 */
	if (prev) {
		if (!req_ref_inc_not_zero(prev))
		if (!req_ref_inc_not_zero(prev)) {
			io_remove_next_linked(prev);
			prev = NULL;
		}
	}
	list_del(&timeout->list);
	timeout->prev = prev;
	raw_spin_unlock_irqrestore(&ctx->timeout_lock, flags);
+1 −0
Original line number Diff line number Diff line
@@ -275,6 +275,7 @@ int io_waitid_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
	iw->options = READ_ONCE(sqe->file_index);
	iw->head = NULL;
	iw->infop = u64_to_user_ptr(READ_ONCE(sqe->addr2));
	memset(&iw->info, 0, sizeof(iw->info));
	return 0;
}