Commit ee7226b2 authored by Linus Torvalds
Browse files

Merge tag 'io_uring-7.1-20260515' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux

Pull io_uring fixes from Jens Axboe:

 - Small series sanitizing the locking done for either modifying or
   reading a chain of requests

 - If the application has a pid namespace, ensure that the sqthread pid
   is correctly printed in fdinfo

 - Fix for a hashing issue in the io-wq thread pool, which could lead to
   a use-after-free

 - Kill dead argument from io_prep_rw_pi()

 - Fix for a missed validation of the CQ ring head, affecting CQE refill

* tag 'io_uring-7.1-20260515' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux:
  io_uring: validate user-controlled cq.head in io_cqe_cache_refill()
  io-wq: check that the predecessor is hashed in io_wq_remove_pending()
  io_uring/rw: drop unused attr_type_mask from io_prep_rw_pi()
  io_uring: hold uring_lock across io_kill_timeouts() in cancel path
  io_uring: defer linked-timeout chain splice out of hrtimer context
  io_uring: hold uring_lock when walking link chain in io_wq_free_work()
  io_uring/fdinfo: translate SqThread PID through caller's pid_ns
parents 78e83700 f44d38a3
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -561,8 +561,8 @@ __cold bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
	ret |= io_waitid_remove_all(ctx, tctx, cancel_all);
	ret |= io_futex_remove_all(ctx, tctx, cancel_all);
	ret |= io_uring_try_cancel_uring_cmd(ctx, tctx, cancel_all);
	mutex_unlock(&ctx->uring_lock);
	ret |= io_kill_timeouts(ctx, tctx, cancel_all);
	mutex_unlock(&ctx->uring_lock);
	if (tctx)
		ret |= io_run_task_work() > 0;
	else
+2 −1
Original line number Diff line number Diff line
@@ -190,8 +190,9 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m)
			get_task_struct(tsk);
			rcu_read_unlock();
			usec = io_sq_cpu_usec(tsk);
			sq_pid = task_pid_nr_ns(tsk,
						proc_pid_ns(file_inode(m->file)->i_sb));
			put_task_struct(tsk);
			sq_pid = sq->task_pid;
			sq_cpu = sq->sq_cpu;
			sq_total_time = usec;
			sq_work_time = sq->work_time;
+2 −1
Original line number Diff line number Diff line
@@ -1124,7 +1124,8 @@ static inline void io_wq_remove_pending(struct io_wq *wq,
	if (io_wq_is_hashed(work) && work == wq->hash_tail[hash]) {
		if (prev)
			prev_work = container_of(prev, struct io_wq_work, list);
		if (prev_work && io_get_work_hash(prev_work) == hash)
		if (prev_work && io_wq_is_hashed(prev_work) &&
		    io_get_work_hash(prev_work) == hash)
			wq->hash_tail[hash] = prev_work;
		else
			wq->hash_tail[hash] = NULL;
+23 −6
Original line number Diff line number Diff line
@@ -686,13 +686,27 @@ static struct io_overflow_cqe *io_alloc_ocqe(struct io_ring_ctx *ctx,
	return ocqe;
}

/*
 * Number of CQEs currently queued in the CQ ring, for use when working out
 * how much free space is left. The ring head is read from the shared rings
 * structure, so the signed distance between the cached tail and that head is
 * checked: a negative distance is reported as 0 queued entries, and any
 * non-negative distance is capped at cq_entries.
 */
static unsigned int io_cqring_queued(struct io_ring_ctx *ctx)
{
	struct io_rings *rings = io_get_rings(ctx);
	unsigned int head = READ_ONCE(rings->cq.head);
	int pending = (int)(ctx->cached_cq_tail - head);

	/* head is ahead of the cached tail: treat the ring as empty */
	if (pending < 0)
		return 0;

	return min((unsigned int)pending, ctx->cq_entries);
}

/*
 * Fill an empty dummy CQE, in case alignment is off for posting a 32b CQE
 * because the ring is a single 16b entry away from wrapping.
 */
static bool io_fill_nop_cqe(struct io_ring_ctx *ctx, unsigned int off)
{
	if (__io_cqring_events(ctx) < ctx->cq_entries) {
	if (io_cqring_queued(ctx) < ctx->cq_entries) {
		struct io_uring_cqe *cqe = &ctx->rings->cqes[off];

		cqe->user_data = 0;
@@ -713,7 +727,7 @@ bool io_cqe_cache_refill(struct io_ring_ctx *ctx, bool overflow, bool cqe32)
{
	struct io_rings *rings = ctx->rings;
	unsigned int off = ctx->cached_cq_tail & (ctx->cq_entries - 1);
	unsigned int free, queued, len;
	unsigned int free, len;

	/*
	 * Posting into the CQ when there are pending overflowed CQEs may break
@@ -733,9 +747,7 @@ bool io_cqe_cache_refill(struct io_ring_ctx *ctx, bool overflow, bool cqe32)
		off = 0;
	}

	/* userspace may cheat modifying the tail, be safe and do min */
	queued = min(__io_cqring_events(ctx), ctx->cq_entries);
	free = ctx->cq_entries - queued;
	free = ctx->cq_entries - io_cqring_queued(ctx);
	/* we need a contiguous range, limit based on the current array offset */
	len = min(free, ctx->cq_entries - off);
	if (len < (cqe32 + 1))
@@ -1452,8 +1464,13 @@ struct io_wq_work *io_wq_free_work(struct io_wq_work *work)
	struct io_kiocb *nxt = NULL;

	if (req_ref_put_and_test_atomic(req)) {
		if (req->flags & IO_REQ_LINK_FLAGS)
		if (req->flags & IO_REQ_LINK_FLAGS) {
			struct io_ring_ctx *ctx = req->ctx;

			mutex_lock(&ctx->uring_lock);
			nxt = io_req_find_next(req);
			mutex_unlock(&ctx->uring_lock);
		}
		io_free_req(req);
	}
	return nxt ? &nxt->work : NULL;
+2 −2
Original line number Diff line number Diff line
@@ -230,7 +230,7 @@ static inline void io_meta_restore(struct io_async_rw *io, struct kiocb *kiocb)
}

static int io_prep_rw_pi(struct io_kiocb *req, struct io_rw *rw, int ddir,
			 u64 attr_ptr, u64 attr_type_mask)
			 u64 attr_ptr)
{
	struct io_uring_attr_pi pi_attr;
	struct io_async_rw *io;
@@ -305,7 +305,7 @@ static int __io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
			return -EINVAL;

		attr_ptr = READ_ONCE(sqe->attr_ptr);
		return io_prep_rw_pi(req, rw, ddir, attr_ptr, attr_type_mask);
		return io_prep_rw_pi(req, rw, ddir, attr_ptr);
	}
	return 0;
}
Loading