Commit 10f466ab authored by Jens Axboe
Browse files

io_uring: split alloc and add of overflow

Add a new helper, io_alloc_ocqe(), that simply allocates and fills an
overflow entry. The allocation can then be done outside of the locking
section, and hence use more appropriate gfp_t allocation flags rather
than always defaulting to GFP_ATOMIC.

Inspired by a previous series from Pavel:

https://lore.kernel.org/io-uring/cover.1747209332.git.asml.silence@gmail.com/



Reviewed-by: Caleb Sander Mateos <csander@purestorage.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent 5288b9e2
Loading
Loading
Loading
Loading
+45 −29
Original line number Diff line number Diff line
@@ -697,20 +697,11 @@ static __cold void io_uring_drop_tctx_refs(struct task_struct *task)
	}
}

static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data,
				     s32 res, u32 cflags, u64 extra1, u64 extra2)
static bool io_cqring_add_overflow(struct io_ring_ctx *ctx,
				   struct io_overflow_cqe *ocqe)
{
	struct io_overflow_cqe *ocqe;
	size_t ocq_size = sizeof(struct io_overflow_cqe);
	bool is_cqe32 = (ctx->flags & IORING_SETUP_CQE32);

	lockdep_assert_held(&ctx->completion_lock);

	if (is_cqe32)
		ocq_size += sizeof(struct io_uring_cqe);

	ocqe = kmalloc(ocq_size, GFP_ATOMIC | __GFP_ACCOUNT);
	trace_io_uring_cqe_overflow(ctx, user_data, res, cflags, ocqe);
	if (!ocqe) {
		struct io_rings *r = ctx->rings;

@@ -728,6 +719,24 @@ static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data,
		atomic_or(IORING_SQ_CQ_OVERFLOW, &ctx->rings->sq_flags);

	}
	list_add_tail(&ocqe->list, &ctx->cq_overflow_list);
	return true;
}

static struct io_overflow_cqe *io_alloc_ocqe(struct io_ring_ctx *ctx,
					     u64 user_data, s32 res, u32 cflags,
					     u64 extra1, u64 extra2, gfp_t gfp)
{
	struct io_overflow_cqe *ocqe;
	size_t ocq_size = sizeof(struct io_overflow_cqe);
	bool is_cqe32 = (ctx->flags & IORING_SETUP_CQE32);

	if (is_cqe32)
		ocq_size += sizeof(struct io_uring_cqe);

	ocqe = kmalloc(ocq_size, gfp | __GFP_ACCOUNT);
	trace_io_uring_cqe_overflow(ctx, user_data, res, cflags, ocqe);
	if (ocqe) {
		ocqe->cqe.user_data = user_data;
		ocqe->cqe.res = res;
		ocqe->cqe.flags = cflags;
@@ -735,8 +744,8 @@ static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data,
			ocqe->cqe.big_cqe[0] = extra1;
			ocqe->cqe.big_cqe[1] = extra2;
		}
	list_add_tail(&ocqe->list, &ctx->cq_overflow_list);
	return true;
	}
	return ocqe;
}

/*
@@ -803,8 +812,12 @@ bool io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags

	io_cq_lock(ctx);
	filled = io_fill_cqe_aux(ctx, user_data, res, cflags);
	if (!filled)
		filled = io_cqring_event_overflow(ctx, user_data, res, cflags, 0, 0);
	if (unlikely(!filled)) {
		struct io_overflow_cqe *ocqe;

		ocqe = io_alloc_ocqe(ctx, user_data, res, cflags, 0, 0, GFP_ATOMIC);
		filled = io_cqring_add_overflow(ctx, ocqe);
	}
	io_cq_unlock_post(ctx);
	return filled;
}
@@ -819,8 +832,11 @@ void io_add_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags)
	lockdep_assert(ctx->lockless_cq);

	if (!io_fill_cqe_aux(ctx, user_data, res, cflags)) {
		struct io_overflow_cqe *ocqe;

		ocqe = io_alloc_ocqe(ctx, user_data, res, cflags, 0, 0, GFP_KERNEL);
		spin_lock(&ctx->completion_lock);
		io_cqring_event_overflow(ctx, user_data, res, cflags, 0, 0);
		io_cqring_add_overflow(ctx, ocqe);
		spin_unlock(&ctx->completion_lock);
	}
	ctx->submit_state.cq_flush = true;
@@ -1425,18 +1441,18 @@ void __io_submit_flush_completions(struct io_ring_ctx *ctx)
		 */
		if (!(req->flags & (REQ_F_CQE_SKIP | REQ_F_REISSUE)) &&
		    unlikely(!io_fill_cqe_req(ctx, req))) {
			gfp_t gfp = ctx->lockless_cq ? GFP_KERNEL : GFP_ATOMIC;
			struct io_overflow_cqe *ocqe;

			ocqe = io_alloc_ocqe(ctx, req->cqe.user_data, req->cqe.res,
					     req->cqe.flags, req->big_cqe.extra1,
					     req->big_cqe.extra2, gfp);
			if (ctx->lockless_cq) {
				spin_lock(&ctx->completion_lock);
				io_cqring_event_overflow(req->ctx, req->cqe.user_data,
							req->cqe.res, req->cqe.flags,
							req->big_cqe.extra1,
							req->big_cqe.extra2);
				io_cqring_add_overflow(ctx, ocqe);
				spin_unlock(&ctx->completion_lock);
			} else {
				io_cqring_event_overflow(req->ctx, req->cqe.user_data,
							req->cqe.res, req->cqe.flags,
							req->big_cqe.extra1,
							req->big_cqe.extra2);
				io_cqring_add_overflow(ctx, ocqe);
			}

			memset(&req->big_cqe, 0, sizeof(req->big_cqe));