Commit 5247c034 authored by Pavel Begunkov, committed by Jens Axboe
Browse files

io_uring: introduce non-circular SQ



Outside of SQPOLL, normally SQ entries are consumed by the time the
submission syscall returns. For those cases we don't need a circular
buffer and the head/tail tracking, instead the kernel can assume that
entries always start from the beginning of the SQ at index 0. This patch
introduces a setup flag doing exactly that. It's simpler and helps
to keep SQEs hot in cache.

The feature is optional and enabled by setting IORING_SETUP_SQ_REWIND.
The flag is rejected if passed together with SQPOLL as it'd require
waiting for SQ before each submission. It also requires
IORING_SETUP_NO_SQARRAY; working with an SQ array could be supported, but
it's unlikely there would be users, so leave more room for future
optimisations.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent 0105b056
Loading
Loading
Loading
Loading
+12 −0
Original line number Diff line number Diff line
@@ -237,6 +237,18 @@ enum io_uring_sqe_flags_bit {
 */
#define IORING_SETUP_SQE_MIXED		(1U << 19)

/*
 * When set, io_uring ignores the SQ head and tail and fetches SQEs to submit
 * starting from index 0 instead of from the index stored in the head pointer.
 * IOW, the user should place all SQEs at the beginning of the SQ memory
 * before issuing a submission syscall.
 *
 * It requires IORING_SETUP_NO_SQARRAY and is incompatible with
 * IORING_SETUP_SQPOLL. The user must also never change the SQ head and tail
 * values and must keep them set to 0. Any other value is undefined behaviour.
 */
#define IORING_SETUP_SQ_REWIND		(1U << 20)

enum io_uring_op {
	IORING_OP_NOP,
	IORING_OP_READV,
+22 −7
Original line number Diff line number Diff line
@@ -1945,6 +1945,9 @@ static void io_commit_sqring(struct io_ring_ctx *ctx)
{
	struct io_rings *rings = ctx->rings;

	if (ctx->flags & IORING_SETUP_SQ_REWIND) {
		ctx->cached_sq_head = 0;
	} else {
		/*
		 * Ensure any loads from the SQEs are done at this point,
		 * since once we write the new head, the application could
@@ -1952,6 +1955,7 @@ static void io_commit_sqring(struct io_ring_ctx *ctx)
		 */
		smp_store_release(&rings->sq.head, ctx->cached_sq_head);
	}
}

/*
 * Fetch an sqe, if one is available. Note this returns a pointer to memory
@@ -1996,10 +2000,15 @@ static bool io_get_sqe(struct io_ring_ctx *ctx, const struct io_uring_sqe **sqe)
int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
	__must_hold(&ctx->uring_lock)
{
	unsigned int entries = io_sqring_entries(ctx);
	unsigned int entries;
	unsigned int left;
	int ret;

	if (ctx->flags & IORING_SETUP_SQ_REWIND)
		entries = ctx->sq_entries;
	else
		entries = io_sqring_entries(ctx);

	entries = min(nr, entries);
	if (unlikely(!entries))
		return 0;
@@ -2728,6 +2737,12 @@ static int io_uring_sanitise_params(struct io_uring_params *p)
	if (flags & ~IORING_SETUP_FLAGS)
		return -EINVAL;

	if (flags & IORING_SETUP_SQ_REWIND) {
		if ((flags & IORING_SETUP_SQPOLL) ||
		    !(flags & IORING_SETUP_NO_SQARRAY))
		return -EINVAL;
	}

	/* There is no way to mmap rings without a real fd */
	if ((flags & IORING_SETUP_REGISTERED_FD_ONLY) &&
	    !(flags & IORING_SETUP_NO_MMAP))
+2 −1
Original line number Diff line number Diff line
@@ -69,7 +69,8 @@ struct io_ctx_config {
			IORING_SETUP_NO_SQARRAY |\
			IORING_SETUP_HYBRID_IOPOLL |\
			IORING_SETUP_CQE_MIXED |\
			IORING_SETUP_SQE_MIXED)
			IORING_SETUP_SQE_MIXED |\
			IORING_SETUP_SQ_REWIND)

#define IORING_ENTER_FLAGS (IORING_ENTER_GETEVENTS |\
			IORING_ENTER_SQ_WAKEUP |\