Commit 6f377873, authored by David Wei, committed by Jens Axboe
Browse files

io_uring/zcrx: add interface queue and refill queue



Add a new object called an interface queue (ifq) that represents a net
rx queue that has been configured for zero copy. Each ifq is registered
using a new registration opcode IORING_REGISTER_ZCRX_IFQ.

The refill queue is allocated by the kernel and mapped by userspace
using a new offset IORING_OFF_RQ_RING, in a similar fashion to the main
SQ/CQ. It is used by userspace to return buffers that it is done with,
which will then be re-used by the netdev again.

The main CQ ring is used to notify userspace of received data by using
the upper 16 bytes of a big CQE as a new struct io_uring_zcrx_cqe. Each
entry contains the offset + len to the data.

For now, each io_uring instance only has a single ifq.

Reviewed-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: David Wei <dw@davidwei.uk>
Acked-by: Jakub Kicinski <kuba@kernel.org>
Link: https://lore.kernel.org/r/20250215000947.789731-2-dw@davidwei.uk


Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent 5c496ff1
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -30,3 +30,5 @@ source "lib/Kconfig"
source "lib/Kconfig.debug"

source "Documentation/Kconfig"

source "io_uring/KConfig"
+6 −0
Original line number Diff line number Diff line
@@ -40,6 +40,8 @@ enum io_uring_cmd_flags {
	IO_URING_F_TASK_DEAD		= (1 << 13),
};

/* Forward declaration: struct io_ring_ctx only stores a pointer to this type. */
struct io_zcrx_ifq;

/* List node consisting solely of a pointer to the next node in the chain. */
struct io_wq_work_node {
	struct io_wq_work_node *next;
};
@@ -382,6 +384,8 @@ struct io_ring_ctx {
	struct wait_queue_head		poll_wq;
	struct io_restriction		restrictions;

	struct io_zcrx_ifq		*ifq;

	u32			pers_next;
	struct xarray		personalities;

@@ -434,6 +438,8 @@ struct io_ring_ctx {
	struct io_mapped_region		ring_region;
	/* used for optimised request parameter and wait argument passing  */
	struct io_mapped_region		param_region;
	/* just one zcrx per ring for now, will move to io_zcrx_ifq eventually */
	struct io_mapped_region		zcrx_region;
};

/*
+42 −1
Original line number Diff line number Diff line
@@ -639,7 +639,8 @@ enum io_uring_register_op {
	/* send MSG_RING without having a ring */
	IORING_REGISTER_SEND_MSG_RING		= 31,

	/* 32 reserved for zc rx */
	/* register a netdev hw rx queue for zerocopy */
	IORING_REGISTER_ZCRX_IFQ		= 32,

	/* resize CQ ring */
	IORING_REGISTER_RESIZE_RINGS		= 33,
@@ -956,6 +957,46 @@ enum io_uring_socket_op {
	SOCKET_URING_OP_SETSOCKOPT,
};

/*
 * Zero copy receive refill queue entry.
 *
 * Userspace posts these to the refill queue to hand buffers it is done
 * with back to the kernel so they can be re-used for receive.
 * The entry is 16 bytes: 8 (off) + 4 (len) + 4 (__pad).
 */
struct io_uring_zcrx_rqe {
	/*
	 * NOTE(review): presumably uses the area-id-in-high-bits encoding
	 * described by IORING_ZCRX_AREA_SHIFT/MASK — confirm against zcrx.c.
	 */
	__u64	off;
	__u32	len;
	/* Explicit padding that rounds the entry up to 16 bytes. */
	__u32	__pad;
};

/*
 * Zero copy receive completion payload.
 *
 * Occupies the upper 16 bytes of a big CQE on the main CQ ring and tells
 * userspace where the received data is. The struct is exactly 16 bytes:
 * 8 (off) + 8 (__pad).
 */
struct io_uring_zcrx_cqe {
	/* Offset of the received data. */
	__u64	off;
	/* Explicit padding that fills the remaining 8 bytes. */
	__u64	__pad;
};

/*
 * The bit from which area id is encoded into offsets.
 *
 * IORING_ZCRX_AREA_MASK has bits 48..63 set and bits 0..47 clear, i.e. it
 * selects the area id part of a 64-bit offset; the complement selects the
 * offset proper.
 */
#define IORING_ZCRX_AREA_SHIFT	48
#define IORING_ZCRX_AREA_MASK	(~(((__u64)1 << IORING_ZCRX_AREA_SHIFT) - 1))

/*
 * Layout description of the refill queue ring, embedded in
 * struct io_uring_zcrx_ifq_reg.
 *
 * NOTE(review): head, tail and rqes are presumably byte offsets of the
 * ring head, ring tail and the io_uring_zcrx_rqe array within the mapped
 * refill queue region, by analogy with the SQ/CQ offset structs — confirm
 * against the registration code.
 */
struct io_uring_zcrx_offsets {
	__u32	head;
	__u32	tail;
	__u32	rqes;
	/* Reserved fields; they keep the struct at 32 bytes. */
	__u32	__resv2;
	__u64	__resv[2];
};

/*
 * Argument for IORING_REGISTER_ZCRX_IFQ
 *
 * Describes the net rx queue to configure for zero copy receive and the
 * refill queue that goes with it.
 */
struct io_uring_zcrx_ifq_reg {
	/*
	 * NOTE(review): presumably the network interface index and the index
	 * of the hardware rx queue on that interface — confirm against the
	 * registration code.
	 */
	__u32	if_idx;
	__u32	if_rxq;
	/* NOTE(review): presumably the number of refill queue entries — confirm. */
	__u32	rq_entries;
	__u32	flags;

	__u64	area_ptr; /* pointer to struct io_uring_zcrx_area_reg */
	__u64	region_ptr; /* struct io_uring_region_desc * */

	/* Refill queue ring layout, see struct io_uring_zcrx_offsets. */
	struct io_uring_zcrx_offsets offsets;
	/* Reserved space at the end of the struct. */
	__u64	__resv[4];
};

#ifdef __cplusplus
}
#endif

io_uring/KConfig

0 → 100644
+10 −0
Original line number Diff line number Diff line
# SPDX-License-Identifier: GPL-2.0-only
#
# io_uring configuration
#

# Zero copy receive support for io_uring. There is no prompt: the symbol is
# switched on automatically once every dependency below is satisfied.
#
# IO_URING is listed explicitly because zcrx.o is built from the io_uring
# Makefile on this symbol alone; without the dependency it could be enabled
# (and zcrx.o compiled) in a configuration that has io_uring itself disabled.
config IO_URING_ZCRX
	def_bool y
	depends on IO_URING
	depends on PAGE_POOL
	depends on INET
	depends on NET_RX_BUSY_POLL
+1 −0
Original line number Diff line number Diff line
@@ -14,6 +14,7 @@ obj-$(CONFIG_IO_URING) += io_uring.o opdef.o kbuf.o rsrc.o notif.o \
					epoll.o statx.o timeout.o fdinfo.o \
					cancel.o waitid.o register.o \
					truncate.o memmap.o alloc_cache.o
obj-$(CONFIG_IO_URING_ZCRX)	+= zcrx.o
obj-$(CONFIG_IO_WQ)		+= io-wq.o
obj-$(CONFIG_FUTEX)		+= futex.o
obj-$(CONFIG_NET_RX_BUSY_POLL) += napi.o
Loading