Commit 7b751b01 authored by Linus Torvalds
Browse files

Merge tag 'io_uring-7.0-20260216' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux

Pull more io_uring updates from Jens Axboe:
 "This is a mix of cleanups and fixes. No major fixes in here, just a
  bunch of little fixes. Some of them are marked for stable, as they fix
  behavioral issues:

   - Fix an issue with SOCKET_URING_OP_SETSOCKOPT for netlink sockets,
     due to a too restrictive check on it having an ioctl handler

   - Remove a redundant SQPOLL check in ring creation

   - Kill dead accounting for zero-copy send, which doesn't use ->buf
     or ->len after the initial setup

   - Fix missing clamp of the allocation hint, which could cause
     allocations to fall outside of the range the application asked
     for. They would still be within the allowed limits.

   - Fix for IORING_OP_PIPE's handling of direct descriptors

   - Tweak to the API for the newly added BPF filters, making them
     more future proof in terms of how applications deal with them

   - A few fixes for zcrx, fixing a few error handling conditions

   - Fix for zcrx request flag checking

   - Add support for querying the zcrx page size

   - Improve the NO_SQARRAY static branch inc/dec, avoiding busy
     conditions causing too much traffic

   - Various little cleanups"

* tag 'io_uring-7.0-20260216' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux:
  io_uring/bpf_filter: pass in expected filter payload size
  io_uring/bpf_filter: move filter size and populate helper into struct
  io_uring/cancel: de-unionize file and user_data in struct io_cancel_data
  io_uring/rsrc: improve regbuf iov validation
  io_uring: remove unneeded io_send_zc accounting
  io_uring/cmd_net: fix too strict requirement on ioctl
  io_uring: delay sqarray static branch disablement
  io_uring/query: add query.h copyright notice
  io_uring/query: return support for custom rx page size
  io_uring/zcrx: check unsupported flags on import
  io_uring/zcrx: fix post open error handling
  io_uring/zcrx: fix sgtable leak on mapping failures
  io_uring: use the right type for creds iteration
  io_uring/openclose: fix io_pipe_fixed() slot tracking for specific slots
  io_uring/filetable: clamp alloc_hint to the configured alloc range
  io_uring/rsrc: replace reg buffer bit field with flags
  io_uring/zcrx: improve types for size calculation
  io_uring/tctx: avoid modifying loop variable in io_ring_add_registered_file
  io_uring: simplify IORING_SETUP_DEFER_TASKRUN && !SQPOLL check
parents 97029699 be357312
Loading
Loading
Loading
Loading
+8 −0
Original line number Diff line number Diff line
@@ -1090,6 +1090,14 @@ enum zcrx_reg_flags {
	ZCRX_REG_IMPORT	= 1,
};

enum zcrx_features {
	/*
	 * A custom rx page size may be requested: userspace passes the
	 * size it wants in struct io_uring_zcrx_ifq_reg::rx_buf_len.
	 */
	ZCRX_FEATURE_RX_PAGE_SIZE	= (1 << 0),
};

/*
 * Argument for IORING_REGISTER_ZCRX_IFQ
 */
+7 −1
Original line number Diff line number Diff line
@@ -35,13 +35,19 @@ enum {
	 * If set, any currently unset opcode will have a deny filter attached
	 */
	IO_URING_BPF_FILTER_DENY_REST	= 1,
	/*
	 * If set, if kernel and application don't agree on pdu_size for
	 * the given opcode, fail the registration of the filter.
	 */
	IO_URING_BPF_FILTER_SZ_STRICT	= 2,
};

struct io_uring_bpf_filter {
	__u32	opcode;		/* io_uring opcode to filter */
	__u32	flags;		/* IO_URING_BPF_FILTER_* flags */
	__u32	filter_len;	/* number of BPF instructions */
	__u32	resv;
	__u8	pdu_size;	/* expected pdu size for opcode */
	__u8	resv[3];	/* reserved, must be zero */
	__u64	filter_ptr;	/* pointer to BPF filter */
	__u64	resv2[5];	/* reserved, must be zero */
};
+5 −1
Original line number Diff line number Diff line
/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR MIT */
/*
 * Header file for the io_uring query interface.
 *
 * Copyright (C) 2026 Pavel Begunkov <asml.silence@gmail.com>
 * Copyright (C) Meta Platforms, Inc.
 */
#ifndef LINUX_IO_URING_QUERY_H
#define LINUX_IO_URING_QUERY_H
@@ -50,7 +53,8 @@ struct io_uring_query_zcrx {
	__u64 area_flags;
	/* The number of supported ZCRX_CTRL_* opcodes */
	__u32 nr_ctrl_opcodes;
	__u32 __resv1;
	/* Bitmask of ZCRX_FEATURE_* indicating which features are available */
	__u32 features;
	/* The refill ring header size */
	__u32 rq_hdr_size;
	/* The alignment for the header */
+55 −27
Original line number Diff line number Diff line
@@ -26,6 +26,8 @@ static const struct io_bpf_filter dummy_filter;
static void io_uring_populate_bpf_ctx(struct io_uring_bpf_ctx *bctx,
				      struct io_kiocb *req)
{
	const struct io_issue_def *def = &io_issue_defs[req->opcode];

	bctx->opcode = req->opcode;
	bctx->sqe_flags = (__force int) req->flags & SQE_VALID_FLAGS;
	bctx->user_data = req->cqe.user_data;
@@ -34,19 +36,12 @@ static void io_uring_populate_bpf_ctx(struct io_uring_bpf_ctx *bctx,
		sizeof(*bctx) - offsetof(struct io_uring_bpf_ctx, pdu_size));

	/*
	 * Opcodes can provide a handler fo populating more data into bctx,
	 * Opcodes can provide a handler for populating more data into bctx,
	 * for filters to use.
	 */
	switch (req->opcode) {
	case IORING_OP_SOCKET:
		bctx->pdu_size = sizeof(bctx->socket);
		io_socket_bpf_populate(bctx, req);
		break;
	case IORING_OP_OPENAT:
	case IORING_OP_OPENAT2:
		bctx->pdu_size = sizeof(bctx->open);
		io_openat_bpf_populate(bctx, req);
		break;
	if (def->filter_pdu_size) {
		bctx->pdu_size = def->filter_pdu_size;
		def->filter_populate(bctx, req);
	}
}

@@ -313,36 +308,69 @@ static struct io_bpf_filters *io_bpf_filter_cow(struct io_restriction *src)
	return ERR_PTR(-EBUSY);
}

#define IO_URING_BPF_FILTER_FLAGS	IO_URING_BPF_FILTER_DENY_REST
#define IO_URING_BPF_FILTER_FLAGS	(IO_URING_BPF_FILTER_DENY_REST | \
					 IO_URING_BPF_FILTER_SZ_STRICT)

int io_register_bpf_filter(struct io_restriction *res,
/*
 * Copy the registration argument in from userspace into @reg and validate it.
 *
 * Returns -EFAULT if either user copy fails, -EINVAL on a wrong cmd_type,
 * non-zero cmd_flags or reserved fields, an out-of-range opcode, unknown
 * filter flags, or a filter_len of 0 or above BPF_MAXINSNS, and -EMSGSIZE
 * on a pdu_size mismatch that is not tolerated. Returns 0 otherwise.
 */
static int io_bpf_filter_import(struct io_uring_bpf *reg,
				struct io_uring_bpf __user *arg)
{
	struct io_bpf_filters *filters, *old_filters = NULL;
	struct io_bpf_filter *filter, *old_filter;
	struct io_uring_bpf reg;
	struct bpf_prog *prog;
	struct sock_fprog fprog;
	const struct io_issue_def *def;
	int ret;

	if (copy_from_user(&reg, arg, sizeof(reg)))
	if (copy_from_user(reg, arg, sizeof(*reg)))
		return -EFAULT;
	if (reg.cmd_type != IO_URING_BPF_CMD_FILTER)
	if (reg->cmd_type != IO_URING_BPF_CMD_FILTER)
		return -EINVAL;
	if (reg.cmd_flags || reg.resv)
	if (reg->cmd_flags || reg->resv)
		return -EINVAL;

	if (reg.filter.opcode >= IORING_OP_LAST)
	if (reg->filter.opcode >= IORING_OP_LAST)
		return -EINVAL;
	if (reg.filter.flags & ~IO_URING_BPF_FILTER_FLAGS)
	if (reg->filter.flags & ~IO_URING_BPF_FILTER_FLAGS)
		return -EINVAL;
	if (reg.filter.resv)
	if (!mem_is_zero(reg->filter.resv, sizeof(reg->filter.resv)))
		return -EINVAL;
	if (!mem_is_zero(reg.filter.resv2, sizeof(reg.filter.resv2)))
	if (!mem_is_zero(reg->filter.resv2, sizeof(reg->filter.resv2)))
		return -EINVAL;
	if (!reg.filter.filter_len || reg.filter.filter_len > BPF_MAXINSNS)
	if (!reg->filter.filter_len || reg->filter.filter_len > BPF_MAXINSNS)
		return -EINVAL;

	/* Verify filter size */
	def = &io_issue_defs[array_index_nospec(reg->filter.opcode, IORING_OP_LAST)];

	/* same size, always ok */
	ret = 0;
	if (reg->filter.pdu_size == def->filter_pdu_size)
		;
	/* size differs, fail in strict mode */
	else if (reg->filter.flags & IO_URING_BPF_FILTER_SZ_STRICT)
		ret = -EMSGSIZE;
	/* userspace filter is bigger, always disallow */
	else if (reg->filter.pdu_size > def->filter_pdu_size)
		ret = -EMSGSIZE;
	/* userspace filter is smaller and not strict: ret stays 0 */

	/*
	 * Copy back kernel filter size. This happens before ret is returned,
	 * so userspace is handed the kernel's pdu_size on a size mismatch
	 * as well as on success.
	 */
	reg->filter.pdu_size = def->filter_pdu_size;
	if (copy_to_user(&arg->filter, &reg->filter, sizeof(reg->filter)))
		return -EFAULT;

	return ret;
}

int io_register_bpf_filter(struct io_restriction *res,
			   struct io_uring_bpf __user *arg)
{
	struct io_bpf_filters *filters, *old_filters = NULL;
	struct io_bpf_filter *filter, *old_filter;
	struct io_uring_bpf reg;
	struct bpf_prog *prog;
	struct sock_fprog fprog;
	int ret;

	ret = io_bpf_filter_import(&reg, arg);
	if (ret)
		return ret;

	fprog.len = reg.filter.filter_len;
	fprog.filter = u64_to_user_ptr(reg.filter.filter_ptr);

+2 −4
Original line number Diff line number Diff line
@@ -6,10 +6,8 @@

struct io_cancel_data {
	struct io_ring_ctx *ctx;
	union {
	u64 data;
	struct file *file;
	};
	u8 opcode;
	u32 flags;
	int seq;
Loading