Commit d42eb05e authored by Jens Axboe
Browse files

io_uring: add support for BPF filtering for opcode restrictions



Add support for loading classic BPF programs with io_uring to provide
fine-grained filtering of SQE operations. Unlike
IORING_REGISTER_RESTRICTIONS which only allows bitmap-based allow/deny
of opcodes, BPF filters can inspect request attributes and make dynamic
decisions.

The filter is registered via IORING_REGISTER_BPF_FILTER with a struct
io_uring_bpf:

struct io_uring_bpf_filter {
	__u32	opcode;		/* io_uring opcode to filter */
	__u32	flags;
	__u32	filter_len;	/* number of BPF instructions */
	__u32	resv;
	__u64	filter_ptr;	/* pointer to BPF filter */
	__u64	resv2[5];
};

enum {
	IO_URING_BPF_CMD_FILTER	= 1,
};

struct io_uring_bpf {
	__u16	cmd_type;	/* IO_URING_BPF_* values */
	__u16	cmd_flags;	/* none so far */
	__u32	resv;
	union {
		struct io_uring_bpf_filter	filter;
	};
};

and the filters get supplied a struct io_uring_bpf_ctx:

struct io_uring_bpf_ctx {
	__u64	user_data;
	__u8	opcode;
	__u8	sqe_flags;
	__u8	pdu_size;
	__u8	pad[5];
};

where it's possible to filter on opcode and sqe_flags, with pdu_size
indicating how much extra data is being passed in beyond the pad field.
This will be used for specific finer grained filtering inside an opcode.
An example of that for sockets is in one of the following patches.
Anything the opcode supports can end up in this struct, populated by
the opcode itself, and hence can be filtered for.

Filters have the following semantics:
  - Return 1 to allow the request
  - Return 0 to deny the request with -EACCES
  - Multiple filters can be stacked per opcode. All filters must
    return 1 for the opcode to be allowed.
  - Filters are evaluated in registration order (most recent first)

The implementation uses classic BPF (cBPF) rather than eBPF, as that's
required for containers, and since the filters can be used by any
user in the system.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent 0105b056
Loading
Loading
Loading
Loading
+9 −0
Original line number Diff line number Diff line
@@ -219,9 +219,18 @@ struct io_rings {
	struct io_uring_cqe	cqes[] ____cacheline_aligned_in_smp;
};

struct io_bpf_filter;
/*
 * Reference-counted container for the BPF filters attached to a
 * struct io_restriction (see its ->bpf_filters member).
 */
struct io_bpf_filters {
	refcount_t refs;	/* ref for ->bpf_filters */
	spinlock_t lock;	/* protects ->bpf_filters modifications */
	/*
	 * RCU-annotated array of filter pointers.
	 * NOTE(review): presumably one slot per io_uring opcode - confirm
	 * against the allocation in bpf_filter.c.
	 */
	struct io_bpf_filter __rcu **filters;
	/*
	 * NOTE(review): presumably used to defer freeing this structure
	 * until after an RCU grace period - confirm.
	 */
	struct rcu_head rcu_head;
};

struct io_restriction {
	DECLARE_BITMAP(register_op, IORING_REGISTER_LAST);
	DECLARE_BITMAP(sqe_op, IORING_OP_LAST);
	struct io_bpf_filters *bpf_filters;
	u8 sqe_flags_allowed;
	u8 sqe_flags_required;
	/* IORING_OP_* restrictions exist */
+3 −0
Original line number Diff line number Diff line
@@ -700,6 +700,9 @@ enum io_uring_register_op {
	/* auxiliary zcrx configuration, see enum zcrx_ctrl_op */
	IORING_REGISTER_ZCRX_CTRL		= 36,

	/* register bpf filtering programs */
	IORING_REGISTER_BPF_FILTER		= 37,

	/* this goes last */
	IORING_REGISTER_LAST,

+50 −0
Original line number Diff line number Diff line
/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR MIT */
/*
 * Header file for the io_uring BPF filters.
 */
#ifndef LINUX_IO_URING_BPF_FILTER_H
#define LINUX_IO_URING_BPF_FILTER_H

#include <linux/types.h>

/*
 * Struct passed to filters.
 *
 * Filters can match on the opcode and sqe_flags of the request. pdu_size
 * tells the filter how much extra, opcode-specific data is being passed in
 * beyond the pad field; that data is populated by the opcode itself.
 */
struct io_uring_bpf_ctx {
	__u64	user_data;
	__u8	opcode;		/* io_uring opcode of the request */
	__u8	sqe_flags;	/* SQE flags of the request */
	__u8	pdu_size;	/* size of aux data for filter */
	__u8	pad[5];		/* padding; aux data, if any, follows this field */
};

/*
 * Filter registration flags.
 * NOTE(review): presumably passed in io_uring_bpf_filter.flags - confirm
 * against the registration code.
 */
enum {
	/*
	 * If set, any currently unset opcode will have a deny filter attached
	 */
	IO_URING_BPF_FILTER_DENY_REST	= 1,
};

/*
 * Describes one classic BPF filter program to attach to an io_uring opcode.
 *
 * A filter returns 1 to allow the request and 0 to deny it with -EACCES.
 * Multiple filters can be stacked per opcode; all of them must return 1
 * for the request to be allowed.
 */
struct io_uring_bpf_filter {
	__u32	opcode;		/* io_uring opcode to filter */
	__u32	flags;		/* presumably IO_URING_BPF_FILTER_* flags - confirm */
	__u32	filter_len;	/* number of BPF instructions */
	__u32	resv;		/* reserved; presumably must be zero - confirm */
	__u64	filter_ptr;	/* pointer to BPF filter */
	__u64	resv2[5];	/* reserved; presumably must be zero - confirm */
};

/*
 * Command types.
 * NOTE(review): presumably the valid values for io_uring_bpf.cmd_type -
 * confirm against the registration code.
 */
enum {
	/* NOTE(review): presumably selects the 'filter' union member - confirm */
	IO_URING_BPF_CMD_FILTER	= 1,
};

/*
 * Argument structure used with IORING_REGISTER_BPF_FILTER.
 */
struct io_uring_bpf {
	__u16	cmd_type;	/* IO_URING_BPF_CMD_* values */
	__u16	cmd_flags;	/* none so far */
	__u32	resv;		/* reserved; presumably must be zero - confirm */
	/* command-specific payload; only the filter command exists so far */
	union {
		struct io_uring_bpf_filter	filter;
	};
};

#endif
+5 −0
Original line number Diff line number Diff line
@@ -9,3 +9,8 @@ config IO_URING_ZCRX
	depends on PAGE_POOL
	depends on INET
	depends on NET_RX_BUSY_POLL

# BPF filtering of io_uring requests (builds bpf_filter.o). Not user
# selectable: enabled automatically whenever both BPF and NET are set.
config IO_URING_BPF
	def_bool y
	depends on BPF
	depends on NET
+1 −0
Original line number Diff line number Diff line
@@ -24,3 +24,4 @@ obj-$(CONFIG_NET_RX_BUSY_POLL) += napi.o
obj-$(CONFIG_NET) += net.o cmd_net.o
obj-$(CONFIG_PROC_FS) += fdinfo.o
obj-$(CONFIG_IO_URING_MOCK_FILE) += mock_file.o
obj-$(CONFIG_IO_URING_BPF) += bpf_filter.o
Loading