Commit 0ed5f799 authored by Alexei Starovoitov
Browse files

Merge branch 'bpf-allow-void-cast-using-bpf_rdonly_cast'

Eduard Zingerman says:

====================
bpf: allow void* cast using bpf_rdonly_cast()

Currently, pointers returned by `bpf_rdonly_cast()` have a type of
"pointer to btf id", and only casts to structure types are allowed.
Access to memory pointed to by these pointers is done through
`BPF_PROBE_{MEM,MEMSX}` instructions and does not produce errors on
invalid memory access.

This patch set extends `bpf_rdonly_cast()` to allow casts to an
equivalent of 'void *', effectively replacing
`bpf_probe_read_kernel()` calls in situations where access to
individual bytes or integers is necessary.

The mechanism was suggested and explored by Andrii Nakryiko in [1].

To help with detecting support for this feature, an
`enum bpf_features` is added with intended usage as follows:

  if (bpf_core_enum_value_exists(enum bpf_features,
                                 BPF_FEAT_RDONLY_CAST_TO_VOID))
    ...

[1] https://github.com/anakryiko/linux/tree/bpf-mem-cast

Changelog:

v2: https://lore.kernel.org/bpf/20250625000520.2700423-1-eddyz87@gmail.com/
v2 -> v3:
- dropped direct numbering for __MAX_BPF_FEAT.

v1: https://lore.kernel.org/bpf/20250624191009.902874-1-eddyz87@gmail.com/


v1 -> v2:
- renamed BPF_FEAT_TOTAL to __MAX_BPF_FEAT and moved patch introducing
  bpf_features enum to the start of the series (Alexei);
- dropped patch #3 allowing optout from CAP_SYS_ADMIN drop in
  prog_tests/verifier.c, use a separate runner in prog_tests/*
  instead.
====================

Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://patch.msgid.link/20250625182414.30659-1-eddyz87@gmail.com


Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parents 0967f539 12ed81f8
Loading
Loading
Loading
Loading
+67 −12
Original line number Diff line number Diff line
@@ -44,6 +44,11 @@ static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
#undef BPF_LINK_TYPE
};
/*
 * Feature markers that BPF programs can probe for, e.g.:
 *
 *   bpf_core_enum_value_exists(enum bpf_features,
 *                              BPF_FEAT_RDONLY_CAST_TO_VOID)
 *
 * Enumerators are numbered implicitly starting at zero, so __MAX_BPF_FEAT
 * always ends up one past the last feature value.
 */
enum bpf_features {
	/* bpf_rdonly_cast() accepts a cast to the equivalent of 'void *'. */
	BPF_FEAT_RDONLY_CAST_TO_VOID,
	/* Sentinel, not a feature; keep it last. */
	__MAX_BPF_FEAT,
};
struct bpf_mem_alloc bpf_global_percpu_ma;
static bool bpf_global_percpu_ma_set;
@@ -7535,6 +7540,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
		}
	} else if (base_type(reg->type) == PTR_TO_MEM) {
		bool rdonly_mem = type_is_rdonly_mem(reg->type);
		bool rdonly_untrusted = rdonly_mem && (reg->type & PTR_UNTRUSTED);
		if (type_may_be_null(reg->type)) {
			verbose(env, "R%d invalid mem access '%s'\n", regno,
@@ -7554,6 +7560,11 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
			return -EACCES;
		}
		/*
		 * Accesses to untrusted PTR_TO_MEM are done through probe
		 * instructions, hence no need to check bounds in that case.
		 */
		if (!rdonly_untrusted)
			err = check_mem_region_access(env, regno, off, size,
						      reg->mem_size, false);
		if (!err && value_regno >= 0 && (t == BPF_READ || rdonly_mem))
@@ -13602,16 +13613,24 @@ static int check_special_kfunc(struct bpf_verifier_env *env, struct bpf_kfunc_ca
		regs[BPF_REG_0].btf_id = meta->ret_btf_id;
	} else if (meta->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
		ret_t = btf_type_by_id(desc_btf, meta->arg_constant.value);
		if (!ret_t || !btf_type_is_struct(ret_t)) {
			verbose(env,
				"kfunc bpf_rdonly_cast type ID argument must be of a struct\n");
		if (!ret_t) {
			verbose(env, "Unknown type ID %lld passed to kfunc bpf_rdonly_cast\n",
				meta->arg_constant.value);
			return -EINVAL;
		}
		} else if (btf_type_is_struct(ret_t)) {
			mark_reg_known_zero(env, regs, BPF_REG_0);
			regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_UNTRUSTED;
			regs[BPF_REG_0].btf = desc_btf;
			regs[BPF_REG_0].btf_id = meta->arg_constant.value;
		} else if (btf_type_is_void(ret_t)) {
			mark_reg_known_zero(env, regs, BPF_REG_0);
			regs[BPF_REG_0].type = PTR_TO_MEM | MEM_RDONLY | PTR_UNTRUSTED;
			regs[BPF_REG_0].mem_size = 0;
		} else {
			verbose(env,
				"kfunc bpf_rdonly_cast type ID argument must be of a struct or void\n");
			return -EINVAL;
		}
	} else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_slice] ||
		   meta->func_id == special_kfunc_list[KF_bpf_dynptr_slice_rdwr]) {
		enum bpf_type_flag type_flag = get_dynptr_type_flag(meta->initialized_dynptr.type);
@@ -14410,6 +14429,13 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
		return -EACCES;
	}
	/*
	 * Accesses to untrusted PTR_TO_MEM are done through probe
	 * instructions, hence no need to track offsets.
	 */
	if (base_type(ptr_reg->type) == PTR_TO_MEM && (ptr_reg->type & PTR_UNTRUSTED))
		return 0;
	switch (base_type(ptr_reg->type)) {
	case PTR_TO_CTX:
	case PTR_TO_MAP_VALUE:
@@ -19618,10 +19644,27 @@ static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev)
			       !reg_type_mismatch_ok(prev));
}
static bool is_ptr_to_mem_or_btf_id(enum bpf_reg_type type)
{
	switch (base_type(type)) {
	case PTR_TO_MEM:
	case PTR_TO_BTF_ID:
		return true;
	default:
		return false;
	}
}
static bool is_ptr_to_mem(enum bpf_reg_type type)
{
	return base_type(type) == PTR_TO_MEM;
}
static int save_aux_ptr_type(struct bpf_verifier_env *env, enum bpf_reg_type type,
			     bool allow_trust_mismatch)
{
	enum bpf_reg_type *prev_type = &env->insn_aux_data[env->insn_idx].ptr_type;
	enum bpf_reg_type merged_type;
	if (*prev_type == NOT_INIT) {
		/* Saw a valid insn
@@ -19638,15 +19681,24 @@ static int save_aux_ptr_type(struct bpf_verifier_env *env, enum bpf_reg_type typ
		 * Reject it.
		 */
		if (allow_trust_mismatch &&
		    base_type(type) == PTR_TO_BTF_ID &&
		    base_type(*prev_type) == PTR_TO_BTF_ID) {
		    is_ptr_to_mem_or_btf_id(type) &&
		    is_ptr_to_mem_or_btf_id(*prev_type)) {
			/*
			 * Have to support a use case when one path through
			 * the program yields TRUSTED pointer while another
			 * is UNTRUSTED. Fallback to UNTRUSTED to generate
			 * BPF_PROBE_MEM/BPF_PROBE_MEMSX.
			 * Same behavior of MEM_RDONLY flag.
			 */
			*prev_type = PTR_TO_BTF_ID | PTR_UNTRUSTED;
			if (is_ptr_to_mem(type) || is_ptr_to_mem(*prev_type))
				merged_type = PTR_TO_MEM;
			else
				merged_type = PTR_TO_BTF_ID;
			if ((type & PTR_UNTRUSTED) || (*prev_type & PTR_UNTRUSTED))
				merged_type |= PTR_UNTRUSTED;
			if ((type & MEM_RDONLY) || (*prev_type & MEM_RDONLY))
				merged_type |= MEM_RDONLY;
			*prev_type = merged_type;
		} else {
			verbose(env, "same insn cannot be used with different pointers\n");
			return -EINVAL;
@@ -21254,6 +21306,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
		 * for this case.
		 */
		case PTR_TO_BTF_ID | MEM_ALLOC | PTR_UNTRUSTED:
		case PTR_TO_MEM | MEM_RDONLY | PTR_UNTRUSTED:
			if (type == BPF_READ) {
				if (BPF_MODE(insn->code) == BPF_MEM)
					insn->code = BPF_LDX | BPF_PROBE_MEM |
@@ -24439,6 +24492,8 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
	u32 log_true_size;
	bool is_priv;
	BTF_TYPE_EMIT(enum bpf_features);
	/* no program is valid */
	if (ARRAY_SIZE(bpf_verifier_ops) == 0)
		return -EINVAL;
+9 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0-only

#include <test_progs.h>
#include "mem_rdonly_untrusted.skel.h"

/*
 * Test entry point: hands the mem_rdonly_untrusted skeleton (declared in
 * mem_rdonly_untrusted.skel.h) to the RUN_TESTS() macro.
 * NOTE(review): presumably RUN_TESTS() loads each program in the skeleton and
 * checks it against its __success/__failure/__msg/__retval annotations --
 * confirm against test_progs.h.
 */
void test_mem_rdonly_untrusted(void)
{
	RUN_TESTS(mem_rdonly_untrusted);
}
+136 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0

#include <vmlinux.h>
#include <bpf/bpf_core_read.h>
#include "bpf_misc.h"
#include "../test_kmods/bpf_testmod_kfunc.h"

/*
 * Read one byte at offset 0x7fff from a pointer obtained by casting
 * address 0 with bpf_rdonly_cast(0, 0). Annotated to load successfully and
 * to return 0.
 * NOTE(review): type ID 0 is presumably the 'void' type, making this the
 * "cast to void *" form of bpf_rdonly_cast() -- confirm.
 */
SEC("socket")
__success
__retval(0)
int ldx_is_ok_bad_addr(void *ctx)
{
	char *p;

	/*
	 * Bail out with 42 (which differs from the annotated return value 0)
	 * when enum bpf_features lacks BPF_FEAT_RDONLY_CAST_TO_VOID.
	 */
	if (!bpf_core_enum_value_exists(enum bpf_features, BPF_FEAT_RDONLY_CAST_TO_VOID))
		return 42;

	p = bpf_rdonly_cast(0, 0);
	return p[0x7fff];
}

/*
 * Cast the address of a local variable holding 1 with bpf_rdonly_cast(&v, 0)
 * and read it back through the returned pointer. Annotated to load
 * successfully and to return 1.
 */
SEC("socket")
__success
__retval(1)
int ldx_is_ok_good_addr(void *ctx)
{
	int v, *p;

	v = 1;
	p = bpf_rdonly_cast(&v, 0);
	return *p;
}

/*
 * Advance a pointer obtained from bpf_rdonly_cast(0, 0) and read through it
 * on every iteration of a bpf_for() loop bounded by 1000 * 1000 * 1000,
 * summing the loaded values. Annotated to load successfully; no return value
 * is checked.
 * NOTE(review): judging by the name, this checks that the verifier does not
 * track offsets for such pointers -- confirm.
 */
SEC("socket")
__success
int offset_not_tracked(void *ctx)
{
	int *p, i, s;

	p = bpf_rdonly_cast(0, 0);
	s = 0;
	bpf_for(i, 0, 1000 * 1000 * 1000) {
		p++;
		s += *p;
	}
	return s;
}

/*
 * Attempt a plain store through a pointer returned by bpf_rdonly_cast(&v, 0).
 * Annotated to fail loading with the message
 * "cannot write into rdonly_untrusted_mem".
 */
SEC("socket")
__failure
__msg("cannot write into rdonly_untrusted_mem")
int stx_not_ok(void *ctx)
{
	int v, *p;

	v = 1;
	p = bpf_rdonly_cast(&v, 0);
	*p = 1;
	return 0;
}

/*
 * Attempt an atomic add (__sync_fetch_and_add) through a pointer returned by
 * bpf_rdonly_cast(&v, 0). Annotated to fail loading with the message
 * "cannot write into rdonly_untrusted_mem".
 */
SEC("socket")
__failure
__msg("cannot write into rdonly_untrusted_mem")
int atomic_not_ok(void *ctx)
{
	int v, *p;

	v = 1;
	p = bpf_rdonly_cast(&v, 0);
	__sync_fetch_and_add(p, 1);
	return 0;
}

/*
 * Attempt a compare-and-swap (__sync_val_compare_and_swap) through a pointer
 * returned by bpf_rdonly_cast(&v, 0) and return its result. Annotated to fail
 * loading with the message "cannot write into rdonly_untrusted_mem".
 */
SEC("socket")
__failure
__msg("cannot write into rdonly_untrusted_mem")
int atomic_rmw_not_ok(void *ctx)
{
	long v, *p;

	v = 1;
	p = bpf_rdonly_cast(&v, 0);
	return __sync_val_compare_and_swap(p, 0, 42);
}

/*
 * Pass a pointer returned by bpf_rdonly_cast(0, 0) as the argument of the
 * bpf_kfunc_trusted_num_test() kfunc. Annotated to fail loading with both
 * messages listed below (a zero-sized memory region accessed with size 4).
 */
SEC("socket")
__failure
__msg("invalid access to memory, mem_size=0 off=0 size=4")
__msg("R1 min value is outside of the allowed memory range")
int kfunc_param_not_ok(void *ctx)
{
	int *p;

	p = bpf_rdonly_cast(0, 0);
	bpf_kfunc_trusted_num_test(p);
	return 0;
}

/*
 * Pass a pointer returned by bpf_rdonly_cast(0, 0) as the first argument of
 * the bpf_copy_from_user() helper. Annotated to fail loading with a message
 * starting with "R1 type=rdonly_untrusted_mem expected=".
 */
SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
__failure
__msg("R1 type=rdonly_untrusted_mem expected=")
int helper_param_not_ok(void *ctx)
{
	char *p;

	p = bpf_rdonly_cast(0, 0);
	/*
	 * Any helper with ARG_CONST_SIZE_OR_ZERO constraint will do,
	 * the most permissive constraint
	 */
	bpf_copy_from_user(p, 0, (void *)42);
	return 0;
}

/*
 * Return address 0 cast with bpf_rdonly_cast() to one of two types, chosen by
 * bpf_get_prandom_u32(): the kernel type ID of struct sock when the value is
 * non-zero, type ID 0 otherwise. Kept __noinline so the caller sees a single
 * returned pointer regardless of which branch produced it.
 */
static __noinline u64 *get_some_addr(void)
{
	if (bpf_get_prandom_u32())
		return bpf_rdonly_cast(0, bpf_core_type_id_kernel(struct sock));
	else
		return bpf_rdonly_cast(0, 0);
}

/*
 * Dereference the pointer returned by get_some_addr(), which may come from
 * either a struct-typed or a type-ID-0 bpf_rdonly_cast(), so one load is
 * reached with two different pointer kinds. Annotated to load successfully
 * and to return 0.
 */
SEC("socket")
__success
__retval(0)
int mixed_mem_type(void *ctx)
{
	u64 *p;

	/* Try to avoid compiler hoisting load to if branches by using __noinline func. */
	p = get_some_addr();
	return *p;
}

/* License string for this BPF object, tagged with SEC("license"). */
char _license[] SEC("license") = "GPL";