Commit 3547a61e authored by Alexei Starovoitov
Browse files

Merge branch 'update-kf_rcu_protected'

Kumar Kartikeya Dwivedi says:

====================
Update KF_RCU_PROTECTED

Currently, KF_RCU_PROTECTED only applies to iterator APIs and that too
in a convoluted fashion: the presence of this flag on the kfunc is used
to set MEM_RCU in iterator type, and the lack of RCU protection results
in an error only later, once next() or destroy() methods are invoked on
the iterator. While there is no bug, this is certainly a bit unintuitive,
and makes the enforcement of the flag iterator specific.

In the interest of making this flag useful for other upcoming kfuncs,
e.g. scx_bpf_cpu_curr() [0][1], add enforcement for invoking the kfunc
in an RCU critical section in general.

In addition to this, the aforementioned kfunc also needs to return an
RCU protected pointer, which currently has no generic kfunc flag or
annotation. Add such a flag as well while we are at it.

  [0]: https://lore.kernel.org/all/20250903212311.369697-3-christian.loehle@arm.com
  [1]: https://lore.kernel.org/all/20250909195709.92669-1-arighi@nvidia.com

Changelog:
----------
v2 -> v3
v2: https://lore.kernel.org/bpf/20250917032014.4060112-1-memxor@gmail.com

 * Add back lost hunk reworking documentation for KF_RCU_PROTECTED.

v1 -> v2
v1: https://lore.kernel.org/bpf/20250915024731.1494251-1-memxor@gmail.com

 * Drop KF_RET_RCU and fold change into KF_RCU_PROTECTED. (Andrea, Alexei)
 * Update tests for non-struct pointer return values with KF_RCU_PROTECTED.
====================

Link: https://patch.msgid.link/20250917032755.4068726-1-memxor@gmail.com


Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parents 6ff4a0fa 8b788d66
Loading
Loading
Loading
Loading
+18 −1
Original line number Diff line number Diff line
@@ -335,9 +335,26 @@ consider doing refcnt != 0 check, especially when returning a KF_ACQUIRE
pointer. Note as well that a KF_ACQUIRE kfunc that is KF_RCU should very likely
also be KF_RET_NULL.

2.4.8 KF_RCU_PROTECTED flag
---------------------------

The KF_RCU_PROTECTED flag is used to indicate that the kfunc must be invoked in
an RCU critical section. This is assumed by default in non-sleepable programs,
and must be explicitly ensured by calling ``bpf_rcu_read_lock`` for sleepable
ones.

If the kfunc returns a pointer value, this flag also enforces that the returned
pointer is RCU protected, and can only be used while the RCU critical section is
active.

The flag is distinct from the ``KF_RCU`` flag, which only ensures that the
kfunc's arguments are at least RCU protected pointers. That may transitively
imply that the call is made under RCU protection, but it does not cover kfuncs
which require RCU protection yet do not take any RCU protected arguments.

.. _KF_deprecated_flag:

2.4.8 KF_DEPRECATED flag
2.4.9 KF_DEPRECATED flag
------------------------

The KF_DEPRECATED flag is used for kfuncs which are scheduled to be
+10 −0
Original line number Diff line number Diff line
@@ -13931,6 +13931,11 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
		return -EACCES;
	}
	if (is_kfunc_rcu_protected(&meta) && !in_rcu_cs(env)) {
		verbose(env, "kernel func %s requires RCU critical section protection\n", func_name);
		return -EACCES;
	}
	/* In case of release function, we get register number of refcounted
	 * PTR_TO_BTF_ID in bpf_kfunc_arg_meta, do the release now.
	 */
@@ -14044,6 +14049,9 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
			/* Ensures we don't access the memory after a release_reference() */
			if (meta.ref_obj_id)
				regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
			if (is_kfunc_rcu_protected(&meta))
				regs[BPF_REG_0].type |= MEM_RCU;
		} else {
			mark_reg_known_zero(env, regs, BPF_REG_0);
			regs[BPF_REG_0].btf = desc_btf;
@@ -14052,6 +14060,8 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
			if (meta.func_id == special_kfunc_list[KF_bpf_get_kmem_cache])
				regs[BPF_REG_0].type |= PTR_UNTRUSTED;
			else if (is_kfunc_rcu_protected(&meta))
				regs[BPF_REG_0].type |= MEM_RCU;
			if (is_iter_next_kfunc(&meta)) {
				struct bpf_reg_state *cur_iter;
+1 −1
Original line number Diff line number Diff line
@@ -73,7 +73,7 @@ int BPF_PROG(use_css_iter_non_sleepable)
}

SEC("lsm.s/socket_connect")
__failure __msg("expected an RCU CS")
__failure __msg("kernel func bpf_iter_css_new requires RCU critical section protection")
int BPF_PROG(use_css_iter_sleepable_missing_rcu_lock)
{
	u64 cgrp_id = bpf_get_current_cgroup_id();
+2 −2
Original line number Diff line number Diff line
@@ -15,7 +15,7 @@ void bpf_rcu_read_lock(void) __ksym;
void bpf_rcu_read_unlock(void) __ksym;

SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
__failure __msg("expected an RCU CS when using bpf_iter_task_next")
__failure __msg("kernel func bpf_iter_task_new requires RCU critical section protection")
int BPF_PROG(iter_tasks_without_lock)
{
	struct task_struct *pos;
@@ -27,7 +27,7 @@ int BPF_PROG(iter_tasks_without_lock)
}

SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
__failure __msg("expected an RCU CS when using bpf_iter_css_next")
__failure __msg("kernel func bpf_iter_css_new requires RCU critical section protection")
int BPF_PROG(iter_css_without_lock)
{
	u64 cg_id = bpf_get_current_cgroup_id();
+46 −0
Original line number Diff line number Diff line
@@ -123,3 +123,49 @@ int iter_next_ptr_mem_not_trusted(const void *ctx)
	bpf_iter_num_destroy(&num_it);
	return 0;
}

/*
 * Negative test: call bpf_kfunc_ret_rcu_test() without a preceding
 * bpf_rcu_read_lock(). Loading is expected to fail with the "requires RCU
 * critical section protection" message listed in __msg() below.
 *
 * NOTE(review): the ".s" suffix in the section name presumably makes this a
 * sleepable program, where an RCU critical section is not implied and has to
 * be entered explicitly -- confirm against the loader's section conventions.
 */
SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
__failure __msg("kernel func bpf_kfunc_ret_rcu_test requires RCU critical section protection")
int iter_ret_rcu_test_protected(const void *ctx)
{
	struct task_struct *p;

	/* Deliberately not wrapped in bpf_rcu_read_lock()/unlock(). */
	p = bpf_kfunc_ret_rcu_test();
	/* Use the result so the call above is not a dead value. */
	return p->pid;
}

/*
 * Negative test for the type of the pointer returned by
 * bpf_kfunc_ret_rcu_test(). The kfunc is called inside an RCU read-side
 * section, and its result is handed to bpf_this_cpu_ptr() as the first
 * argument. Loading is expected to fail with a message reporting R1 as
 * "rcu_ptr_or_null_", i.e. the return value is seen as an RCU pointer that
 * may still be NULL and is not an accepted argument type for that helper.
 */
SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
__failure __msg("R1 type=rcu_ptr_or_null_ expected=")
int iter_ret_rcu_test_type(const void *ctx)
{
	struct task_struct *p;

	bpf_rcu_read_lock();
	p = bpf_kfunc_ret_rcu_test();
	/* p is passed without a NULL check; the reported R1 type is what is being tested. */
	bpf_this_cpu_ptr(p);
	bpf_rcu_read_unlock();
	return 0;
}

/*
 * Negative test: same as the struct-returning variant, but for
 * bpf_kfunc_ret_rcu_test_nostruct(), whose result is held in a void pointer
 * here. The kfunc is called without a preceding bpf_rcu_read_lock(), and
 * loading is expected to fail with the "requires RCU critical section
 * protection" message listed in __msg() below.
 */
SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
__failure __msg("kernel func bpf_kfunc_ret_rcu_test_nostruct requires RCU critical section protection")
int iter_ret_rcu_test_protected_nostruct(const void *ctx)
{
	void *p;

	/*
	 * Deliberately not wrapped in bpf_rcu_read_lock()/unlock().
	 * NOTE(review): the argument 4 is presumably the size of the returned
	 * memory region (it matches the int read below) -- confirm against the
	 * kfunc definition.
	 */
	p = bpf_kfunc_ret_rcu_test_nostruct(4);
	return *(int *)p;
}

/*
 * Negative test for the type of the non-struct pointer returned by
 * bpf_kfunc_ret_rcu_test_nostruct(). The kfunc is called inside an RCU
 * read-side section, and its result is handed to bpf_this_cpu_ptr() as the
 * first argument. Loading is expected to fail with a message reporting R1 as
 * "rdonly_rcu_mem_or_null", i.e. the return value is seen as read-only,
 * RCU-tagged memory that may still be NULL.
 */
SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
__failure __msg("R1 type=rdonly_rcu_mem_or_null expected=")
int iter_ret_rcu_test_type_nostruct(const void *ctx)
{
	void *p;

	bpf_rcu_read_lock();
	/* NOTE(review): 4 is presumably the returned region size -- confirm. */
	p = bpf_kfunc_ret_rcu_test_nostruct(4);
	/* p is passed without a NULL check; the reported R1 type is what is being tested. */
	bpf_this_cpu_ptr(p);
	bpf_rcu_read_unlock();
	return 0;
}
Loading