Commit 9fd06062 authored by Alexei Starovoitov's avatar Alexei Starovoitov
Browse files

Merge branch 'bpf-support-bpf-rbtree-traversal-and-list-peeking'

Martin KaFai Lau says:

====================
bpf: Support bpf rbtree traversal and list peeking

From: Martin KaFai Lau <martin.lau@kernel.org>

The RFC v1 [1] showed a fq qdisc implementation in bpf
that is much closer to the kernel sch_fq.c.

The fq example and bpf qdisc changes are separated out from this set.
This set is to focus on the kfunc and verifier changes that
enable the bpf rbtree traversal and list peeking.

v2:
- Added tests to check that the return value of
  the bpf_rbtree_{root,left,right} and bpf_list_{front,back} is
  marked as a non_own_ref node pointer. (Kumar)
- Added tests to ensure that the bpf_rbtree_{root,left,right} and
  bpf_list_{front,back} must be called after holding the spinlock.
- Squashed the selftests adjustment to the corresponding verifier
  changes to avoid bisect failure. (Kumar)
- Separated the bpf qdisc specific changes and fq selftest example
  from this set.

[1]: https://lore.kernel.org/bpf/20250418224652.105998-1-martin.lau@linux.dev/
====================

Link: https://patch.msgid.link/20250506015857.817950-1-martin.lau@linux.dev


Signed-off-by: default avatarAlexei Starovoitov <ast@kernel.org>
parents 62e23f18 29318b4d
Loading
Loading
Loading
Loading
+52 −0
Original line number Diff line number Diff line
@@ -2293,6 +2293,26 @@ __bpf_kfunc struct bpf_list_node *bpf_list_pop_back(struct bpf_list_head *head)
	return __bpf_list_del(head, true);
}

__bpf_kfunc struct bpf_list_node *bpf_list_front(struct bpf_list_head *head)
{
	struct list_head *h = (struct list_head *)head;

	if (list_empty(h) || unlikely(!h->next))
		return NULL;

	return (struct bpf_list_node *)h->next;
}

__bpf_kfunc struct bpf_list_node *bpf_list_back(struct bpf_list_head *head)
{
	struct list_head *h = (struct list_head *)head;

	if (list_empty(h) || unlikely(!h->next))
		return NULL;

	return (struct bpf_list_node *)h->prev;
}

__bpf_kfunc struct bpf_rb_node *bpf_rbtree_remove(struct bpf_rb_root *root,
						  struct bpf_rb_node *node)
{
@@ -2366,6 +2386,33 @@ __bpf_kfunc struct bpf_rb_node *bpf_rbtree_first(struct bpf_rb_root *root)
	return (struct bpf_rb_node *)rb_first_cached(r);
}

__bpf_kfunc struct bpf_rb_node *bpf_rbtree_root(struct bpf_rb_root *root)
{
	struct rb_root_cached *r = (struct rb_root_cached *)root;

	return (struct bpf_rb_node *)r->rb_root.rb_node;
}

__bpf_kfunc struct bpf_rb_node *bpf_rbtree_left(struct bpf_rb_root *root, struct bpf_rb_node *node)
{
	struct bpf_rb_node_kern *node_internal = (struct bpf_rb_node_kern *)node;

	if (READ_ONCE(node_internal->owner) != root)
		return NULL;

	return (struct bpf_rb_node *)node_internal->rb_node.rb_left;
}

__bpf_kfunc struct bpf_rb_node *bpf_rbtree_right(struct bpf_rb_root *root, struct bpf_rb_node *node)
{
	struct bpf_rb_node_kern *node_internal = (struct bpf_rb_node_kern *)node;

	if (READ_ONCE(node_internal->owner) != root)
		return NULL;

	return (struct bpf_rb_node *)node_internal->rb_node.rb_right;
}

/**
 * bpf_task_acquire - Acquire a reference to a task. A task acquired by this
 * kfunc which is not stored in a map as a kptr, must be released by calling
@@ -3209,11 +3256,16 @@ BTF_ID_FLAGS(func, bpf_list_push_front_impl)
BTF_ID_FLAGS(func, bpf_list_push_back_impl)
BTF_ID_FLAGS(func, bpf_list_pop_front, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_list_pop_back, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_list_front, KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_list_back, KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_task_acquire, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_task_release, KF_RELEASE)
BTF_ID_FLAGS(func, bpf_rbtree_remove, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_rbtree_add_impl)
BTF_ID_FLAGS(func, bpf_rbtree_first, KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_rbtree_root, KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_rbtree_left, KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_rbtree_right, KF_RET_NULL)

#ifdef CONFIG_CGROUPS
BTF_ID_FLAGS(func, bpf_cgroup_acquire, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
+47 −17
Original line number Diff line number Diff line
@@ -11987,6 +11987,16 @@ static bool is_kfunc_arg_res_spin_lock(const struct btf *btf, const struct btf_p
	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_RES_SPIN_LOCK_ID);
}
static bool is_rbtree_node_type(const struct btf_type *t)
{
	return t == btf_type_by_id(btf_vmlinux, kf_arg_btf_ids[KF_ARG_RB_NODE_ID]);
}
static bool is_list_node_type(const struct btf_type *t)
{
	return t == btf_type_by_id(btf_vmlinux, kf_arg_btf_ids[KF_ARG_LIST_NODE_ID]);
}
static bool is_kfunc_arg_callback(struct bpf_verifier_env *env, const struct btf *btf,
				  const struct btf_param *arg)
{
@@ -12069,6 +12079,8 @@ enum special_kfunc_type {
	KF_bpf_list_push_back_impl,
	KF_bpf_list_pop_front,
	KF_bpf_list_pop_back,
	KF_bpf_list_front,
	KF_bpf_list_back,
	KF_bpf_cast_to_kern_ctx,
	KF_bpf_rdonly_cast,
	KF_bpf_rcu_read_lock,
@@ -12076,6 +12088,9 @@ enum special_kfunc_type {
	KF_bpf_rbtree_remove,
	KF_bpf_rbtree_add_impl,
	KF_bpf_rbtree_first,
	KF_bpf_rbtree_root,
	KF_bpf_rbtree_left,
	KF_bpf_rbtree_right,
	KF_bpf_dynptr_from_skb,
	KF_bpf_dynptr_from_xdp,
	KF_bpf_dynptr_slice,
@@ -12111,11 +12126,16 @@ BTF_ID(func, bpf_list_push_front_impl)
BTF_ID(func, bpf_list_push_back_impl)
BTF_ID(func, bpf_list_pop_front)
BTF_ID(func, bpf_list_pop_back)
BTF_ID(func, bpf_list_front)
BTF_ID(func, bpf_list_back)
BTF_ID(func, bpf_cast_to_kern_ctx)
BTF_ID(func, bpf_rdonly_cast)
BTF_ID(func, bpf_rbtree_remove)
BTF_ID(func, bpf_rbtree_add_impl)
BTF_ID(func, bpf_rbtree_first)
BTF_ID(func, bpf_rbtree_root)
BTF_ID(func, bpf_rbtree_left)
BTF_ID(func, bpf_rbtree_right)
#ifdef CONFIG_NET
BTF_ID(func, bpf_dynptr_from_skb)
BTF_ID(func, bpf_dynptr_from_xdp)
@@ -12144,6 +12164,8 @@ BTF_ID(func, bpf_list_push_front_impl)
BTF_ID(func, bpf_list_push_back_impl)
BTF_ID(func, bpf_list_pop_front)
BTF_ID(func, bpf_list_pop_back)
BTF_ID(func, bpf_list_front)
BTF_ID(func, bpf_list_back)
BTF_ID(func, bpf_cast_to_kern_ctx)
BTF_ID(func, bpf_rdonly_cast)
BTF_ID(func, bpf_rcu_read_lock)
@@ -12151,6 +12173,9 @@ BTF_ID(func, bpf_rcu_read_unlock)
BTF_ID(func, bpf_rbtree_remove)
BTF_ID(func, bpf_rbtree_add_impl)
BTF_ID(func, bpf_rbtree_first)
BTF_ID(func, bpf_rbtree_root)
BTF_ID(func, bpf_rbtree_left)
BTF_ID(func, bpf_rbtree_right)
#ifdef CONFIG_NET
BTF_ID(func, bpf_dynptr_from_skb)
BTF_ID(func, bpf_dynptr_from_xdp)
@@ -12579,14 +12604,19 @@ static bool is_bpf_list_api_kfunc(u32 btf_id)
	return btf_id == special_kfunc_list[KF_bpf_list_push_front_impl] ||
	       btf_id == special_kfunc_list[KF_bpf_list_push_back_impl] ||
	       btf_id == special_kfunc_list[KF_bpf_list_pop_front] ||
	       btf_id == special_kfunc_list[KF_bpf_list_pop_back];
	       btf_id == special_kfunc_list[KF_bpf_list_pop_back] ||
	       btf_id == special_kfunc_list[KF_bpf_list_front] ||
	       btf_id == special_kfunc_list[KF_bpf_list_back];
}
static bool is_bpf_rbtree_api_kfunc(u32 btf_id)
{
	return btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl] ||
	       btf_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
	       btf_id == special_kfunc_list[KF_bpf_rbtree_first];
	       btf_id == special_kfunc_list[KF_bpf_rbtree_first] ||
	       btf_id == special_kfunc_list[KF_bpf_rbtree_root] ||
	       btf_id == special_kfunc_list[KF_bpf_rbtree_left] ||
	       btf_id == special_kfunc_list[KF_bpf_rbtree_right];
}
static bool is_bpf_iter_num_api_kfunc(u32 btf_id)
@@ -12686,7 +12716,9 @@ static bool check_kfunc_is_graph_node_api(struct bpf_verifier_env *env,
		break;
	case BPF_RB_NODE:
		ret = (kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
		       kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl]);
		       kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl] ||
		       kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_left] ||
		       kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_right]);
		break;
	default:
		verbose(env, "verifier internal error: unexpected graph node argument type %s\n",
@@ -13200,22 +13232,22 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
				return ret;
			break;
		case KF_ARG_PTR_TO_RB_NODE:
			if (meta->func_id == special_kfunc_list[KF_bpf_rbtree_remove]) {
				if (!type_is_non_owning_ref(reg->type) || reg->ref_obj_id) {
					verbose(env, "rbtree_remove node input must be non-owning ref\n");
			if (meta->func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
				if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
					verbose(env, "arg#%d expected pointer to allocated object\n", i);
					return -EINVAL;
				}
				if (in_rbtree_lock_required_cb(env)) {
					verbose(env, "rbtree_remove not allowed in rbtree cb\n");
				if (!reg->ref_obj_id) {
					verbose(env, "allocated object must be referenced\n");
					return -EINVAL;
				}
			} else {
				if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
					verbose(env, "arg#%d expected pointer to allocated object\n", i);
				if (!type_is_non_owning_ref(reg->type) && !reg->ref_obj_id) {
					verbose(env, "%s can only take non-owning or refcounted bpf_rb_node pointer\n", func_name);
					return -EINVAL;
				}
				if (!reg->ref_obj_id) {
					verbose(env, "allocated object must be referenced\n");
				if (in_rbtree_lock_required_cb(env)) {
					verbose(env, "%s not allowed in rbtree cb\n", func_name);
					return -EINVAL;
				}
			}
@@ -13745,13 +13777,11 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
				insn_aux->kptr_struct_meta =
					btf_find_struct_meta(meta.arg_btf,
							     meta.arg_btf_id);
			} else if (meta.func_id == special_kfunc_list[KF_bpf_list_pop_front] ||
				   meta.func_id == special_kfunc_list[KF_bpf_list_pop_back]) {
			} else if (is_list_node_type(ptr_type)) {
				struct btf_field *field = meta.arg_list_head.field;
				mark_reg_graph_node(regs, BPF_REG_0, &field->graph_root);
			} else if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
				   meta.func_id == special_kfunc_list[KF_bpf_rbtree_first]) {
			} else if (is_rbtree_node_type(ptr_type)) {
				struct btf_field *field = meta.arg_rbtree_root.field;
				mark_reg_graph_node(regs, BPF_REG_0, &field->graph_root);
@@ -13881,7 +13911,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
			if (is_kfunc_ret_null(&meta))
				regs[BPF_REG_0].id = id;
			regs[BPF_REG_0].ref_obj_id = id;
		} else if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_first]) {
		} else if (is_rbtree_node_type(ptr_type) || is_list_node_type(ptr_type)) {
			ref_set_non_owning(env, &regs[BPF_REG_0]);
		}
+6 −0
Original line number Diff line number Diff line
@@ -7,6 +7,7 @@

#include "linked_list.skel.h"
#include "linked_list_fail.skel.h"
#include "linked_list_peek.skel.h"

static char log_buf[1024 * 1024];

@@ -805,3 +806,8 @@ void test_linked_list(void)
	test_linked_list_success(LIST_IN_LIST, true);
	test_linked_list_success(TEST_ALL, false);
}

void test_linked_list_peek(void)
{
	RUN_TESTS(linked_list_peek);
}
+6 −0
Original line number Diff line number Diff line
@@ -8,6 +8,7 @@
#include "rbtree_fail.skel.h"
#include "rbtree_btf_fail__wrong_node_type.skel.h"
#include "rbtree_btf_fail__add_wrong_type.skel.h"
#include "rbtree_search.skel.h"

static void test_rbtree_add_nodes(void)
{
@@ -187,3 +188,8 @@ void test_rbtree_fail(void)
{
	RUN_TESTS(rbtree_fail);
}

void test_rbtree_search(void)
{
	RUN_TESTS(rbtree_search);
}
+113 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */

#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
#include "bpf_experimental.h"

struct node_data {
	struct bpf_list_node l;
	int key;
};

#define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8)))
private(A) struct bpf_spin_lock glock;
private(A) struct bpf_list_head ghead __contains(node_data, l);

#define list_entry(ptr, type, member) container_of(ptr, type, member)
#define NR_NODES 16

int zero = 0;

SEC("syscall")
__retval(0)
long list_peek(void *ctx)
{
	struct bpf_list_node *l_n;
	struct node_data *n;
	int i, err = 0;

	bpf_spin_lock(&glock);
	l_n = bpf_list_front(&ghead);
	bpf_spin_unlock(&glock);
	if (l_n)
		return __LINE__;

	bpf_spin_lock(&glock);
	l_n = bpf_list_back(&ghead);
	bpf_spin_unlock(&glock);
	if (l_n)
		return __LINE__;

	for (i = zero; i < NR_NODES && can_loop; i++) {
		n = bpf_obj_new(typeof(*n));
		if (!n)
			return __LINE__;
		n->key = i;
		bpf_spin_lock(&glock);
		bpf_list_push_back(&ghead, &n->l);
		bpf_spin_unlock(&glock);
	}

	bpf_spin_lock(&glock);

	l_n = bpf_list_front(&ghead);
	if (!l_n) {
		err = __LINE__;
		goto done;
	}

	n = list_entry(l_n, struct node_data, l);
	if (n->key != 0) {
		err = __LINE__;
		goto done;
	}

	l_n = bpf_list_back(&ghead);
	if (!l_n) {
		err = __LINE__;
		goto done;
	}

	n = list_entry(l_n, struct node_data, l);
	if (n->key != NR_NODES - 1) {
		err = __LINE__;
		goto done;
	}

done:
	bpf_spin_unlock(&glock);
	return err;
}

#define TEST_FB(op, dolock)					\
SEC("syscall")							\
__failure __msg(MSG)						\
long test_##op##_spinlock_##dolock(void *ctx)			\
{								\
	struct bpf_list_node *l_n;				\
	__u64 jiffies = 0;					\
								\
	if (dolock)						\
		bpf_spin_lock(&glock);				\
	l_n = bpf_list_##op(&ghead);				\
	if (l_n)						\
		jiffies = bpf_jiffies64();			\
	if (dolock)						\
		bpf_spin_unlock(&glock);			\
								\
	return !!jiffies;					\
}

#define MSG "call bpf_list_{{(front|back).+}}; R0{{(_w)?}}=ptr_or_null_node_data(id={{[0-9]+}},non_own_ref"
TEST_FB(front, true)
TEST_FB(back, true)
#undef MSG

#define MSG "bpf_spin_lock at off=0 must be held for bpf_list_head"
TEST_FB(front, false)
TEST_FB(back, false)
#undef MSG

char _license[] SEC("license") = "GPL";
Loading