Commit 9e3e66c5 authored by Martin KaFai Lau's avatar Martin KaFai Lau Committed by Alexei Starovoitov
Browse files

bpf: Add bpf_rbtree_{root,left,right} kfunc

In a bpf fq implementation that is much closer to the kernel fq,
it will need to traverse the rbtree:
https://lore.kernel.org/bpf/20250418224652.105998-13-martin.lau@linux.dev/



The much simplified logic that uses the bpf_rbtree_{root,left,right}
to traverse the rbtree is like:

struct fq_flow {
	struct bpf_rb_node	fq_node;
	struct bpf_rb_node	rate_node;
	struct bpf_refcount	refcount;
	unsigned long		sk_long;
};

struct fq_flow_root {
	struct bpf_spin_lock lock;
	struct bpf_rb_root root __contains(fq_flow, fq_node);
};

struct fq_flow *fq_classify(...)
{
	struct bpf_rb_node *tofree[FQ_GC_MAX];
	struct fq_flow_root *root;
	struct fq_flow *gc_f, *f;
	struct bpf_rb_node *p;
	int i, fcnt = 0;

	/* ... */

	f = NULL;
	bpf_spin_lock(&root->lock);
	p = bpf_rbtree_root(&root->root);
	while (can_loop) {
		if (!p)
			break;

		gc_f = bpf_rb_entry(p, struct fq_flow, fq_node);
		if (gc_f->sk_long == sk_long) {
			f = bpf_refcount_acquire(gc_f);
			break;
		}

		/* To be removed from the rbtree */
		if (fcnt < FQ_GC_MAX && fq_gc_candidate(gc_f, jiffies_now))
			tofree[fcnt++] = p;

		if (gc_f->sk_long > sk_long)
			p = bpf_rbtree_left(&root->root, p);
		else
			p = bpf_rbtree_right(&root->root, p);
	}

	/* remove from the rbtree */
	for (i = 0; i < fcnt; i++) {
		p = tofree[i];
		tofree[i] = bpf_rbtree_remove(&root->root, p);
	}

	bpf_spin_unlock(&root->lock);

	/* bpf_obj_drop the fq_flow(s) that have just been removed
	 * from the rbtree.
	 */
	for (i = 0; i < fcnt; i++) {
		p = tofree[i];
		if (p) {
			gc_f = bpf_rb_entry(p, struct fq_flow, fq_node);
			bpf_obj_drop(gc_f);
		}
	}

	return f;

}

The above simplified code needs to traverse the rbtree for two purposes,
1) find the flow with the desired sk_long value
2) while searching for the sk_long, collect flows that are
   the fq_gc_candidate. They will be removed from the rbtree.

This patch adds the bpf_rbtree_{root,left,right} kfunc to enable
the rbtree traversal. The returned bpf_rb_node pointer will be a
non-owning reference which is the same as the returned pointer
of the exisiting bpf_rbtree_first kfunc.

Acked-by: default avatarKumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: default avatarMartin KaFai Lau <martin.lau@kernel.org>
Link: https://lore.kernel.org/r/20250506015857.817950-4-martin.lau@linux.dev


Signed-off-by: default avatarAlexei Starovoitov <ast@kernel.org>
parent 7faccdf4
Loading
Loading
Loading
Loading
+30 −0
Original line number Diff line number Diff line
@@ -2366,6 +2366,33 @@ __bpf_kfunc struct bpf_rb_node *bpf_rbtree_first(struct bpf_rb_root *root)
	return (struct bpf_rb_node *)rb_first_cached(r);
}

__bpf_kfunc struct bpf_rb_node *bpf_rbtree_root(struct bpf_rb_root *root)
{
	struct rb_root_cached *r = (struct rb_root_cached *)root;

	return (struct bpf_rb_node *)r->rb_root.rb_node;
}

__bpf_kfunc struct bpf_rb_node *bpf_rbtree_left(struct bpf_rb_root *root, struct bpf_rb_node *node)
{
	struct bpf_rb_node_kern *node_internal = (struct bpf_rb_node_kern *)node;

	if (READ_ONCE(node_internal->owner) != root)
		return NULL;

	return (struct bpf_rb_node *)node_internal->rb_node.rb_left;
}

__bpf_kfunc struct bpf_rb_node *bpf_rbtree_right(struct bpf_rb_root *root, struct bpf_rb_node *node)
{
	struct bpf_rb_node_kern *node_internal = (struct bpf_rb_node_kern *)node;

	if (READ_ONCE(node_internal->owner) != root)
		return NULL;

	return (struct bpf_rb_node *)node_internal->rb_node.rb_right;
}

/**
 * bpf_task_acquire - Acquire a reference to a task. A task acquired by this
 * kfunc which is not stored in a map as a kptr, must be released by calling
@@ -3214,6 +3241,9 @@ BTF_ID_FLAGS(func, bpf_task_release, KF_RELEASE)
BTF_ID_FLAGS(func, bpf_rbtree_remove, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_rbtree_add_impl)
BTF_ID_FLAGS(func, bpf_rbtree_first, KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_rbtree_root, KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_rbtree_left, KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_rbtree_right, KF_RET_NULL)

#ifdef CONFIG_CGROUPS
BTF_ID_FLAGS(func, bpf_cgroup_acquire, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
+18 −4
Original line number Diff line number Diff line
@@ -12081,6 +12081,9 @@ enum special_kfunc_type {
	KF_bpf_rbtree_remove,
	KF_bpf_rbtree_add_impl,
	KF_bpf_rbtree_first,
	KF_bpf_rbtree_root,
	KF_bpf_rbtree_left,
	KF_bpf_rbtree_right,
	KF_bpf_dynptr_from_skb,
	KF_bpf_dynptr_from_xdp,
	KF_bpf_dynptr_slice,
@@ -12121,6 +12124,9 @@ BTF_ID(func, bpf_rdonly_cast)
BTF_ID(func, bpf_rbtree_remove)
BTF_ID(func, bpf_rbtree_add_impl)
BTF_ID(func, bpf_rbtree_first)
BTF_ID(func, bpf_rbtree_root)
BTF_ID(func, bpf_rbtree_left)
BTF_ID(func, bpf_rbtree_right)
#ifdef CONFIG_NET
BTF_ID(func, bpf_dynptr_from_skb)
BTF_ID(func, bpf_dynptr_from_xdp)
@@ -12156,6 +12162,9 @@ BTF_ID(func, bpf_rcu_read_unlock)
BTF_ID(func, bpf_rbtree_remove)
BTF_ID(func, bpf_rbtree_add_impl)
BTF_ID(func, bpf_rbtree_first)
BTF_ID(func, bpf_rbtree_root)
BTF_ID(func, bpf_rbtree_left)
BTF_ID(func, bpf_rbtree_right)
#ifdef CONFIG_NET
BTF_ID(func, bpf_dynptr_from_skb)
BTF_ID(func, bpf_dynptr_from_xdp)
@@ -12591,7 +12600,10 @@ static bool is_bpf_rbtree_api_kfunc(u32 btf_id)
{
	return btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl] ||
	       btf_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
	       btf_id == special_kfunc_list[KF_bpf_rbtree_first];
	       btf_id == special_kfunc_list[KF_bpf_rbtree_first] ||
	       btf_id == special_kfunc_list[KF_bpf_rbtree_root] ||
	       btf_id == special_kfunc_list[KF_bpf_rbtree_left] ||
	       btf_id == special_kfunc_list[KF_bpf_rbtree_right];
}
static bool is_bpf_iter_num_api_kfunc(u32 btf_id)
@@ -12691,7 +12703,9 @@ static bool check_kfunc_is_graph_node_api(struct bpf_verifier_env *env,
		break;
	case BPF_RB_NODE:
		ret = (kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
		       kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl]);
		       kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl] ||
		       kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_left] ||
		       kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_right]);
		break;
	default:
		verbose(env, "verifier internal error: unexpected graph node argument type %s\n",
@@ -13216,11 +13230,11 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
				}
			} else {
				if (!type_is_non_owning_ref(reg->type) || reg->ref_obj_id) {
					verbose(env, "rbtree_remove node input must be non-owning ref\n");
					verbose(env, "%s node input must be non-owning ref\n", func_name);
					return -EINVAL;
				}
				if (in_rbtree_lock_required_cb(env)) {
					verbose(env, "rbtree_remove not allowed in rbtree cb\n");
					verbose(env, "%s not allowed in rbtree cb\n", func_name);
					return -EINVAL;
				}
			}