Commit 7251d090, authored by Chuyi Zhou, committed by Alexei Starovoitov
Browse files

bpf: Introduce css open-coded iterator kfuncs



This patch adds the kfuncs bpf_iter_css_{new,next,destroy}, which allow
creation and manipulation of struct bpf_iter_css in open-coded iterator
style. These kfuncs wrap css_next_descendant_{pre,post}.
css_iter can be used for:

1) iterating a specific cgroup tree in pre/post/up order

2) iterating a cgroup_subsystem in a BPF program, like
for_each_mem_cgroup_tree/cpuset_for_each_descendant_pre in the kernel.

The API design is consistent with cgroup_iter. bpf_iter_css_new accepts
parameters defining iteration order and starting css. Here we also reuse
BPF_CGROUP_ITER_DESCENDANTS_PRE, BPF_CGROUP_ITER_DESCENDANTS_POST,
BPF_CGROUP_ITER_ANCESTORS_UP enums.

Signed-off-by: Chuyi Zhou <zhouchuyi@bytedance.com>
Acked-by: Tejun Heo <tj@kernel.org>
Link: https://lore.kernel.org/r/20231018061746.111364-5-zhouchuyi@bytedance.com


Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parent c68a78ff
Loading
Loading
Loading
Loading
+65 −0
Original line number Diff line number Diff line
@@ -294,3 +294,68 @@ static int __init bpf_cgroup_iter_init(void)
}

late_initcall(bpf_cgroup_iter_init);

/*
 * Opaque css iterator handle passed to bpf_iter_css_{new,next,destroy}.
 * It is three 8-byte-aligned u64 slots of storage; the kfuncs reinterpret
 * it as struct bpf_iter_css_kern, so callers must not rely on its contents.
 */
struct bpf_iter_css {
	__u64 __opaque[3];
} __attribute__((aligned(8)));

/*
 * Kernel-side view of struct bpf_iter_css. bpf_iter_css_new() asserts at
 * build time that this struct is no larger than, and has the same alignment
 * as, struct bpf_iter_css.
 */
struct bpf_iter_css_kern {
	/* css the walk is anchored at; NULL makes bpf_iter_css_next() return NULL */
	struct cgroup_subsys_state *start;
	/* css returned by the most recent bpf_iter_css_next(); NULL before the first call */
	struct cgroup_subsys_state *pos;
	/* traversal order: one of the BPF_CGROUP_ITER_* values accepted by bpf_iter_css_new() */
	unsigned int flags;
} __attribute__((aligned(8)));

/*
 * The kfuncs that follow are global functions defined without a preceding
 * prototype; suppress -Wmissing-prototypes for them until the matching
 * __diag_pop().
 */
__diag_push();
__diag_ignore_all("-Wmissing-prototypes",
		"Global functions as their definitions will be in vmlinux BTF");

/*
 * bpf_iter_css_new - set up an open-coded css iterator.
 * @it:    caller-provided opaque iterator storage, used here as a
 *         struct bpf_iter_css_kern
 * @start: css the walk is anchored at
 * @flags: traversal order; must be BPF_CGROUP_ITER_DESCENDANTS_PRE,
 *         BPF_CGROUP_ITER_DESCENDANTS_POST or BPF_CGROUP_ITER_ANCESTORS_UP
 *
 * Returns 0 on success, or -EINVAL for any other @flags value. On failure
 * only ->start is written (to NULL), which makes bpf_iter_css_next() on
 * this iterator return NULL.
 */
__bpf_kfunc int bpf_iter_css_new(struct bpf_iter_css *it,
		struct cgroup_subsys_state *start, unsigned int flags)
{
	struct bpf_iter_css_kern *state = (void *)it;

	/* The opaque storage must be able to hold the kernel-side layout. */
	BUILD_BUG_ON(sizeof(struct bpf_iter_css_kern) > sizeof(struct bpf_iter_css));
	BUILD_BUG_ON(__alignof__(struct bpf_iter_css_kern) != __alignof__(struct bpf_iter_css));

	/* Keep the iterator inert until @flags has been validated. */
	state->start = NULL;

	if (flags != BPF_CGROUP_ITER_DESCENDANTS_PRE &&
	    flags != BPF_CGROUP_ITER_DESCENDANTS_POST &&
	    flags != BPF_CGROUP_ITER_ANCESTORS_UP)
		return -EINVAL;

	state->flags = flags;
	state->pos = NULL;
	state->start = start;
	return 0;
}

__bpf_kfunc struct cgroup_subsys_state *bpf_iter_css_next(struct bpf_iter_css *it)
{
	struct bpf_iter_css_kern *kit = (void *)it;

	if (!kit->start)
		return NULL;

	switch (kit->flags) {
	case BPF_CGROUP_ITER_DESCENDANTS_PRE:
		kit->pos = css_next_descendant_pre(kit->pos, kit->start);
		break;
	case BPF_CGROUP_ITER_DESCENDANTS_POST:
		kit->pos = css_next_descendant_post(kit->pos, kit->start);
		break;
	case BPF_CGROUP_ITER_ANCESTORS_UP:
		kit->pos = kit->pos ? kit->pos->parent : kit->start;
	}

	return kit->pos;
}

/*
 * bpf_iter_css_destroy - tear down a css iterator.
 * @it: iterator storage; not accessed
 *
 * Intentionally empty: bpf_iter_css_new() and bpf_iter_css_next() in this
 * file only store pointers and flags in the iterator, so there is nothing
 * to release here.
 */
__bpf_kfunc void bpf_iter_css_destroy(struct bpf_iter_css *it)
{
}

/* End of the region in which -Wmissing-prototypes is suppressed. */
__diag_pop();
 No newline at end of file
+3 −0
Original line number Diff line number Diff line
@@ -2566,6 +2566,9 @@ BTF_ID_FLAGS(func, bpf_iter_css_task_destroy, KF_ITER_DESTROY)
BTF_ID_FLAGS(func, bpf_iter_task_new, KF_ITER_NEW | KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_iter_task_next, KF_ITER_NEXT | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_iter_task_destroy, KF_ITER_DESTROY)
/*
 * css open-coded iterator kfuncs. NOTE(review): going by the flag names,
 * _new is the iterator constructor and requires trusted arguments, _next
 * may return NULL, and _destroy is the iterator destructor - confirm
 * against the KF_* flag definitions.
 */
BTF_ID_FLAGS(func, bpf_iter_css_new, KF_ITER_NEW | KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_iter_css_next, KF_ITER_NEXT | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_iter_css_destroy, KF_ITER_DESTROY)
BTF_ID_FLAGS(func, bpf_dynptr_adjust)
BTF_ID_FLAGS(func, bpf_dynptr_is_null)
BTF_ID_FLAGS(func, bpf_dynptr_is_rdonly)
+6 −0
Original line number Diff line number Diff line
@@ -471,4 +471,10 @@ extern int bpf_iter_task_new(struct bpf_iter_task *it,
extern struct task_struct *bpf_iter_task_next(struct bpf_iter_task *it) __weak __ksym;
extern void bpf_iter_task_destroy(struct bpf_iter_task *it) __weak __ksym;

/*
 * css iterator kfuncs, declared as weak ksyms.
 * bpf_iter_css_new() takes the iterator storage, the css to start from and
 * the traversal-order flags; bpf_iter_css_next() returns the next css;
 * bpf_iter_css_destroy() ends the iteration.
 */
struct bpf_iter_css;
extern int bpf_iter_css_new(struct bpf_iter_css *it,
				struct cgroup_subsys_state *start, unsigned int flags) __weak __ksym;
extern struct cgroup_subsys_state *bpf_iter_css_next(struct bpf_iter_css *it) __weak __ksym;
extern void bpf_iter_css_destroy(struct bpf_iter_css *it) __weak __ksym;

#endif