Commit 752b8070 authored by Matt Bobrowski, committed by Alexei Starovoitov
Browse files

bpf: add new BPF_CGROUP_ITER_CHILDREN control option



Currently, the BPF cgroup iterator supports walking descendants in
either pre-order (BPF_CGROUP_ITER_DESCENDANTS_PRE) or post-order
(BPF_CGROUP_ITER_DESCENDANTS_POST). These modes perform an exhaustive
depth-first search (DFS) of the hierarchy. In scenarios where a BPF
program may need to inspect only the direct children of a given parent
cgroup, a full DFS is unnecessarily expensive.

This patch introduces a new BPF cgroup iterator control option,
BPF_CGROUP_ITER_CHILDREN. This control option restricts the traversal
to the immediate children of a specified parent cgroup, allowing for
more targeted and efficient iteration, particularly when exhaustive
depth-first search (DFS) traversal is not required.

Signed-off-by: Matt Bobrowski <mattbobrowski@google.com>
Link: https://lore.kernel.org/r/20260127085112.3608687-1-mattbobrowski@google.com


Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parent 8016abd6
Loading
Loading
Loading
Loading
+8 −0
Original line number Diff line number Diff line
@@ -119,6 +119,14 @@ enum bpf_cgroup_iter_order {
	BPF_CGROUP_ITER_DESCENDANTS_PRE,	/* walk descendants in pre-order. */
	BPF_CGROUP_ITER_DESCENDANTS_POST,	/* walk descendants in post-order. */
	BPF_CGROUP_ITER_ANCESTORS_UP,		/* walk ancestors upward. */
	/*
	 * Walks the immediate children of the specified parent
	 * cgroup_subsys_state. Unlike BPF_CGROUP_ITER_DESCENDANTS_PRE,
	 * BPF_CGROUP_ITER_DESCENDANTS_POST, and BPF_CGROUP_ITER_ANCESTORS_UP
	 * the iterator does not include the specified parent as one of the
	 * returned iterator elements.
	 */
	BPF_CGROUP_ITER_CHILDREN,
};

union bpf_iter_link_info {
+21 −5
Original line number Diff line number Diff line
@@ -8,12 +8,13 @@

#include "../cgroup/cgroup-internal.h"  /* cgroup_mutex and cgroup_is_dead */

/* cgroup_iter provides four modes of traversal to the cgroup hierarchy.
/* cgroup_iter provides five modes of traversal to the cgroup hierarchy.
 *
 *  1. Walk the descendants of a cgroup in pre-order.
 *  2. Walk the descendants of a cgroup in post-order.
 *  3. Walk the ancestors of a cgroup.
 *  4. Show the given cgroup only.
 *  5. Walk the children of a given parent cgroup.
 *
 * For walking descendants, cgroup_iter can walk in either pre-order or
 * post-order. For walking ancestors, the iter walks up from a cgroup to
@@ -78,6 +79,8 @@ static void *cgroup_iter_seq_start(struct seq_file *seq, loff_t *pos)
		return css_next_descendant_pre(NULL, p->start_css);
	else if (p->order == BPF_CGROUP_ITER_DESCENDANTS_POST)
		return css_next_descendant_post(NULL, p->start_css);
	else if (p->order == BPF_CGROUP_ITER_CHILDREN)
		return css_next_child(NULL, p->start_css);
	else /* BPF_CGROUP_ITER_SELF_ONLY and BPF_CGROUP_ITER_ANCESTORS_UP */
		return p->start_css;
}
@@ -113,6 +116,8 @@ static void *cgroup_iter_seq_next(struct seq_file *seq, void *v, loff_t *pos)
		return css_next_descendant_post(curr, p->start_css);
	else if (p->order == BPF_CGROUP_ITER_ANCESTORS_UP)
		return curr->parent;
	else if (p->order == BPF_CGROUP_ITER_CHILDREN)
		return css_next_child(curr, p->start_css);
	else  /* BPF_CGROUP_ITER_SELF_ONLY */
		return NULL;
}
@@ -200,11 +205,16 @@ static int bpf_iter_attach_cgroup(struct bpf_prog *prog,
	int order = linfo->cgroup.order;
	struct cgroup *cgrp;

	if (order != BPF_CGROUP_ITER_DESCENDANTS_PRE &&
	    order != BPF_CGROUP_ITER_DESCENDANTS_POST &&
	    order != BPF_CGROUP_ITER_ANCESTORS_UP &&
	    order != BPF_CGROUP_ITER_SELF_ONLY)
	switch (order) {
	case BPF_CGROUP_ITER_DESCENDANTS_PRE:
	case BPF_CGROUP_ITER_DESCENDANTS_POST:
	case BPF_CGROUP_ITER_ANCESTORS_UP:
	case BPF_CGROUP_ITER_SELF_ONLY:
	case BPF_CGROUP_ITER_CHILDREN:
		break;
	default:
		return -EINVAL;
	}

	if (fd && id)
		return -EINVAL;
@@ -257,6 +267,8 @@ static void bpf_iter_cgroup_show_fdinfo(const struct bpf_iter_aux_info *aux,
		seq_puts(seq, "order: descendants_post\n");
	else if (aux->cgroup.order == BPF_CGROUP_ITER_ANCESTORS_UP)
		seq_puts(seq, "order: ancestors_up\n");
	else if (aux->cgroup.order == BPF_CGROUP_ITER_CHILDREN)
		seq_puts(seq, "order: children\n");
	else /* BPF_CGROUP_ITER_SELF_ONLY */
		seq_puts(seq, "order: self_only\n");
}
@@ -320,6 +332,7 @@ __bpf_kfunc int bpf_iter_css_new(struct bpf_iter_css *it,
	case BPF_CGROUP_ITER_DESCENDANTS_PRE:
	case BPF_CGROUP_ITER_DESCENDANTS_POST:
	case BPF_CGROUP_ITER_ANCESTORS_UP:
	case BPF_CGROUP_ITER_CHILDREN:
		break;
	default:
		return -EINVAL;
@@ -345,6 +358,9 @@ __bpf_kfunc struct cgroup_subsys_state *bpf_iter_css_next(struct bpf_iter_css *i
	case BPF_CGROUP_ITER_DESCENDANTS_POST:
		kit->pos = css_next_descendant_post(kit->pos, kit->start);
		break;
	case BPF_CGROUP_ITER_CHILDREN:
		kit->pos = css_next_child(kit->pos, kit->start);
		break;
	case BPF_CGROUP_ITER_ANCESTORS_UP:
		kit->pos = kit->pos ? kit->pos->parent : kit->start;
	}
+8 −0
Original line number Diff line number Diff line
@@ -119,6 +119,14 @@ enum bpf_cgroup_iter_order {
	BPF_CGROUP_ITER_DESCENDANTS_PRE,	/* walk descendants in pre-order. */
	BPF_CGROUP_ITER_DESCENDANTS_POST,	/* walk descendants in post-order. */
	BPF_CGROUP_ITER_ANCESTORS_UP,		/* walk ancestors upward. */
	/*
	 * Walks the immediate children of the specified parent
	 * cgroup_subsys_state. Unlike BPF_CGROUP_ITER_DESCENDANTS_PRE,
	 * BPF_CGROUP_ITER_DESCENDANTS_POST, and BPF_CGROUP_ITER_ANCESTORS_UP
	 * the iterator does not include the specified parent as one of the
	 * returned iterator elements.
	 */
	BPF_CGROUP_ITER_CHILDREN,
};

union bpf_iter_link_info {