Merge branch 'bpf-implement-mprog-api-on-top-of-existing-cgroup-progs' (4d2815a1) · Commits · git / linux-net

include/linux/bpf-cgroup-defs.h

+1 −0

Original line number	Diff line number	Diff line
		@@ -63,6 +63,7 @@ struct cgroup_bpf {
		*/
		struct hlist_head progs[MAX_CGROUP_BPF_ATTACH_TYPE];
		u8 flags[MAX_CGROUP_BPF_ATTACH_TYPE];
		u64 revisions[MAX_CGROUP_BPF_ATTACH_TYPE];

		/* list of cgroup shared storages */
		struct list_head storages;

include/uapi/linux/bpf.h

+7 −0

Original line number	Diff line number	Diff line
		@@ -1794,6 +1794,13 @@ union bpf_attr {
		};
		__u64 expected_revision;
		} netkit;
		struct {
		union {
		__u32 relative_fd;
		__u32 relative_id;
		};
		__u64 expected_revision;
		} cgroup;
		};
		} link_create;

kernel/bpf/cgroup.c

+160 −22

Original line number	Diff line number	Diff line
		@@ -658,6 +658,116 @@ static struct bpf_prog_list find_attach_entry(struct hlist_head progs,
		return NULL;
		}

		static struct bpf_link *bpf_get_anchor_link(u32 flags, u32 id_or_fd)
		{
		struct bpf_link *link = ERR_PTR(-EINVAL);

		if (flags & BPF_F_ID)
		link = bpf_link_by_id(id_or_fd);
		else if (id_or_fd)
		link = bpf_link_get_from_fd(id_or_fd);
		return link;
		}

		static struct bpf_prog *bpf_get_anchor_prog(u32 flags, u32 id_or_fd)
		{
		struct bpf_prog *prog = ERR_PTR(-EINVAL);

		if (flags & BPF_F_ID)
		prog = bpf_prog_by_id(id_or_fd);
		else if (id_or_fd)
		prog = bpf_prog_get(id_or_fd);
		return prog;
		}

		static struct bpf_prog_list get_prog_list(struct hlist_head progs, struct bpf_prog *prog,
		struct bpf_cgroup_link *link, u32 flags, u32 id_or_fd)
		{
		bool is_link = flags & BPF_F_LINK, is_id = flags & BPF_F_ID;
		struct bpf_prog_list pltmp, pl = ERR_PTR(-EINVAL);
		bool preorder = flags & BPF_F_PREORDER;
		struct bpf_link *anchor_link = NULL;
		struct bpf_prog *anchor_prog = NULL;
		bool is_before, is_after;

		is_before = flags & BPF_F_BEFORE;
		is_after = flags & BPF_F_AFTER;
		if (is_link \|\| is_id \|\| id_or_fd) {
		/* flags must have either BPF_F_BEFORE or BPF_F_AFTER */
		if (is_before == is_after)
		return ERR_PTR(-EINVAL);
		if ((is_link && !link) \|\| (!is_link && !prog))
		return ERR_PTR(-EINVAL);
		} else if (!hlist_empty(progs)) {
		/* flags cannot have both BPF_F_BEFORE and BPF_F_AFTER */
		if (is_before && is_after)
		return ERR_PTR(-EINVAL);
		}

		if (is_link) {
		anchor_link = bpf_get_anchor_link(flags, id_or_fd);
		if (IS_ERR(anchor_link))
		return ERR_PTR(PTR_ERR(anchor_link));
		} else if (is_id \|\| id_or_fd) {
		anchor_prog = bpf_get_anchor_prog(flags, id_or_fd);
		if (IS_ERR(anchor_prog))
		return ERR_PTR(PTR_ERR(anchor_prog));
		}

		if (!anchor_prog && !anchor_link) {
		/* if there is no anchor_prog/anchor_link, then BPF_F_PREORDER
		* doesn't matter since either prepend or append to a combined
		* list of progs will end up with correct result.
		*/
		hlist_for_each_entry(pltmp, progs, node) {
		if (is_before)
		return pltmp;
		if (pltmp->node.next)
		continue;
		return pltmp;
		}
		return NULL;
		}

		hlist_for_each_entry(pltmp, progs, node) {
		if ((anchor_prog && anchor_prog == pltmp->prog) \|\|
		(anchor_link && anchor_link == &pltmp->link->link)) {
		if (!!(pltmp->flags & BPF_F_PREORDER) != preorder)
		goto out;
		pl = pltmp;
		goto out;
		}
		}

		pl = ERR_PTR(-ENOENT);
		out:
		if (anchor_link)
		bpf_link_put(anchor_link);
		else
		bpf_prog_put(anchor_prog);
		return pl;
		}

		static int insert_pl_to_hlist(struct bpf_prog_list pl, struct hlist_head progs,
		struct bpf_prog prog, struct bpf_cgroup_link link,
		u32 flags, u32 id_or_fd)
		{
		struct bpf_prog_list *pltmp;

		pltmp = get_prog_list(progs, prog, link, flags, id_or_fd);
		if (IS_ERR(pltmp))
		return PTR_ERR(pltmp);

		if (!pltmp)
		hlist_add_head(&pl->node, progs);
		else if (flags & BPF_F_BEFORE)
		hlist_add_before(&pl->node, &pltmp->node);
		else
		hlist_add_behind(&pl->node, &pltmp->node);

		return 0;
		}

		/**
		* __cgroup_bpf_attach() - Attach the program or the link to a cgroup, and
		* propagate the change to descendants
		@@ -667,6 +777,8 @@ static struct bpf_prog_list find_attach_entry(struct hlist_head progs,
		* @replace_prog: Previously attached program to replace if BPF_F_REPLACE is set
		* @type: Type of attach operation
		* @flags: Option flags
		* @id_or_fd: Relative prog id or fd
		* @revision: bpf_prog_list revision
		*
		* Exactly one of @prog or @link can be non-null.
		* Must be called with cgroup_mutex held.
		@@ -674,7 +786,8 @@ static struct bpf_prog_list find_attach_entry(struct hlist_head progs,
		static int __cgroup_bpf_attach(struct cgroup *cgrp,
		struct bpf_prog prog, struct bpf_prog replace_prog,
		struct bpf_cgroup_link *link,
		enum bpf_attach_type type, u32 flags)
		enum bpf_attach_type type, u32 flags, u32 id_or_fd,
		u64 revision)
		{
		u32 saved_flags = (flags & (BPF_F_ALLOW_OVERRIDE \| BPF_F_ALLOW_MULTI));
		struct bpf_prog *old_prog = NULL;
		@@ -690,6 +803,9 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp,
		((flags & BPF_F_REPLACE) && !(flags & BPF_F_ALLOW_MULTI)))
		/* invalid combination */
		return -EINVAL;
		if ((flags & BPF_F_REPLACE) && (flags & (BPF_F_BEFORE \| BPF_F_AFTER)))
		/* only either replace or insertion with before/after */
		return -EINVAL;
		if (link && (prog \|\| replace_prog))
		/* only either link or prog/replace_prog can be specified */
		return -EINVAL;
		@@ -700,6 +816,8 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp,
		atype = bpf_cgroup_atype_find(type, new_prog->aux->attach_btf_id);
		if (atype < 0)
		return -EINVAL;
		if (revision && revision != cgrp->bpf.revisions[atype])
		return -ESTALE;

		progs = &cgrp->bpf.progs[atype];

		@@ -728,21 +846,17 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp,
		if (pl) {
		old_prog = pl->prog;
		} else {
		struct hlist_node *last = NULL;

		pl = kmalloc(sizeof(*pl), GFP_KERNEL);
		if (!pl) {
		bpf_cgroup_storages_free(new_storage);
		return -ENOMEM;
		}
		if (hlist_empty(progs))
		hlist_add_head(&pl->node, progs);
		else
		hlist_for_each(last, progs) {
		if (last->next)
		continue;
		hlist_add_behind(&pl->node, last);
		break;

		err = insert_pl_to_hlist(pl, progs, prog, link, flags, id_or_fd);
		if (err) {
		kfree(pl);
		bpf_cgroup_storages_free(new_storage);
		return err;
		}
		}

		@@ -762,6 +876,7 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp,
		if (err)
		goto cleanup_trampoline;

		cgrp->bpf.revisions[atype] += 1;
		if (old_prog) {
		if (type == BPF_LSM_CGROUP)
		bpf_trampoline_unlink_cgroup_shim(old_prog);
		@@ -793,12 +908,13 @@ static int cgroup_bpf_attach(struct cgroup *cgrp,
		struct bpf_prog prog, struct bpf_prog replace_prog,
		struct bpf_cgroup_link *link,
		enum bpf_attach_type type,
		u32 flags)
		u32 flags, u32 id_or_fd, u64 revision)
		{
		int ret;

		cgroup_lock();
		ret = __cgroup_bpf_attach(cgrp, prog, replace_prog, link, type, flags);
		ret = __cgroup_bpf_attach(cgrp, prog, replace_prog, link, type, flags,
		id_or_fd, revision);
		cgroup_unlock();
		return ret;
		}
		@@ -886,6 +1002,7 @@ static int __cgroup_bpf_replace(struct cgroup *cgrp,
		if (!found)
		return -ENOENT;

		cgrp->bpf.revisions[atype] += 1;
		old_prog = xchg(&link->link.prog, new_prog);
		replace_effective_prog(cgrp, atype, link);
		bpf_prog_put(old_prog);
		@@ -1011,12 +1128,14 @@ static void purge_effective_progs(struct cgroup cgrp, struct bpf_prog prog,
		* @prog: A program to detach or NULL
		* @link: A link to detach or NULL
		* @type: Type of detach operation
		* @revision: bpf_prog_list revision
		*
		* At most one of @prog or @link can be non-NULL.
		* Must be called with cgroup_mutex held.
		*/
		static int __cgroup_bpf_detach(struct cgroup cgrp, struct bpf_prog prog,
		struct bpf_cgroup_link *link, enum bpf_attach_type type)
		struct bpf_cgroup_link *link, enum bpf_attach_type type,
		u64 revision)
		{
		enum cgroup_bpf_attach_type atype;
		struct bpf_prog *old_prog;
		@@ -1034,6 +1153,9 @@ static int __cgroup_bpf_detach(struct cgroup cgrp, struct bpf_prog prog,
		if (atype < 0)
		return -EINVAL;

		if (revision && revision != cgrp->bpf.revisions[atype])
		return -ESTALE;

		progs = &cgrp->bpf.progs[atype];
		flags = cgrp->bpf.flags[atype];

		@@ -1059,6 +1181,7 @@ static int __cgroup_bpf_detach(struct cgroup cgrp, struct bpf_prog prog,

		/* now can actually delete it from this cgroup list */
		hlist_del(&pl->node);
		cgrp->bpf.revisions[atype] += 1;

		kfree(pl);
		if (hlist_empty(progs))
		@@ -1074,12 +1197,12 @@ static int __cgroup_bpf_detach(struct cgroup cgrp, struct bpf_prog prog,
		}

		static int cgroup_bpf_detach(struct cgroup cgrp, struct bpf_prog prog,
		enum bpf_attach_type type)
		enum bpf_attach_type type, u64 revision)
		{
		int ret;

		cgroup_lock();
		ret = __cgroup_bpf_detach(cgrp, prog, NULL, type);
		ret = __cgroup_bpf_detach(cgrp, prog, NULL, type, revision);
		cgroup_unlock();
		return ret;
		}
		@@ -1097,6 +1220,7 @@ static int __cgroup_bpf_query(struct cgroup cgrp, const union bpf_attr attr,
		struct bpf_prog_array *effective;
		int cnt, ret = 0, i;
		int total_cnt = 0;
		u64 revision = 0;
		u32 flags;

		if (effective_query && prog_attach_flags)
		@@ -1134,6 +1258,10 @@ static int __cgroup_bpf_query(struct cgroup cgrp, const union bpf_attr attr,
		return -EFAULT;
		if (copy_to_user(&uattr->query.prog_cnt, &total_cnt, sizeof(total_cnt)))
		return -EFAULT;
		if (!effective_query && from_atype == to_atype)
		revision = cgrp->bpf.revisions[from_atype];
		if (copy_to_user(&uattr->query.revision, &revision, sizeof(revision)))
		return -EFAULT;
		if (attr->query.prog_cnt == 0 \|\| !prog_ids \|\| !total_cnt)
		/* return early if user requested only program count + flags */
		return 0;
		@@ -1216,7 +1344,8 @@ int cgroup_bpf_prog_attach(const union bpf_attr *attr,
		}

		ret = cgroup_bpf_attach(cgrp, prog, replace_prog, NULL,
		attr->attach_type, attr->attach_flags);
		attr->attach_type, attr->attach_flags,
		attr->relative_fd, attr->expected_revision);

		if (replace_prog)
		bpf_prog_put(replace_prog);
		@@ -1238,7 +1367,7 @@ int cgroup_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype)
		if (IS_ERR(prog))
		prog = NULL;

		ret = cgroup_bpf_detach(cgrp, prog, attr->attach_type);
		ret = cgroup_bpf_detach(cgrp, prog, attr->attach_type, attr->expected_revision);
		if (prog)
		bpf_prog_put(prog);

		@@ -1267,7 +1396,7 @@ static void bpf_cgroup_link_release(struct bpf_link *link)
		}

		WARN_ON(__cgroup_bpf_detach(cg_link->cgroup, NULL, cg_link,
		cg_link->type));
		cg_link->type, 0));
		if (cg_link->type == BPF_LSM_CGROUP)
		bpf_trampoline_unlink_cgroup_shim(cg_link->link.prog);

		@@ -1339,6 +1468,13 @@ static const struct bpf_link_ops bpf_cgroup_link_lops = {
		.fill_link_info = bpf_cgroup_link_fill_link_info,
		};

		#define BPF_F_LINK_ATTACH_MASK \
		(BPF_F_ID \| \
		BPF_F_BEFORE \| \
		BPF_F_AFTER \| \
		BPF_F_PREORDER \| \
		BPF_F_LINK)

		int cgroup_bpf_link_attach(const union bpf_attr attr, struct bpf_prog prog)
		{
		struct bpf_link_primer link_primer;
		@@ -1346,7 +1482,7 @@ int cgroup_bpf_link_attach(const union bpf_attr attr, struct bpf_prog prog)
		struct cgroup *cgrp;
		int err;

		if (attr->link_create.flags)
		if (attr->link_create.flags & (~BPF_F_LINK_ATTACH_MASK))
		return -EINVAL;

		cgrp = cgroup_get_from_fd(attr->link_create.target_fd);
		@@ -1370,7 +1506,9 @@ int cgroup_bpf_link_attach(const union bpf_attr attr, struct bpf_prog prog)
		}

		err = cgroup_bpf_attach(cgrp, NULL, NULL, link,
		link->type, BPF_F_ALLOW_MULTI);
		link->type, BPF_F_ALLOW_MULTI \| attr->link_create.flags,
		attr->link_create.cgroup.relative_fd,
		attr->link_create.cgroup.expected_revision);
		if (err) {
		bpf_link_cleanup(&link_primer);
		goto out_put_cgroup;

kernel/bpf/syscall.c

+31 −15

Original line number	Diff line number	Diff line
		@@ -4186,6 +4186,25 @@ static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
		}
		}

		static bool is_cgroup_prog_type(enum bpf_prog_type ptype, enum bpf_attach_type atype,
		bool check_atype)
		{
		switch (ptype) {
		case BPF_PROG_TYPE_CGROUP_DEVICE:
		case BPF_PROG_TYPE_CGROUP_SKB:
		case BPF_PROG_TYPE_CGROUP_SOCK:
		case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
		case BPF_PROG_TYPE_CGROUP_SOCKOPT:
		case BPF_PROG_TYPE_CGROUP_SYSCTL:
		case BPF_PROG_TYPE_SOCK_OPS:
		return true;
		case BPF_PROG_TYPE_LSM:
		return check_atype ? atype == BPF_LSM_CGROUP : true;
		default:
		return false;
		}
		}

		#define BPF_PROG_ATTACH_LAST_FIELD expected_revision

		#define BPF_F_ATTACH_MASK_BASE \
		@@ -4216,6 +4235,9 @@ static int bpf_prog_attach(const union bpf_attr *attr)
		if (bpf_mprog_supported(ptype)) {
		if (attr->attach_flags & ~BPF_F_ATTACH_MASK_MPROG)
		return -EINVAL;
		} else if (is_cgroup_prog_type(ptype, 0, false)) {
		if (attr->attach_flags & ~(BPF_F_ATTACH_MASK_BASE \| BPF_F_ATTACH_MASK_MPROG))
		return -EINVAL;
		} else {
		if (attr->attach_flags & ~BPF_F_ATTACH_MASK_BASE)
		return -EINVAL;
		@@ -4233,6 +4255,11 @@ static int bpf_prog_attach(const union bpf_attr *attr)
		return -EINVAL;
		}

		if (is_cgroup_prog_type(ptype, prog->expected_attach_type, true)) {
		ret = cgroup_bpf_prog_attach(attr, ptype, prog);
		goto out;
		}

		switch (ptype) {
		case BPF_PROG_TYPE_SK_SKB:
		case BPF_PROG_TYPE_SK_MSG:
		@@ -4244,20 +4271,6 @@ static int bpf_prog_attach(const union bpf_attr *attr)
		case BPF_PROG_TYPE_FLOW_DISSECTOR:
		ret = netns_bpf_prog_attach(attr, prog);
		break;
		case BPF_PROG_TYPE_CGROUP_DEVICE:
		case BPF_PROG_TYPE_CGROUP_SKB:
		case BPF_PROG_TYPE_CGROUP_SOCK:
		case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
		case BPF_PROG_TYPE_CGROUP_SOCKOPT:
		case BPF_PROG_TYPE_CGROUP_SYSCTL:
		case BPF_PROG_TYPE_SOCK_OPS:
		case BPF_PROG_TYPE_LSM:
		if (ptype == BPF_PROG_TYPE_LSM &&
		prog->expected_attach_type != BPF_LSM_CGROUP)
		ret = -EINVAL;
		else
		ret = cgroup_bpf_prog_attach(attr, ptype, prog);
		break;
		case BPF_PROG_TYPE_SCHED_CLS:
		if (attr->attach_type == BPF_TCX_INGRESS \|\|
		attr->attach_type == BPF_TCX_EGRESS)
		@@ -4268,7 +4281,7 @@ static int bpf_prog_attach(const union bpf_attr *attr)
		default:
		ret = -EINVAL;
		}

		out:
		if (ret)
		bpf_prog_put(prog);
		return ret;
		@@ -4296,6 +4309,9 @@ static int bpf_prog_detach(const union bpf_attr *attr)
		if (IS_ERR(prog))
		return PTR_ERR(prog);
		}
		} else if (is_cgroup_prog_type(ptype, 0, false)) {
		if (attr->attach_flags \|\| attr->relative_fd)
		return -EINVAL;
		} else if (attr->attach_flags \|\|
		attr->relative_fd \|\|
		attr->expected_revision) {

kernel/cgroup/cgroup.c

+5 −0

Original line number	Diff line number	Diff line
		@@ -2074,6 +2074,11 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
		for_each_subsys(ss, ssid)
		INIT_LIST_HEAD(&cgrp->e_csets[ssid]);

		#ifdef CONFIG_CGROUP_BPF
		for (int i = 0; i < ARRAY_SIZE(cgrp->bpf.revisions); i++)
		cgrp->bpf.revisions[i] = 1;
		#endif

		init_waitqueue_head(&cgrp->offline_waitq);
		INIT_WORK(&cgrp->release_agent_work, cgroup1_release_agent);
		}