Commit 5cd0aea0 authored by Alexei Starovoitov
Browse files

Merge branch 'support-bpf_kptr_xchg-into-local-kptr'

Amery Hung says:

====================
Support bpf_kptr_xchg into local kptr

This revision adds substantial changes to patch 2 to support structures
with kptr as the only special btf type. The test is split into
local_kptr_stash and task_kfunc_success to remove dependencies on
bpf_testmod that would break veristat results.

This series allows stashing kptr into local kptr. Currently, kptrs are
only allowed to be stashed into map value with bpf_kptr_xchg(). A
motivating use case of this series is to enable adding referenced kptr to
bpf_rbtree or bpf_list by using allocated object as graph node and the
storage of referenced kptr. For example, a bpf qdisc [0] can enqueue a
referenced kptr to a struct sk_buff into a bpf_list serving as a fifo:

    struct skb_node {
            struct sk_buff __kptr *skb;
            struct bpf_list_node node;
    };

    private(A) struct bpf_spin_lock fifo_lock;
    private(A) struct bpf_list_head fifo __contains(skb_node, node);

    /* In Qdisc_ops.enqueue */
    struct skb_node *skbn;

    skbn = bpf_obj_new(typeof(*skbn));
    if (!skbn)
        goto drop;

    /* skb is a referenced kptr to struct sk_buff acquired earlier
     * but not shown in this code snippet.
     */
    skb = bpf_kptr_xchg(&skbn->skb, skb);
    if (skb)
        /* should not happen; do something below releasing skb to
         * satisfy the verifier */
    	...

    bpf_spin_lock(&fifo_lock);
    bpf_list_push_back(&fifo, &skbn->node);
    bpf_spin_unlock(&fifo_lock);

The implementation first searches for BPF_KPTR when generating program
BTF. Then, we teach the verifier that the destination argument of
bpf_kptr_xchg() can be a local kptr, and use the btf_record in program BTF
to check against the source argument.

This series is mostly developed by Dave, who kindly helped and sent me
the patchset. The selftests in bpf qdisc (WIP) rely on this series to
work.

[0] https://lore.kernel.org/netdev/20240714175130.4051012-10-amery.hung@bytedance.com/
---
v3 -> v4
  - Allow struct in prog btf w/ kptr as the only special field type
  - Split tests of stashing referenced kptr and local kptr
  - v3: https://lore.kernel.org/bpf/20240809005131.3916464-1-amery.hung@bytedance.com/

v2 -> v3
  - Fix prog btf memory leak
  - Test stashing kptr in prog btf
  - Test unstashing kptrs after stashing into local kptrs
  - v2: https://lore.kernel.org/bpf/20240803001145.635887-1-amery.hung@bytedance.com/

v1 -> v2
  - Fix the document for bpf_kptr_xchg()
  - Add a comment explaining changes in the verifier
  - v1: https://lore.kernel.org/bpf/20240728030115.3970543-1-amery.hung@bytedance.com/
====================

Link: https://lore.kernel.org/r/20240813212424.2871455-1-amery.hung@bytedance.com


Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parents f727b13d 91c96842
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -744,7 +744,7 @@ enum bpf_arg_type {
	ARG_PTR_TO_STACK,	/* pointer to stack */
	ARG_PTR_TO_CONST_STR,	/* pointer to a null terminated read-only string */
	ARG_PTR_TO_TIMER,	/* pointer to bpf_timer */
	ARG_PTR_TO_KPTR,	/* pointer to referenced kptr */
	ARG_KPTR_XCHG_DEST,	/* pointer to destination that kptrs are bpf_kptr_xchg'd into */
	ARG_PTR_TO_DYNPTR,      /* pointer to bpf_dynptr. See bpf_type_flag for dynptr type */
	__BPF_ARG_TYPE_MAX,

+5 −4
Original line number Diff line number Diff line
@@ -5519,11 +5519,12 @@ union bpf_attr {
 *		**-EOPNOTSUPP** if the hash calculation failed or **-EINVAL** if
 *		invalid arguments are passed.
 *
 * void *bpf_kptr_xchg(void *map_value, void *ptr)
 * void *bpf_kptr_xchg(void *dst, void *ptr)
 *	Description
 *		Exchange kptr at pointer *map_value* with *ptr*, and return the
 *		old value. *ptr* can be NULL, otherwise it must be a referenced
 *		pointer which will be released when this helper is called.
 *		Exchange kptr at pointer *dst* with *ptr*, and return the old value.
 *		*dst* can be map value or local kptr. *ptr* can be NULL, otherwise
 *		it must be a referenced pointer which will be released when this helper
 *		is called.
 *	Return
 *		The old value of kptr (which can be NULL). The returned pointer
 *		if not NULL, is a reference which must be released using its
+53 −19
Original line number Diff line number Diff line
@@ -3754,6 +3754,7 @@ static int btf_find_field(const struct btf *btf, const struct btf_type *t,
	return -EINVAL;
}

/* Callers have to ensure the life cycle of btf if it is program BTF */
static int btf_parse_kptr(const struct btf *btf, struct btf_field *field,
			  struct btf_field_info *info)
{
@@ -3782,7 +3783,6 @@ static int btf_parse_kptr(const struct btf *btf, struct btf_field *field,
		field->kptr.dtor = NULL;
		id = info->kptr.type_id;
		kptr_btf = (struct btf *)btf;
		btf_get(kptr_btf);
		goto found_dtor;
	}
	if (id < 0)
@@ -5512,36 +5512,70 @@ static const char *alloc_obj_fields[] = {
static struct btf_struct_metas *
btf_parse_struct_metas(struct bpf_verifier_log *log, struct btf *btf)
{
	union {
		struct btf_id_set set;
		struct {
			u32 _cnt;
			u32 _ids[ARRAY_SIZE(alloc_obj_fields)];
		} _arr;
	} aof;
	struct btf_struct_metas *tab = NULL;
	struct btf_id_set *aof;
	int i, n, id, ret;

	BUILD_BUG_ON(offsetof(struct btf_id_set, cnt) != 0);
	BUILD_BUG_ON(sizeof(struct btf_id_set) != sizeof(u32));

	memset(&aof, 0, sizeof(aof));
	aof = kmalloc(sizeof(*aof), GFP_KERNEL | __GFP_NOWARN);
	if (!aof)
		return ERR_PTR(-ENOMEM);
	aof->cnt = 0;

	for (i = 0; i < ARRAY_SIZE(alloc_obj_fields); i++) {
		/* Try to find whether this special type exists in user BTF, and
		 * if so remember its ID so we can easily find it among members
		 * of structs that we iterate in the next loop.
		 */
		struct btf_id_set *new_aof;

		id = btf_find_by_name_kind(btf, alloc_obj_fields[i], BTF_KIND_STRUCT);
		if (id < 0)
			continue;
		aof.set.ids[aof.set.cnt++] = id;

		new_aof = krealloc(aof, offsetof(struct btf_id_set, ids[aof->cnt + 1]),
				   GFP_KERNEL | __GFP_NOWARN);
		if (!new_aof) {
			ret = -ENOMEM;
			goto free_aof;
		}
		aof = new_aof;
		aof->ids[aof->cnt++] = id;
	}

	n = btf_nr_types(btf);
	for (i = 1; i < n; i++) {
		/* Try to find if there are kptrs in user BTF and remember their ID */
		struct btf_id_set *new_aof;
		struct btf_field_info tmp;
		const struct btf_type *t;

	if (!aof.set.cnt)
		t = btf_type_by_id(btf, i);
		if (!t) {
			ret = -EINVAL;
			goto free_aof;
		}

		ret = btf_find_kptr(btf, t, 0, 0, &tmp);
		if (ret != BTF_FIELD_FOUND)
			continue;

		new_aof = krealloc(aof, offsetof(struct btf_id_set, ids[aof->cnt + 1]),
				   GFP_KERNEL | __GFP_NOWARN);
		if (!new_aof) {
			ret = -ENOMEM;
			goto free_aof;
		}
		aof = new_aof;
		aof->ids[aof->cnt++] = i;
	}

	if (!aof->cnt)
		return NULL;
	sort(&aof.set.ids, aof.set.cnt, sizeof(aof.set.ids[0]), btf_id_cmp_func, NULL);
	sort(&aof->ids, aof->cnt, sizeof(aof->ids[0]), btf_id_cmp_func, NULL);

	n = btf_nr_types(btf);
	for (i = 1; i < n; i++) {
		struct btf_struct_metas *new_tab;
		const struct btf_member *member;
@@ -5551,17 +5585,13 @@ btf_parse_struct_metas(struct bpf_verifier_log *log, struct btf *btf)
		int j, tab_cnt;

		t = btf_type_by_id(btf, i);
		if (!t) {
			ret = -EINVAL;
			goto free;
		}
		if (!__btf_type_is_struct(t))
			continue;

		cond_resched();

		for_each_member(j, t, member) {
			if (btf_id_set_contains(&aof.set, member->type))
			if (btf_id_set_contains(aof, member->type))
				goto parse;
		}
		continue;
@@ -5580,7 +5610,8 @@ btf_parse_struct_metas(struct bpf_verifier_log *log, struct btf *btf)
		type = &tab->types[tab->cnt];
		type->btf_id = i;
		record = btf_parse_fields(btf, t, BPF_SPIN_LOCK | BPF_LIST_HEAD | BPF_LIST_NODE |
						  BPF_RB_ROOT | BPF_RB_NODE | BPF_REFCOUNT, t->size);
						  BPF_RB_ROOT | BPF_RB_NODE | BPF_REFCOUNT |
						  BPF_KPTR, t->size);
		/* The record cannot be unset, treat it as an error if so */
		if (IS_ERR_OR_NULL(record)) {
			ret = PTR_ERR_OR_ZERO(record) ?: -EFAULT;
@@ -5589,9 +5620,12 @@ btf_parse_struct_metas(struct bpf_verifier_log *log, struct btf *btf)
		type->record = record;
		tab->cnt++;
	}
	kfree(aof);
	return tab;
free:
	btf_struct_metas_free(tab);
free_aof:
	kfree(aof);
	return ERR_PTR(ret);
}

+3 −3
Original line number Diff line number Diff line
@@ -1619,9 +1619,9 @@ void bpf_wq_cancel_and_free(void *val)
	schedule_work(&work->delete_work);
}

BPF_CALL_2(bpf_kptr_xchg, void *, map_value, void *, ptr)
BPF_CALL_2(bpf_kptr_xchg, void *, dst, void *, ptr)
{
	unsigned long *kptr = map_value;
	unsigned long *kptr = dst;

	/* This helper may be inlined by verifier. */
	return xchg(kptr, (unsigned long)ptr);
@@ -1636,7 +1636,7 @@ static const struct bpf_func_proto bpf_kptr_xchg_proto = {
	.gpl_only     = false,
	.ret_type     = RET_PTR_TO_BTF_ID_OR_NULL,
	.ret_btf_id   = BPF_PTR_POISON,
	.arg1_type    = ARG_PTR_TO_KPTR,
	.arg1_type    = ARG_KPTR_XCHG_DEST,
	.arg2_type    = ARG_PTR_TO_BTF_ID_OR_NULL | OBJ_RELEASE,
	.arg2_btf_id  = BPF_PTR_POISON,
};
+4 −2
Original line number Diff line number Diff line
@@ -550,6 +550,7 @@ void btf_record_free(struct btf_record *rec)
		case BPF_KPTR_PERCPU:
			if (rec->fields[i].kptr.module)
				module_put(rec->fields[i].kptr.module);
			if (btf_is_kernel(rec->fields[i].kptr.btf))
				btf_put(rec->fields[i].kptr.btf);
			break;
		case BPF_LIST_HEAD:
@@ -596,6 +597,7 @@ struct btf_record *btf_record_dup(const struct btf_record *rec)
		case BPF_KPTR_UNREF:
		case BPF_KPTR_REF:
		case BPF_KPTR_PERCPU:
			if (btf_is_kernel(fields[i].kptr.btf))
				btf_get(fields[i].kptr.btf);
			if (fields[i].kptr.module && !try_module_get(fields[i].kptr.module)) {
				ret = -ENXIO;
Loading