Commit 4349efc5 authored by Jakub Kicinski's avatar Jakub Kicinski
Browse files
Daniel Borkmann says:

====================
pull-request: bpf 2024-01-18

We've added 10 non-merge commits during the last 5 day(s) which contain
a total of 12 files changed, 806 insertions(+), 51 deletions(-).

The main changes are:

1) Fix an issue in bpf_iter_udp under backward progress which prevents
   user space process from finishing iteration, from Martin KaFai Lau.

2) Fix BPF verifier to reject variable offset alu on registers with a type
   of PTR_TO_FLOW_KEYS to prevent oob access, from Hao Sun.

3) Follow up fixes for kernel- and libbpf-side logic around handling
   arg:ctx tagged arguments of BPF global subprogs, from Andrii Nakryiko.

* tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf:
  libbpf: warn on unexpected __arg_ctx type when rewriting BTF
  selftests/bpf: add tests confirming type logic in kernel for __arg_ctx
  bpf: enforce types for __arg_ctx-tagged arguments in global subprogs
  bpf: extract bpf_ctx_convert_map logic and make it more reusable
  libbpf: feature-detect arg:ctx tag support in kernel
  selftests/bpf: Add test for alu on PTR_TO_FLOW_KEYS
  bpf: Reject variable offset alu on PTR_TO_FLOW_KEYS
  selftests/bpf: Test udp and tcp iter batching
  bpf: Avoid iter->offset making backward progress in bpf_iter_udp
  bpf: iter_udp: Retry with a larger batch size without going back to the previous bucket
====================

Link: https://lore.kernel.org/r/20240118153936.11769-1-daniel@iogearbox.net


Signed-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parents 9cfd3b50 35ac085a
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -512,7 +512,7 @@ s32 btf_find_dtor_kfunc(struct btf *btf, u32 btf_id);
int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dtors, u32 add_cnt,
				struct module *owner);
struct btf_struct_meta *btf_find_struct_meta(const struct btf *btf, u32 btf_id);
const struct btf_member *
const struct btf_type *
btf_get_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf,
		      const struct btf_type *t, enum bpf_prog_type prog_type,
		      int arg);
+205 −26
Original line number Diff line number Diff line
@@ -5615,21 +5615,46 @@ static u8 bpf_ctx_convert_map[] = {
#undef BPF_MAP_TYPE
#undef BPF_LINK_TYPE

const struct btf_member *
btf_get_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf,
		      const struct btf_type *t, enum bpf_prog_type prog_type,
		      int arg)
static const struct btf_type *find_canonical_prog_ctx_type(enum bpf_prog_type prog_type)
{
	const struct btf_type *conv_struct;
	const struct btf_type *ctx_struct;
	const struct btf_member *ctx_type;
	const char *tname, *ctx_tname;

	conv_struct = bpf_ctx_convert.t;
	if (!conv_struct) {
		bpf_log(log, "btf_vmlinux is malformed\n");
	if (!conv_struct)
		return NULL;
	/* prog_type is valid bpf program type. No need for bounds check. */
	ctx_type = btf_type_member(conv_struct) + bpf_ctx_convert_map[prog_type] * 2;
	/* ctx_type is a pointer to prog_ctx_type in vmlinux.
	 * Like 'struct __sk_buff'
	 */
	return btf_type_by_id(btf_vmlinux, ctx_type->type);
}

static int find_kern_ctx_type_id(enum bpf_prog_type prog_type)
{
	const struct btf_type *conv_struct;
	const struct btf_member *ctx_type;

	conv_struct = bpf_ctx_convert.t;
	if (!conv_struct)
		return -EFAULT;
	/* prog_type is valid bpf program type. No need for bounds check. */
	ctx_type = btf_type_member(conv_struct) + bpf_ctx_convert_map[prog_type] * 2 + 1;
	/* ctx_type is a pointer to prog_ctx_type in vmlinux.
	 * Like 'struct sk_buff'
	 */
	return ctx_type->type;
}

const struct btf_type *
btf_get_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf,
		      const struct btf_type *t, enum bpf_prog_type prog_type,
		      int arg)
{
	const struct btf_type *ctx_type;
	const char *tname, *ctx_tname;

	t = btf_type_by_id(btf, t->type);
	while (btf_type_is_modifier(t))
		t = btf_type_by_id(btf, t->type);
@@ -5646,17 +5671,15 @@ btf_get_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf,
		bpf_log(log, "arg#%d struct doesn't have a name\n", arg);
		return NULL;
	}
	/* prog_type is valid bpf program type. No need for bounds check. */
	ctx_type = btf_type_member(conv_struct) + bpf_ctx_convert_map[prog_type] * 2;
	/* ctx_struct is a pointer to prog_ctx_type in vmlinux.
	 * Like 'struct __sk_buff'
	 */
	ctx_struct = btf_type_by_id(btf_vmlinux, ctx_type->type);
	if (!ctx_struct)

	ctx_type = find_canonical_prog_ctx_type(prog_type);
	if (!ctx_type) {
		bpf_log(log, "btf_vmlinux is malformed\n");
		/* should not happen */
		return NULL;
	}
again:
	ctx_tname = btf_name_by_offset(btf_vmlinux, ctx_struct->name_off);
	ctx_tname = btf_name_by_offset(btf_vmlinux, ctx_type->name_off);
	if (!ctx_tname) {
		/* should not happen */
		bpf_log(log, "Please fix kernel include/linux/bpf_types.h\n");
@@ -5677,28 +5700,167 @@ btf_get_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf,
		/* bpf_user_pt_regs_t is a typedef, so resolve it to
		 * underlying struct and check name again
		 */
		if (!btf_type_is_modifier(ctx_struct))
		if (!btf_type_is_modifier(ctx_type))
			return NULL;
		while (btf_type_is_modifier(ctx_struct))
			ctx_struct = btf_type_by_id(btf_vmlinux, ctx_struct->type);
		while (btf_type_is_modifier(ctx_type))
			ctx_type = btf_type_by_id(btf_vmlinux, ctx_type->type);
		goto again;
	}
	return ctx_type;
}

/* forward declarations for arch-specific underlying types of
 * bpf_user_pt_regs_t; this avoids the need for arch-specific #ifdef
 * compilation guards below for BPF_PROG_TYPE_PERF_EVENT checks, but still
 * works correctly with __builtin_types_compatible_p() on respective
 * architectures
 */
struct user_regs_struct;
struct user_pt_regs;

static int btf_validate_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf,
				      const struct btf_type *t, int arg,
				      enum bpf_prog_type prog_type,
				      enum bpf_attach_type attach_type)
{
	const struct btf_type *ctx_type;
	const char *tname, *ctx_tname;

	if (!btf_is_ptr(t)) {
		bpf_log(log, "arg#%d type isn't a pointer\n", arg);
		return -EINVAL;
	}
	t = btf_type_by_id(btf, t->type);

	/* KPROBE and PERF_EVENT programs allow bpf_user_pt_regs_t typedef */
	if (prog_type == BPF_PROG_TYPE_KPROBE || prog_type == BPF_PROG_TYPE_PERF_EVENT) {
		while (btf_type_is_modifier(t) && !btf_type_is_typedef(t))
			t = btf_type_by_id(btf, t->type);

		if (btf_type_is_typedef(t)) {
			tname = btf_name_by_offset(btf, t->name_off);
			if (tname && strcmp(tname, "bpf_user_pt_regs_t") == 0)
				return 0;
		}
	}

	/* all other program types don't use typedefs for context type */
	while (btf_type_is_modifier(t))
		t = btf_type_by_id(btf, t->type);

	/* `void *ctx __arg_ctx` is always valid */
	if (btf_type_is_void(t))
		return 0;

	tname = btf_name_by_offset(btf, t->name_off);
	if (str_is_empty(tname)) {
		bpf_log(log, "arg#%d type doesn't have a name\n", arg);
		return -EINVAL;
	}

	/* special cases */
	switch (prog_type) {
	case BPF_PROG_TYPE_KPROBE:
		if (__btf_type_is_struct(t) && strcmp(tname, "pt_regs") == 0)
			return 0;
		break;
	case BPF_PROG_TYPE_PERF_EVENT:
		if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct pt_regs) &&
		    __btf_type_is_struct(t) && strcmp(tname, "pt_regs") == 0)
			return 0;
		if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct user_pt_regs) &&
		    __btf_type_is_struct(t) && strcmp(tname, "user_pt_regs") == 0)
			return 0;
		if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct user_regs_struct) &&
		    __btf_type_is_struct(t) && strcmp(tname, "user_regs_struct") == 0)
			return 0;
		break;
	case BPF_PROG_TYPE_RAW_TRACEPOINT:
	case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
		/* allow u64* as ctx */
		if (btf_is_int(t) && t->size == 8)
			return 0;
		break;
	case BPF_PROG_TYPE_TRACING:
		switch (attach_type) {
		case BPF_TRACE_RAW_TP:
			/* tp_btf program is TRACING, so need special case here */
			if (__btf_type_is_struct(t) &&
			    strcmp(tname, "bpf_raw_tracepoint_args") == 0)
				return 0;
			/* allow u64* as ctx */
			if (btf_is_int(t) && t->size == 8)
				return 0;
			break;
		case BPF_TRACE_ITER:
			/* allow struct bpf_iter__xxx types only */
			if (__btf_type_is_struct(t) &&
			    strncmp(tname, "bpf_iter__", sizeof("bpf_iter__") - 1) == 0)
				return 0;
			break;
		case BPF_TRACE_FENTRY:
		case BPF_TRACE_FEXIT:
		case BPF_MODIFY_RETURN:
			/* allow u64* as ctx */
			if (btf_is_int(t) && t->size == 8)
				return 0;
			break;
		default:
			break;
		}
		break;
	case BPF_PROG_TYPE_LSM:
	case BPF_PROG_TYPE_STRUCT_OPS:
		/* allow u64* as ctx */
		if (btf_is_int(t) && t->size == 8)
			return 0;
		break;
	case BPF_PROG_TYPE_TRACEPOINT:
	case BPF_PROG_TYPE_SYSCALL:
	case BPF_PROG_TYPE_EXT:
		return 0; /* anything goes */
	default:
		break;
	}

	ctx_type = find_canonical_prog_ctx_type(prog_type);
	if (!ctx_type) {
		/* should not happen */
		bpf_log(log, "btf_vmlinux is malformed\n");
		return -EINVAL;
	}

	/* resolve typedefs and check that underlying structs are matching as well */
	while (btf_type_is_modifier(ctx_type))
		ctx_type = btf_type_by_id(btf_vmlinux, ctx_type->type);

	/* if program type doesn't have distinctly named struct type for
	 * context, then __arg_ctx argument can only be `void *`, which we
	 * already checked above
	 */
	if (!__btf_type_is_struct(ctx_type)) {
		bpf_log(log, "arg#%d should be void pointer\n", arg);
		return -EINVAL;
	}

	ctx_tname = btf_name_by_offset(btf_vmlinux, ctx_type->name_off);
	if (!__btf_type_is_struct(t) || strcmp(ctx_tname, tname) != 0) {
		bpf_log(log, "arg#%d should be `struct %s *`\n", arg, ctx_tname);
		return -EINVAL;
	}

	return 0;
}

static int btf_translate_to_vmlinux(struct bpf_verifier_log *log,
				     struct btf *btf,
				     const struct btf_type *t,
				     enum bpf_prog_type prog_type,
				     int arg)
{
	const struct btf_member *prog_ctx_type, *kern_ctx_type;

	prog_ctx_type = btf_get_prog_ctx_type(log, btf, t, prog_type, arg);
	if (!prog_ctx_type)
	if (!btf_get_prog_ctx_type(log, btf, t, prog_type, arg))
		return -ENOENT;
	kern_ctx_type = prog_ctx_type + 1;
	return kern_ctx_type->type;
	return find_kern_ctx_type_id(prog_type);
}

int get_kern_ctx_btf_id(struct bpf_verifier_log *log, enum bpf_prog_type prog_type)
@@ -6934,6 +7096,23 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog)
		return -EINVAL;
	}

	for (i = 0; i < nargs; i++) {
		const char *tag;

		if (sub->args[i].arg_type != ARG_PTR_TO_CTX)
			continue;

		/* check if arg has "arg:ctx" tag */
		t = btf_type_by_id(btf, args[i].type);
		tag = btf_find_decl_tag_value(btf, fn_t, i, "arg:");
		if (IS_ERR_OR_NULL(tag) || strcmp(tag, "ctx") != 0)
			continue;

		if (btf_validate_prog_ctx_type(log, btf, t, i, prog_type,
					       prog->expected_attach_type))
			return -EINVAL;
	}

	sub->arg_cnt = nargs;
	sub->args_cached = true;

+4 −0
Original line number Diff line number Diff line
@@ -12826,6 +12826,10 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
	}
	switch (base_type(ptr_reg->type)) {
	case PTR_TO_FLOW_KEYS:
		if (known)
			break;
		fallthrough;
	case CONST_PTR_TO_MAP:
		/* smin_val represents the known value */
		if (known && smin_val == 0 && opcode == BPF_ADD)
+10 −12
Original line number Diff line number Diff line
@@ -3137,16 +3137,18 @@ static struct sock *bpf_iter_udp_batch(struct seq_file *seq)
	struct bpf_udp_iter_state *iter = seq->private;
	struct udp_iter_state *state = &iter->state;
	struct net *net = seq_file_net(seq);
	int resume_bucket, resume_offset;
	struct udp_table *udptable;
	unsigned int batch_sks = 0;
	bool resized = false;
	struct sock *sk;

	resume_bucket = state->bucket;
	resume_offset = iter->offset;

	/* The current batch is done, so advance the bucket. */
	if (iter->st_bucket_done) {
	if (iter->st_bucket_done)
		state->bucket++;
		iter->offset = 0;
	}

	udptable = udp_get_table_seq(seq, net);

@@ -3166,19 +3168,19 @@ static struct sock *bpf_iter_udp_batch(struct seq_file *seq)
	for (; state->bucket <= udptable->mask; state->bucket++) {
		struct udp_hslot *hslot2 = &udptable->hash2[state->bucket];

		if (hlist_empty(&hslot2->head)) {
			iter->offset = 0;
		if (hlist_empty(&hslot2->head))
			continue;
		}

		iter->offset = 0;
		spin_lock_bh(&hslot2->lock);
		udp_portaddr_for_each_entry(sk, &hslot2->head) {
			if (seq_sk_match(seq, sk)) {
				/* Resume from the last iterated socket at the
				 * offset in the bucket before iterator was stopped.
				 */
				if (iter->offset) {
					--iter->offset;
				if (state->bucket == resume_bucket &&
				    iter->offset < resume_offset) {
					++iter->offset;
					continue;
				}
				if (iter->end_sk < iter->max_sk) {
@@ -3192,9 +3194,6 @@ static struct sock *bpf_iter_udp_batch(struct seq_file *seq)

		if (iter->end_sk)
			break;

		/* Reset the current bucket's offset before moving to the next bucket. */
		iter->offset = 0;
	}

	/* All done: no batch made. */
@@ -3213,7 +3212,6 @@ static struct sock *bpf_iter_udp_batch(struct seq_file *seq)
		/* After allocating a larger batch, retry one more time to grab
		 * the whole bucket.
		 */
		state->bucket--;
		goto again;
	}
done:
+133 −9
Original line number Diff line number Diff line
@@ -6695,6 +6695,67 @@ static struct {
	/* all other program types don't have "named" context structs */
};

static bool need_func_arg_type_fixup(const struct btf *btf, const struct bpf_program *prog,
				     const char *subprog_name, int arg_idx,
				     int arg_type_id, const char *ctx_name)
{
	const struct btf_type *t;
	const char *tname;

	/* check if existing parameter already matches verifier expectations */
	t = skip_mods_and_typedefs(btf, arg_type_id, NULL);
	if (!btf_is_ptr(t))
		goto out_warn;

	/* typedef bpf_user_pt_regs_t is a special PITA case, valid for kprobe
	 * and perf_event programs, so check this case early on and forget
	 * about it for subsequent checks
	 */
	while (btf_is_mod(t))
		t = btf__type_by_id(btf, t->type);
	if (btf_is_typedef(t) &&
	    (prog->type == BPF_PROG_TYPE_KPROBE || prog->type == BPF_PROG_TYPE_PERF_EVENT)) {
		tname = btf__str_by_offset(btf, t->name_off) ?: "<anon>";
		if (strcmp(tname, "bpf_user_pt_regs_t") == 0)
			return false; /* canonical type for kprobe/perf_event */
	}

	/* now we can ignore typedefs moving forward */
	t = skip_mods_and_typedefs(btf, t->type, NULL);

	/* if it's `void *`, definitely fix up BTF info */
	if (btf_is_void(t))
		return true;

	/* if it's already proper canonical type, no need to fix up */
	tname = btf__str_by_offset(btf, t->name_off) ?: "<anon>";
	if (btf_is_struct(t) && strcmp(tname, ctx_name) == 0)
		return false;

	/* special cases */
	switch (prog->type) {
	case BPF_PROG_TYPE_KPROBE:
	case BPF_PROG_TYPE_PERF_EVENT:
		/* `struct pt_regs *` is expected, but we need to fix up */
		if (btf_is_struct(t) && strcmp(tname, "pt_regs") == 0)
			return true;
		break;
	case BPF_PROG_TYPE_RAW_TRACEPOINT:
	case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
		/* allow u64* as ctx */
		if (btf_is_int(t) && t->size == 8)
			return true;
		break;
	default:
		break;
	}

out_warn:
	pr_warn("prog '%s': subprog '%s' arg#%d is expected to be of `struct %s *` type\n",
		prog->name, subprog_name, arg_idx, ctx_name);
	return false;
}

static int clone_func_btf_info(struct btf *btf, int orig_fn_id, struct bpf_program *prog)
{
	int fn_id, fn_proto_id, ret_type_id, orig_proto_id;
@@ -6757,6 +6818,69 @@ static int clone_func_btf_info(struct btf *btf, int orig_fn_id, struct bpf_progr
	return fn_id;
}

static int probe_kern_arg_ctx_tag(void)
{
	/* To minimize merge conflicts with BPF token series that refactors
	 * feature detection code a lot, we don't integrate
	 * probe_kern_arg_ctx_tag() into kernel_supports() feature-detection
	 * framework yet, doing our own caching internally.
	 * This will be cleaned up a bit later when bpf/bpf-next trees settle.
	 */
	static int cached_result = -1;
	static const char strs[] = "\0a\0b\0arg:ctx\0";
	const __u32 types[] = {
		/* [1] INT */
		BTF_TYPE_INT_ENC(1 /* "a" */, BTF_INT_SIGNED, 0, 32, 4),
		/* [2] PTR -> VOID */
		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 0),
		/* [3] FUNC_PROTO `int(void *a)` */
		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 1),
		BTF_PARAM_ENC(1 /* "a" */, 2),
		/* [4] FUNC 'a' -> FUNC_PROTO (main prog) */
		BTF_TYPE_ENC(1 /* "a" */, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 3),
		/* [5] FUNC_PROTO `int(void *b __arg_ctx)` */
		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 1),
		BTF_PARAM_ENC(3 /* "b" */, 2),
		/* [6] FUNC 'b' -> FUNC_PROTO (subprog) */
		BTF_TYPE_ENC(3 /* "b" */, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 5),
		/* [7] DECL_TAG 'arg:ctx' -> func 'b' arg 'b' */
		BTF_TYPE_DECL_TAG_ENC(5 /* "arg:ctx" */, 6, 0),
	};
	const struct bpf_insn insns[] = {
		/* main prog */
		BPF_CALL_REL(+1),
		BPF_EXIT_INSN(),
		/* global subprog */
		BPF_EMIT_CALL(BPF_FUNC_get_func_ip), /* needs PTR_TO_CTX */
		BPF_EXIT_INSN(),
	};
	const struct bpf_func_info_min func_infos[] = {
		{ 0, 4 }, /* main prog -> FUNC 'a' */
		{ 2, 6 }, /* subprog -> FUNC 'b' */
	};
	LIBBPF_OPTS(bpf_prog_load_opts, opts);
	int prog_fd, btf_fd, insn_cnt = ARRAY_SIZE(insns);

	if (cached_result >= 0)
		return cached_result;

	btf_fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs));
	if (btf_fd < 0)
		return 0;

	opts.prog_btf_fd = btf_fd;
	opts.func_info = &func_infos;
	opts.func_info_cnt = ARRAY_SIZE(func_infos);
	opts.func_info_rec_size = sizeof(func_infos[0]);

	prog_fd = bpf_prog_load(BPF_PROG_TYPE_KPROBE, "det_arg_ctx",
				"GPL", insns, insn_cnt, &opts);
	close(btf_fd);

	cached_result = probe_fd(prog_fd);
	return cached_result;
}

/* Check if main program or global subprog's function prototype has `arg:ctx`
 * argument tags, and, if necessary, substitute correct type to match what BPF
 * verifier would expect, taking into account specific program type. This
@@ -6766,7 +6890,7 @@ static int clone_func_btf_info(struct btf *btf, int orig_fn_id, struct bpf_progr
 */
static int bpf_program_fixup_func_info(struct bpf_object *obj, struct bpf_program *prog)
{
	const char *ctx_name = NULL, *ctx_tag = "arg:ctx";
	const char *ctx_name = NULL, *ctx_tag = "arg:ctx", *fn_name;
	struct bpf_func_info_min *func_rec;
	struct btf_type *fn_t, *fn_proto_t;
	struct btf *btf = obj->btf;
@@ -6780,6 +6904,10 @@ static int bpf_program_fixup_func_info(struct bpf_object *obj, struct bpf_progra
	if (!obj->btf_ext || !prog->func_info)
		return 0;

	/* don't do any fix ups if kernel natively supports __arg_ctx */
	if (probe_kern_arg_ctx_tag() > 0)
		return 0;

	/* some BPF program types just don't have named context structs, so
	 * this fallback mechanism doesn't work for them
	 */
@@ -6842,15 +6970,11 @@ static int bpf_program_fixup_func_info(struct bpf_object *obj, struct bpf_progra
		if (arg_idx < 0 || arg_idx >= arg_cnt)
			continue;

		/* check if existing parameter already matches verifier expectations */
		/* check if we should fix up argument type */
		p = &btf_params(fn_proto_t)[arg_idx];
		t = skip_mods_and_typedefs(btf, p->type, NULL);
		if (btf_is_ptr(t) &&
		    (t = skip_mods_and_typedefs(btf, t->type, NULL)) &&
		    btf_is_struct(t) &&
		    strcmp(btf__str_by_offset(btf, t->name_off), ctx_name) == 0) {
			continue; /* no need for fix up */
		}
		fn_name = btf__str_by_offset(btf, fn_t->name_off) ?: "<anon>";
		if (!need_func_arg_type_fixup(btf, prog, fn_name, arg_idx, p->type, ctx_name))
			continue;

		/* clone fn/fn_proto, unless we already did it for another arg */
		if (func_rec->type_id == orig_fn_id) {
Loading