Commit 515186b7 authored by Linus Torvalds
Browse files
Pull bpf fixes from Alexei Starovoitov:

 - Fix sk_local_storage diag dump via netlink (Amery Hung)

 - Fix off-by-one in arena direct-value access (Junyoung Jang)

 - Reject TCP_NODELAY in bpf-tcp congestion control (KaFai Wan)

 - Fix type confusion in bpf_*_sock() (Kuniyuki Iwashima)

 - Reject TX-only AF_XDP sockets (Linpu Yu)

 - Don't run arg-tracking analysis twice on main subprog (Paul Chaignon)

 - Fix NULL pointer dereference in bpf_sk_storage_clone and fib lookup
   (Weiming Shi)

* tag 'bpf-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf:
  bpf: Fix off-by-one boundary validation in arena direct-value access
  xskmap: reject TX-only AF_XDP sockets
  bpf: Don't run arg-tracking analysis twice on main subprog
  bpf: Free reuseport cBPF prog after RCU grace period.
  bpf: tcp: Fix type confusion in sol_tcp_sockopt().
  bpf: tcp: Fix type confusion in bpf_skc_to_tcp6_sock().
  bpf: tcp: Fix type confusion in bpf_skc_to_tcp_sock().
  mptcp: bpf: Fix type confusion in bpf_mptcp_sock_from_subflow()
  selftest: bpf: Add test for bpf_tcp_sock() and RAW socket.
  bpf: tcp: Fix type confusion in bpf_tcp_sock().
  tools/headers: Regenerate stddef.h to fix BPF selftests
  bpf: Fix sk_local_storage diag dumping uninitialized special fields
  bpf: Fix NULL pointer dereference in bpf_skb_fib_lookup()
  sockmap: Fix sk_psock_drop() race vs sock_map_{unhash,close,destroy}().
  bpf: Fix NULL pointer dereference in bpf_sk_storage_clone and diag paths
  selftests/bpf: Verify bpf-tcp-cc rejects TCP_NODELAY
  selftests/bpf: Test TCP_NODELAY in TCP hdr opt callbacks
  bpf: Reject TCP_NODELAY in bpf-tcp-cc
  bpf: Reject TCP_NODELAY in TCP header option callbacks
parents 1bfaee9d 3ac1a467
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -3725,6 +3725,7 @@ extern const struct bpf_func_proto bpf_for_each_map_elem_proto;
extern const struct bpf_func_proto bpf_btf_find_by_name_kind_proto;
extern const struct bpf_func_proto bpf_sk_setsockopt_proto;
extern const struct bpf_func_proto bpf_sk_getsockopt_proto;
extern const struct bpf_func_proto bpf_sk_setsockopt_nodelay_proto;
extern const struct bpf_func_proto bpf_unlocked_sk_setsockopt_proto;
extern const struct bpf_func_proto bpf_unlocked_sk_getsockopt_proto;
extern const struct bpf_func_proto bpf_find_vma_proto;
+1 −1
Original line number Diff line number Diff line
@@ -511,7 +511,7 @@ static int arena_map_direct_value_addr(const struct bpf_map *map, u64 *imm, u32
{
	struct bpf_arena *arena = container_of(map, struct bpf_arena, map);

	if ((u64)off > arena->user_vm_end - arena->user_vm_start)
	if ((u64)off >= arena->user_vm_end - arena->user_vm_start)
		return -ERANGE;
	*imm = (unsigned long)arena->user_vm_start;
	return 0;
+7 −18
Original line number Diff line number Diff line
@@ -1914,26 +1914,15 @@ int bpf_compute_subprog_arg_access(struct bpf_verifier_env *env)
		return -ENOMEM;
	}

	instance = call_instance(env, NULL, 0, 0);
	if (IS_ERR(instance)) {
		err = PTR_ERR(instance);
		goto out;
	}
	err = analyze_subprog(env, NULL, info, instance, callsites);
	if (err)
		goto out;

	/*
	 * Subprogs and callbacks that don't receive FP-derived arguments
	 * cannot access ancestor stack frames, so they were skipped during
	 * the recursive walk above.  Async callbacks (timer, workqueue) are
	 * also not reachable from the main program's call graph.  Analyze
	 * all unvisited subprogs as independent roots at depth 0.
	 * Analyze every subprog in reverse topological order (callers
	 * before callees) so that each subprog is analyzed before its
	 * callees, allowing the recursive walk inside analyze_subprog()
	 * to naturally reach callees that receive FP-derived args.
	 *
	 * Use reverse topological order (callers before callees) so that
	 * each subprog is analyzed before its callees, allowing the
	 * recursive walk inside analyze_subprog() to naturally
	 * reach nested callees that also lack FP-derived args.
	 * Subprogs and callbacks that don't receive FP-derived arguments
	 * cannot access ancestor stack frames, so they are analyzed
	 * independently.
	 * Async callbacks (timer, workqueue) are handled the same way.
	 */
	for (k = env->subprog_cnt - 1; k >= 0; k--) {
		int sub = env->subprog_topo_order[k];
+8 −6
Original line number Diff line number Diff line
@@ -172,7 +172,7 @@ int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk)
		struct bpf_map *map;

		smap = rcu_dereference(SDATA(selem)->smap);
		if (!(smap->map.map_flags & BPF_F_CLONE))
		if (!smap || !(smap->map.map_flags & BPF_F_CLONE))
			continue;

		/* Note that for lockless listeners adding new element
@@ -531,10 +531,10 @@ bpf_sk_storage_diag_alloc(const struct nlattr *nla_stgs)
}
EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_alloc);

static int diag_get(struct bpf_local_storage_data *sdata, struct sk_buff *skb)
static int diag_get(struct bpf_local_storage_map *smap,
		    struct bpf_local_storage_data *sdata, struct sk_buff *skb)
{
	struct nlattr *nla_stg, *nla_value;
	struct bpf_local_storage_map *smap;

	/* It cannot exceed max nlattr's payload */
	BUILD_BUG_ON(U16_MAX - NLA_HDRLEN < BPF_LOCAL_STORAGE_MAX_VALUE_SIZE);
@@ -543,7 +543,6 @@ static int diag_get(struct bpf_local_storage_data *sdata, struct sk_buff *skb)
	if (!nla_stg)
		return -EMSGSIZE;

	smap = rcu_dereference(sdata->smap);
	if (nla_put_u32(skb, SK_DIAG_BPF_STORAGE_MAP_ID, smap->map.id))
		goto errout;

@@ -558,6 +557,7 @@ static int diag_get(struct bpf_local_storage_data *sdata, struct sk_buff *skb)
				      sdata->data, true);
	else
		copy_map_value(&smap->map, nla_data(nla_value), sdata->data);
	check_and_init_map_value(&smap->map, nla_data(nla_value));

	nla_nest_end(skb, nla_stg);
	return 0;
@@ -596,9 +596,11 @@ static int bpf_sk_storage_diag_put_all(struct sock *sk, struct sk_buff *skb,
	saved_len = skb->len;
	hlist_for_each_entry_rcu(selem, &sk_storage->list, snode) {
		smap = rcu_dereference(SDATA(selem)->smap);
		if (!smap)
			continue;
		diag_size += nla_value_size(smap->map.value_size);

		if (nla_stgs && diag_get(SDATA(selem), skb))
		if (nla_stgs && diag_get(smap, SDATA(selem), skb))
			/* Continue to learn diag_size */
			err = -EMSGSIZE;
	}
@@ -665,7 +667,7 @@ int bpf_sk_storage_diag_put(struct bpf_sk_storage_diag *diag,

		diag_size += nla_value_size(diag->maps[i]->value_size);

		if (nla_stgs && diag_get(sdata, skb))
		if (nla_stgs && diag_get((struct bpf_local_storage_map *)diag->maps[i], sdata, skb))
			/* Continue to learn diag_size */
			err = -EMSGSIZE;
	}
+48 −7
Original line number Diff line number Diff line
@@ -1654,15 +1654,24 @@ int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk)
	return err;
}

/*
 * RCU callback that frees a reuseport program.
 *
 * @rcu: the rcu_head embedded in the program's struct bpf_prog_aux.
 *
 * Recovers the owning bpf_prog from @rcu, releases its original filter
 * and then frees the program itself.
 */
static void sk_reuseport_prog_free_rcu(struct rcu_head *rcu)
{
	/* @rcu is the 'rcu' member of bpf_prog_aux; walk back to the prog. */
	struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu);
	struct bpf_prog *prog = aux->prog;

	bpf_release_orig_filter(prog);
	bpf_prog_free(prog);
}

/*
 * Release a reuseport BPF program; a NULL @prog is a no-op.
 *
 * NOTE(review): this listing appears to interleave the removed and the
 * added lines of a diff without +/- markers. The prog->type check with
 * its bpf_prog_put() and the bpf_prog_destroy() call look like the
 * pre-change lines; the bpf_prog_was_classic()/call_rcu() branch and the
 * final bpf_prog_put() look like the post-change lines. Confirm against
 * the actual tree before reading this as a single function body.
 */
void sk_reuseport_prog_free(struct bpf_prog *prog)
{
	if (!prog)
		return;

	if (prog->type == BPF_PROG_TYPE_SK_REUSEPORT)
		bpf_prog_put(prog);
	/* Classic programs are handed to sk_reuseport_prog_free_rcu() via call_rcu(). */
	if (bpf_prog_was_classic(prog))
		call_rcu(&prog->aux->rcu, sk_reuseport_prog_free_rcu);
	else
		bpf_prog_destroy(prog);
		bpf_prog_put(prog);
}

static inline int __bpf_try_make_writable(struct sk_buff *skb,
@@ -5481,7 +5490,7 @@ static int sol_tcp_sockopt(struct sock *sk, int optname,
			   char *optval, int *optlen,
			   bool getopt)
{
	if (sk->sk_protocol != IPPROTO_TCP)
	if (!sk_is_tcp(sk))
		return -EINVAL;

	switch (optname) {
@@ -5688,6 +5697,30 @@ const struct bpf_func_proto bpf_sk_getsockopt_proto = {
	.arg5_type	= ARG_CONST_SIZE,
};

/*
 * setsockopt helper variant that refuses TCP_NODELAY.
 *
 * Returns -EOPNOTSUPP when @level is SOL_TCP and @optname is TCP_NODELAY;
 * every other request is forwarded unchanged to _bpf_setsockopt() and its
 * return value is passed back to the caller.
 */
BPF_CALL_5(bpf_sk_setsockopt_nodelay, struct sock *, sk, int, level,
	   int, optname, char *, optval, int, optlen)
{
	/*
	 * TCP_NODELAY triggers tcp_push_pending_frames() and re-enters
	 * CA_EVENT_TX_START in bpf_tcp_cc.
	 */
	if (level == SOL_TCP && optname == TCP_NODELAY)
		return -EOPNOTSUPP;

	return _bpf_setsockopt(sk, level, optname, optval, optlen);
}

/*
 * struct bpf_func_proto describing bpf_sk_setsockopt_nodelay().
 *
 * Going by the ARG_ and RET_ constant names: arg1 is a BTF sock_common
 * pointer, arg2/arg3 (level, optname) accept any value, arg4 is read-only
 * memory whose size is given by arg5, and the helper returns an integer.
 */
const struct bpf_func_proto bpf_sk_setsockopt_nodelay_proto = {
	.func		= bpf_sk_setsockopt_nodelay,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_BTF_ID_SOCK_COMMON,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg5_type	= ARG_CONST_SIZE,
};

BPF_CALL_5(bpf_unlocked_sk_setsockopt, struct sock *, sk, int, level,
	   int, optname, char *, optval, int, optlen)
{
@@ -5833,6 +5866,12 @@ BPF_CALL_5(bpf_sock_ops_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
	if (!is_locked_tcp_sock_ops(bpf_sock))
		return -EOPNOTSUPP;

	/* TCP_NODELAY triggers tcp_push_pending_frames() and re-enters these callbacks. */
	if ((bpf_sock->op == BPF_SOCK_OPS_HDR_OPT_LEN_CB ||
	     bpf_sock->op == BPF_SOCK_OPS_WRITE_HDR_OPT_CB) &&
	    level == SOL_TCP && optname == TCP_NODELAY)
		return -EOPNOTSUPP;

	return _bpf_setsockopt(bpf_sock->sk, level, optname, optval, optlen);
}

@@ -6443,6 +6482,8 @@ BPF_CALL_4(bpf_skb_fib_lookup, struct sk_buff *, skb,
		 * against MTU of FIB lookup resulting net_device
		 */
		dev = dev_get_by_index_rcu(net, params->ifindex);
		if (unlikely(!dev))
			return -ENODEV;
		if (!is_skb_forwardable(dev, skb))
			rc = BPF_FIB_LKUP_RET_FRAG_NEEDED;

@@ -7443,7 +7484,7 @@ u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type,

BPF_CALL_1(bpf_tcp_sock, struct sock *, sk)
{
	if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP)
	if (sk_fullsock(sk) && sk_is_tcp(sk))
		return (unsigned long)sk;

	return (unsigned long)NULL;
@@ -11915,7 +11956,7 @@ BPF_CALL_1(bpf_skc_to_tcp6_sock, struct sock *, sk)
	 */
	BTF_TYPE_EMIT(struct tcp6_sock);
	if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP &&
	    sk->sk_family == AF_INET6)
	    sk->sk_type == SOCK_STREAM && sk->sk_family == AF_INET6)
		return (unsigned long)sk;

	return (unsigned long)NULL;
@@ -11931,7 +11972,7 @@ const struct bpf_func_proto bpf_skc_to_tcp6_sock_proto = {

BPF_CALL_1(bpf_skc_to_tcp_sock, struct sock *, sk)
{
	if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP)
	if (sk && sk_fullsock(sk) && sk_is_tcp(sk))
		return (unsigned long)sk;

	return (unsigned long)NULL;
Loading