Commit cd0eb48b authored by Martin KaFai Lau
Browse files

Merge branch 'bpf-reject-tcp_nodelay-in-tcp-header-option'

KaFai Wan says:

====================
bpf: Reject TCP_NODELAY in TCP header option

This small patchset avoids infinite recursion in TCP header option callbacks
and bpf-tcp-cc callbacks triggered via the TCP_NODELAY setsockopt.

v4:
 - Fix the test case for TCP header option callbacks (Martin and Jiayuan)
 - Reject TCP_NODELAY in bpf-tcp-cc callbacks (AI and Martin)
 - Add a test case for bpf-tcp-cc

v3:
 - Remove CONFIG_INET check and add comment (Martin and Jiayuan)
 - Fix the test case (Martin)
 https://lore.kernel.org/bpf/20260417092035.2299913-1-kafai.wan@linux.dev/

v2:
 - Reject TCP_NODELAY in bpf_sock_ops_setsockopt() (AI and Martin)
 https://lore.kernel.org/bpf/20260416112308.1820332-1-kafai.wan@linux.dev/

v1:
 https://lore.kernel.org/bpf/20260414112310.1285783-1-kafai.wan@linux.dev/
====================

Link: https://patch.msgid.link/20260421155804.135786-1-kafai.wan@linux.dev


Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
parents eb5249b1 2c7e33f1
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -3725,6 +3725,7 @@ extern const struct bpf_func_proto bpf_for_each_map_elem_proto;
extern const struct bpf_func_proto bpf_btf_find_by_name_kind_proto;
extern const struct bpf_func_proto bpf_sk_setsockopt_proto;
extern const struct bpf_func_proto bpf_sk_getsockopt_proto;
extern const struct bpf_func_proto bpf_sk_setsockopt_nodelay_proto;
extern const struct bpf_func_proto bpf_unlocked_sk_setsockopt_proto;
extern const struct bpf_func_proto bpf_unlocked_sk_getsockopt_proto;
extern const struct bpf_func_proto bpf_find_vma_proto;
+30 −0
Original line number Diff line number Diff line
@@ -5688,6 +5688,30 @@ const struct bpf_func_proto bpf_sk_getsockopt_proto = {
	.arg5_type	= ARG_CONST_SIZE,
};

/*
 * setsockopt helper that refuses SOL_TCP/TCP_NODELAY.
 *
 * Every other (level, optname) pair is handed to _bpf_setsockopt()
 * unchanged and its return value is passed back to the caller.
 *
 * Returns -EOPNOTSUPP for SOL_TCP/TCP_NODELAY, otherwise whatever
 * _bpf_setsockopt() returns.
 */
BPF_CALL_5(bpf_sk_setsockopt_nodelay, struct sock *, sk, int, level,
	   int, optname, char *, optval, int, optlen)
{
	/* Anything other than SOL_TCP/TCP_NODELAY takes the regular path. */
	if (level != SOL_TCP || optname != TCP_NODELAY)
		return _bpf_setsockopt(sk, level, optname, optval, optlen);

	/*
	 * Setting TCP_NODELAY triggers tcp_push_pending_frames(), which
	 * re-enters CA_EVENT_TX_START in bpf_tcp_cc, so it is rejected
	 * here.
	 */
	return -EOPNOTSUPP;
}

/*
 * Helper prototype for bpf_sk_setsockopt_nodelay().  The five argument
 * slots correspond, in order, to the helper's sk, level, optname, optval
 * and optlen parameters.
 */
const struct bpf_func_proto bpf_sk_setsockopt_nodelay_proto = {
	.func		= bpf_sk_setsockopt_nodelay,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_BTF_ID_SOCK_COMMON,	/* sk */
	.arg2_type	= ARG_ANYTHING,				/* level */
	.arg3_type	= ARG_ANYTHING,				/* optname */
	.arg4_type	= ARG_PTR_TO_MEM | MEM_RDONLY,		/* optval */
	.arg5_type	= ARG_CONST_SIZE,			/* optlen */
};

BPF_CALL_5(bpf_unlocked_sk_setsockopt, struct sock *, sk, int, level,
	   int, optname, char *, optval, int, optlen)
{
@@ -5833,6 +5857,12 @@ BPF_CALL_5(bpf_sock_ops_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
	if (!is_locked_tcp_sock_ops(bpf_sock))
		return -EOPNOTSUPP;

	/* TCP_NODELAY triggers tcp_push_pending_frames() and re-enters these callbacks. */
	if ((bpf_sock->op == BPF_SOCK_OPS_HDR_OPT_LEN_CB ||
	     bpf_sock->op == BPF_SOCK_OPS_WRITE_HDR_OPT_CB) &&
	    level == SOL_TCP && optname == TCP_NODELAY)
		return -EOPNOTSUPP;

	return _bpf_setsockopt(bpf_sock->sk, level, optname, optval, optlen);
}

+1 −1
Original line number Diff line number Diff line
@@ -168,7 +168,7 @@ bpf_tcp_ca_get_func_proto(enum bpf_func_id func_id,
		 */
		if (prog_ops_moff(prog) !=
		    offsetof(struct tcp_congestion_ops, release))
			return &bpf_sk_setsockopt_proto;
			return &bpf_sk_setsockopt_nodelay_proto;
		return NULL;
	case BPF_FUNC_getsockopt:
		/* Since get/setsockopt is usually expected to
+4 −0
Original line number Diff line number Diff line
@@ -112,6 +112,10 @@ static void test_cubic(void)

	ASSERT_EQ(cubic_skel->bss->bpf_cubic_acked_called, 1, "pkts_acked called");

	ASSERT_TRUE(cubic_skel->bss->nodelay_init_reject, "init reject nodelay option");
	ASSERT_TRUE(cubic_skel->bss->nodelay_cwnd_event_tx_start_reject,
		    "cwnd_event_tx_start reject nodelay option");

	bpf_link__destroy(link);
	bpf_cubic__destroy(cubic_skel);
}
+4 −0
Original line number Diff line number Diff line
@@ -507,6 +507,10 @@ static void misc(void)

	ASSERT_EQ(misc_skel->bss->nr_hwtstamp, 0, "nr_hwtstamp");

	ASSERT_TRUE(misc_skel->bss->nodelay_est_ok, "nodelay_est_ok");
	ASSERT_TRUE(misc_skel->bss->nodelay_hdr_len_reject, "nodelay_hdr_len_reject");
	ASSERT_TRUE(misc_skel->bss->nodelay_write_hdr_reject, "nodelay_write_hdr_reject");

check_linum:
	ASSERT_FALSE(check_error_linum(&sk_fds), "check_error_linum");
	sk_fds_close(&sk_fds);
Loading