Commit 273b7f0f authored by Martin KaFai Lau, committed by Alexei Starovoitov
Browse files

bpf: Change bpf_getsockopt(SOL_TCP) to reuse do_tcp_getsockopt()



This patch changes bpf_getsockopt(SOL_TCP) to reuse
do_tcp_getsockopt().  It removes the duplicated code from
bpf_getsockopt(SOL_TCP).

Before this patch, there were some optnames available to
bpf_setsockopt(SOL_TCP) but missing in bpf_getsockopt(SOL_TCP).
For example, TCP_NODELAY, TCP_MAXSEG, TCP_KEEPIDLE, TCP_KEEPINTVL,
and a few more.  It surprises users from time to time.  This patch
automatically closes this gap without duplicating more code.

bpf_getsockopt(TCP_SAVED_SYN) does not free the saved_syn,
so it stays in sol_tcp_sockopt().

For string-valued options like TCP_CONGESTION, bpf expects the
value to always be null terminated, so sol_tcp_sockopt() decrements
optlen by one before calling do_tcp_getsockopt(), and the
'if (optlen < saved_optlen) memset(..,0,..);'
in __bpf_getsockopt() will then always do the null termination.

Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
Link: https://lore.kernel.org/r/20220902002918.2894511-1-kafai@fb.com


Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parent 65ddc82d
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -402,6 +402,8 @@ void tcp_init_sock(struct sock *sk);
void tcp_init_transfer(struct sock *sk, int bpf_op, struct sk_buff *skb);
__poll_t tcp_poll(struct file *file, struct socket *sock,
		      struct poll_table_struct *wait);
int do_tcp_getsockopt(struct sock *sk, int level,
		      int optname, sockptr_t optval, sockptr_t optlen);
int tcp_getsockopt(struct sock *sk, int level, int optname,
		   char __user *optval, int __user *optlen);
bool tcp_bpf_bypass_getsockopt(int level, int optname);
+43 −31
Original line number Diff line number Diff line
@@ -5100,8 +5100,9 @@ static int bpf_sol_tcp_setsockopt(struct sock *sk, int optname,
	return 0;
}

static int sol_tcp_setsockopt(struct sock *sk, int optname,
			      char *optval, int optlen)
static int sol_tcp_sockopt(struct sock *sk, int optname,
			   char *optval, int *optlen,
			   bool getopt)
{
	if (sk->sk_prot->setsockopt != tcp_setsockopt)
		return -EINVAL;
@@ -5118,17 +5119,51 @@ static int sol_tcp_setsockopt(struct sock *sk, int optname,
	case TCP_USER_TIMEOUT:
	case TCP_NOTSENT_LOWAT:
	case TCP_SAVE_SYN:
		if (optlen != sizeof(int))
		if (*optlen != sizeof(int))
			return -EINVAL;
		break;
	case TCP_CONGESTION:
		if (*optlen < 2)
			return -EINVAL;
		break;
	case TCP_SAVED_SYN:
		if (*optlen < 1)
			return -EINVAL;
		break;
	default:
		return bpf_sol_tcp_setsockopt(sk, optname, optval, optlen);
		if (getopt)
			return -EINVAL;
		return bpf_sol_tcp_setsockopt(sk, optname, optval, *optlen);
	}

	if (getopt) {
		if (optname == TCP_SAVED_SYN) {
			struct tcp_sock *tp = tcp_sk(sk);

			if (!tp->saved_syn ||
			    *optlen > tcp_saved_syn_len(tp->saved_syn))
				return -EINVAL;
			memcpy(optval, tp->saved_syn->data, *optlen);
			/* It cannot free tp->saved_syn here because it
			 * does not know if the user space still needs it.
			 */
			return 0;
		}

		if (optname == TCP_CONGESTION) {
			if (!inet_csk(sk)->icsk_ca_ops)
				return -EINVAL;
			/* BPF expects NULL-terminated tcp-cc string */
			optval[--(*optlen)] = '\0';
		}

		return do_tcp_getsockopt(sk, SOL_TCP, optname,
					 KERNEL_SOCKPTR(optval),
					 KERNEL_SOCKPTR(optlen));
	}

	return do_tcp_setsockopt(sk, SOL_TCP, optname,
				 KERNEL_SOCKPTR(optval), optlen);
				 KERNEL_SOCKPTR(optval), *optlen);
}

static int sol_ip_setsockopt(struct sock *sk, int optname,
@@ -5183,7 +5218,7 @@ static int __bpf_setsockopt(struct sock *sk, int level, int optname,
	else if (IS_ENABLED(CONFIG_IPV6) && level == SOL_IPV6)
		return sol_ipv6_setsockopt(sk, optname, optval, optlen);
	else if (IS_ENABLED(CONFIG_INET) && level == SOL_TCP)
		return sol_tcp_setsockopt(sk, optname, optval, optlen);
		return sol_tcp_sockopt(sk, optname, optval, &optlen, false);

	return -EINVAL;
}
@@ -5206,31 +5241,8 @@ static int __bpf_getsockopt(struct sock *sk, int level, int optname,

	if (level == SOL_SOCKET) {
		err = sol_socket_sockopt(sk, optname, optval, &optlen, true);
	} else if (IS_ENABLED(CONFIG_INET) &&
		   level == SOL_TCP && sk->sk_prot->getsockopt == tcp_getsockopt) {
		struct inet_connection_sock *icsk;
		struct tcp_sock *tp;

		switch (optname) {
		case TCP_CONGESTION:
			icsk = inet_csk(sk);

			if (!icsk->icsk_ca_ops || optlen <= 1)
				goto err_clear;
			strncpy(optval, icsk->icsk_ca_ops->name, optlen);
			optval[optlen - 1] = 0;
			break;
		case TCP_SAVED_SYN:
			tp = tcp_sk(sk);

			if (optlen <= 0 || !tp->saved_syn ||
			    optlen > tcp_saved_syn_len(tp->saved_syn))
				goto err_clear;
			memcpy(optval, tp->saved_syn->data, optlen);
			break;
		default:
			goto err_clear;
		}
	} else if (IS_ENABLED(CONFIG_INET) && level == SOL_TCP) {
		err = sol_tcp_sockopt(sk, optname, optval, &optlen, true);
	} else if (IS_ENABLED(CONFIG_INET) && level == SOL_IP) {
		struct inet_sock *inet = inet_sk(sk);

+2 −2
Original line number Diff line number Diff line
@@ -4043,7 +4043,7 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk,
	return stats;
}

static int do_tcp_getsockopt(struct sock *sk, int level,
int do_tcp_getsockopt(struct sock *sk, int level,
		      int optname, sockptr_t optval, sockptr_t optlen)
{
	struct inet_connection_sock *icsk = inet_csk(sk);