Commit 1f62f58d authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'mptcp-cleanups-ephemeral-port-sockopts'



Matthieu Baerts says:

====================
mptcp: cleanup and support more ephemeral ports sockopts

Patch 1 is a cleanup one: mptcp_is_tcpsk() helper was modifying sock_ops
in some cases which is unexpected with that name.

Patch 2 to 4 add support for two socket options: IP_LOCAL_PORT_RANGE and
IP_BIND_ADDRESS_NO_PORT. The first one is a preparation patch, the
second one adds the support while the last one modifies an existing
selftest to validate the new features.
====================

Signed-off-by: default avatarMatthieu Baerts <matttbe@kernel.org>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 7961ef1f 122db5e3
Loading
Loading
Loading
Loading
+44 −64
Original line number Diff line number Diff line
@@ -55,28 +55,14 @@ static u64 mptcp_wnd_end(const struct mptcp_sock *msk)
	return READ_ONCE(msk->wnd_end);
}

static bool mptcp_is_tcpsk(struct sock *sk)
static const struct proto_ops *mptcp_fallback_tcp_ops(const struct sock *sk)
{
	struct socket *sock = sk->sk_socket;

	if (unlikely(sk->sk_prot == &tcp_prot)) {
		/* we are being invoked after mptcp_accept() has
		 * accepted a non-mp-capable flow: sk is a tcp_sk,
		 * not an mptcp one.
		 *
		 * Hand the socket over to tcp so all further socket ops
		 * bypass mptcp.
		 */
		WRITE_ONCE(sock->ops, &inet_stream_ops);
		return true;
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
	} else if (unlikely(sk->sk_prot == &tcpv6_prot)) {
		WRITE_ONCE(sock->ops, &inet6_stream_ops);
		return true;
	if (sk->sk_prot == &tcpv6_prot)
		return &inet6_stream_ops;
#endif
	}

	return false;
	WARN_ON_ONCE(sk->sk_prot != &tcp_prot);
	return &inet_stream_ops;
}

static int __mptcp_socket_create(struct mptcp_sock *msk)
@@ -3258,44 +3244,6 @@ void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk)
	WRITE_ONCE(msk->wnd_end, msk->snd_nxt + tcp_sk(ssk)->snd_wnd);
}

static struct sock *mptcp_accept(struct sock *ssk, int flags, int *err,
				 bool kern)
{
	struct sock *newsk;

	pr_debug("ssk=%p, listener=%p", ssk, mptcp_subflow_ctx(ssk));
	newsk = inet_csk_accept(ssk, flags, err, kern);
	if (!newsk)
		return NULL;

	pr_debug("newsk=%p, subflow is mptcp=%d", newsk, sk_is_mptcp(newsk));
	if (sk_is_mptcp(newsk)) {
		struct mptcp_subflow_context *subflow;
		struct sock *new_mptcp_sock;

		subflow = mptcp_subflow_ctx(newsk);
		new_mptcp_sock = subflow->conn;

		/* is_mptcp should be false if subflow->conn is missing, see
		 * subflow_syn_recv_sock()
		 */
		if (WARN_ON_ONCE(!new_mptcp_sock)) {
			tcp_sk(newsk)->is_mptcp = 0;
			goto out;
		}

		newsk = new_mptcp_sock;
		MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEPASSIVEACK);
	} else {
		MPTCP_INC_STATS(sock_net(ssk),
				MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK);
	}

out:
	newsk->sk_kern_sock = kern;
	return newsk;
}

void mptcp_destroy_common(struct mptcp_sock *msk, unsigned int flags)
{
	struct mptcp_subflow_context *subflow, *tmp;
@@ -3739,7 +3687,6 @@ static struct proto mptcp_prot = {
	.connect	= mptcp_connect,
	.disconnect	= mptcp_disconnect,
	.close		= mptcp_close,
	.accept		= mptcp_accept,
	.setsockopt	= mptcp_setsockopt,
	.getsockopt	= mptcp_getsockopt,
	.shutdown	= mptcp_shutdown,
@@ -3849,18 +3796,36 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
	if (!ssk)
		return -EINVAL;

	newsk = mptcp_accept(ssk, flags, &err, kern);
	pr_debug("ssk=%p, listener=%p", ssk, mptcp_subflow_ctx(ssk));
	newsk = inet_csk_accept(ssk, flags, &err, kern);
	if (!newsk)
		return err;

	lock_sock(newsk);
	pr_debug("newsk=%p, subflow is mptcp=%d", newsk, sk_is_mptcp(newsk));
	if (sk_is_mptcp(newsk)) {
		struct mptcp_subflow_context *subflow;
		struct sock *new_mptcp_sock;

		subflow = mptcp_subflow_ctx(newsk);
		new_mptcp_sock = subflow->conn;

		/* is_mptcp should be false if subflow->conn is missing, see
		 * subflow_syn_recv_sock()
		 */
		if (WARN_ON_ONCE(!new_mptcp_sock)) {
			tcp_sk(newsk)->is_mptcp = 0;
			goto tcpfallback;
		}

		newsk = new_mptcp_sock;
		MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEPASSIVEACK);

		newsk->sk_kern_sock = kern;
		lock_sock(newsk);
		__inet_accept(sock, newsock, newsk);
	if (!mptcp_is_tcpsk(newsock->sk)) {
		struct mptcp_sock *msk = mptcp_sk(newsk);
		struct mptcp_subflow_context *subflow;

		set_bit(SOCK_CUSTOM_SOCKOPT, &newsock->flags);
		msk = mptcp_sk(newsk);
		msk->in_accept_queue = 0;

		/* set ssk->sk_socket of accept()ed flows to mptcp socket.
@@ -3882,6 +3847,21 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
			if (unlikely(list_is_singular(&msk->conn_list)))
				inet_sk_state_store(newsk, TCP_CLOSE);
		}
	} else {
		MPTCP_INC_STATS(sock_net(ssk),
				MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK);
tcpfallback:
		newsk->sk_kern_sock = kern;
		lock_sock(newsk);
		__inet_accept(sock, newsock, newsk);
		/* we are being invoked after accepting a non-mp-capable
		 * flow: sk is a tcp_sk, not an mptcp one.
		 *
		 * Hand the socket over to tcp so all further socket ops
		 * bypass mptcp.
		 */
		WRITE_ONCE(newsock->sk->sk_socket->ops,
			   mptcp_fallback_tcp_ops(newsock->sk));
	}
	release_sock(newsk);

+23 −4
Original line number Diff line number Diff line
@@ -440,6 +440,8 @@ static bool mptcp_supported_sockopt(int level, int optname)
		/* should work fine */
		case IP_FREEBIND:
		case IP_TRANSPARENT:
		case IP_BIND_ADDRESS_NO_PORT:
		case IP_LOCAL_PORT_RANGE:

		/* the following are control cmsg related */
		case IP_PKTINFO:
@@ -455,7 +457,6 @@ static bool mptcp_supported_sockopt(int level, int optname)
		/* common stuff that need some love */
		case IP_TOS:
		case IP_TTL:
		case IP_BIND_ADDRESS_NO_PORT:
		case IP_MTU_DISCOVER:
		case IP_RECVERR:

@@ -683,7 +684,7 @@ static int mptcp_setsockopt_sol_tcp_nodelay(struct mptcp_sock *msk, sockptr_t op
	return 0;
}

static int mptcp_setsockopt_sol_ip_set_transparent(struct mptcp_sock *msk, int optname,
static int mptcp_setsockopt_sol_ip_set(struct mptcp_sock *msk, int optname,
				       sockptr_t optval, unsigned int optlen)
{
	struct sock *sk = (struct sock *)msk;
@@ -710,6 +711,14 @@ static int mptcp_setsockopt_sol_ip_set_transparent(struct mptcp_sock *msk, int o
		inet_assign_bit(TRANSPARENT, ssk,
				inet_test_bit(TRANSPARENT, sk));
		break;
	case IP_BIND_ADDRESS_NO_PORT:
		inet_assign_bit(BIND_ADDRESS_NO_PORT, ssk,
				inet_test_bit(BIND_ADDRESS_NO_PORT, sk));
		break;
	case IP_LOCAL_PORT_RANGE:
		WRITE_ONCE(inet_sk(ssk)->local_port_range,
			   READ_ONCE(inet_sk(sk)->local_port_range));
		break;
	default:
		release_sock(sk);
		WARN_ON_ONCE(1);
@@ -755,7 +764,9 @@ static int mptcp_setsockopt_v4(struct mptcp_sock *msk, int optname,
	switch (optname) {
	case IP_FREEBIND:
	case IP_TRANSPARENT:
		return mptcp_setsockopt_sol_ip_set_transparent(msk, optname, optval, optlen);
	case IP_BIND_ADDRESS_NO_PORT:
	case IP_LOCAL_PORT_RANGE:
		return mptcp_setsockopt_sol_ip_set(msk, optname, optval, optlen);
	case IP_TOS:
		return mptcp_setsockopt_v4_set_tos(msk, optname, optval, optlen);
	}
@@ -1350,6 +1361,12 @@ static int mptcp_getsockopt_v4(struct mptcp_sock *msk, int optname,
	switch (optname) {
	case IP_TOS:
		return mptcp_put_int_option(msk, optval, optlen, READ_ONCE(inet_sk(sk)->tos));
	case IP_BIND_ADDRESS_NO_PORT:
		return mptcp_put_int_option(msk, optval, optlen,
				inet_test_bit(BIND_ADDRESS_NO_PORT, sk));
	case IP_LOCAL_PORT_RANGE:
		return mptcp_put_int_option(msk, optval, optlen,
				READ_ONCE(inet_sk(sk)->local_port_range));
	}

	return -EOPNOTSUPP;
@@ -1450,6 +1467,8 @@ static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk)

	inet_assign_bit(TRANSPARENT, ssk, inet_test_bit(TRANSPARENT, sk));
	inet_assign_bit(FREEBIND, ssk, inet_test_bit(FREEBIND, sk));
	inet_assign_bit(BIND_ADDRESS_NO_PORT, ssk, inet_test_bit(BIND_ADDRESS_NO_PORT, sk));
	WRITE_ONCE(inet_sk(ssk)->local_port_range, READ_ONCE(inet_sk(sk)->local_port_range));
}

void mptcp_sockopt_sync_locked(struct mptcp_sock *msk, struct sock *ssk)
+12 −0
Original line number Diff line number Diff line
@@ -146,6 +146,12 @@ FIXTURE_VARIANT_ADD(ip_local_port_range, ip4_stcp) {
	.so_protocol	= IPPROTO_SCTP,
};

FIXTURE_VARIANT_ADD(ip_local_port_range, ip4_mptcp) {
	.so_domain	= AF_INET,
	.so_type	= SOCK_STREAM,
	.so_protocol	= IPPROTO_MPTCP,
};

FIXTURE_VARIANT_ADD(ip_local_port_range, ip6_tcp) {
	.so_domain	= AF_INET6,
	.so_type	= SOCK_STREAM,
@@ -164,6 +170,12 @@ FIXTURE_VARIANT_ADD(ip_local_port_range, ip6_stcp) {
	.so_protocol	= IPPROTO_SCTP,
};

FIXTURE_VARIANT_ADD(ip_local_port_range, ip6_mptcp) {
	.so_domain	= AF_INET6,
	.so_type	= SOCK_STREAM,
	.so_protocol	= IPPROTO_MPTCP,
};

TEST_F(ip_local_port_range, invalid_option_value)
{
	__u16 val16;