Commit 9de1280f authored by Jakub Kicinski
Browse files

Merge branch 'mptcp-fixes-for-connect-selftest-flakes'

Matthieu Baerts says:

====================
mptcp: fixes for connect selftest flakes

Last week, Jakub reported [1] that the MPTCP Connect selftest was
unstable. The instability appeared to start after the introduction of
some fixes [2]. According to Paolo's analysis, these fixes revealed
existing bugs, which should be fixed by the following patches.

- Patch 1: Make sure ACKs are sent when the MPTCP-level window re-opens.
  In some corner cases, the other peer was not notified when more data
  could be sent. A fix for v5.11, but depending on a feature introduced
  in v5.19.

- Patch 2: Fix spurious wake-up under memory pressure. In this
  situation, userspace could be invited to read data that is not there
  yet. A fix for v6.7.

- Patch 3: Fix a false positive error when running the MPTCP Connect
  selftest with the "disconnect" cases. Userspace could disconnect
  the socket too soon, which would reset (MP_FASTCLOSE) the connection;
  the test interpreted this reset as an error. A fix for v5.17.

Link: https://lore.kernel.org/20250107131845.5e5de3c5@kernel.org [1]
Link: https://lore.kernel.org/20241230-net-mptcp-rbuf-fixes-v1-0-8608af434ceb@kernel.org [2]
====================

Link: https://patch.msgid.link/20250113-net-mptcp-connect-st-flakes-v1-0-0d986ee7b1b6@kernel.org


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 665bcfc9 218cc166
Loading
Loading
Loading
Loading
+4 −2
Original line number Diff line number Diff line
@@ -607,7 +607,6 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
	}
	opts->ext_copy.use_ack = 1;
	opts->suboptions = OPTION_MPTCP_DSS;
	WRITE_ONCE(msk->old_wspace, __mptcp_space((struct sock *)msk));

	/* Add kind/length/subtype/flag overhead if mapping is not populated */
	if (dss_size == 0)
@@ -1288,7 +1287,7 @@ static void mptcp_set_rwin(struct tcp_sock *tp, struct tcphdr *th)
			}
			MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_RCVWNDCONFLICT);
		}
		return;
		goto update_wspace;
	}

	if (rcv_wnd_new != rcv_wnd_old) {
@@ -1313,6 +1312,9 @@ static void mptcp_set_rwin(struct tcp_sock *tp, struct tcphdr *th)
		th->window = htons(new_win);
		MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_RCVWNDSHARED);
	}

update_wspace:
	WRITE_ONCE(msk->old_wspace, tp->rcv_wnd);
}

__sum16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum)
+7 −2
Original line number Diff line number Diff line
@@ -760,10 +760,15 @@ static inline u64 mptcp_data_avail(const struct mptcp_sock *msk)

static inline bool mptcp_epollin_ready(const struct sock *sk)
{
	u64 data_avail = mptcp_data_avail(mptcp_sk(sk));

	if (!data_avail)
		return false;

	/* mptcp doesn't have to deal with small skbs in the receive queue,
	 * at it can always coalesce them
	 * as it can always coalesce them
	 */
	return (mptcp_data_avail(mptcp_sk(sk)) >= sk->sk_rcvlowat) ||
	return (data_avail >= sk->sk_rcvlowat) ||
	       (mem_cgroup_sockets_enabled && sk->sk_memcg &&
		mem_cgroup_under_socket_pressure(sk->sk_memcg)) ||
	       READ_ONCE(tcp_memory_pressure);
+32 −11
Original line number Diff line number Diff line
@@ -25,6 +25,8 @@
#include <sys/types.h>
#include <sys/mman.h>

#include <arpa/inet.h>

#include <netdb.h>
#include <netinet/in.h>

@@ -1211,23 +1213,42 @@ static void parse_setsock_options(const char *name)
	exit(1);
}

void xdisconnect(int fd, int addrlen)
void xdisconnect(int fd)
{
	struct sockaddr_storage empty;
	socklen_t addrlen = sizeof(struct sockaddr_storage);
	struct sockaddr_storage addr, empty;
	int msec_sleep = 10;
	int queued = 1;
	int i;
	void *raw_addr;
	int i, cmdlen;
	char cmd[128];

	/* get the local address and convert it to string */
	if (getsockname(fd, (struct sockaddr *)&addr, &addrlen) < 0)
		xerror("getsockname");

	if (addr.ss_family == AF_INET)
		raw_addr = &(((struct sockaddr_in *)&addr)->sin_addr);
	else if (addr.ss_family == AF_INET6)
		raw_addr = &(((struct sockaddr_in6 *)&addr)->sin6_addr);
	else
		xerror("bad family");

	strcpy(cmd, "ss -M | grep -q ");
	cmdlen = strlen(cmd);
	if (!inet_ntop(addr.ss_family, raw_addr, &cmd[cmdlen],
		       sizeof(cmd) - cmdlen))
		xerror("inet_ntop");

	shutdown(fd, SHUT_WR);

	/* while until the pending data is completely flushed, the later
	/*
	 * wait until the pending data is completely flushed and all
	 * the MPTCP sockets reached the closed status.
	 * disconnect will bypass/ignore/drop any pending data.
	 */
	for (i = 0; ; i += msec_sleep) {
		if (ioctl(fd, SIOCOUTQ, &queued) < 0)
			xerror("can't query out socket queue: %d", errno);

		if (!queued)
		/* closed socket are not listed by 'ss' */
		if (system(cmd) != 0)
			break;

		if (i > poll_timeout)
@@ -1281,9 +1302,9 @@ int main_loop(void)
		return ret;

	if (cfg_truncate > 0) {
		xdisconnect(fd, peer->ai_addrlen);
		xdisconnect(fd);
	} else if (--cfg_repeat > 0) {
		xdisconnect(fd, peer->ai_addrlen);
		xdisconnect(fd);

		/* the socket could be unblocking at this point, we need the
		 * connect to be blocking