Commit b90c7ca4 authored by Jakub Kicinski
Browse files

Merge branch 'mptcp-make-add_addr-retransmission-timeout-adaptive'

Matthieu Baerts says:

====================
mptcp: make ADD_ADDR retransmission timeout adaptive

Currently, the MPTCP ADD_ADDR notifications are retransmitted after a
fixed timeout controlled by the net.mptcp.add_addr_timeout sysctl knob,
if the corresponding "echo" packets have not been received by then. This
can be too slow (or too quick), especially with the overly cautious
default value of 2 minutes.

- Patch 1: make ADD_ADDR retransmission timeout adaptive, using the
  TCP's retransmission timeout. The corresponding sysctl knob is now
  used as a maximum value.

- Patch 2: now that these ADD_ADDR retransmissions can happen faster,
  all MPTCP Join subtests checking ADD_ADDR counters accept more
  ADD_ADDR than expected (if any). This is aligned with the previous
  behaviour, when the ADD_ADDR RTO was lowered down to 1 second.

- Patch 3: Some CIs have reported that some MPTCP Join signalling tests
  were unstable. It seems that it is due to the time it can take in slow
  environments to send a bunch of ADD_ADDR notifications and wait each
  time for their echo reply. Use a longer transfer to avoid such errors.

v1: https://lore.kernel.org/d5397026-92eb-4a43-9534-954b43ab9305@kernel.org
====================

Link: https://patch.msgid.link/20250907-net-next-mptcp-add_addr-retrans-adapt-v1-0-824cc805772b@kernel.org


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 4ea83b75 e2cda634
Loading
Loading
Loading
Loading
+5 −3
Original line number Diff line number Diff line
@@ -8,9 +8,11 @@ MPTCP Sysfs variables
===============================

add_addr_timeout - INTEGER (seconds)
	Set the timeout after which an ADD_ADDR control message will be
	resent to an MPTCP peer that has not acknowledged a previous
	ADD_ADDR message.
	Set the maximum value of the timeout after which an ADD_ADDR control
	message will be resent to an MPTCP peer that has not acknowledged a
	previous ADD_ADDR message. A dynamically estimated retransmission
	timeout, based on the estimated connection round-trip-time, is used
	instead if it is lower than this maximum.

	Do not retransmit if set to 0.

+24 −4
Original line number Diff line number Diff line
@@ -268,6 +268,27 @@ int mptcp_pm_mp_prio_send_ack(struct mptcp_sock *msk,
	return -EINVAL;
}

/*
 * Pick the timeout to use before (re)sending an ADD_ADDR for @msk.
 *
 * The value returned by mptcp_get_add_addr_timeout() acts as an upper
 * bound. The largest icsk_rto found among the subflows of @msk is
 * returned instead when it is non-zero and strictly below that bound.
 *
 * Returns the bound unchanged when no subflow reports a non-zero RTO,
 * which also means a bound of 0 is always returned as 0.
 */
static unsigned int mptcp_adjust_add_addr_timeout(struct mptcp_sock *msk)
{
	const struct net *net = sock_net((struct sock *)msk);
	unsigned int limit = mptcp_get_add_addr_timeout(net);
	struct mptcp_subflow_context *subflow;
	unsigned int highest_rto = 0;

	/* Track the largest retransmission timeout across all subflows. */
	mptcp_for_each_subflow(msk, subflow) {
		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
		unsigned int ssk_rto = inet_csk(ssk)->icsk_rto;

		if (ssk_rto > highest_rto)
			highest_rto = ssk_rto;
	}

	/* Nothing usable from the subflows, or not below the bound. */
	if (!highest_rto || highest_rto >= limit)
		return limit;

	return highest_rto;
}

static void mptcp_pm_add_timer(struct timer_list *timer)
{
	struct mptcp_pm_add_entry *entry = timer_container_of(entry, timer,
@@ -292,7 +313,7 @@ static void mptcp_pm_add_timer(struct timer_list *timer)
		goto out;
	}

	timeout = mptcp_get_add_addr_timeout(sock_net(sk));
	timeout = mptcp_adjust_add_addr_timeout(msk);
	if (!timeout)
		goto out;

@@ -307,7 +328,7 @@ static void mptcp_pm_add_timer(struct timer_list *timer)

	if (entry->retrans_times < ADD_ADDR_RETRANS_MAX)
		sk_reset_timer(sk, timer,
			       jiffies + timeout);
			       jiffies + (timeout << entry->retrans_times));

	spin_unlock_bh(&msk->pm.lock);

@@ -348,7 +369,6 @@ bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk,
{
	struct mptcp_pm_add_entry *add_entry = NULL;
	struct sock *sk = (struct sock *)msk;
	struct net *net = sock_net(sk);
	unsigned int timeout;

	lockdep_assert_held(&msk->pm.lock);
@@ -374,7 +394,7 @@ bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk,

	timer_setup(&add_entry->add_timer, mptcp_pm_add_timer, 0);
reset_timer:
	timeout = mptcp_get_add_addr_timeout(net);
	timeout = mptcp_adjust_add_addr_timeout(msk);
	if (timeout)
		sk_reset_timer(sk, &add_entry->add_timer, jiffies + timeout);

+11 −14
Original line number Diff line number Diff line
@@ -358,6 +358,7 @@ reset_with_add_addr_timeout()
		tables="${ip6tables}"
	fi

	# set a maximum, to avoid too long timeout with exponential backoff
	ip netns exec $ns1 sysctl -q net.mptcp.add_addr_timeout=1

	if ! ip netns exec $ns2 $tables -A OUTPUT -p tcp \
@@ -1669,7 +1670,6 @@ chk_add_nr()
	local tx=""
	local rx=""
	local count
	local timeout

	if [[ $ns_invert = "invert" ]]; then
		ns_tx=$ns2
@@ -1678,15 +1678,13 @@ chk_add_nr()
		rx=" server"
	fi

	timeout=$(ip netns exec ${ns_tx} sysctl -n net.mptcp.add_addr_timeout)

	print_check "add addr rx${rx}"
	count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtAddAddr")
	if [ -z "$count" ]; then
		print_skip
	# if the test configured a short timeout tolerate greater then expected
	# add addrs options, due to retransmissions
	elif [ "$count" != "$add_nr" ] && { [ "$timeout" -gt 1 ] || [ "$count" -lt "$add_nr" ]; }; then
	# Tolerate more ADD_ADDR than expected (if any), due to retransmissions
	elif [ "$count" != "$add_nr" ] &&
	     { [ "$add_nr" -eq 0 ] || [ "$count" -lt "$add_nr" ]; }; then
		fail_test "got $count ADD_ADDR[s] expected $add_nr"
	else
		print_ok
@@ -1774,18 +1772,15 @@ chk_add_tx_nr()
{
	local add_tx_nr=$1
	local echo_tx_nr=$2
	local timeout
	local count

	timeout=$(ip netns exec $ns1 sysctl -n net.mptcp.add_addr_timeout)

	print_check "add addr tx"
	count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtAddAddrTx")
	if [ -z "$count" ]; then
		print_skip
	# if the test configured a short timeout tolerate greater then expected
	# add addrs options, due to retransmissions
	elif [ "$count" != "$add_tx_nr" ] && { [ "$timeout" -gt 1 ] || [ "$count" -lt "$add_tx_nr" ]; }; then
	# Tolerate more ADD_ADDR than expected (if any), due to retransmissions
	elif [ "$count" != "$add_tx_nr" ] &&
	     { [ "$add_tx_nr" -eq 0 ] || [ "$count" -lt "$add_tx_nr" ]; }; then
		fail_test "got $count ADD_ADDR[s] TX, expected $add_tx_nr"
	else
		print_ok
@@ -2273,6 +2268,7 @@ signal_address_tests()
		pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
		pm_nl_add_endpoint $ns1 10.0.4.1 flags signal
		pm_nl_set_limits $ns2 3 3
		speed=slow \
			run_tests $ns1 $ns2 10.0.1.1
		chk_join_nr 3 3 3
		chk_add_nr 3 3
@@ -2285,6 +2281,7 @@ signal_address_tests()
		pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
		pm_nl_add_endpoint $ns1 10.0.14.1 flags signal
		pm_nl_set_limits $ns2 3 3
		speed=slow \
			run_tests $ns1 $ns2 10.0.1.1
		join_syn_tx=3 \
			chk_join_nr 1 1 1