Commit ff2c5916 authored by Jakub Kicinski's avatar Jakub Kicinski
Browse files

Merge branch 'mptcp-misc-fixes-for-v7-0-rc2'

Matthieu Baerts says:

====================
mptcp: misc fixes for v7.0-rc2

Here are various unrelated fixes:

- Patch 1: avoid bufferbloat in simult_flows selftest which can cause
  instabilities. A fix for v5.10.

- Patches 2-3: reduce RM_ADDR lost by not sending it over the same
  subflow as the one being removed, if possible. A fix for v5.13.

- Patches 4-5: avoid a WARN when using signal + subflow endpoints with a
  subflow limit of 0, and removing such endpoints during an active
  connection. A fix for v5.17.
====================

Link: https://patch.msgid.link/20260303-net-mptcp-misc-fixes-7-0-rc2-v1-0-4b5462b6f016@kernel.org


Signed-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parents f43ed0c5 1777f349
Loading
Loading
Loading
Loading
+43 −12
Original line number Diff line number Diff line
@@ -212,9 +212,24 @@ void mptcp_pm_send_ack(struct mptcp_sock *msk,
	spin_lock_bh(&msk->pm.lock);
}

void mptcp_pm_addr_send_ack(struct mptcp_sock *msk)
static bool subflow_in_rm_list(const struct mptcp_subflow_context *subflow,
			       const struct mptcp_rm_list *rm_list)
{
	struct mptcp_subflow_context *subflow, *alt = NULL;
	u8 i, id = subflow_get_local_id(subflow);

	for (i = 0; i < rm_list->nr; i++) {
		if (rm_list->ids[i] == id)
			return true;
	}

	return false;
}

static void
mptcp_pm_addr_send_ack_avoid_list(struct mptcp_sock *msk,
				  const struct mptcp_rm_list *rm_list)
{
	struct mptcp_subflow_context *subflow, *stale = NULL, *same_id = NULL;

	msk_owned_by_me(msk);
	lockdep_assert_held(&msk->pm.lock);
@@ -224,19 +239,35 @@ void mptcp_pm_addr_send_ack(struct mptcp_sock *msk)
		return;

	mptcp_for_each_subflow(msk, subflow) {
		if (__mptcp_subflow_active(subflow)) {
			if (!subflow->stale) {
				mptcp_pm_send_ack(msk, subflow, false, false);
				return;
			}
		if (!__mptcp_subflow_active(subflow))
			continue;

			if (!alt)
				alt = subflow;
		if (unlikely(subflow->stale)) {
			if (!stale)
				stale = subflow;
		} else if (unlikely(rm_list &&
				    subflow_in_rm_list(subflow, rm_list))) {
			if (!same_id)
				same_id = subflow;
		} else {
			goto send_ack;
		}
	}

	if (alt)
		mptcp_pm_send_ack(msk, alt, false, false);
	if (same_id)
		subflow = same_id;
	else if (stale)
		subflow = stale;
	else
		return;

send_ack:
	mptcp_pm_send_ack(msk, subflow, false, false);
}

void mptcp_pm_addr_send_ack(struct mptcp_sock *msk)
{
	mptcp_pm_addr_send_ack_avoid_list(msk, NULL);
}

int mptcp_pm_mp_prio_send_ack(struct mptcp_sock *msk,
@@ -470,7 +501,7 @@ int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_
	msk->pm.rm_list_tx = *rm_list;
	rm_addr |= BIT(MPTCP_RM_ADDR_SIGNAL);
	WRITE_ONCE(msk->pm.addr_signal, rm_addr);
	mptcp_pm_addr_send_ack(msk);
	mptcp_pm_addr_send_ack_avoid_list(msk, rm_list);
	return 0;
}

+9 −0
Original line number Diff line number Diff line
@@ -418,6 +418,15 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
	}

exit:
	/* If an endpoint has both the signal and subflow flags, but it is not
	 * possible to create subflows -- the 'while' loop body above never
	 * executed --  then still mark the endp as used, which is somehow the
	 * case. This avoids issues later when removing the endpoint and calling
	 * __mark_subflow_endp_available(), which expects the increment here.
	 */
	if (signal_and_subflow && local.addr.id != msk->mpc_endpoint_id)
		msk->pm.local_addr_used++;

	mptcp_pm_nl_check_work_pending(msk);
}

+49 −0
Original line number Diff line number Diff line
@@ -104,6 +104,24 @@ CBPF_MPTCP_SUBOPTION_ADD_ADDR="14,
			       6 0 0 65535,
			       6 0 0 0"

# IPv4: TCP hdr of 48B, a first suboption of 12B (DACK8), the RM_ADDR suboption
# generated using "nfbpf_compile '(ip[32] & 0xf0) == 0xc0 && ip[53] == 0x0c &&
#				  (ip[66] & 0xf0) == 0x40'"
CBPF_MPTCP_SUBOPTION_RM_ADDR="13,
			      48 0 0 0,
			      84 0 0 240,
			      21 0 9 64,
			      48 0 0 32,
			      84 0 0 240,
			      21 0 6 192,
			      48 0 0 53,
			      21 0 4 12,
			      48 0 0 66,
			      84 0 0 240,
			      21 0 1 64,
			      6 0 0 65535,
			      6 0 0 0"

init_partial()
{
	capout=$(mktemp)
@@ -2608,6 +2626,19 @@ remove_tests()
		chk_rst_nr 0 0
	fi

	# signal+subflow with limits, remove
	if reset "remove signal+subflow with limits"; then
		pm_nl_set_limits $ns1 0 0
		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,subflow
		pm_nl_set_limits $ns2 0 0
		addr_nr_ns1=-1 speed=slow \
			run_tests $ns1 $ns2 10.0.1.1
		chk_join_nr 0 0 0
		chk_add_nr 1 1
		chk_rm_nr 1 0 invert
		chk_rst_nr 0 0
	fi

	# addresses remove
	if reset "remove addresses"; then
		pm_nl_set_limits $ns1 3 3
@@ -4217,6 +4248,14 @@ endpoint_tests()
		chk_subflow_nr "after no reject" 3
		chk_mptcp_info subflows 2 subflows 2

		# To make sure RM_ADDR are sent over a different subflow, but
		# allow the rest to quickly and cleanly close the subflow
		local ipt=1
		ip netns exec "${ns2}" ${iptables} -I OUTPUT -s "10.0.1.2" \
			-p tcp -m tcp --tcp-option 30 \
			-m bpf --bytecode \
			"$CBPF_MPTCP_SUBOPTION_RM_ADDR" \
			-j DROP || ipt=0
		local i
		for i in $(seq 3); do
			pm_nl_del_endpoint $ns2 1 10.0.1.2
@@ -4229,6 +4268,7 @@ endpoint_tests()
			chk_subflow_nr "after re-add id 0 ($i)" 3
			chk_mptcp_info subflows 3 subflows 3
		done
		[ ${ipt} = 1 ] && ip netns exec "${ns2}" ${iptables} -D OUTPUT 1

		mptcp_lib_kill_group_wait $tests_pid

@@ -4288,11 +4328,20 @@ endpoint_tests()
		chk_mptcp_info subflows 2 subflows 2
		chk_mptcp_info add_addr_signal 2 add_addr_accepted 2

		# To make sure RM_ADDR are sent over a different subflow, but
		# allow the rest to quickly and cleanly close the subflow
		local ipt=1
		ip netns exec "${ns1}" ${iptables} -I OUTPUT -s "10.0.1.1" \
			-p tcp -m tcp --tcp-option 30 \
			-m bpf --bytecode \
			"$CBPF_MPTCP_SUBOPTION_RM_ADDR" \
			-j DROP || ipt=0
		pm_nl_del_endpoint $ns1 42 10.0.1.1
		sleep 0.5
		chk_subflow_nr "after delete ID 0" 2
		chk_mptcp_info subflows 2 subflows 2
		chk_mptcp_info add_addr_signal 2 add_addr_accepted 2
		[ ${ipt} = 1 ] && ip netns exec "${ns1}" ${iptables} -D OUTPUT 1

		pm_nl_add_endpoint $ns1 10.0.1.1 id 99 flags signal
		wait_mpj 4
+7 −4
Original line number Diff line number Diff line
@@ -237,10 +237,13 @@ run_test()
	for dev in ns2eth1 ns2eth2; do
		tc -n $ns2 qdisc del dev $dev root >/dev/null 2>&1
	done
	tc -n $ns1 qdisc add dev ns1eth1 root netem rate ${rate1}mbit $delay1
	tc -n $ns1 qdisc add dev ns1eth2 root netem rate ${rate2}mbit $delay2
	tc -n $ns2 qdisc add dev ns2eth1 root netem rate ${rate1}mbit $delay1
	tc -n $ns2 qdisc add dev ns2eth2 root netem rate ${rate2}mbit $delay2

	# keep the queued pkts number low, or the RTT estimator will see
	# increasing latency over time.
	tc -n $ns1 qdisc add dev ns1eth1 root netem rate ${rate1}mbit $delay1 limit 50
	tc -n $ns1 qdisc add dev ns1eth2 root netem rate ${rate2}mbit $delay2 limit 50
	tc -n $ns2 qdisc add dev ns2eth1 root netem rate ${rate1}mbit $delay1 limit 50
	tc -n $ns2 qdisc add dev ns2eth2 root netem rate ${rate2}mbit $delay2 limit 50

	# time is measured in ms, account for transfer size, aggregated link speed
	# and header overhead (10%)