Commit 4026310a authored by Jakub Kicinski
Browse files

Merge branch 'mptcp-misc-fixes-for-v6-18-rc7'

Matthieu Baerts says:

====================
mptcp: misc fixes for v6.18-rc7

Here are various unrelated fixes:

- Patch 1: Fix window space computation for fallback connections which
  can affect ACK generation. A fix for v5.11.

- Patch 2: Avoid unneeded subflow-level drops due to unsynced received
  window. A fix for v5.11.

- Patch 3: Avoid premature close for fallback connections with PREEMPT
  kernels. A fix for v5.12.

- Patch 4: Reset instead of fallback in case of data in the MPTCP
  out-of-order queue. A fix for v5.7.

- Patches 5-7: Avoid also sending a "plain" TCP reset when closing with an
  MP_FASTCLOSE. A fix for v6.1.

- Patches 8-9: Longer timeout for background connections in MPTCP Join
  selftests. An additional fix for recent patches for v5.13/v6.1.

- Patches 10-11: Fix typo in a check introduced in a recent refactoring.
  A fix for v6.15.
====================

Link: https://patch.msgid.link/20251118-net-mptcp-misc-fixes-6-18-rc6-v1-0-806d3781c95f@kernel.org


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents e31a11be 0eee0fdf
Loading
Loading
Loading
Loading
+53 −1
Original line number Diff line number Diff line
@@ -838,8 +838,11 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,

	opts->suboptions = 0;

	/* Force later mptcp_write_options(), but do not use any actual
	 * option space.
	 */
	if (unlikely(__mptcp_check_fallback(msk) && !mptcp_check_infinite_map(skb)))
		return false;
		return true;

	if (unlikely(skb && TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST)) {
		if (mptcp_established_options_fastclose(sk, &opt_size, remaining, opts) ||
@@ -1041,6 +1044,31 @@ static void __mptcp_snd_una_update(struct mptcp_sock *msk, u64 new_snd_una)
	WRITE_ONCE(msk->snd_una, new_snd_una);
}

/* Bring the subflow's TCP-level receive window in line with the
 * msk-level one.
 *
 * @msk: MPTCP socket holding the msk-level rcv_wnd_sent value
 * @ssk: subflow socket the incoming @skb belongs to
 * @skb: incoming packet; only its length is inspected here
 *
 * If the msk-level rcv_wnd_sent is after (as per after64()) the value
 * last recorded on the subflow, tp->rcv_wnd is advanced by the
 * difference and the subflow's rcv_wnd_sent is updated to match.
 */
static void rwin_update(struct mptcp_sock *msk, struct sock *ssk,
			struct sk_buff *skb)
{
	struct mptcp_subflow_context *sf_ctx = mptcp_subflow_ctx(ssk);
	struct tcp_sock *tp = tcp_sk(ssk);
	u64 msk_wnd_sent;
	u64 ssk_wnd_sent;

	/* Fast path: an empty skb, or one that TCP will accept without
	 * filling its own window even if the mptcp-level rwin is stale,
	 * needs no resync; bail out before touching extra cachelines.
	 */
	if (skb->len == 0)
		return;
	if (skb->len < tcp_receive_window(tp))
		return;

	msk_wnd_sent = atomic64_read(&msk->rcv_wnd_sent);
	ssk_wnd_sent = sf_ctx->rcv_wnd_sent;

	/* The subflow is already in sync with the msk-level value. */
	if (!after64(msk_wnd_sent, ssk_wnd_sent))
		return;

	/* The mptcp-level rwin was grown by some other subflow since
	 * rcv_wup: apply the same growth to this subflow.
	 */
	tp->rcv_wnd += msk_wnd_sent - ssk_wnd_sent;
	sf_ctx->rcv_wnd_sent = msk_wnd_sent;
}

static void ack_update_msk(struct mptcp_sock *msk,
			   struct sock *ssk,
			   struct mptcp_options_received *mp_opt)
@@ -1208,6 +1236,7 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
	 */
	if (mp_opt.use_ack)
		ack_update_msk(msk, sk, &mp_opt);
	rwin_update(msk, sk, skb);

	/* Zero-data-length packets are dropped by the caller and not
	 * propagated to the MPTCP layer, so the skb extension does not
@@ -1294,6 +1323,10 @@ static void mptcp_set_rwin(struct tcp_sock *tp, struct tcphdr *th)

	if (rcv_wnd_new != rcv_wnd_old) {
raise_win:
		/* The msk-level rcv wnd is after the tcp level one,
		 * sync the latter.
		 */
		rcv_wnd_new = rcv_wnd_old;
		win = rcv_wnd_old - ack_seq;
		tp->rcv_wnd = min_t(u64, win, U32_MAX);
		new_win = tp->rcv_wnd;
@@ -1317,6 +1350,21 @@ static void mptcp_set_rwin(struct tcp_sock *tp, struct tcphdr *th)

update_wspace:
	WRITE_ONCE(msk->old_wspace, tp->rcv_wnd);
	subflow->rcv_wnd_sent = rcv_wnd_new;
}

static void mptcp_track_rwin(struct tcp_sock *tp)
{
	const struct sock *ssk = (const struct sock *)tp;
	struct mptcp_subflow_context *subflow;
	struct mptcp_sock *msk;

	if (!ssk)
		return;

	subflow = mptcp_subflow_ctx(ssk);
	msk = mptcp_sk(subflow->conn);
	WRITE_ONCE(msk->old_wspace, tp->rcv_wnd);
}

__sum16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum)
@@ -1611,6 +1659,10 @@ void mptcp_write_options(struct tcphdr *th, __be32 *ptr, struct tcp_sock *tp,
				      opts->reset_transient,
				      opts->reset_reason);
		return;
	} else if (unlikely(!opts->suboptions)) {
		/* Fallback to TCP */
		mptcp_track_rwin(tp);
		return;
	}

	if (OPTION_MPTCP_PRIO & opts->suboptions) {
+1 −1
Original line number Diff line number Diff line
@@ -672,7 +672,7 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk)

void mptcp_pm_nl_rm_addr(struct mptcp_sock *msk, u8 rm_id)
{
	if (rm_id && WARN_ON_ONCE(msk->pm.add_addr_accepted == 0)) {
	if (rm_id && !WARN_ON_ONCE(msk->pm.add_addr_accepted == 0)) {
		u8 limit_add_addr_accepted =
			mptcp_pm_get_limit_add_addr_accepted(msk);

+41 −18
Original line number Diff line number Diff line
@@ -76,6 +76,13 @@ bool __mptcp_try_fallback(struct mptcp_sock *msk, int fb_mib)
	if (__mptcp_check_fallback(msk))
		return true;

	/* The caller possibly is not holding the msk socket lock, but
	 * in the fallback case only the current subflow is touching
	 * the OoO queue.
	 */
	if (!RB_EMPTY_ROOT(&msk->out_of_order_queue))
		return false;

	spin_lock_bh(&msk->fallback_lock);
	if (!msk->allow_infinite_fallback) {
		spin_unlock_bh(&msk->fallback_lock);
@@ -2402,7 +2409,6 @@ bool __mptcp_retransmit_pending_data(struct sock *sk)

/* flags for __mptcp_close_ssk() */
#define MPTCP_CF_PUSH		BIT(1)
#define MPTCP_CF_FASTCLOSE	BIT(2)

/* be sure to send a reset only if the caller asked for it, also
 * clean completely the subflow status when the subflow reaches
@@ -2413,7 +2419,7 @@ static void __mptcp_subflow_disconnect(struct sock *ssk,
				       unsigned int flags)
{
	if (((1 << ssk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) ||
	    (flags & MPTCP_CF_FASTCLOSE)) {
	    subflow->send_fastclose) {
		/* The MPTCP code never wait on the subflow sockets, TCP-level
		 * disconnect should never fail
		 */
@@ -2460,14 +2466,8 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,

	lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);

	if ((flags & MPTCP_CF_FASTCLOSE) && !__mptcp_check_fallback(msk)) {
		/* be sure to force the tcp_close path
		 * to generate the egress reset
		 */
		ssk->sk_lingertime = 0;
		sock_set_flag(ssk, SOCK_LINGER);
		subflow->send_fastclose = 1;
	}
	if (subflow->send_fastclose && ssk->sk_state != TCP_CLOSE)
		tcp_set_state(ssk, TCP_CLOSE);

	need_push = (flags & MPTCP_CF_PUSH) && __mptcp_retransmit_pending_data(sk);
	if (!dispose_it) {
@@ -2563,7 +2563,8 @@ static void __mptcp_close_subflow(struct sock *sk)

		if (ssk_state != TCP_CLOSE &&
		    (ssk_state != TCP_CLOSE_WAIT ||
		     inet_sk_state_load(sk) != TCP_ESTABLISHED))
		     inet_sk_state_load(sk) != TCP_ESTABLISHED ||
		     __mptcp_check_fallback(msk)))
			continue;

		/* 'subflow_data_ready' will re-sched once rx queue is empty */
@@ -2771,9 +2772,26 @@ static void mptcp_do_fastclose(struct sock *sk)
	struct mptcp_sock *msk = mptcp_sk(sk);

	mptcp_set_state(sk, TCP_CLOSE);
	mptcp_for_each_subflow_safe(msk, subflow, tmp)
		__mptcp_close_ssk(sk, mptcp_subflow_tcp_sock(subflow),
				  subflow, MPTCP_CF_FASTCLOSE);

	/* Explicitly send the fastclose reset as need */
	if (__mptcp_check_fallback(msk))
		return;

	mptcp_for_each_subflow_safe(msk, subflow, tmp) {
		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);

		lock_sock(ssk);

		/* Some subflow socket states don't allow/need a reset.*/
		if ((1 << ssk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
			goto unlock;

		subflow->send_fastclose = 1;
		tcp_send_active_reset(ssk, ssk->sk_allocation,
				      SK_RST_REASON_TCP_ABORT_ON_CLOSE);
unlock:
		release_sock(ssk);
	}
}

static void mptcp_worker(struct work_struct *work)
@@ -2800,7 +2818,11 @@ static void mptcp_worker(struct work_struct *work)
		__mptcp_close_subflow(sk);

	if (mptcp_close_tout_expired(sk)) {
		struct mptcp_subflow_context *subflow, *tmp;

		mptcp_do_fastclose(sk);
		mptcp_for_each_subflow_safe(msk, subflow, tmp)
			__mptcp_close_ssk(sk, subflow->tcp_sock, subflow, 0);
		mptcp_close_wake_up(sk);
	}

@@ -3225,7 +3247,8 @@ static int mptcp_disconnect(struct sock *sk, int flags)
	/* msk->subflow is still intact, the following will not free the first
	 * subflow
	 */
	mptcp_destroy_common(msk, MPTCP_CF_FASTCLOSE);
	mptcp_do_fastclose(sk);
	mptcp_destroy_common(msk);

	/* The first subflow is already in TCP_CLOSE status, the following
	 * can't overlap with a fallback anymore
@@ -3404,7 +3427,7 @@ void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk)
		msk->rcvq_space.space = TCP_INIT_CWND * TCP_MSS_DEFAULT;
}

void mptcp_destroy_common(struct mptcp_sock *msk, unsigned int flags)
void mptcp_destroy_common(struct mptcp_sock *msk)
{
	struct mptcp_subflow_context *subflow, *tmp;
	struct sock *sk = (struct sock *)msk;
@@ -3413,7 +3436,7 @@ void mptcp_destroy_common(struct mptcp_sock *msk, unsigned int flags)

	/* join list will be eventually flushed (with rst) at sock lock release time */
	mptcp_for_each_subflow_safe(msk, subflow, tmp)
		__mptcp_close_ssk(sk, mptcp_subflow_tcp_sock(subflow), subflow, flags);
		__mptcp_close_ssk(sk, mptcp_subflow_tcp_sock(subflow), subflow, 0);

	__skb_queue_purge(&sk->sk_receive_queue);
	skb_rbtree_purge(&msk->out_of_order_queue);
@@ -3431,7 +3454,7 @@ static void mptcp_destroy(struct sock *sk)

	/* allow the following to close even the initial subflow */
	msk->free_first = 1;
	mptcp_destroy_common(msk, 0);
	mptcp_destroy_common(msk);
	sk_sockets_allocated_dec(sk);
}

+2 −1
Original line number Diff line number Diff line
@@ -509,6 +509,7 @@ struct mptcp_subflow_context {
	u64	remote_key;
	u64	idsn;
	u64	map_seq;
	u64	rcv_wnd_sent;
	u32	snd_isn;
	u32	token;
	u32	rel_write_seq;
@@ -976,7 +977,7 @@ static inline void mptcp_propagate_sndbuf(struct sock *sk, struct sock *ssk)
	local_bh_enable();
}

void mptcp_destroy_common(struct mptcp_sock *msk, unsigned int flags);
void mptcp_destroy_common(struct mptcp_sock *msk);

#define MPTCP_TOKEN_MAX_RETRIES	4

+16 −11
Original line number Diff line number Diff line
@@ -3500,7 +3500,6 @@ fullmesh_tests()
fastclose_tests()
{
	if reset_check_counter "fastclose test" "MPTcpExtMPFastcloseTx"; then
		MPTCP_LIB_SUBTEST_FLAKY=1
		test_linkfail=1024 fastclose=client \
			run_tests $ns1 $ns2 10.0.1.1
		chk_join_nr 0 0 0
@@ -3509,7 +3508,6 @@ fastclose_tests()
	fi

	if reset_check_counter "fastclose server test" "MPTcpExtMPFastcloseRx"; then
		MPTCP_LIB_SUBTEST_FLAKY=1
		test_linkfail=1024 fastclose=server \
			run_tests $ns1 $ns2 10.0.1.1
		join_rst_nr=1 \
@@ -3806,7 +3804,7 @@ userspace_tests()
	   continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
		set_userspace_pm $ns1
		pm_nl_set_limits $ns2 2 2
		{ test_linkfail=128 speed=5 \
		{ timeout_test=120 test_linkfail=128 speed=5 \
			run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
		local tests_pid=$!
		wait_mpj $ns1
@@ -3839,7 +3837,7 @@ userspace_tests()
	   continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
		set_userspace_pm $ns2
		pm_nl_set_limits $ns1 0 1
		{ test_linkfail=128 speed=5 \
		{ timeout_test=120 test_linkfail=128 speed=5 \
			run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
		local tests_pid=$!
		wait_mpj $ns2
@@ -3867,7 +3865,7 @@ userspace_tests()
	   continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
		set_userspace_pm $ns2
		pm_nl_set_limits $ns1 0 1
		{ test_linkfail=128 speed=5 \
		{ timeout_test=120 test_linkfail=128 speed=5 \
			run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
		local tests_pid=$!
		wait_mpj $ns2
@@ -3888,7 +3886,7 @@ userspace_tests()
	   continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
		set_userspace_pm $ns2
		pm_nl_set_limits $ns1 0 1
		{ test_linkfail=128 speed=5 \
		{ timeout_test=120 test_linkfail=128 speed=5 \
			run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
		local tests_pid=$!
		wait_mpj $ns2
@@ -3912,7 +3910,7 @@ userspace_tests()
	   continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
		set_userspace_pm $ns1
		pm_nl_set_limits $ns2 1 1
		{ test_linkfail=128 speed=5 \
		{ timeout_test=120 test_linkfail=128 speed=5 \
			run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
		local tests_pid=$!
		wait_mpj $ns1
@@ -3943,7 +3941,7 @@ endpoint_tests()
		pm_nl_set_limits $ns1 2 2
		pm_nl_set_limits $ns2 2 2
		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
		{ test_linkfail=128 speed=slow \
		{ timeout_test=120 test_linkfail=128 speed=slow \
			run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
		local tests_pid=$!

@@ -3970,7 +3968,7 @@ endpoint_tests()
		pm_nl_set_limits $ns2 0 3
		pm_nl_add_endpoint $ns2 10.0.1.2 id 1 dev ns2eth1 flags subflow
		pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow
		{ test_linkfail=128 speed=5 \
		{ timeout_test=120 test_linkfail=128 speed=5 \
			run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
		local tests_pid=$!

@@ -4048,7 +4046,7 @@ endpoint_tests()
		# broadcast IP: no packet for this address will be received on ns1
		pm_nl_add_endpoint $ns1 224.0.0.1 id 2 flags signal
		pm_nl_add_endpoint $ns1 10.0.1.1 id 42 flags signal
		{ test_linkfail=128 speed=5 \
		{ timeout_test=120 test_linkfail=128 speed=5 \
			run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
		local tests_pid=$!

@@ -4057,38 +4055,45 @@ endpoint_tests()
			$ns1 10.0.2.1 id 1 flags signal
		chk_subflow_nr "before delete" 2
		chk_mptcp_info subflows 1 subflows 1
		chk_mptcp_info add_addr_signal 2 add_addr_accepted 1

		pm_nl_del_endpoint $ns1 1 10.0.2.1
		pm_nl_del_endpoint $ns1 2 224.0.0.1
		sleep 0.5
		chk_subflow_nr "after delete" 1
		chk_mptcp_info subflows 0 subflows 0
		chk_mptcp_info add_addr_signal 0 add_addr_accepted 0

		pm_nl_add_endpoint $ns1 10.0.2.1 id 1 flags signal
		pm_nl_add_endpoint $ns1 10.0.3.1 id 2 flags signal
		wait_mpj $ns2
		chk_subflow_nr "after re-add" 3
		chk_mptcp_info subflows 2 subflows 2
		chk_mptcp_info add_addr_signal 2 add_addr_accepted 2

		pm_nl_del_endpoint $ns1 42 10.0.1.1
		sleep 0.5
		chk_subflow_nr "after delete ID 0" 2
		chk_mptcp_info subflows 2 subflows 2
		chk_mptcp_info add_addr_signal 2 add_addr_accepted 2

		pm_nl_add_endpoint $ns1 10.0.1.1 id 99 flags signal
		wait_mpj $ns2
		chk_subflow_nr "after re-add ID 0" 3
		chk_mptcp_info subflows 3 subflows 3
		chk_mptcp_info add_addr_signal 3 add_addr_accepted 2

		pm_nl_del_endpoint $ns1 99 10.0.1.1
		sleep 0.5
		chk_subflow_nr "after re-delete ID 0" 2
		chk_mptcp_info subflows 2 subflows 2
		chk_mptcp_info add_addr_signal 2 add_addr_accepted 2

		pm_nl_add_endpoint $ns1 10.0.1.1 id 88 flags signal
		wait_mpj $ns2
		chk_subflow_nr "after re-re-add ID 0" 3
		chk_mptcp_info subflows 3 subflows 3
		chk_mptcp_info add_addr_signal 3 add_addr_accepted 2
		mptcp_lib_kill_group_wait $tests_pid

		kill_events_pids
@@ -4121,7 +4126,7 @@ endpoint_tests()
		# broadcast IP: no packet for this address will be received on ns1
		pm_nl_add_endpoint $ns1 224.0.0.1 id 2 flags signal
		pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow
		{ test_linkfail=128 speed=20 \
		{ timeout_test=120 test_linkfail=128 speed=20 \
			run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
		local tests_pid=$!