Commit 0d76fc7e authored by Jakub Kicinski's avatar Jakub Kicinski
Browse files

Merge branch 'mptcp-pm-fix-ids-not-being-reusable'

Matthieu Baerts says:

====================
mptcp: pm: fix IDs not being reusable

Here are more fixes for the MPTCP in-kernel path-manager. In this
series, the fixes are around the endpoint IDs not being reusable for
on-going connections when re-creating endpoints with previously used IDs.

- Patch 1 fixes this case for endpoints being used to send ADD_ADDR.
  Patch 2 validates this fix. The issue is present since v5.10.

- Patch 3 fixes this case for endpoints being used to establish new
  subflows. Patch 4 validates this fix. The issue is present since v5.10.

- Patch 5 fixes this case when all endpoints are flushed. Patch 6
  validates this fix. The issue is present since v5.13.

- Patch 7 removes a helper that is confusing, and introduced in v5.10.
  It helps simplifying the next patches.

- Patch 8 makes sure a 'subflow' counter is only decremented when
  removing a 'subflow' endpoint. Can be backported up to v5.13.

- Patch 9 is similar, but for a 'signal' counter. Can be backported up
  to v5.10.

- Patch 10 checks the last max accepted ADD_ADDR limit before accepting
  new ADD_ADDR. For v5.10 as well.

- Patch 11 removes a wrong restriction for the userspace PM, added
  during a refactoring in v6.5.

- Patch 12 makes sure the fullmesh mode sets the ID 0 when a new subflow
  using the source address of the initial subflow is created. Patch 13
  covers this case. This issue is present since v5.15.

- Patch 14 avoid possible UaF when selecting an address from the
  endpoints list.
====================

Link: https://patch.msgid.link/20240819-net-mptcp-pm-reusing-id-v1-0-38035d40de5b@kernel.org


Signed-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parents c07ff859 48e50dcb
Loading
Loading
Loading
Loading
+0 −13
Original line number Diff line number Diff line
@@ -60,16 +60,6 @@ int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_
	return 0;
}

int mptcp_pm_remove_subflow(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list)
{
	pr_debug("msk=%p, rm_list_nr=%d", msk, rm_list->nr);

	spin_lock_bh(&msk->pm.lock);
	mptcp_pm_nl_rm_subflow_received(msk, rm_list);
	spin_unlock_bh(&msk->pm.lock);
	return 0;
}

/* path manager event handlers */

void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int server_side)
@@ -444,9 +434,6 @@ int mptcp_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id
	*flags = 0;
	*ifindex = 0;

	if (!id)
		return 0;

	if (mptcp_pm_is_userspace(msk))
		return mptcp_userspace_pm_get_flags_and_ifindex_by_id(msk, id, flags, ifindex);
	return mptcp_pm_nl_get_flags_and_ifindex_by_id(msk, id, flags, ifindex);
+94 −48
Original line number Diff line number Diff line
@@ -143,11 +143,13 @@ static bool lookup_subflow_by_daddr(const struct list_head *list,
	return false;
}

static struct mptcp_pm_addr_entry *
static bool
select_local_address(const struct pm_nl_pernet *pernet,
		     const struct mptcp_sock *msk)
		     const struct mptcp_sock *msk,
		     struct mptcp_pm_addr_entry *new_entry)
{
	struct mptcp_pm_addr_entry *entry, *ret = NULL;
	struct mptcp_pm_addr_entry *entry;
	bool found = false;

	msk_owned_by_me(msk);

@@ -159,17 +161,21 @@ select_local_address(const struct pm_nl_pernet *pernet,
		if (!test_bit(entry->addr.id, msk->pm.id_avail_bitmap))
			continue;

		ret = entry;
		*new_entry = *entry;
		found = true;
		break;
	}
	rcu_read_unlock();
	return ret;

	return found;
}

static struct mptcp_pm_addr_entry *
select_signal_address(struct pm_nl_pernet *pernet, const struct mptcp_sock *msk)
static bool
select_signal_address(struct pm_nl_pernet *pernet, const struct mptcp_sock *msk,
		      struct mptcp_pm_addr_entry *new_entry)
{
	struct mptcp_pm_addr_entry *entry, *ret = NULL;
	struct mptcp_pm_addr_entry *entry;
	bool found = false;

	rcu_read_lock();
	/* do not keep any additional per socket state, just signal
@@ -184,11 +190,13 @@ select_signal_address(struct pm_nl_pernet *pernet, const struct mptcp_sock *msk)
		if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL))
			continue;

		ret = entry;
		*new_entry = *entry;
		found = true;
		break;
	}
	rcu_read_unlock();
	return ret;

	return found;
}

unsigned int mptcp_pm_get_add_addr_signal_max(const struct mptcp_sock *msk)
@@ -512,9 +520,10 @@ __lookup_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *info)

static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
{
	struct mptcp_pm_addr_entry *local, *signal_and_subflow = NULL;
	struct sock *sk = (struct sock *)msk;
	struct mptcp_pm_addr_entry local;
	unsigned int add_addr_signal_max;
	bool signal_and_subflow = false;
	unsigned int local_addr_max;
	struct pm_nl_pernet *pernet;
	unsigned int subflows_max;
@@ -565,23 +574,22 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
		if (msk->pm.addr_signal & BIT(MPTCP_ADD_ADDR_SIGNAL))
			return;

		local = select_signal_address(pernet, msk);
		if (!local)
		if (!select_signal_address(pernet, msk, &local))
			goto subflow;

		/* If the alloc fails, we are on memory pressure, not worth
		 * continuing, and trying to create subflows.
		 */
		if (!mptcp_pm_alloc_anno_list(msk, &local->addr))
		if (!mptcp_pm_alloc_anno_list(msk, &local.addr))
			return;

		__clear_bit(local->addr.id, msk->pm.id_avail_bitmap);
		__clear_bit(local.addr.id, msk->pm.id_avail_bitmap);
		msk->pm.add_addr_signaled++;
		mptcp_pm_announce_addr(msk, &local->addr, false);
		mptcp_pm_announce_addr(msk, &local.addr, false);
		mptcp_pm_nl_addr_send_ack(msk);

		if (local->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW)
			signal_and_subflow = local;
		if (local.flags & MPTCP_PM_ADDR_FLAG_SUBFLOW)
			signal_and_subflow = true;
	}

subflow:
@@ -592,26 +600,22 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
		bool fullmesh;
		int i, nr;

		if (signal_and_subflow) {
			local = signal_and_subflow;
			signal_and_subflow = NULL;
		} else {
			local = select_local_address(pernet, msk);
			if (!local)
		if (signal_and_subflow)
			signal_and_subflow = false;
		else if (!select_local_address(pernet, msk, &local))
			break;
		}

		fullmesh = !!(local->flags & MPTCP_PM_ADDR_FLAG_FULLMESH);
		fullmesh = !!(local.flags & MPTCP_PM_ADDR_FLAG_FULLMESH);

		msk->pm.local_addr_used++;
		__clear_bit(local->addr.id, msk->pm.id_avail_bitmap);
		nr = fill_remote_addresses_vec(msk, &local->addr, fullmesh, addrs);
		__clear_bit(local.addr.id, msk->pm.id_avail_bitmap);
		nr = fill_remote_addresses_vec(msk, &local.addr, fullmesh, addrs);
		if (nr == 0)
			continue;

		spin_unlock_bh(&msk->pm.lock);
		for (i = 0; i < nr; i++)
			__mptcp_subflow_connect(sk, &local->addr, &addrs[i]);
			__mptcp_subflow_connect(sk, &local.addr, &addrs[i]);
		spin_lock_bh(&msk->pm.lock);
	}
	mptcp_pm_nl_check_work_pending(msk);
@@ -636,6 +640,7 @@ static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk,
{
	struct sock *sk = (struct sock *)msk;
	struct mptcp_pm_addr_entry *entry;
	struct mptcp_addr_info mpc_addr;
	struct pm_nl_pernet *pernet;
	unsigned int subflows_max;
	int i = 0;
@@ -643,6 +648,8 @@ static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk,
	pernet = pm_nl_get_pernet_from_msk(msk);
	subflows_max = mptcp_pm_get_subflows_max(msk);

	mptcp_local_address((struct sock_common *)msk, &mpc_addr);

	rcu_read_lock();
	list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) {
		if (!(entry->flags & MPTCP_PM_ADDR_FLAG_FULLMESH))
@@ -653,7 +660,13 @@ static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk,

		if (msk->pm.subflows < subflows_max) {
			msk->pm.subflows++;
			addrs[i++] = entry->addr;
			addrs[i] = entry->addr;

			/* Special case for ID0: set the correct ID */
			if (mptcp_addresses_equal(&entry->addr, &mpc_addr, entry->addr.port))
				addrs[i].id = 0;

			i++;
		}
	}
	rcu_read_unlock();
@@ -829,25 +842,27 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk,
			mptcp_close_ssk(sk, ssk, subflow);
			spin_lock_bh(&msk->pm.lock);

			removed = true;
			removed |= subflow->request_join;
			if (rm_type == MPTCP_MIB_RMSUBFLOW)
				__MPTCP_INC_STATS(sock_net(sk), rm_type);
		}
		if (rm_type == MPTCP_MIB_RMSUBFLOW)
			__set_bit(rm_id ? rm_id : msk->mpc_endpoint_id, msk->pm.id_avail_bitmap);
		else if (rm_type == MPTCP_MIB_RMADDR)

		if (rm_type == MPTCP_MIB_RMADDR)
			__MPTCP_INC_STATS(sock_net(sk), rm_type);

		if (!removed)
			continue;

		if (!mptcp_pm_is_kernel(msk))
			continue;

		if (rm_type == MPTCP_MIB_RMADDR) {
			msk->pm.add_addr_accepted--;
		if (rm_type == MPTCP_MIB_RMADDR && rm_id &&
		    !WARN_ON_ONCE(msk->pm.add_addr_accepted == 0)) {
			/* Note: if the subflow has been closed before, this
			 * add_addr_accepted counter will not be decremented.
			 */
			if (--msk->pm.add_addr_accepted < mptcp_pm_get_add_addr_accept_max(msk))
				WRITE_ONCE(msk->pm.accept_addr, true);
		} else if (rm_type == MPTCP_MIB_RMSUBFLOW) {
			msk->pm.local_addr_used--;
		}
	}
}
@@ -857,7 +872,7 @@ static void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk)
	mptcp_pm_nl_rm_addr_or_subflow(msk, &msk->pm.rm_list_rx, MPTCP_MIB_RMADDR);
}

void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk,
static void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk,
					    const struct mptcp_rm_list *rm_list)
{
	mptcp_pm_nl_rm_addr_or_subflow(msk, rm_list, MPTCP_MIB_RMSUBFLOW);
@@ -1393,6 +1408,10 @@ int mptcp_pm_nl_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int
	struct sock *sk = (struct sock *)msk;
	struct net *net = sock_net(sk);

	/* No entries with ID 0 */
	if (id == 0)
		return 0;

	rcu_read_lock();
	entry = __lookup_addr_by_id(pm_nl_get_pernet(net), id);
	if (entry) {
@@ -1431,13 +1450,24 @@ static bool mptcp_pm_remove_anno_addr(struct mptcp_sock *msk,
	ret = remove_anno_list_by_saddr(msk, addr);
	if (ret || force) {
		spin_lock_bh(&msk->pm.lock);
		msk->pm.add_addr_signaled -= ret;
		if (ret) {
			__set_bit(addr->id, msk->pm.id_avail_bitmap);
			msk->pm.add_addr_signaled--;
		}
		mptcp_pm_remove_addr(msk, &list);
		spin_unlock_bh(&msk->pm.lock);
	}
	return ret;
}

static void __mark_subflow_endp_available(struct mptcp_sock *msk, u8 id)
{
	/* If it was marked as used, and not ID 0, decrement local_addr_used */
	if (!__test_and_set_bit(id ? : msk->mpc_endpoint_id, msk->pm.id_avail_bitmap) &&
	    id && !WARN_ON_ONCE(msk->pm.local_addr_used == 0))
		msk->pm.local_addr_used--;
}

static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net,
						   const struct mptcp_pm_addr_entry *entry)
{
@@ -1466,8 +1496,19 @@ static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net,
		remove_subflow = lookup_subflow_by_saddr(&msk->conn_list, addr);
		mptcp_pm_remove_anno_addr(msk, addr, remove_subflow &&
					  !(entry->flags & MPTCP_PM_ADDR_FLAG_IMPLICIT));
		if (remove_subflow)
			mptcp_pm_remove_subflow(msk, &list);

		if (remove_subflow) {
			spin_lock_bh(&msk->pm.lock);
			mptcp_pm_nl_rm_subflow_received(msk, &list);
			spin_unlock_bh(&msk->pm.lock);
		}

		if (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) {
			spin_lock_bh(&msk->pm.lock);
			__mark_subflow_endp_available(msk, list.ids[0]);
			spin_unlock_bh(&msk->pm.lock);
		}

		release_sock(sk);

next:
@@ -1502,6 +1543,7 @@ static int mptcp_nl_remove_id_zero_address(struct net *net,
		spin_lock_bh(&msk->pm.lock);
		mptcp_pm_remove_addr(msk, &list);
		mptcp_pm_nl_rm_subflow_received(msk, &list);
		__mark_subflow_endp_available(msk, 0);
		spin_unlock_bh(&msk->pm.lock);
		release_sock(sk);

@@ -1605,14 +1647,17 @@ static void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk,
			alist.ids[alist.nr++] = entry->addr.id;
	}

	if (alist.nr) {
	spin_lock_bh(&msk->pm.lock);
	if (alist.nr) {
		msk->pm.add_addr_signaled -= alist.nr;
		mptcp_pm_remove_addr(msk, &alist);
		spin_unlock_bh(&msk->pm.lock);
	}
	if (slist.nr)
		mptcp_pm_remove_subflow(msk, &slist);
		mptcp_pm_nl_rm_subflow_received(msk, &slist);
	/* Reset counters: maybe some subflows have been removed before */
	bitmap_fill(msk->pm.id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
	msk->pm.local_addr_used = 0;
	spin_unlock_bh(&msk->pm.lock);
}

static void mptcp_nl_remove_addrs_list(struct net *net,
@@ -1900,6 +1945,7 @@ static void mptcp_pm_nl_fullmesh(struct mptcp_sock *msk,

	spin_lock_bh(&msk->pm.lock);
	mptcp_pm_nl_rm_subflow_received(msk, &list);
	__mark_subflow_endp_available(msk, list.ids[0]);
	mptcp_pm_create_subflow_or_signal_addr(msk);
	spin_unlock_bh(&msk->pm.lock);
}
+0 −3
Original line number Diff line number Diff line
@@ -1026,7 +1026,6 @@ int mptcp_pm_announce_addr(struct mptcp_sock *msk,
			   const struct mptcp_addr_info *addr,
			   bool echo);
int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list);
int mptcp_pm_remove_subflow(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list);
void mptcp_pm_remove_addrs(struct mptcp_sock *msk, struct list_head *rm_list);

void mptcp_free_local_addr_list(struct mptcp_sock *msk);
@@ -1133,8 +1132,6 @@ static inline u8 subflow_get_local_id(const struct mptcp_subflow_context *subflo

void __init mptcp_pm_nl_init(void);
void mptcp_pm_nl_work(struct mptcp_sock *msk);
void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk,
				     const struct mptcp_rm_list *rm_list);
unsigned int mptcp_pm_get_add_addr_signal_max(const struct mptcp_sock *msk);
unsigned int mptcp_pm_get_add_addr_accept_max(const struct mptcp_sock *msk);
unsigned int mptcp_pm_get_subflows_max(const struct mptcp_sock *msk);
+66 −10
Original line number Diff line number Diff line
@@ -436,9 +436,10 @@ reset_with_tcp_filter()
	local ns="${!1}"
	local src="${2}"
	local target="${3}"
	local chain="${4:-INPUT}"

	if ! ip netns exec "${ns}" ${iptables} \
			-A INPUT \
			-A "${chain}" \
			-s "${src}" \
			-p tcp \
			-j "${target}"; then
@@ -3058,6 +3059,7 @@ fullmesh_tests()
		pm_nl_set_limits $ns1 1 3
		pm_nl_set_limits $ns2 1 3
		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
		pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,fullmesh
		fullmesh=1 speed=slow \
			run_tests $ns1 $ns2 10.0.1.1
		chk_join_nr 3 3 3
@@ -3571,10 +3573,10 @@ endpoint_tests()
		mptcp_lib_kill_wait $tests_pid
	fi

	if reset "delete and re-add" &&
	if reset_with_tcp_filter "delete and re-add" ns2 10.0.3.2 REJECT OUTPUT &&
	   mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
		pm_nl_set_limits $ns1 1 1
		pm_nl_set_limits $ns2 1 1
		pm_nl_set_limits $ns1 0 2
		pm_nl_set_limits $ns2 0 2
		pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow
		test_linkfail=4 speed=20 \
			run_tests $ns1 $ns2 10.0.1.1 &
@@ -3591,19 +3593,37 @@ endpoint_tests()
		chk_subflow_nr "after delete" 1
		chk_mptcp_info subflows 0 subflows 0

		pm_nl_add_endpoint $ns2 10.0.2.2 dev ns2eth2 flags subflow
		pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow
		wait_mpj $ns2
		chk_subflow_nr "after re-add" 2
		chk_mptcp_info subflows 1 subflows 1

		pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow
		wait_attempt_fail $ns2
		chk_subflow_nr "after new reject" 2
		chk_mptcp_info subflows 1 subflows 1

		ip netns exec "${ns2}" ${iptables} -D OUTPUT -s "10.0.3.2" -p tcp -j REJECT
		pm_nl_del_endpoint $ns2 3 10.0.3.2
		pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow
		wait_mpj $ns2
		chk_subflow_nr "after no reject" 3
		chk_mptcp_info subflows 2 subflows 2

		mptcp_lib_kill_wait $tests_pid

		chk_join_nr 3 3 3
		chk_rm_nr 1 1
	fi

	# remove and re-add
	if reset "delete re-add signal" &&
	   mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
		pm_nl_set_limits $ns1 1 1
		pm_nl_set_limits $ns2 1 1
		pm_nl_set_limits $ns1 0 2
		pm_nl_set_limits $ns2 2 2
		pm_nl_add_endpoint $ns1 10.0.2.1 id 1 flags signal
		# broadcast IP: no packet for this address will be received on ns1
		pm_nl_add_endpoint $ns1 224.0.0.1 id 2 flags signal
		test_linkfail=4 speed=20 \
			run_tests $ns1 $ns2 10.0.1.1 &
		local tests_pid=$!
@@ -3615,17 +3635,53 @@ endpoint_tests()
		chk_mptcp_info subflows 1 subflows 1

		pm_nl_del_endpoint $ns1 1 10.0.2.1
		pm_nl_del_endpoint $ns1 2 224.0.0.1
		sleep 0.5
		chk_subflow_nr "after delete" 1
		chk_mptcp_info subflows 0 subflows 0

		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
		pm_nl_add_endpoint $ns1 10.0.2.1 id 1 flags signal
		pm_nl_add_endpoint $ns1 10.0.3.1 id 2 flags signal
		wait_mpj $ns2
		chk_subflow_nr "after re-add" 2
		chk_mptcp_info subflows 1 subflows 1
		chk_subflow_nr "after re-add" 3
		chk_mptcp_info subflows 2 subflows 2
		mptcp_lib_kill_wait $tests_pid

		chk_join_nr 3 3 3
		chk_add_nr 4 4
		chk_rm_nr 2 1 invert
	fi

	# flush and re-add
	if reset_with_tcp_filter "flush re-add" ns2 10.0.3.2 REJECT OUTPUT &&
	   mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
		pm_nl_set_limits $ns1 0 2
		pm_nl_set_limits $ns2 1 2
		# broadcast IP: no packet for this address will be received on ns1
		pm_nl_add_endpoint $ns1 224.0.0.1 id 2 flags signal
		pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow
		test_linkfail=4 speed=20 \
			run_tests $ns1 $ns2 10.0.1.1 &
		local tests_pid=$!

		wait_attempt_fail $ns2
		chk_subflow_nr "before flush" 1
		chk_mptcp_info subflows 0 subflows 0

		pm_nl_flush_endpoint $ns2
		pm_nl_flush_endpoint $ns1
		wait_rm_addr $ns2 0
		ip netns exec "${ns2}" ${iptables} -D OUTPUT -s "10.0.3.2" -p tcp -j REJECT
		pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow
		wait_mpj $ns2
		pm_nl_add_endpoint $ns1 10.0.3.1 id 2 flags signal
		wait_mpj $ns2
		mptcp_lib_kill_wait $tests_pid

		chk_join_nr 2 2 2
		chk_add_nr 2 2
		chk_rm_nr 1 0 invert
	fi
}

# [$1: error message]