Commit 6639498e authored by Paolo Abeni's avatar Paolo Abeni Committed by Jakub Kicinski
Browse files

mptcp: cleanup mem accounting



After the previous patch, updating sk_forward_memory is cheap and
we can drop a lot of complexity from the MPTCP memory accounting,
removing the custom fwd mem allocations for rmem.

Signed-off-by: default avatarPaolo Abeni <pabeni@redhat.com>
Reviewed-by: default avatarMat Martineau <martineau@kernel.org>
Signed-off-by: default avatarMatthieu Baerts (NGI0) <matttbe@kernel.org>
Link: https://patch.msgid.link/20250218-net-next-mptcp-rx-path-refactor-v1-4-4a47d90d7998@kernel.org


Signed-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parent bc68b0ef
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -50,7 +50,7 @@ void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subf
	mptcp_data_lock(sk);
	DEBUG_NET_WARN_ON_ONCE(sock_owned_by_user_nocheck(sk));

	mptcp_set_owner_r(skb, sk);
	skb_set_owner_r(skb, sk);
	__skb_queue_tail(&sk->sk_receive_queue, skb);
	mptcp_sk(sk)->bytes_received += skb->len;

+8 −107
Original line number Diff line number Diff line
@@ -118,17 +118,6 @@ static void mptcp_drop(struct sock *sk, struct sk_buff *skb)
	__kfree_skb(skb);
}

static void mptcp_rmem_fwd_alloc_add(struct sock *sk, int size)
{
	WRITE_ONCE(mptcp_sk(sk)->rmem_fwd_alloc,
		   mptcp_sk(sk)->rmem_fwd_alloc + size);
}

static void mptcp_rmem_charge(struct sock *sk, int size)
{
	mptcp_rmem_fwd_alloc_add(sk, -size);
}

static bool mptcp_try_coalesce(struct sock *sk, struct sk_buff *to,
			       struct sk_buff *from)
{
@@ -151,7 +140,7 @@ static bool mptcp_try_coalesce(struct sock *sk, struct sk_buff *to,
	 * negative one
	 */
	atomic_add(delta, &sk->sk_rmem_alloc);
	mptcp_rmem_charge(sk, delta);
	sk_mem_charge(sk, delta);
	kfree_skb_partial(from, fragstolen);

	return true;
@@ -166,44 +155,6 @@ static bool mptcp_ooo_try_coalesce(struct mptcp_sock *msk, struct sk_buff *to,
	return mptcp_try_coalesce((struct sock *)msk, to, from);
}

static void __mptcp_rmem_reclaim(struct sock *sk, int amount)
{
	amount >>= PAGE_SHIFT;
	mptcp_rmem_charge(sk, amount << PAGE_SHIFT);
	__sk_mem_reduce_allocated(sk, amount);
}

static void mptcp_rmem_uncharge(struct sock *sk, int size)
{
	struct mptcp_sock *msk = mptcp_sk(sk);
	int reclaimable;

	mptcp_rmem_fwd_alloc_add(sk, size);
	reclaimable = msk->rmem_fwd_alloc - sk_unused_reserved_mem(sk);

	/* see sk_mem_uncharge() for the rationale behind the following schema */
	if (unlikely(reclaimable >= PAGE_SIZE))
		__mptcp_rmem_reclaim(sk, reclaimable);
}

static void mptcp_rfree(struct sk_buff *skb)
{
	unsigned int len = skb->truesize;
	struct sock *sk = skb->sk;

	atomic_sub(len, &sk->sk_rmem_alloc);
	mptcp_rmem_uncharge(sk, len);
}

void mptcp_set_owner_r(struct sk_buff *skb, struct sock *sk)
{
	skb_orphan(skb);
	skb->sk = sk;
	skb->destructor = mptcp_rfree;
	atomic_add(skb->truesize, &sk->sk_rmem_alloc);
	mptcp_rmem_charge(sk, skb->truesize);
}

/* "inspired" by tcp_data_queue_ofo(), main differences:
 * - use mptcp seqs
 * - don't cope with sacks
@@ -316,25 +267,7 @@ static void mptcp_data_queue_ofo(struct mptcp_sock *msk, struct sk_buff *skb)

end:
	skb_condense(skb);
	mptcp_set_owner_r(skb, sk);
}

static bool mptcp_rmem_schedule(struct sock *sk, struct sock *ssk, int size)
{
	struct mptcp_sock *msk = mptcp_sk(sk);
	int amt, amount;

	if (size <= msk->rmem_fwd_alloc)
		return true;

	size -= msk->rmem_fwd_alloc;
	amt = sk_mem_pages(size);
	amount = amt << PAGE_SHIFT;
	if (!__sk_mem_raise_allocated(sk, size, amt, SK_MEM_RECV))
		return false;

	mptcp_rmem_fwd_alloc_add(sk, amount);
	return true;
	skb_set_owner_r(skb, sk);
}

static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk,
@@ -352,7 +285,7 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk,
	skb_orphan(skb);

	/* try to fetch required memory from subflow */
	if (!mptcp_rmem_schedule(sk, ssk, skb->truesize)) {
	if (!sk_rmem_schedule(sk, skb, skb->truesize)) {
		MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RCVPRUNED);
		goto drop;
	}
@@ -377,7 +310,7 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk,
		if (tail && mptcp_try_coalesce(sk, tail, skb))
			return true;

		mptcp_set_owner_r(skb, sk);
		skb_set_owner_r(skb, sk);
		__skb_queue_tail(&sk->sk_receive_queue, skb);
		return true;
	} else if (after64(MPTCP_SKB_CB(skb)->map_seq, msk->ack_seq)) {
@@ -1987,9 +1920,10 @@ static int __mptcp_recvmsg_mskq(struct sock *sk,
		}

		if (!(flags & MSG_PEEK)) {
			/* we will bulk release the skb memory later */
			/* avoid the indirect call, we know the destructor is sock_wfree */
			skb->destructor = NULL;
			WRITE_ONCE(msk->rmem_released, msk->rmem_released + skb->truesize);
			atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
			sk_mem_uncharge(sk, skb->truesize);
			__skb_unlink(skb, &sk->sk_receive_queue);
			__kfree_skb(skb);
			msk->bytes_consumed += count;
@@ -2103,18 +2037,6 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
	msk->rcvq_space.time = mstamp;
}

static void __mptcp_update_rmem(struct sock *sk)
{
	struct mptcp_sock *msk = mptcp_sk(sk);

	if (!msk->rmem_released)
		return;

	atomic_sub(msk->rmem_released, &sk->sk_rmem_alloc);
	mptcp_rmem_uncharge(sk, msk->rmem_released);
	WRITE_ONCE(msk->rmem_released, 0);
}

static bool __mptcp_move_skbs(struct sock *sk)
{
	struct mptcp_subflow_context *subflow;
@@ -2138,7 +2060,6 @@ static bool __mptcp_move_skbs(struct sock *sk)
			break;

		slowpath = lock_sock_fast(ssk);
		__mptcp_update_rmem(sk);
		done = __mptcp_move_skbs_from_subflow(msk, ssk, &moved);

		if (unlikely(ssk->sk_err))
@@ -2146,12 +2067,7 @@ static bool __mptcp_move_skbs(struct sock *sk)
		unlock_sock_fast(ssk, slowpath);
	} while (!done);

	ret = moved > 0;
	if (!RB_EMPTY_ROOT(&msk->out_of_order_queue) ||
	    !skb_queue_empty(&sk->sk_receive_queue)) {
		__mptcp_update_rmem(sk);
		ret |= __mptcp_ofo_queue(msk);
	}
	ret = moved > 0 || __mptcp_ofo_queue(msk);
	if (ret)
		mptcp_check_data_fin((struct sock *)msk);
	return ret;
@@ -2817,8 +2733,6 @@ static void __mptcp_init_sock(struct sock *sk)
	INIT_WORK(&msk->work, mptcp_worker);
	msk->out_of_order_queue = RB_ROOT;
	msk->first_pending = NULL;
	WRITE_ONCE(msk->rmem_fwd_alloc, 0);
	WRITE_ONCE(msk->rmem_released, 0);
	msk->timer_ival = TCP_RTO_MIN;
	msk->scaling_ratio = TCP_DEFAULT_SCALING_RATIO;

@@ -3044,8 +2958,6 @@ static void __mptcp_destroy_sock(struct sock *sk)

	sk->sk_prot->destroy(sk);

	WARN_ON_ONCE(READ_ONCE(msk->rmem_fwd_alloc));
	WARN_ON_ONCE(msk->rmem_released);
	sk_stream_kill_queues(sk);
	xfrm_sk_free_policy(sk);

@@ -3403,8 +3315,6 @@ void mptcp_destroy_common(struct mptcp_sock *msk, unsigned int flags)
	/* move all the rx fwd alloc into the sk_mem_reclaim_final in
	 * inet_sock_destruct() will dispose it
	 */
	sk_forward_alloc_add(sk, msk->rmem_fwd_alloc);
	WRITE_ONCE(msk->rmem_fwd_alloc, 0);
	mptcp_token_destroy(msk);
	mptcp_pm_free_anno_list(msk);
	mptcp_free_local_addr_list(msk);
@@ -3500,8 +3410,6 @@ static void mptcp_release_cb(struct sock *sk)
		if (__test_and_clear_bit(MPTCP_SYNC_SNDBUF, &msk->cb_flags))
			__mptcp_sync_sndbuf(sk);
	}

	__mptcp_update_rmem(sk);
}

/* MP_JOIN client subflow must wait for 4th ack before sending any data:
@@ -3672,12 +3580,6 @@ static void mptcp_shutdown(struct sock *sk, int how)
		__mptcp_wr_shutdown(sk);
}

static int mptcp_forward_alloc_get(const struct sock *sk)
{
	return READ_ONCE(sk->sk_forward_alloc) +
	       READ_ONCE(mptcp_sk(sk)->rmem_fwd_alloc);
}

static int mptcp_ioctl_outq(const struct mptcp_sock *msk, u64 v)
{
	const struct sock *sk = (void *)msk;
@@ -3836,7 +3738,6 @@ static struct proto mptcp_prot = {
	.hash		= mptcp_hash,
	.unhash		= mptcp_unhash,
	.get_port	= mptcp_get_port,
	.forward_alloc_get	= mptcp_forward_alloc_get,
	.stream_memory_free	= mptcp_stream_memory_free,
	.sockets_allocated	= &mptcp_sockets_allocated,

+1 −3
Original line number Diff line number Diff line
@@ -281,7 +281,6 @@ struct mptcp_sock {
	u64		rcv_data_fin_seq;
	u64		bytes_retrans;
	u64		bytes_consumed;
	int		rmem_fwd_alloc;
	int		snd_burst;
	int		old_wspace;
	u64		recovery_snd_nxt;	/* in recovery mode accept up to this seq;
@@ -296,7 +295,6 @@ struct mptcp_sock {
	u32		last_ack_recv;
	unsigned long	timer_ival;
	u32		token;
	int		rmem_released;
	unsigned long	flags;
	unsigned long	cb_flags;
	bool		recovery;		/* closing subflow write queue reinjected */
@@ -387,7 +385,7 @@ static inline void msk_owned_by_me(const struct mptcp_sock *msk)
 */
static inline int __mptcp_rmem(const struct sock *sk)
{
	return atomic_read(&sk->sk_rmem_alloc) - READ_ONCE(mptcp_sk(sk)->rmem_released);
	return atomic_read(&sk->sk_rmem_alloc);
}

static inline int mptcp_win_from_space(const struct sock *sk, int space)