Commit 3ff9bcec authored by Eric Dumazet, committed by Paolo Abeni
Browse files

net: avoid extra access to sk->sk_wmem_alloc in sock_wfree()



UDP TX packets destructor is sock_wfree().

It suffers from cache line bouncing in sock_def_write_space_wfree().

Instead of reading sk->sk_wmem_alloc after we just did an atomic RMW
on it, use __refcount_sub_and_test() to get the old value for free,
and pass the new value to sock_def_write_space_wfree().

Add __sock_writeable() helper.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20251017133712.2842665-1-edumazet@google.com


Signed-off-by: Paolo Abeni <pabeni@redhat.com>
parent d1d7998d
Loading
Loading
Loading
Loading
+5 −1
Original line number Diff line number Diff line
@@ -2607,12 +2607,16 @@ static inline struct page_frag *sk_page_frag(struct sock *sk)

bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag);

/*
 *	Write-space test on a caller-supplied wmem_alloc value, for callers
 *	that already know it and want to avoid re-reading sk->sk_wmem_alloc.
 *	True while @wmem_alloc is below half of sk->sk_sndbuf.
 */
static inline bool __sock_writeable(const struct sock *sk, int wmem_alloc)
{
	const int half_sndbuf = READ_ONCE(sk->sk_sndbuf) >> 1;

	return wmem_alloc < half_sndbuf;
}
/*
 *	Default write policy as shown to user space via poll/select/SIGIO:
 *	the socket is writeable while sk->sk_wmem_alloc is below half of
 *	sk->sk_sndbuf.
 */
static inline bool sock_writeable(const struct sock *sk)
{
	/* Share the threshold test with __sock_writeable() instead of
	 * open-coding it here.
	 */
	return __sock_writeable(sk, refcount_read(&sk->sk_wmem_alloc));
}

static inline gfp_t gfp_any(void)
+8 −6
Original line number Diff line number Diff line
@@ -155,7 +155,7 @@
static DEFINE_MUTEX(proto_list_mutex);
static LIST_HEAD(proto_list);

/* Forward declarations: sock_wfree() refers to both functions before
 * their definitions appear.
 */
static void sock_def_write_space_wfree(struct sock *sk, int wmem_alloc);
static void sock_def_write_space(struct sock *sk);

/**
@@ -2659,16 +2659,18 @@ EXPORT_SYMBOL_GPL(sk_setup_caps);
 */
void sock_wfree(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;
	unsigned int len = skb->truesize;
	struct sock *sk = skb->sk;
	bool free;
	int old;

	if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE)) {
		if (sock_flag(sk, SOCK_RCU_FREE) &&
		    sk->sk_write_space == sock_def_write_space) {
			rcu_read_lock();
			free = refcount_sub_and_test(len, &sk->sk_wmem_alloc);
			sock_def_write_space_wfree(sk);
			free = __refcount_sub_and_test(len, &sk->sk_wmem_alloc,
						       &old);
			sock_def_write_space_wfree(sk, old - len);
			rcu_read_unlock();
			if (unlikely(free))
				__sk_free(sk);
@@ -3612,12 +3614,12 @@ static void sock_def_write_space(struct sock *sk)
 * for SOCK_RCU_FREE sockets under RCU read section and after putting
 * ->sk_wmem_alloc.
 */
static void sock_def_write_space_wfree(struct sock *sk)
static void sock_def_write_space_wfree(struct sock *sk, int wmem_alloc)
{
	/* Do not wake up a writer until he can make "significant"
	 * progress.  --DaveM
	 */
	if (sock_writeable(sk)) {
	if (__sock_writeable(sk, wmem_alloc)) {
		struct socket_wq *wq = rcu_dereference(sk->sk_wq);

		/* rely on refcount_sub from sock_wfree() */