Commit e2507985 authored by Paolo Abeni
Browse files

Merge branch 'net-better-drop-accounting'

Eric Dumazet says:

====================
net: better drop accounting

Incrementing sk->sk_drops for every dropped packet can
cause serious cache line contention under DoS.

Add optional sk->sk_drop_counters pointer so that
protocols can opt-in to use two dedicated cache lines
to hold drop counters.

Convert UDP and RAW to use this infrastructure.

Tested on UDP (see patch 4/5 for details)

Before:

nstat -n ; sleep 1 ; nstat | grep Udp
Udp6InDatagrams                 615091             0.0
Udp6InErrors                    3904277            0.0
Udp6RcvbufErrors                3904277            0.0

After:

nstat -n ; sleep 1 ; nstat | grep Udp
Udp6InDatagrams                 816281             0.0
Udp6InErrors                    7497093            0.0
Udp6RcvbufErrors                7497093            0.0
====================

Link: https://patch.msgid.link/20250826125031.1578842-1-edumazet@google.com


Signed-off-by: Paolo Abeni <pabeni@redhat.com>
parents c2a75689 b81aa232
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -295,7 +295,7 @@ struct raw6_sock {
	__u32			offset;		/* checksum offset  */
	struct icmp6_filter	filter;
	__u32			ip6mr_table;

	struct socket_drop_counters drop_counters;
	struct ipv6_pinfo	inet6;
};

+1 −1
Original line number Diff line number Diff line
@@ -315,7 +315,7 @@ static inline bool sk_psock_test_state(const struct sk_psock *psock,

/*
 * Drop @skb on behalf of @sk: account the drop against the socket through
 * sk_drops_skbadd(), then release the buffer with kfree_skb().
 *
 * The accounting call must be the skb-based helper; sk_drops_add() takes a
 * segment count, not an skb.
 */
static inline void sock_drop(struct sock *sk, struct sk_buff *skb)
{
	sk_drops_skbadd(sk, skb);
	kfree_skb(skb);
}

+1 −0
Original line number Diff line number Diff line
@@ -108,6 +108,7 @@ struct udp_sock {
	 * the last UDP socket cacheline.
	 */
	struct hlist_node	tunnel_list;
	struct socket_drop_counters drop_counters;
};

#define udp_test_bit(nr, sk)			\
+1 −0
Original line number Diff line number Diff line
@@ -81,6 +81,7 @@ struct raw_sock {
	struct inet_sock   inet;
	struct icmp_filter filter;
	u32		   ipmr_table;
	struct socket_drop_counters drop_counters;
};

/* Map a pointer to the embedded inet.sk member back to its enclosing struct raw_sock. */
#define raw_sk(ptr) container_of_const(ptr, struct raw_sock, inet.sk)
+53 −3
Original line number Diff line number Diff line
@@ -102,6 +102,11 @@ struct net;
typedef __u32 __bitwise __portpair;
typedef __u64 __bitwise __addrpair;

/*
 * Optional pair of drop counters a socket can point to through
 * sk->sk_drop_counters instead of using sk->sk_drops.
 *
 * Each member is marked ____cacheline_aligned_in_smp so that the two
 * counters are held in two dedicated cache lines; sk_drops_add() picks one
 * of them per update and sk_drops_read() reports their sum.
 */
struct socket_drop_counters {
	atomic_t	drops0 ____cacheline_aligned_in_smp;
	atomic_t	drops1 ____cacheline_aligned_in_smp;
};

/**
 *	struct sock_common - minimal network layer representation of sockets
 *	@skc_daddr: Foreign IPv4 addr
@@ -282,6 +287,7 @@ struct sk_filter;
  *	@sk_err_soft: errors that don't cause failure but are the cause of a
  *		      persistent failure not just 'timed out'
  *	@sk_drops: raw/udp drops counter
  *	@sk_drop_counters: optional pointer to socket_drop_counters
  *	@sk_ack_backlog: current listen backlog
  *	@sk_max_ack_backlog: listen backlog set in listen()
  *	@sk_uid: user id of owner
@@ -449,6 +455,7 @@ struct sock {
#ifdef CONFIG_XFRM
	struct xfrm_policy __rcu *sk_policy[2];
#endif
	struct socket_drop_counters *sk_drop_counters;
	__cacheline_group_end(sock_read_rxtx);

	__cacheline_group_begin(sock_write_rxtx);
@@ -2682,18 +2689,61 @@ struct sock_skb_cb {
/* Build-time check that (size) does not exceed SOCK_SKB_CB_OFFSET. */
#define sock_skb_cb_check_size(size) \
	BUILD_BUG_ON((size) > SOCK_SKB_CB_OFFSET)

/*
 * Account @segs dropped segments against @sk.
 *
 * Sockets without a sk_drop_counters block are charged on sk->sk_drops.
 * Otherwise the increment lands on one of the two dedicated counters,
 * selected by the parity of the current NUMA node id (odd -> drops1,
 * even -> drops0).
 */
static inline void sk_drops_add(struct sock *sk, int segs)
{
	struct socket_drop_counters *counters = sk->sk_drop_counters;

	if (!counters) {
		atomic_add(segs, &sk->sk_drops);
		return;
	}

	atomic_add(segs, (numa_node_id() % 2) ? &counters->drops1
					      : &counters->drops0);
}

/* Account a single dropped segment against @sk; shorthand for sk_drops_add(sk, 1). */
static inline void sk_drops_inc(struct sock *sk)
{
	sk_drops_add(sk, 1);
}

/*
 * Return the number of drops recorded for @sk.
 *
 * Without a sk_drop_counters block this is simply sk->sk_drops. With one,
 * it is the sum of drops0 and drops1; sk->sk_drops is expected to be zero
 * in that mode, which the debug warning below checks.
 */
static inline int sk_drops_read(const struct sock *sk)
{
	const struct socket_drop_counters *counters = sk->sk_drop_counters;

	if (!counters)
		return atomic_read(&sk->sk_drops);

	DEBUG_NET_WARN_ON_ONCE(atomic_read(&sk->sk_drops));
	return atomic_read(&counters->drops0) + atomic_read(&counters->drops1);
}

/*
 * Zero every drop counter associated with @sk: both dedicated counters
 * when a sk_drop_counters block is attached, and sk->sk_drops in all cases.
 */
static inline void sk_drops_reset(struct sock *sk)
{
	struct socket_drop_counters *counters = sk->sk_drop_counters;

	if (counters != NULL) {
		atomic_set(&counters->drops0, 0);
		atomic_set(&counters->drops1, 0);
	}

	/* The embedded counter is cleared whether or not a block is attached. */
	atomic_set(&sk->sk_drops, 0);
}

/*
 * Store the socket's drop count in the skb control block of @skb.
 *
 * The value is sk_drops_read(sk) when SOCK_RXQ_OVFL is set on @sk and 0
 * otherwise. Going through sk_drops_read() (rather than reading
 * sk->sk_drops directly) keeps the reported value correct for sockets that
 * use the dedicated sk_drop_counters block.
 */
static inline void
sock_skb_set_dropcount(const struct sock *sk, struct sk_buff *skb)
{
	SOCK_SKB_CB(skb)->dropcount = sock_flag(sk, SOCK_RXQ_OVFL) ?
						sk_drops_read(sk) : 0;
}

static inline void sk_drops_add(struct sock *sk, const struct sk_buff *skb)
static inline void sk_drops_skbadd(struct sock *sk, const struct sk_buff *skb)
{
	int segs = max_t(u16, 1, skb_shinfo(skb)->gso_segs);

	atomic_add(segs, &sk->sk_drops);
	sk_drops_add(sk, segs);
}

static inline ktime_t sock_read_timestamp(struct sock *sk)
Loading