Commit fdae0ab6 authored by Eric Dumazet's avatar Eric Dumazet Committed by Jakub Kicinski
Browse files

net: use NUMA drop counters for softnet_data.dropped



Hosts under DoS attack can suffer from false sharing
in enqueue_to_backlog(): atomic_inc(&sd->dropped).

This is because sd->dropped can be touched from many cpus,
possibly residing on different NUMA nodes.

Generalize the sk_drop_counters infrastructure
added in commit c51613fa ("net: add sk->sk_drop_counters")
and use it to replace softnet_data.dropped
with NUMA friendly softnet_data.drop_counters.

This adds 64 bytes per cpu, maybe more in the future
if we increase the number of counters (currently 2)
per 'struct numa_drop_counters'.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20250909121942.1202585-1-edumazet@google.com


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parent 278289bc
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -295,7 +295,7 @@ struct raw6_sock {
	__u32			offset;		/* checksum offset  */
	struct icmp6_filter	filter;
	__u32			ip6mr_table;
	struct socket_drop_counters drop_counters;
	struct numa_drop_counters drop_counters;
	struct ipv6_pinfo	inet6;
};

+27 −1
Original line number Diff line number Diff line
@@ -3459,6 +3459,32 @@ static inline bool dev_has_header(const struct net_device *dev)
	return dev->header_ops && dev->header_ops->create;
}

/*
 * Pair of drop counters meant to be updated from different NUMA nodes.
 *
 * Each counter carries ____cacheline_aligned_in_smp, so the two fields
 * start on separate cache lines on SMP builds. numa_drop_add() picks
 * the counter from the parity of numa_node_id(); numa_drop_read()
 * reports the sum of both.
 */
struct numa_drop_counters {
	/* Updated when numa_node_id() % 2 is zero. */
	atomic_t	drops0 ____cacheline_aligned_in_smp;
	/* Updated when numa_node_id() % 2 is non-zero. */
	atomic_t	drops1 ____cacheline_aligned_in_smp;
};

/**
 * numa_drop_read - read the total of a pair of NUMA drop counters
 * @ndc: counters to read
 *
 * Each half is fetched with its own atomic_read(); the pair is not
 * sampled as one atomic snapshot.
 *
 * Return: sum of @ndc->drops0 and @ndc->drops1.
 */
static inline int numa_drop_read(const struct numa_drop_counters *ndc)
{
	int even = atomic_read(&ndc->drops0);
	int odd = atomic_read(&ndc->drops1);

	return even + odd;
}

static inline void numa_drop_add(struct numa_drop_counters *ndc, int val)
{
	int n = numa_node_id() % 2;

	if (n)
		atomic_add(val, &ndc->drops1);
	else
		atomic_add(val, &ndc->drops0);
}

/**
 * numa_drop_reset - zero both NUMA drop counters
 * @ndc: counters to reset
 *
 * The two counters are cleared with separate atomic_set() calls, drops0
 * first; the pair is not reset as a single atomic operation.
 */
static inline void numa_drop_reset(struct numa_drop_counters *ndc)
{
	atomic_set(&ndc->drops0, 0);
	atomic_set(&ndc->drops1, 0);
}

/*
 * Incoming packets are placed on per-CPU queues
 */
@@ -3504,7 +3530,7 @@ struct softnet_data {
	struct sk_buff_head	input_pkt_queue;
	struct napi_struct	backlog;

	atomic_t		dropped ____cacheline_aligned_in_smp;
	struct numa_drop_counters drop_counters;

	/* Another possibly contended cache line */
	spinlock_t		defer_lock ____cacheline_aligned_in_smp;
+1 −1
Original line number Diff line number Diff line
@@ -108,7 +108,7 @@ struct udp_sock {
	 * the last UDP socket cacheline.
	 */
	struct hlist_node	tunnel_list;
	struct socket_drop_counters drop_counters;
	struct numa_drop_counters drop_counters;
};

#define udp_test_bit(nr, sk)			\
+1 −1
Original line number Diff line number Diff line
@@ -81,7 +81,7 @@ struct raw_sock {
	struct inet_sock   inet;
	struct icmp_filter filter;
	u32		   ipmr_table;
	struct socket_drop_counters drop_counters;
	struct numa_drop_counters drop_counters;
};

#define raw_sk(ptr) container_of_const(ptr, struct raw_sock, inet.sk)
+12 −25
Original line number Diff line number Diff line
@@ -102,11 +102,6 @@ struct net;
typedef __u32 __bitwise __portpair;
typedef __u64 __bitwise __addrpair;

/*
 * Two drop counters, each marked ____cacheline_aligned_in_smp.
 * Same fields as struct numa_drop_counters.
 * NOTE(review): per the commit message this type is generalized into
 * numa_drop_counters, so this looks like the removed side of the hunk.
 */
struct socket_drop_counters {
	atomic_t	drops0 ____cacheline_aligned_in_smp;
	atomic_t	drops1 ____cacheline_aligned_in_smp;
};

/**
 *	struct sock_common - minimal network layer representation of sockets
 *	@skc_daddr: Foreign IPv4 addr
@@ -287,7 +282,7 @@ struct sk_filter;
  *	@sk_err_soft: errors that don't cause failure but are the cause of a
  *		      persistent failure not just 'timed out'
  *	@sk_drops: raw/udp drops counter
  *	@sk_drop_counters: optional pointer to socket_drop_counters
  *	@sk_drop_counters: optional pointer to numa_drop_counters
  *	@sk_ack_backlog: current listen backlog
  *	@sk_max_ack_backlog: listen backlog set in listen()
  *	@sk_uid: user id of owner
@@ -456,7 +451,7 @@ struct sock {
#ifdef CONFIG_XFRM
	struct xfrm_policy __rcu *sk_policy[2];
#endif
	struct socket_drop_counters *sk_drop_counters;
	struct numa_drop_counters *sk_drop_counters;
	__cacheline_group_end(sock_read_rxtx);

	__cacheline_group_begin(sock_write_rxtx);
@@ -2698,19 +2693,13 @@ struct sock_skb_cb {

/*
 * sk_drops_add - account @segs dropped segments against socket @sk.
 *
 * When sk->sk_drop_counters is set, the amount is added to one of its
 * two counters, chosen by the parity of numa_node_id(); otherwise it is
 * added to sk->sk_drops.
 *
 * NOTE(review): the body below appears to be a diff rendering with the
 * pre-patch (socket_drop_counters / sdc) and post-patch
 * (numa_drop_counters / ndc) lines interleaved; it is not compilable
 * as shown.
 */
static inline void sk_drops_add(struct sock *sk, int segs)
{
	struct socket_drop_counters *sdc = sk->sk_drop_counters;

	if (sdc) {
		int n = numa_node_id() % 2;
	struct numa_drop_counters *ndc = sk->sk_drop_counters;

		if (n)
			atomic_add(segs, &sdc->drops1);
	if (ndc)
		numa_drop_add(ndc, segs);
	else
			atomic_add(segs, &sdc->drops0);
	} else {
		atomic_add(segs, &sk->sk_drops);
}
}

static inline void sk_drops_inc(struct sock *sk)
{
@@ -2719,23 +2708,21 @@ static inline void sk_drops_inc(struct sock *sk)

/*
 * sk_drops_read - return the drop count recorded for socket @sk.
 *
 * When sk->sk_drop_counters is set, the result is the sum of its two
 * counters, and DEBUG_NET_WARN_ON_ONCE() is handed the value of
 * sk->sk_drops (presumably to flag a non-zero legacy counter; confirm
 * against the macro definition). Otherwise the result is sk->sk_drops.
 *
 * NOTE(review): the body below appears to be a diff rendering with the
 * pre-patch (sdc) and post-patch (ndc) lines interleaved; it is not
 * compilable as shown.
 */
static inline int sk_drops_read(const struct sock *sk)
{
	const struct socket_drop_counters *sdc = sk->sk_drop_counters;
	const struct numa_drop_counters *ndc = sk->sk_drop_counters;

	if (sdc) {
	if (ndc) {
		DEBUG_NET_WARN_ON_ONCE(atomic_read(&sk->sk_drops));
		return atomic_read(&sdc->drops0) + atomic_read(&sdc->drops1);
		return numa_drop_read(ndc);
	}
	return atomic_read(&sk->sk_drops);
}

/*
 * sk_drops_reset - zero every drop counter of socket @sk.
 *
 * Clears both counters behind sk->sk_drop_counters when that pointer is
 * set, and always clears sk->sk_drops.
 *
 * NOTE(review): the body below appears to be a diff rendering with the
 * pre-patch (sdc) and post-patch (ndc) lines interleaved; it is not
 * compilable as shown.
 */
static inline void sk_drops_reset(struct sock *sk)
{
	struct socket_drop_counters *sdc = sk->sk_drop_counters;
	struct numa_drop_counters *ndc = sk->sk_drop_counters;

	if (sdc) {
		atomic_set(&sdc->drops0, 0);
		atomic_set(&sdc->drops1, 0);
	}
	if (ndc)
		numa_drop_reset(ndc);
	atomic_set(&sk->sk_drops, 0);
}

Loading