Commit c72568c2 authored by Eric Dumazet, committed by Jakub Kicinski
Browse files

net: rps: softnet_data reorg to make enqueue_to_backlog() fast



enqueue_to_backlog() is showing up in kernel profiles on hosts
with many cores, when RFS/RPS is used.

The following softnet_data fields need to be updated:

- input_queue_tail
- input_pkt_queue (next, prev, qlen, lock)
- backlog.state (if input_pkt_queue was empty)

Unfortunately they are currently using two cache lines:

	/* --- cacheline 3 boundary (192 bytes) --- */
	call_single_data_t         csd __attribute__((__aligned__(64))); /*  0xc0  0x20 */
	struct softnet_data *      rps_ipi_next;         /*  0xe0   0x8 */
	unsigned int               cpu;                  /*  0xe8   0x4 */
	unsigned int               input_queue_tail;     /*  0xec   0x4 */
	struct sk_buff_head        input_pkt_queue;      /*  0xf0  0x18 */

	/* --- cacheline 4 boundary (256 bytes) was 8 bytes ago --- */

	struct napi_struct         backlog __attribute__((__aligned__(8))); /* 0x108 0x1f0 */

Add one ____cacheline_aligned_in_smp to make sure they now are using
a single cache line.

Also, because napi_struct has written fields, make @state its first field.

We want to make sure that cpus adding packets to sd->input_pkt_queue
are not slowing down cpus processing their backlog because of
false sharing.

After this patch new layout is:

	/* --- cacheline 5 boundary (320 bytes) --- */
	long int                   pad[3] __attribute__((__aligned__(64))); /* 0x140  0x18 */
	unsigned int               input_queue_tail;     /* 0x158   0x4 */

	/* XXX 4 bytes hole, try to pack */

	struct sk_buff_head        input_pkt_queue;      /* 0x160  0x18 */
	struct napi_struct         backlog __attribute__((__aligned__(8))); /* 0x178 0x1f0 */

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20251024091240.3292546-1-edumazet@google.com


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parent a086e986
Loading
Loading
Loading
Loading
+10 −1
Original line number Diff line number Diff line
@@ -377,6 +377,8 @@ struct napi_config {
 * Structure for NAPI scheduling similar to tasklet but with weighting
 */
struct napi_struct {
	/* This field should be first or softnet_data.backlog needs tweaks. */
	unsigned long		state;
	/* The poll_list must only be managed by the entity which
	 * changes the state of the NAPI_STATE_SCHED bit.  This means
	 * whoever atomically sets that bit can add this napi_struct
@@ -385,7 +387,6 @@ struct napi_struct {
	 */
	struct list_head	poll_list;

	unsigned long		state;
	int			weight;
	u32			defer_hard_irqs_count;
	int			(*poll)(struct napi_struct *, int);
@@ -3529,9 +3530,17 @@ struct softnet_data {
	call_single_data_t	csd ____cacheline_aligned_in_smp;
	struct softnet_data	*rps_ipi_next;
	unsigned int		cpu;

	/* We force a cacheline alignment from here, to hold together
	 * input_queue_tail, input_pkt_queue and backlog.state.
	 * We add holes so that backlog.state is the last field
	 * of this cache line.
	 */
	long			pad[3] ____cacheline_aligned_in_smp;
	unsigned int		input_queue_tail;
#endif
	struct sk_buff_head	input_pkt_queue;

	struct napi_struct	backlog;

	struct numa_drop_counters drop_counters;