Commit acb8d4ed authored by Joe Damato, committed by Jakub Kicinski
Browse files

net: napi: Make gro_flush_timeout per-NAPI



Allow per-NAPI gro_flush_timeout setting.

The existing sysfs parameter is respected; writes to sysfs will write to
all NAPI structs for the device and the net_device gro_flush_timeout
field. Reads from sysfs will read from the net_device field.

The ability to set gro_flush_timeout on specific NAPI instances will be
added in a later commit, via netdev-genl.

Signed-off-by: Joe Damato <jdamato@fastly.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Jakub Kicinski <kuba@kernel.org>
Link: https://patch.msgid.link/20241011184527.16393-4-jdamato@fastly.com


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parent 51601046
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -186,5 +186,6 @@ struct dpll_pin* dpll_pin
struct hlist_head                   page_pools
struct dim_irq_moder*               irq_moder
u64                                 max_pacing_offload_horizon
unsigned_long                       gro_flush_timeout
u32                                 napi_defer_hard_irqs
=================================== =========================== =================== =================== ===================================================================================
+2 −1
Original line number Diff line number Diff line
@@ -373,6 +373,7 @@ struct napi_struct {
	unsigned int		napi_id;
	struct hrtimer		timer;
	struct task_struct	*thread;
	unsigned long		gro_flush_timeout;
	u32			defer_hard_irqs;
	/* control-path-only fields follow */
	struct list_head	dev_list;
@@ -2085,7 +2086,6 @@ struct net_device {
	int			ifindex;
	unsigned int		real_num_rx_queues;
	struct netdev_rx_queue	*_rx;
	unsigned long		gro_flush_timeout;
	unsigned int		gro_max_size;
	unsigned int		gro_ipv4_max_size;
	rx_handler_func_t __rcu	*rx_handler;
@@ -2413,6 +2413,7 @@ struct net_device {
	struct dim_irq_moder	*irq_moder;

	u64			max_pacing_offload_horizon;
	unsigned long		gro_flush_timeout;
	u32			napi_defer_hard_irqs;

	/**
+6 −6
Original line number Diff line number Diff line
@@ -6232,12 +6232,12 @@ bool napi_complete_done(struct napi_struct *n, int work_done)

	if (work_done) {
		if (n->gro_bitmask)
			timeout = READ_ONCE(n->dev->gro_flush_timeout);
			timeout = napi_get_gro_flush_timeout(n);
		n->defer_hard_irqs_count = napi_get_defer_hard_irqs(n);
	}
	if (n->defer_hard_irqs_count > 0) {
		n->defer_hard_irqs_count--;
		timeout = READ_ONCE(n->dev->gro_flush_timeout);
		timeout = napi_get_gro_flush_timeout(n);
		if (timeout)
			ret = false;
	}
@@ -6372,7 +6372,7 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock,

	if (flags & NAPI_F_PREFER_BUSY_POLL) {
		napi->defer_hard_irqs_count = napi_get_defer_hard_irqs(napi);
		timeout = READ_ONCE(napi->dev->gro_flush_timeout);
		timeout = napi_get_gro_flush_timeout(napi);
		if (napi->defer_hard_irqs_count && timeout) {
			hrtimer_start(&napi->timer, ns_to_ktime(timeout), HRTIMER_MODE_REL_PINNED);
			skip_schedule = true;
@@ -6654,6 +6654,7 @@ void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi,
	hrtimer_init(&napi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
	napi->timer.function = napi_watchdog;
	napi_set_defer_hard_irqs(napi, READ_ONCE(dev->napi_defer_hard_irqs));
	napi_set_gro_flush_timeout(napi, READ_ONCE(dev->gro_flush_timeout));
	init_gro_hash(napi);
	napi->skb = NULL;
	INIT_LIST_HEAD(&napi->rx_list);
@@ -11059,7 +11060,7 @@ void netdev_sw_irq_coalesce_default_on(struct net_device *dev)
	WARN_ON(dev->reg_state == NETREG_REGISTERED);

	if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
		dev->gro_flush_timeout = 20000;
		netdev_set_gro_flush_timeout(dev, 20000);
		netdev_set_defer_hard_irqs(dev, 1);
	}
}
@@ -12003,7 +12004,6 @@ static void __init net_dev_struct_check(void)
	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, ifindex);
	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, real_num_rx_queues);
	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, _rx);
	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, gro_flush_timeout);
	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, gro_max_size);
	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, gro_ipv4_max_size);
	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, rx_handler);
@@ -12015,7 +12015,7 @@ static void __init net_dev_struct_check(void)
#ifdef CONFIG_NET_XGRESS
	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, tcx_ingress);
#endif
	CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_rx, 100);
	CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_rx, 92);
}

/*
+40 −0
Original line number Diff line number Diff line
@@ -184,6 +184,46 @@ static inline void netdev_set_defer_hard_irqs(struct net_device *netdev,
		napi_set_defer_hard_irqs(napi, defer);
}

/**
 * napi_get_gro_flush_timeout - read a NAPI's gro_flush_timeout
 * @n: NAPI instance to query
 *
 * The field is loaded with READ_ONCE().
 *
 * Return: the gro_flush_timeout value stored in @n.
 */
static inline unsigned long
napi_get_gro_flush_timeout(const struct napi_struct *n)
{
	unsigned long timeout = READ_ONCE(n->gro_flush_timeout);

	return timeout;
}

/**
 * napi_set_gro_flush_timeout - store a NAPI's gro_flush_timeout
 * @n: NAPI instance to update
 * @timeout: new gro_flush_timeout value
 *
 * Only @n is touched; the field is written with WRITE_ONCE().
 */
static inline void
napi_set_gro_flush_timeout(struct napi_struct *n, unsigned long timeout)
{
	WRITE_ONCE(n->gro_flush_timeout, timeout);
}

/**
 * netdev_set_gro_flush_timeout - apply a gro_flush_timeout device-wide
 * @netdev: device whose own field and NAPIs are updated
 * @timeout: new gro_flush_timeout value
 *
 * Records @timeout in @netdev->gro_flush_timeout, then copies it into each
 * NAPI linked on @netdev->napi_list.
 */
static inline void
netdev_set_gro_flush_timeout(struct net_device *netdev, unsigned long timeout)
{
	struct napi_struct *cur;

	WRITE_ONCE(netdev->gro_flush_timeout, timeout);

	list_for_each_entry(cur, &netdev->napi_list, dev_list)
		napi_set_gro_flush_timeout(cur, timeout);
}

int rps_cpumask_housekeeping(struct cpumask *mask);

#if defined(CONFIG_DEBUG_NET) && defined(CONFIG_BPF_SYSCALL)
+1 −1
Original line number Diff line number Diff line
@@ -409,7 +409,7 @@ NETDEVICE_SHOW_RW(tx_queue_len, fmt_dec);

/*
 * Store @val as the gro_flush_timeout of @dev; always returns 0.
 *
 * NOTE(review): both statements below write dev->gro_flush_timeout; the
 * direct WRITE_ONCE() looks like the pre-patch line kept by the diff view,
 * with netdev_set_gro_flush_timeout() as its replacement - confirm.
 */
static int change_gro_flush_timeout(struct net_device *dev, unsigned long val)
{
	WRITE_ONCE(dev->gro_flush_timeout, val);
	/* Updates the device field and every NAPI on the device's napi_list. */
	netdev_set_gro_flush_timeout(dev, val);
	return 0;
}