Commit f15e3b3d authored by Joe Damato, committed by Jakub Kicinski
Browse files

net: napi: Make napi_defer_hard_irqs per-NAPI



Add defer_hard_irqs to napi_struct in preparation for per-NAPI
settings.

The existing sysfs parameter is respected; writes to sysfs will write to
all NAPI structs for the device and the net_device napi_defer_hard_irqs field.
Reads from sysfs show the net_device field.

The ability to set defer_hard_irqs on specific NAPI instances will be
added in a later commit, via netdev-genl.

Signed-off-by: Joe Damato <jdamato@fastly.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Jakub Kicinski <kuba@kernel.org>
Link: https://patch.msgid.link/20241011184527.16393-2-jdamato@fastly.com


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parent ff1585e9
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -186,4 +186,5 @@ struct dpll_pin* dpll_pin
struct hlist_head                   page_pools
struct dim_irq_moder*               irq_moder
u64                                 max_pacing_offload_horizon
u32                                 napi_defer_hard_irqs
=================================== =========================== =================== =================== ===================================================================================
+2 −1
Original line number Diff line number Diff line
@@ -373,6 +373,7 @@ struct napi_struct {
	unsigned int		napi_id;
	struct hrtimer		timer;
	struct task_struct	*thread;
	u32			defer_hard_irqs;
	/* control-path-only fields follow */
	struct list_head	dev_list;
	struct hlist_node	napi_hash_node;
@@ -2085,7 +2086,6 @@ struct net_device {
	unsigned int		real_num_rx_queues;
	struct netdev_rx_queue	*_rx;
	unsigned long		gro_flush_timeout;
	u32			napi_defer_hard_irqs;
	unsigned int		gro_max_size;
	unsigned int		gro_ipv4_max_size;
	rx_handler_func_t __rcu	*rx_handler;
@@ -2413,6 +2413,7 @@ struct net_device {
	struct dim_irq_moder	*irq_moder;

	u64			max_pacing_offload_horizon;
	u32			napi_defer_hard_irqs;

	/**
	 * @lock: protects @net_shaper_hierarchy, feel free to use for other
+5 −5
Original line number Diff line number Diff line
@@ -6233,7 +6233,7 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
	if (work_done) {
		if (n->gro_bitmask)
			timeout = READ_ONCE(n->dev->gro_flush_timeout);
		n->defer_hard_irqs_count = READ_ONCE(n->dev->napi_defer_hard_irqs);
		n->defer_hard_irqs_count = napi_get_defer_hard_irqs(n);
	}
	if (n->defer_hard_irqs_count > 0) {
		n->defer_hard_irqs_count--;
@@ -6371,7 +6371,7 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock,
	bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx);

	if (flags & NAPI_F_PREFER_BUSY_POLL) {
		napi->defer_hard_irqs_count = READ_ONCE(napi->dev->napi_defer_hard_irqs);
		napi->defer_hard_irqs_count = napi_get_defer_hard_irqs(napi);
		timeout = READ_ONCE(napi->dev->gro_flush_timeout);
		if (napi->defer_hard_irqs_count && timeout) {
			hrtimer_start(&napi->timer, ns_to_ktime(timeout), HRTIMER_MODE_REL_PINNED);
@@ -6653,6 +6653,7 @@ void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi,
	INIT_HLIST_NODE(&napi->napi_hash_node);
	hrtimer_init(&napi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
	napi->timer.function = napi_watchdog;
	napi_set_defer_hard_irqs(napi, READ_ONCE(dev->napi_defer_hard_irqs));
	init_gro_hash(napi);
	napi->skb = NULL;
	INIT_LIST_HEAD(&napi->rx_list);
@@ -11059,7 +11060,7 @@ void netdev_sw_irq_coalesce_default_on(struct net_device *dev)

	if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
		dev->gro_flush_timeout = 20000;
		dev->napi_defer_hard_irqs = 1;
		netdev_set_defer_hard_irqs(dev, 1);
	}
}
EXPORT_SYMBOL_GPL(netdev_sw_irq_coalesce_default_on);
@@ -12003,7 +12004,6 @@ static void __init net_dev_struct_check(void)
	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, real_num_rx_queues);
	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, _rx);
	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, gro_flush_timeout);
	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, napi_defer_hard_irqs);
	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, gro_max_size);
	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, gro_ipv4_max_size);
	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, rx_handler);
@@ -12015,7 +12015,7 @@ static void __init net_dev_struct_check(void)
#ifdef CONFIG_NET_XGRESS
	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, tcx_ingress);
#endif
	CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_rx, 104);
	CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_rx, 100);
}

/*
+36 −0
Original line number Diff line number Diff line
@@ -148,6 +148,42 @@ static inline void netif_set_gro_ipv4_max_size(struct net_device *dev,
	WRITE_ONCE(dev->gro_ipv4_max_size, size);
}

/**
 * napi_get_defer_hard_irqs - get the NAPI's defer_hard_irqs
 * @n: napi struct to get the defer_hard_irqs field from
 *
 * The field is loaded with READ_ONCE(), matching the WRITE_ONCE() store
 * done by napi_set_defer_hard_irqs().
 *
 * Return: the per-NAPI value of the defer_hard_irqs field.
 */
static inline u32 napi_get_defer_hard_irqs(const struct napi_struct *n)
{
	return READ_ONCE(n->defer_hard_irqs);
}

/**
 * napi_set_defer_hard_irqs - set the defer_hard_irqs for a napi
 * @n: napi_struct to set the defer_hard_irqs field
 * @defer: the value the field should be set to
 *
 * The field is stored with WRITE_ONCE(), matching the READ_ONCE() load
 * done by napi_get_defer_hard_irqs(). Only @n is updated; the owning
 * net_device is not touched.
 */
static inline void napi_set_defer_hard_irqs(struct napi_struct *n, u32 defer)
{
	WRITE_ONCE(n->defer_hard_irqs, defer);
}

/**
 * netdev_set_defer_hard_irqs - apply one defer_hard_irqs value device-wide
 * @netdev: the net_device whose own field and NAPI instances are updated
 * @defer: the defer_hard_irqs value to store
 *
 * Stores @defer in @netdev->napi_defer_hard_irqs and then in every
 * napi_struct linked on @netdev->napi_list, so the device-level value and
 * all per-NAPI values end up equal.
 *
 * NOTE(review): the list walk takes no lock here; presumably callers
 * serialize against NAPI add/delete (e.g. via RTNL) - confirm.
 */
static inline void netdev_set_defer_hard_irqs(struct net_device *netdev,
					      u32 defer)
{
	struct napi_struct *cur;

	/* Device-level value first, then each NAPI on the device. */
	WRITE_ONCE(netdev->napi_defer_hard_irqs, defer);

	list_for_each_entry(cur, &netdev->napi_list, dev_list) {
		napi_set_defer_hard_irqs(cur, defer);
	}
}

int rps_cpumask_housekeeping(struct cpumask *mask);

#if defined(CONFIG_DEBUG_NET) && defined(CONFIG_BPF_SYSCALL)
+1 −1
Original line number Diff line number Diff line
@@ -429,7 +429,7 @@ static int change_napi_defer_hard_irqs(struct net_device *dev, unsigned long val
	if (val > S32_MAX)
		return -ERANGE;

	WRITE_ONCE(dev->napi_defer_hard_irqs, val);
	netdev_set_defer_hard_irqs(dev, (u32)val);
	return 0;
}