Commit 8a2b61e9, authored by Eric Dumazet, committed by Jakub Kicinski
Browse files

net: no longer assume RTNL is held in flush_all_backlogs()



flush_all_backlogs() uses per-cpu and static data to hold its
temporary data, on the assumption it is called under RTNL
protection.

Following patch in the series will break this assumption.

Use instead a dynamically allocated piece of memory.

In the unlikely case the allocation fails,
use a boot-time allocated memory.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Jesse Brandeburg <jbrandeburg@cloudflare.com>
Link: https://patch.msgid.link/20250114205531.967841-3-edumazet@google.com


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parent 0734d7c3
Loading
Loading
Loading
Loading
+35 −18
Original line number Diff line number Diff line
@@ -6013,8 +6013,6 @@ void netif_receive_skb_list(struct list_head *head)
}
EXPORT_SYMBOL(netif_receive_skb_list);

static DEFINE_PER_CPU(struct work_struct, flush_works);

/* Network device is going away, flush any packets still pending */
static void flush_backlog(struct work_struct *work)
{
@@ -6071,36 +6069,54 @@ static bool flush_required(int cpu)
	return true;
}

/* Working memory for one flush_all_backlogs() run. */
struct flush_backlogs {
	/* CPUs on which a work item from w[] has been queued. */
	cpumask_t		flush_cpus;
	/* Work items indexed by CPU id; flush_backlogs_alloc() sizes this
	 * array for nr_cpu_ids entries.
	 */
	struct work_struct	w[];
};

/* Allocate a struct flush_backlogs whose trailing w[] array has room for
 * nr_cpu_ids work items.
 *
 * Returns the kmalloc(GFP_KERNEL) result, i.e. NULL when the allocation
 * fails. The fields are not initialised here; the caller sets up the
 * ones it uses.
 */
static struct flush_backlogs *flush_backlogs_alloc(void)
{
	size_t bytes = struct_size_t(struct flush_backlogs, w, nr_cpu_ids);

	return kmalloc(bytes, GFP_KERNEL);
}

/* Instance allocated once in net_dev_init(); flush_all_backlogs() uses it
 * when flush_backlogs_alloc() returns NULL.
 */
static struct flush_backlogs *flush_backlogs_fallback;
/* Held by flush_all_backlogs() for as long as it is using the fallback
 * instance above.
 */
static DEFINE_MUTEX(flush_backlogs_mutex);

/* Run flush_backlog() on every online CPU for which flush_required()
 * returns true, and wait for all of the queued work items to finish.
 *
 * The function does not depend on RTNL being held: the cpumask and the
 * work items live in a per-call allocation. If that allocation fails,
 * the shared flush_backlogs_fallback instance is used instead, and
 * flush_backlogs_mutex serialises callers for the whole time it is in use.
 */
static void flush_all_backlogs(void)
{
	struct flush_backlogs *ptr = flush_backlogs_alloc();
	unsigned int cpu;

	if (!ptr) {
		/* Allocation failed: take exclusive ownership of the
		 * fallback instance until we are done with it.
		 */
		mutex_lock(&flush_backlogs_mutex);
		ptr = flush_backlogs_fallback;
	}
	cpumask_clear(&ptr->flush_cpus);

	cpus_read_lock();

	for_each_online_cpu(cpu) {
		if (flush_required(cpu)) {
			INIT_WORK(&ptr->w[cpu], flush_backlog);
			queue_work_on(cpu, system_highpri_wq, &ptr->w[cpu]);
			/* ptr is only touched by this caller, so the
			 * non-atomic setter is sufficient.
			 */
			__cpumask_set_cpu(cpu, &ptr->flush_cpus);
		}
	}

	/* we can have in flight packet[s] on the cpus we are not flushing,
	 * synchronize_net() in unregister_netdevice_many() will take care of
	 * them.
	 */
	for_each_cpu(cpu, &ptr->flush_cpus)
		flush_work(&ptr->w[cpu]);

	cpus_read_unlock();

	/* Release whichever resource was taken at the top. */
	if (ptr != flush_backlogs_fallback)
		kfree(ptr);
	else
		mutex_unlock(&flush_backlogs_mutex);
}

static void net_rps_send_ipi(struct softnet_data *remsd)
@@ -12313,12 +12329,13 @@ static int __init net_dev_init(void)
	 *	Initialise the packet receive queues.
	 */

	flush_backlogs_fallback = flush_backlogs_alloc();
	if (!flush_backlogs_fallback)
		goto out;

	for_each_possible_cpu(i) {
		struct work_struct *flush = per_cpu_ptr(&flush_works, i);
		struct softnet_data *sd = &per_cpu(softnet_data, i);

		INIT_WORK(flush, flush_backlog);

		skb_queue_head_init(&sd->input_pkt_queue);
		skb_queue_head_init(&sd->process_queue);
#ifdef CONFIG_XFRM_OFFLOAD