Commit a6fc88b2, authored by Joel Fernandes, committed by Boqun Feng
Browse files

srcu: Use irq_work to start GP in tiny SRCU



Tiny SRCU's srcu_gp_start_if_needed() directly calls schedule_work(),
which acquires the workqueue pool->lock.

This causes a lockdep splat when call_srcu() is called with a scheduler
lock held, due to:

  call_srcu() [holding pi_lock]
    srcu_gp_start_if_needed()
      schedule_work() -> pool->lock

  workqueue_init() / create_worker() [holding pool->lock]
    wake_up_process() -> try_to_wake_up() -> pi_lock

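Fix this by having srcu_gp_start_if_needed() queue an irq_work whose
handler calls schedule_work(), so that pool->lock is no longer acquired
while the caller's scheduler lock is held.

Also add irq_work_sync() to cleanup_srcu_struct() to prevent a
use-after-free if a queued irq_work fires after cleanup begins.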

Tested with rcutorture SRCU-T and no lockdep warnings.

[ Thanks to Boqun for similar fix in patch "rcu: Use an intermediate irq_work
to start process_srcu()" ]

Signed-off-by: Joel Fernandes <joelagnelf@nvidia.com>
Reviewed-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Boqun Feng <boqun@kernel.org>
parent 7c405fb3
Loading
Loading
Loading
Loading
+4 −0
Original line number Diff line number Diff line
@@ -11,6 +11,7 @@
#ifndef _LINUX_SRCU_TINY_H
#define _LINUX_SRCU_TINY_H

#include <linux/irq_work_types.h>
#include <linux/swait.h>

struct srcu_struct {
@@ -24,18 +25,21 @@ struct srcu_struct {
	struct rcu_head *srcu_cb_head;	/* Pending callbacks: Head. */
	struct rcu_head **srcu_cb_tail;	/* Pending callbacks: Tail. */
	struct work_struct srcu_work;	/* For driving grace periods. */
	struct irq_work srcu_irq_work;	/* Defer schedule_work() to irq work. */
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	struct lockdep_map dep_map;
#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
};

void srcu_drive_gp(struct work_struct *wp);
void srcu_tiny_irq_work(struct irq_work *irq_work);

/*
 * Static initializer for a struct srcu_struct called @name.
 *
 * Sets up the wait-queue head, points the callback-list tail at the
 * structure's own (empty) callback head, binds srcu_work to srcu_drive_gp()
 * and srcu_irq_work to srcu_tiny_irq_work(), and appends the lockdep map
 * initializer via __SRCU_DEP_MAP_INIT().  The three trailing parameters are
 * accepted but not used by this expansion.
 */
#define __SRCU_STRUCT_INIT(name, __ignored, ___ignored, ____ignored)	\
{									\
	.srcu_wq = __SWAIT_QUEUE_HEAD_INITIALIZER(name.srcu_wq),	\
	.srcu_cb_tail = &name.srcu_cb_head,				\
	.srcu_work = __WORK_INITIALIZER(name.srcu_work, srcu_drive_gp),	\
	.srcu_irq_work = { .func = srcu_tiny_irq_work },		\
	__SRCU_DEP_MAP_INIT(name)					\
}

+18 −1
Original line number Diff line number Diff line
@@ -9,6 +9,7 @@
 */

#include <linux/export.h>
#include <linux/irq_work.h>
#include <linux/mutex.h>
#include <linux/preempt.h>
#include <linux/rcupdate_wait.h>
@@ -41,6 +42,7 @@ static int init_srcu_struct_fields(struct srcu_struct *ssp)
	ssp->srcu_idx_max = 0;
	INIT_WORK(&ssp->srcu_work, srcu_drive_gp);
	INIT_LIST_HEAD(&ssp->srcu_work.entry);
	init_irq_work(&ssp->srcu_irq_work, srcu_tiny_irq_work);
	return 0;
}

@@ -84,6 +86,7 @@ EXPORT_SYMBOL_GPL(init_srcu_struct);
void cleanup_srcu_struct(struct srcu_struct *ssp)
{
	WARN_ON(ssp->srcu_lock_nesting[0] || ssp->srcu_lock_nesting[1]);
	irq_work_sync(&ssp->srcu_irq_work);
	flush_work(&ssp->srcu_work);
	WARN_ON(ssp->srcu_gp_running);
	WARN_ON(ssp->srcu_gp_waiting);
@@ -177,6 +180,20 @@ void srcu_drive_gp(struct work_struct *wp)
}
EXPORT_SYMBOL_GPL(srcu_drive_gp);

/*
 * irq_work handler for Tiny SRCU: map the irq_work back to the srcu_struct
 * that embeds it and hand that structure's srcu_work to schedule_work().
 *
 * Going through an irq_work keeps the workqueue pool->lock from being
 * acquired while the original caller may still hold scheduler locks, which
 * would otherwise trigger lockdep splats because workqueue_init() performs
 * a wakeup.
 */
void srcu_tiny_irq_work(struct irq_work *irq_work)
{
	struct srcu_struct *owner =
		container_of(irq_work, struct srcu_struct, srcu_irq_work);

	schedule_work(&owner->srcu_work);
}
EXPORT_SYMBOL_GPL(srcu_tiny_irq_work);

static void srcu_gp_start_if_needed(struct srcu_struct *ssp)
{
	unsigned long cookie;
@@ -189,7 +206,7 @@ static void srcu_gp_start_if_needed(struct srcu_struct *ssp)
	WRITE_ONCE(ssp->srcu_idx_max, cookie);
	if (!READ_ONCE(ssp->srcu_gp_running)) {
		if (likely(srcu_init_done))
			schedule_work(&ssp->srcu_work);
			irq_work_queue(&ssp->srcu_irq_work);
		else if (list_empty(&ssp->srcu_work.entry))
			list_add(&ssp->srcu_work.entry, &srcu_boot_list);
	}