Commit c27cea44 authored by Paul E. McKenney, committed by Boqun Feng
Browse files

rcu: Re-implement RCU Tasks Trace in terms of SRCU-fast



This commit saves more than 500 lines of RCU code by re-implementing
RCU Tasks Trace in terms of SRCU-fast.  Follow-up work will remove
more code that does not cause problems by its presence, but that is no
longer required.

This variant places smp_mb() in rcu_read_{,un}lock_trace(), and in the
same place that srcu_read_{,un}lock() would put them. These smp_mb()
calls will be removed on common-case architectures in a later commit.
In the meantime, they serve to enforce ordering between the underlying
srcu_read_{,un}lock_fast() markers and the intervening critical section,
even on architectures that permit attaching tracepoints on regions of
code not watched by RCU.  Such architectures defeat SRCU-fast's use of
implicit single-instruction, interrupts-disabled, and atomic-operation
RCU read-side critical sections, which have no effect when RCU is not
watching.  The aforementioned later commit will insert these smp_mb()
calls only on architectures that have not used noinstr to prevent
attaching tracepoints to code where RCU is not watching.

[ paulmck: Apply kernel test robot, Boqun Feng, and Zqiang feedback. ]
[ paulmck: Split out Tiny SRCU fixes per Andrii Nakryiko feedback. ]

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Tested-by: kernel test robot <oliver.sang@intel.com>
Cc: Andrii Nakryiko <andrii@kernel.org>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: bpf@vger.kernel.org
Reviewed-by: Joel Fernandes <joelagnelf@nvidia.com>
Signed-off-by: Boqun Feng <boqun.feng@gmail.com>
parent 8f0b4cce
Loading
Loading
Loading
Loading
+79 −28
Original line number Diff line number Diff line
@@ -12,28 +12,28 @@
#include <linux/rcupdate.h>
#include <linux/cleanup.h>

extern struct lockdep_map rcu_trace_lock_map;
#ifdef CONFIG_TASKS_TRACE_RCU
extern struct srcu_struct rcu_tasks_trace_srcu_struct;
#endif // #ifdef CONFIG_TASKS_TRACE_RCU

#ifdef CONFIG_DEBUG_LOCK_ALLOC
#if defined(CONFIG_DEBUG_LOCK_ALLOC) && defined(CONFIG_TASKS_TRACE_RCU)

static inline int rcu_read_lock_trace_held(void)
{
	return lock_is_held(&rcu_trace_lock_map);
	return srcu_read_lock_held(&rcu_tasks_trace_srcu_struct);
}

#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
#else // #if defined(CONFIG_DEBUG_LOCK_ALLOC) && defined(CONFIG_TASKS_TRACE_RCU)

// Fallback for the #else branch of the surrounding conditional: no
// held-lock check is performed here, so "held" is reported unconditionally.
static inline int rcu_read_lock_trace_held(void)
{
	return 1;
}

#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
#endif // #else // #if defined(CONFIG_DEBUG_LOCK_ALLOC) && defined(CONFIG_TASKS_TRACE_RCU)

#ifdef CONFIG_TASKS_TRACE_RCU

void rcu_read_unlock_trace_special(struct task_struct *t);

/**
 * rcu_read_lock_trace - mark beginning of RCU-trace read-side critical section
 *
@@ -50,12 +50,14 @@ static inline void rcu_read_lock_trace(void)
{
	struct task_struct *t = current;

	WRITE_ONCE(t->trc_reader_nesting, READ_ONCE(t->trc_reader_nesting) + 1);
	barrier();
	if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB) &&
	    t->trc_reader_special.b.need_mb)
		smp_mb(); // Pairs with update-side barriers
	rcu_lock_acquire(&rcu_trace_lock_map);
	if (t->trc_reader_nesting++) {
		// In case we interrupted a Tasks Trace RCU reader.
		rcu_try_lock_acquire(&rcu_tasks_trace_srcu_struct.dep_map);
		return;
	}
	barrier();  // nesting before scp to protect against interrupt handler.
	t->trc_reader_scp = srcu_read_lock_fast(&rcu_tasks_trace_srcu_struct);
	smp_mb(); // Placeholder for more selective ordering
}

/**
@@ -69,26 +71,75 @@ static inline void rcu_read_lock_trace(void)
 */
static inline void rcu_read_unlock_trace(void)
{
	int nesting;
	struct srcu_ctr __percpu *scp;
	struct task_struct *t = current;

	rcu_lock_release(&rcu_trace_lock_map);
	nesting = READ_ONCE(t->trc_reader_nesting) - 1;
	barrier(); // Critical section before disabling.
	// Disable IPI-based setting of .need_qs.
	WRITE_ONCE(t->trc_reader_nesting, INT_MIN + nesting);
	if (likely(!READ_ONCE(t->trc_reader_special.s)) || nesting) {
		WRITE_ONCE(t->trc_reader_nesting, nesting);
		return;  // We assume shallow reader nesting.
	smp_mb(); // Placeholder for more selective ordering
	scp = t->trc_reader_scp;
	barrier();  // scp before nesting to protect against interrupt handler.
	if (!--t->trc_reader_nesting)
		srcu_read_unlock_fast(&rcu_tasks_trace_srcu_struct, scp);
	else
		srcu_lock_release(&rcu_tasks_trace_srcu_struct.dep_map);
}

/**
 * call_rcu_tasks_trace() - Queue a callback for invocation after a trace rcu-tasks grace period
 * @rhp: structure to be used for queueing the RCU updates.
 * @func: actual callback function to be invoked after the grace period
 *
 * The callback function will be invoked some time after a trace rcu-tasks
 * grace period elapses, in other words after all currently executing
 * trace rcu-tasks read-side critical sections have completed. These
 * read-side critical sections are delimited by calls to rcu_read_lock_trace()
 * and rcu_read_unlock_trace().
 *
 * See the description of call_rcu() for more detailed information on
 * memory ordering guarantees.
 */
static inline void call_rcu_tasks_trace(struct rcu_head *rhp, rcu_callback_t func)
{
	// Thin wrapper: hand the callback to call_srcu() on rcu_tasks_trace_srcu_struct.
	call_srcu(&rcu_tasks_trace_srcu_struct, rhp, func);
}

/**
 * synchronize_rcu_tasks_trace - wait for a trace rcu-tasks grace period
 *
 * Control will return to the caller some time after a trace rcu-tasks
 * grace period has elapsed, in other words after all currently executing
 * trace rcu-tasks read-side critical sections have completed. These read-side
 * critical sections are delimited by calls to rcu_read_lock_trace()
 * and rcu_read_unlock_trace().
 *
 * This is a very specialized primitive, intended only for a few uses in
 * tracing and other situations requiring manipulation of function preambles
 * and profiling hooks.  The synchronize_rcu_tasks_trace() function is not
 * (yet) intended for heavy use from multiple CPUs.
 *
 * See the description of synchronize_rcu() for more detailed information
 * on memory ordering guarantees.
 */
static inline void synchronize_rcu_tasks_trace(void)
{
	// Thin wrapper: delegate the wait to synchronize_srcu() on rcu_tasks_trace_srcu_struct.
	synchronize_srcu(&rcu_tasks_trace_srcu_struct);
}
	WARN_ON_ONCE(nesting != 0);
	rcu_read_unlock_trace_special(t);

/**
 * rcu_barrier_tasks_trace - Wait for in-flight call_rcu_tasks_trace() callbacks.
 *
 * Note that rcu_barrier_tasks_trace() is not obligated to actually wait,
 * for example, if there are no pending callbacks.
 */
static inline void rcu_barrier_tasks_trace(void)
{
	// Thin wrapper: delegate to srcu_barrier() on rcu_tasks_trace_srcu_struct.
	srcu_barrier(&rcu_tasks_trace_srcu_struct);
}

void call_rcu_tasks_trace(struct rcu_head *rhp, rcu_callback_t func);
void synchronize_rcu_tasks_trace(void);
void rcu_barrier_tasks_trace(void);
// Placeholders to enable stepwise transition.
void rcu_tasks_trace_get_gp_data(int *flags, unsigned long *gp_seq);
void __init rcu_tasks_trace_suppress_unused(void);
struct task_struct *get_rcu_tasks_trace_gp_kthread(void);

#else
/*
 * The BPF JIT forms these addresses even when it doesn't call these
+1 −0
Original line number Diff line number Diff line
@@ -945,6 +945,7 @@ struct task_struct {

#ifdef CONFIG_TASKS_TRACE_RCU
	int				trc_reader_nesting;
	struct srcu_ctr __percpu	*trc_reader_scp;
	int				trc_ipi_to_cpu;
	union rcu_special		trc_reader_special;
	struct list_head		trc_holdout_list;
+19 −602

File changed.

Preview size limit exceeded, changes collapsed.