Commit 0d40a6d8 authored by Sebastian Andrzej Siewior, committed by Peter Zijlstra
Browse files

perf: Move swevent_htable::recursion into task_struct.



The swevent_htable::recursion counter is used to avoid creating a new
swevent while an event is being processed, which would lead to recursion.
The counter is per-CPU and preemption must be disabled to have a stable
counter. perf_pending_task() disables preemption to access the counter and
then sends a signal. This is problematic on PREEMPT_RT because sending a
signal acquires a spinlock_t, which must not be acquired in atomic context
on PREEMPT_RT because it becomes a sleeping lock.

The atomic context can be avoided by moving the counter into the
task_struct. There is a 4 byte hole between futex_state (usually
enabled) and the following perf pointer (perf_event_ctxp). Now that the
recursion counter has been slimmed down to four u8 entries, it fits
perfectly into that hole.

Move swevent_htable::recursion into task_struct.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Marco Elver <elver@google.com>
Link: https://lore.kernel.org/r/20240704170424.1466941-6-bigeasy@linutronix.de
parent 5af42f92
Loading
Loading
Loading
Loading
+0 −6
Original line number Diff line number Diff line
@@ -970,12 +970,6 @@ struct perf_event_context {
	local_t				nr_pending;
};

/*
 * Number of contexts where an event can trigger:
 *	task, softirq, hardirq, nmi.
 */
#define PERF_NR_CONTEXTS	4

struct perf_cpu_pmu_context {
	struct perf_event_pmu_context	epc;
	struct perf_event_pmu_context	*task_epc;
+7 −0
Original line number Diff line number Diff line
@@ -734,6 +734,12 @@ enum perf_event_task_context {
	perf_nr_task_contexts,
};

/*
 * Number of contexts where an event can trigger:
 *      task, softirq, hardirq, nmi.
 */
#define PERF_NR_CONTEXTS	4

/* Unchanged context in this hunk: a list node holding only a pointer to the next node. */
struct wake_q_node {
	struct wake_q_node *next;
};
@@ -1256,6 +1262,7 @@ struct task_struct {
	unsigned int			futex_state;
#endif
#ifdef CONFIG_PERF_EVENTS
	u8				perf_recursion[PERF_NR_CONTEXTS];
	struct perf_event_context	*perf_event_ctxp;
	struct mutex			perf_event_mutex;
	struct list_head		perf_event_list;
+3 −10
Original line number Diff line number Diff line
@@ -9763,11 +9763,7 @@ struct swevent_htable {
	struct swevent_hlist		*swevent_hlist;
	struct mutex			hlist_mutex;
	int				hlist_refcount;

	/* Recursion avoidance in each contexts */
	u8				recursion[PERF_NR_CONTEXTS];
};

static DEFINE_PER_CPU(struct swevent_htable, swevent_htable);

/*
@@ -9965,17 +9961,13 @@ DEFINE_PER_CPU(struct pt_regs, __perf_regs[4]);

/*
 * Diff rendering of perf_swevent_get_recursion_context(): the +/- markers
 * were lost, so removed and added lines appear side by side.
 *
 * Per the commit message, the this_cpu_ptr(&swevent_htable) lookup and the
 * return based on swhash->recursion are the old per-CPU lines being removed;
 * the return based on current->perf_recursion is the replacement that uses
 * the counter array stored in the current task.
 *
 * Returns the value produced by get_recursion_context() for that array.
 */
int perf_swevent_get_recursion_context(void)
{
	struct swevent_htable *swhash = this_cpu_ptr(&swevent_htable);

	return get_recursion_context(swhash->recursion);
	return get_recursion_context(current->perf_recursion);
}
EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context);

/*
 * Diff rendering of perf_swevent_put_recursion_context(): the +/- markers
 * were lost, so removed and added lines appear side by side.
 *
 * Per the commit message, the this_cpu_ptr(&swevent_htable) lookup and the
 * put on swhash->recursion are the old per-CPU lines being removed; the put
 * on current->perf_recursion is the replacement that uses the counter array
 * stored in the current task.
 *
 * @rctx: index into the recursion array that is handed to
 *        put_recursion_context().
 */
void perf_swevent_put_recursion_context(int rctx)
{
	struct swevent_htable *swhash = this_cpu_ptr(&swevent_htable);

	put_recursion_context(swhash->recursion, rctx);
	put_recursion_context(current->perf_recursion, rctx);
}

void ___perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)
@@ -13642,6 +13634,7 @@ int perf_event_init_task(struct task_struct *child, u64 clone_flags)
{
	int ret;

	memset(child->perf_recursion, 0, sizeof(child->perf_recursion));
	child->perf_event_ctxp = NULL;
	mutex_init(&child->perf_event_mutex);
	INIT_LIST_HEAD(&child->perf_event_list);
+1 −1
Original line number Diff line number Diff line
@@ -221,7 +221,7 @@ static inline int get_recursion_context(u8 *recursion)
	return rctx;
}

static inline void put_recursion_context(u8 *recursion, int rctx)
static inline void put_recursion_context(u8 *recursion, unsigned char rctx)
{
	barrier();
	recursion[rctx]--;