Commit c9bc1753 authored by Peter Zijlstra
Browse files

perf: Fix __perf_event_overflow() vs perf_remove_from_context() race



Make sure that __perf_event_overflow() runs with IRQs disabled for all
possible callchains. Specifically the software events can end up running
it with only preemption disabled.

This opens up a race vs perf_event_exit_event() and friends that will go
and free various things the overflow path expects to be present, like
the BPF program.

Fixes: 592903cd ("perf_counter: add an event_list")
Reported-by: Simond Hu <cmdhh1767@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Simond Hu <cmdhh1767@gmail.com>
Link: https://patch.msgid.link/20260224122909.GV1395416@noisy.programming.kicks-ass.net
parent 77de62ad
Loading
Loading
Loading
Loading
+41 −1
Original line number Diff line number Diff line
@@ -10777,6 +10777,13 @@ int perf_event_overflow(struct perf_event *event,
			struct perf_sample_data *data,
			struct pt_regs *regs)
{
	/*
	 * Entry point from hardware PMI, interrupts should be disabled here.
	 * This serializes us against perf_remove_from_context() in things
	 * like perf_event_release_kernel().
	 */
	lockdep_assert_irqs_disabled();

	/* Hand off to the common overflow handler and return its result. */
	return __perf_event_overflow(event, 1, data, regs);
}

@@ -10853,6 +10860,19 @@ static void perf_swevent_event(struct perf_event *event, u64 nr,
{
	struct hw_perf_event *hwc = &event->hw;

	/*
	 * This is:
	 *   - software		preempt
	 *   - tracepoint	preempt
	 *   -   tp_target_task	irq (ctx->lock)
	 *   - uprobes		preempt/irq
	 *   - kprobes		preempt/irq
	 *   - hw_breakpoint	irq
	 *
	 * Any of these are sufficient to hold off RCU and thus ensure @event
	 * exists.
	 */
	lockdep_assert_preemption_disabled();
	local64_add(nr, &event->count);

	if (!regs)
@@ -10861,6 +10881,16 @@ static void perf_swevent_event(struct perf_event *event, u64 nr,
	if (!is_sampling_event(event))
		return;

	/*
	 * Serialize against event_function_call() IPIs like normal overflow
	 * event handling. Specifically, must not allow
	 * perf_event_release_kernel() -> perf_remove_from_context() to make
	 * progress and 'release' the event from under us.
	 */
	guard(irqsave)();
	if (event->state != PERF_EVENT_STATE_ACTIVE)
		return;

	if ((event->attr.sample_type & PERF_SAMPLE_PERIOD) && !event->attr.freq) {
		data->period = nr;
		return perf_swevent_overflow(event, 1, data, regs);
@@ -11359,6 +11389,11 @@ void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size,
	struct perf_sample_data data;
	struct perf_event *event;

	/*
	 * Being a tracepoint, this runs with preemption disabled.
	 */
	lockdep_assert_preemption_disabled();

	struct perf_raw_record raw = {
		.frag = {
			.size = entry_size,
@@ -11691,6 +11726,11 @@ void perf_bp_event(struct perf_event *bp, void *data)
	struct perf_sample_data sample;
	struct pt_regs *regs = data;

	/*
	 * Exception context, will have interrupts disabled.
	 */
	lockdep_assert_irqs_disabled();

	perf_sample_data_init(&sample, bp->attr.bp_addr, 0);

	if (!bp->hw.state && !perf_exclude_event(bp, regs))
@@ -12155,7 +12195,7 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)

	if (regs && !perf_exclude_event(event, regs)) {
		if (!(event->attr.exclude_idle && is_idle_task(current)))
			if (__perf_event_overflow(event, 1, &data, regs))
			if (perf_event_overflow(event, &data, regs))
				ret = HRTIMER_NORESTART;
	}