Commit c5d93d23 authored by Sebastian Andrzej Siewior, committed by Peter Zijlstra
Browse files

perf: Enqueue SIGTRAP always via task_work.



A signal is delivered by raising irq_work() which works from any context
including NMI. irq_work() can be delayed if the architecture does not
provide an interrupt vector. In order not to lose a signal, the signal
is injected via task_work during event_sched_out().

Instead of going via irq_work, the signal could be added directly via
task_work. The signal is sent to current and can be enqueued on its
return path to userland.

Queue signal via task_work and consider possible NMI context. Remove
perf_event::pending_sigtrap and use perf_event::pending_work
instead.

Reported-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Marco Elver <elver@google.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Link: https://lore.kernel.org/r/20240704170424.1466941-4-bigeasy@linutronix.de
parent 466e4d80
Loading
Loading
Loading
Loading
+1 −2
Original line number Diff line number Diff line
@@ -781,7 +781,6 @@ struct perf_event {
	unsigned int			pending_wakeup;
	unsigned int			pending_kill;
	unsigned int			pending_disable;
	unsigned int			pending_sigtrap;
	unsigned long			pending_addr;	/* SIGTRAP */
	struct irq_work			pending_irq;
	struct callback_head		pending_task;
@@ -963,7 +962,7 @@ struct perf_event_context {
	struct rcu_head			rcu_head;

	/*
	 * Sum (event->pending_sigtrap + event->pending_work)
	 * Sum (event->pending_work)
	 *
	 * The SIGTRAP is targeted at ctx->task, as such it won't do changing
	 * that until the signal is delivered.
+10 −21
Original line number Diff line number Diff line
@@ -2283,17 +2283,6 @@ event_sched_out(struct perf_event *event, struct perf_event_context *ctx)
		state = PERF_EVENT_STATE_OFF;
	}

	if (event->pending_sigtrap) {
		event->pending_sigtrap = 0;
		if (state != PERF_EVENT_STATE_OFF &&
		    !event->pending_work &&
		    !task_work_add(current, &event->pending_task, TWA_RESUME)) {
			event->pending_work = 1;
		} else {
			local_dec(&event->ctx->nr_pending);
		}
	}

	perf_event_set_state(event, state);

	if (!is_software_event(event))
@@ -6776,11 +6765,6 @@ static void __perf_pending_irq(struct perf_event *event)
	 * Yay, we hit home and are in the context of the event.
	 */
	if (cpu == smp_processor_id()) {
		if (event->pending_sigtrap) {
			event->pending_sigtrap = 0;
			perf_sigtrap(event);
			local_dec(&event->ctx->nr_pending);
		}
		if (event->pending_disable) {
			event->pending_disable = 0;
			perf_event_disable_local(event);
@@ -9721,21 +9705,26 @@ static int __perf_event_overflow(struct perf_event *event,
		 */
		bool valid_sample = sample_is_allowed(event, regs);
		unsigned int pending_id = 1;
		enum task_work_notify_mode notify_mode;

		if (regs)
			pending_id = hash32_ptr((void *)instruction_pointer(regs)) ?: 1;
		if (!event->pending_sigtrap) {
			event->pending_sigtrap = pending_id;

		notify_mode = in_nmi() ? TWA_NMI_CURRENT : TWA_RESUME;

		if (!event->pending_work &&
		    !task_work_add(current, &event->pending_task, notify_mode)) {
			event->pending_work = pending_id;
			local_inc(&event->ctx->nr_pending);

			event->pending_addr = 0;
			if (valid_sample && (data->sample_flags & PERF_SAMPLE_ADDR))
				event->pending_addr = data->addr;
			irq_work_queue(&event->pending_irq);

		} else if (event->attr.exclude_kernel && valid_sample) {
			/*
			 * Should not be able to return to user space without
			 * consuming pending_sigtrap; with exceptions:
			 * consuming pending_work; with exceptions:
			 *
			 *  1. Where !exclude_kernel, events can overflow again
			 *     in the kernel without returning to user space.
@@ -9745,7 +9734,7 @@ static int __perf_event_overflow(struct perf_event *event,
			 *     To approximate progress (with false negatives),
			 *     check 32-bit hash of the current IP.
			 */
			WARN_ON_ONCE(event->pending_sigtrap != pending_id);
			WARN_ON_ONCE(event->pending_work != pending_id);
		}
	}