Commit cb36eabc authored by Linus Torvalds
Browse files

Merge tag 'perf-urgent-2026-03-01' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf events fixes from Ingo Molnar:

 - Fix lock ordering bug found by lockdep in perf_event_wakeup()

 - Fix uncore counter enumeration on Granite Rapids and Sierra Forest

 - Fix perf_mmap() refcount bug found by Syzkaller

 - Fix __perf_event_overflow() vs perf_remove_from_context() race

* tag 'perf-urgent-2026-03-01' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  perf: Fix __perf_event_overflow() vs perf_remove_from_context() race
  perf/core: Fix refcount bug and potential UAF in perf_mmap
  perf/x86/intel/uncore: Add per-scheduler IMC CAS count events
  perf/core: Fix invalid wait context in ctx_sched_in()
parents b4102208 c9bc1753
Loading
Loading
Loading
Loading
+27 −1
Original line number Diff line number Diff line
@@ -6497,6 +6497,32 @@ static struct intel_uncore_type gnr_uncore_ubox = {
	.attr_update		= uncore_alias_groups,
};

/*
 * Named event descriptors for the "gnr" IMC uncore type (referenced by
 * gnr_uncore_imc.event_descs).
 *
 * Besides clockticks, the table describes CAS count events split per
 * scheduler (sch0 / sch1) and per direction (read / write):
 *
 *   - event=0x05 selects scheduler 0, event=0x06 selects scheduler 1
 *   - umask=0xcf selects reads, umask=0xf0 selects writes
 *
 * Every CAS count event carries a ".scale" and a ".unit" companion entry.
 * The scale 6.103515625e-5 equals 64 / 2^20, so a raw count is reported in
 * MiB -- presumably assuming 64 bytes transferred per CAS; confirm against
 * the hardware documentation.
 *
 * The list is terminated by an all-zero sentinel entry.
 */
static struct uncore_event_desc gnr_uncore_imc_events[] = {
	INTEL_UNCORE_EVENT_DESC(clockticks,      "event=0x01,umask=0x00"),
	/* Scheduler 0, reads */
	INTEL_UNCORE_EVENT_DESC(cas_count_read_sch0,  "event=0x05,umask=0xcf"),
	INTEL_UNCORE_EVENT_DESC(cas_count_read_sch0.scale, "6.103515625e-5"),
	INTEL_UNCORE_EVENT_DESC(cas_count_read_sch0.unit, "MiB"),
	/* Scheduler 1, reads */
	INTEL_UNCORE_EVENT_DESC(cas_count_read_sch1,  "event=0x06,umask=0xcf"),
	INTEL_UNCORE_EVENT_DESC(cas_count_read_sch1.scale, "6.103515625e-5"),
	INTEL_UNCORE_EVENT_DESC(cas_count_read_sch1.unit, "MiB"),
	/* Scheduler 0, writes */
	INTEL_UNCORE_EVENT_DESC(cas_count_write_sch0, "event=0x05,umask=0xf0"),
	INTEL_UNCORE_EVENT_DESC(cas_count_write_sch0.scale, "6.103515625e-5"),
	INTEL_UNCORE_EVENT_DESC(cas_count_write_sch0.unit, "MiB"),
	/* Scheduler 1, writes */
	INTEL_UNCORE_EVENT_DESC(cas_count_write_sch1, "event=0x06,umask=0xf0"),
	INTEL_UNCORE_EVENT_DESC(cas_count_write_sch1.scale, "6.103515625e-5"),
	INTEL_UNCORE_EVENT_DESC(cas_count_write_sch1.unit, "MiB"),
	{ /* end: all zeroes */ },
};

/*
 * Uncore type description for the "imc" PMU on gnr platforms.
 *
 * Common fields come from SPR_UNCORE_MMIO_COMMON_FORMAT() (its expansion is
 * not visible here). On top of that this type declares a 48-bit fixed
 * counter, reusing the SNR IMC MMIO fixed counter/control definitions, and
 * attaches the per-scheduler CAS count event table gnr_uncore_imc_events.
 */
static struct intel_uncore_type gnr_uncore_imc = {
	SPR_UNCORE_MMIO_COMMON_FORMAT(),
	.name			= "imc",
	.fixed_ctr_bits		= 48,
	.fixed_ctr		= SNR_IMC_MMIO_PMON_FIXED_CTR,
	.fixed_ctl		= SNR_IMC_MMIO_PMON_FIXED_CTL,
	/* Named events (clockticks, cas_count_{read,write}_sch{0,1}) */
	.event_descs		= gnr_uncore_imc_events,
};

static struct intel_uncore_type gnr_uncore_pciex8 = {
	SPR_UNCORE_PCI_COMMON_FORMAT(),
	.name			= "pciex8",
@@ -6544,7 +6570,7 @@ static struct intel_uncore_type *gnr_uncores[UNCORE_GNR_NUM_UNCORE_TYPES] = {
	NULL,
	&spr_uncore_pcu,
	&gnr_uncore_ubox,
	&spr_uncore_imc,
	&gnr_uncore_imc,
	NULL,
	&gnr_uncore_upi,
	NULL,
+62 −21
Original line number Diff line number Diff line
@@ -4138,7 +4138,8 @@ static int merge_sched_in(struct perf_event *event, void *data)
			if (*perf_event_fasync(event))
				event->pending_kill = POLL_ERR;

			perf_event_wakeup(event);
			event->pending_wakeup = 1;
			irq_work_queue(&event->pending_irq);
		} else {
			struct perf_cpu_pmu_context *cpc = this_cpc(event->pmu_ctx->pmu);

@@ -7464,7 +7465,6 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
			ret = perf_mmap_aux(vma, event, nr_pages);
		if (ret)
			return ret;
	}

		/*
		 * Since pinned accounting is per vm we cannot allow fork() to copy our
@@ -7486,6 +7486,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
		ret = map_range(event->rb, vma);
		if (ret)
			perf_mmap_close(vma);
	}

	return ret;
}
@@ -10776,6 +10777,13 @@ int perf_event_overflow(struct perf_event *event,
			struct perf_sample_data *data,
			struct pt_regs *regs)
{
	/*
	 * Entry point from hardware PMI, interrupts should be disabled here.
	 * This serializes us against perf_remove_from_context() in
	 * things like perf_event_release_kernel().
	 */
	lockdep_assert_irqs_disabled();

	return __perf_event_overflow(event, 1, data, regs);
}

@@ -10852,6 +10860,19 @@ static void perf_swevent_event(struct perf_event *event, u64 nr,
{
	struct hw_perf_event *hwc = &event->hw;

	/*
	 * This is:
	 *   - software		preempt
	 *   - tracepoint	preempt
	 *   -   tp_target_task	irq (ctx->lock)
	 *   - uprobes		preempt/irq
	 *   - kprobes		preempt/irq
	 *   - hw_breakpoint	irq
	 *
	 * Any of these are sufficient to hold off RCU and thus ensure @event
	 * exists.
	 */
	lockdep_assert_preemption_disabled();
	local64_add(nr, &event->count);

	if (!regs)
@@ -10860,6 +10881,16 @@ static void perf_swevent_event(struct perf_event *event, u64 nr,
	if (!is_sampling_event(event))
		return;

	/*
	 * Serialize against event_function_call() IPIs like normal overflow
	 * event handling. Specifically, must not allow
	 * perf_event_release_kernel() -> perf_remove_from_context() to make
	 * progress and 'release' the event from under us.
	 */
	guard(irqsave)();
	if (event->state != PERF_EVENT_STATE_ACTIVE)
		return;

	if ((event->attr.sample_type & PERF_SAMPLE_PERIOD) && !event->attr.freq) {
		data->period = nr;
		return perf_swevent_overflow(event, 1, data, regs);
@@ -11358,6 +11389,11 @@ void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size,
	struct perf_sample_data data;
	struct perf_event *event;

	/*
	 * Per being a tracepoint, this runs with preemption disabled.
	 */
	lockdep_assert_preemption_disabled();

	struct perf_raw_record raw = {
		.frag = {
			.size = entry_size,
@@ -11690,6 +11726,11 @@ void perf_bp_event(struct perf_event *bp, void *data)
	struct perf_sample_data sample;
	struct pt_regs *regs = data;

	/*
	 * Exception context, will have interrupts disabled.
	 */
	lockdep_assert_irqs_disabled();

	perf_sample_data_init(&sample, bp->attr.bp_addr, 0);

	if (!bp->hw.state && !perf_exclude_event(bp, regs))
@@ -12154,7 +12195,7 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)

	if (regs && !perf_exclude_event(event, regs)) {
		if (!(event->attr.exclude_idle && is_idle_task(current)))
			if (__perf_event_overflow(event, 1, &data, regs))
			if (perf_event_overflow(event, &data, regs))
				ret = HRTIMER_NORESTART;
	}