Commit e5cf0260 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'perf-urgent-2026-05-09' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf events fixes from Ingo Molnar:

 - Fix deadlock in the perf_mmap() failure path (Peter Zijlstra)

 - Intel ACR (Auto Counter Reload) fixes (Dapeng Mi):
     - Fix validation and configuration of ACR masks
     - Fix ACR rescheduling bug causing stale masks
     - Disable the PMI on ACR-enabled hardware
     - Enable ACR on Panther Cover uarch too

* tag 'perf-urgent-2026-05-09' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  perf/x86/intel: Enable auto counter reload for DMR
  perf/x86/intel: Disable PMI for self-reloaded ACR events
  perf/x86/intel: Always reprogram ACR events to prevent stale masks
  perf/x86/intel: Improve validation and configuration of ACR masks
  perf/core: Fix deadlock in perf_mmap() failure path
parents 27a26ccf aa4384bc
Loading
Loading
Loading
Loading
+8 −5
Original line number Diff line number Diff line
@@ -1294,13 +1294,16 @@ int x86_perf_rdpmc_index(struct perf_event *event)
	return event->hw.event_base_rdpmc;
}

static inline int match_prev_assignment(struct hw_perf_event *hwc,
static inline int match_prev_assignment(struct perf_event *event,
					struct cpu_hw_events *cpuc,
					int i)
{
	struct hw_perf_event *hwc = &event->hw;

	return hwc->idx == cpuc->assign[i] &&
	       hwc->last_cpu == smp_processor_id() &&
		hwc->last_tag == cpuc->tags[i];
	       hwc->last_tag == cpuc->tags[i] &&
	       !is_acr_event_group(event);
}

static void x86_pmu_start(struct perf_event *event, int flags);
@@ -1346,7 +1349,7 @@ static void x86_pmu_enable(struct pmu *pmu)
			 * - no other event has used the counter since
			 */
			if (hwc->idx == -1 ||
			    match_prev_assignment(hwc, cpuc, i))
			    match_prev_assignment(event, cpuc, i))
				continue;

			/*
@@ -1367,7 +1370,7 @@ static void x86_pmu_enable(struct pmu *pmu)
			event = cpuc->event_list[i];
			hwc = &event->hw;

			if (!match_prev_assignment(hwc, cpuc, i))
			if (!match_prev_assignment(event, cpuc, i))
				x86_assign_hw_event(event, cpuc, i);
			else if (i < n_running)
				continue;
+39 −11
Original line number Diff line number Diff line
@@ -3118,11 +3118,11 @@ static void intel_pmu_enable_fixed(struct perf_event *event)
	intel_set_masks(event, idx);

	/*
	 * Enable IRQ generation (0x8), if not PEBS,
	 * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
	 * if requested:
	 * Enable IRQ generation (0x8), if not PEBS or self-reloaded
	 * ACR event, and enable ring-3 counting (0x2) and ring-0
	 * counting (0x1) if requested:
	 */
	if (!event->attr.precise_ip)
	if (!event->attr.precise_ip && !is_acr_self_reload_event(event))
		bits |= INTEL_FIXED_0_ENABLE_PMI;
	if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
		bits |= INTEL_FIXED_0_USER;
@@ -3306,6 +3306,15 @@ static void intel_pmu_enable_event(struct perf_event *event)
		intel_set_masks(event, idx);
		static_call_cond(intel_pmu_enable_acr_event)(event);
		static_call_cond(intel_pmu_enable_event_ext)(event);
		/*
		 * For self-reloaded ACR event, don't enable PMI since
		 * HW won't set overflow bit in GLOBAL_STATUS. Otherwise,
		 * the PMI would be recognized as a suspicious NMI.
		 */
		if (is_acr_self_reload_event(event))
			hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
		else if (!event->attr.precise_ip)
			hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
		__x86_pmu_enable_event(hwc, enable_mask);
		break;
	case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS - 1:
@@ -3332,23 +3341,41 @@ static void intel_pmu_enable_event(struct perf_event *event)
static void intel_pmu_acr_late_setup(struct cpu_hw_events *cpuc)
{
	struct perf_event *event, *leader;
	int i, j, idx;
	int i, j, k, bit, idx;

	/*
	 * FIXME: ACR mask parsing relies on cpuc->event_list[] (active events only).
	 * Disabling an ACR event causes bit-shifting errors in the acr_mask of
	 * remaining group members. As ACR sampling requires all events to be active,
	 * this limitation is acceptable for now. Revisit if independent event toggling
	 * is required.
	 */
	for (i = 0; i < cpuc->n_events; i++) {
		leader = cpuc->event_list[i];
		if (!is_acr_event_group(leader))
			continue;

		/* The ACR events must be contiguous. */
		/* Find the last event of the ACR group. */
		for (j = i; j < cpuc->n_events; j++) {
			event = cpuc->event_list[j];
			if (event->group_leader != leader->group_leader)
				break;
			for_each_set_bit(idx, (unsigned long *)&event->attr.config2, X86_PMC_IDX_MAX) {
				if (i + idx >= cpuc->n_events ||
				    !is_acr_event_group(cpuc->event_list[i + idx]))
					return;
				__set_bit(cpuc->assign[i + idx], (unsigned long *)&event->hw.config1);
		}

		/*
		 * Translate the user-space ACR mask (attr.config2) into the physical
		 * counter bitmask (hw.config1) for each ACR event in the group.
		 * NOTE: ACR event contiguity is guaranteed by intel_pmu_hw_config().
		 */
		for (k = i; k < j; k++) {
			event = cpuc->event_list[k];
			event->hw.config1 = 0;
			for_each_set_bit(bit, (unsigned long *)&event->attr.config2, X86_PMC_IDX_MAX) {
				idx = i + bit;
				/* The event index must lie within the ACR group's range [i, j). */
				if (idx >= j || !is_acr_event_group(cpuc->event_list[idx]))
					continue;
				__set_bit(cpuc->assign[idx], (unsigned long *)&event->hw.config1);
			}
		}
		i = j - 1;
@@ -7504,6 +7531,7 @@ static __always_inline void intel_pmu_init_pnc(struct pmu *pmu)
	hybrid(pmu, event_constraints) = intel_pnc_event_constraints;
	hybrid(pmu, pebs_constraints) = intel_pnc_pebs_event_constraints;
	hybrid(pmu, extra_regs) = intel_pnc_extra_regs;
	static_call_update(intel_pmu_enable_acr_event, intel_pmu_enable_acr);
}

static __always_inline void intel_pmu_init_skt(struct pmu *pmu)
+10 −0
Original line number Diff line number Diff line
@@ -137,6 +137,16 @@ static inline bool is_acr_event_group(struct perf_event *event)
	return check_leader_group(event->group_leader, PERF_X86_EVENT_ACR);
}

/*
 * Tell whether @event reloads itself, i.e. whether the bit for its own
 * counter index (hw.idx) is set in its hw.config1 mask.
 *
 * An event without an assigned counter (hw.idx < 0) is never reported
 * as self-reloading.
 */
static inline bool is_acr_self_reload_event(struct perf_event *event)
{
	int idx = event->hw.idx;

	return idx >= 0 &&
	       test_bit(idx, (unsigned long *)&event->hw.config1);
}

struct amd_nb {
	int nb_id;  /* NorthBridge id */
	int refcnt; /* reference count */
+55 −15
Original line number Diff line number Diff line
@@ -7006,6 +7006,7 @@ static void perf_mmap_open(struct vm_area_struct *vma)
}

static void perf_pmu_output_stop(struct perf_event *event);
static void perf_mmap_unaccount(struct vm_area_struct *vma, struct perf_buffer *rb);

/*
 * A buffer can be mmap()ed multiple times; either directly through the same
@@ -7021,8 +7022,6 @@ static void perf_mmap_close(struct vm_area_struct *vma)
	mapped_f unmapped = get_mapped(event, event_unmapped);
	struct perf_buffer *rb = ring_buffer_get(event);
	struct user_struct *mmap_user = rb->mmap_user;
	int mmap_locked = rb->mmap_locked;
	unsigned long size = perf_data_size(rb);
	bool detach_rest = false;

	/* FIXIES vs perf_pmu_unregister() */
@@ -7117,11 +7116,7 @@ static void perf_mmap_close(struct vm_area_struct *vma)
	 * Aside from that, this buffer is 'fully' detached and unmapped,
	 * undo the VM accounting.
	 */

	atomic_long_sub((size >> PAGE_SHIFT) + 1 - mmap_locked,
			&mmap_user->locked_vm);
	atomic64_sub(mmap_locked, &vma->vm_mm->pinned_vm);
	free_uid(mmap_user);
	perf_mmap_unaccount(vma, rb);

out_put:
	ring_buffer_put(rb); /* could be last */
@@ -7261,6 +7256,15 @@ static void perf_mmap_account(struct vm_area_struct *vma, long user_extra, long
	atomic64_add(extra, &vma->vm_mm->pinned_vm);
}

/*
 * Undo the VM accounting recorded for @rb.
 *
 * The buffer accounts for its data pages plus one extra page. Of those,
 * rb->mmap_locked pages are subtracted from the mm's pinned_vm and the
 * remainder from the mapping user's locked_vm.
 *
 * This helper does not drop the rb->mmap_user reference.
 */
static void perf_mmap_unaccount(struct vm_area_struct *vma, struct perf_buffer *rb)
{
	unsigned long nr_pages = (perf_data_size(rb) >> PAGE_SHIFT) + 1;

	atomic_long_sub(nr_pages - rb->mmap_locked, &rb->mmap_user->locked_vm);
	atomic64_sub(rb->mmap_locked, &vma->vm_mm->pinned_vm);
}

static int perf_mmap_rb(struct vm_area_struct *vma, struct perf_event *event,
			unsigned long nr_pages)
{
@@ -7323,8 +7327,6 @@ static int perf_mmap_rb(struct vm_area_struct *vma, struct perf_event *event,
	if (!rb)
		return -ENOMEM;

	refcount_set(&rb->mmap_count, 1);
	rb->mmap_user = get_current_user();
	rb->mmap_locked = extra;

	ring_buffer_attach(event, rb);
@@ -7474,16 +7476,54 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
			mapped(event, vma->vm_mm);

		/*
		 * Try to map it into the page table. On fail, invoke
		 * perf_mmap_close() to undo the above, as the callsite expects
		 * full cleanup in this case and therefore does not invoke
		 * vmops::close().
		 * Try to map it into the page table. On fail undo the above,
		 * as the callsite expects full cleanup in this case and
		 * therefore does not invoke vmops::close().
		 */
		ret = map_range(event->rb, vma);
		if (ret)
			perf_mmap_close(vma);
		if (likely(!ret))
			return 0;

		/* Error path */

		/*
		 * If this is the first mmap(), then event->mmap_count should
		 * be stable at 1. It is only modified by:
		 * perf_mmap_{open,close}() and perf_mmap().
		 *
		 * The former are not possible because this mmap() hasn't been
		 * successful yet, and the latter is serialized by
		 * event->mmap_mutex which we still hold (note that mmap_lock
		 * is not strictly sufficient here, because the event fd can
		 * be passed to another process through trivial means like
		 * fork(), leading to concurrent mmap() from different mm).
		 *
		 * Make sure to remove event->rb before releasing
		 * event->mmap_mutex, such that any concurrent mmap() will not
		 * attempt to use this failed buffer.
		 */
		if (refcount_read(&event->mmap_count) == 1) {
			/*
			 * Minimal perf_mmap_close(); there can't be AUX or
			 * other events on account of this being the first.
			 */
			mapped = get_mapped(event, event_unmapped);
			if (mapped)
				mapped(event, vma->vm_mm);
			perf_mmap_unaccount(vma, event->rb);
			ring_buffer_attach(event, NULL);	/* drops last rb->refcount */
			refcount_set(&event->mmap_count, 0);
			return ret;
		}

		/*
		 * Otherwise this is an already existing buffer, and there is
		 * no race vs first exposure, so fall-through and call
		 * perf_mmap_close().
		 */
	}

	perf_mmap_close(vma);
	return ret;
}

+1 −0
Original line number Diff line number Diff line
@@ -67,6 +67,7 @@ static inline void rb_free_rcu(struct rcu_head *rcu_head)
	struct perf_buffer *rb;

	rb = container_of(rcu_head, struct perf_buffer, rcu_head);
	free_uid(rb->mmap_user);
	rb_free(rb);
}

Loading