Commit 6c26fbe8 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'perf-core-2025-12-01' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull performance events updates from Ingo Molnar:
 "Callchain support:

   - Add support for deferred user-space stack unwinding for perf,
     enabled on x86. (Peter Zijlstra, Steven Rostedt)

   - unwind_user/x86: Enable frame pointer unwinding on x86 (Josh
     Poimboeuf)

  x86 PMU support and infrastructure:

   - x86/insn: Simplify for_each_insn_prefix() (Peter Zijlstra)

   - x86/insn,uprobes,alternative: Unify insn_is_nop() (Peter Zijlstra)

  Intel PMU driver:

   - Large series to prepare for and implement architectural PEBS
     support for Intel platforms such as Clearwater Forest (CWF) and
     Panther Lake (PTL). (Dapeng Mi, Kan Liang)

   - Check dynamic constraints (Kan Liang)

   - Optimize PEBS extended config (Peter Zijlstra)

   - cstates:
      - Remove PC3 support from LunarLake (Zhang Rui)
      - Add Pantherlake support (Zhang Rui)
      - Clearwater Forest support (Zide Chen)

  AMD PMU driver:

   - x86/amd: Check event before enable to avoid GPF (George Kennedy)

  Fixes and cleanups:

   - task_work: Fix NMI race condition (Peter Zijlstra)

   - perf/x86: Fix NULL event access and potential PEBS record loss
     (Dapeng Mi)

   - Misc other fixes and cleanups (Dapeng Mi, Ingo Molnar, Peter
     Zijlstra)"

* tag 'perf-core-2025-12-01' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (38 commits)
  perf/x86/intel: Fix and clean up intel_pmu_drain_arch_pebs() type use
  perf/x86/intel: Optimize PEBS extended config
  perf/x86/intel: Check PEBS dyn_constraints
  perf/x86/intel: Add a check for dynamic constraints
  perf/x86/intel: Add counter group support for arch-PEBS
  perf/x86/intel: Setup PEBS data configuration and enable legacy groups
  perf/x86/intel: Update dyn_constraint base on PEBS event precise level
  perf/x86/intel: Allocate arch-PEBS buffer and initialize PEBS_BASE MSR
  perf/x86/intel: Process arch-PEBS records or record fragments
  perf/x86/intel/ds: Factor out PEBS group processing code to functions
  perf/x86/intel/ds: Factor out PEBS record processing code to functions
  perf/x86/intel: Initialize architectural PEBS
  perf/x86/intel: Correct large PEBS flag check
  perf/x86/intel: Replace x86_pmu.drain_pebs calling with static call
  perf/x86: Fix NULL event access and potential PEBS record loss
  perf/x86: Remove redundant is_x86_event() prototype
  entry,unwind/deferred: Fix unwind_reset_info() placement
  unwind_user/x86: Fix arch=um build
  perf: Support deferred user unwind
  unwind_user/x86: Teach FP unwind about start of function
  ...
parents 63e69950 9929dffc
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -298,6 +298,7 @@ config X86
	select HAVE_SYSCALL_TRACEPOINTS
	select HAVE_UACCESS_VALIDATION		if HAVE_OBJTOOL
	select HAVE_UNSTABLE_SCHED_CLOCK
	select HAVE_UNWIND_USER_FP		if X86_64
	select HAVE_USER_RETURN_NOTIFIER
	select HAVE_GENERIC_VDSO
	select VDSO_GETRANDOM			if X86_64
+1 −2
Original line number Diff line number Diff line
@@ -29,11 +29,10 @@
bool insn_has_rep_prefix(struct insn *insn)
{
	insn_byte_t p;
	int i;

	insn_get_prefixes(insn);

	for_each_insn_prefix(insn, i, p) {
	for_each_insn_prefix(insn, p) {
		if (p == 0xf2 || p == 0xf3)
			return true;
	}
+6 −1
Original line number Diff line number Diff line
@@ -763,6 +763,11 @@ static void amd_pmu_enable_all(int added)
		if (!test_bit(idx, cpuc->active_mask))
			continue;

		/*
		 * FIXME: cpuc->events[idx] can become NULL in a subtle race
		 * condition with NMI->throttle->x86_pmu_stop().
		 */
		if (cpuc->events[idx])
			amd_pmu_enable_event(cpuc->events[idx]);
	}
}
+19 −47
Original line number Diff line number Diff line
@@ -554,14 +554,22 @@ static inline int precise_br_compat(struct perf_event *event)
	return m == b;
}

int x86_pmu_max_precise(void)
int x86_pmu_max_precise(struct pmu *pmu)
{
	int precise = 0;

	/* Support for constant skid */
	if (x86_pmu.pebs_active && !x86_pmu.pebs_broken) {
		/* arch PEBS */
		if (x86_pmu.arch_pebs) {
			precise = 2;
			if (hybrid(pmu, arch_pebs_cap).pdists)
				precise++;

			return precise;
		}

		/* legacy PEBS - support for constant skid */
		precise++;
		/* Support for IP fixup */
		if (x86_pmu.lbr_nr || x86_pmu.intel_cap.pebs_format >= 2)
			precise++;
@@ -569,13 +577,14 @@ int x86_pmu_max_precise(void)
		if (x86_pmu.pebs_prec_dist)
			precise++;
	}

	return precise;
}

int x86_pmu_hw_config(struct perf_event *event)
{
	if (event->attr.precise_ip) {
		int precise = x86_pmu_max_precise();
		int precise = x86_pmu_max_precise(event->pmu);

		if (event->attr.precise_ip > precise)
			return -EOPNOTSUPP;
@@ -1344,6 +1353,7 @@ static void x86_pmu_enable(struct pmu *pmu)
				hwc->state |= PERF_HES_ARCH;

			x86_pmu_stop(event, PERF_EF_UPDATE);
			cpuc->events[hwc->idx] = NULL;
		}

		/*
@@ -1365,6 +1375,7 @@ static void x86_pmu_enable(struct pmu *pmu)
			 * if cpuc->enabled = 0, then no wrmsr as
			 * per x86_pmu_enable_event()
			 */
			cpuc->events[hwc->idx] = event;
			x86_pmu_start(event, PERF_EF_RELOAD);
		}
		cpuc->n_added = 0;
@@ -1531,7 +1542,6 @@ static void x86_pmu_start(struct perf_event *event, int flags)

	event->hw.state = 0;

	cpuc->events[idx] = event;
	__set_bit(idx, cpuc->active_mask);
	static_call(x86_pmu_enable)(event);
	perf_event_update_userpage(event);
@@ -1610,7 +1620,6 @@ void x86_pmu_stop(struct perf_event *event, int flags)
	if (test_bit(hwc->idx, cpuc->active_mask)) {
		static_call(x86_pmu_disable)(event);
		__clear_bit(hwc->idx, cpuc->active_mask);
		cpuc->events[hwc->idx] = NULL;
		WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
		hwc->state |= PERF_HES_STOPPED;
	}
@@ -1648,6 +1657,7 @@ static void x86_pmu_del(struct perf_event *event, int flags)
	 * Not a TXN, therefore cleanup properly.
	 */
	x86_pmu_stop(event, PERF_EF_UPDATE);
	cpuc->events[event->hw.idx] = NULL;

	for (i = 0; i < cpuc->n_events; i++) {
		if (event == cpuc->event_list[i])
@@ -2629,7 +2639,9 @@ static ssize_t max_precise_show(struct device *cdev,
				  struct device_attribute *attr,
				  char *buf)
{
	return snprintf(buf, PAGE_SIZE, "%d\n", x86_pmu_max_precise());
	struct pmu *pmu = dev_get_drvdata(cdev);

	return snprintf(buf, PAGE_SIZE, "%d\n", x86_pmu_max_precise(pmu));
}

static DEVICE_ATTR_RO(max_precise);
@@ -2845,46 +2857,6 @@ static unsigned long get_segment_base(unsigned int segment)
	return get_desc_base(desc);
}

#ifdef CONFIG_UPROBES
/*
 * Guess whether the uprobe currently being handled sits on the first
 * instruction of a function.
 *
 * The guess is purely pattern based and looks at the probed instruction
 * bytes saved in the task's arch_uprobe:
 *  - 0x55, i.e. `push %rbp/%ebp`, the usual prologue of code built with
 *    frame pointers;
 *  - `endbr64`, only considered when the task runs in 64-bit user mode.
 *
 * A wrong guess costs at most one bogus extra entry in the captured
 * stack trace; the remaining entries stay meaningful.
 *
 * Returns false when the task has no uprobe state or no active uprobe.
 */
static bool is_uprobe_at_func_entry(struct pt_regs *regs)
{
	struct arch_uprobe *probe;

	if (!current->utask || !current->utask->auprobe)
		return false;

	probe = current->utask->auprobe;

	/* push %rbp/%ebp, or endbr64 (64-bit only) */
	return probe->insn[0] == 0x55 ||
	       (user_64bit_mode(regs) && is_endbr((u32 *)probe->insn));
}

#else
/*
 * Stub used when CONFIG_UPROBES is not set: there is no uprobe to
 * inspect, so the answer is always "not at a function entry".
 */
static bool is_uprobe_at_func_entry(struct pt_regs *regs)
{
	return false;
}
#endif /* CONFIG_UPROBES */

#ifdef CONFIG_IA32_EMULATION

#include <linux/compat.h>
+414 −30
Original line number Diff line number Diff line
@@ -2563,6 +2563,44 @@ static void intel_pmu_disable_fixed(struct perf_event *event)
	cpuc->fixed_ctrl_val &= ~mask;
}

/*
 * Write @ext to the CFG_C MSR that belongs to counter @idx and remember
 * the value in this CPU's cpu_hw_events::cfg_c_val[] cache.
 *
 * Indices below INTEL_PMC_IDX_FIXED are addressed relative to
 * MSR_IA32_PMC_V6_GP0_CFG_C; all other indices are rebased by
 * INTEL_PMC_IDX_FIXED and addressed relative to MSR_IA32_PMC_V6_FX0_CFG_C.
 */
static inline void __intel_pmu_update_event_ext(int idx, u64 ext)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	bool is_fixed = idx >= INTEL_PMC_IDX_FIXED;
	u32 cfg_c_msr;

	if (is_fixed)
		cfg_c_msr = MSR_IA32_PMC_V6_FX0_CFG_C +
			    x86_pmu.addr_offset(idx - INTEL_PMC_IDX_FIXED, false);
	else
		cfg_c_msr = MSR_IA32_PMC_V6_GP0_CFG_C +
			    x86_pmu.addr_offset(idx, false);

	/* Keep the software copy in sync with what is written to hardware. */
	cpuc->cfg_c_val[idx] = ext;
	wrmsrq(cfg_c_msr, ext);
}

/*
 * Clear the CFG_C MSR of @event's counter, but only when @event belongs
 * to a PEBS counter group.
 */
static void intel_pmu_disable_event_ext(struct perf_event *event)
{
	/*
	 * PEBS counter group events must have CFG_C cleared: otherwise the
	 * value of this hardware counter could still be added, incorrectly,
	 * to other PEBS records after the group has been disabled.
	 *
	 * Every other event is left alone. CFG_C has no effect while the
	 * counter is disabled, so skipping the write saves a WRMSR on each
	 * context switch.
	 */
	if (is_pebs_counter_event_group(event))
		__intel_pmu_update_event_ext(event->hw.idx, 0);
}

DEFINE_STATIC_CALL_NULL(intel_pmu_disable_event_ext, intel_pmu_disable_event_ext);

static void intel_pmu_disable_event(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
@@ -2571,9 +2609,12 @@ static void intel_pmu_disable_event(struct perf_event *event)
	switch (idx) {
	case 0 ... INTEL_PMC_IDX_FIXED - 1:
		intel_clear_masks(event, idx);
		static_call_cond(intel_pmu_disable_event_ext)(event);
		x86_pmu_disable_event(event);
		break;
	case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS - 1:
		static_call_cond(intel_pmu_disable_event_ext)(event);
		fallthrough;
	case INTEL_PMC_IDX_METRIC_BASE ... INTEL_PMC_IDX_METRIC_END:
		intel_pmu_disable_fixed(event);
		break;
@@ -2940,6 +2981,79 @@ static void intel_pmu_enable_acr(struct perf_event *event)

DEFINE_STATIC_CALL_NULL(intel_pmu_enable_acr_event, intel_pmu_enable_acr);

static void intel_pmu_enable_event_ext(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;
	union arch_pebs_index old, new;
	struct arch_pebs_cap cap;
	u64 ext = 0;

	cap = hybrid(cpuc->pmu, arch_pebs_cap);

	if (event->attr.precise_ip) {
		u64 pebs_data_cfg = intel_get_arch_pebs_data_config(event);

		ext |= ARCH_PEBS_EN;
		if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD)
			ext |= (-hwc->sample_period) & ARCH_PEBS_RELOAD;

		if (pebs_data_cfg && cap.caps) {
			if (pebs_data_cfg & PEBS_DATACFG_MEMINFO)
				ext |= ARCH_PEBS_AUX & cap.caps;

			if (pebs_data_cfg & PEBS_DATACFG_GP)
				ext |= ARCH_PEBS_GPR & cap.caps;

			if (pebs_data_cfg & PEBS_DATACFG_XMMS)
				ext |= ARCH_PEBS_VECR_XMM & cap.caps;

			if (pebs_data_cfg & PEBS_DATACFG_LBRS)
				ext |= ARCH_PEBS_LBR & cap.caps;

			if (pebs_data_cfg &
			    (PEBS_DATACFG_CNTR_MASK << PEBS_DATACFG_CNTR_SHIFT))
				ext |= ARCH_PEBS_CNTR_GP & cap.caps;

			if (pebs_data_cfg &
			    (PEBS_DATACFG_FIX_MASK << PEBS_DATACFG_FIX_SHIFT))
				ext |= ARCH_PEBS_CNTR_FIXED & cap.caps;

			if (pebs_data_cfg & PEBS_DATACFG_METRICS)
				ext |= ARCH_PEBS_CNTR_METRICS & cap.caps;
		}

		if (cpuc->n_pebs == cpuc->n_large_pebs)
			new.thresh = ARCH_PEBS_THRESH_MULTI;
		else
			new.thresh = ARCH_PEBS_THRESH_SINGLE;

		rdmsrq(MSR_IA32_PEBS_INDEX, old.whole);
		if (new.thresh != old.thresh || !old.en) {
			if (old.thresh == ARCH_PEBS_THRESH_MULTI && old.wr > 0) {
				/*
				 * Large PEBS was enabled.
				 * Drain PEBS buffer before applying the single PEBS.
				 */
				intel_pmu_drain_pebs_buffer();
			} else {
				new.wr = 0;
				new.full = 0;
				new.en = 1;
				wrmsrq(MSR_IA32_PEBS_INDEX, new.whole);
			}
		}
	}

	if (is_pebs_counter_event_group(event))
		ext |= ARCH_PEBS_CNTR_ALLOW;

	if (cpuc->cfg_c_val[hwc->idx] != ext)
		__intel_pmu_update_event_ext(hwc->idx, ext);
}

DEFINE_STATIC_CALL_NULL(intel_pmu_enable_event_ext, intel_pmu_enable_event_ext);

static void intel_pmu_enable_event(struct perf_event *event)
{
	u64 enable_mask = ARCH_PERFMON_EVENTSEL_ENABLE;
@@ -2955,10 +3069,12 @@ static void intel_pmu_enable_event(struct perf_event *event)
			enable_mask |= ARCH_PERFMON_EVENTSEL_BR_CNTR;
		intel_set_masks(event, idx);
		static_call_cond(intel_pmu_enable_acr_event)(event);
		static_call_cond(intel_pmu_enable_event_ext)(event);
		__x86_pmu_enable_event(hwc, enable_mask);
		break;
	case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS - 1:
		static_call_cond(intel_pmu_enable_acr_event)(event);
		static_call_cond(intel_pmu_enable_event_ext)(event);
		fallthrough;
	case INTEL_PMC_IDX_METRIC_BASE ... INTEL_PMC_IDX_METRIC_END:
		intel_pmu_enable_fixed(event);
@@ -3215,6 +3331,19 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
			status &= ~GLOBAL_STATUS_PERF_METRICS_OVF_BIT;
	}

	/*
	 * Arch PEBS sets bit 54 in the global status register
	 */
	if (__test_and_clear_bit(GLOBAL_STATUS_ARCH_PEBS_THRESHOLD_BIT,
				 (unsigned long *)&status)) {
		handled++;
		static_call(x86_pmu_drain_pebs)(regs, &data);

		if (cpuc->events[INTEL_PMC_IDX_FIXED_SLOTS] &&
		    is_pebs_counter_event_group(cpuc->events[INTEL_PMC_IDX_FIXED_SLOTS]))
			status &= ~GLOBAL_STATUS_PERF_METRICS_OVF_BIT;
	}

	/*
	 * Intel PT
	 */
@@ -3269,7 +3398,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
		 * The PEBS buffer has to be drained before handling the A-PMI
		 */
		if (is_pebs_counter_event_group(event))
			x86_pmu.drain_pebs(regs, &data);
			static_call(x86_pmu_drain_pebs)(regs, &data);

		last_period = event->hw.last_period;

@@ -4029,7 +4158,9 @@ static unsigned long intel_pmu_large_pebs_flags(struct perf_event *event)
	if (!event->attr.exclude_kernel)
		flags &= ~PERF_SAMPLE_REGS_USER;
	if (event->attr.sample_regs_user & ~PEBS_GP_REGS)
		flags &= ~(PERF_SAMPLE_REGS_USER | PERF_SAMPLE_REGS_INTR);
		flags &= ~PERF_SAMPLE_REGS_USER;
	if (event->attr.sample_regs_intr & ~PEBS_GP_REGS)
		flags &= ~PERF_SAMPLE_REGS_INTR;
	return flags;
}

@@ -4204,6 +4335,20 @@ static bool intel_pmu_is_acr_group(struct perf_event *event)
	return false;
}

/*
 * Tell whether @pmu can attach counter values to PEBS records.
 *
 * Two conditions qualify:
 *  - PEBS format >= 6 together with the pebs_baseline capability, or
 *  - arch-PEBS with at least one ARCH_PEBS_CNTR_MASK capability bit set
 *    in the (possibly hybrid) arch_pebs_cap.caps of @pmu.
 */
static inline bool intel_pmu_has_pebs_counter_group(struct pmu *pmu)
{
	if (x86_pmu.intel_cap.pebs_format >= 6 && x86_pmu.intel_cap.pebs_baseline)
		return true;

	if (!x86_pmu.arch_pebs)
		return false;

	return !!(hybrid(pmu, arch_pebs_cap).caps & ARCH_PEBS_CNTR_MASK);
}

static inline void intel_pmu_set_acr_cntr_constr(struct perf_event *event,
						 u64 *cause_mask, int *num)
{
@@ -4237,6 +4382,8 @@ static int intel_pmu_hw_config(struct perf_event *event)
	}

	if (event->attr.precise_ip) {
		struct arch_pebs_cap pebs_cap = hybrid(event->pmu, arch_pebs_cap);

		if ((event->attr.config & INTEL_ARCH_EVENT_MASK) == INTEL_FIXED_VLBR_EVENT)
			return -EINVAL;

@@ -4250,6 +4397,15 @@ static int intel_pmu_hw_config(struct perf_event *event)
		}
		if (x86_pmu.pebs_aliases)
			x86_pmu.pebs_aliases(event);

		if (x86_pmu.arch_pebs) {
			u64 cntr_mask = hybrid(event->pmu, intel_ctrl) &
						~GLOBAL_CTRL_EN_PERF_METRICS;
			u64 pebs_mask = event->attr.precise_ip >= 3 ?
						pebs_cap.pdists : pebs_cap.counters;
			if (cntr_mask != pebs_mask)
				event->hw.dyn_constraint &= pebs_mask;
		}
	}

	if (needs_branch_stack(event)) {
@@ -4341,8 +4497,7 @@ static int intel_pmu_hw_config(struct perf_event *event)
	}

	if ((event->attr.sample_type & PERF_SAMPLE_READ) &&
	    (x86_pmu.intel_cap.pebs_format >= 6) &&
	    x86_pmu.intel_cap.pebs_baseline &&
	    intel_pmu_has_pebs_counter_group(event->pmu) &&
	    is_sampling_event(event) &&
	    event->attr.precise_ip)
		event->group_leader->hw.flags |= PERF_X86_EVENT_PEBS_CNTR;
@@ -5212,7 +5367,13 @@ int intel_cpuc_prepare(struct cpu_hw_events *cpuc, int cpu)

static int intel_pmu_cpu_prepare(int cpu)
{
	return intel_cpuc_prepare(&per_cpu(cpu_hw_events, cpu), cpu);
	int ret;

	ret = intel_cpuc_prepare(&per_cpu(cpu_hw_events, cpu), cpu);
	if (ret)
		return ret;

	return alloc_arch_pebs_buf_on_cpu(cpu);
}

static void flip_smm_bit(void *data)
@@ -5257,6 +5418,163 @@ static void intel_pmu_check_event_constraints(struct event_constraint *event_con
					      u64 fixed_cntr_mask,
					      u64 intel_ctrl);

/*
 * Kinds of dynamic counter constraint that intel_pmu_check_dyn_constr()
 * walks through. Each kind selects a different counter mask that the
 * static event constraints are intersected with before being checked.
 */
enum dyn_constr_type {
	DYN_CONSTR_NONE,	/* normal event: the plain counter mask */
	DYN_CONSTR_BR_CNTR,	/* branch counter logging: x86_pmu.lbr_counters */
	DYN_CONSTR_ACR_CNTR,	/* auto counter reload: acr_cntr_mask64 */
	DYN_CONSTR_ACR_CAUSE,	/* auto counter reload cause: acr_cause_mask64 */
	DYN_CONSTR_PEBS,	/* arch-PEBS event: arch_pebs_cap.counters */
	DYN_CONSTR_PDIST,	/* arch-PEBS PDIST event: arch_pebs_cap.pdists */

	/* Number of constraint kinds; used as the loop bound, not a kind. */
	DYN_CONSTR_MAX,
};

/*
 * Human-readable description of each dyn_constr_type, indexed by the
 * enum value. The strings are spliced into the pr_info() diagnostics of
 * __intel_pmu_check_dyn_constr() ("... as %s."), hence the leading
 * article. DYN_CONSTR_MAX intentionally has no entry.
 */
static const char * const dyn_constr_type_name[] = {
	[DYN_CONSTR_NONE] = "a normal event",
	[DYN_CONSTR_BR_CNTR] = "a branch counter logging event",
	[DYN_CONSTR_ACR_CNTR] = "an auto-counter reload event",
	[DYN_CONSTR_ACR_CAUSE] = "an auto-counter reload cause event",
	[DYN_CONSTR_PEBS] = "a PEBS event",
	[DYN_CONSTR_PDIST] = "a PEBS PDIST event",
};

/*
 * Sanity-check the constraint table @constr against the dynamic counter
 * mask @mask of kind @type and log (pr_info) anything suspicious.
 *
 * Two things are reported:
 *  - an event whose static constraint has no counter left once it is
 *    intersected with @mask (only when the constraint's highest set bit
 *    is below INTEL_PMC_IDX_FIXED);
 *  - a pair of events whose restricted constraints partially overlap in
 *    a way that may defeat the scheduler.
 *
 * Constraints carrying INTEL_PMC_MSK_TOPDOWN bits are ignored. Nothing is
 * modified; the function only emits diagnostics.
 */
static void __intel_pmu_check_dyn_constr(struct event_constraint *constr,
					 enum dyn_constr_type type, u64 mask)
{
	struct event_constraint *cur, *other;

	for_each_event_constraint(cur, constr) {
		u64 cur_mask = cur->idxmsk64 & mask;
		int cur_weight = hweight64(cur_mask);

		/* ignore topdown perf metrics event */
		if (cur->idxmsk64 & INTEL_PMC_MSK_TOPDOWN)
			continue;

		if (!cur_weight && fls64(cur->idxmsk64) < INTEL_PMC_IDX_FIXED) {
			pr_info("The event 0x%llx is not supported as %s.\n",
				cur->code, dyn_constr_type_name[type]);
		}

		/* A constraint with at most one counter cannot conflict. */
		if (cur_weight <= 1)
			continue;

		for_each_event_constraint(other, cur + 1) {
			u64 other_mask = other->idxmsk64 & mask;
			int other_weight = hweight64(other_mask);
			u64 joint = cur_mask | other_mask;
			bool may_fail;

			if ((other->idxmsk64 & INTEL_PMC_MSK_TOPDOWN) ||
			    !other_weight)
				continue;

			/* The same constraints or no overlap */
			if (cur_mask == other_mask || !(cur_mask & other_mask))
				continue;

			/*
			 * A scheduler issue may be triggered in the following cases.
			 * - Two overlap constraints have the same weight.
			 *   E.g., A constraints: 0x3, B constraints: 0x6
			 *   event	counter		failure case
			 *   B		PMC[2:1]	1
			 *   A		PMC[1:0]	0
			 *   A		PMC[1:0]	FAIL
			 * - Two overlap constraints have different weight.
			 *   The constraint has a low weight, but has high last bit.
			 *   E.g., A constraints: 0x7, B constraints: 0xC
			 *   event	counter		failure case
			 *   B		PMC[3:2]	2
			 *   A		PMC[2:0]	0
			 *   A		PMC[2:0]	1
			 *   A		PMC[2:0]	FAIL
			 */
			if (cur_weight == other_weight)
				may_fail = true;
			else if (cur_weight < other_weight)
				may_fail = joint != other_mask &&
					   fls64(cur_mask) > fls64(other_mask);
			else
				may_fail = joint != cur_mask &&
					   fls64(cur_mask) < fls64(other_mask);

			if (!may_fail)
				continue;

			pr_info("The two events 0x%llx and 0x%llx may not be "
				"fully scheduled under some circumstances as "
				"%s.\n",
				cur->code, other->code, dyn_constr_type_name[type]);
		}
	}
}

/*
 * Return the counter mask that dynamic constraint kind @type imposes on
 * @pmu, or 0 when the kind does not apply and must be skipped.
 */
static u64 __intel_pmu_dyn_constr_mask(struct pmu *pmu,
				       enum dyn_constr_type type,
				       u64 cntr_mask)
{
	switch (type) {
	case DYN_CONSTR_NONE:
		return cntr_mask;
	case DYN_CONSTR_BR_CNTR:
		if (x86_pmu.flags & PMU_FL_BR_CNTR)
			return x86_pmu.lbr_counters;
		return 0;
	case DYN_CONSTR_ACR_CNTR:
		return hybrid(pmu, acr_cntr_mask64) & GENMASK_ULL(INTEL_PMC_MAX_GENERIC - 1, 0);
	case DYN_CONSTR_ACR_CAUSE:
		/* Identical reload/cause masks were already checked as ACR_CNTR. */
		if (hybrid(pmu, acr_cntr_mask64) == hybrid(pmu, acr_cause_mask64))
			return 0;
		return hybrid(pmu, acr_cause_mask64) & GENMASK_ULL(INTEL_PMC_MAX_GENERIC - 1, 0);
	case DYN_CONSTR_PEBS:
		if (x86_pmu.arch_pebs)
			return hybrid(pmu, arch_pebs_cap).counters;
		return 0;
	case DYN_CONSTR_PDIST:
		if (x86_pmu.arch_pebs)
			return hybrid(pmu, arch_pebs_cap).pdists;
		return 0;
	default:
		pr_warn("Unsupported dynamic constraint type %d\n", type);
		return 0;
	}
}

/*
 * Run __intel_pmu_check_dyn_constr() on @constr once for every dynamic
 * constraint kind that yields a non-empty counter mask on @pmu.
 * @cntr_mask is the mask used for the DYN_CONSTR_NONE (normal event) pass.
 */
static void intel_pmu_check_dyn_constr(struct pmu *pmu,
				       struct event_constraint *constr,
				       u64 cntr_mask)
{
	enum dyn_constr_type type;

	for (type = DYN_CONSTR_NONE; type < DYN_CONSTR_MAX; type++) {
		u64 mask = __intel_pmu_dyn_constr_mask(pmu, type, cntr_mask);

		if (mask)
			__intel_pmu_check_dyn_constr(constr, type, mask);
	}
}

static void intel_pmu_check_event_constraints_all(struct pmu *pmu)
{
	struct event_constraint *event_constraints = hybrid(pmu, event_constraints);
	struct event_constraint *pebs_constraints = hybrid(pmu, pebs_constraints);
	u64 cntr_mask = hybrid(pmu, cntr_mask64);
	u64 fixed_cntr_mask = hybrid(pmu, fixed_cntr_mask64);
	u64 intel_ctrl = hybrid(pmu, intel_ctrl);

	intel_pmu_check_event_constraints(event_constraints, cntr_mask,
					  fixed_cntr_mask, intel_ctrl);

	if (event_constraints)
		intel_pmu_check_dyn_constr(pmu, event_constraints, cntr_mask);

	if (pebs_constraints)
		intel_pmu_check_dyn_constr(pmu, pebs_constraints, cntr_mask);
}

static void intel_pmu_check_extra_regs(struct extra_reg *extra_regs);

static inline bool intel_pmu_broken_perf_cap(void)
@@ -5269,34 +5587,89 @@ static inline bool intel_pmu_broken_perf_cap(void)
	return false;
}

/*
 * Advertise PERF_PMU_CAP_EXTENDED_REGS when arch-PEBS reports the
 * ARCH_PEBS_VECR_XMM capability.
 *
 * The capability bits are read through hybrid(@pmu, ...). The flag is set
 * on @pmu itself, or on the current CPU's PMU when @pmu is NULL.
 */
static inline void __intel_update_pmu_caps(struct pmu *pmu)
{
	struct pmu *target = pmu;

	if (!target)
		target = x86_get_pmu(smp_processor_id());

	if (!(hybrid(pmu, arch_pebs_cap).caps & ARCH_PEBS_VECR_XMM))
		return;

	target->capabilities |= PERF_PMU_CAP_EXTENDED_REGS;
}

/*
 * Adjust x86_pmu.large_pebs_flags to the arch-PEBS capabilities of @pmu.
 *
 * Added:   PERF_SAMPLE_TIME always, PERF_SAMPLE_BRANCH_STACK with
 *          ARCH_PEBS_LBR, PERF_SAMPLE_READ with any ARCH_PEBS_CNTR_MASK bit.
 * Removed: PERF_SAMPLE_DATA_SRC without ARCH_PEBS_AUX, and both
 *          PERF_SAMPLE_REGS_INTR/USER without ARCH_PEBS_GPR.
 */
static inline void __intel_update_large_pebs_flags(struct pmu *pmu)
{
	u64 pebs_caps = hybrid(pmu, arch_pebs_cap).caps;
	u64 grant = PERF_SAMPLE_TIME;
	u64 revoke = 0;

	if (pebs_caps & ARCH_PEBS_LBR)
		grant |= PERF_SAMPLE_BRANCH_STACK;
	if (pebs_caps & ARCH_PEBS_CNTR_MASK)
		grant |= PERF_SAMPLE_READ;

	if (!(pebs_caps & ARCH_PEBS_AUX))
		revoke |= PERF_SAMPLE_DATA_SRC;
	if (!(pebs_caps & ARCH_PEBS_GPR))
		revoke |= PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER;

	/* The granted and revoked sets are disjoint, so order is irrelevant. */
	x86_pmu.large_pebs_flags |= grant;
	x86_pmu.large_pebs_flags &= ~revoke;
}

#define counter_mask(_gp, _fixed) ((_gp) | ((u64)(_fixed) << INTEL_PMC_IDX_FIXED))

static void update_pmu_cap(struct pmu *pmu)
{
	unsigned int cntr, fixed_cntr, ecx, edx;
	union cpuid35_eax eax;
	union cpuid35_ebx ebx;
	unsigned int eax, ebx, ecx, edx;
	union cpuid35_eax eax_0;
	union cpuid35_ebx ebx_0;
	u64 cntrs_mask = 0;
	u64 pebs_mask = 0;
	u64 pdists_mask = 0;

	cpuid(ARCH_PERFMON_EXT_LEAF, &eax.full, &ebx.full, &ecx, &edx);
	cpuid(ARCH_PERFMON_EXT_LEAF, &eax_0.full, &ebx_0.full, &ecx, &edx);

	if (ebx.split.umask2)
	if (ebx_0.split.umask2)
		hybrid(pmu, config_mask) |= ARCH_PERFMON_EVENTSEL_UMASK2;
	if (ebx.split.eq)
	if (ebx_0.split.eq)
		hybrid(pmu, config_mask) |= ARCH_PERFMON_EVENTSEL_EQ;

	if (eax.split.cntr_subleaf) {
	if (eax_0.split.cntr_subleaf) {
		cpuid_count(ARCH_PERFMON_EXT_LEAF, ARCH_PERFMON_NUM_COUNTER_LEAF,
			    &cntr, &fixed_cntr, &ecx, &edx);
		hybrid(pmu, cntr_mask64) = cntr;
		hybrid(pmu, fixed_cntr_mask64) = fixed_cntr;
			    &eax, &ebx, &ecx, &edx);
		hybrid(pmu, cntr_mask64) = eax;
		hybrid(pmu, fixed_cntr_mask64) = ebx;
		cntrs_mask = counter_mask(eax, ebx);
	}

	if (eax.split.acr_subleaf) {
	if (eax_0.split.acr_subleaf) {
		cpuid_count(ARCH_PERFMON_EXT_LEAF, ARCH_PERFMON_ACR_LEAF,
			    &cntr, &fixed_cntr, &ecx, &edx);
			    &eax, &ebx, &ecx, &edx);
		/* The mask of the counters which can be reloaded */
		hybrid(pmu, acr_cntr_mask64) = cntr | ((u64)fixed_cntr << INTEL_PMC_IDX_FIXED);

		hybrid(pmu, acr_cntr_mask64) = counter_mask(eax, ebx);
		/* The mask of the counters which can cause a reload of reloadable counters */
		hybrid(pmu, acr_cause_mask64) = ecx | ((u64)edx << INTEL_PMC_IDX_FIXED);
		hybrid(pmu, acr_cause_mask64) = counter_mask(ecx, edx);
	}

	/* Bits[5:4] should be set simultaneously if arch-PEBS is supported */
	if (eax_0.split.pebs_caps_subleaf && eax_0.split.pebs_cnts_subleaf) {
		cpuid_count(ARCH_PERFMON_EXT_LEAF, ARCH_PERFMON_PEBS_CAP_LEAF,
			    &eax, &ebx, &ecx, &edx);
		hybrid(pmu, arch_pebs_cap).caps = (u64)ebx << 32;

		cpuid_count(ARCH_PERFMON_EXT_LEAF, ARCH_PERFMON_PEBS_COUNTER_LEAF,
			    &eax, &ebx, &ecx, &edx);
		pebs_mask   = counter_mask(eax, ecx);
		pdists_mask = counter_mask(ebx, edx);
		hybrid(pmu, arch_pebs_cap).counters = pebs_mask;
		hybrid(pmu, arch_pebs_cap).pdists = pdists_mask;

		if (WARN_ON((pebs_mask | pdists_mask) & ~cntrs_mask)) {
			x86_pmu.arch_pebs = 0;
		} else {
			__intel_update_pmu_caps(pmu);
			__intel_update_large_pebs_flags(pmu);
		}
	} else {
		WARN_ON(x86_pmu.arch_pebs == 1);
		x86_pmu.arch_pebs = 0;
	}

	if (!intel_pmu_broken_perf_cap()) {
@@ -5319,10 +5692,7 @@ static void intel_pmu_check_hybrid_pmus(struct x86_hybrid_pmu *pmu)
	else
		pmu->intel_ctrl &= ~GLOBAL_CTRL_EN_PERF_METRICS;

	intel_pmu_check_event_constraints(pmu->event_constraints,
					  pmu->cntr_mask64,
					  pmu->fixed_cntr_mask64,
					  pmu->intel_ctrl);
	intel_pmu_check_event_constraints_all(&pmu->pmu);

	intel_pmu_check_extra_regs(pmu->extra_regs);
}
@@ -5418,6 +5788,7 @@ static void intel_pmu_cpu_starting(int cpu)
		return;

	init_debug_store_on_cpu(cpu);
	init_arch_pebs_on_cpu(cpu);
	/*
	 * Deal with CPUs that don't clear their LBRs on power-up, and that may
	 * even boot with LBRs enabled.
@@ -5456,6 +5827,8 @@ static void intel_pmu_cpu_starting(int cpu)
		}
	}

	__intel_update_pmu_caps(cpuc->pmu);

	if (!cpuc->shared_regs)
		return;

@@ -5515,6 +5888,7 @@ static void free_excl_cntrs(struct cpu_hw_events *cpuc)
static void intel_pmu_cpu_dying(int cpu)
{
	fini_debug_store_on_cpu(cpu);
	fini_arch_pebs_on_cpu(cpu);
}

void intel_cpuc_finish(struct cpu_hw_events *cpuc)
@@ -5535,6 +5909,7 @@ static void intel_pmu_cpu_dead(int cpu)
{
	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);

	release_arch_pebs_buf_on_cpu(cpu);
	intel_cpuc_finish(cpuc);

	if (is_hybrid() && cpuc->pmu)
@@ -6250,7 +6625,7 @@ tsx_is_visible(struct kobject *kobj, struct attribute *attr, int i)
static umode_t
pebs_is_visible(struct kobject *kobj, struct attribute *attr, int i)
{
	return x86_pmu.ds_pebs ? attr->mode : 0;
	return intel_pmu_has_pebs() ? attr->mode : 0;
}

static umode_t
@@ -6940,8 +7315,11 @@ __init int intel_pmu_init(void)
	 * Many features on and after V6 require dynamic constraint,
	 * e.g., Arch PEBS, ACR.
	 */
	if (version >= 6)
	if (version >= 6) {
		x86_pmu.flags |= PMU_FL_DYN_CONSTRAINT;
		x86_pmu.late_setup = intel_pmu_late_setup;
	}

	/*
	 * Install the hw-cache-events table:
	 */
@@ -7727,6 +8105,14 @@ __init int intel_pmu_init(void)
	if (!is_hybrid() && boot_cpu_has(X86_FEATURE_ARCH_PERFMON_EXT))
		update_pmu_cap(NULL);

	if (x86_pmu.arch_pebs) {
		static_call_update(intel_pmu_disable_event_ext,
				   intel_pmu_disable_event_ext);
		static_call_update(intel_pmu_enable_event_ext,
				   intel_pmu_enable_event_ext);
		pr_cont("Architectural PEBS, ");
	}

	intel_pmu_check_counters_mask(&x86_pmu.cntr_mask64,
				      &x86_pmu.fixed_cntr_mask64,
				      &x86_pmu.intel_ctrl);
@@ -7735,10 +8121,8 @@ __init int intel_pmu_init(void)
	if (x86_pmu.intel_cap.anythread_deprecated)
		x86_pmu.format_attrs = intel_arch_formats_attr;

	intel_pmu_check_event_constraints(x86_pmu.event_constraints,
					  x86_pmu.cntr_mask64,
					  x86_pmu.fixed_cntr_mask64,
					  x86_pmu.intel_ctrl);
	intel_pmu_check_event_constraints_all(NULL);

	/*
	 * Access LBR MSR may cause #GP under certain circumstances.
	 * Check all LBR MSR here.
Loading