Commit 722e42e4 authored by Kan Liang's avatar Kan Liang Committed by Peter Zijlstra
Browse files

perf/x86: Support counter mask



The current perf assumes that both GP and fixed counters are contiguous.
But it's not guaranteed on newer Intel platforms or in a virtualization
environment.

Use the counter mask to replace the number of counters for both GP and
the fixed counters. For the other ARCHs or old platforms which don't
support a counter mask, using GENMASK_ULL(num_counter - 1, 0) to
replace. There is no functional change for them.

The interface to KVM is not changed. The number of counters still be
passed to KVM. It can be updated later separately.

Signed-off-by: default avatarKan Liang <kan.liang@linux.intel.com>
Signed-off-by: default avatarPeter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: default avatarAndi Kleen <ak@linux.intel.com>
Reviewed-by: default avatarIan Rogers <irogers@google.com>
Link: https://lkml.kernel.org/r/20240626143545.480761-3-kan.liang@linux.intel.com
parent a23eb2fc
Loading
Loading
Loading
Loading
+12 −12
Original line number Diff line number Diff line
@@ -432,7 +432,7 @@ static void __amd_put_nb_event_constraints(struct cpu_hw_events *cpuc,
	 * be removed on one CPU at a time AND PMU is disabled
	 * when we come here
	 */
	for (i = 0; i < x86_pmu.num_counters; i++) {
	for_each_set_bit(i, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
		struct perf_event *tmp = event;

		if (try_cmpxchg(nb->owners + i, &tmp, NULL))
@@ -501,7 +501,7 @@ __amd_get_nb_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *ev
	 * because of successive calls to x86_schedule_events() from
	 * hw_perf_group_sched_in() without hw_perf_enable()
	 */
	for_each_set_bit(idx, c->idxmsk, x86_pmu.num_counters) {
	for_each_set_bit(idx, c->idxmsk, x86_pmu_max_num_counters(NULL)) {
		if (new == -1 || hwc->idx == idx)
			/* assign free slot, prefer hwc->idx */
			old = cmpxchg(nb->owners + idx, NULL, event);
@@ -544,7 +544,7 @@ static struct amd_nb *amd_alloc_nb(int cpu)
	/*
	 * initialize all possible NB constraints
	 */
	for (i = 0; i < x86_pmu.num_counters; i++) {
	for_each_set_bit(i, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
		__set_bit(i, nb->event_constraints[i].idxmsk);
		nb->event_constraints[i].weight = 1;
	}
@@ -737,7 +737,7 @@ static void amd_pmu_check_overflow(void)
	 * counters are always enabled when this function is called and
	 * ARCH_PERFMON_EVENTSEL_INT is always set.
	 */
	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
	for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
		if (!test_bit(idx, cpuc->active_mask))
			continue;

@@ -757,7 +757,7 @@ static void amd_pmu_enable_all(int added)

	amd_brs_enable_all();

	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
	for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
		/* only activate events which are marked as active */
		if (!test_bit(idx, cpuc->active_mask))
			continue;
@@ -980,7 +980,7 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
	/* Clear any reserved bits set by buggy microcode */
	status &= amd_pmu_global_cntr_mask;

	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
	for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
		if (!test_bit(idx, cpuc->active_mask))
			continue;

@@ -1315,7 +1315,7 @@ static __initconst const struct x86_pmu amd_pmu = {
	.addr_offset            = amd_pmu_addr_offset,
	.event_map		= amd_pmu_event_map,
	.max_events		= ARRAY_SIZE(amd_perfmon_event_map),
	.num_counters		= AMD64_NUM_COUNTERS,
	.cntr_mask64		= GENMASK_ULL(AMD64_NUM_COUNTERS - 1, 0),
	.add			= amd_pmu_add_event,
	.del			= amd_pmu_del_event,
	.cntval_bits		= 48,
@@ -1414,7 +1414,7 @@ static int __init amd_core_pmu_init(void)
	 */
	x86_pmu.eventsel	= MSR_F15H_PERF_CTL;
	x86_pmu.perfctr		= MSR_F15H_PERF_CTR;
	x86_pmu.num_counters	= AMD64_NUM_COUNTERS_CORE;
	x86_pmu.cntr_mask64	= GENMASK_ULL(AMD64_NUM_COUNTERS_CORE - 1, 0);

	/* Check for Performance Monitoring v2 support */
	if (boot_cpu_has(X86_FEATURE_PERFMON_V2)) {
@@ -1424,9 +1424,9 @@ static int __init amd_core_pmu_init(void)
		x86_pmu.version = 2;

		/* Find the number of available Core PMCs */
		x86_pmu.num_counters = ebx.split.num_core_pmc;
		x86_pmu.cntr_mask64 = GENMASK_ULL(ebx.split.num_core_pmc - 1, 0);

		amd_pmu_global_cntr_mask = (1ULL << x86_pmu.num_counters) - 1;
		amd_pmu_global_cntr_mask = x86_pmu.cntr_mask64;

		/* Update PMC handling functions */
		x86_pmu.enable_all = amd_pmu_v2_enable_all;
@@ -1454,12 +1454,12 @@ static int __init amd_core_pmu_init(void)
		 * even numbered counter that has a consecutive adjacent odd
		 * numbered counter following it.
		 */
		for (i = 0; i < x86_pmu.num_counters - 1; i += 2)
		for (i = 0; i < x86_pmu_max_num_counters(NULL) - 1; i += 2)
			even_ctr_mask |= BIT_ULL(i);

		pair_constraint = (struct event_constraint)
				    __EVENT_CONSTRAINT(0, even_ctr_mask, 0,
				    x86_pmu.num_counters / 2, 0,
				    x86_pmu_max_num_counters(NULL) / 2, 0,
				    PERF_X86_EVENT_PAIR);

		x86_pmu.get_event_constraints = amd_get_event_constraints_f17h;
+47 −51
Original line number Diff line number Diff line
@@ -189,29 +189,31 @@ static DEFINE_MUTEX(pmc_reserve_mutex);

#ifdef CONFIG_X86_LOCAL_APIC

static inline int get_possible_num_counters(void)
static inline u64 get_possible_counter_mask(void)
{
	int i, num_counters = x86_pmu.num_counters;
	u64 cntr_mask = x86_pmu.cntr_mask64;
	int i;

	if (!is_hybrid())
		return num_counters;
		return cntr_mask;

	for (i = 0; i < x86_pmu.num_hybrid_pmus; i++)
		num_counters = max_t(int, num_counters, x86_pmu.hybrid_pmu[i].num_counters);
		cntr_mask |= x86_pmu.hybrid_pmu[i].cntr_mask64;

	return num_counters;
	return cntr_mask;
}

static bool reserve_pmc_hardware(void)
{
	int i, num_counters = get_possible_num_counters();
	u64 cntr_mask = get_possible_counter_mask();
	int i, end;

	for (i = 0; i < num_counters; i++) {
	for_each_set_bit(i, (unsigned long *)&cntr_mask, X86_PMC_IDX_MAX) {
		if (!reserve_perfctr_nmi(x86_pmu_event_addr(i)))
			goto perfctr_fail;
	}

	for (i = 0; i < num_counters; i++) {
	for_each_set_bit(i, (unsigned long *)&cntr_mask, X86_PMC_IDX_MAX) {
		if (!reserve_evntsel_nmi(x86_pmu_config_addr(i)))
			goto eventsel_fail;
	}
@@ -219,13 +221,14 @@ static bool reserve_pmc_hardware(void)
	return true;

eventsel_fail:
	for (i--; i >= 0; i--)
	end = i;
	for_each_set_bit(i, (unsigned long *)&cntr_mask, end)
		release_evntsel_nmi(x86_pmu_config_addr(i));

	i = num_counters;
	i = X86_PMC_IDX_MAX;

perfctr_fail:
	for (i--; i >= 0; i--)
	end = i;
	for_each_set_bit(i, (unsigned long *)&cntr_mask, end)
		release_perfctr_nmi(x86_pmu_event_addr(i));

	return false;
@@ -233,9 +236,10 @@ static bool reserve_pmc_hardware(void)

static void release_pmc_hardware(void)
{
	int i, num_counters = get_possible_num_counters();
	u64 cntr_mask = get_possible_counter_mask();
	int i;

	for (i = 0; i < num_counters; i++) {
	for_each_set_bit(i, (unsigned long *)&cntr_mask, X86_PMC_IDX_MAX) {
		release_perfctr_nmi(x86_pmu_event_addr(i));
		release_evntsel_nmi(x86_pmu_config_addr(i));
	}
@@ -248,7 +252,8 @@ static void release_pmc_hardware(void) {}

#endif

bool check_hw_exists(struct pmu *pmu, int num_counters, int num_counters_fixed)
bool check_hw_exists(struct pmu *pmu, unsigned long *cntr_mask,
		     unsigned long *fixed_cntr_mask)
{
	u64 val, val_fail = -1, val_new= ~0;
	int i, reg, reg_fail = -1, ret = 0;
@@ -259,7 +264,7 @@ bool check_hw_exists(struct pmu *pmu, int num_counters, int num_counters_fixed)
	 * Check to see if the BIOS enabled any of the counters, if so
	 * complain and bail.
	 */
	for (i = 0; i < num_counters; i++) {
	for_each_set_bit(i, cntr_mask, X86_PMC_IDX_MAX) {
		reg = x86_pmu_config_addr(i);
		ret = rdmsrl_safe(reg, &val);
		if (ret)
@@ -273,12 +278,12 @@ bool check_hw_exists(struct pmu *pmu, int num_counters, int num_counters_fixed)
		}
	}

	if (num_counters_fixed) {
	if (*(u64 *)fixed_cntr_mask) {
		reg = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
		ret = rdmsrl_safe(reg, &val);
		if (ret)
			goto msr_fail;
		for (i = 0; i < num_counters_fixed; i++) {
		for_each_set_bit(i, fixed_cntr_mask, X86_PMC_IDX_MAX) {
			if (fixed_counter_disabled(i, pmu))
				continue;
			if (val & (0x03ULL << i*4)) {
@@ -679,7 +684,7 @@ void x86_pmu_disable_all(void)
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	int idx;

	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
	for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
		struct hw_perf_event *hwc = &cpuc->events[idx]->hw;
		u64 val;

@@ -736,7 +741,7 @@ void x86_pmu_enable_all(int added)
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	int idx;

	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
	for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
		struct hw_perf_event *hwc = &cpuc->events[idx]->hw;

		if (!test_bit(idx, cpuc->active_mask))
@@ -975,7 +980,6 @@ EXPORT_SYMBOL_GPL(perf_assign_events);

int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
{
	int num_counters = hybrid(cpuc->pmu, num_counters);
	struct event_constraint *c;
	struct perf_event *e;
	int n0, i, wmin, wmax, unsched = 0;
@@ -1051,7 +1055,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)

	/* slow path */
	if (i != n) {
		int gpmax = num_counters;
		int gpmax = x86_pmu_max_num_counters(cpuc->pmu);

		/*
		 * Do not allow scheduling of more than half the available
@@ -1072,7 +1076,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
		 * the extra Merge events needed by large increment events.
		 */
		if (x86_pmu.flags & PMU_FL_PAIR) {
			gpmax = num_counters - cpuc->n_pair;
			gpmax -= cpuc->n_pair;
			WARN_ON(gpmax <= 0);
		}

@@ -1157,12 +1161,10 @@ static int collect_event(struct cpu_hw_events *cpuc, struct perf_event *event,
 */
static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, bool dogrp)
{
	int num_counters = hybrid(cpuc->pmu, num_counters);
	int num_counters_fixed = hybrid(cpuc->pmu, num_counters_fixed);
	struct perf_event *event;
	int n, max_count;

	max_count = num_counters + num_counters_fixed;
	max_count = x86_pmu_num_counters(cpuc->pmu) + x86_pmu_num_counters_fixed(cpuc->pmu);

	/* current number of events already accepted */
	n = cpuc->n_events;
@@ -1522,13 +1524,13 @@ void perf_event_print_debug(void)
	u64 pebs, debugctl;
	int cpu = smp_processor_id();
	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
	int num_counters = hybrid(cpuc->pmu, num_counters);
	int num_counters_fixed = hybrid(cpuc->pmu, num_counters_fixed);
	unsigned long *cntr_mask = hybrid(cpuc->pmu, cntr_mask);
	unsigned long *fixed_cntr_mask = hybrid(cpuc->pmu, fixed_cntr_mask);
	struct event_constraint *pebs_constraints = hybrid(cpuc->pmu, pebs_constraints);
	unsigned long flags;
	int idx;

	if (!num_counters)
	if (!*(u64 *)cntr_mask)
		return;

	local_irq_save(flags);
@@ -1555,7 +1557,7 @@ void perf_event_print_debug(void)
	}
	pr_info("CPU#%d: active:     %016llx\n", cpu, *(u64 *)cpuc->active_mask);

	for (idx = 0; idx < num_counters; idx++) {
	for_each_set_bit(idx, cntr_mask, X86_PMC_IDX_MAX) {
		rdmsrl(x86_pmu_config_addr(idx), pmc_ctrl);
		rdmsrl(x86_pmu_event_addr(idx), pmc_count);

@@ -1568,7 +1570,7 @@ void perf_event_print_debug(void)
		pr_info("CPU#%d:   gen-PMC%d left:  %016llx\n",
			cpu, idx, prev_left);
	}
	for (idx = 0; idx < num_counters_fixed; idx++) {
	for_each_set_bit(idx, fixed_cntr_mask, X86_PMC_IDX_MAX) {
		if (fixed_counter_disabled(idx, cpuc->pmu))
			continue;
		rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);
@@ -1682,7 +1684,7 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
	 */
	apic_write(APIC_LVTPC, APIC_DM_NMI);

	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
	for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
		if (!test_bit(idx, cpuc->active_mask))
			continue;

@@ -2038,18 +2040,15 @@ static void _x86_pmu_read(struct perf_event *event)
	static_call(x86_pmu_update)(event);
}

void x86_pmu_show_pmu_cap(int num_counters, int num_counters_fixed,
			  u64 intel_ctrl)
void x86_pmu_show_pmu_cap(struct pmu *pmu)
{
	pr_info("... version:                %d\n",     x86_pmu.version);
	pr_info("... bit width:              %d\n",     x86_pmu.cntval_bits);
	pr_info("... generic registers:      %d\n",     num_counters);
	pr_info("... generic registers:      %d\n",     x86_pmu_num_counters(pmu));
	pr_info("... value mask:             %016Lx\n", x86_pmu.cntval_mask);
	pr_info("... max period:             %016Lx\n", x86_pmu.max_period);
	pr_info("... fixed-purpose events:   %lu\n",
			hweight64((((1ULL << num_counters_fixed) - 1)
					<< INTEL_PMC_IDX_FIXED) & intel_ctrl));
	pr_info("... event mask:             %016Lx\n", intel_ctrl);
	pr_info("... fixed-purpose events:   %d\n",     x86_pmu_num_counters_fixed(pmu));
	pr_info("... event mask:             %016Lx\n", hybrid(pmu, intel_ctrl));
}

static int __init init_hw_perf_events(void)
@@ -2086,7 +2085,7 @@ static int __init init_hw_perf_events(void)
	pmu_check_apic();

	/* sanity check that the hardware exists or is emulated */
	if (!check_hw_exists(&pmu, x86_pmu.num_counters, x86_pmu.num_counters_fixed))
	if (!check_hw_exists(&pmu, x86_pmu.cntr_mask, x86_pmu.fixed_cntr_mask))
		goto out_bad_pmu;

	pr_cont("%s PMU driver.\n", x86_pmu.name);
@@ -2097,14 +2096,14 @@ static int __init init_hw_perf_events(void)
		quirk->func();

	if (!x86_pmu.intel_ctrl)
		x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;
		x86_pmu.intel_ctrl = x86_pmu.cntr_mask64;

	perf_events_lapic_init();
	register_nmi_handler(NMI_LOCAL, perf_event_nmi_handler, 0, "PMI");

	unconstrained = (struct event_constraint)
		__EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1,
				   0, x86_pmu.num_counters, 0, 0);
		__EVENT_CONSTRAINT(0, x86_pmu.cntr_mask64,
				   0, x86_pmu_num_counters(NULL), 0, 0);

	x86_pmu_format_group.attrs = x86_pmu.format_attrs;

@@ -2113,11 +2112,8 @@ static int __init init_hw_perf_events(void)

	pmu.attr_update = x86_pmu.attr_update;

	if (!is_hybrid()) {
		x86_pmu_show_pmu_cap(x86_pmu.num_counters,
				     x86_pmu.num_counters_fixed,
				     x86_pmu.intel_ctrl);
	}
	if (!is_hybrid())
		x86_pmu_show_pmu_cap(NULL);

	if (!x86_pmu.read)
		x86_pmu.read = _x86_pmu_read;
@@ -2481,7 +2477,7 @@ void perf_clear_dirty_counters(void)
	for_each_set_bit(i, cpuc->dirty, X86_PMC_IDX_MAX) {
		if (i >= INTEL_PMC_IDX_FIXED) {
			/* Metrics and fake events don't have corresponding HW counters. */
			if ((i - INTEL_PMC_IDX_FIXED) >= hybrid(cpuc->pmu, num_counters_fixed))
			if (!test_bit(i - INTEL_PMC_IDX_FIXED, hybrid(cpuc->pmu, fixed_cntr_mask)))
				continue;

			wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + (i - INTEL_PMC_IDX_FIXED), 0);
@@ -2983,8 +2979,8 @@ void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
	 * base PMU holds the correct number of counters for P-cores.
	 */
	cap->version		= x86_pmu.version;
	cap->num_counters_gp	= x86_pmu.num_counters;
	cap->num_counters_fixed	= x86_pmu.num_counters_fixed;
	cap->num_counters_gp	= x86_pmu_num_counters(NULL);
	cap->num_counters_fixed	= x86_pmu_num_counters_fixed(NULL);
	cap->bit_width_gp	= x86_pmu.cntval_bits;
	cap->bit_width_fixed	= x86_pmu.cntval_bits;
	cap->events_mask	= (unsigned int)x86_pmu.events_maskl;
+80 −84
Original line number Diff line number Diff line
@@ -2874,23 +2874,23 @@ static void intel_pmu_reset(void)
{
	struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	int num_counters_fixed = hybrid(cpuc->pmu, num_counters_fixed);
	int num_counters = hybrid(cpuc->pmu, num_counters);
	unsigned long *cntr_mask = hybrid(cpuc->pmu, cntr_mask);
	unsigned long *fixed_cntr_mask = hybrid(cpuc->pmu, fixed_cntr_mask);
	unsigned long flags;
	int idx;

	if (!num_counters)
	if (!*(u64 *)cntr_mask)
		return;

	local_irq_save(flags);

	pr_info("clearing PMU state on CPU#%d\n", smp_processor_id());

	for (idx = 0; idx < num_counters; idx++) {
	for_each_set_bit(idx, cntr_mask, INTEL_PMC_MAX_GENERIC) {
		wrmsrl_safe(x86_pmu_config_addr(idx), 0ull);
		wrmsrl_safe(x86_pmu_event_addr(idx),  0ull);
	}
	for (idx = 0; idx < num_counters_fixed; idx++) {
	for_each_set_bit(idx, fixed_cntr_mask, INTEL_PMC_MAX_FIXED) {
		if (fixed_counter_disabled(idx, cpuc->pmu))
			continue;
		wrmsrl_safe(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
@@ -2940,8 +2940,7 @@ static void x86_pmu_handle_guest_pebs(struct pt_regs *regs,
	    !guest_pebs_idxs)
		return;

	for_each_set_bit(bit, (unsigned long *)&guest_pebs_idxs,
			 INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed) {
	for_each_set_bit(bit, (unsigned long *)&guest_pebs_idxs, X86_PMC_IDX_MAX) {
		event = cpuc->events[bit];
		if (!event->attr.precise_ip)
			continue;
@@ -4199,7 +4198,7 @@ static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr, void *data)
	struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs;
	int idx;

	for (idx = 0; idx < x86_pmu.num_counters; idx++)  {
	for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
		struct perf_event *event = cpuc->events[idx];

		arr[idx].msr = x86_pmu_config_addr(idx);
@@ -4217,7 +4216,7 @@ static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr, void *data)
			arr[idx].guest &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
	}

	*nr = x86_pmu.num_counters;
	*nr = x86_pmu_max_num_counters(cpuc->pmu);
	return arr;
}

@@ -4232,7 +4231,7 @@ static void core_pmu_enable_all(int added)
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	int idx;

	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
	for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
		struct hw_perf_event *hwc = &cpuc->events[idx]->hw;

		if (!test_bit(idx, cpuc->active_mask) ||
@@ -4684,13 +4683,33 @@ static void flip_smm_bit(void *data)
	}
}

static void intel_pmu_check_num_counters(int *num_counters,
					 int *num_counters_fixed,
					 u64 *intel_ctrl, u64 fixed_mask);
static void intel_pmu_check_counters_mask(u64 *cntr_mask,
					  u64 *fixed_cntr_mask,
					  u64 *intel_ctrl)
{
	unsigned int bit;

	bit = fls64(*cntr_mask);
	if (bit > INTEL_PMC_MAX_GENERIC) {
		WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
		     bit, INTEL_PMC_MAX_GENERIC);
		*cntr_mask &= GENMASK_ULL(INTEL_PMC_MAX_GENERIC - 1, 0);
	}
	*intel_ctrl = *cntr_mask;

	bit = fls64(*fixed_cntr_mask);
	if (bit > INTEL_PMC_MAX_FIXED) {
		WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
		     bit, INTEL_PMC_MAX_FIXED);
		*fixed_cntr_mask &= GENMASK_ULL(INTEL_PMC_MAX_FIXED - 1, 0);
	}

	*intel_ctrl |= *fixed_cntr_mask << INTEL_PMC_IDX_FIXED;
}

static void intel_pmu_check_event_constraints(struct event_constraint *event_constraints,
					      int num_counters,
					      int num_counters_fixed,
					      u64 cntr_mask,
					      u64 fixed_cntr_mask,
					      u64 intel_ctrl);

static void intel_pmu_check_extra_regs(struct extra_reg *extra_regs);
@@ -4713,11 +4732,10 @@ static void update_pmu_cap(struct x86_hybrid_pmu *pmu)
	if (sub_bitmaps & ARCH_PERFMON_NUM_COUNTER_LEAF_BIT) {
		cpuid_count(ARCH_PERFMON_EXT_LEAF, ARCH_PERFMON_NUM_COUNTER_LEAF,
			    &eax, &ebx, &ecx, &edx);
		pmu->num_counters = fls(eax);
		pmu->num_counters_fixed = fls(ebx);
		pmu->cntr_mask64 = eax;
		pmu->fixed_cntr_mask64 = ebx;
	}


	if (!intel_pmu_broken_perf_cap()) {
		/* Perf Metric (Bit 15) and PEBS via PT (Bit 16) are hybrid enumeration */
		rdmsrl(MSR_IA32_PERF_CAPABILITIES, pmu->intel_cap.capabilities);
@@ -4726,12 +4744,12 @@ static void update_pmu_cap(struct x86_hybrid_pmu *pmu)

static void intel_pmu_check_hybrid_pmus(struct x86_hybrid_pmu *pmu)
{
	intel_pmu_check_num_counters(&pmu->num_counters, &pmu->num_counters_fixed,
				     &pmu->intel_ctrl, (1ULL << pmu->num_counters_fixed) - 1);
	pmu->pebs_events_mask = intel_pmu_pebs_mask(GENMASK_ULL(pmu->num_counters - 1, 0));
	intel_pmu_check_counters_mask(&pmu->cntr_mask64, &pmu->fixed_cntr_mask64,
				      &pmu->intel_ctrl);
	pmu->pebs_events_mask = intel_pmu_pebs_mask(pmu->cntr_mask64);
	pmu->unconstrained = (struct event_constraint)
			     __EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1,
						0, pmu->num_counters, 0, 0);
			     __EVENT_CONSTRAINT(0, pmu->cntr_mask64,
						0, x86_pmu_num_counters(&pmu->pmu), 0, 0);

	if (pmu->intel_cap.perf_metrics)
		pmu->intel_ctrl |= 1ULL << GLOBAL_CTRL_EN_PERF_METRICS;
@@ -4744,8 +4762,8 @@ static void intel_pmu_check_hybrid_pmus(struct x86_hybrid_pmu *pmu)
		pmu->pmu.capabilities &= ~PERF_PMU_CAP_AUX_OUTPUT;

	intel_pmu_check_event_constraints(pmu->event_constraints,
					  pmu->num_counters,
					  pmu->num_counters_fixed,
					  pmu->cntr_mask64,
					  pmu->fixed_cntr_mask64,
					  pmu->intel_ctrl);

	intel_pmu_check_extra_regs(pmu->extra_regs);
@@ -4806,7 +4824,7 @@ static bool init_hybrid_pmu(int cpu)

	intel_pmu_check_hybrid_pmus(pmu);

	if (!check_hw_exists(&pmu->pmu, pmu->num_counters, pmu->num_counters_fixed))
	if (!check_hw_exists(&pmu->pmu, pmu->cntr_mask, pmu->fixed_cntr_mask))
		return false;

	pr_info("%s PMU driver: ", pmu->name);
@@ -4816,8 +4834,7 @@ static bool init_hybrid_pmu(int cpu)

	pr_cont("\n");

	x86_pmu_show_pmu_cap(pmu->num_counters, pmu->num_counters_fixed,
			     pmu->intel_ctrl);
	x86_pmu_show_pmu_cap(&pmu->pmu);

end:
	cpumask_set_cpu(cpu, &pmu->supported_cpus);
@@ -5955,29 +5972,9 @@ static const struct attribute_group *hybrid_attr_update[] = {

static struct attribute *empty_attrs;

static void intel_pmu_check_num_counters(int *num_counters,
					 int *num_counters_fixed,
					 u64 *intel_ctrl, u64 fixed_mask)
{
	if (*num_counters > INTEL_PMC_MAX_GENERIC) {
		WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
		     *num_counters, INTEL_PMC_MAX_GENERIC);
		*num_counters = INTEL_PMC_MAX_GENERIC;
	}
	*intel_ctrl = (1ULL << *num_counters) - 1;

	if (*num_counters_fixed > INTEL_PMC_MAX_FIXED) {
		WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
		     *num_counters_fixed, INTEL_PMC_MAX_FIXED);
		*num_counters_fixed = INTEL_PMC_MAX_FIXED;
	}

	*intel_ctrl |= fixed_mask << INTEL_PMC_IDX_FIXED;
}

static void intel_pmu_check_event_constraints(struct event_constraint *event_constraints,
					      int num_counters,
					      int num_counters_fixed,
					      u64 cntr_mask,
					      u64 fixed_cntr_mask,
					      u64 intel_ctrl)
{
	struct event_constraint *c;
@@ -6014,10 +6011,9 @@ static void intel_pmu_check_event_constraints(struct event_constraint *event_con
			 * generic counters
			 */
			if (!use_fixed_pseudo_encoding(c->code))
				c->idxmsk64 |= (1ULL << num_counters) - 1;
				c->idxmsk64 |= cntr_mask;
		}
		c->idxmsk64 &=
			~(~0ULL << (INTEL_PMC_IDX_FIXED + num_counters_fixed));
		c->idxmsk64 &= cntr_mask | (fixed_cntr_mask << INTEL_PMC_IDX_FIXED);
		c->weight = hweight64(c->idxmsk64);
	}
}
@@ -6068,12 +6064,12 @@ static __always_inline int intel_pmu_init_hybrid(enum hybrid_pmu_type pmus)
		pmu->pmu_type = intel_hybrid_pmu_type_map[bit].id;
		pmu->name = intel_hybrid_pmu_type_map[bit].name;

		pmu->num_counters = x86_pmu.num_counters;
		pmu->num_counters_fixed = x86_pmu.num_counters_fixed;
		pmu->pebs_events_mask = intel_pmu_pebs_mask(GENMASK_ULL(pmu->num_counters - 1, 0));
		pmu->cntr_mask64 = x86_pmu.cntr_mask64;
		pmu->fixed_cntr_mask64 = x86_pmu.fixed_cntr_mask64;
		pmu->pebs_events_mask = intel_pmu_pebs_mask(pmu->cntr_mask64);
		pmu->unconstrained = (struct event_constraint)
				     __EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1,
							0, pmu->num_counters, 0, 0);
				     __EVENT_CONSTRAINT(0, pmu->cntr_mask64,
							0, x86_pmu_num_counters(&pmu->pmu), 0, 0);

		pmu->intel_cap.capabilities = x86_pmu.intel_cap.capabilities;
		if (pmu->pmu_type & hybrid_small) {
@@ -6186,14 +6182,14 @@ __init int intel_pmu_init(void)
		x86_pmu = intel_pmu;

	x86_pmu.version			= version;
	x86_pmu.num_counters		= eax.split.num_counters;
	x86_pmu.cntr_mask64		= GENMASK_ULL(eax.split.num_counters - 1, 0);
	x86_pmu.cntval_bits		= eax.split.bit_width;
	x86_pmu.cntval_mask		= (1ULL << eax.split.bit_width) - 1;

	x86_pmu.events_maskl		= ebx.full;
	x86_pmu.events_mask_len		= eax.split.mask_length;

	x86_pmu.pebs_events_mask	= intel_pmu_pebs_mask(GENMASK_ULL(x86_pmu.num_counters - 1, 0));
	x86_pmu.pebs_events_mask	= intel_pmu_pebs_mask(x86_pmu.cntr_mask64);
	x86_pmu.pebs_capable		= PEBS_COUNTER_MASK;

	/*
@@ -6203,12 +6199,10 @@ __init int intel_pmu_init(void)
	if (version > 1 && version < 5) {
		int assume = 3 * !boot_cpu_has(X86_FEATURE_HYPERVISOR);

		x86_pmu.num_counters_fixed =
			max((int)edx.split.num_counters_fixed, assume);

		fixed_mask = (1L << x86_pmu.num_counters_fixed) - 1;
		x86_pmu.fixed_cntr_mask64 =
			GENMASK_ULL(max((int)edx.split.num_counters_fixed, assume) - 1, 0);
	} else if (version >= 5)
		x86_pmu.num_counters_fixed = fls(fixed_mask);
		x86_pmu.fixed_cntr_mask64 = fixed_mask;

	if (boot_cpu_has(X86_FEATURE_PDCM)) {
		u64 capabilities;
@@ -6803,11 +6797,13 @@ __init int intel_pmu_init(void)
		pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX];
		intel_pmu_init_glc(&pmu->pmu);
		if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) {
			pmu->num_counters = x86_pmu.num_counters + 2;
			pmu->num_counters_fixed = x86_pmu.num_counters_fixed + 1;
			pmu->cntr_mask64 <<= 2;
			pmu->cntr_mask64 |= 0x3;
			pmu->fixed_cntr_mask64 <<= 1;
			pmu->fixed_cntr_mask64 |= 0x1;
		} else {
			pmu->num_counters = x86_pmu.num_counters;
			pmu->num_counters_fixed = x86_pmu.num_counters_fixed;
			pmu->cntr_mask64 = x86_pmu.cntr_mask64;
			pmu->fixed_cntr_mask64 = x86_pmu.fixed_cntr_mask64;
		}

		/*
@@ -6817,15 +6813,16 @@ __init int intel_pmu_init(void)
		 * mistakenly add extra counters for P-cores. Correct the number of
		 * counters here.
		 */
		if ((pmu->num_counters > 8) || (pmu->num_counters_fixed > 4)) {
			pmu->num_counters = x86_pmu.num_counters;
			pmu->num_counters_fixed = x86_pmu.num_counters_fixed;
		if ((x86_pmu_num_counters(&pmu->pmu) > 8) || (x86_pmu_num_counters_fixed(&pmu->pmu) > 4)) {
			pmu->cntr_mask64 = x86_pmu.cntr_mask64;
			pmu->fixed_cntr_mask64 = x86_pmu.fixed_cntr_mask64;
		}

		pmu->pebs_events_mask = intel_pmu_pebs_mask(GENMASK_ULL(pmu->num_counters - 1, 0));
		pmu->pebs_events_mask = intel_pmu_pebs_mask(pmu->cntr_mask64);
		pmu->unconstrained = (struct event_constraint)
					__EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1,
							   0, pmu->num_counters, 0, 0);
				     __EVENT_CONSTRAINT(0, pmu->cntr_mask64,
				     0, x86_pmu_num_counters(&pmu->pmu), 0, 0);

		pmu->extra_regs = intel_glc_extra_regs;

		/* Initialize Atom core specific PerfMon capabilities.*/
@@ -6892,9 +6889,9 @@ __init int intel_pmu_init(void)
			 * The constraints may be cut according to the CPUID enumeration
			 * by inserting the EVENT_CONSTRAINT_END.
			 */
			if (x86_pmu.num_counters_fixed > INTEL_PMC_MAX_FIXED)
				x86_pmu.num_counters_fixed = INTEL_PMC_MAX_FIXED;
			intel_v5_gen_event_constraints[x86_pmu.num_counters_fixed].weight = -1;
			if (fls64(x86_pmu.fixed_cntr_mask64) > INTEL_PMC_MAX_FIXED)
				x86_pmu.fixed_cntr_mask64 &= GENMASK_ULL(INTEL_PMC_MAX_FIXED - 1, 0);
			intel_v5_gen_event_constraints[fls64(x86_pmu.fixed_cntr_mask64)].weight = -1;
			x86_pmu.event_constraints = intel_v5_gen_event_constraints;
			pr_cont("generic architected perfmon, ");
			name = "generic_arch_v5+";
@@ -6921,18 +6918,17 @@ __init int intel_pmu_init(void)
		x86_pmu.attr_update = hybrid_attr_update;
	}

	intel_pmu_check_num_counters(&x86_pmu.num_counters,
				     &x86_pmu.num_counters_fixed,
				     &x86_pmu.intel_ctrl,
				     (u64)fixed_mask);
	intel_pmu_check_counters_mask(&x86_pmu.cntr_mask64,
				      &x86_pmu.fixed_cntr_mask64,
				      &x86_pmu.intel_ctrl);

	/* AnyThread may be deprecated on arch perfmon v5 or later */
	if (x86_pmu.intel_cap.anythread_deprecated)
		x86_pmu.format_attrs = intel_arch_formats_attr;

	intel_pmu_check_event_constraints(x86_pmu.event_constraints,
					  x86_pmu.num_counters,
					  x86_pmu.num_counters_fixed,
					  x86_pmu.cntr_mask64,
					  x86_pmu.fixed_cntr_mask64,
					  x86_pmu.intel_ctrl);
	/*
	 * Access LBR MSR may cause #GP under certain circumstances.
+8 −11

File changed.

Preview size limit exceeded, changes collapsed.

+1 −1
Original line number Diff line number Diff line
@@ -303,7 +303,7 @@ static const struct x86_pmu knc_pmu __initconst = {
	.apic			= 1,
	.max_period		= (1ULL << 39) - 1,
	.version		= 0,
	.num_counters		= 2,
	.cntr_mask64		= 0x3,
	.cntval_bits		= 40,
	.cntval_mask		= (1ULL << 40) - 1,
	.get_event_constraints	= x86_get_event_constraints,
Loading