Commit 2721e8da authored by Dapeng Mi, committed by Peter Zijlstra
Browse files

perf/x86/intel: Allocate arch-PEBS buffer and initialize PEBS_BASE MSR



Arch-PEBS introduces a new MSR, IA32_PEBS_BASE, to store the physical
address of the arch-PEBS buffer. This patch allocates the arch-PEBS buffer
and then initializes the IA32_PEBS_BASE MSR with the buffer's physical address.

Co-developed-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20251029102136.61364-10-dapeng1.mi@linux.intel.com
parent d21954c8
Loading
Loading
Loading
Loading
+10 −1
Original line number Diff line number Diff line
@@ -5227,7 +5227,13 @@ int intel_cpuc_prepare(struct cpu_hw_events *cpuc, int cpu)

/*
 * Prepare the PMU state of @cpu.
 *
 * Runs intel_cpuc_prepare() on the CPU's cpu_hw_events and, only if that
 * succeeds, allocates the arch-PEBS buffer via alloc_arch_pebs_buf_on_cpu().
 * Returns the non-zero result of intel_cpuc_prepare() on failure, otherwise
 * whatever alloc_arch_pebs_buf_on_cpu() returns.
 *
 * NOTE(review): the bare "return intel_cpuc_prepare(...)" directly below
 * looks like the pre-patch line kept by the diff view (removed and added
 * lines are shown here without +/- markers) -- confirm against the tree.
 */
static int intel_pmu_cpu_prepare(int cpu)
{
	return intel_cpuc_prepare(&per_cpu(cpu_hw_events, cpu), cpu);
	int ret;

	ret = intel_cpuc_prepare(&per_cpu(cpu_hw_events, cpu), cpu);
	if (ret)
		return ret;

	/* Only reached once the generic per-CPU preparation succeeded. */
	return alloc_arch_pebs_buf_on_cpu(cpu);
}

static void flip_smm_bit(void *data)
@@ -5458,6 +5464,7 @@ static void intel_pmu_cpu_starting(int cpu)
		return;

	init_debug_store_on_cpu(cpu);
	init_arch_pebs_on_cpu(cpu);
	/*
	 * Deal with CPUs that don't clear their LBRs on power-up, and that may
	 * even boot with LBRs enabled.
@@ -5555,6 +5562,7 @@ static void free_excl_cntrs(struct cpu_hw_events *cpuc)
/*
 * Tear down the per-CPU sampling hardware state of @cpu: first the debug
 * store (fini_debug_store_on_cpu()), then arch-PEBS
 * (fini_arch_pebs_on_cpu()).
 *
 * NOTE(review): presumably the CPU-hotplug "dying" counterpart of
 * intel_pmu_cpu_starting() -- the registration is not visible here.
 */
static void intel_pmu_cpu_dying(int cpu)
{
	fini_debug_store_on_cpu(cpu);
	fini_arch_pebs_on_cpu(cpu);
}

void intel_cpuc_finish(struct cpu_hw_events *cpuc)
@@ -5575,6 +5583,7 @@ static void intel_pmu_cpu_dead(int cpu)
{
	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);

	release_arch_pebs_buf_on_cpu(cpu);
	intel_cpuc_finish(cpuc);

	if (is_hybrid() && cpuc->pmu)
+70 −12
Original line number Diff line number Diff line
@@ -625,13 +625,18 @@ static int alloc_pebs_buffer(int cpu)
	int max, node = cpu_to_node(cpu);
	void *buffer, *insn_buff, *cea;

	if (!x86_pmu.ds_pebs)
	if (!intel_pmu_has_pebs())
		return 0;

	buffer = dsalloc_pages(bsiz, GFP_KERNEL, cpu);
	if (unlikely(!buffer))
		return -ENOMEM;

	if (x86_pmu.arch_pebs) {
		hwev->pebs_vaddr = buffer;
		return 0;
	}

	/*
	 * HSW+ already provides us the eventing ip; no need to allocate this
	 * buffer then.
@@ -644,7 +649,7 @@ static int alloc_pebs_buffer(int cpu)
		}
		per_cpu(insn_buffer, cpu) = insn_buff;
	}
	hwev->ds_pebs_vaddr = buffer;
	hwev->pebs_vaddr = buffer;
	/* Update the cpu entry area mapping */
	cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
	ds->pebs_buffer_base = (unsigned long) cea;
@@ -660,17 +665,20 @@ static void release_pebs_buffer(int cpu)
	struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
	void *cea;

	if (!x86_pmu.ds_pebs)
	if (!intel_pmu_has_pebs())
		return;

	if (x86_pmu.ds_pebs) {
		kfree(per_cpu(insn_buffer, cpu));
		per_cpu(insn_buffer, cpu) = NULL;

		/* Clear the fixmap */
		cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
		ds_clear_cea(cea, x86_pmu.pebs_buffer_size);
	dsfree_pages(hwev->ds_pebs_vaddr, x86_pmu.pebs_buffer_size);
	hwev->ds_pebs_vaddr = NULL;
	}

	dsfree_pages(hwev->pebs_vaddr, x86_pmu.pebs_buffer_size);
	hwev->pebs_vaddr = NULL;
}

static int alloc_bts_buffer(int cpu)
@@ -823,6 +831,56 @@ void reserve_ds_buffers(void)
	}
}

/*
 * Allocate the PEBS buffer of @cpu when arch-PEBS is in use.
 *
 * Without x86_pmu.arch_pebs this is a no-op that reports success (0);
 * otherwise the result of alloc_pebs_buffer() is passed through unchanged.
 */
inline int alloc_arch_pebs_buf_on_cpu(int cpu)
{
	return x86_pmu.arch_pebs ? alloc_pebs_buffer(cpu) : 0;
}

/*
 * Release the PEBS buffer of @cpu when arch-PEBS is in use; does nothing
 * if x86_pmu.arch_pebs is not set.
 */
inline void release_arch_pebs_buf_on_cpu(int cpu)
{
	if (x86_pmu.arch_pebs)
		release_pebs_buffer(cpu);
}

void init_arch_pebs_on_cpu(int cpu)
{
	struct cpu_hw_events *cpuc = per_cpu_ptr(&cpu_hw_events, cpu);
	u64 arch_pebs_base;

	if (!x86_pmu.arch_pebs)
		return;

	if (!cpuc->pebs_vaddr) {
		WARN(1, "Fail to allocate PEBS buffer on CPU %d\n", cpu);
		x86_pmu.pebs_active = 0;
		return;
	}

	/*
	 * 4KB-aligned pointer of the output buffer
	 * (__alloc_pages_node() return page aligned address)
	 * Buffer Size = 4KB * 2^SIZE
	 * contiguous physical buffer (__alloc_pages_node() with order)
	 */
	arch_pebs_base = virt_to_phys(cpuc->pebs_vaddr) | PEBS_BUFFER_SHIFT;
	wrmsr_on_cpu(cpu, MSR_IA32_PEBS_BASE, (u32)arch_pebs_base,
		     (u32)(arch_pebs_base >> 32));
	x86_pmu.pebs_active = 1;
}

/*
 * Clear IA32_PEBS_BASE on @cpu (write 0 to both halves) when arch-PEBS is
 * in use; does nothing if x86_pmu.arch_pebs is not set.
 */
inline void fini_arch_pebs_on_cpu(int cpu)
{
	if (x86_pmu.arch_pebs)
		wrmsr_on_cpu(cpu, MSR_IA32_PEBS_BASE, 0, 0);
}

/*
 * BTS
 */
@@ -2883,8 +2941,8 @@ static void intel_pmu_drain_arch_pebs(struct pt_regs *iregs,
		return;
	}

	base = cpuc->ds_pebs_vaddr;
	top = (void *)((u64)cpuc->ds_pebs_vaddr +
	base = cpuc->pebs_vaddr;
	top = (void *)((u64)cpuc->pebs_vaddr +
		       (index.wr << ARCH_PEBS_INDEX_WR_SHIFT));

	index.wr = 0;
+10 −1
Original line number Diff line number Diff line
@@ -283,8 +283,9 @@ struct cpu_hw_events {
	 * Intel DebugStore bits
	 */
	struct debug_store	*ds;
	void			*ds_pebs_vaddr;
	void			*ds_bts_vaddr;
	/* DS based PEBS or arch-PEBS buffer address */
	void			*pebs_vaddr;
	u64			pebs_enabled;
	int			n_pebs;
	int			n_large_pebs;
@@ -1617,6 +1618,14 @@ extern void intel_cpuc_finish(struct cpu_hw_events *cpuc);

int intel_pmu_init(void);

int alloc_arch_pebs_buf_on_cpu(int cpu);

void release_arch_pebs_buf_on_cpu(int cpu);

void init_arch_pebs_on_cpu(int cpu);

void fini_arch_pebs_on_cpu(int cpu);

void init_debug_store_on_cpu(int cpu);

void fini_debug_store_on_cpu(int cpu);
+2 −1
Original line number Diff line number Diff line
@@ -4,7 +4,8 @@
#include <linux/percpu-defs.h>

#define BTS_BUFFER_SIZE		(PAGE_SIZE << 4)
#define PEBS_BUFFER_SIZE	(PAGE_SIZE << 4)
#define PEBS_BUFFER_SHIFT	4
#define PEBS_BUFFER_SIZE	(PAGE_SIZE << PEBS_BUFFER_SHIFT)

/* The maximal number of PEBS events: */
#define MAX_PEBS_EVENTS_FMT4	8