Commit d21954c8, authored by Dapeng Mi, committed by Peter Zijlstra
Browse files

perf/x86/intel: Process arch-PEBS records or record fragments



A significant difference from adaptive PEBS is that an arch-PEBS record
supports fragments, which means an arch-PEBS record can be split into
several independent fragments, each of which has its own arch-PEBS
header.

This patch defines the architectural PEBS record layout structures and adds
helpers to process arch-PEBS records or fragments. Only the legacy PEBS
groups (basic, GPR, XMM and LBR) are supported in this patch; capturing
of the newly added YMM/ZMM/OPMASK vector registers will be supported in
the future.

Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20251029102136.61364-9-dapeng1.mi@linux.intel.com
parent 167cde7d
Loading
Loading
Loading
Loading
+13 −0
Original line number Diff line number Diff line
@@ -3215,6 +3215,19 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
			status &= ~GLOBAL_STATUS_PERF_METRICS_OVF_BIT;
	}

	/*
	 * Arch PEBS sets bit 54 in the global status register
	 */
	if (__test_and_clear_bit(GLOBAL_STATUS_ARCH_PEBS_THRESHOLD_BIT,
				 (unsigned long *)&status)) {
		handled++;
		static_call(x86_pmu_drain_pebs)(regs, &data);

		if (cpuc->events[INTEL_PMC_IDX_FIXED_SLOTS] &&
		    is_pebs_counter_event_group(cpuc->events[INTEL_PMC_IDX_FIXED_SLOTS]))
			status &= ~GLOBAL_STATUS_PERF_METRICS_OVF_BIT;
	}

	/*
	 * Intel PT
	 */
+184 −0
Original line number Diff line number Diff line
@@ -2270,6 +2270,117 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
			format_group);
}

static inline bool arch_pebs_record_continued(struct arch_pebs_header *header)
{
	/* Continue bit or null PEBS record indicates fragment follows. */
	return header->cont || !(header->format & GENMASK_ULL(63, 16));
}

static void setup_arch_pebs_sample_data(struct perf_event *event,
					struct pt_regs *iregs,
					void *__pebs,
					struct perf_sample_data *data,
					struct pt_regs *regs)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	u64 sample_type = event->attr.sample_type;
	struct arch_pebs_header *header = NULL;
	struct arch_pebs_aux *meminfo = NULL;
	struct arch_pebs_gprs *gprs = NULL;
	struct x86_perf_regs *perf_regs;
	void *next_record;
	void *at = __pebs;

	if (at == NULL)
		return;

	perf_regs = container_of(regs, struct x86_perf_regs, regs);
	perf_regs->xmm_regs = NULL;

	__setup_perf_sample_data(event, iregs, data);

	*regs = *iregs;

again:
	header = at;
	next_record = at + sizeof(struct arch_pebs_header);
	if (header->basic) {
		struct arch_pebs_basic *basic = next_record;
		u16 retire = 0;

		next_record = basic + 1;

		if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT)
			retire = basic->valid ? basic->retire : 0;
		__setup_pebs_basic_group(event, regs, data, sample_type,
				 basic->ip, basic->tsc, retire);
	}

	/*
	 * The record for MEMINFO is in front of GP
	 * But PERF_SAMPLE_TRANSACTION needs gprs->ax.
	 * Save the pointer here but process later.
	 */
	if (header->aux) {
		meminfo = next_record;
		next_record = meminfo + 1;
	}

	if (header->gpr) {
		gprs = next_record;
		next_record = gprs + 1;

		__setup_pebs_gpr_group(event, regs,
				       (struct pebs_gprs *)gprs,
				       sample_type);
	}

	if (header->aux) {
		u64 ax = gprs ? gprs->ax : 0;

		__setup_pebs_meminfo_group(event, data, sample_type,
					   meminfo->cache_latency,
					   meminfo->instr_latency,
					   meminfo->address, meminfo->aux,
					   meminfo->tsx_tuning, ax);
	}

	if (header->xmm) {
		struct pebs_xmm *xmm;

		next_record += sizeof(struct arch_pebs_xer_header);

		xmm = next_record;
		perf_regs->xmm_regs = xmm->xmm;
		next_record = xmm + 1;
	}

	if (header->lbr) {
		struct arch_pebs_lbr_header *lbr_header = next_record;
		struct lbr_entry *lbr;
		int num_lbr;

		next_record = lbr_header + 1;
		lbr = next_record;

		num_lbr = header->lbr == ARCH_PEBS_LBR_NUM_VAR ?
				lbr_header->depth :
				header->lbr * ARCH_PEBS_BASE_LBR_ENTRIES;
		next_record += num_lbr * sizeof(struct lbr_entry);

		if (has_branch_stack(event)) {
			intel_pmu_store_pebs_lbrs(lbr);
			intel_pmu_lbr_save_brstack(data, cpuc, event);
		}
	}

	/* Parse followed fragments if there are. */
	if (arch_pebs_record_continued(header)) {
		at = at + header->size;
		goto again;
	}
}

static inline void *
get_next_pebs_record_by_bit(void *base, void *top, int bit)
{
@@ -2753,6 +2864,78 @@ static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_d
					    setup_pebs_adaptive_sample_data);
}

/*
 * Drain the arch-PEBS buffer of the current CPU.
 *
 * @iregs: interrupt registers, or NULL; dummy_iregs is used when NULL.
 * @data:  sample data scratch area handed on to the record handlers.
 *
 * The write offset is read from MSR_IA32_PEBS_INDEX and then reset (together
 * with the 'full' bit) before the records between the buffer base and the
 * old write position are walked. Records made of several fragments are
 * handed to the record handler once, via their first fragment, and the
 * remaining fragments are stepped over here.
 */
static void intel_pmu_drain_arch_pebs(struct pt_regs *iregs,
				      struct perf_sample_data *data)
{
	short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
	void *last[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS];
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	union arch_pebs_index index;
	struct x86_perf_regs perf_regs;
	struct pt_regs *regs = &perf_regs.regs;
	void *base, *at, *top;
	u64 mask;

	rdmsrq(MSR_IA32_PEBS_INDEX, index.whole);

	/* Write offset of zero: there is no record to drain. */
	if (unlikely(!index.wr)) {
		intel_pmu_pebs_event_update_no_drain(cpuc, X86_PMC_IDX_MAX);
		return;
	}

	/* 'wr' is scaled by ARCH_PEBS_INDEX_WR_SHIFT to get a byte offset. */
	base = cpuc->ds_pebs_vaddr;
	top = (void *)((u64)cpuc->ds_pebs_vaddr +
		       (index.wr << ARCH_PEBS_INDEX_WR_SHIFT));

	/* Reset the write offset and the full flag; other fields are kept. */
	index.wr = 0;
	index.full = 0;
	wrmsrq(MSR_IA32_PEBS_INDEX, index.whole);

	/* Only counters that are both arch-PEBS capable and PEBS enabled. */
	mask = hybrid(cpuc->pmu, arch_pebs_cap).counters & cpuc->pebs_enabled;

	if (!iregs)
		iregs = &dummy_iregs;

	/* Process all but the last event for each counter. */
	for (at = base; at < top;) {
		struct arch_pebs_header *header;
		struct arch_pebs_basic *basic;
		u64 pebs_status;

		header = at;

		/* A zero-sized record would never advance 'at'; give up. */
		if (WARN_ON_ONCE(!header->size))
			break;

		/* 1st fragment or single record must have basic group */
		if (!header->basic) {
			at += header->size;
			continue;
		}

		/* The basic group directly follows the header. */
		basic = at + sizeof(struct arch_pebs_header);
		pebs_status = mask & basic->applicable_counters;
		__intel_pmu_handle_pebs_record(iregs, regs, data, at,
					       pebs_status, counts, last,
					       setup_arch_pebs_sample_data);

		/* Skip non-last fragments */
		while (arch_pebs_record_continued(header)) {
			/* Zero size cannot advance; the outer loop warns. */
			if (!header->size)
				break;
			at += header->size;
			header = at;
		}

		/* Skip last fragment or the single record */
		at += header->size;
	}

	/*
	 * Hand the per-counter 'counts' and 'last' bookkeeping to the helper
	 * for the last record of each counter (see the loop comment above).
	 */
	__intel_pmu_handle_last_pebs_record(iregs, regs, data, mask,
					    counts, last,
					    setup_arch_pebs_sample_data);
}

static void __init intel_arch_pebs_init(void)
{
	/*
@@ -2762,6 +2945,7 @@ static void __init intel_arch_pebs_init(void)
	 */
	x86_pmu.arch_pebs = 1;
	x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE;
	x86_pmu.drain_pebs = intel_pmu_drain_arch_pebs;
	x86_pmu.pebs_capable = ~0ULL;

	x86_pmu.pebs_enable = __intel_pmu_pebs_enable;
+6 −0
Original line number Diff line number Diff line
@@ -327,6 +327,12 @@
					 PERF_CAP_PEBS_FORMAT | PERF_CAP_PEBS_BASELINE | \
					 PERF_CAP_PEBS_TIMING_INFO)

/* Arch PEBS */
#define MSR_IA32_PEBS_BASE		0x000003f4
/* Read and written as a union arch_pebs_index when draining the buffer. */
#define MSR_IA32_PEBS_INDEX		0x000003f5
/* 23 low bits set; NOTE(review): not referenced in the visible code. */
#define ARCH_PEBS_OFFSET_MASK		0x7fffff
/* Left shift applied to the index 'wr' field to obtain a byte offset. */
#define ARCH_PEBS_INDEX_WR_SHIFT	4

#define MSR_IA32_RTIT_CTL		0x00000570
#define RTIT_CTL_TRACEEN		BIT(0)
#define RTIT_CTL_CYCLEACC		BIT(1)
+96 −0
Original line number Diff line number Diff line
@@ -437,6 +437,8 @@ static inline bool is_topdown_idx(int idx)
#define GLOBAL_STATUS_LBRS_FROZEN		BIT_ULL(GLOBAL_STATUS_LBRS_FROZEN_BIT)
#define GLOBAL_STATUS_TRACE_TOPAPMI_BIT		55
#define GLOBAL_STATUS_TRACE_TOPAPMI		BIT_ULL(GLOBAL_STATUS_TRACE_TOPAPMI_BIT)
/*
 * Global status bit 54, set by arch PEBS. handle_pmi_common() tests and
 * clears it and then drains the PEBS buffer.
 */
#define GLOBAL_STATUS_ARCH_PEBS_THRESHOLD_BIT	54
#define GLOBAL_STATUS_ARCH_PEBS_THRESHOLD	BIT_ULL(GLOBAL_STATUS_ARCH_PEBS_THRESHOLD_BIT)
#define GLOBAL_STATUS_PERF_METRICS_OVF_BIT	48

#define GLOBAL_CTRL_EN_PERF_METRICS		BIT_ULL(48)
@@ -507,6 +509,100 @@ struct pebs_cntr_header {

#define INTEL_CNTR_METRICS		0x3

/*
 * Arch PEBS
 */
/*
 * Bit-field view of the 64-bit value read from and written to
 * MSR_IA32_PEBS_INDEX. 'whole' accesses the raw value. The bit positions
 * noted below assume fields are allocated from the least significant bit
 * upwards.
 */
union arch_pebs_index {
	struct {
		u64 rsvd:4,	/* bits 3:0 */
		    wr:23,	/* bits 26:4, write offset; 0 means nothing to drain */
		    rsvd2:4,	/* bits 30:27 */
		    full:1,	/* bit 31, cleared together with 'wr' on drain */
		    en:1,	/* bit 32 */
		    rsvd3:3,	/* bits 35:33 */
		    thresh:23,	/* bits 58:36 */
		    rsvd4:5;	/* bits 63:59 */
	};
	u64 whole;
};

/*
 * Header found at the start of every arch-PEBS record or record fragment
 * (16 bytes). 'format' is the raw first word; the bit-fields overlay it and
 * tell which groups follow the header. The bit positions noted below assume
 * fields are allocated from the least significant bit upwards.
 */
struct arch_pebs_header {
	union {
		u64 format;
		struct {
			u64 size:16,	/* Record size, used as byte stride to the next header */
			    rsvd:14,
			    mode:1,	/* 64BIT_MODE */
			    cont:1,	/* bit 31: another fragment follows */
			    rsvd2:3,
			    cntr:5,
			    lbr:2,	/* bits 41:40: one of ARCH_PEBS_LBR_* */
			    rsvd3:7,
			    xmm:1,	/* bit 49: XMM group present */
			    ymmh:1,
			    rsvd4:2,
			    opmask:1,
			    zmmh:1,
			    h16zmm:1,
			    rsvd5:5,
			    gpr:1,	/* bit 61: GPR group present */
			    aux:1,	/* bit 62: aux (meminfo) group present */
			    basic:1;	/* bit 63: basic group present */
		};
	};
	u64 rsvd6;
};

/*
 * Basic group of an arch-PEBS record (six 64-bit words). When present it
 * directly follows the record header.
 */
struct arch_pebs_basic {
	u64 ip;
	u64 applicable_counters;	/* masked with the enabled PEBS counters on drain */
	u64 tsc;
	u64 retire	:16,	/* Retire Latency */
	    valid	:1,	/* 'retire' is only used when this is set */
	    rsvd	:47;
	u64 rsvd2;
	u64 rsvd3;
};

/*
 * Aux (memory info) group of an arch-PEBS record (eight 64-bit words).
 * It is parsed after the basic group and before the GPR group.
 */
struct arch_pebs_aux {
	u64 address;
	u64 rsvd;
	u64 rsvd2;
	u64 rsvd3;
	u64 rsvd4;
	u64 aux;
	u64 instr_latency	:16,
	    pad2		:16,
	    cache_latency	:16,
	    pad3		:16;
	u64 tsx_tuning;
};

/*
 * GPR group of an arch-PEBS record (twenty 64-bit words). The record parser
 * casts a pointer to this structure to struct pebs_gprs * when passing it to
 * __setup_pebs_gpr_group().
 * NOTE(review): that cast presumably relies on the leading fields matching
 * struct pebs_gprs - confirm against its definition.
 */
struct arch_pebs_gprs {
	u64 flags, ip, ax, cx, dx, bx, sp, bp, si, di;
	u64 r8, r9, r10, r11, r12, r13, r14, r15, ssp;
	u64 rsvd;
};

/*
 * Header (two 64-bit words) that precedes the XMM data in a record. The
 * record parser only steps over it and does not read its fields.
 */
struct arch_pebs_xer_header {
	u64 xstate;
	u64 rsvd;
};

/*
 * Values of the 2-bit 'lbr' field in struct arch_pebs_header. Zero means no
 * LBR group. For the fixed-size values the number of LBR entries is the
 * field value times ARCH_PEBS_BASE_LBR_ENTRIES; for ARCH_PEBS_LBR_NUM_VAR
 * it is taken from the 'depth' field of the LBR header.
 */
#define ARCH_PEBS_LBR_NAN		0x0
#define ARCH_PEBS_LBR_NUM_8		0x1
#define ARCH_PEBS_LBR_NUM_16		0x2
#define ARCH_PEBS_LBR_NUM_VAR		0x3
#define ARCH_PEBS_BASE_LBR_ENTRIES	8
/* Header (six 64-bit words) placed in front of the LBR entries of a record. */
struct arch_pebs_lbr_header {
	u64 rsvd;
	u64 ctl;
	u64 depth;	/* entry count when the header says ARCH_PEBS_LBR_NUM_VAR */
	u64 ler_from;
	u64 ler_to;
	u64 ler_info;
};

/*
 * AMD Extended Performance Monitoring and Debug cpuid feature detection
 */