Commit eeefc13c authored by Ravi Bangoria, committed by Arnaldo Carvalho de Melo
Browse files

perf amd ibs: Add Load Latency bits in raw dump



IBS OP PMU on Zen5 supports Load Latency filtering. Decode and dump Load
Latency filtering related bits into perf script raw dump.

Also add oneliner example in the perf-amd-ibs man page.

Signed-off-by: Ravi Bangoria <ravi.bangoria@amd.com>
Cc: Ananth Narayan <ananth.narayan@amd.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Joe Mario <jmario@redhat.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sandipan Das <sandipan.das@amd.com>
Cc: Santosh Shukla <santosh.shukla@amd.com>
Cc: Stephane Eranian <eranian@google.com>
Link: https://lore.kernel.org/r/20250429035938.1301-2-ravi.bangoria@amd.com


Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
parent 4d728bb9
Loading
Loading
Loading
Loading
+9 −0
Original line number Diff line number Diff line
@@ -85,6 +85,15 @@ System-wide profile, uOps event, sampling period: 100000, L3MissOnly (Zen4 onwar

	# perf record -e ibs_op/cnt_ctl=1,l3missonly=1/ -c 100000 -a

System-wide profile, cycles event, sampling period: 100000, LdLat filtering (Zen5
onward)

	# perf record -e ibs_op/ldlat=128/ -c 100000 -a

	Supported load latency threshold values range from 128 to 2048 (both
	inclusive). A latency value that is a multiple of 128 incurs slightly
	less profiling overhead than other values.

Per process(upstream v6.2 onward), uOps event, sampling period: 100000

	# perf record -e ibs_op/cnt_ctl=1/ -c 100000 -p 1234
+12 −2
Original line number Diff line number Diff line
@@ -19,6 +19,7 @@

/*
 * File-scope state for the IBS raw-dump code.
 * NOTE(review): cpu_family/cpu_model are presumably filled in by
 * parse_cpuid() -- confirm against the rest of the file.
 */
static u32 cpu_family, cpu_model, ibs_fetch_type, ibs_op_type;
/*
 * Set when the "ibs_op" PMU reports the "zen4_ibs_extensions" capability;
 * pr_ibs_op_ctl() prints the L3MissOnly field only when this is set.
 */
static bool zen4_ibs_extensions;
/*
 * Set when the "ibs_op" PMU reports the "ldlat" capability;
 * pr_ibs_op_ctl() prints LdLatThrsh/LdLatEn only when this is set.
 */
static bool ldlat_cap;

static void pr_ibs_fetch_ctl(union ibs_fetch_ctl reg)
{
@@ -78,14 +79,20 @@ static void pr_ic_ibs_extd_ctl(union ic_ibs_extd_ctl reg)
/*
 * Print a one-line decode of the IBS op control register to stdout.
 *
 * The line always carries the raw register value, MaxCnt (opmaxcnt_ext
 * shifted up by 16, OR'd with opmaxcnt, the result shifted left by 4), En,
 * Val, CntCtl with a "uOps"/"cycles" label, and CurCnt.  L3MissOnly is added
 * only when zen4_ibs_extensions is set; LdLatThrsh/LdLatEn are added only
 * when ldlat_cap is set.
 */
static void pr_ibs_op_ctl(union ibs_op_ctl reg)
{
	/*
	 * Optional fragments: each buffer is sized with sizeof() on a template
	 * string (so the terminating NUL is counted) and starts out empty, which
	 * makes the matching "%s" print nothing unless it is filled in below.
	 */
	char l3_miss_only[sizeof(" L3MissOnly _")] = "";
	char ldlat[sizeof(" LdLatThrsh __ LdLatEn _")] = "";

	if (zen4_ibs_extensions)
		snprintf(l3_miss_only, sizeof(l3_miss_only), " L3MissOnly %d", reg.l3_miss_only);

	/*
	 * NOTE(review): this listing is a diff rendered without +/- markers.
	 * The printf() line directly below and the later argument line ending
	 * in "reg.opcurcnt);" are the pre-change versions; the variants with the
	 * trailing "%s\n" and ", ldlat);" are their replacements.
	 */
	printf("ibs_op_ctl:\t%016llx MaxCnt %9d%s En %d Val %d CntCtl %d=%s CurCnt %9d\n",
	if (ldlat_cap) {
		snprintf(ldlat, sizeof(ldlat), " LdLatThrsh %2d LdLatEn %d",
			 reg.ldlat_thrsh, reg.ldlat_en);
	}

	printf("ibs_op_ctl:\t%016llx MaxCnt %9d%s En %d Val %d CntCtl %d=%s CurCnt %9d%s\n",
		reg.val, ((reg.opmaxcnt_ext << 16) | reg.opmaxcnt) << 4, l3_miss_only,
		reg.op_en, reg.op_val, reg.cnt_ctl,
		reg.cnt_ctl ? "uOps" : "cycles", reg.opcurcnt);
		reg.cnt_ctl ? "uOps" : "cycles", reg.opcurcnt, ldlat);
}

static void pr_ibs_op_data(union ibs_op_data reg)
@@ -331,6 +338,9 @@ bool evlist__has_amd_ibs(struct evlist *evlist)
	if (perf_env__find_pmu_cap(env, "ibs_op", "zen4_ibs_extensions"))
		zen4_ibs_extensions = 1;

	if (perf_env__find_pmu_cap(env, "ibs_op", "ldlat"))
		ldlat_cap = 1;

	if (ibs_fetch_type || ibs_op_type) {
		if (!cpu_family)
			parse_cpuid(env);