Commit 57610d69 authored by Rafael J. Wysocki's avatar Rafael J. Wysocki
Browse files

Merge tag 'power-utilities-for-v6.18-merge' of...

Merge tag 'power-utilities-for-v6.18-merge' of git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux

Merge turbostat and x86_energy_perf_policy bug fixes for v6.18 merge
window from Len Brown.

* tag 'power-utilities-for-v6.18-merge' of git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux:
  tools/power x86_energy_perf_policy.8: Emphasize preference for SW interfaces
  tools/power x86_energy_perf_policy: Add make snapshot target
  tools/power x86_energy_perf_policy: Prefer driver HWP limits
  tools/power x86_energy_perf_policy: EPB access is only via sysfs
  tools/power x86_energy_perf_policy: Prepare for MSR/sysfs refactoring
  tools/power x86_energy_perf_policy: Enhance HWP enable
  tools/power x86_energy_perf_policy: Enhance HWP enabled check
  tools/power x86_energy_perf_policy: Fix incorrect fopen mode usage
  tools/power turbostat: Fix incorrect sorting of PMT telemetry
parents 2bc1adb0 66f43052
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -1890,7 +1890,7 @@ int pmt_telemdir_sort(const struct dirent **a, const struct dirent **b)
	sscanf((*a)->d_name, "telem%u", &aidx);
	sscanf((*b)->d_name, "telem%u", &bidx);

	return aidx >= bidx;
	return (aidx > bidx) ? 1 : (aidx < bidx) ? -1 : 0;
}

const struct dirent *pmt_diriter_next(struct pmt_diriter_t *iter)
+28 −1
Original line number Diff line number Diff line
@@ -3,6 +3,10 @@ CC = $(CROSS_COMPILE)gcc
BUILD_OUTPUT	:= $(CURDIR)
PREFIX		:= /usr
DESTDIR		:=
DAY		:= $(shell date +%Y.%m.%d)
SNAPSHOT	= x86_energy_perf_policy-$(DAY)



ifeq ("$(origin O)", "command line")
	BUILD_OUTPUT := $(O)
@@ -27,3 +31,26 @@ install : x86_energy_perf_policy
	install -d  $(DESTDIR)$(PREFIX)/share/man/man8
	install -m 644 x86_energy_perf_policy.8 $(DESTDIR)$(PREFIX)/share/man/man8

# snapshot: package a stand-alone source snapshot, $(SNAPSHOT).tar.gz.
#
# The snapshot directory receives the built binary, the source, the man page,
# and a Makefile, plus locally generated stand-ins for the kernel headers the
# source depends on (msr-index.h, bits.h, build_bug.h), so that the tool can
# be built outside of a kernel tree.
snapshot: x86_energy_perf_policy
	@rm -rf $(SNAPSHOT)
	@mkdir $(SNAPSHOT)
	@cp x86_energy_perf_policy Makefile x86_energy_perf_policy.c x86_energy_perf_policy.8 $(SNAPSHOT)

	@sed -e 's/^#include <linux\/bits.h>/#include "bits.h"/' -e 's/u64/unsigned long long/' ../../../../arch/x86/include/asm/msr-index.h > $(SNAPSHOT)/msr-index.h
	@echo '#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))' >> $(SNAPSHOT)/msr-index.h
	@echo "#define BIT(x) (1UL << (x))" > $(SNAPSHOT)/bits.h
	@echo "#define BIT_ULL(nr) (1ULL << (nr))" >> $(SNAPSHOT)/bits.h
	@echo "#define GENMASK(h, l) (((~0UL) << (l)) & (~0UL >> (sizeof(long) * 8 - 1 - (h))))" >> $(SNAPSHOT)/bits.h
	@echo "#define GENMASK_ULL(h, l) (((~0ULL) << (l)) & (~0ULL >> (sizeof(long long) * 8 - 1 - (h))))" >> $(SNAPSHOT)/bits.h

	@echo '#define BUILD_BUG_ON(cond) do { enum { compile_time_check ## __COUNTER__ = 1/(!(cond)) }; } while (0)' > $(SNAPSHOT)/build_bug.h
	@echo '#define __must_be_array(arr) 0' >> $(SNAPSHOT)/build_bug.h

	@echo PWD=. > $(SNAPSHOT)/Makefile
	@echo "CFLAGS +=	-DMSRHEADER='\"msr-index.h\"'" >> $(SNAPSHOT)/Makefile
	@echo "CFLAGS +=	-DBUILD_BUG_HEADER='\"build_bug.h\"'" >> $(SNAPSHOT)/Makefile
	@sed -e's/.*MSRHEADER.*//' Makefile >> $(SNAPSHOT)/Makefile

	@rm -f $(SNAPSHOT).tar.gz
	tar cvzf $(SNAPSHOT).tar.gz $(SNAPSHOT)
+11 −4
Original line number Diff line number Diff line
@@ -2,7 +2,7 @@
.\"  Distributed under the GPL, Copyleft 1994.
.TH X86_ENERGY_PERF_POLICY 8
.SH NAME
x86_energy_perf_policy \- Manage Energy vs. Performance Policy via x86 Model Specific Registers
x86_energy_perf_policy \- Manage Energy vs. Performance Policy
.SH SYNOPSIS
.B x86_energy_perf_policy
.RB "[ options ] [ scope ] [field \ value]"
@@ -19,9 +19,14 @@ x86_energy_perf_policy \- Manage Energy vs. Performance Policy via x86 Model Spe
.SH DESCRIPTION
\fBx86_energy_perf_policy\fP
displays and updates energy-performance policy settings specific to
Intel Architecture Processors.  Settings are accessed via Model Specific Register (MSR)
updates, no matter if the Linux cpufreq sub-system is enabled or not.
Intel Architecture Processors.  It summarizes settings available
in standard Linux interfaces (e.g. cpufreq),
and also decodes underlying Model Specific Registers (MSRs).
While \fBx86_energy_perf_policy\fP can manage energy-performance policy
using only MSR access, it prefers standard
Linux kernel interfaces, when they are available.

.SH BACKGROUND
Policy in MSR_IA32_ENERGY_PERF_BIAS (EPB)
may affect a wide range of hardware decisions,
such as how aggressively the hardware enters and exits CPU idle states (C-states)
@@ -200,7 +205,9 @@ runs only as root.
.SH FILES
.ta
.nf
/dev/cpu/*/msr
EPB: /sys/devices/system/cpu/cpu*/power/energy_perf_bias
EPP: /sys/devices/system/cpu/cpu*/cpufreq/energy_performance_preference
MSR: /dev/cpu/*/msr
.fi
.SH "SEE ALSO"
.nf
+89 −44
Original line number Diff line number Diff line
@@ -4,7 +4,7 @@
 * policy preference bias on recent X86 processors.
 */
/*
 * Copyright (c) 2010 - 2017 Intel Corporation.
 * Copyright (c) 2010 - 2025 Intel Corporation.
 * Len Brown <len.brown@intel.com>
 */

@@ -62,6 +62,7 @@ unsigned char turbo_update_value;
unsigned char update_hwp_epp;
unsigned char update_hwp_min;
unsigned char update_hwp_max;
unsigned char hwp_limits_done_via_sysfs;
unsigned char update_hwp_desired;
unsigned char update_hwp_window;
unsigned char update_hwp_use_pkg;
@@ -517,7 +518,7 @@ void for_packages(unsigned long long pkg_set, int (func)(int))

void print_version(void)
{
	printf("x86_energy_perf_policy 17.05.11 (C) Len Brown <len.brown@intel.com>\n");
	printf("x86_energy_perf_policy 2025.9.19 Len Brown <lenb@kernel.org>\n");
}

void cmdline(int argc, char **argv)
@@ -630,7 +631,7 @@ void cmdline(int argc, char **argv)
 */
FILE *fopen_or_die(const char *path, const char *mode)
{
	FILE *filep = fopen(path, "r");
	FILE *filep = fopen(path, mode);

	if (!filep)
		err(1, "%s: open failed", path);
@@ -644,7 +645,7 @@ void err_on_hypervisor(void)
	char *buffer;

	/* On VMs /proc/cpuinfo contains a "flags" entry for hypervisor */
	cpuinfo = fopen_or_die("/proc/cpuinfo", "ro");
	cpuinfo = fopen_or_die("/proc/cpuinfo", "r");

	buffer = malloc(4096);
	if (!buffer) {
@@ -809,7 +810,7 @@ void print_hwp_request_pkg(int pkg, struct msr_hwp_request *h, char *str)
		h->hwp_min, h->hwp_max, h->hwp_desired, h->hwp_epp,
		h->hwp_window, h->hwp_window & 0x7F, (h->hwp_window >> 7) & 0x7);
}
void read_hwp_request(int cpu, struct msr_hwp_request *hwp_req, unsigned int msr_offset)
void read_hwp_request_msr(int cpu, struct msr_hwp_request *hwp_req, unsigned int msr_offset)
{
	unsigned long long msr;

@@ -823,7 +824,7 @@ void read_hwp_request(int cpu, struct msr_hwp_request *hwp_req, unsigned int msr
	hwp_req->hwp_use_pkg = (((msr) >> 42) & 0x1);
}

void write_hwp_request(int cpu, struct msr_hwp_request *hwp_req, unsigned int msr_offset)
void write_hwp_request_msr(int cpu, struct msr_hwp_request *hwp_req, unsigned int msr_offset)
{
	unsigned long long msr = 0;

@@ -843,7 +844,7 @@ void write_hwp_request(int cpu, struct msr_hwp_request *hwp_req, unsigned int ms
	put_msr(cpu, msr_offset, msr);
}

static int get_epb(int cpu)
static int get_epb_sysfs(int cpu)
{
	char path[SYSFS_PATH_MAX];
	char linebuf[3];
@@ -865,7 +866,7 @@ static int get_epb(int cpu)
	return (int)val;
}

static int set_epb(int cpu, int val)
static int set_epb_sysfs(int cpu, int val)
{
	char path[SYSFS_PATH_MAX];
	char linebuf[3];
@@ -895,14 +896,14 @@ int print_cpu_msrs(int cpu)
	struct msr_hwp_cap cap;
	int epb;

	epb = get_epb(cpu);
	epb = get_epb_sysfs(cpu);
	if (epb >= 0)
		printf("cpu%d: EPB %u\n", cpu, (unsigned int) epb);

	if (!has_hwp)
		return 0;

	read_hwp_request(cpu, &req, MSR_HWP_REQUEST);
	read_hwp_request_msr(cpu, &req, MSR_HWP_REQUEST);
	print_hwp_request(cpu, &req, "");

	read_hwp_cap(cpu, &cap, MSR_HWP_CAPABILITIES);
@@ -919,7 +920,7 @@ int print_pkg_msrs(int pkg)
	if (!has_hwp)
		return 0;

	read_hwp_request(first_cpu_in_pkg[pkg], &req, MSR_HWP_REQUEST_PKG);
	read_hwp_request_msr(first_cpu_in_pkg[pkg], &req, MSR_HWP_REQUEST_PKG);
	print_hwp_request_pkg(pkg, &req, "");

	if (has_hwp_notify) {
@@ -951,8 +952,10 @@ int ratio_2_sysfs_khz(int ratio)
}
/*
 * If HWP is enabled and cpufreq sysfs attributes are present,
 * then update sysfs, so that it will not become
 * stale when we write to MSRs.
 * then update via sysfs. The intel_pstate driver may modify (clip)
 * this request, say, when HWP_CAP is outside of PLATFORM_INFO limits,
 * and the driver-chosen value takes precedence.
 *
 * (intel_pstate's max_perf_pct and min_perf_pct will follow cpufreq,
 *  so we don't have to touch that.)
 */
@@ -1007,6 +1010,8 @@ int update_sysfs(int cpu)
	if (update_hwp_max)
		update_cpufreq_scaling_freq(1, cpu, req_update.hwp_max);

	hwp_limits_done_via_sysfs = 1;

	return 0;
}

@@ -1074,21 +1079,21 @@ int check_hwp_request_v_hwp_capabilities(int cpu, struct msr_hwp_request *req, s
	return 0;
}

int update_hwp_request(int cpu)
int update_hwp_request_msr(int cpu)
{
	struct msr_hwp_request req;
	struct msr_hwp_cap cap;

	int msr_offset = MSR_HWP_REQUEST;

	read_hwp_request(cpu, &req, msr_offset);
	read_hwp_request_msr(cpu, &req, msr_offset);
	if (debug)
		print_hwp_request(cpu, &req, "old: ");

	if (update_hwp_min)
	if (update_hwp_min && !hwp_limits_done_via_sysfs)
		req.hwp_min = req_update.hwp_min;

	if (update_hwp_max)
	if (update_hwp_max && !hwp_limits_done_via_sysfs)
		req.hwp_max = req_update.hwp_max;

	if (update_hwp_desired)
@@ -1111,15 +1116,15 @@ int update_hwp_request(int cpu)

	verify_hwp_req_self_consistency(cpu, &req);

	write_hwp_request(cpu, &req, msr_offset);
	write_hwp_request_msr(cpu, &req, msr_offset);

	if (debug) {
		read_hwp_request(cpu, &req, msr_offset);
		read_hwp_request_msr(cpu, &req, msr_offset);
		print_hwp_request(cpu, &req, "new: ");
	}
	return 0;
}
int update_hwp_request_pkg(int pkg)
int update_hwp_request_pkg_msr(int pkg)
{
	struct msr_hwp_request req;
	struct msr_hwp_cap cap;
@@ -1127,7 +1132,7 @@ int update_hwp_request_pkg(int pkg)

	int msr_offset = MSR_HWP_REQUEST_PKG;

	read_hwp_request(cpu, &req, msr_offset);
	read_hwp_request_msr(cpu, &req, msr_offset);
	if (debug)
		print_hwp_request_pkg(pkg, &req, "old: ");

@@ -1155,10 +1160,10 @@ int update_hwp_request_pkg(int pkg)

	verify_hwp_req_self_consistency(cpu, &req);

	write_hwp_request(cpu, &req, msr_offset);
	write_hwp_request_msr(cpu, &req, msr_offset);

	if (debug) {
		read_hwp_request(cpu, &req, msr_offset);
		read_hwp_request_msr(cpu, &req, msr_offset);
		print_hwp_request_pkg(pkg, &req, "new: ");
	}
	return 0;
@@ -1166,31 +1171,40 @@ int update_hwp_request_pkg(int pkg)

int enable_hwp_on_cpu(int cpu)
{
	unsigned long long msr;
	unsigned long long old_msr, new_msr;

	get_msr(cpu, MSR_PM_ENABLE, &old_msr);

	get_msr(cpu, MSR_PM_ENABLE, &msr);
	put_msr(cpu, MSR_PM_ENABLE, 1);
	if (old_msr & 1)
		return 0;	/* already enabled */

	new_msr = old_msr | 1;
	put_msr(cpu, MSR_PM_ENABLE, new_msr);

	if (verbose)
		printf("cpu%d: MSR_PM_ENABLE old: %d new: %d\n", cpu, (unsigned int) msr, 1);
		printf("cpu%d: MSR_PM_ENABLE old: %llX new: %llX\n", cpu, old_msr, new_msr);

	return 0;
}

int update_cpu_msrs(int cpu)
int update_cpu_epb_sysfs(int cpu)
{
	unsigned long long msr;
	int epb;

	if (update_epb) {
		epb = get_epb(cpu);
		set_epb(cpu, new_epb);
	epb = get_epb_sysfs(cpu);
	set_epb_sysfs(cpu, new_epb);

	if (verbose)
		printf("cpu%d: ENERGY_PERF_BIAS old: %d new: %d\n",
			cpu, epb, (unsigned int) new_epb);

	return 0;
}

int update_cpu_msrs(int cpu)
{
	unsigned long long msr;

	if (update_turbo) {
		int turbo_is_present_and_disabled;

@@ -1224,7 +1238,7 @@ int update_cpu_msrs(int cpu)
	if (!hwp_update_enabled())
		return 0;

	update_hwp_request(cpu);
	update_hwp_request_msr(cpu);
	return 0;
}

@@ -1312,6 +1326,17 @@ void for_all_cpus_in_set(size_t set_size, cpu_set_t *cpu_set, int (func)(int))
		if (CPU_ISSET_S(cpu_num, set_size, cpu_set))
			func(cpu_num);
}
/*
 * for_all_cpus_in_set_and()
 *
 * Invoke func(cpu) for every CPU number in 0..max_cpu_num that is a
 * member of cpu_set, and return the logical AND of the results.
 *
 * func is invoked on every member of the set (no short-circuit),
 * so any per-CPU side effects of func occur for all of them.
 * Any non-zero return from func counts as true.
 *
 * Returns 1 if func returned non-zero for every member visited
 * (including when no member was visited), otherwise 0.
 */
int for_all_cpus_in_set_and(size_t set_size, cpu_set_t *cpu_set, int (func)(int))
{
	int cpu_num;
	int retval = 1;

	for (cpu_num = 0; cpu_num <= max_cpu_num; ++cpu_num) {
		if (!CPU_ISSET_S(cpu_num, set_size, cpu_set))
			continue;

		/* logical, not bitwise: (1 & 2) would wrongly yield 0 */
		if (!func(cpu_num))
			retval = 0;
	}

	return retval;
}

void init_data_structures(void)
{
@@ -1326,21 +1351,38 @@ void init_data_structures(void)
	for_all_proc_cpus(mark_cpu_present);
}

/* clear has_hwp if it is not enable (or being enabled) */
/*
 * is_hwp_enabled_on_cpu()
 *
 * Read MSR_PM_ENABLE on cpu_num and report its low bit (bit 0).
 * In verbose mode, also print a per-CPU "HWP" / "No-HWP" line on stderr.
 *
 * Returns 1 when the bit is set, 0 otherwise.
 */
int is_hwp_enabled_on_cpu(int cpu_num)
{
	unsigned long long pm_enable;
	int enabled;

	get_msr(cpu_num, MSR_PM_ENABLE, &pm_enable);

	/* MSR_PM_ENABLE bit 0 is set when HWP is enabled and its MSRs are visible */
	enabled = (pm_enable & 1) ? 1 : 0;

	if (verbose) {
		const char *prefix = enabled ? "" : "No-";

		fprintf(stderr, "cpu%d: %sHWP\n", cpu_num, prefix);
	}

	return enabled;
}

/*
 * verify_hwp_is_enabled()
 *
 * Set (has_hwp=0) if no HWP feature or any of selected CPU set does not have HWP enabled
 */
void verify_hwp_is_enabled(void)
{
	unsigned long long msr;
	int retval;

	if (!has_hwp)	/* set in early_cpuid() */
		return;

	/* MSR_PM_ENABLE[1] == 1 if HWP is enabled and MSRs visible */
	get_msr(base_cpu, MSR_PM_ENABLE, &msr);
	if ((msr & 1) == 0) {
	retval = for_all_cpus_in_set_and(cpu_setsize, cpu_selected_set, is_hwp_enabled_on_cpu);

	if (retval == 0) {
		fprintf(stderr, "HWP can be enabled using '--hwp-enable'\n");
		has_hwp = 0;
		return;
	}
}

@@ -1551,10 +1593,13 @@ int main(int argc, char **argv)

	/* update CPU set */
	if (cpu_selected_set) {
		if (update_epb)
			for_all_cpus_in_set(cpu_setsize, cpu_selected_set, update_cpu_epb_sysfs);
		for_all_cpus_in_set(cpu_setsize, cpu_selected_set, update_sysfs);
		for_all_cpus_in_set(cpu_setsize, cpu_selected_set, update_cpu_msrs);

	} else if (pkg_selected_set)
		for_packages(pkg_selected_set, update_hwp_request_pkg);
		for_packages(pkg_selected_set, update_hwp_request_pkg_msr);

	return 0;
}