Merge back cpufreq material for 6.12 (287f97a1) · Commits · git / linux-net

arch/x86/include/asm/topology.h

+13 −0

Original line number	Diff line number	Diff line
		@@ -282,9 +282,22 @@ static inline long arch_scale_freq_capacity(int cpu)
		}
		#define arch_scale_freq_capacity arch_scale_freq_capacity

		bool arch_enable_hybrid_capacity_scale(void);
		void arch_set_cpu_capacity(int cpu, unsigned long cap, unsigned long max_cap,
		unsigned long cap_freq, unsigned long base_freq);

		unsigned long arch_scale_cpu_capacity(int cpu);
		#define arch_scale_cpu_capacity arch_scale_cpu_capacity

		extern void arch_set_max_freq_ratio(bool turbo_disabled);
		extern void freq_invariance_set_perf_ratio(u64 ratio, bool turbo_disabled);
		#else
		static inline bool arch_enable_hybrid_capacity_scale(void) { return false; }
		static inline void arch_set_cpu_capacity(int cpu, unsigned long cap,
		unsigned long max_cap,
		unsigned long cap_freq,
		unsigned long base_freq) { }

		static inline void arch_set_max_freq_ratio(bool turbo_disabled) { }
		static inline void freq_invariance_set_perf_ratio(u64 ratio, bool turbo_disabled) { }
		#endif

arch/x86/kernel/cpu/aperfmperf.c

+87 −2

Original line number	Diff line number	Diff line
		@@ -349,9 +349,89 @@ static DECLARE_WORK(disable_freq_invariance_work,
		DEFINE_PER_CPU(unsigned long, arch_freq_scale) = SCHED_CAPACITY_SCALE;
		EXPORT_PER_CPU_SYMBOL_GPL(arch_freq_scale);

		/*
		 * Gates every hybrid capacity scaling code path in this file. It starts out
		 * false and is enabled by arch_enable_hybrid_capacity_scale().
		 */
		static DEFINE_STATIC_KEY_FALSE(arch_hybrid_cap_scale_key);

		/*
		 * Per-CPU scale-invariance parameters used when hybrid capacity scaling is
		 * enabled.
		 */
		struct arch_hybrid_cpu_scale {
		/*
		 * Initialized to SCHED_CAPACITY_SCALE; arch_set_cpu_capacity() stores
		 * (cap << SCHED_CAPACITY_SHIFT) / max_cap here.
		 */
		unsigned long capacity;
		/*
		 * Initialized to arch_max_freq_ratio; arch_set_cpu_capacity() stores
		 * (cap_freq << SCHED_CAPACITY_SHIFT) / base_freq here.
		 */
		unsigned long freq_ratio;
		};

		/* Allocated with alloc_percpu() in arch_enable_hybrid_capacity_scale(). */
		static struct arch_hybrid_cpu_scale __percpu *arch_cpu_scale;

		/**
		 * arch_enable_hybrid_capacity_scale() - Turn on hybrid CPU capacity scaling
		 *
		 * Allocates the per-CPU scaling data, seeds the entry of every possible CPU
		 * with SCHED_CAPACITY_SCALE and the current arch_max_freq_ratio, and then
		 * enables the static key that gates the hybrid scaling code paths.
		 *
		 * This has to run before arch_set_cpu_capacity() is used.
		 *
		 * Return: true when scaling is enabled on return (a repeated call warns once
		 * and also returns true), false when the per-CPU allocation failed.
		 */
		bool arch_enable_hybrid_capacity_scale(void)
		{
			int cpu;

			/* A second call is a caller bug, but scaling is on, so report success. */
			if (static_branch_unlikely(&arch_hybrid_cap_scale_key)) {
				WARN_ONCE(1, "Hybrid CPU capacity scaling already enabled");
				return true;
			}

			arch_cpu_scale = alloc_percpu(struct arch_hybrid_cpu_scale);
			if (!arch_cpu_scale)
				return false;

			for_each_possible_cpu(cpu) {
				struct arch_hybrid_cpu_scale *scale;

				scale = per_cpu_ptr(arch_cpu_scale, cpu);
				scale->capacity = SCHED_CAPACITY_SCALE;
				scale->freq_ratio = arch_max_freq_ratio;
			}

			/* Flip the key only after every per-CPU entry holds valid data. */
			static_branch_enable(&arch_hybrid_cap_scale_key);

			pr_info("Hybrid CPU capacity scaling enabled\n");

			return true;
		}

		/**
		* arch_set_cpu_capacity() - Set scale-invariance parameters for a CPU
		* @cpu: Target CPU.
		* @cap: Capacity of @cpu at its maximum frequency, relative to @max_cap.
		* @max_cap: System-wide maximum CPU capacity.
		* @cap_freq: Frequency of @cpu corresponding to @cap.
		* @base_freq: Frequency of @cpu at which MPERF counts.
		*
		* The units in which @cap and @max_cap are expressed do not matter, so long
		* as they are consistent, because the former is effectively divided by the
		* latter. Analogously for @cap_freq and @base_freq.
		*
		* After calling this function for all CPUs, call arch_rebuild_sched_domains()
		* to let the scheduler know that capacity-aware scheduling can be used going
		* forward.
		*/
		void arch_set_cpu_capacity(int cpu, unsigned long cap, unsigned long max_cap,
		unsigned long cap_freq, unsigned long base_freq)
		{
		if (static_branch_likely(&arch_hybrid_cap_scale_key)) {
		WRITE_ONCE(per_cpu_ptr(arch_cpu_scale, cpu)->capacity,
		div_u64(cap << SCHED_CAPACITY_SHIFT, max_cap));
		WRITE_ONCE(per_cpu_ptr(arch_cpu_scale, cpu)->freq_ratio,
		div_u64(cap_freq << SCHED_CAPACITY_SHIFT, base_freq));
		} else {
		WARN_ONCE(1, "Hybrid CPU capacity scaling not enabled");
		}
		}

		unsigned long arch_scale_cpu_capacity(int cpu)
		{
		if (static_branch_unlikely(&arch_hybrid_cap_scale_key))
		return READ_ONCE(per_cpu_ptr(arch_cpu_scale, cpu)->capacity);

		return SCHED_CAPACITY_SCALE;
		}
		EXPORT_SYMBOL_GPL(arch_scale_cpu_capacity);

		static void scale_freq_tick(u64 acnt, u64 mcnt)
		{
		u64 freq_scale;
		u64 freq_scale, freq_ratio;

		if (!arch_scale_freq_invariant())
		return;
		@@ -359,7 +439,12 @@ static void scale_freq_tick(u64 acnt, u64 mcnt)
		if (check_shl_overflow(acnt, 2*SCHED_CAPACITY_SHIFT, &acnt))
		goto error;

		if (check_mul_overflow(mcnt, arch_max_freq_ratio, &mcnt) \|\| !mcnt)
		if (static_branch_unlikely(&arch_hybrid_cap_scale_key))
		freq_ratio = READ_ONCE(this_cpu_ptr(arch_cpu_scale)->freq_ratio);
		else
		freq_ratio = arch_max_freq_ratio;

		if (check_mul_overflow(mcnt, freq_ratio, &mcnt) \|\| !mcnt)
		goto error;

		freq_scale = div64_u64(acnt, mcnt);

drivers/cpufreq/cpufreq.c

+4 −23

Original line number	Diff line number	Diff line
		@@ -575,30 +575,11 @@ unsigned int cpufreq_policy_transition_delay_us(struct cpufreq_policy *policy)
		return policy->transition_delay_us;

		latency = policy->cpuinfo.transition_latency / NSEC_PER_USEC;
		if (latency) {
		unsigned int max_delay_us = 2 * MSEC_PER_SEC;
		if (latency)
		/* Give a 50% breathing room between updates */
		return latency + (latency >> 1);

		/*
		* If the platform already has high transition_latency, use it
		* as-is.
		*/
		if (latency > max_delay_us)
		return latency;

		/*
		* For platforms that can change the frequency very fast (< 2
		* us), the above formula gives a decent transition delay. But
		* for platforms where transition_latency is in milliseconds, it
		* ends up giving unrealistic values.
		*
		* Cap the default transition delay to 2 ms, which seems to be
		* a reasonable amount of time after which we should reevaluate
		* the frequency.
		*/
		return min(latency * LATENCY_MULTIPLIER, max_delay_us);
		}

		return LATENCY_MULTIPLIER;
		return USEC_PER_MSEC;
		}
		EXPORT_SYMBOL_GPL(cpufreq_policy_transition_delay_us);

drivers/cpufreq/intel_pstate.c

+236 −4

Original line number	Diff line number	Diff line
		@@ -16,6 +16,7 @@
		#include <linux/tick.h>
		#include <linux/slab.h>
		#include <linux/sched/cpufreq.h>
		#include <linux/sched/smt.h>
		#include <linux/list.h>
		#include <linux/cpu.h>
		#include <linux/cpufreq.h>
		@@ -215,6 +216,7 @@ struct global_params {
		* @hwp_req_cached: Cached value of the last HWP Request MSR
		* @hwp_cap_cached: Cached value of the last HWP Capabilities MSR
		* @last_io_update: Last time when IO wake flag was set
		* @capacity_perf: Highest perf used for scale invariance
		* @sched_flags: Store scheduler flags for possible cross CPU update
		* @hwp_boost_min: Last HWP boosted min performance
		* @suspended: Whether or not the driver has been suspended.
		@@ -253,6 +255,7 @@ struct cpudata {
		u64 hwp_req_cached;
		u64 hwp_cap_cached;
		u64 last_io_update;
		unsigned int capacity_perf;
		unsigned int sched_flags;
		u32 hwp_boost_min;
		bool suspended;
		@@ -295,6 +298,7 @@ static int hwp_mode_bdw __ro_after_init;
		static bool per_cpu_limits __ro_after_init;
		static bool hwp_forced __ro_after_init;
		static bool hwp_boost __read_mostly;
		static bool hwp_is_hybrid;

		static struct cpufreq_driver *intel_pstate_driver __read_mostly;

		@@ -934,6 +938,139 @@ static struct freq_attr *hwp_cpufreq_attrs[] = {
		NULL,
		};

		/*
		 * CPU whose capacity_perf is passed as the system-wide maximum when per-CPU
		 * capacities are set. The code in this file treats a NULL value as "hybrid
		 * capacity scaling is not in use".
		 */
		static struct cpudata *hybrid_max_perf_cpu __read_mostly;
		/*
		 * Protects hybrid_max_perf_cpu, the capacity_perf fields in struct cpudata,
		 * and the x86 arch scale-invariance information from concurrent updates.
		 */
		static DEFINE_MUTEX(hybrid_capacity_lock);

		static void hybrid_set_cpu_capacity(struct cpudata *cpu)
		{
		arch_set_cpu_capacity(cpu->cpu, cpu->capacity_perf,
		hybrid_max_perf_cpu->capacity_perf,
		cpu->capacity_perf,
		cpu->pstate.max_pstate_physical);

		pr_debug("CPU%d: perf = %u, max. perf = %u, base perf = %d\n", cpu->cpu,
		cpu->capacity_perf, hybrid_max_perf_cpu->capacity_perf,
		cpu->pstate.max_pstate_physical);
		}

		/*
		 * Reset the scale-invariance parameters of CPU @cpunum. Passing 1 for every
		 * argument makes both ratios taken by arch_set_cpu_capacity() (cap / max_cap
		 * and cap_freq / base_freq) equal to 1:1.
		 */
		static void hybrid_clear_cpu_capacity(unsigned int cpunum)
		{
		arch_set_cpu_capacity(cpunum, 1, 1, 1, 1);
		}

		/*
		 * Refresh cpu->capacity_perf, the performance level used for capacity
		 * scaling: max_pstate_physical when turbo is disabled globally, otherwise
		 * the highest performance field of the cached HWP capabilities value.
		 */
		static void hybrid_get_capacity_perf(struct cpudata *cpu)
		{
			if (READ_ONCE(global.no_turbo))
				cpu->capacity_perf = cpu->pstate.max_pstate_physical;
			else
				cpu->capacity_perf = HWP_HIGHEST_PERF(READ_ONCE(cpu->hwp_cap_cached));
		}

		/*
		 * Re-apply the capacity parameters of every online CPU that has a cpudata
		 * entry in all_cpu_data[].
		 */
		static void hybrid_set_capacity_of_cpus(void)
		{
			int cpunum;

			for_each_online_cpu(cpunum) {
				struct cpudata *cpu = all_cpu_data[cpunum];

				/* Skip CPUs without a cpudata entry. */
				if (!cpu)
					continue;

				hybrid_set_cpu_capacity(cpu);
			}
		}

		/*
		 * Pick a new hybrid_max_perf_cpu among the online CPUs and re-apply the
		 * capacity of all of them. If no CPU with nonzero capacity_perf is found,
		 * reset the capacity parameters of all online CPUs instead.
		 */
		static void hybrid_update_cpu_capacity_scaling(void)
		{
			struct cpudata *best_cpu = NULL;
			unsigned int best_perf = 0;
			int cpunum;

			for_each_online_cpu(cpunum) {
				struct cpudata *cpu = all_cpu_data[cpunum];

				if (!cpu)
					continue;

				/*
				 * With no current maximum (initialization), the performance
				 * of each CPU at full capacity has to be determined first.
				 */
				if (!hybrid_max_perf_cpu)
					hybrid_get_capacity_perf(cpu);

				/*
				 * A non-NULL hybrid_max_perf_cpu is the one being replaced,
				 * so it is not a candidate for the new maximum.
				 */
				if (cpu != hybrid_max_perf_cpu && cpu->capacity_perf > best_perf) {
					best_perf = cpu->capacity_perf;
					best_cpu = cpu;
				}
			}

			if (!best_cpu) {
				pr_info("Found no CPUs with nonzero maximum performance\n");
				/* Revert to the flat CPU capacity structure. */
				for_each_online_cpu(cpunum)
					hybrid_clear_cpu_capacity(cpunum);

				return;
			}

			hybrid_max_perf_cpu = best_cpu;
			hybrid_set_capacity_of_cpus();
		}

		/*
		 * Recompute capacity scaling from scratch. Clearing hybrid_max_perf_cpu first
		 * makes hybrid_update_cpu_capacity_scaling() refresh capacity_perf for every
		 * online CPU (it only does that while the pointer is NULL) and consider all
		 * of them as candidates for the new maximum.
		 *
		 * Every caller visible in this file holds hybrid_capacity_lock.
		 */
		static void __hybrid_init_cpu_capacity_scaling(void)
		{
		hybrid_max_perf_cpu = NULL;
		hybrid_update_cpu_capacity_scaling();
		}

		/*
		 * Set up hybrid CPU capacity scaling, or recompute it if it is already in
		 * use. On first-time enablement ITMT support is cleared afterwards, outside
		 * of hybrid_capacity_lock.
		 */
		static void hybrid_init_cpu_capacity_scaling(void)
		{
			bool disable_itmt = false;

			mutex_lock(&hybrid_capacity_lock);

			if (hybrid_max_perf_cpu) {
				/*
				 * Hybrid CPU capacity scaling has been enabled already and
				 * the driver is just changing the operation mode.
				 */
				__hybrid_init_cpu_capacity_scaling();
			} else if (hwp_is_hybrid && !sched_smt_active() &&
				   arch_enable_hybrid_capacity_scale()) {
				/*
				 * On hybrid systems, use asym capacity instead of ITMT, but
				 * because the capacity of SMT threads is not deterministic
				 * even approximately, do not do that when SMT is in use.
				 */
				__hybrid_init_cpu_capacity_scaling();
				disable_itmt = true;
			}

			mutex_unlock(&hybrid_capacity_lock);

			/*
			 * Disabling ITMT causes sched domains to be rebuilt to disable asym
			 * packing and enable asym capacity.
			 */
			if (disable_itmt)
				sched_clear_itmt_support();
		}

		static void __intel_pstate_get_hwp_cap(struct cpudata *cpu)
		{
		u64 cap;
		@@ -962,6 +1099,43 @@ static void intel_pstate_get_hwp_cap(struct cpudata *cpu)
		}
		}

		/*
		 * Refresh the HWP capabilities and capacity_perf of @cpu, then update the
		 * capacity scaling accordingly. Does nothing beyond taking and releasing
		 * hybrid_capacity_lock when hybrid_max_perf_cpu is NULL.
		 */
		static void hybrid_update_capacity(struct cpudata *cpu)
		{
			unsigned int prev_max_perf;

			mutex_lock(&hybrid_capacity_lock);

			if (hybrid_max_perf_cpu) {
				/*
				 * The maximum performance of @cpu may have changed, but
				 * assume that the performance of the other CPUs has not.
				 * Record the current maximum before refreshing @cpu, which
				 * may itself be hybrid_max_perf_cpu.
				 */
				prev_max_perf = hybrid_max_perf_cpu->capacity_perf;

				intel_pstate_get_hwp_cap(cpu);
				hybrid_get_capacity_perf(cpu);

				if (cpu->capacity_perf > prev_max_perf) {
					/* @cpu becomes the new maximum. */
					hybrid_max_perf_cpu = cpu;
					hybrid_set_capacity_of_cpus();
				} else if (cpu == hybrid_max_perf_cpu &&
					   cpu->capacity_perf < prev_max_perf) {
					/* The current maximum dropped; look for a replacement. */
					hybrid_update_cpu_capacity_scaling();
				} else {
					/* The maximum is unchanged, so only @cpu needs updating. */
					hybrid_set_cpu_capacity(cpu);
				}
			}

			mutex_unlock(&hybrid_capacity_lock);
		}

		static void intel_pstate_hwp_set(unsigned int cpu)
		{
		struct cpudata *cpu_data = all_cpu_data[cpu];
		@@ -1070,6 +1244,22 @@ static void intel_pstate_hwp_offline(struct cpudata *cpu)
		value \|= HWP_ENERGY_PERF_PREFERENCE(HWP_EPP_POWERSAVE);

		wrmsrl_on_cpu(cpu->cpu, MSR_HWP_REQUEST, value);

		mutex_lock(&hybrid_capacity_lock);

		if (!hybrid_max_perf_cpu) {
		mutex_unlock(&hybrid_capacity_lock);

		return;
		}

		if (hybrid_max_perf_cpu == cpu)
		hybrid_update_cpu_capacity_scaling();

		mutex_unlock(&hybrid_capacity_lock);

		/* Reset the capacity of the CPU going offline to the initial value. */
		hybrid_clear_cpu_capacity(cpu->cpu);
		}

		#define POWER_CTL_EE_ENABLE 1
		@@ -1165,21 +1355,46 @@ static void __intel_pstate_update_max_freq(struct cpudata *cpudata,
		static void intel_pstate_update_limits(unsigned int cpu)
		{
		struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpu);
		struct cpudata *cpudata;

		if (!policy)
		return;

		__intel_pstate_update_max_freq(all_cpu_data[cpu], policy);
		cpudata = all_cpu_data[cpu];

		__intel_pstate_update_max_freq(cpudata, policy);

		/* Prevent the driver from being unregistered now. */
		mutex_lock(&intel_pstate_driver_lock);

		cpufreq_cpu_release(policy);

		hybrid_update_capacity(cpudata);

		mutex_unlock(&intel_pstate_driver_lock);
		}

		static void intel_pstate_update_limits_for_all(void)
		{
		int cpu;

		for_each_possible_cpu(cpu)
		intel_pstate_update_limits(cpu);
		for_each_possible_cpu(cpu) {
		struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpu);

		if (!policy)
		continue;

		__intel_pstate_update_max_freq(all_cpu_data[cpu], policy);

		cpufreq_cpu_release(policy);
		}

		mutex_lock(&hybrid_capacity_lock);

		if (hybrid_max_perf_cpu)
		__hybrid_init_cpu_capacity_scaling();

		mutex_unlock(&hybrid_capacity_lock);
		}

		/************************ sysfs begin **********************/
		@@ -1618,6 +1833,13 @@ static void intel_pstate_notify_work(struct work_struct *work)
		__intel_pstate_update_max_freq(cpudata, policy);

		cpufreq_cpu_release(policy);

		/*
		* The driver will not be unregistered while this function is
		* running, so update the capacity without acquiring the driver
		* lock.
		*/
		hybrid_update_capacity(cpudata);
		}

		wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_STATUS, 0);
		@@ -2034,8 +2256,10 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)

		if (pstate_funcs.get_cpu_scaling) {
		cpu->pstate.scaling = pstate_funcs.get_cpu_scaling(cpu->cpu);
		if (cpu->pstate.scaling != perf_ctl_scaling)
		if (cpu->pstate.scaling != perf_ctl_scaling) {
		intel_pstate_hybrid_hwp_adjust(cpu);
		hwp_is_hybrid = true;
		}
		} else {
		cpu->pstate.scaling = perf_ctl_scaling;
		}
		@@ -2425,6 +2649,10 @@ static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] __initconst = {
		X86_MATCH(INTEL_ICELAKE_X, core_funcs),
		X86_MATCH(INTEL_SAPPHIRERAPIDS_X, core_funcs),
		X86_MATCH(INTEL_EMERALDRAPIDS_X, core_funcs),
		X86_MATCH(INTEL_GRANITERAPIDS_D, core_funcs),
		X86_MATCH(INTEL_GRANITERAPIDS_X, core_funcs),
		X86_MATCH(INTEL_ATOM_CRESTMONT, core_funcs),
		X86_MATCH(INTEL_ATOM_CRESTMONT_X, core_funcs),
		{}
		};
		#endif
		@@ -2703,6 +2931,8 @@ static int intel_pstate_cpu_online(struct cpufreq_policy *policy)
		*/
		intel_pstate_hwp_reenable(cpu);
		cpu->suspended = false;

		hybrid_update_capacity(cpu);
		}

		return 0;
		@@ -3143,6 +3373,8 @@ static int intel_pstate_register_driver(struct cpufreq_driver *driver)

		global.min_perf_pct = min_perf_pct_min();

		hybrid_init_cpu_capacity_scaling();

		return 0;
		}

drivers/cpufreq/maple-cpufreq.c

+1 −0

Original line number	Diff line number	Diff line
		@@ -238,4 +238,5 @@ static int __init maple_cpufreq_init(void)
		module_init(maple_cpufreq_init);


		MODULE_DESCRIPTION("cpufreq driver for Maple 970FX/970MP boards");
		MODULE_LICENSE("GPL");