Commit cb64c513 authored by Linus Torvalds
Browse files
Pull power management fixes from Rafael Wysocki:
 "These are mostly cpufreq fixes, some of which address recent
  regressions and some address older issues that have come to light
  during the last two weeks, and a runtime PM documentation correction:

   - Fix the performance-to-frequency scaling factor computation on
     systems using HWP in the intel_pstate driver after a recent
     incorrect update of it (Rafael Wysocki)

   - Fix the usage of the CPUFREQ_NEED_UPDATE_LIMITS cpufreq driver flag
     in the schedutil cpufreq governor after a recent update of it that
     has caused frequency limits changes to be missed sometimes (Rafael
     Wysocki)

   - Address some recently discovered synchronization issues related to
     frequency limits changes in the schedutil cpufreq governor and in
     the cpufreq core (Rafael Wysocki)

   - Fix ITMT support in the amd-pstate cpufreq driver so that it is
     enabled after asym priorities have been correctly initialized for
     all CPUs (K Prateek Nayak)

   - Fix changing min/max limits in the amd-pstate cpufreq driver while
     on the performance governor (Dhananjay Ugwekar)

   - Fix a function name in the runtime PM documentation that was
     previously incorrectly updated by mistake (Sakari Ailus)"

* tag 'pm-6.15-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm:
  cpufreq: Avoid using inconsistent policy->min and policy->max
  cpufreq/sched: Set need_freq_update in ignore_dl_rate_limit()
  cpufreq/sched: Explicitly synchronize limits_changed flag handling
  cpufreq/sched: Fix the usage of CPUFREQ_NEED_UPDATE_LIMITS
  Documentation: PM: runtime: Fix a reference to pm_runtime_autosuspend()
  cpufreq: intel_pstate: Fix hwp_get_cpu_scaling()
  cpufreq/amd-pstate: Enable ITMT support after initializing core rankings
  cpufreq/amd-pstate: Fix min_limit perf and freq updation for performance governor
parents 4b828867 f3b25a1b
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -154,7 +154,7 @@ suspending the device are satisfied) and to queue up a suspend request for the
device in that case.  If there is no idle callback, or if the callback returns
0, then the PM core will attempt to carry out a runtime suspend of the device,
also respecting devices configured for autosuspend.  In essence this means a
call to __pm_runtime_autosuspend() (do note that drivers needs to update the
call to pm_runtime_autosuspend() (do note that drivers needs to update the
device last busy mark, pm_runtime_mark_last_busy(), to control the delay under
this circumstance).  To prevent this (for example, if the callback routine has
started a delayed suspend), the routine must return a non-zero value.  Negative
+15 −21
Original line number Diff line number Diff line
@@ -607,13 +607,16 @@ static void amd_pstate_update_min_max_limit(struct cpufreq_policy *policy)
	union perf_cached perf = READ_ONCE(cpudata->perf);

	perf.max_limit_perf = freq_to_perf(perf, cpudata->nominal_freq, policy->max);
	perf.min_limit_perf = freq_to_perf(perf, cpudata->nominal_freq, policy->min);
	WRITE_ONCE(cpudata->max_limit_freq, policy->max);

	if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
	if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) {
		perf.min_limit_perf = min(perf.nominal_perf, perf.max_limit_perf);

	WRITE_ONCE(cpudata->max_limit_freq, policy->max);
		WRITE_ONCE(cpudata->min_limit_freq, min(cpudata->nominal_freq, cpudata->max_limit_freq));
	} else {
		perf.min_limit_perf = freq_to_perf(perf, cpudata->nominal_freq, policy->min);
		WRITE_ONCE(cpudata->min_limit_freq, policy->min);
	}

	WRITE_ONCE(cpudata->perf, perf);
}

@@ -791,16 +794,6 @@ static void amd_perf_ctl_reset(unsigned int cpu)
	wrmsrl_on_cpu(cpu, MSR_AMD_PERF_CTL, 0);
}

/*
 * Set amd-pstate preferred core enable can't be done directly from cpufreq callbacks
 * due to locking, so queue the work for later.
 */
static void amd_pstste_sched_prefcore_workfn(struct work_struct *work)
{
	sched_set_itmt_support();
}
static DECLARE_WORK(sched_prefcore_work, amd_pstste_sched_prefcore_workfn);

#define CPPC_MAX_PERF	U8_MAX

static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata)
@@ -811,14 +804,8 @@ static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata)

	cpudata->hw_prefcore = true;

	/*
	 * The priorities can be set regardless of whether or not
	 * sched_set_itmt_support(true) has been called and it is valid to
	 * update them at any time after it has been called.
	 */
	/* Priorities must be initialized before ITMT support can be toggled on. */
	sched_set_itmt_core_prio((int)READ_ONCE(cpudata->prefcore_ranking), cpudata->cpu);

	schedule_work(&sched_prefcore_work);
}

static void amd_pstate_update_limits(unsigned int cpu)
@@ -1193,6 +1180,9 @@ static ssize_t show_energy_performance_preference(

static void amd_pstate_driver_cleanup(void)
{
	if (amd_pstate_prefcore)
		sched_clear_itmt_support();

	cppc_state = AMD_PSTATE_DISABLE;
	current_pstate_driver = NULL;
}
@@ -1235,6 +1225,10 @@ static int amd_pstate_register_driver(int mode)
		return ret;
	}

	/* Enable ITMT support once all CPUs have initialized their asym priorities. */
	if (amd_pstate_prefcore)
		sched_set_itmt_support();

	return 0;
}

+25 −7
Original line number Diff line number Diff line
@@ -540,8 +540,6 @@ static unsigned int __resolve_freq(struct cpufreq_policy *policy,
{
	unsigned int idx;

	target_freq = clamp_val(target_freq, policy->min, policy->max);

	if (!policy->freq_table)
		return target_freq;

@@ -565,7 +563,22 @@ static unsigned int __resolve_freq(struct cpufreq_policy *policy,
unsigned int cpufreq_driver_resolve_freq(struct cpufreq_policy *policy,
					 unsigned int target_freq)
{
	return __resolve_freq(policy, target_freq, CPUFREQ_RELATION_LE);
	unsigned int min = READ_ONCE(policy->min);
	unsigned int max = READ_ONCE(policy->max);

	/*
	 * If this function runs in parallel with cpufreq_set_policy(), it may
	 * read policy->min before the update and policy->max after the update
	 * or the other way around, so there is no ordering guarantee.
	 *
	 * Resolve this by always honoring the max (in case it comes from
	 * thermal throttling or similar).
	 */
	if (unlikely(min > max))
		min = max;

	return __resolve_freq(policy, clamp_val(target_freq, min, max),
			      CPUFREQ_RELATION_LE);
}
EXPORT_SYMBOL_GPL(cpufreq_driver_resolve_freq);

@@ -2384,6 +2397,7 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy,
	if (cpufreq_disabled())
		return -ENODEV;

	target_freq = clamp_val(target_freq, policy->min, policy->max);
	target_freq = __resolve_freq(policy, target_freq, relation);

	pr_debug("target for CPU %u: %u kHz, relation %u, requested %u kHz\n",
@@ -2708,11 +2722,15 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy,
	 * Resolve policy min/max to available frequencies. It ensures
	 * no frequency resolution will neither overshoot the requested maximum
	 * nor undershoot the requested minimum.
	 *
	 * Avoid storing intermediate values in policy->max or policy->min and
	 * compiler optimizations around them because they may be accessed
	 * concurrently by cpufreq_driver_resolve_freq() during the update.
	 */
	policy->min = new_data.min;
	policy->max = new_data.max;
	policy->min = __resolve_freq(policy, policy->min, CPUFREQ_RELATION_L);
	policy->max = __resolve_freq(policy, policy->max, CPUFREQ_RELATION_H);
	WRITE_ONCE(policy->max, __resolve_freq(policy, new_data.max, CPUFREQ_RELATION_H));
	new_data.min = __resolve_freq(policy, new_data.min, CPUFREQ_RELATION_L);
	WRITE_ONCE(policy->min, new_data.min > policy->max ? policy->max : new_data.min);

	trace_cpu_frequency_limits(policy);

	cpufreq_update_pressure(policy);
+1 −1
Original line number Diff line number Diff line
@@ -2209,7 +2209,7 @@ static int knl_get_turbo_pstate(int cpu)
static int hwp_get_cpu_scaling(int cpu)
{
	if (hybrid_scaling_factor) {
		struct cpuinfo_x86 *c = &cpu_data(smp_processor_id());
		struct cpuinfo_x86 *c = &cpu_data(cpu);
		u8 cpu_type = c->topo.intel_type;

		/*
+42 −7
Original line number Diff line number Diff line
@@ -81,9 +81,23 @@ static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time)
	if (!cpufreq_this_cpu_can_update(sg_policy->policy))
		return false;

	if (unlikely(sg_policy->limits_changed)) {
		sg_policy->limits_changed = false;
		sg_policy->need_freq_update = cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS);
	if (unlikely(READ_ONCE(sg_policy->limits_changed))) {
		WRITE_ONCE(sg_policy->limits_changed, false);
		sg_policy->need_freq_update = true;

		/*
		 * The above limits_changed update must occur before the reads
		 * of policy limits in cpufreq_driver_resolve_freq() or a policy
		 * limits update might be missed, so use a memory barrier to
		 * ensure it.
		 *
		 * This pairs with the write memory barrier in sugov_limits().
		 */
		smp_mb();

		return true;
	} else if (sg_policy->need_freq_update) {
		/* ignore_dl_rate_limit() wants a new frequency to be found. */
		return true;
	}

@@ -95,10 +109,22 @@ static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time)
static bool sugov_update_next_freq(struct sugov_policy *sg_policy, u64 time,
				   unsigned int next_freq)
{
	if (sg_policy->need_freq_update)
	if (sg_policy->need_freq_update) {
		sg_policy->need_freq_update = false;
	else if (sg_policy->next_freq == next_freq)
		/*
		 * The policy limits have changed, but if the return value of
		 * cpufreq_driver_resolve_freq() after applying the new limits
		 * is still equal to the previously selected frequency, the
		 * driver callback need not be invoked unless the driver
		 * specifically wants that to happen on every update of the
		 * policy limits.
		 */
		if (sg_policy->next_freq == next_freq &&
		    !cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS))
			return false;
	} else if (sg_policy->next_freq == next_freq) {
		return false;
	}

	sg_policy->next_freq = next_freq;
	sg_policy->last_freq_update_time = time;
@@ -365,7 +391,7 @@ static inline bool sugov_hold_freq(struct sugov_cpu *sg_cpu) { return false; }
static inline void ignore_dl_rate_limit(struct sugov_cpu *sg_cpu)
{
	if (cpu_bw_dl(cpu_rq(sg_cpu->cpu)) > sg_cpu->bw_min)
		sg_cpu->sg_policy->limits_changed = true;
		sg_cpu->sg_policy->need_freq_update = true;
}

static inline bool sugov_update_single_common(struct sugov_cpu *sg_cpu,
@@ -871,7 +897,16 @@ static void sugov_limits(struct cpufreq_policy *policy)
		mutex_unlock(&sg_policy->work_lock);
	}

	sg_policy->limits_changed = true;
	/*
	 * The limits_changed update below must take place before the updates
	 * of policy limits in cpufreq_set_policy() or a policy limits update
	 * might be missed, so use a memory barrier to ensure it.
	 *
	 * This pairs with the memory barrier in sugov_should_update_freq().
	 */
	smp_wmb();

	WRITE_ONCE(sg_policy->limits_changed, true);
}

struct cpufreq_governor schedutil_gov = {