Commit c0379108 authored by K Prateek Nayak, committed by Mario Limonciello (AMD)
Browse files

cpufreq: Pass the policy to cpufreq_driver->adjust_perf()

cpufreq_cpu_get() can sleep on PREEMPT_RT in the presence of concurrent
writer(s); however, amd-pstate depends on fetching the cpudata via the
policy's driver data, which necessitates grabbing the reference.

Since the schedutil governor can call "cpufreq_driver->adjust_perf()"
during sched_tick/enqueue/dequeue with rq_lock held and IRQs disabled,
fetching the policy object using the cpufreq_cpu_get() helper in the
scheduler fast-path leads to "BUG: scheduling while atomic" on
PREEMPT_RT [1].

Pass the cached cpufreq policy object in sg_policy to adjust_perf()
instead of just the CPU. The CPU can be inferred using "policy->cpu".

The lifetime of cpufreq_policy object outlasts that of the governor and
the cpufreq driver (allocated when the CPU is onlined and only reclaimed
when the CPU is offlined / the CPU device is removed) which makes it
safe to be referenced throughout the governor's lifetime.

Closes: https://lore.kernel.org/all/20250731092316.3191-1-spasswolf@web.de/ [1]

Fixes: 1d215f03 ("cpufreq: amd-pstate: Add fast switch function for AMD P-State")
Reported-by: Bert Karwatzki <spasswolf@web.de>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
Acked-by: Gary Guo <gary@garyguo.net> # Rust
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Reviewed-by: Zhongqiu Han <zhongqiu.han@oss.qualcomm.com>
Link: https://lore.kernel.org/r/20260316081849.19368-3-kprateek.nayak@amd.com


Signed-off-by: Mario Limonciello (AMD) <superm1@kernel.org>
parent 86d71f1d
Loading
Loading
Loading
Loading
+1 −2
Original line number Diff line number Diff line
@@ -788,13 +788,12 @@ static unsigned int amd_pstate_fast_switch(struct cpufreq_policy *policy,
	return policy->cur;
}

static void amd_pstate_adjust_perf(unsigned int cpu,
static void amd_pstate_adjust_perf(struct cpufreq_policy *policy,
				   unsigned long _min_perf,
				   unsigned long target_perf,
				   unsigned long capacity)
{
	u8 max_perf, min_perf, des_perf, cap_perf;
	struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(cpu);
	struct amd_cpudata *cpudata;
	union perf_cached perf;

+3 −3
Original line number Diff line number Diff line
@@ -2231,7 +2231,7 @@ EXPORT_SYMBOL_GPL(cpufreq_driver_fast_switch);

/**
 * cpufreq_driver_adjust_perf - Adjust CPU performance level in one go.
 * @cpu: Target CPU.
 * @policy: cpufreq policy object of the target CPU.
 * @min_perf: Minimum (required) performance level (units of @capacity).
 * @target_perf: Target (desired) performance level (units of @capacity).
 * @capacity: Capacity of the target CPU.
@@ -2250,12 +2250,12 @@ EXPORT_SYMBOL_GPL(cpufreq_driver_fast_switch);
 * parallel with either ->target() or ->target_index() or ->fast_switch() for
 * the same CPU.
 */
void cpufreq_driver_adjust_perf(unsigned int cpu,
void cpufreq_driver_adjust_perf(struct cpufreq_policy *policy,
				 unsigned long min_perf,
				 unsigned long target_perf,
				 unsigned long capacity)
{
	cpufreq_driver->adjust_perf(cpu, min_perf, target_perf, capacity);
	cpufreq_driver->adjust_perf(policy, min_perf, target_perf, capacity);
}

/**
+2 −2
Original line number Diff line number Diff line
@@ -3239,12 +3239,12 @@ static unsigned int intel_cpufreq_fast_switch(struct cpufreq_policy *policy,
	return target_pstate * cpu->pstate.scaling;
}

static void intel_cpufreq_adjust_perf(unsigned int cpunum,
static void intel_cpufreq_adjust_perf(struct cpufreq_policy *policy,
				      unsigned long min_perf,
				      unsigned long target_perf,
				      unsigned long capacity)
{
	struct cpudata *cpu = all_cpu_data[cpunum];
	struct cpudata *cpu = all_cpu_data[policy->cpu];
	u64 hwp_cap = READ_ONCE(cpu->hwp_cap_cached);
	int old_pstate = cpu->pstate.current_pstate;
	int cap_pstate, min_pstate, max_pstate, target_pstate;
+2 −2
Original line number Diff line number Diff line
@@ -372,7 +372,7 @@ struct cpufreq_driver {
	 * conditions) scale invariance can be disabled, which causes the
	 * schedutil governor to fall back to the latter.
	 */
	void		(*adjust_perf)(unsigned int cpu,
	void		(*adjust_perf)(struct cpufreq_policy *policy,
				       unsigned long min_perf,
				       unsigned long target_perf,
				       unsigned long capacity);
@@ -617,7 +617,7 @@ struct cpufreq_governor {
/* Pass a target to the cpufreq driver */
unsigned int cpufreq_driver_fast_switch(struct cpufreq_policy *policy,
					unsigned int target_freq);
void cpufreq_driver_adjust_perf(unsigned int cpu,
void cpufreq_driver_adjust_perf(struct cpufreq_policy *policy,
				unsigned long min_perf,
				unsigned long target_perf,
				unsigned long capacity);
+3 −2
Original line number Diff line number Diff line
@@ -461,6 +461,7 @@ static void sugov_update_single_perf(struct update_util_data *hook, u64 time,
				     unsigned int flags)
{
	struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
	unsigned long prev_util = sg_cpu->util;
	unsigned long max_cap;

@@ -482,10 +483,10 @@ static void sugov_update_single_perf(struct update_util_data *hook, u64 time,
	if (sugov_hold_freq(sg_cpu) && sg_cpu->util < prev_util)
		sg_cpu->util = prev_util;

	cpufreq_driver_adjust_perf(sg_cpu->cpu, sg_cpu->bw_min,
	cpufreq_driver_adjust_perf(sg_policy->policy, sg_cpu->bw_min,
				   sg_cpu->util, max_cap);

	sg_cpu->sg_policy->last_freq_update_time = time;
	sg_policy->last_freq_update_time = time;
}

static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
Loading