Commit 5a9d1014 authored by Rafael J. Wysocki's avatar Rafael J. Wysocki
Browse files

x86/sched: Add basic support for CPU capacity scaling



In order be able to compute the sizes of tasks consistently across all
CPUs in a hybrid system, it is necessary to provide CPU capacity scaling
information to the scheduler via arch_scale_cpu_capacity().  Moreover,
the value returned by arch_scale_freq_capacity() for the given CPU must
correspond to the arch_scale_cpu_capacity() return value for it, or
utilization computations will be inaccurate.

Add support for it through per-CPU variables holding the capacity and
maximum-to-base frequency ratio (times SCHED_CAPACITY_SCALE) that will
be returned by arch_scale_cpu_capacity() and used by scale_freq_tick()
to compute arch_freq_scale for the current CPU, respectively.

In order to avoid adding measurable overhead for non-hybrid x86 systems,
which are the vast majority in the field, whether or not the new hybrid
CPU capacity scaling will be in effect is controlled by a static key.
This static key is set by calling arch_enable_hybrid_capacity_scale()
which also allocates memory for the per-CPU data and initializes it.
Next, arch_set_cpu_capacity() is used to set the per-CPU variables
mentioned above for each CPU and arch_rebuild_sched_domains() needs
to be called for the scheduler to realize that capacity-aware
scheduling can be used going forward.

Signed-off-by: default avatarRafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: default avatarPeter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: default avatarRicardo Neri <ricardo.neri-calderon@linux.intel.com>
Tested-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com> # scale invariance
Link: https://patch.msgid.link/10523497.nUPlyArG6x@rjwysocki.net


[ rjw: Added parens to function kerneldoc comments ]
Signed-off-by: default avatarRafael J. Wysocki <rafael.j.wysocki@intel.com>
parent c4a6c82c
Loading
Loading
Loading
Loading
+13 −0
Original line number Diff line number Diff line
@@ -282,9 +282,22 @@ static inline long arch_scale_freq_capacity(int cpu)
}
#define arch_scale_freq_capacity arch_scale_freq_capacity

bool arch_enable_hybrid_capacity_scale(void);
void arch_set_cpu_capacity(int cpu, unsigned long cap, unsigned long max_cap,
			   unsigned long cap_freq, unsigned long base_freq);

unsigned long arch_scale_cpu_capacity(int cpu);
#define arch_scale_cpu_capacity arch_scale_cpu_capacity

extern void arch_set_max_freq_ratio(bool turbo_disabled);
extern void freq_invariance_set_perf_ratio(u64 ratio, bool turbo_disabled);
#else
static inline bool arch_enable_hybrid_capacity_scale(void) { return false; }
static inline void arch_set_cpu_capacity(int cpu, unsigned long cap,
					 unsigned long max_cap,
					 unsigned long cap_freq,
					 unsigned long base_freq) { }

static inline void arch_set_max_freq_ratio(bool turbo_disabled) { }
static inline void freq_invariance_set_perf_ratio(u64 ratio, bool turbo_disabled) { }
#endif
+87 −2
Original line number Diff line number Diff line
@@ -349,9 +349,89 @@ static DECLARE_WORK(disable_freq_invariance_work,
DEFINE_PER_CPU(unsigned long, arch_freq_scale) = SCHED_CAPACITY_SCALE;
EXPORT_PER_CPU_SYMBOL_GPL(arch_freq_scale);

static DEFINE_STATIC_KEY_FALSE(arch_hybrid_cap_scale_key);

struct arch_hybrid_cpu_scale {
	unsigned long capacity;
	unsigned long freq_ratio;
};

static struct arch_hybrid_cpu_scale __percpu *arch_cpu_scale;

/**
 * arch_enable_hybrid_capacity_scale() - Enable hybrid CPU capacity scaling
 *
 * Allocate memory for per-CPU data used by hybrid CPU capacity scaling,
 * initialize it and set the static key controlling its code paths.
 *
 * Must be called before arch_set_cpu_capacity().
 */
bool arch_enable_hybrid_capacity_scale(void)
{
	int cpu;

	if (static_branch_unlikely(&arch_hybrid_cap_scale_key)) {
		WARN_ONCE(1, "Hybrid CPU capacity scaling already enabled");
		return true;
	}

	arch_cpu_scale = alloc_percpu(struct arch_hybrid_cpu_scale);
	if (!arch_cpu_scale)
		return false;

	for_each_possible_cpu(cpu) {
		per_cpu_ptr(arch_cpu_scale, cpu)->capacity = SCHED_CAPACITY_SCALE;
		per_cpu_ptr(arch_cpu_scale, cpu)->freq_ratio = arch_max_freq_ratio;
	}

	static_branch_enable(&arch_hybrid_cap_scale_key);

	pr_info("Hybrid CPU capacity scaling enabled\n");

	return true;
}

/**
 * arch_set_cpu_capacity() - Set scale-invariance parameters for a CPU
 * @cpu: Target CPU.
 * @cap: Capacity of @cpu at its maximum frequency, relative to @max_cap.
 * @max_cap: System-wide maximum CPU capacity.
 * @cap_freq: Frequency of @cpu corresponding to @cap.
 * @base_freq: Frequency of @cpu at which MPERF counts.
 *
 * The units in which @cap and @max_cap are expressed do not matter, so long
 * as they are consistent, because the former is effectively divided by the
 * latter.  Analogously for @cap_freq and @base_freq.
 *
 * After calling this function for all CPUs, call arch_rebuild_sched_domains()
 * to let the scheduler know that capacity-aware scheduling can be used going
 * forward.
 */
void arch_set_cpu_capacity(int cpu, unsigned long cap, unsigned long max_cap,
			   unsigned long cap_freq, unsigned long base_freq)
{
	if (static_branch_likely(&arch_hybrid_cap_scale_key)) {
		WRITE_ONCE(per_cpu_ptr(arch_cpu_scale, cpu)->capacity,
			   div_u64(cap << SCHED_CAPACITY_SHIFT, max_cap));
		WRITE_ONCE(per_cpu_ptr(arch_cpu_scale, cpu)->freq_ratio,
			   div_u64(cap_freq << SCHED_CAPACITY_SHIFT, base_freq));
	} else {
		WARN_ONCE(1, "Hybrid CPU capacity scaling not enabled");
	}
}

unsigned long arch_scale_cpu_capacity(int cpu)
{
	if (static_branch_unlikely(&arch_hybrid_cap_scale_key))
		return READ_ONCE(per_cpu_ptr(arch_cpu_scale, cpu)->capacity);

	return SCHED_CAPACITY_SCALE;
}
EXPORT_SYMBOL_GPL(arch_scale_cpu_capacity);

static void scale_freq_tick(u64 acnt, u64 mcnt)
{
	u64 freq_scale;
	u64 freq_scale, freq_ratio;

	if (!arch_scale_freq_invariant())
		return;
@@ -359,7 +439,12 @@ static void scale_freq_tick(u64 acnt, u64 mcnt)
	if (check_shl_overflow(acnt, 2*SCHED_CAPACITY_SHIFT, &acnt))
		goto error;

	if (check_mul_overflow(mcnt, arch_max_freq_ratio, &mcnt) || !mcnt)
	if (static_branch_unlikely(&arch_hybrid_cap_scale_key))
		freq_ratio = READ_ONCE(this_cpu_ptr(arch_cpu_scale)->freq_ratio);
	else
		freq_ratio = arch_max_freq_ratio;

	if (check_mul_overflow(mcnt, freq_ratio, &mcnt) || !mcnt)
		goto error;

	freq_scale = div64_u64(acnt, mcnt);