sched/fair: Proportional newidle balance (33cf66d8) · Commits · git / linux-net

include/linux/sched/topology.h

+3 −0

Original line number	Diff line number	Diff line
		@@ -92,6 +92,9 @@ struct sched_domain {
		unsigned int nr_balance_failed; /* initialise to 0 */

		/* idle_balance() stats */
		unsigned int newidle_call;
		unsigned int newidle_success;
		unsigned int newidle_ratio;
		u64 max_newidle_lb_cost;
		unsigned long last_decay_max_lb_cost;

kernel/sched/core.c

+3 −0

Original line number	Diff line number	Diff line
		@@ -121,6 +121,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(sched_update_nr_running_tp);
		EXPORT_TRACEPOINT_SYMBOL_GPL(sched_compute_energy_tp);

		DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
		DEFINE_PER_CPU(struct rnd_state, sched_rnd_state);

		#ifdef CONFIG_SCHED_PROXY_EXEC
		DEFINE_STATIC_KEY_TRUE(__sched_proxy_exec);
		@@ -8489,6 +8490,8 @@ void __init sched_init_smp(void)
		{
		sched_init_numa(NUMA_NO_NODE);

		prandom_init_once(&sched_rnd_state);

		/*
		* There's no userspace yet to cause hotplug operations; hence all the
		* CPU masks are stable and all blatant races in the below code cannot

kernel/sched/fair.c

+40 −4

Original line number	Diff line number	Diff line
		@@ -12224,11 +12224,27 @@ void update_max_interval(void)
		max_load_balance_interval = HZ*num_online_cpus()/10;
		}

		static inline bool update_newidle_cost(struct sched_domain *sd, u64 cost)
		/*
		 * Account one newidle balance attempt on @sd.
		 *
		 * @sd:      domain whose newidle counters are updated
		 * @success: amount added to the success counter for this attempt
		 *           (0 when nothing is to be credited)
		 *
		 * Each call bumps the attempt counter by one. Once the attempt counter
		 * reaches 1024, the accumulated success counter is published as
		 * sd->newidle_ratio and both counters are halved before being stored back.
		 */
		static inline void update_newidle_stats(struct sched_domain *sd, unsigned int success)
		{
			unsigned int calls = sd->newidle_call + 1;
			unsigned int hits = sd->newidle_success + success;

			if (calls >= 1024) {
				/* Publish the un-halved success count as the new ratio. */
				sd->newidle_ratio = hits;
				calls /= 2;
				hits /= 2;
			}

			sd->newidle_call = calls;
			sd->newidle_success = hits;
		}

		static inline bool
		update_newidle_cost(struct sched_domain *sd, u64 cost, unsigned int success)
		{
		unsigned long next_decay = sd->last_decay_max_lb_cost + HZ;
		unsigned long now = jiffies;

		if (cost)
		update_newidle_stats(sd, success);

		if (cost > sd->max_newidle_lb_cost) {
		/*
		* Track max cost of a domain to make sure to not delay the
		@@ -12276,7 +12292,7 @@ static void sched_balance_domains(struct rq *rq, enum cpu_idle_type idle)
		* Decay the newidle max times here because this is a regular
		* visit to all the domains.
		*/
		need_decay = update_newidle_cost(sd, 0);
		need_decay = update_newidle_cost(sd, 0, 0);
		max_cost += sd->max_newidle_lb_cost;

		/*
		@@ -12912,6 +12928,22 @@ static int sched_balance_newidle(struct rq *this_rq, struct rq_flags *rf)
		break;

		if (sd->flags & SD_BALANCE_NEWIDLE) {
		unsigned int weight = 1;

		if (sched_feat(NI_RANDOM)) {
		/*
		* Throw a 1k sided dice; and only run
		* newidle_balance according to the success
		* rate.
		*/
		u32 d1k = sched_rng() % 1024;
		weight = 1 + sd->newidle_ratio;
		if (d1k > weight) {
		update_newidle_stats(sd, 0);
		continue;
		}
		weight = (1024 + weight/2) / weight;
		}

		pulled_task = sched_balance_rq(this_cpu, this_rq,
		sd, CPU_NEWLY_IDLE,
		@@ -12919,10 +12951,14 @@ static int sched_balance_newidle(struct rq *this_rq, struct rq_flags *rf)

		t1 = sched_clock_cpu(this_cpu);
		domain_cost = t1 - t0;
		update_newidle_cost(sd, domain_cost);

		curr_cost += domain_cost;
		t0 = t1;

		/*
		* Track max cost of a domain to make sure to not delay the
		* next wakeup on the CPU.
		*/
		update_newidle_cost(sd, domain_cost, weight * !!pulled_task);
		}

		/*

kernel/sched/features.h

+5 −0

Original line number	Diff line number	Diff line
		@@ -121,3 +121,8 @@ SCHED_FEAT(WA_BIAS, true)
		SCHED_FEAT(UTIL_EST, true)

		SCHED_FEAT(LATENCY_WARN, false)

		/*
		* Do newidle balancing proportional to its success rate using randomization.
		*/
		SCHED_FEAT(NI_RANDOM, true)

kernel/sched/sched.h

+7 −0

Original line number	Diff line number	Diff line
		@@ -5,6 +5,7 @@
		#ifndef _KERNEL_SCHED_SCHED_H
		#define _KERNEL_SCHED_SCHED_H

		#include <linux/prandom.h>
		#include <linux/sched/affinity.h>
		#include <linux/sched/autogroup.h>
		#include <linux/sched/cpufreq.h>
		@@ -1348,6 +1349,12 @@ static inline bool is_migration_disabled(struct task_struct *p)
		}

		DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
		DECLARE_PER_CPU(struct rnd_state, sched_rnd_state);

		/*
		 * Draw the next 32-bit pseudo-random value from the current CPU's
		 * sched_rnd_state generator.
		 */
		static inline u32 sched_rng(void)
		{
			struct rnd_state *state = this_cpu_ptr(&sched_rnd_state);

			return prandom_u32_state(state);
		}

		#define cpu_rq(cpu) (&per_cpu(runqueues, (cpu)))
		#define this_rq() this_cpu_ptr(&runqueues)