Commit 9fe89f02 authored by Peter Zijlstra
Browse files

sched/fair: More complex proportional newidle balance



It turns out that a few workloads (easyWave, fio) have a fairly low
success rate on newidle balance, but still benefit greatly from having
it anyway.

Luckily these workloads have a fairly low newidle rate, so the cost of
doing the newidle is relatively low, even if unsuccessful.

Add a simple rate-based part to the newidle ratio computation, such that
low-rate newidle will still have a high newidle ratio.

This cures the easyWave and fio workloads while not affecting the
schbench numbers either (which have a very high newidle rate).

Reported-by: Mario Roy <marioeroy@gmail.com>
Reported-by: "Mohamed Abuelfotoh, Hazem" <abuehaze@amazon.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Mario Roy <marioeroy@gmail.com>
Tested-by: "Mohamed Abuelfotoh, Hazem" <abuehaze@amazon.com>
Link: https://patch.msgid.link/20260127151748.GA1079264@noisy.programming.kicks-ass.net
parent 3b68df97
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -95,6 +95,7 @@ struct sched_domain {
	unsigned int newidle_call;
	unsigned int newidle_success;
	unsigned int newidle_ratio;
	u64 newidle_stamp;
	u64 max_newidle_lb_cost;
	unsigned long last_decay_max_lb_cost;

+25 −2
Original line number Diff line number Diff line
@@ -12289,7 +12289,30 @@ static inline void update_newidle_stats(struct sched_domain *sd, unsigned int su
	sd->newidle_success += success;

	if (sd->newidle_call >= 1024) {
		sd->newidle_ratio = sd->newidle_success;
		u64 now = sched_clock();
		s64 delta = now - sd->newidle_stamp;
		sd->newidle_stamp = now;
		int ratio = 0;

		if (delta < 0)
			delta = 0;

		if (sched_feat(NI_RATE)) {
			/*
			 * ratio  delta   freq
			 *
			 * 1024 -  4  s -  128 Hz
			 *  512 -  2  s -  256 Hz
			 *  256 -  1  s -  512 Hz
			 *  128 - .5  s - 1024 Hz
			 *   64 - .25 s - 2048 Hz
			 */
			ratio = delta >> 22;
		}

		ratio += sd->newidle_success;

		sd->newidle_ratio = min(1024, ratio);
		sd->newidle_call /= 2;
		sd->newidle_success /= 2;
	}
@@ -12996,7 +13019,7 @@ static int sched_balance_newidle(struct rq *this_rq, struct rq_flags *rf)
		if (sd->flags & SD_BALANCE_NEWIDLE) {
			unsigned int weight = 1;

			if (sched_feat(NI_RANDOM)) {
			if (sched_feat(NI_RANDOM) && sd->newidle_ratio < 1024) {
				/*
				 * Throw a 1k sided dice; and only run
				 * newidle_balance according to the success
+1 −0
Original line number Diff line number Diff line
@@ -126,3 +126,4 @@ SCHED_FEAT(LATENCY_WARN, false)
 * Do newidle balancing proportional to its success rate using randomization.
 */
SCHED_FEAT(NI_RANDOM, true)
SCHED_FEAT(NI_RATE, true)
+3 −0
Original line number Diff line number Diff line
@@ -4,6 +4,7 @@
 */

#include <linux/sched/isolation.h>
#include <linux/sched/clock.h>
#include <linux/bsearch.h>
#include "sched.h"

@@ -1642,6 +1643,7 @@ sd_init(struct sched_domain_topology_level *tl,
	struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu);
	int sd_id, sd_weight, sd_flags = 0;
	struct cpumask *sd_span;
	u64 now = sched_clock();

	sd_weight = cpumask_weight(tl->mask(tl, cpu));

@@ -1679,6 +1681,7 @@ sd_init(struct sched_domain_topology_level *tl,
		.newidle_call		= 512,
		.newidle_success	= 256,
		.newidle_ratio		= 512,
		.newidle_stamp		= now,

		.max_newidle_lb_cost	= 0,
		.last_decay_max_lb_cost	= jiffies,