Commit d3d663fa authored by Vincent Guittot, committed by Peter Zijlstra
Browse files

sched/fair: Filter false overloaded_group case for EAS



With EAS, a group should be set overloaded if at least 1 CPU in the group
is overutilized, but it can happen that a CPU is fully utilized by tasks
because of clamping the compute capacity of the CPU. In such a case, the CPU
is not overutilized and, as a result, the group should not be set overloaded
either.

group_overloaded being a higher priority than group_misfit, such a group can
be selected as the busiest group instead of a group with a misfit task,
which prevents load_balance from selecting the CPU with the misfit task to
pull the latter onto a fitting CPU.

Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Pierre Gondois <pierre.gondois@arm.com>
Link: https://patch.msgid.link/20260206095454.1520619-1-vincent.guittot@linaro.org
parent 92647580
Loading
Loading
Loading
Loading
+13 −5
Original line number Diff line number Diff line
@@ -10211,6 +10211,7 @@ struct sg_lb_stats {
	unsigned int group_asym_packing;	/* Tasks should be moved to preferred CPU */
	unsigned int group_smt_balance;		/* Task on busy SMT be moved */
	unsigned long group_misfit_task_load;	/* A CPU has a task too big for its capacity */
	unsigned int group_overutilized;	/* At least one CPU is overutilized in the group */
#ifdef CONFIG_NUMA_BALANCING
	unsigned int nr_numa_running;
	unsigned int nr_preferred_running;
@@ -10443,6 +10444,13 @@ group_has_capacity(unsigned int imbalance_pct, struct sg_lb_stats *sgs)
static inline bool
group_is_overloaded(unsigned int imbalance_pct, struct sg_lb_stats *sgs)
{
	/*
	 * With EAS and uclamp, 1 CPU in the group must be overutilized to
	 * consider the group overloaded.
	 */
	if (sched_energy_enabled() && !sgs->group_overutilized)
		return false;

	if (sgs->sum_nr_running <= sgs->group_weight)
		return false;

@@ -10626,14 +10634,12 @@ sched_reduced_capacity(struct rq *rq, struct sched_domain *sd)
 * @group: sched_group whose statistics are to be updated.
 * @sgs: variable to hold the statistics for this group.
 * @sg_overloaded: sched_group is overloaded
 * @sg_overutilized: sched_group is overutilized
 */
static inline void update_sg_lb_stats(struct lb_env *env,
				      struct sd_lb_stats *sds,
				      struct sched_group *group,
				      struct sg_lb_stats *sgs,
				      bool *sg_overloaded,
				      bool *sg_overutilized)
				      bool *sg_overloaded)
{
	int i, nr_running, local_group, sd_flags = env->sd->flags;
	bool balancing_at_rd = !env->sd->parent;
@@ -10655,7 +10661,7 @@ static inline void update_sg_lb_stats(struct lb_env *env,
		sgs->sum_nr_running += nr_running;

		if (cpu_overutilized(i))
			*sg_overutilized = 1;
			sgs->group_overutilized = 1;

		/*
		 * No need to call idle_cpu() if nr_running is not 0
@@ -11326,13 +11332,15 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
				update_group_capacity(env->sd, env->dst_cpu);
		}

		update_sg_lb_stats(env, sds, sg, sgs, &sg_overloaded, &sg_overutilized);
		update_sg_lb_stats(env, sds, sg, sgs, &sg_overloaded);

		if (!local_group && update_sd_pick_busiest(env, sds, sg, sgs)) {
			sds->busiest = sg;
			sds->busiest_stat = *sgs;
		}

		sg_overutilized |= sgs->group_overutilized;

		/* Now, start updating sd_lb_stats */
		sds->total_load += sgs->group_load;
		sds->total_capacity += sgs->group_capacity;