Commit a430d99e authored by Peter Zijlstra
Browse files

sched/fair: Fix value reported by hot tasks pulled in /proc/schedstat



In /proc/schedstat, lb_hot_gained reports the number of hot tasks pulled
during load balance. This value is incremented in can_migrate_task()
if the task is migratable and hot. After incrementing the value, the
load balancer can still decide not to migrate this task, leading to wrong
accounting. Fix this by incrementing the stats when hot tasks are detached.
This issue only exists in detach_tasks(), where we can decide not to
migrate a hot task even if it is migratable. In detach_one_task(), however,
we migrate it unconditionally.

[Swapnil: Handled the case where nr_failed_migrations_hot was not accounted properly and wrote commit log]

Fixes: d3198084 ("sched: Move up affinity check to mitigate useless redoing overhead")
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reported-by: "Gautham R. Shenoy" <gautham.shenoy@amd.com>
Not-yet-signed-off-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Swapnil Sapkal <swapnil.sapkal@amd.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20241220063224.17767-2-swapnil.sapkal@amd.com
parent ee8118c1
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -937,6 +937,7 @@ struct task_struct {
	unsigned			sched_reset_on_fork:1;
	unsigned			sched_contributes_to_load:1;
	unsigned			sched_migrated:1;
	unsigned			sched_task_hot:1;

	/* Force alignment to the next boundary: */
	unsigned			:0;
+13 −4
Original line number Diff line number Diff line
@@ -9396,6 +9396,8 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
	int tsk_cache_hot;

	lockdep_assert_rq_held(env->src_rq);
	if (p->sched_task_hot)
		p->sched_task_hot = 0;

	/*
	 * We do not migrate tasks that are:
@@ -9472,10 +9474,8 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)

	if (tsk_cache_hot <= 0 ||
	    env->sd->nr_balance_failed > env->sd->cache_nice_tries) {
		if (tsk_cache_hot == 1) {
			schedstat_inc(env->sd->lb_hot_gained[env->idle]);
			schedstat_inc(p->stats.nr_forced_migrations);
		}
		if (tsk_cache_hot == 1)
			p->sched_task_hot = 1;
		return 1;
	}

@@ -9490,6 +9490,12 @@ static void detach_task(struct task_struct *p, struct lb_env *env)
{
	/* The source runqueue lock must be held while p is moved off it. */
	lockdep_assert_rq_held(env->src_rq);

	/*
	 * sched_task_hot is set by can_migrate_task() when a cache-hot task
	 * is nevertheless allowed to migrate. Consume the flag here and bump
	 * the hot-migration stats, so that they are only counted for tasks
	 * that actually get detached rather than for every candidate.
	 */
	if (p->sched_task_hot) {
		p->sched_task_hot = 0;
		schedstat_inc(env->sd->lb_hot_gained[env->idle]);
		schedstat_inc(p->stats.nr_forced_migrations);
	}

	/* Take p off env->src_rq and retarget it at env->dst_cpu. */
	deactivate_task(env->src_rq, p, DEQUEUE_NOCLOCK);
	set_task_cpu(p, env->dst_cpu);
}
@@ -9650,6 +9656,9 @@ static int detach_tasks(struct lb_env *env)

		continue;
next:
		if (p->sched_task_hot)
			schedstat_inc(p->stats.nr_failed_migrations_hot);

		list_move(&p->se.group_node, tasks);
	}