Commit 2ff899e3 authored by Juri Lelli, committed by Peter Zijlstra
Browse files

sched/deadline: Rebuild root domain accounting after every update



Rebuilding of root domain accounting information (total_bw) is
currently broken in some cases, e.g. suspend/resume on aarch64. The
problem is that the way we keep track of domain changes and try to add
bandwidth back is convoluted and fragile.

Fix it by simplifying things: make sure bandwidth accounting is cleared
and completely restored after root domain changes (once root domains
are stable again).

To be sure we always call dl_rebuild_rd_accounting() while holding
cpuset_mutex, we also add a cpuset_reset_sched_domains() wrapper.

Fixes: 53916d5f ("sched/deadline: Check bandwidth overflow earlier for hotplug")
Reported-by: Jon Hunter <jonathanh@nvidia.com>
Co-developed-by: Waiman Long <llong@redhat.com>
Signed-off-by: Waiman Long <llong@redhat.com>
Signed-off-by: Juri Lelli <juri.lelli@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Tested-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Link: https://lore.kernel.org/r/Z9MRfeJKJUOyUSto@jlelli-thinkpadt14gen4.remote.csb
parent 45007c6f
Loading
Loading
Loading
Loading
+6 −0
Original line number Diff line number Diff line
@@ -128,6 +128,7 @@ extern bool current_cpuset_is_being_rebound(void);
extern void rebuild_sched_domains(void);

extern void cpuset_print_current_mems_allowed(void);
extern void cpuset_reset_sched_domains(void);

/*
 * read_mems_allowed_begin is required when making decisions involving
@@ -264,6 +265,11 @@ static inline void rebuild_sched_domains(void)
	partition_sched_domains(1, NULL, NULL);
}

/*
 * Inline variant of cpuset_reset_sched_domains(): calls
 * partition_sched_domains(1, NULL, NULL) directly and takes no lock
 * of its own.
 *
 * NOTE(review): presumably the stub used when cpusets are compiled out;
 * the enclosing #ifdef is not visible here - confirm.
 */
static inline void cpuset_reset_sched_domains(void)
{
	partition_sched_domains(1, NULL, NULL);
}

/*
 * Empty inline variant of cpuset_print_current_mems_allowed(): takes no
 * arguments and does nothing.
 *
 * NOTE(review): presumably the stub used when cpusets are compiled out;
 * the enclosing #ifdef is not visible here - confirm.
 */
static inline void cpuset_print_current_mems_allowed(void)
{
}
+1 −0
Original line number Diff line number Diff line
@@ -34,6 +34,7 @@ static inline bool dl_time_before(u64 a, u64 b)
struct root_domain;
extern void dl_add_task_root_domain(struct task_struct *p);
extern void dl_clear_root_domain(struct root_domain *rd);
extern void dl_clear_root_domain_cpu(int cpu);

#endif /* CONFIG_SMP */

+2 −0
Original line number Diff line number Diff line
@@ -166,6 +166,8 @@ static inline struct cpumask *sched_domain_span(struct sched_domain *sd)
	return to_cpumask(sd->span);
}

extern void dl_rebuild_rd_accounting(void);

extern void partition_sched_domains_locked(int ndoms_new,
					   cpumask_var_t doms_new[],
					   struct sched_domain_attr *dattr_new);
+16 −7
Original line number Diff line number Diff line
@@ -954,10 +954,12 @@ static void dl_update_tasks_root_domain(struct cpuset *cs)
	css_task_iter_end(&it);
}

static void dl_rebuild_rd_accounting(void)
void dl_rebuild_rd_accounting(void)
{
	struct cpuset *cs = NULL;
	struct cgroup_subsys_state *pos_css;
	int cpu;
	u64 cookie = ++dl_cookie;

	lockdep_assert_held(&cpuset_mutex);
	lockdep_assert_cpus_held();
@@ -965,11 +967,12 @@ static void dl_rebuild_rd_accounting(void)

	rcu_read_lock();

	/*
	 * Clear default root domain DL accounting, it will be computed again
	 * if a task belongs to it.
	 */
	dl_clear_root_domain(&def_root_domain);
	for_each_possible_cpu(cpu) {
		if (dl_bw_visited(cpu, cookie))
			continue;

		dl_clear_root_domain_cpu(cpu);
	}

	cpuset_for_each_descendant_pre(cs, pos_css, &top_cpuset) {

@@ -996,7 +999,6 @@ partition_and_rebuild_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
{
	sched_domains_mutex_lock();
	partition_sched_domains_locked(ndoms_new, doms_new, dattr_new);
	dl_rebuild_rd_accounting();
	sched_domains_mutex_unlock();
}

@@ -1083,6 +1085,13 @@ void rebuild_sched_domains(void)
	cpus_read_unlock();
}

/*
 * cpuset_reset_sched_domains - rebuild a single sched domain under cpuset_mutex
 *
 * Calls partition_sched_domains(1, NULL, NULL) with cpuset_mutex held for
 * the duration of the call, then releases the mutex.
 *
 * dl_rebuild_rd_accounting() asserts that cpuset_mutex is held, so callers
 * that reset the sched domains should use this wrapper rather than calling
 * partition_sched_domains() directly.
 * NOTE(review): the path from partition_sched_domains() to
 * dl_rebuild_rd_accounting() is not visible here - confirm.
 */
void cpuset_reset_sched_domains(void)
{
	/* Hold cpuset_mutex across the whole repartitioning. */
	mutex_lock(&cpuset_mutex);
	partition_sched_domains(1, NULL, NULL);
	mutex_unlock(&cpuset_mutex);
}

/**
 * cpuset_update_tasks_cpumask - Update the cpumasks of tasks in the cpuset.
 * @cs: the cpuset in which each task's cpus_allowed mask needs to be changed
+2 −2
Original line number Diff line number Diff line
@@ -8229,7 +8229,7 @@ static void cpuset_cpu_active(void)
		 * operation in the resume sequence, just build a single sched
		 * domain, ignoring cpusets.
		 */
		partition_sched_domains(1, NULL, NULL);
		cpuset_reset_sched_domains();
		if (--num_cpus_frozen)
			return;
		/*
@@ -8248,7 +8248,7 @@ static void cpuset_cpu_inactive(unsigned int cpu)
		cpuset_update_active_cpus();
	} else {
		num_cpus_frozen++;
		partition_sched_domains(1, NULL, NULL);
		cpuset_reset_sched_domains();
	}
}

Loading