Commit 5a40a9bb authored by Peter Zijlstra
Browse files

sched/debug: Fix dl_server (re)start conditions



There are two problems with sched_server_write_common() that can cause the
dl_server to malfunction upon attempting to change the parameters:

1) when, after having disabled the dl_server by setting runtime=0, it is
   enabled again while tasks are already enqueued. In this case is_active would
   still be 0 and dl_server_start() would not be called.

2) when dl_server_apply_params() fails, the new runtime is not applied, so the
   local runtime value no longer reflects the actual state of the dl_server.

Instead have dl_server_start() check its actual dl_runtime, and have
sched_server_write_common() unconditionally (re)start the dl_server. It will
automatically stop if there isn't anything to do, so spurious activation is
harmless -- while failing to start it is a problem.

While there, move the printk out of the locked region and make it symmetric,
also printing on enable.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20260203103407.GK1282955@noisy.programming.kicks-ass.net
parent 76d12132
Loading
Loading
Loading
Loading
+2 −3
Original line number Diff line number Diff line
@@ -1799,7 +1799,7 @@ void dl_server_start(struct sched_dl_entity *dl_se)
	struct rq *rq = dl_se->rq;

	dl_se->dl_defer_idle = 0;
	if (!dl_server(dl_se) || dl_se->dl_server_active)
	if (!dl_server(dl_se) || dl_se->dl_server_active || !dl_se->dl_runtime)
		return;

	/*
@@ -1898,7 +1898,6 @@ int dl_server_apply_params(struct sched_dl_entity *dl_se, u64 runtime, u64 perio
	int cpu = cpu_of(rq);
	struct dl_bw *dl_b;
	unsigned long cap;
	int retval = 0;
	int cpus;

	dl_b = dl_bw_of(cpu);
@@ -1930,7 +1929,7 @@ int dl_server_apply_params(struct sched_dl_entity *dl_se, u64 runtime, u64 perio
	dl_se->dl_bw = to_ratio(dl_se->dl_period, dl_se->dl_runtime);
	dl_se->dl_density = to_ratio(dl_se->dl_deadline, dl_se->dl_runtime);

	return retval;
	return 0;
}

/*
+14 −18
Original line number Diff line number Diff line
@@ -338,9 +338,9 @@ static ssize_t sched_server_write_common(struct file *filp, const char __user *u
					 void *server)
{
	long cpu = (long) ((struct seq_file *) filp->private_data)->private;
	struct rq *rq = cpu_rq(cpu);
	struct sched_dl_entity *dl_se = (struct sched_dl_entity *)server;
	u64 runtime, period;
	u64 old_runtime, runtime, period;
	struct rq *rq = cpu_rq(cpu);
	int retval = 0;
	size_t err;
	u64 value;
@@ -350,9 +350,7 @@ static ssize_t sched_server_write_common(struct file *filp, const char __user *u
		return err;

	scoped_guard (rq_lock_irqsave, rq) {
		bool is_active;

		runtime = dl_se->dl_runtime;
		old_runtime = runtime = dl_se->dl_runtime;
		period = dl_se->dl_period;

		switch (param) {
@@ -374,25 +372,23 @@ static ssize_t sched_server_write_common(struct file *filp, const char __user *u
			return  -EINVAL;
		}

		is_active = dl_server_active(dl_se);
		if (is_active) {
		update_rq_clock(rq);
		dl_server_stop(dl_se);
		}

		retval = dl_server_apply_params(dl_se, runtime, period, 0);

		if (!runtime)
			printk_deferred("%s server disabled in CPU %d, system may crash due to starvation.\n",
					server == &rq->fair_server ? "Fair" : "Ext", cpu_of(rq));

		if (is_active && runtime)
		dl_server_start(dl_se);

		if (retval < 0)
			return retval;
	}

	if (!!old_runtime ^ !!runtime) {
		pr_info("%s server %sabled on CPU %d%s.\n",
			server == &rq->fair_server ? "Fair" : "Ext",
			runtime ? "en" : "dis",
			cpu_of(rq),
			runtime ? "" : ", system may malfunction due to starvation");
	}

	*ppos += cnt;
	return cnt;
}