Commit e636ffb9 authored by Peter Zijlstra
Browse files

sched/deadline: Fix dl_server time accounting



The dl_server time accounting code is a little odd. The normal scheduler
pattern is to update curr before doing something, such that the old state is
fully accounted before changing state.

Notably, the dl_server_timer() needs to propagate the current time accounting
since the current task could be run by dl_server and thus this can affect
dl_se->runtime. Similarly for dl_server_start().

And since the (deferred) dl_server wants idle time accounted, rework
sched_idle_class time accounting to be more like all the others.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20251020141130.GJ3245006@noisy.programming.kicks-ass.net
parent e40cea33
Loading
Loading
Loading
Loading
+15 −25
Original line number Diff line number Diff line
@@ -1166,8 +1166,12 @@ static enum hrtimer_restart dl_server_timer(struct hrtimer *timer, struct sched_
		sched_clock_tick();
		update_rq_clock(rq);

		if (!dl_se->dl_runtime)
			return HRTIMER_NORESTART;
		/*
		 * Make sure current has propagated its pending runtime into
		 * any relevant server through calling dl_server_update() and
		 * friends.
		 */
		rq->donor->sched_class->update_curr(rq);

		if (dl_se->dl_defer_armed) {
			/*
@@ -1543,35 +1547,16 @@ static void update_curr_dl_se(struct rq *rq, struct sched_dl_entity *dl_se, s64
 * as time available for the fair server, avoiding a penalty for the
 * rt scheduler that did not consume that time.
 */
void dl_server_update_idle_time(struct rq *rq, struct task_struct *p)
void dl_server_update_idle(struct sched_dl_entity *dl_se, s64 delta_exec)
{
	s64 delta_exec;

	if (!rq->fair_server.dl_defer)
		return;

	/* no need to discount more */
	if (rq->fair_server.runtime < 0)
		return;

	delta_exec = rq_clock_task(rq) - p->se.exec_start;
	if (delta_exec < 0)
		return;

	rq->fair_server.runtime -= delta_exec;

	if (rq->fair_server.runtime < 0) {
		rq->fair_server.dl_defer_running = 0;
		rq->fair_server.runtime = 0;
	}

	p->se.exec_start = rq_clock_task(rq);
	if (dl_se->dl_server_active && dl_se->dl_runtime && dl_se->dl_defer)
		update_curr_dl_se(dl_se->rq, dl_se, delta_exec);
}

void dl_server_update(struct sched_dl_entity *dl_se, s64 delta_exec)
{
	/* 0 runtime = fair server disabled */
	if (dl_se->dl_runtime)
	if (dl_se->dl_server_active && dl_se->dl_runtime)
		update_curr_dl_se(dl_se->rq, dl_se, delta_exec);
}

@@ -1582,6 +1567,11 @@ void dl_server_start(struct sched_dl_entity *dl_se)
	if (!dl_server(dl_se) || dl_se->dl_server_active)
		return;

	/*
	 * Update the current task to 'now'.
	 */
	rq->donor->sched_class->update_curr(rq);

	if (WARN_ON_ONCE(!cpu_online(cpu_of(rq))))
		return;

+2 −7
Original line number Diff line number Diff line
@@ -1212,7 +1212,6 @@ static void update_curr(struct cfs_rq *cfs_rq)
		 *    against fair_server such that it can account for this time
		 *    and possibly avoid running this period.
		 */
		if (dl_server_active(&rq->fair_server))
		dl_server_update(&rq->fair_server, delta_exec);
	}

@@ -6961,12 +6960,8 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
			h_nr_idle = 1;
	}

	if (!rq_h_nr_queued && rq->cfs.h_nr_queued) {
		/* Account for idle runtime */
		if (!rq->nr_running)
			dl_server_update_idle_time(rq, rq->curr);
	if (!rq_h_nr_queued && rq->cfs.h_nr_queued)
		dl_server_start(&rq->fair_server);
	}

	/* At this point se is NULL and we are at root level*/
	add_nr_running(rq, 1);
+15 −1
Original line number Diff line number Diff line
@@ -452,9 +452,11 @@ static void wakeup_preempt_idle(struct rq *rq, struct task_struct *p, int flags)
	resched_curr(rq);
}

static void update_curr_idle(struct rq *rq);

static void put_prev_task_idle(struct rq *rq, struct task_struct *prev, struct task_struct *next)
{
	dl_server_update_idle_time(rq, prev);
	update_curr_idle(rq);
	scx_update_idle(rq, false, true);
}

@@ -496,6 +498,7 @@ dequeue_task_idle(struct rq *rq, struct task_struct *p, int flags)
 */
static void task_tick_idle(struct rq *rq, struct task_struct *curr, int queued)
{
	/*
	 * Bring the idle task's runtime accounting up to date; the helper
	 * forwards the elapsed time to the fair server.
	 */
	update_curr_idle(rq);
}

static void switching_to_idle(struct rq *rq, struct task_struct *p)
@@ -514,6 +517,17 @@ prio_changed_idle(struct rq *rq, struct task_struct *p, u64 oldprio)

/*
 * Account the time the idle task has run since its last update and hand
 * that delta to the fair server through dl_server_update_idle().
 */
static void update_curr_idle(struct rq *rq)
{
	struct sched_entity *se = &rq->idle->se;
	u64 now = rq_clock_task(rq);
	s64 delta_exec;

	delta_exec = now - se->exec_start;
	/* Nothing to account when no time has elapsed (or the delta is negative). */
	if (unlikely(delta_exec <= 0))
		return;

	/* Restart the accounting window at 'now' before reporting the delta. */
	se->exec_start = now;

	dl_server_update_idle(&rq->fair_server, delta_exec);
}

/*
+1 −2
Original line number Diff line number Diff line
@@ -404,6 +404,7 @@ extern s64 dl_scaled_delta_exec(struct rq *rq, struct sched_dl_entity *dl_se, s6
 * naturally throttled to once per period, avoiding high context switch
 * workloads from spamming the hrtimer program/cancel paths.
 */
extern void dl_server_update_idle(struct sched_dl_entity *dl_se, s64 delta_exec);
extern void dl_server_update(struct sched_dl_entity *dl_se, s64 delta_exec);
extern void dl_server_start(struct sched_dl_entity *dl_se);
extern void dl_server_stop(struct sched_dl_entity *dl_se);
@@ -411,8 +412,6 @@ extern void dl_server_init(struct sched_dl_entity *dl_se, struct rq *rq,
		    dl_server_pick_f pick_task);
extern void sched_init_dl_servers(void);

extern void dl_server_update_idle_time(struct rq *rq,
		    struct task_struct *p);
extern void fair_server_init(struct rq *rq);
extern void __dl_server_attach_root(struct sched_dl_entity *dl_se, struct rq *rq);
extern int dl_server_apply_params(struct sched_dl_entity *dl_se,