Commit acd855a9 authored by Linus Torvalds
Browse files

Merge tag 'sched_urgent_for_v6.13_rc3-p2' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler fixes from Borislav Petkov:

 - Prevent incorrect dequeueing of the deadline dlserver helper task and
   fix its time accounting

 - Properly track the CFS runqueue runnable stats

 - Check the total number of all queued tasks in a sched fair's runqueue
   hierarchy before deciding to stop the tick

 - Fix the scheduling of the task that got woken last (NEXT_BUDDY) by
   preventing those from being delayed

* tag 'sched_urgent_for_v6.13_rc3-p2' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched/dlserver: Fix dlserver time accounting
  sched/dlserver: Fix dlserver double enqueue
  sched/eevdf: More PELT vs DELAYED_DEQUEUE
  sched/fair: Fix sched_can_stop_tick() for fair tasks
  sched/fair: Fix NEXT_BUDDY
parents 81576a9a c7f7e9c7
Loading
Loading
Loading
Loading
+7 −0
Original line number Diff line number Diff line
@@ -656,6 +656,12 @@ struct sched_dl_entity {
	 * @dl_defer_armed tells if the deferrable server is waiting
	 * for the replenishment timer to activate it.
	 *
	 * @dl_server_active tells if the dlserver is active (started).
	 * The dlserver is started on the first cfs enqueue on an idle
	 * runqueue and is stopped when a dequeue results in 0 cfs tasks on
	 * the runqueue. In other words, the dlserver is active only when the
	 * CPU's runqueue has at least one cfs task.
	 *
	 * @dl_defer_running tells if the deferrable server is actually
	 * running, skipping the defer phase.
	 */
@@ -664,6 +670,7 @@ struct sched_dl_entity {
	unsigned int			dl_non_contending : 1;
	unsigned int			dl_overrun	  : 1;
	unsigned int			dl_server         : 1;
	unsigned int			dl_server_active  : 1;
	unsigned int			dl_defer	  : 1;
	unsigned int			dl_defer_armed	  : 1;
	unsigned int			dl_defer_running  : 1;
+1 −1
Original line number Diff line number Diff line
@@ -1341,7 +1341,7 @@ bool sched_can_stop_tick(struct rq *rq)
	if (scx_enabled() && !scx_can_stop_tick(rq))
		return false;

	if (rq->cfs.nr_running > 1)
	if (rq->cfs.h_nr_running > 1)
		return false;

	/*
+6 −2
Original line number Diff line number Diff line
@@ -1647,6 +1647,7 @@ void dl_server_start(struct sched_dl_entity *dl_se)
	if (!dl_se->dl_runtime)
		return;

	dl_se->dl_server_active = 1;
	enqueue_dl_entity(dl_se, ENQUEUE_WAKEUP);
	if (!dl_task(dl_se->rq->curr) || dl_entity_preempt(dl_se, &rq->curr->dl))
		resched_curr(dl_se->rq);
@@ -1661,6 +1662,7 @@ void dl_server_stop(struct sched_dl_entity *dl_se)
	hrtimer_try_to_cancel(&dl_se->dl_timer);
	dl_se->dl_defer_armed = 0;
	dl_se->dl_throttled = 0;
	dl_se->dl_server_active = 0;
}

void dl_server_init(struct sched_dl_entity *dl_se, struct rq *rq,
@@ -2421,8 +2423,10 @@ static struct task_struct *__pick_task_dl(struct rq *rq)
	if (dl_server(dl_se)) {
		p = dl_se->server_pick_task(dl_se);
		if (!p) {
			if (dl_server_active(dl_se)) {
				dl_se->dl_yielded = 1;
				update_curr_dl_se(rq, dl_se, 0);
			}
			goto again;
		}
		rq->dl_server = dl_se;
+1 −0
Original line number Diff line number Diff line
@@ -845,6 +845,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "spread", SPLIT_NS(spread));
	SEQ_printf(m, "  .%-30s: %d\n", "nr_running", cfs_rq->nr_running);
	SEQ_printf(m, "  .%-30s: %d\n", "h_nr_running", cfs_rq->h_nr_running);
	SEQ_printf(m, "  .%-30s: %d\n", "h_nr_delayed", cfs_rq->h_nr_delayed);
	SEQ_printf(m, "  .%-30s: %d\n", "idle_nr_running",
			cfs_rq->idle_nr_running);
	SEQ_printf(m, "  .%-30s: %d\n", "idle_h_nr_running",
+57 −16
Original line number Diff line number Diff line
@@ -1159,8 +1159,6 @@ static inline void update_curr_task(struct task_struct *p, s64 delta_exec)
	trace_sched_stat_runtime(p, delta_exec);
	account_group_exec_runtime(p, delta_exec);
	cgroup_account_cputime(p, delta_exec);
	if (p->dl_server)
		dl_server_update(p->dl_server, delta_exec);
}

static inline bool did_preempt_short(struct cfs_rq *cfs_rq, struct sched_entity *curr)
@@ -1237,11 +1235,16 @@ static void update_curr(struct cfs_rq *cfs_rq)
		update_curr_task(p, delta_exec);

		/*
		 * Any fair task that runs outside of fair_server should
		 * account against fair_server such that it can account for
		 * this time and possibly avoid running this period.
		 * If the fair_server is active, we need to account for the
		 * fair_server time whether or not the task is running on
		 * behalf of fair_server:
		 *  - If the task is running on behalf of fair_server, we need
		 *    to limit its time based on the assigned runtime.
		 *  - Fair task that runs outside of fair_server should account
		 *    against fair_server such that it can account for this time
		 *    and possibly avoid running this period.
		 */
		if (p->dl_server != &rq->fair_server)
		if (dl_server_active(&rq->fair_server))
			dl_server_update(&rq->fair_server, delta_exec);
	}

@@ -5471,9 +5474,33 @@ static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)

static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq);

static inline void finish_delayed_dequeue_entity(struct sched_entity *se)
/*
 * Mark @se as delayed and bump the h_nr_delayed counter of every cfs_rq
 * visited while walking up from @se. The walk ends right after the first
 * throttled cfs_rq has been accounted (that cfs_rq is still incremented).
 */
static void set_delayed(struct sched_entity *se)
{
	struct cfs_rq *cfs_rq;

	se->sched_delayed = 1;

	for_each_sched_entity(se) {
		cfs_rq = cfs_rq_of(se);
		cfs_rq->h_nr_delayed += 1;

		/* Nothing follows the loop, so leaving it ends the function. */
		if (cfs_rq_throttled(cfs_rq))
			return;
	}
}

/*
 * Clear the delayed mark on @se and drop the h_nr_delayed counter of every
 * cfs_rq visited while walking up from @se. The walk ends right after the
 * first throttled cfs_rq has been accounted (that cfs_rq is still
 * decremented).
 */
static void clear_delayed(struct sched_entity *se)
{
	struct cfs_rq *cfs_rq;

	se->sched_delayed = 0;

	for_each_sched_entity(se) {
		cfs_rq = cfs_rq_of(se);
		cfs_rq->h_nr_delayed -= 1;

		/* Nothing follows the loop, so leaving it ends the function. */
		if (cfs_rq_throttled(cfs_rq))
			return;
	}
}

/*
 * Complete a delayed dequeue of @se: clear its delayed state and, when the
 * DELAY_ZERO feature is enabled, clamp a positive vlag down to zero.
 */
static inline void finish_delayed_dequeue_entity(struct sched_entity *se)
{
	clear_delayed(se);

	if (!sched_feat(DELAY_ZERO))
		return;

	if (se->vlag > 0)
		se->vlag = 0;
}
@@ -5484,6 +5511,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
	bool sleep = flags & DEQUEUE_SLEEP;

	update_curr(cfs_rq);
	clear_buddies(cfs_rq, se);

	if (flags & DEQUEUE_DELAYED) {
		SCHED_WARN_ON(!se->sched_delayed);
@@ -5500,10 +5528,8 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)

		if (sched_feat(DELAY_DEQUEUE) && delay &&
		    !entity_eligible(cfs_rq, se)) {
			if (cfs_rq->next == se)
				cfs_rq->next = NULL;
			update_load_avg(cfs_rq, se, 0);
			se->sched_delayed = 1;
			set_delayed(se);
			return false;
		}
	}
@@ -5526,8 +5552,6 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)

	update_stats_dequeue_fair(cfs_rq, se, flags);

	clear_buddies(cfs_rq, se);

	update_entity_lag(cfs_rq, se);
	if (sched_feat(PLACE_REL_DEADLINE) && !sleep) {
		se->deadline -= se->vruntime;
@@ -5917,7 +5941,7 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)
	struct rq *rq = rq_of(cfs_rq);
	struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
	struct sched_entity *se;
	long task_delta, idle_task_delta, dequeue = 1;
	long task_delta, idle_task_delta, delayed_delta, dequeue = 1;
	long rq_h_nr_running = rq->cfs.h_nr_running;

	raw_spin_lock(&cfs_b->lock);
@@ -5950,6 +5974,7 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)

	task_delta = cfs_rq->h_nr_running;
	idle_task_delta = cfs_rq->idle_h_nr_running;
	delayed_delta = cfs_rq->h_nr_delayed;
	for_each_sched_entity(se) {
		struct cfs_rq *qcfs_rq = cfs_rq_of(se);
		int flags;
@@ -5973,6 +5998,7 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)

		qcfs_rq->h_nr_running -= task_delta;
		qcfs_rq->idle_h_nr_running -= idle_task_delta;
		qcfs_rq->h_nr_delayed -= delayed_delta;

		if (qcfs_rq->load.weight) {
			/* Avoid re-evaluating load for this entity: */
@@ -5995,6 +6021,7 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)

		qcfs_rq->h_nr_running -= task_delta;
		qcfs_rq->idle_h_nr_running -= idle_task_delta;
		qcfs_rq->h_nr_delayed -= delayed_delta;
	}

	/* At this point se is NULL and we are at root level */
@@ -6020,7 +6047,7 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
	struct rq *rq = rq_of(cfs_rq);
	struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
	struct sched_entity *se;
	long task_delta, idle_task_delta;
	long task_delta, idle_task_delta, delayed_delta;
	long rq_h_nr_running = rq->cfs.h_nr_running;

	se = cfs_rq->tg->se[cpu_of(rq)];
@@ -6056,6 +6083,7 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)

	task_delta = cfs_rq->h_nr_running;
	idle_task_delta = cfs_rq->idle_h_nr_running;
	delayed_delta = cfs_rq->h_nr_delayed;
	for_each_sched_entity(se) {
		struct cfs_rq *qcfs_rq = cfs_rq_of(se);

@@ -6073,6 +6101,7 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)

		qcfs_rq->h_nr_running += task_delta;
		qcfs_rq->idle_h_nr_running += idle_task_delta;
		qcfs_rq->h_nr_delayed += delayed_delta;

		/* end evaluation on encountering a throttled cfs_rq */
		if (cfs_rq_throttled(qcfs_rq))
@@ -6090,6 +6119,7 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)

		qcfs_rq->h_nr_running += task_delta;
		qcfs_rq->idle_h_nr_running += idle_task_delta;
		qcfs_rq->h_nr_delayed += delayed_delta;

		/* end evaluation on encountering a throttled cfs_rq */
		if (cfs_rq_throttled(qcfs_rq))
@@ -6943,7 +6973,7 @@ requeue_delayed_entity(struct sched_entity *se)
	}

	update_load_avg(cfs_rq, se, 0);
	se->sched_delayed = 0;
	clear_delayed(se);
}

/*
@@ -6957,6 +6987,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
	struct cfs_rq *cfs_rq;
	struct sched_entity *se = &p->se;
	int idle_h_nr_running = task_has_idle_policy(p);
	int h_nr_delayed = 0;
	int task_new = !(flags & ENQUEUE_WAKEUP);
	int rq_h_nr_running = rq->cfs.h_nr_running;
	u64 slice = 0;
@@ -6983,6 +7014,9 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
	if (p->in_iowait)
		cpufreq_update_util(rq, SCHED_CPUFREQ_IOWAIT);

	if (task_new)
		h_nr_delayed = !!se->sched_delayed;

	for_each_sched_entity(se) {
		if (se->on_rq) {
			if (se->sched_delayed)
@@ -7005,6 +7039,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)

		cfs_rq->h_nr_running++;
		cfs_rq->idle_h_nr_running += idle_h_nr_running;
		cfs_rq->h_nr_delayed += h_nr_delayed;

		if (cfs_rq_is_idle(cfs_rq))
			idle_h_nr_running = 1;
@@ -7028,6 +7063,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)

		cfs_rq->h_nr_running++;
		cfs_rq->idle_h_nr_running += idle_h_nr_running;
		cfs_rq->h_nr_delayed += h_nr_delayed;

		if (cfs_rq_is_idle(cfs_rq))
			idle_h_nr_running = 1;
@@ -7090,6 +7126,7 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
	struct task_struct *p = NULL;
	int idle_h_nr_running = 0;
	int h_nr_running = 0;
	int h_nr_delayed = 0;
	struct cfs_rq *cfs_rq;
	u64 slice = 0;

@@ -7097,6 +7134,8 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
		p = task_of(se);
		h_nr_running = 1;
		idle_h_nr_running = task_has_idle_policy(p);
		if (!task_sleep && !task_delayed)
			h_nr_delayed = !!se->sched_delayed;
	} else {
		cfs_rq = group_cfs_rq(se);
		slice = cfs_rq_min_slice(cfs_rq);
@@ -7114,6 +7153,7 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)

		cfs_rq->h_nr_running -= h_nr_running;
		cfs_rq->idle_h_nr_running -= idle_h_nr_running;
		cfs_rq->h_nr_delayed -= h_nr_delayed;

		if (cfs_rq_is_idle(cfs_rq))
			idle_h_nr_running = h_nr_running;
@@ -7152,6 +7192,7 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)

		cfs_rq->h_nr_running -= h_nr_running;
		cfs_rq->idle_h_nr_running -= idle_h_nr_running;
		cfs_rq->h_nr_delayed -= h_nr_delayed;

		if (cfs_rq_is_idle(cfs_rq))
			idle_h_nr_running = h_nr_running;
@@ -8780,7 +8821,7 @@ static void check_preempt_wakeup_fair(struct rq *rq, struct task_struct *p, int
	if (unlikely(throttled_hierarchy(cfs_rq_of(pse))))
		return;

	if (sched_feat(NEXT_BUDDY) && !(wake_flags & WF_FORK)) {
	if (sched_feat(NEXT_BUDDY) && !(wake_flags & WF_FORK) && !pse->sched_delayed) {
		set_next_buddy(pse);
	}

Loading