Commit ac8e69e6 authored by Vincent Guittot's avatar Vincent Guittot Committed by Peter Zijlstra
Browse files

sched/fair: Fix wakeup_preempt_fair() vs delayed dequeue



Similar to how pick_next_entity() must dequeue delayed entities, so too must
wakeup_preempt_fair(). Any delayed task being found means it is eligible and
hence past the 0-lag point, ready for removal.

Worse, by not removing delayed entities from consideration, it can skew the
preemption decision, with the end result that a short slice wakeup will not
result in a preemption.

                     tip/sched/core  tip/sched/core    +this patch
cyclictest slice  (ms) (default)2.8             8               8
hackbench slice   (ms) (default)2.8            20              20
Total Samples          |    22559           22595           22683
Average           (us) |      157              64( 59%)        59(  8%)
Median (P50)      (us) |       57              57(  0%)        58(- 2%)
90th Percentile   (us) |       64              60(  6%)        60(  0%)
99th Percentile   (us) |     2407              67( 97%)        67(  0%)
99.9th Percentile (us) |     3400            2288( 33%)       727( 68%)
Maximum           (us) |     5037            9252(-84%)      7461( 19%)

Fixes: f12e1488 ("sched/fair: Prepare pick_next_task() for delayed dequeue")
Signed-off-by: default avatarVincent Guittot <vincent.guittot@linaro.org>
Signed-off-by: default avatarPeter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20260422093400.319251-1-vincent.guittot@linaro.org
parent c5cd6fd7
Loading
Loading
Loading
Loading
+14 −13
Original line number Diff line number Diff line
@@ -1104,7 +1104,7 @@ static inline void cancel_protect_slice(struct sched_entity *se)
 *
 * Which allows tree pruning through eligibility.
 */
static struct sched_entity *__pick_eevdf(struct cfs_rq *cfs_rq, bool protect)
static struct sched_entity *pick_eevdf(struct cfs_rq *cfs_rq, bool protect)
{
	struct rb_node *node = cfs_rq->tasks_timeline.rb_root.rb_node;
	struct sched_entity *se = __pick_first_entity(cfs_rq);
@@ -1175,11 +1175,6 @@ static struct sched_entity *__pick_eevdf(struct cfs_rq *cfs_rq, bool protect)
	return best;
}

static struct sched_entity *pick_eevdf(struct cfs_rq *cfs_rq)
{
	return __pick_eevdf(cfs_rq, true);
}

struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
{
	struct rb_node *last = rb_last(&cfs_rq->tasks_timeline.rb_root);
@@ -5754,11 +5749,11 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags);
 * 4) do not run the "skip" process, if something else is available
 */
static struct sched_entity *
pick_next_entity(struct rq *rq, struct cfs_rq *cfs_rq)
pick_next_entity(struct rq *rq, struct cfs_rq *cfs_rq, bool protect)
{
	struct sched_entity *se;

	se = pick_eevdf(cfs_rq);
	se = pick_eevdf(cfs_rq, protect);
	if (se->sched_delayed) {
		dequeue_entities(rq, se, DEQUEUE_SLEEP | DEQUEUE_DELAYED);
		/*
@@ -9032,7 +9027,7 @@ static void wakeup_preempt_fair(struct rq *rq, struct task_struct *p, int wake_f
{
	enum preempt_wakeup_action preempt_action = PREEMPT_WAKEUP_PICK;
	struct task_struct *donor = rq->donor;
	struct sched_entity *se = &donor->se, *pse = &p->se;
	struct sched_entity *nse, *se = &donor->se, *pse = &p->se;
	struct cfs_rq *cfs_rq = task_cfs_rq(donor);
	int cse_is_idle, pse_is_idle;

@@ -9143,11 +9138,17 @@ static void wakeup_preempt_fair(struct rq *rq, struct task_struct *p, int wake_f
	}

pick:
	nse = pick_next_entity(rq, cfs_rq, preempt_action != PREEMPT_WAKEUP_SHORT);
	/* If @p has become the most eligible task, force preemption */
	if (nse == pse)
		goto preempt;

	/*
	 * If @p has become the most eligible task, force preemption.
	 * Because p is enqueued, nse being null can only mean that we
	 * dequeued a delayed task.
	 */
	if (__pick_eevdf(cfs_rq, preempt_action != PREEMPT_WAKEUP_SHORT) == pse)
		goto preempt;
	if (!nse)
		goto pick;

	if (sched_feat(RUN_TO_PARITY))
		update_protect_slice(cfs_rq, se);
@@ -9184,7 +9185,7 @@ static struct task_struct *pick_task_fair(struct rq *rq, struct rq_flags *rf)

		throttled |= check_cfs_rq_runtime(cfs_rq);

		se = pick_next_entity(rq, cfs_rq);
		se = pick_next_entity(rq, cfs_rq, true);
		if (!se)
			goto again;
		cfs_rq = group_cfs_rq(se);