Commit 0df340ce authored by Tejun Heo's avatar Tejun Heo
Browse files

Merge branch 'sched/core' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip into for-6.12



Pull tip/sched/core to resolve the following four conflicts. While 2-4 are
simple context conflicts, 1 is a bit subtle and easy to resolve incorrectly.

1. 2c8d046d ("sched: Add normal_policy()")
   vs.
   faa42d29 ("sched/fair: Make SCHED_IDLE entity be preempted in strict hierarchy")

The former converts direct test on p->policy to use the helper
normal_policy(). The latter moves the p->policy test to a different
location. Resolve by converting the test on p->plicy in the new location to
use normal_policy().

2. a7a9fc54 ("sched_ext: Add boilerplate for extensible scheduler class")
   vs.
   a110a81c ("sched/deadline: Deferrable dl server")

Both add calls to put_prev_task_idle() and set_next_task_idle(). Simple
context conflict. Resolve by taking changes from both.

3. a7a9fc54 ("sched_ext: Add boilerplate for extensible scheduler class")
   vs.
   c2459100 ("sched/core: Add clearing of ->dl_server in put_prev_task_balance()")

The former changes for_each_class() itertion to use for_each_active_class().
The latter moves away the adjacent dl_server handling code. Simple context
conflict. Resolve by taking changes from both.

4. 60c27fb5 ("sched_ext: Implement sched_ext_ops.cpu_online/offline()")
   vs.
   31b164e2 ("sched/smt: Introduce sched_smt_present_inc/dec() helper")
   2f027354 ("sched/core: Introduce sched_set_rq_on/offline() helper")

The former adds scx_rq_deactivate() call. The latter two change code around
it. Simple context conflict. Resolve by taking changes from both.

Signed-off-by: default avatarTejun Heo <tj@kernel.org>
parents e99129e5 cea5a347
Loading
Loading
Loading
Loading
+16 −1
Original line number Diff line number Diff line
@@ -641,12 +641,26 @@ struct sched_dl_entity {
	 *
	 * @dl_overrun tells if the task asked to be informed about runtime
	 * overruns.
	 *
	 * @dl_server tells if this is a server entity.
	 *
	 * @dl_defer tells if this is a deferred or regular server. For
	 * now only defer server exists.
	 *
	 * @dl_defer_armed tells if the deferrable server is waiting
	 * for the replenishment timer to activate it.
	 *
	 * @dl_defer_running tells if the deferrable server is actually
	 * running, skipping the defer phase.
	 */
	unsigned int			dl_throttled      : 1;
	unsigned int			dl_yielded        : 1;
	unsigned int			dl_non_contending : 1;
	unsigned int			dl_overrun	  : 1;
	unsigned int			dl_server         : 1;
	unsigned int			dl_defer	  : 1;
	unsigned int			dl_defer_armed	  : 1;
	unsigned int			dl_defer_running  : 1;

	/*
	 * Bandwidth enforcement timer. Each -deadline task has its
@@ -674,7 +688,8 @@ struct sched_dl_entity {
	 */
	struct rq			*rq;
	dl_server_has_tasks_f		server_has_tasks;
	dl_server_pick_f		server_pick;
	dl_server_pick_f		server_pick_next;
	dl_server_pick_f		server_pick_task;

#ifdef CONFIG_RT_MUTEXES
	/*
+107 −39
Original line number Diff line number Diff line
@@ -163,6 +163,9 @@ static inline int __task_prio(const struct task_struct *p)
	if (p->sched_class == &stop_sched_class) /* trumps deadline */
		return -2;

	if (p->dl_server)
		return -1; /* deadline */

	if (rt_prio(p->prio)) /* includes deadline */
		return p->prio; /* [-1, 99] */

@@ -195,8 +198,24 @@ static inline bool prio_less(const struct task_struct *a,
	if (-pb < -pa)
		return false;

	if (pa == -1) /* dl_prio() doesn't work because of stop_class above */
		return !dl_time_before(a->dl.deadline, b->dl.deadline);
	if (pa == -1) { /* dl_prio() doesn't work because of stop_class above */
		const struct sched_dl_entity *a_dl, *b_dl;

		a_dl = &a->dl;
		/*
		 * Since,'a' and 'b' can be CFS tasks served by DL server,
		 * __task_prio() can return -1 (for DL) even for those. In that
		 * case, get to the dl_server's DL entity.
		 */
		if (a->dl_server)
			a_dl = a->dl_server;

		b_dl = &b->dl;
		if (b->dl_server)
			b_dl = b->dl_server;

		return !dl_time_before(a_dl->deadline, b_dl->deadline);
	}

	if (pa == MAX_RT_PRIO + MAX_NICE)	/* fair */
		return cfs_prio_less(a, b, in_fi);
@@ -1280,7 +1299,7 @@ bool sched_can_stop_tick(struct rq *rq)
	 * dequeued by migrating while the constrained task continues to run.
	 * E.g. going from 2->1 without going through pick_next_task().
	 */
	if (sched_feat(HZ_BW) && __need_bw_check(rq, rq->curr)) {
	if (__need_bw_check(rq, rq->curr)) {
		if (cfs_task_bw_constrained(rq->curr))
			return false;
	}
@@ -2255,6 +2274,12 @@ void migrate_disable(void)
	struct task_struct *p = current;

	if (p->migration_disabled) {
#ifdef CONFIG_DEBUG_PREEMPT
		/*
		 *Warn about overflow half-way through the range.
		 */
		WARN_ON_ONCE((s16)p->migration_disabled < 0);
#endif
		p->migration_disabled++;
		return;
	}
@@ -2273,14 +2298,20 @@ void migrate_enable(void)
		.flags     = SCA_MIGRATE_ENABLE,
	};

#ifdef CONFIG_DEBUG_PREEMPT
	/*
	 * Check both overflow from migrate_disable() and superfluous
	 * migrate_enable().
	 */
	if (WARN_ON_ONCE((s16)p->migration_disabled <= 0))
		return;
#endif

	if (p->migration_disabled > 1) {
		p->migration_disabled--;
		return;
	}

	if (WARN_ON_ONCE(!p->migration_disabled))
		return;

	/*
	 * Ensure stop_task runs either before or after this, and that
	 * __set_cpus_allowed_ptr(SCA_MIGRATE_ENABLE) doesn't schedule().
@@ -4737,7 +4768,7 @@ void wake_up_new_task(struct task_struct *p)
	update_rq_clock(rq);
	post_init_entity_util_avg(p);

	activate_task(rq, p, ENQUEUE_NOCLOCK);
	activate_task(rq, p, ENQUEUE_NOCLOCK | ENQUEUE_INITIAL);
	trace_sched_wakeup_new(p);
	wakeup_preempt(rq, p, WF_FORK);
#ifdef CONFIG_SMP
@@ -5855,6 +5886,14 @@ static void put_prev_task_balance(struct rq *rq, struct task_struct *prev,
#endif

	put_prev_task(rq, prev);

	/*
	 * We've updated @prev and no longer need the server link, clear it.
	 * Must be done before ->pick_next_task() because that can (re)set
	 * ->dl_server.
	 */
	if (prev->dl_server)
		prev->dl_server = NULL;
}

/*
@@ -5888,6 +5927,13 @@ __pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
			p = pick_next_task_idle(rq);
		}

		/*
		 * This is a normal CFS pick, but the previous could be a DL pick.
		 * Clear it as previous is no longer picked.
		 */
		if (prev->dl_server)
			prev->dl_server = NULL;

		/*
		 * This is the fast path; it cannot be a DL server pick;
		 * therefore even if @p == @prev, ->dl_server must be NULL.
@@ -5901,14 +5947,6 @@ __pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
restart:
	put_prev_task_balance(rq, prev, rf);

	/*
	 * We've updated @prev and no longer need the server link, clear it.
	 * Must be done before ->pick_next_task() because that can (re)set
	 * ->dl_server.
	 */
	if (prev->dl_server)
		prev->dl_server = NULL;

	for_each_active_class(class) {
		p = class->pick_next_task(rq);
		if (p) {
@@ -7925,6 +7963,30 @@ void set_rq_offline(struct rq *rq)
	}
}

static inline void sched_set_rq_online(struct rq *rq, int cpu)
{
	struct rq_flags rf;

	rq_lock_irqsave(rq, &rf);
	if (rq->rd) {
		BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
		set_rq_online(rq);
	}
	rq_unlock_irqrestore(rq, &rf);
}

static inline void sched_set_rq_offline(struct rq *rq, int cpu)
{
	struct rq_flags rf;

	rq_lock_irqsave(rq, &rf);
	if (rq->rd) {
		BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
		set_rq_offline(rq);
	}
	rq_unlock_irqrestore(rq, &rf);
}

/*
 * used to mark begin/end of suspend/resume:
 */
@@ -7975,10 +8037,25 @@ static int cpuset_cpu_inactive(unsigned int cpu)
	return 0;
}

static inline void sched_smt_present_inc(int cpu)
{
#ifdef CONFIG_SCHED_SMT
	if (cpumask_weight(cpu_smt_mask(cpu)) == 2)
		static_branch_inc_cpuslocked(&sched_smt_present);
#endif
}

static inline void sched_smt_present_dec(int cpu)
{
#ifdef CONFIG_SCHED_SMT
	if (cpumask_weight(cpu_smt_mask(cpu)) == 2)
		static_branch_dec_cpuslocked(&sched_smt_present);
#endif
}

int sched_cpu_activate(unsigned int cpu)
{
	struct rq *rq = cpu_rq(cpu);
	struct rq_flags rf;

	/*
	 * Clear the balance_push callback and prepare to schedule
@@ -7986,13 +8063,10 @@ int sched_cpu_activate(unsigned int cpu)
	 */
	balance_push_set(cpu, false);

#ifdef CONFIG_SCHED_SMT
	/*
	 * When going up, increment the number of cores with SMT present.
	 */
	if (cpumask_weight(cpu_smt_mask(cpu)) == 2)
		static_branch_inc_cpuslocked(&sched_smt_present);
#endif
	sched_smt_present_inc(cpu);
	set_cpu_active(cpu, true);

	if (sched_smp_initialized) {
@@ -8012,12 +8086,7 @@ int sched_cpu_activate(unsigned int cpu)
	 * 2) At runtime, if cpuset_cpu_active() fails to rebuild the
	 *    domains.
	 */
	rq_lock_irqsave(rq, &rf);
	if (rq->rd) {
		BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
		set_rq_online(rq);
	}
	rq_unlock_irqrestore(rq, &rf);
	sched_set_rq_online(rq, cpu);

	return 0;
}
@@ -8025,7 +8094,6 @@ int sched_cpu_activate(unsigned int cpu)
int sched_cpu_deactivate(unsigned int cpu)
{
	struct rq *rq = cpu_rq(cpu);
	struct rq_flags rf;
	int ret;

	/*
@@ -8056,22 +8124,16 @@ int sched_cpu_deactivate(unsigned int cpu)
	 */
	synchronize_rcu();

	rq_lock_irqsave(rq, &rf);
	if (rq->rd) {
		BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
		set_rq_offline(rq);
	}
	rq_unlock_irqrestore(rq, &rf);
	sched_set_rq_offline(rq, cpu);

	scx_rq_deactivate(rq);

#ifdef CONFIG_SCHED_SMT
	/*
	 * When going down, decrement the number of cores with SMT present.
	 */
	if (cpumask_weight(cpu_smt_mask(cpu)) == 2)
		static_branch_dec_cpuslocked(&sched_smt_present);
	sched_smt_present_dec(cpu);

#ifdef CONFIG_SCHED_SMT
	sched_core_cpu_deactivate(cpu);
#endif

@@ -8081,6 +8143,8 @@ int sched_cpu_deactivate(unsigned int cpu)
	sched_update_numa(cpu, false);
	ret = cpuset_cpu_inactive(cpu);
	if (ret) {
		sched_smt_present_inc(cpu);
		sched_set_rq_online(rq, cpu);
		balance_push_set(cpu, false);
		set_cpu_active(cpu, true);
		sched_update_numa(cpu, true);
@@ -8290,8 +8354,6 @@ void __init sched_init(void)
#endif /* CONFIG_RT_GROUP_SCHED */
	}

	init_rt_bandwidth(&def_rt_bandwidth, global_rt_period(), global_rt_runtime());

#ifdef CONFIG_SMP
	init_defrootdomain();
#endif
@@ -8346,8 +8408,13 @@ void __init sched_init(void)
		init_tg_cfs_entry(&root_task_group, &rq->cfs, NULL, i, NULL);
#endif /* CONFIG_FAIR_GROUP_SCHED */

		rq->rt.rt_runtime = def_rt_bandwidth.rt_runtime;
#ifdef CONFIG_RT_GROUP_SCHED
		/*
		 * This is required for init cpu because rt.c:__enable_runtime()
		 * starts working after scheduler_running, which is not the case
		 * yet.
		 */
		rq->rt.rt_runtime = global_rt_runtime();
		init_tg_rt_entry(&root_task_group, &rq->rt, NULL, i, NULL);
#endif
#ifdef CONFIG_SMP
@@ -8379,6 +8446,7 @@ void __init sched_init(void)
#endif /* CONFIG_SMP */
		hrtick_rq_init(rq);
		atomic_set(&rq->nr_iowait, 0);
		fair_server_init(rq);

#ifdef CONFIG_SCHED_CORE
		rq->core = rq;
+6 −0
Original line number Diff line number Diff line
@@ -582,6 +582,12 @@ void cputime_adjust(struct task_cputime *curr, struct prev_cputime *prev,
	}

	stime = mul_u64_u64_div_u64(stime, rtime, stime + utime);
	/*
	 * Because mul_u64_u64_div_u64() can approximate on some
	 * achitectures; enforce the constraint that: a*b/(b+c) <= a.
	 */
	if (unlikely(stime > rtime))
		stime = rtime;

update:
	/*
+390 −59
Original line number Diff line number Diff line
@@ -320,19 +320,12 @@ void sub_running_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
		__sub_running_bw(dl_se->dl_bw, dl_rq);
}

static void dl_change_utilization(struct task_struct *p, u64 new_bw)
static void dl_rq_change_utilization(struct rq *rq, struct sched_dl_entity *dl_se, u64 new_bw)
{
	struct rq *rq;

	WARN_ON_ONCE(p->dl.flags & SCHED_FLAG_SUGOV);

	if (task_on_rq_queued(p))
		return;
	if (dl_se->dl_non_contending) {
		sub_running_bw(dl_se, &rq->dl);
		dl_se->dl_non_contending = 0;

	rq = task_rq(p);
	if (p->dl.dl_non_contending) {
		sub_running_bw(&p->dl, &rq->dl);
		p->dl.dl_non_contending = 0;
		/*
		 * If the timer handler is currently running and the
		 * timer cannot be canceled, inactive_task_timer()
@@ -340,13 +333,25 @@ static void dl_change_utilization(struct task_struct *p, u64 new_bw)
		 * will not touch the rq's active utilization,
		 * so we are still safe.
		 */
		if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
			put_task_struct(p);
		if (hrtimer_try_to_cancel(&dl_se->inactive_timer) == 1) {
			if (!dl_server(dl_se))
				put_task_struct(dl_task_of(dl_se));
		}
	__sub_rq_bw(p->dl.dl_bw, &rq->dl);
	}
	__sub_rq_bw(dl_se->dl_bw, &rq->dl);
	__add_rq_bw(new_bw, &rq->dl);
}

static void dl_change_utilization(struct task_struct *p, u64 new_bw)
{
	WARN_ON_ONCE(p->dl.flags & SCHED_FLAG_SUGOV);

	if (task_on_rq_queued(p))
		return;

	dl_rq_change_utilization(task_rq(p), &p->dl, new_bw);
}

static void __dl_clear_params(struct sched_dl_entity *dl_se);

/*
@@ -771,6 +776,15 @@ static inline void replenish_dl_new_period(struct sched_dl_entity *dl_se,
	/* for non-boosted task, pi_of(dl_se) == dl_se */
	dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline;
	dl_se->runtime = pi_of(dl_se)->dl_runtime;

	/*
	 * If it is a deferred reservation, and the server
	 * is not handling an starvation case, defer it.
	 */
	if (dl_se->dl_defer & !dl_se->dl_defer_running) {
		dl_se->dl_throttled = 1;
		dl_se->dl_defer_armed = 1;
	}
}

/*
@@ -809,6 +823,9 @@ static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se)
	replenish_dl_new_period(dl_se, rq);
}

static int start_dl_timer(struct sched_dl_entity *dl_se);
static bool dl_entity_overflow(struct sched_dl_entity *dl_se, u64 t);

/*
 * Pure Earliest Deadline First (EDF) scheduling does not deal with the
 * possibility of a entity lasting more than what it declared, and thus
@@ -837,9 +854,18 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se)
	/*
	 * This could be the case for a !-dl task that is boosted.
	 * Just go with full inherited parameters.
	 *
	 * Or, it could be the case of a deferred reservation that
	 * was not able to consume its runtime in background and
	 * reached this point with current u > U.
	 *
	 * In both cases, set a new period.
	 */
	if (dl_se->dl_deadline == 0)
		replenish_dl_new_period(dl_se, rq);
	if (dl_se->dl_deadline == 0 ||
	    (dl_se->dl_defer_armed && dl_entity_overflow(dl_se, rq_clock(rq)))) {
		dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline;
		dl_se->runtime = pi_of(dl_se)->dl_runtime;
	}

	if (dl_se->dl_yielded && dl_se->runtime > 0)
		dl_se->runtime = 0;
@@ -873,6 +899,44 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se)
		dl_se->dl_yielded = 0;
	if (dl_se->dl_throttled)
		dl_se->dl_throttled = 0;

	/*
	 * If this is the replenishment of a deferred reservation,
	 * clear the flag and return.
	 */
	if (dl_se->dl_defer_armed) {
		dl_se->dl_defer_armed = 0;
		return;
	}

	/*
	 * A this point, if the deferred server is not armed, and the deadline
	 * is in the future, if it is not running already, throttle the server
	 * and arm the defer timer.
	 */
	if (dl_se->dl_defer && !dl_se->dl_defer_running &&
	    dl_time_before(rq_clock(dl_se->rq), dl_se->deadline - dl_se->runtime)) {
		if (!is_dl_boosted(dl_se) && dl_se->server_has_tasks(dl_se)) {

			/*
			 * Set dl_se->dl_defer_armed and dl_throttled variables to
			 * inform the start_dl_timer() that this is a deferred
			 * activation.
			 */
			dl_se->dl_defer_armed = 1;
			dl_se->dl_throttled = 1;
			if (!start_dl_timer(dl_se)) {
				/*
				 * If for whatever reason (delays), a previous timer was
				 * queued but not serviced, cancel it and clean the
				 * deferrable server variables intended for start_dl_timer().
				 */
				hrtimer_try_to_cancel(&dl_se->dl_timer);
				dl_se->dl_defer_armed = 0;
				dl_se->dl_throttled = 0;
			}
		}
	}
}

/*
@@ -1023,6 +1087,15 @@ static void update_dl_entity(struct sched_dl_entity *dl_se)
		}

		replenish_dl_new_period(dl_se, rq);
	} else if (dl_server(dl_se) && dl_se->dl_defer) {
		/*
		 * The server can still use its previous deadline, so check if
		 * it left the dl_defer_running state.
		 */
		if (!dl_se->dl_defer_running) {
			dl_se->dl_defer_armed = 1;
			dl_se->dl_throttled = 1;
		}
	}
}

@@ -1055,8 +1128,21 @@ static int start_dl_timer(struct sched_dl_entity *dl_se)
	 * We want the timer to fire at the deadline, but considering
	 * that it is actually coming from rq->clock and not from
	 * hrtimer's time base reading.
	 */
	 *
	 * The deferred reservation will have its timer set to
	 * (deadline - runtime). At that point, the CBS rule will decide
	 * if the current deadline can be used, or if a replenishment is
	 * required to avoid add too much pressure on the system
	 * (current u > U).
	 */
	if (dl_se->dl_defer_armed) {
		WARN_ON_ONCE(!dl_se->dl_throttled);
		act = ns_to_ktime(dl_se->deadline - dl_se->runtime);
	} else {
		/* act = deadline - rel-deadline + period */
		act = ns_to_ktime(dl_next_period(dl_se));
	}

	now = hrtimer_cb_get_time(timer);
	delta = ktime_to_ns(now) - rq_clock(rq);
	act = ktime_add_ns(act, delta);
@@ -1106,6 +1192,62 @@ static void __push_dl_task(struct rq *rq, struct rq_flags *rf)
#endif
}

/* a defer timer will not be reset if the runtime consumed was < dl_server_min_res */
static const u64 dl_server_min_res = 1 * NSEC_PER_MSEC;

static enum hrtimer_restart dl_server_timer(struct hrtimer *timer, struct sched_dl_entity *dl_se)
{
	struct rq *rq = rq_of_dl_se(dl_se);
	u64 fw;

	scoped_guard (rq_lock, rq) {
		struct rq_flags *rf = &scope.rf;

		if (!dl_se->dl_throttled || !dl_se->dl_runtime)
			return HRTIMER_NORESTART;

		sched_clock_tick();
		update_rq_clock(rq);

		if (!dl_se->dl_runtime)
			return HRTIMER_NORESTART;

		if (!dl_se->server_has_tasks(dl_se)) {
			replenish_dl_entity(dl_se);
			return HRTIMER_NORESTART;
		}

		if (dl_se->dl_defer_armed) {
			/*
			 * First check if the server could consume runtime in background.
			 * If so, it is possible to push the defer timer for this amount
			 * of time. The dl_server_min_res serves as a limit to avoid
			 * forwarding the timer for a too small amount of time.
			 */
			if (dl_time_before(rq_clock(dl_se->rq),
					   (dl_se->deadline - dl_se->runtime - dl_server_min_res))) {

				/* reset the defer timer */
				fw = dl_se->deadline - rq_clock(dl_se->rq) - dl_se->runtime;

				hrtimer_forward_now(timer, ns_to_ktime(fw));
				return HRTIMER_RESTART;
			}

			dl_se->dl_defer_running = 1;
		}

		enqueue_dl_entity(dl_se, ENQUEUE_REPLENISH);

		if (!dl_task(dl_se->rq->curr) || dl_entity_preempt(dl_se, &dl_se->rq->curr->dl))
			resched_curr(rq);

		__push_dl_task(rq, rf);
	}

	return HRTIMER_NORESTART;
}

/*
 * This is the bandwidth enforcement timer callback. If here, we know
 * a task is not on its dl_rq, since the fact that the timer was running
@@ -1128,28 +1270,8 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
	struct rq_flags rf;
	struct rq *rq;

	if (dl_server(dl_se)) {
		struct rq *rq = rq_of_dl_se(dl_se);
		struct rq_flags rf;

		rq_lock(rq, &rf);
		if (dl_se->dl_throttled) {
			sched_clock_tick();
			update_rq_clock(rq);

			if (dl_se->server_has_tasks(dl_se)) {
				enqueue_dl_entity(dl_se, ENQUEUE_REPLENISH);
				resched_curr(rq);
				__push_dl_task(rq, &rf);
			} else {
				replenish_dl_entity(dl_se);
			}

		}
		rq_unlock(rq, &rf);

		return HRTIMER_NORESTART;
	}
	if (dl_server(dl_se))
		return dl_server_timer(timer, dl_se);

	p = dl_task_of(dl_se);
	rq = task_rq_lock(p, &rf);
@@ -1319,22 +1441,10 @@ static u64 grub_reclaim(u64 delta, struct rq *rq, struct sched_dl_entity *dl_se)
	return (delta * u_act) >> BW_SHIFT;
}

static inline void
update_stats_dequeue_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se,
                        int flags);
static void update_curr_dl_se(struct rq *rq, struct sched_dl_entity *dl_se, s64 delta_exec)
s64 dl_scaled_delta_exec(struct rq *rq, struct sched_dl_entity *dl_se, s64 delta_exec)
{
	s64 scaled_delta_exec;

	if (unlikely(delta_exec <= 0)) {
		if (unlikely(dl_se->dl_yielded))
			goto throttle;
		return;
	}

	if (dl_entity_is_special(dl_se))
		return;

	/*
	 * For tasks that participate in GRUB, we implement GRUB-PA: the
	 * spare reclaimed bandwidth is used to clock down frequency.
@@ -1353,8 +1463,64 @@ static void update_curr_dl_se(struct rq *rq, struct sched_dl_entity *dl_se, s64
		scaled_delta_exec = cap_scale(scaled_delta_exec, scale_cpu);
	}

	return scaled_delta_exec;
}

static inline void
update_stats_dequeue_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se,
			int flags);
static void update_curr_dl_se(struct rq *rq, struct sched_dl_entity *dl_se, s64 delta_exec)
{
	s64 scaled_delta_exec;

	if (unlikely(delta_exec <= 0)) {
		if (unlikely(dl_se->dl_yielded))
			goto throttle;
		return;
	}

	if (dl_server(dl_se) && dl_se->dl_throttled && !dl_se->dl_defer)
		return;

	if (dl_entity_is_special(dl_se))
		return;

	scaled_delta_exec = dl_scaled_delta_exec(rq, dl_se, delta_exec);

	dl_se->runtime -= scaled_delta_exec;

	/*
	 * The fair server can consume its runtime while throttled (not queued/
	 * running as regular CFS).
	 *
	 * If the server consumes its entire runtime in this state. The server
	 * is not required for the current period. Thus, reset the server by
	 * starting a new period, pushing the activation.
	 */
	if (dl_se->dl_defer && dl_se->dl_throttled && dl_runtime_exceeded(dl_se)) {
		/*
		 * If the server was previously activated - the starving condition
		 * took place, it this point it went away because the fair scheduler
		 * was able to get runtime in background. So return to the initial
		 * state.
		 */
		dl_se->dl_defer_running = 0;

		hrtimer_try_to_cancel(&dl_se->dl_timer);

		replenish_dl_new_period(dl_se, dl_se->rq);

		/*
		 * Not being able to start the timer seems problematic. If it could not
		 * be started for whatever reason, we need to "unthrottle" the DL server
		 * and queue right away. Otherwise nothing might queue it. That's similar
		 * to what enqueue_dl_entity() does on start_dl_timer==0. For now, just warn.
		 */
		WARN_ON_ONCE(!start_dl_timer(dl_se));

		return;
	}

throttle:
	if (dl_runtime_exceeded(dl_se) || dl_se->dl_yielded) {
		dl_se->dl_throttled = 1;
@@ -1381,6 +1547,14 @@ static void update_curr_dl_se(struct rq *rq, struct sched_dl_entity *dl_se, s64
			resched_curr(rq);
	}

	/*
	 * The fair server (sole dl_server) does not account for real-time
	 * workload because it is running fair work.
	 */
	if (dl_se == &rq->fair_server)
		return;

#ifdef CONFIG_RT_GROUP_SCHED
	/*
	 * Because -- for now -- we share the rt bandwidth, we need to
	 * account our runtime there too, otherwise actual rt tasks
@@ -1405,34 +1579,157 @@ static void update_curr_dl_se(struct rq *rq, struct sched_dl_entity *dl_se, s64
			rt_rq->rt_time += delta_exec;
		raw_spin_unlock(&rt_rq->rt_runtime_lock);
	}
#endif
}

/*
 * In the non-defer mode, the idle time is not accounted, as the
 * server provides a guarantee.
 *
 * If the dl_server is in defer mode, the idle time is also considered
 * as time available for the fair server, avoiding a penalty for the
 * rt scheduler that did not consumed that time.
 */
void dl_server_update_idle_time(struct rq *rq, struct task_struct *p)
{
	s64 delta_exec, scaled_delta_exec;

	if (!rq->fair_server.dl_defer)
		return;

	/* no need to discount more */
	if (rq->fair_server.runtime < 0)
		return;

	delta_exec = rq_clock_task(rq) - p->se.exec_start;
	if (delta_exec < 0)
		return;

	scaled_delta_exec = dl_scaled_delta_exec(rq, &rq->fair_server, delta_exec);

	rq->fair_server.runtime -= scaled_delta_exec;

	if (rq->fair_server.runtime < 0) {
		rq->fair_server.dl_defer_running = 0;
		rq->fair_server.runtime = 0;
	}

	p->se.exec_start = rq_clock_task(rq);
}

void dl_server_update(struct sched_dl_entity *dl_se, s64 delta_exec)
{
	/* 0 runtime = fair server disabled */
	if (dl_se->dl_runtime)
		update_curr_dl_se(dl_se->rq, dl_se, delta_exec);
}

void dl_server_start(struct sched_dl_entity *dl_se)
{
	struct rq *rq = dl_se->rq;

	/*
	 * XXX: the apply do not work fine at the init phase for the
	 * fair server because things are not yet set. We need to improve
	 * this before getting generic.
	 */
	if (!dl_server(dl_se)) {
		u64 runtime =  50 * NSEC_PER_MSEC;
		u64 period = 1000 * NSEC_PER_MSEC;

		dl_server_apply_params(dl_se, runtime, period, 1);

		dl_se->dl_server = 1;
		dl_se->dl_defer = 1;
		setup_new_dl_entity(dl_se);
	}

	if (!dl_se->dl_runtime)
		return;

	enqueue_dl_entity(dl_se, ENQUEUE_WAKEUP);
	if (!dl_task(dl_se->rq->curr) || dl_entity_preempt(dl_se, &rq->curr->dl))
		resched_curr(dl_se->rq);
}

void dl_server_stop(struct sched_dl_entity *dl_se)
{
	if (!dl_se->dl_runtime)
		return;

	dequeue_dl_entity(dl_se, DEQUEUE_SLEEP);
	hrtimer_try_to_cancel(&dl_se->dl_timer);
	dl_se->dl_defer_armed = 0;
	dl_se->dl_throttled = 0;
}

void dl_server_init(struct sched_dl_entity *dl_se, struct rq *rq,
		    dl_server_has_tasks_f has_tasks,
		    dl_server_pick_f pick)
		    dl_server_pick_f pick_next,
		    dl_server_pick_f pick_task)
{
	dl_se->rq = rq;
	dl_se->server_has_tasks = has_tasks;
	dl_se->server_pick = pick;
	dl_se->server_pick_next = pick_next;
	dl_se->server_pick_task = pick_task;
}

void __dl_server_attach_root(struct sched_dl_entity *dl_se, struct rq *rq)
{
	u64 new_bw = dl_se->dl_bw;
	int cpu = cpu_of(rq);
	struct dl_bw *dl_b;

	dl_b = dl_bw_of(cpu_of(rq));
	guard(raw_spinlock)(&dl_b->lock);

	if (!dl_bw_cpus(cpu))
		return;

	__dl_add(dl_b, new_bw, dl_bw_cpus(cpu));
}

int dl_server_apply_params(struct sched_dl_entity *dl_se, u64 runtime, u64 period, bool init)
{
	u64 old_bw = init ? 0 : to_ratio(dl_se->dl_period, dl_se->dl_runtime);
	u64 new_bw = to_ratio(period, runtime);
	struct rq *rq = dl_se->rq;
	int cpu = cpu_of(rq);
	struct dl_bw *dl_b;
	unsigned long cap;
	int retval = 0;
	int cpus;

	dl_b = dl_bw_of(cpu);
	guard(raw_spinlock)(&dl_b->lock);

	cpus = dl_bw_cpus(cpu);
	cap = dl_bw_capacity(cpu);

	if (__dl_overflow(dl_b, cap, old_bw, new_bw))
		return -EBUSY;

	if (init) {
		__add_rq_bw(new_bw, &rq->dl);
		__dl_add(dl_b, new_bw, cpus);
	} else {
		__dl_sub(dl_b, dl_se->dl_bw, cpus);
		__dl_add(dl_b, new_bw, cpus);

		dl_rq_change_utilization(rq, dl_se, new_bw);
	}

	dl_se->dl_runtime = runtime;
	dl_se->dl_deadline = period;
	dl_se->dl_period = period;

	dl_se->runtime = 0;
	dl_se->deadline = 0;

	dl_se->dl_bw = to_ratio(dl_se->dl_period, dl_se->dl_runtime);
	dl_se->dl_density = to_ratio(dl_se->dl_deadline, dl_se->dl_runtime);

	return retval;
}

/*
@@ -1735,7 +2032,7 @@ enqueue_dl_entity(struct sched_dl_entity *dl_se, int flags)
	 * be counted in the active utilization; hence, we need to call
	 * add_running_bw().
	 */
	if (dl_se->dl_throttled && !(flags & ENQUEUE_REPLENISH)) {
	if (!dl_se->dl_defer && dl_se->dl_throttled && !(flags & ENQUEUE_REPLENISH)) {
		if (flags & ENQUEUE_WAKEUP)
			task_contending(dl_se, flags);

@@ -1757,6 +2054,25 @@ enqueue_dl_entity(struct sched_dl_entity *dl_se, int flags)
		setup_new_dl_entity(dl_se);
	}

	/*
	 * If the reservation is still throttled, e.g., it got replenished but is a
	 * deferred task and still got to wait, don't enqueue.
	 */
	if (dl_se->dl_throttled && start_dl_timer(dl_se))
		return;

	/*
	 * We're about to enqueue, make sure we're not ->dl_throttled!
	 * In case the timer was not started, say because the defer time
	 * has passed, mark as not throttled and mark unarmed.
	 * Also cancel earlier timers, since letting those run is pointless.
	 */
	if (dl_se->dl_throttled) {
		hrtimer_try_to_cancel(&dl_se->dl_timer);
		dl_se->dl_defer_armed = 0;
		dl_se->dl_throttled = 0;
	}

	__enqueue_dl_entity(dl_se);
}

@@ -2086,7 +2402,12 @@ static struct sched_dl_entity *pick_next_dl_entity(struct dl_rq *dl_rq)
	return __node_2_dle(left);
}

static struct task_struct *pick_task_dl(struct rq *rq)
/*
 * __pick_next_task_dl - Helper to pick the next -deadline task to run.
 * @rq: The runqueue to pick the next task from.
 * @peek: If true, just peek at the next task. Only relevant for dlserver.
 */
static struct task_struct *__pick_next_task_dl(struct rq *rq, bool peek)
{
	struct sched_dl_entity *dl_se;
	struct dl_rq *dl_rq = &rq->dl;
@@ -2100,7 +2421,10 @@ static struct task_struct *pick_task_dl(struct rq *rq)
	WARN_ON_ONCE(!dl_se);

	if (dl_server(dl_se)) {
		p = dl_se->server_pick(dl_se);
		if (IS_ENABLED(CONFIG_SMP) && peek)
			p = dl_se->server_pick_task(dl_se);
		else
			p = dl_se->server_pick_next(dl_se);
		if (!p) {
			WARN_ON_ONCE(1);
			dl_se->dl_yielded = 1;
@@ -2115,11 +2439,18 @@ static struct task_struct *pick_task_dl(struct rq *rq)
	return p;
}

#ifdef CONFIG_SMP
static struct task_struct *pick_task_dl(struct rq *rq)
{
	return __pick_next_task_dl(rq, true);
}
#endif

static struct task_struct *pick_next_task_dl(struct rq *rq)
{
	struct task_struct *p;

	p = pick_task_dl(rq);
	p = __pick_next_task_dl(rq, false);
	if (!p)
		return p;

+162 −4

File changed.

Preview size limit exceeded, changes collapsed.

Loading