Merge branch 'sched/core' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip into for-6.12 (0df340ce) · Commits · git / linux-net

include/linux/sched.h

+16 −1

Original line number	Diff line number	Diff line
		@@ -641,12 +641,26 @@ struct sched_dl_entity {
		*
		* @dl_overrun tells if the task asked to be informed about runtime
		* overruns.
		*
		* @dl_server tells if this is a server entity.
		*
		* @dl_defer tells if this is a deferred or regular server. For
		* now only defer server exists.
		*
		* @dl_defer_armed tells if the deferrable server is waiting
		* for the replenishment timer to activate it.
		*
		* @dl_defer_running tells if the deferrable server is actually
		* running, skipping the defer phase.
		*/
		unsigned int dl_throttled : 1;
		unsigned int dl_yielded : 1;
		unsigned int dl_non_contending : 1;
		unsigned int dl_overrun : 1;
		unsigned int dl_server : 1;
		unsigned int dl_defer : 1;
		unsigned int dl_defer_armed : 1;
		unsigned int dl_defer_running : 1;

		/*
		* Bandwidth enforcement timer. Each -deadline task has its
		@@ -674,7 +688,8 @@ struct sched_dl_entity {
		*/
		struct rq *rq;
		dl_server_has_tasks_f server_has_tasks;
		dl_server_pick_f server_pick;
		dl_server_pick_f server_pick_next;
		dl_server_pick_f server_pick_task;

		#ifdef CONFIG_RT_MUTEXES
		/*

kernel/sched/core.c

+107 −39

Original line number	Diff line number	Diff line
		@@ -163,6 +163,9 @@ static inline int __task_prio(const struct task_struct *p)
		if (p->sched_class == &stop_sched_class) /* trumps deadline */
		return -2;

		if (p->dl_server)
		return -1; /* deadline */

		if (rt_prio(p->prio)) /* includes deadline */
		return p->prio; /* [-1, 99] */

		@@ -195,8 +198,24 @@ static inline bool prio_less(const struct task_struct *a,
		if (-pb < -pa)
		return false;

		if (pa == -1) /* dl_prio() doesn't work because of stop_class above */
		return !dl_time_before(a->dl.deadline, b->dl.deadline);
		if (pa == -1) { /* dl_prio() doesn't work because of stop_class above */
		const struct sched_dl_entity *a_dl, *b_dl;

		a_dl = &a->dl;
		/*
		* Since 'a' and 'b' can be CFS tasks served by DL server,
		* __task_prio() can return -1 (for DL) even for those. In that
		* case, get to the dl_server's DL entity.
		*/
		if (a->dl_server)
		a_dl = a->dl_server;

		b_dl = &b->dl;
		if (b->dl_server)
		b_dl = b->dl_server;

		return !dl_time_before(a_dl->deadline, b_dl->deadline);
		}

		if (pa == MAX_RT_PRIO + MAX_NICE) /* fair */
		return cfs_prio_less(a, b, in_fi);
		@@ -1280,7 +1299,7 @@ bool sched_can_stop_tick(struct rq *rq)
		* dequeued by migrating while the constrained task continues to run.
		* E.g. going from 2->1 without going through pick_next_task().
		*/
		if (sched_feat(HZ_BW) && __need_bw_check(rq, rq->curr)) {
		if (__need_bw_check(rq, rq->curr)) {
		if (cfs_task_bw_constrained(rq->curr))
		return false;
		}
		@@ -2255,6 +2274,12 @@ void migrate_disable(void)
		struct task_struct *p = current;

		if (p->migration_disabled) {
		#ifdef CONFIG_DEBUG_PREEMPT
		/*
		* Warn about overflow half-way through the range.
		*/
		WARN_ON_ONCE((s16)p->migration_disabled < 0);
		#endif
		p->migration_disabled++;
		return;
		}
		@@ -2273,14 +2298,20 @@ void migrate_enable(void)
		.flags = SCA_MIGRATE_ENABLE,
		};

		#ifdef CONFIG_DEBUG_PREEMPT
		/*
		* Check both overflow from migrate_disable() and superfluous
		* migrate_enable().
		*/
		if (WARN_ON_ONCE((s16)p->migration_disabled <= 0))
		return;
		#endif

		if (p->migration_disabled > 1) {
		p->migration_disabled--;
		return;
		}

		if (WARN_ON_ONCE(!p->migration_disabled))
		return;

		/*
		* Ensure stop_task runs either before or after this, and that
		* __set_cpus_allowed_ptr(SCA_MIGRATE_ENABLE) doesn't schedule().
		@@ -4737,7 +4768,7 @@ void wake_up_new_task(struct task_struct *p)
		update_rq_clock(rq);
		post_init_entity_util_avg(p);

		activate_task(rq, p, ENQUEUE_NOCLOCK);
		activate_task(rq, p, ENQUEUE_NOCLOCK | ENQUEUE_INITIAL);
		trace_sched_wakeup_new(p);
		wakeup_preempt(rq, p, WF_FORK);
		#ifdef CONFIG_SMP
		@@ -5855,6 +5886,14 @@ static void put_prev_task_balance(struct rq *rq, struct task_struct *prev,
		#endif

		put_prev_task(rq, prev);

		/*
		* We've updated @prev and no longer need the server link, clear it.
		* Must be done before ->pick_next_task() because that can (re)set
		* ->dl_server.
		*/
		if (prev->dl_server)
		prev->dl_server = NULL;
		}

		/*
		@@ -5888,6 +5927,13 @@ __pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
		p = pick_next_task_idle(rq);
		}

		/*
		* This is a normal CFS pick, but the previous could be a DL pick.
		* Clear it as previous is no longer picked.
		*/
		if (prev->dl_server)
		prev->dl_server = NULL;

		/*
		* This is the fast path; it cannot be a DL server pick;
		* therefore even if @p == @prev, ->dl_server must be NULL.
		@@ -5901,14 +5947,6 @@ __pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
		restart:
		put_prev_task_balance(rq, prev, rf);

		/*
		* We've updated @prev and no longer need the server link, clear it.
		* Must be done before ->pick_next_task() because that can (re)set
		* ->dl_server.
		*/
		if (prev->dl_server)
		prev->dl_server = NULL;

		for_each_active_class(class) {
		p = class->pick_next_task(rq);
		if (p) {
		@@ -7925,6 +7963,30 @@ void set_rq_offline(struct rq *rq)
		}
		}

		static inline void sched_set_rq_online(struct rq *rq, int cpu)
		{
		struct rq_flags rf;

		rq_lock_irqsave(rq, &rf);
		if (rq->rd) {
		BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
		set_rq_online(rq);
		}
		rq_unlock_irqrestore(rq, &rf);
		}

		static inline void sched_set_rq_offline(struct rq *rq, int cpu)
		{
		struct rq_flags rf;

		rq_lock_irqsave(rq, &rf);
		if (rq->rd) {
		BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
		set_rq_offline(rq);
		}
		rq_unlock_irqrestore(rq, &rf);
		}

		/*
		* used to mark begin/end of suspend/resume:
		*/
		@@ -7975,10 +8037,25 @@ static int cpuset_cpu_inactive(unsigned int cpu)
		return 0;
		}

		/*
		 * Increment the sched_smt_present static branch when the SMT mask of @cpu
		 * holds exactly two CPUs. Compiles to an empty function without
		 * CONFIG_SCHED_SMT.
		 */
		static inline void sched_smt_present_inc(int cpu)
		{
		#ifdef CONFIG_SCHED_SMT
			const bool two_siblings = cpumask_weight(cpu_smt_mask(cpu)) == 2;

			if (two_siblings)
				static_branch_inc_cpuslocked(&sched_smt_present);
		#endif
		}

		/*
		 * Decrement the sched_smt_present static branch when the SMT mask of @cpu
		 * holds exactly two CPUs. Compiles to an empty function without
		 * CONFIG_SCHED_SMT.
		 */
		static inline void sched_smt_present_dec(int cpu)
		{
		#ifdef CONFIG_SCHED_SMT
			const bool two_siblings = cpumask_weight(cpu_smt_mask(cpu)) == 2;

			if (two_siblings)
				static_branch_dec_cpuslocked(&sched_smt_present);
		#endif
		}

		int sched_cpu_activate(unsigned int cpu)
		{
		struct rq *rq = cpu_rq(cpu);
		struct rq_flags rf;

		/*
		* Clear the balance_push callback and prepare to schedule
		@@ -7986,13 +8063,10 @@ int sched_cpu_activate(unsigned int cpu)
		*/
		balance_push_set(cpu, false);

		#ifdef CONFIG_SCHED_SMT
		/*
		* When going up, increment the number of cores with SMT present.
		*/
		if (cpumask_weight(cpu_smt_mask(cpu)) == 2)
		static_branch_inc_cpuslocked(&sched_smt_present);
		#endif
		sched_smt_present_inc(cpu);
		set_cpu_active(cpu, true);

		if (sched_smp_initialized) {
		@@ -8012,12 +8086,7 @@ int sched_cpu_activate(unsigned int cpu)
		* 2) At runtime, if cpuset_cpu_active() fails to rebuild the
		* domains.
		*/
		rq_lock_irqsave(rq, &rf);
		if (rq->rd) {
		BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
		set_rq_online(rq);
		}
		rq_unlock_irqrestore(rq, &rf);
		sched_set_rq_online(rq, cpu);

		return 0;
		}
		@@ -8025,7 +8094,6 @@ int sched_cpu_activate(unsigned int cpu)
		int sched_cpu_deactivate(unsigned int cpu)
		{
		struct rq *rq = cpu_rq(cpu);
		struct rq_flags rf;
		int ret;

		/*
		@@ -8056,22 +8124,16 @@ int sched_cpu_deactivate(unsigned int cpu)
		*/
		synchronize_rcu();

		rq_lock_irqsave(rq, &rf);
		if (rq->rd) {
		BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
		set_rq_offline(rq);
		}
		rq_unlock_irqrestore(rq, &rf);
		sched_set_rq_offline(rq, cpu);

		scx_rq_deactivate(rq);

		#ifdef CONFIG_SCHED_SMT
		/*
		* When going down, decrement the number of cores with SMT present.
		*/
		if (cpumask_weight(cpu_smt_mask(cpu)) == 2)
		static_branch_dec_cpuslocked(&sched_smt_present);
		sched_smt_present_dec(cpu);

		#ifdef CONFIG_SCHED_SMT
		sched_core_cpu_deactivate(cpu);
		#endif

		@@ -8081,6 +8143,8 @@ int sched_cpu_deactivate(unsigned int cpu)
		sched_update_numa(cpu, false);
		ret = cpuset_cpu_inactive(cpu);
		if (ret) {
		sched_smt_present_inc(cpu);
		sched_set_rq_online(rq, cpu);
		balance_push_set(cpu, false);
		set_cpu_active(cpu, true);
		sched_update_numa(cpu, true);
		@@ -8290,8 +8354,6 @@ void __init sched_init(void)
		#endif /* CONFIG_RT_GROUP_SCHED */
		}

		init_rt_bandwidth(&def_rt_bandwidth, global_rt_period(), global_rt_runtime());

		#ifdef CONFIG_SMP
		init_defrootdomain();
		#endif
		@@ -8346,8 +8408,13 @@ void __init sched_init(void)
		init_tg_cfs_entry(&root_task_group, &rq->cfs, NULL, i, NULL);
		#endif /* CONFIG_FAIR_GROUP_SCHED */

		rq->rt.rt_runtime = def_rt_bandwidth.rt_runtime;
		#ifdef CONFIG_RT_GROUP_SCHED
		/*
		* This is required for init cpu because rt.c:__enable_runtime()
		* starts working after scheduler_running, which is not the case
		* yet.
		*/
		rq->rt.rt_runtime = global_rt_runtime();
		init_tg_rt_entry(&root_task_group, &rq->rt, NULL, i, NULL);
		#endif
		#ifdef CONFIG_SMP
		@@ -8379,6 +8446,7 @@ void __init sched_init(void)
		#endif /* CONFIG_SMP */
		hrtick_rq_init(rq);
		atomic_set(&rq->nr_iowait, 0);
		fair_server_init(rq);

		#ifdef CONFIG_SCHED_CORE
		rq->core = rq;

kernel/sched/cputime.c

+6 −0

Original line number	Diff line number	Diff line
		@@ -582,6 +582,12 @@ void cputime_adjust(struct task_cputime *curr, struct prev_cputime *prev,
		}

		stime = mul_u64_u64_div_u64(stime, rtime, stime + utime);
		/*
		* Because mul_u64_u64_div_u64() can approximate on some
		* architectures; enforce the constraint that: a*b/(b+c) <= a.
		*/
		if (unlikely(stime > rtime))
		stime = rtime;

		update:
		/*

kernel/sched/deadline.c

+390 −59

Original line number	Diff line number	Diff line
		@@ -320,19 +320,12 @@ void sub_running_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
		__sub_running_bw(dl_se->dl_bw, dl_rq);
		}

		static void dl_change_utilization(struct task_struct *p, u64 new_bw)
		static void dl_rq_change_utilization(struct rq *rq, struct sched_dl_entity *dl_se, u64 new_bw)
		{
		struct rq *rq;

		WARN_ON_ONCE(p->dl.flags & SCHED_FLAG_SUGOV);

		if (task_on_rq_queued(p))
		return;
		if (dl_se->dl_non_contending) {
		sub_running_bw(dl_se, &rq->dl);
		dl_se->dl_non_contending = 0;

		rq = task_rq(p);
		if (p->dl.dl_non_contending) {
		sub_running_bw(&p->dl, &rq->dl);
		p->dl.dl_non_contending = 0;
		/*
		* If the timer handler is currently running and the
		* timer cannot be canceled, inactive_task_timer()
		@@ -340,13 +333,25 @@ static void dl_change_utilization(struct task_struct *p, u64 new_bw)
		* will not touch the rq's active utilization,
		* so we are still safe.
		*/
		if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
		put_task_struct(p);
		if (hrtimer_try_to_cancel(&dl_se->inactive_timer) == 1) {
		if (!dl_server(dl_se))
		put_task_struct(dl_task_of(dl_se));
		}
		__sub_rq_bw(p->dl.dl_bw, &rq->dl);
		}
		__sub_rq_bw(dl_se->dl_bw, &rq->dl);
		__add_rq_bw(new_bw, &rq->dl);
		}

		/*
		 * Task-level wrapper around dl_rq_change_utilization(): warns once if @p
		 * carries SCHED_FLAG_SUGOV, does nothing for a task that is queued on a
		 * runqueue, and otherwise forwards @p's deadline entity and @new_bw for
		 * the task's runqueue.
		 */
		static void dl_change_utilization(struct task_struct *p, u64 new_bw)
		{
			WARN_ON_ONCE(p->dl.flags & SCHED_FLAG_SUGOV);

			if (!task_on_rq_queued(p))
				dl_rq_change_utilization(task_rq(p), &p->dl, new_bw);
		}

		static void __dl_clear_params(struct sched_dl_entity *dl_se);

		/*
		@@ -771,6 +776,15 @@ static inline void replenish_dl_new_period(struct sched_dl_entity *dl_se,
		/* for non-boosted task, pi_of(dl_se) == dl_se */
		dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline;
		dl_se->runtime = pi_of(dl_se)->dl_runtime;

		/*
		* If it is a deferred reservation, and the server
		* is not handling a starvation case, defer it.
		*/
		if (dl_se->dl_defer & !dl_se->dl_defer_running) {
		dl_se->dl_throttled = 1;
		dl_se->dl_defer_armed = 1;
		}
		}

		/*
		@@ -809,6 +823,9 @@ static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se)
		replenish_dl_new_period(dl_se, rq);
		}

		static int start_dl_timer(struct sched_dl_entity *dl_se);
		static bool dl_entity_overflow(struct sched_dl_entity *dl_se, u64 t);

		/*
		* Pure Earliest Deadline First (EDF) scheduling does not deal with the
		* possibility of a entity lasting more than what it declared, and thus
		@@ -837,9 +854,18 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se)
		/*
		* This could be the case for a !-dl task that is boosted.
		* Just go with full inherited parameters.
		*
		* Or, it could be the case of a deferred reservation that
		* was not able to consume its runtime in background and
		* reached this point with current u > U.
		*
		* In both cases, set a new period.
		*/
		if (dl_se->dl_deadline == 0)
		replenish_dl_new_period(dl_se, rq);
		if (dl_se->dl_deadline == 0 ||
		(dl_se->dl_defer_armed && dl_entity_overflow(dl_se, rq_clock(rq)))) {
		dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline;
		dl_se->runtime = pi_of(dl_se)->dl_runtime;
		}

		if (dl_se->dl_yielded && dl_se->runtime > 0)
		dl_se->runtime = 0;
		@@ -873,6 +899,44 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se)
		dl_se->dl_yielded = 0;
		if (dl_se->dl_throttled)
		dl_se->dl_throttled = 0;

		/*
		* If this is the replenishment of a deferred reservation,
		* clear the flag and return.
		*/
		if (dl_se->dl_defer_armed) {
		dl_se->dl_defer_armed = 0;
		return;
		}

		/*
		* At this point, if the deferred server is not armed, and the deadline
		* is in the future, if it is not running already, throttle the server
		* and arm the defer timer.
		*/
		if (dl_se->dl_defer && !dl_se->dl_defer_running &&
		dl_time_before(rq_clock(dl_se->rq), dl_se->deadline - dl_se->runtime)) {
		if (!is_dl_boosted(dl_se) && dl_se->server_has_tasks(dl_se)) {

		/*
		* Set dl_se->dl_defer_armed and dl_throttled variables to
		* inform the start_dl_timer() that this is a deferred
		* activation.
		*/
		dl_se->dl_defer_armed = 1;
		dl_se->dl_throttled = 1;
		if (!start_dl_timer(dl_se)) {
		/*
		* If for whatever reason (delays), a previous timer was
		* queued but not serviced, cancel it and clean the
		* deferrable server variables intended for start_dl_timer().
		*/
		hrtimer_try_to_cancel(&dl_se->dl_timer);
		dl_se->dl_defer_armed = 0;
		dl_se->dl_throttled = 0;
		}
		}
		}
		}

		/*
		@@ -1023,6 +1087,15 @@ static void update_dl_entity(struct sched_dl_entity *dl_se)
		}

		replenish_dl_new_period(dl_se, rq);
		} else if (dl_server(dl_se) && dl_se->dl_defer) {
		/*
		* The server can still use its previous deadline, so check if
		* it left the dl_defer_running state.
		*/
		if (!dl_se->dl_defer_running) {
		dl_se->dl_defer_armed = 1;
		dl_se->dl_throttled = 1;
		}
		}
		}

		@@ -1055,8 +1128,21 @@ static int start_dl_timer(struct sched_dl_entity *dl_se)
		* We want the timer to fire at the deadline, but considering
		* that it is actually coming from rq->clock and not from
		* hrtimer's time base reading.
		*/
		*
		* The deferred reservation will have its timer set to
		* (deadline - runtime). At that point, the CBS rule will decide
		* if the current deadline can be used, or if a replenishment is
		* required to avoid adding too much pressure on the system
		* (current u > U).
		*/
		if (dl_se->dl_defer_armed) {
		WARN_ON_ONCE(!dl_se->dl_throttled);
		act = ns_to_ktime(dl_se->deadline - dl_se->runtime);
		} else {
		/* act = deadline - rel-deadline + period */
		act = ns_to_ktime(dl_next_period(dl_se));
		}

		now = hrtimer_cb_get_time(timer);
		delta = ktime_to_ns(now) - rq_clock(rq);
		act = ktime_add_ns(act, delta);
		@@ -1106,6 +1192,62 @@ static void __push_dl_task(struct rq *rq, struct rq_flags *rf)
		#endif
		}

		/* a defer timer will not be reset if the runtime consumed was < dl_server_min_res */
		static const u64 dl_server_min_res = 1 * NSEC_PER_MSEC;

		/*
		 * Timer callback body for a dl_server entity; dl_task_timer() hands the
		 * timer over to this helper when dl_server(dl_se) is true.
		 *
		 * @timer: the hrtimer that fired; it is forwarded and restarted when the
		 *         defer point is still far enough in the future.
		 * @dl_se: the server's deadline entity.
		 *
		 * The body runs under scoped_guard(rq_lock, rq). Returns HRTIMER_RESTART
		 * only when the defer timer is pushed forward, HRTIMER_NORESTART in every
		 * other case.
		 */
		static enum hrtimer_restart dl_server_timer(struct hrtimer *timer, struct sched_dl_entity *dl_se)
		{
			struct rq *rq = rq_of_dl_se(dl_se);
			u64 fw;

			scoped_guard (rq_lock, rq) {
				struct rq_flags *rf = &scope.rf;

				/* Nothing to do for an unthrottled or disabled (0 runtime) server. */
				if (!dl_se->dl_throttled || !dl_se->dl_runtime)
					return HRTIMER_NORESTART;

				sched_clock_tick();
				update_rq_clock(rq);

				/* NOTE(review): repeats the dl_runtime test above - confirm it is still needed. */
				if (!dl_se->dl_runtime)
					return HRTIMER_NORESTART;

				/* No tasks to serve: only replenish, do not enqueue the server. */
				if (!dl_se->server_has_tasks(dl_se)) {
					replenish_dl_entity(dl_se);
					return HRTIMER_NORESTART;
				}

				if (dl_se->dl_defer_armed) {
					/*
					 * First check if the server could consume runtime in background.
					 * If so, it is possible to push the defer timer for this amount
					 * of time. The dl_server_min_res serves as a limit to avoid
					 * forwarding the timer for a too small amount of time.
					 */
					if (dl_time_before(rq_clock(dl_se->rq),
							   (dl_se->deadline - dl_se->runtime - dl_server_min_res))) {

						/* reset the defer timer */
						fw = dl_se->deadline - rq_clock(dl_se->rq) - dl_se->runtime;

						hrtimer_forward_now(timer, ns_to_ktime(fw));
						return HRTIMER_RESTART;
					}

					dl_se->dl_defer_running = 1;
				}

				enqueue_dl_entity(dl_se, ENQUEUE_REPLENISH);

				if (!dl_task(dl_se->rq->curr) || dl_entity_preempt(dl_se, &dl_se->rq->curr->dl))
					resched_curr(rq);

				__push_dl_task(rq, rf);
			}

			return HRTIMER_NORESTART;
		}

		/*
		* This is the bandwidth enforcement timer callback. If here, we know
		* a task is not on its dl_rq, since the fact that the timer was running
		@@ -1128,28 +1270,8 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
		struct rq_flags rf;
		struct rq *rq;

		if (dl_server(dl_se)) {
		struct rq *rq = rq_of_dl_se(dl_se);
		struct rq_flags rf;

		rq_lock(rq, &rf);
		if (dl_se->dl_throttled) {
		sched_clock_tick();
		update_rq_clock(rq);

		if (dl_se->server_has_tasks(dl_se)) {
		enqueue_dl_entity(dl_se, ENQUEUE_REPLENISH);
		resched_curr(rq);
		__push_dl_task(rq, &rf);
		} else {
		replenish_dl_entity(dl_se);
		}

		}
		rq_unlock(rq, &rf);

		return HRTIMER_NORESTART;
		}
		if (dl_server(dl_se))
		return dl_server_timer(timer, dl_se);

		p = dl_task_of(dl_se);
		rq = task_rq_lock(p, &rf);
		@@ -1319,22 +1441,10 @@ static u64 grub_reclaim(u64 delta, struct rq *rq, struct sched_dl_entity *dl_se)
		return (delta * u_act) >> BW_SHIFT;
		}

		static inline void
		update_stats_dequeue_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se,
		int flags);
		static void update_curr_dl_se(struct rq *rq, struct sched_dl_entity *dl_se, s64 delta_exec)
		s64 dl_scaled_delta_exec(struct rq *rq, struct sched_dl_entity *dl_se, s64 delta_exec)
		{
		s64 scaled_delta_exec;

		if (unlikely(delta_exec <= 0)) {
		if (unlikely(dl_se->dl_yielded))
		goto throttle;
		return;
		}

		if (dl_entity_is_special(dl_se))
		return;

		/*
		* For tasks that participate in GRUB, we implement GRUB-PA: the
		* spare reclaimed bandwidth is used to clock down frequency.
		@@ -1353,8 +1463,64 @@ static void update_curr_dl_se(struct rq *rq, struct sched_dl_entity *dl_se, s64
		scaled_delta_exec = cap_scale(scaled_delta_exec, scale_cpu);
		}

		return scaled_delta_exec;
		}

		static inline void
		update_stats_dequeue_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se,
		int flags);
		static void update_curr_dl_se(struct rq *rq, struct sched_dl_entity *dl_se, s64 delta_exec)
		{
		s64 scaled_delta_exec;

		if (unlikely(delta_exec <= 0)) {
		if (unlikely(dl_se->dl_yielded))
		goto throttle;
		return;
		}

		if (dl_server(dl_se) && dl_se->dl_throttled && !dl_se->dl_defer)
		return;

		if (dl_entity_is_special(dl_se))
		return;

		scaled_delta_exec = dl_scaled_delta_exec(rq, dl_se, delta_exec);

		dl_se->runtime -= scaled_delta_exec;

		/*
		* The fair server can consume its runtime while throttled (not queued/
		* running as regular CFS).
		*
		* If the server consumes its entire runtime in this state, the server
		* is not required for the current period. Thus, reset the server by
		* starting a new period, pushing the activation.
		*/
		if (dl_se->dl_defer && dl_se->dl_throttled && dl_runtime_exceeded(dl_se)) {
		/*
		* If the server was previously activated - the starving condition
		* took place, at this point it went away because the fair scheduler
		* was able to get runtime in background. So return to the initial
		* state.
		*/
		dl_se->dl_defer_running = 0;

		hrtimer_try_to_cancel(&dl_se->dl_timer);

		replenish_dl_new_period(dl_se, dl_se->rq);

		/*
		* Not being able to start the timer seems problematic. If it could not
		* be started for whatever reason, we need to "unthrottle" the DL server
		* and queue right away. Otherwise nothing might queue it. That's similar
		* to what enqueue_dl_entity() does on start_dl_timer==0. For now, just warn.
		*/
		WARN_ON_ONCE(!start_dl_timer(dl_se));

		return;
		}

		throttle:
		if (dl_runtime_exceeded(dl_se) || dl_se->dl_yielded) {
		dl_se->dl_throttled = 1;
		@@ -1381,6 +1547,14 @@ static void update_curr_dl_se(struct rq *rq, struct sched_dl_entity *dl_se, s64
		resched_curr(rq);
		}

		/*
		* The fair server (sole dl_server) does not account for real-time
		* workload because it is running fair work.
		*/
		if (dl_se == &rq->fair_server)
		return;

		#ifdef CONFIG_RT_GROUP_SCHED
		/*
		* Because -- for now -- we share the rt bandwidth, we need to
		* account our runtime there too, otherwise actual rt tasks
		@@ -1405,34 +1579,157 @@ static void update_curr_dl_se(struct rq *rq, struct sched_dl_entity *dl_se, s64
		rt_rq->rt_time += delta_exec;
		raw_spin_unlock(&rt_rq->rt_runtime_lock);
		}
		#endif
		}

		/*
		* In the non-defer mode, the idle time is not accounted, as the
		* server provides a guarantee.
		*
		* If the dl_server is in defer mode, the idle time is also considered
		* as time available for the fair server, avoiding a penalty for the
		* rt scheduler that did not consumed that time.
		*/
		void dl_server_update_idle_time(struct rq rq, struct task_struct p)
		{
		s64 delta_exec, scaled_delta_exec;

		if (!rq->fair_server.dl_defer)
		return;

		/* no need to discount more */
		if (rq->fair_server.runtime < 0)
		return;

		delta_exec = rq_clock_task(rq) - p->se.exec_start;
		if (delta_exec < 0)
		return;

		scaled_delta_exec = dl_scaled_delta_exec(rq, &rq->fair_server, delta_exec);

		rq->fair_server.runtime -= scaled_delta_exec;

		if (rq->fair_server.runtime < 0) {
		rq->fair_server.dl_defer_running = 0;
		rq->fair_server.runtime = 0;
		}

		p->se.exec_start = rq_clock_task(rq);
		}

		/*
		 * Charge @delta_exec to the server entity @dl_se through
		 * update_curr_dl_se(), unless the server has no runtime configured.
		 */
		void dl_server_update(struct sched_dl_entity *dl_se, s64 delta_exec)
		{
			/* 0 runtime = fair server disabled */
			if (!dl_se->dl_runtime)
				return;

			update_curr_dl_se(dl_se->rq, dl_se, delta_exec);
		}

		void dl_server_start(struct sched_dl_entity *dl_se)
		{
		struct rq *rq = dl_se->rq;

		/*
		* XXX: the apply do not work fine at the init phase for the
		* fair server because things are not yet set. We need to improve
		* this before getting generic.
		*/
		if (!dl_server(dl_se)) {
		u64 runtime = 50 * NSEC_PER_MSEC;
		u64 period = 1000 * NSEC_PER_MSEC;

		dl_server_apply_params(dl_se, runtime, period, 1);

		dl_se->dl_server = 1;
		dl_se->dl_defer = 1;
		setup_new_dl_entity(dl_se);
		}

		if (!dl_se->dl_runtime)
		return;

		enqueue_dl_entity(dl_se, ENQUEUE_WAKEUP);
		if (!dl_task(dl_se->rq->curr) \|\| dl_entity_preempt(dl_se, &rq->curr->dl))
		resched_curr(dl_se->rq);
		}

		/*
		 * Stop the server entity @dl_se: dequeue it, try to cancel its dl_timer
		 * and clear the throttled / defer-armed flags. Does nothing for a server
		 * with zero dl_runtime.
		 */
		void dl_server_stop(struct sched_dl_entity *dl_se)
		{
			if (dl_se->dl_runtime) {
				dequeue_dl_entity(dl_se, DEQUEUE_SLEEP);
				hrtimer_try_to_cancel(&dl_se->dl_timer);

				dl_se->dl_throttled = 0;
				dl_se->dl_defer_armed = 0;
			}
		}

		void dl_server_init(struct sched_dl_entity *dl_se, struct rq *rq,
		dl_server_has_tasks_f has_tasks,
		dl_server_pick_f pick)
		dl_server_pick_f pick_next,
		dl_server_pick_f pick_task)
		{
		dl_se->rq = rq;
		dl_se->server_has_tasks = has_tasks;
		dl_se->server_pick = pick;
		dl_se->server_pick_next = pick_next;
		dl_se->server_pick_task = pick_task;
		}

		void __dl_server_attach_root(struct sched_dl_entity dl_se, struct rq rq)
		{
		u64 new_bw = dl_se->dl_bw;
		int cpu = cpu_of(rq);
		struct dl_bw *dl_b;

		dl_b = dl_bw_of(cpu_of(rq));
		guard(raw_spinlock)(&dl_b->lock);

		if (!dl_bw_cpus(cpu))
		return;

		__dl_add(dl_b, new_bw, dl_bw_cpus(cpu));
		}

		/*
		 * Set the runtime/period of server entity @dl_se and update the bandwidth
		 * accounting accordingly.
		 *
		 * @runtime: new dl_runtime.
		 * @period:  new dl_period, also used as dl_deadline.
		 * @init:    true on first setup, in which case the previous bandwidth is
		 *           taken as 0 and the new one is simply added.
		 *
		 * Returns 0 on success, or -EBUSY when __dl_overflow() rejects the change.
		 * The dynamic runtime and deadline are reset to 0 on success.
		 */
		int dl_server_apply_params(struct sched_dl_entity *dl_se, u64 runtime, u64 period, bool init)
		{
			struct rq *rq = dl_se->rq;
			int cpu = cpu_of(rq);
			u64 new_bw = to_ratio(period, runtime);
			u64 old_bw = 0;
			struct dl_bw *dl_b = dl_bw_of(cpu);
			unsigned long cap;
			int cpus;

			if (!init)
				old_bw = to_ratio(dl_se->dl_period, dl_se->dl_runtime);

			guard(raw_spinlock)(&dl_b->lock);

			cpus = dl_bw_cpus(cpu);
			cap = dl_bw_capacity(cpu);

			if (__dl_overflow(dl_b, cap, old_bw, new_bw))
				return -EBUSY;

			if (!init) {
				/* Swap the old bandwidth for the new one. */
				__dl_sub(dl_b, dl_se->dl_bw, cpus);
				__dl_add(dl_b, new_bw, cpus);

				dl_rq_change_utilization(rq, dl_se, new_bw);
			} else {
				__add_rq_bw(new_bw, &rq->dl);
				__dl_add(dl_b, new_bw, cpus);
			}

			/* Static parameters. */
			dl_se->dl_runtime = runtime;
			dl_se->dl_period = period;
			dl_se->dl_deadline = period;

			/* Dynamic state starts over. */
			dl_se->deadline = 0;
			dl_se->runtime = 0;

			dl_se->dl_density = to_ratio(dl_se->dl_deadline, dl_se->dl_runtime);
			dl_se->dl_bw = to_ratio(dl_se->dl_period, dl_se->dl_runtime);

			return 0;
		}

		/*
		@@ -1735,7 +2032,7 @@ enqueue_dl_entity(struct sched_dl_entity *dl_se, int flags)
		* be counted in the active utilization; hence, we need to call
		* add_running_bw().
		*/
		if (dl_se->dl_throttled && !(flags & ENQUEUE_REPLENISH)) {
		if (!dl_se->dl_defer && dl_se->dl_throttled && !(flags & ENQUEUE_REPLENISH)) {
		if (flags & ENQUEUE_WAKEUP)
		task_contending(dl_se, flags);

		@@ -1757,6 +2054,25 @@ enqueue_dl_entity(struct sched_dl_entity *dl_se, int flags)
		setup_new_dl_entity(dl_se);
		}

		/*
		* If the reservation is still throttled, e.g., it got replenished but is a
		* deferred task and still got to wait, don't enqueue.
		*/
		if (dl_se->dl_throttled && start_dl_timer(dl_se))
		return;

		/*
		* We're about to enqueue, make sure we're not ->dl_throttled!
		* In case the timer was not started, say because the defer time
		* has passed, mark as not throttled and mark unarmed.
		* Also cancel earlier timers, since letting those run is pointless.
		*/
		if (dl_se->dl_throttled) {
		hrtimer_try_to_cancel(&dl_se->dl_timer);
		dl_se->dl_defer_armed = 0;
		dl_se->dl_throttled = 0;
		}

		__enqueue_dl_entity(dl_se);
		}

		@@ -2086,7 +2402,12 @@ static struct sched_dl_entity *pick_next_dl_entity(struct dl_rq *dl_rq)
		return __node_2_dle(left);
		}

		static struct task_struct *pick_task_dl(struct rq *rq)
		/*
		* __pick_next_task_dl - Helper to pick the next -deadline task to run.
		* @rq: The runqueue to pick the next task from.
		* @peek: If true, just peek at the next task. Only relevant for dlserver.
		*/
		static struct task_struct *__pick_next_task_dl(struct rq *rq, bool peek)
		{
		struct sched_dl_entity *dl_se;
		struct dl_rq *dl_rq = &rq->dl;
		@@ -2100,7 +2421,10 @@ static struct task_struct *pick_task_dl(struct rq *rq)
		WARN_ON_ONCE(!dl_se);

		if (dl_server(dl_se)) {
		p = dl_se->server_pick(dl_se);
		if (IS_ENABLED(CONFIG_SMP) && peek)
		p = dl_se->server_pick_task(dl_se);
		else
		p = dl_se->server_pick_next(dl_se);
		if (!p) {
		WARN_ON_ONCE(1);
		dl_se->dl_yielded = 1;
		@@ -2115,11 +2439,18 @@ static struct task_struct *pick_task_dl(struct rq *rq)
		return p;
		}

		#ifdef CONFIG_SMP
		/*
		 * SMP-only wrapper: pick from @rq through __pick_next_task_dl() in peek
		 * mode (peek == true).
		 */
		static struct task_struct *pick_task_dl(struct rq *rq)
		{
			return __pick_next_task_dl(rq, true);
		}
		#endif

		static struct task_struct *pick_next_task_dl(struct rq *rq)
		{
		struct task_struct *p;

		p = pick_task_dl(rq);
		p = __pick_next_task_dl(rq, false);
		if (!p)
		return p;

kernel/sched/debug.c

+162 −4

File changed.

Preview size limit exceeded, changes collapsed.