Commit 36534698 authored by Linus Torvalds
Browse files

Merge tag 'sched_urgent_for_v6.10' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler fixes from Borislav Petkov:

 - Fix a performance regression when measuring the CPU time of a thread
   (clock_gettime(CLOCK_THREAD_CPUTIME_ID,...)) due to the addition of
   PSI IRQ time accounting in the hotpath

 - Fix a task_struct leak caused by a missing refcount decrement when
   the task is enqueued before the timer that is supposed to drop the
   reference expires

 - Revert an attempt to expedite detaching of movable tasks, as finding
   those could become very costly. Turns out the original issue wasn't
   even hit by anyone

* tag 'sched_urgent_for_v6.10' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched: Move psi_account_irqtime() out of update_rq_clock_task() hotpath
  sched/deadline: Fix task_struct reference leak
  Revert "sched/fair: Make sure to try to detach at least one movable task"
parents 35ce4632 ddae0ca2
Loading
Loading
Loading
Loading
+5 −2
Original line number Diff line number Diff line
@@ -723,7 +723,6 @@ static void update_rq_clock_task(struct rq *rq, s64 delta)

	rq->prev_irq_time += irq_delta;
	delta -= irq_delta;
	psi_account_irqtime(rq->curr, irq_delta);
	delayacct_irq(rq->curr, irq_delta);
#endif
#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
@@ -5665,7 +5664,7 @@ void sched_tick(void)
{
	int cpu = smp_processor_id();
	struct rq *rq = cpu_rq(cpu);
	struct task_struct *curr = rq->curr;
	struct task_struct *curr;
	struct rq_flags rf;
	unsigned long hw_pressure;
	u64 resched_latency;
@@ -5677,6 +5676,9 @@ void sched_tick(void)

	rq_lock(rq, &rf);

	curr = rq->curr;
	psi_account_irqtime(rq, curr, NULL);

	update_rq_clock(rq);
	hw_pressure = arch_scale_hw_pressure(cpu_of(rq));
	update_hw_load_avg(rq_clock_task(rq), rq, hw_pressure);
@@ -6737,6 +6739,7 @@ static void __sched notrace __schedule(unsigned int sched_mode)
		++*switch_count;

		migrate_disable_switch(rq, prev);
		psi_account_irqtime(rq, prev, next);
		psi_sched_switch(prev, next, !task_on_rq_queued(prev));

		trace_sched_switch(sched_mode & SM_MASK_PREEMPT, prev, next, prev_state);
+6 −1
Original line number Diff line number Diff line
@@ -1804,8 +1804,13 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
			 * The replenish timer needs to be canceled. No
			 * problem if it fires concurrently: boosted threads
			 * are ignored in dl_task_timer().
			 *
			 * If the timer callback was running (hrtimer_try_to_cancel == -1),
			 * it will eventually call put_task_struct().
			 */
			hrtimer_try_to_cancel(&p->dl.dl_timer);
			if (hrtimer_try_to_cancel(&p->dl.dl_timer) == 1 &&
			    !dl_server(&p->dl))
				put_task_struct(p);
			p->dl.dl_throttled = 0;
		}
	} else if (!dl_prio(p->normal_prio)) {
+3 −9
Original line number Diff line number Diff line
@@ -9149,12 +9149,8 @@ static int detach_tasks(struct lb_env *env)
			break;

		env->loop++;
		/*
		 * We've more or less seen every task there is, call it quits
		 * unless we haven't found any movable task yet.
		 */
		if (env->loop > env->loop_max &&
		    !(env->flags & LBF_ALL_PINNED))
		/* We've more or less seen every task there is, call it quits */
		if (env->loop > env->loop_max)
			break;

		/* take a breather every nr_migrate tasks */
@@ -11393,8 +11389,6 @@ static int sched_balance_rq(int this_cpu, struct rq *this_rq,

		if (env.flags & LBF_NEED_BREAK) {
			env.flags &= ~LBF_NEED_BREAK;
			/* Stop if we tried all running tasks */
			if (env.loop < busiest->nr_running)
			goto more_balance;
		}

+16 −5
Original line number Diff line number Diff line
@@ -773,6 +773,7 @@ static void psi_group_change(struct psi_group *group, int cpu,
	enum psi_states s;
	u32 state_mask;

	lockdep_assert_rq_held(cpu_rq(cpu));
	groupc = per_cpu_ptr(group->pcpu, cpu);

	/*
@@ -991,22 +992,32 @@ void psi_task_switch(struct task_struct *prev, struct task_struct *next,
}

#ifdef CONFIG_IRQ_TIME_ACCOUNTING
void psi_account_irqtime(struct task_struct *task, u32 delta)
void psi_account_irqtime(struct rq *rq, struct task_struct *curr, struct task_struct *prev)
{
	int cpu = task_cpu(task);
	int cpu = task_cpu(curr);
	struct psi_group *group;
	struct psi_group_cpu *groupc;
	u64 now;
	u64 now, irq;
	s64 delta;

	if (static_branch_likely(&psi_disabled))
		return;

	if (!task->pid)
	if (!curr->pid)
		return;

	lockdep_assert_rq_held(rq);
	group = task_psi_group(curr);
	if (prev && task_psi_group(prev) == group)
		return;

	now = cpu_clock(cpu);
	irq = irq_time_read(cpu);
	delta = (s64)(irq - rq->psi_irq_time);
	if (delta < 0)
		return;
	rq->psi_irq_time = irq;

	group = task_psi_group(task);
	do {
		if (!group->enabled)
			continue;
+1 −0
Original line number Diff line number Diff line
@@ -1126,6 +1126,7 @@ struct rq {

#ifdef CONFIG_IRQ_TIME_ACCOUNTING
	u64			prev_irq_time;
	u64			psi_irq_time;
#endif
#ifdef CONFIG_PARAVIRT
	u64			prev_steal_time;
Loading