Commit 1c3b68f0 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'sched-core-2026-04-13' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler updates from Ingo Molnar:
 "Fair scheduling updates:
   - Skip SCHED_IDLE rq for SCHED_IDLE tasks (Christian Loehle)
   - Remove superfluous rcu_read_lock() in the wakeup path (K Prateek Nayak)
   - Simplify the entry condition for update_idle_cpu_scan() (K Prateek Nayak)
   - Simplify SIS_UTIL handling in select_idle_cpu() (K Prateek Nayak)
   - Avoid overflow in enqueue_entity() (K Prateek Nayak)
   - Update overutilized detection (Vincent Guittot)
   - Prevent negative lag increase during delayed dequeue (Vincent Guittot)
   - Clear buddies for preempt_short (Vincent Guittot)
   - Implement more complex proportional newidle balance (Peter Zijlstra)
   - Increase weight bits for avg_vruntime (Peter Zijlstra)
   - Use full weight to __calc_delta() (Peter Zijlstra)

  RT and DL scheduling updates:
   - Fix incorrect schedstats for rt and dl thread (Dengjun Su)
   - Skip group schedulable check with rt_group_sched=0 (Michal Koutný)
   - Move group schedulability check to sched_rt_global_validate()
     (Michal Koutný)
   - Add reporting of runtime left & abs deadline to sched_getattr()
     for DEADLINE tasks (Tommaso Cucinotta)

  Scheduling topology updates by K Prateek Nayak:
   - Compute sd_weight considering cpuset partitions
   - Extract "imb_numa_nr" calculation into a separate helper
   - Allocate per-CPU sched_domain_shared in s_data
   - Switch to assigning "sd->shared" from s_data
   - Remove sched_domain_shared allocation with sd_data

  Energy-aware scheduling updates:
   - Filter false overloaded_group case for EAS (Vincent Guittot)
   - PM: EM: Switch to rcu_dereference_all() in wakeup path
     (Dietmar Eggemann)

  Infrastructure updates:
   - Replace use of system_unbound_wq with system_dfl_wq (Marco Crivellari)

  Proxy scheduling updates by John Stultz:
   - Make class_schedulers avoid pushing current, and get rid of proxy_tag_curr()
   - Minimise repeated sched_proxy_exec() checking
   - Fix potentially missing balancing with Proxy Exec
   - Fix and improve task::blocked_on et al handling
   - Add assert_balance_callbacks_empty() helper
   - Add logic to zap balancing callbacks if we pick again
   - Move attach_one_task() and attach_task() helpers to sched.h
   - Handle blocked-waiter migration (and return migration)
   - Add K Prateek Nayak to scheduler reviewers for proxy execution

  Misc cleanups and fixes by John Stultz, Joseph Salisbury, Peter
  Zijlstra, K Prateek Nayak, Michal Koutný, Randy Dunlap, Shrikanth
  Hegde, Vincent Guittot, Zhan Xusheng, Xie Yuanbin and Vincent Guittot"

* tag 'sched-core-2026-04-13' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (46 commits)
  sched/eevdf: Clear buddies for preempt_short
  sched/rt: Cleanup global RT bandwidth functions
  sched/rt: Move group schedulability check to sched_rt_global_validate()
  sched/rt: Skip group schedulable check with rt_group_sched=0
  sched/fair: Avoid overflow in enqueue_entity()
  sched: Use u64 for bandwidth ratio calculations
  sched/fair: Prevent negative lag increase during delayed dequeue
  sched/fair: Use sched_energy_enabled()
  sched: Handle blocked-waiter migration (and return migration)
  sched: Move attach_one_task and attach_task helpers to sched.h
  sched: Add logic to zap balance callbacks if we pick again
  sched: Add assert_balance_callbacks_empty helper
  sched/locking: Add special p->blocked_on==PROXY_WAKING value for proxy return-migration
  sched: Fix modifying donor->blocked on without proper locking
  locking: Add task::blocked_lock to serialize blocked_on state
  sched: Fix potentially missing balancing with Proxy Exec
  sched: Minimise repeated sched_proxy_exec() checking
  sched: Make class_schedulers avoid pushing current, and get rid of proxy_tag_curr()
  MAINTAINERS: Add K Prateek Nayak to scheduler reviewers
  sched/core: Get this cpu once in ttwu_queue_cond()
  ...
parents 33c66eb5 78cde54e
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -23708,6 +23708,7 @@ R: Steven Rostedt <rostedt@goodmis.org> (SCHED_FIFO/SCHED_RR)
R:	Ben Segall <bsegall@google.com> (CONFIG_CFS_BANDWIDTH)
R:	Mel Gorman <mgorman@suse.de> (CONFIG_NUMA_BALANCING)
R:	Valentin Schneider <vschneid@redhat.com> (TOPOLOGY)
R:	K Prateek Nayak <kprateek.nayak@amd.com>
L:	linux-kernel@vger.kernel.org
S:	Maintained
T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git sched/core
+0 −3
Original line number Diff line number Diff line
@@ -136,9 +136,6 @@ static inline void mm_reset_untag_mask(struct mm_struct *mm)
}
#endif

#define enter_lazy_tlb enter_lazy_tlb
extern void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk);

extern void mm_init_global_asid(struct mm_struct *mm);
extern void mm_free_global_asid(struct mm_struct *mm);

+26 −0
Original line number Diff line number Diff line
@@ -172,6 +172,28 @@ struct tlb_state_shared {
};
DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state_shared, cpu_tlbstate_shared);

/*
 * Please ignore the name of this function.  It should be called
 * switch_to_kernel_thread().
 *
 * enter_lazy_tlb() is a hint from the scheduler that we are entering a
 * kernel thread or other context without an mm.  Acceptable implementations
 * include doing nothing whatsoever, switching to init_mm, or various clever
 * lazy tricks to try to minimize TLB flushes.
 *
 * The scheduler reserves the right to call enter_lazy_tlb() several times
 * in a row.  It will notify us that we're going back to a real mm by
 * calling switch_mm_irqs_off().
 */
#define enter_lazy_tlb enter_lazy_tlb
static __always_inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
{
	if (this_cpu_read(cpu_tlbstate.loaded_mm) == &init_mm)
		return;

	this_cpu_write(cpu_tlbstate_shared.is_lazy, true);
}

bool nmi_uaccess_okay(void);
#define nmi_uaccess_okay nmi_uaccess_okay

@@ -480,6 +502,10 @@ static inline void cpu_tlbstate_update_lam(unsigned long lam, u64 untag_mask)
{
}
#endif
#else /* !MODULE */
#define enter_lazy_tlb enter_lazy_tlb
extern void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
	__compiletime_error("enter_lazy_tlb() should not be used in modules");
#endif /* !MODULE */

static inline void __native_tlb_flush_global(unsigned long cr4)
+0 −21
Original line number Diff line number Diff line
@@ -971,27 +971,6 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next,
	}
}

/*
 * Please ignore the name of this function.  It should be called
 * switch_to_kernel_thread().
 *
 * enter_lazy_tlb() is a hint from the scheduler that we are entering a
 * kernel thread or other context without an mm.  Acceptable implementations
 * include doing nothing whatsoever, switching to init_mm, or various clever
 * lazy tricks to try to minimize TLB flushes.
 *
 * The scheduler reserves the right to call enter_lazy_tlb() several times
 * in a row.  It will notify us that we're going back to a real mm by
 * calling switch_mm_irqs_off().
 */
void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
{
	if (this_cpu_read(cpu_tlbstate.loaded_mm) == &init_mm)
		return;

	this_cpu_write(cpu_tlbstate_shared.is_lazy, true);
}

/*
 * Using a temporary mm allows to set temporary mappings that are not accessible
 * by other CPUs. Such mappings are needed to perform sensitive memory writes
+2 −2
Original line number Diff line number Diff line
@@ -248,7 +248,7 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd,
	struct em_perf_state *ps;
	int i;

	WARN_ONCE(!rcu_read_lock_held(), "EM: rcu read lock needed\n");
	lockdep_assert(rcu_read_lock_any_held());

	if (!sum_util)
		return 0;
@@ -267,7 +267,7 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd,
	 * Find the lowest performance state of the Energy Model above the
	 * requested performance.
	 */
	em_table = rcu_dereference(pd->em_table);
	em_table = rcu_dereference_all(pd->em_table);
	i = em_pd_get_efficient_state(em_table->state, pd, max_util);
	ps = &em_table->state[i];

Loading