Scheduler updates for v6.18:

Core scheduler changes:

 - Make migrate_{en,dis}able() inline, to improve performance
   (Menglong Dong)

 - Move SDTL_INIT() functions out-of-line (Peter Zijlstra)

 - Unify the SCHED_{SMT,CLUSTER,MC} Kconfig (Peter Zijlstra)

Fair scheduling:

 - Defer throttling to when tasks exit to user-space, to reduce the
   chance & impact of throttle-preemption with held locks and other
   resources (Aaron Lu, Valentin Schneider)

 - Get rid of sched_domains_curr_level hack for tl->cpumask(), as the
   warning was getting triggered on certain topologies (Peter Zijlstra)

Misc cleanups & fixes:

 - Header cleanups (Menglong Dong)

 - Fix race in push_dl_task() (Harshit Agarwal)

Signed-off-by: Ingo Molnar <mingo@kernel.org>

-----BEGIN PGP SIGNATURE-----

iQJFBAABCgAvFiEEBpT5eoXrXCwVQwEKEnMQ0APhK1gFAmjWn0IRHG1pbmdvQGtl
cm5lbC5vcmcACgkQEnMQ0APhK1i9ng/+KJYEsNilPA6nGzZLurU3HXm6S5NrNBPc
xJU43eo3QdFUymKqYaCoXCwaMah3m8fFW+DC8ZoEvWC5wHnlIw6+u/ZEtsWQ4R8N
8+sjQddQeb9Gez+nkC7pXX8h1nqlhHyGWU88+9hOyEEMk21KgH7tJ5gOuGQdPhiA
v24D8dkS1sT6CAeqZAycYIkos+kfNNV+wAEQCXAxfvSSslpzJVZcj9kdQInxF4O4
9+WrvFZFVYJWdJgmgfbpBMw7N8a4Gpv+Lr6U0ZVXHNeLjNdn60I+5Me+9BW9GRcO
pPL50RfGgGgVIqyEHvBhhz8p0tlKUJo9NkRJ9hmNB5SKT3P442SU789ppTYgI1il
5P4g3TiuomqPVuo99/mYHYOpT6NkYC1fkwMP9Hfk4NRUX0EA6lKXelVnMXaC3Z7R
U+0xVYtK4BBLRFBo4HIEM1HChSIcOnutuufFX4lPPt+ewnAmJwSt619w+lBF0v+n
cpiLIlQ74LrYlrULw3bznnwzh5dV8XHm4CT3XAShm6qB97/SaLr0rI5W7njdSvte
ym9z4yFWidawowvLWjFX1CykSnX/T5MV+uzuIH64MEIA39B4VKJWCAC3l3AMJn/5
Ckhf93B5uG0q+wUtE66x/JrN7wtbz9PMpwh01fXNWs/eWc1VKkVrvq+PmHODngmz
drSUoI1P570=
=8ZTZ
-----END PGP SIGNATURE-----

Merge tag 'sched-core-2025-09-26' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler updates from Ingo Molnar:
 "Core scheduler changes:

   - Make migrate_{en,dis}able() inline, to improve performance
     (Menglong Dong)

   - Move SDTL_INIT() functions out-of-line (Peter Zijlstra)

   - Unify the SCHED_{SMT,CLUSTER,MC} Kconfig (Peter Zijlstra)

  Fair scheduling:

   - Defer throttling to when tasks exit to user-space, to reduce the
     chance & impact of throttle-preemption with held locks and other
     resources (Aaron Lu, Valentin Schneider)

   - Get rid of sched_domains_curr_level hack for tl->cpumask(), as the
     warning was getting triggered on certain topologies (Peter Zijlstra)

  Misc cleanups & fixes:

   - Header cleanups (Menglong Dong)

   - Fix race in push_dl_task() (Harshit Agarwal)"

* tag 'sched-core-2025-09-26' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched: Fix some typos in include/linux/preempt.h
  sched: Make migrate_{en,dis}able() inline
  rcu: Replace preempt.h with sched.h in include/linux/rcupdate.h
  arch: Add the macro COMPILE_OFFSETS to all the asm-offsets.c
  sched/fair: Do not balance task to a throttled cfs_rq
  sched/fair: Do not special case tasks in throttled hierarchy
  sched/fair: update_cfs_group() for throttled cfs_rqs
  sched/fair: Propagate load for throttled cfs_rq
  sched/fair: Get rid of throttled_lb_pair()
  sched/fair: Task based throttle time accounting
  sched/fair: Switch to task based throttle model
  sched/fair: Implement throttle task work and related helpers
  sched/fair: Add related data structure for task based throttle
  sched: Unify the SCHED_{SMT,CLUSTER,MC} Kconfig
  sched: Move SDTL_INIT() functions out-of-line
  sched/fair: Get rid of sched_domains_curr_level hack for tl->cpumask()
  sched/deadline: Fix race in push_dl_task()
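The "defer throttling to user-space exit" work is implemented in the
kernel/sched/fair.c hunks further down (throttle_cfs_rq_work() and its
helpers). For orientation: the deferral is built on the generic task_work
API, so instead of dequeuing a task the moment its cfs_rq runs out of
quota, a callback is queued that runs on the task's own return path to
user-space, where it no longer holds kernel locks or other resources.
A minimal, hypothetical sketch of that pattern (the names below are
placeholders, not the actual fair.c code; sched_throttle_work is the new
task_struct field added later in this diff):

#include <linux/sched.h>
#include <linux/task_work.h>

/* Placeholder callback: the real one is throttle_cfs_rq_work(), which
 * dequeues the task and parks it on its cfs_rq's limbo list. */
static void demo_throttle_work(struct callback_head *work)
{
	struct task_struct *p = container_of(work, struct task_struct,
					     sched_throttle_work);

	WARN_ON_ONCE(p != current);	/* runs in the task's own context */
	/* ... dequeue @p here, right before it returns to user-space ... */
}

/* Hypothetical helper: arm the deferred throttle for @p.
 * TWA_RESUME runs the callback on the next exit to user-space. */
static void demo_defer_throttle(struct task_struct *p)
{
	init_task_work(&p->sched_throttle_work, demo_throttle_work);
	task_work_add(p, &p->sched_throttle_work, TWA_RESUME);
}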
commit 6c7340a7a8

Kbuild | 13
@@ -34,13 +34,24 @@ arch/$(SRCARCH)/kernel/asm-offsets.s: $(timeconst-file) $(bounds-file)
 $(offsets-file): arch/$(SRCARCH)/kernel/asm-offsets.s FORCE
 	$(call filechk,offsets,__ASM_OFFSETS_H__)
 
+# Generate rq-offsets.h
+
+rq-offsets-file := include/generated/rq-offsets.h
+
+targets += kernel/sched/rq-offsets.s
+
+kernel/sched/rq-offsets.s: $(offsets-file)
+
+$(rq-offsets-file): kernel/sched/rq-offsets.s FORCE
+	$(call filechk,offsets,__RQ_OFFSETS_H__)
+
 # Check for missing system calls
 
 quiet_cmd_syscalls = CALL    $<
       cmd_syscalls = $(CONFIG_SHELL) $< $(CC) $(c_flags) $(missing_syscalls_flags)
 
 PHONY += missing-syscalls
-missing-syscalls: scripts/checksyscalls.sh $(offsets-file)
+missing-syscalls: scripts/checksyscalls.sh $(rq-offsets-file)
 	$(call cmd,syscalls)
 
 # Check the manual modification of atomic headers
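The rules above produce include/generated/rq-offsets.h from a new
kernel/sched/rq-offsets.s. The C source it is generated from is not part
of this excerpt; presumably it follows the usual asm-offsets.c pattern,
roughly like the sketch below (file name, includes and the exact set of
constants are assumptions — the only constant visibly consumed later in
this series is RQ_nr_pinned, used by this_rq_pinned() in <linux/sched.h>):

// SPDX-License-Identifier: GPL-2.0
/* Assumed sketch of kernel/sched/rq-offsets.c, asm-offsets style. */
#define COMPILE_OFFSETS

#include <linux/kbuild.h>	/* DEFINE() */
#include <linux/stddef.h>	/* offsetof() */
#include <linux/types.h>

#include "sched.h"		/* struct rq (kernel/sched/sched.h, assumed) */

static void __used offsets(void)
{
	/* Ends up as "#define RQ_nr_pinned <byte offset>" in rq-offsets.h. */
	DEFINE(RQ_nr_pinned, offsetof(struct rq, nr_pinned));
}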
arch/Kconfig | 38
@@ -41,6 +41,44 @@ config HOTPLUG_SMT
 config SMT_NUM_THREADS_DYNAMIC
 	bool
 
+config ARCH_SUPPORTS_SCHED_SMT
+	bool
+
+config ARCH_SUPPORTS_SCHED_CLUSTER
+	bool
+
+config ARCH_SUPPORTS_SCHED_MC
+	bool
+
+config SCHED_SMT
+	bool "SMT (Hyperthreading) scheduler support"
+	depends on ARCH_SUPPORTS_SCHED_SMT
+	default y
+	help
+	  Improves the CPU scheduler's decision making when dealing with
+	  MultiThreading at a cost of slightly increased overhead in some
+	  places. If unsure say N here.
+
+config SCHED_CLUSTER
+	bool "Cluster scheduler support"
+	depends on ARCH_SUPPORTS_SCHED_CLUSTER
+	default y
+	help
+	  Cluster scheduler support improves the CPU scheduler's decision
+	  making when dealing with machines that have clusters of CPUs.
+	  Cluster usually means a couple of CPUs which are placed closely
+	  by sharing mid-level caches, last-level cache tags or internal
+	  busses.
+
+config SCHED_MC
+	bool "Multi-Core Cache (MC) scheduler support"
+	depends on ARCH_SUPPORTS_SCHED_MC
+	default y
+	help
+	  Multi-core scheduler support improves the CPU scheduler's decision
+	  making when dealing with multi-core CPU chips at a cost of slightly
+	  increased overhead in some places. If unsure say N here.
+
 # Selected by HOTPLUG_CORE_SYNC_DEAD or HOTPLUG_CORE_SYNC_FULL
 config HOTPLUG_CORE_SYNC
 	bool
|
@ -4,6 +4,7 @@
|
|||
* This code generates raw asm output which is post-processed to extract
|
||||
* and format the required data.
|
||||
*/
|
||||
#define COMPILE_OFFSETS
|
||||
|
||||
#include <linux/types.h>
|
||||
#include <linux/stddef.h>
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
/*
|
||||
* Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
|
||||
*/
|
||||
#define COMPILE_OFFSETS
|
||||
|
||||
#include <linux/sched.h>
|
||||
#include <linux/mm.h>
|
||||
|
|
|
@ -941,28 +941,14 @@ config IRQSTACKS
|
|||
config ARM_CPU_TOPOLOGY
|
||||
bool "Support cpu topology definition"
|
||||
depends on SMP && CPU_V7
|
||||
select ARCH_SUPPORTS_SCHED_MC
|
||||
select ARCH_SUPPORTS_SCHED_SMT
|
||||
default y
|
||||
help
|
||||
Support ARM cpu topology definition. The MPIDR register defines
|
||||
affinity between processors which is then used to describe the cpu
|
||||
topology of an ARM System.
|
||||
|
||||
config SCHED_MC
|
||||
bool "Multi-core scheduler support"
|
||||
depends on ARM_CPU_TOPOLOGY
|
||||
help
|
||||
Multi-core scheduler support improves the CPU scheduler's decision
|
||||
making when dealing with multi-core CPU chips at a cost of slightly
|
||||
increased overhead in some places. If unsure say N here.
|
||||
|
||||
config SCHED_SMT
|
||||
bool "SMT scheduler support"
|
||||
depends on ARM_CPU_TOPOLOGY
|
||||
help
|
||||
Improves the CPU scheduler's decision making when dealing with
|
||||
MultiThreading at a cost of slightly increased overhead in some
|
||||
places. If unsure say N here.
|
||||
|
||||
config HAVE_ARM_SCU
|
||||
bool
|
||||
help
|
||||
|
|
|
@ -7,6 +7,8 @@
|
|||
* This code generates raw asm output which is post-processed to extract
|
||||
* and format the required data.
|
||||
*/
|
||||
#define COMPILE_OFFSETS
|
||||
|
||||
#include <linux/compiler.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/mm.h>
|
||||
|
|
|
@ -108,6 +108,9 @@ config ARM64
|
|||
select ARCH_SUPPORTS_PER_VMA_LOCK
|
||||
select ARCH_SUPPORTS_HUGE_PFNMAP if TRANSPARENT_HUGEPAGE
|
||||
select ARCH_SUPPORTS_RT
|
||||
select ARCH_SUPPORTS_SCHED_SMT
|
||||
select ARCH_SUPPORTS_SCHED_CLUSTER
|
||||
select ARCH_SUPPORTS_SCHED_MC
|
||||
select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
|
||||
select ARCH_WANT_COMPAT_IPC_PARSE_VERSION if COMPAT
|
||||
select ARCH_WANT_DEFAULT_BPF_JIT
|
||||
|
@ -1507,29 +1510,6 @@ config CPU_LITTLE_ENDIAN
|
|||
|
||||
endchoice
|
||||
|
||||
config SCHED_MC
|
||||
bool "Multi-core scheduler support"
|
||||
help
|
||||
Multi-core scheduler support improves the CPU scheduler's decision
|
||||
making when dealing with multi-core CPU chips at a cost of slightly
|
||||
increased overhead in some places. If unsure say N here.
|
||||
|
||||
config SCHED_CLUSTER
|
||||
bool "Cluster scheduler support"
|
||||
help
|
||||
Cluster scheduler support improves the CPU scheduler's decision
|
||||
making when dealing with machines that have clusters of CPUs.
|
||||
Cluster usually means a couple of CPUs which are placed closely
|
||||
by sharing mid-level caches, last-level cache tags or internal
|
||||
busses.
|
||||
|
||||
config SCHED_SMT
|
||||
bool "SMT scheduler support"
|
||||
help
|
||||
Improves the CPU scheduler's decision making when dealing with
|
||||
MultiThreading at a cost of slightly increased overhead in some
|
||||
places. If unsure say N here.
|
||||
|
||||
config NR_CPUS
|
||||
int "Maximum number of CPUs (2-4096)"
|
||||
range 2 4096
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
* 2001-2002 Keith Owens
|
||||
* Copyright (C) 2012 ARM Ltd.
|
||||
*/
|
||||
#define COMPILE_OFFSETS
|
||||
|
||||
#include <linux/arm_sdei.h>
|
||||
#include <linux/sched.h>
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
|
||||
#define COMPILE_OFFSETS
|
||||
|
||||
#include <linux/sched.h>
|
||||
#include <linux/kernel_stat.h>
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
*
|
||||
* Copyright (c) 2010-2012, The Linux Foundation. All rights reserved.
|
||||
*/
|
||||
#define COMPILE_OFFSETS
|
||||
|
||||
#include <linux/compat.h>
|
||||
#include <linux/types.h>
|
||||
|
|
|
@ -70,6 +70,8 @@ config LOONGARCH
|
|||
select ARCH_SUPPORTS_MSEAL_SYSTEM_MAPPINGS
|
||||
select ARCH_SUPPORTS_NUMA_BALANCING
|
||||
select ARCH_SUPPORTS_RT
|
||||
select ARCH_SUPPORTS_SCHED_SMT if SMP
|
||||
select ARCH_SUPPORTS_SCHED_MC if SMP
|
||||
select ARCH_USE_BUILTIN_BSWAP
|
||||
select ARCH_USE_CMPXCHG_LOCKREF
|
||||
select ARCH_USE_MEMTEST
|
||||
|
@ -452,23 +454,6 @@ config EFI_STUB
|
|||
This kernel feature allows the kernel to be loaded directly by
|
||||
EFI firmware without the use of a bootloader.
|
||||
|
||||
config SCHED_SMT
|
||||
bool "SMT scheduler support"
|
||||
depends on SMP
|
||||
default y
|
||||
help
|
||||
Improves scheduler's performance when there are multiple
|
||||
threads in one physical core.
|
||||
|
||||
config SCHED_MC
|
||||
bool "Multi-core scheduler support"
|
||||
depends on SMP
|
||||
default y
|
||||
help
|
||||
Multi-core scheduler support improves the CPU scheduler's decision
|
||||
making when dealing with multi-core CPU chips at a cost of slightly
|
||||
increased overhead in some places.
|
||||
|
||||
config SMP
|
||||
bool "Multi-Processing support"
|
||||
help
|
||||
|
|
|
@ -4,6 +4,8 @@
|
|||
*
|
||||
* Copyright (C) 2020-2022 Loongson Technology Corporation Limited
|
||||
*/
|
||||
#define COMPILE_OFFSETS
|
||||
|
||||
#include <linux/types.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/mm.h>
|
||||
|
|
|
@ -9,6 +9,7 @@
|
|||
* #defines from the assembly-language output.
|
||||
*/
|
||||
|
||||
#define COMPILE_OFFSETS
|
||||
#define ASM_OFFSETS_C
|
||||
|
||||
#include <linux/stddef.h>
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
* License. See the file "COPYING" in the main directory of this archive
|
||||
* for more details.
|
||||
*/
|
||||
#define COMPILE_OFFSETS
|
||||
|
||||
#include <linux/init.h>
|
||||
#include <linux/stddef.h>
|
||||
|
|
|
@ -2223,7 +2223,7 @@ config MIPS_MT_SMP
|
|||
select SMP
|
||||
select SMP_UP
|
||||
select SYS_SUPPORTS_SMP
|
||||
select SYS_SUPPORTS_SCHED_SMT
|
||||
select ARCH_SUPPORTS_SCHED_SMT
|
||||
select MIPS_PERF_SHARED_TC_COUNTERS
|
||||
help
|
||||
This is a kernel model which is known as SMVP. This is supported
|
||||
|
@ -2235,18 +2235,6 @@ config MIPS_MT_SMP
|
|||
config MIPS_MT
|
||||
bool
|
||||
|
||||
config SCHED_SMT
|
||||
bool "SMT (multithreading) scheduler support"
|
||||
depends on SYS_SUPPORTS_SCHED_SMT
|
||||
default n
|
||||
help
|
||||
SMT scheduler support improves the CPU scheduler's decision making
|
||||
when dealing with MIPS MT enabled cores at a cost of slightly
|
||||
increased overhead in some places. If unsure say N here.
|
||||
|
||||
config SYS_SUPPORTS_SCHED_SMT
|
||||
bool
|
||||
|
||||
config SYS_SUPPORTS_MULTITHREADING
|
||||
bool
|
||||
|
||||
|
@ -2318,7 +2306,7 @@ config MIPS_CPS
|
|||
select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU
|
||||
select SYNC_R4K if (CEVT_R4K || CSRC_R4K)
|
||||
select SYS_SUPPORTS_HOTPLUG_CPU
|
||||
select SYS_SUPPORTS_SCHED_SMT if CPU_MIPSR6
|
||||
select ARCH_SUPPORTS_SCHED_SMT if CPU_MIPSR6
|
||||
select SYS_SUPPORTS_SMP
|
||||
select WEAK_ORDERING
|
||||
select GENERIC_IRQ_MIGRATION if HOTPLUG_CPU
|
||||
|
|
|
@ -9,6 +9,8 @@
|
|||
* Kevin Kissell, kevink@mips.com and Carsten Langgaard, carstenl@mips.com
|
||||
* Copyright (C) 2000 MIPS Technologies, Inc.
|
||||
*/
|
||||
#define COMPILE_OFFSETS
|
||||
|
||||
#include <linux/compat.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/sched.h>
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
/*
|
||||
* Copyright (C) 2011 Tobias Klauser <tklauser@distanz.ch>
|
||||
*/
|
||||
#define COMPILE_OFFSETS
|
||||
|
||||
#include <linux/stddef.h>
|
||||
#include <linux/sched.h>
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
* compile this file to assembler, and then extract the
|
||||
* #defines from the assembly-language output.
|
||||
*/
|
||||
#define COMPILE_OFFSETS
|
||||
|
||||
#include <linux/signal.h>
|
||||
#include <linux/sched.h>
|
||||
|
|
|
@ -44,6 +44,7 @@ config PARISC
|
|||
select ARCH_HAVE_NMI_SAFE_CMPXCHG
|
||||
select GENERIC_SMP_IDLE_THREAD
|
||||
select GENERIC_ARCH_TOPOLOGY if SMP
|
||||
select ARCH_SUPPORTS_SCHED_MC if SMP && PA8X00
|
||||
select GENERIC_CPU_DEVICES if !SMP
|
||||
select GENERIC_LIB_DEVMEM_IS_ALLOWED
|
||||
select SYSCTL_ARCH_UNALIGN_ALLOW
|
||||
|
@ -319,14 +320,6 @@ config SMP
|
|||
|
||||
If you don't know what to do here, say N.
|
||||
|
||||
config SCHED_MC
|
||||
bool "Multi-core scheduler support"
|
||||
depends on GENERIC_ARCH_TOPOLOGY && PA8X00
|
||||
help
|
||||
Multi-core scheduler support improves the CPU scheduler's decision
|
||||
making when dealing with multi-core CPU chips at a cost of slightly
|
||||
increased overhead in some places. If unsure say N here.
|
||||
|
||||
config IRQSTACKS
|
||||
bool "Use separate kernel stacks when processing interrupts"
|
||||
default y
|
||||
|
|
|
@ -13,6 +13,7 @@
|
|||
* Copyright (C) 2002 Randolph Chung <tausq with parisc-linux.org>
|
||||
* Copyright (C) 2003 James Bottomley <jejb at parisc-linux.org>
|
||||
*/
|
||||
#define COMPILE_OFFSETS
|
||||
|
||||
#include <linux/types.h>
|
||||
#include <linux/sched.h>
|
||||
|
|
|
@ -170,6 +170,9 @@ config PPC
|
|||
select ARCH_STACKWALK
|
||||
select ARCH_SUPPORTS_ATOMIC_RMW
|
||||
select ARCH_SUPPORTS_DEBUG_PAGEALLOC if PPC_BOOK3S || PPC_8xx
|
||||
select ARCH_SUPPORTS_SCHED_MC if SMP
|
||||
select ARCH_SUPPORTS_SCHED_SMT if PPC64 && SMP
|
||||
select SCHED_MC if ARCH_SUPPORTS_SCHED_MC
|
||||
select ARCH_USE_BUILTIN_BSWAP
|
||||
select ARCH_USE_CMPXCHG_LOCKREF if PPC64
|
||||
select ARCH_USE_MEMTEST
|
||||
|
@ -965,14 +968,6 @@ config PPC_PROT_SAO_LPAR
|
|||
config PPC_COPRO_BASE
|
||||
bool
|
||||
|
||||
config SCHED_SMT
|
||||
bool "SMT (Hyperthreading) scheduler support"
|
||||
depends on PPC64 && SMP
|
||||
help
|
||||
SMT scheduler support improves the CPU scheduler's decision making
|
||||
when dealing with POWER5 cpus at a cost of slightly increased
|
||||
overhead in some places. If unsure say N here.
|
||||
|
||||
config PPC_DENORMALISATION
|
||||
bool "PowerPC denormalisation exception handling"
|
||||
depends on PPC_BOOK3S_64
|
||||
|
|
|
@ -131,6 +131,8 @@ static inline int cpu_to_coregroup_id(int cpu)
|
|||
#ifdef CONFIG_SMP
|
||||
#include <asm/cputable.h>
|
||||
|
||||
struct cpumask *cpu_coregroup_mask(int cpu);
|
||||
|
||||
#ifdef CONFIG_PPC64
|
||||
#include <asm/smp.h>
|
||||
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
* compile this file to assembler, and then extract the
|
||||
* #defines from the assembly-language output.
|
||||
*/
|
||||
#define COMPILE_OFFSETS
|
||||
|
||||
#include <linux/compat.h>
|
||||
#include <linux/signal.h>
|
||||
|
|
|
@ -1028,19 +1028,19 @@ static int powerpc_shared_proc_flags(void)
|
|||
* We can't just pass cpu_l2_cache_mask() directly because
|
||||
* returns a non-const pointer and the compiler barfs on that.
|
||||
*/
|
||||
static const struct cpumask *shared_cache_mask(int cpu)
|
||||
static const struct cpumask *tl_cache_mask(struct sched_domain_topology_level *tl, int cpu)
|
||||
{
|
||||
return per_cpu(cpu_l2_cache_map, cpu);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SCHED_SMT
|
||||
static const struct cpumask *smallcore_smt_mask(int cpu)
|
||||
static const struct cpumask *tl_smallcore_smt_mask(struct sched_domain_topology_level *tl, int cpu)
|
||||
{
|
||||
return cpu_smallcore_mask(cpu);
|
||||
}
|
||||
#endif
|
||||
|
||||
static struct cpumask *cpu_coregroup_mask(int cpu)
|
||||
struct cpumask *cpu_coregroup_mask(int cpu)
|
||||
{
|
||||
return per_cpu(cpu_coregroup_map, cpu);
|
||||
}
|
||||
|
@ -1054,11 +1054,6 @@ static bool has_coregroup_support(void)
|
|||
return coregroup_enabled;
|
||||
}
|
||||
|
||||
static const struct cpumask *cpu_mc_mask(int cpu)
|
||||
{
|
||||
return cpu_coregroup_mask(cpu);
|
||||
}
|
||||
|
||||
static int __init init_big_cores(void)
|
||||
{
|
||||
int cpu;
|
||||
|
@ -1448,7 +1443,7 @@ static bool update_mask_by_l2(int cpu, cpumask_var_t *mask)
|
|||
return false;
|
||||
}
|
||||
|
||||
cpumask_and(*mask, cpu_online_mask, cpu_cpu_mask(cpu));
|
||||
cpumask_and(*mask, cpu_online_mask, cpu_node_mask(cpu));
|
||||
|
||||
/* Update l2-cache mask with all the CPUs that are part of submask */
|
||||
or_cpumasks_related(cpu, cpu, submask_fn, cpu_l2_cache_mask);
|
||||
|
@ -1538,7 +1533,7 @@ static void update_coregroup_mask(int cpu, cpumask_var_t *mask)
|
|||
return;
|
||||
}
|
||||
|
||||
cpumask_and(*mask, cpu_online_mask, cpu_cpu_mask(cpu));
|
||||
cpumask_and(*mask, cpu_online_mask, cpu_node_mask(cpu));
|
||||
|
||||
/* Update coregroup mask with all the CPUs that are part of submask */
|
||||
or_cpumasks_related(cpu, cpu, submask_fn, cpu_coregroup_mask);
|
||||
|
@ -1601,7 +1596,7 @@ static void add_cpu_to_masks(int cpu)
|
|||
|
||||
/* If chip_id is -1; limit the cpu_core_mask to within PKG */
|
||||
if (chip_id == -1)
|
||||
cpumask_and(mask, mask, cpu_cpu_mask(cpu));
|
||||
cpumask_and(mask, mask, cpu_node_mask(cpu));
|
||||
|
||||
for_each_cpu(i, mask) {
|
||||
if (chip_id == cpu_to_chip_id(i)) {
|
||||
|
@ -1701,22 +1696,22 @@ static void __init build_sched_topology(void)
|
|||
if (has_big_cores) {
|
||||
pr_info("Big cores detected but using small core scheduling\n");
|
||||
powerpc_topology[i++] =
|
||||
SDTL_INIT(smallcore_smt_mask, powerpc_smt_flags, SMT);
|
||||
SDTL_INIT(tl_smallcore_smt_mask, powerpc_smt_flags, SMT);
|
||||
} else {
|
||||
powerpc_topology[i++] = SDTL_INIT(cpu_smt_mask, powerpc_smt_flags, SMT);
|
||||
powerpc_topology[i++] = SDTL_INIT(tl_smt_mask, powerpc_smt_flags, SMT);
|
||||
}
|
||||
#endif
|
||||
if (shared_caches) {
|
||||
powerpc_topology[i++] =
|
||||
SDTL_INIT(shared_cache_mask, powerpc_shared_cache_flags, CACHE);
|
||||
SDTL_INIT(tl_cache_mask, powerpc_shared_cache_flags, CACHE);
|
||||
}
|
||||
|
||||
if (has_coregroup_support()) {
|
||||
powerpc_topology[i++] =
|
||||
SDTL_INIT(cpu_mc_mask, powerpc_shared_proc_flags, MC);
|
||||
SDTL_INIT(tl_mc_mask, powerpc_shared_proc_flags, MC);
|
||||
}
|
||||
|
||||
powerpc_topology[i++] = SDTL_INIT(cpu_cpu_mask, powerpc_shared_proc_flags, PKG);
|
||||
powerpc_topology[i++] = SDTL_INIT(tl_pkg_mask, powerpc_shared_proc_flags, PKG);
|
||||
|
||||
/* There must be one trailing NULL entry left. */
|
||||
BUG_ON(i >= ARRAY_SIZE(powerpc_topology) - 1);
|
||||
|
|
|
@ -74,6 +74,7 @@ config RISCV
|
|||
select ARCH_SUPPORTS_PER_VMA_LOCK if MMU
|
||||
select ARCH_SUPPORTS_RT
|
||||
select ARCH_SUPPORTS_SHADOW_CALL_STACK if HAVE_SHADOW_CALL_STACK
|
||||
select ARCH_SUPPORTS_SCHED_MC if SMP
|
||||
select ARCH_USE_CMPXCHG_LOCKREF if 64BIT
|
||||
select ARCH_USE_MEMTEST
|
||||
select ARCH_USE_QUEUED_RWLOCKS
|
||||
|
@ -455,14 +456,6 @@ config SMP
|
|||
|
||||
If you don't know what to do here, say N.
|
||||
|
||||
config SCHED_MC
|
||||
bool "Multi-core scheduler support"
|
||||
depends on SMP
|
||||
help
|
||||
Multi-core scheduler support improves the CPU scheduler's decision
|
||||
making when dealing with multi-core CPU chips at a cost of slightly
|
||||
increased overhead in some places. If unsure say N here.
|
||||
|
||||
config NR_CPUS
|
||||
int "Maximum number of CPUs (2-512)"
|
||||
depends on SMP
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
* Copyright (C) 2012 Regents of the University of California
|
||||
* Copyright (C) 2017 SiFive
|
||||
*/
|
||||
#define COMPILE_OFFSETS
|
||||
|
||||
#include <linux/kbuild.h>
|
||||
#include <linux/mm.h>
|
||||
|
|
|
@ -554,15 +554,11 @@ config NODES_SHIFT
|
|||
depends on NUMA
|
||||
default "1"
|
||||
|
||||
config SCHED_SMT
|
||||
def_bool n
|
||||
|
||||
config SCHED_MC
|
||||
def_bool n
|
||||
|
||||
config SCHED_TOPOLOGY
|
||||
def_bool y
|
||||
prompt "Topology scheduler support"
|
||||
select ARCH_SUPPORTS_SCHED_SMT
|
||||
select ARCH_SUPPORTS_SCHED_MC
|
||||
select SCHED_SMT
|
||||
select SCHED_MC
|
||||
help
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
* This code generates raw asm output which is post-processed to extract
|
||||
* and format the required data.
|
||||
*/
|
||||
#define COMPILE_OFFSETS
|
||||
|
||||
#include <linux/kbuild.h>
|
||||
#include <linux/sched.h>
|
||||
|
|
|
@ -509,33 +509,27 @@ int topology_cpu_init(struct cpu *cpu)
|
|||
return rc;
|
||||
}
|
||||
|
||||
static const struct cpumask *cpu_thread_mask(int cpu)
|
||||
{
|
||||
return &cpu_topology[cpu].thread_mask;
|
||||
}
|
||||
|
||||
|
||||
const struct cpumask *cpu_coregroup_mask(int cpu)
|
||||
{
|
||||
return &cpu_topology[cpu].core_mask;
|
||||
}
|
||||
|
||||
static const struct cpumask *cpu_book_mask(int cpu)
|
||||
static const struct cpumask *tl_book_mask(struct sched_domain_topology_level *tl, int cpu)
|
||||
{
|
||||
return &cpu_topology[cpu].book_mask;
|
||||
}
|
||||
|
||||
static const struct cpumask *cpu_drawer_mask(int cpu)
|
||||
static const struct cpumask *tl_drawer_mask(struct sched_domain_topology_level *tl, int cpu)
|
||||
{
|
||||
return &cpu_topology[cpu].drawer_mask;
|
||||
}
|
||||
|
||||
static struct sched_domain_topology_level s390_topology[] = {
|
||||
SDTL_INIT(cpu_thread_mask, cpu_smt_flags, SMT),
|
||||
SDTL_INIT(cpu_coregroup_mask, cpu_core_flags, MC),
|
||||
SDTL_INIT(cpu_book_mask, NULL, BOOK),
|
||||
SDTL_INIT(cpu_drawer_mask, NULL, DRAWER),
|
||||
SDTL_INIT(cpu_cpu_mask, NULL, PKG),
|
||||
SDTL_INIT(tl_smt_mask, cpu_smt_flags, SMT),
|
||||
SDTL_INIT(tl_mc_mask, cpu_core_flags, MC),
|
||||
SDTL_INIT(tl_book_mask, NULL, BOOK),
|
||||
SDTL_INIT(tl_drawer_mask, NULL, DRAWER),
|
||||
SDTL_INIT(tl_pkg_mask, NULL, PKG),
|
||||
{ NULL, },
|
||||
};
|
||||
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
* compile this file to assembler, and then extract the
|
||||
* #defines from the assembly-language output.
|
||||
*/
|
||||
#define COMPILE_OFFSETS
|
||||
|
||||
#include <linux/stddef.h>
|
||||
#include <linux/types.h>
|
||||
|
|
|
@ -110,6 +110,8 @@ config SPARC64
|
|||
select HAVE_SETUP_PER_CPU_AREA
|
||||
select NEED_PER_CPU_EMBED_FIRST_CHUNK
|
||||
select NEED_PER_CPU_PAGE_FIRST_CHUNK
|
||||
select ARCH_SUPPORTS_SCHED_SMT if SMP
|
||||
select ARCH_SUPPORTS_SCHED_MC if SMP
|
||||
|
||||
config ARCH_PROC_KCORE_TEXT
|
||||
def_bool y
|
||||
|
@ -288,24 +290,6 @@ if SPARC64 || COMPILE_TEST
|
|||
source "kernel/power/Kconfig"
|
||||
endif
|
||||
|
||||
config SCHED_SMT
|
||||
bool "SMT (Hyperthreading) scheduler support"
|
||||
depends on SPARC64 && SMP
|
||||
default y
|
||||
help
|
||||
SMT scheduler support improves the CPU scheduler's decision making
|
||||
when dealing with SPARC cpus at a cost of slightly increased overhead
|
||||
in some places. If unsure say N here.
|
||||
|
||||
config SCHED_MC
|
||||
bool "Multi-core scheduler support"
|
||||
depends on SPARC64 && SMP
|
||||
default y
|
||||
help
|
||||
Multi-core scheduler support improves the CPU scheduler's decision
|
||||
making when dealing with multi-core CPU chips at a cost of slightly
|
||||
increased overhead in some places. If unsure say N here.
|
||||
|
||||
config CMDLINE_BOOL
|
||||
bool "Default bootloader kernel arguments"
|
||||
depends on SPARC64
|
||||
|
|
|
@ -10,6 +10,7 @@
|
|||
*
|
||||
* On sparc, thread_info data is static and TI_XXX offsets are computed by hand.
|
||||
*/
|
||||
#define COMPILE_OFFSETS
|
||||
|
||||
#include <linux/sched.h>
|
||||
#include <linux/mm_types.h>
|
||||
|
|
|
@ -1 +1,3 @@
|
|||
#define COMPILE_OFFSETS
|
||||
|
||||
#include <sysdep/kernel-offsets.h>
|
||||
|
|
|
@ -330,6 +330,10 @@ config X86
|
|||
imply IMA_SECURE_AND_OR_TRUSTED_BOOT if EFI
|
||||
select HAVE_DYNAMIC_FTRACE_NO_PATCHABLE
|
||||
select ARCH_SUPPORTS_PT_RECLAIM if X86_64
|
||||
select ARCH_SUPPORTS_SCHED_SMT if SMP
|
||||
select SCHED_SMT if SMP
|
||||
select ARCH_SUPPORTS_SCHED_CLUSTER if SMP
|
||||
select ARCH_SUPPORTS_SCHED_MC if SMP
|
||||
|
||||
config INSTRUCTION_DECODER
|
||||
def_bool y
|
||||
|
@ -1031,29 +1035,6 @@ config NR_CPUS
|
|||
This is purely to save memory: each supported CPU adds about 8KB
|
||||
to the kernel image.
|
||||
|
||||
config SCHED_CLUSTER
|
||||
bool "Cluster scheduler support"
|
||||
depends on SMP
|
||||
default y
|
||||
help
|
||||
Cluster scheduler support improves the CPU scheduler's decision
|
||||
making when dealing with machines that have clusters of CPUs.
|
||||
Cluster usually means a couple of CPUs which are placed closely
|
||||
by sharing mid-level caches, last-level cache tags or internal
|
||||
busses.
|
||||
|
||||
config SCHED_SMT
|
||||
def_bool y if SMP
|
||||
|
||||
config SCHED_MC
|
||||
def_bool y
|
||||
prompt "Multi-core scheduler support"
|
||||
depends on SMP
|
||||
help
|
||||
Multi-core scheduler support improves the CPU scheduler's decision
|
||||
making when dealing with multi-core CPU chips at a cost of slightly
|
||||
increased overhead in some places. If unsure say N here.
|
||||
|
||||
config SCHED_MC_PRIO
|
||||
bool "CPU core priorities scheduler support"
|
||||
depends on SCHED_MC
|
||||
|
|
|
@ -479,14 +479,14 @@ static int x86_cluster_flags(void)
|
|||
static bool x86_has_numa_in_package;
|
||||
|
||||
static struct sched_domain_topology_level x86_topology[] = {
|
||||
SDTL_INIT(cpu_smt_mask, cpu_smt_flags, SMT),
|
||||
SDTL_INIT(tl_smt_mask, cpu_smt_flags, SMT),
|
||||
#ifdef CONFIG_SCHED_CLUSTER
|
||||
SDTL_INIT(cpu_clustergroup_mask, x86_cluster_flags, CLS),
|
||||
SDTL_INIT(tl_cls_mask, x86_cluster_flags, CLS),
|
||||
#endif
|
||||
#ifdef CONFIG_SCHED_MC
|
||||
SDTL_INIT(cpu_coregroup_mask, x86_core_flags, MC),
|
||||
SDTL_INIT(tl_mc_mask, x86_core_flags, MC),
|
||||
#endif
|
||||
SDTL_INIT(cpu_cpu_mask, x86_sched_itmt_flags, PKG),
|
||||
SDTL_INIT(tl_pkg_mask, x86_sched_itmt_flags, PKG),
|
||||
{ NULL },
|
||||
};
|
||||
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
*
|
||||
* Chris Zankel <chris@zankel.net>
|
||||
*/
|
||||
#define COMPILE_OFFSETS
|
||||
|
||||
#include <asm/processor.h>
|
||||
#include <asm/coprocessor.h>
|
||||
|
|
|
include/linux/preempt.h

@@ -372,7 +372,7 @@ static inline void preempt_notifier_init(struct preempt_notifier *notifier,
 /*
  * Migrate-Disable and why it is undesired.
  *
- * When a preempted task becomes elegible to run under the ideal model (IOW it
+ * When a preempted task becomes eligible to run under the ideal model (IOW it
  * becomes one of the M highest priority tasks), it might still have to wait
  * for the preemptee's migrate_disable() section to complete. Thereby suffering
  * a reduction in bandwidth in the exact duration of the migrate_disable()
@@ -387,7 +387,7 @@ static inline void preempt_notifier_init(struct preempt_notifier *notifier,
  * - a lower priority tasks; which under preempt_disable() could've instantly
  *   migrated away when another CPU becomes available, is now constrained
  *   by the ability to push the higher priority task away, which might itself be
- *   in a migrate_disable() section, reducing it's available bandwidth.
+ *   in a migrate_disable() section, reducing its available bandwidth.
  *
  * IOW it trades latency / moves the interference term, but it stays in the
  * system, and as long as it remains unbounded, the system is not fully
@@ -399,7 +399,7 @@ static inline void preempt_notifier_init(struct preempt_notifier *notifier,
  * PREEMPT_RT breaks a number of assumptions traditionally held. By forcing a
  * number of primitives into becoming preemptible, they would also allow
  * migration. This turns out to break a bunch of per-cpu usage. To this end,
- * all these primitives employ migirate_disable() to restore this implicit
+ * all these primitives employ migrate_disable() to restore this implicit
  * assumption.
  *
  * This is a 'temporary' work-around at best. The correct solution is getting
@@ -407,7 +407,7 @@ static inline void preempt_notifier_init(struct preempt_notifier *notifier,
  * per-cpu locking or short preempt-disable regions.
  *
  * The end goal must be to get rid of migrate_disable(), alternatively we need
- * a schedulability theory that does not depend on abritrary migration.
+ * a schedulability theory that does not depend on arbitrary migration.
  *
  *
  * Notes on the implementation.
@@ -424,8 +424,6 @@ static inline void preempt_notifier_init(struct preempt_notifier *notifier,
  * work-conserving schedulers.
  *
  */
-extern void migrate_disable(void);
-extern void migrate_enable(void);
 
 /**
  * preempt_disable_nested - Disable preemption inside a normally preempt disabled section
@@ -471,7 +469,6 @@ static __always_inline void preempt_enable_nested(void)
 
 DEFINE_LOCK_GUARD_0(preempt, preempt_disable(), preempt_enable())
 DEFINE_LOCK_GUARD_0(preempt_notrace, preempt_disable_notrace(), preempt_enable_notrace())
-DEFINE_LOCK_GUARD_0(migrate, migrate_disable(), migrate_enable())
 
 #ifdef CONFIG_PREEMPT_DYNAMIC
 
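The DEFINE_LOCK_GUARD_0(migrate, ...) removed here is re-added in
<linux/sched.h> later in this diff, next to the new inline
__migrate_{dis,en}able() helpers, so guard-based users are unaffected.
For illustration only (not taken from this diff; the function below is
made up), this is the kind of caller the guard supports:

#include <linux/sched.h>	/* migrate_disable()/migrate_enable() + guard(migrate) */
#include <linux/smp.h>
#include <linux/printk.h>

static void demo_pinned_section(void)
{
	/* Expands to migrate_disable() here and a matching
	 * migrate_enable() when the scope is left. */
	guard(migrate)();

	/* With migration disabled the task stays on this CPU for the
	 * whole scope, so smp_processor_id() is legal and stable here. */
	pr_info("pinned on CPU%d\n", smp_processor_id());
}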
@ -24,7 +24,7 @@
|
|||
#include <linux/compiler.h>
|
||||
#include <linux/atomic.h>
|
||||
#include <linux/irqflags.h>
|
||||
#include <linux/preempt.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/bottom_half.h>
|
||||
#include <linux/lockdep.h>
|
||||
#include <linux/cleanup.h>
|
||||
|
|
|
@ -49,6 +49,9 @@
|
|||
#include <linux/tracepoint-defs.h>
|
||||
#include <linux/unwind_deferred_types.h>
|
||||
#include <asm/kmap_size.h>
|
||||
#ifndef COMPILE_OFFSETS
|
||||
#include <generated/rq-offsets.h>
|
||||
#endif
|
||||
|
||||
/* task_struct member predeclarations (sorted alphabetically): */
|
||||
struct audit_context;
|
||||
|
@ -881,6 +884,11 @@ struct task_struct {
|
|||
|
||||
#ifdef CONFIG_CGROUP_SCHED
|
||||
struct task_group *sched_task_group;
|
||||
#ifdef CONFIG_CFS_BANDWIDTH
|
||||
struct callback_head sched_throttle_work;
|
||||
struct list_head throttle_node;
|
||||
bool throttled;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
|
@ -2310,4 +2318,114 @@ static __always_inline void alloc_tag_restore(struct alloc_tag *tag, struct allo
|
|||
#define alloc_tag_restore(_tag, _old) do {} while (0)
|
||||
#endif
|
||||
|
||||
#ifndef MODULE
|
||||
#ifndef COMPILE_OFFSETS
|
||||
|
||||
extern void ___migrate_enable(void);
|
||||
|
||||
struct rq;
|
||||
DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
|
||||
|
||||
/*
|
||||
* The "struct rq" is not available here, so we can't access the
|
||||
* "runqueues" with this_cpu_ptr(), as the compilation will fail in
|
||||
* this_cpu_ptr() -> raw_cpu_ptr() -> __verify_pcpu_ptr():
|
||||
* typeof((ptr) + 0)
|
||||
*
|
||||
* So use arch_raw_cpu_ptr()/PERCPU_PTR() directly here.
|
||||
*/
|
||||
#ifdef CONFIG_SMP
|
||||
#define this_rq_raw() arch_raw_cpu_ptr(&runqueues)
|
||||
#else
|
||||
#define this_rq_raw() PERCPU_PTR(&runqueues)
|
||||
#endif
|
||||
#define this_rq_pinned() (*(unsigned int *)((void *)this_rq_raw() + RQ_nr_pinned))
|
||||
|
||||
static inline void __migrate_enable(void)
|
||||
{
|
||||
struct task_struct *p = current;
|
||||
|
||||
#ifdef CONFIG_DEBUG_PREEMPT
|
||||
/*
|
||||
* Check both overflow from migrate_disable() and superfluous
|
||||
* migrate_enable().
|
||||
*/
|
||||
if (WARN_ON_ONCE((s16)p->migration_disabled <= 0))
|
||||
return;
|
||||
#endif
|
||||
|
||||
if (p->migration_disabled > 1) {
|
||||
p->migration_disabled--;
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Ensure stop_task runs either before or after this, and that
|
||||
* __set_cpus_allowed_ptr(SCA_MIGRATE_ENABLE) doesn't schedule().
|
||||
*/
|
||||
guard(preempt)();
|
||||
if (unlikely(p->cpus_ptr != &p->cpus_mask))
|
||||
___migrate_enable();
|
||||
/*
|
||||
* Mustn't clear migration_disabled() until cpus_ptr points back at the
|
||||
* regular cpus_mask, otherwise things that race (eg.
|
||||
* select_fallback_rq) get confused.
|
||||
*/
|
||||
barrier();
|
||||
p->migration_disabled = 0;
|
||||
this_rq_pinned()--;
|
||||
}
|
||||
|
||||
static inline void __migrate_disable(void)
|
||||
{
|
||||
struct task_struct *p = current;
|
||||
|
||||
if (p->migration_disabled) {
|
||||
#ifdef CONFIG_DEBUG_PREEMPT
|
||||
/*
|
||||
*Warn about overflow half-way through the range.
|
||||
*/
|
||||
WARN_ON_ONCE((s16)p->migration_disabled < 0);
|
||||
#endif
|
||||
p->migration_disabled++;
|
||||
return;
|
||||
}
|
||||
|
||||
guard(preempt)();
|
||||
this_rq_pinned()++;
|
||||
p->migration_disabled = 1;
|
||||
}
|
||||
#else /* !COMPILE_OFFSETS */
|
||||
static inline void __migrate_disable(void) { }
|
||||
static inline void __migrate_enable(void) { }
|
||||
#endif /* !COMPILE_OFFSETS */
|
||||
|
||||
/*
|
||||
* So that it is possible to not export the runqueues variable, define and
|
||||
* export migrate_enable/migrate_disable in kernel/sched/core.c too, and use
|
||||
* them for the modules. The macro "INSTANTIATE_EXPORTED_MIGRATE_DISABLE" will
|
||||
* be defined in kernel/sched/core.c.
|
||||
*/
|
||||
#ifndef INSTANTIATE_EXPORTED_MIGRATE_DISABLE
|
||||
static inline void migrate_disable(void)
|
||||
{
|
||||
__migrate_disable();
|
||||
}
|
||||
|
||||
static inline void migrate_enable(void)
|
||||
{
|
||||
__migrate_enable();
|
||||
}
|
||||
#else /* INSTANTIATE_EXPORTED_MIGRATE_DISABLE */
|
||||
extern void migrate_disable(void);
|
||||
extern void migrate_enable(void);
|
||||
#endif /* INSTANTIATE_EXPORTED_MIGRATE_DISABLE */
|
||||
|
||||
#else /* MODULE */
|
||||
extern void migrate_disable(void);
|
||||
extern void migrate_enable(void);
|
||||
#endif /* MODULE */
|
||||
|
||||
DEFINE_LOCK_GUARD_0(migrate, migrate_disable(), migrate_enable())
|
||||
|
||||
#endif
|
||||
|
|
|
@ -30,33 +30,24 @@ struct sd_flag_debug {
|
|||
};
|
||||
extern const struct sd_flag_debug sd_flag_debug[];
|
||||
|
||||
struct sched_domain_topology_level;
|
||||
|
||||
#ifdef CONFIG_SCHED_SMT
|
||||
static inline int cpu_smt_flags(void)
|
||||
{
|
||||
return SD_SHARE_CPUCAPACITY | SD_SHARE_LLC;
|
||||
}
|
||||
extern int cpu_smt_flags(void);
|
||||
extern const struct cpumask *tl_smt_mask(struct sched_domain_topology_level *tl, int cpu);
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_SCHED_CLUSTER
|
||||
static inline int cpu_cluster_flags(void)
|
||||
{
|
||||
return SD_CLUSTER | SD_SHARE_LLC;
|
||||
}
|
||||
extern int cpu_cluster_flags(void);
|
||||
extern const struct cpumask *tl_cls_mask(struct sched_domain_topology_level *tl, int cpu);
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_SCHED_MC
|
||||
static inline int cpu_core_flags(void)
|
||||
{
|
||||
return SD_SHARE_LLC;
|
||||
}
|
||||
extern int cpu_core_flags(void);
|
||||
extern const struct cpumask *tl_mc_mask(struct sched_domain_topology_level *tl, int cpu);
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_NUMA
|
||||
static inline int cpu_numa_flags(void)
|
||||
{
|
||||
return SD_NUMA;
|
||||
}
|
||||
#endif
|
||||
extern const struct cpumask *tl_pkg_mask(struct sched_domain_topology_level *tl, int cpu);
|
||||
|
||||
extern int arch_asym_cpu_priority(int cpu);
|
||||
|
||||
|
@ -172,7 +163,7 @@ bool cpus_equal_capacity(int this_cpu, int that_cpu);
|
|||
bool cpus_share_cache(int this_cpu, int that_cpu);
|
||||
bool cpus_share_resources(int this_cpu, int that_cpu);
|
||||
|
||||
typedef const struct cpumask *(*sched_domain_mask_f)(int cpu);
|
||||
typedef const struct cpumask *(*sched_domain_mask_f)(struct sched_domain_topology_level *tl, int cpu);
|
||||
typedef int (*sched_domain_flags_f)(void);
|
||||
|
||||
struct sd_data {
|
||||
|
|
|
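The hunks above are include/linux/sched/topology.h: the per-level flags
helpers lose their inline bodies and the mask callbacks gain a
sched_domain_topology_level argument. The matching out-of-line
definitions live in kernel/sched/topology.c, which is not part of this
excerpt; a presumed sketch of their shape (flag values taken from the
deleted inlines above, the tl argument unused for these simple levels):

#include <linux/sched/topology.h>
#include <linux/topology.h>

#ifdef CONFIG_SCHED_SMT
int cpu_smt_flags(void)
{
	return SD_SHARE_CPUCAPACITY | SD_SHARE_LLC;
}

const struct cpumask *
tl_smt_mask(struct sched_domain_topology_level *tl, int cpu)
{
	/* Assumed: same mask the old per-arch SMT callbacks returned. */
	return cpu_smt_mask(cpu);
}
#endif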
@ -260,7 +260,7 @@ static inline bool topology_is_primary_thread(unsigned int cpu)
|
|||
|
||||
#endif
|
||||
|
||||
static inline const struct cpumask *cpu_cpu_mask(int cpu)
|
||||
static inline const struct cpumask *cpu_node_mask(int cpu)
|
||||
{
|
||||
return cpumask_of_node(cpu_to_node(cpu));
|
||||
}
|
||||
|
|
|
@ -23859,6 +23859,7 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
|
|||
BTF_SET_START(btf_id_deny)
|
||||
BTF_ID_UNUSED
|
||||
#ifdef CONFIG_SMP
|
||||
BTF_ID(func, ___migrate_enable)
|
||||
BTF_ID(func, migrate_disable)
|
||||
BTF_ID(func, migrate_enable)
|
||||
#endif
|
||||
|
|
|
@ -7,6 +7,8 @@
|
|||
* Copyright (C) 1991-2002 Linus Torvalds
|
||||
* Copyright (C) 1998-2024 Ingo Molnar, Red Hat
|
||||
*/
|
||||
#define INSTANTIATE_EXPORTED_MIGRATE_DISABLE
|
||||
#include <linux/sched.h>
|
||||
#include <linux/highmem.h>
|
||||
#include <linux/hrtimer_api.h>
|
||||
#include <linux/ktime_api.h>
|
||||
|
@ -2381,28 +2383,7 @@ static void migrate_disable_switch(struct rq *rq, struct task_struct *p)
|
|||
__do_set_cpus_allowed(p, &ac);
|
||||
}
|
||||
|
||||
void migrate_disable(void)
|
||||
{
|
||||
struct task_struct *p = current;
|
||||
|
||||
if (p->migration_disabled) {
|
||||
#ifdef CONFIG_DEBUG_PREEMPT
|
||||
/*
|
||||
*Warn about overflow half-way through the range.
|
||||
*/
|
||||
WARN_ON_ONCE((s16)p->migration_disabled < 0);
|
||||
#endif
|
||||
p->migration_disabled++;
|
||||
return;
|
||||
}
|
||||
|
||||
guard(preempt)();
|
||||
this_rq()->nr_pinned++;
|
||||
p->migration_disabled = 1;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(migrate_disable);
|
||||
|
||||
void migrate_enable(void)
|
||||
void ___migrate_enable(void)
|
||||
{
|
||||
struct task_struct *p = current;
|
||||
struct affinity_context ac = {
|
||||
|
@ -2410,35 +2391,19 @@ void migrate_enable(void)
|
|||
.flags = SCA_MIGRATE_ENABLE,
|
||||
};
|
||||
|
||||
#ifdef CONFIG_DEBUG_PREEMPT
|
||||
/*
|
||||
* Check both overflow from migrate_disable() and superfluous
|
||||
* migrate_enable().
|
||||
*/
|
||||
if (WARN_ON_ONCE((s16)p->migration_disabled <= 0))
|
||||
return;
|
||||
#endif
|
||||
__set_cpus_allowed_ptr(p, &ac);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(___migrate_enable);
|
||||
|
||||
if (p->migration_disabled > 1) {
|
||||
p->migration_disabled--;
|
||||
return;
|
||||
}
|
||||
void migrate_disable(void)
|
||||
{
|
||||
__migrate_disable();
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(migrate_disable);
|
||||
|
||||
/*
|
||||
* Ensure stop_task runs either before or after this, and that
|
||||
* __set_cpus_allowed_ptr(SCA_MIGRATE_ENABLE) doesn't schedule().
|
||||
*/
|
||||
guard(preempt)();
|
||||
if (p->cpus_ptr != &p->cpus_mask)
|
||||
__set_cpus_allowed_ptr(p, &ac);
|
||||
/*
|
||||
* Mustn't clear migration_disabled() until cpus_ptr points back at the
|
||||
* regular cpus_mask, otherwise things that race (eg.
|
||||
* select_fallback_rq) get confused.
|
||||
*/
|
||||
barrier();
|
||||
p->migration_disabled = 0;
|
||||
this_rq()->nr_pinned--;
|
||||
void migrate_enable(void)
|
||||
{
|
||||
__migrate_enable();
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(migrate_enable);
|
||||
|
||||
|
@ -4490,6 +4455,9 @@ static void __sched_fork(u64 clone_flags, struct task_struct *p)
|
|||
|
||||
#ifdef CONFIG_FAIR_GROUP_SCHED
|
||||
p->se.cfs_rq = NULL;
|
||||
#ifdef CONFIG_CFS_BANDWIDTH
|
||||
init_cfs_throttle_work(p);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_SCHEDSTATS
|
||||
|
|
|
@ -2551,6 +2551,25 @@ static int find_later_rq(struct task_struct *task)
|
|||
return -1;
|
||||
}
|
||||
|
||||
static struct task_struct *pick_next_pushable_dl_task(struct rq *rq)
|
||||
{
|
||||
struct task_struct *p;
|
||||
|
||||
if (!has_pushable_dl_tasks(rq))
|
||||
return NULL;
|
||||
|
||||
p = __node_2_pdl(rb_first_cached(&rq->dl.pushable_dl_tasks_root));
|
||||
|
||||
WARN_ON_ONCE(rq->cpu != task_cpu(p));
|
||||
WARN_ON_ONCE(task_current(rq, p));
|
||||
WARN_ON_ONCE(p->nr_cpus_allowed <= 1);
|
||||
|
||||
WARN_ON_ONCE(!task_on_rq_queued(p));
|
||||
WARN_ON_ONCE(!dl_task(p));
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
/* Locks the rq it finds */
|
||||
static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq)
|
||||
{
|
||||
|
@ -2578,12 +2597,37 @@ static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq)
|
|||
|
||||
/* Retry if something changed. */
|
||||
if (double_lock_balance(rq, later_rq)) {
|
||||
if (unlikely(task_rq(task) != rq ||
|
||||
/*
|
||||
* double_lock_balance had to release rq->lock, in the
|
||||
* meantime, task may no longer be fit to be migrated.
|
||||
* Check the following to ensure that the task is
|
||||
* still suitable for migration:
|
||||
* 1. It is possible the task was scheduled,
|
||||
* migrate_disabled was set and then got preempted,
|
||||
* so we must check the task migration disable
|
||||
* flag.
|
||||
* 2. The CPU picked is in the task's affinity.
|
||||
* 3. For throttled task (dl_task_offline_migration),
|
||||
* check the following:
|
||||
* - the task is not on the rq anymore (it was
|
||||
* migrated)
|
||||
* - the task is not on CPU anymore
|
||||
* - the task is still a dl task
|
||||
* - the task is not queued on the rq anymore
|
||||
* 4. For the non-throttled task (push_dl_task), the
|
||||
* check to ensure that this task is still at the
|
||||
* head of the pushable tasks list is enough.
|
||||
*/
|
||||
if (unlikely(is_migration_disabled(task) ||
|
||||
!cpumask_test_cpu(later_rq->cpu, &task->cpus_mask) ||
|
||||
task_on_cpu(rq, task) ||
|
||||
!dl_task(task) ||
|
||||
is_migration_disabled(task) ||
|
||||
!task_on_rq_queued(task))) {
|
||||
(task->dl.dl_throttled &&
|
||||
(task_rq(task) != rq ||
|
||||
task_on_cpu(rq, task) ||
|
||||
!dl_task(task) ||
|
||||
!task_on_rq_queued(task))) ||
|
||||
(!task->dl.dl_throttled &&
|
||||
task != pick_next_pushable_dl_task(rq)))) {
|
||||
|
||||
double_unlock_balance(rq, later_rq);
|
||||
later_rq = NULL;
|
||||
break;
|
||||
|
@ -2606,25 +2650,6 @@ static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq)
|
|||
return later_rq;
|
||||
}
|
||||
|
||||
static struct task_struct *pick_next_pushable_dl_task(struct rq *rq)
|
||||
{
|
||||
struct task_struct *p;
|
||||
|
||||
if (!has_pushable_dl_tasks(rq))
|
||||
return NULL;
|
||||
|
||||
p = __node_2_pdl(rb_first_cached(&rq->dl.pushable_dl_tasks_root));
|
||||
|
||||
WARN_ON_ONCE(rq->cpu != task_cpu(p));
|
||||
WARN_ON_ONCE(task_current(rq, p));
|
||||
WARN_ON_ONCE(p->nr_cpus_allowed <= 1);
|
||||
|
||||
WARN_ON_ONCE(!task_on_rq_queued(p));
|
||||
WARN_ON_ONCE(!dl_task(p));
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
/*
|
||||
* See if the non running -deadline tasks on this rq
|
||||
* can be sent to some other CPU where they can preempt
|
||||
|
|
|
@ -3957,9 +3957,6 @@ static void update_cfs_group(struct sched_entity *se)
|
|||
if (!gcfs_rq || !gcfs_rq->load.weight)
|
||||
return;
|
||||
|
||||
if (throttled_hierarchy(gcfs_rq))
|
||||
return;
|
||||
|
||||
shares = calc_group_shares(gcfs_rq);
|
||||
if (unlikely(se->load.weight != shares))
|
||||
reweight_entity(cfs_rq_of(se), se, shares);
|
||||
|
@ -5291,18 +5288,16 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
|
|||
|
||||
if (cfs_rq->nr_queued == 1) {
|
||||
check_enqueue_throttle(cfs_rq);
|
||||
if (!throttled_hierarchy(cfs_rq)) {
|
||||
list_add_leaf_cfs_rq(cfs_rq);
|
||||
} else {
|
||||
list_add_leaf_cfs_rq(cfs_rq);
|
||||
#ifdef CONFIG_CFS_BANDWIDTH
|
||||
if (cfs_rq->pelt_clock_throttled) {
|
||||
struct rq *rq = rq_of(cfs_rq);
|
||||
|
||||
if (cfs_rq_throttled(cfs_rq) && !cfs_rq->throttled_clock)
|
||||
cfs_rq->throttled_clock = rq_clock(rq);
|
||||
if (!cfs_rq->throttled_clock_self)
|
||||
cfs_rq->throttled_clock_self = rq_clock(rq);
|
||||
#endif
|
||||
cfs_rq->throttled_clock_pelt_time += rq_clock_pelt(rq) -
|
||||
cfs_rq->throttled_clock_pelt;
|
||||
cfs_rq->pelt_clock_throttled = 0;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -5341,8 +5336,6 @@ static void set_delayed(struct sched_entity *se)
|
|||
struct cfs_rq *cfs_rq = cfs_rq_of(se);
|
||||
|
||||
cfs_rq->h_nr_runnable--;
|
||||
if (cfs_rq_throttled(cfs_rq))
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -5363,8 +5356,6 @@ static void clear_delayed(struct sched_entity *se)
|
|||
struct cfs_rq *cfs_rq = cfs_rq_of(se);
|
||||
|
||||
cfs_rq->h_nr_runnable++;
|
||||
if (cfs_rq_throttled(cfs_rq))
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -5392,7 +5383,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
|
|||
* DELAY_DEQUEUE relies on spurious wakeups, special task
|
||||
* states must not suffer spurious wakeups, excempt them.
|
||||
*/
|
||||
if (flags & DEQUEUE_SPECIAL)
|
||||
if (flags & (DEQUEUE_SPECIAL | DEQUEUE_THROTTLE))
|
||||
delay = false;
|
||||
|
||||
WARN_ON_ONCE(delay && se->sched_delayed);
|
||||
|
@ -5450,8 +5441,18 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
|
|||
if (flags & DEQUEUE_DELAYED)
|
||||
finish_delayed_dequeue_entity(se);
|
||||
|
||||
if (cfs_rq->nr_queued == 0)
|
||||
if (cfs_rq->nr_queued == 0) {
|
||||
update_idle_cfs_rq_clock_pelt(cfs_rq);
|
||||
#ifdef CONFIG_CFS_BANDWIDTH
|
||||
if (throttled_hierarchy(cfs_rq)) {
|
||||
struct rq *rq = rq_of(cfs_rq);
|
||||
|
||||
list_del_leaf_cfs_rq(cfs_rq);
|
||||
cfs_rq->throttled_clock_pelt = rq_clock_pelt(rq);
|
||||
cfs_rq->pelt_clock_throttled = 1;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
@ -5725,74 +5726,253 @@ static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq)
|
|||
return cfs_bandwidth_used() && cfs_rq->throttled;
|
||||
}
|
||||
|
||||
static inline bool cfs_rq_pelt_clock_throttled(struct cfs_rq *cfs_rq)
|
||||
{
|
||||
return cfs_bandwidth_used() && cfs_rq->pelt_clock_throttled;
|
||||
}
|
||||
|
||||
/* check whether cfs_rq, or any parent, is throttled */
|
||||
static inline int throttled_hierarchy(struct cfs_rq *cfs_rq)
|
||||
{
|
||||
return cfs_bandwidth_used() && cfs_rq->throttle_count;
|
||||
}
|
||||
|
||||
/*
|
||||
* Ensure that neither of the group entities corresponding to src_cpu or
|
||||
* dest_cpu are members of a throttled hierarchy when performing group
|
||||
* load-balance operations.
|
||||
*/
|
||||
static inline int throttled_lb_pair(struct task_group *tg,
|
||||
int src_cpu, int dest_cpu)
|
||||
static inline int lb_throttled_hierarchy(struct task_struct *p, int dst_cpu)
|
||||
{
|
||||
struct cfs_rq *src_cfs_rq, *dest_cfs_rq;
|
||||
|
||||
src_cfs_rq = tg->cfs_rq[src_cpu];
|
||||
dest_cfs_rq = tg->cfs_rq[dest_cpu];
|
||||
|
||||
return throttled_hierarchy(src_cfs_rq) ||
|
||||
throttled_hierarchy(dest_cfs_rq);
|
||||
return throttled_hierarchy(task_group(p)->cfs_rq[dst_cpu]);
|
||||
}
|
||||
|
||||
static inline bool task_is_throttled(struct task_struct *p)
|
||||
{
|
||||
return cfs_bandwidth_used() && p->throttled;
|
||||
}
|
||||
|
||||
static bool dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags);
|
||||
static void throttle_cfs_rq_work(struct callback_head *work)
|
||||
{
|
||||
struct task_struct *p = container_of(work, struct task_struct, sched_throttle_work);
|
||||
struct sched_entity *se;
|
||||
struct cfs_rq *cfs_rq;
|
||||
struct rq *rq;
|
||||
|
||||
WARN_ON_ONCE(p != current);
|
||||
p->sched_throttle_work.next = &p->sched_throttle_work;
|
||||
|
||||
/*
|
||||
* If task is exiting, then there won't be a return to userspace, so we
|
||||
* don't have to bother with any of this.
|
||||
*/
|
||||
if ((p->flags & PF_EXITING))
|
||||
return;
|
||||
|
||||
scoped_guard(task_rq_lock, p) {
|
||||
se = &p->se;
|
||||
cfs_rq = cfs_rq_of(se);
|
||||
|
||||
/* Raced, forget */
|
||||
if (p->sched_class != &fair_sched_class)
|
||||
return;
|
||||
|
||||
/*
|
||||
* If not in limbo, then either replenish has happened or this
|
||||
* task got migrated out of the throttled cfs_rq, move along.
|
||||
*/
|
||||
if (!cfs_rq->throttle_count)
|
||||
return;
|
||||
rq = scope.rq;
|
||||
update_rq_clock(rq);
|
||||
WARN_ON_ONCE(p->throttled || !list_empty(&p->throttle_node));
|
||||
dequeue_task_fair(rq, p, DEQUEUE_SLEEP | DEQUEUE_THROTTLE);
|
||||
list_add(&p->throttle_node, &cfs_rq->throttled_limbo_list);
|
||||
/*
|
||||
* Must not set throttled before dequeue or dequeue will
|
||||
* mistakenly regard this task as an already throttled one.
|
||||
*/
|
||||
p->throttled = true;
|
||||
resched_curr(rq);
|
||||
}
|
||||
}
|
||||
|
||||
void init_cfs_throttle_work(struct task_struct *p)
|
||||
{
|
||||
init_task_work(&p->sched_throttle_work, throttle_cfs_rq_work);
|
||||
/* Protect against double add, see throttle_cfs_rq() and throttle_cfs_rq_work() */
|
||||
p->sched_throttle_work.next = &p->sched_throttle_work;
|
||||
INIT_LIST_HEAD(&p->throttle_node);
|
||||
}
|
||||
|
||||
/*
|
||||
* Task is throttled and someone wants to dequeue it again:
|
||||
* it could be sched/core when core needs to do things like
|
||||
* task affinity change, task group change, task sched class
|
||||
* change etc. and in these cases, DEQUEUE_SLEEP is not set;
|
||||
* or the task is blocked after throttled due to freezer etc.
|
||||
* and in these cases, DEQUEUE_SLEEP is set.
|
||||
*/
|
||||
static void detach_task_cfs_rq(struct task_struct *p);
|
||||
static void dequeue_throttled_task(struct task_struct *p, int flags)
|
||||
{
|
||||
WARN_ON_ONCE(p->se.on_rq);
|
||||
list_del_init(&p->throttle_node);
|
||||
|
||||
/* task blocked after throttled */
|
||||
if (flags & DEQUEUE_SLEEP) {
|
||||
p->throttled = false;
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* task is migrating off its old cfs_rq, detach
|
||||
* the task's load from its old cfs_rq.
|
||||
*/
|
||||
if (task_on_rq_migrating(p))
|
||||
detach_task_cfs_rq(p);
|
||||
}
|
||||
|
||||
static bool enqueue_throttled_task(struct task_struct *p)
|
||||
{
|
||||
struct cfs_rq *cfs_rq = cfs_rq_of(&p->se);
|
||||
|
||||
/* @p should have gone through dequeue_throttled_task() first */
|
||||
WARN_ON_ONCE(!list_empty(&p->throttle_node));
|
||||
|
||||
/*
|
||||
* If the throttled task @p is enqueued to a throttled cfs_rq,
|
||||
* take the fast path by directly putting the task on the
|
||||
* target cfs_rq's limbo list.
|
||||
*
|
||||
* Do not do that when @p is current because the following race can
|
||||
* cause @p's group_node to be incorectly re-insterted in its rq's
|
||||
* cfs_tasks list, despite being throttled:
|
||||
*
|
||||
* cpuX cpuY
|
||||
* p ret2user
|
||||
* throttle_cfs_rq_work() sched_move_task(p)
|
||||
* LOCK task_rq_lock
|
||||
* dequeue_task_fair(p)
|
||||
* UNLOCK task_rq_lock
|
||||
* LOCK task_rq_lock
|
||||
* task_current_donor(p) == true
|
||||
* task_on_rq_queued(p) == true
|
||||
* dequeue_task(p)
|
||||
* put_prev_task(p)
|
||||
* sched_change_group()
|
||||
* enqueue_task(p) -> p's new cfs_rq
|
||||
* is throttled, go
|
||||
* fast path and skip
|
||||
* actual enqueue
|
||||
* set_next_task(p)
|
||||
* list_move(&se->group_node, &rq->cfs_tasks); // bug
|
||||
* schedule()
|
||||
*
|
||||
* In the above race case, @p current cfs_rq is in the same rq as
|
||||
* its previous cfs_rq because sched_move_task() only moves a task
|
||||
* to a different group from the same rq, so we can use its current
|
||||
* cfs_rq to derive rq and test if the task is current.
|
||||
*/
|
||||
if (throttled_hierarchy(cfs_rq) &&
|
||||
!task_current_donor(rq_of(cfs_rq), p)) {
|
||||
list_add(&p->throttle_node, &cfs_rq->throttled_limbo_list);
|
||||
return true;
|
||||
}
|
||||
|
||||
/* we can't take the fast path, do an actual enqueue*/
|
||||
p->throttled = false;
|
||||
return false;
|
||||
}
|
||||
|
||||
static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags);
|
||||
static int tg_unthrottle_up(struct task_group *tg, void *data)
|
||||
{
|
||||
struct rq *rq = data;
|
||||
struct cfs_rq *cfs_rq = tg->cfs_rq[cpu_of(rq)];
|
||||
struct task_struct *p, *tmp;
|
||||
|
||||
cfs_rq->throttle_count--;
|
||||
if (!cfs_rq->throttle_count) {
|
||||
if (--cfs_rq->throttle_count)
|
||||
return 0;
|
||||
|
||||
if (cfs_rq->pelt_clock_throttled) {
|
||||
cfs_rq->throttled_clock_pelt_time += rq_clock_pelt(rq) -
|
||||
cfs_rq->throttled_clock_pelt;
|
||||
|
||||
/* Add cfs_rq with load or one or more already running entities to the list */
|
||||
if (!cfs_rq_is_decayed(cfs_rq))
|
||||
list_add_leaf_cfs_rq(cfs_rq);
|
||||
|
||||
if (cfs_rq->throttled_clock_self) {
|
||||
u64 delta = rq_clock(rq) - cfs_rq->throttled_clock_self;
|
||||
|
||||
cfs_rq->throttled_clock_self = 0;
|
||||
|
||||
if (WARN_ON_ONCE((s64)delta < 0))
|
||||
delta = 0;
|
||||
|
||||
cfs_rq->throttled_clock_self_time += delta;
|
||||
}
|
||||
cfs_rq->pelt_clock_throttled = 0;
|
||||
}
|
||||
|
||||
if (cfs_rq->throttled_clock_self) {
|
||||
u64 delta = rq_clock(rq) - cfs_rq->throttled_clock_self;
|
||||
|
||||
cfs_rq->throttled_clock_self = 0;
|
||||
|
||||
if (WARN_ON_ONCE((s64)delta < 0))
|
||||
delta = 0;
|
||||
|
||||
cfs_rq->throttled_clock_self_time += delta;
|
||||
}
|
||||
|
||||
/* Re-enqueue the tasks that have been throttled at this level. */
|
||||
list_for_each_entry_safe(p, tmp, &cfs_rq->throttled_limbo_list, throttle_node) {
|
||||
list_del_init(&p->throttle_node);
|
||||
p->throttled = false;
|
||||
enqueue_task_fair(rq_of(cfs_rq), p, ENQUEUE_WAKEUP);
|
||||
}
|
||||
|
||||
/* Add cfs_rq with load or one or more already running entities to the list */
|
||||
if (!cfs_rq_is_decayed(cfs_rq))
|
||||
list_add_leaf_cfs_rq(cfs_rq);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline bool task_has_throttle_work(struct task_struct *p)
|
||||
{
|
||||
return p->sched_throttle_work.next != &p->sched_throttle_work;
|
||||
}
|
||||
|
||||
static inline void task_throttle_setup_work(struct task_struct *p)
|
||||
{
|
||||
if (task_has_throttle_work(p))
|
||||
return;
|
||||
|
||||
/*
|
||||
* Kthreads and exiting tasks don't return to userspace, so adding the
|
||||
* work is pointless
|
||||
*/
|
||||
if ((p->flags & (PF_EXITING | PF_KTHREAD)))
|
||||
return;
|
||||
|
||||
task_work_add(p, &p->sched_throttle_work, TWA_RESUME);
|
||||
}
|
||||
|
||||
static void record_throttle_clock(struct cfs_rq *cfs_rq)
{
struct rq *rq = rq_of(cfs_rq);

if (cfs_rq_throttled(cfs_rq) && !cfs_rq->throttled_clock)
cfs_rq->throttled_clock = rq_clock(rq);

if (!cfs_rq->throttled_clock_self)
cfs_rq->throttled_clock_self = rq_clock(rq);
}

static int tg_throttle_down(struct task_group *tg, void *data)
{
struct rq *rq = data;
struct cfs_rq *cfs_rq = tg->cfs_rq[cpu_of(rq)];

/* group is entering throttled state, stop time */
if (!cfs_rq->throttle_count) {
cfs_rq->throttled_clock_pelt = rq_clock_pelt(rq);
if (cfs_rq->throttle_count++)
return 0;

/*
* For cfs_rqs that still have entities enqueued, PELT clock
* stop happens at dequeue time when all entities are dequeued.
*/
if (!cfs_rq->nr_queued) {
list_del_leaf_cfs_rq(cfs_rq);

WARN_ON_ONCE(cfs_rq->throttled_clock_self);
if (cfs_rq->nr_queued)
cfs_rq->throttled_clock_self = rq_clock(rq);
cfs_rq->throttled_clock_pelt = rq_clock_pelt(rq);
cfs_rq->pelt_clock_throttled = 1;
}
cfs_rq->throttle_count++;

WARN_ON_ONCE(cfs_rq->throttled_clock_self);
WARN_ON_ONCE(!list_empty(&cfs_rq->throttled_limbo_list));
return 0;
}
@@ -5800,8 +5980,7 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)
{
struct rq *rq = rq_of(cfs_rq);
struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
struct sched_entity *se;
long queued_delta, runnable_delta, idle_delta, dequeue = 1;
int dequeue = 1;

raw_spin_lock(&cfs_b->lock);
/* This will start the period timer if necessary */
@@ -5824,76 +6003,17 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)
if (!dequeue)
return false; /* Throttle no longer required. */

se = cfs_rq->tg->se[cpu_of(rq_of(cfs_rq))];

/* freeze hierarchy runnable averages while throttled */
rcu_read_lock();
walk_tg_tree_from(cfs_rq->tg, tg_throttle_down, tg_nop, (void *)rq);
rcu_read_unlock();

queued_delta = cfs_rq->h_nr_queued;
runnable_delta = cfs_rq->h_nr_runnable;
idle_delta = cfs_rq->h_nr_idle;
for_each_sched_entity(se) {
struct cfs_rq *qcfs_rq = cfs_rq_of(se);
int flags;

/* throttled entity or throttle-on-deactivate */
if (!se->on_rq)
goto done;

/*
* Abuse SPECIAL to avoid delayed dequeue in this instance.
* This avoids teaching dequeue_entities() about throttled
* entities and keeps things relatively simple.
*/
flags = DEQUEUE_SLEEP | DEQUEUE_SPECIAL;
if (se->sched_delayed)
flags |= DEQUEUE_DELAYED;
dequeue_entity(qcfs_rq, se, flags);

if (cfs_rq_is_idle(group_cfs_rq(se)))
idle_delta = cfs_rq->h_nr_queued;

qcfs_rq->h_nr_queued -= queued_delta;
qcfs_rq->h_nr_runnable -= runnable_delta;
qcfs_rq->h_nr_idle -= idle_delta;

if (qcfs_rq->load.weight) {
/* Avoid re-evaluating load for this entity: */
se = parent_entity(se);
break;
}
}

for_each_sched_entity(se) {
struct cfs_rq *qcfs_rq = cfs_rq_of(se);
/* throttled entity or throttle-on-deactivate */
if (!se->on_rq)
goto done;

update_load_avg(qcfs_rq, se, 0);
se_update_runnable(se);

if (cfs_rq_is_idle(group_cfs_rq(se)))
idle_delta = cfs_rq->h_nr_queued;

qcfs_rq->h_nr_queued -= queued_delta;
qcfs_rq->h_nr_runnable -= runnable_delta;
qcfs_rq->h_nr_idle -= idle_delta;
}

/* At this point se is NULL and we are at root level*/
sub_nr_running(rq, queued_delta);
done:
/*
* Note: distribution will already see us throttled via the
* throttled-list. rq->lock protects completion.
*/
cfs_rq->throttled = 1;
WARN_ON_ONCE(cfs_rq->throttled_clock);
if (cfs_rq->nr_queued)
cfs_rq->throttled_clock = rq_clock(rq);
return true;
}
@@ -5901,9 +6021,20 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
{
struct rq *rq = rq_of(cfs_rq);
struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
struct sched_entity *se;
long queued_delta, runnable_delta, idle_delta;
long rq_h_nr_queued = rq->cfs.h_nr_queued;
struct sched_entity *se = cfs_rq->tg->se[cpu_of(rq)];

/*
* It's possible we are called with !runtime_remaining due to things
* like user changed quota setting(see tg_set_cfs_bandwidth()) or async
* unthrottled us with a positive runtime_remaining but other still
* running entities consumed those runtime before we reached here.
*
* Anyway, we can't unthrottle this cfs_rq without any runtime remaining
* because any enqueue in tg_unthrottle_up() will immediately trigger a
* throttle, which is not supposed to happen on unthrottle path.
*/
if (cfs_rq->runtime_enabled && cfs_rq->runtime_remaining <= 0)
return;

se = cfs_rq->tg->se[cpu_of(rq)];
@@ -5933,62 +6064,8 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
if (list_add_leaf_cfs_rq(cfs_rq_of(se)))
break;
}
goto unthrottle_throttle;
}

queued_delta = cfs_rq->h_nr_queued;
runnable_delta = cfs_rq->h_nr_runnable;
idle_delta = cfs_rq->h_nr_idle;
for_each_sched_entity(se) {
struct cfs_rq *qcfs_rq = cfs_rq_of(se);

/* Handle any unfinished DELAY_DEQUEUE business first. */
if (se->sched_delayed) {
int flags = DEQUEUE_SLEEP | DEQUEUE_DELAYED;

dequeue_entity(qcfs_rq, se, flags);
} else if (se->on_rq)
break;
enqueue_entity(qcfs_rq, se, ENQUEUE_WAKEUP);

if (cfs_rq_is_idle(group_cfs_rq(se)))
idle_delta = cfs_rq->h_nr_queued;

qcfs_rq->h_nr_queued += queued_delta;
qcfs_rq->h_nr_runnable += runnable_delta;
qcfs_rq->h_nr_idle += idle_delta;

/* end evaluation on encountering a throttled cfs_rq */
if (cfs_rq_throttled(qcfs_rq))
goto unthrottle_throttle;
}

for_each_sched_entity(se) {
struct cfs_rq *qcfs_rq = cfs_rq_of(se);

update_load_avg(qcfs_rq, se, UPDATE_TG);
se_update_runnable(se);

if (cfs_rq_is_idle(group_cfs_rq(se)))
idle_delta = cfs_rq->h_nr_queued;

qcfs_rq->h_nr_queued += queued_delta;
qcfs_rq->h_nr_runnable += runnable_delta;
qcfs_rq->h_nr_idle += idle_delta;

/* end evaluation on encountering a throttled cfs_rq */
if (cfs_rq_throttled(qcfs_rq))
goto unthrottle_throttle;
}

/* Start the fair server if un-throttling resulted in new runnable tasks */
if (!rq_h_nr_queued && rq->cfs.h_nr_queued)
dl_server_start(&rq->fair_server);

/* At this point se is NULL and we are at root level*/
add_nr_running(rq, queued_delta);

unthrottle_throttle:
assert_list_leaf_cfs_rq(rq);

/* Determine whether we need to wake up potentially idle CPU: */
@@ -6472,6 +6549,7 @@ static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq)
cfs_rq->runtime_enabled = 0;
INIT_LIST_HEAD(&cfs_rq->throttled_list);
INIT_LIST_HEAD(&cfs_rq->throttled_csd_list);
INIT_LIST_HEAD(&cfs_rq->throttled_limbo_list);
}

void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
@@ -6639,19 +6717,28 @@ static bool check_cfs_rq_runtime(struct cfs_rq *cfs_rq) { return false; }
static void check_enqueue_throttle(struct cfs_rq *cfs_rq) {}
static inline void sync_throttle(struct task_group *tg, int cpu) {}
static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq) {}
static void task_throttle_setup_work(struct task_struct *p) {}
static bool task_is_throttled(struct task_struct *p) { return false; }
static void dequeue_throttled_task(struct task_struct *p, int flags) {}
static bool enqueue_throttled_task(struct task_struct *p) { return false; }
static void record_throttle_clock(struct cfs_rq *cfs_rq) {}

static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq)
{
return 0;
}

static inline bool cfs_rq_pelt_clock_throttled(struct cfs_rq *cfs_rq)
{
return false;
}

static inline int throttled_hierarchy(struct cfs_rq *cfs_rq)
{
return 0;
}

static inline int throttled_lb_pair(struct task_group *tg,
int src_cpu, int dest_cpu)
static inline int lb_throttled_hierarchy(struct task_struct *p, int dst_cpu)
{
return 0;
}
@@ -6831,6 +6918,9 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
int rq_h_nr_queued = rq->cfs.h_nr_queued;
u64 slice = 0;

if (task_is_throttled(p) && enqueue_throttled_task(p))
return;

/*
* The code below (indirectly) updates schedutil which looks at
* the cfs_rq utilization to select a frequency.
@@ -6883,10 +6973,6 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
if (cfs_rq_is_idle(cfs_rq))
h_nr_idle = 1;

/* end evaluation on encountering a throttled cfs_rq */
if (cfs_rq_throttled(cfs_rq))
goto enqueue_throttle;

flags = ENQUEUE_WAKEUP;
}
@@ -6908,10 +6994,6 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)

if (cfs_rq_is_idle(cfs_rq))
h_nr_idle = 1;

/* end evaluation on encountering a throttled cfs_rq */
if (cfs_rq_throttled(cfs_rq))
goto enqueue_throttle;
}

if (!rq_h_nr_queued && rq->cfs.h_nr_queued) {
@@ -6941,7 +7023,6 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
if (!task_new)
check_update_overutilized_status(rq);

enqueue_throttle:
assert_list_leaf_cfs_rq(rq);

hrtick_update(rq);
@@ -6963,6 +7044,7 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
bool was_sched_idle = sched_idle_rq(rq);
bool task_sleep = flags & DEQUEUE_SLEEP;
bool task_delayed = flags & DEQUEUE_DELAYED;
bool task_throttled = flags & DEQUEUE_THROTTLE;
struct task_struct *p = NULL;
int h_nr_idle = 0;
int h_nr_queued = 0;
@@ -6996,9 +7078,8 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
if (cfs_rq_is_idle(cfs_rq))
h_nr_idle = h_nr_queued;

/* end evaluation on encountering a throttled cfs_rq */
if (cfs_rq_throttled(cfs_rq))
return 0;
if (throttled_hierarchy(cfs_rq) && task_throttled)
record_throttle_clock(cfs_rq);

/* Don't dequeue parent if it has other entities besides us */
if (cfs_rq->load.weight) {
@@ -7010,7 +7091,7 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
* Bias pick_next to pick a task from this cfs_rq, as
* p is sleeping when it is within its sched_slice.
*/
if (task_sleep && se && !throttled_hierarchy(cfs_rq))
if (task_sleep && se)
set_next_buddy(se);
break;
}
@@ -7037,9 +7118,8 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
if (cfs_rq_is_idle(cfs_rq))
h_nr_idle = h_nr_queued;

/* end evaluation on encountering a throttled cfs_rq */
if (cfs_rq_throttled(cfs_rq))
return 0;
if (throttled_hierarchy(cfs_rq) && task_throttled)
record_throttle_clock(cfs_rq);
}

sub_nr_running(rq, h_nr_queued);
@@ -7073,6 +7153,11 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
*/
static bool dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
{
if (task_is_throttled(p)) {
dequeue_throttled_task(p, flags);
return true;
}

if (!p->se.sched_delayed)
util_est_dequeue(&rq->cfs, p);
@@ -8660,7 +8745,7 @@ static void check_preempt_wakeup_fair(struct rq *rq, struct task_struct *p, int
* lead to a throttle). This both saves work and prevents false
* next-buddy nomination below.
*/
if (unlikely(throttled_hierarchy(cfs_rq_of(pse))))
if (task_is_throttled(p))
return;

if (sched_feat(NEXT_BUDDY) && !(wake_flags & WF_FORK) && !pse->sched_delayed) {
@@ -8741,19 +8826,22 @@ static struct task_struct *pick_task_fair(struct rq *rq)
{
struct sched_entity *se;
struct cfs_rq *cfs_rq;
struct task_struct *p;
bool throttled;

again:
cfs_rq = &rq->cfs;
if (!cfs_rq->nr_queued)
return NULL;

throttled = false;

do {
/* Might not have done put_prev_entity() */
if (cfs_rq->curr && cfs_rq->curr->on_rq)
update_curr(cfs_rq);

if (unlikely(check_cfs_rq_runtime(cfs_rq)))
goto again;
throttled |= check_cfs_rq_runtime(cfs_rq);

se = pick_next_entity(rq, cfs_rq);
if (!se)
@@ -8761,7 +8849,10 @@ again:
cfs_rq = group_cfs_rq(se);
} while (cfs_rq);

return task_of(se);
p = task_of(se);
if (unlikely(throttled))
task_throttle_setup_work(p);
return p;
}

static void __set_next_task_fair(struct rq *rq, struct task_struct *p, bool first);
@@ -8923,8 +9014,8 @@ static bool yield_to_task_fair(struct rq *rq, struct task_struct *p)
{
struct sched_entity *se = &p->se;

/* throttled hierarchies are not runnable */
if (!se->on_rq || throttled_hierarchy(cfs_rq_of(se)))
/* !se->on_rq also covers throttled task */
if (!se->on_rq)
return false;

/* Tell the scheduler that we'd really like se to run next. */
@@ -9283,7 +9374,7 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
/*
* We do not migrate tasks that are:
* 1) delayed dequeued unless we migrate load, or
* 2) throttled_lb_pair, or
* 2) target cfs_rq is in throttled hierarchy, or
* 3) cannot be migrated to this CPU due to cpus_ptr, or
* 4) running (obviously), or
* 5) are cache-hot on their current CPU, or
@@ -9292,7 +9383,7 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
if ((p->se.sched_delayed) && (env->migration_type != migrate_load))
return 0;

if (throttled_lb_pair(task_group(p), env->src_cpu, env->dst_cpu))
if (lb_throttled_hierarchy(p, env->dst_cpu))
return 0;

/*
@@ -13076,10 +13167,13 @@ static void propagate_entity_cfs_rq(struct sched_entity *se)
{
struct cfs_rq *cfs_rq = cfs_rq_of(se);

if (cfs_rq_throttled(cfs_rq))
return;

if (!throttled_hierarchy(cfs_rq))
/*
* If a task gets attached to this cfs_rq and before being queued,
* it gets migrated to another CPU due to reasons like affinity
* change, make sure this cfs_rq stays on leaf cfs_rq list to have
* that removed load decayed or it can cause faireness problem.
*/
if (!cfs_rq_pelt_clock_throttled(cfs_rq))
list_add_leaf_cfs_rq(cfs_rq);

/* Start to propagate at parent */
@@ -13090,10 +13184,7 @@ static void propagate_entity_cfs_rq(struct sched_entity *se)

update_load_avg(cfs_rq, se, UPDATE_TG);

if (cfs_rq_throttled(cfs_rq))
break;

if (!throttled_hierarchy(cfs_rq))
if (!cfs_rq_pelt_clock_throttled(cfs_rq))
list_add_leaf_cfs_rq(cfs_rq);
}
}
@@ -162,7 +162,7 @@ static inline void update_idle_cfs_rq_clock_pelt(struct cfs_rq *cfs_rq)
{
u64 throttled;

if (unlikely(cfs_rq->throttle_count))
if (unlikely(cfs_rq->pelt_clock_throttled))
throttled = U64_MAX;
else
throttled = cfs_rq->throttled_clock_pelt_time;
@@ -173,7 +173,7 @@ static inline void update_idle_cfs_rq_clock_pelt(struct cfs_rq *cfs_rq)
/* rq->task_clock normalized against any time this cfs_rq has spent throttled */
static inline u64 cfs_rq_clock_pelt(struct cfs_rq *cfs_rq)
{
if (unlikely(cfs_rq->throttle_count))
if (unlikely(cfs_rq->pelt_clock_throttled))
return cfs_rq->throttled_clock_pelt - cfs_rq->throttled_clock_pelt_time;

return rq_clock_pelt(rq_of(cfs_rq)) - cfs_rq->throttled_clock_pelt_time;
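A quick worked example of the clock bookkeeping shown above and in tg_unthrottle_up() earlier in the diff (the numbers are illustrative): suppose the PELT clock is stopped when rq_clock_pelt() reads 1000, so throttled_clock_pelt = 1000, and 200 units of throttled time have already accumulated in throttled_clock_pelt_time. While pelt_clock_throttled is set, cfs_rq_clock_pelt() stays pinned at 1000 - 200 = 800. If the group is unthrottled when rq_clock_pelt() reads 1300, the unthrottle path adds 1300 - 1000 = 300 to throttled_clock_pelt_time (now 500), and the clock resumes at 1300 - 500 = 800, i.e. it continues with no jump across the throttled window.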
@@ -0,0 +1,12 @@
// SPDX-License-Identifier: GPL-2.0
#define COMPILE_OFFSETS
#include <linux/kbuild.h>
#include <linux/types.h>
#include "sched.h"

int main(void)
{
DEFINE(RQ_nr_pinned, offsetof(struct rq, nr_pinned));

return 0;
}
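For context on the new offsets file above: DEFINE() comes from <linux/kbuild.h> and emits the constant into the compiler's assembly output, which the kbuild post-processing then turns into a plain #define in a generated header. Schematically it looks like the sketch below; the numeric value is made up for illustration and depends entirely on the kernel config and struct rq layout.

/* Source line in the offsets file: */
DEFINE(RQ_nr_pinned, offsetof(struct rq, nr_pinned));

/* What the generated header ends up containing, roughly: */
#define RQ_nr_pinned 2944 /* offsetof(struct rq, nr_pinned) -- illustrative value only */

This is presumably what allows inline code that cannot see the full struct rq definition to still reach the nr_pinned field by offset.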
@@ -760,10 +760,12 @@ struct cfs_rq {
u64 throttled_clock_pelt_time;
u64 throttled_clock_self;
u64 throttled_clock_self_time;
int throttled;
bool throttled:1;
bool pelt_clock_throttled:1;
int throttle_count;
struct list_head throttled_list;
struct list_head throttled_csd_list;
struct list_head throttled_limbo_list;
#endif /* CONFIG_CFS_BANDWIDTH */
#endif /* CONFIG_FAIR_GROUP_SCHED */
};
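The struct change above converts the int throttled flag into two single-bit bool bitfields. A tiny standalone illustration of the space effect follows; exact packing is compiler- and ABI-dependent, so treat the sizes as typical rather than guaranteed.

#include <stdio.h>
#include <stdbool.h>

struct flags_as_int { int throttled; };    /* old shape: a full int per flag */

struct flags_as_bits {                     /* new shape: both bits share one storage unit */
	bool throttled:1;
	bool pelt_clock_throttled:1;
};

int main(void)
{
	printf("int flag: %zu bytes, bitfield flags: %zu bytes\n",
	       sizeof(struct flags_as_int), sizeof(struct flags_as_bits));
	return 0;
}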
@@ -2367,6 +2369,7 @@ extern const u32 sched_prio_to_wmult[40];
#define DEQUEUE_SPECIAL 0x10
#define DEQUEUE_MIGRATING 0x100 /* Matches ENQUEUE_MIGRATING */
#define DEQUEUE_DELAYED 0x200 /* Matches ENQUEUE_DELAYED */
#define DEQUEUE_THROTTLE 0x800

#define ENQUEUE_WAKEUP 0x01
#define ENQUEUE_RESTORE 0x02
@@ -2683,6 +2686,8 @@ extern bool sched_rt_bandwidth_account(struct rt_rq *rt_rq);

extern void init_dl_entity(struct sched_dl_entity *dl_se);

extern void init_cfs_throttle_work(struct task_struct *p);

#define BW_SHIFT 20
#define BW_UNIT (1 << BW_SHIFT)
#define RATIO_SHIFT 8
@@ -1591,7 +1591,6 @@ static void claim_allocations(int cpu, struct sched_domain *sd)
enum numa_topology_type sched_numa_topology_type;

static int sched_domains_numa_levels;
static int sched_domains_curr_level;

int sched_max_numa_distance;
static int *sched_domains_numa_distance;
@@ -1632,14 +1631,7 @@ sd_init(struct sched_domain_topology_level *tl,
int sd_id, sd_weight, sd_flags = 0;
struct cpumask *sd_span;

#ifdef CONFIG_NUMA
/*
* Ugly hack to pass state to sd_numa_mask()...
*/
sched_domains_curr_level = tl->numa_level;
#endif

sd_weight = cpumask_weight(tl->mask(cpu));
sd_weight = cpumask_weight(tl->mask(tl, cpu));

if (tl->sd_flags)
sd_flags = (*tl->sd_flags)();
@@ -1677,7 +1669,7 @@ sd_init(struct sched_domain_topology_level *tl,
};

sd_span = sched_domain_span(sd);
cpumask_and(sd_span, cpu_map, tl->mask(cpu));
cpumask_and(sd_span, cpu_map, tl->mask(tl, cpu));
sd_id = cpumask_first(sd_span);

sd->flags |= asym_cpu_capacity_classify(sd_span, cpu_map);
@@ -1732,22 +1724,63 @@ sd_init(struct sched_domain_topology_level *tl,
return sd;
}

#ifdef CONFIG_SCHED_SMT
int cpu_smt_flags(void)
{
return SD_SHARE_CPUCAPACITY | SD_SHARE_LLC;
}

const struct cpumask *tl_smt_mask(struct sched_domain_topology_level *tl, int cpu)
{
return cpu_smt_mask(cpu);
}
#endif

#ifdef CONFIG_SCHED_CLUSTER
int cpu_cluster_flags(void)
{
return SD_CLUSTER | SD_SHARE_LLC;
}

const struct cpumask *tl_cls_mask(struct sched_domain_topology_level *tl, int cpu)
{
return cpu_clustergroup_mask(cpu);
}
#endif

#ifdef CONFIG_SCHED_MC
int cpu_core_flags(void)
{
return SD_SHARE_LLC;
}

const struct cpumask *tl_mc_mask(struct sched_domain_topology_level *tl, int cpu)
{
return cpu_coregroup_mask(cpu);
}
#endif

const struct cpumask *tl_pkg_mask(struct sched_domain_topology_level *tl, int cpu)
{
return cpu_node_mask(cpu);
}

/*
* Topology list, bottom-up.
*/
static struct sched_domain_topology_level default_topology[] = {
#ifdef CONFIG_SCHED_SMT
SDTL_INIT(cpu_smt_mask, cpu_smt_flags, SMT),
SDTL_INIT(tl_smt_mask, cpu_smt_flags, SMT),
#endif

#ifdef CONFIG_SCHED_CLUSTER
SDTL_INIT(cpu_clustergroup_mask, cpu_cluster_flags, CLS),
SDTL_INIT(tl_cls_mask, cpu_cluster_flags, CLS),
#endif

#ifdef CONFIG_SCHED_MC
SDTL_INIT(cpu_coregroup_mask, cpu_core_flags, MC),
SDTL_INIT(tl_mc_mask, cpu_core_flags, MC),
#endif
SDTL_INIT(cpu_cpu_mask, NULL, PKG),
SDTL_INIT(tl_pkg_mask, NULL, PKG),
{ NULL, },
};
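The common thread in the topology hunks above is the mask() callback signature: it now receives the topology level itself, so per-level state such as tl->numa_level travels with the callback instead of going through the sched_domains_curr_level global that the first hunk removes. A hypothetical extra level would now be written roughly like this (the name and the returned span are illustrative only, not part of the patch):

static const struct cpumask *tl_example_mask(struct sched_domain_topology_level *tl,
					     int cpu)
{
	/*
	 * Per-level data can be read straight from @tl here (the NUMA
	 * levels use tl->numa_level this way); the span returned below
	 * is just a placeholder.
	 */
	return cpu_possible_mask;
}

An entry for such a level would then be added to the topology table with an SDTL_INIT(tl_example_mask, NULL, EXAMPLE)-style initializer, mirroring the default_topology[] entries above.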
@@ -1768,10 +1801,14 @@ void __init set_sched_topology(struct sched_domain_topology_level *tl)
}

#ifdef CONFIG_NUMA

static const struct cpumask *sd_numa_mask(int cpu)
static int cpu_numa_flags(void)
{
return sched_domains_numa_masks[sched_domains_curr_level][cpu_to_node(cpu)];
return SD_NUMA;
}

static const struct cpumask *sd_numa_mask(struct sched_domain_topology_level *tl, int cpu)
{
return sched_domains_numa_masks[tl->numa_level][cpu_to_node(cpu)];
}

static void sched_numa_warn(const char *str)
@@ -2413,7 +2450,7 @@ static bool topology_span_sane(const struct cpumask *cpu_map)
* breaks the linking done for an earlier span.
*/
for_each_cpu(cpu, cpu_map) {
const struct cpumask *tl_cpu_mask = tl->mask(cpu);
const struct cpumask *tl_cpu_mask = tl->mask(tl, cpu);
int id;

/* lowest bit set in this mask is used as a unique id */
@@ -2421,7 +2458,7 @@ static bool topology_span_sane(const struct cpumask *cpu_map)

if (cpumask_test_cpu(id, id_seen)) {
/* First CPU has already been seen, ensure identical spans */
if (!cpumask_equal(tl->mask(id), tl_cpu_mask))
if (!cpumask_equal(tl->mask(tl, id), tl_cpu_mask))
return false;
} else {
/* First CPU hasn't been seen before, ensure it's a completely new span */