Commit 6a68cec1 authored by Linus Torvalds
Browse files
Pull sched_ext updates from Tejun Heo:

 - Add support for cgroup "cpu.max" interface

 - Code organization cleanup so that ext_idle.c doesn't depend on the
   source-file-inclusion build method of sched/

 - Drop UP paths in accordance with sched core changes

 - Documentation and other misc changes

* tag 'sched_ext-for-6.17' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext:
  sched_ext: Fix scx_bpf_reenqueue_local() reference
  sched_ext: Drop kfuncs marked for removal in 6.15
  sched_ext, rcu: Eject BPF scheduler on RCU CPU stall panic
  kernel/sched/ext.c: fix typo "occured" -> "occurred" in comments
  sched_ext: Add support for cgroup bandwidth control interface
  sched_ext, sched/core: Factor out struct scx_task_group
  sched_ext: Return NULL in llc_span
  sched_ext: Always use SMP versions in kernel/sched/ext_idle.h
  sched_ext: Always use SMP versions in kernel/sched/ext_idle.c
  sched_ext: Always use SMP versions in kernel/sched/ext.h
  sched_ext: Always use SMP versions in kernel/sched/ext.c
  sched_ext: Documentation: Clarify time slice handling in task lifecycle
  sched_ext: Make scx_locked_rq() inline
  sched_ext: Make scx_rq_bypassing() inline
  sched_ext: idle: Make local functions static in ext_idle.c
  sched_ext: idle: Remove unnecessary ifdef in scx_bpf_cpu_node()
parents 6aee5aed ae96bba1
Loading
Loading
Loading
Loading
+8 −3
Original line number Diff line number Diff line
@@ -313,16 +313,21 @@ by a sched_ext scheduler:
        ops.runnable();         /* Task becomes ready to run */

        while (task is runnable) {
            if (task is not in a DSQ) {
            if (task is not in a DSQ && task->scx.slice == 0) {
                ops.enqueue();  /* Task can be added to a DSQ */

                /* A CPU becomes available */
                /* Any usable CPU becomes available */

                ops.dispatch(); /* Task is moved to a local DSQ */
            }
            ops.running();      /* Task starts running on its assigned CPU */
            while (task->scx.slice > 0 && task is runnable)
                ops.tick();     /* Called every 1/HZ seconds */
            ops.stopping();     /* Task stops running (time slice expires or wait) */

            /* Task's CPU becomes available */

            ops.dispatch();     /* task->scx.slice can be refilled */
        }

        ops.quiescent();        /* Task releases its assigned CPU (wait) */
+18 −5
Original line number Diff line number Diff line
@@ -164,7 +164,7 @@ struct sched_ext_entity {

	/*
	 * Runtime budget in nsecs. This is usually set through
	 * scx_bpf_dispatch() but can also be modified directly by the BPF
	 * scx_bpf_dsq_insert() but can also be modified directly by the BPF
	 * scheduler. Automatically decreased by SCX as the task executes. On
	 * depletion, a scheduling event is triggered.
	 *
@@ -176,10 +176,10 @@ struct sched_ext_entity {

	/*
	 * Used to order tasks when dispatching to the vtime-ordered priority
	 * queue of a dsq. This is usually set through scx_bpf_dispatch_vtime()
	 * but can also be modified directly by the BPF scheduler. Modifying it
	 * while a task is queued on a dsq may mangle the ordering and is not
	 * recommended.
	 * queue of a dsq. This is usually set through
	 * scx_bpf_dsq_insert_vtime() but can also be modified directly by the
	 * BPF scheduler. Modifying it while a task is queued on a dsq may
	 * mangle the ordering and is not recommended.
	 */
	u64			dsq_vtime;

@@ -206,12 +206,25 @@ struct sched_ext_entity {
void sched_ext_free(struct task_struct *p);
void print_scx_info(const char *log_lvl, struct task_struct *p);
void scx_softlockup(u32 dur_s);
bool scx_rcu_cpu_stall(void);

#else	/* !CONFIG_SCHED_CLASS_EXT */

/*
 * Stand-ins used when CONFIG_SCHED_CLASS_EXT is disabled. The void helpers
 * have empty bodies, and scx_rcu_cpu_stall() always returns false, so a
 * caller that bails out on a true result (see panic_on_rcu_stall()) simply
 * continues with its normal path.
 */
static inline void sched_ext_free(struct task_struct *p) {}
static inline void print_scx_info(const char *log_lvl, struct task_struct *p) {}
static inline void scx_softlockup(u32 dur_s) {}
static inline bool scx_rcu_cpu_stall(void) { return false; }

#endif	/* CONFIG_SCHED_CLASS_EXT */

/*
 * sched_ext state attached to a task group (accessed as tg->scx). Every
 * member is compiled out unless CONFIG_EXT_GROUP_SCHED is enabled.
 */
struct scx_task_group {
#ifdef CONFIG_EXT_GROUP_SCHED
	u32			flags;		/* SCX_TG_* */
	u32			weight;
	/*
	 * Bandwidth settings. tg_bandwidth() reports these as the group's
	 * period, quota and burst when CONFIG_CFS_BANDWIDTH is disabled.
	 * NOTE(review): the _us suffix suggests microseconds - confirm
	 * against scx_group_set_bandwidth().
	 */
	u64			bw_period_us;
	u64			bw_quota_us;
	u64			bw_burst_us;
#endif
};

#endif	/* _LINUX_SCHED_EXT_H */
+5 −0
Original line number Diff line number Diff line
@@ -1081,6 +1081,9 @@ if CGROUP_SCHED
config GROUP_SCHED_WEIGHT
	def_bool n

# Hidden helper symbol: selected by CFS_BANDWIDTH and EXT_GROUP_SCHED so the
# shared cgroup bandwidth interface code is built when either one is enabled.
config GROUP_SCHED_BANDWIDTH
	def_bool n

config FAIR_GROUP_SCHED
	bool "Group scheduling for SCHED_OTHER"
	depends on CGROUP_SCHED
@@ -1090,6 +1093,7 @@ config FAIR_GROUP_SCHED
config CFS_BANDWIDTH
	bool "CPU bandwidth provisioning for FAIR_GROUP_SCHED"
	depends on FAIR_GROUP_SCHED
	select GROUP_SCHED_BANDWIDTH
	default n
	help
	  This option allows users to define CPU bandwidth rates (limits) for
@@ -1124,6 +1128,7 @@ config EXT_GROUP_SCHED
	bool
	depends on SCHED_CLASS_EXT && CGROUP_SCHED
	select GROUP_SCHED_WEIGHT
	select GROUP_SCHED_BANDWIDTH
	default y

endif #CGROUP_SCHED
+7 −0
Original line number Diff line number Diff line
@@ -163,6 +163,13 @@ static void panic_on_rcu_stall(void)
{
	static int cpu_stall;

	/*
	 * Attempt to kick out the BPF scheduler if it's installed and defer
	 * the panic to give the system a chance to recover.
	 */
	if (scx_rcu_cpu_stall())
		return;

	if (++cpu_stall < sysctl_max_rcu_stall_to_panic)
		return;

+24 −5
Original line number Diff line number Diff line
@@ -9815,7 +9815,9 @@ static int cpu_cfs_local_stat_show(struct seq_file *sf, void *v)

	return 0;
}
#endif /* CONFIG_CFS_BANDWIDTH */

#ifdef CONFIG_GROUP_SCHED_BANDWIDTH
const u64 max_bw_quota_period_us = 1 * USEC_PER_SEC; /* 1s */
static const u64 min_bw_quota_period_us = 1 * USEC_PER_MSEC; /* 1ms */
/* More than 203 days if BW_SHIFT equals 20. */
@@ -9824,12 +9826,21 @@ static const u64 max_bw_runtime_us = MAX_BW;
/*
 * Fetch the bandwidth settings of @tg.
 *
 * @period_us_p, @quota_us_p and @burst_us_p are optional out-parameters; a
 * NULL pointer means the caller is not interested in that value and it is
 * skipped.
 *
 * With CONFIG_CFS_BANDWIDTH the values come from the tg_get_cfs_*() helpers.
 * Otherwise they are read from the copies stored in tg->scx.
 */
static void tg_bandwidth(struct task_group *tg,
			 u64 *period_us_p, u64 *quota_us_p, u64 *burst_us_p)
{
#ifdef CONFIG_CFS_BANDWIDTH
	if (period_us_p)
		*period_us_p = tg_get_cfs_period(tg);
	if (quota_us_p)
		*quota_us_p = tg_get_cfs_quota(tg);
	if (burst_us_p)
		*burst_us_p = tg_get_cfs_burst(tg);
#else /* !CONFIG_CFS_BANDWIDTH */
	if (period_us_p)
		*period_us_p = tg->scx.bw_period_us;
	if (quota_us_p)
		*quota_us_p = tg->scx.bw_quota_us;
	if (burst_us_p)
		*burst_us_p = tg->scx.bw_burst_us;
#endif /* CONFIG_CFS_BANDWIDTH */
}

static u64 cpu_period_read_u64(struct cgroup_subsys_state *css,
@@ -9845,6 +9856,7 @@ static int tg_set_bandwidth(struct task_group *tg,
			    u64 period_us, u64 quota_us, u64 burst_us)
{
	const u64 max_usec = U64_MAX / NSEC_PER_USEC;
	int ret = 0;

	if (tg == &root_task_group)
		return -EINVAL;
@@ -9882,7 +9894,12 @@ static int tg_set_bandwidth(struct task_group *tg,
					burst_us + quota_us > max_bw_runtime_us))
		return -EINVAL;

	return tg_set_cfs_bandwidth(tg, period_us, quota_us, burst_us);
#ifdef CONFIG_CFS_BANDWIDTH
	ret = tg_set_cfs_bandwidth(tg, period_us, quota_us, burst_us);
#endif /* CONFIG_CFS_BANDWIDTH */
	if (!ret)
		scx_group_set_bandwidth(tg, period_us, quota_us, burst_us);
	return ret;
}

static s64 cpu_quota_read_s64(struct cgroup_subsys_state *css,
@@ -9935,7 +9952,7 @@ static int cpu_burst_write_u64(struct cgroup_subsys_state *css,
	tg_bandwidth(tg, &period_us, &quota_us, NULL);
	return tg_set_bandwidth(tg, period_us, quota_us, burst_us);
}
#endif /* CONFIG_CFS_BANDWIDTH */
#endif /* CONFIG_GROUP_SCHED_BANDWIDTH */

#ifdef CONFIG_RT_GROUP_SCHED
static int cpu_rt_runtime_write(struct cgroup_subsys_state *css,
@@ -9995,7 +10012,7 @@ static struct cftype cpu_legacy_files[] = {
		.write_s64 = cpu_idle_write_s64,
	},
#endif
#ifdef CONFIG_CFS_BANDWIDTH
#ifdef CONFIG_GROUP_SCHED_BANDWIDTH
	{
		.name = "cfs_period_us",
		.read_u64 = cpu_period_read_u64,
@@ -10011,6 +10028,8 @@ static struct cftype cpu_legacy_files[] = {
		.read_u64 = cpu_burst_read_u64,
		.write_u64 = cpu_burst_write_u64,
	},
#endif
#ifdef CONFIG_CFS_BANDWIDTH
	{
		.name = "stat",
		.seq_show = cpu_cfs_stat_show,
@@ -10224,7 +10243,7 @@ static int __maybe_unused cpu_period_quota_parse(char *buf, u64 *period_us_p,
	return 0;
}

#ifdef CONFIG_CFS_BANDWIDTH
#ifdef CONFIG_GROUP_SCHED_BANDWIDTH
static int cpu_max_show(struct seq_file *sf, void *v)
{
	struct task_group *tg = css_tg(seq_css(sf));
@@ -10271,7 +10290,7 @@ static struct cftype cpu_files[] = {
		.write_s64 = cpu_idle_write_s64,
	},
#endif
#ifdef CONFIG_CFS_BANDWIDTH
#ifdef CONFIG_GROUP_SCHED_BANDWIDTH
	{
		.name = "max",
		.flags = CFTYPE_NOT_ON_ROOT,
Loading