sched_ext: Changes for v6.17

- Add support for cgroup "cpu.max" interface.

- Code organization cleanup so that ext_idle.c doesn't depend on the
  source-file-inclusion build method of sched/.

- Drop UP paths in accordance with sched core changes.

- Documentation and other misc changes.

-----BEGIN PGP SIGNATURE-----

iIQEABYKACwWIQTfIjM1kS57o3GsC/uxYfJx3gVYGQUCaIqnxg4cdGpAa2VybmVs
Lm9yZwAKCRCxYfJx3gVYGUh5AQC6YM7ggRPYRmy28m5B0nubpKtCHqPOAHSd/QbY
MCiThgD+JuE9ewg3wYO/jvJx3NyIRB1McMnAaG59hf6R0Plh5Qo=
=TeLF
-----END PGP SIGNATURE-----

Merge tag 'sched_ext-for-6.17' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext

Pull sched_ext updates from Tejun Heo:

 - Add support for cgroup "cpu.max" interface

 - Code organization cleanup so that ext_idle.c doesn't depend on the
   source-file-inclusion build method of sched/

 - Drop UP paths in accordance with sched core changes

 - Documentation and other misc changes

* tag 'sched_ext-for-6.17' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext:
  sched_ext: Fix scx_bpf_reenqueue_local() reference
  sched_ext: Drop kfuncs marked for removal in 6.15
  sched_ext, rcu: Eject BPF scheduler on RCU CPU stall panic
  kernel/sched/ext.c: fix typo "occured" -> "occurred" in comments
  sched_ext: Add support for cgroup bandwidth control interface
  sched_ext, sched/core: Factor out struct scx_task_group
  sched_ext: Return NULL in llc_span
  sched_ext: Always use SMP versions in kernel/sched/ext_idle.h
  sched_ext: Always use SMP versions in kernel/sched/ext_idle.c
  sched_ext: Always use SMP versions in kernel/sched/ext.h
  sched_ext: Always use SMP versions in kernel/sched/ext.c
  sched_ext: Documentation: Clarify time slice handling in task lifecycle
  sched_ext: Make scx_locked_rq() inline
  sched_ext: Make scx_rq_bypassing() inline
  sched_ext: idle: Make local functions static in ext_idle.c
  sched_ext: idle: Remove unnecessary ifdef in scx_bpf_cpu_node()
commit 6a68cec16b
--- a/Documentation/scheduler/sched-ext.rst
+++ b/Documentation/scheduler/sched-ext.rst
@@ -313,16 +313,21 @@ by a sched_ext scheduler:
     ops.runnable();         /* Task becomes ready to run */
 
     while (task is runnable) {
-        if (task is not in a DSQ) {
+        if (task is not in a DSQ && task->scx.slice == 0) {
             ops.enqueue();  /* Task can be added to a DSQ */
 
-            /* A CPU becomes available */
+            /* Any usable CPU becomes available */
 
             ops.dispatch(); /* Task is moved to a local DSQ */
         }
         ops.running();      /* Task starts running on its assigned CPU */
-        ops.tick();         /* Called every 1/HZ seconds */
+        while (task->scx.slice > 0 && task is runnable)
+            ops.tick();     /* Called every 1/HZ seconds */
         ops.stopping();     /* Task stops running (time slice expires or wait) */
+
+        /* Task's CPU becomes available */
+
+        ops.dispatch();     /* task->scx.slice can be refilled */
     }
 
     ops.quiescent();        /* Task releases its assigned CPU (wait) */
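To make the refill step in the updated diagram concrete, here is a minimal, illustrative ops.dispatch() sketch. It is not part of this series; it assumes the BPF_STRUCT_OPS() conveniences from tools/sched_ext's common.bpf.h, and FALLBACK_DSQ is a hypothetical shared DSQ created elsewhere by the scheduler.

  /* Illustrative only: refill the outgoing task's slice from ops.dispatch(). */
  void BPF_STRUCT_OPS(example_dispatch, s32 cpu, struct task_struct *prev)
  {
  	/* Prefer work that was queued earlier on the shared DSQ. */
  	if (scx_bpf_dsq_move_to_local(FALLBACK_DSQ))
  		return;

  	/*
  	 * Nothing else to run. If @prev is still runnable, topping up its
  	 * slice keeps it on the CPU without another enqueue/dispatch round,
  	 * which is the "task->scx.slice can be refilled" step above.
  	 */
  	if (prev && (prev->scx.flags & SCX_TASK_QUEUED))
  		prev->scx.slice = SCX_SLICE_DFL;
  }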
--- a/include/linux/sched/ext.h
+++ b/include/linux/sched/ext.h
@@ -164,7 +164,7 @@ struct sched_ext_entity {
 
 	/*
 	 * Runtime budget in nsecs. This is usually set through
-	 * scx_bpf_dispatch() but can also be modified directly by the BPF
+	 * scx_bpf_dsq_insert() but can also be modified directly by the BPF
 	 * scheduler. Automatically decreased by SCX as the task executes. On
 	 * depletion, a scheduling event is triggered.
 	 *
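For reference, a hedged sketch of how the renamed kfunc typically initializes this budget (illustrative only, not from the patch; assumes the tools/sched_ext BPF helpers):

  /* Sketch: queue @p on the local DSQ with the default time slice. */
  void BPF_STRUCT_OPS(example_enqueue, struct task_struct *p, u64 enq_flags)
  {
  	/* The slice argument is what ends up in p->scx.slice (in nsecs). */
  	scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL, enq_flags);
  }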
@@ -176,10 +176,10 @@ struct sched_ext_entity {
 
 	/*
 	 * Used to order tasks when dispatching to the vtime-ordered priority
-	 * queue of a dsq. This is usually set through scx_bpf_dispatch_vtime()
-	 * but can also be modified directly by the BPF scheduler. Modifying it
-	 * while a task is queued on a dsq may mangle the ordering and is not
-	 * recommended.
+	 * queue of a dsq. This is usually set through
+	 * scx_bpf_dsq_insert_vtime() but can also be modified directly by the
+	 * BPF scheduler. Modifying it while a task is queued on a dsq may
+	 * mangle the ordering and is not recommended.
 	 */
 	u64			dsq_vtime;
 
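A hedged sketch of the usual dsq_vtime pattern this comment alludes to, modeled on the in-tree scx_simple scheduler (illustrative; assumes the tools/sched_ext BPF helpers and a previously created shared DSQ whose id SHARED_DSQ is hypothetical):

  /* Sketch: vtime-ordered queueing plus weight-scaled vtime accounting. */
  static u64 vtime_now;	/* advanced towards p->scx.dsq_vtime in ops.running() */

  static inline bool vtime_before(u64 a, u64 b)
  {
  	return (s64)(a - b) < 0;
  }

  void BPF_STRUCT_OPS(example_enqueue, struct task_struct *p, u64 enq_flags)
  {
  	u64 vtime = p->scx.dsq_vtime;

  	/* Cap the credit a long-idle task can accumulate to one full slice. */
  	if (vtime_before(vtime, vtime_now - SCX_SLICE_DFL))
  		vtime = vtime_now - SCX_SLICE_DFL;

  	scx_bpf_dsq_insert_vtime(p, SHARED_DSQ, SCX_SLICE_DFL, vtime, enq_flags);
  }

  void BPF_STRUCT_OPS(example_stopping, struct task_struct *p, bool runnable)
  {
  	/* Charge the consumed part of the slice, scaled inversely by weight. */
  	p->scx.dsq_vtime += (SCX_SLICE_DFL - p->scx.slice) * 100 / p->scx.weight;
  }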
@@ -206,12 +206,25 @@ struct sched_ext_entity {
 void sched_ext_free(struct task_struct *p);
 void print_scx_info(const char *log_lvl, struct task_struct *p);
 void scx_softlockup(u32 dur_s);
+bool scx_rcu_cpu_stall(void);
 
 #else	/* !CONFIG_SCHED_CLASS_EXT */
 
 static inline void sched_ext_free(struct task_struct *p) {}
 static inline void print_scx_info(const char *log_lvl, struct task_struct *p) {}
 static inline void scx_softlockup(u32 dur_s) {}
+static inline bool scx_rcu_cpu_stall(void) { return false; }
 
 #endif	/* CONFIG_SCHED_CLASS_EXT */
 
+struct scx_task_group {
+#ifdef CONFIG_EXT_GROUP_SCHED
+	u32			flags;		/* SCX_TG_* */
+	u32			weight;
+	u64			bw_period_us;
+	u64			bw_quota_us;
+	u64			bw_burst_us;
+#endif
+};
+
 #endif	/* _LINUX_SCHED_EXT_H */
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1081,6 +1081,9 @@ if CGROUP_SCHED
 config GROUP_SCHED_WEIGHT
 	def_bool n
 
+config GROUP_SCHED_BANDWIDTH
+	def_bool n
+
 config FAIR_GROUP_SCHED
 	bool "Group scheduling for SCHED_OTHER"
 	depends on CGROUP_SCHED
@@ -1090,6 +1093,7 @@ config FAIR_GROUP_SCHED
 config CFS_BANDWIDTH
 	bool "CPU bandwidth provisioning for FAIR_GROUP_SCHED"
 	depends on FAIR_GROUP_SCHED
+	select GROUP_SCHED_BANDWIDTH
 	default n
 	help
 	  This option allows users to define CPU bandwidth rates (limits) for
@@ -1124,6 +1128,7 @@ config EXT_GROUP_SCHED
 	bool
 	depends on SCHED_CLASS_EXT && CGROUP_SCHED
 	select GROUP_SCHED_WEIGHT
+	select GROUP_SCHED_BANDWIDTH
 	default y
 
 endif #CGROUP_SCHED
--- a/kernel/rcu/tree_stall.h
+++ b/kernel/rcu/tree_stall.h
@@ -163,6 +163,13 @@ static void panic_on_rcu_stall(void)
 {
 	static int cpu_stall;
 
+	/*
+	 * Attempt to kick out the BPF scheduler if it's installed and defer
+	 * the panic to give the system a chance to recover.
+	 */
+	if (scx_rcu_cpu_stall())
+		return;
+
 	if (++cpu_stall < sysctl_max_rcu_stall_to_panic)
 		return;
 
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -9815,7 +9815,9 @@ static int cpu_cfs_local_stat_show(struct seq_file *sf, void *v)
 
 	return 0;
 }
+#endif /* CONFIG_CFS_BANDWIDTH */
 
+#ifdef CONFIG_GROUP_SCHED_BANDWIDTH
 const u64 max_bw_quota_period_us = 1 * USEC_PER_SEC; /* 1s */
 static const u64 min_bw_quota_period_us = 1 * USEC_PER_MSEC; /* 1ms */
 /* More than 203 days if BW_SHIFT equals 20. */
@@ -9824,12 +9826,21 @@ static const u64 max_bw_runtime_us = MAX_BW;
 static void tg_bandwidth(struct task_group *tg,
 			 u64 *period_us_p, u64 *quota_us_p, u64 *burst_us_p)
 {
+#ifdef CONFIG_CFS_BANDWIDTH
 	if (period_us_p)
 		*period_us_p = tg_get_cfs_period(tg);
 	if (quota_us_p)
 		*quota_us_p = tg_get_cfs_quota(tg);
 	if (burst_us_p)
 		*burst_us_p = tg_get_cfs_burst(tg);
+#else	/* !CONFIG_CFS_BANDWIDTH */
+	if (period_us_p)
+		*period_us_p = tg->scx.bw_period_us;
+	if (quota_us_p)
+		*quota_us_p = tg->scx.bw_quota_us;
+	if (burst_us_p)
+		*burst_us_p = tg->scx.bw_burst_us;
+#endif	/* CONFIG_CFS_BANDWIDTH */
 }
 
 static u64 cpu_period_read_u64(struct cgroup_subsys_state *css,
@@ -9845,6 +9856,7 @@ static int tg_set_bandwidth(struct task_group *tg,
 			    u64 period_us, u64 quota_us, u64 burst_us)
 {
 	const u64 max_usec = U64_MAX / NSEC_PER_USEC;
+	int ret = 0;
 
 	if (tg == &root_task_group)
 		return -EINVAL;
@@ -9882,7 +9894,12 @@ static int tg_set_bandwidth(struct task_group *tg,
 		       burst_us + quota_us > max_bw_runtime_us))
 		return -EINVAL;
 
-	return tg_set_cfs_bandwidth(tg, period_us, quota_us, burst_us);
+#ifdef CONFIG_CFS_BANDWIDTH
+	ret = tg_set_cfs_bandwidth(tg, period_us, quota_us, burst_us);
+#endif /* CONFIG_CFS_BANDWIDTH */
+	if (!ret)
+		scx_group_set_bandwidth(tg, period_us, quota_us, burst_us);
+	return ret;
 }
 
 static s64 cpu_quota_read_s64(struct cgroup_subsys_state *css,
@@ -9935,7 +9952,7 @@ static int cpu_burst_write_u64(struct cgroup_subsys_state *css,
 	tg_bandwidth(tg, &period_us, &quota_us, NULL);
 	return tg_set_bandwidth(tg, period_us, quota_us, burst_us);
 }
-#endif /* CONFIG_CFS_BANDWIDTH */
+#endif /* CONFIG_GROUP_SCHED_BANDWIDTH */
 
 #ifdef CONFIG_RT_GROUP_SCHED
 static int cpu_rt_runtime_write(struct cgroup_subsys_state *css,
@@ -9995,7 +10012,7 @@ static struct cftype cpu_legacy_files[] = {
 		.write_s64 = cpu_idle_write_s64,
 	},
 #endif
-#ifdef CONFIG_CFS_BANDWIDTH
+#ifdef CONFIG_GROUP_SCHED_BANDWIDTH
 	{
 		.name = "cfs_period_us",
 		.read_u64 = cpu_period_read_u64,
@@ -10011,6 +10028,8 @@ static struct cftype cpu_legacy_files[] = {
 		.read_u64 = cpu_burst_read_u64,
 		.write_u64 = cpu_burst_write_u64,
 	},
+#endif
+#ifdef CONFIG_CFS_BANDWIDTH
 	{
 		.name = "stat",
 		.seq_show = cpu_cfs_stat_show,
@@ -10224,7 +10243,7 @@ static int __maybe_unused cpu_period_quota_parse(char *buf, u64 *period_us_p,
 	return 0;
 }
 
-#ifdef CONFIG_CFS_BANDWIDTH
+#ifdef CONFIG_GROUP_SCHED_BANDWIDTH
 static int cpu_max_show(struct seq_file *sf, void *v)
 {
 	struct task_group *tg = css_tg(seq_css(sf));
@@ -10271,7 +10290,7 @@ static struct cftype cpu_files[] = {
 		.write_s64 = cpu_idle_write_s64,
 	},
 #endif
-#ifdef CONFIG_CFS_BANDWIDTH
+#ifdef CONFIG_GROUP_SCHED_BANDWIDTH
 	{
 		.name = "max",
 		.flags = CFTYPE_NOT_ON_ROOT,
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -203,6 +203,11 @@ struct scx_exit_task_args {
 struct scx_cgroup_init_args {
 	/* the weight of the cgroup [1..10000] */
 	u32			weight;
+
+	/* bandwidth control parameters from cpu.max and cpu.max.burst */
+	u64			bw_period_us;
+	u64			bw_quota_us;
+	u64			bw_burst_us;
 };
 
 enum scx_cpu_preempt_reason {
@@ -664,9 +669,31 @@ struct sched_ext_ops {
 	 * @cgrp: cgroup whose weight is being updated
 	 * @weight: new weight [1..10000]
 	 *
-	 * Update @tg's weight to @weight.
+	 * Update @cgrp's weight to @weight.
 	 */
 	void (*cgroup_set_weight)(struct cgroup *cgrp, u32 weight);
 
+	/**
+	 * @cgroup_set_bandwidth: A cgroup's bandwidth is being changed
+	 * @cgrp: cgroup whose bandwidth is being updated
+	 * @period_us: bandwidth control period
+	 * @quota_us: bandwidth control quota
+	 * @burst_us: bandwidth control burst
+	 *
+	 * Update @cgrp's bandwidth control parameters. This is from the cpu.max
+	 * cgroup interface.
+	 *
+	 * @quota_us / @period_us determines the CPU bandwidth @cgrp is entitled
+	 * to. For example, if @period_us is 1_000_000 and @quota_us is
+	 * 2_500_000, @cgrp is entitled to 2.5 CPUs. @burst_us can be
+	 * interpreted in the same fashion and specifies how much @cgrp can
+	 * burst temporarily. The specific control mechanism and thus the
+	 * interpretation of @period_us and burstiness is up to the BPF
+	 * scheduler.
+	 */
+	void (*cgroup_set_bandwidth)(struct cgroup *cgrp,
+				     u64 period_us, u64 quota_us, u64 burst_us);
+
 #endif	/* CONFIG_EXT_GROUP_SCHED */
 
 	/*
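As a hedged illustration of the consumer side (not part of this series; assumes the usual vmlinux.h/bpf_helpers.h includes plus the tools/sched_ext helpers, and the cgrp_bw_map name and layout are hypothetical), a BPF scheduler could record the parameters in cgroup storage and derive the entitled CPU count as described above:

  /* Hypothetical per-cgroup state; field names are illustrative. */
  struct cgrp_bw {
  	u64 period_us;
  	u64 quota_us;
  };

  struct {
  	__uint(type, BPF_MAP_TYPE_CGRP_STORAGE);
  	__uint(map_flags, BPF_F_NO_PREALLOC);
  	__type(key, int);
  	__type(value, struct cgrp_bw);
  } cgrp_bw_map SEC(".maps");

  void BPF_STRUCT_OPS(example_cgroup_set_bandwidth, struct cgroup *cgrp,
  		    u64 period_us, u64 quota_us, u64 burst_us)
  {
  	struct cgrp_bw *bw;

  	bw = bpf_cgrp_storage_get(&cgrp_bw_map, cgrp, 0,
  				  BPF_LOCAL_STORAGE_GET_F_CREATE);
  	if (!bw)
  		return;

  	/* quota_us / period_us is the number of CPUs @cgrp is entitled to. */
  	bw->period_us = period_us;
  	bw->quota_us = quota_us;
  	/* How period, quota and burst are then enforced is up to the scheduler. */
  }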
@@ -884,7 +911,7 @@ enum scx_enq_flags {
 	/*
 	 * The task being enqueued was previously enqueued on the current CPU's
 	 * %SCX_DSQ_LOCAL, but was removed from it in a call to the
-	 * bpf_scx_reenqueue_local() kfunc. If bpf_scx_reenqueue_local() was
+	 * scx_bpf_reenqueue_local() kfunc. If scx_bpf_reenqueue_local() was
 	 * invoked in a ->cpu_release() callback, and the task is again
 	 * dispatched back to %SCX_LOCAL_DSQ by this current ->enqueue(), the
 	 * task will not be scheduled on the CPU until at least the next invocation
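For context, a hedged sketch of the ops.cpu_release() pairing this flag describes (illustrative only; assumes the tools/sched_ext BPF helpers):

  /* Sketch: hand back locally-queued tasks when a higher class takes the CPU. */
  void BPF_STRUCT_OPS(example_cpu_release, s32 cpu,
  		    struct scx_cpu_release_args *args)
  {
  	/*
  	 * Each task pulled off this CPU's local DSQ is re-enqueued through
  	 * ops.enqueue() with SCX_ENQ_REENQ set, so it can be redirected to a
  	 * different CPU's DSQ.
  	 */
  	u32 cnt = scx_bpf_reenqueue_local();

  	bpf_printk("reenqueued %u tasks off cpu %d", cnt, cpu);
  }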
@@ -1247,7 +1274,7 @@ static void scx_kf_disallow(u32 mask)
  * This allows kfuncs to safely operate on rq from any scx ops callback,
  * knowing which rq is already locked.
  */
-static DEFINE_PER_CPU(struct rq *, locked_rq);
+DEFINE_PER_CPU(struct rq *, scx_locked_rq_state);
 
 static inline void update_locked_rq(struct rq *rq)
 {
@@ -1258,16 +1285,7 @@ static inline void update_locked_rq(struct rq *rq)
 	 */
 	if (rq)
 		lockdep_assert_rq_held(rq);
-	__this_cpu_write(locked_rq, rq);
-}
-
-/*
- * Return the rq currently locked from an scx callback, or NULL if no rq is
- * locked.
- */
-static inline struct rq *scx_locked_rq(void)
-{
-	return __this_cpu_read(locked_rq);
+	__this_cpu_write(scx_locked_rq_state, rq);
 }
 
 #define SCX_CALL_OP(sch, mask, op, rq, args...)					\
@@ -1641,7 +1659,7 @@ static struct task_struct *scx_task_iter_next_locked(struct scx_task_iter *iter)
  * scx_add_event - Increase an event counter for 'name' by 'cnt'
  * @sch: scx_sched to account events for
  * @name: an event name defined in struct scx_event_stats
- * @cnt: the number of the event occured
+ * @cnt: the number of the event occurred
  *
  * This can be used when preemption is not disabled.
  */
@@ -1654,7 +1672,7 @@ static struct task_struct *scx_task_iter_next_locked(struct scx_task_iter *iter)
  * __scx_add_event - Increase an event counter for 'name' by 'cnt'
  * @sch: scx_sched to account events for
  * @name: an event name defined in struct scx_event_stats
- * @cnt: the number of the event occured
+ * @cnt: the number of the event occurred
  *
  * This should be used only when preemption is disabled.
  */
@@ -1705,11 +1723,6 @@ static bool scx_tryset_enable_state(enum scx_enable_state to,
 	return atomic_try_cmpxchg(&scx_enable_state_var, &from_v, to);
 }
 
-static bool scx_rq_bypassing(struct rq *rq)
-{
-	return unlikely(rq->scx.flags & SCX_RQ_BYPASSING);
-}
-
 /**
  * wait_ops_state - Busy-wait the specified ops state to end
  * @p: target task
@@ -1796,12 +1809,10 @@ static void run_deferred(struct rq *rq)
 	process_ddsp_deferred_locals(rq);
 }
 
-#ifdef CONFIG_SMP
 static void deferred_bal_cb_workfn(struct rq *rq)
 {
 	run_deferred(rq);
 }
-#endif
 
 static void deferred_irq_workfn(struct irq_work *irq_work)
 {
@@ -1824,7 +1835,6 @@ static void schedule_deferred(struct rq *rq)
 {
 	lockdep_assert_rq_held(rq);
 
-#ifdef CONFIG_SMP
 	/*
 	 * If in the middle of waking up a task, task_woken_scx() will be called
 	 * afterwards which will then run the deferred actions, no need to
@@ -1842,7 +1852,7 @@ static void schedule_deferred(struct rq *rq)
 					    deferred_bal_cb_workfn);
 		return;
 	}
-#endif
+
 	/*
 	 * No scheduler hooks available. Queue an irq work. They are executed on
 	 * IRQ re-enable which may take a bit longer than the scheduler hooks.
@@ -2546,7 +2556,6 @@ static void move_local_task_to_local_dsq(struct task_struct *p, u64 enq_flags,
 	p->scx.dsq = dst_dsq;
 }
 
-#ifdef CONFIG_SMP
 /**
  * move_remote_task_to_local_dsq - Move a task from a foreign rq to a local DSQ
  * @p: task to move
@@ -2713,11 +2722,6 @@ static bool consume_remote_task(struct rq *this_rq, struct task_struct *p,
 			return false;
 		}
 	}
-#else	/* CONFIG_SMP */
-static inline void move_remote_task_to_local_dsq(struct task_struct *p, u64 enq_flags, struct rq *src_rq, struct rq *dst_rq) { WARN_ON_ONCE(1); }
-static inline bool task_can_run_on_remote_rq(struct scx_sched *sch, struct task_struct *p, struct rq *rq, bool enforce) { return false; }
-static inline bool consume_remote_task(struct rq *this_rq, struct task_struct *p, struct scx_dispatch_q *dsq, struct rq *task_rq) { return false; }
-#endif	/* CONFIG_SMP */
 
 /**
  * move_task_between_dsqs() - Move a task from one DSQ to another
@@ -2890,9 +2894,7 @@ static void dispatch_to_local_dsq(struct scx_sched *sch, struct rq *rq,
 {
 	struct rq *src_rq = task_rq(p);
 	struct rq *dst_rq = container_of(dst_dsq, struct rq, scx.local_dsq);
-#ifdef CONFIG_SMP
 	struct rq *locked_rq = rq;
-#endif
 
 	/*
 	 * We're synchronized against dequeue through DISPATCHING. As @p can't
@@ -2906,7 +2908,6 @@ static void dispatch_to_local_dsq(struct scx_sched *sch, struct rq *rq,
 		return;
 	}
 
-#ifdef CONFIG_SMP
 	if (src_rq != dst_rq &&
 	    unlikely(!task_can_run_on_remote_rq(sch, p, dst_rq, true))) {
 		dispatch_enqueue(sch, find_global_dsq(p), p,
@@ -2966,9 +2967,6 @@ static void dispatch_to_local_dsq(struct scx_sched *sch, struct rq *rq,
 		raw_spin_rq_unlock(locked_rq);
 		raw_spin_rq_lock(rq);
 	}
-#else	/* CONFIG_SMP */
-	BUG();	/* control can not reach here on UP */
-#endif	/* CONFIG_SMP */
 }
 
 /**
@@ -3292,10 +3290,8 @@ static void set_next_task_scx(struct rq *rq, struct task_struct *p, bool first)
 static enum scx_cpu_preempt_reason
 preempt_reason_from_class(const struct sched_class *class)
 {
-#ifdef CONFIG_SMP
 	if (class == &stop_sched_class)
 		return SCX_CPU_PREEMPT_STOP;
-#endif
 	if (class == &dl_sched_class)
 		return SCX_CPU_PREEMPT_DL;
 	if (class == &rt_sched_class)
@@ -3308,14 +3304,12 @@ static void switch_class(struct rq *rq, struct task_struct *next)
 	struct scx_sched *sch = scx_root;
 	const struct sched_class *next_class = next->sched_class;
 
-#ifdef CONFIG_SMP
 	/*
 	 * Pairs with the smp_load_acquire() issued by a CPU in
 	 * kick_cpus_irq_workfn() who is waiting for this CPU to perform a
 	 * resched.
 	 */
 	smp_store_release(&rq->scx.pnt_seq, rq->scx.pnt_seq + 1);
-#endif
 	if (!(sch->ops.flags & SCX_OPS_HAS_CPU_PREEMPT))
 		return;
 
@@ -3512,8 +3506,6 @@ bool scx_prio_less(const struct task_struct *a, const struct task_struct *b,
 }
 #endif	/* CONFIG_SCHED_CORE */
 
-#ifdef CONFIG_SMP
-
 static int select_task_rq_scx(struct task_struct *p, int prev_cpu, int wake_flags)
 {
 	struct scx_sched *sch = scx_root;
@@ -3643,7 +3635,6 @@ static void rq_offline_scx(struct rq *rq)
 	rq->scx.flags &= ~SCX_RQ_ONLINE;
 }
 
-#endif	/* CONFIG_SMP */
 
 static bool check_rq_for_timeouts(struct rq *rq)
 {
@@ -4098,7 +4089,9 @@ static bool scx_cgroup_enabled;
 
 void scx_tg_init(struct task_group *tg)
 {
-	tg->scx_weight = CGROUP_WEIGHT_DFL;
+	tg->scx.weight = CGROUP_WEIGHT_DFL;
+	tg->scx.bw_period_us = default_bw_period_us();
+	tg->scx.bw_quota_us = RUNTIME_INF;
 }
 
 int scx_tg_online(struct task_group *tg)
@@ -4106,14 +4099,17 @@ int scx_tg_online(struct task_group *tg)
 	struct scx_sched *sch = scx_root;
 	int ret = 0;
 
-	WARN_ON_ONCE(tg->scx_flags & (SCX_TG_ONLINE | SCX_TG_INITED));
+	WARN_ON_ONCE(tg->scx.flags & (SCX_TG_ONLINE | SCX_TG_INITED));
 
 	percpu_down_read(&scx_cgroup_rwsem);
 
 	if (scx_cgroup_enabled) {
 		if (SCX_HAS_OP(sch, cgroup_init)) {
 			struct scx_cgroup_init_args args =
-				{ .weight = tg->scx_weight };
+				{ .weight = tg->scx.weight,
+				  .bw_period_us = tg->scx.bw_period_us,
+				  .bw_quota_us = tg->scx.bw_quota_us,
+				  .bw_burst_us = tg->scx.bw_burst_us };
 
 			ret = SCX_CALL_OP_RET(sch, SCX_KF_UNLOCKED, cgroup_init,
 					      NULL, tg->css.cgroup, &args);
@@ -4121,9 +4117,9 @@ int scx_tg_online(struct task_group *tg)
 			ret = ops_sanitize_err(sch, "cgroup_init", ret);
 		}
 		if (ret == 0)
-			tg->scx_flags |= SCX_TG_ONLINE | SCX_TG_INITED;
+			tg->scx.flags |= SCX_TG_ONLINE | SCX_TG_INITED;
 	} else {
-		tg->scx_flags |= SCX_TG_ONLINE;
+		tg->scx.flags |= SCX_TG_ONLINE;
 	}
 
 	percpu_up_read(&scx_cgroup_rwsem);
@@ -4134,15 +4130,15 @@ void scx_tg_offline(struct task_group *tg)
 {
 	struct scx_sched *sch = scx_root;
 
-	WARN_ON_ONCE(!(tg->scx_flags & SCX_TG_ONLINE));
+	WARN_ON_ONCE(!(tg->scx.flags & SCX_TG_ONLINE));
 
 	percpu_down_read(&scx_cgroup_rwsem);
 
 	if (scx_cgroup_enabled && SCX_HAS_OP(sch, cgroup_exit) &&
-	    (tg->scx_flags & SCX_TG_INITED))
+	    (tg->scx.flags & SCX_TG_INITED))
 		SCX_CALL_OP(sch, SCX_KF_UNLOCKED, cgroup_exit, NULL,
 			    tg->css.cgroup);
-	tg->scx_flags &= ~(SCX_TG_ONLINE | SCX_TG_INITED);
+	tg->scx.flags &= ~(SCX_TG_ONLINE | SCX_TG_INITED);
 
 	percpu_up_read(&scx_cgroup_rwsem);
 }
@@ -4251,11 +4247,11 @@ void scx_group_set_weight(struct task_group *tg, unsigned long weight)
 	percpu_down_read(&scx_cgroup_rwsem);
 
 	if (scx_cgroup_enabled && SCX_HAS_OP(sch, cgroup_set_weight) &&
-	    tg->scx_weight != weight)
+	    tg->scx.weight != weight)
 		SCX_CALL_OP(sch, SCX_KF_UNLOCKED, cgroup_set_weight, NULL,
 			    tg_cgrp(tg), weight);
 
-	tg->scx_weight = weight;
+	tg->scx.weight = weight;
 
 	percpu_up_read(&scx_cgroup_rwsem);
 }
@@ -4265,6 +4261,27 @@ void scx_group_set_idle(struct task_group *tg, bool idle)
 	/* TODO: Implement ops->cgroup_set_idle() */
 }
 
+void scx_group_set_bandwidth(struct task_group *tg,
+			     u64 period_us, u64 quota_us, u64 burst_us)
+{
+	struct scx_sched *sch = scx_root;
+
+	percpu_down_read(&scx_cgroup_rwsem);
+
+	if (scx_cgroup_enabled && SCX_HAS_OP(sch, cgroup_set_bandwidth) &&
+	    (tg->scx.bw_period_us != period_us ||
+	     tg->scx.bw_quota_us != quota_us ||
+	     tg->scx.bw_burst_us != burst_us))
+		SCX_CALL_OP(sch, SCX_KF_UNLOCKED, cgroup_set_bandwidth, NULL,
+			    tg_cgrp(tg), period_us, quota_us, burst_us);
+
+	tg->scx.bw_period_us = period_us;
+	tg->scx.bw_quota_us = quota_us;
+	tg->scx.bw_burst_us = burst_us;
+
+	percpu_up_read(&scx_cgroup_rwsem);
+}
+
 static void scx_cgroup_lock(void)
 {
 	percpu_down_write(&scx_cgroup_rwsem);
@@ -4308,14 +4325,12 @@ DEFINE_SCHED_CLASS(ext) = {
 	.put_prev_task		= put_prev_task_scx,
 	.set_next_task		= set_next_task_scx,
 
-#ifdef CONFIG_SMP
 	.select_task_rq		= select_task_rq_scx,
 	.task_woken		= task_woken_scx,
 	.set_cpus_allowed	= set_cpus_allowed_scx,
 
 	.rq_online		= rq_online_scx,
 	.rq_offline		= rq_offline_scx,
-#endif
 
 	.task_tick		= task_tick_scx,
 
@@ -4408,9 +4423,9 @@ static void scx_cgroup_exit(struct scx_sched *sch)
 	css_for_each_descendant_post(css, &root_task_group.css) {
 		struct task_group *tg = css_tg(css);
 
-		if (!(tg->scx_flags & SCX_TG_INITED))
+		if (!(tg->scx.flags & SCX_TG_INITED))
 			continue;
-		tg->scx_flags &= ~SCX_TG_INITED;
+		tg->scx.flags &= ~SCX_TG_INITED;
 
 		if (!sch->ops.cgroup_exit)
 			continue;
@@ -4442,14 +4457,19 @@ static int scx_cgroup_init(struct scx_sched *sch)
 	rcu_read_lock();
 	css_for_each_descendant_pre(css, &root_task_group.css) {
 		struct task_group *tg = css_tg(css);
-		struct scx_cgroup_init_args args = { .weight = tg->scx_weight };
+		struct scx_cgroup_init_args args = {
+			.weight = tg->scx.weight,
+			.bw_period_us = tg->scx.bw_period_us,
+			.bw_quota_us = tg->scx.bw_quota_us,
+			.bw_burst_us = tg->scx.bw_burst_us,
+		};
 
-		if ((tg->scx_flags &
+		if ((tg->scx.flags &
 		     (SCX_TG_ONLINE | SCX_TG_INITED)) != SCX_TG_ONLINE)
 			continue;
 
 		if (!sch->ops.cgroup_init) {
-			tg->scx_flags |= SCX_TG_INITED;
+			tg->scx.flags |= SCX_TG_INITED;
 			continue;
 		}
 
@@ -4464,7 +4484,7 @@ static int scx_cgroup_init(struct scx_sched *sch)
 			scx_error(sch, "ops.cgroup_init() failed (%d)", ret);
 			return ret;
 		}
-		tg->scx_flags |= SCX_TG_INITED;
+		tg->scx.flags |= SCX_TG_INITED;
 
 		rcu_read_lock();
 		css_put(css);
@@ -4656,6 +4676,41 @@ bool scx_allow_ttwu_queue(const struct task_struct *p)
 	       p->sched_class != &ext_sched_class;
 }
 
+/**
+ * scx_rcu_cpu_stall - sched_ext RCU CPU stall handler
+ *
+ * While there are various reasons why RCU CPU stalls can occur on a system
+ * that may not be caused by the current BPF scheduler, try kicking out the
+ * current scheduler in an attempt to recover the system to a good state before
+ * issuing panics.
+ */
+bool scx_rcu_cpu_stall(void)
+{
+	struct scx_sched *sch;
+
+	rcu_read_lock();
+
+	sch = rcu_dereference(scx_root);
+	if (unlikely(!sch)) {
+		rcu_read_unlock();
+		return false;
+	}
+
+	switch (scx_enable_state()) {
+	case SCX_ENABLING:
+	case SCX_ENABLED:
+		break;
+	default:
+		rcu_read_unlock();
+		return false;
+	}
+
+	scx_error(sch, "RCU CPU stall detected!");
+	rcu_read_unlock();
+
+	return true;
+}
+
 /**
  * scx_softlockup - sched_ext softlockup handler
  * @dur_s: number of seconds of CPU stuck due to soft lockup
@@ -5944,6 +5999,7 @@ static s32 sched_ext_ops__cgroup_prep_move(struct task_struct *p, struct cgroup
 static void sched_ext_ops__cgroup_move(struct task_struct *p, struct cgroup *from, struct cgroup *to) {}
 static void sched_ext_ops__cgroup_cancel_move(struct task_struct *p, struct cgroup *from, struct cgroup *to) {}
 static void sched_ext_ops__cgroup_set_weight(struct cgroup *cgrp, u32 weight) {}
+static void sched_ext_ops__cgroup_set_bandwidth(struct cgroup *cgrp, u64 period_us, u64 quota_us, u64 burst_us) {}
 #endif
 static void sched_ext_ops__cpu_online(s32 cpu) {}
 static void sched_ext_ops__cpu_offline(s32 cpu) {}
@@ -5981,6 +6037,7 @@ static struct sched_ext_ops __bpf_ops_sched_ext_ops = {
 	.cgroup_move		= sched_ext_ops__cgroup_move,
 	.cgroup_cancel_move	= sched_ext_ops__cgroup_cancel_move,
 	.cgroup_set_weight	= sched_ext_ops__cgroup_set_weight,
+	.cgroup_set_bandwidth	= sched_ext_ops__cgroup_set_bandwidth,
 #endif
 	.cpu_online		= sched_ext_ops__cpu_online,
 	.cpu_offline		= sched_ext_ops__cpu_offline,
@@ -6338,7 +6395,8 @@ __bpf_kfunc_start_defs();
  * When called from ops.dispatch(), there are no restrictions on @p or @dsq_id
  * and this function can be called upto ops.dispatch_max_batch times to insert
  * multiple tasks. scx_bpf_dispatch_nr_slots() returns the number of the
- * remaining slots. scx_bpf_consume() flushes the batch and resets the counter.
+ * remaining slots. scx_bpf_dsq_move_to_local() flushes the batch and resets the
+ * counter.
  *
  * This function doesn't have any locking restrictions and may be called under
  * BPF locks (in the future when BPF introduces more flexible locking).
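A hedged sketch of the batch/flush behaviour described in the comment above (illustrative only; assumes the tools/sched_ext BPF helpers and a hypothetical shared DSQ id SHARED_DSQ):

  /* Sketch: a dispatch path built around the insertion batching. */
  void BPF_STRUCT_OPS(example_dispatch, s32 cpu, struct task_struct *prev)
  {
  	/*
  	 * scx_bpf_dsq_insert() calls issued from ops.dispatch() are batched;
  	 * scx_bpf_dispatch_nr_slots() reports how many of the
  	 * ops.dispatch_max_batch slots are still free.
  	 */
  	if (!scx_bpf_dispatch_nr_slots())
  		return;

  	/* Moving a task onto this CPU's local DSQ flushes the pending batch. */
  	scx_bpf_dsq_move_to_local(SHARED_DSQ);
  }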
@@ -6362,14 +6420,6 @@ __bpf_kfunc void scx_bpf_dsq_insert(struct task_struct *p, u64 dsq_id, u64 slice
 	scx_dsq_insert_commit(p, dsq_id, enq_flags);
 }
 
-/* for backward compatibility, will be removed in v6.15 */
-__bpf_kfunc void scx_bpf_dispatch(struct task_struct *p, u64 dsq_id, u64 slice,
-				  u64 enq_flags)
-{
-	printk_deferred_once(KERN_WARNING "sched_ext: scx_bpf_dispatch() renamed to scx_bpf_dsq_insert()");
-	scx_bpf_dsq_insert(p, dsq_id, slice, enq_flags);
-}
-
 /**
  * scx_bpf_dsq_insert_vtime - Insert a task into the vtime priority queue of a DSQ
  * @p: task_struct to insert
@@ -6407,21 +6457,11 @@ __bpf_kfunc void scx_bpf_dsq_insert_vtime(struct task_struct *p, u64 dsq_id,
 	scx_dsq_insert_commit(p, dsq_id, enq_flags | SCX_ENQ_DSQ_PRIQ);
 }
 
-/* for backward compatibility, will be removed in v6.15 */
-__bpf_kfunc void scx_bpf_dispatch_vtime(struct task_struct *p, u64 dsq_id,
-					u64 slice, u64 vtime, u64 enq_flags)
-{
-	printk_deferred_once(KERN_WARNING "sched_ext: scx_bpf_dispatch_vtime() renamed to scx_bpf_dsq_insert_vtime()");
-	scx_bpf_dsq_insert_vtime(p, dsq_id, slice, vtime, enq_flags);
-}
-
 __bpf_kfunc_end_defs();
 
 BTF_KFUNCS_START(scx_kfunc_ids_enqueue_dispatch)
 BTF_ID_FLAGS(func, scx_bpf_dsq_insert, KF_RCU)
 BTF_ID_FLAGS(func, scx_bpf_dsq_insert_vtime, KF_RCU)
-BTF_ID_FLAGS(func, scx_bpf_dispatch, KF_RCU)
-BTF_ID_FLAGS(func, scx_bpf_dispatch_vtime, KF_RCU)
 BTF_KFUNCS_END(scx_kfunc_ids_enqueue_dispatch)
 
 static const struct btf_kfunc_id_set scx_kfunc_set_enqueue_dispatch = {
|
@ -6594,13 +6634,6 @@ __bpf_kfunc bool scx_bpf_dsq_move_to_local(u64 dsq_id)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* for backward compatibility, will be removed in v6.15 */
|
|
||||||
__bpf_kfunc bool scx_bpf_consume(u64 dsq_id)
|
|
||||||
{
|
|
||||||
printk_deferred_once(KERN_WARNING "sched_ext: scx_bpf_consume() renamed to scx_bpf_dsq_move_to_local()");
|
|
||||||
return scx_bpf_dsq_move_to_local(dsq_id);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* scx_bpf_dsq_move_set_slice - Override slice when moving between DSQs
|
* scx_bpf_dsq_move_set_slice - Override slice when moving between DSQs
|
||||||
* @it__iter: DSQ iterator in progress
|
* @it__iter: DSQ iterator in progress
|
||||||
|
@@ -6619,14 +6652,6 @@ __bpf_kfunc void scx_bpf_dsq_move_set_slice(struct bpf_iter_scx_dsq *it__iter,
 	kit->cursor.flags |= __SCX_DSQ_ITER_HAS_SLICE;
 }
 
-/* for backward compatibility, will be removed in v6.15 */
-__bpf_kfunc void scx_bpf_dispatch_from_dsq_set_slice(
-			struct bpf_iter_scx_dsq *it__iter, u64 slice)
-{
-	printk_deferred_once(KERN_WARNING "sched_ext: scx_bpf_dispatch_from_dsq_set_slice() renamed to scx_bpf_dsq_move_set_slice()");
-	scx_bpf_dsq_move_set_slice(it__iter, slice);
-}
-
 /**
  * scx_bpf_dsq_move_set_vtime - Override vtime when moving between DSQs
  * @it__iter: DSQ iterator in progress
@@ -6646,14 +6671,6 @@ __bpf_kfunc void scx_bpf_dsq_move_set_vtime(struct bpf_iter_scx_dsq *it__iter,
 	kit->cursor.flags |= __SCX_DSQ_ITER_HAS_VTIME;
 }
 
-/* for backward compatibility, will be removed in v6.15 */
-__bpf_kfunc void scx_bpf_dispatch_from_dsq_set_vtime(
-			struct bpf_iter_scx_dsq *it__iter, u64 vtime)
-{
-	printk_deferred_once(KERN_WARNING "sched_ext: scx_bpf_dispatch_from_dsq_set_vtime() renamed to scx_bpf_dsq_move_set_vtime()");
-	scx_bpf_dsq_move_set_vtime(it__iter, vtime);
-}
-
 /**
  * scx_bpf_dsq_move - Move a task from DSQ iteration to a DSQ
  * @it__iter: DSQ iterator in progress
@@ -6686,15 +6703,6 @@ __bpf_kfunc bool scx_bpf_dsq_move(struct bpf_iter_scx_dsq *it__iter,
 					    p, dsq_id, enq_flags);
 }
 
-/* for backward compatibility, will be removed in v6.15 */
-__bpf_kfunc bool scx_bpf_dispatch_from_dsq(struct bpf_iter_scx_dsq *it__iter,
-					   struct task_struct *p, u64 dsq_id,
-					   u64 enq_flags)
-{
-	printk_deferred_once(KERN_WARNING "sched_ext: scx_bpf_dispatch_from_dsq() renamed to scx_bpf_dsq_move()");
-	return scx_bpf_dsq_move(it__iter, p, dsq_id, enq_flags);
-}
-
 /**
  * scx_bpf_dsq_move_vtime - Move a task from DSQ iteration to a PRIQ DSQ
  * @it__iter: DSQ iterator in progress
@@ -6720,30 +6728,16 @@ __bpf_kfunc bool scx_bpf_dsq_move_vtime(struct bpf_iter_scx_dsq *it__iter,
 					    p, dsq_id, enq_flags | SCX_ENQ_DSQ_PRIQ);
 }
 
-/* for backward compatibility, will be removed in v6.15 */
-__bpf_kfunc bool scx_bpf_dispatch_vtime_from_dsq(struct bpf_iter_scx_dsq *it__iter,
-						 struct task_struct *p, u64 dsq_id,
-						 u64 enq_flags)
-{
-	printk_deferred_once(KERN_WARNING "sched_ext: scx_bpf_dispatch_from_dsq_vtime() renamed to scx_bpf_dsq_move_vtime()");
-	return scx_bpf_dsq_move_vtime(it__iter, p, dsq_id, enq_flags);
-}
-
 __bpf_kfunc_end_defs();
 
 BTF_KFUNCS_START(scx_kfunc_ids_dispatch)
 BTF_ID_FLAGS(func, scx_bpf_dispatch_nr_slots)
 BTF_ID_FLAGS(func, scx_bpf_dispatch_cancel)
 BTF_ID_FLAGS(func, scx_bpf_dsq_move_to_local)
-BTF_ID_FLAGS(func, scx_bpf_consume)
 BTF_ID_FLAGS(func, scx_bpf_dsq_move_set_slice)
 BTF_ID_FLAGS(func, scx_bpf_dsq_move_set_vtime)
 BTF_ID_FLAGS(func, scx_bpf_dsq_move, KF_RCU)
 BTF_ID_FLAGS(func, scx_bpf_dsq_move_vtime, KF_RCU)
-BTF_ID_FLAGS(func, scx_bpf_dispatch_from_dsq_set_slice)
-BTF_ID_FLAGS(func, scx_bpf_dispatch_from_dsq_set_vtime)
-BTF_ID_FLAGS(func, scx_bpf_dispatch_from_dsq, KF_RCU)
-BTF_ID_FLAGS(func, scx_bpf_dispatch_vtime_from_dsq, KF_RCU)
 BTF_KFUNCS_END(scx_kfunc_ids_dispatch)
 
 static const struct btf_kfunc_id_set scx_kfunc_set_dispatch = {
@@ -6874,10 +6868,6 @@ BTF_ID_FLAGS(func, scx_bpf_dsq_move_set_slice)
 BTF_ID_FLAGS(func, scx_bpf_dsq_move_set_vtime)
 BTF_ID_FLAGS(func, scx_bpf_dsq_move, KF_RCU)
 BTF_ID_FLAGS(func, scx_bpf_dsq_move_vtime, KF_RCU)
-BTF_ID_FLAGS(func, scx_bpf_dispatch_from_dsq_set_slice)
-BTF_ID_FLAGS(func, scx_bpf_dispatch_from_dsq_set_vtime)
-BTF_ID_FLAGS(func, scx_bpf_dispatch_from_dsq, KF_RCU)
-BTF_ID_FLAGS(func, scx_bpf_dispatch_vtime_from_dsq, KF_RCU)
 BTF_KFUNCS_END(scx_kfunc_ids_unlocked)
 
 static const struct btf_kfunc_id_set scx_kfunc_set_unlocked = {
--- a/kernel/sched/ext.h
+++ b/kernel/sched/ext.h
@@ -13,8 +13,24 @@ static inline bool scx_kf_allowed_if_unlocked(void)
 	return !current->scx.kf_mask;
 }
 
+static inline bool scx_rq_bypassing(struct rq *rq)
+{
+	return unlikely(rq->scx.flags & SCX_RQ_BYPASSING);
+}
+
 DECLARE_STATIC_KEY_FALSE(scx_ops_allow_queued_wakeup);
 
+DECLARE_PER_CPU(struct rq *, scx_locked_rq_state);
+
+/*
+ * Return the rq currently locked from an scx callback, or NULL if no rq is
+ * locked.
+ */
+static inline struct rq *scx_locked_rq(void)
+{
+	return __this_cpu_read(scx_locked_rq_state);
+}
+
 void scx_tick(struct rq *rq);
 void init_scx_entity(struct sched_ext_entity *scx);
 void scx_pre_fork(struct task_struct *p);
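As a hedged illustration of the accessor's contract (the helper name below is hypothetical and not part of this series):

  /* Hypothetical helper: only meaningful from within an scx ops callback. */
  static inline void example_assert_locked_scx_rq(void)
  {
  	struct rq *rq = scx_locked_rq();

  	/* NULL means the current callback was invoked without an rq lock held. */
  	if (rq)
  		lockdep_assert_rq_held(rq);
  }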
@@ -65,7 +81,7 @@ static inline void init_sched_ext_class(void) {}
 
 #endif	/* CONFIG_SCHED_CLASS_EXT */
 
-#if defined(CONFIG_SCHED_CLASS_EXT) && defined(CONFIG_SMP)
+#ifdef CONFIG_SCHED_CLASS_EXT
 void __scx_update_idle(struct rq *rq, bool idle, bool do_notify);
 
 static inline void scx_update_idle(struct rq *rq, bool idle, bool do_notify)
@@ -88,6 +104,7 @@ void scx_cgroup_finish_attach(void);
 void scx_cgroup_cancel_attach(struct cgroup_taskset *tset);
 void scx_group_set_weight(struct task_group *tg, unsigned long cgrp_weight);
 void scx_group_set_idle(struct task_group *tg, bool idle);
+void scx_group_set_bandwidth(struct task_group *tg, u64 period_us, u64 quota_us, u64 burst_us);
 #else	/* CONFIG_EXT_GROUP_SCHED */
 static inline void scx_tg_init(struct task_group *tg) {}
 static inline int scx_tg_online(struct task_group *tg) { return 0; }
@@ -98,5 +115,6 @@ static inline void scx_cgroup_finish_attach(void) {}
 static inline void scx_cgroup_cancel_attach(struct cgroup_taskset *tset) {}
 static inline void scx_group_set_weight(struct task_group *tg, unsigned long cgrp_weight) {}
 static inline void scx_group_set_idle(struct task_group *tg, bool idle) {}
+static inline void scx_group_set_bandwidth(struct task_group *tg, u64 period_us, u64 quota_us, u64 burst_us) {}
 #endif	/* CONFIG_EXT_GROUP_SCHED */
 #endif	/* CONFIG_CGROUP_SCHED */
--- a/kernel/sched/ext_idle.c
+++ b/kernel/sched/ext_idle.c
@@ -17,7 +17,6 @@ static DEFINE_STATIC_KEY_FALSE(scx_builtin_idle_enabled);
 /* Enable/disable per-node idle cpumasks */
 static DEFINE_STATIC_KEY_FALSE(scx_builtin_idle_per_node);
 
-#ifdef CONFIG_SMP
 /* Enable/disable LLC aware optimizations */
 static DEFINE_STATIC_KEY_FALSE(scx_selcpu_topo_llc);
 
@@ -75,7 +74,7 @@ static int scx_cpu_node_if_enabled(int cpu)
 	return cpu_to_node(cpu);
 }
 
-bool scx_idle_test_and_clear_cpu(int cpu)
+static bool scx_idle_test_and_clear_cpu(int cpu)
 {
 	int node = scx_cpu_node_if_enabled(cpu);
 	struct cpumask *idle_cpus = idle_cpumask(node)->cpu;
@@ -198,7 +197,7 @@ pick_idle_cpu_from_online_nodes(const struct cpumask *cpus_allowed, int node, u6
 /*
  * Find an idle CPU in the system, starting from @node.
  */
-s32 scx_pick_idle_cpu(const struct cpumask *cpus_allowed, int node, u64 flags)
+static s32 scx_pick_idle_cpu(const struct cpumask *cpus_allowed, int node, u64 flags)
 {
 	s32 cpu;
 
|
|
||||||
|
@ -250,7 +249,7 @@ static struct cpumask *llc_span(s32 cpu)
|
||||||
|
|
||||||
sd = rcu_dereference(per_cpu(sd_llc, cpu));
|
sd = rcu_dereference(per_cpu(sd_llc, cpu));
|
||||||
if (!sd)
|
if (!sd)
|
||||||
return 0;
|
return NULL;
|
||||||
|
|
||||||
return sched_domain_span(sd);
|
return sched_domain_span(sd);
|
||||||
}
|
}
|
||||||
|
@@ -794,7 +793,6 @@ static void reset_idle_masks(struct sched_ext_ops *ops)
 		cpumask_and(idle_cpumask(node)->smt, cpu_online_mask, node_mask);
 	}
 }
-#endif	/* CONFIG_SMP */
 
 void scx_idle_enable(struct sched_ext_ops *ops)
 {
|
@ -808,9 +806,7 @@ void scx_idle_enable(struct sched_ext_ops *ops)
|
||||||
else
|
else
|
||||||
static_branch_disable_cpuslocked(&scx_builtin_idle_per_node);
|
static_branch_disable_cpuslocked(&scx_builtin_idle_per_node);
|
||||||
|
|
||||||
#ifdef CONFIG_SMP
|
|
||||||
reset_idle_masks(ops);
|
reset_idle_masks(ops);
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void scx_idle_disable(void)
|
void scx_idle_disable(void)
|
||||||
|
@ -860,8 +856,8 @@ static bool check_builtin_idle_enabled(void)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
s32 select_cpu_from_kfunc(struct task_struct *p, s32 prev_cpu, u64 wake_flags,
|
static s32 select_cpu_from_kfunc(struct task_struct *p, s32 prev_cpu, u64 wake_flags,
|
||||||
const struct cpumask *allowed, u64 flags)
|
const struct cpumask *allowed, u64 flags)
|
||||||
{
|
{
|
||||||
struct rq *rq;
|
struct rq *rq;
|
||||||
struct rq_flags rf;
|
struct rq_flags rf;
|
||||||
|
@ -896,7 +892,6 @@ s32 select_cpu_from_kfunc(struct task_struct *p, s32 prev_cpu, u64 wake_flags,
|
||||||
if (!rq)
|
if (!rq)
|
||||||
lockdep_assert_held(&p->pi_lock);
|
lockdep_assert_held(&p->pi_lock);
|
||||||
|
|
||||||
#ifdef CONFIG_SMP
|
|
||||||
/*
|
/*
|
||||||
* This may also be called from ops.enqueue(), so we need to handle
|
* This may also be called from ops.enqueue(), so we need to handle
|
||||||
* per-CPU tasks as well. For these tasks, we can skip all idle CPU
|
* per-CPU tasks as well. For these tasks, we can skip all idle CPU
|
||||||
|
@@ -913,9 +908,7 @@ static s32 select_cpu_from_kfunc(struct task_struct *p, s32 prev_cpu, u64 wake_f
 		cpu = scx_select_cpu_dfl(p, prev_cpu, wake_flags,
 					 allowed ?: p->cpus_ptr, flags);
 	}
-#else
-	cpu = -EBUSY;
-#endif
+
 	if (scx_kf_allowed_if_unlocked())
 		task_rq_unlock(rq, p, &rf);
 
@@ -929,14 +922,10 @@ static s32 select_cpu_from_kfunc(struct task_struct *p, s32 prev_cpu, u64 wake_f
  */
 __bpf_kfunc int scx_bpf_cpu_node(s32 cpu)
 {
-#ifdef CONFIG_NUMA
 	if (!kf_cpu_valid(cpu, NULL))
 		return NUMA_NO_NODE;
 
 	return cpu_to_node(cpu);
-#else
-	return 0;
-#endif
 }
 
 /**
@@ -1010,11 +999,7 @@ __bpf_kfunc const struct cpumask *scx_bpf_get_idle_cpumask_node(int node)
 	if (node < 0)
 		return cpu_none_mask;
 
-#ifdef CONFIG_SMP
 	return idle_cpumask(node)->cpu;
-#else
-	return cpu_none_mask;
-#endif
 }
 
 /**
@ -1034,11 +1019,7 @@ __bpf_kfunc const struct cpumask *scx_bpf_get_idle_cpumask(void)
|
||||||
if (!check_builtin_idle_enabled())
|
if (!check_builtin_idle_enabled())
|
||||||
return cpu_none_mask;
|
return cpu_none_mask;
|
||||||
|
|
||||||
#ifdef CONFIG_SMP
|
|
||||||
return idle_cpumask(NUMA_NO_NODE)->cpu;
|
return idle_cpumask(NUMA_NO_NODE)->cpu;
|
||||||
#else
|
|
||||||
return cpu_none_mask;
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -1057,14 +1038,10 @@ __bpf_kfunc const struct cpumask *scx_bpf_get_idle_smtmask_node(int node)
|
||||||
if (node < 0)
|
if (node < 0)
|
||||||
return cpu_none_mask;
|
return cpu_none_mask;
|
||||||
|
|
||||||
#ifdef CONFIG_SMP
|
|
||||||
if (sched_smt_active())
|
if (sched_smt_active())
|
||||||
return idle_cpumask(node)->smt;
|
return idle_cpumask(node)->smt;
|
||||||
else
|
else
|
||||||
return idle_cpumask(node)->cpu;
|
return idle_cpumask(node)->cpu;
|
||||||
#else
|
|
||||||
return cpu_none_mask;
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -1085,14 +1062,10 @@ __bpf_kfunc const struct cpumask *scx_bpf_get_idle_smtmask(void)
|
||||||
if (!check_builtin_idle_enabled())
|
if (!check_builtin_idle_enabled())
|
||||||
return cpu_none_mask;
|
return cpu_none_mask;
|
||||||
|
|
||||||
#ifdef CONFIG_SMP
|
|
||||||
if (sched_smt_active())
|
if (sched_smt_active())
|
||||||
return idle_cpumask(NUMA_NO_NODE)->smt;
|
return idle_cpumask(NUMA_NO_NODE)->smt;
|
||||||
else
|
else
|
||||||
return idle_cpumask(NUMA_NO_NODE)->cpu;
|
return idle_cpumask(NUMA_NO_NODE)->cpu;
|
||||||
#else
|
|
||||||
return cpu_none_mask;
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -1125,10 +1098,10 @@ __bpf_kfunc bool scx_bpf_test_and_clear_cpu_idle(s32 cpu)
|
||||||
if (!check_builtin_idle_enabled())
|
if (!check_builtin_idle_enabled())
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
if (kf_cpu_valid(cpu, NULL))
|
if (!kf_cpu_valid(cpu, NULL))
|
||||||
return scx_idle_test_and_clear_cpu(cpu);
|
|
||||||
else
|
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
return scx_idle_test_and_clear_cpu(cpu);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
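For orientation on the kfunc touched in the last hunk above: scx_bpf_test_and_clear_cpu_idle() is what a BPF scheduler calls to atomically claim an idle CPU. A minimal sketch of an ops.select_cpu() callback using it; the struct-ops name and the direct local-DSQ dispatch are illustrative only and not part of this series:

	s32 BPF_STRUCT_OPS(example_select_cpu, struct task_struct *p, s32 prev_cpu, u64 wake_flags)
	{
		/*
		 * If the previously used CPU is still idle, claim it (the kfunc
		 * atomically clears its idle bit) and queue the task directly on
		 * that CPU's local DSQ, which skips a later ops.enqueue().
		 */
		if (scx_bpf_test_and_clear_cpu_idle(prev_cpu)) {
			scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL, 0);
			return prev_cpu;
		}

		/* Otherwise keep prev_cpu and let ops.enqueue() place the task. */
		return prev_cpu;
	}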
--- a/kernel/sched/ext_idle.h
+++ b/kernel/sched/ext_idle.h
@@ -12,20 +12,8 @@
 
 struct sched_ext_ops;
 
-#ifdef CONFIG_SMP
 void scx_idle_update_selcpu_topology(struct sched_ext_ops *ops);
 void scx_idle_init_masks(void);
-bool scx_idle_test_and_clear_cpu(int cpu);
-s32 scx_pick_idle_cpu(const struct cpumask *cpus_allowed, int node, u64 flags);
-#else /* !CONFIG_SMP */
-static inline void scx_idle_update_selcpu_topology(struct sched_ext_ops *ops) {}
-static inline void scx_idle_init_masks(void) {}
-static inline bool scx_idle_test_and_clear_cpu(int cpu) { return false; }
-static inline s32 scx_pick_idle_cpu(const struct cpumask *cpus_allowed, int node, u64 flags)
-{
-	return -EBUSY;
-}
-#endif /* CONFIG_SMP */
 
 s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags,
 		       const struct cpumask *cpus_allowed, u64 flags);
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -403,7 +403,7 @@ static inline bool dl_server_active(struct sched_dl_entity *dl_se)
 
 extern struct list_head task_groups;
 
-#ifdef CONFIG_CFS_BANDWIDTH
+#ifdef CONFIG_GROUP_SCHED_BANDWIDTH
 extern const u64 max_bw_quota_period_us;
 
 /*
@@ -414,7 +414,7 @@ static inline u64 default_bw_period_us(void)
 {
 	return 100000ULL;
 }
-#endif /* CONFIG_CFS_BANDWIDTH */
+#endif /* CONFIG_GROUP_SCHED_BANDWIDTH */
 
 struct cfs_bandwidth {
 #ifdef CONFIG_CFS_BANDWIDTH
@@ -472,10 +472,7 @@ struct task_group {
 	struct rt_bandwidth	rt_bandwidth;
 #endif
 
-#ifdef CONFIG_EXT_GROUP_SCHED
-	u32			scx_flags;	/* SCX_TG_* */
-	u32			scx_weight;
-#endif
+	struct scx_task_group	scx;
 
 	struct rcu_head		rcu;
 	struct list_head	list;
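The last hunk above folds the open-coded scx_flags/scx_weight members of struct task_group into a single struct scx_task_group. A rough sketch of that grouping, based only on what is visible in this diff; the real definition lives in the sched_ext headers and presumably also carries the per-cgroup bandwidth state used by the example scheduler further down:

	/* hedged sketch, not the verbatim kernel definition */
	struct scx_task_group {
		u32	flags;		/* SCX_TG_*, was task_group::scx_flags */
		u32	weight;		/* was task_group::scx_weight */
		/* plus, presumably, bw_period_us / bw_quota_us / bw_burst_us */
	};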
--- a/tools/sched_ext/scx_qmap.bpf.c
+++ b/tools/sched_ext/scx_qmap.bpf.c
@@ -615,6 +615,26 @@ void BPF_STRUCT_OPS(qmap_dump_task, struct scx_dump_ctx *dctx, struct task_struc
 		    taskc->force_local, taskc->core_sched_seq);
 }
 
+s32 BPF_STRUCT_OPS(qmap_cgroup_init, struct cgroup *cgrp, struct scx_cgroup_init_args *args)
+{
+	bpf_printk("CGRP INIT %llu weight=%u period=%lu quota=%ld burst=%lu",
+		   cgrp->kn->id, args->weight, args->bw_period_us,
+		   args->bw_quota_us, args->bw_burst_us);
+	return 0;
+}
+
+void BPF_STRUCT_OPS(qmap_cgroup_set_weight, struct cgroup *cgrp, u32 weight)
+{
+	bpf_printk("CGRP SET %llu weight=%u", cgrp->kn->id, weight);
+}
+
+void BPF_STRUCT_OPS(qmap_cgroup_set_bandwidth, struct cgroup *cgrp,
+		    u64 period_us, u64 quota_us, u64 burst_us)
+{
+	bpf_printk("CGRP SET %llu period=%lu quota=%ld burst=%lu", cgrp->kn->id,
+		   period_us, quota_us, burst_us);
+}
+
 /*
  * Print out the online and possible CPU map using bpf_printk() as a
  * demonstration of using the cpumask kfuncs and ops.cpu_on/offline().
@@ -840,6 +860,9 @@ SCX_OPS_DEFINE(qmap_ops,
	       .dump			= (void *)qmap_dump,
	       .dump_cpu		= (void *)qmap_dump_cpu,
	       .dump_task		= (void *)qmap_dump_task,
+	       .cgroup_init		= (void *)qmap_cgroup_init,
+	       .cgroup_set_weight	= (void *)qmap_cgroup_set_weight,
+	       .cgroup_set_bandwidth	= (void *)qmap_cgroup_set_bandwidth,
	       .cpu_online		= (void *)qmap_cpu_online,
	       .cpu_offline		= (void *)qmap_cpu_offline,
	       .init			= (void *)qmap_init,
--- a/tools/testing/selftests/sched_ext/maximal.bpf.c
+++ b/tools/testing/selftests/sched_ext/maximal.bpf.c
@@ -123,6 +123,10 @@ void BPF_STRUCT_OPS(maximal_cgroup_cancel_move, struct task_struct *p,
 void BPF_STRUCT_OPS(maximal_cgroup_set_weight, struct cgroup *cgrp, u32 weight)
 {}
 
+void BPF_STRUCT_OPS(maximal_cgroup_set_bandwidth, struct cgroup *cgrp,
+		    u64 period_us, u64 quota_us, u64 burst_us)
+{}
+
 s32 BPF_STRUCT_OPS_SLEEPABLE(maximal_init)
 {
 	return scx_bpf_create_dsq(DSQ_ID, -1);
@@ -160,6 +164,7 @@ struct sched_ext_ops maximal_ops = {
 	.cgroup_move		= (void *) maximal_cgroup_move,
 	.cgroup_cancel_move	= (void *) maximal_cgroup_cancel_move,
 	.cgroup_set_weight	= (void *) maximal_cgroup_set_weight,
+	.cgroup_set_bandwidth	= (void *) maximal_cgroup_set_bandwidth,
 	.init			= (void *) maximal_init,
 	.exit			= (void *) maximal_exit,
 	.name			= "maximal",
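For context on the .cgroup_set_bandwidth callback wired up in the two files above: its period/quota/burst arguments mirror the cgroup v2 "cpu.max" and "cpu.max.burst" interface, in microseconds. A hedged illustration of the mapping, assuming the default burst of 0; the cgroup path and numbers are made up:

	/*
	 * # echo "50000 100000" > /sys/fs/cgroup/foo/cpu.max
	 *
	 * would be reported to the BPF scheduler roughly as:
	 *
	 *   ops.cgroup_set_bandwidth(cgrp, 100000, 50000, 0);
	 *
	 * i.e. period_us = 100000, quota_us = 50000, burst_us = 0.
	 */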