Commit 2fa0fbeb authored by Tejun Heo's avatar Tejun Heo
Browse files

sched_ext: Implement auto local dispatching of migration disabled tasks



Migration disabled tasks are special and pinned to their previous CPUs. They
tripped up some unsuspecting BPF schedulers as their ->nr_cpus_allowed may
not agree with the bits set in ->cpus_ptr. Make it easier for BPF schedulers
by automatically dispatching them to the pinned local DSQs by default. If a
BPF scheduler wants to handle migration disabled tasks explicitly, it can
set SCX_OPS_ENQ_MIGRATION_DISABLED.

Signed-off-by: default avatarTejun Heo <tj@kernel.org>
Acked-by: default avatarAndrea Righi <arighi@nvidia.com>
parent 029b6ce7
Loading
Loading
Loading
Loading
+23 −0
Original line number Diff line number Diff line
@@ -122,6 +122,19 @@ enum scx_ops_flags {
	 */
	SCX_OPS_SWITCH_PARTIAL	= 1LLU << 3,

	/*
	 * A migration disabled task can only execute on its current CPU. By
	 * default, such tasks are automatically put on the CPU's local DSQ with
	 * the default slice on enqueue. If this ops flag is set, they also go
	 * through ops.enqueue().
	 *
	 * A migration disabled task never invokes ops.select_cpu() as it can
	 * only select the current CPU. Also, p->cpus_ptr will only contain its
	 * current CPU while p->nr_cpus_allowed keeps tracking p->user_cpus_ptr
	 * and thus may disagree with cpumask_weight(p->cpus_ptr).
	 */
	SCX_OPS_ENQ_MIGRATION_DISABLED = 1LLU << 4,

	/*
	 * CPU cgroup support flags
	 */
@@ -130,6 +143,7 @@ enum scx_ops_flags {
	SCX_OPS_ALL_FLAGS	= SCX_OPS_KEEP_BUILTIN_IDLE |
				  SCX_OPS_ENQ_LAST |
				  SCX_OPS_ENQ_EXITING |
				  SCX_OPS_ENQ_MIGRATION_DISABLED |
				  SCX_OPS_SWITCH_PARTIAL |
				  SCX_OPS_HAS_CGROUP_WEIGHT,
};
@@ -882,6 +896,7 @@ static bool scx_warned_zero_slice;

static DEFINE_STATIC_KEY_FALSE(scx_ops_enq_last);
static DEFINE_STATIC_KEY_FALSE(scx_ops_enq_exiting);
static DEFINE_STATIC_KEY_FALSE(scx_ops_enq_migration_disabled);
static DEFINE_STATIC_KEY_FALSE(scx_ops_cpu_preempt);
static DEFINE_STATIC_KEY_FALSE(scx_builtin_idle_enabled);

@@ -2014,6 +2029,11 @@ static void do_enqueue_task(struct rq *rq, struct task_struct *p, u64 enq_flags,
	    unlikely(p->flags & PF_EXITING))
		goto local;

	/* see %SCX_OPS_ENQ_MIGRATION_DISABLED */
	if (!static_branch_unlikely(&scx_ops_enq_migration_disabled) &&
	    is_migration_disabled(p))
		goto local;

	if (!SCX_HAS_OP(enqueue))
		goto global;

@@ -5052,6 +5072,7 @@ static void scx_ops_disable_workfn(struct kthread_work *work)
		static_branch_disable(&scx_has_op[i]);
	static_branch_disable(&scx_ops_enq_last);
	static_branch_disable(&scx_ops_enq_exiting);
	static_branch_disable(&scx_ops_enq_migration_disabled);
	static_branch_disable(&scx_ops_cpu_preempt);
	static_branch_disable(&scx_builtin_idle_enabled);
	synchronize_rcu();
@@ -5661,6 +5682,8 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)

	if (ops->flags & SCX_OPS_ENQ_EXITING)
		static_branch_enable(&scx_ops_enq_exiting);
	if (ops->flags & SCX_OPS_ENQ_MIGRATION_DISABLED)
		static_branch_enable(&scx_ops_enq_migration_disabled);
	if (scx_ops.cpu_acquire || scx_ops.cpu_release)
		static_branch_enable(&scx_ops_cpu_preempt);