sched_ext: Factor out scx_dispatch_sched() (39d0b2c4) · Commits · git / linux-net

kernel/sched/ext.c

+65 −58

Original line number	Diff line number	Diff line
		@@ -2388,67 +2388,22 @@ static inline void maybe_queue_balance_callback(struct rq *rq)
		rq->scx.flags &= ~SCX_RQ_BAL_CB_PENDING;
		}

		static int balance_one(struct rq *rq, struct task_struct *prev)
		static bool scx_dispatch_sched(struct scx_sched *sch, struct rq *rq,
		struct task_struct *prev)
		{
		struct scx_sched *sch = scx_root;
		struct scx_dsp_ctx *dspc = this_cpu_ptr(scx_dsp_ctx);
		bool prev_on_scx = prev->sched_class == &ext_sched_class;
		bool prev_on_rq = prev->scx.flags & SCX_TASK_QUEUED;
		int nr_loops = SCX_DSP_MAX_LOOPS;
		s32 cpu = cpu_of(rq);

		lockdep_assert_rq_held(rq);
		rq->scx.flags |= SCX_RQ_IN_BALANCE;
		rq->scx.flags &= ~SCX_RQ_BAL_KEEP;

		if ((sch->ops.flags & SCX_OPS_HAS_CPU_PREEMPT) &&
		unlikely(rq->scx.cpu_released)) {
		/*
		* If the previous sched_class for the current CPU was not SCX,
		* notify the BPF scheduler that it again has control of the
		* core. This callback complements ->cpu_release(), which is
		* emitted in switch_class().
		*/
		if (SCX_HAS_OP(sch, cpu_acquire))
		SCX_CALL_OP(sch, SCX_KF_REST, cpu_acquire, rq, cpu, NULL);
		rq->scx.cpu_released = false;
		}

		if (prev_on_scx) {
		update_curr_scx(rq);

		/*
		* If @prev is runnable & has slice left, it has priority and
		* fetching more just increases latency for the fetched tasks.
		* Tell pick_task_scx() to keep running @prev. If the BPF
		* scheduler wants to handle this explicitly, it should
		* implement ->cpu_release().
		*
		* See scx_disable_workfn() for the explanation on the bypassing
		* test.
		*/
		if (prev_on_rq && prev->scx.slice && !scx_bypassing(sch, cpu)) {
		rq->scx.flags |= SCX_RQ_BAL_KEEP;
		goto has_tasks;
		}
		}

		/* if there already are tasks to run, nothing to do */
		if (rq->scx.local_dsq.nr)
		goto has_tasks;

		if (consume_global_dsq(sch, rq))
		goto has_tasks;
		return true;

		if (scx_bypassing(sch, cpu)) {
		if (consume_dispatch_q(sch, rq, bypass_dsq(sch, cpu)))
		goto has_tasks;
		else
		goto no_tasks;
		}
		if (scx_bypassing(sch, cpu))
		return consume_dispatch_q(sch, rq, bypass_dsq(sch, cpu));

		if (unlikely(!SCX_HAS_OP(sch, dispatch)) || !scx_rq_online(rq))
		goto no_tasks;
		return false;

		dspc->rq = rq;

		@@ -2467,14 +2422,14 @@ static int balance_one(struct rq *rq, struct task_struct *prev)

		flush_dispatch_buf(sch, rq);

		if (prev_on_rq && prev->scx.slice) {
		if ((prev->scx.flags & SCX_TASK_QUEUED) && prev->scx.slice) {
		rq->scx.flags |= SCX_RQ_BAL_KEEP;
		goto has_tasks;
		return true;
		}
		if (rq->scx.local_dsq.nr)
		goto has_tasks;
		return true;
		if (consume_global_dsq(sch, rq))
		goto has_tasks;
		return true;

		/*
		* ops.dispatch() can trap us in this loop by repeatedly
		@@ -2483,7 +2438,7 @@ static int balance_one(struct rq *rq, struct task_struct *prev)
		* balance(), we want to complete this scheduling cycle and then
		* start a new one. IOW, we want to call resched_curr() on the
		* next, most likely idle, task, not the current one. Use
		* scx_kick_cpu() for deferred kicking.
		* __scx_bpf_kick_cpu() for deferred kicking.
		*/
		if (unlikely(!--nr_loops)) {
		scx_kick_cpu(sch, cpu, 0);
		@@ -2491,12 +2446,64 @@ static int balance_one(struct rq *rq, struct task_struct *prev)
		}
		} while (dspc->nr_tasks);

		no_tasks:
		return false;
		}

		static int balance_one(struct rq *rq, struct task_struct *prev)
		{
		struct scx_sched *sch = scx_root;
		s32 cpu = cpu_of(rq);

		lockdep_assert_rq_held(rq);
		rq->scx.flags |= SCX_RQ_IN_BALANCE;
		rq->scx.flags &= ~SCX_RQ_BAL_KEEP;

		if ((sch->ops.flags & SCX_OPS_HAS_CPU_PREEMPT) &&
		unlikely(rq->scx.cpu_released)) {
		/*
		* If the previous sched_class for the current CPU was not SCX,
		* notify the BPF scheduler that it again has control of the
		* core. This callback complements ->cpu_release(), which is
		* emitted in switch_class().
		*/
		if (SCX_HAS_OP(sch, cpu_acquire))
		SCX_CALL_OP(sch, SCX_KF_REST, cpu_acquire, rq, cpu, NULL);
		rq->scx.cpu_released = false;
		}

		if (prev->sched_class == &ext_sched_class) {
		update_curr_scx(rq);

		/*
		* If @prev is runnable & has slice left, it has priority and
		* fetching more just increases latency for the fetched tasks.
		* Tell pick_task_scx() to keep running @prev. If the BPF
		* scheduler wants to handle this explicitly, it should
		* implement ->cpu_release().
		*
		* See scx_disable_workfn() for the explanation on the bypassing
		* test.
		*/
		if ((prev->scx.flags & SCX_TASK_QUEUED) && prev->scx.slice &&
		!scx_bypassing(sch, cpu)) {
		rq->scx.flags |= SCX_RQ_BAL_KEEP;
		goto has_tasks;
		}
		}

		/* if there already are tasks to run, nothing to do */
		if (rq->scx.local_dsq.nr)
		goto has_tasks;

		/* dispatch @sch */
		if (scx_dispatch_sched(sch, rq, prev))
		goto has_tasks;

		/*
		* Didn't find another task to run. Keep running @prev unless
		* %SCX_OPS_ENQ_LAST is in effect.
		*/
		if (prev_on_rq &&
		if ((prev->scx.flags & SCX_TASK_QUEUED) &&
		(!(sch->ops.flags & SCX_OPS_ENQ_LAST) || scx_bypassing(sch, cpu))) {
		rq->scx.flags |= SCX_RQ_BAL_KEEP;
		__scx_add_event(sch, SCX_EV_DISPATCH_KEEP_LAST, 1);