Commit ea4593e9 authored by Tejun Heo
Browse files

sched_ext: Relocate run_deferred() and its callees



Previously, both process_ddsp_deferred_locals() and reenq_local() required
forward declarations. Reorganize so that only run_deferred() needs to be
declared. Both callees are grouped right before run_deferred() for better
locality. This reduces forward declaration clutter and will ease adding more
to the run_deferred() path.

No functional changes.

v2: Also relocate process_ddsp_deferred_locals() next to run_deferred()
    (Daniel Jordan).

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Emil Tsalapatis <emil@etsalapatis.com>
Reviewed-by: Daniel Jordan <daniel.m.jordan@oracle.com>
Reviewed-by: Andrea Righi <arighi@nvidia.com>
parent 053d27fb
Loading
Loading
Loading
Loading
+92 −94
Original line number Diff line number Diff line
@@ -193,9 +193,8 @@ MODULE_PARM_DESC(bypass_lb_intv_us, "bypass load balance interval in microsecond
#define CREATE_TRACE_POINTS
#include <trace/events/sched_ext.h>

static void process_ddsp_deferred_locals(struct rq *rq);
static void run_deferred(struct rq *rq);
static bool task_dead_and_done(struct task_struct *p);
static u32 reenq_local(struct scx_sched *sch, struct rq *rq);
static void scx_kick_cpu(struct scx_sched *sch, s32 cpu, u64 flags);
static void scx_disable(struct scx_sched *sch, enum scx_exit_kind kind);
static bool scx_vexit(struct scx_sched *sch, enum scx_exit_kind kind,
@@ -1003,23 +1002,6 @@ static int ops_sanitize_err(struct scx_sched *sch, const char *ops_name, s32 err
	return -EPROTO;
}

/*
 * Run the work that was deferred on @rq: first the deferred direct
 * dispatches handled by process_ddsp_deferred_locals(), then one
 * reenq_local() pass for every entry queued on
 * @rq->scx.deferred_reenq_locals.
 */
static void run_deferred(struct rq *rq)
{
	struct scx_sched_pcpu *pcpu, *tmp;
	struct llist_node *batch;

	process_ddsp_deferred_locals(rq);

	/* nothing queued for local re-enqueueing */
	if (llist_empty(&rq->scx.deferred_reenq_locals))
		return;

	/*
	 * Detach the whole pending list in one go, then reinitialize each
	 * node before running its reenq_local() pass.
	 */
	batch = llist_del_all(&rq->scx.deferred_reenq_locals);

	llist_for_each_entry_safe(pcpu, tmp, batch,
				  deferred_reenq_locals_node) {
		init_llist_node(&pcpu->deferred_reenq_locals_node);
		reenq_local(pcpu->sch, rq);
	}
}

static void deferred_bal_cb_workfn(struct rq *rq)
{
	run_deferred(rq);
@@ -2624,33 +2606,6 @@ static int balance_one(struct rq *rq, struct task_struct *prev)
	return true;
}

/*
 * Drain @rq->scx.ddsp_deferred_locals. Each queued task is unlinked from the
 * list, its target DSQ is resolved from the recorded ddsp_dsq_id and, provided
 * that DSQ's id is %SCX_DSQ_LOCAL, the task is handed to
 * dispatch_to_local_dsq() together with the recorded ddsp_enq_flags.
 *
 * Must be called with @rq locked.
 */
static void process_ddsp_deferred_locals(struct rq *rq)
{
	struct task_struct *p;

	lockdep_assert_rq_held(rq);

	/*
	 * Now that @rq can be unlocked, execute the deferred enqueueing of
	 * tasks directly dispatched to the local DSQs of other CPUs. See
	 * direct_dispatch(). Keep popping from the head instead of using
	 * list_for_each_entry_safe() as dispatch_to_local_dsq() may unlock @rq
	 * temporarily.
	 */
	while ((p = list_first_entry_or_null(&rq->scx.ddsp_deferred_locals,
				struct task_struct, scx.dsq_list.node))) {
		struct scx_sched *sch = scx_task_sched(p);
		struct scx_dispatch_q *dsq;

		/* take @p off the deferred list before dispatching it */
		list_del_init(&p->scx.dsq_list.node);

		dsq = find_dsq_for_dispatch(sch, rq, p->scx.ddsp_dsq_id, task_cpu(p));
		/* anything but a local DSQ is unexpected here: warn once and skip */
		if (!WARN_ON_ONCE(dsq->id != SCX_DSQ_LOCAL))
			dispatch_to_local_dsq(sch, rq, dsq, p,
					      p->scx.ddsp_enq_flags);
	}
}

static void set_next_task_scx(struct rq *rq, struct task_struct *p, bool first)
{
	struct scx_sched *sch = scx_task_sched(p);
@@ -3071,7 +3026,6 @@ static void rq_offline_scx(struct rq *rq)
	rq->scx.flags &= ~SCX_RQ_ONLINE;
}


static bool check_rq_for_timeouts(struct rq *rq)
{
	struct scx_sched *sch;
@@ -3611,6 +3565,97 @@ int scx_check_setscheduler(struct task_struct *p, int policy)
	return 0;
}

/*
 * Drain @rq->scx.ddsp_deferred_locals. Each queued task is unlinked from the
 * list, its target DSQ is resolved from the recorded ddsp_dsq_id and, provided
 * that DSQ's id is %SCX_DSQ_LOCAL, the task is handed to
 * dispatch_to_local_dsq() together with the recorded ddsp_enq_flags.
 *
 * Must be called with @rq locked.
 */
static void process_ddsp_deferred_locals(struct rq *rq)
{
	struct task_struct *p;

	lockdep_assert_rq_held(rq);

	/*
	 * Now that @rq can be unlocked, execute the deferred enqueueing of
	 * tasks directly dispatched to the local DSQs of other CPUs. See
	 * direct_dispatch(). Keep popping from the head instead of using
	 * list_for_each_entry_safe() as dispatch_to_local_dsq() may unlock @rq
	 * temporarily.
	 */
	while ((p = list_first_entry_or_null(&rq->scx.ddsp_deferred_locals,
				struct task_struct, scx.dsq_list.node))) {
		struct scx_sched *sch = scx_task_sched(p);
		struct scx_dispatch_q *dsq;

		/* take @p off the deferred list before dispatching it */
		list_del_init(&p->scx.dsq_list.node);

		dsq = find_dsq_for_dispatch(sch, rq, p->scx.ddsp_dsq_id, task_cpu(p));
		/* anything but a local DSQ is unexpected here: warn once and skip */
		if (!WARN_ON_ONCE(dsq->id != SCX_DSQ_LOCAL))
			dispatch_to_local_dsq(sch, rq, dsq, p,
					      p->scx.ddsp_enq_flags);
	}
}

/*
 * Re-enqueue the tasks sitting on @rq's local DSQ on behalf of @sch.
 *
 * Tasks with a pending migration and tasks whose scheduler does not pass
 * scx_is_descendant(task_sch, @sch) are left where they are. Every other task
 * is dequeued and fed back through do_enqueue_task() with %SCX_ENQ_REENQ.
 *
 * Must be called with @rq locked. Returns the number of tasks re-enqueued.
 */
static u32 reenq_local(struct scx_sched *sch, struct rq *rq)
{
	LIST_HEAD(tasks);
	u32 nr_enqueued = 0;
	struct task_struct *p, *n;

	lockdep_assert_rq_held(rq);

	/*
	 * The BPF scheduler may choose to dispatch tasks back to
	 * @rq->scx.local_dsq. Move all candidate tasks off to a private list
	 * first to avoid processing the same tasks repeatedly.
	 */
	list_for_each_entry_safe(p, n, &rq->scx.local_dsq.list,
				 scx.dsq_list.node) {
		struct scx_sched *task_sch = scx_task_sched(p);

		/*
		 * If @p is being migrated, @p's current CPU may not agree with
		 * its allowed CPUs and the migration_cpu_stop is about to
		 * deactivate and re-activate @p anyway. Skip re-enqueueing.
		 *
		 * While racing sched property changes may also dequeue and
		 * re-enqueue a migrating task while its current CPU and allowed
		 * CPUs disagree, they use %ENQUEUE_RESTORE which is bypassed to
		 * the current local DSQ for running tasks and thus are not
		 * visible to the BPF scheduler.
		 */
		if (p->migration_pending)
			continue;

		/* only tasks whose scheduler passes the descendant check are taken */
		if (!scx_is_descendant(task_sch, sch))
			continue;

		dispatch_dequeue(rq, p);
		list_add_tail(&p->scx.dsq_list.node, &tasks);
	}

	/*
	 * Unlink each collected task from the private list and hand it to
	 * do_enqueue_task(). NOTE(review): the trailing -1 presumably means "no
	 * specific CPU" - confirm against do_enqueue_task().
	 */
	list_for_each_entry_safe(p, n, &tasks, scx.dsq_list.node) {
		list_del_init(&p->scx.dsq_list.node);
		do_enqueue_task(rq, p, SCX_ENQ_REENQ, -1);
		nr_enqueued++;
	}

	return nr_enqueued;
}

/*
 * Run the work that was deferred on @rq: first the deferred direct
 * dispatches handled by process_ddsp_deferred_locals(), then one
 * reenq_local() pass for every entry queued on
 * @rq->scx.deferred_reenq_locals.
 */
static void run_deferred(struct rq *rq)
{
	struct scx_sched_pcpu *pcpu, *tmp;
	struct llist_node *batch;

	process_ddsp_deferred_locals(rq);

	/* nothing queued for local re-enqueueing */
	if (llist_empty(&rq->scx.deferred_reenq_locals))
		return;

	/*
	 * Detach the whole pending list in one go, then reinitialize each
	 * node before running its reenq_local() pass.
	 */
	batch = llist_del_all(&rq->scx.deferred_reenq_locals);

	llist_for_each_entry_safe(pcpu, tmp, batch,
				  deferred_reenq_locals_node) {
		init_llist_node(&pcpu->deferred_reenq_locals_node);
		reenq_local(pcpu->sch, rq);
	}
}

#ifdef CONFIG_NO_HZ_FULL
bool scx_can_stop_tick(struct rq *rq)
{
@@ -7701,53 +7746,6 @@ static const struct btf_kfunc_id_set scx_kfunc_set_dispatch = {
	.set			= &scx_kfunc_ids_dispatch,
};

/*
 * Re-enqueue the tasks sitting on @rq's local DSQ on behalf of @sch.
 *
 * Tasks with a pending migration and tasks whose scheduler does not pass
 * scx_is_descendant(task_sch, @sch) are left where they are. Every other task
 * is dequeued and fed back through do_enqueue_task() with %SCX_ENQ_REENQ.
 *
 * Must be called with @rq locked. Returns the number of tasks re-enqueued.
 */
static u32 reenq_local(struct scx_sched *sch, struct rq *rq)
{
	LIST_HEAD(tasks);
	u32 nr_enqueued = 0;
	struct task_struct *p, *n;

	lockdep_assert_rq_held(rq);

	/*
	 * The BPF scheduler may choose to dispatch tasks back to
	 * @rq->scx.local_dsq. Move all candidate tasks off to a private list
	 * first to avoid processing the same tasks repeatedly.
	 */
	list_for_each_entry_safe(p, n, &rq->scx.local_dsq.list,
				 scx.dsq_list.node) {
		struct scx_sched *task_sch = scx_task_sched(p);

		/*
		 * If @p is being migrated, @p's current CPU may not agree with
		 * its allowed CPUs and the migration_cpu_stop is about to
		 * deactivate and re-activate @p anyway. Skip re-enqueueing.
		 *
		 * While racing sched property changes may also dequeue and
		 * re-enqueue a migrating task while its current CPU and allowed
		 * CPUs disagree, they use %ENQUEUE_RESTORE which is bypassed to
		 * the current local DSQ for running tasks and thus are not
		 * visible to the BPF scheduler.
		 */
		if (p->migration_pending)
			continue;

		/* only tasks whose scheduler passes the descendant check are taken */
		if (!scx_is_descendant(task_sch, sch))
			continue;

		dispatch_dequeue(rq, p);
		list_add_tail(&p->scx.dsq_list.node, &tasks);
	}

	/*
	 * Unlink each collected task from the private list and hand it to
	 * do_enqueue_task(). NOTE(review): the trailing -1 presumably means "no
	 * specific CPU" - confirm against do_enqueue_task().
	 */
	list_for_each_entry_safe(p, n, &tasks, scx.dsq_list.node) {
		list_del_init(&p->scx.dsq_list.node);
		do_enqueue_task(rq, p, SCX_ENQ_REENQ, -1);
		nr_enqueued++;
	}

	return nr_enqueued;
}

__bpf_kfunc_start_defs();

/**