Commit 8409b800 authored by Tejun Heo's avatar Tejun Heo
Browse files

sched_ext: Move global_dsqs into scx_sched



Global DSQs are going to become per scheduler instance. Move global_dsqs
into scx_sched. find_global_dsq() already takes a task_struct pointer as an
argument and should later be able to determine the scx_sched to use from
that. For now, assume scx_root.

Signed-off-by: default avatarTejun Heo <tj@kernel.org>
Reviewed-by: default avatarAndrea Righi <arighi@nvidia.com>
Acked-by: default avatarChangwoo Min <changwoo@igalia.com>
parent cdf5a6fa
Loading
Loading
Loading
Loading
+49 −46
Original line number Diff line number Diff line
@@ -770,7 +770,17 @@ struct scx_sched {
	struct sched_ext_ops	ops;
	DECLARE_BITMAP(has_op, SCX_OPI_END);

	/*
	 * Dispatch queues.
	 *
	 * The global DSQ (%SCX_DSQ_GLOBAL) is split per-node for scalability.
	 * This is to avoid live-locking in bypass mode where all tasks are
	 * dispatched to %SCX_DSQ_GLOBAL and all CPUs consume from it. If
	 * per-node split isn't sufficient, it can be further split.
	 */
	struct rhashtable	dsq_hash;
	struct scx_dispatch_q	**global_dsqs;

	bool			warned_zero_slice;

	atomic_t		exit_kind;
@@ -1001,16 +1011,6 @@ static unsigned long __percpu *scx_kick_cpus_pnt_seqs;
 */
static DEFINE_PER_CPU(struct task_struct *, direct_dispatch_task);

/*
 * Dispatch queues.
 *
 * The global DSQ (%SCX_DSQ_GLOBAL) is split per-node for scalability. This is
 * to avoid live-locking in bypass mode where all tasks are dispatched to
 * %SCX_DSQ_GLOBAL and all CPUs consume from it. If per-node split isn't
 * sufficient, it can be further split.
 */
static struct scx_dispatch_q **global_dsqs;

static const struct rhashtable_params dsq_hash_params = {
	.key_len		= sizeof_field(struct scx_dispatch_q, id),
	.key_offset		= offsetof(struct scx_dispatch_q, id),
@@ -1111,7 +1111,9 @@ static bool u32_before(u32 a, u32 b)

static struct scx_dispatch_q *find_global_dsq(struct task_struct *p)
{
	return global_dsqs[cpu_to_node(task_cpu(p))];
	struct scx_sched *sch = scx_root;

	return sch->global_dsqs[cpu_to_node(task_cpu(p))];
}

static struct scx_dispatch_q *find_user_dsq(struct scx_sched *sch, u64 dsq_id)
@@ -2788,11 +2790,11 @@ static bool consume_dispatch_q(struct rq *rq, struct scx_dispatch_q *dsq)
	return false;
}

static bool consume_global_dsq(struct rq *rq)
static bool consume_global_dsq(struct scx_sched *sch, struct rq *rq)
{
	int node = cpu_to_node(cpu_of(rq));

	return consume_dispatch_q(rq, global_dsqs[node]);
	return consume_dispatch_q(rq, sch->global_dsqs[node]);
}

/**
@@ -3038,7 +3040,7 @@ static int balance_one(struct rq *rq, struct task_struct *prev)
	if (rq->scx.local_dsq.nr)
		goto has_tasks;

	if (consume_global_dsq(rq))
	if (consume_global_dsq(sch, rq))
		goto has_tasks;

	if (unlikely(!SCX_HAS_OP(scx_root, dispatch)) ||
@@ -3068,7 +3070,7 @@ static int balance_one(struct rq *rq, struct task_struct *prev)
		}
		if (rq->scx.local_dsq.nr)
			goto has_tasks;
		if (consume_global_dsq(rq))
		if (consume_global_dsq(sch, rq))
			goto has_tasks;

		/*
@@ -4409,6 +4411,11 @@ static void scx_sched_free_rcu_work(struct work_struct *work)
	struct scx_sched *sch = container_of(rcu_work, struct scx_sched, rcu_work);
	struct rhashtable_iter rht_iter;
	struct scx_dispatch_q *dsq;
	int node;

	for_each_node_state(node, N_POSSIBLE)
		kfree(sch->global_dsqs[node]);
	kfree(sch->global_dsqs);

	rhashtable_walk_enter(&sch->dsq_hash, &rht_iter);
	do {
@@ -5247,7 +5254,7 @@ static struct kthread_worker *scx_create_rt_helper(const char *name)
static struct scx_sched *scx_alloc_and_add_sched(struct sched_ext_ops *ops)
{
	struct scx_sched *sch;
	int ret;
	int node, ret;

	sch = kzalloc(sizeof(*sch), GFP_KERNEL);
	if (!sch)
@@ -5263,6 +5270,26 @@ static struct scx_sched *scx_alloc_and_add_sched(struct sched_ext_ops *ops)
	if (ret < 0)
		goto err_free_ei;

	sch->global_dsqs = kcalloc(nr_node_ids, sizeof(sch->global_dsqs[0]),
				   GFP_KERNEL);
	if (!sch->global_dsqs) {
		ret = -ENOMEM;
		goto err_free_hash;
	}

	for_each_node_state(node, N_POSSIBLE) {
		struct scx_dispatch_q *dsq;

		dsq = kzalloc_node(sizeof(*dsq), GFP_KERNEL, node);
		if (!dsq) {
			ret = -ENOMEM;
			goto err_free_gdsqs;
		}

		init_dsq(dsq, SCX_DSQ_GLOBAL);
		sch->global_dsqs[node] = dsq;
	}

	atomic_set(&sch->exit_kind, SCX_EXIT_NONE);
	sch->ops = *ops;
	ops->priv = sch;
@@ -5270,10 +5297,14 @@ static struct scx_sched *scx_alloc_and_add_sched(struct sched_ext_ops *ops)
	sch->kobj.kset = scx_kset;
	ret = kobject_init_and_add(&sch->kobj, &scx_ktype, NULL, "root");
	if (ret < 0)
		goto err_free_hash;
		goto err_free_gdsqs;

	return sch;

err_free_gdsqs:
	for_each_node_state(node, N_POSSIBLE)
		kfree(sch->global_dsqs[node]);
	kfree(sch->global_dsqs);
err_free_hash:
	rhashtable_free_and_destroy(&sch->dsq_hash, NULL, NULL);
err_free_ei:
@@ -5335,7 +5366,7 @@ static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link)
	struct scx_task_iter sti;
	struct task_struct *p;
	unsigned long timeout;
	int i, cpu, node, ret;
	int i, cpu, ret;

	if (!cpumask_equal(housekeeping_cpumask(HK_TYPE_DOMAIN),
			   cpu_possible_mask)) {
@@ -5362,34 +5393,6 @@ static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link)
		}
	}

	if (!global_dsqs) {
		struct scx_dispatch_q **dsqs;

		dsqs = kcalloc(nr_node_ids, sizeof(dsqs[0]), GFP_KERNEL);
		if (!dsqs) {
			ret = -ENOMEM;
			goto err_unlock;
		}

		for_each_node_state(node, N_POSSIBLE) {
			struct scx_dispatch_q *dsq;

			dsq = kzalloc_node(sizeof(*dsq), GFP_KERNEL, node);
			if (!dsq) {
				for_each_node_state(node, N_POSSIBLE)
					kfree(dsqs[node]);
				kfree(dsqs);
				ret = -ENOMEM;
				goto err_unlock;
			}

			init_dsq(dsq, SCX_DSQ_GLOBAL);
			dsqs[node] = dsq;
		}

		global_dsqs = dsqs;
	}

	if (scx_enable_state() != SCX_DISABLED) {
		ret = -EBUSY;
		goto err_unlock;