sched_ext: Move bypass_dsq into scx_sched_pcpu (ff06f727) · Commits · git / linux-net

kernel/sched/ext.c

+27 −25

Original line number	Diff line number	Diff line
		@@ -359,6 +359,11 @@ static const struct sched_class scx_setscheduler_class(struct task_struct p)
		return __setscheduler_class(p->policy, p->prio);
		}

		static struct scx_dispatch_q bypass_dsq(struct scx_sched sch, s32 cpu)
		{
		return &per_cpu_ptr(sch->pcpu, cpu)->bypass_dsq;
		}

		/*
		* scx_kf_mask enforcement. Some kfuncs can only be called from specific SCX
		* ops. When invoking SCX ops, SCX_CALL_OP[_RET]() should be used to indicate
		@@ -1632,7 +1637,7 @@ static void do_enqueue_task(struct rq rq, struct task_struct p, u64 enq_flags,
		dsq = find_global_dsq(sch, p);
		goto enqueue;
		bypass:
		dsq = &task_rq(p)->scx.bypass_dsq;
		dsq = bypass_dsq(sch, task_cpu(p));
		goto enqueue;

		enqueue:
		@@ -2443,7 +2448,7 @@ static int balance_one(struct rq rq, struct task_struct prev)
		goto has_tasks;

		if (scx_rq_bypassing(rq)) {
		if (consume_dispatch_q(sch, rq, &rq->scx.bypass_dsq))
		if (consume_dispatch_q(sch, rq, bypass_dsq(sch, cpu_of(rq))))
		goto has_tasks;
		else
		goto no_tasks;
		@@ -4210,11 +4215,12 @@ bool scx_hardlockup(int cpu)
		return true;
		}

		static u32 bypass_lb_cpu(struct scx_sched sch, struct rq rq,
		static u32 bypass_lb_cpu(struct scx_sched *sch, s32 donor,
		struct cpumask donee_mask, struct cpumask resched_mask,
		u32 nr_donor_target, u32 nr_donee_target)
		{
		struct scx_dispatch_q *donor_dsq = &rq->scx.bypass_dsq;
		struct rq *donor_rq = cpu_rq(donor);
		struct scx_dispatch_q *donor_dsq = bypass_dsq(sch, donor);
		struct task_struct p, n;
		struct scx_dsq_list_node cursor = INIT_DSQ_LIST_CURSOR(cursor, 0, 0);
		s32 delta = READ_ONCE(donor_dsq->nr) - nr_donor_target;
		@@ -4230,7 +4236,7 @@ static u32 bypass_lb_cpu(struct scx_sched sch, struct rq rq,
		if (delta < DIV_ROUND_UP(min_delta_us, READ_ONCE(scx_slice_bypass_us)))
		return 0;

		raw_spin_rq_lock_irq(rq);
		raw_spin_rq_lock_irq(donor_rq);
		raw_spin_lock(&donor_dsq->lock);
		list_add(&cursor.node, &donor_dsq->list);
		resume:
		@@ -4238,7 +4244,6 @@ static u32 bypass_lb_cpu(struct scx_sched sch, struct rq rq,
		n = nldsq_next_task(donor_dsq, n, false);

		while ((p = n)) {
		struct rq *donee_rq;
		struct scx_dispatch_q *donee_dsq;
		int donee;

		@@ -4254,14 +4259,13 @@ static u32 bypass_lb_cpu(struct scx_sched sch, struct rq rq,
		if (donee >= nr_cpu_ids)
		continue;

		donee_rq = cpu_rq(donee);
		donee_dsq = &donee_rq->scx.bypass_dsq;
		donee_dsq = bypass_dsq(sch, donee);

		/*
		* $p's rq is not locked but $p's DSQ lock protects its
		* scheduling properties making this test safe.
		*/
		if (!task_can_run_on_remote_rq(sch, p, donee_rq, false))
		if (!task_can_run_on_remote_rq(sch, p, cpu_rq(donee), false))
		continue;

		/*
		@@ -4276,7 +4280,7 @@ static u32 bypass_lb_cpu(struct scx_sched sch, struct rq rq,
		* between bypass DSQs.
		*/
		dispatch_dequeue_locked(p, donor_dsq);
		dispatch_enqueue(sch, donee_rq, donee_dsq, p, SCX_ENQ_NESTED);
		dispatch_enqueue(sch, cpu_rq(donee), donee_dsq, p, SCX_ENQ_NESTED);

		/*
		* $donee might have been idle and need to be woken up. No need
		@@ -4291,9 +4295,9 @@ static u32 bypass_lb_cpu(struct scx_sched sch, struct rq rq,
		if (!(nr_balanced % SCX_BYPASS_LB_BATCH) && n) {
		list_move_tail(&cursor.node, &n->scx.dsq_list.node);
		raw_spin_unlock(&donor_dsq->lock);
		raw_spin_rq_unlock_irq(rq);
		raw_spin_rq_unlock_irq(donor_rq);
		cpu_relax();
		raw_spin_rq_lock_irq(rq);
		raw_spin_rq_lock_irq(donor_rq);
		raw_spin_lock(&donor_dsq->lock);
		goto resume;
		}
		@@ -4301,7 +4305,7 @@ static u32 bypass_lb_cpu(struct scx_sched sch, struct rq rq,

		list_del_init(&cursor.node);
		raw_spin_unlock(&donor_dsq->lock);
		raw_spin_rq_unlock_irq(rq);
		raw_spin_rq_unlock_irq(donor_rq);

		return nr_balanced;
		}
		@@ -4319,7 +4323,7 @@ static void bypass_lb_node(struct scx_sched *sch, int node)

		/* count the target tasks and CPUs */
		for_each_cpu_and(cpu, cpu_online_mask, node_mask) {
		u32 nr = READ_ONCE(cpu_rq(cpu)->scx.bypass_dsq.nr);
		u32 nr = READ_ONCE(bypass_dsq(sch, cpu)->nr);

		nr_tasks += nr;
		nr_cpus++;
		@@ -4341,24 +4345,21 @@ static void bypass_lb_node(struct scx_sched *sch, int node)

		cpumask_clear(donee_mask);
		for_each_cpu_and(cpu, cpu_online_mask, node_mask) {
		if (READ_ONCE(cpu_rq(cpu)->scx.bypass_dsq.nr) < nr_target)
		if (READ_ONCE(bypass_dsq(sch, cpu)->nr) < nr_target)
		cpumask_set_cpu(cpu, donee_mask);
		}

		/* iterate !donee CPUs and see if they should be offloaded */
		cpumask_clear(resched_mask);
		for_each_cpu_and(cpu, cpu_online_mask, node_mask) {
		struct rq *rq = cpu_rq(cpu);
		struct scx_dispatch_q *donor_dsq = &rq->scx.bypass_dsq;

		if (cpumask_empty(donee_mask))
		break;
		if (cpumask_test_cpu(cpu, donee_mask))
		continue;
		if (READ_ONCE(donor_dsq->nr) <= nr_donor_target)
		if (READ_ONCE(bypass_dsq(sch, cpu)->nr) <= nr_donor_target)
		continue;

		nr_balanced += bypass_lb_cpu(sch, rq, donee_mask, resched_mask,
		nr_balanced += bypass_lb_cpu(sch, cpu, donee_mask, resched_mask,
		nr_donor_target, nr_target);
		}

		@@ -4366,7 +4367,7 @@ static void bypass_lb_node(struct scx_sched *sch, int node)
		resched_cpu(cpu);

		for_each_cpu_and(cpu, cpu_online_mask, node_mask) {
		u32 nr = READ_ONCE(cpu_rq(cpu)->scx.bypass_dsq.nr);
		u32 nr = READ_ONCE(bypass_dsq(sch, cpu)->nr);

		after_min = min(nr, after_min);
		after_max = max(nr, after_max);
		@@ -5261,7 +5262,7 @@ static struct scx_sched scx_alloc_and_add_sched(struct sched_ext_ops ops,
		{
		struct scx_sched *sch;
		s32 level = parent ? parent->level + 1 : 0;
		int node, ret;
		s32 node, cpu, ret;

		sch = kzalloc_flex(*sch, ancestors, level);
		if (!sch)
		@@ -5302,6 +5303,9 @@ static struct scx_sched scx_alloc_and_add_sched(struct sched_ext_ops ops,
		goto err_free_gdsqs;
		}

		for_each_possible_cpu(cpu)
		init_dsq(bypass_dsq(sch, cpu), SCX_DSQ_BYPASS, sch);

		sch->helper = kthread_run_worker(0, "sched_ext_helper");
		if (IS_ERR(sch->helper)) {
		ret = PTR_ERR(sch->helper);
		@@ -5490,7 +5494,6 @@ static void scx_root_enable_workfn(struct kthread_work *work)
		struct rq *rq = cpu_rq(cpu);

		rq->scx.local_dsq.sched = sch;
		rq->scx.bypass_dsq.sched = sch;
		rq->scx.cpuperf_target = SCX_CPUPERF_ONE;
		}

		@@ -6465,9 +6468,8 @@ void __init init_sched_ext_class(void)
		struct rq *rq = cpu_rq(cpu);
		int n = cpu_to_node(cpu);

		/* local/bypass dsq's sch will be set during scx_root_enable() */
		/* local_dsq's sch will be set during scx_root_enable() */
		init_dsq(&rq->scx.local_dsq, SCX_DSQ_LOCAL, NULL);
		init_dsq(&rq->scx.bypass_dsq, SCX_DSQ_BYPASS, NULL);

		INIT_LIST_HEAD(&rq->scx.runnable_list);
		INIT_LIST_HEAD(&rq->scx.ddsp_deferred_locals);

kernel/sched/ext_internal.h

+2 −0

Original line number	Diff line number	Diff line
		@@ -932,6 +932,8 @@ struct scx_sched_pcpu {
		* constructed when requested by scx_bpf_events().
		*/
		struct scx_event_stats event_stats;

		struct scx_dispatch_q bypass_dsq;
		};

		struct scx_sched {

kernel/sched/sched.h

+0 −1

Original line number	Diff line number	Diff line
		@@ -810,7 +810,6 @@ struct scx_rq {
		struct balance_callback deferred_bal_cb;
		struct irq_work deferred_irq_work;
		struct irq_work kick_cpus_irq_work;
		struct scx_dispatch_q bypass_dsq;
		};
		#endif /* CONFIG_SCHED_CLASS_EXT */