Commit 1edab907 authored by Tejun Heo's avatar Tejun Heo
Browse files

sched_ext/scx_qmap: Pick idle CPU for direct dispatch on !wakeup enqueues



Because there was no way to directly dispatch to the local DSQ of a remote
CPU from ops.enqueue(), scx_qmap skipped looking for an idle CPU on !wakeup
enqueues. This restriction was removed and sched_ext now allows
SCX_DSQ_LOCAL_ON verdicts for direct dispatches.

Factor out pick_direct_dispatch_cpu() from ops.select_cpu() and use it to
direct dispatch from ops.enqueue() on !wakeup enqueues.

Signed-off-by: default avatarTejun Heo <tj@kernel.org>
Acked-by: default avatarDavid Vernet <void@manifault.com>
Cc: Dan Schatzberg <schatzberg.dan@gmail.com>
Cc: Changwoo Min <changwoo@igalia.com>
Cc: Andrea Righi <righi.andrea@gmail.com>
parent 5b26f7b9
Loading
Loading
Loading
Loading
+30 −9
Original line number Diff line number Diff line
@@ -120,11 +120,26 @@ struct {
} cpu_ctx_stor SEC(".maps");

/* Statistics */
u64 nr_enqueued, nr_dispatched, nr_reenqueued, nr_dequeued;
u64 nr_enqueued, nr_dispatched, nr_reenqueued, nr_dequeued, nr_ddsp_from_enq;
u64 nr_core_sched_execed;
u32 cpuperf_min, cpuperf_avg, cpuperf_max;
u32 cpuperf_target_min, cpuperf_target_avg, cpuperf_target_max;

static s32 pick_direct_dispatch_cpu(struct task_struct *p, s32 prev_cpu)
{
	s32 cpu;

	if (p->nr_cpus_allowed == 1 ||
	    scx_bpf_test_and_clear_cpu_idle(prev_cpu))
		return prev_cpu;

	cpu = scx_bpf_pick_idle_cpu(p->cpus_ptr, 0);
	if (cpu >= 0)
		return cpu;

	return -1;
}

s32 BPF_STRUCT_OPS(qmap_select_cpu, struct task_struct *p,
		   s32 prev_cpu, u64 wake_flags)
{
@@ -137,18 +152,15 @@ s32 BPF_STRUCT_OPS(qmap_select_cpu, struct task_struct *p,
		return -ESRCH;
	}

	if (p->nr_cpus_allowed == 1 ||
	    scx_bpf_test_and_clear_cpu_idle(prev_cpu)) {
		tctx->force_local = true;
		return prev_cpu;
	}
	cpu = pick_direct_dispatch_cpu(p, prev_cpu);

	cpu = scx_bpf_pick_idle_cpu(p->cpus_ptr, 0);
	if (cpu >= 0)
	if (cpu >= 0) {
		tctx->force_local = true;
		return cpu;

	} else {
		return prev_cpu;
	}
}

static int weight_to_idx(u32 weight)
{
@@ -172,6 +184,7 @@ void BPF_STRUCT_OPS(qmap_enqueue, struct task_struct *p, u64 enq_flags)
	u32 pid = p->pid;
	int idx = weight_to_idx(p->scx.weight);
	void *ring;
	s32 cpu;

	if (p->flags & PF_KTHREAD) {
		if (stall_kernel_nth && !(++kernel_cnt % stall_kernel_nth))
@@ -207,6 +220,14 @@ void BPF_STRUCT_OPS(qmap_enqueue, struct task_struct *p, u64 enq_flags)
		return;
	}

	/* if !WAKEUP, select_cpu() wasn't called, try direct dispatch */
	if (!(enq_flags & SCX_ENQ_WAKEUP) &&
	    (cpu = pick_direct_dispatch_cpu(p, scx_bpf_task_cpu(p))) >= 0) {
		__sync_fetch_and_add(&nr_ddsp_from_enq, 1);
		scx_bpf_dispatch(p, SCX_DSQ_LOCAL_ON | cpu, slice_ns, enq_flags);
		return;
	}

	/*
	 * If the task was re-enqueued due to the CPU being preempted by a
	 * higher priority scheduling class, just re-enqueue the task directly
+3 −2
Original line number Diff line number Diff line
@@ -116,10 +116,11 @@ int main(int argc, char **argv)
		long nr_enqueued = skel->bss->nr_enqueued;
		long nr_dispatched = skel->bss->nr_dispatched;

		printf("stats  : enq=%lu dsp=%lu delta=%ld reenq=%"PRIu64" deq=%"PRIu64" core=%"PRIu64"\n",
		printf("stats  : enq=%lu dsp=%lu delta=%ld reenq=%"PRIu64" deq=%"PRIu64" core=%"PRIu64" enq_ddsp=%"PRIu64"\n",
		       nr_enqueued, nr_dispatched, nr_enqueued - nr_dispatched,
		       skel->bss->nr_reenqueued, skel->bss->nr_dequeued,
		       skel->bss->nr_core_sched_execed);
		       skel->bss->nr_core_sched_execed,
		       skel->bss->nr_ddsp_from_enq);
		if (__COMPAT_has_ksym("scx_bpf_cpuperf_cur"))
			printf("cpuperf: cur min/avg/max=%u/%u/%u target min/avg/max=%u/%u/%u\n",
			       skel->bss->cpuperf_min,