Commit 5ae51618 authored by Andrea Righi's avatar Andrea Righi Committed by Tejun Heo
Browse files

selftests/sched_ext: Add NUMA-aware scheduler test



Add a selftest to validate the behavior of the NUMA-aware scheduler
functionalities, including idle CPU selection within nodes, per-node
DSQs and CPU to node mapping.

Signed-off-by: default avatarAndrea Righi <arighi@nvidia.com>
Signed-off-by: default avatarTejun Heo <tj@kernel.org>
parent 5e3b6424
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -172,6 +172,7 @@ auto-test-targets := \
	maximal				\
	maybe_null			\
	minimal				\
	numa				\
	prog_run			\
	reload_loop			\
	select_cpu_dfl			\
+100 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0
/*
 * A scheduler that validates the behavior of the NUMA-aware
 * functionalities.
 *
 * The scheduler creates a separate DSQ for each NUMA node, ensuring tasks
 * are exclusively processed by CPUs within their respective nodes. Idle
 * CPUs are selected only within the same node, so task migration can only
 * occurs between CPUs belonging to the same node.
 *
 * Copyright (c) 2025 Andrea Righi <arighi@nvidia.com>
 */

#include <scx/common.bpf.h>

char _license[] SEC("license") = "GPL";

UEI_DEFINE(uei);

const volatile unsigned int __COMPAT_SCX_PICK_IDLE_IN_NODE;

static bool is_cpu_idle(s32 cpu, int node)
{
	const struct cpumask *idle_cpumask;
	bool idle;

	idle_cpumask = __COMPAT_scx_bpf_get_idle_cpumask_node(node);
	idle = bpf_cpumask_test_cpu(cpu, idle_cpumask);
	scx_bpf_put_cpumask(idle_cpumask);

	return idle;
}

s32 BPF_STRUCT_OPS(numa_select_cpu,
		   struct task_struct *p, s32 prev_cpu, u64 wake_flags)
{
	int node = __COMPAT_scx_bpf_cpu_node(scx_bpf_task_cpu(p));
	s32 cpu;

	/*
	 * We could just use __COMPAT_scx_bpf_pick_any_cpu_node() here,
	 * since it already tries to pick an idle CPU within the node
	 * first, but let's use both functions for better testing coverage.
	 */
	cpu = __COMPAT_scx_bpf_pick_idle_cpu_node(p->cpus_ptr, node,
					__COMPAT_SCX_PICK_IDLE_IN_NODE);
	if (cpu < 0)
		cpu = __COMPAT_scx_bpf_pick_any_cpu_node(p->cpus_ptr, node,
						__COMPAT_SCX_PICK_IDLE_IN_NODE);

	if (is_cpu_idle(cpu, node))
		scx_bpf_error("CPU %d should be marked as busy", cpu);

	if (__COMPAT_scx_bpf_cpu_node(cpu) != node)
		scx_bpf_error("CPU %d should be in node %d", cpu, node);

	return cpu;
}

void BPF_STRUCT_OPS(numa_enqueue, struct task_struct *p, u64 enq_flags)
{
	int node = __COMPAT_scx_bpf_cpu_node(scx_bpf_task_cpu(p));

	scx_bpf_dsq_insert(p, node, SCX_SLICE_DFL, enq_flags);
}

void BPF_STRUCT_OPS(numa_dispatch, s32 cpu, struct task_struct *prev)
{
	int node = __COMPAT_scx_bpf_cpu_node(cpu);

	scx_bpf_dsq_move_to_local(node);
}

s32 BPF_STRUCT_OPS_SLEEPABLE(numa_init)
{
	int node, err;

	bpf_for(node, 0, __COMPAT_scx_bpf_nr_node_ids()) {
		err = scx_bpf_create_dsq(node, node);
		if (err)
			return err;
	}

	return 0;
}

void BPF_STRUCT_OPS(numa_exit, struct scx_exit_info *ei)
{
	UEI_RECORD(uei, ei);
}

SEC(".struct_ops.link")
struct sched_ext_ops numa_ops = {
	.select_cpu		= (void *)numa_select_cpu,
	.enqueue		= (void *)numa_enqueue,
	.dispatch		= (void *)numa_dispatch,
	.init			= (void *)numa_init,
	.exit			= (void *)numa_exit,
	.name			= "numa",
};
+59 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2025 Andrea Righi <arighi@nvidia.com>
 */
#include <bpf/bpf.h>
#include <scx/common.h>
#include <sys/wait.h>
#include <unistd.h>
#include "numa.bpf.skel.h"
#include "scx_test.h"

static enum scx_test_status setup(void **ctx)
{
	struct numa *skel;

	skel = numa__open();
	SCX_FAIL_IF(!skel, "Failed to open");
	SCX_ENUM_INIT(skel);
	skel->rodata->__COMPAT_SCX_PICK_IDLE_IN_NODE = SCX_PICK_IDLE_IN_NODE;
	skel->struct_ops.numa_ops->flags = SCX_OPS_BUILTIN_IDLE_PER_NODE;
	SCX_FAIL_IF(numa__load(skel), "Failed to load skel");

	*ctx = skel;

	return SCX_TEST_PASS;
}

static enum scx_test_status run(void *ctx)
{
	struct numa *skel = ctx;
	struct bpf_link *link;

	link = bpf_map__attach_struct_ops(skel->maps.numa_ops);
	SCX_FAIL_IF(!link, "Failed to attach scheduler");

	/* Just sleeping is fine, plenty of scheduling events happening */
	sleep(1);

	SCX_EQ(skel->data->uei.kind, EXIT_KIND(SCX_EXIT_NONE));
	bpf_link__destroy(link);

	return SCX_TEST_PASS;
}

static void cleanup(void *ctx)
{
	struct numa *skel = ctx;

	numa__destroy(skel);
}

struct scx_test numa = {
	.name = "numa",
	.description = "Verify NUMA-aware functionalities",
	.setup = setup,
	.run = run,
	.cleanup = cleanup,
};
REGISTER_SCX_TEST(&numa)