scx_central: Defer timer start to central dispatch to fix init error (d6edb15a) · Commits · git / linux-net

tools/sched_ext/scx_central.bpf.c

+42 −20

Original line number	Diff line number	Diff line
		@@ -60,6 +60,7 @@ const volatile u32 nr_cpu_ids = 1; /* !0 for veristat, set during init */
		const volatile u64 slice_ns;

		bool timer_pinned = true;
		bool timer_started;
		u64 nr_total, nr_locals, nr_queued, nr_lost_pids;
		u64 nr_timers, nr_dispatches, nr_mismatches, nr_retries;
		u64 nr_overflows;
		@@ -179,9 +180,47 @@ static bool dispatch_to_cpu(s32 cpu)
		return false;
		}

		static void start_central_timer(void)
		{
		struct bpf_timer *timer;
		u32 key = 0;
		int ret;

		if (likely(timer_started))
		return;

		timer = bpf_map_lookup_elem(&central_timer, &key);
		if (!timer) {
		scx_bpf_error("failed to lookup central timer");
		return;
		}

		ret = bpf_timer_start(timer, TIMER_INTERVAL_NS, BPF_F_TIMER_CPU_PIN);
		/*
		* BPF_F_TIMER_CPU_PIN is pretty new (>=6.7). If we're running in a
		* kernel which doesn't have it, bpf_timer_start() will return -EINVAL.
		* Retry without the PIN. This would be the perfect use case for
		* bpf_core_enum_value_exists() but the enum type doesn't have a name
		* and can't be used with bpf_core_enum_value_exists(). Oh well...
		*/
		if (ret == -EINVAL) {
		timer_pinned = false;
		ret = bpf_timer_start(timer, TIMER_INTERVAL_NS, 0);
		}

		if (ret) {
		scx_bpf_error("bpf_timer_start failed (%d)", ret);
		return;
		}

		timer_started = true;
		}

		void BPF_STRUCT_OPS(central_dispatch, s32 cpu, struct task_struct *prev)
		{
		if (cpu == central_cpu) {
		start_central_timer();

		/* dispatch for all other CPUs first */
		__sync_fetch_and_add(&nr_dispatches, 1);

		@@ -310,29 +349,12 @@ int BPF_STRUCT_OPS_SLEEPABLE(central_init)
		if (!timer)
		return -ESRCH;

		if (bpf_get_smp_processor_id() != central_cpu) {
		scx_bpf_error("init from non-central CPU");
		return -EINVAL;
		}

		bpf_timer_init(timer, &central_timer, CLOCK_MONOTONIC);
		bpf_timer_set_callback(timer, central_timerfn);

		ret = bpf_timer_start(timer, TIMER_INTERVAL_NS, BPF_F_TIMER_CPU_PIN);
		/*
		* BPF_F_TIMER_CPU_PIN is pretty new (>=6.7). If we're running in a
		* kernel which doesn't have it, bpf_timer_start() will return -EINVAL.
		* Retry without the PIN. This would be the perfect use case for
		* bpf_core_enum_value_exists() but the enum type doesn't have a name
		* and can't be used with bpf_core_enum_value_exists(). Oh well...
		*/
		if (ret == -EINVAL) {
		timer_pinned = false;
		ret = bpf_timer_start(timer, TIMER_INTERVAL_NS, 0);
		}
		if (ret)
		scx_bpf_error("bpf_timer_start failed (%d)", ret);
		return ret;
		scx_bpf_kick_cpu(central_cpu, 0);

		return 0;
		}

		void BPF_STRUCT_OPS(central_exit, struct scx_exit_info *ei)

tools/sched_ext/scx_central.c

+0 −24

Original line number	Diff line number	Diff line
		@@ -5,7 +5,6 @@
		* Copyright (c) 2022 David Vernet <dvernet@meta.com>
		*/
		#define _GNU_SOURCE
		#include <sched.h>
		#include <stdio.h>
		#include <unistd.h>
		#include <inttypes.h>
		@@ -49,8 +48,6 @@ int main(int argc, char **argv)
		struct bpf_link *link;
		__u64 seq = 0, ecode;
		__s32 opt;
		cpu_set_t *cpuset;
		size_t cpuset_size;

		libbpf_set_print(libbpf_print_fn);
		signal(SIGINT, sigint_handler);
		@@ -96,27 +93,6 @@ int main(int argc, char **argv)

		SCX_OPS_LOAD(skel, central_ops, scx_central, uei);

		/*
		* Affinitize the loading thread to the central CPU, as:
		* - That's where the BPF timer is first invoked in the BPF program.
		* - We probably don't want this user space component to take up a core
		* from a task that would benefit from avoiding preemption on one of
		* the tickless cores.
		*
		* Until BPF supports pinning the timer, it's not guaranteed that it
		* will always be invoked on the central CPU. In practice, this
		* suffices the majority of the time.
		*/
		cpuset = CPU_ALLOC(skel->rodata->nr_cpu_ids);
		SCX_BUG_ON(!cpuset, "Failed to allocate cpuset");
		cpuset_size = CPU_ALLOC_SIZE(skel->rodata->nr_cpu_ids);
		CPU_ZERO_S(cpuset_size, cpuset);
		CPU_SET_S(skel->rodata->central_cpu, cpuset_size, cpuset);
		SCX_BUG_ON(sched_setaffinity(0, cpuset_size, cpuset),
		"Failed to affinitize to central CPU %d (max %d)",
		skel->rodata->central_cpu, skel->rodata->nr_cpu_ids - 1);
		CPU_FREE(cpuset);

		link = SCX_OPS_ATTACH(skel, central_ops, scx_central);

		if (!skel->data->timer_pinned)