sched_ext: idle: Accept an arbitrary cpumask in scx_select_cpu_dfl() (c2d8b2a5) · Commits · git / linux-net

kernel/sched/ext_idle.c

+44 −4

Original line number	Diff line number	Diff line
		@@ -49,6 +49,7 @@ static struct scx_idle_cpus **scx_idle_node_masks;
		/*
		* Local per-CPU cpumasks (used to generate temporary idle cpumasks).
		*/
		static DEFINE_PER_CPU(cpumask_var_t, local_idle_cpumask);
		static DEFINE_PER_CPU(cpumask_var_t, local_llc_idle_cpumask);
		static DEFINE_PER_CPU(cpumask_var_t, local_numa_idle_cpumask);

		@@ -417,13 +418,15 @@ static inline bool task_affinity_all(const struct task_struct *p)
		* branch prediction optimizations.
		*
		* 3. Pick a CPU within the same LLC (Last-Level Cache):
		* - if the above conditions aren't met, pick a CPU that shares the same LLC
		* to maintain cache locality.
		* - if the above conditions aren't met, pick a CPU that shares the same
		* LLC, if the LLC domain is a subset of @cpus_allowed, to maintain
		* cache locality.
		*
		* 4. Pick a CPU within the same NUMA node, if enabled:
		* - choose a CPU from the same NUMA node to reduce memory access latency.
		* - choose a CPU from the same NUMA node, if the node cpumask is a
		* subset of @cpus_allowed, to reduce memory access latency.
		*
		* 5. Pick any idle CPU usable by the task.
		* 5. Pick any idle CPU within the @cpus_allowed domain.
		*
		* Steps 3 and 4 are performed only if the system has, respectively,
		* multiple LLCs / multiple NUMA nodes (see scx_selcpu_topo_llc and
		@@ -446,6 +449,39 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags,
		int node = scx_cpu_node_if_enabled(prev_cpu);
		s32 cpu;

		preempt_disable();

		/*
		* Determine the subset of CPUs usable by @p within @cpus_allowed.
		*/
		if (allowed != p->cpus_ptr) {
		struct cpumask *local_cpus = this_cpu_cpumask_var_ptr(local_idle_cpumask);

		if (task_affinity_all(p)) {
		allowed = cpus_allowed;
		} else if (cpumask_and(local_cpus, cpus_allowed, p->cpus_ptr)) {
		allowed = local_cpus;
		} else {
		cpu = -EBUSY;
		goto out_enable;
		}

		/*
		* If @prev_cpu is not in the allowed CPUs, skip topology
		* optimizations and try to pick any idle CPU usable by the
		* task.
		*
		* If %SCX_OPS_BUILTIN_IDLE_PER_NODE is enabled, prioritize
		* the current node, as it may optimize some waker->wakee
		* workloads.
		*/
		if (!cpumask_test_cpu(prev_cpu, allowed)) {
		node = scx_cpu_node_if_enabled(smp_processor_id());
		cpu = scx_pick_idle_cpu(allowed, node, flags);
		goto out_enable;
		}
		}

		/*
		* This is necessary to protect llc_cpus.
		*/
		@@ -610,6 +646,8 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags,

		out_unlock:
		rcu_read_unlock();
		out_enable:
		preempt_enable();

		return cpu;
		}
		@@ -641,6 +679,8 @@ void scx_idle_init_masks(void)

		/* Allocate local per-cpu idle cpumasks */
		for_each_possible_cpu(i) {
		BUG_ON(!alloc_cpumask_var_node(&per_cpu(local_idle_cpumask, i),
		GFP_KERNEL, cpu_to_node(i)));
		BUG_ON(!alloc_cpumask_var_node(&per_cpu(local_llc_idle_cpumask, i),
		GFP_KERNEL, cpu_to_node(i)));
		BUG_ON(!alloc_cpumask_var_node(&per_cpu(local_numa_idle_cpumask, i),