Commit bcb04425 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull sched_ext updates from Tejun Heo:

 - Add a mechanism to count and report internal events. This significantly
   improves visibility into subtle corner cases.

 - The default idle CPU selection logic is revamped and improved in
   multiple ways including being made topology aware.

 - sched_ext was disabling ttwu_queue for simplicity, which can be
   costly when hardware topology is more complex. Implement
   SCX_OPS_ALLOW_QUEUED_WAKEUP so that BPF schedulers can selectively
   enable ttwu_queue.

 - tools/sched_ext updates, including compatibility improvements.

 - Other misc updates and fixes.

 - sched_ext/for-6.14-fixes were pulled a few times to receive
   prerequisite fixes and resolve conflicts.

* tag 'sched_ext-for-6.15' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext: (42 commits)
  sched_ext: idle: Refactor scx_select_cpu_dfl()
  sched_ext: idle: Honor idle flags in the built-in idle selection policy
  sched_ext: Skip per-CPU tasks in scx_bpf_reenqueue_local()
  sched_ext: Add trace point to track sched_ext core events
  sched_ext: Change the event type from u64 to s64
  sched_ext: Documentation: add task lifecycle summary
  tools/sched_ext: Provide a compatible helper for scx_bpf_events()
  selftests/sched_ext: Add NUMA-aware scheduler test
  tools/sched_ext: Provide consistent access to scx flags
  sched_ext: idle: Fix scx_bpf_pick_any_cpu_node() behavior
  sched_ext: idle: Introduce scx_bpf_nr_node_ids()
  sched_ext: idle: Introduce node-aware idle cpu kfunc helpers
  sched_ext: idle: Per-node idle cpumasks
  sched_ext: idle: Introduce SCX_OPS_BUILTIN_IDLE_PER_NODE
  sched_ext: idle: Make idle static keys private
  sched/topology: Introduce for_each_node_numadist() iterator
  mm/numa: Introduce nearest_node_nodemask()
  nodemask: numa: reorganize inclusion path
  nodemask: add nodes_copy()
  tools/sched_ext: Sync with scx repo
  ...
parents 94dc216a e4855fc9
Loading
Loading
Loading
Loading
+36 −0
Original line number Diff line number Diff line
@@ -294,6 +294,42 @@ dispatching, and must be dispatched to with ``scx_bpf_dsq_insert()``. See
the function documentation and usage in ``tools/sched_ext/scx_simple.bpf.c``
for more information.

Task Lifecycle
--------------

The following pseudo-code summarizes the entire lifecycle of a task managed
by a sched_ext scheduler:

.. code-block:: c

    ops.init_task();            /* A new task is created */
    ops.enable();               /* Enable BPF scheduling for the task */

    while (task in SCHED_EXT) {
        if (task can migrate)
            ops.select_cpu();   /* Called on wakeup (optimization) */

        ops.runnable();         /* Task becomes ready to run */

        while (task is runnable) {
            if (task is not in a DSQ) {
                ops.enqueue();  /* Task can be added to a DSQ */

                /* A CPU becomes available */

                ops.dispatch(); /* Task is moved to a local DSQ */
            }
            ops.running();      /* Task starts running on its assigned CPU */
            ops.tick();         /* Called every 1/HZ seconds */
            ops.stopping();     /* Task stops running (time slice expires or wait) */
        }

        ops.quiescent();        /* Task releases its assigned CPU (wait) */
    }

    ops.disable();              /* Disable BPF scheduling for the task */
    ops.exit_task();            /* Task is destroyed */

Where to Look
=============

+1 −2
Original line number Diff line number Diff line
@@ -21196,8 +21196,7 @@ S: Maintained
W:	https://github.com/sched-ext/scx
T:	git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext.git
F:	include/linux/sched/ext.h
F:	kernel/sched/ext.h
F:	kernel/sched/ext.c
F:	kernel/sched/ext*
F:	tools/sched_ext/
F:	tools/testing/selftests/sched_ext
+7 −1
Original line number Diff line number Diff line
@@ -94,7 +94,6 @@
#include <linux/bitmap.h>
#include <linux/minmax.h>
#include <linux/nodemask_types.h>
#include <linux/numa.h>
#include <linux/random.h>

extern nodemask_t _unused_nodemask_arg_;
@@ -191,6 +190,13 @@ static __always_inline void __nodes_andnot(nodemask_t *dstp, const nodemask_t *s
	bitmap_andnot(dstp->bits, src1p->bits, src2p->bits, nbits);
}

/*
 * nodes_copy(dst, src) - copy the node bitmap of @src into @dst.
 *
 * The macro takes nodemask_t lvalues and passes their addresses, together
 * with MAX_NUMNODES as the bit count, to __nodes_copy(), which copies the
 * first @nbits bits of @srcp->bits into @dstp->bits via bitmap_copy().
 */
#define nodes_copy(dst, src) __nodes_copy(&(dst), &(src), MAX_NUMNODES)
static __always_inline void __nodes_copy(nodemask_t *dstp,
					const nodemask_t *srcp, unsigned int nbits)
{
	bitmap_copy(dstp->bits, srcp->bits, nbits);
}

#define nodes_complement(dst, src) \
			__nodes_complement(&(dst), &(src), MAX_NUMNODES)
static __always_inline void __nodes_complement(nodemask_t *dstp,
+10 −1
Original line number Diff line number Diff line
@@ -3,7 +3,16 @@
#define __LINUX_NODEMASK_TYPES_H

#include <linux/bitops.h>
#include <linux/numa.h>

#ifdef CONFIG_NODES_SHIFT
#define NODES_SHIFT     CONFIG_NODES_SHIFT
#else
#define NODES_SHIFT     0
#endif

#define MAX_NUMNODES    (1 << NODES_SHIFT)

#define	NUMA_NO_NODE	(-1)

typedef struct { DECLARE_BITMAP(bits, MAX_NUMNODES); } nodemask_t;

+8 −9
Original line number Diff line number Diff line
@@ -3,16 +3,8 @@
#define _LINUX_NUMA_H
#include <linux/init.h>
#include <linux/types.h>
#include <linux/nodemask.h>

#ifdef CONFIG_NODES_SHIFT
#define NODES_SHIFT     CONFIG_NODES_SHIFT
#else
#define NODES_SHIFT     0
#endif

#define MAX_NUMNODES    (1 << NODES_SHIFT)

#define	NUMA_NO_NODE	(-1)
#define	NUMA_NO_MEMBLK	(-1)

static inline bool numa_valid_node(int nid)
@@ -39,6 +31,8 @@ void __init alloc_offline_node_data(int nid);
/* Generic implementation available */
int numa_nearest_node(int node, unsigned int state);

int nearest_node_nodemask(int node, nodemask_t *mask);

#ifndef memory_add_physaddr_to_nid
int memory_add_physaddr_to_nid(u64 start);
#endif
@@ -55,6 +49,11 @@ static inline int numa_nearest_node(int node, unsigned int state)
	return NUMA_NO_NODE;
}

/*
 * Inline fallback for nearest_node_nodemask(): performs no lookup, ignores
 * both @node and @mask, and always returns NUMA_NO_NODE.
 *
 * NOTE(review): presumably this is the !CONFIG_NUMA variant -- the enclosing
 * preprocessor condition is not visible in this hunk; confirm.
 */
static inline int nearest_node_nodemask(int node, nodemask_t *mask)
{
	return NUMA_NO_NODE;
}

static inline int memory_add_physaddr_to_nid(u64 start)
{
	return 0;
Loading