Commit de68c051 authored by Andrea Righi's avatar Andrea Righi Committed by Tejun Heo
Browse files

tools/sched_ext: Receive updates from SCX repo

Receive tools/sched_ext updates from https://github.com/sched-ext/scx to
sync userspace bits:

 - basic BPF arena allocator abstractions,

 - additional process flags definitions,

 - fixed is_migration_disabled() helper,

 - separate out user_exit_info BPF and user space code.

This also fixes the following warning when building the selftests:

 tools/sched_ext/include/scx/common.bpf.h:550:9: warning: 'likely' macro redefined [-Wmacro-redefined]
  550 | #define likely(x) __builtin_expect(!!(x), 1)
      |         ^

Co-developed-by: Cheng-Yang Chou <yphbchou0911@gmail.com>
Signed-off-by: Andrea Righi <arighi@nvidia.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
parent 8f5ae30d
Loading
Loading
Loading
Loading
+175 −0
Original line number Diff line number Diff line
/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
#pragma once

/*
 * Fall back to __PAGE_SIZE when the including file has not already defined
 * PAGE_SIZE.
 * NOTE(review): __PAGE_SIZE is not defined in this header; it is presumably
 * supplied by the generated vmlinux.h - confirm.
 */
#ifndef PAGE_SIZE
#define PAGE_SIZE __PAGE_SIZE
/*
 * For older kernels, try sizeof(struct genradix_node),
 * or, more flexibly:
 * static inline long __bpf_page_size(void) {
 *   return bpf_core_enum_value(enum page_size_enum___l, __PAGE_SIZE___l) ?: sizeof(struct genradix_node);
 * }
 * but the generated code is not great.
 */
#endif

/*
 * Arena pointer annotations and address-space casts.
 *
 * When the compiler defines __BPF_FEATURE_ADDR_SPACE_CAST and the build does
 * not define BPF_ARENA_FORCE_ASM, __arena and __arena_global expand to the
 * address_space(1) attribute and cast_kern()/cast_user() expand to nothing.
 * Otherwise the #else branch below is used.
 */
#if defined(__BPF_FEATURE_ADDR_SPACE_CAST) && !defined(BPF_ARENA_FORCE_ASM)
#define __arena __attribute__((address_space(1)))
#define __arena_global __attribute__((address_space(1)))
#define cast_kern(ptr) /* nop for bpf prog. emitted by LLVM */
#define cast_user(ptr) /* nop for bpf prog. emitted by LLVM */
#else

/* emit instruction:
 * rX = rX .off = BPF_ADDR_SPACE_CAST .imm32 = (dst_as << 16) | src_as
 *
 * This is a workaround for LLVM compiler versions without
 * __BPF_FEATURE_ADDR_SPACE_CAST that do not automatically cast between arena
 * pointers and native kernel/userspace ones. In this case we explicitly do so
 * with cast_kern() and cast_user(). E.g., in the Linux kernel tree,
 * tools/testing/selftests/bpf includes tests that use these macros to implement
 * linked lists and hashtables backed by arena memory. In sched_ext, we use
 * cast_kern() and cast_user() for compatibility with older LLVM toolchains.
 *
 * The instruction is assembled by hand: byte 0xBF, then one register byte
 * chosen by the .ifc chain (the same register number in both nibbles, which
 * is what makes it "rX = rX"), then the 16-bit offset and the 32-bit
 * immediate. Only r0-r9 are matched by the chain. The macro is only defined
 * here if nothing defined bpf_addr_space_cast earlier, and its expansion
 * already ends with a ';'.
 */
#ifndef bpf_addr_space_cast
#define bpf_addr_space_cast(var, dst_as, src_as)\
	asm volatile(".byte 0xBF;		\
		     .ifc %[reg], r0;		\
		     .byte 0x00;		\
		     .endif;			\
		     .ifc %[reg], r1;		\
		     .byte 0x11;		\
		     .endif;			\
		     .ifc %[reg], r2;		\
		     .byte 0x22;		\
		     .endif;			\
		     .ifc %[reg], r3;		\
		     .byte 0x33;		\
		     .endif;			\
		     .ifc %[reg], r4;		\
		     .byte 0x44;		\
		     .endif;			\
		     .ifc %[reg], r5;		\
		     .byte 0x55;		\
		     .endif;			\
		     .ifc %[reg], r6;		\
		     .byte 0x66;		\
		     .endif;			\
		     .ifc %[reg], r7;		\
		     .byte 0x77;		\
		     .endif;			\
		     .ifc %[reg], r8;		\
		     .byte 0x88;		\
		     .endif;			\
		     .ifc %[reg], r9;		\
		     .byte 0x99;		\
		     .endif;			\
		     .short %[off];		\
		     .long %[as]"		\
		     : [reg]"+r"(var)		\
		     : [off]"i"(BPF_ADDR_SPACE_CAST) \
		     , [as]"i"((dst_as << 16) | src_as));
#endif

/*
 * Fallback definitions: __arena expands to nothing, __arena_global places the
 * object in the ".addr_space.1" section, and the casts are emitted explicitly:
 * cast_kern() passes dst_as = 0 / src_as = 1 and cast_user() passes
 * dst_as = 1 / src_as = 0. Both modify @ptr in place (it is a "+r" operand).
 */
#define __arena
#define __arena_global SEC(".addr_space.1")
#define cast_kern(ptr) bpf_addr_space_cast(ptr, 0, 1)
#define cast_user(ptr) bpf_addr_space_cast(ptr, 1, 0)
#endif

/*
 * Arena page kfuncs, declared as weak kernel symbols (__ksym __weak).
 * NOTE(review): their semantics (meaning of @addr, @node_id and @flags, and
 * the failure return value) are defined by the kernel rather than by this
 * header - consult the kernel's BPF arena implementation before relying on
 * them.
 */
void __arena* bpf_arena_alloc_pages(void *map, void __arena *addr, __u32 page_cnt,
				    int node_id, __u64 flags) __ksym __weak;
void bpf_arena_free_pages(void *map, void __arena *ptr, __u32 page_cnt) __ksym __weak;

/*
 * can_loop evaluates to true on the fall-through path and to false when the
 * l_break label is taken; __cond_break(expr) runs @expr when the l_break label
 * is taken. Three implementations are selected at build time:
 *
 *  - TEST defined: can_loop is the constant true and __cond_break(expr)
 *    expands to @expr unconditionally.
 *  - __BPF_FEATURE_MAY_GOTO defined: the assembler's "may_goto" mnemonic is
 *    used directly.
 *  - otherwise: an instruction with opcode byte 0xe5 is assembled by hand,
 *    with separate little-endian and big-endian layouts for the jump offset.
 *    NOTE(review): this is presumably the raw encoding of the same may_goto
 *    instruction for assemblers that lack the mnemonic - confirm.
 *
 * Note that cond_break can only be portably used in the body of a breakable
 * construct, whereas can_loop can be used anywhere.
 */
#ifdef TEST
#define can_loop true
#define __cond_break(expr) expr
#else
#ifdef __BPF_FEATURE_MAY_GOTO
/* Statement expression: yields false only if may_goto jumps to l_break. */
#define can_loop					\
	({ __label__ l_break, l_continue;		\
	bool ret = true;				\
	asm volatile goto("may_goto %l[l_break]"	\
		      :::: l_break);			\
	goto l_continue;				\
	l_break: ret = false;				\
	l_continue:;					\
	ret;						\
	})

/* Runs @expr only if may_goto jumps to l_break; otherwise falls through. */
#define __cond_break(expr)				\
	({ __label__ l_break, l_continue;		\
	asm volatile goto("may_goto %l[l_break]"	\
		      :::: l_break);			\
	goto l_continue;				\
	l_break: expr;					\
	l_continue:;					\
	})
#else
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
/*
 * Little-endian hand encoding. The jump distance is computed as
 * (l_break - 1b - 8) / 8, masked to 16 bits and emitted in the low half of
 * the .long that follows the two leading bytes.
 */
#define can_loop					\
	({ __label__ l_break, l_continue;		\
	bool ret = true;				\
	asm volatile goto("1:.byte 0xe5;		\
		      .byte 0;				\
		      .long ((%l[l_break] - 1b - 8) / 8) & 0xffff;	\
		      .short 0"				\
		      :::: l_break);			\
	goto l_continue;				\
	l_break: ret = false;				\
	l_continue:;					\
	ret;						\
	})

#define __cond_break(expr)				\
	({ __label__ l_break, l_continue;		\
	asm volatile goto("1:.byte 0xe5;		\
		      .byte 0;				\
		      .long ((%l[l_break] - 1b - 8) / 8) & 0xffff;	\
		      .short 0"				\
		      :::: l_break);			\
	goto l_continue;				\
	l_break: expr;					\
	l_continue:;					\
	})
#else
/*
 * Big-endian hand encoding. Same jump distance as the little-endian variant,
 * but shifted left by 16 so that it occupies the upper half of the .long.
 */
#define can_loop					\
	({ __label__ l_break, l_continue;		\
	bool ret = true;				\
	asm volatile goto("1:.byte 0xe5;		\
		      .byte 0;				\
		      .long (((%l[l_break] - 1b - 8) / 8) & 0xffff) << 16;	\
		      .short 0"				\
		      :::: l_break);			\
	goto l_continue;				\
	l_break: ret = false;				\
	l_continue:;					\
	ret;						\
	})

#define __cond_break(expr)				\
	({ __label__ l_break, l_continue;		\
	asm volatile goto("1:.byte 0xe5;		\
		      .byte 0;				\
		      .long (((%l[l_break] - 1b - 8) / 8) & 0xffff) << 16;	\
		      .short 0"				\
		      :::: l_break);			\
	goto l_continue;				\
	l_break: expr;					\
	l_continue:;					\
	})
#endif /* __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ */
#endif /* __BPF_FEATURE_MAY_GOTO */
#endif /* TEST */

/* cond_break leaves the enclosing loop/switch; cond_break_label jumps to @label. */
#define cond_break __cond_break(break)
#define cond_break_label(label) __cond_break(goto label)


/*
 * Preemption control kfuncs, declared as weak kernel symbols.
 * NOTE(review): presumably intended to be used as a matched disable/enable
 * pair - confirm against the kernel's kfunc documentation.
 */
void bpf_preempt_disable(void) __weak __ksym;
void bpf_preempt_enable(void) __weak __ksym;
+33 −0
Original line number Diff line number Diff line
/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
#pragma once

/*
 * arena_container_of - recover the enclosing object from a member pointer.
 * @ptr: pointer to @member inside an object of type @type
 * @type: type of the enclosing object
 * @member: name of the member @ptr points at
 *
 * Subtracts the member's offset from @ptr (byte-wise, via a void pointer) and
 * yields the result as a "type *". Only defined when no earlier definition of
 * arena_container_of exists.
 */
#ifndef arena_container_of
#define arena_container_of(ptr, type, member)				\
	({								\
		void __arena *__member_addr = (void __arena *)(ptr);	\
		((type *)(__member_addr - offsetof(type, member)));	\
	})
#endif

/* Provide the definition of PAGE_SIZE. */
#include <sys/user.h>

/*
 * User-space versions of the arena annotations: the markers expand to nothing
 * and the address-space casts are no-ops.
 */
#define __arena
#define __arg_arena
#define cast_kern(ptr) /* nop for user space */
#define cast_user(ptr) /* nop for user space */
/*
 * Weak one-byte "arena" symbol.
 * NOTE(review): presumably a placeholder that a strong definition elsewhere
 * overrides - confirm against the users of this header.
 */
char __attribute__((weak)) arena[1];

/*
 * Fallback offsetof() for translation units that have not pulled in a
 * definition already (e.g. via <stddef.h>).
 *
 * The compiler builtin is used instead of the "&((type *)0)->member" idiom:
 * it does not form a member access through a null pointer and it is a genuine
 * integer constant expression. The cast keeps the result type at
 * unsigned long, as before.
 */
#ifndef offsetof
#define offsetof(type, member)  ((unsigned long)__builtin_offsetof(type, member))
#endif

/*
 * User-space stand-in for the arena page allocator: ignores all of its
 * arguments and always returns NULL.
 * NOTE(review): presumably exists only so that code shared with the BPF side
 * compiles in user space - confirm.
 */
static inline void __arena* bpf_arena_alloc_pages(void *map, void *addr, __u32 page_cnt,
						  int node_id, __u64 flags)
{
	return NULL;
}
/* User-space stand-in for the arena page release call: does nothing. */
static inline void bpf_arena_free_pages(void *map, void __arena *ptr, __u32 page_cnt)
{
}
+93 −9
Original line number Diff line number Diff line
@@ -24,14 +24,26 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <asm-generic/errno.h>
#include "user_exit_info.h"
#include "user_exit_info.bpf.h"
#include "enum_defs.autogen.h"

/*
 * Process flag (PF_*) bit values and a clock id, defined locally.
 * NOTE(review): presumably these mirror the kernel's own definitions and must
 * be kept in sync with them - confirm.
 */
#define PF_IDLE				0x00000002	/* I am an IDLE thread */
#define PF_IO_WORKER			0x00000010	/* Task is an IO worker */
#define PF_WQ_WORKER			0x00000020	/* I'm a workqueue worker */
#define PF_KCOMPACTD			0x00010000      /* I am kcompactd */
#define PF_KSWAPD			0x00020000      /* I am kswapd */
#define PF_KTHREAD			0x00200000	/* I am a kernel thread */
#define PF_EXITING			0x00000004	/* presumably: task is exiting - confirm */
#define CLOCK_MONOTONIC			1		/* clock id; presumably matches the UAPI value - confirm */

/* Default NR_CPUS to 1024 unless the build already provides a value. */
#ifndef NR_CPUS
#define NR_CPUS 1024
#endif

/* Default NUMA_NO_NODE to -1 unless it is already defined. */
#ifndef NUMA_NO_NODE
#define	NUMA_NO_NODE	(-1)
#endif

extern int LINUX_KERNEL_VERSION __kconfig;
extern const char CONFIG_CC_VERSION_TEXT[64] __kconfig __weak;
extern const char CONFIG_LOCALVERSION[64] __kconfig __weak;
@@ -107,6 +119,9 @@ void scx_bpf_events(struct scx_event_stats *events, size_t events__sz) __ksym __
/*
 * Empty variadic function carrying the printf format attribute (format string
 * is argument 1, variadic arguments start at 2). It has no body; calling it
 * only lets the compiler check a format string against its arguments.
 */
static inline __attribute__((format(printf, 1, 2)))
void ___scx_bpf_bstr_format_checker(const char *fmt, ...) {}

/*
 * Two-level stringification. SCX_STRINGIFY() turns its argument into a string
 * literal exactly as written; SCX_TOSTRING() lets the preprocessor expand the
 * argument first, so SCX_TOSTRING(__LINE__) yields the line number as a
 * string rather than "__LINE__".
 */
#define SCX_STRINGIFY(token) #token
#define SCX_TOSTRING(expr) SCX_STRINGIFY(expr)

/*
 * Helper macro for initializing the fmt and variadic argument inputs to both
 * bstr exit kfuncs. Callers to this function should use ___fmt and ___param to
@@ -141,13 +156,15 @@ void ___scx_bpf_bstr_format_checker(const char *fmt, ...) {}
 * scx_bpf_error() wraps the scx_bpf_error_bstr() kfunc with variadic arguments
 * instead of an array of u64. Invoking this macro will cause the scheduler to
 * exit in an erroneous state, with diagnostic information being passed to the
 * user.
 * user. It appends the file and line number to aid debugging.
 */
#define scx_bpf_error(fmt, args...)						\
({										\
	scx_bpf_bstr_preamble(fmt, args)					\
	scx_bpf_bstr_preamble(							\
		__FILE__ ":" SCX_TOSTRING(__LINE__) ": " fmt, ##args)		\
	scx_bpf_error_bstr(___fmt, ___param, sizeof(___param));			\
	___scx_bpf_bstr_format_checker(fmt, ##args);				\
	___scx_bpf_bstr_format_checker(						\
		__FILE__ ":" SCX_TOSTRING(__LINE__) ": " fmt, ##args);		\
})

/*
@@ -229,6 +246,7 @@ BPF_PROG(name, ##args)
 * be a pointer to the area. Use `MEMBER_VPTR(*ptr, .member)` instead of
 * `MEMBER_VPTR(ptr, ->member)`.
 */
#ifndef MEMBER_VPTR
#define MEMBER_VPTR(base, member) (typeof((base) member) *)			\
({										\
	u64 __base = (u64)&(base);						\
@@ -245,6 +263,7 @@ BPF_PROG(name, ##args)
		  [max]"i"(sizeof(base) - sizeof((base) member)));		\
	__addr;									\
})
#endif /* MEMBER_VPTR */

/**
 * ARRAY_ELEM_PTR - Obtain the verified pointer to an array element
@@ -260,6 +279,7 @@ BPF_PROG(name, ##args)
 * size of the array to compute the max, which will result in rejection by
 * the verifier.
 */
#ifndef ARRAY_ELEM_PTR
#define ARRAY_ELEM_PTR(arr, i, n) (typeof(arr[i]) *)				\
({										\
	u64 __base = (u64)arr;							\
@@ -274,7 +294,7 @@ BPF_PROG(name, ##args)
		  [max]"r"(sizeof(arr[0]) * ((n) - 1)));			\
	__addr;									\
})

#endif /* ARRAY_ELEM_PTR */

/*
 * BPF declarations and helpers
@@ -438,8 +458,27 @@ static __always_inline const struct cpumask *cast_mask(struct bpf_cpumask *mask)
 */
static inline bool is_migration_disabled(const struct task_struct *p)
{
	/*
	 * Testing p->migration_disabled in BPF code is tricky because
	 * migration is _always_ disabled while running the BPF code.
	 * The prolog (__bpf_prog_enter) and epilog (__bpf_prog_exit) for BPF
	 * code execution disable and re-enable the migration of the current
	 * task, respectively. So, the _current_ task of the sched_ext ops is
	 * always migration-disabled. Moreover, p->migration_disabled could be
	 * two or greater when a sched_ext ops BPF code (e.g., ops.tick) is
	 * executed in the middle of the other BPF code execution.
	 *
	 * Therefore, we should decide that the _current_ task is
	 * migration-disabled only when its migration_disabled count is greater
	 * than one. In other words, when p->migration_disabled == 1, there is
	 * an ambiguity, so we should check if @p is the current task or not.
	 */

	/* Kernels without the field cannot report a disabled state. */
	if (!bpf_core_field_exists(p->migration_disabled))
		return false;

	/* A count of exactly one is only meaningful for a non-current task. */
	if (p->migration_disabled == 1)
		return bpf_get_current_task_btf() != p;

	/* Zero means enabled; two or more means genuinely disabled. */
	return p->migration_disabled;
}

@@ -547,9 +586,15 @@ static inline bool time_in_range_open(u64 a, u64 b, u64 c)
 */

/* useful compiler attributes */
/*
 * Branch-prediction hints and the unused-symbol attribute. Each one is only
 * defined when the including file has not provided its own definition, which
 * avoids macro-redefinition warnings.
 */
#if !defined(likely)
#define likely(cond) __builtin_expect(!!(cond), 1)
#endif

#if !defined(unlikely)
#define unlikely(cond) __builtin_expect(!!(cond), 0)
#endif

#if !defined(__maybe_unused)
#define __maybe_unused __attribute__((__unused__))
#endif

/*
 * READ/WRITE_ONCE() are from kernel (include/asm-generic/rwonce.h). They
@@ -632,6 +677,26 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
	__u.__val;								\
})

/*
 * __calc_avg - Calculate an exponentially weighted moving average (EWMA) of
 * @old and @new. @decay is a shift count that controls how much of @old
 * remains; the regular update is
 *
 *	old - (old >> decay) + (new >> decay)
 *
 * so with a larger @decay the moving average changes slowly, exhibiting
 * fewer fluctuations.
 *
 * When either input is below 1 << decay, a fixed shift of 1 is used instead.
 * The (old == 1, new == 0) case is forced to 0, because the shift-by-one
 * formula would otherwise leave the average stuck at 1.
 *
 * Every argument is evaluated exactly once, and all temporaries carry an
 * __avg_ prefix so that caller variables named e.g. "ret" or "thr" are not
 * captured by the macro's own locals. The result has the type of @old.
 */
#define __calc_avg(old, new, decay) ({						\
	__typeof__(old) __avg_old = (old);					\
	__typeof__(new) __avg_new = (new);					\
	__typeof__(decay) __avg_decay = (decay);				\
	__typeof__(decay) __avg_thr = 1 << __avg_decay;				\
	__typeof__(old) __avg_ret;						\
										\
	if ((__avg_old < __avg_thr) || (__avg_new < __avg_thr)) {		\
		if ((__avg_old == 1) && (__avg_new == 0))			\
			__avg_ret = 0;						\
		else								\
			__avg_ret = (__avg_old - (__avg_old >> 1)) +		\
				    (__avg_new >> 1);				\
	} else {								\
		__avg_ret = (__avg_old - (__avg_old >> __avg_decay)) +		\
			    (__avg_new >> __avg_decay);				\
	}									\
	__avg_ret;								\
})

/*
 * log2_u32 - Compute the base 2 logarithm of a 32-bit exponential value.
 * @v: The value for which we're computing the base 2 logarithm.
@@ -662,6 +727,25 @@ static inline u32 log2_u64(u64 v)
                return log2_u32(v) + 1;
}

/*
 * __sqrt_u64 - Calculate the integer square root of @x, i.e. floor(sqrt(x)),
 * using Newton's method.
 *
 * The number of Newton steps is capped at 8 so the loop has a fixed bound.
 * To converge within that budget for every 64-bit input, the iteration does
 * not start from @x itself: it starts from a power of two that is strictly
 * above the true root and at most twice as large. From such a seed the
 * relative error roughly squares on every step, so a handful of steps reach
 * the exact result. (Starting from min(x, 2^32) only halves the estimate per
 * step while it is far from the root; e.g. x = 1000000 ended at 3990.)
 */
static inline u64 __sqrt_u64(u64 x)
{
	u64 r = 1, t = x;

	if (x == 0 || x == 1)
		return x;

	/*
	 * Halving search for m such that 4^m <= x < 4^(m + 1); r tracks 2^m.
	 * Each test strips an even number of bits from t and adds half that
	 * number of bits to r.
	 */
	if (t >> 32) {
		r <<= 16;
		t >>= 32;
	}
	if (t >> 16) {
		r <<= 8;
		t >>= 16;
	}
	if (t >> 8) {
		r <<= 4;
		t >>= 8;
	}
	if (t >> 4) {
		r <<= 2;
		t >>= 4;
	}
	if (t >> 2)
		r <<= 1;

	/* Seed: 2^(m + 1) > sqrt(x). At most 2^32, so r + q cannot overflow. */
	r <<= 1;

	for (int i = 0; i < 8; ++i) {
		u64 q = x / r;

		/* r <= x / r means r * r <= x: r has reached floor(sqrt(x)). */
		if (r <= q)
			break;
		r = (r + q) >> 1;
	}
	return r;
}

/*
 * Return a value proportionally scaled to the task's weight.
 */
+3 −2
Original line number Diff line number Diff line
@@ -75,8 +75,9 @@ typedef int64_t s64;
#include "enums.h"

/* not available when building kernel tools/sched_ext */
#if __has_include(<lib/sdt_task.h>)
#include <lib/sdt_task.h>
#if __has_include(<lib/sdt_task_defs.h>)
#include "bpf_arena_common.h"
#include <lib/sdt_task_defs.h>
#endif

#endif	/* __SCHED_EXT_COMMON_H */
+5 −0
Original line number Diff line number Diff line
@@ -38,6 +38,7 @@ void scx_bpf_dispatch_from_dsq_set_slice___compat(struct bpf_iter_scx_dsq *it__i
void scx_bpf_dispatch_from_dsq_set_vtime___compat(struct bpf_iter_scx_dsq *it__iter, u64 vtime) __ksym __weak;
bool scx_bpf_dispatch_from_dsq___compat(struct bpf_iter_scx_dsq *it__iter, struct task_struct *p, u64 dsq_id, u64 enq_flags) __ksym __weak;
bool scx_bpf_dispatch_vtime_from_dsq___compat(struct bpf_iter_scx_dsq *it__iter, struct task_struct *p, u64 dsq_id, u64 enq_flags) __ksym __weak;
/*
 * Declared as a weak kernel symbol so that programs still load on kernels that
 * do not provide it; use __COMPAT_bpf_cpumask_populate() to call it safely.
 * NOTE(review): presumably fills @dst from the @src__sz bytes at @src -
 * confirm against the kernel's kfunc documentation.
 */
int bpf_cpumask_populate(struct cpumask *dst, void *src, size_t src__sz) __ksym __weak;

#define scx_bpf_dsq_insert(p, dsq_id, slice, enq_flags)				\
	(bpf_ksym_exists(scx_bpf_dsq_insert) ?					\
@@ -82,6 +83,10 @@ bool scx_bpf_dispatch_vtime_from_dsq___compat(struct bpf_iter_scx_dsq *it__iter,
	  scx_bpf_dispatch_vtime_from_dsq___compat((it__iter), (p), (dsq_id), (enq_flags)) : \
	  false))

/*
 * Call bpf_cpumask_populate() when the running kernel exposes the kfunc;
 * otherwise evaluate to -EOPNOTSUPP without calling anything.
 */
#define __COMPAT_bpf_cpumask_populate(cpumask, src, size__sz)			\
	(bpf_ksym_exists(bpf_cpumask_populate) ?				\
	 bpf_cpumask_populate((cpumask), (src), (size__sz)) :			\
	 -EOPNOTSUPP)

#define scx_bpf_dispatch(p, dsq_id, slice, enq_flags)				\
	_Static_assert(false, "scx_bpf_dispatch() renamed to scx_bpf_dsq_insert()")

Loading