Commit 2a52ca7c authored by Tejun Heo's avatar Tejun Heo
Browse files

sched_ext: Add scx_simple and scx_example_qmap example schedulers



Add two simple example BPF schedulers - simple and qmap.

* simple: In terms of scheduling, it behaves identically to not having any
  operation implemented at all. The two operations it implements are only to
  improve visibility and exit handling. On certain homogeneous
  configurations, this actually can perform pretty well.

* qmap: A fixed five level priority scheduler to demonstrate queueing PIDs
  on BPF maps for scheduling. While not very practical, this is useful as a
  simple example and will be used to demonstrate different features.

v7: - Compat helpers stripped out in preparation for upstreaming as the
      upstreamed patchset will be the baseline. Utility macros that can be
      used to implement compat features are kept.

    - Explicitly disable map autoattach on struct_ops to avoid trying to
      attach twice while maintaining compatibility with older libbpf.

v6: - Common header files reorganized and cleaned up. Compat helpers are
      added to demonstrate how schedulers can maintain backward
      compatibility with older kernels while making use of newly added
      features.

    - simple_select_cpu() added to keep track of the number of local
      dispatches. This is needed because the default ops.select_cpu()
      implementation is updated to dispatch directly and won't call
      ops.enqueue().

    - Updated to reflect the sched_ext API changes. Switching all tasks is
      the default behavior now and scx_qmap supports partial switching when
      `-p` is specified.

    - tools/sched_ext/Kconfig dropped. This will be included in the doc
      instead.

v5: - Improve Makefile. Build artifacts are now collected into a separate
      dir which can be changed. Install and help targets are added and
      clean actually cleans everything.

    - MEMBER_VPTR() improved to improve access to structs. ARRAY_ELEM_PTR()
      and RESIZEABLE_ARRAY() are added to support resizable arrays in .bss.

    - Add scx_common.h which provides common utilities to user code such as
      SCX_BUG[_ON]() and RESIZE_ARRAY().

    - Use SCX_BUG[_ON]() to simplify error handling.

v4: - Dropped _example prefix from scheduler names.

v3: - Rename scx_example_dummy to scx_example_simple and restructure a bit
      to ease later additions. Comment updates.

    - Added declarations for BPF inline iterators. In the future, hopefully,
      these will be consolidated into a generic BPF header so that they
      don't need to be replicated here.

v2: - Updated with the generic BPF cpumask helpers.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: David Vernet <dvernet@meta.com>
Acked-by: Josh Don <joshdon@google.com>
Acked-by: Hao Luo <haoluo@google.com>
Acked-by: Barret Rhoden <brho@google.com>
parent f0e1a064
Loading
Loading
Loading
Loading
+7 −1
Original line number Diff line number Diff line
@@ -1355,6 +1355,12 @@ ifneq ($(wildcard $(resolve_btfids_O)),)
	$(Q)$(MAKE) -sC $(srctree)/tools/bpf/resolve_btfids O=$(resolve_btfids_O) clean
endif

tools-clean-targets := sched_ext
PHONY += $(tools-clean-targets)
$(tools-clean-targets):
	$(Q)$(MAKE) -sC tools $@_clean
tools_clean: $(tools-clean-targets)

# Clear a bunch of variables before executing the submake
ifeq ($(quiet),silent_)
tools_silent=s
@@ -1527,7 +1533,7 @@ PHONY += $(mrproper-dirs) mrproper
$(mrproper-dirs):
	$(Q)$(MAKE) $(clean)=$(patsubst _mrproper_%,%,$@)

mrproper: clean $(mrproper-dirs)
mrproper: clean $(mrproper-dirs) tools_clean
	$(call cmd,rmfiles)
	@find . $(RCS_FIND_IGNORE) \
		\( -name '*.rmeta' \) \
+9 −1
Original line number Diff line number Diff line
@@ -28,6 +28,7 @@ help:
	@echo '  pci                    - PCI tools'
	@echo '  perf                   - Linux performance measurement and analysis tool'
	@echo '  selftests              - various kernel selftests'
	@echo '  sched_ext              - sched_ext example schedulers'
	@echo '  bootconfig             - boot config tool'
	@echo '  spi                    - spi tools'
	@echo '  tmon                   - thermal monitoring and tuning tool'
@@ -91,6 +92,9 @@ perf: FORCE
	$(Q)mkdir -p $(PERF_O) .
	$(Q)$(MAKE) --no-print-directory -C perf O=$(PERF_O) subdir=

sched_ext: FORCE
	$(call descend,sched_ext)

selftests: FORCE
	$(call descend,testing/$@)

@@ -184,6 +188,9 @@ perf_clean:
	$(Q)mkdir -p $(PERF_O) .
	$(Q)$(MAKE) --no-print-directory -C perf O=$(PERF_O) subdir= clean

sched_ext_clean:
	$(call descend,sched_ext,clean)

selftests_clean:
	$(call descend,testing/$(@:_clean=),clean)

@@ -213,6 +220,7 @@ clean: acpi_clean counter_clean cpupower_clean hv_clean firewire_clean \
		mm_clean bpf_clean iio_clean x86_energy_perf_policy_clean tmon_clean \
		freefall_clean build_clean libbpf_clean libsubcmd_clean \
		gpio_clean objtool_clean leds_clean wmi_clean pci_clean firmware_clean debugging_clean \
		intel-speed-select_clean tracing_clean thermal_clean thermometer_clean thermal-engine_clean
		intel-speed-select_clean tracing_clean thermal_clean thermometer_clean thermal-engine_clean \
		sched_ext_clean

.PHONY: FORCE
+2 −0
Original line number Diff line number Diff line
tools/
build/
+246 −0
Original line number Diff line number Diff line
# SPDX-License-Identifier: GPL-2.0
# Copyright (c) 2022 Meta Platforms, Inc. and affiliates.
include ../build/Build.include
include ../scripts/Makefile.arch
include ../scripts/Makefile.include

all: all_targets

# Toolchain selection.
#
# When LLVM is non-empty the userspace parts are built with clang: a value
# ending in '/' is used as a prefix for the clang binary and a value starting
# with '-' as a suffix; any other non-empty value selects plain "clang".
# When LLVM is empty, $(CROSS_COMPILE)gcc is used instead.
ifneq ($(LLVM),)
ifneq ($(filter %/,$(LLVM)),)
LLVM_PREFIX := $(LLVM)
else ifneq ($(filter -%,$(LLVM)),)
LLVM_SUFFIX := $(LLVM)
endif

# Default clang target triple for each known ARCH value.
CLANG_TARGET_FLAGS_arm          := arm-linux-gnueabi
CLANG_TARGET_FLAGS_arm64        := aarch64-linux-gnu
CLANG_TARGET_FLAGS_hexagon      := hexagon-linux-musl
CLANG_TARGET_FLAGS_m68k         := m68k-linux-gnu
CLANG_TARGET_FLAGS_mips         := mipsel-linux-gnu
CLANG_TARGET_FLAGS_powerpc      := powerpc64le-linux-gnu
CLANG_TARGET_FLAGS_riscv        := riscv64-linux-gnu
CLANG_TARGET_FLAGS_s390         := s390x-linux-gnu
CLANG_TARGET_FLAGS_x86          := x86_64-linux-gnu
CLANG_TARGET_FLAGS              := $(CLANG_TARGET_FLAGS_$(ARCH))

# Without CROSS_COMPILE the triple comes from the table above; with it, the
# triple is the file-name part of CROSS_COMPILE minus its trailing '-'.
ifeq ($(CROSS_COMPILE),)
ifeq ($(CLANG_TARGET_FLAGS),)
# The table lives in this file, so point the user here rather than at lib.mk.
$(error Specify CROSS_COMPILE or add '--target=' option to this Makefile)
else
CLANG_FLAGS     += --target=$(CLANG_TARGET_FLAGS)
endif # CLANG_TARGET_FLAGS
else
CLANG_FLAGS     += --target=$(notdir $(CROSS_COMPILE:%-=%))
endif # CROSS_COMPILE

CC := $(LLVM_PREFIX)clang$(LLVM_SUFFIX) $(CLANG_FLAGS) -fintegrated-as
else
CC := $(CROSS_COMPILE)gcc
endif # LLVM

# Absolute locations inside the source tree, all derived from the directory
# make is running in (this directory) and its parent.
CURDIR := $(abspath .)
TOOLSDIR := $(abspath ..)
LIBDIR := $(TOOLSDIR)/lib
BPFDIR := $(LIBDIR)/bpf
TOOLSINCDIR := $(TOOLSDIR)/include
BPFTOOLDIR := $(TOOLSDIR)/bpf/bpftool
APIDIR := $(TOOLSINCDIR)/uapi
# Generated headers two levels up; GENHDR is only probed for existence below.
GENDIR := $(abspath ../../include/generated)
GENHDR := $(GENDIR)/autoconf.h

# Build output layout. Everything goes under $(OUTPUT_DIR), which is
# ./build by default or $(O)/build when O is set.
ifeq ($(O),)
OUTPUT_DIR := $(CURDIR)/build
else
OUTPUT_DIR := $(O)/build
endif # O
OBJ_DIR := $(OUTPUT_DIR)/obj
INCLUDE_DIR := $(OUTPUT_DIR)/include
BPFOBJ_DIR := $(OBJ_DIR)/libbpf
SCXOBJ_DIR := $(OBJ_DIR)/sched_ext
BINDIR := $(OUTPUT_DIR)/bin
BPFOBJ := $(BPFOBJ_DIR)/libbpf.a
# Host-side tools get their own directories when cross-compiling; otherwise
# the HOST_* variables alias the regular output directories, which makes
# HOST_BPFOBJ the same path as BPFOBJ.
ifneq ($(CROSS_COMPILE),)
HOST_BUILD_DIR		:= $(OBJ_DIR)/host
HOST_OUTPUT_DIR	:= host-tools
HOST_INCLUDE_DIR	:= $(HOST_OUTPUT_DIR)/include
else
HOST_BUILD_DIR		:= $(OBJ_DIR)
HOST_OUTPUT_DIR	:= $(OUTPUT_DIR)
HOST_INCLUDE_DIR	:= $(INCLUDE_DIR)
endif
HOST_BPFOBJ := $(HOST_BUILD_DIR)/libbpf/libbpf.a
RESOLVE_BTFIDS := $(HOST_BUILD_DIR)/resolve_btfids/resolve_btfids
DEFAULT_BPFTOOL := $(HOST_OUTPUT_DIR)/sbin/bpftool

# Candidate locations of a vmlinux to take BTF from, in priority order.
# VMLINUX_BTF is the absolute path of the first candidate that exists; both
# variables may be preset by the caller. Parsing stops with an error if no
# candidate exists.
VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux)					\
		     $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux)		\
		     ../../vmlinux						\
		     /sys/kernel/btf/vmlinux					\
		     /boot/vmlinux-$(shell uname -r)
VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS))))
ifeq ($(VMLINUX_BTF),)
$(error Cannot find a vmlinux for VMLINUX_BTF at any of "$(VMLINUX_BTF_PATHS)")
endif

# bpftool binary to use; defaults to the one built by this Makefile.
BPFTOOL ?= $(DEFAULT_BPFTOOL)

# Define HAVE_GENHDR only when the generated autoconf.h exists.
ifneq ($(wildcard $(GENHDR)),)
  GENFLAGS := -DHAVE_GENHDR
endif

# Flags for the userspace side of the schedulers (compiled with $(CC)).
CFLAGS += -g -O2 -rdynamic -pthread -Wall -Werror $(GENFLAGS)			\
	  -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR)				\
	  -I$(TOOLSINCDIR) -I$(APIDIR) -I$(CURDIR)/include

# Silence some warnings when compiled with clang
ifneq ($(LLVM),)
CFLAGS += -Wno-unused-command-line-argument
endif

# Libraries appended to the link line of each scheduler binary.
LDFLAGS = -lelf -lz -lpthread

# Non-empty when $(CC) reports a little-endian target, empty otherwise.
# Simply expanded (':=') so the compiler is queried once at parse time
# instead of on every expansion of a variable that references this one.
IS_LITTLE_ENDIAN := $(shell $(CC) -dM -E - </dev/null |				\
			grep 'define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__')

# Get Clang's default includes on this system, as opposed to those seen by
# '-target bpf'. This fixes "missing" files on some architectures/distros,
# such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc.
#
# Use '-idirafter': Don't interfere with include mechanics except where the
# build would have failed anyways.
#
# $(1): the compiler command to query.
#
# Expands to one '-idirafter <dir>' per directory listed in the compiler's
# "search starts here" output, followed by -D__riscv_xlen=N and
# -D__BITS_PER_LONG=N when the compiler predefines __riscv_xlen.
define get_sys_includes
$(shell $(1) -v -E - </dev/null 2>&1 \
	| sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') \
$(shell $(1) -dM -E - </dev/null | grep '__riscv_xlen ' | awk '{printf("-D__riscv_xlen=%d -D__BITS_PER_LONG=%d", $$3, $$3)}')
endef

# Flags for compiling the *.bpf.c sources with $(CLANG). Recursively
# expanded ('='), so get_sys_includes is evaluated when the flags are used
# rather than when this line is parsed.
BPF_CFLAGS = -g -D__TARGET_ARCH_$(SRCARCH)					\
	     $(if $(IS_LITTLE_ENDIAN),-mlittle-endian,-mbig-endian)		\
	     -I$(CURDIR)/include -I$(CURDIR)/include/bpf-compat			\
	     -I$(INCLUDE_DIR) -I$(APIDIR)					\
	     -I../../include							\
	     $(call get_sys_includes,$(CLANG))					\
	     -Wall -Wno-compare-distinct-pointer-types				\
	     -O2 -mcpu=v3

# Output directories that other rules list as order-only prerequisites.
# sort removes libbpf duplicates when not cross-building
MAKE_DIRS := $(sort $(OBJ_DIR)/libbpf $(HOST_BUILD_DIR)/libbpf			\
	       $(HOST_BUILD_DIR)/bpftool $(HOST_BUILD_DIR)/resolve_btfids	\
	       $(INCLUDE_DIR) $(SCXOBJ_DIR) $(BINDIR))

# Create any of the directories above on demand.
$(MAKE_DIRS):
	$(call msg,MKDIR,,$@)
	$(Q)mkdir -p $@

# Build libbpf.a through a sub-make in $(BPFDIR), with objects placed in
# $(OBJ_DIR)/libbpf/. The sub-make is given the 'all' and 'install_headers'
# goals with DESTDIR=$(OUTPUT_DIR) and an empty prefix. Rebuilt when any
# libbpf *.c/*.h file, its Makefile, or the uapi linux/bpf.h changes.
$(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile)			\
	   $(APIDIR)/linux/bpf.h						\
	   | $(OBJ_DIR)/libbpf
	$(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(OBJ_DIR)/libbpf/	\
		    EXTRA_CFLAGS='-g -O0 -fPIC'					\
		    DESTDIR=$(OUTPUT_DIR) prefix= all install_headers

# Build bpftool through a sub-make in $(BPFTOOLDIR) using the host toolchain:
# ARCH and CROSS_COMPILE are cleared and CC/LD are set to $(HOSTCC)/$(HOSTLD).
# The sub-make is given the 'install-bin' goal with DESTDIR set to
# $(HOST_OUTPUT_DIR)/ and an empty prefix.
$(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile)	\
		    $(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/bpftool
	$(Q)$(MAKE) $(submake_extras)  -C $(BPFTOOLDIR)				\
		    ARCH= CROSS_COMPILE= CC=$(HOSTCC) LD=$(HOSTLD)		\
		    EXTRA_CFLAGS='-g -O0'					\
		    OUTPUT=$(HOST_BUILD_DIR)/bpftool/				\
		    LIBBPF_OUTPUT=$(HOST_BUILD_DIR)/libbpf/			\
		    LIBBPF_DESTDIR=$(HOST_OUTPUT_DIR)/				\
		    prefix= DESTDIR=$(HOST_OUTPUT_DIR)/ install-bin

# Produce vmlinux.h. When VMLINUX_H is empty, dump the BTF in $(VMLINUX_BTF)
# as C with bpftool; otherwise copy the header that VMLINUX_H points to.
# The ifeq is evaluated at parse time, so only one of the two recipes exists.
$(INCLUDE_DIR)/vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL) | $(INCLUDE_DIR)
ifeq ($(VMLINUX_H),)
	$(call msg,GEN,,$@)
	$(Q)$(BPFTOOL) btf dump file $(VMLINUX_BTF) format c > $@
else
	$(call msg,CP,,$@)
	$(Q)cp "$(VMLINUX_H)" $@
endif

# Compile one BPF scheduler source into a BPF object with $(CLANG). Depends
# on vmlinux.h and every header under include/scx/; $(BPFOBJ) and the object
# directory are order-only prerequisites.
$(SCXOBJ_DIR)/%.bpf.o: %.bpf.c $(INCLUDE_DIR)/vmlinux.h include/scx/*.h		\
		       | $(BPFOBJ) $(SCXOBJ_DIR)
	$(call msg,CLNG-BPF,,$(notdir $@))
	$(Q)$(CLANG) $(BPF_CFLAGS) -target bpf -c $< -o $@

# Generate the skeleton and subskeleton headers for one scheduler.
#
# The BPF object is passed through "bpftool gen object" three times; the
# second and third outputs are compared with diff and the recipe fails if
# they differ. Both headers are then generated from the third output, using
# the pattern stem ($*) as the skeleton name.
$(INCLUDE_DIR)/%.bpf.skel.h: $(SCXOBJ_DIR)/%.bpf.o $(INCLUDE_DIR)/vmlinux.h $(BPFTOOL)
	$(call msg,GEN-SKEL,,$(@F))
	$(Q)$(BPFTOOL) gen object $(<:.o=.linked1.o) $<
	$(Q)$(BPFTOOL) gen object $(<:.o=.linked2.o) $(<:.o=.linked1.o)
	$(Q)$(BPFTOOL) gen object $(<:.o=.linked3.o) $(<:.o=.linked2.o)
	$(Q)diff $(<:.o=.linked2.o) $(<:.o=.linked3.o)
	$(Q)$(BPFTOOL) gen skeleton $(<:.o=.linked3.o) name $* > $@
	$(Q)$(BPFTOOL) gen subskeleton $(<:.o=.linked3.o) name $* > $(@:.skel.h=.subskel.h)

SCX_COMMON_DEPS := include/scx/common.h include/scx/user_exit_info.h | $(BINDIR)

c-sched-targets = scx_simple scx_qmap

# Build each userspace scheduler binary from <name>.c and its generated BPF
# skeleton header.
#
# The object is compiled with $(CC), i.e. for the target architecture, so it
# is linked against the target libbpf archive $(BPFOBJ) rather than
# $(HOST_BPFOBJ). The two are the same path unless CROSS_COMPILE is set.
# $< is the <name>.c prerequisite and $* is the scheduler name.
$(addprefix $(BINDIR)/,$(c-sched-targets)): \
	$(BINDIR)/%: \
		$(filter-out %.bpf.c,%.c) \
		$(INCLUDE_DIR)/%.bpf.skel.h \
		$(SCX_COMMON_DEPS)
	$(CC) $(CFLAGS) -c $< -o $(SCXOBJ_DIR)/$*.o
	$(CC) -o $@ $(SCXOBJ_DIR)/$*.o $(BPFOBJ) $(LDFLAGS)

$(c-sched-targets): %: $(BINDIR)/%

# Build everything, then copy the contents of $(BINDIR) into
# $(DESTDIR)/usr/local/bin/, creating that directory if needed.
install: all
	$(Q)mkdir -p $(DESTDIR)/usr/local/bin/
	$(Q)cp $(BINDIR)/* $(DESTDIR)/usr/local/bin/

# Remove the output directories plus any objects, generated skeleton headers
# and scheduler binaries left in the current directory.
clean:
	rm -rf $(OUTPUT_DIR) $(HOST_OUTPUT_DIR)
	rm -f *.o *.bpf.o *.bpf.skel.h *.bpf.subskel.h
	rm -f $(c-sched-targets)

# Print a usage summary. The install paths shown here match the install
# rule, which copies the binaries to $(DESTDIR)/usr/local/bin/.
help:
	@echo   'Building targets'
	@echo   '================'
	@echo   ''
	@echo   '  all		  - Compile all schedulers'
	@echo   ''
	@echo   'Alternatively, you may compile individual schedulers:'
	@echo   ''
	@printf '  %s\n' $(c-sched-targets)
	@echo   ''
	@echo   'For any scheduler build target, you may specify an alternative'
	@echo   'build output path with the O= environment variable. For example:'
	@echo   ''
	@echo   '   O=/tmp/sched_ext make all'
	@echo   ''
	@echo   'will compile all schedulers, and emit the build artifacts to'
	@echo   '/tmp/sched_ext/build.'
	@echo   ''
	@echo   ''
	@echo   'Installing targets'
	@echo   '=================='
	@echo   ''
	@echo   '  install	  - Compile and install all schedulers to /usr/local/bin.'
	@echo   '		    You may specify the DESTDIR= environment variable'
	@echo   '		    to indicate a prefix for /usr/local/bin. For example:'
	@echo   ''
	@echo   '                     DESTDIR=/tmp/sched_ext make install'
	@echo   ''
	@echo   '		    will build the schedulers in CWD/build, and'
	@echo   '		    install the schedulers to /tmp/sched_ext/usr/local/bin.'
	@echo   ''
	@echo   ''
	@echo   'Cleaning targets'
	@echo   '================'
	@echo   ''
	@echo   '  clean		  - Remove all generated files'

# Aggregate target behind 'all': build every scheduler in c-sched-targets.
all_targets: $(c-sched-targets)

# Targets that name actions or aliases rather than files. 'install' is
# listed so that a file or directory called "install" cannot make it appear
# up to date.
.PHONY: all all_targets $(c-sched-targets) clean help install

# delete failed targets
.DELETE_ON_ERROR:

# keep intermediate (.bpf.skel.h, .bpf.o, etc) targets
.SECONDARY:
+11 −0
Original line number Diff line number Diff line
/*
 * Dummy gnu/stubs.h. clang can end up including /usr/include/gnu/stubs.h when
 * compiling BPF files although its content doesn't play any role. The file in
 * turn includes stubs-64.h or stubs-32.h depending on whether __x86_64__ is
 * defined. When compiling a BPF source, __x86_64__ isn't set and thus
 * stubs-32.h is selected. However, the file is not there if the system doesn't
 * have 32bit glibc devel package installed leading to a build failure.
 *
 * The problem is worked around by making this file available in the include
 * search paths before the system one when building BPF.
 */
Loading