Commit a4946824 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull modules updates from Luis Chamberlain:
 "Finally something fun. Mike Rapoport does some cleanup to allow us to
  take module_alloc() out of modules into a new paint-shedded
  execmem_alloc() and execmem_free(), to emphasize that these helpers
  are actually used outside of modules.

  It starts with non-functional changes (API rename / placeholders) to
  then allow architectures to define their requirements in a new shiny
  struct execmem_info with ranges, and requirements for those ranges.

  Archs now can initialize this execmem_info as the last part of
  mm_core_init() if they have to diverge from the norm. Each range is a
  known type clearly articulated and spelled out in enum execmem_type.

  Although a lot of this is major cleanup and prep work for future
  enhancements an immediate clear gain is we get to enable KPROBES
  without MODULES now. That is ultimately what motivated picking this
  work up again, now with a smaller goal as a concrete stepping stone"

* tag 'modules-6.10-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/mcgrof/linux:
  bpf: remove CONFIG_BPF_JIT dependency on CONFIG_MODULES of
  kprobes: remove dependency on CONFIG_MODULES
  powerpc: use CONFIG_EXECMEM instead of CONFIG_MODULES where appropriate
  x86/ftrace: enable dynamic ftrace without CONFIG_MODULES
  arch: make execmem setup available regardless of CONFIG_MODULES
  powerpc: extend execmem_params for kprobes allocations
  arm64: extend execmem_info for generated code allocations
  riscv: extend execmem_params for generated code allocations
  mm/execmem, arch: convert remaining overrides of module_alloc to execmem
  mm/execmem, arch: convert simple overrides of module_alloc to execmem
  mm: introduce execmem_alloc() and execmem_free()
  module: make module_memory_{alloc,free} more self-contained
  sparc: simplify module_alloc()
  nios2: define virtual address space for modules
  mips: module: rename MODULE_START to MODULES_VADDR
  arm64: module: remove unneeded call to kasan_alloc_module_shadow()
  kallsyms: replace deprecated strncpy with strscpy
  module: allow UNUSED_KSYMS_WHITELIST to be relative against objtree.
parents 8c06da67 2c9e5d4a
Loading
Loading
Loading
Loading
+9 −1
Original line number Diff line number Diff line
@@ -60,9 +60,9 @@ config GENERIC_ENTRY

config KPROBES
	bool "Kprobes"
	depends on MODULES
	depends on HAVE_KPROBES
	select KALLSYMS
	select EXECMEM
	select NEED_TASKS_RCU
	help
	  Kprobes allows you to trap at almost any kernel address and
@@ -977,6 +977,14 @@ config ARCH_WANTS_MODULES_DATA_IN_VMALLOC
	  For architectures like powerpc/32 which have constraints on module
	  allocation and need to allocate module data outside of module area.

config ARCH_WANTS_EXECMEM_LATE
	bool
	help
	  For architectures that do not allocate executable memory early on
	  boot, but rather require its initialization late when there is
	  enough entropy for module space randomization, for instance
	  arm64.

config HAVE_IRQ_EXIT_ON_IRQ_STACK
	bool
	help
+0 −34
Original line number Diff line number Diff line
@@ -12,48 +12,14 @@
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/elf.h>
#include <linux/vmalloc.h>
#include <linux/fs.h>
#include <linux/string.h>
#include <linux/gfp.h>

#include <asm/sections.h>
#include <asm/smp_plat.h>
#include <asm/unwind.h>
#include <asm/opcodes.h>

#ifdef CONFIG_XIP_KERNEL
/*
 * The XIP kernel text is mapped in the module area for modules and
 * some other stuff to work without any indirect relocations.
 * MODULES_VADDR is redefined here and not in asm/memory.h to avoid
 * recompiling the whole kernel when CONFIG_XIP_KERNEL is turned on/off.
 */
#undef MODULES_VADDR
#define MODULES_VADDR	(((unsigned long)_exiprom + ~PMD_MASK) & PMD_MASK)
#endif

#ifdef CONFIG_MMU
void *module_alloc(unsigned long size)
{
	gfp_t gfp_mask = GFP_KERNEL;
	void *p;

	/* Silence the initial allocation */
	if (IS_ENABLED(CONFIG_ARM_MODULE_PLTS))
		gfp_mask |= __GFP_NOWARN;

	p = __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
				gfp_mask, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
				__builtin_return_address(0));
	if (!IS_ENABLED(CONFIG_ARM_MODULE_PLTS) || p)
		return p;
	return __vmalloc_node_range(size, 1,  VMALLOC_START, VMALLOC_END,
				GFP_KERNEL, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
				__builtin_return_address(0));
}
#endif

bool module_init_section(const char *name)
{
	return strstarts(name, ".init") ||
+45 −0
Original line number Diff line number Diff line
@@ -22,6 +22,7 @@
#include <linux/sizes.h>
#include <linux/stop_machine.h>
#include <linux/swiotlb.h>
#include <linux/execmem.h>

#include <asm/cp15.h>
#include <asm/mach-types.h>
@@ -486,3 +487,47 @@ void free_initrd_mem(unsigned long start, unsigned long end)
	free_reserved_area((void *)start, (void *)end, -1, "initrd");
}
#endif

#ifdef CONFIG_EXECMEM

#ifdef CONFIG_XIP_KERNEL
/*
 * The XIP kernel text is mapped in the module area for modules and
 * some other stuff to work without any indirect relocations.
 * MODULES_VADDR is redefined here and not in asm/memory.h to avoid
 * recompiling the whole kernel when CONFIG_XIP_KERNEL is turned on/off.
 */
#undef MODULES_VADDR
#define MODULES_VADDR	(((unsigned long)_exiprom + ~PMD_MASK) & PMD_MASK)
#endif

#ifdef CONFIG_MMU
/* Range description returned by execmem_arch_setup(). */
static struct execmem_info execmem_info __ro_after_init;

/*
 * Populate and return the execmem range description for this architecture.
 *
 * Only the EXECMEM_DEFAULT range is filled in: it covers
 * [MODULES_VADDR, MODULES_END) with PAGE_KERNEL_EXEC protection and an
 * alignment of 1. With CONFIG_ARM_MODULE_PLTS the vmalloc area is set as
 * the fallback range; otherwise both fallback bounds are 0.
 */
struct execmem_info __init *execmem_arch_setup(void)
{
	const bool use_plts = IS_ENABLED(CONFIG_ARM_MODULE_PLTS);

	/* Assigning a compound literal also zeroes every field not named. */
	execmem_info = (struct execmem_info){
		.ranges = {
			[EXECMEM_DEFAULT] = {
				.start		= MODULES_VADDR,
				.end		= MODULES_END,
				.pgprot		= PAGE_KERNEL_EXEC,
				.alignment	= 1,
				.fallback_start	= use_plts ? VMALLOC_START : 0,
				.fallback_end	= use_plts ? VMALLOC_END : 0,
			},
		},
	};

	return &execmem_info;
}
#endif /* CONFIG_MMU */

#endif /* CONFIG_EXECMEM */
+1 −0
Original line number Diff line number Diff line
@@ -105,6 +105,7 @@ config ARM64
	select ARCH_WANT_FRAME_POINTERS
	select ARCH_WANT_HUGE_PMD_SHARE if ARM64_4K_PAGES || (ARM64_16K_PAGES && !ARM64_VA_BITS_36)
	select ARCH_WANT_LD_ORPHAN_WARN
	select ARCH_WANTS_EXECMEM_LATE if EXECMEM
	select ARCH_WANTS_NO_INSTR
	select ARCH_WANTS_THP_SWAP if ARM64_4K_PAGES
	select ARCH_HAS_UBSAN
+0 −126
Original line number Diff line number Diff line
@@ -12,144 +12,18 @@
#include <linux/bitops.h>
#include <linux/elf.h>
#include <linux/ftrace.h>
#include <linux/gfp.h>
#include <linux/kasan.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/moduleloader.h>
#include <linux/random.h>
#include <linux/scs.h>
#include <linux/vmalloc.h>

#include <asm/alternative.h>
#include <asm/insn.h>
#include <asm/scs.h>
#include <asm/sections.h>

/*
 * Base addresses of the two module allocation windows. Both are set by
 * module_init_limits(); a value of 0 means the window is unavailable and
 * module_alloc() skips it.
 */
/* Base of the 128M window used for allocations that need no PLTs. */
static u64 module_direct_base __ro_after_init = 0;
/* Base of the 2G window used when the 128M window cannot be used. */
static u64 module_plt_base __ro_after_init = 0;

/*
 * Pick a random page-aligned base for a window of @size bytes that fully
 * contains the interval [@start, @end - 1].
 *
 * Returns 0 when the interval is at least as large as the window, i.e. no
 * such window exists. Otherwise returns @start moved down by a random number
 * of pages, bounded by the slack between the window and the interval.
 */
static u64 __init random_bounding_box(u64 size, u64 start, u64 end)
{
	u64 span = end - start;
	u64 slack_pages;

	if (span >= size)
		return 0;

	/* Number of whole pages the window can slide below @start. */
	slack_pages = (size - span) / PAGE_SIZE;

	return start - (u64)get_random_u32_inclusive(0, slack_pages) * PAGE_SIZE;
}

/*
 * Modules may directly reference data and text anywhere within the kernel
 * image and other modules. References using PREL32 relocations have a +/-2G
 * range, and so we need to ensure that the entire kernel image and all modules
 * fall within a 2G window such that these are always within range.
 *
 * Modules may directly branch to functions and code within the kernel text,
 * and to functions and code within other modules. These branches will use
 * CALL26/JUMP26 relocations with a +/-128M range. Without PLTs, we must ensure
 * that the entire kernel text and all module text falls within a 128M window
 * such that these are always within range. With PLTs, we can expand this to a
 * 2G window.
 *
 * We chose the 128M region to surround the entire kernel image (rather than
 * just the text) as using the same bounds for the 128M and 2G regions ensures
 * by construction that we never select a 128M region that is not a subset of
 * the 2G region. For very large and unusual kernel configurations this means
 * we may fall back to PLTs where they could have been avoided, but this keeps
 * the logic significantly simpler.
 *
 * This initcall computes module_direct_base and module_plt_base accordingly
 * (each stays 0 when the corresponding window cannot be used), logs how many
 * pages each window leaves for modules, and always returns 0.
 */
static int __init module_init_limits(void)
{
	u64 kernel_end = (u64)_end;
	u64 kernel_start = (u64)_text;
	u64 kernel_size = kernel_end - kernel_start;

	/*
	 * The default modules region is placed immediately below the kernel
	 * image, and is large enough to use the full 2G relocation range.
	 */
	BUILD_BUG_ON(KIMAGE_VADDR != MODULES_END);
	BUILD_BUG_ON(MODULES_VSIZE < SZ_2G);

	if (!kaslr_enabled()) {
		/* No randomization: both windows end at the kernel image end. */
		if (kernel_size < SZ_128M)
			module_direct_base = kernel_end - SZ_128M;
		if (kernel_size < SZ_2G)
			module_plt_base = kernel_end - SZ_2G;
	} else {
		u64 min = kernel_start;
		u64 max = kernel_end;

		if (IS_ENABLED(CONFIG_RANDOMIZE_MODULE_REGION_FULL)) {
			pr_info("2G module region forced by RANDOMIZE_MODULE_REGION_FULL\n");
		} else {
			module_direct_base = random_bounding_box(SZ_128M, min, max);
			if (module_direct_base) {
				/*
				 * Make the 2G window enclose the whole 128M
				 * window, not just the kernel image.
				 */
				min = module_direct_base;
				max = module_direct_base + SZ_128M;
			}
		}

		module_plt_base = random_bounding_box(SZ_2G, min, max);
	}

	/* Terminate each record with a newline so it is logged as complete. */
	pr_info("%llu pages in range for non-PLT usage\n",
		module_direct_base ? (SZ_128M - kernel_size) / PAGE_SIZE : 0);
	pr_info("%llu pages in range for PLT usage\n",
		module_plt_base ? (SZ_2G - kernel_size) / PAGE_SIZE : 0);

	return 0;
}
subsys_initcall(module_init_limits);

void *module_alloc(unsigned long size)
{
	void *p = NULL;

	/*
	 * Where possible, prefer to allocate within direct branch range of the
	 * kernel such that no PLTs are necessary.
	 */
	if (module_direct_base) {
		p = __vmalloc_node_range(size, MODULE_ALIGN,
					 module_direct_base,
					 module_direct_base + SZ_128M,
					 GFP_KERNEL | __GFP_NOWARN,
					 PAGE_KERNEL, 0, NUMA_NO_NODE,
					 __builtin_return_address(0));
	}

	if (!p && module_plt_base) {
		p = __vmalloc_node_range(size, MODULE_ALIGN,
					 module_plt_base,
					 module_plt_base + SZ_2G,
					 GFP_KERNEL | __GFP_NOWARN,
					 PAGE_KERNEL, 0, NUMA_NO_NODE,
					 __builtin_return_address(0));
	}

	if (!p) {
		pr_warn_ratelimited("%s: unable to allocate memory\n",
				    __func__);
	}

	if (p && (kasan_alloc_module_shadow(p, size, GFP_KERNEL) < 0)) {
		vfree(p);
		return NULL;
	}

	/* Memory is intended to be executable, reset the pointer tag. */
	return kasan_reset_tag(p);
}

enum aarch64_reloc_op {
	RELOC_OP_NONE,
	RELOC_OP_ABS,
Loading