Commit 9d7de2aa, authored by Brian Gerst, committed by Ingo Molnar
Browse files

x86/percpu/64: Use relative percpu offsets



The percpu section is currently linked at absolute address 0, because
older compilers hard-coded the stack protector canary value at a fixed
offset from the start of the GS segment.  Now that the canary is a
normal percpu variable, the percpu section does not need to be linked
at a specific address.

x86-64 will now calculate the percpu offsets as the delta between the
initial percpu address and the dynamically allocated memory, like other
architectures.  Note that GSBASE is limited to the canonical address
width (48 or 57 bits, sign-extended).  As long as the kernel text,
modules, and the dynamically allocated percpu memory are all in the
negative address space, the delta will not overflow this limit.

Signed-off-by: Brian Gerst <brgerst@gmail.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Reviewed-by: Uros Bizjak <ubizjak@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: https://lore.kernel.org/r/20250123190747.745588-9-brgerst@gmail.com
parent 80d47def
Loading
Loading
Loading
Loading
+5 −1
Original line number Diff line number Diff line
@@ -431,7 +431,11 @@ DECLARE_INIT_PER_CPU(fixed_percpu_data);

static inline unsigned long cpu_kernelmode_gs_base(int cpu)
{
	return (unsigned long)per_cpu(fixed_percpu_data.gs_base, cpu);
#ifdef CONFIG_SMP
	return per_cpu_offset(cpu);
#else
	return 0;
#endif
}

extern asmlinkage void entry_SYSCALL32_ignore(void);
+9 −10
Original line number Diff line number Diff line
@@ -61,11 +61,14 @@ SYM_CODE_START_NOALIGN(startup_64)
	/* Set up the stack for verify_cpu() */
	leaq	__top_init_kernel_stack(%rip), %rsp

	/* Setup GSBASE to allow stack canary access for C code */
	/*
	 * Set up GSBASE.
	 * Note that on SMP the boot CPU uses the init data section until
	 * the per-CPU areas are set up.
	 */
	movl	$MSR_GS_BASE, %ecx
	leaq	INIT_PER_CPU_VAR(fixed_percpu_data)(%rip), %rdx
	movl	%edx, %eax
	shrq	$32,  %rdx
	xorl	%eax, %eax
	xorl	%edx, %edx
	wrmsr

	call	startup_64_setup_gdt_idt
@@ -359,16 +362,12 @@ SYM_INNER_LABEL(common_startup_64, SYM_L_LOCAL)
	movl %eax,%fs
	movl %eax,%gs

	/* Set up %gs.
	 *
	 * The base of %gs always points to fixed_percpu_data.
	/*
	 * Set up GSBASE.
	 * Note that, on SMP, the boot cpu uses init data section until
	 * the per cpu areas are set up.
	 */
	movl	$MSR_GS_BASE,%ecx
#ifndef CONFIG_SMP
	leaq	INIT_PER_CPU_VAR(fixed_percpu_data)(%rip), %rdx
#endif
	movl	%edx, %eax
	shrq	$32, %rdx
	wrmsr
+2 −10
Original line number Diff line number Diff line
@@ -23,18 +23,10 @@
#include <asm/cpumask.h>
#include <asm/cpu.h>

#ifdef CONFIG_X86_64
#define BOOT_PERCPU_OFFSET ((unsigned long)__per_cpu_load)
#else
#define BOOT_PERCPU_OFFSET 0
#endif

DEFINE_PER_CPU_READ_MOSTLY(unsigned long, this_cpu_off) = BOOT_PERCPU_OFFSET;
DEFINE_PER_CPU_READ_MOSTLY(unsigned long, this_cpu_off);
EXPORT_PER_CPU_SYMBOL(this_cpu_off);

unsigned long __per_cpu_offset[NR_CPUS] __ro_after_init = {
	[0 ... NR_CPUS-1] = BOOT_PERCPU_OFFSET,
};
unsigned long __per_cpu_offset[NR_CPUS] __ro_after_init;
EXPORT_SYMBOL(__per_cpu_offset);

/*
+1 −28
Original line number Diff line number Diff line
@@ -112,12 +112,6 @@ ASSERT(__relocate_kernel_end - __relocate_kernel_start <= KEXEC_CONTROL_CODE_MAX
PHDRS {
	text PT_LOAD FLAGS(5);          /* R_E */
	data PT_LOAD FLAGS(6);          /* RW_ */
#ifdef CONFIG_X86_64
#ifdef CONFIG_SMP
	percpu PT_LOAD FLAGS(6);        /* RW_ */
#endif
	init PT_LOAD FLAGS(7);          /* RWE */
#endif
	note PT_NOTE FLAGS(0);          /* ___ */
}

@@ -216,21 +210,7 @@ SECTIONS
		__init_begin = .; /* paired with __init_end */
	}

#if defined(CONFIG_X86_64) && defined(CONFIG_SMP)
	/*
	 * percpu offsets are zero-based on SMP.  PERCPU_VADDR() changes the
	 * output PHDR, so the next output section - .init.text - should
	 * start another segment - init.
	 */
	PERCPU_VADDR(INTERNODE_CACHE_BYTES, 0, :percpu)
	ASSERT(SIZEOF(.data..percpu) < CONFIG_PHYSICAL_START,
	       "per-CPU data too large - increase CONFIG_PHYSICAL_START")
#endif

	INIT_TEXT_SECTION(PAGE_SIZE)
#ifdef CONFIG_X86_64
	:init
#endif

	/*
	 * Section for code used exclusively before alternatives are run. All
@@ -347,9 +327,7 @@ SECTIONS
		EXIT_DATA
	}

#if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP)
	PERCPU_SECTION(INTERNODE_CACHE_BYTES)
#endif

	RUNTIME_CONST_VARIABLES
	RUNTIME_CONST(ptr, USER_PTR_MAX)
@@ -497,16 +475,11 @@ PROVIDE(__ref_stack_chk_guard = __stack_chk_guard);
 * Per-cpu symbols which need to be offset from __per_cpu_load
 * for the boot processor.
 */
#define INIT_PER_CPU(x) init_per_cpu__##x = ABSOLUTE(x) + __per_cpu_load
#define INIT_PER_CPU(x) init_per_cpu__##x = ABSOLUTE(x)
INIT_PER_CPU(gdt_page);
INIT_PER_CPU(fixed_percpu_data);
INIT_PER_CPU(irq_stack_backing_store);

#ifdef CONFIG_SMP
. = ASSERT((fixed_percpu_data == 0),
           "fixed_percpu_data is not at start of per-cpu area");
#endif

#ifdef CONFIG_MITIGATION_UNRET_ENTRY
. = ASSERT((retbleed_return_thunk & 0x3f) == 0, "retbleed_return_thunk not cacheline-aligned");
#endif
+2 −3
Original line number Diff line number Diff line
@@ -179,9 +179,8 @@ SYM_CODE_START(pvh_start_xen)
	 * the per-CPU areas are set up.
	 */
	movl $MSR_GS_BASE,%ecx
	leaq INIT_PER_CPU_VAR(fixed_percpu_data)(%rip), %rdx
	movq %edx, %eax
	shrq $32, %rdx
	xorl %eax, %eax
	xorl %edx, %edx
	wrmsr

	/* Call xen_prepare_pvh() via the kernel virtual mapping */
Loading