Commit 5b7f7234 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'x86-boot-2025-01-21' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 boot updates from Ingo Molnar:

 - A large and involved preparatory series to pave the way to add
   exception handling for relocate_kernel - a debugging facility that
   has already helped in the field to track down an exceptionally
   hard-to-debug early boot bug. Plus assorted cleanups and fixes that were
   discovered along the way, by David Woodhouse:

      - Clean up and document register use in relocate_kernel_64.S
      - Use named labels in swap_pages in relocate_kernel_64.S
      - Only swap pages for ::preserve_context mode
      - Allocate PGD for x86_64 transition page tables separately
      - Copy control page into place in machine_kexec_prepare()
      - Invoke copy of relocate_kernel() instead of the original
      - Move relocate_kernel to kernel .data section
      - Add data section to relocate_kernel
      - Drop page_list argument from relocate_kernel()
      - Eliminate writes through kernel mapping of relocate_kernel page
      - Clean up register usage in relocate_kernel()
      - Mark relocate_kernel page as ROX instead of RWX
      - Disable global pages before writing to control page
      - Ensure preserve_context flag is set on return to kernel
      - Use correct swap page in swap_pages function
      - Fix stack and handling of re-entry point for ::preserve_context
      - Mark machine_kexec() with __nocfi
      - Cope with relocate_kernel() not being at the start of the page
      - Use typedef for relocate_kernel_fn function prototype
      - Fix location of relocate_kernel with -ffunction-sections (fix by Nathan Chancellor)

 - A series to remove the last remaining absolute symbol references from
   .head.text, and enforce this at build time, by Ard Biesheuvel:

      - Avoid WARN()s and panic()s in early boot code
      - Don't hang but terminate on failure to remap SVSM CA
      - Determine VA/PA offset before entering C code
      - Avoid intentional absolute symbol references in .head.text
      - Disable UBSAN in early boot code
      - Move ENTRY_TEXT to the start of the image
      - Move .head.text into its own output section
      - Reject absolute references in .head.text

 - The above build-time enforcement uncovered a handful of bugs of
   essentially non-working code, and a workaround for a toolchain bug,
   fixed by Ard Biesheuvel as well:

      - Fix spurious undefined reference when CONFIG_X86_5LEVEL=n, on GCC-12
      - Disable UBSAN on SEV code that may execute very early
      - Disable ftrace branch profiling in SEV startup code

 - And miscellaneous cleanups:

      - kexec_core: Add and update comments regarding the KEXEC_JUMP flow (Rafael J. Wysocki)
      - x86/sysfs: Constify 'struct bin_attribute' (Thomas Weißschuh)

* tag 'x86-boot-2025-01-21' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (33 commits)
  x86/sev: Disable ftrace branch profiling in SEV startup code
  x86/kexec: Use typedef for relocate_kernel_fn function prototype
  x86/kexec: Cope with relocate_kernel() not being at the start of the page
  kexec_core: Add and update comments regarding the KEXEC_JUMP flow
  x86/kexec: Mark machine_kexec() with __nocfi
  x86/kexec: Fix location of relocate_kernel with -ffunction-sections
  x86/kexec: Fix stack and handling of re-entry point for ::preserve_context
  x86/kexec: Use correct swap page in swap_pages function
  x86/kexec: Ensure preserve_context flag is set on return to kernel
  x86/kexec: Disable global pages before writing to control page
  x86/sev: Don't hang but terminate on failure to remap SVSM CA
  x86/sev: Disable UBSAN on SEV code that may execute very early
  x86/boot/64: Fix spurious undefined reference when CONFIG_X86_5LEVEL=n, on GCC-12
  x86/sysfs: Constify 'struct bin_attribute'
  x86/kexec: Mark relocate_kernel page as ROX instead of RWX
  x86/kexec: Clean up register usage in relocate_kernel()
  x86/kexec: Eliminate writes through kernel mapping of relocate_kernel page
  x86/kexec: Drop page_list argument from relocate_kernel()
  x86/kexec: Add data section to relocate_kernel
  x86/kexec: Move relocate_kernel to kernel .data section
  ...
parents 7685b334 cf4ca806
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -13,3 +13,6 @@ KCOV_INSTRUMENT_core.o := n
# With some compiler versions the generated code results in boot hangs, caused
# by several compilation units. To be safe, disable all instrumentation.
KCSAN_SANITIZE		:= n

# Clang 14 and older may fail to respect __no_sanitize_undefined when inlining
UBSAN_SANITIZE		:= n
+6 −9
Original line number Diff line number Diff line
@@ -9,6 +9,8 @@

#define pr_fmt(fmt)	"SEV: " fmt

#define DISABLE_BRANCH_PROFILING

#include <linux/sched/debug.h>	/* For show_regs() */
#include <linux/percpu-defs.h>
#include <linux/cc_platform.h>
@@ -787,15 +789,10 @@ early_set_pages_state(unsigned long vaddr, unsigned long paddr,

		val = sev_es_rd_ghcb_msr();

		if (WARN(GHCB_RESP_CODE(val) != GHCB_MSR_PSC_RESP,
			 "Wrong PSC response code: 0x%x\n",
			 (unsigned int)GHCB_RESP_CODE(val)))
		if (GHCB_RESP_CODE(val) != GHCB_MSR_PSC_RESP)
			goto e_term;

		if (WARN(GHCB_MSR_PSC_RESP_VAL(val),
			 "Failed to change page state to '%s' paddr 0x%lx error 0x%llx\n",
			 op == SNP_PAGE_STATE_PRIVATE ? "private" : "shared",
			 paddr, GHCB_MSR_PSC_RESP_VAL(val)))
		if (GHCB_MSR_PSC_RESP_VAL(val))
			goto e_term;

		/* Page validation must be performed after changing to private */
@@ -831,7 +828,7 @@ void __head early_snp_set_memory_private(unsigned long vaddr, unsigned long padd
	early_set_pages_state(vaddr, paddr, npages, SNP_PAGE_STATE_PRIVATE);
}

void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr,
void __head early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr,
					unsigned long npages)
{
	/*
@@ -2423,7 +2420,7 @@ static __head void svsm_setup(struct cc_blob_sev_info *cc_info)
	call.rcx = pa;
	ret = svsm_perform_call_protocol(&call);
	if (ret)
		panic("Can't remap the SVSM CA, ret=%d, rax_out=0x%llx\n", ret, call.rax_out);
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SVSM_CA_REMAP_FAIL);

	RIP_REL_REF(boot_svsm_caa) = (struct svsm_ca *)pa;
	RIP_REL_REF(boot_svsm_caa_pa) = pa;
+9 −7
Original line number Diff line number Diff line
@@ -498,7 +498,7 @@ static const struct snp_cpuid_table *snp_cpuid_get_table(void)
 *
 * Return: XSAVE area size on success, 0 otherwise.
 */
static u32 snp_cpuid_calc_xsave_size(u64 xfeatures_en, bool compacted)
static u32 __head snp_cpuid_calc_xsave_size(u64 xfeatures_en, bool compacted)
{
	const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
	u64 xfeatures_found = 0;
@@ -576,7 +576,8 @@ static void snp_cpuid_hv(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpui
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_CPUID_HV);
}

static int snp_cpuid_postprocess(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
static int __head
snp_cpuid_postprocess(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
		      struct cpuid_leaf *leaf)
{
	struct cpuid_leaf leaf_hv = *leaf;
@@ -1253,7 +1254,7 @@ static void svsm_pval_terminate(struct svsm_pvalidate_call *pc, int ret, u64 svs
	__pval_terminate(pfn, action, page_size, ret, svsm_ret);
}

static void svsm_pval_4k_page(unsigned long paddr, bool validate)
static void __head svsm_pval_4k_page(unsigned long paddr, bool validate)
{
	struct svsm_pvalidate_call *pc;
	struct svsm_call call = {};
@@ -1285,12 +1286,13 @@ static void svsm_pval_4k_page(unsigned long paddr, bool validate)

	ret = svsm_perform_call_protocol(&call);
	if (ret)
		svsm_pval_terminate(pc, ret, call.rax_out);
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE);

	native_local_irq_restore(flags);
}

static void pvalidate_4k_page(unsigned long vaddr, unsigned long paddr, bool validate)
static void __head pvalidate_4k_page(unsigned long vaddr, unsigned long paddr,
				     bool validate)
{
	int ret;

@@ -1303,7 +1305,7 @@ static void pvalidate_4k_page(unsigned long vaddr, unsigned long paddr, bool val
	} else {
		ret = pvalidate(vaddr, RMP_PG_SIZE_4K, validate);
		if (ret)
			__pval_terminate(PHYS_PFN(paddr), validate, RMP_PG_SIZE_4K, ret, 0);
			sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE);
	}
}

+1 −1
Original line number Diff line number Diff line
@@ -2,7 +2,7 @@
#ifndef _ASM_X86_INIT_H
#define _ASM_X86_INIT_H

#define __head	__section(".head.text")
#define __head	__section(".head.text") __no_sanitize_undefined

struct x86_mapping_info {
	void *(*alloc_pgt_page)(void *); /* allocate buf for page table */
+31 −23
Original line number Diff line number Diff line
@@ -8,14 +8,9 @@
# define PA_PGD			2
# define PA_SWAP_PAGE		3
# define PAGES_NR		4
#else
# define PA_CONTROL_PAGE	0
# define VA_CONTROL_PAGE	1
# define PA_TABLE_PAGE		2
# define PA_SWAP_PAGE		3
# define PAGES_NR		4
#endif

# define KEXEC_CONTROL_PAGE_SIZE	4096
# define KEXEC_CONTROL_CODE_MAX_SIZE	2048

#ifndef __ASSEMBLY__
@@ -43,7 +38,6 @@ struct kimage;
/* Maximum address we can use for the control code buffer */
# define KEXEC_CONTROL_MEMORY_LIMIT TASK_SIZE

# define KEXEC_CONTROL_PAGE_SIZE	4096

/* The native architecture */
# define KEXEC_ARCH KEXEC_ARCH_386
@@ -58,11 +52,12 @@ struct kimage;
/* Maximum address we can use for the control pages */
# define KEXEC_CONTROL_MEMORY_LIMIT     (MAXMEM-1)

/* Allocate one page for the pdp and the second for the code */
# define KEXEC_CONTROL_PAGE_SIZE  (4096UL + 4096UL)

/* The native architecture */
# define KEXEC_ARCH KEXEC_ARCH_X86_64

extern unsigned long kexec_va_control_page;
extern unsigned long kexec_pa_table_page;
extern unsigned long kexec_pa_swap_page;
#endif

/*
@@ -116,21 +111,21 @@ static inline void crash_setup_regs(struct pt_regs *newregs,
}

#ifdef CONFIG_X86_32
asmlinkage unsigned long
relocate_kernel(unsigned long indirection_page,
typedef asmlinkage unsigned long
relocate_kernel_fn(unsigned long indirection_page,
		   unsigned long control_page,
		   unsigned long start_address,
		   unsigned int has_pae,
		   unsigned int preserve_context);
#else
unsigned long
relocate_kernel(unsigned long indirection_page,
		unsigned long page_list,
typedef unsigned long
relocate_kernel_fn(unsigned long indirection_page,
		   unsigned long pa_control_page,
		   unsigned long start_address,
		   unsigned int preserve_context,
		   unsigned int host_mem_enc_active);
#endif

extern relocate_kernel_fn relocate_kernel;
#define ARCH_HAS_KIMAGE_ARCH

#ifdef CONFIG_X86_32
@@ -145,6 +140,19 @@ struct kimage_arch {
};
#else
struct kimage_arch {
	/*
	 * This is a kimage control page, as it must not overlap with either
	 * source or destination address ranges.
	 */
	pgd_t *pgd;
	/*
	 * The virtual mapping of the control code page itself is used only
	 * during the transition, while the current kernel's pages are all
	 * in place. Thus the intermediate page table pages used to map it
	 * are not control pages, but instead just normal pages obtained
	 * with get_zeroed_page(). And have to be tracked (below) so that
	 * they can be freed.
	 */
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;
Loading