Commit ff2e6468 authored by Sean Christopherson, committed by Dave Hansen
Browse files

x86/boot: Add a trampoline for booting APs via firmware handoff



Historically, x86 platforms have booted secondary processors (APs)
using INIT followed by the Startup IPI (SIPI) messages. In regular
VMs, this boot sequence is supported by VMM emulation. But such a
wakeup model is fatal for secure VMs like TDX, in which the VMM is an
untrusted entity. To address this issue, a new wakeup model was added
in ACPI v6.4, in which firmware (like the TDX virtual BIOS) helps boot
the APs. More details about this wakeup model can be found in the ACPI
specification v6.4, in the section titled "Multiprocessor Wakeup Structure".

Since the existing trampoline code requires processors to boot in real
mode with 16-bit addressing, it will not work for this wakeup model
(because the firmware hands off the AP in 64-bit mode). To handle this,
extend the trampoline code to support 64-bit mode firmware handoff. Also,
extend the IDT and GDT pointers to support 64-bit mode handoff.

There is no TDX-specific detection for this new boot method. The kernel
will rely on it as the sole boot method whenever the new ACPI structure
is present.

The ACPI table parser for the MADT multiprocessor wake up structure and
the wakeup method that uses this structure will be added by the following
patch in this series.

Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Reviewed-by: Andi Kleen <ak@linux.intel.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lkml.kernel.org/r/20220405232939.73860-21-kirill.shutemov@linux.intel.com
parent cfb8ec7a
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -328,6 +328,8 @@ struct apic {

	/* wakeup_secondary_cpu */
	int	(*wakeup_secondary_cpu)(int apicid, unsigned long start_eip);
	/* wakeup secondary CPU using 64-bit wakeup point */
	int	(*wakeup_secondary_cpu_64)(int apicid, unsigned long start_eip);

	void	(*inquire_remote_apic)(int apicid);

+1 −0
Original line number Diff line number Diff line
@@ -25,6 +25,7 @@ struct real_mode_header {
	u32	sev_es_trampoline_start;
#endif
#ifdef CONFIG_X86_64
	u32	trampoline_start64;
	u32	trampoline_pgd;
#endif
	/* ACPI S3 wakeup */
+10 −2
Original line number Diff line number Diff line
@@ -1082,6 +1082,11 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
	unsigned long boot_error = 0;
	unsigned long timeout;

#ifdef CONFIG_X86_64
	/* If 64-bit wakeup method exists, use the 64-bit mode trampoline IP */
	if (apic->wakeup_secondary_cpu_64)
		start_ip = real_mode_header->trampoline_start64;
#endif
	idle->thread.sp = (unsigned long)task_pt_regs(idle);
	early_gdt_descr.address = (unsigned long)get_cpu_gdt_rw(cpu);
	initial_code = (unsigned long)start_secondary;
@@ -1123,11 +1128,14 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,

	/*
	 * Wake up a CPU in different cases:
	 * - Use the method in the APIC driver if it's defined
	 * - Use a method from the APIC driver if one is defined, with wakeup
	 *   straight to 64-bit mode preferred over wakeup to RM.
	 * Otherwise,
	 * - Use an INIT boot APIC message for APs or NMI for BSP.
	 */
	if (apic->wakeup_secondary_cpu)
	if (apic->wakeup_secondary_cpu_64)
		boot_error = apic->wakeup_secondary_cpu_64(apicid, start_ip);
	else if (apic->wakeup_secondary_cpu)
		boot_error = apic->wakeup_secondary_cpu(apicid, start_ip);
	else
		boot_error = wakeup_cpu_via_init_nmi(cpu, start_ip, apicid,
+1 −0
Original line number Diff line number Diff line
@@ -24,6 +24,7 @@ SYM_DATA_START(real_mode_header)
	.long	pa_sev_es_trampoline_start
#endif
#ifdef CONFIG_X86_64
	.long	pa_trampoline_start64
	.long	pa_trampoline_pgd;
#endif
	/* ACPI S3 wakeup */
+38 −0
Original line number Diff line number Diff line
@@ -161,6 +161,19 @@ SYM_CODE_START(startup_32)
	ljmpl	$__KERNEL_CS, $pa_startup_64
SYM_CODE_END(startup_32)

/*
 * pa_trampoline_compat - leave compatibility mode and continue at
 * pa_startup_32 in legacy 32-bit protected mode.
 *
 * This is the target stored in tr_compat, which trampoline_start64 far-jumps
 * through after loading the trampoline GDT.
 */
SYM_CODE_START(pa_trampoline_compat)
	/*
	 * In compatibility mode.  Prep ESP and DX for startup_32, then disable
	 * paging and complete the switch to legacy 32-bit mode.
	 */
	movl	$rm_stack_end, %esp	# ESP = address of rm_stack_end
	movw	$__KERNEL_DS, %dx	# data segment selector passed on in DX

	/* CR0 is written with PE as the only bit set, which clears PG (paging). */
	movl	$X86_CR0_PE, %eax
	movl	%eax, %cr0
	/* Far jump: reload CS with __KERNEL32_CS and continue at pa_startup_32. */
	ljmpl   $__KERNEL32_CS, $pa_startup_32
SYM_CODE_END(pa_trampoline_compat)

	.section ".text64","ax"
	.code64
	.balign 4
@@ -169,6 +182,20 @@ SYM_CODE_START(startup_64)
	jmpq	*tr_start(%rip)
SYM_CODE_END(startup_64)

/*
 * trampoline_start64 - AP entry point for a 64-bit firmware handoff.
 *
 * Replaces the firmware's descriptor tables with the trampoline's own and
 * then far-jumps through tr_compat to pa_trampoline_compat.
 */
SYM_CODE_START(trampoline_start64)
	/*
	 * APs start here on a direct transfer from 64-bit BIOS with identity
	 * mapped page tables.  Load the kernel's GDT in order to gear down to
	 * 32-bit mode (to handle 4-level vs. 5-level paging), and to (re)load
	 * segment registers.  Load the zero IDT so any fault triggers a
	 * shutdown instead of jumping back into BIOS.
	 */
	lidt	tr_idt(%rip)		# IDT descriptor at tr_idt (RIP-relative)
	lgdt	tr_gdt64(%rip)		# GDT descriptor with a 64-bit base, see tr_gdt64

	/*
	 * Indirect far jump: tr_compat supplies the 32-bit offset
	 * (pa_trampoline_compat) and the selector (__KERNEL32_CS).
	 */
	ljmpl	*tr_compat(%rip)
SYM_CODE_END(trampoline_start64)

	.section ".rodata","a"
	# Duplicate the global descriptor table
	# so the kernel can live anywhere
@@ -182,6 +209,17 @@ SYM_DATA_START(tr_gdt)
	.quad	0x00cf93000000ffff	# __KERNEL_DS
SYM_DATA_END_LABEL(tr_gdt, SYM_L_LOCAL, tr_gdt_end)

/*
 * GDT descriptor used by the lgdt in trampoline_start64: a 16-bit limit
 * followed by the table base, emitted as two 32-bit halves so the base is
 * 64 bits wide with the upper half zero.
 */
SYM_DATA_START(tr_gdt64)
	.short	tr_gdt_end - tr_gdt - 1	# gdt limit
	.long	pa_tr_gdt		# gdt base, low 32 bits
	.long	0			# gdt base, high 32 bits
SYM_DATA_END(tr_gdt64)

/*
 * Far pointer consumed by the indirect ljmpl in trampoline_start64:
 * a 32-bit offset followed by a 16-bit code segment selector.
 */
SYM_DATA_START(tr_compat)
	.long	pa_trampoline_compat	# jump target offset
	.short	__KERNEL32_CS		# code segment selector for the target
SYM_DATA_END(tr_compat)

	.bss
	.balign	PAGE_SIZE
SYM_DATA(trampoline_pgd, .space PAGE_SIZE)
Loading