Commit 6cb09458 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'x86_tdx_for_6.15-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 TDX updates from Dave Hansen:
 "Avoid direct HLT instruction execution in TDX guests.

  TDX guests aren't expected to use the HLT instruction directly. It
  causes a virtualization exception (#VE). While the #VE _can_ be
  handled, the current handling is slow and buggy and the easiest thing
  is just to avoid HLT in the first place. Plus, the kernel already has
  paravirt infrastructure that makes it relatively painless.

  Make TDX guests require paravirt and add some TDX-specific paravirt
  handlers which avoid HLT in the normal halt routines. Also add a
  warning in case another HLT sneaks in.

  There was a report that this leads to a "major performance
  improvement" on specjbb2015, probably because it avoids the extra
  #VE overhead or the missed wakeups caused by the buggy HLT handling."

* tag 'x86_tdx_for_6.15-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/tdx: Emit warning if IRQs are enabled during HLT #VE handling
  x86/tdx: Fix arch_safe_halt() execution for TDX VMs
  x86/paravirt: Move halt paravirt calls under CONFIG_PARAVIRT
parents 92b71bef e8f45927
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -889,6 +889,7 @@ config INTEL_TDX_GUEST
	depends on X86_64 && CPU_SUP_INTEL
	depends on X86_X2APIC
	depends on EFI_STUB
	depends on PARAVIRT
	select ARCH_HAS_CC_PLATFORM
	select X86_MEM_ENCRYPT
	select X86_MCE
+33 −1
Original line number Diff line number Diff line
@@ -14,6 +14,7 @@
#include <asm/ia32.h>
#include <asm/insn.h>
#include <asm/insn-eval.h>
#include <asm/paravirt_types.h>
#include <asm/pgtable.h>
#include <asm/set_memory.h>
#include <asm/traps.h>
@@ -392,13 +393,21 @@ static int handle_halt(struct ve_info *ve)
{
	/* Sample the IRQ state once; it drives the check below and is passed to __halt(). */
	const bool irq_disabled = irqs_disabled();

	/*
	 * HLT with IRQs enabled is unsafe, as an IRQ that is intended to be a
	 * wake event may be consumed before requesting HLT emulation, leaving
	 * the vCPU blocking indefinitely.
	 *
	 * Warn once and fail with -EIO instead of attempting the emulation.
	 */
	if (WARN_ONCE(!irq_disabled, "HLT emulation with IRQs enabled"))
		return -EIO;

	/* Any non-zero result from __halt() is reported to the caller as -EIO. */
	if (__halt(irq_disabled))
		return -EIO;

	/*
	 * Success: hand back whatever ve_instr_len() reports for this ve_info
	 * (presumably the length of the trapping instruction -- confirm
	 * against ve_instr_len()).
	 */
	return ve_instr_len(ve);
}

void __cpuidle tdx_safe_halt(void)
void __cpuidle tdx_halt(void)
{
	const bool irq_disabled = false;

@@ -409,6 +418,16 @@ void __cpuidle tdx_safe_halt(void)
		WARN_ONCE(1, "HLT instruction emulation failed\n");
}

/*
 * Halt via tdx_halt() and, only once that call has returned, enable
 * interrupts. Installed as pv_ops.irq.safe_halt by tdx_early_init().
 */
static void __cpuidle tdx_safe_halt(void)
{
	tdx_halt();
	/*
	 * "__cpuidle" section doesn't support instrumentation, so stick
	 * with raw_* variant that avoids tracing hooks.
	 */
	raw_local_irq_enable();
}

static int read_msr(struct pt_regs *regs, struct ve_info *ve)
{
	struct tdx_module_args args = {
@@ -1109,6 +1128,19 @@ void __init tdx_early_init(void)
	x86_platform.guest.enc_kexec_begin	     = tdx_kexec_begin;
	x86_platform.guest.enc_kexec_finish	     = tdx_kexec_finish;

	/*
	 * Avoid "sti;hlt" execution in TDX guests as HLT induces a #VE that
	 * will enable interrupts before HLT TDCALL invocation if executed
	 * in STI-shadow, possibly resulting in missed wakeup events.
	 *
	 * Modify all possible HLT execution paths to use TDX specific routines
	 * that directly execute TDCALL and toggle the interrupt state as
	 * needed after TDCALL completion. This also reduces HLT related #VEs
	 * in addition to having a reliable halt logic execution.
	 */
	pv_ops.irq.safe_halt = tdx_safe_halt;
	pv_ops.irq.halt = tdx_halt;

	/*
	 * TDX intercepts the RDMSR to read the X2APIC ID in the parallel
	 * bringup low level code. That raises #VE which cannot be handled
+22 −18
Original line number Diff line number Diff line
@@ -76,6 +76,28 @@ static __always_inline void native_local_irq_restore(unsigned long flags)

#endif

#ifndef CONFIG_PARAVIRT
#ifndef __ASSEMBLY__
/*
 * Used in the idle loop; sti takes one instruction cycle
 * to complete:
 *
 * This definition is compiled only when CONFIG_PARAVIRT is not set and
 * __ASSEMBLY__ is not defined; it forwards straight to
 * native_safe_halt().
 */
static __always_inline void arch_safe_halt(void)
{
	native_safe_halt();
}

/*
 * Used when interrupts are already enabled or to
 * shutdown the processor:
 *
 * This definition is compiled only when CONFIG_PARAVIRT is not set and
 * __ASSEMBLY__ is not defined; it forwards straight to native_halt().
 */
static __always_inline void halt(void)
{
	native_halt();
}
#endif /* __ASSEMBLY__ */
#endif /* CONFIG_PARAVIRT */

#ifdef CONFIG_PARAVIRT_XXL
#include <asm/paravirt.h>
#else
@@ -97,24 +119,6 @@ static __always_inline void arch_local_irq_enable(void)
	native_irq_enable();
}

/*
 * Used in the idle loop; sti takes one instruction cycle
 * to complete:
 *
 * Thin wrapper: the only thing it does is call native_safe_halt().
 */
static __always_inline void arch_safe_halt(void)
{
	native_safe_halt();
}

/*
 * Used when interrupts are already enabled or to
 * shutdown the processor:
 *
 * Thin wrapper: the only thing it does is call native_halt().
 */
static __always_inline void halt(void)
{
	native_halt();
}

/*
 * For spinlocks, etc:
 */
+10 −10
Original line number Diff line number Diff line
@@ -102,6 +102,16 @@ static inline void notify_page_enc_status_changed(unsigned long pfn,
	PVOP_VCALL3(mmu.notify_page_enc_status_changed, pfn, npages, enc);
}

/*
 * Paravirt flavour of arch_safe_halt(): dispatches through the
 * irq.safe_halt op using PVOP_VCALL0 (no arguments are passed).
 */
static __always_inline void arch_safe_halt(void)
{
	PVOP_VCALL0(irq.safe_halt);
}

/*
 * Paravirt flavour of halt(): dispatches through the irq.halt op using
 * PVOP_VCALL0 (no arguments are passed).
 *
 * NOTE(review): this is plain "inline" whereas arch_safe_halt() next to
 * it is "__always_inline" -- confirm whether the difference is intended.
 */
static inline void halt(void)
{
	PVOP_VCALL0(irq.halt);
}

#ifdef CONFIG_PARAVIRT_XXL
static inline void load_sp0(unsigned long sp0)
{
@@ -165,16 +175,6 @@ static inline void __write_cr4(unsigned long x)
	PVOP_VCALL1(cpu.write_cr4, x);
}

/*
 * Paravirt flavour of arch_safe_halt(): dispatches through the
 * irq.safe_halt op using PVOP_VCALL0 (no arguments are passed).
 */
static __always_inline void arch_safe_halt(void)
{
	PVOP_VCALL0(irq.safe_halt);
}

/*
 * Paravirt flavour of halt(): dispatches through the irq.halt op using
 * PVOP_VCALL0 (no arguments are passed).
 *
 * NOTE(review): this is plain "inline" whereas arch_safe_halt() next to
 * it is "__always_inline" -- confirm whether the difference is intended.
 */
static inline void halt(void)
{
	PVOP_VCALL0(irq.halt);
}

static inline u64 paravirt_read_msr(unsigned msr)
{
	return PVOP_CALL1(u64, cpu.read_msr, msr);
+1 −2
Original line number Diff line number Diff line
@@ -120,10 +120,9 @@ struct pv_irq_ops {
	struct paravirt_callee_save save_fl;
	struct paravirt_callee_save irq_disable;
	struct paravirt_callee_save irq_enable;

#endif
	void (*safe_halt)(void);
	void (*halt)(void);
#endif
} __no_randomize_layout;

struct pv_mmu_ops {
Loading