Commit a40d2525 authored by Ingo Molnar
Browse files

Merge branch 'linus' into x86/urgent, to pick up dependent commits



Prepare to fix aspects of the new BHI code.

Signed-off-by: Ingo Molnar <mingo@kernel.org>
parents 5ce344be 2c71fdf0
Loading
Loading
Loading
Loading
+42 −6
Original line number Diff line number Diff line
@@ -138,11 +138,10 @@ associated with the source address of the indirect branch. Specifically,
the BHB might be shared across privilege levels even in the presence of
Enhanced IBRS.

Currently the only known real-world BHB attack vector is via
unprivileged eBPF. Therefore, it's highly recommended to not enable
unprivileged eBPF, especially when eIBRS is used (without retpolines).
For a full mitigation against BHB attacks, it's recommended to use
retpolines (or eIBRS combined with retpolines).
Previously the only known real-world BHB attack vector was via unprivileged
eBPF. Further research has found attacks that don't require unprivileged eBPF.
For a full mitigation against BHB attacks it is recommended to set BHI_DIS_S or
use the BHB clearing sequence.

Attack scenarios
----------------
@@ -430,6 +429,23 @@ The possible values in this file are:
  'PBRSB-eIBRS: Not affected'  CPU is not affected by PBRSB
  ===========================  =======================================================

  - Branch History Injection (BHI) protection status:

.. list-table::

 * - BHI: Not affected
   - System is not affected
 * - BHI: Retpoline
   - System is protected by retpoline
 * - BHI: BHI_DIS_S
   - System is protected by BHI_DIS_S
 * - BHI: SW loop; KVM: SW loop
   - System is protected by software clearing sequence
 * - BHI: Syscall hardening
   - Syscalls are hardened against BHI
 * - BHI: Syscall hardening; KVM: SW loop
   - System is protected from userspace attacks by syscall hardening; KVM is protected by software clearing sequence

Full mitigation might require a microcode update from the CPU
vendor. When the necessary microcode is not available, the kernel will
report vulnerability.
@@ -484,7 +500,11 @@ Spectre variant 2

   Systems which support enhanced IBRS (eIBRS) enable IBRS protection once at
   boot, by setting the IBRS bit, and they're automatically protected against
   Spectre v2 variant attacks.
   some Spectre v2 variant attacks. The BHB can still influence the choice of
   indirect branch predictor entry, and although branch predictor entries are
   isolated between modes when eIBRS is enabled, the BHB itself is not isolated
   between modes. Systems which support BHI_DIS_S will set it to protect against
   BHI attacks.

   On Intel's enhanced IBRS systems, this includes cross-thread branch target
   injections on SMT systems (STIBP). In other words, Intel eIBRS enables
@@ -638,6 +658,22 @@ kernel command line.
		spectre_v2=off. Spectre variant 1 mitigations
		cannot be disabled.

	spectre_bhi=

		[X86] Control mitigation of Branch History Injection
		(BHI) vulnerability. Syscalls are hardened against BHI
		regardless of this setting. This setting affects the deployment
		of the HW BHI control and the SW BHB clearing sequence.

		on
			unconditionally enable.
		off
			unconditionally disable.
		auto
			enable if hardware mitigation
			control(BHI_DIS_S) is available, otherwise
			enable alternate mitigation in KVM.

For spectre_v2_user see Documentation/admin-guide/kernel-parameters.txt

Mitigation selection guide
+12 −0
Original line number Diff line number Diff line
@@ -6063,6 +6063,18 @@
	sonypi.*=	[HW] Sony Programmable I/O Control Device driver
			See Documentation/admin-guide/laptops/sonypi.rst

	spectre_bhi=	[X86] Control mitigation of Branch History Injection
			(BHI) vulnerability. Syscalls are hardened against BHI
			regardless of this setting. This setting affects the
			deployment of the HW BHI control and the SW BHB
			clearing sequence.

			on   - unconditionally enable.
			off  - unconditionally disable.
			auto - (default) enable hardware mitigation
			       (BHI_DIS_S) if available, otherwise enable
			       alternate mitigation in KVM.

	spectre_v2=	[X86,EARLY] Control mitigation of Spectre variant 2
			(indirect branch speculation) vulnerability.
			The default operation protects the kernel from
+26 −0
Original line number Diff line number Diff line
@@ -2633,6 +2633,32 @@ config MITIGATION_RFDS
	  stored in floating point, vector and integer registers.
	  See also <file:Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst>

# Build-time selection among the spectre_bhi= modes. Exactly one of the
# entries below is chosen; each entry's help text names the command line
# value it is equivalent to. SPECTRE_BHI_ON is the default selection.
choice
	prompt "Clear branch history"
	depends on CPU_SUP_INTEL
	default SPECTRE_BHI_ON
	help
	  Enable BHI mitigations. BHI attacks are a form of Spectre V2 attacks
	  where the branch history buffer is poisoned to speculatively steer
	  indirect branches.
	  See <file:Documentation/admin-guide/hw-vuln/spectre.rst>

config SPECTRE_BHI_ON
	bool "on"
	help
	  Equivalent to setting spectre_bhi=on command line parameter.
config SPECTRE_BHI_OFF
	bool "off"
	help
	  Equivalent to setting spectre_bhi=off command line parameter.
# "auto" additionally depends on BROKEN.
# NOTE(review): presumably this keeps "auto" from being selectable in normal
# configurations for now -- confirm the intent before relying on it.
config SPECTRE_BHI_AUTO
	bool "auto"
	depends on BROKEN
	help
	  Equivalent to setting spectre_bhi=auto command line parameter.

endchoice

endif

config ARCH_HAS_ADD_PAGES
+5 −5
Original line number Diff line number Diff line
@@ -49,7 +49,7 @@ static __always_inline bool do_syscall_x64(struct pt_regs *regs, int nr)

	if (likely(unr < NR_syscalls)) {
		unr = array_index_nospec(unr, NR_syscalls);
		regs->ax = sys_call_table[unr](regs);
		regs->ax = x64_sys_call(regs, unr);
		return true;
	}
	return false;
@@ -66,7 +66,7 @@ static __always_inline bool do_syscall_x32(struct pt_regs *regs, int nr)

	if (IS_ENABLED(CONFIG_X86_X32_ABI) && likely(xnr < X32_NR_syscalls)) {
		xnr = array_index_nospec(xnr, X32_NR_syscalls);
		regs->ax = x32_sys_call_table[xnr](regs);
		regs->ax = x32_sys_call(regs, xnr);
		return true;
	}
	return false;
@@ -162,7 +162,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs, int nr)

	if (likely(unr < IA32_NR_syscalls)) {
		unr = array_index_nospec(unr, IA32_NR_syscalls);
		regs->ax = ia32_sys_call_table[unr](regs);
		regs->ax = ia32_sys_call(regs, unr);
	} else if (nr != -1) {
		regs->ax = __ia32_sys_ni_syscall(regs);
	}
@@ -189,7 +189,7 @@ static __always_inline bool int80_is_external(void)
}

/**
 * int80_emulation - 32-bit legacy syscall entry
 * do_int80_emulation - 32-bit legacy syscall C entry from asm
 *
 * This entry point can be used by 32-bit and 64-bit programs to perform
 * 32-bit system calls.  Instances of INT $0x80 can be found inline in
@@ -207,7 +207,7 @@ static __always_inline bool int80_is_external(void)
 *   eax:				system call number
 *   ebx, ecx, edx, esi, edi, ebp:	arg1 - arg 6
 */
DEFINE_IDTENTRY_RAW(int80_emulation)
__visible noinstr void do_int80_emulation(struct pt_regs *regs)
{
	int nr;

+61 −0
Original line number Diff line number Diff line
@@ -116,6 +116,7 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL)
	/* clobbers %rax, make sure it is after saving the syscall nr */
	IBRS_ENTER
	UNTRAIN_RET
	CLEAR_BRANCH_HISTORY

	call	do_syscall_64		/* returns with IRQs disabled */

@@ -1491,3 +1492,63 @@ SYM_CODE_START_NOALIGN(rewind_stack_and_make_dead)
	call	make_task_dead
SYM_CODE_END(rewind_stack_and_make_dead)
.popsection

/*
 * This sequence executes branches in order to remove user branch information
 * from the branch history tracker in the Branch Predictor, therefore removing
 * user influence on subsequent BTB lookups.
 *
 * It should be used on parts prior to Alder Lake. Newer parts should use the
 * BHI_DIS_S hardware control instead. If a pre-Alder Lake part is being
 * virtualized on newer hardware the VMM should protect against BHI attacks by
 * setting BHI_DIS_S for the guests.
 *
 * CALLs/RETs are necessary to prevent Loop Stream Detector(LSD) from engaging
 * and not clearing the branch history. The call tree looks like:
 *
 * call 1
 *    call 2
 *      call 2
 *        call 2
 *          call 2
 * 	      call 2
 * 	      ret
 * 	    ret
 *        ret
 *      ret
 *    ret
 * ret
 *
 * This means that the stack is non-constant and ORC can't unwind it with %rsp
 * alone.  Therefore we unconditionally set up the frame pointer, which allows
 * ORC to unwind properly.
 *
 * The alignment is for performance and not for safety, and may be safely
 * refactored in the future if needed.
 */
/*
 * clear_bhb_loop: run a fixed pattern of nested calls and taken branches.
 *
 * Register usage visible in this routine: %ecx and %eax are overwritten
 * (outer and inner counters) and are not saved or restored here, and the
 * sub instructions modify the arithmetic flags. %rbp is saved on entry and
 * restored on exit.
 */
SYM_FUNC_START(clear_bhb_loop)
	/* Set up a frame pointer for the duration of the nested calls. */
	push	%rbp
	mov	%rsp, %rbp
	/* Outer counter: number of nested "call 2f" levels to create. */
	movl	$5, %ecx
	/* Enter the call tree; control comes back here after every RET has unwound. */
	ANNOTATE_INTRA_FUNCTION_CALL
	call	1f
	/* Call tree finished: skip over the loop body to the epilogue. */
	jmp	5f
	/* Pad to a 64-byte boundary with 0xcc bytes. */
	.align 64, 0xcc
	/*
	 * Label 1 is re-entered via "jnz 1b" without returning first, so each
	 * pass pushes one more return address that points at the RET below.
	 */
	ANNOTATE_INTRA_FUNCTION_CALL
1:	call	2f
	/* Return target of every "call 2f": unwinds one nesting level per execution. */
	RET
	.align 64, 0xcc
	/* Inner counter: five iterations of the jmp/jnz pair below. */
2:	movl	$5, %eax
	/* Each iteration executes one unconditional jmp and one conditional jnz. */
3:	jmp	4f
	nop
4:	sub	$1, %eax
	jnz	3b
	/* Nest another call level until the outer counter reaches zero. */
	sub	$1, %ecx
	jnz	1b
	/* Deepest level reached: start unwinding through the RET after label 1. */
	RET
	/*
	 * NOTE(review): the lfence presumably keeps later instructions from
	 * executing speculatively before the sequence above has completed --
	 * confirm against the vendor guidance.
	 */
5:	lfence
	/* Restore the caller's frame pointer. */
	pop	%rbp
	RET
SYM_FUNC_END(clear_bhb_loop)
EXPORT_SYMBOL_GPL(clear_bhb_loop)
STACK_FRAME_NON_STANDARD(clear_bhb_loop)
Loading