Commit 9591fdb0 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'x86_core_for_v6.18_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull more x86 updates from Borislav Petkov:

 - Remove a bunch of asm implementing condition flags testing in KVM's
   emulator in favor of int3_emulate_jcc() which is written in C

 - Replace KVM fastops with C-based stubs, which avoids problems with the
   fastop infra related to the latter not adhering to the C ABI due to its
   special calling convention and, more importantly, bypassing compiler
   control-flow integrity checking because the fastops are written in asm

 - Replace wrongly used static branches and other ugliness accumulated
   over time in hyperv's hypercall implementation with a proper static
   function call to the correct hypervisor call variant

 - Add some fixes and modifications to allow running FRED-enabled
   kernels in KVM even on non-FRED hardware

 - Add kCFI improvements like validating indirect calls and prepare for
   enabling kCFI with GCC. Add cmdline params documentation and other
   code cleanups

 - Use the single-byte 0xd6 insn as the official #UD single-byte
   undefined opcode instruction as agreed upon by both x86 vendors

 - Other smaller cleanups and touchups all over the place

* tag 'x86_core_for_v6.18_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (24 commits)
  x86,retpoline: Optimize patch_retpoline()
  x86,ibt: Use UDB instead of 0xEA
  x86/cfi: Remove __noinitretpoline and __noretpoline
  x86/cfi: Add "debug" option to "cfi=" bootparam
  x86/cfi: Standardize on common "CFI:" prefix for CFI reports
  x86/cfi: Document the "cfi=" bootparam options
  x86/traps: Clarify KCFI instruction layout
  compiler_types.h: Move __nocfi out of compiler-specific header
  objtool: Validate kCFI calls
  x86/fred: KVM: VMX: Always use FRED for IRQs when CONFIG_X86_FRED=y
  x86/fred: Play nice with invoking asm_fred_entry_from_kvm() on non-FRED hardware
  x86/fred: Install system vector handlers even if FRED isn't fully enabled
  x86/hyperv: Use direct call to hypercall-page
  x86/hyperv: Clean up hv_do_hypercall()
  KVM: x86: Remove fastops
  KVM: x86: Convert em_salc() to C
  KVM: x86: Introduce EM_ASM_3WCL
  KVM: x86: Introduce EM_ASM_1SRC2
  KVM: x86: Introduce EM_ASM_2CL
  KVM: x86: Introduce EM_ASM_2W
  ...
parents 2f0a7504 4a1e02b1
Loading
Loading
Loading
Loading
+18 −0
Original line number Diff line number Diff line
@@ -608,6 +608,24 @@
	ccw_timeout_log	[S390]
			See Documentation/arch/s390/common_io.rst for details.

	cfi=		[X86-64] Set Control Flow Integrity checking features
			when CONFIG_FINEIBT is enabled.
			Format: feature[,feature...]
			Default: auto

			auto:	  Use FineIBT if IBT available, otherwise kCFI.
				  Under FineIBT, enable "paranoid" mode when
				  FRED is not available.
			off:	  Turn off CFI checking.
			kcfi:	  Use kCFI (disable FineIBT).
			fineibt:  Use FineIBT (even if IBT not available).
			norand:   Do not re-randomize CFI hashes.
			paranoid: Add caller hash checking under FineIBT.
			bhi:	  Enable register poisoning to stop speculation
				  across FineIBT. (Disabled by default.)
			warn:	  Do not enforce CFI checking: warn only.
			debug:    Report CFI initialization details.

	cgroup_disable=	[KNL] Disable a particular controller or optional feature
			Format: {name of the controller(s) or feature(s) to disable}
			The effects of cgroup_disable=foo are:
+5 −6
Original line number Diff line number Diff line
@@ -99,7 +99,7 @@ For 32-bit we have the following conventions - kernel is built with
	.endif
.endm

.macro CLEAR_REGS clear_bp=1
.macro CLEAR_REGS clear_callee=1
	/*
	 * Sanitize registers of values that a speculation attack might
	 * otherwise want to exploit. The lower registers are likely clobbered
@@ -113,20 +113,19 @@ For 32-bit we have the following conventions - kernel is built with
	xorl	%r9d,  %r9d	/* nospec r9  */
	xorl	%r10d, %r10d	/* nospec r10 */
	xorl	%r11d, %r11d	/* nospec r11 */
	.if \clear_callee
	xorl	%ebx,  %ebx	/* nospec rbx */
	.if \clear_bp
	xorl	%ebp,  %ebp	/* nospec rbp */
	.endif
	xorl	%r12d, %r12d	/* nospec r12 */
	xorl	%r13d, %r13d	/* nospec r13 */
	xorl	%r14d, %r14d	/* nospec r14 */
	xorl	%r15d, %r15d	/* nospec r15 */

	.endif
.endm

.macro PUSH_AND_CLEAR_REGS rdx=%rdx rcx=%rcx rax=%rax save_ret=0 clear_bp=1 unwind_hint=1
.macro PUSH_AND_CLEAR_REGS rdx=%rdx rcx=%rcx rax=%rax save_ret=0 clear_callee=1 unwind_hint=1
	PUSH_REGS rdx=\rdx, rcx=\rcx, rax=\rax, save_ret=\save_ret unwind_hint=\unwind_hint
	CLEAR_REGS clear_bp=\clear_bp
	CLEAR_REGS clear_callee=\clear_callee
.endm

.macro POP_REGS pop_rdi=1
+26 −7
Original line number Diff line number Diff line
@@ -111,18 +111,37 @@ SYM_FUNC_START(asm_fred_entry_from_kvm)
	push %rax				/* Return RIP */
	push $0					/* Error code, 0 for IRQ/NMI */

	PUSH_AND_CLEAR_REGS clear_bp=0 unwind_hint=0
	PUSH_AND_CLEAR_REGS clear_callee=0 unwind_hint=0

	movq %rsp, %rdi				/* %rdi -> pt_regs */
	/*
	 * At this point: {rdi, rsi, rdx, rcx, r8, r9}, {r10, r11}, {rax, rdx}
	 * are clobbered, which corresponds to: arguments, extra caller-saved
	 * and return. All registers a C function is allowed to clobber.
	 *
	 * Notably, the callee-saved registers: {rbx, r12, r13, r14, r15}
	 * are untouched, with the exception of rbp, which carries the stack
	 * frame and will be restored before exit.
	 *
	 * Further calling another C function will not alter this state.
	 */
	call __fred_entry_from_kvm		/* Call the C entry point */
	POP_REGS
	ERETS
1:

	/*
	 * Objtool doesn't understand what ERETS does, this hint tells it that
	 * yes, we'll reach here and with what stack state. A save/restore pair
	 * isn't strictly needed, but it's the simplest form.
	 * When FRED, use ERETS to potentially clear NMIs, otherwise simply
	 * restore the stack pointer.
	 */
	ALTERNATIVE "nop; nop; mov %rbp, %rsp", \
	            __stringify(add $C_PTREGS_SIZE, %rsp; ERETS), \
		    X86_FEATURE_FRED

1:	/*
	 * Objtool doesn't understand ERETS, and the cfi register state is
	 * different from initial_func_cfi due to PUSH_REGS. Tell it the state
	 * is similar to where UNWIND_HINT_SAVE is.
	 */
	UNWIND_HINT_RESTORE

	pop %rbp
	RET

+44 −25
Original line number Diff line number Diff line
@@ -17,7 +17,6 @@
#include <asm/desc.h>
#include <asm/e820/api.h>
#include <asm/sev.h>
#include <asm/ibt.h>
#include <asm/hypervisor.h>
#include <hyperv/hvhdk.h>
#include <asm/mshyperv.h>
@@ -37,7 +36,45 @@
#include <linux/export.h>

void *hv_hypercall_pg;

#ifdef CONFIG_X86_64
/*
 * Fallback hypercall target: ignores all three arguments and returns
 * U64_MAX. It is the initial target of the __hv_hypercall static call
 * and is what hv_set_hypercall_pg() installs when handed a NULL pointer.
 */
static u64 __hv_hyperfail(u64 control, u64 param1, u64 param2)
{
	return U64_MAX;
}

DEFINE_STATIC_CALL(__hv_hypercall, __hv_hyperfail);

/*
 * Issue a hypercall through the __hv_hypercall static call trampoline.
 *
 * @control is passed in RCX, @param1 in RDX and @param2 in R8; the status
 * is read back from RAX and returned. The call lands on whatever
 * __hv_hypercall currently targets: __hv_hyperfail by default, or the
 * pointer installed by hv_set_hypercall_pg().
 */
u64 hv_std_hypercall(u64 control, u64 param1, u64 param2)
{
	u64 hv_status;

	/* Pin param2 to R8 so the "+r" operand below is forced into it. */
	register u64 __r8 asm("r8") = param2;
	/*
	 * RCX, RDX and R8 are in/out operands; R9-R11, the flags and memory
	 * are declared clobbered across the call.
	 */
	asm volatile ("call " STATIC_CALL_TRAMP_STR(__hv_hypercall)
		      : "=a" (hv_status), ASM_CALL_CONSTRAINT,
		        "+c" (control), "+d" (param1), "+r" (__r8)
		      : : "cc", "memory", "r9", "r10", "r11");

	return hv_status;
}

typedef u64 (*hv_hypercall_f)(u64 control, u64 param1, u64 param2);

static inline void hv_set_hypercall_pg(void *ptr)
{
	hv_hypercall_pg = ptr;

	if (!ptr)
		ptr = &__hv_hyperfail;
	static_call_update(__hv_hypercall, (hv_hypercall_f)ptr);
}
#else
/*
 * !CONFIG_X86_64 variant: only records @ptr in hv_hypercall_pg. There is
 * no static call to retarget in this configuration.
 */
static inline void hv_set_hypercall_pg(void *ptr)
{
	hv_hypercall_pg = ptr;
}
EXPORT_SYMBOL_GPL(hv_hypercall_pg);
#endif

union hv_ghcb * __percpu *hv_ghcb_pg;

@@ -330,7 +367,7 @@ static int hv_suspend(void)
	 * pointer is restored on resume.
	 */
	hv_hypercall_pg_saved = hv_hypercall_pg;
	hv_hypercall_pg = NULL;
	hv_set_hypercall_pg(NULL);

	/* Disable the hypercall page in the hypervisor */
	rdmsrq(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
@@ -356,7 +393,7 @@ static void hv_resume(void)
		vmalloc_to_pfn(hv_hypercall_pg_saved);
	wrmsrq(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);

	hv_hypercall_pg = hv_hypercall_pg_saved;
	hv_set_hypercall_pg(hv_hypercall_pg_saved);
	hv_hypercall_pg_saved = NULL;

	/*
@@ -476,8 +513,8 @@ void __init hyperv_init(void)
	if (hv_isolation_type_tdx() && !ms_hyperv.paravisor_present)
		goto skip_hypercall_pg_init;

	hv_hypercall_pg = __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START,
			VMALLOC_END, GFP_KERNEL, PAGE_KERNEL_ROX,
	hv_hypercall_pg = __vmalloc_node_range(PAGE_SIZE, 1, MODULES_VADDR,
			MODULES_END, GFP_KERNEL, PAGE_KERNEL_ROX,
			VM_FLUSH_RESET_PERMS, NUMA_NO_NODE,
			__builtin_return_address(0));
	if (hv_hypercall_pg == NULL)
@@ -515,27 +552,9 @@ void __init hyperv_init(void)
		wrmsrq(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
	}

skip_hypercall_pg_init:
	/*
	 * Some versions of Hyper-V that provide IBT in guest VMs have a bug
	 * in that there's no ENDBR64 instruction at the entry to the
	 * hypercall page. Because hypercalls are invoked via an indirect call
	 * to the hypercall page, all hypercall attempts fail when IBT is
	 * enabled, and Linux panics. For such buggy versions, disable IBT.
	 *
	 * Fixed versions of Hyper-V always provide ENDBR64 on the hypercall
	 * page, so if future Linux kernel versions enable IBT for 32-bit
	 * builds, additional hypercall page hackery will be required here
	 * to provide an ENDBR32.
	 */
#ifdef CONFIG_X86_KERNEL_IBT
	if (cpu_feature_enabled(X86_FEATURE_IBT) &&
	    *(u32 *)hv_hypercall_pg != gen_endbr()) {
		setup_clear_cpu_cap(X86_FEATURE_IBT);
		pr_warn("Disabling IBT because of Hyper-V bug\n");
	}
#endif
	hv_set_hypercall_pg(hv_hypercall_pg);

skip_hypercall_pg_init:
	/*
	 * hyperv_init() is called before LAPIC is initialized: see
	 * apic_intr_mode_init() -> x86_platform.apic_post_init() and
+15 −0
Original line number Diff line number Diff line
@@ -385,9 +385,23 @@ int hv_snp_boot_ap(u32 apic_id, unsigned long start_ip, unsigned int cpu)
	return ret;
}

/*
 * Issue a hypercall by executing VMMCALL directly.
 *
 * @control is passed in RCX, @param1 in RDX and @param2 in R8; the status
 * is read back from RAX and returned.
 */
u64 hv_snp_hypercall(u64 control, u64 param1, u64 param2)
{
	u64 hv_status;

	/* Pin param2 to R8 so the "+r" operand below is forced into it. */
	register u64 __r8 asm("r8") = param2;
	/*
	 * RCX, RDX and R8 are in/out operands; R9-R11, the flags and memory
	 * are declared clobbered.
	 */
	asm volatile("vmmcall"
		     : "=a" (hv_status), ASM_CALL_CONSTRAINT,
		       "+c" (control), "+d" (param1), "+r" (__r8)
		     : : "cc", "memory", "r9", "r10", "r11");

	return hv_status;
}

#else
static inline void hv_ghcb_msr_write(u64 msr, u64 value) {}
static inline void hv_ghcb_msr_read(u64 msr, u64 *value) {}
/* Stub for builds without CONFIG_AMD_MEM_ENCRYPT: always returns U64_MAX. */
u64 hv_snp_hypercall(u64 control, u64 param1, u64 param2) { return U64_MAX; }
#endif /* CONFIG_AMD_MEM_ENCRYPT */

#ifdef CONFIG_INTEL_TDX_GUEST
@@ -437,6 +451,7 @@ u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2)
#else
static inline void hv_tdx_msr_write(u64 msr, u64 value) {}
static inline void hv_tdx_msr_read(u64 msr, u64 *value) {}
/* Stub for builds without CONFIG_INTEL_TDX_GUEST: always returns U64_MAX. */
u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2) { return U64_MAX; }
#endif /* CONFIG_INTEL_TDX_GUEST */

#if defined(CONFIG_AMD_MEM_ENCRYPT) || defined(CONFIG_INTEL_TDX_GUEST)
Loading