Commit be420222 authored by Linus Torvalds
Browse files

Merge tag 'x86_tdx_for_6.13-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull tdx updates from Dave Hansen:
 "These essentially refine some interactions between TDX guests and
  VMMs.

  The first leverages a new TDX module feature to runtime disable the
  ability for a VM to inject #VE exceptions. Before this feature, there
  was only a static on/off switch and the guest had to panic if it was
  configured in a bad state.

  The second lets the guest opt in to be able to access the topology
  CPUID leaves. Before this, accesses to those leaves would #VE.

  For both of these, it would have been nicest to just change the
  default behavior, but some pesky "other" OSes evidently need to retain
  the legacy behavior.

  Summary:

   - Add new infrastructure for reading TDX metadata

   - Use the newly-available metadata to:
      - Disable potentially nasty #VE exceptions
      - Get more complete CPU topology information from the VMM"

* tag 'x86_tdx_for_6.13-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/tdx: Enable CPU topology enumeration
  x86/tdx: Dynamically disable SEPT violations from causing #VEs
  x86/tdx: Rename tdx_parse_tdinfo() to tdx_setup()
  x86/tdx: Introduce wrappers to read and write TD metadata
parents 5af5d43f 7ae15e2f
Loading
Loading
Loading
Loading
+116 −22
Original line number Diff line number Diff line
@@ -78,6 +78,32 @@ static inline void tdcall(u64 fn, struct tdx_module_args *args)
		panic("TDCALL %lld failed (Buggy TDX module!)\n", fn);
}

/*
 * Read one TD-scoped metadata field through the TDG.VM.RD module call.
 *
 * @field: metadata field identifier, passed to the module in RDX
 * @value: receives the contents of R8 after the call; it is written
 *         unconditionally, whatever status the call reports
 *
 * Returns the status produced by __tdcall_ret().
 */
static inline u64 tdg_vm_rd(u64 field, u64 *value)
{
	struct tdx_module_args args = { .rdx = field };
	u64 status = __tdcall_ret(TDG_VM_RD, &args);

	/* The module hands the field contents back in R8 */
	*value = args.r8;

	return status;
}

/*
 * Write one TD-scoped metadata field through the TDG.VM.WR module call.
 *
 * @field: metadata field identifier, passed to the module in RDX
 * @value: new contents for the field, passed in R8
 * @mask:  write mask accompanying @value, passed in R9
 *
 * Returns the status produced by __tdcall().
 */
static inline u64 tdg_vm_wr(u64 field, u64 value, u64 mask)
{
	struct tdx_module_args args = {};

	args.rdx = field;
	args.r8 = value;
	args.r9 = mask;

	return __tdcall(TDG_VM_WR, &args);
}

/**
 * tdx_mcall_get_report0() - Wrapper to get TDREPORT0 (a.k.a. TDREPORT
 *                           subtype 0) using TDG.MR.REPORT TDCALL.
@@ -168,7 +194,87 @@ static void __noreturn tdx_panic(const char *msg)
		__tdx_hypercall(&args);
}

static void tdx_parse_tdinfo(u64 *cc_mask)
/*
 * #VEs raised while touching ordinary kernel memory cannot be handled by the
 * kernel, so accesses to TD-private memory must never deliver one.
 *
 * With TDX 1.0 the guest has no say in this: the VMM decides through the
 * ATTR_SEPT_VE_DISABLE TD attribute whether SEPT violations become #VEs.
 *
 * Newer TDX modules let the guest itself choose whether it wants SEPT
 * violation #VEs. Detect that capability and use it to turn them off when
 * possible.
 *
 * Once the guest may flip the control, ATTR_SEPT_VE_DISABLE stops being
 * trustworthy: it only records the initial setting for the TD and is not
 * refreshed if an earlier stage (e.g. the bootloader) changed it before the
 * kernel started. In that case the TDCS_TD_CTLS bit is what tells whether
 * SEPT #VEs are enabled or disabled.
 */
static void disable_sept_ve(u64 td_attr)
{
	const char *msg = "TD misconfiguration: SEPT #VE has to be disabled";
	bool is_debug_td = td_attr & ATTR_DEBUG;
	u64 config_flags, td_ctls;

	tdg_vm_rd(TDCS_CONFIG_FLAGS, &config_flags);

	if (config_flags & TDCS_CONFIG_FLEXIBLE_PENDING_VE) {
		/* The guest owns the control: TD_CTLS holds the live state */
		tdg_vm_rd(TDCS_TD_CTLS, &td_ctls);

		/*
		 * Nothing to write when SEPT #VEs were already disabled
		 * before us. Debug TDs keep them enabled so that splats
		 * stay available in debugging environments.
		 */
		if (!(td_ctls & TD_CTLS_PENDING_VE_DISABLE) && !is_debug_td) {
			tdg_vm_wr(TDCS_TD_CTLS, TD_CTLS_PENDING_VE_DISABLE,
				  TD_CTLS_PENDING_VE_DISABLE);
		}
		return;
	}

	/* No guest-side control: the VMM-provided attribute decides */
	if (td_attr & ATTR_SEPT_VE_DISABLE)
		return;

	/* Debug TDs only get a warning so that backtraces remain possible */
	if (is_debug_td)
		pr_warn("%s\n", msg);
	else
		tdx_panic(msg);
}

/*
 * Under TDX 1.0, accesses to the topology-related CPUID leaves (0xB and 0x1F)
 * and to the X2APIC_APICID MSR raise a #VE, and the kernel answers CPUID #VEs
 * with all zeros. In practice the kernel can therefore only boot with a plain
 * topology; anything more complicated causes problems.
 *
 * The ENUM_TOPOLOGY feature lets the VMM supply topology information. With it
 * enabled the topology-related #VEs go away: the TDX module virtualizes
 * accesses to those CPUID leaves and to the MSR.
 *
 * Turn ENUM_TOPOLOGY on whenever it is available.
 */
static void enable_cpu_topology_enumeration(void)
{
	u64 topology_configured;

	/* Only opt in when the VMM supplied a valid topology configuration */
	tdg_vm_rd(TDCS_TOPOLOGY_ENUM_CONFIGURED, &topology_configured);
	if (topology_configured) {
		tdg_vm_wr(TDCS_TD_CTLS, TD_CTLS_ENUM_TOPOLOGY,
			  TD_CTLS_ENUM_TOPOLOGY);
		return;
	}

	pr_err("VMM did not configure X2APIC_IDs properly\n");
}

static void tdx_setup(u64 *cc_mask)
{
	struct tdx_module_args args = {};
	unsigned int gpa_width;
@@ -193,21 +299,13 @@ static void tdx_parse_tdinfo(u64 *cc_mask)
	gpa_width = args.rcx & GENMASK(5, 0);
	*cc_mask = BIT_ULL(gpa_width - 1);

	/*
	 * The kernel can not handle #VE's when accessing normal kernel
	 * memory.  Ensure that no #VE will be delivered for accesses to
	 * TD-private memory.  Only VMM-shared memory (MMIO) will #VE.
	 */
	td_attr = args.rdx;
	if (!(td_attr & ATTR_SEPT_VE_DISABLE)) {
		const char *msg = "TD misconfiguration: SEPT_VE_DISABLE attribute must be set.";

		/* Relax SEPT_VE_DISABLE check for debug TD. */
		if (td_attr & ATTR_DEBUG)
			pr_warn("%s\n", msg);
		else
			tdx_panic(msg);
	}
	/* Kernel does not use NOTIFY_ENABLES and does not need random #VEs */
	tdg_vm_wr(TDCS_NOTIFY_ENABLES, 0, -1ULL);

	disable_sept_ve(td_attr);
	enable_cpu_topology_enumeration();
}

/*
@@ -929,10 +1027,6 @@ static void tdx_kexec_finish(void)

void __init tdx_early_init(void)
{
	struct tdx_module_args args = {
		.rdx = TDCS_NOTIFY_ENABLES,
		.r9 = -1ULL,
	};
	u64 cc_mask;
	u32 eax, sig[3];

@@ -947,11 +1041,11 @@ void __init tdx_early_init(void)
	setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);

	cc_vendor = CC_VENDOR_INTEL;
	tdx_parse_tdinfo(&cc_mask);
	cc_set_mask(cc_mask);

	/* Kernel does not use NOTIFY_ENABLES and does not need random #VEs */
	tdcall(TDG_VM_WR, &args);
	/* Configure the TD */
	tdx_setup(&cc_mask);

	cc_set_mask(cc_mask);

	/*
	 * All bits above GPA width are reserved and kernel treats shared bit
+12 −1
Original line number Diff line number Diff line
@@ -16,10 +16,21 @@
#define TDG_VP_VEINFO_GET		3
#define TDG_MR_REPORT			4
#define TDG_MEM_PAGE_ACCEPT		6
#define TDG_VM_RD			7
#define TDG_VM_WR			8

/* TDCS fields. To be used by TDG.VM.WR and TDG.VM.RD module calls */
/* TDX TD-Scope Metadata. To be used by TDG.VM.WR and TDG.VM.RD */
#define TDCS_CONFIG_FLAGS		0x1110000300000016
#define TDCS_TD_CTLS			0x1110000300000017
#define TDCS_NOTIFY_ENABLES		0x9100000000000010
#define TDCS_TOPOLOGY_ENUM_CONFIGURED	0x9100000000000019

/* TDCS_CONFIG_FLAGS bits */
/* Set when the TD is allowed to disable SEPT #VE on its own */
#define TDCS_CONFIG_FLEXIBLE_PENDING_VE	BIT_ULL(1)

/* TDCS_TD_CTLS bits */
/* Set when SEPT #VEs are disabled; the guest writes it to disable them */
#define TD_CTLS_PENDING_VE_DISABLE	BIT_ULL(0)
/* Written by the guest to enable the ENUM_TOPOLOGY feature */
#define TD_CTLS_ENUM_TOPOLOGY		BIT_ULL(1)

/* TDX hypercall Leaf IDs */
#define TDVMCALL_MAP_GPA		0x10001