Commit 56b2b1fc authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'x86-urgent-2025-05-17' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull misc x86 fixes from Ingo Molnar:

 - Fix SEV-SNP kdump bugs

 - Update the email address of Alexey Makhalov in MAINTAINERS

 - Add the CPU feature flag for the Zen6 microarchitecture

 - Fix typo in system message

* tag 'x86-urgent-2025-05-17' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/mm: Remove duplicated word in warning message
  x86/CPU/AMD: Add X86_FEATURE_ZEN6
  x86/sev: Make sure pages are not skipped during kdump
  x86/sev: Do not touch VMSA pages during SNP guest memory kdump
  MAINTAINERS: Update Alexey Makhalov's email address
  x86/sev: Fix operator precedence in GHCB_MSR_VMPL_REQ_LEVEL macro
parents 4bcaa590 03680913
Loading
Loading
Loading
Loading
+3 −3
Original line number Diff line number Diff line
@@ -18439,7 +18439,7 @@ F: include/uapi/linux/ppdev.h
PARAVIRT_OPS INTERFACE
M:	Juergen Gross <jgross@suse.com>
R:	Ajay Kaher <ajay.kaher@broadcom.com>
R:	Alexey Makhalov <alexey.amakhalov@broadcom.com>
R:	Alexey Makhalov <alexey.makhalov@broadcom.com>
R:	Broadcom internal kernel review list <bcm-kernel-feedback-list@broadcom.com>
L:	virtualization@lists.linux.dev
L:	x86@kernel.org
@@ -25924,7 +25924,7 @@ F: drivers/misc/vmw_balloon.c
VMWARE HYPERVISOR INTERFACE
M:	Ajay Kaher <ajay.kaher@broadcom.com>
M:	Alexey Makhalov <alexey.amakhalov@broadcom.com>
M:	Alexey Makhalov <alexey.makhalov@broadcom.com>
R:	Broadcom internal kernel review list <bcm-kernel-feedback-list@broadcom.com>
L:	virtualization@lists.linux.dev
L:	x86@kernel.org
@@ -25952,7 +25952,7 @@ F: drivers/scsi/vmw_pvscsi.h
VMWARE VIRTUAL PTP CLOCK DRIVER
M:	Nick Shi <nick.shi@broadcom.com>
R:	Ajay Kaher <ajay.kaher@broadcom.com>
R:	Alexey Makhalov <alexey.amakhalov@broadcom.com>
R:	Alexey Makhalov <alexey.makhalov@broadcom.com>
R:	Broadcom internal kernel review list <bcm-kernel-feedback-list@broadcom.com>
L:	netdev@vger.kernel.org
S:	Supported
+165 −90
Original line number Diff line number Diff line
@@ -959,6 +959,102 @@ void snp_accept_memory(phys_addr_t start, phys_addr_t end)
	set_pages_state(vaddr, npages, SNP_PAGE_STATE_PRIVATE);
}

/*
 * Issue an SVM_VMGEXIT_AP_CREATION request for the vCPU identified by
 * @apic_id.
 *
 * @event:   sub-event placed in the low bits of sw_exit_info_1. Any value
 *           other than SVM_VMGEXIT_AP_DESTROY is treated as a create request.
 * @vmsa:    save area whose physical address is passed in sw_exit_info_2;
 *           its sev_features field is additionally passed in RAX on create.
 * @apic_id: placed in bits 63:32 of sw_exit_info_1.
 *
 * Returns 0 on success, or -EINVAL when sw_exit_info_1 is not marked valid
 * after the VMGEXIT or its lower 32 bits are non-zero.
 */
static int vmgexit_ap_control(u64 event, struct sev_es_save_area *vmsa, u32 apic_id)
{
	bool create = event != SVM_VMGEXIT_AP_DESTROY;
	struct ghcb_state state;
	unsigned long flags;
	struct ghcb *ghcb;
	int ret = 0;

	/*
	 * Interrupts stay disabled from acquiring the GHCB until it is put
	 * back below.
	 * NOTE(review): presumably so nothing else on this CPU reuses the
	 * GHCB mid-request -- confirm against __sev_get_ghcb().
	 */
	local_irq_save(flags);

	ghcb = __sev_get_ghcb(&state);

	/* Start from a clean GHCB before filling in this request. */
	vc_ghcb_invalidate(ghcb);

	/* RAX is only populated for create requests, not for destroy. */
	if (create)
		ghcb_set_rax(ghcb, vmsa->sev_features);

	ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_CREATION);
	/* sw_exit_info_1 layout: [63:32] APIC ID, from bit 16 the VMPL, low bits the event. */
	ghcb_set_sw_exit_info_1(ghcb,
				((u64)apic_id << 32)	|
				((u64)snp_vmpl << 16)	|
				event);
	ghcb_set_sw_exit_info_2(ghcb, __pa(vmsa));

	/* Publish the GHCB's physical address through the GHCB MSR, then exit. */
	sev_es_wr_ghcb_msr(__pa(ghcb));
	VMGEXIT();

	/* A missing result or non-zero lower 32 bits of sw_exit_info_1 means failure. */
	if (!ghcb_sw_exit_info_1_is_valid(ghcb) ||
	    lower_32_bits(ghcb->save.sw_exit_info_1)) {
		pr_err("SNP AP %s error\n", (create ? "CREATE" : "DESTROY"));
		ret = -EINVAL;
	}

	__sev_put_ghcb(&state);

	local_irq_restore(flags);

	return ret;
}

/*
 * Set or clear the VMSA attribute of the page at @va.
 *
 * @va:        virtual address of the page to mark or unmark as a VMSA
 * @caa:       calling area handed to the SVSM when creating a vCPU;
 *             not used when clearing
 * @apic_id:   APIC ID handed to the SVSM when creating a vCPU;
 *             not used when clearing
 * @make_vmsa: true to turn the page into a VMSA, false to revert it
 *
 * Returns whatever the SVSM call or RMPADJUST returns, depending on the
 * path taken.
 */
static int snp_set_vmsa(void *va, void *caa, int apic_id, bool make_vmsa)
{
	struct svsm_call call = {};
	unsigned long flags;
	u64 attrs;
	int ret;

	if (!snp_vmpl) {
		/*
		 * Running at VMPL0: the VMSA bit of a page can be changed
		 * directly with RMPADJUST. The instruction only succeeds
		 * when it targets the permissions of a less privileged
		 * (higher numbered) VMPL, hence the target VMPL of 1.
		 */
		attrs = 1;
		if (make_vmsa)
			attrs |= RMPADJUST_VMSA_PAGE_BIT;

		return rmpadjust((unsigned long)va, RMP_PG_SIZE_4K, attrs);
	}

	/* Not running at VMPL0: the request has to go through the SVSM. */
	local_irq_save(flags);

	call.caa = this_cpu_read(svsm_caa);
	call.rcx = __pa(va);

	if (!make_vmsa) {
		/* Protocol 0, Call ID 3 */
		call.rax = SVSM_CORE_CALL(SVSM_CORE_DELETE_VCPU);
	} else {
		/* Protocol 0, Call ID 2 */
		call.rax = SVSM_CORE_CALL(SVSM_CORE_CREATE_VCPU);
		call.rdx = __pa(caa);
		call.r8  = apic_id;
	}

	ret = svsm_perform_call_protocol(&call);

	local_irq_restore(flags);

	return ret;
}

/*
 * Clear the VMSA attribute from @vmsa's page and free the page.
 *
 * @vmsa:    VMSA page to revert and release
 * @apic_id: forwarded to snp_set_vmsa()
 *
 * If clearing the attribute fails, the page is deliberately not freed
 * (it is leaked) and an error is logged.
 */
static void snp_cleanup_vmsa(struct sev_es_save_area *vmsa, int apic_id)
{
	int err;

	err = snp_set_vmsa(vmsa, NULL, apic_id, false);
	if (err) {
		/* err is a signed int: print it with %d so negative codes stay readable. */
		pr_err("clear VMSA page failed (%d), leaking page\n", err);
		return;
	}

	free_page((unsigned long)vmsa);
}

static void set_pte_enc(pte_t *kpte, int level, void *va)
{
	struct pte_enc_desc d = {
@@ -1005,7 +1101,8 @@ static void unshare_all_memory(void)
			data = per_cpu(runtime_data, cpu);
			ghcb = (unsigned long)&data->ghcb_page;

			if (addr <= ghcb && ghcb <= addr + size) {
			/* Handle the case of a huge page containing the GHCB page */
			if (addr <= ghcb && ghcb < addr + size) {
				skipped_addr = true;
				break;
			}
@@ -1055,11 +1152,70 @@ void snp_kexec_begin(void)
		pr_warn("Failed to stop shared<->private conversions\n");
}

/*
 * Shut down all APs except the one handling kexec/kdump, and clear the
 * VMSA tag on each AP's VMSA page, as those pages are no longer being
 * used as VMSA pages.
 *
 * The VMSA page of the currently running vCPU cannot be untagged; it is
 * marked offline instead.
 */
static void shutdown_all_aps(void)
{
	struct sev_es_save_area *vmsa;
	int apic_id, this_cpu, cpu;

	/* Paired with put_cpu() at the end of the function. */
	this_cpu = get_cpu();

	/*
	 * APs are already in HLT loop when enc_kexec_finish() callback
	 * is invoked.
	 */
	for_each_present_cpu(cpu) {
		vmsa = per_cpu(sev_vmsa, cpu);

		/*
		 * The BSP and offlined APs do not have a guest-allocated VMSA,
		 * so there is no VMSA tag to clear for them.
		 */
		if (!vmsa)
			continue;

		/*
		 * Cannot clear the VMSA tag for the currently running vCPU.
		 */
		if (this_cpu == cpu) {
			unsigned long pa;
			struct page *p;

			pa = __pa(vmsa);
			/*
			 * Mark the VMSA page of the running vCPU as offline
			 * so that it is excluded and not touched by makedumpfile
			 * while generating vmcore during kdump.
			 */
			p = pfn_to_online_page(pa >> PAGE_SHIFT);
			if (p)
				__SetPageOffline(p);
			continue;
		}

		apic_id = cpuid_to_apicid[cpu];

		/*
		 * Issue AP destroy to ensure the AP gets kicked out of guest
		 * mode, which allows using RMPADJUST to remove the VMSA tag
		 * on its VMSA page.
		 *
		 * The result of the destroy request is not checked; the VMSA
		 * cleanup is attempted regardless.
		 */
		vmgexit_ap_control(SVM_VMGEXIT_AP_DESTROY, vmsa, apic_id);
		snp_cleanup_vmsa(vmsa, apic_id);
	}

	put_cpu();
}

void snp_kexec_finish(void)
{
	struct sev_es_runtime_data *data;
	unsigned long size, addr;
	unsigned int level, cpu;
	unsigned long size;
	struct ghcb *ghcb;
	pte_t *pte;

@@ -1069,6 +1225,8 @@ void snp_kexec_finish(void)
	if (!IS_ENABLED(CONFIG_KEXEC_CORE))
		return;

	shutdown_all_aps();

	unshare_all_memory();

	/*
@@ -1085,54 +1243,11 @@ void snp_kexec_finish(void)
		ghcb = &data->ghcb_page;
		pte = lookup_address((unsigned long)ghcb, &level);
		size = page_level_size(level);
		set_pte_enc(pte, level, (void *)ghcb);
		snp_set_memory_private((unsigned long)ghcb, (size / PAGE_SIZE));
	}
}

static int snp_set_vmsa(void *va, void *caa, int apic_id, bool make_vmsa)
{
	int ret;

	if (snp_vmpl) {
		struct svsm_call call = {};
		unsigned long flags;

		local_irq_save(flags);

		call.caa = this_cpu_read(svsm_caa);
		call.rcx = __pa(va);

		if (make_vmsa) {
			/* Protocol 0, Call ID 2 */
			call.rax = SVSM_CORE_CALL(SVSM_CORE_CREATE_VCPU);
			call.rdx = __pa(caa);
			call.r8  = apic_id;
		} else {
			/* Protocol 0, Call ID 3 */
			call.rax = SVSM_CORE_CALL(SVSM_CORE_DELETE_VCPU);
		}

		ret = svsm_perform_call_protocol(&call);

		local_irq_restore(flags);
	} else {
		/*
		 * If the kernel runs at VMPL0, it can change the VMSA
		 * bit for a page using the RMPADJUST instruction.
		 * However, for the instruction to succeed it must
		 * target the permissions of a lesser privileged (higher
		 * numbered) VMPL level, so use VMPL1.
		 */
		u64 attrs = 1;

		if (make_vmsa)
			attrs |= RMPADJUST_VMSA_PAGE_BIT;

		ret = rmpadjust((unsigned long)va, RMP_PG_SIZE_4K, attrs);
		/* Handle the case of a huge page containing the GHCB page */
		addr = (unsigned long)ghcb & page_level_mask(level);
		set_pte_enc(pte, level, (void *)addr);
		snp_set_memory_private(addr, (size / PAGE_SIZE));
	}

	return ret;
}

#define __ATTR_BASE		(SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK)
@@ -1166,24 +1281,10 @@ static void *snp_alloc_vmsa_page(int cpu)
	return page_address(p + 1);
}

/*
 * Clear the VMSA attribute from @vmsa's page and free the page.
 *
 * @vmsa:    VMSA page to revert and release
 * @apic_id: forwarded to snp_set_vmsa()
 *
 * If clearing the attribute fails, the page is deliberately not freed
 * (it is leaked) and an error is logged.
 */
static void snp_cleanup_vmsa(struct sev_es_save_area *vmsa, int apic_id)
{
	int err;

	err = snp_set_vmsa(vmsa, NULL, apic_id, false);
	if (err) {
		/* err is a signed int: print it with %d so negative codes stay readable. */
		pr_err("clear VMSA page failed (%d), leaking page\n", err);
		return;
	}

	free_page((unsigned long)vmsa);
}

static int wakeup_cpu_via_vmgexit(u32 apic_id, unsigned long start_ip)
{
	struct sev_es_save_area *cur_vmsa, *vmsa;
	struct ghcb_state state;
	struct svsm_ca *caa;
	unsigned long flags;
	struct ghcb *ghcb;
	u8 sipi_vector;
	int cpu, ret;
	u64 cr4;
@@ -1297,33 +1398,7 @@ static int wakeup_cpu_via_vmgexit(u32 apic_id, unsigned long start_ip)
	}

	/* Issue VMGEXIT AP Creation NAE event */
	local_irq_save(flags);

	ghcb = __sev_get_ghcb(&state);

	vc_ghcb_invalidate(ghcb);
	ghcb_set_rax(ghcb, vmsa->sev_features);
	ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_CREATION);
	ghcb_set_sw_exit_info_1(ghcb,
				((u64)apic_id << 32)	|
				((u64)snp_vmpl << 16)	|
				SVM_VMGEXIT_AP_CREATE);
	ghcb_set_sw_exit_info_2(ghcb, __pa(vmsa));

	sev_es_wr_ghcb_msr(__pa(ghcb));
	VMGEXIT();

	if (!ghcb_sw_exit_info_1_is_valid(ghcb) ||
	    lower_32_bits(ghcb->save.sw_exit_info_1)) {
		pr_err("SNP AP Creation error\n");
		ret = -EINVAL;
	}

	__sev_put_ghcb(&state);

	local_irq_restore(flags);

	/* Perform cleanup if there was an error */
	ret = vmgexit_ap_control(SVM_VMGEXIT_AP_CREATE, vmsa, apic_id);
	if (ret) {
		snp_cleanup_vmsa(vmsa, apic_id);
		vmsa = NULL;
+1 −1
Original line number Diff line number Diff line
@@ -75,7 +75,7 @@
#define X86_FEATURE_CENTAUR_MCR		( 3*32+ 3) /* "centaur_mcr" Centaur MCRs (= MTRRs) */
#define X86_FEATURE_K8			( 3*32+ 4) /* Opteron, Athlon64 */
#define X86_FEATURE_ZEN5		( 3*32+ 5) /* CPU based on Zen5 microarchitecture */
/* Free                                 ( 3*32+ 6) */
#define X86_FEATURE_ZEN6		( 3*32+ 6) /* CPU based on Zen6 microarchitecture */
/* Free                                 ( 3*32+ 7) */
#define X86_FEATURE_CONSTANT_TSC	( 3*32+ 8) /* "constant_tsc" TSC ticks at a constant rate */
#define X86_FEATURE_UP			( 3*32+ 9) /* "up" SMP kernel running on UP */
+1 −1
Original line number Diff line number Diff line
@@ -116,7 +116,7 @@ enum psc_op {
#define GHCB_MSR_VMPL_REQ		0x016
#define GHCB_MSR_VMPL_REQ_LEVEL(v)			\
	/* GHCBData[39:32] */				\
	(((u64)(v) & GENMASK_ULL(7, 0) << 32) |		\
	((((u64)(v) & GENMASK_ULL(7, 0)) << 32) |	\
	/* GHCBData[11:0] */				\
	GHCB_MSR_VMPL_REQ)

+5 −0
Original line number Diff line number Diff line
@@ -472,6 +472,11 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
		case 0x60 ... 0x7f:
			setup_force_cpu_cap(X86_FEATURE_ZEN5);
			break;
		case 0x50 ... 0x5f:
		case 0x90 ... 0xaf:
		case 0xc0 ... 0xcf:
			setup_force_cpu_cap(X86_FEATURE_ZEN6);
			break;
		default:
			goto warn;
		}
Loading