Commit 18fbc247 authored by Oliver Upton
Browse files

KVM: arm64: nv: Use guest hypervisor's vSError state



When HCR_EL2.AMO is set, physical SErrors are routed to EL2 and virtual
SError injection is enabled for EL1. Conceptually treating
host-initiated SErrors as 'physical', this means we can delegate control
of the vSError injection context to the guest hypervisor when nesting &&
AMO is set.

Reviewed-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20250708172532.1699409-9-oliver.upton@linux.dev


Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
parent 211fced4
Loading
Loading
Loading
Loading
+5 −0
Original line number Diff line number Diff line
@@ -257,6 +257,11 @@ static inline bool is_nested_ctxt(struct kvm_vcpu *vcpu)
	return vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu);
}

/*
 * Tell whether the vSError injection state for @vcpu is the one owned by
 * the guest hypervisor: the vCPU must be running in a nested context and
 * the guest's HCR_EL2.AMO must be set.
 */
static inline bool vserror_state_is_nested(struct kvm_vcpu *vcpu)
{
	/* Outside of a nested context the AMO bit is never consulted. */
	if (!is_nested_ctxt(vcpu))
		return false;

	return vcpu_el2_amo_is_set(vcpu);
}

/*
 * The layout of SPSR for an AArch32 state is different when observed from an
 * AArch64 SPSR_ELx or an AArch32 SPSR_*. This function generates the AArch32
+3 −0
Original line number Diff line number Diff line
@@ -1682,6 +1682,9 @@ void kvm_set_vm_id_reg(struct kvm *kvm, u32 reg, u64 val);
#define kvm_has_s1poe(k)				\
	(kvm_has_feat((k), ID_AA64MMFR3_EL1, S1POE, IMP))

/*
 * Query, via kvm_has_feat(), the RAS field of ID_AA64PFR0_EL1 for VM @k
 * against the IMP value. Callers use this to tell whether FEAT_RAS has
 * been exposed to the guest.
 */
#define kvm_has_ras(k)					\
	(kvm_has_feat((k), ID_AA64PFR0_EL1, RAS, IMP))

static inline bool kvm_arch_has_irq_bypass(void)
{
	return true;
+40 −5
Original line number Diff line number Diff line
@@ -476,21 +476,56 @@ static inline void ___activate_traps(struct kvm_vcpu *vcpu, u64 hcr)

	write_sysreg_hcr(hcr);

	if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE))
		write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2);
	if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE)) {
		u64 vsesr;

		/*
		 * When HCR_EL2.AMO is set, physical SErrors are taken to EL2
		 * and vSError injection is enabled for EL1. Conveniently, for
		 * NV this means that it is never the case where a 'physical'
		 * SError (injected by KVM or userspace) and vSError are
		 * deliverable to the same context.
		 *
		 * As such, we can trivially select between the host or guest's
		 * VSESR_EL2. Except for the case that FEAT_RAS hasn't been
		 * exposed to the guest, where ESR propagation in hardware
		 * occurs unconditionally.
		 *
		 * Paper over the architectural wart and use an IMPLEMENTATION
		 * DEFINED ESR value in case FEAT_RAS is hidden from the guest.
		 */
		if (!vserror_state_is_nested(vcpu))
			vsesr = vcpu->arch.vsesr_el2;
		else if (kvm_has_ras(kern_hyp_va(vcpu->kvm)))
			vsesr = __vcpu_sys_reg(vcpu, VSESR_EL2);
		else
			vsesr = ESR_ELx_ISV;

		write_sysreg_s(vsesr, SYS_VSESR_EL2);
	}
}

/*
 * Resynchronise the saved HCR_EL2.VSE bit with the hardware register so
 * that a pended virtual SError stays pending until the hardware has
 * cleared it.
 */
static inline void ___deactivate_traps(struct kvm_vcpu *vcpu)
{
	u64 *hcr;

	/*
	 * Select the HCR_EL2 image that carries the vSError state: the
	 * guest hypervisor's copy when vserror_state_is_nested(), KVM's own
	 * vcpu->arch.hcr_el2 otherwise.
	 */
	if (vserror_state_is_nested(vcpu))
		hcr = __ctxt_sys_reg(&vcpu->arch.ctxt, HCR_EL2);
	else
		hcr = &vcpu->arch.hcr_el2;

	/*
	 * If we pended a virtual abort, preserve it until it gets
	 * cleared. See D1.14.3 (Virtual Interrupts) for details, but
	 * the crucial bit is "On taking a vSError interrupt,
	 * HCR_EL2.VSE is cleared to 0."
	 *
	 * Additionally, when in a nested context we need to propagate the
	 * updated state to the guest hypervisor's HCR_EL2.
	 */
	/*
	 * NOTE(review): the three lines below operate on vcpu->arch.hcr_el2
	 * directly and look like the removed side of this diff hunk (the
	 * hunk header counts 5 removed lines) - confirm against the applied
	 * file, where only the *hcr variant that follows should remain.
	 */
	if (vcpu->arch.hcr_el2 & HCR_VSE) {
		vcpu->arch.hcr_el2 &= ~HCR_VSE;
		vcpu->arch.hcr_el2 |= read_sysreg(hcr_el2) & HCR_VSE;
	/* Replace the saved VSE bit with whatever the hardware reports now. */
	if (*hcr & HCR_VSE) {
		*hcr &= ~HCR_VSE;
		*hcr |= read_sysreg(hcr_el2) & HCR_VSE;
	}
}

+29 −3
Original line number Diff line number Diff line
@@ -109,6 +109,17 @@ static inline bool ctxt_has_s1poe(struct kvm_cpu_context *ctxt)
	return kvm_has_s1poe(kern_hyp_va(vcpu->kvm));
}

/*
 * Tell whether the RAS feature applies to the vCPU owning @ctxt: the host
 * CPUs must have the ARM64_HAS_RAS_EXTN capability and kvm_has_ras() must
 * hold for the vCPU's VM.
 */
static inline bool ctxt_has_ras(struct kvm_cpu_context *ctxt)
{
	if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) {
		/* Only resolve the owning vCPU once the host check passed. */
		struct kvm_vcpu *owner = ctxt_to_vcpu(ctxt);

		return kvm_has_ras(kern_hyp_va(owner->kvm));
	}

	return false;
}

static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
{
	ctxt_sys_reg(ctxt, SCTLR_EL1)	= read_sysreg_el1(SYS_SCTLR);
@@ -159,8 +170,13 @@ static inline void __sysreg_save_el2_return_state(struct kvm_cpu_context *ctxt)
	if (!has_vhe() && ctxt->__hyp_running_vcpu)
		ctxt->regs.pstate	= read_sysreg_el2(SYS_SPSR);

	if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN))
	if (!cpus_have_final_cap(ARM64_HAS_RAS_EXTN))
		return;

	if (!vserror_state_is_nested(ctxt_to_vcpu(ctxt)))
		ctxt_sys_reg(ctxt, DISR_EL1) = read_sysreg_s(SYS_VDISR_EL2);
	else if (ctxt_has_ras(ctxt))
		ctxt_sys_reg(ctxt, VDISR_EL2) = read_sysreg_s(SYS_VDISR_EL2);
}

static inline void __sysreg_restore_common_state(struct kvm_cpu_context *ctxt)
@@ -275,6 +291,7 @@ static inline void __sysreg_restore_el2_return_state(struct kvm_cpu_context *ctx
{
	u64 pstate = to_hw_pstate(ctxt);
	u64 mode = pstate & PSR_AA32_MODE_MASK;
	u64 vdisr;

	/*
	 * Safety check to ensure we're setting the CPU up to enter the guest
@@ -293,8 +310,17 @@ static inline void __sysreg_restore_el2_return_state(struct kvm_cpu_context *ctx
	write_sysreg_el2(ctxt->regs.pc,			SYS_ELR);
	write_sysreg_el2(pstate,			SYS_SPSR);

	if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN))
		write_sysreg_s(ctxt_sys_reg(ctxt, DISR_EL1), SYS_VDISR_EL2);
	if (!cpus_have_final_cap(ARM64_HAS_RAS_EXTN))
		return;

	if (!vserror_state_is_nested(ctxt_to_vcpu(ctxt)))
		vdisr = ctxt_sys_reg(ctxt, DISR_EL1);
	else if (ctxt_has_ras(ctxt))
		vdisr = ctxt_sys_reg(ctxt, VDISR_EL2);
	else
		vdisr = 0;

	write_sysreg_s(vdisr, SYS_VDISR_EL2);
}

static inline void __sysreg32_save_state(struct kvm_vcpu *vcpu)