Commit 11b8e6ed authored by Oliver Upton
Browse files

Merge branch 'kvm-arm64/sea-user' into kvmarm/next



* kvm-arm64/sea-user:
  : Userspace handling of SEAs, courtesy of Jiaqi Yan
  :
  : Add support for processing external aborts in userspace in situations
  : where the host has failed to do so, allowing the VMM to potentially
  : reinject an external abort into the VM.
  Documentation: kvm: new UAPI for handling SEA
  KVM: selftests: Test for KVM_EXIT_ARM_SEA
  KVM: arm64: VM exit to userspace to handle SEA

Signed-off-by: Oliver Upton <oupton@kernel.org>
parents 404c2027 4debb5e8
Loading
Loading
Loading
Loading
+47 −0
Original line number Diff line number Diff line
@@ -7286,6 +7286,41 @@ exit, even without calls to ``KVM_ENABLE_CAP`` or similar. In this case,
it will enter with output fields already valid; in the common case, the
``unknown.ret`` field of the union will be ``TDVMCALL_STATUS_SUBFUNC_UNSUPPORTED``.
Userspace need not do anything if it does not wish to support a TDVMCALL.

::

		/* KVM_EXIT_ARM_SEA */
		struct {
  #define KVM_EXIT_ARM_SEA_FLAG_GPA_VALID   (1ULL << 0)
			__u64 flags;
			__u64 esr;
			__u64 gva;
			__u64 gpa;
		} arm_sea;

Used on arm64 systems. When the VM capability ``KVM_CAP_ARM_SEA_TO_USER`` is
enabled, KVM exits to userspace if a guest access causes a synchronous
external abort (SEA) and the host APEI fails to handle the SEA.

``esr`` is set to a sanitized value of ESR_EL2 from the exception taken to KVM,
consisting of the following fields:

 - ``ESR_EL2.EC``
 - ``ESR_EL2.IL``
 - ``ESR_EL2.FnV``
 - ``ESR_EL2.EA``
 - ``ESR_EL2.CM``
 - ``ESR_EL2.WNR``
 - ``ESR_EL2.FSC``
 - ``ESR_EL2.SET`` (when FEAT_RAS is implemented for the VM)

``gva`` is set to the value of FAR_EL2 from the exception taken to KVM when
``ESR_EL2.FnV == 0``. Otherwise, the value of ``gva`` is unknown.

``gpa`` is set to the faulting IPA from the exception taken to KVM when
the ``KVM_EXIT_ARM_SEA_FLAG_GPA_VALID`` flag is set. Otherwise, the value of
``gpa`` is unknown.

::

		/* Fix the size of the union. */
@@ -8703,6 +8738,18 @@ This capability indicate to the userspace whether a PFNMAP memory region
can be safely mapped as cacheable. This relies on the presence of
force write back (FWB) feature support on the hardware.

7.45 KVM_CAP_ARM_SEA_TO_USER
----------------------------

:Architecture: arm64
:Target: VM
:Parameters: none
:Returns: 0 on success, -EINVAL if unsupported.

When this capability is enabled, KVM may exit to userspace for SEAs taken to
EL2 resulting from a guest access. See ``KVM_EXIT_ARM_SEA`` for more
information.

8. Other capabilities.
======================

+2 −0
Original line number Diff line number Diff line
@@ -350,6 +350,8 @@ struct kvm_arch {
#define KVM_ARCH_FLAG_GUEST_HAS_SVE			9
	/* MIDR_EL1, REVIDR_EL1, and AIDR_EL1 are writable from userspace */
#define KVM_ARCH_FLAG_WRITABLE_IMP_ID_REGS		10
	/* Unhandled SEAs are taken to userspace */
#define KVM_ARCH_FLAG_EXIT_SEA				11
	unsigned long flags;

	/* VM-wide vCPU feature set */
+5 −0
Original line number Diff line number Diff line
@@ -132,6 +132,10 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
		}
		mutex_unlock(&kvm->lock);
		break;
	case KVM_CAP_ARM_SEA_TO_USER:
		r = 0;
		set_bit(KVM_ARCH_FLAG_EXIT_SEA, &kvm->arch.flags);
		break;
	default:
		break;
	}
@@ -327,6 +331,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
	case KVM_CAP_IRQFD_RESAMPLE:
	case KVM_CAP_COUNTER_OFFSET:
	case KVM_CAP_ARM_WRITABLE_IMP_ID_REGS:
	case KVM_CAP_ARM_SEA_TO_USER:
		r = 1;
		break;
	case KVM_CAP_SET_GUEST_DEBUG2:
+67 −1
Original line number Diff line number Diff line
@@ -1931,8 +1931,48 @@ static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
	read_unlock(&vcpu->kvm->mmu_lock);
}

/*
 * Decide whether a synchronous external abort (SEA) must be dealt with
 * inside KVM rather than being handed to userspace.
 *
 * Returns true when the abort is attributed to an access made on behalf of
 * the host (e.g. kernel-allocated memory such as stage-2 table memory),
 * false otherwise.
 */
static bool host_owns_sea(struct kvm_vcpu *vcpu, u64 esr)
{
	bool vncr_abort;

	/*
	 * HCR_EL2.TEA is RES0 without FEAT_RAS, so every external abort
	 * taken from a guest EL to EL2 stems from a host-imposed access
	 * (e.g. a stage-2 PTW).
	 */
	if (!cpus_have_final_cap(ARM64_HAS_RAS_EXTN))
		return true;

	/* KVM owns the VNCR when the vCPU isn't in a nested context. */
	vncr_abort = is_hyp_ctxt(vcpu) &&
		     !kvm_vcpu_trap_is_iabt(vcpu) &&
		     (esr & ESR_ELx_VNCR);
	if (vncr_abort)
		return true;

	/*
	 * An external abort during a table walk can only be attributed to
	 * stage-2 when S1PTW is set. Otherwise, since KVM sets HCR_EL2.TEA,
	 * SEAs due to a stage-1 walk (i.e. accessing the PA of the stage-1
	 * descriptor) can reach here and are reported with a TTW ESR value.
	 */
	if (!esr_fsc_is_sea_ttw(esr))
		return false;

	return !!(esr & ESR_ELx_S1PTW);
}

int kvm_handle_guest_sea(struct kvm_vcpu *vcpu)
{
	struct kvm *kvm = vcpu->kvm;
	struct kvm_run *run = vcpu->run;
	u64 esr = kvm_vcpu_get_esr(vcpu);
	u64 esr_mask = ESR_ELx_EC_MASK	|
		       ESR_ELx_IL	|
		       ESR_ELx_FnV	|
		       ESR_ELx_EA	|
		       ESR_ELx_CM	|
		       ESR_ELx_WNR	|
		       ESR_ELx_FSC;
	u64 ipa;

	/*
	 * Give APEI the opportunity to claim the abort before handling it
	 * within KVM. apei_claim_sea() expects to be called with IRQs enabled.
@@ -1941,7 +1981,33 @@ int kvm_handle_guest_sea(struct kvm_vcpu *vcpu)
	if (apei_claim_sea(NULL) == 0)
		return 1;

	if (host_owns_sea(vcpu, esr) ||
	    !test_bit(KVM_ARCH_FLAG_EXIT_SEA, &vcpu->kvm->arch.flags))
		return kvm_inject_serror(vcpu);

	/* ESR_ELx.SET is RES0 when FEAT_RAS isn't implemented. */
	if (kvm_has_ras(kvm))
		esr_mask |= ESR_ELx_SET_MASK;

	/*
	 * Exit to userspace, and provide faulting guest virtual and physical
	 * addresses in case userspace wants to emulate SEA to guest by
	 * writing to FAR_ELx and HPFAR_ELx registers.
	 */
	memset(&run->arm_sea, 0, sizeof(run->arm_sea));
	run->exit_reason = KVM_EXIT_ARM_SEA;
	run->arm_sea.esr = esr & esr_mask;

	if (!(esr & ESR_ELx_FnV))
		run->arm_sea.gva = kvm_vcpu_get_hfar(vcpu);

	ipa = kvm_vcpu_get_fault_ipa(vcpu);
	if (ipa != INVALID_GPA) {
		run->arm_sea.flags |= KVM_EXIT_ARM_SEA_FLAG_GPA_VALID;
		run->arm_sea.gpa = ipa;
	}

	return 0;
}

/**
+10 −0
Original line number Diff line number Diff line
@@ -179,6 +179,7 @@ struct kvm_xen_exit {
#define KVM_EXIT_LOONGARCH_IOCSR  38
#define KVM_EXIT_MEMORY_FAULT     39
#define KVM_EXIT_TDX              40
#define KVM_EXIT_ARM_SEA          41

/* For KVM_EXIT_INTERNAL_ERROR */
/* Emulate instruction failed. */
@@ -473,6 +474,14 @@ struct kvm_run {
				} setup_event_notify;
			};
		} tdx;
		/* KVM_EXIT_ARM_SEA */
		struct {
#define KVM_EXIT_ARM_SEA_FLAG_GPA_VALID	(1ULL << 0)
			__u64 flags;
			__u64 esr;
			__u64 gva;
			__u64 gpa;
		} arm_sea;
		/* Fix the size of the union. */
		char padding[256];
	};
@@ -963,6 +972,7 @@ struct kvm_enable_cap {
#define KVM_CAP_RISCV_MP_STATE_RESET 242
#define KVM_CAP_ARM_CACHEABLE_PFNMAP_SUPPORTED 243
#define KVM_CAP_GUEST_MEMFD_FLAGS 244
#define KVM_CAP_ARM_SEA_TO_USER 245

struct kvm_irq_routing_irqchip {
	__u32 irqchip;
Loading