Commit e669e322 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull kvm fixes from Paolo Bonzini:
 "ARM:

   - Fix another set of FP/SIMD/SVE bugs affecting NV, and plugging some
     missing synchronisation

   - A small fix for the irqbypass hook fixes, tightening the check and
     ensuring that we only deal with MSI for both the old and the new
     route entry

   - Rework the way the shadow LRs are addressed in a nesting
     configuration, plugging an embarrassing bug as well as simplifying
     the whole process

   - Add yet another fix for the dreaded arch_timer_edge_cases selftest

  RISC-V:

   - Fix the size parameter check in SBI SFENCE calls

   - Don't treat SBI HFENCE calls as NOPs

  x86 TDX:

   - Complete API for handling complex TDVMCALLs in userspace.

     This was delayed because the spec lacked a way for userspace to
     deny supporting these calls; the new exit code is now approved"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: TDX: Exit to userspace for GetTdVmCallInfo
  KVM: TDX: Handle TDG.VP.VMCALL<GetQuote>
  KVM: TDX: Add new TDVMCALL status code for unsupported subfuncs
  KVM: arm64: VHE: Centralize ISBs when returning to host
  KVM: arm64: Remove cpacr_clear_set()
  KVM: arm64: Remove ad-hoc CPTR manipulation from kvm_hyp_handle_fpsimd()
  KVM: arm64: Remove ad-hoc CPTR manipulation from fpsimd_sve_sync()
  KVM: arm64: Reorganise CPTR trap manipulation
  KVM: arm64: VHE: Synchronize CPTR trap deactivation
  KVM: arm64: VHE: Synchronize restore of host debug registers
  KVM: arm64: selftests: Close the GIC FD in arch_timer_edge_cases
  KVM: arm64: Explicitly treat routing entry type changes as changes
  KVM: arm64: nv: Fix tracking of shadow list registers
  RISC-V: KVM: Don't treat SBI HFENCE calls as NOPs
  RISC-V: KVM: Fix the size parameter check in SBI SFENCE calls
parents 75f99f8c 25e8b1dd
Loading
Loading
Loading
Loading
+58 −1
Original line number Diff line number Diff line
@@ -6645,7 +6645,8 @@ to the byte array.
.. note::

      For KVM_EXIT_IO, KVM_EXIT_MMIO, KVM_EXIT_OSI, KVM_EXIT_PAPR, KVM_EXIT_XEN,
      KVM_EXIT_EPR, KVM_EXIT_X86_RDMSR and KVM_EXIT_X86_WRMSR the corresponding
      KVM_EXIT_EPR, KVM_EXIT_HYPERCALL, KVM_EXIT_TDX,
      KVM_EXIT_X86_RDMSR and KVM_EXIT_X86_WRMSR the corresponding
      operations are complete (and guest state is consistent) only after userspace
      has re-entered the kernel with KVM_RUN.  The kernel side will first finish
      incomplete operations and then check for pending signals.
@@ -7174,6 +7175,62 @@ The valid value for 'flags' is:
  - KVM_NOTIFY_CONTEXT_INVALID -- the VM context is corrupted and not valid
    in VMCS. Resuming the target VM would lead to an unknown result.

::

		/* KVM_EXIT_TDX */
		struct {
			__u64 flags;	/* currently always zero */
			__u64 nr;	/* TDVMCALL number, from register R11 */
			union {
				/* view for ``nr`` values without a dedicated member */
				struct {
					__u64 ret;
					__u64 data[5];
				} unknown;
				/* nr == TDVMCALL_GET_QUOTE */
				struct {
					__u64 ret;	/* return value of the GetQuote request */
					__u64 gpa;	/* shared buffer GPA, shared bit not set */
					__u64 size;	/* size of the shared buffer */
				} get_quote;
				/* nr == TDVMCALL_GET_TD_VM_CALL_INFO */
				struct {
					__u64 ret;
					__u64 leaf;
					/* output values for the given leaf */
					__u64 r11, r12, r13, r14;
				} get_tdvmcall_info;
			};
		} tdx;

Process a TDVMCALL from the guest.  KVM forwards select TDVMCALLs based
on the Guest-Hypervisor Communication Interface (GHCI) specification;
KVM bridges these requests to the userspace VMM with minimal changes,
placing the inputs in the union and copying them back to the guest
on re-entry.

Flags are currently always zero, whereas ``nr`` contains the TDVMCALL
number from register R11.  The remaining fields of the union provide the
inputs and outputs of the TDVMCALL.  Currently the following values of
``nr`` are defined:

* ``TDVMCALL_GET_QUOTE``: the guest has requested to generate a TD-Quote
  signed by a service hosting TD-Quoting Enclave operating on the host.
  Parameters and return value are in the ``get_quote`` field of the union.
  The ``gpa`` and ``size`` fields specify the guest physical address
  (without the shared bit set) and the size of a shared-memory buffer, in
  which the TDX guest passes a TD Report.  The ``ret`` field represents
  the return value of the GetQuote request.  When the request has been
  queued successfully, the TDX guest can poll the status field in the
  shared-memory area to check whether the Quote generation is completed or
  not. When completed, the generated Quote is returned via the same buffer.

* ``TDVMCALL_GET_TD_VM_CALL_INFO``: the guest has requested the support
  status of TDVMCALLs.  The output values for the given leaf should be
  placed in fields from ``r11`` to ``r14`` of the ``get_tdvmcall_info``
  field of the union.

KVM may add support for more values in the future that may cause a userspace
exit, even without calls to ``KVM_ENABLE_CAP`` or similar.  In this case,
it will enter with output fields already valid; in the common case, the
``unknown.ret`` field of the union will be ``TDVMCALL_STATUS_SUBFUNC_UNSUPPORTED``.
Userspace need not do anything if it does not wish to support a TDVMCALL.

::

		/* Fix the size of the union. */
+0 −62
Original line number Diff line number Diff line
@@ -561,68 +561,6 @@ static __always_inline void kvm_incr_pc(struct kvm_vcpu *vcpu)
		vcpu_set_flag((v), e);					\
	} while (0)

/*
 * Build-time check that @r contains either every bit of @bits or none of
 * them; a partial overlap trips BUILD_BUG_ON().
 */
#define __build_check_all_or_none(r, bits)				\
	BUILD_BUG_ON(((r) & (bits)) != 0 && ((r) & (bits)) != (bits))

/*
 * Build the "clear" mask that cpacr_clear_set() hands to sysreg_clear_set()
 * for cptr_el2, from a CPACR_EL1-format (clr, set) pair:
 *
 *  - FPEN/ZEN/SMEN present in @set select TFP/TZ/TSM;
 *  - TTA/TAM/TCPAC present in @clr select the CPTR_EL2 bit of the same name.
 */
#define __cpacr_to_cptr_clr(clr, set)					\
	({								\
		u64 __mask = 0;						\
									\
		__mask |= ((set) & CPACR_EL1_FPEN) ? CPTR_EL2_TFP : 0;	\
		__mask |= ((set) & CPACR_EL1_ZEN) ? CPTR_EL2_TZ : 0;	\
		__mask |= ((set) & CPACR_EL1_SMEN) ? CPTR_EL2_TSM : 0;	\
		__mask |= ((clr) & CPACR_EL1_TTA) ? CPTR_EL2_TTA : 0;	\
		__mask |= ((clr) & CPTR_EL2_TAM) ? CPTR_EL2_TAM : 0;	\
		__mask |= ((clr) & CPTR_EL2_TCPAC) ? CPTR_EL2_TCPAC : 0;\
									\
		__mask;							\
	})

/*
 * Build the "set" mask that cpacr_clear_set() hands to sysreg_clear_set()
 * for cptr_el2, from a CPACR_EL1-format (clr, set) pair:
 *
 *  - FPEN/ZEN/SMEN present in @clr select TFP/TZ/TSM;
 *  - TTA/TAM/TCPAC present in @set select the CPTR_EL2 bit of the same name.
 */
#define __cpacr_to_cptr_set(clr, set)					\
	({								\
		u64 __mask = 0;						\
									\
		__mask |= ((clr) & CPACR_EL1_FPEN) ? CPTR_EL2_TFP : 0;	\
		__mask |= ((clr) & CPACR_EL1_ZEN) ? CPTR_EL2_TZ : 0;	\
		__mask |= ((clr) & CPACR_EL1_SMEN) ? CPTR_EL2_TSM : 0;	\
		__mask |= ((set) & CPACR_EL1_TTA) ? CPTR_EL2_TTA : 0;	\
		__mask |= ((set) & CPTR_EL2_TAM) ? CPTR_EL2_TAM : 0;	\
		__mask |= ((set) & CPTR_EL2_TCPAC) ? CPTR_EL2_TCPAC : 0;\
									\
		__mask;							\
	})

/*
 * Clear the @clr bits and set the @set bits, both expressed in CPACR_EL1
 * format. When has_vhe() or has_hvhe() is true the masks are applied to
 * cpacr_el1 as-is; otherwise they are first converted with
 * __cpacr_to_cptr_clr()/__cpacr_to_cptr_set() and applied to cptr_el2.
 *
 * The build-time checks reject a @set overlapping CPTR_VHE_EL2_RES0, a @clr
 * containing CPACR_EL1_E0POE, and any mask that covers only part of the
 * FPEN, ZEN or SMEN fields.
 */
#define cpacr_clear_set(clr, set)					\
	do {								\
		BUILD_BUG_ON((set) & CPTR_VHE_EL2_RES0);		\
		BUILD_BUG_ON((clr) & CPACR_EL1_E0POE);			\
		__build_check_all_or_none((clr), CPACR_EL1_FPEN);	\
		__build_check_all_or_none((clr), CPACR_EL1_ZEN);	\
		__build_check_all_or_none((clr), CPACR_EL1_SMEN);	\
		__build_check_all_or_none((set), CPACR_EL1_FPEN);	\
		__build_check_all_or_none((set), CPACR_EL1_ZEN);	\
		__build_check_all_or_none((set), CPACR_EL1_SMEN);	\
									\
		if (!has_vhe() && !has_hvhe())				\
			sysreg_clear_set(cptr_el2,			\
					 __cpacr_to_cptr_clr(clr, set),	\
					 __cpacr_to_cptr_set(clr, set));\
		else							\
			sysreg_clear_set(cpacr_el1, clr, set);		\
	} while (0)

/*
 * Returns a 'sanitised' view of CPTR_EL2, translating from nVHE to the VHE
 * format if E2H isn't set.
+2 −4
Original line number Diff line number Diff line
@@ -1289,9 +1289,8 @@ void kvm_arm_resume_guest(struct kvm *kvm);
	})

/*
 * The couple of isb() below are there to guarantee the same behaviour
 * on VHE as on !VHE, where the eret to EL1 acts as a context
 * synchronization event.
 * The isb() below is there to guarantee the same behaviour on VHE as on !VHE,
 * where the eret to EL1 acts as a context synchronization event.
 */
#define kvm_call_hyp(f, ...)						\
	do {								\
@@ -1309,7 +1308,6 @@ void kvm_arm_resume_guest(struct kvm *kvm);
									\
		if (has_vhe()) {					\
			ret = f(__VA_ARGS__);				\
			isb();						\
		} else {						\
			ret = kvm_call_hyp_nvhe(f, ##__VA_ARGS__);	\
		}							\
+2 −1
Original line number Diff line number Diff line
@@ -2764,7 +2764,8 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
bool kvm_arch_irqfd_route_changed(struct kvm_kernel_irq_routing_entry *old,
				  struct kvm_kernel_irq_routing_entry *new)
{
	if (new->type != KVM_IRQ_ROUTING_MSI)
	if (old->type != KVM_IRQ_ROUTING_MSI ||
	    new->type != KVM_IRQ_ROUTING_MSI)
		return true;

	return memcmp(&old->msi, &new->msi, sizeof(new->msi));
+138 −9
Original line number Diff line number Diff line
@@ -65,6 +65,136 @@ static inline void __activate_traps_fpsimd32(struct kvm_vcpu *vcpu)
	}
}

/*
 * Compute the CPTR_EL2 value to use while the guest runs and write it to
 * cptr_el2. This is the variant __activate_cptr_traps() picks when neither
 * has_vhe() nor has_hvhe() is true.
 */
static inline void __activate_cptr_traps_nvhe(struct kvm_vcpu *vcpu)
{
	/*
	 * TSM is part of the baseline: SME is always trapped since it's not
	 * supported in KVM, and TSM is RES1 if SME isn't implemented.
	 */
	u64 traps = CPTR_NVHE_EL2_RES1 | CPTR_EL2_TAM | CPTR_EL2_TTA |
		    CPTR_EL2_TSM;

	if (!guest_owns_fp_regs()) {
		/* The guest doesn't own the FP registers: set both TFP and TZ. */
		traps |= CPTR_EL2_TFP | CPTR_EL2_TZ;
	} else if (!vcpu_has_sve(vcpu)) {
		/* The guest owns the FP registers but has no SVE: set TZ only. */
		traps |= CPTR_EL2_TZ;
	}

	write_sysreg(traps, cptr_el2);
}

/*
 * Compute the trap configuration to use while the guest runs, in CPACR_EL1
 * format, and write it to cpacr_el1. This is the variant
 * __activate_cptr_traps() picks when has_vhe() or has_hvhe() is true.
 */
static inline void __activate_cptr_traps_vhe(struct kvm_vcpu *vcpu)
{
	/*
	 * With VHE (HCR.E2H == 1), CPACR_EL1 accesses are routed to CPTR_EL2.
	 * The two registers broadly share a layout, but CPACR_EL1 lacks some
	 * controls such as TAM. CPTR_EL2.TAM sits at the same position with
	 * or without VHE, so its CPTR_EL2 definition is used here to trap
	 * AMU accesses.
	 */
	u64 traps = CPTR_EL2_TAM | CPACR_EL1_TTA;

	if (guest_owns_fp_regs()) {
		traps |= CPACR_EL1_FPEN;
		if (vcpu_has_sve(vcpu))
			traps |= CPACR_EL1_ZEN;
	}

	if (vcpu_has_nv(vcpu)) {
		/*
		 * An EL2 guest writing to CPTR_EL2 via CPACR_EL1 can't set
		 * TCPAC or TTA, as they are RES0 in the guest's view. Work
		 * around this architectural wart by trapping the access with
		 * the very bit the guest can't set.
		 */
		if (vcpu_el2_e2h_is_set(vcpu) && is_hyp_ctxt(vcpu))
			traps |= CPTR_EL2_TCPAC;

		/*
		 * Outside of hyp context, i.e. when running a nested guest,
		 * layer the guest hypervisor's trap configuration on top of
		 * our own.
		 */
		if (!is_hyp_ctxt(vcpu)) {
			u64 gcptr = vcpu_sanitised_cptr_el2(vcpu);

			/*
			 * The CPTR_EL2.xEN fields are 2 bits wide, yet with
			 * HCR_EL2.TGE = 0 (running a nested guest) only two
			 * trap states are meaningful:
			 *
			 *  - CPTR_EL2.xEN = x0, traps are enabled
			 *  - CPTR_EL2.xEN = x1, traps are disabled
			 *
			 * Bit[0] alone therefore decides whether guest
			 * accesses trap. For simplicity, clear the whole
			 * field when the guest hypervisor has traps enabled.
			 */
			if (!(SYS_FIELD_GET(CPACR_EL1, FPEN, gcptr) & BIT(0)))
				traps &= ~CPACR_EL1_FPEN;
			if (!(SYS_FIELD_GET(CPACR_EL1, ZEN, gcptr) & BIT(0)))
				traps &= ~CPACR_EL1_ZEN;

			if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR3_EL1, S2POE, IMP))
				traps |= gcptr & CPACR_EL1_E0POE;

			traps |= gcptr & CPTR_EL2_TCPAC;
		}
	}

	write_sysreg(traps, cpacr_el1);
}

/*
 * Install the guest's trap configuration. When the guest does not own the
 * FP registers, __activate_traps_fpsimd32() is called first. The register
 * write is then delegated to the nVHE variant if neither has_vhe() nor
 * has_hvhe() is true, and to the VHE variant otherwise.
 */
static inline void __activate_cptr_traps(struct kvm_vcpu *vcpu)
{
	if (!guest_owns_fp_regs())
		__activate_traps_fpsimd32(vcpu);

	if (!has_vhe() && !has_hvhe()) {
		__activate_cptr_traps_nvhe(vcpu);
		return;
	}

	__activate_cptr_traps_vhe(vcpu);
}

/*
 * Write cptr_el2 with CPTR_NVHE_EL2_RES1, adding TZ when the system lacks
 * the ARM64_SVE capability and TSM when it lacks ARM64_SME. @vcpu is unused.
 */
static inline void __deactivate_cptr_traps_nvhe(struct kvm_vcpu *vcpu)
{
	u64 traps = CPTR_NVHE_EL2_RES1;

	traps |= cpus_have_final_cap(ARM64_SVE) ? 0 : CPTR_EL2_TZ;
	traps |= cpus_have_final_cap(ARM64_SME) ? 0 : CPTR_EL2_TSM;

	write_sysreg(traps, cptr_el2);
}

/*
 * Write cpacr_el1 with FPEN, adding ZEN when the system has the ARM64_SVE
 * capability and SMEN when it has ARM64_SME. @vcpu is unused.
 */
static inline void __deactivate_cptr_traps_vhe(struct kvm_vcpu *vcpu)
{
	u64 enables = CPACR_EL1_FPEN;

	enables |= cpus_have_final_cap(ARM64_SVE) ? CPACR_EL1_ZEN : 0;
	enables |= cpus_have_final_cap(ARM64_SME) ? CPACR_EL1_SMEN : 0;

	write_sysreg(enables, cpacr_el1);
}

/*
 * Counterpart of __activate_cptr_traps(): delegate to the nVHE variant if
 * neither has_vhe() nor has_hvhe() is true, and to the VHE variant otherwise.
 */
static inline void __deactivate_cptr_traps(struct kvm_vcpu *vcpu)
{
	if (!has_vhe() && !has_hvhe()) {
		__deactivate_cptr_traps_nvhe(vcpu);
		return;
	}

	__deactivate_cptr_traps_vhe(vcpu);
}

#define reg_to_fgt_masks(reg)						\
	({								\
		struct fgt_masks *m;					\
@@ -486,11 +616,6 @@ static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu)
	 */
	if (system_supports_sve()) {
		__hyp_sve_save_host();

		/* Re-enable SVE traps if not supported for the guest vcpu. */
		if (!vcpu_has_sve(vcpu))
			cpacr_clear_set(CPACR_EL1_ZEN, 0);

	} else {
		__fpsimd_save_state(host_data_ptr(host_ctxt.fp_regs));
	}
@@ -541,10 +666,7 @@ static inline bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
	/* Valid trap.  Switch the context: */

	/* First disable enough traps to allow us to update the registers */
	if (sve_guest || (is_protected_kvm_enabled() && system_supports_sve()))
		cpacr_clear_set(0, CPACR_EL1_FPEN | CPACR_EL1_ZEN);
	else
		cpacr_clear_set(0, CPACR_EL1_FPEN);
	__deactivate_cptr_traps(vcpu);
	isb();

	/* Write out the host state if it's in the registers */
@@ -566,6 +688,13 @@ static inline bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)

	*host_data_ptr(fp_owner) = FP_STATE_GUEST_OWNED;

	/*
	 * Re-enable traps necessary for the current state of the guest, e.g.
	 * those enabled by a guest hypervisor. The ERET to the guest will
	 * provide the necessary context synchronization.
	 */
	__activate_cptr_traps(vcpu);

	return true;
}

Loading