Commit 3d626ce5 authored by Sean Christopherson
Browse files

KVM: TDX: Add macro to retry SEAMCALLs when forcing vCPUs out of guest



Add a macro to handle kicking vCPUs out of the guest and retrying
SEAMCALLs on TDX_OPERAND_BUSY instead of providing small helpers to be
used by each SEAMCALL.  Wrapping the SEAMCALLs in a macro makes it a
little harder to tease out which SEAMCALL is being made, but
significantly reduces the amount of copy+paste code, and makes it all but
impossible to leave an elevated wait_for_sept_zap.

Reviewed-by: Binbin Wu <binbin.wu@linux.intel.com>
Reviewed-by: Kai Huang <kai.huang@intel.com>
Reviewed-by: Yan Zhao <yan.y.zhao@intel.com>
Tested-by: Yan Zhao <yan.y.zhao@intel.com>
Tested-by: Kai Huang <kai.huang@intel.com>
Link: https://patch.msgid.link/20251030200951.3402865-22-seanjc@google.com


Signed-off-by: Sean Christopherson <seanjc@google.com>
parent 2ff14116
Loading
Loading
Loading
Loading
+33 −49
Original line number Diff line number Diff line
@@ -294,25 +294,34 @@ static inline void tdx_disassociate_vp(struct kvm_vcpu *vcpu)
	vcpu->cpu = -1;
}

/*
 * Open a "no vCPUs enter" window: raise wait_for_sept_zap on the TD and then
 * send KVM_REQ_OUTSIDE_GUEST_MODE to all vCPUs of @kvm.
 *
 * The caller must hold kvm->mmu_lock for write (checked via lockdep).
 */
static void tdx_no_vcpus_enter_start(struct kvm *kvm)
{
	struct kvm_tdx *tdx = to_kvm_tdx(kvm);

	lockdep_assert_held_write(&kvm->mmu_lock);

	/* Publish the flag first, then kick the vCPUs. */
	WRITE_ONCE(tdx->wait_for_sept_zap, true);
	kvm_make_all_cpus_request(kvm, KVM_REQ_OUTSIDE_GUEST_MODE);
}

/*
 * Close the "no vCPUs enter" window by clearing wait_for_sept_zap on the TD.
 *
 * The caller must hold kvm->mmu_lock for write (checked via lockdep).
 */
static void tdx_no_vcpus_enter_stop(struct kvm *kvm)
{
	struct kvm_tdx *tdx = to_kvm_tdx(kvm);

	lockdep_assert_held_write(&kvm->mmu_lock);

	WRITE_ONCE(tdx->wait_for_sept_zap, false);
}
/*
 * Execute a SEAMCALL related to removing/blocking S-EPT entries, with a single
 * retry (if necessary) after forcing vCPUs to exit and wait for the operation
 * to complete.  All flows that remove/block S-EPT entries run with mmu_lock
 * held for write, i.e. are mutually exclusive with each other, but they aren't
 * mutually exclusive with running vCPUs, and so can fail with "operand busy"
 * if a vCPU acquires a relevant lock in the TDX-Module, e.g. when doing TDCALL.
 *
 * Note, the retry is guaranteed to succeed, absent KVM and/or TDX-Module bugs.
 *
 * @tdh_func: SEAMCALL wrapper to invoke; called as tdh_func(args).
 * @kvm:      the VM; evaluated exactly once, mmu_lock must be held for write.
 * @args:     arguments forwarded verbatim to @tdh_func.  They are expanded a
 *            second time on the retry path, so they must not have side effects.
 *
 * Evaluates to the u64 status of the last @tdh_func invocation.
 * wait_for_sept_zap is raised only around the retry and is always cleared
 * again before the macro completes.
 */
#define tdh_do_no_vcpus(tdh_func, kvm, args...)					\
({										\
	struct kvm *__kvm = (kvm);						\
	struct kvm_tdx *__kvm_tdx = to_kvm_tdx(__kvm);				\
	u64 __err;								\
										\
	lockdep_assert_held_write(&__kvm->mmu_lock);				\
										\
	__err = tdh_func(args);							\
	if (unlikely(tdx_operand_busy(__err))) {				\
		/* Set the flag before kicking so the kick observes it. */	\
		WRITE_ONCE(__kvm_tdx->wait_for_sept_zap, true);			\
		kvm_make_all_cpus_request(__kvm, KVM_REQ_OUTSIDE_GUEST_MODE);	\
										\
		__err = tdh_func(args);						\
										\
		WRITE_ONCE(__kvm_tdx->wait_for_sept_zap, false);		\
	}									\
	__err;									\
})

/* TDH.PHYMEM.PAGE.RECLAIM is allowed only when destroying the TD. */
static int __tdx_reclaim_page(struct page *page)
@@ -1722,14 +1731,7 @@ static void tdx_track(struct kvm *kvm)
	 */
	lockdep_assert_held_write(&kvm->mmu_lock);

	err = tdh_mem_track(&kvm_tdx->td);
	if (unlikely(tdx_operand_busy(err))) {
		/* After no vCPUs enter, the second retry is expected to succeed */
		tdx_no_vcpus_enter_start(kvm);
		err = tdh_mem_track(&kvm_tdx->td);
		tdx_no_vcpus_enter_stop(kvm);
	}

	err = tdh_do_no_vcpus(tdh_mem_track, kvm, &kvm_tdx->td);
	TDX_BUG_ON(err, TDH_MEM_TRACK, kvm);

	kvm_make_all_cpus_request(kvm, KVM_REQ_OUTSIDE_GUEST_MODE);
@@ -1781,14 +1783,8 @@ static void tdx_sept_remove_private_spte(struct kvm *kvm, gfn_t gfn,
	if (KVM_BUG_ON(level != PG_LEVEL_4K, kvm))
		return;

	err = tdh_mem_range_block(&kvm_tdx->td, gpa, tdx_level, &entry, &level_state);
	if (unlikely(tdx_operand_busy(err))) {
		/* After no vCPUs enter, the second retry is expected to succeed */
		tdx_no_vcpus_enter_start(kvm);
		err = tdh_mem_range_block(&kvm_tdx->td, gpa, tdx_level, &entry, &level_state);
		tdx_no_vcpus_enter_stop(kvm);
	}

	err = tdh_do_no_vcpus(tdh_mem_range_block, kvm, &kvm_tdx->td, gpa,
			      tdx_level, &entry, &level_state);
	if (TDX_BUG_ON_2(err, TDH_MEM_RANGE_BLOCK, entry, level_state, kvm))
		return;

@@ -1803,20 +1799,8 @@ static void tdx_sept_remove_private_spte(struct kvm *kvm, gfn_t gfn,
	 * with other vcpu sept operation.
	 * Race with TDH.VP.ENTER due to (0-step mitigation) and Guest TDCALLs.
	 */
	err = tdh_mem_page_remove(&kvm_tdx->td, gpa, tdx_level, &entry,
				  &level_state);

	if (unlikely(tdx_operand_busy(err))) {
		/*
		 * The second retry is expected to succeed after kicking off all
		 * other vCPUs and prevent them from invoking TDH.VP.ENTER.
		 */
		tdx_no_vcpus_enter_start(kvm);
		err = tdh_mem_page_remove(&kvm_tdx->td, gpa, tdx_level, &entry,
					  &level_state);
		tdx_no_vcpus_enter_stop(kvm);
	}

	err = tdh_do_no_vcpus(tdh_mem_page_remove, kvm, &kvm_tdx->td, gpa,
			      tdx_level, &entry, &level_state);
	if (TDX_BUG_ON_2(err, TDH_MEM_PAGE_REMOVE, entry, level_state, kvm))
		return;