Commit c268f204, authored by Oliver Upton, committed by Marc Zyngier
Browse files

KVM: arm64: nv: Punt stage-2 recycling to a vCPU request



Currently, when a nested MMU is repurposed for some other MMU context,
KVM unmaps everything during vcpu_load() while holding the MMU lock for
write. This is quite a performance bottleneck for large nested VMs, as
all vCPU scheduling will spin until the unmap completes.

Start punting the MMU cleanup to a vCPU request, where it is then
possible to periodically release the MMU lock and CPU in the presence of
contention.

Ensure that no vCPU winds up using a stale MMU by tracking the pending
unmap on the S2 MMU itself and requesting an unmap on every vCPU that
finds it.

Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
Link: https://lore.kernel.org/r/20241007233028.2236133-4-oliver.upton@linux.dev


Signed-off-by: Marc Zyngier <maz@kernel.org>
parent 3c164eb9
Loading
Loading
Loading
Loading
+7 −0
Original line number Diff line number Diff line
@@ -51,6 +51,7 @@
#define KVM_REQ_RELOAD_PMU	KVM_ARCH_REQ(5)
#define KVM_REQ_SUSPEND		KVM_ARCH_REQ(6)
#define KVM_REQ_RESYNC_PMU_EL0	KVM_ARCH_REQ(7)
/*
 * Raised on a vCPU that picked up a nested stage-2 MMU whose pending_unmap
 * flag is set; serviced by check_nested_vcpu_requests(), which performs the
 * unmap and clears the flag.
 */
#define KVM_REQ_NESTED_S2_UNMAP	KVM_ARCH_REQ(8)

#define KVM_DIRTY_LOG_MANUAL_CAPS   (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \
				     KVM_DIRTY_LOG_INITIALLY_SET)
@@ -211,6 +212,12 @@ struct kvm_s2_mmu {
	 */
	bool	nested_stage2_enabled;

	/*
	 * true when this MMU needs to be unmapped before being used for a new
	 * purpose.
	 */
	bool	pending_unmap;

	/*
	 *  0: Nobody is currently using this, check vttbr for validity
	 * >0: Somebody is actively using this.
+2 −0
Original line number Diff line number Diff line
@@ -78,6 +78,8 @@ extern void kvm_s2_mmu_iterate_by_vmid(struct kvm *kvm, u16 vmid,
extern void kvm_vcpu_load_hw_mmu(struct kvm_vcpu *vcpu);
extern void kvm_vcpu_put_hw_mmu(struct kvm_vcpu *vcpu);

/*
 * Service nested-virt specific vCPU requests. Handles
 * KVM_REQ_NESTED_S2_UNMAP by unmapping the vCPU's current hw_mmu if that
 * MMU still has an unmap pending.
 */
extern void check_nested_vcpu_requests(struct kvm_vcpu *vcpu);

struct kvm_s2_trans {
	phys_addr_t output;
	unsigned long block_size;
+2 −0
Original line number Diff line number Diff line
@@ -1031,6 +1031,8 @@ static int check_vcpu_requests(struct kvm_vcpu *vcpu)

		if (kvm_dirty_ring_check_request(vcpu))
			return 0;

		check_nested_vcpu_requests(vcpu);
	}

	return 1;
+26 −2
Original line number Diff line number Diff line
@@ -632,9 +632,9 @@ static struct kvm_s2_mmu *get_s2_mmu_nested(struct kvm_vcpu *vcpu)
	/* Set the scene for the next search */
	kvm->arch.nested_mmus_next = (i + 1) % kvm->arch.nested_mmus_size;

	/* Clear the old state */
	/* Make sure we don't forget to do the laundry */
	if (kvm_s2_mmu_valid(s2_mmu))
		kvm_stage2_unmap_range(s2_mmu, 0, kvm_phys_size(s2_mmu), false);
		s2_mmu->pending_unmap = true;

	/*
	 * The virtual VMID (modulo CnP) will be used as a key when matching
@@ -650,6 +650,16 @@ static struct kvm_s2_mmu *get_s2_mmu_nested(struct kvm_vcpu *vcpu)

out:
	atomic_inc(&s2_mmu->refcnt);

	/*
	 * Set the vCPU request to perform an unmap, even if the pending unmap
	 * originates from another vCPU. This guarantees that the MMU has been
	 * completely unmapped before any vCPU actually uses it, and allows
	 * multiple vCPUs to lend a hand with completing the unmap.
	 */
	if (s2_mmu->pending_unmap)
		kvm_make_request(KVM_REQ_NESTED_S2_UNMAP, vcpu);

	return s2_mmu;
}

@@ -1199,3 +1209,17 @@ int kvm_init_nv_sysregs(struct kvm *kvm)

	return 0;
}

/*
 * Handle nested-virt vCPU requests.
 *
 * If KVM_REQ_NESTED_S2_UNMAP was pending on @vcpu, consume it and, under the
 * MMU write lock, unmap the whole range [0, kvm_phys_size()) of the stage-2
 * MMU the vCPU currently points at (vcpu->arch.hw_mmu), provided that MMU is
 * still flagged pending_unmap. The flag is tested and cleared with the lock
 * held, so when several vCPUs received the request for the same MMU, those
 * that find the flag already cleared skip the unmap.
 */
void check_nested_vcpu_requests(struct kvm_vcpu *vcpu)
{
	struct kvm_s2_mmu *s2;

	/* Nothing to do unless the unmap request was raised for this vCPU. */
	if (!kvm_check_request(KVM_REQ_NESTED_S2_UNMAP, vcpu))
		return;

	s2 = vcpu->arch.hw_mmu;

	write_lock(&vcpu->kvm->mmu_lock);

	if (s2->pending_unmap) {
		/*
		 * NOTE(review): the trailing 'true' presumably lets the unmap
		 * drop the lock / reschedule under contention, per the commit
		 * description — confirm against kvm_stage2_unmap_range().
		 */
		kvm_stage2_unmap_range(s2, 0, kvm_phys_size(s2), true);
		s2->pending_unmap = false;
	}

	write_unlock(&vcpu->kvm->mmu_lock);
}