Commit 638ea796, authored by Fuad Tabba, committed by Paolo Bonzini
Browse files

KVM: arm64: Refactor user_mem_abort()



Refactor user_mem_abort() to improve code clarity and simplify
assumptions within the function.

Key changes include:

* Immediately set force_pte to true at the beginning of the function if
  logging_active is true. This simplifies the flow and makes the
  condition for forcing a PTE more explicit.

* Remove the misleading comment stating that logging_active is
  guaranteed to never be true for VM_PFNMAP memslots, as this assertion
  is not entirely correct.

* Extract reusable code blocks into new helper functions:
  * prepare_mmu_memcache(): Encapsulates the logic for preparing and
    topping up the MMU page cache.
  * adjust_nested_fault_perms(): Isolates the adjustments to shadow S2
    permissions and the encoding of nested translation levels.

* Update min(a, (long)b) to min_t(long, a, b) for better type safety and
  consistency.

* Perform other minor tidying up of the code.

These changes primarily aim to simplify user_mem_abort() and make its
logic easier to understand and maintain, setting the stage for future
modifications.

Reviewed-by: Gavin Shan <gshan@redhat.com>
Reviewed-by: Marc Zyngier <maz@kernel.org>
Reviewed-by: Tao Chan <chentao@kylinos.cn>
Signed-off-by: Fuad Tabba <tabba@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-ID: <20250729225455.670324-18-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
parent f029f04d
Loading
Loading
Loading
Loading
+59 −51
Original line number Diff line number Diff line
@@ -1477,13 +1477,56 @@ static bool kvm_vma_is_cacheable(struct vm_area_struct *vma)
	}
}

/*
 * Pick the per-vCPU memory cache used for stage-2 page-table allocations and,
 * if requested, refill it.
 *
 * @vcpu:           vCPU whose cache is selected.
 * @topup_memcache: when false, only the selection is performed.
 * @memcache:       out parameter; always set to the pKVM cache when protected
 *                  KVM is enabled, and to the regular MMU page cache otherwise.
 *
 * Returns 0 when no top-up is requested; otherwise returns the result of the
 * top-up helper matching the selected cache, asked to hold at least
 * kvm_mmu_cache_min_pages() objects for the vCPU's hw_mmu.
 */
static int prepare_mmu_memcache(struct kvm_vcpu *vcpu, bool topup_memcache,
				void **memcache)
{
	const bool pkvm = is_protected_kvm_enabled();
	int nr_min;

	*memcache = pkvm ? (void *)&vcpu->arch.pkvm_memcache
			 : (void *)&vcpu->arch.mmu_page_cache;

	/* Caller only wanted the cache pointer, nothing to allocate. */
	if (!topup_memcache)
		return 0;

	nr_min = kvm_mmu_cache_min_pages(vcpu->arch.hw_mmu);

	if (pkvm)
		return topup_hyp_memcache(*memcache, nr_min);

	return kvm_mmu_topup_memory_cache(*memcache, nr_min);
}

/*
 * Potentially reduce shadow S2 permissions to match the guest's own S2. For
 * exec faults, we'd only reach this point if the guest actually allowed it (see
 * kvm_s2_handle_perm_fault).
 *
 * Also encode the level of the original translation in the SW bits of the leaf
 * entry as a proxy for the span of that translation. This will be retrieved on
 * TLB invalidation from the guest and used to limit the invalidation scope if a
 * TTL hint or a range isn't provided.
 */
static void adjust_nested_fault_perms(struct kvm_s2_trans *nested,
				      enum kvm_pgtable_prot *prot,
				      bool *writable)
{
	*writable &= kvm_s2_trans_writable(nested);
	if (!kvm_s2_trans_readable(nested))
		*prot &= ~KVM_PGTABLE_PROT_R;

	*prot |= kvm_encode_nested_level(nested);
}

static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
			  struct kvm_s2_trans *nested,
			  struct kvm_memory_slot *memslot, unsigned long hva,
			  bool fault_is_perm)
{
	int ret = 0;
	bool write_fault, writable, force_pte = false;
	bool topup_memcache;
	bool write_fault, writable;
	bool exec_fault, mte_allowed, is_vma_cacheable;
	bool s2_force_noncacheable = false, vfio_allow_any_uc = false;
	unsigned long mmu_seq;
@@ -1495,6 +1538,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
	gfn_t gfn;
	kvm_pfn_t pfn;
	bool logging_active = memslot_is_logging(memslot);
	bool force_pte = logging_active;
	long vma_pagesize, fault_granule;
	enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R;
	struct kvm_pgtable *pgt;
@@ -1506,17 +1550,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
		fault_granule = kvm_vcpu_trap_get_perm_fault_granule(vcpu);
	write_fault = kvm_is_write_fault(vcpu);
	exec_fault = kvm_vcpu_trap_is_exec_fault(vcpu);
	VM_BUG_ON(write_fault && exec_fault);

	if (fault_is_perm && !write_fault && !exec_fault) {
		kvm_err("Unexpected L2 read permission error\n");
		return -EFAULT;
	}

	if (!is_protected_kvm_enabled())
		memcache = &vcpu->arch.mmu_page_cache;
	else
		memcache = &vcpu->arch.pkvm_memcache;
	VM_WARN_ON_ONCE(write_fault && exec_fault);

	/*
	 * Permission faults just need to update the existing leaf entry,
@@ -1524,17 +1558,10 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
	 * only exception to this is when dirty logging is enabled at runtime
	 * and a write fault needs to collapse a block entry into a table.
	 */
	if (!fault_is_perm || (logging_active && write_fault)) {
		int min_pages = kvm_mmu_cache_min_pages(vcpu->arch.hw_mmu);

		if (!is_protected_kvm_enabled())
			ret = kvm_mmu_topup_memory_cache(memcache, min_pages);
		else
			ret = topup_hyp_memcache(memcache, min_pages);

	topup_memcache = !fault_is_perm || (logging_active && write_fault);
	ret = prepare_mmu_memcache(vcpu, topup_memcache, &memcache);
	if (ret)
		return ret;
	}

	/*
	 * Let's check if we will get back a huge page backed by hugetlbfs, or
@@ -1548,16 +1575,10 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
		return -EFAULT;
	}

	/*
	 * logging_active is guaranteed to never be true for VM_PFNMAP
	 * memslots.
	 */
	if (logging_active) {
		force_pte = true;
	if (force_pte)
		vma_shift = PAGE_SHIFT;
	} else {
	else
		vma_shift = get_vma_page_shift(vma, hva);
	}

	switch (vma_shift) {
#ifndef __PAGETABLE_PMD_FOLDED
@@ -1609,7 +1630,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
			max_map_size = PAGE_SIZE;

		force_pte = (max_map_size == PAGE_SIZE);
		vma_pagesize = min(vma_pagesize, (long)max_map_size);
		vma_pagesize = min_t(long, vma_pagesize, max_map_size);
	}

	/*
@@ -1642,7 +1663,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
	 * Rely on mmap_read_unlock() for an implicit smp_rmb(), which pairs
	 * with the smp_wmb() in kvm_mmu_invalidate_end().
	 */
	mmu_seq = vcpu->kvm->mmu_invalidate_seq;
	mmu_seq = kvm->mmu_invalidate_seq;
	mmap_read_unlock(current->mm);

	pfn = __kvm_faultin_pfn(memslot, gfn, write_fault ? FOLL_WRITE : 0,
@@ -1698,24 +1719,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
	if (exec_fault && s2_force_noncacheable)
		return -ENOEXEC;

	/*
	 * Potentially reduce shadow S2 permissions to match the guest's own
	 * S2. For exec faults, we'd only reach this point if the guest
	 * actually allowed it (see kvm_s2_handle_perm_fault).
	 *
	 * Also encode the level of the original translation in the SW bits
	 * of the leaf entry as a proxy for the span of that translation.
	 * This will be retrieved on TLB invalidation from the guest and
	 * used to limit the invalidation scope if a TTL hint or a range
	 * isn't provided.
	 */
	if (nested) {
		writable &= kvm_s2_trans_writable(nested);
		if (!kvm_s2_trans_readable(nested))
			prot &= ~KVM_PGTABLE_PROT_R;

		prot |= kvm_encode_nested_level(nested);
	}
	if (nested)
		adjust_nested_fault_perms(nested, &prot, &writable);

	kvm_fault_lock(kvm);
	pgt = vcpu->arch.hw_mmu->pgt;
@@ -1981,6 +1986,9 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
		goto out_unlock;
	}

	VM_WARN_ON_ONCE(kvm_vcpu_trap_is_permission_fault(vcpu) &&
			!write_fault && !kvm_vcpu_trap_is_exec_fault(vcpu));

	ret = user_mem_abort(vcpu, fault_ipa, nested, memslot, hva,
			     esr_fsc_is_permission_fault(esr));
	if (ret == 0)