Commit c535d132 authored by Oliver Upton's avatar Oliver Upton
Browse files

Merge branch 'kvm-arm64/cacheable-pfnmap' into kvmarm/next



* kvm-arm64/cacheable-pfnmap:
  : Cacheable PFNMAP support at stage-2, courtesy of Ankit Agrawal
  :
  : For historical reasons, KVM only allows cacheable mappings at stage-2
  : when a kernel alias exists in the direct map for the memory region. On
  : hardware without FEAT_S2FWB, this is necessary as KVM must do cache
  : maintenance to keep guest/host accesses coherent.
  :
  : This is unnecessarily restrictive on systems with FEAT_S2FWB and
  : CTR_EL0.DIC, as KVM no longer needs to perform cache maintenance to
  : maintain correctness.
  :
  : Allow cacheable mappings at stage-2 on supporting hardware when the
  : corresponding VMA has cacheable memory attributes and advertise a
  : capability to userspace such that a VMM can determine if a stage-2
  : mapping can be established (e.g. VFIO device).
  KVM: arm64: Expose new KVM cap for cacheable PFNMAP
  KVM: arm64: Allow cacheable stage 2 mapping using VMA flags
  KVM: arm64: Block cacheable PFNMAP mapping
  KVM: arm64: Assume non-PFNMAP/MIXEDMAP VMAs can be mapped cacheable
  KVM: arm64: Rename the device variable to s2_force_noncacheable

Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
parents 86731a2a f55ce5a6
Loading
Loading
Loading
Loading
+12 −1
Original line number Diff line number Diff line
@@ -8585,7 +8585,7 @@ ENOSYS for the others.
When enabled, KVM will exit to userspace with KVM_EXIT_SYSTEM_EVENT of
type KVM_SYSTEM_EVENT_SUSPEND to process the guest suspend request.

7.37 KVM_CAP_ARM_WRITABLE_IMP_ID_REGS
7.42 KVM_CAP_ARM_WRITABLE_IMP_ID_REGS
-------------------------------------

:Architectures: arm64
@@ -8614,6 +8614,17 @@ given VM.
When this capability is enabled, KVM resets the VCPU when setting
MP_STATE_INIT_RECEIVED through IOCTL.  The original MP_STATE is preserved.

7.43 KVM_CAP_ARM_CACHEABLE_PFNMAP_SUPPORTED
-------------------------------------------

:Architectures: arm64
:Target: VM
:Parameters: None

This capability indicates to userspace whether a PFNMAP memory region
can be safely mapped as cacheable. This relies on hardware support for
both the stage-2 force write-back (FEAT_S2FWB) and CTR_EL0.DIC features.

8. Other capabilities.
======================

+18 −0
Original line number Diff line number Diff line
@@ -371,6 +371,24 @@ static inline void kvm_fault_unlock(struct kvm *kvm)
		read_unlock(&kvm->mmu_lock);
}

/*
 * kvm_supports_cacheable_pfnmap() - can a VM_PFNMAP VMA be mapped cacheable
 * at stage-2?
 *
 * Cache maintenance operations (CMOs) take virtual addresses, so ARM64 KVM
 * converts a physical address to a kernel virtual address (KVA) before
 * issuing them. A VM_PFNMAP VMA may have no kernel direct mapping, and
 * therefore no KVA to hand to the CMO instructions.
 *
 * When both S2FWB and CACHE DIC are implemented, KVM does not need to flush
 * caches and the CMOs are NOP'd, so no KVA is required for addresses mapped
 * into the S2. Both features are thus necessary to support a cacheable S2
 * mapping of VM_PFNMAP memory.
 *
 * Return: true only if both ARM64_HAS_STAGE2_FWB and ARM64_HAS_CACHE_DIC
 * are reported by cpus_have_final_cap().
 */
static inline bool kvm_supports_cacheable_pfnmap(void)
{
	/* Without stage-2 FWB there is no point in looking at DIC. */
	if (!cpus_have_final_cap(ARM64_HAS_STAGE2_FWB))
		return false;

	return cpus_have_final_cap(ARM64_HAS_CACHE_DIC);
}

#ifdef CONFIG_PTDUMP_STAGE2_DEBUGFS
void kvm_s2_ptdump_create_debugfs(struct kvm *kvm);
#else
+7 −0
Original line number Diff line number Diff line
@@ -408,6 +408,13 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
	case KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES:
		r = BIT(0);
		break;
	case KVM_CAP_ARM_CACHEABLE_PFNMAP_SUPPORTED:
		if (!kvm)
			r = -EINVAL;
		else
			r = kvm_supports_cacheable_pfnmap();
		break;

	default:
		r = 0;
	}
+65 −23
Original line number Diff line number Diff line
@@ -193,11 +193,6 @@ int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm,
	return 0;
}

/*
 * Treat a PFN as device memory when pfn_is_map_memory() does not report it
 * as map memory.
 */
static bool kvm_is_device_pfn(unsigned long pfn)
{
	if (pfn_is_map_memory(pfn))
		return false;

	return true;
}

static void *stage2_memcache_zalloc_page(void *arg)
{
	struct kvm_mmu_memory_cache *mc = arg;
@@ -1470,6 +1465,18 @@ static bool kvm_vma_mte_allowed(struct vm_area_struct *vma)
	return vma->vm_flags & VM_MTE_ALLOWED;
}

/*
 * Report whether the VMA's page protection selects a cacheable memory type.
 *
 * The memory attribute index is extracted from vma->vm_page_prot. The
 * Normal non-cacheable type and the two Device types checked below are
 * reported as non-cacheable; every other index is reported as cacheable.
 */
static bool kvm_vma_is_cacheable(struct vm_area_struct *vma)
{
	const unsigned long attr_idx = FIELD_GET(PTE_ATTRINDX_MASK,
						 pgprot_val(vma->vm_page_prot));

	return attr_idx != MT_NORMAL_NC &&
	       attr_idx != MT_DEVICE_nGnRnE &&
	       attr_idx != MT_DEVICE_nGnRE;
}

static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
			  struct kvm_s2_trans *nested,
			  struct kvm_memory_slot *memslot, unsigned long hva,
@@ -1477,8 +1484,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
{
	int ret = 0;
	bool write_fault, writable, force_pte = false;
	bool exec_fault, mte_allowed;
	bool device = false, vfio_allow_any_uc = false;
	bool exec_fault, mte_allowed, is_vma_cacheable;
	bool s2_force_noncacheable = false, vfio_allow_any_uc = false;
	unsigned long mmu_seq;
	phys_addr_t ipa = fault_ipa;
	struct kvm *kvm = vcpu->kvm;
@@ -1492,6 +1499,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
	enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R;
	struct kvm_pgtable *pgt;
	struct page *page;
	vm_flags_t vm_flags;
	enum kvm_pgtable_walk_flags flags = KVM_PGTABLE_WALK_HANDLE_FAULT | KVM_PGTABLE_WALK_SHARED;

	if (fault_is_perm)
@@ -1619,6 +1627,10 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,

	vfio_allow_any_uc = vma->vm_flags & VM_ALLOW_ANY_UNCACHED;

	vm_flags = vma->vm_flags;

	is_vma_cacheable = kvm_vma_is_cacheable(vma);

	/* Don't use the VMA after the unlock -- it may have vanished */
	vma = NULL;

@@ -1642,7 +1654,27 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
	if (is_error_noslot_pfn(pfn))
		return -EFAULT;

	if (kvm_is_device_pfn(pfn)) {
	/*
	 * Check if this is a non-struct-page memory PFN that cannot support
	 * CMOs. It could potentially be unsafe to access as cacheable.
	 */
	if (vm_flags & (VM_PFNMAP | VM_MIXEDMAP) && !pfn_is_map_memory(pfn)) {
		if (is_vma_cacheable) {
			/*
			 * Whilst the VMA owner expects a cacheable mapping to this
			 * PFN, the hardware also has to support the FWB and CACHE DIC
			 * features.
			 *
			 * ARM64 KVM relies on a kernel VA mapping to the PFN to
			 * perform cache maintenance as the CMO instructions work on
			 * virtual addresses. VM_PFNMAP regions are not necessarily
			 * mapped to a KVA and hence the presence of the hardware
			 * features S2FWB and CACHE DIC is mandatory to avoid the
			 * need for cache maintenance.
			 */
			if (!kvm_supports_cacheable_pfnmap())
				return -EFAULT;
		} else {
			/*
			 * If the page was identified as device early by looking at
			 * the VMA flags, vma_pagesize is already representing the
@@ -1653,7 +1685,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
			 * In both cases, we don't let transparent_hugepage_adjust()
			 * change things at the last minute.
			 */
		device = true;
			s2_force_noncacheable = true;
		}
	} else if (logging_active && !write_fault) {
		/*
		 * Only actually map the page as writable if this was a write
@@ -1662,7 +1695,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
		writable = false;
	}

	if (exec_fault && device)
	if (exec_fault && s2_force_noncacheable)
		return -ENOEXEC;

	/*
@@ -1695,7 +1728,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
	 * If we are not forced to use page mapping, check if we are
	 * backed by a THP and thus use block mapping if possible.
	 */
	if (vma_pagesize == PAGE_SIZE && !(force_pte || device)) {
	if (vma_pagesize == PAGE_SIZE && !(force_pte || s2_force_noncacheable)) {
		if (fault_is_perm && fault_granule > PAGE_SIZE)
			vma_pagesize = fault_granule;
		else
@@ -1709,7 +1742,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
		}
	}

	if (!fault_is_perm && !device && kvm_has_mte(kvm)) {
	if (!fault_is_perm && !s2_force_noncacheable && kvm_has_mte(kvm)) {
		/* Check the VMM hasn't introduced a new disallowed VMA */
		if (mte_allowed) {
			sanitise_mte_tags(kvm, pfn, vma_pagesize);
@@ -1725,7 +1758,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
	if (exec_fault)
		prot |= KVM_PGTABLE_PROT_X;

	if (device) {
	if (s2_force_noncacheable) {
		if (vfio_allow_any_uc)
			prot |= KVM_PGTABLE_PROT_NORMAL_NC;
		else
@@ -2221,6 +2254,15 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
				ret = -EINVAL;
				break;
			}

			/*
			 * Cacheable PFNMAP is allowed only if the hardware
			 * supports it.
			 */
			if (kvm_vma_is_cacheable(vma) && !kvm_supports_cacheable_pfnmap()) {
				ret = -EINVAL;
				break;
			}
		}
		hva = min(reg_end, vma->vm_end);
	} while (hva < reg_end);
+1 −0
Original line number Diff line number Diff line
@@ -956,6 +956,7 @@ struct kvm_enable_cap {
#define KVM_CAP_ARM_EL2 240
#define KVM_CAP_ARM_EL2_E2H0 241
#define KVM_CAP_RISCV_MP_STATE_RESET 242
#define KVM_CAP_ARM_CACHEABLE_PFNMAP_SUPPORTED 243

struct kvm_irq_routing_irqchip {
	__u32 irqchip;