Commit 54f15ebf authored by Paolo Bonzini's avatar Paolo Bonzini
Browse files

Merge tag 'kvm-riscv-6.20-1' of https://github.com/kvm-riscv/linux into HEAD

KVM/riscv changes for 6.20

- Fixes for issues discovered by KVM API fuzzing in
  kvm_riscv_aia_imsic_has_attr(), kvm_riscv_aia_imsic_rw_attr(),
  and kvm_riscv_vcpu_aia_imsic_update()
- Allow Zalasr, Zilsd and Zclsd extensions for Guest/VM
- Add riscv vm satp modes in KVM selftests
- Transparent huge page support for G-stage
- Adjust the number of available guest irq files based on
  MMIO register sizes in DeviceTree or ACPI
parents 9e03b7ca 376e2f8c
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -192,6 +192,9 @@ enum KVM_RISCV_ISA_EXT_ID {
	KVM_RISCV_ISA_EXT_ZFBFMIN,
	KVM_RISCV_ISA_EXT_ZVFBFMIN,
	KVM_RISCV_ISA_EXT_ZVFBFWMA,
	KVM_RISCV_ISA_EXT_ZCLSD,
	KVM_RISCV_ISA_EXT_ZILSD,
	KVM_RISCV_ISA_EXT_ZALASR,
	KVM_RISCV_ISA_EXT_MAX,
};

+1 −1
Original line number Diff line number Diff line
@@ -630,7 +630,7 @@ int kvm_riscv_aia_init(void)
	 */
	if (gc)
		kvm_riscv_aia_nr_hgei = min((ulong)kvm_riscv_aia_nr_hgei,
					    BIT(gc->guest_index_bits) - 1);
					    gc->nr_guest_files);
	else
		kvm_riscv_aia_nr_hgei = 0;

+11 −2
Original line number Diff line number Diff line
@@ -797,6 +797,10 @@ int kvm_riscv_vcpu_aia_imsic_update(struct kvm_vcpu *vcpu)
	if (kvm->arch.aia.mode == KVM_DEV_RISCV_AIA_MODE_EMUL)
		return 1;

	/* IMSIC vCPU state may not be initialized yet */
	if (!imsic)
		return 1;

	/* Read old IMSIC VS-file details */
	read_lock_irqsave(&imsic->vsfile_lock, flags);
	old_vsfile_hgei = imsic->vsfile_hgei;
@@ -952,8 +956,10 @@ int kvm_riscv_aia_imsic_rw_attr(struct kvm *kvm, unsigned long type,
	if (!vcpu)
		return -ENODEV;

	isel = KVM_DEV_RISCV_AIA_IMSIC_GET_ISEL(type);
	imsic = vcpu->arch.aia_context.imsic_state;
	if (!imsic)
		return -ENODEV;
	isel = KVM_DEV_RISCV_AIA_IMSIC_GET_ISEL(type);

	read_lock_irqsave(&imsic->vsfile_lock, flags);

@@ -993,8 +999,11 @@ int kvm_riscv_aia_imsic_has_attr(struct kvm *kvm, unsigned long type)
	if (!vcpu)
		return -ENODEV;

	isel = KVM_DEV_RISCV_AIA_IMSIC_GET_ISEL(type);
	imsic = vcpu->arch.aia_context.imsic_state;
	if (!imsic)
		return -ENODEV;

	isel = KVM_DEV_RISCV_AIA_IMSIC_GET_ISEL(type);
	return imsic_mrif_isel_check(imsic->nr_eix, isel);
}

+140 −0
Original line number Diff line number Diff line
@@ -305,6 +305,142 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
	return pte_young(ptep_get(ptep));
}

/*
 * Tell whether a fault at @hva inside @memslot may be backed by a
 * PMD-sized g-stage mapping.
 *
 * Returns true only if both of the following hold:
 *  - the memslot's userspace address and its guest physical address have
 *    the same offset within a PMD, and
 *  - @hva lies in the part of the memslot that is made of whole,
 *    PMD-aligned blocks.
 */
static bool fault_supports_gstage_huge_mapping(struct kvm_memory_slot *memslot,
					       unsigned long hva)
{
	const hva_t slot_hva = memslot->userspace_addr;
	const hva_t slot_hva_end = slot_hva + memslot->npages * PAGE_SIZE;
	const gpa_t slot_gpa = memslot->base_gfn << PAGE_SHIFT;
	const unsigned long block_mask = PMD_SIZE - 1;

	/*
	 * A memslot whose userspace address and GPA sit at different offsets
	 * inside a PMD cannot use g-stage PMD entries: the block boundaries
	 * of the two views would not line up and wrong pages would be mapped.
	 *
	 * Example layout:
	 *
	 *    memslot->userspace_addr:
	 *    +-----+--------------------+--------------------+---+
	 *    |abcde|fgh  vs-stage block |   vs-stage block tv|xyz|
	 *    +-----+--------------------+--------------------+---+
	 *
	 *    memslot->base_gfn << PAGE_SHIFT:
	 *      +---+--------------------+--------------------+-----+
	 *      |abc|def  g-stage block  |    g-stage block   |tvxyz|
	 *      +---+--------------------+--------------------+-----+
	 *
	 * Building those g-stage blocks would yield the bogus translations
	 *   d -> f
	 *   e -> g
	 *   f -> h
	 */
	if ((slot_gpa & block_mask) != (slot_hva & block_mask))
		return false;

	/*
	 * Never map anything outside the memslot. A slot that is not block
	 * aligned or not a multiple of the block size has a partial head
	 * and/or tail ('abcde' and 'xyz' in the userspace view above); faults
	 * landing there must not get a block mapping.
	 *
	 * Checking against the userspace range is sufficient: the GPA range
	 * has the same size and, per the test above, the same alignment.
	 */
	if (hva < ALIGN(slot_hva, PMD_SIZE))
		return false;

	return hva < ALIGN_DOWN(slot_hva_end, PMD_SIZE);
}

/*
 * Walk the page tables of kvm->mm for @hva and report the size of the
 * mapping found there.
 *
 * Returns PUD_SIZE if the PUD entry is a leaf, PMD_SIZE if the PMD entry
 * is a leaf, and PAGE_SIZE in every other case, including when an entry
 * on the way down is none or not present.
 */
static int get_hva_mapping_size(struct kvm *kvm,
				unsigned long hva)
{
	/* Default answer when no huge leaf entry is found. */
	int size = PAGE_SIZE;
	unsigned long flags;
	pgd_t pgd;
	p4d_t p4d;
	pud_t pud;
	pmd_t pmd;

	/*
	 * Disable IRQs to prevent concurrent tear down of host page tables,
	 * e.g. if the primary MMU promotes a P*D to a huge page and then frees
	 * the original page table.
	 */
	local_irq_save(flags);

	/*
	 * Read each entry once.  As above, a non-leaf entry can be promoted to
	 * a huge page _during_ this walk.  Re-reading the entry could send the
	 * walk into the weeds, e.g. p*d_leaf() returns false (sees the old
	 * value) and then p*d_offset() walks into the target huge page instead
	 * of the old page table (sees the new value).
	 */
	pgd = pgdp_get(pgd_offset(kvm->mm, hva));
	if (pgd_none(pgd))
		goto out;

	/* Each lower level is looked up through the local copy read above. */
	p4d = p4dp_get(p4d_offset(&pgd, hva));
	if (p4d_none(p4d) || !p4d_present(p4d))
		goto out;

	pud = pudp_get(pud_offset(&p4d, hva));
	if (pud_none(pud) || !pud_present(pud))
		goto out;

	/* A leaf at PUD level ends the walk. */
	if (pud_leaf(pud)) {
		size = PUD_SIZE;
		goto out;
	}

	pmd = pmdp_get(pmd_offset(&pud, hva));
	if (pmd_none(pmd) || !pmd_present(pmd))
		goto out;

	if (pmd_leaf(pmd))
		size = PMD_SIZE;

out:
	/* Single exit path so IRQs are always restored. */
	local_irq_restore(flags);
	return size;
}

/*
 * Try to upgrade a page-sized fault to a PMD-sized block mapping.
 *
 * Returns the mapping size to use. When PMD_SIZE is returned, *@gpa has
 * been rounded down to a PMD boundary and *@hfnp to the first frame of
 * the corresponding PMD-sized block; otherwise both are left untouched.
 */
static unsigned long transparent_hugepage_adjust(struct kvm *kvm,
						 struct kvm_memory_slot *memslot,
						 unsigned long hva,
						 kvm_pfn_t *hfnp, gpa_t *gpa)
{
	int host_sz;

	/*
	 * The HVA and GPA must be sufficiently aligned and the block must be
	 * contained within the memslot; if not, stay with a single page.
	 */
	if (!fault_supports_gstage_huge_mapping(memslot, hva))
		return PAGE_SIZE;

	/* Only adjust for THP pages: the host mapping must be PMD or larger. */
	host_sz = get_hva_mapping_size(kvm, hva);
	if (host_sz < PMD_SIZE)
		return host_sz;

	/* Snap both the guest address and the host frame to the block start. */
	*gpa &= PMD_MASK;
	*hfnp &= ~(PTRS_PER_PMD - 1);

	return PMD_SIZE;
}

int kvm_riscv_mmu_map(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
		      gpa_t gpa, unsigned long hva, bool is_write,
		      struct kvm_gstage_mapping *out_map)
@@ -398,6 +534,10 @@ int kvm_riscv_mmu_map(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
	if (mmu_invalidate_retry(kvm, mmu_seq))
		goto out_unlock;

	/* Check if we are backed by a THP and thus use block mapping if possible */
	if (vma_pagesize == PAGE_SIZE)
		vma_pagesize = transparent_hugepage_adjust(kvm, memslot, hva, &hfn, &gpa);

	if (writable) {
		mark_page_dirty_in_slot(kvm, memslot, gfn);
		ret = kvm_riscv_gstage_map_page(&gstage, pcache, gpa, hfn << PAGE_SHIFT,
+4 −0
Original line number Diff line number Diff line
@@ -50,6 +50,7 @@ static const unsigned long kvm_isa_ext_arr[] = {
	KVM_ISA_EXT_ARR(ZAAMO),
	KVM_ISA_EXT_ARR(ZABHA),
	KVM_ISA_EXT_ARR(ZACAS),
	KVM_ISA_EXT_ARR(ZALASR),
	KVM_ISA_EXT_ARR(ZALRSC),
	KVM_ISA_EXT_ARR(ZAWRS),
	KVM_ISA_EXT_ARR(ZBA),
@@ -63,6 +64,7 @@ static const unsigned long kvm_isa_ext_arr[] = {
	KVM_ISA_EXT_ARR(ZCB),
	KVM_ISA_EXT_ARR(ZCD),
	KVM_ISA_EXT_ARR(ZCF),
	KVM_ISA_EXT_ARR(ZCLSD),
	KVM_ISA_EXT_ARR(ZCMOP),
	KVM_ISA_EXT_ARR(ZFA),
	KVM_ISA_EXT_ARR(ZFBFMIN),
@@ -79,6 +81,7 @@ static const unsigned long kvm_isa_ext_arr[] = {
	KVM_ISA_EXT_ARR(ZIHINTNTL),
	KVM_ISA_EXT_ARR(ZIHINTPAUSE),
	KVM_ISA_EXT_ARR(ZIHPM),
	KVM_ISA_EXT_ARR(ZILSD),
	KVM_ISA_EXT_ARR(ZIMOP),
	KVM_ISA_EXT_ARR(ZKND),
	KVM_ISA_EXT_ARR(ZKNE),
@@ -187,6 +190,7 @@ static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext)
	case KVM_RISCV_ISA_EXT_ZAAMO:
	case KVM_RISCV_ISA_EXT_ZABHA:
	case KVM_RISCV_ISA_EXT_ZACAS:
	case KVM_RISCV_ISA_EXT_ZALASR:
	case KVM_RISCV_ISA_EXT_ZALRSC:
	case KVM_RISCV_ISA_EXT_ZAWRS:
	case KVM_RISCV_ISA_EXT_ZBA:
Loading