Commit 0fd06844 authored by Alex Williamson
Browse files

vfio/type1: Use mapping page mask for pfnmaps



vfio-pci supports huge_fault for PCI MMIO BARs and will insert pud and
pmd mappings for well aligned mappings.  follow_pfnmap_start() walks the
page table and therefore knows the page mask of the level where the
address is found and returns this through follow_pfnmap_args.addr_mask.
Subsequent pfns from this address until the end of the mapping page are
necessarily consecutive.  Use this information to retrieve a range of
pfnmap pfns in a single pass.

With optimal mappings and alignment on systems with 1GB pud and 4KB
page size, this reduces iterations for DMA mapping PCI BARs by a
factor of 256K.  In real world testing, the overhead of iterating
pfns for a VM DMA mapping a 32GB PCI BAR is reduced from ~1s to
sub-millisecond overhead.

Reviewed-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Mitchell Augustin <mitchell.augustin@canonical.com>
Tested-by: Mitchell Augustin <mitchell.augustin@canonical.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/20250218222209.1382449-7-alex.williamson@redhat.com


Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
parent 62fb8adc
Loading
Loading
Loading
Loading
+16 −7
Original line number Diff line number Diff line
@@ -520,7 +520,7 @@ static void vfio_batch_fini(struct vfio_batch *batch)

static int follow_fault_pfn(struct vm_area_struct *vma, struct mm_struct *mm,
			    unsigned long vaddr, unsigned long *pfn,
			    bool write_fault)
			    unsigned long *addr_mask, bool write_fault)
{
	struct follow_pfnmap_args args = { .vma = vma, .address = vaddr };
	int ret;
@@ -544,10 +544,12 @@ static int follow_fault_pfn(struct vm_area_struct *vma, struct mm_struct *mm,
			return ret;
	}

	if (write_fault && !args.writable)
	if (write_fault && !args.writable) {
		ret = -EFAULT;
	else
	} else {
		*pfn = args.pfn;
		*addr_mask = args.addr_mask;
	}

	follow_pfnmap_end(&args);
	return ret;
@@ -590,17 +592,24 @@ static long vaddr_get_pfns(struct mm_struct *mm, unsigned long vaddr,
	vma = vma_lookup(mm, vaddr);

	if (vma && vma->vm_flags & VM_PFNMAP) {
		ret = follow_fault_pfn(vma, mm, vaddr, pfn, prot & IOMMU_WRITE);
		unsigned long addr_mask;

		ret = follow_fault_pfn(vma, mm, vaddr, pfn, &addr_mask,
				       prot & IOMMU_WRITE);
		if (ret == -EAGAIN)
			goto retry;

		if (!ret) {
			if (is_invalid_reserved_pfn(*pfn))
				ret = 1;
			else
			if (is_invalid_reserved_pfn(*pfn)) {
				unsigned long epfn;

				epfn = (*pfn | (~addr_mask >> PAGE_SHIFT)) + 1;
				ret = min_t(long, npages, epfn - *pfn);
			} else {
				ret = -EFAULT;
			}
		}
	}
done:
	mmap_read_unlock(mm);
	return ret;