Commit fd2825b0 authored by Alistair Popple's avatar Alistair Popple Committed by Andrew Morton
Browse files

mm/gup: remove pXX_devmap usage from get_user_pages()

GUP uses pXX_devmap() calls to see if it needs to a get a reference on the
associated pgmap data structure to ensure the pages won't go away. 
However it's a driver responsibility to ensure that if pages are mapped
(ie.  discoverable by GUP) that they are not offlined or removed from the
memmap so there is no need to hold a reference on the pgmap data structure
to ensure this.

Furthermore mappings with PFN_DEV are no longer created, hence this
effectively dead code anyway so can be removed.

Link: https://lkml.kernel.org/r/708b2be76876659ec5261fe5d059b07268b98b36.1750323463.git-series.apopple@nvidia.com


Signed-off-by: default avatarAlistair Popple <apopple@nvidia.com>
Reviewed-by: default avatarJason Gunthorpe <jgg@nvidia.com>
Reviewed-by: default avatarDan Williams <dan.j.williams@intel.com>
Cc: Balbir Singh <balbirs@nvidia.com>
Cc: Björn Töpel <bjorn@kernel.org>
Cc: Björn Töpel <bjorn@rivosinc.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Chunyan Zhang <zhang.lyra@gmail.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Deepak Gupta <debug@rivosinc.com>
Cc: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
Cc: Inki Dae <m.szyprowski@samsung.com>
Cc: John Groves <john@groves.net>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
parent 4b1d3145
Loading
Loading
Loading
Loading
+0 −3
Original line number Diff line number Diff line
@@ -473,9 +473,6 @@ static inline bool folio_test_pmd_mappable(struct folio *folio)
	return folio_order(folio) >= HPAGE_PMD_ORDER;
}

struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
		pmd_t *pmd, int flags, struct dev_pagemap **pgmap);

vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf);

extern struct folio *huge_zero_folio;
+5 −155
Original line number Diff line number Diff line
@@ -679,31 +679,9 @@ static struct page *follow_huge_pud(struct vm_area_struct *vma,
		return NULL;

	pfn += (addr & ~PUD_MASK) >> PAGE_SHIFT;

	if (IS_ENABLED(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) &&
	    pud_devmap(pud)) {
		/*
		 * device mapped pages can only be returned if the caller
		 * will manage the page reference count.
		 *
		 * At least one of FOLL_GET | FOLL_PIN must be set, so
		 * assert that here:
		 */
		if (!(flags & (FOLL_GET | FOLL_PIN)))
			return ERR_PTR(-EEXIST);

		if (flags & FOLL_TOUCH)
			touch_pud(vma, addr, pudp, flags & FOLL_WRITE);

		ctx->pgmap = get_dev_pagemap(pfn, ctx->pgmap);
		if (!ctx->pgmap)
			return ERR_PTR(-EFAULT);
	}

	page = pfn_to_page(pfn);

	if (!pud_devmap(pud) && !pud_write(pud) &&
	    gup_must_unshare(vma, flags, page))
	if (!pud_write(pud) && gup_must_unshare(vma, flags, page))
		return ERR_PTR(-EMLINK);

	ret = try_grab_folio(page_folio(page), 1, flags);
@@ -857,8 +835,7 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
	page = vm_normal_page(vma, address, pte);

	/*
	 * We only care about anon pages in can_follow_write_pte() and don't
	 * have to worry about pte_devmap() because they are never anon.
	 * We only care about anon pages in can_follow_write_pte().
	 */
	if ((flags & FOLL_WRITE) &&
	    !can_follow_write_pte(pte, page, vma, flags)) {
@@ -866,18 +843,7 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
		goto out;
	}

	if (!page && pte_devmap(pte) && (flags & (FOLL_GET | FOLL_PIN))) {
		/*
		 * Only return device mapping pages in the FOLL_GET or FOLL_PIN
		 * case since they are only valid while holding the pgmap
		 * reference.
		 */
		*pgmap = get_dev_pagemap(pte_pfn(pte), *pgmap);
		if (*pgmap)
			page = pte_page(pte);
		else
			goto no_page;
	} else if (unlikely(!page)) {
	if (unlikely(!page)) {
		if (flags & FOLL_DUMP) {
			/* Avoid special (like zero) pages in core dumps */
			page = ERR_PTR(-EFAULT);
@@ -959,14 +925,6 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma,
		return no_page_table(vma, flags, address);
	if (!pmd_present(pmdval))
		return no_page_table(vma, flags, address);
	if (pmd_devmap(pmdval)) {
		ptl = pmd_lock(mm, pmd);
		page = follow_devmap_pmd(vma, address, pmd, flags, &ctx->pgmap);
		spin_unlock(ptl);
		if (page)
			return page;
		return no_page_table(vma, flags, address);
	}
	if (likely(!pmd_leaf(pmdval)))
		return follow_page_pte(vma, address, pmd, flags, &ctx->pgmap);

@@ -2896,7 +2854,7 @@ static int gup_fast_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
		int *nr)
{
	struct dev_pagemap *pgmap = NULL;
	int nr_start = *nr, ret = 0;
	int ret = 0;
	pte_t *ptep, *ptem;

	ptem = ptep = pte_offset_map(&pmd, addr);
@@ -2920,16 +2878,7 @@ static int gup_fast_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
		if (!pte_access_permitted(pte, flags & FOLL_WRITE))
			goto pte_unmap;

		if (pte_devmap(pte)) {
			if (unlikely(flags & FOLL_LONGTERM))
				goto pte_unmap;

			pgmap = get_dev_pagemap(pte_pfn(pte), pgmap);
			if (unlikely(!pgmap)) {
				gup_fast_undo_dev_pagemap(nr, nr_start, flags, pages);
				goto pte_unmap;
			}
		} else if (pte_special(pte))
		if (pte_special(pte))
			goto pte_unmap;

		/* If it's not marked as special it must have a valid memmap. */
@@ -3001,91 +2950,6 @@ static int gup_fast_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
}
#endif /* CONFIG_ARCH_HAS_PTE_SPECIAL */

#if defined(CONFIG_ARCH_HAS_PTE_DEVMAP) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
static int gup_fast_devmap_leaf(unsigned long pfn, unsigned long addr,
	unsigned long end, unsigned int flags, struct page **pages, int *nr)
{
	int nr_start = *nr;
	struct dev_pagemap *pgmap = NULL;

	do {
		struct folio *folio;
		struct page *page = pfn_to_page(pfn);

		pgmap = get_dev_pagemap(pfn, pgmap);
		if (unlikely(!pgmap)) {
			gup_fast_undo_dev_pagemap(nr, nr_start, flags, pages);
			break;
		}

		folio = try_grab_folio_fast(page, 1, flags);
		if (!folio) {
			gup_fast_undo_dev_pagemap(nr, nr_start, flags, pages);
			break;
		}
		folio_set_referenced(folio);
		pages[*nr] = page;
		(*nr)++;
		pfn++;
	} while (addr += PAGE_SIZE, addr != end);

	put_dev_pagemap(pgmap);
	return addr == end;
}

static int gup_fast_devmap_pmd_leaf(pmd_t orig, pmd_t *pmdp, unsigned long addr,
		unsigned long end, unsigned int flags, struct page **pages,
		int *nr)
{
	unsigned long fault_pfn;
	int nr_start = *nr;

	fault_pfn = pmd_pfn(orig) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
	if (!gup_fast_devmap_leaf(fault_pfn, addr, end, flags, pages, nr))
		return 0;

	if (unlikely(pmd_val(orig) != pmd_val(*pmdp))) {
		gup_fast_undo_dev_pagemap(nr, nr_start, flags, pages);
		return 0;
	}
	return 1;
}

static int gup_fast_devmap_pud_leaf(pud_t orig, pud_t *pudp, unsigned long addr,
		unsigned long end, unsigned int flags, struct page **pages,
		int *nr)
{
	unsigned long fault_pfn;
	int nr_start = *nr;

	fault_pfn = pud_pfn(orig) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
	if (!gup_fast_devmap_leaf(fault_pfn, addr, end, flags, pages, nr))
		return 0;

	if (unlikely(pud_val(orig) != pud_val(*pudp))) {
		gup_fast_undo_dev_pagemap(nr, nr_start, flags, pages);
		return 0;
	}
	return 1;
}
#else
static int gup_fast_devmap_pmd_leaf(pmd_t orig, pmd_t *pmdp, unsigned long addr,
		unsigned long end, unsigned int flags, struct page **pages,
		int *nr)
{
	BUILD_BUG();
	return 0;
}

static int gup_fast_devmap_pud_leaf(pud_t pud, pud_t *pudp, unsigned long addr,
		unsigned long end, unsigned int flags, struct page **pages,
		int *nr)
{
	BUILD_BUG();
	return 0;
}
#endif

static int gup_fast_pmd_leaf(pmd_t orig, pmd_t *pmdp, unsigned long addr,
		unsigned long end, unsigned int flags, struct page **pages,
		int *nr)
@@ -3100,13 +2964,6 @@ static int gup_fast_pmd_leaf(pmd_t orig, pmd_t *pmdp, unsigned long addr,
	if (pmd_special(orig))
		return 0;

	if (pmd_devmap(orig)) {
		if (unlikely(flags & FOLL_LONGTERM))
			return 0;
		return gup_fast_devmap_pmd_leaf(orig, pmdp, addr, end, flags,
					        pages, nr);
	}

	page = pmd_page(orig);
	refs = record_subpages(page, PMD_SIZE, addr, end, pages + *nr);

@@ -3147,13 +3004,6 @@ static int gup_fast_pud_leaf(pud_t orig, pud_t *pudp, unsigned long addr,
	if (pud_special(orig))
		return 0;

	if (pud_devmap(orig)) {
		if (unlikely(flags & FOLL_LONGTERM))
			return 0;
		return gup_fast_devmap_pud_leaf(orig, pudp, addr, end, flags,
					        pages, nr);
	}

	page = pud_page(orig);
	refs = record_subpages(page, PUD_SIZE, addr, end, pages + *nr);

+0 −40
Original line number Diff line number Diff line
@@ -1672,46 +1672,6 @@ void touch_pmd(struct vm_area_struct *vma, unsigned long addr,
		update_mmu_cache_pmd(vma, addr, pmd);
}

struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
		pmd_t *pmd, int flags, struct dev_pagemap **pgmap)
{
	unsigned long pfn = pmd_pfn(*pmd);
	struct mm_struct *mm = vma->vm_mm;
	struct page *page;
	int ret;

	assert_spin_locked(pmd_lockptr(mm, pmd));

	if (flags & FOLL_WRITE && !pmd_write(*pmd))
		return NULL;

	if (pmd_present(*pmd) && pmd_devmap(*pmd))
		/* pass */;
	else
		return NULL;

	if (flags & FOLL_TOUCH)
		touch_pmd(vma, addr, pmd, flags & FOLL_WRITE);

	/*
	 * device mapped pages can only be returned if the
	 * caller will manage the page reference count.
	 */
	if (!(flags & (FOLL_GET | FOLL_PIN)))
		return ERR_PTR(-EEXIST);

	pfn += (addr & ~PMD_MASK) >> PAGE_SHIFT;
	*pgmap = get_dev_pagemap(pfn, *pgmap);
	if (!*pgmap)
		return ERR_PTR(-EFAULT);
	page = pfn_to_page(pfn);
	ret = try_grab_folio(page_folio(page), 1, flags);
	if (ret)
		page = ERR_PTR(ret);

	return page;
}

int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
		  pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr,
		  struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma)