Commit 4b1d3145 authored by Alistair Popple, committed by Andrew Morton
Browse files

mm: convert vmf_insert_mixed() from using pte_devmap to pte_special

DAX no longer requires device PTEs as it always has a ZONE_DEVICE page
associated with the PTE that can be reference counted normally.  Other
users of pte_devmap are drivers that set PFN_DEV when calling
vmf_insert_mixed() which ensures vm_normal_page() returns NULL for these
entries.

There is no reason to distinguish these pte_devmap users, so in order to
free up a PTE bit, use pte_special instead for entries created with
vmf_insert_mixed().  This ensures vm_normal_page() will continue to
return NULL for these pages.

Architectures that don't support pte_special also don't support pte_devmap
so those will continue to rely on pfn_valid() to determine if the page can
be mapped.

Link: https://lkml.kernel.org/r/93086bd446e7bf8e4c85345613ac18f706b01f60.1750323463.git-series.apopple@nvidia.com


Signed-off-by: Alistair Popple <apopple@nvidia.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Cc: Balbir Singh <balbirs@nvidia.com>
Cc: Björn Töpel <bjorn@kernel.org>
Cc: Björn Töpel <bjorn@rivosinc.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Chunyan Zhang <zhang.lyra@gmail.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Deepak Gupta <debug@rivosinc.com>
Cc: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
Cc: Inki Dae <m.szyprowski@samsung.com>
Cc: John Groves <john@groves.net>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent 79065255
Loading
Loading
Loading
Loading
+0 −3
Original line number Diff line number Diff line
@@ -302,13 +302,10 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
		goto fault;

	/*
	 * Bypass devmap pte such as DAX page when all pfn requested
	 * flags(pfn_req_flags) are fulfilled.
	 * Since each architecture defines a struct page for the zero page, just
	 * fall through and treat it like a normal page.
	 */
	if (!vm_normal_page(walk->vma, addr, pte) &&
	    !pte_devmap(pte) &&
	    !is_zero_pfn(pte_pfn(pte))) {
		if (hmm_pte_need_fault(hmm_vma_walk, pfn_req_flags, 0)) {
			pte_unmap(ptep);
+2 −18
Original line number Diff line number Diff line
@@ -598,16 +598,6 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
			return NULL;
		if (is_zero_pfn(pfn))
			return NULL;
		if (pte_devmap(pte))
		/*
		 * NOTE: New users of ZONE_DEVICE will not set pte_devmap()
		 * and will have refcounts incremented on their struct pages
		 * when they are inserted into PTEs, thus they are safe to
		 * return here. Legacy ZONE_DEVICE pages that set pte_devmap()
		 * do not have refcounts. Example of legacy ZONE_DEVICE is
		 * MEMORY_DEVICE_FS_DAX type in pmem or virtio_fs drivers.
		 */
			return NULL;

		print_bad_pte(vma, addr, pte, NULL);
		return NULL;
@@ -2483,9 +2473,6 @@ static vm_fault_t insert_pfn(struct vm_area_struct *vma, unsigned long addr,
	}

	/* Ok, finally just insert the thing.. */
	if (pfn_t_devmap(pfn))
		entry = pte_mkdevmap(pfn_t_pte(pfn, prot));
	else
	entry = pte_mkspecial(pfn_t_pte(pfn, prot));

	if (mkwrite) {
@@ -2597,8 +2584,6 @@ static bool vm_mixed_ok(struct vm_area_struct *vma, pfn_t pfn, bool mkwrite)
	/* these checks mirror the abort conditions in vm_normal_page */
	if (vma->vm_flags & VM_MIXEDMAP)
		return true;
	if (pfn_t_devmap(pfn))
		return true;
	if (pfn_t_special(pfn))
		return true;
	if (is_zero_pfn(pfn_t_to_pfn(pfn)))
@@ -2630,8 +2615,7 @@ static vm_fault_t __vm_insert_mixed(struct vm_area_struct *vma,
	 * than insert_pfn).  If a zero_pfn were inserted into a VM_MIXEDMAP
	 * without pte special, it would there be refcounted as a normal page.
	 */
	if (!IS_ENABLED(CONFIG_ARCH_HAS_PTE_SPECIAL) &&
	    !pfn_t_devmap(pfn) && pfn_t_valid(pfn)) {
	if (!IS_ENABLED(CONFIG_ARCH_HAS_PTE_SPECIAL) && pfn_t_valid(pfn)) {
		struct page *page;

		/*
+1 −1
Original line number Diff line number Diff line
@@ -3425,7 +3425,7 @@ static unsigned long get_pte_pfn(pte_t pte, struct vm_area_struct *vma, unsigned
	if (!pte_present(pte) || is_zero_pfn(pfn))
		return -1;

	if (WARN_ON_ONCE(pte_devmap(pte) || pte_special(pte)))
	if (WARN_ON_ONCE(pte_special(pte)))
		return -1;

	if (!pte_young(pte) && !mm_has_notifiers(vma->vm_mm))