Commit 0aa3657d authored by Dev Jain, committed by Andrew Morton
Browse files

mm: add batched versions of ptep_modify_prot_start/commit

Batch ptep_modify_prot_start/commit in preparation for optimizing
mprotect, implementing them as a simple loop over the corresponding single
pte helpers.  Architecture may override these helpers.

Link: https://lkml.kernel.org/r/20250718090244.21092-4-dev.jain@arm.com


Signed-off-by: Dev Jain <dev.jain@arm.com>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Reviewed-by: Barry Song <baohua@kernel.org>
Reviewed-by: Ryan Roberts <ryan.roberts@arm.com>
Reviewed-by: Zi Yan <ziy@nvidia.com>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: David Hildenbrand <david@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Jann Horn <jannh@google.com>
Cc: Joey Gouly <joey.gouly@arm.com>
Cc: Kevin Brodsky <kevin.brodsky@arm.com>
Cc: Lance Yang <ioworker0@gmail.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Peter Xu <peterx@redhat.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Will Deacon <will@kernel.org>
Cc: Yang Shi <yang@os.amperecomputing.com>
Cc: Yicong Yang <yangyicong@hisilicon.com>
Cc: Zhenhua Huang <quic_zhenhuah@quicinc.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent 1d40f4e3
Loading
Loading
Loading
Loading
+83 −1
Original line number Diff line number Diff line
@@ -1331,7 +1331,9 @@ static inline pte_t ptep_modify_prot_start(struct vm_area_struct *vma,

/*
 * Commit an update to a pte, leaving any hardware-controlled bits in
 * the PTE unmodified.
 * the PTE unmodified. The pte returned from ptep_modify_prot_start() may
 * additionally have young and/or dirty bits set where previously they were not,
 * so the updated pte may have these additional changes.
 */
static inline void ptep_modify_prot_commit(struct vm_area_struct *vma,
					   unsigned long addr,
@@ -1340,6 +1342,86 @@ static inline void ptep_modify_prot_commit(struct vm_area_struct *vma,
	__ptep_modify_prot_commit(vma, addr, ptep, pte);
}
#endif /* __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION */

/**
 * modify_prot_start_ptes - Start a pte protection read-modify-write transaction
 * over a batch of ptes, which protects against asynchronous hardware
 * modifications to the ptes. The intention is not to prevent the hardware from
 * making pte updates, but to prevent any updates it may make from being lost.
 * Please see the comment above ptep_modify_prot_start() for full description.
 *
 * @vma: The virtual memory area the pages are mapped into.
 * @addr: Address the first page is mapped at.
 * @ptep: Page table pointer for the first entry.
 * @nr: Number of entries.
 *
 * May be overridden by the architecture; otherwise, implemented as a simple
 * loop over ptep_modify_prot_start(), collecting the a/d bits from each pte
 * in the batch.
 *
 * Note that the young and dirty bits may differ between the PTEs in the batch;
 * the returned pte has them set if any PTE in the batch had them set.
 *
 * Context: The caller holds the page table lock.  The PTEs map consecutive
 * pages that belong to the same folio. All other PTE bits must be identical for
 * all PTEs in the batch except for young and dirty bits.  The PTEs are all in
 * the same PMD.
 */
#ifndef modify_prot_start_ptes
static inline pte_t modify_prot_start_ptes(struct vm_area_struct *vma,
		unsigned long addr, pte_t *ptep, unsigned int nr)
{
	/* Start the transaction on the first entry; its value seeds the result. */
	pte_t batch_pte = ptep_modify_prot_start(vma, addr, ptep);

	/*
	 * Walk the remaining nr - 1 entries. nr is decremented before it is
	 * first tested, so callers must pass nr >= 1 (a zero nr would wrap).
	 */
	for (nr--; nr != 0; nr--) {
		pte_t cur_pte;

		addr += PAGE_SIZE;
		cur_pte = ptep_modify_prot_start(vma, addr, ++ptep);

		/* Fold this entry's dirty/young state into the returned pte. */
		if (pte_dirty(cur_pte))
			batch_pte = pte_mkdirty(batch_pte);
		if (pte_young(cur_pte))
			batch_pte = pte_mkyoung(batch_pte);
	}

	return batch_pte;
}
#endif

/**
 * modify_prot_commit_ptes - Commit an update to a batch of ptes, leaving any
 * hardware-controlled bits in the PTE unmodified.
 *
 * @vma: The virtual memory area the pages are mapped into.
 * @addr: Address the first page is mapped at.
 * @ptep: Page table pointer for the first entry.
 * @old_pte: Old page table entry (for the first entry) which is now cleared.
 * @pte: New page table entry to be set.
 * @nr: Number of entries.
 *
 * May be overridden by the architecture; otherwise, implemented as a simple
 * loop over ptep_modify_prot_commit().
 *
 * Context: The caller holds the page table lock. The PTEs are all in the same
 * PMD. On exit, the set ptes in the batch map the same folio. The ptes set by
 * ptep_modify_prot_start() may additionally have young and/or dirty bits set
 * where previously they were not, so the updated ptes may have these
 * additional changes.
 */
#ifndef modify_prot_commit_ptes
static inline void modify_prot_commit_ptes(struct vm_area_struct *vma, unsigned long addr,
		pte_t *ptep, pte_t old_pte, pte_t pte, unsigned int nr)
{
	/* Same type as nr, so the loop-bound comparison is not mixed-sign. */
	unsigned int i;

	for (i = 0; i < nr; ++i, ++ptep, addr += PAGE_SIZE) {
		/* Commit the new value for the entry mapped at addr. */
		ptep_modify_prot_commit(vma, addr, ptep, old_pte, pte);

		/* Advance PFN only, set same prot */
		old_pte = pte_next_pfn(old_pte);
		pte = pte_next_pfn(pte);
	}
}
#endif

#endif /* CONFIG_MMU */

/*
+2 −2
Original line number Diff line number Diff line
@@ -204,7 +204,7 @@ static long change_pte_range(struct mmu_gather *tlb,
				}
			}

			oldpte = ptep_modify_prot_start(vma, addr, pte);
			oldpte = modify_prot_start_ptes(vma, addr, pte, nr_ptes);
			ptent = pte_modify(oldpte, newprot);

			if (uffd_wp)
@@ -230,7 +230,7 @@ static long change_pte_range(struct mmu_gather *tlb,
			    can_change_pte_writable(vma, addr, ptent))
				ptent = pte_mkwrite(ptent, vma);

			ptep_modify_prot_commit(vma, addr, pte, oldpte, ptent);
			modify_prot_commit_ptes(vma, addr, pte, oldpte, ptent, nr_ptes);
			if (pte_needs_flush(oldpte, ptent))
				tlb_flush_pte_range(tlb, addr, PAGE_SIZE);
			pages++;