Commit 5a00bfd6 authored by Ryan Roberts's avatar Ryan Roberts Committed by Andrew Morton
Browse files

arm64/mm: new ptep layer to manage contig bit

Create a new layer for the in-table PTE manipulation APIs.  For now, The
existing API is prefixed with double underscore to become the arch-private
API and the public API is just a simple wrapper that calls the private
API.

The public API implementation will subsequently be used to transparently
manipulate the contiguous bit where appropriate.  But since there are
already some contig-aware users (e.g.  hugetlb, kernel mapper), we must
first ensure those users use the private API directly so that the future
contig-bit manipulations in the public API do not interfere with those
existing uses.

The following APIs are treated this way:

 - ptep_get
 - set_pte
 - set_ptes
 - pte_clear
 - ptep_get_and_clear
 - ptep_test_and_clear_young
 - ptep_clear_flush_young
 - ptep_set_wrprotect
 - ptep_set_access_flags

Link: https://lkml.kernel.org/r/20240215103205.2607016-11-ryan.roberts@arm.com


Signed-off-by: default avatarRyan Roberts <ryan.roberts@arm.com>
Tested-by: default avatarJohn Hubbard <jhubbard@nvidia.com>
Acked-by: default avatarMark Rutland <mark.rutland@arm.com>
Acked-by: default avatarCatalin Marinas <catalin.marinas@arm.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Barry Song <21cnbao@gmail.com>
Cc: Borislav Petkov (AMD) <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Morse <james.morse@arm.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will@kernel.org>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
parent cbb0294f
Loading
Loading
Loading
Loading
+45 −38
Original line number Diff line number Diff line
@@ -93,7 +93,8 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys)
	__pte(__phys_to_pte_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))

#define pte_none(pte)		(!pte_val(pte))
#define pte_clear(mm,addr,ptep)	set_pte(ptep, __pte(0))
#define __pte_clear(mm, addr, ptep) \
				__set_pte(ptep, __pte(0))
#define pte_page(pte)		(pfn_to_page(pte_pfn(pte)))

/*
@@ -137,7 +138,7 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys)
 * so that we don't erroneously return false for pages that have been
 * remapped as PROT_NONE but are yet to be flushed from the TLB.
 * Note that we can't make any assumptions based on the state of the access
 * flag, since ptep_clear_flush_young() elides a DSB when invalidating the
 * flag, since __ptep_clear_flush_young() elides a DSB when invalidating the
 * TLB.
 */
#define pte_accessible(mm, pte)	\
@@ -261,7 +262,7 @@ static inline pte_t pte_mkdevmap(pte_t pte)
	return set_pte_bit(pte, __pgprot(PTE_DEVMAP | PTE_SPECIAL));
}

static inline void set_pte(pte_t *ptep, pte_t pte)
static inline void __set_pte(pte_t *ptep, pte_t pte)
{
	WRITE_ONCE(*ptep, pte);

@@ -275,8 +276,7 @@ static inline void set_pte(pte_t *ptep, pte_t pte)
	}
}

#define ptep_get ptep_get
static inline pte_t ptep_get(pte_t *ptep)
static inline pte_t __ptep_get(pte_t *ptep)
{
	return READ_ONCE(*ptep);
}
@@ -308,7 +308,7 @@ static inline void __check_safe_pte_update(struct mm_struct *mm, pte_t *ptep,
	if (!IS_ENABLED(CONFIG_DEBUG_VM))
		return;

	old_pte = ptep_get(ptep);
	old_pte = __ptep_get(ptep);

	if (!pte_valid(old_pte) || !pte_valid(pte))
		return;
@@ -317,7 +317,7 @@ static inline void __check_safe_pte_update(struct mm_struct *mm, pte_t *ptep,

	/*
	 * Check for potential race with hardware updates of the pte
	 * (ptep_set_access_flags safely changes valid ptes without going
	 * (__ptep_set_access_flags safely changes valid ptes without going
	 * through an invalid entry).
	 */
	VM_WARN_ONCE(!pte_young(pte),
@@ -363,7 +363,7 @@ static inline pte_t pte_advance_pfn(pte_t pte, unsigned long nr)
	return pfn_pte(pte_pfn(pte) + nr, pte_pgprot(pte));
}

static inline void set_ptes(struct mm_struct *mm,
static inline void __set_ptes(struct mm_struct *mm,
			      unsigned long __always_unused addr,
			      pte_t *ptep, pte_t pte, unsigned int nr)
{
@@ -372,14 +372,13 @@ static inline void set_ptes(struct mm_struct *mm,

	for (;;) {
		__check_safe_pte_update(mm, ptep, pte);
		set_pte(ptep, pte);
		__set_pte(ptep, pte);
		if (--nr == 0)
			break;
		ptep++;
		pte = pte_advance_pfn(pte, 1);
	}
}
#define set_ptes set_ptes

/*
 * Huge pte definitions.
@@ -546,7 +545,7 @@ static inline void __set_pte_at(struct mm_struct *mm,
{
	__sync_cache_and_tags(pte, nr);
	__check_safe_pte_update(mm, ptep, pte);
	set_pte(ptep, pte);
	__set_pte(ptep, pte);
}

static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
@@ -860,8 +859,7 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
	return pte_pmd(pte_modify(pmd_pte(pmd), newprot));
}

#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
extern int ptep_set_access_flags(struct vm_area_struct *vma,
extern int __ptep_set_access_flags(struct vm_area_struct *vma,
				 unsigned long address, pte_t *ptep,
				 pte_t entry, int dirty);

@@ -871,7 +869,8 @@ static inline int pmdp_set_access_flags(struct vm_area_struct *vma,
					unsigned long address, pmd_t *pmdp,
					pmd_t entry, int dirty)
{
	return ptep_set_access_flags(vma, address, (pte_t *)pmdp, pmd_pte(entry), dirty);
	return __ptep_set_access_flags(vma, address, (pte_t *)pmdp,
							pmd_pte(entry), dirty);
}

static inline int pud_devmap(pud_t pud)
@@ -905,12 +904,13 @@ static inline bool pud_user_accessible_page(pud_t pud)
/*
 * Atomic pte/pmd modifications.
 */
#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
static inline int __ptep_test_and_clear_young(pte_t *ptep)
static inline int __ptep_test_and_clear_young(struct vm_area_struct *vma,
					      unsigned long address,
					      pte_t *ptep)
{
	pte_t old_pte, pte;

	pte = ptep_get(ptep);
	pte = __ptep_get(ptep);
	do {
		old_pte = pte;
		pte = pte_mkold(pte);
@@ -921,18 +921,10 @@ static inline int __ptep_test_and_clear_young(pte_t *ptep)
	return pte_young(pte);
}

static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
					    unsigned long address,
					    pte_t *ptep)
{
	return __ptep_test_and_clear_young(ptep);
}

#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
static inline int __ptep_clear_flush_young(struct vm_area_struct *vma,
					 unsigned long address, pte_t *ptep)
{
	int young = ptep_test_and_clear_young(vma, address, ptep);
	int young = __ptep_test_and_clear_young(vma, address, ptep);

	if (young) {
		/*
@@ -955,12 +947,11 @@ static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
					    unsigned long address,
					    pmd_t *pmdp)
{
	return ptep_test_and_clear_young(vma, address, (pte_t *)pmdp);
	return __ptep_test_and_clear_young(vma, address, (pte_t *)pmdp);
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
static inline pte_t __ptep_get_and_clear(struct mm_struct *mm,
				       unsigned long address, pte_t *ptep)
{
	pte_t pte = __pte(xchg_relaxed(&pte_val(*ptep), 0));
@@ -984,15 +975,15 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

/*
 * ptep_set_wrprotect - mark read-only while trasferring potential hardware
 * __ptep_set_wrprotect - mark read-only while trasferring potential hardware
 * dirty status (PTE_DBM && !PTE_RDONLY) to the software PTE_DIRTY bit.
 */
#define __HAVE_ARCH_PTEP_SET_WRPROTECT
static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep)
static inline void __ptep_set_wrprotect(struct mm_struct *mm,
					unsigned long address, pte_t *ptep)
{
	pte_t old_pte, pte;

	pte = ptep_get(ptep);
	pte = __ptep_get(ptep);
	do {
		old_pte = pte;
		pte = pte_wrprotect(pte);
@@ -1006,7 +997,7 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres
static inline void pmdp_set_wrprotect(struct mm_struct *mm,
				      unsigned long address, pmd_t *pmdp)
{
	ptep_set_wrprotect(mm, address, (pte_t *)pmdp);
	__ptep_set_wrprotect(mm, address, (pte_t *)pmdp);
}

#define pmdp_establish pmdp_establish
@@ -1084,7 +1075,7 @@ static inline void arch_swap_restore(swp_entry_t entry, struct folio *folio)
#endif /* CONFIG_ARM64_MTE */

/*
 * On AArch64, the cache coherency is handled via the set_ptes() function.
 * On AArch64, the cache coherency is handled via the __set_ptes() function.
 */
static inline void update_mmu_cache_range(struct vm_fault *vmf,
		struct vm_area_struct *vma, unsigned long addr, pte_t *ptep,
@@ -1136,6 +1127,22 @@ extern pte_t ptep_modify_prot_start(struct vm_area_struct *vma,
extern void ptep_modify_prot_commit(struct vm_area_struct *vma,
				    unsigned long addr, pte_t *ptep,
				    pte_t old_pte, pte_t new_pte);

#define ptep_get				__ptep_get
#define set_pte					__set_pte
#define set_ptes				__set_ptes
#define pte_clear				__pte_clear
#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
#define ptep_get_and_clear			__ptep_get_and_clear
#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
#define ptep_test_and_clear_young		__ptep_test_and_clear_young
#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
#define ptep_clear_flush_young			__ptep_clear_flush_young
#define __HAVE_ARCH_PTEP_SET_WRPROTECT
#define ptep_set_wrprotect			__ptep_set_wrprotect
#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
#define ptep_set_access_flags			__ptep_set_access_flags

#endif /* !__ASSEMBLY__ */

#endif /* __ASM_PGTABLE_H */
+2 −2
Original line number Diff line number Diff line
@@ -103,7 +103,7 @@ static int __init set_permissions(pte_t *ptep, unsigned long addr, void *data)
{
	struct set_perm_data *spd = data;
	const efi_memory_desc_t *md = spd->md;
	pte_t pte = ptep_get(ptep);
	pte_t pte = __ptep_get(ptep);

	if (md->attribute & EFI_MEMORY_RO)
		pte = set_pte_bit(pte, __pgprot(PTE_RDONLY));
@@ -111,7 +111,7 @@ static int __init set_permissions(pte_t *ptep, unsigned long addr, void *data)
		pte = set_pte_bit(pte, __pgprot(PTE_PXN));
	else if (system_supports_bti_kernel() && spd->has_bti)
		pte = set_pte_bit(pte, __pgprot(PTE_GP));
	set_pte(ptep, pte);
	__set_pte(ptep, pte);
	return 0;
}

+1 −1
Original line number Diff line number Diff line
@@ -67,7 +67,7 @@ int memcmp_pages(struct page *page1, struct page *page2)
	/*
	 * If the page content is identical but at least one of the pages is
	 * tagged, return non-zero to avoid KSM merging. If only one of the
	 * pages is tagged, set_ptes() may zero or change the tags of the
	 * pages is tagged, __set_ptes() may zero or change the tags of the
	 * other page via mte_sync_tags().
	 */
	if (page_mte_tagged(page1) || page_mte_tagged(page2))
+1 −1
Original line number Diff line number Diff line
@@ -1072,7 +1072,7 @@ int kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm,
		} else {
			/*
			 * Only locking to serialise with a concurrent
			 * set_ptes() in the VMM but still overriding the
			 * __set_ptes() in the VMM but still overriding the
			 * tags, hence ignoring the return value.
			 */
			try_page_mte_tagging(page);
+6 −6
Original line number Diff line number Diff line
@@ -191,7 +191,7 @@ static void show_pte(unsigned long addr)
		if (!ptep)
			break;

		pte = ptep_get(ptep);
		pte = __ptep_get(ptep);
		pr_cont(", pte=%016llx", pte_val(pte));
		pte_unmap(ptep);
	} while(0);
@@ -205,16 +205,16 @@ static void show_pte(unsigned long addr)
 *
 * It needs to cope with hardware update of the accessed/dirty state by other
 * agents in the system and can safely skip the __sync_icache_dcache() call as,
 * like set_ptes(), the PTE is never changed from no-exec to exec here.
 * like __set_ptes(), the PTE is never changed from no-exec to exec here.
 *
 * Returns whether or not the PTE actually changed.
 */
int ptep_set_access_flags(struct vm_area_struct *vma,
int __ptep_set_access_flags(struct vm_area_struct *vma,
			    unsigned long address, pte_t *ptep,
			    pte_t entry, int dirty)
{
	pteval_t old_pteval, pteval;
	pte_t pte = ptep_get(ptep);
	pte_t pte = __ptep_get(ptep);

	if (pte_same(pte, entry))
		return 0;
Loading