Commit 96d81e47, authored by Lorenzo Stoakes, committed by Andrew Morton
Browse files

mm/pagewalk: split walk_page_range_novma() into kernel/user parts

walk_page_range_novma() is rather confusing - it supports two modes, one
used often, the other used only for debugging.

The first mode is the common case of traversal of kernel page tables,
which is what nearly all callers use this for.

Secondly it provides an unusual debugging interface that allows for the
traversal of page tables in a userland range of memory even for that
memory which is not described by a VMA.

It is far from certain that such page tables should even exist, but
perhaps this is precisely why it is useful as a debugging mechanism.

As a result, this is utilised by ptdump only.  Historically, things were
reversed - ptdump was the only user, and other parts of the kernel evolved
to use the kernel page table walking here.

Since we have some complicated and confusing locking rules for the novma
case, it makes sense to separate the two usages into their own functions.

Doing this also provides self-documentation as to the intent of the caller
- are they doing something rather unusual or are they simply doing a
standard kernel page table walk?

We therefore establish two separate functions - walk_page_range_debug()
for this single usage, and walk_kernel_page_table_range() for general
kernel page table walking.

The walk_page_range_debug() function is currently used to traverse both
userland and kernel mappings, so we maintain this and in the case of
kernel mappings being traversed, we have walk_page_range_debug() invoke
walk_kernel_page_table_range() internally.

We additionally make walk_page_range_debug() internal to mm.

Link: https://lkml.kernel.org/r/20250605135104.90720-1-lorenzo.stoakes@oracle.com


Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Acked-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Acked-by: Qi Zheng <zhengqi.arch@bytedance.com>
Reviewed-by: Oscar Salvador <osalvador@suse.de>
Reviewed-by: Suren Baghdasaryan <surenb@google.com>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Albert Ou <aou@eecs.berkeley.edu>
Cc: Alexandre Ghiti <alex@ghiti.fr>
Cc: Barry Song <baohua@kernel.org>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: Jann Horn <jannh@google.com>
Cc: Jonas Bonn <jonas@southpole.se>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Paul Walmsley <paul.walmsley@sifive.com>
Cc: Stafford Horne <shorne@gmail.com>
Cc: Stefan Kristiansson <stefan.kristiansson@saunalahti.fi>
Cc: WANG Xuerui <kernel@xen0n.name>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent 03dfefda
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -118,7 +118,7 @@ static int __set_memory(unsigned long addr, int numpages, pgprot_t set_mask, pgp
		return 0;

	mmap_write_lock(&init_mm);
	ret = walk_page_range_novma(&init_mm, start, end, &pageattr_ops, NULL, &masks);
	ret = walk_kernel_page_table_range(start, end, &pageattr_ops, NULL, &masks);
	mmap_write_unlock(&init_mm);

	flush_tlb_kernel_range(start, end);
+2 −2
Original line number Diff line number Diff line
@@ -72,7 +72,7 @@ void *arch_dma_set_uncached(void *cpu_addr, size_t size)
	 * them and setting the cache-inhibit bit.
	 */
	mmap_write_lock(&init_mm);
	error = walk_page_range_novma(&init_mm, va, va + size,
	error = walk_kernel_page_table_range(va, va + size,
			&set_nocache_walk_ops, NULL, NULL);
	mmap_write_unlock(&init_mm);

@@ -87,7 +87,7 @@ void arch_dma_clear_uncached(void *cpu_addr, size_t size)

	mmap_write_lock(&init_mm);
	/* walk_page_range shouldn't be able to fail here */
	WARN_ON(walk_page_range_novma(&init_mm, va, va + size,
	WARN_ON(walk_kernel_page_table_range(va, va + size,
			&clear_nocache_walk_ops, NULL, NULL));
	mmap_write_unlock(&init_mm);
}
+4 −4
Original line number Diff line number Diff line
@@ -299,7 +299,7 @@ static int __set_memory(unsigned long addr, int numpages, pgprot_t set_mask,
			if (ret)
				goto unlock;

			ret = walk_page_range_novma(&init_mm, lm_start, lm_end,
			ret = walk_kernel_page_table_range(lm_start, lm_end,
						    &pageattr_ops, NULL, &masks);
			if (ret)
				goto unlock;
@@ -317,13 +317,13 @@ static int __set_memory(unsigned long addr, int numpages, pgprot_t set_mask,
		if (ret)
			goto unlock;

		ret = walk_page_range_novma(&init_mm, lm_start, lm_end,
		ret = walk_kernel_page_table_range(lm_start, lm_end,
					    &pageattr_ops, NULL, &masks);
		if (ret)
			goto unlock;
	}

	ret =  walk_page_range_novma(&init_mm, start, end, &pageattr_ops, NULL,
	ret =  walk_kernel_page_table_range(start, end, &pageattr_ops, NULL,
				     &masks);

unlock:
@@ -335,7 +335,7 @@ static int __set_memory(unsigned long addr, int numpages, pgprot_t set_mask,
	 */
	flush_tlb_all();
#else
	ret =  walk_page_range_novma(&init_mm, start, end, &pageattr_ops, NULL,
	ret =  walk_kernel_page_table_range(start, end, &pageattr_ops, NULL,
				     &masks);

	mmap_write_unlock(&init_mm);
+3 −4
Original line number Diff line number Diff line
@@ -129,10 +129,9 @@ struct mm_walk {
int walk_page_range(struct mm_struct *mm, unsigned long start,
		unsigned long end, const struct mm_walk_ops *ops,
		void *private);
int walk_page_range_novma(struct mm_struct *mm, unsigned long start,
int walk_kernel_page_table_range(unsigned long start,
		unsigned long end, const struct mm_walk_ops *ops,
			  pgd_t *pgd,
			  void *private);
		pgd_t *pgd, void *private);
int walk_page_range_vma(struct vm_area_struct *vma, unsigned long start,
			unsigned long end, const struct mm_walk_ops *ops,
			void *private);
+1 −1
Original line number Diff line number Diff line
@@ -166,7 +166,7 @@ static int vmemmap_remap_range(unsigned long start, unsigned long end,
	VM_BUG_ON(!PAGE_ALIGNED(start | end));

	mmap_read_lock(&init_mm);
	ret = walk_page_range_novma(&init_mm, start, end, &vmemmap_remap_ops,
	ret = walk_kernel_page_table_range(start, end, &vmemmap_remap_ops,
				    NULL, walk);
	mmap_read_unlock(&init_mm);
	if (ret)
Loading