Commit fabc0e8d authored by Qi Zheng, committed by Andrew Morton
Browse files

mm: introduce zap_nonpresent_ptes()

Similar to zap_present_ptes(), let's introduce zap_nonpresent_ptes() to
handle non-present ptes, which can improve code readability.

No functional change.

Link: https://lkml.kernel.org/r/009ca882036d9c7a9f815489cfeafe0bdb79d62d.1733305182.git.zhengqi.arch@bytedance.com


Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
Reviewed-by: Jann Horn <jannh@google.com>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Peter Xu <peterx@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Will Deacon <will@kernel.org>
Cc: Zach O'Keefe <zokeefe@google.com>
Cc: Dan Carpenter <dan.carpenter@linaro.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent dd95d278
Loading
Loading
Loading
Loading
+73 −63
Original line number Diff line number Diff line
@@ -1587,60 +1587,22 @@ static inline int zap_present_ptes(struct mmu_gather *tlb,
	return 1;
}

static unsigned long zap_pte_range(struct mmu_gather *tlb,
				struct vm_area_struct *vma, pmd_t *pmd,
				unsigned long addr, unsigned long end,
				struct zap_details *details)
static inline int zap_nonpresent_ptes(struct mmu_gather *tlb,
		struct vm_area_struct *vma, pte_t *pte, pte_t ptent,
		unsigned int max_nr, unsigned long addr,
		struct zap_details *details, int *rss)
{
	bool force_flush = false, force_break = false;
	struct mm_struct *mm = tlb->mm;
	int rss[NR_MM_COUNTERS];
	spinlock_t *ptl;
	pte_t *start_pte;
	pte_t *pte;
	swp_entry_t entry;
	int nr;

	tlb_change_page_size(tlb, PAGE_SIZE);
	init_rss_vec(rss);
	start_pte = pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
	if (!pte)
		return addr;

	flush_tlb_batched_pending(mm);
	arch_enter_lazy_mmu_mode();
	do {
		pte_t ptent = ptep_get(pte);
		struct folio *folio;
		struct page *page;
		int max_nr;

		nr = 1;
		if (pte_none(ptent))
			continue;

		if (need_resched())
			break;

		if (pte_present(ptent)) {
			max_nr = (end - addr) / PAGE_SIZE;
			nr = zap_present_ptes(tlb, vma, pte, ptent, max_nr,
					      addr, details, rss, &force_flush,
					      &force_break);
			if (unlikely(force_break)) {
				addr += nr * PAGE_SIZE;
				break;
			}
			continue;
		}
	int nr = 1;

	entry = pte_to_swp_entry(ptent);
	if (is_device_private_entry(entry) ||
		is_device_exclusive_entry(entry)) {
			page = pfn_swap_entry_to_page(entry);
			folio = page_folio(page);
		struct page *page = pfn_swap_entry_to_page(entry);
		struct folio *folio = page_folio(page);

		if (unlikely(!should_zap_folio(details, folio)))
				continue;
			return 1;
		/*
		 * Both device private/exclusive mappings should only
		 * work with anonymous page so far, so we don't need to
@@ -1653,26 +1615,26 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
			folio_remove_rmap_pte(folio, page, vma);
		folio_put(folio);
	} else if (!non_swap_entry(entry)) {
			max_nr = (end - addr) / PAGE_SIZE;
			nr = swap_pte_batch(pte, max_nr, ptent);
		/* Genuine swap entries, hence a private anon pages */
		if (!should_zap_cows(details))
				continue;
			return 1;

		nr = swap_pte_batch(pte, max_nr, ptent);
		rss[MM_SWAPENTS] -= nr;
		free_swap_and_cache_nr(entry, nr);
	} else if (is_migration_entry(entry)) {
			folio = pfn_swap_entry_folio(entry);
		struct folio *folio = pfn_swap_entry_folio(entry);

		if (!should_zap_folio(details, folio))
				continue;
			return 1;
		rss[mm_counter(folio)]--;
	} else if (pte_marker_entry_uffd_wp(entry)) {
		/*
		 * For anon: always drop the marker; for file: only
		 * drop the marker if explicitly requested.
		 */
			if (!vma_is_anonymous(vma) &&
			    !zap_drop_markers(details))
				continue;
		if (!vma_is_anonymous(vma) && !zap_drop_markers(details))
			return 1;
	} else if (is_guard_swp_entry(entry)) {
		/*
		 * Ordinary zapping should not remove guard PTE
@@ -1680,18 +1642,66 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
		 * in general.
		 */
		if (!zap_drop_markers(details))
				continue;
		} else if (is_hwpoison_entry(entry) ||
			   is_poisoned_swp_entry(entry)) {
			return 1;
	} else if (is_hwpoison_entry(entry) || is_poisoned_swp_entry(entry)) {
		if (!should_zap_cows(details))
				continue;
			return 1;
	} else {
		/* We should have covered all the swap entry types */
		pr_alert("unrecognized swap entry 0x%lx\n", entry.val);
		WARN_ON_ONCE(1);
	}
		clear_not_present_full_ptes(mm, addr, pte, nr, tlb->fullmm);
	clear_not_present_full_ptes(vma->vm_mm, addr, pte, nr, tlb->fullmm);
	zap_install_uffd_wp_if_needed(vma, addr, pte, nr, details, ptent);

	return nr;
}

static unsigned long zap_pte_range(struct mmu_gather *tlb,
				struct vm_area_struct *vma, pmd_t *pmd,
				unsigned long addr, unsigned long end,
				struct zap_details *details)
{
	bool force_flush = false, force_break = false;
	struct mm_struct *mm = tlb->mm;
	int rss[NR_MM_COUNTERS];
	spinlock_t *ptl;
	pte_t *start_pte;
	pte_t *pte;
	int nr;

	tlb_change_page_size(tlb, PAGE_SIZE);
	init_rss_vec(rss);
	start_pte = pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
	if (!pte)
		return addr;

	flush_tlb_batched_pending(mm);
	arch_enter_lazy_mmu_mode();
	do {
		pte_t ptent = ptep_get(pte);
		int max_nr;

		nr = 1;
		if (pte_none(ptent))
			continue;

		if (need_resched())
			break;

		max_nr = (end - addr) / PAGE_SIZE;
		if (pte_present(ptent)) {
			nr = zap_present_ptes(tlb, vma, pte, ptent, max_nr,
					      addr, details, rss, &force_flush,
					      &force_break);
			if (unlikely(force_break)) {
				addr += nr * PAGE_SIZE;
				break;
			}
		} else {
			nr = zap_nonpresent_ptes(tlb, vma, pte, ptent, max_nr,
						 addr, details, rss);
		}
	} while (pte += nr, addr += PAGE_SIZE * nr, addr != end);

	add_mm_rss_vec(mm, rss);