mm/rmap: handle device-exclusive entries correctly in try_to_migrate_one() (bf983108) · Commits · git / linux-net

mm/rmap.c

+51 −73

Original line number	Diff line number	Diff line
		@@ -2039,9 +2039,9 @@ static bool try_to_migrate_one(struct folio folio, struct vm_area_struct vma,
		{
		struct mm_struct *mm = vma->vm_mm;
		DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, address, 0);
		bool anon_exclusive, writable, ret = true;
		pte_t pteval;
		struct page *subpage;
		bool anon_exclusive, ret = true;
		struct mmu_notifier_range range;
		enum ttu_flags flags = (enum ttu_flags)(long)arg;
		unsigned long pfn;
		@@ -2108,24 +2108,19 @@ static bool try_to_migrate_one(struct folio folio, struct vm_area_struct vma,
		/* Unexpected PMD-mapped THP? */
		VM_BUG_ON_FOLIO(!pvmw.pte, folio);

		pfn = pte_pfn(ptep_get(pvmw.pte));

		if (folio_is_zone_device(folio)) {
		/*
		* Our PTE is a non-present device exclusive entry and
		* calculating the subpage as for the common case would
		* result in an invalid pointer.
		*
		* Since only PAGE_SIZE pages can currently be
		* migrated, just set it to page. This will need to be
		* changed when hugepage migrations to device private
		* memory are supported.
		* Handle PFN swap PTEs, such as device-exclusive ones, that
		* actually map pages.
		*/
		VM_BUG_ON_FOLIO(folio_nr_pages(folio) > 1, folio);
		subpage = &folio->page;
		pteval = ptep_get(pvmw.pte);
		if (likely(pte_present(pteval))) {
		pfn = pte_pfn(pteval);
		} else {
		subpage = folio_page(folio, pfn - folio_pfn(folio));
		pfn = swp_offset_pfn(pte_to_swp_entry(pteval));
		VM_WARN_ON_FOLIO(folio_test_hugetlb(folio), folio);
		}

		subpage = folio_page(folio, pfn - folio_pfn(folio));
		address = pvmw.address;
		anon_exclusive = folio_test_anon(folio) &&
		PageAnonExclusive(subpage);
		@@ -2181,7 +2176,10 @@ static bool try_to_migrate_one(struct folio folio, struct vm_area_struct vma,
		}
		/* Nuke the hugetlb page table entry */
		pteval = huge_ptep_clear_flush(vma, address, pvmw.pte);
		} else {
		if (pte_dirty(pteval))
		folio_mark_dirty(folio);
		writable = pte_write(pteval);
		} else if (likely(pte_present(pteval))) {
		flush_cache_page(vma, address, pfn);
		/* Nuke the page table entry. */
		if (should_defer_flush(mm, flags)) {
		@@ -2199,54 +2197,23 @@ static bool try_to_migrate_one(struct folio folio, struct vm_area_struct vma,
		} else {
		pteval = ptep_clear_flush(vma, address, pvmw.pte);
		}
		}

		/* Set the dirty flag on the folio now the pte is gone. */
		if (pte_dirty(pteval))
		folio_mark_dirty(folio);
		writable = pte_write(pteval);
		} else {
		pte_clear(mm, address, pvmw.pte);
		writable = is_writable_device_private_entry(pte_to_swp_entry(pteval));
		}

		VM_WARN_ON_FOLIO(writable && folio_test_anon(folio) &&
		!anon_exclusive, folio);

		/* Update high watermark before we lower rss */
		update_hiwater_rss(mm);

		if (folio_is_device_private(folio)) {
		unsigned long pfn = folio_pfn(folio);
		swp_entry_t entry;
		pte_t swp_pte;

		if (anon_exclusive)
		WARN_ON_ONCE(folio_try_share_anon_rmap_pte(folio,
		subpage));

		/*
		* Store the pfn of the page in a special migration
		* pte. do_swap_page() will wait until the migration
		* pte is removed and then restart fault handling.
		*/
		entry = pte_to_swp_entry(pteval);
		if (is_writable_device_private_entry(entry))
		entry = make_writable_migration_entry(pfn);
		else if (anon_exclusive)
		entry = make_readable_exclusive_migration_entry(pfn);
		else
		entry = make_readable_migration_entry(pfn);
		swp_pte = swp_entry_to_pte(entry);
		if (PageHWPoison(subpage)) {
		VM_WARN_ON_FOLIO(folio_is_device_private(folio), folio);

		/*
		* pteval maps a zone device page and is therefore
		* a swap pte.
		*/
		if (pte_swp_soft_dirty(pteval))
		swp_pte = pte_swp_mksoft_dirty(swp_pte);
		if (pte_swp_uffd_wp(pteval))
		swp_pte = pte_swp_mkuffd_wp(swp_pte);
		set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte);
		trace_set_migration_pte(pvmw.address, pte_val(swp_pte),
		folio_order(folio));
		/*
		* No need to invalidate here it will synchronize on
		* against the special swap migration pte.
		*/
		} else if (PageHWPoison(subpage)) {
		pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
		if (folio_test_hugetlb(folio)) {
		hugetlb_count_sub(folio_nr_pages(folio), mm);
		@@ -2256,8 +2223,8 @@ static bool try_to_migrate_one(struct folio folio, struct vm_area_struct vma,
		dec_mm_counter(mm, mm_counter(folio));
		set_pte_at(mm, address, pvmw.pte, pteval);
		}

		} else if (pte_unused(pteval) && !userfaultfd_armed(vma)) {
		} else if (likely(pte_present(pteval)) && pte_unused(pteval) &&
		!userfaultfd_armed(vma)) {
		/*
		* The guest indicated that the page content is of no
		* interest anymore. Simply discard the pte, vmscan
		@@ -2273,6 +2240,11 @@ static bool try_to_migrate_one(struct folio folio, struct vm_area_struct vma,
		swp_entry_t entry;
		pte_t swp_pte;

		/*
		* arch_unmap_one() is expected to be a NOP on
		* architectures where we could have PFN swap PTEs,
		* so we'll not check/care.
		*/
		if (arch_unmap_one(mm, vma, address, pteval) < 0) {
		if (folio_test_hugetlb(folio))
		set_huge_pte_at(mm, address, pvmw.pte,
		@@ -2283,8 +2255,6 @@ static bool try_to_migrate_one(struct folio folio, struct vm_area_struct vma,
		page_vma_mapped_walk_done(&pvmw);
		break;
		}
		VM_BUG_ON_PAGE(pte_write(pteval) && folio_test_anon(folio) &&
		!anon_exclusive, subpage);

		/* See folio_try_share_anon_rmap_pte(): clear PTE first. */
		if (folio_test_hugetlb(folio)) {
		@@ -2309,7 +2279,7 @@ static bool try_to_migrate_one(struct folio folio, struct vm_area_struct vma,
		* pte. do_swap_page() will wait until the migration
		* pte is removed and then restart fault handling.
		*/
		if (pte_write(pteval))
		if (writable)
		entry = make_writable_migration_entry(
		page_to_pfn(subpage));
		else if (anon_exclusive)
		@@ -2318,6 +2288,7 @@ static bool try_to_migrate_one(struct folio folio, struct vm_area_struct vma,
		else
		entry = make_readable_migration_entry(
		page_to_pfn(subpage));
		if (likely(pte_present(pteval))) {
		if (pte_young(pteval))
		entry = make_migration_entry_young(entry);
		if (pte_dirty(pteval))
		@@ -2327,6 +2298,13 @@ static bool try_to_migrate_one(struct folio folio, struct vm_area_struct vma,
		swp_pte = pte_swp_mksoft_dirty(swp_pte);
		if (pte_uffd_wp(pteval))
		swp_pte = pte_swp_mkuffd_wp(swp_pte);
		} else {
		swp_pte = swp_entry_to_pte(entry);
		if (pte_swp_soft_dirty(pteval))
		swp_pte = pte_swp_mksoft_dirty(swp_pte);
		if (pte_swp_uffd_wp(pteval))
		swp_pte = pte_swp_mkuffd_wp(swp_pte);
		}
		if (folio_test_hugetlb(folio))
		set_huge_pte_at(mm, address, pvmw.pte, swp_pte,
		hsz);