Commit 65edfda6 authored by Balbir Singh, committed by Andrew Morton
Browse files

mm/rmap: extend rmap and migration support to device-private entries

Add device-private THP support to the reverse mapping infrastructure,
enabling proper handling during migration and rmap walk operations.

The key changes are:
- set_pmd_migration_entry()/remove_migration_pmd(): Handle device-private
  entries during folio migration and splitting
- page_vma_mapped_walk(): Recognize device-private THP entries during
  VMA traversal operations

This change supports folio splitting and migration operations on
device-private entries.

[balbirs@nvidia.com: fix override of entry in remove_migration_pmd]
  Link: https://lkml.kernel.org/r/20251114012153.2634497-2-balbirs@nvidia.com
[balbirs@nvidia.com: follow pattern used in remove_migration_pte()]
  Link: https://lkml.kernel.org/r/20251115002835.3515194-1-balbirs@nvidia.com
Link: https://lkml.kernel.org/r/20251001065707.920170-5-balbirs@nvidia.com


Signed-off-by: Balbir Singh <balbirs@nvidia.com>
Reviewed-by: SeongJae Park <sj@kernel.org>
Acked-by: Zi Yan <ziy@nvidia.com>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Joshua Hahn <joshua.hahnjy@gmail.com>
Cc: Rakie Kim <rakie.kim@sk.com>
Cc: Byungchul Park <byungchul@sk.com>
Cc: Gregory Price <gourry@gourry.net>
Cc: Ying Huang <ying.huang@linux.alibaba.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: "Liam R. Howlett" <Liam.Howlett@oracle.com>
Cc: Nico Pache <npache@redhat.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Dev Jain <dev.jain@arm.com>
Cc: Barry Song <baohua@kernel.org>
Cc: Lyude Paul <lyude@redhat.com>
Cc: Danilo Krummrich <dakr@kernel.org>
Cc: David Airlie <airlied@gmail.com>
Cc: Simona Vetter <simona@ffwll.ch>
Cc: Ralph Campbell <rcampbell@nvidia.com>
Cc: Mika Penttilä <mpenttil@redhat.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Francois Dugast <francois.dugast@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent 368076f5
Loading
Loading
Loading
Loading
+17 −3
Original line number Diff line number Diff line
@@ -75,12 +75,24 @@ void damon_ptep_mkold(pte_t *pte, struct vm_area_struct *vma, unsigned long addr
void damon_pmdp_mkold(pmd_t *pmd, struct vm_area_struct *vma, unsigned long addr)
{
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	struct folio *folio = damon_get_folio(pmd_pfn(pmdp_get(pmd)));
	pmd_t pmdval = pmdp_get(pmd);
	struct folio *folio;
	bool young = false;
	unsigned long pfn;

	if (likely(pmd_present(pmdval)))
		pfn = pmd_pfn(pmdval);
	else
		pfn = swp_offset_pfn(pmd_to_swp_entry(pmdval));

	folio = damon_get_folio(pfn);
	if (!folio)
		return;

	if (pmdp_clear_young_notify(vma, addr, pmd))
	if (likely(pmd_present(pmdval)))
		young |= pmdp_clear_young_notify(vma, addr, pmd);
	young |= mmu_notifier_clear_young(vma->vm_mm, addr, addr + HPAGE_PMD_SIZE);
	if (young)
		folio_set_young(folio);

	folio_set_idle(folio);
@@ -199,7 +211,9 @@ static bool damon_folio_young_one(struct folio *folio,
				mmu_notifier_test_young(vma->vm_mm, addr);
		} else {
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
			*accessed = pmd_young(pmdp_get(pvmw.pmd)) ||
			pmd_t pmd = pmdp_get(pvmw.pmd);

			*accessed = (pmd_present(pmd) && pmd_young(pmd)) ||
				!folio_test_idle(folio) ||
				mmu_notifier_test_young(vma->vm_mm, addr);
#else
+22 −1
Original line number Diff line number Diff line
@@ -4583,6 +4583,9 @@ int set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw,
		return 0;

	flush_cache_range(vma, address, address + HPAGE_PMD_SIZE);
	if (unlikely(!pmd_present(*pvmw->pmd)))
		pmdval = pmdp_huge_get_and_clear(vma->vm_mm, address, pvmw->pmd);
	else
		pmdval = pmdp_invalidate(vma, address, pvmw->pmd);

	/* See folio_try_share_anon_rmap_pmd(): invalidate PMD first. */
@@ -4633,6 +4636,7 @@ void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new)
	entry = pmd_to_swp_entry(*pvmw->pmd);
	folio_get(folio);
	pmde = folio_mk_pmd(folio, READ_ONCE(vma->vm_page_prot));

	if (pmd_swp_soft_dirty(*pvmw->pmd))
		pmde = pmd_mksoft_dirty(pmde);
	if (is_writable_migration_entry(entry))
@@ -4645,6 +4649,23 @@ void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new)
	if (folio_test_dirty(folio) && is_migration_entry_dirty(entry))
		pmde = pmd_mkdirty(pmde);

	if (folio_is_device_private(folio)) {
		swp_entry_t entry;

		if (pmd_write(pmde))
			entry = make_writable_device_private_entry(
							page_to_pfn(new));
		else
			entry = make_readable_device_private_entry(
							page_to_pfn(new));
		pmde = swp_entry_to_pmd(entry);

		if (pmd_swp_soft_dirty(*pvmw->pmd))
			pmde = pmd_swp_mksoft_dirty(pmde);
		if (pmd_swp_uffd_wp(*pvmw->pmd))
			pmde = pmd_swp_mkuffd_wp(pmde);
	}

	if (folio_test_anon(folio)) {
		rmap_t rmap_flags = RMAP_NONE;

+5 −2
Original line number Diff line number Diff line
@@ -71,8 +71,11 @@ static bool page_idle_clear_pte_refs_one(struct folio *folio,
				referenced |= ptep_test_and_clear_young(vma, addr, pvmw.pte);
			referenced |= mmu_notifier_clear_young(vma->vm_mm, addr, addr + PAGE_SIZE);
		} else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
			if (pmdp_clear_young_notify(vma, addr, pvmw.pmd))
				referenced = true;
			pmd_t pmdval = pmdp_get(pvmw.pmd);

			if (likely(pmd_present(pmdval)))
				referenced |= pmdp_clear_young_notify(vma, addr, pvmw.pmd);
			referenced |= mmu_notifier_clear_young(vma->vm_mm, addr, addr + PMD_SIZE);
		} else {
			/* unexpected pmd-mapped page? */
			WARN_ON_ONCE(1);
+7 −0
Original line number Diff line number Diff line
@@ -277,6 +277,13 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
			 * cannot return prematurely, while zap_huge_pmd() has
			 * cleared *pmd but not decremented compound_mapcount().
			 */
			swp_entry_t entry = pmd_to_swp_entry(pmde);

			if (is_device_private_entry(entry)) {
				pvmw->ptl = pmd_lock(mm, pvmw->pmd);
				return true;
			}

			if ((pvmw->flags & PVMW_SYNC) &&
			    thp_vma_suitable_order(vma, pvmw->address,
						   PMD_ORDER) &&
+20 −4
Original line number Diff line number Diff line
@@ -1022,9 +1022,16 @@ static int page_vma_mkclean_one(struct page_vma_mapped_walk *pvmw)
		} else {
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
			pmd_t *pmd = pvmw->pmd;
			pmd_t entry;
			pmd_t entry = pmdp_get(pmd);

			if (!pmd_dirty(*pmd) && !pmd_write(*pmd))
			/*
			 * Please see the comment above (!pte_present).
			 * A non present PMD is not writable from a CPU
			 * perspective.
			 */
			if (!pmd_present(entry))
				continue;
			if (!pmd_dirty(entry) && !pmd_write(entry))
				continue;

			flush_cache_range(vma, address,
@@ -2319,6 +2326,9 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
	while (page_vma_mapped_walk(&pvmw)) {
		/* PMD-mapped THP migration entry */
		if (!pvmw.pte) {
			__maybe_unused unsigned long pfn;
			__maybe_unused pmd_t pmdval;

			if (flags & TTU_SPLIT_HUGE_PMD) {
				split_huge_pmd_locked(vma, pvmw.address,
						      pvmw.pmd, true);
@@ -2327,8 +2337,14 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
				break;
			}
#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
			subpage = folio_page(folio,
				pmd_pfn(*pvmw.pmd) - folio_pfn(folio));
			pmdval = pmdp_get(pvmw.pmd);
			if (likely(pmd_present(pmdval)))
				pfn = pmd_pfn(pmdval);
			else
				pfn = swp_offset_pfn(pmd_to_swp_entry(pmdval));

			subpage = folio_page(folio, pfn - folio_pfn(folio));

			VM_BUG_ON_FOLIO(folio_test_hugetlb(folio) ||
					!folio_test_pmd_mappable(folio), folio);