Commit 775465fd authored by Balbir Singh, committed by Andrew Morton
Browse files

lib/test_hmm: add zone device private THP test infrastructure

Enhance the hmm test driver (lib/test_hmm) with support for THP pages.

A new pool of free folios (free_folios) has been added to the dmirror
device; a folio is taken from this pool when a THP zone device private
page is requested.

Add compound page awareness to the allocation functions used during normal
migration and fault-based migration.  These routines also copy
folio_nr_pages() pages when moving data between system memory and device
memory.

args.src and args.dst, which hold the migration entries, are now dynamically
allocated (as they need to hold HPAGE_PMD_NR entries or more).

Split and migrate support will be added in future patches in this series.

Link: https://lkml.kernel.org/r/20251001065707.920170-10-balbirs@nvidia.com


Signed-off-by: Balbir Singh <balbirs@nvidia.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Zi Yan <ziy@nvidia.com>
Cc: Joshua Hahn <joshua.hahnjy@gmail.com>
Cc: Rakie Kim <rakie.kim@sk.com>
Cc: Byungchul Park <byungchul@sk.com>
Cc: Gregory Price <gourry@gourry.net>
Cc: Ying Huang <ying.huang@linux.alibaba.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: "Liam R. Howlett" <Liam.Howlett@oracle.com>
Cc: Nico Pache <npache@redhat.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Dev Jain <dev.jain@arm.com>
Cc: Barry Song <baohua@kernel.org>
Cc: Lyude Paul <lyude@redhat.com>
Cc: Danilo Krummrich <dakr@kernel.org>
Cc: David Airlie <airlied@gmail.com>
Cc: Simona Vetter <simona@ffwll.ch>
Cc: Ralph Campbell <rcampbell@nvidia.com>
Cc: Mika Penttilä <mpenttil@redhat.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Francois Dugast <francois.dugast@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent 49640991
Loading
Loading
Loading
Loading
+12 −0
Original line number Diff line number Diff line
@@ -177,6 +177,18 @@ static inline bool folio_is_pci_p2pdma(const struct folio *folio)
		folio->pgmap->type == MEMORY_DEVICE_PCI_P2PDMA;
}

static inline void *folio_zone_device_data(const struct folio *folio)
{
	VM_WARN_ON_FOLIO(!folio_is_device_private(folio), folio);
	return folio->page.zone_device_data;
}

static inline void folio_set_zone_device_data(struct folio *folio, void *data)
{
	VM_WARN_ON_FOLIO(!folio_is_device_private(folio), folio);
	folio->page.zone_device_data = data;
}

static inline bool is_pci_p2pdma_page(const struct page *page)
{
	return IS_ENABLED(CONFIG_PCI_P2PDMA) &&
+292 −76
Original line number Diff line number Diff line
@@ -119,6 +119,7 @@ struct dmirror_device {
	unsigned long		calloc;
	unsigned long		cfree;
	struct page		*free_pages;
	struct folio		*free_folios;
	spinlock_t		lock;		/* protects the above */
};

@@ -492,7 +493,7 @@ static int dmirror_write(struct dmirror *dmirror, struct hmm_dmirror_cmd *cmd)
}

static int dmirror_allocate_chunk(struct dmirror_device *mdevice,
				   struct page **ppage)
				  struct page **ppage, bool is_large)
{
	struct dmirror_chunk *devmem;
	struct resource *res = NULL;
@@ -572,20 +573,45 @@ static int dmirror_allocate_chunk(struct dmirror_device *mdevice,
		pfn_first, pfn_last);

	spin_lock(&mdevice->lock);
	for (pfn = pfn_first; pfn < pfn_last; pfn++) {
	for (pfn = pfn_first; pfn < pfn_last; ) {
		struct page *page = pfn_to_page(pfn);

		if (is_large && IS_ALIGNED(pfn, HPAGE_PMD_NR)
			&& (pfn + HPAGE_PMD_NR <= pfn_last)) {
			page->zone_device_data = mdevice->free_folios;
			mdevice->free_folios = page_folio(page);
			pfn += HPAGE_PMD_NR;
			continue;
		}

		page->zone_device_data = mdevice->free_pages;
		mdevice->free_pages = page;
		pfn++;
	}

	ret = 0;
	if (ppage) {
		if (is_large) {
			if (!mdevice->free_folios) {
				ret = -ENOMEM;
				goto err_unlock;
			}
			*ppage = folio_page(mdevice->free_folios, 0);
			mdevice->free_folios = (*ppage)->zone_device_data;
			mdevice->calloc += HPAGE_PMD_NR;
		} else if (mdevice->free_pages) {
			*ppage = mdevice->free_pages;
			mdevice->free_pages = (*ppage)->zone_device_data;
			mdevice->calloc++;
		} else {
			ret = -ENOMEM;
			goto err_unlock;
		}
	}
err_unlock:
	spin_unlock(&mdevice->lock);

	return 0;
	return ret;

err_release:
	mutex_unlock(&mdevice->devmem_lock);
@@ -598,10 +624,13 @@ static int dmirror_allocate_chunk(struct dmirror_device *mdevice,
	return ret;
}

static struct page *dmirror_devmem_alloc_page(struct dmirror_device *mdevice)
static struct page *dmirror_devmem_alloc_page(struct dmirror *dmirror,
					      bool is_large)
{
	struct page *dpage = NULL;
	struct page *rpage = NULL;
	unsigned int order = is_large ? HPAGE_PMD_ORDER : 0;
	struct dmirror_device *mdevice = dmirror->mdevice;

	/*
	 * For ZONE_DEVICE private type, this is a fake device so we allocate
@@ -610,49 +639,55 @@ static struct page *dmirror_devmem_alloc_page(struct dmirror_device *mdevice)
	 * data and ignore rpage.
	 */
	if (dmirror_is_private_zone(mdevice)) {
		rpage = alloc_page(GFP_HIGHUSER);
		rpage = folio_page(folio_alloc(GFP_HIGHUSER, order), 0);
		if (!rpage)
			return NULL;
	}
	spin_lock(&mdevice->lock);

	if (mdevice->free_pages) {
	if (is_large && mdevice->free_folios) {
		dpage = folio_page(mdevice->free_folios, 0);
		mdevice->free_folios = dpage->zone_device_data;
		mdevice->calloc += 1 << order;
		spin_unlock(&mdevice->lock);
	} else if (!is_large && mdevice->free_pages) {
		dpage = mdevice->free_pages;
		mdevice->free_pages = dpage->zone_device_data;
		mdevice->calloc++;
		spin_unlock(&mdevice->lock);
	} else {
		spin_unlock(&mdevice->lock);
		if (dmirror_allocate_chunk(mdevice, &dpage))
		if (dmirror_allocate_chunk(mdevice, &dpage, is_large))
			goto error;
	}

	zone_device_page_init(dpage, 0);
	zone_device_folio_init(page_folio(dpage), order);
	dpage->zone_device_data = rpage;
	return dpage;

error:
	if (rpage)
		__free_page(rpage);
		__free_pages(rpage, order);
	return NULL;
}

static void dmirror_migrate_alloc_and_copy(struct migrate_vma *args,
					   struct dmirror *dmirror)
{
	struct dmirror_device *mdevice = dmirror->mdevice;
	const unsigned long *src = args->src;
	unsigned long *dst = args->dst;
	unsigned long addr;

	for (addr = args->start; addr < args->end; addr += PAGE_SIZE,
						   src++, dst++) {
	for (addr = args->start; addr < args->end; ) {
		struct page *spage;
		struct page *dpage;
		struct page *rpage;
		bool is_large = *src & MIGRATE_PFN_COMPOUND;
		int write = (*src & MIGRATE_PFN_WRITE) ? MIGRATE_PFN_WRITE : 0;
		unsigned long nr = 1;

		if (!(*src & MIGRATE_PFN_MIGRATE))
			continue;
			goto next;

		/*
		 * Note that spage might be NULL which is OK since it is an
@@ -662,17 +697,45 @@ static void dmirror_migrate_alloc_and_copy(struct migrate_vma *args,
		if (WARN(spage && is_zone_device_page(spage),
		     "page already in device spage pfn: 0x%lx\n",
		     page_to_pfn(spage)))
			continue;
			goto next;

		dpage = dmirror_devmem_alloc_page(mdevice);
		if (!dpage)
			continue;
		dpage = dmirror_devmem_alloc_page(dmirror, is_large);
		if (!dpage) {
			struct folio *folio;
			unsigned long i;
			unsigned long spfn = *src >> MIGRATE_PFN_SHIFT;
			struct page *src_page;

			if (!is_large)
				goto next;

			if (!spage && is_large) {
				nr = HPAGE_PMD_NR;
			} else {
				folio = page_folio(spage);
				nr = folio_nr_pages(folio);
			}

			for (i = 0; i < nr && addr < args->end; i++) {
				dpage = dmirror_devmem_alloc_page(dmirror, false);
				rpage = BACKING_PAGE(dpage);
				rpage->zone_device_data = dmirror;

				*dst = migrate_pfn(page_to_pfn(dpage)) | write;
				src_page = pfn_to_page(spfn + i);

				if (spage)
			copy_highpage(rpage, spage);
					copy_highpage(rpage, src_page);
				else
					clear_highpage(rpage);
				src++;
				dst++;
				addr += PAGE_SIZE;
			}
			continue;
		}

		rpage = BACKING_PAGE(dpage);

		/*
		 * Normally, a device would use the page->zone_device_data to
@@ -684,10 +747,42 @@ static void dmirror_migrate_alloc_and_copy(struct migrate_vma *args,

		pr_debug("migrating from sys to dev pfn src: 0x%lx pfn dst: 0x%lx\n",
			 page_to_pfn(spage), page_to_pfn(dpage));
		*dst = migrate_pfn(page_to_pfn(dpage));
		if ((*src & MIGRATE_PFN_WRITE) ||
		    (!spage && args->vma->vm_flags & VM_WRITE))
			*dst |= MIGRATE_PFN_WRITE;

		*dst = migrate_pfn(page_to_pfn(dpage)) | write;

		if (is_large) {
			int i;
			struct folio *folio = page_folio(dpage);
			*dst |= MIGRATE_PFN_COMPOUND;

			if (folio_test_large(folio)) {
				for (i = 0; i < folio_nr_pages(folio); i++) {
					struct page *dst_page =
						pfn_to_page(page_to_pfn(rpage) + i);
					struct page *src_page =
						pfn_to_page(page_to_pfn(spage) + i);

					if (spage)
						copy_highpage(dst_page, src_page);
					else
						clear_highpage(dst_page);
					src++;
					dst++;
					addr += PAGE_SIZE;
				}
				continue;
			}
		}

		if (spage)
			copy_highpage(rpage, spage);
		else
			clear_highpage(rpage);

next:
		src++;
		dst++;
		addr += PAGE_SIZE;
	}
}

@@ -734,14 +829,17 @@ static int dmirror_migrate_finalize_and_map(struct migrate_vma *args,
	const unsigned long *src = args->src;
	const unsigned long *dst = args->dst;
	unsigned long pfn;
	const unsigned long start_pfn = start >> PAGE_SHIFT;
	const unsigned long end_pfn = end >> PAGE_SHIFT;

	/* Map the migrated pages into the device's page tables. */
	mutex_lock(&dmirror->mutex);

	for (pfn = start >> PAGE_SHIFT; pfn < (end >> PAGE_SHIFT); pfn++,
								src++, dst++) {
	for (pfn = start_pfn; pfn < end_pfn; pfn++, src++, dst++) {
		struct page *dpage;
		void *entry;
		int nr, i;
		struct page *rpage;

		if (!(*src & MIGRATE_PFN_MIGRATE))
			continue;
@@ -750,15 +848,27 @@ static int dmirror_migrate_finalize_and_map(struct migrate_vma *args,
		if (!dpage)
			continue;

		entry = BACKING_PAGE(dpage);
		if (*dst & MIGRATE_PFN_COMPOUND)
			nr = folio_nr_pages(page_folio(dpage));
		else
			nr = 1;

		WARN_ON_ONCE(end_pfn < start_pfn + nr);

		rpage = BACKING_PAGE(dpage);
		VM_WARN_ON(folio_nr_pages(page_folio(rpage)) != nr);

		for (i = 0; i < nr; i++) {
			entry = folio_page(page_folio(rpage), i);
			if (*dst & MIGRATE_PFN_WRITE)
				entry = xa_tag_pointer(entry, DPT_XA_TAG_WRITE);
		entry = xa_store(&dmirror->pt, pfn, entry, GFP_ATOMIC);
			entry = xa_store(&dmirror->pt, pfn + i, entry, GFP_ATOMIC);
			if (xa_is_err(entry)) {
				mutex_unlock(&dmirror->mutex);
				return xa_err(entry);
			}
		}
	}

	mutex_unlock(&dmirror->mutex);
	return 0;
@@ -829,31 +939,66 @@ static vm_fault_t dmirror_devmem_fault_alloc_and_copy(struct migrate_vma *args,
	unsigned long start = args->start;
	unsigned long end = args->end;
	unsigned long addr;
	unsigned int order = 0;
	int i;

	for (addr = start; addr < end; addr += PAGE_SIZE,
				       src++, dst++) {
	for (addr = start; addr < end; ) {
		struct page *dpage, *spage;

		spage = migrate_pfn_to_page(*src);
		if (!spage || !(*src & MIGRATE_PFN_MIGRATE))
			continue;
		if (!spage || !(*src & MIGRATE_PFN_MIGRATE)) {
			addr += PAGE_SIZE;
			goto next;
		}

		if (WARN_ON(!is_device_private_page(spage) &&
			    !is_device_coherent_page(spage)))
			continue;
			    !is_device_coherent_page(spage))) {
			addr += PAGE_SIZE;
			goto next;
		}

		spage = BACKING_PAGE(spage);
		order = folio_order(page_folio(spage));

		if (order)
			dpage = folio_page(vma_alloc_folio(GFP_HIGHUSER_MOVABLE,
						order, args->vma, addr), 0);
		else
			dpage = alloc_page_vma(GFP_HIGHUSER_MOVABLE, args->vma, addr);

		/* Try with smaller pages if large allocation fails */
		if (!dpage && order) {
			dpage = alloc_page_vma(GFP_HIGHUSER_MOVABLE, args->vma, addr);
			if (!dpage)
			continue;
		pr_debug("migrating from dev to sys pfn src: 0x%lx pfn dst: 0x%lx\n",
			 page_to_pfn(spage), page_to_pfn(dpage));
				return VM_FAULT_OOM;
			order = 0;
		}

		pr_debug("migrating from sys to dev pfn src: 0x%lx pfn dst: 0x%lx\n",
				page_to_pfn(spage), page_to_pfn(dpage));
		lock_page(dpage);
		xa_erase(&dmirror->pt, addr >> PAGE_SHIFT);
		copy_highpage(dpage, spage);
		*dst = migrate_pfn(page_to_pfn(dpage));
		if (*src & MIGRATE_PFN_WRITE)
			*dst |= MIGRATE_PFN_WRITE;
		if (order)
			*dst |= MIGRATE_PFN_COMPOUND;

		for (i = 0; i < (1 << order); i++) {
			struct page *src_page;
			struct page *dst_page;

			src_page = pfn_to_page(page_to_pfn(spage) + i);
			dst_page = pfn_to_page(page_to_pfn(dpage) + i);

			xa_erase(&dmirror->pt, addr >> PAGE_SHIFT);
			copy_highpage(dst_page, src_page);
		}
next:
		addr += PAGE_SIZE << order;
		src += 1 << order;
		dst += 1 << order;
	}
	return 0;
}
@@ -879,11 +1024,14 @@ static int dmirror_migrate_to_system(struct dmirror *dmirror,
	unsigned long size = cmd->npages << PAGE_SHIFT;
	struct mm_struct *mm = dmirror->notifier.mm;
	struct vm_area_struct *vma;
	unsigned long src_pfns[32] = { 0 };
	unsigned long dst_pfns[32] = { 0 };
	struct migrate_vma args = { 0 };
	unsigned long next;
	int ret;
	unsigned long *src_pfns;
	unsigned long *dst_pfns;

	src_pfns = kvcalloc(PTRS_PER_PTE, sizeof(*src_pfns), GFP_KERNEL | __GFP_NOFAIL);
	dst_pfns = kvcalloc(PTRS_PER_PTE, sizeof(*dst_pfns), GFP_KERNEL | __GFP_NOFAIL);

	start = cmd->addr;
	end = start + size;
@@ -902,7 +1050,7 @@ static int dmirror_migrate_to_system(struct dmirror *dmirror,
			ret = -EINVAL;
			goto out;
		}
		next = min(end, addr + (ARRAY_SIZE(src_pfns) << PAGE_SHIFT));
		next = min(end, addr + (PTRS_PER_PTE << PAGE_SHIFT));
		if (next > vma->vm_end)
			next = vma->vm_end;

@@ -912,7 +1060,7 @@ static int dmirror_migrate_to_system(struct dmirror *dmirror,
		args.start = addr;
		args.end = next;
		args.pgmap_owner = dmirror->mdevice;
		args.flags = dmirror_select_device(dmirror);
		args.flags = dmirror_select_device(dmirror) | MIGRATE_VMA_SELECT_COMPOUND;

		ret = migrate_vma_setup(&args);
		if (ret)
@@ -928,6 +1076,8 @@ static int dmirror_migrate_to_system(struct dmirror *dmirror,
out:
	mmap_read_unlock(mm);
	mmput(mm);
	kvfree(src_pfns);
	kvfree(dst_pfns);

	return ret;
}
@@ -939,12 +1089,12 @@ static int dmirror_migrate_to_device(struct dmirror *dmirror,
	unsigned long size = cmd->npages << PAGE_SHIFT;
	struct mm_struct *mm = dmirror->notifier.mm;
	struct vm_area_struct *vma;
	unsigned long src_pfns[32] = { 0 };
	unsigned long dst_pfns[32] = { 0 };
	struct dmirror_bounce bounce;
	struct migrate_vma args = { 0 };
	unsigned long next;
	int ret;
	unsigned long *src_pfns = NULL;
	unsigned long *dst_pfns = NULL;

	start = cmd->addr;
	end = start + size;
@@ -955,6 +1105,18 @@ static int dmirror_migrate_to_device(struct dmirror *dmirror,
	if (!mmget_not_zero(mm))
		return -EINVAL;

	ret = -ENOMEM;
	src_pfns = kvcalloc(PTRS_PER_PTE, sizeof(*src_pfns),
			  GFP_KERNEL | __GFP_NOFAIL);
	if (!src_pfns)
		goto free_mem;

	dst_pfns = kvcalloc(PTRS_PER_PTE, sizeof(*dst_pfns),
			  GFP_KERNEL | __GFP_NOFAIL);
	if (!dst_pfns)
		goto free_mem;

	ret = 0;
	mmap_read_lock(mm);
	for (addr = start; addr < end; addr = next) {
		vma = vma_lookup(mm, addr);
@@ -962,7 +1124,7 @@ static int dmirror_migrate_to_device(struct dmirror *dmirror,
			ret = -EINVAL;
			goto out;
		}
		next = min(end, addr + (ARRAY_SIZE(src_pfns) << PAGE_SHIFT));
		next = min(end, addr + (PTRS_PER_PTE << PAGE_SHIFT));
		if (next > vma->vm_end)
			next = vma->vm_end;

@@ -972,7 +1134,8 @@ static int dmirror_migrate_to_device(struct dmirror *dmirror,
		args.start = addr;
		args.end = next;
		args.pgmap_owner = dmirror->mdevice;
		args.flags = MIGRATE_VMA_SELECT_SYSTEM;
		args.flags = MIGRATE_VMA_SELECT_SYSTEM |
				MIGRATE_VMA_SELECT_COMPOUND;
		ret = migrate_vma_setup(&args);
		if (ret)
			goto out;
@@ -992,7 +1155,7 @@ static int dmirror_migrate_to_device(struct dmirror *dmirror,
	 */
	ret = dmirror_bounce_init(&bounce, start, size);
	if (ret)
		return ret;
		goto free_mem;
	mutex_lock(&dmirror->mutex);
	ret = dmirror_do_read(dmirror, start, end, &bounce);
	mutex_unlock(&dmirror->mutex);
@@ -1003,11 +1166,14 @@ static int dmirror_migrate_to_device(struct dmirror *dmirror,
	}
	cmd->cpages = bounce.cpages;
	dmirror_bounce_fini(&bounce);
	return ret;
	goto free_mem;

out:
	mmap_read_unlock(mm);
	mmput(mm);
free_mem:
	kfree(src_pfns);
	kfree(dst_pfns);
	return ret;
}

@@ -1200,6 +1366,7 @@ static void dmirror_device_evict_chunk(struct dmirror_chunk *chunk)
	unsigned long i;
	unsigned long *src_pfns;
	unsigned long *dst_pfns;
	unsigned int order = 0;

	src_pfns = kvcalloc(npages, sizeof(*src_pfns), GFP_KERNEL | __GFP_NOFAIL);
	dst_pfns = kvcalloc(npages, sizeof(*dst_pfns), GFP_KERNEL | __GFP_NOFAIL);
@@ -1215,13 +1382,25 @@ static void dmirror_device_evict_chunk(struct dmirror_chunk *chunk)
		if (WARN_ON(!is_device_private_page(spage) &&
			    !is_device_coherent_page(spage)))
			continue;

		order = folio_order(page_folio(spage));
		spage = BACKING_PAGE(spage);
		if (src_pfns[i] & MIGRATE_PFN_COMPOUND) {
			dpage = folio_page(folio_alloc(GFP_HIGHUSER_MOVABLE,
					      order), 0);
		} else {
			dpage = alloc_page(GFP_HIGHUSER_MOVABLE | __GFP_NOFAIL);
			order = 0;
		}

		/* TODO Support splitting here */
		lock_page(dpage);
		copy_highpage(dpage, spage);
		dst_pfns[i] = migrate_pfn(page_to_pfn(dpage));
		if (src_pfns[i] & MIGRATE_PFN_WRITE)
			dst_pfns[i] |= MIGRATE_PFN_WRITE;
		if (order)
			dst_pfns[i] |= MIGRATE_PFN_COMPOUND;
		folio_copy(page_folio(dpage), page_folio(spage));
	}
	migrate_device_pages(src_pfns, dst_pfns, npages);
	migrate_device_finalize(src_pfns, dst_pfns, npages);
@@ -1234,7 +1413,12 @@ static void dmirror_remove_free_pages(struct dmirror_chunk *devmem)
{
	struct dmirror_device *mdevice = devmem->mdevice;
	struct page *page;
	struct folio *folio;


	for (folio = mdevice->free_folios; folio; folio = folio_zone_device_data(folio))
		if (dmirror_page_to_chunk(folio_page(folio, 0)) == devmem)
			mdevice->free_folios = folio_zone_device_data(folio);
	for (page = mdevice->free_pages; page; page = page->zone_device_data)
		if (dmirror_page_to_chunk(page) == devmem)
			mdevice->free_pages = page->zone_device_data;
@@ -1265,6 +1449,7 @@ static void dmirror_device_remove_chunks(struct dmirror_device *mdevice)
		mdevice->devmem_count = 0;
		mdevice->devmem_capacity = 0;
		mdevice->free_pages = NULL;
		mdevice->free_folios = NULL;
		kfree(mdevice->devmem_chunks);
		mdevice->devmem_chunks = NULL;
	}
@@ -1379,55 +1564,83 @@ static void dmirror_devmem_free(struct folio *folio)
	struct page *page = &folio->page;
	struct page *rpage = BACKING_PAGE(page);
	struct dmirror_device *mdevice;
	struct folio *rfolio = page_folio(rpage);
	unsigned int order = folio_order(rfolio);

	if (rpage != page)
	if (rpage != page) {
		if (order)
			__free_pages(rpage, order);
		else
			__free_page(rpage);
		rpage = NULL;
	}

	mdevice = dmirror_page_to_device(page);
	spin_lock(&mdevice->lock);

	/* Return page to our allocator if not freeing the chunk */
	if (!dmirror_page_to_chunk(page)->remove) {
		mdevice->cfree++;
		mdevice->cfree += 1 << order;
		if (order) {
			page->zone_device_data = mdevice->free_folios;
			mdevice->free_folios = page_folio(page);
		} else {
			page->zone_device_data = mdevice->free_pages;
			mdevice->free_pages = page;
		}
	}
	spin_unlock(&mdevice->lock);
}

static vm_fault_t dmirror_devmem_fault(struct vm_fault *vmf)
{
	struct migrate_vma args = { 0 };
	unsigned long src_pfns = 0;
	unsigned long dst_pfns = 0;
	struct page *rpage;
	struct dmirror *dmirror;
	vm_fault_t ret;
	vm_fault_t ret = 0;
	unsigned int order, nr;

	/*
	 * Normally, a device would use the page->zone_device_data to point to
	 * the mirror but here we use it to hold the page for the simulated
	 * device memory and that page holds the pointer to the mirror.
	 */
	rpage = vmf->page->zone_device_data;
	rpage = folio_zone_device_data(page_folio(vmf->page));
	dmirror = rpage->zone_device_data;

	/* FIXME demonstrate how we can adjust migrate range */
	order = folio_order(page_folio(vmf->page));
	nr = 1 << order;

	/*
	 * Consider a per-cpu cache of src and dst pfns, but with
	 * large number of cpus that might not scale well.
	 */
	args.start = ALIGN_DOWN(vmf->address, (PAGE_SIZE << order));
	args.vma = vmf->vma;
	args.start = vmf->address;
	args.end = args.start + PAGE_SIZE;
	args.src = &src_pfns;
	args.dst = &dst_pfns;
	args.end = args.start + (PAGE_SIZE << order);

	nr = (args.end - args.start) >> PAGE_SHIFT;
	args.src = kcalloc(nr, sizeof(unsigned long), GFP_KERNEL);
	args.dst = kcalloc(nr, sizeof(unsigned long), GFP_KERNEL);
	args.pgmap_owner = dmirror->mdevice;
	args.flags = dmirror_select_device(dmirror);
	args.fault_page = vmf->page;

	if (!args.src || !args.dst) {
		ret = VM_FAULT_OOM;
		goto err;
	}

	if (order)
		args.flags |= MIGRATE_VMA_SELECT_COMPOUND;

	if (migrate_vma_setup(&args))
		return VM_FAULT_SIGBUS;

	ret = dmirror_devmem_fault_alloc_and_copy(&args, dmirror);
	if (ret)
		return ret;
		goto err;
	migrate_vma_pages(&args);
	/*
	 * No device finalize step is needed since
@@ -1435,7 +1648,10 @@ static vm_fault_t dmirror_devmem_fault(struct vm_fault *vmf)
	 * invalidated the device page table.
	 */
	migrate_vma_finalize(&args);
	return 0;
err:
	kfree(args.src);
	kfree(args.dst);
	return ret;
}

static const struct dev_pagemap_ops dmirror_devmem_ops = {
@@ -1466,7 +1682,7 @@ static int dmirror_device_init(struct dmirror_device *mdevice, int id)
		return ret;

	/* Build a list of free ZONE_DEVICE struct pages */
	return dmirror_allocate_chunk(mdevice, NULL);
	return dmirror_allocate_chunk(mdevice, NULL, false);
}

static void dmirror_device_remove(struct dmirror_device *mdevice)