Commit e2e0b826 authored by Mike Rapoport (Microsoft)'s avatar Mike Rapoport (Microsoft) Committed by Andrew Morton
Browse files

userfaultfd: introduce mfill_establish_pmd() helper

There is a lengthy code chunk in mfill_atomic() that establishes the PMD
for UFFDIO operations.  This code may be called twice: first time when the
copy is performed with VMA/mm locks held and the other time after the copy
is retried with locks dropped.

Move the code that establishes a PMD into a helper function so it can be
reused later during refactoring of mfill_atomic_pte_copy().

Link: https://lore.kernel.org/20260402041156.1377214-4-rppt@kernel.org


Signed-off-by: default avatarMike Rapoport (Microsoft) <rppt@kernel.org>
Reviewed-by: default avatarHarry Yoo (Oracle) <harry@kernel.org>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Andrei Vagin <avagin@google.com>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: David Hildenbrand (Arm) <david@kernel.org>
Cc: Harry Yoo <harry.yoo@oracle.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: James Houghton <jthoughton@google.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Nikita Kalyazin <kalyazin@amazon.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Sean Christopherson <seanjc@google.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: David Carlier <devnexen@gmail.com>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
parent db0062d2
Loading
Loading
Loading
Loading
+52 −50
Original line number Diff line number Diff line
@@ -157,6 +157,56 @@ static void uffd_mfill_unlock(struct vm_area_struct *vma)
}
#endif

static pmd_t *mm_alloc_pmd(struct mm_struct *mm, unsigned long address)
{
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;

	pgd = pgd_offset(mm, address);
	p4d = p4d_alloc(mm, pgd, address);
	if (!p4d)
		return NULL;
	pud = pud_alloc(mm, p4d, address);
	if (!pud)
		return NULL;
	/*
	 * Note that we didn't run this because the pmd was
	 * missing, the *pmd may be already established and in
	 * turn it may also be a trans_huge_pmd.
	 */
	return pmd_alloc(mm, pud, address);
}

static int mfill_establish_pmd(struct mfill_state *state)
{
	struct mm_struct *dst_mm = state->ctx->mm;
	pmd_t *dst_pmd, dst_pmdval;

	dst_pmd = mm_alloc_pmd(dst_mm, state->dst_addr);
	if (unlikely(!dst_pmd))
		return -ENOMEM;

	dst_pmdval = pmdp_get_lockless(dst_pmd);
	if (unlikely(pmd_none(dst_pmdval)) &&
	    unlikely(__pte_alloc(dst_mm, dst_pmd)))
		return -ENOMEM;

	dst_pmdval = pmdp_get_lockless(dst_pmd);
	/*
	 * If the dst_pmd is THP don't override it and just be strict.
	 * (This includes the case where the PMD used to be THP and
	 * changed back to none after __pte_alloc().)
	 */
	if (unlikely(!pmd_present(dst_pmdval) || pmd_leaf(dst_pmdval)))
		return -EEXIST;
	if (unlikely(pmd_bad(dst_pmdval)))
		return -EFAULT;

	state->pmd = dst_pmd;
	return 0;
}

/* Check if dst_addr is outside of file's size. Must be called with ptl held. */
static bool mfill_file_over_size(struct vm_area_struct *dst_vma,
				 unsigned long dst_addr)
@@ -489,27 +539,6 @@ static int mfill_atomic_pte_poison(struct mfill_state *state)
	return ret;
}

static pmd_t *mm_alloc_pmd(struct mm_struct *mm, unsigned long address)
{
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;

	pgd = pgd_offset(mm, address);
	p4d = p4d_alloc(mm, pgd, address);
	if (!p4d)
		return NULL;
	pud = pud_alloc(mm, p4d, address);
	if (!pud)
		return NULL;
	/*
	 * Note that we didn't run this because the pmd was
	 * missing, the *pmd may be already established and in
	 * turn it may also be a trans_huge_pmd.
	 */
	return pmd_alloc(mm, pud, address);
}

#ifdef CONFIG_HUGETLB_PAGE
/*
 * mfill_atomic processing for HUGETLB vmas.  Note that this routine is
@@ -742,7 +771,6 @@ static __always_inline ssize_t mfill_atomic(struct userfaultfd_ctx *ctx,
	struct vm_area_struct *dst_vma;
	long copied = 0;
	ssize_t err;
	pmd_t *dst_pmd;

	/*
	 * Sanitize the command parameters:
@@ -808,41 +836,15 @@ static __always_inline ssize_t mfill_atomic(struct userfaultfd_ctx *ctx,
	while (state.src_addr < src_start + len) {
		VM_WARN_ON_ONCE(state.dst_addr >= dst_start + len);

		pmd_t dst_pmdval;

		dst_pmd = mm_alloc_pmd(dst_mm, state.dst_addr);
		if (unlikely(!dst_pmd)) {
			err = -ENOMEM;
		err = mfill_establish_pmd(&state);
		if (err)
			break;
		}

		dst_pmdval = pmdp_get_lockless(dst_pmd);
		if (unlikely(pmd_none(dst_pmdval)) &&
		    unlikely(__pte_alloc(dst_mm, dst_pmd))) {
			err = -ENOMEM;
			break;
		}
		dst_pmdval = pmdp_get_lockless(dst_pmd);
		/*
		 * If the dst_pmd is THP don't override it and just be strict.
		 * (This includes the case where the PMD used to be THP and
		 * changed back to none after __pte_alloc().)
		 */
		if (unlikely(!pmd_present(dst_pmdval) ||
				pmd_trans_huge(dst_pmdval))) {
			err = -EEXIST;
			break;
		}
		if (unlikely(pmd_bad(dst_pmdval))) {
			err = -EFAULT;
			break;
		}
		/*
		 * For shmem mappings, khugepaged is allowed to remove page
		 * tables under us; pte_offset_map_lock() will deal with that.
		 */

		state.pmd = dst_pmd;
		err = mfill_atomic_pte(&state);
		cond_resched();