Commit c98175b7 authored by Claudio Imbrenda
Browse files

KVM: s390: Add gmap_helper_set_unused()



Add gmap_helper_try_set_pte_unused() to mark userspace ptes as unused.

Core mm code will use that information to discard unused pages instead
of attempting to swap them.

Reviewed-by: Nico Boehr <nrb@linux.ibm.com>
Tested-by: Nico Boehr <nrb@linux.ibm.com>
Acked-by: Christoph Schlameuss <schlameuss@linux.ibm.com>
Acked-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
parent 4dadf64d
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -11,5 +11,6 @@
void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr);
void gmap_helper_discard(struct mm_struct *mm, unsigned long vmaddr, unsigned long end);
int gmap_helper_disable_cow_sharing(void);
/*
 * Best-effort: mark the userspace pte that maps @vmaddr in @mm as unused.
 * Does nothing if the address is not mapped by a pte or if the page table
 * lock is contended.
 */
void gmap_helper_try_set_pte_unused(struct mm_struct *mm, unsigned long vmaddr);

#endif /* _ASM_S390_GMAP_HELPERS_H */
+79 −0
Original line number Diff line number Diff line
@@ -129,6 +129,85 @@ void gmap_helper_discard(struct mm_struct *mm, unsigned long vmaddr, unsigned lo
}
EXPORT_SYMBOL_GPL(gmap_helper_discard);

/**
 * gmap_helper_try_set_pte_unused() - try to flag a userspace pte as unused
 * @mm: the mm whose page tables are walked
 * @vmaddr: the userspace address whose pte should be flagged
 *
 * Set _PAGE_UNUSED in the pte that maps @vmaddr. Core mm code will then
 * simply drop the page instead of swapping it out.
 *
 * This is best-effort: nothing happens if @vmaddr is not mapped through a
 * regular (non-leaf) page table hierarchy down to a pte, or if the page
 * table lock cannot be taken without waiting.
 *
 * The caller must either have interrupts disabled (for example because it
 * holds a spinlock) or hold the mmap lock. Normally this function runs as
 * part of an unmap operation, in which case KVM common code already holds
 * kvm->mmu_lock in write mode.
 *
 * Context: Needs to be called while holding the mmap lock or with interrupts
 *          disabled.
 */
void gmap_helper_try_set_pte_unused(struct mm_struct *mm, unsigned long vmaddr)
{
	spinlock_t *host_ptl;	/* Lock for the host (userspace) page table */
	pgd_t *pgd_entry, pgd_val;
	p4d_t *p4d_entry, p4d_val;
	pud_t *pud_entry, pud_val;
	pmd_t *pmd_entry, pmd_val, pmd_snapshot;
	pte_t *pte_entry;

	/* Walk down the host page tables; bail out at the first hole. */
	pgd_entry = pgd_offset(mm, vmaddr);
	pgd_val = pgdp_get(pgd_entry);
	if (pgd_none(pgd_val) || !pgd_present(pgd_val))
		return;

	p4d_entry = p4d_offset(pgd_entry, vmaddr);
	p4d_val = p4dp_get(p4d_entry);
	if (p4d_none(p4d_val) || !p4d_present(p4d_val))
		return;

	/* Leaf (large) mappings have no pte to flag, so they are skipped. */
	pud_entry = pud_offset(p4d_entry, vmaddr);
	pud_val = pudp_get(pud_entry);
	if (pud_none(pud_val) || pud_leaf(pud_val) || !pud_present(pud_val))
		return;

	pmd_entry = pmd_offset(pud_entry, vmaddr);
	pmd_val = pmdp_get_lockless(pmd_entry);
	if (pmd_none(pmd_val) || pmd_leaf(pmd_val) || !pmd_present(pmd_val))
		return;

	/* Map the pte without locking; the pmd value seen is handed back. */
	pte_entry = pte_offset_map_rw_nolock(mm, pmd_entry, vmaddr,
					     &pmd_snapshot, &host_ptl);
	if (!pte_entry)
		return;

	/*
	 * Lock ordering: several paths take the ptl first and then call the
	 * mmu_notifier, which takes the mmu_lock. The unmap path does the
	 * opposite: it takes the mmu_lock in write mode and may then end up
	 * here, wanting the ptl. Blocking on the ptl could therefore
	 * deadlock.
	 * Marking the page unused is only an optimization: without
	 * _PAGE_UNUSED the page is swapped as normal instead of being
	 * discarded. So if the lock is contended, skip setting the bit and
	 * thereby avoid the deadlock.
	 */
	if (!spin_trylock(host_ptl))
		goto out_unmap;

	/*
	 * Re-check that the pmd still matches what was seen when the pte
	 * was mapped, i.e. that this is still the right pte. In theory this
	 * is not needed, since disabled interrupts or the mmap lock already
	 * rule out concurrent page table updates, but better safe than
	 * sorry.
	 */
	if (likely(pmd_same(pmd_snapshot, pmdp_get_lockless(pmd_entry))))
		__atomic64_or(_PAGE_UNUSED, (long *)pte_entry);
	spin_unlock(host_ptl);

out_unmap:
	pte_unmap(pte_entry);
}
EXPORT_SYMBOL_GPL(gmap_helper_try_set_pte_unused);

static int find_zeropage_pte_entry(pte_t *pte, unsigned long addr,
				   unsigned long end, struct mm_walk *walk)
{