Commit 2d505c29 authored by Claudio Imbrenda
Browse files

KVM: s390: vsie: Fix unshadowing logic



In some cases (i.e. under extreme memory pressure on the host),
attempting to shadow memory will result in the same memory being
unshadowed, causing a loop.

Add a PGSTE bit to distinguish between shadowed memory and shadowed DAT
tables, fix the unshadowing logic in _gmap_ptep_xchg() to prevent
unnecessary unshadowing and perform better checks.

Also fix the unshadowing logic in _gmap_crstep_xchg_atomic() which did
not unshadow properly when the large page would become unprotected.

Opportunistically add a check in gmap_protect_rmap() to make sure it
won't be called with level == TABLE_TYPE_PAGE_TABLE.

Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Fixes: a2c17f92 ("KVM: s390: New gmap code")
Reviewed-by: Christian Borntraeger <borntraeger@linux.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@linux.ibm.com>
parent 4df4b7cd
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -267,6 +267,7 @@ static int dat_split_ste(struct kvm_s390_mmu_cache *mc, union pmd *pmdp, gfn_t g
			/* No need to take locks as the page table is not installed yet. */
			pgste_init.prefix_notif = old.s.fc1.prefix_notif;
			pgste_init.vsie_notif = old.s.fc1.vsie_notif;
			pgste_init.vsie_gmem = old.s.fc1.vsie_notif;
			pgste_init.pcl = uses_skeys && init.h.i;
			dat_init_pgstes(pt, pgste_init.val);
		} else {
+2 −1
Original line number Diff line number Diff line
@@ -145,7 +145,8 @@ union pgste {
		unsigned long cmma_d       : 1; /* Dirty flag for CMMA bits */
		unsigned long prefix_notif : 1; /* Guest prefix invalidation notification */
		unsigned long vsie_notif   : 1; /* Referenced in a shadow table */
		unsigned long              : 5;
		unsigned long vsie_gmem    : 1; /* Contains nested guest memory */
		unsigned long              : 4;
		unsigned long              : 8;
	};
	struct {
+1 −0
Original line number Diff line number Diff line
@@ -1445,6 +1445,7 @@ static int _do_shadow_pte(struct gmap *sg, gpa_t raddr, union pte *ptep_h, union
	} else {
		pgste = _gmap_ptep_xchg(sg->parent, ptep_h, newpte, pgste, f->gfn, false);
		pgste.vsie_notif = 1;
		pgste.vsie_gmem = 1;
	}
	pgste_set_unlock(ptep_h, pgste);
	if (rc)
+2 −1
Original line number Diff line number Diff line
@@ -1031,7 +1031,8 @@ int gmap_protect_rmap(struct kvm_s390_mmu_cache *mc, struct gmap *sg, gfn_t p_gf
	union pte pte;
	int flags, rc;

	KVM_BUG_ON(!is_shadow(sg), sg->kvm);
	if (KVM_BUG_ON(!is_shadow(sg) || level <= TABLE_TYPE_PAGE_TABLE, sg->kvm))
		return -EINVAL;
	lockdep_assert_held(&sg->parent->children_lock);

	flags = DAT_WALK_SPLIT_ALLOC | (uses_skeys(sg->parent) ? DAT_WALK_USES_SKEYS : 0);
+57 −3
Original line number Diff line number Diff line
@@ -167,6 +167,36 @@ static inline bool gmap_unmap_prefix(struct gmap *gmap, gfn_t gfn, gfn_t end)
	return _gmap_unmap_prefix(gmap, gfn, end, false);
}

/**
 * pte_needs_unshadow() - Decide whether replacing a pte must trigger unshadowing.
 * @oldpte: the pte value that is currently installed.
 * @newpte: the pte value that is about to be installed.
 * @pgste: the pgste belonging to the pte.
 *
 * The decision depends on how the pgste marks the page:
 *
 * - pgste.vsie_notif clear: the page is not involved in vsie, so replacing
 *   the pte never requires an unshadow event.
 *
 * - pgste.vsie_notif and pgste.vsie_gmem both set: the pte backs shadowed
 *   nested guest (g3) memory, whose access rights have to stay in sync with
 *   g1's and g2's. An unshadow event is needed when the protection bit
 *   differs between the old and the new pte, or when the new pte is invalid.
 *
 * - pgste.vsie_notif set, pgste.vsie_gmem clear: the pte maps g2 DAT tables
 *   used for g3. Once the new pte is writable or absent, the shadow mapping
 *   can no longer be guaranteed to match g2's mapping, so an unshadow event
 *   is needed.
 *
 * Return: true if an unshadow event has to be triggered, false otherwise.
 */
static inline bool pte_needs_unshadow(union pte oldpte, union pte newpte, union pgste pgste)
{
	/* Not referenced by any shadow structure: nothing to tear down. */
	if (!pgste.vsie_notif)
		return false;

	if (!pgste.vsie_gmem) {
		/* Shadowed DAT table: it must stay both protected and present. */
		return !(newpte.h.p && newpte.s.pr);
	}

	/* Shadowed guest memory: invalidation or a protection flip. */
	return newpte.h.i || newpte.h.p != oldpte.h.p;
}

static inline union pgste _gmap_ptep_xchg(struct gmap *gmap, union pte *ptep, union pte newpte,
					  union pgste pgste, gfn_t gfn, bool needs_lock)
{
@@ -180,8 +210,9 @@ static inline union pgste _gmap_ptep_xchg(struct gmap *gmap, union pte *ptep, un
		pgste.prefix_notif = 0;
		gmap_unmap_prefix(gmap, gfn, gfn + 1);
	}
	if (pgste.vsie_notif && (ptep->h.p != newpte.h.p || newpte.h.i)) {
	if (pte_needs_unshadow(*ptep, newpte, pgste)) {
		pgste.vsie_notif = 0;
		pgste.vsie_gmem = 0;
		if (needs_lock)
			gmap_handle_vsie_unshadow_event(gmap, gfn);
		else
@@ -198,6 +229,30 @@ static inline union pgste gmap_ptep_xchg(struct gmap *gmap, union pte *ptep, uni
	return _gmap_ptep_xchg(gmap, ptep, newpte, pgste, gfn, true);
}

/**
 * crste_needs_unshadow() - Decide whether replacing a crste must trigger unshadowing.
 * @oldcrste: the crste value that is currently installed.
 * @newcrste: the crste value that is about to be installed.
 *
 * When the old crste does not carry the vsie_notif bit, the page is not
 * involved in vsie and no unshadow event is needed. When it does carry the
 * bit, the entry can only describe g3 memory, because DAT tables are never
 * mapped using large pages.
 *
 * For such an entry the rules mirror the pgste.vsie_gmem case of
 * pte_needs_unshadow(): an unshadow event is needed when the new crste is
 * invalid or when the protection bit changes. In addition, an unshadow event
 * is needed when the new crste no longer has the vsie_notif bit set.
 *
 * Return: true if an unshadow event has to be triggered, false otherwise.
 */
static inline bool crste_needs_unshadow(union crste oldcrste, union crste newcrste)
{
	/* The old entry was not referenced by a shadow table. */
	if (!oldcrste.s.fc1.vsie_notif)
		return false;

	/* The new entry is invalid or drops the notification bit. */
	if (newcrste.h.i || !newcrste.s.fc1.vsie_notif)
		return true;

	/* Otherwise only a change of the protection bit matters. */
	return oldcrste.h.p != newcrste.h.p;
}

static inline bool __must_check _gmap_crstep_xchg_atomic(struct gmap *gmap, union crste *crstep,
							 union crste oldcrste, union crste newcrste,
							 gfn_t gfn, bool needs_lock)
@@ -216,8 +271,7 @@ static inline bool __must_check _gmap_crstep_xchg_atomic(struct gmap *gmap, unio
		newcrste.s.fc1.prefix_notif = 0;
		gmap_unmap_prefix(gmap, gfn, gfn + align);
	}
	if (crste_leaf(oldcrste) && oldcrste.s.fc1.vsie_notif &&
	    (newcrste.h.p || newcrste.h.i || !newcrste.s.fc1.vsie_notif)) {
	if (crste_leaf(oldcrste) && crste_needs_unshadow(oldcrste, newcrste)) {
		newcrste.s.fc1.vsie_notif = 0;
		if (needs_lock)
			gmap_handle_vsie_unshadow_event(gmap, gfn);