Commit 45921d02 authored by Claudio Imbrenda
Browse files

KVM: s390: Fix gmap_link()



The slow path of the fault handler ultimately called gmap_link(), which
assumed the fault was a major fault, and blindly called dat_link().

In case of minor faults, things were not always handled properly; in
particular the prefix and vsie marker bits were ignored.

Move dat_link() into gmap.c, renaming it accordingly. Once moved, the
new _gmap_link() function will be able to correctly honour the prefix
and vsie markers.

This will cause spurious unshadows in some uncommon cases.

Fixes: 94fd9b16 ("KVM: s390: KVM page table management functions: lifecycle management")
Fixes: a2c17f92 ("KVM: s390: New gmap code")
Reviewed-by: Steffen Eiden <seiden@linux.ibm.com>
Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
parent 6f93d1ed
Loading
Loading
Loading
Loading
+0 −48
Original line number Diff line number Diff line
@@ -997,54 +997,6 @@ bool dat_test_age_gfn(union asce asce, gfn_t start, gfn_t end)
	return _dat_walk_gfn_range(start, end, asce, &test_age_ops, 0, NULL) > 0;
}

/**
 * dat_link() - enter the page described by a guest fault into a DAT table.
 * @mc: cache handed to dat_entry_walk() for the table walk.
 * @asce: address space to walk and to pass to the exchange helpers.
 * @level: level requested from the walk; the entry that is found must not
 *         be at a higher level than this.
 * @uses_skeys: forwarded unchanged to __dat_ptep_xchg().
 * @f: the fault; f->gfn, f->pfn, f->writable, f->write_attempt and f->page
 *     are read, f->crstep and f->ptep are filled in by the walk, and
 *     f->callback (if set) is invoked after a successful update.
 *
 * The entry is only replaced if it is currently empty or already refers to
 * the same frame; otherwise the caller is asked to retry.
 *
 * Return: 0 on success; -EINVAL or -ENOMEM as reported by the walk; -EINVAL
 *         if the walk ended at a level above @level; -EAGAIN in all other
 *         failure cases.
 */
int dat_link(struct kvm_s390_mmu_cache *mc, union asce asce, int level,
	     bool uses_skeys, struct guest_fault *f)
{
	union crste cur_crste, new_crste;
	union pte cur_pte, new_pte;
	union pgste pgste;
	int rc;

	rc = dat_entry_walk(mc, f->gfn, asce, DAT_WALK_ALLOC_CONTINUE, level, &f->crstep, &f->ptep);
	switch (rc) {
	case 0:
		break;
	case -EINVAL:
	case -ENOMEM:
		/* These two are reported to the caller as they are. */
		return rc;
	default:
		/* Any other walk failure is turned into a retry request. */
		return -EAGAIN;
	}

	if (WARN_ON_ONCE(unlikely(get_level(f->crstep, f->ptep) > level)))
		return -EINVAL;

	if (!f->ptep) {
		/* The walk returned no pte: update the crste itself. */
		cur_crste = READ_ONCE(*f->crstep);
		new_crste = _crste_fc1(f->pfn, cur_crste.h.tt, f->writable,
				       f->write_attempt | cur_crste.s.fc1.d);
		/* Carry the sd bit of the current entry over to the new one. */
		new_crste.s.fc1.sd = cur_crste.s.fc1.sd;
		/* A non-empty entry with a different origin must not be replaced. */
		if (cur_crste.val != _CRSTE_EMPTY(cur_crste.h.tt).val &&
		    crste_origin_large(cur_crste) != crste_origin_large(new_crste))
			return -EAGAIN;
		/* Single attempt; a failed exchange is left to the caller to retry. */
		if (!dat_crstep_xchg_atomic(f->crstep, cur_crste, new_crste, f->gfn, asce))
			return -EAGAIN;
		if (f->callback)
			f->callback(f);
		return 0;
	}

	/* pte case: everything below happens with the pgste lock held. */
	pgste = pgste_get_lock(f->ptep);
	cur_pte = *f->ptep;
	new_pte = _pte(f->pfn, f->writable, f->write_attempt | cur_pte.s.d, !f->page);
	/* Carry the sd bit over, then mask it so it does not affect the empty check. */
	new_pte.s.sd = cur_pte.s.sd;
	cur_pte.s.sd = 0;
	if (cur_pte.val != _PTE_EMPTY.val && cur_pte.h.pfra != f->pfn) {
		/* Occupied by a different frame: leave it alone. */
		rc = -EAGAIN;
	} else {
		pgste = __dat_ptep_xchg(f->ptep, pgste, new_pte, f->gfn, asce, uses_skeys);
		if (f->callback)
			f->callback(f);
	}
	pgste_set_unlock(f->ptep, pgste);

	return rc;
}

static long dat_set_pn_crste(union crste *crstep, gfn_t gfn, gfn_t next, struct dat_walk *walk)
{
	union crste newcrste, oldcrste;
+0 −2
Original line number Diff line number Diff line
@@ -540,8 +540,6 @@ int dat_set_slot(struct kvm_s390_mmu_cache *mc, union asce asce, gfn_t start, gf
		 u16 type, u16 param);
int dat_set_prefix_notif_bit(union asce asce, gfn_t gfn);
bool dat_test_age_gfn(union asce asce, gfn_t start, gfn_t end);
/*
 * Walk @asce down to @level for f->gfn and enter f->pfn into the entry that
 * is found, provided that entry is empty or already refers to the same frame.
 * Returns 0 on success, -EINVAL or -ENOMEM on walk failure, -EAGAIN if the
 * caller should retry.
 */
int dat_link(struct kvm_s390_mmu_cache *mc, union asce asce, int level,
	     bool uses_skeys, struct guest_fault *f);

int dat_perform_essa(union asce asce, gfn_t gfn, int orc, union essa_state *state, bool *dirty);
long dat_reset_cmma(union asce asce, gfn_t start_gfn);
+52 −4
Original line number Diff line number Diff line
@@ -631,10 +631,60 @@ static inline bool gmap_1m_allowed(struct gmap *gmap, gfn_t gfn)
	return test_bit(GMAP_FLAG_ALLOW_HPAGE_1M, &gmap->flags);
}

static int _gmap_link(struct kvm_s390_mmu_cache *mc, struct gmap *gmap, int level,
		      struct guest_fault *f)
{
	union crste oldval, newval;
	union pte newpte, oldpte;
	union pgste pgste;
	int rc = 0;

	rc = dat_entry_walk(mc, f->gfn, gmap->asce, DAT_WALK_ALLOC_CONTINUE, level,
			    &f->crstep, &f->ptep);
	if (rc == -ENOMEM)
		return rc;
	if (KVM_BUG_ON(rc == -EINVAL, gmap->kvm))
		return rc;
	if (rc)
		return -EAGAIN;
	if (KVM_BUG_ON(get_level(f->crstep, f->ptep) > level, gmap->kvm))
		return -EINVAL;

	if (f->ptep) {
		pgste = pgste_get_lock(f->ptep);
		oldpte = *f->ptep;
		newpte = _pte(f->pfn, f->writable, f->write_attempt | oldpte.s.d, !f->page);
		newpte.s.sd = oldpte.s.sd;
		oldpte.s.sd = 0;
		if (oldpte.val == _PTE_EMPTY.val || oldpte.h.pfra == f->pfn) {
			pgste = gmap_ptep_xchg(gmap, f->ptep, newpte, pgste, f->gfn);
			if (f->callback)
				f->callback(f);
		} else {
			rc = -EAGAIN;
		}
		pgste_set_unlock(f->ptep, pgste);
	} else {
		do {
			oldval = READ_ONCE(*f->crstep);
			newval = _crste_fc1(f->pfn, oldval.h.tt, f->writable,
					    f->write_attempt | oldval.s.fc1.d);
			newval.s.fc1.sd = oldval.s.fc1.sd;
			if (oldval.val != _CRSTE_EMPTY(oldval.h.tt).val &&
			    crste_origin_large(oldval) != crste_origin_large(newval))
				return -EAGAIN;
		} while (!gmap_crstep_xchg_atomic(gmap, f->crstep, oldval, newval, f->gfn));
		if (f->callback)
			f->callback(f);
	}

	return rc;
}

int gmap_link(struct kvm_s390_mmu_cache *mc, struct gmap *gmap, struct guest_fault *f)
{
	unsigned int order;
	int rc, level;
	int level;

	lockdep_assert_held(&gmap->kvm->mmu_lock);

@@ -646,9 +696,7 @@ int gmap_link(struct kvm_s390_mmu_cache *mc, struct gmap *gmap, struct guest_fau
		else if (order >= get_order(_SEGMENT_SIZE) && gmap_1m_allowed(gmap, f->gfn))
			level = TABLE_TYPE_SEGMENT;
	}
	rc = dat_link(mc, gmap->asce, level, uses_skeys(gmap), f);
	KVM_BUG_ON(rc == -EINVAL, gmap->kvm);
	return rc;
	return _gmap_link(mc, gmap, level, f);
}

static int gmap_ucas_map_one(struct kvm_s390_mmu_cache *mc, struct gmap *gmap,