mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git/
synced 2026-04-05 08:17:42 -04:00
KVM: s390: KVM page table management functions: walks
Add page table management functions to be used for KVM guest (gmap) page tables. This patch adds functions to walk to specific table entries, or to perform actions on a range of entries. Acked-by: Heiko Carstens <hca@linux.ibm.com> Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
This commit is contained in:
@@ -216,3 +216,389 @@ union pgste __dat_ptep_xchg(union pte *ptep, union pgste pgste, union pte new, g
|
||||
WRITE_ONCE(*ptep, new);
|
||||
return pgste;
|
||||
}
|
||||
|
||||
/*
|
||||
* dat_split_ste() - Split a segment table entry into page table entries.
|
||||
*
|
||||
* Context: This function is assumed to be called with kvm->mmu_lock held.
|
||||
*
|
||||
* Return: 0 in case of success, -ENOMEM if running out of memory.
|
||||
*/
|
||||
static int dat_split_ste(struct kvm_s390_mmu_cache *mc, union pmd *pmdp, gfn_t gfn,
|
||||
union asce asce, bool uses_skeys)
|
||||
{
|
||||
union pgste pgste_init;
|
||||
struct page_table *pt;
|
||||
union pmd new, old;
|
||||
union pte init;
|
||||
int i;
|
||||
|
||||
BUG_ON(!mc);
|
||||
old = READ_ONCE(*pmdp);
|
||||
|
||||
/* Already split, nothing to do. */
|
||||
if (!old.h.i && !old.h.fc)
|
||||
return 0;
|
||||
|
||||
pt = dat_alloc_pt_noinit(mc);
|
||||
if (!pt)
|
||||
return -ENOMEM;
|
||||
new.val = virt_to_phys(pt);
|
||||
|
||||
while (old.h.i || old.h.fc) {
|
||||
init.val = pmd_origin_large(old);
|
||||
init.h.p = old.h.p;
|
||||
init.h.i = old.h.i;
|
||||
init.s.d = old.s.fc1.d;
|
||||
init.s.w = old.s.fc1.w;
|
||||
init.s.y = old.s.fc1.y;
|
||||
init.s.sd = old.s.fc1.sd;
|
||||
init.s.pr = old.s.fc1.pr;
|
||||
pgste_init.val = 0;
|
||||
if (old.h.fc) {
|
||||
for (i = 0; i < _PAGE_ENTRIES; i++)
|
||||
pt->ptes[i].val = init.val | i * PAGE_SIZE;
|
||||
/* No need to take locks as the page table is not installed yet. */
|
||||
pgste_init.prefix_notif = old.s.fc1.prefix_notif;
|
||||
pgste_init.pcl = uses_skeys && init.h.i;
|
||||
dat_init_pgstes(pt, pgste_init.val);
|
||||
} else {
|
||||
dat_init_page_table(pt, init.val, 0);
|
||||
}
|
||||
|
||||
if (dat_pmdp_xchg_atomic(pmdp, old, new, gfn, asce)) {
|
||||
if (!pgste_init.pcl)
|
||||
return 0;
|
||||
for (i = 0; i < _PAGE_ENTRIES; i++) {
|
||||
union pgste pgste = pt->pgstes[i];
|
||||
|
||||
pgste = dat_save_storage_key_into_pgste(pt->ptes[i], pgste);
|
||||
pgste_set_unlock(pt->ptes + i, pgste);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
old = READ_ONCE(*pmdp);
|
||||
}
|
||||
|
||||
dat_free_pt(pt);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* dat_split_crste() - Split a crste into smaller crstes.
|
||||
*
|
||||
* Context: This function is assumed to be called with kvm->mmu_lock held.
|
||||
*
|
||||
* Return: %0 in case of success, %-ENOMEM if running out of memory.
|
||||
*/
|
||||
static int dat_split_crste(struct kvm_s390_mmu_cache *mc, union crste *crstep,
|
||||
gfn_t gfn, union asce asce, bool uses_skeys)
|
||||
{
|
||||
struct crst_table *table;
|
||||
union crste old, new, init;
|
||||
int i;
|
||||
|
||||
old = READ_ONCE(*crstep);
|
||||
if (is_pmd(old))
|
||||
return dat_split_ste(mc, &crstep->pmd, gfn, asce, uses_skeys);
|
||||
|
||||
BUG_ON(!mc);
|
||||
|
||||
/* Already split, nothing to do. */
|
||||
if (!old.h.i && !old.h.fc)
|
||||
return 0;
|
||||
|
||||
table = dat_alloc_crst_noinit(mc);
|
||||
if (!table)
|
||||
return -ENOMEM;
|
||||
|
||||
new.val = virt_to_phys(table);
|
||||
new.h.tt = old.h.tt;
|
||||
new.h.fc0.tl = _REGION_ENTRY_LENGTH;
|
||||
|
||||
while (old.h.i || old.h.fc) {
|
||||
init = old;
|
||||
init.h.tt--;
|
||||
if (old.h.fc) {
|
||||
for (i = 0; i < _CRST_ENTRIES; i++)
|
||||
table->crstes[i].val = init.val | i * HPAGE_SIZE;
|
||||
} else {
|
||||
crst_table_init((void *)table, init.val);
|
||||
}
|
||||
if (dat_crstep_xchg_atomic(crstep, old, new, gfn, asce))
|
||||
return 0;
|
||||
old = READ_ONCE(*crstep);
|
||||
}
|
||||
|
||||
dat_free_crst(table);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* dat_entry_walk() - Walk the gmap page tables.
|
||||
* @mc: Cache to use to allocate dat tables, if needed; can be NULL if neither
|
||||
* %DAT_WALK_SPLIT or %DAT_WALK_ALLOC is specified in @flags.
|
||||
* @gfn: Guest frame.
|
||||
* @asce: The ASCE of the address space.
|
||||
* @flags: Flags from WALK_* macros.
|
||||
* @walk_level: Level to walk to, from LEVEL_* macros.
|
||||
* @last: Will be filled the last visited non-pte DAT entry.
|
||||
* @ptepp: Will be filled the last visited pte entry, if any, otherwise NULL.
|
||||
*
|
||||
* Returns a table entry pointer for the given guest address and @walk_level.
|
||||
*
|
||||
* The @flags have the following meanings:
|
||||
* * %DAT_WALK_IGN_HOLES: consider holes as normal table entries
|
||||
* * %DAT_WALK_ALLOC: allocate new tables to reach the requested level, if needed
|
||||
* * %DAT_WALK_SPLIT: split existing large pages to reach the requested level, if needed
|
||||
* * %DAT_WALK_LEAF: return successfully whenever a large page is encountered
|
||||
* * %DAT_WALK_ANY: return successfully even if the requested level could not be reached
|
||||
* * %DAT_WALK_CONTINUE: walk to the requested level with the specified flags, and then try to
|
||||
* continue walking to ptes with only DAT_WALK_ANY
|
||||
* * %DAT_WALK_USES_SKEYS: storage keys are in use
|
||||
*
|
||||
* Context: called with kvm->mmu_lock held.
|
||||
*
|
||||
* Return:
|
||||
* * %PGM_ADDRESSING if the requested address lies outside memory
|
||||
* * a PIC number if the requested address lies in a memory hole of type _DAT_TOKEN_PIC
|
||||
* * %-EFAULT if the requested address lies inside a memory hole of a different type
|
||||
* * %-EINVAL if the given ASCE is not compatible with the requested level
|
||||
* * %-EFBIG if the requested level could not be reached because a larger frame was found
|
||||
* * %-ENOENT if the requested level could not be reached for other reasons
|
||||
* * %-ENOMEM if running out of memory while allocating or splitting a table
|
||||
*/
|
||||
int dat_entry_walk(struct kvm_s390_mmu_cache *mc, gfn_t gfn, union asce asce, int flags,
|
||||
int walk_level, union crste **last, union pte **ptepp)
|
||||
{
|
||||
union vaddress vaddr = { .addr = gfn_to_gpa(gfn) };
|
||||
bool continue_anyway = flags & DAT_WALK_CONTINUE;
|
||||
bool uses_skeys = flags & DAT_WALK_USES_SKEYS;
|
||||
bool ign_holes = flags & DAT_WALK_IGN_HOLES;
|
||||
bool allocate = flags & DAT_WALK_ALLOC;
|
||||
bool split = flags & DAT_WALK_SPLIT;
|
||||
bool leaf = flags & DAT_WALK_LEAF;
|
||||
bool any = flags & DAT_WALK_ANY;
|
||||
struct page_table *pgtable;
|
||||
struct crst_table *table;
|
||||
union crste entry;
|
||||
int rc;
|
||||
|
||||
*last = NULL;
|
||||
*ptepp = NULL;
|
||||
if (WARN_ON_ONCE(unlikely(!asce.val)))
|
||||
return -EINVAL;
|
||||
if (WARN_ON_ONCE(unlikely(walk_level > asce.dt)))
|
||||
return -EINVAL;
|
||||
if (!asce_contains_gfn(asce, gfn))
|
||||
return PGM_ADDRESSING;
|
||||
|
||||
table = dereference_asce(asce);
|
||||
if (asce.dt >= ASCE_TYPE_REGION1) {
|
||||
*last = table->crstes + vaddr.rfx;
|
||||
entry = READ_ONCE(**last);
|
||||
if (WARN_ON_ONCE(entry.h.tt != TABLE_TYPE_REGION1))
|
||||
return -EINVAL;
|
||||
if (crste_hole(entry) && !ign_holes)
|
||||
return entry.tok.type == _DAT_TOKEN_PIC ? entry.tok.par : -EFAULT;
|
||||
if (walk_level == TABLE_TYPE_REGION1)
|
||||
return 0;
|
||||
if (entry.pgd.h.i) {
|
||||
if (!allocate)
|
||||
return any ? 0 : -ENOENT;
|
||||
rc = dat_split_crste(mc, *last, gfn, asce, uses_skeys);
|
||||
if (rc)
|
||||
return rc;
|
||||
entry = READ_ONCE(**last);
|
||||
}
|
||||
table = dereference_crste(entry.pgd);
|
||||
}
|
||||
|
||||
if (asce.dt >= ASCE_TYPE_REGION2) {
|
||||
*last = table->crstes + vaddr.rsx;
|
||||
entry = READ_ONCE(**last);
|
||||
if (WARN_ON_ONCE(entry.h.tt != TABLE_TYPE_REGION2))
|
||||
return -EINVAL;
|
||||
if (crste_hole(entry) && !ign_holes)
|
||||
return entry.tok.type == _DAT_TOKEN_PIC ? entry.tok.par : -EFAULT;
|
||||
if (walk_level == TABLE_TYPE_REGION2)
|
||||
return 0;
|
||||
if (entry.p4d.h.i) {
|
||||
if (!allocate)
|
||||
return any ? 0 : -ENOENT;
|
||||
rc = dat_split_crste(mc, *last, gfn, asce, uses_skeys);
|
||||
if (rc)
|
||||
return rc;
|
||||
entry = READ_ONCE(**last);
|
||||
}
|
||||
table = dereference_crste(entry.p4d);
|
||||
}
|
||||
|
||||
if (asce.dt >= ASCE_TYPE_REGION3) {
|
||||
*last = table->crstes + vaddr.rtx;
|
||||
entry = READ_ONCE(**last);
|
||||
if (WARN_ON_ONCE(entry.h.tt != TABLE_TYPE_REGION3))
|
||||
return -EINVAL;
|
||||
if (crste_hole(entry) && !ign_holes)
|
||||
return entry.tok.type == _DAT_TOKEN_PIC ? entry.tok.par : -EFAULT;
|
||||
if (walk_level == TABLE_TYPE_REGION3 &&
|
||||
continue_anyway && !entry.pud.h.fc && !entry.h.i) {
|
||||
walk_level = TABLE_TYPE_PAGE_TABLE;
|
||||
allocate = false;
|
||||
}
|
||||
if (walk_level == TABLE_TYPE_REGION3 || ((leaf || any) && entry.pud.h.fc))
|
||||
return 0;
|
||||
if (entry.pud.h.i && !entry.pud.h.fc) {
|
||||
if (!allocate)
|
||||
return any ? 0 : -ENOENT;
|
||||
rc = dat_split_crste(mc, *last, gfn, asce, uses_skeys);
|
||||
if (rc)
|
||||
return rc;
|
||||
entry = READ_ONCE(**last);
|
||||
}
|
||||
if (walk_level <= TABLE_TYPE_SEGMENT && entry.pud.h.fc) {
|
||||
if (!split)
|
||||
return -EFBIG;
|
||||
rc = dat_split_crste(mc, *last, gfn, asce, uses_skeys);
|
||||
if (rc)
|
||||
return rc;
|
||||
entry = READ_ONCE(**last);
|
||||
}
|
||||
table = dereference_crste(entry.pud);
|
||||
}
|
||||
|
||||
*last = table->crstes + vaddr.sx;
|
||||
entry = READ_ONCE(**last);
|
||||
if (WARN_ON_ONCE(entry.h.tt != TABLE_TYPE_SEGMENT))
|
||||
return -EINVAL;
|
||||
if (crste_hole(entry) && !ign_holes)
|
||||
return entry.tok.type == _DAT_TOKEN_PIC ? entry.tok.par : -EFAULT;
|
||||
if (continue_anyway && !entry.pmd.h.fc && !entry.h.i) {
|
||||
walk_level = TABLE_TYPE_PAGE_TABLE;
|
||||
allocate = false;
|
||||
}
|
||||
if (walk_level == TABLE_TYPE_SEGMENT || ((leaf || any) && entry.pmd.h.fc))
|
||||
return 0;
|
||||
|
||||
if (entry.pmd.h.i && !entry.pmd.h.fc) {
|
||||
if (!allocate)
|
||||
return any ? 0 : -ENOENT;
|
||||
rc = dat_split_ste(mc, &(*last)->pmd, gfn, asce, uses_skeys);
|
||||
if (rc)
|
||||
return rc;
|
||||
entry = READ_ONCE(**last);
|
||||
}
|
||||
if (walk_level <= TABLE_TYPE_PAGE_TABLE && entry.pmd.h.fc) {
|
||||
if (!split)
|
||||
return -EFBIG;
|
||||
rc = dat_split_ste(mc, &(*last)->pmd, gfn, asce, uses_skeys);
|
||||
if (rc)
|
||||
return rc;
|
||||
entry = READ_ONCE(**last);
|
||||
}
|
||||
pgtable = dereference_pmd(entry.pmd);
|
||||
*ptepp = pgtable->ptes + vaddr.px;
|
||||
if (pte_hole(**ptepp) && !ign_holes)
|
||||
return (*ptepp)->tok.type == _DAT_TOKEN_PIC ? (*ptepp)->tok.par : -EFAULT;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * dat_pte_walk_range() - Call the pte callback for a range of ptes.
 * @gfn: The first guest page frame to visit.
 * @end: The guest page frame immediately after the last one to visit.
 * @table: The page table containing the ptes.
 * @w: The walk state; w->ops->pte_entry is called unconditionally, so it
 *     must not be NULL.
 *
 * Holes abort the walk with -EFAULT unless DAT_WALK_IGN_HOLES is set; with
 * DAT_WALK_IGN_HOLES they are skipped, unless DAT_WALK_ANY is also set, in
 * which case the callback is invoked for them as well.
 *
 * Return: 0 if the whole range was visited, -EFAULT for a hole that is not
 * ignored, otherwise the first non-zero value returned by the callback.
 */
static long dat_pte_walk_range(gfn_t gfn, gfn_t end, struct page_table *table, struct dat_walk *w)
{
	unsigned int slot = gfn & (_PAGE_ENTRIES - 1);
	long rc;

	while (gfn < end) {
		union pte *ptep = &table->ptes[slot];
		bool hole = pte_hole(READ_ONCE(*ptep));

		if (hole && !(w->flags & DAT_WALK_IGN_HOLES))
			return -EFAULT;

		/* An ignored hole is only handed to the callback with DAT_WALK_ANY. */
		if (!hole || (w->flags & DAT_WALK_ANY)) {
			rc = w->ops->pte_entry(ptep, gfn, gfn + 1, w);
			if (rc)
				return rc;
		}

		slot++;
		gfn++;
	}
	return 0;
}
|
||||
|
||||
/*
 * dat_crste_walk_range() - Walk a range of entries of a region or segment table.
 * @start: The first guest page frame to walk.
 * @end: The guest page frame immediately after the last one to walk.
 * @table: The table to walk; its type is taken from its first entry.
 * @walk: The walk state; walk->last is updated to point to each visited entry.
 *
 * For every entry overlapping the range, the callback registered for the
 * table type of the entry (if any) is called, and then, if the entry is
 * neither invalid nor large, the walk descends into the table it points to.
 * Page tables are only visited if a pte callback has been provided.
 *
 * Holes abort the walk with -EFAULT unless DAT_WALK_IGN_HOLES is set; with
 * DAT_WALK_IGN_HOLES they are skipped, unless DAT_WALK_ANY is also set.
 *
 * Return: 0 if the whole range was walked, -EFAULT for a hole that is not
 * ignored, otherwise the first non-zero value returned by a callback or by
 * the walk of a lower level table.
 */
static long dat_crste_walk_range(gfn_t start, gfn_t end, struct crst_table *table,
				 struct dat_walk *walk)
{
	unsigned long idx, cur_shift, cur_size;
	dat_walk_op the_op;
	union crste crste;
	gfn_t cur, next;
	long rc = 0;

	/* An entry of a table of type tt spans 2^(8 + 11 * tt) guest frames. */
	cur_shift = 8 + table->crstes[0].h.tt * 11;
	idx = (start >> cur_shift) & (_CRST_ENTRIES - 1);
	cur_size = 1UL << cur_shift;

	for (cur = ALIGN_DOWN(start, cur_size); cur < end; idx++, cur = next) {
		next = cur + cur_size;
		walk->last = table->crstes + idx;
		crste = READ_ONCE(*walk->last);

		if (crste_hole(crste)) {
			if (!(walk->flags & DAT_WALK_IGN_HOLES))
				return -EFAULT;
			if (!(walk->flags & DAT_WALK_ANY))
				continue;
		}

		the_op = walk->ops->crste_ops[crste.h.tt];
		if (the_op) {
			rc = the_op(walk->last, cur, next, walk);
			/* The callback may have changed the entry, look at it again. */
			crste = READ_ONCE(*walk->last);
		}
		if (rc)
			break;

		/* Only entries pointing to a lower level table can be descended into. */
		if (crste.h.i || crste.h.fc)
			continue;

		/* The first and the last entry may be only partially covered. */
		if (!is_pmd(crste))
			rc = dat_crste_walk_range(max(start, cur), min(end, next),
						  _dereference_crste(crste), walk);
		else if (walk->ops->pte_entry)
			rc = dat_pte_walk_range(max(start, cur), min(end, next),
						dereference_pmd(crste.pmd), walk);
		/*
		 * Stop right away if the lower level walk was interrupted. Moving
		 * on to the next entry would let its callback overwrite rc, or a
		 * hole skip the check, and the walk would not stop as requested.
		 */
		if (rc)
			break;
	}
	return rc;
}
|
||||
|
||||
/**
|
||||
* _dat_walk_gfn_range() - Walk DAT tables.
|
||||
* @start: The first guest page frame to walk.
|
||||
* @end: The guest page frame immediately after the last one to walk.
|
||||
* @asce: The ASCE of the guest mapping.
|
||||
* @ops: The gmap_walk_ops that will be used to perform the walk.
|
||||
* @flags: Flags from WALK_* (currently only WALK_IGN_HOLES is supported).
|
||||
* @priv: Will be passed as-is to the callbacks.
|
||||
*
|
||||
* Any callback returning non-zero causes the walk to stop immediately.
|
||||
*
|
||||
* Return: %-EINVAL in case of error, %-EFAULT if @start is too high for the
|
||||
* given ASCE unless the DAT_WALK_IGN_HOLES flag is specified,
|
||||
* otherwise it returns whatever the callbacks return.
|
||||
*/
|
||||
long _dat_walk_gfn_range(gfn_t start, gfn_t end, union asce asce,
|
||||
const struct dat_walk_ops *ops, int flags, void *priv)
|
||||
{
|
||||
struct crst_table *table = dereference_asce(asce);
|
||||
struct dat_walk walk = {
|
||||
.ops = ops,
|
||||
.asce = asce,
|
||||
.priv = priv,
|
||||
.flags = flags,
|
||||
.start = start,
|
||||
.end = end,
|
||||
};
|
||||
|
||||
if (WARN_ON_ONCE(unlikely(!asce.val)))
|
||||
return -EINVAL;
|
||||
if (!asce_contains_gfn(asce, start))
|
||||
return (flags & DAT_WALK_IGN_HOLES) ? 0 : -EFAULT;
|
||||
|
||||
return dat_crste_walk_range(start, min(end, asce_end(asce)), table, &walk);
|
||||
}
|
||||
|
||||
@@ -45,6 +45,7 @@ enum {
|
||||
#define TABLE_TYPE_PAGE_TABLE -1
|
||||
|
||||
enum dat_walk_flags {
|
||||
DAT_WALK_USES_SKEYS = 0x40,
|
||||
DAT_WALK_CONTINUE = 0x20,
|
||||
DAT_WALK_IGN_HOLES = 0x10,
|
||||
DAT_WALK_SPLIT = 0x08,
|
||||
@@ -332,6 +333,34 @@ struct page_table {
|
||||
static_assert(sizeof(struct crst_table) == _CRST_TABLE_SIZE);
|
||||
static_assert(sizeof(struct page_table) == PAGE_SIZE);
|
||||
|
||||
struct dat_walk;
|
||||
|
||||
/*
 * dat_walk_op - Callback for a region or segment table entry.
 * @crste: the entry being visited
 * @gfn: the first guest frame covered by the entry
 * @next: the guest frame immediately after the ones covered by the entry
 * @w: the state of the walk
 *
 * A non-zero return value stops the walk.
 */
typedef long (*dat_walk_op)(union crste *crste, gfn_t gfn, gfn_t next, struct dat_walk *w);
|
||||
|
||||
/**
 * struct dat_walk_ops - Callbacks for walking DAT tables.
 * @crste_ops: the region and segment callbacks as an array; the table walker
 *             indexes it with the table type (h.tt) of the entry. It shares
 *             storage with the four named members, in their declared order.
 * @pmd_entry: first element of @crste_ops
 * @pud_entry: second element of @crste_ops
 * @p4d_entry: third element of @crste_ops
 * @pgd_entry: fourth element of @crste_ops
 * @pte_entry: callback for page table entries; page tables are only walked
 *             when it is not NULL
 *
 * The table walker skips callbacks that are NULL.
 */
struct dat_walk_ops {
|
||||
union {
|
||||
dat_walk_op crste_ops[4];
|
||||
struct {
|
||||
dat_walk_op pmd_entry;
|
||||
dat_walk_op pud_entry;
|
||||
dat_walk_op p4d_entry;
|
||||
dat_walk_op pgd_entry;
|
||||
};
|
||||
};
|
||||
long (*pte_entry)(union pte *pte, gfn_t gfn, gfn_t next, struct dat_walk *w);
|
||||
};
|
||||
|
||||
/**
 * struct dat_walk - State of a walk over DAT tables.
 * @ops: the callbacks used for the walk
 * @last: the region or segment table entry being visited; updated by the
 *        table walker before the callback for the entry is called
 * @last_pte: presumably the pte being visited; NOTE(review): not written by
 *            the walk functions visible here, confirm against the users
 * @asce: the ASCE of the address space being walked
 * @start: the first guest frame of the walk, as passed by the caller
 * @end: the guest frame immediately after the last one of the walk, as
 *       passed by the caller
 * @flags: the DAT_WALK_* flags of the walk
 * @priv: opaque pointer from the caller, for use by the callbacks
 */
struct dat_walk {
|
||||
const struct dat_walk_ops *ops;
|
||||
union crste *last;
|
||||
union pte *last_pte;
|
||||
union asce asce;
|
||||
gfn_t start;
|
||||
gfn_t end;
|
||||
int flags;
|
||||
void *priv;
|
||||
};
|
||||
|
||||
/**
|
||||
* _pte() - Useful constructor for union pte
|
||||
* @pfn: the pfn this pte should point to.
|
||||
@@ -436,6 +465,11 @@ bool dat_crstep_xchg_atomic(union crste *crstep, union crste old, union crste ne
|
||||
union asce asce);
|
||||
void dat_crstep_xchg(union crste *crstep, union crste new, gfn_t gfn, union asce asce);
|
||||
|
||||
long _dat_walk_gfn_range(gfn_t start, gfn_t end, union asce asce,
|
||||
const struct dat_walk_ops *ops, int flags, void *priv);
|
||||
|
||||
int dat_entry_walk(struct kvm_s390_mmu_cache *mc, gfn_t gfn, union asce asce, int flags,
|
||||
int walk_level, union crste **last, union pte **ptepp);
|
||||
void dat_free_level(struct crst_table *table, bool owns_ptes);
|
||||
struct crst_table *dat_alloc_crst_sleepable(unsigned long init);
|
||||
|
||||
@@ -834,4 +868,9 @@ static inline void dat_crstep_clear(union crste *crstep, gfn_t gfn, union asce a
|
||||
dat_crstep_xchg(crstep, newcrste, gfn, asce);
|
||||
}
|
||||
|
||||
static inline int get_level(union crste *crstep, union pte *ptep)
|
||||
{
|
||||
return ptep ? TABLE_TYPE_PAGE_TABLE : crstep->h.tt;
|
||||
}
|
||||
|
||||
#endif /* __KVM_S390_DAT_H */
|
||||
|
||||
Reference in New Issue
Block a user