Commit a51324c4 authored by Heiko Carstens, committed by Vasily Gorbik
Browse files

s390/cmma: rework no-dat handling



Rework the way physical pages are set no-dat / dat:

The old way is:

- Rely on all pages being initially marked "dat"
- Allocate page tables for the kernel mapping
- Enable dat
- Walk the whole kernel mapping and set PG_arch_1 bit in all struct pages
  that belong to pages of kernel page tables
- Walk all struct pages and test and clear the PG_arch_1 bit. If the bit is
  not set, set the page state to no-dat
- For all subsequent page table allocations, set the page state to dat
  (remove the no-dat state) at allocation time

Change this rather complex logic to a simpler approach:

- Set the whole physical memory (all pages) to "no-dat"
- Explicitly set those page table pages to "dat" which are part of the
  kernel image (e.g. swapper_pg_dir)
- For all subsequent page table allocations, set the page state to dat
  (remove the no-dat state) at allocation time

As a result the code is simpler, and this also makes it possible to get
rid of one odd usage of the PG_arch_1 bit.

Reviewed-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
parent 65d37f16
Loading
Loading
Loading
Loading
+17 −0
Original line number Diff line number Diff line
@@ -2,6 +2,7 @@
#include <linux/sched/task.h>
#include <linux/pgtable.h>
#include <linux/kasan.h>
#include <asm/page-states.h>
#include <asm/pgalloc.h>
#include <asm/facility.h>
#include <asm/sections.h>
@@ -70,6 +71,10 @@ static void kasan_populate_shadow(void)
	crst_table_init((unsigned long *)kasan_early_shadow_pud, pud_val(pud_z));
	crst_table_init((unsigned long *)kasan_early_shadow_pmd, pmd_val(pmd_z));
	memset64((u64 *)kasan_early_shadow_pte, pte_val(pte_z), PTRS_PER_PTE);
	__arch_set_page_dat(kasan_early_shadow_p4d, 1UL << CRST_ALLOC_ORDER);
	__arch_set_page_dat(kasan_early_shadow_pud, 1UL << CRST_ALLOC_ORDER);
	__arch_set_page_dat(kasan_early_shadow_pmd, 1UL << CRST_ALLOC_ORDER);
	__arch_set_page_dat(kasan_early_shadow_pte, 1);

	/*
	 * Current memory layout:
@@ -223,6 +228,7 @@ static void *boot_crst_alloc(unsigned long val)

	table = (unsigned long *)physmem_alloc_top_down(RR_VMEM, size, size);
	crst_table_init(table, val);
	__arch_set_page_dat(table, 1UL << CRST_ALLOC_ORDER);
	return table;
}

@@ -238,6 +244,7 @@ static pte_t *boot_pte_alloc(void)
	if (!pte_leftover) {
		pte_leftover = (void *)physmem_alloc_top_down(RR_VMEM, PAGE_SIZE, PAGE_SIZE);
		pte = pte_leftover + _PAGE_TABLE_SIZE;
		__arch_set_page_dat(pte, 1);
	} else {
		pte = pte_leftover;
		pte_leftover = NULL;
@@ -418,6 +425,14 @@ void setup_vmem(unsigned long asce_limit)
	unsigned long asce_bits;
	int i;

	/*
	 * Mark whole memory as no-dat. This must be done before any
	 * page tables are allocated, or kernel image builtin pages
	 * are marked as dat tables.
	 */
	for_each_physmem_online_range(i, &start, &end)
		__arch_set_page_nodat((void *)start, (end - start) >> PAGE_SHIFT);

	if (asce_limit == _REGION1_SIZE) {
		asce_type = _REGION2_ENTRY_EMPTY;
		asce_bits = _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH;
@@ -429,6 +444,8 @@ void setup_vmem(unsigned long asce_limit)

	crst_table_init((unsigned long *)swapper_pg_dir, asce_type);
	crst_table_init((unsigned long *)invalid_pg_dir, _REGION3_ENTRY_EMPTY);
	__arch_set_page_dat((void *)swapper_pg_dir, 1UL << CRST_ALLOC_ORDER);
	__arch_set_page_dat((void *)invalid_pg_dir, 1UL << CRST_ALLOC_ORDER);

	/*
	 * To allow prefixing the lowcore must be mapped with 4KB pages.
+0 −2
Original line number Diff line number Diff line
@@ -125,8 +125,6 @@ static inline void vmcp_cma_reserve(void) { }

void report_user_fault(struct pt_regs *regs, long signr, int is_mm_fault);

void cmma_init_nodat(void);

extern void (*_machine_restart)(char *command);
extern void (*_machine_halt)(void);
extern void (*_machine_power_off)(void);
+0 −2
Original line number Diff line number Diff line
@@ -168,8 +168,6 @@ void __init mem_init(void)
	/* this will put all low memory onto the freelists */
	memblock_free_all();
	setup_zero_pages();	/* Setup zeroed pages. */

	cmma_init_nodat();
}

void free_initmem(void)
+2 −125
Original line number Diff line number Diff line
@@ -7,136 +7,13 @@
 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
 */

#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/memblock.h>
#include <linux/gfp.h>
#include <linux/init.h>
#include <asm/asm-extable.h>
#include <asm/facility.h>
#include <asm/page-states.h>
#include <asm/sections.h>
#include <asm/page.h>

int __bootdata_preserved(cmma_flag);

/*
 * Walk the pmd entries below @pud that cover [addr, end) and set
 * PG_arch_1 in the struct page each entry points to. Entries for which
 * pmd_none() or pmd_large() is true are left alone.
 */
static void mark_kernel_pmd(pud_t *pud, unsigned long addr, unsigned long end)
{
	pmd_t *entry = pmd_offset(pud, addr);
	unsigned long boundary;
	struct page *table;

	do {
		boundary = pmd_addr_end(addr, end);
		if (!pmd_none(*entry) && !pmd_large(*entry)) {
			table = phys_to_page(pmd_val(*entry));
			set_bit(PG_arch_1, &table->flags);
		}
		/* Advance to the next entry and the address range it covers */
		entry++;
		addr = boundary;
	} while (addr != end);
}

/*
 * Walk the pud entries below @p4d that cover [addr, end). For every
 * entry that is neither pud_none() nor pud_large(), set PG_arch_1 in the
 * four struct pages starting at the page the entry points to (unless the
 * level is folded), then descend into the pmd level.
 */
static void mark_kernel_pud(p4d_t *p4d, unsigned long addr, unsigned long end)
{
	pud_t *entry = pud_offset(p4d, addr);
	unsigned long boundary;
	struct page *table;
	int nr;

	do {
		boundary = pud_addr_end(addr, end);
		if (!pud_none(*entry) && !pud_large(*entry)) {
			if (!pud_folded(*entry)) {
				table = phys_to_page(pud_val(*entry));
				for (nr = 0; nr < 4; nr++)
					set_bit(PG_arch_1, &table[nr].flags);
			}
			mark_kernel_pmd(entry, addr, boundary);
		}
		/* Advance to the next entry and the address range it covers */
		entry++;
		addr = boundary;
	} while (addr != end);
}

/*
 * Walk the p4d entries below @pgd that cover [addr, end). For every
 * entry that is not p4d_none(), set PG_arch_1 in the four struct pages
 * starting at the page the entry points to (unless the level is folded),
 * then descend into the pud level.
 */
static void mark_kernel_p4d(pgd_t *pgd, unsigned long addr, unsigned long end)
{
	p4d_t *entry = p4d_offset(pgd, addr);
	unsigned long boundary;
	struct page *table;
	int nr;

	do {
		boundary = p4d_addr_end(addr, end);
		if (!p4d_none(*entry)) {
			if (!p4d_folded(*entry)) {
				table = phys_to_page(p4d_val(*entry));
				for (nr = 0; nr < 4; nr++)
					set_bit(PG_arch_1, &table[nr].flags);
			}
			mark_kernel_pud(entry, addr, boundary);
		}
		/* Advance to the next entry and the address range it covers */
		entry++;
		addr = boundary;
	} while (addr != end);
}

/*
 * Walk the kernel page table from address 0 up to the highest address
 * reachable through the kernel ASCE. For every pgd entry that is not
 * pgd_none(), set PG_arch_1 in the four struct pages starting at the
 * page the entry points to (unless the level is folded), then descend
 * into the p4d level.
 */
static void mark_kernel_pgd(void)
{
	unsigned long addr = 0, boundary, limit, asce_type;
	struct page *table;
	pgd_t *entry;
	int nr;

	/*
	 * Derive the maximum virtual address accessible with the kernel
	 * ASCE from its type field. The walk must stop there so that the
	 * page table walker never touches non-existent entries.
	 */
	asce_type = (S390_lowcore.kernel_asce.val & _ASCE_TYPE_MASK) >> 2;
	limit = 1UL << (asce_type * 11 + 31);

	entry = pgd_offset_k(addr);
	do {
		boundary = pgd_addr_end(addr, limit);
		if (!pgd_none(*entry)) {
			if (!pgd_folded(*entry)) {
				table = phys_to_page(pgd_val(*entry));
				for (nr = 0; nr < 4; nr++)
					set_bit(PG_arch_1, &table[nr].flags);
			}
			mark_kernel_p4d(entry, addr, boundary);
		}
		/* Advance to the next entry and the address range it covers */
		entry++;
		addr = boundary;
	} while (addr != limit);
}

/*
 * Set kernel pages that are not used for page tables to stable/no-dat.
 *
 * Does nothing when cmma_flag is below 2. Otherwise the pages of all
 * kernel page tables, including swapper_pg_dir and invalid_pg_dir, are
 * first tagged with PG_arch_1. Afterwards every page of every memory
 * pfn range is visited: the tag is cleared again, and pages that were
 * not tagged and whose lru list head is empty are set to stable/no-dat.
 */
void __init cmma_init_nodat(void)
{
	unsigned long pfn_first, pfn_end, pfn;
	struct page *pg;
	int idx;

	if (cmma_flag < 2)
		return;

	/* Tag the pages used in kernel page tables */
	mark_kernel_pgd();
	pg = virt_to_page(&swapper_pg_dir);
	for (idx = 0; idx < 4; idx++)
		set_bit(PG_arch_1, &pg[idx].flags);
	pg = virt_to_page(&invalid_pg_dir);
	for (idx = 0; idx < 4; idx++)
		set_bit(PG_arch_1, &pg[idx].flags);

	for_each_mem_pfn_range(idx, MAX_NUMNODES, &pfn_first, &pfn_end, NULL) {
		pg = pfn_to_page(pfn_first);
		for (pfn = pfn_first; pfn < pfn_end; pfn++, pg++) {
			/*
			 * The test-and-clear has to run for every page so
			 * that no tag is left behind; it is therefore the
			 * first operand. Tagged pages are page table pages,
			 * pages with a non-empty lru list are free pages;
			 * both are skipped.
			 */
			if (!__test_and_clear_bit(PG_arch_1, &pg->flags) &&
			    list_empty(&pg->lru))
				__set_page_stable_nodat(page_to_virt(pg), 1);
		}
	}
}

void arch_free_page(struct page *page, int order)
{
	if (!cmma_flag)
+2 −2
Original line number Diff line number Diff line
@@ -50,7 +50,6 @@ void *vmem_crst_alloc(unsigned long val)
	if (!table)
		return NULL;
	crst_table_init(table, val);
	if (slab_is_available())
	__arch_set_page_dat(table, 1UL << CRST_ALLOC_ORDER);
	return table;
}
@@ -67,6 +66,7 @@ pte_t __ref *vmem_pte_alloc(void)
	if (!pte)
		return NULL;
	memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE);
	__arch_set_page_dat(pte, 1);
	return pte;
}