powerpc/mm: remove hugepd leftovers (0c22e4b2) · Commits · git / linux-net

arch/powerpc/include/asm/hugetlb.h

+0 −7

Original line number	Diff line number	Diff line
		@@ -30,13 +30,6 @@ static inline int is_hugepage_only_range(struct mm_struct *mm,
		}
		#define is_hugepage_only_range is_hugepage_only_range

		/*
		 * Prototype for the hugepd-aware page table teardown routine. It is
		 * only declared when CONFIG_ARCH_HAS_HUGEPD is set, together with the
		 * __HAVE_ARCH_HUGETLB_FREE_PGD_RANGE marker; presumably the marker tells
		 * generic hugetlb code that the architecture supplies its own
		 * implementation (confirm against the generic hugetlb header).
		 */
		#ifdef CONFIG_ARCH_HAS_HUGEPD
		#define __HAVE_ARCH_HUGETLB_FREE_PGD_RANGE
		void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr,
		unsigned long end, unsigned long floor,
		unsigned long ceiling);
		#endif

		/*
		 * Architecture-provided set_huge_pte_at(). The address space and the
		 * page table entry slot are passed by pointer; the value to install
		 * (pte) and the mapping size (sz) are passed by value.
		 */
		#define __HAVE_ARCH_HUGE_SET_HUGE_PTE_AT
		void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
				     pte_t pte, unsigned long sz);

arch/powerpc/include/asm/page.h

+0 −6

Original line number	Diff line number	Diff line
		@@ -269,12 +269,6 @@ static inline const void *pfn_to_kaddr(unsigned long pfn)
		#define is_kernel_addr(x) ((x) >= TASK_SIZE)
		#endif

		/*
		* Some number of bits at the level of the page table that points to
		* a hugepte are used to encode the size. This masks those bits:
		* 0x3f selects the low six bits of the entry.
		*/
		#define HUGEPD_SHIFT_MASK 0x3f

		#ifndef __ASSEMBLY__

		#ifdef CONFIG_PPC_BOOK3S_64

arch/powerpc/include/asm/pgtable-be-types.h

+0 −10

Original line number	Diff line number	Diff line
		@@ -101,14 +101,4 @@ static inline bool pmd_xchg(pmd_t *pmdp, pmd_t old, pmd_t new)
		return pmd_raw(old) == prev;
		}

		#ifdef CONFIG_ARCH_HAS_HUGEPD
		/* Huge page directory entry, stored as a 64-bit big-endian value. */
		typedef struct { __be64 pdbe; } hugepd_t;

		/* Build a hugepd_t from a CPU-endian value, converting it for storage. */
		#define __hugepd(x)	((hugepd_t) { .pdbe = cpu_to_be64(x) })

		/* Return the value held in a hugepd_t, converted back to CPU byte order. */
		static inline unsigned long hpd_val(hugepd_t x)
		{
			unsigned long raw = be64_to_cpu(x.pdbe);

			return raw;
		}
		#endif

		#endif /* _ASM_POWERPC_PGTABLE_BE_TYPES_H */

arch/powerpc/include/asm/pgtable-types.h

+0 −9

Original line number	Diff line number	Diff line
		@@ -87,13 +87,4 @@ static inline bool pte_xchg(pte_t *ptep, pte_t old, pte_t new)
		}
		#endif

		#ifdef CONFIG_ARCH_HAS_HUGEPD
		/* Huge page directory entry: a single unsigned long wrapped in a struct. */
		typedef struct { unsigned long pd; } hugepd_t;

		/* Build a hugepd_t from a raw value. */
		#define __hugepd(x)	((hugepd_t) { .pd = (x) })

		/* Return the raw value held in a hugepd_t. */
		static inline unsigned long hpd_val(hugepd_t x)
		{
			unsigned long raw = x.pd;

			return raw;
		}
		#endif

		#endif /* _ASM_POWERPC_PGTABLE_TYPES_H */

arch/powerpc/mm/hugetlbpage.c

+0 −413

Original line number	Diff line number	Diff line
		@@ -28,8 +28,6 @@

		bool hugetlb_disabled = false;

		/* True when the huge page directory entry holds no value (reads as zero). */
		#define hugepd_none(hpd)	(!hpd_val(hpd))

		/*
		 * Difference between the lowest-set-bit positions of sizeof(pte_basic_t)
		 * and sizeof(void *). For power-of-two sizes this is log2 of their ratio.
		 * Used below as a PGT_CACHE() index.
		 */
		#define PTE_T_ORDER	(__builtin_ffs(sizeof(pte_basic_t)) - __builtin_ffs(sizeof(void *)))

		@@ -42,156 +40,6 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, unsigned long s
		return __find_linux_pte(mm->pgd, addr, NULL, NULL);
		}

		#ifdef CONFIG_ARCH_HAS_HUGEPD
		/*
		 * Allocate a hugepte table and hook it into one or more hugepd entries.
		 *
		 * @mm:      address space the table is allocated for (used for GFP flags)
		 * @hpdp:    first hugepd entry to populate
		 * @address: not referenced by this function
		 * @pdshift: shift of the page table level that holds @hpdp
		 * @pshift:  shift of the huge page size
		 * @ptl:     lock taken while the hugepd entries are written
		 *
		 * When the huge page is at least as large as the directory level
		 * (pshift >= pdshift), 1 << (pshift - pdshift) consecutive entries are
		 * pointed at the same table; otherwise a single entry is populated.
		 *
		 * Returns -ENOMEM when no cache exists or the allocation fails, and 0
		 * otherwise. Note that 0 is also returned when an entry was found
		 * already populated: in that case the entries written so far are
		 * cleared again and the new table is freed.
		 */
		static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
					   unsigned long address, unsigned int pdshift,
					   unsigned int pshift, spinlock_t *ptl)
		{
			struct kmem_cache *cachep;
			pte_t *new;
			int i;
			int num_hugepd;

			if (pshift >= pdshift) {
				cachep = PGT_CACHE(PTE_T_ORDER);
				num_hugepd = 1 << (pshift - pdshift);
			} else {
				cachep = PGT_CACHE(pdshift - pshift);
				num_hugepd = 1;
			}

			if (!cachep) {
				WARN_ONCE(1, "No page table cache created for hugetlb tables");
				return -ENOMEM;
			}

			new = kmem_cache_alloc(cachep, pgtable_gfp_flags(mm, GFP_KERNEL));

			/* The low HUGEPD_SHIFT_MASK bits must be free in the table address. */
			BUG_ON(pshift > HUGEPD_SHIFT_MASK);
			BUG_ON((unsigned long)new & HUGEPD_SHIFT_MASK);

			if (!new)
				return -ENOMEM;

			/*
			 * Make sure other cpus find the hugepd set only after a
			 * properly initialized page table is visible to them.
			 * For more details look for comment in __pte_alloc().
			 */
			smp_wmb();

			spin_lock(ptl);
			/*
			 * We have multiple higher-level entries that point to the same
			 * actual pte location. Fill in each as we go and backtrack on error.
			 * We need all of these so the DTLB pgtable walk code can find the
			 * right higher-level entry without knowing if it's a hugepage or not.
			 */
			for (i = 0; i < num_hugepd; i++, hpdp++) {
				if (unlikely(!hugepd_none(*hpdp)))
					break;
				hugepd_populate(hpdp, new, pshift);
			}
			/* If we bailed from the for loop early, an error occurred, clean up */
			if (i < num_hugepd) {
				for (i = i - 1 ; i >= 0; i--, hpdp--)
					*hpdp = __hugepd(0);
				kmem_cache_free(cachep, new);
			} else {
				kmemleak_ignore(new);
			}
			spin_unlock(ptl);
			return 0;
		}

		/*
		* At this point we do the placement change only for BOOK3S 64. This would
		* possibly work on other subarchs.
		*/
		pte_t huge_pte_alloc(struct mm_struct mm, struct vm_area_struct *vma,
		unsigned long addr, unsigned long sz)
		{
		pgd_t *pg;
		p4d_t *p4;
		pud_t *pu;
		pmd_t *pm;
		hugepd_t *hpdp = NULL;
		unsigned pshift = __ffs(sz);
		unsigned pdshift = PGDIR_SHIFT;
		spinlock_t *ptl;

		addr &= ~(sz-1);
		pg = pgd_offset(mm, addr);
		p4 = p4d_offset(pg, addr);

		#ifdef CONFIG_PPC_BOOK3S_64
		if (pshift == PGDIR_SHIFT)
		/* 16GB huge page */
		return (pte_t *) p4;
		else if (pshift > PUD_SHIFT) {
		/*
		* We need to use hugepd table
		*/
		ptl = &mm->page_table_lock;
		hpdp = (hugepd_t *)p4;
		} else {
		pdshift = PUD_SHIFT;
		pu = pud_alloc(mm, p4, addr);
		if (!pu)
		return NULL;
		if (pshift == PUD_SHIFT)
		return (pte_t *)pu;
		else if (pshift > PMD_SHIFT) {
		ptl = pud_lockptr(mm, pu);
		hpdp = (hugepd_t *)pu;
		} else {
		pdshift = PMD_SHIFT;
		pm = pmd_alloc(mm, pu, addr);
		if (!pm)
		return NULL;
		if (pshift == PMD_SHIFT)
		/* 16MB hugepage */
		return (pte_t *)pm;
		else {
		ptl = pmd_lockptr(mm, pm);
		hpdp = (hugepd_t *)pm;
		}
		}
		}
		#else
		if (pshift >= PGDIR_SHIFT) {
		ptl = &mm->page_table_lock;
		hpdp = (hugepd_t *)p4;
		} else {
		pdshift = PUD_SHIFT;
		pu = pud_alloc(mm, p4, addr);
		if (!pu)
		return NULL;
		if (pshift >= PUD_SHIFT) {
		ptl = pud_lockptr(mm, pu);
		hpdp = (hugepd_t *)pu;
		} else {
		pdshift = PMD_SHIFT;
		pm = pmd_alloc(mm, pu, addr);
		if (!pm)
		return NULL;
		ptl = pmd_lockptr(mm, pm);
		hpdp = (hugepd_t *)pm;
		}
		}
		#endif
		if (!hpdp)
		return NULL;

		BUG_ON(!hugepd_none(hpdp) && !hugepd_ok(hpdp));

		if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr,
		pdshift, pshift, ptl))
		return NULL;

		return hugepte_offset(*hpdp, addr, pdshift);
		}
		#else
		pte_t huge_pte_alloc(struct mm_struct mm, struct vm_area_struct *vma,
		unsigned long addr, unsigned long sz)
		{
		@@ -230,7 +78,6 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,

		return pte_alloc_huge(mm, pmd, addr);
		}
		#endif

		#ifdef CONFIG_PPC_BOOK3S_64
		/*
		@@ -286,266 +133,6 @@ int __init alloc_bootmem_huge_page(struct hstate *h, int nid)
		return __alloc_bootmem_huge_page(h, nid);
		}

		#ifdef CONFIG_ARCH_HAS_HUGEPD
		#ifndef CONFIG_PPC_BOOK3S_64
		/*
		 * Number of pointer slots that fit in one page after the
		 * struct hugepd_freelist header.
		 * NOTE(review): the divisor is sizeof(pte_t) although the slots are
		 * declared as void * — confirm the two sizes agree on the affected
		 * configurations.
		 */
		#define HUGEPD_FREELIST_SIZE \
		((PAGE_SIZE - sizeof(struct hugepd_freelist)) / sizeof(pte_t))

		/* One page-sized batch of hugepte tables waiting to be freed after RCU. */
		struct hugepd_freelist {
		struct rcu_head rcu;	/* handed to call_rcu() when the batch is full */
		unsigned int index;	/* number of slots of ptes[] currently in use */
		void *ptes[];		/* tables queued for kmem_cache_free() */
		};

		/* Per-CPU pointer to the batch currently being filled (NULL when none). */
		static DEFINE_PER_CPU(struct hugepd_freelist *, hugepd_freelist_cur);

		/*
		 * RCU callback: release every hugepte table queued in the batch that
		 * embeds @head, then free the page that held the batch itself.
		 */
		static void hugepd_free_rcu_callback(struct rcu_head *head)
		{
			struct hugepd_freelist *fl;
			unsigned int slot = 0;

			fl = container_of(head, struct hugepd_freelist, rcu);

			while (slot < fl->index) {
				kmem_cache_free(PGT_CACHE(PTE_T_ORDER), fl->ptes[slot]);
				slot++;
			}

			free_page((unsigned long)fl);
		}

		static void hugepd_free(struct mmu_gather tlb, void hugepte)
		{
		struct hugepd_freelist **batchp;

		batchp = &get_cpu_var(hugepd_freelist_cur);

		if (atomic_read(&tlb->mm->mm_users) < 2 \|\|
		mm_is_thread_local(tlb->mm)) {
		kmem_cache_free(PGT_CACHE(PTE_T_ORDER), hugepte);
		put_cpu_var(hugepd_freelist_cur);
		return;
		}

		if (*batchp == NULL) {
		batchp = (struct hugepd_freelist )__get_free_page(GFP_ATOMIC);
		(*batchp)->index = 0;
		}

		(batchp)->ptes[(batchp)->index++] = hugepte;
		if ((*batchp)->index == HUGEPD_FREELIST_SIZE) {
		call_rcu(&(*batchp)->rcu, hugepd_free_rcu_callback);
		*batchp = NULL;
		}
		put_cpu_var(hugepd_freelist_cur);
		}
		#else
		/* Empty stub used when CONFIG_PPC_BOOK3S_64 is set. */
		static inline void hugepd_free(struct mmu_gather *tlb, void *hugepte) {}
		#endif

		/*
		 * Tell whether the table entry covering [start, end) maps more than the
		 * area allowed by [floor, ceiling), in which case it must not be freed.
		 *
		 * @mask aligns addresses to the span of one entry at the current level.
		 * A @ceiling of 0 means "no upper limit": the final comparison relies on
		 * unsigned wraparound, so that 0 - 1 becomes the largest address.
		 */
		static bool range_is_outside_limits(unsigned long start, unsigned long end,
						    unsigned long floor, unsigned long ceiling,
						    unsigned long mask)
		{
			const unsigned long aligned_start = start & mask;
			unsigned long limit = ceiling;

			/* The entry begins below the lowest address we may free. */
			if (aligned_start < floor)
				return true;

			if (limit != 0) {
				limit &= mask;
				/* A non-zero ceiling that aligns down to 0 leaves no room. */
				if (limit == 0)
					return true;
			}

			return (end - 1) > (limit - 1);
		}

		/*
		 * Clear the hugepd entry (or group of entries) at @hpdp and free the
		 * hugepte table it points to.
		 *
		 * @pdshift is the shift of the level holding @hpdp. Nothing is done when
		 * the span of the entry extends beyond [floor, ceiling). When the huge
		 * page is larger than one entry's span, all 1 << (shift - pdshift)
		 * entries that share the table are cleared.
		 */
		static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshift,
					      unsigned long start, unsigned long end,
					      unsigned long floor, unsigned long ceiling)
		{
			/* Read the table pointer before the entries are cleared below. */
			pte_t *hugepte = hugepd_page(*hpdp);
			int i;

			unsigned long pdmask = ~((1UL << pdshift) - 1);
			unsigned int num_hugepd = 1;
			unsigned int shift = hugepd_shift(*hpdp);

			/* Note: On fsl the hpdp may be the first of several */
			if (shift > pdshift)
				num_hugepd = 1 << (shift - pdshift);

			if (range_is_outside_limits(start, end, floor, ceiling, pdmask))
				return;

			for (i = 0; i < num_hugepd; i++, hpdp++)
				*hpdp = __hugepd(0);

			if (shift >= pdshift)
				hugepd_free(tlb, hugepte);
			else
				pgtable_free_tlb(tlb, hugepte,
						 get_hugepd_cache_index(pdshift - shift));
		}

		/*
		 * Free the PTE page referenced by @pmd, unless the PMD entry's span
		 * extends beyond [floor, ceiling). On success the PMD entry is cleared
		 * and the mm's PTE page count is decremented.
		 */
		static void hugetlb_free_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
						   unsigned long addr, unsigned long end,
						   unsigned long floor, unsigned long ceiling)
		{
			/* Fetch the table before pmd_clear() wipes the entry. */
			pgtable_t token = pmd_pgtable(*pmd);

			if (range_is_outside_limits(addr, end, floor, ceiling, PMD_MASK))
				return;

			pmd_clear(pmd);
			pte_free_tlb(tlb, token, addr);
			mm_dec_nr_ptes(tlb->mm);
		}

		/*
		 * Walk the PMD entries under @pud covering [addr, end), freeing hugepd
		 * tables and PTE pages, then free the PMD page itself if the PUD
		 * entry's span lies within [floor, ceiling).
		 */
		static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
						   unsigned long addr, unsigned long end,
						   unsigned long floor, unsigned long ceiling)
		{
			pmd_t *pmd;
			unsigned long next;
			unsigned long start;

			start = addr;
			do {
				unsigned long more;

				pmd = pmd_offset(pud, addr);
				next = pmd_addr_end(addr, end);
				if (!is_hugepd(__hugepd(pmd_val(*pmd)))) {
					if (pmd_none_or_clear_bad(pmd))
						continue;

					/*
					 * if it is not hugepd pointer, we should already find
					 * it cleared.
					 */
					WARN_ON(!IS_ENABLED(CONFIG_PPC_8xx));

					hugetlb_free_pte_range(tlb, pmd, addr, end, floor, ceiling);

					continue;
				}
				/*
				 * Increment next by the size of the huge mapping since
				 * there may be more than one entry at this level for a
				 * single hugepage, but all of them point to
				 * the same kmem cache that holds the hugepte.
				 */
				more = addr + (1UL << hugepd_shift(*(hugepd_t *)pmd));
				if (more > next)
					next = more;

				free_hugepd_range(tlb, (hugepd_t *)pmd, PMD_SHIFT,
						  addr, next, floor, ceiling);
			} while (addr = next, addr != end);

			if (range_is_outside_limits(start, end, floor, ceiling, PUD_MASK))
				return;

			pmd = pmd_offset(pud, start & PUD_MASK);
			pud_clear(pud);
			pmd_free_tlb(tlb, pmd, start & PUD_MASK);
			mm_dec_nr_pmds(tlb->mm);
		}

		/*
		 * Walk the PUD entries under @p4d covering [addr, end), descending into
		 * PMD tables or freeing hugepd tables, then free the PUD page itself if
		 * the P4D entry's span lies within [floor, ceiling).
		 */
		static void hugetlb_free_pud_range(struct mmu_gather *tlb, p4d_t *p4d,
						   unsigned long addr, unsigned long end,
						   unsigned long floor, unsigned long ceiling)
		{
			pud_t *pud;
			unsigned long next;
			unsigned long start;

			start = addr;
			do {
				pud = pud_offset(p4d, addr);
				next = pud_addr_end(addr, end);
				if (!is_hugepd(__hugepd(pud_val(*pud)))) {
					if (pud_none_or_clear_bad(pud))
						continue;
					hugetlb_free_pmd_range(tlb, pud, addr, next, floor,
							       ceiling);
				} else {
					unsigned long more;
					/*
					 * Increment next by the size of the huge mapping since
					 * there may be more than one entry at this level for a
					 * single hugepage, but all of them point to
					 * the same kmem cache that holds the hugepte.
					 */
					more = addr + (1UL << hugepd_shift(*(hugepd_t *)pud));
					if (more > next)
						next = more;

					free_hugepd_range(tlb, (hugepd_t *)pud, PUD_SHIFT,
							  addr, next, floor, ceiling);
				}
			} while (addr = next, addr != end);

			if (range_is_outside_limits(start, end, floor, ceiling, PGDIR_MASK))
				return;

			pud = pud_offset(p4d, start & PGDIR_MASK);
			p4d_clear(p4d);
			pud_free_tlb(tlb, pud, start & PGDIR_MASK);
			mm_dec_nr_puds(tlb->mm);
		}

		/*
		 * This function frees user-level page tables of a process.
		 *
		 * It walks the top-level entries of tlb->mm covering [addr, end). Each
		 * entry is either handed to hugetlb_free_pud_range() or, when it is a
		 * hugepd pointer, freed through free_hugepd_range(). @floor and
		 * @ceiling bound the region whose page tables may be released.
		 */
		void hugetlb_free_pgd_range(struct mmu_gather *tlb,
					    unsigned long addr, unsigned long end,
					    unsigned long floor, unsigned long ceiling)
		{
			pgd_t *pgd;
			p4d_t *p4d;
			unsigned long next;

			/*
			 * Because there are a number of different possible pagetable
			 * layouts for hugepage ranges, we limit knowledge of how
			 * things should be laid out to the allocation path
			 * (huge_pte_alloc(), above). Everything else works out the
			 * structure as it goes from information in the hugepd
			 * pointers. That means that we can't here use the
			 * optimization used in the normal page free_pgd_range(), of
			 * checking whether we're actually covering a large enough
			 * range to have to do anything at the top level of the walk
			 * instead of at the bottom.
			 *
			 * To make sense of this, you should probably go read the big
			 * block comment at the top of the normal free_pgd_range(),
			 * too.
			 */

			do {
				next = pgd_addr_end(addr, end);
				pgd = pgd_offset(tlb->mm, addr);
				p4d = p4d_offset(pgd, addr);
				if (!is_hugepd(__hugepd(pgd_val(*pgd)))) {
					if (p4d_none_or_clear_bad(p4d))
						continue;
					hugetlb_free_pud_range(tlb, p4d, addr, next, floor, ceiling);
				} else {
					unsigned long more;
					/*
					 * Increment next by the size of the huge mapping since
					 * there may be more than one entry at the pgd level
					 * for a single hugepage, but all of them point to the
					 * same kmem cache that holds the hugepte.
					 */
					more = addr + (1UL << hugepd_shift(*(hugepd_t *)pgd));
					if (more > next)
						next = more;

					free_hugepd_range(tlb, (hugepd_t *)p4d, PGDIR_SHIFT,
							  addr, next, floor, ceiling);
				}
			} while (addr = next, addr != end);
		}
		#endif

		bool __init arch_hugetlb_valid_size(unsigned long size)
		{
		int shift = __ffs(size);