Commit 91f386bf authored by Mike Kravetz's avatar Mike Kravetz Committed by Andrew Morton
Browse files

hugetlb: batch freeing of vmemmap pages

Now that batching of hugetlb vmemmap optimization processing is possible,
batch the freeing of vmemmap pages.  When freeing vmemmap pages for a
hugetlb page, we add them to a list that is freed after the entire batch
has been processed.

This enhances the ability to return contiguous ranges of memory to the low
level allocators.

Link: https://lkml.kernel.org/r/20231019023113.345257-6-mike.kravetz@oracle.com


Signed-off-by: default avatarMike Kravetz <mike.kravetz@oracle.com>
Reviewed-by: default avatarMuchun Song <songmuchun@bytedance.com>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Barry Song <21cnbao@gmail.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: David Rientjes <rientjes@google.com>
Cc: James Houghton <jthoughton@google.com>
Cc: Joao Martins <joao.m.martins@oracle.com>
Cc: Konrad Dybcio <konradybcio@kernel.org>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Naoya Horiguchi <naoya.horiguchi@linux.dev>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Sergey Senozhatsky <senozhatsky@chromium.org>
Cc: Usama Arif <usama.arif@bytedance.com>
Cc: Xiongchun Duan <duanxiongchun@bytedance.com>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
parent cfb8c750
Loading
Loading
Loading
Loading
+56 −26
Original line number Diff line number Diff line
@@ -251,7 +251,7 @@ static void vmemmap_remap_pte(pte_t *pte, unsigned long addr,
	}

	entry = mk_pte(walk->reuse_page, pgprot);
	list_add_tail(&page->lru, walk->vmemmap_pages);
	list_add(&page->lru, walk->vmemmap_pages);
	set_pte_at(&init_mm, addr, pte, entry);
}

@@ -306,18 +306,20 @@ static void vmemmap_restore_pte(pte_t *pte, unsigned long addr,
 * @end:	end address of the vmemmap virtual address range that we want to
 *		remap.
 * @reuse:	reuse address.
 * @vmemmap_pages: list to deposit vmemmap pages to be freed.  It is callers
 *		responsibility to free pages.
 *
 * Return: %0 on success, negative error code otherwise.
 */
static int vmemmap_remap_free(unsigned long start, unsigned long end,
			      unsigned long reuse)
			      unsigned long reuse,
			      struct list_head *vmemmap_pages)
{
	int ret;
	LIST_HEAD(vmemmap_pages);
	struct vmemmap_remap_walk walk = {
		.remap_pte	= vmemmap_remap_pte,
		.reuse_addr	= reuse,
		.vmemmap_pages	= &vmemmap_pages,
		.vmemmap_pages	= vmemmap_pages,
	};
	int nid = page_to_nid((struct page *)reuse);
	gfp_t gfp_mask = GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN;
@@ -334,7 +336,7 @@ static int vmemmap_remap_free(unsigned long start, unsigned long end,
	if (walk.reuse_page) {
		copy_page(page_to_virt(walk.reuse_page),
			  (void *)walk.reuse_addr);
		list_add(&walk.reuse_page->lru, &vmemmap_pages);
		list_add(&walk.reuse_page->lru, vmemmap_pages);
	}

	/*
@@ -365,15 +367,13 @@ static int vmemmap_remap_free(unsigned long start, unsigned long end,
		walk = (struct vmemmap_remap_walk) {
			.remap_pte	= vmemmap_restore_pte,
			.reuse_addr	= reuse,
			.vmemmap_pages	= &vmemmap_pages,
			.vmemmap_pages	= vmemmap_pages,
		};

		vmemmap_remap_range(reuse, end, &walk);
	}
	mmap_read_unlock(&init_mm);

	free_vmemmap_page_list(&vmemmap_pages);

	return ret;
}

@@ -389,7 +389,7 @@ static int alloc_vmemmap_page_list(unsigned long start, unsigned long end,
		page = alloc_pages_node(nid, gfp_mask, 0);
		if (!page)
			goto out;
		list_add_tail(&page->lru, list);
		list_add(&page->lru, list);
	}

	return 0;
@@ -577,24 +577,17 @@ static bool vmemmap_should_optimize(const struct hstate *h, const struct page *h
	return true;
}

/**
 * hugetlb_vmemmap_optimize - optimize @head page's vmemmap pages.
 * @h:		struct hstate.
 * @head:	the head page whose vmemmap pages will be optimized.
 *
 * This function only tries to optimize @head's vmemmap pages and does not
 * guarantee that the optimization will succeed after it returns. The caller
 * can use HPageVmemmapOptimized(@head) to detect if @head's vmemmap pages
 * have been optimized.
 */
void hugetlb_vmemmap_optimize(const struct hstate *h, struct page *head)
static int __hugetlb_vmemmap_optimize(const struct hstate *h,
					struct page *head,
					struct list_head *vmemmap_pages)
{
	int ret = 0;
	unsigned long vmemmap_start = (unsigned long)head, vmemmap_end;
	unsigned long vmemmap_reuse;

	VM_WARN_ON_ONCE(!PageHuge(head));
	if (!vmemmap_should_optimize(h, head))
		return;
		return ret;

	static_branch_inc(&hugetlb_optimize_vmemmap_key);

@@ -604,21 +597,58 @@ void hugetlb_vmemmap_optimize(const struct hstate *h, struct page *head)

	/*
	 * Remap the vmemmap virtual address range [@vmemmap_start, @vmemmap_end)
	 * to the page which @vmemmap_reuse is mapped to, then free the pages
	 * which the range [@vmemmap_start, @vmemmap_end] is mapped to.
	 * to the page which @vmemmap_reuse is mapped to.  Add pages previously
	 * mapping the range to vmemmap_pages list so that they can be freed by
	 * the caller.
	 */
	if (vmemmap_remap_free(vmemmap_start, vmemmap_end, vmemmap_reuse))
	ret = vmemmap_remap_free(vmemmap_start, vmemmap_end, vmemmap_reuse, vmemmap_pages);
	if (ret)
		static_branch_dec(&hugetlb_optimize_vmemmap_key);
	else
		SetHPageVmemmapOptimized(head);

	return ret;
}

/**
 * hugetlb_vmemmap_optimize - optimize @head page's vmemmap pages.
 * @h:		struct hstate.
 * @head:	the head page whose vmemmap pages will be optimized.
 *
 * This function only tries to optimize @head's vmemmap pages and does not
 * guarantee that the optimization will succeed after it returns. The caller
 * can use HPageVmemmapOptimized(@head) to detect if @head's vmemmap pages
 * have been optimized.
 */
void hugetlb_vmemmap_optimize(const struct hstate *h, struct page *head)
{
	LIST_HEAD(vmemmap_pages);

	__hugetlb_vmemmap_optimize(h, head, &vmemmap_pages);
	free_vmemmap_page_list(&vmemmap_pages);
}

void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_list)
{
	struct folio *folio;
	LIST_HEAD(vmemmap_pages);

	list_for_each_entry(folio, folio_list, lru)
		hugetlb_vmemmap_optimize(h, &folio->page);
	list_for_each_entry(folio, folio_list, lru) {
		int ret = __hugetlb_vmemmap_optimize(h, &folio->page,
								&vmemmap_pages);

		/*
		 * Pages to be freed may have been accumulated.  If we
		 * encounter an ENOMEM,  free what we have and try again.
		 */
		if (ret == -ENOMEM && !list_empty(&vmemmap_pages)) {
			free_vmemmap_page_list(&vmemmap_pages);
			INIT_LIST_HEAD(&vmemmap_pages);
			__hugetlb_vmemmap_optimize(h, &folio->page, &vmemmap_pages);
		}
	}

	free_vmemmap_page_list(&vmemmap_pages);
}

static struct ctl_table hugetlb_vmemmap_sysctls[] = {