mm/huge_memory: add buddy allocator like (non-uniform) folio_split() (58729c04) · Commits · git / linux-net

mm/huge_memory.c

+128 −42

Original line number	Diff line number	Diff line
		@@ -3869,12 +3869,85 @@ static int __split_unmapped_folio(struct folio *folio, int new_order,
		return ret;
		}

		/*
		 * non_uniform_split_supported: check whether @folio may be split
		 * non-uniformly (buddy allocator like) down to @new_order
		 * @folio: folio to check
		 * @new_order: order of the folio that will contain the split point
		 * @warns: if true, emit a one-time VM warning explaining a rejection
		 *
		 * A non-uniform split leaves the remaining parts of @folio as large as
		 * possible, so folios of non-0 order are produced even when @new_order is
		 * 0. That is why the file-system and swapcache restrictions below are not
		 * conditional on @new_order.
		 *
		 * return: true if the split is supported, false otherwise
		 */
		static bool non_uniform_split_supported(struct folio *folio, unsigned int new_order,
				bool warns)
		{
			if (folio_test_anon(folio)) {
				/* order-1 is not supported for anonymous THP. */
				VM_WARN_ONCE(warns && new_order == 1,
						"Cannot split to order-1 folio");
				/*
				 * Do not return true here: an anonymous folio can also be
				 * in the swapcache, so it must still pass the swapcache
				 * check below.
				 */
				if (new_order == 1)
					return false;
			} else if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) &&
				   !mapping_large_folio_support(folio->mapping)) {
				/*
				 * No split if the file system does not support large folio.
				 * Note that we might still have THPs in such mappings due to
				 * CONFIG_READ_ONLY_THP_FOR_FS. But in that case, the mapping
				 * does not actually support large folios properly.
				 *
				 * NOTE(review): folio->mapping is passed on unchecked;
				 * confirm callers guarantee it is non-NULL for file folios.
				 */
				VM_WARN_ONCE(warns,
						"Cannot split file folio to non-0 order");
				return false;
			}

			/* Only swapping a whole PMD-mapped folio is supported */
			if (folio_test_swapcache(folio)) {
				VM_WARN_ONCE(warns,
						"Cannot split swapcache folio to non-0 order");
				return false;
			}

			return true;
		}

		/*
		 * uniform_split_supported: check whether @folio may be split uniformly
		 * into folios of @new_order
		 * @folio: folio to check
		 * @new_order: order of every folio after the split
		 * @warns: if true, emit a one-time VM warning explaining a rejection
		 *
		 * All resulting folios have order @new_order, so the file-system and
		 * swapcache restrictions only matter when @new_order is non-0.
		 *
		 * return: true if the split is supported, false otherwise
		 */
		static bool uniform_split_supported(struct folio *folio, unsigned int new_order,
				bool warns)
		{
			if (folio_test_anon(folio)) {
				/* order-1 is not supported for anonymous THP. */
				VM_WARN_ONCE(warns && new_order == 1,
						"Cannot split to order-1 folio");
				/*
				 * Do not return true here: an anonymous folio can also be
				 * in the swapcache, so it must still pass the swapcache
				 * check below.
				 */
				if (new_order == 1)
					return false;
			} else if (new_order) {
				/*
				 * No split to non-0 order if the file system does not
				 * support large folio. Such mappings may still hold THPs
				 * due to CONFIG_READ_ONLY_THP_FOR_FS.
				 */
				if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) &&
				    !mapping_large_folio_support(folio->mapping)) {
					VM_WARN_ONCE(warns,
							"Cannot split file folio to non-0 order");
					return false;
				}
			}

			/* Only swapping a whole PMD-mapped folio is supported */
			if (new_order && folio_test_swapcache(folio)) {
				VM_WARN_ONCE(warns,
						"Cannot split swapcache folio to non-0 order");
				return false;
			}

			return true;
		}

		/*
		* __folio_split: split a folio at @split_at to a @new_order folio
		* @folio: folio to split
		* @new_order: the order of the new folio
		* @split_at: a page within the new folio
		* @lock_at: a page within @folio to be left locked to caller
		* @list: after-split folios will be put on it if non NULL
		* @uniform_split: perform uniform split or not (non-uniform split)
		*
		* It calls __split_unmapped_folio() to perform uniform and non-uniform split.
		* It is in charge of checking whether the split is supported or not and
		* preparing @folio for __split_unmapped_folio().
		*
		* return: 0: successful, <0 failed (if -ENOMEM is returned, @folio might be
		* split but not to @new_order, the caller needs to check)
		*/
		static int __folio_split(struct folio *folio, unsigned int new_order,
		struct page *page, struct list_head *list)
		struct page *split_at, struct page *lock_at,
		struct list_head *list, bool uniform_split)
		{
		struct deferred_split *ds_queue = get_deferred_split_queue(folio);
		/* reset xarray order to new order after split */
		XA_STATE_ORDER(xas, &folio->mapping->i_pages, folio->index, new_order);
		XA_STATE(xas, &folio->mapping->i_pages, folio->index);
		bool is_anon = folio_test_anon(folio);
		struct address_space *mapping = NULL;
		struct anon_vma *anon_vma = NULL;
		@@ -3886,32 +3959,17 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
		VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
		VM_BUG_ON_FOLIO(!folio_test_large(folio), folio);

		if (new_order >= folio_order(folio))
		if (folio != page_folio(split_at) || folio != page_folio(lock_at))
		return -EINVAL;

		if (is_anon) {
		/* order-1 is not supported for anonymous THP. */
		if (new_order == 1) {
		VM_WARN_ONCE(1, "Cannot split to order-1 folio");
		if (new_order >= folio_order(folio))
		return -EINVAL;
		}
		} else if (new_order) {
		/*
		* No split if the file system does not support large folio.
		* Note that we might still have THPs in such mappings due to
		* CONFIG_READ_ONLY_THP_FOR_FS. But in that case, the mapping
		* does not actually support large folios properly.
		*/
		if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) &&
		!mapping_large_folio_support(folio->mapping)) {
		VM_WARN_ONCE(1,
		"Cannot split file folio to non-0 order");

		if (uniform_split && !uniform_split_supported(folio, new_order, true))
		return -EINVAL;
		}
		}

		/* Only swapping a whole PMD-mapped folio is supported */
		if (folio_test_swapcache(folio) && new_order)
		if (!uniform_split &&
		!non_uniform_split_supported(folio, new_order, true))
		return -EINVAL;

		is_hzp = is_huge_zero_folio(folio);
		@@ -3973,21 +4031,24 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
		goto out;
		}

		if (uniform_split) {
		xas_set_order(&xas, folio->index, new_order);
		xas_split_alloc(&xas, folio, folio_order(folio), gfp);
		if (xas_error(&xas)) {
		ret = xas_error(&xas);
		goto out;
		}
		}

		anon_vma = NULL;
		i_mmap_lock_read(mapping);

		/*
		*__split_huge_page() may need to trim off pages beyond EOF:
		* but on 32-bit, i_size_read() takes an irq-unsafe seqlock,
		* which cannot be nested inside the page tree lock. So note
		* end now: i_size itself may be changed at any moment, but
		* folio lock is good enough to serialize the trimming.
		*__split_unmapped_folio() may need to trim off pages beyond
		* EOF: but on 32-bit, i_size_read() takes an irq-unsafe
		* seqlock, which cannot be nested inside the page tree lock.
		* So note end now: i_size itself may be changed at any moment,
		* but folio lock is good enough to serialize the trimming.
		*/
		end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE);
		if (shmem_mapping(mapping))
		@@ -4041,7 +4102,6 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
		if (mapping) {
		int nr = folio_nr_pages(folio);

		xas_split(&xas, folio, folio_order(folio));
		if (folio_test_pmd_mappable(folio) &&
		new_order < HPAGE_PMD_ORDER) {
		if (folio_test_swapbacked(folio)) {
		@@ -4055,12 +4115,9 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
		}
		}

		if (is_anon) {
		mod_mthp_stat(order, MTHP_STAT_NR_ANON, -1);
		mod_mthp_stat(new_order, MTHP_STAT_NR_ANON, 1 << (order - new_order));
		}
		__split_huge_page(page, list, end, new_order);
		ret = 0;
		ret = __split_unmapped_folio(folio, new_order,
		split_at, lock_at, list, end, &xas, mapping,
		uniform_split);
		} else {
		spin_unlock(&ds_queue->split_queue_lock);
		fail:
		@@ -4138,7 +4195,36 @@ int split_huge_page_to_list_to_order(struct page page, struct list_head list,
		{
		struct folio *folio = page_folio(page);

		return __folio_split(folio, new_order, page, list);
		return __folio_split(folio, new_order, &folio->page, page, list, true);
		}

		/*
		 * folio_split: split a folio at @split_at to a @new_order folio
		 * @folio: folio to split
		 * @new_order: the order of the new folio
		 * @split_at: a page within the new folio
		 * @list: after-split folios will be put on it if non NULL
		 *
		 * return: 0: successful, <0 failed (if -ENOMEM is returned, @folio might be
		 * split but not to @new_order, the caller needs to check)
		 *
		 * It has the same prerequisites and returns as
		 * split_huge_page_to_list_to_order().
		 *
		 * Split a folio at @split_at to a new_order folio, leave the
		 * remaining subpages of the original folio as large as possible. For example,
		 * in the case of splitting an order-9 folio at its third order-3 subpage to
		 * an order-3 folio, there are 2^(9-3)=64 order-3 subpages in the order-9 folio.
		 * After the split, there will be a group of folios with different orders and
		 * the new folio containing @split_at is marked in bracket:
		 * [order-4, {order-3}, order-3, order-5, order-6, order-7, order-8].
		 *
		 * After split, folio is left locked for caller.
		 */
		static int folio_split(struct folio *folio, unsigned int new_order,
				struct page *split_at, struct list_head *list)
		{
			/*
			 * Request a non-uniform split (uniform_split == false) and keep
			 * the first page of @folio as the one left locked for the caller.
			 */
			return __folio_split(folio, new_order, split_at, &folio->page, list,
					false);
		}

		int min_order_for_split(struct folio *folio)