Commit 8218f62c authored by Yunsheng Lin's avatar Yunsheng Lin Committed by Jakub Kicinski
Browse files

mm: page_frag: use initial zero offset for page_frag_alloc_align()

We are about to use page_frag_alloc_*() API to not just
allocate memory for skb->data, but also use them to do
the memory allocation for skb frag too. Currently the
implementation of page_frag in mm subsystem is running
the offset as a countdown rather than count-up value,
there may have several advantages to that as mentioned
in [1], but it may have some disadvantages, for example,
it may disable skb frag coalescing and more correct cache
prefetching

We have a trade-off to make in order to have a unified
implementation and API for page_frag, so use a initial zero
offset in this patch, and the following patch will try to
make some optimization to avoid the disadvantages as much
as possible.

1. https://lore.kernel.org/all/f4abe71b3439b39d17a6fb2d410180f367cadf5c.camel@gmail.com/



CC: Andrew Morton <akpm@linux-foundation.org>
CC: Linux-MM <linux-mm@kvack.org>
Signed-off-by: default avatarYunsheng Lin <linyunsheng@huawei.com>
Reviewed-by: default avatarAlexander Duyck <alexanderduyck@fb.com>
Link: https://patch.msgid.link/20241028115343.3405838-4-linyunsheng@huawei.com


Signed-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parent 65941f10
Loading
Loading
Loading
Loading
+23 −23
Original line number Diff line number Diff line
@@ -63,9 +63,13 @@ void *__page_frag_alloc_align(struct page_frag_cache *nc,
			      unsigned int fragsz, gfp_t gfp_mask,
			      unsigned int align_mask)
{
#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
	unsigned int size = nc->size;
#else
	unsigned int size = PAGE_SIZE;
#endif
	unsigned int offset;
	struct page *page;
	int offset;

	if (unlikely(!nc->va)) {
refill:
@@ -85,11 +89,24 @@ void *__page_frag_alloc_align(struct page_frag_cache *nc,
		/* reset page count bias and offset to start of new frag */
		nc->pfmemalloc = page_is_pfmemalloc(page);
		nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1;
		nc->offset = size;
		nc->offset = 0;
	}

	offset = __ALIGN_KERNEL_MASK(nc->offset, ~align_mask);
	if (unlikely(offset + fragsz > size)) {
		if (unlikely(fragsz > PAGE_SIZE)) {
			/*
			 * The caller is trying to allocate a fragment
			 * with fragsz > PAGE_SIZE but the cache isn't big
			 * enough to satisfy the request, this may
			 * happen in low memory conditions.
			 * We don't release the cache page because
			 * it could make memory pressure worse
			 * so we simply return NULL here.
			 */
			return NULL;
		}

	offset = nc->offset - fragsz;
	if (unlikely(offset < 0)) {
		page = virt_to_page(nc->va);

		if (!page_ref_sub_and_test(page, nc->pagecnt_bias))
@@ -100,33 +117,16 @@ void *__page_frag_alloc_align(struct page_frag_cache *nc,
			goto refill;
		}

#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
		/* if size can vary use size else just use PAGE_SIZE */
		size = nc->size;
#endif
		/* OK, page count is 0, we can safely set it */
		set_page_count(page, PAGE_FRAG_CACHE_MAX_SIZE + 1);

		/* reset page count bias and offset to start of new frag */
		nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1;
		offset = size - fragsz;
		if (unlikely(offset < 0)) {
			/*
			 * The caller is trying to allocate a fragment
			 * with fragsz > PAGE_SIZE but the cache isn't big
			 * enough to satisfy the request, this may
			 * happen in low memory conditions.
			 * We don't release the cache page because
			 * it could make memory pressure worse
			 * so we simply return NULL here.
			 */
			return NULL;
		}
		offset = 0;
	}

	nc->pagecnt_bias--;
	offset &= align_mask;
	nc->offset = offset;
	nc->offset = offset + fragsz;

	return nc->va + offset;
}