Commit 4d1d3b52 authored by Jakub Kicinski's avatar Jakub Kicinski
Browse files

Merge branch 'replace-page_frag-with-page_frag_cache-part-1'

Yunsheng Lin says:

====================
Replace page_frag with page_frag_cache (Part-1)

This is part 1 of "Replace page_frag with page_frag_cache",
which mainly contain refactoring and optimization for the
implementation of page_frag API before the replacing.

As the discussion in [1], it would be better to target net-next
tree to get more testing as all the callers page_frag API are
in networking, and the chance of conflicting with MM tree seems
low as implementation of page_frag API seems quite self-contained.

After [2], there are still two implementations for page frag:

1. mm/page_alloc.c: net stack seems to be using it in the
   rx part with 'struct page_frag_cache' and the main API
   being page_frag_alloc_align().
2. net/core/sock.c: net stack seems to be using it in the
   tx part with 'struct page_frag' and the main API being
   skb_page_frag_refill().

This patchset tries to unfiy the page frag implementation
by replacing page_frag with page_frag_cache for sk_page_frag()
first. net_high_order_alloc_disable_key for the implementation
in net/core/sock.c doesn't seems matter that much now as pcp
is also supported for high-order pages:
commit 44042b44 ("mm/page_alloc: allow high-order pages to
be stored on the per-cpu lists")

As the related change is mostly related to networking, so
targeting the net-next. And will try to replace the rest
of page_frag in the follow patchset.

After this patchset:
1. Unify the page frag implementation by taking the best out of
   two the existing implementations: we are able to save some space
   for the 'page_frag_cache' API user, and avoid 'get_page()' for
   the old 'page_frag' API user.
2. Future bugfix and performance can be done in one place, hence
   improving maintainability of page_frag's implementation.

Kernel Image changing:
    Linux Kernel   total |      text      data        bss
    ------------------------------------------------------
    after     45250307 |   27274279   17209996     766032
    before    45254134 |   27278118   17209984     766032
    delta        -3827 |      -3839        +12         +0

1. https://lore.kernel.org/all/add10dd4-7f5d-4aa1-aa04-767590f944e0@redhat.com/
2. https://lore.kernel.org/all/20240228093013.8263-1-linyunsheng@huawei.com/
====================

Link: https://patch.msgid.link/20241028115343.3405838-1-linyunsheng@huawei.com


Signed-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parents a885a6b2 ec397ea0
Loading
Loading
Loading
Loading
+0 −18
Original line number Diff line number Diff line
@@ -109,26 +109,8 @@ typedef struct page *pgtable_t;
#define __pgd(x)	((pgd_t) { (x) } )
#define __pgprot(x)	((pgprot_t) { (x) } )

/*
 * Pure 2^n version of get_order
 * Use 'nsau' instructions if supported by the processor or the generic version.
 */

#if XCHAL_HAVE_NSA

static inline __attribute_const__ int get_order(unsigned long size)
{
	int lz;
	asm ("nsau %0, %1" : "=r" (lz) : "r" ((size - 1) >> PAGE_SHIFT));
	return 32 - lz;
}

#else

# include <asm-generic/getorder.h>

#endif

struct page;
struct vm_area_struct;
extern void clear_page(void *page);
+1 −1
Original line number Diff line number Diff line
@@ -1325,7 +1325,7 @@ static int vhost_net_open(struct inode *inode, struct file *f)
			vqs[VHOST_NET_VQ_RX]);

	f->private_data = n;
	n->pf_cache.va = NULL;
	page_frag_cache_init(&n->pf_cache);

	return 0;
}
+0 −22
Original line number Diff line number Diff line
@@ -371,28 +371,6 @@ __meminit void *alloc_pages_exact_nid_noprof(int nid, size_t size, gfp_t gfp_mas
extern void __free_pages(struct page *page, unsigned int order);
extern void free_pages(unsigned long addr, unsigned int order);

struct page_frag_cache;
void page_frag_cache_drain(struct page_frag_cache *nc);
extern void __page_frag_cache_drain(struct page *page, unsigned int count);
void *__page_frag_alloc_align(struct page_frag_cache *nc, unsigned int fragsz,
			      gfp_t gfp_mask, unsigned int align_mask);

static inline void *page_frag_alloc_align(struct page_frag_cache *nc,
					  unsigned int fragsz, gfp_t gfp_mask,
					  unsigned int align)
{
	WARN_ON_ONCE(!is_power_of_2(align));
	return __page_frag_alloc_align(nc, fragsz, gfp_mask, -align);
}

static inline void *page_frag_alloc(struct page_frag_cache *nc,
			     unsigned int fragsz, gfp_t gfp_mask)
{
	return __page_frag_alloc_align(nc, fragsz, gfp_mask, ~0u);
}

extern void page_frag_free(void *addr);

#define __free_page(page) __free_pages((page), 0)
#define free_page(addr) free_pages((addr), 0)

+0 −18
Original line number Diff line number Diff line
@@ -521,9 +521,6 @@ static_assert(sizeof(struct ptdesc) <= sizeof(struct page));
 */
#define STRUCT_PAGE_MAX_SHIFT	(order_base_2(sizeof(struct page)))

#define PAGE_FRAG_CACHE_MAX_SIZE	__ALIGN_MASK(32768, ~PAGE_MASK)
#define PAGE_FRAG_CACHE_MAX_ORDER	get_order(PAGE_FRAG_CACHE_MAX_SIZE)

/*
 * page_private can be used on tail pages.  However, PagePrivate is only
 * checked by the VM on the head page.  So page_private on the tail pages
@@ -542,21 +539,6 @@ static inline void *folio_get_private(struct folio *folio)
	return folio->private;
}

struct page_frag_cache {
	void * va;
#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
	__u16 offset;
	__u16 size;
#else
	__u32 offset;
#endif
	/* we maintain a pagecount bias, so that we dont dirty cache line
	 * containing page->_refcount every time we allocate a fragment.
	 */
	unsigned int		pagecnt_bias;
	bool pfmemalloc;
};

typedef unsigned long vm_flags_t;

/*
+21 −0
Original line number Diff line number Diff line
@@ -8,6 +8,7 @@
 * (These are defined separately to decouple sched.h from mm_types.h as much as possible.)
 */

#include <linux/align.h>
#include <linux/types.h>

#include <asm/page.h>
@@ -43,6 +44,26 @@ struct page_frag {
#endif
};

#define PAGE_FRAG_CACHE_MAX_SIZE	__ALIGN_MASK(32768, ~PAGE_MASK)
#define PAGE_FRAG_CACHE_MAX_ORDER	get_order(PAGE_FRAG_CACHE_MAX_SIZE)
struct page_frag_cache {
	/* encoded_page consists of the virtual address, pfmemalloc bit and
	 * order of a page.
	 */
	unsigned long encoded_page;

	/* we maintain a pagecount bias, so that we dont dirty cache line
	 * containing page->_refcount every time we allocate a fragment.
	 */
#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) && (BITS_PER_LONG <= 32)
	__u16 offset;
	__u16 pagecnt_bias;
#else
	__u32 offset;
	__u32 pagecnt_bias;
#endif
};

/* Track pages that require TLB flushes */
struct tlbflush_unmap_batch {
#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
Loading