Revert "net: skb: introduce and use a single page frag cache" (6bc7e4eb) · Commits · git / linux-net

include/linux/netdevice.h

+0 −1

Original line number	Diff line number	Diff line
		@@ -4117,7 +4117,6 @@ void netif_receive_skb_list(struct list_head *head);
		gro_result_t napi_gro_receive(struct napi_struct napi, struct sk_buff skb);
		void napi_gro_flush(struct napi_struct *napi, bool flush_old);
		struct sk_buff napi_get_frags(struct napi_struct napi);
		void napi_get_frags_check(struct napi_struct *napi);
		gro_result_t napi_gro_frags(struct napi_struct *napi);

		static inline void napi_free_frags(struct napi_struct *napi)

net/core/dev.c

+17 −0

Original line number	Diff line number	Diff line
		@@ -6991,6 +6991,23 @@ netif_napi_dev_list_add(struct net_device dev, struct napi_struct napi)
		list_add_rcu(&napi->dev_list, higher); /* adds after higher */
		}

		/* Double check that napi_get_frags() allocates skbs with
		* skb->head being backed by slab, not a page fragment.
		* This is to make sure bug fixed in 3226b158e67c
		* ("net: avoid 32 x truesize under-estimation for tiny skbs")
		* does not accidentally come back.
		*/
		static void napi_get_frags_check(struct napi_struct *napi)
		{
		struct sk_buff *skb;

		local_bh_disable();
		skb = napi_get_frags(napi);
		WARN_ON_ONCE(skb && skb->head_frag);
		napi_free_frags(napi);
		local_bh_enable();
		}

		void netif_napi_add_weight_locked(struct net_device *dev,
		struct napi_struct *napi,
		int (poll)(struct napi_struct , int),

net/core/skbuff.c

+5 −99

Original line number	Diff line number	Diff line
		@@ -223,67 +223,9 @@ static void skb_under_panic(struct sk_buff skb, unsigned int sz, void addr)
		#define NAPI_SKB_CACHE_BULK 16
		#define NAPI_SKB_CACHE_HALF (NAPI_SKB_CACHE_SIZE / 2)

		#if PAGE_SIZE == SZ_4K

		#define NAPI_HAS_SMALL_PAGE_FRAG 1
		#define NAPI_SMALL_PAGE_PFMEMALLOC(nc) ((nc).pfmemalloc)

		/* specialized page frag allocator using a single order 0 page
		* and slicing it into 1K sized fragment. Constrained to systems
		* with a very limited amount of 1K fragments fitting a single
		* page - to avoid excessive truesize underestimation
		*/

		struct page_frag_1k {
		void *va;
		u16 offset;
		bool pfmemalloc;
		};

		static void page_frag_alloc_1k(struct page_frag_1k nc, gfp_t gfp)
		{
		struct page *page;
		int offset;

		offset = nc->offset - SZ_1K;
		if (likely(offset >= 0))
		goto use_frag;

		page = alloc_pages_node(NUMA_NO_NODE, gfp, 0);
		if (!page)
		return NULL;

		nc->va = page_address(page);
		nc->pfmemalloc = page_is_pfmemalloc(page);
		offset = PAGE_SIZE - SZ_1K;
		page_ref_add(page, offset / SZ_1K);

		use_frag:
		nc->offset = offset;
		return nc->va + offset;
		}
		#else

		/* the small page is actually unused in this build; add dummy helpers
		* to please the compiler and avoid later preprocessor's conditionals
		*/
		#define NAPI_HAS_SMALL_PAGE_FRAG 0
		#define NAPI_SMALL_PAGE_PFMEMALLOC(nc) false

		struct page_frag_1k {
		};

		static void page_frag_alloc_1k(struct page_frag_1k nc, gfp_t gfp_mask)
		{
		return NULL;
		}

		#endif

		struct napi_alloc_cache {
		local_lock_t bh_lock;
		struct page_frag_cache page;
		struct page_frag_1k page_small;
		unsigned int skb_count;
		void *skb_cache[NAPI_SKB_CACHE_SIZE];
		};
		@@ -293,23 +235,6 @@ static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache) = {
		.bh_lock = INIT_LOCAL_LOCK(bh_lock),
		};

		/* Double check that napi_get_frags() allocates skbs with
		* skb->head being backed by slab, not a page fragment.
		* This is to make sure bug fixed in 3226b158e67c
		* ("net: avoid 32 x truesize under-estimation for tiny skbs")
		* does not accidentally come back.
		*/
		void napi_get_frags_check(struct napi_struct *napi)
		{
		struct sk_buff *skb;

		local_bh_disable();
		skb = napi_get_frags(napi);
		WARN_ON_ONCE(!NAPI_HAS_SMALL_PAGE_FRAG && skb && skb->head_frag);
		napi_free_frags(napi);
		local_bh_enable();
		}

		void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align_mask)
		{
		struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
		@@ -816,11 +741,8 @@ struct sk_buff napi_alloc_skb(struct napi_struct napi, unsigned int len)

		/* If requested length is either too small or too big,
		* we use kmalloc() for skb->head allocation.
		* When the small frag allocator is available, prefer it over kmalloc
		* for small fragments
		*/
		if ((!NAPI_HAS_SMALL_PAGE_FRAG &&
		len <= SKB_WITH_OVERHEAD(SKB_SMALL_HEAD_CACHE_SIZE)) \|\|
		if (len <= SKB_WITH_OVERHEAD(SKB_SMALL_HEAD_CACHE_SIZE) \|\|
		len > SKB_WITH_OVERHEAD(PAGE_SIZE) \|\|
		(gfp_mask & (__GFP_DIRECT_RECLAIM \| GFP_DMA))) {
		skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX \| SKB_ALLOC_NAPI,
		@@ -830,32 +752,16 @@ struct sk_buff napi_alloc_skb(struct napi_struct napi, unsigned int len)
		goto skb_success;
		}

		len = SKB_HEAD_ALIGN(len);

		if (sk_memalloc_socks())
		gfp_mask \|= __GFP_MEMALLOC;

		local_lock_nested_bh(&napi_alloc_cache.bh_lock);
		nc = this_cpu_ptr(&napi_alloc_cache);
		if (NAPI_HAS_SMALL_PAGE_FRAG && len <= SKB_WITH_OVERHEAD(1024)) {
		/* we are artificially inflating the allocation size, but
		* that is not as bad as it may look like, as:
		* - 'len' less than GRO_MAX_HEAD makes little sense
		* - On most systems, larger 'len' values lead to fragment
		* size above 512 bytes
		* - kmalloc would use the kmalloc-1k slab for such values
		* - Builds with smaller GRO_MAX_HEAD will very likely do
		* little networking, as that implies no WiFi and no
		* tunnels support, and 32 bits arches.
		*/
		len = SZ_1K;

		data = page_frag_alloc_1k(&nc->page_small, gfp_mask);
		pfmemalloc = NAPI_SMALL_PAGE_PFMEMALLOC(nc->page_small);
		} else {
		len = SKB_HEAD_ALIGN(len);

		data = page_frag_alloc(&nc->page, len, gfp_mask);
		pfmemalloc = page_frag_cache_is_pfmemalloc(&nc->page);
		}
		local_unlock_nested_bh(&napi_alloc_cache.bh_lock);

		if (unlikely(!data))