Commit 173e7622 authored by Mina Almasry, committed by Jakub Kicinski
Browse files

Revert "net: mirror skb frag ref/unref helpers"

This reverts commit a580ea99.

This revert is to resolve Dragos's report of page_pool leak here:
https://lore.kernel.org/lkml/20240424165646.1625690-2-dtatulea@nvidia.com/



The reverted patch interacts very badly with commit 2cc3aeb5 ("skbuff:
Fix a potential race while recycling page_pool packets"). The reverted
commit hopes that the pp_recycle + is_pp_page variables do not change
between the skb_frag_ref and skb_frag_unref operations. If such a change
occurs, the skb_frag_ref/unref will not operate on the same reference type.
In the case of Dragos's report, the grabbed ref was a pp ref, but the unref
was a page ref, because the pp_recycle setting on the skb was changed.

Attempting to fix this issue on the fly is risky. Let's revert and I hope
to reland this with better understanding and testing to ensure we don't
regress some edge case while streamlining skb reffing.

Fixes: a580ea99 ("net: mirror skb frag ref/unref helpers")
Reported-by: Dragos Tatulea <dtatulea@nvidia.com>
Signed-off-by: Mina Almasry <almasrymina@google.com>
Link: https://lore.kernel.org/r/20240502175423.2456544-1-almasrymina@google.com


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parent 5bfadc57
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -1659,7 +1659,7 @@ static void chcr_ktls_copy_record_in_skb(struct sk_buff *nskb,
	for (i = 0; i < record->num_frags; i++) {
		skb_shinfo(nskb)->frags[i] = record->frags[i];
		/* increase the frag ref count */
		__skb_frag_ref(&skb_shinfo(nskb)->frags[i], nskb->pp_recycle);
		__skb_frag_ref(&skb_shinfo(nskb)->frags[i]);
	}

	skb_shinfo(nskb)->nr_frags = record->num_frags;
+2 −2
Original line number Diff line number Diff line
@@ -2000,7 +2000,7 @@ static int cas_rx_process_pkt(struct cas *cp, struct cas_rx_comp *rxc,
		skb->len      += hlen - swivel;

		skb_frag_fill_page_desc(frag, page->buffer, off, hlen - swivel);
		__skb_frag_ref(frag, skb->pp_recycle);
		__skb_frag_ref(frag);

		/* any more data? */
		if ((words[0] & RX_COMP1_SPLIT_PKT) && ((dlen -= hlen) > 0)) {
@@ -2024,7 +2024,7 @@ static int cas_rx_process_pkt(struct cas *cp, struct cas_rx_comp *rxc,
			frag++;

			skb_frag_fill_page_desc(frag, page->buffer, 0, hlen);
			__skb_frag_ref(frag, skb->pp_recycle);
			__skb_frag_ref(frag);
			RX_USED_ADD(page, hlen + cp->crc_size);
		}

+1 −1
Original line number Diff line number Diff line
@@ -717,7 +717,7 @@ static void veth_xdp_get(struct xdp_buff *xdp)
		return;

	for (i = 0; i < sinfo->nr_frags; i++)
		__skb_frag_ref(&sinfo->frags[i], false);
		__skb_frag_ref(&sinfo->frags[i]);
}

static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
+4 −35
Original line number Diff line number Diff line
@@ -8,47 +8,16 @@
#define _LINUX_SKBUFF_REF_H

#include <linux/skbuff.h>
#include <net/page_pool/helpers.h>

#ifdef CONFIG_PAGE_POOL
/*
 * Report whether @page carries the page pool signature: the pp_magic word,
 * with its two lowest bits ignored, must equal PP_SIGNATURE.
 */
static inline bool is_pp_page(struct page *page)
{
	unsigned long magic = page->pp_magic & ~0x3UL;

	return magic == PP_SIGNATURE;
}

/*
 * Try to take a page pool reference on @page.
 *
 * The check and the reference are both applied to the compound head of
 * @page. Returns true when the head is a page pool page and a reference was
 * taken via page_pool_ref_page(); returns false, taking no reference at all,
 * otherwise.
 */
static inline bool napi_pp_get_page(struct page *page)
{
	struct page *head = compound_head(page);

	if (is_pp_page(head)) {
		page_pool_ref_page(head);
		return true;
	}

	return false;
}
#endif

/*
 * Take one reference on @page.
 *
 * With CONFIG_PAGE_POOL and @recycle set, a page pool reference is attempted
 * first; if that succeeds nothing else is done. In every other case the
 * reference is taken with get_page().
 */
static inline void skb_page_ref(struct page *page, bool recycle)
{
	bool pp_ref_taken = false;

#ifdef CONFIG_PAGE_POOL
	/* Short-circuit keeps napi_pp_get_page() from running when !recycle. */
	pp_ref_taken = recycle && napi_pp_get_page(page);
#endif
	if (!pp_ref_taken)
		get_page(page);
}

/**
 * __skb_frag_ref - take an additional reference on a paged fragment.
 * @frag: the paged fragment
 * @recycle: skb->pp_recycle param of the parent skb. False if no parent skb.
 *
 * Takes an additional reference on the paged fragment @frag. Obtains the
 * correct reference count depending on whether skb->pp_recycle is set and
 * whether the frag is a page pool frag.
 * Takes an additional reference on the paged fragment @frag.
 */
static inline void __skb_frag_ref(skb_frag_t *frag, bool recycle)
static inline void __skb_frag_ref(skb_frag_t *frag)
{
	skb_page_ref(skb_frag_page(frag), recycle);
	get_page(skb_frag_page(frag));
}

/**
@@ -60,7 +29,7 @@ static inline void __skb_frag_ref(skb_frag_t *frag, bool recycle)
 */
static inline void skb_frag_ref(struct sk_buff *skb, int f)
{
	__skb_frag_ref(&skb_shinfo(skb)->frags[f], skb->pp_recycle);
	__skb_frag_ref(&skb_shinfo(skb)->frags[f]);
}

bool napi_pp_put_page(struct page *page);
+42 −4
Original line number Diff line number Diff line
@@ -904,6 +904,11 @@ static void skb_clone_fraglist(struct sk_buff *skb)
		skb_get(list);
}

/*
 * Report whether @page carries the page pool signature: the pp_magic word,
 * with its two lowest bits ignored, must equal PP_SIGNATURE.
 */
static bool is_pp_page(struct page *page)
{
	unsigned long magic = page->pp_magic & ~0x3UL;

	return magic == PP_SIGNATURE;
}

int skb_pp_cow_data(struct page_pool *pool, struct sk_buff **pskb,
		    unsigned int headroom)
{
@@ -1025,6 +1030,37 @@ static bool skb_pp_recycle(struct sk_buff *skb, void *data)
	return napi_pp_put_page(virt_to_page(data));
}

/**
 * skb_pp_frag_ref() - Take one extra reference on each frag of a pp-aware skb
 * @skb:	page pool aware skb
 *
 * Walks every paged fragment of @skb and bumps the reference held on the
 * fragment's compound head page. Page pool pages get a page pool reference
 * (pp_ref_count); any other page gets a plain page reference, since a page
 * pool aware skb may also carry normal page fragments.
 *
 * Only meant for skbs with skb->pp_recycle set; fragments of a
 * non-pp-recycling skb are left untouched.
 *
 * Return: 0 once all fragments have been referenced, -EINVAL (with no
 * reference taken) when skb->pp_recycle is not set.
 */
static int skb_pp_frag_ref(struct sk_buff *skb)
{
	struct skb_shared_info *sinfo;
	int idx;

	if (!skb->pp_recycle)
		return -EINVAL;

	sinfo = skb_shinfo(skb);

	for (idx = 0; idx < sinfo->nr_frags; idx++) {
		struct page *head;

		head = compound_head(skb_frag_page(&sinfo->frags[idx]));

		/* Non page pool pages are the uncommon case here. */
		if (unlikely(!is_pp_page(head))) {
			page_ref_inc(head);
			continue;
		}

		page_pool_ref_page(head);
	}

	return 0;
}

static void skb_kfree_head(void *head, unsigned int end_offset)
{
	if (end_offset == SKB_SMALL_HEAD_HEADROOM)
@@ -4160,7 +4196,7 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
			to++;

		} else {
			__skb_frag_ref(fragfrom, skb->pp_recycle);
			__skb_frag_ref(fragfrom);
			skb_frag_page_copy(fragto, fragfrom);
			skb_frag_off_copy(fragto, fragfrom);
			skb_frag_size_set(fragto, todo);
@@ -4810,7 +4846,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
			}

			*nskb_frag = (i < 0) ? skb_head_frag_to_page_desc(frag_skb) : *frag;
			__skb_frag_ref(nskb_frag, nskb->pp_recycle);
			__skb_frag_ref(nskb_frag);
			size = skb_frag_size(nskb_frag);

			if (pos < offset) {
@@ -5941,8 +5977,10 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
	/* if the skb is not cloned this does nothing
	 * since we set nr_frags to 0.
	 */
	if (skb_pp_frag_ref(from)) {
		for (i = 0; i < from_shinfo->nr_frags; i++)
		__skb_frag_ref(&from_shinfo->frags[i], from->pp_recycle);
			__skb_frag_ref(&from_shinfo->frags[i]);
	}

	to->truesize += delta;
	to->len += len;
Loading