Commit 9d71bc83 authored by Alexei Starovoitov
Browse files

Merge branch 'net-bpf_xdp_adjust_tail-and-intel-mbuf-fixes'

Maciej Fijalkowski says:

====================
net: bpf_xdp_adjust_tail() and Intel mbuf fixes

Hey,

after a break followed by dealing with sickness, here is a v6 that makes
bpf_xdp_adjust_tail() actually usable for ZC drivers that support XDP
multi-buffer. Since v4 I also tried using bpf_xdp_adjust_tail() with a
positive offset, which exposed yet more issues, as can be observed from
the increased commit count compared to v3.

John, in the end I think we should remove the handling of
MEM_TYPE_XSK_BUFF_POOL from __xdp_return(), but that is out of scope for
a fixes set, IMHO.

Thanks,
Maciej

v6:
- add acks [Magnus]
- fix spelling mistakes [Magnus]
- avoid touching xdp_buff in xp_alloc_{reused,new_from_fq}() [Magnus]
- s/shrink_data/bpf_xdp_shrink_data [Jakub]
- remove __shrink_data() [Jakub]
- check retvals from __xdp_rxq_info_reg() [Magnus]

v5:
- pick correct version of patch 5 [Simon]
- elaborate a bit more on what patch 2 fixes

v4:
- do not clear frags flag when deleting tail; xsk_buff_pool now does
  that
- skip some NULL tests for xsk_buff_get_tail [Martin, John]
- address problems around registering xdp_rxq_info
- fix bpf_xdp_frags_increase_tail() for ZC mbuf

v3:
- add acks
- s/xsk_buff_tail_del/xsk_buff_del_tail
- address i40e as well (thanks Tirthendu)

v2:
- fix !CONFIG_XDP_SOCKETS builds
- add reviewed-by tag to patch 3
====================

Link: https://lore.kernel.org/r/20240124191602.566724-1-maciej.fijalkowski@intel.com


Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parents 1732ebc4 0cbb0870
Loading
Loading
Loading
Loading
+31 −16
Original line number Diff line number Diff line
@@ -3588,40 +3588,55 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
	struct i40e_hmc_obj_rxq rx_ctx;
	int err = 0;
	bool ok;
	int ret;

	bitmap_zero(ring->state, __I40E_RING_STATE_NBITS);

	/* clear the context structure first */
	memset(&rx_ctx, 0, sizeof(rx_ctx));

	if (ring->vsi->type == I40E_VSI_MAIN)
		xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
	ring->rx_buf_len = vsi->rx_buf_len;

	/* XDP RX-queue info only needed for RX rings exposed to XDP */
	if (ring->vsi->type != I40E_VSI_MAIN)
		goto skip;

	if (!xdp_rxq_info_is_reg(&ring->xdp_rxq)) {
		err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
					 ring->queue_index,
					 ring->q_vector->napi.napi_id,
					 ring->rx_buf_len);
		if (err)
			return err;
	}

	ring->xsk_pool = i40e_xsk_pool(ring);
	if (ring->xsk_pool) {
		ring->rx_buf_len =
		  xsk_pool_get_rx_frame_size(ring->xsk_pool);
		ret = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
		xdp_rxq_info_unreg(&ring->xdp_rxq);
		ring->rx_buf_len = xsk_pool_get_rx_frame_size(ring->xsk_pool);
		err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
					 ring->queue_index,
					 ring->q_vector->napi.napi_id,
					 ring->rx_buf_len);
		if (err)
			return err;
		err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
						 MEM_TYPE_XSK_BUFF_POOL,
						 NULL);
		if (ret)
			return ret;
		if (err)
			return err;
		dev_info(&vsi->back->pdev->dev,
			 "Registered XDP mem model MEM_TYPE_XSK_BUFF_POOL on Rx ring %d\n",
			 ring->queue_index);

	} else {
		ring->rx_buf_len = vsi->rx_buf_len;
		if (ring->vsi->type == I40E_VSI_MAIN) {
			ret = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
		err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
						 MEM_TYPE_PAGE_SHARED,
						 NULL);
			if (ret)
				return ret;
		}
		if (err)
			return err;
	}

skip:
	xdp_init_buff(&ring->xdp, i40e_rx_pg_size(ring) / 2, &ring->xdp_rxq);

	rx_ctx.dbuff = DIV_ROUND_UP(ring->rx_buf_len,
+23 −26
Original line number Diff line number Diff line
@@ -1548,7 +1548,6 @@ void i40e_free_rx_resources(struct i40e_ring *rx_ring)
int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
{
	struct device *dev = rx_ring->dev;
	int err;

	u64_stats_init(&rx_ring->syncp);

@@ -1569,14 +1568,6 @@ int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
	rx_ring->next_to_process = 0;
	rx_ring->next_to_use = 0;

	/* XDP RX-queue info only needed for RX rings exposed to XDP */
	if (rx_ring->vsi->type == I40E_VSI_MAIN) {
		err = xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev,
				       rx_ring->queue_index, rx_ring->q_vector->napi.napi_id);
		if (err < 0)
			return err;
	}

	rx_ring->xdp_prog = rx_ring->vsi->xdp_prog;

	rx_ring->rx_bi =
@@ -2087,7 +2078,8 @@ static void i40e_put_rx_buffer(struct i40e_ring *rx_ring,
static void i40e_process_rx_buffs(struct i40e_ring *rx_ring, int xdp_res,
				  struct xdp_buff *xdp)
{
	u32 next = rx_ring->next_to_clean;
	u32 nr_frags = xdp_get_shared_info_from_buff(xdp)->nr_frags;
	u32 next = rx_ring->next_to_clean, i = 0;
	struct i40e_rx_buffer *rx_buffer;

	xdp->flags = 0;
@@ -2100,10 +2092,10 @@ static void i40e_process_rx_buffs(struct i40e_ring *rx_ring, int xdp_res,
		if (!rx_buffer->page)
			continue;

		if (xdp_res == I40E_XDP_CONSUMED)
			rx_buffer->pagecnt_bias++;
		else
		if (xdp_res != I40E_XDP_CONSUMED)
			i40e_rx_buffer_flip(rx_buffer, xdp->frame_sz);
		else if (i++ <= nr_frags)
			rx_buffer->pagecnt_bias++;

		/* EOP buffer will be put in i40e_clean_rx_irq() */
		if (next == rx_ring->next_to_process)
@@ -2117,20 +2109,20 @@ static void i40e_process_rx_buffs(struct i40e_ring *rx_ring, int xdp_res,
 * i40e_construct_skb - Allocate skb and populate it
 * @rx_ring: rx descriptor ring to transact packets on
 * @xdp: xdp_buff pointing to the data
 * @nr_frags: number of buffers for the packet
 *
 * This function allocates an skb.  It then populates it with the page
 * data from the current receive descriptor, taking care to set up the
 * skb correctly.
 */
static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
					  struct xdp_buff *xdp,
					  u32 nr_frags)
					  struct xdp_buff *xdp)
{
	unsigned int size = xdp->data_end - xdp->data;
	struct i40e_rx_buffer *rx_buffer;
	struct skb_shared_info *sinfo;
	unsigned int headlen;
	struct sk_buff *skb;
	u32 nr_frags = 0;

	/* prefetch first cache line of first page */
	net_prefetch(xdp->data);
@@ -2168,6 +2160,10 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
	memcpy(__skb_put(skb, headlen), xdp->data,
	       ALIGN(headlen, sizeof(long)));

	if (unlikely(xdp_buff_has_frags(xdp))) {
		sinfo = xdp_get_shared_info_from_buff(xdp);
		nr_frags = sinfo->nr_frags;
	}
	rx_buffer = i40e_rx_bi(rx_ring, rx_ring->next_to_clean);
	/* update all of the pointers */
	size -= headlen;
@@ -2187,9 +2183,8 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
	}

	if (unlikely(xdp_buff_has_frags(xdp))) {
		struct skb_shared_info *sinfo, *skinfo = skb_shinfo(skb);
		struct skb_shared_info *skinfo = skb_shinfo(skb);

		sinfo = xdp_get_shared_info_from_buff(xdp);
		memcpy(&skinfo->frags[skinfo->nr_frags], &sinfo->frags[0],
		       sizeof(skb_frag_t) * nr_frags);

@@ -2212,17 +2207,17 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
 * i40e_build_skb - Build skb around an existing buffer
 * @rx_ring: Rx descriptor ring to transact packets on
 * @xdp: xdp_buff pointing to the data
 * @nr_frags: number of buffers for the packet
 *
 * This function builds an skb around an existing Rx buffer, taking care
 * to set up the skb correctly and avoid any memcpy overhead.
 */
static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring,
				      struct xdp_buff *xdp,
				      u32 nr_frags)
				      struct xdp_buff *xdp)
{
	unsigned int metasize = xdp->data - xdp->data_meta;
	struct skb_shared_info *sinfo;
	struct sk_buff *skb;
	u32 nr_frags;

	/* Prefetch first cache line of first page. If xdp->data_meta
	 * is unused, this points exactly as xdp->data, otherwise we
@@ -2231,6 +2226,11 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring,
	 */
	net_prefetch(xdp->data_meta);

	if (unlikely(xdp_buff_has_frags(xdp))) {
		sinfo = xdp_get_shared_info_from_buff(xdp);
		nr_frags = sinfo->nr_frags;
	}

	/* build an skb around the page buffer */
	skb = napi_build_skb(xdp->data_hard_start, xdp->frame_sz);
	if (unlikely(!skb))
@@ -2243,9 +2243,6 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring,
		skb_metadata_set(skb, metasize);

	if (unlikely(xdp_buff_has_frags(xdp))) {
		struct skb_shared_info *sinfo;

		sinfo = xdp_get_shared_info_from_buff(xdp);
		xdp_update_skb_shared_info(skb, nr_frags,
					   sinfo->xdp_frags_size,
					   nr_frags * xdp->frame_sz,
@@ -2589,9 +2586,9 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget,
			total_rx_bytes += size;
		} else {
			if (ring_uses_build_skb(rx_ring))
				skb = i40e_build_skb(rx_ring, xdp, nfrags);
				skb = i40e_build_skb(rx_ring, xdp);
			else
				skb = i40e_construct_skb(rx_ring, xdp, nfrags);
				skb = i40e_construct_skb(rx_ring, xdp);

			/* drop if we failed to retrieve a buffer */
			if (!skb) {
+2 −2
Original line number Diff line number Diff line
@@ -414,7 +414,8 @@ i40e_add_xsk_frag(struct i40e_ring *rx_ring, struct xdp_buff *first,
	}

	__skb_fill_page_desc_noacc(sinfo, sinfo->nr_frags++,
				   virt_to_page(xdp->data_hard_start), 0, size);
				   virt_to_page(xdp->data_hard_start),
				   XDP_PACKET_HEADROOM, size);
	sinfo->xdp_frags_size += size;
	xsk_buff_add_frag(xdp);

@@ -498,7 +499,6 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget)
		xdp_res = i40e_run_xdp_zc(rx_ring, first, xdp_prog);
		i40e_handle_xdp_result_zc(rx_ring, first, rx_desc, &rx_packets,
					  &rx_bytes, xdp_res, &failure);
		first->flags = 0;
		next_to_clean = next_to_process;
		if (failure)
			break;
+23 −14
Original line number Diff line number Diff line
@@ -547,19 +547,27 @@ int ice_vsi_cfg_rxq(struct ice_rx_ring *ring)
	ring->rx_buf_len = ring->vsi->rx_buf_len;

	if (ring->vsi->type == ICE_VSI_PF) {
		if (!xdp_rxq_info_is_reg(&ring->xdp_rxq))
			/* coverity[check_return] */
			__xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
		if (!xdp_rxq_info_is_reg(&ring->xdp_rxq)) {
			err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
						 ring->q_index,
						 ring->q_vector->napi.napi_id,
					   ring->vsi->rx_buf_len);
						 ring->rx_buf_len);
			if (err)
				return err;
		}

		ring->xsk_pool = ice_xsk_pool(ring);
		if (ring->xsk_pool) {
			xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
			xdp_rxq_info_unreg(&ring->xdp_rxq);

			ring->rx_buf_len =
				xsk_pool_get_rx_frame_size(ring->xsk_pool);
			err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
						 ring->q_index,
						 ring->q_vector->napi.napi_id,
						 ring->rx_buf_len);
			if (err)
				return err;
			err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
							 MEM_TYPE_XSK_BUFF_POOL,
							 NULL);
@@ -571,13 +579,14 @@ int ice_vsi_cfg_rxq(struct ice_rx_ring *ring)
			dev_info(dev, "Registered XDP mem model MEM_TYPE_XSK_BUFF_POOL on Rx ring %d\n",
				 ring->q_index);
		} else {
			if (!xdp_rxq_info_is_reg(&ring->xdp_rxq))
				/* coverity[check_return] */
				__xdp_rxq_info_reg(&ring->xdp_rxq,
						   ring->netdev,
			if (!xdp_rxq_info_is_reg(&ring->xdp_rxq)) {
				err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
							 ring->q_index,
							 ring->q_vector->napi.napi_id,
						   ring->vsi->rx_buf_len);
							 ring->rx_buf_len);
				if (err)
					return err;
			}

			err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
							 MEM_TYPE_PAGE_SHARED,
+9 −10
Original line number Diff line number Diff line
@@ -513,11 +513,6 @@ int ice_setup_rx_ring(struct ice_rx_ring *rx_ring)
	if (ice_is_xdp_ena_vsi(rx_ring->vsi))
		WRITE_ONCE(rx_ring->xdp_prog, rx_ring->vsi->xdp_prog);

	if (rx_ring->vsi->type == ICE_VSI_PF &&
	    !xdp_rxq_info_is_reg(&rx_ring->xdp_rxq))
		if (xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev,
				     rx_ring->q_index, rx_ring->q_vector->napi.napi_id))
			goto err;
	return 0;

err:
@@ -603,8 +598,6 @@ ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
		ret = ICE_XDP_CONSUMED;
	}
exit:
	rx_buf->act = ret;
	if (unlikely(xdp_buff_has_frags(xdp)))
	ice_set_rx_bufs_act(xdp, rx_ring, ret);
}

@@ -893,7 +886,6 @@ ice_add_xdp_frag(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
	}

	if (unlikely(sinfo->nr_frags == MAX_SKB_FRAGS)) {
		if (unlikely(xdp_buff_has_frags(xdp)))
		ice_set_rx_bufs_act(xdp, rx_ring, ICE_XDP_CONSUMED);
		return -ENOMEM;
	}
@@ -901,6 +893,10 @@ ice_add_xdp_frag(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
	__skb_fill_page_desc_noacc(sinfo, sinfo->nr_frags++, rx_buf->page,
				   rx_buf->page_offset, size);
	sinfo->xdp_frags_size += size;
	/* remember frag count before XDP prog execution; bpf_xdp_adjust_tail()
	 * can pop off frags but driver has to handle it on its own
	 */
	rx_ring->nr_frags = sinfo->nr_frags;

	if (page_is_pfmemalloc(rx_buf->page))
		xdp_buff_set_frag_pfmemalloc(xdp);
@@ -1251,6 +1247,7 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)

		xdp->data = NULL;
		rx_ring->first_desc = ntc;
		rx_ring->nr_frags = 0;
		continue;
construct_skb:
		if (likely(ice_ring_uses_build_skb(rx_ring)))
@@ -1266,10 +1263,12 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
						    ICE_XDP_CONSUMED);
			xdp->data = NULL;
			rx_ring->first_desc = ntc;
			rx_ring->nr_frags = 0;
			break;
		}
		xdp->data = NULL;
		rx_ring->first_desc = ntc;
		rx_ring->nr_frags = 0;

		stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_RXE_S);
		if (unlikely(ice_test_staterr(rx_desc->wb.status_error0,
Loading