Commit fdf8e6d1 authored by Paolo Abeni
Browse files
Daniel Borkmann says:

====================
pull-request: bpf 2024-01-25

The following pull-request contains BPF updates for your *net* tree.

We've added 12 non-merge commits during the last 2 day(s) which contain
a total of 13 files changed, 190 insertions(+), 91 deletions(-).

The main changes are:

1) Fix bpf_xdp_adjust_tail() in context of XSK zero-copy drivers which
   support XDP multi-buffer. The former triggered a NULL pointer
   dereference upon shrinking, from Maciej Fijalkowski & Tirthendu Sarkar.

2) Fix a bug in riscv64 BPF JIT which emitted a wrong prologue and
   epilogue for struct_ops programs, from Pu Lehui.

* tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf:
  i40e: update xdp_rxq_info::frag_size for ZC enabled Rx queue
  i40e: set xdp_rxq_info::frag_size
  xdp: reflect tail increase for MEM_TYPE_XSK_BUFF_POOL
  ice: update xdp_rxq_info::frag_size for ZC enabled Rx queue
  intel: xsk: initialize skb_frag_t::bv_offset in ZC drivers
  ice: remove redundant xdp_rxq_info registration
  i40e: handle multi-buffer packets that are shrunk by xdp prog
  ice: work on pre-XDP prog frag count
  xsk: fix usage of multi-buffer BPF helpers for ZC XDP
  xsk: make xsk_buff_pool responsible for clearing xdp_buff::flags
  xsk: recycle buffer in case Rx queue was full
  riscv, bpf: Fix unpredictable kernel crash about RV64 struct_ops
====================

Link: https://lore.kernel.org/r/20240125084416.10876-1-daniel@iogearbox.net


Signed-off-by: Paolo Abeni <pabeni@redhat.com>
parents 5e344807 9d71bc83
Loading
Loading
Loading
Loading
+3 −2
Original line number Diff line number Diff line
@@ -795,6 +795,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
	struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
	struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
	struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
	bool is_struct_ops = flags & BPF_TRAMP_F_INDIRECT;
	void *orig_call = func_addr;
	bool save_ret;
	u32 insn;
@@ -878,7 +879,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,

	stack_size = round_up(stack_size, 16);

	if (func_addr) {
	if (!is_struct_ops) {
		/* For the trampoline called from function entry,
		 * the frame of traced function and the frame of
		 * trampoline need to be considered.
@@ -998,7 +999,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,

	emit_ld(RV_REG_S1, -sreg_off, RV_REG_FP, ctx);

	if (func_addr) {
	if (!is_struct_ops) {
		/* trampoline called from function entry */
		emit_ld(RV_REG_T0, stack_size - 8, RV_REG_SP, ctx);
		emit_ld(RV_REG_FP, stack_size - 16, RV_REG_SP, ctx);
+31 −16
Original line number Diff line number Diff line
@@ -3588,40 +3588,55 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
	struct i40e_hmc_obj_rxq rx_ctx;
	int err = 0;
	bool ok;
	int ret;

	bitmap_zero(ring->state, __I40E_RING_STATE_NBITS);

	/* clear the context structure first */
	memset(&rx_ctx, 0, sizeof(rx_ctx));

	if (ring->vsi->type == I40E_VSI_MAIN)
		xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
	ring->rx_buf_len = vsi->rx_buf_len;

	/* XDP RX-queue info only needed for RX rings exposed to XDP */
	if (ring->vsi->type != I40E_VSI_MAIN)
		goto skip;

	if (!xdp_rxq_info_is_reg(&ring->xdp_rxq)) {
		err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
					 ring->queue_index,
					 ring->q_vector->napi.napi_id,
					 ring->rx_buf_len);
		if (err)
			return err;
	}

	ring->xsk_pool = i40e_xsk_pool(ring);
	if (ring->xsk_pool) {
		ring->rx_buf_len =
		  xsk_pool_get_rx_frame_size(ring->xsk_pool);
		ret = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
		xdp_rxq_info_unreg(&ring->xdp_rxq);
		ring->rx_buf_len = xsk_pool_get_rx_frame_size(ring->xsk_pool);
		err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
					 ring->queue_index,
					 ring->q_vector->napi.napi_id,
					 ring->rx_buf_len);
		if (err)
			return err;
		err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
						 MEM_TYPE_XSK_BUFF_POOL,
						 NULL);
		if (ret)
			return ret;
		if (err)
			return err;
		dev_info(&vsi->back->pdev->dev,
			 "Registered XDP mem model MEM_TYPE_XSK_BUFF_POOL on Rx ring %d\n",
			 ring->queue_index);

	} else {
		ring->rx_buf_len = vsi->rx_buf_len;
		if (ring->vsi->type == I40E_VSI_MAIN) {
			ret = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
		err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
						 MEM_TYPE_PAGE_SHARED,
						 NULL);
			if (ret)
				return ret;
		}
		if (err)
			return err;
	}

skip:
	xdp_init_buff(&ring->xdp, i40e_rx_pg_size(ring) / 2, &ring->xdp_rxq);

	rx_ctx.dbuff = DIV_ROUND_UP(ring->rx_buf_len,
+23 −26
Original line number Diff line number Diff line
@@ -1548,7 +1548,6 @@ void i40e_free_rx_resources(struct i40e_ring *rx_ring)
int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
{
	struct device *dev = rx_ring->dev;
	int err;

	u64_stats_init(&rx_ring->syncp);

@@ -1569,14 +1568,6 @@ int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
	rx_ring->next_to_process = 0;
	rx_ring->next_to_use = 0;

	/* XDP RX-queue info only needed for RX rings exposed to XDP */
	if (rx_ring->vsi->type == I40E_VSI_MAIN) {
		err = xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev,
				       rx_ring->queue_index, rx_ring->q_vector->napi.napi_id);
		if (err < 0)
			return err;
	}

	rx_ring->xdp_prog = rx_ring->vsi->xdp_prog;

	rx_ring->rx_bi =
@@ -2087,7 +2078,8 @@ static void i40e_put_rx_buffer(struct i40e_ring *rx_ring,
static void i40e_process_rx_buffs(struct i40e_ring *rx_ring, int xdp_res,
				  struct xdp_buff *xdp)
{
	u32 next = rx_ring->next_to_clean;
	u32 nr_frags = xdp_get_shared_info_from_buff(xdp)->nr_frags;
	u32 next = rx_ring->next_to_clean, i = 0;
	struct i40e_rx_buffer *rx_buffer;

	xdp->flags = 0;
@@ -2100,10 +2092,10 @@ static void i40e_process_rx_buffs(struct i40e_ring *rx_ring, int xdp_res,
		if (!rx_buffer->page)
			continue;

		if (xdp_res == I40E_XDP_CONSUMED)
			rx_buffer->pagecnt_bias++;
		else
		if (xdp_res != I40E_XDP_CONSUMED)
			i40e_rx_buffer_flip(rx_buffer, xdp->frame_sz);
		else if (i++ <= nr_frags)
			rx_buffer->pagecnt_bias++;

		/* EOP buffer will be put in i40e_clean_rx_irq() */
		if (next == rx_ring->next_to_process)
@@ -2117,20 +2109,20 @@ static void i40e_process_rx_buffs(struct i40e_ring *rx_ring, int xdp_res,
 * i40e_construct_skb - Allocate skb and populate it
 * @rx_ring: rx descriptor ring to transact packets on
 * @xdp: xdp_buff pointing to the data
 * @nr_frags: number of buffers for the packet
 *
 * This function allocates an skb.  It then populates it with the page
 * data from the current receive descriptor, taking care to set up the
 * skb correctly.
 */
static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
					  struct xdp_buff *xdp,
					  u32 nr_frags)
					  struct xdp_buff *xdp)
{
	unsigned int size = xdp->data_end - xdp->data;
	struct i40e_rx_buffer *rx_buffer;
	struct skb_shared_info *sinfo;
	unsigned int headlen;
	struct sk_buff *skb;
	u32 nr_frags = 0;

	/* prefetch first cache line of first page */
	net_prefetch(xdp->data);
@@ -2168,6 +2160,10 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
	memcpy(__skb_put(skb, headlen), xdp->data,
	       ALIGN(headlen, sizeof(long)));

	if (unlikely(xdp_buff_has_frags(xdp))) {
		sinfo = xdp_get_shared_info_from_buff(xdp);
		nr_frags = sinfo->nr_frags;
	}
	rx_buffer = i40e_rx_bi(rx_ring, rx_ring->next_to_clean);
	/* update all of the pointers */
	size -= headlen;
@@ -2187,9 +2183,8 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
	}

	if (unlikely(xdp_buff_has_frags(xdp))) {
		struct skb_shared_info *sinfo, *skinfo = skb_shinfo(skb);
		struct skb_shared_info *skinfo = skb_shinfo(skb);

		sinfo = xdp_get_shared_info_from_buff(xdp);
		memcpy(&skinfo->frags[skinfo->nr_frags], &sinfo->frags[0],
		       sizeof(skb_frag_t) * nr_frags);

@@ -2212,17 +2207,17 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
 * i40e_build_skb - Build skb around an existing buffer
 * @rx_ring: Rx descriptor ring to transact packets on
 * @xdp: xdp_buff pointing to the data
 * @nr_frags: number of buffers for the packet
 *
 * This function builds an skb around an existing Rx buffer, taking care
 * to set up the skb correctly and avoid any memcpy overhead.
 */
static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring,
				      struct xdp_buff *xdp,
				      u32 nr_frags)
				      struct xdp_buff *xdp)
{
	unsigned int metasize = xdp->data - xdp->data_meta;
	struct skb_shared_info *sinfo;
	struct sk_buff *skb;
	u32 nr_frags;

	/* Prefetch first cache line of first page. If xdp->data_meta
	 * is unused, this points exactly as xdp->data, otherwise we
@@ -2231,6 +2226,11 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring,
	 */
	net_prefetch(xdp->data_meta);

	if (unlikely(xdp_buff_has_frags(xdp))) {
		sinfo = xdp_get_shared_info_from_buff(xdp);
		nr_frags = sinfo->nr_frags;
	}

	/* build an skb around the page buffer */
	skb = napi_build_skb(xdp->data_hard_start, xdp->frame_sz);
	if (unlikely(!skb))
@@ -2243,9 +2243,6 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring,
		skb_metadata_set(skb, metasize);

	if (unlikely(xdp_buff_has_frags(xdp))) {
		struct skb_shared_info *sinfo;

		sinfo = xdp_get_shared_info_from_buff(xdp);
		xdp_update_skb_shared_info(skb, nr_frags,
					   sinfo->xdp_frags_size,
					   nr_frags * xdp->frame_sz,
@@ -2589,9 +2586,9 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget,
			total_rx_bytes += size;
		} else {
			if (ring_uses_build_skb(rx_ring))
				skb = i40e_build_skb(rx_ring, xdp, nfrags);
				skb = i40e_build_skb(rx_ring, xdp);
			else
				skb = i40e_construct_skb(rx_ring, xdp, nfrags);
				skb = i40e_construct_skb(rx_ring, xdp);

			/* drop if we failed to retrieve a buffer */
			if (!skb) {
+2 −2
Original line number Diff line number Diff line
@@ -414,7 +414,8 @@ i40e_add_xsk_frag(struct i40e_ring *rx_ring, struct xdp_buff *first,
	}

	__skb_fill_page_desc_noacc(sinfo, sinfo->nr_frags++,
				   virt_to_page(xdp->data_hard_start), 0, size);
				   virt_to_page(xdp->data_hard_start),
				   XDP_PACKET_HEADROOM, size);
	sinfo->xdp_frags_size += size;
	xsk_buff_add_frag(xdp);

@@ -498,7 +499,6 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget)
		xdp_res = i40e_run_xdp_zc(rx_ring, first, xdp_prog);
		i40e_handle_xdp_result_zc(rx_ring, first, rx_desc, &rx_packets,
					  &rx_bytes, xdp_res, &failure);
		first->flags = 0;
		next_to_clean = next_to_process;
		if (failure)
			break;
+23 −14
Original line number Diff line number Diff line
@@ -547,19 +547,27 @@ int ice_vsi_cfg_rxq(struct ice_rx_ring *ring)
	ring->rx_buf_len = ring->vsi->rx_buf_len;

	if (ring->vsi->type == ICE_VSI_PF) {
		if (!xdp_rxq_info_is_reg(&ring->xdp_rxq))
			/* coverity[check_return] */
			__xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
		if (!xdp_rxq_info_is_reg(&ring->xdp_rxq)) {
			err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
						 ring->q_index,
						 ring->q_vector->napi.napi_id,
					   ring->vsi->rx_buf_len);
						 ring->rx_buf_len);
			if (err)
				return err;
		}

		ring->xsk_pool = ice_xsk_pool(ring);
		if (ring->xsk_pool) {
			xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
			xdp_rxq_info_unreg(&ring->xdp_rxq);

			ring->rx_buf_len =
				xsk_pool_get_rx_frame_size(ring->xsk_pool);
			err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
						 ring->q_index,
						 ring->q_vector->napi.napi_id,
						 ring->rx_buf_len);
			if (err)
				return err;
			err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
							 MEM_TYPE_XSK_BUFF_POOL,
							 NULL);
@@ -571,13 +579,14 @@ int ice_vsi_cfg_rxq(struct ice_rx_ring *ring)
			dev_info(dev, "Registered XDP mem model MEM_TYPE_XSK_BUFF_POOL on Rx ring %d\n",
				 ring->q_index);
		} else {
			if (!xdp_rxq_info_is_reg(&ring->xdp_rxq))
				/* coverity[check_return] */
				__xdp_rxq_info_reg(&ring->xdp_rxq,
						   ring->netdev,
			if (!xdp_rxq_info_is_reg(&ring->xdp_rxq)) {
				err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
							 ring->q_index,
							 ring->q_vector->napi.napi_id,
						   ring->vsi->rx_buf_len);
							 ring->rx_buf_len);
				if (err)
					return err;
			}

			err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
							 MEM_TYPE_PAGE_SHARED,
Loading