Commit 93f53db9 authored by Michal Kubiak, committed by Tony Nguyen
Browse files

ice: switch to Page Pool



This patch completes the transition of the ice driver to use the Page Pool
and libeth APIs, following the same direction as commit 5fa4caff
("iavf: switch to Page Pool"). With the legacy page splitting and recycling
logic already removed, the driver is now in a clean state to adopt the
modern memory model.

The Page Pool integration simplifies buffer management by offloading
DMA mapping and recycling to the core infrastructure. This eliminates
the need for driver-specific handling of headroom, buffer sizing, and
page order. The libeth helper is used for CPU-side processing, while
DMA-for-device is handled by the Page Pool core.

Additionally, this patch extends the conversion to cover XDP support.
The driver now uses libeth_xdp helpers for Rx buffer processing,
and optimizes XDP_TX by skipping per-frame DMA mapping. Instead, all
buffers are mapped as bi-directional up front, leveraging Page Pool's
lifecycle management. This significantly reduces overhead in virtualized
environments.

Performance observations:
- In typical scenarios (netperf, XDP_PASS, XDP_DROP), performance remains
  on par with the previous implementation.
- In XDP_TX mode:
  * With IOMMU enabled, performance improves dramatically - over 5x
    increase - due to reduced DMA mapping overhead and better memory reuse.
  * With IOMMU disabled, performance remains comparable to the previous
    implementation, with no significant changes observed.
- In XDP_DROP mode:
  * For small MTUs (where multiple buffers can be allocated on a single
    memory page), a performance drop of approximately 20% is observed.
    According to 'perf top' analysis, the bottleneck is caused by atomic
    reference counter increments in the Page Pool.
  * For normal MTUs (where only one buffer can be allocated within a
    single memory page), performance remains comparable to baseline
    levels.

This change is also a step toward a more modular and unified XDP
implementation across Intel Ethernet drivers, aligning with ongoing
efforts to consolidate and streamline feature support.

Suggested-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Suggested-by: Alexander Lobakin <aleksander.lobakin@intel.com>
Reviewed-by: Alexander Lobakin <aleksander.lobakin@intel.com>
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Signed-off-by: Michal Kubiak <michal.kubiak@intel.com>
Tested-by: Alexander Nowlin <alexander.nowlin@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
parent 3a4f419f
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -296,6 +296,7 @@ config ICE
	depends on GNSS || GNSS = n
	select AUXILIARY_BUS
	select DIMLIB
	select LIBETH_XDP
	select LIBIE
	select LIBIE_ADMINQ
	select LIBIE_FWLOG
+40 −51
Original line number Diff line number Diff line
@@ -2,6 +2,7 @@
/* Copyright (c) 2019, Intel Corporation. */

#include <net/xdp_sock_drv.h>
#include <linux/net/intel/libie/rx.h>
#include "ice_base.h"
#include "ice_lib.h"
#include "ice_dcb_lib.h"
@@ -495,7 +496,7 @@ static int ice_setup_rx_ctx(struct ice_rx_ring *ring)
	/* Receive Packet Data Buffer Size.
	 * The Packet Data Buffer Size is defined in 128 byte units.
	 */
	rlan_ctx.dbuf = DIV_ROUND_UP(ICE_RXBUF_3072,
	rlan_ctx.dbuf = DIV_ROUND_UP(ring->rx_buf_len,
				     BIT_ULL(ICE_RLAN_CTX_DBUF_S));

	/* use 32 byte descriptors */
@@ -537,7 +538,7 @@ static int ice_setup_rx_ctx(struct ice_rx_ring *ring)
	 * than 5 x DBUF
	 */
	rlan_ctx.rxmax = min_t(u32, vsi->max_frame,
			       ICE_MAX_CHAINED_RX_BUFS * ICE_RXBUF_3072);
			       ICE_MAX_CHAINED_RX_BUFS * ring->rx_buf_len);

	/* Rx queue threshold in units of 64 */
	rlan_ctx.lrxqthresh = 1;
@@ -573,8 +574,6 @@ static int ice_setup_rx_ctx(struct ice_rx_ring *ring)
	if (vsi->type == ICE_VSI_VF)
		return 0;

	ring->rx_offset = ICE_SKB_PAD;

	/* init queue specific tail register */
	ring->tail = hw->hw_addr + QRX_TAIL(pf_q);
	writel(0, ring->tail);
@@ -582,38 +581,6 @@ static int ice_setup_rx_ctx(struct ice_rx_ring *ring)
	return 0;
}

static void ice_xsk_pool_fill_cb(struct ice_rx_ring *ring)
{
	void *ctx_ptr = &ring->pkt_ctx;
	struct xsk_cb_desc desc = {};

	XSK_CHECK_PRIV_TYPE(struct ice_xdp_buff);
	desc.src = &ctx_ptr;
	desc.off = offsetof(struct ice_xdp_buff, pkt_ctx) -
		   sizeof(struct xdp_buff);
	desc.bytes = sizeof(ctx_ptr);
	xsk_pool_fill_cb(ring->xsk_pool, &desc);
}

/**
 * ice_get_frame_sz - calculate xdp_buff::frame_sz
 * @rx_ring: the ring being configured
 *
 * Return: @rx_ring->rx_buf_len when PAGE_SIZE is at least 8192,
 * otherwise PAGE_SIZE.
 */
static unsigned int ice_get_frame_sz(struct ice_rx_ring *rx_ring)
{
#if (PAGE_SIZE >= 8192)
	/* large pages: frame size follows the ring's buffer length */
	return rx_ring->rx_buf_len;
#else
	/* smaller pages: frame size is the page size itself */
	return PAGE_SIZE;
#endif
}

/**
 * ice_vsi_cfg_rxq - Configure an Rx queue
 * @ring: the ring being configured
@@ -622,8 +589,14 @@ static unsigned int ice_get_frame_sz(struct ice_rx_ring *rx_ring)
 */
static int ice_vsi_cfg_rxq(struct ice_rx_ring *ring)
{
	struct libeth_fq fq = {
		.count		= ring->count,
		.nid		= NUMA_NO_NODE,
		.xdp		= ice_is_xdp_ena_vsi(ring->vsi),
		.buf_len	= LIBIE_MAX_RX_BUF_LEN,
	};
	struct device *dev = ice_pf_to_dev(ring->vsi->back);
	u32 num_bufs = ICE_RX_DESC_UNUSED(ring);
	u32 num_bufs = ICE_DESC_UNUSED(ring);
	u32 rx_buf_len;
	int err;

@@ -632,12 +605,16 @@ static int ice_vsi_cfg_rxq(struct ice_rx_ring *ring)
			err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
						 ring->q_index,
						 ring->q_vector->napi.napi_id,
						 ICE_RXBUF_3072);
						 ring->rx_buf_len);
			if (err)
				return err;
		}

		ice_rx_xsk_pool(ring);
		err = ice_realloc_rx_xdp_bufs(ring, ring->xsk_pool);
		if (err)
			return err;

		if (ring->xsk_pool) {
			xdp_rxq_info_unreg(&ring->xdp_rxq);

@@ -655,36 +632,38 @@ static int ice_vsi_cfg_rxq(struct ice_rx_ring *ring)
			if (err)
				return err;
			xsk_pool_set_rxq_info(ring->xsk_pool, &ring->xdp_rxq);
			ice_xsk_pool_fill_cb(ring);

			dev_info(dev, "Registered XDP mem model MEM_TYPE_XSK_BUFF_POOL on Rx ring %d\n",
				 ring->q_index);
		} else {
			err = libeth_rx_fq_create(&fq, &ring->q_vector->napi);
			if (err)
				return err;

			ring->pp = fq.pp;
			ring->rx_fqes = fq.fqes;
			ring->truesize = fq.truesize;
			ring->rx_buf_len = fq.buf_len;

			if (!xdp_rxq_info_is_reg(&ring->xdp_rxq)) {
				err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
							 ring->q_index,
							 ring->q_vector->napi.napi_id,
							 ICE_RXBUF_3072);
							 ring->rx_buf_len);
				if (err)
					return err;
					goto err_destroy_fq;
			}

			err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
							 MEM_TYPE_PAGE_SHARED,
							 NULL);
			if (err)
				return err;
			xdp_rxq_info_attach_page_pool(&ring->xdp_rxq,
						      ring->pp);
		}
	}

	xdp_init_buff(&ring->xdp, ice_get_frame_sz(ring), &ring->xdp_rxq);
	ring->xdp.data = NULL;
	ring->xdp_ext.pkt_ctx = &ring->pkt_ctx;
	err = ice_setup_rx_ctx(ring);
	if (err) {
		dev_err(dev, "ice_setup_rx_ctx failed for RxQ %d, err %d\n",
			ring->q_index, err);
		return err;
		goto err_destroy_fq;
	}

	if (ring->xsk_pool) {
@@ -712,9 +691,19 @@ static int ice_vsi_cfg_rxq(struct ice_rx_ring *ring)
	if (ring->vsi->type == ICE_VSI_CTRL)
		ice_init_ctrl_rx_descs(ring, num_bufs);
	else
		ice_alloc_rx_bufs(ring, num_bufs);
		err = ice_alloc_rx_bufs(ring, num_bufs);

	if (err)
		goto err_destroy_fq;

	return 0;

err_destroy_fq:
	libeth_rx_fq_destroy(&fq);
	ring->rx_fqes = NULL;
	ring->pp = NULL;

	return err;
}

int ice_vsi_cfg_single_rxq(struct ice_vsi *vsi, u16 q_idx)
+9 −8
Original line number Diff line number Diff line
@@ -10,6 +10,7 @@
#include "ice_lib.h"
#include "ice_dcb_lib.h"
#include <net/dcbnl.h>
#include <net/libeth/rx.h>

struct ice_stats {
	char stat_string[ETH_GSTRING_LEN];
@@ -1230,8 +1231,9 @@ static int ice_diag_send(struct ice_tx_ring *tx_ring, u8 *data, u16 size)
 */
static int ice_lbtest_receive_frames(struct ice_rx_ring *rx_ring)
{
	struct ice_rx_buf *rx_buf;
	struct libeth_fqe *rx_buf;
	int valid_frames, i;
	struct page *page;
	u8 *received_buf;

	valid_frames = 0;
@@ -1246,8 +1248,10 @@ static int ice_lbtest_receive_frames(struct ice_rx_ring *rx_ring)
		     cpu_to_le16(BIT(ICE_RX_FLEX_DESC_STATUS0_EOF_S)))))
			continue;

		rx_buf = &rx_ring->rx_buf[i];
		received_buf = page_address(rx_buf->page) + rx_buf->page_offset;
		rx_buf = &rx_ring->rx_fqes[i];
		page = __netmem_to_page(rx_buf->netmem);
		received_buf = page_address(page) + rx_buf->offset +
			       page->pp->p.offset;

		if (ice_lbtest_check_frame(received_buf))
			valid_frames++;
@@ -3303,7 +3307,8 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring,
		rx_rings[i].count = new_rx_cnt;
		rx_rings[i].cached_phctime = pf->ptp.cached_phc_time;
		rx_rings[i].desc = NULL;
		rx_rings[i].rx_buf = NULL;
		rx_rings[i].xdp_buf = NULL;

		/* this is to allow wr32 to have something to write to
		 * during early allocation of Rx buffers
		 */
@@ -3312,10 +3317,6 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring,
		err = ice_setup_rx_ring(&rx_rings[i]);
		if (err)
			goto rx_unwind;

		/* allocate Rx buffers */
		err = ice_alloc_rx_bufs(&rx_rings[i],
					ICE_RX_DESC_UNUSED(&rx_rings[i]));
rx_unwind:
		if (err) {
			while (i) {
+0 −1
Original line number Diff line number Diff line
@@ -1427,7 +1427,6 @@ static int ice_vsi_alloc_rings(struct ice_vsi *vsi)
		ring->reg_idx = vsi->rxq_map[i];
		ring->vsi = vsi;
		ring->netdev = vsi->netdev;
		ring->dev = dev;
		ring->count = vsi->num_rx_desc;
		ring->cached_phctime = pf->ptp.cached_phc_time;

+2 −8
Original line number Diff line number Diff line
@@ -37,6 +37,8 @@ static const char ice_copyright[] = "Copyright (c) 2018, Intel Corporation.";
#define ICE_DDP_PKG_FILE	ICE_DDP_PKG_PATH "ice.pkg"

MODULE_DESCRIPTION(DRV_SUMMARY);
MODULE_IMPORT_NS("LIBETH");
MODULE_IMPORT_NS("LIBETH_XDP");
MODULE_IMPORT_NS("LIBIE");
MODULE_IMPORT_NS("LIBIE_ADMINQ");
MODULE_IMPORT_NS("LIBIE_FWLOG");
@@ -3015,19 +3017,11 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog,
			}
		}
		xdp_features_set_redirect_target(vsi->netdev, true);
		/* reallocate Rx queues that are used for zero-copy */
		xdp_ring_err = ice_realloc_zc_buf(vsi, true);
		if (xdp_ring_err)
			NL_SET_ERR_MSG_MOD(extack, "Setting up XDP Rx resources failed");
	} else if (ice_is_xdp_ena_vsi(vsi) && !prog) {
		xdp_features_clear_redirect_target(vsi->netdev);
		xdp_ring_err = ice_destroy_xdp_rings(vsi, ICE_XDP_CFG_FULL);
		if (xdp_ring_err)
			NL_SET_ERR_MSG_MOD(extack, "Freeing XDP Tx resources failed");
		/* reallocate Rx queues that were used for zero-copy */
		xdp_ring_err = ice_realloc_zc_buf(vsi, false);
		if (xdp_ring_err)
			NL_SET_ERR_MSG_MOD(extack, "Freeing XDP Rx resources failed");
	}

resume_if:
Loading