Commit 6089970b authored by Paolo Abeni
Browse files

Merge branch 'eth-fbnic-add-xdp-support-for-fbnic'

Mohsin Bashir says:

====================
eth: fbnic: Add XDP support for fbnic

This patch series introduces basic XDP support for fbnic. To enable this,
it also includes preparatory changes such as making the HDS threshold
configurable via ethtool, updating headroom for fbnic, tracking
frag state in shinfo, and prefetching the first cacheline of data.

V3: https://lore.kernel.org/netdev/20250812220150.161848-1-mohsin.bashr@gmail.com/
V2: https://lore.kernel.org/netdev/20250811211338.857992-1-mohsin.bashr@gmail.com/
V1: https://lore.kernel.org/netdev/20250723145926.4120434-1-mohsin.bashr@gmail.com/
====================

Link: https://patch.msgid.link/20250813221319.3367670-1-mohsin.bashr@gmail.com


Signed-off-by: Paolo Abeni <pabeni@redhat.com>
parents 38e14673 7fedb8f2
Loading
Loading
Loading
Loading
+11 −0
Original line number Diff line number Diff line
@@ -160,3 +160,14 @@ behavior and potential performance bottlenecks.
	  credit exhaustion
        - ``pcie_ob_rd_no_np_cred``: Read requests dropped due to non-posted
	  credit exhaustion

XDP Length Error:
~~~~~~~~~~~~~~~~~

For XDP programs without frags support, fbnic tries to make sure that the MTU
fits into a single buffer. If an oversized frame is received and gets
fragmented, it is dropped and the following netlink counters are updated:

   - ``rx-length``: number of frames dropped due to lack of fragmentation
     support in the attached XDP program
   - ``rx-errors``: total number of packets with errors received on the interface
+78 −4
Original line number Diff line number Diff line
@@ -2,6 +2,7 @@
/* Copyright (c) Meta Platforms, Inc. and affiliates. */

#include <linux/ethtool.h>
#include <linux/ethtool_netlink.h>
#include <linux/netdevice.h>
#include <linux/pci.h>
#include <net/ipv6.h>
@@ -111,6 +112,20 @@ static const struct fbnic_stat fbnic_gstrings_hw_q_stats[] = {
	 FBNIC_HW_RXB_DEQUEUE_STATS_LEN * FBNIC_RXB_DEQUEUE_INDICES + \
	 FBNIC_HW_Q_STATS_LEN * FBNIC_MAX_QUEUES)

/* Build a struct fbnic_stat entry for a member of struct fbnic_ring by
 * forwarding to FBNIC_STAT_FIELDS with the structure type fixed.
 */
#define FBNIC_QUEUE_STAT(name, stat) \
	FBNIC_STAT_FIELDS(fbnic_ring, name, stat)

/* Per-queue XDP Tx statistics exposed through ethtool. Each name is a
 * format template; the %u is filled in with the queue index when the
 * strings are generated.
 */
static const struct fbnic_stat fbnic_gstrings_xdp_stats[] = {
	FBNIC_QUEUE_STAT("xdp_tx_queue_%u_packets", stats.packets),
	FBNIC_QUEUE_STAT("xdp_tx_queue_%u_bytes", stats.bytes),
	FBNIC_QUEUE_STAT("xdp_tx_queue_%u_dropped", stats.dropped),
};

/* Number of statistics reported for a single XDP Tx queue */
#define FBNIC_XDP_STATS_LEN ARRAY_SIZE(fbnic_gstrings_xdp_stats)

/* Total ETH_SS_STATS entries: the hardware statistics followed by one
 * group of XDP statistics for each of the FBNIC_MAX_XDPQS queue slots.
 */
#define FBNIC_STATS_LEN \
	(FBNIC_HW_STATS_LEN + FBNIC_XDP_STATS_LEN * FBNIC_MAX_XDPQS)

static void
fbnic_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo)
{
@@ -160,6 +175,7 @@ static void fbnic_clone_swap_cfg(struct fbnic_net *orig,
	swap(clone->num_rx_queues, orig->num_rx_queues);
	swap(clone->num_tx_queues, orig->num_tx_queues);
	swap(clone->num_napi, orig->num_napi);
	swap(clone->hds_thresh, orig->hds_thresh);
}

static void fbnic_aggregate_vector_counters(struct fbnic_net *fbn,
@@ -277,15 +293,21 @@ fbnic_get_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring,
	ring->rx_mini_pending = fbn->hpq_size;
	ring->rx_jumbo_pending = fbn->ppq_size;
	ring->tx_pending = fbn->txq_size;

	kernel_ring->tcp_data_split = ETHTOOL_TCP_DATA_SPLIT_ENABLED;
	kernel_ring->hds_thresh_max = FBNIC_HDS_THRESH_MAX;
	kernel_ring->hds_thresh = fbn->hds_thresh;
}

/* Store the requested ring configuration in @fbn: the four ring sizes come
 * from @ring and the header-data-split threshold from @kernel_ring.
 */
static void fbnic_set_rings(struct fbnic_net *fbn,
			    struct ethtool_ringparam *ring)
			    struct ethtool_ringparam *ring,
			    struct kernel_ethtool_ringparam *kernel_ring)
{
	/* The ethtool "rx", "rx mini" and "rx jumbo" fields carry the
	 * rcq, hpq and ppq sizes respectively.
	 */
	fbn->rcq_size = ring->rx_pending;
	fbn->hpq_size = ring->rx_mini_pending;
	fbn->ppq_size = ring->rx_jumbo_pending;
	fbn->txq_size = ring->tx_pending;
	fbn->hds_thresh = kernel_ring->hds_thresh;
}

static int
@@ -316,8 +338,24 @@ fbnic_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring,
		return -EINVAL;
	}

	if (kernel_ring->tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_DISABLED) {
		NL_SET_ERR_MSG_MOD(extack, "Cannot disable TCP data split");
		return -EINVAL;
	}

	/* If an XDP program is attached, we should check for potential frame
	 * splitting. If the new HDS threshold can cause splitting, we should
	 * only allow if the attached XDP program can handle frags.
	 */
	if (fbnic_check_split_frames(fbn->xdp_prog, netdev->mtu,
				     kernel_ring->hds_thresh)) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Use higher HDS threshold or multi-buf capable program");
		return -EINVAL;
	}

	if (!netif_running(netdev)) {
		fbnic_set_rings(fbn, ring);
		fbnic_set_rings(fbn, ring, kernel_ring);
		return 0;
	}

@@ -325,7 +363,7 @@ fbnic_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring,
	if (!clone)
		return -ENOMEM;

	fbnic_set_rings(clone, ring);
	fbnic_set_rings(clone, ring, kernel_ring);

	err = fbnic_alloc_napi_vectors(clone);
	if (err)
@@ -398,6 +436,16 @@ static void fbnic_get_rxb_dequeue_strings(u8 **data, unsigned int idx)
		ethtool_sprintf(data, stat->string, idx);
}

/* Emit the ethtool statistic names for XDP Tx queue @idx: every name
 * template in fbnic_gstrings_xdp_stats is formatted with @idx and handed
 * to ethtool_sprintf() together with the @data cursor.
 */
static void fbnic_get_xdp_queue_strings(u8 **data, unsigned int idx)
{
	unsigned int n;

	for (n = 0; n < FBNIC_XDP_STATS_LEN; n++)
		ethtool_sprintf(data, fbnic_gstrings_xdp_stats[n].string, idx);
}

static void fbnic_get_strings(struct net_device *dev, u32 sset, u8 *data)
{
	const struct fbnic_stat *stat;
@@ -423,6 +471,9 @@ static void fbnic_get_strings(struct net_device *dev, u32 sset, u8 *data)
			for (i = 0; i < FBNIC_HW_Q_STATS_LEN; i++, stat++)
				ethtool_sprintf(&data, stat->string, idx);
		}

		for (i = 0; i < FBNIC_MAX_XDPQS; i++)
			fbnic_get_xdp_queue_strings(&data, i);
		break;
	}
}
@@ -440,6 +491,24 @@ static void fbnic_report_hw_stats(const struct fbnic_stat *stat,
	}
}

/* Copy the XDP Tx statistics of @ring into the ethtool output array.
 *
 * @ring: XDP Tx ring to report, or NULL if the queue slot is unused
 * @data: cursor into the output array; always advanced by
 *        FBNIC_XDP_STATS_LEN entries
 *
 * For a NULL @ring the slots are skipped without being written, so they
 * keep whatever value the caller placed there.
 */
static void fbnic_get_xdp_queue_stats(struct fbnic_ring *ring, u64 **data)
{
	const struct fbnic_stat *desc = fbnic_gstrings_xdp_stats;
	const u8 *base = (const u8 *)ring;
	u64 *out = *data;
	unsigned int n;

	/* The cursor moves past this queue's group in either case */
	*data = out + FBNIC_XDP_STATS_LEN;

	if (!ring)
		return;

	/* Each descriptor gives the byte offset of a u64 counter in the ring */
	for (n = 0; n < FBNIC_XDP_STATS_LEN; n++)
		out[n] = *(const u64 *)(base + desc[n].offset);
}

static void fbnic_get_ethtool_stats(struct net_device *dev,
				    struct ethtool_stats *stats, u64 *data)
{
@@ -487,13 +556,16 @@ static void fbnic_get_ethtool_stats(struct net_device *dev,
				      FBNIC_HW_Q_STATS_LEN, &data);
	}
	spin_unlock(&fbd->hw_stats_lock);

	for (i = 0; i < FBNIC_MAX_XDPQS; i++)
		fbnic_get_xdp_queue_stats(fbn->tx[i + FBNIC_MAX_TXQS], &data);
}

static int fbnic_get_sset_count(struct net_device *dev, int sset)
{
	switch (sset) {
	case ETH_SS_STATS:
		return FBNIC_HW_STATS_LEN;
		return FBNIC_STATS_LEN;
	default:
		return -EOPNOTSUPP;
	}
@@ -1678,6 +1750,8 @@ fbnic_get_rmon_stats(struct net_device *netdev,
static const struct ethtool_ops fbnic_ethtool_ops = {
	.supported_coalesce_params	= ETHTOOL_COALESCE_USECS |
					  ETHTOOL_COALESCE_RX_MAX_FRAMES,
	.supported_ring_params		= ETHTOOL_RING_USE_TCP_DATA_SPLIT |
					  ETHTOOL_RING_USE_HDS_THRS,
	.rxfh_max_num_contexts		= FBNIC_RPC_RSS_TBL_COUNT,
	.get_drvinfo			= fbnic_get_drvinfo,
	.get_regs_len			= fbnic_get_regs_len,
+74 −1
Original line number Diff line number Diff line
@@ -407,11 +407,12 @@ static void fbnic_get_stats64(struct net_device *dev,
			      struct rtnl_link_stats64 *stats64)
{
	u64 rx_bytes, rx_packets, rx_dropped = 0, rx_errors = 0;
	u64 rx_over = 0, rx_missed = 0, rx_length = 0;
	u64 tx_bytes, tx_packets, tx_dropped = 0;
	struct fbnic_net *fbn = netdev_priv(dev);
	struct fbnic_dev *fbd = fbn->fbd;
	struct fbnic_queue_stats *stats;
	u64 rx_over = 0, rx_missed = 0;

	unsigned int start, i;

	fbnic_get_hw_stats(fbd);
@@ -489,6 +490,7 @@ static void fbnic_get_stats64(struct net_device *dev,
	stats64->rx_missed_errors = rx_missed;

	for (i = 0; i < fbn->num_rx_queues; i++) {
		struct fbnic_ring *xdpr = fbn->tx[FBNIC_MAX_TXQS + i];
		struct fbnic_ring *rxr = fbn->rx[i];

		if (!rxr)
@@ -500,12 +502,64 @@ static void fbnic_get_stats64(struct net_device *dev,
			rx_bytes = stats->bytes;
			rx_packets = stats->packets;
			rx_dropped = stats->dropped;
			rx_length = stats->rx.length_errors;
		} while (u64_stats_fetch_retry(&stats->syncp, start));

		stats64->rx_bytes += rx_bytes;
		stats64->rx_packets += rx_packets;
		stats64->rx_dropped += rx_dropped;
		stats64->rx_errors += rx_length;
		stats64->rx_length_errors += rx_length;

		if (!xdpr)
			continue;

		stats = &xdpr->stats;
		do {
			start = u64_stats_fetch_begin(&stats->syncp);
			tx_bytes = stats->bytes;
			tx_packets = stats->packets;
			tx_dropped = stats->dropped;
		} while (u64_stats_fetch_retry(&stats->syncp, start));

		stats64->tx_bytes += tx_bytes;
		stats64->tx_packets += tx_packets;
		stats64->tx_dropped += tx_dropped;
	}
}

/* Report whether the given XDP program could be handed a split frame.
 *
 * @prog: XDP program to check, may be NULL
 * @mtu: device MTU
 * @hds_thresh: header-data-split threshold
 *
 * Returns false when there is no program or the program declares frags
 * support; otherwise returns true if @mtu plus the Ethernet header length
 * is larger than @hds_thresh.
 */
bool fbnic_check_split_frames(struct bpf_prog *prog, unsigned int mtu,
			      u32 hds_thresh)
{
	if (!prog || prog->aux->xdp_has_frags)
		return false;

	return hds_thresh < mtu + ETH_HLEN;
}

/* ndo_bpf handler: attach or detach an XDP program.
 *
 * Only XDP_SETUP_PROG is handled; any other command returns -EINVAL.
 * A program that fails the fbnic_check_split_frames() test against the
 * current MTU and HDS threshold is rejected with -EOPNOTSUPP and an
 * extack message. Otherwise the new program (possibly NULL) replaces the
 * one stored in the netdev private data and the reference on the previous
 * program, if any, is dropped. Returns 0 on success.
 */
static int fbnic_bpf(struct net_device *netdev, struct netdev_bpf *bpf)
{
	struct fbnic_net *fbn = netdev_priv(netdev);
	struct bpf_prog *new_prog, *old_prog;

	if (bpf->command != XDP_SETUP_PROG)
		return -EINVAL;

	new_prog = bpf->prog;

	if (fbnic_check_split_frames(new_prog, netdev->mtu, fbn->hds_thresh)) {
		NL_SET_ERR_MSG_MOD(bpf->extack,
				   "MTU too high, or HDS threshold is too low for single buffer XDP");
		return -EOPNOTSUPP;
	}

	/* Swap in the new program, then release the one it replaced */
	old_prog = xchg(&fbn->xdp_prog, new_prog);
	if (old_prog)
		bpf_prog_put(old_prog);

	return 0;
}

static const struct net_device_ops fbnic_netdev_ops = {
@@ -517,6 +571,7 @@ static const struct net_device_ops fbnic_netdev_ops = {
	.ndo_set_mac_address	= fbnic_set_mac,
	.ndo_set_rx_mode	= fbnic_set_rx_mode,
	.ndo_get_stats64	= fbnic_get_stats64,
	.ndo_bpf		= fbnic_bpf,
	.ndo_hwtstamp_get	= fbnic_hwtstamp_get,
	.ndo_hwtstamp_set	= fbnic_hwtstamp_set,
};
@@ -568,6 +623,7 @@ static void fbnic_get_queue_stats_tx(struct net_device *dev, int idx,
	struct fbnic_ring *txr = fbn->tx[idx];
	struct fbnic_queue_stats *stats;
	u64 stop, wake, csum, lso;
	struct fbnic_ring *xdpr;
	unsigned int start;
	u64 bytes, packets;

@@ -591,6 +647,19 @@ static void fbnic_get_queue_stats_tx(struct net_device *dev, int idx,
	tx->hw_gso_wire_packets = lso;
	tx->stop = stop;
	tx->wake = wake;

	xdpr = fbn->tx[FBNIC_MAX_TXQS + idx];
	if (xdpr) {
		stats = &xdpr->stats;
		do {
			start = u64_stats_fetch_begin(&stats->syncp);
			bytes = stats->bytes;
			packets = stats->packets;
		} while (u64_stats_fetch_retry(&stats->syncp, start));

		tx->bytes += bytes;
		tx->packets += packets;
	}
}

static void fbnic_get_base_stats(struct net_device *dev,
@@ -695,6 +764,10 @@ struct net_device *fbnic_netdev_alloc(struct fbnic_dev *fbd)
	fbn->rx_usecs = FBNIC_RX_USECS_DEFAULT;
	fbn->rx_max_frames = FBNIC_RX_FRAMES_DEFAULT;

	/* Initialize the hds_thresh */
	netdev->cfg->hds_thresh = FBNIC_HDS_THRESH_DEFAULT;
	fbn->hds_thresh = FBNIC_HDS_THRESH_DEFAULT;

	default_queues = netif_get_num_default_rss_queues();
	if (default_queues > fbd->max_num_queues)
		default_queues = fbd->max_num_queues;
+8 −1
Original line number Diff line number Diff line
@@ -18,7 +18,9 @@
#define FBNIC_TUN_GSO_FEATURES		NETIF_F_GSO_IPXIP6

struct fbnic_net {
	struct fbnic_ring *tx[FBNIC_MAX_TXQS];
	struct bpf_prog *xdp_prog;

	struct fbnic_ring *tx[FBNIC_MAX_TXQS + FBNIC_MAX_XDPQS];
	struct fbnic_ring *rx[FBNIC_MAX_RXQS];

	struct fbnic_napi_vector *napi[FBNIC_MAX_NAPI_VECTORS];
@@ -31,6 +33,8 @@ struct fbnic_net {
	u32 ppq_size;
	u32 rcq_size;

	u32 hds_thresh;

	u16 rx_usecs;
	u16 tx_usecs;

@@ -102,4 +106,7 @@ int fbnic_phylink_ethtool_ksettings_get(struct net_device *netdev,
int fbnic_phylink_get_fecparam(struct net_device *netdev,
			       struct ethtool_fecparam *fecparam);
int fbnic_phylink_init(struct net_device *netdev);

/* Returns true when @prog is an XDP program without frags support and
 * @mtu plus the Ethernet header length exceeds @hds_thresh; returns false
 * otherwise, including when @prog is NULL.
 */
bool fbnic_check_split_frames(struct bpf_prog *prog,
			      unsigned int mtu, u32 hds_thresh);
#endif /* _FBNIC_NETDEV_H_ */
+387 −71

File changed.

Preview size limit exceeded, changes collapsed.

Loading