Commit 02614eee authored by Eric Dumazet's avatar Eric Dumazet Committed by Jakub Kicinski
Browse files

idpf: do not linearize big TSO packets



idpf has a limit on number of scatter-gather frags
that can be used per segment.

Currently, idpf_tx_start() checks if the limit is hit
and forces a linearization of the whole packet.

This requires high order allocations that can fail
under memory pressure. A full size BIG-TCP packet
would require order-7 alocation on x86_64 :/

We can move the check earlier from idpf_features_check()
for TSO packets, to force GSO in this case, removing the
cost of a big copy.

This means that a linearization will eventually happen
with sizes smaller than one MSS.

__idpf_chk_linearize() is renamed to idpf_chk_tso_segment()
and moved to idpf_lib.c

Signed-off-by: default avatarEric Dumazet <edumazet@google.com>
Cc: Przemek Kitszel <przemyslaw.kitszel@intel.com>
Cc: Jacob Keller <jacob.e.keller@intel.com>
Cc: Madhu Chittim <madhu.chittim@intel.com>
Cc: Pavan Kumar Linga <pavan.kumar.linga@intel.com>
Cc: Willem de Bruijn <willemb@google.com>
Cc: Andrew Lunn <andrew+netdev@lunn.ch>
Reviewed-by: default avatarJoshua Hay <joshua.a.hay@intel.com>
Tested-by: default avatarBrian Vazquez <brianvv@google.com>
Acked-by: default avatarTony Nguyen <anthony.l.nguyen@intel.com>
Link: https://patch.msgid.link/20250818195934.757936-1-edumazet@google.com


Signed-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parent cd31182c
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -148,6 +148,7 @@ enum idpf_vport_state {
 * @link_speed_mbps: Link speed in mbps
 * @vport_idx: Relative vport index
 * @max_tx_hdr_size: Max header length hardware can support
 * @tx_max_bufs: Max buffers that can be transmitted with scatter-gather
 * @state: See enum idpf_vport_state
 * @netstats: Packet and byte stats
 * @stats_lock: Lock to protect stats update
@@ -159,6 +160,7 @@ struct idpf_netdev_priv {
	u32 link_speed_mbps;
	u16 vport_idx;
	u16 max_tx_hdr_size;
	u16 tx_max_bufs;
	enum idpf_vport_state state;
	struct rtnl_link_stats64 netstats;
	spinlock_t stats_lock;
+96 −6
Original line number Diff line number Diff line
@@ -776,6 +776,7 @@ static int idpf_cfg_netdev(struct idpf_vport *vport)
	np->vport_idx = vport->idx;
	np->vport_id = vport->vport_id;
	np->max_tx_hdr_size = idpf_get_max_tx_hdr_size(adapter);
	np->tx_max_bufs = idpf_get_max_tx_bufs(adapter);

	spin_lock_init(&np->stats_lock);

@@ -2271,6 +2272,92 @@ static int idpf_change_mtu(struct net_device *netdev, int new_mtu)
	return err;
}

/**
 * idpf_chk_tso_segment - Check skb is not using too many buffers
 * @skb: send buffer
 * @max_bufs: maximum number of buffers
 *
 * For TSO we need to count the TSO header and segment payload separately.  As
 * such we need to check cases where we have max_bufs-1 fragments or more as we
 * can potentially require max_bufs+1 DMA transactions, 1 for the TSO header, 1
 * for the segment payload in the first descriptor, and another max_buf-1 for
 * the fragments.
 *
 * Returns true if the packet needs to be software segmented by core stack.
 */
static bool idpf_chk_tso_segment(const struct sk_buff *skb,
				 unsigned int max_bufs)
{
	const struct skb_shared_info *shinfo = skb_shinfo(skb);
	const skb_frag_t *frag, *stale;
	int nr_frags, sum;

	/* no need to check if number of frags is less than max_bufs - 1 */
	nr_frags = shinfo->nr_frags;
	if (nr_frags < (max_bufs - 1))
		return false;

	/* We need to walk through the list and validate that each group
	 * of max_bufs-2 fragments totals at least gso_size.
	 */
	nr_frags -= max_bufs - 2;
	frag = &shinfo->frags[0];

	/* Initialize size to the negative value of gso_size minus 1.  We use
	 * this as the worst case scenario in which the frag ahead of us only
	 * provides one byte which is why we are limited to max_bufs-2
	 * descriptors for a single transmit as the header and previous
	 * fragment are already consuming 2 descriptors.
	 */
	sum = 1 - shinfo->gso_size;

	/* Add size of frags 0 through 4 to create our initial sum */
	sum += skb_frag_size(frag++);
	sum += skb_frag_size(frag++);
	sum += skb_frag_size(frag++);
	sum += skb_frag_size(frag++);
	sum += skb_frag_size(frag++);

	/* Walk through fragments adding latest fragment, testing it, and
	 * then removing stale fragments from the sum.
	 */
	for (stale = &shinfo->frags[0];; stale++) {
		int stale_size = skb_frag_size(stale);

		sum += skb_frag_size(frag++);

		/* The stale fragment may present us with a smaller
		 * descriptor than the actual fragment size. To account
		 * for that we need to remove all the data on the front and
		 * figure out what the remainder would be in the last
		 * descriptor associated with the fragment.
		 */
		if (stale_size > IDPF_TX_MAX_DESC_DATA) {
			int align_pad = -(skb_frag_off(stale)) &
					(IDPF_TX_MAX_READ_REQ_SIZE - 1);

			sum -= align_pad;
			stale_size -= align_pad;

			do {
				sum -= IDPF_TX_MAX_DESC_DATA_ALIGNED;
				stale_size -= IDPF_TX_MAX_DESC_DATA_ALIGNED;
			} while (stale_size > IDPF_TX_MAX_DESC_DATA);
		}

		/* if sum is negative we failed to make sufficient progress */
		if (sum < 0)
			return true;

		if (!nr_frags--)
			break;

		sum -= stale_size;
	}

	return false;
}

/**
 * idpf_features_check - Validate packet conforms to limits
 * @skb: skb buffer
@@ -2292,12 +2379,15 @@ static netdev_features_t idpf_features_check(struct sk_buff *skb,
	if (skb->ip_summed != CHECKSUM_PARTIAL)
		return features;

	if (skb_is_gso(skb)) {
		/* We cannot support GSO if the MSS is going to be less than
		 * 88 bytes. If it is then we need to drop support for GSO.
		 */
	if (skb_is_gso(skb) &&
	    (skb_shinfo(skb)->gso_size < IDPF_TX_TSO_MIN_MSS))
		if (skb_shinfo(skb)->gso_size < IDPF_TX_TSO_MIN_MSS)
			features &= ~NETIF_F_GSO_MASK;
		else if (idpf_chk_tso_segment(skb, np->tx_max_bufs))
			features &= ~NETIF_F_GSO_MASK;
	}

	/* Ensure MACLEN is <= 126 bytes (63 words) and not an odd size */
	len = skb_network_offset(skb);
+22 −107
Original line number Diff line number Diff line
@@ -16,8 +16,28 @@ struct idpf_tx_stash {
#define idpf_tx_buf_compl_tag(buf)	(*(u32 *)&(buf)->priv)
LIBETH_SQE_CHECK_PRIV(u32);

static bool idpf_chk_linearize(struct sk_buff *skb, unsigned int max_bufs,
			       unsigned int count);
/**
 * idpf_chk_linearize - Check if skb exceeds max descriptors per packet
 * @skb: send buffer
 * @max_bufs: maximum scatter gather buffers for single packet
 * @count: number of buffers this packet needs
 *
 * Make sure we don't exceed maximum scatter gather buffers for a single
 * packet.
 * TSO case has been handled earlier from idpf_features_check().
 */
static bool idpf_chk_linearize(const struct sk_buff *skb,
			       unsigned int max_bufs,
			       unsigned int count)
{
	if (likely(count <= max_bufs))
		return false;

	if (skb_is_gso(skb))
		return false;

	return true;
}

/**
 * idpf_buf_lifo_push - push a buffer pointer onto stack
@@ -2627,111 +2647,6 @@ int idpf_tso(struct sk_buff *skb, struct idpf_tx_offload_params *off)
	return 1;
}

/**
 * __idpf_chk_linearize - Check skb is not using too many buffers
 * @skb: send buffer
 * @max_bufs: maximum number of buffers
 *
 * For TSO we need to count the TSO header and segment payload separately.  As
 * such we need to check cases where we have max_bufs-1 fragments or more as we
 * can potentially require max_bufs+1 DMA transactions, 1 for the TSO header, 1
 * for the segment payload in the first descriptor, and another max_buf-1 for
 * the fragments.
 */
static bool __idpf_chk_linearize(struct sk_buff *skb, unsigned int max_bufs)
{
	const struct skb_shared_info *shinfo = skb_shinfo(skb);
	const skb_frag_t *frag, *stale;
	int nr_frags, sum;

	/* no need to check if number of frags is less than max_bufs - 1 */
	nr_frags = shinfo->nr_frags;
	if (nr_frags < (max_bufs - 1))
		return false;

	/* We need to walk through the list and validate that each group
	 * of max_bufs-2 fragments totals at least gso_size.
	 */
	nr_frags -= max_bufs - 2;
	frag = &shinfo->frags[0];

	/* Initialize size to the negative value of gso_size minus 1.  We use
	 * this as the worst case scenario in which the frag ahead of us only
	 * provides one byte which is why we are limited to max_bufs-2
	 * descriptors for a single transmit as the header and previous
	 * fragment are already consuming 2 descriptors.
	 */
	sum = 1 - shinfo->gso_size;

	/* Add size of frags 0 through 4 to create our initial sum */
	sum += skb_frag_size(frag++);
	sum += skb_frag_size(frag++);
	sum += skb_frag_size(frag++);
	sum += skb_frag_size(frag++);
	sum += skb_frag_size(frag++);

	/* Walk through fragments adding latest fragment, testing it, and
	 * then removing stale fragments from the sum.
	 */
	for (stale = &shinfo->frags[0];; stale++) {
		int stale_size = skb_frag_size(stale);

		sum += skb_frag_size(frag++);

		/* The stale fragment may present us with a smaller
		 * descriptor than the actual fragment size. To account
		 * for that we need to remove all the data on the front and
		 * figure out what the remainder would be in the last
		 * descriptor associated with the fragment.
		 */
		if (stale_size > IDPF_TX_MAX_DESC_DATA) {
			int align_pad = -(skb_frag_off(stale)) &
					(IDPF_TX_MAX_READ_REQ_SIZE - 1);

			sum -= align_pad;
			stale_size -= align_pad;

			do {
				sum -= IDPF_TX_MAX_DESC_DATA_ALIGNED;
				stale_size -= IDPF_TX_MAX_DESC_DATA_ALIGNED;
			} while (stale_size > IDPF_TX_MAX_DESC_DATA);
		}

		/* if sum is negative we failed to make sufficient progress */
		if (sum < 0)
			return true;

		if (!nr_frags--)
			break;

		sum -= stale_size;
	}

	return false;
}

/**
 * idpf_chk_linearize - Check if skb exceeds max descriptors per packet
 * @skb: send buffer
 * @max_bufs: maximum scatter gather buffers for single packet
 * @count: number of buffers this packet needs
 *
 * Make sure we don't exceed maximum scatter gather buffers for a single
 * packet. We have to do some special checking around the boundary (max_bufs-1)
 * if TSO is on since we need count the TSO header and payload separately.
 * E.g.: a packet with 7 fragments can require 9 DMA transactions; 1 for TSO
 * header, 1 for segment payload, and then 7 for the fragments.
 */
static bool idpf_chk_linearize(struct sk_buff *skb, unsigned int max_bufs,
			       unsigned int count)
{
	if (likely(count < max_bufs))
		return false;
	if (skb_is_gso(skb))
		return __idpf_chk_linearize(skb, max_bufs);

	return count > max_bufs;
}

/**
 * idpf_tx_splitq_get_ctx_desc - grab next desc and update buffer ring