Commit 1fdad81d authored by Jakub Kicinski's avatar Jakub Kicinski
Browse files

Merge branch 'big-tcp-without-hbh-in-ipv6'

Alice Mikityanska says:

====================
BIG TCP without HBH in IPv6

Resubmitting after the grace period.

This series is part 1 of "BIG TCP for UDP tunnels". Due to the number of
patches, I'm splitting it into two logical parts:

* Remove hop-by-hop header for BIG TCP IPv6 to align with BIG TCP IPv4.
* Fix up things that prevent BIG TCP from working with UDP tunnels.

The current BIG TCP IPv6 code inserts a hop-by-hop extension header with
32-bit length of the packet. When the packet is encapsulated, and either
the outer or the inner protocol is IPv6, or both are IPv6, there will be
1 or 2 HBH headers that need to be dealt with. The issues that arise:

1. The drivers don't strip it, and they'd all need to know the structure
of each tunnel protocol in order to strip it correctly, also taking into
account all combinations of IPv4/IPv6 inner/outer protocols.

2. Even if (1) is implemented, it would be an additional performance
penalty per aggregated packet.

3. The skb_gso_validate_network_len check is skipped in
ip6_finish_output_gso when IP6SKB_FAKEJUMBO is set, but it seems that it
would make sense to do the actual validation, just taking into account
the length of the HBH header. When the support for tunnels is added, it
becomes trickier, because there may be one or two HBH headers, depending
on whether it's IPv6 in IPv6 or not.

At the same time, having an HBH header to store the 32-bit length is not
strictly necessary, as BIG TCP IPv4 doesn't do anything like this and
just restores the length from skb->len. The same thing can be done for
BIG TCP IPv6. Removing HBH from BIG TCP would allow to simplify the
implementation significantly, and align it with BIG TCP IPv4, which has
been a long-standing goal.
====================

Link: https://patch.msgid.link/20260205133925.526371-1-alice.kernel@fastmail.im


Signed-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parents 5826eec8 35f66ce9
Loading
Loading
Loading
Loading
+0 −3
Original line number Diff line number Diff line
@@ -1463,9 +1463,6 @@ netdev_tx_t bnge_start_xmit(struct sk_buff *skb, struct net_device *dev)
			return NETDEV_TX_BUSY;
	}

	if (unlikely(ipv6_hopopt_jumbo_remove(skb)))
		goto tx_free;

	last_frag = skb_shinfo(skb)->nr_frags;

	txbd = &txr->tx_desc_ring[TX_RING(bn, prod)][TX_IDX(prod)];
+0 −21
Original line number Diff line number Diff line
@@ -517,9 +517,6 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev)
			return NETDEV_TX_BUSY;
	}

	if (unlikely(ipv6_hopopt_jumbo_remove(skb)))
		goto tx_free;

	length = skb->len;
	len = skb_headlen(skb);
	last_frag = skb_shinfo(skb)->nr_frags;
@@ -13881,7 +13878,6 @@ static bool bnxt_exthdr_check(struct bnxt *bp, struct sk_buff *skb, int nw_off,
			      u8 **nextp)
{
	struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + nw_off);
	struct hop_jumbo_hdr *jhdr;
	int hdr_count = 0;
	u8 *nexthdr;
	int start;
@@ -13910,24 +13906,7 @@ static bool bnxt_exthdr_check(struct bnxt *bp, struct sk_buff *skb, int nw_off,
		if (hdrlen > 64)
			return false;

		/* The ext header may be a hop-by-hop header inserted for
		 * big TCP purposes. This will be removed before sending
		 * from NIC, so do not count it.
		 */
		if (*nexthdr == NEXTHDR_HOP) {
			if (likely(skb->len <= GRO_LEGACY_MAX_SIZE))
				goto increment_hdr;

			jhdr = (struct hop_jumbo_hdr *)hp;
			if (jhdr->tlv_type != IPV6_TLV_JUMBO || jhdr->hdrlen != 0 ||
			    jhdr->nexthdr != IPPROTO_TCP)
				goto increment_hdr;

			goto next_hdr;
		}
increment_hdr:
		hdr_count++;
next_hdr:
		nexthdr = &hp->nexthdr;
		start += hdrlen;
	}
+0 −3
Original line number Diff line number Diff line
@@ -963,9 +963,6 @@ static int gve_try_tx_skb(struct gve_priv *priv, struct gve_tx_ring *tx,
	int num_buffer_descs;
	int total_num_descs;

	if (skb_is_gso(skb) && unlikely(ipv6_hopopt_jumbo_remove(skb)))
		goto drop;

	if (tx->dqo.qpl) {
		/* We do not need to verify the number of buffers used per
		 * packet or per segment in case of TSO as with 2K size buffers
+0 −3
Original line number Diff line number Diff line
@@ -2156,9 +2156,6 @@ ice_xmit_frame_ring(struct sk_buff *skb, struct ice_tx_ring *tx_ring)

	ice_trace(xmit_frame_ring, tx_ring, skb);

	if (unlikely(ipv6_hopopt_jumbo_remove(skb)))
		goto out_drop;

	count = ice_xmit_desc_count(skb);
	if (ice_chk_linearize(skb, count)) {
		if (__skb_linearize(skb))
+8 −34
Original line number Diff line number Diff line
@@ -636,28 +636,20 @@ static int get_real_size(const struct sk_buff *skb,
			 struct net_device *dev,
			 int *lso_header_size,
			 bool *inline_ok,
			 void **pfrag,
			 int *hopbyhop)
			 void **pfrag)
{
	struct mlx4_en_priv *priv = netdev_priv(dev);
	int real_size;

	if (shinfo->gso_size) {
		*inline_ok = false;
		*hopbyhop = 0;
		if (skb->encapsulation) {
			*lso_header_size = skb_inner_tcp_all_headers(skb);
		} else {
			/* Detects large IPV6 TCP packets and prepares for removal of
			 * HBH header that has been pushed by ip6_xmit(),
			 * mainly so that tcpdump can dissect them.
			 */
			if (ipv6_has_hopopt_jumbo(skb))
				*hopbyhop = sizeof(struct hop_jumbo_hdr);
			*lso_header_size = skb_tcp_all_headers(skb);
		}
		real_size = CTRL_SIZE + shinfo->nr_frags * DS_SIZE +
			ALIGN(*lso_header_size - *hopbyhop + 4, DS_SIZE);
			ALIGN(*lso_header_size + 4, DS_SIZE);
		if (unlikely(*lso_header_size != skb_headlen(skb))) {
			/* We add a segment for the skb linear buffer only if
			 * it contains data */
@@ -884,7 +876,6 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
	int desc_size;
	int real_size;
	u32 index, bf_index;
	struct ipv6hdr *h6;
	__be32 op_own;
	int lso_header_size;
	void *fragptr = NULL;
@@ -893,7 +884,6 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
	bool stop_queue;
	bool inline_ok;
	u8 data_offset;
	int hopbyhop;
	bool bf_ok;

	tx_ind = skb_get_queue_mapping(skb);
@@ -903,7 +893,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
		goto tx_drop;

	real_size = get_real_size(skb, shinfo, dev, &lso_header_size,
				  &inline_ok, &fragptr, &hopbyhop);
				  &inline_ok, &fragptr);
	if (unlikely(!real_size))
		goto tx_drop_count;

@@ -956,7 +946,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
		data = &tx_desc->data;
		data_offset = offsetof(struct mlx4_en_tx_desc, data);
	} else {
		int lso_align = ALIGN(lso_header_size - hopbyhop + 4, DS_SIZE);
		int lso_align = ALIGN(lso_header_size + 4, DS_SIZE);

		data = (void *)&tx_desc->lso + lso_align;
		data_offset = offsetof(struct mlx4_en_tx_desc, lso) + lso_align;
@@ -1021,31 +1011,15 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
			((ring->prod & ring->size) ?
				cpu_to_be32(MLX4_EN_BIT_DESC_OWN) : 0);

		lso_header_size -= hopbyhop;
		/* Fill in the LSO prefix */
		tx_desc->lso.mss_hdr_size = cpu_to_be32(
			shinfo->gso_size << 16 | lso_header_size);


		if (unlikely(hopbyhop)) {
			/* remove the HBH header.
			 * Layout: [Ethernet header][IPv6 header][HBH][TCP header]
			 */
			memcpy(tx_desc->lso.header, skb->data, ETH_HLEN + sizeof(*h6));
			h6 = (struct ipv6hdr *)((char *)tx_desc->lso.header + ETH_HLEN);
			h6->nexthdr = IPPROTO_TCP;
			/* Copy the TCP header after the IPv6 one */
			memcpy(h6 + 1,
			       skb->data + ETH_HLEN + sizeof(*h6) +
					sizeof(struct hop_jumbo_hdr),
			       tcp_hdrlen(skb));
			/* Leave ipv6 payload_len set to 0, as LSO v2 specs request. */
		} else {
		/* Copy headers;
		 * note that we already verified that it is linear
		 */
		memcpy(tx_desc->lso.header, skb->data, lso_header_size);
		}

		ring->tso_packets++;

		i = shinfo->gso_segs;
Loading