Commit d8c4ef76 authored by Jakub Kicinski
Browse files

Merge branch 'ipv6-avoid-atomic-fragment-on-gso-output'

Yan Zhai says:

====================
ipv6: avoid atomic fragment on GSO output

When the IPv6 stack outputs a GSO packet whose gso_size is larger than
the dst MTU, all of its segments are fragmented. However, a GSO packet
can have a trailing segment whose actual size is smaller than both
gso_size and the MTU, which leads to an "atomic fragment". Atomic
fragments are considered harmful in RFC 8021. An existing report from
APNIC also shows that atomic fragments are more likely to be dropped,
even though they are equivalent to a no-op [1].

The series contains the following changes:
* drop feature RTAX_FEATURE_ALLFRAG, which has been broken. This helps
  simplify the other changes in this set.
* refactor __ip6_finish_output code to separate GSO and non-GSO packet
  processing, mirroring IPv4 side logic.
* avoid generating atomic fragment on GSO packets.

Link: https://www.potaroo.net/presentations/2022-03-01-ipv6-frag.pdf [1]

V4: https://lore.kernel.org/netdev/cover.1698114636.git.yan@cloudflare.com/
V3: https://lore.kernel.org/netdev/cover.1697779681.git.yan@cloudflare.com/
V2: https://lore.kernel.org/netdev/ZS1%2Fqtr0dZJ35VII@debian.debian/
====================

Link: https://lore.kernel.org/r/cover.1698156966.git.yan@cloudflare.com


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 8846f9a0 03d6c848
Loading
Loading
Loading
Loading
+0 −7
Original line number Diff line number Diff line
@@ -222,13 +222,6 @@ static inline unsigned long dst_metric_rtt(const struct dst_entry *dst, int metr
	return msecs_to_jiffies(dst_metric(dst, metric));
}

static inline u32
dst_allfrag(const struct dst_entry *dst)
{
	int ret = dst_feature(dst,  RTAX_FEATURE_ALLFRAG);
	return ret;
}

static inline int
dst_metric_locked(const struct dst_entry *dst, int metric)
{
+0 −1
Original line number Diff line number Diff line
@@ -44,7 +44,6 @@ struct inet_connection_sock_af_ops {
				      struct request_sock *req_unhash,
				      bool *own_req);
	u16	    net_header_len;
	u16	    net_frag_header_len;
	u16	    sockaddr_len;
	int	    (*setsockopt)(struct sock *sk, int level, int optname,
				  sockptr_t optval, unsigned int optlen);
+0 −1
Original line number Diff line number Diff line
@@ -244,7 +244,6 @@ struct inet_sock {
};

#define IPCORK_OPT	1	/* ip-options has been held in ipcork.opt */
#define IPCORK_ALLFRAG	2	/* always fragment (for ipv6 for now) */

enum {
	INET_FLAGS_PKTINFO	= 0,
+1 −1
Original line number Diff line number Diff line
@@ -505,7 +505,7 @@ enum {
#define RTAX_FEATURE_ECN		(1 << 0)
#define RTAX_FEATURE_SACK		(1 << 1) /* unused */
#define RTAX_FEATURE_TIMESTAMP		(1 << 2) /* unused */
#define RTAX_FEATURE_ALLFRAG		(1 << 3)
#define RTAX_FEATURE_ALLFRAG		(1 << 3) /* unused */
#define RTAX_FEATURE_TCP_USEC_TS	(1 << 4)

#define RTAX_FEATURE_MASK	(RTAX_FEATURE_ECN |		\
+1 −19
Original line number Diff line number Diff line
@@ -1698,14 +1698,6 @@ static inline int __tcp_mtu_to_mss(struct sock *sk, int pmtu)
	 */
	mss_now = pmtu - icsk->icsk_af_ops->net_header_len - sizeof(struct tcphdr);

	/* IPv6 adds a frag_hdr in case RTAX_FEATURE_ALLFRAG is set */
	if (icsk->icsk_af_ops->net_frag_header_len) {
		const struct dst_entry *dst = __sk_dst_get(sk);

		if (dst && dst_allfrag(dst))
			mss_now -= icsk->icsk_af_ops->net_frag_header_len;
	}

	/* Clamp it (mss_clamp does not include tcp options) */
	if (mss_now > tp->rx_opt.mss_clamp)
		mss_now = tp->rx_opt.mss_clamp;
@@ -1733,21 +1725,11 @@ int tcp_mss_to_mtu(struct sock *sk, int mss)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	const struct inet_connection_sock *icsk = inet_csk(sk);
	int mtu;

	mtu = mss +
	return mss +
	      tp->tcp_header_len +
	      icsk->icsk_ext_hdr_len +
	      icsk->icsk_af_ops->net_header_len;

	/* IPv6 adds a frag_hdr in case RTAX_FEATURE_ALLFRAG is set */
	if (icsk->icsk_af_ops->net_frag_header_len) {
		const struct dst_entry *dst = __sk_dst_get(sk);

		if (dst && dst_allfrag(dst))
			mtu += icsk->icsk_af_ops->net_frag_header_len;
	}
	return mtu;
}
EXPORT_SYMBOL(tcp_mss_to_mtu);

Loading