Commit feb2935e authored by Jakub Kicinski's avatar Jakub Kicinski
Browse files

Merge branch 'follow-up-on-deduplicate-cookie-logic'

Willem de Bruijn says:

====================
follow-up on deduplicate cookie logic

1/3: I came across a leftover from cookie deduplication, due to UDP
having two code paths: lockless fast path and locked cork path.

3/3: Even though the leftover was in the fast path, this prompted me
to complete coverage to the cork path.

2/3: That uncovered a subtle API inconsistency in how dontfrag is
configured. It should not be possible to switch DF mid datagram.
====================

Link: https://patch.msgid.link/20250307033620.411611-1-willemdebruijn.kernel@gmail.com


Signed-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parents 48c57a49 0922cb68
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -207,6 +207,7 @@ struct inet6_cork {
	struct ipv6_txoptions *opt;
	u8 hop_limit;
	u8 tclass;
	u8 dontfrag:1;
};

/* struct ipv6_pinfo - ipv6 private area */
+5 −6
Original line number Diff line number Diff line
@@ -1386,6 +1386,7 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
	}
	v6_cork->hop_limit = ipc6->hlimit;
	v6_cork->tclass = ipc6->tclass;
	v6_cork->dontfrag = ipc6->dontfrag;
	if (rt->dst.flags & DST_XFRM_TUNNEL)
		mtu = READ_ONCE(np->pmtudisc) >= IPV6_PMTUDISC_PROBE ?
		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
@@ -1421,7 +1422,7 @@ static int __ip6_append_data(struct sock *sk,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, size_t length, int transhdrlen,
			     unsigned int flags, struct ipcm6_cookie *ipc6)
			     unsigned int flags)
{
	struct sk_buff *skb, *skb_prev = NULL;
	struct inet_cork *cork = &cork_full->base;
@@ -1475,7 +1476,7 @@ static int __ip6_append_data(struct sock *sk,
	if (headersize + transhdrlen > mtu)
		goto emsgsize;

	if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
	if (cork->length + length > mtu - headersize && v6_cork->dontfrag &&
	    (sk->sk_protocol == IPPROTO_UDP ||
	     sk->sk_protocol == IPPROTO_ICMPV6 ||
	     sk->sk_protocol == IPPROTO_RAW)) {
@@ -1855,7 +1856,7 @@ int ip6_append_data(struct sock *sk,

	return __ip6_append_data(sk, &sk->sk_write_queue, &inet->cork,
				 &np->cork, sk_page_frag(sk), getfrag,
				 from, length, transhdrlen, flags, ipc6);
				 from, length, transhdrlen, flags);
}
EXPORT_SYMBOL_GPL(ip6_append_data);

@@ -2054,13 +2055,11 @@ struct sk_buff *ip6_make_skb(struct sock *sk,
		ip6_cork_release(cork, &v6_cork);
		return ERR_PTR(err);
	}
	if (ipc6->dontfrag < 0)
		ipc6->dontfrag = inet6_test_bit(DONTFRAG, sk);

	err = __ip6_append_data(sk, &queue, cork, &v6_cork,
				&current->task_frag, getfrag, from,
				length + exthdrlen, transhdrlen + exthdrlen,
				flags, ipc6);
				flags);
	if (err) {
		__ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
		return ERR_PTR(err);
+7 −4
Original line number Diff line number Diff line
@@ -50,8 +50,9 @@ check_result() {
# IPV6_DONTFRAG
for ovr in setsock cmsg both diff; do
    for df in 0 1; do
	for p in u i r; do
	for p in u U i r; do
	    [ $p == "u" ] && prot=UDP
	    [ $p == "U" ] && prot=UDP
	    [ $p == "i" ] && prot=ICMP
	    [ $p == "r" ] && prot=RAW

@@ -81,8 +82,9 @@ test_dscp() {
    ip $IPVER -netns $NS route add table 300 prohibit any

    for ovr in setsock cmsg both diff; do
	for p in u i r; do
	for p in u U i r; do
	    [ $p == "u" ] && prot=UDP
	    [ $p == "U" ] && prot=UDP
	    [ $p == "i" ] && prot=ICMP
	    [ $p == "r" ] && prot=RAW

@@ -134,8 +136,9 @@ test_ttl_hoplimit() {
    local -r LIM=4

    for ovr in setsock cmsg both diff; do
	for p in u i r; do
	for p in u U i r; do
	    [ $p == "u" ] && prot=UDP
	    [ $p == "U" ] && prot=UDP
	    [ $p == "i" ] && prot=ICMP
	    [ $p == "r" ] && prot=RAW

@@ -166,7 +169,7 @@ test_ttl_hoplimit -4 $TGT4 ttl
test_ttl_hoplimit -6 $TGT6 hlim

# IPV6 exthdr
for p in u i r; do
for p in u U i r; do
    # Very basic "does it crash" test
    for h in h d r; do
	$NSEXE ./cmsg_sender -p $p -6 -H $h $TGT6 1234
+19 −5
Original line number Diff line number Diff line
@@ -33,6 +33,7 @@ enum {
	ERN_RECVERR,
	ERN_CMSG_RD,
	ERN_CMSG_RCV,
	ERN_SEND_MORE,
};

struct option_cmsg_u32 {
@@ -46,6 +47,7 @@ struct options {
	const char *service;
	unsigned int size;
	unsigned int num_pkt;
	bool msg_more;
	struct {
		unsigned int mark;
		unsigned int dontfrag;
@@ -94,7 +96,8 @@ static void __attribute__((noreturn)) cs_usage(const char *bin)
	       "\t\t-S      send() size\n"
	       "\t\t-4/-6   Force IPv4 / IPv6 only\n"
	       "\t\t-p prot Socket protocol\n"
	       "\t\t        (u = UDP (default); i = ICMP; r = RAW)\n"
	       "\t\t        (u = UDP (default); i = ICMP; r = RAW;\n"
	       "\t\t         U = UDP with MSG_MORE)\n"
	       "\n"
	       "\t\t-m val  Set SO_MARK with given value\n"
	       "\t\t-M val  Set SO_MARK via setsockopt\n"
@@ -109,8 +112,8 @@ static void __attribute__((noreturn)) cs_usage(const char *bin)
	       "\t\t-l val  Set TTL/HOPLIMIT via cmsg\n"
	       "\t\t-L val  Set TTL/HOPLIMIT via setsockopt\n"
	       "\t\t-H type Add an IPv6 header option\n"
	       "\t\t        (h = HOP; d = DST; r = RTDST)"
	       "");
	       "\t\t        (h = HOP; d = DST; r = RTDST)\n"
	       "\n");
	exit(ERN_HELP);
}

@@ -133,8 +136,11 @@ static void cs_parse_args(int argc, char *argv[])
			opt.sock.family = AF_INET6;
			break;
		case 'p':
			if (*optarg == 'u' || *optarg == 'U') {
			if (*optarg == 'u') {
				opt.sock.proto = IPPROTO_UDP;
			} else if (*optarg == 'U') {
				opt.sock.proto = IPPROTO_UDP;
				opt.msg_more = true;
			} else if (*optarg == 'i' || *optarg == 'I') {
				opt.sock.proto = IPPROTO_ICMP;
			} else if (*optarg == 'r') {
@@ -531,7 +537,7 @@ int main(int argc, char *argv[])
	cs_write_cmsg(fd, &msg, cbuf, sizeof(cbuf));

	for (i = 0; i < opt.num_pkt; i++) {
		err = sendmsg(fd, &msg, 0);
		err = sendmsg(fd, &msg, opt.msg_more ? MSG_MORE : 0);
		if (err < 0) {
			if (!opt.silent_send)
				fprintf(stderr, "send failed: %s\n", strerror(errno));
@@ -542,6 +548,14 @@ int main(int argc, char *argv[])
			err = ERN_SEND_SHORT;
			goto err_out;
		}
		if (opt.msg_more) {
			err = write(fd, NULL, 0);
			if (err < 0) {
				fprintf(stderr, "send more: %s\n", strerror(errno));
				err = ERN_SEND_MORE;
				goto err_out;
			}
		}
	}
	err = ERN_SUCCESS;