Commit 6b724bc4, authored by Eric Dumazet, committed by David S. Miller
Browse files

ipv6: lockless IPV6_MTU_DISCOVER implementation



Most np->pmtudisc reads are racy.

Move this 3-bit field to a full byte, add READ_ONCE()/WRITE_ONCE()
annotations, and make IPV6_MTU_DISCOVER setsockopt() lockless.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 83cd5eb6
Loading
Loading
Loading
Loading
+2 −3
Original line number Diff line number Diff line
@@ -243,13 +243,12 @@ struct ipv6_pinfo {
	} rxopt;

	/* sockopt flags */
	__u16			sndflow:1,
				pmtudisc:3,
				padding:1,	/* 1 bit hole */
	__u8			sndflow:1,
				srcprefs:3;	/* 001: prefer temporary address
						 * 010: prefer public address
						 * 100: prefer care-of address
						 */
	__u8			pmtudisc;
	__u8			min_hopcount;
	__u8			tclass;
	__be32			rcv_flowinfo;
+9 −5
Original line number Diff line number Diff line
@@ -266,7 +266,7 @@ static inline unsigned int ip6_skb_dst_mtu(const struct sk_buff *skb)
	const struct dst_entry *dst = skb_dst(skb);
	unsigned int mtu;

	if (np && np->pmtudisc >= IPV6_PMTUDISC_PROBE) {
	if (np && READ_ONCE(np->pmtudisc) >= IPV6_PMTUDISC_PROBE) {
		mtu = READ_ONCE(dst->dev->mtu);
		mtu -= lwtunnel_headroom(dst->lwtstate, mtu);
	} else {
@@ -277,14 +277,18 @@ static inline unsigned int ip6_skb_dst_mtu(const struct sk_buff *skb)

/*
 * ip6_sk_accept_pmtu - true unless the socket's pmtudisc setting is
 * IPV6_PMTUDISC_INTERFACE or IPV6_PMTUDISC_OMIT.
 *
 * NOTE(review): this span is shown in diff form. The first return is the
 * removed pre-patch statement (two separate plain loads of pmtudisc); the
 * READ_ONCE() snapshot and the return after it are the replacement.
 */
static inline bool ip6_sk_accept_pmtu(const struct sock *sk)
{
	return inet6_sk(sk)->pmtudisc != IPV6_PMTUDISC_INTERFACE &&
	       inet6_sk(sk)->pmtudisc != IPV6_PMTUDISC_OMIT;
	/* Load pmtudisc once so both comparisons test the same value. */
	u8 pmtudisc = READ_ONCE(inet6_sk(sk)->pmtudisc);

	return pmtudisc != IPV6_PMTUDISC_INTERFACE &&
	       pmtudisc != IPV6_PMTUDISC_OMIT;
}

/*
 * ip6_sk_ignore_df - true when the socket's pmtudisc setting is below
 * IPV6_PMTUDISC_DO or equal to IPV6_PMTUDISC_OMIT.
 *
 * NOTE(review): this span is shown in diff form. The first return is the
 * removed pre-patch statement (two separate plain loads of pmtudisc); the
 * READ_ONCE() snapshot and the return after it are the replacement.
 */
static inline bool ip6_sk_ignore_df(const struct sock *sk)
{
	return inet6_sk(sk)->pmtudisc < IPV6_PMTUDISC_DO ||
	       inet6_sk(sk)->pmtudisc == IPV6_PMTUDISC_OMIT;
	/* Load pmtudisc once so both comparisons test the same value. */
	u8 pmtudisc = READ_ONCE(inet6_sk(sk)->pmtudisc);

	return pmtudisc < IPV6_PMTUDISC_DO ||
	       pmtudisc == IPV6_PMTUDISC_OMIT;
}

static inline const struct in6_addr *rt6_nexthop(const struct rt6_info *rt,
+2 −2
Original line number Diff line number Diff line
@@ -1436,10 +1436,10 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
	v6_cork->hop_limit = ipc6->hlimit;
	v6_cork->tclass = ipc6->tclass;
	if (rt->dst.flags & DST_XFRM_TUNNEL)
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		mtu = READ_ONCE(np->pmtudisc) >= IPV6_PMTUDISC_PROBE ?
		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
	else
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		mtu = READ_ONCE(np->pmtudisc) >= IPV6_PMTUDISC_PROBE ?
			READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));

	frag_size = READ_ONCE(np->frag_size);
+8 −9
Original line number Diff line number Diff line
@@ -493,6 +493,13 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
			return -EINVAL;
		inet6_assign_bit(RTALERT_ISOLATE, sk, valbool);
		return 0;
	case IPV6_MTU_DISCOVER:
		if (optlen < sizeof(int))
			return -EINVAL;
		if (val < IPV6_PMTUDISC_DONT || val > IPV6_PMTUDISC_OMIT)
			return -EINVAL;
		WRITE_ONCE(np->pmtudisc, val);
		return 0;
	}
	if (needs_rtnl)
		rtnl_lock();
@@ -941,14 +948,6 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
			goto e_inval;
		retv = ip6_ra_control(sk, val);
		break;
	case IPV6_MTU_DISCOVER:
		if (optlen < sizeof(int))
			goto e_inval;
		if (val < IPV6_PMTUDISC_DONT || val > IPV6_PMTUDISC_OMIT)
			goto e_inval;
		np->pmtudisc = val;
		retv = 0;
		break;
	case IPV6_FLOWINFO_SEND:
		if (optlen < sizeof(int))
			goto e_inval;
@@ -1374,7 +1373,7 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
		break;

	case IPV6_MTU_DISCOVER:
		val = np->pmtudisc;
		val = READ_ONCE(np->pmtudisc);
		break;

	case IPV6_RECVERR:
+1 −1
Original line number Diff line number Diff line
@@ -307,7 +307,7 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb,
	harderr = icmpv6_err_convert(type, code, &err);
	if (type == ICMPV6_PKT_TOOBIG) {
		ip6_sk_update_pmtu(skb, sk, info);
		harderr = (np->pmtudisc == IPV6_PMTUDISC_DO);
		harderr = (READ_ONCE(np->pmtudisc) == IPV6_PMTUDISC_DO);
	}
	if (type == NDISC_REDIRECT) {
		ip6_sk_redirect(skb, sk);
Loading