Commit 4e519fb4 authored by Paolo Abeni's avatar Paolo Abeni
Browse files

Merge branch 'udp-round-of-data-races-fixes'

Eric Dumazet says:

====================
udp: round of data-races fixes

This series is inspired by multiple syzbot reports.

Many udp fields reads or writes are racy.

Add a proper udp->udp_flags and move there all
flags needing atomic safety.

Also add missing READ_ONCE()/WRITE_ONCE() when
lockless readers need access to specific fields.
====================

Link: https://lore.kernel.org/r/20230912091730.1591459-1-edumazet@google.com


Signed-off-by: default avatarPaolo Abeni <pabeni@redhat.com>
parents 486e6ca6 882af43a
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -630,7 +630,7 @@ static void __gtp_encap_destroy(struct sock *sk)
			gtp->sk0 = NULL;
		else
			gtp->sk1u = NULL;
		udp_sk(sk)->encap_type = 0;
		WRITE_ONCE(udp_sk(sk)->encap_type, 0);
		rcu_assign_sk_user_data(sk, NULL);
		release_sock(sk);
		sock_put(sk);
@@ -682,7 +682,7 @@ static int gtp_encap_recv(struct sock *sk, struct sk_buff *skb)

	netdev_dbg(gtp->dev, "encap_recv sk=%p\n", sk);

	switch (udp_sk(sk)->encap_type) {
	switch (READ_ONCE(udp_sk(sk)->encap_type)) {
	case UDP_ENCAP_GTP0:
		netdev_dbg(gtp->dev, "received GTP0 packet\n");
		ret = gtp0_udp_encap_recv(gtp, skb);
+39 −27
Original line number Diff line number Diff line
@@ -32,25 +32,30 @@ static inline u32 udp_hashfn(const struct net *net, u32 num, u32 mask)
	return (num + net_hash_mix(net)) & mask;
}

enum {
	UDP_FLAGS_CORK,		/* Cork is required */
	UDP_FLAGS_NO_CHECK6_TX, /* Send zero UDP6 checksums on TX? */
	UDP_FLAGS_NO_CHECK6_RX, /* Allow zero UDP6 checksums on RX? */
	UDP_FLAGS_GRO_ENABLED,	/* Request GRO aggregation */
	UDP_FLAGS_ACCEPT_FRAGLIST,
	UDP_FLAGS_ACCEPT_L4,
	UDP_FLAGS_ENCAP_ENABLED, /* This socket enabled encap */
	UDP_FLAGS_UDPLITE_SEND_CC, /* set via udplite setsockopt */
	UDP_FLAGS_UDPLITE_RECV_CC, /* set via udplite setsockopt */
};

struct udp_sock {
	/* inet_sock has to be the first member */
	struct inet_sock inet;
#define udp_port_hash		inet.sk.__sk_common.skc_u16hashes[0]
#define udp_portaddr_hash	inet.sk.__sk_common.skc_u16hashes[1]
#define udp_portaddr_node	inet.sk.__sk_common.skc_portaddr_node

	unsigned long	 udp_flags;

	int		 pending;	/* Any pending frames ? */
	unsigned int	 corkflag;	/* Cork is required */
	__u8		 encap_type;	/* Is this an Encapsulation socket? */
	unsigned char	 no_check6_tx:1,/* Send zero UDP6 checksums on TX? */
			 no_check6_rx:1,/* Allow zero UDP6 checksums on RX? */
			 encap_enabled:1, /* This socket enabled encap
					   * processing; UDP tunnels and
					   * different encapsulation layer set
					   * this
					   */
			 gro_enabled:1,	/* Request GRO aggregation */
			 accept_udp_l4:1,
			 accept_udp_fraglist:1;

	/*
	 * Following member retains the information to create a UDP header
	 * when the socket is uncorked.
@@ -62,12 +67,6 @@ struct udp_sock {
	 */
	__u16		 pcslen;
	__u16		 pcrlen;
/* indicator bits used by pcflag: */
#define UDPLITE_BIT      0x1  		/* set by udplite proto init function */
#define UDPLITE_SEND_CC  0x2  		/* set via udplite setsockopt         */
#define UDPLITE_RECV_CC  0x4		/* set via udplite setsocktopt        */
	__u8		 pcflag;        /* marks socket as UDP-Lite if > 0    */
	__u8		 unused[3];
	/*
	 * For encapsulation sockets.
	 */
@@ -95,28 +94,39 @@ struct udp_sock {
	int		forward_threshold;
};

#define udp_test_bit(nr, sk)			\
	test_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags)
#define udp_set_bit(nr, sk)			\
	set_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags)
#define udp_test_and_set_bit(nr, sk)		\
	test_and_set_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags)
#define udp_clear_bit(nr, sk)			\
	clear_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags)
#define udp_assign_bit(nr, sk, val)		\
	assign_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags, val)

#define UDP_MAX_SEGMENTS	(1 << 6UL)

#define udp_sk(ptr) container_of_const(ptr, struct udp_sock, inet.sk)

static inline void udp_set_no_check6_tx(struct sock *sk, bool val)
{
	udp_sk(sk)->no_check6_tx = val;
	udp_assign_bit(NO_CHECK6_TX, sk, val);
}

static inline void udp_set_no_check6_rx(struct sock *sk, bool val)
{
	udp_sk(sk)->no_check6_rx = val;
	udp_assign_bit(NO_CHECK6_RX, sk, val);
}

static inline bool udp_get_no_check6_tx(struct sock *sk)
static inline bool udp_get_no_check6_tx(const struct sock *sk)
{
	return udp_sk(sk)->no_check6_tx;
	return udp_test_bit(NO_CHECK6_TX, sk);
}

static inline bool udp_get_no_check6_rx(struct sock *sk)
static inline bool udp_get_no_check6_rx(const struct sock *sk)
{
	return udp_sk(sk)->no_check6_rx;
	return udp_test_bit(NO_CHECK6_RX, sk);
}

static inline void udp_cmsg_recv(struct msghdr *msg, struct sock *sk,
@@ -135,10 +145,12 @@ static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb)
	if (!skb_is_gso(skb))
		return false;

	if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 && !udp_sk(sk)->accept_udp_l4)
	if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 &&
	    !udp_test_bit(ACCEPT_L4, sk))
		return true;

	if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST && !udp_sk(sk)->accept_udp_fraglist)
	if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST &&
	    !udp_test_bit(ACCEPT_FRAGLIST, sk))
		return true;

	return false;
@@ -146,8 +158,8 @@ static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb)

static inline void udp_allow_gso(struct sock *sk)
{
	udp_sk(sk)->accept_udp_l4 = 1;
	udp_sk(sk)->accept_udp_fraglist = 1;
	udp_set_bit(ACCEPT_L4, sk);
	udp_set_bit(ACCEPT_FRAGLIST, sk);
}

#define udp_portaddr_for_each_entry(__sk, list) \
+3 −6
Original line number Diff line number Diff line
@@ -174,16 +174,13 @@ static inline int udp_tunnel_handle_offloads(struct sk_buff *skb, bool udp_csum)
}
#endif

static inline void udp_tunnel_encap_enable(struct socket *sock)
static inline void udp_tunnel_encap_enable(struct sock *sk)
{
	struct udp_sock *up = udp_sk(sock->sk);

	if (up->encap_enabled)
	if (udp_test_and_set_bit(ENCAP_ENABLED, sk))
		return;

	up->encap_enabled = 1;
#if IS_ENABLED(CONFIG_IPV6)
	if (sock->sk->sk_family == PF_INET6)
	if (READ_ONCE(sk->sk_family) == PF_INET6)
		ipv6_stub->udpv6_encap_enable();
#endif
	udp_encap_enable();
+9 −5
Original line number Diff line number Diff line
@@ -66,14 +66,18 @@ static inline int udplite_checksum_init(struct sk_buff *skb, struct udphdr *uh)
/* Fast-path computation of checksum. Socket may not be locked. */
static inline __wsum udplite_csum(struct sk_buff *skb)
{
	const struct udp_sock *up = udp_sk(skb->sk);
	const int off = skb_transport_offset(skb);
	const struct sock *sk = skb->sk;
	int len = skb->len - off;

	if ((up->pcflag & UDPLITE_SEND_CC) && up->pcslen < len) {
		if (0 < up->pcslen)
			len = up->pcslen;
		udp_hdr(skb)->len = htons(up->pcslen);
	if (udp_test_bit(UDPLITE_SEND_CC, sk)) {
		u16 pcslen = READ_ONCE(udp_sk(sk)->pcslen);

		if (pcslen < len) {
			if (pcslen > 0)
				len = pcslen;
			udp_hdr(skb)->len = htons(pcslen);
		}
	}
	skb->ip_summed = CHECKSUM_NONE;     /* no HW support for checksumming */

+37 −37
Original line number Diff line number Diff line
@@ -714,7 +714,7 @@ int __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
			       iph->saddr, uh->source, skb->dev->ifindex,
			       inet_sdif(skb), udptable, NULL);

	if (!sk || udp_sk(sk)->encap_type) {
	if (!sk || READ_ONCE(udp_sk(sk)->encap_type)) {
		/* No socket for error: try tunnels before discarding */
		if (static_branch_unlikely(&udp_encap_needed_key)) {
			sk = __udp4_lib_err_encap(net, iph, uh, udptable, sk, skb,
@@ -1051,7 +1051,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
	u8 tos, scope;
	__be16 dport;
	int err, is_udplite = IS_UDPLITE(sk);
	int corkreq = READ_ONCE(up->corkflag) || msg->msg_flags&MSG_MORE;
	int corkreq = udp_test_bit(CORK, sk) || msg->msg_flags & MSG_MORE;
	int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
	struct sk_buff *skb;
	struct ip_options_data opt_copy;
@@ -1315,11 +1315,11 @@ void udp_splice_eof(struct socket *sock)
	struct sock *sk = sock->sk;
	struct udp_sock *up = udp_sk(sk);

	if (!up->pending || READ_ONCE(up->corkflag))
	if (!up->pending || udp_test_bit(CORK, sk))
		return;

	lock_sock(sk);
	if (up->pending && !READ_ONCE(up->corkflag))
	if (up->pending && !udp_test_bit(CORK, sk))
		udp_push_pending_frames(sk);
	release_sock(sk);
}
@@ -1868,7 +1868,7 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags,
						      (struct sockaddr *)sin);
	}

	if (udp_sk(sk)->gro_enabled)
	if (udp_test_bit(GRO_ENABLED, sk))
		udp_cmsg_recv(msg, sk, skb);

	if (inet_cmsg_flags(inet))
@@ -2081,7 +2081,8 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
	}
	nf_reset_ct(skb);

	if (static_branch_unlikely(&udp_encap_needed_key) && up->encap_type) {
	if (static_branch_unlikely(&udp_encap_needed_key) &&
	    READ_ONCE(up->encap_type)) {
		int (*encap_rcv)(struct sock *sk, struct sk_buff *skb);

		/*
@@ -2119,7 +2120,8 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
	/*
	 * 	UDP-Lite specific tests, ignored on UDP sockets
	 */
	if ((up->pcflag & UDPLITE_RECV_CC)  &&  UDP_SKB_CB(skb)->partial_cov) {
	if (udp_test_bit(UDPLITE_RECV_CC, sk) && UDP_SKB_CB(skb)->partial_cov) {
		u16 pcrlen = READ_ONCE(up->pcrlen);

		/*
		 * MIB statistics other than incrementing the error count are
@@ -2132,7 +2134,7 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
		 * delivery of packets with coverage values less than a value
		 * provided by the application."
		 */
		if (up->pcrlen == 0) {          /* full coverage was set  */
		if (pcrlen == 0) {          /* full coverage was set  */
			net_dbg_ratelimited("UDPLite: partial coverage %d while full coverage %d requested\n",
					    UDP_SKB_CB(skb)->cscov, skb->len);
			goto drop;
@@ -2143,9 +2145,9 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
		 * that it wants x while sender emits packets of smaller size y.
		 * Therefore the above ...()->partial_cov statement is essential.
		 */
		if (UDP_SKB_CB(skb)->cscov  <  up->pcrlen) {
		if (UDP_SKB_CB(skb)->cscov < pcrlen) {
			net_dbg_ratelimited("UDPLite: coverage %d too small, need min %d\n",
					    UDP_SKB_CB(skb)->cscov, up->pcrlen);
					    UDP_SKB_CB(skb)->cscov, pcrlen);
			goto drop;
		}
	}
@@ -2618,7 +2620,7 @@ void udp_destroy_sock(struct sock *sk)
			if (encap_destroy)
				encap_destroy(sk);
		}
		if (up->encap_enabled)
		if (udp_test_bit(ENCAP_ENABLED, sk))
			static_branch_dec(&udp_encap_needed_key);
	}
}
@@ -2658,9 +2660,9 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
	switch (optname) {
	case UDP_CORK:
		if (val != 0) {
			WRITE_ONCE(up->corkflag, 1);
			udp_set_bit(CORK, sk);
		} else {
			WRITE_ONCE(up->corkflag, 0);
			udp_clear_bit(CORK, sk);
			lock_sock(sk);
			push_pending_frames(sk);
			release_sock(sk);
@@ -2675,17 +2677,17 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
		case UDP_ENCAP_ESPINUDP_NON_IKE:
#if IS_ENABLED(CONFIG_IPV6)
			if (sk->sk_family == AF_INET6)
				up->encap_rcv = ipv6_stub->xfrm6_udp_encap_rcv;
				WRITE_ONCE(up->encap_rcv,
					   ipv6_stub->xfrm6_udp_encap_rcv);
			else
#endif
				up->encap_rcv = xfrm4_udp_encap_rcv;
				WRITE_ONCE(up->encap_rcv,
					   xfrm4_udp_encap_rcv);
#endif
			fallthrough;
		case UDP_ENCAP_L2TPINUDP:
			up->encap_type = val;
			lock_sock(sk);
			udp_tunnel_encap_enable(sk->sk_socket);
			release_sock(sk);
			WRITE_ONCE(up->encap_type, val);
			udp_tunnel_encap_enable(sk);
			break;
		default:
			err = -ENOPROTOOPT;
@@ -2694,11 +2696,11 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
		break;

	case UDP_NO_CHECK6_TX:
		up->no_check6_tx = valbool;
		udp_set_no_check6_tx(sk, valbool);
		break;

	case UDP_NO_CHECK6_RX:
		up->no_check6_rx = valbool;
		udp_set_no_check6_rx(sk, valbool);
		break;

	case UDP_SEGMENT:
@@ -2708,14 +2710,12 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
		break;

	case UDP_GRO:
		lock_sock(sk);

		/* when enabling GRO, accept the related GSO packet type */
		if (valbool)
			udp_tunnel_encap_enable(sk->sk_socket);
		up->gro_enabled = valbool;
		up->accept_udp_l4 = valbool;
		release_sock(sk);
			udp_tunnel_encap_enable(sk);
		udp_assign_bit(GRO_ENABLED, sk, valbool);
		udp_assign_bit(ACCEPT_L4, sk, valbool);
		break;

	/*
@@ -2730,8 +2730,8 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
			val = 8;
		else if (val > USHRT_MAX)
			val = USHRT_MAX;
		up->pcslen = val;
		up->pcflag |= UDPLITE_SEND_CC;
		WRITE_ONCE(up->pcslen, val);
		udp_set_bit(UDPLITE_SEND_CC, sk);
		break;

	/* The receiver specifies a minimum checksum coverage value. To make
@@ -2744,8 +2744,8 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
			val = 8;
		else if (val > USHRT_MAX)
			val = USHRT_MAX;
		up->pcrlen = val;
		up->pcflag |= UDPLITE_RECV_CC;
		WRITE_ONCE(up->pcrlen, val);
		udp_set_bit(UDPLITE_RECV_CC, sk);
		break;

	default:
@@ -2783,19 +2783,19 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname,

	switch (optname) {
	case UDP_CORK:
		val = READ_ONCE(up->corkflag);
		val = udp_test_bit(CORK, sk);
		break;

	case UDP_ENCAP:
		val = up->encap_type;
		val = READ_ONCE(up->encap_type);
		break;

	case UDP_NO_CHECK6_TX:
		val = up->no_check6_tx;
		val = udp_get_no_check6_tx(sk);
		break;

	case UDP_NO_CHECK6_RX:
		val = up->no_check6_rx;
		val = udp_get_no_check6_rx(sk);
		break;

	case UDP_SEGMENT:
@@ -2803,17 +2803,17 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname,
		break;

	case UDP_GRO:
		val = up->gro_enabled;
		val = udp_test_bit(GRO_ENABLED, sk);
		break;

	/* The following two cannot be changed on UDP sockets, the return is
	 * always 0 (which corresponds to the full checksum coverage of UDP). */
	case UDPLITE_SEND_CSCOV:
		val = up->pcslen;
		val = READ_ONCE(up->pcslen);
		break;

	case UDPLITE_RECV_CSCOV:
		val = up->pcrlen;
		val = READ_ONCE(up->pcrlen);
		break;

	default:
Loading