Commit 9af0620d authored by David S. Miller
Browse files

Merge branch 'net-sysctl-races-part-6'



Kuniyuki Iwashima says:

====================
sysctl: Fix data-races around ipv4_net_table (Round 6, Final).

This series fixes data-races around 11 knobs after tcp_pacing_ss_ratio in
ipv4_net_table, and this is the final round for ipv4_net_table.

While at it, other data-races around these related knobs are fixed.

  - decnet_mem
  - decnet_rmem
  - tipc_rmem

There are still 58 tables possibly missing some fixes under net/.

  $ grep -rnE "struct ctl_table.*?\[\] =" net/ | wc -l
  60
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents 3e7d18b9 96b9bd8c
Loading
Loading
Loading
Loading
+4 −4
Original line number Diff line number Diff line
@@ -2843,18 +2843,18 @@ static inline int sk_get_wmem0(const struct sock *sk, const struct proto *proto)
{
	/* Does this proto have per netns sysctl_wmem ? */
	if (proto->sysctl_wmem_offset)
		return *(int *)((void *)sock_net(sk) + proto->sysctl_wmem_offset);
		return READ_ONCE(*(int *)((void *)sock_net(sk) + proto->sysctl_wmem_offset));

	return *proto->sysctl_wmem;
	return READ_ONCE(*proto->sysctl_wmem);
}

static inline int sk_get_rmem0(const struct sock *sk, const struct proto *proto)
{
	/* Does this proto have per netns sysctl_rmem ? */
	if (proto->sysctl_rmem_offset)
		return *(int *)((void *)sock_net(sk) + proto->sysctl_rmem_offset);
		return READ_ONCE(*(int *)((void *)sock_net(sk) + proto->sysctl_rmem_offset));

	return *proto->sysctl_rmem;
	return READ_ONCE(*proto->sysctl_rmem);
}

/* Default TCP Small queue budget is ~1 ms of data (1sec >> 10)
+2 −2
Original line number Diff line number Diff line
@@ -480,8 +480,8 @@ static struct sock *dn_alloc_sock(struct net *net, struct socket *sock, gfp_t gf
	sk->sk_family      = PF_DECnet;
	sk->sk_protocol    = 0;
	sk->sk_allocation  = gfp;
	sk->sk_sndbuf	   = sysctl_decnet_wmem[1];
	sk->sk_rcvbuf	   = sysctl_decnet_rmem[1];
	sk->sk_sndbuf	   = READ_ONCE(sysctl_decnet_wmem[1]);
	sk->sk_rcvbuf	   = READ_ONCE(sysctl_decnet_rmem[1]);

	/* Initialization of DECnet Session Control Port		*/
	scp = DN_SK(sk);
+5 −2
Original line number Diff line number Diff line
@@ -1042,6 +1042,7 @@ fib_find_matching_alias(struct net *net, const struct fib_rt_info *fri)

void fib_alias_hw_flags_set(struct net *net, const struct fib_rt_info *fri)
{
	u8 fib_notify_on_flag_change;
	struct fib_alias *fa_match;
	struct sk_buff *skb;
	int err;
@@ -1063,14 +1064,16 @@ void fib_alias_hw_flags_set(struct net *net, const struct fib_rt_info *fri)
	WRITE_ONCE(fa_match->offload, fri->offload);
	WRITE_ONCE(fa_match->trap, fri->trap);

	fib_notify_on_flag_change = READ_ONCE(net->ipv4.sysctl_fib_notify_on_flag_change);

	/* 2 means send notifications only if offload_failed was changed. */
	if (net->ipv4.sysctl_fib_notify_on_flag_change == 2 &&
	if (fib_notify_on_flag_change == 2 &&
	    READ_ONCE(fa_match->offload_failed) == fri->offload_failed)
		goto out;

	WRITE_ONCE(fa_match->offload_failed, fri->offload_failed);

	if (!net->ipv4.sysctl_fib_notify_on_flag_change)
	if (!fib_notify_on_flag_change)
		goto out;

	skb = nlmsg_new(fib_nlmsg_size(fa_match->fa_info), GFP_ATOMIC);
+3 −3
Original line number Diff line number Diff line
@@ -452,8 +452,8 @@ void tcp_init_sock(struct sock *sk)

	icsk->icsk_sync_mss = tcp_sync_mss;

	WRITE_ONCE(sk->sk_sndbuf, sock_net(sk)->ipv4.sysctl_tcp_wmem[1]);
	WRITE_ONCE(sk->sk_rcvbuf, sock_net(sk)->ipv4.sysctl_tcp_rmem[1]);
	WRITE_ONCE(sk->sk_sndbuf, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[1]));
	WRITE_ONCE(sk->sk_rcvbuf, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[1]));

	sk_sockets_allocated_inc(sk);
}
@@ -1724,7 +1724,7 @@ int tcp_set_rcvlowat(struct sock *sk, int val)
	if (sk->sk_userlocks & SOCK_RCVBUF_LOCK)
		cap = sk->sk_rcvbuf >> 1;
	else
		cap = sock_net(sk)->ipv4.sysctl_tcp_rmem[2] >> 1;
		cap = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1;
	val = min(val, cap);
	WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);

+13 −11
Original line number Diff line number Diff line
@@ -426,7 +426,7 @@ static void tcp_sndbuf_expand(struct sock *sk)

	if (sk->sk_sndbuf < sndmem)
		WRITE_ONCE(sk->sk_sndbuf,
			   min(sndmem, sock_net(sk)->ipv4.sysctl_tcp_wmem[2]));
			   min(sndmem, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[2])));
}

/* 2. Tuning advertised window (window_clamp, rcv_ssthresh)
@@ -461,7 +461,7 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb,
	struct tcp_sock *tp = tcp_sk(sk);
	/* Optimize this! */
	int truesize = tcp_win_from_space(sk, skbtruesize) >> 1;
	int window = tcp_win_from_space(sk, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1;
	int window = tcp_win_from_space(sk, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2])) >> 1;

	while (tp->rcv_ssthresh <= window) {
		if (truesize <= skb->len)
@@ -574,16 +574,17 @@ static void tcp_clamp_window(struct sock *sk)
	struct tcp_sock *tp = tcp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct net *net = sock_net(sk);
	int rmem2;

	icsk->icsk_ack.quick = 0;
	rmem2 = READ_ONCE(net->ipv4.sysctl_tcp_rmem[2]);

	if (sk->sk_rcvbuf < net->ipv4.sysctl_tcp_rmem[2] &&
	if (sk->sk_rcvbuf < rmem2 &&
	    !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
	    !tcp_under_memory_pressure(sk) &&
	    sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) {
		WRITE_ONCE(sk->sk_rcvbuf,
			   min(atomic_read(&sk->sk_rmem_alloc),
			       net->ipv4.sysctl_tcp_rmem[2]));
			   min(atomic_read(&sk->sk_rmem_alloc), rmem2));
	}
	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
		tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss);
@@ -745,7 +746,7 @@ void tcp_rcv_space_adjust(struct sock *sk)

		do_div(rcvwin, tp->advmss);
		rcvbuf = min_t(u64, rcvwin * rcvmem,
			       sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
			       READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]));
		if (rcvbuf > sk->sk_rcvbuf) {
			WRITE_ONCE(sk->sk_rcvbuf, rcvbuf);

@@ -910,9 +911,9 @@ static void tcp_update_pacing_rate(struct sock *sk)
	 *	 end of slow start and should slow down.
	 */
	if (tcp_snd_cwnd(tp) < tp->snd_ssthresh / 2)
		rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ss_ratio;
		rate *= READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_pacing_ss_ratio);
	else
		rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ca_ratio;
		rate *= READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_pacing_ca_ratio);

	rate *= max(tcp_snd_cwnd(tp), tp->packets_out);

@@ -5520,7 +5521,7 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
	}

	if (!tcp_is_sack(tp) ||
	    tp->compressed_ack >= sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr)
	    tp->compressed_ack >= READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr))
		goto send_now;

	if (tp->compressed_ack_rcv_nxt != tp->rcv_nxt) {
@@ -5541,11 +5542,12 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
	if (tp->srtt_us && tp->srtt_us < rtt)
		rtt = tp->srtt_us;

	delay = min_t(unsigned long, sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns,
	delay = min_t(unsigned long,
		      READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns),
		      rtt * (NSEC_PER_USEC >> 3)/20);
	sock_hold(sk);
	hrtimer_start_range_ns(&tp->compressed_ack_timer, ns_to_ktime(delay),
			       sock_net(sk)->ipv4.sysctl_tcp_comp_sack_slack_ns,
			       READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_slack_ns),
			       HRTIMER_MODE_REL_PINNED_SOFT);
}

Loading