Commit 95b9a87c authored by David Morley, committed by Paolo Abeni
Browse files

tcp: record last received ipv6 flowlabel



In order to better estimate whether a data packet has been
retransmitted or is the result of a TLP, we save the last received
ipv6 flowlabel.

To make space for this field we resize the "ato" field in
inet_connection_sock as the current value of TCP_DELACK_MAX can be
fully contained in 8 bits and add a compile_time_assert ensuring this
field is the required size.

v2: addressed kernel bot feedback about dccp_delack_timer()
v3: addressed build error introduced by commit bbf80d71 ("tcp:
derive delack_max from rto_min")

Signed-off-by: David Morley <morleyd@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Tested-by: David Morley <morleyd@google.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
parent 8cea95b0
Loading
Loading
Loading
Loading
+4 −1
Original line number Diff line number Diff line
@@ -114,7 +114,10 @@ struct inet_connection_sock {
		__u8		  quick;	 /* Scheduled number of quick acks	   */
		__u8		  pingpong;	 /* The session is interactive		   */
		__u8		  retry;	 /* Number of attempts			   */
		__u32		  ato;		 /* Predicted tick of soft clock	   */
		#define ATO_BITS 8
		__u32		  ato:ATO_BITS,	 /* Predicted tick of soft clock	   */
				  lrcv_flowlabel:20, /* last received ipv6 flowlabel	   */
				  unused:4;
		unsigned long	  timeout;	 /* Currently scheduled timeout		   */
		__u32		  lrcvtime;	 /* timestamp of last received data packet */
		__u16		  last_seg_size; /* Size of last incoming segment	   */
+2 −0
Original line number Diff line number Diff line
@@ -131,6 +131,8 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
#define TCP_FIN_TIMEOUT_MAX (120 * HZ) /* max TCP_LINGER2 value (two minutes) */

#define TCP_DELACK_MAX	((unsigned)(HZ/5))	/* maximal time to delay before sending an ACK */
static_assert((1 << ATO_BITS) > TCP_DELACK_MAX);

#if HZ >= 100
#define TCP_DELACK_MIN	((unsigned)(HZ/25))	/* minimal time to delay before sending an ACK */
#define TCP_ATO_MIN	((unsigned)(HZ/25))
+2 −2
Original line number Diff line number Diff line
@@ -196,7 +196,7 @@ static void dccp_delack_timer(struct timer_list *t)
	if (inet_csk_ack_scheduled(sk)) {
		if (!inet_csk_in_pingpong_mode(sk)) {
			/* Delayed ACK missed: inflate ATO. */
			icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1,
			icsk->icsk_ack.ato = min_t(u32, icsk->icsk_ack.ato << 1,
						   icsk->icsk_rto);
		} else {
			/* Delayed ACK missed: leave pingpong mode and
+2 −2
Original line number Diff line number Diff line
@@ -3756,7 +3756,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
		info->tcpi_options |= TCPI_OPT_SYN_DATA;

	info->tcpi_rto = jiffies_to_usecs(icsk->icsk_rto);
	info->tcpi_ato = jiffies_to_usecs(min(icsk->icsk_ack.ato,
	info->tcpi_ato = jiffies_to_usecs(min_t(u32, icsk->icsk_ack.ato,
						tcp_delack_max(sk)));
	info->tcpi_snd_mss = tp->mss_cache;
	info->tcpi_rcv_mss = icsk->icsk_ack.rcv_mss;
+15 −0
Original line number Diff line number Diff line
@@ -778,6 +778,16 @@ void tcp_rcv_space_adjust(struct sock *sk)
	tp->rcvq_space.time = tp->tcp_mstamp;
}

/* Remember the flow label of the packet that was just received.
 *
 * When IPv6 support is built in and @skb is an IPv6 packet, the flow
 * label from its IPv6 header is converted to host byte order and stored
 * in the socket's icsk_ack.lrcv_flowlabel. Non-IPv6 packets leave the
 * stored value untouched, and without CONFIG_IPV6 this is a no-op.
 */
static void tcp_save_lrcv_flowlabel(struct sock *sk, const struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_IPV6)
	/* Only IPv6 packets carry a flow label. */
	if (skb->protocol != htons(ETH_P_IPV6))
		return;

	inet_csk(sk)->icsk_ack.lrcv_flowlabel =
		ntohl(ip6_flowlabel(ipv6_hdr(skb)));
#endif
}

/* There is something which you must keep in mind when you analyze the
 * behavior of the tp->ato delayed ack timeout interval.  When a
 * connection starts up, we want to ack as quickly as possible.  The
@@ -826,6 +836,7 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
		}
	}
	icsk->icsk_ack.lrcvtime = now;
	tcp_save_lrcv_flowlabel(sk, skb);

	tcp_ecn_check_ce(sk, skb);

@@ -4519,6 +4530,9 @@ static void tcp_rcv_spurious_retrans(struct sock *sk, const struct sk_buff *skb)
	if (TCP_SKB_CB(skb)->seq == tcp_sk(sk)->duplicate_sack[0].start_seq &&
	    sk_rethink_txhash(sk))
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDUPLICATEDATAREHASH);

	/* Save last flowlabel after a spurious retrans. */
	tcp_save_lrcv_flowlabel(sk, skb);
}

static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
@@ -4835,6 +4849,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
	u32 seq, end_seq;
	bool fragstolen;

	tcp_save_lrcv_flowlabel(sk, skb);
	tcp_ecn_check_ce(sk, skb);

	if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) {
Loading