Commit fff755e7 authored by David S. Miller
Browse files

Merge branch 'TCP_INFO-RTO'



Aananth V says:

====================
tcp: new TCP_INFO stats for RTO events

The 2023 SIGCOMM paper "Improving Network Availability with Protective
ReRoute" has indicated Linux TCP's RTO-triggered txhash rehashing can
effectively reduce application disruption during outages. To better
measure the efficacy of this feature, this patch set adds three more
detailed stats during RTO recovery and exports them via TCP_INFO.
Applications and monitoring systems can leverage this data to measure
the network path diversity and end-to-end repair latency during network
outages to improve their network infrastructure.

Patch 1 fixes a bug in TFO SYNACK that we encountered while testing
these new metrics.

Patch 2 adds the new metrics to tcp_sock and tcp_info.

v2: Addressed feedback from a check bot in patch 2 by removing the
inline keyword from the tcp_update_rto_time and tcp_update_rto_stats
functions. Changed a comment in include/net/tcp.h to fit under 80 columns.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents 50675d84 3868ab0f
Loading
Loading
Loading
Loading
+8 −0
Original line number Diff line number Diff line
@@ -377,6 +377,14 @@ struct tcp_sock {
				 * Total data bytes retransmitted
				 */
	u32	total_retrans;	/* Total retransmits for entire connection */
	u32	rto_stamp;	/* Start time (ms) of last CA_Loss recovery */
	u16	total_rto;	/* Total number of RTO timeouts, including
				 * SYN/SYN-ACK and recurring timeouts.
				 */
	u16	total_rto_recoveries;	/* Total number of RTO recoveries,
					 * including any unfinished recovery.
					 */
	u32	total_rto_time;	/* ms spent in (completed) RTO recoveries. */

	u32	urg_seq;	/* Seq of received urgent pointer */
	unsigned int		keepalive_time;	  /* time before keep alive takes place */
+12 −0
Original line number Diff line number Diff line
@@ -289,6 +289,18 @@ struct tcp_info {
				      */

	__u32   tcpi_rehash;         /* PLB or timeout triggered rehash attempts */

	__u16	tcpi_total_rto;	/* Total number of RTO timeouts, including
				 * SYN/SYN-ACK and recurring timeouts.
				 */
	__u16	tcpi_total_rto_recoveries;	/* Total number of RTO
						 * recoveries, including any
						 * unfinished recovery.
						 */
	__u32	tcpi_total_rto_time;	/* Total time spent in RTO recoveries
					 * in milliseconds, including any
					 * unfinished recovery.
					 */
};

/* netlink attributes types for SCM_TIMESTAMPING_OPT_STATS */
+9 −0
Original line number Diff line number Diff line
@@ -3818,6 +3818,15 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
	info->tcpi_rcv_wnd = tp->rcv_wnd;
	info->tcpi_rehash = tp->plb_rehash + tp->timeout_rehash;
	info->tcpi_fastopen_client_fail = tp->fastopen_client_fail;

	info->tcpi_total_rto = tp->total_rto;
	info->tcpi_total_rto_recoveries = tp->total_rto_recoveries;
	info->tcpi_total_rto_time = tp->total_rto_time;
	if (tp->rto_stamp) {
		info->tcpi_total_rto_time += tcp_time_stamp_raw() -
						tp->rto_stamp;
	}

	unlock_sock_fast(sk, slow);
}
EXPORT_SYMBOL_GPL(tcp_get_info);
+20 −4
Original line number Diff line number Diff line
@@ -2088,6 +2088,10 @@ void tcp_clear_retrans(struct tcp_sock *tp)
	tp->undo_marker = 0;
	tp->undo_retrans = -1;
	tp->sacked_out = 0;
	tp->rto_stamp = 0;
	tp->total_rto = 0;
	tp->total_rto_recoveries = 0;
	tp->total_rto_time = 0;
}

static inline void tcp_init_undo(struct tcp_sock *tp)
@@ -2825,6 +2829,14 @@ void tcp_enter_recovery(struct sock *sk, bool ece_ack)
	tcp_set_ca_state(sk, TCP_CA_Recovery);
}

static void tcp_update_rto_time(struct tcp_sock *tp)
{
	if (tp->rto_stamp) {
		tp->total_rto_time += tcp_time_stamp(tp) - tp->rto_stamp;
		tp->rto_stamp = 0;
	}
}

/* Process an ACK in CA_Loss state. Move to CA_Open if lost data are
 * recovered or spurious. Otherwise retransmits more on partial ACKs.
 */
@@ -3029,6 +3041,8 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
		break;
	case TCP_CA_Loss:
		tcp_process_loss(sk, flag, num_dupack, rexmit);
		if (icsk->icsk_ca_state != TCP_CA_Loss)
			tcp_update_rto_time(tp);
		tcp_identify_packet_loss(sk, ack_flag);
		if (!(icsk->icsk_ca_state == TCP_CA_Open ||
		      (*ack_flag & FLAG_LOST_RETRANS)))
@@ -6444,22 +6458,24 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,

static void tcp_rcv_synrecv_state_fastopen(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct request_sock *req;

	/* If we are still handling the SYNACK RTO, see if timestamp ECR allows
	 * undo. If peer SACKs triggered fast recovery, we can't undo here.
	 */
	if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss)
		tcp_try_undo_loss(sk, false);
	if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss && !tp->packets_out)
		tcp_try_undo_recovery(sk);

	/* Reset rtx states to prevent spurious retransmits_timed_out() */
	tcp_sk(sk)->retrans_stamp = 0;
	tcp_update_rto_time(tp);
	tp->retrans_stamp = 0;
	inet_csk(sk)->icsk_retransmits = 0;

	/* Once we leave TCP_SYN_RECV or TCP_FIN_WAIT_1,
	 * we no longer need req so release it.
	 */
	req = rcu_dereference_protected(tcp_sk(sk)->fastopen_rsk,
	req = rcu_dereference_protected(tp->fastopen_rsk,
					lockdep_sock_is_held(sk));
	reqsk_fastopen_remove(sk, req, false);

+4 −0
Original line number Diff line number Diff line
@@ -565,6 +565,10 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
		newtp->undo_marker = treq->snt_isn;
		newtp->retrans_stamp = div_u64(treq->snt_synack,
					       USEC_PER_SEC / TCP_TS_HZ);
		newtp->total_rto = req->num_timeout;
		newtp->total_rto_recoveries = 1;
		newtp->total_rto_time = tcp_time_stamp_raw() -
						newtp->retrans_stamp;
	}
	newtp->tsoffset = treq->ts_off;
#ifdef CONFIG_TCP_MD5SIG
Loading