Merge branch 'tcp-make-tcp_ack-faster' (c0f38f31) · Commits · git / linux-nf

include/net/tcp.h

+0 −3

Original line number	Diff line number	Diff line
		@@ -2514,10 +2514,7 @@ void tcp_newreno_mark_lost(struct sock *sk, bool snd_una_advanced);
		/* RACK helpers declared for use outside their defining file.  Socket,
		 * skb and rate-sample arguments are all passed by pointer.
		 */
		extern s32 tcp_rack_skb_timeout(struct tcp_sock *tp, struct sk_buff *skb,
						u32 reo_wnd);
		extern bool tcp_rack_mark_lost(struct sock *sk);
		extern void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq,
					     u64 xmit_time);
		extern void tcp_rack_reo_timeout(struct sock *sk);
		extern void tcp_rack_update_reo_wnd(struct sock *sk, struct rate_sample *rs);

		/* tcp_plb.c */

net/ipv4/tcp_input.c

+77 −2

Original line number	Diff line number	Diff line
		@@ -1558,6 +1558,38 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
		return in_sack;
		}

		/* Track the latest (re)transmission time seen among (s)acked packets.
		 * Corresponds to "Step 3: Advance RACK.xmit_time and update RACK.RTT"
		 * in draft-cheng-tcpm-rack-00.txt.
		 *
		 * @tp:        TCP socket whose tp->rack state is updated
		 * @sacked:    per-packet flags; only TCPCB_RETRANS is tested here
		 * @end_seq:   end sequence number of the (s)acked packet
		 * @xmit_time: timestamp at which that packet was (re)sent
		 */
		static void tcp_rack_advance(struct tcp_sock *tp, u8 sacked,
					     u32 end_seq, u64 xmit_time)
		{
			u32 sample_us = tcp_stamp_us_delta(tp->tcp_mstamp, xmit_time);

			/* A retransmitted packet whose apparent RTT is below the minimum
			 * RTT is ambiguous: the (s)ack may cover the original or an
			 * earlier retransmission instead.  If the original was lost there
			 * is no ambiguity; otherwise the original is assumed to be delayed
			 * by at most aRTT + min_rtt, and the retransmission happens at
			 * least one RTT later (fast recovery or timeout).  Ignore such
			 * samples entirely.
			 */
			if (sample_us < tcp_min_rtt(tp) && (sacked & TCPCB_RETRANS))
				return;

			tp->rack.rtt_us = sample_us;
			tp->rack.advanced = 1;

			/* Keep the recorded (mstamp, end_seq) pair unless
			 * tcp_skb_sent_after() holds for this packet against it.
			 */
			if (!tcp_skb_sent_after(xmit_time, tp->rack.mstamp,
						end_seq, tp->rack.end_seq))
				return;

			tp->rack.end_seq = end_seq;
			tp->rack.mstamp = xmit_time;
		}

		/* Mark the given newly-SACKed range as such, adjusting counters and hints. */
		static u8 tcp_sacktag_one(struct sock *sk,
		struct tcp_sacktag_state *state, u8 sacked,
		@@ -4149,6 +4181,49 @@ static u32 tcp_newly_delivered(struct sock *sk, u32 prior_delivered,
		return delivered;
		}

		/* Updates the RACK's reo_wnd based on DSACK and no. of recoveries.
		*
		* If a DSACK is received that seems like it may have been due to reordering
		* triggering fast recovery, increment reo_wnd by min_rtt/4 (upper bounded
		* by srtt), since there is possibility that spurious retransmission was
		* due to reordering delay longer than reo_wnd.
		*
		* Persist the current reo_wnd value for TCP_RACK_RECOVERY_THRESH (16)
		* no. of successful recoveries (accounts for full DSACK-based loss
		* recovery undo). After that, reset it to default (min_rtt/4).
		*
		* At max, reo_wnd is incremented only once per rtt. So that the new
		* DSACK on which we are reacting, is due to the spurious retx (approx)
		* after the reo_wnd has been updated last time.
		*
		* reo_wnd is tracked in terms of steps (of min_rtt/4), rather than
		* absolute value to account for change in rtt.
		*/
		static void tcp_rack_update_reo_wnd(struct sock sk, struct rate_sample rs)
		{
		struct tcp_sock *tp = tcp_sk(sk);

		if ((READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) &
		TCP_RACK_STATIC_REO_WND) \|\|
		!rs->prior_delivered)
		return;

		/* Disregard DSACK if a rtt has not passed since we adjusted reo_wnd */
		if (before(rs->prior_delivered, tp->rack.last_delivered))
		tp->rack.dsack_seen = 0;

		/* Adjust the reo_wnd if update is pending */
		if (tp->rack.dsack_seen) {
		tp->rack.reo_wnd_steps = min_t(u32, 0xFF,
		tp->rack.reo_wnd_steps + 1);
		tp->rack.dsack_seen = 0;
		tp->rack.last_delivered = tp->delivered;
		tp->rack.reo_wnd_persist = TCP_RACK_RECOVERY_THRESH;
		} else if (!tp->rack.reo_wnd_persist) {
		tp->rack.reo_wnd_steps = 1;
		}
		}

		/* This routine deals with incoming acks, but not outgoing ones. */
		static int tcp_ack(struct sock sk, const struct sk_buff skb, int flag)
		{
		@@ -4283,7 +4358,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)

		tcp_in_ack_event(sk, flag);

		if (tp->tlp_high_seq)
		if (unlikely(tp->tlp_high_seq))
		tcp_process_tlp_ack(sk, ack, flag);

		if (tcp_ack_is_dubious(sk, flag)) {
		@@ -4333,7 +4408,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
		*/
		tcp_ack_probe(sk);

		if (tp->tlp_high_seq)
		if (unlikely(tp->tlp_high_seq))
		tcp_process_tlp_ack(sk, ack, flag);
		return 1;

net/ipv4/tcp_recovery.c

+0 −75

Original line number	Diff line number	Diff line
		@@ -111,38 +111,6 @@ bool tcp_rack_mark_lost(struct sock *sk)
		return !!timeout;
		}

		/* Track the latest (re)transmission time seen among (s)acked packets.
		 * Corresponds to "Step 3: Advance RACK.xmit_time and update RACK.RTT"
		 * in draft-cheng-tcpm-rack-00.txt.
		 *
		 * @tp:        TCP socket whose tp->rack state is updated
		 * @sacked:    per-packet flags; only TCPCB_RETRANS is tested here
		 * @end_seq:   end sequence number of the (s)acked packet
		 * @xmit_time: timestamp at which that packet was (re)sent
		 */
		void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq,
				      u64 xmit_time)
		{
			u32 sample_us = tcp_stamp_us_delta(tp->tcp_mstamp, xmit_time);

			/* A retransmitted packet whose apparent RTT is below the minimum
			 * RTT is ambiguous: the (s)ack may cover the original or an
			 * earlier retransmission instead.  If the original was lost there
			 * is no ambiguity; otherwise the original is assumed to be delayed
			 * by at most aRTT + min_rtt, and the retransmission happens at
			 * least one RTT later (fast recovery or timeout).  Ignore such
			 * samples entirely.
			 */
			if (sample_us < tcp_min_rtt(tp) && (sacked & TCPCB_RETRANS))
				return;

			tp->rack.rtt_us = sample_us;
			tp->rack.advanced = 1;

			/* Keep the recorded (mstamp, end_seq) pair unless
			 * tcp_skb_sent_after() holds for this packet against it.
			 */
			if (!tcp_skb_sent_after(xmit_time, tp->rack.mstamp,
						end_seq, tp->rack.end_seq))
				return;

			tp->rack.end_seq = end_seq;
			tp->rack.mstamp = xmit_time;
		}

		/* We have waited long enough to accommodate reordering. Mark the expired
		* packets lost and retransmit them.
		*/
		@@ -166,49 +134,6 @@ void tcp_rack_reo_timeout(struct sock *sk)
		tcp_rearm_rto(sk);
		}

		/* Updates the RACK's reo_wnd based on DSACK and no. of recoveries.
		*
		* If a DSACK is received that seems like it may have been due to reordering
		* triggering fast recovery, increment reo_wnd by min_rtt/4 (upper bounded
		* by srtt), since there is possibility that spurious retransmission was
		* due to reordering delay longer than reo_wnd.
		*
		* Persist the current reo_wnd value for TCP_RACK_RECOVERY_THRESH (16)
		* no. of successful recoveries (accounts for full DSACK-based loss
		* recovery undo). After that, reset it to default (min_rtt/4).
		*
		* At max, reo_wnd is incremented only once per rtt. So that the new
		* DSACK on which we are reacting, is due to the spurious retx (approx)
		* after the reo_wnd has been updated last time.
		*
		* reo_wnd is tracked in terms of steps (of min_rtt/4), rather than
		* absolute value to account for change in rtt.
		*/
		void tcp_rack_update_reo_wnd(struct sock sk, struct rate_sample rs)
		{
		struct tcp_sock *tp = tcp_sk(sk);

		if ((READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) &
		TCP_RACK_STATIC_REO_WND) \|\|
		!rs->prior_delivered)
		return;

		/* Disregard DSACK if a rtt has not passed since we adjusted reo_wnd */
		if (before(rs->prior_delivered, tp->rack.last_delivered))
		tp->rack.dsack_seen = 0;

		/* Adjust the reo_wnd if update is pending */
		if (tp->rack.dsack_seen) {
		tp->rack.reo_wnd_steps = min_t(u32, 0xFF,
		tp->rack.reo_wnd_steps + 1);
		tp->rack.dsack_seen = 0;
		tp->rack.last_delivered = tp->delivered;
		tp->rack.reo_wnd_persist = TCP_RACK_RECOVERY_THRESH;
		} else if (!tp->rack.reo_wnd_persist) {
		tp->rack.reo_wnd_steps = 1;
		}
		}

		/* RFC6582 NewReno recovery for non-SACK connection. It simply retransmits
		* the next unacked packet upon receiving
		* a) three or more DUPACKs to start the fast recovery