Commit 8ccd1160 authored by Jakub Kicinski
Browse files

Merge branch 'tcp-provide-better-locality-for-retransmit-timer'

Eric Dumazet says:

====================
tcp: provide better locality for retransmit timer

The TCP stack uses three timers per flow, currently spread this way:

- sk->sk_timer : keepalive timer
- icsk->icsk_retransmit_timer : retransmit timer
- icsk->icsk_delack_timer : delayed ack timer

This series moves the retransmit timer to sk->sk_timer location,
to increase data locality in TX paths.

Keepalive timers are not often used, so this change should be neutral for them.

After the series we have the following fields:

- sk->tcp_retransmit_timer : retransmit timer, in sock_write_tx group
- icsk->icsk_delack_timer : delayed ack timer
- icsk->icsk_keepalive_timer : keepalive timer

Moving icsk_delack_timer to a better location would also be welcome.
====================

Link: https://patch.msgid.link/20251124175013.1473655-1-edumazet@google.com


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 436fa8e7 9a5e5334
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -12,8 +12,8 @@ struct inet_sock icsk_inet read_mostly r
struct request_sock_queue           icsk_accept_queue
struct inet_bind_bucket             icsk_bind_hash         read_mostly                             tcp_set_state
struct inet_bind2_bucket            icsk_bind2_hash        read_mostly                             tcp_set_state,inet_put_port
struct timer_list                   icsk_retransmit_timer  read_write                              inet_csk_reset_xmit_timer,tcp_connect
struct timer_list                   icsk_delack_timer      read_mostly                             inet_csk_reset_xmit_timer,tcp_connect
struct timer_list                   icsk_keepalive_timer
u32                                 icsk_rto               read_write                              tcp_cwnd_validate,tcp_schedule_loss_probe,tcp_connect_init,tcp_connect,tcp_write_xmit,tcp_push_one
u32                                 icsk_rto_min
u32                                 icsk_rto_max           read_mostly                             tcp_reset_xmit_timer
+12 −8
Original line number Diff line number Diff line
@@ -56,7 +56,9 @@ struct inet_connection_sock_af_ops {
 * @icsk_accept_queue:	   FIFO of established children
 * @icsk_bind_hash:	   Bind node
 * @icsk_bind2_hash:	   Bind node in the bhash2 table
 * @icsk_retransmit_timer: Resend (no ack)
 * @icsk_delack_timer:     Delayed ACK timer
 * @icsk_keepalive_timer:  Keepalive timer
 * @mptcp_tout_timer: mptcp timer
 * @icsk_rto:		   Retransmit timeout
 * @icsk_pmtu_cookie	   Last pmtu seen by socket
 * @icsk_ca_ops		   Pluggable congestion control hook
@@ -81,8 +83,11 @@ struct inet_connection_sock {
	struct request_sock_queue icsk_accept_queue;
	struct inet_bind_bucket	  *icsk_bind_hash;
	struct inet_bind2_bucket  *icsk_bind2_hash;
 	struct timer_list	  icsk_retransmit_timer;
	struct timer_list	  icsk_delack_timer;
	union {
		struct timer_list icsk_keepalive_timer;
		struct timer_list mptcp_tout_timer;
	};
	__u32			  icsk_rto;
	__u32                     icsk_rto_min;
	u32			  icsk_rto_max;
@@ -184,10 +189,9 @@ static inline void inet_csk_delack_init(struct sock *sk)
	memset(&inet_csk(sk)->icsk_ack, 0, sizeof(inet_csk(sk)->icsk_ack));
}

static inline unsigned long
icsk_timeout(const struct inet_connection_sock *icsk)
static inline unsigned long tcp_timeout_expires(const struct sock *sk)
{
	return READ_ONCE(icsk->icsk_retransmit_timer.expires);
	return READ_ONCE(sk->tcp_retransmit_timer.expires);
}

static inline unsigned long
@@ -203,7 +207,7 @@ static inline void inet_csk_clear_xmit_timer(struct sock *sk, const int what)
	if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0) {
		smp_store_release(&icsk->icsk_pending, 0);
#ifdef INET_CSK_CLEAR_TIMERS
		sk_stop_timer(sk, &icsk->icsk_retransmit_timer);
		sk_stop_timer(sk, &sk->tcp_retransmit_timer);
#endif
	} else if (what == ICSK_TIME_DACK) {
		smp_store_release(&icsk->icsk_ack.pending, 0);
@@ -235,7 +239,7 @@ static inline void inet_csk_reset_xmit_timer(struct sock *sk, const int what,
	if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0 ||
	    what == ICSK_TIME_LOSS_PROBE || what == ICSK_TIME_REO_TIMEOUT) {
		smp_store_release(&icsk->icsk_pending, what);
		sk_reset_timer(sk, &icsk->icsk_retransmit_timer, when);
		sk_reset_timer(sk, &sk->tcp_retransmit_timer, when);
	} else if (what == ICSK_TIME_DACK) {
		smp_store_release(&icsk->icsk_ack.pending,
				  icsk->icsk_ack.pending | ICSK_ACK_TIMER);
+9 −4
Original line number Diff line number Diff line
@@ -305,6 +305,8 @@ struct sk_filter;
  *	@sk_txrehash: enable TX hash rethink
  *	@sk_filter: socket filtering instructions
  *	@sk_timer: sock cleanup timer
  *	@tcp_retransmit_timer: tcp retransmit timer
  *	@mptcp_retransmit_timer: mptcp retransmit timer
  *	@sk_stamp: time stamp of last packet received
  *	@sk_stamp_seq: lock for accessing sk_stamp on 32 bit architectures only
  *	@sk_tsflags: SO_TIMESTAMPING flags
@@ -481,11 +483,12 @@ struct sock {
		struct rb_root	tcp_rtx_queue;
	};
	struct sk_buff_head	sk_write_queue;
	u32			sk_dst_pending_confirm;
	u32			sk_pacing_status; /* see enum sk_pacing */
	struct page_frag	sk_frag;
	union {
		struct timer_list	sk_timer;

		struct timer_list	tcp_retransmit_timer;
		struct timer_list	mptcp_retransmit_timer;
	};
	unsigned long		sk_pacing_rate; /* bytes per second */
	atomic_t		sk_zckey;
	atomic_t		sk_tskey;
@@ -493,6 +496,8 @@ struct sock {
	__cacheline_group_end(sock_write_tx);

	__cacheline_group_begin(sock_read_tx);
	u32			sk_dst_pending_confirm;
	u32			sk_pacing_status; /* see enum sk_pacing */
	unsigned long		sk_max_pacing_rate;
	long			sk_sndtimeo;
	u32			sk_priority;
+2 −2
Original line number Diff line number Diff line
@@ -4519,14 +4519,14 @@ static int __init sock_struct_check(void)
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_send_head);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_write_queue);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_write_pending);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_dst_pending_confirm);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_pacing_status);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_frag);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_timer);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_pacing_rate);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_zckey);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_tskey);

	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_dst_pending_confirm);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_pacing_status);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_max_pacing_rate);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_sndtimeo);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_priority);
+6 −6
Original line number Diff line number Diff line
@@ -737,9 +737,9 @@ void inet_csk_init_xmit_timers(struct sock *sk,
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	timer_setup(&icsk->icsk_retransmit_timer, retransmit_handler, 0);
	timer_setup(&sk->tcp_retransmit_timer, retransmit_handler, 0);
	timer_setup(&icsk->icsk_delack_timer, delack_handler, 0);
	timer_setup(&sk->sk_timer, keepalive_handler, 0);
	timer_setup(&icsk->icsk_keepalive_timer, keepalive_handler, 0);
	icsk->icsk_pending = icsk->icsk_ack.pending = 0;
}

@@ -750,9 +750,9 @@ void inet_csk_clear_xmit_timers(struct sock *sk)
	smp_store_release(&icsk->icsk_pending, 0);
	smp_store_release(&icsk->icsk_ack.pending, 0);

	sk_stop_timer(sk, &icsk->icsk_retransmit_timer);
	sk_stop_timer(sk, &sk->tcp_retransmit_timer);
	sk_stop_timer(sk, &icsk->icsk_delack_timer);
	sk_stop_timer(sk, &sk->sk_timer);
	sk_stop_timer(sk, &icsk->icsk_keepalive_timer);
}

void inet_csk_clear_xmit_timers_sync(struct sock *sk)
@@ -765,9 +765,9 @@ void inet_csk_clear_xmit_timers_sync(struct sock *sk)
	smp_store_release(&icsk->icsk_pending, 0);
	smp_store_release(&icsk->icsk_ack.pending, 0);

	sk_stop_timer_sync(sk, &icsk->icsk_retransmit_timer);
	sk_stop_timer_sync(sk, &sk->tcp_retransmit_timer);
	sk_stop_timer_sync(sk, &icsk->icsk_delack_timer);
	sk_stop_timer_sync(sk, &sk->sk_timer);
	sk_stop_timer_sync(sk, &icsk->icsk_keepalive_timer);
}

struct dst_entry *inet_csk_route_req(const struct sock *sk,
Loading