Commit 3afb106f authored by Jakub Kicinski
Browse files

Merge branch 'tcp-move-few-fields-for-data-locality'

Eric Dumazet says:

====================
tcp: move few fields for data locality

After recent additions (PSP and AccECN), I wanted to make another
pass over field locations to increase data locality.

This series manages to shrink TCP and TCPv6 objects by 128 bytes,
but more importantly should reduce the number of cache lines touched
in TCP fast paths.

There is more to come.

v2: removed tcp CACHELINE_ASSERT_GROUP_SIZE after a kernel build bot
reported an error.
====================

Link: https://patch.msgid.link/20250919204856.2977245-1-edumazet@google.com


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 4238cbf6 649091ef
Loading
Loading
Loading
Loading
+3 −3
Original line number Diff line number Diff line
@@ -26,7 +26,7 @@ u64 bytes_acked read_w
u32                           dsack_dups
u32                           snd_una                 read_mostly         read_write          tcp_wnd_end,tcp_urg_mode,tcp_minshall_check,tcp_cwnd_validate(tx);tcp_ack,tcp_may_update_window,tcp_clean_rtx_queue(write),tcp_ack_tstamp(rx)
u32                           snd_sml                 read_write                              tcp_minshall_check,tcp_minshall_update
u32                           rcv_tstamp                                  read_mostly         tcp_ack
u32                           rcv_tstamp              read_write          read_write          tcp_ack
void *                        tcp_clean_acked         read_mostly                             tcp_ack
u32                           lsndtime                read_write                              tcp_slow_start_after_idle_check,tcp_event_data_sent
u32                           last_oow_ack_time
@@ -57,7 +57,7 @@ u8:1 is_sack_reneg read_m
u8:2                          fastopen_client_fail
u8:4                          nonagle                 read_write                              tcp_skb_entail,tcp_push_pending_frames
u8:1                          thin_lto
u8:1                          recvmsg_inq
u8:1                          recvmsg_inq                                 read_mostly         tcp_recvmsg
u8:1                          repair                  read_mostly                             tcp_write_xmit
u8:1                          frto
u8                            repair_queue
+10 −10
Original line number Diff line number Diff line
@@ -215,6 +215,9 @@ struct tcp_sock {
	u16	gso_segs;	/* Max number of segs per GSO packet	*/
	/* from STCP, retrans queue hinting */
	struct sk_buff *retransmit_skb_hint;
#if defined(CONFIG_TLS_DEVICE)
	void (*tcp_clean_acked)(struct sock *sk, u32 acked_seq);
#endif
	__cacheline_group_end(tcp_sock_read_tx);

	/* TXRX read-mostly hotpath cache lines */
@@ -232,13 +235,13 @@ struct tcp_sock {
		repair      : 1,
		tcp_usec_ts : 1, /* TSval values in usec */
		is_sack_reneg:1,    /* in recovery from loss with SACK reneg? */
		is_cwnd_limited:1;/* forward progress limited by snd_cwnd? */
		is_cwnd_limited:1,/* forward progress limited by snd_cwnd? */
		recvmsg_inq : 1;/* Indicate # of bytes in queue upon recvmsg */
	__cacheline_group_end(tcp_sock_read_txrx);

	/* RX read-mostly hotpath cache lines */
	__cacheline_group_begin(tcp_sock_read_rx);
	u32	copied_seq;	/* Head of yet unread data */
	u32	rcv_tstamp;	/* timestamp of last received ACK (for keepalives) */
	u32	snd_wl1;	/* Sequence for window update		*/
	u32	tlp_high_seq;	/* snd_nxt at the time of TLP */
	u32	rttvar_us;	/* smoothed mdev_max			*/
@@ -246,14 +249,10 @@ struct tcp_sock {
	u16	advmss;		/* Advertised MSS			*/
	u16	urg_data;	/* Saved octet of OOB data and control flags */
	u32	lost;		/* Total data packets lost incl. rexmits */
	u32	snd_ssthresh;	/* Slow start size threshold		*/
	struct  minmax rtt_min;
	/* OOO segments go in this rbtree. Socket lock must be held. */
	struct rb_root	out_of_order_queue;
#if defined(CONFIG_TLS_DEVICE)
	void (*tcp_clean_acked)(struct sock *sk, u32 acked_seq);
#endif
	u32	snd_ssthresh;	/* Slow start size threshold		*/
	u8	recvmsg_inq : 1;/* Indicate # of bytes in queue upon recvmsg */
	__cacheline_group_end(tcp_sock_read_rx);

	/* TX read-write hotpath cache lines */
@@ -319,6 +318,7 @@ struct tcp_sock {
					*/
	u32	app_limited;	/* limited until "delivered" reaches this val */
	u32	rcv_wnd;	/* Current receiver window		*/
	u32	rcv_tstamp;	/* timestamp of last received ACK (for keepalives) */
/*
 *      Options received (usually on last packet, some only on SYN packets).
 */
@@ -448,6 +448,9 @@ struct tcp_sock {
				 * the first SYN. */
	u32	undo_marker;	/* snd_una upon a new recovery episode. */
	int	undo_retrans;	/* number of undoable retransmissions. */
	u32	mtu_info; /* We received an ICMP_FRAG_NEEDED / ICMPV6_PKT_TOOBIG
			   * while socket was owned by user.
			   */
	u64	bytes_retrans;	/* RFC4898 tcpEStatsPerfOctetsRetrans
				 * Total data bytes retransmitted
				 */
@@ -494,9 +497,6 @@ struct tcp_sock {
		u32		  probe_seq_end;
	} mtu_probe;
	u32     plb_rehash;     /* PLB-triggered rehash attempts */
	u32	mtu_info; /* We received an ICMP_FRAG_NEEDED / ICMPV6_PKT_TOOBIG
			   * while socket was owned by user.
			   */
#if IS_ENABLED(CONFIG_MPTCP)
	bool	is_mptcp;
#endif
+1 −1
Original line number Diff line number Diff line
@@ -185,8 +185,8 @@ struct fastopen_queue {
struct request_sock_queue {
	spinlock_t		rskq_lock;
	u8			rskq_defer_accept;
	u8			synflood_warned;

	u32			synflood_warned;
	atomic_t		qlen;
	atomic_t		young;

+5 −5
Original line number Diff line number Diff line
@@ -467,7 +467,7 @@ struct sock {
	__cacheline_group_begin(sock_write_tx);
	int			sk_write_pending;
	atomic_t		sk_omem_alloc;
	int			sk_sndbuf;
	int			sk_err_soft;

	int			sk_wmem_queued;
	refcount_t		sk_wmem_alloc;
@@ -492,6 +492,9 @@ struct sock {
	long			sk_sndtimeo;
	u32			sk_priority;
	u32			sk_mark;
	kuid_t			sk_uid;
	u16			sk_protocol;
	u16			sk_type;
	struct dst_entry __rcu	*sk_dst_cache;
	netdev_features_t	sk_route_caps;
#ifdef CONFIG_SOCK_VALIDATE_XMIT
@@ -504,6 +507,7 @@ struct sock {
	unsigned int		sk_gso_max_size;
	gfp_t			sk_allocation;
	u32			sk_txhash;
	int			sk_sndbuf;
	u8			sk_pacing_shift;
	bool			sk_use_task_frag;
	__cacheline_group_end(sock_read_tx);
@@ -517,15 +521,11 @@ struct sock {
				sk_no_check_tx : 1,
				sk_no_check_rx : 1;
	u8			sk_shutdown;
	u16			sk_type;
	u16			sk_protocol;
	unsigned long	        sk_lingertime;
	struct proto		*sk_prot_creator;
	rwlock_t		sk_callback_lock;
	int			sk_err_soft;
	u32			sk_ack_backlog;
	u32			sk_max_ack_backlog;
	kuid_t			sk_uid;
	unsigned long		sk_ino;
	spinlock_t		sk_peer_lock;
	int			sk_bind_phc;
+4 −1
Original line number Diff line number Diff line
@@ -4452,7 +4452,7 @@ static int __init sock_struct_check(void)

	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_omem_alloc);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_omem_alloc);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_sndbuf);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_err_soft);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_wmem_queued);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_wmem_alloc);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_tsq_flags);
@@ -4471,12 +4471,15 @@ static int __init sock_struct_check(void)
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_sndtimeo);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_priority);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_mark);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_uid);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_protocol);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_dst_cache);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_route_caps);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_gso_type);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_gso_max_size);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_allocation);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_txhash);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_sndbuf);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_gso_max_segs);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_pacing_shift);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_use_task_frag);
Loading