Commit e8f57a76 authored by Jakub Kicinski
Browse files

Merge branch 'tcp-even-faster-connect-under-stress'

Eric Dumazet says:

====================
tcp: even faster connect() under stress

This is a follow-up to the prior series, "tcp: scale connect() under pressure".

Now that spinlocks are no longer in the picture, we see a very high cost
of the inet6_ehashfn() function.

In this series (of 2), I change how lport contributes to inet6_ehashfn()
to ensure better cache locality and call inet6_ehashfn()
only once per connect() system call.

This brings throughput to about 229 % of its previous value (a ~129 % increase)
for "neper/tcp_crr -6 -T 200 -F 30000" stress test,
while greatly improving latency metrics.

Before:
  latency_min=0.014131929
  latency_max=17.895073144
  latency_mean=0.505675853
  latency_stddev=2.125164772
  num_samples=307884
  throughput=139866.80

After:
  latency_min=0.003041375
  latency_max=7.056589232
  latency_mean=0.141075048
  latency_stddev=0.526900516
  num_samples=312996
  throughput=320677.21
====================

Link: https://patch.msgid.link/20250305034550.879255-1-edumazet@google.com


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents f8ece407 d4438ce6
Loading
Loading
Loading
Loading
+3 −1
Original line number Diff line number Diff line
@@ -527,10 +527,12 @@ static inline void sk_rcv_saddr_set(struct sock *sk, __be32 addr)

int __inet_hash_connect(struct inet_timewait_death_row *death_row,
			struct sock *sk, u64 port_offset,
			u32 hash_port0,
			int (*check_established)(struct inet_timewait_death_row *,
						 struct sock *, __u16,
						 struct inet_timewait_sock **,
						 bool rcu_lookup));
						 bool rcu_lookup,
						 u32 hash));

int inet_hash_connect(struct inet_timewait_death_row *death_row,
		      struct sock *sk);
+1 −1
Original line number Diff line number Diff line
@@ -357,7 +357,7 @@ static inline void inet_get_local_port_range(const struct net *net, int *low, in
bool inet_sk_get_local_port_range(const struct sock *sk, int *low, int *high);

#ifdef CONFIG_SYSCTL
static inline bool inet_is_local_reserved_port(struct net *net, unsigned short port)
static inline bool inet_is_local_reserved_port(const struct net *net, unsigned short port)
{
	if (!net->ipv4.sysctl_local_reserved_ports)
		return false;
+20 −10
Original line number Diff line number Diff line
@@ -35,7 +35,7 @@ u32 inet_ehashfn(const struct net *net, const __be32 laddr,
{
	net_get_random_once(&inet_ehash_secret, sizeof(inet_ehash_secret));

	return __inet_ehashfn(laddr, lport, faddr, fport,
	return lport + __inet_ehashfn(laddr, 0, faddr, fport,
				      inet_ehash_secret + net_hash_mix(net));
}
EXPORT_SYMBOL_GPL(inet_ehashfn);
@@ -538,7 +538,8 @@ EXPORT_SYMBOL_GPL(__inet_lookup_established);
static int __inet_check_established(struct inet_timewait_death_row *death_row,
				    struct sock *sk, __u16 lport,
				    struct inet_timewait_sock **twp,
				    bool rcu_lookup)
				    bool rcu_lookup,
				    u32 hash)
{
	struct inet_hashinfo *hinfo = death_row->hashinfo;
	struct inet_sock *inet = inet_sk(sk);
@@ -549,8 +550,6 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
	int sdif = l3mdev_master_ifindex_by_index(net, dif);
	INET_ADDR_COOKIE(acookie, saddr, daddr);
	const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport);
	unsigned int hash = inet_ehashfn(net, daddr, lport,
					 saddr, inet->inet_dport);
	struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
	struct inet_timewait_sock *tw = NULL;
	const struct hlist_nulls_node *node;
@@ -1007,9 +1006,10 @@ static u32 *table_perturb;

int __inet_hash_connect(struct inet_timewait_death_row *death_row,
		struct sock *sk, u64 port_offset,
		u32 hash_port0,
		int (*check_established)(struct inet_timewait_death_row *,
			struct sock *, __u16, struct inet_timewait_sock **,
			bool rcu_lookup))
			bool rcu_lookup, u32 hash))
{
	struct inet_hashinfo *hinfo = death_row->hashinfo;
	struct inet_bind_hashbucket *head, *head2;
@@ -1027,7 +1027,8 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,

	if (port) {
		local_bh_disable();
		ret = check_established(death_row, sk, port, NULL, false);
		ret = check_established(death_row, sk, port, NULL, false,
					hash_port0 + port);
		local_bh_enable();
		return ret;
	}
@@ -1071,7 +1072,8 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
				rcu_read_unlock();
				goto next_port;
			}
			if (!check_established(death_row, sk, port, &tw, true))
			if (!check_established(death_row, sk, port, &tw, true,
					       hash_port0 + port))
				break;
			rcu_read_unlock();
			goto next_port;
@@ -1090,7 +1092,8 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
					goto next_port_unlock;
				WARN_ON(hlist_empty(&tb->bhash2));
				if (!check_established(death_row, sk,
						       port, &tw, false))
						       port, &tw, false,
						       hash_port0 + port))
					goto ok;
				goto next_port_unlock;
			}
@@ -1197,11 +1200,18 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
/*
 * inet_hash_connect - IPv4 wrapper around __inet_hash_connect().
 * @death_row: passed through unchanged to __inet_hash_connect()
 * @sk: the socket being connected
 *
 * Computes the established-hash value once, using a local port of 0
 * (hash_port0), and hands it to __inet_hash_connect() together with the
 * IPv4 check_established callback.  inet_ehashfn() adds lport to the value
 * returned by __inet_ehashfn(), and __inet_hash_connect() passes
 * "hash_port0 + port" to the callback for each port it tries.
 *
 * Returns whatever __inet_hash_connect() returns.
 */
int inet_hash_connect(struct inet_timewait_death_row *death_row,
		      struct sock *sk)
{
	const struct inet_sock *inet = inet_sk(sk);
	const struct net *net = sock_net(sk);
	u64 port_offset = 0;
	u32 hash_port0;

	/* A port offset is only computed when inet_num is still zero. */
	if (!inet_sk(sk)->inet_num)
		port_offset = inet_sk_port_offset(sk);
	/*
	 * NOTE(review): the next line is the pre-patch call as rendered by the
	 * diff view (no +/- markers); it is superseded by the return below.
	 */
	return __inet_hash_connect(death_row, sk, port_offset,

	/* Hash of the address/dport tuple with the local port left at 0. */
	hash_port0 = inet_ehashfn(net, inet->inet_rcv_saddr, 0,
				  inet->inet_daddr, inet->inet_dport);

	return __inet_hash_connect(death_row, sk, port_offset, hash_port0,
				   __inet_check_established);
}
EXPORT_SYMBOL_GPL(inet_hash_connect);
+13 −6
Original line number Diff line number Diff line
@@ -35,7 +35,7 @@ u32 inet6_ehashfn(const struct net *net,
	lhash = (__force u32)laddr->s6_addr32[3];
	fhash = __ipv6_addr_jhash(faddr, tcp_ipv6_hash_secret);

	return __inet6_ehashfn(lhash, lport, fhash, fport,
	return lport + __inet6_ehashfn(lhash, 0, fhash, fport,
				       inet6_ehash_secret + net_hash_mix(net));
}
EXPORT_SYMBOL_GPL(inet6_ehashfn);
@@ -264,7 +264,8 @@ EXPORT_SYMBOL_GPL(inet6_lookup);
static int __inet6_check_established(struct inet_timewait_death_row *death_row,
				     struct sock *sk, const __u16 lport,
				     struct inet_timewait_sock **twp,
				     bool rcu_lookup)
				     bool rcu_lookup,
				     u32 hash)
{
	struct inet_hashinfo *hinfo = death_row->hashinfo;
	struct inet_sock *inet = inet_sk(sk);
@@ -274,8 +275,6 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
	struct net *net = sock_net(sk);
	const int sdif = l3mdev_master_ifindex_by_index(net, dif);
	const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport);
	const unsigned int hash = inet6_ehashfn(net, daddr, lport, saddr,
						inet->inet_dport);
	struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
	struct inet_timewait_sock *tw = NULL;
	const struct hlist_nulls_node *node;
@@ -354,11 +353,19 @@ static u64 inet6_sk_port_offset(const struct sock *sk)
/*
 * inet6_hash_connect - IPv6 wrapper around __inet_hash_connect().
 * @death_row: passed through unchanged to __inet_hash_connect()
 * @sk: the socket being connected
 *
 * Computes the established-hash value once, using a local port of 0
 * (hash_port0), and hands it to __inet_hash_connect() together with the
 * IPv6 check_established callback.  inet6_ehashfn() adds lport to the value
 * returned by __inet6_ehashfn(), and __inet_hash_connect() passes
 * "hash_port0 + port" to the callback for each port it tries.
 *
 * Returns whatever __inet_hash_connect() returns.
 */
int inet6_hash_connect(struct inet_timewait_death_row *death_row,
		       struct sock *sk)
{
	/*
	 * Note the naming: daddr points at the socket's sk_v6_rcv_saddr and
	 * saddr at its sk_v6_daddr.
	 * NOTE(review): presumably named from the incoming-packet point of
	 * view - confirm against __inet6_check_established().
	 */
	const struct in6_addr *daddr = &sk->sk_v6_rcv_saddr;
	const struct in6_addr *saddr = &sk->sk_v6_daddr;
	const struct inet_sock *inet = inet_sk(sk);
	const struct net *net = sock_net(sk);
	u64 port_offset = 0;
	u32 hash_port0;

	/* A port offset is only computed when inet_num is still zero. */
	if (!inet_sk(sk)->inet_num)
		port_offset = inet6_sk_port_offset(sk);
	/*
	 * NOTE(review): the next line is the pre-patch call as rendered by the
	 * diff view (no +/- markers); it is superseded by the return below.
	 */
	return __inet_hash_connect(death_row, sk, port_offset,

	/* Hash of the address/dport tuple with the local port left at 0. */
	hash_port0 = inet6_ehashfn(net, daddr, 0, saddr, inet->inet_dport);

	return __inet_hash_connect(death_row, sk, port_offset, hash_port0,
				   __inet6_check_established);
}
EXPORT_SYMBOL_GPL(inet6_hash_connect);