Commit 9cd5ef0b authored by Eric Dumazet, committed by Jakub Kicinski
Browse files

net: rfs: add sock_rps_delete_flow() helper



RFS can exhibit lower performance for workloads using short-lived
flows and a small set of 4-tuples.

This is often the case for load-testers, using a pair of hosts,
if the server has a single listener port.

Typical use case :

Server : tcp_crr -T128 -F1000 -6 -U -l30 -R 14250
Client : tcp_crr -T128 -F1000 -6 -U -l30 -c -H server | grep local_throughput

This is because the RFS global hash table contains stale information
when the same RSS key is recycled for another socket and another cpu.

Make sure to undo the changes and go back to initial state when
a flow is disconnected.

Performance of the above test is increased by 22 %,
going from 372604 transactions per second to 457773.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Octavian Purdila <tavip@google.com>
Reviewed-by: Neal Cardwell <ncardwell@google.com>
Link: https://patch.msgid.link/20250515100354.3339920-1-edumazet@google.com


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parent f24f7b2f
Loading
Loading
Loading
Loading
+24 −0
Original line number Diff line number Diff line
@@ -123,6 +123,30 @@ static inline void sock_rps_record_flow(const struct sock *sk)
#endif
}

/**
 * sock_rps_delete_flow - reset the RFS flow-table slot used by a socket
 * @sk: socket whose sk_rxhash selects the slot
 *
 * Compiles to an empty function when CONFIG_RPS is not set.  Otherwise it
 * returns immediately if the rfs_needed static branch is off or if the
 * socket has no rx hash (sk_rxhash == 0).  When a global flow table is
 * installed, the slot selected by (sk_rxhash & table mask) is put back to
 * RPS_NO_CPU so that a later flow landing on the same slot does not see
 * a stale value left by this socket.
 */
static inline void sock_rps_delete_flow(const struct sock *sk)
{
#ifdef CONFIG_RPS
	struct rps_sock_flow_table *flow_table;
	u32 rxhash, slot;

	if (!static_branch_unlikely(&rfs_needed))
		return;

	rxhash = READ_ONCE(sk->sk_rxhash);
	if (!rxhash)
		return;

	/* The table pointer is RCU-managed; it may be NULL. */
	rcu_read_lock();
	flow_table = rcu_dereference(net_hotdata.rps_sock_flow_table);
	if (!flow_table)
		goto unlock;

	slot = rxhash & flow_table->mask;

	/* Store only when the slot is not already RPS_NO_CPU. */
	if (READ_ONCE(flow_table->ents[slot]) != RPS_NO_CPU)
		WRITE_ONCE(flow_table->ents[slot], RPS_NO_CPU);
unlock:
	rcu_read_unlock();
#endif
}

static inline u32 rps_input_queue_tail_incr(struct softnet_data *sd)
{
#ifdef CONFIG_RPS
+4 −2
Original line number Diff line number Diff line
@@ -23,11 +23,12 @@
#if IS_ENABLED(CONFIG_IPV6)
#include <net/inet6_hashtables.h>
#endif
#include <net/secure_seq.h>
#include <net/hotdata.h>
#include <net/ip.h>
#include <net/tcp.h>
#include <net/rps.h>
#include <net/secure_seq.h>
#include <net/sock_reuseport.h>
#include <net/tcp.h>

u32 inet_ehashfn(const struct net *net, const __be32 laddr,
		 const __u16 lport, const __be32 faddr,
@@ -790,6 +791,7 @@ void inet_unhash(struct sock *sk)
	if (sk_unhashed(sk))
		return;

	sock_rps_delete_flow(sk);
	if (sk->sk_state == TCP_LISTEN) {
		struct inet_listen_hashbucket *ilb2;

+2 −0
Original line number Diff line number Diff line
@@ -120,6 +120,7 @@
#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6_stubs.h>
#endif
#include <net/rps.h>

struct udp_table udp_table __read_mostly;

@@ -2200,6 +2201,7 @@ void udp_lib_unhash(struct sock *sk)
		struct udp_table *udptable = udp_get_table_prot(sk);
		struct udp_hslot *hslot, *hslot2;

		sock_rps_delete_flow(sk);
		hslot  = udp_hashslot(udptable, sock_net(sk),
				      udp_sk(sk)->udp_port_hash);
		hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash);
+1 −1
Original line number Diff line number Diff line
@@ -8321,7 +8321,7 @@ static int sctp_hash(struct sock *sk)

/*
 * sctp_unhash - forward @sk to sock_rps_delete_flow().
 *
 * As shown here the function performs no other work on the socket.
 */
static void sctp_unhash(struct sock *sk)
{
	/* STUB */
	sock_rps_delete_flow(sk);
}

/* Check if port is acceptable.  Possibly find first available port.