Commit ff61a4a5 authored by Paolo Abeni
Browse files

Merge branch 'ip-improve-tcp-sock-multipath-routing'

Willem de Bruijn says:

====================
ip: improve tcp sock multipath routing

From: Willem de Bruijn <willemb@google.com>

Improve layer 4 multipath hash policy for local tcp connections:

patch 1: Select a source address that matches the nexthop device.
         Due to tcp_v4_connect making separate route lookups for saddr
         and route, the two can currently be inconsistent.

patch 2: Use all paths when opening multiple local tcp connections to
         the same ip address and port.

patch 3: Test the behavior. Extend the fib_tests.sh testsuite with a
         test that opens many connections and counts SYNs on both egress
         devices, for packets matching the source address of the dev.

Changelog in the individual patches
====================

Link: https://patch.msgid.link/20250424143549.669426-1-willemdebruijn.kernel@gmail.com


Signed-off-by: Paolo Abeni <pabeni@redhat.com>
parents 0d15a26b 4d0dac49
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -39,6 +39,7 @@ struct flowi_common {
#define FLOWI_FLAG_ANYSRC		0x01
#define FLOWI_FLAG_KNOWN_NH		0x02
#define FLOWI_FLAG_L3MDEV_OIF		0x04
#define FLOWI_FLAG_ANY_SPORT		0x08
	__u32	flowic_secid;
	kuid_t  flowic_uid;
	__u32		flowic_multipath_hash;
+2 −1
Original line number Diff line number Diff line
@@ -574,7 +574,8 @@ static inline u32 fib_multipath_hash_from_keys(const struct net *net,

int fib_check_nh(struct net *net, struct fib_nh *nh, u32 table, u8 scope,
		 struct netlink_ext_ack *extack);
void fib_select_multipath(struct fib_result *res, int hash);
void fib_select_multipath(struct fib_result *res, int hash,
			  const struct flowi4 *fl4);
void fib_select_path(struct net *net, struct fib_result *res,
		     struct flowi4 *fl4, const struct sk_buff *skb);

+3 −0
Original line number Diff line number Diff line
@@ -326,6 +326,9 @@ static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst,
	if (inet_test_bit(TRANSPARENT, sk))
		flow_flags |= FLOWI_FLAG_ANYSRC;

	if (IS_ENABLED(CONFIG_IP_ROUTE_MULTIPATH) && !sport)
		flow_flags |= FLOWI_FLAG_ANY_SPORT;

	flowi4_init_output(fl4, oif, READ_ONCE(sk->sk_mark), ip_sock_rt_tos(sk),
			   ip_sock_rt_scope(sk), protocol, flow_flags, dst,
			   src, dport, sport, sk->sk_uid);
+25 −14
Original line number Diff line number Diff line
@@ -2170,34 +2170,45 @@ static bool fib_good_nh(const struct fib_nh *nh)
	return !!(state & NUD_VALID);
}

void fib_select_multipath(struct fib_result *res, int hash)
void fib_select_multipath(struct fib_result *res, int hash,
			  const struct flowi4 *fl4)
{
	struct fib_info *fi = res->fi;
	struct net *net = fi->fib_net;
	bool first = false;
	bool found = false;
	bool use_neigh;
	__be32 saddr;

	if (unlikely(res->fi->nh)) {
		nexthop_path_fib_result(res, hash);
		return;
	}

	use_neigh = READ_ONCE(net->ipv4.sysctl_fib_multipath_use_neigh);
	saddr = fl4 ? fl4->saddr : 0;

	change_nexthops(fi) {
		if (READ_ONCE(net->ipv4.sysctl_fib_multipath_use_neigh)) {
			if (!fib_good_nh(nexthop_nh))
		if (use_neigh && !fib_good_nh(nexthop_nh))
			continue;
			if (!first) {

		if (!found) {
			res->nh_sel = nhsel;
			res->nhc = &nexthop_nh->nh_common;
				first = true;
			}
			found = !saddr || nexthop_nh->nh_saddr == saddr;
		}

		if (hash > atomic_read(&nexthop_nh->fib_nh_upper_bound))
			continue;

		if (!saddr || nexthop_nh->nh_saddr == saddr) {
			res->nh_sel = nhsel;
			res->nhc = &nexthop_nh->nh_common;
			return;
		}

		if (found)
			return;

	} endfor_nexthops(fi);
}
#endif
@@ -2212,7 +2223,7 @@ void fib_select_path(struct net *net, struct fib_result *res,
	if (fib_info_num_path(res->fi) > 1) {
		int h = fib_multipath_hash(net, fl4, skb, NULL);

		fib_select_multipath(res, h);
		fib_select_multipath(res, h, fl4);
	}
	else
#endif
+11 −4
Original line number Diff line number Diff line
@@ -2037,8 +2037,12 @@ static u32 fib_multipath_custom_hash_fl4(const struct net *net,
		hash_keys.addrs.v4addrs.dst = fl4->daddr;
	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO)
		hash_keys.basic.ip_proto = fl4->flowi4_proto;
	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT)
	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT) {
		if (fl4->flowi4_flags & FLOWI_FLAG_ANY_SPORT)
			hash_keys.ports.src = (__force __be16)get_random_u16();
		else
			hash_keys.ports.src = fl4->fl4_sport;
	}
	if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT)
		hash_keys.ports.dst = fl4->fl4_dport;

@@ -2093,6 +2097,9 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
			hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
			hash_keys.addrs.v4addrs.src = fl4->saddr;
			hash_keys.addrs.v4addrs.dst = fl4->daddr;
			if (fl4->flowi4_flags & FLOWI_FLAG_ANY_SPORT)
				hash_keys.ports.src = (__force __be16)get_random_u16();
			else
				hash_keys.ports.src = fl4->fl4_sport;
			hash_keys.ports.dst = fl4->fl4_dport;
			hash_keys.basic.ip_proto = fl4->flowi4_proto;
@@ -2154,7 +2161,7 @@ ip_mkroute_input(struct sk_buff *skb, struct fib_result *res,
	if (res->fi && fib_info_num_path(res->fi) > 1) {
		int h = fib_multipath_hash(res->fi->fib_net, NULL, skb, hkeys);

		fib_select_multipath(res, h);
		fib_select_multipath(res, h, NULL);
		IPCB(skb)->flags |= IPSKB_MULTIPATH;
	}
#endif
Loading