Commit b1216f1d authored by Jakub Kicinski
Browse files

Merge branch 'icmp-better-deal-with-ddos'

Eric Dumazet says:

====================
icmp: better deal with DDOS

When dealing with the death of big UDP servers, admins might want to
increase net.ipv4.icmp_msgs_per_sec and net.ipv4.icmp_msgs_burst
to big values (2,000,000 or more).

They also might need to tune the per-host ratelimit to 1ms or 0ms
in favor of the global rate limit.

This series fixes bugs showing up in all these needs.
====================

Link: https://patch.msgid.link/20260216142832.3834174-1-edumazet@google.com


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 570e4549 9395b1bb
Loading
Loading
Loading
Loading
+4 −3
Original line number Diff line number Diff line
@@ -3234,12 +3234,13 @@ enhanced_dad - BOOLEAN
===========

ratelimit - INTEGER
	Limit the maximal rates for sending ICMPv6 messages.
	Limit the maximal rates for sending ICMPv6 messages to a particular
	peer.

	0 to disable any limiting,
	otherwise the minimal space between responses in milliseconds.
	otherwise the space between responses in milliseconds.

	Default: 1000
	Default: 100

ratemask - list of comma separated ranges
	For ICMPv6 message types matching the ranges in the ratemask, limit
+7 −2
Original line number Diff line number Diff line
@@ -88,6 +88,12 @@ struct netns_ipv4 {
	int sysctl_tcp_rcvbuf_low_rtt;
	__cacheline_group_end(netns_ipv4_read_rx);

	/* ICMP rate limiter hot cache line. */
	__cacheline_group_begin_aligned(icmp);
	atomic_t	icmp_global_credit;
	u32		icmp_global_stamp;
	__cacheline_group_end_aligned(icmp);

	struct inet_timewait_death_row tcp_death_row;
	struct udp_table *udp_table;

@@ -141,8 +147,7 @@ struct netns_ipv4 {
	int sysctl_icmp_ratemask;
	int sysctl_icmp_msgs_per_sec;
	int sysctl_icmp_msgs_burst;
	atomic_t icmp_global_credit;
	u32 icmp_global_stamp;

	u32 ip_rt_min_pmtu;
	int ip_rt_mtu_expires;
	int ip_rt_min_advmss;
+12 −5
Original line number Diff line number Diff line
@@ -250,7 +250,8 @@ bool icmp_global_allow(struct net *net)
	if (delta < HZ / 50)
		return false;

	incr = READ_ONCE(net->ipv4.sysctl_icmp_msgs_per_sec) * delta / HZ;
	incr = READ_ONCE(net->ipv4.sysctl_icmp_msgs_per_sec);
	incr = div_u64((u64)incr * delta, HZ);
	if (!incr)
		return false;

@@ -315,23 +316,29 @@ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
	struct dst_entry *dst = &rt->dst;
	struct inet_peer *peer;
	struct net_device *dev;
	int peer_timeout;
	bool rc = true;

	if (!apply_ratelimit)
		return true;

	peer_timeout = READ_ONCE(net->ipv4.sysctl_icmp_ratelimit);
	if (!peer_timeout)
		goto out;

	/* No rate limit on loopback */
	rcu_read_lock();
	dev = dst_dev_rcu(dst);
	if (dev && (dev->flags & IFF_LOOPBACK))
		goto out;
		goto out_unlock;

	peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr,
			       l3mdev_master_ifindex_rcu(dev));
	rc = inet_peer_xrlim_allow(peer,
				   READ_ONCE(net->ipv4.sysctl_icmp_ratelimit));
out:
	rc = inet_peer_xrlim_allow(peer, peer_timeout);

out_unlock:
	rcu_read_unlock();
out:
	if (!rc)
		__ICMP_INC_STATS(net, ICMP_MIB_RATELIMITHOST);
	else
+1 −1
Original line number Diff line number Diff line
@@ -952,7 +952,7 @@ static int __net_init inet6_net_init(struct net *net)
	int err = 0;

	net->ipv6.sysctl.bindv6only = 0;
	net->ipv6.sysctl.icmpv6_time = 1*HZ;
	net->ipv6.sysctl.icmpv6_time = HZ / 10;
	net->ipv6.sysctl.icmpv6_echo_ignore_all = 0;
	net->ipv6.sysctl.icmpv6_echo_ignore_multicast = 0;
	net->ipv6.sysctl.icmpv6_echo_ignore_anycast = 0;
+7 −8
Original line number Diff line number Diff line
@@ -217,17 +217,16 @@ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
	} else if (dev && (dev->flags & IFF_LOOPBACK)) {
		res = true;
	} else {
		struct rt6_info *rt = dst_rt6_info(dst);
		int tmo = net->ipv6.sysctl.icmpv6_time;
		int tmo = READ_ONCE(net->ipv6.sysctl.icmpv6_time);
		struct inet_peer *peer;

		/* Give more bandwidth to wider prefixes. */
		if (rt->rt6i_dst.plen < 128)
			tmo >>= ((128 - rt->rt6i_dst.plen)>>5);

		if (!tmo) {
			res = true;
		} else {
			peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr);
			res = inet_peer_xrlim_allow(peer, tmo);
		}
	}
	rcu_read_unlock();
	if (!res)
		__ICMP6_INC_STATS(net, NULL, ICMP6_MIB_RATELIMITHOST);