Commit 8c2bd38b authored by Eric Dumazet, committed by Jakub Kicinski
Browse files

icmp: change the order of rate limits



ICMP messages are ratelimited :

After the blamed commits, the two rate limiters are applied in this order:

1) host wide ratelimit (icmp_global_allow())

2) Per destination ratelimit (inetpeer based)

In order to avoid side-channel attacks, we need to apply
the per destination check first.

This patch makes the following changes:

1) icmp_global_allow() checks if the host wide limit is reached.
   But credits are not yet consumed. This is deferred to 3)

2) The per destination limit is checked/updated.
   This might add a new node in inetpeer tree.

3) icmp_global_consume() consumes tokens if prior operations succeeded.

This means that the host wide ratelimit is still effective
in keeping the inetpeer tree small even under DDoS.

As a bonus, I removed icmp_global.lock as the fast path
can use a lock-free operation.

Fixes: c0303efe ("net: reduce cycles spend on ICMP replies that gets rate limited")
Fixes: 4cdf507d ("icmp: add a global rate limitation")
Reported-by: Keyu Man <keyu.man@email.ucr.edu>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Cc: Jesper Dangaard Brouer <hawk@kernel.org>
Cc: stable@vger.kernel.org
Link: https://patch.msgid.link/20240829144641.3880376-2-edumazet@google.com


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parent b26b6449
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -795,6 +795,8 @@ static inline void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb)
}

bool icmp_global_allow(void);
void icmp_global_consume(void);

extern int sysctl_icmp_msgs_per_sec;
extern int sysctl_icmp_msgs_burst;

+56 −47
Original line number Diff line number Diff line
@@ -224,57 +224,59 @@ int sysctl_icmp_msgs_per_sec __read_mostly = 1000;
int sysctl_icmp_msgs_burst __read_mostly = 50;

static struct {
	spinlock_t	lock;
	u32		credit;
	atomic_t	credit;
	u32		stamp;
} icmp_global = {
	.lock		= __SPIN_LOCK_UNLOCKED(icmp_global.lock),
};
} icmp_global;

/**
 * icmp_global_allow - Are we allowed to send one more ICMP message ?
 *
 * Uses a token bucket to limit our ICMP messages to ~sysctl_icmp_msgs_per_sec.
 * Returns false if we reached the limit and can not send another packet.
 * Note: called with BH disabled
 * Works in tandem with icmp_global_consume().
 */
bool icmp_global_allow(void)
{
	u32 credit, delta, incr = 0, now = (u32)jiffies;
	bool rc = false;
	u32 delta, now, oldstamp;
	int incr, new, old;

	/* Check if token bucket is empty and cannot be refilled
	 * without taking the spinlock. The READ_ONCE() are paired
	 * with the following WRITE_ONCE() in this same function.
	/* Note: many cpus could find this condition true.
	 * Then later icmp_global_consume() could consume more credits,
	 * this is an acceptable race.
	 */
	if (!READ_ONCE(icmp_global.credit)) {
		delta = min_t(u32, now - READ_ONCE(icmp_global.stamp), HZ);
	if (atomic_read(&icmp_global.credit) > 0)
		return true;

	now = jiffies;
	oldstamp = READ_ONCE(icmp_global.stamp);
	delta = min_t(u32, now - oldstamp, HZ);
	if (delta < HZ / 50)
		return false;
	}

	spin_lock(&icmp_global.lock);
	delta = min_t(u32, now - icmp_global.stamp, HZ);
	if (delta >= HZ / 50) {
	incr = READ_ONCE(sysctl_icmp_msgs_per_sec) * delta / HZ;
		if (incr)
			WRITE_ONCE(icmp_global.stamp, now);
	}
	credit = min_t(u32, icmp_global.credit + incr,
		       READ_ONCE(sysctl_icmp_msgs_burst));
	if (credit) {
		/* We want to use a credit of one in average, but need to randomize
		 * it for security reasons.
		 */
		credit = max_t(int, credit - get_random_u32_below(3), 0);
		rc = true;
	if (!incr)
		return false;

	if (cmpxchg(&icmp_global.stamp, oldstamp, now) == oldstamp) {
		old = atomic_read(&icmp_global.credit);
		do {
			new = min(old + incr, READ_ONCE(sysctl_icmp_msgs_burst));
		} while (!atomic_try_cmpxchg(&icmp_global.credit, &old, new));
	}
	WRITE_ONCE(icmp_global.credit, credit);
	spin_unlock(&icmp_global.lock);
	return rc;
	return true;
}
EXPORT_SYMBOL(icmp_global_allow);

/**
 * icmp_global_consume - charge the host wide ICMP token bucket
 *
 * Companion of icmp_global_allow(): the per-destination limiters call this
 * once they have decided the message may be sent, so that credits are only
 * spent for messages that passed both checks.
 *
 * The amount charged is drawn from get_random_u32_below(3) (presumably
 * 0, 1 or 2 - confirm against the random API) rather than being a fixed
 * single credit.
 */
void icmp_global_consume(void)
{
	int cost = get_random_u32_below(3);

	/* Nothing to charge this time: skip the atomic operation. */
	if (!cost)
		return;

	/* No lower bound is enforced here, so icmp_global.credit
	 * may end up negative.
	 */
	atomic_sub(cost, &icmp_global.credit);
}
EXPORT_SYMBOL(icmp_global_consume);

static bool icmpv4_mask_allow(struct net *net, int type, int code)
{
	if (type > NR_ICMP_TYPES)
@@ -291,14 +293,16 @@ static bool icmpv4_mask_allow(struct net *net, int type, int code)
	return false;
}

static bool icmpv4_global_allow(struct net *net, int type, int code)
static bool icmpv4_global_allow(struct net *net, int type, int code,
				bool *apply_ratelimit)
{
	if (icmpv4_mask_allow(net, type, code))
		return true;

	if (icmp_global_allow())
	if (icmp_global_allow()) {
		*apply_ratelimit = true;
		return true;

	}
	__ICMP_INC_STATS(net, ICMP_MIB_RATELIMITGLOBAL);
	return false;
}
@@ -308,15 +312,16 @@ static bool icmpv4_global_allow(struct net *net, int type, int code)
 */

static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
			       struct flowi4 *fl4, int type, int code)
			       struct flowi4 *fl4, int type, int code,
			       bool apply_ratelimit)
{
	struct dst_entry *dst = &rt->dst;
	struct inet_peer *peer;
	bool rc = true;
	int vif;

	if (icmpv4_mask_allow(net, type, code))
		goto out;
	if (!apply_ratelimit)
		return true;

	/* No rate limit on loopback */
	if (dst->dev && (dst->dev->flags&IFF_LOOPBACK))
@@ -331,6 +336,8 @@ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
out:
	if (!rc)
		__ICMP_INC_STATS(net, ICMP_MIB_RATELIMITHOST);
	else
		icmp_global_consume();
	return rc;
}

@@ -402,6 +409,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
	struct ipcm_cookie ipc;
	struct rtable *rt = skb_rtable(skb);
	struct net *net = dev_net(rt->dst.dev);
	bool apply_ratelimit = false;
	struct flowi4 fl4;
	struct sock *sk;
	struct inet_sock *inet;
@@ -413,11 +421,11 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
	if (ip_options_echo(net, &icmp_param->replyopts.opt.opt, skb))
		return;

	/* Needed by both icmp_global_allow and icmp_xmit_lock */
	/* Needed by both icmpv4_global_allow and icmp_xmit_lock */
	local_bh_disable();

	/* global icmp_msgs_per_sec */
	if (!icmpv4_global_allow(net, type, code))
	/* is global icmp_msgs_per_sec exhausted ? */
	if (!icmpv4_global_allow(net, type, code, &apply_ratelimit))
		goto out_bh_enable;

	sk = icmp_xmit_lock(net);
@@ -450,7 +458,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
	rt = ip_route_output_key(net, &fl4);
	if (IS_ERR(rt))
		goto out_unlock;
	if (icmpv4_xrlim_allow(net, rt, &fl4, type, code))
	if (icmpv4_xrlim_allow(net, rt, &fl4, type, code, apply_ratelimit))
		icmp_push_reply(sk, icmp_param, &fl4, &ipc, &rt);
	ip_rt_put(rt);
out_unlock:
@@ -596,6 +604,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
	int room;
	struct icmp_bxm icmp_param;
	struct rtable *rt = skb_rtable(skb_in);
	bool apply_ratelimit = false;
	struct ipcm_cookie ipc;
	struct flowi4 fl4;
	__be32 saddr;
@@ -677,7 +686,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
		}
	}

	/* Needed by both icmp_global_allow and icmp_xmit_lock */
	/* Needed by both icmpv4_global_allow and icmp_xmit_lock */
	local_bh_disable();

	/* Check global sysctl_icmp_msgs_per_sec ratelimit, unless
@@ -685,7 +694,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
	 * loopback, then peer ratelimit still work (in icmpv4_xrlim_allow)
	 */
	if (!(skb_in->dev && (skb_in->dev->flags&IFF_LOOPBACK)) &&
	      !icmpv4_global_allow(net, type, code))
	      !icmpv4_global_allow(net, type, code, &apply_ratelimit))
		goto out_bh_enable;

	sk = icmp_xmit_lock(net);
@@ -744,7 +753,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
		goto out_unlock;

	/* peer icmp_ratelimit */
	if (!icmpv4_xrlim_allow(net, rt, &fl4, type, code))
	if (!icmpv4_xrlim_allow(net, rt, &fl4, type, code, apply_ratelimit))
		goto ende;

	/* RFC says return as much as we can without exceeding 576 bytes. */
+18 −10
Original line number Diff line number Diff line
@@ -175,14 +175,16 @@ static bool icmpv6_mask_allow(struct net *net, int type)
	return false;
}

static bool icmpv6_global_allow(struct net *net, int type)
static bool icmpv6_global_allow(struct net *net, int type,
				bool *apply_ratelimit)
{
	if (icmpv6_mask_allow(net, type))
		return true;

	if (icmp_global_allow())
	if (icmp_global_allow()) {
		*apply_ratelimit = true;
		return true;

	}
	__ICMP_INC_STATS(net, ICMP_MIB_RATELIMITGLOBAL);
	return false;
}
@@ -191,13 +193,13 @@ static bool icmpv6_global_allow(struct net *net, int type)
 * Check the ICMP output rate limit
 */
static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
			       struct flowi6 *fl6)
			       struct flowi6 *fl6, bool apply_ratelimit)
{
	struct net *net = sock_net(sk);
	struct dst_entry *dst;
	bool res = false;

	if (icmpv6_mask_allow(net, type))
	if (!apply_ratelimit)
		return true;

	/*
@@ -228,6 +230,8 @@ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
	if (!res)
		__ICMP6_INC_STATS(net, ip6_dst_idev(dst),
				  ICMP6_MIB_RATELIMITHOST);
	else
		icmp_global_consume();
	dst_release(dst);
	return res;
}
@@ -452,6 +456,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
	struct net *net;
	struct ipv6_pinfo *np;
	const struct in6_addr *saddr = NULL;
	bool apply_ratelimit = false;
	struct dst_entry *dst;
	struct icmp6hdr tmp_hdr;
	struct flowi6 fl6;
@@ -533,11 +538,12 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
		return;
	}

	/* Needed by both icmp_global_allow and icmpv6_xmit_lock */
	/* Needed by both icmpv6_global_allow and icmpv6_xmit_lock */
	local_bh_disable();

	/* Check global sysctl_icmp_msgs_per_sec ratelimit */
	if (!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, type))
	if (!(skb->dev->flags & IFF_LOOPBACK) &&
	    !icmpv6_global_allow(net, type, &apply_ratelimit))
		goto out_bh_enable;

	mip6_addr_swap(skb, parm);
@@ -575,7 +581,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,

	np = inet6_sk(sk);

	if (!icmpv6_xrlim_allow(sk, type, &fl6))
	if (!icmpv6_xrlim_allow(sk, type, &fl6, apply_ratelimit))
		goto out;

	tmp_hdr.icmp6_type = type;
@@ -717,6 +723,7 @@ static enum skb_drop_reason icmpv6_echo_reply(struct sk_buff *skb)
	struct ipv6_pinfo *np;
	const struct in6_addr *saddr = NULL;
	struct icmp6hdr *icmph = icmp6_hdr(skb);
	bool apply_ratelimit = false;
	struct icmp6hdr tmp_hdr;
	struct flowi6 fl6;
	struct icmpv6_msg msg;
@@ -781,8 +788,9 @@ static enum skb_drop_reason icmpv6_echo_reply(struct sk_buff *skb)
		goto out;

	/* Check the ratelimit */
	if ((!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, ICMPV6_ECHO_REPLY)) ||
	    !icmpv6_xrlim_allow(sk, ICMPV6_ECHO_REPLY, &fl6))
	if ((!(skb->dev->flags & IFF_LOOPBACK) &&
	    !icmpv6_global_allow(net, ICMPV6_ECHO_REPLY, &apply_ratelimit)) ||
	    !icmpv6_xrlim_allow(sk, ICMPV6_ECHO_REPLY, &fl6, apply_ratelimit))
		goto out_dst_release;

	idev = __in6_dev_get(skb->dev);