Commit 144d0b1c authored by Jakub Kicinski
Browse files

Merge branch 'ipv6-snmp-avoid-performance-issue-with-ratelimithost'

Eric Dumazet says:

====================
ipv6: snmp: avoid performance issue with RATELIMITHOST

Addition of ICMP6_MIB_RATELIMITHOST in commit d0941130
("icmp: Add counters for rate limits") introduced a performance drop
under DoS conditions (such as receiving UDP packets on closed ports).

Per-netns ICMP6_MIB_RATELIMITHOST tracking uses per-cpu storage and
is sufficient; we do not need slow per-device tracking for this metric.

In v2 of this series, I completed the removal of SNMP_MIB_SENTINEL
across the whole kernel for consistency.

v1: https://lore.kernel.org/20250904092432.113c4940@kernel.org
====================

Link: https://patch.msgid.link/20250905165813.1470708-1-edumazet@google.com


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents abcf9f66 20d3d268
Loading
Loading
Loading
Loading
+5 −4
Original line number Diff line number Diff line
@@ -326,11 +326,12 @@ static inline u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_o
}
#endif

#define snmp_get_cpu_field64_batch(buff64, stats_list, mib_statistic, offset) \
#define snmp_get_cpu_field64_batch_cnt(buff64, stats_list, cnt,	\
				       mib_statistic, offset)	\
{ \
	int i, c; \
	for_each_possible_cpu(c) { \
		for (i = 0; stats_list[i].name; i++) \
		for (i = 0; i < cnt; i++) \
			buff64[i] += snmp_get_cpu_field64( \
					mib_statistic, \
					c, stats_list[i].entry, \
@@ -338,11 +339,11 @@ static inline u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_o
	} \
}

#define snmp_get_cpu_field_batch(buff, stats_list, mib_statistic) \
#define snmp_get_cpu_field_batch_cnt(buff, stats_list, cnt, mib_statistic) \
{ \
	int i, c; \
	for_each_possible_cpu(c) { \
		for (i = 0; stats_list[i].name; i++) \
		for (i = 0; i < cnt; i++) \
			buff[i] += snmp_get_cpu_field( \
						mib_statistic, \
						c, stats_list[i].entry); \
+0 −5
Original line number Diff line number Diff line
@@ -36,11 +36,6 @@ struct snmp_mib {
	.entry = _entry,			\
}

#define SNMP_MIB_SENTINEL {	\
	.name = NULL,		\
	.entry = 0,		\
}

/*
 * We use unsigned longs for most mibs but u64 for ipstats.
 */
+33 −32
Original line number Diff line number Diff line
@@ -95,7 +95,6 @@ static const struct snmp_mib snmp4_ipstats_list[] = {
	SNMP_MIB_ITEM("FragFails", IPSTATS_MIB_FRAGFAILS),
	SNMP_MIB_ITEM("FragCreates", IPSTATS_MIB_FRAGCREATES),
	SNMP_MIB_ITEM("OutTransmits", IPSTATS_MIB_OUTPKTS),
	SNMP_MIB_SENTINEL
};

/* Following items are displayed in /proc/net/netstat */
@@ -119,7 +118,6 @@ static const struct snmp_mib snmp4_ipextstats_list[] = {
	SNMP_MIB_ITEM("InECT0Pkts", IPSTATS_MIB_ECT0PKTS),
	SNMP_MIB_ITEM("InCEPkts", IPSTATS_MIB_CEPKTS),
	SNMP_MIB_ITEM("ReasmOverlaps", IPSTATS_MIB_REASM_OVERLAPS),
	SNMP_MIB_SENTINEL
};

static const struct {
@@ -157,7 +155,6 @@ static const struct snmp_mib snmp4_tcp_list[] = {
	SNMP_MIB_ITEM("InErrs", TCP_MIB_INERRS),
	SNMP_MIB_ITEM("OutRsts", TCP_MIB_OUTRSTS),
	SNMP_MIB_ITEM("InCsumErrors", TCP_MIB_CSUMERRORS),
	SNMP_MIB_SENTINEL
};

static const struct snmp_mib snmp4_udp_list[] = {
@@ -170,7 +167,6 @@ static const struct snmp_mib snmp4_udp_list[] = {
	SNMP_MIB_ITEM("InCsumErrors", UDP_MIB_CSUMERRORS),
	SNMP_MIB_ITEM("IgnoredMulti", UDP_MIB_IGNOREDMULTI),
	SNMP_MIB_ITEM("MemErrors", UDP_MIB_MEMERRORS),
	SNMP_MIB_SENTINEL
};

static const struct snmp_mib snmp4_net_list[] = {
@@ -309,7 +305,6 @@ static const struct snmp_mib snmp4_net_list[] = {
	SNMP_MIB_ITEM("TCPAOKeyNotFound", LINUX_MIB_TCPAOKEYNOTFOUND),
	SNMP_MIB_ITEM("TCPAOGood", LINUX_MIB_TCPAOGOOD),
	SNMP_MIB_ITEM("TCPAODroppedIcmps", LINUX_MIB_TCPAODROPPEDICMPS),
	SNMP_MIB_SENTINEL
};

static void icmpmsg_put_line(struct seq_file *seq, unsigned long *vals,
@@ -389,14 +384,15 @@ static void icmp_put(struct seq_file *seq)
 */
static int snmp_seq_show_ipstats(struct seq_file *seq, void *v)
{
	const int cnt = ARRAY_SIZE(snmp4_ipstats_list);
	u64 buff64[ARRAY_SIZE(snmp4_ipstats_list)];
	struct net *net = seq->private;
	u64 buff64[IPSTATS_MIB_MAX];
	int i;

	memset(buff64, 0, IPSTATS_MIB_MAX * sizeof(u64));
	memset(buff64, 0, sizeof(buff64));

	seq_puts(seq, "Ip: Forwarding DefaultTTL");
	for (i = 0; snmp4_ipstats_list[i].name; i++)
	for (i = 0; i < cnt; i++)
		seq_printf(seq, " %s", snmp4_ipstats_list[i].name);

	seq_printf(seq, "\nIp: %d %d",
@@ -404,10 +400,10 @@ static int snmp_seq_show_ipstats(struct seq_file *seq, void *v)
		   READ_ONCE(net->ipv4.sysctl_ip_default_ttl));

	BUILD_BUG_ON(offsetof(struct ipstats_mib, mibs) != 0);
	snmp_get_cpu_field64_batch(buff64, snmp4_ipstats_list,
	snmp_get_cpu_field64_batch_cnt(buff64, snmp4_ipstats_list, cnt,
				       net->mib.ip_statistics,
				       offsetof(struct ipstats_mib, syncp));
	for (i = 0; snmp4_ipstats_list[i].name; i++)
	for (i = 0; i < cnt; i++)
		seq_printf(seq, " %llu", buff64[i]);

	return 0;
@@ -415,20 +411,23 @@ static int snmp_seq_show_ipstats(struct seq_file *seq, void *v)

static int snmp_seq_show_tcp_udp(struct seq_file *seq, void *v)
{
	const int udp_cnt = ARRAY_SIZE(snmp4_udp_list);
	const int tcp_cnt = ARRAY_SIZE(snmp4_tcp_list);
	unsigned long buff[TCPUDP_MIB_MAX];
	struct net *net = seq->private;
	int i;

	memset(buff, 0, TCPUDP_MIB_MAX * sizeof(unsigned long));
	memset(buff, 0, tcp_cnt * sizeof(unsigned long));

	seq_puts(seq, "\nTcp:");
	for (i = 0; snmp4_tcp_list[i].name; i++)
	for (i = 0; i < tcp_cnt; i++)
		seq_printf(seq, " %s", snmp4_tcp_list[i].name);

	seq_puts(seq, "\nTcp:");
	snmp_get_cpu_field_batch(buff, snmp4_tcp_list,
	snmp_get_cpu_field_batch_cnt(buff, snmp4_tcp_list,
				     tcp_cnt,
				     net->mib.tcp_statistics);
	for (i = 0; snmp4_tcp_list[i].name; i++) {
	for (i = 0; i < tcp_cnt; i++) {
		/* MaxConn field is signed, RFC 2012 */
		if (snmp4_tcp_list[i].entry == TCP_MIB_MAXCONN)
			seq_printf(seq, " %ld", buff[i]);
@@ -436,27 +435,29 @@ static int snmp_seq_show_tcp_udp(struct seq_file *seq, void *v)
			seq_printf(seq, " %lu", buff[i]);
	}

	memset(buff, 0, TCPUDP_MIB_MAX * sizeof(unsigned long));
	memset(buff, 0, udp_cnt * sizeof(unsigned long));

	snmp_get_cpu_field_batch(buff, snmp4_udp_list,
	snmp_get_cpu_field_batch_cnt(buff, snmp4_udp_list,
				     udp_cnt,
				     net->mib.udp_statistics);
	seq_puts(seq, "\nUdp:");
	for (i = 0; snmp4_udp_list[i].name; i++)
	for (i = 0; i < udp_cnt; i++)
		seq_printf(seq, " %s", snmp4_udp_list[i].name);
	seq_puts(seq, "\nUdp:");
	for (i = 0; snmp4_udp_list[i].name; i++)
	for (i = 0; i < udp_cnt; i++)
		seq_printf(seq, " %lu", buff[i]);

	memset(buff, 0, TCPUDP_MIB_MAX * sizeof(unsigned long));
	memset(buff, 0, udp_cnt * sizeof(unsigned long));

	/* the UDP and UDP-Lite MIBs are the same */
	seq_puts(seq, "\nUdpLite:");
	snmp_get_cpu_field_batch(buff, snmp4_udp_list,
	snmp_get_cpu_field_batch_cnt(buff, snmp4_udp_list,
				     udp_cnt,
				     net->mib.udplite_statistics);
	for (i = 0; snmp4_udp_list[i].name; i++)
	for (i = 0; i < udp_cnt; i++)
		seq_printf(seq, " %s", snmp4_udp_list[i].name);
	seq_puts(seq, "\nUdpLite:");
	for (i = 0; snmp4_udp_list[i].name; i++)
	for (i = 0; i < udp_cnt; i++)
		seq_printf(seq, " %lu", buff[i]);

	seq_putc(seq, '\n');
@@ -480,8 +481,8 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
 */
static int netstat_seq_show(struct seq_file *seq, void *v)
{
	const int ip_cnt = ARRAY_SIZE(snmp4_ipextstats_list) - 1;
	const int tcp_cnt = ARRAY_SIZE(snmp4_net_list) - 1;
	const int ip_cnt = ARRAY_SIZE(snmp4_ipextstats_list);
	const int tcp_cnt = ARRAY_SIZE(snmp4_net_list);
	struct net *net = seq->private;
	unsigned long *buff;
	int i;
@@ -494,7 +495,7 @@ static int netstat_seq_show(struct seq_file *seq, void *v)
	buff = kzalloc(max(tcp_cnt * sizeof(long), ip_cnt * sizeof(u64)),
		       GFP_KERNEL);
	if (buff) {
		snmp_get_cpu_field_batch(buff, snmp4_net_list,
		snmp_get_cpu_field_batch_cnt(buff, snmp4_net_list, tcp_cnt,
					     net->mib.net_statistics);
		for (i = 0; i < tcp_cnt; i++)
			seq_printf(seq, " %lu", buff[i]);
@@ -513,7 +514,7 @@ static int netstat_seq_show(struct seq_file *seq, void *v)
		u64 *buff64 = (u64 *)buff;

		memset(buff64, 0, ip_cnt * sizeof(u64));
		snmp_get_cpu_field64_batch(buff64, snmp4_ipextstats_list,
		snmp_get_cpu_field64_batch_cnt(buff64, snmp4_ipextstats_list, ip_cnt,
					   net->mib.ip_statistics,
					   offsetof(struct ipstats_mib, syncp));
		for (i = 0; i < ip_cnt; i++)
+1 −2
Original line number Diff line number Diff line
@@ -230,8 +230,7 @@ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
	}
	rcu_read_unlock();
	if (!res)
		__ICMP6_INC_STATS(net, ip6_dst_idev(dst),
				  ICMP6_MIB_RATELIMITHOST);
		__ICMP6_INC_STATS(net, NULL, ICMP6_MIB_RATELIMITHOST);
	else
		icmp_global_consume(net);
	dst_release(dst);
+50 −41
Original line number Diff line number Diff line
@@ -85,7 +85,6 @@ static const struct snmp_mib snmp6_ipstats_list[] = {
	SNMP_MIB_ITEM("Ip6InECT0Pkts", IPSTATS_MIB_ECT0PKTS),
	SNMP_MIB_ITEM("Ip6InCEPkts", IPSTATS_MIB_CEPKTS),
	SNMP_MIB_ITEM("Ip6OutTransmits", IPSTATS_MIB_OUTPKTS),
	SNMP_MIB_SENTINEL
};

static const struct snmp_mib snmp6_icmp6_list[] = {
@@ -95,30 +94,10 @@ static const struct snmp_mib snmp6_icmp6_list[] = {
	SNMP_MIB_ITEM("Icmp6OutMsgs", ICMP6_MIB_OUTMSGS),
	SNMP_MIB_ITEM("Icmp6OutErrors", ICMP6_MIB_OUTERRORS),
	SNMP_MIB_ITEM("Icmp6InCsumErrors", ICMP6_MIB_CSUMERRORS),
/* ICMP6_MIB_RATELIMITHOST needs to be last, see snmp6_dev_seq_show(). */
	SNMP_MIB_ITEM("Icmp6OutRateLimitHost", ICMP6_MIB_RATELIMITHOST),
	SNMP_MIB_SENTINEL
};

/* RFC 4293 v6 ICMPMsgStatsTable; named items for RFC 2466 compatibility */
static const char *const icmp6type2name[256] = {
	[ICMPV6_DEST_UNREACH] = "DestUnreachs",
	[ICMPV6_PKT_TOOBIG] = "PktTooBigs",
	[ICMPV6_TIME_EXCEED] = "TimeExcds",
	[ICMPV6_PARAMPROB] = "ParmProblems",
	[ICMPV6_ECHO_REQUEST] = "Echos",
	[ICMPV6_ECHO_REPLY] = "EchoReplies",
	[ICMPV6_MGM_QUERY] = "GroupMembQueries",
	[ICMPV6_MGM_REPORT] = "GroupMembResponses",
	[ICMPV6_MGM_REDUCTION] = "GroupMembReductions",
	[ICMPV6_MLD2_REPORT] = "MLDv2Reports",
	[NDISC_ROUTER_ADVERTISEMENT] = "RouterAdvertisements",
	[NDISC_ROUTER_SOLICITATION] = "RouterSolicits",
	[NDISC_NEIGHBOUR_ADVERTISEMENT] = "NeighborAdvertisements",
	[NDISC_NEIGHBOUR_SOLICITATION] = "NeighborSolicits",
	[NDISC_REDIRECT] = "Redirects",
};


static const struct snmp_mib snmp6_udp6_list[] = {
	SNMP_MIB_ITEM("Udp6InDatagrams", UDP_MIB_INDATAGRAMS),
	SNMP_MIB_ITEM("Udp6NoPorts", UDP_MIB_NOPORTS),
@@ -129,7 +108,6 @@ static const struct snmp_mib snmp6_udp6_list[] = {
	SNMP_MIB_ITEM("Udp6InCsumErrors", UDP_MIB_CSUMERRORS),
	SNMP_MIB_ITEM("Udp6IgnoredMulti", UDP_MIB_IGNOREDMULTI),
	SNMP_MIB_ITEM("Udp6MemErrors", UDP_MIB_MEMERRORS),
	SNMP_MIB_SENTINEL
};

static const struct snmp_mib snmp6_udplite6_list[] = {
@@ -141,7 +119,6 @@ static const struct snmp_mib snmp6_udplite6_list[] = {
	SNMP_MIB_ITEM("UdpLite6SndbufErrors", UDP_MIB_SNDBUFERRORS),
	SNMP_MIB_ITEM("UdpLite6InCsumErrors", UDP_MIB_CSUMERRORS),
	SNMP_MIB_ITEM("UdpLite6MemErrors", UDP_MIB_MEMERRORS),
	SNMP_MIB_SENTINEL
};

static void snmp6_seq_show_icmpv6msg(struct seq_file *seq, atomic_long_t *smib)
@@ -151,11 +128,31 @@ static void snmp6_seq_show_icmpv6msg(struct seq_file *seq, atomic_long_t *smib)

	/* print by name -- deprecated items */
	for (i = 0; i < ICMP6MSG_MIB_MAX; i++) {
		const char *p = NULL;
		int icmptype;
		const char *p;

#define CASE(TYP, STR) case TYP: p = STR; break;

		icmptype = i & 0xff;
		p = icmp6type2name[icmptype];
		switch (icmptype) {
/* RFC 4293 v6 ICMPMsgStatsTable; named items for RFC 2466 compatibility */
		CASE(ICMPV6_DEST_UNREACH,	"DestUnreachs")
		CASE(ICMPV6_PKT_TOOBIG,		"PktTooBigs")
		CASE(ICMPV6_TIME_EXCEED,	"TimeExcds")
		CASE(ICMPV6_PARAMPROB,		"ParmProblems")
		CASE(ICMPV6_ECHO_REQUEST,	"Echos")
		CASE(ICMPV6_ECHO_REPLY,		"EchoReplies")
		CASE(ICMPV6_MGM_QUERY,		"GroupMembQueries")
		CASE(ICMPV6_MGM_REPORT,		"GroupMembResponses")
		CASE(ICMPV6_MGM_REDUCTION,	"GroupMembReductions")
		CASE(ICMPV6_MLD2_REPORT,	"MLDv2Reports")
		CASE(NDISC_ROUTER_ADVERTISEMENT, "RouterAdvertisements")
		CASE(NDISC_ROUTER_SOLICITATION, "RouterSolicits")
		CASE(NDISC_NEIGHBOUR_ADVERTISEMENT, "NeighborAdvertisements")
		CASE(NDISC_NEIGHBOUR_SOLICITATION, "NeighborSolicits")
		CASE(NDISC_REDIRECT,		"Redirects")
		}
#undef CASE
		if (!p)	/* don't print un-named types here */
			continue;
		snprintf(name, sizeof(name), "Icmp6%s%s",
@@ -182,35 +179,37 @@ static void snmp6_seq_show_icmpv6msg(struct seq_file *seq, atomic_long_t *smib)
 */
static void snmp6_seq_show_item(struct seq_file *seq, void __percpu *pcpumib,
				atomic_long_t *smib,
				const struct snmp_mib *itemlist)
				const struct snmp_mib *itemlist,
				int cnt)
{
	unsigned long buff[SNMP_MIB_MAX];
	int i;

	if (pcpumib) {
		memset(buff, 0, sizeof(unsigned long) * SNMP_MIB_MAX);
		memset(buff, 0, sizeof(unsigned long) * cnt);

		snmp_get_cpu_field_batch(buff, itemlist, pcpumib);
		for (i = 0; itemlist[i].name; i++)
		snmp_get_cpu_field_batch_cnt(buff, itemlist, cnt, pcpumib);
		for (i = 0; i < cnt; i++)
			seq_printf(seq, "%-32s\t%lu\n",
				   itemlist[i].name, buff[i]);
	} else {
		for (i = 0; itemlist[i].name; i++)
		for (i = 0; i < cnt; i++)
			seq_printf(seq, "%-32s\t%lu\n", itemlist[i].name,
				   atomic_long_read(smib + itemlist[i].entry));
	}
}

static void snmp6_seq_show_item64(struct seq_file *seq, void __percpu *mib,
				  const struct snmp_mib *itemlist, size_t syncpoff)
				  const struct snmp_mib *itemlist,
				  int cnt, size_t syncpoff)
{
	u64 buff64[SNMP_MIB_MAX];
	int i;

	memset(buff64, 0, sizeof(u64) * SNMP_MIB_MAX);
	memset(buff64, 0, sizeof(u64) * cnt);

	snmp_get_cpu_field64_batch(buff64, itemlist, mib, syncpoff);
	for (i = 0; itemlist[i].name; i++)
	snmp_get_cpu_field64_batch_cnt(buff64, itemlist, cnt, mib, syncpoff);
	for (i = 0; i < cnt; i++)
		seq_printf(seq, "%-32s\t%llu\n", itemlist[i].name, buff64[i]);
}

@@ -219,14 +218,19 @@ static int snmp6_seq_show(struct seq_file *seq, void *v)
	struct net *net = (struct net *)seq->private;

	snmp6_seq_show_item64(seq, net->mib.ipv6_statistics,
			    snmp6_ipstats_list, offsetof(struct ipstats_mib, syncp));
			      snmp6_ipstats_list,
			      ARRAY_SIZE(snmp6_ipstats_list),
			      offsetof(struct ipstats_mib, syncp));
	snmp6_seq_show_item(seq, net->mib.icmpv6_statistics,
			    NULL, snmp6_icmp6_list);
			    NULL, snmp6_icmp6_list,
			    ARRAY_SIZE(snmp6_icmp6_list));
	snmp6_seq_show_icmpv6msg(seq, net->mib.icmpv6msg_statistics->mibs);
	snmp6_seq_show_item(seq, net->mib.udp_stats_in6,
			    NULL, snmp6_udp6_list);
			    NULL, snmp6_udp6_list,
			    ARRAY_SIZE(snmp6_udp6_list));
	snmp6_seq_show_item(seq, net->mib.udplite_stats_in6,
			    NULL, snmp6_udplite6_list);
			    NULL, snmp6_udplite6_list,
			    ARRAY_SIZE(snmp6_udplite6_list));
	return 0;
}

@@ -236,9 +240,14 @@ static int snmp6_dev_seq_show(struct seq_file *seq, void *v)

	seq_printf(seq, "%-32s\t%u\n", "ifIndex", idev->dev->ifindex);
	snmp6_seq_show_item64(seq, idev->stats.ipv6,
			    snmp6_ipstats_list, offsetof(struct ipstats_mib, syncp));
			      snmp6_ipstats_list,
			      ARRAY_SIZE(snmp6_ipstats_list),
			      offsetof(struct ipstats_mib, syncp));

	/* Per idev icmp stats do not have ICMP6_MIB_RATELIMITHOST */
	snmp6_seq_show_item(seq, NULL, idev->stats.icmpv6dev->mibs,
			    snmp6_icmp6_list);
			    snmp6_icmp6_list, ARRAY_SIZE(snmp6_icmp6_list) - 1);

	snmp6_seq_show_icmpv6msg(seq, idev->stats.icmpv6msgdev->mibs);
	return 0;
}
Loading