Commit 5086e1b7 authored by Jakub Kicinski
Browse files

Merge branch 'net-visibility-of-memory-limits-in-netns'

Matteo Croce says:

====================
net: visibility of memory limits in netns

Some programs need to know the size of the network buffers to operate
correctly, so export the following sysctls read-only in network namespaces:

- net.core.rmem_default
- net.core.rmem_max
- net.core.wmem_default
- net.core.wmem_max
====================

Link: https://lore.kernel.org/r/20240530232722.45255-1-technoboy85@gmail.com


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 165f8769 5b5233fb
Loading
Loading
Loading
Loading
+42 −33
Original line number Diff line number Diff line
@@ -382,38 +382,6 @@ proc_dolongvec_minmax_bpf_restricted(struct ctl_table *table, int write,
#endif

static struct ctl_table net_core_table[] = {
	{
		.procname	= "wmem_max",
		.data		= &sysctl_wmem_max,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &min_sndbuf,
	},
	{
		.procname	= "rmem_max",
		.data		= &sysctl_rmem_max,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &min_rcvbuf,
	},
	{
		.procname	= "wmem_default",
		.data		= &sysctl_wmem_default,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &min_sndbuf,
	},
	{
		.procname	= "rmem_default",
		.data		= &sysctl_rmem_default,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &min_rcvbuf,
	},
	{
		.procname	= "mem_pcpu_rsv",
		.data		= &net_hotdata.sysctl_mem_pcpu_rsv,
@@ -697,6 +665,41 @@ static struct ctl_table netns_core_table[] = {
		.extra2		= SYSCTL_ONE,
		.proc_handler	= proc_dou8vec_minmax,
	},
	/* sysctl_core_net_init() will set the values after this
	 * to readonly in network namespaces
	 */
	{
		.procname	= "wmem_max",
		.data		= &sysctl_wmem_max,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &min_sndbuf,
	},
	{
		.procname	= "rmem_max",
		.data		= &sysctl_rmem_max,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &min_rcvbuf,
	},
	{
		.procname	= "wmem_default",
		.data		= &sysctl_wmem_default,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &min_sndbuf,
	},
	{
		.procname	= "rmem_default",
		.data		= &sysctl_rmem_default,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &min_rcvbuf,
	},
};

static int __init fb_tunnels_only_for_init_net_sysctl_setup(char *str)
@@ -724,9 +727,15 @@ static __net_init int sysctl_core_net_init(struct net *net)
		if (tbl == NULL)
			goto err_dup;

		for (i = 0; i < table_size; ++i)
		for (i = 0; i < table_size; ++i) {
			if (tbl[i].data == &sysctl_wmem_max)
				break;

			tbl[i].data += (char *)net - (char *)&init_net;
		}
		for (; i < table_size; ++i)
			tbl[i].mode &= ~0222;
	}

	net->core.sysctl_hdr = register_net_sysctl_sz(net, "net/core", tbl, table_size);
	if (net->core.sysctl_hdr == NULL)
+1 −0
Original line number Diff line number Diff line
@@ -53,6 +53,7 @@ TEST_PROGS += bind_bhash.sh
TEST_PROGS += ip_local_port_range.sh
TEST_PROGS += rps_default_mask.sh
TEST_PROGS += big_tcp.sh
TEST_PROGS += netns-sysctl.sh
TEST_PROGS_EXTENDED := toeplitz_client.sh toeplitz.sh
TEST_GEN_FILES =  socket nettest
TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
+40 −0
Original line number Diff line number Diff line
#!/bin/bash -e
# SPDX-License-Identifier: GPL-2.0
#
# This test checks that the network buffer sysctls are present
# in a network namespace, and that they are read-only.

source lib.sh

# Hand the namespace name stored in $test_ns to cleanup_ns (from lib.sh).
# The variable is left unquoted on purpose: if it was never set, cleanup_ns
# is invoked with no arguments rather than with an empty string.
cleanup() {
	cleanup_ns ${test_ns}
}

# Run cleanup whenever the script exits, on success or on failure.
trap cleanup EXIT

# Report a test failure and abort.
# All arguments are joined into a single message, printed to stderr with an
# "ERROR: " prefix; the script then exits with status 1.
fail() {
	local msg="ERROR: $*"

	echo "$msg" >&2
	exit 1
}

# Create the test namespace; setup_ns (from lib.sh) stores its name in $test_ns.
setup_ns test_ns

# For each buffer-size sysctl: verify it is writable on the host, visible with
# the host's value inside the namespace, and read-only inside the namespace.
# The host value is saved before it is modified and restored afterwards, so
# running the test does not leave the machine's socket buffer limits changed.
for sc in {r,w}mem_{default,max}; do
	# check that this is writable in the init netns
	[ -w "/proc/sys/net/core/$sc" ] ||
		fail "$sc isn't writable in the init netns!"

	# remember the current host value so it can be put back later
	saved=$(sysctl -n "net.core.$sc") ||
		fail "Can't read $sc in init netns!"

	# change the value in the host netns
	sysctl -qw "net.core.$sc=300000" ||
		fail "Can't write $sc in init netns!"

	# Collect the first failure instead of exiting right away, so the
	# host value is restored even when a check fails.
	err=
	if ! [ "$(ip netns exec "$test_ns" sysctl -n "net.core.$sc")" -eq 300000 ]; then
		# the value must be read from the init netns
		err="Value for $sc mismatch!"
	elif ip netns exec "$test_ns" [ -w "/proc/sys/net/core/$sc" ]; then
		# the sysctl must not be writable in a netns
		err="$sc is writable in a netns!"
	fi

	# put the original host value back before reporting anything
	sysctl -qw "net.core.$sc=$saved" ||
		fail "Can't restore $sc in init netns!"

	[ -z "$err" ] || fail "$err"
done

echo 'Test passed OK'