Commit 81d572a5 authored by Willem de Bruijn, committed by Jakub Kicinski
Browse files

selftest: net: extend msg_zerocopy test with forwarding



Zerocopy skbs are converted to regular copy skbs when data is queued
to a local socket. This happens in the existing test with a sender and
receiver communicating over a veth device.

Zerocopy skbs are sent without copying if egressing a device. Verify
that this behavior is maintained even in the common container setup
where data is forwarded over a veth to the physical device.

Update msg_zerocopy.sh to

1. Have a dummy network device to simulate a physical device.
2. Have forwarding enabled between veth and dummy.
3. Add a tx-only test that sends out dummy via the forwarding path.
4. Verify the exitcode of the sender, which signals zerocopy success.

As dummy drops all packets, this cannot be a TCP connection. Test
the new case with unconnected UDP only.

Update msg_zerocopy.c to
- Accept an argument indicating whether sending with zerocopy is expected.
- Return an exit code indicating whether the behavior matched that expectation.

Signed-off-by: Willem de Bruijn <willemb@google.com>
Link: https://patch.msgid.link/20250630194312.1571410-3-willemdebruijn.kernel@gmail.com


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parent d2527ad3
Loading
Loading
Loading
Loading
+15 −9
Original line number Diff line number Diff line
@@ -77,6 +77,7 @@
static int  cfg_cork;
static bool cfg_cork_mixed;
static int  cfg_cpu		= -1;		/* default: pin to last cpu */
static int  cfg_expect_zerocopy	= -1;
static int  cfg_family		= PF_UNSPEC;
static int  cfg_ifindex		= 1;
static int  cfg_payload_len;
@@ -92,9 +93,9 @@ static socklen_t cfg_alen;
static struct sockaddr_storage cfg_dst_addr;
static struct sockaddr_storage cfg_src_addr;

static int exitcode;
static char payload[IP_MAXPACKET];
static long packets, bytes, completions, expected_completions;
static int  zerocopied = -1;
static uint32_t next_completion;
static uint32_t sends_since_notify;

@@ -444,11 +445,13 @@ static bool do_recv_completion(int fd, int domain)
	next_completion = hi + 1;

	zerocopy = !(serr->ee_code & SO_EE_CODE_ZEROCOPY_COPIED);
	if (zerocopied == -1)
		zerocopied = zerocopy;
	else if (zerocopied != zerocopy) {
		fprintf(stderr, "serr: inconsistent\n");
		zerocopied = zerocopy;
	if (cfg_expect_zerocopy != -1 &&
	    cfg_expect_zerocopy != zerocopy) {
		fprintf(stderr, "serr: ee_code: %u != expected %u\n",
			zerocopy, cfg_expect_zerocopy);
		exitcode = 1;
		/* suppress repeated messages */
		cfg_expect_zerocopy = zerocopy;
	}

	if (cfg_verbose >= 2)
@@ -571,7 +574,7 @@ static void do_tx(int domain, int type, int protocol)

	fprintf(stderr, "tx=%lu (%lu MB) txc=%lu zc=%c\n",
		packets, bytes >> 20, completions,
		zerocopied == 1 ? 'y' : 'n');
		cfg_zerocopy && cfg_expect_zerocopy == 1 ? 'y' : 'n');
}

static int do_setup_rx(int domain, int type, int protocol)
@@ -715,7 +718,7 @@ static void parse_opts(int argc, char **argv)

	cfg_payload_len = max_payload_len;

	while ((c = getopt(argc, argv, "46c:C:D:i:l:mp:rs:S:t:vz")) != -1) {
	while ((c = getopt(argc, argv, "46c:C:D:i:l:mp:rs:S:t:vzZ:")) != -1) {
		switch (c) {
		case '4':
			if (cfg_family != PF_UNSPEC)
@@ -770,6 +773,9 @@ static void parse_opts(int argc, char **argv)
		case 'z':
			cfg_zerocopy = true;
			break;
		case 'Z':
			cfg_expect_zerocopy = !!atoi(optarg);
			break;
		}
	}

@@ -817,5 +823,5 @@ int main(int argc, char **argv)
	else
		error(1, 0, "unknown cfg_test %s", cfg_test);

	return 0;
	return exitcode;
}
+62 −22
Original line number Diff line number Diff line
@@ -6,6 +6,7 @@
set -e

readonly DEV="veth0"
readonly DUMMY_DEV="dummy0"
readonly DEV_MTU=65535
readonly BIN="./msg_zerocopy"

@@ -14,21 +15,25 @@ readonly NSPREFIX="ns-${RAND}"
readonly NS1="${NSPREFIX}1"
readonly NS2="${NSPREFIX}2"

readonly SADDR4='192.168.1.1'
readonly DADDR4='192.168.1.2'
readonly SADDR6='fd::1'
readonly DADDR6='fd::2'
readonly LPREFIX4='192.168.1'
readonly RPREFIX4='192.168.2'
readonly LPREFIX6='fd'
readonly RPREFIX6='fc'


readonly path_sysctl_mem="net.core.optmem_max"

# No arguments: automated test
if [[ "$#" -eq "0" ]]; then
	$0 4 tcp -t 1
	$0 6 tcp -t 1
	$0 4 udp -t 1
	$0 6 udp -t 1
	echo "OK. All tests passed"
	exit 0
	ret=0

	$0 4 tcp -t 1 || ret=1
	$0 6 tcp -t 1 || ret=1
	$0 4 udp -t 1 || ret=1
	$0 6 udp -t 1 || ret=1

	[[ "$ret" == "0" ]] && echo "OK. All tests passed"
	exit $ret
fi

# Argument parsing
@@ -45,11 +50,18 @@ readonly EXTRA_ARGS="$@"

# Argument parsing: configure addresses
if [[ "${IP}" == "4" ]]; then
	readonly SADDR="${SADDR4}"
	readonly DADDR="${DADDR4}"
	readonly SADDR="${LPREFIX4}.1"
	readonly DADDR="${LPREFIX4}.2"
	readonly DUMMY_ADDR="${RPREFIX4}.1"
	readonly DADDR_TXONLY="${RPREFIX4}.2"
	readonly MASK="24"
elif [[ "${IP}" == "6" ]]; then
	readonly SADDR="${SADDR6}"
	readonly DADDR="${DADDR6}"
	readonly SADDR="${LPREFIX6}::1"
	readonly DADDR="${LPREFIX6}::2"
	readonly DUMMY_ADDR="${RPREFIX6}::1"
	readonly DADDR_TXONLY="${RPREFIX6}::2"
	readonly MASK="64"
	readonly NODAD="nodad"
else
	echo "Invalid IP version ${IP}"
	exit 1
@@ -89,33 +101,61 @@ ip netns exec "${NS2}" sysctl -w -q "${path_sysctl_mem}=1000000"
ip link add "${DEV}" mtu "${DEV_MTU}" netns "${NS1}" type veth \
  peer name "${DEV}" mtu "${DEV_MTU}" netns "${NS2}"

ip link add "${DUMMY_DEV}" mtu "${DEV_MTU}" netns "${NS2}" type dummy

# Bring the devices up
ip -netns "${NS1}" link set "${DEV}" up
ip -netns "${NS2}" link set "${DEV}" up
ip -netns "${NS2}" link set "${DUMMY_DEV}" up

# Set fixed MAC addresses on the devices
ip -netns "${NS1}" link set dev "${DEV}" address 02:02:02:02:02:02
ip -netns "${NS2}" link set dev "${DEV}" address 06:06:06:06:06:06

# Add fixed IP addresses to the devices
ip -netns "${NS1}" addr add 192.168.1.1/24 dev "${DEV}"
ip -netns "${NS2}" addr add 192.168.1.2/24 dev "${DEV}"
ip -netns "${NS1}" addr add       fd::1/64 dev "${DEV}" nodad
ip -netns "${NS2}" addr add       fd::2/64 dev "${DEV}" nodad
ip -netns "${NS1}" addr add "${SADDR}/${MASK}" dev "${DEV}" ${NODAD}
ip -netns "${NS2}" addr add "${DADDR}/${MASK}" dev "${DEV}" ${NODAD}
ip -netns "${NS2}" addr add "${DUMMY_ADDR}/${MASK}" dev "${DUMMY_DEV}" ${NODAD}

ip -netns "${NS1}" route add default via "${DADDR}" dev "${DEV}"
ip -netns "${NS2}" route add default via "${DADDR_TXONLY}" dev "${DUMMY_DEV}"

ip netns exec "${NS2}" sysctl -wq net.ipv4.ip_forward=1
ip netns exec "${NS2}" sysctl -wq net.ipv6.conf.all.forwarding=1

# Optionally disable sg or csum offload to test edge cases
# ip netns exec "${NS1}" ethtool -K "${DEV}" sg off

ret=0

do_test() {
	local readonly ARGS="$1"

	echo "ipv${IP} ${TXMODE} ${ARGS}"
	ip netns exec "${NS2}" "${BIN}" "-${IP}" -i "${DEV}" -t 2 -C 2 -S "${SADDR}" -D "${DADDR}" ${ARGS} -r "${RXMODE}" &
	# tx-rx test
	# packets queued to a local socket are copied,
	# sender notification has SO_EE_CODE_ZEROCOPY_COPIED.

	echo -e "\nipv${IP} ${TXMODE} ${ARGS} tx-rx\n"
	ip netns exec "${NS2}" "${BIN}" "-${IP}" -i "${DEV}" -t 2 -C 2 \
		-S "${SADDR}" -D "${DADDR}" ${ARGS} -r "${RXMODE}" &
	sleep 0.2
	ip netns exec "${NS1}" "${BIN}" "-${IP}" -i "${DEV}" -t 1 -C 3 -S "${SADDR}" -D "${DADDR}" ${ARGS} "${TXMODE}"
	ip netns exec "${NS1}" "${BIN}" "-${IP}" -i "${DEV}" -t 1 -C 3 \
		-S "${SADDR}" -D "${DADDR}" ${ARGS} "${TXMODE}" -Z 0 || ret=1
	wait

	# next test is unconnected tx to dummy0, cannot exercise with tcp
	[[ "${TXMODE}" == "tcp" ]] && return

	# tx-only test: send out dummy0
	# packets leaving the host are not copied,
	# sender notification does not have SO_EE_CODE_ZEROCOPY_COPIED.

	echo -e "\nipv${IP} ${TXMODE} ${ARGS} tx-only\n"
	ip netns exec "${NS1}" "${BIN}" "-${IP}" -i "${DEV}" -t 1 -C 3 \
		-S "${SADDR}" -D "${DADDR_TXONLY}" ${ARGS} "${TXMODE}" -Z 1 || ret=1
}

do_test "${EXTRA_ARGS}"
do_test "-z ${EXTRA_ARGS}"
echo ok

[[ "$ret" == "0" ]] && echo "OK"