Commit 347fcdc4 authored by Joe Damato, committed by Jakub Kicinski
Browse files

selftests: net: Add busy_poll_test



Add an epoll busy poll test using netdevsim.

This test consists of:
  - busy_poller (via busy_poller.c)
  - busy_poll_test.sh which loads netdevsim, sets up network namespaces,
    and runs busy_poller to receive data and socat to send data.

The selftest tests two different scenarios:
  - busy poll (the pre-existing version in the kernel)
  - busy poll with suspend enabled (what this series adds)

The transmitted data is a 1MiB temporary file generated from /dev/urandom,
and the test is considered passing if the md5sum of the input file given to
socat matches the md5sum of the output file written by busy_poller.

netdevsim was chosen instead of veth due to netdevsim's support for
netdev-genl.

For now, this test uses the functionality that netdevsim provides. In the
future, perhaps netdevsim can be extended to emulate device IRQs to more
thoroughly test all pre-existing kernel options (like defer_hard_irqs)
and suspend.

Signed-off-by: Joe Damato <jdamato@fastly.com>
Co-developed-by: Martin Karsten <mkarsten@uwaterloo.ca>
Signed-off-by: Martin Karsten <mkarsten@uwaterloo.ca>
Acked-by: Stanislav Fomichev <sdf@fomichev.me>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Link: https://patch.msgid.link/20241109050245.191288-6-jdamato@fastly.com


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parent 8a6de262
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -2,6 +2,7 @@
bind_bhash
bind_timewait
bind_wildcard
busy_poller
cmsg_sender
diag_uid
epoll_busy_poll
+9 −0
Original line number Diff line number Diff line
@@ -97,6 +97,11 @@ TEST_PROGS += fq_band_pktlimit.sh
TEST_PROGS += vlan_hw_filter.sh
TEST_PROGS += bpf_offload.py
TEST_PROGS += ipv6_route_update_soft_lockup.sh
TEST_PROGS += busy_poll_test.sh

# YNL files, must be before "include ..lib.mk"
YNL_GEN_FILES := busy_poller
TEST_GEN_FILES += $(YNL_GEN_FILES)

TEST_FILES := settings
TEST_FILES += in_netns.sh lib.sh net_helper.sh setup_loopback.sh setup_veth.sh
@@ -107,6 +112,10 @@ TEST_INCLUDES := forwarding/lib.sh

include ../lib.mk

# YNL build
YNL_GENS := netdev
include ynl.mk

$(OUTPUT)/epoll_busy_poll: LDLIBS += -lcap
$(OUTPUT)/reuseport_bpf_numa: LDLIBS += -lnuma
$(OUTPUT)/tcp_mmap: LDLIBS += -lpthread -lcrypto
+165 −0
Original line number Diff line number Diff line
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
source net_helper.sh

# netdevsim bus IDs for the server (SV) and client (CL) devices. The two
# random ranges (256-511 and 512-767) do not overlap, so the IDs always differ.
NSIM_SV_ID=$((256 + RANDOM % 256))
NSIM_SV_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_SV_ID
NSIM_CL_ID=$((512 + RANDOM % 256))
NSIM_CL_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_CL_ID

# netdevsim bus control files this script writes to in order to create,
# delete, link and unlink devices
NSIM_DEV_SYS_NEW=/sys/bus/netdevsim/new_device
NSIM_DEV_SYS_DEL=/sys/bus/netdevsim/del_device
NSIM_DEV_SYS_LINK=/sys/bus/netdevsim/link_device
NSIM_DEV_SYS_UNLINK=/sys/bus/netdevsim/unlink_device

# addresses assigned (as /24) in setup_ns, and the TCP port busy_poller
# listens on and socat connects to
SERVER_IP=192.168.1.1
CLIENT_IP=192.168.1.2
SERVER_PORT=48675

# busy poll config
# passed to busy_poller as -m, -u, -g and -P respectively
MAX_EVENTS=8
BUSY_POLL_USECS=0
BUSY_POLL_BUDGET=16
PREFER_BUSY_POLL=1

# IRQ deferral config
# SUSPEND_TIMEOUT is handed to busy_poller (-s) by test_busypoll_with_suspend.
# NOTE(review): NAPI_DEFER_HARD_IRQS and GRO_FLUSH_TIMEOUT are not referenced
# anywhere else in this script (busy_poller is never given -d or -r) - confirm
# whether that is intended.
NAPI_DEFER_HARD_IRQS=100
GRO_FLUSH_TIMEOUT=50000
SUSPEND_TIMEOUT=20000000

# Build the test topology: create the nssv (server) and nscl (client)
# namespaces, look up the netdev name of each netdevsim device, move one
# netdev into each namespace, then address both and bring them up.
#
# Sets the globals NSIM_SV_NAME and NSIM_CL_NAME.
#
# The body runs with errexit enabled, so the first failing command terminates
# the whole script; errexit is turned off again before returning.
setup_ns()
{
	local in_sv="ip netns exec nssv"
	local in_cl="ip netns exec nscl"

	set -e

	ip netns add nssv
	ip netns add nscl

	# the netdev name is the name of the directory found directly below
	# <device>/net
	NSIM_SV_NAME=$(find "$NSIM_SV_SYS/net" -mindepth 1 -maxdepth 1 \
		-type d -exec basename {} \;)
	NSIM_CL_NAME=$(find "$NSIM_CL_SYS/net" -mindepth 1 -maxdepth 1 \
		-type d -exec basename {} \;)

	# ensure the server has 1 queue
	ethtool -L "$NSIM_SV_NAME" combined 1 2>/dev/null

	ip link set "$NSIM_SV_NAME" netns nssv
	ip link set "$NSIM_CL_NAME" netns nscl

	$in_sv ip addr add "${SERVER_IP}/24" dev "$NSIM_SV_NAME"
	$in_cl ip addr add "${CLIENT_IP}/24" dev "$NSIM_CL_NAME"

	$in_sv ip link set dev "$NSIM_SV_NAME" up
	$in_cl ip link set dev "$NSIM_CL_NAME" up

	set +e
}

# Delete the client and server namespaces (client first). The function's exit
# status is that of the last deletion.
cleanup_ns()
{
	local ns

	for ns in nscl nssv; do
		ip netns del "$ns"
	done
}

# Push 1MiB of random data from the client namespace to busy_poller running in
# the server namespace and verify that it arrives intact.
#
# $1: irq suspend timeout handed to busy_poller via -s (defaults to 0)
#
# Returns 0 when busy_poller exits successfully and the md5sum of the file it
# wrote matches the md5sum of the file that was sent, 1 otherwise.
test_busypoll()
{
	local suspend_value=${1:-0}
	local tmp_file out_file
	local tmp_file_md5sum out_file_md5sum
	local poller_pid poller_rc
	local res=0

	tmp_file=$(mktemp)
	out_file=$(mktemp)

	# fill a test file with random data
	dd if=/dev/urandom of="${tmp_file}" bs=1M count=1 2> /dev/null

	# the poller is bounded by timeout so that a stuck receiver cannot
	# hang the test
	timeout -k 1s 30s ip netns exec nssv ./busy_poller         \
					     -p${SERVER_PORT}      \
					     -b${SERVER_IP}        \
					     -m${MAX_EVENTS}       \
					     -u${BUSY_POLL_USECS}  \
					     -P${PREFER_BUSY_POLL} \
					     -g${BUSY_POLL_BUDGET} \
					     -i${NSIM_SV_IFIDX}    \
					     -s${suspend_value}    \
					     -o"${out_file}"&
	poller_pid=$!

	wait_local_port_listen nssv ${SERVER_PORT} tcp

	ip netns exec nscl socat -u "$tmp_file" TCP:${SERVER_IP}:${SERVER_PORT}

	# wait for this specific job so its exit status (including a timeout
	# kill) is not silently discarded
	wait "$poller_pid"
	poller_rc=$?

	if [ $poller_rc -ne 0 ]; then
		echo "busy_poller exited with status ${poller_rc}"
		res=1
	fi

	tmp_file_md5sum=$(md5sum "$tmp_file" | cut -f1 -d' ')
	out_file_md5sum=$(md5sum "$out_file" | cut -f1 -d' ')

	if [ "$tmp_file_md5sum" != "$out_file_md5sum" ]; then
		echo "md5sum mismatch"
		echo "input file md5sum: ${tmp_file_md5sum}";
		echo "output file md5sum: ${out_file_md5sum}";
		res=1
	fi

	rm -f "$out_file" "$tmp_file"

	return $res
}

# Run the busy poll transfer again with IRQ suspension enabled: SUSPEND_TIMEOUT
# is handed to test_busypoll as its suspend value. The exit status of
# test_busypoll is returned unchanged.
test_busypoll_with_suspend()
{
	local rc

	test_busypoll "${SUSPEND_TIMEOUT}"
	rc=$?

	return "$rc"
}

###
### Code start
###

modprobe netdevsim

# Undo everything this script set up: the device link (only if it was
# established), both netdevsim devices, both namespaces and the module.
#
# Installed as an EXIT trap so that every way out of the script - a failed
# test, a failed link, or an errexit abort inside setup_ns - leaves the system
# clean, not only the success path. The script's exit status is the one given
# to "exit"; the trap does not change it.
cleanup_all()
{
	# setup_ns may have aborted with errexit still enabled; cleanup must
	# keep going past individual failures
	set +e

	if [ -n "$NSIM_LINKED" ]; then
		echo "$NSIM_SV_FD:$NSIM_SV_IFIDX" > $NSIM_DEV_SYS_UNLINK
	fi

	echo $NSIM_CL_ID > $NSIM_DEV_SYS_DEL
	echo $NSIM_SV_ID > $NSIM_DEV_SYS_DEL

	cleanup_ns

	modprobe -r netdevsim
}

trap cleanup_all EXIT

# linking

echo $NSIM_SV_ID > $NSIM_DEV_SYS_NEW
echo $NSIM_CL_ID > $NSIM_DEV_SYS_NEW
udevadm settle

setup_ns

# "exec {VAR}<file" lets bash pick a free descriptor and stores its number in
# VAR, so no descriptor number has to be chosen beforehand. The descriptors
# refer to the namespaces and are what link_device expects next to the ifindex.
exec {NSIM_SV_FD}</var/run/netns/nssv
NSIM_SV_IFIDX=$(ip netns exec nssv cat /sys/class/net/$NSIM_SV_NAME/ifindex)

exec {NSIM_CL_FD}</var/run/netns/nscl
NSIM_CL_IFIDX=$(ip netns exec nscl cat /sys/class/net/$NSIM_CL_NAME/ifindex)

echo "$NSIM_SV_FD:$NSIM_SV_IFIDX $NSIM_CL_FD:$NSIM_CL_IFIDX" > \
     $NSIM_DEV_SYS_LINK

if [ $? -ne 0 ]; then
	echo "linking netdevsim1 with netdevsim2 should succeed"
	exit 1
fi
NSIM_LINKED=1

test_busypoll
if [ $? -ne 0 ]; then
	echo "test_busypoll failed"
	exit 1
fi

test_busypoll_with_suspend
if [ $? -ne 0 ]; then
	echo "test_busypoll_with_suspend failed"
	exit 1
fi

# teardown happens in the cleanup_all EXIT trap
exit 0
+346 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0
#include <assert.h>
#include <errno.h>
#include <error.h>
#include <fcntl.h>
#include <inttypes.h>
#include <limits.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <ynl.h>

#include <arpa/inet.h>
#include <netinet/in.h>

#include <sys/epoll.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/types.h>

#include <linux/genetlink.h>
#include <linux/netlink.h>

#include "netdev-user.h"

/* The below ifdef blob is required because:
 *
 * - sys/epoll.h does not (yet) have the ioctl definitions included. So,
 *   systems with older glibcs will not have them available. However,
 *   sys/epoll.h does include the type definition for epoll_data, which is
 *   needed by the user program (e.g. epoll_event.data.fd)
 *
 * - linux/eventpoll.h does not define the epoll_data type, it is simply an
 *   opaque __u64. It does, however, include the ioctl definition.
 *
 * Including both headers is impossible (types would be redefined), so I've
 * opted instead to take sys/epoll.h, and include the blob below.
 *
 * Someday, when glibc is globally up to date, the blob below can be removed.
 */
#if !defined(EPOLL_IOC_TYPE)
/* Fallback definition of the epoll busy poll parameter block, compiled only
 * when the included headers did not already define EPOLL_IOC_TYPE.
 * run_poller() fills one of these and passes it to the EPIOCSPARAMS ioctl.
 */
struct epoll_params {
	/* set from the -u option */
	uint32_t busy_poll_usecs;
	/* set from the -g option */
	uint16_t busy_poll_budget;
	/* set from the -P option (0 or 1) */
	uint8_t prefer_busy_poll;

	/* pad the struct to a multiple of 64bits */
	uint8_t __pad;
};

/* ioctl request codes used on the epoll file descriptor: EPIOCSPARAMS takes a
 * struct epoll_params from userspace (_IOW), EPIOCGPARAMS returns one (_IOR)
 */
#define EPOLL_IOC_TYPE 0x8A
#define EPIOCSPARAMS _IOW(EPOLL_IOC_TYPE, 0x01, struct epoll_params)
#define EPIOCGPARAMS _IOR(EPOLL_IOC_TYPE, 0x02, struct epoll_params)
#endif

/* Runtime configuration, filled in by parse_opts() */

/* -p: TCP port to listen on */
static uint32_t cfg_port = 8000;
/* -b: IPv4 address the listen socket is bound to */
static struct in_addr cfg_bind_addr = { .s_addr = INADDR_ANY };
/* -o: path of the file the received data is written to (strdup'd copy) */
static char *cfg_outfile;
/* -m: size of the epoll_wait() event array in run_poller() */
static int cfg_max_events = 8;
/* -i: ifindex whose NAPI is configured in setup_queue(); mandatory, a value
 * of 0 makes parse_opts() print the usage and exit
 */
static int cfg_ifindex;

/* busy poll params */
/* -u, -g and -P; copied into struct epoll_params by run_poller() */
static uint32_t cfg_busy_poll_usecs;
static uint32_t cfg_busy_poll_budget;
static uint32_t cfg_prefer_busy_poll;

/* IRQ params */
/* -d, -r and -s; applied to the NAPI instance by setup_queue() */
static uint32_t cfg_defer_hard_irqs;
static uint64_t cfg_gro_flush_timeout;
static uint64_t cfg_irq_suspend_timeout;

/* Print the command line synopsis for @filepath (the program name) through
 * error() with exit status 1, which terminates the program.
 */
static void usage(const char *filepath)
{
	error(1, 0,
	      "Usage: %s -p<port> -b<addr> -m<max_events> "
	      "-u<busy_poll_usecs> -P<prefer_busy_poll> "
	      "-g<busy_poll_budget> -o<outfile> -d<defer_hard_irqs> "
	      "-r<gro_flush_timeout> -s<irq_suspend_timeout> -i<ifindex>",
	      filepath);
}

/* Convert @arg to an unsigned integer no larger than @max.
 *
 * The base is auto-detected by strtoull() (0x prefix for hex, leading 0 for
 * octal). The range check is done while the value is still an unsigned long
 * long, i.e. before the caller narrows it to the destination type; checking
 * after the narrowing would let out-of-range input wrap around and pass.
 *
 * An empty string, trailing characters, a minus sign, an overflow, or a value
 * above @max terminates the program through error() with @errmsg.
 */
static unsigned long long parse_unsigned(const char *arg,
					 unsigned long long max,
					 const char *errmsg)
{
	unsigned long long val;
	char *end;

	errno = 0;
	val = strtoull(arg, &end, 0);

	/* strtoull() silently negates input with a leading '-' */
	if (end == arg || *end != '\0' || strchr(arg, '-'))
		error(1, EINVAL, "%s (got '%s')", errmsg, arg);

	if (errno == ERANGE || val > max)
		error(1, ERANGE, "%s", errmsg);

	return val;
}

/* Parse the command line into the cfg_* globals.
 *
 * Every numeric option is validated against the range of the variable (or
 * kernel parameter) it ends up in. Invalid values, unknown options, stray
 * positional arguments, an empty command line, or a missing/zero -i<ifindex>
 * terminate the program via error() or usage().
 */
static void parse_opts(int argc, char **argv)
{
	int ret;
	int c;

	if (argc <= 1)
		usage(argv[0]);

	while ((c = getopt(argc, argv, "p:m:b:u:P:g:o:d:r:s:i:")) != -1) {
		switch (c) {
		case 'u':
			cfg_busy_poll_usecs =
				parse_unsigned(optarg, UINT32_MAX,
					       "busy_poll_usecs too large");
			break;
		case 'P':
			cfg_prefer_busy_poll =
				parse_unsigned(optarg, 1,
					       "prefer busy poll should be 0 or 1");
			break;
		case 'g':
			cfg_busy_poll_budget =
				parse_unsigned(optarg, UINT16_MAX,
					       "busy poll budget must be [0, UINT16_MAX]");
			break;
		case 'p':
			cfg_port = parse_unsigned(optarg, UINT16_MAX,
						  "port must be <= 65535");
			break;
		case 'b':
			ret = inet_aton(optarg, &cfg_bind_addr);
			/* inet_aton() does not set errno, so don't report it */
			if (ret == 0)
				error(1, 0,
				      "bind address %s invalid", optarg);
			break;
		case 'o':
			cfg_outfile = strdup(optarg);
			if (!cfg_outfile)
				error(1, 0, "outfile invalid");
			break;
		case 'm':
			/* stored in an int and used as an array size */
			cfg_max_events =
				parse_unsigned(optarg, INT_MAX,
					       "max events must be > 0 and <= INT_MAX");
			if (cfg_max_events == 0)
				error(1, ERANGE,
				      "max events must be > 0 and <= INT_MAX");
			break;
		case 'd':
			cfg_defer_hard_irqs =
				parse_unsigned(optarg, INT32_MAX,
					       "defer_hard_irqs must be <= INT32_MAX");
			break;
		case 'r':
			cfg_gro_flush_timeout =
				parse_unsigned(optarg, ULLONG_MAX - 1,
					       "gro_flush_timeout must be < ULLONG_MAX");
			break;
		case 's':
			cfg_irq_suspend_timeout =
				parse_unsigned(optarg, ULLONG_MAX - 1,
					       "irq_suspend_timeout must be < ULLONG_MAX");
			break;
		case 'i':
			/* stored in an int; 0 is rejected after the loop */
			cfg_ifindex =
				parse_unsigned(optarg, INT_MAX,
					       "ifindex must be <= INT_MAX");
			break;
		default:
			/* getopt() already reported the offending option */
			usage(argv[0]);
			break;
		}
	}

	if (!cfg_ifindex)
		usage(argv[0]);

	if (optind != argc)
		usage(argv[0]);
}

/* Register @fd with the epoll instance @epfd for the event mask @events.
 * The descriptor itself is stored as the event's user data so the event loop
 * can tell which descriptor fired. A failure terminates the program.
 */
static void epoll_ctl_add(int epfd, int fd, uint32_t events)
{
	struct epoll_event registration = {
		.events = events,
		.data.fd = fd,
	};
	int rc;

	rc = epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &registration);
	if (rc == -1)
		error(1, errno, "epoll_ctl add fd: %d", fd);
}

/* Switch @sockfd to non-blocking mode while keeping its other file status
 * flags. Terminates the program if the flags cannot be read or written.
 */
static void setnonblock(int sockfd)
{
	int flags;

	flags = fcntl(sockfd, F_GETFL, 0);
	/* without this check a failed F_GETFL (-1) would be OR-ed with
	 * O_NONBLOCK and handed to F_SETFL as if it were a flag set
	 */
	if (flags == -1)
		error(1, errno, "unable to read socket file status flags");

	if (fcntl(sockfd, F_SETFL, flags | O_NONBLOCK) == -1)
		error(1, errno, "unable to set socket to nonblocking mode");
}

/* Write all @buflen bytes of @buf to @fd, calling write() again after a
 * short write until nothing is left. A write() error terminates the program.
 */
static void write_chunk(int fd, char *buf, ssize_t buflen)
{
	const char *cursor = buf;
	ssize_t left = buflen;

	while (left > 0) {
		ssize_t done = write(fd, cursor, left);

		if (done == -1)
			error(1, errno, "unable to write data to outfile");

		cursor += done;
		left -= done;
	}
}

static void setup_queue(void)
{
	struct netdev_napi_get_list *napi_list = NULL;
	struct netdev_napi_get_req_dump *req = NULL;
	struct netdev_napi_set_req *set_req = NULL;
	struct ynl_sock *ys;
	struct ynl_error yerr;
	uint32_t napi_id;

	ys = ynl_sock_create(&ynl_netdev_family, &yerr);
	if (!ys)
		error(1, 0, "YNL: %s", yerr.msg);

	req = netdev_napi_get_req_dump_alloc();
	netdev_napi_get_req_dump_set_ifindex(req, cfg_ifindex);
	napi_list = netdev_napi_get_dump(ys, req);

	/* assume there is 1 NAPI configured and take the first */
	if (napi_list->obj._present.id)
		napi_id = napi_list->obj.id;
	else
		error(1, 0, "napi ID not present?");

	set_req = netdev_napi_set_req_alloc();
	netdev_napi_set_req_set_id(set_req, napi_id);
	netdev_napi_set_req_set_defer_hard_irqs(set_req, cfg_defer_hard_irqs);
	netdev_napi_set_req_set_gro_flush_timeout(set_req,
						  cfg_gro_flush_timeout);
	netdev_napi_set_req_set_irq_suspend_timeout(set_req,
						    cfg_irq_suspend_timeout);

	if (netdev_napi_set(ys, set_req))
		error(1, 0, "can't set NAPI params: %s\n", yerr.msg);

	netdev_napi_get_list_free(napi_list);
	netdev_napi_get_req_dump_free(req);
	netdev_napi_set_req_free(set_req);
	ynl_sock_destroy(ys);
}

/* Receive one TCP stream with epoll busy polling and store it in cfg_outfile.
 *
 * Opens the output file, listens on cfg_bind_addr:cfg_port, creates an epoll
 * instance configured with the busy poll parameters from the command line and
 * then loops on epoll_wait():
 *   - an event on the listen socket accepts one connection and registers it
 *     edge-triggered;
 *   - EPOLLIN on a connection drains the socket into the output file;
 *   - EPOLLRDHUP/EPOLLHUP ends the transfer: all descriptors are closed and
 *     the function returns.
 *
 * Setup failures terminate the program through error().
 */
static void run_poller(void)
{
	struct epoll_event events[cfg_max_events];
	struct epoll_params epoll_params = {0};
	struct sockaddr_in server_addr = {0};
	int i, epfd, nfds;
	ssize_t readlen;
	int outfile_fd;
	char buf[1024];
	int sockfd;
	int conn;
	int val;
	int fd;

	outfile_fd = open(cfg_outfile, O_WRONLY | O_CREAT, 0644);
	if (outfile_fd == -1)
		error(1, errno, "unable to open outfile: %s", cfg_outfile);

	sockfd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
	if (sockfd == -1)
		error(1, errno, "unable to create listen socket");

	server_addr.sin_family = AF_INET;
	server_addr.sin_port = htons(cfg_port);
	server_addr.sin_addr = cfg_bind_addr;

	/* these values are range checked during parse_opts, so casting is safe
	 * here
	 */
	epoll_params.busy_poll_usecs = cfg_busy_poll_usecs;
	epoll_params.busy_poll_budget = (uint16_t)cfg_busy_poll_budget;
	epoll_params.prefer_busy_poll = (uint8_t)cfg_prefer_busy_poll;
	epoll_params.__pad = 0;

	val = 1;
	if (setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val)))
		error(1, errno, "poller setsockopt reuseaddr");

	setnonblock(sockfd);

	/* a failed bind must be fatal: continuing would listen on a socket
	 * that is not bound to the requested address and port
	 */
	if (bind(sockfd, (struct sockaddr *)&server_addr,
		 sizeof(struct sockaddr_in)))
		error(1, errno, "poller bind to port: %d\n", cfg_port);

	if (listen(sockfd, 1))
		error(1, errno, "poller listen");

	epfd = epoll_create1(0);
	if (epfd == -1)
		error(1, errno, "unable to create epoll instance");

	if (ioctl(epfd, EPIOCSPARAMS, &epoll_params) == -1)
		error(1, errno, "unable to set busy poll params");

	epoll_ctl_add(epfd, sockfd, EPOLLIN | EPOLLOUT | EPOLLET);

	for (;;) {
		nfds = epoll_wait(epfd, events, cfg_max_events, -1);
		if (nfds == -1) {
			/* a signal is harmless, anything else would make
			 * this loop spin forever
			 */
			if (errno == EINTR)
				continue;
			error(1, errno, "epoll_wait failed");
		}

		for (i = 0; i < nfds; i++) {
			fd = events[i].data.fd;

			if (fd == sockfd) {
				conn = accept(sockfd, NULL, NULL);
				if (conn == -1)
					error(1, errno,
					      "accepting incoming connection failed");

				setnonblock(conn);
				epoll_ctl_add(epfd, conn,
					      EPOLLIN | EPOLLET | EPOLLRDHUP |
					      EPOLLHUP);
			} else if (events[i].events & EPOLLIN) {
				/* edge-triggered: read until the socket has
				 * nothing more to give (0 or -1)
				 */
				for (;;) {
					readlen = read(fd, buf, sizeof(buf));
					if (readlen > 0)
						write_chunk(outfile_fd, buf,
							    readlen);
					else
						break;
				}
			} else {
				/* spurious event ? */
			}

			/* the peer closed or the connection hung up: the
			 * transfer is over
			 */
			if (events[i].events & (EPOLLRDHUP | EPOLLHUP)) {
				epoll_ctl(epfd, EPOLL_CTL_DEL, fd, NULL);
				/* sockfd is closed below */
				if (fd != sockfd)
					close(fd);
				goto out;
			}
		}
	}

out:
	/* the test compares checksums of the outfile, so a failed close
	 * (lost write) must not go unnoticed
	 */
	if (close(outfile_fd))
		error(1, errno, "unable to close outfile");
	close(sockfd);
	close(epfd);
}

/* Program entry point: parse the command line, configure the NAPI instance,
 * then run the poller until it returns.
 */
int main(int argc, char **argv)
{
	parse_opts(argc, argv);

	setup_queue();
	run_poller();

	return EXIT_SUCCESS;
}