Commit 04b1d18c authored by Jakub Kicinski's avatar Jakub Kicinski
Browse files

Merge branch 'vsock-test-check-for-null-ptr-deref-when-transport-changes'

Luigi Leonardi says:

====================
vsock/test: check for null-ptr-deref when transport changes

This series introduces a new test that checks for a null pointer
dereference that may happen when there is a transport change[1]. This
bug was fixed in [2].

Note that this test *cannot* fail, it hangs if it triggers a kernel
oops. The intended use-case is to run it and then check if there is any
oops in the dmesg.

This test is based on Hyunwoo Kim's[3] and Michal's python
reproducers[4].

[1]https://lore.kernel.org/netdev/Z2LvdTTQR7dBmPb5@v4bel-B760M-AORUS-ELITE-AX/
[2]https://lore.kernel.org/netdev/20250110083511.30419-1-sgarzare@redhat.com/
[3]https://lore.kernel.org/netdev/Z2LvdTTQR7dBmPb5@v4bel-B760M-AORUS-ELITE-AX/#t
[4]https://lore.kernel.org/netdev/2b3062e3-bdaa-4c94-a3c0-2930595b9670@rbox.co/

v4: https://lore.kernel.org/20250624-test_vsock-v4-1-087c9c8e25a2@redhat.com
v3: https://lore.kernel.org/20250611-test_vsock-v3-1-8414a2d4df62@redhat.com
v2: https://lore.kernel.org/20250314-test_vsock-v2-1-3c0a1d878a6d@redhat.com
v1: https://lore.kernel.org/20250306-test_vsock-v1-0-0320b5accf92@redhat.com
====================

Link: https://patch.msgid.link/20250630-test_vsock-v5-0-2492e141e80b@redhat.com


Signed-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parents 6d359cf4 3a764d93
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -5,6 +5,7 @@ vsock_test: vsock_test.o vsock_test_zerocopy.o timeout.o control.o util.o msg_ze
vsock_diag_test: vsock_diag_test.o timeout.o control.o util.o
vsock_perf: vsock_perf.o msg_zerocopy_common.o

vsock_test: LDLIBS = -lpthread
vsock_uring_test: LDLIBS = -luring
vsock_uring_test: control.o util.o vsock_uring_test.o timeout.o msg_zerocopy_common.o

+4 −0
Original line number Diff line number Diff line
@@ -33,6 +33,10 @@ static const char * const transport_ksyms[] = {
static_assert(ARRAY_SIZE(transport_ksyms) == TRANSPORT_NUM);
static_assert(BITS_PER_TYPE(int) >= TRANSPORT_NUM);

#define TRANSPORTS_G2H   (TRANSPORT_VIRTIO | TRANSPORT_VMCI | TRANSPORT_HYPERV)
#define TRANSPORTS_H2G   (TRANSPORT_VHOST | TRANSPORT_VMCI)
#define TRANSPORTS_LOCAL (TRANSPORT_LOOPBACK)

/* Tests can either run as the client or the server */
enum test_mode {
	TEST_MODE_UNSET,
+170 −0
Original line number Diff line number Diff line
@@ -22,6 +22,8 @@
#include <signal.h>
#include <sys/ioctl.h>
#include <linux/time64.h>
#include <pthread.h>
#include <fcntl.h>

#include "vsock_test_zerocopy.h"
#include "timeout.h"
@@ -1867,6 +1869,169 @@ static void test_stream_connect_retry_server(const struct test_opts *opts)
	close(fd);
}

#define TRANSPORT_CHANGE_TIMEOUT 2 /* seconds */

static void *test_stream_transport_change_thread(void *vargp)
{
	pid_t *pid = (pid_t *)vargp;
	int ret;

	ret = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);
	if (ret) {
		fprintf(stderr, "pthread_setcanceltype: %d\n", ret);
		exit(EXIT_FAILURE);
	}

	while (true) {
		if (kill(*pid, SIGUSR1) < 0) {
			perror("kill");
			exit(EXIT_FAILURE);
		}
	}
	return NULL;
}

static void test_transport_change_signal_handler(int signal)
{
	/* We need a custom handler for SIGUSR1 as the default one terminates the process. */
}

static void test_stream_transport_change_client(const struct test_opts *opts)
{
	__sighandler_t old_handler;
	pid_t pid = getpid();
	pthread_t thread_id;
	time_t tout;
	int ret, tr;

	tr = get_transports();

	/* Print a warning if there is a G2H transport loaded.
	 * This is on a best effort basis because VMCI can be either G2H and H2G, and there is
	 * no easy way to understand it.
	 * The bug we are testing only appears when G2H transports are not loaded.
	 * This is because `vsock_assign_transport`, when using CID 0, assigns a G2H transport
	 * to vsk->transport. If none is available it is set to NULL, causing the null-ptr-deref.
	 */
	if (tr & TRANSPORTS_G2H)
		fprintf(stderr, "G2H Transport detected. This test will not fail.\n");

	old_handler = signal(SIGUSR1, test_transport_change_signal_handler);
	if (old_handler == SIG_ERR) {
		perror("signal");
		exit(EXIT_FAILURE);
	}

	ret = pthread_create(&thread_id, NULL, test_stream_transport_change_thread, &pid);
	if (ret) {
		fprintf(stderr, "pthread_create: %d\n", ret);
		exit(EXIT_FAILURE);
	}

	control_expectln("LISTENING");

	tout = current_nsec() + TRANSPORT_CHANGE_TIMEOUT * NSEC_PER_SEC;
	do {
		struct sockaddr_vm sa = {
			.svm_family = AF_VSOCK,
			.svm_cid = opts->peer_cid,
			.svm_port = opts->peer_port,
		};
		int s;

		s = socket(AF_VSOCK, SOCK_STREAM, 0);
		if (s < 0) {
			perror("socket");
			exit(EXIT_FAILURE);
		}

		ret = connect(s, (struct sockaddr *)&sa, sizeof(sa));
		/* The connect can fail due to signals coming from the thread,
		 * or because the receiver connection queue is full.
		 * Ignoring also the latter case because there is no way
		 * of synchronizing client's connect and server's accept when
		 * connect(s) are constantly being interrupted by signals.
		 */
		if (ret == -1 && (errno != EINTR && errno != ECONNRESET)) {
			perror("connect");
			exit(EXIT_FAILURE);
		}

		/* Set CID to 0 cause a transport change. */
		sa.svm_cid = 0;

		/* Ignore return value since it can fail or not.
		 * If the previous connect is interrupted while the
		 * connection request is already sent, the second
		 * connect() will wait for the response.
		 */
		connect(s, (struct sockaddr *)&sa, sizeof(sa));

		close(s);

		control_writeulong(CONTROL_CONTINUE);

	} while (current_nsec() < tout);

	control_writeulong(CONTROL_DONE);

	ret = pthread_cancel(thread_id);
	if (ret) {
		fprintf(stderr, "pthread_cancel: %d\n", ret);
		exit(EXIT_FAILURE);
	}

	ret = pthread_join(thread_id, NULL);
	if (ret) {
		fprintf(stderr, "pthread_join: %d\n", ret);
		exit(EXIT_FAILURE);
	}

	if (signal(SIGUSR1, old_handler) == SIG_ERR) {
		perror("signal");
		exit(EXIT_FAILURE);
	}
}

static void test_stream_transport_change_server(const struct test_opts *opts)
{
	int s = vsock_stream_listen(VMADDR_CID_ANY, opts->peer_port);

	/* Set the socket to be nonblocking because connects that have been interrupted
	 * (EINTR) can fill the receiver's accept queue anyway, leading to connect failure.
	 * As of today (6.15) in such situation there is no way to understand, from the
	 * client side, if the connection has been queued in the server or not.
	 */
	if (fcntl(s, F_SETFL, fcntl(s, F_GETFL, 0) | O_NONBLOCK) < 0) {
		perror("fcntl");
		exit(EXIT_FAILURE);
	}
	control_writeln("LISTENING");

	while (control_readulong() == CONTROL_CONTINUE) {
		/* Must accept the connection, otherwise the `listen`
		 * queue will fill up and new connections will fail.
		 * There can be more than one queued connection,
		 * clear them all.
		 */
		while (true) {
			int client = accept(s, NULL, NULL);

			if (client < 0) {
				if (errno == EAGAIN)
					break;

				perror("accept");
				exit(EXIT_FAILURE);
			}

			close(client);
		}
	}

	close(s);
}

static void test_stream_linger_client(const struct test_opts *opts)
{
	int fd;
@@ -2106,6 +2271,11 @@ static struct test_case test_cases[] = {
		.run_client = test_stream_nolinger_client,
		.run_server = test_stream_nolinger_server,
	},
	{
		.name = "SOCK_STREAM transport change null-ptr-deref",
		.run_client = test_stream_transport_change_client,
		.run_server = test_stream_transport_change_server,
	},
	{},
};