Commit 5778d65d authored by Paolo Abeni's avatar Paolo Abeni
Browse files

Merge branch 'vsock-virtio-fix-tx-credit-handling'

Stefano Garzarella says:

====================
vsock/virtio: fix TX credit handling

The original series was posted by Melbin K Mathew <mlbnkm1@gmail.com> till v4.
Since it's a real issue and the original author seems busy, I'm sending
the new version fixing my comments but keeping the authorship (and restoring
mine on patch 2 as reported on v4).

v5: https://lore.kernel.org/netdev/20260116201517.273302-1-sgarzare@redhat.com/
v4: https://lore.kernel.org/netdev/20251217181206.3681159-1-mlbnkm1@gmail.com/

From Melbin K Mathew <mlbnkm1@gmail.com>:

This series fixes TX credit handling in virtio-vsock:

Patch 1: Fix potential underflow in get_credit() using s64 arithmetic
Patch 2: Fix vsock_test seqpacket bounds test
Patch 3: Cap TX credit to local buffer size (security hardening)
Patch 4: Add stream TX credit bounds regression test

The core issue is that a malicious guest can advertise a huge buffer
size via SO_VM_SOCKETS_BUFFER_SIZE, causing the host to allocate
excessive sk_buff memory when sending data to that guest.

On an unpatched Ubuntu 22.04 host (~64 GiB RAM), running a PoC with
32 guest vsock connections advertising 2 GiB each and reading slowly
drove Slab/SUnreclaim from ~0.5 GiB to ~57 GiB; the system only
recovered after killing the QEMU process.

With this series applied, the same PoC shows only ~35 MiB increase in
Slab/SUnreclaim, no host OOM, and the guest remains responsive.
====================

Link: https://patch.msgid.link/20260121093628.9941-1-sgarzare@redhat.com


Signed-off-by: default avatarPaolo Abeni <pabeni@redhat.com>
parents ca1bb3fe 2a689f76
Loading
Loading
Loading
Loading
+21 −9
Original line number Diff line number Diff line
@@ -28,6 +28,7 @@

static void virtio_transport_cancel_close_work(struct vsock_sock *vsk,
					       bool cancel_timeout);
static s64 virtio_transport_has_space(struct virtio_vsock_sock *vvs);

static const struct virtio_transport *
virtio_transport_get_ops(struct vsock_sock *vsk)
@@ -499,9 +500,7 @@ u32 virtio_transport_get_credit(struct virtio_vsock_sock *vvs, u32 credit)
		return 0;

	spin_lock_bh(&vvs->tx_lock);
	ret = vvs->peer_buf_alloc - (vvs->tx_cnt - vvs->peer_fwd_cnt);
	if (ret > credit)
		ret = credit;
	ret = min_t(u32, credit, virtio_transport_has_space(vvs));
	vvs->tx_cnt += ret;
	vvs->bytes_unsent += ret;
	spin_unlock_bh(&vvs->tx_lock);
@@ -822,6 +821,15 @@ virtio_transport_seqpacket_dequeue(struct vsock_sock *vsk,
}
EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_dequeue);

static u32 virtio_transport_tx_buf_size(struct virtio_vsock_sock *vvs)
{
	/* The peer advertises its receive buffer via peer_buf_alloc, but we
	 * cap it to our local buf_alloc so a remote peer cannot force us to
	 * queue more data than our own buffer configuration allows.
	 */
	return min(vvs->peer_buf_alloc, vvs->buf_alloc);
}

int
virtio_transport_seqpacket_enqueue(struct vsock_sock *vsk,
				   struct msghdr *msg,
@@ -831,7 +839,7 @@ virtio_transport_seqpacket_enqueue(struct vsock_sock *vsk,

	spin_lock_bh(&vvs->tx_lock);

	if (len > vvs->peer_buf_alloc) {
	if (len > virtio_transport_tx_buf_size(vvs)) {
		spin_unlock_bh(&vvs->tx_lock);
		return -EMSGSIZE;
	}
@@ -877,12 +885,16 @@ u32 virtio_transport_seqpacket_has_data(struct vsock_sock *vsk)
}
EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_has_data);

static s64 virtio_transport_has_space(struct vsock_sock *vsk)
static s64 virtio_transport_has_space(struct virtio_vsock_sock *vvs)
{
	struct virtio_vsock_sock *vvs = vsk->trans;
	s64 bytes;

	bytes = (s64)vvs->peer_buf_alloc - (vvs->tx_cnt - vvs->peer_fwd_cnt);
	/* Use s64 arithmetic so if the peer shrinks peer_buf_alloc while
	 * we have bytes in flight (tx_cnt - peer_fwd_cnt), the subtraction
	 * does not underflow.
	 */
	bytes = (s64)virtio_transport_tx_buf_size(vvs) -
		(vvs->tx_cnt - vvs->peer_fwd_cnt);
	if (bytes < 0)
		bytes = 0;

@@ -895,7 +907,7 @@ s64 virtio_transport_stream_has_space(struct vsock_sock *vsk)
	s64 bytes;

	spin_lock_bh(&vvs->tx_lock);
	bytes = virtio_transport_has_space(vsk);
	bytes = virtio_transport_has_space(vvs);
	spin_unlock_bh(&vvs->tx_lock);

	return bytes;
@@ -1492,7 +1504,7 @@ static bool virtio_transport_space_update(struct sock *sk,
	spin_lock_bh(&vvs->tx_lock);
	vvs->peer_buf_alloc = le32_to_cpu(hdr->buf_alloc);
	vvs->peer_fwd_cnt = le32_to_cpu(hdr->fwd_cnt);
	space_available = virtio_transport_has_space(vsk);
	space_available = virtio_transport_has_space(vvs);
	spin_unlock_bh(&vvs->tx_lock);
	return space_available;
}
+112 −0
Original line number Diff line number Diff line
@@ -347,10 +347,12 @@ static void test_stream_msg_peek_server(const struct test_opts *opts)
}

#define SOCK_BUF_SIZE (2 * 1024 * 1024)
#define SOCK_BUF_SIZE_SMALL (64 * 1024)
#define MAX_MSG_PAGES 4

static void test_seqpacket_msg_bounds_client(const struct test_opts *opts)
{
	unsigned long long sock_buf_size;
	unsigned long curr_hash;
	size_t max_msg_size;
	int page_size;
@@ -363,6 +365,16 @@ static void test_seqpacket_msg_bounds_client(const struct test_opts *opts)
		exit(EXIT_FAILURE);
	}

	sock_buf_size = SOCK_BUF_SIZE;

	setsockopt_ull_check(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_MAX_SIZE,
			     sock_buf_size,
			     "setsockopt(SO_VM_SOCKETS_BUFFER_MAX_SIZE)");

	setsockopt_ull_check(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_SIZE,
			     sock_buf_size,
			     "setsockopt(SO_VM_SOCKETS_BUFFER_SIZE)");

	/* Wait, until receiver sets buffer size. */
	control_expectln("SRVREADY");

@@ -2219,6 +2231,101 @@ static void test_stream_accepted_setsockopt_server(const struct test_opts *opts)
	close(fd);
}

static void test_stream_tx_credit_bounds_client(const struct test_opts *opts)
{
	unsigned long long sock_buf_size;
	size_t total = 0;
	char buf[4096];
	int fd;

	memset(buf, 'A', sizeof(buf));

	fd = vsock_stream_connect(opts->peer_cid, opts->peer_port);
	if (fd < 0) {
		perror("connect");
		exit(EXIT_FAILURE);
	}

	sock_buf_size = SOCK_BUF_SIZE_SMALL;

	setsockopt_ull_check(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_MAX_SIZE,
			     sock_buf_size,
			     "setsockopt(SO_VM_SOCKETS_BUFFER_MAX_SIZE)");

	setsockopt_ull_check(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_SIZE,
			     sock_buf_size,
			     "setsockopt(SO_VM_SOCKETS_BUFFER_SIZE)");

	if (fcntl(fd, F_SETFL, fcntl(fd, F_GETFL, 0) | O_NONBLOCK) < 0) {
		perror("fcntl(F_SETFL)");
		exit(EXIT_FAILURE);
	}

	control_expectln("SRVREADY");

	for (;;) {
		ssize_t sent = send(fd, buf, sizeof(buf), 0);

		if (sent == 0) {
			fprintf(stderr, "unexpected EOF while sending bytes\n");
			exit(EXIT_FAILURE);
		}

		if (sent < 0) {
			if (errno == EINTR)
				continue;

			if (errno == EAGAIN || errno == EWOULDBLOCK)
				break;

			perror("send");
			exit(EXIT_FAILURE);
		}

		total += sent;
	}

	control_writeln("CLIDONE");
	close(fd);

	/* We should not be able to send more bytes than the value set as
	 * local buffer size.
	 */
	if (total > sock_buf_size) {
		fprintf(stderr,
			"TX credit too large: queued %zu bytes (expected <= %llu)\n",
			total, sock_buf_size);
		exit(EXIT_FAILURE);
	}
}

static void test_stream_tx_credit_bounds_server(const struct test_opts *opts)
{
	unsigned long long sock_buf_size;
	int fd;

	fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL);
	if (fd < 0) {
		perror("accept");
		exit(EXIT_FAILURE);
	}

	sock_buf_size = SOCK_BUF_SIZE;

	setsockopt_ull_check(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_MAX_SIZE,
			     sock_buf_size,
			     "setsockopt(SO_VM_SOCKETS_BUFFER_MAX_SIZE)");

	setsockopt_ull_check(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_SIZE,
			     sock_buf_size,
			     "setsockopt(SO_VM_SOCKETS_BUFFER_SIZE)");

	control_writeln("SRVREADY");
	control_expectln("CLIDONE");

	close(fd);
}

static struct test_case test_cases[] = {
	{
		.name = "SOCK_STREAM connection reset",
@@ -2408,6 +2515,11 @@ static struct test_case test_cases[] = {
		.run_client = test_stream_msgzcopy_mangle_client,
		.run_server = test_stream_msgzcopy_mangle_server,
	},
	{
		.name = "SOCK_STREAM TX credit bounds",
		.run_client = test_stream_tx_credit_bounds_client,
		.run_server = test_stream_tx_credit_bounds_server,
	},
	{},
};