Commit 0cba6442, authored by Anish Moorthy, committed by Sean Christopherson
Browse files

KVM: selftests: Use EPOLL in userfaultfd_util reader threads



With multiple reader threads POLLing a single UFFD, the demand paging test
suffers from the thundering herd problem: performance degrades as the
number of reader threads is increased. Solve this issue [1] by switching
the polling mechanism to EPOLL + EPOLLEXCLUSIVE.

Also, change the error-handling convention of uffd_handler_thread_fn.
Instead of just printing errors and returning early from the polling
loop, check for them via TEST_ASSERT(). "return NULL" is reserved for a
successful exit from uffd_handler_thread_fn, i.e. one triggered by a
write to the exit pipe.

Performance samples generated by the command in [2] are given below.

Num Reader Threads, Paging Rate (POLL), Paging Rate (EPOLL)
1      249k      185k
2      201k      235k
4      186k      155k
16     150k      217k
32     89k       198k

[1] Single-vCPU performance does suffer somewhat.
[2] ./demand_paging_test -u MINOR -s shmem -v 4 -o -r <num readers>

Signed-off-by: Anish Moorthy <amoorthy@google.com>
Acked-by: James Houghton <jthoughton@google.com>
Link: https://lore.kernel.org/r/20240215235405.368539-13-amoorthy@google.com


Signed-off-by: Sean Christopherson <seanjc@google.com>
parent df4ec5aa
Loading
Loading
Loading
Loading
+0 −1
Original line number Diff line number Diff line
@@ -13,7 +13,6 @@
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <poll.h>
#include <pthread.h>
#include <linux/userfaultfd.h>
#include <sys/syscall.h>
+35 −39
Original line number Diff line number Diff line
@@ -16,6 +16,7 @@
#include <poll.h>
#include <pthread.h>
#include <linux/userfaultfd.h>
#include <sys/epoll.h>
#include <sys/syscall.h>

#include "kvm_util.h"
@@ -32,60 +33,55 @@ static void *uffd_handler_thread_fn(void *arg)
	int64_t pages = 0;
	struct timespec start;
	struct timespec ts_diff;
	struct epoll_event evt;
	int epollfd;

	epollfd = epoll_create(1);
	TEST_ASSERT(epollfd >= 0, "Failed to create epollfd.");

	evt.events = EPOLLIN | EPOLLEXCLUSIVE;
	evt.data.u32 = 0;
	TEST_ASSERT(!epoll_ctl(epollfd, EPOLL_CTL_ADD, uffd, &evt),
		    "Failed to add uffd to epollfd");

	evt.events = EPOLLIN;
	evt.data.u32 = 1;
	TEST_ASSERT(!epoll_ctl(epollfd, EPOLL_CTL_ADD, reader_args->pipe, &evt),
		    "Failed to add pipe to epollfd");

	clock_gettime(CLOCK_MONOTONIC, &start);
	while (1) {
		struct uffd_msg msg;
		struct pollfd pollfd[2];
		char tmp_chr;
		int r;

		pollfd[0].fd = uffd;
		pollfd[0].events = POLLIN;
		pollfd[1].fd = reader_args->pipe;
		pollfd[1].events = POLLIN;

		r = poll(pollfd, 2, -1);
		switch (r) {
		case -1:
			pr_info("poll err");
			continue;
		case 0:
			continue;
		case 1:
			break;
		default:
			pr_info("Polling uffd returned %d", r);
			return NULL;
		}
		r = epoll_wait(epollfd, &evt, 1, -1);
		TEST_ASSERT(r == 1,
			    "Unexpected number of events (%d) from epoll, errno = %d",
			    r, errno);

		if (pollfd[0].revents & POLLERR) {
			pr_info("uffd revents has POLLERR");
			return NULL;
		}
		if (evt.data.u32 == 1) {
			char tmp_chr;

		if (pollfd[1].revents & POLLIN) {
			r = read(pollfd[1].fd, &tmp_chr, 1);
			TEST_ASSERT(!(evt.events & (EPOLLERR | EPOLLHUP)),
				    "Reader thread received EPOLLERR or EPOLLHUP on pipe.");
			r = read(reader_args->pipe, &tmp_chr, 1);
			TEST_ASSERT(r == 1,
				    "Error reading pipefd in UFFD thread");
				    "Error reading pipefd in uffd reader thread");
			break;
		}

		if (!(pollfd[0].revents & POLLIN))
			continue;
		TEST_ASSERT(!(evt.events & (EPOLLERR | EPOLLHUP)),
			    "Reader thread received EPOLLERR or EPOLLHUP on uffd.");

		r = read(uffd, &msg, sizeof(msg));
		if (r == -1) {
			if (errno == EAGAIN)
			TEST_ASSERT(errno == EAGAIN,
				    "Error reading from UFFD: errno = %d", errno);
			continue;
			pr_info("Read of uffd got errno %d\n", errno);
			return NULL;
		}

		if (r != sizeof(msg)) {
			pr_info("Read on uffd returned unexpected size: %d bytes", r);
			return NULL;
		}
		TEST_ASSERT(r == sizeof(msg),
			    "Read on uffd returned unexpected number of bytes (%d)", r);

		if (!(msg.event & UFFD_EVENT_PAGEFAULT))
			continue;
@@ -93,8 +89,8 @@ static void *uffd_handler_thread_fn(void *arg)
		if (reader_args->delay)
			usleep(reader_args->delay);
		r = reader_args->handler(reader_args->uffd_mode, uffd, &msg);
		if (r < 0)
			return NULL;
		TEST_ASSERT(r >= 0,
			    "Reader thread handler fn returned negative value %d", r);
		pages++;
	}