Commit c748a255 authored by Alexei Starovoitov's avatar Alexei Starovoitov
Browse files

Merge branch 'selftests-bpf-fix-for-bpf_signal-stalls-watchdog-for-test_progs'

Eduard Zingerman says:

====================
selftests/bpf: fix for bpf_signal stalls, watchdog for test_progs

Test case 'bpf_signal' had been recently reported to stall, both on
the mailing list [1] and CI [2]. The stall is caused by CPU cycles
perf event not being delivered within expected time frame, before test
process enters system call and waits indefinitely.

This patch-set addresses the issue in several ways:
- A watchdog timer is added to test_progs.c runner:
  - it prints current sub-test name to stderr if sub-test takes longer
    than 10 seconds to finish;
  - it terminates process executing sub-test if sub-test takes longer
    than 120 seconds to finish.
- The test case is updated to await perf event notification with a
  timeout and a few retries, this serves two purposes:
  - busy loops longer to increase the time frame for CPU cycles event
    generation/delivery;
  - makes a timeout, not stall, a worst case scenario.
- The test case is updated to lower frequency of perf events, as high
  frequency of such events caused events generation throttling,
  which in turn delayed events delivery by amount of time sufficient
  to cause test case failure.

Note:

  librt pthread-based timer API is used to implement watchdog timer.
  I chose this API over SIGALRM because signal handler execution
  within test process context was sufficient to trigger perf event
  delivery for send_signal/send_signal_nmi_thread_remote test case,
  w/o any additional changes. Thus I concluded that SIGALRM based
  implementation interferes with tests execution.

[1] https://lore.kernel.org/bpf/CAP01T75OUeE8E-Lw9df84dm8ag2YmHW619f1DmPSVZ5_O89+Bg@mail.gmail.com/
[2] https://github.com/kernel-patches/bpf/actions/runs/11791485271/job/32843996871
====================

Link: https://lore.kernel.org/r/20241112110906.3045278-1-eddyz87@gmail.com


Signed-off-by: default avatarAlexei Starovoitov <ast@kernel.org>
parents 47e2c45c 4edab4c5
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -742,6 +742,7 @@ TRUNNER_EXTRA_SOURCES := test_progs.c \
			 unpriv_helpers.c 	\
			 netlink_helpers.c	\
			 jit_disasm_helpers.c	\
			 io_helpers.c		\
			 test_loader.c		\
			 xsk.c			\
			 disasm.c		\
+21 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0
#include <sys/select.h>
#include <unistd.h>
#include <errno.h>

int read_with_timeout(int fd, char *buf, size_t count, long usec)
{
	const long M = 1000 * 1000;
	struct timeval tv = { usec / M, usec % M };
	fd_set fds;
	int err;

	FD_ZERO(&fds);
	FD_SET(fd, &fds);
	err = select(fd + 1, &fds, NULL, NULL, &tv);
	if (err < 0)
		return err;
	if (FD_ISSET(fd, &fds))
		return read(fd, buf, count);
	return -EAGAIN;
}
+7 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0
#include <unistd.h>

/* As a regular read(2), but allows to specify a timeout in micro-seconds.
 * Returns -EAGAIN on timeout.
 */
int read_with_timeout(int fd, char *buf, size_t count, long usec);
+4 −4
Original line number Diff line number Diff line
@@ -265,10 +265,10 @@ static void *run_test_task_tid(void *arg)

	linfo.task.tid = 0;
	linfo.task.pid = getpid();
	/* This includes the parent thread, this thread,
	/* This includes the parent thread, this thread, watchdog timer thread
	 * and the do_nothing_wait thread
	 */
	test_task_common(&opts, 2, 1);
	test_task_common(&opts, 3, 1);

	test_task_common_nocheck(NULL, &num_unknown_tid, &num_known_tid);
	ASSERT_GT(num_unknown_tid, 2, "check_num_unknown_tid");
@@ -297,7 +297,7 @@ static void test_task_pid(void)
	opts.link_info = &linfo;
	opts.link_info_len = sizeof(linfo);

	test_task_common(&opts, 1, 1);
	test_task_common(&opts, 2, 1);
}

static void test_task_pidfd(void)
@@ -315,7 +315,7 @@ static void test_task_pidfd(void)
	opts.link_info = &linfo;
	opts.link_info_len = sizeof(linfo);

	test_task_common(&opts, 1, 1);
	test_task_common(&opts, 2, 1);

	close(pidfd);
}
+2 −2
Original line number Diff line number Diff line
@@ -192,8 +192,8 @@ static void subtest_task_iters(void)
	syscall(SYS_getpgid);
	iters_task__detach(skel);
	ASSERT_EQ(skel->bss->procs_cnt, 1, "procs_cnt");
	ASSERT_EQ(skel->bss->threads_cnt, thread_num + 1, "threads_cnt");
	ASSERT_EQ(skel->bss->proc_threads_cnt, thread_num + 1, "proc_threads_cnt");
	ASSERT_EQ(skel->bss->threads_cnt, thread_num + 2, "threads_cnt");
	ASSERT_EQ(skel->bss->proc_threads_cnt, thread_num + 2, "proc_threads_cnt");
	ASSERT_EQ(skel->bss->invalid_cnt, 0, "invalid_cnt");
	pthread_mutex_unlock(&do_nothing_mutex);
	for (int i = 0; i < thread_num; i++)
Loading