Commit 6e15774d authored by Jakub Kicinski
Browse files

Merge branch 'selftests-mptcp-mark-unstable-subtests-as-flaky'

Matthieu Baerts says:

====================
selftests: mptcp: mark unstable subtests as flaky

Some subtests can be unstable, failing once every X runs. Fixing them
can take time: the issue could be in the kernel or in the subtest
itself, so a proper analysis is needed in order not to hide real bugs.

To avoid creating noise on the various CIs where tests are less stable
than on our side, some subtests have been marked as flaky. As a result,
errors from these subtests (if any) are ignored.

Note that the MPTCP CI will continue to track these flaky subtests. All
these unstable subtests are also tracked by our bug tracker.

These are fixes for the -net tree, because the instabilities are visible
there. The first patch, introducing the flaky support, has no 'Fixes'
tag, mainly because it requires the recent and significant refactoring
done in all MPTCP selftests. Backporting that to old versions where the
flaky tests were introduced would be too difficult, and probably not
worth it. The other patches, adding MPTCP_LIB_SUBTEST_FLAKY=1, have a
Fixes tag, simply to ease the backport of the future fixes that will
remove these markers along with the proper fix.
====================

Link: https://lore.kernel.org/r/20240524-upstream-net-20240524-selftests-mptcp-flaky-v1-0-a352362f3f8e@kernel.org


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 7a8cc96e 38af56e6
Loading
Loading
Loading
Loading
+9 −1
Original line number Diff line number Diff line
@@ -261,6 +261,8 @@ reset()

	TEST_NAME="${1}"

	MPTCP_LIB_SUBTEST_FLAKY=0 # reset if modified

	if skip_test; then
		MPTCP_LIB_TEST_COUNTER=$((MPTCP_LIB_TEST_COUNTER+1))
		last_test_ignored=1
@@ -448,7 +450,9 @@ reset_with_tcp_filter()
# $1: err msg
fail_test()
{
	if ! mptcp_lib_subtest_is_flaky; then
		ret=${KSFT_FAIL}
	fi

	if [ ${#} -gt 0 ]; then
		print_fail "${@}"
@@ -3069,6 +3073,7 @@ fullmesh_tests()
fastclose_tests()
{
	if reset_check_counter "fastclose test" "MPTcpExtMPFastcloseTx"; then
		MPTCP_LIB_SUBTEST_FLAKY=1
		test_linkfail=1024 fastclose=client \
			run_tests $ns1 $ns2 10.0.1.1
		chk_join_nr 0 0 0
@@ -3077,6 +3082,7 @@ fastclose_tests()
	fi

	if reset_check_counter "fastclose server test" "MPTcpExtMPFastcloseRx"; then
		MPTCP_LIB_SUBTEST_FLAKY=1
		test_linkfail=1024 fastclose=server \
			run_tests $ns1 $ns2 10.0.1.1
		chk_join_nr 0 0 0 0 0 0 1
@@ -3095,6 +3101,7 @@ fail_tests()
{
	# single subflow
	if reset_with_fail "Infinite map" 1; then
		MPTCP_LIB_SUBTEST_FLAKY=1
		test_linkfail=128 \
			run_tests $ns1 $ns2 10.0.1.1
		chk_join_nr 0 0 0 +1 +0 1 0 1 "$(pedit_action_pkts)"
@@ -3103,6 +3110,7 @@ fail_tests()

	# multiple subflows
	if reset_with_fail "MP_FAIL MP_RST" 2; then
		MPTCP_LIB_SUBTEST_FLAKY=1
		tc -n $ns2 qdisc add dev ns2eth1 root netem rate 1mbit delay 5ms
		pm_nl_set_limits $ns1 0 1
		pm_nl_set_limits $ns2 0 1
+28 −2
Original line number Diff line number Diff line
@@ -21,6 +21,7 @@ declare -rx MPTCP_LIB_AF_INET6=10

MPTCP_LIB_SUBTESTS=()
MPTCP_LIB_SUBTESTS_DUPLICATED=0
# 1 while the current subtest is marked as flaky, 0 otherwise; this is the
# value mptcp_lib_subtest_is_flaky() compares against 1.
MPTCP_LIB_SUBTEST_FLAKY=0
MPTCP_LIB_TEST_COUNTER=0
MPTCP_LIB_TEST_FORMAT="%02u %-50s"
MPTCP_LIB_IP_MPTCP=0
@@ -41,6 +42,16 @@ else
	readonly MPTCP_LIB_COLOR_RESET=
fi

# Tell whether the leniency granted to flaky subtests has been switched off.
# Setting SELFTESTS_MPTCP_LIB_OVERRIDE_FLAKY to exactly "1" requests that
# errors from subtests marked as flaky are NOT ignored.
# Returns: 0 when the override is active, 1 otherwise (unset, empty, other).
mptcp_lib_override_flaky() {
	case "${SELFTESTS_MPTCP_LIB_OVERRIDE_FLAKY:-}" in
	1)
		return 0
		;;
	*)
		return 1
		;;
	esac
}

# Tell whether failures of the current subtest must be ignored.
# Returns: 0 only when MPTCP_LIB_SUBTEST_FLAKY is "1" and
#          mptcp_lib_override_flaky does not succeed; 1 otherwise.
mptcp_lib_subtest_is_flaky() {
	# Not marked as flaky: nothing to ignore, the override is not consulted.
	if [ "${MPTCP_LIB_SUBTEST_FLAKY}" != 1 ]; then
		return 1
	fi

	# Marked as flaky, but the override asks to report errors anyway.
	if mptcp_lib_override_flaky; then
		return 1
	fi

	return 0
}

# $1: color, $2: text
mptcp_lib_print_color() {
	echo -e "${MPTCP_LIB_START_PRINT:-}${*}${MPTCP_LIB_COLOR_RESET}"
@@ -72,7 +83,16 @@ mptcp_lib_pr_skip() {
}

# Print the failure line for the current subtest, exactly once.
# $@: optional error message, appended after the tag
# When mptcp_lib_subtest_is_flaky succeeds, the failure is shown as
# "[IGNO] (flaky)" instead of "[FAIL]".
mptcp_lib_pr_fail() {
	local title="FAIL"
	# Initialised to an empty string so that expanding it on the non-flaky
	# path is safe even when the caller runs with 'set -u'.
	local cmt=""

	if mptcp_lib_subtest_is_flaky; then
		title="IGNO"
		cmt=" (flaky)"
	fi

	# ${1:+ ${*}}: add " <message>" only when a message has been given.
	mptcp_lib_print_err "[${title}]${cmt}${1:+ ${*}}"
}

mptcp_lib_pr_info() {
@@ -208,7 +228,13 @@ mptcp_lib_result_pass() {

# Record the failure of a subtest in the results list.
# $1: test name
mptcp_lib_result_fail() {
	local verdict="not ok"
	local suffix=""

	if mptcp_lib_subtest_is_flaky; then
		# 'not ok # TODO' or 'ok # SKIP' could look more natural here, but
		# some CIs don't understand 'TODO' and treat SKIP as errors: the
		# flaky failure is then recorded as 'ok' with an IGNORE comment.
		verdict="ok"
		suffix=" # IGNORE Flaky"
	fi

	__mptcp_lib_result_add "${verdict}" "${1}${suffix}"
}

# $1: test name
+3 −3
Original line number Diff line number Diff line
@@ -244,7 +244,7 @@ run_test()
	do_transfer $small $large $time
	lret=$?
	mptcp_lib_result_code "${lret}" "${msg}"
	if [ $lret -ne 0 ]; then
	if [ $lret -ne 0 ] && ! mptcp_lib_subtest_is_flaky; then
		ret=$lret
		[ $bail -eq 0 ] || exit $ret
	fi
@@ -254,7 +254,7 @@ run_test()
	do_transfer $large $small $time
	lret=$?
	mptcp_lib_result_code "${lret}" "${msg}"
	if [ $lret -ne 0 ]; then
	if [ $lret -ne 0 ] && ! mptcp_lib_subtest_is_flaky; then
		ret=$lret
		[ $bail -eq 0 ] || exit $ret
	fi
@@ -290,7 +290,7 @@ run_test 10 10 0 0 "balanced bwidth"
run_test 10 10 1 25 "balanced bwidth with unbalanced delay"

# we still need some additional infrastructure to pass the following test-cases
# "unbalanced bwidth" is run once only, marked as flaky. The assignment is a
# prefix of the run_test call, so it applies to that invocation only (bash,
# non-POSIX mode) and the following tests are not marked as flaky.
MPTCP_LIB_SUBTEST_FLAKY=1 run_test 10 3 0 0 "unbalanced bwidth"
run_test 10 3 1 25 "unbalanced bwidth with unbalanced delay"
run_test 10 3 25 1 "unbalanced bwidth with opposed, unbalanced delay"