Commit 60cf8637 authored by Ming Lei, committed by Jens Axboe
Browse files

selftests/ublk: add test for async partition scan



Add test_generic_15.sh to verify that async partition scan prevents
IO hang when reading partition tables.

The test creates ublk devices with fault_inject target and very large
delay (60s) to simulate blocked partition table reads, then kills the
daemon to verify proper state transitions without hanging:

1. Without recovery support:
   - Create device with fault_inject and 60s delay
   - Kill daemon while partition scan may be blocked
   - Verify device transitions to DEAD state

2. With recovery support (-r 1):
   - Create device with fault_inject, 60s delay, and recovery
   - Kill daemon while partition scan may be blocked
   - Verify device transitions to QUIESCED state

Before the async partition scan fix, killing the daemon during a
partition scan would cause a deadlock, because the partition scan held
ub->mutex while waiting for IO. With the async fix, the partition scan
happens in a work function and flush_work() ensures proper synchronization.

Add _add_ublk_dev_no_settle() helper function to skip udevadm settle,
which would otherwise hang waiting for partition scan events to
complete when partition table read is delayed.

Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent 7fc4da6a
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -22,6 +22,7 @@ TEST_PROGS += test_generic_11.sh
TEST_PROGS += test_generic_12.sh
TEST_PROGS += test_generic_13.sh
TEST_PROGS += test_generic_14.sh
TEST_PROGS += test_generic_15.sh

TEST_PROGS += test_null_01.sh
TEST_PROGS += test_null_02.sh
+12 −4
Original line number Diff line number Diff line
@@ -178,8 +178,9 @@ _have_feature()
_create_ublk_dev() {
	local dev_id;
	local cmd=$1
	local settle=$2

	shift 1
	shift 2

	if [ ! -c /dev/ublk-control ]; then
		return ${UBLK_SKIP_CODE}
@@ -194,7 +195,10 @@ _create_ublk_dev() {
		echo "fail to add ublk dev $*"
		return 255
	fi

	if [ "$settle" = "yes" ]; then
		udevadm settle
	fi

	if [[ "$dev_id" =~ ^[0-9]+$ ]]; then
		echo "${dev_id}"
@@ -204,14 +208,18 @@ _create_ublk_dev() {
}

# Add a ublk device by delegating to _create_ublk_dev with the "add"
# command, forwarding all caller arguments.
#
# NOTE(review): two calls are visible because this text is a rendered diff
# without +/- markers. Presumably the first call is the line being removed
# and the second, which passes "yes" in the settle position, is its
# replacement -- confirm against the real test_common.sh.
_add_ublk_dev() {
	_create_ublk_dev "add" "$@"
	_create_ublk_dev "add" "yes" "$@"
}

# Add a ublk device without waiting for udev to settle.
#
# Delegates to _create_ublk_dev with the "add" command and "no" in the
# settle position, then forwards every caller argument untouched.
# Output and exit status are whatever _create_ublk_dev produces.
_add_ublk_dev_no_settle() {
	local settle="no"

	_create_ublk_dev "add" "${settle}" "$@"
}

_recover_ublk_dev() {
	local dev_id
	local state

	dev_id=$(_create_ublk_dev "recover" "$@")
	dev_id=$(_create_ublk_dev "recover" "yes" "$@")
	for ((j=0;j<20;j++)); do
		state=$(_get_ublk_dev_state "${dev_id}")
		[ "$state" == "LIVE" ] && break
+68 −0
Original line number Diff line number Diff line
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0

. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh

TID="generic_15"
ERR_CODE=0

# Run one partition-scan scenario: create a fault_inject ublk device whose
# IO is delayed, kill its daemon, and check the resulting device state.
#
# Arguments:
#   $1 - "yes" to create the device with recovery enabled (-r 1);
#        any other value creates it without recovery
#   $2 - device state expected after the daemon has been killed
# Globals:
#   TID (read), UBLK_PROG (read), ERR_CODE (set to 255 on state mismatch)
# Outputs:
#   progress and PASS/FAIL lines on stdout
_test_partition_scan_no_hang()
{
	local use_recovery=$1
	local want_state=$2
	local dev_id state daemon_pid
	local t0 took
	# --delay_us 60000000 = 60 seconds delay, large enough to simulate a
	# hang while the partition table is being read.
	local -a add_args=(-t fault_inject -q 1 -d 1 --delay_us 60000000)

	case "$use_recovery" in
	yes)
		echo "Testing partition scan with recovery support..."
		add_args+=(-r 1)
		;;
	*)
		echo "Testing partition scan without recovery..."
		;;
	esac

	# The no-settle variant is required here: udevadm settle would hang
	# waiting for the partition scan events to complete.
	dev_id=$(_add_ublk_dev_no_settle "${add_args[@]}")
	_check_add_dev "$TID" $?

	# Adding the device should return quickly because the partition scan is
	# async. Give the scan work a moment to start and run into the delay in
	# the fault_inject handler.
	sleep 1

	# Kill the daemon while the partition scan is potentially blocked, and
	# time how long the state transition takes.
	t0=${SECONDS}
	daemon_pid=$(_get_ublk_daemon_pid "${dev_id}")
	state=$(__ublk_kill_daemon "${dev_id}" "${want_state}")
	took=$((SECONDS - t0))

	# Report whether the device reached the expected state.
	if [ "$state" = "${want_state}" ]; then
		echo "PASS: Device transitioned to ${want_state} in ${took}s without hanging"
	else
		echo "FAIL: Device state is $state, expected ${want_state}"
		ERR_CODE=255
	fi

	# The device is removed on both the pass and the fail path.
	${UBLK_PROG} del -n "${dev_id}" > /dev/null 2>&1
}

_prep_test "partition_scan" "verify async partition scan prevents IO hang"

# Each scenario is "<recovery flag>:<state expected after the daemon dies>":
#   no:DEAD       - without recovery support the device should become DEAD
#   yes:QUIESCED  - with recovery support the device should become QUIESCED
for scenario in "no:DEAD" "yes:QUIESCED"; do
	_test_partition_scan_no_hang "${scenario%%:*}" "${scenario##*:}"
done

_cleanup_test "partition_scan"
_show_result "$TID" "$ERR_CODE"