Commit 5314d25a authored by Ming Lei, committed by Jens Axboe
Browse files

selftests: ublk: improve I/O ordering test with bpftrace



Remove test_generic_01.sh since the block layer may reorder I/O, making
the test prone to false positives. Apply the improvements to
test_generic_02.sh instead, which is supposed to cover ublk dispatch
I/O order.

Rework test_generic_02 to verify that ublk dispatch doesn't reorder I/O
by comparing request start order with completion order using bpftrace.

The bpftrace script now:
- Tracks each request's start sequence number in a map keyed by sector
- On completion, verifies the request's start order matches expected
  completion order
- Reports any out-of-order completions detected

The test script:
- Waits until the bpftrace BEGIN code block has run
- Pins fio to CPU 0 for deterministic behavior
- Uses block_io_start and block_rq_complete tracepoints
- Checks bpftrace output for reordering errors

Reported-and-tested-by: Alexander Atanasov <alex@zazolabs.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent d9a36ab3
Loading
Loading
Loading
Loading
+1 −2
Original line number Diff line number Diff line
@@ -7,8 +7,7 @@ endif

LDLIBS += -lpthread -lm -luring

TEST_PROGS := test_generic_01.sh
TEST_PROGS += test_generic_02.sh
TEST_PROGS := test_generic_02.sh
TEST_PROGS += test_generic_03.sh
TEST_PROGS += test_generic_06.sh
TEST_PROGS += test_generic_07.sh
+0 −47
Original line number Diff line number Diff line
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# ublk selftest: "sequential io order" on a single null-target device.
#
# Flow: start bpftrace on trace/seq_io.bt in the background, issue
# sequential direct writes with fio, then fail the test if the trace
# output contains an "io_out_of_order" line.
#
# Helpers (_have_program, _prep_test, _add_ublk_dev, _check_add_dev,
# _get_disk_dev_t, _cleanup_test, _show_result) and the variables
# UBLK_SKIP_CODE, UBLK_TMP and TID are not defined here; presumably they
# come from test_common.sh -- confirm there.

# Source the shared helpers from the directory this script lives in, so
# the test works regardless of the caller's current directory.
. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh

ERR_CODE=0

# Both external tools are hard requirements; skip rather than fail when
# either one is unavailable.
if ! _have_program bpftrace; then
	exit "$UBLK_SKIP_CODE"
fi

if ! _have_program fio; then
	exit "$UBLK_SKIP_CODE"
fi

_prep_test "null" "sequential io order"

# Create the null-target device; _check_add_dev receives the exit status
# of _add_ublk_dev.
dev_id=$(_add_ublk_dev -t null)
_check_add_dev $TID $?

# Start the tracer in the background with all of its output captured in
# $UBLK_TMP. Script arguments: $1 = dev_t of the disk, $2 = RWBS prefix
# to match ("W"), $3 = length of that prefix (1).
dev_t=$(_get_disk_dev_t "$dev_id")
bpftrace trace/seq_io.bt "$dev_t" "W" 1 > "$UBLK_TMP" 2>&1 &
btrace_pid=$!
# Fixed delay to let bpftrace start up before any I/O is issued.
# NOTE(review): this is a timing assumption, not a readiness handshake.
sleep 2

# kill -0 only probes whether the process still exists. If bpftrace has
# already exited, the test is skipped instead of reported as a failure.
if ! kill -0 "$btrace_pid" > /dev/null 2>&1; then
	_cleanup_test "null"
	exit "$UBLK_SKIP_CODE"
fi

# run fio over this ublk disk: sequential 4k direct writes, 512M total,
# libaio with a queue depth of 16
fio --name=write_seq \
    --filename=/dev/ublkb"${dev_id}" \
    --ioengine=libaio --iodepth=16 \
    --rw=write \
    --size=512M \
    --direct=1 \
    --bs=4k > /dev/null 2>&1
# Keep fio's exit status as the provisional test result.
ERR_CODE=$?
# Stop the tracer and wait for background jobs to finish before reading
# its output file.
kill "$btrace_pid"
wait
# Any "io_out_of_order" line in the trace output fails the test; dump the
# whole trace output to help diagnosis.
if grep -q "io_out_of_order" "$UBLK_TMP"; then
	cat "$UBLK_TMP"
	ERR_CODE=255
fi
_cleanup_test "null"
_show_result $TID $ERR_CODE
+15 −7
Original line number Diff line number Diff line
@@ -13,7 +13,7 @@ if ! _have_program fio; then
	exit "$UBLK_SKIP_CODE"
fi

_prep_test "null" "sequential io order for MQ"
_prep_test "null" "ublk dispatch won't reorder IO for MQ"

dev_id=$(_add_ublk_dev -t null -q 2)
_check_add_dev $TID $?
@@ -21,15 +21,20 @@ _check_add_dev $TID $?
dev_t=$(_get_disk_dev_t "$dev_id")
bpftrace trace/seq_io.bt "$dev_t" "W" 1 > "$UBLK_TMP" 2>&1 &
btrace_pid=$!
sleep 2

if ! kill -0 "$btrace_pid" > /dev/null 2>&1; then
# Wait for bpftrace probes to be attached (BEGIN block prints BPFTRACE_READY)
for _ in $(seq 100); do
	grep -q "BPFTRACE_READY" "$UBLK_TMP" 2>/dev/null && break
	sleep 0.1
done

if ! kill -0 "$btrace_pid" 2>/dev/null; then
	_cleanup_test "null"
	exit "$UBLK_SKIP_CODE"
fi

# run fio over this ublk disk
fio --name=write_seq \
# run fio over this ublk disk (pinned to CPU 0)
taskset -c 0 fio --name=write_seq \
    --filename=/dev/ublkb"${dev_id}" \
    --ioengine=libaio --iodepth=16 \
    --rw=write \
@@ -39,8 +44,11 @@ fio --name=write_seq \
ERR_CODE=$?
kill "$btrace_pid"
wait
if grep -q "io_out_of_order" "$UBLK_TMP"; then
	cat "$UBLK_TMP"

# Check for out-of-order completions detected by bpftrace
if grep -q "^out_of_order:" "$UBLK_TMP"; then
	echo "I/O reordering detected:"
	grep "^out_of_order:" "$UBLK_TMP"
	ERR_CODE=255
fi
_cleanup_test "null"
+38 −9
Original line number Diff line number Diff line
@@ -2,23 +2,52 @@
	$1: 	dev_t
	$2: 	RWBS
	$3:     strlen($2)

	Track request order between block_io_start and block_rq_complete.
	Sequence starts at 1 so 0 means "never seen". On first valid
	completion, sync complete_seq to handle probe attachment races.
	block_rq_complete listed first to reduce missed completion window.
*/

BEGIN {
	@last_rw[$1, str($2)] = (uint64)0;
	@start_seq = (uint64)1;
	@complete_seq = (uint64)0;
	@out_of_order = (uint64)0;
	@start_order[0] = (uint64)0;
	delete(@start_order[0]);
	printf("BPFTRACE_READY\n");
}

tracepoint:block:block_rq_complete
/(int64)args.dev == $1 && !strncmp(args.rwbs, str($2), $3)/
{
	$dev = $1;
	if ((int64)args.dev == $1 && !strncmp(args.rwbs, str($2), $3)) {
		$last = @last_rw[$dev, str($2)];
		if ((uint64)args.sector != $last) {
			printf("io_out_of_order: exp %llu actual %llu\n",
				args.sector, $last);
	$expected = @start_order[args.sector];
	if ($expected > 0) {
		if (@complete_seq == 0) {
			@complete_seq = $expected;
		}
		if ($expected != @complete_seq) {
			printf("out_of_order: sector %llu started at seq %llu but completed at seq %llu\n",
				args.sector, $expected, @complete_seq);
			@out_of_order = @out_of_order + 1;
		}
		@last_rw[$dev, str($2)] = (args.sector + args.nr_sector);
		delete(@start_order[args.sector]);
		@complete_seq = @complete_seq + 1;
	}
}

/*
	Assign a start sequence number to every matching request.
	The filter keeps only events whose device equals $1 and whose
	rwbs string matches $2 over its first $3 characters.
*/
tracepoint:block:block_io_start
/(int64)args.dev == $1 && !strncmp(args.rwbs, str($2), $3)/
{
	/* Key by start sector; block_rq_complete looks this entry up. */
	@start_order[args.sector] = @start_seq;
	/* Advance the counter for the next started request. */
	@start_seq = @start_seq + 1;
}

END {
	clear(@last_rw);
	printf("total_start: %llu total_complete: %llu out_of_order: %llu\n",
		@start_seq - 1, @complete_seq, @out_of_order);
	clear(@start_order);
	clear(@start_seq);
	clear(@complete_seq);
	clear(@out_of_order);
}