Commit 212c928d authored by Uday Shankar's avatar Uday Shankar Committed by Jens Axboe
Browse files

ublk: don't quiesce in ublk_ch_release



ublk_ch_release currently quiesces the device's request_queue while
setting force_abort/fail_io.  This avoids data races by preventing
concurrent reads from the I/O path, but is not strictly needed - at this
point, canceling is already set and guaranteed to be observed by any
concurrently executing I/Os, so they will be handled properly even if
the changes to force_abort/fail_io propagate to the I/O path later.
Remove the quiesce/unquiesce calls from ublk_ch_release. This makes the
writes to force_abort/fail_io concurrent with the reads in the I/O path,
so make the accesses atomic.

Before this change, the call to blk_mq_quiesce_queue was responsible for
most (90%) of the runtime of ublk_ch_release. With that call eliminated,
ublk_ch_release runs much faster. Here is a comparison of the total time
spent in calls to ublk_ch_release when a server handling 128 devices
exits, before and after this change:

before: 1.11s
after: 0.09s

Signed-off-by: default avatarUday Shankar <ushankar@purestorage.com>
Reviewed-by: default avatarMing Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/20250808-ublk_quiesce2-v1-1-f87ade33fa3d@purestorage.com


Signed-off-by: default avatarJens Axboe <axboe@kernel.dk>
parent d5dd4098
Loading
Loading
Loading
Loading
+5 −7
Original line number Diff line number Diff line
@@ -1389,7 +1389,7 @@ static blk_status_t ublk_prep_req(struct ublk_queue *ubq, struct request *rq,
{
	blk_status_t res;

	if (unlikely(ubq->fail_io))
	if (unlikely(READ_ONCE(ubq->fail_io)))
		return BLK_STS_TARGET;

	/* With recovery feature enabled, force_abort is set in
@@ -1401,7 +1401,8 @@ static blk_status_t ublk_prep_req(struct ublk_queue *ubq, struct request *rq,
	 * Note: force_abort is guaranteed to be seen because it is set
	 * before request queue is unqiuesced.
	 */
	if (ublk_nosrv_should_queue_io(ubq) && unlikely(ubq->force_abort))
	if (ublk_nosrv_should_queue_io(ubq) &&
	    unlikely(READ_ONCE(ubq->force_abort)))
		return BLK_STS_IOERR;

	if (check_cancel && unlikely(ubq->canceling))
@@ -1644,7 +1645,6 @@ static int ublk_ch_release(struct inode *inode, struct file *filp)
	 * Transition the device to the nosrv state. What exactly this
	 * means depends on the recovery flags
	 */
	blk_mq_quiesce_queue(disk->queue);
	if (ublk_nosrv_should_stop_dev(ub)) {
		/*
		 * Allow any pending/future I/O to pass through quickly
@@ -1652,8 +1652,7 @@ static int ublk_ch_release(struct inode *inode, struct file *filp)
		 * waits for all pending I/O to complete
		 */
		for (i = 0; i < ub->dev_info.nr_hw_queues; i++)
			ublk_get_queue(ub, i)->force_abort = true;
		blk_mq_unquiesce_queue(disk->queue);
			WRITE_ONCE(ublk_get_queue(ub, i)->force_abort, true);

		ublk_stop_dev_unlocked(ub);
	} else {
@@ -1663,9 +1662,8 @@ static int ublk_ch_release(struct inode *inode, struct file *filp)
		} else {
			ub->dev_info.state = UBLK_S_DEV_FAIL_IO;
			for (i = 0; i < ub->dev_info.nr_hw_queues; i++)
				ublk_get_queue(ub, i)->fail_io = true;
				WRITE_ONCE(ublk_get_queue(ub, i)->fail_io, true);
		}
		blk_mq_unquiesce_queue(disk->queue);
	}
unlock:
	mutex_unlock(&ub->mutex);