Commit 01b91bf1, authored by Ming Lei, committed by Jens Axboe
Browse files

block: don't grab elevator lock during queue initialization

->elevator_lock depends on queue freeze lock, see block/blk-sysfs.c.

queue freeze lock depends on fs_reclaim.

So don't grab the elevator lock during queue initialization, which needs
to call kmalloc(GFP_KERNEL). This cuts the dependency between
->elevator_lock and fs_reclaim, which eliminates the lockdep warning.

This is safe because elevator setting (switching the I/O scheduler)
isn't ready to run during queue initialization.

There is no such issue in __blk_mq_update_nr_hw_queues() because
memalloc_noio_save() is called before acquiring the elevator lock.

Fixes the following lockdep warning:

https://lore.kernel.org/linux-block/67e6b425.050a0220.2f068f.007b.GAE@google.com/



Reported-by: <syzbot+4c7e0f9b94ad65811efb@syzkaller.appspotmail.com>
Cc: Nilay Shroff <nilay@linux.ibm.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/20250403105402.1334206-1-ming.lei@redhat.com


Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent fb585552
Loading
Loading
Loading
Loading
+17 −7
Original line number Diff line number Diff line
@@ -4464,14 +4464,12 @@ static struct blk_mq_hw_ctx *blk_mq_alloc_and_init_hctx(
	return NULL;
}

static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
static void __blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
				     struct request_queue *q)
{
	struct blk_mq_hw_ctx *hctx;
	unsigned long i, j;

	/* protect against switching io scheduler  */
	mutex_lock(&q->elevator_lock);
	for (i = 0; i < set->nr_hw_queues; i++) {
		int old_node;
		int node = blk_mq_get_hctx_node(set, i);
@@ -4504,7 +4502,19 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,

	xa_for_each_start(&q->hctx_table, j, hctx, j)
		blk_mq_exit_hctx(q, set, hctx, j);
}

static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
				   struct request_queue *q, bool lock)
{
	if (lock) {
		/* protect against switching io scheduler  */
		mutex_lock(&q->elevator_lock);
		__blk_mq_realloc_hw_ctxs(set, q);
		mutex_unlock(&q->elevator_lock);
	} else {
		__blk_mq_realloc_hw_ctxs(set, q);
	}

	/* unregister cpuhp callbacks for exited hctxs */
	blk_mq_remove_hw_queues_cpuhp(q);
@@ -4536,7 +4546,7 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,

	xa_init(&q->hctx_table);

	blk_mq_realloc_hw_ctxs(set, q);
	blk_mq_realloc_hw_ctxs(set, q, false);
	if (!q->nr_hw_queues)
		goto err_hctxs;

@@ -5032,7 +5042,7 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
fallback:
	blk_mq_update_queue_map(set);
	list_for_each_entry(q, &set->tag_list, tag_set_list) {
		blk_mq_realloc_hw_ctxs(set, q);
		blk_mq_realloc_hw_ctxs(set, q, true);

		if (q->nr_hw_queues != set->nr_hw_queues) {
			int i = prev_nr_hw_queues;