Commit 539d1b47 authored by Nilay Shroff, committed by Jens Axboe
Browse files

block: break pcpu_alloc_mutex dependency on freeze_lock



While the nr_hw_queue update allocates tagset tags, it acquires
->pcpu_alloc_mutex after ->freeze_lock is acquired or the queue is
frozen. This potentially creates a circular dependency involving
->fs_reclaim if reclaim is triggered simultaneously in a code path which
first acquires ->pcpu_alloc_mutex. As the queue is already frozen while
the nr_hw_queue update allocates tagsets, the reclaim can't make forward
progress, and thus it could cause a potential deadlock as reported in
lockdep splat[1].

Fix this by pre-allocating the tagset tags before we freeze the queue
during the nr_hw_queue update. Later, the allocated tagset tags can be
safely installed and used after the queue is frozen.

Reported-by: Yi Zhang <yi.zhang@redhat.com>
Closes: https://lore.kernel.org/all/CAHj4cs8F=OV9s3La2kEQ34YndgfZP-B5PHS4Z8_b9euKG6J4mw@mail.gmail.com/

 [1]
Signed-off-by: Nilay Shroff <nilay@linux.ibm.com>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Tested-by: Yi Zhang <yi.zhang@redhat.com>
Reviewed-by: Yu Kuai <yukuai@fnnas.com>
[axboe: fix brace style issue]
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent da46b5df
Loading
Loading
Loading
Loading
+30 −15
Original line number Diff line number Diff line
@@ -4793,38 +4793,45 @@ static void blk_mq_update_queue_map(struct blk_mq_tag_set *set)
	}
}

static int blk_mq_realloc_tag_set_tags(struct blk_mq_tag_set *set,
static struct blk_mq_tags **blk_mq_prealloc_tag_set_tags(
				struct blk_mq_tag_set *set,
				int new_nr_hw_queues)
{
	struct blk_mq_tags **new_tags;
	int i;

	if (set->nr_hw_queues >= new_nr_hw_queues)
		goto done;
		return NULL;

	new_tags = kcalloc_node(new_nr_hw_queues, sizeof(struct blk_mq_tags *),
				GFP_KERNEL, set->numa_node);
	if (!new_tags)
		return -ENOMEM;
		return ERR_PTR(-ENOMEM);

	if (set->tags)
		memcpy(new_tags, set->tags, set->nr_hw_queues *
		       sizeof(*set->tags));
	kfree(set->tags);
	set->tags = new_tags;

	for (i = set->nr_hw_queues; i < new_nr_hw_queues; i++) {
		if (!__blk_mq_alloc_map_and_rqs(set, i)) {
			while (--i >= set->nr_hw_queues)
				__blk_mq_free_map_and_rqs(set, i);
			return -ENOMEM;
		if (blk_mq_is_shared_tags(set->flags)) {
			new_tags[i] = set->shared_tags;
		} else {
			new_tags[i] = blk_mq_alloc_map_and_rqs(set, i,
					set->queue_depth);
			if (!new_tags[i])
				goto out_unwind;
		}
		cond_resched();
	}

done:
	set->nr_hw_queues = new_nr_hw_queues;
	return 0;
	return new_tags;
out_unwind:
	while (--i >= set->nr_hw_queues) {
		if (!blk_mq_is_shared_tags(set->flags))
			blk_mq_free_map_and_rqs(set, new_tags[i], i);
	}
	kfree(new_tags);
	return ERR_PTR(-ENOMEM);
}

/*
@@ -5113,6 +5120,7 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
	unsigned int memflags;
	int i;
	struct xarray elv_tbl;
	struct blk_mq_tags **new_tags;
	bool queues_frozen = false;

	lockdep_assert_held(&set->tag_list_lock);
@@ -5147,11 +5155,18 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
		if (blk_mq_elv_switch_none(q, &elv_tbl))
			goto switch_back;

	new_tags = blk_mq_prealloc_tag_set_tags(set, nr_hw_queues);
	if (IS_ERR(new_tags))
		goto switch_back;

	list_for_each_entry(q, &set->tag_list, tag_set_list)
		blk_mq_freeze_queue_nomemsave(q);
	queues_frozen = true;
	if (blk_mq_realloc_tag_set_tags(set, nr_hw_queues) < 0)
		goto switch_back;
	if (new_tags) {
		kfree(set->tags);
		set->tags = new_tags;
	}
	set->nr_hw_queues = nr_hw_queues;

fallback:
	blk_mq_update_queue_map(set);