Commit 1e1a9cec authored by Christoph Hellwig, committed by Jens Axboe
Browse files

block: force noio scope in blk_mq_freeze_queue



When block drivers or the core block code perform allocations with a
frozen queue, this could try to recurse into the block device to
reclaim memory and deadlock.  Thus all allocations done by a process
that froze a queue need to be done without __GFP_IO and __GFP_FS.
Instead of trying to track all of them down, force a noio scope as
part of freezing the queue.

Note that nvme is a bit of a mess here due to the non-owner freezes,
and they will be addressed separately.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20250131120352.1315351-2-hch@lst.de


Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent 14ef4965
Loading
Loading
Loading
Loading
+6 −4
Original line number Diff line number Diff line
@@ -1545,6 +1545,7 @@ int blkcg_activate_policy(struct gendisk *disk, const struct blkcg_policy *pol)
	struct request_queue *q = disk->queue;
	struct blkg_policy_data *pd_prealloc = NULL;
	struct blkcg_gq *blkg, *pinned_blkg = NULL;
	unsigned int memflags;
	int ret;

	if (blkcg_policy_enabled(q, pol))
@@ -1559,7 +1560,7 @@ int blkcg_activate_policy(struct gendisk *disk, const struct blkcg_policy *pol)
		return -EINVAL;

	if (queue_is_mq(q))
		blk_mq_freeze_queue(q);
		memflags = blk_mq_freeze_queue(q);
retry:
	spin_lock_irq(&q->queue_lock);

@@ -1623,7 +1624,7 @@ int blkcg_activate_policy(struct gendisk *disk, const struct blkcg_policy *pol)
	spin_unlock_irq(&q->queue_lock);
out:
	if (queue_is_mq(q))
		blk_mq_unfreeze_queue(q);
		blk_mq_unfreeze_queue(q, memflags);
	if (pinned_blkg)
		blkg_put(pinned_blkg);
	if (pd_prealloc)
@@ -1667,12 +1668,13 @@ void blkcg_deactivate_policy(struct gendisk *disk,
{
	struct request_queue *q = disk->queue;
	struct blkcg_gq *blkg;
	unsigned int memflags;

	if (!blkcg_policy_enabled(q, pol))
		return;

	if (queue_is_mq(q))
		blk_mq_freeze_queue(q);
		memflags = blk_mq_freeze_queue(q);

	mutex_lock(&q->blkcg_mutex);
	spin_lock_irq(&q->queue_lock);
@@ -1696,7 +1698,7 @@ void blkcg_deactivate_policy(struct gendisk *disk,
	mutex_unlock(&q->blkcg_mutex);

	if (queue_is_mq(q))
		blk_mq_unfreeze_queue(q);
		blk_mq_unfreeze_queue(q, memflags);
}
EXPORT_SYMBOL_GPL(blkcg_deactivate_policy);

+8 −6
Original line number Diff line number Diff line
@@ -3224,6 +3224,7 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
	u32 qos[NR_QOS_PARAMS];
	bool enable, user;
	char *body, *p;
	unsigned int memflags;
	int ret;

	blkg_conf_init(&ctx, input);
@@ -3247,7 +3248,7 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
		ioc = q_to_ioc(disk->queue);
	}

	blk_mq_freeze_queue(disk->queue);
	memflags = blk_mq_freeze_queue(disk->queue);
	blk_mq_quiesce_queue(disk->queue);

	spin_lock_irq(&ioc->lock);
@@ -3347,7 +3348,7 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
		wbt_enable_default(disk);

	blk_mq_unquiesce_queue(disk->queue);
	blk_mq_unfreeze_queue(disk->queue);
	blk_mq_unfreeze_queue(disk->queue, memflags);

	blkg_conf_exit(&ctx);
	return nbytes;
@@ -3355,7 +3356,7 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
	spin_unlock_irq(&ioc->lock);

	blk_mq_unquiesce_queue(disk->queue);
	blk_mq_unfreeze_queue(disk->queue);
	blk_mq_unfreeze_queue(disk->queue, memflags);

	ret = -EINVAL;
err:
@@ -3414,6 +3415,7 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
{
	struct blkg_conf_ctx ctx;
	struct request_queue *q;
	unsigned int memflags;
	struct ioc *ioc;
	u64 u[NR_I_LCOEFS];
	bool user;
@@ -3441,7 +3443,7 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
		ioc = q_to_ioc(q);
	}

	blk_mq_freeze_queue(q);
	memflags = blk_mq_freeze_queue(q);
	blk_mq_quiesce_queue(q);

	spin_lock_irq(&ioc->lock);
@@ -3493,7 +3495,7 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
	spin_unlock_irq(&ioc->lock);

	blk_mq_unquiesce_queue(q);
	blk_mq_unfreeze_queue(q);
	blk_mq_unfreeze_queue(q, memflags);

	blkg_conf_exit(&ctx);
	return nbytes;
@@ -3502,7 +3504,7 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
	spin_unlock_irq(&ioc->lock);

	blk_mq_unquiesce_queue(q);
	blk_mq_unfreeze_queue(q);
	blk_mq_unfreeze_queue(q, memflags);

	ret = -EINVAL;
err:
+4 −2
Original line number Diff line number Diff line
@@ -749,9 +749,11 @@ static void blkiolatency_enable_work_fn(struct work_struct *work)
	 */
	enabled = atomic_read(&blkiolat->enable_cnt);
	if (enabled != blkiolat->enabled) {
		blk_mq_freeze_queue(blkiolat->rqos.disk->queue);
		unsigned int memflags;

		memflags = blk_mq_freeze_queue(blkiolat->rqos.disk->queue);
		blkiolat->enabled = enabled;
		blk_mq_unfreeze_queue(blkiolat->rqos.disk->queue);
		blk_mq_unfreeze_queue(blkiolat->rqos.disk->queue, memflags);
	}
}

+13 −8
Original line number Diff line number Diff line
@@ -210,12 +210,12 @@ int blk_mq_freeze_queue_wait_timeout(struct request_queue *q,
}
EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_wait_timeout);

void blk_mq_freeze_queue(struct request_queue *q)
void blk_mq_freeze_queue_nomemsave(struct request_queue *q)
{
	blk_freeze_queue_start(q);
	blk_mq_freeze_queue_wait(q);
}
EXPORT_SYMBOL_GPL(blk_mq_freeze_queue);
EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_nomemsave);

bool __blk_mq_unfreeze_queue(struct request_queue *q, bool force_atomic)
{
@@ -236,12 +236,12 @@ bool __blk_mq_unfreeze_queue(struct request_queue *q, bool force_atomic)
	return unfreeze;
}

void blk_mq_unfreeze_queue(struct request_queue *q)
void blk_mq_unfreeze_queue_nomemrestore(struct request_queue *q)
{
	if (__blk_mq_unfreeze_queue(q, false))
		blk_unfreeze_release_lock(q);
}
EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue);
EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue_nomemrestore);

/*
 * non_owner variant of blk_freeze_queue_start
@@ -4223,13 +4223,14 @@ static void blk_mq_update_tag_set_shared(struct blk_mq_tag_set *set,
					 bool shared)
{
	struct request_queue *q;
	unsigned int memflags;

	lockdep_assert_held(&set->tag_list_lock);

	list_for_each_entry(q, &set->tag_list, tag_set_list) {
		blk_mq_freeze_queue(q);
		memflags = blk_mq_freeze_queue(q);
		queue_set_hctx_shared(q, shared);
		blk_mq_unfreeze_queue(q);
		blk_mq_unfreeze_queue(q, memflags);
	}
}

@@ -4992,6 +4993,7 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
	struct request_queue *q;
	LIST_HEAD(head);
	int prev_nr_hw_queues = set->nr_hw_queues;
	unsigned int memflags;
	int i;

	lockdep_assert_held(&set->tag_list_lock);
@@ -5003,8 +5005,10 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
	if (set->nr_maps == 1 && nr_hw_queues == set->nr_hw_queues)
		return;

	memflags = memalloc_noio_save();
	list_for_each_entry(q, &set->tag_list, tag_set_list)
		blk_mq_freeze_queue(q);
		blk_mq_freeze_queue_nomemsave(q);

	/*
	 * Switch IO scheduler to 'none', cleaning up the data associated
	 * with the previous scheduler. We will switch back once we are done
@@ -5052,7 +5056,8 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
		blk_mq_elv_switch_back(&head, q);

	list_for_each_entry(q, &set->tag_list, tag_set_list)
		blk_mq_unfreeze_queue(q);
		blk_mq_unfreeze_queue_nomemrestore(q);
	memalloc_noio_restore(memflags);

	/* Free the excess tags when nr_hw_queues shrink. */
	for (i = set->nr_hw_queues; i < prev_nr_hw_queues; i++)
+1 −1
Original line number Diff line number Diff line
@@ -89,7 +89,7 @@ int blk_pre_runtime_suspend(struct request_queue *q)
	if (percpu_ref_is_zero(&q->q_usage_counter))
		ret = 0;
	/* Switch q_usage_counter back to per-cpu mode. */
	blk_mq_unfreeze_queue(q);
	blk_mq_unfreeze_queue_nomemrestore(q);

	if (ret < 0) {
		spin_lock_irq(&q->queue_lock);
Loading