Commit dfe48ea1, authored by Yu Kuai, committed by Jens Axboe
Browse files

blk-mq: use NOIO context to prevent deadlock during debugfs creation



Creating debugfs entries can trigger fs reclaim, which can enter back
into the block layer request_queue. This can cause deadlock if the
queue is frozen.

Previously, a WARN_ON_ONCE check was used in debugfs_create_files()
to detect this condition, but it was racy since the queue can be frozen
from another context at any time.

Introduce blk_debugfs_lock()/blk_debugfs_unlock() helpers that combine
the debugfs_mutex with memalloc_noio_save()/restore() to prevent fs
reclaim from triggering block I/O. Also add blk_debugfs_lock_nomemsave()
and blk_debugfs_unlock_nomemrestore() variants for callers that don't
need NOIO protection (e.g., debugfs removal or read-only operations).

Replace all raw debugfs_mutex lock/unlock pairs with these helpers,
using the _nomemsave/_nomemrestore variants where appropriate.

Reported-by: Yi Zhang <yi.zhang@redhat.com>
Closes: https://lore.kernel.org/all/CAHj4cs9gNKEYAPagD9JADfO5UH+OiCr4P7OO2wjpfOYeM-RV=A@mail.gmail.com/


Reported-by: Shinichiro Kawasaki <shinichiro.kawasaki@wdc.com>
Closes: https://lore.kernel.org/all/aYWQR7CtYdk3K39g@shinmob/


Suggested-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Yu Kuai <yukuai@fnnas.com>
Reviewed-by: Nilay Shroff <nilay@linux.ibm.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent 3678a334
Loading
Loading
Loading
Loading
+3 −7
Original line number Diff line number Diff line
@@ -613,11 +613,6 @@ static void debugfs_create_files(struct request_queue *q, struct dentry *parent,
				 const struct blk_mq_debugfs_attr *attr)
{
	lockdep_assert_held(&q->debugfs_mutex);
	/*
	 * Creating new debugfs entries with queue freezed has the risk of
	 * deadlock.
	 */
	WARN_ON_ONCE(q->mq_freeze_depth != 0);
	/*
	 * debugfs_mutex should not be nested under other locks that can be
	 * grabbed while queue is frozen.
@@ -693,12 +688,13 @@ void blk_mq_debugfs_unregister_hctx(struct blk_mq_hw_ctx *hctx)
/*
 * Register the debugfs entries of every hardware context of @q.
 *
 * The loop runs under q->debugfs_mutex in NOIO allocation context, taken and
 * released through blk_debugfs_lock()/blk_debugfs_unlock(). The mutex must be
 * acquired exactly once here: also calling mutex_lock(&q->debugfs_mutex)
 * directly would self-deadlock on the non-recursive mutex.
 */
void blk_mq_debugfs_register_hctxs(struct request_queue *q)
{
	struct blk_mq_hw_ctx *hctx;
	unsigned int memflags;
	unsigned long i;

	memflags = blk_debugfs_lock(q);
	queue_for_each_hw_ctx(q, hctx, i)
		blk_mq_debugfs_register_hctx(q, hctx);
	blk_debugfs_unlock(q, memflags);
}

void blk_mq_debugfs_unregister_hctxs(struct request_queue *q)
+5 −4
Original line number Diff line number Diff line
@@ -390,13 +390,14 @@ static void blk_mq_sched_tags_teardown(struct request_queue *q, unsigned int fla
/*
 * Register the scheduler debugfs entries for @q and then for each of its
 * hardware contexts.
 *
 * Everything runs under q->debugfs_mutex in NOIO allocation context, taken
 * and released through blk_debugfs_lock()/blk_debugfs_unlock(). The mutex
 * must be acquired exactly once here: also calling
 * mutex_lock(&q->debugfs_mutex) directly would self-deadlock on the
 * non-recursive mutex.
 */
void blk_mq_sched_reg_debugfs(struct request_queue *q)
{
	struct blk_mq_hw_ctx *hctx;
	unsigned int memflags;
	unsigned long i;

	memflags = blk_debugfs_lock(q);
	blk_mq_debugfs_register_sched(q);
	queue_for_each_hw_ctx(q, hctx, i)
		blk_mq_debugfs_register_sched_hctx(q, hctx);
	blk_debugfs_unlock(q, memflags);
}

void blk_mq_sched_unreg_debugfs(struct request_queue *q)
@@ -404,11 +405,11 @@ void blk_mq_sched_unreg_debugfs(struct request_queue *q)
	struct blk_mq_hw_ctx *hctx;
	unsigned long i;

	mutex_lock(&q->debugfs_mutex);
	blk_debugfs_lock_nomemsave(q);
	queue_for_each_hw_ctx(q, hctx, i)
		blk_mq_debugfs_unregister_sched_hctx(hctx);
	blk_mq_debugfs_unregister_sched(q);
	mutex_unlock(&q->debugfs_mutex);
	blk_debugfs_unlock_nomemrestore(q);
}

void blk_mq_free_sched_tags(struct elevator_tags *et,
+5 −4
Original line number Diff line number Diff line
@@ -892,13 +892,13 @@ static void blk_debugfs_remove(struct gendisk *disk)
{
	struct request_queue *q = disk->queue;

	mutex_lock(&q->debugfs_mutex);
	blk_debugfs_lock_nomemsave(q);
	blk_trace_shutdown(q);
	debugfs_remove_recursive(q->debugfs_dir);
	q->debugfs_dir = NULL;
	q->sched_debugfs_dir = NULL;
	q->rqos_debugfs_dir = NULL;
	mutex_unlock(&q->debugfs_mutex);
	blk_debugfs_unlock_nomemrestore(q);
}

/**
@@ -908,6 +908,7 @@ static void blk_debugfs_remove(struct gendisk *disk)
int blk_register_queue(struct gendisk *disk)
{
	struct request_queue *q = disk->queue;
	unsigned int memflags;
	int ret;

	ret = kobject_add(&disk->queue_kobj, &disk_to_dev(disk)->kobj, "queue");
@@ -921,11 +922,11 @@ int blk_register_queue(struct gendisk *disk)
	}
	mutex_lock(&q->sysfs_lock);

	mutex_lock(&q->debugfs_mutex);
	memflags = blk_debugfs_lock(q);
	q->debugfs_dir = debugfs_create_dir(disk->disk_name, blk_debugfs_root);
	if (queue_is_mq(q))
		blk_mq_debugfs_register(q);
	mutex_unlock(&q->debugfs_mutex);
	blk_debugfs_unlock(q, memflags);

	ret = disk_register_independent_access_ranges(disk);
	if (ret)
+6 −4
Original line number Diff line number Diff line
@@ -776,6 +776,7 @@ void wbt_init_enable_default(struct gendisk *disk)
{
	struct request_queue *q = disk->queue;
	struct rq_wb *rwb;
	unsigned int memflags;

	if (!__wbt_enable_default(disk))
		return;
@@ -789,9 +790,9 @@ void wbt_init_enable_default(struct gendisk *disk)
		return;
	}

	mutex_lock(&q->debugfs_mutex);
	memflags = blk_debugfs_lock(q);
	blk_mq_debugfs_register_rq_qos(q);
	mutex_unlock(&q->debugfs_mutex);
	blk_debugfs_unlock(q, memflags);
}

static u64 wbt_default_latency_nsec(struct request_queue *q)
@@ -1015,9 +1016,10 @@ int wbt_set_lat(struct gendisk *disk, s64 val)
	blk_mq_unquiesce_queue(q);
out:
	blk_mq_unfreeze_queue(q, memflags);
	mutex_lock(&q->debugfs_mutex);

	memflags = blk_debugfs_lock(q);
	blk_mq_debugfs_register_rq_qos(q);
	mutex_unlock(&q->debugfs_mutex);
	blk_debugfs_unlock(q, memflags);

	return ret;
}
+31 −0
Original line number Diff line number Diff line
@@ -729,4 +729,35 @@ static inline void blk_unfreeze_release_lock(struct request_queue *q)
}
#endif

/*
 * debugfs directory and file creation can trigger fs reclaim, which can enter
 * back into the block layer request_queue. This can cause deadlock if the
 * queue is frozen. Use NOIO context together with debugfs_mutex to prevent fs
 * reclaim from triggering block I/O.
 *
 * blk_debugfs_lock_nomemsave() only acquires q->debugfs_mutex and leaves the
 * allocation context untouched; release it with
 * blk_debugfs_unlock_nomemrestore(). Callers that create debugfs entries
 * under the mutex should use blk_debugfs_lock() instead.
 */
static inline void blk_debugfs_lock_nomemsave(struct request_queue *q)
{
	mutex_lock(&q->debugfs_mutex);
}

/*
 * Release q->debugfs_mutex only; no allocation-context flags are restored.
 */
static inline void blk_debugfs_unlock_nomemrestore(struct request_queue *q)
{
	mutex_unlock(&q->debugfs_mutex);
}

/*
 * Enter NOIO allocation context with memalloc_noio_save(), then take
 * q->debugfs_mutex.
 *
 * Returns the flags produced by memalloc_noio_save(). The result must not be
 * ignored (__must_check): pass it to blk_debugfs_unlock() when done.
 */
static inline unsigned int __must_check blk_debugfs_lock(struct request_queue *q)
{
	unsigned int saved_flags;

	saved_flags = memalloc_noio_save();
	blk_debugfs_lock_nomemsave(q);

	return saved_flags;
}

/*
 * Counterpart of blk_debugfs_lock(): drop q->debugfs_mutex first, then hand
 * @memflags (the value blk_debugfs_lock() returned) to
 * memalloc_noio_restore().
 */
static inline void blk_debugfs_unlock(struct request_queue *q,
				      unsigned int memflags)
{
	blk_debugfs_unlock_nomemrestore(q);
	memalloc_noio_restore(memflags);
}

#endif /* BLK_INTERNAL_H */
Loading