Commit 1bf70d08 authored by Nilay Shroff, committed by Jens Axboe
Browse files

block: introduce a dedicated lock for protecting queue elevator updates

A queue's elevator can be updated either when modifying nr_hw_queues
or through the sysfs scheduler attribute. Currently, elevator switching/
updating is protected using q->sysfs_lock, but this has led to lockdep
splats[1] due to inconsistent lock ordering between q->sysfs_lock and
the freeze-lock in multiple block layer call sites.

As the scope of q->sysfs_lock is not well-defined, its (mis)use has
resulted in numerous lockdep warnings. To address this, introduce a new
q->elevator_lock, dedicated specifically to protecting elevator
switches/updates, and use it instead of q->sysfs_lock for that purpose.

While at it, make elv_iosched_load_module() a static function, as it is
only called from elv_iosched_store(). Also, remove redundant parameters
from elv_iosched_load_module() function signature.

[1] https://lore.kernel.org/all/67637e70.050a0220.3157ee.000c.GAE@google.com/



Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Nilay Shroff <nilay@linux.ibm.com>
Link: https://lore.kernel.org/r/20250304102551.2533767-5-nilay@linux.ibm.com


Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent d23977fe
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -429,6 +429,7 @@ struct request_queue *blk_alloc_queue(struct queue_limits *lim, int node_id)

	refcount_set(&q->refs, 1);
	mutex_init(&q->debugfs_mutex);
	mutex_init(&q->elevator_lock);
	mutex_init(&q->sysfs_lock);
	mutex_init(&q->limits_lock);
	mutex_init(&q->rq_qos_mutex);
+7 −8
Original line number Diff line number Diff line
@@ -4467,7 +4467,7 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
	unsigned long i, j;

	/* protect against switching io scheduler  */
	mutex_lock(&q->sysfs_lock);
	mutex_lock(&q->elevator_lock);
	for (i = 0; i < set->nr_hw_queues; i++) {
		int old_node;
		int node = blk_mq_get_hctx_node(set, i);
@@ -4500,7 +4500,7 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,

	xa_for_each_start(&q->hctx_table, j, hctx, j)
		blk_mq_exit_hctx(q, set, hctx, j);
	mutex_unlock(&q->sysfs_lock);
	mutex_unlock(&q->elevator_lock);

	/* unregister cpuhp callbacks for exited hctxs */
	blk_mq_remove_hw_queues_cpuhp(q);
@@ -4933,10 +4933,9 @@ static bool blk_mq_elv_switch_none(struct list_head *head,
	if (!qe)
		return false;

	/* q->elevator needs protection from ->sysfs_lock */
	mutex_lock(&q->sysfs_lock);
	/* Accessing q->elevator needs protection from ->elevator_lock. */
	mutex_lock(&q->elevator_lock);

	/* the check has to be done with holding sysfs_lock */
	if (!q->elevator) {
		kfree(qe);
		goto unlock;
@@ -4950,7 +4949,7 @@ static bool blk_mq_elv_switch_none(struct list_head *head,
	list_add(&qe->node, head);
	elevator_disable(q);
unlock:
	mutex_unlock(&q->sysfs_lock);
	mutex_unlock(&q->elevator_lock);

	return true;
}
@@ -4980,11 +4979,11 @@ static void blk_mq_elv_switch_back(struct list_head *head,
	list_del(&qe->node);
	kfree(qe);

	mutex_lock(&q->sysfs_lock);
	mutex_lock(&q->elevator_lock);
	elevator_switch(q, t);
	/* drop the reference acquired in blk_mq_elv_switch_none */
	elevator_put(t);
	mutex_unlock(&q->sysfs_lock);
	mutex_unlock(&q->elevator_lock);
}

static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
+22 −10
Original line number Diff line number Diff line
@@ -693,10 +693,15 @@ static struct attribute *blk_mq_queue_attrs[] = {
	 * Attributes which are protected with q->sysfs_lock.
	 */
	&queue_requests_entry.attr,
	&elv_iosched_entry.attr,
#ifdef CONFIG_BLK_WBT
	&queue_wb_lat_entry.attr,
#endif
	/*
	 * Attributes which require some form of locking other than
	 * q->sysfs_lock.
	 */
	&elv_iosched_entry.attr,

	/*
	 * Attributes which don't require locking.
	 */
@@ -865,15 +870,19 @@ int blk_register_queue(struct gendisk *disk)
	if (ret)
		goto out_debugfs_remove;

	if (q->elevator) {
		ret = elv_register_queue(q, false);
	ret = blk_crypto_sysfs_register(disk);
	if (ret)
		goto out_unregister_ia_ranges;
	}

	ret = blk_crypto_sysfs_register(disk);
	if (ret)
		goto out_elv_unregister;
	mutex_lock(&q->elevator_lock);
	if (q->elevator) {
		ret = elv_register_queue(q, false);
		if (ret) {
			mutex_unlock(&q->elevator_lock);
			goto out_crypto_sysfs_unregister;
		}
	}
	mutex_unlock(&q->elevator_lock);

	blk_queue_flag_set(QUEUE_FLAG_REGISTERED, q);
	wbt_enable_default(disk);
@@ -898,8 +907,8 @@ int blk_register_queue(struct gendisk *disk)

	return ret;

out_elv_unregister:
	elv_unregister_queue(q);
out_crypto_sysfs_unregister:
	blk_crypto_sysfs_unregister(disk);
out_unregister_ia_ranges:
	disk_unregister_independent_access_ranges(disk);
out_debugfs_remove:
@@ -945,8 +954,11 @@ void blk_unregister_queue(struct gendisk *disk)
		blk_mq_sysfs_unregister(disk);
	blk_crypto_sysfs_unregister(disk);

	mutex_lock(&q->sysfs_lock);
	mutex_lock(&q->elevator_lock);
	elv_unregister_queue(q);
	mutex_unlock(&q->elevator_lock);

	mutex_lock(&q->sysfs_lock);
	disk_unregister_independent_access_ranges(disk);
	mutex_unlock(&q->sysfs_lock);

+16 −19
Original line number Diff line number Diff line
@@ -457,7 +457,7 @@ int elv_register_queue(struct request_queue *q, bool uevent)
	struct elevator_queue *e = q->elevator;
	int error;

	lockdep_assert_held(&q->sysfs_lock);
	lockdep_assert_held(&q->elevator_lock);

	error = kobject_add(&e->kobj, &q->disk->queue_kobj, "iosched");
	if (!error) {
@@ -481,7 +481,7 @@ void elv_unregister_queue(struct request_queue *q)
{
	struct elevator_queue *e = q->elevator;

	lockdep_assert_held(&q->sysfs_lock);
	lockdep_assert_held(&q->elevator_lock);

	if (e && test_and_clear_bit(ELEVATOR_FLAG_REGISTERED, &e->flags)) {
		kobject_uevent(&e->kobj, KOBJ_REMOVE);
@@ -618,7 +618,7 @@ int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
	unsigned int memflags;
	int ret;

	lockdep_assert_held(&q->sysfs_lock);
	lockdep_assert_held(&q->elevator_lock);

	memflags = blk_mq_freeze_queue(q);
	blk_mq_quiesce_queue(q);
@@ -655,7 +655,7 @@ void elevator_disable(struct request_queue *q)
{
	unsigned int memflags;

	lockdep_assert_held(&q->sysfs_lock);
	lockdep_assert_held(&q->elevator_lock);

	memflags = blk_mq_freeze_queue(q);
	blk_mq_quiesce_queue(q);
@@ -700,28 +700,23 @@ static int elevator_change(struct request_queue *q, const char *elevator_name)
	return ret;
}

void elv_iosched_load_module(struct gendisk *disk, const char *buf,
			     size_t count)
static void elv_iosched_load_module(char *elevator_name)
{
	char elevator_name[ELV_NAME_MAX];
	struct elevator_type *found;
	const char *name;

	strscpy(elevator_name, buf, sizeof(elevator_name));
	name = strstrip(elevator_name);

	spin_lock(&elv_list_lock);
	found = __elevator_find(name);
	found = __elevator_find(elevator_name);
	spin_unlock(&elv_list_lock);

	if (!found)
		request_module("%s-iosched", name);
		request_module("%s-iosched", elevator_name);
}

ssize_t elv_iosched_store(struct gendisk *disk, const char *buf,
			  size_t count)
{
	char elevator_name[ELV_NAME_MAX];
	char *name;
	int ret;
	unsigned int memflags;
	struct request_queue *q = disk->queue;
@@ -731,16 +726,18 @@ ssize_t elv_iosched_store(struct gendisk *disk, const char *buf,
	 * queue to ensure that the module file can be read when the request
	 * queue is the one for the device storing the module file.
	 */
	elv_iosched_load_module(disk, buf, count);
	strscpy(elevator_name, buf, sizeof(elevator_name));
	name = strstrip(elevator_name);

	elv_iosched_load_module(name);

	mutex_lock(&q->sysfs_lock);
	memflags = blk_mq_freeze_queue(q);
	ret = elevator_change(q, strstrip(elevator_name));
	mutex_lock(&q->elevator_lock);
	ret = elevator_change(q, name);
	if (!ret)
		ret = count;
	mutex_unlock(&q->elevator_lock);
	blk_mq_unfreeze_queue(q, memflags);
	mutex_unlock(&q->sysfs_lock);
	return ret;
}

@@ -751,7 +748,7 @@ ssize_t elv_iosched_show(struct gendisk *disk, char *name)
	struct elevator_type *cur = NULL, *e;
	int len = 0;

	mutex_lock(&q->sysfs_lock);
	mutex_lock(&q->elevator_lock);
	if (!q->elevator) {
		len += sprintf(name+len, "[none] ");
	} else {
@@ -769,7 +766,7 @@ ssize_t elv_iosched_show(struct gendisk *disk, char *name)
	spin_unlock(&elv_list_lock);

	len += sprintf(name+len, "\n");
	mutex_unlock(&q->sysfs_lock);
	mutex_unlock(&q->elevator_lock);

	return len;
}
+0 −2
Original line number Diff line number Diff line
@@ -148,8 +148,6 @@ extern void elv_unregister(struct elevator_type *);
 * io scheduler sysfs switching
 */
ssize_t elv_iosched_show(struct gendisk *disk, char *page);
void elv_iosched_load_module(struct gendisk *disk, const char *page,
		size_t count);
ssize_t elv_iosched_store(struct gendisk *disk, const char *page, size_t count);

extern bool elv_bio_merge_ok(struct request *, struct bio *);
Loading