Commit 596dce11, authored by Ming Lei, committed by Jens Axboe
Browse files

block: simplify elevator reattachment for updating nr_hw_queues



blk_mq_update_nr_hw_queues() changes nr_hw_queues, and the elevator data
depends on that value, so the elevator has to be reattached. Call
elevator_switch() to force the reattachment.

Add elv_update_nr_hw_queues() as a simple helper that
blk_mq_update_nr_hw_queues() uses to reattach the elevator, since an
elevator switch isn't likely while blk_mq_update_nr_hw_queues() is running.
This removes the existing code that switches to 'none' and then switches
back.

Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Nilay Shroff <nilay@linux.ibm.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/20250505141805.2751237-14-ming.lei@redhat.com


Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent ac55b71a
Loading
Loading
Loading
Loading
+1 −89
Original line number Diff line number Diff line
@@ -4987,88 +4987,10 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
	return ret;
}

/*
 * request_queue and elevator_type pair.
 * It is just used by __blk_mq_update_nr_hw_queues to cache
 * the elevator_type associated with a request_queue.
 *
 * Entries are allocated and filled in by blk_mq_elv_switch_none() and are
 * looked up, unlinked and freed by blk_mq_elv_switch_back().
 */
struct blk_mq_qe_pair {
	/* links this entry into the caller-provided list of cached pairs */
	struct list_head node;
	/* the queue whose scheduler was cached; used as the lookup key */
	struct request_queue *q;
	/* scheduler type that was attached to @q when the entry was created */
	struct elevator_type *type;
};

/*
 * Remember which io scheduler @q currently uses by adding a qe pair to
 * @head, then disable that scheduler on the queue.
 *
 * Returns false only when the qe pair cannot be allocated. A queue that
 * has no scheduler attached is left untouched and still yields true.
 */
static bool blk_mq_elv_switch_none(struct list_head *head,
		struct request_queue *q)
{
	struct blk_mq_qe_pair *pair;

	pair = kmalloc(sizeof(*pair), GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY);
	if (!pair)
		return false;

	/* q->elevator may only be inspected while holding ->elevator_lock */
	mutex_lock(&q->elevator_lock);

	if (q->elevator) {
		INIT_LIST_HEAD(&pair->node);
		pair->q = q;
		pair->type = q->elevator->type;
		/* hold a reference on the elevator type: we switch back later */
		__elevator_get(pair->type);
		list_add(&pair->node, head);
		elevator_disable(q);
	} else {
		/* nothing to cache for a queue without a scheduler */
		kfree(pair);
	}

	mutex_unlock(&q->elevator_lock);

	return true;
}

static struct blk_mq_qe_pair *blk_lookup_qe_pair(struct list_head *head,
						struct request_queue *q)
{
	struct blk_mq_qe_pair *qe;

	list_for_each_entry(qe, head, node)
		if (qe->q == q)
			return qe;

	return NULL;
}

/*
 * Switch @q back to the io scheduler type cached for it on @head.
 *
 * The cached entry is unlinked and freed before the switch is attempted.
 * Queues without a cached entry are left alone. The return value of
 * elevator_switch() is not checked.
 */
static void blk_mq_elv_switch_back(struct list_head *head,
				  struct request_queue *q)
{
	struct blk_mq_qe_pair *pair = blk_lookup_qe_pair(head, q);
	struct elevator_type *saved;

	/* no entry was cached for this queue: nothing to restore */
	if (!pair)
		return;

	/* keep the type pointer, the list entry itself is no longer needed */
	saved = pair->type;
	list_del(&pair->node);
	kfree(pair);

	mutex_lock(&q->elevator_lock);
	elevator_switch(q, saved->elevator_name);
	/* release the reference taken in blk_mq_elv_switch_none */
	elevator_put(saved);
	mutex_unlock(&q->elevator_lock);
}

static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
							int nr_hw_queues)
{
	struct request_queue *q;
	LIST_HEAD(head);
	int prev_nr_hw_queues = set->nr_hw_queues;
	unsigned int memflags;
	int i;
@@ -5086,15 +5008,6 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
	list_for_each_entry(q, &set->tag_list, tag_set_list)
		blk_mq_freeze_queue_nomemsave(q);

	/*
	 * Switch IO scheduler to 'none', cleaning up the data associated
	 * with the previous scheduler. We will switch back once we are done
	 * updating the new sw to hw queue mappings.
	 */
	list_for_each_entry(q, &set->tag_list, tag_set_list)
		if (!blk_mq_elv_switch_none(&head, q))
			goto switch_back;

	list_for_each_entry(q, &set->tag_list, tag_set_list) {
		blk_mq_debugfs_unregister_hctxs(q);
		blk_mq_sysfs_unregister_hctxs(q);
@@ -5128,9 +5041,8 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
		blk_mq_debugfs_register_hctxs(q);
	}

switch_back:
	list_for_each_entry(q, &set->tag_list, tag_set_list)
		blk_mq_elv_switch_back(&head, q);
		elv_update_nr_hw_queues(q);

	list_for_each_entry(q, &set->tag_list, tag_set_list)
		blk_mq_unfreeze_queue_nomemrestore(q);
+1 −2
Original line number Diff line number Diff line
@@ -322,8 +322,7 @@ bool blk_bio_list_merge(struct request_queue *q, struct list_head *list,

bool blk_insert_flush(struct request *rq);

int elevator_switch(struct request_queue *q, const char *name);
void elevator_disable(struct request_queue *q);
void elv_update_nr_hw_queues(struct request_queue *q);
void elevator_exit(struct request_queue *q);
int elv_register_queue(struct request_queue *q, bool uevent);
void elv_unregister_queue(struct request_queue *q);
+19 −1
Original line number Diff line number Diff line
@@ -621,7 +621,7 @@ void elevator_init_mq(struct request_queue *q)
 * If switching fails, we are most likely running out of memory and not able
 * to restore the old io scheduler, so leaving the io scheduler being none.
 */
int elevator_switch(struct request_queue *q, const char *name)
static int elevator_switch(struct request_queue *q, const char *name)
{
	struct elevator_type *new_e = NULL;
	int ret = 0;
@@ -682,6 +682,24 @@ static int elevator_change(struct request_queue *q, const char *elevator_name)
	return elevator_switch(q, elevator_name);
}

/*
 * The I/O scheduler depends on the number of hardware queues, so this forces
 * a reattachment of the currently attached scheduler when nr_hw_queues
 * changes.
 *
 * The caller is expected to have frozen @q; a one-time warning fires if
 * q->mq_freeze_depth is zero. q->elevator_lock is taken internally.
 *
 * Nothing is done when @q has no scheduler attached, when the queue is
 * dying, or when the queue is not registered.
 *
 * The result of elevator_switch() is not checked here; per the comment on
 * elevator_switch(), a failed switch leaves the io scheduler as none.
 */
void elv_update_nr_hw_queues(struct request_queue *q)
{
	WARN_ON_ONCE(q->mq_freeze_depth == 0);

	mutex_lock(&q->elevator_lock);
	/*
	 * Reattach only for a live, registered queue. The test must be
	 * blk_queue_registered(), not its negation: otherwise every queue in
	 * normal operation would be skipped and keep scheduler data sized for
	 * the old number of hardware queues.
	 */
	if (q->elevator && !blk_queue_dying(q) && blk_queue_registered(q)) {
		const char *name = q->elevator->type->elevator_name;

		/* force to reattach elevator after nr_hw_queue is updated */
		elevator_switch(q, name);
	}
	mutex_unlock(&q->elevator_lock);
}

static void elv_iosched_load_module(char *elevator_name)
{
	struct elevator_type *found;