Commit 7d337eef authored by Yu Kuai, committed by Jens Axboe
Browse files

blk-mq: fix elevator depth_updated method



Current depth_updated has some problems:

1) depth_updated() is called for each hctx, but all elevators update
async_depth at the disk level, which is unrelated to any single hctx;
2) In blk_mq_update_nr_requests(), if the update of a previous hctx
succeeded and the update of this hctx fails, q->nr_requests will not be
updated, while async_depth has already been updated with the new
nr_requests by the previous depth_updated() call;
3) All elevators now use q->nr_requests to calculate async_depth;
however, q->nr_requests still holds the old value when depth_updated() is
called from blk_mq_update_nr_requests();

These problems first appeared in the error path, then in mq-deadline, and
most recently in bfq and kyber. Fix them by:

- pass in request_queue instead of hctx;
- move depth_updated() after q->nr_requests is updated in
  blk_mq_update_nr_requests();
- add depth_updated() call inside init_sched() method to initialize
  async_depth;
- remove init_hctx() method for mq-deadline and bfq that is useless now;

Fixes: 77f1e0a5 ("bfq: update internal depth state when queue depth changes")
Fixes: 39823b47 ("block/mq-deadline: Fix the tag reservation code")
Fixes: 42e6c6ce ("lib/sbitmap: convert shallow_depth from one word to the whole sbitmap")
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Li Nan <linan122@huawei.com>
Reviewed-by: Nilay Shroff <nilay@linux.ibm.com>
Link: https://lore.kernel.org/r/20250821060612.1729939-2-yukuai1@huaweicloud.com


Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent 225dc96f
Loading
Loading
Loading
Loading
+5 −17
Original line number Diff line number Diff line
@@ -7109,9 +7109,10 @@ void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg)
 * See the comments on bfq_limit_depth for the purpose of
 * the depths set in the function. The function returns nothing; the
 * minimum shallow depth it applies to the scheduler tags is 1.
 */
static void bfq_update_depths(struct bfq_data *bfqd, struct sbitmap_queue *bt)
static void bfq_depth_updated(struct request_queue *q)
{
	unsigned int nr_requests = bfqd->queue->nr_requests;
	struct bfq_data *bfqd = q->elevator->elevator_data;
	unsigned int nr_requests = q->nr_requests;

	/*
	 * In-word depths if no bfq_queue is being weight-raised:
@@ -7143,21 +7144,8 @@ static void bfq_update_depths(struct bfq_data *bfqd, struct sbitmap_queue *bt)
	bfqd->async_depths[1][0] = max((nr_requests * 3) >> 4, 1U);
	/* no more than ~37% of tags for sync writes (~20% extra tags) */
	bfqd->async_depths[1][1] = max((nr_requests * 6) >> 4, 1U);
}

static void bfq_depth_updated(struct blk_mq_hw_ctx *hctx)
{
	struct bfq_data *bfqd = hctx->queue->elevator->elevator_data;
	struct blk_mq_tags *tags = hctx->sched_tags;

	bfq_update_depths(bfqd, &tags->bitmap_tags);
	sbitmap_queue_min_shallow_depth(&tags->bitmap_tags, 1);
}

static int bfq_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int index)
{
	bfq_depth_updated(hctx);
	return 0;
	blk_mq_set_min_shallow_depth(q, 1);
}

static void bfq_exit_queue(struct elevator_queue *e)
@@ -7369,6 +7357,7 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_queue *eq)
		goto out_free;
	bfq_init_root_group(bfqd->root_group, bfqd);
	bfq_init_entity(&bfqd->oom_bfqq.entity, bfqd->root_group);
	bfq_depth_updated(q);

	/* We dispatch from request queue wide instead of hw queue */
	blk_queue_flag_set(QUEUE_FLAG_SQ_SCHED, q);
@@ -7628,7 +7617,6 @@ static struct elevator_type iosched_bfq_mq = {
		.request_merged		= bfq_request_merged,
		.has_work		= bfq_has_work,
		.depth_updated		= bfq_depth_updated,
		.init_hctx		= bfq_init_hctx,
		.init_sched		= bfq_init_queue,
		.exit_sched		= bfq_exit_queue,
	},
+11 −0
Original line number Diff line number Diff line
@@ -92,4 +92,15 @@ static inline bool blk_mq_sched_needs_restart(struct blk_mq_hw_ctx *hctx)
	return test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
}

static inline void blk_mq_set_min_shallow_depth(struct request_queue *q,
						unsigned int depth)
{
	struct blk_mq_hw_ctx *hctx;
	unsigned long i;

	queue_for_each_hw_ctx(q, hctx, i)
		sbitmap_queue_min_shallow_depth(&hctx->sched_tags->bitmap_tags,
						depth);
}

#endif
+12 −11
Original line number Diff line number Diff line
@@ -4951,20 +4951,21 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
						      false);
		}
		if (ret)
			break;
		if (q->elevator && q->elevator->type->ops.depth_updated)
			q->elevator->type->ops.depth_updated(hctx);
			goto out;
	}
	if (!ret) {

	q->nr_requests = nr;
	if (q->elevator && q->elevator->type->ops.depth_updated)
		q->elevator->type->ops.depth_updated(q);

	if (blk_mq_is_shared_tags(set->flags)) {
		if (q->elevator)
			blk_mq_tag_update_sched_shared_tags(q);
		else
			blk_mq_tag_resize_shared_tags(set, nr);
	}
	}

out:
	blk_mq_unquiesce_queue(q);

	return ret;
+1 −1
Original line number Diff line number Diff line
@@ -37,7 +37,7 @@ struct elevator_mq_ops {
	void (*exit_sched)(struct elevator_queue *);
	int (*init_hctx)(struct blk_mq_hw_ctx *, unsigned int);
	void (*exit_hctx)(struct blk_mq_hw_ctx *, unsigned int);
	void (*depth_updated)(struct blk_mq_hw_ctx *);
	void (*depth_updated)(struct request_queue *);

	bool (*allow_merge)(struct request_queue *, struct request *, struct bio *);
	bool (*bio_merge)(struct request_queue *, struct bio *, unsigned int);
+9 −10
Original line number Diff line number Diff line
@@ -399,6 +399,14 @@ static struct kyber_queue_data *kyber_queue_data_alloc(struct request_queue *q)
	return ERR_PTR(ret);
}

/*
 * Recompute kyber's async depth from the current queue depth.
 *
 * Sets kqd->async_depth to q->nr_requests * KYBER_ASYNC_PERCENT / 100 and
 * then applies that value as the minimum shallow depth for the scheduler
 * tags of every hardware context of @q.
 *
 * q->elevator->elevator_data is read here, so the elevator must already be
 * attached to @q with its kyber_queue_data set before this is called.
 */
static void kyber_depth_updated(struct request_queue *q)
{
	struct kyber_queue_data *kqd = q->elevator->elevator_data;

	kqd->async_depth = q->nr_requests * KYBER_ASYNC_PERCENT / 100U;
	blk_mq_set_min_shallow_depth(q, kqd->async_depth);
}

static int kyber_init_sched(struct request_queue *q, struct elevator_queue *eq)
{
	struct kyber_queue_data *kqd;
@@ -413,6 +421,7 @@ static int kyber_init_sched(struct request_queue *q, struct elevator_queue *eq)

	eq->elevator_data = kqd;
	q->elevator = eq;
	kyber_depth_updated(q);

	return 0;
}
@@ -440,15 +449,6 @@ static void kyber_ctx_queue_init(struct kyber_ctx_queue *kcq)
		INIT_LIST_HEAD(&kcq->rq_list[i]);
}

static void kyber_depth_updated(struct blk_mq_hw_ctx *hctx)
{
	struct kyber_queue_data *kqd = hctx->queue->elevator->elevator_data;
	struct blk_mq_tags *tags = hctx->sched_tags;

	kqd->async_depth = hctx->queue->nr_requests * KYBER_ASYNC_PERCENT / 100U;
	sbitmap_queue_min_shallow_depth(&tags->bitmap_tags, kqd->async_depth);
}

static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
{
	struct kyber_hctx_data *khd;
@@ -493,7 +493,6 @@ static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
	khd->batching = 0;

	hctx->sched_data = khd;
	kyber_depth_updated(hctx);

	return 0;

Loading