Commit 34845d92 authored by Linus Torvalds
Browse files

Merge tag 'for-5.19/block-2022-06-02' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:
 "Just a collection of fixes that have been queued up since the initial
  merge window pull request, the majority of which are targeted for
  stable as well.

  One bio_set fix that fixes an issue with the dm adoption of cached bio
  structs that got introduced in this merge window"

* tag 'for-5.19/block-2022-06-02' of git://git.kernel.dk/linux-block:
  block: Fix potential deadlock in blk_ia_range_sysfs_show()
  block: fix bio_clone_blkg_association() to associate with proper blkcg_gq
  block: remove useless BUG_ON() in blk_mq_put_tag()
  blk-mq: do not update io_ticks with passthrough requests
  block: make bioset_exit() fully resilient against being called twice
  block: use bio_queue_enter instead of blk_queue_enter in bio_poll
  block: document BLK_STS_AGAIN usage
  block: take destination bvec offsets into account in bio_copy_data_iter
  blk-iolatency: Fix inflight count imbalances and IO hangs on offline
  blk-mq: don't touch ->tagset in blk_mq_get_sq_hctx
parents 5ac8bdb9 41e46b3c
Loading
Loading
Loading
Loading
+6 −3
Original line number Diff line number Diff line
@@ -722,6 +722,7 @@ static void bio_alloc_cache_destroy(struct bio_set *bs)
		bio_alloc_cache_prune(cache, -1U);
	}
	free_percpu(bs->cache);
	bs->cache = NULL;
}

/**
@@ -1366,10 +1367,12 @@ void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter,
		struct bio_vec src_bv = bio_iter_iovec(src, *src_iter);
		struct bio_vec dst_bv = bio_iter_iovec(dst, *dst_iter);
		unsigned int bytes = min(src_bv.bv_len, dst_bv.bv_len);
		void *src_buf;
		void *src_buf = bvec_kmap_local(&src_bv);
		void *dst_buf = bvec_kmap_local(&dst_bv);

		src_buf = bvec_kmap_local(&src_bv);
		memcpy_to_bvec(&dst_bv, src_buf);
		memcpy(dst_buf, src_buf, bytes);

		kunmap_local(dst_buf);
		kunmap_local(src_buf);

		bio_advance_iter_single(src, src_iter, bytes);
+2 −6
Original line number Diff line number Diff line
@@ -1974,12 +1974,8 @@ EXPORT_SYMBOL_GPL(bio_associate_blkg);
 */
void bio_clone_blkg_association(struct bio *dst, struct bio *src)
{
	if (src->bi_blkg) {
		if (dst->bi_blkg)
			blkg_put(dst->bi_blkg);
		blkg_get(src->bi_blkg);
		dst->bi_blkg = src->bi_blkg;
	}
	if (src->bi_blkg)
		bio_associate_blkg_from_css(dst, bio_blkcg_css(src));
}
EXPORT_SYMBOL_GPL(bio_clone_blkg_association);

+1 −1
Original line number Diff line number Diff line
@@ -939,7 +939,7 @@ int bio_poll(struct bio *bio, struct io_comp_batch *iob, unsigned int flags)

	blk_flush_plug(current->plug, false);

	if (blk_queue_enter(q, BLK_MQ_REQ_NOWAIT))
	if (bio_queue_enter(bio))
		return 0;
	if (queue_is_mq(q)) {
		ret = blk_mq_poll(q, cookie, iob, flags);
+1 −6
Original line number Diff line number Diff line
@@ -54,13 +54,8 @@ static ssize_t blk_ia_range_sysfs_show(struct kobject *kobj,
		container_of(attr, struct blk_ia_range_sysfs_entry, attr);
	struct blk_independent_access_range *iar =
		container_of(kobj, struct blk_independent_access_range, kobj);
	ssize_t ret;

	mutex_lock(&iar->queue->sysfs_lock);
	ret = entry->show(iar, buf);
	mutex_unlock(&iar->queue->sysfs_lock);

	return ret;
	return entry->show(iar, buf);
}

static const struct sysfs_ops blk_ia_range_sysfs_ops = {
+64 −58
Original line number Diff line number Diff line
@@ -87,7 +87,17 @@ struct iolatency_grp;
struct blk_iolatency {
	struct rq_qos rqos;
	struct timer_list timer;
	atomic_t enabled;

	/*
	 * ->enabled is the master enable switch gating the throttling logic and
	 * inflight tracking. The number of cgroups which have iolat enabled is
	 * tracked in ->enable_cnt, and ->enabled is flipped on/off accordingly
	 * from ->enable_work with the request_queue frozen. For details, see
	 * blkiolatency_enable_work_fn().
	 */
	bool enabled;
	atomic_t enable_cnt;
	struct work_struct enable_work;
};

static inline struct blk_iolatency *BLKIOLATENCY(struct rq_qos *rqos)
@@ -95,11 +105,6 @@ static inline struct blk_iolatency *BLKIOLATENCY(struct rq_qos *rqos)
	return container_of(rqos, struct blk_iolatency, rqos);
}

static inline bool blk_iolatency_enabled(struct blk_iolatency *blkiolat)
{
	return atomic_read(&blkiolat->enabled) > 0;
}

struct child_latency_info {
	spinlock_t lock;

@@ -464,7 +469,7 @@ static void blkcg_iolatency_throttle(struct rq_qos *rqos, struct bio *bio)
	struct blkcg_gq *blkg = bio->bi_blkg;
	bool issue_as_root = bio_issue_as_root_blkg(bio);

	if (!blk_iolatency_enabled(blkiolat))
	if (!blkiolat->enabled)
		return;

	while (blkg && blkg->parent) {
@@ -594,7 +599,6 @@ static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio)
	u64 window_start;
	u64 now;
	bool issue_as_root = bio_issue_as_root_blkg(bio);
	bool enabled = false;
	int inflight = 0;

	blkg = bio->bi_blkg;
@@ -605,8 +609,7 @@ static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio)
	if (!iolat)
		return;

	enabled = blk_iolatency_enabled(iolat->blkiolat);
	if (!enabled)
	if (!iolat->blkiolat->enabled)
		return;

	now = ktime_to_ns(ktime_get());
@@ -645,6 +648,7 @@ static void blkcg_iolatency_exit(struct rq_qos *rqos)
	struct blk_iolatency *blkiolat = BLKIOLATENCY(rqos);

	del_timer_sync(&blkiolat->timer);
	flush_work(&blkiolat->enable_work);
	blkcg_deactivate_policy(rqos->q, &blkcg_policy_iolatency);
	kfree(blkiolat);
}
@@ -716,6 +720,44 @@ static void blkiolatency_timer_fn(struct timer_list *t)
	rcu_read_unlock();
}

/**
 * blkiolatency_enable_work_fn - Enable or disable iolatency on the device
 * @work: enable_work of the blk_iolatency of interest
 *
 * iolatency needs to keep track of the number of in-flight IOs per cgroup. This
 * is relatively expensive as it involves walking up the hierarchy twice for
 * every IO. Thus, if iolatency is not enabled in any cgroup for the device, we
 * want to disable the in-flight tracking.
 *
 * We have to make sure that the counting is balanced - we don't want to leak
 * the in-flight counts by disabling accounting in the completion path while IOs
 * are in flight. This is achieved by ensuring that no IO is in flight by
 * freezing the queue while flipping ->enabled. As this requires a sleepable
 * context, ->enabled flipping is punted to this work function.
 */
static void blkiolatency_enable_work_fn(struct work_struct *work)
{
	struct blk_iolatency *blkiolat = container_of(work, struct blk_iolatency,
						      enable_work);
	bool enabled;

	/*
	 * There can only be one instance of this function running for @blkiolat
	 * and it's guaranteed to be executed at least once after the latest
	 * ->enable_cnt modification. Acting on the latest ->enable_cnt is
	 * sufficient.
	 *
	 * Also, we know @blkiolat is safe to access as ->enable_work is flushed
	 * in blkcg_iolatency_exit().
	 */
	/* The counter is collapsed into a bool: a non-zero count means "on". */
	enabled = atomic_read(&blkiolat->enable_cnt);
	if (enabled != blkiolat->enabled) {
		/* Only flip ->enabled while the queue is frozen. */
		blk_mq_freeze_queue(blkiolat->rqos.q);
		blkiolat->enabled = enabled;
		blk_mq_unfreeze_queue(blkiolat->rqos.q);
	}
}

int blk_iolatency_init(struct request_queue *q)
{
	struct blk_iolatency *blkiolat;
@@ -741,17 +783,15 @@ int blk_iolatency_init(struct request_queue *q)
	}

	timer_setup(&blkiolat->timer, blkiolatency_timer_fn, 0);
	INIT_WORK(&blkiolat->enable_work, blkiolatency_enable_work_fn);

	return 0;
}

/*
 * return 1 for enabling iolatency, return -1 for disabling iolatency, otherwise
 * return 0.
 */
static int iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val)
static void iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val)
{
	struct iolatency_grp *iolat = blkg_to_lat(blkg);
	struct blk_iolatency *blkiolat = iolat->blkiolat;
	u64 oldval = iolat->min_lat_nsec;

	iolat->min_lat_nsec = val;
@@ -759,13 +799,15 @@ static int iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val)
	iolat->cur_win_nsec = min_t(u64, iolat->cur_win_nsec,
				    BLKIOLATENCY_MAX_WIN_SIZE);

	if (!oldval && val)
		return 1;
	if (!oldval && val) {
		if (atomic_inc_return(&blkiolat->enable_cnt) == 1)
			schedule_work(&blkiolat->enable_work);
	}
	if (oldval && !val) {
		blkcg_clear_delay(blkg);
		return -1;
		if (atomic_dec_return(&blkiolat->enable_cnt) == 0)
			schedule_work(&blkiolat->enable_work);
	}
	return 0;
}

static void iolatency_clear_scaling(struct blkcg_gq *blkg)
@@ -797,7 +839,6 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf,
	u64 lat_val = 0;
	u64 oldval;
	int ret;
	int enable = 0;

	ret = blkg_conf_prep(blkcg, &blkcg_policy_iolatency, buf, &ctx);
	if (ret)
@@ -832,41 +873,12 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf,
	blkg = ctx.blkg;
	oldval = iolat->min_lat_nsec;

	enable = iolatency_set_min_lat_nsec(blkg, lat_val);
	if (enable) {
		if (!blk_get_queue(blkg->q)) {
			ret = -ENODEV;
			goto out;
		}

		blkg_get(blkg);
	}

	if (oldval != iolat->min_lat_nsec) {
	iolatency_set_min_lat_nsec(blkg, lat_val);
	if (oldval != iolat->min_lat_nsec)
		iolatency_clear_scaling(blkg);
	}

	ret = 0;
out:
	blkg_conf_finish(&ctx);
	if (ret == 0 && enable) {
		struct iolatency_grp *tmp = blkg_to_lat(blkg);
		struct blk_iolatency *blkiolat = tmp->blkiolat;

		blk_mq_freeze_queue(blkg->q);

		if (enable == 1)
			atomic_inc(&blkiolat->enabled);
		else if (enable == -1)
			atomic_dec(&blkiolat->enabled);
		else
			WARN_ON_ONCE(1);

		blk_mq_unfreeze_queue(blkg->q);

		blkg_put(blkg);
		blk_put_queue(blkg->q);
	}
	return ret ?: nbytes;
}

@@ -1005,14 +1017,8 @@ static void iolatency_pd_offline(struct blkg_policy_data *pd)
{
	struct iolatency_grp *iolat = pd_to_lat(pd);
	struct blkcg_gq *blkg = lat_to_blkg(iolat);
	struct blk_iolatency *blkiolat = iolat->blkiolat;
	int ret;

	ret = iolatency_set_min_lat_nsec(blkg, 0);
	if (ret == 1)
		atomic_inc(&blkiolat->enabled);
	if (ret == -1)
		atomic_dec(&blkiolat->enabled);
	iolatency_set_min_lat_nsec(blkg, 0);
	iolatency_clear_scaling(blkg);
}

Loading