Commit 115cd471 authored by Linus Torvalds
Browse files

Merge tag 'for-5.19/block-2022-05-22' of git://git.kernel.dk/linux-block

Pull block updates from Jens Axboe:
 "Here are the core block changes for 5.19. This contains:

   - blk-throttle accounting fix (Laibin)

   - Series removing redundant assignments (Michal)

   - Expose bio cache via the bio_set, so that DM can use it (Mike)

   - Finish off the bio allocation interface cleanups by dealing with
     the weirdest member of the family. bio_kmalloc combines a kmalloc
     for the bio and bio_vecs with a hidden bio_init call and magic
     cleanup semantics (Christoph)

   - Clean up the block layer API so that APIs consumed by file systems
     are (almost) only struct block_device based, so that file systems
     don't have to poke into block layer internals like the
     request_queue (Christoph)

   - Clean up the blk_execute_rq* API (Christoph)

   - Clean up various loose ends in the blk-cgroup code to make it easier
     to follow in preparation of reworking the blkcg assignment for bios
     (Christoph)

   - Fix use-after-free issues in BFQ when processes with merged queues
     get moved to different cgroups (Jan)

   - BFQ fixes (Jan)

   - Various fixes and cleanups (Bart, Chengming, Fanjun, Julia, Ming,
     Wolfgang, me)"

* tag 'for-5.19/block-2022-05-22' of git://git.kernel.dk/linux-block: (83 commits)
  blk-mq: fix typo in comment
  bfq: Remove bfq_requeue_request_body()
  bfq: Remove superfluous conversion from RQ_BIC()
  bfq: Allow current waker to defend against a tentative one
  bfq: Relax waker detection for shared queues
  blk-cgroup: delete rcu_read_lock_held() WARN_ON_ONCE()
  blk-throttle: Set BIO_THROTTLED when bio has been throttled
  blk-cgroup: Remove unnecessary rcu_read_lock/unlock()
  blk-cgroup: always terminate io.stat lines
  block, bfq: make bfq_has_work() more accurate
  block, bfq: protect 'bfqd->queued' by 'bfqd->lock'
  block: cleanup the VM accounting in submit_bio
  block: Fix the bio.bi_opf comment
  block: reorder the REQ_ flags
  blk-iocost: combine local_stat and desc_stat to stat
  block: improve the error message from bio_check_eod
  block: allow passing a NULL bdev to bio_alloc_clone/bio_init_clone
  block: remove superfluous calls to blkcg_bio_issue_init
  kthread: unexport kthread_blkcg
  blk-cgroup: cleanup blkcg_maybe_throttle_current
  ...
parents f6792c87 2aaf5160
Loading
Loading
Loading
Loading
+0 −2
Original line number Diff line number Diff line
@@ -483,7 +483,6 @@ static void ubd_handler(void)
			if ((io_req->error == BLK_STS_NOTSUPP) && (req_op(io_req->req) == REQ_OP_DISCARD)) {
				blk_queue_max_discard_sectors(io_req->req->q, 0);
				blk_queue_max_write_zeroes_sectors(io_req->req->q, 0);
				blk_queue_flag_clear(QUEUE_FLAG_DISCARD, io_req->req->q);
			}
			blk_mq_end_request(io_req->req, io_req->error);
			kfree(io_req);
@@ -803,7 +802,6 @@ static int ubd_open_dev(struct ubd *ubd_dev)
		ubd_dev->queue->limits.discard_alignment = SECTOR_SIZE;
		blk_queue_max_discard_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
		blk_queue_max_write_zeroes_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
		blk_queue_flag_set(QUEUE_FLAG_DISCARD, ubd_dev->queue);
	}
	blk_queue_flag_set(QUEUE_FLAG_NONROT, ubd_dev->queue);
	return 0;
+1 −0
Original line number Diff line number Diff line
@@ -16,6 +16,7 @@ obj-$(CONFIG_BLK_DEV_BSG_COMMON) += bsg.o
obj-$(CONFIG_BLK_DEV_BSGLIB)	+= bsg-lib.o
obj-$(CONFIG_BLK_CGROUP)	+= blk-cgroup.o
obj-$(CONFIG_BLK_CGROUP_RWSTAT)	+= blk-cgroup-rwstat.o
obj-$(CONFIG_BLK_CGROUP_FC_APPID) += blk-cgroup-fc-appid.o
obj-$(CONFIG_BLK_DEV_THROTTLING)	+= blk-throttle.o
obj-$(CONFIG_BLK_CGROUP_IOPRIO)	+= blk-ioprio.o
obj-$(CONFIG_BLK_CGROUP_IOLATENCY)	+= blk-iolatency.o
+0 −2
Original line number Diff line number Diff line
@@ -65,7 +65,6 @@ int badblocks_check(struct badblocks *bb, sector_t s, int sectors,
		s >>= bb->shift;
		target += (1<<bb->shift) - 1;
		target >>= bb->shift;
		sectors = target - s;
	}
	/* 'target' is now the first block after the bad range */

@@ -345,7 +344,6 @@ int badblocks_clear(struct badblocks *bb, sector_t s, int sectors)
		s += (1<<bb->shift) - 1;
		s >>= bb->shift;
		target >>= bb->shift;
		sectors = target - s;
	}

	write_seqlock_irq(&bb->lock);
+67 −44
Original line number Diff line number Diff line
@@ -557,6 +557,7 @@ static void bfq_pd_init(struct blkg_policy_data *pd)
				   */
	bfqg->bfqd = bfqd;
	bfqg->active_entities = 0;
	bfqg->online = true;
	bfqg->rq_pos_tree = RB_ROOT;
}

@@ -585,28 +586,11 @@ static void bfq_group_set_parent(struct bfq_group *bfqg,
	entity->sched_data = &parent->sched_data;
}

static struct bfq_group *bfq_lookup_bfqg(struct bfq_data *bfqd,
					 struct blkcg *blkcg)
static void bfq_link_bfqg(struct bfq_data *bfqd, struct bfq_group *bfqg)
{
	struct blkcg_gq *blkg;

	blkg = blkg_lookup(blkcg, bfqd->queue);
	if (likely(blkg))
		return blkg_to_bfqg(blkg);
	return NULL;
}

struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd,
				     struct blkcg *blkcg)
{
	struct bfq_group *bfqg, *parent;
	struct bfq_group *parent;
	struct bfq_entity *entity;

	bfqg = bfq_lookup_bfqg(bfqd, blkcg);

	if (unlikely(!bfqg))
		return NULL;

	/*
	 * Update chain of bfq_groups as we might be handling a leaf group
	 * which, along with some of its relatives, has not been hooked yet
@@ -623,9 +607,25 @@ struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd,
			bfq_group_set_parent(curr_bfqg, parent);
		}
	}
}

/**
 * bfq_bio_bfqg - pick the bfq_group that @bio should be accounted to.
 * @bfqd: scheduler data; its root_group is the fallback result.
 * @bio: bio whose blkg chain is walked and whose association is updated.
 *
 * Starts at @bio->bi_blkg and follows ->parent links until it reaches a
 * blkg whose bfq_group has ->online set. For that group,
 * bio_associate_blkg_from_css() is called with the css of the blkg's blkcg
 * and the group is returned. If the walk runs off the end of the chain, the
 * same call is made with the root group's blkcg css instead.
 *
 * Return: the first online bfq_group found on the chain, otherwise
 * @bfqd->root_group.
 */
struct bfq_group *bfq_bio_bfqg(struct bfq_data *bfqd, struct bio *bio)
{
	struct blkcg_gq *blkg = bio->bi_blkg;
	struct bfq_group *bfqg;

	/* Walk towards the root, skipping groups that are not online. */
	while (blkg) {
		bfqg = blkg_to_bfqg(blkg);
		if (bfqg->online) {
			/* Point the bio at the blkcg of the group actually used. */
			bio_associate_blkg_from_css(bio, &blkg->blkcg->css);
			return bfqg;
		}
		blkg = blkg->parent;
	}
	/* No online group on the chain: fall back to the root group. */
	bio_associate_blkg_from_css(bio,
				&bfqg_to_blkg(bfqd->root_group)->blkcg->css);
	return bfqd->root_group;
}

/**
 * bfq_bfqq_move - migrate @bfqq to @bfqg.
@@ -714,25 +714,15 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
 * Move bic to blkcg, assuming that bfqd->lock is held; which makes
 * sure that the reference to cgroup is valid across the call (see
 * comments in bfq_bic_update_cgroup on this issue)
 *
 * NOTE: an alternative approach might have been to store the current
 * cgroup in bfqq and getting a reference to it, reducing the lookup
 * time here, at the price of slightly more complex code.
 */
static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
static void *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
				     struct bfq_io_cq *bic,
						struct blkcg *blkcg)
				     struct bfq_group *bfqg)
{
	struct bfq_queue *async_bfqq = bic_to_bfqq(bic, 0);
	struct bfq_queue *sync_bfqq = bic_to_bfqq(bic, 1);
	struct bfq_group *bfqg;
	struct bfq_entity *entity;

	bfqg = bfq_find_set_group(bfqd, blkcg);

	if (unlikely(!bfqg))
		bfqg = bfqd->root_group;

	if (async_bfqq) {
		entity = &async_bfqq->entity;

@@ -743,9 +733,39 @@ static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
	}

	if (sync_bfqq) {
		entity = &sync_bfqq->entity;
		if (entity->sched_data != &bfqg->sched_data)
		if (!sync_bfqq->new_bfqq && !bfq_bfqq_coop(sync_bfqq)) {
			/* We are the only user of this bfqq, just move it */
			if (sync_bfqq->entity.sched_data != &bfqg->sched_data)
				bfq_bfqq_move(bfqd, sync_bfqq, bfqg);
		} else {
			struct bfq_queue *bfqq;

			/*
			 * The queue was merged to a different queue. Check
			 * that the merge chain still belongs to the same
			 * cgroup.
			 */
			for (bfqq = sync_bfqq; bfqq; bfqq = bfqq->new_bfqq)
				if (bfqq->entity.sched_data !=
				    &bfqg->sched_data)
					break;
			if (bfqq) {
				/*
				 * Some queue changed cgroup so the merge is
				 * not valid anymore. We cannot easily just
				 * cancel the merge (by clearing new_bfqq) as
				 * there may be other processes using this
				 * queue and holding refs to all queues below
				 * sync_bfqq->new_bfqq. Similarly if the merge
				 * already happened, we need to detach from
				 * bfqq now so that we cannot merge bio to a
				 * request from the old cgroup.
				 */
				bfq_put_cooperator(sync_bfqq);
				bfq_release_process_ref(bfqd, sync_bfqq);
				bic_set_bfqq(bic, NULL, 1);
			}
		}
	}

	return bfqg;
@@ -754,20 +774,24 @@ static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
{
	struct bfq_data *bfqd = bic_to_bfqd(bic);
	struct bfq_group *bfqg = NULL;
	struct bfq_group *bfqg = bfq_bio_bfqg(bfqd, bio);
	uint64_t serial_nr;

	rcu_read_lock();
	serial_nr = __bio_blkcg(bio)->css.serial_nr;
	serial_nr = bfqg_to_blkg(bfqg)->blkcg->css.serial_nr;

	/*
	 * Check whether blkcg has changed.  The condition may trigger
	 * spuriously on a newly created cic but there's no harm.
	 */
	if (unlikely(!bfqd) || likely(bic->blkcg_serial_nr == serial_nr))
		goto out;
		return;

	bfqg = __bfq_bic_change_cgroup(bfqd, bic, __bio_blkcg(bio));
	/*
	 * New cgroup for this process. Make sure it is linked to bfq internal
	 * cgroup hierarchy.
	 */
	bfq_link_bfqg(bfqd, bfqg);
	__bfq_bic_change_cgroup(bfqd, bic, bfqg);
	/*
	 * Update blkg_path for bfq_log_* functions. We cache this
	 * path, and update it here, for the following
@@ -820,8 +844,6 @@ void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
	 */
	blkg_path(bfqg_to_blkg(bfqg), bfqg->blkg_path, sizeof(bfqg->blkg_path));
	bic->blkcg_serial_nr = serial_nr;
out:
	rcu_read_unlock();
}

/**
@@ -949,6 +971,7 @@ static void bfq_pd_offline(struct blkg_policy_data *pd)

put_async_queues:
	bfq_put_async_queues(bfqd, bfqg);
	bfqg->online = false;

	spin_unlock_irqrestore(&bfqd->lock, flags);
	/*
@@ -1438,7 +1461,7 @@ void bfq_end_wr_async(struct bfq_data *bfqd)
	bfq_end_wr_async_queues(bfqd, bfqd->root_group);
}

struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd, struct blkcg *blkcg)
struct bfq_group *bfq_bio_bfqg(struct bfq_data *bfqd, struct bio *bio)
{
	return bfqd->root_group;
}
+53 −42
Original line number Diff line number Diff line
@@ -374,7 +374,7 @@ static const unsigned long bfq_activation_stable_merging = 600;
 */
static const unsigned long bfq_late_stable_merging = 600;

#define RQ_BIC(rq)		icq_to_bic((rq)->elv.priv[0])
#define RQ_BIC(rq)		((struct bfq_io_cq *)((rq)->elv.priv[0]))
#define RQ_BFQQ(rq)		((rq)->elv.priv[1])

struct bfq_queue *bic_to_bfqq(struct bfq_io_cq *bic, bool is_sync)
@@ -456,6 +456,8 @@ static struct bfq_io_cq *bfq_bic_lookup(struct request_queue *q)
 */
void bfq_schedule_dispatch(struct bfq_data *bfqd)
{
	lockdep_assert_held(&bfqd->lock);

	if (bfqd->queued != 0) {
		bfq_log(bfqd, "schedule dispatch");
		blk_mq_run_hw_queues(bfqd->queue, true);
@@ -2133,9 +2135,7 @@ static void bfq_check_waker(struct bfq_data *bfqd, struct bfq_queue *bfqq,
	if (!bfqd->last_completed_rq_bfqq ||
	    bfqd->last_completed_rq_bfqq == bfqq ||
	    bfq_bfqq_has_short_ttime(bfqq) ||
	    bfqq->dispatched > 0 ||
	    now_ns - bfqd->last_completion >= 4 * NSEC_PER_MSEC ||
	    bfqd->last_completed_rq_bfqq == bfqq->waker_bfqq)
	    now_ns - bfqd->last_completion >= 4 * NSEC_PER_MSEC)
		return;

	/*
@@ -2208,9 +2208,13 @@ static void bfq_add_request(struct request *rq)

	bfq_log_bfqq(bfqd, bfqq, "add_request %d", rq_is_sync(rq));
	bfqq->queued[rq_is_sync(rq)]++;
	bfqd->queued++;
	/*
	 * Updating of 'bfqd->queued' is protected by 'bfqd->lock', however, it
	 * may be read without holding the lock in bfq_has_work().
	 */
	WRITE_ONCE(bfqd->queued, bfqd->queued + 1);

	if (RB_EMPTY_ROOT(&bfqq->sort_list) && bfq_bfqq_sync(bfqq)) {
	if (bfq_bfqq_sync(bfqq) && RQ_BIC(rq)->requests <= 1) {
		bfq_check_waker(bfqd, bfqq, now_ns);

		/*
@@ -2400,7 +2404,11 @@ static void bfq_remove_request(struct request_queue *q,
	if (rq->queuelist.prev != &rq->queuelist)
		list_del_init(&rq->queuelist);
	bfqq->queued[sync]--;
	bfqd->queued--;
	/*
	 * Updating of 'bfqd->queued' is protected by 'bfqd->lock', however, it
	 * may be read without holding the lock in bfq_has_work().
	 */
	WRITE_ONCE(bfqd->queued, bfqd->queued - 1);
	elv_rb_del(&bfqq->sort_list, rq);

	elv_rqhash_del(q, rq);
@@ -2463,10 +2471,17 @@ static bool bfq_bio_merge(struct request_queue *q, struct bio *bio,

	spin_lock_irq(&bfqd->lock);

	if (bic)
	if (bic) {
		/*
		 * Make sure cgroup info is uptodate for current process before
		 * considering the merge.
		 */
		bfq_bic_update_cgroup(bic, bio);

		bfqd->bio_bfqq = bic_to_bfqq(bic, op_is_sync(bio->bi_opf));
	else
	} else {
		bfqd->bio_bfqq = NULL;
	}
	bfqd->bio_bic = bic;

	ret = blk_mq_sched_try_merge(q, bio, nr_segs, &free);
@@ -2496,8 +2511,6 @@ static int bfq_request_merge(struct request_queue *q, struct request **req,
	return ELEVATOR_NO_MERGE;
}

static struct bfq_queue *bfq_init_rq(struct request *rq);

static void bfq_request_merged(struct request_queue *q, struct request *req,
			       enum elv_merge type)
{
@@ -2506,7 +2519,7 @@ static void bfq_request_merged(struct request_queue *q, struct request *req,
	    blk_rq_pos(req) <
	    blk_rq_pos(container_of(rb_prev(&req->rb_node),
				    struct request, rb_node))) {
		struct bfq_queue *bfqq = bfq_init_rq(req);
		struct bfq_queue *bfqq = RQ_BFQQ(req);
		struct bfq_data *bfqd;
		struct request *prev, *next_rq;

@@ -2558,8 +2571,8 @@ static void bfq_request_merged(struct request_queue *q, struct request *req,
static void bfq_requests_merged(struct request_queue *q, struct request *rq,
				struct request *next)
{
	struct bfq_queue *bfqq = bfq_init_rq(rq),
		*next_bfqq = bfq_init_rq(next);
	struct bfq_queue *bfqq = RQ_BFQQ(rq),
		*next_bfqq = RQ_BFQQ(next);

	if (!bfqq)
		goto remove;
@@ -2764,6 +2777,14 @@ bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq)
	if (process_refs == 0 || new_process_refs == 0)
		return NULL;

	/*
	 * Make sure merged queues belong to the same parent. Parents could
	 * have changed since the time we decided the two queues are suitable
	 * for merging.
	 */
	if (new_bfqq->entity.parent != bfqq->entity.parent)
		return NULL;

	bfq_log_bfqq(bfqq->bfqd, bfqq, "scheduling merge with queue %d",
		new_bfqq->pid);

@@ -2901,9 +2922,12 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq,
				struct bfq_queue *new_bfqq =
					bfq_setup_merge(bfqq, stable_merge_bfqq);

				if (new_bfqq) {
					bic->stably_merged = true;
				if (new_bfqq && new_bfqq->bic)
					new_bfqq->bic->stably_merged = true;
					if (new_bfqq->bic)
						new_bfqq->bic->stably_merged =
									true;
				}
				return new_bfqq;
			} else
				return NULL;
@@ -5045,11 +5069,11 @@ static bool bfq_has_work(struct blk_mq_hw_ctx *hctx)
	struct bfq_data *bfqd = hctx->queue->elevator->elevator_data;

	/*
	 * Avoiding lock: a race on bfqd->busy_queues should cause at
	 * Avoiding lock: a race on bfqd->queued should cause at
	 * most a call to dispatch for nothing
	 */
	return !list_empty_careful(&bfqd->dispatch) ||
		bfq_tot_busy_queues(bfqd) > 0;
		READ_ONCE(bfqd->queued);
}

static struct request *__bfq_dispatch_request(struct blk_mq_hw_ctx *hctx)
@@ -5310,7 +5334,7 @@ static void bfq_put_stable_ref(struct bfq_queue *bfqq)
	bfq_put_queue(bfqq);
}

static void bfq_put_cooperator(struct bfq_queue *bfqq)
void bfq_put_cooperator(struct bfq_queue *bfqq)
{
	struct bfq_queue *__bfqq, *next;

@@ -5716,14 +5740,7 @@ static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd,
	struct bfq_queue *bfqq;
	struct bfq_group *bfqg;

	rcu_read_lock();

	bfqg = bfq_find_set_group(bfqd, __bio_blkcg(bio));
	if (!bfqg) {
		bfqq = &bfqd->oom_bfqq;
		goto out;
	}

	bfqg = bfq_bio_bfqg(bfqd, bio);
	if (!is_sync) {
		async_bfqq = bfq_async_queue_prio(bfqd, bfqg, ioprio_class,
						  ioprio);
@@ -5769,8 +5786,6 @@ static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd,

	if (bfqq != &bfqd->oom_bfqq && is_sync && !respawn)
		bfqq = bfq_do_or_sched_stable_merge(bfqd, bfqq, bic);

	rcu_read_unlock();
	return bfqq;
}

@@ -6117,6 +6132,8 @@ static inline void bfq_update_insert_stats(struct request_queue *q,
					   unsigned int cmd_flags) {}
#endif /* CONFIG_BFQ_CGROUP_DEBUG */

static struct bfq_queue *bfq_init_rq(struct request *rq);

static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
			       bool at_head)
{
@@ -6132,18 +6149,15 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
		bfqg_stats_update_legacy_io(q, rq);
#endif
	spin_lock_irq(&bfqd->lock);
	bfqq = bfq_init_rq(rq);
	if (blk_mq_sched_try_insert_merge(q, rq, &free)) {
		spin_unlock_irq(&bfqd->lock);
		blk_mq_free_requests(&free);
		return;
	}

	spin_unlock_irq(&bfqd->lock);

	trace_block_rq_insert(rq);

	spin_lock_irq(&bfqd->lock);
	bfqq = bfq_init_rq(rq);
	if (!bfqq || at_head) {
		if (at_head)
			list_add(&rq->queuelist, &bfqd->dispatch);
@@ -6360,12 +6374,6 @@ static void bfq_completed_request(struct bfq_queue *bfqq, struct bfq_data *bfqd)
		bfq_schedule_dispatch(bfqd);
}

static void bfq_finish_requeue_request_body(struct bfq_queue *bfqq)
{
	bfqq_request_freed(bfqq);
	bfq_put_queue(bfqq);
}

/*
 * The processes associated with bfqq may happen to generate their
 * cumulative I/O at a lower rate than the rate at which the device
@@ -6562,7 +6570,9 @@ static void bfq_finish_requeue_request(struct request *rq)

		bfq_completed_request(bfqq, bfqd);
	}
	bfq_finish_requeue_request_body(bfqq);
	bfqq_request_freed(bfqq);
	bfq_put_queue(bfqq);
	RQ_BIC(rq)->requests--;
	spin_unlock_irqrestore(&bfqd->lock, flags);

	/*
@@ -6796,6 +6806,7 @@ static struct bfq_queue *bfq_init_rq(struct request *rq)

	bfqq_request_allocated(bfqq);
	bfqq->ref++;
	bic->requests++;
	bfq_log_bfqq(bfqd, bfqq, "get_request %p: bfqq %p, %d",
		     rq, bfqq, bfqq->ref);

@@ -6892,8 +6903,8 @@ bfq_idle_slice_timer_body(struct bfq_data *bfqd, struct bfq_queue *bfqq)
	bfq_bfqq_expire(bfqd, bfqq, true, reason);

schedule_dispatch:
	spin_unlock_irqrestore(&bfqd->lock, flags);
	bfq_schedule_dispatch(bfqd);
	spin_unlock_irqrestore(&bfqd->lock, flags);
}

/*
Loading