Commit 2f6b2565 authored by Keith Busch, committed by Jens Axboe
Browse files

block: accumulate memory segment gaps per bio



The blk-mq dma iterator has an optimization for requests that align to
the device's iommu merge boundary. This boundary may be larger than the
device's virtual boundary, but the code had been depending on that queue
limit to know ahead of time if the request is guaranteed to align to
that optimization.

Rather than rely on that queue limit, which many devices may not report,
save the lowest set bit of any boundary gap between each segment in the
bio while checking the segments. The request stores the value for
merging and quickly checking per io if the request can use iova
optimizations.

Signed-off-by: Keith Busch <kbusch@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent 0739c2c6
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -253,6 +253,7 @@ void bio_init(struct bio *bio, struct block_device *bdev, struct bio_vec *table,
	bio->bi_write_hint = 0;
	bio->bi_write_stream = 0;
	bio->bi_status = 0;
	bio->bi_bvec_gap_bit = 0;
	bio->bi_iter.bi_sector = 0;
	bio->bi_iter.bi_size = 0;
	bio->bi_iter.bi_idx = 0;
+3 −0
Original line number Diff line number Diff line
@@ -459,6 +459,8 @@ int blk_rq_append_bio(struct request *rq, struct bio *bio)
	if (rq->bio) {
		if (!ll_back_merge_fn(rq, bio, nr_segs))
			return -EINVAL;
		rq->phys_gap_bit = bio_seg_gap(rq->q, rq->biotail, bio,
					       rq->phys_gap_bit);
		rq->biotail->bi_next = bio;
		rq->biotail = bio;
		rq->__data_len += bio->bi_iter.bi_size;
@@ -469,6 +471,7 @@ int blk_rq_append_bio(struct request *rq, struct bio *bio)
	rq->nr_phys_segments = nr_segs;
	rq->bio = rq->biotail = bio;
	rq->__data_len = bio->bi_iter.bi_size;
	rq->phys_gap_bit = bio->bi_bvec_gap_bit;
	return 0;
}
EXPORT_SYMBOL(blk_rq_append_bio);
+36 −3
Original line number Diff line number Diff line
@@ -302,6 +302,12 @@ static unsigned int bio_split_alignment(struct bio *bio,
	return lim->logical_block_size;
}

/*
 * Describe the junction between two consecutive bvecs as a bitmask: the
 * offset at which @bv begins OR-ed with the offset just past the end of
 * @bvprv (its bv_offset plus bv_len).  No queue limit is consulted here;
 * callers decide how to interpret the resulting bits.
 */
static inline unsigned int bvec_seg_gap(struct bio_vec *bvprv,
					struct bio_vec *bv)
{
	unsigned int prev_end = bvprv->bv_offset + bvprv->bv_len;
	unsigned int next_start = bv->bv_offset;

	return next_start | prev_end;
}

/**
 * bio_split_io_at - check if and where to split a bio
 * @bio:  [in] bio to be split
@@ -319,8 +325,8 @@ int bio_split_io_at(struct bio *bio, const struct queue_limits *lim,
		unsigned *segs, unsigned max_bytes, unsigned len_align_mask)
{
	struct bio_vec bv, bvprv, *bvprvp = NULL;
	unsigned nsegs = 0, bytes = 0, gaps = 0;
	struct bvec_iter iter;
	unsigned nsegs = 0, bytes = 0;

	bio_for_each_bvec(bv, bio, iter) {
		if (bv.bv_offset & lim->dma_alignment ||
@@ -331,8 +337,11 @@ int bio_split_io_at(struct bio *bio, const struct queue_limits *lim,
		 * If the queue doesn't support SG gaps and adding this
		 * offset would create a gap, disallow it.
		 */
		if (bvprvp && bvec_gap_to_prev(lim, bvprvp, bv.bv_offset))
		if (bvprvp) {
			if (bvec_gap_to_prev(lim, bvprvp, bv.bv_offset))
				goto split;
			gaps |= bvec_seg_gap(bvprvp, &bv);
		}

		if (nsegs < lim->max_segments &&
		    bytes + bv.bv_len <= max_bytes &&
@@ -350,6 +359,7 @@ int bio_split_io_at(struct bio *bio, const struct queue_limits *lim,
	}

	*segs = nsegs;
	bio->bi_bvec_gap_bit = ffs(gaps);
	return 0;
split:
	if (bio->bi_opf & REQ_ATOMIC)
@@ -385,6 +395,7 @@ int bio_split_io_at(struct bio *bio, const struct queue_limits *lim,
	 * big IO can be trivial, disable iopoll when split needed.
	 */
	bio_clear_polled(bio);
	bio->bi_bvec_gap_bit = ffs(gaps);
	return bytes >> SECTOR_SHIFT;
}
EXPORT_SYMBOL_GPL(bio_split_io_at);
@@ -721,6 +732,21 @@ static bool blk_atomic_write_mergeable_rqs(struct request *rq,
	return (rq->cmd_flags & REQ_ATOMIC) == (next->cmd_flags & REQ_ATOMIC);
}

/*
 * bio_seg_gap - fold the junction between two bios into a gap bit
 * @q:        queue handed to biovec_phys_mergeable()
 * @prev:     bio that comes first
 * @next:     bio that follows @prev
 * @gaps_bit: gap bit accumulated so far
 *
 * Combines @gaps_bit with the bi_bvec_gap_bit of @prev and of @next using
 * min_not_zero().  If the last bvec of @prev and the first bvec of @next
 * are not physically mergeable, ffs() of the gap between those two bvecs
 * is folded in the same way.  Returns the combined value.
 *
 * NOTE(review): this reads the last/first bvec of both bios, so it presumes
 * they carry data; confirm that data-less bios (e.g. discards going through
 * request merging) cannot reach this helper.
 */
u8 bio_seg_gap(struct request_queue *q, struct bio *prev, struct bio *next,
	       u8 gaps_bit)
{
	struct bio_vec tail, head;
	u8 bit;

	/* Start from what the caller and both bios have already recorded. */
	bit = min_not_zero(gaps_bit, prev->bi_bvec_gap_bit);
	bit = min_not_zero(bit, next->bi_bvec_gap_bit);

	bio_get_last_bvec(prev, &tail);
	bio_get_first_bvec(next, &head);

	/* A physically mergeable junction contributes no new gap. */
	if (biovec_phys_mergeable(q, &tail, &head))
		return bit;

	return min_not_zero(bit, ffs(bvec_seg_gap(&tail, &head)));
}

/*
 * For non-mq, this has to be called with the request spinlock acquired.
 * For mq with scheduling, the appropriate queue wide lock should be held.
@@ -785,6 +811,9 @@ static struct request *attempt_merge(struct request_queue *q,
	if (next->start_time_ns < req->start_time_ns)
		req->start_time_ns = next->start_time_ns;

	req->phys_gap_bit = bio_seg_gap(req->q, req->biotail, next->bio,
					min_not_zero(next->phys_gap_bit,
						     req->phys_gap_bit));
	req->biotail->bi_next = next->bio;
	req->biotail = next->biotail;

@@ -908,6 +937,8 @@ enum bio_merge_status bio_attempt_back_merge(struct request *req,
	if (req->rq_flags & RQF_ZONE_WRITE_PLUGGING)
		blk_zone_write_plug_bio_merged(bio);

	req->phys_gap_bit = bio_seg_gap(req->q, req->biotail, bio,
					req->phys_gap_bit);
	req->biotail->bi_next = bio;
	req->biotail = bio;
	req->__data_len += bio->bi_iter.bi_size;
@@ -942,6 +973,8 @@ static enum bio_merge_status bio_attempt_front_merge(struct request *req,

	blk_update_mixed_merge(req, bio, true);

	req->phys_gap_bit = bio_seg_gap(req->q, bio, req->bio,
					req->phys_gap_bit);
	bio->bi_next = req->bio;
	req->bio = bio;

+1 −2
Original line number Diff line number Diff line
@@ -79,8 +79,7 @@ static bool blk_map_iter_next(struct request *req, struct blk_map_iter *iter,
/*
 * Decide whether @req can take the IOVA mapping path for @dma_dev.
 *
 * Returns true when the mask from req_phys_gap_mask() shares no bits with
 * the device's DMA merge boundary.  The check is made per request rather
 * than from the queue's virt boundary limit, which the commit message notes
 * many devices do not report.
 */
static inline bool blk_can_dma_map_iova(struct request *req,
		struct device *dma_dev)
{
	return !(req_phys_gap_mask(req) & dma_get_merge_boundary(dma_dev));
}

static bool blk_dma_map_bus(struct blk_dma_iter *iter, struct phys_vec *vec)
+6 −0
Original line number Diff line number Diff line
@@ -376,6 +376,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
	INIT_LIST_HEAD(&rq->queuelist);
	rq->q = q;
	rq->__sector = (sector_t) -1;
	rq->phys_gap_bit = 0;
	INIT_HLIST_NODE(&rq->hash);
	RB_CLEAR_NODE(&rq->rb_node);
	rq->tag = BLK_MQ_NO_TAG;
@@ -668,6 +669,7 @@ struct request *blk_mq_alloc_request(struct request_queue *q, blk_opf_t opf,
			goto out_queue_exit;
	}
	rq->__data_len = 0;
	rq->phys_gap_bit = 0;
	rq->__sector = (sector_t) -1;
	rq->bio = rq->biotail = NULL;
	return rq;
@@ -748,6 +750,7 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
	rq = blk_mq_rq_ctx_init(&data, blk_mq_tags_from_data(&data), tag);
	blk_mq_rq_time_init(rq, alloc_time_ns);
	rq->__data_len = 0;
	rq->phys_gap_bit = 0;
	rq->__sector = (sector_t) -1;
	rq->bio = rq->biotail = NULL;
	return rq;
@@ -2674,6 +2677,8 @@ static void blk_mq_bio_to_request(struct request *rq, struct bio *bio,
	rq->bio = rq->biotail = bio;
	rq->__sector = bio->bi_iter.bi_sector;
	rq->__data_len = bio->bi_iter.bi_size;
	rq->phys_gap_bit = bio->bi_bvec_gap_bit;

	rq->nr_phys_segments = nr_segs;
	if (bio_integrity(bio))
		rq->nr_integrity_segments = blk_rq_count_integrity_sg(rq->q,
@@ -3380,6 +3385,7 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
	}
	rq->nr_phys_segments = rq_src->nr_phys_segments;
	rq->nr_integrity_segments = rq_src->nr_integrity_segments;
	rq->phys_gap_bit = rq_src->phys_gap_bit;

	if (rq->bio && blk_crypto_rq_bio_prep(rq, rq->bio, gfp_mask) < 0)
		goto free_and_out;
Loading