Commit b35243a4 authored by Christoph Hellwig, committed by Jens Axboe
Browse files

block: rework bio splitting



The current setup with bio_may_exceed_limit and __bio_split_to_limits
is a bit of a mess.

Change it so that __bio_split_to_limits does all the work and is just
a variant of bio_split_to_limits that returns nr_segs.  This is done
by inlining it and instead having the various bio_split_* helpers directly
submit the potentially split bios.

To support btrfs, the rw version has a lower level helper split out
that just returns the offset to split.  This turns out to nicely clean
up the btrfs flow as well.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: David Sterba <dsterba@suse.com>
Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Tested-by: Hans Holmberg <hans.holmberg@wdc.com>
Reviewed-by: Hans Holmberg <hans.holmberg@wdc.com>
Link: https://lore.kernel.org/r/20240826173820.1690925-2-hch@lst.de


Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent f6f84be0
Loading
Loading
Loading
Loading
+56 −90
Original line number Diff line number Diff line
@@ -105,9 +105,33 @@ static unsigned int bio_allowed_max_sectors(const struct queue_limits *lim)
	return round_down(UINT_MAX, lim->logical_block_size) >> SECTOR_SHIFT;
}

static struct bio *bio_split_discard(struct bio *bio,
				     const struct queue_limits *lim,
				     unsigned *nsegs, struct bio_set *bs)
/*
 * Act on a split decision for @bio.
 *
 * @split_sectors < 0:  an errno; @bio is completed with the matching block
 *                      status and NULL is returned.
 * @split_sectors == 0: no split is needed; @bio is returned unchanged.
 * @split_sectors > 0:  the first @split_sectors sectors are split off into a
 *                      new bio allocated from the disk's bio_split set.  The
 *                      new bio is chained to @bio, @bio (now the remainder) is
 *                      resubmitted, and the new bio is returned.
 */
static struct bio *bio_submit_split(struct bio *bio, int split_sectors)
{
	struct bio *front;

	if (unlikely(split_sectors < 0)) {
		bio->bi_status = errno_to_blk_status(split_sectors);
		bio_endio(bio);
		return NULL;
	}

	/* Nothing to split off: hand the bio back as-is. */
	if (!split_sectors)
		return bio;

	front = bio_split(bio, split_sectors, GFP_NOIO,
			&bio->bi_bdev->bd_disk->bio_split);
	/* there isn't chance to merge the split bio */
	front->bi_opf |= REQ_NOMERGE;
	blkcg_bio_issue_init(front);
	bio_chain(front, bio);
	trace_block_split(front, bio->bi_iter.bi_sector);
	WARN_ON_ONCE(bio_zone_write_plugging(bio));
	/* Resubmit the remainder; the caller continues with the front part. */
	submit_bio_noacct(bio);
	return front;
}

struct bio *bio_split_discard(struct bio *bio, const struct queue_limits *lim,
		unsigned *nsegs)
{
	unsigned int max_discard_sectors, granularity;
	sector_t tmp;
@@ -121,10 +145,10 @@ static struct bio *bio_split_discard(struct bio *bio,
		min(lim->max_discard_sectors, bio_allowed_max_sectors(lim));
	max_discard_sectors -= max_discard_sectors % granularity;
	if (unlikely(!max_discard_sectors))
		return NULL;
		return bio;

	if (bio_sectors(bio) <= max_discard_sectors)
		return NULL;
		return bio;

	split_sectors = max_discard_sectors;

@@ -139,19 +163,18 @@ static struct bio *bio_split_discard(struct bio *bio,
	if (split_sectors > tmp)
		split_sectors -= tmp;

	return bio_split(bio, split_sectors, GFP_NOIO, bs);
	return bio_submit_split(bio, split_sectors);
}

static struct bio *bio_split_write_zeroes(struct bio *bio,
					  const struct queue_limits *lim,
					  unsigned *nsegs, struct bio_set *bs)
struct bio *bio_split_write_zeroes(struct bio *bio,
		const struct queue_limits *lim, unsigned *nsegs)
{
	*nsegs = 0;
	if (!lim->max_write_zeroes_sectors)
		return NULL;
		return bio;
	if (bio_sectors(bio) <= lim->max_write_zeroes_sectors)
		return NULL;
	return bio_split(bio, lim->max_write_zeroes_sectors, GFP_NOIO, bs);
		return bio;
	return bio_submit_split(bio, lim->max_write_zeroes_sectors);
}

static inline unsigned int blk_boundary_sectors(const struct queue_limits *lim,
@@ -274,27 +297,19 @@ static bool bvec_split_segs(const struct queue_limits *lim,
}

/**
 * bio_split_rw - split a bio in two bios
 * bio_split_rw_at - check if and where to split a read/write bio
 * @bio:  [in] bio to be split
 * @lim:  [in] queue limits to split based on
 * @segs: [out] number of segments in the bio with the first half of the sectors
 * @bs:	  [in] bio set to allocate the clone from
 * @max_bytes: [in] maximum number of bytes per bio
 *
 * Clone @bio, update the bi_iter of the clone to represent the first sectors
 * of @bio and update @bio->bi_iter to represent the remaining sectors. The
 * following is guaranteed for the cloned bio:
 * - That it has at most @max_bytes worth of data
 * - That it has at most queue_max_segments(@q) segments.
 *
 * Except for discard requests the cloned bio will point at the bi_io_vec of
 * the original bio. It is the responsibility of the caller to ensure that the
 * original bio is not freed before the cloned bio. The caller is also
 * responsible for ensuring that @bs is only destroyed after processing of the
 * split bio has finished.
 * Find out if @bio needs to be split to fit the queue limits in @lim and a
 * maximum size of @max_bytes.  Returns a negative error number if @bio can't be
 * split, 0 if the bio doesn't have to be split, or a positive sector offset if
 * @bio needs to be split.
 */
struct bio *bio_split_rw(struct bio *bio, const struct queue_limits *lim,
		unsigned *segs, struct bio_set *bs, unsigned max_bytes)
int bio_split_rw_at(struct bio *bio, const struct queue_limits *lim,
		unsigned *segs, unsigned max_bytes)
{
	struct bio_vec bv, bvprv, *bvprvp = NULL;
	struct bvec_iter iter;
@@ -324,22 +339,17 @@ struct bio *bio_split_rw(struct bio *bio, const struct queue_limits *lim,
	}

	*segs = nsegs;
	return NULL;
	return 0;
split:
	if (bio->bi_opf & REQ_ATOMIC) {
		bio->bi_status = BLK_STS_INVAL;
		bio_endio(bio);
		return ERR_PTR(-EINVAL);
	}
	if (bio->bi_opf & REQ_ATOMIC)
		return -EINVAL;

	/*
	 * We can't sanely support splitting for a REQ_NOWAIT bio. End it
	 * with EAGAIN if splitting is required and return an error pointer.
	 */
	if (bio->bi_opf & REQ_NOWAIT) {
		bio->bi_status = BLK_STS_AGAIN;
		bio_endio(bio);
		return ERR_PTR(-EAGAIN);
	}
	if (bio->bi_opf & REQ_NOWAIT)
		return -EAGAIN;

	*segs = nsegs;

@@ -356,58 +366,16 @@ struct bio *bio_split_rw(struct bio *bio, const struct queue_limits *lim,
	 * big IO can be trivial, disable iopoll when split needed.
	 */
	bio_clear_polled(bio);
	return bio_split(bio, bytes >> SECTOR_SHIFT, GFP_NOIO, bs);
	return bytes >> SECTOR_SHIFT;
}
EXPORT_SYMBOL_GPL(bio_split_rw);
EXPORT_SYMBOL_GPL(bio_split_rw_at);

/**
 * __bio_split_to_limits - split a bio to fit the queue limits
 * @bio:     bio to be split
 * @lim:     queue limits to split based on
 * @nr_segs: returns the number of segments in the returned bio
 *
 * Check if @bio needs splitting based on the queue limits, and if so split off
 * a bio fitting the limits from the beginning of @bio and return it.  @bio is
 * shortened to the remainder and re-submitted.
 *
 * The split bio is allocated from @q->bio_split, which is provided by the
 * block layer.
 */
struct bio *__bio_split_to_limits(struct bio *bio,
				  const struct queue_limits *lim,
				  unsigned int *nr_segs)
struct bio *bio_split_rw(struct bio *bio, const struct queue_limits *lim,
		unsigned *nr_segs)
{
	struct bio_set *bs = &bio->bi_bdev->bd_disk->bio_split;
	struct bio *split;

	switch (bio_op(bio)) {
	case REQ_OP_DISCARD:
	case REQ_OP_SECURE_ERASE:
		split = bio_split_discard(bio, lim, nr_segs, bs);
		break;
	case REQ_OP_WRITE_ZEROES:
		split = bio_split_write_zeroes(bio, lim, nr_segs, bs);
		break;
	default:
		split = bio_split_rw(bio, lim, nr_segs, bs,
				get_max_io_size(bio, lim) << SECTOR_SHIFT);
		if (IS_ERR(split))
			return NULL;
		break;
	}

	if (split) {
		/* there isn't chance to merge the split bio */
		split->bi_opf |= REQ_NOMERGE;

		blkcg_bio_issue_init(split);
		bio_chain(split, bio);
		trace_block_split(split, bio->bi_iter.bi_sector);
		WARN_ON_ONCE(bio_zone_write_plugging(bio));
		submit_bio_noacct(bio);
		return split;
	}
	return bio;
	return bio_submit_split(bio,
		bio_split_rw_at(bio, lim, nr_segs,
			get_max_io_size(bio, lim) << SECTOR_SHIFT));
}

/**
@@ -426,9 +394,7 @@ struct bio *bio_split_to_limits(struct bio *bio)
	const struct queue_limits *lim = &bdev_get_queue(bio->bi_bdev)->limits;
	unsigned int nr_segs;

	if (bio_may_exceed_limits(bio, lim))
	return __bio_split_to_limits(bio, lim, &nr_segs);
	return bio;
}
EXPORT_SYMBOL(bio_split_to_limits);

+5 −6
Original line number Diff line number Diff line
@@ -2939,7 +2939,7 @@ void blk_mq_submit_bio(struct bio *bio)
	struct blk_plug *plug = current->plug;
	const int is_sync = op_is_sync(bio->bi_opf);
	struct blk_mq_hw_ctx *hctx;
	unsigned int nr_segs = 1;
	unsigned int nr_segs;
	struct request *rq;
	blk_status_t ret;

@@ -2981,11 +2981,10 @@ void blk_mq_submit_bio(struct bio *bio)
		goto queue_exit;
	}

	if (unlikely(bio_may_exceed_limits(bio, &q->limits))) {
	bio = __bio_split_to_limits(bio, &q->limits, &nr_segs);
	if (!bio)
		goto queue_exit;
	}

	if (!bio_integrity_prep(bio))
		goto queue_exit;

+44 −19
Original line number Diff line number Diff line
@@ -331,33 +331,58 @@ ssize_t part_timeout_show(struct device *, struct device_attribute *, char *);
ssize_t part_timeout_store(struct device *, struct device_attribute *,
				const char *, size_t);

static inline bool bio_may_exceed_limits(struct bio *bio,
struct bio *bio_split_discard(struct bio *bio, const struct queue_limits *lim,
		unsigned *nsegs);
struct bio *bio_split_write_zeroes(struct bio *bio,
		const struct queue_limits *lim, unsigned *nsegs);
struct bio *bio_split_rw(struct bio *bio, const struct queue_limits *lim,
		unsigned *nr_segs);

/*
 * All drivers must accept single-segments bios that are smaller than PAGE_SIZE.
 *
 * This is a quick and dirty check that relies on the fact that bi_io_vec[0] is
 * always valid if a bio has data.  The check might lead to occasional false
 * positives when bios are cloned, but compared to the performance impact of
 * cloned bios themselves the loop below doesn't matter anyway.
 */
static inline bool bio_may_need_split(struct bio *bio,
		const struct queue_limits *lim)
{
	return lim->chunk_sectors || bio->bi_vcnt != 1 ||
		bio->bi_io_vec->bv_len + bio->bi_io_vec->bv_offset > PAGE_SIZE;
}

/**
 * __bio_split_to_limits - split a bio to fit the queue limits
 * @bio:     bio to be split
 * @lim:     queue limits to split based on
 * @nr_segs: returns the number of segments in the returned bio
 *
 * Check if @bio needs splitting based on the queue limits, and if so split off
 * a bio fitting the limits from the beginning of @bio and return it.  @bio is
 * shortened to the remainder and re-submitted.
 *
 * The split bio is allocated from the bio_split bio set of the disk that @bio
 * is directed at (bio->bi_bdev->bd_disk->bio_split), which is provided by the
 * block layer.
 */
static inline struct bio *__bio_split_to_limits(struct bio *bio,
		const struct queue_limits *lim, unsigned int *nr_segs)
{
	switch (bio_op(bio)) {
	default:
		if (bio_may_need_split(bio, lim))
			return bio_split_rw(bio, lim, nr_segs);
		*nr_segs = 1;
		return bio;
	case REQ_OP_DISCARD:
	case REQ_OP_SECURE_ERASE:
		return bio_split_discard(bio, lim, nr_segs);
	case REQ_OP_WRITE_ZEROES:
		return true; /* non-trivial splitting decisions */
	default:
		break;
		return bio_split_write_zeroes(bio, lim, nr_segs);
	}

	/*
	 * All drivers must accept single-segments bios that are <= PAGE_SIZE.
	 * This is a quick and dirty check that relies on the fact that
	 * bi_io_vec[0] is always valid if a bio has data.  The check might
	 * lead to occasional false negatives when bios are cloned, but compared
	 * to the performance impact of cloned bios themselves the loop below
	 * doesn't matter anyway.
	 */
	return lim->chunk_sectors || bio->bi_vcnt != 1 ||
		bio->bi_io_vec->bv_len + bio->bi_io_vec->bv_offset > PAGE_SIZE;
}

struct bio *__bio_split_to_limits(struct bio *bio,
				  const struct queue_limits *lim,
				  unsigned int *nr_segs);
int ll_back_merge_fn(struct request *req, struct bio *bio,
		unsigned int nr_segs);
bool blk_attempt_req_merge(struct request_queue *q, struct request *rq,
+18 −12
Original line number Diff line number Diff line
@@ -73,20 +73,13 @@ struct btrfs_bio *btrfs_bio_alloc(unsigned int nr_vecs, blk_opf_t opf,

static struct btrfs_bio *btrfs_split_bio(struct btrfs_fs_info *fs_info,
					 struct btrfs_bio *orig_bbio,
					 u64 map_length, bool use_append)
					 u64 map_length)
{
	struct btrfs_bio *bbio;
	struct bio *bio;

	if (use_append) {
		unsigned int nr_segs;

		bio = bio_split_rw(&orig_bbio->bio, &fs_info->limits, &nr_segs,
				   &btrfs_clone_bioset, map_length);
	} else {
		bio = bio_split(&orig_bbio->bio, map_length >> SECTOR_SHIFT,
				GFP_NOFS, &btrfs_clone_bioset);
	}
	bio = bio_split(&orig_bbio->bio, map_length >> SECTOR_SHIFT, GFP_NOFS,
			&btrfs_clone_bioset);
	bbio = btrfs_bio(bio);
	btrfs_bio_init(bbio, fs_info, NULL, orig_bbio);
	bbio->inode = orig_bbio->inode;
@@ -664,6 +657,19 @@ static bool btrfs_wq_submit_bio(struct btrfs_bio *bbio,
	return true;
}

/*
 * Work out how many bytes of @bbio may be submitted in one go when the
 * append path is used.
 *
 * @map_length is first capped at fs_info->max_zone_append_size.
 * bio_split_rw_at() is then asked whether the bio has to be split to fit
 * fs_info->limits within that many bytes.  A non-zero answer is taken as the
 * split point in sectors and returned in bytes; zero means no split is needed
 * and the capped @map_length is returned.
 */
static u64 btrfs_append_map_length(struct btrfs_bio *bbio, u64 map_length)
{
	unsigned int nr_segs;
	int sector_offset;

	map_length = min(map_length, bbio->fs_info->max_zone_append_size);
	sector_offset = bio_split_rw_at(&bbio->bio, &bbio->fs_info->limits,
					&nr_segs, map_length);
	/*
	 * Widen to u64 before shifting so the conversion to bytes is not done
	 * in (signed) int arithmetic: shifting a negative int is undefined and
	 * a large positive offset could overflow int.
	 *
	 * NOTE(review): bio_split_rw_at() is documented to return a negative
	 * errno when the bio can't be split; that case is not distinguished
	 * here and yields a very large length.  Confirm that bios reaching
	 * this path can never trigger it, or add explicit handling.
	 */
	if (sector_offset)
		return (u64)sector_offset << SECTOR_SHIFT;
	return map_length;
}

static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
{
	struct btrfs_inode *inode = bbio->inode;
@@ -691,10 +697,10 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)

	map_length = min(map_length, length);
	if (use_append)
		map_length = min(map_length, fs_info->max_zone_append_size);
		map_length = btrfs_append_map_length(bbio, map_length);

	if (map_length < length) {
		bbio = btrfs_split_bio(fs_info, bbio, map_length, use_append);
		bbio = btrfs_split_bio(fs_info, bbio, map_length);
		bio = &bbio->bio;
	}

+2 −2
Original line number Diff line number Diff line
@@ -324,8 +324,8 @@ static inline void bio_next_folio(struct folio_iter *fi, struct bio *bio)
void bio_trim(struct bio *bio, sector_t offset, sector_t size);
extern struct bio *bio_split(struct bio *bio, int sectors,
			     gfp_t gfp, struct bio_set *bs);
struct bio *bio_split_rw(struct bio *bio, const struct queue_limits *lim,
		unsigned *segs, struct bio_set *bs, unsigned max_bytes);
int bio_split_rw_at(struct bio *bio, const struct queue_limits *lim,
		unsigned *segs, unsigned max_bytes);

/**
 * bio_next_split - get next @sectors from a bio, splitting if necessary