Commit fac69ec8 authored by Christoph Hellwig's avatar Christoph Hellwig Committed by Carlos Maiolino
Browse files

xfs: simplify buffer I/O submission



The code in _xfs_buf_ioapply is unnecessarily complicated because it
doesn't take advantage of modern bio features.

Simplify it by making use of bio splitting and chaining, that is build
a single bio for the pages in the buffer using a simple loop, and then
split that bio on the map boundaries for discontiguous multi-FSB buffers
and chain the split bios to the main one so that there is only a single
I/O completion.

This not only simplifies the code to build the buffer, but also removes
the need for the b_io_remaining field as buffer ownership is granted
to the bio on submit of the final bio with no chance for a completion
before that as well as the b_io_error field that is now superfluous
because there always is exactly one completion.

Signed-off-by: default avatarChristoph Hellwig <hch@lst.de>
Reviewed-by: default avatarDarrick J. Wong <djwong@kernel.org>
Acked-by: default avatarDave Chinner <dchinner@redhat.com>
Signed-off-by: default avatarCarlos Maiolino <cem@kernel.org>
parent 8db65d31
Loading
Loading
Loading
Loading
+60 −144
Original line number Diff line number Diff line
@@ -1362,13 +1362,6 @@ xfs_buf_ioend(
{
	trace_xfs_buf_iodone(bp, _RET_IP_);

	/*
	 * Pull in IO completion errors now. We are guaranteed to be running
	 * single threaded, so we don't need the lock to read b_io_error.
	 */
	if (!bp->b_error && bp->b_io_error)
		xfs_buf_ioerror(bp, bp->b_io_error);

	if (bp->b_flags & XBF_READ) {
		if (!bp->b_error && bp->b_ops)
			bp->b_ops->verify_read(bp);
@@ -1491,118 +1484,26 @@ static void
xfs_buf_bio_end_io(
	struct bio		*bio)
{
	struct xfs_buf		*bp = (struct xfs_buf *)bio->bi_private;
	struct xfs_buf		*bp = bio->bi_private;

	if (!bio->bi_status &&
	    (bp->b_flags & XBF_WRITE) && (bp->b_flags & XBF_ASYNC) &&
	if (bio->bi_status)
		xfs_buf_ioerror(bp, blk_status_to_errno(bio->bi_status));
	else if ((bp->b_flags & XBF_WRITE) && (bp->b_flags & XBF_ASYNC) &&
		 XFS_TEST_ERROR(false, bp->b_mount, XFS_ERRTAG_BUF_IOERROR))
		bio->bi_status = BLK_STS_IOERR;

	/*
	 * don't overwrite existing errors - otherwise we can lose errors on
	 * buffers that require multiple bios to complete.
	 */
	if (bio->bi_status) {
		int error = blk_status_to_errno(bio->bi_status);

		cmpxchg(&bp->b_io_error, 0, error);
	}
		xfs_buf_ioerror(bp, -EIO);

	if (!bp->b_error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
		invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp));

	if (atomic_dec_and_test(&bp->b_io_remaining) == 1)
	xfs_buf_ioend_async(bp);
	bio_put(bio);
}

static void
xfs_buf_ioapply_map(
	struct xfs_buf	*bp,
	int		map,
	int		*buf_offset,
	int		*count,
	blk_opf_t	op)
{
	int		page_index;
	unsigned int	total_nr_pages = bp->b_page_count;
	int		nr_pages;
	struct bio	*bio;
	sector_t	sector =  bp->b_maps[map].bm_bn;
	int		size;
	int		offset;

	/* skip the pages in the buffer before the start offset */
	page_index = 0;
	offset = *buf_offset;
	while (offset >= PAGE_SIZE) {
		page_index++;
		offset -= PAGE_SIZE;
	}

	/*
	 * Limit the IO size to the length of the current vector, and update the
	 * remaining IO count for the next time around.
	 */
	size = min_t(int, BBTOB(bp->b_maps[map].bm_len), *count);
	*count -= size;
	*buf_offset += size;

next_chunk:
	atomic_inc(&bp->b_io_remaining);
	nr_pages = bio_max_segs(total_nr_pages);

	bio = bio_alloc(bp->b_target->bt_bdev, nr_pages, op, GFP_NOIO);
	bio->bi_iter.bi_sector = sector;
	bio->bi_end_io = xfs_buf_bio_end_io;
	bio->bi_private = bp;

	for (; size && nr_pages; nr_pages--, page_index++) {
		int	rbytes, nbytes = PAGE_SIZE - offset;

		if (nbytes > size)
			nbytes = size;

		rbytes = bio_add_page(bio, bp->b_pages[page_index], nbytes,
				      offset);
		if (rbytes < nbytes)
			break;

		offset = 0;
		sector += BTOBB(nbytes);
		size -= nbytes;
		total_nr_pages--;
	}

	if (likely(bio->bi_iter.bi_size)) {
		if (xfs_buf_is_vmapped(bp)) {
			flush_kernel_vmap_range(bp->b_addr,
						xfs_buf_vmap_len(bp));
		}
		submit_bio(bio);
		if (size)
			goto next_chunk;
	} else {
		/*
		 * This is guaranteed not to be the last io reference count
		 * because the caller (xfs_buf_submit) holds a count itself.
		 */
		atomic_dec(&bp->b_io_remaining);
		xfs_buf_ioerror(bp, -EIO);
		bio_put(bio);
	}

}

STATIC void
_xfs_buf_ioapply(
static inline blk_opf_t
xfs_buf_bio_op(
	struct xfs_buf		*bp)
{
	struct blk_plug	plug;
	blk_opf_t		op;
	int		offset;
	int		size;
	int		i;

	if (bp->b_flags & XBF_WRITE) {
		op = REQ_OP_WRITE;
@@ -1612,25 +1513,53 @@ _xfs_buf_ioapply(
			op |= REQ_RAHEAD;
	}

	/* we only use the buffer cache for meta-data */
	op |= REQ_META;
	return op | REQ_META;
}

static void
xfs_buf_submit_bio(
	struct xfs_buf		*bp)
{
	unsigned int		size = BBTOB(bp->b_length);
	unsigned int		map = 0, p;
	struct blk_plug		plug;
	struct bio		*bio;

	bio = bio_alloc(bp->b_target->bt_bdev, bp->b_page_count,
			xfs_buf_bio_op(bp), GFP_NOIO);
	bio->bi_private = bp;
	bio->bi_end_io = xfs_buf_bio_end_io;

	if (bp->b_flags & _XBF_KMEM) {
		__bio_add_page(bio, virt_to_page(bp->b_addr), size,
				bp->b_offset);
	} else {
		for (p = 0; p < bp->b_page_count; p++)
			__bio_add_page(bio, bp->b_pages[p], PAGE_SIZE, 0);
		bio->bi_iter.bi_size = size; /* limit to the actual size used */

		if (xfs_buf_is_vmapped(bp))
			flush_kernel_vmap_range(bp->b_addr,
					xfs_buf_vmap_len(bp));
	}

	/*
	 * Walk all the vectors issuing IO on them. Set up the initial offset
	 * into the buffer and the desired IO size before we start -
	 * _xfs_buf_ioapply_vec() will modify them appropriately for each
	 * subsequent call.
	 * If there is more than one map segment, split out a new bio for each
	 * map except of the last one.  The last map is handled by the
	 * remainder of the original bio outside the loop.
	 */
	offset = bp->b_offset;
	size = BBTOB(bp->b_length);
	blk_start_plug(&plug);
	for (i = 0; i < bp->b_map_count; i++) {
		xfs_buf_ioapply_map(bp, i, &offset, &size, op);
		if (bp->b_error)
			break;
		if (size <= 0)
			break;	/* all done */
	for (map = 0; map < bp->b_map_count - 1; map++) {
		struct bio	*split;

		split = bio_split(bio, bp->b_maps[map].bm_len, GFP_NOFS,
				&fs_bio_set);
		split->bi_iter.bi_sector = bp->b_maps[map].bm_bn;
		bio_chain(split, bio);
		submit_bio(split);
	}
	bio->bi_iter.bi_sector = bp->b_maps[map].bm_bn;
	submit_bio(bio);
	blk_finish_plug(&plug);
}

@@ -1729,14 +1658,7 @@ xfs_buf_submit(
	 * left over from previous use of the buffer (e.g. failed readahead).
	 */
	bp->b_error = 0;
	bp->b_io_error = 0;

	/*
	 * Set the count to 1 initially, this will stop an I/O completion
	 * callout which happens before we have started all the I/O from calling
	 * xfs_buf_ioend too early.
	 */
	atomic_set(&bp->b_io_remaining, 1);
	if (bp->b_flags & XBF_ASYNC)
		xfs_buf_ioacct_inc(bp);

@@ -1749,21 +1671,15 @@ xfs_buf_submit(
	if (xfs_buftarg_is_mem(bp->b_target))
		goto done;

	_xfs_buf_ioapply(bp);
	xfs_buf_submit_bio(bp);
	goto rele;

done:
	/*
	 * If _xfs_buf_ioapply failed, we can get back here with only the IO
	 * reference we took above. If we drop it to zero, run completion so
	 * that we don't return to the caller with completion still pending.
	 */
	if (atomic_dec_and_test(&bp->b_io_remaining) == 1) {
	if (bp->b_error || !(bp->b_flags & XBF_ASYNC))
		xfs_buf_ioend(bp);
	else
		xfs_buf_ioend_async(bp);
	}

rele:
	/*
	 * Release the hold that keeps the buffer referenced for the entire
	 * I/O. Note that if the buffer is async, it is not safe to reference
+0 −2
Original line number Diff line number Diff line
@@ -184,7 +184,6 @@ struct xfs_buf {
	struct list_head	b_lru;		/* lru list */
	spinlock_t		b_lock;		/* internal state lock */
	unsigned int		b_state;	/* internal state flags */
	int			b_io_error;	/* internal IO error state */
	wait_queue_head_t	b_waiters;	/* unpin waiters */
	struct list_head	b_list;
	struct xfs_perag	*b_pag;
@@ -202,7 +201,6 @@ struct xfs_buf {
	struct xfs_buf_map	__b_map;	/* inline compound buffer map */
	int			b_map_count;
	atomic_t		b_pin_count;	/* pin count */
	atomic_t		b_io_remaining;	/* #outstanding I/O requests */
	unsigned int		b_page_count;	/* size of page array */
	unsigned int		b_offset;	/* page offset of b_addr,
						   only for _XBF_KMEM buffers */