Commit 2e238340 authored by Christoph Hellwig's avatar Christoph Hellwig
Browse files

xfs: implement direct writes to zoned RT devices



Direct writes to zoned RT devices are extremely simple.  The block
reservation is taken before acquiring the iolock.  The iomap direct I/O code
then calls into ->iomap_begin, which returns a "fake" iomap for the entire
requested range.  The actual block allocation is then done from the
submit_io handler using code shared with the buffered I/O path.

The iomap_dio_ops set the bio_set to the (iomap) ioend one and initialize
the embedded ioend, which allows reusing the existing ioend based buffered
I/O completion path.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>
parent 058dd70c
Loading
Loading
Loading
Loading
+4 −2
Original line number Diff line number Diff line
@@ -158,7 +158,9 @@ xfs_end_ioend(
	else if (ioend->io_flags & IOMAP_IOEND_UNWRITTEN)
		error = xfs_iomap_write_unwritten(ip, offset, size, false);

	if (!error && xfs_ioend_is_append(ioend))
	if (!error &&
	    !(ioend->io_flags & IOMAP_IOEND_DIRECT) &&
	    xfs_ioend_is_append(ioend))
		error = xfs_setfilesize(ip, offset, size);
done:
	if (is_zoned)
@@ -205,7 +207,7 @@ xfs_end_io(
	}
}

static void
void
xfs_end_bio(
	struct bio		*bio)
{
+2 −1
Original line number Diff line number Diff line
@@ -10,5 +10,6 @@ extern const struct address_space_operations xfs_address_space_operations;
extern const struct address_space_operations xfs_dax_aops;

int xfs_setfilesize(struct xfs_inode *ip, xfs_off_t offset, size_t size);
void xfs_end_bio(struct bio *bio);

#endif /* __XFS_AOPS_H__ */
+72 −8
Original line number Diff line number Diff line
@@ -25,6 +25,7 @@
#include "xfs_iomap.h"
#include "xfs_reflink.h"
#include "xfs_file.h"
#include "xfs_aops.h"
#include "xfs_zone_alloc.h"

#include <linux/dax.h>
@@ -548,6 +549,9 @@ xfs_dio_write_end_io(
	loff_t			offset = iocb->ki_pos;
	unsigned int		nofs_flag;

	ASSERT(!xfs_is_zoned_inode(ip) ||
	       !(flags & (IOMAP_DIO_UNWRITTEN | IOMAP_DIO_COW)));

	trace_xfs_end_io_direct_write(ip, offset, size);

	if (xfs_is_shutdown(ip->i_mount))
@@ -627,14 +631,51 @@ static const struct iomap_dio_ops xfs_dio_write_ops = {
	.end_io		= xfs_dio_write_end_io,
};

/*
 * ->submit_io handler for direct writes to zoned RT devices.
 *
 * The size of the bio, converted to file system blocks with XFS_B_TO_FSB(),
 * is charged against the reservation carried in iter->private.  If the bio
 * needs more blocks than the reservation still holds, the file system is shut
 * down and the bio is failed.  Otherwise the bio gets an ioend initialized
 * for it, completes through xfs_end_bio(), and is passed on to
 * xfs_zone_alloc_and_submit().
 */
static void
xfs_dio_zoned_submit_io(
	const struct iomap_iter	*iter,
	struct bio		*bio,
	loff_t			file_offset)
{
	struct xfs_zone_alloc_ctx *ctx = iter->private;
	struct xfs_mount	*mp = XFS_I(iter->inode)->i_mount;
	xfs_filblks_t		nr_fsb = XFS_B_TO_FSB(mp, bio->bi_iter.bi_size);

	if (nr_fsb <= ctx->reserved_blocks) {
		struct iomap_ioend	*zoned_ioend;

		/* Consume this bio's share of the reservation. */
		ctx->reserved_blocks -= nr_fsb;

		/* Complete through the ioend based completion handler. */
		bio->bi_end_io = xfs_end_bio;
		zoned_ioend = iomap_init_ioend(iter->inode, bio, file_offset,
				IOMAP_IOEND_DIRECT);
		xfs_zone_alloc_and_submit(zoned_ioend, &ctx->open_zone);
		return;
	}

	/* The bio does not fit into what is left of the reservation. */
	xfs_err(mp,
"allocation (%lld) larger than reservation (%lld).",
		nr_fsb, ctx->reserved_blocks);
	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
	bio_io_error(bio);
}

/*
 * Direct I/O operations for writes to zoned RT devices.  Bios come from the
 * iomap ioend bio_set and are submitted through xfs_dio_zoned_submit_io();
 * the end_io handler is the same xfs_dio_write_end_io() that the regular
 * xfs_dio_write_ops use.
 */
static const struct iomap_dio_ops xfs_dio_zoned_write_ops = {
	.bio_set	= &iomap_ioend_bioset,
	.submit_io	= xfs_dio_zoned_submit_io,
	.end_io		= xfs_dio_write_end_io,
};

/*
 * Handle block aligned direct I/O writes
 * Handle block aligned direct I/O writes.
 */
static noinline ssize_t
xfs_file_dio_write_aligned(
	struct xfs_inode	*ip,
	struct kiocb		*iocb,
	struct iov_iter		*from)
	struct iov_iter		*from,
	const struct iomap_ops	*ops,
	const struct iomap_dio_ops *dops,
	struct xfs_zone_alloc_ctx *ac)
{
	unsigned int		iolock = XFS_IOLOCK_SHARED;
	ssize_t			ret;
@@ -642,7 +683,7 @@ xfs_file_dio_write_aligned(
	ret = xfs_ilock_iocb_for_write(iocb, &iolock);
	if (ret)
		return ret;
	ret = xfs_file_write_checks(iocb, from, &iolock, NULL);
	ret = xfs_file_write_checks(iocb, from, &iolock, ac);
	if (ret)
		goto out_unlock;

@@ -656,14 +697,34 @@ xfs_file_dio_write_aligned(
		iolock = XFS_IOLOCK_SHARED;
	}
	trace_xfs_file_direct_write(iocb, from);
	ret = iomap_dio_rw(iocb, from, &xfs_direct_write_iomap_ops,
			   &xfs_dio_write_ops, 0, NULL, 0);
	ret = iomap_dio_rw(iocb, from, ops, dops, 0, ac, 0);
out_unlock:
	if (iolock)
	xfs_iunlock(ip, iolock);
	return ret;
}

/*
 * Handle block aligned direct I/O writes to zoned devices.
 *
 * Space is reserved into a local zone allocation context first.  On success
 * the write goes through the common aligned direct write path using the zoned
 * iomap and dio operations, and the context is unreserved again afterwards.
 * A negative return from the reservation is passed back to the caller
 * unchanged.
 */
static noinline ssize_t
xfs_file_dio_write_zoned(
	struct xfs_inode	*ip,
	struct kiocb		*iocb,
	struct iov_iter		*from)
{
	struct xfs_zone_alloc_ctx ac = { };
	ssize_t			status;

	status = xfs_zoned_write_space_reserve(ip, iocb, from, 0, &ac);
	if (status >= 0) {
		status = xfs_file_dio_write_aligned(ip, iocb, from,
				&xfs_zoned_direct_write_iomap_ops,
				&xfs_dio_zoned_write_ops, &ac);
		xfs_zoned_space_unreserve(ip, &ac);
	}
	return status;
}

/*
 * Handle block unaligned direct I/O writes
 *
@@ -777,7 +838,10 @@ xfs_file_dio_write(
	    (xfs_is_always_cow_inode(ip) &&
	     (iov_iter_alignment(from) & ip->i_mount->m_blockmask)))
		return xfs_file_dio_write_unaligned(ip, iocb, from);
	return xfs_file_dio_write_aligned(ip, iocb, from);
	if (xfs_is_zoned_inode(ip))
		return xfs_file_dio_write_zoned(ip, iocb, from);
	return xfs_file_dio_write_aligned(ip, iocb, from,
			&xfs_direct_write_iomap_ops, &xfs_dio_write_ops, NULL);
}

static noinline ssize_t
+53 −0
Original line number Diff line number Diff line
@@ -965,6 +965,59 @@ const struct iomap_ops xfs_direct_write_iomap_ops = {
	.iomap_begin		= xfs_direct_write_iomap_begin,
};

#ifdef CONFIG_XFS_RT
/*
 * ->iomap_begin for direct writes to zoned RT devices.
 *
 * This is really simple.  The space has already been reserved before taking the
 * IOLOCK, the actual block allocation is done just before submitting the bio
 * and only recorded in the extent map on I/O completion.  All that is left to
 * do here is to hand back a placeholder mapping on the RT device that covers
 * exactly the requested range.
 *
 * Returns 0 on success, -EAGAIN for IOMAP_NOWAIT requests, or the error
 * returned by xfs_iread_extents().
 */
static int
xfs_zoned_direct_write_iomap_begin(
	struct inode		*inode,
	loff_t			offset,
	loff_t			length,
	unsigned		flags,
	struct iomap		*iomap,
	struct iomap		*srcmap)
{
	struct xfs_inode	*ip = XFS_I(inode);
	int			error;

	ASSERT(!(flags & IOMAP_OVERWRITE_ONLY));

	/*
	 * Needs to be pushed down into the allocator so that only writes into
	 * a single zone can be supported.
	 */
	if (flags & IOMAP_NOWAIT)
		return -EAGAIN;

	/*
	 * Ensure the extent list is in memory so that we don't have to read
	 * it from the I/O completion handler.
	 */
	if (xfs_need_iread_extents(&ip->i_df)) {
		xfs_ilock(ip, XFS_ILOCK_EXCL);
		error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK);
		xfs_iunlock(ip, XFS_ILOCK_EXCL);
		if (error)
			return error;
	}

	/*
	 * Set both flags in a single assignment.  Assigning IOMAP_F_DIRTY and
	 * then assigning IOMAP_F_ANON_WRITE separately would throw the dirty
	 * flag away again.  The extent map is only updated at I/O completion,
	 * so the mapping has to be reported as dirty.
	 */
	iomap->type = IOMAP_MAPPED;
	iomap->flags = IOMAP_F_DIRTY | IOMAP_F_ANON_WRITE;
	iomap->bdev = ip->i_mount->m_rtdev_targp->bt_bdev;
	iomap->offset = offset;
	iomap->length = length;
	return 0;
}

/*
 * iomap operations for direct writes to zoned RT devices.  Only ->iomap_begin
 * is provided.
 */
const struct iomap_ops xfs_zoned_direct_write_iomap_ops = {
	.iomap_begin		= xfs_zoned_direct_write_iomap_begin,
};
#endif /* CONFIG_XFS_RT */

static int
xfs_dax_write_iomap_end(
	struct inode		*inode,
+1 −0
Original line number Diff line number Diff line
@@ -51,6 +51,7 @@ xfs_aligned_fsb_count(

extern const struct iomap_ops xfs_buffered_write_iomap_ops;
extern const struct iomap_ops xfs_direct_write_iomap_ops;
extern const struct iomap_ops xfs_zoned_direct_write_iomap_ops;
extern const struct iomap_ops xfs_read_iomap_ops;
extern const struct iomap_ops xfs_seek_iomap_ops;
extern const struct iomap_ops xfs_xattr_iomap_ops;