Commit bd1d2c21 authored by John Garry, committed by Darrick J. Wong
Browse files

xfs: add xfs_atomic_write_cow_iomap_begin()



For CoW-based atomic writes, reuse the infrastructure for reflink CoW fork
support.

Add ->iomap_begin() callback xfs_atomic_write_cow_iomap_begin() to create
staging mappings in the CoW fork for atomic write updates.

The general steps in the function are as follows:
- find extent mapping in the CoW fork for the FS block range being written
	- if a partial or full extent is found, proceed to process the found extent
	- if no extent found, map in new blocks to the CoW fork
- convert unwritten blocks in extent if required
- update iomap extent mapping and return

The bulk of this function is quite similar to the processing in
xfs_reflink_allocate_cow(), where we try to find an extent mapping; if
none exists, then allocate a new extent in the CoW fork, convert unwritten
blocks, and return a mapping.

Performance testing has shown the XFS_ILOCK_EXCL locking to be quite
a bottleneck, so this is an area which could be optimised in future.

Christoph Hellwig contributed almost all of the code in
xfs_atomic_write_cow_iomap_begin().

Reviewed-by: Darrick J. Wong <djwong@kernel.org>
[djwong: add a new xfs_can_sw_atomic_write to convey intent better]
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: John Garry <john.g.garry@oracle.com>
parent 0ea88ed4
Loading
Loading
Loading
Loading
+128 −0
Original line number Diff line number Diff line
@@ -1022,6 +1022,134 @@ const struct iomap_ops xfs_zoned_direct_write_iomap_ops = {
};
#endif /* CONFIG_XFS_RT */

/*
 * ->iomap_begin handler for CoW-based (software) atomic direct writes.
 *
 * Produce a written mapping in the CoW fork for the start of the byte range
 * [@offset, @offset + @length).  If the CoW fork already has an extent at the
 * start of the range, that extent (trimmed to the write range) is used.
 * Otherwise new unwritten blocks are allocated into the CoW fork hole.  Any
 * unwritten blocks in the mapping being returned are converted to written
 * before the mapping is reported through @iomap with IOMAP_F_SHARED set.
 *
 * @flags must include IOMAP_WRITE and IOMAP_DIRECT.  @srcmap is not used.
 *
 * Returns 0 on success, -EIO if the mount is shut down, -EINVAL if the mount
 * cannot do software atomic writes, -EAGAIN for IOMAP_NOWAIT requests, or the
 * error returned by transaction allocation/commit, block allocation or
 * unwritten extent conversion.
 *
 * NOTE(review): delalloc reservations found in the CoW fork are not
 * special-cased here -- confirm that none can exist for inodes that reach
 * this path.
 */
static int
xfs_atomic_write_cow_iomap_begin(
	struct inode		*inode,
	loff_t			offset,
	loff_t			length,
	unsigned		flags,
	struct iomap		*iomap,
	struct iomap		*srcmap)
{
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	const xfs_fileoff_t	offset_fsb = XFS_B_TO_FSBT(mp, offset);
	const xfs_fileoff_t	end_fsb = xfs_iomap_end_fsb(mp, offset, length);
	/* Size of the write range; never modified after this point. */
	const xfs_filblks_t	count_fsb = end_fsb - offset_fsb;
	/* Size of the CoW fork hole that starts at offset_fsb. */
	xfs_filblks_t		hole_count_fsb;
	int			nmaps = 1;
	xfs_filblks_t		resaligned;
	struct xfs_bmbt_irec	cmap;
	struct xfs_iext_cursor	icur;
	struct xfs_trans	*tp;
	unsigned int		dblocks = 0, rblocks = 0;
	int			error;
	u64			seq;

	ASSERT(flags & IOMAP_WRITE);
	ASSERT(flags & IOMAP_DIRECT);

	if (xfs_is_shutdown(mp))
		return -EIO;

	/* Callers should never get here if sw atomic writes are unsupported. */
	if (!xfs_can_sw_atomic_write(mp)) {
		ASSERT(xfs_can_sw_atomic_write(mp));
		return -EINVAL;
	}

	/* blocks are always allocated in this path */
	if (flags & IOMAP_NOWAIT)
		return -EAGAIN;

	trace_xfs_iomap_atomic_write_cow(ip, offset, length);

	xfs_ilock(ip, XFS_ILOCK_EXCL);

	if (!ip->i_cowfp) {
		ASSERT(!xfs_is_reflink_inode(ip));
		xfs_ifork_init_cow(ip);
	}

	/*
	 * If the lookup finds nothing, pretend the next extent starts at the
	 * end of the write range so that the hole spans the whole write.
	 */
	if (!xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &cmap))
		cmap.br_startoff = end_fsb;
	if (cmap.br_startoff <= offset_fsb) {
		/* An existing CoW extent covers the start of the write. */
		xfs_trim_extent(&cmap, offset_fsb, count_fsb);
		goto found;
	}

	/*
	 * There is a hole in the CoW fork at offset_fsb.  Track its size
	 * separately from count_fsb so that the write range stays available
	 * for trimming whatever mapping we end up with.
	 */
	hole_count_fsb = cmap.br_startoff - offset_fsb;

	resaligned = xfs_aligned_fsb_count(offset_fsb, hole_count_fsb,
			xfs_get_cowextsz_hint(ip));
	xfs_iunlock(ip, XFS_ILOCK_EXCL);

	if (XFS_IS_REALTIME_INODE(ip)) {
		dblocks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
		rblocks = resaligned;
	} else {
		dblocks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned);
		rblocks = 0;
	}

	/*
	 * The code below relies on a successful xfs_trans_alloc_inode()
	 * returning with XFS_ILOCK_EXCL held again; on failure we return
	 * without unlocking.
	 */
	error = xfs_trans_alloc_inode(ip, &M_RES(mp)->tr_write, dblocks,
			rblocks, false, &tp);
	if (error)
		return error;

	/* extent layout could have changed since the unlock, so check again */
	if (!xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &cmap))
		cmap.br_startoff = end_fsb;
	if (cmap.br_startoff <= offset_fsb) {
		/*
		 * Somebody else filled the hole while the lock was dropped.
		 * Trim to the write range, not to the stale hole size.
		 */
		xfs_trim_extent(&cmap, offset_fsb, count_fsb);
		xfs_trans_cancel(tp);
		goto found;
	}

	/*
	 * Allocate the entire reservation as unwritten blocks.
	 *
	 * Use XFS_BMAPI_EXTSZALIGN to hint at aligning new extents according to
	 * extszhint, such that there will be a greater chance that future
	 * atomic writes to that same range will be aligned (and don't require
	 * this COW-based method).
	 */
	error = xfs_bmapi_write(tp, ip, offset_fsb, hole_count_fsb,
			XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC |
			XFS_BMAPI_EXTSZALIGN, 0, &cmap, &nmaps);
	if (error) {
		xfs_trans_cancel(tp);
		goto out_unlock;
	}

	xfs_inode_set_cowblocks_tag(ip);
	error = xfs_trans_commit(tp);
	if (error)
		goto out_unlock;

	/*
	 * The hole (and therefore the new mapping) can extend past the end of
	 * the write range, so clamp the mapping to what the caller asked for.
	 */
	xfs_trim_extent(&cmap, offset_fsb, count_fsb);

found:
	if (cmap.br_state != XFS_EXT_NORM) {
		/*
		 * Convert exactly the blocks being returned.  cmap may be
		 * shorter than the write range, and blocks outside of it must
		 * not be marked written on behalf of this mapping.
		 */
		error = xfs_reflink_convert_cow_locked(ip, cmap.br_startoff,
				cmap.br_blockcount);
		if (error)
			goto out_unlock;
		cmap.br_state = XFS_EXT_NORM;
	}

	/* Report the byte length from @offset to the end of the mapping. */
	length = XFS_FSB_TO_B(mp, cmap.br_startoff + cmap.br_blockcount);
	trace_xfs_iomap_found(ip, offset, length - offset, XFS_COW_FORK, &cmap);
	seq = xfs_iomap_inode_sequence(ip, IOMAP_F_SHARED);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, IOMAP_F_SHARED, seq);

out_unlock:
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;
}

/*
 * iomap operations for CoW-based atomic direct writes.  Only ->iomap_begin
 * is provided; it stages the write in the CoW fork.
 */
const struct iomap_ops xfs_atomic_write_cow_iomap_ops = {
	.iomap_begin		= xfs_atomic_write_cow_iomap_begin,
};

static int
xfs_dax_write_iomap_end(
	struct inode		*inode,
+1 −0
Original line number Diff line number Diff line
@@ -56,5 +56,6 @@ extern const struct iomap_ops xfs_read_iomap_ops;
extern const struct iomap_ops xfs_seek_iomap_ops;
extern const struct iomap_ops xfs_xattr_iomap_ops;
extern const struct iomap_ops xfs_dax_write_iomap_ops;
/* iomap ops that stage atomic direct writes in the CoW fork */
extern const struct iomap_ops xfs_atomic_write_cow_iomap_ops;

#endif /* __XFS_IOMAP_H__*/
+5 −0
Original line number Diff line number Diff line
@@ -464,6 +464,11 @@ static inline bool xfs_has_nonzoned(const struct xfs_mount *mp)
	return !xfs_has_zoned(mp);
}

/*
 * Can this mount perform software (CoW-based) atomic writes?  This currently
 * depends only on reflink support being enabled.
 */
static inline bool xfs_can_sw_atomic_write(const struct xfs_mount *mp)
{
	return xfs_has_reflink(mp);
}

/*
 * Some features are always on for v5 file systems, allow the compiler to
 * eliminate dead code when building without v4 support.
+1 −1
Original line number Diff line number Diff line
@@ -293,7 +293,7 @@ xfs_bmap_trim_cow(
	return xfs_reflink_trim_around_shared(ip, imap, shared);
}

static int
int
xfs_reflink_convert_cow_locked(
	struct xfs_inode	*ip,
	xfs_fileoff_t		offset_fsb,
+2 −0
Original line number Diff line number Diff line
@@ -35,6 +35,8 @@ int xfs_reflink_allocate_cow(struct xfs_inode *ip, struct xfs_bmbt_irec *imap,
		bool convert_now);
extern int xfs_reflink_convert_cow(struct xfs_inode *ip, xfs_off_t offset,
		xfs_off_t count);
/*
 * Variant of xfs_reflink_convert_cow() that takes the range in filesystem
 * blocks.  xfs_atomic_write_cow_iomap_begin() calls it with XFS_ILOCK_EXCL
 * already held.
 */
int xfs_reflink_convert_cow_locked(struct xfs_inode *ip,
		xfs_fileoff_t offset_fsb, xfs_filblks_t count_fsb);

extern int xfs_reflink_cancel_cow_blocks(struct xfs_inode *ip,
		struct xfs_trans **tpp, xfs_fileoff_t offset_fsb,
Loading