Commit b1e09178 authored by John Garry, committed by Darrick J. Wong
Browse files

xfs: commit CoW-based atomic writes atomically



When completing a CoW-based write, each extent range mapping update is
covered by a separate transaction.

For a CoW-based atomic write, all mappings must be changed at once, so
change to use a single transaction.

Note that there is a limit on the number of log intent items which can
fit into a single transaction, but this is being ignored for now since
the count of items for a typical atomic write would be much less than is
typically supported. A typical atomic write would be expected to be 64KB
or less, which means only 16 possible extent unmaps, which is quite
small.

Reviewed-by: Darrick J. Wong <djwong@kernel.org>
[djwong: add tr_atomic_ioend]
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: John Garry <john.g.garry@oracle.com>
parent 11ab3190
Loading
Loading
Loading
Loading
+4 −0
Original line number Diff line number Diff line
@@ -91,6 +91,7 @@ xfs_log_calc_trans_resv_for_minlogblocks(
	 */
	if (xfs_want_minlogsize_fixes(&mp->m_sb)) {
		xfs_trans_resv_calc(mp, resv);
		resv->tr_atomic_ioend = M_RES(mp)->tr_atomic_ioend;
		return;
	}

@@ -107,6 +108,9 @@ xfs_log_calc_trans_resv_for_minlogblocks(

	xfs_trans_resv_calc(mp, resv);

	/* Copy the dynamic transaction reservation types from the running fs */
	resv->tr_atomic_ioend = M_RES(mp)->tr_atomic_ioend;

	if (xfs_has_reflink(mp)) {
		/*
		 * In the early days of reflink, typical log operation counts
+15 −0
Original line number Diff line number Diff line
@@ -1284,6 +1284,15 @@ xfs_calc_namespace_reservations(
	resp->tr_mkdir.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
}

STATIC void
xfs_calc_default_atomic_ioend_reservation(
	struct xfs_mount	*mp,
	struct xfs_trans_resv	*resp)
{
	/* Pick a default that will scale reasonably for the log size. */
	resp->tr_atomic_ioend = resp->tr_itruncate;
}

void
xfs_trans_resv_calc(
	struct xfs_mount	*mp,
@@ -1378,4 +1387,10 @@ xfs_trans_resv_calc(
	resp->tr_itruncate.tr_logcount += logcount_adj;
	resp->tr_write.tr_logcount += logcount_adj;
	resp->tr_qm_dqalloc.tr_logcount += logcount_adj;

	/*
	 * Now that we've finished computing the static reservations, we can
	 * compute the dynamic reservation for atomic writes.
	 */
	xfs_calc_default_atomic_ioend_reservation(mp, resp);
}
+1 −0
Original line number Diff line number Diff line
@@ -48,6 +48,7 @@ struct xfs_trans_resv {
	struct xfs_trans_res	tr_qm_dqalloc;	/* allocate quota on disk */
	struct xfs_trans_res	tr_sb;		/* modify superblock */
	struct xfs_trans_res	tr_fsyncts;	/* update timestamps on fsync */
	struct xfs_trans_res	tr_atomic_ioend; /* untorn write completion */
};

/* shorthand way of accessing reservation structure */
+4 −1
Original line number Diff line number Diff line
@@ -576,6 +576,9 @@ xfs_dio_write_end_io(
	nofs_flag = memalloc_nofs_save();

	if (flags & IOMAP_DIO_COW) {
		if (iocb->ki_flags & IOCB_ATOMIC)
			error = xfs_reflink_end_atomic_cow(ip, offset, size);
		else
			error = xfs_reflink_end_cow(ip, offset, size);
		if (error)
			goto out;
+56 −0
Original line number Diff line number Diff line
@@ -984,6 +984,62 @@ xfs_reflink_end_cow(
	return error;
}

/*
 * Complete a CoW-based atomic write covering @count bytes starting at byte
 * @offset of @ip.
 *
 * Every extent in the range is remapped into the file's data fork under one
 * transaction, which is the critical part in achieving atomic behaviour.
 * The regular CoW end path does not use this function, so as to keep the
 * block reservation per transaction as low as possible.
 *
 * Returns the error from transaction allocation or from the first failing
 * extent remap (in which case the transaction is cancelled); otherwise
 * returns the result of committing the transaction.
 */
int
xfs_reflink_end_atomic_cow(
	struct xfs_inode		*ip,
	xfs_off_t			offset,
	xfs_off_t			count)
{
	struct xfs_mount		*mp = ip->i_mount;
	struct xfs_trans		*tp;
	xfs_fileoff_t			next_fsb;
	xfs_fileoff_t			last_fsb;
	unsigned int			resblks;
	int				error;

	trace_xfs_reflink_end_cow(ip, offset, count);

	/* Round the start down and the end up to whole filesystem blocks. */
	next_fsb = XFS_B_TO_FSBT(mp, offset);
	last_fsb = XFS_B_TO_FSB(mp, offset + count);

	/*
	 * Worst case, every block in the range is remapped separately and
	 * each remap splits the btree, so reserve for one split per block.
	 */
	resblks = (last_fsb - next_fsb) *
			XFS_NEXTENTADD_SPACE_RES(mp, 1, XFS_DATA_FORK);

	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_atomic_ioend, resblks, 0,
			XFS_TRANS_RESERVE, &tp);
	if (error)
		return error;

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	xfs_trans_ijoin(tp, ip, 0);

	/* The helper advances next_fsb; stop at the first failure. */
	while (next_fsb < last_fsb) {
		error = xfs_reflink_end_cow_extent_locked(tp, ip, &next_fsb,
				last_fsb);
		if (error)
			break;
	}

	if (error) {
		trace_xfs_reflink_end_cow_error(ip, error, _RET_IP_);
		xfs_trans_cancel(tp);
	} else {
		error = xfs_trans_commit(tp);
	}

	/* The inode lock is dropped only after the commit or cancel. */
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;
}

/*
 * Free all CoW staging blocks that are still referenced by the ondisk refcount
 * metadata.  The ondisk metadata does not track which inode created the
Loading