Commit 4528b905 authored by Darrick J. Wong
Browse files

xfs: allow sysadmins to specify a maximum atomic write limit at mount time



Introduce a mount option to allow sysadmins to specify the maximum size
of an atomic write.  If the filesystem can work with the supplied value,
that becomes the new guaranteed maximum.

The value mustn't be too big for the existing filesystem geometry (max
write size, max AG/rtgroup size).  We dynamically recompute the
tr_atomic_write transaction reservation based on the given block size,
check that the current log size isn't less than the new minimum log size
constraints, and set a new maximum.

The actual software atomic write max is still computed based off of
tr_atomic_ioend the same way it has been for the past few commits.  Note also
that xfs_calc_atomic_write_log_geometry is non-static because mkfs will
need that.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: John Garry <john.g.garry@oracle.com>
Reviewed-by: John Garry <john.g.garry@oracle.com>
parent 9dffc58f
Loading
Loading
Loading
Loading
+11 −0
Original line number Diff line number Diff line
@@ -151,6 +151,17 @@ When mounting an XFS filesystem, the following options are accepted.
	optional, and the log section can be separate from the data
	section or contained within it.

  max_atomic_write=value
	Set the maximum size of an atomic write.  The size may be
	specified in bytes, in kilobytes with a "k" suffix, in megabytes
	with a "m" suffix, or in gigabytes with a "g" suffix.  The size
	cannot be larger than the maximum write size, larger than the
	size of any allocation group, or larger than the size of a
	remapping operation that the log can complete atomically.

	If this option is not given, the default is to size the maximum
	I/O completion so that each CPU can handle one at a time.

  max_open_zones=value
	Specify the max number of zones to keep open for writing on a
	zoned rt device. Many open zones aids file data separation
+69 −0
Original line number Diff line number Diff line
@@ -1488,3 +1488,72 @@ xfs_calc_max_atomic_write_fsblocks(

	return ret;
}

/*
 * Compute the log blocks and transaction reservation needed to complete an
 * atomic write of a given number of blocks.  Worst case, each block requires
 * separate handling.  A return value of 0 means something went wrong.
 *
 * @mp:		mount whose tr_atomic_ioend reservation is borrowed as
 *		scratch space for the minimum log size calculation
 * @blockcount:	number of blocks in the atomic write; must be nonzero
 * @new_logres:	on success, receives the log reservation, computed as
 *		blockcount * per_intent + step_size; not written if that
 *		arithmetic overflows
 *
 * Returns the value of xfs_log_calc_minimum_size() evaluated with the new
 * reservation temporarily installed, or 0 if the reservation overflowed an
 * unsigned int.
 */
xfs_extlen_t
xfs_calc_atomic_write_log_geometry(
	struct xfs_mount	*mp,
	xfs_extlen_t		blockcount,
	unsigned int		*new_logres)
{
	struct xfs_trans_res	*curr_res = &M_RES(mp)->tr_atomic_ioend;
	/* Sampled before the default reservation is recomputed below. */
	uint			old_logres = curr_res->tr_logres;
	unsigned int		per_intent, step_size;
	unsigned int		logres;
	xfs_extlen_t		min_logblocks;

	ASSERT(blockcount > 0);

	/*
	 * NOTE(review): presumably this rewrites M_RES(mp)->tr_atomic_ioend
	 * with default values so the geometry below starts from a known
	 * state -- confirm against the helper's definition.
	 */
	xfs_calc_default_atomic_ioend_reservation(mp, M_RES(mp));

	/* Per-intent cost is the return value; step_size is an out param. */
	per_intent = xfs_calc_atomic_write_ioend_geometry(mp, &step_size);

	/*
	 * Check for overflows
	 *
	 * NOTE(review): this early return skips the tr_logres restore done
	 * further down.  If the default-reservation call above changed
	 * tr_logres, the caller sees that value rather than old_logres --
	 * confirm this is intended.
	 */
	if (check_mul_overflow(blockcount, per_intent, &logres) ||
	    check_add_overflow(logres, step_size, &logres))
		return 0;

	/*
	 * Install the candidate reservation only for the duration of the
	 * minimum log size calculation, then put back the value that was
	 * sampled on entry.
	 */
	curr_res->tr_logres = logres;
	min_logblocks = xfs_log_calc_minimum_size(mp);
	curr_res->tr_logres = old_logres;

	trace_xfs_calc_max_atomic_write_log_geometry(mp, per_intent, step_size,
			blockcount, min_logblocks, logres);

	*new_logres = logres;
	return min_logblocks;
}

/*
 * Set up the tr_atomic_ioend log reservation for out of place atomic writes
 * of up to @blockcount blocks.
 *
 * A @blockcount of zero means the caller has no specific size in mind, in
 * which case the default reservation is installed.  Otherwise the
 * reservation is only updated if the existing log is large enough for it.
 *
 * Returns 0 on success, or -EINVAL if the reservation could not be computed
 * or the log is too small to support it.
 */
int
xfs_calc_atomic_write_reservation(
	struct xfs_mount	*mp,
	xfs_extlen_t		blockcount)
{
	xfs_extlen_t		logblocks_needed;
	unsigned int		logres;

	/* No explicit request, so fall back to the defaults. */
	if (!blockcount) {
		xfs_calc_default_atomic_ioend_reservation(mp, M_RES(mp));
		return 0;
	}

	logblocks_needed = xfs_calc_atomic_write_log_geometry(mp, blockcount,
			&logres);

	/* Zero is the geometry helper's way of reporting failure. */
	if (logblocks_needed == 0)
		return -EINVAL;

	/* The log we already have must be able to hold the new minimum. */
	if (logblocks_needed > mp->m_sb.sb_logblocks)
		return -EINVAL;

	M_RES(mp)->tr_atomic_ioend.tr_logres = logres;
	return 0;
}
+4 −0
Original line number Diff line number Diff line
@@ -122,5 +122,9 @@ unsigned int xfs_calc_write_reservation_minlogsize(struct xfs_mount *mp);
unsigned int xfs_calc_qm_dqalloc_reservation_minlogsize(struct xfs_mount *mp);

xfs_extlen_t xfs_calc_max_atomic_write_fsblocks(struct xfs_mount *mp);
/*
 * Minimum log size, in blocks, needed to complete an atomic write of
 * @blockcount blocks; 0 on failure.  The matching log reservation is passed
 * back through @new_logres.
 */
xfs_extlen_t xfs_calc_atomic_write_log_geometry(struct xfs_mount *mp,
		xfs_extlen_t blockcount, unsigned int *new_logres);
/*
 * Set the tr_atomic_ioend reservation for atomic writes of @blockcount
 * blocks; a @blockcount of 0 selects the defaults.  Returns 0 or -EINVAL.
 */
int xfs_calc_atomic_write_reservation(struct xfs_mount *mp,
		xfs_extlen_t blockcount);

#endif	/* __XFS_TRANS_RESV_H__ */
+79 −1
Original line number Diff line number Diff line
@@ -742,6 +742,82 @@ xfs_calc_atomic_write_unit_max(
			max_agsize, max_rgsize);
}

/*
 * Try to set the atomic write maximum to a new value that we got from
 * userspace via mount option.
 */
int
xfs_set_max_atomic_write_opt(
	struct xfs_mount	*mp,
	unsigned long long	new_max_bytes)
{
	const xfs_filblks_t	new_max_fsbs = XFS_B_TO_FSBT(mp, new_max_bytes);
	const xfs_extlen_t	max_write = xfs_calc_atomic_write_max(mp);
	const xfs_extlen_t	max_group =
		max(mp->m_groups[XG_TYPE_AG].blocks,
		    mp->m_groups[XG_TYPE_RTG].blocks);
	const xfs_extlen_t	max_group_write =
		max(xfs_calc_perag_awu_max(mp), xfs_calc_rtgroup_awu_max(mp));
	int			error;

	if (new_max_bytes == 0)
		goto set_limit;

	ASSERT(max_write <= U32_MAX);

	/* generic_atomic_write_valid enforces power of two length */
	if (!is_power_of_2(new_max_bytes)) {
		xfs_warn(mp,
 "max atomic write size of %llu bytes is not a power of 2",
				new_max_bytes);
		return -EINVAL;
	}

	if (new_max_bytes & mp->m_blockmask) {
		xfs_warn(mp,
 "max atomic write size of %llu bytes not aligned with fsblock",
				new_max_bytes);
		return -EINVAL;
	}

	if (new_max_fsbs > max_write) {
		xfs_warn(mp,
 "max atomic write size of %lluk cannot be larger than max write size %lluk",
				new_max_bytes >> 10,
				XFS_FSB_TO_B(mp, max_write) >> 10);
		return -EINVAL;
	}

	if (new_max_fsbs > max_group) {
		xfs_warn(mp,
 "max atomic write size of %lluk cannot be larger than allocation group size %lluk",
				new_max_bytes >> 10,
				XFS_FSB_TO_B(mp, max_group) >> 10);
		return -EINVAL;
	}

	if (new_max_fsbs > max_group_write) {
		xfs_warn(mp,
 "max atomic write size of %lluk cannot be larger than max allocation group write size %lluk",
				new_max_bytes >> 10,
				XFS_FSB_TO_B(mp, max_group_write) >> 10);
		return -EINVAL;
	}

set_limit:
	error = xfs_calc_atomic_write_reservation(mp, new_max_fsbs);
	if (error) {
		xfs_warn(mp,
 "cannot support completing atomic writes of %lluk",
				new_max_bytes >> 10);
		return error;
	}

	xfs_calc_atomic_write_unit_max(mp);
	mp->m_awu_max_bytes = new_max_bytes;
	return 0;
}

/* Compute maximum possible height for realtime btree types for this fs. */
static inline void
xfs_rtbtree_compute_maxlevels(
@@ -1163,7 +1239,9 @@ xfs_mountfs(
	 * derived from transaction reservations, so we must do this after the
	 * log is fully initialized.
	 */
	xfs_calc_atomic_write_unit_max(mp);
	error = xfs_set_max_atomic_write_opt(mp, mp->m_awu_max_bytes);
	if (error)
		goto out_agresv;

	return 0;

+6 −0
Original line number Diff line number Diff line
@@ -237,6 +237,9 @@ typedef struct xfs_mount {
	unsigned int		m_max_open_zones;
	unsigned int		m_zonegc_low_space;

	/* max_atomic_write mount option value */
	unsigned long long	m_awu_max_bytes;

	/*
	 * Bitsets of per-fs metadata that have been checked and/or are sick.
	 * Callers must hold m_sb_lock to access these two fields.
@@ -804,4 +807,7 @@ static inline void xfs_mod_sb_delalloc(struct xfs_mount *mp, int64_t delta)
	percpu_counter_add(&mp->m_delalloc_blks, delta);
}

/*
 * Validate and apply a max_atomic_write mount option value given in bytes;
 * passing zero requests the default.  Returns 0 or a negative errno.
 */
int xfs_set_max_atomic_write_opt(struct xfs_mount *mp,
		unsigned long long new_max_bytes);

#endif	/* __XFS_MOUNT_H__ */
Loading