Commit 096384de authored by Linus Torvalds
Browse files

Merge tag 'xfs-fixes-6.15-rc3' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull XFS fixes from Carlos Maiolino:
 "This mostly includes fixes and documentation for the zoned allocator
  feature merged during previous merge window, but it also adds a sysfs
  tunable for the zone garbage collector.

  There is also a fix for a regression to the RT device that we'd like
  to fix ASAP now that we're getting more users on the RT zoned
  allocator"

* tag 'xfs-fixes-6.15-rc3' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
  xfs: document zoned rt specifics in admin-guide
  xfs: fix fsmap for internal zoned devices
  xfs: Fix spelling mistake "drity" -> "dirty"
  xfs: compute buffer address correctly in xmbuf_map_backing_mem
  xfs: add tunable threshold parameter for triggering zone GC
  xfs: mark xfs_buf_free as might_sleep()
  xfs: remove the leftover xfs_{set,clear}_li_failed infrastructure
parents 0cb9ce06 c7b67ddc
Loading
Loading
Loading
Loading
+50 −0
Original line number Diff line number Diff line
@@ -124,6 +124,14 @@ When mounting an XFS filesystem, the following options are accepted.
	controls the size of each buffer and so is also relevant to
	this case.

  lifetime (default) or nolifetime
	Enable data placement based on write life time hints provided
	by the user. This turns on co-allocation of data of similar
	life times when statistically favorable to reduce garbage
	collection cost.

	These options are only available for zoned rt file systems.

  logbsize=value
	Set the size of each in-memory log buffer.  The size may be
	specified in bytes, or in kilobytes with a "k" suffix.
@@ -143,6 +151,14 @@ When mounting an XFS filesystem, the following options are accepted.
	optional, and the log section can be separate from the data
	section or contained within it.

  max_open_zones=value
	Specify the max number of zones to keep open for writing on a
	zoned rt device. Many open zones aid file data separation
	but may impact performance on HDDs.

	If ``max_open_zones`` is not specified, the value is determined
	by the capabilities and the size of the zoned rt device.

  noalign
	Data allocations will not be aligned at stripe unit
	boundaries. This is only relevant to filesystems created
@@ -542,3 +558,37 @@ The interesting knobs for XFS workqueues are as follows:
  nice           Relative priority of scheduling the threads.  These are the
                 same nice levels that can be applied to userspace processes.
============     ===========

Zoned Filesystems
=================

For zoned file systems, the following attribute is exposed in:

  /sys/fs/xfs/<dev>/zoned/

  max_open_zones		(Min:  1  Default:  Varies  Max:  UINTMAX)
	This read-only attribute exposes the maximum number of open zones
	available for data placement. The value is determined at mount time and
	is limited by the capabilities of the backing zoned device, file system
	size and the max_open_zones mount option.

Zoned Filesystems
=================

For zoned file systems, the following attributes are exposed in:

 /sys/fs/xfs/<dev>/zoned/

 max_open_zones                 (Min:  1  Default:  Varies  Max:  UINTMAX)
        This read-only attribute exposes the maximum number of open zones
        available for data placement. The value is determined at mount time and
        is limited by the capabilities of the backing zoned device, file system
        size and the max_open_zones mount option.

 zonegc_low_space               (Min:  0  Default:  0  Max:  100)
        Define the percentage of unused space that GC should keep
        available for writing. A high value will reclaim more of the space
        occupied by unused blocks, creating a larger buffer against write
        bursts at the cost of increased write amplification.  Regardless
        of this value, garbage collection will always aim to free a minimum
        number of blocks to keep max_open_zones open for data placement purposes.
+1 −0
Original line number Diff line number Diff line
@@ -105,6 +105,7 @@ xfs_buf_free(
{
	unsigned int		size = BBTOB(bp->b_length);

	might_sleep();
	trace_xfs_buf_free(bp, _RET_IP_);

	ASSERT(list_empty(&bp->b_lru));
+1 −1
Original line number Diff line number Diff line
@@ -165,7 +165,7 @@ xmbuf_map_backing_mem(
	folio_set_dirty(folio);
	folio_unlock(folio);

	bp->b_addr = folio_address(folio);
	bp->b_addr = folio_address(folio) + offset_in_folio(folio, pos);
	return 0;
}

+1 −2
Original line number Diff line number Diff line
@@ -1186,9 +1186,8 @@ xfs_qm_dqflush_done(
	if (test_bit(XFS_LI_IN_AIL, &lip->li_flags) &&
	    (lip->li_lsn == qlip->qli_flush_lsn ||
	     test_bit(XFS_LI_FAILED, &lip->li_flags))) {

		spin_lock(&ailp->ail_lock);
		xfs_clear_li_failed(lip);
		clear_bit(XFS_LI_FAILED, &lip->li_flags);
		if (lip->li_lsn == qlip->qli_flush_lsn) {
			/* xfs_ail_update_finish() drops the AIL lock */
			tail_lsn = xfs_ail_delete_one(ailp, lip);
+33 −18
Original line number Diff line number Diff line
@@ -876,6 +876,7 @@ xfs_getfsmap_rtdev_rmapbt(
	const struct xfs_fsmap		*keys,
	struct xfs_getfsmap_info	*info)
{
	struct xfs_fsmap		key0 = *keys; /* struct copy */
	struct xfs_mount		*mp = tp->t_mountp;
	struct xfs_rtgroup		*rtg = NULL;
	struct xfs_btree_cur		*bt_cur = NULL;
@@ -887,32 +888,46 @@ xfs_getfsmap_rtdev_rmapbt(
	int				error = 0;

	eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_rtstart + mp->m_sb.sb_rblocks);
	if (keys[0].fmr_physical >= eofs)
	if (key0.fmr_physical >= eofs)
		return 0;

	/*
	 * On zoned filesystems with an internal rt volume, the volume comes
	 * immediately after the end of the data volume.  However, the
	 * xfs_rtblock_t address space is relative to the start of the data
	 * device, which means that the first @rtstart fsblocks do not actually
	 * point anywhere.  If a fsmap query comes in with the low key starting
	 * below @rtstart, report it as "owned by filesystem".
	 */
	rtstart_daddr = XFS_FSB_TO_BB(mp, mp->m_sb.sb_rtstart);
	if (keys[0].fmr_physical < rtstart_daddr) {
	if (xfs_has_zoned(mp) && key0.fmr_physical < rtstart_daddr) {
		struct xfs_fsmap_irec		frec = {
			.owner			= XFS_RMAP_OWN_FS,
			.len_daddr		= rtstart_daddr,
		};

		/* Adjust the low key if we are continuing from where we left off. */
		if (keys[0].fmr_length > 0) {
			info->low_daddr = keys[0].fmr_physical + keys[0].fmr_length;
			return 0;
		}

		/* Fabricate an rmap entry for space occupied by the data dev */
		/*
		 * Adjust the start of the query range if we're picking up from
		 * a previous round, and only emit the record if we haven't
		 * already gone past.
		 */
		key0.fmr_physical += key0.fmr_length;
		if (key0.fmr_physical < rtstart_daddr) {
			error = xfs_getfsmap_helper(tp, info, &frec);
			if (error)
				return error;

			key0.fmr_physical = rtstart_daddr;
		}

	start_rtb = xfs_daddr_to_rtb(mp, rtstart_daddr + keys[0].fmr_physical);
	end_rtb = xfs_daddr_to_rtb(mp, rtstart_daddr +
			min(eofs - 1, keys[1].fmr_physical));
		/* Zero the other fields to avoid further adjustments. */
		key0.fmr_owner = 0;
		key0.fmr_offset = 0;
		key0.fmr_length = 0;
	}

	start_rtb = xfs_daddr_to_rtb(mp, key0.fmr_physical);
	end_rtb = xfs_daddr_to_rtb(mp, min(eofs - 1, keys[1].fmr_physical));
	info->missing_owner = XFS_FMR_OWN_FREE;

	/*
@@ -920,12 +935,12 @@ xfs_getfsmap_rtdev_rmapbt(
	 * low to the fsmap low key and max out the high key to the end
	 * of the rtgroup.
	 */
	info->low.rm_offset = XFS_BB_TO_FSBT(mp, keys[0].fmr_offset);
	error = xfs_fsmap_owner_to_rmap(&info->low, &keys[0]);
	info->low.rm_offset = XFS_BB_TO_FSBT(mp, key0.fmr_offset);
	error = xfs_fsmap_owner_to_rmap(&info->low, &key0);
	if (error)
		return error;
	info->low.rm_blockcount = XFS_BB_TO_FSBT(mp, keys[0].fmr_length);
	xfs_getfsmap_set_irec_flags(&info->low, &keys[0]);
	info->low.rm_blockcount = XFS_BB_TO_FSBT(mp, key0.fmr_length);
	xfs_getfsmap_set_irec_flags(&info->low, &key0);

	/* Adjust the low key if we are continuing from where we left off. */
	if (info->low.rm_blockcount == 0) {
Loading