Commit 080d01c4 authored by Christoph Hellwig
Browse files

xfs: implement zoned garbage collection



RT groups on a zoned file system need to be completely empty before their
space can be reused.  This means that partially empty groups need to be
emptied entirely to free up space if no entirely free groups are
available.

Add a garbage collection thread that moves all data out of the least used
zone when not enough free zones are available, and which resets all zones
that have been emptied.  To find the least used zone, a simple set of 10
buckets based on the amount of space used in the zone is used.  To empty
zones, the rmap is walked to find the owners and the data is read and then
written to the new place.

To automatically defragment files the rmap records are sorted by inode
and logical offset.  This means defragmentation of parallel writes into
a single zone happens automatically when performing garbage collection.
Because holding the iolock over the entire GC cycle would inject very
noticeable latency for other accesses to the inodes, the iolock is not
taken while performing I/O.  Instead the I/O completion handler checks
that the mapping hasn't changed from the one recorded at the start of
the GC cycle and doesn't update the mapping if it has changed.

Co-developed-by: Hans Holmberg <hans.holmberg@wdc.com>
Signed-off-by: Hans Holmberg <hans.holmberg@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>
parent 0bb21930
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -139,6 +139,7 @@ xfs-$(CONFIG_XFS_QUOTA) += xfs_dquot.o \
# xfs_rtbitmap is shared with libxfs
xfs-$(CONFIG_XFS_RT)		+= xfs_rtalloc.o \
				   xfs_zone_alloc.o \
				   xfs_zone_gc.o \
				   xfs_zone_space_resv.o

xfs-$(CONFIG_XFS_POSIX_ACL)	+= xfs_acl.o
+17 −4
Original line number Diff line number Diff line
@@ -19,11 +19,24 @@ struct xfs_group {
#ifdef __KERNEL__
	/* -- kernel only structures below this line -- */

	/*
	 * The two members below share storage; which one is meaningful depends
	 * on the kind of group, as described for each member.
	 */
	union {
		/*
		 * For perags and non-zoned RT groups:
		 * Track freed but not yet committed extents.
		 */
		struct xfs_extent_busy_tree	*xg_busy_extents;

		/*
		 * For zoned RT groups:
		 * List of groups that need a zone reset.  This is the link to
		 * another group on that list.
		 *
		 * The zonegc code forces a log flush of the rtrmap inode before
		 * resetting the write pointer, so there is no need for
		 * individual busy extent tracking.
		 */
		struct xfs_group		*xg_next_reset;
	};

	/*
	 * Bitsets of per-ag metadata that have been checked and/or are sick.
	 * Callers should hold xg_state_lock before accessing this field.
+6 −0
Original line number Diff line number Diff line
@@ -58,6 +58,12 @@ struct xfs_rtgroup {
 */
#define XFS_RTG_FREE			XA_MARK_0

/*
 * For zoned RT devices this is set on groups that are fully written and that
 * have unused blocks.  Used by the garbage collection to pick targets.
 *
 * NOTE(review): like XFS_RTG_FREE this reuses an XA_MARK_* value, so it is
 * presumably applied as an xarray mark on the group's entry -- confirm
 * against the users of this define.
 */
#define XFS_RTG_RECLAIMABLE		XA_MARK_1

static inline struct xfs_rtgroup *to_rtg(struct xfs_group *xg)
{
	return container_of(xg, struct xfs_rtgroup, rtg_group);
+1 −1
Original line number Diff line number Diff line
@@ -671,7 +671,7 @@ xfs_extent_busy_wait_all(
	while ((pag = xfs_perag_next(mp, pag)))
		xfs_extent_busy_wait_group(pag_group(pag));

	if (xfs_has_rtgroups(mp))
	if (xfs_has_rtgroups(mp) && !xfs_has_zoned(mp))
		while ((rtg = xfs_rtgroup_next(mp, rtg)))
			xfs_extent_busy_wait_group(rtg_group(rtg));
}
+4 −0
Original line number Diff line number Diff line
@@ -1090,6 +1090,8 @@ xfs_mountfs(
		error = xfs_fs_reserve_ag_blocks(mp);
		if (error && error != -ENOSPC)
			goto out_agresv;

		xfs_zone_gc_start(mp);
	}

	return 0;
@@ -1178,6 +1180,8 @@ xfs_unmountfs(
	xfs_inodegc_flush(mp);

	xfs_blockgc_stop(mp);
	if (!test_bit(XFS_OPSTATE_READONLY, &mp->m_opstate))
		xfs_zone_gc_stop(mp);
	xfs_fs_unreserve_ag_blocks(mp);
	xfs_qm_unmount_quotas(mp);
	if (xfs_has_zoned(mp))
Loading