Commit 0bb21930 authored by Christoph Hellwig
Browse files

xfs: add support for zoned space reservations



For zoned file systems garbage collection (GC) has to take the iolock
and mmaplock after moving data to a new place to synchronize with
readers.  This means waiting for garbage collection with the iolock held
can deadlock.

To avoid this, the worst case required blocks have to be reserved before
taking the iolock, which is done using a new RTAVAILABLE counter that
tracks blocks that are free to write into and don't require garbage
collection.  The new helpers try to take these available blocks, and
if there aren't enough available they wake GC and wait for it.  This is
done using a list of on-stack reservations to ensure fairness.

Co-developed-by: Hans Holmberg <hans.holmberg@wdc.com>
Signed-off-by: Hans Holmberg <hans.holmberg@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>
parent 4e4d5207
Loading
Loading
Loading
Loading
+2 −1
Original line number Diff line number Diff line
@@ -138,7 +138,8 @@ xfs-$(CONFIG_XFS_QUOTA) += xfs_dquot.o \

# xfs_rtbitmap is shared with libxfs
xfs-$(CONFIG_XFS_RT)		+= xfs_rtalloc.o \
				   xfs_zone_alloc.o
				   xfs_zone_alloc.o \
				   xfs_zone_space_resv.o

xfs-$(CONFIG_XFS_POSIX_ACL)	+= xfs_acl.o
xfs-$(CONFIG_SYSCTL)		+= xfs_sysctl.o
+11 −4
Original line number Diff line number Diff line
@@ -40,6 +40,7 @@
#include "xfs_symlink_remote.h"
#include "xfs_inode_util.h"
#include "xfs_rtgroup.h"
#include "xfs_zone_alloc.h"

struct kmem_cache		*xfs_bmap_intent_cache;

@@ -4788,12 +4789,18 @@ xfs_bmap_del_extent_delay(
	da_diff = da_old - da_new;
	fdblocks = da_diff;

	if (bflags & XFS_BMAPI_REMAP)
	if (bflags & XFS_BMAPI_REMAP) {
		;
	else if (isrt)
		xfs_add_frextents(mp, xfs_blen_to_rtbxlen(mp, del->br_blockcount));
	else
	} else if (isrt) {
		xfs_rtbxlen_t	rtxlen;

		rtxlen = xfs_blen_to_rtbxlen(mp, del->br_blockcount);
		if (xfs_is_zoned_inode(ip))
			xfs_zoned_add_available(mp, rtxlen);
		xfs_add_frextents(mp, rtxlen);
	} else {
		fdblocks += del->br_blockcount;
	}

	xfs_add_fdblocks(mp, fdblocks);
	xfs_mod_delalloc(ip, -(int64_t)del->br_blockcount, -da_diff);
+11 −1
Original line number Diff line number Diff line
@@ -244,12 +244,22 @@ enum xfs_free_counter {
	 */
	XC_FREE_RTEXTENTS,

	/*
	 * Number of RT extents available for immediate use.
	 *
	 * This counter only exists for zoned RT devices and indicates the
	 * number of RT extents that can be directly used by writes.
	 * XC_FREE_RTEXTENTS also includes blocks that have been written
	 * previously and freed, but sit in a rtgroup that still needs a zone
	 * reset.
	 */
	XC_FREE_RTAVAILABLE,
	XC_FREE_NR,
};

#define XFS_FREECOUNTER_STR \
	{ XC_FREE_BLOCKS,		"blocks" }, \
	{ XC_FREE_RTEXTENTS,		"rtextents" }
	{ XC_FREE_RTEXTENTS,		"rtextents" }, \
	{ XC_FREE_RTAVAILABLE,		"rtavailable" }

/*
 * Type verifier functions
+21 −15
Original line number Diff line number Diff line
@@ -465,6 +465,7 @@ xfs_mount_reset_sbqflags(
/*
 * Human-readable label for each free space counter, indexed by the
 * enum xfs_free_counter value.
 */
static const char *const xfs_free_pool_name[] = {
	[XC_FREE_BLOCKS]	= "free blocks",
	[XC_FREE_RTEXTENTS]	= "free rt extents",
	[XC_FREE_RTAVAILABLE]	= "available rt extents",
};

uint64_t
@@ -472,22 +473,27 @@ xfs_default_resblks(
	struct xfs_mount	*mp,
	enum xfs_free_counter	ctr)
{
	uint64_t resblks;

	if (ctr == XC_FREE_RTEXTENTS)
		return 0;

	switch (ctr) {
	case XC_FREE_BLOCKS:
		/*
	 * We default to 5% or 8192 fsbs of space reserved, whichever is
	 * smaller.  This is intended to cover concurrent allocation
	 * transactions when we initially hit enospc. These each require a 4
	 * block reservation. Hence by default we cover roughly 2000 concurrent
	 * allocation reservations.
	 */
	resblks = mp->m_sb.sb_dblocks;
	do_div(resblks, 20);
	resblks = min_t(uint64_t, resblks, 8192);
	return resblks;
		 * Default to 5% or 8192 FSBs of space reserved, whichever is
		 * smaller.
		 *
		 * This is intended to cover concurrent allocation transactions
		 * when we initially hit ENOSPC.  These each require a 4 block
		 * reservation. Hence by default we cover roughly 2000
		 * concurrent allocation reservations.
		 */
		return min(div_u64(mp->m_sb.sb_dblocks, 20), 8192ULL);
	case XC_FREE_RTEXTENTS:
	case XC_FREE_RTAVAILABLE:
		if (IS_ENABLED(CONFIG_XFS_RT) && xfs_has_zoned(mp))
			return xfs_zoned_default_resblks(mp, ctr);
		return 0;
	default:
		ASSERT(0);
		return 0;
	}
}

/* Ensure the summary counts are correct. */
+23 −0
Original line number Diff line number Diff line
@@ -363,6 +363,28 @@ DEFINE_EVENT(xfs_zone_alloc_class, name, \
	TP_ARGS(oz, rgbno, len))
DEFINE_ZONE_ALLOC_EVENT(xfs_zone_record_blocks);
DEFINE_ZONE_ALLOC_EVENT(xfs_zone_alloc_blocks);

/*
 * Tracepoint recording the zoned RT geometry of a mount: the device number,
 * the rtgroup count from the superblock (printed as "zoned"), the .blocks
 * value of the XG_TYPE_RTG group descriptor (printed as "blocks_per_zone")
 * and mp->m_max_open_zones (printed as "max_open").
 *
 * NOTE(review): presumably emitted once while mounting a zoned file system;
 * the call site is not visible here -- confirm.
 */
TRACE_EVENT(xfs_zones_mount,
	TP_PROTO(struct xfs_mount *mp),
	TP_ARGS(mp),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_rgnumber_t, rgcount)
		__field(uint32_t, blocks)
		__field(unsigned int, max_open_zones)
	),
	TP_fast_assign(
		__entry->dev = mp->m_super->s_dev;
		__entry->rgcount = mp->m_sb.sb_rgcount;
		__entry->blocks = mp->m_groups[XG_TYPE_RTG].blocks;
		__entry->max_open_zones = mp->m_max_open_zones;
	),
	TP_printk("dev %d:%d zoned %u blocks_per_zone %u, max_open %u",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		__entry->rgcount,
		__entry->blocks,
		__entry->max_open_zones)
);
#endif /* CONFIG_XFS_RT */

TRACE_EVENT(xfs_inodegc_worker,
@@ -5767,6 +5789,7 @@ TRACE_EVENT(xfs_growfs_check_rtgeom,

TRACE_DEFINE_ENUM(XC_FREE_BLOCKS);
TRACE_DEFINE_ENUM(XC_FREE_RTEXTENTS);
TRACE_DEFINE_ENUM(XC_FREE_RTAVAILABLE);

DECLARE_EVENT_CLASS(xfs_freeblocks_resv_class,
	TP_PROTO(struct xfs_mount *mp, enum xfs_free_counter ctr,
Loading