Commit 4e4d5207 authored by Christoph Hellwig
Browse files

xfs: add the zoned space allocator



For zoned RT devices, space is always allocated at the write pointer, that
is, right after the last written block, and is only recorded on I/O
completion.

The actual allocation algorithm is very simple and just involves picking
a good zone - preferably the one used for the last write to the inode.
As the number of zones that can be written at the same time is usually
limited by the hardware, selecting a zone is done as late as possible,
from the iomap dio and buffered writeback bio submission helpers, just
before submitting the bio.

Given that the writers already took a reservation before acquiring the
iolock, space will always be readily available if an open zone slot is
available.  A new structure is used to track these open zones, and is
pointed to by the xfs_rtgroup.  Because zoned file systems don't have
an rsum cache, the space for that pointer can be reused.

Allocations are only recorded at I/O completion time.  The scheme used
for that is very similar to the reflink COW end I/O path.

Co-developed-by: Hans Holmberg <hans.holmberg@wdc.com>
Signed-off-by: Hans Holmberg <hans.holmberg@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>
parent 720c2d58
Loading
Loading
Loading
Loading
+2 −1
Original line number Diff line number Diff line
@@ -137,7 +137,8 @@ xfs-$(CONFIG_XFS_QUOTA) += xfs_dquot.o \
				   xfs_quotaops.o

# xfs_rtbitmap is shared with libxfs
xfs-$(CONFIG_XFS_RT)		+= xfs_rtalloc.o
xfs-$(CONFIG_XFS_RT)		+= xfs_rtalloc.o \
				   xfs_zone_alloc.o

xfs-$(CONFIG_XFS_POSIX_ACL)	+= xfs_acl.o
xfs-$(CONFIG_SYSCTL)		+= xfs_sysctl.o
+17 −5
Original line number Diff line number Diff line
@@ -37,15 +37,27 @@ struct xfs_rtgroup {
	xfs_rtxnum_t		rtg_extents;

	/*
	 * Cache of rt summary level per bitmap block with the invariant that
	 * rtg_rsum_cache[bbno] > the maximum i for which rsum[i][bbno] != 0,
	 * or 0 if rsum[i][bbno] == 0 for all i.
	 *
	 * For bitmap based RT devices this points to a cache of rt summary
	 * level per bitmap block with the invariant that rtg_rsum_cache[bbno]
	 * > the maximum i for which rsum[i][bbno] != 0, or 0 if
	 * rsum[i][bbno] == 0 for all i.
	 * Reads and writes are serialized by the rsumip inode lock.
	 *
	 * For zoned RT devices this points to the open zone structure for
	 * a group that is open for writers, or is NULL.
	 */
	union {
		uint8_t			*rtg_rsum_cache;
		struct xfs_open_zone	*rtg_open_zone;
	};
};

/*
 * For zoned RT devices this is set on groups that have no written blocks
 * and can be picked by the allocator for opening.
 *
 * NOTE(review): defined as XA_MARK_0, so presumably applied as an xarray
 * mark on the group's entry - confirm against the users of this mark.
 */
#define XFS_RTG_FREE			XA_MARK_0

static inline struct xfs_rtgroup *to_rtg(struct xfs_group *xg)
{
	return container_of(xg, struct xfs_rtgroup, rtg_group);
+1 −0
Original line number Diff line number Diff line
@@ -243,6 +243,7 @@ enum xfs_free_counter {
	 * Number of free RT extents on the RT device.
	 */
	XC_FREE_RTEXTENTS,

	XC_FREE_NR,
};

+4 −0
Original line number Diff line number Diff line
@@ -20,6 +20,7 @@
#include "xfs_sysfs.h"
#include "xfs_sb.h"
#include "xfs_health.h"
#include "xfs_zone_alloc.h"

struct kmem_cache	*xfs_log_ticket_cache;

@@ -3540,6 +3541,9 @@ xlog_force_shutdown(
	spin_unlock(&log->l_icloglock);

	wake_up_var(&log->l_opstate);
	if (IS_ENABLED(CONFIG_XFS_RT) && xfs_has_zoned(log->l_mp))
		xfs_zoned_wake_all(log->l_mp);

	return log_error;
}

+11 −0
Original line number Diff line number Diff line
@@ -40,6 +40,7 @@
#include "xfs_rtrmap_btree.h"
#include "xfs_rtrefcount_btree.h"
#include "scrub/stats.h"
#include "xfs_zone_alloc.h"

static DEFINE_MUTEX(xfs_uuid_table_mutex);
static int xfs_uuid_table_size;
@@ -1042,6 +1043,12 @@ xfs_mountfs(
	if (xfs_is_readonly(mp) && !xfs_has_norecovery(mp))
		xfs_log_clean(mp);

	if (xfs_has_zoned(mp)) {
		error = xfs_mount_zones(mp);
		if (error)
			goto out_rtunmount;
	}

	/*
	 * Complete the quota initialisation, post-log-replay component.
	 */
@@ -1084,6 +1091,8 @@ xfs_mountfs(
 out_agresv:
	xfs_fs_unreserve_ag_blocks(mp);
	xfs_qm_unmount_quotas(mp);
	if (xfs_has_zoned(mp))
		xfs_unmount_zones(mp);
 out_rtunmount:
	xfs_rtunmount_inodes(mp);
 out_rele_rip:
@@ -1165,6 +1174,8 @@ xfs_unmountfs(
	xfs_blockgc_stop(mp);
	xfs_fs_unreserve_ag_blocks(mp);
	xfs_qm_unmount_quotas(mp);
	if (xfs_has_zoned(mp))
		xfs_unmount_zones(mp);
	xfs_rtunmount_inodes(mp);
	xfs_irele(mp->m_rootip);
	if (mp->m_metadirip)
Loading