Commit c148bc75 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'xfs-6.15-merge' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull xfs updates from Carlos Maiolino:

 - XFS zoned allocator: Enables XFS to support zoned devices using its
   real-time allocator

 - Use folios/vmalloc for buffer cache backing memory

 - Some code cleanups and bug fixes

* tag 'xfs-6.15-merge' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux: (70 commits)
  xfs: remove the flags argument to xfs_buf_get_uncached
  xfs: remove the flags argument to xfs_buf_read_uncached
  xfs: remove xfs_buf_free_maps
  xfs: remove xfs_buf_get_maps
  xfs: call xfs_buf_alloc_backing_mem from _xfs_buf_alloc
  xfs: remove unnecessary NULL check before kvfree()
  xfs: don't wake zone space waiters without m_zone_info
  xfs: don't increment m_generation for all errors in xfs_growfs_data
  xfs: fix a missing unlock in xfs_growfs_data
  xfs: Remove duplicate xfs_rtbitmap.h header
  xfs: trigger zone GC when out of available rt blocks
  xfs: trace what memory backs a buffer
  xfs: cleanup mapping tmpfs folios into the buffer cache
  xfs: use vmalloc instead of vm_map_area for buffer backing memory
  xfs: buffer items don't straddle pages anymore
  xfs: kill XBF_UNMAPPED
  xfs: convert buffer cache to use high order folios
  xfs: remove the kmalloc to page allocator fallback
  xfs: refactor backing memory allocations for buffers
  xfs: remove xfs_buf_is_vmapped
  ...
parents 0de1e842 b3f8f290
Loading
Loading
Loading
Loading
+6 −1
Original line number Diff line number Diff line
@@ -64,6 +64,7 @@ xfs-y += $(addprefix libxfs/, \
xfs-$(CONFIG_XFS_RT)		+= $(addprefix libxfs/, \
				   xfs_rtbitmap.o \
				   xfs_rtgroup.o \
				   xfs_zones.o \
				   )

# highlevel code
@@ -136,7 +137,11 @@ xfs-$(CONFIG_XFS_QUOTA) += xfs_dquot.o \
				   xfs_quotaops.o

# xfs_rtbitmap is shared with libxfs
xfs-$(CONFIG_XFS_RT)		+= xfs_rtalloc.o
xfs-$(CONFIG_XFS_RT)		+= xfs_rtalloc.o \
				   xfs_zone_alloc.o \
				   xfs_zone_gc.o \
				   xfs_zone_info.o \
				   xfs_zone_space_resv.o

xfs-$(CONFIG_XFS_POSIX_ACL)	+= xfs_acl.o
xfs-$(CONFIG_SYSCTL)		+= xfs_sysctl.o
+1 −1
Original line number Diff line number Diff line
@@ -301,7 +301,7 @@ xfs_get_aghdr_buf(
	struct xfs_buf		*bp;
	int			error;

	error = xfs_buf_get_uncached(mp->m_ddev_targp, numblks, 0, &bp);
	error = xfs_buf_get_uncached(mp->m_ddev_targp, numblks, &bp);
	if (error)
		return error;

+23 −293
Original line number Diff line number Diff line
@@ -34,13 +34,13 @@
#include "xfs_ag.h"
#include "xfs_ag_resv.h"
#include "xfs_refcount.h"
#include "xfs_icache.h"
#include "xfs_iomap.h"
#include "xfs_health.h"
#include "xfs_bmap_item.h"
#include "xfs_symlink_remote.h"
#include "xfs_inode_util.h"
#include "xfs_rtgroup.h"
#include "xfs_zone_alloc.h"

struct kmem_cache		*xfs_bmap_intent_cache;

@@ -171,18 +171,16 @@ xfs_bmbt_update(
 * Compute the worst-case number of indirect blocks that will be used
 * for ip's delayed extent of length "len".
 */
STATIC xfs_filblks_t
xfs_filblks_t
xfs_bmap_worst_indlen(
	xfs_inode_t	*ip,		/* incore inode pointer */
	struct xfs_inode	*ip,		/* incore inode pointer */
	xfs_filblks_t		len)		/* delayed extent length */
{
	int		level;		/* btree level number */
	int		maxrecs;	/* maximum record count at this level */
	xfs_mount_t	*mp;		/* mount structure */
	xfs_filblks_t	rval;		/* return value */
	struct xfs_mount	*mp = ip->i_mount;
	int			maxrecs = mp->m_bmap_dmxr[0];
	int			level;
	xfs_filblks_t		rval;

	mp = ip->i_mount;
	maxrecs = mp->m_bmap_dmxr[0];
	for (level = 0, rval = 0;
	     level < XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK);
	     level++) {
@@ -2571,146 +2569,6 @@ xfs_bmap_add_extent_unwritten_real(
#undef	PREV
}

/*
 * Convert a hole to a delayed allocation.
 */
STATIC void
xfs_bmap_add_extent_hole_delay(
	xfs_inode_t		*ip,	/* incore inode pointer */
	int			whichfork,
	struct xfs_iext_cursor	*icur,
	xfs_bmbt_irec_t		*new)	/* new data to add to file extents */
{
	struct xfs_ifork	*ifp;	/* inode fork pointer */
	xfs_bmbt_irec_t		left;	/* left neighbor extent entry */
	xfs_filblks_t		newlen=0;	/* new indirect size */
	xfs_filblks_t		oldlen=0;	/* old indirect size */
	xfs_bmbt_irec_t		right;	/* right neighbor extent entry */
	uint32_t		state = xfs_bmap_fork_to_state(whichfork);
	xfs_filblks_t		temp;	 /* temp for indirect calculations */

	ifp = xfs_ifork_ptr(ip, whichfork);
	ASSERT(isnullstartblock(new->br_startblock));

	/*
	 * Check and set flags if this segment has a left neighbor
	 */
	if (xfs_iext_peek_prev_extent(ifp, icur, &left)) {
		state |= BMAP_LEFT_VALID;
		if (isnullstartblock(left.br_startblock))
			state |= BMAP_LEFT_DELAY;
	}

	/*
	 * Check and set flags if the current (right) segment exists.
	 * If it doesn't exist, we're converting the hole at end-of-file.
	 */
	if (xfs_iext_get_extent(ifp, icur, &right)) {
		state |= BMAP_RIGHT_VALID;
		if (isnullstartblock(right.br_startblock))
			state |= BMAP_RIGHT_DELAY;
	}

	/*
	 * Set contiguity flags on the left and right neighbors.
	 * Don't let extents get too large, even if the pieces are contiguous.
	 */
	if ((state & BMAP_LEFT_VALID) && (state & BMAP_LEFT_DELAY) &&
	    left.br_startoff + left.br_blockcount == new->br_startoff &&
	    left.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN)
		state |= BMAP_LEFT_CONTIG;

	if ((state & BMAP_RIGHT_VALID) && (state & BMAP_RIGHT_DELAY) &&
	    new->br_startoff + new->br_blockcount == right.br_startoff &&
	    new->br_blockcount + right.br_blockcount <= XFS_MAX_BMBT_EXTLEN &&
	    (!(state & BMAP_LEFT_CONTIG) ||
	     (left.br_blockcount + new->br_blockcount +
	      right.br_blockcount <= XFS_MAX_BMBT_EXTLEN)))
		state |= BMAP_RIGHT_CONTIG;

	/*
	 * Switch out based on the contiguity flags.
	 */
	switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
	case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
		/*
		 * New allocation is contiguous with delayed allocations
		 * on the left and on the right.
		 * Merge all three into a single extent record.
		 */
		temp = left.br_blockcount + new->br_blockcount +
			right.br_blockcount;

		oldlen = startblockval(left.br_startblock) +
			startblockval(new->br_startblock) +
			startblockval(right.br_startblock);
		newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
					 oldlen);
		left.br_startblock = nullstartblock(newlen);
		left.br_blockcount = temp;

		xfs_iext_remove(ip, icur, state);
		xfs_iext_prev(ifp, icur);
		xfs_iext_update_extent(ip, state, icur, &left);
		break;

	case BMAP_LEFT_CONTIG:
		/*
		 * New allocation is contiguous with a delayed allocation
		 * on the left.
		 * Merge the new allocation with the left neighbor.
		 */
		temp = left.br_blockcount + new->br_blockcount;

		oldlen = startblockval(left.br_startblock) +
			startblockval(new->br_startblock);
		newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
					 oldlen);
		left.br_blockcount = temp;
		left.br_startblock = nullstartblock(newlen);

		xfs_iext_prev(ifp, icur);
		xfs_iext_update_extent(ip, state, icur, &left);
		break;

	case BMAP_RIGHT_CONTIG:
		/*
		 * New allocation is contiguous with a delayed allocation
		 * on the right.
		 * Merge the new allocation with the right neighbor.
		 */
		temp = new->br_blockcount + right.br_blockcount;
		oldlen = startblockval(new->br_startblock) +
			startblockval(right.br_startblock);
		newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
					 oldlen);
		right.br_startoff = new->br_startoff;
		right.br_startblock = nullstartblock(newlen);
		right.br_blockcount = temp;
		xfs_iext_update_extent(ip, state, icur, &right);
		break;

	case 0:
		/*
		 * New allocation is not contiguous with another
		 * delayed allocation.
		 * Insert a new entry.
		 */
		oldlen = newlen = 0;
		xfs_iext_insert(ip, icur, new, state);
		break;
	}
	if (oldlen != newlen) {
		ASSERT(oldlen > newlen);
		xfs_add_fdblocks(ip->i_mount, oldlen - newlen);

		/*
		 * Nothing to do for disk quota accounting here.
		 */
		xfs_mod_delalloc(ip, 0, (int64_t)newlen - oldlen);
	}
}

/*
 * Convert a hole to a real allocation.
 */
@@ -4039,144 +3897,6 @@ xfs_bmapi_read(
	return 0;
}

/*
 * Add a delayed allocation extent to an inode. Blocks are reserved from the
 * global pool and the extent inserted into the inode in-core extent tree.
 *
 * On entry, got refers to the first extent beyond the offset of the extent to
 * allocate or eof is specified if no such extent exists. On return, got refers
 * to the extent record that was inserted to the inode fork.
 *
 * Note that the allocated extent may have been merged with contiguous extents
 * during insertion into the inode fork. Thus, got does not reflect the current
 * state of the inode fork on return. If necessary, the caller can use lastx to
 * look up the updated record in the inode fork.
 */
int
xfs_bmapi_reserve_delalloc(
	struct xfs_inode	*ip,
	int			whichfork,
	xfs_fileoff_t		off,
	xfs_filblks_t		len,
	xfs_filblks_t		prealloc,
	struct xfs_bmbt_irec	*got,
	struct xfs_iext_cursor	*icur,
	int			eof)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
	xfs_extlen_t		alen;
	xfs_extlen_t		indlen;
	uint64_t		fdblocks;
	int			error;
	xfs_fileoff_t		aoff;
	bool			use_cowextszhint =
					whichfork == XFS_COW_FORK && !prealloc;

retry:
	/*
	 * Cap the alloc length. Keep track of prealloc so we know whether to
	 * tag the inode before we return.
	 */
	aoff = off;
	alen = XFS_FILBLKS_MIN(len + prealloc, XFS_MAX_BMBT_EXTLEN);
	if (!eof)
		alen = XFS_FILBLKS_MIN(alen, got->br_startoff - aoff);
	if (prealloc && alen >= len)
		prealloc = alen - len;

	/*
	 * If we're targetting the COW fork but aren't creating a speculative
	 * posteof preallocation, try to expand the reservation to align with
	 * the COW extent size hint if there's sufficient free space.
	 *
	 * Unlike the data fork, the CoW cancellation functions will free all
	 * the reservations at inactivation, so we don't require that every
	 * delalloc reservation have a dirty pagecache.
	 */
	if (use_cowextszhint) {
		struct xfs_bmbt_irec	prev;
		xfs_extlen_t		extsz = xfs_get_cowextsz_hint(ip);

		if (!xfs_iext_peek_prev_extent(ifp, icur, &prev))
			prev.br_startoff = NULLFILEOFF;

		error = xfs_bmap_extsize_align(mp, got, &prev, extsz, 0, eof,
					       1, 0, &aoff, &alen);
		ASSERT(!error);
	}

	/*
	 * Make a transaction-less quota reservation for delayed allocation
	 * blocks.  This number gets adjusted later.  We return if we haven't
	 * allocated blocks already inside this loop.
	 */
	error = xfs_quota_reserve_blkres(ip, alen);
	if (error)
		goto out;

	/*
	 * Split changing sb for alen and indlen since they could be coming
	 * from different places.
	 */
	indlen = (xfs_extlen_t)xfs_bmap_worst_indlen(ip, alen);
	ASSERT(indlen > 0);

	fdblocks = indlen;
	if (XFS_IS_REALTIME_INODE(ip)) {
		error = xfs_dec_frextents(mp, xfs_blen_to_rtbxlen(mp, alen));
		if (error)
			goto out_unreserve_quota;
	} else {
		fdblocks += alen;
	}

	error = xfs_dec_fdblocks(mp, fdblocks, false);
	if (error)
		goto out_unreserve_frextents;

	ip->i_delayed_blks += alen;
	xfs_mod_delalloc(ip, alen, indlen);

	got->br_startoff = aoff;
	got->br_startblock = nullstartblock(indlen);
	got->br_blockcount = alen;
	got->br_state = XFS_EXT_NORM;

	xfs_bmap_add_extent_hole_delay(ip, whichfork, icur, got);

	/*
	 * Tag the inode if blocks were preallocated. Note that COW fork
	 * preallocation can occur at the start or end of the extent, even when
	 * prealloc == 0, so we must also check the aligned offset and length.
	 */
	if (whichfork == XFS_DATA_FORK && prealloc)
		xfs_inode_set_eofblocks_tag(ip);
	if (whichfork == XFS_COW_FORK && (prealloc || aoff < off || alen > len))
		xfs_inode_set_cowblocks_tag(ip);

	return 0;

out_unreserve_frextents:
	if (XFS_IS_REALTIME_INODE(ip))
		xfs_add_frextents(mp, xfs_blen_to_rtbxlen(mp, alen));
out_unreserve_quota:
	if (XFS_IS_QUOTA_ON(mp))
		xfs_quota_unreserve_blkres(ip, alen);
out:
	if (error == -ENOSPC || error == -EDQUOT) {
		trace_xfs_delalloc_enospc(ip, off, len);

		if (prealloc || use_cowextszhint) {
			/* retry without any preallocation */
			use_cowextszhint = false;
			prealloc = 0;
			goto retry;
		}
	}
	return error;
}

static int
xfs_bmapi_allocate(
	struct xfs_bmalloca	*bma)
@@ -4948,7 +4668,8 @@ xfs_bmap_del_extent_delay(
	int			whichfork,
	struct xfs_iext_cursor	*icur,
	struct xfs_bmbt_irec	*got,
	struct xfs_bmbt_irec	*del)
	struct xfs_bmbt_irec	*del,
	uint32_t		bflags)	/* bmapi flags */
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
@@ -5068,10 +4789,18 @@ xfs_bmap_del_extent_delay(
	da_diff = da_old - da_new;
	fdblocks = da_diff;

	if (isrt)
		xfs_add_frextents(mp, xfs_blen_to_rtbxlen(mp, del->br_blockcount));
	else
	if (bflags & XFS_BMAPI_REMAP) {
		;
	} else if (isrt) {
		xfs_rtbxlen_t	rtxlen;

		rtxlen = xfs_blen_to_rtbxlen(mp, del->br_blockcount);
		if (xfs_is_zoned_inode(ip))
			xfs_zoned_add_available(mp, rtxlen);
		xfs_add_frextents(mp, rtxlen);
	} else {
		fdblocks += del->br_blockcount;
	}

	xfs_add_fdblocks(mp, fdblocks);
	xfs_mod_delalloc(ip, -(int64_t)del->br_blockcount, -da_diff);
@@ -5670,7 +5399,8 @@ __xfs_bunmapi(

delete:
		if (wasdel) {
			xfs_bmap_del_extent_delay(ip, whichfork, &icur, &got, &del);
			xfs_bmap_del_extent_delay(ip, whichfork, &icur, &got,
					&del, flags);
		} else {
			error = xfs_bmap_del_extent_real(ip, tp, &icur, cur,
					&del, &tmp_logflags, whichfork,
+2 −5
Original line number Diff line number Diff line
@@ -204,7 +204,7 @@ int xfs_bunmapi(struct xfs_trans *tp, struct xfs_inode *ip,
		xfs_extnum_t nexts, int *done);
void	xfs_bmap_del_extent_delay(struct xfs_inode *ip, int whichfork,
		struct xfs_iext_cursor *cur, struct xfs_bmbt_irec *got,
		struct xfs_bmbt_irec *del);
		struct xfs_bmbt_irec *del, uint32_t bflags);
void	xfs_bmap_del_extent_cow(struct xfs_inode *ip,
		struct xfs_iext_cursor *cur, struct xfs_bmbt_irec *got,
		struct xfs_bmbt_irec *del);
@@ -219,10 +219,6 @@ int xfs_bmap_insert_extents(struct xfs_trans *tp, struct xfs_inode *ip,
		bool *done, xfs_fileoff_t stop_fsb);
int	xfs_bmap_split_extent(struct xfs_trans *tp, struct xfs_inode *ip,
		xfs_fileoff_t split_offset);
int	xfs_bmapi_reserve_delalloc(struct xfs_inode *ip, int whichfork,
		xfs_fileoff_t off, xfs_filblks_t len, xfs_filblks_t prealloc,
		struct xfs_bmbt_irec *got, struct xfs_iext_cursor *cur,
		int eof);
int	xfs_bmapi_convert_delalloc(struct xfs_inode *ip, int whichfork,
		xfs_off_t offset, struct iomap *iomap, unsigned int *seq);
int	xfs_bmap_add_extent_unwritten_real(struct xfs_trans *tp,
@@ -233,6 +229,7 @@ xfs_extlen_t xfs_bmapi_minleft(struct xfs_trans *tp, struct xfs_inode *ip,
		int fork);
int	xfs_bmap_btalloc_low_space(struct xfs_bmalloca *ap,
		struct xfs_alloc_arg *args);
xfs_filblks_t xfs_bmap_worst_indlen(struct xfs_inode *ip, xfs_filblks_t len);

enum xfs_bmap_intent_type {
	XFS_BMAP_MAP = 1,
+16 −4
Original line number Diff line number Diff line
@@ -178,9 +178,10 @@ typedef struct xfs_sb {

	xfs_rgnumber_t	sb_rgcount;	/* number of realtime groups */
	xfs_rtxlen_t	sb_rgextents;	/* size of a realtime group in rtx */

	uint8_t		sb_rgblklog;    /* rt group number shift */
	uint8_t		sb_pad[7];	/* zeroes */
	xfs_rfsblock_t	sb_rtstart;	/* start of internal RT section (FSB) */
	xfs_filblks_t	sb_rtreserved;	/* reserved (zoned) RT blocks */

	/* must be padded to 64 bit alignment */
} xfs_sb_t;
@@ -270,9 +271,10 @@ struct xfs_dsb {
	__be64		sb_metadirino;	/* metadata directory tree root */
	__be32		sb_rgcount;	/* # of realtime groups */
	__be32		sb_rgextents;	/* size of rtgroup in rtx */

	__u8		sb_rgblklog;    /* rt group number shift */
	__u8		sb_pad[7];	/* zeroes */
	__be64		sb_rtstart;	/* start of internal RT section (FSB) */
	__be64		sb_rtreserved;	/* reserved (zoned) RT blocks */

	/*
	 * The size of this structure must be padded to 64 bit alignment.
@@ -395,6 +397,9 @@ xfs_sb_has_ro_compat_feature(
#define XFS_SB_FEAT_INCOMPAT_EXCHRANGE	(1 << 6)  /* exchangerange supported */
#define XFS_SB_FEAT_INCOMPAT_PARENT	(1 << 7)  /* parent pointers */
#define XFS_SB_FEAT_INCOMPAT_METADIR	(1 << 8)  /* metadata dir tree */
#define XFS_SB_FEAT_INCOMPAT_ZONED	(1 << 9)  /* zoned RT allocator */
#define XFS_SB_FEAT_INCOMPAT_ZONE_GAPS	(1 << 10) /* RTGs have LBA gaps */

#define XFS_SB_FEAT_INCOMPAT_ALL \
		(XFS_SB_FEAT_INCOMPAT_FTYPE | \
		 XFS_SB_FEAT_INCOMPAT_SPINODES | \
@@ -404,7 +409,9 @@ xfs_sb_has_ro_compat_feature(
		 XFS_SB_FEAT_INCOMPAT_NREXT64 | \
		 XFS_SB_FEAT_INCOMPAT_EXCHRANGE | \
		 XFS_SB_FEAT_INCOMPAT_PARENT | \
		 XFS_SB_FEAT_INCOMPAT_METADIR)
		 XFS_SB_FEAT_INCOMPAT_METADIR | \
		 XFS_SB_FEAT_INCOMPAT_ZONED | \
		 XFS_SB_FEAT_INCOMPAT_ZONE_GAPS)

#define XFS_SB_FEAT_INCOMPAT_UNKNOWN	~XFS_SB_FEAT_INCOMPAT_ALL
static inline bool
@@ -952,7 +959,12 @@ struct xfs_dinode {
	__be64		di_changecount;	/* number of attribute changes */
	__be64		di_lsn;		/* flush sequence */
	__be64		di_flags2;	/* more random flags */
	__be32		di_cowextsize;	/* basic cow extent size for file */
	union {
		/* basic cow extent size for (regular) file */
		__be32		di_cowextsize;
		/* used blocks in RTG for (zoned) rtrmap inode */
		__be32		di_used_blocks;
	};
	__u8		di_pad2[12];	/* more padding for future expansion */

	/* fields only written to during inode creation */
Loading