Merge branch 'xfs-7.1-merge' into for-next (df236c99) · Commits · git / linux-net

fs/iomap/buffered-io.c

+1 −5

Original line number	Diff line number	Diff line
		@@ -1632,16 +1632,12 @@ iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero,
		while ((ret = iomap_iter(&iter, ops)) > 0) {
		const struct iomap *srcmap = iomap_iter_srcmap(&iter);

		if (WARN_ON_ONCE((iter.iomap.flags & IOMAP_F_FOLIO_BATCH) &&
		srcmap->type != IOMAP_UNWRITTEN))
		return -EIO;

		if (!(iter.iomap.flags & IOMAP_F_FOLIO_BATCH) &&
		(srcmap->type == IOMAP_HOLE ||
		srcmap->type == IOMAP_UNWRITTEN)) {
		s64 status;

		if (range_dirty) {
		if (range_dirty && srcmap->type == IOMAP_UNWRITTEN) {
		range_dirty = false;
		status = iomap_zero_iter_flush_and_stale(&iter);
		} else {

fs/xfs/xfs_file.c

+17 −0

Original line number	Diff line number	Diff line
		@@ -1306,6 +1306,23 @@ xfs_falloc_insert_range(
		if (offset >= isize)
		return -EINVAL;

		/*
		* Let writeback clean up EOF folio state before we bump i_size. The
		* insert flushes before it starts shifting and under certain
		* circumstances we can write back blocks that should technically be
		* considered post-eof (and thus should not be submitted for writeback).
		*
		* For example, a large, dirty folio that spans EOF and is backed by
		* post-eof COW fork preallocation can cause block remap into the data
		* fork. This shifts back out beyond EOF, but creates an unexpectedly
		* written post-eof block. The insert is going to flush, unmap and
		* cancel prealloc across this whole range, so flush EOF now before we
		* bump i_size to provide consistent behavior.
		*/
		error = filemap_write_and_wait_range(inode->i_mapping, isize, isize);
		if (error)
		return error;

		error = xfs_falloc_setsize(file, isize + len);
		if (error)
		return error;

fs/xfs/xfs_iomap.c

+112 −34

Original line number	Diff line number	Diff line
		@@ -1590,6 +1590,7 @@ xfs_zoned_buffered_write_iomap_begin(
		{
		struct iomap_iter *iter =
		container_of(iomap, struct iomap_iter, iomap);
		struct address_space *mapping = inode->i_mapping;
		struct xfs_zone_alloc_ctx *ac = iter->private;
		struct xfs_inode *ip = XFS_I(inode);
		struct xfs_mount *mp = ip->i_mount;
		@@ -1614,6 +1615,7 @@ xfs_zoned_buffered_write_iomap_begin(
		if (error)
		return error;

		restart:
		error = xfs_ilock_for_iomap(ip, flags, &lockmode);
		if (error)
		return error;
		@@ -1651,14 +1653,6 @@ xfs_zoned_buffered_write_iomap_begin(
		&smap))
		smap.br_startoff = end_fsb; /* fake hole until EOF */
		if (smap.br_startoff > offset_fsb) {
		/*
		* We never need to allocate blocks for zeroing a hole.
		*/
		if (flags & IOMAP_ZERO) {
		xfs_hole_to_iomap(ip, iomap, offset_fsb,
		smap.br_startoff);
		goto out_unlock;
		}
		end_fsb = min(end_fsb, smap.br_startoff);
		} else {
		end_fsb = min(end_fsb,
		@@ -1690,6 +1684,33 @@ xfs_zoned_buffered_write_iomap_begin(
		count_fsb = min3(end_fsb - offset_fsb, XFS_MAX_BMBT_EXTLEN,
		XFS_B_TO_FSB(mp, 1024 * PAGE_SIZE));

		/*
		* When zeroing, don't allocate blocks for holes as they are already
		* zeroes, but we need to ensure that no extents exist in both the data
		* and COW fork to ensure this really is a hole.
		*
		* A window exists where we might observe a hole in both forks with
		* valid data in cache. Writeback removes the COW fork blocks on
		* submission but doesn't remap into the data fork until completion. If
		* the data fork was previously a hole, we'll fail to zero. Until we
		* find a way to avoid this transient state, check for dirty pagecache
		* and flush to wait on blocks to land in the data fork.
		*/
		if ((flags & IOMAP_ZERO) && srcmap->type == IOMAP_HOLE) {
		if (filemap_range_needs_writeback(mapping, offset,
		offset + count - 1)) {
		xfs_iunlock(ip, lockmode);
		error = filemap_write_and_wait_range(mapping, offset,
		offset + count - 1);
		if (error)
		return error;
		goto restart;
		}

		xfs_hole_to_iomap(ip, iomap, offset_fsb, end_fsb);
		goto out_unlock;
		}

		/*
		* The block reservation is supposed to cover all blocks that the
		* operation could possibly write, but there is a nasty corner case
		@@ -1764,6 +1785,8 @@ xfs_buffered_write_iomap_begin(
		struct xfs_mount *mp = ip->i_mount;
		xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset);
		xfs_fileoff_t end_fsb = xfs_iomap_end_fsb(mp, offset, count);
		xfs_fileoff_t cow_fsb = NULLFILEOFF;
		xfs_fileoff_t eof_fsb = XFS_B_TO_FSB(mp, XFS_ISIZE(ip));
		struct xfs_bmbt_irec imap, cmap;
		struct xfs_iext_cursor icur, ccur;
		xfs_fsblock_t prealloc_blocks = 0;
		@@ -1808,30 +1831,96 @@ xfs_buffered_write_iomap_begin(
		goto out_unlock;

		/*
		* Search the data fork first to look up our source mapping. We
		* always need the data fork map, as we have to return it to the
		* iomap code so that the higher level write code can read data in to
		* perform read-modify-write cycles for unaligned writes.
		* Search the data fork first to look up our source mapping. We always
		* need the data fork map, as we have to return it to the iomap code so
		* that the higher level write code can read data in to perform
		* read-modify-write cycles for unaligned writes.
		*
		* Then search the COW fork extent list even if we did not find a data
		* fork extent. This serves two purposes: first this implements the
		* speculative preallocation using cowextsize, so that we also unshare
		* blocks adjacent to shared blocks instead of just the shared blocks
		* themselves. Second the lookup in the extent list is generally faster
		* than going out to the shared extent tree.
		*/
		eof = !xfs_iext_lookup_extent(ip, &ip->i_df, offset_fsb, &icur, &imap);
		if (eof)
		imap.br_startoff = end_fsb; /* fake hole until the end */
		if (xfs_is_cow_inode(ip)) {
		if (!ip->i_cowfp) {
		ASSERT(!xfs_is_reflink_inode(ip));
		xfs_ifork_init_cow(ip);
		}
		cow_eof = !xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb,
		&ccur, &cmap);
		if (!cow_eof)
		cow_fsb = cmap.br_startoff;
		}

		/* We never need to allocate blocks for zeroing or unsharing a hole. */
		if ((flags & (IOMAP_UNSHARE | IOMAP_ZERO)) &&
		imap.br_startoff > offset_fsb) {
		/* We never need to allocate blocks for unsharing a hole. */
		if ((flags & IOMAP_UNSHARE) && imap.br_startoff > offset_fsb) {
		xfs_hole_to_iomap(ip, iomap, offset_fsb, imap.br_startoff);
		goto out_unlock;
		}

		/*
		* We may need to zero over a hole in the data fork if it's fronted by
		* COW blocks and dirty pagecache. Scan such file ranges for dirty
		* cache and fill the iomap batch with folios that need zeroing.
		*/
		if ((flags & IOMAP_ZERO) && imap.br_startoff > offset_fsb) {
		loff_t start, end;
		unsigned int fbatch_count;

		imap.br_blockcount = imap.br_startoff - offset_fsb;
		imap.br_startoff = offset_fsb;
		imap.br_startblock = HOLESTARTBLOCK;
		imap.br_state = XFS_EXT_NORM;

		if (cow_fsb == NULLFILEOFF)
		goto found_imap;
		if (cow_fsb > offset_fsb) {
		xfs_trim_extent(&imap, offset_fsb,
		cow_fsb - offset_fsb);
		goto found_imap;
		}

		/* no zeroing beyond eof, so split at the boundary */
		if (offset_fsb >= eof_fsb)
		goto found_imap;
		if (offset_fsb < eof_fsb && end_fsb > eof_fsb)
		xfs_trim_extent(&imap, offset_fsb,
		eof_fsb - offset_fsb);

		/* COW fork blocks overlap the hole */
		xfs_trim_extent(&imap, offset_fsb,
		cmap.br_startoff + cmap.br_blockcount - offset_fsb);
		start = XFS_FSB_TO_B(mp, imap.br_startoff);
		end = XFS_FSB_TO_B(mp, imap.br_startoff + imap.br_blockcount);
		fbatch_count = iomap_fill_dirty_folios(iter, &start, end,
		&iomap_flags);
		xfs_trim_extent(&imap, offset_fsb,
		XFS_B_TO_FSB(mp, start) - offset_fsb);

		/*
		* Report the COW mapping if we have folios to zero. Otherwise
		* ignore the COW blocks as preallocation and report a hole.
		*/
		if (fbatch_count) {
		xfs_trim_extent(&cmap, imap.br_startoff,
		imap.br_blockcount);
		imap.br_startoff = end_fsb; /* fake hole */
		goto found_cow;
		}
		goto found_imap;
		}

		/*
		* For zeroing, trim extents that extend beyond the EOF block. If a
		* delalloc extent starts beyond the EOF block, convert it to an
		* unwritten extent.
		*/
		if (flags & IOMAP_ZERO) {
		xfs_fileoff_t eof_fsb = XFS_B_TO_FSB(mp, XFS_ISIZE(ip));

		if (isnullstartblock(imap.br_startblock) &&
		offset_fsb >= eof_fsb)
		goto convert_delay;
		@@ -1864,25 +1953,14 @@ xfs_buffered_write_iomap_begin(
		}

		/*
		* Search the COW fork extent list even if we did not find a data fork
		* extent. This serves two purposes: first this implements the
		* speculative preallocation using cowextsize, so that we also unshare
		* blocks adjacent to shared blocks instead of just the shared blocks
		* themselves. Second the lookup in the extent list is generally faster
		* than going out to the shared extent tree.
		* Now that we've handled any operation specific special cases, at this
		* point we can report a COW mapping if found.
		*/
		if (xfs_is_cow_inode(ip)) {
		if (!ip->i_cowfp) {
		ASSERT(!xfs_is_reflink_inode(ip));
		xfs_ifork_init_cow(ip);
		}
		cow_eof = !xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb,
		&ccur, &cmap);
		if (!cow_eof && cmap.br_startoff <= offset_fsb) {
		if (xfs_is_cow_inode(ip) &&
		!cow_eof && cmap.br_startoff <= offset_fsb) {
		trace_xfs_reflink_cow_found(ip, &cmap);
		goto found_cow;
		}
		}

		if (imap.br_startoff <= offset_fsb) {
		/*

fs/xfs/xfs_mount.c

+36 −39

Original line number	Diff line number	Diff line
		@@ -44,17 +44,36 @@
		#include "xfs_healthmon.h"

		static DEFINE_MUTEX(xfs_uuid_table_mutex);
		static int xfs_uuid_table_size;
		static uuid_t *xfs_uuid_table;
		static DEFINE_XARRAY_ALLOC(xfs_uuid_table);

		/*
		 * Walk every entry stored in the xfs_uuid_table xarray and look for one
		 * that compares equal to @new_uuid.
		 *
		 * Returns a pointer to the matching stored entry, or NULL if no entry
		 * matches.
		 *
		 * NOTE(review): the caller visible in this file (xfs_uuid_mount) invokes
		 * this with xfs_uuid_table_mutex held - confirm that is the required
		 * locking for any new caller.
		 */
		static uuid_t *
		xfs_uuid_search(
			uuid_t			*new_uuid)
		{
			unsigned long		idx = 0;
			uuid_t			*entry;
			uuid_t			*found = NULL;

			xa_for_each(&xfs_uuid_table, idx, entry) {
				/* Skip anything that is not the uuid we were asked about. */
				if (!uuid_equal(entry, new_uuid))
					continue;

				found = entry;
				break;
			}

			return found;
		}

		/*
		 * Remove the entry stored at @index from the xfs_uuid_table xarray.
		 *
		 * @uuid is only used for a sanity check: the ASSERT verifies that the
		 * entry currently loaded from slot @index compares equal to @uuid before
		 * the slot is erased.
		 *
		 * NOTE(review): the caller visible in this file (xfs_uuid_unmount) passes
		 * mp->m_uuid_table_index and holds xfs_uuid_table_mutex around the call -
		 * confirm that is the expected contract for other callers.
		 */
		static void
		xfs_uuid_delete(
		uuid_t *uuid,
		unsigned int index)
		{
		/* Catch a stale or mismatched index before dropping the slot. */
		ASSERT(uuid_equal(xa_load(&xfs_uuid_table, index), uuid));
		xa_erase(&xfs_uuid_table, index);
		}

		void
		xfs_uuid_table_free(void)
		{
		if (xfs_uuid_table_size == 0)
		return;
		kfree(xfs_uuid_table);
		xfs_uuid_table = NULL;
		xfs_uuid_table_size = 0;
		ASSERT(xa_empty(&xfs_uuid_table));
		xa_destroy(&xfs_uuid_table);
		}

		/*
		@@ -66,7 +85,7 @@ xfs_uuid_mount(
		struct xfs_mount *mp)
		{
		uuid_t *uuid = &mp->m_sb.sb_uuid;
		int hole, i;
		int ret;

		/* Publish UUID in struct super_block */
		super_set_uuid(mp->m_super, uuid->b, sizeof(*uuid));
		@@ -80,30 +99,17 @@ xfs_uuid_mount(
		}

		mutex_lock(&xfs_uuid_table_mutex);
		for (i = 0, hole = -1; i < xfs_uuid_table_size; i++) {
		if (uuid_is_null(&xfs_uuid_table[i])) {
		hole = i;
		continue;
		}
		if (uuid_equal(uuid, &xfs_uuid_table[i]))
		goto out_duplicate;
		}

		if (hole < 0) {
		xfs_uuid_table = krealloc(xfs_uuid_table,
		(xfs_uuid_table_size + 1) * sizeof(*xfs_uuid_table),
		GFP_KERNEL | __GFP_NOFAIL);
		hole = xfs_uuid_table_size++;
		}
		xfs_uuid_table[hole] = *uuid;
		if (unlikely(xfs_uuid_search(uuid))) {
		xfs_warn(mp, "Filesystem has duplicate UUID %pU - can't mount",
		uuid);
		mutex_unlock(&xfs_uuid_table_mutex);
		return -EINVAL;
		}

		return 0;

		out_duplicate:
		ret = xa_alloc(&xfs_uuid_table, &mp->m_uuid_table_index, uuid,
		xa_limit_32b, GFP_KERNEL);
		mutex_unlock(&xfs_uuid_table_mutex);
		xfs_warn(mp, "Filesystem has duplicate UUID %pU - can't mount", uuid);
		return -EINVAL;
		return ret;
		}

		STATIC void
		@@ -111,21 +117,12 @@ xfs_uuid_unmount(
		struct xfs_mount *mp)
		{
		uuid_t *uuid = &mp->m_sb.sb_uuid;
		int i;

		if (xfs_has_nouuid(mp))
		return;

		mutex_lock(&xfs_uuid_table_mutex);
		for (i = 0; i < xfs_uuid_table_size; i++) {
		if (uuid_is_null(&xfs_uuid_table[i]))
		continue;
		if (!uuid_equal(uuid, &xfs_uuid_table[i]))
		continue;
		memset(&xfs_uuid_table[i], 0, sizeof(uuid_t));
		break;
		}
		ASSERT(i < xfs_uuid_table_size);
		xfs_uuid_delete(uuid, mp->m_uuid_table_index);
		mutex_unlock(&xfs_uuid_table_mutex);
		}

fs/xfs/xfs_mount.h

+3 −0

Original line number	Diff line number	Diff line
		@@ -346,6 +346,9 @@ typedef struct xfs_mount {

		/* Private data referring to a health monitor object. */
		struct xfs_healthmon __rcu *m_healthmon;

		/* Index of uuid record in the uuid xarray. */
		unsigned int m_uuid_table_index;
		} xfs_mount_t;

		#define M_IGEO(mp) (&(mp)->m_ino_geo)