Merge tag 'xfs-6.12-fixes-4' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux (568570fd) · Commits · git / linux-net

Documentation/filesystems/iomap/operations.rst

+1 −1

Original line number	Diff line number	Diff line
		@@ -208,7 +208,7 @@ The filesystem must arrange to `cancel
		such `reservations
		<https://lore.kernel.org/linux-xfs/20220817093627.GZ3600936@dread.disaster.area/>`_
		because writeback will not consume the reservation.
		The ``iomap_file_buffered_write_punch_delalloc`` can be called from a
		The ``iomap_write_delalloc_release`` function can be called from a
		``->iomap_end`` function to find all the clean areas of the folios
		caching a fresh (``IOMAP_F_NEW``) delalloc mapping.
		The caller must hold the ``invalidate_lock`` exclusively; the function
		asserts this rather than taking the lock itself.

fs/iomap/buffered-io.c

+36 −75

Original line number	Diff line number	Diff line
		@@ -1145,10 +1145,36 @@ static void iomap_write_delalloc_scan(struct inode *inode,
		}

		/*
		* When a short write occurs, the filesystem might need to use ->iomap_end
		* to remove space reservations created in ->iomap_begin.
		*
		* For filesystems that use delayed allocation, there can be dirty pages over
		* the delalloc extent outside the range of a short write but still within the
		* delalloc extent allocated for this iomap if the write raced with page
		* faults.
		*
		* Punch out all the delalloc blocks in the range given except for those that
		* have dirty data still pending in the page cache - those are going to be
		* written and so must still retain the delalloc backing for writeback.
		*
		* The punch() callback must only punch delalloc extents in the range passed
		* to it. It must skip over all other types of extents in the range and leave
		* them completely unchanged. It must do this punch atomically with respect to
		* other extent modifications.
		*
		* The punch() callback may be called with a folio locked to prevent writeback
		* extent allocation racing at the edge of the range we are currently punching.
		* The locked folio may or may not cover the range being punched, so it is not
		* safe for the punch() callback to lock folios itself.
		*
		* Lock order is:
		*
		* inode->i_rwsem (shared or exclusive)
		* inode->i_mapping->invalidate_lock (exclusive)
		* folio_lock()
		* ->punch
		* internal filesystem allocation lock
		*
		* As we are scanning the page cache for data, we don't need to reimplement the
		* wheel - mapping_seek_hole_data() does exactly what we need to identify the
		* start and end of data ranges correctly even for sub-folio block sizes. This
		@@ -1177,7 +1203,7 @@ static void iomap_write_delalloc_scan(struct inode *inode,
		* require sprinkling this code with magic "+ 1" and "- 1" arithmetic and expose
		* the code to subtle off-by-one bugs....
		*/
		static void iomap_write_delalloc_release(struct inode *inode, loff_t start_byte,
		void iomap_write_delalloc_release(struct inode *inode, loff_t start_byte,
		loff_t end_byte, unsigned flags, struct iomap *iomap,
		iomap_punch_t punch)
		{
		@@ -1185,12 +1211,13 @@ static void iomap_write_delalloc_release(struct inode *inode, loff_t start_byte,
		loff_t scan_end_byte = min(i_size_read(inode), end_byte);

		/*
		* Lock the mapping to avoid races with page faults re-instantiating
		* folios and dirtying them via ->page_mkwrite whilst we walk the
		* cache and perform delalloc extent removal. Failing to do this can
		* leave dirty pages with no space reservation in the cache.
		* The caller must hold invalidate_lock to avoid races with page faults
		* re-instantiating folios and dirtying them via ->page_mkwrite whilst
		* we walk the cache and perform delalloc extent removal. Failing to do
		* this can leave dirty pages with no space reservation in the cache.
		*/
		filemap_invalidate_lock(inode->i_mapping);
		lockdep_assert_held_write(&inode->i_mapping->invalidate_lock);

		while (start_byte < scan_end_byte) {
		loff_t data_end;

		@@ -1207,7 +1234,7 @@ static void iomap_write_delalloc_release(struct inode *inode, loff_t start_byte,
		if (start_byte == -ENXIO || start_byte == scan_end_byte)
		break;
		if (WARN_ON_ONCE(start_byte < 0))
		goto out_unlock;
		return;
		WARN_ON_ONCE(start_byte < punch_start_byte);
		WARN_ON_ONCE(start_byte > scan_end_byte);

		@@ -1218,7 +1245,7 @@ static void iomap_write_delalloc_release(struct inode *inode, loff_t start_byte,
		data_end = mapping_seek_hole_data(inode->i_mapping, start_byte,
		scan_end_byte, SEEK_HOLE);
		if (WARN_ON_ONCE(data_end < 0))
		goto out_unlock;
		return;

		/*
		* If we race with post-direct I/O invalidation of the page cache,
		@@ -1240,74 +1267,8 @@ static void iomap_write_delalloc_release(struct inode *inode, loff_t start_byte,
		if (punch_start_byte < end_byte)
		punch(inode, punch_start_byte, end_byte - punch_start_byte,
		iomap);
		out_unlock:
		filemap_invalidate_unlock(inode->i_mapping);
		}

		/*
		* When a short write occurs, the filesystem may need to remove reserved space
		* that was allocated in ->iomap_begin from its ->iomap_end method. For
		* filesystems that use delayed allocation, we need to punch out delalloc
		* extents from the range that are not dirty in the page cache. As the write can
		* race with page faults, there can be dirty pages over the delalloc extent
		* outside the range of a short write but still within the delalloc extent
		* allocated for this iomap.
		*
		* This function uses [start_byte, end_byte) intervals (i.e. half-open:
		* start_byte is inclusive, end_byte is exclusive) to simplify range
		* iterations.
		*
		* The punch() callback must only punch delalloc extents in the range passed
		* to it. It must skip over all other types of extents in the range and leave
		* them completely unchanged. It must do this punch atomically with respect to
		* other extent modifications.
		*
		* The punch() callback may be called with a folio locked to prevent writeback
		* extent allocation racing at the edge of the range we are currently punching.
		* The locked folio may or may not cover the range being punched, so it is not
		* safe for the punch() callback to lock folios itself.
		*
		* Lock order is:
		*
		* inode->i_rwsem (shared or exclusive)
		* inode->i_mapping->invalidate_lock (exclusive)
		* folio_lock()
		* ->punch
		* internal filesystem allocation lock
		*/
		void iomap_file_buffered_write_punch_delalloc(struct inode *inode,
		loff_t pos, loff_t length, ssize_t written, unsigned flags,
		struct iomap *iomap, iomap_punch_t punch)
		{
		loff_t start_byte;
		loff_t end_byte;
		unsigned int blocksize = i_blocksize(inode);

		/* Only delalloc mappings are handled here; any other type is left alone. */
		if (iomap->type != IOMAP_DELALLOC)
		return;

		/* If we didn't reserve the blocks, we're not allowed to punch them. */
		if (!(iomap->flags & IOMAP_F_NEW))
		return;

		/*
		* start_byte refers to the first unused block after a short write. If
		* nothing was written, round offset down to point at the first block in
		* the range.
		*/
		if (unlikely(!written))
		start_byte = round_down(pos, blocksize);
		else
		start_byte = round_up(pos + written, blocksize);
		/* end_byte is the requested end (pos + length) rounded up to a block. */
		end_byte = round_up(pos + length, blocksize);

		/* Nothing to do if we've written the entire delalloc extent */
		if (start_byte >= end_byte)
		return;

		/*
		* Pass the unused, block-aligned range on to
		* iomap_write_delalloc_release() together with the caller's punch()
		* callback.
		*/
		iomap_write_delalloc_release(inode, start_byte, end_byte, flags, iomap,
		punch);
		}
		EXPORT_SYMBOL_GPL(iomap_file_buffered_write_punch_delalloc);
		EXPORT_SYMBOL_GPL(iomap_write_delalloc_release);

		static loff_t iomap_unshare_iter(struct iomap_iter *iter)
		{

fs/xfs/scrub/bmap_repair.c

+1 −1

Original line number	Diff line number	Diff line
		@@ -801,7 +801,7 @@ xrep_bmap(
		{
		struct xrep_bmap *rb;
		char *descr;
		unsigned int max_bmbt_recs;
		xfs_extnum_t max_bmbt_recs;
		bool large_extcount;
		int error = 0;

fs/xfs/xfs_aops.c

+2 −2

Original line number	Diff line number	Diff line
		@@ -116,7 +116,7 @@ xfs_end_ioend(
		if (unlikely(error)) {
		if (ioend->io_flags & IOMAP_F_SHARED) {
		xfs_reflink_cancel_cow_range(ip, offset, size, true);
		xfs_bmap_punch_delalloc_range(ip, offset,
		xfs_bmap_punch_delalloc_range(ip, XFS_DATA_FORK, offset,
		offset + size);
		}
		goto done;
		@@ -456,7 +456,7 @@ xfs_discard_folio(
		* byte of the next folio. Hence the end offset is only dependent on the
		* folio itself and not the start offset that is passed in.
		*/
		xfs_bmap_punch_delalloc_range(ip, pos,
		xfs_bmap_punch_delalloc_range(ip, XFS_DATA_FORK, pos,
		folio_pos(folio) + folio_size(folio));
		}

fs/xfs/xfs_bmap_util.c

+7 −3

Original line number	Diff line number	Diff line
		@@ -442,11 +442,12 @@ xfs_getbmap(
		void
		xfs_bmap_punch_delalloc_range(
		struct xfs_inode *ip,
		int whichfork,
		xfs_off_t start_byte,
		xfs_off_t end_byte)
		{
		struct xfs_mount *mp = ip->i_mount;
		struct xfs_ifork *ifp = &ip->i_df;
		struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork);
		xfs_fileoff_t start_fsb = XFS_B_TO_FSBT(mp, start_byte);
		xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, end_byte);
		struct xfs_bmbt_irec got, del;
		@@ -474,11 +475,14 @@ xfs_bmap_punch_delalloc_range(
		continue;
		}

		xfs_bmap_del_extent_delay(ip, XFS_DATA_FORK, &icur, &got, &del);
		xfs_bmap_del_extent_delay(ip, whichfork, &icur, &got, &del);
		if (!xfs_iext_get_extent(ifp, &icur, &got))
		break;
		}

		if (whichfork == XFS_COW_FORK && !ifp->if_bytes)
		xfs_inode_clear_cowblocks_tag(ip);

		out_unlock:
		xfs_iunlock(ip, XFS_ILOCK_EXCL);
		}
		@@ -580,7 +584,7 @@ xfs_free_eofblocks(
		*/
		if (ip->i_diflags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) {
		if (ip->i_delayed_blks) {
		xfs_bmap_punch_delalloc_range(ip,
		xfs_bmap_punch_delalloc_range(ip, XFS_DATA_FORK,
		round_up(XFS_ISIZE(ip), mp->m_sb.sb_blocksize),
		LLONG_MAX);
		}