Commit 3d693c18 authored by Christian Brauner
Browse files

Merge patch series "iomap: flush dirty cache over unwritten mappings on zero range"

Brian Foster <bfoster@redhat.com> says:

Two fixes for iomap zero range flushes.

* patches from https://lore.kernel.org/r/20240830145634.138439-1-bfoster@redhat.com:
  iomap: make zero range flush conditional on unwritten mappings
  iomap: fix handling of dirty folios over unwritten extents

Link: https://lore.kernel.org/r/20240830145634.138439-1-bfoster@redhat.com


Signed-off-by: Christian Brauner <brauner@kernel.org>
parents 6f634eb0 7d9b474e
Loading
Loading
Loading
Loading
+59 −4
Original line number Diff line number Diff line
@@ -1394,16 +1394,53 @@ iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len,
}
EXPORT_SYMBOL_GPL(iomap_file_unshare);

static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero)
/*
 * Zero range helper: tag the iter's current mapping as stale, then write back
 * and wait on pagecache covering everything the iter still has to process,
 * i.e. the byte range [pos, pos + len - 1].
 *
 * Intended for the case where zero range encounters an unwritten mapping that
 * may have dirty pagecache sitting over it. The return value is whatever
 * filemap_write_and_wait_range() reports for the flush.
 */
static inline int iomap_zero_iter_flush_and_stale(struct iomap_iter *i)
{
	/* inclusive offset of the last byte remaining in the iter */
	loff_t last_byte = i->pos + i->len - 1;

	/* mark the mapping stale before flushing, as the flush may change it */
	i->iomap.flags |= IOMAP_F_STALE;

	return filemap_write_and_wait_range(i->inode->i_mapping, i->pos,
					    last_byte);
}

static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero,
		bool *range_dirty)
{
	const struct iomap *srcmap = iomap_iter_srcmap(iter);
	loff_t pos = iter->pos;
	loff_t length = iomap_length(iter);
	loff_t written = 0;

	/* already zeroed?  we're done. */
	if (srcmap->type == IOMAP_HOLE || srcmap->type == IOMAP_UNWRITTEN)
	/*
	 * We must zero subranges of unwritten mappings that might be dirty in
	 * pagecache from previous writes. We only know whether the entire range
	 * was clean or not, however, and dirty folios may have been written
	 * back or reclaimed at any point after mapping lookup.
	 *
	 * The easiest way to deal with this is to flush pagecache to trigger
	 * any pending unwritten conversions and then grab the updated extents
	 * from the fs. The flush may change the current mapping, so mark it
	 * stale for the iterator to remap it for the next pass to handle
	 * properly.
	 *
	 * Note that holes are treated the same as unwritten because zero range
	 * is (ab)used for partial folio zeroing in some cases. Hole backed
	 * post-eof ranges can be dirtied via mapped write and the flush
	 * triggers writeback time post-eof zeroing.
	 */
	if (srcmap->type == IOMAP_HOLE || srcmap->type == IOMAP_UNWRITTEN) {
		if (*range_dirty) {
			*range_dirty = false;
			return iomap_zero_iter_flush_and_stale(iter);
		}
		/* range is clean and already zeroed, nothing to do */
		return length;
	}

	do {
		struct folio *folio;
@@ -1451,9 +1488,27 @@ iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero,
		.flags		= IOMAP_ZERO,
	};
	int ret;
	bool range_dirty;

	/*
	 * Zero range wants to skip pre-zeroed (i.e. unwritten) mappings, but
	 * pagecache must be flushed to ensure stale data from previous
	 * buffered writes is not exposed. A flush is only required for certain
	 * types of mappings, but checking pagecache after mapping lookup is
	 * racy with writeback and reclaim.
	 *
	 * Therefore, check the entire range first and pass along whether any
	 * part of it is dirty. If so and an underlying mapping warrants it,
	 * flush the cache at that point. This trades off the occasional false
	 * positive (and spurious flush, if the dirty data and mapping don't
	 * happen to overlap) for simplicity in handling a relatively uncommon
	 * situation.
	 */
	range_dirty = filemap_range_needs_writeback(inode->i_mapping,
					pos, pos + len - 1);

	while ((ret = iomap_iter(&iter, ops)) > 0)
		iter.processed = iomap_zero_iter(&iter, did_zero);
		iter.processed = iomap_zero_iter(&iter, did_zero, &range_dirty);
	return ret;
}
EXPORT_SYMBOL_GPL(iomap_zero_range);
+0 −10
Original line number Diff line number Diff line
@@ -870,16 +870,6 @@ xfs_setattr_size(
		error = xfs_zero_range(ip, oldsize, newsize - oldsize,
				&did_zeroing);
	} else {
		/*
		 * iomap won't detect a dirty page over an unwritten block (or a
		 * cow block over a hole) and subsequently skips zeroing the
		 * newly post-EOF portion of the page. Flush the new EOF to
		 * convert the block before the pagecache truncate.
		 */
		error = filemap_write_and_wait_range(inode->i_mapping, newsize,
						     newsize);
		if (error)
			return error;
		error = xfs_truncate_page(ip, newsize, &did_zeroing);
	}