Commit a2e43397 authored by Linus Torvalds
Browse files

Merge tag 'vfs-6.16-rc1.iomap' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

Pull iomap updates from Christian Brauner:

 - More fallout and preparatory work associated with the folio batch
   prototype posted a while back.

   Mainly this just cleans up some of the helpers and pushes some
   pos/len trimming further down in the write begin path.

 - Add missing flag descriptions to the iomap documentation

* tag 'vfs-6.16-rc1.iomap' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
  iomap: rework iomap_write_begin() to return folio offset and length
  iomap: push non-large folio check into get folio path
  iomap: helper to trim pos/bytes to within folio
  iomap: drop pos param from __iomap_[get|put]_folio()
  iomap: drop unnecessary pos param from iomap_write_[begin|end]
  iomap: resample iter->pos after iomap_write_begin() calls
  iomap: trace: Add missing flags to [IOMAP_|IOMAP_F_]FLAGS_STRINGS
  Documentation: iomap: Add missing flags description
parents c5bfc48d 2cb0e96c
Loading
Loading
Loading
Loading
+14 −2
Original line number Diff line number Diff line
@@ -243,13 +243,25 @@ The fields are as follows:
     regular file data.
     This is only useful for FIEMAP.

   * **IOMAP_F_PRIVATE**: Starting with this value, the upper bits can
     be set by the filesystem for its own purposes.
   * **IOMAP_F_BOUNDARY**: This indicates I/O and its completion must not be
     merged with any other I/O or completion. Filesystems must use this when
     submitting I/O to devices that cannot handle I/O crossing certain LBAs
     (e.g. ZNS devices). This flag applies only to buffered I/O writeback; all
     other functions ignore it.

   * **IOMAP_F_PRIVATE**: This flag is reserved for filesystem private use.

   * **IOMAP_F_ANON_WRITE**: Indicates that (write) I/O does not have a target
     block assigned to it yet and the file system will do that in the bio
     submission handler, splitting the I/O as needed.

   * **IOMAP_F_ATOMIC_BIO**: This indicates write I/O must be submitted with the
     ``REQ_ATOMIC`` flag set in the bio. Filesystems need to set this flag to
     inform iomap that the write I/O operation requires torn-write protection
     based on a HW-offload mechanism. They must also ensure that mapping
     updates upon the completion of the I/O are performed in a single metadata
     update.

   These flags can be set by iomap itself during file operations.
   The filesystem should supply an ``->iomap_end`` function if it needs
   to observe these flags:
+58 −42
Original line number Diff line number Diff line
@@ -679,11 +679,12 @@ static int iomap_read_folio_sync(loff_t block_start, struct folio *folio,
	return submit_bio_wait(&bio);
}

static int __iomap_write_begin(const struct iomap_iter *iter, loff_t pos,
		size_t len, struct folio *folio)
static int __iomap_write_begin(const struct iomap_iter *iter, size_t len,
		struct folio *folio)
{
	const struct iomap *srcmap = iomap_iter_srcmap(iter);
	struct iomap_folio_state *ifs;
	loff_t pos = iter->pos;
	loff_t block_size = i_blocksize(iter->inode);
	loff_t block_start = round_down(pos, block_size);
	loff_t block_end = round_up(pos + len, block_size);
@@ -741,10 +742,13 @@ static int __iomap_write_begin(const struct iomap_iter *iter, loff_t pos,
	return 0;
}

static struct folio *__iomap_get_folio(struct iomap_iter *iter, loff_t pos,
		size_t len)
static struct folio *__iomap_get_folio(struct iomap_iter *iter, size_t len)
{
	const struct iomap_folio_ops *folio_ops = iter->iomap.folio_ops;
	loff_t pos = iter->pos;

	if (!mapping_large_folio_support(iter->inode->i_mapping))
		len = min_t(size_t, len, PAGE_SIZE - offset_in_page(pos));

	if (folio_ops && folio_ops->get_folio)
		return folio_ops->get_folio(iter, pos, len);
@@ -752,10 +756,11 @@ static struct folio *__iomap_get_folio(struct iomap_iter *iter, loff_t pos,
		return iomap_get_folio(iter, pos, len);
}

static void __iomap_put_folio(struct iomap_iter *iter, loff_t pos, size_t ret,
static void __iomap_put_folio(struct iomap_iter *iter, size_t ret,
		struct folio *folio)
{
	const struct iomap_folio_ops *folio_ops = iter->iomap.folio_ops;
	loff_t pos = iter->pos;

	if (folio_ops && folio_ops->put_folio) {
		folio_ops->put_folio(iter->inode, pos, ret, folio);
@@ -765,6 +770,22 @@ static void __iomap_put_folio(struct iomap_iter *iter, loff_t pos, size_t ret,
	}
}

/*
 * Clamp the range that starts at iter->pos to the portion lying inside @folio.
 *
 * On return, *offset holds the byte offset of iter->pos within @folio and
 * *bytes has been reduced, if needed, so the range does not extend past the
 * end of the folio.  A one-time warning is emitted if iter->pos falls outside
 * the folio.  Returns iter->pos.
 */
static loff_t iomap_trim_folio_range(struct iomap_iter *iter,
		struct folio *folio, size_t *offset, u64 *bytes)
{
	const loff_t start = iter->pos;
	const size_t folio_bytes = folio_size(folio);
	size_t room;

	/* the position is expected to fall inside the folio */
	WARN_ON_ONCE(start < folio_pos(folio));
	WARN_ON_ONCE(start >= folio_pos(folio) + folio_bytes);

	*offset = offset_in_folio(folio, start);

	/* never report more bytes than remain in the folio after *offset */
	room = folio_bytes - *offset;
	if (*bytes > room)
		*bytes = room;

	return start;
}

static int iomap_write_begin_inline(const struct iomap_iter *iter,
		struct folio *folio)
{
@@ -774,14 +795,22 @@ static int iomap_write_begin_inline(const struct iomap_iter *iter,
	return iomap_read_inline_data(iter, folio);
}

static int iomap_write_begin(struct iomap_iter *iter, loff_t pos,
		size_t len, struct folio **foliop)
/*
 * Grab and prepare a folio for write based on iter state. Returns the folio,
 * the offset into the folio, and the length. Callers can optionally pass a
 * maximum length in *plen; otherwise *plen must be initialized to zero.
 */
static int iomap_write_begin(struct iomap_iter *iter, struct folio **foliop,
		size_t *poffset, u64 *plen)
{
	const struct iomap_folio_ops *folio_ops = iter->iomap.folio_ops;
	const struct iomap *srcmap = iomap_iter_srcmap(iter);
	loff_t pos = iter->pos;
	u64 len = min_t(u64, SIZE_MAX, iomap_length(iter));
	struct folio *folio;
	int status = 0;

	len = min_not_zero(len, *plen);
	BUG_ON(pos + len > iter->iomap.offset + iter->iomap.length);
	if (srcmap != &iter->iomap)
		BUG_ON(pos + len > srcmap->offset + srcmap->length);
@@ -789,10 +818,7 @@ static int iomap_write_begin(struct iomap_iter *iter, loff_t pos,
	if (fatal_signal_pending(current))
		return -EINTR;

	if (!mapping_large_folio_support(iter->inode->i_mapping))
		len = min_t(size_t, len, PAGE_SIZE - offset_in_page(pos));

	folio = __iomap_get_folio(iter, pos, len);
	folio = __iomap_get_folio(iter, len);
	if (IS_ERR(folio))
		return PTR_ERR(folio);

@@ -816,24 +842,24 @@ static int iomap_write_begin(struct iomap_iter *iter, loff_t pos,
		}
	}

	if (pos + len > folio_pos(folio) + folio_size(folio))
		len = folio_pos(folio) + folio_size(folio) - pos;
	pos = iomap_trim_folio_range(iter, folio, poffset, &len);

	if (srcmap->type == IOMAP_INLINE)
		status = iomap_write_begin_inline(iter, folio);
	else if (srcmap->flags & IOMAP_F_BUFFER_HEAD)
		status = __block_write_begin_int(folio, pos, len, NULL, srcmap);
	else
		status = __iomap_write_begin(iter, pos, len, folio);
		status = __iomap_write_begin(iter, len, folio);

	if (unlikely(status))
		goto out_unlock;

	*foliop = folio;
	*plen = len;
	return 0;

out_unlock:
	__iomap_put_folio(iter, pos, 0, folio);
	__iomap_put_folio(iter, 0, folio);

	return status;
}
@@ -883,10 +909,11 @@ static void iomap_write_end_inline(const struct iomap_iter *iter,
 * Returns true if all copied bytes have been written to the pagecache,
 * otherwise return false.
 */
static bool iomap_write_end(struct iomap_iter *iter, loff_t pos, size_t len,
		size_t copied, struct folio *folio)
static bool iomap_write_end(struct iomap_iter *iter, size_t len, size_t copied,
		struct folio *folio)
{
	const struct iomap *srcmap = iomap_iter_srcmap(iter);
	loff_t pos = iter->pos;

	if (srcmap->type == IOMAP_INLINE) {
		iomap_write_end_inline(iter, folio, pos, copied);
@@ -917,14 +944,14 @@ static int iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
		struct folio *folio;
		loff_t old_size;
		size_t offset;		/* Offset into folio */
		size_t bytes;		/* Bytes to write to folio */
		u64 bytes;		/* Bytes to write to folio */
		size_t copied;		/* Bytes copied from user */
		u64 written;		/* Bytes have been written */
		loff_t pos = iter->pos;
		loff_t pos;

		bytes = iov_iter_count(i);
retry:
		offset = pos & (chunk - 1);
		offset = iter->pos & (chunk - 1);
		bytes = min(chunk - offset, bytes);
		status = balance_dirty_pages_ratelimited_flags(mapping,
							       bdp_flags);
@@ -949,23 +976,21 @@ static int iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
			break;
		}

		status = iomap_write_begin(iter, pos, bytes, &folio);
		status = iomap_write_begin(iter, &folio, &offset, &bytes);
		if (unlikely(status)) {
			iomap_write_failed(iter->inode, pos, bytes);
			iomap_write_failed(iter->inode, iter->pos, bytes);
			break;
		}
		if (iter->iomap.flags & IOMAP_F_STALE)
			break;

		offset = offset_in_folio(folio, pos);
		if (bytes > folio_size(folio) - offset)
			bytes = folio_size(folio) - offset;
		pos = iter->pos;

		if (mapping_writably_mapped(mapping))
			flush_dcache_folio(folio);

		copied = copy_folio_from_iter_atomic(folio, offset, bytes, i);
		written = iomap_write_end(iter, pos, bytes, copied, folio) ?
		written = iomap_write_end(iter, bytes, copied, folio) ?
			  copied : 0;

		/*
@@ -980,7 +1005,7 @@ static int iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
			i_size_write(iter->inode, pos + written);
			iter->iomap.flags |= IOMAP_F_SIZE_CHANGED;
		}
		__iomap_put_folio(iter, pos, written, folio);
		__iomap_put_folio(iter, written, folio);

		if (old_size < pos)
			pagecache_isize_extended(iter->inode, old_size, pos);
@@ -1276,22 +1301,17 @@ static int iomap_unshare_iter(struct iomap_iter *iter)
	do {
		struct folio *folio;
		size_t offset;
		loff_t pos = iter->pos;
		bool ret;

		bytes = min_t(u64, SIZE_MAX, bytes);
		status = iomap_write_begin(iter, pos, bytes, &folio);
		status = iomap_write_begin(iter, &folio, &offset, &bytes);
		if (unlikely(status))
			return status;
		if (iomap->flags & IOMAP_F_STALE)
			break;

		offset = offset_in_folio(folio, pos);
		if (bytes > folio_size(folio) - offset)
			bytes = folio_size(folio) - offset;

		ret = iomap_write_end(iter, pos, bytes, bytes, folio);
		__iomap_put_folio(iter, pos, bytes, folio);
		ret = iomap_write_end(iter, bytes, bytes, folio);
		__iomap_put_folio(iter, bytes, folio);
		if (WARN_ON_ONCE(!ret))
			return -EIO;

@@ -1351,11 +1371,10 @@ static int iomap_zero_iter(struct iomap_iter *iter, bool *did_zero)
	do {
		struct folio *folio;
		size_t offset;
		loff_t pos = iter->pos;
		bool ret;

		bytes = min_t(u64, SIZE_MAX, bytes);
		status = iomap_write_begin(iter, pos, bytes, &folio);
		status = iomap_write_begin(iter, &folio, &offset, &bytes);
		if (status)
			return status;
		if (iter->iomap.flags & IOMAP_F_STALE)
@@ -1363,15 +1382,12 @@ static int iomap_zero_iter(struct iomap_iter *iter, bool *did_zero)

		/* warn about zeroing folios beyond eof that won't write back */
		WARN_ON_ONCE(folio_pos(folio) > iter->inode->i_size);
		offset = offset_in_folio(folio, pos);
		if (bytes > folio_size(folio) - offset)
			bytes = folio_size(folio) - offset;

		folio_zero_range(folio, offset, bytes);
		folio_mark_accessed(folio);

		ret = iomap_write_end(iter, pos, bytes, bytes, folio);
		__iomap_put_folio(iter, pos, bytes, folio);
		ret = iomap_write_end(iter, bytes, bytes, folio);
		__iomap_put_folio(iter, bytes, folio);
		if (WARN_ON_ONCE(!ret))
			return -EIO;

+21 −6
Original line number Diff line number Diff line
@@ -99,7 +99,11 @@ DEFINE_RANGE_EVENT(iomap_dio_rw_queued);
	{ IOMAP_FAULT,		"FAULT" }, \
	{ IOMAP_DIRECT,		"DIRECT" }, \
	{ IOMAP_NOWAIT,		"NOWAIT" }, \
	{ IOMAP_ATOMIC,		"ATOMIC" }
	{ IOMAP_OVERWRITE_ONLY,	"OVERWRITE_ONLY" }, \
	{ IOMAP_UNSHARE,	"UNSHARE" }, \
	{ IOMAP_DAX,		"DAX" }, \
	{ IOMAP_ATOMIC,		"ATOMIC" }, \
	{ IOMAP_DONTCACHE,	"DONTCACHE" }

#define IOMAP_F_FLAGS_STRINGS \
	{ IOMAP_F_NEW,		"NEW" }, \
@@ -107,7 +111,14 @@ DEFINE_RANGE_EVENT(iomap_dio_rw_queued);
	{ IOMAP_F_SHARED,	"SHARED" }, \
	{ IOMAP_F_MERGED,	"MERGED" }, \
	{ IOMAP_F_BUFFER_HEAD,	"BH" }, \
	{ IOMAP_F_SIZE_CHANGED,	"SIZE_CHANGED" }
	{ IOMAP_F_XATTR,	"XATTR" }, \
	{ IOMAP_F_BOUNDARY,	"BOUNDARY" }, \
	{ IOMAP_F_ANON_WRITE,	"ANON_WRITE" }, \
	{ IOMAP_F_ATOMIC_BIO,	"ATOMIC_BIO" }, \
	{ IOMAP_F_PRIVATE,	"PRIVATE" }, \
	{ IOMAP_F_SIZE_CHANGED,	"SIZE_CHANGED" }, \
	{ IOMAP_F_STALE,	"STALE" }


#define IOMAP_DIO_STRINGS \
	{IOMAP_DIO_FORCE_WAIT,	"DIO_FORCE_WAIT" }, \
@@ -138,7 +149,7 @@ DECLARE_EVENT_CLASS(iomap_class,
		__entry->bdev = iomap->bdev ? iomap->bdev->bd_dev : 0;
	),
	TP_printk("dev %d:%d ino 0x%llx bdev %d:%d addr 0x%llx offset 0x%llx "
		  "length 0x%llx type %s flags %s",
		  "length 0x%llx type %s (0x%x) flags %s (0x%x)",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->ino,
		  MAJOR(__entry->bdev), MINOR(__entry->bdev),
@@ -146,7 +157,9 @@ DECLARE_EVENT_CLASS(iomap_class,
		  __entry->offset,
		  __entry->length,
		  __print_symbolic(__entry->type, IOMAP_TYPE_STRINGS),
		  __print_flags(__entry->flags, "|", IOMAP_F_FLAGS_STRINGS))
		  __entry->type,
		  __print_flags(__entry->flags, "|", IOMAP_F_FLAGS_STRINGS),
		  __entry->flags)
)

#define DEFINE_IOMAP_EVENT(name)		\
@@ -185,7 +198,7 @@ TRACE_EVENT(iomap_writepage_map,
		__entry->bdev = iomap->bdev ? iomap->bdev->bd_dev : 0;
	),
	TP_printk("dev %d:%d ino 0x%llx bdev %d:%d pos 0x%llx dirty len 0x%llx "
		  "addr 0x%llx offset 0x%llx length 0x%llx type %s flags %s",
		  "addr 0x%llx offset 0x%llx length 0x%llx type %s (0x%x) flags %s (0x%x)",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->ino,
		  MAJOR(__entry->bdev), MINOR(__entry->bdev),
@@ -195,7 +208,9 @@ TRACE_EVENT(iomap_writepage_map,
		  __entry->offset,
		  __entry->length,
		  __print_symbolic(__entry->type, IOMAP_TYPE_STRINGS),
		  __print_flags(__entry->flags, "|", IOMAP_F_FLAGS_STRINGS))
		  __entry->type,
		  __print_flags(__entry->flags, "|", IOMAP_F_FLAGS_STRINGS),
		  __entry->flags)
);

TRACE_EVENT(iomap_iter,