Commit 32f6987f authored by Carlos Maiolino
Browse files

Merge branch 'xfs-6.15-merge' into for-next



XFS code for 6.15 to be merged into linux-next

Signed-off-by: Carlos Maiolino <cem@kernel.org>
parents 5d138b6f 358cab79
Loading
Loading
Loading
Loading
+9 −0
Original line number Diff line number Diff line
@@ -246,6 +246,10 @@ The fields are as follows:
   * **IOMAP_F_PRIVATE**: Starting with this value, the upper bits can
     be set by the filesystem for its own purposes.

   * **IOMAP_F_ANON_WRITE**: Indicates that (write) I/O does not have a target
     block assigned to it yet and the file system will do that in the bio
     submission handler, splitting the I/O as needed.

   These flags can be set by iomap itself during file operations.
   The filesystem should supply an ``->iomap_end`` function if it needs
   to observe these flags:
@@ -352,6 +356,11 @@ operations:
   ``IOMAP_NOWAIT`` is often set on behalf of ``IOCB_NOWAIT`` or
   ``RWF_NOWAIT``.

 * ``IOMAP_DONTCACHE`` is set when the caller wishes to perform
   buffered file I/O and would like the kernel to drop the pagecache
   after the I/O completes, if it isn't already being used by another
   thread.

If it is necessary to read existing file contents from a `different
<https://lore.kernel.org/all/20191008071527.29304-9-hch@lst.de/>`_
device or address range on a device, the filesystem should return that
+23 −10
Original line number Diff line number Diff line
@@ -131,6 +131,8 @@ These ``struct kiocb`` flags are significant for buffered I/O with iomap:

 * ``IOCB_NOWAIT``: Turns on ``IOMAP_NOWAIT``.

 * ``IOCB_DONTCACHE``: Turns on ``IOMAP_DONTCACHE``.

Internal per-Folio State
------------------------

@@ -283,7 +285,7 @@ The ``ops`` structure must be specified and is as follows:
 struct iomap_writeback_ops {
     int (*map_blocks)(struct iomap_writepage_ctx *wpc, struct inode *inode,
                       loff_t offset, unsigned len);
     int (*prepare_ioend)(struct iomap_ioend *ioend, int status);
     int (*submit_ioend)(struct iomap_writepage_ctx *wpc, int status);
     void (*discard_folio)(struct folio *folio, loff_t pos);
 };

@@ -306,13 +308,12 @@ The fields are as follows:
    purpose.
    This function must be supplied by the filesystem.

  - ``prepare_ioend``: Enables filesystems to transform the writeback
    ioend or perform any other preparatory work before the writeback I/O
    is submitted.
  - ``submit_ioend``: Allows filesystems to hook into writeback bio
    submission.
    This might include pre-write space accounting updates, or installing
    a custom ``->bi_end_io`` function for internal purposes, such as
    deferring the ioend completion to a workqueue to run metadata update
    transactions from process context.
    transactions from process context before submitting the bio.
    This function is optional.

  - ``discard_folio``: iomap calls this function after ``->map_blocks``
@@ -341,7 +342,7 @@ This can happen in interrupt or process context, depending on the
storage device.

Filesystems that need to update internal bookkeeping (e.g. unwritten
extent conversions) should provide a ``->prepare_ioend`` function to
extent conversions) should provide a ``->submit_ioend`` function to
set ``struct iomap_ioend::bio::bi_end_io`` to its own function.
This function should call ``iomap_finish_ioends`` after finishing its
own work (e.g. unwritten extent conversion).
@@ -513,8 +514,8 @@ IOMAP_WRITE`` with any combination of the following enhancements:
   if the mapping is unwritten and the filesystem cannot handle zeroing
   the unaligned regions without exposing stale contents.

 * ``IOMAP_ATOMIC``: This write is being issued with torn-write
   protection.
 * ``IOMAP_ATOMIC_HW``: This write is being issued with torn-write
   protection based on HW-offload support.
   Only a single bio can be created for the write, and the write must
   not be split into multiple I/O requests, i.e. the ``REQ_ATOMIC`` flag
   must be set.
@@ -525,8 +526,20 @@ IOMAP_WRITE`` with any combination of the following enhancements:
   conversion or copy on write), all updates for the entire file range
   must be committed atomically as well.
   Only one space mapping is allowed per untorn write.
   Untorn writes must be aligned to, and must not be longer than, a
   single file block.
   Untorn writes may be longer than a single file block. In all cases,
   the mapping start disk block must have at least the same alignment as
   the write offset.

 * ``IOMAP_ATOMIC_SW``: This write is being issued with torn-write
   protection via a software mechanism provided by the filesystem.
   None of the disk block alignment and single-bio restrictions that
   apply to ``IOMAP_ATOMIC_HW`` apply here.
   SW-based untorn writes would typically be used as a fallback when
   HW-based untorn writes cannot be issued, e.g. when the range of the
   write covers multiple extents, meaning that it is not possible to
   issue a single bio.
   All filesystem metadata updates for the entire file range must be
   committed atomically as well.

Callers commonly hold ``i_rwsem`` in shared or exclusive mode before
calling this function.
+61 −50
Original line number Diff line number Diff line
@@ -1258,7 +1258,7 @@ static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf,
}
#endif /* CONFIG_FS_DAX_PMD */

static s64 dax_unshare_iter(struct iomap_iter *iter)
static int dax_unshare_iter(struct iomap_iter *iter)
{
	struct iomap *iomap = &iter->iomap;
	const struct iomap *srcmap = iomap_iter_srcmap(iter);
@@ -1266,11 +1266,11 @@ static s64 dax_unshare_iter(struct iomap_iter *iter)
	u64 copy_len = iomap_length(iter);
	u32 mod;
	int id = 0;
	s64 ret = 0;
	s64 ret;
	void *daddr = NULL, *saddr = NULL;

	if (!iomap_want_unshare_iter(iter))
		return iomap_length(iter);
		return iomap_iter_advance_full(iter);

	/*
	 * Extend the file range to be aligned to fsblock/pagesize, because
@@ -1300,14 +1300,14 @@ static s64 dax_unshare_iter(struct iomap_iter *iter)
	if (ret < 0)
		goto out_unlock;

	if (copy_mc_to_kernel(daddr, saddr, copy_len) == 0)
		ret = iomap_length(iter);
	else
	if (copy_mc_to_kernel(daddr, saddr, copy_len) != 0)
		ret = -EIO;

out_unlock:
	dax_read_unlock(id);
	if (ret < 0)
		return dax_mem2blk_err(ret);
	return iomap_iter_advance_full(iter);
}

int dax_file_unshare(struct inode *inode, loff_t pos, loff_t len,
@@ -1326,7 +1326,7 @@ int dax_file_unshare(struct inode *inode, loff_t pos, loff_t len,

	iter.len = min(len, size - pos);
	while ((ret = iomap_iter(&iter, ops)) > 0)
		iter.processed = dax_unshare_iter(&iter);
		iter.status = dax_unshare_iter(&iter);
	return ret;
}
EXPORT_SYMBOL_GPL(dax_file_unshare);
@@ -1354,17 +1354,16 @@ static int dax_memzero(struct iomap_iter *iter, loff_t pos, size_t size)
	return ret;
}

static s64 dax_zero_iter(struct iomap_iter *iter, bool *did_zero)
static int dax_zero_iter(struct iomap_iter *iter, bool *did_zero)
{
	const struct iomap *iomap = &iter->iomap;
	const struct iomap *srcmap = iomap_iter_srcmap(iter);
	loff_t pos = iter->pos;
	u64 length = iomap_length(iter);
	s64 written = 0;
	int ret;

	/* already zeroed?  we're done. */
	if (srcmap->type == IOMAP_HOLE || srcmap->type == IOMAP_UNWRITTEN)
		return length;
		return iomap_iter_advance(iter, &length);

	/*
	 * invalidate the pages whose sharing state is to be changed
@@ -1372,33 +1371,35 @@ static s64 dax_zero_iter(struct iomap_iter *iter, bool *did_zero)
	 */
	if (iomap->flags & IOMAP_F_SHARED)
		invalidate_inode_pages2_range(iter->inode->i_mapping,
					      pos >> PAGE_SHIFT,
					      (pos + length - 1) >> PAGE_SHIFT);
				iter->pos >> PAGE_SHIFT,
				(iter->pos + length - 1) >> PAGE_SHIFT);

	do {
		loff_t pos = iter->pos;
		unsigned offset = offset_in_page(pos);
		unsigned size = min_t(u64, PAGE_SIZE - offset, length);
		pgoff_t pgoff = dax_iomap_pgoff(iomap, pos);
		long rc;
		int id;

		length = min_t(u64, PAGE_SIZE - offset, length);

		id = dax_read_lock();
		if (IS_ALIGNED(pos, PAGE_SIZE) && size == PAGE_SIZE)
			rc = dax_zero_page_range(iomap->dax_dev, pgoff, 1);
		if (IS_ALIGNED(pos, PAGE_SIZE) && length == PAGE_SIZE)
			ret = dax_zero_page_range(iomap->dax_dev, pgoff, 1);
		else
			rc = dax_memzero(iter, pos, size);
			ret = dax_memzero(iter, pos, length);
		dax_read_unlock(id);

		if (rc < 0)
			return rc;
		pos += size;
		length -= size;
		written += size;
		if (ret < 0)
			return ret;

		ret = iomap_iter_advance(iter, &length);
		if (ret)
			return ret;
	} while (length > 0);

	if (did_zero)
		*did_zero = true;
	return written;
	return ret;
}

int dax_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero,
@@ -1413,7 +1414,7 @@ int dax_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero,
	int ret;

	while ((ret = iomap_iter(&iter, ops)) > 0)
		iter.processed = dax_zero_iter(&iter, did_zero);
		iter.status = dax_zero_iter(&iter, did_zero);
	return ret;
}
EXPORT_SYMBOL_GPL(dax_zero_range);
@@ -1431,8 +1432,7 @@ int dax_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
}
EXPORT_SYMBOL_GPL(dax_truncate_page);

static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
		struct iov_iter *iter)
static int dax_iomap_iter(struct iomap_iter *iomi, struct iov_iter *iter)
{
	const struct iomap *iomap = &iomi->iomap;
	const struct iomap *srcmap = iomap_iter_srcmap(iomi);
@@ -1451,8 +1451,10 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
		if (pos >= end)
			return 0;

		if (iomap->type == IOMAP_HOLE || iomap->type == IOMAP_UNWRITTEN)
			return iov_iter_zero(min(length, end - pos), iter);
		if (iomap->type == IOMAP_HOLE || iomap->type == IOMAP_UNWRITTEN) {
			done = iov_iter_zero(min(length, end - pos), iter);
			return iomap_iter_advance(iomi, &done);
		}
	}

	/*
@@ -1485,7 +1487,7 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
	}

	id = dax_read_lock();
	while (pos < end) {
	while ((pos = iomi->pos) < end) {
		unsigned offset = pos & (PAGE_SIZE - 1);
		const size_t size = ALIGN(length + offset, PAGE_SIZE);
		pgoff_t pgoff = dax_iomap_pgoff(iomap, pos);
@@ -1535,18 +1537,16 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
			xfer = dax_copy_to_iter(dax_dev, pgoff, kaddr,
					map_len, iter);

		pos += xfer;
		length -= xfer;
		done += xfer;

		if (xfer == 0)
		length = xfer;
		ret = iomap_iter_advance(iomi, &length);
		if (!ret && xfer == 0)
			ret = -EFAULT;
		if (xfer < map_len)
			break;
	}
	dax_read_unlock(id);

	return done ? done : ret;
	return ret;
}

/**
@@ -1586,7 +1586,7 @@ dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
		iomi.flags |= IOMAP_NOWAIT;

	while ((ret = iomap_iter(&iomi, ops)) > 0)
		iomi.processed = dax_iomap_iter(&iomi, iter);
		iomi.status = dax_iomap_iter(&iomi, iter);

	done = iomi.pos - iocb->ki_pos;
	iocb->ki_pos = iomi.pos;
@@ -1757,7 +1757,7 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,

	while ((error = iomap_iter(&iter, ops)) > 0) {
		if (WARN_ON_ONCE(iomap_length(&iter) < PAGE_SIZE)) {
			iter.processed = -EIO;	/* fs corruption? */
			iter.status = -EIO;	/* fs corruption? */
			continue;
		}

@@ -1769,8 +1769,10 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
			ret |= VM_FAULT_MAJOR;
		}

		if (!(ret & VM_FAULT_ERROR))
			iter.processed = PAGE_SIZE;
		if (!(ret & VM_FAULT_ERROR)) {
			u64 length = PAGE_SIZE;
			iter.status = iomap_iter_advance(&iter, &length);
		}
	}

	if (iomap_errp)
@@ -1883,8 +1885,10 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
			continue; /* actually breaks out of the loop */

		ret = dax_fault_iter(vmf, &iter, pfnp, &xas, &entry, true);
		if (ret != VM_FAULT_FALLBACK)
			iter.processed = PMD_SIZE;
		if (ret != VM_FAULT_FALLBACK) {
			u64 length = PMD_SIZE;
			iter.status = iomap_iter_advance(&iter, &length);
		}
	}

unlock_entry:
@@ -1999,12 +2003,13 @@ vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf, unsigned int order,
}
EXPORT_SYMBOL_GPL(dax_finish_sync_fault);

static loff_t dax_range_compare_iter(struct iomap_iter *it_src,
static int dax_range_compare_iter(struct iomap_iter *it_src,
		struct iomap_iter *it_dest, u64 len, bool *same)
{
	const struct iomap *smap = &it_src->iomap;
	const struct iomap *dmap = &it_dest->iomap;
	loff_t pos1 = it_src->pos, pos2 = it_dest->pos;
	u64 dest_len;
	void *saddr, *daddr;
	int id, ret;

@@ -2012,7 +2017,7 @@ static loff_t dax_range_compare_iter(struct iomap_iter *it_src,

	if (smap->type == IOMAP_HOLE && dmap->type == IOMAP_HOLE) {
		*same = true;
		return len;
		goto advance;
	}

	if (smap->type == IOMAP_HOLE || dmap->type == IOMAP_HOLE) {
@@ -2035,7 +2040,13 @@ static loff_t dax_range_compare_iter(struct iomap_iter *it_src,
	if (!*same)
		len = 0;
	dax_read_unlock(id);
	return len;

advance:
	dest_len = len;
	ret = iomap_iter_advance(it_src, &len);
	if (!ret)
		ret = iomap_iter_advance(it_dest, &dest_len);
	return ret;

out_unlock:
	dax_read_unlock(id);
@@ -2058,15 +2069,15 @@ int dax_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
		.len		= len,
		.flags		= IOMAP_DAX,
	};
	int ret, compared = 0;
	int ret, status;

	while ((ret = iomap_iter(&src_iter, ops)) > 0 &&
	       (ret = iomap_iter(&dst_iter, ops)) > 0) {
		compared = dax_range_compare_iter(&src_iter, &dst_iter,
		status = dax_range_compare_iter(&src_iter, &dst_iter,
				min(src_iter.len, dst_iter.len), same);
		if (compared < 0)
		if (status < 0)
			return ret;
		src_iter.processed = dst_iter.processed = compared;
		src_iter.status = dst_iter.status = status;
	}
	return ret;
}
+1 −1
Original line number Diff line number Diff line
@@ -3467,7 +3467,7 @@ static inline bool ext4_want_directio_fallback(unsigned flags, ssize_t written)
		return false;

	/* atomic writes are all-or-nothing */
	if (flags & IOMAP_ATOMIC)
	if (flags & IOMAP_ATOMIC_HW)
		return false;

	/* can only try again if we wrote nothing */
+2 −1
Original line number Diff line number Diff line
@@ -1300,7 +1300,8 @@ static int gfs2_block_zero_range(struct inode *inode, loff_t from,
				 unsigned int length)
{
	BUG_ON(current->journal_info);
	return iomap_zero_range(inode, from, length, NULL, &gfs2_iomap_ops);
	return iomap_zero_range(inode, from, length, NULL, &gfs2_iomap_ops,
			NULL);
}

#define GFS2_JTRUNC_REVOKES 8192
Loading