Commit d5b3afea authored by Alistair Popple, committed by Andrew Morton
Browse files

fs/dax: create a common implementation to break DAX layouts

Prior to freeing a block, file systems supporting FS DAX must check that
the associated pages are both unmapped from user-space and not undergoing
DMA or other access from e.g. get_user_pages().  This is achieved by
unmapping the file range and scanning the FS DAX page-cache to see if any
pages within the mapping have an elevated refcount.

This is done using two functions: dax_layout_busy_page_range(), which
returns a page whose refcount must become idle, and dax_wait_page_idle(),
which waits for that to happen.  Rather than open-coding this in every
file system, introduce a common implementation that both unmaps the range
and waits for the page to become idle.

Link: https://lkml.kernel.org/r/c4d381e41fc618296cee2820403c166d80599d5c.1740713401.git-series.apopple@nvidia.com


Signed-off-by: Alistair Popple <apopple@nvidia.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Tested-by: Alison Schofield <alison.schofield@intel.com>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Asahi Lina <lina@asahilina.net>
Cc: Balbir Singh <balbirs@nvidia.com>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Chunyan Zhang <zhang.lyra@gmail.com>
Cc: "Darrick J. Wong" <djwong@kernel.org>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Dave Jiang <dave.jiang@intel.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: Ira Weiny <ira.weiny@intel.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: linmiaohe <linmiaohe@huawei.com>
Cc: Logan Gunthorpe <logang@deltatee.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michael "Camp Drill Sergeant" Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Ted Ts'o <tytso@mit.edu>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Vishal Verma <vishal.l.verma@intel.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: WANG Xuerui <kernel@xen0n.name>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent e6fa3963
Loading
Loading
Loading
Loading
+33 −0
Original line number Diff line number Diff line
@@ -846,6 +846,39 @@ int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index)
	return ret;
}

/*
 * Wait for @page to become idle.
 *
 * @page:  page whose idleness is tested with dax_page_is_idle(); it is
 *         also the variable handed to ___wait_var_event() to wait on.
 * @cb:    callback invoked as cb(inode); it is passed to
 *         ___wait_var_event() as the command argument.
 * @inode: inode forwarded to @cb.
 *
 * The wait uses TASK_INTERRUPTIBLE.  Returns whatever
 * ___wait_var_event() evaluates to; the caller in this file keeps
 * looping while that value is 0 and returns any other value as an error.
 *
 * NOTE(review): presumably @cb runs in place of a plain schedule() each
 * time the condition is found false, which would let the filesystem drop
 * and retake its locks around the sleep - confirm against the definition
 * of ___wait_var_event().
 */
static int wait_page_idle(struct page *page,
			void (cb)(struct inode *),
			struct inode *inode)
{
	return ___wait_var_event(page, dax_page_is_idle(page),
				TASK_INTERRUPTIBLE, 0, 0, cb(inode));
}

/*
 * dax_break_layout - wait until no page in a DAX file range is busy
 * @inode: inode whose mapping is examined
 * @start: first byte of the range
 * @end:   last byte of the range
 * @cb:    callback handed to wait_page_idle() together with @inode
 *
 * Meant to be called before the DAX mapping entries for the range are
 * deleted.  Each pass asks dax_layout_busy_page_range() for a page that
 * is still busy and, if one is returned, waits for it to become idle
 * before looking again.
 *
 * Returns 0 if the mapping is not a DAX mapping or once no busy page is
 * reported, otherwise the non-zero value returned by wait_page_idle().
 */
int dax_break_layout(struct inode *inode, loff_t start, loff_t end,
		void (cb)(struct inode *))
{
	struct page *busy;
	int ret;

	/* Nothing to break for a non-DAX mapping. */
	if (!dax_mapping(inode->i_mapping))
		return 0;

	for (;;) {
		busy = dax_layout_busy_page_range(inode->i_mapping, start, end);
		if (!busy)
			return 0;

		/* A failed wait ends the scan and is reported as-is. */
		ret = wait_page_idle(busy, cb, inode);
		if (ret != 0)
			return ret;
	}
}
EXPORT_SYMBOL_GPL(dax_break_layout);

/*
 * Invalidate DAX entry if it is clean.
 */
+1 −12
Original line number Diff line number Diff line
@@ -3911,21 +3911,10 @@ static void ext4_wait_dax_page(struct inode *inode)

int ext4_break_layouts(struct inode *inode)
{
	struct page *page;
	int error;

	if (WARN_ON_ONCE(!rwsem_is_locked(&inode->i_mapping->invalidate_lock)))
		return -EINVAL;

	do {
		page = dax_layout_busy_page(inode->i_mapping);
		if (!page)
			return 0;

		error = dax_wait_page_idle(page, ext4_wait_dax_page, inode);
	} while (error == 0);

	return error;
	return dax_break_layout_inode(inode, ext4_wait_dax_page);
}

/*
+3 −24
Original line number Diff line number Diff line
@@ -666,33 +666,12 @@ static void fuse_wait_dax_page(struct inode *inode)
	filemap_invalidate_lock(inode->i_mapping);
}

/* Should be called with mapping->invalidate_lock held exclusively */
static int __fuse_dax_break_layouts(struct inode *inode, bool *retry,
				    loff_t start, loff_t end)
{
	struct page *page;

	page = dax_layout_busy_page_range(inode->i_mapping, start, end);
	if (!page)
		return 0;

	*retry = true;
	return dax_wait_page_idle(page, fuse_wait_dax_page, inode);
}

/* Should be called with mapping->invalidate_lock held exclusively. */
int fuse_dax_break_layouts(struct inode *inode, u64 dmap_start,
				  u64 dmap_end)
{
	bool	retry;
	int	ret;

	do {
		retry = false;
		ret = __fuse_dax_break_layouts(inode, &retry, dmap_start,
					       dmap_end);
	} while (ret == 0 && retry);

	return ret;
	return dax_break_layout(inode, dmap_start, dmap_end,
				fuse_wait_dax_page);
}

ssize_t fuse_dax_read_iter(struct kiocb *iocb, struct iov_iter *to)
+7 −19
Original line number Diff line number Diff line
@@ -2735,21 +2735,17 @@ xfs_mmaplock_two_inodes_and_break_dax_layout(
	struct xfs_inode	*ip2)
{
	int			error;
	bool			retry;
	struct page		*page;

	if (ip1->i_ino > ip2->i_ino)
		swap(ip1, ip2);

again:
	retry = false;
	/* Lock the first inode */
	xfs_ilock(ip1, XFS_MMAPLOCK_EXCL);
	error = xfs_break_dax_layouts(VFS_I(ip1), &retry);
	if (error || retry) {
	error = xfs_break_dax_layouts(VFS_I(ip1));
	if (error) {
		xfs_iunlock(ip1, XFS_MMAPLOCK_EXCL);
		if (error == 0 && retry)
			goto again;
		return error;
	}

@@ -2764,7 +2760,7 @@ xfs_mmaplock_two_inodes_and_break_dax_layout(
	 * for this nested lock case.
	 */
	page = dax_layout_busy_page(VFS_I(ip2)->i_mapping);
	if (page && page_ref_count(page) != 1) {
	if (!dax_page_is_idle(page)) {
		xfs_iunlock(ip2, XFS_MMAPLOCK_EXCL);
		xfs_iunlock(ip1, XFS_MMAPLOCK_EXCL);
		goto again;
@@ -3008,19 +3004,11 @@ xfs_wait_dax_page(

int
xfs_break_dax_layouts(
	struct inode		*inode,
	bool			*retry)
	struct inode		*inode)
{
	struct page		*page;

	xfs_assert_ilocked(XFS_I(inode), XFS_MMAPLOCK_EXCL);

	page = dax_layout_busy_page(inode->i_mapping);
	if (!page)
		return 0;

	*retry = true;
	return dax_wait_page_idle(page, xfs_wait_dax_page, inode);
	return dax_break_layout_inode(inode, xfs_wait_dax_page);
}

int
@@ -3038,8 +3026,8 @@ xfs_break_layouts(
		retry = false;
		switch (reason) {
		case BREAK_UNMAP:
			error = xfs_break_dax_layouts(inode, &retry);
			if (error || retry)
			error = xfs_break_dax_layouts(inode);
			if (error)
				break;
			fallthrough;
		case BREAK_WRITE:
+1 −1
Original line number Diff line number Diff line
@@ -603,7 +603,7 @@ xfs_itruncate_extents(
	return xfs_itruncate_extents_flags(tpp, ip, whichfork, new_size, 0);
}

int	xfs_break_dax_layouts(struct inode *inode, bool *retry);
int	xfs_break_dax_layouts(struct inode *inode);
int	xfs_break_layouts(struct inode *inode, uint *iolock,
		enum layout_break_reason reason);

Loading