Commit bde708f1 authored by Alistair Popple, committed by Andrew Morton
Browse files

fs/dax: always remove DAX page-cache entries when breaking layouts

Prior to any truncation operations file systems call dax_break_layout()
to ensure pages in the range are not undergoing DMA.  Later DAX
page-cache entries will be removed by truncate_folio_batch_exceptionals()
in the generic page-cache code.

However, this makes it possible for folios to be removed from the
page-cache even though they are still DMA busy if the file-system hasn't
called dax_break_layout().  It also means they can never be waited on in
future because FS DAX will lose track of them once the page-cache entry
has been deleted.

Instead it is better to delete the FS DAX entry when the file-system calls
dax_break_layout() as part of its truncate operation.  This ensures only
idle pages can be removed from the FS DAX page-cache and makes it easy to
detect if a file-system hasn't called dax_break_layout() prior to a
truncate operation.

Link: https://lkml.kernel.org/r/3be6115eaaa8d28fee37fcba3287be4f226a7d24.1740713401.git-series.apopple@nvidia.com


Signed-off-by: Alistair Popple <apopple@nvidia.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Tested-by: Alison Schofield <alison.schofield@intel.com>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Asahi Lina <lina@asahilina.net>
Cc: Balbir Singh <balbirs@nvidia.com>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Chunyan Zhang <zhang.lyra@gmail.com>
Cc: "Darrick J. Wong" <djwong@kernel.org>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Dave Jiang <dave.jiang@intel.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: Ira Weiny <ira.weiny@intel.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: linmiaohe <linmiaohe@huawei.com>
Cc: Logan Gunthorpe <logang@deltatee.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michael "Camp Drill Sergeant" Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Ted Ts'o <tytso@mit.edu>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Vishal Verma <vishal.l.verma@intel.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: WANG Xuerui <kernel@xen0n.name>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent d5b3afea
Loading
Loading
Loading
Loading
+40 −0
Original line number Diff line number Diff line
@@ -846,6 +846,36 @@ int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index)
	return ret;
}

/*
 * dax_delete_mapping_range - remove DAX entries from a byte range of a mapping
 * @mapping: address space whose i_pages XArray is walked
 * @start: byte offset at which the range begins
 * @end: byte offset bounding the range; LLONG_MAX selects every index from
 *	 @start upwards
 *
 * Both offsets are converted to page indices by shifting with PAGE_SHIFT and
 * the XArray is walked between them with the xas lock held (IRQs disabled).
 * Each value entry found is disassociated, replaced with NULL in the XArray
 * and subtracted from @mapping->nrpages. Non-value entries are left untouched.
 */
void dax_delete_mapping_range(struct address_space *mapping,
				loff_t start, loff_t end)
{
	void *entry;
	pgoff_t start_idx = start >> PAGE_SHIFT;
	pgoff_t end_idx;
	XA_STATE(xas, &mapping->i_pages, start_idx);

	/* end == LLONG_MAX means all pages from start to the end of the file */
	if (end == LLONG_MAX)
		end_idx = ULONG_MAX;
	else
		end_idx = end >> PAGE_SHIFT;

	xas_lock_irq(&xas);
	xas_for_each(&xas, entry, end_idx) {
		/* Only value entries are handled here; skip anything else. */
		if (!xa_is_value(entry))
			continue;
		/*
		 * NOTE(review): presumably waits until nobody else holds the
		 * entry locked and returns NULL when there is nothing left to
		 * remove - confirm against wait_entry_unlocked_exclusive().
		 */
		entry = wait_entry_unlocked_exclusive(&xas, entry);
		if (!entry)
			continue;
		dax_disassociate_entry(entry, mapping, true);
		xas_store(&xas, NULL);
		/* Drop the 1 << order pages this entry accounted for. */
		mapping->nrpages -= 1UL << dax_entry_order(entry);
		put_unlocked_entry(&xas, entry, WAKE_ALL);
	}
	xas_unlock_irq(&xas);
}
EXPORT_SYMBOL_GPL(dax_delete_mapping_range);

static int wait_page_idle(struct page *page,
			void (cb)(struct inode *),
			struct inode *inode)
@@ -857,6 +887,9 @@ static int wait_page_idle(struct page *page,
/*
 * Unmaps the inode and waits for any DMA to complete prior to deleting the
 * DAX mapping entries for the range.
 *
 * For NOWAIT behavior, pass @cb as NULL to early-exit on first found
 * busy page
 */
int dax_break_layout(struct inode *inode, loff_t start, loff_t end,
		void (cb)(struct inode *))
@@ -871,10 +904,17 @@ int dax_break_layout(struct inode *inode, loff_t start, loff_t end,
		page = dax_layout_busy_page_range(inode->i_mapping, start, end);
		if (!page)
			break;
		if (!cb) {
			error = -ERESTARTSYS;
			break;
		}

		error = wait_page_idle(page, cb, inode);
	} while (error == 0);

	if (!page)
		dax_delete_mapping_range(inode->i_mapping, start, end);

	return error;
}
EXPORT_SYMBOL_GPL(dax_break_layout);
+2 −3
Original line number Diff line number Diff line
@@ -2735,7 +2735,6 @@ xfs_mmaplock_two_inodes_and_break_dax_layout(
	struct xfs_inode	*ip2)
{
	int			error;
	struct page		*page;

	if (ip1->i_ino > ip2->i_ino)
		swap(ip1, ip2);
@@ -2759,8 +2758,8 @@ xfs_mmaplock_two_inodes_and_break_dax_layout(
	 * need to unlock & lock the XFS_MMAPLOCK_EXCL which is not suitable
	 * for this nested lock case.
	 */
	page = dax_layout_busy_page(VFS_I(ip2)->i_mapping);
	if (!dax_page_is_idle(page)) {
	error = dax_break_layout(VFS_I(ip2), 0, -1, NULL);
	if (error) {
		xfs_iunlock(ip2, XFS_MMAPLOCK_EXCL);
		xfs_iunlock(ip1, XFS_MMAPLOCK_EXCL);
		goto again;
+2 −0
Original line number Diff line number Diff line
@@ -255,6 +255,8 @@ vm_fault_t dax_iomap_fault(struct vm_fault *vmf, unsigned int order,
vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf,
		unsigned int order, pfn_t pfn);
int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index);
void dax_delete_mapping_range(struct address_space *mapping,
				loff_t start, loff_t end);
int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
				      pgoff_t index);
int __must_check dax_break_layout(struct inode *inode, loff_t start,
+15 −1
Original line number Diff line number Diff line
@@ -78,9 +78,23 @@ static void truncate_folio_batch_exceptionals(struct address_space *mapping,

	if (dax_mapping(mapping)) {
		for (i = j; i < nr; i++) {
			if (xa_is_value(fbatch->folios[i]))
			if (xa_is_value(fbatch->folios[i])) {
				/*
				 * File systems should already have called
				 * dax_break_layout() to remove all DAX
				 * entries while holding a lock to prevent
				 * establishing new entries. Therefore we
				 * shouldn't find any here.
				 */
				WARN_ON_ONCE(1);

				/*
				 * Delete the mapping so truncate_pagecache()
				 * doesn't loop forever.
				 */
				dax_delete_mapping_entry(mapping, indices[i]);
			}
		}
		goto out;
	}