Commit eb88e6bf authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'fsnotify_for_v6.14-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs

Pull fsnotify reverts from Jan Kara:
 "Syzbot has found out that fsnotify HSM events generated on page fault
  can be generated while we already hold freeze protection for the
  filesystem (when you do a buffered write from a buffer which is an mmapped
  file on the same filesystem) which violates expectations for HSM
  events and could lead to deadlocks of HSM clients with filesystem
  freezing.

  Since it's quite late in the cycle we've decided to revert changes
  implementing HSM events on page fault for now and instead just
  generate one event for the whole range on mmap(2) so that HSM client
  can fetch the data at that moment"

* tag 'fsnotify_for_v6.14-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs:
  Revert "fanotify: disable readahead if we have pre-content watches"
  Revert "mm: don't allow huge faults for files with pre content watches"
  Revert "fsnotify: generate pre-content permission event on page fault"
  Revert "xfs: add pre-content fsnotify hook for DAX faults"
  Revert "ext4: add pre-content fsnotify hook for DAX faults"
  fsnotify: add pre-content hooks on mmap()
parents 3571e8b0 252256e4
Loading
Loading
Loading
Loading
+0 −3
Original line number Diff line number Diff line
@@ -756,9 +756,6 @@ static vm_fault_t ext4_dax_huge_fault(struct vm_fault *vmf, unsigned int order)
			return VM_FAULT_SIGBUS;
		}
	} else {
		result = filemap_fsnotify_fault(vmf);
		if (unlikely(result))
			return result;
		filemap_invalidate_lock_shared(mapping);
	}
	result = dax_iomap_fault(vmf, order, &pfn, &error, &ext4_iomap_ops);
+0 −13
Original line number Diff line number Diff line
@@ -1451,9 +1451,6 @@ xfs_dax_read_fault(

	trace_xfs_read_fault(ip, order);

	ret = filemap_fsnotify_fault(vmf);
	if (unlikely(ret))
		return ret;
	xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
	ret = xfs_dax_fault_locked(vmf, order, false);
	xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
@@ -1482,16 +1479,6 @@ xfs_write_fault(
	vm_fault_t		ret;

	trace_xfs_write_fault(ip, order);
	/*
	 * Usually we get here from ->page_mkwrite callback but in case of DAX
	 * we will get here also for ordinary write fault. Handle HSM
	 * notifications for that case.
	 */
	if (IS_DAX(inode)) {
		ret = filemap_fsnotify_fault(vmf);
		if (unlikely(ret))
			return ret;
	}

	sb_start_pagefault(inode->i_sb);
	file_update_time(vmf->vma->vm_file);
+21 −0
Original line number Diff line number Diff line
@@ -170,6 +170,21 @@ static inline int fsnotify_file_area_perm(struct file *file, int perm_mask,
	return fsnotify_path(&file->f_path, FS_ACCESS_PERM);
}

/*
 * fsnotify_mmap_perm - permission hook run before a file range is mmapped
 * @file: file being mapped; may be NULL, in which case nothing is emitted
 * @prot: requested protection; not consulted by this hook
 * @off:  start offset of the range being mapped
 * @len:  length of the range being mapped
 *
 * Only pre-content events are generated for mmap(): when @file carries the
 * HSM fsnotify mode, the whole [@off, @off + @len) range is reported through
 * fsnotify_pre_content() and its result is returned.  In every other case
 * the hook is a no-op.
 *
 * Return: 0 when no event is needed, otherwise whatever
 * fsnotify_pre_content() returns.
 */
static inline int fsnotify_mmap_perm(struct file *file, int prot,
				     const loff_t off, size_t len)
{
	/* Watched files are the rare case; everything else falls through. */
	if (file && unlikely(FMODE_FSNOTIFY_HSM(file->f_mode)))
		return fsnotify_pre_content(&file->f_path, &off, len);

	return 0;
}

/*
 * fsnotify_truncate_perm - permission hook before file truncate
 */
@@ -223,6 +238,12 @@ static inline int fsnotify_file_area_perm(struct file *file, int perm_mask,
	return 0;
}

/*
 * fsnotify_mmap_perm - no-op variant of the mmap permission hook
 *
 * Ignores all of its arguments and always returns 0, so callers never see
 * a failure from this variant.
 * NOTE(review): presumably the counterpart used when the permission-event
 * support is compiled out; the enclosing #ifdef is not visible here - confirm.
 */
static inline int fsnotify_mmap_perm(struct file *file, int prot,
				     const loff_t off, size_t len)
{
	return 0;
}

static inline int fsnotify_truncate_perm(const struct path *path, loff_t length)
{
	return 0;
+0 −1
Original line number Diff line number Diff line
@@ -3420,7 +3420,6 @@ extern vm_fault_t filemap_fault(struct vm_fault *vmf);
extern vm_fault_t filemap_map_pages(struct vm_fault *vmf,
		pgoff_t start_pgoff, pgoff_t end_pgoff);
extern vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf);
extern vm_fault_t filemap_fsnotify_fault(struct vm_fault *vmf);

extern unsigned long stack_guard_gap;
/* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */
+0 −86
Original line number Diff line number Diff line
@@ -47,7 +47,6 @@
#include <linux/splice.h>
#include <linux/rcupdate_wait.h>
#include <linux/sched/mm.h>
#include <linux/fsnotify.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include "internal.h"
@@ -3198,14 +3197,6 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
	unsigned long vm_flags = vmf->vma->vm_flags;
	unsigned int mmap_miss;

	/*
	 * If we have pre-content watches we need to disable readahead to make
	 * sure that we don't populate our mapping with 0 filled pages that we
	 * never emitted an event for.
	 */
	if (unlikely(FMODE_FSNOTIFY_HSM(file->f_mode)))
		return fpin;

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	/* Use the readahead code, even if readahead is disabled */
	if ((vm_flags & VM_HUGEPAGE) && HPAGE_PMD_ORDER <= MAX_PAGECACHE_ORDER) {
@@ -3274,10 +3265,6 @@ static struct file *do_async_mmap_readahead(struct vm_fault *vmf,
	struct file *fpin = NULL;
	unsigned int mmap_miss;

	/* See comment in do_sync_mmap_readahead. */
	if (unlikely(FMODE_FSNOTIFY_HSM(file->f_mode)))
		return fpin;

	/* If we don't want any read-ahead, don't bother */
	if (vmf->vma->vm_flags & VM_RAND_READ || !ra->ra_pages)
		return fpin;
@@ -3336,48 +3323,6 @@ static vm_fault_t filemap_fault_recheck_pte_none(struct vm_fault *vmf)
	return ret;
}

/**
 * filemap_fsnotify_fault - maybe emit a pre-content event.
 * @vmf:	struct vm_fault containing details of the fault.
 *
 * If we have a pre-content watch on this file we will emit an event for this
 * range.  If we return anything the fault caller should return immediately, we
 * will return VM_FAULT_RETRY if we had to emit an event, which will trigger the
 * fault again and then the fault handler will run the second time through.
 *
 * Return: a bitwise-OR of %VM_FAULT_ codes, 0 if nothing happened.
 */
vm_fault_t filemap_fsnotify_fault(struct vm_fault *vmf)
{
	struct file *fpin = NULL;
	int mask = (vmf->flags & FAULT_FLAG_WRITE) ? MAY_WRITE : MAY_ACCESS;
	loff_t pos = vmf->pgoff >> PAGE_SHIFT;
	size_t count = PAGE_SIZE;
	int err;

	/*
	 * We already did this and now we're retrying with everything locked,
	 * don't emit the event and continue.
	 */
	if (vmf->flags & FAULT_FLAG_TRIED)
		return 0;

	/* No watches, we're done. */
	if (likely(!FMODE_FSNOTIFY_HSM(vmf->vma->vm_file->f_mode)))
		return 0;

	fpin = maybe_unlock_mmap_for_io(vmf, fpin);
	if (!fpin)
		return VM_FAULT_SIGBUS;

	err = fsnotify_file_area_perm(fpin, mask, &pos, count);
	fput(fpin);
	if (err)
		return VM_FAULT_SIGBUS;
	return VM_FAULT_RETRY;
}
EXPORT_SYMBOL_GPL(filemap_fsnotify_fault);

/**
 * filemap_fault - read in file data for page fault handling
 * @vmf:	struct vm_fault containing details of the fault
@@ -3481,37 +3426,6 @@ vm_fault_t filemap_fault(struct vm_fault *vmf)
	 * or because readahead was otherwise unable to retrieve it.
	 */
	if (unlikely(!folio_test_uptodate(folio))) {
		/*
		 * If this is a precontent file we can now emit an event to
		 * try and populate the folio.
		 */
		if (!(vmf->flags & FAULT_FLAG_TRIED) &&
		    unlikely(FMODE_FSNOTIFY_HSM(file->f_mode))) {
			loff_t pos = folio_pos(folio);
			size_t count = folio_size(folio);

			/* We're NOWAIT, we have to retry. */
			if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT) {
				folio_unlock(folio);
				goto out_retry;
			}

			if (mapping_locked)
				filemap_invalidate_unlock_shared(mapping);
			mapping_locked = false;

			folio_unlock(folio);
			fpin = maybe_unlock_mmap_for_io(vmf, fpin);
			if (!fpin)
				goto out_retry;

			error = fsnotify_file_area_perm(fpin, MAY_ACCESS, &pos,
							count);
			if (error)
				ret = VM_FAULT_SIGBUS;
			goto out_retry;
		}

		/*
		 * If the invalidate lock is not held, the folio was in cache
		 * and uptodate and now it is not. Strange but possible since we
Loading