Commit 0e335a77 authored by Linus Torvalds
Browse files
Pull vfs fixes from Christian Brauner:

 - Fix an uninitialized variable in file_getattr().

   The flags_valid field wasn't initialized before calling
   vfs_fileattr_get(), triggering KMSAN uninit-value reports in fuse

 - Fix writeback wakeup and logging timeouts when DETECT_HUNG_TASK is
   not enabled.

   sysctl_hung_task_timeout_secs is 0 in that case causing spurious
   "waiting for writeback completion for more than 1 seconds" warnings

 - Fix a null-ptr-deref in do_statmount() when the mount is internal

 - Add missing kernel-doc description for the @private parameter in
   iomap_readahead()

 - Fix mount namespace creation to hold namespace_sem across the mount
   copy in create_new_namespace().

   The previous drop-and-reacquire pattern was fragile and failed to
   clean up mount propagation links if the real rootfs was a shared or
   dependent mount

 - Fix /proc mount iteration where m->index wasn't updated when
   m->show() overflows, causing a restart to repeatedly show the same
   mount entry in a rapidly expanding mount table

 - Return EFSCORRUPTED instead of ENOSPC in minix_new_inode() when the
   inode number is out of range

 - Fix unshare(2) when CLONE_NEWNS is set and current->fs isn't shared.

   copy_mnt_ns() received the live fs_struct so if a subsequent
   namespace creation failed the rollback would leave pwd and root
   pointing to detached mounts. Always allocate a new fs_struct when
   CLONE_NEWNS is requested

 - fserror bug fixes:

    - Remove the unused fsnotify_sb_error() helper now that all callers
      have been converted to fserror_report_metadata

    - Fix a lockdep splat in fserror_report() where igrab() takes
      inode::i_lock which can be held in IRQ context.

      Replace igrab() with a direct i_count bump since filesystems
      should not report inodes that are about to be freed or not yet
      exposed

 - Handle error pointer in procfs for try_lookup_noperm()

 - Fix an integer overflow in ep_loop_check_proc() where recursive calls
   returning INT_MAX would overflow when +1 is added, breaking the
   recursion depth check

 - Fix a misleading break in pidfs

* tag 'vfs-7.0-rc2.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
  pidfs: avoid misleading break
  eventpoll: Fix integer overflow in ep_loop_check_proc()
  proc: Fix pointer error dereference
  fserror: fix lockdep complaint when igrabbing inode
  fsnotify: drop unused helper
  unshare: fix unshare_fs() handling
  minix: Correct errno in minix_new_inode
  namespace: fix proc mount iteration
  mount: hold namespace_sem across copy in create_new_namespace()
  iomap: Describe @private in iomap_readahead()
  statmount: Fix the null-ptr-deref in do_statmount()
  writeback: Fix wakeup and logging timeouts for !DETECT_HUNG_TASK
  fs: init flags_valid before calling vfs_fileattr_get
parents bfbc0b5b 4a1ddb0f
Loading
Loading
Loading
Loading
+3 −2
Original line number Diff line number Diff line
@@ -2061,7 +2061,8 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
 * @ep: the &struct eventpoll to be currently checked.
 * @depth: Current depth of the path being checked.
 *
 * Return: depth of the subtree, or INT_MAX if we found a loop or went too deep.
 * Return: depth of the subtree, or a value bigger than EP_MAX_NESTS if we found
 * a loop or went too deep.
 */
static int ep_loop_check_proc(struct eventpoll *ep, int depth)
{
@@ -2080,7 +2081,7 @@ static int ep_loop_check_proc(struct eventpoll *ep, int depth)
			struct eventpoll *ep_tovisit;
			ep_tovisit = epi->ffd.file->private_data;
			if (ep_tovisit == inserting_into || depth > EP_MAX_NESTS)
				result = INT_MAX;
				result = EP_MAX_NESTS+1;
			else
				result = max(result, ep_loop_check_proc(ep_tovisit, depth + 1) + 1);
			if (result > EP_MAX_NESTS)
+1 −1
Original line number Diff line number Diff line
@@ -378,7 +378,7 @@ SYSCALL_DEFINE5(file_getattr, int, dfd, const char __user *, filename,
	struct path filepath __free(path_put) = {};
	unsigned int lookup_flags = 0;
	struct file_attr fattr;
	struct file_kattr fa;
	struct file_kattr fa = { .flags_valid = true }; /* hint only */
	int error;

	BUILD_BUG_ON(sizeof(struct file_attr) < FILE_ATTR_SIZE_VER0);
+5 −4
Original line number Diff line number Diff line
@@ -198,10 +198,11 @@ static void wb_queue_work(struct bdi_writeback *wb,

static bool wb_wait_for_completion_cb(struct wb_completion *done)
{
	unsigned long timeout = sysctl_hung_task_timeout_secs;
	unsigned long waited_secs = (jiffies - done->wait_start) / HZ;

	done->progress_stamp = jiffies;
	if (waited_secs > sysctl_hung_task_timeout_secs)
	if (timeout && (waited_secs > timeout))
		pr_info("INFO: The task %s:%d has been waiting for writeback "
			"completion for more than %lu seconds.",
			current->comm, current->pid, waited_secs);
@@ -1954,6 +1955,7 @@ static long writeback_sb_inodes(struct super_block *sb,
		.range_end		= LLONG_MAX,
	};
	unsigned long start_time = jiffies;
	unsigned long timeout = sysctl_hung_task_timeout_secs;
	long write_chunk;
	long total_wrote = 0;  /* count both pages and inodes */
	unsigned long dirtied_before = jiffies;
@@ -2040,9 +2042,8 @@ static long writeback_sb_inodes(struct super_block *sb,
		__writeback_single_inode(inode, &wbc);

		/* Report progress to inform the hung task detector of the progress. */
		if (work->done && work->done->progress_stamp &&
		   (jiffies - work->done->progress_stamp) > HZ *
		   sysctl_hung_task_timeout_secs / 2)
		if (work->done && work->done->progress_stamp && timeout &&
		   (jiffies - work->done->progress_stamp) > HZ * timeout / 2)
			wake_up_all(work->done->waitq);

		wbc_detach_inode(&wbc);
+1 −0
Original line number Diff line number Diff line
@@ -624,6 +624,7 @@ static int iomap_readahead_iter(struct iomap_iter *iter,
 * iomap_readahead - Attempt to read pages from a file.
 * @ops: The operations vector for the filesystem.
 * @ctx: The ctx used for issuing readahead.
 * @private: The filesystem-specific information for issuing iomap_iter.
 *
 * This function is for filesystems to call to implement their readahead
 * address_space operation.
+46 −0
Original line number Diff line number Diff line
@@ -69,11 +69,57 @@ static u32 iomap_finish_ioend_buffered(struct iomap_ioend *ioend)
	return folio_count;
}

/*
 * File-wide queue of ioends whose writeback bio completed with an error.
 * iomap_fail_ioend_buffered() appends to failed_ioend_list and
 * iomap_fail_ioends() drains it; both do so while holding
 * failed_ioend_lock with interrupts saved/restored.
 */
static DEFINE_SPINLOCK(failed_ioend_lock);
static LIST_HEAD(failed_ioend_list);

static void
iomap_fail_ioends(
	struct work_struct	*work)
{
	struct iomap_ioend	*ioend;
	struct list_head	tmp;
	unsigned long		flags;

	spin_lock_irqsave(&failed_ioend_lock, flags);
	list_replace_init(&failed_ioend_list, &tmp);
	spin_unlock_irqrestore(&failed_ioend_lock, flags);

	while ((ioend = list_first_entry_or_null(&tmp, struct iomap_ioend,
			io_list))) {
		list_del_init(&ioend->io_list);
		iomap_finish_ioend_buffered(ioend);
		cond_resched();
	}
}

/*
 * Work item whose handler is iomap_fail_ioends().  It is scheduled from
 * iomap_fail_ioend_buffered() when an ioend is queued onto an empty
 * failed_ioend_list.
 */
static DECLARE_WORK(failed_ioend_work, iomap_fail_ioends);

/*
 * Queue a failed ioend for completion from the failed_ioend_work work item.
 *
 * I/O errors are bounced to a workqueue to avoid nested i_lock acquisitions
 * in the fserror code.  Once failed_ioend_lock is released the caller no
 * longer owns the ioend reference and must not touch @ioend again.
 *
 * @ioend: the ioend whose I/O failed; linked onto failed_ioend_list via
 *         its io_list member.
 */
static void iomap_fail_ioend_buffered(struct iomap_ioend *ioend)
{
	unsigned long irq_flags;
	int list_was_empty;

	spin_lock_irqsave(&failed_ioend_lock, irq_flags);

	/*
	 * The work item is only scheduled when the list transitions from
	 * empty to non-empty; a failure to schedule it in that state is
	 * reported once via WARN_ON_ONCE().
	 */
	list_was_empty = list_empty(&failed_ioend_list);
	if (list_was_empty)
		WARN_ON_ONCE(!schedule_work(&failed_ioend_work));

	list_add_tail(&ioend->io_list, &failed_ioend_list);

	spin_unlock_irqrestore(&failed_ioend_lock, irq_flags);
}

/*
 * Completion handler for an ioend writeback bio.
 *
 * Records the bio status in the ioend as an errno value.  A successful ioend
 * is finished directly via iomap_finish_ioend_buffered(); a failed one is
 * handed to iomap_fail_ioend_buffered() instead, which defers its
 * completion to a work item.
 *
 * @bio: the completed bio; its containing ioend is looked up with
 *       iomap_ioend_from_bio().
 */
static void ioend_writeback_end_bio(struct bio *bio)
{
	struct iomap_ioend *ioend = iomap_ioend_from_bio(bio);

	ioend->io_error = blk_status_to_errno(bio->bi_status);

	if (!ioend->io_error)
		iomap_finish_ioend_buffered(ioend);
	else
		iomap_fail_ioend_buffered(ioend);
}

Loading