Commit d1384f70 authored by Linus Torvalds
Browse files
Pull vfs fixes from Christian Brauner:

 - Fix netfs_limit_iter() hitting BUG() when an ITER_KVEC iterator
   reaches it via core dump writes to 9P filesystems. Add ITER_KVEC
   handling following the same pattern as the existing ITER_BVEC code.

 - Fix a NULL pointer dereference in the netfs unbuffered write retry
   path when the filesystem (e.g., 9P) doesn't set the prepare_write
   operation.

 - Clear I_DIRTY_TIME in sync_lazytime for filesystems implementing
   ->sync_lazytime. Without this the flag stays set and may cause
   additional unnecessary calls during inode deactivation.

 - Increase tmpfs size in mount_setattr selftests. A recent commit
   bumped the ext4 image size to 2 GB but didn't adjust the tmpfs
   backing store, so mkfs.ext4 fails with ENOSPC writing metadata.

 - Fix an invalid folio access in iomap when i_blkbits matches the folio
   size but differs from the I/O granularity. The cur_folio pointer
   would not get invalidated and iomap_read_end() would still be called
   on it despite the IO helper owning it.

 - Fix hash_name() docstring.

 - Fix read abandonment during netfs retry where the subreq variable
   used for abandonment could be uninitialized on the first pass or
   point to a deleted subrequest on later passes.

 - Don't block sync for filesystems with no data integrity guarantees.
   Add a SB_I_NO_DATA_INTEGRITY superblock flag replacing the per-inode
   AS_NO_DATA_INTEGRITY mapping flag so sync kicks off writeback but
   doesn't wait for flusher threads. This fixes a suspend-to-RAM hang on
   fuse-overlayfs where the flusher thread blocks when the fuse daemon
   is frozen.

 - Fix a lockdep splat in iomap when reads fail. iomap_read_end_io()
   invokes fserror_report() which calls igrab() taking i_lock in hardirq
   context while i_lock is normally held with interrupts enabled. Kick
   failed read handling to a workqueue.

 - Remove the redundant netfs_io_stream::front member and use
   stream->subrequests.next instead, fixing a potential issue in the
   direct write code path.

* tag 'vfs-7.0-rc6.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
  netfs: Fix the handling of stream->front by removing it
  iomap: fix lockdep complaint when reads fail
  writeback: don't block sync for filesystems with no data integrity guarantees
  netfs: Fix read abandonment during retry
  vfs: fix docstring of hash_name()
  iomap: fix invalid folio access when i_blkbits differs from I/O granularity
  selftests/mount_setattr: increase tmpfs size for idmapped mount tests
  fs: clear I_DIRTY_TIME in sync_lazytime
  netfs: Fix NULL pointer dereference in netfs_unbuffered_write() on retry
  netfs: Fix kernel BUG in netfs_limit_iter() for ITER_KVEC iterators
parents fc9eae25 0e764b9d
Loading
Loading
Loading
Loading
+27 −9
Original line number Diff line number Diff line
@@ -1711,6 +1711,19 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
	}
}

/*
 * Test and clear I_DIRTY_TIME under i_lock and, if it was set, hand the
 * inode to the filesystem's ->sync_lazytime method.
 *
 * The method is called without a NULL check once the flag has been
 * cleared, so the caller must have verified that
 * inode->i_op->sync_lazytime is set.  It is invoked only after i_lock has
 * been released.
 *
 * Returns true if the flag was set (and the method was called), false if
 * the flag was already clear.
 */
static bool __sync_lazytime(struct inode *inode)
{
	bool was_dirty;

	/* Sample and clear the flag in one critical section. */
	spin_lock(&inode->i_lock);
	was_dirty = !!(inode_state_read(inode) & I_DIRTY_TIME);
	if (was_dirty)
		inode_state_clear(inode, I_DIRTY_TIME);
	spin_unlock(&inode->i_lock);

	if (!was_dirty)
		return false;

	inode->i_op->sync_lazytime(inode);
	return true;
}

bool sync_lazytime(struct inode *inode)
{
	if (!(inode_state_read_once(inode) & I_DIRTY_TIME))
@@ -1718,8 +1731,7 @@ bool sync_lazytime(struct inode *inode)

	trace_writeback_lazytime(inode);
	if (inode->i_op->sync_lazytime)
		inode->i_op->sync_lazytime(inode);
	else
		return __sync_lazytime(inode);
	mark_inode_dirty_sync(inode);
	return true;
}
@@ -2775,13 +2787,8 @@ static void wait_sb_inodes(struct super_block *sb)
		 * The mapping can appear untagged while still on-list since we
		 * do not have the mapping lock. Skip it here, wb completion
		 * will remove it.
		 *
		 * If the mapping does not have data integrity semantics,
		 * there's no need to wait for the writeout to complete, as the
		 * mapping cannot guarantee that data is persistently stored.
		 */
		if (!mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK) ||
		    mapping_no_data_integrity(mapping))
		if (!mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK))
			continue;

		spin_unlock_irq(&sb->s_inode_wblist_lock);
@@ -2916,6 +2923,17 @@ void sync_inodes_sb(struct super_block *sb)
	 */
	if (bdi == &noop_backing_dev_info)
		return;

	/*
	 * If the superblock has SB_I_NO_DATA_INTEGRITY set, there's no need to
	 * wait for the writeout to complete, as the filesystem cannot guarantee
	 * data persistence on sync. Just kick off writeback and return.
	 */
	if (sb->s_iflags & SB_I_NO_DATA_INTEGRITY) {
		wakeup_flusher_threads_bdi(bdi, WB_REASON_SYNC);
		return;
	}

	WARN_ON(!rwsem_is_locked(&sb->s_umount));

	/* protect against inode wb switch, see inode_switch_wbs_work_fn() */
+1 −3
Original line number Diff line number Diff line
@@ -3201,10 +3201,8 @@ void fuse_init_file_inode(struct inode *inode, unsigned int flags)

	inode->i_fop = &fuse_file_operations;
	inode->i_data.a_ops = &fuse_file_aops;
	if (fc->writeback_cache) {
	if (fc->writeback_cache)
		mapping_set_writeback_may_deadlock_on_reclaim(&inode->i_data);
		mapping_set_no_data_integrity(&inode->i_data);
	}

	INIT_LIST_HEAD(&fi->write_files);
	INIT_LIST_HEAD(&fi->queued_writes);
+1 −0
Original line number Diff line number Diff line
@@ -1709,6 +1709,7 @@ static void fuse_sb_defaults(struct super_block *sb)
	sb->s_export_op = &fuse_export_operations;
	sb->s_iflags |= SB_I_IMA_UNVERIFIABLE_SIGNATURE;
	sb->s_iflags |= SB_I_NOIDMAP;
	sb->s_iflags |= SB_I_NO_DATA_INTEGRITY;
	if (sb->s_user_ns != &init_user_ns)
		sb->s_iflags |= SB_I_UNTRUSTED_MOUNTER;
	sb->s_flags &= ~(SB_NOSEC | SB_I_VERSION);
+50 −1
Original line number Diff line number Diff line
@@ -8,7 +8,10 @@
#include "internal.h"
#include "trace.h"

static void iomap_read_end_io(struct bio *bio)
static DEFINE_SPINLOCK(failed_read_lock);
static struct bio_list failed_read_list = BIO_EMPTY_LIST;

static void __iomap_read_end_io(struct bio *bio)
{
	int error = blk_status_to_errno(bio->bi_status);
	struct folio_iter fi;
@@ -18,6 +21,52 @@ static void iomap_read_end_io(struct bio *bio)
	bio_put(bio);
}

static void
iomap_fail_reads(
	struct work_struct	*work)
{
	struct bio		*bio;
	struct bio_list		tmp = BIO_EMPTY_LIST;
	unsigned long		flags;

	spin_lock_irqsave(&failed_read_lock, flags);
	bio_list_merge_init(&tmp, &failed_read_list);
	spin_unlock_irqrestore(&failed_read_lock, flags);

	while ((bio = bio_list_pop(&tmp)) != NULL) {
		__iomap_read_end_io(bio);
		cond_resched();
	}
}

static DECLARE_WORK(failed_read_work, iomap_fail_reads);

static void iomap_fail_buffered_read(struct bio *bio)
{
	unsigned long flags;

	/*
	 * Bounce I/O errors to a workqueue to avoid nested i_lock acquisitions
	 * in the fserror code.  The caller no longer owns the bio reference
	 * after the spinlock drops.
	 */
	spin_lock_irqsave(&failed_read_lock, flags);
	if (bio_list_empty(&failed_read_list))
		WARN_ON_ONCE(!schedule_work(&failed_read_work));
	bio_list_add(&failed_read_list, bio);
	spin_unlock_irqrestore(&failed_read_lock, flags);
}

/*
 * Read bio completion entry point.
 *
 * A bio with a zero bi_status is finished directly via
 * __iomap_read_end_io(); a bio with a non-zero bi_status is handed to
 * iomap_fail_buffered_read(), which defers its completion to a workqueue.
 */
static void iomap_read_end_io(struct bio *bio)
{
	if (!bio->bi_status)
		__iomap_read_end_io(bio);
	else
		iomap_fail_buffered_read(bio);
}

static void iomap_bio_submit_read(struct iomap_read_folio_ctx *ctx)
{
	struct bio *bio = ctx->read_ctx;
+10 −5
Original line number Diff line number Diff line
@@ -514,6 +514,7 @@ static int iomap_read_folio_iter(struct iomap_iter *iter,
	loff_t length = iomap_length(iter);
	struct folio *folio = ctx->cur_folio;
	size_t folio_len = folio_size(folio);
	struct iomap_folio_state *ifs;
	size_t poff, plen;
	loff_t pos_diff;
	int ret;
@@ -525,7 +526,7 @@ static int iomap_read_folio_iter(struct iomap_iter *iter,
		return iomap_iter_advance(iter, length);
	}

	ifs_alloc(iter->inode, folio, iter->flags);
	ifs = ifs_alloc(iter->inode, folio, iter->flags);

	length = min_t(loff_t, length, folio_len - offset_in_folio(folio, pos));
	while (length) {
@@ -560,11 +561,15 @@ static int iomap_read_folio_iter(struct iomap_iter *iter,

			*bytes_submitted += plen;
			/*
			 * If the entire folio has been read in by the IO
			 * helper, then the helper owns the folio and will end
			 * the read on it.
			 * Hand off folio ownership to the IO helper when:
			 * 1) The entire folio has been submitted for IO, or
			 * 2) There is no ifs attached to the folio
			 *
			 * Case (2) occurs when 1 << i_blkbits matches the folio
			 * size but the underlying filesystem or block device
			 * uses a smaller granularity for IO.
			 */
			if (*bytes_submitted == folio_len)
			if (*bytes_submitted == folio_len || !ifs)
				ctx->cur_folio = NULL;
		}

Loading