Commit eb98f304 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'vfs-6.15-rc4.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

Pull vfs fixes from Christian Brauner:

 - For some reason we went from zero to three maintainers for HFS/HFS+
   in a matter of days. The lesson to learn from this might just be that
   we need to threaten code removal more often!?

 - Fix a regression introduced by enabling large folios for large logical
   block sizes. This has caused issues for noref migration with large
   folios due to sleeping while in an atomic context.

   New sleeping variants of pagecache lookup helpers are introduced.
   These helpers take the folio lock instead of the mapping's private
   spinlock. The problematic users are converted to the sleeping
   variants and serialize against noref migration. Atomic users will
   bail on seeing the new BH_Migrate flag.

   This also shrinks the critical region of the mapping's private lock
   and the new blocking callers reduce contention on the spinlock for
   bdev mappings.

 - Fix two bugs in do_move_mount() when used with MOVE_MOUNT_BENEATH. The
   first bug is using a mountpoint that is located on a mount we're not
   holding a reference to. The second bug is putting the mountpoint
   after we've called namespace_unlock() as it's no longer guaranteed
   that it does stay a mountpoint.

 - Remove a pointless call to vfs_getattr_nosec() in the devtmpfs code
   just to query i_mode instead of simply querying the inode directly.
   This also avoids lifetime issues for the dm code caused by an earlier
   bugfix this cycle that moved bdev_statx() handling into
   vfs_getattr_nosec().

 - Fix AT_FDCWD handling with getname_maybe_null() in the xattr code.

 - Fix a performance regression for files when multiple callers issue a
   close when it's not the last reference.

 - Remove a duplicate noinline annotation from pipe_clear_nowait().

* tag 'vfs-6.15-rc4.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
  fs/xattr: Fix handling of AT_FDCWD in setxattrat(2) and getxattrat(2)
  MAINTAINERS: hfs/hfsplus: add myself as maintainer
  splice: remove duplicate noinline from pipe_clear_nowait
  devtmpfs: don't use vfs_getattr_nosec to query i_mode
  fix a couple of races in MNT_TREE_BENEATH handling by do_move_mount()
  fs: fall back to file_ref_put() for non-last reference
  mm/migrate: fix sleep in atomic for large folios and buffer heads
  fs/ext4: use sleeping version of sb_find_get_block()
  fs/jbd2: use sleeping version of __find_get_block()
  fs/ocfs2: use sleeping version of __find_get_block()
  fs/buffer: use sleeping version of __find_get_block()
  fs/buffer: introduce sleeping flavors for pagecache lookups
  MAINTAINERS: add HFS/HFS+ maintainers
  fs/buffer: split locking for pagecache lookups
parents 349b7d77 f520bed2
Loading
Loading
Loading
Loading
+8 −2
Original line number Diff line number Diff line
@@ -10464,14 +10464,20 @@ S: Supported
F:	drivers/infiniband/hw/hfi1
HFS FILESYSTEM
M:	Viacheslav Dubeyko <slava@dubeyko.com>
M:	John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>
M:	Yangtao Li <frank.li@vivo.com>
L:	linux-fsdevel@vger.kernel.org
S:	Orphan
S:	Maintained
F:	Documentation/filesystems/hfs.rst
F:	fs/hfs/
HFSPLUS FILESYSTEM
M:	Viacheslav Dubeyko <slava@dubeyko.com>
M:	John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>
M:	Yangtao Li <frank.li@vivo.com>
L:	linux-fsdevel@vger.kernel.org
S:	Orphan
S:	Maintained
F:	Documentation/filesystems/hfsplus.rst
F:	fs/hfsplus/
+9 −13
Original line number Diff line number Diff line
@@ -296,7 +296,7 @@ static int delete_path(const char *nodepath)
	return err;
}

static int dev_mynode(struct device *dev, struct inode *inode, struct kstat *stat)
static int dev_mynode(struct device *dev, struct inode *inode)
{
	/* did we create it */
	if (inode->i_private != &thread)
@@ -304,13 +304,13 @@ static int dev_mynode(struct device *dev, struct inode *inode, struct kstat *sta

	/* does the dev_t match */
	if (is_blockdev(dev)) {
		if (!S_ISBLK(stat->mode))
		if (!S_ISBLK(inode->i_mode))
			return 0;
	} else {
		if (!S_ISCHR(stat->mode))
		if (!S_ISCHR(inode->i_mode))
			return 0;
	}
	if (stat->rdev != dev->devt)
	if (inode->i_rdev != dev->devt)
		return 0;

	/* ours */
@@ -321,20 +321,16 @@ static int handle_remove(const char *nodename, struct device *dev)
{
	struct path parent;
	struct dentry *dentry;
	struct kstat stat;
	struct path p;
	struct inode *inode;
	int deleted = 0;
	int err;
	int err = 0;

	dentry = kern_path_locked(nodename, &parent);
	if (IS_ERR(dentry))
		return PTR_ERR(dentry);

	p.mnt = parent.mnt;
	p.dentry = dentry;
	err = vfs_getattr(&p, &stat, STATX_TYPE | STATX_MODE,
			  AT_STATX_SYNC_AS_STAT);
	if (!err && dev_mynode(dev, d_inode(dentry), &stat)) {
	inode = d_inode(dentry);
	if (dev_mynode(dev, inode)) {
		struct iattr newattrs;
		/*
		 * before unlinking this node, reset permissions
@@ -342,7 +338,7 @@ static int handle_remove(const char *nodename, struct device *dev)
		 */
		newattrs.ia_uid = GLOBAL_ROOT_UID;
		newattrs.ia_gid = GLOBAL_ROOT_GID;
		newattrs.ia_mode = stat.mode & ~0777;
		newattrs.ia_mode = inode->i_mode & ~0777;
		newattrs.ia_valid =
			ATTR_UID|ATTR_GID|ATTR_MODE;
		inode_lock(d_inode(dentry));
+54 −19
Original line number Diff line number Diff line
@@ -176,18 +176,8 @@ void end_buffer_write_sync(struct buffer_head *bh, int uptodate)
}
EXPORT_SYMBOL(end_buffer_write_sync);

/*
 * Various filesystems appear to want __find_get_block to be non-blocking.
 * But it's the page lock which protects the buffers.  To get around this,
 * we get exclusion from try_to_free_buffers with the blockdev mapping's
 * i_private_lock.
 *
 * Hack idea: for the blockdev mapping, i_private_lock contention
 * may be quite high.  This code could TryLock the page, and if that
 * succeeds, there is no need to take i_private_lock.
 */
static struct buffer_head *
__find_get_block_slow(struct block_device *bdev, sector_t block)
__find_get_block_slow(struct block_device *bdev, sector_t block, bool atomic)
{
	struct address_space *bd_mapping = bdev->bd_mapping;
	const int blkbits = bd_mapping->host->i_blkbits;
@@ -204,10 +194,28 @@ __find_get_block_slow(struct block_device *bdev, sector_t block)
	if (IS_ERR(folio))
		goto out;

	/*
	 * Folio lock protects the buffers. Callers that cannot block
	 * will fallback to serializing vs try_to_free_buffers() via
	 * the i_private_lock.
	 */
	if (atomic)
		spin_lock(&bd_mapping->i_private_lock);
	else
		folio_lock(folio);

	head = folio_buffers(folio);
	if (!head)
		goto out_unlock;
	/*
	 * Upon a noref migration, the folio lock serializes here;
	 * otherwise bail.
	 */
	if (test_bit_acquire(BH_Migrate, &head->b_state)) {
		WARN_ON(!atomic);
		goto out_unlock;
	}

	bh = head;
	do {
		if (!buffer_mapped(bh))
@@ -236,7 +244,10 @@ __find_get_block_slow(struct block_device *bdev, sector_t block)
		       1 << blkbits);
	}
out_unlock:
	if (atomic)
		spin_unlock(&bd_mapping->i_private_lock);
	else
		folio_unlock(folio);
	folio_put(folio);
out:
	return ret;
@@ -656,7 +667,9 @@ EXPORT_SYMBOL(generic_buffers_fsync);
void write_boundary_block(struct block_device *bdev,
			sector_t bblock, unsigned blocksize)
{
	struct buffer_head *bh = __find_get_block(bdev, bblock + 1, blocksize);
	struct buffer_head *bh;

	bh = __find_get_block_nonatomic(bdev, bblock + 1, blocksize);
	if (bh) {
		if (buffer_dirty(bh))
			write_dirty_buffer(bh, 0);
@@ -1386,16 +1399,18 @@ lookup_bh_lru(struct block_device *bdev, sector_t block, unsigned size)
/*
 * Perform a pagecache lookup for the matching buffer.  If it's there, refresh
 * it in the LRU and mark it as accessed.  If it is not present then return
 * NULL
 * NULL. Atomic context callers may also return NULL if the buffer is being
 * migrated; similarly the page is not marked accessed either.
 */
struct buffer_head *
__find_get_block(struct block_device *bdev, sector_t block, unsigned size)
static struct buffer_head *
find_get_block_common(struct block_device *bdev, sector_t block,
			unsigned size, bool atomic)
{
	struct buffer_head *bh = lookup_bh_lru(bdev, block, size);

	if (bh == NULL) {
		/* __find_get_block_slow will mark the page accessed */
		bh = __find_get_block_slow(bdev, block);
		bh = __find_get_block_slow(bdev, block, atomic);
		if (bh)
			bh_lru_install(bh);
	} else
@@ -1403,8 +1418,23 @@ __find_get_block(struct block_device *bdev, sector_t block, unsigned size)

	return bh;
}

/*
 * Atomic-context flavor of the buffer lookup.
 *
 * Forwards to find_get_block_common() with atomic == true, so the slow
 * pagecache path serializes via the block device mapping's i_private_lock
 * spinlock rather than taking the folio lock.  In this mode the lookup
 * bails out when it sees BH_Migrate set on the folio's buffers, so a NULL
 * return can also mean the buffer is currently being migrated.
 */
struct buffer_head *
__find_get_block(struct block_device *bdev, sector_t block, unsigned size)
{
	return find_get_block_common(bdev, block, size, true);
}
EXPORT_SYMBOL(__find_get_block);

/*
 * Same as __find_get_block() but for contexts that are allowed to sleep.
 *
 * Forwards to find_get_block_common() with atomic == false, so the slow
 * pagecache path takes the folio lock (instead of the mapping's
 * i_private_lock spinlock) while it walks the folio's buffers.
 */
struct buffer_head *
__find_get_block_nonatomic(struct block_device *bdev, sector_t block,
			   unsigned size)
{
	return find_get_block_common(bdev, block, size, false);
}
EXPORT_SYMBOL(__find_get_block_nonatomic);

/**
 * bdev_getblk - Get a buffer_head in a block device's buffer cache.
 * @bdev: The block device.
@@ -1422,7 +1452,12 @@ EXPORT_SYMBOL(__find_get_block);
struct buffer_head *bdev_getblk(struct block_device *bdev, sector_t block,
		unsigned size, gfp_t gfp)
{
	struct buffer_head *bh = __find_get_block(bdev, block, size);
	struct buffer_head *bh;

	if (gfpflags_allow_blocking(gfp))
		bh = __find_get_block_nonatomic(bdev, block, size);
	else
		bh = __find_get_block(bdev, block, size);

	might_alloc(gfp);
	if (bh)
+2 −1
Original line number Diff line number Diff line
@@ -691,7 +691,8 @@ static int recently_deleted(struct super_block *sb, ext4_group_t group, int ino)
	if (!bh || !buffer_uptodate(bh))
		/*
		 * If the block is not in the buffer cache, then it
		 * must have been written out.
		 * must have been written out, or, most unlikely, is
		 * being migrated - false failure should be OK here.
		 */
		goto out;

+2 −1
Original line number Diff line number Diff line
@@ -6642,7 +6642,8 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
		for (i = 0; i < count; i++) {
			cond_resched();
			if (is_metadata)
				bh = sb_find_get_block(inode->i_sb, block + i);
				bh = sb_find_get_block_nonatomic(inode->i_sb,
								 block + i);
			ext4_forget(handle, is_metadata, inode, bh, block + i);
		}
	}
Loading