Commit a8058f84 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'vfs-6.19-rc1.directory.locking' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

Pull directory locking updates from Christian Brauner:
 "This contains the work to add centralized APIs for directory locking
  operations.

  This series is part of a larger effort to change directory operation
  locking to allow multiple concurrent operations in a directory. The
  ultimate goal is to lock the target dentry(s) rather than the whole
  parent directory.

  To help with changing the locking protocol, this series centralizes
  locking and lookup in new helper functions. The helpers establish a
  pattern where it is the dentry that is being locked and unlocked
  (currently the lock is held on dentry->d_parent->d_inode, but that can
  change in the future).

  This also changes vfs_mkdir() to unlock the parent on failure, as well
  as dput()ing the dentry. This allows end_creating() to only require
  the target dentry (which may be IS_ERR() after vfs_mkdir()), not the
  parent"

* tag 'vfs-6.19-rc1.directory.locking' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
  nfsd: fix end_creating() conversion
  VFS: introduce end_creating_keep()
  VFS: change vfs_mkdir() to unlock on failure.
  ecryptfs: use new start_creating/start_removing APIs
  Add start_renaming_two_dentries()
  VFS/ovl/smb: introduce start_renaming_dentry()
  VFS/nfsd/ovl: introduce start_renaming() and end_renaming()
  VFS: add start_creating_killable() and start_removing_killable()
  VFS: introduce start_removing_dentry()
  smb/server: use end_removing_noperm for for target of smb2_create_link()
  VFS: introduce start_creating_noperm() and start_removing_noperm()
  VFS/nfsd/cachefiles/ovl: introduce start_removing() and end_removing()
  VFS/nfsd/cachefiles/ovl: add start_creating() and end_creating()
  VFS: tidy up do_unlinkat()
  VFS: introduce start_dirop() and end_dirop()
  debugfs: rename end_creating() to debugfs_end_creating()
parents db74a7d0 eeec741e
Loading
Loading
Loading
Loading
+13 −0
Original line number Diff line number Diff line
@@ -1309,3 +1309,16 @@ a different length, use
	vfs_parse_fs_qstr(fc, key, &QSTR_LEN(value, len))

instead.

---

**mandatory**

vfs_mkdir() now returns a dentry - the one returned by ->mkdir().  If
that dentry is different from the dentry passed in, including if it is
an IS_ERR() dentry pointer, the original dentry is dput().

When vfs_mkdir() returns an error, and so both dputs() the original
dentry and doesn't provide a replacement, it also unlocks the parent.
Consequently the return value from vfs_mkdir() can be passed to
end_creating() and the parent will be unlocked precisely when necessary.
+12 −29
Original line number Diff line number Diff line
@@ -904,14 +904,9 @@ static noinline int btrfs_mksubvol(struct dentry *parent,
	struct fscrypt_str name_str = FSTR_INIT((char *)qname->name, qname->len);
	int ret;

	ret = down_write_killable_nested(&dir->i_rwsem, I_MUTEX_PARENT);
	if (ret == -EINTR)
		return ret;

	dentry = lookup_one(idmap, qname, parent);
	ret = PTR_ERR(dentry);
	dentry = start_creating_killable(idmap, parent, qname);
	if (IS_ERR(dentry))
		goto out_unlock;
		return PTR_ERR(dentry);

	ret = btrfs_may_create(idmap, dir, dentry);
	if (ret)
@@ -940,9 +935,7 @@ static noinline int btrfs_mksubvol(struct dentry *parent,
out_up_read:
	up_read(&fs_info->subvol_sem);
out_dput:
	dput(dentry);
out_unlock:
	btrfs_inode_unlock(BTRFS_I(dir), 0);
	end_creating(dentry);
	return ret;
}

@@ -2417,18 +2410,10 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
		goto free_subvol_name;
	}

	ret = down_write_killable_nested(&dir->i_rwsem, I_MUTEX_PARENT);
	if (ret == -EINTR)
		goto free_subvol_name;
	dentry = lookup_one(idmap, &QSTR(subvol_name), parent);
	dentry = start_removing_killable(idmap, parent, &QSTR(subvol_name));
	if (IS_ERR(dentry)) {
		ret = PTR_ERR(dentry);
		goto out_unlock_dir;
	}

	if (d_really_is_negative(dentry)) {
		ret = -ENOENT;
		goto out_dput;
		goto out_end_removing;
	}

	inode = d_inode(dentry);
@@ -2449,7 +2434,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
		 */
		ret = -EPERM;
		if (!btrfs_test_opt(fs_info, USER_SUBVOL_RM_ALLOWED))
			goto out_dput;
			goto out_end_removing;

		/*
		 * Do not allow deletion if the parent dir is the same
@@ -2460,21 +2445,21 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
		 */
		ret = -EINVAL;
		if (root == dest)
			goto out_dput;
			goto out_end_removing;

		ret = inode_permission(idmap, inode, MAY_WRITE | MAY_EXEC);
		if (ret)
			goto out_dput;
			goto out_end_removing;
	}

	/* check if subvolume may be deleted by a user */
	ret = btrfs_may_delete(idmap, dir, dentry, 1);
	if (ret)
		goto out_dput;
		goto out_end_removing;

	if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FIRST_FREE_OBJECTID) {
		ret = -EINVAL;
		goto out_dput;
		goto out_end_removing;
	}

	btrfs_inode_lock(BTRFS_I(inode), 0);
@@ -2483,10 +2468,8 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
	if (!ret)
		d_delete_notify(dir, dentry);

out_dput:
	dput(dentry);
out_unlock_dir:
	btrfs_inode_unlock(BTRFS_I(dir), 0);
out_end_removing:
	end_removing(dentry);
free_subvol_name:
	kfree(subvol_name_ptr);
free_parent:
+7 −4
Original line number Diff line number Diff line
@@ -9,6 +9,7 @@
#include <linux/mount.h>
#include <linux/xattr.h>
#include <linux/file.h>
#include <linux/namei.h>
#include <linux/falloc.h>
#include <trace/events/fscache.h>
#include "internal.h"
@@ -428,10 +429,12 @@ static bool cachefiles_invalidate_cookie(struct fscache_cookie *cookie)
		if (!old_tmpfile) {
			struct cachefiles_volume *volume = object->volume;
			struct dentry *fan = volume->fanout[(u8)cookie->key_hash];
			struct dentry *obj;

			inode_lock_nested(d_inode(fan), I_MUTEX_PARENT);
			cachefiles_bury_object(volume->cache, object, fan,
					       old_file->f_path.dentry,
			obj = start_removing_dentry(fan, old_file->f_path.dentry);
			if (!IS_ERR(obj))
				cachefiles_bury_object(volume->cache, object,
						       fan, obj,
						       FSCACHE_OBJECT_INVALIDATED);
		}
		fput(old_file);
+46 −50
Original line number Diff line number Diff line
@@ -93,12 +93,11 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
	_enter(",,%s", dirname);

	/* search the current directory for the element name */
	inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);

retry:
	ret = cachefiles_inject_read_error();
	if (ret == 0)
		subdir = lookup_one(&nop_mnt_idmap, &QSTR(dirname), dir);
		subdir = start_creating(&nop_mnt_idmap, dir, &QSTR(dirname));
	else
		subdir = ERR_PTR(ret);
	trace_cachefiles_lookup(NULL, dir, subdir);
@@ -129,10 +128,12 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
		if (ret < 0)
			goto mkdir_error;
		ret = cachefiles_inject_write_error();
		if (ret == 0)
		if (ret == 0) {
			subdir = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), subdir, 0700, NULL);
		else
		} else {
			end_creating(subdir);
			subdir = ERR_PTR(ret);
		}
		if (IS_ERR(subdir)) {
			trace_cachefiles_vfs_error(NULL, d_inode(dir), ret,
						   cachefiles_trace_mkdir_error);
@@ -141,7 +142,7 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
		trace_cachefiles_mkdir(dir, subdir);

		if (unlikely(d_unhashed(subdir) || d_is_negative(subdir))) {
			dput(subdir);
			end_creating(subdir);
			goto retry;
		}
		ASSERT(d_backing_inode(subdir));
@@ -154,7 +155,7 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,

	/* Tell rmdir() it's not allowed to delete the subdir */
	inode_lock(d_inode(subdir));
	inode_unlock(d_inode(dir));
	end_creating_keep(subdir);

	if (!__cachefiles_mark_inode_in_use(NULL, d_inode(subdir))) {
		pr_notice("cachefiles: Inode already in use: %pd (B=%lx)\n",
@@ -196,14 +197,11 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
	return ERR_PTR(-EBUSY);

mkdir_error:
	inode_unlock(d_inode(dir));
	if (!IS_ERR(subdir))
		dput(subdir);
	end_creating(subdir);
	pr_err("mkdir %s failed with error %d\n", dirname, ret);
	return ERR_PTR(ret);

lookup_error:
	inode_unlock(d_inode(dir));
	ret = PTR_ERR(subdir);
	pr_err("Lookup %s failed with error %d\n", dirname, ret);
	return ERR_PTR(ret);
@@ -263,6 +261,8 @@ static int cachefiles_unlink(struct cachefiles_cache *cache,
 * - File backed objects are unlinked
 * - Directory backed objects are stuffed into the graveyard for userspace to
 *   delete
 * On entry dir must be locked.  It will be unlocked on exit.
 * On entry there must be at least 2 refs on rep, one will be dropped on exit.
 */
int cachefiles_bury_object(struct cachefiles_cache *cache,
			   struct cachefiles_object *object,
@@ -278,27 +278,23 @@ int cachefiles_bury_object(struct cachefiles_cache *cache,
	_enter(",'%pd','%pd'", dir, rep);

	if (rep->d_parent != dir) {
		inode_unlock(d_inode(dir));
		end_removing(rep);
		_leave(" = -ESTALE");
		return -ESTALE;
	}

	/* non-directories can just be unlinked */
	if (!d_is_dir(rep)) {
		dget(rep); /* Stop the dentry being negated if it's only pinned
			    * by a file struct.
			    */
		ret = cachefiles_unlink(cache, object, dir, rep, why);
		dput(rep);
		end_removing(rep);

		inode_unlock(d_inode(dir));
		_leave(" = %d", ret);
		return ret;
	}

	/* directories have to be moved to the graveyard */
	_debug("move stale object to graveyard");
	inode_unlock(d_inode(dir));
	end_removing(rep);

try_again:
	/* first step is to make up a grave dentry in the graveyard */
@@ -425,13 +421,12 @@ int cachefiles_delete_object(struct cachefiles_object *object,

	_enter(",OBJ%x{%pD}", object->debug_id, object->file);

	/* Stop the dentry being negated if it's only pinned by a file struct. */
	dget(dentry);

	inode_lock_nested(d_backing_inode(fan), I_MUTEX_PARENT);
	dentry = start_removing_dentry(fan, dentry);
	if (IS_ERR(dentry))
		ret = PTR_ERR(dentry);
	else
		ret = cachefiles_unlink(volume->cache, object, fan, dentry, why);
	inode_unlock(d_backing_inode(fan));
	dput(dentry);
	end_removing(dentry);
	return ret;
}

@@ -644,8 +639,12 @@ bool cachefiles_look_up_object(struct cachefiles_object *object)

	if (!d_is_reg(dentry)) {
		pr_err("%pd is not a file\n", dentry);
		inode_lock_nested(d_inode(fan), I_MUTEX_PARENT);
		ret = cachefiles_bury_object(volume->cache, object, fan, dentry,
		struct dentry *de = start_removing_dentry(fan, dentry);
		if (IS_ERR(de))
			ret = PTR_ERR(de);
		else
			ret = cachefiles_bury_object(volume->cache, object,
						     fan, de,
						     FSCACHE_OBJECT_IS_WEIRD);
		dput(dentry);
		if (ret < 0)
@@ -679,36 +678,41 @@ bool cachefiles_commit_tmpfile(struct cachefiles_cache *cache,

	_enter(",%pD", object->file);

	inode_lock_nested(d_inode(fan), I_MUTEX_PARENT);
	ret = cachefiles_inject_read_error();
	if (ret == 0)
		dentry = lookup_one(&nop_mnt_idmap, &QSTR(object->d_name), fan);
		dentry = start_creating(&nop_mnt_idmap, fan, &QSTR(object->d_name));
	else
		dentry = ERR_PTR(ret);
	if (IS_ERR(dentry)) {
		trace_cachefiles_vfs_error(object, d_inode(fan), PTR_ERR(dentry),
					   cachefiles_trace_lookup_error);
		_debug("lookup fail %ld", PTR_ERR(dentry));
		goto out_unlock;
		goto out;
	}

	if (!d_is_negative(dentry)) {
	/*
	 * This loop will only execute more than once if some other thread
	 * races to create the object we are trying to create.
	 */
	while (!d_is_negative(dentry)) {
		ret = cachefiles_unlink(volume->cache, object, fan, dentry,
					FSCACHE_OBJECT_IS_STALE);
		if (ret < 0)
			goto out_dput;
			goto out_end;

		end_creating(dentry);

		dput(dentry);
		ret = cachefiles_inject_read_error();
		if (ret == 0)
			dentry = lookup_one(&nop_mnt_idmap, &QSTR(object->d_name), fan);
			dentry = start_creating(&nop_mnt_idmap, fan,
						&QSTR(object->d_name));
		else
			dentry = ERR_PTR(ret);
		if (IS_ERR(dentry)) {
			trace_cachefiles_vfs_error(object, d_inode(fan), PTR_ERR(dentry),
						   cachefiles_trace_lookup_error);
			_debug("lookup fail %ld", PTR_ERR(dentry));
			goto out_unlock;
			goto out;
		}
	}

@@ -729,10 +733,9 @@ bool cachefiles_commit_tmpfile(struct cachefiles_cache *cache,
		success = true;
	}

out_dput:
	dput(dentry);
out_unlock:
	inode_unlock(d_inode(fan));
out_end:
	end_creating(dentry);
out:
	_leave(" = %u", success);
	return success;
}
@@ -748,26 +751,20 @@ static struct dentry *cachefiles_lookup_for_cull(struct cachefiles_cache *cache,
	struct dentry *victim;
	int ret = -ENOENT;

	inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
	victim = start_removing(&nop_mnt_idmap, dir, &QSTR(filename));

	victim = lookup_one(&nop_mnt_idmap, &QSTR(filename), dir);
	if (IS_ERR(victim))
		goto lookup_error;
	if (d_is_negative(victim))
		goto lookup_put;
	if (d_inode(victim)->i_flags & S_KERNEL_FILE)
		goto lookup_busy;
	return victim;

lookup_busy:
	ret = -EBUSY;
lookup_put:
	inode_unlock(d_inode(dir));
	dput(victim);
	end_removing(victim);
	return ERR_PTR(ret);

lookup_error:
	inode_unlock(d_inode(dir));
	ret = PTR_ERR(victim);
	if (ret == -ENOENT)
		return ERR_PTR(-ESTALE); /* Probably got retired by the netfs */
@@ -815,18 +812,17 @@ int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir,

	ret = cachefiles_bury_object(cache, NULL, dir, victim,
				     FSCACHE_OBJECT_WAS_CULLED);
	dput(victim);
	if (ret < 0)
		goto error;

	fscache_count_culled();
	dput(victim);
	_leave(" = 0");
	return 0;

error_unlock:
	inode_unlock(d_inode(dir));
	end_removing(victim);
error:
	dput(victim);
	if (ret == -ENOENT)
		return -ESTALE; /* Probably got retired by the netfs */

+6 −3
Original line number Diff line number Diff line
@@ -7,6 +7,7 @@

#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/namei.h>
#include "internal.h"
#include <trace/events/fscache.h>

@@ -58,8 +59,10 @@ void cachefiles_acquire_volume(struct fscache_volume *vcookie)
		if (ret < 0) {
			if (ret != -ESTALE)
				goto error_dir;
			inode_lock_nested(d_inode(cache->store), I_MUTEX_PARENT);
			cachefiles_bury_object(cache, NULL, cache->store, vdentry,
			vdentry = start_removing_dentry(cache->store, vdentry);
			if (!IS_ERR(vdentry))
				cachefiles_bury_object(cache, NULL, cache->store,
						       vdentry,
						       FSCACHE_VOLUME_IS_WEIRD);
			cachefiles_put_directory(volume->dentry);
			cond_resched();
Loading