Unverified Commit 523ac768 authored by Christian Brauner's avatar Christian Brauner
Browse files

Merge patch series "Create and use APIs to centralise locking for directory ops."

NeilBrown <neilb@ownmail.net> says:

This series is the next part of my effort to change directory-op
locking to allow multiple concurrent ops in a directory.  Ultimately we
will (in my plan) lock the target dentry(s) rather than the whole
parent directory.

To help with changing the locking protocol, this series centralises
locking and lookup in some helpers.  The various helpers are introduced
and then used in the same patch - roughly one patch per helper though
with various exceptions.

I haven't introduced these helpers into the various filesystems that
Al's tree-in-dcache series is changing.  That series introduces and
uses similar helpers tuned to the specific needs of that set of
filesystems.  Ultimately all the helpers will use the same backends
which can then be adjusted when it is time to change the locking
protocol.

One change that deserves highlighting is in patch 13 where vfs_mkdir()
is changed to unlock the parent on failure, as well as the current
behaviour of dput()ing the dentry on failure.  Once this change is in
place, the final step of both create and an remove sequences only
requires the target dentry, not the parent.  So e.g.  end_creating() is
only given the dentry (which may be IS_ERR() after vfs_mkdir()).  This
helps establish the pattern that it is the dentry that is being locked
and unlocked (the lock is currently held on dentry->d_parent->d_inode,
but that can change).

* patches from https://patch.msgid.link/20251113002050.676694-1-neilb@ownmail.net:
  VFS: introduce end_creating_keep()
  VFS: change vfs_mkdir() to unlock on failure.
  ecryptfs: use new start_creating/start_removing APIs
  Add start_renaming_two_dentries()
  VFS/ovl/smb: introduce start_renaming_dentry()
  VFS/nfsd/ovl: introduce start_renaming() and end_renaming()
  VFS: add start_creating_killable() and start_removing_killable()
  VFS: introduce start_removing_dentry()
  smb/server: use end_removing_noperm for for target of smb2_create_link()
  VFS: introduce start_creating_noperm() and start_removing_noperm()
  VFS/nfsd/cachefiles/ovl: introduce start_removing() and end_removing()
  VFS/nfsd/cachefiles/ovl: add start_creating() and end_creating()
  VFS: tidy up do_unlinkat()
  VFS: introduce start_dirop() and end_dirop()
  debugfs: rename end_creating() to debugfs_end_creating()

Link: https://patch.msgid.link/20251113002050.676694-1-neilb@ownmail.net


Signed-off-by: default avatarChristian Brauner <brauner@kernel.org>
parents 3a866087 cf296b29
Loading
Loading
Loading
Loading
+13 −0
Original line number Diff line number Diff line
@@ -1309,3 +1309,16 @@ a different length, use
	vfs_parse_fs_qstr(fc, key, &QSTR_LEN(value, len))

instead.

---

**mandatory**

vfs_mkdir() now returns a dentry - the one returned by ->mkdir().  If
that dentry is different from the dentry passed in, including if it is
an IS_ERR() dentry pointer, the original dentry is dput().

When vfs_mkdir() returns an error, and so both dputs() the original
dentry and doesn't provide a replacement, it also unlocks the parent.
Consequently the return value from vfs_mkdir() can be passed to
end_creating() and the parent will be unlocked precisely when necessary.
+12 −29
Original line number Diff line number Diff line
@@ -904,14 +904,9 @@ static noinline int btrfs_mksubvol(struct dentry *parent,
	struct fscrypt_str name_str = FSTR_INIT((char *)qname->name, qname->len);
	int ret;

	ret = down_write_killable_nested(&dir->i_rwsem, I_MUTEX_PARENT);
	if (ret == -EINTR)
		return ret;

	dentry = lookup_one(idmap, qname, parent);
	ret = PTR_ERR(dentry);
	dentry = start_creating_killable(idmap, parent, qname);
	if (IS_ERR(dentry))
		goto out_unlock;
		return PTR_ERR(dentry);

	ret = btrfs_may_create(idmap, dir, dentry);
	if (ret)
@@ -940,9 +935,7 @@ static noinline int btrfs_mksubvol(struct dentry *parent,
out_up_read:
	up_read(&fs_info->subvol_sem);
out_dput:
	dput(dentry);
out_unlock:
	btrfs_inode_unlock(BTRFS_I(dir), 0);
	end_creating(dentry);
	return ret;
}

@@ -2417,18 +2410,10 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
		goto free_subvol_name;
	}

	ret = down_write_killable_nested(&dir->i_rwsem, I_MUTEX_PARENT);
	if (ret == -EINTR)
		goto free_subvol_name;
	dentry = lookup_one(idmap, &QSTR(subvol_name), parent);
	dentry = start_removing_killable(idmap, parent, &QSTR(subvol_name));
	if (IS_ERR(dentry)) {
		ret = PTR_ERR(dentry);
		goto out_unlock_dir;
	}

	if (d_really_is_negative(dentry)) {
		ret = -ENOENT;
		goto out_dput;
		goto out_end_removing;
	}

	inode = d_inode(dentry);
@@ -2449,7 +2434,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
		 */
		ret = -EPERM;
		if (!btrfs_test_opt(fs_info, USER_SUBVOL_RM_ALLOWED))
			goto out_dput;
			goto out_end_removing;

		/*
		 * Do not allow deletion if the parent dir is the same
@@ -2460,21 +2445,21 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
		 */
		ret = -EINVAL;
		if (root == dest)
			goto out_dput;
			goto out_end_removing;

		ret = inode_permission(idmap, inode, MAY_WRITE | MAY_EXEC);
		if (ret)
			goto out_dput;
			goto out_end_removing;
	}

	/* check if subvolume may be deleted by a user */
	ret = btrfs_may_delete(idmap, dir, dentry, 1);
	if (ret)
		goto out_dput;
		goto out_end_removing;

	if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FIRST_FREE_OBJECTID) {
		ret = -EINVAL;
		goto out_dput;
		goto out_end_removing;
	}

	btrfs_inode_lock(BTRFS_I(inode), 0);
@@ -2483,10 +2468,8 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
	if (!ret)
		d_delete_notify(dir, dentry);

out_dput:
	dput(dentry);
out_unlock_dir:
	btrfs_inode_unlock(BTRFS_I(dir), 0);
out_end_removing:
	end_removing(dentry);
free_subvol_name:
	kfree(subvol_name_ptr);
free_parent:
+7 −4
Original line number Diff line number Diff line
@@ -9,6 +9,7 @@
#include <linux/mount.h>
#include <linux/xattr.h>
#include <linux/file.h>
#include <linux/namei.h>
#include <linux/falloc.h>
#include <trace/events/fscache.h>
#include "internal.h"
@@ -428,10 +429,12 @@ static bool cachefiles_invalidate_cookie(struct fscache_cookie *cookie)
		if (!old_tmpfile) {
			struct cachefiles_volume *volume = object->volume;
			struct dentry *fan = volume->fanout[(u8)cookie->key_hash];
			struct dentry *obj;

			inode_lock_nested(d_inode(fan), I_MUTEX_PARENT);
			cachefiles_bury_object(volume->cache, object, fan,
					       old_file->f_path.dentry,
			obj = start_removing_dentry(fan, old_file->f_path.dentry);
			if (!IS_ERR(obj))
				cachefiles_bury_object(volume->cache, object,
						       fan, obj,
						       FSCACHE_OBJECT_INVALIDATED);
		}
		fput(old_file);
+46 −50
Original line number Diff line number Diff line
@@ -93,12 +93,11 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
	_enter(",,%s", dirname);

	/* search the current directory for the element name */
	inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);

retry:
	ret = cachefiles_inject_read_error();
	if (ret == 0)
		subdir = lookup_one(&nop_mnt_idmap, &QSTR(dirname), dir);
		subdir = start_creating(&nop_mnt_idmap, dir, &QSTR(dirname));
	else
		subdir = ERR_PTR(ret);
	trace_cachefiles_lookup(NULL, dir, subdir);
@@ -129,10 +128,12 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
		if (ret < 0)
			goto mkdir_error;
		ret = cachefiles_inject_write_error();
		if (ret == 0)
		if (ret == 0) {
			subdir = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), subdir, 0700);
		else
		} else {
			end_creating(subdir);
			subdir = ERR_PTR(ret);
		}
		if (IS_ERR(subdir)) {
			trace_cachefiles_vfs_error(NULL, d_inode(dir), ret,
						   cachefiles_trace_mkdir_error);
@@ -141,7 +142,7 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
		trace_cachefiles_mkdir(dir, subdir);

		if (unlikely(d_unhashed(subdir) || d_is_negative(subdir))) {
			dput(subdir);
			end_creating(subdir);
			goto retry;
		}
		ASSERT(d_backing_inode(subdir));
@@ -154,7 +155,7 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,

	/* Tell rmdir() it's not allowed to delete the subdir */
	inode_lock(d_inode(subdir));
	inode_unlock(d_inode(dir));
	end_creating_keep(subdir);

	if (!__cachefiles_mark_inode_in_use(NULL, d_inode(subdir))) {
		pr_notice("cachefiles: Inode already in use: %pd (B=%lx)\n",
@@ -196,14 +197,11 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
	return ERR_PTR(-EBUSY);

mkdir_error:
	inode_unlock(d_inode(dir));
	if (!IS_ERR(subdir))
		dput(subdir);
	end_creating(subdir);
	pr_err("mkdir %s failed with error %d\n", dirname, ret);
	return ERR_PTR(ret);

lookup_error:
	inode_unlock(d_inode(dir));
	ret = PTR_ERR(subdir);
	pr_err("Lookup %s failed with error %d\n", dirname, ret);
	return ERR_PTR(ret);
@@ -263,6 +261,8 @@ static int cachefiles_unlink(struct cachefiles_cache *cache,
 * - File backed objects are unlinked
 * - Directory backed objects are stuffed into the graveyard for userspace to
 *   delete
 * On entry dir must be locked.  It will be unlocked on exit.
 * On entry there must be at least 2 refs on rep, one will be dropped on exit.
 */
int cachefiles_bury_object(struct cachefiles_cache *cache,
			   struct cachefiles_object *object,
@@ -278,27 +278,23 @@ int cachefiles_bury_object(struct cachefiles_cache *cache,
	_enter(",'%pd','%pd'", dir, rep);

	if (rep->d_parent != dir) {
		inode_unlock(d_inode(dir));
		end_removing(rep);
		_leave(" = -ESTALE");
		return -ESTALE;
	}

	/* non-directories can just be unlinked */
	if (!d_is_dir(rep)) {
		dget(rep); /* Stop the dentry being negated if it's only pinned
			    * by a file struct.
			    */
		ret = cachefiles_unlink(cache, object, dir, rep, why);
		dput(rep);
		end_removing(rep);

		inode_unlock(d_inode(dir));
		_leave(" = %d", ret);
		return ret;
	}

	/* directories have to be moved to the graveyard */
	_debug("move stale object to graveyard");
	inode_unlock(d_inode(dir));
	end_removing(rep);

try_again:
	/* first step is to make up a grave dentry in the graveyard */
@@ -425,13 +421,12 @@ int cachefiles_delete_object(struct cachefiles_object *object,

	_enter(",OBJ%x{%pD}", object->debug_id, object->file);

	/* Stop the dentry being negated if it's only pinned by a file struct. */
	dget(dentry);

	inode_lock_nested(d_backing_inode(fan), I_MUTEX_PARENT);
	dentry = start_removing_dentry(fan, dentry);
	if (IS_ERR(dentry))
		ret = PTR_ERR(dentry);
	else
		ret = cachefiles_unlink(volume->cache, object, fan, dentry, why);
	inode_unlock(d_backing_inode(fan));
	dput(dentry);
	end_removing(dentry);
	return ret;
}

@@ -644,8 +639,12 @@ bool cachefiles_look_up_object(struct cachefiles_object *object)

	if (!d_is_reg(dentry)) {
		pr_err("%pd is not a file\n", dentry);
		inode_lock_nested(d_inode(fan), I_MUTEX_PARENT);
		ret = cachefiles_bury_object(volume->cache, object, fan, dentry,
		struct dentry *de = start_removing_dentry(fan, dentry);
		if (IS_ERR(de))
			ret = PTR_ERR(de);
		else
			ret = cachefiles_bury_object(volume->cache, object,
						     fan, de,
						     FSCACHE_OBJECT_IS_WEIRD);
		dput(dentry);
		if (ret < 0)
@@ -679,36 +678,41 @@ bool cachefiles_commit_tmpfile(struct cachefiles_cache *cache,

	_enter(",%pD", object->file);

	inode_lock_nested(d_inode(fan), I_MUTEX_PARENT);
	ret = cachefiles_inject_read_error();
	if (ret == 0)
		dentry = lookup_one(&nop_mnt_idmap, &QSTR(object->d_name), fan);
		dentry = start_creating(&nop_mnt_idmap, fan, &QSTR(object->d_name));
	else
		dentry = ERR_PTR(ret);
	if (IS_ERR(dentry)) {
		trace_cachefiles_vfs_error(object, d_inode(fan), PTR_ERR(dentry),
					   cachefiles_trace_lookup_error);
		_debug("lookup fail %ld", PTR_ERR(dentry));
		goto out_unlock;
		goto out;
	}

	if (!d_is_negative(dentry)) {
	/*
	 * This loop will only execute more than once if some other thread
	 * races to create the object we are trying to create.
	 */
	while (!d_is_negative(dentry)) {
		ret = cachefiles_unlink(volume->cache, object, fan, dentry,
					FSCACHE_OBJECT_IS_STALE);
		if (ret < 0)
			goto out_dput;
			goto out_end;

		end_creating(dentry);

		dput(dentry);
		ret = cachefiles_inject_read_error();
		if (ret == 0)
			dentry = lookup_one(&nop_mnt_idmap, &QSTR(object->d_name), fan);
			dentry = start_creating(&nop_mnt_idmap, fan,
						&QSTR(object->d_name));
		else
			dentry = ERR_PTR(ret);
		if (IS_ERR(dentry)) {
			trace_cachefiles_vfs_error(object, d_inode(fan), PTR_ERR(dentry),
						   cachefiles_trace_lookup_error);
			_debug("lookup fail %ld", PTR_ERR(dentry));
			goto out_unlock;
			goto out;
		}
	}

@@ -729,10 +733,9 @@ bool cachefiles_commit_tmpfile(struct cachefiles_cache *cache,
		success = true;
	}

out_dput:
	dput(dentry);
out_unlock:
	inode_unlock(d_inode(fan));
out_end:
	end_creating(dentry);
out:
	_leave(" = %u", success);
	return success;
}
@@ -748,26 +751,20 @@ static struct dentry *cachefiles_lookup_for_cull(struct cachefiles_cache *cache,
	struct dentry *victim;
	int ret = -ENOENT;

	inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
	victim = start_removing(&nop_mnt_idmap, dir, &QSTR(filename));

	victim = lookup_one(&nop_mnt_idmap, &QSTR(filename), dir);
	if (IS_ERR(victim))
		goto lookup_error;
	if (d_is_negative(victim))
		goto lookup_put;
	if (d_inode(victim)->i_flags & S_KERNEL_FILE)
		goto lookup_busy;
	return victim;

lookup_busy:
	ret = -EBUSY;
lookup_put:
	inode_unlock(d_inode(dir));
	dput(victim);
	end_removing(victim);
	return ERR_PTR(ret);

lookup_error:
	inode_unlock(d_inode(dir));
	ret = PTR_ERR(victim);
	if (ret == -ENOENT)
		return ERR_PTR(-ESTALE); /* Probably got retired by the netfs */
@@ -815,18 +812,17 @@ int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir,

	ret = cachefiles_bury_object(cache, NULL, dir, victim,
				     FSCACHE_OBJECT_WAS_CULLED);
	dput(victim);
	if (ret < 0)
		goto error;

	fscache_count_culled();
	dput(victim);
	_leave(" = 0");
	return 0;

error_unlock:
	inode_unlock(d_inode(dir));
	end_removing(victim);
error:
	dput(victim);
	if (ret == -ENOENT)
		return -ESTALE; /* Probably got retired by the netfs */

+6 −3
Original line number Diff line number Diff line
@@ -7,6 +7,7 @@

#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/namei.h>
#include "internal.h"
#include <trace/events/fscache.h>

@@ -58,8 +59,10 @@ void cachefiles_acquire_volume(struct fscache_volume *vcookie)
		if (ret < 0) {
			if (ret != -ESTALE)
				goto error_dir;
			inode_lock_nested(d_inode(cache->store), I_MUTEX_PARENT);
			cachefiles_bury_object(cache, NULL, cache->store, vdentry,
			vdentry = start_removing_dentry(cache->store, vdentry);
			if (!IS_ERR(vdentry))
				cachefiles_bury_object(cache, NULL, cache->store,
						       vdentry,
						       FSCACHE_VOLUME_IS_WEIRD);
			cachefiles_put_directory(volume->dentry);
			cond_resched();
Loading