Unverified Commit 4037d966 authored by NeilBrown's avatar NeilBrown Committed by Christian Brauner
Browse files

VFS: introduce start_dirop() and end_dirop()



The fact that directory operations (create,remove,rename) are protected
by a lock on the parent is known widely throughout the kernel.
In order to change this - to instead lock the target dentry  - it is
best to centralise this knowledge so it can be changed in one place.

This patch introduces start_dirop() which is local to VFS code.
It performs the required locking for create and remove.  Rename
will be handled separately.

Various functions with names like start_creating() or start_removing_path(),
some of which already exist, will export this functionality beyond the VFS.

end_dirop() is the partner of start_dirop().  It drops the lock and
releases the reference on the dentry.
It *is* exported so that various end_creating etc functions can be inline.

As vfs_mkdir() drops the dentry on error we cannot use end_dirop() as
that won't unlock when the dentry IS_ERR().  For now we need an explicit
unlock when dentry IS_ERR().  I hope to change vfs_mkdir() to unlock
when it drops a dentry so that explicit unlock can go away.

end_dirop() can always be called on the result of start_dirop(), but not
after vfs_mkdir().  After a vfs_mkdir() we still may need the explicit
unlock as seen in end_creating_path().

As well as adding start_dirop() and end_dirop()
this patch uses them in:
 - simple_start_creating (which requires sharing lookup_noperm_common()
        with libfs.c)
 - start_removing_path / start_removing_user_path_at
 - filename_create / end_creating_path()
 - do_rmdir(), do_unlinkat()

Reviewed-by: default avatarAmir Goldstein <amir73il@gmail.com>
Reviewed-by: default avatarJeff Layton <jlayton@kernel.org>
Signed-off-by: default avatarNeilBrown <neil@brown.name>
Link: https://patch.msgid.link/20251113002050.676694-3-neilb@ownmail.net


Tested-by: default avatar <syzbot@syzkaller.appspotmail.com>
Signed-off-by: default avatarChristian Brauner <brauner@kernel.org>
parent 8b45b9a8
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -67,6 +67,9 @@ int vfs_tmpfile(struct mnt_idmap *idmap,
		const struct path *parentpath,
		struct file *file, umode_t mode);
struct dentry *d_hash_and_lookup(struct dentry *, struct qstr *);
struct dentry *start_dirop(struct dentry *parent, struct qstr *name,
			   unsigned int lookup_flags);
int lookup_noperm_common(struct qstr *qname, struct dentry *base);

/*
 * namespace.c
+17 −19
Original line number Diff line number Diff line
@@ -2289,27 +2289,25 @@ void stashed_dentry_prune(struct dentry *dentry)
	cmpxchg(stashed, dentry, NULL);
}

/* parent must be held exclusive */
/**
 * simple_start_creating - prepare to create a given name
 * @parent: directory in which to prepare to create the name
 * @name:   the name to be created
 *
 * Required lock is taken and a lookup in performed prior to creating an
 * object in a directory.  No permission checking is performed.
 *
 * Returns: a negative dentry on which vfs_create() or similar may
 *  be attempted, or an error.
 */
struct dentry *simple_start_creating(struct dentry *parent, const char *name)
{
	struct dentry *dentry;
	struct inode *dir = d_inode(parent);
	struct qstr qname = QSTR(name);
	int err;

	inode_lock(dir);
	if (unlikely(IS_DEADDIR(dir))) {
		inode_unlock(dir);
		return ERR_PTR(-ENOENT);
	}
	dentry = lookup_noperm(&QSTR(name), parent);
	if (IS_ERR(dentry)) {
		inode_unlock(dir);
		return dentry;
	}
	if (dentry->d_inode) {
		dput(dentry);
		inode_unlock(dir);
		return ERR_PTR(-EEXIST);
	}
	return dentry;
	err = lookup_noperm_common(&qname, parent);
	if (err)
		return ERR_PTR(err);
	return start_dirop(parent, &qname, LOOKUP_CREATE | LOOKUP_EXCL);
}
EXPORT_SYMBOL(simple_start_creating);
+73 −25
Original line number Diff line number Diff line
@@ -2765,6 +2765,48 @@ static int filename_parentat(int dfd, struct filename *name,
	return __filename_parentat(dfd, name, flags, parent, last, type, NULL);
}

/**
 * start_dirop - begin a create or remove dirop, performing locking and lookup
 * @parent:       the dentry of the parent in which the operation will occur
 * @name:         a qstr holding the name within that parent
 * @lookup_flags: intent and other lookup flags.
 *
 * The lookup is performed and necessary locks are taken so that, on success,
 * the returned dentry can be operated on safely.
 * The qstr must already have the hash value calculated.
 *
 * Returns: a locked dentry, or an error.
 *
 */
struct dentry *start_dirop(struct dentry *parent, struct qstr *name,
			   unsigned int lookup_flags)
{
	struct dentry *dentry;
	struct inode *dir = d_inode(parent);

	inode_lock_nested(dir, I_MUTEX_PARENT);
	dentry = lookup_one_qstr_excl(name, parent, lookup_flags);
	if (IS_ERR(dentry))
		inode_unlock(dir);
	return dentry;
}

/**
 * end_dirop - signal completion of a dirop
 * @de: the dentry which was returned by start_dirop or similar.
 *
 * If the de is an error, nothing happens. Otherwise any lock taken to
 * protect the dentry is dropped and the dentry itself is release (dput()).
 */
void end_dirop(struct dentry *de)
{
	if (!IS_ERR(de)) {
		inode_unlock(de->d_parent->d_inode);
		dput(de);
	}
}
EXPORT_SYMBOL(end_dirop);

/* does lookup, returns the object with parent locked */
static struct dentry *__start_removing_path(int dfd, struct filename *name,
					   struct path *path)
@@ -2781,10 +2823,9 @@ static struct dentry *__start_removing_path(int dfd, struct filename *name,
		return ERR_PTR(-EINVAL);
	/* don't fail immediately if it's r/o, at least try to report other errors */
	error = mnt_want_write(parent_path.mnt);
	inode_lock_nested(parent_path.dentry->d_inode, I_MUTEX_PARENT);
	d = lookup_one_qstr_excl(&last, parent_path.dentry, 0);
	d = start_dirop(parent_path.dentry, &last, 0);
	if (IS_ERR(d))
		goto unlock;
		goto drop;
	if (error)
		goto fail;
	path->dentry = no_free_ptr(parent_path.dentry);
@@ -2792,10 +2833,9 @@ static struct dentry *__start_removing_path(int dfd, struct filename *name,
	return d;

fail:
	dput(d);
	end_dirop(d);
	d = ERR_PTR(error);
unlock:
	inode_unlock(parent_path.dentry->d_inode);
drop:
	if (!error)
		mnt_drop_write(parent_path.mnt);
	return d;
@@ -2910,7 +2950,7 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
}
EXPORT_SYMBOL(vfs_path_lookup);

static int lookup_noperm_common(struct qstr *qname, struct dentry *base)
int lookup_noperm_common(struct qstr *qname, struct dentry *base)
{
	const char *name = qname->name;
	u32 len = qname->len;
@@ -4223,21 +4263,18 @@ static struct dentry *filename_create(int dfd, struct filename *name,
	 */
	if (last.name[last.len] && !want_dir)
		create_flags &= ~LOOKUP_CREATE;
	inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT);
	dentry = lookup_one_qstr_excl(&last, path->dentry,
				      reval_flag | create_flags);
	dentry = start_dirop(path->dentry, &last, reval_flag | create_flags);
	if (IS_ERR(dentry))
		goto unlock;
		goto out_drop_write;

	if (unlikely(error))
		goto fail;

	return dentry;
fail:
	dput(dentry);
	end_dirop(dentry);
	dentry = ERR_PTR(error);
unlock:
	inode_unlock(path->dentry->d_inode);
out_drop_write:
	if (!error)
		mnt_drop_write(path->mnt);
out:
@@ -4256,11 +4293,26 @@ struct dentry *start_creating_path(int dfd, const char *pathname,
}
EXPORT_SYMBOL(start_creating_path);

/**
 * end_creating_path - finish a code section started by start_creating_path()
 * @path: the path instantiated by start_creating_path()
 * @dentry: the dentry returned by start_creating_path()
 *
 * end_creating_path() will unlock and locks taken by start_creating_path()
 * and drop an references that were taken.  It should only be called
 * if start_creating_path() returned a non-error.
 * If vfs_mkdir() was called and it returned an error, that error *should*
 * be passed to end_creating_path() together with the path.
 */
void end_creating_path(const struct path *path, struct dentry *dentry)
{
	if (!IS_ERR(dentry))
		dput(dentry);
	if (IS_ERR(dentry))
		/* The parent is still locked despite the error from
		 * vfs_mkdir() - must unlock it.
		 */
		inode_unlock(path->dentry->d_inode);
	else
		end_dirop(dentry);
	mnt_drop_write(path->mnt);
	path_put(path);
}
@@ -4592,8 +4644,7 @@ int do_rmdir(int dfd, struct filename *name)
	if (error)
		goto exit2;

	inode_lock_nested(path.dentry->d_inode, I_MUTEX_PARENT);
	dentry = lookup_one_qstr_excl(&last, path.dentry, lookup_flags);
	dentry = start_dirop(path.dentry, &last, lookup_flags);
	error = PTR_ERR(dentry);
	if (IS_ERR(dentry))
		goto exit3;
@@ -4602,9 +4653,8 @@ int do_rmdir(int dfd, struct filename *name)
		goto exit4;
	error = vfs_rmdir(mnt_idmap(path.mnt), path.dentry->d_inode, dentry);
exit4:
	dput(dentry);
	end_dirop(dentry);
exit3:
	inode_unlock(path.dentry->d_inode);
	mnt_drop_write(path.mnt);
exit2:
	path_put(&path);
@@ -4721,8 +4771,7 @@ int do_unlinkat(int dfd, struct filename *name)
	if (error)
		goto exit2;
retry_deleg:
	inode_lock_nested(path.dentry->d_inode, I_MUTEX_PARENT);
	dentry = lookup_one_qstr_excl(&last, path.dentry, lookup_flags);
	dentry = start_dirop(path.dentry, &last, lookup_flags);
	error = PTR_ERR(dentry);
	if (!IS_ERR(dentry)) {

@@ -4737,9 +4786,8 @@ int do_unlinkat(int dfd, struct filename *name)
		error = vfs_unlink(mnt_idmap(path.mnt), path.dentry->d_inode,
				   dentry, &delegated_inode);
exit3:
		dput(dentry);
		end_dirop(dentry);
	}
	inode_unlock(path.dentry->d_inode);
	if (inode)
		iput(inode);	/* truncate the inode here */
	inode = NULL;
+2 −0
Original line number Diff line number Diff line
@@ -3609,6 +3609,8 @@ extern void iterate_supers_type(struct file_system_type *,
void filesystems_freeze(void);
void filesystems_thaw(void);

void end_dirop(struct dentry *de);

extern int dcache_dir_open(struct inode *, struct file *);
extern int dcache_dir_close(struct inode *, struct file *);
extern loff_t dcache_dir_lseek(struct file *, loff_t, int);