Unverified Commit c54b3869 authored by NeilBrown's avatar NeilBrown Committed by Christian Brauner
Browse files

VFS: Change vfs_mkdir() to return the dentry.



vfs_mkdir() does not guarantee to leave the child dentry hashed or make
it positive on success, and in many such cases the filesystem had to use
a different dentry which it can now return.

This patch changes vfs_mkdir() to return the dentry provided by the
filesystems which is hashed and positive when provided.  This reduces
the number of cases where the resulting dentry is not positive to a
handful which don't deserve extra efforts.

The only callers of vfs_mkdir() which are interested in the resulting
inode are in-kernel filesystem clients: cachefiles, nfsd, smb/server.
The only filesystems that don't reliably provide the inode are:
- kernfs, tracefs which these clients are unlikely to be interested in
- cifs in some configurations would need to do a lookup to find the
  created inode, but doesn't.  cifs cannot be exported via NFS, is
  unlikely to be used by cachefiles, and smb/server only has a soft
  requirement for the inode, so this is unlikely to be a problem in
  practice.
- hostfs, nfs, cifs may need to do a lookup (rarely for NFS) and it is
  possible for a race to make that lookup fail.  Actual failure
  is unlikely and providing callers handle negative dentries graceful
  they will fail-safe.

So this patch removes the lookup code in nfsd and smb/server and adjusts
them to fail safe if a negative dentry is provided:
- cache-files already fails safe by restarting the task from the
  top - it still does with this change, though it no longer calls
  cachefiles_put_directory() as that will crash if the dentry is
  negative.
- nfsd reports "Server-fault" which it what it used to do if the lookup
  failed. This will never happen on any file-systems that it can actually
  export, so this is of no consequence.  I removed the fh_update()
  call as that is not needed and out-of-place.  A subsequent
  nfsd_create_setattr() call will call fh_update() when needed.
- smb/server only wants the inode to call ksmbd_smb_inherit_owner()
  which updates ->i_uid (without calling notify_change() or similar)
  which can be safely skipping on cifs (I hope).

If a different dentry is returned, the first one is put.  If necessary
the fact that it is new can be determined by comparing pointers.  A new
dentry will certainly have a new pointer (as the old is put after the
new is obtained).
Similarly if an error is returned (via ERR_PTR()) the original dentry is
put.

Reviewed-by: default avatarJeff Layton <jlayton@kernel.org>
Signed-off-by: default avatarNeilBrown <neilb@suse.de>
Link: https://lore.kernel.org/r/20250227013949.536172-7-neilb@suse.de


Signed-off-by: default avatarChristian Brauner <brauner@kernel.org>
parent 8376583b
Loading
Loading
Loading
Loading
+3 −4
Original line number Diff line number Diff line
@@ -160,18 +160,17 @@ static int dev_mkdir(const char *name, umode_t mode)
{
	struct dentry *dentry;
	struct path path;
	int err;

	dentry = kern_path_create(AT_FDCWD, name, &path, LOOKUP_DIRECTORY);
	if (IS_ERR(dentry))
		return PTR_ERR(dentry);

	err = vfs_mkdir(&nop_mnt_idmap, d_inode(path.dentry), dentry, mode);
	if (!err)
	dentry = vfs_mkdir(&nop_mnt_idmap, d_inode(path.dentry), dentry, mode);
	if (!IS_ERR(dentry))
		/* mark as kernel-created inode */
		d_inode(dentry)->i_private = &thread;
	done_path_create(&path, dentry);
	return err;
	return PTR_ERR_OR_ZERO(dentry);
}

static int create_path(const char *nodepath)
+9 −7
Original line number Diff line number Diff line
@@ -128,18 +128,19 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
		ret = security_path_mkdir(&path, subdir, 0700);
		if (ret < 0)
			goto mkdir_error;
		ret = cachefiles_inject_write_error();
		if (ret == 0)
			ret = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), subdir, 0700);
		if (ret < 0) {
		subdir = ERR_PTR(cachefiles_inject_write_error());
		if (!IS_ERR(subdir))
			subdir = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), subdir, 0700);
		ret = PTR_ERR(subdir);
		if (IS_ERR(subdir)) {
			trace_cachefiles_vfs_error(NULL, d_inode(dir), ret,
						   cachefiles_trace_mkdir_error);
			goto mkdir_error;
		}
		trace_cachefiles_mkdir(dir, subdir);

		if (unlikely(d_unhashed(subdir))) {
			cachefiles_put_directory(subdir);
		if (unlikely(d_unhashed(subdir) || d_is_negative(subdir))) {
			dput(subdir);
			goto retry;
		}
		ASSERT(d_backing_inode(subdir));
@@ -195,6 +196,7 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,

mkdir_error:
	inode_unlock(d_inode(dir));
	if (!IS_ERR(subdir))
		dput(subdir);
	pr_err("mkdir %s failed with error %d\n", dirname, ret);
	return ERR_PTR(ret);
+10 −4
Original line number Diff line number Diff line
@@ -511,10 +511,16 @@ static struct dentry *ecryptfs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
	struct inode *lower_dir;

	rc = lock_parent(dentry, &lower_dentry, &lower_dir);
	if (!rc)
		rc = vfs_mkdir(&nop_mnt_idmap, lower_dir,
	if (rc)
		goto out;

	lower_dentry = vfs_mkdir(&nop_mnt_idmap, lower_dir,
				 lower_dentry, mode);
	if (rc || d_really_is_negative(lower_dentry))
	rc = PTR_ERR(lower_dentry);
	if (IS_ERR(lower_dentry))
		goto out;
	rc = 0;
	if (d_unhashed(lower_dentry))
		goto out;
	rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb);
	if (rc)
+5 −2
Original line number Diff line number Diff line
@@ -230,9 +230,12 @@ int __init init_mkdir(const char *pathname, umode_t mode)
		return PTR_ERR(dentry);
	mode = mode_strip_umask(d_inode(path.dentry), mode);
	error = security_path_mkdir(&path, dentry, mode);
	if (!error)
		error = vfs_mkdir(mnt_idmap(path.mnt), path.dentry->d_inode,
	if (!error) {
		dentry = vfs_mkdir(mnt_idmap(path.mnt), path.dentry->d_inode,
				  dentry, mode);
		if (IS_ERR(dentry))
			error = PTR_ERR(dentry);
	}
	done_path_create(&path, dentry);
	return error;
}
+29 −16
Original line number Diff line number Diff line
@@ -4128,6 +4128,7 @@ EXPORT_SYMBOL(kern_path_create);

void done_path_create(struct path *path, struct dentry *dentry)
{
	if (!IS_ERR(dentry))
		dput(dentry);
	inode_unlock(path->dentry->d_inode);
	mnt_drop_write(path->mnt);
@@ -4274,7 +4275,7 @@ SYSCALL_DEFINE3(mknod, const char __user *, filename, umode_t, mode, unsigned, d
}

/**
 * vfs_mkdir - create directory
 * vfs_mkdir - create directory returning correct dentry if possible
 * @idmap:	idmap of the mount the inode was found from
 * @dir:	inode of the parent directory
 * @dentry:	dentry of the child directory
@@ -4287,8 +4288,14 @@ SYSCALL_DEFINE3(mknod, const char __user *, filename, umode_t, mode, unsigned, d
 * care to map the inode according to @idmap before checking permissions.
 * On non-idmapped mounts or if permission checking is to be performed on the
 * raw inode simply pass @nop_mnt_idmap.
 *
 * In the event that the filesystem does not use the *@dentry but leaves it
 * negative or unhashes it and possibly splices a different one returning it,
 * the original dentry is dput() and the alternate is returned.
 *
 * In case of an error the dentry is dput() and an ERR_PTR() is returned.
 */
int vfs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *vfs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
			 struct dentry *dentry, umode_t mode)
{
	int error;
@@ -4297,31 +4304,35 @@ int vfs_mkdir(struct mnt_idmap *idmap, struct inode *dir,

	error = may_create(idmap, dir, dentry);
	if (error)
		return error;
		goto err;

	error = -EPERM;
	if (!dir->i_op->mkdir)
		return -EPERM;
		goto err;

	mode = vfs_prepare_mode(idmap, dir, mode, S_IRWXUGO | S_ISVTX, 0);
	error = security_inode_mkdir(dir, dentry, mode);
	if (error)
		return error;
		goto err;

	error = -EMLINK;
	if (max_links && dir->i_nlink >= max_links)
		return -EMLINK;
		goto err;

	de = dir->i_op->mkdir(idmap, dir, dentry, mode);
	error = PTR_ERR(de);
	if (IS_ERR(de))
		return PTR_ERR(de);
		goto err;
	if (de) {
		fsnotify_mkdir(dir, de);
		/* Cannot return de yet */
		dput(de);
	} else {
		fsnotify_mkdir(dir, dentry);
		dput(dentry);
		dentry = de;
	}
	fsnotify_mkdir(dir, dentry);
	return dentry;

	return 0;
err:
	dput(dentry);
	return ERR_PTR(error);
}
EXPORT_SYMBOL(vfs_mkdir);

@@ -4341,8 +4352,10 @@ int do_mkdirat(int dfd, struct filename *name, umode_t mode)
	error = security_path_mkdir(&path, dentry,
			mode_strip_umask(path.dentry->d_inode, mode));
	if (!error) {
		error = vfs_mkdir(mnt_idmap(path.mnt), path.dentry->d_inode,
		dentry = vfs_mkdir(mnt_idmap(path.mnt), path.dentry->d_inode,
				  dentry, mode);
		if (IS_ERR(dentry))
			error = PTR_ERR(dentry);
	}
	done_path_create(&path, dentry);
	if (retry_estale(error, lookup_flags)) {
Loading