Unverified Commit 04f0955b authored by Christian Brauner
Browse files

Merge patch series "cheaper MAY_EXEC handling for path lookup"

Mateusz Guzik <mjguzik@gmail.com> says:

In short, MAY_WRITE checks are elided.

This obsoletes the idea of pre-computing if perm checks are necessary as
that turned out to be too hairy. The new code has 2 more branches per
path component compared to that idea, but the perf difference for
typical paths (< 6 components) was basically within noise. To be
revisited if someone(tm) removes other slowdowns.

Instead of the pre-computing thing I added IOP_FASTPERM_MAY_EXEC so that
filesystems like btrfs can still avoid the hard work.

* patches from https://patch.msgid.link/20251107142149.989998-1-mjguzik@gmail.com:
  fs: retire now stale MAY_WRITE predicts in inode_permission()
  btrfs: utilize IOP_FASTPERM_MAY_EXEC
  fs: speed up path lookup with cheaper handling of MAY_EXEC

Link: https://patch.msgid.link/20251107142149.989998-1-mjguzik@gmail.com


Signed-off-by: Christian Brauner <brauner@kernel.org>
parents 854e8df2 a0a28c4e
Loading
Loading
Loading
Loading
+11 −1
Original line number Diff line number Diff line
@@ -5837,6 +5837,8 @@ struct btrfs_inode *btrfs_iget(u64 ino, struct btrfs_root *root)
	if (ret)
		return ERR_PTR(ret);

	if (S_ISDIR(inode->vfs_inode.i_mode))
		inode->vfs_inode.i_opflags |= IOP_FASTPERM_MAY_EXEC;
	unlock_new_inode(&inode->vfs_inode);
	return inode;
}
@@ -6788,8 +6790,11 @@ static int btrfs_create_common(struct inode *dir, struct dentry *dentry,
	}

	ret = btrfs_create_new_inode(trans, &new_inode_args);
	if (!ret)
	if (!ret) {
		if (S_ISDIR(inode->i_mode))
			inode->i_opflags |= IOP_FASTPERM_MAY_EXEC;
		d_instantiate_new(dentry, inode);
	}

	btrfs_end_transaction(trans);
	btrfs_btree_balance_dirty(fs_info);
@@ -9169,6 +9174,11 @@ int btrfs_prealloc_file_range_trans(struct inode *inode,
					   min_size, actual_len, alloc_hint, trans);
}

/*
 * NOTE: in case you are adding MAY_EXEC check for directories:
 * we are marking them with IOP_FASTPERM_MAY_EXEC, allowing path lookup to
 * elide calls here.
 */
static int btrfs_permission(struct mnt_idmap *idmap,
			    struct inode *inode, int mask)
{
+43 −4
Original line number Diff line number Diff line
@@ -540,10 +540,13 @@ static inline int do_inode_permission(struct mnt_idmap *idmap,
 * @mask: Right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
 *
 * Separate out file-system wide checks from inode-specific permission checks.
 *
 * Note: lookup_inode_permission_may_exec() does not call here. If you add
 * MAY_EXEC checks, adjust it.
 */
static int sb_permission(struct super_block *sb, struct inode *inode, int mask)
{
	if (unlikely(mask & MAY_WRITE)) {
	if (mask & MAY_WRITE) {
		umode_t mode = inode->i_mode;

		/* Nobody gets write access to a read-only fs. */
@@ -574,7 +577,7 @@ int inode_permission(struct mnt_idmap *idmap,
	if (unlikely(retval))
		return retval;

	if (unlikely(mask & MAY_WRITE)) {
	if (mask & MAY_WRITE) {
		/*
		 * Nobody gets write access to an immutable file.
		 */
@@ -602,6 +605,42 @@ int inode_permission(struct mnt_idmap *idmap,
}
EXPORT_SYMBOL(inode_permission);

/*
 * lookup_inode_permission_may_exec - traversal (MAY_EXEC) check for lookup
 *
 * Special-purpose helper for may_lookup(). It relies on assumptions that
 * only hold during path traversal; anything else must keep using
 * inode_permission().
 *
 * What is saved relative to inode_permission():
 * - the caller never passes MAY_WRITE, so nothing write-related is checked
 * - the inode is known to be a directory
 *
 * Since the majority of real-world traversal happens on directories that
 * grant search permission to everyone, that case is tested first and the
 * full inode_permission() path is taken only when it does not apply.
 *
 * A filesystem with its own ->permission hook does not get IOP_FASTPERM,
 * but can still opt in to this shortcut by setting IOP_FASTPERM_MAY_EXEC
 * on its directory inodes.
 *
 * @mask may only carry MAY_NOT_BLOCK; MAY_EXEC is added here.
 */
static __always_inline int lookup_inode_permission_may_exec(struct mnt_idmap *idmap,
	struct inode *inode, int mask)
{
	/* Lookup already checked this to return -ENOTDIR */
	VFS_BUG_ON_INODE(!S_ISDIR(inode->i_mode), inode);
	VFS_BUG_ON((mask & ~MAY_NOT_BLOCK) != 0);

	mask |= MAY_EXEC;

	/* Neither fast-path flag is set: the full check is mandatory. */
	if (unlikely((inode->i_opflags & (IOP_FASTPERM | IOP_FASTPERM_MAY_EXEC)) == 0))
		goto full_check;

	/* Search permission is missing for at least one of user/group/other. */
	if (unlikely((inode->i_mode & 0111) != 0111))
		goto full_check;

	/* An ACL may be attached, so the mode bits alone do not decide. */
	if (unlikely(!no_acl_inode(inode)))
		goto full_check;

	return security_inode_permission(inode, mask);

full_check:
	return inode_permission(idmap, inode, mask);
}

/**
 * path_get - get a reference to a path
 * @path: path to get the reference to
@@ -1855,7 +1894,7 @@ static inline int may_lookup(struct mnt_idmap *idmap,
	int err, mask;

	mask = nd->flags & LOOKUP_RCU ? MAY_NOT_BLOCK : 0;
	err = inode_permission(idmap, nd->inode, mask | MAY_EXEC);
	err = lookup_inode_permission_may_exec(idmap, nd->inode, mask);
	if (likely(!err))
		return 0;

@@ -1870,7 +1909,7 @@ static inline int may_lookup(struct mnt_idmap *idmap,
	if (err != -ECHILD)	// hard error
		return err;

	return inode_permission(idmap, nd->inode, MAY_EXEC);
	return lookup_inode_permission_may_exec(idmap, nd->inode, 0);
}

static int reserve_stack(struct nameidata *nd, struct path *link)
+7 −6
Original line number Diff line number Diff line
@@ -666,6 +666,7 @@ is_uncached_acl(struct posix_acl *acl)
#define IOP_DEFAULT_READLINK	0x0010
#define IOP_MGTIME		0x0020
#define IOP_CACHED_LINK		0x0040
/*
 * Opt-in for filesystems that have their own ->permission hook: when set on
 * a directory inode, lookup_inode_permission_may_exec() may take its fast
 * path for MAY_EXEC instead of going through inode_permission().
 */
#define IOP_FASTPERM_MAY_EXEC	0x0080

/*
 * Inode state bits.  Protected by inode->i_lock