Unverified Commit b3caba8f authored by Christian Brauner
Browse files

pidfs: implement file handle support



On 64-bit platforms, userspace can read the pidfd's inode number in order
to get a never-repeated PID identifier. On 32-bit platforms this identifier
is not exposed, as inode numbers are limited to 32 bits. Instead, expose the
identifier via the encode_fh export operation, which makes it available to
userspace via name_to_handle_at().

In addition we implement fh_to_dentry, which allows userspace to
recover a pidfd from a pidfs file handle.

Signed-off-by: Erin Shepherd <erin.shepherd@e43.eu>
[brauner: patch heavily rewritten]
Link: https://lore.kernel.org/r/20241129-work-pidfs-file_handle-v1-6-87d803a42495@kernel.org


Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Co-Developed-by: Christian Brauner <brauner@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
parent c220e216
Loading
Loading
Loading
Loading
+114 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0
#include <linux/anon_inodes.h>
#include <linux/exportfs.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/cgroup.h>
@@ -473,6 +474,118 @@ static const struct dentry_operations pidfs_dentry_operations = {
	.d_prune	= stashed_dentry_prune,
};

/*
 * Encode a pidfs inode into a file handle.
 *
 * The payload is the 64-bit pid->ino of the struct pid stored in
 * inode->i_private; it is written as a single u64 and therefore fills
 * two u32 slots of @fh. *@max_len is set to 2 on every path, whether or
 * not the handle could be written. @parent is not used.
 *
 * Returns FILEID_KERNFS when the handle was written, or FILEID_INVALID
 * when *@max_len was smaller than 2 on entry (nothing is written to @fh
 * in that case).
 */
static int pidfs_encode_fh(struct inode *inode, u32 *fh, int *max_len,
			   struct inode *parent)
{
	const struct pid *pid = inode->i_private;
	const int avail = *max_len;

	/* Report the required length regardless of the outcome. */
	*max_len = 2;
	if (avail < 2)
		return FILEID_INVALID;

	*(u64 *)fh = pid->ino;
	return FILEID_KERNFS;
}

/*
 * Find a struct pid based on the inode number.
 *
 * @ino is split with pidfs_ino()/pidfs_gen(); the low 32 bits of the
 * inode part are used as the key into pidfs_ino_idr. The lookup runs
 * under guard(rcu)(), which stays in effect until the function returns.
 *
 * Returns the pid with a reference taken through get_pid(), or NULL if
 * no entry exists, if the entry's inode part or generation differs from
 * the requested one, or if pid_vnr() reports 0 for it.
 */
static struct pid *pidfs_ino_get_pid(u64 ino)
{
	unsigned long want_ino = pidfs_ino(ino);
	u32 want_gen = pidfs_gen(ino);
	struct pid *found;

	guard(rcu)();

	found = idr_find(&pidfs_ino_idr, lower_32_bits(want_ino));
	if (!found)
		return NULL;

	/* Both the inode part and the generation must match exactly. */
	if (pidfs_ino(found->ino) != want_ino ||
	    pidfs_gen(found->ino) != want_gen)
		return NULL;

	/* Within our pid namespace hierarchy? */
	if (pid_vnr(found) == 0)
		return NULL;

	return get_pid(found);
}

/*
 * Turn a pidfs file handle back into a dentry.
 *
 * Only handles of type FILEID_KERNFS with at least two 32-bit words are
 * accepted; the first 64 bits of @fid are taken as the pidfs inode
 * number and resolved with pidfs_ino_get_pid(). @sb is not used.
 *
 * Returns the dentry obtained from path_from_stashed(), NULL when the
 * handle is too short, has another type, or does not resolve to a pid,
 * or an ERR_PTR() carrying the error from path_from_stashed().
 */
static struct dentry *pidfs_fh_to_dentry(struct super_block *sb,
					 struct fid *fid, int fh_len,
					 int fh_type)
{
	struct path path;
	struct pid *pid;
	int err;

	if (fh_len < 2)
		return NULL;

	/* FILEID_KERNFS is the only handle type pidfs produces. */
	if (fh_type != FILEID_KERNFS)
		return NULL;

	pid = pidfs_ino_get_pid(*(u64 *)fid);
	if (!pid)
		return NULL;

	/*
	 * NOTE(review): the pid reference obtained above is handed to
	 * path_from_stashed() and not dropped here; presumably it is
	 * consumed on both success and failure - confirm in libfs.
	 */
	err = path_from_stashed(&pid->stashed, pidfs_mnt, pid, &path);
	if (err < 0)
		return ERR_PTR(err);

	/* Only the dentry is returned, so release the mount from @path. */
	mntput(path.mnt);
	return path.dentry;
}

/*
 * Open flags that users may pass via open_by_handle_at() for a pidfs
 * file handle. pidfs_export_permission() rejects any flag outside this
 * set (apart from O_LARGEFILE, which it additionally tolerates), so
 * that nonsensical flags never reach the open path.
 *
 * O_EXCL and O_NONBLOCK are part of the set because PIDFD_THREAD is
 * defined as O_EXCL and PIDFD_NONBLOCK as O_NONBLOCK.
 */
#define VALID_FILE_HANDLE_OPEN_FLAGS \
	(O_RDONLY | O_WRONLY | O_RDWR | O_NONBLOCK | O_CLOEXEC | O_EXCL)

/*
 * Permission hook for opening a pidfs file handle.
 *
 * The only check performed is on @oflags: every bit must belong to
 * VALID_FILE_HANDLE_OPEN_FLAGS or be O_LARGEFILE. @ctx is not used.
 *
 * Returns 0 when the flags are acceptable, -EINVAL otherwise.
 */
static int pidfs_export_permission(struct handle_to_path_ctx *ctx,
				   unsigned int oflags)
{
	const unsigned int allowed = VALID_FILE_HANDLE_OPEN_FLAGS | O_LARGEFILE;

	if ((oflags & allowed) != oflags)
		return -EINVAL;

	/*
	 * pidfs_ino_get_pid() will verify that the struct pid is part
	 * of the caller's pid namespace hierarchy. No further
	 * permission checks are needed.
	 */
	return 0;
}

/*
 * Open the pidfs path that a file handle resolved to.
 *
 * Returns whatever dentry_open() returns for @path when called with the
 * adjusted flags and the current task's credentials.
 */
static struct file *pidfs_export_open(struct path *path, unsigned int oflags)
{
	/*
	 * Clear O_LARGEFILE as open_by_handle_at() forces it and raise
	 * O_RDWR as pidfds always are.
	 */
	const unsigned int flags = (oflags & ~O_LARGEFILE) | O_RDWR;

	return dentry_open(path, flags, current_cred());
}

/* Export operations providing file handle support for pidfs. */
static const struct export_operations pidfs_export_operations = {
	/* Handle <-> object translation. */
	.encode_fh	= pidfs_encode_fh,
	.fh_to_dentry	= pidfs_fh_to_dentry,

	/* Flag validation and the actual open of a resolved handle. */
	.permission	= pidfs_export_permission,
	.open		= pidfs_export_open,
};

static int pidfs_init_inode(struct inode *inode, void *data)
{
	const struct pid *pid = data;
@@ -507,6 +620,7 @@ static int pidfs_init_fs_context(struct fs_context *fc)
		return -ENOMEM;

	ctx->ops = &pidfs_sops;
	ctx->eops = &pidfs_export_operations;
	ctx->dops = &pidfs_dentry_operations;
	fc->s_fs_info = (void *)&pidfs_stashed_ops;
	return 0;