Unverified Commit 5b08bd40 authored by Christian Brauner's avatar Christian Brauner
Browse files

pidfs: allow retrieval of namespace file descriptors

For users that hold a reference to a pidfd procfs might not even be
available nor is it desirable to parse through procfs just for the sake
of getting namespace file descriptors for a process.

Make it possible to directly retrieve namespace file descriptors from a
pidfd. Pidfds already can be used with setns() to change a set of
namespaces atomically.

Link: https://lore.kernel.org/r/20240627-work-pidfs-v1-4-7e9ab6cc3bb1@kernel.org


Reviewed-by: default avatarJeff Layton <jlayton@kernel.org>
Reviewed-by: default avatarJosef Bacik <josef@toxicpanda.com>
Reviewed-by: default avatarAlexander Mikhalitsyn <aleksandr.mikhalitsyn@canonical.com>
Signed-off-by: default avatarChristian Brauner <brauner@kernel.org>
parent 460695a2
Loading
Loading
Loading
Loading
+90 −0
Original line number Diff line number Diff line
@@ -11,10 +11,16 @@
#include <linux/proc_fs.h>
#include <linux/proc_ns.h>
#include <linux/pseudo_fs.h>
#include <linux/ptrace.h>
#include <linux/seq_file.h>
#include <uapi/linux/pidfd.h>
#include <linux/ipc_namespace.h>
#include <linux/time_namespace.h>
#include <linux/utsname.h>
#include <net/net_namespace.h>

#include "internal.h"
#include "mount.h"

#ifdef CONFIG_PROC_FS
/**
@@ -108,11 +114,95 @@ static __poll_t pidfd_poll(struct file *file, struct poll_table_struct *pts)
	return poll_flags;
}

static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	struct task_struct *task __free(put_task) = NULL;
	struct nsproxy *nsp __free(put_nsproxy) = NULL;
	struct pid *pid = pidfd_pid(file);
	struct ns_common *ns_common;

	if (arg)
		return -EINVAL;

	task = get_pid_task(pid, PIDTYPE_PID);
	if (!task)
		return -ESRCH;

	scoped_guard(task_lock, task) {
		nsp = task->nsproxy;
		if (nsp)
			get_nsproxy(nsp);
	}
	if (!nsp)
		return -ESRCH; /* just pretend it didn't exist */

	/*
	 * We're trying to open a file descriptor to the namespace so perform a
	 * filesystem cred ptrace check. Also, we mirror nsfs behavior.
	 */
	if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS))
		return -EACCES;

	switch (cmd) {
	/* Namespaces that hang of nsproxy. */
	case PIDFD_GET_CGROUP_NAMESPACE:
		get_cgroup_ns(nsp->cgroup_ns);
		ns_common = to_ns_common(nsp->cgroup_ns);
		break;
	case PIDFD_GET_IPC_NAMESPACE:
		get_ipc_ns(nsp->ipc_ns);
		ns_common = to_ns_common(nsp->ipc_ns);
		break;
	case PIDFD_GET_MNT_NAMESPACE:
		get_mnt_ns(nsp->mnt_ns);
		ns_common = to_ns_common(nsp->mnt_ns);
		break;
	case PIDFD_GET_NET_NAMESPACE:
		ns_common = to_ns_common(nsp->net_ns);
		get_net_ns(ns_common);
		break;
	case PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE:
		get_pid_ns(nsp->pid_ns_for_children);
		ns_common = to_ns_common(nsp->pid_ns_for_children);
		break;
	case PIDFD_GET_TIME_NAMESPACE:
		get_time_ns(nsp->time_ns);
		ns_common = to_ns_common(nsp->time_ns);
		break;
	case PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE:
		get_time_ns(nsp->time_ns_for_children);
		ns_common = to_ns_common(nsp->time_ns_for_children);
		break;
	case PIDFD_GET_UTS_NAMESPACE:
		get_uts_ns(nsp->uts_ns);
		ns_common = to_ns_common(nsp->uts_ns);
		break;
	/* Namespaces that don't hang of nsproxy. */
	case PIDFD_GET_USER_NAMESPACE:
		rcu_read_lock();
		ns_common = to_ns_common(get_user_ns(task_cred_xxx(task, user_ns)));
		rcu_read_unlock();
		break;
	case PIDFD_GET_PID_NAMESPACE:
		rcu_read_lock();
		ns_common = to_ns_common(get_pid_ns(task_active_pid_ns(task)));
		rcu_read_unlock();
		break;
	default:
		return -ENOIOCTLCMD;
	}

	/* open_namespace() unconditionally consumes the reference */
	return open_namespace(ns_common);
}

static const struct file_operations pidfs_file_operations = {
	.poll		= pidfd_poll,
#ifdef CONFIG_PROC_FS
	.show_fdinfo	= pidfd_show_fdinfo,
#endif
	.unlocked_ioctl	= pidfd_ioctl,
	.compat_ioctl   = compat_ptr_ioctl,
};

struct pid *pidfd_pid(const struct file *file)
+14 −0
Original line number Diff line number Diff line
@@ -5,6 +5,7 @@

#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/ioctl.h>

/* Flags for pidfd_open().  */
#define PIDFD_NONBLOCK	O_NONBLOCK
@@ -15,4 +16,17 @@
#define PIDFD_SIGNAL_THREAD_GROUP	(1UL << 1)
#define PIDFD_SIGNAL_PROCESS_GROUP	(1UL << 2)

#define PIDFS_IOCTL_MAGIC 0xFF

#define PIDFD_GET_CGROUP_NAMESPACE            _IO(PIDFS_IOCTL_MAGIC, 1)
#define PIDFD_GET_IPC_NAMESPACE               _IO(PIDFS_IOCTL_MAGIC, 2)
#define PIDFD_GET_MNT_NAMESPACE               _IO(PIDFS_IOCTL_MAGIC, 3)
#define PIDFD_GET_NET_NAMESPACE               _IO(PIDFS_IOCTL_MAGIC, 4)
#define PIDFD_GET_PID_NAMESPACE               _IO(PIDFS_IOCTL_MAGIC, 5)
#define PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE  _IO(PIDFS_IOCTL_MAGIC, 6)
#define PIDFD_GET_TIME_NAMESPACE              _IO(PIDFS_IOCTL_MAGIC, 7)
#define PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 8)
#define PIDFD_GET_USER_NAMESPACE              _IO(PIDFS_IOCTL_MAGIC, 9)
#define PIDFD_GET_UTS_NAMESPACE               _IO(PIDFS_IOCTL_MAGIC, 10)

#endif /* _UAPI_LINUX_PIDFD_H */