Unverified Commit cdda1f26 authored by Luca Boccassi's avatar Luca Boccassi Committed by Christian Brauner
Browse files

pidfd: add ioctl to retrieve pid info



A common pattern when using pid fds is having to get information
about the process, which currently requires /proc being mounted,
resolving the fd to a pid, and then manually parsing the strings in
/proc/N/status and friends. This has to be reimplemented over
and over in all userspace projects (e.g.: I have reimplemented
resolving in systemd, dbus, dbus-daemon, polkit so far), and
requires additional care in checking that the fd is still valid
after having parsed the data, to avoid races.

Having a programmatic API that can be used directly removes all
these requirements, including having /proc mounted.

As discussed at LPC24, add an ioctl with an extensible struct
so that more parameters can be added later if needed. Start with
returning pid/tgid/ppid and creds unconditionally, and cgroupid
optionally.

Signed-off-by: Luca Boccassi <luca.boccassi@gmail.com>
Link: https://lore.kernel.org/r/20241010155401.2268522-1-luca.boccassi@gmail.com


Signed-off-by: Christian Brauner <brauner@kernel.org>
parent 8e929cb5
Loading
Loading
Loading
Loading
+83 −3
Original line number Diff line number Diff line
@@ -2,6 +2,7 @@
#include <linux/anon_inodes.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/cgroup.h>
#include <linux/magic.h>
#include <linux/mount.h>
#include <linux/pid.h>
@@ -114,6 +115,81 @@ static __poll_t pidfd_poll(struct file *file, struct poll_table_struct *pts)
	return poll_flags;
}

/*
 * pidfd_info - fill a struct pidfd_info for @task and copy it to userspace.
 * @task: task whose credentials, cgroup id and pid numbers are reported
 * @cmd:  ioctl command; the size encoded in it (_IOC_SIZE) is taken as the
 *        size of the struct userspace passed in
 * @arg:  userspace pointer to a struct pidfd_info
 *
 * Return: 0 on success; -EINVAL if @arg is NULL or the userspace struct is
 * smaller than PIDFD_INFO_SIZE_VER0; -EFAULT if copying from or to userspace
 * fails; -ESRCH if no credentials are obtained or the final pid/tgid/ppid
 * sanity check fails.
 */
static long pidfd_info(struct task_struct *task, unsigned int cmd, unsigned long arg)
{
	struct pidfd_info __user *uinfo = (struct pidfd_info __user *)arg;
	size_t usize = _IOC_SIZE(cmd);
	/* Zero-initialised, so any field not set below is copied out as 0. */
	struct pidfd_info kinfo = {};
	struct user_namespace *user_ns;
	const struct cred *c;
	__u64 mask;
#ifdef CONFIG_CGROUPS
	struct cgroup *cgrp;
#endif

	if (!uinfo)
		return -EINVAL;
	if (usize < PIDFD_INFO_SIZE_VER0)
		return -EINVAL; /* First version, no smaller struct possible */

	/*
	 * Fetch the caller's request mask. Nothing below consults it: every
	 * field this function knows about is filled unconditionally, so the
	 * read only serves to fault early on a bad pointer.
	 */
	if (copy_from_user(&mask, &uinfo->mask, sizeof(mask)))
		return -EFAULT;

	/* Takes a reference on the task's credentials; dropped by put_cred() below. */
	c = get_task_cred(task);
	if (!c)
		return -ESRCH;

	/*
	 * Credentials are always reported, whatever the request mask says.
	 * Each id is translated through current_user_ns().
	 */

	user_ns = current_user_ns();
	kinfo.ruid = from_kuid_munged(user_ns, c->uid);
	kinfo.rgid = from_kgid_munged(user_ns, c->gid);
	kinfo.euid = from_kuid_munged(user_ns, c->euid);
	kinfo.egid = from_kgid_munged(user_ns, c->egid);
	kinfo.suid = from_kuid_munged(user_ns, c->suid);
	kinfo.sgid = from_kgid_munged(user_ns, c->sgid);
	kinfo.fsuid = from_kuid_munged(user_ns, c->fsuid);
	kinfo.fsgid = from_kgid_munged(user_ns, c->fsgid);
	kinfo.mask |= PIDFD_INFO_CREDS;
	put_cred(c);

#ifdef CONFIG_CGROUPS
	/*
	 * The cgroup id is also filled without consulting the request mask
	 * whenever CONFIG_CGROUPS is enabled. The cgroup pointer is only used
	 * inside the RCU read-side section.
	 */
	rcu_read_lock();
	cgrp = task_dfl_cgroup(task);
	kinfo.cgroupid = cgroup_id(cgrp);
	kinfo.mask |= PIDFD_INFO_CGROUPID;
	rcu_read_unlock();
#endif

	/*
	 * Copy pid/tgid last, to reduce the chances the information might be
	 * stale. Note that it is not possible to ensure it will be valid as the
	 * task might exit as soon as the copy_to_user finishes, but that's ok
	 * and userspace expects that might happen and can act accordingly, so
	 * this is just best-effort. What we can do however is checking that all
	 * the fields are set correctly, or return ESRCH to avoid providing
	 * incomplete information.
	 */

	/* NOTE(review): a NULL namespace presumably selects the caller's pid namespace - confirm. */
	kinfo.ppid = task_ppid_nr_ns(task, NULL);
	kinfo.tgid = task_tgid_vnr(task);
	kinfo.pid = task_pid_vnr(task);
	kinfo.mask |= PIDFD_INFO_PID;

	/*
	 * A zero pid or tgid, or a zero ppid for anything other than pid 1, is
	 * treated as "task gone" and nothing is copied out.
	 * NOTE(review): presumably these helpers yield 0 once the task is no
	 * longer resolvable from the caller's pid namespace - confirm.
	 */
	if (kinfo.pid == 0 || kinfo.tgid == 0 || (kinfo.ppid == 0 && kinfo.pid != 1))
		return -ESRCH;

	/*
	 * If userspace and the kernel have the same struct size it can just
	 * be copied. If userspace provides an older struct, only the bits that
	 * userspace knows about will be copied. If userspace provides a new
	 * struct, only the bits that the kernel knows about will be copied;
	 * the remainder of the userspace buffer is left untouched.
	 */
	if (copy_to_user(uinfo, &kinfo, min(usize, sizeof(kinfo))))
		return -EFAULT;

	return 0;
}

static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	struct task_struct *task __free(put_task) = NULL;
@@ -122,13 +198,17 @@ static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
	struct ns_common *ns_common = NULL;
	struct pid_namespace *pid_ns;

	if (arg)
		return -EINVAL;

	task = get_pid_task(pid, PIDTYPE_PID);
	if (!task)
		return -ESRCH;

	/* Extensible IOCTL that does not open namespace FDs, take a shortcut */
	if (_IOC_NR(cmd) == _IOC_NR(PIDFD_GET_INFO))
		return pidfd_info(task, cmd, arg);

	if (arg)
		return -EINVAL;

	scoped_guard(task_lock, task) {
		nsp = task->nsproxy;
		if (nsp)
+50 −0
Original line number Diff line number Diff line
@@ -16,6 +16,55 @@
#define PIDFD_SIGNAL_THREAD_GROUP	(1UL << 1)
#define PIDFD_SIGNAL_PROCESS_GROUP	(1UL << 2)

/*
 * Flags for pidfd_info.
 *
 * These are bits in struct pidfd_info.mask: userspace sets them to request
 * fields, and the kernel sets them in the returned mask for the fields it
 * actually filled in.
 */
#define PIDFD_INFO_PID			(1UL << 0) /* Always returned, even if not requested */
#define PIDFD_INFO_CREDS		(1UL << 1) /* Always returned, even if not requested */
#define PIDFD_INFO_CGROUPID		(1UL << 2) /* Always returned if available, even if not requested */

/*
 * Size in bytes of the first published struct pidfd_info:
 * 2 x __u64 + 12 x __u32 (including spare0) = 64.
 * The ioctl rejects any userspace struct smaller than this.
 */
#define PIDFD_INFO_SIZE_VER0		64 /* sizeof first published struct */

/*
 * Result structure for the PIDFD_GET_INFO ioctl.
 *
 * The structure is extensible: the ioctl copies out at most the smaller of
 * the size userspace passed and the size the kernel knows about, so fields
 * may only ever be appended.
 */
struct pidfd_info {
	/*
	 * This mask is similar to the request_mask in statx(2).
	 *
	 * Userspace indicates what extensions or expensive-to-calculate fields
	 * they want by setting the corresponding bits in mask. The kernel
	 * will ignore bits that it does not know about.
	 *
	 * When filling the structure, the kernel will only set bits
	 * corresponding to the fields that were actually filled by the kernel.
	 * This also includes any future extensions that might be automatically
	 * filled. If the structure size is too small to contain a field
	 * (requested or not), to avoid confusion the mask will not
	 * contain a bit for that field.
	 *
	 * As such, userspace MUST verify that mask contains the
	 * corresponding flags after the ioctl(2) returns to ensure that it is
	 * using valid data.
	 */
	__u64 mask;
	/*
	 * The information contained in the following fields might be stale at the
	 * time it is received, as the target process might have exited as soon as
	 * the IOCTL was processed, and there is no way to avoid that. However, it
	 * is guaranteed that if the call was successful, then the information was
	 * correct and referred to the intended process at the time the work was
	 * performed.
	 */
	__u64 cgroupid;	/* cgroup id; valid when mask has PIDFD_INFO_CGROUPID */
	__u32 pid;	/* thread id; valid when mask has PIDFD_INFO_PID */
	__u32 tgid;	/* thread-group id; valid when mask has PIDFD_INFO_PID */
	__u32 ppid;	/* parent pid; valid when mask has PIDFD_INFO_PID */
	/* Credentials below are valid when mask has PIDFD_INFO_CREDS. */
	__u32 ruid;	/* real uid */
	__u32 rgid;	/* real gid */
	__u32 euid;	/* effective uid */
	__u32 egid;	/* effective gid */
	__u32 suid;	/* saved uid */
	__u32 sgid;	/* saved gid */
	__u32 fsuid;	/* filesystem uid */
	__u32 fsgid;	/* filesystem gid */
	__u32 spare0[1];	/* unused; brings the struct to PIDFD_INFO_SIZE_VER0 (64) bytes */
};

#define PIDFS_IOCTL_MAGIC 0xFF

#define PIDFD_GET_CGROUP_NAMESPACE            _IO(PIDFS_IOCTL_MAGIC, 1)
@@ -28,5 +77,6 @@
#define PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 8)
#define PIDFD_GET_USER_NAMESPACE              _IO(PIDFS_IOCTL_MAGIC, 9)
#define PIDFD_GET_UTS_NAMESPACE               _IO(PIDFS_IOCTL_MAGIC, 10)
#define PIDFD_GET_INFO                        _IOWR(PIDFS_IOCTL_MAGIC, 11, struct pidfd_info)

#endif /* _UAPI_LINUX_PIDFD_H */
+81 −1
Original line number Diff line number Diff line
@@ -13,6 +13,7 @@
#include <stdlib.h>
#include <string.h>
#include <syscall.h>
#include <sys/ioctl.h>
#include <sys/mount.h>
#include <sys/prctl.h>
#include <sys/wait.h>
@@ -21,6 +22,32 @@
#include "pidfd.h"
#include "../kselftest.h"

#ifndef PIDFS_IOCTL_MAGIC
#define PIDFS_IOCTL_MAGIC 0xFF
#endif

/*
 * Fallback definitions for building against system headers that do not yet
 * provide PIDFD_GET_INFO. They must mirror the UAPI pidfd header exactly:
 * same ioctl number, same flag bits, same struct layout.
 */
#ifndef PIDFD_GET_INFO
#define PIDFD_GET_INFO _IOWR(PIDFS_IOCTL_MAGIC, 11, struct pidfd_info)
/*
 * Bit 2, matching the UAPI header. Bit 0 is PIDFD_INFO_PID, which the kernel
 * always sets, so using it here would make the cgroupid check in the test
 * fire even when no cgroup id was returned.
 */
#define PIDFD_INFO_CGROUPID		(1UL << 2)

struct pidfd_info {
	/*
	 * The UAPI struct calls this field "mask"; the layout is identical.
	 * The name is kept because the test body refers to request_mask.
	 * NOTE(review): when the system header does provide struct pidfd_info,
	 * the test body's use of request_mask will not match it - confirm and
	 * rename both together.
	 */
	__u64 request_mask;
	__u64 cgroupid;
	__u32 pid;
	__u32 tgid;
	__u32 ppid;
	__u32 ruid;
	__u32 rgid;
	__u32 euid;
	__u32 egid;
	__u32 suid;
	__u32 sgid;
	__u32 fsuid;
	__u32 fsgid;
	__u32 spare0[1];
};
#endif

static int safe_int(const char *numstr, int *converted)
{
	char *err = NULL;
@@ -120,10 +147,13 @@ static pid_t get_pid_from_fdinfo_file(int pidfd, const char *key, size_t keylen)

int main(int argc, char **argv)
{
	struct pidfd_info info = {
		.request_mask = PIDFD_INFO_CGROUPID,
	};
	int pidfd = -1, ret = 1;
	pid_t pid;

	ksft_set_plan(3);
	ksft_set_plan(4);

	pidfd = sys_pidfd_open(-1, 0);
	if (pidfd >= 0) {
@@ -153,6 +183,56 @@ int main(int argc, char **argv)
	pid = get_pid_from_fdinfo_file(pidfd, "Pid:", sizeof("Pid:") - 1);
	ksft_print_msg("pidfd %d refers to process with pid %d\n", pidfd, pid);

	if (ioctl(pidfd, PIDFD_GET_INFO, &info) < 0) {
		ksft_print_msg("%s - failed to get info from pidfd\n", strerror(errno));
		goto on_error;
	}
	if (info.pid != pid) {
		ksft_print_msg("pid from fdinfo file %d does not match pid from ioctl %d\n",
			       pid, info.pid);
		goto on_error;
	}
	if (info.ppid != getppid()) {
		ksft_print_msg("ppid %d does not match ppid from ioctl %d\n",
			       pid, info.pid);
		goto on_error;
	}
	if (info.ruid != getuid()) {
		ksft_print_msg("uid %d does not match uid from ioctl %d\n",
			       getuid(), info.ruid);
		goto on_error;
	}
	if (info.rgid != getgid()) {
		ksft_print_msg("gid %d does not match gid from ioctl %d\n",
			       getgid(), info.rgid);
		goto on_error;
	}
	if (info.euid != geteuid()) {
		ksft_print_msg("euid %d does not match euid from ioctl %d\n",
			       geteuid(), info.euid);
		goto on_error;
	}
	if (info.egid != getegid()) {
		ksft_print_msg("egid %d does not match egid from ioctl %d\n",
			       getegid(), info.egid);
		goto on_error;
	}
	if (info.suid != geteuid()) {
		ksft_print_msg("suid %d does not match suid from ioctl %d\n",
			       geteuid(), info.suid);
		goto on_error;
	}
	if (info.sgid != getegid()) {
		ksft_print_msg("sgid %d does not match sgid from ioctl %d\n",
			       getegid(), info.sgid);
		goto on_error;
	}
	if ((info.request_mask & PIDFD_INFO_CGROUPID) && info.cgroupid == 0) {
		ksft_print_msg("cgroupid should not be 0 when PIDFD_INFO_CGROUPID is set\n");
		goto on_error;
	}
	ksft_test_result_pass("get info from pidfd test: passed\n");

	ret = 0;

on_error: