Merge patch series "net, pidfs: enable handing out pidfds for reaped sk->sk_peer_pid" (923ea4d4) · Commits · git / linux-net

fs/pidfs.c

+72 −9

Original line number	Diff line number	Diff line
		@@ -768,7 +768,7 @@ static inline bool pidfs_pid_valid(struct pid pid, const struct path path,
		{
		enum pid_type type;

		if (flags & PIDFD_CLONE)
		if (flags & PIDFD_STALE)
		return true;

		/*
		@@ -777,10 +777,14 @@ static inline bool pidfs_pid_valid(struct pid pid, const struct path path,
		* pidfd has been allocated perform another check that the pid
		* is still alive. If it is exit information is available even
		* if the task gets reaped before the pidfd is returned to
		* userspace. The only exception is PIDFD_CLONE where no task
		* linkage has been established for @pid yet and the kernel is
		* in the middle of process creation so there's nothing for
		* pidfs to miss.
		* userspace. The only exception are indicated by PIDFD_STALE:
		*
		* (1) The kernel is in the middle of task creation and thus no
		* task linkage has been established yet.
		* (2) The caller knows @pid has been registered in pidfs at a
		* time when the task was still alive.
		*
		* In both cases exit information will have been reported.
		*/
		if (flags & PIDFD_THREAD)
		type = PIDTYPE_PID;
		@@ -874,11 +878,11 @@ struct file pidfs_alloc_file(struct pid pid, unsigned int flags)
		int ret;

		/*
		* Ensure that PIDFD_CLONE can be passed as a flag without
		* Ensure that PIDFD_STALE can be passed as a flag without
		* overloading other uapi pidfd flags.
		*/
		BUILD_BUG_ON(PIDFD_CLONE == PIDFD_THREAD);
		BUILD_BUG_ON(PIDFD_CLONE == PIDFD_NONBLOCK);
		BUILD_BUG_ON(PIDFD_STALE == PIDFD_THREAD);
		BUILD_BUG_ON(PIDFD_STALE == PIDFD_NONBLOCK);

		ret = path_from_stashed(&pid->stashed, pidfs_mnt, get_pid(pid), &path);
		if (ret < 0)
		@@ -887,7 +891,7 @@ struct file pidfs_alloc_file(struct pid pid, unsigned int flags)
		if (!pidfs_pid_valid(pid, &path, flags))
		return ERR_PTR(-ESRCH);

		flags &= ~PIDFD_CLONE;
		flags &= ~PIDFD_STALE;
		pidfd_file = dentry_open(&path, flags, current_cred());
		/* Raise PIDFD_THREAD explicitly as do_dentry_open() strips it. */
		if (!IS_ERR(pidfd_file))
		@@ -896,6 +900,65 @@ struct file pidfs_alloc_file(struct pid pid, unsigned int flags)
		return pidfd_file;
		}

		/**
		* pidfs_register_pid - register a struct pid in pidfs
		* @pid: pid to pin
		*
		* Register a struct pid in pidfs. Needs to be paired with
		* pidfs_put_pid() to not risk leaking the pidfs dentry and inode.
		*
		* Return: On success zero, on error a negative error code is returned.
		*/
		int pidfs_register_pid(struct pid *pid)
		{
		struct path path __free(path_put) = {};
		int ret;

		might_sleep();

		if (!pid)
		return 0;

		ret = path_from_stashed(&pid->stashed, pidfs_mnt, get_pid(pid), &path);
		if (unlikely(ret))
		return ret;
		/* Keep the dentry and only put the reference to the mount. */
		path.dentry = NULL;
		return 0;
		}

		/**
		* pidfs_get_pid - pin a struct pid through pidfs
		* @pid: pid to pin
		*
		* Similar to pidfs_register_pid() but only valid if the caller knows
		* there's a reference to the @pid through a dentry already that can't
		* go away.
		*/
		void pidfs_get_pid(struct pid *pid)
		{
		if (!pid)
		return;
		WARN_ON_ONCE(!stashed_dentry_get(&pid->stashed));
		}

		/**
		* pidfs_put_pid - drop a pidfs reference
		* @pid: pid to drop
		*
		* Drop a reference to @pid via pidfs. This is only safe if the
		* reference has been taken via pidfs_get_pid().
		*/
		void pidfs_put_pid(struct pid *pid)
		{
		might_sleep();

		if (!pid)
		return;
		VFS_WARN_ON_ONCE(!pid->stashed);
		dput(pid->stashed);
		}

		static void pidfs_inode_init_once(void *data)
		{
		struct pidfs_inode *pi = data;

include/linux/pid.h

+1 −1

Original line number	Diff line number	Diff line
		@@ -77,7 +77,7 @@ struct file;
		struct pid pidfd_pid(const struct file file);
		struct pid pidfd_get_pid(unsigned int fd, unsigned int flags);
		struct task_struct pidfd_get_task(int pidfd, unsigned int flags);
		int pidfd_prepare(struct pid pid, unsigned int flags, struct file *ret);
		int pidfd_prepare(struct pid pid, unsigned int flags, struct file *ret_file);
		void do_notify_pidfd(struct task_struct *task);

		static inline struct pid get_pid(struct pid pid)

include/linux/pidfs.h

+3 −0

Original line number	Diff line number	Diff line
		@@ -8,5 +8,8 @@ void pidfs_add_pid(struct pid *pid);
		void pidfs_remove_pid(struct pid *pid);
		void pidfs_exit(struct task_struct *tsk);
		extern const struct dentry_operations pidfs_dentry_operations;
		int pidfs_register_pid(struct pid *pid);
		void pidfs_get_pid(struct pid *pid);
		void pidfs_put_pid(struct pid *pid);

		#endif /* _LINUX_PID_FS_H */

include/uapi/linux/pidfd.h

+1 −1

Original line number	Diff line number	Diff line
		@@ -12,7 +12,7 @@
		#define PIDFD_THREAD O_EXCL
		#ifdef __KERNEL__
		#include <linux/sched.h>
		#define PIDFD_CLONE CLONE_PIDFD
		#define PIDFD_STALE CLONE_PIDFD
		#endif

		/* Flags for pidfd_send_signal(). */

kernel/fork.c

+29 −54

Original line number	Diff line number	Diff line
		@@ -2035,55 +2035,11 @@ static inline void rcu_copy_process(struct task_struct *p)
		#endif /* #ifdef CONFIG_TASKS_TRACE_RCU */
		}

		/**
		* __pidfd_prepare - allocate a new pidfd_file and reserve a pidfd
		* @pid: the struct pid for which to create a pidfd
		* @flags: flags of the new @pidfd
		* @ret: Where to return the file for the pidfd.
		*
		* Allocate a new file that stashes @pid and reserve a new pidfd number in the
		* caller's file descriptor table. The pidfd is reserved but not installed yet.
		*
		* The helper doesn't perform checks on @pid which makes it useful for pidfds
		* created via CLONE_PIDFD where @pid has no task attached when the pidfd and
		* pidfd file are prepared.
		*
		* If this function returns successfully the caller is responsible to either
		* call fd_install() passing the returned pidfd and pidfd file as arguments in
		* order to install the pidfd into its file descriptor table or they must use
		* put_unused_fd() and fput() on the returned pidfd and pidfd file
		* respectively.
		*
		* This function is useful when a pidfd must already be reserved but there
		* might still be points of failure afterwards and the caller wants to ensure
		* that no pidfd is leaked into its file descriptor table.
		*
		* Return: On success, a reserved pidfd is returned from the function and a new
		* pidfd file is returned in the last argument to the function. On
		* error, a negative error code is returned from the function and the
		* last argument remains unchanged.
		*/
		static int __pidfd_prepare(struct pid pid, unsigned int flags, struct file *ret)
		{
		struct file *pidfd_file;

		CLASS(get_unused_fd, pidfd)(O_CLOEXEC);
		if (pidfd < 0)
		return pidfd;

		pidfd_file = pidfs_alloc_file(pid, flags \| O_RDWR);
		if (IS_ERR(pidfd_file))
		return PTR_ERR(pidfd_file);

		*ret = pidfd_file;
		return take_fd(pidfd);
		}

		/**
		* pidfd_prepare - allocate a new pidfd_file and reserve a pidfd
		* @pid: the struct pid for which to create a pidfd
		* @flags: flags of the new @pidfd
		* @ret: Where to return the pidfd.
		* @ret_file: return the new pidfs file
		*
		* Allocate a new file that stashes @pid and reserve a new pidfd number in the
		* caller's file descriptor table. The pidfd is reserved but not installed yet.
		@@ -2106,16 +2062,26 @@ static int __pidfd_prepare(struct pid pid, unsigned int flags, struct file *re
		* error, a negative error code is returned from the function and the
		* last argument remains unchanged.
		*/
		int pidfd_prepare(struct pid pid, unsigned int flags, struct file *ret)
		int pidfd_prepare(struct pid pid, unsigned int flags, struct file *ret_file)
		{
		struct file *pidfs_file;

		/*
		* PIDFD_STALE is only allowed to be passed if the caller knows
		* that @pid is already registered in pidfs and thus
		* PIDFD_INFO_EXIT information is guaranteed to be available.
		*/
		if (!(flags & PIDFD_STALE)) {
		/*
		* While holding the pidfd waitqueue lock removing the task
		* linkage for the thread-group leader pid (PIDTYPE_TGID) isn't
		* possible. Thus, if there's still task linkage for PIDTYPE_PID
		* not having thread-group leader linkage for the pid means it
		* wasn't a thread-group leader in the first place.
		* While holding the pidfd waitqueue lock removing the
		* task linkage for the thread-group leader pid
		* (PIDTYPE_TGID) isn't possible. Thus, if there's still
		* task linkage for PIDTYPE_PID not having thread-group
		* leader linkage for the pid means it wasn't a
		* thread-group leader in the first place.
		*/
		scoped_guard(spinlock_irq, &pid->wait_pidfd.lock) {
		guard(spinlock_irq)(&pid->wait_pidfd.lock);

		/* Task has already been reaped. */
		if (!pid_has_task(pid, PIDTYPE_PID))
		return -ESRCH;
		@@ -2128,7 +2094,16 @@ int pidfd_prepare(struct pid pid, unsigned int flags, struct file *ret)
		return -ENOENT;
		}

		return __pidfd_prepare(pid, flags, ret);
		CLASS(get_unused_fd, pidfd)(O_CLOEXEC);
		if (pidfd < 0)
		return pidfd;

		pidfs_file = pidfs_alloc_file(pid, flags \| O_RDWR);
		if (IS_ERR(pidfs_file))
		return PTR_ERR(pidfs_file);

		*ret_file = pidfs_file;
		return take_fd(pidfd);
		}

		static void __delayed_free_task(struct rcu_head *rhp)
		@@ -2477,7 +2452,7 @@ __latent_entropy struct task_struct *copy_process(
		* Note that no task has been attached to @pid yet indicate
		* that via CLONE_PIDFD.
		*/
		retval = __pidfd_prepare(pid, flags \| PIDFD_CLONE, &pidfile);
		retval = pidfd_prepare(pid, flags \| PIDFD_STALE, &pidfile);
		if (retval < 0)
		goto bad_fork_free_pid;
		pidfd = retval;

Original line number	Diff line number	Diff line
		@@ -77,7 +77,7 @@ struct file;
		struct pid pidfd_pid(const struct file file);
		struct pid pidfd_get_pid(unsigned int fd, unsigned int flags);
		struct task_struct pidfd_get_task(int pidfd, unsigned int flags);
		int pidfd_prepare(struct pid pid, unsigned int flags, struct file *ret);
		int pidfd_prepare(struct pid pid, unsigned int flags, struct file *ret_file);
		void do_notify_pidfd(struct task_struct *task);

		static inline struct pid get_pid(struct pid pid)