Commit f70d24c2 authored by Linus Torvalds
Browse files
Pull namespace updates from Christian Brauner:
 "This contains namespace updates. This time specifically for nsfs:

   - Userspace heavily relies on the root inode numbers for namespaces
     to identify the initial namespaces. That's already a hard
     dependency. So we cannot change that anymore. Move the initial
     inode numbers to a public header and align the only two namespaces
     that currently don't do that with all the other namespaces.

   - The root inode of /proc having a fixed inode number has been part
     of the core kernel ABI since its inception, and recently some
     userspace programs (mainly container runtimes) have started to
     explicitly depend on this behaviour.

     The main reason this is useful to userspace is that by checking
     that a suspect /proc handle has fstype PROC_SUPER_MAGIC and is
     PROCFS_ROOT_INO, they can then use openat2() together with
     RESOLVE_{NO_{XDEV,MAGICLINKS},BENEATH} to ensure that there isn't a
     bind-mount that replaces some procfs file with a different one.

     This kind of attack has led to security issues in container
     runtimes in the past (such as CVE-2019-19921) and libraries like
     libpathrs[1] use this feature of procfs to provide safe procfs
     handling functions"

* tag 'vfs-6.17-rc1.nsfs' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
  uapi: export PROCFS_ROOT_INO
  mntns: use stable inode number for initial mount ns
  netns: use stable inode number for initial mount ns
  nsfs: move root inode number to uapi
parents 934600da 76fdb7eb
Loading
Loading
Loading
Loading
+3 −1
Original line number Diff line number Diff line
@@ -6090,9 +6090,11 @@ static void __init init_mount_tree(void)
	if (IS_ERR(mnt))
		panic("Can't create rootfs");

	ns = alloc_mnt_ns(&init_user_ns, false);
	ns = alloc_mnt_ns(&init_user_ns, true);
	if (IS_ERR(ns))
		panic("Can't allocate initial namespace");
	ns->seq = atomic64_inc_return(&mnt_ns_seq);
	ns->ns.inum = PROC_MNT_INIT_INO;
	m = real_mount(mnt);
	ns->root = m;
	ns->nr_mounts = 1;
+5 −5
Original line number Diff line number Diff line
@@ -363,7 +363,7 @@ static const struct inode_operations proc_root_inode_operations = {
 * This is the root "inode" in the /proc tree..
 */
struct proc_dir_entry proc_root = {
	.low_ino	= PROC_ROOT_INO, 
	.low_ino	= PROCFS_ROOT_INO,
	.namelen	= 5,
	.mode		= S_IFDIR | S_IRUGO | S_IXUGO,
	.nlink		= 2,
+9 −7
Original line number Diff line number Diff line
@@ -6,6 +6,7 @@
#define _LINUX_PROC_NS_H

#include <linux/ns_common.h>
#include <uapi/linux/nsfs.h>

struct pid_namespace;
struct nsset;
@@ -39,13 +40,14 @@ extern const struct proc_ns_operations timens_for_children_operations;
 * We always define these enumerators
 */
enum {
	PROC_ROOT_INO		= 1,
	PROC_IPC_INIT_INO	= 0xEFFFFFFFU,
	PROC_UTS_INIT_INO	= 0xEFFFFFFEU,
	PROC_USER_INIT_INO	= 0xEFFFFFFDU,
	PROC_PID_INIT_INO	= 0xEFFFFFFCU,
	PROC_CGROUP_INIT_INO	= 0xEFFFFFFBU,
	PROC_TIME_INIT_INO	= 0xEFFFFFFAU,
	PROC_IPC_INIT_INO	= IPC_NS_INIT_INO,
	PROC_UTS_INIT_INO	= UTS_NS_INIT_INO,
	PROC_USER_INIT_INO	= USER_NS_INIT_INO,
	PROC_PID_INIT_INO	= PID_NS_INIT_INO,
	PROC_CGROUP_INIT_INO	= CGROUP_NS_INIT_INO,
	PROC_TIME_INIT_INO	= TIME_NS_INIT_INO,
	PROC_NET_INIT_INO	= NET_NS_INIT_INO,
	PROC_MNT_INIT_INO	= MNT_NS_INIT_INO,
};

#ifdef CONFIG_PROC_FS
+11 −0
Original line number Diff line number Diff line
@@ -60,6 +60,17 @@
#define RENAME_EXCHANGE		(1 << 1)	/* Exchange source and dest */
#define RENAME_WHITEOUT		(1 << 2)	/* Whiteout source */

/*
 * The root inode of procfs is guaranteed to always have the same inode number,
 * PROCFS_ROOT_INO.
 *
 * For programs that make heavy use of procfs, verifying that the root is a
 * real procfs root (its filesystem type is PROC_SUPER_MAGIC and its inode
 * number is PROCFS_ROOT_INO) and then using
 * openat2(RESOLVE_{NO_{XDEV,MAGICLINKS},BENEATH}) will allow you to make sure
 * you are never tricked into operating on the wrong procfs file, e.g. one
 * that a bind-mount has placed on top of the real procfs entry.
 */
enum procfs_ino {
	PROCFS_ROOT_INO = 1,
};

struct file_clone_range {
	__s64 src_fd;
	__u64 src_offset;
+11 −0
Original line number Diff line number Diff line
@@ -42,4 +42,15 @@ struct mnt_ns_info {
/* Get previous namespace. */
#define NS_MNT_GET_PREV		_IOR(NSIO, 12, struct mnt_ns_info)

/*
 * Fixed inode numbers used for the initial namespace of each namespace type.
 *
 * Userspace relies on these inode numbers to identify the initial namespaces,
 * so they are a hard dependency: existing values must never be changed.
 */
enum init_ns_ino {
	IPC_NS_INIT_INO		= 0xEFFFFFFFU,
	UTS_NS_INIT_INO		= 0xEFFFFFFEU,
	USER_NS_INIT_INO	= 0xEFFFFFFDU,
	PID_NS_INIT_INO		= 0xEFFFFFFCU,
	CGROUP_NS_INIT_INO	= 0xEFFFFFFBU,
	TIME_NS_INIT_INO	= 0xEFFFFFFAU,
	NET_NS_INIT_INO		= 0xEFFFFFF9U,
	MNT_NS_INIT_INO		= 0xEFFFFFF8U,
};

#endif /* __LINUX_NSFS_H */
Loading