Unverified Commit 3c1a52f2 authored by Christian Brauner
Browse files

nstree: maintain list of owned namespaces

The namespace tree doesn't appropriately express the concept of
namespace ownership. Maintain a list of directly owned namespaces per
user namespace. This will allow userspace and the kernel to use the
listns() system call to walk the namespace tree by owning user
namespace. The rbtree is used to find the relevant namespace entry
point, which allows iteration to continue, and the owner list can be
used to walk the tree completely lock-free.

Link: https://patch.msgid.link/20251029-work-namespace-nstree-listns-v4-16-2e6f823ebdc0@kernel.org


Signed-off-by: Christian Brauner <brauner@kernel.org>
parent 3760342f
Loading
Loading
Loading
Loading
+8 −0
Original line number Diff line number Diff line
@@ -116,6 +116,12 @@ struct ns_common {
				struct rb_node ns_tree_node;
				struct list_head ns_list_node;
			};
			/*
			 * Ownership tracking. ns_owner_tree and ns_owner are the
			 * heads of the structures holding the namespaces that this
			 * namespace owns; ns_owner_tree_node and ns_owner_entry link
			 * this namespace into the corresponding structures of its
			 * own owner.
			 */
			struct /* namespace ownership rbtree and list */ {
				struct rb_root ns_owner_tree; /* rbtree of namespaces owned by this namespace */
				struct list_head ns_owner; /* list of namespaces owned by this namespace */
				struct rb_node ns_owner_tree_node; /* node in the owner namespace's rbtree */
				struct list_head ns_owner_entry; /* node in the owner namespace's ns_owner list */
			};
			atomic_t __ns_ref_active; /* do not use directly */
		};
		struct rcu_head ns_rcu;
@@ -216,6 +222,8 @@ static __always_inline bool is_initial_namespace(struct ns_common *ns)
	.__ns_ref		= REFCOUNT_INIT(refs),					\
	.__ns_ref_active	= ATOMIC_INIT(1),					\
	.ns_list_node		= LIST_HEAD_INIT(nsname.ns.ns_list_node),		\
	.ns_owner_entry		= LIST_HEAD_INIT(nsname.ns.ns_owner_entry),		\
	.ns_owner		= LIST_HEAD_INIT(nsname.ns.ns_owner),			\
}

#define ns_common_init(__ns)                     \
+4 −0
Original line number Diff line number Diff line
@@ -63,7 +63,11 @@ int __ns_common_init(struct ns_common *ns, u32 ns_type, const struct proc_ns_ope
	ns->ns_type = ns_type;
	RB_CLEAR_NODE(&ns->ns_tree_node);
	RB_CLEAR_NODE(&ns->ns_unified_tree_node);
	RB_CLEAR_NODE(&ns->ns_owner_tree_node);
	INIT_LIST_HEAD(&ns->ns_list_node);
	ns->ns_owner_tree = RB_ROOT;
	INIT_LIST_HEAD(&ns->ns_owner);
	INIT_LIST_HEAD(&ns->ns_owner_entry);

#ifdef CONFIG_DEBUG_VFS
	ns_debug(ns, ops);
+67 −1
Original line number Diff line number Diff line
@@ -3,7 +3,9 @@

#include <linux/nstree.h>
#include <linux/proc_ns.h>
#include <linux/rculist.h>
#include <linux/vfsdebug.h>
#include <linux/user_namespace.h>

static __cacheline_aligned_in_smp DEFINE_SEQLOCK(ns_tree_lock);
static struct rb_root ns_unified_tree = RB_ROOT; /* protected by ns_tree_lock */
@@ -83,6 +85,13 @@ static inline struct ns_common *node_to_ns_unified(const struct rb_node *node)
	return rb_entry(node, struct ns_common, ns_unified_tree_node);
}

/*
 * Map an rb_node embedded as ns_owner_tree_node back to the struct
 * ns_common that contains it. A NULL node is passed through as NULL.
 */
static inline struct ns_common *node_to_ns_owner(const struct rb_node *node)
{
	return node ? rb_entry(node, struct ns_common, ns_owner_tree_node) : NULL;
}

static inline int ns_cmp(struct rb_node *a, const struct rb_node *b)
{
	struct ns_common *ns_a = node_to_ns(a);
@@ -111,11 +120,27 @@ static inline int ns_cmp_unified(struct rb_node *a, const struct rb_node *b)
	return 0;
}

static inline int ns_cmp_owner(struct rb_node *a, const struct rb_node *b)
{
	struct ns_common *ns_a = node_to_ns_owner(a);
	struct ns_common *ns_b = node_to_ns_owner(b);
	u64 ns_id_a = ns_a->ns_id;
	u64 ns_id_b = ns_b->ns_id;

	if (ns_id_a < ns_id_b)
		return -1;
	if (ns_id_a > ns_id_b)
		return 1;
	return 0;
}

void __ns_tree_add_raw(struct ns_common *ns, struct ns_tree *ns_tree)
{
	struct rb_node *node, *prev;
	const struct proc_ns_operations *ops = ns->ops;

	VFS_WARN_ON_ONCE(!ns->ns_id);
	VFS_WARN_ON_ONCE(ns->ns_type != ns_tree->type);

	write_seqlock(&ns_tree_lock);

@@ -131,6 +156,30 @@ void __ns_tree_add_raw(struct ns_common *ns, struct ns_tree *ns_tree)
		list_add_rcu(&ns->ns_list_node, &node_to_ns(prev)->ns_list_node);

	rb_find_add_rcu(&ns->ns_unified_tree_node, &ns_unified_tree, ns_cmp_unified);

	if (ops) {
		struct user_namespace *user_ns;

		VFS_WARN_ON_ONCE(!ops->owner);
		user_ns = ops->owner(ns);
		if (user_ns) {
			struct ns_common *owner = &user_ns->ns;
			VFS_WARN_ON_ONCE(owner->ns_type != CLONE_NEWUSER);

			/* Insert into owner's rbtree */
			rb_find_add_rcu(&ns->ns_owner_tree_node, &owner->ns_owner_tree, ns_cmp_owner);

			/* Insert into owner's list in sorted order */
			prev = rb_prev(&ns->ns_owner_tree_node);
			if (!prev)
				list_add_rcu(&ns->ns_owner_entry, &owner->ns_owner);
			else
				list_add_rcu(&ns->ns_owner_entry, &node_to_ns_owner(prev)->ns_owner_entry);
		} else {
			/* Only the initial user namespace doesn't have an owner. */
			VFS_WARN_ON_ONCE(ns != to_ns_common(&init_user_ns));
		}
	}
	write_sequnlock(&ns_tree_lock);

	VFS_WARN_ON_ONCE(node);
@@ -146,6 +195,9 @@ void __ns_tree_add_raw(struct ns_common *ns, struct ns_tree *ns_tree)

void __ns_tree_remove(struct ns_common *ns, struct ns_tree *ns_tree)
{
	const struct proc_ns_operations *ops = ns->ops;
	struct user_namespace *user_ns;

	VFS_WARN_ON_ONCE(RB_EMPTY_NODE(&ns->ns_tree_node));
	VFS_WARN_ON_ONCE(list_empty(&ns->ns_list_node));
	VFS_WARN_ON_ONCE(ns->ns_type != ns_tree->type);
@@ -153,8 +205,22 @@ void __ns_tree_remove(struct ns_common *ns, struct ns_tree *ns_tree)
	write_seqlock(&ns_tree_lock);
	rb_erase(&ns->ns_tree_node, &ns_tree->ns_tree);
	rb_erase(&ns->ns_unified_tree_node, &ns_unified_tree);
	list_bidir_del_rcu(&ns->ns_list_node);
	RB_CLEAR_NODE(&ns->ns_tree_node);

	list_bidir_del_rcu(&ns->ns_list_node);

	/* Remove from owner's rbtree if this namespace has an owner */
	if (ops) {
		user_ns = ops->owner(ns);
		if (user_ns) {
			struct ns_common *owner = &user_ns->ns;
			rb_erase(&ns->ns_owner_tree_node, &owner->ns_owner_tree);
			RB_CLEAR_NODE(&ns->ns_owner_tree_node);
		}

		list_bidir_del_rcu(&ns->ns_owner_entry);
	}

	write_sequnlock(&ns_tree_lock);
}
EXPORT_SYMBOL_GPL(__ns_tree_remove);