Commit 5c0f43e8 authored by Linus Torvalds
Browse files

Merge tag 'kernel-7.1-rc1.misc' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

Pull pid_namespace updates from Christian Brauner:

 - pid_namespace: make init creation more flexible

   Annotate ->child_reaper accesses with {READ,WRITE}_ONCE() to protect
   the unlocked readers from cpu/compiler reordering, and enforce that
   pid 1 in a pid namespace is always the first allocated pid (the
   set_tid path already required this).

   On top of that, allow opening pid_for_children before the pid
   namespace init has been created. This lets one process create the pid
   namespace and a different process create the init via setns(), which
   makes clone3(set_tid) uniformly usable in all cases and is
   particularly useful to CRIU when restoring nested containers.

   A new selftest covers both the basic create-pidns-then-init flow and
   the cross-process variant, and a MAINTAINERS entry for the pid
   namespace code is added.

 - unrelated signal cleanup: update outdated comment for the removed
   freezable_schedule()

* tag 'kernel-7.1-rc1.misc' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
  signal: update outdated comment for removed freezable_schedule()
  MAINTAINERS: add a pid namespace entry
  selftests: Add tests for creating pidns init via setns
  pid_namespace: allow opening pid_for_children before init was created
  pid: check init is created first after idr alloc
  pid_namespace: avoid optimization of accesses to ->child_reaper
parents 7c8a4671 4c68d150
Loading
Loading
Loading
Loading
+9 −2
Original line number Diff line number Diff line
@@ -18191,6 +18191,15 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/mtd/linux.git nand/next
F:	drivers/mtd/nand/
F:	include/linux/mtd/*nand*.h
NAMESPACES:
M:	Christian Brauner <christian@brauner.io>
R:	Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
L:	linux-kernel@vger.kernel.org
S:	Maintained
F:	rust/kernel/pid_namespace.rs
F:	kernel/pid_namespace.c
F:	tools/testing/selftests/pid_namespace/
NATIONAL INSTRUMENTS SERIAL DRIVER
M:	Chaitanya Vadrevu <chaitanya.vadrevu@emerson.com>
L:	linux-serial@vger.kernel.org
@@ -20804,10 +20813,8 @@ M: Christian Brauner <christian@brauner.io>
L:	linux-kernel@vger.kernel.org
S:	Maintained
T:	git git://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux.git
F:	rust/kernel/pid_namespace.rs
F:	samples/pidfd/
F:	tools/testing/selftests/clone3/
F:	tools/testing/selftests/pid_namespace/
F:	tools/testing/selftests/pidfd/
K:	(?i)pidfd
K:	(?i)clone3
+2 −1
Original line number Diff line number Diff line
@@ -608,7 +608,8 @@ static struct task_struct *find_child_reaper(struct task_struct *father,

	reaper = find_alive_thread(father);
	if (reaper) {
		pid_ns->child_reaper = reaper;
		ASSERT_EXCLUSIVE_WRITER(pid_ns->child_reaper);
		WRITE_ONCE(pid_ns->child_reaper, reaper);
		return reaper;
	}

+4 −1
Original line number Diff line number Diff line
@@ -2469,7 +2469,10 @@ __latent_entropy struct task_struct *copy_process(
			init_task_pid(p, PIDTYPE_SID, task_session(current));

			if (is_child_reaper(pid)) {
				ns_of_pid(pid)->child_reaper = p;
				struct pid_namespace *ns = ns_of_pid(pid);

				ASSERT_EXCLUSIVE_WRITER(ns->child_reaper);
				WRITE_ONCE(ns->child_reaper, p);
				p->signal->flags |= SIGNAL_UNKILLABLE;
			}
			p->signal->shared_pending.signal = delayed.signal;
+11 −8
Original line number Diff line number Diff line
@@ -128,7 +128,7 @@ void free_pid(struct pid *pid)
			 * is the reaper wake up the reaper.  The reaper
			 * may be sleeping in zap_pid_ns_processes().
			 */
			wake_up_process(ns->child_reaper);
			wake_up_process(READ_ONCE(ns->child_reaper));
			break;
		case PIDNS_ADDING:
			/* Handle a fork failure of the first process */
@@ -215,12 +215,6 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *arg_set_tid,
			retval = -EINVAL;
			if (tid < 1 || tid >= pid_max[ns->level - i])
				goto out_abort;
			/*
			 * Also fail if a PID != 1 is requested and
			 * no PID 1 exists.
			 */
			if (tid != 1 && !tmp->child_reaper)
				goto out_abort;
			retval = -EPERM;
			if (!checkpoint_restore_ns_capable(tmp->user_ns))
				goto out_abort;
@@ -296,9 +290,18 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *arg_set_tid,

		pid->numbers[i].nr = nr;
		pid->numbers[i].ns = tmp;
		tmp = tmp->parent;
		i--;
		retried_preload = false;

		/*
		 * PID 1 (init) must be created first.
		 */
		if (!READ_ONCE(tmp->child_reaper) && nr != 1) {
			retval = -EINVAL;
			goto out_free;
		}

		tmp = tmp->parent;
	}

	/*
+0 −9
Original line number Diff line number Diff line
@@ -369,15 +369,6 @@ static struct ns_common *pidns_for_children_get(struct task_struct *task)
	}
	task_unlock(task);

	if (ns) {
		read_lock(&tasklist_lock);
		if (!ns->child_reaper) {
			put_pid_ns(ns);
			ns = NULL;
		}
		read_unlock(&tasklist_lock);
	}

	return ns ? &ns->ns : NULL;
}

Loading