Commit 35b574a6 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull mount fixes from Al Viro:
 "Various mount-related bugfixes:

   - split the do_move_mount() checks in subtree-of-our-ns and
     entire-anon cases and adapt detached mount propagation selftest for
     mount_setattr

   - allow clone_private_mount() for a path on real rootfs

   - fix a race in call of has_locked_children()

   - fix move_mount propagation graph breakage by MOVE_MOUNT_SET_GROUP

   - make sure clone_private_mnt() caller has CAP_SYS_ADMIN in the right
     userns

   - avoid false negatives in path_overmount()

   - don't leak MNT_LOCKED from parent to child in finish_automount()

   - do_change_type(): refuse to operate on unmounted/not ours mounts"

* tag 'pull-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
  do_change_type(): refuse to operate on unmounted/not ours mounts
  clone_private_mnt(): make sure that caller has CAP_SYS_ADMIN in the right userns
  selftests/mount_setattr: adapt detached mount propagation test
  do_move_mount(): split the checks in subtree-of-our-ns and entire-anon cases
  fs: allow clone_private_mount() for a path on real rootfs
  fix propagation graph breakage by MOVE_MOUNT_SET_GROUP move_mount(2)
  finish_automount(): don't leak MNT_LOCKED from parent to child
  path_overmount(): avoid false negatives
  fs/fhandle.c: fix a race in call of has_locked_children()
parents 522cd6ac 12f147dd
Loading
Loading
Loading
Loading
+71 −42
Original line number Diff line number Diff line
@@ -2410,7 +2410,7 @@ void drop_collected_mounts(struct vfsmount *mnt)
	namespace_unlock();
}

bool has_locked_children(struct mount *mnt, struct dentry *dentry)
static bool __has_locked_children(struct mount *mnt, struct dentry *dentry)
{
	struct mount *child;

@@ -2424,6 +2424,16 @@ bool has_locked_children(struct mount *mnt, struct dentry *dentry)
	return false;
}

bool has_locked_children(struct mount *mnt, struct dentry *dentry)
{
	bool res;

	read_seqlock_excl(&mount_lock);
	res = __has_locked_children(mnt, dentry);
	read_sequnlock_excl(&mount_lock);
	return res;
}

/*
 * Check that there aren't references to earlier/same mount namespaces in the
 * specified subtree.  Such references can act as pins for mount namespaces
@@ -2468,23 +2478,27 @@ struct vfsmount *clone_private_mount(const struct path *path)
	if (IS_MNT_UNBINDABLE(old_mnt))
		return ERR_PTR(-EINVAL);

	if (mnt_has_parent(old_mnt)) {
		if (!check_mnt(old_mnt))
			return ERR_PTR(-EINVAL);
	} else {
		if (!is_mounted(&old_mnt->mnt))
			return ERR_PTR(-EINVAL);

		/* Make sure this isn't something purely kernel internal. */
		if (!is_anon_ns(old_mnt->mnt_ns))
	/*
	 * Make sure the source mount is acceptable.
	 * Anything mounted in our mount namespace is allowed.
	 * Otherwise, it must be the root of an anonymous mount
	 * namespace, and we need to make sure no namespace
	 * loops get created.
	 */
	if (!check_mnt(old_mnt)) {
		if (!is_mounted(&old_mnt->mnt) ||
			!is_anon_ns(old_mnt->mnt_ns) ||
			mnt_has_parent(old_mnt))
			return ERR_PTR(-EINVAL);

		/* Make sure we don't create mount namespace loops. */
		if (!check_for_nsfs_mounts(old_mnt))
			return ERR_PTR(-EINVAL);
	}

	if (has_locked_children(old_mnt, path->dentry))
        if (!ns_capable(old_mnt->mnt_ns->user_ns, CAP_SYS_ADMIN))
		return ERR_PTR(-EPERM);

	if (__has_locked_children(old_mnt, path->dentry))
		return ERR_PTR(-EINVAL);

	new_mnt = clone_mnt(old_mnt, path->dentry, CL_PRIVATE);
@@ -2930,6 +2944,10 @@ static int do_change_type(struct path *path, int ms_flags)
		return -EINVAL;

	namespace_lock();
	if (!check_mnt(mnt)) {
		err = -EINVAL;
		goto out_unlock;
	}
	if (type == MS_SHARED) {
		err = invent_group_ids(mnt, recurse);
		if (err)
@@ -3021,7 +3039,7 @@ static struct mount *__do_loopback(struct path *old_path, int recurse)
	if (!may_copy_tree(old_path))
		return mnt;

	if (!recurse && has_locked_children(old, old_path->dentry))
	if (!recurse && __has_locked_children(old, old_path->dentry))
		return mnt;

	if (recurse)
@@ -3414,7 +3432,7 @@ static int do_set_group(struct path *from_path, struct path *to_path)
		goto out;

	/* From mount should not have locked children in place of To's root */
	if (has_locked_children(from, to->mnt.mnt_root))
	if (__has_locked_children(from, to->mnt.mnt_root))
		goto out;

	/* Setting sharing groups is only allowed on private mounts */
@@ -3428,7 +3446,7 @@ static int do_set_group(struct path *from_path, struct path *to_path)
	if (IS_MNT_SLAVE(from)) {
		struct mount *m = from->mnt_master;

		list_add(&to->mnt_slave, &m->mnt_slave_list);
		list_add(&to->mnt_slave, &from->mnt_slave);
		to->mnt_master = m;
	}

@@ -3453,18 +3471,25 @@ static int do_set_group(struct path *from_path, struct path *to_path)
 * Check if path is overmounted, i.e., if there's a mount on top of
 * @path->mnt with @path->dentry as mountpoint.
 *
 * Context: This function expects namespace_lock() to be held.
 * Context: namespace_sem must be held at least shared.
 * MUST NOT be called under lock_mount_hash() (there one should just
 * call __lookup_mnt() and check if it returns NULL).
 * Return: If path is overmounted true is returned, false if not.
 */
static inline bool path_overmounted(const struct path *path)
{
	unsigned seq = read_seqbegin(&mount_lock);
	bool no_child;

	rcu_read_lock();
	if (unlikely(__lookup_mnt(path->mnt, path->dentry))) {
	no_child = !__lookup_mnt(path->mnt, path->dentry);
	rcu_read_unlock();
		return true;
	if (need_seqretry(&mount_lock, seq)) {
		read_seqlock_excl(&mount_lock);
		no_child = !__lookup_mnt(path->mnt, path->dentry);
		read_sequnlock_excl(&mount_lock);
	}
	rcu_read_unlock();
	return false;
	return unlikely(!no_child);
}

/**
@@ -3623,36 +3648,40 @@ static int do_move_mount(struct path *old_path,
	ns = old->mnt_ns;

	err = -EINVAL;
	if (!may_use_mount(p))
		goto out;

	/* The thing moved must be mounted... */
	if (!is_mounted(&old->mnt))
		goto out;

	/* ... and either ours or the root of anon namespace */
	if (!(attached ? check_mnt(old) : is_anon_ns(ns)))
	if (check_mnt(old)) {
		/* if the source is in our namespace... */
		/* ... it should be detachable from parent */
		if (!mnt_has_parent(old) || IS_MNT_LOCKED(old))
			goto out;

	if (is_anon_ns(ns) && ns == p->mnt_ns) {
		/* ... and the target should be in our namespace */
		if (!check_mnt(p))
			goto out;
	} else {
		/*
		 * Ending up with two files referring to the root of the
		 * same anonymous mount namespace would cause an error
		 * as this would mean trying to move the same mount
		 * twice into the mount tree which would be rejected
		 * later. But be explicit about it right here.
		 * otherwise the source must be the root of some anon namespace.
		 * AV: check for mount being root of an anon namespace is worth
		 * an inlined predicate...
		 */
		if (!is_anon_ns(ns) || mnt_has_parent(old))
			goto out;
	} else if (is_anon_ns(p->mnt_ns)) {
		/*
		 * Don't allow moving an attached mount tree to an
		 * anonymous mount tree.
		 * Bail out early if the target is within the same namespace -
		 * subsequent checks would've rejected that, but they lose
		 * some corner cases if we check it early.
		 */
		if (ns == p->mnt_ns)
			goto out;
	}

	if (old->mnt.mnt_flags & MNT_LOCKED)
		/*
		 * Target should be either in our namespace or in an acceptable
		 * anon namespace, sensu check_anonymous_mnt().
		 */
		if (!may_use_mount(p))
			goto out;
	}

	if (!path_mounted(old_path))
		goto out;
+2 −1
Original line number Diff line number Diff line
@@ -65,7 +65,8 @@ enum mount_flags {
	MNT_ATIME_MASK = MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME,

	MNT_INTERNAL_FLAGS = MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL |
			     MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED,
			     MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED |
			     MNT_LOCKED,
};

struct vfsmount {
+1 −16
Original line number Diff line number Diff line
@@ -2079,24 +2079,9 @@ TEST_F(mount_setattr, detached_tree_propagation)
	 * means that the device information will be different for any
	 * statx() that was taken from /mnt/A before the mount compared
	 * to one after the mount.
	 *
	 * Since we already now that the device information between the
	 * stx1 and stx2 samples are identical we also now that stx2 and
	 * stx3 device information will necessarily differ.
	 */
	ASSERT_NE(stx1.stx_dev_minor, stx3.stx_dev_minor);

	/*
	 * If mount propagation worked correctly then the tmpfs mount
	 * that was created after the mount namespace was unshared will
	 * have propagated onto /mnt/A in the detached mount tree.
	 *
	 * Verify that the device information for stx3 and stx4 are
	 * identical. It is already established that stx3 is different
	 * from both stx1 and stx2 sampled before the tmpfs mount was
	 * done so if stx3 and stx4 are identical the proof is done.
	 */
	ASSERT_EQ(stx3.stx_dev_minor, stx4.stx_dev_minor);
	ASSERT_EQ(stx1.stx_dev_minor, stx4.stx_dev_minor);

	EXPECT_EQ(close(fd_tree), 0);
}