Merge tag 'vfs-6.15-rc1.overlayfs' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

Pull vfs overlayfs updates from Christian Brauner:
 "Currently overlayfs uses the mounter's credentials for its
  override_creds() calls. That provides a consistent permission model.

  This patch series allows a caller to instruct overlayfs to use the
  caller's credentials instead. The caller must be located in the same user
  namespace hierarchy as the user namespace the overlayfs instance will
  be mounted in. This provides a consistent and simple security model.

  With this it is possible to e.g., mount an overlayfs instance where
  the mounter must have CAP_SYS_ADMIN but the credentials used for
  override_creds() have dropped CAP_SYS_ADMIN. It also allows the usage
  of custom fs{g,u}id different from the caller's, and other tweaks"

* tag 'vfs-6.15-rc1.overlayfs' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
  selftests/ovl: add third selftest for "override_creds"
  selftests/ovl: add second selftest for "override_creds"
  selftests/filesystems: add utils.{c,h}
  selftests/ovl: add first selftest for "override_creds"
  ovl: allow to specify override credentials
This commit is contained in:
Linus Torvalds
2025-03-24 10:37:40 -07:00
7 changed files with 926 additions and 12 deletions

View File

@@ -1,7 +1,14 @@
# SPDX-License-Identifier: GPL-2.0
# Selftest build configuration; the generic rules come from ../../lib.mk.
# NOTE(review): TEST_GEN_PROGS and CFLAGS are each assigned twice below. This
# looks like the removed and added lines of a diff shown without +/- markers;
# read literally, the later ":=" assignments discard the earlier values
# (including $(KHDR_INCLUDES)) -- confirm which lines are current.
TEST_GEN_PROGS := dev_in_maps set_layers_via_fds
CFLAGS += -Wall
CFLAGS += $(KHDR_INCLUDES)
# libcap is linked in; the tests call cap_down().
LDLIBS += -lcap
CFLAGS := -Wall -Werror
LOCAL_HDRS += wrappers.h log.h
TEST_GEN_PROGS := dev_in_maps
TEST_GEN_PROGS += set_layers_via_fds
include ../../lib.mk
# set_layers_via_fds additionally depends on the shared helpers in ../utils.c.
$(OUTPUT)/set_layers_via_fds: ../utils.c

View File

@@ -6,30 +6,40 @@
#include <sched.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/sysmacros.h>
#include <sys/mount.h>
#include <unistd.h>
#include "../../kselftest_harness.h"
#include "../../pidfd/pidfd.h"
#include "log.h"
#include "../utils.h"
#include "wrappers.h"
/* Per-test state shared between fixture setup, the test body and teardown. */
FIXTURE(set_layers_via_fds) {
/*
 * pidfd of a child created by a test that teardown still has to kill and
 * close; -EBADF while no such child is pending.
 */
int pidfd;
};
/*
 * Fixture setup: create the two directories the tests mount on and mark the
 * fixture's pidfd as unset.
 *
 * NOTE(review): the directory creation appears twice, once with ASSERT_EQ and
 * once with EXPECT_EQ. This looks like the removed and added lines of a diff
 * shown without +/- markers; read literally, the second pair would run against
 * directories that already exist -- confirm which pair is current.
 */
FIXTURE_SETUP(set_layers_via_fds)
{
ASSERT_EQ(mkdir("/set_layers_via_fds", 0755), 0);
ASSERT_EQ(mkdir("/set_layers_via_fds_tmpfs", 0755), 0);
/* No child is pending yet; teardown only acts on pidfd values >= 0. */
self->pidfd = -EBADF;
EXPECT_EQ(mkdir("/set_layers_via_fds", 0755), 0);
EXPECT_EQ(mkdir("/set_layers_via_fds_tmpfs", 0755), 0);
}
/*
 * Fixture teardown: kill and release a still-pending child (if any), then
 * unmount and remove the two directories created by setup.
 *
 * NOTE(review): each rmdir() appears twice, once with ASSERT_EQ and once with
 * EXPECT_EQ. This looks like the removed and added lines of a diff shown
 * without +/- markers; read literally, the second rmdir() of each pair would
 * run on a directory that is already gone -- confirm which form is current.
 */
FIXTURE_TEARDOWN(set_layers_via_fds)
{
/* A test stored a pidfd in the fixture and did not reset it: SIGKILL the child and drop the fd. */
if (self->pidfd >= 0) {
EXPECT_EQ(sys_pidfd_send_signal(self->pidfd, SIGKILL, NULL, 0), 0);
EXPECT_EQ(close(self->pidfd), 0);
}
/* The umount2() results are deliberately not checked here. */
umount2("/set_layers_via_fds", 0);
ASSERT_EQ(rmdir("/set_layers_via_fds"), 0);
EXPECT_EQ(rmdir("/set_layers_via_fds"), 0);
umount2("/set_layers_via_fds_tmpfs", 0);
ASSERT_EQ(rmdir("/set_layers_via_fds_tmpfs"), 0);
EXPECT_EQ(rmdir("/set_layers_via_fds_tmpfs"), 0);
}
TEST_F(set_layers_via_fds, set_layers_via_fds)
@@ -218,6 +228,304 @@ TEST_F(set_layers_via_fds, set_500_layers_via_fds)
ASSERT_EQ(close(fd_overlay), 0);
}
/*
 * Toggle the "override_creds" mount option on an overlayfs context from
 * short-lived child processes, then create and mount the filesystem.
 *
 * The option is set, cleared again via "nooverride_creds", and set once more,
 * each time from a fresh child created with create_child(). The child's exit
 * status is asserted with wait_for_pid(), the same way
 * set_override_creds_invalid checks its child, so a failing fsconfig() in a
 * child fails the test instead of going unnoticed.
 */
TEST_F(set_layers_via_fds, set_override_creds)
{
	/* Directory names in layer_fds[] order: workdir, upperdir, lower layers. */
	static const char *const layer_names[] = { "w", "u", "l1", "l2" };
	/* Options applied in order, one child process per option. */
	static const char *const creds_opts[] = {
		"override_creds",
		"nooverride_creds",
		"override_creds",
	};
	int fd_context, fd_tmpfs, fd_overlay;
	int layer_fds[] = { [0 ... 3] = -EBADF };
	pid_t pid;
	int pidfd;

	/* Private mount namespace with all mounts turned into slaves. */
	ASSERT_EQ(unshare(CLONE_NEWNS), 0);
	ASSERT_EQ(sys_mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL), 0);

	/* Detached tmpfs that provides the backing directories for the layers. */
	fd_context = sys_fsopen("tmpfs", 0);
	ASSERT_GE(fd_context, 0);
	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
	fd_tmpfs = sys_fsmount(fd_context, 0, 0);
	ASSERT_GE(fd_tmpfs, 0);
	ASSERT_EQ(close(fd_context), 0);

	for (size_t i = 0; i < ARRAY_SIZE(layer_names); i++) {
		ASSERT_EQ(mkdirat(fd_tmpfs, layer_names[i], 0755), 0);
	}

	for (size_t i = 0; i < ARRAY_SIZE(layer_names); i++) {
		layer_fds[i] = openat(fd_tmpfs, layer_names[i], O_DIRECTORY);
		ASSERT_GE(layer_fds[i], 0);
	}

	ASSERT_EQ(sys_move_mount(fd_tmpfs, "", -EBADF, "/tmp", MOVE_MOUNT_F_EMPTY_PATH), 0);
	ASSERT_EQ(close(fd_tmpfs), 0);

	fd_context = sys_fsopen("overlay", 0);
	ASSERT_GE(fd_context, 0);

	/* Passing an fd for plain "lowerdir" is expected to be rejected. */
	ASSERT_NE(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir", NULL, layer_fds[2]), 0);

	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "workdir", NULL, layer_fds[0]), 0);
	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "upperdir", NULL, layer_fds[1]), 0);
	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[2]), 0);
	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[3]), 0);

	/*
	 * The layer fds are not used again after being handed to the context;
	 * close them here, as set_override_creds_invalid does, instead of
	 * leaking them.
	 */
	for (size_t i = 0; i < ARRAY_SIZE(layer_fds); i++) {
		ASSERT_EQ(close(layer_fds[i]), 0);
	}

	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_STRING, "metacopy", "on", 0), 0);

	for (size_t i = 0; i < ARRAY_SIZE(creds_opts); i++) {
		pid = create_child(&pidfd, 0);
		ASSERT_GE(pid, 0);
		if (pid == 0) {
			if (sys_fsconfig(fd_context, FSCONFIG_SET_FLAG, creds_opts[i], NULL, 0)) {
				TH_LOG("sys_fsconfig should have succeeded");
				_exit(EXIT_FAILURE);
			}

			_exit(EXIT_SUCCESS);
		}

		/* Reap the child and require that it exited successfully. */
		ASSERT_EQ(wait_for_pid(pid), 0);
		ASSERT_EQ(close(pidfd), 0);
	}

	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);

	fd_overlay = sys_fsmount(fd_context, 0, 0);
	ASSERT_GE(fd_overlay, 0);

	ASSERT_EQ(sys_move_mount(fd_overlay, "", -EBADF, "/set_layers_via_fds", MOVE_MOUNT_F_EMPTY_PATH), 0);

	ASSERT_EQ(close(fd_context), 0);
	ASSERT_EQ(close(fd_overlay), 0);
}
/*
 * Negative test for the "override_creds" mount option.
 *
 * A child is created first and calls switch_userns() on fd_userns2; the parent
 * calls switch_userns() on fd_userns1 and only then opens the overlayfs
 * context. Once the parent signals over a socketpair that the context fd has
 * been placed at a fixed number, the child tries to set "override_creds" on
 * that fd number and must fail. The parent then finishes configuring and
 * mounting the overlayfs instance itself.
 *
 * The child's pidfd lives in the fixture so that teardown can kill the child
 * if the parent bails out before reaping it.
 *
 * NOTE(review): the child is created with flags == 0 before the context fd
 * exists. If create_child() does not share the fd table in that case, fd 123
 * is not open in the child and fsconfig() fails for that reason alone rather
 * than because of the user namespace check -- confirm the intent.
 */
TEST_F(set_layers_via_fds, set_override_creds_invalid)
{
	/* Directory names in layer_fds[] order: workdir, upperdir, lower layers. */
	static const char *const layer_names[] = { "w", "u", "l1", "l2" };
	const unsigned int predictable_fd_context_nr = 123;
	int layer_fds[] = { [0 ... 3] = -EBADF };
	int fd_context, fd_tmpfs, fd_overlay;
	int fd_userns1, fd_userns2;
	int ipc_sockets[2];
	pid_t pid;

	fd_userns1 = get_userns_fd(0, 0, 10000);
	ASSERT_GE(fd_userns1, 0);

	fd_userns2 = get_userns_fd(0, 1234, 10000);
	ASSERT_GE(fd_userns2, 0);

	ASSERT_GE(socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets), 0);

	pid = create_child(&self->pidfd, 0);
	ASSERT_GE(pid, 0);
	if (pid == 0) {
		char sync_byte;

		/* The child only keeps its own end of the socketpair. */
		if (close(ipc_sockets[0])) {
			TH_LOG("close should have succeeded");
			_exit(EXIT_FAILURE);
		}

		if (!switch_userns(fd_userns2, 0, 0, false)) {
			TH_LOG("switch_userns should have succeeded");
			_exit(EXIT_FAILURE);
		}

		/* Block until the parent reports that the context fd is in place. */
		if (read_nointr(ipc_sockets[1], &sync_byte, 1) != 1) {
			TH_LOG("read_nointr should have succeeded");
			_exit(EXIT_FAILURE);
		}

		if (close(ipc_sockets[1])) {
			TH_LOG("close should have succeeded");
			_exit(EXIT_FAILURE);
		}

		/* Success of fsconfig() here is the failure condition. */
		if (!sys_fsconfig(predictable_fd_context_nr, FSCONFIG_SET_FLAG, "override_creds", NULL, 0)) {
			TH_LOG("sys_fsconfig should have failed");
			_exit(EXIT_FAILURE);
		}

		_exit(EXIT_SUCCESS);
	}

	ASSERT_EQ(close(ipc_sockets[1]), 0);
	ASSERT_EQ(switch_userns(fd_userns1, 0, 0, false), true);
	ASSERT_EQ(unshare(CLONE_NEWNS), 0);
	ASSERT_EQ(sys_mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL), 0);

	/* Detached tmpfs that provides the backing directories for the layers. */
	fd_context = sys_fsopen("tmpfs", 0);
	ASSERT_GE(fd_context, 0);
	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
	fd_tmpfs = sys_fsmount(fd_context, 0, 0);
	ASSERT_GE(fd_tmpfs, 0);
	ASSERT_EQ(close(fd_context), 0);

	for (size_t idx = 0; idx < ARRAY_SIZE(layer_names); idx++) {
		ASSERT_EQ(mkdirat(fd_tmpfs, layer_names[idx], 0755), 0);
	}

	for (size_t idx = 0; idx < ARRAY_SIZE(layer_names); idx++) {
		layer_fds[idx] = openat(fd_tmpfs, layer_names[idx], O_DIRECTORY);
		ASSERT_GE(layer_fds[idx], 0);
	}

	ASSERT_EQ(sys_move_mount(fd_tmpfs, "", -EBADF, "/tmp", MOVE_MOUNT_F_EMPTY_PATH), 0);
	ASSERT_EQ(close(fd_tmpfs), 0);

	/* Open the overlayfs context and move it to the agreed-upon fd number. */
	fd_context = sys_fsopen("overlay", 0);
	ASSERT_GE(fd_context, 0);
	ASSERT_EQ(dup3(fd_context, predictable_fd_context_nr, 0), predictable_fd_context_nr);
	ASSERT_EQ(close(fd_context), 0);
	fd_context = predictable_fd_context_nr;

	/* Release the child, then require that it exited successfully. */
	ASSERT_EQ(write_nointr(ipc_sockets[0], "1", 1), 1);
	ASSERT_EQ(close(ipc_sockets[0]), 0);
	ASSERT_EQ(wait_for_pid(pid), 0);
	ASSERT_EQ(close(self->pidfd), 0);
	/* Child is reaped and its pidfd closed: nothing left for teardown. */
	self->pidfd = -EBADF;

	/* Passing an fd for plain "lowerdir" is expected to be rejected. */
	ASSERT_NE(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir", NULL, layer_fds[2]), 0);

	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "workdir", NULL, layer_fds[0]), 0);
	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "upperdir", NULL, layer_fds[1]), 0);
	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[2]), 0);
	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[3]), 0);

	for (int i = 0; i < ARRAY_SIZE(layer_fds); i++)
		ASSERT_EQ(close(layer_fds[i]), 0);

	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FLAG, "userxattr", NULL, 0), 0);
	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);

	fd_overlay = sys_fsmount(fd_context, 0, 0);
	ASSERT_GE(fd_overlay, 0);

	ASSERT_EQ(sys_move_mount(fd_overlay, "", -EBADF, "/set_layers_via_fds", MOVE_MOUNT_F_EMPTY_PATH), 0);

	ASSERT_EQ(close(fd_context), 0);
	ASSERT_EQ(close(fd_overlay), 0);
	ASSERT_EQ(close(fd_userns1), 0);
	ASSERT_EQ(close(fd_userns2), 0);
}
/*
 * Check that credentials captured by "override_creds" without CAP_MKNOD are
 * the ones applied to the mounted overlayfs.
 *
 * A child calls cap_down() for CAP_MKNOD and CAP_SYS_ADMIN and then sets
 * "override_creds" on the context. The parent, which did not call cap_down(),
 * creates and mounts the filesystem and expects mknodat() of a character
 * device on it to fail with EPERM.
 *
 * The child's exit status is asserted with wait_for_pid(), the same way
 * set_override_creds_invalid checks its child, so a child that failed to drop
 * a capability or to set the option fails the test right away.
 */
TEST_F(set_layers_via_fds, set_override_creds_nomknod)
{
	/* Directory names in layer_fds[] order: workdir, upperdir, lower layers. */
	static const char *const layer_names[] = { "w", "u", "l1", "l2" };
	int fd_context, fd_tmpfs, fd_overlay;
	int layer_fds[] = { [0 ... 3] = -EBADF };
	pid_t pid;
	int pidfd;

	/* Private mount namespace with all mounts turned into slaves. */
	ASSERT_EQ(unshare(CLONE_NEWNS), 0);
	ASSERT_EQ(sys_mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL), 0);

	/* Detached tmpfs that provides the backing directories for the layers. */
	fd_context = sys_fsopen("tmpfs", 0);
	ASSERT_GE(fd_context, 0);
	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
	fd_tmpfs = sys_fsmount(fd_context, 0, 0);
	ASSERT_GE(fd_tmpfs, 0);
	ASSERT_EQ(close(fd_context), 0);

	for (size_t i = 0; i < ARRAY_SIZE(layer_names); i++) {
		ASSERT_EQ(mkdirat(fd_tmpfs, layer_names[i], 0755), 0);
	}

	for (size_t i = 0; i < ARRAY_SIZE(layer_names); i++) {
		layer_fds[i] = openat(fd_tmpfs, layer_names[i], O_DIRECTORY);
		ASSERT_GE(layer_fds[i], 0);
	}

	ASSERT_EQ(sys_move_mount(fd_tmpfs, "", -EBADF, "/tmp", MOVE_MOUNT_F_EMPTY_PATH), 0);
	ASSERT_EQ(close(fd_tmpfs), 0);

	fd_context = sys_fsopen("overlay", 0);
	ASSERT_GE(fd_context, 0);

	/* Passing an fd for plain "lowerdir" is expected to be rejected. */
	ASSERT_NE(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir", NULL, layer_fds[2]), 0);

	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "workdir", NULL, layer_fds[0]), 0);
	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "upperdir", NULL, layer_fds[1]), 0);
	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[2]), 0);
	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[3]), 0);

	/*
	 * The layer fds are not used again after being handed to the context;
	 * close them here, as set_override_creds_invalid does, instead of
	 * leaking them.
	 */
	for (size_t i = 0; i < ARRAY_SIZE(layer_fds); i++) {
		ASSERT_EQ(close(layer_fds[i]), 0);
	}

	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FLAG, "userxattr", NULL, 0), 0);

	pid = create_child(&pidfd, 0);
	ASSERT_GE(pid, 0);
	if (pid == 0) {
		if (!cap_down(CAP_MKNOD))
			_exit(EXIT_FAILURE);

		if (!cap_down(CAP_SYS_ADMIN))
			_exit(EXIT_FAILURE);

		if (sys_fsconfig(fd_context, FSCONFIG_SET_FLAG, "override_creds", NULL, 0))
			_exit(EXIT_FAILURE);

		_exit(EXIT_SUCCESS);
	}

	/* Reap the child and require that it exited successfully. */
	ASSERT_EQ(wait_for_pid(pid), 0);
	ASSERT_EQ(close(pidfd), 0);

	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);

	fd_overlay = sys_fsmount(fd_context, 0, 0);
	ASSERT_GE(fd_overlay, 0);

	ASSERT_EQ(sys_move_mount(fd_overlay, "", -EBADF, "/set_layers_via_fds", MOVE_MOUNT_F_EMPTY_PATH), 0);

	/* Creating a device node must be refused with EPERM. */
	ASSERT_EQ(mknodat(fd_overlay, "dev-zero", S_IFCHR | 0644, makedev(1, 5)), -1);
	ASSERT_EQ(errno, EPERM);

	ASSERT_EQ(close(fd_context), 0);
	ASSERT_EQ(close(fd_overlay), 0);
}
TEST_F(set_layers_via_fds, set_500_layers_via_opath_fds)
{
int fd_context, fd_tmpfs, fd_overlay, fd_work, fd_upper, fd_lower;