Commit fa2b906f authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull vfs fixes from Christian Brauner:

 - Avoid calling back into LSMs from vfs_getattr_nosec() calls.

   IMA used to query inode properties accessing raw inode fields without
   dedicated helpers. That was finally fixed a few releases ago by
   forcing IMA to use vfs_getattr_nosec() helpers.

   The goal of the vfs_getattr_nosec() helper is to query for attributes
   without calling into the LSM layer which would be quite problematic
   because incredibly IMA is called from __fput()...

     __fput()
       -> ima_file_free()

   What it does is to call back into the filesystem to update the file's
   IMA xattr. Querying the inode without using vfs_getattr_nosec() meant
   that IMA didn't handle stacking filesystems such as overlayfs
   correctly. So the switch to vfs_getattr_nosec() is quite correct. But
   the switch to vfs_getattr_nosec() revealed another bug when used on
   stacking filesystems:

     __fput()
       -> ima_file_free()
          -> vfs_getattr_nosec()
             -> i_op->getattr::ovl_getattr()
                -> vfs_getattr()
                   -> i_op->getattr::$WHATEVER_UNDERLYING_FS_getattr()
                      -> security_inode_getattr() # calls back into LSMs

   Now, if that __fput() happens from task_work_run() of an exiting task
   current->fs and various other pointer could already be NULL. So
   anything in the LSM layer relying on that not being NULL would be
   quite surprised.

   Fix that by passing the information that this is a security request
   through to the stacking filesystem by adding a new internal
   ATT_GETATTR_NOSEC flag. Now the callchain becomes:

     __fput()
       -> ima_file_free()
          -> vfs_getattr_nosec()
             -> i_op->getattr::ovl_getattr()
                -> if (AT_GETATTR_NOSEC)
                          vfs_getattr_nosec()
                   else
                          vfs_getattr()
                   -> i_op->getattr::$WHATEVER_UNDERLYING_FS_getattr()

 - Fix a bug introduced with the iov_iter rework from last cycle.

   This broke /proc/kcore by copying too much and without the correct
   offset.

 - Add a missing NULL check when allocating the root inode in
   autofs_fill_super().

 - Fix stable writes for multi-device filesystems (xfs, btrfs etc) and
   the block device pseudo filesystem.

   Stable writes used to be a superblock flag only, making it a per
   filesystem property. Add an additional AS_STABLE_WRITES mapping flag
   to allow for fine-grained control.

 - Ensure that offset_iterate_dir() returns 0 after reaching the end of
   a directory so it adheres to getdents() convention.

* tag 'vfs-6.7-rc3.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
  libfs: getdents() should return 0 after reaching EOD
  xfs: respect the stable writes flag on the RT device
  xfs: clean up FS_XFLAG_REALTIME handling in xfs_ioctl_setattr_xflags
  block: update the stable_writes flag in bdev_add
  filemap: add a per-mapping stable writes flag
  autofs: add: new_inode check in autofs_fill_super()
  iov_iter: fix copy_page_to_iter_nofault()
  fs: Pass AT_GETATTR_NOSEC flag to getattr interface function
parents afa0f6ee 796432ef
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -425,6 +425,8 @@ void bdev_set_nr_sectors(struct block_device *bdev, sector_t sectors)

void bdev_add(struct block_device *bdev, dev_t dev)
{
	if (bdev_stable_writes(bdev))
		mapping_set_stable_writes(bdev->bd_inode->i_mapping);
	bdev->bd_dev = dev;
	bdev->bd_inode->i_rdev = dev;
	bdev->bd_inode->i_ino = dev;
+21 −35
Original line number Diff line number Diff line
@@ -309,9 +309,7 @@ static int autofs_fill_super(struct super_block *s, struct fs_context *fc)
	struct autofs_fs_context *ctx = fc->fs_private;
	struct autofs_sb_info *sbi = s->s_fs_info;
	struct inode *root_inode;
	struct dentry *root;
	struct autofs_info *ino;
	int ret = -ENOMEM;

	pr_debug("starting up, sbi = %p\n", sbi);

@@ -328,56 +326,44 @@ static int autofs_fill_super(struct super_block *s, struct fs_context *fc)
	 */
	ino = autofs_new_ino(sbi);
	if (!ino)
		goto fail;
		return -ENOMEM;

	root_inode = autofs_get_inode(s, S_IFDIR | 0755);
	if (!root_inode)
		return -ENOMEM;

	root_inode->i_uid = ctx->uid;
	root_inode->i_gid = ctx->gid;
	root_inode->i_fop = &autofs_root_operations;
	root_inode->i_op = &autofs_dir_inode_operations;

	root = d_make_root(root_inode);
	if (!root)
		goto fail_ino;

	root->d_fsdata = ino;
	s->s_root = d_make_root(root_inode);
	if (unlikely(!s->s_root)) {
		autofs_free_ino(ino);
		return -ENOMEM;
	}
	s->s_root->d_fsdata = ino;

	if (ctx->pgrp_set) {
		sbi->oz_pgrp = find_get_pid(ctx->pgrp);
		if (!sbi->oz_pgrp) {
			ret = invalf(fc, "Could not find process group %d",
		if (!sbi->oz_pgrp)
			return invalf(fc, "Could not find process group %d",
				      ctx->pgrp);
			goto fail_dput;
		}
	} else {
	} else
		sbi->oz_pgrp = get_task_pid(current, PIDTYPE_PGID);
	}

	if (autofs_type_trigger(sbi->type))
		__managed_dentry_set_managed(root);

	root_inode->i_fop = &autofs_root_operations;
	root_inode->i_op = &autofs_dir_inode_operations;
		/* s->s_root won't be contended so there's little to
		 * be gained by not taking the d_lock when setting
		 * d_flags, even when a lot mounts are being done.
		 */
		managed_dentry_set_managed(s->s_root);

	pr_debug("pipe fd = %d, pgrp = %u\n",
		 sbi->pipefd, pid_nr(sbi->oz_pgrp));

	sbi->flags &= ~AUTOFS_SBI_CATATONIC;

	/*
	 * Success! Install the root dentry now to indicate completion.
	 */
	s->s_root = root;
	return 0;

	/*
	 * Failure ... clean up.
	 */
fail_dput:
	dput(root);
	goto fail;
fail_ino:
	autofs_free_ino(ino);
fail:
	return ret;
}

/*
+10 −2
Original line number Diff line number Diff line
@@ -998,6 +998,14 @@ static int ecryptfs_getattr_link(struct mnt_idmap *idmap,
	return rc;
}

static int ecryptfs_do_getattr(const struct path *path, struct kstat *stat,
			       u32 request_mask, unsigned int flags)
{
	if (flags & AT_GETATTR_NOSEC)
		return vfs_getattr_nosec(path, stat, request_mask, flags);
	return vfs_getattr(path, stat, request_mask, flags);
}

static int ecryptfs_getattr(struct mnt_idmap *idmap,
			    const struct path *path, struct kstat *stat,
			    u32 request_mask, unsigned int flags)
@@ -1006,8 +1014,8 @@ static int ecryptfs_getattr(struct mnt_idmap *idmap,
	struct kstat lower_stat;
	int rc;

	rc = vfs_getattr(ecryptfs_dentry_to_lower_path(dentry), &lower_stat,
			 request_mask, flags);
	rc = ecryptfs_do_getattr(ecryptfs_dentry_to_lower_path(dentry),
				 &lower_stat, request_mask, flags);
	if (!rc) {
		fsstack_copy_attr_all(d_inode(dentry),
				      ecryptfs_inode_to_lower(d_inode(dentry)));
+2 −0
Original line number Diff line number Diff line
@@ -215,6 +215,8 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
	lockdep_set_class_and_name(&mapping->invalidate_lock,
				   &sb->s_type->invalidate_lock_key,
				   "mapping.invalidate_lock");
	if (sb->s_iflags & SB_I_STABLE_WRITES)
		mapping_set_stable_writes(mapping);
	inode->i_private = NULL;
	inode->i_mapping = mapping;
	INIT_HLIST_HEAD(&inode->i_dentry);	/* buggered by rcu freeing */
+11 −3
Original line number Diff line number Diff line
@@ -399,6 +399,8 @@ static loff_t offset_dir_llseek(struct file *file, loff_t offset, int whence)
		return -EINVAL;
	}

	/* In this case, ->private_data is protected by f_pos_lock */
	file->private_data = NULL;
	return vfs_setpos(file, offset, U32_MAX);
}

@@ -428,7 +430,7 @@ static bool offset_dir_emit(struct dir_context *ctx, struct dentry *dentry)
			  inode->i_ino, fs_umode_to_dtype(inode->i_mode));
}

static void offset_iterate_dir(struct inode *inode, struct dir_context *ctx)
static void *offset_iterate_dir(struct inode *inode, struct dir_context *ctx)
{
	struct offset_ctx *so_ctx = inode->i_op->get_offset_ctx(inode);
	XA_STATE(xas, &so_ctx->xa, ctx->pos);
@@ -437,7 +439,7 @@ static void offset_iterate_dir(struct inode *inode, struct dir_context *ctx)
	while (true) {
		dentry = offset_find_next(&xas);
		if (!dentry)
			break;
			return ERR_PTR(-ENOENT);

		if (!offset_dir_emit(ctx, dentry)) {
			dput(dentry);
@@ -447,6 +449,7 @@ static void offset_iterate_dir(struct inode *inode, struct dir_context *ctx)
		dput(dentry);
		ctx->pos = xas.xa_index + 1;
	}
	return NULL;
}

/**
@@ -479,7 +482,12 @@ static int offset_readdir(struct file *file, struct dir_context *ctx)
	if (!dir_emit_dots(file, ctx))
		return 0;

	offset_iterate_dir(d_inode(dir), ctx);
	/* In this case, ->private_data is protected by f_pos_lock */
	if (ctx->pos == 2)
		file->private_data = NULL;
	else if (file->private_data == ERR_PTR(-ENOENT))
		return 0;
	file->private_data = offset_iterate_dir(d_inode(dir), ctx);
	return 0;
}

Loading