Commit cdc59a62 authored by Al Viro
Browse files

kernel/acct.c: saner struct file treatment



        Instead of switching ->f_path.mnt of an opened file to internal
clone, get a struct path with ->mnt set to internal clone of that
->f_path.mnt, then dentry_open() that to get the file with right ->f_path.mnt
from the very beginning.

	The only subtle part here is that on failure exits we need to
close the file with __fput_sync() and make sure we do that *before*
dropping the original mount.

	With that done, only fs/{file_table,open,namei}.c ever store
anything to file->f_path and only prior to file->f_mode & FMODE_OPENED
becoming true.  Analysis of mount write count handling also becomes
less brittle and convoluted...

[AV: folded a fix for a bug spotted by Jan Kara - we do need a full-blown
open of the original file, not just user_path_at() or we end up skipping
permission checks]

Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Christian Brauner <brauner@kernel.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
parent b320789d
Loading
Loading
Loading
Loading
+36 −60
Original line number Diff line number Diff line
@@ -44,19 +44,14 @@
 * a struct file opened for write. Fixed. 2/6/2000, AV.
 */

#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/acct.h>
#include <linux/capability.h>
#include <linux/file.h>
#include <linux/tty.h>
#include <linux/security.h>
#include <linux/vfs.h>
#include <linux/statfs.h>
#include <linux/jiffies.h>
#include <linux/times.h>
#include <linux/syscalls.h>
#include <linux/mount.h>
#include <linux/uaccess.h>
#include <linux/namei.h>
#include <linux/sched/cputime.h>

#include <asm/div64.h>
@@ -217,84 +212,70 @@ static void close_work(struct work_struct *work)
	complete(&acct->done);
}

/*
 * NOTE(review): this region is a diff hunk whose +/- markers were lost, so
 * lines removed from and lines added to acct_on() appear interleaved (two
 * signatures, duplicate declarations, unbalanced braces).  Read it as a
 * before/after comparison, not as compilable C.
 *
 * acct_on() opens the named file with O_WRONLY|O_APPEND|O_LARGEFILE, checks
 * that it is a writable regular file on an acceptable superblock, allocates
 * a bsd_acct_struct for it and installs that as ->bacct of the caller's
 * active pid namespace, killing whatever pin was installed before.
 *
 * Returns 0 on success or a negative errno:
 *   the PTR_ERR() of a failed getname()/file_open_name()/
 *   mnt_clone_internal()/dentry_open(),
 *   -EACCES if the file is not a regular file,
 *   -EINVAL if the superblock has SB_NOUSER, SB_KERNMOUNT or
 *           SB_I_USERNS_VISIBLE set,
 *   -EIO    if the file lacks FMODE_CAN_WRITE,
 *   -ENOMEM if the bsd_acct_struct cannot be allocated.
 */
static int acct_on(struct filename *pathname)
DEFINE_FREE(fput_sync, struct file *, if (!IS_ERR_OR_NULL(_T)) __fput_sync(_T))
static int acct_on(const char __user *name)
{
	struct file *file;
	struct vfsmount *mnt, *internal;
	/* Difference from BSD - they don't do O_APPEND */
	const int open_flags = O_WRONLY|O_APPEND|O_LARGEFILE;
	struct pid_namespace *ns = task_active_pid_ns(current);
	/*
	 * The __free() cleanups below are expected to run in reverse
	 * declaration order on every return, so on failure exits file is
	 * closed with __fput_sync() before internal and original_file (and
	 * with it the original mount) are dropped - hence "in that order".
	 */
	struct filename *pathname __free(putname) = getname(name);
	struct file *original_file __free(fput) = NULL;	// in that order
	struct path internal __free(path_put) = {};	// in that order
	struct file *file __free(fput_sync) = NULL;	// in that order
	struct bsd_acct_struct *acct;
	struct vfsmount *mnt;
	struct fs_pin *old;
	int err;

	acct = kzalloc(sizeof(struct bsd_acct_struct), GFP_KERNEL);
	if (!acct)
		return -ENOMEM;
	if (IS_ERR(pathname))
		return PTR_ERR(pathname);
	original_file = file_open_name(pathname, open_flags, 0);
	if (IS_ERR(original_file))
		return PTR_ERR(original_file);

	/* Difference from BSD - they don't do O_APPEND */
	file = file_open_name(pathname, O_WRONLY|O_APPEND|O_LARGEFILE, 0);
	if (IS_ERR(file)) {
		kfree(acct);
	mnt = mnt_clone_internal(&original_file->f_path);
	if (IS_ERR(mnt))
		return PTR_ERR(mnt);

	internal.mnt = mnt;
	internal.dentry = dget(mnt->mnt_root);

	file = dentry_open(&internal, open_flags, current_cred());
	if (IS_ERR(file))
		return PTR_ERR(file);
	}

	if (!S_ISREG(file_inode(file)->i_mode)) {
		kfree(acct);
		filp_close(file, NULL);
	if (!S_ISREG(file_inode(file)->i_mode))
		return -EACCES;
	}

	/* Exclude kernel internal filesystems. */
	if (file_inode(file)->i_sb->s_flags & (SB_NOUSER | SB_KERNMOUNT)) {
		kfree(acct);
		filp_close(file, NULL);
	if (file_inode(file)->i_sb->s_flags & (SB_NOUSER | SB_KERNMOUNT))
		return -EINVAL;
	}

	/* Exclude procfs and sysfs. */
	if (file_inode(file)->i_sb->s_iflags & SB_I_USERNS_VISIBLE) {
		kfree(acct);
		filp_close(file, NULL);
	if (file_inode(file)->i_sb->s_iflags & SB_I_USERNS_VISIBLE)
		return -EINVAL;
	}

	if (!(file->f_mode & FMODE_CAN_WRITE)) {
		kfree(acct);
		filp_close(file, NULL);
	if (!(file->f_mode & FMODE_CAN_WRITE))
		return -EIO;
	}
	internal = mnt_clone_internal(&file->f_path);
	if (IS_ERR(internal)) {
		kfree(acct);
		filp_close(file, NULL);
		return PTR_ERR(internal);
	}
	err = mnt_get_write_access(internal);
	if (err) {
		mntput(internal);
		kfree(acct);
		filp_close(file, NULL);
		return err;
	}
	mnt = file->f_path.mnt;
	file->f_path.mnt = internal;

	acct = kzalloc(sizeof(struct bsd_acct_struct), GFP_KERNEL);
	if (!acct)
		return -ENOMEM;

	atomic_long_set(&acct->count, 1);
	init_fs_pin(&acct->pin, acct_pin_kill);
	acct->file = file;
	/*
	 * NOTE(review): presumably no_free_ptr() hands the file over to acct
	 * and disarms the fput_sync cleanup - confirm against linux/cleanup.h.
	 */
	acct->file = no_free_ptr(file);
	acct->needcheck = jiffies;
	acct->ns = ns;
	mutex_init(&acct->lock);
	INIT_WORK(&acct->work, close_work);
	init_completion(&acct->done);
	mutex_lock_nested(&acct->lock, 1);	/* nobody has seen it yet */
	pin_insert(&acct->pin, mnt);
	pin_insert(&acct->pin, original_file->f_path.mnt);

	rcu_read_lock();
	/* Publish the new pin for this pid namespace and kill the one it replaces. */
	old = xchg(&ns->bacct, &acct->pin);
	mutex_unlock(&acct->lock);
	pin_kill(old);
	mnt_put_write_access(mnt);
	mntput(mnt);
	return 0;
}

@@ -319,14 +300,9 @@ SYSCALL_DEFINE1(acct, const char __user *, name)
		return -EPERM;

	if (name) {
		struct filename *tmp = getname(name);

		if (IS_ERR(tmp))
			return PTR_ERR(tmp);
		mutex_lock(&acct_on_mutex);
		error = acct_on(tmp);
		error = acct_on(name);
		mutex_unlock(&acct_on_mutex);
		putname(tmp);
	} else {
		rcu_read_lock();
		pin_kill(task_active_pid_ns(current)->bacct);