Commit 42d23732, authored and committed by Kent Overstreet
Browse files

bcachefs: Snapshot creation, deletion



This is the final patch in the patch series implementing snapshots.
This patch implements two new ioctls that work like creation and
deletion of directories, but fancier.

 - BCH_IOCTL_SUBVOLUME_CREATE, for creating new subvolumes and snapshots
 - BCH_IOCTL_SUBVOLUME_DESTROY, for deleting subvolumes and snapshots

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
parent a861c722
Loading
Loading
Loading
Loading
+0 −8
Original line number Diff line number Diff line
@@ -383,14 +383,6 @@ int bch2_dirent_rename(struct btree_trans *trans,
	return ret;
}

/*
 * Delete the dirent that @iter points at.
 *
 * Forwards to the generic hash-table delete using the dirent hash
 * descriptor; the result of that call is returned unchanged.
 */
int bch2_dirent_delete_at(struct btree_trans *trans,
			  const struct bch_hash_info *hash_info,
			  struct btree_iter *iter)
{
	int ret;

	ret = bch2_hash_delete_at(trans, bch2_dirent_hash_desc,
				  hash_info, iter);

	return ret;
}

int __bch2_dirent_lookup_trans(struct btree_trans *trans,
			       struct btree_iter *iter,
			       subvol_inum dir,
+0 −4
Original line number Diff line number Diff line
@@ -33,10 +33,6 @@ int bch2_dirent_create(struct btree_trans *, subvol_inum,
		       const struct bch_hash_info *, u8,
		       const struct qstr *, u64, u64 *, int);

int bch2_dirent_delete_at(struct btree_trans *,
			  const struct bch_hash_info *,
			  struct btree_iter *);

int __bch2_dirent_read_target(struct btree_trans *, struct bkey_s_c_dirent,
			      u32 *, u32 *, u64 *, bool);

+148 −34
Original line number Diff line number Diff line
@@ -11,6 +11,11 @@

#include <linux/posix_acl.h>

/*
 * Returns 1 if @inode is a directory with no bi_subvol set, 0 otherwise.
 *
 * Callers use the result to decide whether the parent directory's
 * bi_nlink should be adjusted for this inode.
 */
static inline int is_subdir_for_nlink(struct bch_inode_unpacked *inode)
{
	/* Inodes carrying a subvolume ID never count: */
	if (inode->bi_subvol)
		return 0;

	return S_ISDIR(inode->bi_mode) != 0;
}

int bch2_create_trans(struct btree_trans *trans,
		      subvol_inum dir,
		      struct bch_inode_unpacked *dir_u,
@@ -19,6 +24,7 @@ int bch2_create_trans(struct btree_trans *trans,
		      uid_t uid, gid_t gid, umode_t mode, dev_t rdev,
		      struct posix_acl *default_acl,
		      struct posix_acl *acl,
		      subvol_inum snapshot_src,
		      unsigned flags)
{
	struct bch_fs *c = trans->c;
@@ -27,10 +33,9 @@ int bch2_create_trans(struct btree_trans *trans,
	subvol_inum new_inum = dir;
	u64 now = bch2_current_time(c);
	u64 cpu = raw_smp_processor_id();
	u64 dir_offset = 0;
	u64 dir_target;
	u32 snapshot;
	unsigned dir_type;
	unsigned dir_type = mode_to_type(mode);
	int ret;

	ret = bch2_subvolume_get_snapshot(trans, dir.subvol, &snapshot);
@@ -41,19 +46,102 @@ int bch2_create_trans(struct btree_trans *trans,
	if (ret)
		goto err;

	if (!(flags & BCH_CREATE_SNAPSHOT)) {
		/* Normal create path - allocate a new inode: */
		bch2_inode_init_late(new_inode, now, uid, gid, mode, rdev, dir_u);

	if (!name)
		if (flags & BCH_CREATE_TMPFILE)
			new_inode->bi_flags |= BCH_INODE_UNLINKED;

		ret = bch2_inode_create(trans, &inode_iter, new_inode, snapshot, cpu);
		if (ret)
			goto err;

		snapshot_src = (subvol_inum) { 0 };
	} else {
		/*
		 * Creating a snapshot - we're not allocating a new inode, but
		 * we do have to lookup the root inode of the subvolume we're
		 * snapshotting and update it (in the new snapshot):
		 */

		if (!snapshot_src.inum) {
			/* Inode wasn't specified, just snapshot: */
			struct btree_iter subvol_iter;
			struct bkey_s_c k;

			bch2_trans_iter_init(trans, &subvol_iter, BTREE_ID_subvolumes,
					     POS(0, snapshot_src.subvol), 0);
			k = bch2_btree_iter_peek_slot(&subvol_iter);

			ret = bkey_err(k);
			if (!ret && k.k->type != KEY_TYPE_subvolume) {
				bch_err(c, "subvolume %u not found",
					snapshot_src.subvol);
				ret = -ENOENT;
			}

			if (!ret)
				snapshot_src.inum = le64_to_cpu(bkey_s_c_to_subvolume(k).v->inode);
			bch2_trans_iter_exit(trans, &subvol_iter);

			if (ret)
				goto err;
		}

		ret = bch2_inode_peek(trans, &inode_iter, new_inode, snapshot_src,
				      BTREE_ITER_INTENT);
		if (ret)
			goto err;

		if (new_inode->bi_subvol != snapshot_src.subvol) {
			/* Not a subvolume root: */
			ret = -EINVAL;
			goto err;
		}

		/*
		 * If we're not root, we have to own the subvolume being
		 * snapshotted:
		 */
		if (uid && new_inode->bi_uid != uid) {
			ret = -EPERM;
			goto err;
		}

		flags |= BCH_CREATE_SUBVOL;
	}

	new_inum.inum	= new_inode->bi_inum;
	dir_target	= new_inode->bi_inum;
	dir_type	= mode_to_type(new_inode->bi_mode);

	if (flags & BCH_CREATE_SUBVOL) {
		u32 new_subvol, dir_snapshot;

		ret = bch2_subvolume_create(trans, new_inode->bi_inum,
					    snapshot_src.subvol,
					    &new_subvol, &snapshot,
					    (flags & BCH_CREATE_SNAPSHOT_RO) != 0);
		if (ret)
			goto err;

		new_inode->bi_parent_subvol	= dir.subvol;
		new_inode->bi_subvol		= new_subvol;
		new_inum.subvol			= new_subvol;
		dir_target			= new_subvol;
		dir_type			= DT_SUBVOL;

		ret = bch2_subvolume_get_snapshot(trans, dir.subvol, &dir_snapshot);
		if (ret)
			goto err;

		bch2_btree_iter_set_snapshot(&dir_iter, dir_snapshot);
		ret = bch2_btree_iter_traverse(&dir_iter);
		if (ret)
			goto err;
	}

	if (!(flags & BCH_CREATE_SNAPSHOT)) {
		if (default_acl) {
			ret = bch2_set_acl_trans(trans, new_inum, new_inode,
						 default_acl, ACL_TYPE_DEFAULT);
@@ -67,11 +155,13 @@ int bch2_create_trans(struct btree_trans *trans,
			if (ret)
				goto err;
		}
	}

	if (name) {
	if (!(flags & BCH_CREATE_TMPFILE)) {
		struct bch_hash_info dir_hash = bch2_hash_info_init(c, dir_u);
		u64 dir_offset;

		if (S_ISDIR(new_inode->bi_mode))
		if (is_subdir_for_nlink(new_inode))
			dir_u->bi_nlink++;
		dir_u->bi_mtime = dir_u->bi_ctime = now;

@@ -87,12 +177,12 @@ int bch2_create_trans(struct btree_trans *trans,
					 BCH_HASH_SET_MUST_CREATE);
		if (ret)
			goto err;
	}

		if (c->sb.version >= bcachefs_metadata_version_inode_backpointers) {
			new_inode->bi_dir		= dir_u->bi_inum;
			new_inode->bi_dir_offset	= dir_offset;
		}
	}

	inode_iter.flags &= ~BTREE_ITER_ALL_SNAPSHOTS;
	bch2_btree_iter_set_snapshot(&inode_iter, snapshot);
@@ -160,7 +250,8 @@ int bch2_unlink_trans(struct btree_trans *trans,
		      subvol_inum dir,
		      struct bch_inode_unpacked *dir_u,
		      struct bch_inode_unpacked *inode_u,
		      const struct qstr *name)
		      const struct qstr *name,
		      int deleting_snapshot)
{
	struct bch_fs *c = trans->c;
	struct btree_iter dir_iter = { NULL };
@@ -169,6 +260,7 @@ int bch2_unlink_trans(struct btree_trans *trans,
	struct bch_hash_info dir_hash;
	subvol_inum inum;
	u64 now = bch2_current_time(c);
	struct bkey_s_c k;
	int ret;

	ret = bch2_inode_peek(trans, &dir_iter, dir_u, dir, BTREE_ITER_INTENT);
@@ -187,29 +279,51 @@ int bch2_unlink_trans(struct btree_trans *trans,
	if (ret)
		goto err;

	if (inode_u->bi_dir		== dirent_iter.pos.inode &&
	    inode_u->bi_dir_offset	== dirent_iter.pos.offset) {
		inode_u->bi_dir		= 0;
		inode_u->bi_dir_offset	= 0;
	if (deleting_snapshot == 1 && !inode_u->bi_subvol) {
		ret = -ENOENT;
		goto err;
	}

	if (S_ISDIR(inode_u->bi_mode)) {
	if (deleting_snapshot <= 0 && S_ISDIR(inode_u->bi_mode)) {
		ret = bch2_empty_dir_trans(trans, inum);
		if (ret)
			goto err;
	}

	if (dir.subvol != inum.subvol) {
		ret = bch2_subvolume_delete(trans, inum.subvol, false);
	if (inode_u->bi_subvol) {
		ret = bch2_subvolume_delete(trans, inode_u->bi_subvol,
					    deleting_snapshot);
		if (ret)
			goto err;

		k = bch2_btree_iter_peek_slot(&dirent_iter);
		ret = bkey_err(k);
		if (ret)
			goto err;

		/*
		 * If we're deleting a subvolume, we need to really delete the
		 * dirent, not just emit a whiteout in the current snapshot:
		 */
		bch2_btree_iter_set_snapshot(&dirent_iter, k.k->p.snapshot);
		ret = bch2_btree_iter_traverse(&dirent_iter);
		if (ret)
			goto err;
	}

	if (inode_u->bi_dir		== dirent_iter.pos.inode &&
	    inode_u->bi_dir_offset	== dirent_iter.pos.offset) {
		inode_u->bi_dir		= 0;
		inode_u->bi_dir_offset	= 0;
	}

	dir_u->bi_mtime = dir_u->bi_ctime = inode_u->bi_ctime = now;
	dir_u->bi_nlink -= S_ISDIR(inode_u->bi_mode);
	dir_u->bi_nlink -= is_subdir_for_nlink(inode_u);
	bch2_inode_nlink_dec(inode_u);

	ret =   bch2_dirent_delete_at(trans, &dir_hash, &dirent_iter) ?:
	ret =   bch2_hash_delete_at(trans, bch2_dirent_hash_desc,
				    &dir_hash, &dirent_iter,
				    BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
		bch2_inode_write(trans, &dir_iter, dir_u) ?:
		bch2_inode_write(trans, &inode_iter, inode_u);
err:
@@ -348,12 +462,12 @@ int bch2_rename_trans(struct btree_trans *trans,
		goto err;
	}

	if (S_ISDIR(src_inode_u->bi_mode)) {
	if (is_subdir_for_nlink(src_inode_u)) {
		src_dir_u->bi_nlink--;
		dst_dir_u->bi_nlink++;
	}

	if (dst_inum.inum && S_ISDIR(dst_inode_u->bi_mode)) {
	if (dst_inum.inum && is_subdir_for_nlink(dst_inode_u)) {
		dst_dir_u->bi_nlink--;
		src_dir_u->bi_nlink += mode == BCH_RENAME_EXCHANGE;
	}
+5 −2
Original line number Diff line number Diff line
@@ -5,6 +5,9 @@
struct posix_acl;

/* Flags accepted by bch2_create_trans(): */

/* New inode is flagged BCH_INODE_UNLINKED and no dirent is created for it */
#define BCH_CREATE_TMPFILE		(1U << 0)
/* Also create a new subvolume; the dirent gets type DT_SUBVOL */
#define BCH_CREATE_SUBVOL		(1U << 1)
/*
 * Snapshot an existing subvolume instead of allocating a new inode;
 * bch2_create_trans() turns on BCH_CREATE_SUBVOL itself when this is set
 */
#define BCH_CREATE_SNAPSHOT		(1U << 2)
/*
 * Forwarded as the final boolean argument of bch2_subvolume_create();
 * presumably marks the new snapshot read-only - TODO confirm
 */
#define BCH_CREATE_SNAPSHOT_RO		(1U << 3)

int bch2_create_trans(struct btree_trans *, subvol_inum,
		      struct bch_inode_unpacked *,
@@ -13,7 +16,7 @@ int bch2_create_trans(struct btree_trans *, subvol_inum,
		      uid_t, gid_t, umode_t, dev_t,
		      struct posix_acl *,
		      struct posix_acl *,
		      unsigned);
		      subvol_inum, unsigned);

int bch2_link_trans(struct btree_trans *,
		    subvol_inum, struct bch_inode_unpacked *,
@@ -23,7 +26,7 @@ int bch2_link_trans(struct btree_trans *,
int bch2_unlink_trans(struct btree_trans *, subvol_inum,
		      struct bch_inode_unpacked *,
		      struct bch_inode_unpacked *,
		      const struct qstr *);
		      const struct qstr *, int);

int bch2_rename_trans(struct btree_trans *,
		      subvol_inum, struct bch_inode_unpacked *,
+168 −0
Original line number Diff line number Diff line
@@ -10,7 +10,11 @@
#include "quota.h"

#include <linux/compat.h>
#include <linux/fsnotify.h>
#include <linux/mount.h>
#include <linux/namei.h>
#include <linux/security.h>
#include <linux/writeback.h>

#define FS_IOC_GOINGDOWN	     _IOR('X', 125, __u32)
#define FSOP_GOING_FLAGS_DEFAULT	0x0	/* going down */
@@ -292,6 +296,154 @@ static int bch2_ioc_goingdown(struct bch_fs *c, u32 __user *arg)
	return ret;
}

/*
 * BCH_IOCTL_SUBVOLUME_CREATE: create a new subvolume, or a snapshot of an
 * existing subvolume, at the path named by @arg.dst_ptr (resolved relative
 * to @arg.dirfd).
 *
 * @c:		filesystem the ioctl was issued on
 * @filp:	file the ioctl was issued on; supplies the mount idmap
 * @arg:	userspace request, already copied in by the caller
 *
 * Flag handling:
 *  - flags other than BCH_SUBVOL_SNAPSHOT_CREATE and BCH_SUBVOL_SNAPSHOT_RO
 *    are rejected with -EINVAL
 *  - a source path or BCH_SUBVOL_SNAPSHOT_RO without
 *    BCH_SUBVOL_SNAPSHOT_CREATE is rejected with -EINVAL
 *  - when snapshotting without a source path, the subvolume containing the
 *    destination's parent directory is the one snapshotted
 *
 * The checks between user_path_create() and __bch2_create() (existing
 * dentry, dead directory, uid/gid mapping, write+exec permission, umask,
 * security hook) are done by hand here, since creation goes through
 * __bch2_create() directly.
 *
 * Returns 0 on success or a negative errno; -EXDEV if either path is not
 * on @c, -EEXIST if the destination already exists.
 */
static long bch2_ioctl_subvolume_create(struct bch_fs *c, struct file *filp,
				struct bch_ioctl_subvolume arg)
{
	struct inode *dir;
	struct bch_inode_info *inode;
	struct user_namespace *s_user_ns;
	struct dentry *dst_dentry;
	struct path src_path, dst_path;
	int how = LOOKUP_FOLLOW;
	int error;
	subvol_inum snapshot_src = { 0 };
	unsigned lookup_flags = 0;
	unsigned create_flags = BCH_CREATE_SUBVOL;

	if (arg.flags & ~(BCH_SUBVOL_SNAPSHOT_CREATE|
			  BCH_SUBVOL_SNAPSHOT_RO))
		return -EINVAL;

	/* A source and the RO flag only make sense when snapshotting: */
	if (!(arg.flags & BCH_SUBVOL_SNAPSHOT_CREATE) &&
	    (arg.src_ptr ||
	     (arg.flags & BCH_SUBVOL_SNAPSHOT_RO)))
		return -EINVAL;

	if (arg.flags & BCH_SUBVOL_SNAPSHOT_CREATE)
		create_flags |= BCH_CREATE_SNAPSHOT;

	if (arg.flags & BCH_SUBVOL_SNAPSHOT_RO)
		create_flags |= BCH_CREATE_SNAPSHOT_RO;

	/* why do we need this lock? */
	down_read(&c->vfs_sb->s_umount);

	/* Write back dirty inodes before the snapshot is taken: */
	if (arg.flags & BCH_SUBVOL_SNAPSHOT_CREATE)
		sync_inodes_sb(c->vfs_sb);
retry:
	if (arg.src_ptr) {
		error = user_path_at(arg.dirfd,
				(const char __user *)(unsigned long)arg.src_ptr,
				how, &src_path);
		if (error)
			goto err1;

		if (src_path.dentry->d_sb->s_fs_info != c) {
			path_put(&src_path);
			error = -EXDEV;
			goto err1;
		}

		snapshot_src = inode_inum(to_bch_ei(src_path.dentry->d_inode));
	}

	dst_dentry = user_path_create(arg.dirfd,
			(const char __user *)(unsigned long)arg.dst_ptr,
			&dst_path, lookup_flags);
	error = PTR_ERR_OR_ZERO(dst_dentry);
	if (error)
		goto err2;

	if (dst_dentry->d_sb->s_fs_info != c) {
		error = -EXDEV;
		goto err3;
	}

	if (dst_dentry->d_inode) {
		error = -EEXIST;
		goto err3;
	}

	dir = dst_path.dentry->d_inode;
	if (IS_DEADDIR(dir)) {
		error = -ENOENT;
		goto err3;
	}

	/* The caller's fsuid/fsgid must be representable on this superblock: */
	s_user_ns = dir->i_sb->s_user_ns;
	if (!kuid_has_mapping(s_user_ns, current_fsuid()) ||
	    !kgid_has_mapping(s_user_ns, current_fsgid())) {
		error = -EOVERFLOW;
		goto err3;
	}

	error = inode_permission(file_mnt_idmap(filp),
				 dir, MAY_WRITE | MAY_EXEC);
	if (error)
		goto err3;

	if (!IS_POSIXACL(dir))
		arg.mode &= ~current_umask();

	error = security_path_mkdir(&dst_path, dst_dentry, arg.mode);
	if (error)
		goto err3;

	/*
	 * No source given: snapshot the subvolume that contains the parent
	 * directory.
	 *
	 * This must come from inode_inum(), not ei_inode.bi_subvol:
	 * bi_subvol is nonzero only on the root inode of a subvolume, so
	 * reading it here yielded subvolume 0 whenever the parent directory
	 * was an ordinary directory inside the subvolume.
	 */
	if ((arg.flags & BCH_SUBVOL_SNAPSHOT_CREATE) &&
	    !arg.src_ptr)
		snapshot_src.subvol = inode_inum(to_bch_ei(dir)).subvol;

	inode = __bch2_create(file_mnt_idmap(filp), to_bch_ei(dir),
			      dst_dentry, arg.mode|S_IFDIR,
			      0, snapshot_src, create_flags);
	error = PTR_ERR_OR_ZERO(inode);
	if (error)
		goto err3;

	d_instantiate(dst_dentry, &inode->v);
	fsnotify_mkdir(dir, dst_dentry);
err3:
	done_path_create(&dst_path, dst_dentry);
err2:
	if (arg.src_ptr)
		path_put(&src_path);

	/* Stale handle: redo both lookups, revalidating the destination */
	if (retry_estale(error, lookup_flags)) {
		lookup_flags |= LOOKUP_REVAL;
		goto retry;
	}
err1:
	up_read(&c->vfs_sb->s_umount);

	return error;
}

static long bch2_ioctl_subvolume_destroy(struct bch_fs *c, struct file *filp,
				struct bch_ioctl_subvolume arg)
{
	struct path path;
	int ret = 0;

	if (arg.flags)
		return -EINVAL;

	ret = user_path_at(arg.dirfd,
			(const char __user *)(unsigned long)arg.dst_ptr,
			LOOKUP_FOLLOW, &path);
	if (ret)
		return ret;

	if (path.dentry->d_sb->s_fs_info != c) {
		path_put(&path);
		return -EXDEV;
	}

	ret = __bch2_unlink(path.dentry->d_parent->d_inode, path.dentry, 1);
	path_put(&path);

	return ret;
}

long bch2_fs_file_ioctl(struct file *file, unsigned cmd, unsigned long arg)
{
	struct bch_inode_info *inode = file_bch_inode(file);
@@ -322,6 +474,22 @@ long bch2_fs_file_ioctl(struct file *file, unsigned cmd, unsigned long arg)
	case FS_IOC_GOINGDOWN:
		return bch2_ioc_goingdown(c, (u32 __user *) arg);

	case BCH_IOCTL_SUBVOLUME_CREATE: {
		struct bch_ioctl_subvolume i;

		if (copy_from_user(&i, (void __user *) arg, sizeof(i)))
			return -EFAULT;
		return bch2_ioctl_subvolume_create(c, file, i);
	}

	case BCH_IOCTL_SUBVOLUME_DESTROY: {
		struct bch_ioctl_subvolume i;

		if (copy_from_user(&i, (void __user *) arg, sizeof(i)))
			return -EFAULT;
		return bch2_ioctl_subvolume_destroy(c, file, i);
	}

	default:
		return bch2_fs_ioctl(c, cmd, (void __user *) arg);
	}
Loading