Commit a292be3b authored by Kent Overstreet's avatar Kent Overstreet
Browse files

bcachefs: Reconstruct missing snapshot nodes



When the snapshots btree is going, we'll have to delete huge amounts of
data - unless we can reconstruct it by looking at the keys that refer to
it.

Signed-off-by: default avatarKent Overstreet <kent.overstreet@linux.dev>
parent 55936afe
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -615,6 +615,7 @@ struct bch_dev {
 */

#define BCH_FS_FLAGS()			\
	x(new_fs)			\
	x(started)			\
	x(may_go_rw)			\
	x(rw)				\
+1 −0
Original line number Diff line number Diff line
@@ -938,6 +938,7 @@ int bch2_fs_initialize(struct bch_fs *c)
	int ret;

	bch_notice(c, "initializing new filesystem");
	set_bit(BCH_FS_new_fs, &c->flags);

	mutex_lock(&c->sb_lock);
	c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_extents_above_btree_updates_done);
+1 −0
Original line number Diff line number Diff line
@@ -32,6 +32,7 @@
	x(check_alloc_to_lru_refs,		15, PASS_ONLINE|PASS_FSCK)	\
	x(fs_freespace_init,			16, PASS_ALWAYS|PASS_SILENT)	\
	x(bucket_gens_init,			17, 0)				\
	x(reconstruct_snapshots,		38, 0)				\
	x(check_snapshot_trees,			18, PASS_ONLINE|PASS_FSCK)	\
	x(check_snapshots,			19, PASS_ONLINE|PASS_FSCK)	\
	x(check_subvols,			20, PASS_ONLINE|PASS_FSCK)	\
+2 −1
Original line number Diff line number Diff line
@@ -268,7 +268,8 @@
	x(btree_node_bkey_bad_u64s,				260)	\
	x(btree_node_topology_empty_interior_node,		261)	\
	x(btree_ptr_v2_min_key_bad,				262)	\
	x(btree_root_unreadable_and_scan_found_nothing,		263)
	x(btree_root_unreadable_and_scan_found_nothing,		263)	\
	x(snapshot_node_missing,				264)

enum bch_sb_error_id {
#define x(t, n) BCH_FSCK_ERR_##t = n,
+171 −2
Original line number Diff line number Diff line
@@ -8,6 +8,7 @@
#include "errcode.h"
#include "error.h"
#include "fs.h"
#include "recovery_passes.h"
#include "snapshot.h"

#include <linux/random.h>
@@ -574,6 +575,13 @@ static int check_snapshot_tree(struct btree_trans *trans,
		u32 subvol_id;

		ret = bch2_snapshot_tree_master_subvol(trans, root_id, &subvol_id);
		bch_err_fn(c, ret);

		if (bch2_err_matches(ret, ENOENT)) { /* nothing to be done here */
			ret = 0;
			goto err;
		}

		if (ret)
			goto err;

@@ -731,7 +739,6 @@ static int check_snapshot(struct btree_trans *trans,
	u32 parent_id = bch2_snapshot_parent_early(c, k.k->p.offset);
	u32 real_depth;
	struct printbuf buf = PRINTBUF;
	bool should_have_subvol;
	u32 i, id;
	int ret = 0;

@@ -777,7 +784,7 @@ static int check_snapshot(struct btree_trans *trans,
		}
	}

	should_have_subvol = BCH_SNAPSHOT_SUBVOL(&s) &&
	bool should_have_subvol = BCH_SNAPSHOT_SUBVOL(&s) &&
		!BCH_SNAPSHOT_DELETED(&s);

	if (should_have_subvol) {
@@ -879,6 +886,154 @@ int bch2_check_snapshots(struct bch_fs *c)
	return ret;
}

static int check_snapshot_exists(struct btree_trans *trans, u32 id)
{
	struct bch_fs *c = trans->c;

	if (bch2_snapshot_equiv(c, id))
		return 0;

	u32 tree_id;
	int ret = bch2_snapshot_tree_create(trans, id, 0, &tree_id);
	if (ret)
		return ret;

	struct bkey_i_snapshot *snapshot = bch2_trans_kmalloc(trans, sizeof(*snapshot));
	ret = PTR_ERR_OR_ZERO(snapshot);
	if (ret)
		return ret;

	bkey_snapshot_init(&snapshot->k_i);
	snapshot->k.p		= POS(0, id);
	snapshot->v.tree	= cpu_to_le32(tree_id);
	snapshot->v.btime.lo	= cpu_to_le64(bch2_current_time(c));

	return  bch2_btree_insert_trans(trans, BTREE_ID_snapshots, &snapshot->k_i, 0) ?:
		bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0,
				   bkey_s_c_null, bkey_i_to_s(&snapshot->k_i), 0) ?:
		bch2_snapshot_set_equiv(trans, bkey_i_to_s_c(&snapshot->k_i));
}

/* Figure out which snapshot nodes belong in the same tree: */
struct snapshot_tree_reconstruct {
	enum btree_id			btree;
	struct bpos			cur_pos;
	snapshot_id_list		cur_ids;
	DARRAY(snapshot_id_list)	trees;
};

static void snapshot_tree_reconstruct_exit(struct snapshot_tree_reconstruct *r)
{
	darray_for_each(r->trees, i)
		darray_exit(i);
	darray_exit(&r->trees);
	darray_exit(&r->cur_ids);
}

static inline bool same_snapshot(struct snapshot_tree_reconstruct *r, struct bpos pos)
{
	return r->btree == BTREE_ID_inodes
		? r->cur_pos.offset == pos.offset
		: r->cur_pos.inode == pos.inode;
}

static inline bool snapshot_id_lists_have_common(snapshot_id_list *l, snapshot_id_list *r)
{
	darray_for_each(*l, i)
		if (snapshot_list_has_id(r, *i))
			return true;
	return false;
}

static void snapshot_id_list_to_text(struct printbuf *out, snapshot_id_list *s)
{
	bool first = true;
	darray_for_each(*s, i) {
		if (!first)
			prt_char(out, ' ');
		first = false;
		prt_printf(out, "%u", *i);
	}
}

static int snapshot_tree_reconstruct_next(struct bch_fs *c, struct snapshot_tree_reconstruct *r)
{
	if (r->cur_ids.nr) {
		darray_for_each(r->trees, i)
			if (snapshot_id_lists_have_common(i, &r->cur_ids)) {
				int ret = snapshot_list_merge(c, i, &r->cur_ids);
				if (ret)
					return ret;
				goto out;
			}
		darray_push(&r->trees, r->cur_ids);
		darray_init(&r->cur_ids);
	}
out:
	r->cur_ids.nr = 0;
	return 0;
}

static int get_snapshot_trees(struct bch_fs *c, struct snapshot_tree_reconstruct *r, struct bpos pos)
{
	if (!same_snapshot(r, pos))
		snapshot_tree_reconstruct_next(c, r);
	r->cur_pos = pos;
	return snapshot_list_add_nodup(c, &r->cur_ids, pos.snapshot);
}

int bch2_reconstruct_snapshots(struct bch_fs *c)
{
	struct btree_trans *trans = bch2_trans_get(c);
	struct printbuf buf = PRINTBUF;
	struct snapshot_tree_reconstruct r = {};
	int ret = 0;

	for (unsigned btree = 0; btree < BTREE_ID_NR; btree++) {
		if (btree_type_has_snapshots(btree)) {
			r.btree = btree;

			ret = for_each_btree_key(trans, iter, btree, POS_MIN,
					BTREE_ITER_ALL_SNAPSHOTS|BTREE_ITER_PREFETCH, k, ({
				get_snapshot_trees(c, &r, k.k->p);
			}));
			if (ret)
				goto err;

			snapshot_tree_reconstruct_next(c, &r);
		}
	}

	darray_for_each(r.trees, t) {
		printbuf_reset(&buf);
		snapshot_id_list_to_text(&buf, t);

		darray_for_each(*t, id) {
			if (fsck_err_on(!bch2_snapshot_equiv(c, *id),
					c, snapshot_node_missing,
					"snapshot node %u from tree %s missing", *id, buf.buf)) {
				if (t->nr > 1) {
					bch_err(c, "cannot reconstruct snapshot trees with multiple nodes");
					ret = -BCH_ERR_fsck_repair_unimplemented;
					goto err;
				}

				ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
						check_snapshot_exists(trans, *id));
				if (ret)
					goto err;
			}
		}
	}
fsck_err:
err:
	bch2_trans_put(trans);
	snapshot_tree_reconstruct_exit(&r);
	printbuf_exit(&buf);
	bch_err_fn(c, ret);
	return ret;
}

/*
 * Mark a snapshot as deleted, for future cleanup:
 */
@@ -1689,6 +1844,20 @@ int bch2_snapshots_read(struct bch_fs *c)
				   POS_MIN, 0, k,
			   (set_is_ancestor_bitmap(c, k.k->p.offset), 0)));
	bch_err_fn(c, ret);

	/*
	 * It's important that we check if we need to reconstruct snapshots
	 * before going RW, so we mark that pass as required in the superblock -
	 * otherwise, we could end up deleting keys with missing snapshot nodes
	 * instead
	 */
	BUG_ON(!test_bit(BCH_FS_new_fs, &c->flags) &&
	       test_bit(BCH_FS_may_go_rw, &c->flags));

	if (bch2_err_matches(ret, EIO) ||
	    (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_snapshots)))
		ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_reconstruct_snapshots);

	return ret;
}

Loading