Commit 39cea302 authored by Kent Overstreet
Browse files

bcachefs: bch2_check_bucket_backpointer_mismatch()



Detect buckets with missing backpointers, and run repair on demand.

__bch2_move_data_phys() now calls
bch2_check_bucket_backpointer_mismatch() as it walks buckets, which
checks for missing backpointers by comparing backpointers against bucket
sector counts.

When missing backpointers are detected, we kick off
bch2_check_extents_to_backpointers() asynchronously - right away if
we're trying to evacuate, or with a threshold if we're just running
copygc.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent 15f96932
Loading
Loading
Loading
Loading
+5 −2
Original line number Diff line number Diff line
@@ -2175,8 +2175,11 @@ static int invalidate_one_bucket(struct btree_trans *trans,
	BUG_ON(a->data_type != BCH_DATA_cached);
	BUG_ON(a->dirty_sectors);

	if (!a->cached_sectors)
		bch_err(c, "invalidating empty bucket, confused");
	if (!a->cached_sectors) {
		bch2_check_bucket_backpointer_mismatch(trans, ca, bucket.offset,
						       true, last_flushed);
		goto out;
	}

	unsigned cached_sectors = a->cached_sectors;
	u8 gen = a->gen;
+70 −5
Original line number Diff line number Diff line
@@ -12,6 +12,7 @@
#include "disk_accounting.h"
#include "error.h"
#include "progress.h"
#include "recovery_passes.h"

#include <linux/mm.h>

@@ -804,6 +805,13 @@ static int bch2_get_btree_in_memory_pos(struct btree_trans *trans,
	return ret;
}

/*
 * Report whether the BCH_FS_going_ro bit is set in c->flags.
 *
 * Returns -EROFS when the bit is set and 0 otherwise, so the result can be
 * used directly as an error code by callers.
 */
static inline int bch2_fs_going_ro(struct bch_fs *c)
{
	if (test_bit(BCH_FS_going_ro, &c->flags))
		return -EROFS;

	return 0;
}

static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
						   struct extents_to_bp_state *s)
{
@@ -831,6 +839,7 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,

			ret = for_each_btree_key_continue(trans, iter, 0, k, ({
				bch2_progress_update_iter(trans, &progress, &iter, "extents_to_backpointers");
				bch2_fs_going_ro(c) ?:
				check_extent_to_backpointers(trans, s, btree_id, level, k) ?:
				bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
			}));
@@ -870,6 +879,7 @@ static int data_type_to_alloc_counter(enum bch_data_type t)
static int check_bucket_backpointers_to_extents(struct btree_trans *, struct bch_dev *, struct bpos);

static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct bkey_s_c alloc_k,
					     bool *had_mismatch,
					     struct bkey_buf *last_flushed)
{
	struct bch_fs *c = trans->c;
@@ -877,6 +887,8 @@ static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct b
	const struct bch_alloc_v4 *a = bch2_alloc_to_v4(alloc_k, &a_convert);
	bool need_commit = false;

	*had_mismatch = false;

	if (a->data_type == BCH_DATA_sb ||
	    a->data_type == BCH_DATA_journal ||
	    a->data_type == BCH_DATA_parity)
@@ -957,6 +969,8 @@ static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct b
			 ? bch2_bucket_bitmap_set(ca, &ca->bucket_backpointer_empty,
						  alloc_k.k->p.offset)
			 : 0);

		*had_mismatch = true;
	}
err:
	bch2_dev_put(ca);
@@ -1104,7 +1118,9 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c)

	ret = for_each_btree_key(trans, iter, BTREE_ID_alloc,
				 POS_MIN, BTREE_ITER_prefetch, k, ({
		check_bucket_backpointer_mismatch(trans, k, &s.last_flushed);
		bool had_mismatch;
		bch2_fs_going_ro(c) ?:
		check_bucket_backpointer_mismatch(trans, k, &had_mismatch, &s.last_flushed);
	}));
	if (ret)
		goto err;
@@ -1150,20 +1166,69 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c)

		s.bp_start = bpos_successor(s.bp_end);
	}
err:
	bch2_trans_put(trans);
	bch2_bkey_buf_exit(&s.last_flushed, c);
	bch2_btree_cache_unpin(c);

	for_each_member_device(c, ca) {
		bch2_bucket_bitmap_free(&ca->bucket_backpointer_mismatch);
		bch2_bucket_bitmap_free(&ca->bucket_backpointer_empty);
	}
err:
	bch2_trans_put(trans);
	bch2_bkey_buf_exit(&s.last_flushed, c);
	bch2_btree_cache_unpin(c);

	bch_err_fn(c, ret);
	return ret;
}

/*
 * Position-based wrapper around check_bucket_backpointer_mismatch(): fetch
 * the key at @bucket in BTREE_ID_alloc (using a BTREE_ITER_cached iterator)
 * and run the mismatch check on it.
 *
 * @had_mismatch and @last_flushed are handed to
 * check_bucket_backpointer_mismatch() unchanged.
 *
 * Returns the lookup error if the alloc key could not be read, otherwise
 * whatever check_bucket_backpointer_mismatch() returns.
 */
static int check_bucket_backpointer_pos_mismatch(struct btree_trans *trans,
						 struct bpos bucket,
						 bool *had_mismatch,
						 struct bkey_buf *last_flushed)
{
	struct btree_iter iter;
	struct bkey_s_c alloc_k;
	int ret;

	alloc_k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_alloc, bucket,
				     BTREE_ITER_cached);
	ret = bkey_err(alloc_k);
	if (!ret) {
		ret = check_bucket_backpointer_mismatch(trans, alloc_k,
							had_mismatch, last_flushed);
		bch2_trans_iter_exit(trans, &iter);
	}

	/*
	 * NOTE(review): on lookup failure the iterator is not exited here;
	 * presumably bch2_bkey_get_iter() releases it itself - confirm.
	 */
	return ret;
}

/*
 * Check a single bucket for a backpointer mismatch and, if one is found,
 * log it and request the check_extents_to_backpointers recovery pass.
 *
 * @trans:		btree transaction; the check is run under lockrestart_do()
 * @ca:			device the bucket lives on
 * @bucket:		bucket index on @ca
 * @copygc:		selects the ratelimit threshold: when true the request is
 *			flagged RUN_RECOVERY_PASS_ratelimit as long as the
 *			device's mismatch counter is below nbuckets >> 7; when
 *			false the threshold is 0 and the flag is never set
 * @last_flushed:	passed through to the underlying check
 *
 * Returns the error from the check, or 0. The result of
 * bch2_run_explicit_recovery_pass() is not propagated to the caller.
 */
int bch2_check_bucket_backpointer_mismatch(struct btree_trans *trans,
					   struct bch_dev *ca, u64 bucket,
					   bool copygc,
					   struct bkey_buf *last_flushed)
{
	struct bch_fs *c = trans->c;
	struct bpos pos = POS(ca->dev_idx, bucket);
	bool had_mismatch;
	int ret;

	ret = lockrestart_do(trans,
		check_bucket_backpointer_pos_mismatch(trans, pos,
						      &had_mismatch, last_flushed));
	if (ret)
		return ret;
	/* had_mismatch is only meaningful once the check has succeeded */
	if (!had_mismatch)
		return 0;

	u64 nr_mismatched = ca->bucket_backpointer_mismatch.nr;
	u64 threshold = 0;

	if (copygc)
		threshold = ca->mi.nbuckets >> 7;

	bool ratelimit = nr_mismatched < threshold;

	struct printbuf msg = PRINTBUF;
	__bch2_log_msg_start(ca->name, &msg);

	prt_printf(&msg, "Detected missing backpointers in bucket %llu, now have %llu/%llu with missing\n",
		   bucket, nr_mismatched, ca->mi.nbuckets);

	/* The recovery-pass helper is handed the same buffer that is printed below */
	bch2_run_explicit_recovery_pass(c, &msg,
			BCH_RECOVERY_PASS_check_extents_to_backpointers,
			ratelimit ? RUN_RECOVERY_PASS_ratelimit : 0);

	bch2_print_str(c, KERN_ERR, msg.buf);
	printbuf_exit(&msg);
	return 0;
}

/* backpointers -> extents */

static int check_one_backpointer(struct btree_trans *trans,
+2 −1
Original line number Diff line number Diff line
@@ -182,7 +182,8 @@ struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *, struct bkey_s_c_b
struct btree *bch2_backpointer_get_node(struct btree_trans *, struct bkey_s_c_backpointer,
					struct btree_iter *, struct bkey_buf *);

int bch2_check_bucket_backpointer_mismatch(struct btree_trans *, struct bpos, struct bkey_buf *);
int bch2_check_bucket_backpointer_mismatch(struct btree_trans *, struct bch_dev *, u64,
					   bool, struct bkey_buf *);

int bch2_check_btree_backpointers(struct bch_fs *);
int bch2_check_extents_to_backpointers(struct bch_fs *);
+18 −3
Original line number Diff line number Diff line
@@ -815,6 +815,7 @@ static int __bch2_move_data_phys(struct moving_context *ctxt,
			u64 bucket_start,
			u64 bucket_end,
			unsigned data_types,
			bool copygc,
			move_pred_fn pred, void *arg)
{
	struct btree_trans *trans = ctxt->trans;
@@ -825,6 +826,7 @@ static int __bch2_move_data_phys(struct moving_context *ctxt,
	struct bkey_buf sk;
	struct bkey_s_c k;
	struct bkey_buf last_flushed;
	u64 check_mismatch_done = bucket_start;
	int ret = 0;

	struct bch_dev *ca = bch2_dev_tryget(c, dev);
@@ -835,8 +837,6 @@ static int __bch2_move_data_phys(struct moving_context *ctxt,

	struct bpos bp_start	= bucket_pos_to_bp_start(ca, POS(dev, bucket_start));
	struct bpos bp_end	= bucket_pos_to_bp_end(ca, POS(dev, bucket_end));
	bch2_dev_put(ca);
	ca = NULL;

	bch2_bkey_buf_init(&last_flushed);
	bkey_init(&last_flushed.k->k);
@@ -871,6 +871,14 @@ static int __bch2_move_data_phys(struct moving_context *ctxt,
		if (!k.k || bkey_gt(k.k->p, bp_end))
			break;

		if (check_mismatch_done < bp_pos_to_bucket(ca, k.k->p).offset) {
			while (check_mismatch_done < bp_pos_to_bucket(ca, k.k->p).offset) {
				bch2_check_bucket_backpointer_mismatch(trans, ca, check_mismatch_done++,
								       copygc, &last_flushed);
			}
			continue;
		}

		if (k.k->type != KEY_TYPE_backpointer)
			goto next;

@@ -946,10 +954,15 @@ static int __bch2_move_data_phys(struct moving_context *ctxt,
next:
		bch2_btree_iter_advance(trans, &bp_iter);
	}

	while (check_mismatch_done < bucket_end)
		bch2_check_bucket_backpointer_mismatch(trans, ca, check_mismatch_done++,
						       copygc, &last_flushed);
err:
	bch2_trans_iter_exit(trans, &bp_iter);
	bch2_bkey_buf_exit(&sk, c);
	bch2_bkey_buf_exit(&last_flushed, c);
	bch2_dev_put(ca);
	return ret;
}

@@ -974,7 +987,8 @@ int bch2_move_data_phys(struct bch_fs *c,
		ctxt.stats->data_type = (int) DATA_PROGRESS_DATA_TYPE_phys;
	}

	int ret = __bch2_move_data_phys(&ctxt, NULL, dev, start, end, data_types, pred, arg);
	int ret = __bch2_move_data_phys(&ctxt, NULL, dev, start, end,
					data_types, false, pred, arg);
	bch2_moving_ctxt_exit(&ctxt);

	return ret;
@@ -1019,6 +1033,7 @@ int bch2_evacuate_bucket(struct moving_context *ctxt,
				   bucket.offset,
				   bucket.offset + 1,
				   ~0,
				   true,
				   evacuate_bucket_pred, &arg);
}

+3 −0
Original line number Diff line number Diff line
@@ -75,6 +75,9 @@ static int bch2_bucket_is_movable(struct btree_trans *trans,
	if (!ca)
		goto out;

	if (bch2_bucket_bitmap_test(&ca->bucket_backpointer_mismatch, b->k.bucket.offset))
		goto out;

	if (ca->mi.state != BCH_MEMBER_STATE_rw ||
	    !bch2_dev_is_online(ca))
		goto out;