Commit 2252aa27 authored by Kent Overstreet, committed by Kent Overstreet
Browse files

bcachefs: btree gc refactoring



prep work for erasure coding

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent abce30b7
Loading
Loading
Loading
Loading
+0 −11
Original line number Diff line number Diff line
@@ -19,17 +19,6 @@ static inline enum bkey_type bkey_type(unsigned level, enum btree_id id)
	return level ? BKEY_TYPE_BTREE : (enum bkey_type) id;
}

/*
 * Returns true when @type is BKEY_TYPE_BTREE or BKEY_TYPE_EXTENTS, and false
 * for every other key type.
 */
static inline bool btree_type_has_ptrs(enum bkey_type type)
{
	return type == BKEY_TYPE_BTREE ||
	       type == BKEY_TYPE_EXTENTS;
}

struct bch_fs;
struct btree;
struct bkey;
+158 −140
Original line number Diff line number Diff line
@@ -18,6 +18,7 @@
#include "error.h"
#include "extents.h"
#include "journal.h"
#include "journal_io.h"
#include "keylist.h"
#include "move.h"
#include "replicas.h"
@@ -32,6 +33,23 @@
#include <linux/rcupdate.h>
#include <linux/sched/task.h>

/*
 * Unconditionally store @new_pos into c->gc_pos.
 *
 * The store is wrapped in a write section of the c->gc_pos_lock seqcount,
 * with preemption disabled for the duration of that section. No ordering
 * check is done against the current position here.
 */
static inline void __gc_pos_set(struct bch_fs *c, struct gc_pos new_pos)
{
	preempt_disable();
	write_seqcount_begin(&c->gc_pos_lock);
	c->gc_pos = new_pos;
	write_seqcount_end(&c->gc_pos_lock);
	preempt_enable();
}

/*
 * Advance c->gc_pos to @new_pos.
 *
 * BUGs if gc_pos_cmp(new_pos, c->gc_pos) is <= 0, i.e. the new position must
 * compare strictly greater than the current one; the store itself is done by
 * __gc_pos_set().
 */
static inline void gc_pos_set(struct bch_fs *c, struct gc_pos new_pos)
{
	BUG_ON(gc_pos_cmp(new_pos, c->gc_pos) <= 0);
	__gc_pos_set(c, new_pos);
}

/* range_checks - for validating min/max pos of each btree node: */

struct range_checks {
	struct range_level {
		struct bpos	min;
@@ -91,6 +109,19 @@ static void btree_node_range_checks(struct bch_fs *c, struct btree *b,
	}
}

/* marking of btree keys/nodes: */

/*
 * Returns true when @type is BKEY_TYPE_BTREE or BKEY_TYPE_EXTENTS, and false
 * for every other key type.
 */
static bool bkey_type_needs_gc(enum bkey_type type)
{
	return type == BKEY_TYPE_BTREE ||
	       type == BKEY_TYPE_EXTENTS;
}

u8 bch2_btree_key_recalc_oldest_gen(struct bch_fs *c, struct bkey_s_c k)
{
	const struct bch_extent_ptr *ptr;
@@ -113,38 +144,7 @@ u8 bch2_btree_key_recalc_oldest_gen(struct bch_fs *c, struct bkey_s_c k)
	return max_stale;
}

/*
 * For runtime mark and sweep:
 *
 * Mark a single key via bch2_mark_key(). The caller's @flags are always
 * combined with BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE and
 * BCH_BUCKET_MARK_GC_LOCK_HELD.
 *
 * Btree keys are marked as BCH_DATA_BTREE with c->opts.btree_node_size
 * sectors and yield 0; extent keys are marked as BCH_DATA_USER with
 * k.k->size sectors and yield bch2_btree_key_recalc_oldest_gen(). Any other
 * key type is a BUG().
 */
static u8 bch2_gc_mark_key(struct bch_fs *c, enum bkey_type type,
			   struct bkey_s_c k, unsigned flags)
{
	struct gc_pos pos = { 0 };

	flags |= BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE|
		 BCH_BUCKET_MARK_GC_LOCK_HELD;

	if (type == BKEY_TYPE_BTREE) {
		bch2_mark_key(c, k, c->opts.btree_node_size,
			      BCH_DATA_BTREE, pos, NULL,
			      0, flags);
		return 0;
	}

	if (type == BKEY_TYPE_EXTENTS) {
		bch2_mark_key(c, k, k.k->size, BCH_DATA_USER, pos, NULL,
			      0, flags);
		return bch2_btree_key_recalc_oldest_gen(c, k);
	}

	BUG();
	return 0;
}

int bch2_btree_mark_key_initial(struct bch_fs *c, enum bkey_type type,
static int bch2_btree_mark_ptrs_initial(struct bch_fs *c, enum bkey_type type,
					struct bkey_s_c k)
{
	enum bch_data_type data_type = type == BKEY_TYPE_BTREE
@@ -199,54 +199,90 @@ int bch2_btree_mark_key_initial(struct bch_fs *c, enum bkey_type type,
	}
	}

	atomic64_set(&c->key_version,
		     max_t(u64, k.k->version.lo,
			   atomic64_read(&c->key_version)));

	bch2_gc_mark_key(c, type, k, BCH_BUCKET_MARK_NOATOMIC);
	if (k.k->version.lo > atomic64_read(&c->key_version))
		atomic64_set(&c->key_version, k.k->version.lo);
fsck_err:
	return ret;
}

static unsigned btree_gc_mark_node(struct bch_fs *c, struct btree *b)
/*
 * For runtime mark and sweep:
 *
 * Mark a single key via bch2_mark_key(), always with
 * BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE and BCH_BUCKET_MARK_GC_LOCK_HELD set.
 *
 * When @initial is true, BCH_BUCKET_MARK_NOATOMIC is added and
 * bch2_btree_mark_ptrs_initial() runs on the key first; a negative result
 * from it is returned immediately, before anything is marked.
 *
 * Key types other than BKEY_TYPE_BTREE and BKEY_TYPE_EXTENTS are ignored and
 * yield 0. For extents the return value is the result of
 * bch2_btree_key_recalc_oldest_gen(); for btree keys it is whatever
 * non-negative value bch2_btree_mark_ptrs_initial() produced, or 0.
 */
static int bch2_gc_mark_key(struct bch_fs *c, enum bkey_type type,
			    struct bkey_s_c k, bool initial)
{
	struct gc_pos pos = { 0 };
	unsigned flags = BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE|
			 BCH_BUCKET_MARK_GC_LOCK_HELD;
	int ret = 0;

	/* Only btree node pointers and extents are marked here: */
	if (type != BKEY_TYPE_BTREE &&
	    type != BKEY_TYPE_EXTENTS)
		return 0;

	if (initial) {
		flags |= BCH_BUCKET_MARK_NOATOMIC;

		ret = bch2_btree_mark_ptrs_initial(c, type, k);
		if (ret < 0)
			return ret;
	}

	if (type == BKEY_TYPE_BTREE) {
		bch2_mark_key(c, k, c->opts.btree_node_size,
			      BCH_DATA_BTREE, pos, NULL,
			      0, flags);
	} else {
		bch2_mark_key(c, k, k.k->size, BCH_DATA_USER, pos, NULL,
			      0, flags);
		ret = bch2_btree_key_recalc_oldest_gen(c, k);
	}

	return ret;
}

static int btree_gc_mark_node(struct bch_fs *c, struct btree *b,
			      bool initial)
{
	enum bkey_type type = btree_node_type(b);
	struct btree_node_iter iter;
	struct bkey unpacked;
	struct bkey_s_c k;
	u8 stale = 0;
	int ret;

	if (!bkey_type_needs_gc(type))
		return 0;

	if (btree_node_has_ptrs(b))
	for_each_btree_node_key_unpack(b, k, &iter,
				       &unpacked) {
		bch2_bkey_debugcheck(c, b, k);
			stale = max(stale, bch2_gc_mark_key(c, type, k, 0));
		}

	return stale;
}
		ret = bch2_gc_mark_key(c, type, k, initial);
		if (ret < 0)
			return ret;

static inline void __gc_pos_set(struct bch_fs *c, struct gc_pos new_pos)
{
	preempt_disable();
	write_seqcount_begin(&c->gc_pos_lock);
	c->gc_pos = new_pos;
	write_seqcount_end(&c->gc_pos_lock);
	preempt_enable();
		stale = max_t(u8, stale, ret);
	}

static inline void gc_pos_set(struct bch_fs *c, struct gc_pos new_pos)
{
	BUG_ON(gc_pos_cmp(new_pos, c->gc_pos) <= 0);
	__gc_pos_set(c, new_pos);
	return stale;
}

static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id)
static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id,
			 bool initial)
{
	struct btree_iter iter;
	struct btree *b;
	struct range_checks r;
	unsigned depth = btree_id == BTREE_ID_EXTENTS ? 0 : 1;
	unsigned depth = bkey_type_needs_gc(btree_id) ? 0 : 1;
	unsigned max_stale;
	int ret = 0;

@@ -257,8 +293,11 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id)

	/*
	 * if expensive_debug_checks is on, run range_checks on all leaf nodes:
	 *
	 * and on startup, we have to read every btree node (XXX: only if it was
	 * an unclean shutdown)
	 */
	if (expensive_debug_checks(c))
	if (initial || expensive_debug_checks(c))
		depth = 0;

	btree_node_range_checks_init(&r, depth);
@@ -269,10 +308,11 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id)

		bch2_verify_btree_nr_keys(b);

		max_stale = btree_gc_mark_node(c, b);
		max_stale = btree_gc_mark_node(c, b, initial);

		gc_pos_set(c, gc_pos_btree_node(b));

		if (!initial) {
			if (max_stale > 64)
				bch2_btree_node_rewrite(c, &iter,
						b->data->keys.seq,
@@ -285,6 +325,7 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id)
						b->data->keys.seq,
						BTREE_INSERT_NOWAIT|
						BTREE_INSERT_GC_LOCK_HELD);
		}

		bch2_btree_iter_cond_resched(&iter);
	}
@@ -296,13 +337,47 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id)

	b = c->btree_roots[btree_id].b;
	if (!btree_node_fake(b))
		bch2_gc_mark_key(c, BKEY_TYPE_BTREE, bkey_i_to_s_c(&b->key), 0);
		bch2_gc_mark_key(c, BKEY_TYPE_BTREE,
				 bkey_i_to_s_c(&b->key), initial);
	gc_pos_set(c, gc_pos_btree_root(b->btree_id));

	mutex_unlock(&c->btree_root_lock);
	return 0;
}

static int bch2_gc_btrees(struct bch_fs *c, struct list_head *journal,
			  bool initial)
{
	unsigned i;

	for (i = 0; i < BTREE_ID_NR; i++) {
		enum bkey_type type = bkey_type(0, i);

		int ret = bch2_gc_btree(c, i, initial);
		if (ret)
			return ret;

		if (journal && bkey_type_needs_gc(type)) {
			struct bkey_i *k, *n;
			struct jset_entry *j;
			struct journal_replay *r;
			int ret;

			list_for_each_entry(r, journal, list)
				for_each_jset_key(k, n, j, &r->j) {
					if (type == bkey_type(j->level, j->btree_id)) {
						ret = bch2_gc_mark_key(c, type,
							bkey_i_to_s_c(k), initial);
						if (ret < 0)
							return ret;
					}
				}
		}
	}

	return 0;
}

static void mark_metadata_sectors(struct bch_fs *c, struct bch_dev *ca,
				  u64 start, u64 end,
				  enum bch_data_type type,
@@ -525,6 +600,7 @@ void bch2_gc(struct bch_fs *c)
	struct bch_dev *ca;
	u64 start_time = local_clock();
	unsigned i;
	int ret;

	/*
	 * Walk _all_ references to buckets, and recompute them:
@@ -560,15 +636,12 @@ void bch2_gc(struct bch_fs *c)

	bch2_mark_superblocks(c);

	/* Walk btree: */
	for (i = 0; i < BTREE_ID_NR; i++) {
		int ret = bch2_gc_btree(c, i);
	ret = bch2_gc_btrees(c, NULL, false);
	if (ret) {
		bch_err(c, "btree gc failed: %d", ret);
		set_bit(BCH_FS_GC_FAILURE, &c->flags);
		goto out;
	}
	}

	bch2_mark_pending_btree_node_frees(c);
	bch2_mark_allocator_buckets(c);
@@ -1009,58 +1082,9 @@ int bch2_gc_thread_start(struct bch_fs *c)

/* Initial GC computes bucket marks during startup */

/*
 * Initial (startup) marking pass over a single btree, @id.
 *
 * Sets the gc position to the start of this btree, marks the root node's key
 * (unless the root is a fake node), then walks every node of the btree,
 * running the range checks on each and marking every key of nodes for which
 * btree_node_has_ptrs() is true.
 *
 * Returns 0 if the btree has no root. Otherwise returns the first nonzero
 * result from bch2_btree_mark_key_initial(), or - once the node walk has
 * started - the result of bch2_btree_iter_unlock() if that is nonzero.
 */
static int bch2_initial_gc_btree(struct bch_fs *c, enum btree_id id)
{
	struct btree_iter iter;
	struct btree *b;
	struct range_checks r;
	int ret = 0;

	btree_node_range_checks_init(&r, 0);

	gc_pos_set(c, gc_pos_btree(id, POS_MIN, 0));

	/* No root for this btree: nothing to mark */
	if (!c->btree_roots[id].b)
		return 0;

	/* The root's own key is marked separately from the node walk below */
	b = c->btree_roots[id].b;
	if (!btree_node_fake(b))
		ret = bch2_btree_mark_key_initial(c, BKEY_TYPE_BTREE,
						  bkey_i_to_s_c(&b->key));
	if (ret)
		return ret;

	/*
	 * We have to hit every btree node before starting journal replay, in
	 * order for the journal seq blacklist machinery to work:
	 */
	for_each_btree_node(&iter, c, id, POS_MIN, BTREE_ITER_PREFETCH, b) {
		btree_node_range_checks(c, b, &r);

		if (btree_node_has_ptrs(b)) {
			struct btree_node_iter node_iter;
			struct bkey unpacked;
			struct bkey_s_c k;

			for_each_btree_node_key_unpack(b, k, &node_iter,
						       &unpacked) {
				ret = bch2_btree_mark_key_initial(c,
							btree_node_type(b), k);
				if (ret)
					goto err;
			}
		}

		bch2_btree_iter_cond_resched(&iter);
	}
err:
	/* An error from unlocking the iterator takes precedence over ret */
	return bch2_btree_iter_unlock(&iter) ?: ret;
}

int bch2_initial_gc(struct bch_fs *c, struct list_head *journal)
{
	unsigned iter = 0;
	enum btree_id id;
	int ret = 0;

	down_write(&c->gc_lock);
@@ -1069,13 +1093,7 @@ int bch2_initial_gc(struct bch_fs *c, struct list_head *journal)

	bch2_mark_superblocks(c);

	for (id = 0; id < BTREE_ID_NR; id++) {
		ret = bch2_initial_gc_btree(c, id);
		if (ret)
			goto err;
	}

	ret = bch2_journal_mark(c, journal);
	ret = bch2_gc_btrees(c, journal, true);
	if (ret)
		goto err;

+0 −2
Original line number Diff line number Diff line
@@ -12,8 +12,6 @@ void bch2_gc_thread_stop(struct bch_fs *);
int bch2_gc_thread_start(struct bch_fs *);
int bch2_initial_gc(struct bch_fs *, struct list_head *);
u8 bch2_btree_key_recalc_oldest_gen(struct bch_fs *, struct bkey_s_c);
int bch2_btree_mark_key_initial(struct bch_fs *, enum bkey_type,
				struct bkey_s_c);
void bch2_mark_dev_superblock(struct bch_fs *, struct bch_dev *, unsigned);

/*
+0 −5
Original line number Diff line number Diff line
@@ -415,11 +415,6 @@ static inline const struct bkey_ops *btree_node_ops(struct btree *b)
	return &bch2_bkey_ops[btree_node_type(b)];
}

/* Returns btree_type_has_ptrs() applied to the key type of node @b. */
static inline bool btree_node_has_ptrs(struct btree *b)
{
	return btree_type_has_ptrs(btree_node_type(b));
}

static inline bool btree_node_is_extents(struct btree *b)
{
	return btree_node_type(b) == BKEY_TYPE_EXTENTS;
+0 −4
Original line number Diff line number Diff line
@@ -355,10 +355,6 @@ static inline bool journal_flushes_device(struct bch_dev *ca)
	return true;
}

int bch2_journal_mark(struct bch_fs *, struct list_head *);
void bch2_journal_entries_free(struct list_head *);
int bch2_journal_replay(struct bch_fs *, struct list_head *);

static inline void bch2_journal_set_replay_done(struct journal *j)
{
	BUG_ON(!test_bit(JOURNAL_STARTED, &j->flags));
Loading