Commit 2177147b authored by Kent Overstreet's avatar Kent Overstreet Committed by Kent Overstreet
Browse files

bcachefs: Improve bset compaction



The previous patch that fixed btree nodes being written too aggressively
now meant that we weren't sorting btree node bsets optimally - this
patch fixes that.

Signed-off-by: default avatarKent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: default avatarKent Overstreet <kent.overstreet@linux.dev>
parent 241e2636
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -215,7 +215,7 @@ static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush)
		if (bch2_verify_btree_ondisk)
			bch2_btree_node_write(c, b, SIX_LOCK_intent);
		else
			__bch2_btree_node_write(c, b, SIX_LOCK_read);
			__bch2_btree_node_write(c, b);

		/* wait for any in flight btree write */
		btree_node_wait_on_io(b);
+34 −17
Original line number Diff line number Diff line
@@ -241,7 +241,6 @@ bool bch2_compact_whiteouts(struct bch_fs *c, struct btree *b,
}

static void btree_node_sort(struct bch_fs *c, struct btree *b,
			    struct btree_iter *iter,
			    unsigned start_idx,
			    unsigned end_idx,
			    bool filter_whiteouts)
@@ -377,8 +376,7 @@ void bch2_btree_sort_into(struct bch_fs *c,
 * We're about to add another bset to the btree node, so if there's currently
 * too many bsets - sort some of them together:
 */
static bool btree_node_compact(struct bch_fs *c, struct btree *b,
			       struct btree_iter *iter)
static bool btree_node_compact(struct bch_fs *c, struct btree *b)
{
	unsigned unwritten_idx;
	bool ret = false;
@@ -390,13 +388,13 @@ static bool btree_node_compact(struct bch_fs *c, struct btree *b,
			break;

	if (b->nsets - unwritten_idx > 1) {
		btree_node_sort(c, b, iter, unwritten_idx,
		btree_node_sort(c, b, unwritten_idx,
				b->nsets, false);
		ret = true;
	}

	if (unwritten_idx > 1) {
		btree_node_sort(c, b, iter, 0, unwritten_idx, false);
		btree_node_sort(c, b, 0, unwritten_idx, false);
		ret = true;
	}

@@ -426,12 +424,30 @@ void bch2_btree_init_next(struct bch_fs *c, struct btree *b,
			  struct btree_iter *iter)
{
	struct btree_node_entry *bne;
	bool did_sort;
	bool reinit_iter = false;

	EBUG_ON(!(b->c.lock.state.seq & 1));
	EBUG_ON(iter && iter->l[b->c.level].b != b);
	BUG_ON(bset_written(b, bset(b, &b->set[1])));

	if (b->nsets == MAX_BSETS) {
		unsigned log_u64s[] = {
			ilog2(bset_u64s(&b->set[0])),
			ilog2(bset_u64s(&b->set[1])),
			ilog2(bset_u64s(&b->set[2])),
		};

		if (log_u64s[1] >= (log_u64s[0] + log_u64s[2]) / 2) {
			bch2_btree_node_write(c, b, SIX_LOCK_write);
			reinit_iter = true;
		}
	}

	if (b->nsets == MAX_BSETS &&
	    btree_node_compact(c, b))
		reinit_iter = true;

	did_sort = btree_node_compact(c, b, iter);
	BUG_ON(b->nsets >= MAX_BSETS);

	bne = want_new_bset(c, b);
	if (bne)
@@ -439,7 +455,7 @@ void bch2_btree_init_next(struct bch_fs *c, struct btree *b,

	bch2_btree_build_aux_trees(b);

	if (iter && did_sort)
	if (iter && reinit_iter)
		bch2_btree_iter_reinit_node(iter, b);
}

@@ -1324,8 +1340,7 @@ static int validate_bset_for_write(struct bch_fs *c, struct btree *b,
	return ret;
}

void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
			    enum six_lock_type lock_type_held)
void __bch2_btree_node_write(struct bch_fs *c, struct btree *b)
{
	struct btree_write_bio *wbio;
	struct bset_tree *t;
@@ -1596,7 +1611,7 @@ bool bch2_btree_post_write_cleanup(struct bch_fs *c, struct btree *b)
	 * single bset:
	 */
	if (b->nsets > 1) {
		btree_node_sort(c, b, NULL, 0, b->nsets, true);
		btree_node_sort(c, b, 0, b->nsets, true);
		invalidated_iter = true;
	} else {
		invalidated_iter = bch2_drop_whiteouts(b, COMPACT_ALL);
@@ -1628,11 +1643,10 @@ bool bch2_btree_post_write_cleanup(struct bch_fs *c, struct btree *b)
void bch2_btree_node_write(struct bch_fs *c, struct btree *b,
			   enum six_lock_type lock_type_held)
{
	BUG_ON(lock_type_held == SIX_LOCK_write);

	if (lock_type_held == SIX_LOCK_intent ||
	    six_lock_tryupgrade(&b->c.lock)) {
		__bch2_btree_node_write(c, b, SIX_LOCK_intent);
	    (lock_type_held == SIX_LOCK_read &&
	     six_lock_tryupgrade(&b->c.lock))) {
		__bch2_btree_node_write(c, b);

		/* don't cycle lock unnecessarily: */
		if (btree_node_just_written(b) &&
@@ -1644,7 +1658,10 @@ void bch2_btree_node_write(struct bch_fs *c, struct btree *b,
		if (lock_type_held == SIX_LOCK_read)
			six_lock_downgrade(&b->c.lock);
	} else {
		__bch2_btree_node_write(c, b, SIX_LOCK_read);
		__bch2_btree_node_write(c, b);
		if (lock_type_held == SIX_LOCK_write &&
		    btree_node_just_written(b))
			bch2_btree_post_write_cleanup(c, b);
	}
}

+1 −2
Original line number Diff line number Diff line
@@ -144,8 +144,7 @@ void bch2_btree_complete_write(struct bch_fs *, struct btree *,
			      struct btree_write *);
void bch2_btree_write_error_work(struct work_struct *);

void __bch2_btree_node_write(struct bch_fs *, struct btree *,
			    enum six_lock_type);
void __bch2_btree_node_write(struct bch_fs *, struct btree *);
bool bch2_btree_post_write_cleanup(struct bch_fs *, struct btree *);

void bch2_btree_node_write(struct bch_fs *, struct btree *,
+3 −1
Original line number Diff line number Diff line
@@ -256,13 +256,15 @@ static inline size_t bch_btree_keys_u64s_remaining(struct bch_fs *c,
	return remaining;
}

#define BTREE_WRITE_SET_U64s_BITS	9

static inline unsigned btree_write_set_buffer(struct btree *b)
{
	/*
	 * Could buffer up larger amounts of keys for btrees with larger keys,
	 * pending benchmarking:
	 */
	return 4 << 10;
	return 8 << BTREE_WRITE_SET_U64s_BITS;
}

static inline struct btree_node_entry *want_new_bset(struct bch_fs *c,