Commit fb23d57a authored by Kent Overstreet
Browse files

bcachefs: Convert gc to new accounting



Rewrite fsck/gc for the new accounting scheme.

This adds a second set of in-memory accounting counters for gc to use;
like with other parts of gc we run all triggers in TRIGGER_GC mode, then
compare what we calculated to existing in-memory accounting at the end.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent 4c4a7d48
Loading
Loading
Loading
Loading
+3 −31
Original line number Diff line number Diff line
@@ -774,7 +774,7 @@ static inline int bch2_dev_data_type_accounting_mod(struct btree_trans *trans, s
	};
	s64 d[3] = { delta_buckets, delta_sectors, delta_fragmented };

	return bch2_disk_accounting_mod(trans, &acc, d, 3);
	return bch2_disk_accounting_mod(trans, &acc, d, 3, flags & BTREE_TRIGGER_gc);
}

int bch2_alloc_key_to_dev_counters(struct btree_trans *trans, struct bch_dev *ca,
@@ -894,7 +894,8 @@ int bch2_trigger_alloc(struct btree_trans *trans,
		if ((flags & BTREE_TRIGGER_bucket_invalidate) &&
		    old_a->cached_sectors) {
			ret = bch2_mod_dev_cached_sectors(trans, ca->dev_idx,
					 -((s64) old_a->cached_sectors));
					 -((s64) old_a->cached_sectors),
					 flags & BTREE_TRIGGER_gc);
			if (ret)
				goto err;
		}
@@ -973,35 +974,6 @@ int bch2_trigger_alloc(struct btree_trans *trans,
		if (statechange(a->data_type == BCH_DATA_need_gc_gens))
			bch2_gc_gens_async(c);
	}

	if ((flags & BTREE_TRIGGER_gc) &&
	    (flags & BTREE_TRIGGER_bucket_invalidate)) {
		struct bch_alloc_v4 new_a_convert;
		const struct bch_alloc_v4 *new_a = bch2_alloc_to_v4(new.s_c, &new_a_convert);

		percpu_down_read(&c->mark_lock);
		struct bucket *g = gc_bucket(ca, new.k->p.offset);
		if (unlikely(!g)) {
			percpu_up_read(&c->mark_lock);
			goto invalid_bucket;
		}
		g->gen_valid	= 1;

		bucket_lock(g);

		g->gen_valid		= 1;
		g->gen			= new_a->gen;
		g->data_type		= new_a->data_type;
		g->stripe		= new_a->stripe;
		g->stripe_redundancy	= new_a->stripe_redundancy;
		g->dirty_sectors	= new_a->dirty_sectors;
		g->cached_sectors	= new_a->cached_sectors;

		bucket_unlock(g);
		percpu_up_read(&c->mark_lock);

		bch2_dev_usage_update(c, ca, old_a, new_a);
	}
err:
	printbuf_exit(&buf);
	bch2_dev_put(ca);
+7 −9
Original line number Diff line number Diff line
@@ -1708,15 +1708,13 @@ void bch2_fs_alloc_debug_to_text(struct printbuf *out, struct bch_fs *c)

	prt_printf(out, "capacity\t%llu\n",		c->capacity);
	prt_printf(out, "reserved\t%llu\n",		c->reserved);
	percpu_down_read(&c->mark_lock);
	prt_printf(out, "hidden\t%llu\n",			bch2_fs_usage_read_one(c, &c->usage_base->b.hidden));
	prt_printf(out, "btree\t%llu\n",			bch2_fs_usage_read_one(c, &c->usage_base->b.btree));
	prt_printf(out, "data\t%llu\n",				bch2_fs_usage_read_one(c, &c->usage_base->b.data));
	prt_printf(out, "cached\t%llu\n",			bch2_fs_usage_read_one(c, &c->usage_base->b.cached));
	prt_printf(out, "reserved\t%llu\n",			bch2_fs_usage_read_one(c, &c->usage_base->b.reserved));
	prt_printf(out, "hidden\t%llu\n",		percpu_u64_get(&c->usage->hidden));
	prt_printf(out, "btree\t%llu\n",		percpu_u64_get(&c->usage->btree));
	prt_printf(out, "data\t%llu\n",			percpu_u64_get(&c->usage->data));
	prt_printf(out, "cached\t%llu\n",		percpu_u64_get(&c->usage->cached));
	prt_printf(out, "reserved\t%llu\n",		percpu_u64_get(&c->usage->reserved));
	prt_printf(out, "online_reserved\t%llu\n",	percpu_u64_get(c->online_reserved));
	prt_printf(out, "nr_inodes\t%llu\n",			bch2_fs_usage_read_one(c, &c->usage_base->b.nr_inodes));
	percpu_up_read(&c->mark_lock);
	prt_printf(out, "nr_inodes\t%llu\n",		percpu_u64_get(&c->usage->nr_inodes));

	prt_newline(out);
	prt_printf(out, "freelist_wait\t%s\n",			c->freelist_wait.list.first ? "waiting" : "empty");
+1 −3
Original line number Diff line number Diff line
@@ -546,7 +546,6 @@ struct bch_dev {
	struct rw_semaphore	bucket_lock;

	struct bch_dev_usage __percpu	*usage;
	struct bch_dev_usage __percpu	*usage_gc;

	/* Allocator: */
	u64			new_fs_bucket_idx;
@@ -741,7 +740,7 @@ struct bch_fs {

	struct bch_dev __rcu	*devs[BCH_SB_MEMBERS_MAX];

	struct bch_accounting_mem accounting;
	struct bch_accounting_mem accounting[2];

	struct bch_replicas_cpu replicas;
	struct bch_replicas_cpu replicas_gc;
@@ -890,7 +889,6 @@ struct bch_fs {

	seqcount_t			usage_lock;
	struct bch_fs_usage_base __percpu *usage;
	struct bch_fs_usage __percpu	*usage_gc;
	u64 __percpu		*online_reserved;

	struct io_clock		io_clock[2];
+16 −117
Original line number Diff line number Diff line
@@ -20,6 +20,7 @@
#include "buckets.h"
#include "clock.h"
#include "debug.h"
#include "disk_accounting.h"
#include "ec.h"
#include "error.h"
#include "extents.h"
@@ -735,132 +736,25 @@ static int bch2_mark_superblocks(struct bch_fs *c)

/*
 * Release the state that was set up for a gc pass on filesystem @c:
 *  - the accounting set in c->accounting[1] (presumably the gc copy of the
 *    in-memory accounting counters; confirm against bch2_accounting_free()),
 *  - the reflink and stripe gc radix trees,
 *  - every member device's gc bucket array and percpu gc usage,
 *  - the filesystem-wide percpu gc usage.
 *
 * The buckets_gc and usage_gc pointers are reset to NULL after being freed.
 */
static void bch2_gc_free(struct bch_fs *c)
{
	bch2_accounting_free(&c->accounting[1]);

	genradix_free(&c->reflink_gc_table);
	genradix_free(&c->gc_stripes);

	for_each_member_device(c, ca) {
		/*
		 * The protection condition is hard-coded to 1, i.e. no lock is
		 * named here as guarding buckets_gc.
		 * NOTE(review): confirm no concurrent readers exist at this point.
		 */
		kvfree(rcu_dereference_protected(ca->buckets_gc, 1));
		ca->buckets_gc = NULL;

		free_percpu(ca->usage_gc);
		ca->usage_gc = NULL;
	}

	free_percpu(c->usage_gc);
	c->usage_gc = NULL;
}

/*
 * Compare the per-device usage counters computed by gc (ca->usage_gc)
 * against the live counters (ca->usage) for every member device, for each
 * data type's buckets, sectors and fragmented fields.  A mismatch is passed
 * to fsck_err_on(); when that evaluates true the live field is overwritten
 * with the gc value.
 *
 * The whole comparison runs with c->mark_lock held for write.
 *
 * Returns ret, which is initialised to 0 and is not assigned directly in this
 * function.
 * NOTE(review): presumably fsck_err_on() can set ret and jump to the fsck_err
 * label; confirm against its definition.
 */
static int bch2_gc_done(struct bch_fs *c)
{
	struct bch_dev *ca = NULL;
	struct printbuf buf = PRINTBUF;
	unsigned i;
	int ret = 0;

	percpu_down_write(&c->mark_lock);

	/*
	 * copy_field(): report via fsck_err_on() when dst and src disagree on
	 * field _f, and copy src's value into dst if the check evaluates true.
	 * Relies on local variables named dst and src being in scope.
	 * copy_dev_field()/copy_fs_field() only prepend a message prefix.
	 */
#define copy_field(_err, _f, _msg, ...)						\
	if (fsck_err_on(dst->_f != src->_f, c, _err,				\
			_msg ": got %llu, should be %llu" , ##__VA_ARGS__,	\
			dst->_f, src->_f))					\
		dst->_f = src->_f
#define copy_dev_field(_err, _f, _msg, ...)					\
	copy_field(_err, _f, "dev %u has wrong " _msg, ca->dev_idx, ##__VA_ARGS__)
#define copy_fs_field(_err, _f, _msg, ...)					\
	copy_field(_err, _f, "fs has wrong " _msg, ##__VA_ARGS__)

	__for_each_member_device(c, ca) {
		/* XXX */
		/* dst is only the current CPU's slot of the live percpu usage. */
		struct bch_dev_usage *dst = this_cpu_ptr(ca->usage);
		/*
		 * src is whatever bch2_acc_percpu_u64s() returns for the gc
		 * counters, treated as dev_usage_u64s() u64 values.
		 * NOTE(review): presumably this sums all CPUs' slots; confirm.
		 */
		struct bch_dev_usage *src = (void *)
			bch2_acc_percpu_u64s((u64 __percpu *) ca->usage_gc,
					     dev_usage_u64s());

		for (i = 0; i < BCH_DATA_NR; i++) {
			copy_dev_field(dev_usage_buckets_wrong,
				       d[i].buckets,	"%s buckets", bch2_data_type_str(i));
			copy_dev_field(dev_usage_sectors_wrong,
				       d[i].sectors,	"%s sectors", bch2_data_type_str(i));
			copy_dev_field(dev_usage_fragmented_wrong,
				       d[i].fragmented,	"%s fragmented", bch2_data_type_str(i));
		}
	}

	{
		/*
		 * Compiled out: the filesystem-wide comparison (base counters,
		 * persistent_reserved[] and per-replicas-entry counters) is
		 * disabled by the #if 0 below.
		 */
#if 0
		unsigned nr = fs_usage_u64s(c);
		/* XX: */
		struct bch_fs_usage *dst = this_cpu_ptr(c->usage);
		struct bch_fs_usage *src = (void *)
			bch2_acc_percpu_u64s((u64 __percpu *) c->usage_gc, nr);

		copy_fs_field(fs_usage_hidden_wrong,
			      b.hidden,		"hidden");
		copy_fs_field(fs_usage_btree_wrong,
			      b.btree,		"btree");

		copy_fs_field(fs_usage_data_wrong,
			      b.data,	"data");
		copy_fs_field(fs_usage_cached_wrong,
			      b.cached,	"cached");
		copy_fs_field(fs_usage_reserved_wrong,
			      b.reserved,	"reserved");
		copy_fs_field(fs_usage_nr_inodes_wrong,
			      b.nr_inodes,"nr_inodes");

		for (i = 0; i < BCH_REPLICAS_MAX; i++)
			copy_fs_field(fs_usage_persistent_reserved_wrong,
				      persistent_reserved[i],
				      "persistent_reserved[%i]", i);

		for (i = 0; i < c->replicas.nr; i++) {
			struct bch_replicas_entry_v1 *e =
				cpu_replicas_entry(&c->replicas, i);

			printbuf_reset(&buf);
			bch2_replicas_entry_to_text(&buf, e);

			copy_fs_field(fs_usage_replicas_wrong,
				      replicas[i], "%s", buf.buf);
		}
#endif
	}

	/* copy_stripe_field is not defined anywhere in this function. */
#undef copy_fs_field
#undef copy_dev_field
#undef copy_stripe_field
#undef copy_field
	/*
	 * Common exit path; no explicit goto to this label appears in this
	 * function.
	 */
fsck_err:
	bch2_dev_put(ca);
	bch_err_fn(c, ret);
	percpu_up_write(&c->mark_lock);
	printbuf_exit(&buf);
	return ret;
}

static int bch2_gc_start(struct bch_fs *c)
{
	BUG_ON(c->usage_gc);

	c->usage_gc = __alloc_percpu_gfp(fs_usage_u64s(c) * sizeof(u64),
					 sizeof(u64), GFP_KERNEL);
	if (!c->usage_gc) {
		bch_err(c, "error allocating c->usage_gc");
		return -BCH_ERR_ENOMEM_gc_start;
	}

	for_each_member_device(c, ca) {
		BUG_ON(ca->usage_gc);

		ca->usage_gc = alloc_percpu(struct bch_dev_usage);
		if (!ca->usage_gc) {
			bch_err(c, "error allocating ca->usage_gc");
		int ret = bch2_dev_usage_init(ca, true);
		if (ret) {
			bch2_dev_put(ca);
			return -BCH_ERR_ENOMEM_gc_start;
			return ret;
		}

		this_cpu_write(ca->usage_gc->d[BCH_DATA_free].buckets,
			       ca->mi.nbuckets - ca->mi.first_bucket);
	}

	return 0;
@@ -908,6 +802,7 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
		gc.data_type = old->data_type;
		gc.dirty_sectors = old->dirty_sectors;
	}
	percpu_up_read(&c->mark_lock);

	/*
	 * gc.data_type doesn't yet include need_discard & need_gc_gen states -
@@ -916,9 +811,11 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
	alloc_data_type_set(&gc, gc.data_type);

	if (gc.data_type != old_gc.data_type ||
	    gc.dirty_sectors != old_gc.dirty_sectors)
		bch2_dev_usage_update(c, ca, &old_gc, &gc);
	percpu_up_read(&c->mark_lock);
	    gc.dirty_sectors != old_gc.dirty_sectors) {
		ret = bch2_alloc_key_to_dev_counters(trans, ca, &old_gc, &gc, BTREE_TRIGGER_gc);
		if (ret)
			return ret;
	}

	gc.fragmentation_lru = alloc_lru_idx_fragmentation(gc, ca);

@@ -1235,7 +1132,9 @@ int bch2_check_allocations(struct bch_fs *c)
	gc_pos_set(c, gc_phase(GC_PHASE_start));

	ret = bch2_mark_superblocks(c);
	BUG_ON(ret);
	bch_err_msg(c, ret, "marking superblocks");
	if (ret)
		goto out;

	ret = bch2_gc_btrees(c);
	if (ret)
@@ -1246,7 +1145,7 @@ int bch2_check_allocations(struct bch_fs *c)
	bch2_journal_block(&c->journal);
out:
	ret   = bch2_gc_alloc_done(c) ?:
		bch2_gc_done(c) ?:
		bch2_accounting_gc_done(c) ?:
		bch2_gc_stripes_done(c) ?:
		bch2_gc_reflink_done(c);

+2 −2
Original line number Diff line number Diff line
@@ -724,7 +724,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
				a->k.version = journal_pos_to_bversion(&trans->journal_res,
								(u64 *) entry - (u64 *) trans->journal_entries);
				BUG_ON(bversion_zero(a->k.version));
				ret = bch2_accounting_mem_mod(trans, accounting_i_to_s_c(a));
				ret = bch2_accounting_mem_mod_locked(trans, accounting_i_to_s_c(a), false);
				if (ret)
					goto revert_fs_usage;
			}
@@ -812,7 +812,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
			struct bkey_s_accounting a = bkey_i_to_s_accounting(entry2->start);

			bch2_accounting_neg(a);
			bch2_accounting_mem_mod(trans, a.c);
			bch2_accounting_mem_mod_locked(trans, a.c, false);
			bch2_accounting_neg(a);
		}
	percpu_up_read(&c->mark_lock);
Loading