Commit 720261cf authored by Linus Torvalds
Browse files

Merge tag 'bcachefs-2024-07-18.2' of https://evilpiepirate.org/git/bcachefs

Pull bcachefs updates from Kent Overstreet:

 - Metadata version 1.8: Stripe sectors accounting, BCH_DATA_unstriped

   This splits out the accounting of dirty sectors and stripe sectors in
   alloc keys; this lets us see stripe buckets that still have unstriped
   data in them.

   This is needed for ensuring that erasure coding is working correctly,
   as well as completing stripe creation after a crash.

 - Metadata version 1.9: Disk accounting rewrite

   The previous disk accounting scheme relied heavily on percpu counters
   that were also sharded by outstanding journal buffer; it was fast but
   not extensible or scalable, and meant that all accounting counters
   were recorded in every journal entry.

   The new disk accounting scheme stores accounting as normal btree
   keys; updates are deltas until they are flushed by the btree write
   buffer.

   This means we have no practical limit on the number of counters, and
   a new tagged union format that's easy to extend.

   We now have counters for compression type/ratio, per-snapshot-id
   usage, per-btree-id usage, and pending rebalance work.

 - Self healing on read IO/checksum error

   Data is now automatically rewritten if we get a read error and then a
   successful retry.

 - Mount API conversion (thanks to Thomas Bertschinger)

 - Better lockdep coverage

   Previously, btree node locks were tracked individually by lockdep,
   like any other lock. But because we may take _many_ btree node locks
   simultaneously, we easily blow through the limit of 48 locks that
   lockdep can track, leading to lockdep turning itself off.

   Tracking each btree node lock individually isn't really necessary
   since we have our own cycle detector for deadlock avoidance and
   centralized tracking of btree node locks, so we now have a single
   lockdep_map in btree_trans for "any btree nodes are locked".

 - Some more small incremental work towards online check_allocations

 - Lots more debugging improvements

 - Fixes, including:
    - undefined behaviour fixes, originally noted as breaking userspace
      LTO builds
    - fix a spurious warning in fsck_err, reported by Marcin
    - fix an integer overflow on trans->nr_updates, also reported by
      Marcin; this broke during deletion of highly fragmented indirect
      extents

* tag 'bcachefs-2024-07-18.2' of https://evilpiepirate.org/git/bcachefs: (120 commits)
  lockdep: Add comments for lockdep_set_no{validate,track}_class()
  bcachefs: Fix integer overflow on trans->nr_updates
  bcachefs: silence silly kdoc warning
  bcachefs: Fix fsck warning about btree_trans not passed to fsck error
  bcachefs: Add an error message for insufficient rw journal devs
  bcachefs: varint: Avoid left-shift of a negative value
  bcachefs: darray: Don't pass NULL to memcpy()
  bcachefs: Kill bch2_assert_btree_nodes_not_locked()
  bcachefs: Rename BCH_WRITE_DONE -> BCH_WRITE_SUBMITTED
  bcachefs: __bch2_read(): call trans_begin() on every loop iter
  bcachefs: show none if label is not set
  bcachefs: drop packed, aligned from bkey_inode_buf
  bcachefs: btree node scan: fall back to comparing by journal seq
  bcachefs: Add lockdep support for btree node locks
  lockdep: lockdep_set_notrack_class()
  bcachefs: Improve copygc_wait_to_text()
  bcachefs: Convert clock code to u64s
  bcachefs: Improve startup message
  bcachefs: Self healing on read IO error
  bcachefs: Make read_only a mount option again, but hidden
  ...
parents 4f40c636 a97b43fa
Loading
Loading
Loading
Loading
+0 −1
Original line number Diff line number Diff line
@@ -3720,7 +3720,6 @@ F: drivers/md/bcache/
BCACHEFS
M:	Kent Overstreet <kent.overstreet@linux.dev>
R:	Brian Foster <bfoster@redhat.com>
L:	linux-bcachefs@vger.kernel.org
S:	Supported
C:	irc://irc.oftc.net/bcache
+2 −1
Original line number Diff line number Diff line
@@ -29,10 +29,11 @@ bcachefs-y := \
	clock.o			\
	compress.o		\
	darray.o		\
	data_update.o		\
	debug.o			\
	dirent.o		\
	disk_accounting.o	\
	disk_groups.o		\
	data_update.o		\
	ec.o			\
	errcode.o		\
	error.o			\
+2 −2
Original line number Diff line number Diff line
@@ -346,7 +346,6 @@ int bch2_set_acl(struct mnt_idmap *idmap,
{
	struct bch_inode_info *inode = to_bch_ei(dentry->d_inode);
	struct bch_fs *c = inode->v.i_sb->s_fs_info;
	struct btree_trans *trans = bch2_trans_get(c);
	struct btree_iter inode_iter = { NULL };
	struct bch_inode_unpacked inode_u;
	struct posix_acl *acl;
@@ -354,6 +353,7 @@ int bch2_set_acl(struct mnt_idmap *idmap,
	int ret;

	mutex_lock(&inode->ei_update_lock);
	struct btree_trans *trans = bch2_trans_get(c);
retry:
	bch2_trans_begin(trans);
	acl = _acl;
@@ -394,8 +394,8 @@ int bch2_set_acl(struct mnt_idmap *idmap,

	set_cached_acl(&inode->v, type, acl);
err:
	mutex_unlock(&inode->ei_update_lock);
	bch2_trans_put(trans);
	mutex_unlock(&inode->ei_update_lock);

	return ret;
}
+119 −70
Original line number Diff line number Diff line
@@ -15,6 +15,7 @@
#include "buckets_waiting_for_journal.h"
#include "clock.h"
#include "debug.h"
#include "disk_accounting.h"
#include "ec.h"
#include "error.h"
#include "lru.h"
@@ -268,27 +269,41 @@ int bch2_alloc_v4_invalid(struct bch_fs *c, struct bkey_s_c k,
				 i == READ ? "read" : "write",
				 a.v->io_time[i], LRU_TIME_MAX);

	unsigned stripe_sectors = BCH_ALLOC_V4_BACKPOINTERS_START(a.v) * sizeof(u64) >
		offsetof(struct bch_alloc_v4, stripe_sectors)
		? a.v->stripe_sectors
		: 0;

	switch (a.v->data_type) {
	case BCH_DATA_free:
	case BCH_DATA_need_gc_gens:
	case BCH_DATA_need_discard:
		bkey_fsck_err_on(bch2_bucket_sectors_total(*a.v) || a.v->stripe,
		bkey_fsck_err_on(stripe_sectors ||
				 a.v->dirty_sectors ||
				 a.v->cached_sectors ||
				 a.v->stripe,
				 c, err, alloc_key_empty_but_have_data,
				 "empty data type free but have data");
				 "empty data type free but have data %u.%u.%u %u",
				 stripe_sectors,
				 a.v->dirty_sectors,
				 a.v->cached_sectors,
				 a.v->stripe);
		break;
	case BCH_DATA_sb:
	case BCH_DATA_journal:
	case BCH_DATA_btree:
	case BCH_DATA_user:
	case BCH_DATA_parity:
		bkey_fsck_err_on(!bch2_bucket_sectors_dirty(*a.v),
		bkey_fsck_err_on(!a.v->dirty_sectors &&
				 !stripe_sectors,
				 c, err, alloc_key_dirty_sectors_0,
				 "data_type %s but dirty_sectors==0",
				 bch2_data_type_str(a.v->data_type));
		break;
	case BCH_DATA_cached:
		bkey_fsck_err_on(!a.v->cached_sectors ||
				 bch2_bucket_sectors_dirty(*a.v) ||
				 a.v->dirty_sectors ||
				 stripe_sectors ||
				 a.v->stripe,
				 c, err, alloc_key_cached_inconsistency,
				 "data type inconsistency");
@@ -319,6 +334,7 @@ void bch2_alloc_v4_swab(struct bkey_s k)
	a->stripe		= swab32(a->stripe);
	a->nr_external_backpointers = swab32(a->nr_external_backpointers);
	a->fragmentation_lru	= swab64(a->fragmentation_lru);
	a->stripe_sectors	= swab32(a->stripe_sectors);

	bps = alloc_v4_backpointers(a);
	for (bp = bps; bp < bps + BCH_ALLOC_V4_NR_BACKPOINTERS(a); bp++) {
@@ -343,6 +359,7 @@ void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c
	prt_printf(out, "need_discard      %llu\n",	BCH_ALLOC_V4_NEED_DISCARD(a));
	prt_printf(out, "need_inc_gen      %llu\n",	BCH_ALLOC_V4_NEED_INC_GEN(a));
	prt_printf(out, "dirty_sectors     %u\n",	a->dirty_sectors);
	prt_printf(out, "stripe_sectors    %u\n",	a->stripe_sectors);
	prt_printf(out, "cached_sectors    %u\n",	a->cached_sectors);
	prt_printf(out, "stripe            %u\n",	a->stripe);
	prt_printf(out, "stripe_redundancy %u\n",	a->stripe_redundancy);
@@ -460,7 +477,8 @@ bch2_trans_start_alloc_update_noupdate(struct btree_trans *trans, struct btree_i
}

__flatten
struct bkey_i_alloc_v4 *bch2_trans_start_alloc_update(struct btree_trans *trans, struct bpos pos)
struct bkey_i_alloc_v4 *bch2_trans_start_alloc_update(struct btree_trans *trans, struct bpos pos,
						      enum btree_iter_update_trigger_flags flags)
{
	struct btree_iter iter;
	struct bkey_i_alloc_v4 *a = bch2_trans_start_alloc_update_noupdate(trans, &iter, pos);
@@ -468,7 +486,7 @@ struct bkey_i_alloc_v4 *bch2_trans_start_alloc_update(struct btree_trans *trans,
	if (ret)
		return ERR_PTR(ret);

	ret = bch2_trans_update(trans, &iter, &a->k_i, 0);
	ret = bch2_trans_update(trans, &iter, &a->k_i, flags);
	bch2_trans_iter_exit(trans, &iter);
	return unlikely(ret) ? ERR_PTR(ret) : a;
}
@@ -579,8 +597,6 @@ int bch2_alloc_read(struct bch_fs *c)
	struct bch_dev *ca = NULL;
	int ret;

	down_read(&c->gc_lock);

	if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_bucket_gens) {
		ret = for_each_btree_key(trans, iter, BTREE_ID_bucket_gens, POS_MIN,
					 BTREE_ITER_prefetch, k, ({
@@ -629,7 +645,6 @@ int bch2_alloc_read(struct bch_fs *c)

	bch2_dev_put(ca);
	bch2_trans_put(trans);
	up_read(&c->gc_lock);

	bch_err_fn(c, ret);
	return ret;
@@ -744,6 +759,61 @@ static noinline int bch2_bucket_gen_update(struct btree_trans *trans,
	return ret;
}

/*
 * Apply a (buckets, sectors, fragmented) delta to the disk accounting
 * counter keyed by device index and data type.
 *
 * Only the BTREE_TRIGGER_gc bit of @flags is forwarded to
 * bch2_disk_accounting_mod(); its return value is passed back unchanged.
 */
static inline int bch2_dev_data_type_accounting_mod(struct btree_trans *trans, struct bch_dev *ca,
						    enum bch_data_type data_type,
						    s64 delta_buckets,
						    s64 delta_sectors,
						    s64 delta_fragmented, unsigned flags)
{
	/* Counter order: buckets, sectors, fragmented sectors */
	s64 deltas[3] = { delta_buckets, delta_sectors, delta_fragmented };
	struct disk_accounting_pos key = {
		.type = BCH_DISK_ACCOUNTING_dev_data_type,
		.dev_data_type.dev		= ca->dev_idx,
		.dev_data_type.data_type	= data_type,
	};

	return bch2_disk_accounting_mod(trans, &key, deltas, 3, flags & BTREE_TRIGGER_gc);
}

/*
 * Translate the difference between two alloc keys for the same bucket into
 * per-device, per-data-type accounting deltas.
 *
 * - If the data type changed, the bucket is added to the new type's counters
 *   and then removed from the old type's counters.
 * - Otherwise, if only the sector count changed, the sector and fragmentation
 *   differences are applied to the (unchanged) data type.
 * - Independently, any change in unstriped sectors is applied to the
 *   BCH_DATA_unstriped counters; the bucket count moves by one when the
 *   unstriped total becomes nonzero or drops to zero.
 *
 * Returns 0 on success, or the first nonzero value returned by
 * bch2_dev_data_type_accounting_mod().
 */
int bch2_alloc_key_to_dev_counters(struct btree_trans *trans, struct bch_dev *ca,
				   const struct bch_alloc_v4 *old,
				   const struct bch_alloc_v4 *new,
				   unsigned flags)
{
	int ret = 0;
	s64 sectors_before = bch2_bucket_sectors(*old);
	s64 sectors_after = bch2_bucket_sectors(*new);

	if (old->data_type != new->data_type) {
		/* Account the bucket under its new type first ... */
		ret = bch2_dev_data_type_accounting_mod(trans, ca, new->data_type,
				1, sectors_after,
				bch2_bucket_sectors_fragmented(ca, *new), flags);
		/* ... and only on success drop it from the old type */
		if (!ret)
			ret = bch2_dev_data_type_accounting_mod(trans, ca, old->data_type,
					-1, -sectors_before,
					-bch2_bucket_sectors_fragmented(ca, *old), flags);
	} else if (sectors_before != sectors_after) {
		s64 fragmented_delta = bch2_bucket_sectors_fragmented(ca, *new) -
				       bch2_bucket_sectors_fragmented(ca, *old);

		ret = bch2_dev_data_type_accounting_mod(trans, ca, new->data_type,
				0, sectors_after - sectors_before,
				fragmented_delta, flags);
	}
	if (ret)
		return ret;

	s64 unstriped_before = bch2_bucket_sectors_unstriped(*old);
	s64 unstriped_after = bch2_bucket_sectors_unstriped(*new);

	if (unstriped_before == unstriped_after)
		return 0;

	return bch2_dev_data_type_accounting_mod(trans, ca, BCH_DATA_unstriped,
			!!unstriped_after - !!unstriped_before,
			unstriped_after - unstriped_before,
			0,
			flags);
}

int bch2_trigger_alloc(struct btree_trans *trans,
		       enum btree_id btree, unsigned level,
		       struct bkey_s_c old, struct bkey_s new,
@@ -759,10 +829,9 @@ int bch2_trigger_alloc(struct btree_trans *trans,

	struct bch_alloc_v4 old_a_convert;
	const struct bch_alloc_v4 *old_a = bch2_alloc_to_v4(old, &old_a_convert);

	if (flags & BTREE_TRIGGER_transactional) {
	struct bch_alloc_v4 *new_a = bkey_s_to_alloc_v4(new).v;

	if (flags & BTREE_TRIGGER_transactional) {
		alloc_data_type_set(new_a, new_a->data_type);

		if (bch2_bucket_sectors_total(*new_a) > bch2_bucket_sectors_total(*old_a)) {
@@ -819,22 +888,21 @@ int bch2_trigger_alloc(struct btree_trans *trans,
				goto err;
		}

		/*
		 * need to know if we're getting called from the invalidate path or
		 * not:
		 */

		if ((flags & BTREE_TRIGGER_bucket_invalidate) &&
		    old_a->cached_sectors) {
			ret = bch2_update_cached_sectors_list(trans, new.k->p.inode,
							      -((s64) old_a->cached_sectors));
			ret = bch2_mod_dev_cached_sectors(trans, ca->dev_idx,
					 -((s64) old_a->cached_sectors),
					 flags & BTREE_TRIGGER_gc);
			if (ret)
				goto err;
		}

		ret = bch2_alloc_key_to_dev_counters(trans, ca, old_a, new_a, flags);
		if (ret)
			goto err;
	}

	if ((flags & BTREE_TRIGGER_atomic) && (flags & BTREE_TRIGGER_insert)) {
		struct bch_alloc_v4 *new_a = bkey_s_to_alloc_v4(new).v;
		u64 journal_seq = trans->journal_res.seq;
		u64 bucket_journal_seq = new_a->journal_seq;

@@ -863,26 +931,22 @@ int bch2_trigger_alloc(struct btree_trans *trans,
					c->journal.flushed_seq_ondisk,
					new.k->p.inode, new.k->p.offset,
					bucket_journal_seq);
			if (ret) {
				bch2_fs_fatal_error(c,
					"setting bucket_needs_journal_commit: %s", bch2_err_str(ret));
			if (bch2_fs_fatal_err_on(ret, c,
					"setting bucket_needs_journal_commit: %s", bch2_err_str(ret)))
				goto err;
		}
		}

		percpu_down_read(&c->mark_lock);
		if (new_a->gen != old_a->gen) {
			rcu_read_lock();
			u8 *gen = bucket_gen(ca, new.k->p.offset);
			if (unlikely(!gen)) {
				percpu_up_read(&c->mark_lock);
				rcu_read_unlock();
				goto invalid_bucket;
			}
			*gen = new_a->gen;
			rcu_read_unlock();
		}

		bch2_dev_usage_update(c, ca, old_a, new_a, journal_seq, false);
		percpu_up_read(&c->mark_lock);

#define eval_state(_a, expr)		({ const struct bch_alloc_v4 *a = _a; expr; })
#define statechange(expr)		!eval_state(old_a, expr) && eval_state(new_a, expr)
#define bucket_flushed(a)		(!a->journal_seq || a->journal_seq <= c->journal.flushed_seq_ondisk)
@@ -905,31 +969,16 @@ int bch2_trigger_alloc(struct btree_trans *trans,
			bch2_gc_gens_async(c);
	}

	if ((flags & BTREE_TRIGGER_gc) &&
	    (flags & BTREE_TRIGGER_bucket_invalidate)) {
		struct bch_alloc_v4 new_a_convert;
		const struct bch_alloc_v4 *new_a = bch2_alloc_to_v4(new.s_c, &new_a_convert);

		percpu_down_read(&c->mark_lock);
	if ((flags & BTREE_TRIGGER_gc) && (flags & BTREE_TRIGGER_insert)) {
		rcu_read_lock();
		struct bucket *g = gc_bucket(ca, new.k->p.offset);
		if (unlikely(!g)) {
			percpu_up_read(&c->mark_lock);
			rcu_read_unlock();
			goto invalid_bucket;
		}
		g->gen_valid	= 1;

		bucket_lock(g);

		g->gen_valid	= 1;
		g->gen		= new_a->gen;
		g->data_type		= new_a->data_type;
		g->stripe		= new_a->stripe;
		g->stripe_redundancy	= new_a->stripe_redundancy;
		g->dirty_sectors	= new_a->dirty_sectors;
		g->cached_sectors	= new_a->cached_sectors;

		bucket_unlock(g);
		percpu_up_read(&c->mark_lock);
		rcu_read_unlock();
	}
err:
	printbuf_exit(&buf);
@@ -1063,7 +1112,7 @@ int bch2_check_alloc_key(struct btree_trans *trans,

	struct bch_dev *ca = bch2_dev_bucket_tryget_noerror(c, alloc_k.k->p);
	if (fsck_err_on(!ca,
			c, alloc_key_to_missing_dev_bucket,
			trans, alloc_key_to_missing_dev_bucket,
			"alloc key for invalid device:bucket %llu:%llu",
			alloc_k.k->p.inode, alloc_k.k->p.offset))
		ret = bch2_btree_delete_at(trans, alloc_iter, 0);
@@ -1083,7 +1132,7 @@ int bch2_check_alloc_key(struct btree_trans *trans,
		goto err;

	if (fsck_err_on(k.k->type != discard_key_type,
			c, need_discard_key_wrong,
			trans, need_discard_key_wrong,
			"incorrect key in need_discard btree (got %s should be %s)\n"
			"  %s",
			bch2_bkey_types[k.k->type],
@@ -1113,7 +1162,7 @@ int bch2_check_alloc_key(struct btree_trans *trans,
		goto err;

	if (fsck_err_on(k.k->type != freespace_key_type,
			c, freespace_key_wrong,
			trans, freespace_key_wrong,
			"incorrect key in freespace btree (got %s should be %s)\n"
			"  %s",
			bch2_bkey_types[k.k->type],
@@ -1144,7 +1193,7 @@ int bch2_check_alloc_key(struct btree_trans *trans,
		goto err;

	if (fsck_err_on(a->gen != alloc_gen(k, gens_offset),
			c, bucket_gens_key_wrong,
			trans, bucket_gens_key_wrong,
			"incorrect gen in bucket_gens btree (got %u should be %u)\n"
			"  %s",
			alloc_gen(k, gens_offset), a->gen,
@@ -1185,7 +1234,6 @@ int bch2_check_alloc_hole_freespace(struct btree_trans *trans,
				    struct bpos *end,
				    struct btree_iter *freespace_iter)
{
	struct bch_fs *c = trans->c;
	struct bkey_s_c k;
	struct printbuf buf = PRINTBUF;
	int ret;
@@ -1203,7 +1251,7 @@ int bch2_check_alloc_hole_freespace(struct btree_trans *trans,
	*end = bkey_min(k.k->p, *end);

	if (fsck_err_on(k.k->type != KEY_TYPE_set,
			c, freespace_hole_missing,
			trans, freespace_hole_missing,
			"hole in alloc btree missing in freespace btree\n"
			"  device %llu buckets %llu-%llu",
			freespace_iter->pos.inode,
@@ -1239,7 +1287,6 @@ int bch2_check_alloc_hole_bucket_gens(struct btree_trans *trans,
				      struct bpos *end,
				      struct btree_iter *bucket_gens_iter)
{
	struct bch_fs *c = trans->c;
	struct bkey_s_c k;
	struct printbuf buf = PRINTBUF;
	unsigned i, gens_offset, gens_end_offset;
@@ -1263,7 +1310,7 @@ int bch2_check_alloc_hole_bucket_gens(struct btree_trans *trans,
		bkey_reassemble(&g.k_i, k);

		for (i = gens_offset; i < gens_end_offset; i++) {
			if (fsck_err_on(g.v.gens[i], c,
			if (fsck_err_on(g.v.gens[i], trans,
					bucket_gens_hole_wrong,
					"hole in alloc btree at %llu:%llu with nonzero gen in bucket_gens btree (%u)",
					bucket_gens_pos_to_alloc(k.k->p, i).inode,
@@ -1321,8 +1368,8 @@ static noinline_for_stack int bch2_check_discard_freespace_key(struct btree_tran
	if (ret)
		return ret;

	if (fsck_err_on(!bch2_dev_bucket_exists(c, pos), c,
			need_discard_freespace_key_to_invalid_dev_bucket,
	if (fsck_err_on(!bch2_dev_bucket_exists(c, pos),
			trans, need_discard_freespace_key_to_invalid_dev_bucket,
			"entry in %s btree for nonexistant dev:bucket %llu:%llu",
			bch2_btree_id_str(iter->btree_id), pos.inode, pos.offset))
		goto delete;
@@ -1331,8 +1378,8 @@ static noinline_for_stack int bch2_check_discard_freespace_key(struct btree_tran

	if (fsck_err_on(a->data_type != state ||
			(state == BCH_DATA_free &&
			 genbits != alloc_freespace_genbits(*a)), c,
			need_discard_freespace_key_bad,
			 genbits != alloc_freespace_genbits(*a)),
			trans, need_discard_freespace_key_bad,
			"%s\n  incorrectly set at %s:%llu:%llu:0 (free %u, genbits %llu should be %llu)",
			(bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf),
			bch2_btree_id_str(iter->btree_id),
@@ -1379,7 +1426,7 @@ int bch2_check_bucket_gens_key(struct btree_trans *trans,

	struct bch_dev *ca = bch2_dev_tryget_noerror(c, k.k->p.inode);
	if (!ca) {
		if (fsck_err(c, bucket_gens_to_invalid_dev,
		if (fsck_err(trans, bucket_gens_to_invalid_dev,
			     "bucket_gens key for invalid device:\n  %s",
			     (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
			ret = bch2_btree_delete_at(trans, iter, 0);
@@ -1387,8 +1434,8 @@ int bch2_check_bucket_gens_key(struct btree_trans *trans,
	}

	if (fsck_err_on(end <= ca->mi.first_bucket ||
			start >= ca->mi.nbuckets, c,
			bucket_gens_to_invalid_buckets,
			start >= ca->mi.nbuckets,
			trans, bucket_gens_to_invalid_buckets,
			"bucket_gens key for invalid buckets:\n  %s",
			(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
		ret = bch2_btree_delete_at(trans, iter, 0);
@@ -1396,16 +1443,16 @@ int bch2_check_bucket_gens_key(struct btree_trans *trans,
	}

	for (b = start; b < ca->mi.first_bucket; b++)
		if (fsck_err_on(g.v.gens[b & KEY_TYPE_BUCKET_GENS_MASK], c,
				bucket_gens_nonzero_for_invalid_buckets,
		if (fsck_err_on(g.v.gens[b & KEY_TYPE_BUCKET_GENS_MASK],
				trans, bucket_gens_nonzero_for_invalid_buckets,
				"bucket_gens key has nonzero gen for invalid bucket")) {
			g.v.gens[b & KEY_TYPE_BUCKET_GENS_MASK] = 0;
			need_update = true;
		}

	for (b = ca->mi.nbuckets; b < end; b++)
		if (fsck_err_on(g.v.gens[b & KEY_TYPE_BUCKET_GENS_MASK], c,
				bucket_gens_nonzero_for_invalid_buckets,
		if (fsck_err_on(g.v.gens[b & KEY_TYPE_BUCKET_GENS_MASK],
				trans, bucket_gens_nonzero_for_invalid_buckets,
				"bucket_gens key has nonzero gen for invalid bucket")) {
			g.v.gens[b & KEY_TYPE_BUCKET_GENS_MASK] = 0;
			need_update = true;
@@ -1585,8 +1632,8 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,
	if (a->data_type != BCH_DATA_cached)
		return 0;

	if (fsck_err_on(!a->io_time[READ], c,
			alloc_key_cached_but_read_time_zero,
	if (fsck_err_on(!a->io_time[READ],
			trans, alloc_key_cached_but_read_time_zero,
			"cached bucket with read_time 0\n"
			"  %s",
		(printbuf_reset(&buf),
@@ -1960,7 +2007,7 @@ static int invalidate_one_bucket(struct btree_trans *trans,
	if (bch2_bucket_is_open_safe(c, bucket.inode, bucket.offset))
		return 0;

	a = bch2_trans_start_alloc_update(trans, bucket);
	a = bch2_trans_start_alloc_update(trans, bucket, BTREE_TRIGGER_bucket_invalidate);
	ret = PTR_ERR_OR_ZERO(a);
	if (ret)
		goto out;
@@ -1981,6 +2028,7 @@ static int invalidate_one_bucket(struct btree_trans *trans,
	a->v.gen++;
	a->v.data_type		= 0;
	a->v.dirty_sectors	= 0;
	a->v.stripe_sectors	= 0;
	a->v.cached_sectors	= 0;
	a->v.io_time[READ]	= bch2_current_io_time(c, READ);
	a->v.io_time[WRITE]	= bch2_current_io_time(c, WRITE);
@@ -2336,6 +2384,7 @@ void bch2_recalc_capacity(struct bch_fs *c)

	reserved_sectors = min(reserved_sectors, capacity);

	c->reserved = reserved_sectors;
	c->capacity = capacity - reserved_sectors;

	c->bucket_size_max = bucket_size_max;
+33 −8
Original line number Diff line number Diff line
@@ -41,6 +41,7 @@ static inline void alloc_to_bucket(struct bucket *dst, struct bch_alloc_v4 src)
{
	dst->gen		= src.gen;
	dst->data_type		= src.data_type;
	dst->stripe_sectors	= src.stripe_sectors;
	dst->dirty_sectors	= src.dirty_sectors;
	dst->cached_sectors	= src.cached_sectors;
	dst->stripe		= src.stripe;
@@ -50,6 +51,7 @@ static inline void __bucket_m_to_alloc(struct bch_alloc_v4 *dst, struct bucket s
{
	dst->gen		= src.gen;
	dst->data_type		= src.data_type;
	dst->stripe_sectors	= src.stripe_sectors;
	dst->dirty_sectors	= src.dirty_sectors;
	dst->cached_sectors	= src.cached_sectors;
	dst->stripe		= src.stripe;
@@ -80,30 +82,49 @@ static inline bool bucket_data_type_mismatch(enum bch_data_type bucket,
		bucket_data_type(bucket) != bucket_data_type(ptr);
}

static inline unsigned bch2_bucket_sectors_total(struct bch_alloc_v4 a)
static inline s64 bch2_bucket_sectors_total(struct bch_alloc_v4 a)
{
	return a.dirty_sectors + a.cached_sectors;
	return a.stripe_sectors + a.dirty_sectors + a.cached_sectors;
}

static inline unsigned bch2_bucket_sectors_dirty(struct bch_alloc_v4 a)
static inline s64 bch2_bucket_sectors_dirty(struct bch_alloc_v4 a)
{
	return a.dirty_sectors;
	return a.stripe_sectors + a.dirty_sectors;
}

static inline unsigned bch2_bucket_sectors_fragmented(struct bch_dev *ca,
/*
 * Sector count that matters for the bucket's current data type:
 * cached_sectors for BCH_DATA_cached buckets, otherwise whatever
 * bch2_bucket_sectors_dirty() reports.
 */
static inline s64 bch2_bucket_sectors(struct bch_alloc_v4 a)
{
	if (a.data_type == BCH_DATA_cached)
		return a.cached_sectors;

	return bch2_bucket_sectors_dirty(a);
}

static inline s64 bch2_bucket_sectors_fragmented(struct bch_dev *ca,
						 struct bch_alloc_v4 a)
{
	int d = bch2_bucket_sectors_dirty(a);
	int d = bch2_bucket_sectors(a);

	return d ? max(0, ca->mi.bucket_size - d) : 0;
}

/*
 * Fragmented (unused) sectors in an in-memory bucket: the bucket size minus
 * stripe_sectors + dirty_sectors, clamped at zero. A bucket holding neither
 * stripe nor dirty sectors reports 0.
 */
static inline s64 bch2_gc_bucket_sectors_fragmented(struct bch_dev *ca, struct bucket a)
{
	int used = a.stripe_sectors + a.dirty_sectors;

	if (!used)
		return 0;

	return max(0, ca->mi.bucket_size - used);
}

/*
 * Dirty sectors held in a BCH_DATA_stripe bucket; 0 for every other
 * data type.
 */
static inline s64 bch2_bucket_sectors_unstriped(struct bch_alloc_v4 a)
{
	if (a.data_type != BCH_DATA_stripe)
		return 0;

	return a.dirty_sectors;
}

static inline enum bch_data_type alloc_data_type(struct bch_alloc_v4 a,
						 enum bch_data_type data_type)
{
	if (a.stripe)
		return data_type == BCH_DATA_parity ? data_type : BCH_DATA_stripe;
	if (a.dirty_sectors)
	if (bch2_bucket_sectors_dirty(a))
		return data_type;
	if (a.cached_sectors)
		return BCH_DATA_cached;
@@ -185,7 +206,8 @@ static inline void set_alloc_v4_u64s(struct bkey_i_alloc_v4 *a)
struct bkey_i_alloc_v4 *
bch2_trans_start_alloc_update_noupdate(struct btree_trans *, struct btree_iter *, struct bpos);
struct bkey_i_alloc_v4 *
bch2_trans_start_alloc_update(struct btree_trans *, struct bpos);
bch2_trans_start_alloc_update(struct btree_trans *, struct bpos,
			      enum btree_iter_update_trigger_flags);

void __bch2_alloc_to_v4(struct bkey_s_c, struct bch_alloc_v4 *);

@@ -270,6 +292,9 @@ static inline bool bkey_is_alloc(const struct bkey *k)

int bch2_alloc_read(struct bch_fs *);

int bch2_alloc_key_to_dev_counters(struct btree_trans *, struct bch_dev *,
				   const struct bch_alloc_v4 *,
				   const struct bch_alloc_v4 *, unsigned);
int bch2_trigger_alloc(struct btree_trans *, enum btree_id, unsigned,
		       struct bkey_s_c, struct bkey_s,
		       enum btree_iter_update_trigger_flags);
Loading