Commit b7181758 authored by Linus Torvalds
Browse files

Merge tag 'bcachefs-2024-08-16' of git://evilpiepirate.org/bcachefs

Pull bcachefs fixes from Kent Overstreet:

 - New on disk format version, bcachefs_metadata_version_disk_accounting_inum

   This adds one more disk accounting counter, which counts disk usage
   and number of extents per inode number. This lets us track
   fragmentation, for implementing defragmentation later, and it also
   counts disk usage per inode in all snapshots, which will be a useful
   thing to expose to users.

 - One performance issue we've observed is threads spinning when they
   should be waiting for dirty keys in the key cache to be flushed by
   journal reclaim, so we now have hysteresis for the waiting thread, as
   well as an improved tracepoint and a new time_stat for tracking
   time blocked waiting on key cache flushing.

... and various assorted smaller fixes.

* tag 'bcachefs-2024-08-16' of git://evilpiepirate.org/bcachefs:
  bcachefs: Fix locking in __bch2_trans_mark_dev_sb()
  bcachefs: fix incorrect i_state usage
  bcachefs: avoid overflowing LRU_TIME_BITS for cached data lru
  bcachefs: Fix forgetting to pass trans to fsck_err()
  bcachefs: Increase size of cuckoo hash table on too many rehashes
  bcachefs: bcachefs_metadata_version_disk_accounting_inum
  bcachefs: Kill __bch2_accounting_mem_mod()
  bcachefs: Make bkey_fsck_err() a wrapper around fsck_err()
  bcachefs: Fix warning in __bch2_fsck_err() for trans not passed in
  bcachefs: Add a time_stat for blocked on key cache flush
  bcachefs: Improve trans_blocked_journal_reclaim tracepoint
  bcachefs: Add hysteresis to waiting on btree key cache flush
  lib/generic-radix-tree.c: Fix rare race in __genradix_ptr_alloc()
  bcachefs: Convert for_each_btree_node() to lockrestart_do()
  bcachefs: Add missing downgrade table entry
  bcachefs: disk accounting: ignore unknown types
  bcachefs: bch2_accounting_invalid() fixup
  bcachefs: Fix bch2_trigger_alloc when upgrading from old versions
  bcachefs: delete faulty fastpath in bch2_btree_path_traverse_cached()
parents e5fa841a 0e49d3ff
Loading
Loading
Loading
Loading
+42 −35
Original line number Diff line number Diff line
@@ -196,75 +196,71 @@ static unsigned bch_alloc_v1_val_u64s(const struct bch_alloc *a)
	return DIV_ROUND_UP(bytes, sizeof(u64));
}

int bch2_alloc_v1_invalid(struct bch_fs *c, struct bkey_s_c k,
			  enum bch_validate_flags flags,
			  struct printbuf *err)
int bch2_alloc_v1_validate(struct bch_fs *c, struct bkey_s_c k,
			   enum bch_validate_flags flags)
{
	struct bkey_s_c_alloc a = bkey_s_c_to_alloc(k);
	int ret = 0;

	/* allow for unknown fields */
	bkey_fsck_err_on(bkey_val_u64s(a.k) < bch_alloc_v1_val_u64s(a.v), c, err,
			 alloc_v1_val_size_bad,
	bkey_fsck_err_on(bkey_val_u64s(a.k) < bch_alloc_v1_val_u64s(a.v),
			 c, alloc_v1_val_size_bad,
			 "incorrect value size (%zu < %u)",
			 bkey_val_u64s(a.k), bch_alloc_v1_val_u64s(a.v));
fsck_err:
	return ret;
}

int bch2_alloc_v2_invalid(struct bch_fs *c, struct bkey_s_c k,
			  enum bch_validate_flags flags,
			  struct printbuf *err)
int bch2_alloc_v2_validate(struct bch_fs *c, struct bkey_s_c k,
			   enum bch_validate_flags flags)
{
	struct bkey_alloc_unpacked u;
	int ret = 0;

	bkey_fsck_err_on(bch2_alloc_unpack_v2(&u, k), c, err,
			 alloc_v2_unpack_error,
	bkey_fsck_err_on(bch2_alloc_unpack_v2(&u, k),
			 c, alloc_v2_unpack_error,
			 "unpack error");
fsck_err:
	return ret;
}

int bch2_alloc_v3_invalid(struct bch_fs *c, struct bkey_s_c k,
			  enum bch_validate_flags flags,
			  struct printbuf *err)
int bch2_alloc_v3_validate(struct bch_fs *c, struct bkey_s_c k,
			   enum bch_validate_flags flags)
{
	struct bkey_alloc_unpacked u;
	int ret = 0;

	bkey_fsck_err_on(bch2_alloc_unpack_v3(&u, k), c, err,
			 alloc_v2_unpack_error,
	bkey_fsck_err_on(bch2_alloc_unpack_v3(&u, k),
			 c, alloc_v2_unpack_error,
			 "unpack error");
fsck_err:
	return ret;
}

int bch2_alloc_v4_invalid(struct bch_fs *c, struct bkey_s_c k,
			  enum bch_validate_flags flags, struct printbuf *err)
int bch2_alloc_v4_validate(struct bch_fs *c, struct bkey_s_c k,
			   enum bch_validate_flags flags)
{
	struct bkey_s_c_alloc_v4 a = bkey_s_c_to_alloc_v4(k);
	int ret = 0;

	bkey_fsck_err_on(alloc_v4_u64s_noerror(a.v) > bkey_val_u64s(k.k), c, err,
			 alloc_v4_val_size_bad,
	bkey_fsck_err_on(alloc_v4_u64s_noerror(a.v) > bkey_val_u64s(k.k),
			 c, alloc_v4_val_size_bad,
			 "bad val size (%u > %zu)",
			 alloc_v4_u64s_noerror(a.v), bkey_val_u64s(k.k));

	bkey_fsck_err_on(!BCH_ALLOC_V4_BACKPOINTERS_START(a.v) &&
			 BCH_ALLOC_V4_NR_BACKPOINTERS(a.v), c, err,
			 alloc_v4_backpointers_start_bad,
			 BCH_ALLOC_V4_NR_BACKPOINTERS(a.v),
			 c, alloc_v4_backpointers_start_bad,
			 "invalid backpointers_start");

	bkey_fsck_err_on(alloc_data_type(*a.v, a.v->data_type) != a.v->data_type, c, err,
			 alloc_key_data_type_bad,
	bkey_fsck_err_on(alloc_data_type(*a.v, a.v->data_type) != a.v->data_type,
			 c, alloc_key_data_type_bad,
			 "invalid data type (got %u should be %u)",
			 a.v->data_type, alloc_data_type(*a.v, a.v->data_type));

	for (unsigned i = 0; i < 2; i++)
		bkey_fsck_err_on(a.v->io_time[i] > LRU_TIME_MAX,
				 c, err,
				 alloc_key_io_time_bad,
				 c, alloc_key_io_time_bad,
				 "invalid io_time[%s]: %llu, max %llu",
				 i == READ ? "read" : "write",
				 a.v->io_time[i], LRU_TIME_MAX);
@@ -282,7 +278,7 @@ int bch2_alloc_v4_invalid(struct bch_fs *c, struct bkey_s_c k,
				 a.v->dirty_sectors ||
				 a.v->cached_sectors ||
				 a.v->stripe,
				 c, err, alloc_key_empty_but_have_data,
				 c, alloc_key_empty_but_have_data,
				 "empty data type free but have data %u.%u.%u %u",
				 stripe_sectors,
				 a.v->dirty_sectors,
@@ -296,7 +292,7 @@ int bch2_alloc_v4_invalid(struct bch_fs *c, struct bkey_s_c k,
	case BCH_DATA_parity:
		bkey_fsck_err_on(!a.v->dirty_sectors &&
				 !stripe_sectors,
				 c, err, alloc_key_dirty_sectors_0,
				 c, alloc_key_dirty_sectors_0,
				 "data_type %s but dirty_sectors==0",
				 bch2_data_type_str(a.v->data_type));
		break;
@@ -305,12 +301,12 @@ int bch2_alloc_v4_invalid(struct bch_fs *c, struct bkey_s_c k,
				 a.v->dirty_sectors ||
				 stripe_sectors ||
				 a.v->stripe,
				 c, err, alloc_key_cached_inconsistency,
				 c, alloc_key_cached_inconsistency,
				 "data type inconsistency");

		bkey_fsck_err_on(!a.v->io_time[READ] &&
				 c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_to_lru_refs,
				 c, err, alloc_key_cached_but_read_time_zero,
				 c, alloc_key_cached_but_read_time_zero,
				 "cached bucket with read_time == 0");
		break;
	case BCH_DATA_stripe:
@@ -513,14 +509,13 @@ static unsigned alloc_gen(struct bkey_s_c k, unsigned offset)
		: 0;
}

int bch2_bucket_gens_invalid(struct bch_fs *c, struct bkey_s_c k,
			     enum bch_validate_flags flags,
			     struct printbuf *err)
int bch2_bucket_gens_validate(struct bch_fs *c, struct bkey_s_c k,
			     enum bch_validate_flags flags)
{
	int ret = 0;

	bkey_fsck_err_on(bkey_val_bytes(k.k) != sizeof(struct bch_bucket_gens), c, err,
			 bucket_gens_val_size_bad,
	bkey_fsck_err_on(bkey_val_bytes(k.k) != sizeof(struct bch_bucket_gens),
			 c, bucket_gens_val_size_bad,
			 "bad val size (%zu != %zu)",
			 bkey_val_bytes(k.k), sizeof(struct bch_bucket_gens));
fsck_err:
@@ -829,7 +824,19 @@ int bch2_trigger_alloc(struct btree_trans *trans,

	struct bch_alloc_v4 old_a_convert;
	const struct bch_alloc_v4 *old_a = bch2_alloc_to_v4(old, &old_a_convert);
	struct bch_alloc_v4 *new_a = bkey_s_to_alloc_v4(new).v;

	struct bch_alloc_v4 *new_a;
	if (likely(new.k->type == KEY_TYPE_alloc_v4)) {
		new_a = bkey_s_to_alloc_v4(new).v;
	} else {
		BUG_ON(!(flags & BTREE_TRIGGER_gc));

		struct bkey_i_alloc_v4 *new_ka = bch2_alloc_to_v4_mut_inlined(trans, new.s_c);
		ret = PTR_ERR_OR_ZERO(new_ka);
		if (unlikely(ret))
			goto err;
		new_a = &new_ka->v;
	}

	if (flags & BTREE_TRIGGER_transactional) {
		alloc_data_type_set(new_a, new_a->data_type);
+14 −16
Original line number Diff line number Diff line
@@ -150,7 +150,9 @@ static inline void alloc_data_type_set(struct bch_alloc_v4 *a, enum bch_data_typ

static inline u64 alloc_lru_idx_read(struct bch_alloc_v4 a)
{
	return a.data_type == BCH_DATA_cached ? a.io_time[READ] : 0;
	return a.data_type == BCH_DATA_cached
		? a.io_time[READ] & LRU_TIME_MAX
		: 0;
}

#define DATA_TYPES_MOVABLE		\
@@ -240,52 +242,48 @@ struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut(struct btree_trans *, struct bkey_s

int bch2_bucket_io_time_reset(struct btree_trans *, unsigned, size_t, int);

int bch2_alloc_v1_invalid(struct bch_fs *, struct bkey_s_c,
			  enum bch_validate_flags, struct printbuf *);
int bch2_alloc_v2_invalid(struct bch_fs *, struct bkey_s_c,
			  enum bch_validate_flags, struct printbuf *);
int bch2_alloc_v3_invalid(struct bch_fs *, struct bkey_s_c,
			  enum bch_validate_flags, struct printbuf *);
int bch2_alloc_v4_invalid(struct bch_fs *, struct bkey_s_c,
			  enum bch_validate_flags, struct printbuf *);
int bch2_alloc_v1_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags);
int bch2_alloc_v2_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags);
int bch2_alloc_v3_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags);
int bch2_alloc_v4_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags);
void bch2_alloc_v4_swab(struct bkey_s);
void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);

#define bch2_bkey_ops_alloc ((struct bkey_ops) {	\
	.key_invalid	= bch2_alloc_v1_invalid,	\
	.key_validate	= bch2_alloc_v1_validate,	\
	.val_to_text	= bch2_alloc_to_text,		\
	.trigger	= bch2_trigger_alloc,		\
	.min_val_size	= 8,				\
})

#define bch2_bkey_ops_alloc_v2 ((struct bkey_ops) {	\
	.key_invalid	= bch2_alloc_v2_invalid,	\
	.key_validate	= bch2_alloc_v2_validate,	\
	.val_to_text	= bch2_alloc_to_text,		\
	.trigger	= bch2_trigger_alloc,		\
	.min_val_size	= 8,				\
})

#define bch2_bkey_ops_alloc_v3 ((struct bkey_ops) {	\
	.key_invalid	= bch2_alloc_v3_invalid,	\
	.key_validate	= bch2_alloc_v3_validate,	\
	.val_to_text	= bch2_alloc_to_text,		\
	.trigger	= bch2_trigger_alloc,		\
	.min_val_size	= 16,				\
})

#define bch2_bkey_ops_alloc_v4 ((struct bkey_ops) {	\
	.key_invalid	= bch2_alloc_v4_invalid,	\
	.key_validate	= bch2_alloc_v4_validate,	\
	.val_to_text	= bch2_alloc_to_text,		\
	.swab		= bch2_alloc_v4_swab,		\
	.trigger	= bch2_trigger_alloc,		\
	.min_val_size	= 48,				\
})

int bch2_bucket_gens_invalid(struct bch_fs *, struct bkey_s_c,
			     enum bch_validate_flags, struct printbuf *);
int bch2_bucket_gens_validate(struct bch_fs *, struct bkey_s_c,
			     enum bch_validate_flags);
void bch2_bucket_gens_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);

#define bch2_bkey_ops_bucket_gens ((struct bkey_ops) {	\
	.key_invalid	= bch2_bucket_gens_invalid,	\
	.key_validate	= bch2_bucket_gens_validate,	\
	.val_to_text	= bch2_bucket_gens_to_text,	\
})

+8 −15
Original line number Diff line number Diff line
@@ -47,9 +47,8 @@ static bool extent_matches_bp(struct bch_fs *c,
	return false;
}

int bch2_backpointer_invalid(struct bch_fs *c, struct bkey_s_c k,
			     enum bch_validate_flags flags,
			     struct printbuf *err)
int bch2_backpointer_validate(struct bch_fs *c, struct bkey_s_c k,
			      enum bch_validate_flags flags)
{
	struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k);

@@ -68,8 +67,7 @@ int bch2_backpointer_invalid(struct bch_fs *c, struct bkey_s_c k,

	bkey_fsck_err_on((bp.v->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT) >= ca->mi.bucket_size ||
			 !bpos_eq(bp.k->p, bp_pos),
			 c, err,
			 backpointer_bucket_offset_wrong,
			 c, backpointer_bucket_offset_wrong,
			 "backpointer bucket_offset wrong");
fsck_err:
	return ret;
@@ -763,27 +761,22 @@ static int bch2_get_btree_in_memory_pos(struct btree_trans *trans,
	     btree < BTREE_ID_NR && !ret;
	     btree++) {
		unsigned depth = (BIT_ULL(btree) & btree_leaf_mask) ? 0 : 1;
		struct btree_iter iter;
		struct btree *b;

		if (!(BIT_ULL(btree) & btree_leaf_mask) &&
		    !(BIT_ULL(btree) & btree_interior_mask))
			continue;

		bch2_trans_begin(trans);

		__for_each_btree_node(trans, iter, btree,
		ret = __for_each_btree_node(trans, iter, btree,
				      btree == start.btree ? start.pos : POS_MIN,
				      0, depth, BTREE_ITER_prefetch, b, ret) {
				      0, depth, BTREE_ITER_prefetch, b, ({
			mem_may_pin -= btree_buf_bytes(b);
			if (mem_may_pin <= 0) {
				c->btree_cache.pinned_nodes_end = *end =
					BBPOS(btree, b->key.k.p);
				bch2_trans_iter_exit(trans, &iter);
				return 0;
			}
				break;
			}
		bch2_trans_iter_exit(trans, &iter);
			0;
		}));
	}

	return ret;
+2 −3
Original line number Diff line number Diff line
@@ -18,14 +18,13 @@ static inline u64 swab40(u64 x)
		((x & 0xff00000000ULL) >> 32));
}

int bch2_backpointer_invalid(struct bch_fs *, struct bkey_s_c k,
			     enum bch_validate_flags, struct printbuf *);
int bch2_backpointer_validate(struct bch_fs *, struct bkey_s_c k, enum bch_validate_flags);
void bch2_backpointer_to_text(struct printbuf *, const struct bch_backpointer *);
void bch2_backpointer_k_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
void bch2_backpointer_swab(struct bkey_s);

#define bch2_bkey_ops_backpointer ((struct bkey_ops) {	\
	.key_invalid	= bch2_backpointer_invalid,	\
	.key_validate	= bch2_backpointer_validate,	\
	.val_to_text	= bch2_backpointer_k_to_text,	\
	.swab		= bch2_backpointer_swab,	\
	.min_val_size	= 32,				\
+1 −0
Original line number Diff line number Diff line
@@ -447,6 +447,7 @@ BCH_DEBUG_PARAMS_DEBUG()
	x(blocked_journal_low_on_space)		\
	x(blocked_journal_low_on_pin)		\
	x(blocked_journal_max_in_flight)	\
	x(blocked_key_cache_flush)		\
	x(blocked_allocate)			\
	x(blocked_allocate_open_bucket)		\
	x(blocked_write_buffer_full)		\
Loading