Commit df448ca3 authored by Kent Overstreet
Browse files

bcachefs: bcachefs_metadata_version_persistent_inode_cursors



Persistent cursors for inode allocation.

A free inodes btree would add substantial overhead to inode allocation
and freeing - a "next num to allocate" cursor is always going to be
faster.

We just need it to be persistent, to avoid scanning the inodes btree
from the start on startup.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent 59c50511
Loading
Loading
Loading
Loading
+0 −3
Original line number Diff line number Diff line
@@ -1063,9 +1063,6 @@ struct bch_fs {
	struct btree_node	*verify_ondisk;
	struct mutex		verify_lock;

	u64			*unused_inode_hints;
	unsigned		inode_shard_bits;

	/*
	 * A btree node on disk could have too many bsets for an iterator to fit
	 * on the stack - have to dynamically allocate them
+7 −3
Original line number Diff line number Diff line
@@ -418,7 +418,8 @@ static inline void bkey_init(struct bkey *k)
	x(snapshot_tree,	31)			\
	x(logged_op_truncate,	32)			\
	x(logged_op_finsert,	33)			\
	x(accounting,		34)
	x(accounting,		34)			\
	x(inode_alloc_cursor,	35)

enum bch_bkey_type {
#define x(name, nr) KEY_TYPE_##name	= nr,
@@ -682,7 +683,8 @@ struct bch_sb_field_ext {
	x(backpointer_bucket_gen,	BCH_VERSION(1, 14))		\
	x(disk_accounting_big_endian,	BCH_VERSION(1, 15))		\
	x(reflink_p_may_update_opts,	BCH_VERSION(1, 16))		\
	x(inode_depth,			BCH_VERSION(1, 17))
	x(inode_depth,			BCH_VERSION(1, 17))		\
	x(persistent_inode_cursors,	BCH_VERSION(1, 18))

enum bcachefs_metadata_version {
	bcachefs_metadata_version_min = 9,
@@ -850,6 +852,7 @@ LE64_BITMASK(BCH_SB_ALLOCATOR_STUCK_TIMEOUT,
LE64_BITMASK(BCH_SB_VERSION_INCOMPAT,	struct bch_sb, flags[5], 32, 48);
LE64_BITMASK(BCH_SB_VERSION_INCOMPAT_ALLOWED,
					struct bch_sb, flags[5], 48, 64);
LE64_BITMASK(BCH_SB_SHARD_INUMS_NBITS,	struct bch_sb, flags[6],  0,  4);

static inline __u64 BCH_SB_COMPRESSION_TYPE(const struct bch_sb *sb)
{
@@ -1347,7 +1350,8 @@ enum btree_id_flags {
	  BIT_ULL(KEY_TYPE_set))						\
	x(logged_ops,		17,	0,					\
	  BIT_ULL(KEY_TYPE_logged_op_truncate)|					\
	  BIT_ULL(KEY_TYPE_logged_op_finsert))					\
	  BIT_ULL(KEY_TYPE_logged_op_finsert)|					\
	  BIT_ULL(KEY_TYPE_inode_alloc_cursor))					\
	x(rebalance_work,	18,	BTREE_ID_SNAPSHOT_FIELD,		\
	  BIT_ULL(KEY_TYPE_set)|BIT_ULL(KEY_TYPE_cookie))			\
	x(subvolume_children,	19,	0,					\
+1 −1
Original line number Diff line number Diff line
@@ -588,7 +588,7 @@ struct jset_entry *__bch2_trans_jset_entry_alloc(struct btree_trans *trans, unsi
int bch2_bkey_get_empty_slot(struct btree_trans *trans, struct btree_iter *iter,
			     enum btree_id btree, struct bpos end)
{
	bch2_trans_iter_init(trans, iter, btree, POS_MAX, BTREE_ITER_intent);
	bch2_trans_iter_init(trans, iter, btree, end, BTREE_ITER_intent);
	struct bkey_s_c k = bch2_btree_iter_peek_prev(iter);
	int ret = bkey_err(k);
	if (ret)
+85 −35
Original line number Diff line number Diff line
@@ -799,6 +799,28 @@ void bch2_inode_generation_to_text(struct printbuf *out, struct bch_fs *c,
	prt_printf(out, "generation: %u", le32_to_cpu(gen.v->bi_generation));
}

/*
 * Validate an inode_alloc_cursor key.
 *
 * The only condition checked is on the key's position: the inode field of
 * k.k->p must equal LOGGED_OPS_INUM_inode_cursors, otherwise the
 * inode_alloc_cursor_inode_bad error is reported through bkey_fsck_err_on().
 *
 * @c:    passed through to bkey_fsck_err_on()
 * @k:    key being validated
 * @from: validation context; not referenced directly in this function
 *        (NOTE(review): bkey_fsck_err_on() may pick it up implicitly -
 *        confirm against the macro definition)
 *
 * Returns ret, which starts out as 0.
 * NOTE(review): bkey_fsck_err_on() is defined elsewhere; it presumably sets
 * ret and jumps to the fsck_err label when the check fails - confirm.
 */
int bch2_inode_alloc_cursor_validate(struct bch_fs *c, struct bkey_s_c k,
				   struct bkey_validate_context from)
{
	int ret = 0;

	bkey_fsck_err_on(k.k->p.inode != LOGGED_OPS_INUM_inode_cursors,
			 c, inode_alloc_cursor_inode_bad,
			 "k.p.inode bad");
fsck_err:
	return ret;
}

/*
 * Print the value of an inode_alloc_cursor key as "idx <n> generation <n>".
 *
 * @out: printbuf that receives the text
 * @c:   unused in this function
 * @k:   key to print; viewed as an inode_alloc_cursor via
 *       bkey_s_c_to_inode_alloc_cursor()
 *
 * The gen field is updated with le32_add_cpu() elsewhere in this file, i.e.
 * it is a 32-bit little-endian value, so it has to be decoded with
 * le32_to_cpu(). Decoding it with le64_to_cpu() widens the raw value first
 * and then byte-swaps all 64 bits, which yields a wrong number on
 * big-endian hosts.
 */
void bch2_inode_alloc_cursor_to_text(struct printbuf *out, struct bch_fs *c,
				     struct bkey_s_c k)
{
	struct bkey_s_c_inode_alloc_cursor i = bkey_s_c_to_inode_alloc_cursor(k);

	prt_printf(out, "idx %llu generation %u",
		   le64_to_cpu(i.v->idx),
		   le32_to_cpu(i.v->gen));
}

void bch2_inode_init_early(struct bch_fs *c,
			   struct bch_inode_unpacked *inode_u)
{
@@ -859,43 +881,78 @@ static inline u32 bkey_generation(struct bkey_s_c k)
	}
}

/*
 * This just finds an empty slot:
 */
int bch2_inode_create(struct btree_trans *trans,
		      struct btree_iter *iter,
		      struct bch_inode_unpacked *inode_u,
		      u32 snapshot, u64 cpu)
static struct bkey_i_inode_alloc_cursor *
bch2_inode_alloc_cursor_get(struct btree_trans *trans, u64 cpu, u64 *min, u64 *max)
{
	struct bch_fs *c = trans->c;
	struct bkey_s_c k;
	u64 min, max, start, pos, *hint;
	int ret = 0;
	unsigned bits = (c->opts.inodes_32bit ? 31 : 63);

	if (c->opts.shard_inode_numbers) {
		bits -= c->inode_shard_bits;
	u64 cursor_idx = c->opts.inodes_32bit ? 0 : cpu + 1;

		min = (cpu << bits);
		max = (cpu << bits) | ~(ULLONG_MAX << bits);
	cursor_idx &= ~(~0ULL << c->opts.shard_inode_numbers_bits);

	struct btree_iter iter;
	struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter,
					BTREE_ID_logged_ops,
					POS(LOGGED_OPS_INUM_inode_cursors, cursor_idx),
					BTREE_ITER_cached);
	int ret = bkey_err(k);
	if (ret)
		return ERR_PTR(ret);

	struct bkey_i_inode_alloc_cursor *cursor =
		k.k->type == KEY_TYPE_inode_alloc_cursor
		? bch2_bkey_make_mut_typed(trans, &iter, &k, 0, inode_alloc_cursor)
		: bch2_bkey_alloc(trans, &iter, 0, inode_alloc_cursor);
	ret = PTR_ERR_OR_ZERO(cursor);
	if (ret)
		goto err;

		min = max_t(u64, min, BLOCKDEV_INODE_MAX);
		hint = c->unused_inode_hints + cpu;
	if (c->opts.inodes_32bit) {
		*min = BLOCKDEV_INODE_MAX;
		*max = INT_MAX;
	} else {
		min = BLOCKDEV_INODE_MAX;
		max = ~(ULLONG_MAX << bits);
		hint = c->unused_inode_hints;
		cursor->v.bits = c->opts.shard_inode_numbers_bits;

		unsigned bits = 63 - c->opts.shard_inode_numbers_bits;

		*min = max(cpu << bits, (u64) INT_MAX + 1);
		*max = (cpu << bits) | ~(ULLONG_MAX << bits);
	}

	start = READ_ONCE(*hint);
	if (le64_to_cpu(cursor->v.idx)  < *min)
		cursor->v.idx = cpu_to_le64(*min);

	if (start >= max || start < min)
		start = min;
	if (le64_to_cpu(cursor->v.idx) >= *max) {
		cursor->v.idx = cpu_to_le64(*min);
		le32_add_cpu(&cursor->v.gen, 1);
	}
err:
	bch2_trans_iter_exit(trans, &iter);
	return ret ? ERR_PTR(ret) : cursor;
}

/*
 * This just finds an empty slot:
 */
int bch2_inode_create(struct btree_trans *trans,
		      struct btree_iter *iter,
		      struct bch_inode_unpacked *inode_u,
		      u32 snapshot, u64 cpu)
{
	u64 min, max;
	struct bkey_i_inode_alloc_cursor *cursor =
		bch2_inode_alloc_cursor_get(trans, cpu, &min, &max);
	int ret = PTR_ERR_OR_ZERO(cursor);
	if (ret)
		return ret;

	u64 start = le64_to_cpu(cursor->v.idx);
	u64 pos = start;

	pos = start;
	bch2_trans_iter_init(trans, iter, BTREE_ID_inodes, POS(0, pos),
			     BTREE_ITER_all_snapshots|
			     BTREE_ITER_intent);
	struct bkey_s_c k;
again:
	while ((k = bch2_btree_iter_peek(iter)).k &&
	       !(ret = bkey_err(k)) &&
@@ -925,6 +982,7 @@ int bch2_inode_create(struct btree_trans *trans,
	/* Retry from start */
	pos = start = min;
	bch2_btree_iter_set_pos(iter, POS(0, pos));
	le32_add_cpu(&cursor->v.gen, 1);
	goto again;
found_slot:
	bch2_btree_iter_set_pos(iter, SPOS(0, pos, snapshot));
@@ -935,9 +993,9 @@ int bch2_inode_create(struct btree_trans *trans,
		return ret;
	}

	*hint			= k.k->p.offset;
	inode_u->bi_inum	= k.k->p.offset;
	inode_u->bi_generation	= bkey_generation(k);
	inode_u->bi_generation	= le64_to_cpu(cursor->v.gen);
	cursor->v.idx		= cpu_to_le64(k.k->p.offset + 1);
	return 0;
}

@@ -999,8 +1057,6 @@ int bch2_inode_rm(struct bch_fs *c, subvol_inum inum)
{
	struct btree_trans *trans = bch2_trans_get(c);
	struct btree_iter iter = { NULL };
	struct bkey_i_inode_generation delete;
	struct bch_inode_unpacked inode_u;
	struct bkey_s_c k;
	u32 snapshot;
	int ret;
@@ -1040,13 +1096,7 @@ int bch2_inode_rm(struct bch_fs *c, subvol_inum inum)
		goto err;
	}

	bch2_inode_unpack(k, &inode_u);

	bkey_inode_generation_init(&delete.k_i);
	delete.k.p = iter.pos;
	delete.v.bi_generation = cpu_to_le32(inode_u.bi_generation + 1);

	ret   = bch2_trans_update(trans, &iter, &delete.k_i, 0) ?:
	ret   = bch2_btree_delete_at(trans, &iter, 0) ?:
		bch2_trans_commit(trans, NULL, NULL,
				BCH_TRANS_COMMIT_no_enospc);
err:
+10 −0
Original line number Diff line number Diff line
@@ -68,6 +68,16 @@ void bch2_inode_generation_to_text(struct printbuf *, struct bch_fs *, struct bk
	.min_val_size	= 8,					\
})

/*
 * Callbacks for inode_alloc_cursor keys: a validator and a text formatter.
 * Both are referenced by the bkey_ops initializer defined just below.
 */
int bch2_inode_alloc_cursor_validate(struct bch_fs *, struct bkey_s_c,
				     struct bkey_validate_context);
void bch2_inode_alloc_cursor_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);

/*
 * bkey_ops initializer for the inode_alloc_cursor key type: wires up the two
 * callbacks above and sets min_val_size to 16.
 */
#define bch2_bkey_ops_inode_alloc_cursor ((struct bkey_ops) {	\
	.key_validate	= bch2_inode_alloc_cursor_validate,	\
	.val_to_text	= bch2_inode_alloc_cursor_to_text,	\
	.min_val_size	= 16,					\
})

#if 0
typedef struct {
	u64			lo;
Loading