Commit c9d01179 authored by Linus Torvalds
Browse files

Merge tag 'bcachefs-2023-11-5' of https://evilpiepirate.org/git/bcachefs

Pull more bcachefs updates from Kent Overstreet:
 "Here's the second big bcachefs pull request. This brings your tree up
  to date with my master branch, which is what existing bcachefs users
  are currently running.

  New features:
   - rebalance_work btree (and metadata version 1.3): the rebalance
     thread no longer has to scan to find extents that need processing -
     big scalability improvement.
   - sb_errors superblock section: this adds counters for each fsck
     error type, since filesystem creation, along with the date of the
     most recent error. It'll get us better bug reports (since users do
     not typically report errors that fsck was able to fix), and I might
     add telemetry for this in the future.

  Fixes include:
   - multiple snapshot deletion fixes
   - members_v2 fixups
   - deleted_inodes btree fixes
   - copygc thread no longer spins when a device is full but has no
     fragmented buckets (i.e. rebalance needs to move data around
     instead)
   - a fix for a memory reclaim issue with the btree key cache: we're
     now careful not to hold the srcu read lock that blocks key cache
     reclaim for too long
   - an early allocator locking fix, from Brian
   - endianness fixes, from Brian
   - CONFIG_BCACHEFS_DEBUG_TRANSACTIONS no longer defaults to y, a big
     performance improvement on multithreaded workloads"

* tag 'bcachefs-2023-11-5' of https://evilpiepirate.org/git/bcachefs: (70 commits)
  bcachefs: Improve stripe checksum error message
  bcachefs: Simplify, fix bch2_backpointer_get_key()
  bcachefs: kill thing_it_points_to arg to backpointer_not_found()
  bcachefs: bch2_ec_read_extent() now takes btree_trans
  bcachefs: bch2_stripe_to_text() now prints ptr gens
  bcachefs: Don't iterate over journal entries just for btree roots
  bcachefs: Break up bch2_journal_write()
  bcachefs: Replace ERANGE with private error codes
  bcachefs: bkey_copy() is no longer a macro
  bcachefs: x-macro-ify inode flags enum
  bcachefs: Convert bch2_fs_open() to darray
  bcachefs: Move __bch2_members_v2_get_mut to sb-members.h
  bcachefs: bch2_prt_datetime()
  bcachefs: CONFIG_BCACHEFS_DEBUG_TRANSACTIONS no longer defaults to y
  bcachefs: Add a comment for BTREE_INSERT_NOJOURNAL usage
  bcachefs: rebalance_work btree is not a snapshots btree
  bcachefs: Add missing printk newlines
  bcachefs: Fix recovery when forced to use JSET_NO_FLUSH journal entry
  bcachefs: .get_parent() should return an error pointer
  bcachefs: Fix bch2_delete_dead_inodes()
  ...
parents be3ca57c c7046ed0
Loading
Loading
Loading
Loading
+1 −3
Original line number Diff line number Diff line
@@ -24,7 +24,6 @@ config BCACHEFS_FS
	select XXHASH
	select SRCU
	select SYMBOLIC_ERRNAME
	select MEAN_AND_VARIANCE
	help
	The bcachefs filesystem - a modern, copy on write filesystem, with
	support for multiple devices, compression, checksumming, etc.
@@ -42,7 +41,6 @@ config BCACHEFS_POSIX_ACL
config BCACHEFS_DEBUG_TRANSACTIONS
	bool "bcachefs runtime info"
	depends on BCACHEFS_FS
	default y
	help
	This makes the list of running btree transactions available in debugfs.

@@ -78,7 +76,7 @@ config BCACHEFS_NO_LATENCY_ACCT
config MEAN_AND_VARIANCE_UNIT_TEST
	tristate "mean_and_variance unit tests" if !KUNIT_ALL_TESTS
	depends on KUNIT
	select MEAN_AND_VARIANCE
	depends on BCACHEFS_FS
	default KUNIT_ALL_TESTS
	help
	  This option enables the kunit tests for mean_and_variance module.
+1 −0
Original line number Diff line number Diff line
@@ -70,6 +70,7 @@ bcachefs-y := \
	reflink.o		\
	replicas.o		\
	sb-clean.o		\
	sb-errors.o		\
	sb-members.o		\
	siphash.o		\
	six.o			\
+99 −86
Original line number Diff line number Diff line
@@ -192,123 +192,109 @@ static unsigned bch_alloc_v1_val_u64s(const struct bch_alloc *a)
	return DIV_ROUND_UP(bytes, sizeof(u64));
}

int bch2_alloc_v1_invalid(const struct bch_fs *c, struct bkey_s_c k,
int bch2_alloc_v1_invalid(struct bch_fs *c, struct bkey_s_c k,
			  enum bkey_invalid_flags flags,
			  struct printbuf *err)
{
	struct bkey_s_c_alloc a = bkey_s_c_to_alloc(k);
	int ret = 0;

	/* allow for unknown fields */
	if (bkey_val_u64s(a.k) < bch_alloc_v1_val_u64s(a.v)) {
		prt_printf(err, "incorrect value size (%zu < %u)",
	bkey_fsck_err_on(bkey_val_u64s(a.k) < bch_alloc_v1_val_u64s(a.v), c, err,
			 alloc_v1_val_size_bad,
			 "incorrect value size (%zu < %u)",
			 bkey_val_u64s(a.k), bch_alloc_v1_val_u64s(a.v));
		return -BCH_ERR_invalid_bkey;
	}

	return 0;
fsck_err:
	return ret;
}

int bch2_alloc_v2_invalid(const struct bch_fs *c, struct bkey_s_c k,
int bch2_alloc_v2_invalid(struct bch_fs *c, struct bkey_s_c k,
			  enum bkey_invalid_flags flags,
			  struct printbuf *err)
{
	struct bkey_alloc_unpacked u;
	int ret = 0;

	if (bch2_alloc_unpack_v2(&u, k)) {
		prt_printf(err, "unpack error");
		return -BCH_ERR_invalid_bkey;
	}

	return 0;
	bkey_fsck_err_on(bch2_alloc_unpack_v2(&u, k), c, err,
			 alloc_v2_unpack_error,
			 "unpack error");
fsck_err:
	return ret;
}

int bch2_alloc_v3_invalid(const struct bch_fs *c, struct bkey_s_c k,
int bch2_alloc_v3_invalid(struct bch_fs *c, struct bkey_s_c k,
			  enum bkey_invalid_flags flags,
			  struct printbuf *err)
{
	struct bkey_alloc_unpacked u;
	int ret = 0;

	if (bch2_alloc_unpack_v3(&u, k)) {
		prt_printf(err, "unpack error");
		return -BCH_ERR_invalid_bkey;
	}

	return 0;
	bkey_fsck_err_on(bch2_alloc_unpack_v3(&u, k), c, err,
			 alloc_v2_unpack_error,
			 "unpack error");
fsck_err:
	return ret;
}

int bch2_alloc_v4_invalid(const struct bch_fs *c, struct bkey_s_c k,
int bch2_alloc_v4_invalid(struct bch_fs *c, struct bkey_s_c k,
			  enum bkey_invalid_flags flags, struct printbuf *err)
{
	struct bkey_s_c_alloc_v4 a = bkey_s_c_to_alloc_v4(k);
	int ret = 0;

	if (alloc_v4_u64s(a.v) > bkey_val_u64s(k.k)) {
		prt_printf(err, "bad val size (%u > %zu)",
	bkey_fsck_err_on(alloc_v4_u64s(a.v) > bkey_val_u64s(k.k), c, err,
			 alloc_v4_val_size_bad,
			 "bad val size (%u > %zu)",
			 alloc_v4_u64s(a.v), bkey_val_u64s(k.k));
		return -BCH_ERR_invalid_bkey;
	}

	if (!BCH_ALLOC_V4_BACKPOINTERS_START(a.v) &&
	    BCH_ALLOC_V4_NR_BACKPOINTERS(a.v)) {
		prt_printf(err, "invalid backpointers_start");
		return -BCH_ERR_invalid_bkey;
	}
	bkey_fsck_err_on(!BCH_ALLOC_V4_BACKPOINTERS_START(a.v) &&
			 BCH_ALLOC_V4_NR_BACKPOINTERS(a.v), c, err,
			 alloc_v4_backpointers_start_bad,
			 "invalid backpointers_start");

	if (alloc_data_type(*a.v, a.v->data_type) != a.v->data_type) {
		prt_printf(err, "invalid data type (got %u should be %u)",
	bkey_fsck_err_on(alloc_data_type(*a.v, a.v->data_type) != a.v->data_type, c, err,
			 alloc_key_data_type_bad,
			 "invalid data type (got %u should be %u)",
			 a.v->data_type, alloc_data_type(*a.v, a.v->data_type));
		return -BCH_ERR_invalid_bkey;
	}

	switch (a.v->data_type) {
	case BCH_DATA_free:
	case BCH_DATA_need_gc_gens:
	case BCH_DATA_need_discard:
		if (a.v->dirty_sectors ||
		bkey_fsck_err_on(a.v->dirty_sectors ||
				 a.v->cached_sectors ||
		    a.v->stripe) {
			prt_printf(err, "empty data type free but have data");
			return -BCH_ERR_invalid_bkey;
		}
				 a.v->stripe, c, err,
				 alloc_key_empty_but_have_data,
				 "empty data type free but have data");
		break;
	case BCH_DATA_sb:
	case BCH_DATA_journal:
	case BCH_DATA_btree:
	case BCH_DATA_user:
	case BCH_DATA_parity:
		if (!a.v->dirty_sectors) {
			prt_printf(err, "data_type %s but dirty_sectors==0",
		bkey_fsck_err_on(!a.v->dirty_sectors, c, err,
				 alloc_key_dirty_sectors_0,
				 "data_type %s but dirty_sectors==0",
				 bch2_data_types[a.v->data_type]);
			return -BCH_ERR_invalid_bkey;
		}
		break;
	case BCH_DATA_cached:
		if (!a.v->cached_sectors ||
		bkey_fsck_err_on(!a.v->cached_sectors ||
				 a.v->dirty_sectors ||
		    a.v->stripe) {
			prt_printf(err, "data type inconsistency");
			return -BCH_ERR_invalid_bkey;
		}

		if (!a.v->io_time[READ] &&
		    c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_to_lru_refs) {
			prt_printf(err, "cached bucket with read_time == 0");
			return -BCH_ERR_invalid_bkey;
		}
				 a.v->stripe, c, err,
				 alloc_key_cached_inconsistency,
				 "data type inconsistency");

		bkey_fsck_err_on(!a.v->io_time[READ] &&
				 c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_to_lru_refs,
				 c, err,
				 alloc_key_cached_but_read_time_zero,
				 "cached bucket with read_time == 0");
		break;
	case BCH_DATA_stripe:
		break;
	}

	return 0;
}

static inline u64 swab40(u64 x)
{
	return (((x & 0x00000000ffULL) << 32)|
		((x & 0x000000ff00ULL) << 16)|
		((x & 0x0000ff0000ULL) >>  0)|
		((x & 0x00ff000000ULL) >> 16)|
		((x & 0xff00000000ULL) >> 32));
fsck_err:
	return ret;
}

void bch2_alloc_v4_swab(struct bkey_s k)
@@ -324,6 +310,7 @@ void bch2_alloc_v4_swab(struct bkey_s k)
	a->io_time[1]		= swab64(a->io_time[1]);
	a->stripe		= swab32(a->stripe);
	a->nr_external_backpointers = swab32(a->nr_external_backpointers);
	a->fragmentation_lru	= swab64(a->fragmentation_lru);

	bps = alloc_v4_backpointers(a);
	for (bp = bps; bp < bps + BCH_ALLOC_V4_NR_BACKPOINTERS(a); bp++) {
@@ -521,17 +508,18 @@ static unsigned alloc_gen(struct bkey_s_c k, unsigned offset)
		: 0;
}

int bch2_bucket_gens_invalid(const struct bch_fs *c, struct bkey_s_c k,
int bch2_bucket_gens_invalid(struct bch_fs *c, struct bkey_s_c k,
			     enum bkey_invalid_flags flags,
			     struct printbuf *err)
{
	if (bkey_val_bytes(k.k) != sizeof(struct bch_bucket_gens)) {
		prt_printf(err, "bad val size (%zu != %zu)",
		       bkey_val_bytes(k.k), sizeof(struct bch_bucket_gens));
		return -BCH_ERR_invalid_bkey;
	}
	int ret = 0;

	return 0;
	bkey_fsck_err_on(bkey_val_bytes(k.k) != sizeof(struct bch_bucket_gens), c, err,
			 bucket_gens_val_size_bad,
			 "bad val size (%zu != %zu)",
			 bkey_val_bytes(k.k), sizeof(struct bch_bucket_gens));
fsck_err:
	return ret;
}

void bch2_bucket_gens_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
@@ -727,7 +715,7 @@ static int bch2_bucket_do_index(struct btree_trans *trans,
			"incorrect key when %s %s:%llu:%llu:0 (got %s should be %s)\n"
			"  for %s",
			set ? "setting" : "clearing",
			bch2_btree_ids[btree],
			bch2_btree_id_str(btree),
			iter.pos.inode,
			iter.pos.offset,
			bch2_bkey_types[old.k->type],
@@ -986,6 +974,7 @@ int bch2_check_alloc_key(struct btree_trans *trans,
	int ret;

	if (fsck_err_on(!bch2_dev_bucket_exists(c, alloc_k.k->p), c,
			alloc_key_to_missing_dev_bucket,
			"alloc key for invalid device:bucket %llu:%llu",
			alloc_k.k->p.inode, alloc_k.k->p.offset))
		return bch2_btree_delete_at(trans, alloc_iter, 0);
@@ -1005,7 +994,8 @@ int bch2_check_alloc_key(struct btree_trans *trans,

	if (k.k->type != discard_key_type &&
	    (c->opts.reconstruct_alloc ||
	     fsck_err(c, "incorrect key in need_discard btree (got %s should be %s)\n"
	     fsck_err(c, need_discard_key_wrong,
		      "incorrect key in need_discard btree (got %s should be %s)\n"
		      "  %s",
		      bch2_bkey_types[k.k->type],
		      bch2_bkey_types[discard_key_type],
@@ -1035,7 +1025,8 @@ int bch2_check_alloc_key(struct btree_trans *trans,

	if (k.k->type != freespace_key_type &&
	    (c->opts.reconstruct_alloc ||
	     fsck_err(c, "incorrect key in freespace btree (got %s should be %s)\n"
	     fsck_err(c, freespace_key_wrong,
		      "incorrect key in freespace btree (got %s should be %s)\n"
		      "  %s",
		      bch2_bkey_types[k.k->type],
		      bch2_bkey_types[freespace_key_type],
@@ -1066,7 +1057,8 @@ int bch2_check_alloc_key(struct btree_trans *trans,

	if (a->gen != alloc_gen(k, gens_offset) &&
	    (c->opts.reconstruct_alloc ||
	     fsck_err(c, "incorrect gen in bucket_gens btree (got %u should be %u)\n"
	     fsck_err(c, bucket_gens_key_wrong,
		      "incorrect gen in bucket_gens btree (got %u should be %u)\n"
		      "  %s",
		      alloc_gen(k, gens_offset), a->gen,
		      (printbuf_reset(&buf),
@@ -1124,7 +1116,8 @@ int bch2_check_alloc_hole_freespace(struct btree_trans *trans,

	if (k.k->type != KEY_TYPE_set &&
	    (c->opts.reconstruct_alloc ||
	     fsck_err(c, "hole in alloc btree missing in freespace btree\n"
	     fsck_err(c, freespace_hole_missing,
		      "hole in alloc btree missing in freespace btree\n"
		      "  device %llu buckets %llu-%llu",
		      freespace_iter->pos.inode,
		      freespace_iter->pos.offset,
@@ -1187,6 +1180,7 @@ int bch2_check_alloc_hole_bucket_gens(struct btree_trans *trans,

		for (i = gens_offset; i < gens_end_offset; i++) {
			if (fsck_err_on(g.v.gens[i], c,
					bucket_gens_hole_wrong,
					"hole in alloc btree at %llu:%llu with nonzero gen in bucket_gens btree (%u)",
					bucket_gens_pos_to_alloc(k.k->p, i).inode,
					bucket_gens_pos_to_alloc(k.k->p, i).offset,
@@ -1244,8 +1238,9 @@ static noinline_for_stack int __bch2_check_discard_freespace_key(struct btree_tr
		return ret;

	if (fsck_err_on(!bch2_dev_bucket_exists(c, pos), c,
			need_discard_freespace_key_to_invalid_dev_bucket,
			"entry in %s btree for nonexistant dev:bucket %llu:%llu",
			bch2_btree_ids[iter->btree_id], pos.inode, pos.offset))
			bch2_btree_id_str(iter->btree_id), pos.inode, pos.offset))
		goto delete;

	a = bch2_alloc_to_v4(alloc_k, &a_convert);
@@ -1253,9 +1248,10 @@ static noinline_for_stack int __bch2_check_discard_freespace_key(struct btree_tr
	if (fsck_err_on(a->data_type != state ||
			(state == BCH_DATA_free &&
			 genbits != alloc_freespace_genbits(*a)), c,
			need_discard_freespace_key_bad,
			"%s\n  incorrectly set at %s:%llu:%llu:0 (free %u, genbits %llu should be %llu)",
			(bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf),
			bch2_btree_ids[iter->btree_id],
			bch2_btree_id_str(iter->btree_id),
			iter->pos.inode,
			iter->pos.offset,
			a->data_type == state,
@@ -1320,6 +1316,7 @@ int bch2_check_bucket_gens_key(struct btree_trans *trans,
	dev_exists = bch2_dev_exists2(c, k.k->p.inode);
	if (!dev_exists) {
		if (fsck_err_on(!dev_exists, c,
				bucket_gens_to_invalid_dev,
				"bucket_gens key for invalid device:\n  %s",
				(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
			ret = bch2_btree_delete_at(trans, iter, 0);
@@ -1330,6 +1327,7 @@ int bch2_check_bucket_gens_key(struct btree_trans *trans,
	ca = bch_dev_bkey_exists(c, k.k->p.inode);
	if (fsck_err_on(end <= ca->mi.first_bucket ||
			start >= ca->mi.nbuckets, c,
			bucket_gens_to_invalid_buckets,
			"bucket_gens key for invalid buckets:\n  %s",
			(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
		ret = bch2_btree_delete_at(trans, iter, 0);
@@ -1338,6 +1336,7 @@ int bch2_check_bucket_gens_key(struct btree_trans *trans,

	for (b = start; b < ca->mi.first_bucket; b++)
		if (fsck_err_on(g.v.gens[b & KEY_TYPE_BUCKET_GENS_MASK], c,
				bucket_gens_nonzero_for_invalid_buckets,
				"bucket_gens key has nonzero gen for invalid bucket")) {
			g.v.gens[b & KEY_TYPE_BUCKET_GENS_MASK] = 0;
			need_update = true;
@@ -1345,6 +1344,7 @@ int bch2_check_bucket_gens_key(struct btree_trans *trans,

	for (b = ca->mi.nbuckets; b < end; b++)
		if (fsck_err_on(g.v.gens[b & KEY_TYPE_BUCKET_GENS_MASK], c,
				bucket_gens_nonzero_for_invalid_buckets,
				"bucket_gens key has nonzero gen for invalid bucket")) {
			g.v.gens[b & KEY_TYPE_BUCKET_GENS_MASK] = 0;
			need_update = true;
@@ -1495,11 +1495,13 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,
		return ret;

	if (fsck_err_on(!a->io_time[READ], c,
			alloc_key_cached_but_read_time_zero,
			"cached bucket with read_time 0\n"
			"  %s",
		(printbuf_reset(&buf),
		 bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf)) ||
	    fsck_err_on(lru_k.k->type != KEY_TYPE_set, c,
			alloc_key_to_missing_lru_entry,
			"missing lru entry\n"
			"  %s",
			(printbuf_reset(&buf),
@@ -2075,6 +2077,17 @@ void bch2_recalc_capacity(struct bch_fs *c)
	closure_wake_up(&c->freelist_wait);
}

u64 bch2_min_rw_member_capacity(struct bch_fs *c)
{
	struct bch_dev *ca;
	unsigned i;
	u64 ret = U64_MAX;

	for_each_rw_member(ca, c, i)
		ret = min(ret, ca->mi.nbuckets * ca->mi.bucket_size);
	return ret;
}

static bool bch2_dev_has_open_write_point(struct bch_fs *c, struct bch_dev *ca)
{
	struct open_bucket *ob;
+6 −5
Original line number Diff line number Diff line
@@ -149,13 +149,13 @@ struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut(struct btree_trans *, struct bkey_s

int bch2_bucket_io_time_reset(struct btree_trans *, unsigned, size_t, int);

int bch2_alloc_v1_invalid(const struct bch_fs *, struct bkey_s_c,
int bch2_alloc_v1_invalid(struct bch_fs *, struct bkey_s_c,
			  enum bkey_invalid_flags, struct printbuf *);
int bch2_alloc_v2_invalid(const struct bch_fs *, struct bkey_s_c,
int bch2_alloc_v2_invalid(struct bch_fs *, struct bkey_s_c,
			  enum bkey_invalid_flags, struct printbuf *);
int bch2_alloc_v3_invalid(const struct bch_fs *, struct bkey_s_c,
int bch2_alloc_v3_invalid(struct bch_fs *, struct bkey_s_c,
			  enum bkey_invalid_flags, struct printbuf *);
int bch2_alloc_v4_invalid(const struct bch_fs *, struct bkey_s_c,
int bch2_alloc_v4_invalid(struct bch_fs *, struct bkey_s_c,
			  enum bkey_invalid_flags, struct printbuf *);
void bch2_alloc_v4_swab(struct bkey_s);
void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
@@ -193,7 +193,7 @@ void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
	.min_val_size	= 48,				\
})

int bch2_bucket_gens_invalid(const struct bch_fs *, struct bkey_s_c,
int bch2_bucket_gens_invalid(struct bch_fs *, struct bkey_s_c,
			     enum bkey_invalid_flags, struct printbuf *);
void bch2_bucket_gens_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);

@@ -249,6 +249,7 @@ int bch2_dev_freespace_init(struct bch_fs *, struct bch_dev *, u64, u64);
int bch2_fs_freespace_init(struct bch_fs *);

void bch2_recalc_capacity(struct bch_fs *);
u64 bch2_min_rw_member_capacity(struct bch_fs *);

void bch2_dev_allocator_remove(struct bch_fs *, struct bch_dev *);
void bch2_dev_allocator_add(struct bch_fs *, struct bch_dev *);
+31 −7
Original line number Diff line number Diff line
@@ -399,12 +399,23 @@ bch2_bucket_alloc_early(struct btree_trans *trans,
			struct bucket_alloc_state *s,
			struct closure *cl)
{
	struct btree_iter iter;
	struct bkey_s_c k;
	struct btree_iter iter, citer;
	struct bkey_s_c k, ck;
	struct open_bucket *ob = NULL;
	u64 alloc_start = max_t(u64, ca->mi.first_bucket, ca->new_fs_bucket_idx);
	u64 alloc_cursor = max(alloc_start, READ_ONCE(ca->alloc_cursor));
	u64 first_bucket = max_t(u64, ca->mi.first_bucket, ca->new_fs_bucket_idx);
	u64 alloc_start = max(first_bucket, READ_ONCE(ca->alloc_cursor));
	u64 alloc_cursor = alloc_start;
	int ret;

	/*
	 * Scan with an uncached iterator to avoid polluting the key cache. An
	 * uncached iter will return a cached key if one exists, but if not
	 * there is no other underlying protection for the associated key cache
	 * slot. To avoid racing bucket allocations, look up the cached key slot
	 * of any likely allocation candidate before attempting to proceed with
	 * the allocation. This provides proper exclusion on the associated
	 * bucket.
	 */
again:
	for_each_btree_key_norestart(trans, iter, BTREE_ID_alloc, POS(ca->dev_idx, alloc_cursor),
			   BTREE_ITER_SLOTS, k, ret) {
@@ -419,25 +430,38 @@ bch2_bucket_alloc_early(struct btree_trans *trans,
			continue;

		a = bch2_alloc_to_v4(k, &a_convert);

		if (a->data_type != BCH_DATA_free)
			continue;

		/* now check the cached key to serialize concurrent allocs of the bucket */
		ck = bch2_bkey_get_iter(trans, &citer, BTREE_ID_alloc, k.k->p, BTREE_ITER_CACHED);
		ret = bkey_err(ck);
		if (ret)
			break;

		a = bch2_alloc_to_v4(ck, &a_convert);
		if (a->data_type != BCH_DATA_free)
			goto next;

		s->buckets_seen++;

		ob = __try_alloc_bucket(trans->c, ca, k.k->p.offset, watermark, a, s, cl);
next:
		citer.path->preserve = false;
		bch2_trans_iter_exit(trans, &citer);
		if (ob)
			break;
	}
	bch2_trans_iter_exit(trans, &iter);

	alloc_cursor = iter.pos.offset;
	ca->alloc_cursor = alloc_cursor;

	if (!ob && ret)
		ob = ERR_PTR(ret);

	if (!ob && alloc_cursor > alloc_start) {
		alloc_cursor = alloc_start;
	if (!ob && alloc_start > first_bucket) {
		alloc_cursor = alloc_start = first_bucket;
		goto again;
	}

Loading