Commit 36df6f73 authored by Linus Torvalds
Browse files

Merge tag 'bcachefs-2025-06-12' of git://evilpiepirate.org/bcachefs

Pull bcachefs fixes from Kent Overstreet:
 "As usual, highlighting the ones users have been noticing:

   - Fix a small issue with has_case_insensitive not being propagated on
     snapshot creation; this led to fsck errors, which were harmless
     because we're not using this flag yet (it's for overlayfs +
     casefolding).

   - Log the error being corrected in the journal when we're doing fsck
     repair: this was one of the "lessons learned" from the i_nlink 0 ->
     subvolume deletion bug, where reconstructing what had happened by
     analyzing the journal was a bit more difficult than it needed to
     be.

   - Don't schedule btree node scan to run in the superblock: this fixes
     a regression from the 6.16 recovery passes rework, and led to it
     running unnecessarily.

     The real issue here is that we don't have online, "self healing"
     style topology repair yet: topology repair currently has to run
     before we go RW, which means that we may schedule it unnecessarily
     after a transient error. This will be fixed in the future.

   - We now track, in btree node flags, the reason it was scheduled to
     be rewritten. We discovered a deadlock in recovery when many btree
     nodes need to be rewritten because they're degraded: fully fixing
     this will take some work but it's now easier to see what's going
     on.

     For the bug report where this came up, a device had been kicked RO
     due to transient errors: manually setting it back to RW was
     sufficient to allow recovery to succeed.

   - Mark a few more fsck errors as autofix: as a reminder to users,
     please do keep reporting cases where something needs to be repaired
     and is not repaired automatically (i.e. cases where -o fix_errors
     or fsck -y is required).

   - rcu_pending.c now works with PREEMPT_RT

   - 'bcachefs device add', then umount, then remount wasn't working -
     we now emit a uevent so that the new device's new superblock is
     correctly picked up

   - Assorted repair fixes: btree node scan will no longer incorrectly
     update sb->version_min.

   - Assorted syzbot fixes"

* tag 'bcachefs-2025-06-12' of git://evilpiepirate.org/bcachefs: (23 commits)
  bcachefs: Don't trace should_be_locked unless changing
  bcachefs: Ensure that snapshot creation propagates has_case_insensitive
  bcachefs: Print devices we're mounting on multi device filesystems
  bcachefs: Don't trust sb->nr_devices in members_to_text()
  bcachefs: Fix version checks in validate_bset()
  bcachefs: ioctl: avoid stack overflow warning
  bcachefs: Don't pass trans to fsck_err() in gc_accounting_done
  bcachefs: Fix leak in bch2_fs_recovery() error path
  bcachefs: Fix rcu_pending for PREEMPT_RT
  bcachefs: Fix downgrade_table_extra()
  bcachefs: Don't put rhashtable on stack
  bcachefs: Make sure opts.read_only gets propagated back to VFS
  bcachefs: Fix possible console lock involved deadlock
  bcachefs: mark more errors autofix
  bcachefs: Don't persistently run scan_for_btree_nodes
  bcachefs: Read error message now prints if self healing
  bcachefs: Only run 'increase_depth' for keys from btree node csan
  bcachefs: Mark need_discard_freespace_key_bad autofix
  bcachefs: Update /dev/disk/by-uuid on device add
  bcachefs: Add more flags to btree nodes for rewrite reason
  ...
parents d080d3b5 aef22f6f
Loading
Loading
Loading
Loading
+0 −1
Original line number Diff line number Diff line
@@ -296,7 +296,6 @@ do { \
#define bch2_fmt(_c, fmt)		bch2_log_msg(_c, fmt "\n")

void bch2_print_str(struct bch_fs *, const char *, const char *);
void bch2_print_str_nonblocking(struct bch_fs *, const char *, const char *);

__printf(2, 3)
void bch2_print_opts(struct bch_opts *, const char *, ...);
+60 −35
Original line number Diff line number Diff line
@@ -397,7 +397,11 @@ static int bch2_btree_repair_topology_recurse(struct btree_trans *trans, struct
			continue;
		}

		ret = btree_check_node_boundaries(trans, b, prev, cur, pulled_from_scan);
		ret = lockrestart_do(trans,
			btree_check_node_boundaries(trans, b, prev, cur, pulled_from_scan));
		if (ret < 0)
			goto err;

		if (ret == DID_FILL_FROM_SCAN) {
			new_pass = true;
			ret = 0;
@@ -438,7 +442,8 @@ static int bch2_btree_repair_topology_recurse(struct btree_trans *trans, struct

	if (!ret && !IS_ERR_OR_NULL(prev)) {
		BUG_ON(cur);
		ret = btree_repair_node_end(trans, b, prev, pulled_from_scan);
		ret = lockrestart_do(trans,
			btree_repair_node_end(trans, b, prev, pulled_from_scan));
		if (ret == DID_FILL_FROM_SCAN) {
			new_pass = true;
			ret = 0;
@@ -519,27 +524,21 @@ static int bch2_btree_repair_topology_recurse(struct btree_trans *trans, struct
	bch2_bkey_buf_exit(&prev_k, c);
	bch2_bkey_buf_exit(&cur_k, c);
	printbuf_exit(&buf);
	bch_err_fn(c, ret);
	return ret;
}

int bch2_check_topology(struct bch_fs *c)
static int bch2_check_root(struct btree_trans *trans, enum btree_id i,
			   bool *reconstructed_root)
{
	struct btree_trans *trans = bch2_trans_get(c);
	struct bpos pulled_from_scan = POS_MIN;
	struct bch_fs *c = trans->c;
	struct btree_root *r = bch2_btree_id_root(c, i);
	struct printbuf buf = PRINTBUF;
	int ret = 0;

	bch2_trans_srcu_unlock(trans);

	for (unsigned i = 0; i < btree_id_nr_alive(c) && !ret; i++) {
		struct btree_root *r = bch2_btree_id_root(c, i);
		bool reconstructed_root = false;

		printbuf_reset(&buf);
	bch2_btree_id_to_text(&buf, i);

	if (r->error) {
reconstruct_root:
		bch_info(c, "btree root %s unreadable, must recover from scan", buf.buf);

		r->alive = false;
@@ -556,12 +555,34 @@ int bch2_check_topology(struct bch_fs *c)
			bch2_shoot_down_journal_keys(c, i, 1, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX);
			ret = bch2_get_scanned_nodes(c, i, 0, POS_MIN, SPOS_MAX);
			if (ret)
					break;
				goto err;
		}

			reconstructed_root = true;
		*reconstructed_root = true;
	}
err:
fsck_err:
	printbuf_exit(&buf);
	bch_err_fn(c, ret);
	return ret;
}

int bch2_check_topology(struct bch_fs *c)
{
	struct btree_trans *trans = bch2_trans_get(c);
	struct bpos pulled_from_scan = POS_MIN;
	int ret = 0;

	bch2_trans_srcu_unlock(trans);

	for (unsigned i = 0; i < btree_id_nr_alive(c) && !ret; i++) {
		bool reconstructed_root = false;
recover:
		ret = lockrestart_do(trans, bch2_check_root(trans, i, &reconstructed_root));
		if (ret)
			break;

		struct btree_root *r = bch2_btree_id_root(c, i);
		struct btree *b = r->b;

		btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_read);
@@ -575,17 +596,21 @@ int bch2_check_topology(struct bch_fs *c)

			r->b = NULL;

			if (!reconstructed_root)
				goto reconstruct_root;
			if (!reconstructed_root) {
				r->error = -EIO;
				goto recover;
			}

			struct printbuf buf = PRINTBUF;
			bch2_btree_id_to_text(&buf, i);
			bch_err(c, "empty btree root %s", buf.buf);
			printbuf_exit(&buf);
			bch2_btree_root_alloc_fake_trans(trans, i, 0);
			r->alive = false;
			ret = 0;
		}
	}
fsck_err:
	printbuf_exit(&buf);

	bch2_trans_put(trans);
	return ret;
}
+19 −7
Original line number Diff line number Diff line
@@ -741,16 +741,22 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
		     BCH_VERSION_MAJOR(version),
		     BCH_VERSION_MINOR(version));

	if (btree_err_on(version < c->sb.version_min,
	if (c->recovery.curr_pass != BCH_RECOVERY_PASS_scan_for_btree_nodes &&
	    btree_err_on(version < c->sb.version_min,
			 -BCH_ERR_btree_node_read_err_fixable,
			 c, NULL, b, i, NULL,
			 btree_node_bset_older_than_sb_min,
			 "bset version %u older than superblock version_min %u",
			 version, c->sb.version_min)) {
		if (bch2_version_compatible(version)) {
			mutex_lock(&c->sb_lock);
			c->disk_sb.sb->version_min = cpu_to_le16(version);
			bch2_write_super(c);
			mutex_unlock(&c->sb_lock);
		} else {
			/* We have no idea what's going on: */
			i->version = cpu_to_le16(c->sb.version);
		}
	}

	if (btree_err_on(BCH_VERSION_MAJOR(version) >
@@ -1045,6 +1051,7 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b,
		le16_add_cpu(&i->u64s, -next_good_key);
		memmove_u64s_down(k, (u64 *) k + next_good_key, (u64 *) vstruct_end(i) - (u64 *) k);
		set_btree_node_need_rewrite(b);
		set_btree_node_need_rewrite_error(b);
	}
fsck_err:
	printbuf_exit(&buf);
@@ -1305,6 +1312,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
					  (u64 *) vstruct_end(i) - (u64 *) k);
			set_btree_bset_end(b, b->set);
			set_btree_node_need_rewrite(b);
			set_btree_node_need_rewrite_error(b);
			continue;
		}
		if (ret)
@@ -1329,12 +1337,16 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
		bkey_for_each_ptr(bch2_bkey_ptrs(bkey_i_to_s(&b->key)), ptr) {
			struct bch_dev *ca2 = bch2_dev_rcu(c, ptr->dev);

			if (!ca2 || ca2->mi.state != BCH_MEMBER_STATE_rw)
			if (!ca2 || ca2->mi.state != BCH_MEMBER_STATE_rw) {
				set_btree_node_need_rewrite(b);
				set_btree_node_need_rewrite_degraded(b);
			}
		}

	if (!ptr_written)
	if (!ptr_written) {
		set_btree_node_need_rewrite(b);
		set_btree_node_need_rewrite_ptr_written_zero(b);
	}
fsck_err:
	mempool_free(iter, &c->fill_iter);
	printbuf_exit(&buf);
+1 −1
Original line number Diff line number Diff line
@@ -213,7 +213,7 @@ static noinline __noreturn void break_cycle_fail(struct lock_graph *g)
		prt_newline(&buf);
	}

	bch2_print_str_nonblocking(g->g->trans->c, KERN_ERR, buf.buf);
	bch2_print_str(g->g->trans->c, KERN_ERR, buf.buf);
	printbuf_exit(&buf);
	BUG();
}
+4 −2
Original line number Diff line number Diff line
@@ -417,9 +417,11 @@ static inline void btree_path_set_should_be_locked(struct btree_trans *trans, st
	EBUG_ON(!btree_node_locked(path, path->level));
	EBUG_ON(path->uptodate);

	if (!path->should_be_locked) {
		path->should_be_locked = true;
		trace_btree_path_should_be_locked(trans, path);
	}
}

static inline void __btree_path_set_level_up(struct btree_trans *trans,
				      struct btree_path *path,
Loading