Commit a4145ce1 authored by Linus Torvalds
Browse files

Merge tag 'bcachefs-2024-03-19' of https://evilpiepirate.org/git/bcachefs

Pull bcachefs fixes from Kent Overstreet:
 "Assorted bugfixes.

  Most are fixes for simple assertion pops; the most significant fix is
  for a deadlock in recovery when we have to rewrite large numbers of
  btree nodes to fix errors. The node rewrite was incorrectly running out
  of the same workqueue as the core interior btree update path - we now
  give it its own single-threaded workqueue.

  This was visible to users as "bch2_btree_update_start(): error:
  BCH_ERR_journal_reclaim_would_deadlock" - and then recovery hanging"

* tag 'bcachefs-2024-03-19' of https://evilpiepirate.org/git/bcachefs:
  bcachefs: Fix lost wakeup on journal shutdown
  bcachefs: Fix deadlock in bch2_btree_update_start()
  bcachefs: ratelimit errors from async_btree_node_rewrite
  bcachefs: Run check_topology() first
  bcachefs: Improve bch2_fatal_error()
  bcachefs: Fix lost transaction restart error
  bcachefs: Don't corrupt journal keys gap buffer when dropping alloc info
  bcachefs: fix for building in userspace
  bcachefs: bch2_snapshot_is_ancestor() now safe to call in early recovery
  bcachefs: Fix nested transaction restart handling in bch2_bucket_gens_init()
  bcachefs: Improve sysfs internal/btree_updates
  bcachefs: Split out btree_node_rewrite_worker
  bcachefs: Fix locking in bch2_alloc_write_key()
  bcachefs: Avoid extent entry type assertions in .invalid()
  bcachefs: Fix spurious -BCH_ERR_transaction_restart_nested
  bcachefs: Fix check_key_has_snapshot() call
  bcachefs: Change "accounting overran journal reservation" to a warning
parents 78c3925c 2e92d26b
Loading
Loading
Loading
Loading
+8 −7
Original line number Diff line number Diff line
@@ -532,13 +532,13 @@ int bch2_bucket_gens_init(struct bch_fs *c)
		u8 gen = bch2_alloc_to_v4(k, &a)->gen;
		unsigned offset;
		struct bpos pos = alloc_gens_pos(iter.pos, &offset);
		int ret2 = 0;

		if (have_bucket_gens_key && bkey_cmp(iter.pos, pos)) {
			ret = commit_do(trans, NULL, NULL,
					BCH_TRANS_COMMIT_no_enospc,
				bch2_btree_insert_trans(trans, BTREE_ID_bucket_gens, &g.k_i, 0));
			if (ret)
				break;
			ret2 =  bch2_btree_insert_trans(trans, BTREE_ID_bucket_gens, &g.k_i, 0) ?:
				bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
			if (ret2)
				goto iter_err;
			have_bucket_gens_key = false;
		}

@@ -549,7 +549,8 @@ int bch2_bucket_gens_init(struct bch_fs *c)
		}

		g.v.gens[offset] = gen;
		0;
iter_err:
		ret2;
	}));

	if (have_bucket_gens_key && !ret)
@@ -852,7 +853,7 @@ int bch2_trigger_alloc(struct btree_trans *trans,
					bucket_journal_seq);
			if (ret) {
				bch2_fs_fatal_error(c,
					"error setting bucket_needs_journal_commit: %i", ret);
					"setting bucket_needs_journal_commit: %s", bch2_err_str(ret));
				return ret;
			}
		}
+6 −4
Original line number Diff line number Diff line
@@ -1356,16 +1356,18 @@ int bch2_alloc_sectors_start_trans(struct btree_trans *trans,

		/* Don't retry from all devices if we're out of open buckets: */
		if (bch2_err_matches(ret, BCH_ERR_open_buckets_empty)) {
			int ret = open_bucket_add_buckets(trans, &ptrs, wp, devs_have,
			int ret2 = open_bucket_add_buckets(trans, &ptrs, wp, devs_have,
					      target, erasure_code,
					      nr_replicas, &nr_effective,
					      &have_cache, watermark,
					      flags, cl);
			if (!ret ||
			    bch2_err_matches(ret, BCH_ERR_transaction_restart) ||
			    bch2_err_matches(ret, BCH_ERR_open_buckets_empty))
			if (!ret2 ||
			    bch2_err_matches(ret2, BCH_ERR_transaction_restart) ||
			    bch2_err_matches(ret2, BCH_ERR_open_buckets_empty)) {
				ret = ret2;
				goto alloc_done;
			}
		}

		/*
		 * Only try to allocate cache (durability = 0 devices) from the
+2 −0
Original line number Diff line number Diff line
@@ -849,6 +849,8 @@ struct bch_fs {
	struct workqueue_struct	*btree_interior_update_worker;
	struct work_struct	btree_interior_update_work;

	struct workqueue_struct	*btree_node_rewrite_worker;

	struct list_head	pending_node_rewrites;
	struct mutex		pending_node_rewrites_lock;

+1 −1
Original line number Diff line number Diff line
@@ -1392,11 +1392,11 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
					 *old,
					 b->data_type);
	gc = *b;
	percpu_up_read(&c->mark_lock);

	if (gc.data_type != old_gc.data_type ||
	    gc.dirty_sectors != old_gc.dirty_sectors)
		bch2_dev_usage_update_m(c, ca, &old_gc, &gc);
	percpu_up_read(&c->mark_lock);

	if (metadata_only &&
	    gc.data_type != BCH_DATA_sb &&
+6 −6
Original line number Diff line number Diff line
@@ -1066,7 +1066,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,

			ret = bset_encrypt(c, i, b->written << 9);
			if (bch2_fs_fatal_err_on(ret, c,
					"error decrypting btree node: %i", ret))
					"decrypting btree node: %s", bch2_err_str(ret)))
				goto fsck_err;

			btree_err_on(btree_node_type_is_extents(btree_node_type(b)) &&
@@ -1107,7 +1107,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,

			ret = bset_encrypt(c, i, b->written << 9);
			if (bch2_fs_fatal_err_on(ret, c,
					"error decrypting btree node: %i\n", ret))
					"decrypting btree node: %s", bch2_err_str(ret)))
				goto fsck_err;

			sectors = vstruct_sectors(bne, c->block_bits);
@@ -1338,7 +1338,7 @@ static void btree_node_read_work(struct work_struct *work)
	if (saw_error && !btree_node_read_error(b)) {
		printbuf_reset(&buf);
		bch2_bpos_to_text(&buf, b->key.k.p);
		bch_info(c, "%s: rewriting btree node at btree=%s level=%u %s due to error",
		bch_err_ratelimited(c, "%s: rewriting btree node at btree=%s level=%u %s due to error",
			 __func__, bch2_btree_id_str(b->c.btree_id), b->c.level, buf.buf);

		bch2_btree_node_rewrite_async(c, b);
@@ -1874,8 +1874,8 @@ static void btree_node_write_work(struct work_struct *work)
	return;
err:
	set_btree_node_noevict(b);
	if (!bch2_err_matches(ret, EROFS))
		bch2_fs_fatal_error(c, "fatal error writing btree node: %s", bch2_err_str(ret));
	bch2_fs_fatal_err_on(!bch2_err_matches(ret, EROFS), c,
			     "writing btree node: %s", bch2_err_str(ret));
	goto out;
}

@@ -2131,7 +2131,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, unsigned flags)

	ret = bset_encrypt(c, i, b->written << 9);
	if (bch2_fs_fatal_err_on(ret, c,
			"error encrypting btree node: %i\n", ret))
			"encrypting btree node: %s", bch2_err_str(ret)))
		goto err;

	nonce = btree_nonce(i, b->written << 9);
Loading