Commit 4412b8b2 authored by Linus Torvalds
Browse files

Merge tag 'bcachefs-2025-07-11' of git://evilpiepirate.org/bcachefs

Pull bcachefs fixes from Kent Overstreet.

* tag 'bcachefs-2025-07-11' of git://evilpiepirate.org/bcachefs:
  bcachefs: Don't set BCH_FS_error on transaction restart
  bcachefs: Fix additional misalignment in journal space calculations
  bcachefs: Don't schedule non persistent passes persistently
  bcachefs: Fix bch2_btree_transactions_read() synchronization
  bcachefs: btree read retry fixes
  bcachefs: btree node scan no longer uses btree cache
  bcachefs: Tweak btree cache helpers for use by btree node scan
  bcachefs: Fix btree for nonexistent tree depth
  bcachefs: Fix bch2_io_failures_to_text()
  bcachefs: bch2_fpunch_snapshot()
parents 2632d81f fec5e6f9
Loading
Loading
Loading
Loading
+13 −13
Original line number Diff line number Diff line
@@ -85,7 +85,7 @@ void bch2_btree_node_to_freelist(struct bch_fs *c, struct btree *b)
	six_unlock_intent(&b->c.lock);
}

static void __btree_node_data_free(struct btree_cache *bc, struct btree *b)
void __btree_node_data_free(struct btree *b)
{
	BUG_ON(!list_empty(&b->list));
	BUG_ON(btree_node_hashed(b));
@@ -112,16 +112,17 @@ static void __btree_node_data_free(struct btree_cache *bc, struct btree *b)
	munmap(b->aux_data, btree_aux_data_bytes(b));
#endif
	b->aux_data = NULL;

	btree_node_to_freedlist(bc, b);
}

static void btree_node_data_free(struct btree_cache *bc, struct btree *b)
{
	BUG_ON(list_empty(&b->list));
	list_del_init(&b->list);

	__btree_node_data_free(b);

	--bc->nr_freeable;
	__btree_node_data_free(bc, b);
	btree_node_to_freedlist(bc, b);
}

static int bch2_btree_cache_cmp_fn(struct rhashtable_compare_arg *arg,
@@ -185,10 +186,7 @@ static struct btree *__btree_node_mem_alloc(struct bch_fs *c, gfp_t gfp)

struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *c)
{
	struct btree_cache *bc = &c->btree_cache;
	struct btree *b;

	b = __btree_node_mem_alloc(c, GFP_KERNEL);
	struct btree *b = __btree_node_mem_alloc(c, GFP_KERNEL);
	if (!b)
		return NULL;

@@ -198,8 +196,6 @@ struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *c)
	}

	bch2_btree_lock_init(&b->c, 0, GFP_KERNEL);

	__bch2_btree_node_to_freelist(bc, b);
	return b;
}

@@ -524,7 +520,8 @@ static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
			--touched;;
		} else if (!btree_node_reclaim(c, b)) {
			__bch2_btree_node_hash_remove(bc, b);
			__btree_node_data_free(bc, b);
			__btree_node_data_free(b);
			btree_node_to_freedlist(bc, b);

			freed++;
			bc->nr_freed++;
@@ -652,9 +649,12 @@ int bch2_fs_btree_cache_init(struct bch_fs *c)

	bch2_recalc_btree_reserve(c);

	for (i = 0; i < bc->nr_reserve; i++)
		if (!__bch2_btree_node_mem_alloc(c))
	for (i = 0; i < bc->nr_reserve; i++) {
		struct btree *b = __bch2_btree_node_mem_alloc(c);
		if (!b)
			goto err;
		__bch2_btree_node_to_freelist(bc, b);
	}

	list_splice_init(&bc->live[0].list, &bc->freeable);

+1 −0
Original line number Diff line number Diff line
@@ -30,6 +30,7 @@ void bch2_btree_node_update_key_early(struct btree_trans *, enum btree_id, unsig
void bch2_btree_cache_cannibalize_unlock(struct btree_trans *);
int bch2_btree_cache_cannibalize_lock(struct btree_trans *, struct closure *);

void __btree_node_data_free(struct btree *);
struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *);
struct btree *bch2_btree_node_mem_alloc(struct btree_trans *, bool);

+3 −5
Original line number Diff line number Diff line
@@ -568,9 +568,9 @@ static int __btree_err(int ret,
		bch2_mark_btree_validate_failure(failed, ca->dev_idx);

		struct extent_ptr_decoded pick;
		have_retry = !bch2_bkey_pick_read_device(c,
		have_retry = bch2_bkey_pick_read_device(c,
					bkey_i_to_s_c(&b->key),
					failed, &pick, -1);
					failed, &pick, -1) == 1;
	}

	if (!have_retry && ret == -BCH_ERR_btree_node_read_err_want_retry)
@@ -615,7 +615,6 @@ static int __btree_err(int ret,
			goto out;
		case -BCH_ERR_btree_node_read_err_bad_node:
			prt_str(&out, ", ");
			ret = __bch2_topology_error(c, &out);
			break;
		}

@@ -644,7 +643,6 @@ static int __btree_err(int ret,
		goto out;
	case -BCH_ERR_btree_node_read_err_bad_node:
		prt_str(&out, ", ");
		ret = __bch2_topology_error(c, &out);
		break;
	}
print:
@@ -1408,7 +1406,7 @@ static void btree_node_read_work(struct work_struct *work)
		ret = bch2_bkey_pick_read_device(c,
					bkey_i_to_s_c(&b->key),
					&failed, &rb->pick, -1);
		if (ret) {
		if (ret <= 0) {
			set_btree_node_read_error(b);
			break;
		}
+41 −43
Original line number Diff line number Diff line
@@ -75,39 +75,6 @@ static inline u64 bkey_journal_seq(struct bkey_s_c k)
	}
}

/*
 * Check whether a btree node located by the scan can actually be read, and
 * record what was learned about it in @f.
 *
 * A key for @f is built in a temporary buffer and the node is fetched with
 * bch2_btree_node_get_noiter(). If that yields NULL or an error pointer the
 * function returns false and @f is left unmodified.
 *
 * On success, f->sectors_written is set from b->written, and f->journal_seq
 * becomes the maximum of b->data->keys.journal_seq and bkey_journal_seq()
 * over every key iterated in the node. Returns true in that case.
 */
static bool found_btree_node_is_readable(struct btree_trans *trans,
					 struct found_btree_node *f)
{
	/* Local storage for the key generated from @f */
	struct { __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX); } tmp;

	found_btree_node_to_key(&tmp.k, f);

	struct btree *b = bch2_btree_node_get_noiter(trans, &tmp.k, f->btree_id, f->level, false);
	bool ret = !IS_ERR_OR_NULL(b);
	if (!ret)
		return ret;

	f->sectors_written = b->written;
	/* Starting value; raised below if any key carries a larger sequence */
	f->journal_seq = le64_to_cpu(b->data->keys.journal_seq);

	struct bkey_s_c k;
	struct bkey unpacked;
	struct btree_node_iter iter;
	for_each_btree_node_key_unpack(b, k, &iter, &unpacked)
		f->journal_seq = max(f->journal_seq, bkey_journal_seq(k));

	/*
	 * NOTE(review): presumably releases a read lock that
	 * bch2_btree_node_get_noiter() returned the node with - confirm
	 * against that helper.
	 */
	six_unlock_read(&b->c.lock);

	/*
	 * We might update this node's range; if that happens, we need the node
	 * to be re-read so the read path can trim keys that are no longer in
	 * this node
	 */
	/* The eviction is skipped when b is the root returned by btree_node_root() */
	if (b != btree_node_root(trans->c, b))
		bch2_btree_node_evict(trans, &tmp.k);
	return ret;
}

static int found_btree_node_cmp_cookie(const void *_l, const void *_r)
{
	const struct found_btree_node *l = _l;
@@ -159,17 +126,17 @@ static const struct min_heap_callbacks found_btree_node_heap_cbs = {
};

static void try_read_btree_node(struct find_btree_nodes *f, struct bch_dev *ca,
				struct bio *bio, struct btree_node *bn, u64 offset)
				struct btree *b, struct bio *bio, u64 offset)
{
	struct bch_fs *c = container_of(f, struct bch_fs, found_btree_nodes);
	struct btree_node *bn = b->data;

	bio_reset(bio, ca->disk_sb.bdev, REQ_OP_READ);
	bio->bi_iter.bi_sector	= offset;
	bch2_bio_map(bio, bn, PAGE_SIZE);
	bch2_bio_map(bio, b->data, c->opts.block_size);

	u64 submit_time = local_clock();
	submit_bio_wait(bio);

	bch2_account_io_completion(ca, BCH_MEMBER_ERROR_read, submit_time, !bio->bi_status);

	if (bio->bi_status) {
@@ -201,6 +168,14 @@ static void try_read_btree_node(struct find_btree_nodes *f, struct bch_dev *ca,
	if (BTREE_NODE_ID(bn) >= BTREE_ID_NR_MAX)
		return;

	bio_reset(bio, ca->disk_sb.bdev, REQ_OP_READ);
	bio->bi_iter.bi_sector	= offset;
	bch2_bio_map(bio, b->data, c->opts.btree_node_size);

	submit_time = local_clock();
	submit_bio_wait(bio);
	bch2_account_io_completion(ca, BCH_MEMBER_ERROR_read, submit_time, !bio->bi_status);

	rcu_read_lock();
	struct found_btree_node n = {
		.btree_id	= BTREE_NODE_ID(bn),
@@ -217,7 +192,20 @@ static void try_read_btree_node(struct find_btree_nodes *f, struct bch_dev *ca,
	};
	rcu_read_unlock();

	if (bch2_trans_run(c, found_btree_node_is_readable(trans, &n))) {
	found_btree_node_to_key(&b->key, &n);

	CLASS(printbuf, buf)();
	if (!bch2_btree_node_read_done(c, ca, b, NULL, &buf)) {
		/* read_done will swap out b->data for another buffer */
		bn = b->data;
		/*
		 * Grab journal_seq here because we want the max journal_seq of
		 * any bset; read_done sorts down to a single set and picks the
		 * max journal_seq
		 */
		n.journal_seq		= le64_to_cpu(bn->keys.journal_seq),
		n.sectors_written	= b->written;

		mutex_lock(&f->lock);
		if (BSET_BIG_ENDIAN(&bn->keys) != CPU_BIG_ENDIAN) {
			bch_err(c, "try_read_btree_node() can't handle endian conversion");
@@ -237,12 +225,20 @@ static int read_btree_nodes_worker(void *p)
	struct find_btree_nodes_worker *w = p;
	struct bch_fs *c = container_of(w->f, struct bch_fs, found_btree_nodes);
	struct bch_dev *ca = w->ca;
	void *buf = (void *) __get_free_page(GFP_KERNEL);
	struct bio *bio = bio_alloc(NULL, 1, 0, GFP_KERNEL);
	unsigned long last_print = jiffies;
	struct btree *b = NULL;
	struct bio *bio = NULL;

	b = __bch2_btree_node_mem_alloc(c);
	if (!b) {
		bch_err(c, "read_btree_nodes_worker: error allocating buf");
		w->f->ret = -ENOMEM;
		goto err;
	}

	if (!buf || !bio) {
		bch_err(c, "read_btree_nodes_worker: error allocating bio/buf");
	bio = bio_alloc(NULL, buf_pages(b->data, c->opts.btree_node_size), 0, GFP_KERNEL);
	if (!bio) {
		bch_err(c, "read_btree_nodes_worker: error allocating bio");
		w->f->ret = -ENOMEM;
		goto err;
	}
@@ -266,11 +262,13 @@ static int read_btree_nodes_worker(void *p)
			    !bch2_dev_btree_bitmap_marked_sectors(ca, sector, btree_sectors(c)))
				continue;

			try_read_btree_node(w->f, ca, bio, buf, sector);
			try_read_btree_node(w->f, ca, b, bio, sector);
		}
err:
	if (b)
		__btree_node_data_free(b);
	kfree(b);
	bio_put(bio);
	free_page((unsigned long) buf);
	enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_btree_node_scan);
	closure_put(w->cl);
	kfree(w);
+9 −2
Original line number Diff line number Diff line
@@ -153,8 +153,6 @@ void __bch2_btree_verify(struct bch_fs *c, struct btree *b)
		c->verify_data = __bch2_btree_node_mem_alloc(c);
		if (!c->verify_data)
			goto out;

		list_del_init(&c->verify_data->list);
	}

	BUG_ON(b->nsets != 1);
@@ -586,6 +584,8 @@ static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf,
	i->ubuf = buf;
	i->size	= size;
	i->ret	= 0;

	int srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
restart:
	seqmutex_lock(&c->btree_trans_lock);
	list_sort(&c->btree_trans_list, list_ptr_order_cmp);
@@ -599,6 +599,11 @@ static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf,
		if (!closure_get_not_zero(&trans->ref))
			continue;

		if (!trans->srcu_held) {
			closure_put(&trans->ref);
			continue;
		}

		u32 seq = seqmutex_unlock(&c->btree_trans_lock);

		bch2_btree_trans_to_text(&i->buf, trans);
@@ -620,6 +625,8 @@ static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf,
	}
	seqmutex_unlock(&c->btree_trans_lock);
unlocked:
	srcu_read_unlock(&c->btree_trans_barrier, srcu_idx);

	if (i->buf.allocation_failure)
		ret = -ENOMEM;

Loading