Commit b5f1b488 authored by Linus Torvalds
Browse files

Merge tag 'bcachefs-2024-11-07' of git://evilpiepirate.org/bcachefs

Pull bcachefs fixes from Kent Overstreet:
 "Some trivial syzbot fixes, two more serious btree fixes found by
  looping single_devices.ktest small_nodes:

   - Topology error on split after merge, where we accidentally picked
     the node being deleted for the pivot, resulting in an assertion pop

   - New nodes being preallocated were left on the freedlist, unlocked,
     resulting in them sometimes being accidentally freed: this dated
     from pre-cycle detector, when we could leave them locked. This
     should have resulted in more explosions and fireworks, but turned
     out to be surprisingly hard to hit because the preallocated nodes
     were being used right away.

     The fix for this is bigger than we'd like - reworking btree list
     handling was a bit invasive - but we've now got more assertions and
     it's well tested.

   - Also another mishandled transaction restart fix (in
     btree_node_prefetch) - we're almost done with those"

* tag 'bcachefs-2024-11-07' of git://evilpiepirate.org/bcachefs:
  bcachefs: Fix UAF in __promote_alloc() error path
  bcachefs: Change OPT_STR max to be 1 less than the size of choices array
  bcachefs: btree_cache.freeable list fixes
  bcachefs: check the invalid parameter for perf test
  bcachefs: add check NULL return of bio_kmalloc in journal_read_bucket
  bcachefs: Ensure BCH_FS_may_go_rw is set before exiting recovery
  bcachefs: Fix topology errors on split after merge
  bcachefs: Ancient versions with bad bkey_formats are no longer supported
  bcachefs: Fix error handling in bch2_btree_node_prefetch()
  bcachefs: Fix null ptr deref in bucket_gen_get()
parents 9ea7edac 8440da93
Loading
Loading
Loading
Loading
+3 −4
Original line number Diff line number Diff line
@@ -643,7 +643,7 @@ int bch2_bkey_format_invalid(struct bch_fs *c,
			     enum bch_validate_flags flags,
			     struct printbuf *err)
{
	unsigned i, bits = KEY_PACKED_BITS_START;
	unsigned bits = KEY_PACKED_BITS_START;

	if (f->nr_fields != BKEY_NR_FIELDS) {
		prt_printf(err, "incorrect number of fields: got %u, should be %u",
@@ -655,9 +655,8 @@ int bch2_bkey_format_invalid(struct bch_fs *c,
	 * Verify that the packed format can't represent fields larger than the
	 * unpacked format:
	 */
	for (i = 0; i < f->nr_fields; i++) {
		if ((!c || c->sb.version_min >= bcachefs_metadata_version_snapshot) &&
		    bch2_bkey_format_field_overflows(f, i)) {
	for (unsigned i = 0; i < f->nr_fields; i++) {
		if (bch2_bkey_format_field_overflows(f, i)) {
			unsigned unpacked_bits = bch2_bkey_format_current.bits_per_field[i];
			u64 unpacked_max = ~((~0ULL << 1) << (unpacked_bits - 1));
			unsigned packed_bits = min(64, f->bits_per_field[i]);
+66 −41
Original line number Diff line number Diff line
@@ -59,16 +59,38 @@ static inline size_t btree_cache_can_free(struct btree_cache_list *list)

static void btree_node_to_freedlist(struct btree_cache *bc, struct btree *b)
{
	BUG_ON(!list_empty(&b->list));

	if (b->c.lock.readers)
		list_move(&b->list, &bc->freed_pcpu);
		list_add(&b->list, &bc->freed_pcpu);
	else
		list_move(&b->list, &bc->freed_nonpcpu);
		list_add(&b->list, &bc->freed_nonpcpu);
}

/*
 * Put @b on the btree cache's freeable list and account for it in
 * bc->nr_freeable.
 *
 * @b must not currently be linked on any list (b->list must be empty) and
 * must still own a data buffer (b->data non-NULL); both conditions are
 * enforced with BUG_ON().  This helper takes no locks itself.
 */
static void __bch2_btree_node_to_freelist(struct btree_cache *bc, struct btree *b)
{
	/* The node must already have been unlinked from whatever list it was on. */
	BUG_ON(!list_empty(&b->list));
	BUG_ON(!b->data);

	/* Keep the counter and the list membership updated together. */
	bc->nr_freeable++;
	list_add(&b->list, &bc->freeable);
}

static void btree_node_data_free(struct bch_fs *c, struct btree *b)
/*
 * Locked wrapper around __bch2_btree_node_to_freelist(): adds @b to the
 * freeable list of @c's btree cache while holding bc->lock, then releases the
 * write and intent locks on @b (which the caller is therefore expected to
 * hold on entry).
 */
void bch2_btree_node_to_freelist(struct bch_fs *c, struct btree *b)
{
	struct btree_cache *bc = &c->btree_cache;

	mutex_lock(&bc->lock);
	__bch2_btree_node_to_freelist(bc, b);
	mutex_unlock(&bc->lock);

	/* The node's six locks are dropped only after it is on the list. */
	six_unlock_write(&b->c.lock);
	six_unlock_intent(&b->c.lock);
}

static void __btree_node_data_free(struct btree_cache *bc, struct btree *b)
{
	BUG_ON(!list_empty(&b->list));
	BUG_ON(btree_node_hashed(b));

	/*
@@ -94,11 +116,17 @@ static void btree_node_data_free(struct bch_fs *c, struct btree *b)
#endif
	b->aux_data = NULL;

	bc->nr_freeable--;

	btree_node_to_freedlist(bc, b);
}

/*
 * Unlink @b from the list it is currently on, drop it from the
 * bc->nr_freeable count, and hand it to __btree_node_data_free().
 *
 * @b must be linked on a list when this is called (BUG_ON otherwise).
 * NOTE(review): decrementing nr_freeable implies @b is expected to be on the
 * freeable list specifically - confirm against callers.
 */
static void btree_node_data_free(struct btree_cache *bc, struct btree *b)
{
	BUG_ON(list_empty(&b->list));
	/* list_del_init() leaves b->list empty, as __btree_node_data_free() requires. */
	list_del_init(&b->list);
	--bc->nr_freeable;
	__btree_node_data_free(bc, b);
}

static int bch2_btree_cache_cmp_fn(struct rhashtable_compare_arg *arg,
				   const void *obj)
{
@@ -174,21 +202,10 @@ struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *c)

	bch2_btree_lock_init(&b->c, 0);

	bc->nr_freeable++;
	list_add(&b->list, &bc->freeable);
	__bch2_btree_node_to_freelist(bc, b);
	return b;
}

void bch2_btree_node_to_freelist(struct bch_fs *c, struct btree *b)
{
	mutex_lock(&c->btree_cache.lock);
	list_move(&b->list, &c->btree_cache.freeable);
	mutex_unlock(&c->btree_cache.lock);

	six_unlock_write(&b->c.lock);
	six_unlock_intent(&b->c.lock);
}

static inline bool __btree_node_pinned(struct btree_cache *bc, struct btree *b)
{
	struct bbpos pos = BBPOS(b->c.btree_id, b->key.k.p);
@@ -236,11 +253,11 @@ void bch2_btree_cache_unpin(struct bch_fs *c)

/* Btree in memory cache - hash table */

void bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b)
void __bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b)
{
	lockdep_assert_held(&bc->lock);
	int ret = rhashtable_remove_fast(&bc->table, &b->hash, bch_btree_cache_params);

	int ret = rhashtable_remove_fast(&bc->table, &b->hash, bch_btree_cache_params);
	BUG_ON(ret);

	/* Cause future lookups for this node to fail: */
@@ -248,17 +265,22 @@ void bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b)

	if (b->c.btree_id < BTREE_ID_NR)
		--bc->nr_by_btree[b->c.btree_id];
	--bc->live[btree_node_pinned(b)].nr;
	list_del_init(&b->list);
}

	bc->live[btree_node_pinned(b)].nr--;
	bc->nr_freeable++;
	list_move(&b->list, &bc->freeable);
/*
 * Remove @b from the btree cache hash table and then place it on the
 * freeable list.
 *
 * This is the two-step combination of __bch2_btree_node_hash_remove() followed
 * by __bch2_btree_node_to_freelist(); callers that want the node unhashed but
 * not put on the freeable list call the __ variant directly.
 */
void bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b)
{
	__bch2_btree_node_hash_remove(bc, b);
	__bch2_btree_node_to_freelist(bc, b);
}

int __bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b)
{
	BUG_ON(!list_empty(&b->list));
	BUG_ON(b->hash_val);
	b->hash_val = btree_ptr_hash_val(&b->key);

	b->hash_val = btree_ptr_hash_val(&b->key);
	int ret = rhashtable_lookup_insert_fast(&bc->table, &b->hash,
						bch_btree_cache_params);
	if (ret)
@@ -270,10 +292,8 @@ int __bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b)
	bool p = __btree_node_pinned(bc, b);
	mod_bit(BTREE_NODE_pinned, &b->flags, p);

	list_move_tail(&b->list, &bc->live[p].list);
	list_add_tail(&b->list, &bc->live[p].list);
	bc->live[p].nr++;

	bc->nr_freeable--;
	return 0;
}

@@ -485,7 +505,7 @@ static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
			goto out;

		if (!btree_node_reclaim(c, b, true)) {
			btree_node_data_free(c, b);
			btree_node_data_free(bc, b);
			six_unlock_write(&b->c.lock);
			six_unlock_intent(&b->c.lock);
			freed++;
@@ -501,10 +521,10 @@ static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
			bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_access_bit]++;
			--touched;;
		} else if (!btree_node_reclaim(c, b, true)) {
			bch2_btree_node_hash_remove(bc, b);
			__bch2_btree_node_hash_remove(bc, b);
			__btree_node_data_free(bc, b);

			freed++;
			btree_node_data_free(c, b);
			bc->nr_freed++;

			six_unlock_write(&b->c.lock);
@@ -587,7 +607,7 @@ void bch2_fs_btree_cache_exit(struct bch_fs *c)
		BUG_ON(btree_node_read_in_flight(b) ||
		       btree_node_write_in_flight(b));

		btree_node_data_free(c, b);
		btree_node_data_free(bc, b);
	}

	BUG_ON(!bch2_journal_error(&c->journal) &&
@@ -786,8 +806,8 @@ struct btree *bch2_btree_node_mem_alloc(struct btree_trans *trans, bool pcpu_rea

	BUG_ON(!six_trylock_intent(&b->c.lock));
	BUG_ON(!six_trylock_write(&b->c.lock));
got_node:

got_node:
	/*
	 * btree_free() doesn't free memory; it sticks the node on the end of
	 * the list. Check if there's any freed nodes there:
@@ -796,7 +816,12 @@ struct btree *bch2_btree_node_mem_alloc(struct btree_trans *trans, bool pcpu_rea
		if (!btree_node_reclaim(c, b2, false)) {
			swap(b->data, b2->data);
			swap(b->aux_data, b2->aux_data);

			list_del_init(&b2->list);
			--bc->nr_freeable;
			btree_node_to_freedlist(bc, b2);
			mutex_unlock(&bc->lock);

			six_unlock_write(&b2->c.lock);
			six_unlock_intent(&b2->c.lock);
			goto got_mem;
@@ -810,11 +835,8 @@ struct btree *bch2_btree_node_mem_alloc(struct btree_trans *trans, bool pcpu_rea
			goto err;
	}

	mutex_lock(&bc->lock);
	bc->nr_freeable++;
got_mem:
	mutex_unlock(&bc->lock);

	BUG_ON(!list_empty(&b->list));
	BUG_ON(btree_node_hashed(b));
	BUG_ON(btree_node_dirty(b));
	BUG_ON(btree_node_write_in_flight(b));
@@ -845,7 +867,7 @@ struct btree *bch2_btree_node_mem_alloc(struct btree_trans *trans, bool pcpu_rea
	if (bc->alloc_lock == current) {
		b2 = btree_node_cannibalize(c);
		clear_btree_node_just_written(b2);
		bch2_btree_node_hash_remove(bc, b2);
		__bch2_btree_node_hash_remove(bc, b2);

		if (b) {
			swap(b->data, b2->data);
@@ -855,9 +877,9 @@ struct btree *bch2_btree_node_mem_alloc(struct btree_trans *trans, bool pcpu_rea
			six_unlock_intent(&b2->c.lock);
		} else {
			b = b2;
			list_del_init(&b->list);
		}

		BUG_ON(!list_empty(&b->list));
		mutex_unlock(&bc->lock);

		trace_and_count(c, btree_cache_cannibalize, trans);
@@ -936,7 +958,7 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans,
		b->hash_val = 0;

		mutex_lock(&bc->lock);
		list_add(&b->list, &bc->freeable);
		__bch2_btree_node_to_freelist(bc, b);
		mutex_unlock(&bc->lock);

		six_unlock_write(&b->c.lock);
@@ -1312,9 +1334,12 @@ int bch2_btree_node_prefetch(struct btree_trans *trans,

	b = bch2_btree_node_fill(trans, path, k, btree_id,
				 level, SIX_LOCK_read, false);
	if (!IS_ERR_OR_NULL(b))
	int ret = PTR_ERR_OR_ZERO(b);
	if (ret)
		return ret;
	if (b)
		six_unlock_read(&b->c.lock);
	return bch2_trans_relock(trans) ?: PTR_ERR_OR_ZERO(b);
	return 0;
}

void bch2_btree_node_evict(struct btree_trans *trans, const struct bkey_i *k)
@@ -1353,7 +1378,7 @@ void bch2_btree_node_evict(struct btree_trans *trans, const struct bkey_i *k)

	mutex_lock(&bc->lock);
	bch2_btree_node_hash_remove(bc, b);
	btree_node_data_free(c, b);
	btree_node_data_free(bc, b);
	mutex_unlock(&bc->lock);
out:
	six_unlock_write(&b->c.lock);
+2 −0
Original line number Diff line number Diff line
@@ -14,7 +14,9 @@ void bch2_recalc_btree_reserve(struct bch_fs *);

void bch2_btree_node_to_freelist(struct bch_fs *, struct btree *);

void __bch2_btree_node_hash_remove(struct btree_cache *, struct btree *);
void bch2_btree_node_hash_remove(struct btree_cache *, struct btree *);

int __bch2_btree_node_hash_insert(struct btree_cache *, struct btree *);
int bch2_btree_node_hash_insert(struct btree_cache *, struct btree *,
				unsigned, enum btree_id);
+1 −1
Original line number Diff line number Diff line
@@ -186,7 +186,7 @@ static void try_read_btree_node(struct find_btree_nodes *f, struct bch_dev *ca,
		.ptrs[0].type	= 1 << BCH_EXTENT_ENTRY_ptr,
		.ptrs[0].offset	= offset,
		.ptrs[0].dev	= ca->dev_idx,
		.ptrs[0].gen	= *bucket_gen(ca, sector_to_bucket(ca, offset)),
		.ptrs[0].gen	= bucket_gen_get(ca, sector_to_bucket(ca, offset)),
	};
	rcu_read_unlock();

+18 −12
Original line number Diff line number Diff line
@@ -237,10 +237,6 @@ static void __btree_node_free(struct btree_trans *trans, struct btree *b)
	BUG_ON(b->will_make_reachable);

	clear_btree_node_noevict(b);

	mutex_lock(&c->btree_cache.lock);
	list_move(&b->list, &c->btree_cache.freeable);
	mutex_unlock(&c->btree_cache.lock);
}

static void bch2_btree_node_free_inmem(struct btree_trans *trans,
@@ -252,12 +248,12 @@ static void bch2_btree_node_free_inmem(struct btree_trans *trans,

	bch2_btree_node_lock_write_nofail(trans, path, &b->c);

	__btree_node_free(trans, b);

	mutex_lock(&c->btree_cache.lock);
	bch2_btree_node_hash_remove(&c->btree_cache, b);
	mutex_unlock(&c->btree_cache.lock);

	__btree_node_free(trans, b);

	six_unlock_write(&b->c.lock);
	mark_btree_node_locked_noreset(path, level, BTREE_NODE_INTENT_LOCKED);

@@ -289,7 +285,7 @@ static void bch2_btree_node_free_never_used(struct btree_update *as,
	clear_btree_node_need_write(b);

	mutex_lock(&c->btree_cache.lock);
	bch2_btree_node_hash_remove(&c->btree_cache, b);
	__bch2_btree_node_hash_remove(&c->btree_cache, b);
	mutex_unlock(&c->btree_cache.lock);

	BUG_ON(p->nr >= ARRAY_SIZE(p->b));
@@ -521,8 +517,7 @@ static void bch2_btree_reserve_put(struct btree_update *as, struct btree_trans *
			btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_intent);
			btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_write);
			__btree_node_free(trans, b);
			six_unlock_write(&b->c.lock);
			six_unlock_intent(&b->c.lock);
			bch2_btree_node_to_freelist(c, b);
		}
	}
}
@@ -1434,6 +1429,15 @@ bch2_btree_insert_keys_interior(struct btree_update *as,
	}
}

/*
 * Return true if @insert_keys contains a deleted key (per bkey_deleted())
 * whose position equals @pos.
 *
 * @insert_keys may be NULL, in which case the result is false.
 */
static bool key_deleted_in_insert(struct keylist *insert_keys, struct bpos pos)
{
	if (insert_keys)
		for_each_keylist_key(insert_keys, k)
			if (bkey_deleted(&k->k) && bpos_eq(k->k.p, pos))
				return true;
	return false;
}

/*
 * Move keys from n1 (original replacement node, now lower node) to n2 (higher
 * node)
@@ -1441,7 +1445,8 @@ bch2_btree_insert_keys_interior(struct btree_update *as,
static void __btree_split_node(struct btree_update *as,
			       struct btree_trans *trans,
			       struct btree *b,
			       struct btree *n[2])
			       struct btree *n[2],
			       struct keylist *insert_keys)
{
	struct bkey_packed *k;
	struct bpos n1_pos = POS_MIN;
@@ -1476,7 +1481,8 @@ static void __btree_split_node(struct btree_update *as,
		if (b->c.level &&
		    u64s < n1_u64s &&
		    u64s + k->u64s >= n1_u64s &&
		    bch2_key_deleted_in_journal(trans, b->c.btree_id, b->c.level, uk.p))
		    (bch2_key_deleted_in_journal(trans, b->c.btree_id, b->c.level, uk.p) ||
		     key_deleted_in_insert(insert_keys, uk.p)))
			n1_u64s += k->u64s;

		i = u64s >= n1_u64s;
@@ -1603,7 +1609,7 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans,
		n[0] = n1 = bch2_btree_node_alloc(as, trans, b->c.level);
		n[1] = n2 = bch2_btree_node_alloc(as, trans, b->c.level);

		__btree_split_node(as, trans, b, n);
		__btree_split_node(as, trans, b, n, keys);

		if (keys) {
			btree_split_insert_keys(as, trans, path, n1, keys);
Loading