Commit d9534cc9 authored by Kent Overstreet
Browse files

bcachefs: fix buffer overflow in nocow write path



BCH_REPLICAS_MAX isn't the actual maximum number of pointers in an
extent, it's the maximum number of dirty pointers.

We don't have a real restriction on the number of cached pointers, and
we don't want a fixed size array here anyways - so switch to
DARRAY_PREALLOCATED().

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Reported-and-tested-by: Daniel J Blueman <daniel@quora.org>
parent 099dc5c2
Loading
Loading
Loading
Loading
+41 −41
Original line number Diff line number Diff line
@@ -1216,6 +1216,12 @@ static CLOSURE_CALLBACK(bch2_nocow_write_done)
	bch2_write_done(cl);
}

/*
 * One bucket that bch2_nocow_write() takes a nocow lock on: an entry is
 * recorded for each pointer of the extent being overwritten, and the entries
 * are later walked to lock the buckets and check them for staleness.
 */
struct bucket_to_lock {
	/*
	 * Bucket position, from PTR_BUCKET_POS() of the extent pointer;
	 * b.inode is used as the device index and b.offset as the bucket
	 * number when looking up the bucket's generation.
	 */
	struct bpos		b;
	/*
	 * Generation copied from the extent pointer (ptr->gen); compared with
	 * the bucket's current generation via gen_after() once the lock is
	 * held, to detect a stale pointer.
	 */
	unsigned		gen;
	/*
	 * Lock bucket returned by bucket_nocow_lock() for this position;
	 * prefetched at lookup time and later handed to
	 * __bch2_bucket_nocow_lock().
	 */
	struct nocow_lock_bucket *l;
};

static void bch2_nocow_write(struct bch_write_op *op)
{
	struct bch_fs *c = op->c;
@@ -1224,18 +1230,16 @@ static void bch2_nocow_write(struct bch_write_op *op)
	struct bkey_s_c k;
	struct bkey_ptrs_c ptrs;
	const struct bch_extent_ptr *ptr;
	struct {
		struct bpos	b;
		unsigned	gen;
		struct nocow_lock_bucket *l;
	} buckets[BCH_REPLICAS_MAX];
	unsigned nr_buckets = 0;
	DARRAY_PREALLOCATED(struct bucket_to_lock, 3) buckets;
	struct bucket_to_lock *i;
	u32 snapshot;
	int ret, i;
	struct bucket_to_lock *stale_at;
	int ret;

	if (op->flags & BCH_WRITE_MOVE)
		return;

	darray_init(&buckets);
	trans = bch2_trans_get(c);
retry:
	bch2_trans_begin(trans);
@@ -1250,7 +1254,7 @@ static void bch2_nocow_write(struct bch_write_op *op)
	while (1) {
		struct bio *bio = &op->wbio.bio;

		nr_buckets = 0;
		buckets.nr = 0;

		k = bch2_btree_iter_peek_slot(&iter);
		ret = bkey_err(k);
@@ -1271,18 +1275,18 @@ static void bch2_nocow_write(struct bch_write_op *op)
		/* Get iorefs before dropping btree locks: */
		ptrs = bch2_bkey_ptrs_c(k);
		bkey_for_each_ptr(ptrs, ptr) {
			buckets[nr_buckets].b = PTR_BUCKET_POS(c, ptr);
			buckets[nr_buckets].gen = ptr->gen;
			buckets[nr_buckets].l =
				bucket_nocow_lock(&c->nocow_locks,
						  bucket_to_u64(buckets[nr_buckets].b));

			prefetch(buckets[nr_buckets].l);
			struct bpos b = PTR_BUCKET_POS(c, ptr);
			struct nocow_lock_bucket *l =
				bucket_nocow_lock(&c->nocow_locks, bucket_to_u64(b));
			prefetch(l);

			if (unlikely(!bch2_dev_get_ioref(bch_dev_bkey_exists(c, ptr->dev), WRITE)))
				goto err_get_ioref;

			nr_buckets++;
			/* XXX allocating memory with btree locks held - rare */
			darray_push_gfp(&buckets, ((struct bucket_to_lock) {
						   .b = b, .gen = ptr->gen, .l = l,
						   }), GFP_KERNEL|__GFP_NOFAIL);

			if (ptr->unwritten)
				op->flags |= BCH_WRITE_CONVERT_UNWRITTEN;
@@ -1296,22 +1300,22 @@ static void bch2_nocow_write(struct bch_write_op *op)
		if (op->flags & BCH_WRITE_CONVERT_UNWRITTEN)
			bch2_cut_back(POS(op->pos.inode, op->pos.offset + bio_sectors(bio)), op->insert_keys.top);

		for (i = 0; i < nr_buckets; i++) {
			struct bch_dev *ca = bch_dev_bkey_exists(c, buckets[i].b.inode);
			struct nocow_lock_bucket *l = buckets[i].l;
			bool stale;
		darray_for_each(buckets, i) {
			struct bch_dev *ca = bch_dev_bkey_exists(c, i->b.inode);

			__bch2_bucket_nocow_lock(&c->nocow_locks, l,
						 bucket_to_u64(buckets[i].b),
			__bch2_bucket_nocow_lock(&c->nocow_locks, i->l,
						 bucket_to_u64(i->b),
						 BUCKET_NOCOW_LOCK_UPDATE);

			rcu_read_lock();
			stale = gen_after(*bucket_gen(ca, buckets[i].b.offset), buckets[i].gen);
			bool stale = gen_after(*bucket_gen(ca, i->b.offset), i->gen);
			rcu_read_unlock();

			if (unlikely(stale))
			if (unlikely(stale)) {
				stale_at = i;
				goto err_bucket_stale;
			}
		}

		bio = &op->wbio.bio;
		if (k.k->p.offset < op->pos.offset + bio_sectors(bio)) {
@@ -1346,15 +1350,14 @@ static void bch2_nocow_write(struct bch_write_op *op)

	if (ret) {
		bch_err_inum_offset_ratelimited(c,
				op->pos.inode,
				op->pos.offset << 9,
				"%s: btree lookup error %s",
				__func__, bch2_err_str(ret));
			op->pos.inode, op->pos.offset << 9,
			"%s: btree lookup error %s", __func__, bch2_err_str(ret));
		op->error = ret;
		op->flags |= BCH_WRITE_DONE;
	}

	bch2_trans_put(trans);
	darray_exit(&buckets);

	/* fallback to cow write path? */
	if (!(op->flags & BCH_WRITE_DONE)) {
@@ -1374,24 +1377,21 @@ static void bch2_nocow_write(struct bch_write_op *op)
	}
	return;
err_get_ioref:
	for (i = 0; i < nr_buckets; i++)
		percpu_ref_put(&bch_dev_bkey_exists(c, buckets[i].b.inode)->io_ref);
	darray_for_each(buckets, i)
		percpu_ref_put(&bch_dev_bkey_exists(c, i->b.inode)->io_ref);

	/* Fall back to COW path: */
	goto out;
err_bucket_stale:
	while (i >= 0) {
		bch2_bucket_nocow_unlock(&c->nocow_locks,
					 buckets[i].b,
					 BUCKET_NOCOW_LOCK_UPDATE);
		--i;
	darray_for_each(buckets, i) {
		bch2_bucket_nocow_unlock(&c->nocow_locks, i->b, BUCKET_NOCOW_LOCK_UPDATE);
		if (i == stale_at)
			break;
	}
	for (i = 0; i < nr_buckets; i++)
		percpu_ref_put(&bch_dev_bkey_exists(c, buckets[i].b.inode)->io_ref);

	/* We can retry this: */
	ret = -BCH_ERR_transaction_restart;
	goto out;
	goto err_get_ioref;
}

static void __bch2_write(struct bch_write_op *op)