Commit 035d72f7 authored by Kent Overstreet
Browse files

bcachefs: bch2_ec_stripe_head_get() now checks for change in rw devices



This factors out ec_stripe_head_devs_update(), which initializes the
bitmap of devices we're allocating from, and runs it every time
c->rw_devs_change_count changes.

We also cancel pending, not-yet-allocated stripes, since they may refer to
devices that are no longer available.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent 83ccd9b3
Loading
Loading
Loading
Loading
+57 −27
Original line number Diff line number Diff line
@@ -1572,10 +1572,12 @@ void bch2_ec_do_stripe_creates(struct bch_fs *c)
		bch2_write_ref_put(c, BCH_WRITE_REF_stripe_create);
}

static void ec_stripe_set_pending(struct bch_fs *c, struct ec_stripe_head *h)
static void ec_stripe_new_set_pending(struct bch_fs *c, struct ec_stripe_head *h)
{
	struct ec_stripe_new *s = h->s;

	lockdep_assert_held(&h->lock);

	BUG_ON(!s->allocated && !s->err);

	h->s		= NULL;
@@ -1588,6 +1590,12 @@ static void ec_stripe_set_pending(struct bch_fs *c, struct ec_stripe_head *h)
	ec_stripe_new_put(c, s, STRIPE_REF_io);
}

static void ec_stripe_new_cancel(struct bch_fs *c, struct ec_stripe_head *h, int err)
{
	h->s->err = err;
	ec_stripe_new_set_pending(c, h);
}

void bch2_ec_bucket_cancel(struct bch_fs *c, struct open_bucket *ob)
{
	struct ec_stripe_new *s = ob->ec;
@@ -1711,27 +1719,14 @@ static int ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h)
	return 0;
}

static struct ec_stripe_head *
ec_new_stripe_head_alloc(struct bch_fs *c, unsigned disk_label,
			 unsigned algo, unsigned redundancy,
			 enum bch_watermark watermark)
static void ec_stripe_head_devs_update(struct bch_fs *c, struct ec_stripe_head *h)
{
	struct ec_stripe_head *h;

	h = kzalloc(sizeof(*h), GFP_KERNEL);
	if (!h)
		return NULL;

	mutex_init(&h->lock);
	BUG_ON(!mutex_trylock(&h->lock));

	h->disk_label	= disk_label;
	h->algo		= algo;
	h->redundancy	= redundancy;
	h->watermark	= watermark;
	struct bch_devs_mask devs = h->devs;

	rcu_read_lock();
	h->devs = target_rw_devs(c, BCH_DATA_user, disk_label ? group_to_target(disk_label - 1) : 0);
	h->devs = target_rw_devs(c, BCH_DATA_user, h->disk_label
				 ? group_to_target(h->disk_label - 1)
				 : 0);
	unsigned nr_devs = dev_mask_nr(&h->devs);

	for_each_member_device_rcu(c, ca, &h->devs)
@@ -1741,6 +1736,7 @@ ec_new_stripe_head_alloc(struct bch_fs *c, unsigned disk_label,

	h->blocksize = pick_blocksize(c, &h->devs);

	h->nr_active_devs = 0;
	for_each_member_device_rcu(c, ca, &h->devs)
		if (ca->mi.bucket_size == h->blocksize)
			h->nr_active_devs++;
@@ -1751,7 +1747,9 @@ ec_new_stripe_head_alloc(struct bch_fs *c, unsigned disk_label,
	 * If we only have redundancy + 1 devices, we're better off with just
	 * replication:
	 */
	if (h->nr_active_devs < h->redundancy + 2) {
	h->insufficient_devs = h->nr_active_devs < h->redundancy + 2;

	if (h->insufficient_devs) {
		const char *err;

		if (nr_devs < h->redundancy + 2)
@@ -1766,6 +1764,34 @@ ec_new_stripe_head_alloc(struct bch_fs *c, unsigned disk_label,
				h->nr_active_devs, h->redundancy + 2, err);
	}

	struct bch_devs_mask devs_leaving;
	bitmap_andnot(devs_leaving.d, devs.d, h->devs.d, BCH_SB_MEMBERS_MAX);

	if (h->s && !h->s->allocated && dev_mask_nr(&devs_leaving))
		ec_stripe_new_cancel(c, h, -EINTR);

	h->rw_devs_change_count = c->rw_devs_change_count;
}

/*
 * Allocate a stripe head for the given (disk_label, algo, redundancy,
 * watermark) combination and add it to c->ec_stripe_head_list.
 *
 * The new head is returned with its lock already taken. Only the four
 * identifying fields are filled in here; everything else is left zeroed by
 * kzalloc().
 *
 * Returns NULL if the allocation fails, so callers must check the result
 * before dereferencing it.
 */
static struct ec_stripe_head *
ec_new_stripe_head_alloc(struct bch_fs *c, unsigned disk_label,
			 unsigned algo, unsigned redundancy,
			 enum bch_watermark watermark)
{
	struct ec_stripe_head *head = kzalloc(sizeof(*head), GFP_KERNEL);

	if (!head)
		return NULL;

	head->disk_label	= disk_label;
	head->algo		= algo;
	head->redundancy	= redundancy;
	head->watermark		= watermark;

	mutex_init(&head->lock);
	/* Lock was initialized just above; the trylock is expected to always succeed */
	BUG_ON(!mutex_trylock(&head->lock));

	list_add(&head->list, &c->ec_stripe_head_list);
	return head;
}
@@ -1776,7 +1802,7 @@ void bch2_ec_stripe_head_put(struct bch_fs *c, struct ec_stripe_head *h)
	    h->s->allocated &&
	    bitmap_weight(h->s->blocks_allocated,
			  h->s->nr_data) == h->s->nr_data)
		ec_stripe_set_pending(c, h);
		ec_stripe_new_set_pending(c, h);

	mutex_unlock(&h->lock);
}
@@ -1801,7 +1827,7 @@ __bch2_ec_stripe_head_get(struct btree_trans *trans,

	if (test_bit(BCH_FS_going_ro, &c->flags)) {
		h = ERR_PTR(-BCH_ERR_erofs_no_writes);
		goto found;
		goto err;
	}

	list_for_each_entry(h, &c->ec_stripe_head_list, list)
@@ -1810,18 +1836,23 @@ __bch2_ec_stripe_head_get(struct btree_trans *trans,
		    h->redundancy	== redundancy &&
		    h->watermark	== watermark) {
			ret = bch2_trans_mutex_lock(trans, &h->lock);
			if (ret)
			if (ret) {
				h = ERR_PTR(ret);
				goto err;
			}
			goto found;
		}

	h = ec_new_stripe_head_alloc(c, disk_label, algo, redundancy, watermark);
found:
	if (!IS_ERR_OR_NULL(h) &&
	    h->nr_active_devs < h->redundancy + 2) {
	if (h->rw_devs_change_count != c->rw_devs_change_count)
		ec_stripe_head_devs_update(c, h);

	if (h->insufficient_devs) {
		mutex_unlock(&h->lock);
		h = NULL;
	}
err:
	mutex_unlock(&c->ec_stripe_head_lock);
	return h;
}
@@ -2261,8 +2292,7 @@ static void __bch2_ec_stop(struct bch_fs *c, struct bch_dev *ca)
		}
		goto unlock;
found:
		h->s->err = -BCH_ERR_erofs_no_writes;
		ec_stripe_set_pending(c, h);
		ec_stripe_new_cancel(c, h, -BCH_ERR_erofs_no_writes);
unlock:
		mutex_unlock(&h->lock);
	}
+3 −0
Original line number Diff line number Diff line
@@ -192,6 +192,9 @@ struct ec_stripe_head {
	unsigned		algo;
	unsigned		redundancy;
	enum bch_watermark	watermark;
	bool			insufficient_devs;

	unsigned long		rw_devs_change_count;

	u64			nr_created;