Commit a2cb8a62 authored by Kent Overstreet's avatar Kent Overstreet
Browse files

bcachefs: Self healing on read IO error



This repurposes the promote path, which already knows how to call
data_update() after a read: we now automatically rewrite bad data when
we get a read error and then successfully retry from a different
replica.

Signed-off-by: default avatarKent Overstreet <kent.overstreet@linux.dev>
parent b1d63b06
Loading
Loading
Loading
Loading
+4 −4
Original line number Diff line number Diff line
@@ -37,7 +37,7 @@ static void bch2_extent_crc_pack(union bch_extent_crc *,
				 struct bch_extent_crc_unpacked,
				 enum bch_extent_entry_type);

static struct bch_dev_io_failures *dev_io_failures(struct bch_io_failures *f,
struct bch_dev_io_failures *bch2_dev_io_failures(struct bch_io_failures *f,
						 unsigned dev)
{
	struct bch_dev_io_failures *i;
@@ -52,7 +52,7 @@ static struct bch_dev_io_failures *dev_io_failures(struct bch_io_failures *f,
void bch2_mark_io_failure(struct bch_io_failures *failed,
			  struct extent_ptr_decoded *p)
{
	struct bch_dev_io_failures *f = dev_io_failures(failed, p->ptr.dev);
	struct bch_dev_io_failures *f = bch2_dev_io_failures(failed, p->ptr.dev);

	if (!f) {
		BUG_ON(failed->nr >= ARRAY_SIZE(failed->devs));
@@ -140,7 +140,7 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k,
		if (p.ptr.cached && (!ca || dev_ptr_stale_rcu(ca, &p.ptr)))
			continue;

		f = failed ? dev_io_failures(failed, p.ptr.dev) : NULL;
		f = failed ? bch2_dev_io_failures(failed, p.ptr.dev) : NULL;
		if (f)
			p.idx = f->nr_failed < f->nr_retries
				? f->idx
+2 −0
Original line number Diff line number Diff line
@@ -399,6 +399,8 @@ out: \

/* utility code common to all keys with pointers: */

struct bch_dev_io_failures *bch2_dev_io_failures(struct bch_io_failures *,
						 unsigned);
void bch2_mark_io_failure(struct bch_io_failures *,
			  struct extent_ptr_decoded *);
int bch2_bkey_pick_read_device(struct bch_fs *, struct bkey_s_c,
+47 −22
Original line number Diff line number Diff line
@@ -93,8 +93,10 @@ static const struct rhashtable_params bch_promote_params = {
static inline int should_promote(struct bch_fs *c, struct bkey_s_c k,
				  struct bpos pos,
				  struct bch_io_opts opts,
				  unsigned flags)
				  unsigned flags,
				  struct bch_io_failures *failed)
{
	if (!failed) {
		BUG_ON(!opts.promote_target);

		if (!(flags & BCH_READ_MAY_PROMOTE))
@@ -108,6 +110,7 @@ static inline int should_promote(struct bch_fs *c, struct bkey_s_c k,

		if (bch2_target_congested(c, opts.promote_target))
			return -BCH_ERR_nopromote_congested;
	}

	if (rhashtable_lookup_fast(&c->promote_table, &pos,
				   bch_promote_params))
@@ -164,7 +167,8 @@ static struct promote_op *__promote_alloc(struct btree_trans *trans,
					  struct extent_ptr_decoded *pick,
					  struct bch_io_opts opts,
					  unsigned sectors,
					  struct bch_read_bio **rbio)
					  struct bch_read_bio **rbio,
					  struct bch_io_failures *failed)
{
	struct bch_fs *c = trans->c;
	struct promote_op *op = NULL;
@@ -217,14 +221,28 @@ static struct promote_op *__promote_alloc(struct btree_trans *trans,
	bio = &op->write.op.wbio.bio;
	bio_init(bio, NULL, bio->bi_inline_vecs, pages, 0);

	struct data_update_opts update_opts = {};

	if (!failed) {
		update_opts.target = opts.promote_target;
		update_opts.extra_replicas = 1;
		update_opts.write_flags = BCH_WRITE_ALLOC_NOWAIT|BCH_WRITE_CACHED;
	} else {
		update_opts.target = opts.foreground_target;

		struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
		unsigned i = 0;
		bkey_for_each_ptr(ptrs, ptr) {
			if (bch2_dev_io_failures(failed, ptr->dev))
				update_opts.rewrite_ptrs |= BIT(i);
			i++;
		}
	}

	ret = bch2_data_update_init(trans, NULL, NULL, &op->write,
			writepoint_hashed((unsigned long) current),
			opts,
			(struct data_update_opts) {
				.target		= opts.promote_target,
				.extra_replicas	= 1,
				.write_flags	= BCH_WRITE_ALLOC_NOWAIT|BCH_WRITE_CACHED,
			},
			update_opts,
			btree_id, k);
	/*
	 * possible errors: -BCH_ERR_nocow_lock_blocked,
@@ -258,10 +276,17 @@ static struct promote_op *promote_alloc(struct btree_trans *trans,
					unsigned flags,
					struct bch_read_bio **rbio,
					bool *bounce,
					bool *read_full)
					bool *read_full,
					struct bch_io_failures *failed)
{
	struct bch_fs *c = trans->c;
	bool promote_full = *read_full || READ_ONCE(c->promote_whole_extents);
	/*
	 * if failed != NULL we're not actually doing a promote, we're
	 * recovering from an io/checksum error
	 */
	bool promote_full = (failed ||
			     *read_full ||
			     READ_ONCE(c->promote_whole_extents));
	/* data might have to be decompressed in the write path: */
	unsigned sectors = promote_full
		? max(pick->crc.compressed_size, pick->crc.live_size)
@@ -272,7 +297,7 @@ static struct promote_op *promote_alloc(struct btree_trans *trans,
	struct promote_op *promote;
	int ret;

	ret = should_promote(c, k, pos, opts, flags);
	ret = should_promote(c, k, pos, opts, flags, failed);
	if (ret)
		goto nopromote;

@@ -280,7 +305,7 @@ static struct promote_op *promote_alloc(struct btree_trans *trans,
				  k.k->type == KEY_TYPE_reflink_v
				  ? BTREE_ID_reflink
				  : BTREE_ID_extents,
				  k, pos, pick, opts, sectors, rbio);
				  k, pos, pick, opts, sectors, rbio, failed);
	ret = PTR_ERR_OR_ZERO(promote);
	if (ret)
		goto nopromote;
@@ -910,9 +935,9 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig,
		bounce = true;
	}

	if (orig->opts.promote_target)
	if (orig->opts.promote_target)// || failed)
		promote = promote_alloc(trans, iter, k, &pick, orig->opts, flags,
					&rbio, &bounce, &read_full);
					&rbio, &bounce, &read_full, failed);

	if (!read_full) {
		EBUG_ON(crc_is_compressed(pick.crc));