Commit 5bd95a37 authored by Kent Overstreet's avatar Kent Overstreet Committed by Kent Overstreet
Browse files

bcachefs: new avoid mechanism for io retries

parent 47799326
Loading
Loading
Loading
Loading
+4 −5
Original line number Diff line number Diff line
@@ -1345,11 +1345,9 @@ static void btree_node_read_work(struct work_struct *work)
	struct bch_dev *ca	= bch_dev_bkey_exists(c, rb->pick.ptr.dev);
	struct btree *b		= rb->bio.bi_private;
	struct bio *bio		= &rb->bio;
	struct bch_devs_mask avoid;
	struct bch_io_failures failed = { .nr = 0 };
	bool can_retry;

	memset(&avoid, 0, sizeof(avoid));

	goto start;
	while (1) {
		bch_info(c, "retrying read");
@@ -1371,8 +1369,9 @@ static void btree_node_read_work(struct work_struct *work)
			percpu_ref_put(&ca->io_ref);
		rb->have_ioref = false;

		__set_bit(rb->pick.ptr.dev, avoid.d);
		can_retry = bch2_btree_pick_ptr(c, b, &avoid, &rb->pick) > 0;
		bch2_mark_io_failure(&failed, &rb->pick);

		can_retry = bch2_btree_pick_ptr(c, b, &failed, &rb->pick) > 0;

		if (!bio->bi_status &&
		    !bch2_btree_node_read_done(c, b, can_retry))
+47 −16
Original line number Diff line number Diff line
@@ -519,12 +519,45 @@ static size_t extent_print_ptrs(struct bch_fs *c, char *buf,
	return out - buf;
}

static inline bool dev_latency_better(struct bch_fs *c,
			      const struct bch_extent_ptr *ptr1,
			      const struct bch_extent_ptr *ptr2)
/*
 * Look up the failure record for device index @dev in @f.
 *
 * Linearly scans the first f->nr entries of f->devs and returns a pointer to
 * the entry whose ->dev matches, or NULL if no entry exists for that device.
 */
static struct bch_dev_io_failures *dev_io_failures(struct bch_io_failures *f,
						   unsigned dev)
{
	struct bch_dev *dev1 = bch_dev_bkey_exists(c, ptr1->dev);
	struct bch_dev *dev2 = bch_dev_bkey_exists(c, ptr2->dev);
	struct bch_dev_io_failures *i;

	/* only the first f->nr slots are in use */
	for (i = f->devs; i < f->devs + f->nr; i++)
		if (i->dev == dev)
			return i;

	return NULL;
}

void bch2_mark_io_failure(struct bch_io_failures *failed,
			  struct extent_ptr_decoded *p)
{
	struct bch_dev_io_failures *f = dev_io_failures(failed, p->ptr.dev);

	if (!f) {
		BUG_ON(failed->nr >= ARRAY_SIZE(failed->devs));

		f = &failed->devs[failed->nr++];
		f->dev		= p->ptr.dev;
		f->nr_failed	= 1;
		f->nr_retries	= 0;
	} else {
		f->nr_failed++;
	}
}

/*
 * returns true if p1 is better than p2:
 */
static inline bool ptr_better(struct bch_fs *c,
			      const struct extent_ptr_decoded p1,
			      const struct extent_ptr_decoded p2)
{
	struct bch_dev *dev1 = bch_dev_bkey_exists(c, p1.ptr.dev);
	struct bch_dev *dev2 = bch_dev_bkey_exists(c, p2.ptr.dev);

	u64 l1 = atomic64_read(&dev1->cur_latency[READ]);
	u64 l2 = atomic64_read(&dev2->cur_latency[READ]);

@@ -535,11 +568,12 @@ static inline bool dev_latency_better(struct bch_fs *c,

static int extent_pick_read_device(struct bch_fs *c,
				   struct bkey_s_c_extent e,
				   struct bch_devs_mask *avoid,
				   struct bch_io_failures *failed,
				   struct extent_ptr_decoded *pick)
{
	const union bch_extent_entry *entry;
	struct extent_ptr_decoded p;
	struct bch_dev_io_failures *f;
	struct bch_dev *ca;
	int ret = 0;

@@ -549,14 +583,11 @@ static int extent_pick_read_device(struct bch_fs *c,
		if (p.ptr.cached && ptr_stale(ca, &p.ptr))
			continue;

		/*
		 * XXX: need to make avoid work correctly for stripe ptrs
		 */

		if (avoid && test_bit(p.ptr.dev, avoid->d))
		f = failed ? dev_io_failures(failed, p.ptr.dev) : NULL;
		if (f && f->nr_failed >= f->nr_retries)
			continue;

		if (ret && !dev_latency_better(c, &p.ptr, &pick->ptr))
		if (ret && !ptr_better(c, p, *pick))
			continue;

		*pick = p;
@@ -685,11 +716,11 @@ int bch2_btree_ptr_to_text(struct bch_fs *c, char *buf,
}

/*
 * Pick a pointer to read btree node @b from.
 *
 * Thin wrapper: views b->key as an extent and hands it, together with the
 * caller's failure record and the output slot @pick, to
 * extent_pick_read_device(); that function's return value is passed through
 * unchanged.
 */
int bch2_btree_pick_ptr(struct bch_fs *c, const struct btree *b,
			struct bch_devs_mask *avoid,
			struct bch_io_failures *failed,
			struct extent_ptr_decoded *pick)
{
	return extent_pick_read_device(c, bkey_i_to_s_c_extent(&b->key),
				       avoid, pick);
				       failed, pick);
}

/* Extents */
@@ -1909,7 +1940,7 @@ void bch2_extent_mark_replicas_cached(struct bch_fs *c,
 * other devices, it will still pick a pointer from avoid.
 */
int bch2_extent_pick_ptr(struct bch_fs *c, struct bkey_s_c k,
			 struct bch_devs_mask *avoid,
			 struct bch_io_failures *failed,
			 struct extent_ptr_decoded *pick)
{
	int ret;
@@ -1921,7 +1952,7 @@ int bch2_extent_pick_ptr(struct bch_fs *c, struct bkey_s_c k,
	case BCH_EXTENT:
	case BCH_EXTENT_CACHED:
		ret = extent_pick_read_device(c, bkey_s_c_to_extent(k),
					      avoid, pick);
					      failed, pick);

		if (!ret && !bkey_extent_is_cached(k.k))
			ret = -EIO;
+4 −3
Original line number Diff line number Diff line
@@ -53,12 +53,13 @@ struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c,
						     struct btree *,
						     struct btree_node_iter_large *);

/* Record one more failed IO for the device the decoded pointer refers to. */
void bch2_mark_io_failure(struct bch_io_failures *,
			  struct extent_ptr_decoded *);
/* Choose a pointer to read a btree node from, given the failures so far. */
int bch2_btree_pick_ptr(struct bch_fs *, const struct btree *,
			struct bch_devs_mask *avoid,
			struct bch_io_failures *,
			struct extent_ptr_decoded *);

/* Choose a pointer to read a key's data from, given the failures so far. */
int bch2_extent_pick_ptr(struct bch_fs *, struct bkey_s_c,
			 struct bch_devs_mask *,
			 struct bch_io_failures *,
			 struct extent_ptr_decoded *);

void bch2_extent_trim_atomic(struct bkey_i *, struct btree_iter *);
+9 −0
Original line number Diff line number Diff line
@@ -24,4 +24,13 @@ struct extent_ptr_decoded {
	struct bch_extent_ptr		ptr;
};

/* Per-device failure bookkeeping; one element of bch_io_failures.devs[]. */
struct bch_dev_io_failures {
	/* device index this record is for */
	u8		dev;
	/* number of failures recorded against this device */
	u8		nr_failed;
	/* set to 0 when the record is first created */
	u8		nr_retries;
};

/*
 * Set of devices that IO has failed on, carried across retries of a single
 * read so that the pointer-picking code can take earlier failures into
 * account.
 */
struct bch_io_failures {
	/* number of entries of devs[] currently in use */
	u8				nr;
	struct bch_dev_io_failures	devs[BCH_REPLICAS_MAX];
};

#endif /* _BCACHEFS_EXTENTS_TYPES_H */
+12 −13
Original line number Diff line number Diff line
@@ -1203,7 +1203,8 @@ static void bch2_rbio_done(struct bch_read_bio *rbio)

static void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio,
				     struct bvec_iter bvec_iter, u64 inode,
				     struct bch_devs_mask *avoid, unsigned flags)
				     struct bch_io_failures *failed,
				     unsigned flags)
{
	struct btree_iter iter;
	BKEY_PADDED(k) tmp;
@@ -1237,7 +1238,7 @@ static void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio
		goto out;
	}

	ret = __bch2_read_extent(c, rbio, bvec_iter, k, avoid, flags);
	ret = __bch2_read_extent(c, rbio, bvec_iter, k, failed, flags);
	if (ret == READ_RETRY)
		goto retry;
	if (ret)
@@ -1251,7 +1252,7 @@ static void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio

static void bch2_read_retry(struct bch_fs *c, struct bch_read_bio *rbio,
			    struct bvec_iter bvec_iter, u64 inode,
			    struct bch_devs_mask *avoid, unsigned flags)
			    struct bch_io_failures *failed, unsigned flags)
{
	struct btree_iter iter;
	struct bkey_s_c k;
@@ -1274,7 +1275,7 @@ static void bch2_read_retry(struct bch_fs *c, struct bch_read_bio *rbio,
			      (k.k->p.offset - bvec_iter.bi_sector) << 9);
		swap(bvec_iter.bi_size, bytes);

		ret = __bch2_read_extent(c, rbio, bvec_iter, k, avoid, flags);
		ret = __bch2_read_extent(c, rbio, bvec_iter, k, failed, flags);
		switch (ret) {
		case READ_RETRY:
			goto retry;
@@ -1310,14 +1311,12 @@ static void bch2_rbio_retry(struct work_struct *work)
	struct bvec_iter iter	= rbio->bvec_iter;
	unsigned flags		= rbio->flags;
	u64 inode		= rbio->pos.inode;
	struct bch_devs_mask avoid;
	struct bch_io_failures failed = { .nr = 0 };

	trace_read_retry(&rbio->bio);

	memset(&avoid, 0, sizeof(avoid));

	if (rbio->retry == READ_RETRY_AVOID)
		__set_bit(rbio->pick.ptr.dev, avoid.d);
		bch2_mark_io_failure(&failed, &rbio->pick);

	rbio->bio.bi_status = 0;

@@ -1327,9 +1326,9 @@ static void bch2_rbio_retry(struct work_struct *work)
	flags &= ~BCH_READ_MAY_PROMOTE;

	if (flags & BCH_READ_NODECODE)
		bch2_read_retry_nodecode(c, rbio, iter, inode, &avoid, flags);
		bch2_read_retry_nodecode(c, rbio, iter, inode, &failed, flags);
	else
		bch2_read_retry(c, rbio, iter, inode, &avoid, flags);
		bch2_read_retry(c, rbio, iter, inode, &failed, flags);
}

static void bch2_rbio_error(struct bch_read_bio *rbio, int retry,
@@ -1569,7 +1568,7 @@ static void bch2_read_endio(struct bio *bio)

int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig,
		       struct bvec_iter iter, struct bkey_s_c k,
		       struct bch_devs_mask *avoid, unsigned flags)
		       struct bch_io_failures *failed, unsigned flags)
{
	struct extent_ptr_decoded pick;
	struct bch_read_bio *rbio = NULL;
@@ -1579,7 +1578,7 @@ int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig,
	struct bpos pos = bkey_start_pos(k.k);
	int pick_ret;

	pick_ret = bch2_extent_pick_ptr(c, k, avoid, &pick);
	pick_ret = bch2_extent_pick_ptr(c, k, failed, &pick);

	/* hole or reservation - just zero fill: */
	if (!pick_ret)
@@ -1750,7 +1749,7 @@ int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig,
		rbio = bch2_rbio_free(rbio);

		if (ret == READ_RETRY_AVOID) {
			__set_bit(pick.ptr.dev, avoid->d);
			bch2_mark_io_failure(failed, &pick);
			ret = READ_RETRY;
		}

Loading