Commit a34eef6d authored by Kent Overstreet's avatar Kent Overstreet
Browse files

bcachefs: Don't keep tons of cached pointers around



We had a bug report where the data update path was creating an extent
that failed to validate because it had too many pointers; almost all of
them were cached.

To fix this, we have:
- want_cached_ptr(), a new helper that checks if we even want a cached
  pointer (is on appropriate target, device is readable).

- bch2_extent_set_ptr_cached() now only sets a pointer cached if we want
  it.

- bch2_extent_normalize_by_opts() now ensures that we only have a single
  cached pointer that we want.

While working on this, it was noticed that this doesn't work well with
reflinked data and per-file options. Another patch series is coming that
plumbs through additional io path options through bch_extent_rebalance,
with improved option handling.

Reported-by: default avatarReed Riley <reed@riley.engineer>
Signed-off-by: default avatarKent Overstreet <kent.overstreet@linux.dev>
parent 3fd27e9c
Loading
Loading
Loading
Loading
+12 −9
Original line number Diff line number Diff line
@@ -236,7 +236,8 @@ static int __bch2_data_update_index_update(struct btree_trans *trans,
			if (((1U << i) & m->data_opts.rewrite_ptrs) &&
			    (ptr = bch2_extent_has_ptr(old, p, bkey_i_to_s(insert))) &&
			    !ptr->cached) {
				bch2_extent_ptr_set_cached(bkey_i_to_s(insert), ptr);
				bch2_extent_ptr_set_cached(c, &m->op.opts,
							   bkey_i_to_s(insert), ptr);
				rewrites_found |= 1U << i;
			}
			i++;
@@ -284,7 +285,8 @@ static int __bch2_data_update_index_update(struct btree_trans *trans,
			    durability - ptr_durability >= m->op.opts.data_replicas) {
				durability -= ptr_durability;

				bch2_extent_ptr_set_cached(bkey_i_to_s(insert), &entry->ptr);
				bch2_extent_ptr_set_cached(c, &m->op.opts,
							   bkey_i_to_s(insert), &entry->ptr);
				goto restart_drop_extra_replicas;
			}
		}
@@ -295,7 +297,7 @@ static int __bch2_data_update_index_update(struct btree_trans *trans,
			bch2_extent_ptr_decoded_append(insert, &p);

		bch2_bkey_narrow_crcs(insert, (struct bch_extent_crc_unpacked) { 0 });
		bch2_extent_normalize(c, bkey_i_to_s(insert));
		bch2_extent_normalize_by_opts(c, &m->op.opts, bkey_i_to_s(insert));

		ret = bch2_sum_sector_overwrites(trans, &iter, insert,
						 &should_check_enospc,
@@ -558,7 +560,8 @@ void bch2_data_update_to_text(struct printbuf *out, struct data_update *m)
int bch2_extent_drop_ptrs(struct btree_trans *trans,
			  struct btree_iter *iter,
			  struct bkey_s_c k,
			  struct data_update_opts data_opts)
			  struct bch_io_opts *io_opts,
			  struct data_update_opts *data_opts)
{
	struct bch_fs *c = trans->c;
	struct bkey_i *n;
@@ -569,11 +572,11 @@ int bch2_extent_drop_ptrs(struct btree_trans *trans,
	if (ret)
		return ret;

	while (data_opts.kill_ptrs) {
		unsigned i = 0, drop = __fls(data_opts.kill_ptrs);
	while (data_opts->kill_ptrs) {
		unsigned i = 0, drop = __fls(data_opts->kill_ptrs);

		bch2_bkey_drop_ptrs_noerror(bkey_i_to_s(n), ptr, i++ == drop);
		data_opts.kill_ptrs ^= 1U << drop;
		data_opts->kill_ptrs ^= 1U << drop;
	}

	/*
@@ -581,7 +584,7 @@ int bch2_extent_drop_ptrs(struct btree_trans *trans,
	 * will do the appropriate thing with it (turning it into a
	 * KEY_TYPE_error key, or just a discard if it was a cached extent)
	 */
	bch2_extent_normalize(c, bkey_i_to_s(n));
	bch2_extent_normalize_by_opts(c, io_opts, bkey_i_to_s(n));

	/*
	 * Since we're not inserting through an extent iterator
@@ -720,7 +723,7 @@ int bch2_data_update_init(struct btree_trans *trans,
		m->data_opts.rewrite_ptrs = 0;
		/* if iter == NULL, it's just a promote */
		if (iter)
			ret = bch2_extent_drop_ptrs(trans, iter, k, m->data_opts);
			ret = bch2_extent_drop_ptrs(trans, iter, k, &io_opts, &m->data_opts);
		goto out;
	}

+2 −1
Original line number Diff line number Diff line
@@ -40,7 +40,8 @@ void bch2_data_update_read_done(struct data_update *,
int bch2_extent_drop_ptrs(struct btree_trans *,
			  struct btree_iter *,
			  struct bkey_s_c,
			  struct data_update_opts);
			  struct bch_io_opts *,
			  struct data_update_opts *);

void bch2_data_update_exit(struct data_update *);
int bch2_data_update_init(struct btree_trans *, struct btree_iter *,
+70 −16
Original line number Diff line number Diff line
@@ -978,31 +978,54 @@ bch2_extent_has_ptr(struct bkey_s_c k1, struct extent_ptr_decoded p1, struct bke
	return NULL;
}

void bch2_extent_ptr_set_cached(struct bkey_s k, struct bch_extent_ptr *ptr)
static bool want_cached_ptr(struct bch_fs *c, struct bch_io_opts *opts,
			    struct bch_extent_ptr *ptr)
{
	if (!opts->promote_target ||
	    !bch2_dev_in_target(c, ptr->dev, opts->promote_target))
		return false;

	struct bch_dev *ca = bch2_dev_rcu_noerror(c, ptr->dev);

	return ca && bch2_dev_is_readable(ca) && !dev_ptr_stale_rcu(ca, ptr);
}

void bch2_extent_ptr_set_cached(struct bch_fs *c,
				struct bch_io_opts *opts,
				struct bkey_s k,
				struct bch_extent_ptr *ptr)
{
	struct bkey_ptrs ptrs = bch2_bkey_ptrs(k);
	union bch_extent_entry *entry;
	union bch_extent_entry *ec = NULL;
	struct extent_ptr_decoded p;

	bkey_extent_entry_for_each(ptrs, entry) {
		if (&entry->ptr == ptr) {
			ptr->cached = true;
			if (ec)
				extent_entry_drop(k, ec);
			return;
	rcu_read_lock();
	if (!want_cached_ptr(c, opts, ptr)) {
		bch2_bkey_drop_ptr_noerror(k, ptr);
		goto out;
	}

		if (extent_entry_is_stripe_ptr(entry))
			ec = entry;
		else if (extent_entry_is_ptr(entry))
			ec = NULL;
	/*
	 * Stripes can't contain cached data, for - reasons.
	 *
	 * Possibly something we can fix in the future?
	 */
	bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
		if (&entry->ptr == ptr) {
			if (p.has_ec)
				bch2_bkey_drop_ptr_noerror(k, ptr);
			else
				ptr->cached = true;
			goto out;
		}

	BUG();
out:
	rcu_read_unlock();
}

/*
 * bch_extent_normalize - clean up an extent, dropping stale pointers etc.
 * bch2_extent_normalize - clean up an extent, dropping stale pointers etc.
 *
 * Returns true if @k should be dropped entirely
 *
@@ -1016,8 +1039,39 @@ bool bch2_extent_normalize(struct bch_fs *c, struct bkey_s k)
	rcu_read_lock();
	bch2_bkey_drop_ptrs(k, ptr,
		ptr->cached &&
		(ca = bch2_dev_rcu(c, ptr->dev)) &&
		dev_ptr_stale_rcu(ca, ptr) > 0);
		(!(ca = bch2_dev_rcu(c, ptr->dev)) ||
		 dev_ptr_stale_rcu(ca, ptr) > 0));
	rcu_read_unlock();

	return bkey_deleted(k.k);
}

/*
 * bch2_extent_normalize_by_opts - clean up an extent, dropping stale pointers etc.
 *
 * Like bch2_extent_normalize(), but also only keeps a single cached pointer on
 * the promote target.
 */
bool bch2_extent_normalize_by_opts(struct bch_fs *c,
				   struct bch_io_opts *opts,
				   struct bkey_s k)
{
	struct bkey_ptrs ptrs;
	bool have_cached_ptr;

	rcu_read_lock();
restart_drop_ptrs:
	ptrs = bch2_bkey_ptrs(k);
	have_cached_ptr = false;

	bkey_for_each_ptr(ptrs, ptr)
		if (ptr->cached) {
			if (have_cached_ptr || !want_cached_ptr(c, opts, ptr)) {
				bch2_bkey_drop_ptr(k, ptr);
				goto restart_drop_ptrs;
			}
			have_cached_ptr = true;
		}
	rcu_read_unlock();

	return bkey_deleted(k.k);
+4 −1
Original line number Diff line number Diff line
@@ -686,9 +686,12 @@ bool bch2_extents_match(struct bkey_s_c, struct bkey_s_c);
struct bch_extent_ptr *
bch2_extent_has_ptr(struct bkey_s_c, struct extent_ptr_decoded, struct bkey_s);

void bch2_extent_ptr_set_cached(struct bkey_s, struct bch_extent_ptr *);
void bch2_extent_ptr_set_cached(struct bch_fs *, struct bch_io_opts *,
				struct bkey_s, struct bch_extent_ptr *);

bool bch2_extent_normalize_by_opts(struct bch_fs *, struct bch_io_opts *, struct bkey_s);
bool bch2_extent_normalize(struct bch_fs *, struct bkey_s);

void bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *, const struct bch_extent_ptr *);
void bch2_bkey_ptrs_to_text(struct printbuf *, struct bch_fs *,
			    struct bkey_s_c);
+1 −1
Original line number Diff line number Diff line
@@ -266,7 +266,7 @@ int bch2_move_extent(struct moving_context *ctxt,
	if (!data_opts.rewrite_ptrs &&
	    !data_opts.extra_replicas) {
		if (data_opts.kill_ptrs)
			return bch2_extent_drop_ptrs(trans, iter, k, data_opts);
			return bch2_extent_drop_ptrs(trans, iter, k, &io_opts, &data_opts);
		return 0;
	}