Commit 18443cb9 authored by Kent Overstreet, committed by Kent Overstreet
Browse files

bcachefs: Update data move path for snapshots



The data move path operates on existing extents, and not within a
subvolume as the regular IO paths do. It needs to change because it may
cause existing extents to be split, and when splitting an existing
extent in an ancestor snapshot we need to make sure the new split has
the same visibility in child snapshots as the existing extent.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
parent 7a7d17b2
Loading
Loading
Loading
Loading
+70 −11
Original line number Diff line number Diff line
@@ -941,6 +941,43 @@ int __bch2_trans_commit(struct btree_trans *trans)
	goto retry;
}

/*
 * Look for a key at exactly @pos in btree @id, in some other snapshot, for
 * which bch2_snapshot_is_ancestor(c, <key's snapshot>, pos.snapshot) holds.
 *
 * Returns 1 when such a key is found and 0 when it is not (this includes the
 * early exit taken when the snapshot of @pos has no first child recorded).
 * If the iterator reports an error, the nonzero value from bkey_err() is
 * returned instead.
 */
static int check_pos_snapshot_overwritten(struct btree_trans *trans,
					  enum btree_id id,
					  struct bpos pos)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter;
	struct bkey_s_c k;
	int ret;

	/* Nothing to look for if this snapshot has no children */
	if (!snapshot_t(c, pos.snapshot)->children[0])
		return 0;

	bch2_trans_iter_init(trans, &iter, id, pos,
			     BTREE_ITER_NOT_EXTENTS|
			     BTREE_ITER_ALL_SNAPSHOTS);
	do {
		k = bch2_btree_iter_prev(&iter);
		ret = bkey_err(k);

		/* Stop on error, when no key is left, or once we leave @pos */
		if (ret || !k.k || bkey_cmp(pos, k.k->p))
			break;

		if (bch2_snapshot_is_ancestor(c, k.k->p.snapshot, pos.snapshot))
			ret = 1;
	} while (!ret);
	bch2_trans_iter_exit(trans, &iter);

	return ret;
}

static noinline int extent_front_merge(struct btree_trans *trans,
				       struct btree_iter *iter,
				       struct bkey_s_c k,
@@ -958,14 +995,40 @@ static noinline int extent_front_merge(struct btree_trans *trans,

	bkey_reassemble(update, k);

	if (bch2_bkey_merge(c, bkey_i_to_s(update), bkey_i_to_s_c(*insert))) {
	if (!bch2_bkey_merge(c, bkey_i_to_s(update), bkey_i_to_s_c(*insert)))
		return 0;

	ret =   check_pos_snapshot_overwritten(trans, iter->btree_id, k.k->p) ?:
		check_pos_snapshot_overwritten(trans, iter->btree_id, (*insert)->k.p);
	if (ret < 0)
		return ret;
	if (ret)
		return 0;

	ret = bch2_btree_delete_at(trans, iter, flags);
	if (ret)
		return ret;

	*insert = update;
	return 0;
}

/*
 * Merge @k into @insert via bch2_bkey_merge(), unless
 * check_pos_snapshot_overwritten() reports a hit for the end position of
 * either key, in which case the merge is skipped.
 *
 * Returns 0 whether or not the merge was performed, or the negative value
 * propagated from check_pos_snapshot_overwritten().
 */
static noinline int extent_back_merge(struct btree_trans *trans,
				      struct btree_iter *iter,
				      struct bkey_i *insert,
				      struct bkey_s_c k)
{
	struct bch_fs *c = trans->c;
	int overwritten;

	/* The second position is only checked when the first one is clean */
	overwritten = check_pos_snapshot_overwritten(trans, iter->btree_id,
						     insert->k.p);
	if (!overwritten)
		overwritten = check_pos_snapshot_overwritten(trans,
							     iter->btree_id,
							     k.k->p);

	if (overwritten < 0)
		return overwritten;

	if (!overwritten)
		bch2_bkey_merge(c, bkey_i_to_s(insert), k);

	return 0;
}

@@ -974,7 +1037,6 @@ static int bch2_trans_update_extent(struct btree_trans *trans,
				    struct bkey_i *insert,
				    enum btree_update_flags flags)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter, update_iter;
	struct bpos start = bkey_start_pos(&insert->k);
	struct bkey_i *update;
@@ -1002,9 +1064,6 @@ static int bch2_trans_update_extent(struct btree_trans *trans,
		goto next;
	}

	if (!bkey_cmp(k.k->p, start))
		goto next;

	while (bkey_cmp(insert->k.p, bkey_start_pos(k.k)) > 0) {
		bool front_split = bkey_cmp(bkey_start_pos(k.k), start) < 0;
		bool back_split  = bkey_cmp(k.k->p, insert->k.p) > 0;
@@ -1120,7 +1179,7 @@ static int bch2_trans_update_extent(struct btree_trans *trans,
	}

	if (bch2_bkey_maybe_mergable(&insert->k, k.k))
		bch2_bkey_merge(c, bkey_i_to_s(insert), k);
		extent_back_merge(trans, &iter, insert, k);
out:
	if (!bkey_deleted(&insert->k)) {
		/*
+1 −34
Original line number Diff line number Diff line
@@ -473,24 +473,6 @@ static int remove_backpointer(struct btree_trans *trans,
	return ret;
}

/*
 * Growable list of snapshot IDs recorded for a single key position.
 *
 * @pos:  position the recorded IDs belong to
 * @nr:   number of entries of @d currently in use
 * @size: number of entries @d has room for
 * @d:    heap-allocated array of snapshot IDs (released with kfree())
 */
struct snapshots_seen {
	struct bpos			pos;
	size_t				nr;
	size_t				size;
	u32				*d;
};

static void snapshots_seen_exit(struct snapshots_seen *s)
{
	kfree(s->d);
	s->d = NULL;
}

/* Reset @s to the empty state: every byte zeroed, no array allocated. */
static void snapshots_seen_init(struct snapshots_seen *s)
{
	memset(s, 0, sizeof(struct snapshots_seen));
}

static int snapshots_seen_update(struct bch_fs *c, struct snapshots_seen *s, struct bpos pos)
{
	pos.snapshot = snapshot_t(c, pos.snapshot)->equiv;
@@ -499,26 +481,11 @@ static int snapshots_seen_update(struct bch_fs *c, struct snapshots_seen *s, str
		s->nr = 0;
	s->pos = pos;

	if (s->nr == s->size) {
		size_t new_size = max(s->size, 128UL) * 2;
		u32 *d = krealloc(s->d, new_size * sizeof(s->d[0]), GFP_KERNEL);

		if (!d) {
			bch_err(c, "error reallocating snapshots_seen table (new size %zu)",
				new_size);
			return -ENOMEM;
		}

		s->size = new_size;
		s->d	= d;
	}

	/* Might get called multiple times due to lock restarts */
	if (s->nr && s->d[s->nr - 1] == pos.snapshot)
		return 0;

	s->d[s->nr++] = pos.snapshot;
	return 0;
	return snapshots_seen_add(c, s, pos.snapshot);
}

/**
+2 −1
Original line number Diff line number Diff line
@@ -1828,7 +1828,8 @@ static int __bch2_rbio_narrow_crcs(struct btree_trans *trans,
	if (!bch2_bkey_narrow_crcs(new, new_crc))
		goto out;

	ret = bch2_trans_update(trans, &iter, new, 0);
	ret = bch2_trans_update(trans, &iter, new,
				BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
out:
	bch2_trans_iter_exit(trans, &iter);
	return ret;
+4 −2
Original line number Diff line number Diff line
@@ -48,7 +48,8 @@ static int __bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags
	bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);

	bch2_trans_iter_init(&trans, &iter, btree_id, POS_MIN,
			     BTREE_ITER_PREFETCH);
			     BTREE_ITER_PREFETCH|
			     BTREE_ITER_ALL_SNAPSHOTS);

	while ((k = bch2_btree_iter_peek(&iter)).k &&
	       !(ret = bkey_err(k))) {
@@ -74,7 +75,8 @@ static int __bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags
		bch2_btree_iter_set_pos(&iter, bkey_start_pos(&sk.k->k));

		ret   = bch2_btree_iter_traverse(&iter) ?:
			bch2_trans_update(&trans, &iter, sk.k, 0) ?:
			bch2_trans_update(&trans, &iter, sk.k,
					  BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
			bch2_trans_commit(&trans, NULL, NULL,
					BTREE_INSERT_NOFAIL);

+80 −1
Original line number Diff line number Diff line
@@ -14,6 +14,7 @@
#include "keylist.h"
#include "move.h"
#include "replicas.h"
#include "subvolume.h"
#include "super-io.h"
#include "trace.h"

@@ -52,6 +53,81 @@ struct moving_context {
	wait_queue_head_t	wait;
};

/*
 * When the data move path replaces a key ending at @old_pos with one ending
 * at @new_pos, queue an empty key (a whiteout) at @new_pos for every snapshot
 * that has its own key at @old_pos and for which
 * bch2_snapshot_is_ancestor(c, <that snapshot>, old_pos.snapshot) holds.
 * This keeps the new split's visibility in those snapshots the same as the
 * existing extent's.
 *
 * A snapshot is skipped when bch2_snapshot_is_ancestor() also holds between
 * it and a snapshot already handled in this call.
 *
 * Nothing is done for btrees without snapshots, when the two positions are
 * equal, or when the snapshot of @old_pos has no first child recorded.
 *
 * Returns 0 on success, or the first nonzero error reported by the iterator,
 * bch2_trans_kmalloc(), bch2_trans_update() or snapshots_seen_add().
 */
static int insert_snapshot_whiteouts(struct btree_trans *trans,
				     enum btree_id id,
				     struct bpos old_pos,
				     struct bpos new_pos)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter, update_iter;
	struct bkey_s_c k;
	struct snapshots_seen s;
	int ret;

	if (!btree_type_has_snapshots(id))
		return 0;

	snapshots_seen_init(&s);

	if (!bkey_cmp(old_pos, new_pos))
		return 0;

	if (!snapshot_t(c, old_pos.snapshot)->children[0])
		return 0;

	bch2_trans_iter_init(trans, &iter, id, old_pos,
			     BTREE_ITER_NOT_EXTENTS|
			     BTREE_ITER_ALL_SNAPSHOTS);
	while (1) {
next:
		k = bch2_btree_iter_prev(&iter);
		ret = bkey_err(k);
		if (ret)
			break;

		/*
		 * The iterator may run out of keys; stop before touching
		 * k.k, as check_pos_snapshot_overwritten() does.
		 */
		if (!k.k)
			break;

		/* Only keys at exactly @old_pos are of interest */
		if (bkey_cmp(old_pos, k.k->p))
			break;

		if (bch2_snapshot_is_ancestor(c, k.k->p.snapshot, old_pos.snapshot)) {
			struct bkey_i *update;
			size_t i;

			/* Already covered by a snapshot we emitted a key for */
			for (i = 0; i < s.nr; i++)
				if (bch2_snapshot_is_ancestor(c, k.k->p.snapshot, s.d[i]))
					goto next;

			update = bch2_trans_kmalloc(trans, sizeof(struct bkey_i));

			ret = PTR_ERR_OR_ZERO(update);
			if (ret)
				break;

			/* Empty key at @new_pos, in the snapshot of the key found */
			bkey_init(&update->k);
			update->k.p = new_pos;
			update->k.p.snapshot = k.k->p.snapshot;

			bch2_trans_iter_init(trans, &update_iter, id, update->k.p,
					     BTREE_ITER_NOT_EXTENTS|
					     BTREE_ITER_ALL_SNAPSHOTS|
					     BTREE_ITER_INTENT);
			ret   = bch2_btree_iter_traverse(&update_iter) ?:
				bch2_trans_update(trans, &update_iter, update,
					  BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
			bch2_trans_iter_exit(trans, &update_iter);
			if (ret)
				break;

			ret = snapshots_seen_add(c, &s, k.k->p.snapshot);
			if (ret)
				break;
		}
	}
	bch2_trans_iter_exit(trans, &iter);
	kfree(s.d);

	return ret;
}

int bch2_migrate_index_update(struct bch_write_op *op)
{
	struct bch_fs *c = op->c;
@@ -165,7 +241,10 @@ int bch2_migrate_index_update(struct bch_write_op *op)

		next_pos = insert->k.p;

		ret   = bch2_trans_update(&trans, &iter, insert, 0) ?:
		ret   = insert_snapshot_whiteouts(&trans, m->btree_id,
						  k.k->p, insert->k.p) ?:
			bch2_trans_update(&trans, &iter, insert,
				BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
			bch2_trans_commit(&trans, &op->res,
				op_journal_seq(op),
				BTREE_INSERT_NOFAIL|
Loading