Commit 979e1dc3 authored by Mark Harmstone, committed by David Sterba
Browse files

btrfs: handle deletions from remapped block group



Handle the case where we free an extent from a block group that has the
REMAPPED flag set. Because the remap tree is orthogonal to the extent
tree, for data this may be within any number of identity remaps or
actual remaps. If we're freeing a metadata node, this will be wholly
inside one or the other.

btrfs_remove_extent_from_remap_tree() searches the remap tree for the
remaps that cover the range in question, then calls
remove_range_from_remap_tree() for each one, to punch a hole in the
remap and adjust the free-space tree.

For an identity remap, remove_range_from_remap_tree() will adjust the
block group's `identity_remap_count` if this changes. If it reaches
zero we mark the block group as fully remapped.

Fully remapped block groups have their chunk stripes removed and their
device extents freed, which makes the disk space available again to the
chunk allocator. This happens asynchronously: in the cleaner thread for
sync discard and nodiscard, and (in a later patch) in the discard worker
for async discard.

Reviewed-by: Boris Burkov <boris@bur.io>
Signed-off-by: Mark Harmstone <mark@harmstone.com>
Signed-off-by: David Sterba <dsterba@suse.com>
parent 18ba6499
Loading
Loading
Loading
Loading
+61 −26
Original line number Diff line number Diff line
@@ -1067,6 +1067,24 @@ static int remove_block_group_item(struct btrfs_trans_handle *trans,
	return btrfs_del_item(trans, root, path);
}

void btrfs_remove_bg_from_sinfo(struct btrfs_block_group *bg)
{
	int factor = btrfs_bg_type_to_factor(bg->flags);

	spin_lock(&bg->space_info->lock);
	if (btrfs_test_opt(bg->fs_info, ENOSPC_DEBUG)) {
		WARN_ON(bg->space_info->total_bytes < bg->length);
		WARN_ON(bg->space_info->bytes_readonly < bg->length - bg->zone_unusable);
		WARN_ON(bg->space_info->bytes_zone_unusable < bg->zone_unusable);
		WARN_ON(bg->space_info->disk_total < bg->length * factor);
	}
	bg->space_info->total_bytes -= bg->length;
	bg->space_info->bytes_readonly -= (bg->length - bg->zone_unusable);
	btrfs_space_info_update_bytes_zone_unusable(bg->space_info, -bg->zone_unusable);
	bg->space_info->disk_total -= bg->length * factor;
	spin_unlock(&bg->space_info->lock);
}

int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
			     struct btrfs_chunk_map *map)
{
@@ -1078,7 +1096,6 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
	struct kobject *kobj = NULL;
	int ret;
	int index;
	int factor;
	struct btrfs_caching_control *caching_ctl = NULL;
	bool remove_map;
	bool remove_rsv = false;
@@ -1087,7 +1104,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
	if (!block_group)
		return -ENOENT;

	BUG_ON(!block_group->ro);
	BUG_ON(!block_group->ro && !(block_group->flags & BTRFS_BLOCK_GROUP_REMAPPED));

	trace_btrfs_remove_block_group(block_group);
	/*
@@ -1099,7 +1116,6 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
				  block_group->length);

	index = btrfs_bg_flags_to_raid_index(block_group->flags);
	factor = btrfs_bg_type_to_factor(block_group->flags);

	/* make sure this block group isn't part of an allocation cluster */
	cluster = &fs_info->data_alloc_cluster;
@@ -1223,26 +1239,11 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,

	spin_lock(&block_group->space_info->lock);
	list_del_init(&block_group->ro_list);

	if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
		WARN_ON(block_group->space_info->total_bytes
			< block_group->length);
		WARN_ON(block_group->space_info->bytes_readonly
			< block_group->length - block_group->zone_unusable);
		WARN_ON(block_group->space_info->bytes_zone_unusable
			< block_group->zone_unusable);
		WARN_ON(block_group->space_info->disk_total
			< block_group->length * factor);
	}
	block_group->space_info->total_bytes -= block_group->length;
	block_group->space_info->bytes_readonly -=
		(block_group->length - block_group->zone_unusable);
	btrfs_space_info_update_bytes_zone_unusable(block_group->space_info,
						    -block_group->zone_unusable);
	block_group->space_info->disk_total -= block_group->length * factor;

	spin_unlock(&block_group->space_info->lock);

	if (!(block_group->flags & BTRFS_BLOCK_GROUP_REMAPPED))
		btrfs_remove_bg_from_sinfo(block_group);

	/*
	 * Remove the free space for the block group from the free space tree
	 * and the block group's item from the extent tree before marking the
@@ -1575,8 +1576,10 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)

		spin_lock(&space_info->lock);
		spin_lock(&block_group->lock);
		if (btrfs_is_block_group_used(block_group) || block_group->ro ||
		    list_is_singular(&block_group->list)) {
		if (btrfs_is_block_group_used(block_group) ||
		    (block_group->ro && !(block_group->flags & BTRFS_BLOCK_GROUP_REMAPPED)) ||
		    list_is_singular(&block_group->list) ||
		    test_bit(BLOCK_GROUP_FLAG_FULLY_REMAPPED, &block_group->runtime_flags)) {
			/*
			 * We want to bail if we made new allocations or have
			 * outstanding allocations in this block group.  We do
@@ -1617,9 +1620,10 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
		 * needing to allocate extents from the block group.
		 */
		used = btrfs_space_info_used(space_info, true);
		if ((space_info->total_bytes - block_group->length < used &&
		if (((space_info->total_bytes - block_group->length < used &&
		      block_group->zone_unusable < block_group->length) ||
		    has_unwritten_metadata(block_group)) {
		     has_unwritten_metadata(block_group)) &&
		    !(block_group->flags & BTRFS_BLOCK_GROUP_REMAPPED)) {
			/*
			 * Add a reference for the list, compensate for the ref
			 * drop under the "next" label for the
@@ -1784,6 +1788,9 @@ void btrfs_mark_bg_unused(struct btrfs_block_group *bg)
		btrfs_get_block_group(bg);
		trace_btrfs_add_unused_block_group(bg);
		list_add_tail(&bg->bg_list, &fs_info->unused_bgs);
	} else if (bg->flags & BTRFS_BLOCK_GROUP_REMAPPED &&
		   bg->identity_remap_count == 0) {
		/* Leave fully remapped block groups on the fully_remapped_bgs list. */
	} else if (!test_bit(BLOCK_GROUP_FLAG_NEW, &bg->runtime_flags)) {
		/* Pull out the block group from the reclaim_bgs list. */
		trace_btrfs_add_unused_block_group(bg);
@@ -4581,6 +4588,13 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
		list_del_init(&block_group->bg_list);
		btrfs_put_block_group(block_group);
	}

	while (!list_empty(&info->fully_remapped_bgs)) {
		block_group = list_first_entry(&info->fully_remapped_bgs,
					       struct btrfs_block_group, bg_list);
		list_del_init(&block_group->bg_list);
		btrfs_put_block_group(block_group);
	}
	spin_unlock(&info->unused_bgs_lock);

	spin_lock(&info->zone_active_bgs_lock);
@@ -4768,3 +4782,24 @@ bool btrfs_block_group_should_use_size_class(const struct btrfs_block_group *bg)
		return false;
	return true;
}

/*
 * Queue a block group at the tail of fs_info->fully_remapped_bgs.
 *
 * @bg:    block group to queue
 * @trans: transaction handle, used here only to reach fs_info
 *
 * The list manipulation is done under fs_info->unused_bgs_lock.
 */
void btrfs_mark_bg_fully_remapped(struct btrfs_block_group *bg,
				  struct btrfs_trans_handle *trans)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;

	spin_lock(&fs_info->unused_bgs_lock);

	if (list_empty(&bg->bg_list)) {
		/* Not linked anywhere yet, take a reference for the list. */
		btrfs_get_block_group(bg);
	} else {
		/*
		 * The block group might already be on the unused_bgs list,
		 * so unlink it; no additional reference is taken in this
		 * case. It'll get readded after the async discard worker
		 * finishes, or in btrfs_handle_fully_remapped_bgs() if we're
		 * not using async discard.
		 */
		list_del(&bg->bg_list);
	}

	list_add_tail(&bg->bg_list, &fs_info->fully_remapped_bgs);

	spin_unlock(&fs_info->unused_bgs_lock);
}
+4 −0
Original line number Diff line number Diff line
@@ -92,6 +92,7 @@ enum btrfs_block_group_flags {
	 * transaction.
	 */
	BLOCK_GROUP_FLAG_NEW,
	BLOCK_GROUP_FLAG_FULLY_REMAPPED,
};

enum btrfs_caching_type {
@@ -340,6 +341,7 @@ int btrfs_add_new_free_space(struct btrfs_block_group *block_group,
struct btrfs_trans_handle *btrfs_start_trans_remove_block_group(
				struct btrfs_fs_info *fs_info,
				const u64 chunk_offset);
void btrfs_remove_bg_from_sinfo(struct btrfs_block_group *bg);
int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
			     struct btrfs_chunk_map *map);
void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info);
@@ -411,5 +413,7 @@ int btrfs_use_block_group_size_class(struct btrfs_block_group *bg,
				     enum btrfs_block_group_size_class size_class,
				     bool force_wrong_size_class);
bool btrfs_block_group_should_use_size_class(const struct btrfs_block_group *bg);
void btrfs_mark_bg_fully_remapped(struct btrfs_block_group *bg,
				  struct btrfs_trans_handle *trans);

#endif /* BTRFS_BLOCK_GROUP_H */
+6 −0
Original line number Diff line number Diff line
@@ -1473,6 +1473,10 @@ static int cleaner_kthread(void *arg)
		 */
		btrfs_run_defrag_inodes(fs_info);

		if (btrfs_fs_incompat(fs_info, REMAP_TREE) &&
		    !btrfs_test_opt(fs_info, DISCARD_ASYNC))
			btrfs_handle_fully_remapped_bgs(fs_info);

		/*
		 * Acquires fs_info->reclaim_bgs_lock to avoid racing
		 * with relocation (btrfs_relocate_chunk) and relocation
@@ -2807,6 +2811,7 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
	INIT_LIST_HEAD(&fs_info->tree_mod_seq_list);
	INIT_LIST_HEAD(&fs_info->unused_bgs);
	INIT_LIST_HEAD(&fs_info->reclaim_bgs);
	INIT_LIST_HEAD(&fs_info->fully_remapped_bgs);
	INIT_LIST_HEAD(&fs_info->zone_active_bgs);
#ifdef CONFIG_BTRFS_DEBUG
	INIT_LIST_HEAD(&fs_info->allocated_roots);
@@ -2862,6 +2867,7 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
	mutex_init(&fs_info->chunk_mutex);
	mutex_init(&fs_info->transaction_kthread_mutex);
	mutex_init(&fs_info->cleaner_mutex);
	mutex_init(&fs_info->remap_mutex);
	mutex_init(&fs_info->ro_block_group_mutex);
	init_rwsem(&fs_info->commit_root_sem);
	init_rwsem(&fs_info->cleanup_work_sem);
+88 −6
Original line number Diff line number Diff line
@@ -41,6 +41,7 @@
#include "tree-checker.h"
#include "raid-stripe-tree.h"
#include "delayed-inode.h"
#include "relocation.h"

#undef SCRAMBLE_DELAYED_REFS

@@ -2848,6 +2849,73 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info,
	return 0;
}

/*
 * Complete the remapping of a block group by removing its chunk stripes and
 * device extents, and adding it to the unused list if there are no longer any
 * extents nominally within it.
 *
 * Returns 0 on success, the error encoded in the pointer returned by
 * btrfs_get_chunk_map() if the chunk map lookup fails, or the non-zero value
 * returned by btrfs_last_identity_remap_gone().
 */
int btrfs_complete_bg_remapping(struct btrfs_block_group *bg)
{
	struct btrfs_fs_info *fs_info = bg->fs_info;
	struct btrfs_chunk_map *map;
	int ret;

	map = btrfs_get_chunk_map(fs_info, bg->start, 1);
	if (IS_ERR(map))
		return PTR_ERR(map);

	ret = btrfs_last_identity_remap_gone(map, bg);
	if (!ret) {
		/*
		 * Set num_stripes to 0, so that btrfs_remove_dev_extents()
		 * won't run a second time.
		 */
		map->num_stripes = 0;
	}

	/* The chunk map is released on both the success and the error path. */
	btrfs_free_chunk_map(map);
	if (ret)
		return ret;

	/* Nothing more to do while the block group still has used bytes. */
	if (bg->used != 0)
		return 0;

	/*
	 * Unlink the block group from whatever list it is currently on,
	 * dropping one reference if it was linked, before handing it to
	 * btrfs_mark_bg_unused().
	 */
	spin_lock(&fs_info->unused_bgs_lock);
	if (!list_empty(&bg->bg_list)) {
		list_del_init(&bg->bg_list);
		btrfs_put_block_group(bg);
	}
	spin_unlock(&fs_info->unused_bgs_lock);

	btrfs_mark_bg_unused(bg);

	return 0;
}

/*
 * Drain fs_info->fully_remapped_bgs, calling btrfs_complete_bg_remapping() on
 * each block group in list order.
 *
 * fs_info->unused_bgs_lock protects the list and is dropped around each
 * btrfs_complete_bg_remapping() call. Every block group taken off the list
 * has one reference dropped afterwards (presumably the one taken when it was
 * queued - confirm against btrfs_mark_bg_fully_remapped()).
 *
 * Processing stops at the first failure; block groups still on the list at
 * that point are left there.
 */
void btrfs_handle_fully_remapped_bgs(struct btrfs_fs_info *fs_info)
{
	spin_lock(&fs_info->unused_bgs_lock);
	while (!list_empty(&fs_info->fully_remapped_bgs)) {
		struct btrfs_block_group *bg;
		int ret;

		bg = list_first_entry(&fs_info->fully_remapped_bgs,
				      struct btrfs_block_group, bg_list);
		list_del_init(&bg->bg_list);
		spin_unlock(&fs_info->unused_bgs_lock);

		ret = btrfs_complete_bg_remapping(bg);
		btrfs_put_block_group(bg);
		if (ret) {
			/* The lock is not held here, so just bail out. */
			return;
		}

		spin_lock(&fs_info->unused_bgs_lock);
	}
	spin_unlock(&fs_info->unused_bgs_lock);
}

int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
@@ -3000,11 +3068,22 @@ u64 btrfs_get_extent_owner_root(struct btrfs_fs_info *fs_info,
}

static int do_free_extent_accounting(struct btrfs_trans_handle *trans,
				     u64 bytenr, struct btrfs_squota_delta *delta)
				     u64 bytenr, struct btrfs_squota_delta *delta,
				     struct btrfs_path *path)
{
	int ret;
	bool remapped = false;
	u64 num_bytes = delta->num_bytes;

	/* Returns 1 on success and 0 on no-op. */
	ret = btrfs_remove_extent_from_remap_tree(trans, path, bytenr, num_bytes);
	if (unlikely(ret < 0)) {
		btrfs_abort_transaction(trans, ret);
		return ret;
	} else if (ret == 1) {
		remapped = true;
	}

	if (delta->is_data) {
		struct btrfs_root *csum_root;

@@ -3028,11 +3107,14 @@ static int do_free_extent_accounting(struct btrfs_trans_handle *trans,
		return ret;
	}

	/* If remapped, FST has already been taken care of in remove_range_from_remap_tree(). */
	if (!remapped) {
		ret = btrfs_add_to_free_space_tree(trans, bytenr, num_bytes);
		if (unlikely(ret)) {
			btrfs_abort_transaction(trans, ret);
			return ret;
		}
	}

	ret = btrfs_update_block_group(trans, bytenr, num_bytes, false);
	if (ret)
@@ -3390,7 +3472,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
		}
		btrfs_release_path(path);

		ret = do_free_extent_accounting(trans, bytenr, &delta);
		ret = do_free_extent_accounting(trans, bytenr, &delta, path);
	}
	btrfs_release_path(path);

+2 −0
Original line number Diff line number Diff line
@@ -163,5 +163,7 @@ void btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info, u64 start, u6
int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
			 u64 num_bytes, u64 *actual_bytes);
int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range);
void btrfs_handle_fully_remapped_bgs(struct btrfs_fs_info *fs_info);
int btrfs_complete_bg_remapping(struct btrfs_block_group *bg);

#endif
Loading