Commit e0a85137, authored by Leo Martins, committed by David Sterba
Browse files

btrfs: avoid GFP_ATOMIC allocations in qgroup free paths



When qgroups are enabled, __btrfs_qgroup_release_data() and
qgroup_free_reserved_data() pass an extent_changeset to
btrfs_clear_record_extent_bits() to track how many bytes had their
EXTENT_QGROUP_RESERVED bits cleared. Inside the extent IO tree spinlock,
add_extent_changeset() calls ulist_add() with GFP_ATOMIC to record each
changed range. If this allocation fails, it hits a BUG_ON and panics the
kernel.

However, both of these callers only read changeset.bytes_changed
afterwards — the range_changed ulist is populated and immediately freed
without ever being iterated. The GFP_ATOMIC allocation is entirely
unnecessary for these paths.

Introduce extent_changeset_init_bytes_only() which uses a sentinel value
(EXTENT_CHANGESET_BYTES_ONLY) on the ulist's prealloc field to signal
that only bytes_changed should be tracked. add_extent_changeset() checks
for this sentinel and returns early after updating bytes_changed,
skipping the ulist_add() call entirely. This eliminates the GFP_ATOMIC
allocation and makes the BUG_ON unreachable for these paths.

Callers that need range tracking (qgroup_reserve_data,
qgroup_unreserve_range, btrfs_qgroup_check_reserved_leak) continue to
use extent_changeset_init() and are unaffected.

Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: Leo Martins <loemra.dev@gmail.com>
Signed-off-by: David Sterba <dsterba@suse.com>
parent 390aa432
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -195,7 +195,10 @@ static int add_extent_changeset(struct extent_state *state, u32 bits,
		return 0;
	if (!set && (state->state & bits) == 0)
		return 0;

	changeset->bytes_changed += state->end - state->start + 1;
	if (!extent_changeset_tracks_ranges(changeset))
		return 0;

	ret = ulist_add(&changeset->range_changed, state->start, state->end, GFP_ATOMIC);
	if (ret < 0)
+22 −1
Original line number Diff line number Diff line
@@ -198,6 +198,25 @@ static inline void extent_changeset_init(struct extent_changeset *changeset)
	ulist_init(&changeset->range_changed);
}

/*
 * Sentinel value for range_changed.prealloc indicating that the changeset
 * only tracks bytes_changed and does not record individual ranges. This
 * avoids GFP_ATOMIC allocations inside add_extent_changeset() when the
 * caller doesn't need to iterate the changed ranges afterwards.
 *
 * The value is a marker, not a real pointer to a ulist node: check for it
 * with extent_changeset_tracks_ranges() before passing range_changed to a
 * ulist helper.
 */
#define EXTENT_CHANGESET_BYTES_ONLY	((struct ulist_node *)1)

/*
 * Initialize @changeset so that only bytes_changed is accumulated.
 *
 * Only the prealloc field of range_changed is written here, with the
 * EXTENT_CHANGESET_BYTES_ONLY marker; the other ulist fields are left
 * untouched.
 */
static inline void extent_changeset_init_bytes_only(struct extent_changeset *changeset)
{
	changeset->range_changed.prealloc = EXTENT_CHANGESET_BYTES_ONLY;
	changeset->bytes_changed = 0;
}

/*
 * Tell whether @changeset records individual ranges.
 *
 * Returns false when range_changed.prealloc holds the
 * EXTENT_CHANGESET_BYTES_ONLY marker, true for any other value.
 */
static inline bool extent_changeset_tracks_ranges(const struct extent_changeset *changeset)
{
	const struct ulist_node *marker = changeset->range_changed.prealloc;

	return marker != EXTENT_CHANGESET_BYTES_ONLY;
}

static inline struct extent_changeset *extent_changeset_alloc(void)
{
	struct extent_changeset *ret;
@@ -212,6 +231,7 @@ static inline struct extent_changeset *extent_changeset_alloc(void)

/*
 * Forward @changeset's range ulist and @gfp_mask to ulist_prealloc().
 *
 * Asserts that @changeset tracks ranges, i.e. that its prealloc slot does
 * not hold the bytes-only marker.
 */
static inline void extent_changeset_prealloc(struct extent_changeset *changeset, gfp_t gfp_mask)
{
	struct ulist *ranges = &changeset->range_changed;

	ASSERT(extent_changeset_tracks_ranges(changeset));
	ulist_prealloc(ranges, gfp_mask);
}

@@ -220,6 +240,7 @@ static inline void extent_changeset_release(struct extent_changeset *changeset)
	if (!changeset)
		return;
	changeset->bytes_changed = 0;
	if (extent_changeset_tracks_ranges(changeset))
		ulist_release(&changeset->range_changed);
}

+3 −2
Original line number Diff line number Diff line
@@ -4324,7 +4324,7 @@ static int qgroup_free_reserved_data(struct btrfs_inode *inode,
	u64 freed = 0;
	int ret;

	extent_changeset_init(&changeset);
	extent_changeset_init_bytes_only(&changeset);
	len = round_up(start + len, root->fs_info->sectorsize);
	start = round_down(start, root->fs_info->sectorsize);

@@ -4389,7 +4389,7 @@ static int __btrfs_qgroup_release_data(struct btrfs_inode *inode,
	WARN_ON(!free && reserved);
	if (free && reserved)
		return qgroup_free_reserved_data(inode, reserved, start, len, released);
	extent_changeset_init(&changeset);
	extent_changeset_init_bytes_only(&changeset);
	ret = btrfs_clear_record_extent_bits(&inode->io_tree, start, start + len - 1,
					     EXTENT_QGROUP_RESERVED, &changeset);
	if (ret < 0)
@@ -4647,6 +4647,7 @@ void btrfs_qgroup_check_reserved_leak(struct btrfs_inode *inode)

	WARN_ON(ret < 0);
	if (WARN_ON(changeset.bytes_changed)) {
		ASSERT(extent_changeset_tracks_ranges(&changeset));
		ULIST_ITER_INIT(&iter);
		while ((unode = ulist_next(&changeset.range_changed, &iter))) {
			btrfs_warn(inode->root->fs_info,