Commit 8da8d884 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull btrfs fixes from David Sterba:

 - Fix a deadlock in fiemap.

   There was a big lock around the whole operation that can interfere
   with a page fault and mkwrite.

   Reducing the lock scope can also speed up fiemap

 - Fix range condition for extent defragmentation which could lead to
   worse layout in some cases

* tag 'for-6.8-rc5-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: fix deadlock with fiemap and extent locking
  btrfs: defrag: avoid unnecessary defrag caused by incorrect extent size
parents d8be5a55 b0ad381f
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -1046,7 +1046,7 @@ static int defrag_collect_targets(struct btrfs_inode *inode,
			goto add;

		/* Skip too large extent */
		if (range_len >= extent_thresh)
		if (em->len >= extent_thresh)
			goto next;

		/*
+45 −17
Original line number Diff line number Diff line
@@ -2689,16 +2689,34 @@ static int fiemap_process_hole(struct btrfs_inode *inode,
	 * it beyond i_size.
	 */
	while (cur_offset < end && cur_offset < i_size) {
		struct extent_state *cached_state = NULL;
		u64 delalloc_start;
		u64 delalloc_end;
		u64 prealloc_start;
		u64 lockstart;
		u64 lockend;
		u64 prealloc_len = 0;
		bool delalloc;

		lockstart = round_down(cur_offset, inode->root->fs_info->sectorsize);
		lockend = round_up(end, inode->root->fs_info->sectorsize);

		/*
		 * We are only locking for the delalloc range because that's the
		 * only thing that can change here.  With fiemap we have a lock
		 * on the inode, so no buffered or direct writes can happen.
		 *
		 * However mmaps and normal page writeback will cause this to
		 * change arbitrarily.  We have to lock the extent lock here to
		 * make sure that nobody messes with the tree while we're doing
		 * btrfs_find_delalloc_in_range.
		 */
		lock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
		delalloc = btrfs_find_delalloc_in_range(inode, cur_offset, end,
							delalloc_cached_state,
							&delalloc_start,
							&delalloc_end);
		unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
		if (!delalloc)
			break;

@@ -2866,15 +2884,15 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
		  u64 start, u64 len)
{
	const u64 ino = btrfs_ino(inode);
	struct extent_state *cached_state = NULL;
	struct extent_state *delalloc_cached_state = NULL;
	struct btrfs_path *path;
	struct fiemap_cache cache = { 0 };
	struct btrfs_backref_share_check_ctx *backref_ctx;
	u64 last_extent_end;
	u64 prev_extent_end;
	u64 lockstart;
	u64 lockend;
	u64 range_start;
	u64 range_end;
	const u64 sectorsize = inode->root->fs_info->sectorsize;
	bool stopped = false;
	int ret;

@@ -2885,12 +2903,11 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
		goto out;
	}

	lockstart = round_down(start, inode->root->fs_info->sectorsize);
	lockend = round_up(start + len, inode->root->fs_info->sectorsize);
	prev_extent_end = lockstart;
	range_start = round_down(start, sectorsize);
	range_end = round_up(start + len, sectorsize);
	prev_extent_end = range_start;

	btrfs_inode_lock(inode, BTRFS_ILOCK_SHARED);
	lock_extent(&inode->io_tree, lockstart, lockend, &cached_state);

	ret = fiemap_find_last_extent_offset(inode, path, &last_extent_end);
	if (ret < 0)
@@ -2898,7 +2915,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
	btrfs_release_path(path);

	path->reada = READA_FORWARD;
	ret = fiemap_search_slot(inode, path, lockstart);
	ret = fiemap_search_slot(inode, path, range_start);
	if (ret < 0) {
		goto out_unlock;
	} else if (ret > 0) {
@@ -2910,7 +2927,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
		goto check_eof_delalloc;
	}

	while (prev_extent_end < lockend) {
	while (prev_extent_end < range_end) {
		struct extent_buffer *leaf = path->nodes[0];
		struct btrfs_file_extent_item *ei;
		struct btrfs_key key;
@@ -2933,19 +2950,19 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
		 * The first iteration can leave us at an extent item that ends
		 * before our range's start. Move to the next item.
		 */
		if (extent_end <= lockstart)
		if (extent_end <= range_start)
			goto next_item;

		backref_ctx->curr_leaf_bytenr = leaf->start;

		/* We have in implicit hole (NO_HOLES feature enabled). */
		if (prev_extent_end < key.offset) {
			const u64 range_end = min(key.offset, lockend) - 1;
			const u64 hole_end = min(key.offset, range_end) - 1;

			ret = fiemap_process_hole(inode, fieinfo, &cache,
						  &delalloc_cached_state,
						  backref_ctx, 0, 0, 0,
						  prev_extent_end, range_end);
						  prev_extent_end, hole_end);
			if (ret < 0) {
				goto out_unlock;
			} else if (ret > 0) {
@@ -2955,7 +2972,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
			}

			/* We've reached the end of the fiemap range, stop. */
			if (key.offset >= lockend) {
			if (key.offset >= range_end) {
				stopped = true;
				break;
			}
@@ -3049,29 +3066,41 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
	btrfs_free_path(path);
	path = NULL;

	if (!stopped && prev_extent_end < lockend) {
	if (!stopped && prev_extent_end < range_end) {
		ret = fiemap_process_hole(inode, fieinfo, &cache,
					  &delalloc_cached_state, backref_ctx,
					  0, 0, 0, prev_extent_end, lockend - 1);
					  0, 0, 0, prev_extent_end, range_end - 1);
		if (ret < 0)
			goto out_unlock;
		prev_extent_end = lockend;
		prev_extent_end = range_end;
	}

	if (cache.cached && cache.offset + cache.len >= last_extent_end) {
		const u64 i_size = i_size_read(&inode->vfs_inode);

		if (prev_extent_end < i_size) {
			struct extent_state *cached_state = NULL;
			u64 delalloc_start;
			u64 delalloc_end;
			u64 lockstart;
			u64 lockend;
			bool delalloc;

			lockstart = round_down(prev_extent_end, sectorsize);
			lockend = round_up(i_size, sectorsize);

			/*
			 * See the comment in fiemap_process_hole as to why
			 * we're doing the locking here.
			 */
			lock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
			delalloc = btrfs_find_delalloc_in_range(inode,
								prev_extent_end,
								i_size - 1,
								&delalloc_cached_state,
								&delalloc_start,
								&delalloc_end);
			unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
			if (!delalloc)
				cache.flags |= FIEMAP_EXTENT_LAST;
		} else {
@@ -3082,7 +3111,6 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
	ret = emit_last_fiemap_cache(fieinfo, &cache);

out_unlock:
	unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
	btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
out:
	free_extent_state(delalloc_cached_state);