Commit 7505aa14 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull btrfs fixes from David Sterba:

 - fix freeing allocated id for anon dev when snapshot creation fails

 - fiemap fixes:
     - followup for a recent deadlock fix, ranges that fiemap can access
       can still race with ordered extent completion
     - make sure fiemap with SYNC flag does not race with writes

* tag 'for-6.8-rc6-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: fix double free of anonymous device after snapshot creation failure
  btrfs: ensure fiemap doesn't race with writes when FIEMAP_FLAG_SYNC is given
  btrfs: fix race between ordered extent completion and fiemap
parents 3aec97e0 e2b54eaf
Loading
Loading
Loading
Loading
+11 −11
Original line number Diff line number Diff line
@@ -1307,12 +1307,12 @@ void btrfs_free_fs_info(struct btrfs_fs_info *fs_info)
 *
 * @objectid:	root id
 * @anon_dev:	preallocated anonymous block device number for new roots,
 * 		pass 0 for new allocation.
 *		pass NULL for a new allocation.
 * @check_ref:	whether to check root item references, If true, return -ENOENT
 *		for orphan roots
 */
static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info,
					     u64 objectid, dev_t anon_dev,
					     u64 objectid, dev_t *anon_dev,
					     bool check_ref)
{
	struct btrfs_root *root;
@@ -1342,9 +1342,9 @@ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info,
		 * that common but still possible.  In that case, we just need
		 * to free the anon_dev.
		 */
		if (unlikely(anon_dev)) {
			free_anon_bdev(anon_dev);
			anon_dev = 0;
		if (unlikely(anon_dev && *anon_dev)) {
			free_anon_bdev(*anon_dev);
			*anon_dev = 0;
		}

		if (check_ref && btrfs_root_refs(&root->root_item) == 0) {
@@ -1366,7 +1366,7 @@ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info,
		goto fail;
	}

	ret = btrfs_init_fs_root(root, anon_dev);
	ret = btrfs_init_fs_root(root, anon_dev ? *anon_dev : 0);
	if (ret)
		goto fail;

@@ -1402,7 +1402,7 @@ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info,
	 * root's anon_dev to 0 to avoid a double free, once by btrfs_put_root()
	 * and once again by our caller.
	 */
	if (anon_dev)
	if (anon_dev && *anon_dev)
		root->anon_dev = 0;
	btrfs_put_root(root);
	return ERR_PTR(ret);
@@ -1418,7 +1418,7 @@ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info,
struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
				     u64 objectid, bool check_ref)
{
	return btrfs_get_root_ref(fs_info, objectid, 0, check_ref);
	return btrfs_get_root_ref(fs_info, objectid, NULL, check_ref);
}

/*
@@ -1426,11 +1426,11 @@ struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
 * the anonymous block device id
 *
 * @objectid:	tree objectid
 * @anon_dev:	if zero, allocate a new anonymous block device or use the
 *		parameter value
 * @anon_dev:	if NULL, allocate a new anonymous block device or use the
 *		parameter value if not NULL
 */
struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info,
					 u64 objectid, dev_t anon_dev)
					 u64 objectid, dev_t *anon_dev)
{
	return btrfs_get_root_ref(fs_info, objectid, anon_dev, true);
}
+1 −1
Original line number Diff line number Diff line
@@ -61,7 +61,7 @@ void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info);
struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
				     u64 objectid, bool check_ref);
struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info,
					 u64 objectid, dev_t anon_dev);
					 u64 objectid, dev_t *anon_dev);
struct btrfs_root *btrfs_get_fs_root_commit_root(struct btrfs_fs_info *fs_info,
						 struct btrfs_path *path,
						 u64 objectid);
+104 −20
Original line number Diff line number Diff line
@@ -2480,6 +2480,7 @@ static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo,
				struct fiemap_cache *cache,
				u64 offset, u64 phys, u64 len, u32 flags)
{
	u64 cache_end;
	int ret = 0;

	/* Set at the end of extent_fiemap(). */
@@ -2489,15 +2490,102 @@ static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo,
		goto assign;

	/*
	 * Sanity check, extent_fiemap() should have ensured that new
	 * fiemap extent won't overlap with cached one.
	 * Not recoverable.
	 * When iterating the extents of the inode, at extent_fiemap(), we may
	 * find an extent that starts at an offset behind the end offset of the
	 * previous extent we processed. This happens if fiemap is called
	 * without FIEMAP_FLAG_SYNC and there are ordered extents completing
	 * while we call btrfs_next_leaf() (through fiemap_next_leaf_item()).
	 *
	 * NOTE: Physical address can overlap, due to compression
	 * For example we are in leaf X processing its last item, which is the
	 * file extent item for file range [512K, 1M[, and after
	 * btrfs_next_leaf() releases the path, there's an ordered extent that
	 * completes for the file range [768K, 2M[, and that results in trimming
	 * the file extent item so that it now corresponds to the file range
	 * [512K, 768K[ and a new file extent item is inserted for the file
	 * range [768K, 2M[, which may end up as the last item of leaf X or as
	 * the first item of the next leaf - in either case btrfs_next_leaf()
	 * will leave us with a path pointing to the new extent item, for the
	 * file range [768K, 2M[, since that's the first key that follows the
	 * last one we processed. So in order not to report overlapping extents
	 * to user space, we trim the length of the previously cached extent and
	 * emit it.
	 *
	 * Upon calling btrfs_next_leaf() we may also find an extent with an
	 * offset smaller than or equals to cache->offset, and this happens
	 * when we had a hole or prealloc extent with several delalloc ranges in
	 * it, but after btrfs_next_leaf() released the path, delalloc was
	 * flushed and the resulting ordered extents were completed, so we can
	 * now have found a file extent item for an offset that is smaller than
	 * or equals to what we have in cache->offset. We deal with this as
	 * described below.
	 */
	cache_end = cache->offset + cache->len;
	if (cache_end > offset) {
		if (offset == cache->offset) {
			/*
			 * We cached a dealloc range (found in the io tree) for
			 * a hole or prealloc extent and we have now found a
			 * file extent item for the same offset. What we have
			 * now is more recent and up to date, so discard what
			 * we had in the cache and use what we have just found.
			 */
	if (cache->offset + cache->len > offset) {
		WARN_ON(1);
		return -EINVAL;
			goto assign;
		} else if (offset > cache->offset) {
			/*
			 * The extent range we previously found ends after the
			 * offset of the file extent item we found and that
			 * offset falls somewhere in the middle of that previous
			 * extent range. So adjust the range we previously found
			 * to end at the offset of the file extent item we have
			 * just found, since this extent is more up to date.
			 * Emit that adjusted range and cache the file extent
			 * item we have just found. This corresponds to the case
			 * where a previously found file extent item was split
			 * due to an ordered extent completing.
			 */
			cache->len = offset - cache->offset;
			goto emit;
		} else {
			const u64 range_end = offset + len;

			/*
			 * The offset of the file extent item we have just found
			 * is behind the cached offset. This means we were
			 * processing a hole or prealloc extent for which we
			 * have found delalloc ranges (in the io tree), so what
			 * we have in the cache is the last delalloc range we
			 * found while the file extent item we found can be
			 * either for a whole delalloc range we previously
			 * emmitted or only a part of that range.
			 *
			 * We have two cases here:
			 *
			 * 1) The file extent item's range ends at or behind the
			 *    cached extent's end. In this case just ignore the
			 *    current file extent item because we don't want to
			 *    overlap with previous ranges that may have been
			 *    emmitted already;
			 *
			 * 2) The file extent item starts behind the currently
			 *    cached extent but its end offset goes beyond the
			 *    end offset of the cached extent. We don't want to
			 *    overlap with a previous range that may have been
			 *    emmitted already, so we emit the currently cached
			 *    extent and then partially store the current file
			 *    extent item's range in the cache, for the subrange
			 *    going the cached extent's end to the end of the
			 *    file extent item.
			 */
			if (range_end <= cache_end)
				return 0;

			if (!(flags & (FIEMAP_EXTENT_ENCODED | FIEMAP_EXTENT_DELALLOC)))
				phys += cache_end - offset;

			offset = cache_end;
			len = range_end - cache_end;
			goto emit;
		}
	}

	/*
@@ -2517,6 +2605,7 @@ static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo,
		return 0;
	}

emit:
	/* Not mergeable, need to submit cached one */
	ret = fiemap_fill_next_extent(fieinfo, cache->offset, cache->phys,
				      cache->len, cache->flags);
@@ -2907,17 +2996,15 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
	range_end = round_up(start + len, sectorsize);
	prev_extent_end = range_start;

	btrfs_inode_lock(inode, BTRFS_ILOCK_SHARED);

	ret = fiemap_find_last_extent_offset(inode, path, &last_extent_end);
	if (ret < 0)
		goto out_unlock;
		goto out;
	btrfs_release_path(path);

	path->reada = READA_FORWARD;
	ret = fiemap_search_slot(inode, path, range_start);
	if (ret < 0) {
		goto out_unlock;
		goto out;
	} else if (ret > 0) {
		/*
		 * No file extent item found, but we may have delalloc between
@@ -2964,7 +3051,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
						  backref_ctx, 0, 0, 0,
						  prev_extent_end, hole_end);
			if (ret < 0) {
				goto out_unlock;
				goto out;
			} else if (ret > 0) {
				/* fiemap_fill_next_extent() told us to stop. */
				stopped = true;
@@ -3020,7 +3107,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
								  extent_gen,
								  backref_ctx);
				if (ret < 0)
					goto out_unlock;
					goto out;
				else if (ret > 0)
					flags |= FIEMAP_EXTENT_SHARED;
			}
@@ -3031,7 +3118,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
		}

		if (ret < 0) {
			goto out_unlock;
			goto out;
		} else if (ret > 0) {
			/* fiemap_fill_next_extent() told us to stop. */
			stopped = true;
@@ -3042,12 +3129,12 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
next_item:
		if (fatal_signal_pending(current)) {
			ret = -EINTR;
			goto out_unlock;
			goto out;
		}

		ret = fiemap_next_leaf_item(inode, path);
		if (ret < 0) {
			goto out_unlock;
			goto out;
		} else if (ret > 0) {
			/* No more file extent items for this inode. */
			break;
@@ -3071,7 +3158,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
					  &delalloc_cached_state, backref_ctx,
					  0, 0, 0, prev_extent_end, range_end - 1);
		if (ret < 0)
			goto out_unlock;
			goto out;
		prev_extent_end = range_end;
	}

@@ -3109,9 +3196,6 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
	}

	ret = emit_last_fiemap_cache(fieinfo, &cache);

out_unlock:
	btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
out:
	free_extent_state(delalloc_cached_state);
	btrfs_free_backref_share_ctx(backref_ctx);
+21 −1
Original line number Diff line number Diff line
@@ -7835,6 +7835,7 @@ struct iomap_dio *btrfs_dio_write(struct kiocb *iocb, struct iov_iter *iter,
static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
			u64 start, u64 len)
{
	struct btrfs_inode *btrfs_inode = BTRFS_I(inode);
	int	ret;

	ret = fiemap_prep(inode, fieinfo, start, &len, 0);
@@ -7860,7 +7861,26 @@ static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
			return ret;
	}

	return extent_fiemap(BTRFS_I(inode), fieinfo, start, len);
	btrfs_inode_lock(btrfs_inode, BTRFS_ILOCK_SHARED);

	/*
	 * We did an initial flush to avoid holding the inode's lock while
	 * triggering writeback and waiting for the completion of IO and ordered
	 * extents. Now after we locked the inode we do it again, because it's
	 * possible a new write may have happened in between those two steps.
	 */
	if (fieinfo->fi_flags & FIEMAP_FLAG_SYNC) {
		ret = btrfs_wait_ordered_range(inode, 0, LLONG_MAX);
		if (ret) {
			btrfs_inode_unlock(btrfs_inode, BTRFS_ILOCK_SHARED);
			return ret;
		}
	}

	ret = extent_fiemap(btrfs_inode, fieinfo, start, len);
	btrfs_inode_unlock(btrfs_inode, BTRFS_ILOCK_SHARED);

	return ret;
}

static int btrfs_writepages(struct address_space *mapping,
+1 −1
Original line number Diff line number Diff line
@@ -721,7 +721,7 @@ static noinline int create_subvol(struct mnt_idmap *idmap,
	free_extent_buffer(leaf);
	leaf = NULL;

	new_root = btrfs_get_new_fs_root(fs_info, objectid, anon_dev);
	new_root = btrfs_get_new_fs_root(fs_info, objectid, &anon_dev);
	if (IS_ERR(new_root)) {
		ret = PTR_ERR(new_root);
		btrfs_abort_transaction(trans, ret);
Loading