Commit 5d9248ee authored by Linus Torvalds
Browse files
Pull btrfs fixes from David Sterba:

 - zoned mode fixes:
     - fix slowdown when writing large file sequentially by looking up
       block groups with enough space faster
     - locking fixes when activating a zone

 - new mount API fixes:
     - preserve mount options for a ro/rw mount of the same subvolume

 - scrub fixes:
     - fix use-after-free in case the chunk length is not aligned to
       64K; this does not happen normally but has been reported on
       images converted from ext4
     - similar alignment check was missing with raid-stripe-tree

 - subvolume deletion fixes:
     - prevent calling ioctl on already deleted subvolume
     - properly track the flag that marks a deleted subvolume

 - in subpage mode, fix decompression of an inline extent (zlib, lzo,
   zstd)

 - fix crash when starting writeback on a folio; after integration with
   recent MM changes this needs to be started conditionally

 - reject unknown flags in defrag ioctl

 - error handling, API fixes, minor warning fixes

* tag 'for-6.8-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: scrub: limit RST scrub to chunk boundary
  btrfs: scrub: avoid use-after-free when chunk length is not 64K aligned
  btrfs: don't unconditionally call folio_start_writeback in subpage
  btrfs: use the original mount's mount options for the legacy reconfigure
  btrfs: don't warn if discard range is not aligned to sector
  btrfs: tree-checker: fix inline ref size in error messages
  btrfs: zstd: fix and simplify the inline extent decompression
  btrfs: lzo: fix and simplify the inline extent decompression
  btrfs: zlib: fix and simplify the inline extent decompression
  btrfs: defrag: reject unknown flags of btrfs_ioctl_defrag_range_args
  btrfs: avoid copying BTRFS_ROOT_SUBVOL_DEAD flag to snapshot of subvolume being deleted
  btrfs: don't abort filesystem when attempting to snapshot deleted subvolume
  btrfs: zoned: fix lock ordering in btrfs_zone_activate()
  btrfs: fix unbalanced unlock of mapping_tree_lock
  btrfs: ref-verify: free ref cache before clearing mount opt
  btrfs: fix kvcalloc() arguments order in btrfs_ioctl_send()
  btrfs: zoned: optimize hint byte for zoned allocator
  btrfs: zoned: factor out prepare_allocation_zoned()
parents 610347ef 7f2d219e
Loading
Loading
Loading
Loading
+16 −7
Original line number Diff line number Diff line
@@ -142,15 +142,15 @@ static int compression_decompress_bio(struct list_head *ws,

static int compression_decompress(int type, struct list_head *ws,
		const u8 *data_in, struct page *dest_page,
               unsigned long start_byte, size_t srclen, size_t destlen)
		unsigned long dest_pgoff, size_t srclen, size_t destlen)
{
	switch (type) {
	case BTRFS_COMPRESS_ZLIB: return zlib_decompress(ws, data_in, dest_page,
						start_byte, srclen, destlen);
						dest_pgoff, srclen, destlen);
	case BTRFS_COMPRESS_LZO:  return lzo_decompress(ws, data_in, dest_page,
						start_byte, srclen, destlen);
						dest_pgoff, srclen, destlen);
	case BTRFS_COMPRESS_ZSTD: return zstd_decompress(ws, data_in, dest_page,
						start_byte, srclen, destlen);
						dest_pgoff, srclen, destlen);
	case BTRFS_COMPRESS_NONE:
	default:
		/*
@@ -1037,14 +1037,23 @@ static int btrfs_decompress_bio(struct compressed_bio *cb)
 * dest_pgoff tells us the offset into the destination page where the data goes
 */
int btrfs_decompress(int type, const u8 *data_in, struct page *dest_page,
		     unsigned long start_byte, size_t srclen, size_t destlen)
		     unsigned long dest_pgoff, size_t srclen, size_t destlen)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(dest_page->mapping->host->i_sb);
	struct list_head *workspace;
	const u32 sectorsize = fs_info->sectorsize;
	int ret;

	/*
	 * The full destination page range should not exceed the page size.
	 * And the @destlen should not exceed sectorsize, as this is only called for
	 * inline file extents, which should not exceed sectorsize.
	 */
	ASSERT(dest_pgoff + destlen <= PAGE_SIZE && destlen <= sectorsize);

	workspace = get_workspace(type, 0);
	ret = compression_decompress(type, workspace, data_in, dest_page,
				     start_byte, srclen, destlen);
				     dest_pgoff, srclen, destlen);
	put_workspace(type, workspace);

	return ret;
+3 −3
Original line number Diff line number Diff line
@@ -148,7 +148,7 @@ int zlib_compress_pages(struct list_head *ws, struct address_space *mapping,
		unsigned long *total_in, unsigned long *total_out);
int zlib_decompress_bio(struct list_head *ws, struct compressed_bio *cb);
int zlib_decompress(struct list_head *ws, const u8 *data_in,
		struct page *dest_page, unsigned long start_byte, size_t srclen,
		struct page *dest_page, unsigned long dest_pgoff, size_t srclen,
		size_t destlen);
struct list_head *zlib_alloc_workspace(unsigned int level);
void zlib_free_workspace(struct list_head *ws);
@@ -159,7 +159,7 @@ int lzo_compress_pages(struct list_head *ws, struct address_space *mapping,
		unsigned long *total_in, unsigned long *total_out);
int lzo_decompress_bio(struct list_head *ws, struct compressed_bio *cb);
int lzo_decompress(struct list_head *ws, const u8 *data_in,
		struct page *dest_page, unsigned long start_byte, size_t srclen,
		struct page *dest_page, unsigned long dest_pgoff, size_t srclen,
		size_t destlen);
struct list_head *lzo_alloc_workspace(unsigned int level);
void lzo_free_workspace(struct list_head *ws);
@@ -169,7 +169,7 @@ int zstd_compress_pages(struct list_head *ws, struct address_space *mapping,
		unsigned long *total_in, unsigned long *total_out);
int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb);
int zstd_decompress(struct list_head *ws, const u8 *data_in,
		struct page *dest_page, unsigned long start_byte, size_t srclen,
		struct page *dest_page, unsigned long dest_pgoff, size_t srclen,
		size_t destlen);
void zstd_init_workspace_manager(void);
void zstd_cleanup_workspace_manager(void);
+39 −14
Original line number Diff line number Diff line
@@ -1260,7 +1260,8 @@ static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len,
	u64 bytes_left, end;
	u64 aligned_start = ALIGN(start, 1 << SECTOR_SHIFT);

	if (WARN_ON(start != aligned_start)) {
	/* Adjust the range to be aligned to 512B sectors if necessary. */
	if (start != aligned_start) {
		len -= aligned_start - start;
		len = round_down(len, 1 << SECTOR_SHIFT);
		start = aligned_start;
@@ -4298,29 +4299,53 @@ static int prepare_allocation_clustered(struct btrfs_fs_info *fs_info,
	return 0;
}

static int prepare_allocation(struct btrfs_fs_info *fs_info,
			      struct find_free_extent_ctl *ffe_ctl,
			      struct btrfs_space_info *space_info,
			      struct btrfs_key *ins)
static int prepare_allocation_zoned(struct btrfs_fs_info *fs_info,
				    struct find_free_extent_ctl *ffe_ctl)
{
	switch (ffe_ctl->policy) {
	case BTRFS_EXTENT_ALLOC_CLUSTERED:
		return prepare_allocation_clustered(fs_info, ffe_ctl,
						    space_info, ins);
	case BTRFS_EXTENT_ALLOC_ZONED:
	if (ffe_ctl->for_treelog) {
		spin_lock(&fs_info->treelog_bg_lock);
		if (fs_info->treelog_bg)
			ffe_ctl->hint_byte = fs_info->treelog_bg;
		spin_unlock(&fs_info->treelog_bg_lock);
		}
		if (ffe_ctl->for_data_reloc) {
	} else if (ffe_ctl->for_data_reloc) {
		spin_lock(&fs_info->relocation_bg_lock);
		if (fs_info->data_reloc_bg)
			ffe_ctl->hint_byte = fs_info->data_reloc_bg;
		spin_unlock(&fs_info->relocation_bg_lock);
	} else if (ffe_ctl->flags & BTRFS_BLOCK_GROUP_DATA) {
		struct btrfs_block_group *block_group;

		spin_lock(&fs_info->zone_active_bgs_lock);
		list_for_each_entry(block_group, &fs_info->zone_active_bgs, active_bg_list) {
			/*
			 * No lock is OK here because avail is monotonically
			 * decreasing, and this is just a hint.
			 */
			u64 avail = block_group->zone_capacity - block_group->alloc_offset;

			if (block_group_bits(block_group, ffe_ctl->flags) &&
			    avail >= ffe_ctl->num_bytes) {
				ffe_ctl->hint_byte = block_group->start;
				break;
			}
		}
		spin_unlock(&fs_info->zone_active_bgs_lock);
	}

	return 0;
}

static int prepare_allocation(struct btrfs_fs_info *fs_info,
			      struct find_free_extent_ctl *ffe_ctl,
			      struct btrfs_space_info *space_info,
			      struct btrfs_key *ins)
{
	switch (ffe_ctl->policy) {
	case BTRFS_EXTENT_ALLOC_CLUSTERED:
		return prepare_allocation_clustered(fs_info, ffe_ctl,
						    space_info, ins);
	case BTRFS_EXTENT_ALLOC_ZONED:
		return prepare_allocation_zoned(fs_info, ffe_ctl);
	default:
		BUG();
	}
+13 −9
Original line number Diff line number Diff line
@@ -4458,6 +4458,8 @@ int btrfs_delete_subvolume(struct btrfs_inode *dir, struct dentry *dentry)
	u64 root_flags;
	int ret;

	down_write(&fs_info->subvol_sem);

	/*
	 * Don't allow to delete a subvolume with send in progress. This is
	 * inside the inode lock so the error handling that has to drop the bit
@@ -4469,25 +4471,25 @@ int btrfs_delete_subvolume(struct btrfs_inode *dir, struct dentry *dentry)
		btrfs_warn(fs_info,
			   "attempt to delete subvolume %llu during send",
			   dest->root_key.objectid);
		return -EPERM;
		ret = -EPERM;
		goto out_up_write;
	}
	if (atomic_read(&dest->nr_swapfiles)) {
		spin_unlock(&dest->root_item_lock);
		btrfs_warn(fs_info,
			   "attempt to delete subvolume %llu with active swapfile",
			   root->root_key.objectid);
		return -EPERM;
		ret = -EPERM;
		goto out_up_write;
	}
	root_flags = btrfs_root_flags(&dest->root_item);
	btrfs_set_root_flags(&dest->root_item,
			     root_flags | BTRFS_ROOT_SUBVOL_DEAD);
	spin_unlock(&dest->root_item_lock);

	down_write(&fs_info->subvol_sem);

	ret = may_destroy_subvol(dest);
	if (ret)
		goto out_up_write;
		goto out_undead;

	btrfs_init_block_rsv(&block_rsv, BTRFS_BLOCK_RSV_TEMP);
	/*
@@ -4497,7 +4499,7 @@ int btrfs_delete_subvolume(struct btrfs_inode *dir, struct dentry *dentry)
	 */
	ret = btrfs_subvolume_reserve_metadata(root, &block_rsv, 5, true);
	if (ret)
		goto out_up_write;
		goto out_undead;

	trans = btrfs_start_transaction(root, 0);
	if (IS_ERR(trans)) {
@@ -4563,15 +4565,17 @@ int btrfs_delete_subvolume(struct btrfs_inode *dir, struct dentry *dentry)
	inode->i_flags |= S_DEAD;
out_release:
	btrfs_subvolume_release_metadata(root, &block_rsv);
out_up_write:
	up_write(&fs_info->subvol_sem);
out_undead:
	if (ret) {
		spin_lock(&dest->root_item_lock);
		root_flags = btrfs_root_flags(&dest->root_item);
		btrfs_set_root_flags(&dest->root_item,
				root_flags & ~BTRFS_ROOT_SUBVOL_DEAD);
		spin_unlock(&dest->root_item_lock);
	} else {
	}
out_up_write:
	up_write(&fs_info->subvol_sem);
	if (!ret) {
		d_invalidate(dentry);
		btrfs_prune_dentries(dest);
		ASSERT(dest->send_in_progress == 0);
+7 −0
Original line number Diff line number Diff line
@@ -790,6 +790,9 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
		return -EOPNOTSUPP;
	}

	if (btrfs_root_refs(&root->root_item) == 0)
		return -ENOENT;

	if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
		return -EINVAL;

@@ -2608,6 +2611,10 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
				ret = -EFAULT;
				goto out;
			}
			if (range.flags & ~BTRFS_DEFRAG_RANGE_FLAGS_SUPP) {
				ret = -EOPNOTSUPP;
				goto out;
			}
			/* compression requires us to start the IO */
			if ((range.flags & BTRFS_DEFRAG_RANGE_COMPRESS)) {
				range.flags |= BTRFS_DEFRAG_RANGE_START_IO;
Loading