Commit 7f98ab9d authored by Linus Torvalds
Browse files
Pull btrfs fixes from David Sterba:

 - fix potential deadlock due to mismatching transaction states when
   waiting for the current transaction

 - fix squota accounting with nested snapshots

 - fix quota inheritance of qgroups with multiple parent qgroups

 - fix NULL inode pointer in evict tracepoint

 - fix writes beyond end of file on systems with 64K page size and 4K
   block size

 - fix logging of inodes after exchange rename

 - fix use after free when using ref_tracker feature

 - space reservation fixes

* tag 'for-6.19-rc4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: fix reservation leak in some error paths when inserting inline extent
  btrfs: do not free data reservation in fallback from inline due to -ENOSPC
  btrfs: fix use-after-free warning in btrfs_get_or_create_delayed_node()
  btrfs: always detect conflicting inodes when logging inode refs
  btrfs: fix beyond-EOF write handling
  btrfs: fix deadlock in wait_current_trans() due to ignored transaction type
  btrfs: fix NULL dereference on root when tracing inode eviction
  btrfs: qgroup: update all parent qgroups when doing quick inherit
  btrfs: fix qgroup_snapshot_quick_inherit() squota bug
parents 3609fa95 c1c050f9
Loading
Loading
Loading
Loading
+17 −15
Original line number Diff line number Diff line
@@ -152,37 +152,39 @@ static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node(
		return ERR_PTR(-ENOMEM);
	btrfs_init_delayed_node(node, root, ino);

	/* Cached in the inode and can be accessed. */
	refcount_set(&node->refs, 2);
	btrfs_delayed_node_ref_tracker_alloc(node, tracker, GFP_NOFS);
	btrfs_delayed_node_ref_tracker_alloc(node, &node->inode_cache_tracker, GFP_NOFS);

	/* Allocate and reserve the slot, from now it can return a NULL from xa_load(). */
	ret = xa_reserve(&root->delayed_nodes, ino, GFP_NOFS);
	if (ret == -ENOMEM) {
		btrfs_delayed_node_ref_tracker_dir_exit(node);
		kmem_cache_free(delayed_node_cache, node);
		return ERR_PTR(-ENOMEM);
	}
	if (ret == -ENOMEM)
		goto cleanup;

	xa_lock(&root->delayed_nodes);
	ptr = xa_load(&root->delayed_nodes, ino);
	if (ptr) {
		/* Somebody inserted it, go back and read it. */
		xa_unlock(&root->delayed_nodes);
		btrfs_delayed_node_ref_tracker_dir_exit(node);
		kmem_cache_free(delayed_node_cache, node);
		node = NULL;
		goto again;
		goto cleanup;
	}
	ptr = __xa_store(&root->delayed_nodes, ino, node, GFP_ATOMIC);
	ASSERT(xa_err(ptr) != -EINVAL);
	ASSERT(xa_err(ptr) != -ENOMEM);
	ASSERT(ptr == NULL);

	/* Cached in the inode and can be accessed. */
	refcount_set(&node->refs, 2);
	btrfs_delayed_node_ref_tracker_alloc(node, tracker, GFP_ATOMIC);
	btrfs_delayed_node_ref_tracker_alloc(node, &node->inode_cache_tracker, GFP_ATOMIC);

	btrfs_inode->delayed_node = node;
	xa_unlock(&root->delayed_nodes);

	return node;
cleanup:
	btrfs_delayed_node_ref_tracker_free(node, tracker);
	btrfs_delayed_node_ref_tracker_free(node, &node->inode_cache_tracker);
	btrfs_delayed_node_ref_tracker_dir_exit(node);
	kmem_cache_free(delayed_node_cache, node);
	if (ret)
		return ERR_PTR(ret);
	goto again;
}

/*
+4 −4
Original line number Diff line number Diff line
@@ -1728,7 +1728,7 @@ static noinline_for_stack int extent_writepage_io(struct btrfs_inode *inode,
			struct btrfs_ordered_extent *ordered;

			ordered = btrfs_lookup_first_ordered_range(inode, cur,
								   folio_end - cur);
								   fs_info->sectorsize);
			/*
			 * We have just run delalloc before getting here, so
			 * there must be an ordered extent.
@@ -1742,7 +1742,7 @@ static noinline_for_stack int extent_writepage_io(struct btrfs_inode *inode,
			btrfs_put_ordered_extent(ordered);

			btrfs_mark_ordered_io_finished(inode, folio, cur,
						       end - cur, true);
						       fs_info->sectorsize, true);
			/*
			 * This range is beyond i_size, thus we don't need to
			 * bother writing back.
@@ -1751,8 +1751,8 @@ static noinline_for_stack int extent_writepage_io(struct btrfs_inode *inode,
			 * writeback the sectors with subpage dirty bits,
			 * causing writeback without ordered extent.
			 */
			btrfs_folio_clear_dirty(fs_info, folio, cur, end - cur);
			break;
			btrfs_folio_clear_dirty(fs_info, folio, cur, fs_info->sectorsize);
			continue;
		}
		ret = submit_one_sector(inode, folio, cur, bio_ctrl, i_size);
		if (unlikely(ret < 0)) {
+15 −7
Original line number Diff line number Diff line
@@ -618,19 +618,22 @@ static noinline int __cow_file_range_inline(struct btrfs_inode *inode,
	struct btrfs_drop_extents_args drop_args = { 0 };
	struct btrfs_root *root = inode->root;
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_trans_handle *trans;
	struct btrfs_trans_handle *trans = NULL;
	u64 data_len = (compressed_size ?: size);
	int ret;
	struct btrfs_path *path;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	if (!path) {
		ret = -ENOMEM;
		goto out;
	}

	trans = btrfs_join_transaction(root);
	if (IS_ERR(trans)) {
		btrfs_free_path(path);
		return PTR_ERR(trans);
		ret = PTR_ERR(trans);
		trans = NULL;
		goto out;
	}
	trans->block_rsv = &inode->block_rsv;

@@ -674,9 +677,14 @@ static noinline int __cow_file_range_inline(struct btrfs_inode *inode,
	 * it won't count as data extent, free them directly here.
	 * And at reserve time, it's always aligned to page size, so
	 * just free one page here.
	 *
	 * If we fallback to non-inline (ret == 1) due to -ENOSPC, then we need
	 * to keep the data reservation.
	 */
	if (ret <= 0)
		btrfs_qgroup_free_data(inode, NULL, 0, fs_info->sectorsize, NULL);
	btrfs_free_path(path);
	if (trans)
		btrfs_end_transaction(trans);
	return ret;
}
+19 −2
Original line number Diff line number Diff line
@@ -3208,9 +3208,15 @@ static int qgroup_snapshot_quick_inherit(struct btrfs_fs_info *fs_info,
{
	struct btrfs_qgroup *src;
	struct btrfs_qgroup *parent;
	struct btrfs_qgroup *qgroup;
	struct btrfs_qgroup_list *list;
	LIST_HEAD(qgroup_list);
	const u32 nodesize = fs_info->nodesize;
	int nr_parents = 0;

	if (btrfs_qgroup_mode(fs_info) != BTRFS_QGROUP_MODE_FULL)
		return 0;

	src = find_qgroup_rb(fs_info, srcid);
	if (!src)
		return -ENOENT;
@@ -3245,8 +3251,19 @@ static int qgroup_snapshot_quick_inherit(struct btrfs_fs_info *fs_info,
	if (parent->excl != parent->rfer)
		return 1;

	parent->excl += fs_info->nodesize;
	parent->rfer += fs_info->nodesize;
	qgroup_iterator_add(&qgroup_list, parent);
	list_for_each_entry(qgroup, &qgroup_list, iterator) {
		qgroup->rfer += nodesize;
		qgroup->rfer_cmpr += nodesize;
		qgroup->excl += nodesize;
		qgroup->excl_cmpr += nodesize;
		qgroup_dirty(fs_info, qgroup);

		/* Append parent qgroups to @qgroup_list. */
		list_for_each_entry(list, &qgroup->groups, next_group)
			qgroup_iterator_add(&qgroup_list, list->group);
	}
	qgroup_iterator_clean(&qgroup_list);
	return 0;
}

+6 −5
Original line number Diff line number Diff line
@@ -520,13 +520,14 @@ static inline int is_transaction_blocked(struct btrfs_transaction *trans)
 * when this is done, it is safe to start a new transaction, but the current
 * transaction might not be fully on disk.
 */
static void wait_current_trans(struct btrfs_fs_info *fs_info)
static void wait_current_trans(struct btrfs_fs_info *fs_info, unsigned int type)
{
	struct btrfs_transaction *cur_trans;

	spin_lock(&fs_info->trans_lock);
	cur_trans = fs_info->running_transaction;
	if (cur_trans && is_transaction_blocked(cur_trans)) {
	if (cur_trans && is_transaction_blocked(cur_trans) &&
	    (btrfs_blocked_trans_types[cur_trans->state] & type)) {
		refcount_inc(&cur_trans->use_count);
		spin_unlock(&fs_info->trans_lock);

@@ -701,12 +702,12 @@ start_transaction(struct btrfs_root *root, unsigned int num_items,
		sb_start_intwrite(fs_info->sb);

	if (may_wait_transaction(fs_info, type))
		wait_current_trans(fs_info);
		wait_current_trans(fs_info, type);

	do {
		ret = join_transaction(fs_info, type);
		if (ret == -EBUSY) {
			wait_current_trans(fs_info);
			wait_current_trans(fs_info, type);
			if (unlikely(type == TRANS_ATTACH ||
				     type == TRANS_JOIN_NOSTART))
				ret = -ENOENT;
@@ -1003,7 +1004,7 @@ int btrfs_wait_for_commit(struct btrfs_fs_info *fs_info, u64 transid)

void btrfs_throttle(struct btrfs_fs_info *fs_info)
{
	wait_current_trans(fs_info);
	wait_current_trans(fs_info, TRANS_START);
}

bool btrfs_should_end_transaction(struct btrfs_trans_handle *trans)
Loading