Commit 1fb91896 authored by Linus Torvalds
Browse files
Pull btrfs fixes from David Sterba:

 - extend tree-checker verification of directory item type

 - fix a regression in page/folio and extent state tracking in the xarray:
   the dirty status can get out of sync and cause problems, e.g. a hang

 - in send, detect the last extent and allow cloning it instead of sending
   it as a write, which reduces the amount of data transferred in the stream

 - fix checking extent references when cleaning deleted subvolumes

 - fix one more case in the extent map shrinker, let it run only in the
   kswapd context so it does not cause latency spikes during other
   operations

* tag 'for-6.11-rc3-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: fix invalid mapping of extent xarray state
  btrfs: send: allow cloning non-aligned extent if it ends at i_size
  btrfs: only run the extent map shrinker from kswapd tasks
  btrfs: tree-checker: reject BTRFS_FT_UNKNOWN dir type
  btrfs: check delayed refs when we're checking if a ref exists
parents d07b4328 6252690f
Loading
Loading
Loading
Loading
+67 −0
Original line number Diff line number Diff line
@@ -1134,6 +1134,73 @@ btrfs_find_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs, u64 byt
	return find_ref_head(delayed_refs, bytenr, false);
}

/*
 * Three-way comparison of a searched-for tree block reference against a
 * delayed ref node.
 *
 * The searched-for key type is BTRFS_SHARED_BLOCK_REF_KEY when @parent is
 * non-zero and BTRFS_TREE_BLOCK_REF_KEY otherwise.  Types are compared first;
 * on a tie, @root is compared with the entry's ref_root for tree block refs
 * and @parent with the entry's parent for shared refs.
 *
 * Returns -1 if the searched-for key sorts before @entry, 1 if it sorts after
 * it and 0 if they match.
 */
static int find_comp(struct btrfs_delayed_ref_node *entry, u64 root, u64 parent)
{
	int wanted_type;
	u64 wanted_id;
	u64 entry_id;

	if (parent)
		wanted_type = BTRFS_SHARED_BLOCK_REF_KEY;
	else
		wanted_type = BTRFS_TREE_BLOCK_REF_KEY;

	if (wanted_type != entry->type)
		return (wanted_type < entry->type) ? -1 : 1;

	/* Same type: the tie breaker depends on which kind of ref this is. */
	if (wanted_type == BTRFS_TREE_BLOCK_REF_KEY) {
		wanted_id = root;
		entry_id = entry->ref_root;
	} else {
		wanted_id = parent;
		entry_id = entry->parent;
	}

	if (wanted_id == entry_id)
		return 0;
	return (wanted_id < entry_id) ? -1 : 1;
}

/*
 * Look for a pending tree block reference on a delayed ref head.
 *
 * Walks the head's ref tree looking for the node that matches the given
 * root/parent.  A match only counts when its action is
 * BTRFS_ADD_DELAYED_REF; any other action on the matching node means the
 * reference is not reported as existing.  Tree blocks only.
 *
 * The caller must hold @head->mutex (asserted via lockdep); @head->lock is
 * taken here for the duration of the walk.
 *
 * @head: the head of the bytenr we're searching.
 * @root: the root objectid of the reference if it is a normal reference.
 * @parent: the parent if this is a shared backref.
 *
 * Returns true if a matching ADD reference is attached to @head, false
 * otherwise.
 */
bool btrfs_find_delayed_tree_ref(struct btrfs_delayed_ref_head *head,
				 u64 root, u64 parent)
{
	struct rb_node *cur;
	bool exists = false;

	lockdep_assert_held(&head->mutex);

	spin_lock(&head->lock);
	for (cur = head->ref_tree.rb_root.rb_node; cur; ) {
		struct btrfs_delayed_ref_node *ref;
		int cmp;

		ref = rb_entry(cur, struct btrfs_delayed_ref_node, ref_node);
		cmp = find_comp(ref, root, parent);
		if (cmp == 0) {
			/*
			 * Only an ADD action proves the ref exists; the search
			 * ends at the matching node either way.
			 */
			exists = (ref->action == BTRFS_ADD_DELAYED_REF);
			break;
		}
		cur = (cmp < 0) ? cur->rb_left : cur->rb_right;
	}
	spin_unlock(&head->lock);

	return exists;
}

void __cold btrfs_delayed_ref_exit(void)
{
	kmem_cache_destroy(btrfs_delayed_ref_head_cachep);
+2 −0
Original line number Diff line number Diff line
@@ -389,6 +389,8 @@ void btrfs_dec_delayed_refs_rsv_bg_updates(struct btrfs_fs_info *fs_info);
int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info,
				  enum btrfs_reserve_flush_enum flush);
bool btrfs_check_space_for_delayed_refs(struct btrfs_fs_info *fs_info);
/*
 * Return true if @head has a BTRFS_ADD_DELAYED_REF tree block ref attached for
 * @root (or for @parent when @parent is non-zero).  The caller must hold
 * @head->mutex.
 */
bool btrfs_find_delayed_tree_ref(struct btrfs_delayed_ref_head *head,
				 u64 root, u64 parent);

static inline u64 btrfs_delayed_ref_owner(struct btrfs_delayed_ref_node *node)
{
+45 −6
Original line number Diff line number Diff line
@@ -5472,23 +5472,62 @@ static int check_ref_exists(struct btrfs_trans_handle *trans,
			    struct btrfs_root *root, u64 bytenr, u64 parent,
			    int level)
{
	/*
	 * Check whether a reference to the tree block at @bytenr exists for
	 * this root/@parent.  The backref is first looked up with
	 * lookup_extent_backref(); if that reports -ENOENT, the delayed refs
	 * of the running transaction are checked as well.
	 *
	 * Returns 1 if the reference exists, 0 if it does not, or a negative
	 * errno on failure (-ENOMEM if the path cannot be allocated, otherwise
	 * the error returned by lookup_extent_backref()).
	 */
	struct btrfs_delayed_ref_root *delayed_refs;
	struct btrfs_delayed_ref_head *head;
	struct btrfs_path *path;
	struct btrfs_extent_inline_ref *iref;
	int ret;
	bool exists = false;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

again:
	ret = lookup_extent_backref(trans, path, &iref, bytenr,
				    root->fs_info->nodesize, parent,
				    btrfs_root_id(root), level, 0);
	if (ret != -ENOENT) {
		/*
		 * If we get 0 then we found our reference, so return 1;
		 * otherwise return the error, which is not -ENOENT here.
		 */
		btrfs_free_path(path);
		return (ret < 0) ? ret : 1;
	}

	/*
	 * We could have a delayed ref with this reference, so look it up while
	 * we're holding the path open to make sure we don't race with the
	 * delayed ref running.
	 */
	delayed_refs = &trans->transaction->delayed_refs;
	spin_lock(&delayed_refs->lock);
	head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
	if (!head)
		goto out;
	if (!mutex_trylock(&head->mutex)) {
		/*
		 * The head mutex is contended, which means the delayed ref is
		 * being run.  Take a reference on the head so it stays around,
		 * drop the spinlock and the path, wait for the mutex holder to
		 * finish and then redo the whole lookup.
		 */
		refcount_inc(&head->refs);
		spin_unlock(&delayed_refs->lock);

		btrfs_release_path(path);

		/* Lock/unlock purely to wait for the current holder. */
		mutex_lock(&head->mutex);
		mutex_unlock(&head->mutex);
		btrfs_put_delayed_ref_head(head);
		goto again;
	}

	/* Use the same root id accessor as the backref lookup above. */
	exists = btrfs_find_delayed_tree_ref(head, btrfs_root_id(root), parent);
	mutex_unlock(&head->mutex);
out:
	spin_unlock(&delayed_refs->lock);
	btrfs_free_path(path);
	return exists ? 1 : 0;
}

/*
+7 −7
Original line number Diff line number Diff line
@@ -1496,6 +1496,13 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
		free_extent_map(em);
		em = NULL;

		/*
		 * Although the PageDirty bit might be cleared before entering
		 * this function, subpage dirty bit is not cleared.
		 * So clear subpage dirty bit here so next time we won't submit
		 * page for range already written to disk.
		 */
		btrfs_folio_clear_dirty(fs_info, page_folio(page), cur, iosize);
		btrfs_set_range_writeback(inode, cur, cur + iosize - 1);
		if (!PageWriteback(page)) {
			btrfs_err(inode->root->fs_info,
@@ -1503,13 +1510,6 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
			       page->index, cur, end);
		}

		/*
		 * Although the PageDirty bit is cleared before entering this
		 * function, subpage dirty bit is not cleared.
		 * So clear subpage dirty bit here so next time we won't submit
		 * page for range already written to disk.
		 */
		btrfs_folio_clear_dirty(fs_info, page_folio(page), cur, iosize);

		submit_extent_page(bio_ctrl, disk_bytenr, page, iosize,
				   cur - page_offset(page));
+6 −16
Original line number Diff line number Diff line
@@ -1147,8 +1147,7 @@ static long btrfs_scan_inode(struct btrfs_inode *inode, struct btrfs_em_shrink_c
		return 0;

	/*
	 * We want to be fast because we can be called from any path trying to
	 * allocate memory, so if the lock is busy we don't want to spend time
	 * We want to be fast so if the lock is busy we don't want to spend time
	 * waiting for it - either some task is about to do IO for the inode or
	 * we may have another task shrinking extent maps, here in this code, so
	 * skip this inode.
@@ -1191,9 +1190,7 @@ static long btrfs_scan_inode(struct btrfs_inode *inode, struct btrfs_em_shrink_c
		/*
		 * Stop if we need to reschedule or there's contention on the
		 * lock. This is to avoid slowing other tasks trying to take the
		 * lock and because the shrinker might be called during a memory
		 * allocation path and we want to avoid taking a very long time
		 * and slowing down all sorts of tasks.
		 * lock.
		 */
		if (need_resched() || rwlock_needbreak(&tree->lock))
			break;
@@ -1222,12 +1219,7 @@ static long btrfs_scan_root(struct btrfs_root *root, struct btrfs_em_shrink_ctx
		if (ctx->scanned >= ctx->nr_to_scan)
			break;

		/*
		 * We may be called from memory allocation paths, so we don't
		 * want to take too much time and slowdown tasks.
		 */
		if (need_resched())
			break;
		cond_resched();

		inode = btrfs_find_first_inode(root, min_ino);
	}
@@ -1285,14 +1277,12 @@ long btrfs_free_extent_maps(struct btrfs_fs_info *fs_info, long nr_to_scan)
							   ctx.last_ino);
	}

	/*
	 * We may be called from memory allocation paths, so we don't want to
	 * take too much time and slowdown tasks, so stop if we need reschedule.
	 */
	while (ctx.scanned < ctx.nr_to_scan && !need_resched()) {
	while (ctx.scanned < ctx.nr_to_scan) {
		struct btrfs_root *root;
		unsigned long count;

		cond_resched();

		spin_lock(&fs_info->fs_roots_radix_lock);
		count = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
					       (void **)&root,
Loading