Commit a3d1f54d authored by Linus Torvalds
Browse files
Pull btrfs updates from David Sterba:
 "This update brings a few minor performance improvements, otherwise
  there's a lot of refactoring, cleanups and other sorts of changes that
  are not user visible.

  Performance improvements:

   - inline b-tree locking functions, improvement in metadata-heavy
     changes

   - relax locking on a range that's being reflinked, allows read
     operations to run in parallel

   - speed up NOCOW write checks (throughput +9% on a sample test)

   - extent locking ranges have been reduced in several places, namely
     around delayed ref processing

  Core:

   - more page to folio conversions:
      - relocation
      - send
      - compression
      - inline extent handling
      - super block write and wait

   - extent_map structure optimizations:
      - reduced structure size
      - code simplifications
      - add shrinker for allocated objects, the numbers can go high and
        could exhaust memory on smaller systems (reported) as they may
        not get an opportunity to be freed fast enough

   - extent locking optimizations:
      - reduce locking ranges where locking does not seem to be necessary
        and skipping it is safe due to other means of synchronization
      - potential improvements due to lower contention,
        allocation/freeing and state management operations of extent
        state tracking structures

   - delayed ref cleanups and simplifications

   - updated trace points

   - improved error handling, warnings and assertions

   - cleanups and refactoring, unification of error handling paths"

* tag 'for-6.10-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (122 commits)
  btrfs: qgroup: fix initialization of auto inherit array
  btrfs: count super block write errors in device instead of tracking folio error state
  btrfs: use the folio iterator in btrfs_end_super_write()
  btrfs: convert super block writes to folio in write_dev_supers()
  btrfs: convert super block writes to folio in wait_dev_supers()
  bio: Export bio_add_folio_nofail to modules
  btrfs: remove duplicate included header from fs.h
  btrfs: add a cached state to extent_clear_unlock_delalloc
  btrfs: push extent lock down in submit_one_async_extent
  btrfs: push lock_extent down in cow_file_range()
  btrfs: move can_cow_file_range_inline() outside of the extent lock
  btrfs: push lock_extent into cow_file_range_inline
  btrfs: push extent lock into cow_file_range
  btrfs: push extent lock into run_delalloc_cow
  btrfs: remove unlock_extent from run_delalloc_compressed
  btrfs: push extent lock down in run_delalloc_nocow
  btrfs: adjust while loop condition in run_delalloc_nocow
  btrfs: push extent lock into run_delalloc_nocow
  btrfs: push the extent lock into btrfs_run_delalloc_range
  btrfs: lock extent when doing inline extent in compression
  ...
parents 47e9bff7 0e39c9e5
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -1136,6 +1136,7 @@ void bio_add_folio_nofail(struct bio *bio, struct folio *folio, size_t len,
	WARN_ON_ONCE(off > UINT_MAX);
	__bio_add_page(bio, &folio->page, len, off);
}
EXPORT_SYMBOL_GPL(bio_add_folio_nofail);

/**
 * bio_add_folio - Attempt to add part of a folio to a bio.
+21 −27
Original line number Diff line number Diff line
@@ -261,7 +261,7 @@ static void update_share_count(struct share_check *sc, int oldcount,
	else if (oldcount < 1 && newcount > 0)
		sc->share_count++;

	if (newref->root_id == sc->root->root_key.objectid &&
	if (newref->root_id == btrfs_root_id(sc->root) &&
	    newref->wanted_disk_byte == sc->data_bytenr &&
	    newref->key_for_search.objectid == sc->inum)
		sc->self_ref_count += newref->count;
@@ -769,7 +769,7 @@ static int resolve_indirect_refs(struct btrfs_backref_walk_ctx *ctx,
			continue;
		}

		if (sc && ref->root_id != sc->root->root_key.objectid) {
		if (sc && ref->root_id != btrfs_root_id(sc->root)) {
			free_pref(ref);
			ret = BACKREF_FOUND_SHARED;
			goto out;
@@ -919,40 +919,38 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info,
		switch (node->type) {
		case BTRFS_TREE_BLOCK_REF_KEY: {
			/* NORMAL INDIRECT METADATA backref */
			struct btrfs_delayed_tree_ref *ref;
			struct btrfs_key *key_ptr = NULL;
			/* The owner of a tree block ref is the level. */
			int level = btrfs_delayed_ref_owner(node);

			if (head->extent_op && head->extent_op->update_key) {
				btrfs_disk_key_to_cpu(&key, &head->extent_op->key);
				key_ptr = &key;
			}

			ref = btrfs_delayed_node_to_tree_ref(node);
			ret = add_indirect_ref(fs_info, preftrees, ref->root,
					       key_ptr, ref->level + 1,
					       node->bytenr, count, sc,
					       GFP_ATOMIC);
			ret = add_indirect_ref(fs_info, preftrees, node->ref_root,
					       key_ptr, level + 1, node->bytenr,
					       count, sc, GFP_ATOMIC);
			break;
		}
		case BTRFS_SHARED_BLOCK_REF_KEY: {
			/* SHARED DIRECT METADATA backref */
			struct btrfs_delayed_tree_ref *ref;

			ref = btrfs_delayed_node_to_tree_ref(node);
			/*
			 * SHARED DIRECT METADATA backref
			 *
			 * The owner of a tree block ref is the level.
			 */
			int level = btrfs_delayed_ref_owner(node);

			ret = add_direct_ref(fs_info, preftrees, ref->level + 1,
					     ref->parent, node->bytenr, count,
			ret = add_direct_ref(fs_info, preftrees, level + 1,
					     node->parent, node->bytenr, count,
					     sc, GFP_ATOMIC);
			break;
		}
		case BTRFS_EXTENT_DATA_REF_KEY: {
			/* NORMAL INDIRECT DATA backref */
			struct btrfs_delayed_data_ref *ref;
			ref = btrfs_delayed_node_to_data_ref(node);

			key.objectid = ref->objectid;
			key.objectid = btrfs_delayed_ref_owner(node);
			key.type = BTRFS_EXTENT_DATA_KEY;
			key.offset = ref->offset;
			key.offset = btrfs_delayed_ref_offset(node);

			/*
			 * If we have a share check context and a reference for
@@ -972,18 +970,14 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info,
			if (sc && count < 0)
				sc->have_delayed_delete_refs = true;

			ret = add_indirect_ref(fs_info, preftrees, ref->root,
			ret = add_indirect_ref(fs_info, preftrees, node->ref_root,
					       &key, 0, node->bytenr, count, sc,
					       GFP_ATOMIC);
			break;
		}
		case BTRFS_SHARED_DATA_REF_KEY: {
			/* SHARED DIRECT FULL backref */
			struct btrfs_delayed_data_ref *ref;

			ref = btrfs_delayed_node_to_data_ref(node);

			ret = add_direct_ref(fs_info, preftrees, 0, ref->parent,
			ret = add_direct_ref(fs_info, preftrees, 0, node->parent,
					     node->bytenr, count, sc,
					     GFP_ATOMIC);
			break;
@@ -2629,7 +2623,7 @@ static int iterate_inode_refs(u64 inum, struct inode_fs_paths *ipath)
			btrfs_debug(fs_root->fs_info,
				"following ref at offset %u for inode %llu in tree %llu",
				cur, found_key.objectid,
				fs_root->root_key.objectid);
				btrfs_root_id(fs_root));
			ret = inode_to_path(parent, name_len,
				      (unsigned long)(iref + 1), eb, ipath);
			if (ret)
@@ -3361,7 +3355,7 @@ static int handle_indirect_tree_backref(struct btrfs_trans_handle *trans,
	if (btrfs_node_blockptr(eb, path->slots[level]) != cur->bytenr) {
		btrfs_err(fs_info,
"couldn't find block (%llu) (level %d) in tree (%llu) with key (%llu %u %llu)",
			  cur->bytenr, level - 1, root->root_key.objectid,
			  cur->bytenr, level - 1, btrfs_root_id(root),
			  tree_key->objectid, tree_key->type, tree_key->offset);
		btrfs_put_root(root);
		ret = -ENOENT;
+5 −6
Original line number Diff line number Diff line
@@ -341,9 +341,9 @@ void btrfs_update_global_block_rsv(struct btrfs_fs_info *fs_info)
	read_lock(&fs_info->global_root_lock);
	rbtree_postorder_for_each_entry_safe(root, tmp, &fs_info->global_root_tree,
					     rb_node) {
		if (root->root_key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
		    root->root_key.objectid == BTRFS_CSUM_TREE_OBJECTID ||
		    root->root_key.objectid == BTRFS_FREE_SPACE_TREE_OBJECTID) {
		if (btrfs_root_id(root) == BTRFS_EXTENT_TREE_OBJECTID ||
		    btrfs_root_id(root) == BTRFS_CSUM_TREE_OBJECTID ||
		    btrfs_root_id(root) == BTRFS_FREE_SPACE_TREE_OBJECTID) {
			num_bytes += btrfs_root_used(&root->root_item);
			min_items++;
		}
@@ -406,7 +406,7 @@ void btrfs_init_root_block_rsv(struct btrfs_root *root)
{
	struct btrfs_fs_info *fs_info = root->fs_info;

	switch (root->root_key.objectid) {
	switch (btrfs_root_id(root)) {
	case BTRFS_CSUM_TREE_OBJECTID:
	case BTRFS_EXTENT_TREE_OBJECTID:
	case BTRFS_FREE_SPACE_TREE_OBJECTID:
@@ -468,8 +468,7 @@ static struct btrfs_block_rsv *get_block_rsv(

	if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state) ||
	    (root == fs_info->uuid_root) ||
	    (trans->adding_csums &&
	     root->root_key.objectid == BTRFS_CSUM_TREE_OBJECTID))
	    (trans->adding_csums && btrfs_root_id(root) == BTRFS_CSUM_TREE_OBJECTID))
		block_rsv = trans->block_rsv;

	if (!block_rsv)
+6 −4
Original line number Diff line number Diff line
@@ -381,9 +381,11 @@ static inline void btrfs_set_inode_last_sub_trans(struct btrfs_inode *inode)
}

/*
 * Should be called while holding the inode's VFS lock in exclusive mode or in a
 * context where no one else can access the inode concurrently (during inode
 * creation or when loading an inode from disk).
 * Should be called while holding the inode's VFS lock in exclusive mode, or
 * while holding the inode's mmap lock (struct btrfs_inode::i_mmap_lock) in
 * either shared or exclusive mode, or in a context where no one else can access
 * the inode concurrently (during inode creation or when loading an inode from
 * disk).
 */
static inline void btrfs_set_inode_full_sync(struct btrfs_inode *inode)
{
@@ -496,7 +498,6 @@ void btrfs_merge_delalloc_extent(struct btrfs_inode *inode, struct extent_state
void btrfs_split_delalloc_extent(struct btrfs_inode *inode,
				 struct extent_state *orig, u64 split);
void btrfs_set_range_writeback(struct btrfs_inode *inode, u64 start, u64 end);
vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf);
void btrfs_evict_inode(struct inode *inode);
struct inode *btrfs_alloc_inode(struct super_block *sb);
void btrfs_destroy_inode(struct inode *inode);
@@ -544,6 +545,7 @@ ssize_t btrfs_dio_read(struct kiocb *iocb, struct iov_iter *iter,
		       size_t done_before);
struct iomap_dio *btrfs_dio_write(struct kiocb *iocb, struct iov_iter *iter,
				  size_t done_before);
struct btrfs_inode *btrfs_find_first_inode(struct btrfs_root *root, u64 min_ino);

extern const struct dentry_operations btrfs_dentry_operations;

+71 −48
Original line number Diff line number Diff line
@@ -90,20 +90,20 @@ bool btrfs_compress_is_valid_type(const char *str, size_t len)
}

static int compression_compress_pages(int type, struct list_head *ws,
               struct address_space *mapping, u64 start, struct page **pages,
               unsigned long *out_pages, unsigned long *total_in,
               unsigned long *total_out)
				      struct address_space *mapping, u64 start,
				      struct folio **folios, unsigned long *out_folios,
				      unsigned long *total_in, unsigned long *total_out)
{
	switch (type) {
	case BTRFS_COMPRESS_ZLIB:
		return zlib_compress_pages(ws, mapping, start, pages,
				out_pages, total_in, total_out);
		return zlib_compress_folios(ws, mapping, start, folios,
					    out_folios, total_in, total_out);
	case BTRFS_COMPRESS_LZO:
		return lzo_compress_pages(ws, mapping, start, pages,
				out_pages, total_in, total_out);
		return lzo_compress_folios(ws, mapping, start, folios,
					   out_folios, total_in, total_out);
	case BTRFS_COMPRESS_ZSTD:
		return zstd_compress_pages(ws, mapping, start, pages,
				out_pages, total_in, total_out);
		return zstd_compress_folios(ws, mapping, start, folios,
					    out_folios, total_in, total_out);
	case BTRFS_COMPRESS_NONE:
	default:
		/*
@@ -115,7 +115,7 @@ static int compression_compress_pages(int type, struct list_head *ws,
		 * Not a big deal, just need to inform caller that we
		 * haven't allocated any pages yet.
		 */
		*out_pages = 0;
		*out_folios = 0;
		return -E2BIG;
	}
}
@@ -158,11 +158,11 @@ static int compression_decompress(int type, struct list_head *ws,
	}
}

static void btrfs_free_compressed_pages(struct compressed_bio *cb)
static void btrfs_free_compressed_folios(struct compressed_bio *cb)
{
	for (unsigned int i = 0; i < cb->nr_pages; i++)
		btrfs_free_compr_page(cb->compressed_pages[i]);
	kfree(cb->compressed_pages);
	for (unsigned int i = 0; i < cb->nr_folios; i++)
		btrfs_free_compr_folio(cb->compressed_folios[i]);
	kfree(cb->compressed_folios);
}

static int btrfs_decompress_bio(struct compressed_bio *cb);
@@ -223,25 +223,25 @@ static unsigned long btrfs_compr_pool_scan(struct shrinker *sh, struct shrink_co
/*
 * Common wrappers for page allocation from compression wrappers
 */
struct page *btrfs_alloc_compr_page(void)
struct folio *btrfs_alloc_compr_folio(void)
{
	struct page *page = NULL;
	struct folio *folio = NULL;

	spin_lock(&compr_pool.lock);
	if (compr_pool.count > 0) {
		page = list_first_entry(&compr_pool.list, struct page, lru);
		list_del_init(&page->lru);
		folio = list_first_entry(&compr_pool.list, struct folio, lru);
		list_del_init(&folio->lru);
		compr_pool.count--;
	}
	spin_unlock(&compr_pool.lock);

	if (page)
		return page;
	if (folio)
		return folio;

	return alloc_page(GFP_NOFS);
	return folio_alloc(GFP_NOFS, 0);
}

void btrfs_free_compr_page(struct page *page)
void btrfs_free_compr_folio(struct folio *folio)
{
	bool do_free = false;

@@ -249,7 +249,7 @@ void btrfs_free_compr_page(struct page *page)
	if (compr_pool.count > compr_pool.thresh) {
		do_free = true;
	} else {
		list_add(&page->lru, &compr_pool.list);
		list_add(&folio->lru, &compr_pool.list);
		compr_pool.count++;
	}
	spin_unlock(&compr_pool.lock);
@@ -257,8 +257,8 @@ void btrfs_free_compr_page(struct page *page)
	if (!do_free)
		return;

	ASSERT(page_ref_count(page) == 1);
	put_page(page);
	ASSERT(folio_ref_count(folio) == 1);
	folio_put(folio);
}

static void end_bbio_comprssed_read(struct btrfs_bio *bbio)
@@ -269,7 +269,7 @@ static void end_bbio_comprssed_read(struct btrfs_bio *bbio)
	if (!status)
		status = errno_to_blk_status(btrfs_decompress_bio(cb));

	btrfs_free_compressed_pages(cb);
	btrfs_free_compressed_folios(cb);
	btrfs_bio_end_io(cb->orig_bbio, status);
	bio_put(&bbio->bio);
}
@@ -323,7 +323,7 @@ static void btrfs_finish_compressed_write_work(struct work_struct *work)
		end_compressed_writeback(cb);
	/* Note, our inode could be gone now */

	btrfs_free_compressed_pages(cb);
	btrfs_free_compressed_folios(cb);
	bio_put(&cb->bbio.bio);
}

@@ -342,17 +342,19 @@ static void end_bbio_comprssed_write(struct btrfs_bio *bbio)
	queue_work(fs_info->compressed_write_workers, &cb->write_end_work);
}

static void btrfs_add_compressed_bio_pages(struct compressed_bio *cb)
static void btrfs_add_compressed_bio_folios(struct compressed_bio *cb)
{
	struct bio *bio = &cb->bbio.bio;
	u32 offset = 0;

	while (offset < cb->compressed_len) {
		int ret;
		u32 len = min_t(u32, cb->compressed_len - offset, PAGE_SIZE);

		/* Maximum compressed extent is smaller than bio size limit. */
		__bio_add_page(bio, cb->compressed_pages[offset >> PAGE_SHIFT],
		ret = bio_add_folio(bio, cb->compressed_folios[offset >> PAGE_SHIFT],
				    len, 0);
		ASSERT(ret);
		offset += len;
	}
}
@@ -367,8 +369,8 @@ static void btrfs_add_compressed_bio_pages(struct compressed_bio *cb)
 * the end io hooks.
 */
void btrfs_submit_compressed_write(struct btrfs_ordered_extent *ordered,
				   struct page **compressed_pages,
				   unsigned int nr_pages,
				   struct folio **compressed_folios,
				   unsigned int nr_folios,
				   blk_opf_t write_flags,
				   bool writeback)
{
@@ -384,14 +386,14 @@ void btrfs_submit_compressed_write(struct btrfs_ordered_extent *ordered,
				  end_bbio_comprssed_write);
	cb->start = ordered->file_offset;
	cb->len = ordered->num_bytes;
	cb->compressed_pages = compressed_pages;
	cb->compressed_folios = compressed_folios;
	cb->compressed_len = ordered->disk_num_bytes;
	cb->writeback = writeback;
	INIT_WORK(&cb->write_end_work, btrfs_finish_compressed_write_work);
	cb->nr_pages = nr_pages;
	cb->nr_folios = nr_folios;
	cb->bbio.bio.bi_iter.bi_sector = ordered->disk_bytenr >> SECTOR_SHIFT;
	cb->bbio.ordered = ordered;
	btrfs_add_compressed_bio_pages(cb);
	btrfs_add_compressed_bio_folios(cb);

	btrfs_submit_bio(&cb->bbio, 0);
}
@@ -599,14 +601,14 @@ void btrfs_submit_compressed_read(struct btrfs_bio *bbio)

	free_extent_map(em);

	cb->nr_pages = DIV_ROUND_UP(compressed_len, PAGE_SIZE);
	cb->compressed_pages = kcalloc(cb->nr_pages, sizeof(struct page *), GFP_NOFS);
	if (!cb->compressed_pages) {
	cb->nr_folios = DIV_ROUND_UP(compressed_len, PAGE_SIZE);
	cb->compressed_folios = kcalloc(cb->nr_folios, sizeof(struct page *), GFP_NOFS);
	if (!cb->compressed_folios) {
		ret = BLK_STS_RESOURCE;
		goto out_free_bio;
	}

	ret2 = btrfs_alloc_page_array(cb->nr_pages, cb->compressed_pages, 0);
	ret2 = btrfs_alloc_folio_array(cb->nr_folios, cb->compressed_folios, 0);
	if (ret2) {
		ret = BLK_STS_RESOURCE;
		goto out_free_compressed_pages;
@@ -618,7 +620,7 @@ void btrfs_submit_compressed_read(struct btrfs_bio *bbio)
	/* Include any pages we added in add_ra_bio_pages(). */
	cb->len = bbio->bio.bi_iter.bi_size;
	cb->bbio.bio.bi_iter.bi_sector = bbio->bio.bi_iter.bi_sector;
	btrfs_add_compressed_bio_pages(cb);
	btrfs_add_compressed_bio_folios(cb);

	if (memstall)
		psi_memstall_leave(&pflags);
@@ -627,7 +629,7 @@ void btrfs_submit_compressed_read(struct btrfs_bio *bbio)
	return;

out_free_compressed_pages:
	kfree(cb->compressed_pages);
	kfree(cb->compressed_folios);
out_free_bio:
	bio_put(&cb->bbio.bio);
out:
@@ -974,6 +976,29 @@ static unsigned int btrfs_compress_set_level(int type, unsigned level)
	return level;
}

/*
 * Wrapper around filemap_get_folio(), with extra error message.
 *
 * Look up the page cache folio of @mapping that covers file offset @start
 * (the lookup index is @start >> PAGE_SHIFT).
 *
 * @mapping:      address space to search
 * @start:        file offset, in bytes, of the data being looked up
 * @in_folio_ret: on success, set to the folio that was found; left untouched
 *                on failure
 *
 * Return: 0 on success, -ENOENT if the lookup failed.  Any error returned by
 * filemap_get_folio() is reported as -ENOENT after logging a critical message
 * with the root id, inode number and file offset.
 */
int btrfs_compress_filemap_get_folio(struct address_space *mapping, u64 start,
				     struct folio **in_folio_ret)
{
	struct folio *in_folio;

	/*
	 * The compressed write path should have the folio locked already, thus
	 * we only need to grab one reference.
	 */
	in_folio = filemap_get_folio(mapping, start >> PAGE_SHIFT);
	if (IS_ERR(in_folio)) {
		/* Only needed for the error message below. */
		struct btrfs_inode *inode = BTRFS_I(mapping->host);

		btrfs_crit(inode->root->fs_info,
		"failed to get page cache, root %lld ino %llu file offset %llu",
			   btrfs_root_id(inode->root), btrfs_ino(inode), start);
		return -ENOENT;
	}
	*in_folio_ret = in_folio;
	return 0;
}

/*
 * Given an address space and start and length, compress the bytes into @pages
 * that are allocated on demand.
@@ -994,11 +1019,9 @@ static unsigned int btrfs_compress_set_level(int type, unsigned level)
 * @total_out is an in/out parameter, must be set to the input length and will
 * be also used to return the total number of compressed bytes
 */
int btrfs_compress_pages(unsigned int type_level, struct address_space *mapping,
			 u64 start, struct page **pages,
			 unsigned long *out_pages,
			 unsigned long *total_in,
			 unsigned long *total_out)
int btrfs_compress_folios(unsigned int type_level, struct address_space *mapping,
			 u64 start, struct folio **folios, unsigned long *out_folios,
			 unsigned long *total_in, unsigned long *total_out)
{
	int type = btrfs_compress_type(type_level);
	int level = btrfs_compress_level(type_level);
@@ -1007,8 +1030,8 @@ int btrfs_compress_pages(unsigned int type_level, struct address_space *mapping,

	level = btrfs_compress_set_level(type, level);
	workspace = get_workspace(type, level);
	ret = compression_compress_pages(type, workspace, mapping, start, pages,
					 out_pages, total_in, total_out);
	ret = compression_compress_pages(type, workspace, mapping, start, folios,
					 out_folios, total_in, total_out);
	put_workspace(type, workspace);
	return ret;
}
Loading