Commit 5e82ed5c authored by Linus Torvalds
Browse files
Pull btrfs updates from David Sterba:
 "Apart from numerous cleanups, there are some performance improvements
  and one minor mount option update. There's one more radix-tree
  conversion (one remaining), and continued work towards enabling large
  folios (almost finished).

  Performance:

   - extent buffer conversion to xarray gains throughput and runtime
     improvements on metadata heavy operations doing writeback (sample
     test shows +50% throughput, -33% runtime)

   - extent io tree cleanups lead to performance improvements by
     avoiding unnecessary searches or repeated searches

   - more efficient extent unpinning when committing transaction
     (estimated run time improvement 3-5%)

  User visible changes:

   - remove standalone mount option 'nologreplay', deprecated in 5.9,
     replacement is 'rescue=nologreplay'

   - in scrub, update reporting, add back device stats message after
     detected errors (accidentally removed during recent refactoring)

  Core:

   - convert extent buffer radix tree to xarray

   - in subpage mode, move block perfect compression out of experimental
     build

   - in zoned mode, introduce sub block groups to allow managing special
     block groups, like the one for relocation or tree-log, to handle
     some corner cases of ENOSPC

   - in scrub, simplify bitmaps for block tracking status

   - continued preparations for large folios:
       - remove assertions for folio order 0
       - add support where missing: compression, buffered write, defrag,
         hole punching, subpage, send

   - fix fsync of files with no hard links not persisting deletion

   - reject tree blocks which are not nodesize aligned, a precaution
     from 4.9 times

   - move transaction abort calls closer to the error sites

   - remove usage of some struct bio_vec internals

   - simplifications in extent map

   - extent IO cleanups and optimizations

   - error handling improvements

   - enhanced ASSERT() macro with optional format strings

   - cleanups:
       - remove unused code
       - naming unifications, dropped __, added prefix
       - merge similar functions
       - use common helpers for various data structures"

* tag 'for-6.16-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (198 commits)
  btrfs: move misplaced comment of btrfs_path::keep_locks
  btrfs: remove standalone "nologreplay" mount option
  btrfs: use a single variable to track return value at btrfs_page_mkwrite()
  btrfs: don't return VM_FAULT_SIGBUS on failure to set delalloc for mmap write
  btrfs: simplify early error checking in btrfs_page_mkwrite()
  btrfs: pass true to btrfs_delalloc_release_space() at btrfs_page_mkwrite()
  btrfs: fix wrong start offset for delalloc space release during mmap write
  btrfs: fix harmless race getting delayed ref head count when running delayed refs
  btrfs: log error codes during failures when writing super blocks
  btrfs: simplify error return logic when getting folio at prepare_one_folio()
  btrfs: return real error from __filemap_get_folio() calls
  btrfs: remove superfluous return value check at btrfs_dio_iomap_begin()
  btrfs: fix invalid data space release when truncating block in NOCOW mode
  btrfs: update Kconfig option descriptions
  btrfs: update list of features built under experimental config
  btrfs: send: remove btrfs_debug() calls
  btrfs: use boolean for delalloc argument to btrfs_free_reserved_extent()
  btrfs: use boolean for delalloc argument to btrfs_free_reserved_bytes()
  btrfs: fold error checks when allocating ordered extent and update comments
  btrfs: check we grabbed inode reference when allocating an ordered extent
  ...
parents 49fffac9 eeb133a6
Loading
Loading
Loading
Loading
+22 −10
Original line number Diff line number Diff line
@@ -52,10 +52,10 @@ config BTRFS_FS_RUN_SANITY_TESTS
	bool "Btrfs will run sanity tests upon loading"
	depends on BTRFS_FS
	help
	  This will run some basic sanity tests on the free space cache
	  code to make sure it is acting as it should.  These are mostly
	  regression tests and are only really interesting to btrfs
	  developers.
	  This will run sanity tests for core functionality like free space,
	  extent maps, extent io, extent buffers, inodes, qgroups and others,
	  at module load time.  These are mostly regression tests and are only
	  interesting to developers.

	  If unsure, say N.

@@ -63,9 +63,12 @@ config BTRFS_DEBUG
	bool "Btrfs debugging support"
	depends on BTRFS_FS
	help
	  Enable run-time debugging support for the btrfs filesystem. This may
	  enable additional and expensive checks with negative impact on
	  performance, or export extra information via sysfs.
	  Enable run-time debugging support for the btrfs filesystem.

	  Additional potentially expensive checks, debugging functionality or
	  sysfs exported information is enabled, like leak checks of internal
	  objects, optional forced space fragmentation and /sys/fs/btrfs/debug .
	  This has negative impact on performance.

	  If unsure, say N.

@@ -73,8 +76,10 @@ config BTRFS_ASSERT
	bool "Btrfs assert support"
	depends on BTRFS_FS
	help
	  Enable run-time assertion checking.  This will result in panics if
	  any of the assertions trip.  This is meant for btrfs developers only.
	  Enable run-time assertion checking. Additional safety checks are
	  done, simple enough not to affect performance but verify invariants
	  and assumptions of code to run properly. This may result in panics,
	  and is meant for developers but can be enabled in general.

	  If unsure, say N.

@@ -89,7 +94,14 @@ config BTRFS_EXPERIMENTAL

	  Current list:

	  - extent map shrinker - performance problems with too frequent shrinks
	  - COW fixup worker warning - last warning before removing the
				       functionality catching out-of-band page
				       dirtying, not necessary since 5.8

	  - RAID mirror read policy - additional read policies for balancing
				      reading from redundant block group
				      profiles (currently: pid, round-robin,
				      fixed devid)

	  - send stream protocol v3 - fs-verity support

+1 −2
Original line number Diff line number Diff line
@@ -219,8 +219,7 @@ static void run_ordered_work(struct btrfs_workqueue *wq,
		spin_lock_irqsave(lock, flags);
		if (list_empty(list))
			break;
		work = list_entry(list->next, struct btrfs_work,
				  ordered_list);
		work = list_first_entry(list, struct btrfs_work, ordered_list);
		if (!test_bit(WORK_DONE_BIT, &work->flags))
			break;
		/*
+6 −6
Original line number Diff line number Diff line
@@ -2877,7 +2877,7 @@ int btrfs_backref_iter_start(struct btrfs_backref_iter *iter, u64 bytenr)
		goto release;
	}
	if (path->slots[0] == 0) {
		WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
		DEBUG_WARN();
		ret = -EUCLEAN;
		goto release;
	}
@@ -3134,7 +3134,7 @@ void btrfs_backref_cleanup_node(struct btrfs_backref_cache *cache,
		return;

	while (!list_empty(&node->upper)) {
		edge = list_entry(node->upper.next, struct btrfs_backref_edge,
		edge = list_first_entry(&node->upper, struct btrfs_backref_edge,
					list[LOWER]);
		list_del(&edge->list[LOWER]);
		list_del(&edge->list[UPPER]);
@@ -3473,7 +3473,7 @@ int btrfs_backref_add_tree_node(struct btrfs_trans_handle *trans,
		 * type BTRFS_TREE_BLOCK_REF_KEY
		 */
		ASSERT(list_is_singular(&cur->upper));
		edge = list_entry(cur->upper.next, struct btrfs_backref_edge,
		edge = list_first_entry(&cur->upper, struct btrfs_backref_edge,
					list[LOWER]);
		ASSERT(list_empty(&edge->list[UPPER]));
		exist = edge->node[UPPER];
@@ -3617,7 +3617,7 @@ int btrfs_backref_finish_upper_links(struct btrfs_backref_cache *cache,

		/* Sanity check, we shouldn't have any unchecked nodes */
		if (!upper->checked) {
			ASSERT(0);
			DEBUG_WARN("we should not have any unchecked nodes");
			return -EUCLEAN;
		}

+2 −2
Original line number Diff line number Diff line
@@ -423,8 +423,8 @@ struct btrfs_backref_node *btrfs_backref_alloc_node(
struct btrfs_backref_edge *btrfs_backref_alloc_edge(
		struct btrfs_backref_cache *cache);

#define		LINK_LOWER	(1 << 0)
#define		LINK_UPPER	(1 << 1)
#define		LINK_LOWER	(1U << 0)
#define		LINK_UPPER	(1U << 1)

void btrfs_backref_link_edge(struct btrfs_backref_edge *edge,
			     struct btrfs_backref_node *lower,
+26 −29
Original line number Diff line number Diff line
@@ -192,7 +192,7 @@ static void btrfs_end_repair_bio(struct btrfs_bio *repair_bbio,
		btrfs_repair_io_failure(fs_info, btrfs_ino(inode),
				  repair_bbio->file_offset, fs_info->sectorsize,
				  repair_bbio->saved_iter.bi_sector << SECTOR_SHIFT,
				  page_folio(bv->bv_page), bv->bv_offset, mirror);
				  bvec_phys(bv), mirror);
	} while (mirror != fbio->bbio->mirror_num);

done:
@@ -512,7 +512,7 @@ static void btrfs_submit_bio(struct bio *bio, struct btrfs_io_context *bioc,
	}
}

static blk_status_t btrfs_bio_csum(struct btrfs_bio *bbio)
static int btrfs_bio_csum(struct btrfs_bio *bbio)
{
	if (bbio->bio.bi_opf & REQ_META)
		return btree_csum_one_bio(bbio);
@@ -543,11 +543,11 @@ static void run_one_async_start(struct btrfs_work *work)
{
	struct async_submit_bio *async =
		container_of(work, struct async_submit_bio, work);
	blk_status_t ret;
	int ret;

	ret = btrfs_bio_csum(async->bbio);
	if (ret)
		async->bbio->bio.bi_status = ret;
		async->bbio->bio.bi_status = errno_to_blk_status(ret);
}

/*
@@ -674,8 +674,8 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
	bool use_append = btrfs_use_zone_append(bbio);
	struct btrfs_io_context *bioc = NULL;
	struct btrfs_io_stripe smap;
	blk_status_t ret;
	int error;
	blk_status_t status;
	int ret;

	if (!bbio->inode || btrfs_is_data_reloc_root(inode->root))
		smap.rst_search_commit_root = true;
@@ -683,10 +683,10 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
		smap.rst_search_commit_root = false;

	btrfs_bio_counter_inc_blocked(fs_info);
	error = btrfs_map_block(fs_info, btrfs_op(bio), logical, &map_length,
	ret = btrfs_map_block(fs_info, btrfs_op(bio), logical, &map_length,
			      &bioc, &smap, &mirror_num);
	if (error) {
		ret = errno_to_blk_status(error);
	if (ret) {
		status = errno_to_blk_status(ret);
		btrfs_bio_counter_dec(fs_info);
		goto end_bbio;
	}
@@ -700,7 +700,7 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)

		split = btrfs_split_bio(fs_info, bbio, map_length);
		if (IS_ERR(split)) {
			ret = errno_to_blk_status(PTR_ERR(split));
			status = errno_to_blk_status(PTR_ERR(split));
			btrfs_bio_counter_dec(fs_info);
			goto end_bbio;
		}
@@ -715,7 +715,8 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
	if (bio_op(bio) == REQ_OP_READ && is_data_bbio(bbio)) {
		bbio->saved_iter = bio->bi_iter;
		ret = btrfs_lookup_bio_sums(bbio);
		if (ret)
		status = errno_to_blk_status(ret);
		if (status)
			goto fail;
	}

@@ -748,13 +749,15 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
				goto done;

			ret = btrfs_bio_csum(bbio);
			if (ret)
			status = errno_to_blk_status(ret);
			if (status)
				goto fail;
		} else if (use_append ||
			   (btrfs_is_zoned(fs_info) && inode &&
			    inode->flags & BTRFS_INODE_NODATASUM)) {
			ret = btrfs_alloc_dummy_sum(bbio);
			if (ret)
			status = errno_to_blk_status(ret);
			if (status)
				goto fail;
		}
	}
@@ -775,10 +778,10 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
		ASSERT(bbio->bio.bi_pool == &btrfs_clone_bioset);
		ASSERT(remaining);

		btrfs_bio_end_io(remaining, ret);
		btrfs_bio_end_io(remaining, status);
	}
end_bbio:
	btrfs_bio_end_io(bbio, ret);
	btrfs_bio_end_io(bbio, status);
	/* Do not submit another chunk */
	return true;
}
@@ -803,8 +806,7 @@ void btrfs_submit_bbio(struct btrfs_bio *bbio, int mirror_num)
 * freeing the bio.
 */
int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
			    u64 length, u64 logical, struct folio *folio,
			    unsigned int folio_offset, int mirror_num)
			    u64 length, u64 logical, phys_addr_t paddr, int mirror_num)
{
	struct btrfs_io_stripe smap = { 0 };
	struct bio_vec bvec;
@@ -835,8 +837,7 @@ int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,

	bio_init(&bio, smap.dev->bdev, &bvec, 1, REQ_OP_WRITE | REQ_SYNC);
	bio.bi_iter.bi_sector = smap.physical >> SECTOR_SHIFT;
	ret = bio_add_folio(&bio, folio, length, folio_offset);
	ASSERT(ret);
	__bio_add_page(&bio, phys_to_page(paddr), length, offset_in_page(paddr));
	ret = submit_bio_wait(&bio);
	if (ret) {
		/* try to remap that extent elsewhere? */
@@ -900,22 +901,18 @@ int __init btrfs_bioset_init(void)
		return -ENOMEM;
	if (bioset_init(&btrfs_clone_bioset, BIO_POOL_SIZE,
			offsetof(struct btrfs_bio, bio), 0))
		goto out_free_bioset;
		goto out;
	if (bioset_init(&btrfs_repair_bioset, BIO_POOL_SIZE,
			offsetof(struct btrfs_bio, bio),
			BIOSET_NEED_BVECS))
		goto out_free_clone_bioset;
		goto out;
	if (mempool_init_kmalloc_pool(&btrfs_failed_bio_pool, BIO_POOL_SIZE,
				      sizeof(struct btrfs_failed_bio)))
		goto out_free_repair_bioset;
		goto out;
	return 0;

out_free_repair_bioset:
	bioset_exit(&btrfs_repair_bioset);
out_free_clone_bioset:
	bioset_exit(&btrfs_clone_bioset);
out_free_bioset:
	bioset_exit(&btrfs_bioset);
out:
	btrfs_bioset_exit();
	return -ENOMEM;
}

Loading