Commit 8912c2fd authored by Linus Torvalds
Browse files
Pull btrfs updates from David Sterba:
 "User visible changes, feature updates:

   - when using block size > page size, enable direct IO

   - fall back to buffered IO if the data profile has duplication; this
     is a workaround to avoid checksum mismatches on block group
     profiles with redundancy, real direct IO is still possible on
     single or RAID0

   - redo export of zoned statistics, moved from sysfs to
     /proc/pid/mountstats due to size limitations of the former

  Experimental features:

   - remove the offload checksum tunable; it was intended to find the
     best way to do it, but since we've switched to offloading to a
     thread for everything we don't need it anymore

   - initial support for remap-tree feature, a translation layer of
     logical block addresses that allows changes without moving/rewriting
     blocks to do e.g. relocation, or other changes that require COW

  Notable fixes:

   - automatic removal of accidentally leftover chunks when
     free-space-tree is enabled since mkfs.btrfs v6.16.1

   - zoned mode:
      - do not try to append to conventional zones when RAID is mixing
        zoned and conventional drives
      - fixup write pointers when mixing zoned and conventional on
        DUP/RAID* profiles

   - when using squota, relax deletion rules for qgroups with 0 members
     to allow easier recovery from accounting bugs, also add more checks
     to detect bad accounting

   - fix periodic reclaim scanning, properly check boundary conditions
     so as not to trigger it unexpectedly or miss the time to run it

   - trim:
      - continue after first error
      - change reporting to the first detected error
      - add more cancellation points
      - reduce contention of big device lock that can block other
        operations when there's lots of trimmed space

   - when chunk allocation is forced (needs experimental build) fix
     transaction abort when unexpected space layout is detected

  Core:

   - switch to crypto library API for checksumming, removed module
     dependencies, pointer indirections, etc.

   - error handling improvements

   - adjust how and where transaction commit or abort are done,
     including places where they may not be necessary

   - minor compression optimization to skip single block ranges

   - improve how compression folios are handled

   - new and updated selftests

   - cleanups, refactoring:
      - auto-freeing and other automatic variable cleanup conversion
      - structure size optimizations
      - condition annotations"

* tag 'for-6.20-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (137 commits)
  btrfs: get rid of compressed_bio::compressed_folios[]
  btrfs: get rid of compressed_folios[] usage for encoded writes
  btrfs: get rid of compressed_folios[] usage for compressed read
  btrfs: remove the old btrfs_compress_folios() infrastructure
  btrfs: switch to btrfs_compress_bio() interface for compressed writes
  btrfs: introduce btrfs_compress_bio() helper
  btrfs: zlib: introduce zlib_compress_bio() helper
  btrfs: zstd: introduce zstd_compress_bio() helper
  btrfs: lzo: introduce lzo_compress_bio() helper
  btrfs: zoned: factor out the zone loading part into a testable function
  btrfs: add cleanup function for btrfs_free_chunk_map
  btrfs: tests: add cleanup functions for test specific functions
  btrfs: raid56: fix memory leak of btrfs_raid_bio::stripe_uptodate_bitmap
  btrfs: tests: add unit tests for pending extent walking functions
  btrfs: fix EEXIST abort due to non-consecutive gaps in chunk allocation
  btrfs: fix transaction commit blocking during trim of unallocated space
  btrfs: handle user interrupt properly in btrfs_trim_fs()
  btrfs: preserve first error in btrfs_trim_fs()
  btrfs: continue trimming remaining devices on failure
  btrfs: do not BUG_ON() in btrfs_remove_block_group()
  ...
parents b29a7a8e 161ab30d
Loading
Loading
Loading
Loading
+5 −8
Original line number Diff line number Diff line
@@ -4,11 +4,8 @@ config BTRFS_FS
	tristate "Btrfs filesystem support"
	select BLK_CGROUP_PUNT_BIO
	select CRC32
	select CRYPTO
	select CRYPTO_CRC32C
	select CRYPTO_XXHASH
	select CRYPTO_SHA256
	select CRYPTO_BLAKE2B
	select CRYPTO_LIB_BLAKE2B
	select CRYPTO_LIB_SHA256
	select ZLIB_INFLATE
	select ZLIB_DEFLATE
	select LZO_COMPRESS
@@ -18,6 +15,7 @@ config BTRFS_FS
	select FS_IOMAP
	select RAID6_PQ
	select XOR_BLOCKS
	select XXHASH
	depends on PAGE_SIZE_LESS_THAN_256KB

	help
@@ -106,9 +104,6 @@ config BTRFS_EXPERIMENTAL

	  - send stream protocol v3 - fs-verity support

	  - checksum offload mode - sysfs knob to affect when checksums are
	                            calculated (at IO time, or in a thread)

	  - raid-stripe-tree - additional mapping of extents to devices to
			       support RAID1* profiles on zoned devices,
			       RAID56 not yet supported
@@ -121,4 +116,6 @@ config BTRFS_EXPERIMENTAL

	  - asynchronous checksum generation for data writes

	  - remap-tree - logical address remapping tree

	  If unsure, say N.
+2 −1
Original line number Diff line number Diff line
@@ -44,4 +44,5 @@ btrfs-$(CONFIG_BTRFS_FS_RUN_SANITY_TESTS) += tests/free-space-tests.o \
	tests/extent-buffer-tests.o tests/btrfs-tests.o \
	tests/extent-io-tests.o tests/inode-tests.o tests/qgroup-tests.o \
	tests/free-space-tree-tests.o tests/extent-map-tests.o \
	tests/raid-stripe-tree-tests.o tests/delayed-refs-tests.o
	tests/raid-stripe-tree-tests.o tests/delayed-refs-tests.o \
	tests/chunk-allocation-tests.o
+30 −0
Original line number Diff line number Diff line
@@ -240,6 +240,26 @@ BTRFS_SETGET_FUNCS(block_group_flags, struct btrfs_block_group_item, flags, 64);
BTRFS_SETGET_STACK_FUNCS(stack_block_group_flags,
			struct btrfs_block_group_item, flags, 64);

/* struct btrfs_block_group_item_v2 */
BTRFS_SETGET_STACK_FUNCS(stack_block_group_v2_used, struct btrfs_block_group_item_v2,
			 used, 64);
BTRFS_SETGET_FUNCS(block_group_v2_used, struct btrfs_block_group_item_v2, used, 64);
BTRFS_SETGET_STACK_FUNCS(stack_block_group_v2_chunk_objectid,
			 struct btrfs_block_group_item_v2, chunk_objectid, 64);
BTRFS_SETGET_FUNCS(block_group_v2_chunk_objectid,
		   struct btrfs_block_group_item_v2, chunk_objectid, 64);
BTRFS_SETGET_STACK_FUNCS(stack_block_group_v2_flags,
			 struct btrfs_block_group_item_v2, flags, 64);
BTRFS_SETGET_FUNCS(block_group_v2_flags, struct btrfs_block_group_item_v2, flags, 64);
BTRFS_SETGET_STACK_FUNCS(stack_block_group_v2_remap_bytes,
			 struct btrfs_block_group_item_v2, remap_bytes, 64);
BTRFS_SETGET_FUNCS(block_group_v2_remap_bytes, struct btrfs_block_group_item_v2,
		   remap_bytes, 64);
BTRFS_SETGET_STACK_FUNCS(stack_block_group_v2_identity_remap_count,
			 struct btrfs_block_group_item_v2, identity_remap_count, 32);
BTRFS_SETGET_FUNCS(block_group_v2_identity_remap_count, struct btrfs_block_group_item_v2,
		   identity_remap_count, 32);

/* struct btrfs_free_space_info */
BTRFS_SETGET_FUNCS(free_space_extent_count, struct btrfs_free_space_info,
		   extent_count, 32);
@@ -863,6 +883,12 @@ BTRFS_SETGET_STACK_FUNCS(super_uuid_tree_generation, struct btrfs_super_block,
			 uuid_tree_generation, 64);
BTRFS_SETGET_STACK_FUNCS(super_nr_global_roots, struct btrfs_super_block,
			 nr_global_roots, 64);
BTRFS_SETGET_STACK_FUNCS(super_remap_root, struct btrfs_super_block,
			 remap_root, 64);
BTRFS_SETGET_STACK_FUNCS(super_remap_root_generation, struct btrfs_super_block,
			 remap_root_generation, 64);
BTRFS_SETGET_STACK_FUNCS(super_remap_root_level, struct btrfs_super_block,
			 remap_root_level, 8);

/* struct btrfs_file_extent_item */
BTRFS_SETGET_STACK_FUNCS(stack_file_extent_type, struct btrfs_file_extent_item,
@@ -1010,6 +1036,10 @@ BTRFS_SETGET_STACK_FUNCS(stack_verity_descriptor_encryption,
BTRFS_SETGET_STACK_FUNCS(stack_verity_descriptor_size,
			 struct btrfs_verity_descriptor_item, size, 64);

BTRFS_SETGET_FUNCS(remap_address, struct btrfs_remap_item, address, 64);
BTRFS_SETGET_STACK_FUNCS(stack_remap_address, struct btrfs_remap_item,
			 address, 64);

/* Cast into the data area of the leaf. */
#define btrfs_item_ptr(leaf, slot, type)				\
	((type *)(btrfs_item_nr_offset(leaf, 0) + btrfs_item_offset(leaf, slot)))
+1 −3
Original line number Diff line number Diff line
@@ -3609,10 +3609,8 @@ int btrfs_backref_finish_upper_links(struct btrfs_backref_cache *cache,
		}

		rb_node = rb_simple_insert(&cache->rb_root, &upper->simple_node);
		if (unlikely(rb_node)) {
		if (unlikely(rb_node))
			btrfs_backref_panic(cache->fs_info, upper->bytenr, -EEXIST);
			return -EUCLEAN;
		}

		list_add_tail(&edge->list[UPPER], &upper->lower);

+17 −18
Original line number Diff line number Diff line
@@ -97,7 +97,13 @@ static struct btrfs_bio *btrfs_split_bio(struct btrfs_fs_info *fs_info,
		bbio->orig_logical = orig_bbio->orig_logical;
		orig_bbio->orig_logical += map_length;
	}

	bbio->csum_search_commit_root = orig_bbio->csum_search_commit_root;
	bbio->can_use_append = orig_bbio->can_use_append;
	bbio->is_scrub = orig_bbio->is_scrub;
	bbio->is_remap = orig_bbio->is_remap;
	bbio->async_csum = orig_bbio->async_csum;

	atomic_inc(&orig_bbio->pending_ios);
	return bbio;
}
@@ -480,6 +486,8 @@ static void btrfs_clone_write_end_io(struct bio *bio)

static void btrfs_submit_dev_bio(struct btrfs_device *dev, struct bio *bio)
{
	u64 physical = bio->bi_iter.bi_sector << SECTOR_SHIFT;

	if (!dev || !dev->bdev ||
	    test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state) ||
	    (btrfs_op(bio) == BTRFS_MAP_WRITE &&
@@ -494,12 +502,13 @@ static void btrfs_submit_dev_bio(struct btrfs_device *dev, struct bio *bio)
	 * For zone append writing, bi_sector must point the beginning of the
	 * zone
	 */
	if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
		u64 physical = bio->bi_iter.bi_sector << SECTOR_SHIFT;
	if (btrfs_bio(bio)->can_use_append && btrfs_dev_is_sequential(dev, physical)) {
		u64 zone_start = round_down(physical, dev->fs_info->zone_size);

		ASSERT(btrfs_dev_is_sequential(dev, physical));
		bio->bi_iter.bi_sector = zone_start >> SECTOR_SHIFT;
		bio->bi_opf &= ~REQ_OP_WRITE;
		bio->bi_opf |= REQ_OP_ZONE_APPEND;
	}
	btrfs_debug(dev->fs_info,
	"%s: rw %d 0x%x, sector=%llu, dev=%lu (%s id %llu), size=%u",
@@ -662,11 +671,6 @@ static bool should_async_write(struct btrfs_bio *bbio)
	bool auto_csum_mode = true;

#ifdef CONFIG_BTRFS_EXPERIMENTAL
	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
	enum btrfs_offload_csum_mode csum_mode = READ_ONCE(fs_devices->offload_csum_mode);

	if (csum_mode == BTRFS_OFFLOAD_CSUM_FORCE_ON)
		return true;
	/*
	 * Write bios will calculate checksum and submit bio at the same time.
	 * Unless explicitly required don't offload serial csum calculate and bio
@@ -747,7 +751,6 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
	u64 logical = bio->bi_iter.bi_sector << SECTOR_SHIFT;
	u64 length = bio->bi_iter.bi_size;
	u64 map_length = length;
	bool use_append = btrfs_use_zone_append(bbio);
	struct btrfs_io_context *bioc = NULL;
	struct btrfs_io_stripe smap;
	blk_status_t status;
@@ -775,8 +778,10 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
	if (bio_op(bio) == REQ_OP_WRITE && is_data_bbio(bbio))
		bbio->orig_logical = logical;

	bbio->can_use_append = btrfs_use_zone_append(bbio);

	map_length = min(map_length, length);
	if (use_append)
	if (bbio->can_use_append)
		map_length = btrfs_append_map_length(bbio, map_length);

	if (map_length < length) {
@@ -805,11 +810,6 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
	}

	if (btrfs_op(bio) == BTRFS_MAP_WRITE) {
		if (use_append) {
			bio->bi_opf &= ~REQ_OP_WRITE;
			bio->bi_opf |= REQ_OP_ZONE_APPEND;
		}

		if (is_data_bbio(bbio) && bioc && bioc->use_rst) {
			/*
			 * No locking for the list update, as we only add to
@@ -827,7 +827,7 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
		 */
		if (!(inode->flags & BTRFS_INODE_NODATASUM) &&
		    !test_bit(BTRFS_FS_STATE_NO_DATA_CSUMS, &fs_info->fs_state) &&
		    !btrfs_is_data_reloc_root(inode->root)) {
		    !btrfs_is_data_reloc_root(inode->root) && !bbio->is_remap) {
			if (should_async_write(bbio) &&
			    btrfs_wq_submit_bio(bbio, bioc, &smap, mirror_num))
				goto done;
@@ -836,9 +836,8 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
			status = errno_to_blk_status(ret);
			if (status)
				goto fail;
		} else if (use_append ||
			   (btrfs_is_zoned(fs_info) && inode &&
			    inode->flags & BTRFS_INODE_NODATASUM)) {
		} else if (bbio->can_use_append ||
			   (btrfs_is_zoned(fs_info) && inode->flags & BTRFS_INODE_NODATASUM)) {
			ret = btrfs_alloc_dummy_sum(bbio);
			status = errno_to_blk_status(ret);
			if (status)
Loading