Commit 02c372e1 authored by Johannes Thumshirn, committed by David Sterba
Browse files

btrfs: add support for inserting raid stripe extents



Add support for inserting stripe extents into the raid stripe tree on
completion of every write that needs an extra logical-to-physical
translation when using RAID.

Inserting the stripe extents happens after the data I/O has completed.
This is done to:

  a) support zone-append and
  b) rule out the possibility of a RAID-write-hole.

Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
parent 51502090
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -33,7 +33,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
	   uuid-tree.o props.o free-space-tree.o tree-checker.o space-info.o \
	   block-rsv.o delalloc-space.o block-group.o discard.o reflink.o \
	   subpage.o tree-mod-log.o extent-io-tree.o fs.o messages.o bio.o \
	   lru_cache.o
	   lru_cache.o raid-stripe-tree.o

btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
btrfs-$(CONFIG_BTRFS_FS_REF_VERIFY) += ref-verify.o
+21 −0
Original line number Diff line number Diff line
@@ -14,6 +14,7 @@
#include "rcu-string.h"
#include "zoned.h"
#include "file-item.h"
#include "raid-stripe-tree.h"

static struct bio_set btrfs_bioset;
static struct bio_set btrfs_clone_bioset;
@@ -415,6 +416,9 @@ static void btrfs_orig_write_end_io(struct bio *bio)
	else
		bio->bi_status = BLK_STS_OK;

	if (bio_op(bio) == REQ_OP_ZONE_APPEND && !bio->bi_status)
		stripe->physical = bio->bi_iter.bi_sector << SECTOR_SHIFT;

	btrfs_orig_bbio_end_io(bbio);
	btrfs_put_bioc(bioc);
}
@@ -426,6 +430,8 @@ static void btrfs_clone_write_end_io(struct bio *bio)
	if (bio->bi_status) {
		atomic_inc(&stripe->bioc->error);
		btrfs_log_dev_io_error(bio, stripe->dev);
	} else if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
		stripe->physical = bio->bi_iter.bi_sector << SECTOR_SHIFT;
	}

	/* Pass on control to the original bio this one was cloned from */
@@ -487,6 +493,7 @@ static void btrfs_submit_mirrored_bio(struct btrfs_io_context *bioc, int dev_nr)
	bio->bi_private = &bioc->stripes[dev_nr];
	bio->bi_iter.bi_sector = bioc->stripes[dev_nr].physical >> SECTOR_SHIFT;
	bioc->stripes[dev_nr].bioc = bioc;
	bioc->size = bio->bi_iter.bi_size;
	btrfs_submit_dev_bio(bioc->stripes[dev_nr].dev, bio);
}

@@ -496,6 +503,8 @@ static void __btrfs_submit_bio(struct bio *bio, struct btrfs_io_context *bioc,
	if (!bioc) {
		/* Single mirror read/write fast path. */
		btrfs_bio(bio)->mirror_num = mirror_num;
		if (bio_op(bio) != REQ_OP_READ)
			btrfs_bio(bio)->orig_physical = smap->physical;
		bio->bi_iter.bi_sector = smap->physical >> SECTOR_SHIFT;
		if (bio_op(bio) != REQ_OP_READ)
			btrfs_bio(bio)->orig_physical = smap->physical;
@@ -688,6 +697,18 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
			bio->bi_opf |= REQ_OP_ZONE_APPEND;
		}

		if (is_data_bbio(bbio) && bioc &&
		    btrfs_need_stripe_tree_update(bioc->fs_info, bioc->map_type)) {
			/*
			 * No locking for the list update, as we only add to
			 * the list in the I/O submission path, and list
			 * iteration only happens in the completion path, which
			 * can't happen until after the last submission.
			 */
			btrfs_get_bioc(bioc);
			list_add_tail(&bioc->rst_ordered_entry, &bbio->ordered->bioc_list);
		}

		/*
		 * Csum items for reloc roots have already been cloned at this
		 * point, so they are handled as part of the no-checksum case.
+1 −0
Original line number Diff line number Diff line
@@ -42,6 +42,7 @@
#include "file-item.h"
#include "orphan.h"
#include "tree-checker.h"
#include "raid-stripe-tree.h"

#undef SCRAMBLE_DELAYED_REFS

+7 −1
Original line number Diff line number Diff line
@@ -71,6 +71,7 @@
#include "super.h"
#include "orphan.h"
#include "backref.h"
#include "raid-stripe-tree.h"

struct btrfs_iget_args {
	u64 ino;
@@ -3091,6 +3092,10 @@ int btrfs_finish_one_ordered(struct btrfs_ordered_extent *ordered_extent)

	trans->block_rsv = &inode->block_rsv;

	ret = btrfs_insert_raid_extent(trans, ordered_extent);
	if (ret)
		goto out;

	if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags))
		compress_type = ordered_extent->compress_type;
	if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) {
@@ -3224,7 +3229,8 @@ int btrfs_finish_one_ordered(struct btrfs_ordered_extent *ordered_extent)
int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered)
{
	if (btrfs_is_zoned(btrfs_sb(ordered->inode->i_sb)) &&
	    !test_bit(BTRFS_ORDERED_IOERR, &ordered->flags))
	    !test_bit(BTRFS_ORDERED_IOERR, &ordered->flags) &&
	    list_empty(&ordered->bioc_list))
		btrfs_finish_ordered_zoned(ordered);
	return btrfs_finish_one_ordered(ordered);
}
+1 −0
Original line number Diff line number Diff line
@@ -191,6 +191,7 @@ static struct btrfs_ordered_extent *alloc_ordered_extent(
	INIT_LIST_HEAD(&entry->log_list);
	INIT_LIST_HEAD(&entry->root_extent_list);
	INIT_LIST_HEAD(&entry->work_list);
	INIT_LIST_HEAD(&entry->bioc_list);
	init_completion(&entry->completion);

	/*
Loading