Commit bc00965d authored by Matthew Wilcox (Oracle), committed by David Sterba
Browse files

btrfs: count super block write errors in device instead of tracking folio error state



Currently the error status of a super block write is tracked in the
page/folio Error status bit. For that we need to hold a reference to the
folio for the whole duration of the write and the wait.

Count the number of superblock writeback errors in the btrfs_device.
That means we don't need the folio to stay around until it's waited for,
and can avoid the extra call to folio_get/put.

Also remove a mention of PageError in a comment as it's the last mention
of the page Error state.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
parent 617fb10e
Loading
Loading
Loading
Loading
+19 −27
Original line number Diff line number Diff line
@@ -3634,11 +3634,15 @@ static void btrfs_end_super_write(struct bio *bio)
				"lost super block write due to IO error on %s (%d)",
				btrfs_dev_name(device),
				blk_status_to_errno(bio->bi_status));
			folio_set_error(fi.folio);
			btrfs_dev_stat_inc_and_print(device,
						     BTRFS_DEV_STAT_WRITE_ERRS);
			/* Ensure failure if the primary sb fails. */
			if (bio->bi_opf & REQ_FUA)
				atomic_add(BTRFS_SUPER_PRIMARY_WRITE_ERROR,
					   &device->sb_write_errors);
			else
				atomic_inc(&device->sb_write_errors);
		}

		folio_unlock(fi.folio);
		folio_put(fi.folio);
	}
@@ -3742,10 +3746,11 @@ static int write_dev_supers(struct btrfs_device *device,
	struct address_space *mapping = device->bdev->bd_inode->i_mapping;
	SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
	int i;
	int errors = 0;
	int ret;
	u64 bytenr, bytenr_orig;

	atomic_set(&device->sb_write_errors, 0);

	if (max_mirrors == 0)
		max_mirrors = BTRFS_SUPER_MIRROR_MAX;

@@ -3765,7 +3770,7 @@ static int write_dev_supers(struct btrfs_device *device,
			btrfs_err(device->fs_info,
				"couldn't get super block location for mirror %d",
				i);
			errors++;
			atomic_inc(&device->sb_write_errors);
			continue;
		}
		if (bytenr + BTRFS_SUPER_INFO_SIZE >=
@@ -3785,14 +3790,11 @@ static int write_dev_supers(struct btrfs_device *device,
			btrfs_err(device->fs_info,
			    "couldn't get super block page for bytenr %llu",
			    bytenr);
			errors++;
			atomic_inc(&device->sb_write_errors);
			continue;
		}
		ASSERT(folio_order(folio) == 0);

		/* Bump the refcount for wait_dev_supers() */
		folio_get(folio);

		offset = offset_in_folio(folio, bytenr);
		disk_super = folio_address(folio) + offset;
		memcpy(disk_super, sb, BTRFS_SUPER_INFO_SIZE);
@@ -3820,16 +3822,17 @@ static int write_dev_supers(struct btrfs_device *device,
		submit_bio(bio);

		if (btrfs_advance_sb_log(device, i))
			errors++;
			atomic_inc(&device->sb_write_errors);
	}
	return errors < i ? 0 : -1;
	return atomic_read(&device->sb_write_errors) < i ? 0 : -1;
}

/*
 * Wait for write completion of superblocks done by write_dev_supers,
 * @max_mirrors same for write and wait phases.
 *
 * Return number of errors when folio is not found or not marked up to date.
 * Return -1 if primary super block write failed or when there were no super block
 * copies written. Otherwise 0.
 */
static int wait_dev_supers(struct btrfs_device *device, int max_mirrors)
{
@@ -3860,30 +3863,19 @@ static int wait_dev_supers(struct btrfs_device *device, int max_mirrors)

		folio = filemap_get_folio(device->bdev->bd_inode->i_mapping,
					  bytenr >> PAGE_SHIFT);
		if (IS_ERR(folio)) {
			errors++;
			if (i == 0)
				primary_failed = true;
		/* If the folio has been removed, then we know it completed. */
		if (IS_ERR(folio))
			continue;
		}
		ASSERT(folio_order(folio) == 0);

		/* Folio will be unlocked once the write completes. */
		folio_wait_locked(folio);
		if (folio_test_error(folio)) {
			errors++;
			if (i == 0)
				primary_failed = true;
		}

		/* Drop our reference */
		folio_put(folio);

		/* Drop the reference from the writing run */
		folio_put(folio);
	}

	/* log error, force error return */
	errors += atomic_read(&device->sb_write_errors);
	if (errors >= BTRFS_SUPER_PRIMARY_WRITE_ERROR)
		primary_failed = true;
	if (primary_failed) {
		btrfs_err(device->fs_info, "error writing primary super block to device %llu",
			  device->devid);
+1 −1
Original line number Diff line number Diff line
@@ -1602,7 +1602,7 @@ static void set_btree_ioerr(struct extent_buffer *eb)
	 * can be no longer dirty nor marked anymore for writeback (if a
	 * subsequent modification to the extent buffer didn't happen before the
	 * transaction commit), which makes filemap_fdata[write|wait]_range not
	 * able to find the pages tagged with SetPageError at transaction
	 * able to find the pages which contain errors at transaction
	 * commit time. So if this happens we must abort the transaction,
	 * otherwise we commit a super block with btree roots that point to
	 * btree nodes/leafs whose content on disk is invalid - either garbage
+9 −0
Original line number Diff line number Diff line
@@ -92,6 +92,9 @@ enum btrfs_raid_types {
#define BTRFS_DEV_STATE_FLUSH_SENT	(4)
#define BTRFS_DEV_STATE_NO_READA	(5)

/*
 * Special value encoding failure to write primary super block.
 *
 * btrfs_end_super_write() adds this value to btrfs_device::sb_write_errors
 * when a failed super block bio has REQ_FUA set, while any other failure
 * increments the counter by one. wait_dev_supers() therefore treats a total
 * that is greater than or equal to this value as a primary super block
 * write failure.
 */
#define BTRFS_SUPER_PRIMARY_WRITE_ERROR		(INT_MAX / 2)

struct btrfs_fs_devices;

struct btrfs_device {
@@ -142,6 +145,12 @@ struct btrfs_device {
	/* type and info about this device */
	u64 type;

	/*
	 * Counter of super block write errors, values greater than or equal to
	 * BTRFS_SUPER_PRIMARY_WRITE_ERROR encode primary super block write
	 * failure.
	 */
	atomic_t sb_write_errors;

	/* minimal io size for this device */
	u32 sector_size;