Commit 33dfafa9 authored by Kent Overstreet
Browse files

bcachefs: Fix safe errors by default



i.e. the start of automatic self healing:

If errors=continue or fix_safe, we now automatically fix simple errors
without user intervention.

New error action option: fix_safe

This replaces the existing errors=ro option, which gets a new slot, i.e.
existing errors=ro users now get errors=fix_safe.

This is currently only enabled for a limited set of errors - initially
just disk accounting: errors we would always want to fix, and for which
we don't want to require user intervention (i.e. to make sure a bug
report gets filed).

Errors will still be counted in the superblock, so we (developers) will
still know they've been occurring if a bug report gets filed (as bug
reports typically include the errors superblock section).

Eventually we'll be enabling this for a much wider set of errors, after
we've done thorough error injection testing.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent a56da697
Loading
Loading
Loading
Loading
+3 −2
Original line number Diff line number Diff line
@@ -990,8 +990,9 @@ enum bch_version_upgrade_opts {

#define BCH_ERROR_ACTIONS()		\
	x(continue,		0)	\
	x(ro,			1)	\
	x(panic,		2)
	x(fix_safe,		1)	\
	x(panic,		2)	\
	x(ro,			3)

enum bch_error_actions {
#define x(t, n) BCH_ON_ERROR_##t = n,
+18 −1
Original line number Diff line number Diff line
@@ -15,6 +15,7 @@ bool bch2_inconsistent_error(struct bch_fs *c)
	switch (c->opts.errors) {
	case BCH_ON_ERROR_continue:
		return false;
	case BCH_ON_ERROR_fix_safe:
	case BCH_ON_ERROR_ro:
		if (bch2_fs_emergency_read_only(c))
			bch_err(c, "inconsistency detected - emergency read only at journal seq %llu",
@@ -191,6 +192,12 @@ static void prt_actioning(struct printbuf *out, const char *action)
	prt_str(out, "ing");
}

/*
 * Per-error extra fsck flags, indexed by BCH_FSCK_ERR_* id.
 *
 * The table is generated by expanding BCH_SB_ERRS() with an x() that takes
 * (name, number, flags) and stores `flags` in the slot [BCH_FSCK_ERR_<name>];
 * the `n` argument is not used here.
 *
 * bch2_fsck_err() ORs the entry for its `err` argument into the flags it was
 * called with, after checking `err` against ARRAY_SIZE() of this table.
 */
static const u8 fsck_flags_extra[] = {
#define x(t, n, flags)		[BCH_FSCK_ERR_##t] = flags,
	BCH_SB_ERRS()
#undef x
};

int bch2_fsck_err(struct bch_fs *c,
		  enum bch_fsck_flags flags,
		  enum bch_sb_error_id err,
@@ -203,6 +210,9 @@ int bch2_fsck_err(struct bch_fs *c,
	int ret = -BCH_ERR_fsck_ignore;
	const char *action_orig = "fix?", *action = action_orig;

	if (!WARN_ON(err >= ARRAY_SIZE(fsck_flags_extra)))
		flags |= fsck_flags_extra[err];

	if ((flags & FSCK_CAN_FIX) &&
	    test_bit(err, c->sb.errors_silent))
		return -BCH_ERR_fsck_fix;
@@ -265,7 +275,14 @@ int bch2_fsck_err(struct bch_fs *c,
		prt_printf(out, bch2_log_msg(c, ""));
#endif

	if (!test_bit(BCH_FS_fsck_running, &c->flags)) {
	if ((flags & FSCK_CAN_FIX) &&
	    (flags & FSCK_AUTOFIX) &&
	    (c->opts.errors == BCH_ON_ERROR_continue ||
	     c->opts.errors == BCH_ON_ERROR_fix_safe)) {
		prt_str(out, ", ");
		prt_actioning(out, action);
		ret = -BCH_ERR_fsck_fix;
	} else if (!test_bit(BCH_FS_fsck_running, &c->flags)) {
		if (c->opts.errors != BCH_ON_ERROR_continue ||
		    !(flags & (FSCK_CAN_FIX|FSCK_CAN_IGNORE))) {
			prt_str(out, ", shutting down");
+0 −7
Original line number Diff line number Diff line
@@ -108,13 +108,6 @@ struct fsck_err_state {
	char			*last_msg;
};

/*
 * Flag bits for the `flags` argument of bch2_fsck_err(). Each enumerator
 * occupies its own bit, so values may be OR'd together.
 */
enum bch_fsck_flags {
	FSCK_CAN_FIX		= 1 << 0,
	FSCK_CAN_IGNORE		= 1 << 1,
	FSCK_NEED_FSCK		= 1 << 2,
	FSCK_NO_RATELIMIT	= 1 << 3,
};

#define fsck_err_count(_c, _err)	bch2_sb_err_count(_c, BCH_FSCK_ERR_##_err)

__printf(4, 5) __cold
+1 −1
Original line number Diff line number Diff line
@@ -137,7 +137,7 @@ enum fsck_err_opts {
	x(errors,			u8,				\
	  OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME,			\
	  OPT_STR(bch2_error_actions),					\
	  BCH_SB_ERROR_ACTION,		BCH_ON_ERROR_ro,		\
	  BCH_SB_ERROR_ACTION,		BCH_ON_ERROR_fix_safe,		\
	  NULL,		"Action to take on filesystem error")		\
	x(metadata_replicas,		u8,				\
	  OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME,			\
+286 −278

File changed.

Preview size limit exceeded, changes collapsed.