Commit f5d26fa3 authored by Kent Overstreet's avatar Kent Overstreet
Browse files

bcachefs: bch_sb_field_errors



Add a new superblock section to keep counts of errors seen since
filesystem creation: we'll be adding counters for every distinct fsck
error.

The new superblock section has entries of the form [ id, count,
time_of_last_error ]; this is intended to let us see what errors are
occurring - and getting fixed - via show-super output.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent 94119eeb
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -70,6 +70,7 @@ bcachefs-y := \
	reflink.o		\
	replicas.o		\
	sb-clean.o		\
	sb-errors.o		\
	sb-members.o		\
	siphash.o		\
	six.o			\
+9 −5
Original line number Diff line number Diff line
@@ -209,6 +209,7 @@
#include "nocow_locking_types.h"
#include "opts.h"
#include "recovery_types.h"
#include "sb-errors_types.h"
#include "seqmutex.h"
#include "util.h"

@@ -992,11 +993,6 @@ struct bch_fs {
	struct bio_set		dio_read_bioset;
	struct bio_set		nocow_flush_bioset;

	/* ERRORS */
	struct list_head	fsck_errors;
	struct mutex		fsck_error_lock;
	bool			fsck_alloc_err;

	/* QUOTAS */
	struct bch_memquota_type quotas[QTYP_NR];

@@ -1045,6 +1041,14 @@ struct bch_fs {
	struct bch2_time_stats	times[BCH_TIME_STAT_NR];

	struct btree_transaction_stats btree_transaction_stats[BCH_TRANSACTIONS_NR];

	/* ERRORS */
	struct list_head	fsck_error_msgs;
	struct mutex		fsck_error_msgs_lock;
	bool			fsck_alloc_msgs_err;

	bch_sb_errors_cpu	fsck_error_counts;
	struct mutex		fsck_error_counts_lock;
};

extern struct wait_queue_head bch2_read_only_wait;
+13 −1
Original line number Diff line number Diff line
@@ -1218,7 +1218,8 @@ struct bch_sb_field {
	x(journal_seq_blacklist, 8)		\
	x(journal_v2,	9)			\
	x(counters,	10)			\
	x(members_v2,	11)
	x(members_v2,	11)			\
	x(errors,	12)

enum bch_sb_field_type {
#define x(f, nr)	BCH_SB_FIELD_##f = nr,
@@ -1621,6 +1622,17 @@ struct bch_sb_field_journal_seq_blacklist {
	__u64			_data[];
};

/*
 * Superblock section of error counters: the common section header followed
 * by a flexible array of entries, one per recorded error, each carrying an
 * error id, an occurrence count and the time the error was last seen.
 */
struct bch_sb_field_errors {
	struct bch_sb_field	field;
	struct bch_sb_field_error_entry {
		/*
		 * Packed word holding both the error id and the count; read
		 * and written through the BCH_SB_ERROR_ENTRY_ID and
		 * BCH_SB_ERROR_ENTRY_NR bitmask accessors.
		 */
		__le64		v;
		/* NOTE(review): time base/units are not visible here - confirm */
		__le64		last_error_time;
	}			entries[];
};

/*
 * Bitfield accessors for bch_sb_field_error_entry.v: ID is declared with
 * (0, 16) and NR with (16, 64).
 * NOTE(review): presumably these are (start, end) bit offsets, i.e. the id in
 * the low 16 bits and the count in the remaining 48 - confirm against the
 * LE64_BITMASK definition.
 */
LE64_BITMASK(BCH_SB_ERROR_ENTRY_ID,	struct bch_sb_field_error_entry, v,  0, 16);
LE64_BITMASK(BCH_SB_ERROR_ENTRY_NR,	struct bch_sb_field_error_entry, v, 16, 64);

/* Superblock: */

/*
+1 −0
Original line number Diff line number Diff line
@@ -213,6 +213,7 @@
	x(BCH_ERR_invalid_sb,		invalid_sb_crypt)			\
	x(BCH_ERR_invalid_sb,		invalid_sb_clean)			\
	x(BCH_ERR_invalid_sb,		invalid_sb_quota)			\
	x(BCH_ERR_invalid_sb,		invalid_sb_errors)			\
	x(BCH_ERR_invalid_sb,		invalid_sb_opt_compression)		\
	x(BCH_ERR_invalid,		invalid_bkey)				\
	x(BCH_ERR_operation_blocked,    nocow_lock_blocked)			\
+11 −11
Original line number Diff line number Diff line
@@ -117,27 +117,27 @@ static struct fsck_err_state *fsck_err_get(struct bch_fs *c, const char *fmt)
	if (test_bit(BCH_FS_FSCK_DONE, &c->flags))
		return NULL;

	list_for_each_entry(s, &c->fsck_errors, list)
	list_for_each_entry(s, &c->fsck_error_msgs, list)
		if (s->fmt == fmt) {
			/*
			 * move it to the head of the list: repeated fsck errors
			 * are common
			 */
			list_move(&s->list, &c->fsck_errors);
			list_move(&s->list, &c->fsck_error_msgs);
			return s;
		}

	s = kzalloc(sizeof(*s), GFP_NOFS);
	if (!s) {
		if (!c->fsck_alloc_err)
		if (!c->fsck_alloc_msgs_err)
			bch_err(c, "kmalloc err, cannot ratelimit fsck errs");
		c->fsck_alloc_err = true;
		c->fsck_alloc_msgs_err = true;
		return NULL;
	}

	INIT_LIST_HEAD(&s->list);
	s->fmt = fmt;
	list_add(&s->list, &c->fsck_errors);
	list_add(&s->list, &c->fsck_error_msgs);
	return s;
}

@@ -153,7 +153,7 @@ int bch2_fsck_err(struct bch_fs *c, unsigned flags, const char *fmt, ...)
	prt_vprintf(out, fmt, args);
	va_end(args);

	mutex_lock(&c->fsck_error_lock);
	mutex_lock(&c->fsck_error_msgs_lock);
	s = fsck_err_get(c, fmt);
	if (s) {
		/*
@@ -163,7 +163,7 @@ int bch2_fsck_err(struct bch_fs *c, unsigned flags, const char *fmt, ...)
		 */
		if (s->last_msg && !strcmp(buf.buf, s->last_msg)) {
			ret = s->ret;
			mutex_unlock(&c->fsck_error_lock);
			mutex_unlock(&c->fsck_error_msgs_lock);
			printbuf_exit(&buf);
			return ret;
		}
@@ -258,7 +258,7 @@ int bch2_fsck_err(struct bch_fs *c, unsigned flags, const char *fmt, ...)
	if (s)
		s->ret = ret;

	mutex_unlock(&c->fsck_error_lock);
	mutex_unlock(&c->fsck_error_msgs_lock);

	printbuf_exit(&buf);

@@ -279,9 +279,9 @@ void bch2_flush_fsck_errs(struct bch_fs *c)
{
	struct fsck_err_state *s, *n;

	mutex_lock(&c->fsck_error_lock);
	mutex_lock(&c->fsck_error_msgs_lock);

	list_for_each_entry_safe(s, n, &c->fsck_errors, list) {
	list_for_each_entry_safe(s, n, &c->fsck_error_msgs, list) {
		if (s->ratelimited && s->last_msg)
			bch_err(c, "Saw %llu errors like:\n    %s", s->nr, s->last_msg);

@@ -290,5 +290,5 @@ void bch2_flush_fsck_errs(struct bch_fs *c)
		kfree(s);
	}

	mutex_unlock(&c->fsck_error_lock);
	mutex_unlock(&c->fsck_error_msgs_lock);
}
Loading