Commit dfa8bad3 authored by Darrick J. Wong
Browse files

xfs: convey file I/O errors to the health monitor



Connect the fserror reporting to the health monitor so that xfs can send
events about file I/O errors to the xfs_healer daemon.  These events are
entirely informational because xfs cannot regenerate user data, so
hopefully the fsnotify I/O error event gets noticed by the relevant
management systems.

Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
parent e76e0e3f
Loading
Loading
Loading
Loading
+24 −0
Original line number Diff line number Diff line
@@ -1019,6 +1019,9 @@ struct xfs_rtgroup_geometry {
#define XFS_HEALTH_MONITOR_DOMAIN_RTDEV		(6)
#define XFS_HEALTH_MONITOR_DOMAIN_LOGDEV	(7)

/* file range events */
#define XFS_HEALTH_MONITOR_DOMAIN_FILERANGE	(8)

/* Health monitor event types */

/* status of the monitor itself */
@@ -1039,6 +1042,17 @@ struct xfs_rtgroup_geometry {
/* media errors */
#define XFS_HEALTH_MONITOR_TYPE_MEDIA_ERROR	(7)

/* pagecache I/O to a file range failed */
#define XFS_HEALTH_MONITOR_TYPE_BUFREAD		(8)
#define XFS_HEALTH_MONITOR_TYPE_BUFWRITE	(9)

/* direct I/O to a file range failed */
#define XFS_HEALTH_MONITOR_TYPE_DIOREAD		(10)
#define XFS_HEALTH_MONITOR_TYPE_DIOWRITE	(11)

/* out of band media error reported for a file range */
#define XFS_HEALTH_MONITOR_TYPE_DATALOST	(12)

/* lost events */
struct xfs_health_monitor_lost {
	__u64	count;
@@ -1079,6 +1093,15 @@ struct xfs_health_monitor_shutdown {
	__u32	reasons;
};

/*
 * file range events
 *
 * Payload describing an I/O error against a range of a file.  The kernel
 * fills this in from the file I/O error event that was reported for the
 * inode.
 */
struct xfs_health_monitor_filerange {
	/* start of the affected range (presumably a byte offset; TODO confirm) */
	__u64	pos;
	/* length of the affected range, in the same units as pos */
	__u64	len;
	/* inode number of the affected file */
	__u64	ino;
	/* generation number of the affected inode */
	__u32	gen;
	/* error number, always reported as a positive value */
	__u32	error;
};

/* disk media errors */
struct xfs_health_monitor_media {
	__u64	daddr;
@@ -1107,6 +1130,7 @@ struct xfs_health_monitor_event {
		struct xfs_health_monitor_inode inode;
		struct xfs_health_monitor_shutdown shutdown;
		struct xfs_health_monitor_media media;
		struct xfs_health_monitor_filerange filerange;
	} e;

	/* zeroes */
+85 −0
Original line number Diff line number Diff line
@@ -22,10 +22,12 @@
#include "xfs_healthmon.h"
#include "xfs_fsops.h"
#include "xfs_notify_failure.h"
#include "xfs_file.h"

#include <linux/anon_inodes.h>
#include <linux/eventpoll.h>
#include <linux/poll.h>
#include <linux/fserror.h>

/*
 * Live Health Monitoring
@@ -222,6 +224,27 @@ xfs_healthmon_merge_events(
			return true;
		}
		return false;

	case XFS_HEALTHMON_BUFREAD:
	case XFS_HEALTHMON_BUFWRITE:
	case XFS_HEALTHMON_DIOREAD:
	case XFS_HEALTHMON_DIOWRITE:
	case XFS_HEALTHMON_DATALOST:
		/* logically adjacent file ranges can merge */
		if (existing->fino != new->fino || existing->fgen != new->fgen)
			return false;

		if (existing->fpos + existing->flen == new->fpos) {
			existing->flen += new->flen;
			return true;
		}

		if (new->fpos + new->flen == existing->fpos) {
			existing->fpos = new->fpos;
			existing->flen += new->flen;
			return true;
		}
		return false;
	}

	return false;
@@ -578,6 +601,55 @@ xfs_healthmon_report_media(
	xfs_healthmon_put(hm);
}

/*
 * Translate the fserror event type of a file I/O failure into the matching
 * healthmon event type.
 */
static inline enum xfs_healthmon_type
file_ioerr_type(
	enum fserror_type	action)
{
	switch (action) {
	case FSERR_METADATA:
		/* xfs_fs_report_error never passes these along */
		break;
	case FSERR_DATA_LOST:
		return XFS_HEALTHMON_DATALOST;
	case FSERR_BUFFERED_READ:
		return XFS_HEALTHMON_BUFREAD;
	case FSERR_DIRECTIO_READ:
		return XFS_HEALTHMON_DIOREAD;
	case FSERR_BUFFERED_WRITE:
		return XFS_HEALTHMON_BUFWRITE;
	case FSERR_DIRECTIO_WRITE:
		return XFS_HEALTHMON_DIOWRITE;
	}

	/* only reached for a type that has no healthmon equivalent */
	ASSERT(0);
	return -1;
}

/* Add a file io error event to the reporting queue. */
void
xfs_healthmon_report_file_ioerror(
	struct xfs_inode		*ip,
	const struct fserror_event	*p)
{
	struct xfs_healthmon_event	event = {
		.type			= file_ioerr_type(p->type),
		.domain			= XFS_HEALTHMON_FILERANGE,
		.fino			= ip->i_ino,
		.fgen			= VFS_I(ip)->i_generation,
		.fpos			= p->pos,
		.flen			= p->len,
		/* send positive error number to userspace */
		.error			= -p->error,
	};
	struct xfs_healthmon		*hm = xfs_healthmon_get(ip->i_mount);

	if (!hm)
		return;

	trace_xfs_healthmon_report_file_ioerror(hm, p);

	xfs_healthmon_push(hm, &event);
	xfs_healthmon_put(hm);
}

static inline void
xfs_healthmon_reset_outbuf(
	struct xfs_healthmon		*hm)
@@ -633,6 +705,7 @@ static const unsigned int domain_map[] = {
	[XFS_HEALTHMON_DATADEV]		= XFS_HEALTH_MONITOR_DOMAIN_DATADEV,
	[XFS_HEALTHMON_RTDEV]		= XFS_HEALTH_MONITOR_DOMAIN_RTDEV,
	[XFS_HEALTHMON_LOGDEV]		= XFS_HEALTH_MONITOR_DOMAIN_LOGDEV,
	[XFS_HEALTHMON_FILERANGE]	= XFS_HEALTH_MONITOR_DOMAIN_FILERANGE,
};

static const unsigned int type_map[] = {
@@ -644,6 +717,11 @@ static const unsigned int type_map[] = {
	[XFS_HEALTHMON_UNMOUNT]		= XFS_HEALTH_MONITOR_TYPE_UNMOUNT,
	[XFS_HEALTHMON_SHUTDOWN]	= XFS_HEALTH_MONITOR_TYPE_SHUTDOWN,
	[XFS_HEALTHMON_MEDIA_ERROR]	= XFS_HEALTH_MONITOR_TYPE_MEDIA_ERROR,
	[XFS_HEALTHMON_BUFREAD]		= XFS_HEALTH_MONITOR_TYPE_BUFREAD,
	[XFS_HEALTHMON_BUFWRITE]	= XFS_HEALTH_MONITOR_TYPE_BUFWRITE,
	[XFS_HEALTHMON_DIOREAD]		= XFS_HEALTH_MONITOR_TYPE_DIOREAD,
	[XFS_HEALTHMON_DIOWRITE]	= XFS_HEALTH_MONITOR_TYPE_DIOWRITE,
	[XFS_HEALTHMON_DATALOST]	= XFS_HEALTH_MONITOR_TYPE_DATALOST,
};

/* Render event as a V0 structure */
@@ -701,6 +779,13 @@ xfs_healthmon_format_v0(
		hme.e.media.daddr = event->daddr;
		hme.e.media.bbcount = event->bbcount;
		break;
	case XFS_HEALTHMON_FILERANGE:
		hme.e.filerange.ino = event->fino;
		hme.e.filerange.gen = event->fgen;
		hme.e.filerange.pos = event->fpos;
		hme.e.filerange.len = event->flen;
		hme.e.filerange.error = abs(event->error);
		break;
	default:
		break;
	}
+21 −0
Original line number Diff line number Diff line
@@ -82,6 +82,13 @@ enum xfs_healthmon_type {

	/* media errors */
	XFS_HEALTHMON_MEDIA_ERROR,

	/* file range events */
	XFS_HEALTHMON_BUFREAD,
	XFS_HEALTHMON_BUFWRITE,
	XFS_HEALTHMON_DIOREAD,
	XFS_HEALTHMON_DIOWRITE,
	XFS_HEALTHMON_DATALOST,
};

enum xfs_healthmon_domain {
@@ -97,6 +104,9 @@ enum xfs_healthmon_domain {
	XFS_HEALTHMON_DATADEV,
	XFS_HEALTHMON_RTDEV,
	XFS_HEALTHMON_LOGDEV,

	/* file range events */
	XFS_HEALTHMON_FILERANGE,
};

struct xfs_healthmon_event {
@@ -139,6 +149,14 @@ struct xfs_healthmon_event {
			xfs_daddr_t	daddr;
			uint64_t	bbcount;
		};
		/* file range events */
		struct {
			xfs_ino_t	fino;
			loff_t		fpos;
			uint64_t	flen;
			uint32_t	fgen;
			int		error;
		};
	};
};

@@ -157,6 +175,9 @@ void xfs_healthmon_report_shutdown(struct xfs_mount *mp, uint32_t flags);
void xfs_healthmon_report_media(struct xfs_mount *mp, enum xfs_device fdev,
		xfs_daddr_t daddr, uint64_t bbcount);

void xfs_healthmon_report_file_ioerror(struct xfs_inode *ip,
		const struct fserror_event *p);

long xfs_ioc_health_monitor(struct file *file,
		struct xfs_health_monitor __user *arg);

+12 −0
Original line number Diff line number Diff line
@@ -47,12 +47,14 @@
#include "xfs_parent.h"
#include "xfs_rtalloc.h"
#include "xfs_zone_alloc.h"
#include "xfs_healthmon.h"
#include "scrub/stats.h"
#include "scrub/rcbag_btree.h"

#include <linux/magic.h>
#include <linux/fs_context.h>
#include <linux/fs_parser.h>
#include <linux/fserror.h>

static const struct super_operations xfs_super_operations;

@@ -1301,6 +1303,15 @@ xfs_fs_show_stats(
	return 0;
}

/*
 * Superblock ->report_error handler: pass file I/O error events on to the
 * health monitor.
 */
static void
xfs_fs_report_error(
	const struct fserror_event	*event)
{
	/*
	 * Events without an inode and metadata events are dropped here because
	 * healthmon already knows about those.
	 */
	if (!event->inode)
		return;
	if (event->type == FSERR_METADATA)
		return;

	xfs_healthmon_report_file_ioerror(XFS_I(event->inode), event);
}

static const struct super_operations xfs_super_operations = {
	.alloc_inode		= xfs_fs_alloc_inode,
	.destroy_inode		= xfs_fs_destroy_inode,
@@ -1317,6 +1328,7 @@ static const struct super_operations xfs_super_operations = {
	.free_cached_objects	= xfs_fs_free_cached_objects,
	.shutdown		= xfs_fs_shutdown,
	.show_stats		= xfs_fs_show_stats,
	.report_error		= xfs_fs_report_error,
};

static int
+2 −0
Original line number Diff line number Diff line
@@ -54,6 +54,8 @@
#include "xfs_health.h"
#include "xfs_healthmon.h"
#include "xfs_notify_failure.h"
#include "xfs_file.h"
#include <linux/fserror.h>

/*
 * We include this last to have the helpers above available for the trace
Loading