Commit a48373e7 authored by Darrick J. Wong
Browse files

xfs: start creating infrastructure for health monitoring



Start creating helper functions and infrastructure to pass filesystem
health events to a health monitoring file.  Since this is an
administrative interface, we only support a single health monitor
process per filesystem, so we don't need to use anything fancy such as
notifier chains (== tons of indirect calls).

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
parent 347b7042
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -88,6 +88,7 @@ xfs-y += xfs_aops.o \
				   xfs_globals.o \
				   xfs_handle.o \
				   xfs_health.o \
				   xfs_healthmon.o \
				   xfs_icache.o \
				   xfs_ioctl.o \
				   xfs_iomap.o \
+7 −0
Original line number Diff line number Diff line
@@ -1003,6 +1003,12 @@ struct xfs_rtgroup_geometry {
#define XFS_RTGROUP_GEOM_SICK_RMAPBT	(1U << 3)  /* reverse mappings */
#define XFS_RTGROUP_GEOM_SICK_REFCNTBT	(1U << 4)  /* reference counts */

/*
 * Argument block for XFS_IOC_HEALTH_MONITOR (8 + 1 + 23 = 32 bytes).
 *
 * No flags or non-default formats are defined yet: the validation helper in
 * xfs_healthmon.c rejects the request unless flags, format, and every pad
 * byte are zero.
 */
struct xfs_health_monitor {
	__u64	flags;		/* flags; must currently be zero */
	__u8	format;		/* output format; must currently be zero */
	__u8	pad[23];	/* zeroes */
};

/*
 * ioctl commands that are used by Linux filesystems
 */
@@ -1042,6 +1048,7 @@ struct xfs_rtgroup_geometry {
#define XFS_IOC_GETPARENTS_BY_HANDLE _IOWR('X', 63, struct xfs_getparents_by_handle)
#define XFS_IOC_SCRUBV_METADATA	_IOWR('X', 64, struct xfs_scrub_vec_head)
#define XFS_IOC_RTGROUP_GEOMETRY _IOWR('X', 65, struct xfs_rtgroup_geometry)
/* health monitoring; userspace passes in a struct xfs_health_monitor */
#define XFS_IOC_HEALTH_MONITOR	_IOW ('X', 68, struct xfs_health_monitor)

/*
 * ioctl commands that replace IRIX syssgi()'s
+1 −0
Original line number Diff line number Diff line
@@ -19,6 +19,7 @@
#include "xfs_da_btree.h"
#include "xfs_quota_defs.h"
#include "xfs_rtgroup.h"
#include "xfs_healthmon.h"

#include <linux/fserror.h>

fs/xfs/xfs_healthmon.c

0 → 100644
+262 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (c) 2024-2026 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_trace.h"
#include "xfs_ag.h"
#include "xfs_btree.h"
#include "xfs_da_format.h"
#include "xfs_da_btree.h"
#include "xfs_quota_defs.h"
#include "xfs_rtgroup.h"
#include "xfs_healthmon.h"

#include <linux/anon_inodes.h>
#include <linux/eventpoll.h>
#include <linux/poll.h>

/*
 * Live Health Monitoring
 * ======================
 *
 * Autonomous self-healing of XFS filesystems requires a means for the kernel
 * to send filesystem health events to a monitoring daemon in userspace.  To
 * accomplish this, we create an anonymous-inode file (the health monitor)
 * that is responsible for translating internal events about filesystem health
 * into a format that can be parsed easily by userspace.  When those internal
 * events occur, the core filesystem code calls this health monitor to convey
 * the events to userspace.  Userspace reads events from the file descriptor
 * returned by the ioctl.
 *
 * The healthmon abstraction has a weak reference to the host filesystem mount
 * so that the queueing and processing of the events do not pin the mount and
 * cannot slow down the main filesystem.  The healthmon object can exist past
 * the end of the filesystem mount.
 */

/*
 * Value of xfs_healthmon::mount_cookie when the monitor is not attached to a
 * mount.  This is also the initial value, because the monitor is allocated
 * zeroed and only receives a real cookie in xfs_healthmon_attach.
 */
#define DETACHED_MOUNT_COOKIE		((uintptr_t)0)

/*
 * Global spinlock for atomically updating the xfs_mount <-> xfs_healthmon
 * pointers, i.e. xfs_mount::m_healthmon and xfs_healthmon::mount_cookie are
 * only changed together while this lock is held.
 */
static DEFINE_SPINLOCK(xfs_healthmon_lock);

/*
 * Look up the health monitor attached to @mp and take a reference to it.
 *
 * Returns the monitor with an extra reference held, or NULL if the mount has
 * no monitor or the monitor's refcount has already dropped to zero.  The
 * lookup runs inside an RCU read-side section because the final put frees the
 * object through kfree_rcu.  Release the reference with xfs_healthmon_put.
 */
static struct xfs_healthmon *
xfs_healthmon_get(
	struct xfs_mount		*mp)
{
	struct xfs_healthmon		*found = NULL;
	struct xfs_healthmon		*cur;

	rcu_read_lock();
	cur = mp->m_healthmon;
	if (cur && refcount_inc_not_zero(&cur->ref))
		found = cur;
	rcu_read_unlock();

	return found;
}

/*
 * Drop one reference to a healthmon object.  Whoever drops the last reference
 * frees the object, but only after an RCU grace period so that a concurrent
 * xfs_healthmon_get cannot race with the free.
 */
static void
xfs_healthmon_put(
	struct xfs_healthmon		*hm)
{
	/* Somebody else still holds a reference; nothing more to do. */
	if (!refcount_dec_and_test(&hm->ref))
		return;

	kfree_rcu_mightsleep(hm);
}

/* Attach a health monitor to an xfs_mount.  Only one allowed at a time. */
STATIC int
xfs_healthmon_attach(
	struct xfs_mount	*mp,
	struct xfs_healthmon	*hm)
{
	spin_lock(&xfs_healthmon_lock);
	if (mp->m_healthmon != NULL) {
		spin_unlock(&xfs_healthmon_lock);
		return -EEXIST;
	}

	refcount_inc(&hm->ref);
	mp->m_healthmon = hm;
	hm->mount_cookie = (uintptr_t)mp->m_super;
	spin_unlock(&xfs_healthmon_lock);

	return 0;
}

/* Detach a xfs mount from a specific healthmon instance. */
STATIC void
xfs_healthmon_detach(
	struct xfs_healthmon	*hm)
{
	spin_lock(&xfs_healthmon_lock);
	if (hm->mount_cookie == DETACHED_MOUNT_COOKIE) {
		spin_unlock(&xfs_healthmon_lock);
		return;
	}

	XFS_M((struct super_block *)hm->mount_cookie)->m_healthmon = NULL;
	hm->mount_cookie = DETACHED_MOUNT_COOKIE;
	spin_unlock(&xfs_healthmon_lock);

	xfs_healthmon_put(hm);
}

/*
 * Detach @mp from its health monitor, if it has one.  A temporary reference
 * keeps the monitor alive across the detach and is dropped afterwards.
 */
void
xfs_healthmon_unmount(
	struct xfs_mount		*mp)
{
	struct xfs_healthmon		*hm;

	hm = xfs_healthmon_get(mp);
	if (hm) {
		xfs_healthmon_detach(hm);
		xfs_healthmon_put(hm);
	}
}

/*
 * Read handler for the health monitor file.  No event delivery is implemented
 * here yet, so every read fails with -EIO.
 */
STATIC ssize_t
xfs_healthmon_read_iter(
	struct kiocb		*iocb,
	struct iov_iter		*to)
{
	return -EIO;
}

/*
 * Release handler for the health monitor file: tear down the link to the
 * mount and give up the file's reference to the monitor.  Always returns 0.
 */
STATIC int
xfs_healthmon_release(
	struct inode		*inode,
	struct file		*file)
{
	struct xfs_healthmon	*healthmon = file->private_data;

	/*
	 * Userspace processes can terminate at any time and for any reason,
	 * so this file may well be closed while the filesystem is still
	 * mounted.  Detaching clears xfs_mount::m_healthmon, which lets
	 * another process create a new health monitor file.
	 */
	xfs_healthmon_detach(healthmon);

	/* Drop the reference owned by the open file. */
	xfs_healthmon_put(healthmon);
	return 0;
}

/*
 * Validate ioctl parameters.  No flags or formats are supported, so the
 * request is acceptable only if flags, format, and all padding bytes are
 * zero.
 */
static inline bool
xfs_healthmon_validate(
	const struct xfs_health_monitor	*hmo)
{
	return !hmo->flags &&
	       !hmo->format &&
	       !memchr_inv(&hmo->pad, 0, sizeof(hmo->pad));
}

/*
 * Report the monitor's status through fdinfo: a "state" line that reads
 * "dead" once the monitor has been detached from its mount and "alive"
 * otherwise, followed by a "dev" line with the major:minor device number
 * recorded when the monitor was created.
 */
static void
xfs_healthmon_show_fdinfo(
	struct seq_file		*m,
	struct file		*file)
{
	struct xfs_healthmon	*hm = file->private_data;
	const char		*state = "alive";

	if (hm->mount_cookie == DETACHED_MOUNT_COOKIE)
		state = "dead";

	seq_printf(m, "state:\t%s\ndev:\t%d:%d\n", state,
			MAJOR(hm->dev), MINOR(hm->dev));
}

/*
 * File operations for the health monitor fd.  Only fdinfo reporting, reading,
 * and release are wired up; the file's private_data is the xfs_healthmon.
 */
static const struct file_operations xfs_healthmon_fops = {
	.owner		= THIS_MODULE,
	.show_fdinfo	= xfs_healthmon_show_fdinfo,
	.read_iter	= xfs_healthmon_read_iter,
	.release	= xfs_healthmon_release,
};

/*
 * Create a health monitoring file for the filesystem that @file lives on.
 *
 * @file must be open on the root directory of the filesystem and the caller
 * must have CAP_SYS_ADMIN in the initial user namespace.  @arg points to a
 * struct xfs_health_monitor in userspace.
 *
 * Returns an index to the fd table on success, or a negative errno:
 * -EPERM if the caller or the file does not qualify, -EFAULT if @arg cannot
 * be copied in, -EINVAL if the parameters fail validation, -ENOMEM if the
 * monitor cannot be allocated, -EEXIST if the mount already has a monitor,
 * or whatever error fd creation reports.
 */
long
xfs_ioc_health_monitor(
	struct file			*file,
	struct xfs_health_monitor __user *arg)
{
	struct xfs_health_monitor	hmo;
	struct xfs_healthmon		*hm;
	struct xfs_inode		*ip = XFS_I(file_inode(file));
	struct xfs_mount		*mp = ip->i_mount;
	int				ret;

	/*
	 * The health monitoring system is meant to be used only by the
	 * xfs_healer daemon, which runs on behalf of the whole filesystem in
	 * the initial user namespace.  That rules out unprivileged userspace
	 * (which can use fsnotify instead) and containers, and we insist that
	 * the ioctl be issued against the root directory.
	 */
	if (!capable(CAP_SYS_ADMIN) ||
	    ip->i_ino != mp->m_sb.sb_rootino ||
	    current_user_ns() != &init_user_ns)
		return -EPERM;

	if (copy_from_user(&hmo, arg, sizeof(hmo)))
		return -EFAULT;
	if (!xfs_healthmon_validate(&hmo))
		return -EINVAL;

	hm = kzalloc(sizeof(*hm), GFP_KERNEL);
	if (!hm)
		return -ENOMEM;

	/* This initial reference is the one the file will own. */
	refcount_set(&hm->ref, 1);
	hm->dev = mp->m_super->s_dev;

	/*
	 * Hook the monitor up to the xfs_mount.  Once this succeeds the
	 * monitor is live and will receive events.
	 */
	ret = xfs_healthmon_attach(mp, hm);
	if (ret)
		goto out_hm;

	/*
	 * Creating the anonymous file and installing its fd must be the very
	 * last step, because a fd table installation cannot be undone.  On
	 * success the file owns hm and may go away at any moment, so hm must
	 * not be touched again.
	 */
	ret = anon_inode_getfd("xfs_healthmon", &xfs_healthmon_fops, hm,
			O_CLOEXEC | O_RDONLY);
	if (ret >= 0)
		return ret;

	/* No file was created; undo the attach and free the monitor. */
	xfs_healthmon_detach(hm);
out_hm:
	ASSERT(refcount_read(&hm->ref) == 1);
	xfs_healthmon_put(hm);
	return ret;
}

fs/xfs/xfs_healthmon.h

0 → 100644
+36 −0
Original line number Diff line number Diff line
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Copyright (c) 2024-2026 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#ifndef __XFS_HEALTHMON_H__
#define __XFS_HEALTHMON_H__

/*
 * In-kernel state of one health monitor.  The object is reference counted and
 * is linked to at most one xfs_mount at a time.
 */
struct xfs_healthmon {
	/*
	 * Weak reference to the xfs filesystem that is being monitored.  It
	 * will be set to zero when the filesystem detaches from the monitor.
	 * Do not dereference this pointer.
	 *
	 * NOTE(review): xfs_healthmon_detach does turn the cookie back into a
	 * super_block pointer while holding xfs_healthmon_lock; presumably the
	 * prohibition above applies to every other user -- confirm.
	 */
	uintptr_t			mount_cookie;

	/*
	 * Device number of the filesystem being monitored.  This is for
	 * consistent tracing even after unmount.
	 */
	dev_t				dev;

	/*
	 * Reference count of this structure.  The open healthmon fd holds one
	 * ref, the xfs_mount holds another ref if it points to this object,
	 * and running event handlers hold their own refs.
	 */
	refcount_t			ref;
};

/* Detach mp from its health monitor, if it has one. */
void xfs_healthmon_unmount(struct xfs_mount *mp);

/*
 * Create a health monitoring file for the filesystem that file belongs to.
 * Returns a new file descriptor or a negative errno.
 */
long xfs_ioc_health_monitor(struct file *file,
		struct xfs_health_monitor __user *arg);

#endif /* __XFS_HEALTHMON_H__ */
Loading