Commit 4787fc80 authored by Darrick J. Wong
Browse files

xfs: create a shadow rmap btree during rmap repair



Create an in-memory btree of rmap records instead of an array.  This
enables us to do live record collection instead of freezing the fs.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
parent 32080a9b
Loading
Loading
Loading
Loading
+24 −13
Original line number Diff line number Diff line
@@ -269,6 +269,16 @@ xfs_rmap_check_irec(
	return NULL;
}

/*
 * Validate an incore rmap record against the perag that the cursor is
 * attached to.  Cursors for the in-memory rmap btree keep their perag in
 * bc_mem; every other cursor passed here keeps it in bc_ag.  The result of
 * xfs_rmap_check_irec() is returned unchanged.
 */
static inline xfs_failaddr_t
xfs_rmap_check_btrec(
	struct xfs_btree_cur		*cur,
	const struct xfs_rmap_irec	*irec)
{
	struct xfs_perag		*pag;

	if (xfs_btree_is_mem_rmap(cur->bc_ops))
		pag = cur->bc_mem.pag;
	else
		pag = cur->bc_ag.pag;

	return xfs_rmap_check_irec(pag, irec);
}

static inline int
xfs_rmap_complain_bad_rec(
	struct xfs_btree_cur		*cur,
@@ -277,6 +287,10 @@ xfs_rmap_complain_bad_rec(
{
	struct xfs_mount		*mp = cur->bc_mp;

	if (xfs_btree_is_mem_rmap(cur->bc_ops))
		xfs_warn(mp,
 "In-Memory Reverse Mapping BTree record corruption detected at %pS!", fa);
	else
		xfs_warn(mp,
 "Reverse Mapping BTree record corruption in AG %d detected at %pS!",
			cur->bc_ag.pag->pag_agno, fa);
@@ -307,7 +321,7 @@ xfs_rmap_get_rec(

	fa = xfs_rmap_btrec_to_irec(rec, irec);
	if (!fa)
		fa = xfs_rmap_check_irec(cur->bc_ag.pag, irec);
		fa = xfs_rmap_check_btrec(cur, irec);
	if (fa)
		return xfs_rmap_complain_bad_rec(cur, fa, irec);

@@ -2404,15 +2418,12 @@ xfs_rmap_map_raw(
{
	struct xfs_owner_info	oinfo;

	oinfo.oi_owner = rmap->rm_owner;
	oinfo.oi_offset = rmap->rm_offset;
	oinfo.oi_flags = 0;
	if (rmap->rm_flags & XFS_RMAP_ATTR_FORK)
		oinfo.oi_flags |= XFS_OWNER_INFO_ATTR_FORK;
	if (rmap->rm_flags & XFS_RMAP_BMBT_BLOCK)
		oinfo.oi_flags |= XFS_OWNER_INFO_BMBT_BLOCK;
	xfs_owner_info_pack(&oinfo, rmap->rm_owner, rmap->rm_offset,
			rmap->rm_flags);

	if (rmap->rm_flags || XFS_RMAP_NON_INODE_OWNER(rmap->rm_owner))
	if ((rmap->rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK |
			       XFS_RMAP_UNWRITTEN)) ||
	    XFS_RMAP_NON_INODE_OWNER(rmap->rm_owner))
		return xfs_rmap_map(cur, rmap->rm_startblock,
				rmap->rm_blockcount,
				rmap->rm_flags & XFS_RMAP_UNWRITTEN,
@@ -2442,7 +2453,7 @@ xfs_rmap_query_range_helper(

	fa = xfs_rmap_btrec_to_irec(rec, &irec);
	if (!fa)
		fa = xfs_rmap_check_irec(cur->bc_ag.pag, &irec);
		fa = xfs_rmap_check_btrec(cur, &irec);
	if (fa)
		return xfs_rmap_complain_bad_rec(cur, fa, &irec);

+149 −1
Original line number Diff line number Diff line
@@ -22,6 +22,8 @@
#include "xfs_extent_busy.h"
#include "xfs_ag.h"
#include "xfs_ag_resv.h"
#include "xfs_buf_mem.h"
#include "xfs_btree_mem.h"

static struct kmem_cache	*xfs_rmapbt_cur_cache;

@@ -541,6 +543,151 @@ xfs_rmapbt_init_cursor(
	return cur;
}

#ifdef CONFIG_XFS_BTREE_IN_MEM
/*
 * Compute how many entries fit in @blocklen bytes of an in-memory rmap btree
 * block.  Leaf blocks are packed with rmap records; each node block entry
 * consumes two rmap keys plus one 64-bit big-endian value.
 */
static inline unsigned int
xfs_rmapbt_mem_block_maxrecs(
	unsigned int		blocklen,
	bool			leaf)
{
	if (!leaf) {
		return blocklen /
			(2 * sizeof(struct xfs_rmap_key) + sizeof(__be64));
	}

	return blocklen / sizeof(struct xfs_rmap_rec);
}

/*
 * Validate an in-memory rmap btree block.  Callers are allowed to generate an
 * in-memory btree even if the ondisk feature is not enabled.
 *
 * The checks run in this order: magic number, v5 long-format block header
 * (with XFS_RMAP_OWN_UNKNOWN as the owner argument), tree level, and finally
 * the generic in-memory block check against the record capacity for this
 * level.  A non-NULL return is the failure address of the first check that
 * tripped.
 */
static xfs_failaddr_t
xfs_rmapbt_mem_verify(
	struct xfs_buf		*bp)
{
	struct xfs_btree_block	*blk = XFS_BUF_TO_BLOCK(bp);
	xfs_failaddr_t		failaddr;
	unsigned int		blocklen;
	unsigned int		height;

	if (!xfs_verify_magic(bp, blk->bb_magic))
		return __this_address;

	failaddr = xfs_btree_fsblock_v5hdr_verify(bp, XFS_RMAP_OWN_UNKNOWN);
	if (failaddr)
		return failaddr;

	height = be16_to_cpu(blk->bb_level);
	if (height >= xfs_rmapbt_maxlevels_ondisk())
		return __this_address;

	/* Level zero blocks are leaves, which changes the capacity. */
	blocklen = XFBNO_BLOCKSIZE - XFS_BTREE_LBLOCK_CRC_LEN;
	return xfs_btree_memblock_verify(bp,
			xfs_rmapbt_mem_block_maxrecs(blocklen, height == 0));
}

/*
 * Read/write verifier for in-memory rmap btree buffers.  Runs the structure
 * verifier and reports any failure on the buffer as -EFSCORRUPTED.
 */
static void
xfs_rmapbt_mem_rw_verify(
	struct xfs_buf	*bp)
{
	xfs_failaddr_t	fa;

	fa = xfs_rmapbt_mem_verify(bp);
	if (!fa)
		return;

	xfs_verifier_error(bp, -EFSCORRUPTED, fa);
}

/*
 * Buffer verifiers for in-memory rmap btree blocks.  Reads and writes share
 * one verifier; crc checks are skipped on in-memory btrees to save time.
 * Only the second magic slot is populated.
 */
static const struct xfs_buf_ops xfs_rmapbt_mem_buf_ops = {
	.name			= "xfs_rmapbt_mem",
	.magic			= { [1] = cpu_to_be32(XFS_RMAP_CRC_MAGIC) },
	.verify_struct		= xfs_rmapbt_mem_verify,
	.verify_read		= xfs_rmapbt_mem_rw_verify,
	.verify_write		= xfs_rmapbt_mem_rw_verify,
};

const struct xfs_btree_ops xfs_rmapbt_mem_ops = {
	.name			= "mem_rmap",
	.type			= XFS_BTREE_TYPE_MEM,
	.geom_flags		= XFS_BTGEO_OVERLAPPING,

	.rec_len		= sizeof(struct xfs_rmap_rec),
	/* Overlapping btree; 2 keys per pointer. */
	.key_len		= 2 * sizeof(struct xfs_rmap_key),
	.ptr_len		= XFS_BTREE_LONG_PTR_LEN,

	.lru_refs		= XFS_RMAP_BTREE_REF,
	.statoff		= XFS_STATS_CALC_INDEX(xs_rmap_mem_2),

	.dup_cursor		= xfbtree_dup_cursor,
	.set_root		= xfbtree_set_root,
	.alloc_block		= xfbtree_alloc_block,
	.free_block		= xfbtree_free_block,
	.get_minrecs		= xfbtree_get_minrecs,
	.get_maxrecs		= xfbtree_get_maxrecs,
	.init_key_from_rec	= xfs_rmapbt_init_key_from_rec,
	.init_high_key_from_rec	= xfs_rmapbt_init_high_key_from_rec,
	.init_rec_from_cur	= xfs_rmapbt_init_rec_from_cur,
	.init_ptr_from_cur	= xfbtree_init_ptr_from_cur,
	.key_diff		= xfs_rmapbt_key_diff,
	.buf_ops		= &xfs_rmapbt_mem_buf_ops,
	.diff_two_keys		= xfs_rmapbt_diff_two_keys,
	.keys_inorder		= xfs_rmapbt_keys_inorder,
	.recs_inorder		= xfs_rmapbt_recs_inorder,
	.keys_contiguous	= xfs_rmapbt_keys_contiguous,
};

/*
 * Create a cursor for an in-memory rmap btree.
 *
 * The cursor is allocated against the mount that @pag belongs to, starts at
 * the height recorded in @xfbt, and stores the result of xfs_perag_hold(@pag)
 * in bc_mem.pag.
 */
struct xfs_btree_cur *
xfs_rmapbt_mem_cursor(
	struct xfs_perag	*pag,
	struct xfs_trans	*tp,
	struct xfbtree		*xfbt)
{
	struct xfs_btree_cur	*cur;

	cur = xfs_btree_alloc_cursor(pag->pag_mount, tp, &xfs_rmapbt_mem_ops,
			xfs_rmapbt_maxlevels_ondisk(), xfs_rmapbt_cur_cache);

	cur->bc_nlevels = xfbt->nlevels;
	cur->bc_mem.xfbtree = xfbt;
	cur->bc_mem.pag = xfs_perag_hold(pag);

	return cur;
}

/*
 * Create an in-memory rmap btree.  The AG number is recorded as the owner of
 * @xfbt before the generic xfbtree setup runs with the in-memory rmap btree
 * ops; the setup's return value is passed straight back to the caller.
 */
int
xfs_rmapbt_mem_init(
	struct xfs_mount	*mp,
	struct xfbtree		*xfbt,
	struct xfs_buftarg	*btp,
	xfs_agnumber_t		agno)
{
	int			error;

	xfbt->owner = agno;

	error = xfbtree_init(mp, xfbt, btp, &xfs_rmapbt_mem_ops);
	return error;
}

/*
 * Compute the max possible height for reverse mapping btrees in memory.
 *
 * Each block is assumed to be half full (minrecs is maxrecs / 2 for both
 * leaf and node blocks), and the record count is the number of rmap records
 * that would fit in XFS_MAX_AG_BYTES.
 */
static unsigned int
xfs_rmapbt_mem_maxlevels(void)
{
	const unsigned int	blocklen =
			XFBNO_BLOCKSIZE - XFS_BTREE_LBLOCK_CRC_LEN;
	unsigned int		minrecs[2] = {
		xfs_rmapbt_mem_block_maxrecs(blocklen, true) / 2,
		xfs_rmapbt_mem_block_maxrecs(blocklen, false) / 2,
	};

	/*
	 * How tall can an in-memory rmap btree become if we filled the entire
	 * AG with rmap records?
	 */
	return xfs_btree_compute_maxlevels(minrecs,
			XFS_MAX_AG_BYTES / sizeof(struct xfs_rmap_rec));
}
#else
/* Built without CONFIG_XFS_BTREE_IN_MEM: report an in-memory height of zero. */
# define xfs_rmapbt_mem_maxlevels()	(0)
#endif /* CONFIG_XFS_BTREE_IN_MEM */

/*
 * Install a new reverse mapping btree root.  Caller is responsible for
 * invalidating and freeing the old btree blocks.
@@ -611,7 +758,8 @@ xfs_rmapbt_maxlevels_ondisk(void)
	 * like if it consumes almost all the blocks in the AG due to maximal
	 * sharing factor.
	 */
	return xfs_btree_space_to_height(minrecs, XFS_MAX_CRC_AG_BLOCKS);
	return max(xfs_btree_space_to_height(minrecs, XFS_MAX_CRC_AG_BLOCKS),
		   xfs_rmapbt_mem_maxlevels());
}

/* Compute the maximum height of an rmap btree. */
+6 −0
Original line number Diff line number Diff line
@@ -10,6 +10,7 @@ struct xfs_buf;
struct xfs_btree_cur;
struct xfs_mount;
struct xbtree_afakeroot;
struct xfbtree;

/* rmaps only exist on crc enabled filesystems */
#define XFS_RMAP_BLOCK_LEN	XFS_BTREE_SBLOCK_CRC_LEN
@@ -62,4 +63,9 @@ unsigned int xfs_rmapbt_maxlevels_ondisk(void);
int __init xfs_rmapbt_init_cur_cache(void);
void xfs_rmapbt_destroy_cur_cache(void);

/* Create a cursor for the in-memory rmap btree backed by @xfbtree. */
struct xfs_btree_cur *xfs_rmapbt_mem_cursor(struct xfs_perag *pag,
		struct xfs_trans *tp, struct xfbtree *xfbtree);
/* Set up @xfbtree as an in-memory rmap btree owned by AG @agno. */
int xfs_rmapbt_mem_init(struct xfs_mount *mp, struct xfbtree *xfbtree,
		struct xfs_buftarg *btp, xfs_agnumber_t agno);

#endif /* __XFS_RMAP_BTREE_H__ */
+10 −0
Original line number Diff line number Diff line
@@ -51,6 +51,7 @@ extern const struct xfs_btree_ops xfs_finobt_ops;
extern const struct xfs_btree_ops xfs_bmbt_ops;
extern const struct xfs_btree_ops xfs_refcountbt_ops;
extern const struct xfs_btree_ops xfs_rmapbt_ops;
extern const struct xfs_btree_ops xfs_rmapbt_mem_ops;

static inline bool xfs_btree_is_bno(const struct xfs_btree_ops *ops)
{
@@ -87,6 +88,15 @@ static inline bool xfs_btree_is_rmap(const struct xfs_btree_ops *ops)
	return ops == &xfs_rmapbt_ops;
}

/*
 * Do these btree ops belong to the in-memory rmap btree?  Builds without
 * CONFIG_XFS_BTREE_IN_MEM answer false at compile time without evaluating
 * the argument.
 */
#ifndef CONFIG_XFS_BTREE_IN_MEM
# define xfs_btree_is_mem_rmap(...)	(false)
#else
static inline bool xfs_btree_is_mem_rmap(const struct xfs_btree_ops *ops)
{
	return ops == &xfs_rmapbt_mem_ops;
}
#endif /* CONFIG_XFS_BTREE_IN_MEM */

/* log size calculation functions */
int	xfs_log_calc_unit_res(struct xfs_mount *mp, int unit_bytes);
int	xfs_log_calc_minimum_size(struct xfs_mount *);
+18 −0
Original line number Diff line number Diff line
@@ -31,12 +31,14 @@
#include "xfs_error.h"
#include "xfs_reflink.h"
#include "xfs_health.h"
#include "xfs_buf_mem.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
#include "scrub/repair.h"
#include "scrub/bitmap.h"
#include "scrub/stats.h"
#include "scrub/xfile.h"

/*
 * Attempt to repair some metadata, if the metadata is corrupt and userspace
@@ -1147,3 +1149,19 @@ xrep_metadata_inode_forks(

	return 0;
}

/*
 * Set up an in-memory buffer cache so that we can use the xfbtree.  Allocating
 * a shmem file might take loks, so we cannot be in transaction context.  Park
 * our resources in the scrub context and let the teardown function take care
 * of them at the right time.
 */
int
xrep_setup_xfbtree(
	struct xfs_scrub	*sc,
	const char		*descr)
{
	ASSERT(sc->tp == NULL);

	return xmbuf_alloc(sc->mp, descr, &sc->xmbtp);
}
Loading