Commit a095686a authored by Darrick J. Wong's avatar Darrick J. Wong
Browse files

xfs: support in-memory btrees



Adapt the generic btree cursor code to be able to create a btree whose
buffers come from a (presumably in-memory) buftarg with a header block
that's specific to in-memory btrees.  We'll connect this to other parts
of online scrub in the next patches.

Note that in-memory btrees always have a block size matching the system
memory page size for efficiency reasons.  There are also a few things we
need to do to finalize a btree update; that's covered in the next patch.

Signed-off-by: default avatarDarrick J. Wong <djwong@kernel.org>
Reviewed-by: default avatarChristoph Hellwig <hch@lst.de>
parent 8c1771c4
Loading
Loading
Loading
Loading
+2 −3
Original line number Diff line number Diff line
@@ -2270,13 +2270,12 @@ follows:
   pointing to the xfile.

3. Pass the buffer cache target, buffer ops, and other information to
   ``xfbtree_create`` to write an initial tree header and root block to the
   xfile.
   ``xfbtree_init`` to initialize the passed in ``struct xfbtree`` and write an
   initial root block to the xfile.
   Each btree type should define a wrapper that passes necessary arguments to
   the creation function.
   For example, rmap btrees define ``xfs_rmapbt_mem_create`` to take care of
   all the necessary details for callers.
   A ``struct xfbtree`` object will be returned.

4. Pass the xfbtree object to the btree cursor creation function for the
   btree type.
+4 −0
Original line number Diff line number Diff line
@@ -131,6 +131,9 @@ config XFS_LIVE_HOOKS
config XFS_MEMORY_BUFS
	bool

config XFS_BTREE_IN_MEM
	bool

config XFS_ONLINE_SCRUB
	bool "XFS online metadata check support"
	default n
@@ -173,6 +176,7 @@ config XFS_ONLINE_REPAIR
	bool "XFS online metadata repair support"
	default n
	depends on XFS_FS && XFS_ONLINE_SCRUB
	select XFS_BTREE_IN_MEM
	help
	  If you say Y here you will be able to repair metadata on a
	  mounted XFS filesystem.  This feature is intended to reduce
+1 −0
Original line number Diff line number Diff line
@@ -138,6 +138,7 @@ endif
xfs-$(CONFIG_XFS_DRAIN_INTENTS)	+= xfs_drain.o
xfs-$(CONFIG_XFS_LIVE_HOOKS)	+= xfs_hooks.o
xfs-$(CONFIG_XFS_MEMORY_BUFS)	+= xfs_buf_mem.o
xfs-$(CONFIG_XFS_BTREE_IN_MEM)	+= libxfs/xfs_btree_mem.o

# online scrub/repair
ifeq ($(CONFIG_XFS_ONLINE_SCRUB),y)
+219 −37
Original line number Diff line number Diff line
@@ -28,6 +28,8 @@
#include "xfs_rmap_btree.h"
#include "xfs_refcount_btree.h"
#include "xfs_health.h"
#include "xfs_buf_mem.h"
#include "xfs_btree_mem.h"

/*
 * Btree magic numbers.
@@ -75,6 +77,25 @@ xfs_btree_check_fsblock_siblings(
	return NULL;
}

static inline xfs_failaddr_t
xfs_btree_check_memblock_siblings(
	struct xfs_buftarg	*btp,
	xfbno_t			bno,
	__be64			dsibling)
{
	xfbno_t			sibling;

	if (dsibling == cpu_to_be64(NULLFSBLOCK))
		return NULL;

	sibling = be64_to_cpu(dsibling);
	if (sibling == bno)
		return __this_address;
	if (!xmbuf_verify_daddr(btp, xfbno_to_daddr(sibling)))
		return __this_address;
	return NULL;
}

static inline xfs_failaddr_t
xfs_btree_check_agblock_siblings(
	struct xfs_perag	*pag,
@@ -164,6 +185,34 @@ __xfs_btree_check_fsblock(
	return fa;
}

/*
 * Check an in-memory btree block header.  Return the address of the failing
 * check, or NULL if everything is ok.
 */
static xfs_failaddr_t
__xfs_btree_check_memblock(
	struct xfs_btree_cur	*cur,
	struct xfs_btree_block	*block,
	int			level,
	struct xfs_buf		*bp)
{
	struct xfs_buftarg	*btp = cur->bc_mem.xfbtree->target;
	xfs_failaddr_t		fa;
	xfbno_t			bno;

	fa = __xfs_btree_check_lblock_hdr(cur, block, level, bp);
	if (fa)
		return fa;

	bno = xfs_daddr_to_xfbno(xfs_buf_daddr(bp));
	fa = xfs_btree_check_memblock_siblings(btp, bno,
			block->bb_u.l.bb_leftsib);
	if (!fa)
		fa = xfs_btree_check_memblock_siblings(btp, bno,
				block->bb_u.l.bb_rightsib);
	return fa;
}

/*
 * Check a short btree block header.  Return the address of the failing check,
 * or NULL if everything is ok.
@@ -216,9 +265,17 @@ __xfs_btree_check_block(
	int			level,
	struct xfs_buf		*bp)
{
	if (cur->bc_ops->type == XFS_BTREE_TYPE_AG)
	switch (cur->bc_ops->type) {
	case XFS_BTREE_TYPE_MEM:
		return __xfs_btree_check_memblock(cur, block, level, bp);
	case XFS_BTREE_TYPE_AG:
		return __xfs_btree_check_agblock(cur, block, level, bp);
	case XFS_BTREE_TYPE_INODE:
		return __xfs_btree_check_fsblock(cur, block, level, bp);
	default:
		ASSERT(0);
		return __this_address;
	}
}

static inline unsigned int xfs_btree_block_errtag(struct xfs_btree_cur *cur)
@@ -262,14 +319,22 @@ __xfs_btree_check_ptr(
	if (level <= 0)
		return -EFSCORRUPTED;

	if (cur->bc_ops->type == XFS_BTREE_TYPE_INODE) {
	switch (cur->bc_ops->type) {
	case XFS_BTREE_TYPE_MEM:
		if (!xfbtree_verify_bno(cur->bc_mem.xfbtree,
				be64_to_cpu((&ptr->l)[index])))
			return -EFSCORRUPTED;
		break;
	case XFS_BTREE_TYPE_INODE:
		if (!xfs_verify_fsbno(cur->bc_mp,
				be64_to_cpu((&ptr->l)[index])))
			return -EFSCORRUPTED;
	} else {
		break;
	case XFS_BTREE_TYPE_AG:
		if (!xfs_verify_agbno(cur->bc_ag.pag,
				be32_to_cpu((&ptr->s)[index])))
			return -EFSCORRUPTED;
		break;
	}

	return 0;
@@ -290,17 +355,26 @@ xfs_btree_check_ptr(

	error = __xfs_btree_check_ptr(cur, ptr, index, level);
	if (error) {
		if (cur->bc_ops->type == XFS_BTREE_TYPE_INODE) {
		switch (cur->bc_ops->type) {
		case XFS_BTREE_TYPE_MEM:
			xfs_err(cur->bc_mp,
"In-memory: Corrupt %sbt flags 0x%x pointer at level %d index %d fa %pS.",
				cur->bc_ops->name, cur->bc_flags, level, index,
				__this_address);
			break;
		case XFS_BTREE_TYPE_INODE:
			xfs_err(cur->bc_mp,
"Inode %llu fork %d: Corrupt %sbt pointer at level %d index %d.",
				cur->bc_ino.ip->i_ino,
				cur->bc_ino.whichfork, cur->bc_ops->name,
				level, index);
		} else {
			break;
		case XFS_BTREE_TYPE_AG:
			xfs_err(cur->bc_mp,
"AG %u: Corrupt %sbt pointer at level %d index %d.",
				cur->bc_ag.pag->pag_agno, cur->bc_ops->name,
				level, index);
			break;
		}
		xfs_btree_mark_sick(cur);
	}
@@ -457,11 +531,35 @@ xfs_btree_del_cursor(
	case XFS_BTREE_TYPE_INODE:
		/* nothing to do */
		break;
	case XFS_BTREE_TYPE_MEM:
		if (cur->bc_mem.pag)
			xfs_perag_put(cur->bc_mem.pag);
		break;
	}

	kmem_cache_free(cur->bc_cache, cur);
}

/* Return the buffer target for this btree's buffer. */
static inline struct xfs_buftarg *
xfs_btree_buftarg(
	struct xfs_btree_cur	*cur)
{
	if (cur->bc_ops->type == XFS_BTREE_TYPE_MEM)
		return cur->bc_mem.xfbtree->target;
	return cur->bc_mp->m_ddev_targp;
}

/* Return the block size (in units of 512b sectors) for this btree. */
static inline unsigned int
xfs_btree_bbsize(
	struct xfs_btree_cur	*cur)
{
	if (cur->bc_ops->type == XFS_BTREE_TYPE_MEM)
		return XFBNO_BBSIZE;
	return cur->bc_mp->m_bsize;
}

/*
 * Duplicate the btree cursor.
 * Allocate a new one, copy the record, re-get the buffers.
@@ -505,9 +603,10 @@ xfs_btree_dup_cursor(
		new->bc_levels[i].ra = cur->bc_levels[i].ra;
		bp = cur->bc_levels[i].bp;
		if (bp) {
			error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
						   xfs_buf_daddr(bp), mp->m_bsize,
						   0, &bp,
			error = xfs_trans_read_buf(mp, tp,
					xfs_btree_buftarg(cur),
					xfs_buf_daddr(bp),
					xfs_btree_bbsize(cur), 0, &bp,
					cur->bc_ops->buf_ops);
			if (xfs_metadata_is_sick(error))
				xfs_btree_mark_sick(new);
@@ -885,6 +984,32 @@ xfs_btree_readahead_fsblock(
	return rval;
}

STATIC int
xfs_btree_readahead_memblock(
	struct xfs_btree_cur	*cur,
	int			lr,
	struct xfs_btree_block	*block)
{
	struct xfs_buftarg	*btp = cur->bc_mem.xfbtree->target;
	xfbno_t			left = be64_to_cpu(block->bb_u.l.bb_leftsib);
	xfbno_t			right = be64_to_cpu(block->bb_u.l.bb_rightsib);
	int			rval = 0;

	if ((lr & XFS_BTCUR_LEFTRA) && left != NULLFSBLOCK) {
		xfs_buf_readahead(btp, xfbno_to_daddr(left), XFBNO_BBSIZE,
				cur->bc_ops->buf_ops);
		rval++;
	}

	if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLFSBLOCK) {
		xfs_buf_readahead(btp, xfbno_to_daddr(right), XFBNO_BBSIZE,
				cur->bc_ops->buf_ops);
		rval++;
	}

	return rval;
}

STATIC int
xfs_btree_readahead_agblock(
	struct xfs_btree_cur	*cur,
@@ -939,9 +1064,17 @@ xfs_btree_readahead(
	cur->bc_levels[lev].ra |= lr;
	block = XFS_BUF_TO_BLOCK(cur->bc_levels[lev].bp);

	if (cur->bc_ops->ptr_len == XFS_BTREE_LONG_PTR_LEN)
		return xfs_btree_readahead_fsblock(cur, lr, block);
	switch (cur->bc_ops->type) {
	case XFS_BTREE_TYPE_AG:
		return xfs_btree_readahead_agblock(cur, lr, block);
	case XFS_BTREE_TYPE_INODE:
		return xfs_btree_readahead_fsblock(cur, lr, block);
	case XFS_BTREE_TYPE_MEM:
		return xfs_btree_readahead_memblock(cur, lr, block);
	default:
		ASSERT(0);
		return 0;
	}
}

STATIC int
@@ -950,23 +1083,24 @@ xfs_btree_ptr_to_daddr(
	const union xfs_btree_ptr	*ptr,
	xfs_daddr_t			*daddr)
{
	xfs_fsblock_t		fsbno;
	xfs_agblock_t		agbno;
	int			error;

	error = xfs_btree_check_ptr(cur, ptr, 0, 1);
	if (error)
		return error;

	if (cur->bc_ops->ptr_len == XFS_BTREE_LONG_PTR_LEN) {
		fsbno = be64_to_cpu(ptr->l);
		*daddr = XFS_FSB_TO_DADDR(cur->bc_mp, fsbno);
	} else {
		agbno = be32_to_cpu(ptr->s);
	switch (cur->bc_ops->type) {
	case XFS_BTREE_TYPE_AG:
		*daddr = XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_ag.pag->pag_agno,
				agbno);
				be32_to_cpu(ptr->s));
		break;
	case XFS_BTREE_TYPE_INODE:
		*daddr = XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l));
		break;
	case XFS_BTREE_TYPE_MEM:
		*daddr = xfbno_to_daddr(be64_to_cpu(ptr->l));
		break;
	}

	return 0;
}

@@ -986,8 +1120,9 @@ xfs_btree_readahead_ptr(

	if (xfs_btree_ptr_to_daddr(cur, ptr, &daddr))
		return;
	xfs_buf_readahead(cur->bc_mp->m_ddev_targp, daddr,
			  cur->bc_mp->m_bsize * count, cur->bc_ops->buf_ops);
	xfs_buf_readahead(xfs_btree_buftarg(cur), daddr,
			xfs_btree_bbsize(cur) * count,
			cur->bc_ops->buf_ops);
}

/*
@@ -1172,9 +1307,17 @@ static inline __u64
xfs_btree_owner(
	struct xfs_btree_cur    *cur)
{
	if (cur->bc_ops->type == XFS_BTREE_TYPE_INODE)
	switch (cur->bc_ops->type) {
	case XFS_BTREE_TYPE_MEM:
		return cur->bc_mem.xfbtree->owner;
	case XFS_BTREE_TYPE_INODE:
		return cur->bc_ino.ip->i_ino;
	case XFS_BTREE_TYPE_AG:
		return cur->bc_ag.pag->pag_agno;
	default:
		ASSERT(0);
		return 0;
	}
}

void
@@ -1218,12 +1361,18 @@ xfs_btree_buf_to_ptr(
	struct xfs_buf		*bp,
	union xfs_btree_ptr	*ptr)
{
	if (cur->bc_ops->ptr_len == XFS_BTREE_LONG_PTR_LEN)
		ptr->l = cpu_to_be64(XFS_DADDR_TO_FSB(cur->bc_mp,
					xfs_buf_daddr(bp)));
	else {
	switch (cur->bc_ops->type) {
	case XFS_BTREE_TYPE_AG:
		ptr->s = cpu_to_be32(xfs_daddr_to_agbno(cur->bc_mp,
					xfs_buf_daddr(bp)));
		break;
	case XFS_BTREE_TYPE_INODE:
		ptr->l = cpu_to_be64(XFS_DADDR_TO_FSB(cur->bc_mp,
					xfs_buf_daddr(bp)));
		break;
	case XFS_BTREE_TYPE_MEM:
		ptr->l = cpu_to_be64(xfs_daddr_to_xfbno(xfs_buf_daddr(bp)));
		break;
	}
}

@@ -1242,15 +1391,14 @@ xfs_btree_get_buf_block(
	struct xfs_btree_block		**block,
	struct xfs_buf			**bpp)
{
	struct xfs_mount	*mp = cur->bc_mp;
	xfs_daddr_t			d;
	int				error;

	error = xfs_btree_ptr_to_daddr(cur, ptr, &d);
	if (error)
		return error;
	error = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d, mp->m_bsize,
			0, bpp);
	error = xfs_trans_get_buf(cur->bc_tp, xfs_btree_buftarg(cur), d,
			xfs_btree_bbsize(cur), 0, bpp);
	if (error)
		return error;

@@ -1281,8 +1429,8 @@ xfs_btree_read_buf_block(
	error = xfs_btree_ptr_to_daddr(cur, ptr, &d);
	if (error)
		return error;
	error = xfs_trans_read_buf(mp, cur->bc_tp, mp->m_ddev_targp, d,
				   mp->m_bsize, flags, bpp,
	error = xfs_trans_read_buf(mp, cur->bc_tp, xfs_btree_buftarg(cur), d,
			xfs_btree_bbsize(cur), flags, bpp,
			cur->bc_ops->buf_ops);
	if (xfs_metadata_is_sick(error))
		xfs_btree_mark_sick(cur);
@@ -4582,6 +4730,8 @@ xfs_btree_fsblock_verify(
	xfs_fsblock_t		fsb;
	xfs_failaddr_t		fa;

	ASSERT(!xfs_buftarg_is_mem(bp->b_target));

	/* numrecs verification */
	if (be16_to_cpu(block->bb_numrecs) > max_recs)
		return __this_address;
@@ -4596,6 +4746,36 @@ xfs_btree_fsblock_verify(
	return fa;
}

/* Verify an in-memory btree block. */
xfs_failaddr_t
xfs_btree_memblock_verify(
	struct xfs_buf		*bp,
	unsigned int		max_recs)
{
	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
	struct xfs_buftarg	*btp = bp->b_target;
	xfs_failaddr_t		fa;
	xfbno_t			bno;

	ASSERT(xfs_buftarg_is_mem(bp->b_target));

	/* numrecs verification */
	if (be16_to_cpu(block->bb_numrecs) > max_recs)
		return __this_address;

	/* sibling pointer verification */
	bno = xfs_daddr_to_xfbno(xfs_buf_daddr(bp));
	fa = xfs_btree_check_memblock_siblings(btp, bno,
			block->bb_u.l.bb_leftsib);
	if (fa)
		return fa;
	fa = xfs_btree_check_memblock_siblings(btp, bno,
			block->bb_u.l.bb_rightsib);
	if (fa)
		return fa;

	return NULL;
}
/**
 * xfs_btree_agblock_v5hdr_verify() -- verify the v5 fields of a short-format
 *				      btree block
@@ -4637,6 +4817,8 @@ xfs_btree_agblock_verify(
	xfs_agblock_t		agbno;
	xfs_failaddr_t		fa;

	ASSERT(!xfs_buftarg_is_mem(bp->b_target));

	/* numrecs verification */
	if (be16_to_cpu(block->bb_numrecs) > max_recs)
		return __this_address;
+7 −0
Original line number Diff line number Diff line
@@ -112,6 +112,7 @@ static inline enum xbtree_key_contig xbtree_key_contig(uint64_t x, uint64_t y)
enum xfs_btree_type {
	XFS_BTREE_TYPE_AG,
	XFS_BTREE_TYPE_INODE,
	XFS_BTREE_TYPE_MEM,
};

struct xfs_btree_ops {
@@ -281,6 +282,10 @@ struct xfs_btree_cur
			struct xfs_buf		*agbp;
			struct xbtree_afakeroot	*afake;	/* for staging cursor */
		} bc_ag;
		struct {
			struct xfbtree		*xfbtree;
			struct xfs_perag	*pag;
		} bc_mem;
	};

	/* per-format private data */
@@ -455,6 +460,8 @@ xfs_failaddr_t xfs_btree_fsblock_v5hdr_verify(struct xfs_buf *bp,
		uint64_t owner);
xfs_failaddr_t xfs_btree_fsblock_verify(struct xfs_buf *bp,
		unsigned int max_recs);
xfs_failaddr_t xfs_btree_memblock_verify(struct xfs_buf *bp,
		unsigned int max_recs);

unsigned int xfs_btree_compute_maxlevels(const unsigned int *limits,
		unsigned long long records);
Loading