Commit 8394a97c authored by Chandan Babu R's avatar Chandan Babu R
Browse files

Merge tag 'in-memory-btrees-6.9_2024-02-23' of...

Merge tag 'in-memory-btrees-6.9_2024-02-23' of https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux

 into xfs-6.9-mergeC

xfs: support in-memory btrees

Online repair of the reverse-mapping btrees presents some unique
challenges.  To construct a new reverse mapping btree, we must scan the
entire filesystem, but we cannot afford to quiesce the entire filesystem
for the potentially lengthy scan.

For rmap btrees, therefore, we relax our requirements of totally atomic
repairs.  Instead, repairs will scan all inodes, construct a new reverse
mapping dataset, format a new btree, and commit it before anyone trips
over the corruption.  This is exactly the same strategy as was used in
the quotacheck and nlink scanners.

Unfortunately, the xfarray cannot perform key-based lookups and is
therefore unsuitable for supporting live updates.  Luckily, we already have a
data structure that maintains an indexed rmap recordset -- the existing
rmap btree code!  Hence we port the existing btree and buffer target
code to be able to create a btree using the xfile we developed earlier.
Live hooks keep the in-memory btree up to date for any resources that
have already been scanned.

This approach is not maximally memory efficient, but we can use the same
rmap code that we do everywhere else, which provides improved stability
without growing the code base even more.  Note that in-memory btree
blocks are always page sized.

This patchset modifies the kernel xfs buffer cache to be capable of
using an xfile (aka a shmem file) as a backing device.  It then augments
the btree code to support creating btree cursors with buffers that come
from a buftarg other than the data device (namely an xfile-backed
buftarg).  For the userspace xfs buffer cache, we instead use a memfd or
an O_TMPFILE file as a backing device.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>

* tag 'in-memory-btrees-6.9_2024-02-23' of https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux:
  xfs: launder in-memory btree buffers before transaction commit
  xfs: support in-memory btrees
  xfs: add a xfs_btree_ptrs_equal helper
  xfs: support in-memory buffer cache targets
  xfs: teach buftargs to maintain their own buffer hashtable
parents aa8fb4bb 0dc63c8a
Loading
Loading
Loading
Loading
+2 −3
Original line number Diff line number Diff line
@@ -2270,13 +2270,12 @@ follows:
   pointing to the xfile.

3. Pass the buffer cache target, buffer ops, and other information to
   ``xfbtree_create`` to write an initial tree header and root block to the
   xfile.
   ``xfbtree_init`` to initialize the passed in ``struct xfbtree`` and write an
   initial root block to the xfile.
   Each btree type should define a wrapper that passes necessary arguments to
   the creation function.
   For example, rmap btrees define ``xfs_rmapbt_mem_create`` to take care of
   all the necessary details for callers.
   A ``struct xfbtree`` object will be returned.

4. Pass the xfbtree object to the btree cursor creation function for the
   btree type.
+8 −0
Original line number Diff line number Diff line
@@ -128,6 +128,12 @@ config XFS_LIVE_HOOKS
	bool
	select JUMP_LABEL if HAVE_ARCH_JUMP_LABEL

config XFS_MEMORY_BUFS
	bool

config XFS_BTREE_IN_MEM
	bool

config XFS_ONLINE_SCRUB
	bool "XFS online metadata check support"
	default n
@@ -135,6 +141,7 @@ config XFS_ONLINE_SCRUB
	depends on TMPFS && SHMEM
	select XFS_LIVE_HOOKS
	select XFS_DRAIN_INTENTS
	select XFS_MEMORY_BUFS
	help
	  If you say Y here you will be able to check metadata on a
	  mounted XFS filesystem.  This feature is intended to reduce
@@ -169,6 +176,7 @@ config XFS_ONLINE_REPAIR
	bool "XFS online metadata repair support"
	default n
	depends on XFS_FS && XFS_ONLINE_SCRUB
	select XFS_BTREE_IN_MEM
	help
	  If you say Y here you will be able to repair metadata on a
	  mounted XFS filesystem.  This feature is intended to reduce
+2 −0
Original line number Diff line number Diff line
@@ -137,6 +137,8 @@ endif

xfs-$(CONFIG_XFS_DRAIN_INTENTS)	+= xfs_drain.o
xfs-$(CONFIG_XFS_LIVE_HOOKS)	+= xfs_hooks.o
xfs-$(CONFIG_XFS_MEMORY_BUFS)	+= xfs_buf_mem.o
xfs-$(CONFIG_XFS_BTREE_IN_MEM)	+= libxfs/xfs_btree_mem.o

# online scrub/repair
ifeq ($(CONFIG_XFS_ONLINE_SCRUB),y)
+3 −3
Original line number Diff line number Diff line
@@ -264,7 +264,7 @@ xfs_free_perag(
		xfs_defer_drain_free(&pag->pag_intents_drain);

		cancel_delayed_work_sync(&pag->pag_blockgc_work);
		xfs_buf_hash_destroy(pag);
		xfs_buf_cache_destroy(&pag->pag_bcache);

		/* drop the mount's active reference */
		xfs_perag_rele(pag);
@@ -352,7 +352,7 @@ xfs_free_unused_perag_range(
		spin_unlock(&mp->m_perag_lock);
		if (!pag)
			break;
		xfs_buf_hash_destroy(pag);
		xfs_buf_cache_destroy(&pag->pag_bcache);
		xfs_defer_drain_free(&pag->pag_intents_drain);
		kfree(pag);
	}
@@ -419,7 +419,7 @@ xfs_initialize_perag(
		pag->pagb_tree = RB_ROOT;
#endif /* __KERNEL__ */

		error = xfs_buf_hash_init(pag);
		error = xfs_buf_cache_init(&pag->pag_bcache);
		if (error)
			goto out_remove_pag;

+1 −3
Original line number Diff line number Diff line
@@ -106,9 +106,7 @@ struct xfs_perag {
	int		pag_ici_reclaimable;	/* reclaimable inodes */
	unsigned long	pag_ici_reclaim_cursor;	/* reclaim restart point */

	/* buffer cache index */
	spinlock_t	pag_buf_lock;	/* lock for pag_buf_hash */
	struct rhashtable pag_buf_hash;
	struct xfs_buf_cache	pag_bcache;

	/* background prealloc block trimming */
	struct delayed_work	pag_blockgc_work;
Loading