Commit 5e60ca3f authored by Chandan Babu R
Browse files

Merge tag 'repair-prep-for-bulk-loading-6.8_2023-12-15' of...

Merge tag 'repair-prep-for-bulk-loading-6.8_2023-12-15' of https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux into xfs-6.8-mergeB

xfs: prepare repair for bulk loading

Before we start merging the online repair functions, let's improve the
bulk loading code a bit.  First, we need to fix a misinteraction between
the AIL and the btree bulkloader wherein the delwri at the end of the
bulk load fails to queue a buffer for writeback if it happens to be on
the AIL list.

Second, we introduce a defer ops barrier object so that the process of
reaping blocks after a repair cannot queue more than two extents per EFI
log item.  This increases our exposure to leaking blocks if the system
goes down during a reap, but also should prevent transaction overflows,
which result in the system going down.

Third, we change the bulkloader itself to copy multiple records into a
block if possible, and add some debugging knobs so that developers can
control the slack factors, just like they can do for xfs_repair.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>

* tag 'repair-prep-for-bulk-loading-6.8_2023-12-15' of https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux:
  xfs: constrain dirty buffers while formatting a staged btree
  xfs: move btree bulkload record initialization to ->get_record implementations
  xfs: add debug knobs to control btree bulk load slack factors
  xfs: read leaf blocks when computing keys for bulkloading into node blocks
  xfs: set XBF_DONE on newly formatted btree block that are ready for writing
  xfs: force all buffers to be written during btree bulk load
parents 0573676f e069d549
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -1330,7 +1330,7 @@ xfs_btree_get_buf_block(
 * Read in the buffer at the given ptr and return the buffer and
 * the block pointer within the buffer.
 */
STATIC int
int
xfs_btree_read_buf_block(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_ptr	*ptr,
+3 −0
Original line number Diff line number Diff line
@@ -700,6 +700,9 @@ void xfs_btree_set_ptr_null(struct xfs_btree_cur *cur,
int xfs_btree_get_buf_block(struct xfs_btree_cur *cur,
		const union xfs_btree_ptr *ptr, struct xfs_btree_block **block,
		struct xfs_buf **bpp);
int xfs_btree_read_buf_block(struct xfs_btree_cur *cur,
		const union xfs_btree_ptr *ptr, int flags,
		struct xfs_btree_block **block, struct xfs_buf **bpp);
void xfs_btree_set_sibling(struct xfs_btree_cur *cur,
		struct xfs_btree_block *block, const union xfs_btree_ptr *ptr,
		int lr);
+56 −22
Original line number Diff line number Diff line
@@ -333,20 +333,41 @@ xfs_btree_commit_ifakeroot(
/*
 * Put a btree block that we're loading onto the ordered list and release it.
 * The btree blocks will be written to disk when bulk loading is finished.
 * If we reach the dirty buffer threshold, flush them to disk before
 * continuing.
 */
static void
static int
xfs_btree_bload_drop_buf(
	struct xfs_btree_bload		*bbl,
	struct list_head		*buffers_list,
	struct xfs_buf			**bpp)
{
	if (*bpp == NULL)
		return;
	struct xfs_buf			*bp = *bpp;
	int				error;

	if (!xfs_buf_delwri_queue(*bpp, buffers_list))
		ASSERT(0);
	if (!bp)
		return 0;

	xfs_buf_relse(*bpp);
	/*
	 * Mark this buffer XBF_DONE (i.e. uptodate) so that a subsequent
	 * xfs_buf_read will not pointlessly reread the contents from the disk.
	 */
	bp->b_flags |= XBF_DONE;

	xfs_buf_delwri_queue_here(bp, buffers_list);
	xfs_buf_relse(bp);
	*bpp = NULL;
	bbl->nr_dirty++;

	if (!bbl->max_dirty || bbl->nr_dirty < bbl->max_dirty)
		return 0;

	error = xfs_buf_delwri_submit(buffers_list);
	if (error)
		return error;

	bbl->nr_dirty = 0;
	return 0;
}

/*
@@ -418,7 +439,10 @@ xfs_btree_bload_prep_block(
	 */
	if (*blockp)
		xfs_btree_set_sibling(cur, *blockp, &new_ptr, XFS_BB_RIGHTSIB);
	xfs_btree_bload_drop_buf(buffers_list, bpp);

	ret = xfs_btree_bload_drop_buf(bbl, buffers_list, bpp);
	if (ret)
		return ret;

	/* Initialize the new btree block. */
	xfs_btree_init_block_cur(cur, new_bp, level, nr_this_block);
@@ -436,22 +460,19 @@ STATIC int
xfs_btree_bload_leaf(
	struct xfs_btree_cur		*cur,
	unsigned int			recs_this_block,
	xfs_btree_bload_get_record_fn	get_record,
	xfs_btree_bload_get_records_fn	get_records,
	struct xfs_btree_block		*block,
	void				*priv)
{
	unsigned int			j;
	unsigned int			j = 1;
	int				ret;

	/* Fill the leaf block with records. */
	for (j = 1; j <= recs_this_block; j++) {
		union xfs_btree_rec	*block_rec;

		ret = get_record(cur, priv);
		if (ret)
	while (j <= recs_this_block) {
		ret = get_records(cur, j, block, recs_this_block - j + 1, priv);
		if (ret < 0)
			return ret;
		block_rec = xfs_btree_rec_addr(cur, j, block);
		cur->bc_ops->init_rec_from_cur(cur, block_rec);
		j += ret;
	}

	return 0;
@@ -485,7 +506,12 @@ xfs_btree_bload_node(

		ASSERT(!xfs_btree_ptr_is_null(cur, child_ptr));

		ret = xfs_btree_get_buf_block(cur, child_ptr, &child_block,
		/*
		 * Read the lower-level block in case the buffer for it has
		 * been reclaimed.  LRU refs will be set on the block, which is
		 * desirable if the new btree commits.
		 */
		ret = xfs_btree_read_buf_block(cur, child_ptr, 0, &child_block,
				&child_bp);
		if (ret)
			return ret;
@@ -764,6 +790,7 @@ xfs_btree_bload(
	cur->bc_nlevels = bbl->btree_height;
	xfs_btree_set_ptr_null(cur, &child_ptr);
	xfs_btree_set_ptr_null(cur, &ptr);
	bbl->nr_dirty = 0;

	xfs_btree_bload_level_geometry(cur, bbl, level, nr_this_level,
			&avg_per_block, &blocks, &blocks_with_extra);
@@ -789,7 +816,7 @@ xfs_btree_bload(
		trace_xfs_btree_bload_block(cur, level, i, blocks, &ptr,
				nr_this_block);

		ret = xfs_btree_bload_leaf(cur, nr_this_block, bbl->get_record,
		ret = xfs_btree_bload_leaf(cur, nr_this_block, bbl->get_records,
				block, priv);
		if (ret)
			goto out;
@@ -802,7 +829,10 @@ xfs_btree_bload(
			xfs_btree_copy_ptrs(cur, &child_ptr, &ptr, 1);
	}
	total_blocks += blocks;
	xfs_btree_bload_drop_buf(&buffers_list, &bp);

	ret = xfs_btree_bload_drop_buf(bbl, &buffers_list, &bp);
	if (ret)
		goto out;

	/* Populate the internal btree nodes. */
	for (level = 1; level < cur->bc_nlevels; level++) {
@@ -844,7 +874,11 @@ xfs_btree_bload(
				xfs_btree_copy_ptrs(cur, &first_ptr, &ptr, 1);
		}
		total_blocks += blocks;
		xfs_btree_bload_drop_buf(&buffers_list, &bp);

		ret = xfs_btree_bload_drop_buf(bbl, &buffers_list, &bp);
		if (ret)
			goto out;

		xfs_btree_copy_ptrs(cur, &child_ptr, &first_ptr, 1);
	}

+20 −5
Original line number Diff line number Diff line
@@ -47,7 +47,9 @@ void xfs_btree_commit_ifakeroot(struct xfs_btree_cur *cur, struct xfs_trans *tp,
		int whichfork, const struct xfs_btree_ops *ops);

/* Bulk loading of staged btrees. */
typedef int (*xfs_btree_bload_get_record_fn)(struct xfs_btree_cur *cur, void *priv);
typedef int (*xfs_btree_bload_get_records_fn)(struct xfs_btree_cur *cur,
		unsigned int idx, struct xfs_btree_block *block,
		unsigned int nr_wanted, void *priv);
typedef int (*xfs_btree_bload_claim_block_fn)(struct xfs_btree_cur *cur,
		union xfs_btree_ptr *ptr, void *priv);
typedef size_t (*xfs_btree_bload_iroot_size_fn)(struct xfs_btree_cur *cur,
@@ -55,11 +57,14 @@ typedef size_t (*xfs_btree_bload_iroot_size_fn)(struct xfs_btree_cur *cur,

struct xfs_btree_bload {
	/*
	 * This function will be called nr_records times to load records into
	 * the btree.  The function does this by setting the cursor's bc_rec
	 * field in in-core format.  Records must be returned in sort order.
	 * This function will be called to load @nr_wanted records into the
	 * btree.  The implementation does this by setting the cursor's bc_rec
	 * field in in-core format and using init_rec_from_cur to set the
	 * records in the btree block.  Records must be returned in sort order.
	 * The function must return the number of records loaded or the usual
	 * negative errno.
	 */
	xfs_btree_bload_get_record_fn	get_record;
	xfs_btree_bload_get_records_fn	get_records;

	/*
	 * This function will be called nr_blocks times to obtain a pointer
@@ -107,6 +112,16 @@ struct xfs_btree_bload {
	 * height of the new btree.
	 */
	unsigned int			btree_height;

	/*
	 * Flush the new btree block buffer list to disk after this many blocks
	 * have been formatted.  Zero prohibits writing any buffers until all
	 * blocks have been formatted.
	 */
	uint16_t			max_dirty;

	/* Number of dirty buffers. */
	uint16_t			nr_dirty;
};

int xfs_btree_bload_compute_geometry(struct xfs_btree_cur *cur,
+9 −3
Original line number Diff line number Diff line
@@ -32,6 +32,7 @@
 * btree bulk loading code calculates for us.  However, there are some
 * exceptions to this rule:
 *
 * (0) If someone turned one of the debug knobs.
 * (1) If this is a per-AG btree and the AG has less than 10% space free.
 * (2) If this is an inode btree and the FS has less than 10% space free.

@@ -47,9 +48,13 @@ xrep_newbt_estimate_slack(
	uint64_t		free;
	uint64_t		sz;

	/* Let the btree code compute the default slack values. */
	bload->leaf_slack = -1;
	bload->node_slack = -1;
	/*
	 * The xfs_globals values are set to -1 (i.e. take the bload defaults)
	 * unless someone has set them otherwise, so we just pull the values
	 * here.
	 */
	bload->leaf_slack = xfs_globals.bload_leaf_slack;
	bload->node_slack = xfs_globals.bload_node_slack;

	if (sc->ops->type == ST_PERAG) {
		free = sc->sa.pag->pagf_freeblks;
@@ -89,6 +94,7 @@ xrep_newbt_init_ag(
	xnr->alloc_hint = alloc_hint;
	xnr->resv = resv;
	INIT_LIST_HEAD(&xnr->resv_list);
	xnr->bload.max_dirty = XFS_B_TO_FSBT(sc->mp, 256U << 10); /* 256K */
	xrep_newbt_estimate_slack(xnr);
}

Loading