Commit 83bac569 authored by Christoph Hellwig's avatar Christoph Hellwig Committed by Carlos Maiolino
Browse files

xfs: prevent gc from picking the same zone twice



When we are picking a zone for gc it might already be in the pipeline
which can lead to us moving the same data twice resulting in in write
amplification and a very unfortunate case where we keep on garbage
collecting the zone we just filled with migrated data stopping all
forward progress.

Fix this by introducing a count of on-going GC operations on a zone, and
skip any zone with ongoing GC when picking a new victim.

Fixes: 080d01c4 ("xfs: implement zoned garbage collection")
Signed-off-by: default avatarHans Holmberg <hans.holmberg@wdc.com>
Co-developed-by: default avatarHans Holmberg <hans.holmberg@wdc.com>
Signed-off-by: default avatarChristoph Hellwig <hch@lst.de>
Reviewed-by: default avatarDamien Le Moal <dlemoal@kernel.org>
Reviewed-by: default avatarCarlos Maiolino <cmaiolino@redhat.com>
Reviewed-by: default avatarDarrick J. Wong <djwong@kernel.org>
Tested-by: default avatarDamien Le Moal <dlemoal@kernel.org>
Signed-off-by: default avatarCarlos Maiolino <cem@kernel.org>
parent f477af0c
Loading
Loading
Loading
Loading
+6 −0
Original line number Diff line number Diff line
@@ -50,6 +50,12 @@ struct xfs_rtgroup {
		uint8_t			*rtg_rsum_cache;
		struct xfs_open_zone	*rtg_open_zone;
	};

	/*
	 * Count of outstanding GC operations for zoned XFS.  Any RTG with a
	 * non-zero rtg_gccount will not be picked as new GC victim.
	 */
	atomic_t		rtg_gccount;
};

/*
+27 −0
Original line number Diff line number Diff line
@@ -114,6 +114,8 @@ struct xfs_gc_bio {
	/* Open Zone being written to */
	struct xfs_open_zone		*oz;

	struct xfs_rtgroup		*victim_rtg;

	/* Bio used for reads and writes, including the bvec used by it */
	struct bio_vec			bv;
	struct bio			bio;	/* must be last */
@@ -264,6 +266,7 @@ xfs_zone_gc_iter_init(
	iter->rec_count = 0;
	iter->rec_idx = 0;
	iter->victim_rtg = victim_rtg;
	atomic_inc(&victim_rtg->rtg_gccount);
}

/*
@@ -362,6 +365,7 @@ xfs_zone_gc_query(

	return 0;
done:
	atomic_dec(&iter->victim_rtg->rtg_gccount);
	xfs_rtgroup_rele(iter->victim_rtg);
	iter->victim_rtg = NULL;
	return 0;
@@ -451,6 +455,20 @@ xfs_zone_gc_pick_victim_from(
		if (!rtg)
			continue;

		/*
		 * If the zone is already undergoing GC, don't pick it again.
		 *
		 * This prevents us from picking one of the zones for which we
		 * already submitted GC I/O, but for which the remapping hasn't
		 * concluded yet.  This won't cause data corruption, but
		 * increases write amplification and slows down GC, so this is
		 * a bad thing.
		 */
		if (atomic_read(&rtg->rtg_gccount)) {
			xfs_rtgroup_rele(rtg);
			continue;
		}

		/* skip zones that are just waiting for a reset */
		if (rtg_rmap(rtg)->i_used_blocks == 0 ||
		    rtg_rmap(rtg)->i_used_blocks >= victim_used) {
@@ -688,6 +706,9 @@ xfs_zone_gc_start_chunk(
	chunk->scratch = &data->scratch[data->scratch_idx];
	chunk->data = data;
	chunk->oz = oz;
	chunk->victim_rtg = iter->victim_rtg;
	atomic_inc(&chunk->victim_rtg->rtg_group.xg_active_ref);
	atomic_inc(&chunk->victim_rtg->rtg_gccount);

	bio->bi_iter.bi_sector = xfs_rtb_to_daddr(mp, chunk->old_startblock);
	bio->bi_end_io = xfs_zone_gc_end_io;
@@ -710,6 +731,8 @@ static void
xfs_zone_gc_free_chunk(
	struct xfs_gc_bio	*chunk)
{
	atomic_dec(&chunk->victim_rtg->rtg_gccount);
	xfs_rtgroup_rele(chunk->victim_rtg);
	list_del(&chunk->entry);
	xfs_open_zone_put(chunk->oz);
	xfs_irele(chunk->ip);
@@ -770,6 +793,10 @@ xfs_zone_gc_split_write(
	split_chunk->oz = chunk->oz;
	atomic_inc(&chunk->oz->oz_ref);

	split_chunk->victim_rtg = chunk->victim_rtg;
	atomic_inc(&chunk->victim_rtg->rtg_group.xg_active_ref);
	atomic_inc(&chunk->victim_rtg->rtg_gccount);

	chunk->offset += split_len;
	chunk->len -= split_len;
	chunk->old_startblock += XFS_B_TO_FSB(data->mp, split_len);