Commit 9db0d7c5 authored by Linus Torvalds
Browse files

Merge tag 'xfs-fixes-6.18-rc4' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull xfs fixes from Carlos Maiolino:
 "Just a single bug fix (and documentation for the issue)"

* tag 'xfs-fixes-6.18-rc4' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
  xfs: document another racy GC case in xfs_zoned_map_extent
  xfs: prevent gc from picking the same zone twice
parents cb7f9fc3 0db22d7e
Loading
Loading
Loading
Loading
+6 −0
Original line number Diff line number Diff line
@@ -50,6 +50,12 @@ struct xfs_rtgroup {
		uint8_t			*rtg_rsum_cache;
		struct xfs_open_zone	*rtg_open_zone;
	};

	/*
	 * Count of outstanding GC operations for zoned XFS.  Any RTG with a
	 * non-zero rtg_gccount will not be picked as a new GC victim.
	 */
	atomic_t		rtg_gccount;
};

/*
+8 −0
Original line number Diff line number Diff line
@@ -246,6 +246,14 @@ xfs_zoned_map_extent(
	 * If a data write raced with this GC write, keep the existing data in
	 * the data fork, mark our newly written GC extent as reclaimable, then
	 * move on to the next extent.
	 *
	 * Note that this can also happen when racing with operations that do
	 * not actually invalidate the data, but just move it to a different
	 * inode (XFS_IOC_EXCHANGE_RANGE), or to a different offset inside the
	 * inode (FALLOC_FL_COLLAPSE_RANGE / FALLOC_FL_INSERT_RANGE).  If the
	 * data was just moved around, GC fails to free the zone, but the zone
	 * becomes a GC candidate again as soon as all previous GC I/O has
	 * finished, and these blocks will be moved out eventually.
	 */
	if (old_startblock != NULLFSBLOCK &&
	    old_startblock != data.br_startblock)
+27 −0
Original line number Diff line number Diff line
@@ -114,6 +114,8 @@ struct xfs_gc_bio {
	/* Open Zone being written to */
	struct xfs_open_zone		*oz;

	struct xfs_rtgroup		*victim_rtg;

	/* Bio used for reads and writes, including the bvec used by it */
	struct bio_vec			bv;
	struct bio			bio;	/* must be last */
@@ -264,6 +266,7 @@ xfs_zone_gc_iter_init(
	iter->rec_count = 0;
	iter->rec_idx = 0;
	iter->victim_rtg = victim_rtg;
	atomic_inc(&victim_rtg->rtg_gccount);
}

/*
@@ -362,6 +365,7 @@ xfs_zone_gc_query(

	return 0;
done:
	atomic_dec(&iter->victim_rtg->rtg_gccount);
	xfs_rtgroup_rele(iter->victim_rtg);
	iter->victim_rtg = NULL;
	return 0;
@@ -451,6 +455,20 @@ xfs_zone_gc_pick_victim_from(
		if (!rtg)
			continue;

		/*
		 * If the zone is already undergoing GC, don't pick it again.
		 *
		 * This prevents us from picking one of the zones for which we
		 * already submitted GC I/O, but for which the remapping hasn't
		 * concluded yet.  This won't cause data corruption, but
		 * increases write amplification and slows down GC, so this is
		 * a bad thing.
		 */
		if (atomic_read(&rtg->rtg_gccount)) {
			xfs_rtgroup_rele(rtg);
			continue;
		}

		/* skip zones that are just waiting for a reset */
		if (rtg_rmap(rtg)->i_used_blocks == 0 ||
		    rtg_rmap(rtg)->i_used_blocks >= victim_used) {
@@ -688,6 +706,9 @@ xfs_zone_gc_start_chunk(
	chunk->scratch = &data->scratch[data->scratch_idx];
	chunk->data = data;
	chunk->oz = oz;
	chunk->victim_rtg = iter->victim_rtg;
	atomic_inc(&chunk->victim_rtg->rtg_group.xg_active_ref);
	atomic_inc(&chunk->victim_rtg->rtg_gccount);

	bio->bi_iter.bi_sector = xfs_rtb_to_daddr(mp, chunk->old_startblock);
	bio->bi_end_io = xfs_zone_gc_end_io;
@@ -710,6 +731,8 @@ static void
xfs_zone_gc_free_chunk(
	struct xfs_gc_bio	*chunk)
{
	atomic_dec(&chunk->victim_rtg->rtg_gccount);
	xfs_rtgroup_rele(chunk->victim_rtg);
	list_del(&chunk->entry);
	xfs_open_zone_put(chunk->oz);
	xfs_irele(chunk->ip);
@@ -770,6 +793,10 @@ xfs_zone_gc_split_write(
	split_chunk->oz = chunk->oz;
	atomic_inc(&chunk->oz->oz_ref);

	split_chunk->victim_rtg = chunk->victim_rtg;
	atomic_inc(&chunk->victim_rtg->rtg_group.xg_active_ref);
	atomic_inc(&chunk->victim_rtg->rtg_gccount);

	chunk->offset += split_len;
	chunk->len -= split_len;
	chunk->old_startblock += XFS_B_TO_FSB(data->mp, split_len);