Commit 2c0ff615 authored by Carlos Maiolino
Browse files

Merge branch 'xfs-7.1-merge' into for-next



Signed-off-by: Carlos Maiolino <cem@kernel.org>
parents e5966096 c1f95543
Loading
Loading
Loading
Loading
+4 −1
Original line number Diff line number Diff line
@@ -995,7 +995,8 @@ struct xfs_rtgroup_geometry {
	__u32 rg_sick;		/* o: sick things in ag */
	__u32 rg_checked;	/* o: checked metadata in ag */
	__u32 rg_flags;		/* i/o: flags for this ag */
	__u32 rg_reserved[27];	/* o: zero */
	__u32 rg_writepointer;  /* o: write pointer block offset for zoned */
	__u32 rg_reserved[26];	/* o: zero */
};
#define XFS_RTGROUP_GEOM_SICK_SUPER	(1U << 0)  /* superblock */
#define XFS_RTGROUP_GEOM_SICK_BITMAP	(1U << 1)  /* rtbitmap */
@@ -1003,6 +1004,8 @@ struct xfs_rtgroup_geometry {
#define XFS_RTGROUP_GEOM_SICK_RMAPBT	(1U << 3)  /* reverse mappings */
#define XFS_RTGROUP_GEOM_SICK_REFCNTBT	(1U << 4)  /* reference counts */

#define XFS_RTGROUP_GEOM_WRITEPOINTER  (1U << 0)  /* write pointer */

/* Health monitor event domains */

/* affects the whole fs */
+69 −26
Original line number Diff line number Diff line
@@ -560,6 +560,72 @@ xfs_zoned_write_space_reserve(
			flags, ac);
}

/*
 * Extend the in-core inode size to cover a completed direct I/O write of
 * @size bytes at @offset, then hand the range to xfs_setfilesize().
 *
 * Several I/O completions may race to move EOF, so the test-and-set of
 * i_size is done under ip->i_flags_lock.  Without that serialisation EOF
 * could end up moving backwards, and Bad Things Happen when it does.
 *
 * I/O completion only ever extends EOF, which makes an unlocked check a
 * safe fast path: if the write already lands within the current EOF there
 * is nothing to update and no reason to take the spinlock.  If we race with
 * another update we either still end beyond EOF (and recheck under the
 * lock) or we now end within EOF (and skip the update entirely).
 *
 * Returns 0 when no extension was needed, otherwise the return value of
 * xfs_setfilesize().
 */
static int
xfs_dio_endio_set_isize(
	struct inode		*inode,
	loff_t			offset,
	ssize_t			size)
{
	struct xfs_inode	*ip = XFS_I(inode);
	loff_t			new_eof = offset + size;
	bool			extending;

	/* Unlocked fast path: the write ended at or before the current EOF. */
	if (new_eof <= i_size_read(inode))
		return 0;

	/* Recheck under the lock; another completion may have moved EOF. */
	spin_lock(&ip->i_flags_lock);
	extending = new_eof > i_size_read(inode);
	if (extending)
		i_size_write(inode, new_eof);
	spin_unlock(&ip->i_flags_lock);

	if (!extending)
		return 0;

	/* Called only after dropping the spinlock, as in the original. */
	return xfs_setfilesize(ip, offset, size);
}

/*
 * Direct I/O write completion handler for zoned inodes.
 *
 * The completion flags are asserted not to contain IOMAP_DIO_UNWRITTEN or
 * IOMAP_DIO_COW, so on success this only accounts the written bytes and
 * extends the inode size if the write ended beyond the current EOF.
 *
 * Returns -EIO if the filesystem is shut down, @error if the I/O failed,
 * 0 for a zero-length completion, and otherwise the result of
 * xfs_dio_endio_set_isize().
 */
static int
xfs_zoned_dio_write_end_io(
	struct kiocb		*iocb,
	ssize_t			size,
	int			error,
	unsigned		flags)
{
	struct inode		*inode = file_inode(iocb->ki_filp);
	struct xfs_inode	*ip = XFS_I(inode);
	loff_t			pos = iocb->ki_pos;
	unsigned int		nofs_scope;
	int			ret;

	ASSERT(!(flags & (IOMAP_DIO_UNWRITTEN | IOMAP_DIO_COW)));

	trace_xfs_end_io_direct_write(ip, pos, size);

	/* A shut down filesystem takes precedence over the I/O status. */
	if (xfs_is_shutdown(ip->i_mount))
		return -EIO;

	/* Failed I/O: propagate the error without touching the inode. */
	if (error)
		return error;

	/* Nothing was written, so there is nothing to account or extend. */
	if (!size)
		return 0;

	XFS_STATS_ADD(ip->i_mount, xs_write_bytes, size);

	/* The size update runs between memalloc_nofs_save() and _restore(). */
	nofs_scope = memalloc_nofs_save();
	ret = xfs_dio_endio_set_isize(inode, pos, size);
	memalloc_nofs_restore(nofs_scope);

	return ret;
}

static int
xfs_dio_write_end_io(
	struct kiocb		*iocb,
@@ -572,8 +638,7 @@ xfs_dio_write_end_io(
	loff_t			offset = iocb->ki_pos;
	unsigned int		nofs_flag;

	ASSERT(!xfs_is_zoned_inode(ip) ||
	       !(flags & (IOMAP_DIO_UNWRITTEN | IOMAP_DIO_COW)));
	ASSERT(!xfs_is_zoned_inode(ip));

	trace_xfs_end_io_direct_write(ip, offset, size);

@@ -623,30 +688,8 @@ xfs_dio_write_end_io(
	 * with the on-disk inode size being outside the in-core inode size. We
	 * have no other method of updating EOF for AIO, so always do it here
	 * if necessary.
	 *
	 * We need to lock the test/set EOF update as we can be racing with
	 * other IO completions here to update the EOF. Failing to serialise
	 * here can result in EOF moving backwards and Bad Things Happen when
	 * that occurs.
	 *
	 * As IO completion only ever extends EOF, we can do an unlocked check
	 * here to avoid taking the spinlock. If we land within the current EOF,
	 * then we do not need to do an extending update at all, and we don't
	 * need to take the lock to check this. If we race with an update moving
	 * EOF, then we'll either still be beyond EOF and need to take the lock,
	 * or we'll be within EOF and we don't need to take it at all.
	 */
	if (offset + size <= i_size_read(inode))
		goto out;

	spin_lock(&ip->i_flags_lock);
	if (offset + size > i_size_read(inode)) {
		i_size_write(inode, offset + size);
		spin_unlock(&ip->i_flags_lock);
		error = xfs_setfilesize(ip, offset, size);
	} else {
		spin_unlock(&ip->i_flags_lock);
	}
	error = xfs_dio_endio_set_isize(inode, offset, size);

out:
	memalloc_nofs_restore(nofs_flag);
@@ -688,7 +731,7 @@ xfs_dio_zoned_submit_io(
static const struct iomap_dio_ops xfs_dio_zoned_write_ops = {
	.bio_set	= &iomap_ioend_bioset,
	.submit_io	= xfs_dio_zoned_submit_io,
	.end_io		= xfs_dio_write_end_io,
	.end_io		= xfs_zoned_dio_write_end_io,
};

/*
+19 −0
Original line number Diff line number Diff line
@@ -37,12 +37,15 @@
#include "xfs_ioctl.h"
#include "xfs_xattr.h"
#include "xfs_rtbitmap.h"
#include "xfs_rtrmap_btree.h"
#include "xfs_file.h"
#include "xfs_exchrange.h"
#include "xfs_handle.h"
#include "xfs_rtgroup.h"
#include "xfs_healthmon.h"
#include "xfs_verify_media.h"
#include "xfs_zone_priv.h"
#include "xfs_zone_alloc.h"

#include <linux/mount.h>
#include <linux/fileattr.h>
@@ -413,6 +416,7 @@ xfs_ioc_rtgroup_geometry(
{
	struct xfs_rtgroup	*rtg;
	struct xfs_rtgroup_geometry rgeo;
	xfs_rgblock_t		highest_rgbno;
	int			error;

	if (copy_from_user(&rgeo, arg, sizeof(rgeo)))
@@ -433,6 +437,21 @@ xfs_ioc_rtgroup_geometry(
	if (error)
		return error;

	if (xfs_has_zoned(mp)) {
		xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP);
		if (rtg->rtg_open_zone) {
			rgeo.rg_writepointer = rtg->rtg_open_zone->oz_allocated;
		} else {
			highest_rgbno = xfs_rtrmap_highest_rgbno(rtg);
			if (highest_rgbno == NULLRGBLOCK)
				rgeo.rg_writepointer = 0;
			else
				rgeo.rg_writepointer = highest_rgbno + 1;
		}
		xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_RMAP);
		rgeo.rg_flags |= XFS_RTGROUP_GEOM_WRITEPOINTER;
	}

	if (copy_to_user(arg, &rgeo, sizeof(rgeo)))
		return -EFAULT;
	return 0;
+21 −23
Original line number Diff line number Diff line
@@ -189,27 +189,16 @@ xfs_open_zone_mark_full(
		xfs_zone_account_reclaimable(rtg, rtg_blocks(rtg) - used);
}

static void
xfs_zone_record_blocks(
	struct xfs_trans	*tp,
static inline void
xfs_zone_inc_written(
	struct xfs_open_zone	*oz,
	xfs_fsblock_t		fsbno,
	xfs_filblks_t		len)
{
	struct xfs_mount	*mp = tp->t_mountp;
	struct xfs_rtgroup	*rtg = oz->oz_rtg;
	struct xfs_inode	*rmapip = rtg_rmap(rtg);

	trace_xfs_zone_record_blocks(oz, xfs_rtb_to_rgbno(mp, fsbno), len);
	xfs_assert_ilocked(rtg_rmap(oz->oz_rtg), XFS_ILOCK_EXCL);

	xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP);
	xfs_rtgroup_trans_join(tp, rtg, XFS_RTGLOCK_RMAP);
	rmapip->i_used_blocks += len;
	ASSERT(rmapip->i_used_blocks <= rtg_blocks(rtg));
	oz->oz_written += len;
	if (oz->oz_written == rtg_blocks(rtg))
	if (oz->oz_written == rtg_blocks(oz->oz_rtg))
		xfs_open_zone_mark_full(oz);
	xfs_trans_log_inode(tp, rmapip, XFS_ILOG_CORE);
}

/*
@@ -227,9 +216,7 @@ xfs_zone_skip_blocks(
	trace_xfs_zone_skip_blocks(oz, 0, len);

	xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP);
	oz->oz_written += len;
	if (oz->oz_written == rtg_blocks(rtg))
		xfs_open_zone_mark_full(oz);
	xfs_zone_inc_written(oz, len);
	xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_RMAP);

	xfs_add_frextents(rtg_mount(rtg), len);
@@ -244,6 +231,8 @@ xfs_zoned_map_extent(
	xfs_fsblock_t		old_startblock)
{
	struct xfs_bmbt_irec	data;
	struct xfs_rtgroup	*rtg = oz->oz_rtg;
	struct xfs_inode	*rmapip = rtg_rmap(rtg);
	int			nmaps = 1;
	int			error;

@@ -302,7 +291,15 @@ xfs_zoned_map_extent(
		}
	}

	xfs_zone_record_blocks(tp, oz, new->br_startblock, new->br_blockcount);
	trace_xfs_zone_record_blocks(oz,
		xfs_rtb_to_rgbno(tp->t_mountp, new->br_startblock),
		new->br_blockcount);
	xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP);
	xfs_rtgroup_trans_join(tp, rtg, XFS_RTGLOCK_RMAP);
	rmapip->i_used_blocks += new->br_blockcount;
	ASSERT(rmapip->i_used_blocks <= rtg_blocks(rtg));
	xfs_zone_inc_written(oz, new->br_blockcount);
	xfs_trans_log_inode(tp, rmapip, XFS_ILOG_CORE);

	/* Map the new blocks into the data fork. */
	xfs_bmap_map_extent(tp, ip, XFS_DATA_FORK, new);
@@ -681,10 +678,11 @@ xfs_select_zone_nowait(
	if (oz)
		goto out_unlock;

	if (pack_tight)
	if (pack_tight) {
		oz = xfs_select_open_zone_mru(zi, write_hint);
		if (oz)
			goto out_unlock;
	}

	/*
	 * See if we can open a new zone and use that so that data for different
@@ -695,7 +693,7 @@ xfs_select_zone_nowait(
		goto out_unlock;

	/*
	 * Try to find an zone that is an ok match to colocate data with.
	 * Try to find a zone that is an ok match to colocate data with.
	 */
	oz = xfs_select_open_zone_lru(zi, write_hint, XFS_ZONE_ALLOC_OK);
	if (oz)
+18 −6
Original line number Diff line number Diff line
@@ -170,25 +170,37 @@ xfs_zoned_need_gc(
	s64			available, free, threshold;
	s32			remainder;

	/* If we have no reclaimable blocks, running GC is useless. */
	if (!xfs_zoned_have_reclaimable(mp->m_zone_info))
		return false;

	/*
	 * In order to avoid file fragmentation as much as possible, we should
	 * make sure that we can open enough zones. So trigger GC if the number
	 * of blocks immediately available for writes is lower than the total
	 * number of blocks from all possible open zones.
	 */
	available = xfs_estimate_freecounter(mp, XC_FREE_RTAVAILABLE);

	if (available <
	    xfs_rtgs_to_rfsbs(mp, mp->m_max_open_zones - XFS_OPEN_GC_ZONES))
		return true;

	free = xfs_estimate_freecounter(mp, XC_FREE_RTEXTENTS);
	/*
	 * For cases where the user wants to be more aggressive with GC,
	 * the sysfs attribute zonegc_low_space may be set to a non zero value,
	 * to indicate that GC should try to maintain at least zonegc_low_space
	 * percent of the free space to be directly available for writing. Check
	 * this here.
	 */
	if (!mp->m_zonegc_low_space)
		return false;

	free = xfs_estimate_freecounter(mp, XC_FREE_RTEXTENTS);
	threshold = div_s64_rem(free, 100, &remainder);
	threshold = threshold * mp->m_zonegc_low_space +
		    remainder * div_s64(mp->m_zonegc_low_space, 100);

	if (available < threshold)
		return true;

	return false;
	return available < threshold;
}

static struct xfs_zone_gc_data *
Loading