Loading fs/xfs/libxfs/xfs_fs.h +4 −1 Original line number Diff line number Diff line Loading @@ -995,7 +995,8 @@ struct xfs_rtgroup_geometry { __u32 rg_sick; /* o: sick things in ag */ __u32 rg_checked; /* o: checked metadata in ag */ __u32 rg_flags; /* i/o: flags for this ag */ __u32 rg_reserved[27]; /* o: zero */ __u32 rg_writepointer; /* o: write pointer block offset for zoned */ __u32 rg_reserved[26]; /* o: zero */ }; #define XFS_RTGROUP_GEOM_SICK_SUPER (1U << 0) /* superblock */ #define XFS_RTGROUP_GEOM_SICK_BITMAP (1U << 1) /* rtbitmap */ Loading @@ -1003,6 +1004,8 @@ struct xfs_rtgroup_geometry { #define XFS_RTGROUP_GEOM_SICK_RMAPBT (1U << 3) /* reverse mappings */ #define XFS_RTGROUP_GEOM_SICK_REFCNTBT (1U << 4) /* reference counts */ #define XFS_RTGROUP_GEOM_WRITEPOINTER (1U << 0) /* write pointer */ /* Health monitor event domains */ /* affects the whole fs */ Loading fs/xfs/xfs_file.c +69 −26 Original line number Diff line number Diff line Loading @@ -560,6 +560,72 @@ xfs_zoned_write_space_reserve( flags, ac); } /* * We need to lock the test/set EOF update as we can be racing with * other IO completions here to update the EOF. Failing to serialise * here can result in EOF moving backwards and Bad Things Happen when * that occurs. * * As IO completion only ever extends EOF, we can do an unlocked check * here to avoid taking the spinlock. If we land within the current EOF, * then we do not need to do an extending update at all, and we don't * need to take the lock to check this. If we race with an update moving * EOF, then we'll either still be beyond EOF and need to take the lock, * or we'll be within EOF and we don't need to take it at all. */ static int xfs_dio_endio_set_isize( struct inode *inode, loff_t offset, ssize_t size) { struct xfs_inode *ip = XFS_I(inode); if (offset + size <= i_size_read(inode)) return 0; spin_lock(&ip->i_flags_lock); if (offset + size <= i_size_read(inode)) { spin_unlock(&ip->i_flags_lock); return 0; } i_size_write(inode, offset + size); spin_unlock(&ip->i_flags_lock); return xfs_setfilesize(ip, offset, size); } static int xfs_zoned_dio_write_end_io( struct kiocb *iocb, ssize_t size, int error, unsigned flags) { struct inode *inode = file_inode(iocb->ki_filp); struct xfs_inode *ip = XFS_I(inode); unsigned int nofs_flag; ASSERT(!(flags & (IOMAP_DIO_UNWRITTEN | IOMAP_DIO_COW))); trace_xfs_end_io_direct_write(ip, iocb->ki_pos, size); if (xfs_is_shutdown(ip->i_mount)) return -EIO; if (error || !size) return error; XFS_STATS_ADD(ip->i_mount, xs_write_bytes, size); nofs_flag = memalloc_nofs_save(); error = xfs_dio_endio_set_isize(inode, iocb->ki_pos, size); memalloc_nofs_restore(nofs_flag); return error; } static int xfs_dio_write_end_io( struct kiocb *iocb, Loading @@ -572,8 +638,7 @@ xfs_dio_write_end_io( loff_t offset = iocb->ki_pos; unsigned int nofs_flag; ASSERT(!xfs_is_zoned_inode(ip) || !(flags & (IOMAP_DIO_UNWRITTEN | IOMAP_DIO_COW))); ASSERT(!xfs_is_zoned_inode(ip)); trace_xfs_end_io_direct_write(ip, offset, size); Loading Loading @@ -623,30 +688,8 @@ xfs_dio_write_end_io( * with the on-disk inode size being outside the in-core inode size. We * have no other method of updating EOF for AIO, so always do it here * if necessary. * * We need to lock the test/set EOF update as we can be racing with * other IO completions here to update the EOF. Failing to serialise * here can result in EOF moving backwards and Bad Things Happen when * that occurs. * * As IO completion only ever extends EOF, we can do an unlocked check * here to avoid taking the spinlock. If we land within the current EOF, * then we do not need to do an extending update at all, and we don't * need to take the lock to check this. If we race with an update moving * EOF, then we'll either still be beyond EOF and need to take the lock, * or we'll be within EOF and we don't need to take it at all. */ if (offset + size <= i_size_read(inode)) goto out; spin_lock(&ip->i_flags_lock); if (offset + size > i_size_read(inode)) { i_size_write(inode, offset + size); spin_unlock(&ip->i_flags_lock); error = xfs_setfilesize(ip, offset, size); } else { spin_unlock(&ip->i_flags_lock); } error = xfs_dio_endio_set_isize(inode, offset, size); out: memalloc_nofs_restore(nofs_flag); Loading Loading @@ -688,7 +731,7 @@ xfs_dio_zoned_submit_io( static const struct iomap_dio_ops xfs_dio_zoned_write_ops = { .bio_set = &iomap_ioend_bioset, .submit_io = xfs_dio_zoned_submit_io, .end_io = xfs_dio_write_end_io, .end_io = xfs_zoned_dio_write_end_io, }; /* Loading fs/xfs/xfs_ioctl.c +19 −0 Original line number Diff line number Diff line Loading @@ -37,12 +37,15 @@ #include "xfs_ioctl.h" #include "xfs_xattr.h" #include "xfs_rtbitmap.h" #include "xfs_rtrmap_btree.h" #include "xfs_file.h" #include "xfs_exchrange.h" #include "xfs_handle.h" #include "xfs_rtgroup.h" #include "xfs_healthmon.h" #include "xfs_verify_media.h" #include "xfs_zone_priv.h" #include "xfs_zone_alloc.h" #include <linux/mount.h> #include <linux/fileattr.h> Loading Loading @@ -413,6 +416,7 @@ xfs_ioc_rtgroup_geometry( { struct xfs_rtgroup *rtg; struct xfs_rtgroup_geometry rgeo; xfs_rgblock_t highest_rgbno; int error; if (copy_from_user(&rgeo, arg, sizeof(rgeo))) Loading @@ -433,6 +437,21 @@ xfs_ioc_rtgroup_geometry( if (error) return error; if (xfs_has_zoned(mp)) { xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP); if (rtg->rtg_open_zone) { rgeo.rg_writepointer = rtg->rtg_open_zone->oz_allocated; } else { highest_rgbno = xfs_rtrmap_highest_rgbno(rtg); if (highest_rgbno == NULLRGBLOCK) rgeo.rg_writepointer = 0; else rgeo.rg_writepointer = highest_rgbno + 1; } xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_RMAP); rgeo.rg_flags |= XFS_RTGROUP_GEOM_WRITEPOINTER; } if (copy_to_user(arg, &rgeo, sizeof(rgeo))) return -EFAULT; return 0; Loading fs/xfs/xfs_zone_alloc.c +21 −23 Original line number Diff line number Diff line Loading @@ -189,27 +189,16 @@ xfs_open_zone_mark_full( xfs_zone_account_reclaimable(rtg, rtg_blocks(rtg) - used); } static void xfs_zone_record_blocks( struct xfs_trans *tp, static inline void xfs_zone_inc_written( struct xfs_open_zone *oz, xfs_fsblock_t fsbno, xfs_filblks_t len) { struct xfs_mount *mp = tp->t_mountp; struct xfs_rtgroup *rtg = oz->oz_rtg; struct xfs_inode *rmapip = rtg_rmap(rtg); trace_xfs_zone_record_blocks(oz, xfs_rtb_to_rgbno(mp, fsbno), len); xfs_assert_ilocked(rtg_rmap(oz->oz_rtg), XFS_ILOCK_EXCL); xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP); xfs_rtgroup_trans_join(tp, rtg, XFS_RTGLOCK_RMAP); rmapip->i_used_blocks += len; ASSERT(rmapip->i_used_blocks <= rtg_blocks(rtg)); oz->oz_written += len; if (oz->oz_written == rtg_blocks(rtg)) if (oz->oz_written == rtg_blocks(oz->oz_rtg)) xfs_open_zone_mark_full(oz); xfs_trans_log_inode(tp, rmapip, XFS_ILOG_CORE); } /* Loading @@ -227,9 +216,7 @@ xfs_zone_skip_blocks( trace_xfs_zone_skip_blocks(oz, 0, len); xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP); oz->oz_written += len; if (oz->oz_written == rtg_blocks(rtg)) xfs_open_zone_mark_full(oz); xfs_zone_inc_written(oz, len); xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_RMAP); xfs_add_frextents(rtg_mount(rtg), len); Loading @@ -244,6 +231,8 @@ xfs_zoned_map_extent( xfs_fsblock_t old_startblock) { struct xfs_bmbt_irec data; struct xfs_rtgroup *rtg = oz->oz_rtg; struct xfs_inode *rmapip = rtg_rmap(rtg); int nmaps = 1; int error; Loading Loading @@ -302,7 +291,15 @@ xfs_zoned_map_extent( } } xfs_zone_record_blocks(tp, oz, new->br_startblock, new->br_blockcount); trace_xfs_zone_record_blocks(oz, xfs_rtb_to_rgbno(tp->t_mountp, new->br_startblock), new->br_blockcount); xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP); xfs_rtgroup_trans_join(tp, rtg, XFS_RTGLOCK_RMAP); rmapip->i_used_blocks += new->br_blockcount; ASSERT(rmapip->i_used_blocks <= rtg_blocks(rtg)); xfs_zone_inc_written(oz, new->br_blockcount); xfs_trans_log_inode(tp, rmapip, XFS_ILOG_CORE); /* Map the new blocks into the data fork. */ xfs_bmap_map_extent(tp, ip, XFS_DATA_FORK, new); Loading Loading @@ -681,10 +678,11 @@ xfs_select_zone_nowait( if (oz) goto out_unlock; if (pack_tight) if (pack_tight) { oz = xfs_select_open_zone_mru(zi, write_hint); if (oz) goto out_unlock; } /* * See if we can open a new zone and use that so that data for different Loading @@ -695,7 +693,7 @@ xfs_select_zone_nowait( goto out_unlock; /* * Try to find an zone that is an ok match to colocate data with. * Try to find a zone that is an ok match to colocate data with. */ oz = xfs_select_open_zone_lru(zi, write_hint, XFS_ZONE_ALLOC_OK); if (oz) Loading fs/xfs/xfs_zone_gc.c +18 −6 Original line number Diff line number Diff line Loading @@ -170,25 +170,37 @@ xfs_zoned_need_gc( s64 available, free, threshold; s32 remainder; /* If we have no reclaimable blocks, running GC is useless. */ if (!xfs_zoned_have_reclaimable(mp->m_zone_info)) return false; /* * In order to avoid file fragmentation as much as possible, we should * make sure that we can open enough zones. So trigger GC if the number * of blocks immediately available for writes is lower than the total * number of blocks from all possible open zones. */ available = xfs_estimate_freecounter(mp, XC_FREE_RTAVAILABLE); if (available < xfs_rtgs_to_rfsbs(mp, mp->m_max_open_zones - XFS_OPEN_GC_ZONES)) return true; free = xfs_estimate_freecounter(mp, XC_FREE_RTEXTENTS); /* * For cases where the user wants to be more aggressive with GC, * the sysfs attribute zonegc_low_space may be set to a non zero value, * to indicate that GC should try to maintain at least zonegc_low_space * percent of the free space to be directly available for writing. Check * this here. */ if (!mp->m_zonegc_low_space) return false; free = xfs_estimate_freecounter(mp, XC_FREE_RTEXTENTS); threshold = div_s64_rem(free, 100, &remainder); threshold = threshold * mp->m_zonegc_low_space + remainder * div_s64(mp->m_zonegc_low_space, 100); if (available < threshold) return true; return false; return available < threshold; } static struct xfs_zone_gc_data * Loading Loading
fs/xfs/libxfs/xfs_fs.h +4 −1 Original line number Diff line number Diff line Loading @@ -995,7 +995,8 @@ struct xfs_rtgroup_geometry { __u32 rg_sick; /* o: sick things in ag */ __u32 rg_checked; /* o: checked metadata in ag */ __u32 rg_flags; /* i/o: flags for this ag */ __u32 rg_reserved[27]; /* o: zero */ __u32 rg_writepointer; /* o: write pointer block offset for zoned */ __u32 rg_reserved[26]; /* o: zero */ }; #define XFS_RTGROUP_GEOM_SICK_SUPER (1U << 0) /* superblock */ #define XFS_RTGROUP_GEOM_SICK_BITMAP (1U << 1) /* rtbitmap */ Loading @@ -1003,6 +1004,8 @@ struct xfs_rtgroup_geometry { #define XFS_RTGROUP_GEOM_SICK_RMAPBT (1U << 3) /* reverse mappings */ #define XFS_RTGROUP_GEOM_SICK_REFCNTBT (1U << 4) /* reference counts */ #define XFS_RTGROUP_GEOM_WRITEPOINTER (1U << 0) /* write pointer */ /* Health monitor event domains */ /* affects the whole fs */ Loading
fs/xfs/xfs_file.c +69 −26 Original line number Diff line number Diff line Loading @@ -560,6 +560,72 @@ xfs_zoned_write_space_reserve( flags, ac); } /* * We need to lock the test/set EOF update as we can be racing with * other IO completions here to update the EOF. Failing to serialise * here can result in EOF moving backwards and Bad Things Happen when * that occurs. * * As IO completion only ever extends EOF, we can do an unlocked check * here to avoid taking the spinlock. If we land within the current EOF, * then we do not need to do an extending update at all, and we don't * need to take the lock to check this. If we race with an update moving * EOF, then we'll either still be beyond EOF and need to take the lock, * or we'll be within EOF and we don't need to take it at all. */ static int xfs_dio_endio_set_isize( struct inode *inode, loff_t offset, ssize_t size) { struct xfs_inode *ip = XFS_I(inode); if (offset + size <= i_size_read(inode)) return 0; spin_lock(&ip->i_flags_lock); if (offset + size <= i_size_read(inode)) { spin_unlock(&ip->i_flags_lock); return 0; } i_size_write(inode, offset + size); spin_unlock(&ip->i_flags_lock); return xfs_setfilesize(ip, offset, size); } static int xfs_zoned_dio_write_end_io( struct kiocb *iocb, ssize_t size, int error, unsigned flags) { struct inode *inode = file_inode(iocb->ki_filp); struct xfs_inode *ip = XFS_I(inode); unsigned int nofs_flag; ASSERT(!(flags & (IOMAP_DIO_UNWRITTEN | IOMAP_DIO_COW))); trace_xfs_end_io_direct_write(ip, iocb->ki_pos, size); if (xfs_is_shutdown(ip->i_mount)) return -EIO; if (error || !size) return error; XFS_STATS_ADD(ip->i_mount, xs_write_bytes, size); nofs_flag = memalloc_nofs_save(); error = xfs_dio_endio_set_isize(inode, iocb->ki_pos, size); memalloc_nofs_restore(nofs_flag); return error; } static int xfs_dio_write_end_io( struct kiocb *iocb, Loading @@ -572,8 +638,7 @@ xfs_dio_write_end_io( loff_t offset = iocb->ki_pos; unsigned int nofs_flag; ASSERT(!xfs_is_zoned_inode(ip) || !(flags & (IOMAP_DIO_UNWRITTEN | IOMAP_DIO_COW))); ASSERT(!xfs_is_zoned_inode(ip)); trace_xfs_end_io_direct_write(ip, offset, size); Loading Loading @@ -623,30 +688,8 @@ xfs_dio_write_end_io( * with the on-disk inode size being outside the in-core inode size. We * have no other method of updating EOF for AIO, so always do it here * if necessary. * * We need to lock the test/set EOF update as we can be racing with * other IO completions here to update the EOF. Failing to serialise * here can result in EOF moving backwards and Bad Things Happen when * that occurs. * * As IO completion only ever extends EOF, we can do an unlocked check * here to avoid taking the spinlock. If we land within the current EOF, * then we do not need to do an extending update at all, and we don't * need to take the lock to check this. If we race with an update moving * EOF, then we'll either still be beyond EOF and need to take the lock, * or we'll be within EOF and we don't need to take it at all. */ if (offset + size <= i_size_read(inode)) goto out; spin_lock(&ip->i_flags_lock); if (offset + size > i_size_read(inode)) { i_size_write(inode, offset + size); spin_unlock(&ip->i_flags_lock); error = xfs_setfilesize(ip, offset, size); } else { spin_unlock(&ip->i_flags_lock); } error = xfs_dio_endio_set_isize(inode, offset, size); out: memalloc_nofs_restore(nofs_flag); Loading Loading @@ -688,7 +731,7 @@ xfs_dio_zoned_submit_io( static const struct iomap_dio_ops xfs_dio_zoned_write_ops = { .bio_set = &iomap_ioend_bioset, .submit_io = xfs_dio_zoned_submit_io, .end_io = xfs_dio_write_end_io, .end_io = xfs_zoned_dio_write_end_io, }; /* Loading
fs/xfs/xfs_ioctl.c +19 −0 Original line number Diff line number Diff line Loading @@ -37,12 +37,15 @@ #include "xfs_ioctl.h" #include "xfs_xattr.h" #include "xfs_rtbitmap.h" #include "xfs_rtrmap_btree.h" #include "xfs_file.h" #include "xfs_exchrange.h" #include "xfs_handle.h" #include "xfs_rtgroup.h" #include "xfs_healthmon.h" #include "xfs_verify_media.h" #include "xfs_zone_priv.h" #include "xfs_zone_alloc.h" #include <linux/mount.h> #include <linux/fileattr.h> Loading Loading @@ -413,6 +416,7 @@ xfs_ioc_rtgroup_geometry( { struct xfs_rtgroup *rtg; struct xfs_rtgroup_geometry rgeo; xfs_rgblock_t highest_rgbno; int error; if (copy_from_user(&rgeo, arg, sizeof(rgeo))) Loading @@ -433,6 +437,21 @@ xfs_ioc_rtgroup_geometry( if (error) return error; if (xfs_has_zoned(mp)) { xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP); if (rtg->rtg_open_zone) { rgeo.rg_writepointer = rtg->rtg_open_zone->oz_allocated; } else { highest_rgbno = xfs_rtrmap_highest_rgbno(rtg); if (highest_rgbno == NULLRGBLOCK) rgeo.rg_writepointer = 0; else rgeo.rg_writepointer = highest_rgbno + 1; } xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_RMAP); rgeo.rg_flags |= XFS_RTGROUP_GEOM_WRITEPOINTER; } if (copy_to_user(arg, &rgeo, sizeof(rgeo))) return -EFAULT; return 0; Loading
fs/xfs/xfs_zone_alloc.c +21 −23 Original line number Diff line number Diff line Loading @@ -189,27 +189,16 @@ xfs_open_zone_mark_full( xfs_zone_account_reclaimable(rtg, rtg_blocks(rtg) - used); } static void xfs_zone_record_blocks( struct xfs_trans *tp, static inline void xfs_zone_inc_written( struct xfs_open_zone *oz, xfs_fsblock_t fsbno, xfs_filblks_t len) { struct xfs_mount *mp = tp->t_mountp; struct xfs_rtgroup *rtg = oz->oz_rtg; struct xfs_inode *rmapip = rtg_rmap(rtg); trace_xfs_zone_record_blocks(oz, xfs_rtb_to_rgbno(mp, fsbno), len); xfs_assert_ilocked(rtg_rmap(oz->oz_rtg), XFS_ILOCK_EXCL); xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP); xfs_rtgroup_trans_join(tp, rtg, XFS_RTGLOCK_RMAP); rmapip->i_used_blocks += len; ASSERT(rmapip->i_used_blocks <= rtg_blocks(rtg)); oz->oz_written += len; if (oz->oz_written == rtg_blocks(rtg)) if (oz->oz_written == rtg_blocks(oz->oz_rtg)) xfs_open_zone_mark_full(oz); xfs_trans_log_inode(tp, rmapip, XFS_ILOG_CORE); } /* Loading @@ -227,9 +216,7 @@ xfs_zone_skip_blocks( trace_xfs_zone_skip_blocks(oz, 0, len); xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP); oz->oz_written += len; if (oz->oz_written == rtg_blocks(rtg)) xfs_open_zone_mark_full(oz); xfs_zone_inc_written(oz, len); xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_RMAP); xfs_add_frextents(rtg_mount(rtg), len); Loading @@ -244,6 +231,8 @@ xfs_zoned_map_extent( xfs_fsblock_t old_startblock) { struct xfs_bmbt_irec data; struct xfs_rtgroup *rtg = oz->oz_rtg; struct xfs_inode *rmapip = rtg_rmap(rtg); int nmaps = 1; int error; Loading Loading @@ -302,7 +291,15 @@ xfs_zoned_map_extent( } } xfs_zone_record_blocks(tp, oz, new->br_startblock, new->br_blockcount); trace_xfs_zone_record_blocks(oz, xfs_rtb_to_rgbno(tp->t_mountp, new->br_startblock), new->br_blockcount); xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP); xfs_rtgroup_trans_join(tp, rtg, XFS_RTGLOCK_RMAP); rmapip->i_used_blocks += new->br_blockcount; ASSERT(rmapip->i_used_blocks <= rtg_blocks(rtg)); xfs_zone_inc_written(oz, new->br_blockcount); xfs_trans_log_inode(tp, rmapip, XFS_ILOG_CORE); /* Map the new blocks into the data fork. */ xfs_bmap_map_extent(tp, ip, XFS_DATA_FORK, new); Loading Loading @@ -681,10 +678,11 @@ xfs_select_zone_nowait( if (oz) goto out_unlock; if (pack_tight) if (pack_tight) { oz = xfs_select_open_zone_mru(zi, write_hint); if (oz) goto out_unlock; } /* * See if we can open a new zone and use that so that data for different Loading @@ -695,7 +693,7 @@ xfs_select_zone_nowait( goto out_unlock; /* * Try to find an zone that is an ok match to colocate data with. * Try to find a zone that is an ok match to colocate data with. */ oz = xfs_select_open_zone_lru(zi, write_hint, XFS_ZONE_ALLOC_OK); if (oz) Loading
fs/xfs/xfs_zone_gc.c +18 −6 Original line number Diff line number Diff line Loading @@ -170,25 +170,37 @@ xfs_zoned_need_gc( s64 available, free, threshold; s32 remainder; /* If we have no reclaimable blocks, running GC is useless. */ if (!xfs_zoned_have_reclaimable(mp->m_zone_info)) return false; /* * In order to avoid file fragmentation as much as possible, we should * make sure that we can open enough zones. So trigger GC if the number * of blocks immediately available for writes is lower than the total * number of blocks from all possible open zones. */ available = xfs_estimate_freecounter(mp, XC_FREE_RTAVAILABLE); if (available < xfs_rtgs_to_rfsbs(mp, mp->m_max_open_zones - XFS_OPEN_GC_ZONES)) return true; free = xfs_estimate_freecounter(mp, XC_FREE_RTEXTENTS); /* * For cases where the user wants to be more aggressive with GC, * the sysfs attribute zonegc_low_space may be set to a non zero value, * to indicate that GC should try to maintain at least zonegc_low_space * percent of the free space to be directly available for writing. Check * this here. */ if (!mp->m_zonegc_low_space) return false; free = xfs_estimate_freecounter(mp, XC_FREE_RTEXTENTS); threshold = div_s64_rem(free, 100, &remainder); threshold = threshold * mp->m_zonegc_low_space + remainder * div_s64(mp->m_zonegc_low_space, 100); if (available < threshold) return true; return false; return available < threshold; } static struct xfs_zone_gc_data * Loading