Unverified commit cf40ebb2, authored by Christian Brauner
Browse files

Merge patch series "add STATX_DIO_READ_ALIGN v3"

Christoph Hellwig <hch@lst.de> says:

File systems that write out of place usually require different alignment
for direct I/O writes than what they can do for reads.  This series tries
to address this by adding yet another statx field.

* patches from https://lore.kernel.org/r/20250109083109.1441561-1-hch@lst.de:
  xfs: report larger dio alignment for COW inodes
  xfs: report the correct read/write dio alignment for reflinked inodes
  xfs: cleanup xfs_vn_getattr
  fs: add STATX_DIO_READ_ALIGN
  fs: reformat the statx definition

Link: https://lore.kernel.org/r/20250109083109.1441561-1-hch@lst.de


Signed-off-by: Christian Brauner <brauner@kernel.org>
parents 40384c84 468210ec
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -725,6 +725,7 @@ cp_statx(const struct kstat *stat, struct statx __user *buffer)
	tmp.stx_mnt_id = stat->mnt_id;
	tmp.stx_dio_mem_align = stat->dio_mem_align;
	tmp.stx_dio_offset_align = stat->dio_offset_align;
	tmp.stx_dio_read_offset_align = stat->dio_read_offset_align;
	tmp.stx_subvol = stat->subvol;
	tmp.stx_atomic_write_unit_min = stat->atomic_write_unit_min;
	tmp.stx_atomic_write_unit_max = stat->atomic_write_unit_max;
+10 −1
Original line number Diff line number Diff line
@@ -1204,7 +1204,16 @@ xfs_file_ioctl(
		struct xfs_buftarg	*target = xfs_inode_buftarg(ip);
		struct dioattr		da;

		da.d_mem =  da.d_miniosz = target->bt_logical_sectorsize;
		da.d_mem = target->bt_logical_sectorsize;

		/*
		 * See xfs_report_dioalign() for an explanation about why this
		 * reports a value larger than the sector size for COW inodes.
		 */
		if (xfs_is_cow_inode(ip))
			da.d_miniosz = xfs_inode_alloc_unitsize(ip);
		else
			da.d_miniosz = target->bt_logical_sectorsize;
		da.d_maxiosz = INT_MAX & ~(da.d_miniosz - 1);

		if (copy_to_user(arg, &da, sizeof(da)))
+38 −24
Original line number Diff line number Diff line
@@ -573,17 +573,43 @@ xfs_stat_blksize(
}

static void
xfs_get_atomic_write_attr(
xfs_report_dioalign(
	struct xfs_inode	*ip,
	unsigned int		*unit_min,
	unsigned int		*unit_max)
	struct kstat		*stat)
{
	if (!xfs_inode_can_atomicwrite(ip)) {
		*unit_min = *unit_max = 0;
		return;
	struct xfs_buftarg	*target = xfs_inode_buftarg(ip);
	struct block_device	*bdev = target->bt_bdev;

	stat->result_mask |= STATX_DIOALIGN | STATX_DIO_READ_ALIGN;
	stat->dio_mem_align = bdev_dma_alignment(bdev) + 1;

	/*
	 * For COW inodes, we can only perform out of place writes of entire
	 * allocation units (blocks or RT extents).
	 * For writes smaller than the allocation unit, we must fall back to
	 * buffered I/O to perform read-modify-write cycles.  At best this is
	 * highly inefficient; at worst it leads to page cache invalidation
	 * races.  Tell applications to avoid this by reporting the larger write
	 * alignment in dio_offset_align, and the smaller read alignment in
	 * dio_read_offset_align.
	 */
	stat->dio_read_offset_align = bdev_logical_block_size(bdev);
	if (xfs_is_cow_inode(ip))
		stat->dio_offset_align = xfs_inode_alloc_unitsize(ip);
	else
		stat->dio_offset_align = stat->dio_read_offset_align;
}

	*unit_min = *unit_max = ip->i_mount->m_sb.sb_blocksize;
/*
 * Report the atomic write unit limits for @ip in @stat.
 *
 * When xfs_inode_can_atomicwrite() accepts the inode, both the minimum and
 * the maximum unit are the superblock block size of the inode's mount;
 * otherwise both are reported as zero.  The values are handed to
 * generic_fill_statx_atomic_writes(), which stores them in @stat.
 */
static void
xfs_report_atomic_write(
	struct xfs_inode	*ip,
	struct kstat		*stat)
{
	/* Minimum and maximum are always identical here, so track one value. */
	unsigned int		awu = 0;

	if (xfs_inode_can_atomicwrite(ip))
		awu = ip->i_mount->m_sb.sb_blocksize;

	generic_fill_statx_atomic_writes(stat, awu, awu);
}

STATIC int
@@ -647,22 +673,10 @@ xfs_vn_getattr(
		stat->rdev = inode->i_rdev;
		break;
	case S_IFREG:
		if (request_mask & STATX_DIOALIGN) {
			struct xfs_buftarg	*target = xfs_inode_buftarg(ip);
			struct block_device	*bdev = target->bt_bdev;

			stat->result_mask |= STATX_DIOALIGN;
			stat->dio_mem_align = bdev_dma_alignment(bdev) + 1;
			stat->dio_offset_align = bdev_logical_block_size(bdev);
		}
		if (request_mask & STATX_WRITE_ATOMIC) {
			unsigned int	unit_min, unit_max;

			xfs_get_atomic_write_attr(ip, &unit_min,
					&unit_max);
			generic_fill_statx_atomic_writes(stat,
					unit_min, unit_max);
		}
		if (request_mask & (STATX_DIOALIGN | STATX_DIO_READ_ALIGN))
			xfs_report_dioalign(ip, stat);
		if (request_mask & STATX_WRITE_ATOMIC)
			xfs_report_atomic_write(ip, stat);
		fallthrough;
	default:
		stat->blksize = xfs_stat_blksize(ip);
+1 −0
Original line number Diff line number Diff line
@@ -52,6 +52,7 @@ struct kstat {
	u64		mnt_id;
	u32		dio_mem_align;
	u32		dio_offset_align;
	u32		dio_read_offset_align;
	u64		change_cookie;
	u64		subvol;
	u32		atomic_write_unit_min;
+75 −24
Original line number Diff line number Diff line
@@ -98,43 +98,93 @@ struct statx_timestamp {
 */
struct statx {
	/* 0x00 */
	__u32	stx_mask;	/* What results were written [uncond] */
	__u32	stx_blksize;	/* Preferred general I/O size [uncond] */
	__u64	stx_attributes;	/* Flags conveying information about the file [uncond] */
	/* What results were written [uncond] */
	__u32	stx_mask;

	/* Preferred general I/O size [uncond] */
	__u32	stx_blksize;

	/* Flags conveying information about the file [uncond] */
	__u64	stx_attributes;

	/* 0x10 */
	__u32	stx_nlink;	/* Number of hard links */
	__u32	stx_uid;	/* User ID of owner */
	__u32	stx_gid;	/* Group ID of owner */
	__u16	stx_mode;	/* File mode */
	/* Number of hard links */
	__u32	stx_nlink;

	/* User ID of owner */
	__u32	stx_uid;

	/* Group ID of owner */
	__u32	stx_gid;

	/* File mode */
	__u16	stx_mode;
	__u16	__spare0[1];

	/* 0x20 */
	__u64	stx_ino;	/* Inode number */
	__u64	stx_size;	/* File size */
	__u64	stx_blocks;	/* Number of 512-byte blocks allocated */
	__u64	stx_attributes_mask; /* Mask to show what's supported in stx_attributes */
	/* Inode number */
	__u64	stx_ino;

	/* File size */
	__u64	stx_size;

	/* Number of 512-byte blocks allocated */
	__u64	stx_blocks;

	/* Mask to show what's supported in stx_attributes */
	__u64	stx_attributes_mask;

	/* 0x40 */
	struct statx_timestamp	stx_atime;	/* Last access time */
	struct statx_timestamp	stx_btime;	/* File creation time */
	struct statx_timestamp	stx_ctime;	/* Last attribute change time */
	struct statx_timestamp	stx_mtime;	/* Last data modification time */
	/* Last access time */
	struct statx_timestamp	stx_atime;

	/* File creation time */
	struct statx_timestamp	stx_btime;

	/* Last attribute change time */
	struct statx_timestamp	stx_ctime;

	/* Last data modification time */
	struct statx_timestamp	stx_mtime;

	/* 0x80 */
	__u32	stx_rdev_major;	/* Device ID of special file [if bdev/cdev] */
	/* Device ID of special file [if bdev/cdev] */
	__u32	stx_rdev_major;
	__u32	stx_rdev_minor;
	__u32	stx_dev_major;	/* ID of device containing file [uncond] */

	/* ID of device containing file [uncond] */
	__u32	stx_dev_major;
	__u32	stx_dev_minor;

	/* 0x90 */
	__u64	stx_mnt_id;
	__u32	stx_dio_mem_align;	/* Memory buffer alignment for direct I/O */
	__u32	stx_dio_offset_align;	/* File offset alignment for direct I/O */

	/* Memory buffer alignment for direct I/O */
	__u32	stx_dio_mem_align;

	/* File offset alignment for direct I/O */
	__u32	stx_dio_offset_align;

	/* 0xa0 */
	__u64	stx_subvol;	/* Subvolume identifier */
	__u32	stx_atomic_write_unit_min;	/* Min atomic write unit in bytes */
	__u32	stx_atomic_write_unit_max;	/* Max atomic write unit in bytes */
	/* Subvolume identifier */
	__u64	stx_subvol;

	/* Min atomic write unit in bytes */
	__u32	stx_atomic_write_unit_min;

	/* Max atomic write unit in bytes */
	__u32	stx_atomic_write_unit_max;

	/* 0xb0 */
	__u32   stx_atomic_write_segments_max;	/* Max atomic write segment count */
	__u32   __spare1[1];
	/* Max atomic write segment count */
	__u32   stx_atomic_write_segments_max;

	/* File offset alignment for direct I/O reads */
	__u32	stx_dio_read_offset_align;

	/* 0xb8 */
	__u64	__spare3[9];	/* Spare space for future expansion */

	/* 0x100 */
};

@@ -164,6 +214,7 @@ struct statx {
#define STATX_MNT_ID_UNIQUE	0x00004000U	/* Want/got extended stx_mount_id */
#define STATX_SUBVOL		0x00008000U	/* Want/got stx_subvol */
#define STATX_WRITE_ATOMIC	0x00010000U	/* Want/got atomic_write_* fields */
#define STATX_DIO_READ_ALIGN	0x00020000U	/* Want/got dio read alignment info */

#define STATX__RESERVED		0x80000000U	/* Reserved for future struct statx expansion */