Commit 47c9f2b3 authored by Linus Torvalds
Browse files

Merge tag 'vfs-6.14-rc1.statx.dio' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

Pull vfs direct-io updates from Christian Brauner:
 "File systems that write out of place usually require different
  alignment for direct I/O writes than what they can do for reads.

  Add a separate dio read align field to statx, as many out of place
  write file systems can easily do reads aligned to the device sector
  size, but require bigger alignment for writes.

  This is usually papered over by falling back to buffered I/O for
  smaller writes and doing read-modify-write cycles, but performance for
  this sucks, so applications benefit from knowing the actual write
  alignment"

* tag 'vfs-6.14-rc1.statx.dio' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
  xfs: report larger dio alignment for COW inodes
  xfs: report the correct read/write dio alignment for reflinked inodes
  xfs: cleanup xfs_vn_getattr
  fs: add STATX_DIO_READ_ALIGN
  fs: reformat the statx definition
parents 7e587c20 cf40ebb2
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -725,6 +725,7 @@ cp_statx(const struct kstat *stat, struct statx __user *buffer)
	tmp.stx_mnt_id = stat->mnt_id;
	tmp.stx_dio_mem_align = stat->dio_mem_align;
	tmp.stx_dio_offset_align = stat->dio_offset_align;
	tmp.stx_dio_read_offset_align = stat->dio_read_offset_align;
	tmp.stx_subvol = stat->subvol;
	tmp.stx_atomic_write_unit_min = stat->atomic_write_unit_min;
	tmp.stx_atomic_write_unit_max = stat->atomic_write_unit_max;
+10 −1
Original line number Diff line number Diff line
@@ -1204,7 +1204,16 @@ xfs_file_ioctl(
		struct xfs_buftarg	*target = xfs_inode_buftarg(ip);
		struct dioattr		da;

		da.d_mem =  da.d_miniosz = target->bt_logical_sectorsize;
		da.d_mem = target->bt_logical_sectorsize;

		/*
		 * See xfs_report_dioalign() for an explanation about why this
		 * reports a value larger than the sector size for COW inodes.
		 */
		if (xfs_is_cow_inode(ip))
			da.d_miniosz = xfs_inode_alloc_unitsize(ip);
		else
			da.d_miniosz = target->bt_logical_sectorsize;
		da.d_maxiosz = INT_MAX & ~(da.d_miniosz - 1);

		if (copy_to_user(arg, &da, sizeof(da)))
+38 −24
Original line number Diff line number Diff line
@@ -573,17 +573,43 @@ xfs_stat_blksize(
}

static void
xfs_get_atomic_write_attr(
xfs_report_dioalign(
	struct xfs_inode	*ip,
	unsigned int		*unit_min,
	unsigned int		*unit_max)
	struct kstat		*stat)
{
	if (!xfs_inode_can_atomicwrite(ip)) {
		*unit_min = *unit_max = 0;
		return;
	struct xfs_buftarg	*target = xfs_inode_buftarg(ip);
	struct block_device	*bdev = target->bt_bdev;

	stat->result_mask |= STATX_DIOALIGN | STATX_DIO_READ_ALIGN;
	stat->dio_mem_align = bdev_dma_alignment(bdev) + 1;

	/*
	 * For COW inodes, we can only perform out of place writes of entire
	 * allocation units (blocks or RT extents).
	 * For writes smaller than the allocation unit, we must fall back to
	 * buffered I/O to perform read-modify-write cycles.  At best this is
	 * highly inefficient; at worst it leads to page cache invalidation
	 * races.  Tell applications to avoid this by reporting the larger write
	 * alignment in dio_offset_align, and the smaller read alignment in
	 * dio_read_offset_align.
	 */
	stat->dio_read_offset_align = bdev_logical_block_size(bdev);
	if (xfs_is_cow_inode(ip))
		stat->dio_offset_align = xfs_inode_alloc_unitsize(ip);
	else
		stat->dio_offset_align = stat->dio_read_offset_align;
}

	*unit_min = *unit_max = ip->i_mount->m_sb.sb_blocksize;
/*
 * Report the atomic write unit limits for @ip through @stat.
 *
 * When xfs_inode_can_atomicwrite() rejects the inode, both limits are
 * reported as zero.  Otherwise the minimum and maximum are the same value:
 * the superblock block size of the inode's mount.
 */
static void
xfs_report_atomic_write(
	struct xfs_inode	*ip,
	struct kstat		*stat)
{
	unsigned int		unit = 0;

	if (xfs_inode_can_atomicwrite(ip))
		unit = ip->i_mount->m_sb.sb_blocksize;

	/* min == max: the same unit is passed for both limits. */
	generic_fill_statx_atomic_writes(stat, unit, unit);
}

STATIC int
@@ -647,22 +673,10 @@ xfs_vn_getattr(
		stat->rdev = inode->i_rdev;
		break;
	case S_IFREG:
		if (request_mask & STATX_DIOALIGN) {
			struct xfs_buftarg	*target = xfs_inode_buftarg(ip);
			struct block_device	*bdev = target->bt_bdev;

			stat->result_mask |= STATX_DIOALIGN;
			stat->dio_mem_align = bdev_dma_alignment(bdev) + 1;
			stat->dio_offset_align = bdev_logical_block_size(bdev);
		}
		if (request_mask & STATX_WRITE_ATOMIC) {
			unsigned int	unit_min, unit_max;

			xfs_get_atomic_write_attr(ip, &unit_min,
					&unit_max);
			generic_fill_statx_atomic_writes(stat,
					unit_min, unit_max);
		}
		if (request_mask & (STATX_DIOALIGN | STATX_DIO_READ_ALIGN))
			xfs_report_dioalign(ip, stat);
		if (request_mask & STATX_WRITE_ATOMIC)
			xfs_report_atomic_write(ip, stat);
		fallthrough;
	default:
		stat->blksize = xfs_stat_blksize(ip);
+1 −0
Original line number Diff line number Diff line
@@ -52,6 +52,7 @@ struct kstat {
	u64		mnt_id;
	u32		dio_mem_align;
	u32		dio_offset_align;
	u32		dio_read_offset_align;
	u64		change_cookie;
	u64		subvol;
	u32		atomic_write_unit_min;
+75 −24
Original line number Diff line number Diff line
@@ -98,43 +98,93 @@ struct statx_timestamp {
 */
struct statx {
	/* 0x00 */
	__u32	stx_mask;	/* What results were written [uncond] */
	__u32	stx_blksize;	/* Preferred general I/O size [uncond] */
	__u64	stx_attributes;	/* Flags conveying information about the file [uncond] */
	/* What results were written [uncond] */
	__u32	stx_mask;

	/* Preferred general I/O size [uncond] */
	__u32	stx_blksize;

	/* Flags conveying information about the file [uncond] */
	__u64	stx_attributes;

	/* 0x10 */
	__u32	stx_nlink;	/* Number of hard links */
	__u32	stx_uid;	/* User ID of owner */
	__u32	stx_gid;	/* Group ID of owner */
	__u16	stx_mode;	/* File mode */
	/* Number of hard links */
	__u32	stx_nlink;

	/* User ID of owner */
	__u32	stx_uid;

	/* Group ID of owner */
	__u32	stx_gid;

	/* File mode */
	__u16	stx_mode;
	__u16	__spare0[1];

	/* 0x20 */
	__u64	stx_ino;	/* Inode number */
	__u64	stx_size;	/* File size */
	__u64	stx_blocks;	/* Number of 512-byte blocks allocated */
	__u64	stx_attributes_mask; /* Mask to show what's supported in stx_attributes */
	/* Inode number */
	__u64	stx_ino;

	/* File size */
	__u64	stx_size;

	/* Number of 512-byte blocks allocated */
	__u64	stx_blocks;

	/* Mask to show what's supported in stx_attributes */
	__u64	stx_attributes_mask;

	/* 0x40 */
	struct statx_timestamp	stx_atime;	/* Last access time */
	struct statx_timestamp	stx_btime;	/* File creation time */
	struct statx_timestamp	stx_ctime;	/* Last attribute change time */
	struct statx_timestamp	stx_mtime;	/* Last data modification time */
	/* Last access time */
	struct statx_timestamp	stx_atime;

	/* File creation time */
	struct statx_timestamp	stx_btime;

	/* Last attribute change time */
	struct statx_timestamp	stx_ctime;

	/* Last data modification time */
	struct statx_timestamp	stx_mtime;

	/* 0x80 */
	__u32	stx_rdev_major;	/* Device ID of special file [if bdev/cdev] */
	/* Device ID of special file [if bdev/cdev] */
	__u32	stx_rdev_major;
	__u32	stx_rdev_minor;
	__u32	stx_dev_major;	/* ID of device containing file [uncond] */

	/* ID of device containing file [uncond] */
	__u32	stx_dev_major;
	__u32	stx_dev_minor;

	/* 0x90 */
	__u64	stx_mnt_id;
	__u32	stx_dio_mem_align;	/* Memory buffer alignment for direct I/O */
	__u32	stx_dio_offset_align;	/* File offset alignment for direct I/O */

	/* Memory buffer alignment for direct I/O */
	__u32	stx_dio_mem_align;

	/* File offset alignment for direct I/O */
	__u32	stx_dio_offset_align;

	/* 0xa0 */
	__u64	stx_subvol;	/* Subvolume identifier */
	__u32	stx_atomic_write_unit_min;	/* Min atomic write unit in bytes */
	__u32	stx_atomic_write_unit_max;	/* Max atomic write unit in bytes */
	/* Subvolume identifier */
	__u64	stx_subvol;

	/* Min atomic write unit in bytes */
	__u32	stx_atomic_write_unit_min;

	/* Max atomic write unit in bytes */
	__u32	stx_atomic_write_unit_max;

	/* 0xb0 */
	__u32   stx_atomic_write_segments_max;	/* Max atomic write segment count */
	__u32   __spare1[1];
	/* Max atomic write segment count */
	__u32   stx_atomic_write_segments_max;

	/* File offset alignment for direct I/O reads */
	__u32	stx_dio_read_offset_align;

	/* 0xb8 */
	__u64	__spare3[9];	/* Spare space for future expansion */

	/* 0x100 */
};

@@ -164,6 +214,7 @@ struct statx {
#define STATX_MNT_ID_UNIQUE	0x00004000U	/* Want/got extended stx_mnt_id */
#define STATX_SUBVOL		0x00008000U	/* Want/got stx_subvol */
#define STATX_WRITE_ATOMIC	0x00010000U	/* Want/got atomic_write_* fields */
#define STATX_DIO_READ_ALIGN	0x00020000U	/* Want/got dio read alignment info */

#define STATX__RESERVED		0x80000000U	/* Reserved for future struct statx expansion */