Commit ee25861f authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'vfs-6.12.fallocate' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

Pull vfs fallocate updates from Christian Brauner:
 "This contains work to try and clean up some of the fallocate mode
  handling. Currently, it confusingly mixes operation modes and an
  optional flag.

  The work here tries to better define operation modes and optional
  flags allowing the core and filesystem code to use switch statements
  to switch on the operation mode"

* tag 'vfs-6.12.fallocate' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
  xfs: refactor xfs_file_fallocate
  xfs: move the xfs_is_always_cow_inode check into xfs_alloc_file_space
  xfs: call xfs_flush_unmap_range from xfs_free_file_space
  fs: sort out the fallocate mode vs flag mess
  ext4: remove tracing for FALLOC_FL_NO_HIDE_STALE
  block: remove checks for FALLOC_FL_NO_HIDE_STALE
parents 3352633c 7fbabbb4
Loading
Loading
Loading
Loading
+1 −9
Original line number Diff line number Diff line
@@ -771,7 +771,7 @@ static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)

#define	BLKDEV_FALLOC_FL_SUPPORTED					\
		(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |		\
		 FALLOC_FL_ZERO_RANGE | FALLOC_FL_NO_HIDE_STALE)
		 FALLOC_FL_ZERO_RANGE)

static long blkdev_fallocate(struct file *file, int mode, loff_t start,
			     loff_t len)
@@ -830,14 +830,6 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start,
					     len >> SECTOR_SHIFT, GFP_KERNEL,
					     BLKDEV_ZERO_NOFALLBACK);
		break;
	case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE | FALLOC_FL_NO_HIDE_STALE:
		error = truncate_bdev_range(bdev, file_to_blk_mode(file), start, end);
		if (error)
			goto fail;

		error = blkdev_issue_discard(bdev, start >> SECTOR_SHIFT,
					     len >> SECTOR_SHIFT, GFP_KERNEL);
		break;
	default:
		error = -EOPNOTSUPP;
	}
+25 −26
Original line number Diff line number Diff line
@@ -252,40 +252,39 @@ int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
	if (offset < 0 || len <= 0)
		return -EINVAL;

	/* Return error if mode is not supported */
	if (mode & ~FALLOC_FL_SUPPORTED_MASK)
	if (mode & ~(FALLOC_FL_MODE_MASK | FALLOC_FL_KEEP_SIZE))
		return -EOPNOTSUPP;

	/* Punch hole and zero range are mutually exclusive */
	if ((mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) ==
	    (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE))
	/*
	 * Modes are exclusive, even if that is not obvious from the encoding
	 * as bit masks and the mix with the flag in the same namespace.
	 *
	 * To make things even more complicated, FALLOC_FL_ALLOCATE_RANGE is
	 * encoded as no bit set.
	 */
	switch (mode & FALLOC_FL_MODE_MASK) {
	case FALLOC_FL_ALLOCATE_RANGE:
	case FALLOC_FL_UNSHARE_RANGE:
	case FALLOC_FL_ZERO_RANGE:
		break;
	case FALLOC_FL_PUNCH_HOLE:
		if (!(mode & FALLOC_FL_KEEP_SIZE))
			return -EOPNOTSUPP;

	/* Punch hole must have keep size set */
	if ((mode & FALLOC_FL_PUNCH_HOLE) &&
	    !(mode & FALLOC_FL_KEEP_SIZE))
		break;
	case FALLOC_FL_COLLAPSE_RANGE:
	case FALLOC_FL_INSERT_RANGE:
		if (mode & FALLOC_FL_KEEP_SIZE)
			return -EOPNOTSUPP;

	/* Collapse range should only be used exclusively. */
	if ((mode & FALLOC_FL_COLLAPSE_RANGE) &&
	    (mode & ~FALLOC_FL_COLLAPSE_RANGE))
		return -EINVAL;

	/* Insert range should only be used exclusively. */
	if ((mode & FALLOC_FL_INSERT_RANGE) &&
	    (mode & ~FALLOC_FL_INSERT_RANGE))
		return -EINVAL;

	/* Unshare range should only be used with allocate mode. */
	if ((mode & FALLOC_FL_UNSHARE_RANGE) &&
	    (mode & ~(FALLOC_FL_UNSHARE_RANGE | FALLOC_FL_KEEP_SIZE)))
		return -EINVAL;
		break;
	default:
		return -EOPNOTSUPP;
	}

	if (!(file->f_mode & FMODE_WRITE))
		return -EBADF;

	/*
	 * We can only allow pure fallocate on append only files
	 * On append-only files only space preallocation is supported.
	 */
	if ((mode & ~FALLOC_FL_KEEP_SIZE) && IS_APPEND(inode))
		return -EPERM;
+11 −0
Original line number Diff line number Diff line
@@ -653,6 +653,9 @@ xfs_alloc_file_space(
	xfs_bmbt_irec_t		imaps[1], *imapp;
	int			error;

	if (xfs_is_always_cow_inode(ip))
		return 0;

	trace_xfs_alloc_file_space(ip);

	if (xfs_is_shutdown(mp))
@@ -848,6 +851,14 @@ xfs_free_file_space(
	if (len <= 0)	/* if nothing being freed */
		return 0;

	/*
	 * Now AIO and DIO has drained we flush and (if necessary) invalidate
	 * the cached range over the first operation we are about to run.
	 */
	error = xfs_flush_unmap_range(ip, offset, len);
	if (error)
		return error;

	startoffset_fsb = XFS_B_TO_FSB(mp, offset);
	endoffset_fsb = XFS_B_TO_FSBT(mp, offset + len);

+208 −145
Original line number Diff line number Diff line
@@ -852,194 +852,257 @@ static inline bool xfs_file_sync_writes(struct file *filp)
	return false;
}

#define	XFS_FALLOC_FL_SUPPORTED						\
		(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |		\
		 FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE |	\
		 FALLOC_FL_INSERT_RANGE | FALLOC_FL_UNSHARE_RANGE)

STATIC long
xfs_file_fallocate(
static int
xfs_falloc_newsize(
	struct file		*file,
	int			mode,
	loff_t			offset,
	loff_t			len)
	loff_t			len,
	loff_t			*new_size)
{
	struct inode		*inode = file_inode(file);
	struct xfs_inode	*ip = XFS_I(inode);
	long			error;
	uint			iolock = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
	loff_t			new_size = 0;
	bool			do_file_insert = false;

	if (!S_ISREG(inode->i_mode))
		return -EINVAL;
	if (mode & ~XFS_FALLOC_FL_SUPPORTED)
		return -EOPNOTSUPP;

	xfs_ilock(ip, iolock);
	error = xfs_break_layouts(inode, &iolock, BREAK_UNMAP);
	if (error)
		goto out_unlock;
	if ((mode & FALLOC_FL_KEEP_SIZE) || offset + len <= i_size_read(inode))
		return 0;
	*new_size = offset + len;
	return inode_newsize_ok(inode, *new_size);
}

	/*
	 * Must wait for all AIO to complete before we continue as AIO can
	 * change the file size on completion without holding any locks we
	 * currently hold. We must do this first because AIO can update both
	 * the on disk and in memory inode sizes, and the operations that follow
	 * require the in-memory size to be fully up-to-date.
	 */
	inode_dio_wait(inode);
static int
xfs_falloc_setsize(
	struct file		*file,
	loff_t			new_size)
{
	struct iattr iattr = {
		.ia_valid	= ATTR_SIZE,
		.ia_size	= new_size,
	};

	/*
	 * Now AIO and DIO has drained we flush and (if necessary) invalidate
	 * the cached range over the first operation we are about to run.
	 *
	 * We care about zero and collapse here because they both run a hole
	 * punch over the range first. Because that can zero data, and the range
	 * of invalidation for the shift operations is much larger, we still do
	 * the required flush for collapse in xfs_prepare_shift().
	 *
	 * Insert has the same range requirements as collapse, and we extend the
	 * file first which can zero data. Hence insert has the same
	 * flush/invalidate requirements as collapse and so they are both
	 * handled at the right time by xfs_prepare_shift().
	 */
	if (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE |
		    FALLOC_FL_COLLAPSE_RANGE)) {
		error = xfs_flush_unmap_range(ip, offset, len);
		if (error)
			goto out_unlock;
	if (!new_size)
		return 0;
	return xfs_vn_setattr_size(file_mnt_idmap(file), file_dentry(file),
			&iattr);
}

	error = file_modified(file);
	if (error)
		goto out_unlock;
static int
xfs_falloc_collapse_range(
	struct file		*file,
	loff_t			offset,
	loff_t			len)
{
	struct inode		*inode = file_inode(file);
	loff_t			new_size = i_size_read(inode) - len;
	int			error;

	if (mode & FALLOC_FL_PUNCH_HOLE) {
		error = xfs_free_file_space(ip, offset, len);
		if (error)
			goto out_unlock;
	} else if (mode & FALLOC_FL_COLLAPSE_RANGE) {
		if (!xfs_is_falloc_aligned(ip, offset, len)) {
			error = -EINVAL;
			goto out_unlock;
		}
	if (!xfs_is_falloc_aligned(XFS_I(inode), offset, len))
		return -EINVAL;

	/*
		 * There is no need to overlap collapse range with EOF,
		 * in which case it is effectively a truncate operation
	 * There is no need to overlap collapse range with EOF, in which case it
	 * is effectively a truncate operation
	 */
		if (offset + len >= i_size_read(inode)) {
			error = -EINVAL;
			goto out_unlock;
		}

		new_size = i_size_read(inode) - len;
	if (offset + len >= i_size_read(inode))
		return -EINVAL;

		error = xfs_collapse_file_space(ip, offset, len);
	error = xfs_collapse_file_space(XFS_I(inode), offset, len);
	if (error)
			goto out_unlock;
	} else if (mode & FALLOC_FL_INSERT_RANGE) {
		return error;
	return xfs_falloc_setsize(file, new_size);
}

static int
xfs_falloc_insert_range(
	struct file		*file,
	loff_t			offset,
	loff_t			len)
{
	struct inode		*inode = file_inode(file);
	loff_t			isize = i_size_read(inode);
	int			error;

		if (!xfs_is_falloc_aligned(ip, offset, len)) {
			error = -EINVAL;
			goto out_unlock;
		}
	if (!xfs_is_falloc_aligned(XFS_I(inode), offset, len))
		return -EINVAL;

	/*
	 * New inode size must not exceed ->s_maxbytes, accounting for
	 * possible signed overflow.
	 */
		if (inode->i_sb->s_maxbytes - isize < len) {
			error = -EFBIG;
			goto out_unlock;
		}
		new_size = isize + len;
	if (inode->i_sb->s_maxbytes - isize < len)
		return -EFBIG;

	/* Offset should be less than i_size */
		if (offset >= isize) {
			error = -EINVAL;
			goto out_unlock;
		}
		do_file_insert = true;
	} else {
		if (!(mode & FALLOC_FL_KEEP_SIZE) &&
		    offset + len > i_size_read(inode)) {
			new_size = offset + len;
			error = inode_newsize_ok(inode, new_size);
	if (offset >= isize)
		return -EINVAL;

	error = xfs_falloc_setsize(file, isize + len);
	if (error)
				goto out_unlock;
		return error;

	/*
	 * Perform hole insertion now that the file size has been updated so
	 * that if we crash during the operation we don't leave shifted extents
	 * past EOF and hence losing access to the data that is contained within
	 * them.
	 */
	return xfs_insert_file_space(XFS_I(inode), offset, len);
}

		if (mode & FALLOC_FL_ZERO_RANGE) {
/*
			 * Punch a hole and prealloc the range.  We use a hole
			 * punch rather than unwritten extent conversion for two
			 * reasons:
 * Punch a hole and prealloc the range.  We use a hole punch rather than
 * unwritten extent conversion for two reasons:
 *
 *   1.) Hole punch handles partial block zeroing for us.
			 *   2.) If prealloc returns ENOSPC, the file range is
			 *       still zero-valued by virtue of the hole punch.
 *   2.) If prealloc returns ENOSPC, the file range is still zero-valued by
 *	 virtue of the hole punch.
 */
static int
xfs_falloc_zero_range(
	struct file		*file,
	int			mode,
	loff_t			offset,
	loff_t			len)
{
	struct inode		*inode = file_inode(file);
	unsigned int		blksize = i_blocksize(inode);
	loff_t			new_size = 0;
	int			error;

			trace_xfs_zero_file_space(ip);
	trace_xfs_zero_file_space(XFS_I(inode));

			error = xfs_free_file_space(ip, offset, len);
	error = xfs_falloc_newsize(file, mode, offset, len, &new_size);
	if (error)
				goto out_unlock;
		return error;

			len = round_up(offset + len, blksize) -
			      round_down(offset, blksize);
	error = xfs_free_file_space(XFS_I(inode), offset, len);
	if (error)
		return error;

	len = round_up(offset + len, blksize) - round_down(offset, blksize);
	offset = round_down(offset, blksize);
		} else if (mode & FALLOC_FL_UNSHARE_RANGE) {
			error = xfs_reflink_unshare(ip, offset, len);
	error = xfs_alloc_file_space(XFS_I(inode), offset, len);
	if (error)
				goto out_unlock;
		} else {
			/*
			 * If always_cow mode we can't use preallocations and
			 * thus should not create them.
			 */
			if (xfs_is_always_cow_inode(ip)) {
				error = -EOPNOTSUPP;
				goto out_unlock;
			}
		return error;
	return xfs_falloc_setsize(file, new_size);
}

		if (!xfs_is_always_cow_inode(ip)) {
			error = xfs_alloc_file_space(ip, offset, len);
static int
xfs_falloc_unshare_range(
	struct file		*file,
	int			mode,
	loff_t			offset,
	loff_t			len)
{
	struct inode		*inode = file_inode(file);
	loff_t			new_size = 0;
	int			error;

	error = xfs_falloc_newsize(file, mode, offset, len, &new_size);
	if (error)
				goto out_unlock;
		return error;

	error = xfs_reflink_unshare(XFS_I(inode), offset, len);
	if (error)
		return error;

	error = xfs_alloc_file_space(XFS_I(inode), offset, len);
	if (error)
		return error;
	return xfs_falloc_setsize(file, new_size);
}

/*
 * Handle the plain allocate mode of fallocate: preallocate space for
 * [offset, offset + len) and, if xfs_falloc_newsize() reported a new size,
 * apply it via xfs_falloc_setsize() afterwards.
 *
 * Returns 0 on success or a negative errno from the first step that fails.
 */
static int
xfs_falloc_allocate_range(
	struct file		*file,
	int			mode,
	loff_t			offset,
	loff_t			len)
{
	struct xfs_inode	*ip = XFS_I(file_inode(file));
	loff_t			target_size = 0;
	int			ret;

	/*
	 * Always-COW inodes can't use preallocations, so refuse to create
	 * them rather than silently doing nothing.
	 */
	if (xfs_is_always_cow_inode(ip))
		return -EOPNOTSUPP;

	/*
	 * target_size is left at 0 when FALLOC_FL_KEEP_SIZE is set or the
	 * range ends at or before the current EOF; xfs_falloc_setsize()
	 * treats 0 as "no size change".
	 */
	ret = xfs_falloc_newsize(file, mode, offset, len, &target_size);
	if (!ret)
		ret = xfs_alloc_file_space(ip, offset, len);
	if (!ret)
		ret = xfs_falloc_setsize(file, target_size);
	return ret;
}

	/* Change file size if needed */
	if (new_size) {
		struct iattr iattr;
#define	XFS_FALLOC_FL_SUPPORTED						\
		(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |		\
		 FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE |	\
		 FALLOC_FL_INSERT_RANGE | FALLOC_FL_UNSHARE_RANGE)

STATIC long
xfs_file_fallocate(
	struct file		*file,
	int			mode,
	loff_t			offset,
	loff_t			len)
{
	struct inode		*inode = file_inode(file);
	struct xfs_inode	*ip = XFS_I(inode);
	long			error;
	uint			iolock = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;

	if (!S_ISREG(inode->i_mode))
		return -EINVAL;
	if (mode & ~XFS_FALLOC_FL_SUPPORTED)
		return -EOPNOTSUPP;

		iattr.ia_valid = ATTR_SIZE;
		iattr.ia_size = new_size;
		error = xfs_vn_setattr_size(file_mnt_idmap(file),
					    file_dentry(file), &iattr);
	xfs_ilock(ip, iolock);
	error = xfs_break_layouts(inode, &iolock, BREAK_UNMAP);
	if (error)
		goto out_unlock;
	}

	/*
	 * Perform hole insertion now that the file size has been
	 * updated so that if we crash during the operation we don't
	 * leave shifted extents past EOF and hence losing access to
	 * the data that is contained within them.
	 * Must wait for all AIO to complete before we continue as AIO can
	 * change the file size on completion without holding any locks we
	 * currently hold. We must do this first because AIO can update both
	 * the on disk and in memory inode sizes, and the operations that follow
	 * require the in-memory size to be fully up-to-date.
	 */
	if (do_file_insert) {
		error = xfs_insert_file_space(ip, offset, len);
	inode_dio_wait(inode);

	error = file_modified(file);
	if (error)
		goto out_unlock;

	switch (mode & FALLOC_FL_MODE_MASK) {
	case FALLOC_FL_PUNCH_HOLE:
		error = xfs_free_file_space(ip, offset, len);
		break;
	case FALLOC_FL_COLLAPSE_RANGE:
		error = xfs_falloc_collapse_range(file, offset, len);
		break;
	case FALLOC_FL_INSERT_RANGE:
		error = xfs_falloc_insert_range(file, offset, len);
		break;
	case FALLOC_FL_ZERO_RANGE:
		error = xfs_falloc_zero_range(file, mode, offset, len);
		break;
	case FALLOC_FL_UNSHARE_RANGE:
		error = xfs_falloc_unshare_range(file, mode, offset, len);
		break;
	case FALLOC_FL_ALLOCATE_RANGE:
		error = xfs_falloc_allocate_range(file, mode, offset, len);
		break;
	default:
		error = -EOPNOTSUPP;
		break;
	}

	if (xfs_file_sync_writes(file))
	if (!error && xfs_file_sync_writes(file))
		error = xfs_log_force_inode(ip);

out_unlock:
+12 −6
Original line number Diff line number Diff line
@@ -25,7 +25,13 @@ struct space_resv {
#define FS_IOC_UNRESVSP64	_IOW('X', 43, struct space_resv)
#define FS_IOC_ZERO_RANGE	_IOW('X', 57, struct space_resv)

#define	FALLOC_FL_SUPPORTED_MASK	(FALLOC_FL_KEEP_SIZE |		\
/*
 * Mask of all supported fallocate modes.  Only one can be set at a time.
 *
 * In addition to the mode bit, the mode argument can also encode flags.
 * FALLOC_FL_KEEP_SIZE is the only supported flag so far.
 */
#define FALLOC_FL_MODE_MASK	(FALLOC_FL_ALLOCATE_RANGE |	\
				 FALLOC_FL_PUNCH_HOLE |		\
				 FALLOC_FL_COLLAPSE_RANGE |	\
				 FALLOC_FL_ZERO_RANGE |		\
Loading