Commit f4265b8d authored by Zhang Yi, committed by Christian Brauner
Browse files

ext4: add FALLOC_FL_WRITE_ZEROES support



Add support for FALLOC_FL_WRITE_ZEROES if the underlying device enables
the unmap write zeroes operation. This first allocates blocks as
unwritten, then issues a zero command outside of the running journal
handle, and finally converts them to a written state.

Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
Link: https://lore.kernel.org/20250619111806.3546162-10-yi.zhang@huaweicloud.com


Reviewed-by: "Martin K. Petersen" <martin.petersen@oracle.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
parent 912b6038
Loading
Loading
Loading
Loading
+55 −11
Original line number Diff line number Diff line
@@ -4501,6 +4501,8 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
	struct ext4_map_blocks map;
	unsigned int credits;
	loff_t epos, old_size = i_size_read(inode);
	unsigned int blkbits = inode->i_blkbits;
	bool alloc_zero = false;

	BUG_ON(!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS));
	map.m_lblk = offset;
@@ -4513,6 +4515,17 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
	if (len <= EXT_UNWRITTEN_MAX_LEN)
		flags |= EXT4_GET_BLOCKS_NO_NORMALIZE;

	/*
	 * Doing the actual zeroing inside a running journal transaction
	 * is expensive. Allocate an unwritten extent first, then convert
	 * it to written after zeroing it out.
	 */
	if (flags & EXT4_GET_BLOCKS_ZERO) {
		flags &= ~EXT4_GET_BLOCKS_ZERO;
		flags |= EXT4_GET_BLOCKS_UNWRIT_EXT;
		alloc_zero = true;
	}

	/*
	 * credits to insert 1 extent into extent tree
	 */
@@ -4549,9 +4562,7 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
		 * allow a full retry cycle for any remaining allocations
		 */
		retries = 0;
		map.m_lblk += ret;
		map.m_len = len = len - ret;
		epos = (loff_t)map.m_lblk << inode->i_blkbits;
		epos = (loff_t)(map.m_lblk + ret) << blkbits;
		inode_set_ctime_current(inode);
		if (new_size) {
			if (epos > new_size)
@@ -4571,6 +4582,21 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
		ret2 = ret3 ? ret3 : ret2;
		if (unlikely(ret2))
			break;

		if (alloc_zero &&
		    (map.m_flags & (EXT4_MAP_MAPPED | EXT4_MAP_UNWRITTEN))) {
			ret2 = ext4_issue_zeroout(inode, map.m_lblk, map.m_pblk,
						  map.m_len);
			if (likely(!ret2))
				ret2 = ext4_convert_unwritten_extents(NULL,
					inode, (loff_t)map.m_lblk << blkbits,
					(loff_t)map.m_len << blkbits);
			if (ret2)
				break;
		}

		map.m_lblk += ret;
		map.m_len = len = len - ret;
	}
	if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
		goto retry;
@@ -4636,7 +4662,11 @@ static long ext4_zero_range(struct file *file, loff_t offset,
	if (end_lblk > start_lblk) {
		ext4_lblk_t zero_blks = end_lblk - start_lblk;

		flags |= (EXT4_GET_BLOCKS_CONVERT_UNWRITTEN | EXT4_EX_NOCACHE);
		if (mode & FALLOC_FL_WRITE_ZEROES)
			flags = EXT4_GET_BLOCKS_CREATE_ZERO | EXT4_EX_NOCACHE;
		else
			flags |= (EXT4_GET_BLOCKS_CONVERT_UNWRITTEN |
				  EXT4_EX_NOCACHE);
		ret = ext4_alloc_file_blocks(file, start_lblk, zero_blks,
					     new_size, flags);
		if (ret)
@@ -4745,11 +4775,18 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
	if (IS_ENCRYPTED(inode) &&
	    (mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE)))
		return -EOPNOTSUPP;
	/*
	 * Don't allow writing zeroes if the underlying device does not
	 * enable the unmap write zeroes operation.
	 */
	if ((mode & FALLOC_FL_WRITE_ZEROES) &&
	    !bdev_write_zeroes_unmap_sectors(inode->i_sb->s_bdev))
		return -EOPNOTSUPP;

	/* Return error if mode is not supported */
	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
		     FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE |
		     FALLOC_FL_INSERT_RANGE))
		     FALLOC_FL_ZERO_RANGE | FALLOC_FL_COLLAPSE_RANGE |
		     FALLOC_FL_INSERT_RANGE | FALLOC_FL_WRITE_ZEROES))
		return -EOPNOTSUPP;

	inode_lock(inode);
@@ -4780,16 +4817,23 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
	if (ret)
		goto out_invalidate_lock;

	if (mode & FALLOC_FL_PUNCH_HOLE)
	switch (mode & FALLOC_FL_MODE_MASK) {
	case FALLOC_FL_PUNCH_HOLE:
		ret = ext4_punch_hole(file, offset, len);
	else if (mode & FALLOC_FL_COLLAPSE_RANGE)
		break;
	case FALLOC_FL_COLLAPSE_RANGE:
		ret = ext4_collapse_range(file, offset, len);
	else if (mode & FALLOC_FL_INSERT_RANGE)
		break;
	case FALLOC_FL_INSERT_RANGE:
		ret = ext4_insert_range(file, offset, len);
	else if (mode & FALLOC_FL_ZERO_RANGE)
		break;
	case FALLOC_FL_ZERO_RANGE:
	case FALLOC_FL_WRITE_ZEROES:
		ret = ext4_zero_range(file, offset, len, mode);
	else
		break;
	default:
		ret = -EOPNOTSUPP;
	}

out_invalidate_lock:
	filemap_invalidate_unlock(mapping);
+2 −1
Original line number Diff line number Diff line
@@ -92,7 +92,8 @@ TRACE_DEFINE_ENUM(ES_REFERENCED_B);
	{ FALLOC_FL_KEEP_SIZE,		"KEEP_SIZE"},		\
	{ FALLOC_FL_PUNCH_HOLE,		"PUNCH_HOLE"},		\
	{ FALLOC_FL_COLLAPSE_RANGE,	"COLLAPSE_RANGE"},	\
	{ FALLOC_FL_ZERO_RANGE,		"ZERO_RANGE"})
	{ FALLOC_FL_ZERO_RANGE,		"ZERO_RANGE"},		\
	{ FALLOC_FL_WRITE_ZEROES,	"WRITE_ZEROES"})

TRACE_DEFINE_ENUM(EXT4_FC_REASON_XATTR);
TRACE_DEFINE_ENUM(EXT4_FC_REASON_CROSS_RENAME);