Commit 44b4d13b authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull f2fs updates from Jaegeuk Kim:
 "This series introduces a device aliasing feature where a user can carve
  out partitions and later reclaim the space by deleting the aliased file
  in the root dir.

  In addition to that, there're numerous minor bug fixes in zoned device
  support, checkpoint=disable, extent cache management, fiemap, and
  lazytime mount option. The full list of noticeable changes can be
  found below.

  Enhancements:
   - introduce device aliasing file
   - add stats in debugfs to show multiple devices
   - add a sysfs node to limit max read extent count per-inode
   - modify f2fs_is_checkpoint_ready logic to allow more data to be
     written while checkpoint is disabled
   - decrease spare area for pinned files for zoned devices

  Fixes:
   - Revert "f2fs: remove unreachable lazytime mount option parsing"
   - adjust unusable cap before checkpoint=disable mode
   - fix to drop all discards after creating snapshot on lvm device
   - fix to shrink read extent node in batches
   - fix changing cursegs if recovery fails on zoned device
   - fix to adjust appropriate length for fiemap
   - fix fiemap failure issue when page size is 16KB
   - fix to avoid forcing direct write to use buffered IO on inline_data
     inode
   - fix to map blocks correctly for direct write
   - fix to account dirty data in __get_secs_required()
   - fix null-ptr-deref in f2fs_submit_page_bio()
   - fix inconsistent update of i_blocks in release_compress_blocks and
     reserve_compress_blocks"

* tag 'f2fs-for-6.13-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (40 commits)
  f2fs: fix to drop all discards after creating snapshot on lvm device
  f2fs: add a sysfs node to limit max read extent count per-inode
  f2fs: fix to shrink read extent node in batches
  f2fs: print message if fscorrupted was found in f2fs_new_node_page()
  f2fs: clear SBI_POR_DOING before initing inmem curseg
  f2fs: fix changing cursegs if recovery fails on zoned device
  f2fs: adjust unusable cap before checkpoint=disable mode
  f2fs: fix to requery extent which cross boundary of inquiry
  f2fs: fix to adjust appropriate length for fiemap
  f2fs: clean up w/ F2FS_{BLK_TO_BYTES,BTYES_TO_BLK}
  f2fs: fix to do cast in F2FS_{BLK_TO_BYTES, BTYES_TO_BLK} to avoid overflow
  f2fs: replace deprecated strcpy with strscpy
  Revert "f2fs: remove unreachable lazytime mount option parsing"
  f2fs: fix to avoid forcing direct write to use buffered IO on inline_data inode
  f2fs: fix to map blocks correctly for direct write
  f2fs: fix race in concurrent f2fs_stop_gc_thread
  f2fs: fix fiemap failure issue when page size is 16KB
  f2fs: remove redundant atomic file check in defragment
  f2fs: fix to convert log type to segment data type correctly
  f2fs: clean up the unused variable additional_reserved_segments
  ...
parents fb527fc1 bc8aeb04
Loading
Loading
Loading
Loading
+11 −2
Original line number Diff line number Diff line
@@ -311,10 +311,13 @@ Description: Do background GC aggressively when set. Set to 0 by default.
		GC approach and turns SSR mode on.
		gc urgent low(2): lowers the bar of checking I/O idling in
		order to process outstanding discard commands and GC a
		little bit aggressively. uses cost benefit GC approach.
		little bit aggressively. always uses cost benefit GC approach,
		and will override age-threshold GC approach if ATGC is enabled
		at the same time.
		gc urgent mid(3): does GC forcibly in a period of given
		gc_urgent_sleep_time and executes a mid level of I/O idling check.
		uses cost benefit GC approach.
		always uses cost benefit GC approach, and will override
		age-threshold GC approach if ATGC is enabled at the same time.

What:		/sys/fs/f2fs/<disk>/gc_urgent_sleep_time
Date:		August 2017
@@ -819,3 +822,9 @@ Description: It controls the valid block ratio threshold not to trigger excessiv
		for zoned devices. The initial value of it is 95(%). F2FS will stop the
		background GC thread from initiating GC for sections having valid blocks
		exceeding the ratio.

What:		/sys/fs/f2fs/<disk>/max_read_extent_count
Date:		November 2024
Contact:	"Chao Yu" <chao@kernel.org>
Description:	It controls the maximum read extent count per inode. The default
		threshold is 10240.
+44 −0
Original line number Diff line number Diff line
@@ -943,3 +943,47 @@ NVMe Zoned Namespace devices
  can start before the zone-capacity and span across zone-capacity boundary.
  Such spanning segments are also considered as usable segments. All blocks
  past the zone-capacity are considered unusable in these segments.

Device aliasing feature
-----------------------

f2fs can utilize a special file called a "device aliasing file." This file allows
the entire storage device to be mapped with a single, large extent, not using
the usual f2fs node structures. This mapped area is pinned and primarily intended
for holding the space.

Essentially, this mechanism allows a portion of the f2fs area to be temporarily
reserved and used by another filesystem or for different purposes. Once that
external usage is complete, the device aliasing file can be deleted, releasing
the reserved space back to F2FS for its own use.

<use-case>

# ls /dev/vd*
/dev/vdb (32GB) /dev/vdc (32GB)
# mkfs.ext4 /dev/vdc
# mkfs.f2fs -c /dev/vdc@vdc.file /dev/vdb
# mount /dev/vdb /mnt/f2fs
# ls -l /mnt/f2fs
vdc.file
# df -h
/dev/vdb                            64G   33G   32G  52% /mnt/f2fs

# mount -o loop /dev/vdc /mnt/ext4
# df -h
/dev/vdb                            64G   33G   32G  52% /mnt/f2fs
/dev/loop7                          32G   24K   30G   1% /mnt/ext4
# umount /mnt/ext4

# f2fs_io getflags /mnt/f2fs/vdc.file
get a flag on /mnt/f2fs/vdc.file ret=0, flags=nocow(pinned),immutable
# f2fs_io setflags noimmutable /mnt/f2fs/vdc.file
get a flag on noimmutable ret=0, flags=800010
set a flag on /mnt/f2fs/vdc.file ret=0, flags=noimmutable
# rm /mnt/f2fs/vdc.file
# df -h
/dev/vdb                            64G  753M   64G   2% /mnt/f2fs

So, the key idea is that the user can do any file operations on /dev/vdc and
reclaim the space after use, while the space is counted as /data.
This doesn't require modifying the partition size or the filesystem format.
+2 −3
Original line number Diff line number Diff line
@@ -296,9 +296,8 @@ static struct posix_acl *f2fs_acl_clone(const struct posix_acl *acl,
	struct posix_acl *clone = NULL;

	if (acl) {
		int size = sizeof(struct posix_acl) + acl->a_count *
				sizeof(struct posix_acl_entry);
		clone = kmemdup(acl, size, flags);
		clone = kmemdup(acl, struct_size(acl, a_entries, acl->a_count),
				flags);
		if (clone)
			refcount_set(&clone->a_refcount, 1);
	}
+1 −1
Original line number Diff line number Diff line
@@ -32,7 +32,7 @@ void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io,
	f2fs_build_fault_attr(sbi, 0, 0);
	if (!end_io)
		f2fs_flush_merged_writes(sbi);
	f2fs_handle_critical_error(sbi, reason, end_io);
	f2fs_handle_critical_error(sbi, reason);
}

/*
+50 −64
Original line number Diff line number Diff line
@@ -1679,7 +1679,8 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag)
		/* reserved delalloc block should be mapped for fiemap. */
		if (blkaddr == NEW_ADDR)
			map->m_flags |= F2FS_MAP_DELALLOC;
		if (flag != F2FS_GET_BLOCK_DIO || !is_hole)
		/* DIO READ and hole case, should not map the blocks. */
		if (!(flag == F2FS_GET_BLOCK_DIO && is_hole && !map->m_may_create))
			map->m_flags |= F2FS_MAP_MAPPED;

		map->m_pblk = blkaddr;
@@ -1821,16 +1822,6 @@ bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len)
	return true;
}

/*
 * Convert a byte count into a block count by shifting right by the inode's
 * block-size bits (inode->i_blkbits); a partial trailing block is dropped.
 */
static inline u64 bytes_to_blks(struct inode *inode, u64 bytes)
{
	return (bytes >> inode->i_blkbits);
}

/*
 * Convert a block count into a byte count by shifting left by the inode's
 * block-size bits (inode->i_blkbits).
 */
static inline u64 blks_to_bytes(struct inode *inode, u64 blks)
{
	return (blks << inode->i_blkbits);
}

static int f2fs_xattr_fiemap(struct inode *inode,
				struct fiemap_extent_info *fieinfo)
{
@@ -1856,7 +1847,7 @@ static int f2fs_xattr_fiemap(struct inode *inode,
			return err;
		}

		phys = blks_to_bytes(inode, ni.blk_addr);
		phys = F2FS_BLK_TO_BYTES(ni.blk_addr);
		offset = offsetof(struct f2fs_inode, i_addr) +
					sizeof(__le32) * (DEF_ADDRS_PER_INODE -
					get_inline_xattr_addrs(inode));
@@ -1888,7 +1879,7 @@ static int f2fs_xattr_fiemap(struct inode *inode,
			return err;
		}

		phys = blks_to_bytes(inode, ni.blk_addr);
		phys = F2FS_BLK_TO_BYTES(ni.blk_addr);
		len = inode->i_sb->s_blocksize;

		f2fs_put_page(page, 1);
@@ -1904,30 +1895,11 @@ static int f2fs_xattr_fiemap(struct inode *inode,
	return (err < 0 ? err : 0);
}

/*
 * Sum the block-address capacity of one inode: ADDRS_PER_INODE() entries,
 * plus ADDRS_PER_BLOCK() entries for each of two direct node blocks, two
 * indirect node blocks and one double indirect node block.
 */
static loff_t max_inode_blocks(struct inode *inode)
{
	loff_t result = ADDRS_PER_INODE(inode);
	/* capacity of one node block at the current level of indirection */
	loff_t leaf_count = ADDRS_PER_BLOCK(inode);

	/* two direct node blocks */
	result += (leaf_count * 2);

	/* two indirect node blocks */
	/* each extra level multiplies the capacity by NIDS_PER_BLOCK */
	leaf_count *= NIDS_PER_BLOCK;
	result += (leaf_count * 2);

	/* one double indirect node block */
	leaf_count *= NIDS_PER_BLOCK;
	result += leaf_count;

	return result;
}

int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
		u64 start, u64 len)
{
	struct f2fs_map_blocks map;
	sector_t start_blk, last_blk;
	sector_t start_blk, last_blk, blk_len, max_len;
	pgoff_t next_pgofs;
	u64 logical = 0, phys = 0, size = 0;
	u32 flags = 0;
@@ -1969,16 +1941,15 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
			goto out;
	}

	if (bytes_to_blks(inode, len) == 0)
		len = blks_to_bytes(inode, 1);

	start_blk = bytes_to_blks(inode, start);
	last_blk = bytes_to_blks(inode, start + len - 1);
	start_blk = F2FS_BYTES_TO_BLK(start);
	last_blk = F2FS_BYTES_TO_BLK(start + len - 1);
	blk_len = last_blk - start_blk + 1;
	max_len = F2FS_BYTES_TO_BLK(maxbytes) - start_blk;

next:
	memset(&map, 0, sizeof(map));
	map.m_lblk = start_blk;
	map.m_len = bytes_to_blks(inode, len);
	map.m_len = blk_len;
	map.m_next_pgofs = &next_pgofs;
	map.m_seg_type = NO_CHECK_TYPE;

@@ -1995,13 +1966,23 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
	if (!compr_cluster && !(map.m_flags & F2FS_MAP_FLAGS)) {
		start_blk = next_pgofs;

		if (blks_to_bytes(inode, start_blk) < blks_to_bytes(inode,
						max_inode_blocks(inode)))
		if (F2FS_BLK_TO_BYTES(start_blk) < maxbytes)
			goto prep_next;

		flags |= FIEMAP_EXTENT_LAST;
	}

	/*
	 * current extent may cross boundary of inquiry, increase len to
	 * requery.
	 */
	if (!compr_cluster && (map.m_flags & F2FS_MAP_MAPPED) &&
				map.m_lblk + map.m_len - 1 == last_blk &&
				blk_len != max_len) {
		blk_len = max_len;
		goto next;
	}

	compr_appended = false;
	/* In a case of compressed cluster, append this to the last extent */
	if (compr_cluster && ((map.m_flags & F2FS_MAP_DELALLOC) ||
@@ -2033,14 +2014,14 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
	} else if (compr_appended) {
		unsigned int appended_blks = cluster_size -
						count_in_cluster + 1;
		size += blks_to_bytes(inode, appended_blks);
		size += F2FS_BLK_TO_BYTES(appended_blks);
		start_blk += appended_blks;
		compr_cluster = false;
	} else {
		logical = blks_to_bytes(inode, start_blk);
		logical = F2FS_BLK_TO_BYTES(start_blk);
		phys = __is_valid_data_blkaddr(map.m_pblk) ?
			blks_to_bytes(inode, map.m_pblk) : 0;
		size = blks_to_bytes(inode, map.m_len);
			F2FS_BLK_TO_BYTES(map.m_pblk) : 0;
		size = F2FS_BLK_TO_BYTES(map.m_len);
		flags = 0;

		if (compr_cluster) {
@@ -2048,13 +2029,13 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
			count_in_cluster += map.m_len;
			if (count_in_cluster == cluster_size) {
				compr_cluster = false;
				size += blks_to_bytes(inode, 1);
				size += F2FS_BLKSIZE;
			}
		} else if (map.m_flags & F2FS_MAP_DELALLOC) {
			flags = FIEMAP_EXTENT_UNWRITTEN;
		}

		start_blk += bytes_to_blks(inode, size);
		start_blk += F2FS_BYTES_TO_BLK(size);
	}

prep_next:
@@ -2092,7 +2073,7 @@ static int f2fs_read_single_page(struct inode *inode, struct folio *folio,
					struct readahead_control *rac)
{
	struct bio *bio = *bio_ret;
	const unsigned blocksize = blks_to_bytes(inode, 1);
	const unsigned int blocksize = F2FS_BLKSIZE;
	sector_t block_in_file;
	sector_t last_block;
	sector_t last_block_in_file;
@@ -2102,8 +2083,8 @@ static int f2fs_read_single_page(struct inode *inode, struct folio *folio,

	block_in_file = (sector_t)index;
	last_block = block_in_file + nr_pages;
	last_block_in_file = bytes_to_blks(inode,
			f2fs_readpage_limit(inode) + blocksize - 1);
	last_block_in_file = F2FS_BYTES_TO_BLK(f2fs_readpage_limit(inode) +
							blocksize - 1);
	if (last_block > last_block_in_file)
		last_block = last_block_in_file;

@@ -2203,7 +2184,7 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
	struct bio *bio = *bio_ret;
	unsigned int start_idx = cc->cluster_idx << cc->log_cluster_size;
	sector_t last_block_in_file;
	const unsigned blocksize = blks_to_bytes(inode, 1);
	const unsigned int blocksize = F2FS_BLKSIZE;
	struct decompress_io_ctx *dic = NULL;
	struct extent_info ei = {};
	bool from_dnode = true;
@@ -2212,8 +2193,8 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,

	f2fs_bug_on(sbi, f2fs_cluster_is_empty(cc));

	last_block_in_file = bytes_to_blks(inode,
			f2fs_readpage_limit(inode) + blocksize - 1);
	last_block_in_file = F2FS_BYTES_TO_BLK(f2fs_readpage_limit(inode) +
							blocksize - 1);

	/* get rid of pages beyond EOF */
	for (i = 0; i < cc->cluster_size; i++) {
@@ -2388,10 +2369,10 @@ static int f2fs_mpage_readpages(struct inode *inode,
		.nr_cpages = 0,
	};
	pgoff_t nc_cluster_idx = NULL_CLUSTER;
	pgoff_t index;
#endif
	unsigned nr_pages = rac ? readahead_count(rac) : 1;
	unsigned max_nr_pages = nr_pages;
	pgoff_t index;
	int ret = 0;

	map.m_pblk = 0;
@@ -2409,9 +2390,9 @@ static int f2fs_mpage_readpages(struct inode *inode,
			prefetchw(&folio->flags);
		}

#ifdef CONFIG_F2FS_FS_COMPRESSION
		index = folio_index(folio);

#ifdef CONFIG_F2FS_FS_COMPRESSION
		if (!f2fs_compressed_file(inode))
			goto read_single_page;

@@ -3444,6 +3425,11 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi,

	if (!f2fs_lookup_read_extent_cache_block(inode, index,
						 &dn.data_blkaddr)) {
		if (IS_DEVICE_ALIASING(inode)) {
			err = -ENODATA;
			goto out;
		}

		if (locked) {
			err = f2fs_reserve_block(&dn, index);
			goto out;
@@ -3974,7 +3960,7 @@ static int check_swap_activate(struct swap_info_struct *sis,
	 * to be very smart.
	 */
	cur_lblock = 0;
	last_lblock = bytes_to_blks(inode, i_size_read(inode));
	last_lblock = F2FS_BYTES_TO_BLK(i_size_read(inode));

	while (cur_lblock < last_lblock && cur_lblock < sis->max) {
		struct f2fs_map_blocks map;
@@ -4217,8 +4203,8 @@ static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
	pgoff_t next_pgofs = 0;
	int err;

	map.m_lblk = bytes_to_blks(inode, offset);
	map.m_len = bytes_to_blks(inode, offset + length - 1) - map.m_lblk + 1;
	map.m_lblk = F2FS_BYTES_TO_BLK(offset);
	map.m_len = F2FS_BYTES_TO_BLK(offset + length - 1) - map.m_lblk + 1;
	map.m_next_pgofs = &next_pgofs;
	map.m_seg_type = f2fs_rw_hint_to_seg_type(F2FS_I_SB(inode),
						inode->i_write_hint);
@@ -4229,7 +4215,7 @@ static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
	if (err)
		return err;

	iomap->offset = blks_to_bytes(inode, map.m_lblk);
	iomap->offset = F2FS_BLK_TO_BYTES(map.m_lblk);

	/*
	 * When inline encryption is enabled, sometimes I/O to an encrypted file
@@ -4249,21 +4235,21 @@ static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
		if (WARN_ON_ONCE(map.m_pblk == NEW_ADDR))
			return -EINVAL;

		iomap->length = blks_to_bytes(inode, map.m_len);
		iomap->length = F2FS_BLK_TO_BYTES(map.m_len);
		iomap->type = IOMAP_MAPPED;
		iomap->flags |= IOMAP_F_MERGED;
		iomap->bdev = map.m_bdev;
		iomap->addr = blks_to_bytes(inode, map.m_pblk);
		iomap->addr = F2FS_BLK_TO_BYTES(map.m_pblk);
	} else {
		if (flags & IOMAP_WRITE)
			return -ENOTBLK;

		if (map.m_pblk == NULL_ADDR) {
			iomap->length = blks_to_bytes(inode, next_pgofs) -
			iomap->length = F2FS_BLK_TO_BYTES(next_pgofs) -
							iomap->offset;
			iomap->type = IOMAP_HOLE;
		} else if (map.m_pblk == NEW_ADDR) {
			iomap->length = blks_to_bytes(inode, map.m_len);
			iomap->length = F2FS_BLK_TO_BYTES(map.m_len);
			iomap->type = IOMAP_UNWRITTEN;
		} else {
			f2fs_bug_on(F2FS_I_SB(inode), 1);
Loading