Commit 9703d69d authored by Daeho Jeong, committed by Jaegeuk Kim
Browse files

f2fs: support file pinning for zoned devices



Support file pinning with conventional storage area for zoned devices

Signed-off-by: Daeho Jeong <daehojeong@google.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
parent 4e0197f9
Loading
Loading
Loading
Loading
+39 −19
Original line number Diff line number Diff line
@@ -3839,25 +3839,34 @@ static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk,
	unsigned int blkofs;
	unsigned int blk_per_sec = BLKS_PER_SEC(sbi);
	unsigned int secidx = start_blk / blk_per_sec;
	unsigned int end_sec = secidx + blkcnt / blk_per_sec;
	unsigned int end_sec;
	int ret = 0;

	if (!blkcnt)
		return 0;
	end_sec = secidx + (blkcnt - 1) / blk_per_sec;

	f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
	filemap_invalidate_lock(inode->i_mapping);

	set_inode_flag(inode, FI_ALIGNED_WRITE);
	set_inode_flag(inode, FI_OPU_WRITE);

	for (; secidx < end_sec; secidx++) {
	for (; secidx <= end_sec; secidx++) {
		unsigned int blkofs_end = secidx == end_sec ?
			(blkcnt - 1) % blk_per_sec : blk_per_sec - 1;

		f2fs_down_write(&sbi->pin_sem);

		f2fs_lock_op(sbi);
		f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false);
		f2fs_unlock_op(sbi);
		ret = f2fs_allocate_pinning_section(sbi);
		if (ret) {
			f2fs_up_write(&sbi->pin_sem);
			break;
		}

		set_inode_flag(inode, FI_SKIP_WRITES);

		for (blkofs = 0; blkofs < blk_per_sec; blkofs++) {
		for (blkofs = 0; blkofs <= blkofs_end; blkofs++) {
			struct page *page;
			unsigned int blkidx = secidx * blk_per_sec + blkofs;

@@ -3946,27 +3955,34 @@ static int check_swap_activate(struct swap_info_struct *sis,
		nr_pblocks = map.m_len;

		if ((pblock - SM_I(sbi)->main_blkaddr) & sec_blks_mask ||
				nr_pblocks & sec_blks_mask) {
				nr_pblocks & sec_blks_mask ||
				!f2fs_valid_pinned_area(sbi, pblock)) {
			bool last_extent = false;

			not_aligned++;

			nr_pblocks = roundup(nr_pblocks, blks_per_sec);
			if (cur_lblock + nr_pblocks > sis->max)
				nr_pblocks -= blks_per_sec;

			if (!nr_pblocks) {
			/* this extent is last one */
				nr_pblocks = map.m_len;
				f2fs_warn(sbi, "Swapfile: last extent is not aligned to section");
				goto next;
			if (!nr_pblocks) {
				nr_pblocks = last_lblock - cur_lblock;
				last_extent = true;
			}

			ret = f2fs_migrate_blocks(inode, cur_lblock,
							nr_pblocks);
			if (ret)
			if (ret) {
				if (ret == -ENOENT)
					ret = -EINVAL;
				goto out;
			}

			if (!last_extent)
				goto retry;
		}
next:

		if (cur_lblock + nr_pblocks >= sis->max)
			nr_pblocks = sis->max - cur_lblock;

@@ -4004,17 +4020,17 @@ static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
				sector_t *span)
{
	struct inode *inode = file_inode(file);
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	int ret;

	if (!S_ISREG(inode->i_mode))
		return -EINVAL;

	if (f2fs_readonly(F2FS_I_SB(inode)->sb))
	if (f2fs_readonly(sbi->sb))
		return -EROFS;

	if (f2fs_lfs_mode(F2FS_I_SB(inode))) {
		f2fs_err(F2FS_I_SB(inode),
			"Swapfile not supported in LFS mode");
	if (f2fs_lfs_mode(sbi) && !f2fs_sb_has_blkzoned(sbi)) {
		f2fs_err(sbi, "Swapfile not supported in LFS mode");
		return -EINVAL;
	}

@@ -4027,13 +4043,17 @@ static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,

	f2fs_precache_extents(inode);

	ret = filemap_fdatawrite(inode->i_mapping);
	if (ret < 0)
		return ret;

	ret = check_swap_activate(sis, file, span);
	if (ret < 0)
		return ret;

	stat_inc_swapfile_inode(inode);
	set_inode_flag(inode, FI_PIN_FILE);
	f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
	f2fs_update_time(sbi, REQ_TIME);
	return ret;
}

+16 −1
Original line number Diff line number Diff line
@@ -3699,7 +3699,8 @@ void f2fs_get_new_segment(struct f2fs_sb_info *sbi,
			unsigned int *newseg, bool new_sec, int dir);
void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
					unsigned int start, unsigned int end);
void f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force);
int f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force);
int f2fs_allocate_pinning_section(struct f2fs_sb_info *sbi);
void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi);
int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range);
bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi,
@@ -3877,6 +3878,9 @@ void f2fs_stop_gc_thread(struct f2fs_sb_info *sbi);
block_t f2fs_start_bidx_of_node(unsigned int node_ofs, struct inode *inode);
int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control);
void f2fs_build_gc_manager(struct f2fs_sb_info *sbi);
int f2fs_gc_range(struct f2fs_sb_info *sbi,
		unsigned int start_seg, unsigned int end_seg,
		bool dry_run, unsigned int dry_run_sections);
int f2fs_resize_fs(struct file *filp, __u64 block_count);
int __init f2fs_create_garbage_collection_cache(void);
void f2fs_destroy_garbage_collection_cache(void);
@@ -4531,6 +4535,17 @@ static inline bool f2fs_lfs_mode(struct f2fs_sb_info *sbi)
	return F2FS_OPTION(sbi).fs_mode == FS_MODE_LFS;
}

/*
 * Tell whether @blkaddr may hold pinned-file data.
 *
 * When the filesystem does not have the blkzoned feature every address is
 * acceptable.  Otherwise the device index for @blkaddr is obtained from
 * f2fs_target_device_index() and the address is acceptable only if that
 * device's block device is not zoned.
 */
static inline bool f2fs_valid_pinned_area(struct f2fs_sb_info *sbi,
					  block_t blkaddr)
{
	int dev_idx;

	if (!f2fs_sb_has_blkzoned(sbi))
		return true;

	dev_idx = f2fs_target_device_index(sbi, blkaddr);
	if (bdev_is_zoned(FDEV(dev_idx).bdev))
		return false;

	return true;
}

static inline bool f2fs_low_mem_mode(struct f2fs_sb_info *sbi)
{
	return F2FS_OPTION(sbi).memory_mode == MEMORY_MODE_LOW;
+18 −6
Original line number Diff line number Diff line
@@ -1748,9 +1748,11 @@ static int f2fs_expand_inode_data(struct inode *inode, loff_t offset,

		f2fs_down_write(&sbi->pin_sem);

		f2fs_lock_op(sbi);
		f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false);
		f2fs_unlock_op(sbi);
		err = f2fs_allocate_pinning_section(sbi);
		if (err) {
			f2fs_up_write(&sbi->pin_sem);
			goto out_err;
		}

		map.m_seg_type = CURSEG_COLD_DATA_PINNED;
		err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_PRE_DIO);
@@ -3200,6 +3202,7 @@ int f2fs_pin_file_control(struct inode *inode, bool inc)
static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg)
{
	struct inode *inode = file_inode(filp);
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	__u32 pin;
	int ret = 0;

@@ -3209,7 +3212,7 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg)
	if (!S_ISREG(inode->i_mode))
		return -EINVAL;

	if (f2fs_readonly(F2FS_I_SB(inode)->sb))
	if (f2fs_readonly(sbi->sb))
		return -EROFS;

	ret = mnt_want_write_file(filp);
@@ -3222,9 +3225,18 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg)
		clear_inode_flag(inode, FI_PIN_FILE);
		f2fs_i_gc_failures_write(inode, 0);
		goto done;
	} else if (f2fs_is_pinned_file(inode)) {
		goto done;
	}

	if (f2fs_should_update_outplace(inode, NULL)) {
	if (f2fs_sb_has_blkzoned(sbi) && F2FS_HAS_BLOCKS(inode)) {
		ret = -EFBIG;
		goto out;
	}

	/* Let's allow file pinning on zoned device. */
	if (!f2fs_sb_has_blkzoned(sbi) &&
	    f2fs_should_update_outplace(inode, NULL)) {
		ret = -EINVAL;
		goto out;
	}
@@ -3246,7 +3258,7 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg)
	set_inode_flag(inode, FI_PIN_FILE);
	ret = F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN];
done:
	f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
	f2fs_update_time(sbi, REQ_TIME);
out:
	inode_unlock(inode);
	mnt_drop_write_file(filp);
+10 −4
Original line number Diff line number Diff line
@@ -1974,10 +1974,12 @@ void f2fs_build_gc_manager(struct f2fs_sb_info *sbi)
	init_atgc_management(sbi);
}

static int f2fs_gc_range(struct f2fs_sb_info *sbi,
		unsigned int start_seg, unsigned int end_seg, bool dry_run)
int f2fs_gc_range(struct f2fs_sb_info *sbi,
		unsigned int start_seg, unsigned int end_seg,
		bool dry_run, unsigned int dry_run_sections)
{
	unsigned int segno;
	unsigned int gc_secs = dry_run_sections;

	for (segno = start_seg; segno <= end_seg; segno += SEGS_PER_SEC(sbi)) {
		struct gc_inode_list gc_list = {
@@ -1985,11 +1987,15 @@ static int f2fs_gc_range(struct f2fs_sb_info *sbi,
			.iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS),
		};

		do_garbage_collect(sbi, segno, &gc_list, FG_GC, true);
		do_garbage_collect(sbi, segno, &gc_list, FG_GC,
						dry_run_sections == 0);
		put_gc_inode(&gc_list);

		if (!dry_run && get_valid_blocks(sbi, segno, true))
			return -EAGAIN;
		if (dry_run && dry_run_sections &&
		    !get_valid_blocks(sbi, segno, true) && --gc_secs == 0)
			break;

		if (fatal_signal_pending(current))
			return -ERESTARTSYS;
@@ -2027,7 +2033,7 @@ static int free_segment_range(struct f2fs_sb_info *sbi,
		f2fs_allocate_segment_for_resize(sbi, type, start, end);

	/* do GC to move out valid blocks in the range */
	err = f2fs_gc_range(sbi, start, end, dry_run);
	err = f2fs_gc_range(sbi, start, end, dry_run, 0);
	if (err || dry_run)
		goto out;

+61 −8
Original line number Diff line number Diff line
@@ -2640,7 +2640,7 @@ static int is_next_segment_free(struct f2fs_sb_info *sbi,
 * This function should be returned with success, otherwise BUG
 */
static void get_new_segment(struct f2fs_sb_info *sbi,
			unsigned int *newseg, bool new_sec)
			unsigned int *newseg, bool new_sec, bool pinning)
{
	struct free_segmap_info *free_i = FREE_I(sbi);
	unsigned int segno, secno, zoneno;
@@ -2658,6 +2658,16 @@ static void get_new_segment(struct f2fs_sb_info *sbi,
		if (segno < GET_SEG_FROM_SEC(sbi, hint + 1))
			goto got_it;
	}

	/*
	 * If we format f2fs on zoned storage, let's try to get pinned sections
	 * from beginning of the storage, which should be a conventional one.
	 */
	if (f2fs_sb_has_blkzoned(sbi)) {
		segno = pinning ? 0 : max(first_zoned_segno(sbi), *newseg);
		hint = GET_SEC_FROM_SEG(sbi, segno);
	}

find_other_zone:
	secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint);
	if (secno >= MAIN_SECS(sbi)) {
@@ -2756,21 +2766,30 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
 * Allocate a current working segment.
 * This function always allocates a free segment in LFS manner.
 */
static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
static int new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	unsigned int segno = curseg->segno;
	bool pinning = type == CURSEG_COLD_DATA_PINNED;

	if (curseg->inited)
		write_sum_page(sbi, curseg->sum_blk, GET_SUM_BLOCK(sbi, segno));

	segno = __get_next_segno(sbi, type);
	get_new_segment(sbi, &segno, new_sec);
	get_new_segment(sbi, &segno, new_sec, pinning);
	if (new_sec && pinning &&
	    !f2fs_valid_pinned_area(sbi, START_BLOCK(sbi, segno))) {
		__set_free(sbi, segno);
		return -EAGAIN;
	}

	curseg->next_segno = segno;
	reset_curseg(sbi, type, 1);
	curseg->alloc_type = LFS;
	if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK)
		curseg->fragment_remained_chunk =
				get_random_u32_inclusive(1, sbi->max_fragment_chunk);
	return 0;
}

static int __next_free_blkoff(struct f2fs_sb_info *sbi,
@@ -3043,7 +3062,7 @@ void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
	f2fs_up_read(&SM_I(sbi)->curseg_lock);
}

static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type,
static int __allocate_new_segment(struct f2fs_sb_info *sbi, int type,
						bool new_sec, bool force)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);
@@ -3053,21 +3072,49 @@ static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type,
	    !curseg->next_blkoff &&
	    !get_valid_blocks(sbi, curseg->segno, new_sec) &&
	    !get_ckpt_valid_blocks(sbi, curseg->segno, new_sec))
		return;
		return 0;

	old_segno = curseg->segno;
	new_curseg(sbi, type, true);
	if (new_curseg(sbi, type, true))
		return -EAGAIN;
	stat_inc_seg_type(sbi, curseg);
	locate_dirty_segment(sbi, old_segno);
	return 0;
}

void f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force)
int f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force)
{
	int ret;

	f2fs_down_read(&SM_I(sbi)->curseg_lock);
	down_write(&SIT_I(sbi)->sentry_lock);
	__allocate_new_segment(sbi, type, true, force);
	ret = __allocate_new_segment(sbi, type, true, force);
	up_write(&SIT_I(sbi)->sentry_lock);
	f2fs_up_read(&SM_I(sbi)->curseg_lock);

	return ret;
}

/*
 * Allocate a new section for the CURSEG_COLD_DATA_PINNED log.
 *
 * The allocation itself is done by f2fs_allocate_new_section() under
 * f2fs_lock_op().  If it fails on a filesystem with the blkzoned feature,
 * f2fs_gc_range() is invoked once (under gc_lock) over segments
 * 0 .. GET_SEGNO(sbi, FDEV(0).end_blk) and the allocation is attempted a
 * second and final time.
 *
 * Returns the result of the last f2fs_allocate_new_section() call.
 */
int f2fs_allocate_pinning_section(struct f2fs_sb_info *sbi)
{
	bool gc_tried = false;
	int err;

	for (;;) {
		f2fs_lock_op(sbi);
		err = f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED,
						false);
		f2fs_unlock_op(sbi);

		/*
		 * Stop on non-zoned filesystems, on success, or once the
		 * single GC-and-retry attempt has been used up.
		 */
		if (!f2fs_sb_has_blkzoned(sbi) || !err || gc_tried)
			break;

		/*
		 * Dry-run GC bounded to one section.  Its return value is
		 * not examined; the retried allocation decides the outcome.
		 */
		f2fs_down_write(&sbi->gc_lock);
		f2fs_gc_range(sbi, 0, GET_SEGNO(sbi, FDEV(0).end_blk), true, 1);
		f2fs_up_write(&sbi->gc_lock);

		gc_tried = true;
	}

	return err;
}

void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi)
@@ -3433,6 +3480,10 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
	 * new segment.
	 */
	if (segment_full) {
		if (type == CURSEG_COLD_DATA_PINNED &&
		    !((curseg->segno + 1) % sbi->segs_per_sec))
			goto skip_new_segment;

		if (from_gc) {
			get_atssr_segment(sbi, type, se->type,
						AT_SSR, se->mtime);
@@ -3444,6 +3495,8 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
			stat_inc_seg_type(sbi, curseg);
		}
	}

skip_new_segment:
	/*
	 * segment dirty status should be updated after segment allocation,
	 * so we just need to update status only one time after previous
Loading