Commit 3e48a116 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull f2fs updates from Jaegeuk Kim:
 "In this development cycle, we focused on several key performance
  optimizations:

   - introducing large folio support to enhance read speeds for
     immutable files

   - reducing checkpoint=enable latency by flushing only committed dirty
     pages

   - implementing tracepoints to diagnose and resolve lock priority
     inversion.

  Additionally, we introduced the packed_ssa feature to optimize the SSA
  footprint when utilizing large block sizes.

  Detail summary:

  Enhancements:
   - support large folio for immutable non-compressed case
   - support non-4KB block size without packed_ssa feature
   - optimize f2fs_enable_checkpoint() to avoid long delay
   - optimize f2fs_overwrite_io() for f2fs_iomap_begin
   - optimize NAT block loading during checkpoint write
   - add write latency stats for NAT and SIT blocks in
     f2fs_write_checkpoint
   - pin files do not require sbi->writepages lock for ordering
   - avoid f2fs_map_blocks() for consecutive holes in readpages
   - flush plug periodically during GC to maximize readahead effect
   - add tracepoints to catch lock overheads
   - add several sysfs entries to tune internal lock priorities

  Fixes:
   - fix lock priority inversion issue
   - fix incomplete block usage in compact SSA summaries
   - fix to show simulate_lock_timeout correctly
   - fix to avoid mapping wrong physical block for swapfile
   - fix IS_CHECKPOINTED flag inconsistency issue caused by
     concurrent atomic commit and checkpoint writes
   - fix to avoid UAF in f2fs_write_end_io()"

* tag 'f2fs-for-7.0-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (61 commits)
  f2fs: sysfs: introduce critical_task_priority
  f2fs: introduce trace_f2fs_priority_update
  f2fs: fix lock priority inversion issue
  f2fs: optimize f2fs_overwrite_io() for f2fs_iomap_begin
  f2fs: fix incomplete block usage in compact SSA summaries
  f2fs: decrease maximum flush retry count in f2fs_enable_checkpoint()
  f2fs: optimize NAT block loading during checkpoint write
  f2fs: change size parameter of __has_cursum_space() to unsigned int
  f2fs: add write latency stats for NAT and SIT blocks in f2fs_write_checkpoint
  f2fs: pin files do not require sbi->writepages lock for ordering
  f2fs: fix to show simulate_lock_timeout correctly
  f2fs: introduce FAULT_SKIP_WRITE
  f2fs: check skipped write in f2fs_enable_checkpoint()
  Revert "f2fs: add timeout in f2fs_enable_checkpoint()"
  f2fs: fix to unlock folio in f2fs_read_data_large_folio()
  f2fs: fix error path handling in f2fs_read_data_large_folio()
  f2fs: use folio_end_read
  f2fs: fix to avoid mapping wrong physical block for swapfile
  f2fs: avoid f2fs_map_blocks() for consecutive holes in readpages
  f2fs: advance index and offset after zeroing in large folio read
  ...
parents 770aaedb 52190933
Loading
Loading
Loading
Loading
+59 −3
Original line number Diff line number Diff line
@@ -520,7 +520,7 @@ What: /sys/fs/f2fs/<disk>/ckpt_thread_ioprio
Date:		January 2021
Contact:	"Daeho Jeong" <daehojeong@google.com>
Description:	Give a way to change checkpoint merge daemon's io priority.
		Its default value is "be,3", which means "BE" I/O class and
		Its default value is "rt,3", which means "RT" I/O class and
		I/O priority "3". We can select the class between "rt" and "be",
		and set the I/O priority within valid range of it. "," delimiter
		is necessary in between I/O class and priority number.
@@ -732,7 +732,7 @@ Description: Support configuring fault injection type, should be
		FAULT_TRUNCATE                   0x00000400
		FAULT_READ_IO                    0x00000800
		FAULT_CHECKPOINT                 0x00001000
		FAULT_DISCARD                    0x00002000
		FAULT_DISCARD                    0x00002000 (obsolete)
		FAULT_WRITE_IO                   0x00004000
		FAULT_SLAB_ALLOC                 0x00008000
		FAULT_DQUOT_INIT                 0x00010000
@@ -741,8 +741,10 @@ Description: Support configuring fault injection type, should be
		FAULT_BLKADDR_CONSISTENCE        0x00080000
		FAULT_NO_SEGMENT                 0x00100000
		FAULT_INCONSISTENT_FOOTER        0x00200000
		FAULT_TIMEOUT                    0x00400000 (1000ms)
		FAULT_ATOMIC_TIMEOUT             0x00400000 (1000ms)
		FAULT_VMALLOC                    0x00800000
		FAULT_LOCK_TIMEOUT               0x01000000 (1000ms)
		FAULT_SKIP_WRITE                 0x02000000
		===========================      ==========

What:		/sys/fs/f2fs/<disk>/discard_io_aware_gran
@@ -939,3 +941,57 @@ Description: Controls write priority in multi-devices setups. A value of 0 means
		allocate_section_policy = 1  Prioritize writing to section before allocate_section_hint
		allocate_section_policy = 2  Prioritize writing to section after allocate_section_hint
		===========================  ==========================================================

What:		/sys/fs/f2fs/<disk>/max_lock_elapsed_time
Date:		December 2025
Contact:	"Chao Yu" <chao@kernel.org>
Description:	This is a threshold in milliseconds. If the total elapsed time a thread spends in
		a critical region covered by a lock exceeds this threshold, f2fs will emit a
		tracepoint to dump information about the related context. This sysfs entry can be
		used to control the threshold value; by default, the value is 500 ms.

What:		/sys/fs/f2fs/<disk>/inject_timeout_type
Date:		December 2025
Contact:	"Chao Yu" <chao@kernel.org>
Description:	This sysfs entry can be used to change type of injected timeout:
		==========     ===============================
		Flag_Value     Flag_Description
		==========     ===============================
		0x00000000     No timeout (default)
		0x00000001     Simulate running time
		0x00000002     Simulate IO type sleep time
		0x00000003     Simulate Non-IO type sleep time
		0x00000004     Simulate runnable time
		==========     ===============================

What:		/sys/fs/f2fs/<disk>/adjust_lock_priority
Date:		January 2026
Contact:	"Chao Yu" <chao@kernel.org>
Description:	This sysfs entry can be used to enable/disable priority adjustment for a task
		which is in a critical region covered by a lock.
		==========     ==================
		Flag_Value     Flag_Description
		==========     ==================
		0x00000000     Disabled (default)
		0x00000001     cp_rwsem
		0x00000002     node_change
		0x00000004     node_write
		0x00000008     gc_lock
		0x00000010     cp_global
		0x00000020     io_rwsem
		==========     ==================

What:		/sys/fs/f2fs/<disk>/lock_duration_priority
Date:		January 2026
Contact:	"Chao Yu" <chao@kernel.org>
Description:	f2fs can tune the priority of a thread which has entered a critical region covered
		by an f2fs rw_semaphore lock. This sysfs entry can be used to control the priority
		value; the range is [100,139], and by default the value is 120.

What:		/sys/fs/f2fs/<disk>/critical_task_priority
Date:		February 2026
Contact:	"Chao Yu" <chao@kernel.org>
Description:	It can be used to tune priority of f2fs critical task, e.g. f2fs_ckpt, f2fs_gc
		threads, limitation as below:
		- it requires user has CAP_SYS_NICE capability.
		- the range is [100, 139], by default the value is 100.
+47 −2
Original line number Diff line number Diff line
@@ -206,7 +206,7 @@ fault_type=%d Support configuring fault injection type, should be
			     FAULT_TRUNCATE                   0x00000400
			     FAULT_READ_IO                    0x00000800
			     FAULT_CHECKPOINT                 0x00001000
			     FAULT_DISCARD                    0x00002000
			     FAULT_DISCARD                    0x00002000 (obsolete)
			     FAULT_WRITE_IO                   0x00004000
			     FAULT_SLAB_ALLOC                 0x00008000
			     FAULT_DQUOT_INIT                 0x00010000
@@ -215,8 +215,10 @@ fault_type=%d Support configuring fault injection type, should be
			     FAULT_BLKADDR_CONSISTENCE        0x00080000
			     FAULT_NO_SEGMENT                 0x00100000
			     FAULT_INCONSISTENT_FOOTER        0x00200000
			     FAULT_TIMEOUT                    0x00400000 (1000ms)
			     FAULT_ATOMIC_TIMEOUT             0x00400000 (1000ms)
			     FAULT_VMALLOC                    0x00800000
			     FAULT_LOCK_TIMEOUT               0x01000000 (1000ms)
			     FAULT_SKIP_WRITE                 0x02000000
			     ===========================      ==========
mode=%s			 Control block allocation mode which supports "adaptive"
			 and "lfs". In "lfs" mode, there should be no random
@@ -1033,3 +1035,46 @@ the reserved space back to F2FS for its own use.
So, the key idea is, user can do any file operations on /dev/vdc, and
reclaim the space after the use, while the space is counted as /data.
That doesn't require modifying partition size and filesystem format.

Per-file Read-Only Large Folio Support
--------------------------------------

F2FS implements large folio support on the read path to leverage high-order
page allocation for significant performance gains. To minimize code complexity,
this support is currently excluded from the write path, which requires handling
complex optimizations such as compression and block allocation modes.

This optional feature is triggered only when a file's immutable bit is set.
Consequently, F2FS will return EOPNOTSUPP if a user attempts to open a cached
file with write permissions, even immediately after clearing the bit. Write
access is only restored once the cached inode is dropped. The usage flow is
demonstrated below:

.. code-block::

   # f2fs_io setflags immutable /data/testfile_read_seq

   /* flush and reload the inode to enable the large folio */
   # sync && echo 3 > /proc/sys/vm/drop_caches

   /* mmap(MAP_POPULATE) + mlock() */
   # f2fs_io read 128 0 1024 mmap 1 0 /data/testfile_read_seq

   /* mmap() + fadvise(POSIX_FADV_WILLNEED) + mlock() */
   # f2fs_io read 128 0 1024 fadvise 1 0 /data/testfile_read_seq

   /* mmap() + mlock2(MLOCK_ONFAULT) + madvise(MADV_POPULATE_READ) */
   # f2fs_io read 128 0 1024 madvise 1 0 /data/testfile_read_seq

   # f2fs_io clearflags immutable /data/testfile_read_seq

   # f2fs_io write 1 0 1 zero buffered /data/testfile_read_seq
   Failed to open /data/testfile_read_seq: Operation not supported

   /* flush and reload the inode to disable the large folio */
   # sync && echo 3 > /proc/sys/vm/drop_caches

   # f2fs_io write 1 0 1 zero buffered /data/testfile_read_seq
   Written 4096 bytes with pattern = zero, total_time = 29 us, max_latency = 28 us

   # rm /data/testfile_read_seq
+230 −17
Original line number Diff line number Diff line
@@ -14,6 +14,9 @@
#include <linux/pagevec.h>
#include <linux/swap.h>
#include <linux/kthread.h>
#include <linux/delayacct.h>
#include <linux/ioprio.h>
#include <linux/math64.h>

#include "f2fs.h"
#include "node.h"
@@ -21,6 +24,209 @@
#include "iostat.h"
#include <trace/events/f2fs.h>

/*
 * Snapshot the current task's time counters into @ts.
 *
 * total_time is always taken from ktime_get(). The other fields are filled
 * in only when the matching kernel config option is enabled.
 *
 * @ts: destination snapshot; typically lives on the caller's stack, so every
 *      field that is compiled in must be written here.
 */
static inline void get_lock_elapsed_time(struct f2fs_time_stat *ts)
{
	ts->total_time = ktime_get();
#ifdef CONFIG_64BIT
	ts->running_time = current->se.sum_exec_runtime;
#endif
#if defined(CONFIG_SCHED_INFO) && defined(CONFIG_SCHEDSTATS)
	ts->runnable_time = current->sched_info.run_delay;
#endif
#ifdef CONFIG_TASK_DELAY_ACCT
	/*
	 * current->delays may be NULL. Store 0 in that case instead of
	 * leaving the field unwritten, otherwise the start/end difference
	 * would be computed from two uninitialized values.
	 */
	ts->io_sleep_time = current->delays ?
				current->delays->blkio_delay : 0;
#endif
}

/*
 * Begin elapsed-time accounting for one lock acquisition.
 *
 * Records in @lc whether the f2fs_lock_elapsed_time tracepoint is currently
 * enabled and, if it is, stores the starting time snapshot in lc->ts.
 *
 * @sem: lock being acquired (not inspected here)
 * @lc:  per-acquisition context to initialize
 */
static inline void trace_lock_elapsed_time_start(struct f2fs_rwsem *sem,
						struct f2fs_lock_context *lc)
{
	lc->lock_trace = trace_f2fs_lock_elapsed_time_enabled();
	if (lc->lock_trace)
		get_lock_elapsed_time(&lc->ts);
}

/*
 * Finish elapsed-time accounting started by trace_lock_elapsed_time_start().
 *
 * Does nothing unless tracing was armed in @lc. Otherwise a second snapshot
 * is taken and, if the total time in milliseconds is above
 * sbi->max_lock_elapsed_time, the f2fs_lock_elapsed_time tracepoint is
 * emitted with a breakdown into running / runnable / IO sleep / other time.
 * Components whose config option is disabled are reported as 0.
 *
 * @sem:      lock the accounting belongs to
 * @lc:       per-acquisition context holding the start snapshot
 * @is_write: true when the lock was taken for writing
 */
static inline void trace_lock_elapsed_time_end(struct f2fs_rwsem *sem,
				struct f2fs_lock_context *lc, bool is_write)
{
	const unsigned int nsec_per_ms = NSEC_PER_MSEC;
	struct f2fs_time_stat now;
	unsigned long long total_ms;
	unsigned long long accounted_ms;
	unsigned long long running_ms = 0;
	unsigned long long runnable_ms = 0;
	unsigned long long io_sleep_ms = 0;
	unsigned long long other_ms = 0;

	if (!lc->lock_trace)
		return;

	/* fault injection: sleep before the end snapshot so it is measured */
	if (time_to_inject(sem->sbi, FAULT_LOCK_TIMEOUT))
		f2fs_schedule_timeout_killable(DEFAULT_FAULT_TIMEOUT, true);

	get_lock_elapsed_time(&now);

	total_ms = div_u64(now.total_time - lc->ts.total_time, nsec_per_ms);
	if (total_ms <= sem->sbi->max_lock_elapsed_time)
		return;

#ifdef CONFIG_64BIT
	running_ms = div_u64(now.running_time - lc->ts.running_time,
				nsec_per_ms);
#endif
#if defined(CONFIG_SCHED_INFO) && defined(CONFIG_SCHEDSTATS)
	runnable_ms = div_u64(now.runnable_time - lc->ts.runnable_time,
				nsec_per_ms);
#endif
#ifdef CONFIG_TASK_DELAY_ACCT
	io_sleep_ms = div_u64(now.io_sleep_time - lc->ts.io_sleep_time,
				nsec_per_ms);
#endif
	/* whatever the three known buckets do not explain is "other" */
	accounted_ms = running_ms + io_sleep_ms + runnable_ms;
	if (total_ms > accounted_ms)
		other_ms = total_ms - accounted_ms;

	trace_f2fs_lock_elapsed_time(sem->sbi, sem->name, is_write, current,
			get_current_ioprio(), total_ms, running_ms,
			runnable_ms, io_sleep_ms, other_ms);
}

/*
 * Decide whether the holder of @sem should get a priority uplift.
 *
 * The lock must first be selected in sbi->adjust_lock_priority (one bit per
 * lock name). For cp_rwsem / node_change / node_write only readers qualify;
 * for gc_lock / cp_global / io_rwsem both readers and writers qualify.
 *
 * NOTE(review): BIT(sem->name - 1) assumes sem->name >= 1 - confirm that
 * unnamed locks never reach this helper.
 *
 * Return: true if the caller should uplift, false otherwise (including for
 * an unknown lock name, which also triggers f2fs_bug_on()).
 */
static bool need_uplift_priority(struct f2fs_rwsem *sem, bool is_write)
{
	bool uplift = false;

	if (!(sem->sbi->adjust_lock_priority & BIT(sem->name - 1)))
		return false;

	switch (sem->name) {
	case LOCK_NAME_CP_RWSEM:
	case LOCK_NAME_NODE_CHANGE:
	case LOCK_NAME_NODE_WRITE:
		/*
		 * the writer is checkpoint, which already has high priority,
		 * so only readers are uplifted
		 */
		uplift = !is_write;
		break;
	case LOCK_NAME_GC_LOCK:
	case LOCK_NAME_CP_GLOBAL:
	case LOCK_NAME_IO_RWSEM:
		uplift = true;
		break;
	default:
		f2fs_bug_on(sem->sbi, 1);
		break;
	}

	return uplift;
}

/*
 * Possibly raise the current task's priority for the duration of a lock.
 *
 * lc->need_restore is always reset first. The uplift is skipped when the
 * feature is off, when the task is an RT task, when the lock/mode is not
 * eligible, or when the task's nice value is already at or below the target
 * derived from sbi->lock_duration_priority. On a real uplift the original
 * and new nice values are saved in @lc and lc->need_restore is set.
 *
 * @sem:      lock about to be acquired
 * @lc:       per-acquisition context receiving the saved nice values
 * @is_write: true for a write acquisition
 */
static void uplift_priority(struct f2fs_rwsem *sem, struct f2fs_lock_context *lc,
						bool is_write)
{
	lc->need_restore = false;

	if (!sem->sbi->adjust_lock_priority || rt_task(current) ||
	    !need_uplift_priority(sem, is_write))
		return;

	lc->orig_nice = task_nice(current);
	lc->new_nice = PRIO_TO_NICE(sem->sbi->lock_duration_priority);

	/* never lower the priority: act only if the target nice is smaller */
	if (lc->new_nice >= lc->orig_nice)
		return;

	set_user_nice(current, lc->new_nice);
	lc->need_restore = true;

	trace_f2fs_priority_uplift(sem->sbi, sem->name, is_write, current,
		NICE_TO_PRIO(lc->orig_nice), NICE_TO_PRIO(lc->new_nice));
}

/*
 * Undo a priority uplift recorded in @lc by uplift_priority().
 *
 * Nothing happens if no uplift was recorded, or if the task's nice value no
 * longer equals the uplifted one (someone else changed it in the meantime,
 * so it is left alone).
 *
 * @sem:      lock the uplift was associated with
 * @lc:       per-acquisition context holding the saved nice values
 * @is_write: true for a write acquisition
 */
static void restore_priority(struct f2fs_rwsem *sem, struct f2fs_lock_context *lc,
						bool is_write)
{
	/* second test: someone has updated the priority */
	if (!lc->need_restore || task_nice(current) != lc->new_nice)
		return;

	set_user_nice(current, lc->orig_nice);

	trace_f2fs_priority_restore(sem->sbi, sem->name, is_write, current,
		NICE_TO_PRIO(lc->orig_nice), NICE_TO_PRIO(lc->new_nice));
}

/*
 * Acquire @sem for reading, with optional priority uplift and elapsed-time
 * tracing. State for the matching f2fs_up_read_trace() is stored in @lc.
 */
void f2fs_down_read_trace(struct f2fs_rwsem *sem, struct f2fs_lock_context *lc)
{
	/* priority is adjusted before the lock is taken */
	uplift_priority(sem, lc, false);
	f2fs_down_read(sem);
	/* the start snapshot is taken only once the lock is held */
	trace_lock_elapsed_time_start(sem, lc);
}

/*
 * Try to acquire @sem for reading, with optional priority uplift and
 * elapsed-time tracing.
 *
 * Return: 1 if the lock was taken (tracing state is then armed in @lc),
 * 0 if not (any priority uplift has already been undone).
 */
int f2fs_down_read_trylock_trace(struct f2fs_rwsem *sem, struct f2fs_lock_context *lc)
{
	uplift_priority(sem, lc, false);
	if (f2fs_down_read_trylock(sem)) {
		trace_lock_elapsed_time_start(sem, lc);
		return 1;
	}

	/* lock not taken: roll back the uplift applied above */
	restore_priority(sem, lc, false);
	return 0;
}

/*
 * Release a read lock taken with f2fs_down_read_trace() or a successful
 * f2fs_down_read_trylock_trace(), using the state saved in @lc.
 */
void f2fs_up_read_trace(struct f2fs_rwsem *sem, struct f2fs_lock_context *lc)
{
	/* unlock first; priority restore and reporting happen afterwards */
	f2fs_up_read(sem);
	restore_priority(sem, lc, false);
	trace_lock_elapsed_time_end(sem, lc, false);
}

/*
 * Acquire @sem for writing, with optional priority uplift and elapsed-time
 * tracing. State for the matching f2fs_up_write_trace() is stored in @lc.
 */
void f2fs_down_write_trace(struct f2fs_rwsem *sem, struct f2fs_lock_context *lc)
{
	/* priority is adjusted before the lock is taken */
	uplift_priority(sem, lc, true);
	f2fs_down_write(sem);
	/* the start snapshot is taken only once the lock is held */
	trace_lock_elapsed_time_start(sem, lc);
}

/*
 * Try to acquire @sem for writing, with optional priority uplift and
 * elapsed-time tracing.
 *
 * Return: 1 if the lock was taken (tracing state is then armed in @lc),
 * 0 if not (any priority uplift has already been undone).
 */
int f2fs_down_write_trylock_trace(struct f2fs_rwsem *sem, struct f2fs_lock_context *lc)
{
	uplift_priority(sem, lc, true);
	if (f2fs_down_write_trylock(sem)) {
		trace_lock_elapsed_time_start(sem, lc);
		return 1;
	}

	/* lock not taken: roll back the uplift applied above */
	restore_priority(sem, lc, true);
	return 0;
}

/*
 * Release a write lock taken with f2fs_down_write_trace() or a successful
 * f2fs_down_write_trylock_trace(), using the state saved in @lc.
 */
void f2fs_up_write_trace(struct f2fs_rwsem *sem, struct f2fs_lock_context *lc)
{
	/* unlock first; priority restore and reporting happen afterwards */
	f2fs_up_write(sem);
	restore_priority(sem, lc, true);
	trace_lock_elapsed_time_end(sem, lc, true);
}

/*
 * Take sbi->cp_rwsem for reading via the traced lock helper.
 * @lc must later be passed to the matching f2fs_unlock_op().
 */
void f2fs_lock_op(struct f2fs_sb_info *sbi, struct f2fs_lock_context *lc)
{
	f2fs_down_read_trace(&sbi->cp_rwsem, lc);
}

/*
 * Try to take sbi->cp_rwsem for reading via the traced lock helper.
 *
 * Return: 1 if the lock was taken, 0 otherwise. A FAULT_LOCK_OP fault
 * injection also yields 0; in that case @lc is not touched at all.
 */
int f2fs_trylock_op(struct f2fs_sb_info *sbi, struct f2fs_lock_context *lc)
{
	/* injected failure: report "not locked" without attempting the lock */
	if (time_to_inject(sbi, FAULT_LOCK_OP))
		return 0;

	return f2fs_down_read_trylock_trace(&sbi->cp_rwsem, lc);
}

/*
 * Release the read side of sbi->cp_rwsem via the traced unlock helper,
 * using the context @lc filled in when the lock was taken.
 */
void f2fs_unlock_op(struct f2fs_sb_info *sbi, struct f2fs_lock_context *lc)
{
	f2fs_up_read_trace(&sbi->cp_rwsem, lc);
}

/*
 * Take sbi->cp_rwsem for writing. This uses the plain f2fs_down_write(),
 * i.e. no priority uplift and no elapsed-time tracing.
 */
static inline void f2fs_lock_all(struct f2fs_sb_info *sbi)
{
	f2fs_down_write(&sbi->cp_rwsem);
}

/*
 * Release the write side of sbi->cp_rwsem; counterpart of f2fs_lock_all().
 */
static inline void f2fs_unlock_all(struct f2fs_sb_info *sbi)
{
	f2fs_up_write(&sbi->cp_rwsem);
}

/* Default checkpoint I/O priority value: RT I/O class, priority level 3. */
#define DEFAULT_CHECKPOINT_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_RT, 3))

static struct kmem_cache *ino_entry_slab;
@@ -379,6 +585,7 @@ static int f2fs_write_meta_pages(struct address_space *mapping,
				struct writeback_control *wbc)
{
	struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
	struct f2fs_lock_context lc;
	long diff, written;

	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
@@ -391,13 +598,13 @@ static int f2fs_write_meta_pages(struct address_space *mapping,
		goto skip_write;

	/* if locked failed, cp will flush dirty pages instead */
	if (!f2fs_down_write_trylock(&sbi->cp_global_sem))
	if (!f2fs_down_write_trylock_trace(&sbi->cp_global_sem, &lc))
		goto skip_write;

	trace_f2fs_writepages(mapping->host, wbc, META);
	diff = nr_pages_to_write(sbi, META, wbc);
	written = f2fs_sync_meta_pages(sbi, META, wbc->nr_to_write, FS_META_IO);
	f2fs_up_write(&sbi->cp_global_sem);
	written = f2fs_sync_meta_pages(sbi, wbc->nr_to_write, FS_META_IO);
	f2fs_up_write_trace(&sbi->cp_global_sem, &lc);
	wbc->nr_to_write = max((long)0, wbc->nr_to_write - written - diff);
	return 0;

@@ -407,8 +614,8 @@ static int f2fs_write_meta_pages(struct address_space *mapping,
	return 0;
}

long f2fs_sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
				long nr_to_write, enum iostat_type io_type)
long f2fs_sync_meta_pages(struct f2fs_sb_info *sbi, long nr_to_write,
				enum iostat_type io_type)
{
	struct address_space *mapping = META_MAPPING(sbi);
	pgoff_t index = 0, prev = ULONG_MAX;
@@ -469,7 +676,7 @@ long f2fs_sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
	}
stop:
	if (nwritten)
		f2fs_submit_merged_write(sbi, type);
		f2fs_submit_merged_write(sbi, META);

	blk_finish_plug(&plug);

@@ -1312,8 +1519,7 @@ void f2fs_wait_on_all_pages(struct f2fs_sb_info *sbi, int type)
			break;

		if (type == F2FS_DIRTY_META)
			f2fs_sync_meta_pages(sbi, META, LONG_MAX,
							FS_CP_META_IO);
			f2fs_sync_meta_pages(sbi, LONG_MAX, FS_CP_META_IO);
		else if (type == F2FS_WB_CP_DATA)
			f2fs_submit_merged_write(sbi, DATA);

@@ -1485,7 +1691,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
	int err;

	/* Flush all the NAT/SIT pages */
	f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);
	f2fs_sync_meta_pages(sbi, LONG_MAX, FS_CP_META_IO);

	stat_cp_time(cpc, CP_TIME_SYNC_META);

@@ -1584,7 +1790,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
	}

	/* Here, we have one bio having CP pack except cp pack 2 page */
	f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);
	f2fs_sync_meta_pages(sbi, LONG_MAX, FS_CP_META_IO);
	stat_cp_time(cpc, CP_TIME_SYNC_CP_META);

	/* Wait for all dirty meta pages to be submitted for IO */
@@ -1646,6 +1852,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
	struct f2fs_lock_context lc;
	unsigned long long ckpt_ver;
	int err = 0;

@@ -1660,7 +1867,7 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
		f2fs_warn(sbi, "Start checkpoint disabled!");
	}
	if (cpc->reason != CP_RESIZE)
		f2fs_down_write(&sbi->cp_global_sem);
		f2fs_down_write_trace(&sbi->cp_global_sem, &lc);

	stat_cp_time(cpc, CP_TIME_LOCK);

@@ -1701,6 +1908,7 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
			goto out;
		}
	}
	stat_cp_time(cpc, CP_TIME_MERGE_WRITE);

	/*
	 * update checkpoint pack index
@@ -1717,10 +1925,11 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
		f2fs_bug_on(sbi, !f2fs_cp_error(sbi));
		goto stop;
	}
	stat_cp_time(cpc, CP_TIME_FLUSH_NAT);

	f2fs_flush_sit_entries(sbi, cpc);

	stat_cp_time(cpc, CP_TIME_FLUSH_META);
	stat_cp_time(cpc, CP_TIME_FLUSH_SIT);

	/* save inmem log status */
	f2fs_save_inmem_curseg(sbi);
@@ -1750,7 +1959,7 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
	trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, CP_PHASE_FINISH_CHECKPOINT);
out:
	if (cpc->reason != CP_RESIZE)
		f2fs_up_write(&sbi->cp_global_sem);
		f2fs_up_write_trace(&sbi->cp_global_sem, &lc);
	return err;
}

@@ -1796,11 +2005,12 @@ void f2fs_destroy_checkpoint_caches(void)
static int __write_checkpoint_sync(struct f2fs_sb_info *sbi)
{
	struct cp_control cpc = { .reason = CP_SYNC, };
	struct f2fs_lock_context lc;
	int err;

	f2fs_down_write(&sbi->gc_lock);
	f2fs_down_write_trace(&sbi->gc_lock, &lc);
	err = f2fs_write_checkpoint(sbi, &cpc);
	f2fs_up_write(&sbi->gc_lock);
	f2fs_up_write_trace(&sbi->gc_lock, &lc);

	return err;
}
@@ -1888,11 +2098,12 @@ int f2fs_issue_checkpoint(struct f2fs_sb_info *sbi)
	cpc.reason = __get_cp_reason(sbi);
	if (!test_opt(sbi, MERGE_CHECKPOINT) || cpc.reason != CP_SYNC ||
		sbi->umount_lock_holder == current) {
		struct f2fs_lock_context lc;
		int ret;

		f2fs_down_write(&sbi->gc_lock);
		f2fs_down_write_trace(&sbi->gc_lock, &lc);
		ret = f2fs_write_checkpoint(sbi, &cpc);
		f2fs_up_write(&sbi->gc_lock);
		f2fs_up_write_trace(&sbi->gc_lock, &lc);

		return ret;
	}
@@ -1947,6 +2158,8 @@ int f2fs_start_ckpt_thread(struct f2fs_sb_info *sbi)
	}

	set_task_ioprio(cprc->f2fs_issue_ckpt, cprc->ckpt_thread_ioprio);
	set_user_nice(cprc->f2fs_issue_ckpt,
			PRIO_TO_NICE(sbi->critical_task_priority));

	return 0;
}
+10 −8
Original line number Diff line number Diff line
@@ -1291,6 +1291,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc,
	struct dnode_of_data dn;
	struct node_info ni;
	struct compress_io_ctx *cic;
	struct f2fs_lock_context lc;
	pgoff_t start_idx = start_idx_of_cluster(cc);
	unsigned int last_index = cc->cluster_size - 1;
	loff_t psize;
@@ -1309,8 +1310,8 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc,
		 * checkpoint. This can only happen to quota writes which can cause
		 * the below discard race condition.
		 */
		f2fs_down_read(&sbi->node_write);
	} else if (!f2fs_trylock_op(sbi)) {
		f2fs_down_read_trace(&sbi->node_write, &lc);
	} else if (!f2fs_trylock_op(sbi, &lc)) {
		goto out_free;
	}

@@ -1434,9 +1435,9 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc,

	f2fs_put_dnode(&dn);
	if (quota_inode)
		f2fs_up_read(&sbi->node_write);
		f2fs_up_read_trace(&sbi->node_write, &lc);
	else
		f2fs_unlock_op(sbi);
		f2fs_unlock_op(sbi, &lc);

	spin_lock(&fi->i_size_lock);
	if (fi->last_disk_size < psize)
@@ -1463,9 +1464,9 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc,
	f2fs_put_dnode(&dn);
out_unlock_op:
	if (quota_inode)
		f2fs_up_read(&sbi->node_write);
		f2fs_up_read_trace(&sbi->node_write, &lc);
	else
		f2fs_unlock_op(sbi);
		f2fs_unlock_op(sbi, &lc);
out_free:
	for (i = 0; i < cc->valid_nr_cpages; i++) {
		f2fs_compress_free_page(cc->cpages[i]);
@@ -1512,6 +1513,7 @@ static int f2fs_write_raw_pages(struct compress_ctx *cc,
{
	struct address_space *mapping = cc->inode->i_mapping;
	struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
	struct f2fs_lock_context lc;
	int submitted, compr_blocks, i;
	int ret = 0;

@@ -1530,7 +1532,7 @@ static int f2fs_write_raw_pages(struct compress_ctx *cc,

	/* overwrite compressed cluster w/ normal cluster */
	if (compr_blocks > 0)
		f2fs_lock_op(sbi);
		f2fs_lock_op(sbi, &lc);

	for (i = 0; i < cc->cluster_size; i++) {
		struct folio *folio;
@@ -1586,7 +1588,7 @@ static int f2fs_write_raw_pages(struct compress_ctx *cc,

out:
	if (compr_blocks > 0)
		f2fs_unlock_op(sbi);
		f2fs_unlock_op(sbi, &lc);

	f2fs_balance_fs(sbi, true);
	return ret;
+376 −62

File changed.

Preview size limit exceeded, changes collapsed.

Loading