Commit ed78aeeb authored by Yongpeng Yang, committed by Jaegeuk Kim
Browse files

f2fs: fix node_cnt race between extent node destroy and writeback



f2fs_destroy_extent_node() does not set FI_NO_EXTENT before clearing
extent nodes. When called from f2fs_drop_inode() with I_SYNC set,
concurrent kworker writeback can insert new extent nodes into the same
extent tree, racing with the destroy and triggering f2fs_bug_on() in
__destroy_extent_node(). The scenario is as follows:

drop inode                            writeback
 - iput
  - f2fs_drop_inode  // I_SYNC set
   - f2fs_destroy_extent_node
    - __destroy_extent_node
     - while (node_cnt) {
        write_lock(&et->lock)
        __free_extent_tree
        write_unlock(&et->lock)
                                       - __writeback_single_inode
                                        - f2fs_outplace_write_data
                                         - f2fs_update_read_extent_cache
                                          - __update_extent_tree_range
                                           // FI_NO_EXTENT not set,
                                           // insert new extent node
       } // node_cnt == 0, exit while
     - f2fs_bug_on(node_cnt)  // node_cnt > 0

Additionally, __update_extent_tree_range() only checks FI_NO_EXTENT for
EX_READ type, leaving EX_BLOCK_AGE updates completely unprotected.

This patch sets FI_NO_EXTENT under et->lock in __destroy_extent_node(),
consistent with other callers (__update_extent_tree_range and
__drop_extent_tree), and checks FI_NO_EXTENT for both the EX_READ and
EX_BLOCK_AGE trees.

Fixes: 3fc5d5a1 ("f2fs: fix to shrink read extent node in batches")
Cc: stable@vger.kernel.org
Signed-off-by: Yongpeng Yang <yangyongpeng@xiaomi.com>
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
parent 2a3db1e0
Loading
Loading
Loading
Loading
+10 −7
Original line number Diff line number Diff line
@@ -119,9 +119,10 @@ static bool __may_extent_tree(struct inode *inode, enum extent_type type)
	if (!__init_may_extent_tree(inode, type))
		return false;

	if (type == EX_READ) {
	if (is_inode_flag_set(inode, FI_NO_EXTENT))
		return false;

	if (type == EX_READ) {
		if (is_inode_flag_set(inode, FI_COMPRESSED_FILE) &&
				 !f2fs_sb_has_readonly(F2FS_I_SB(inode)))
			return false;
@@ -644,6 +645,8 @@ static unsigned int __destroy_extent_node(struct inode *inode,

	while (atomic_read(&et->node_cnt)) {
		write_lock(&et->lock);
		if (!is_inode_flag_set(inode, FI_NO_EXTENT))
			set_inode_flag(inode, FI_NO_EXTENT);
		node_cnt += __free_extent_tree(sbi, et, nr_shrink);
		write_unlock(&et->lock);
	}
@@ -688,12 +691,12 @@ static void __update_extent_tree_range(struct inode *inode,

	write_lock(&et->lock);

	if (type == EX_READ) {
	if (is_inode_flag_set(inode, FI_NO_EXTENT)) {
		write_unlock(&et->lock);
		return;
	}

	if (type == EX_READ) {
		prev = et->largest;
		dei.len = 0;