Commit 6593714d authored by Harshad Shirwadkar, committed by Theodore Ts'o
Browse files

ext4: hold s_fc_lock during fast commit



Releasing s_fc_lock partway through the commit in ext4_fc_perform_commit()
leaves room for subtle concurrency bugs where ext4_fc_del() may delete an
inode from the fast commit list, leaving the list in an inconsistent
state.

Signed-off-by: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Link: https://patch.msgid.link/20250508175908.1004880-10-harshadshirwadkar@gmail.com


Signed-off-by: Theodore Ts'o <tytso@mit.edu>
parent 12e64e7f
Loading
Loading
Loading
Loading
+13 −31
Original line number Diff line number Diff line
@@ -424,6 +424,7 @@ static int __track_dentry_update(handle_t *handle, struct inode *inode,
	node->fcd_ino = inode->i_ino;
	take_dentry_name_snapshot(&node->fcd_name, dentry);
	INIT_LIST_HEAD(&node->fcd_dilist);
	INIT_LIST_HEAD(&node->fcd_list);
	mutex_lock(&sbi->s_fc_lock);
	if (sbi->s_journal->j_flags & JBD2_FULL_COMMIT_ONGOING ||
		sbi->s_journal->j_flags & JBD2_FAST_COMMIT_ONGOING)
@@ -985,8 +986,6 @@ static int ext4_fc_flush_data(journal_t *journal)

/* Commit all the directory entry updates */
static int ext4_fc_commit_dentry_updates(journal_t *journal, u32 *crc)
__acquires(&sbi->s_fc_lock)
__releases(&sbi->s_fc_lock)
{
	struct super_block *sb = journal->j_private;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -1000,26 +999,22 @@ __releases(&sbi->s_fc_lock)
	list_for_each_entry_safe(fc_dentry, fc_dentry_n,
				 &sbi->s_fc_dentry_q[FC_Q_MAIN], fcd_list) {
		if (fc_dentry->fcd_op != EXT4_FC_TAG_CREAT) {
			mutex_unlock(&sbi->s_fc_lock);
			if (!ext4_fc_add_dentry_tlv(sb, crc, fc_dentry)) {
				ret = -ENOSPC;
				goto lock_and_exit;
			}
			mutex_lock(&sbi->s_fc_lock);
			if (!ext4_fc_add_dentry_tlv(sb, crc, fc_dentry))
				return -ENOSPC;
			continue;
		}
		/*
		 * With fcd_dilist we need not loop in sbi->s_fc_q to get the
		 * corresponding inode pointer
		 * corresponding inode. Also, the corresponding inode could have been
		 * deleted, in which case, we don't need to do anything.
		 */
		WARN_ON(list_empty(&fc_dentry->fcd_dilist));
		if (list_empty(&fc_dentry->fcd_dilist))
			continue;
		ei = list_first_entry(&fc_dentry->fcd_dilist,
				struct ext4_inode_info, i_fc_dilist);
		inode = &ei->vfs_inode;
		WARN_ON(inode->i_ino != fc_dentry->fcd_ino);

		mutex_unlock(&sbi->s_fc_lock);

		/*
		 * We first write the inode and then the create dirent. This
		 * allows the recovery code to create an unnamed inode first
@@ -1029,23 +1024,14 @@ __releases(&sbi->s_fc_lock)
		 */
		ret = ext4_fc_write_inode(inode, crc);
		if (ret)
			goto lock_and_exit;

			return ret;
		ret = ext4_fc_write_inode_data(inode, crc);
		if (ret)
			goto lock_and_exit;

		if (!ext4_fc_add_dentry_tlv(sb, crc, fc_dentry)) {
			ret = -ENOSPC;
			goto lock_and_exit;
		}

		mutex_lock(&sbi->s_fc_lock);
			return ret;
		if (!ext4_fc_add_dentry_tlv(sb, crc, fc_dentry))
			return -ENOSPC;
	}
	return 0;
lock_and_exit:
	mutex_lock(&sbi->s_fc_lock);
	return ret;
}

static int ext4_fc_perform_commit(journal_t *journal)
@@ -1148,10 +1134,8 @@ static int ext4_fc_perform_commit(journal_t *journal)
	/* Step 6.2: Now write all the dentry updates. */
	mutex_lock(&sbi->s_fc_lock);
	ret = ext4_fc_commit_dentry_updates(journal, &crc);
	if (ret) {
		mutex_unlock(&sbi->s_fc_lock);
	if (ret)
		goto out;
	}

	/* Step 6.3: Now write all the changed inodes to disk. */
	list_for_each_entry(iter, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) {
@@ -1159,7 +1143,6 @@ static int ext4_fc_perform_commit(journal_t *journal)
		if (!ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING))
			continue;

		mutex_unlock(&sbi->s_fc_lock);
		ret = ext4_fc_write_inode_data(inode, &crc);
		if (ret)
			goto out;
@@ -1171,6 +1154,7 @@ static int ext4_fc_perform_commit(journal_t *journal)
	ret = ext4_fc_write_tail(sb, crc);

out:
	mutex_unlock(&sbi->s_fc_lock);
	blk_finish_plug(&plug);
	return ret;
}
@@ -1353,11 +1337,9 @@ static void ext4_fc_cleanup(journal_t *journal, int full, tid_t tid)
					     fcd_list);
		list_del_init(&fc_dentry->fcd_list);
		list_del_init(&fc_dentry->fcd_dilist);
		mutex_unlock(&sbi->s_fc_lock);

		release_dentry_name_snapshot(&fc_dentry->fcd_name);
		kmem_cache_free(ext4_fc_dentry_cachep, fc_dentry);
		mutex_lock(&sbi->s_fc_lock);
	}

	list_splice_init(&sbi->s_fc_dentry_q[FC_Q_STAGING],