Commit 5f61b961 authored by Filipe Manana's avatar Filipe Manana Committed by David Sterba
Browse files

btrfs: fix inode lookup error handling during log replay



When replaying log trees we use read_one_inode() to get an inode, which is
just a wrapper around btrfs_iget_logging(), which in turn is a wrapper for
btrfs_iget(). But read_one_inode() always returns NULL for any error
that btrfs_iget_logging() / btrfs_iget() may return and this is a problem
because:

1) In many callers of read_one_inode() we convert the NULL into -EIO,
   which is not accurate since btrfs_iget() may return -ENOMEM and -ENOENT
   for example, besides -EIO and other errors. So during log replay we
   may end up reporting a false -EIO, which is confusing since we may
   not have had any IO error at all;

2) When replaying directory deletes, at replay_dir_deletes(), we assume
   the NULL returned from read_one_inode() means that the inode doesn't
   exist and then proceed as if no error had happened. This is wrong
   because unless btrfs_iget() returned ERR_PTR(-ENOENT), we had an
   actual error and the target inode may exist in the target subvolume
   root - this may later result in the log replay code failing at a
   later stage (if we are "lucky") or succeed but leaving some
   inconsistency in the filesystem.

So fix this by not ignoring errors from btrfs_iget_logging() and as
a consequence remove the read_one_inode() wrapper and just use
btrfs_iget_logging() directly. Also since btrfs_iget_logging() is
supposed to be called only against subvolume roots, just like
read_one_inode() which had a comment about it, add an assertion to
btrfs_iget_logging() to check that the target root corresponds to a
subvolume root.

Fixes: 5d4f98a2 ("Btrfs: Mixed back reference  (FORWARD ROLLING FORMAT CHANGE)")
Reviewed-by: default avatarJohannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: default avatarQu Wenruo <wqu@suse.com>
Signed-off-by: default avatarFilipe Manana <fdmanana@suse.com>
Signed-off-by: default avatarDavid Sterba <dsterba@suse.com>
parent 54a7081e
Loading
Loading
Loading
Loading
+62 −65
Original line number Diff line number Diff line
@@ -143,6 +143,9 @@ static struct btrfs_inode *btrfs_iget_logging(u64 objectid, struct btrfs_root *r
	unsigned int nofs_flag;
	struct btrfs_inode *inode;

	/* Only meant to be called for subvolume roots and not for log roots. */
	ASSERT(is_fstree(btrfs_root_id(root)));

	/*
	 * We're holding a transaction handle whether we are logging or
	 * replaying a log tree, so we must make sure NOFS semantics apply
@@ -604,21 +607,6 @@ static int read_alloc_one_name(struct extent_buffer *eb, void *start, int len,
	return 0;
}

/*
 * simple helper to read an inode off the disk from a given root
 * This can only be called for subvolume roots and not for the log
 */
static noinline struct btrfs_inode *read_one_inode(struct btrfs_root *root,
						   u64 objectid)
{
	struct btrfs_inode *inode;

	inode = btrfs_iget_logging(objectid, root);
	if (IS_ERR(inode))
		return NULL;
	return inode;
}

/* replays a single extent in 'eb' at 'slot' with 'key' into the
 * subvolume 'root'.  path is released on entry and should be released
 * on exit.
@@ -674,9 +662,9 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
		return -EUCLEAN;
	}

	inode = read_one_inode(root, key->objectid);
	if (!inode)
		return -EIO;
	inode = btrfs_iget_logging(key->objectid, root);
	if (IS_ERR(inode))
		return PTR_ERR(inode);

	/*
	 * first check to see if we already have this extent in the
@@ -948,9 +936,10 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,

	btrfs_release_path(path);

	inode = read_one_inode(root, location.objectid);
	if (!inode) {
		ret = -EIO;
	inode = btrfs_iget_logging(location.objectid, root);
	if (IS_ERR(inode)) {
		ret = PTR_ERR(inode);
		inode = NULL;
		goto out;
	}

@@ -1169,10 +1158,10 @@ static inline int __add_inode_ref(struct btrfs_trans_handle *trans,
				kfree(victim_name.name);
				return ret;
			} else if (!ret) {
				ret = -ENOENT;
				victim_parent = read_one_inode(root,
						parent_objectid);
				if (victim_parent) {
				victim_parent = btrfs_iget_logging(parent_objectid, root);
				if (IS_ERR(victim_parent)) {
					ret = PTR_ERR(victim_parent);
				} else {
					inc_nlink(&inode->vfs_inode);
					btrfs_release_path(path);

@@ -1317,9 +1306,9 @@ static int unlink_old_inode_refs(struct btrfs_trans_handle *trans,
			struct btrfs_inode *dir;

			btrfs_release_path(path);
			dir = read_one_inode(root, parent_id);
			if (!dir) {
				ret = -ENOENT;
			dir = btrfs_iget_logging(parent_id, root);
			if (IS_ERR(dir)) {
				ret = PTR_ERR(dir);
				kfree(name.name);
				goto out;
			}
@@ -1391,15 +1380,17 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
	 * copy the back ref in.  The link count fixup code will take
	 * care of the rest
	 */
	dir = read_one_inode(root, parent_objectid);
	if (!dir) {
		ret = -ENOENT;
	dir = btrfs_iget_logging(parent_objectid, root);
	if (IS_ERR(dir)) {
		ret = PTR_ERR(dir);
		dir = NULL;
		goto out;
	}

	inode = read_one_inode(root, inode_objectid);
	if (!inode) {
		ret = -EIO;
	inode = btrfs_iget_logging(inode_objectid, root);
	if (IS_ERR(inode)) {
		ret = PTR_ERR(inode);
		inode = NULL;
		goto out;
	}

@@ -1411,12 +1402,14 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
			 * parent object can change from one array
			 * item to another.
			 */
			if (!dir)
				dir = read_one_inode(root, parent_objectid);
			if (!dir) {
				ret = -ENOENT;
				dir = btrfs_iget_logging(parent_objectid, root);
				if (IS_ERR(dir)) {
					ret = PTR_ERR(dir);
					dir = NULL;
					goto out;
				}
			}
		} else {
			ret = ref_get_fields(eb, ref_ptr, &name, &ref_index);
		}
@@ -1684,9 +1677,9 @@ static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans,
			break;

		btrfs_release_path(path);
		inode = read_one_inode(root, key.offset);
		if (!inode) {
			ret = -EIO;
		inode = btrfs_iget_logging(key.offset, root);
		if (IS_ERR(inode)) {
			ret = PTR_ERR(inode);
			break;
		}

@@ -1722,9 +1715,9 @@ static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans,
	struct btrfs_inode *inode;
	struct inode *vfs_inode;

	inode = read_one_inode(root, objectid);
	if (!inode)
		return -EIO;
	inode = btrfs_iget_logging(objectid, root);
	if (IS_ERR(inode))
		return PTR_ERR(inode);

	vfs_inode = &inode->vfs_inode;
	key.objectid = BTRFS_TREE_LOG_FIXUP_OBJECTID;
@@ -1763,14 +1756,14 @@ static noinline int insert_one_name(struct btrfs_trans_handle *trans,
	struct btrfs_inode *dir;
	int ret;

	inode = read_one_inode(root, location->objectid);
	if (!inode)
		return -ENOENT;
	inode = btrfs_iget_logging(location->objectid, root);
	if (IS_ERR(inode))
		return PTR_ERR(inode);

	dir = read_one_inode(root, dirid);
	if (!dir) {
	dir = btrfs_iget_logging(dirid, root);
	if (IS_ERR(dir)) {
		iput(&inode->vfs_inode);
		return -EIO;
		return PTR_ERR(dir);
	}

	ret = btrfs_add_link(trans, dir, inode, name, 1, index);
@@ -1847,9 +1840,9 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
	bool update_size = true;
	bool name_added = false;

	dir = read_one_inode(root, key->objectid);
	if (!dir)
		return -EIO;
	dir = btrfs_iget_logging(key->objectid, root);
	if (IS_ERR(dir))
		return PTR_ERR(dir);

	ret = read_alloc_one_name(eb, di + 1, btrfs_dir_name_len(eb, di), &name);
	if (ret)
@@ -2149,9 +2142,10 @@ static noinline int check_item_in_log(struct btrfs_trans_handle *trans,
	btrfs_dir_item_key_to_cpu(eb, di, &location);
	btrfs_release_path(path);
	btrfs_release_path(log_path);
	inode = read_one_inode(root, location.objectid);
	if (!inode) {
		ret = -EIO;
	inode = btrfs_iget_logging(location.objectid, root);
	if (IS_ERR(inode)) {
		ret = PTR_ERR(inode);
		inode = NULL;
		goto out;
	}

@@ -2303,14 +2297,17 @@ static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans,
	if (!log_path)
		return -ENOMEM;

	dir = read_one_inode(root, dirid);
	/* it isn't an error if the inode isn't there, that can happen
	 * because we replay the deletes before we copy in the inode item
	 * from the log
	dir = btrfs_iget_logging(dirid, root);
	/*
	 * It isn't an error if the inode isn't there, that can happen because
	 * we replay the deletes before we copy in the inode item from the log.
	 */
	if (!dir) {
	if (IS_ERR(dir)) {
		btrfs_free_path(log_path);
		return 0;
		ret = PTR_ERR(dir);
		if (ret == -ENOENT)
			ret = 0;
		return ret;
	}

	range_start = 0;
@@ -2469,9 +2466,9 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
				struct btrfs_inode *inode;
				u64 from;

				inode = read_one_inode(root, key.objectid);
				if (!inode) {
					ret = -EIO;
				inode = btrfs_iget_logging(key.objectid, root);
				if (IS_ERR(inode)) {
					ret = PTR_ERR(inode);
					break;
				}
				from = ALIGN(i_size_read(&inode->vfs_inode),