Commit 76d2e389 authored by Trond Myklebust
Browse files

NFS: Fix a race when updating an existing write



After nfs_lock_and_join_requests() tests whether the request is still
attached to the mapping, nothing prevents a call to
nfs_inode_remove_request() from succeeding until we actually lock the
page group. The reason is that whoever called
nfs_inode_remove_request() doesn't necessarily hold a lock on the page
group head.

So in order to avoid races, let's take the page group lock earlier in
nfs_lock_and_join_requests(), and hold it across the removal of the
request in nfs_inode_remove_request().

Reported-by: Jeff Layton <jlayton@kernel.org>
Tested-by: Joe Quanaim <jdq@meta.com>
Tested-by: Andrew Steffen <aksteffen@meta.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Fixes: bd37d6fc ("NFSv4: Convert nfs_lock_and_join_requests() to use nfs_page_find_head_request()")
Cc: stable@vger.kernel.org
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
parent c17b750b
Loading
Loading
Loading
Loading
+5 −4
Original line number Diff line number Diff line
@@ -253,13 +253,14 @@ nfs_page_group_unlock(struct nfs_page *req)
	nfs_page_clear_headlock(req);
}

/*
 * nfs_page_group_sync_on_bit_locked
/**
 * nfs_page_group_sync_on_bit_locked - Test if all requests have @bit set
 * @req: request in page group
 * @bit: PG_* bit that is used to sync page group
 *
 * must be called with page group lock held
 */
static bool
nfs_page_group_sync_on_bit_locked(struct nfs_page *req, unsigned int bit)
bool nfs_page_group_sync_on_bit_locked(struct nfs_page *req, unsigned int bit)
{
	struct nfs_page *head = req->wb_head;
	struct nfs_page *tmp;
+10 −19
Original line number Diff line number Diff line
@@ -153,20 +153,10 @@ nfs_page_set_inode_ref(struct nfs_page *req, struct inode *inode)
	}
}

static int
nfs_cancel_remove_inode(struct nfs_page *req, struct inode *inode)
static void nfs_cancel_remove_inode(struct nfs_page *req, struct inode *inode)
{
	int ret;

	if (!test_bit(PG_REMOVE, &req->wb_flags))
		return 0;
	ret = nfs_page_group_lock(req);
	if (ret)
		return ret;
	if (test_and_clear_bit(PG_REMOVE, &req->wb_flags))
		nfs_page_set_inode_ref(req, inode);
	nfs_page_group_unlock(req);
	return 0;
}

/**
@@ -585,19 +575,18 @@ static struct nfs_page *nfs_lock_and_join_requests(struct folio *folio)
		}
	}

	ret = nfs_page_group_lock(head);
	if (ret < 0)
		goto out_unlock;

	/* Ensure that nobody removed the request before we locked it */
	if (head != folio->private) {
		nfs_page_group_unlock(head);
		nfs_unlock_and_release_request(head);
		goto retry;
	}

	ret = nfs_cancel_remove_inode(head, inode);
	if (ret < 0)
		goto out_unlock;

	ret = nfs_page_group_lock(head);
	if (ret < 0)
		goto out_unlock;
	nfs_cancel_remove_inode(head, inode);

	/* lock each request in the page group */
	for (subreq = head->wb_this_page;
@@ -786,7 +775,8 @@ static void nfs_inode_remove_request(struct nfs_page *req)
{
	struct nfs_inode *nfsi = NFS_I(nfs_page_to_inode(req));

	if (nfs_page_group_sync_on_bit(req, PG_REMOVE)) {
	nfs_page_group_lock(req);
	if (nfs_page_group_sync_on_bit_locked(req, PG_REMOVE)) {
		struct folio *folio = nfs_page_to_folio(req->wb_head);
		struct address_space *mapping = folio->mapping;

@@ -798,6 +788,7 @@ static void nfs_inode_remove_request(struct nfs_page *req)
		}
		spin_unlock(&mapping->i_private_lock);
	}
	nfs_page_group_unlock(req);

	if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags)) {
		atomic_long_dec(&nfsi->nrequests);
+1 −0
Original line number Diff line number Diff line
@@ -160,6 +160,7 @@ extern void nfs_join_page_group(struct nfs_page *head,
extern int nfs_page_group_lock(struct nfs_page *);
extern void nfs_page_group_unlock(struct nfs_page *);
extern bool nfs_page_group_sync_on_bit(struct nfs_page *, unsigned int);
extern bool nfs_page_group_sync_on_bit_locked(struct nfs_page *, unsigned int);
extern	int nfs_page_set_headlock(struct nfs_page *req);
extern void nfs_page_clear_headlock(struct nfs_page *req);
extern bool nfs_async_iocounter_wait(struct rpc_task *, struct nfs_lock_context *);