Commit 21fb4403 authored by NeilBrown's avatar NeilBrown Committed by Anna Schumaker
Browse files

nfs_localio: protect race between nfs_uuid_put() and nfs_close_local_fh()



nfs_uuid_put() and nfs_close_local_fh() can race if a "struct
nfs_file_localio" is released at the same time that nfsd calls
nfs_localio_invalidate_clients().

It is important that neither of these functions completes after the
other has started looking at a given nfs_file_localio and before it
finishes.

If nfs_uuid_put() exits while nfs_close_local_fh() is closing ro_file
and rw_file it could return to __nfsd_file_cache_purge() while some files
are still referenced so the purge may not succeed.

If nfs_close_local_fh() exits while nfs_uuid_put() is still closing the
files then the "struct nfs_file_localio" could be freed while
nfs_uuid_put() is still looking at it.  This side is currently handled
by copying the pointers out of ro_file and rw_file before deleting from
the list in nfs_uuid.  We need to preserve this while ensuring that
nfs_uuid_put() does wait for nfs_close_local_fh().

This patch uses nfl->nfs_uuid and nfl->list to provide the required
interlock.

nfs_uuid_put() removes the nfs_file_localio from the list, then drops
locks and puts the two files, then reclaims the spinlock and sets
->nfs_uuid to NULL.

nfs_close_local_fh() operates in the reverse order, setting ->nfs_uuid
to NULL, then closing the files, then unlinking from the list.

If nfs_uuid_put() finds that ->nfs_uuid is already NULL, it waits for
the nfs_file_localio to be removed from the list.  If
nfs_close_local_fh() finds that it has already been unlinked, it waits for
->nfs_uuid to become NULL.  This ensures that one of the two tries to
close the files, but each of them waits for the other.

As nfs_uuid_put() is making the list empty, change from a
list_for_each_safe loop to a while that always takes the first entry.
This makes the intent more clear.
Also don't move the list to a temporary local list as this would defeat
the guarantees required for the interlock.

Fixes: 86e00412 ("nfs: cache all open LOCALIO nfsd_file(s) in client")
Signed-off-by: NeilBrown <neil@brown.name>
Signed-off-by: Anna Schumaker <anna.schumaker@oracle.com>
parent 74fc55ab
Loading
Loading
Loading
Loading
+56 −25
Original line number Diff line number Diff line
@@ -151,8 +151,7 @@ EXPORT_SYMBOL_GPL(nfs_localio_enable_client);
 */
static bool nfs_uuid_put(nfs_uuid_t *nfs_uuid)
{
	LIST_HEAD(local_files);
	struct nfs_file_localio *nfl, *tmp;
	struct nfs_file_localio *nfl;

	spin_lock(&nfs_uuid->lock);
	if (unlikely(!rcu_access_pointer(nfs_uuid->net))) {
@@ -166,36 +165,48 @@ static bool nfs_uuid_put(nfs_uuid_t *nfs_uuid)
		nfs_uuid->dom = NULL;
	}

	list_splice_init(&nfs_uuid->files, &local_files);
	spin_unlock(&nfs_uuid->lock);

	/* Walk list of files and ensure their last references dropped */
	list_for_each_entry_safe(nfl, tmp, &local_files, list) {

	while ((nfl = list_first_entry_or_null(&nfs_uuid->files,
					       struct nfs_file_localio,
					       list)) != NULL) {
		struct nfsd_file *ro_nf;
		struct nfsd_file *rw_nf;

		/* If nfs_uuid is already NULL, nfs_close_local_fh is
		 * closing and we must wait, else we unlink and close.
		 */
		if (rcu_access_pointer(nfl->nfs_uuid) == NULL) {
			/* nfs_close_local_fh() is doing the
			 * close and we must wait. until it unlinks
			 */
			wait_var_event_spinlock(nfl,
						list_first_entry_or_null(
							&nfs_uuid->files,
							struct nfs_file_localio,
							list) != nfl,
						&nfs_uuid->lock);
			continue;
		}

		ro_nf = unrcu_pointer(xchg(&nfl->ro_file, NULL));
		rw_nf = unrcu_pointer(xchg(&nfl->rw_file, NULL));

		spin_lock(&nfs_uuid->lock);
		/* Remove nfl from nfs_uuid->files list */
		list_del_init(&nfl->list);
		spin_unlock(&nfs_uuid->lock);
		/* Now we can allow racing nfs_close_local_fh() to
		 * skip the locking.
		 */
		RCU_INIT_POINTER(nfl->nfs_uuid, NULL);

		if (ro_nf)
			nfs_to_nfsd_file_put_local(ro_nf);
		if (rw_nf)
			nfs_to_nfsd_file_put_local(rw_nf);

		cond_resched();
	}

		spin_lock(&nfs_uuid->lock);
	BUG_ON(!list_empty(&nfs_uuid->files));
		/* Now we can allow racing nfs_close_local_fh() to
		 * skip the locking.
		 */
		RCU_INIT_POINTER(nfl->nfs_uuid, NULL);
		wake_up_var_locked(&nfl->nfs_uuid, &nfs_uuid->lock);
	}

	/* Remove client from nn->local_clients */
	if (nfs_uuid->list_lock) {
@@ -304,23 +315,43 @@ void nfs_close_local_fh(struct nfs_file_localio *nfl)
		return;
	}

	ro_nf = unrcu_pointer(xchg(&nfl->ro_file, NULL));
	rw_nf = unrcu_pointer(xchg(&nfl->rw_file, NULL));

	spin_lock(&nfs_uuid->lock);
	/* Remove nfl from nfs_uuid->files list */
	list_del_init(&nfl->list);
	if (!rcu_access_pointer(nfl->nfs_uuid)) {
		/* nfs_uuid_put has finished here */
		spin_unlock(&nfs_uuid->lock);
		rcu_read_unlock();
	/* Now we can allow racing nfs_close_local_fh() to
	 * skip the locking.
		return;
	}
	if (list_empty(&nfs_uuid->files)) {
		/* nfs_uuid_put() has started closing files, wait for it
		 * to finished
		 */
		spin_unlock(&nfs_uuid->lock);
		rcu_read_unlock();
		wait_var_event(&nfl->nfs_uuid,
			       rcu_access_pointer(nfl->nfs_uuid) == NULL);
		return;
	}
	/* tell nfs_uuid_put() to wait for us */
	RCU_INIT_POINTER(nfl->nfs_uuid, NULL);
	spin_unlock(&nfs_uuid->lock);
	rcu_read_unlock();

	ro_nf = unrcu_pointer(xchg(&nfl->ro_file, NULL));
	rw_nf = unrcu_pointer(xchg(&nfl->rw_file, NULL));
	if (ro_nf)
		nfs_to_nfsd_file_put_local(ro_nf);
	if (rw_nf)
		nfs_to_nfsd_file_put_local(rw_nf);

	/* Remove nfl from nfs_uuid->files list and signal nfs_uuid_put()
	 * that we are done.  The moment we drop the spinlock the
	 * nfs_uuid could be freed.
	 */
	spin_lock(&nfs_uuid->lock);
	list_del_init(&nfl->list);
	wake_up_var_locked(&nfl->nfs_uuid, &nfs_uuid->lock);
	spin_unlock(&nfs_uuid->lock);
}
EXPORT_SYMBOL_GPL(nfs_close_local_fh);