Commit 1054e8ff authored by Jeff Layton, committed by Chuck Lever
Browse files

nfsd: prevent callback tasks running concurrently



The nfsd4_callback workqueue jobs exist to queue backchannel RPCs to
rpciod. Because they run in different workqueue contexts, the rpc_task
can run concurrently with the workqueue job itself, should it become
requeued. This is problematic as there is no locking when accessing the
fields in the nfsd4_callback.

Add a new unsigned long to nfsd4_callback and declare a new
NFSD4_CALLBACK_RUNNING flag to be set in it. When attempting to run a
workqueue job, do a test_and_set_bit() on that flag first, and don't
queue the workqueue job if it returns true. Clear NFSD4_CALLBACK_RUNNING
in nfsd41_destroy_cb().

This also gives us a more reliable mechanism for handling queueing
failures in codepaths where we have to take references under spinlocks.
We can now do the test_and_set_bit on NFSD4_CALLBACK_RUNNING first, and
only take references to the objects if that returns false.

Most of the nfsd4_run_cb() callers are converted to use this new flag or
the nfsd4_try_run_cb() wrapper. The main exception is the callback
channel probe, which has its own synchronization.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
parent 9254c8ae
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -1312,6 +1312,7 @@ static void nfsd41_destroy_cb(struct nfsd4_callback *cb)

	trace_nfsd_cb_destroy(clp, cb);
	nfsd41_cb_release_slot(cb);
	clear_bit(NFSD4_CALLBACK_RUNNING, &cb->cb_flags);
	if (cb->cb_ops && cb->cb_ops->release)
		cb->cb_ops->release(cb);
	nfsd41_cb_inflight_end(clp);
@@ -1632,6 +1633,7 @@ void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp,
	cb->cb_msg.rpc_proc = &nfs4_cb_procedures[op];
	cb->cb_msg.rpc_argp = cb;
	cb->cb_msg.rpc_resp = cb;
	cb->cb_flags = 0;
	cb->cb_ops = ops;
	INIT_WORK(&cb->cb_work, nfsd4_run_cb_work);
	cb->cb_status = 0;
+4 −3
Original line number Diff line number Diff line
@@ -344,9 +344,10 @@ nfsd4_recall_file_layout(struct nfs4_layout_stateid *ls)
	atomic_inc(&ls->ls_stid.sc_file->fi_lo_recalls);
	trace_nfsd_layout_recall(&ls->ls_stid.sc_stateid);

	if (!test_and_set_bit(NFSD4_CALLBACK_RUNNING, &ls->ls_recall.cb_flags)) {
		refcount_inc(&ls->ls_stid.sc_count);
		nfsd4_run_cb(&ls->ls_recall);

	}
out_unlock:
	spin_unlock(&ls->ls_lock);
}
+1 −1
Original line number Diff line number Diff line
@@ -1847,7 +1847,7 @@ static void nfsd4_send_cb_offload(struct nfsd4_copy *copy)
		      NFSPROC4_CLNT_CB_OFFLOAD);
	trace_nfsd_cb_offload(copy->cp_clp, &cbo->co_res.cb_stateid,
			      &cbo->co_fh, copy->cp_count, copy->nfserr);
	nfsd4_run_cb(&cbo->co_cb);
	nfsd4_try_run_cb(&cbo->co_cb);
}

/**
+10 −4
Original line number Diff line number Diff line
@@ -3232,9 +3232,11 @@ static void nfs4_cb_getattr(struct nfs4_cb_fattr *ncf)
	/* set to proper status when nfsd4_cb_getattr_done runs */
	ncf->ncf_cb_status = NFS4ERR_IO;

	if (!test_and_set_bit(NFSD4_CALLBACK_RUNNING, &ncf->ncf_getattr.cb_flags)) {
		refcount_inc(&dp->dl_stid.sc_count);
		nfsd4_run_cb(&ncf->ncf_getattr);
	}
}

static struct nfs4_client *create_client(struct xdr_netobj name,
		struct svc_rqst *rqstp, nfs4_verifier *verf)
@@ -5422,6 +5424,10 @@ static const struct nfsd4_callback_ops nfsd4_cb_recall_ops = {
static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
{
	bool queued;

	if (test_and_set_bit(NFSD4_CALLBACK_RUNNING, &dp->dl_recall.cb_flags))
		return;

	/*
	 * We're assuming the state code never drops its reference
	 * without first removing the lease.  Since we're in this lease
@@ -6910,7 +6916,7 @@ deleg_reaper(struct nfsd_net *nn)
		clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG) |
						BIT(RCA4_TYPE_MASK_WDATA_DLG);
		trace_nfsd_cb_recall_any(clp->cl_ra);
		nfsd4_run_cb(&clp->cl_ra->ra_cb);
		nfsd4_try_run_cb(&clp->cl_ra->ra_cb);
	}
}

@@ -7839,7 +7845,7 @@ nfsd4_lm_notify(struct file_lock *fl)

	if (queue) {
		trace_nfsd_cb_notify_lock(lo, nbl);
		nfsd4_run_cb(&nbl->nbl_cb);
		nfsd4_try_run_cb(&nbl->nbl_cb);
	}
}

+9 −0
Original line number Diff line number Diff line
@@ -67,6 +67,8 @@ typedef struct {
struct nfsd4_callback {
	struct nfs4_client *cb_clp;
	struct rpc_message cb_msg;
#define NFSD4_CALLBACK_RUNNING		(0)
	unsigned long cb_flags;
	const struct nfsd4_callback_ops *cb_ops;
	struct work_struct cb_work;
	int cb_seq_status;
@@ -780,6 +782,13 @@ extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *
extern void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp,
		const struct nfsd4_callback_ops *ops, enum nfsd4_cb_op op);
extern bool nfsd4_run_cb(struct nfsd4_callback *cb);

/**
 * nfsd4_try_run_cb - queue a callback unless it is already marked running
 * @cb: callback to queue
 *
 * Atomically sets NFSD4_CALLBACK_RUNNING in @cb->cb_flags. If the bit was
 * already set, nothing is queued and the function returns. Otherwise @cb is
 * passed to nfsd4_run_cb(), and a false return from that call triggers a
 * one-time warning.
 */
static inline void nfsd4_try_run_cb(struct nfsd4_callback *cb)
{
	bool queued;

	/* Someone else already owns the RUNNING bit: leave the callback alone. */
	if (test_and_set_bit(NFSD4_CALLBACK_RUNNING, &cb->cb_flags))
		return;

	queued = nfsd4_run_cb(cb);
	WARN_ON_ONCE(!queued);
}

extern void nfsd4_shutdown_callback(struct nfs4_client *);
extern void nfsd4_shutdown_copy(struct nfs4_client *clp);
void nfsd4_async_copy_reaper(struct nfsd_net *nn);