Commit 583772ee authored by Jeff Layton, committed by Chuck Lever
Browse files

nfsd: allow for up to 32 callback session slots



nfsd currently only uses a single slot in the callback channel, which is
proving to be a bottleneck in some cases. Widen the callback channel to
a max of 32 slots (subject to the client's target_maxreqs value).

Change the cb_holds_slot boolean to an integer that tracks the current
slot number (with -1 meaning "unassigned").  Move the callback slot
tracking info into the session. Add a new u32 that acts as a bitmap to
track which slots are in use, and a u32 to track the latest callback
target_slotid that the client reports. To protect the new fields, add
a new per-session spinlock (the se_lock). Fix nfsd41_cb_get_slot to always
search for the lowest slotid (using ffs()).

Finally, convert the session->se_cb_seq_nr field into an array of
ints and add the necessary handling to ensure that the seqids get
reset when the slot table grows after shrinking.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
parent c840b8e1
Loading
Loading
Loading
Loading
+88 −30
Original line number Diff line number Diff line
@@ -374,6 +374,19 @@ encode_cb_getattr4args(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr,
	hdr->nops++;
}

/*
 * Return the slot id to send as csa_highest_slotid: the larger of the
 * highest-numbered slot currently in use (its bit is clear in
 * se_cb_slot_avail) and se_cb_highest_slot. Both fields are read under
 * se_lock.
 */
static u32 highest_slotid(struct nfsd4_session *ses)
{
	u32 busy, highest;

	spin_lock(&ses->se_lock);
	busy = ~ses->se_cb_slot_avail;
	/* fls() numbers bits from 1 and yields 0 when no bit is set */
	highest = busy ? fls(busy) - 1 : 0;
	highest = max(highest, ses->se_cb_highest_slot);
	spin_unlock(&ses->se_lock);

	return highest;
}

/*
 * CB_SEQUENCE4args
 *
@@ -400,15 +413,40 @@ static void encode_cb_sequence4args(struct xdr_stream *xdr,
	encode_sessionid4(xdr, session);

	p = xdr_reserve_space(xdr, 4 + 4 + 4 + 4 + 4);
	*p++ = cpu_to_be32(session->se_cb_seq_nr);	/* csa_sequenceid */
	*p++ = xdr_zero;			/* csa_slotid */
	*p++ = xdr_zero;			/* csa_highest_slotid */
	*p++ = cpu_to_be32(session->se_cb_seq_nr[cb->cb_held_slot]);	/* csa_sequenceid */
	*p++ = cpu_to_be32(cb->cb_held_slot);		/* csa_slotid */
	*p++ = cpu_to_be32(highest_slotid(session)); /* csa_highest_slotid */
	*p++ = xdr_zero;			/* csa_cachethis */
	xdr_encode_empty_array(p);		/* csa_referring_call_lists */

	hdr->nops++;
}

/*
 * Record the target highest slot id that the client reported in its
 * CB_SEQUENCE reply.
 *
 * @ses:    session whose backchannel slot table is being resized
 * @target: target highest slot id taken from the reply
 *
 * The target is clamped to the last index of the server's slot table before
 * anything else, so that a client advertising more slots than nfsd supports
 * still hits the lock-free "nothing changed" fast path once the table has
 * reached its maximum size.
 */
static void update_cb_slot_table(struct nfsd4_session *ses, u32 target)
{
	u32 slot;

	/* Never track more slots than se_cb_seq_nr[] can hold */
	target = min(target, NFSD_BC_SLOT_TABLE_SIZE - 1);

	/* No need to do anything if nothing changed */
	if (likely(target == READ_ONCE(ses->se_cb_highest_slot)))
		return;

	spin_lock(&ses->se_lock);
	/*
	 * Growing the slot table. Reset any new sequences to 1.
	 *
	 * NB: There is some debate about whether the RFC requires this,
	 *     but the Linux client expects it.
	 *
	 * When the table is shrinking, the loop body never runs because
	 * the start index is already above target.
	 */
	for (slot = ses->se_cb_highest_slot + 1; slot <= target; ++slot)
		ses->se_cb_seq_nr[slot] = 1;
	ses->se_cb_highest_slot = target;
	spin_unlock(&ses->se_lock);
}

/*
 * CB_SEQUENCE4resok
 *
@@ -436,7 +474,7 @@ static int decode_cb_sequence4resok(struct xdr_stream *xdr,
	struct nfsd4_session *session = cb->cb_clp->cl_cb_session;
	int status = -ESERVERFAULT;
	__be32 *p;
	u32 dummy;
	u32 seqid, slotid, target;

	/*
	 * If the server returns different values for sessionID, slotID or
@@ -452,21 +490,22 @@ static int decode_cb_sequence4resok(struct xdr_stream *xdr,
	}
	p += XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN);

	dummy = be32_to_cpup(p++);
	if (dummy != session->se_cb_seq_nr) {
	seqid = be32_to_cpup(p++);
	if (seqid != session->se_cb_seq_nr[cb->cb_held_slot]) {
		dprintk("NFS: %s Invalid sequence number\n", __func__);
		goto out;
	}

	dummy = be32_to_cpup(p++);
	if (dummy != 0) {
	slotid = be32_to_cpup(p++);
	if (slotid != cb->cb_held_slot) {
		dprintk("NFS: %s Invalid slotid\n", __func__);
		goto out;
	}

	/*
	 * FIXME: process highest slotid and target highest slotid
	 */
	p++; // ignore current highest slot value

	target = be32_to_cpup(p++);
	update_cb_slot_table(session, target);
	status = 0;
out:
	cb->cb_seq_status = status;
@@ -1167,6 +1206,22 @@ void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *conn)
	spin_unlock(&clp->cl_lock);
}

/*
 * Claim the lowest-numbered available backchannel slot of @ses.
 *
 * Returns the claimed slot id, or -1 when no slot is available at or below
 * se_cb_highest_slot. The search and the bitmap update are done under
 * se_lock.
 */
static int grab_slot(struct nfsd4_session *ses)
{
	int slot;

	spin_lock(&ses->se_lock);
	/* ffs() is 1-based and returns 0 for an empty bitmap, giving -1 here */
	slot = ffs(ses->se_cb_slot_avail) - 1;
	if (slot >= 0 && slot <= ses->se_cb_highest_slot)
		/* clear the bit for the slot */
		ses->se_cb_slot_avail &= ~BIT(slot);
	else
		slot = -1;
	spin_unlock(&ses->se_lock);

	return slot;
}

/*
 * The callback channel has a table of slots, tracked per session.
 * If a slot is available, then mark it busy.  Otherwise, set the
@@ -1175,28 +1230,32 @@ void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *conn)
static bool nfsd41_cb_get_slot(struct nfsd4_callback *cb, struct rpc_task *task)
{
	struct nfs4_client *clp = cb->cb_clp;
	struct nfsd4_session *ses = clp->cl_cb_session;

	if (!cb->cb_holds_slot &&
	    test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) {
	/* Nothing to do if this callback already holds a slot */
	if (cb->cb_held_slot >= 0)
		return true;
	cb->cb_held_slot = grab_slot(ses);
	if (cb->cb_held_slot < 0) {
		/* No slot available: queue the task on the client's waitqueue */
		rpc_sleep_on(&clp->cl_cb_waitq, task, NULL);
		/*
		 * Race breaker: try once more after queueing the task; if a
		 * slot is obtained now, the task is woken again below.
		 */
		if (test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) {
			dprintk("%s slot is busy\n", __func__);
		cb->cb_held_slot = grab_slot(ses);
		if (cb->cb_held_slot < 0)
			return false;
		}
		rpc_wake_up_queued_task(&clp->cl_cb_waitq, task);
	}
	cb->cb_holds_slot = true;
	return true;
}

/*
 * Give back the callback slot held by @cb, if it holds one, and wake the
 * next task waiting on the client's cl_cb_waitq.
 */
static void nfsd41_cb_release_slot(struct nfsd4_callback *cb)
{
	struct nfs4_client *clp = cb->cb_clp;
	struct nfsd4_session *ses = clp->cl_cb_session;

	if (cb->cb_holds_slot) {
		cb->cb_holds_slot = false;
		clear_bit(0, &clp->cl_cb_slot_busy);
	if (cb->cb_held_slot >= 0) {
		spin_lock(&ses->se_lock);
		/* Mark the slot available again in the session's bitmap */
		ses->se_cb_slot_avail |= BIT(cb->cb_held_slot);
		spin_unlock(&ses->se_lock);
		/* -1 means this callback no longer holds a slot */
		cb->cb_held_slot = -1;
		rpc_wake_up_next(&clp->cl_cb_waitq);
	}
}
@@ -1213,8 +1272,8 @@ static void nfsd41_destroy_cb(struct nfsd4_callback *cb)
}

/*
 * TODO: cb_sequence should support referring call lists, cachethis, multiple
 * slots, and mark callback channel down on communication errors.
 * TODO: cb_sequence should support referring call lists, cachethis,
 * and mark callback channel down on communication errors.
 */
static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata)
{
@@ -1256,7 +1315,7 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback
		return true;
	}

	if (!cb->cb_holds_slot)
	if (cb->cb_held_slot < 0)
		goto need_restart;

	/* This is the operation status code for CB_SEQUENCE */
@@ -1270,10 +1329,10 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback
		 * If CB_SEQUENCE returns an error, then the state of the slot
		 * (sequence ID, cached reply) MUST NOT change.
		 */
		++session->se_cb_seq_nr;
		++session->se_cb_seq_nr[cb->cb_held_slot];
		break;
	case -ESERVERFAULT:
		++session->se_cb_seq_nr;
		++session->se_cb_seq_nr[cb->cb_held_slot];
		nfsd4_mark_cb_fault(cb->cb_clp);
		ret = false;
		break;
@@ -1299,17 +1358,16 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback
	case -NFS4ERR_BADSLOT:
		goto retry_nowait;
	case -NFS4ERR_SEQ_MISORDERED:
		if (session->se_cb_seq_nr != 1) {
			session->se_cb_seq_nr = 1;
		if (session->se_cb_seq_nr[cb->cb_held_slot] != 1) {
			session->se_cb_seq_nr[cb->cb_held_slot] = 1;
			goto retry_nowait;
		}
		break;
	default:
		nfsd4_mark_cb_fault(cb->cb_clp);
	}
	nfsd41_cb_release_slot(cb);

	trace_nfsd_cb_free_slot(task, cb);
	nfsd41_cb_release_slot(cb);

	if (RPC_SIGNALLED(task))
		goto need_restart;
@@ -1529,7 +1587,7 @@ void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp,
	INIT_WORK(&cb->cb_work, nfsd4_run_cb_work);
	cb->cb_status = 0;
	cb->cb_need_restart = false;
	cb->cb_holds_slot = false;
	cb->cb_held_slot = -1;
}

/**
+11 −3
Original line number Diff line number Diff line
@@ -2010,6 +2010,10 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs,
	}

	memcpy(&new->se_fchannel, fattrs, sizeof(struct nfsd4_channel_attrs));
	new->se_cb_slot_avail = ~0U;
	new->se_cb_highest_slot = min(battrs->maxreqs - 1,
				      NFSD_BC_SLOT_TABLE_SIZE - 1);
	spin_lock_init(&new->se_lock);
	return new;
out_free:
	while (i--)
@@ -2140,11 +2144,14 @@ static void init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, stru

	INIT_LIST_HEAD(&new->se_conns);

	new->se_cb_seq_nr = 1;
	atomic_set(&new->se_ref, 0);
	new->se_dead = false;
	new->se_cb_prog = cses->callback_prog;
	new->se_cb_sec = cses->cb_sec;
	atomic_set(&new->se_ref, 0);

	for (idx = 0; idx < NFSD_BC_SLOT_TABLE_SIZE; ++idx)
		new->se_cb_seq_nr[idx] = 1;

	idx = hash_sessionid(&new->se_sessionid);
	list_add(&new->se_hash, &nn->sessionid_hashtbl[idx]);
	spin_lock(&clp->cl_lock);
@@ -3153,7 +3160,6 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
	kref_init(&clp->cl_nfsdfs.cl_ref);
	nfsd4_init_cb(&clp->cl_cb_null, clp, NULL, NFSPROC4_CLNT_CB_NULL);
	clp->cl_time = ktime_get_boottime_seconds();
	clear_bit(0, &clp->cl_cb_slot_busy);
	copy_verf(clp, verf);
	memcpy(&clp->cl_addr, sa, sizeof(struct sockaddr_storage));
	clp->cl_cb_session = NULL;
@@ -3935,6 +3941,8 @@ nfsd4_create_session(struct svc_rqst *rqstp,
	cr_ses->flags &= ~SESSION4_PERSIST;
	/* Upshifting from TCP to RDMA is not supported */
	cr_ses->flags &= ~SESSION4_RDMA;
	/* Report the correct number of backchannel slots */
	cr_ses->back_channel.maxreqs = new->se_cb_highest_slot + 1;

	init_session(rqstp, new, conf, cr_ses);
	nfsd4_get_session_locked(new);
+9 −6
Original line number Diff line number Diff line
@@ -71,8 +71,8 @@ struct nfsd4_callback {
	struct work_struct cb_work;
	int cb_seq_status;
	int cb_status;
	int cb_held_slot;
	bool cb_need_restart;
	bool cb_holds_slot;
};

struct nfsd4_callback_ops {
@@ -304,6 +304,9 @@ struct nfsd4_conn {
	unsigned char cn_flags;
};

/*
 * Maximum number of slots that nfsd will use in the backchannel.
 * This is the number of bits in a u32: the session tracks which slots are
 * available in a u32 bitmap (se_cb_slot_avail), one bit per slot.
 */
#define NFSD_BC_SLOT_TABLE_SIZE		(sizeof(u32) * 8)

/*
 * Representation of a v4.1+ session. These are refcounted in a similar fashion
 * to the nfs4_client. References are only taken when the server is actively
@@ -311,6 +314,10 @@ struct nfsd4_conn {
 */
struct nfsd4_session {
	atomic_t		se_ref;
	spinlock_t		se_lock;
	u32			se_cb_slot_avail; /* bitmap of available slots */
	u32			se_cb_highest_slot;	/* highest slot client wants */
	u32			se_cb_prog;
	bool			se_dead;
	struct list_head	se_hash;	/* hash by sessionid */
	struct list_head	se_perclnt;
@@ -319,8 +326,7 @@ struct nfsd4_session {
	struct nfsd4_channel_attrs se_fchannel;
	struct nfsd4_cb_sec	se_cb_sec;
	struct list_head	se_conns;
	u32			se_cb_prog;
	u32			se_cb_seq_nr;
	u32			se_cb_seq_nr[NFSD_BC_SLOT_TABLE_SIZE];
	struct nfsd4_slot	*se_slots[];	/* forward channel slots */
};

@@ -454,9 +460,6 @@ struct nfs4_client {
	 */
	struct dentry		*cl_nfsd_info_dentry;

	/* for nfs41 callbacks */
	/* We currently support a single back channel with a single slot */
	unsigned long		cl_cb_slot_busy;
	struct rpc_wait_queue	cl_cb_waitq;	/* backchannel callers may */
						/* wait here for slots */
	struct net		*net;
+1 −1
Original line number Diff line number Diff line
@@ -1697,7 +1697,7 @@ TRACE_EVENT(nfsd_cb_free_slot,
		__entry->cl_id = sid->clientid.cl_id;
		__entry->seqno = sid->sequence;
		__entry->reserved = sid->reserved;
		__entry->slot_seqno = session->se_cb_seq_nr;
		__entry->slot_seqno = session->se_cb_seq_nr[cb->cb_held_slot];
	),
	TP_printk(SUNRPC_TRACE_TASK_SPECIFIER
		" sessionid=%08x:%08x:%08x:%08x new slot seqno=%u",