Commit 71b43531 authored by Chuck Lever
Browse files

svcrdma: Post Send WR chain



Eventually I'd like the server to post the reply's Send WR along
with any Write WRs using only a single call to ib_post_send(), in
order to reduce the NIC's doorbell rate.

To do this, add an anchor for a WR chain to svc_rdma_send_ctxt, and
refactor svc_rdma_send() to post this WR chain to the Send Queue. For
the moment, the posted chain will continue to contain a single Send
WR.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
parent fc709d82
Loading
Loading
Loading
Loading
+4 −2
Original line number Diff line number Diff line
@@ -210,6 +210,8 @@ struct svc_rdma_send_ctxt {

	struct svcxprt_rdma	*sc_rdma;
	struct ib_send_wr	sc_send_wr;
	struct ib_send_wr	*sc_wr_chain;
	int			sc_sqecount;
	struct ib_cqe		sc_cqe;
	struct xdr_buf		sc_hdrbuf;
	struct xdr_stream	sc_stream;
@@ -258,7 +260,7 @@ extern struct svc_rdma_send_ctxt *
		svc_rdma_send_ctxt_get(struct svcxprt_rdma *rdma);
extern void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma,
				   struct svc_rdma_send_ctxt *ctxt);
extern int svc_rdma_send(struct svcxprt_rdma *rdma,
extern int svc_rdma_post_send(struct svcxprt_rdma *rdma,
			      struct svc_rdma_send_ctxt *ctxt);
extern int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
				  struct svc_rdma_send_ctxt *sctxt,
+1 −1
Original line number Diff line number Diff line
@@ -90,7 +90,7 @@ static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma,
	 */
	get_page(virt_to_page(rqst->rq_buffer));
	sctxt->sc_send_wr.opcode = IB_WR_SEND;
	return svc_rdma_send(rdma, sctxt);
	return svc_rdma_post_send(rdma, sctxt);
}

/* Server-side transport endpoint wants a whole page for its send
+33 −16
Original line number Diff line number Diff line
@@ -208,6 +208,9 @@ struct svc_rdma_send_ctxt *svc_rdma_send_ctxt_get(struct svcxprt_rdma *rdma)
	ctxt->sc_send_wr.num_sge = 0;
	ctxt->sc_cur_sge_no = 0;
	ctxt->sc_page_count = 0;
	ctxt->sc_wr_chain = &ctxt->sc_send_wr;
	ctxt->sc_sqecount = 1;

	return ctxt;

out_empty:
@@ -293,7 +296,7 @@ static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
	struct svc_rdma_send_ctxt *ctxt =
		container_of(cqe, struct svc_rdma_send_ctxt, sc_cqe);

	svc_rdma_wake_send_waiters(rdma, 1);
	svc_rdma_wake_send_waiters(rdma, ctxt->sc_sqecount);

	if (unlikely(wc->status != IB_WC_SUCCESS))
		goto flushed;
@@ -312,36 +315,44 @@ static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
}

/**
 * svc_rdma_send - Post a single Send WR
 * @rdma: transport on which to post the WR
 * @ctxt: send ctxt with a Send WR ready to post
 * svc_rdma_post_send - Post a WR chain to the Send Queue
 * @rdma: transport context
 * @ctxt: WR chain to post
 *
 * Copy fields in @ctxt to stack variables in order to guarantee
 * that these values remain available after the ib_post_send() call.
 * In some error flow cases, svc_rdma_wc_send() releases @ctxt.
 *
 * Note there is potential for starvation when the Send Queue is
 * full because there is no order to when waiting threads are
 * awoken. The transport is typically provisioned with a deep
 * enough Send Queue that SQ exhaustion should be a rare event.
 *
 * Return values:
 *   %0: @ctxt's WR chain was posted successfully
 *   %-ENOTCONN: The connection was lost
 */
int svc_rdma_send(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *ctxt)
int svc_rdma_post_send(struct svcxprt_rdma *rdma,
		       struct svc_rdma_send_ctxt *ctxt)
{
	struct ib_send_wr *wr = &ctxt->sc_send_wr;
	struct ib_send_wr *first_wr = ctxt->sc_wr_chain;
	struct ib_send_wr *send_wr = &ctxt->sc_send_wr;
	const struct ib_send_wr *bad_wr = first_wr;
	struct rpc_rdma_cid cid = ctxt->sc_cid;
	int ret;
	int ret, sqecount = ctxt->sc_sqecount;

	might_sleep();

	/* Sync the transport header buffer */
	ib_dma_sync_single_for_device(rdma->sc_pd->device,
				      wr->sg_list[0].addr,
				      wr->sg_list[0].length,
				      send_wr->sg_list[0].addr,
				      send_wr->sg_list[0].length,
				      DMA_TO_DEVICE);

	/* If the SQ is full, wait until an SQ entry is available */
	while (!test_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags)) {
		if ((atomic_dec_return(&rdma->sc_sq_avail) < 0)) {
			svc_rdma_wake_send_waiters(rdma, 1);
		if (atomic_sub_return(sqecount, &rdma->sc_sq_avail) < 0) {
			svc_rdma_wake_send_waiters(rdma, sqecount);

			/* When the transport is torn down, assume
			 * ib_drain_sq() will trigger enough Send
@@ -358,13 +369,19 @@ int svc_rdma_send(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *ctxt)
		}

		trace_svcrdma_post_send(ctxt);
		ret = ib_post_send(rdma->sc_qp, wr, NULL);
		ret = ib_post_send(rdma->sc_qp, first_wr, &bad_wr);
		if (ret) {
			trace_svcrdma_sq_post_err(rdma, &cid, ret);
			svc_xprt_deferred_close(&rdma->sc_xprt);
			svc_rdma_wake_send_waiters(rdma, 1);

			/* If even one WR was posted, there will be a
			 * Send completion that bumps sc_sq_avail.
			 */
			if (bad_wr == first_wr) {
				svc_rdma_wake_send_waiters(rdma, sqecount);
				break;
			}
		}
		return 0;
	}
	return -ENOTCONN;
@@ -884,7 +901,7 @@ static int svc_rdma_send_reply_msg(struct svcxprt_rdma *rdma,
		sctxt->sc_send_wr.opcode = IB_WR_SEND;
	}

	return svc_rdma_send(rdma, sctxt);
	return svc_rdma_post_send(rdma, sctxt);
}

/**
@@ -948,7 +965,7 @@ void svc_rdma_send_error_msg(struct svcxprt_rdma *rdma,
	sctxt->sc_send_wr.num_sge = 1;
	sctxt->sc_send_wr.opcode = IB_WR_SEND;
	sctxt->sc_sges[0].length = sctxt->sc_hdrbuf.len;
	if (svc_rdma_send(rdma, sctxt))
	if (svc_rdma_post_send(rdma, sctxt))
		goto put_ctxt;
	return;