Commit 67f57892 authored by Bob Pearson, committed by Jason Gunthorpe
Browse files

RDMA/rxe: Merge request and complete tasks

Currently the rxe driver has three work queue tasks per qp.  These are the
req.task, comp.task and resp.task, which call rxe_requester(),
rxe_completer() and rxe_responder() respectively, either directly or on
work queues. Each of these subroutines checks whether there is work to be
performed on the send queue, the response packet queue or the request
packet queue, and either runs until no work remains or yields the cpu and
reschedules itself until no work remains.

This commit combines the req.task and comp.task into a single send.task
and renames the resp.task to the recv.task. The combined send.task calls
rxe_requester() and rxe_completer() serially and continues until all work
on both the send queue and the response packet queue is done.

In various benchmarks the performance is either improved or left the
same. At high scale there is a significant reduction in the load on the
cpu.

This is the first step in combining these two tasks. Once they are
serialized, cross-rescheduling of req.task and comp.task can be handled
more efficiently by just letting the send.task continue to run. This
will be done in the next several patches.

Link: https://lore.kernel.org/r/20240329145513.35381-7-rpearsonhpe@gmail.com


Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
parent ff30e453
Loading
Loading
Loading
Loading
+10 −10
Original line number Diff line number Diff line
@@ -122,7 +122,7 @@ void retransmit_timer(struct timer_list *t)
	spin_lock_irqsave(&qp->state_lock, flags);
	if (qp->valid) {
		qp->comp.timeout = 1;
		rxe_sched_task(&qp->comp.task);
		rxe_sched_task(&qp->send_task);
	}
	spin_unlock_irqrestore(&qp->state_lock, flags);
}
@@ -133,14 +133,14 @@ void rxe_comp_queue_pkt(struct rxe_qp *qp, struct sk_buff *skb)

	must_sched = skb_queue_len(&qp->resp_pkts) > 0;
	if (must_sched != 0)
		rxe_counter_inc(SKB_TO_PKT(skb)->rxe, RXE_CNT_COMPLETER_SCHED);
		rxe_counter_inc(SKB_TO_PKT(skb)->rxe, RXE_CNT_SENDER_SCHED);

	skb_queue_tail(&qp->resp_pkts, skb);

	if (must_sched)
		rxe_sched_task(&qp->comp.task);
		rxe_sched_task(&qp->send_task);
	else
		rxe_run_task(&qp->comp.task);
		rxe_run_task(&qp->send_task);
}

static inline enum comp_state get_wqe(struct rxe_qp *qp,
@@ -325,7 +325,7 @@ static inline enum comp_state check_ack(struct rxe_qp *qp,
					qp->comp.psn = pkt->psn;
					if (qp->req.wait_psn) {
						qp->req.wait_psn = 0;
						rxe_sched_task(&qp->req.task);
						rxe_sched_task(&qp->send_task);
					}
				}
				return COMPST_ERROR_RETRY;
@@ -476,7 +476,7 @@ static void do_complete(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
	 */
	if (qp->req.wait_fence) {
		qp->req.wait_fence = 0;
		rxe_sched_task(&qp->req.task);
		rxe_sched_task(&qp->send_task);
	}
}

@@ -515,7 +515,7 @@ static inline enum comp_state complete_ack(struct rxe_qp *qp,
		if (qp->req.need_rd_atomic) {
			qp->comp.timeout_retry = 0;
			qp->req.need_rd_atomic = 0;
			rxe_sched_task(&qp->req.task);
			rxe_sched_task(&qp->send_task);
		}
	}

@@ -541,7 +541,7 @@ static inline enum comp_state complete_wqe(struct rxe_qp *qp,

		if (qp->req.wait_psn) {
			qp->req.wait_psn = 0;
			rxe_sched_task(&qp->req.task);
			rxe_sched_task(&qp->send_task);
		}
	}

@@ -737,7 +737,7 @@ int rxe_completer(struct rxe_qp *qp)

			if (qp->req.wait_psn) {
				qp->req.wait_psn = 0;
				rxe_sched_task(&qp->req.task);
				rxe_sched_task(&qp->send_task);
			}

			state = COMPST_DONE;
@@ -792,7 +792,7 @@ int rxe_completer(struct rxe_qp *qp)
							RXE_CNT_COMP_RETRY);
					qp->req.need_retry = 1;
					qp->comp.started_retry = 1;
					rxe_sched_task(&qp->req.task);
					rxe_sched_task(&qp->send_task);
				}
				goto done;

+1 −1
Original line number Diff line number Diff line
@@ -14,7 +14,7 @@ static const struct rdma_stat_desc rxe_counter_descs[] = {
	[RXE_CNT_RCV_RNR].name             =  "rcvd_rnr_err",
	[RXE_CNT_SND_RNR].name             =  "send_rnr_err",
	[RXE_CNT_RCV_SEQ_ERR].name         =  "rcvd_seq_err",
	[RXE_CNT_COMPLETER_SCHED].name     =  "ack_deferred",
	[RXE_CNT_SENDER_SCHED].name        =  "ack_deferred",
	[RXE_CNT_RETRY_EXCEEDED].name      =  "retry_exceeded_err",
	[RXE_CNT_RNR_RETRY_EXCEEDED].name  =  "retry_rnr_exceeded_err",
	[RXE_CNT_COMP_RETRY].name          =  "completer_retry_err",
+1 −1
Original line number Diff line number Diff line
@@ -18,7 +18,7 @@ enum rxe_counters {
	RXE_CNT_RCV_RNR,
	RXE_CNT_SND_RNR,
	RXE_CNT_RCV_SEQ_ERR,
	RXE_CNT_COMPLETER_SCHED,
	RXE_CNT_SENDER_SCHED,
	RXE_CNT_RETRY_EXCEEDED,
	RXE_CNT_RNR_RETRY_EXCEEDED,
	RXE_CNT_COMP_RETRY,
+2 −1
Original line number Diff line number Diff line
@@ -164,7 +164,8 @@ void rxe_dealloc(struct ib_device *ib_dev);

int rxe_completer(struct rxe_qp *qp);
int rxe_requester(struct rxe_qp *qp);
int rxe_responder(struct rxe_qp *qp);
int rxe_sender(struct rxe_qp *qp);
int rxe_receiver(struct rxe_qp *qp);

/* rxe_icrc.c */
int rxe_icrc_init(struct rxe_dev *rxe);
+2 −2
Original line number Diff line number Diff line
@@ -351,7 +351,7 @@ static void rxe_skb_tx_dtor(struct sk_buff *skb)

	if (unlikely(qp->need_req_skb &&
		     skb_out < RXE_INFLIGHT_SKBS_PER_QP_LOW))
		rxe_sched_task(&qp->req.task);
		rxe_sched_task(&qp->send_task);

	rxe_put(qp);
}
@@ -443,7 +443,7 @@ int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
	if ((qp_type(qp) != IB_QPT_RC) &&
	    (pkt->mask & RXE_END_MASK)) {
		pkt->wqe->state = wqe_state_done;
		rxe_sched_task(&qp->comp.task);
		rxe_sched_task(&qp->send_task);
	}

	rxe_counter_inc(rxe, RXE_CNT_SENT_PKTS);
Loading