Commit b40ef2b8, authored by David Howells, committed by Jakub Kicinski
Browse files

rxrpc: Manage RTT per-call rather than per-peer



Manage the determination of RTT on a per-call (i.e. per-RPC op) basis rather
than on a per-peer basis, where it is averaged across all calls going to that peer.
The problem is that the RTT measurements from the initial packets on a call
may be off because the server may do some setting up (such as getting a
lock on a file) before accepting the rest of the data in the RPC and,
further, the RTT may be affected by server-side file operations, for
instance if a large amount of data is being written or read.

Note: When handling the FS.StoreData-type RPCs, for example, the server
uses the userStatus field in the header of ACK packets as supplementary
flow control to aid in managing this.  AF_RXRPC does not yet support this,
but it should be added.

Signed-off-by: David Howells <dhowells@redhat.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: linux-afs@lists.infradead.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parent b5099340
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -1453,7 +1453,7 @@ TRACE_EVENT(rxrpc_rtt_rx,
		    __entry->rtt = rtt;
		    __entry->srtt = srtt;
		    __entry->rto = rto;
		    __entry->min_rtt = minmax_get(&call->peer->min_rtt)
		    __entry->min_rtt = minmax_get(&call->min_rtt)
			   ),

	    TP_printk("c=%08x [%d] %s sr=%08x rr=%08x rtt=%u srtt=%u rto=%u min=%u",
+21 −18
Original line number Diff line number Diff line
@@ -366,20 +366,9 @@ struct rxrpc_peer {
	unsigned short		hdrsize;	/* header size (IP + UDP + RxRPC) */
	unsigned short		tx_seg_max;	/* Maximum number of transmissable segments */

	/* calculated RTT cache */
#define RXRPC_RTT_CACHE_SIZE 32
	spinlock_t		rtt_input_lock;	/* RTT lock for input routine */
	ktime_t			rtt_last_req;	/* Time of last RTT request */
	unsigned int		rtt_count;	/* Number of samples we've got */
	unsigned int		rtt_taken;	/* Number of samples taken (wrapping) */
	struct minmax		min_rtt;	/* Estimated minimum RTT */

	u32			srtt_us;	/* smoothed round trip time << 3 in usecs */
	u32			mdev_us;	/* medium deviation			*/
	u32			mdev_max_us;	/* maximal mdev for the last rtt period	*/
	u32			rttvar_us;	/* smoothed mdev_max			*/
	u32			rto_us;		/* Retransmission timeout in usec */
	u8			backoff;	/* Backoff timeout (as shift) */
	/* Calculated RTT cache */
	unsigned int		recent_srtt_us;
	unsigned int		recent_rto_us;

	u8			cong_ssthresh;	/* Congestion slow-start threshold */
};
@@ -765,6 +754,18 @@ struct rxrpc_call {
	rxrpc_serial_t		acks_highest_serial; /* Highest serial number ACK'd */
	unsigned short		acks_nr_sacks;	/* Number of soft acks recorded */
	unsigned short		acks_nr_snacks;	/* Number of soft nacks recorded */

	/* Calculated RTT cache */
	ktime_t			rtt_last_req;	/* Time of last RTT request */
	unsigned int		rtt_count;	/* Number of samples we've got */
	unsigned int		rtt_taken;	/* Number of samples taken (wrapping) */
	struct minmax		min_rtt;	/* Estimated minimum RTT */
	u32			srtt_us;	/* smoothed round trip time << 3 in usecs */
	u32			mdev_us;	/* medium deviation			*/
	u32			mdev_max_us;	/* maximal mdev for the last rtt period	*/
	u32			rttvar_us;	/* smoothed mdev_max			*/
	u32			rto_us;		/* Retransmission timeout in usec */
	u8			backoff;	/* Backoff timeout (as shift) */
};

/*
@@ -1287,10 +1288,12 @@ static inline int rxrpc_abort_eproto(struct rxrpc_call *call,
/*
 * rtt.c
 */
void rxrpc_peer_add_rtt(struct rxrpc_call *, enum rxrpc_rtt_rx_trace, int,
			rxrpc_serial_t, rxrpc_serial_t, ktime_t, ktime_t);
ktime_t rxrpc_get_rto_backoff(struct rxrpc_peer *peer, bool retrans);
void rxrpc_peer_init_rtt(struct rxrpc_peer *);
void rxrpc_call_add_rtt(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why,
			int rtt_slot,
			rxrpc_serial_t send_serial, rxrpc_serial_t resp_serial,
			ktime_t send_time, ktime_t resp_time);
ktime_t rxrpc_get_rto_backoff(struct rxrpc_call *call, bool retrans);
void rxrpc_call_init_rtt(struct rxrpc_call *call);

/*
 * rxkad.c
+9 −9
Original line number Diff line number Diff line
@@ -44,8 +44,8 @@ void rxrpc_propose_delay_ACK(struct rxrpc_call *call, rxrpc_serial_t serial,

	trace_rxrpc_propose_ack(call, why, RXRPC_ACK_DELAY, serial);

	if (call->peer->srtt_us)
		delay = (call->peer->srtt_us >> 3) * NSEC_PER_USEC;
	if (call->srtt_us)
		delay = (call->srtt_us >> 3) * NSEC_PER_USEC;
	else
		delay = ms_to_ktime(READ_ONCE(rxrpc_soft_ack_delay));
	ktime_add_ms(delay, call->tx_backoff);
@@ -105,7 +105,7 @@ void rxrpc_resend(struct rxrpc_call *call, rxrpc_serial_t ack_serial, bool ping_
	};
	struct rxrpc_txqueue *tq = call->tx_queue;
	ktime_t lowest_xmit_ts = KTIME_MAX;
	ktime_t rto = rxrpc_get_rto_backoff(call->peer, false);
	ktime_t rto = rxrpc_get_rto_backoff(call, false);
	bool unacked = false;

	_enter("{%d,%d}", call->tx_bottom, call->tx_top);
@@ -195,7 +195,7 @@ void rxrpc_resend(struct rxrpc_call *call, rxrpc_serial_t ack_serial, bool ping_
	} while ((tq = tq->next));

	if (lowest_xmit_ts < KTIME_MAX) {
		ktime_t delay = rxrpc_get_rto_backoff(call->peer, req.did_send);
		ktime_t delay = rxrpc_get_rto_backoff(call, req.did_send);
		ktime_t resend_at = ktime_add(lowest_xmit_ts, delay);

		_debug("delay %llu %lld", delay, ktime_sub(resend_at, req.now));
@@ -216,7 +216,7 @@ void rxrpc_resend(struct rxrpc_call *call, rxrpc_serial_t ack_serial, bool ping_
	 */
	if (!req.did_send) {
		ktime_t next_ping = ktime_add_us(call->acks_latest_ts,
						 call->peer->srtt_us >> 3);
						 call->srtt_us >> 3);

		if (ktime_sub(next_ping, req.now) <= 0)
			rxrpc_send_ACK(call, RXRPC_ACK_PING, 0,
@@ -366,8 +366,8 @@ static void rxrpc_transmit_some_data(struct rxrpc_call *call,
 */
static void rxrpc_send_initial_ping(struct rxrpc_call *call)
{
	if (call->peer->rtt_count < 3 ||
	    ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000),
	if (call->rtt_count < 3 ||
	    ktime_before(ktime_add_ms(call->rtt_last_req, 1000),
			 ktime_get_real()))
		rxrpc_send_ACK(call, RXRPC_ACK_PING, 0,
			       rxrpc_propose_ack_ping_for_params);
@@ -499,10 +499,10 @@ bool rxrpc_input_call_event(struct rxrpc_call *call)
			       rxrpc_propose_ack_rx_idle);

	if (call->ackr_nr_unacked > 2) {
		if (call->peer->rtt_count < 3)
		if (call->rtt_count < 3)
			rxrpc_send_ACK(call, RXRPC_ACK_PING, 0,
				       rxrpc_propose_ack_ping_for_rtt);
		else if (ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000),
		else if (ktime_before(ktime_add_ms(call->rtt_last_req, 1000),
				      ktime_get_real()))
			rxrpc_send_ACK(call, RXRPC_ACK_PING, 0,
				       rxrpc_propose_ack_ping_for_old_rtt);
+2 −0
Original line number Diff line number Diff line
@@ -176,6 +176,8 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp,
	call->cong_cwnd = RXRPC_MIN_CWND;
	call->cong_ssthresh = RXRPC_TX_MAX_WINDOW;

	rxrpc_call_init_rtt(call);

	call->rxnet = rxnet;
	call->rtt_avail = RXRPC_CALL_RTT_AVAIL_MASK;
	atomic_inc(&rxnet->nr_calls);
+5 −5
Original line number Diff line number Diff line
@@ -71,11 +71,11 @@ static void rxrpc_congestion_management(struct rxrpc_call *call,
		/* We analyse the number of packets that get ACK'd per RTT
		 * period and increase the window if we managed to fill it.
		 */
		if (call->peer->rtt_count == 0)
		if (call->rtt_count == 0)
			goto out;
		if (ktime_before(call->acks_latest_ts,
				 ktime_add_us(call->cong_tstamp,
					      call->peer->srtt_us >> 3)))
					      call->srtt_us >> 3)))
			goto out_no_clear_ca;
		summary->change = rxrpc_cong_rtt_window_end;
		call->cong_tstamp = call->acks_latest_ts;
@@ -179,7 +179,7 @@ void rxrpc_congestion_degrade(struct rxrpc_call *call)
	if (__rxrpc_call_state(call) == RXRPC_CALL_CLIENT_AWAIT_REPLY)
		return;

	rtt = ns_to_ktime(call->peer->srtt_us * (1000 / 8));
	rtt = ns_to_ktime(call->srtt_us * (NSEC_PER_USEC / 8));
	now = ktime_get_real();
	if (!ktime_before(ktime_add(call->tx_last_sent, rtt), now))
		return;
@@ -200,7 +200,7 @@ static void rxrpc_add_data_rtt_sample(struct rxrpc_call *call,
				      struct rxrpc_txqueue *tq,
				      int ix)
{
	rxrpc_peer_add_rtt(call, rxrpc_rtt_rx_data_ack, -1,
	rxrpc_call_add_rtt(call, rxrpc_rtt_rx_data_ack, -1,
			   summary->acked_serial, summary->ack_serial,
			   ktime_add_us(tq->xmit_ts_base, tq->segment_xmit_ts[ix]),
			   call->acks_latest_ts);
@@ -725,7 +725,7 @@ static void rxrpc_complete_rtt_probe(struct rxrpc_call *call,
			clear_bit(i + RXRPC_CALL_RTT_PEND_SHIFT, &call->rtt_avail);
			smp_mb(); /* Read data before setting avail bit */
			set_bit(i, &call->rtt_avail);
			rxrpc_peer_add_rtt(call, type, i, acked_serial, ack_serial,
			rxrpc_call_add_rtt(call, type, i, acked_serial, ack_serial,
					   sent_at, resp_time);
			matched = true;
		}
Loading