Commit 7c482665, authored by David Howells, committed by Jakub Kicinski
Browse files

rxrpc: Implement RACK/TLP to deal with transmission stalls [RFC8985]



When an rxrpc call is in its transmission phase and is sending a lot of
packets, stalls occasionally occur that cause severe performance
degradation (eg. increasing the transmission time for a 256MiB payload from
0.7s to 2.5s over a 10G link).

rxrpc already implements TCP-style congestion control [RFC5681] and this
helps mitigate the effects, but occasionally we're missing a time event
that deals with a missing ACK, leading to a stall until the RTO expires.

Fix this by implementing RACK/TLP in rxrpc.

Signed-off-by: David Howells <dhowells@redhat.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: linux-afs@lists.infradead.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parent 4ee4c2f8
Loading
Loading
Loading
Loading
+323 −19
Original line number Diff line number Diff line
@@ -305,7 +305,9 @@
#define rxrpc_txdata_traces \
	EM(rxrpc_txdata_inject_loss,		" *INJ-LOSS*") \
	EM(rxrpc_txdata_new_data,		" ") \
	E_(rxrpc_txdata_retransmit,		" *RETRANS*")
	EM(rxrpc_txdata_retransmit,		" *RETRANS*") \
	EM(rxrpc_txdata_tlp_new_data,		" *TLP-NEW*") \
	E_(rxrpc_txdata_tlp_retransmit,		" *TLP-RETRANS*")

#define rxrpc_receive_traces \
	EM(rxrpc_receive_end,			"END") \
@@ -353,11 +355,12 @@
	EM(rxrpc_timer_trace_hard,		"HardLimit") \
	EM(rxrpc_timer_trace_idle,		"IdleLimit") \
	EM(rxrpc_timer_trace_keepalive,		"KeepAlive") \
	EM(rxrpc_timer_trace_lost_ack,		"LostAck  ") \
	EM(rxrpc_timer_trace_ping,		"DelayPing") \
	EM(rxrpc_timer_trace_resend,		"Resend   ") \
	EM(rxrpc_timer_trace_resend_reset,	"ResendRst") \
	E_(rxrpc_timer_trace_resend_tx,		"ResendTx ")
	EM(rxrpc_timer_trace_rack_off,		"RACK-OFF ") \
	EM(rxrpc_timer_trace_rack_zwp,		"RACK-ZWP ") \
	EM(rxrpc_timer_trace_rack_reo,		"RACK-Reo ") \
	EM(rxrpc_timer_trace_rack_tlp_pto,	"TLP-PTO  ") \
	E_(rxrpc_timer_trace_rack_rto,		"RTO      ")

#define rxrpc_propose_ack_traces \
	EM(rxrpc_propose_ack_client_tx_end,	"ClTxEnd") \
@@ -478,9 +481,9 @@
	EM(rxrpc_txbuf_put_rotated,		"PUT ROTATED")	\
	EM(rxrpc_txbuf_put_send_aborted,	"PUT SEND-X ")	\
	EM(rxrpc_txbuf_put_trans,		"PUT TRANS  ")	\
	EM(rxrpc_txbuf_see_lost,		"SEE LOST   ")	\
	EM(rxrpc_txbuf_see_out_of_step,		"OUT-OF-STEP")	\
	EM(rxrpc_txbuf_see_send_more,		"SEE SEND+  ")	\
	E_(rxrpc_txbuf_see_unacked,		"SEE UNACKED")
	E_(rxrpc_txbuf_see_send_more,		"SEE SEND+  ")

#define rxrpc_tq_traces \
	EM(rxrpc_tq_alloc,			"ALLOC") \
@@ -505,6 +508,24 @@
	EM(rxrpc_rotate_trace_sack,		"soft-ack")	\
	E_(rxrpc_rotate_trace_snak,		"soft-nack")

/*
 * Symbolic names for the enum rxrpc_rack_timer_mode values.  The table is
 * handed to __print_symbolic() by the rxrpc_rack and rxrpc_rack_timer
 * tracepoints to render the mode as a fixed-width three-character tag.
 */
#define rxrpc_rack_timer_modes \
	EM(RXRPC_CALL_RACKTIMER_OFF,		"---") \
	EM(RXRPC_CALL_RACKTIMER_RACK_REORDER,	"REO") \
	EM(RXRPC_CALL_RACKTIMER_TLP_PTO,	"TLP") \
	E_(RXRPC_CALL_RACKTIMER_RTO,		"RTO")

/*
 * Labels for enum rxrpc_tlp_probe_trace, the reason code passed to the
 * rxrpc_tlp_probe tracepoint and printed there via __print_symbolic().
 */
#define rxrpc_tlp_probe_traces \
	EM(rxrpc_tlp_probe_trace_busy,		"busy")		\
	EM(rxrpc_tlp_probe_trace_transmit_new,	"transmit-new")	\
	E_(rxrpc_tlp_probe_trace_retransmit,	"retransmit")

/*
 * Labels for enum rxrpc_tlp_ack_trace, the reason code passed to the
 * rxrpc_tlp_ack tracepoint and printed there via __print_symbolic().
 */
#define rxrpc_tlp_ack_traces \
	EM(rxrpc_tlp_ack_trace_acked,		"acked")	\
	EM(rxrpc_tlp_ack_trace_dup_acked,	"dup-acked")	\
	EM(rxrpc_tlp_ack_trace_hard_beyond,	"hard-beyond")	\
	EM(rxrpc_tlp_ack_trace_incomplete,	"incomplete")	\
	E_(rxrpc_tlp_ack_trace_new_data,	"new-data")

/*
 * Generate enums for tracing information.
 */
@@ -537,6 +558,8 @@ enum rxrpc_rtt_tx_trace { rxrpc_rtt_tx_traces } __mode(byte);
enum rxrpc_sack_trace		{ rxrpc_sack_traces } __mode(byte);
enum rxrpc_skb_trace		{ rxrpc_skb_traces } __mode(byte);
enum rxrpc_timer_trace		{ rxrpc_timer_traces } __mode(byte);
enum rxrpc_tlp_ack_trace	{ rxrpc_tlp_ack_traces } __mode(byte);
enum rxrpc_tlp_probe_trace	{ rxrpc_tlp_probe_traces } __mode(byte);
enum rxrpc_tq_trace		{ rxrpc_tq_traces } __mode(byte);
enum rxrpc_tx_point		{ rxrpc_tx_points } __mode(byte);
enum rxrpc_txbuf_trace		{ rxrpc_txbuf_traces } __mode(byte);
@@ -567,6 +590,7 @@ rxrpc_conn_traces;
rxrpc_local_traces;
rxrpc_pmtud_reduce_traces;
rxrpc_propose_ack_traces;
rxrpc_rack_timer_modes;
rxrpc_receive_traces;
rxrpc_recvmsg_traces;
rxrpc_req_ack_traces;
@@ -576,6 +600,8 @@ rxrpc_rtt_tx_traces;
rxrpc_sack_traces;
rxrpc_skb_traces;
rxrpc_timer_traces;
rxrpc_tlp_ack_traces;
rxrpc_tlp_probe_traces;
rxrpc_tq_traces;
rxrpc_tx_points;
rxrpc_txbuf_traces;
@@ -618,6 +644,20 @@ TRACE_EVENT(rxrpc_local,
		      __entry->usage)
	    );

/*
 * Tracepoint recording a receive count against a local endpoint.  Logs the
 * endpoint's debug ID (L=) and the caller-supplied nr_rx value (nrx=).
 * NOTE(review): nr_rx is presumably the number of packets the I/O thread
 * pulled in one pass - confirm against the caller.
 */
TRACE_EVENT(rxrpc_iothread_rx,
	    TP_PROTO(struct rxrpc_local *local, unsigned int nr_rx),
	    TP_ARGS(local, nr_rx),
	    TP_STRUCT__entry(
		    __field(unsigned int,	local)
		    __field(unsigned int,	nr_rx)
			     ),
	    TP_fast_assign(
		    __entry->local = local->debug_id;
		    __entry->nr_rx = nr_rx;
			   ),
	    TP_printk("L=%08x nrx=%u", __entry->local, __entry->nr_rx)
	    );

TRACE_EVENT(rxrpc_peer,
	    TP_PROTO(unsigned int peer_debug_id, int ref, enum rxrpc_peer_trace why),

@@ -1684,16 +1724,15 @@ TRACE_EVENT(rxrpc_drop_ack,
TRACE_EVENT(rxrpc_retransmit,
	    TP_PROTO(struct rxrpc_call *call,
		     struct rxrpc_send_data_req *req,
		     struct rxrpc_txbuf *txb, ktime_t expiry),
		     struct rxrpc_txbuf *txb),

	    TP_ARGS(call, req, txb, expiry),
	    TP_ARGS(call, req, txb),

	    TP_STRUCT__entry(
		    __field(unsigned int,	call)
		    __field(unsigned int,	qbase)
		    __field(rxrpc_seq_t,	seq)
		    __field(rxrpc_serial_t,	serial)
		    __field(ktime_t,		expiry)
			     ),

	    TP_fast_assign(
@@ -1701,15 +1740,13 @@ TRACE_EVENT(rxrpc_retransmit,
		    __entry->qbase = req->tq->qbase;
		    __entry->seq = req->seq;
		    __entry->serial = txb->serial;
		    __entry->expiry = expiry;
			   ),

	    TP_printk("c=%08x tq=%x q=%x r=%x xp=%lld",
	    TP_printk("c=%08x tq=%x q=%x r=%x",
		      __entry->call,
		      __entry->qbase,
		      __entry->seq,
		      __entry->serial,
		      ktime_to_us(__entry->expiry))
		      __entry->serial)
	    );

TRACE_EVENT(rxrpc_congest,
@@ -1767,9 +1804,9 @@ TRACE_EVENT(rxrpc_congest,
	    );

TRACE_EVENT(rxrpc_reset_cwnd,
	    TP_PROTO(struct rxrpc_call *call, ktime_t now),
	    TP_PROTO(struct rxrpc_call *call, ktime_t since_last_tx, ktime_t rtt),

	    TP_ARGS(call, now),
	    TP_ARGS(call, since_last_tx, rtt),

	    TP_STRUCT__entry(
		    __field(unsigned int,		call)
@@ -1779,6 +1816,7 @@ TRACE_EVENT(rxrpc_reset_cwnd,
		    __field(rxrpc_seq_t,		hard_ack)
		    __field(rxrpc_seq_t,		prepared)
		    __field(ktime_t,			since_last_tx)
		    __field(ktime_t,			rtt)
		    __field(bool,			has_data)
			     ),

@@ -1789,18 +1827,20 @@ TRACE_EVENT(rxrpc_reset_cwnd,
		    __entry->extra	= call->cong_extra;
		    __entry->hard_ack	= call->acks_hard_ack;
		    __entry->prepared	= call->send_top - call->tx_bottom;
		    __entry->since_last_tx = ktime_sub(now, call->tx_last_sent);
		    __entry->since_last_tx = since_last_tx;
		    __entry->rtt	= rtt;
		    __entry->has_data	= call->tx_bottom != call->tx_top;
			   ),

	    TP_printk("c=%08x q=%08x %s cw=%u+%u pr=%u tm=%llu d=%u",
	    TP_printk("c=%08x q=%08x %s cw=%u+%u pr=%u tm=%llu/%llu d=%u",
		      __entry->call,
		      __entry->hard_ack,
		      __print_symbolic(__entry->ca_state, rxrpc_ca_states),
		      __entry->cwnd,
		      __entry->extra,
		      __entry->prepared,
		      ktime_to_ns(__entry->since_last_tx),
		      ktime_to_us(__entry->since_last_tx),
		      ktime_to_us(__entry->rtt),
		      __entry->has_data)
	    );

@@ -1925,6 +1965,32 @@ TRACE_EVENT(rxrpc_resend,
		      __entry->transmitted)
	    );

/*
 * Tracepoint recording a per-segment "lost" bitmask for a Tx queue segment.
 * Logs the call debug ID (c=), the segment's base sequence tq->qbase (tq=),
 * the caller-supplied lost mask as 16 hex digits (lost=) and the segment's
 * nr_reported_acks count (nr=).
 */
TRACE_EVENT(rxrpc_resend_lost,
	    TP_PROTO(struct rxrpc_call *call, struct rxrpc_txqueue *tq, unsigned long lost),

	    TP_ARGS(call, tq, lost),

	    TP_STRUCT__entry(
		    __field(unsigned int,	call)
		    __field(rxrpc_seq_t,	qbase)
		    __field(u8,			nr_rep)
		    __field(unsigned long,	lost)
			     ),

	    TP_fast_assign(
		    __entry->call = call->debug_id;
		    __entry->qbase = tq->qbase;
		    __entry->nr_rep = tq->nr_reported_acks;
		    __entry->lost = lost;
			   ),

	    TP_printk("c=%08x tq=%x lost=%016lx nr=%u",
		      __entry->call,
		      __entry->qbase,
		      __entry->lost,
		      __entry->nr_rep)
	    );

TRACE_EVENT(rxrpc_rotate,
	    TP_PROTO(struct rxrpc_call *call, struct rxrpc_txqueue *tq,
		     struct rxrpc_ack_summary *summary, rxrpc_seq_t seq,
@@ -2363,6 +2429,244 @@ TRACE_EVENT(rxrpc_pmtud_reduce,
		      __entry->serial, __entry->max_data)
	    );

/*
 * Tracepoint giving a snapshot of a call's RACK state together with a
 * caller-supplied timeout.  Logs the call debug ID (c=), call->rx_serial
 * (r=), call->rack_end_seq (q=), the RACK timer mode as a symbolic tag, the
 * four counters tx_nr_sent, tx_nr_lost, tx_nr_resent and acks_nr_sacks in
 * that order (slrs=) and the timeout converted to microseconds (t=).
 */
TRACE_EVENT(rxrpc_rack,
	    TP_PROTO(struct rxrpc_call *call, ktime_t timo),

	    TP_ARGS(call, timo),

	    TP_STRUCT__entry(
		    __field(unsigned int,	call)
		    __field(rxrpc_serial_t,	ack_serial)
		    __field(rxrpc_seq_t,	seq)
		    __field(enum rxrpc_rack_timer_mode, mode)
		    __field(unsigned short,	nr_sent)
		    __field(unsigned short,	nr_lost)
		    __field(unsigned short,	nr_resent)
		    __field(unsigned short,	nr_sacked)
		    __field(ktime_t,		timo)
			     ),

	    TP_fast_assign(
		    __entry->call	= call->debug_id;
		    __entry->ack_serial	= call->rx_serial;
		    __entry->seq	= call->rack_end_seq;
		    __entry->mode	= call->rack_timer_mode;
		    __entry->nr_sent	= call->tx_nr_sent;
		    __entry->nr_lost	= call->tx_nr_lost;
		    __entry->nr_resent	= call->tx_nr_resent;
		    __entry->nr_sacked	= call->acks_nr_sacks;
		    __entry->timo	= timo;
			   ),

	    TP_printk("c=%08x r=%08x q=%08x %s slrs=%u,%u,%u,%u t=%lld",
		      __entry->call, __entry->ack_serial, __entry->seq,
		      __print_symbolic(__entry->mode, rxrpc_rack_timer_modes),
		      __entry->nr_sent, __entry->nr_lost,
		      __entry->nr_resent, __entry->nr_sacked,
		      ktime_to_us(__entry->timo))
	    );

/*
 * Tracepoint recording an update of a call's RACK state.  Logs the call debug
 * ID (c=), call->rx_serial (r=), call->rack_end_seq (q=) and the interval
 * between call->rack_xmit_ts and call->acks_latest_ts in microseconds (xt=).
 *
 * The interval is kept as a ktime_t: ktime_sub() yields a 64-bit nanosecond
 * count, which would be truncated (wrapping after ~2.1s) if stored in an int
 * before being passed to ktime_to_us() and printed with %lld.
 *
 * The summary argument is accepted for the caller's convenience but nothing
 * from it is recorded.
 */
TRACE_EVENT(rxrpc_rack_update,
	    TP_PROTO(struct rxrpc_call *call, struct rxrpc_ack_summary *summary),

	    TP_ARGS(call, summary),

	    TP_STRUCT__entry(
		    __field(unsigned int,	call)
		    __field(rxrpc_serial_t,	ack_serial)
		    __field(rxrpc_seq_t,	seq)
		    __field(ktime_t,		xmit_ts)
			     ),

	    TP_fast_assign(
		    __entry->call	= call->debug_id;
		    __entry->ack_serial	= call->rx_serial;
		    __entry->seq	= call->rack_end_seq;
		    __entry->xmit_ts	= ktime_sub(call->acks_latest_ts, call->rack_xmit_ts);
			   ),

	    TP_printk("c=%08x r=%08x q=%08x xt=%lld",
		      __entry->call, __entry->ack_serial, __entry->seq,
		      ktime_to_us(__entry->xmit_ts))
	    );

/*
 * Tracepoint recording the timing parameters of a call's RACK state.  Logs
 * the call debug ID (c=) and call->rack_rtt (rtt=) and call->rack_reo_wnd
 * (reow=), both converted to microseconds.
 */
TRACE_EVENT(rxrpc_rack_scan_loss,
	    TP_PROTO(struct rxrpc_call *call),

	    TP_ARGS(call),

	    TP_STRUCT__entry(
		    __field(unsigned int,	call)
		    __field(ktime_t,		rack_rtt)
		    __field(ktime_t,		rack_reo_wnd)
			     ),

	    TP_fast_assign(
		    __entry->call		= call->debug_id;
		    __entry->rack_rtt		= call->rack_rtt;
		    __entry->rack_reo_wnd	= call->rack_reo_wnd;
			   ),

	    TP_printk("c=%08x rtt=%lld reow=%lld",
		      __entry->call, ktime_to_us(__entry->rack_rtt),
		      ktime_to_us(__entry->rack_reo_wnd))
	    );

/*
 * Tracepoint recording the per-buffer bitmasks of one Tx queue segment.  Logs
 * the call debug ID (c=), the segment base tq->qbase (q=), the caller-supplied
 * nacks mask (n=), and the segment's segment_lost (l=) and
 * segment_retransmitted (r=) masks, all masks in hex.
 */
TRACE_EVENT(rxrpc_rack_scan_loss_tq,
	    TP_PROTO(struct rxrpc_call *call, const struct rxrpc_txqueue *tq,
		     unsigned long nacks),

	    TP_ARGS(call, tq, nacks),

	    TP_STRUCT__entry(
		    __field(unsigned int,	call)
		    __field(rxrpc_seq_t,	qbase)
		    __field(unsigned long,	nacks)
		    __field(unsigned long,	lost)
		    __field(unsigned long,	retrans)
			     ),

	    TP_fast_assign(
		    __entry->call	= call->debug_id;
		    __entry->qbase	= tq->qbase;
		    __entry->nacks	= nacks;
		    __entry->lost	= tq->segment_lost;
		    __entry->retrans	= tq->segment_retransmitted;
			   ),

	    TP_printk("c=%08x q=%08x n=%lx l=%lx r=%lx",
		      __entry->call, __entry->qbase,
		      __entry->nacks, __entry->lost, __entry->retrans)
	    );

/*
 * Tracepoint recording a sequence number against a call.  Logs the call debug
 * ID (c=), call->rx_serial (r=) and the caller-supplied sequence number (q=).
 * The summary argument is accepted but nothing from it is recorded.
 */
TRACE_EVENT(rxrpc_rack_detect_loss,
	    TP_PROTO(struct rxrpc_call *call, struct rxrpc_ack_summary *summary,
		     rxrpc_seq_t seq),

	    TP_ARGS(call, summary, seq),

	    TP_STRUCT__entry(
		    __field(unsigned int,	call)
		    __field(rxrpc_serial_t,	ack_serial)
		    __field(rxrpc_seq_t,	seq)
			     ),

	    TP_fast_assign(
		    __entry->call	= call->debug_id;
		    __entry->ack_serial	= call->rx_serial;
		    __entry->seq	= seq;
			   ),

	    TP_printk("c=%08x r=%08x q=%08x",
		      __entry->call, __entry->ack_serial, __entry->seq)
	    );

/*
 * Tracepoint recording the state masks of one Tx queue segment.  Logs the
 * call debug ID (c=), the segment base tq->qbase (tq=), call->tx_transmitted
 * (txq=), and the segment's segment_acked (a=), segment_lost (l=) and
 * segment_retransmitted (r=) masks in hex.
 */
TRACE_EVENT(rxrpc_rack_mark_loss_tq,
	    TP_PROTO(struct rxrpc_call *call, const struct rxrpc_txqueue *tq),

	    TP_ARGS(call, tq),

	    TP_STRUCT__entry(
		    __field(unsigned int,	call)
		    __field(rxrpc_seq_t,	qbase)
		    __field(rxrpc_seq_t,	trans)
		    __field(unsigned long,	acked)
		    __field(unsigned long,	lost)
		    __field(unsigned long,	retrans)
			     ),

	    TP_fast_assign(
		    __entry->call	= call->debug_id;
		    __entry->qbase	= tq->qbase;
		    __entry->trans	= call->tx_transmitted;
		    __entry->acked	= tq->segment_acked;
		    __entry->lost	= tq->segment_lost;
		    __entry->retrans	= tq->segment_retransmitted;
			   ),

	    TP_printk("c=%08x tq=%08x txq=%08x a=%lx l=%lx r=%lx",
		      __entry->call, __entry->qbase, __entry->trans,
		      __entry->acked, __entry->lost, __entry->retrans)
	    );

/*
 * Tracepoint recording a TLP probe event.  Logs the call debug ID (c=),
 * call->tlp_serial (r=), call->tlp_seq (pq=) and the caller-supplied reason
 * code rendered through rxrpc_tlp_probe_traces.
 */
TRACE_EVENT(rxrpc_tlp_probe,
	    TP_PROTO(struct rxrpc_call *call, enum rxrpc_tlp_probe_trace trace),

	    TP_ARGS(call, trace),

	    TP_STRUCT__entry(
		    __field(unsigned int,		call)
		    __field(rxrpc_serial_t,		serial)
		    __field(rxrpc_seq_t,		seq)
		    __field(enum rxrpc_tlp_probe_trace,	trace)
			     ),

	    TP_fast_assign(
		    __entry->call	= call->debug_id;
		    __entry->serial	= call->tlp_serial;
		    __entry->seq	= call->tlp_seq;
		    __entry->trace	= trace;
			   ),

	    TP_printk("c=%08x r=%08x pq=%08x %s",
		      __entry->call, __entry->serial, __entry->seq,
		      __print_symbolic(__entry->trace, rxrpc_tlp_probe_traces))
	    );

/*
 * Tracepoint recording how an ACK related to the TLP probe state.  Logs the
 * call debug ID (c=), call->tlp_serial (r=), call->tlp_seq (pq=),
 * call->acks_hard_ack (hq=) and the caller-supplied reason code rendered
 * through rxrpc_tlp_ack_traces.  The summary argument is accepted but nothing
 * from it is recorded.
 */
TRACE_EVENT(rxrpc_tlp_ack,
	    TP_PROTO(struct rxrpc_call *call, struct rxrpc_ack_summary *summary,
		     enum rxrpc_tlp_ack_trace trace),

	    TP_ARGS(call, summary, trace),

	    TP_STRUCT__entry(
		    __field(unsigned int,		call)
		    __field(rxrpc_serial_t,		serial)
		    __field(rxrpc_seq_t,		tlp_seq)
		    __field(rxrpc_seq_t,		hard_ack)
		    __field(enum rxrpc_tlp_ack_trace,	trace)
			     ),

	    TP_fast_assign(
		    __entry->call	= call->debug_id;
		    __entry->serial	= call->tlp_serial;
		    __entry->tlp_seq	= call->tlp_seq;
		    __entry->hard_ack	= call->acks_hard_ack;
		    __entry->trace	= trace;
			   ),

	    TP_printk("c=%08x r=%08x pq=%08x hq=%08x %s",
		      __entry->call, __entry->serial,
		      __entry->tlp_seq, __entry->hard_ack,
		      __print_symbolic(__entry->trace, rxrpc_tlp_ack_traces))
	    );

/*
 * Tracepoint recording a RACK timer event.  Logs the call debug ID (c=),
 * "Exp" when the exp flag is true or "Set" otherwise, the call's current
 * rack_timer_mode as a symbolic tag and the caller-supplied delay converted
 * to microseconds (to=).
 */
TRACE_EVENT(rxrpc_rack_timer,
	    TP_PROTO(struct rxrpc_call *call, ktime_t delay, bool exp),

	    TP_ARGS(call, delay, exp),

	    TP_STRUCT__entry(
		    __field(unsigned int,		call)
		    __field(bool,			exp)
		    __field(enum rxrpc_rack_timer_mode,	mode)
		    __field(ktime_t,			delay)
			     ),

	    TP_fast_assign(
		    __entry->call		= call->debug_id;
		    __entry->exp		= exp;
		    __entry->mode		= call->rack_timer_mode;
		    __entry->delay		= delay;
			   ),

	    TP_printk("c=%08x %s %s to=%lld",
		      __entry->call,
		      __entry->exp ? "Exp" : "Set",
		      __print_symbolic(__entry->mode, rxrpc_rack_timer_modes),
		      ktime_to_us(__entry->delay))
	    );

#undef EM
#undef E_

+1 −0
Original line number Diff line number Diff line
@@ -16,6 +16,7 @@ rxrpc-y := \
	conn_object.o \
	conn_service.o \
	input.o \
	input_rack.o \
	insecure.o \
	io_thread.o \
	key.o \
+102 −5
Original line number Diff line number Diff line
@@ -621,6 +621,18 @@ enum rxrpc_ca_state {
	NR__RXRPC_CA_STATES
} __mode(byte);

/*
 * Current purpose of the call's RACK timer.  According to the RACK-TLP
 * protocol [RFC8985], the transmission timer (call->rack_timo_at) may only be
 * used for one of these at a time; the mode currently in force is held in
 * call->rack_timer_mode.
 */
enum rxrpc_rack_timer_mode {
	RXRPC_CALL_RACKTIMER_OFF,		/* Timer not running */
	RXRPC_CALL_RACKTIMER_RACK_REORDER,	/* RACK reordering timer */
	RXRPC_CALL_RACKTIMER_TLP_PTO,		/* TLP probe timeout (PTO) */
	RXRPC_CALL_RACKTIMER_RTO,		/* Retransmission timeout */
} __mode(byte);

/*
 * RxRPC call definition
 * - matched by { connection, call_id }
@@ -638,8 +650,7 @@ struct rxrpc_call {
	struct mutex		user_mutex;	/* User access mutex */
	struct sockaddr_rxrpc	dest_srx;	/* Destination address */
	ktime_t			delay_ack_at;	/* When DELAY ACK needs to happen */
	ktime_t			ack_lost_at;	/* When ACK is figured as lost */
	ktime_t			resend_at;	/* When next resend needs to happen */
	ktime_t			rack_timo_at;	/* When ACK is figured as lost */
	ktime_t			ping_at;	/* When next to send a ping */
	ktime_t			keepalive_at;	/* When next to send a keepalive ping */
	ktime_t			expect_rx_by;	/* When we expect to get a packet by */
@@ -695,8 +706,12 @@ struct rxrpc_call {
	rxrpc_seq_t		tx_bottom;	/* First packet in buffer */
	rxrpc_seq_t		tx_transmitted;	/* Highest packet transmitted */
	rxrpc_seq_t		tx_top;		/* Highest Tx slot allocated. */
	rxrpc_serial_t		tx_last_serial;	/* Serial of last DATA transmitted */
	u16			tx_backoff;	/* Delay to insert due to Tx failure (ms) */
	u8			tx_winsize;	/* Maximum size of Tx window */
	u16			tx_nr_sent;	/* Number of packets sent, but unacked */
	u16			tx_nr_lost;	/* Number of packets marked lost */
	u16			tx_nr_resent;	/* Number of packets resent, but unacked */
	u16			tx_winsize;	/* Maximum size of Tx window */
#define RXRPC_TX_MAX_WINDOW	128
	u8			tx_jumbo_max;	/* Maximum subpkts peer will accept */
	ktime_t			tx_last_sent;	/* Last time a transmission occurred */
@@ -725,6 +740,25 @@ struct rxrpc_call {
	u16			cong_cumul_acks; /* Cumulative ACK count */
	ktime_t			cong_tstamp;	/* Last time cwnd was changed */

	/* RACK-TLP [RFC8985] state. */
	ktime_t			rack_xmit_ts;	/* Latest transmission timestamp */
	ktime_t			rack_rtt;	/* RTT of most recently ACK'd segment */
	ktime_t			rack_rtt_ts;	/* Timestamp of rack_rtt */
	ktime_t			rack_reo_wnd;	/* Reordering window */
	unsigned int		rack_reo_wnd_mult; /* Multiplier applied to rack_reo_wnd */
	int			rack_reo_wnd_persist; /* Num loss recoveries before reset reo_wnd */
	rxrpc_seq_t		rack_fack;	/* Highest sequence so far ACK'd */
	rxrpc_seq_t		rack_end_seq;	/* Highest sequence seen */
	rxrpc_seq_t		rack_dsack_round; /* DSACK opt recv'd in latest roundtrip */
	bool			rack_dsack_round_none; /* T if dsack_round is "None" */
	bool			rack_reordering_seen; /* T if detected reordering event */
	enum rxrpc_rack_timer_mode rack_timer_mode; /* Current mode of RACK timer */
	bool			tlp_is_retrans;	/* T if unacked TLP retransmission */
	rxrpc_serial_t		tlp_serial;	/* Serial of TLP probe (or 0 if none in progress) */
	rxrpc_seq_t		tlp_seq;	/* Sequence of TLP probe */
	unsigned int		tlp_rtt_taken;	/* Last time RTT taken */
	ktime_t			tlp_max_ack_delay; /* Sender budget for max delayed ACK interval */

	/* Receive-phase ACK management (ACKs we send). */
	u8			ackr_reason;	/* reason to ACK */
	u16			ackr_sack_base;	/* Starting slot in SACK table ring */
@@ -783,6 +817,9 @@ struct rxrpc_ack_summary {
	bool		retrans_timeo:1;	/* T if reTx due to timeout happened */
	bool		need_retransmit:1;	/* T if we need transmission */
	bool		rtt_sample_avail:1;	/* T if RTT sample available */
	bool		in_fast_or_rto_recovery:1;
	bool		exiting_fast_or_rto_recovery:1;
	bool		tlp_probe_acked:1;	/* T if the TLP probe seq was acked */
	u8 /*enum rxrpc_congest_change*/ change;
};

@@ -864,6 +901,7 @@ struct rxrpc_txqueue {
	unsigned long		segment_lost;	/* Bit-per-buf: Set if declared lost */
	unsigned long		segment_retransmitted; /* Bit-per-buf: Set if retransmitted */
	unsigned long		rtt_samples;	/* Bit-per-buf: Set if available for RTT */
	unsigned long		ever_retransmitted; /* Bit-per-buf: Set if ever retransmitted */

	/* The arrays we want to pack into as few cache lines as possible. */
	struct {
@@ -883,7 +921,9 @@ struct rxrpc_send_data_req {
	struct rxrpc_txqueue	*tq;		/* Tx queue segment holding first DATA */
	rxrpc_seq_t		seq;		/* Sequence of first data */
	int			n;		/* Number of DATA packets to glue into jumbo */
	bool			retrans;	/* T if this is a retransmission */
	bool			did_send;	/* T if did actually send */
	bool			tlp_probe;	/* T if this is a TLP probe */
	int /* enum rxrpc_txdata_trace */ trace;
};

@@ -943,8 +983,9 @@ void rxrpc_propose_ping(struct rxrpc_call *call, u32 serial,
			enum rxrpc_propose_ack_trace why);
void rxrpc_propose_delay_ACK(struct rxrpc_call *, rxrpc_serial_t,
			     enum rxrpc_propose_ack_trace);
void rxrpc_resend(struct rxrpc_call *call, rxrpc_serial_t ack_serial, bool ping_response);

void rxrpc_resend_tlp(struct rxrpc_call *call);
void rxrpc_transmit_some_data(struct rxrpc_call *call, unsigned int limit,
			      enum rxrpc_txdata_trace trace);
bool rxrpc_input_call_event(struct rxrpc_call *call);

/*
@@ -1123,6 +1164,32 @@ void rxrpc_congestion_degrade(struct rxrpc_call *);
void rxrpc_input_call_packet(struct rxrpc_call *, struct sk_buff *);
void rxrpc_implicit_end_call(struct rxrpc_call *, struct sk_buff *);

/*
 * input_rack.c
 */
void rxrpc_input_rack_one(struct rxrpc_call *call,
			  struct rxrpc_ack_summary *summary,
			  struct rxrpc_txqueue *tq,
			  unsigned int ix);
void rxrpc_input_rack(struct rxrpc_call *call,
		      struct rxrpc_ack_summary *summary,
		      struct rxrpc_txqueue *tq,
		      unsigned long new_acks);
void rxrpc_rack_detect_loss_and_arm_timer(struct rxrpc_call *call,
					  struct rxrpc_ack_summary *summary);
ktime_t rxrpc_tlp_calc_pto(struct rxrpc_call *call, ktime_t now);
void rxrpc_tlp_send_probe(struct rxrpc_call *call);
void rxrpc_tlp_process_ack(struct rxrpc_call *call, struct rxrpc_ack_summary *summary);
void rxrpc_rack_timer_expired(struct rxrpc_call *call, ktime_t overran_by);

/*
 * Reset a call's Tail Loss Probe state [RFC8985 7.1]: no TLP retransmission
 * is outstanding, the probe sequence is parked at the current hard-ACK point
 * and the probe serial is zeroed to indicate that no probe is in progress.
 */
static inline void rxrpc_tlp_init(struct rxrpc_call *call)
{
	call->tlp_is_retrans = false;
	call->tlp_seq = call->acks_hard_ack;
	call->tlp_serial = 0;
}

/*
 * io_thread.c
 */
@@ -1402,6 +1469,11 @@ static inline u32 latest(u32 seq1, u32 seq2)
	return after(seq1, seq2) ? seq1 : seq2;
}

/*
 * Determine whether sequence number @seq lies within Tx queue segment @tq.
 *
 * The low bits of a sequence number (seq & (RXRPC_NR_TXQUEUE - 1)) are the
 * slot index inside a segment, so they must be masked *off*, not kept, to
 * obtain the segment base for comparison with tq->qbase.
 *
 * NOTE(review): assumes RXRPC_NR_TXQUEUE is a power of two and that
 * tq->qbase is aligned to it - confirm against the txqueue allocator.
 */
static inline bool rxrpc_seq_in_txq(const struct rxrpc_txqueue *tq, rxrpc_seq_t seq)
{
	return (seq & ~(RXRPC_NR_TXQUEUE - 1)) == tq->qbase;
}

static inline void rxrpc_queue_rx_call_packet(struct rxrpc_call *call, struct sk_buff *skb)
{
	rxrpc_get_skb(skb, rxrpc_skb_get_call_rx);
@@ -1409,6 +1481,31 @@ static inline void rxrpc_queue_rx_call_packet(struct rxrpc_call *call, struct sk
	rxrpc_poke_call(call, rxrpc_call_poke_rx_packet);
}

/*
 * Work out how many more DATA packets may be queued for transmission.  The
 * usable window is the smaller of the Tx window size and the congestion
 * window plus its extra allowance; the span from tx_bottom to tx_top is
 * deducted from that and the result is floored at zero.
 */
static inline unsigned int rxrpc_tx_window_space(const struct rxrpc_call *call)
{
	int limit = umin(call->tx_winsize, call->cong_cwnd + call->cong_extra);
	int queued = call->tx_top - call->tx_bottom;
	int space = limit - queued;

	if (space < 0)
		return 0;
	return space;
}

/*
 * Count the packets excluded from the in-flight estimate: those covered by
 * the SACK count plus those marked lost.
 */
static inline unsigned int rxrpc_left_out(const struct rxrpc_call *call)
{
	unsigned int left_out = call->acks_nr_sacks;

	left_out += call->tx_nr_lost;
	return left_out;
}

/*
 * Estimate how many transmitted DATA packets are still in flight [approx
 * RFC6675]: packets sent, less those left out (SACK'd or marked lost), plus
 * those that have been resent.
 */
static inline unsigned int rxrpc_tx_in_flight(const struct rxrpc_call *call)
{
	unsigned int in_flight = call->tx_nr_sent;

	in_flight -= rxrpc_left_out(call);
	in_flight += call->tx_nr_resent;
	return in_flight;
}

/*
 * debug tracing
 */
+86 −161

File changed.

Preview size limit exceeded, changes collapsed.

+1 −2
Original line number Diff line number Diff line
@@ -160,8 +160,7 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp,
	call->ackr_window	= 1;
	call->ackr_wtop		= 1;
	call->delay_ack_at	= KTIME_MAX;
	call->ack_lost_at	= KTIME_MAX;
	call->resend_at		= KTIME_MAX;
	call->rack_timo_at	= KTIME_MAX;
	call->ping_at		= KTIME_MAX;
	call->keepalive_at	= KTIME_MAX;
	call->expect_rx_by	= KTIME_MAX;
Loading