Commit d1e59a46, authored by Eric Dumazet, committed by Jakub Kicinski
Browse files

tcp: add cwnd_event_tx_start to tcp_congestion_ops



(tcp_congestion_ops)->cwnd_event() is called very often, with
@event oscillating between CA_EVENT_TX_START and other values.

This is not branch prediction friendly.

Provide a new cwnd_event_tx_start pointer dedicated for CA_EVENT_TX_START.

Both BBR and CUBIC benefit from this change, since they only care
about CA_EVENT_TX_START.

No change in kernel size:

$ scripts/bloat-o-meter -t vmlinux.0 vmlinux
add/remove: 4/4 grow/shrink: 3/1 up/down: 564/-568 (-4)
Function                                     old     new   delta
bbr_cwnd_event_tx_start                        -     450    +450
cubictcp_cwnd_event_tx_start                   -      70     +70
__pfx_cubictcp_cwnd_event_tx_start             -      16     +16
__pfx_bbr_cwnd_event_tx_start                  -      16     +16
tcp_unregister_congestion_control             93      99      +6
tcp_update_congestion_control                518     521      +3
tcp_register_congestion_control              422     425      +3
__tcp_transmit_skb                          3308    3306      -2
__pfx_cubictcp_cwnd_event                     16       -     -16
__pfx_bbr_cwnd_event                          16       -     -16
cubictcp_cwnd_event                           80       -     -80
bbr_cwnd_event                               454       -    -454
Total: Before=25240512, After=25240508, chg -0.00%

Signed-off-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20260323234920.1097858-1-edumazet@google.com


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parent 112f4c63
Loading
Loading
Loading
Loading
+8 −0
Original line number Diff line number Diff line
@@ -1341,6 +1341,9 @@ struct tcp_congestion_ops {
	/* call when cwnd event occurs (optional) */
	void (*cwnd_event)(struct sock *sk, enum tcp_ca_event ev);

	/* call when CA_EVENT_TX_START cwnd event occurs (optional) */
	void (*cwnd_event_tx_start)(struct sock *sk);

	/* call when ack arrives (optional) */
	void (*in_ack_event)(struct sock *sk, u32 flags);

@@ -1440,6 +1443,11 @@ static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);

	/* CA_EVENT_TX_START has its own optional hook. It is never passed
	 * to ->cwnd_event(), even when ->cwnd_event_tx_start is NULL.
	 */
	if (event == CA_EVENT_TX_START) {
		if (icsk->icsk_ca_ops->cwnd_event_tx_start)
			icsk->icsk_ca_ops->cwnd_event_tx_start(sk);
		return;
	}
	/* Every other event goes to the optional generic hook. */
	if (icsk->icsk_ca_ops->cwnd_event)
		icsk->icsk_ca_ops->cwnd_event(sk, event);
}
+5 −0
Original line number Diff line number Diff line
@@ -272,6 +272,10 @@ static void bpf_tcp_ca_cwnd_event(struct sock *sk, enum tcp_ca_event ev)
{
}

/* Empty stub: does nothing with @sk. It is the .cwnd_event_tx_start entry
 * of __bpf_ops_tcp_congestion_ops, next to the equally empty
 * bpf_tcp_ca_cwnd_event() stub.
 */
static void bpf_tcp_ca_cwnd_event_tx_start(struct sock *sk)
{
}

static void bpf_tcp_ca_in_ack_event(struct sock *sk, u32 flags)
{
}
@@ -313,6 +317,7 @@ static struct tcp_congestion_ops __bpf_ops_tcp_congestion_ops = {
	.cong_avoid = bpf_tcp_ca_cong_avoid,
	.set_state = bpf_tcp_ca_set_state,
	.cwnd_event = bpf_tcp_ca_cwnd_event,
	.cwnd_event_tx_start = bpf_tcp_ca_cwnd_event_tx_start,
	.in_ack_event = bpf_tcp_ca_in_ack_event,
	.pkts_acked = bpf_tcp_ca_pkts_acked,
	.min_tso_segs = bpf_tcp_ca_min_tso_segs,
+4 −4
Original line number Diff line number Diff line
@@ -330,12 +330,12 @@ static void bbr_save_cwnd(struct sock *sk)
		bbr->prior_cwnd = max(bbr->prior_cwnd, tcp_snd_cwnd(tp));
}

__bpf_kfunc static void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event)
__bpf_kfunc static void bbr_cwnd_event_tx_start(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct bbr *bbr = inet_csk_ca(sk);

	if (event == CA_EVENT_TX_START && tp->app_limited) {
	if (tp->app_limited) {
		bbr->idle_restart = 1;
		bbr->ack_epoch_mstamp = tp->tcp_mstamp;
		bbr->ack_epoch_acked = 0;
@@ -1149,7 +1149,7 @@ static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = {
	.cong_control	= bbr_main,
	.sndbuf_expand	= bbr_sndbuf_expand,
	.undo_cwnd	= bbr_undo_cwnd,
	.cwnd_event	= bbr_cwnd_event,
	.cwnd_event_tx_start	= bbr_cwnd_event_tx_start,
	.ssthresh	= bbr_ssthresh,
	.min_tso_segs	= bbr_min_tso_segs,
	.get_info	= bbr_get_info,
@@ -1161,7 +1161,7 @@ BTF_ID_FLAGS(func, bbr_init)
BTF_ID_FLAGS(func, bbr_main)
BTF_ID_FLAGS(func, bbr_sndbuf_expand)
BTF_ID_FLAGS(func, bbr_undo_cwnd)
BTF_ID_FLAGS(func, bbr_cwnd_event)
BTF_ID_FLAGS(func, bbr_cwnd_event_tx_start)
BTF_ID_FLAGS(func, bbr_ssthresh)
BTF_ID_FLAGS(func, bbr_min_tso_segs)
BTF_ID_FLAGS(func, bbr_set_state)
+16 −19
Original line number Diff line number Diff line
@@ -139,9 +139,8 @@ __bpf_kfunc static void cubictcp_init(struct sock *sk)
		tcp_sk(sk)->snd_ssthresh = initial_ssthresh;
}

__bpf_kfunc static void cubictcp_cwnd_event(struct sock *sk, enum tcp_ca_event event)
__bpf_kfunc static void cubictcp_cwnd_event_tx_start(struct sock *sk)
{
	if (event == CA_EVENT_TX_START) {
	struct bictcp *ca = inet_csk_ca(sk);
	u32 now = tcp_jiffies32;
	s32 delta;
@@ -156,8 +155,6 @@ __bpf_kfunc static void cubictcp_cwnd_event(struct sock *sk, enum tcp_ca_event e
		if (after(ca->epoch_start, now))
			ca->epoch_start = now;
	}
		return;
	}
}

/* calculate the cubic root of x using a table lookup followed by one
@@ -481,7 +478,7 @@ static struct tcp_congestion_ops cubictcp __read_mostly = {
	.cong_avoid	= cubictcp_cong_avoid,
	.set_state	= cubictcp_state,
	.undo_cwnd	= tcp_reno_undo_cwnd,
	.cwnd_event	= cubictcp_cwnd_event,
	.cwnd_event_tx_start = cubictcp_cwnd_event_tx_start,
	.pkts_acked     = cubictcp_acked,
	.owner		= THIS_MODULE,
	.name		= "cubic",
@@ -492,7 +489,7 @@ BTF_ID_FLAGS(func, cubictcp_init)
BTF_ID_FLAGS(func, cubictcp_recalc_ssthresh)
BTF_ID_FLAGS(func, cubictcp_cong_avoid)
BTF_ID_FLAGS(func, cubictcp_state)
BTF_ID_FLAGS(func, cubictcp_cwnd_event)
BTF_ID_FLAGS(func, cubictcp_cwnd_event_tx_start)
BTF_ID_FLAGS(func, cubictcp_acked)
BTF_KFUNCS_END(tcp_cubic_check_kfunc_ids)

+9 −3
Original line number Diff line number Diff line
@@ -203,15 +203,19 @@ __bpf_kfunc static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev)
		tcp_plb_update_state_upon_rto(sk, &ca->plb);
		dctcp_react_to_loss(sk);
		break;
	case CA_EVENT_TX_START:
		tcp_plb_check_rehash(sk, &ca->plb); /* Maybe rehash when inflight is 0 */
		break;
	default:
		/* Don't care for the rest. */
		break;
	}
}

/* DCTCP's ->cwnd_event_tx_start hook: pass the PLB state kept in the
 * socket's DCTCP private area to tcp_plb_check_rehash().
 */
__bpf_kfunc static void dctcp_cwnd_event_tx_start(struct sock *sk)
{
	struct dctcp *dctcp_priv = inet_csk_ca(sk);

	/* Maybe rehash when inflight is 0 */
	tcp_plb_check_rehash(sk, &dctcp_priv->plb);
}

static size_t dctcp_get_info(struct sock *sk, u32 ext, int *attr,
			     union tcp_cc_info *info)
{
@@ -252,6 +256,7 @@ static struct tcp_congestion_ops dctcp __read_mostly = {
	.init		= dctcp_init,
	.in_ack_event   = dctcp_update_alpha,
	.cwnd_event	= dctcp_cwnd_event,
	.cwnd_event_tx_start = dctcp_cwnd_event_tx_start,
	.ssthresh	= dctcp_ssthresh,
	.cong_avoid	= tcp_reno_cong_avoid,
	.undo_cwnd	= dctcp_cwnd_undo,
@@ -275,6 +280,7 @@ BTF_KFUNCS_START(tcp_dctcp_check_kfunc_ids)
BTF_ID_FLAGS(func, dctcp_init)
BTF_ID_FLAGS(func, dctcp_update_alpha)
BTF_ID_FLAGS(func, dctcp_cwnd_event)
BTF_ID_FLAGS(func, dctcp_cwnd_event_tx_start)
BTF_ID_FLAGS(func, dctcp_ssthresh)
BTF_ID_FLAGS(func, dctcp_cwnd_undo)
BTF_ID_FLAGS(func, dctcp_state)
Loading