Commit 9a011277 authored by Ilpo Järvinen, committed by Paolo Abeni
Browse files

tcp: accecn: add AccECN rx byte counters



These three byte counters track IP ECN field payload byte sums for
all arriving (acceptable) packets for ECT0, ECT1, and CE. The
AccECN option (added by a later patch in the series) echoes these
counters back to the sender side; therefore, they are placed within
the tcp_sock_write_txrx group.

Below are the pahole outcomes before and after this patch, in which
the group size of tcp_sock_write_txrx is increased from 95 + 4 to
107 + 4 and an extra 4-byte hole is created but will be exploited
in later patches:

[BEFORE THIS PATCH]
struct tcp_sock {
    [...]
    u32                        delivered_ce;         /*  2576     4 */
    u32                        received_ce;          /*  2580     4 */
    u32                        app_limited;          /*  2584     4 */
    u32                        rcv_wnd;              /*  2588     4 */
    struct tcp_options_received rx_opt;              /*  2592    24 */
    __cacheline_group_end__tcp_sock_write_txrx[0];   /*  2616     0 */

    [...]
    /* size: 3200, cachelines: 50, members: 166 */
}

[AFTER THIS PATCH]
struct tcp_sock {
    [...]
    u32                        delivered_ce;         /*  2576     4 */
    u32                        received_ce;          /*  2580     4 */
    u32                        received_ecn_bytes[3];/*  2584    12 */
    u32                        app_limited;          /*  2596     4 */
    u32                        rcv_wnd;              /*  2600     4 */
    struct tcp_options_received rx_opt;              /*  2604    24 */
    __cacheline_group_end__tcp_sock_write_txrx[0];   /*  2628     0 */
    /* XXX 4 bytes hole, try to pack */

    [...]
    /* size: 3200, cachelines: 50, members: 167 */
}

Signed-off-by: Ilpo Järvinen <ij@kernel.org>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Co-developed-by: Chia-Yu Chang <chia-yu.chang@nokia-bell-labs.com>
Signed-off-by: Chia-Yu Chang <chia-yu.chang@nokia-bell-labs.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20250916082434.100722-4-chia-yu.chang@nokia-bell-labs.com


Signed-off-by: Paolo Abeni <pabeni@redhat.com>
parent 3cae3427
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -102,6 +102,7 @@ u32 prr_out read_mostly read_m
u32                           delivered               read_mostly         read_write          tcp_rate_skb_sent, tcp_newly_delivered(tx);tcp_ack, tcp_rate_gen, tcp_clean_rtx_queue (rx)
u32                           delivered_ce            read_mostly         read_write          tcp_rate_skb_sent(tx);tcp_rate_gen(rx)
u32                           received_ce             read_mostly         read_write
u32[3]                        received_ecn_bytes      read_mostly         read_write
u8:4                          received_ce_pending     read_mostly         read_write
u8:2                          syn_ect_snt             write_mostly        read_write
u8:2                          syn_ect_rcv             read_mostly         read_write
+4 −0
Original line number Diff line number Diff line
@@ -306,6 +306,10 @@ struct tcp_sock {
	u32	delivered;	/* Total data packets delivered incl. rexmits */
	u32	delivered_ce;	/* Like the above but only ECE marked packets */
	u32	received_ce;	/* Like the above but for rcvd CE marked pkts */
	u32	received_ecn_bytes[3]; /* received byte counters for three ECN
					* types: INET_ECN_ECT_1, INET_ECN_ECT_0,
					* and INET_ECN_CE
					*/
	u32	app_limited;	/* limited until "delivered" reaches this val */
	u32	rcv_wnd;	/* Current receiver window		*/
/*
+28 −1
Original line number Diff line number Diff line
@@ -171,7 +171,7 @@ static inline void tcp_accecn_third_ack(struct sock *sk,

/* Updates Accurate ECN received counters from the received IP ECN field */
static inline void tcp_ecn_received_counters(struct sock *sk,
					     const struct sk_buff *skb)
					     const struct sk_buff *skb, u32 len)
{
	u8 ecnfield = TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK;
	u8 is_ce = INET_ECN_is_ce(ecnfield);
@@ -191,7 +191,22 @@ static inline void tcp_ecn_received_counters(struct sock *sk,
		tp->received_ce += pcount;
		tp->received_ce_pending = min(tp->received_ce_pending + pcount,
					      0xfU);

		if (len > 0)
			tp->received_ecn_bytes[ecnfield - 1] += len;
	}
}

/* AccECN specification, 2.2: [...] A Data Receiver maintains four counters
 * initialized at the start of the half-connection. [...] These byte counters
 * reflect only the TCP payload length, excluding TCP header and TCP options.
 *
 * Interprets skb->data as the TCP header, works out the payload size as
 * skb->len minus the data offset field scaled by 4, and passes that size
 * on to tcp_ecn_received_counters().
 */
static inline void tcp_ecn_received_counters_payload(struct sock *sk,
						     const struct sk_buff *skb)
{
	const struct tcphdr *tcp_hdr = (const struct tcphdr *)skb->data;
	unsigned int hdr_bytes = tcp_hdr->doff * 4;

	tcp_ecn_received_counters(sk, skb, skb->len - hdr_bytes);
}

/* AccECN specification, 5.1: [...] a server can determine that it
@@ -232,10 +247,22 @@ static inline bool tcp_accecn_syn_requested(const struct tcphdr *th)
	return ace && ace != 0x3;
}

/* Clears the three byte counters held in @counter_array. Each counter lives
 * at index (ECN codepoint - 1); the BUILD_BUG_ON()s pin the codepoints to
 * the values 1..3, so the loop below touches exactly slots 0..2.
 *
 * NOTE(review): tcp_accecn_init_counters() passes tp->received_ecn_bytes,
 * which is declared as u32[3], while this parameter is int * - confirm the
 * signedness difference is intended.
 */
static inline void __tcp_accecn_init_bytes_counters(int *counter_array)
{
	int ecn;

	BUILD_BUG_ON(INET_ECN_ECT_1 != 0x1);
	BUILD_BUG_ON(INET_ECN_ECT_0 != 0x2);
	BUILD_BUG_ON(INET_ECN_CE != 0x3);

	for (ecn = INET_ECN_ECT_1; ecn <= INET_ECN_CE; ecn++)
		counter_array[ecn - 1] = 0;
}

/* Resets the AccECN receive-side fields of @tp to zero: the
 * received_ecn_bytes[] array (through the byte-counter helper),
 * received_ce_pending and received_ce.
 */
static inline void tcp_accecn_init_counters(struct tcp_sock *tp)
{
	__tcp_accecn_init_bytes_counters(tp->received_ecn_bytes);
	tp->received_ce_pending = 0;
	tp->received_ce = 0;
}

/* Used for make_synack to form the ACE flags */
+2 −1
Original line number Diff line number Diff line
@@ -5142,6 +5142,7 @@ static void __init tcp_struct_check(void)
	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, delivered);
	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, delivered_ce);
	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, received_ce);
	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, received_ecn_bytes);
	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, app_limited);
	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, rcv_wnd);
	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, rx_opt);
@@ -5149,7 +5150,7 @@ static void __init tcp_struct_check(void)
	/* 32bit arches with 8byte alignment on u64 fields might need padding
	 * before tcp_clock_cache.
	 */
	CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_write_txrx, 95 + 4);
	CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_write_txrx, 107 + 4);

	/* RX read-write hotpath cache lines */
	CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, bytes_received);
+4 −3
Original line number Diff line number Diff line
@@ -6163,7 +6163,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
					flag |= __tcp_replace_ts_recent(tp,
									delta);

				tcp_ecn_received_counters(sk, skb);
				tcp_ecn_received_counters(sk, skb, 0);

				/* We know that such packets are checksummed
				 * on entry.
@@ -6213,7 +6213,8 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
			/* Bulk data transfer: receiver */
			tcp_cleanup_skb(skb);
			__skb_pull(skb, tcp_header_len);
			tcp_ecn_received_counters(sk, skb);
			tcp_ecn_received_counters(sk, skb,
						  len - tcp_header_len);
			eaten = tcp_queue_rcv(sk, skb, &fragstolen);

			tcp_event_data_recv(sk, skb);
@@ -6254,7 +6255,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
		return;

step5:
	tcp_ecn_received_counters(sk, skb);
	tcp_ecn_received_counters_payload(sk, skb);

	reason = tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT);
	if ((int)reason < 0) {
Loading