Commit cbe35adf authored by Alexei Starovoitov
Browse files

Merge branch 'selftests-bpf-retire-bpf_tcp_helpers-h'

Martin KaFai Lau says:

====================
selftests/bpf: Retire bpf_tcp_helpers.h

From: Martin KaFai Lau <martin.lau@kernel.org>

The earlier commit 8e6d9ae2 ("selftests/bpf: Use bpf_tracing.h instead of bpf_tcp_helpers.h")
removed the bpf_tcp_helpers.h usages from the non-networking tests.

This patch set continues that effort by retiring
bpf_tcp_helpers.h from the networking tests (mostly tcp-cc related).

The main use of bpf_tcp_helpers.h is its partial kernel
socket definitions (e.g. sock, tcp_sock). New fields keep being added
to those partial socket definitions even though everything is available
in vmlinux.h. The recent bpf_cc_cubic.c test tried to extend
bpf_tcp_helpers.h but eventually used vmlinux.h instead. To avoid
this unnecessary detour for new tests and to have one consistent way
of using the kernel sockets, this patch set retires the bpf_tcp_helpers.h
usages and consolidates the tests to use vmlinux.h instead.
====================

Link: https://lore.kernel.org/r/20240509175026.3423614-1-martin.lau@linux.dev


Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parents 00936709 6a650816
Loading
Loading
Loading
Loading
+0 −241
Original line number Diff line number Diff line
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __BPF_TCP_HELPERS_H
#define __BPF_TCP_HELPERS_H

#include <stdbool.h>
#include <linux/types.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_core_read.h>
#include <bpf/bpf_tracing.h>

/* Declare a struct_ops BPF program without the SEC() boilerplate:
 * the program is placed in the "struct_ops/<name>" ELF section and the
 * function itself is generated by BPF_PROG(name, args).
 *
 * Usage: void BPF_STRUCT_OPS(my_init, struct sock *sk) { ... }
 */
#define BPF_STRUCT_OPS(name, args...) \
SEC("struct_ops/"#name) \
BPF_PROG(name, args)

/* Fallback definition, used only when no earlier header defined SOL_TCP. */
#ifndef SOL_TCP
#define SOL_TCP 6
#endif

/* Size of the congestion-control name buffers declared in this header
 * (tcp_congestion_ops::name, mptcp_sock::ca_name) and the bound of the
 * comparison loop in tcp_cc_eq().
 */
#ifndef TCP_CA_NAME_MAX
#define TCP_CA_NAME_MAX	16
#endif

/* Current jiffies counter from bpf_jiffies64(), truncated to 32 bits. */
#define tcp_jiffies32 ((__u32)bpf_jiffies64())

/* Partial view of the kernel's struct sock_common; only the fields the
 * selftests touch are declared. preserve_access_index makes the compiler
 * emit CO-RE relocations for field accesses, so the offsets used at run
 * time come from the running kernel rather than from this layout.
 */
struct sock_common {
	unsigned char	skc_state;	/* reachable as sk->sk_state via the macro in struct sock */
	__u16		skc_num;	/* NOTE(review): local port number in the kernel — confirm */
} __attribute__((preserve_access_index));

/* Values for sock::sk_pacing_status (see the comment on that field). */
enum sk_pacing {
	SK_PACING_NONE		= 0,
	SK_PACING_NEEDED	= 1,
	SK_PACING_FQ		= 2,
};

/* Partial view of the kernel's struct sock (CO-RE relocated through
 * preserve_access_index). Only the members used by the tests are listed.
 */
struct sock {
	struct sock_common	__sk_common;
/* Shorthand so that sk->sk_state reads __sk_common.skc_state. */
#define sk_state		__sk_common.skc_state
	unsigned long		sk_pacing_rate;
	__u32			sk_pacing_status; /* see enum sk_pacing */
} __attribute__((preserve_access_index));

/* Partial view of the kernel's struct inet_sock; it only carries the
 * embedded struct sock, which is its first member.
 */
struct inet_sock {
	struct sock		sk;
} __attribute__((preserve_access_index));

/* Partial view of the kernel's struct inet_connection_sock (CO-RE
 * relocated). The embedded inet_sock comes first, which is what lets
 * inet_csk() cast a struct sock pointer directly to this type.
 */
struct inet_connection_sock {
	struct inet_sock	  icsk_inet;
	/* Three bitfields sharing one byte: 6 + 1 + 1 bits. */
	__u8			  icsk_ca_state:6,
				  icsk_ca_setsockopt:1,
				  icsk_ca_dst_locked:1;
	struct {
		__u8		  pending; /* NOTE(review): presumably holds enum inet_csk_ack_state_t bits — confirm */
	} icsk_ack;
	/* 104 bytes of per-socket scratch space handed out by inet_csk_ca(). */
	__u64			  icsk_ca_priv[104 / sizeof(__u64)];
} __attribute__((preserve_access_index));

/* Partial view of the kernel's struct request_sock; only the leading
 * sock_common is declared.
 */
struct request_sock {
	struct sock_common		__req_common;
} __attribute__((preserve_access_index));

/* Partial view of the kernel's struct tcp_sock, listing only the fields
 * the TCP congestion-control selftests read or write. Field accesses are
 * CO-RE relocated (preserve_access_index). inet_conn is the first member,
 * which is what tcp_sk() relies on when it casts a struct sock pointer.
 */
struct tcp_sock {
	struct inet_connection_sock	inet_conn;

	__u32	rcv_nxt;
	__u32	snd_nxt;
	__u32	snd_una;
	__u32	window_clamp;
	__u8	ecn_flags;	/* tested against the TCP_ECN_* bits */
	__u32	delivered;
	__u32	delivered_ce;
	__u32	snd_cwnd;	/* compared with snd_ssthresh in tcp_in_slow_start() */
	__u32	snd_cwnd_cnt;
	__u32	snd_cwnd_clamp;
	__u32	snd_ssthresh;
	__u8	syn_data:1,	/* SYN includes data */
		syn_fastopen:1,	/* SYN includes Fast Open option */
		syn_fastopen_exp:1,/* SYN includes Fast Open exp. option */
		syn_fastopen_ch:1, /* Active TFO re-enabling probe */
		syn_data_acked:1,/* data in SYN is acked by SYN-ACK */
		save_syn:1,	/* Save headers of SYN packet */
		is_cwnd_limited:1,/* forward progress limited by snd_cwnd? */
		syn_smc:1;	/* SYN includes SMC */
	__u32	max_packets_out;	/* used by tcp_is_cwnd_limited() during slow start */
	__u32	lsndtime;
	__u32	prior_cwnd;
	__u64	tcp_mstamp;	/* most recent packet received/sent */
	bool	is_mptcp;
} __attribute__((preserve_access_index));

/* View a socket as its enclosing inet_connection_sock.
 *
 * The pointer value is returned unchanged (and the const qualifier is
 * dropped); this works because the embedded struct sock sits at the very
 * start of struct inet_connection_sock.
 */
static __always_inline struct inet_connection_sock *inet_csk(const struct sock *sk)
{
	const void *base = sk;

	return (struct inet_connection_sock *)base;
}

static __always_inline void *inet_csk_ca(const struct sock *sk)
{
	return (void *)inet_csk(sk)->icsk_ca_priv;
}

/* View a socket as its enclosing tcp_sock.
 *
 * Pure pointer reinterpretation (the const qualifier is dropped); valid
 * because struct tcp_sock begins with the connection sock, which in turn
 * begins with the struct sock.
 */
static __always_inline struct tcp_sock *tcp_sk(const struct sock *sk)
{
	const void *base = sk;

	return (struct tcp_sock *)base;
}

/* Sequence-number comparison that tolerates 32-bit wraparound.
 *
 * Returns true when seq1 precedes seq2: the unsigned difference is
 * reinterpreted as signed, so values up to 2^31 apart compare correctly
 * even across the wrap from 0xffffffff to 0.
 */
static __always_inline bool before(__u32 seq1, __u32 seq2)
{
	__s32 distance = (__s32)(seq1 - seq2);

	return distance < 0;
}
/* after(a, b) is true when a follows b, i.e. before(b, a). */
#define after(seq2, seq1) 	before(seq1, seq2)

/* Bit flags tested against tcp_sock::ecn_flags. */
#define	TCP_ECN_OK		1
#define	TCP_ECN_QUEUE_CWR	2
#define	TCP_ECN_DEMAND_CWR	4
#define	TCP_ECN_SEEN		8

/* ACK scheduling state bits; each value is a distinct power of two so
 * they can be OR-ed together.
 * NOTE(review): presumably stored in inet_connection_sock::icsk_ack.pending — confirm.
 */
enum inet_csk_ack_state_t {
	ICSK_ACK_SCHED	= 1,
	ICSK_ACK_TIMER  = 2,
	ICSK_ACK_PUSHED = 4,
	ICSK_ACK_PUSHED2 = 8,
	ICSK_ACK_NOW = 16	/* Send the next ACK immediately (once) */
};

/* Events delivered to tcp_congestion_ops::cwnd_event(). The numeric
 * values are spelled out explicitly.
 */
enum tcp_ca_event {
	CA_EVENT_TX_START = 0,
	CA_EVENT_CWND_RESTART = 1,
	CA_EVENT_COMPLETE_CWR = 2,
	CA_EVENT_LOSS = 3,
	CA_EVENT_ECN_NO_CE = 4,
	CA_EVENT_ECN_IS_CE = 5,
};

/* Sample passed to tcp_congestion_ops::pkts_acked(). Partial, CO-RE
 * relocated view of the kernel structure of the same name.
 */
struct ack_sample {
	__u32 pkts_acked;
	__s32 rtt_us;	/* signed — NOTE(review): presumably negative when no RTT sample is available; confirm */
	__u32 in_flight;
} __attribute__((preserve_access_index));

/* Delivery-rate sample passed to tcp_congestion_ops::cong_control().
 * Partial, CO-RE relocated view of the kernel structure of the same name.
 */
struct rate_sample {
	__u64  prior_mstamp; /* starting timestamp for interval */
	__u32  prior_delivered;	/* tp->delivered at "prior_mstamp" */
	__s32  delivered;		/* number of packets delivered over interval */
	long interval_us;	/* time for tp->delivered to incr "delivered" */
	__u32 snd_interval_us;	/* snd interval for delivered packets */
	__u32 rcv_interval_us;	/* rcv interval for delivered packets */
	long rtt_us;		/* RTT of last (S)ACKed packet (or -1) */
	int  losses;		/* number of packets marked lost upon ACK */
	__u32  acked_sacked;	/* number of packets newly (S)ACKed upon ACK */
	__u32  prior_in_flight;	/* in flight before this ACK */
	bool is_app_limited;	/* is sample from packet with bubble in pipe? */
	bool is_retrans;	/* is sample from retransmission? */
	bool is_ack_delayed;	/* is this (likely) a delayed ACK? */
} __attribute__((preserve_access_index));

/* Size of tcp_congestion_ops::name. Guarded like the definition near the
 * top of this header, so a value supplied by an earlier include is never
 * redefined here (an unguarded #define would defeat that guard and
 * trigger a macro-redefinition diagnostic if the values differed).
 */
#ifndef TCP_CA_NAME_MAX
#define TCP_CA_NAME_MAX		16
#endif
/* NOTE(review): presumably a bit for tcp_congestion_ops::flags — confirm. */
#define TCP_CONG_NEEDS_ECN	0x2

/* Local declaration of the kernel's congestion-control operations table.
 * A BPF struct_ops program fills in these callbacks; the comments mark
 * each one as required or optional. Unlike the socket structs in this
 * header, this one carries no preserve_access_index attribute.
 */
struct tcp_congestion_ops {
	char name[TCP_CA_NAME_MAX];
	__u32 flags;

	/* initialize private data (optional) */
	void (*init)(struct sock *sk);
	/* cleanup private data  (optional) */
	void (*release)(struct sock *sk);

	/* return slow start threshold (required) */
	__u32 (*ssthresh)(struct sock *sk);
	/* do new cwnd calculation (required) */
	void (*cong_avoid)(struct sock *sk, __u32 ack, __u32 acked);
	/* call before changing ca_state (optional) */
	void (*set_state)(struct sock *sk, __u8 new_state);
	/* call when cwnd event occurs (optional) */
	void (*cwnd_event)(struct sock *sk, enum tcp_ca_event ev);
	/* call when ack arrives (optional) */
	void (*in_ack_event)(struct sock *sk, __u32 flags);
	/* new value of cwnd after loss (required) */
	__u32  (*undo_cwnd)(struct sock *sk);
	/* hook for packet ack accounting (optional) */
	void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample);
	/* override sysctl_tcp_min_tso_segs */
	__u32 (*min_tso_segs)(struct sock *sk);
	/* returns the multiplier used in tcp_sndbuf_expand (optional) */
	__u32 (*sndbuf_expand)(struct sock *sk);
	/* call when packets are delivered to update cwnd and pacing rate,
	 * after all the ca_state processing. (optional)
	 */
	void (*cong_control)(struct sock *sk, const struct rate_sample *rs);
	void *owner;
};

/* Smaller / larger of two values. Plain conditional expressions, so they
 * remain usable in constant expressions; note that the selected argument
 * is evaluated twice, so do not pass expressions with side effects.
 */
#define min(a, b) ((a) < (b) ? (a) : (b))
#define max(a, b) ((a) > (b) ? (a) : (b))
/* Smaller of x and y, treating 0 as "unset": if exactly one argument is
 * zero the other one is returned, and 0 is returned only when both are.
 * Each argument is evaluated exactly once. __typeof__ is used instead of
 * typeof so the macro also expands under strict ISO modes (-std=c11),
 * where the bare typeof keyword is not recognized.
 */
#define min_not_zero(x, y) ({			\
	__typeof__(x) __x = (x);		\
	__typeof__(y) __y = (y);		\
	__x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); })

/* A connection is in slow start while its congestion window is still
 * strictly below the slow-start threshold.
 */
static __always_inline bool tcp_in_slow_start(const struct tcp_sock *tp)
{
	__u32 cwnd = tp->snd_cwnd;
	__u32 ssthresh = tp->snd_ssthresh;

	return cwnd < ssthresh;
}

/* Tell whether the sender is currently limited by its congestion window.
 *
 * Outside slow start the answer is the socket's is_cwnd_limited bitfield,
 * read with BPF_CORE_READ_BITFIELD. During slow start the window counts
 * as limiting while it is below twice max_packets_out, so that cwnd can
 * grow to twice what was ACKed.
 */
static __always_inline bool tcp_is_cwnd_limited(const struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);

	if (!tcp_in_slow_start(tp))
		return !!BPF_CORE_READ_BITFIELD(tp, is_cwnd_limited);

	/* Slow start: ensure cwnd grows to twice what was ACKed. */
	return tp->snd_cwnd < 2 * tp->max_packets_out;
}

/* Compare two congestion-control names for equality.
 *
 * At most TCP_CA_NAME_MAX characters are examined. The names are equal
 * when they match up to and including the first NUL, or when all
 * TCP_CA_NAME_MAX characters match without a terminator being seen.
 */
static __always_inline bool tcp_cc_eq(const char *a, const char *b)
{
	int pos = 0;

	while (pos < TCP_CA_NAME_MAX) {
		char ch = a[pos];

		if (ch != b[pos])
			return false;
		/* Both strings ended at the same position. */
		if (ch == '\0')
			return true;
		pos++;
	}

	return true;
}

/* Kernel functions called from the BPF congestion-control programs. The
 * __ksym annotation marks them as symbols provided by the kernel rather
 * than defined in the BPF object.
 */
extern __u32 tcp_slow_start(struct tcp_sock *tp, __u32 acked) __ksym;
extern void tcp_cong_avoid_ai(struct tcp_sock *tp, __u32 w, __u32 acked) __ksym;

/* Partial, CO-RE relocated view of the kernel's struct mptcp_sock, listing
 * only the fields the MPTCP selftests use.
 */
struct mptcp_sock {
	struct inet_connection_sock	sk;

	__u32		token;
	struct sock	*first;
	char		ca_name[TCP_CA_NAME_MAX];	/* congestion-control name buffer */
} __attribute__((preserve_access_index));

#endif
+16 −26
Original line number Diff line number Diff line
@@ -13,15 +13,9 @@
 *    kernel functions.
 */

#include "vmlinux.h"

#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include "bpf_tracing_net.h"

#define BPF_STRUCT_OPS(name, args...) \
SEC("struct_ops/"#name) \
BPF_PROG(name, args)

#define USEC_PER_SEC 1000000UL
#define TCP_PACING_SS_RATIO (200)
@@ -40,16 +34,6 @@ extern __u32 tcp_reno_undo_cwnd(struct sock *sk) __ksym;
extern void cubictcp_acked(struct sock *sk, const struct ack_sample *sample) __ksym;
extern void cubictcp_cong_avoid(struct sock *sk, __u32 ack, __u32 acked) __ksym;

static struct inet_connection_sock *inet_csk(const struct sock *sk)
{
	return (struct inet_connection_sock *)sk;
}

static struct tcp_sock *tcp_sk(const struct sock *sk)
{
	return (struct tcp_sock *)sk;
}

static bool before(__u32 seq1, __u32 seq2)
{
	return (__s32)(seq1-seq2) < 0;
@@ -126,17 +110,20 @@ static bool tcp_may_raise_cwnd(const struct sock *sk, const int flag)
	return flag & FLAG_DATA_ACKED;
}

void BPF_STRUCT_OPS(bpf_cubic_init, struct sock *sk)
SEC("struct_ops")
void BPF_PROG(bpf_cubic_init, struct sock *sk)
{
	cubictcp_init(sk);
}

void BPF_STRUCT_OPS(bpf_cubic_cwnd_event, struct sock *sk, enum tcp_ca_event event)
SEC("struct_ops")
void BPF_PROG(bpf_cubic_cwnd_event, struct sock *sk, enum tcp_ca_event event)
{
	cubictcp_cwnd_event(sk, event);
}

void BPF_STRUCT_OPS(bpf_cubic_cong_control, struct sock *sk, __u32 ack, int flag,
SEC("struct_ops")
void BPF_PROG(bpf_cubic_cong_control, struct sock *sk, __u32 ack, int flag,
	      const struct rate_sample *rs)
{
	struct tcp_sock *tp = tcp_sk(sk);
@@ -163,23 +150,26 @@ void BPF_STRUCT_OPS(bpf_cubic_cong_control, struct sock *sk, __u32 ack, int flag
	tcp_update_pacing_rate(sk);
}

__u32 BPF_STRUCT_OPS(bpf_cubic_recalc_ssthresh, struct sock *sk)
SEC("struct_ops")
__u32 BPF_PROG(bpf_cubic_recalc_ssthresh, struct sock *sk)
{
	return cubictcp_recalc_ssthresh(sk);
}

void BPF_STRUCT_OPS(bpf_cubic_state, struct sock *sk, __u8 new_state)
SEC("struct_ops")
void BPF_PROG(bpf_cubic_state, struct sock *sk, __u8 new_state)
{
	cubictcp_state(sk, new_state);
}

void BPF_STRUCT_OPS(bpf_cubic_acked, struct sock *sk,
		const struct ack_sample *sample)
SEC("struct_ops")
void BPF_PROG(bpf_cubic_acked, struct sock *sk, const struct ack_sample *sample)
{
	cubictcp_acked(sk, sample);
}

__u32 BPF_STRUCT_OPS(bpf_cubic_undo_cwnd, struct sock *sk)
SEC("struct_ops")
__u32 BPF_PROG(bpf_cubic_undo_cwnd, struct sock *sk)
{
	return tcp_reno_undo_cwnd(sk);
}
+41 −33
Original line number Diff line number Diff line
@@ -14,14 +14,22 @@
 *    "ca->ack_cnt / delta" operation.
 */

#include <linux/bpf.h>
#include <linux/stddef.h>
#include <linux/tcp.h>
#include "bpf_tcp_helpers.h"
#include "bpf_tracing_net.h"
#include <bpf/bpf_tracing.h>

char _license[] SEC("license") = "GPL";

#define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi)
#define min(a, b) ((a) < (b) ? (a) : (b))
#define max(a, b) ((a) > (b) ? (a) : (b))
static bool before(__u32 seq1, __u32 seq2)
{
	return (__s32)(seq1-seq2) < 0;
}
#define after(seq2, seq1) 	before(seq1, seq2)

extern __u32 tcp_slow_start(struct tcp_sock *tp, __u32 acked) __ksym;
extern void tcp_cong_avoid_ai(struct tcp_sock *tp, __u32 w, __u32 acked) __ksym;

#define BICTCP_BETA_SCALE    1024	/* Scale factor beta calculation
					 * max_cwnd = snd_cwnd * beta
@@ -70,7 +78,7 @@ static const __u64 cube_factor = (__u64)(1ull << (10+3*BICTCP_HZ))
				/ (bic_scale * 10);

/* BIC TCP Parameters */
struct bictcp {
struct bpf_bictcp {
	__u32	cnt;		/* increase cwnd by 1 after ACKs */
	__u32	last_max_cwnd;	/* last maximum snd_cwnd */
	__u32	last_cwnd;	/* the last snd_cwnd */
@@ -91,7 +99,7 @@ struct bictcp {
	__u32	curr_rtt;	/* the minimum rtt of current round */
};

static inline void bictcp_reset(struct bictcp *ca)
static void bictcp_reset(struct bpf_bictcp *ca)
{
	ca->cnt = 0;
	ca->last_max_cwnd = 0;
@@ -112,7 +120,7 @@ extern unsigned long CONFIG_HZ __kconfig;
#define USEC_PER_SEC	1000000UL
#define USEC_PER_JIFFY	(USEC_PER_SEC / HZ)

static __always_inline __u64 div64_u64(__u64 dividend, __u64 divisor)
static __u64 div64_u64(__u64 dividend, __u64 divisor)
{
	return dividend / divisor;
}
@@ -120,7 +128,7 @@ static __always_inline __u64 div64_u64(__u64 dividend, __u64 divisor)
#define div64_ul div64_u64

#define BITS_PER_U64 (sizeof(__u64) * 8)
static __always_inline int fls64(__u64 x)
static int fls64(__u64 x)
{
	int num = BITS_PER_U64 - 1;

@@ -153,15 +161,15 @@ static __always_inline int fls64(__u64 x)
	return num + 1;
}

static __always_inline __u32 bictcp_clock_us(const struct sock *sk)
static __u32 bictcp_clock_us(const struct sock *sk)
{
	return tcp_sk(sk)->tcp_mstamp;
}

static __always_inline void bictcp_hystart_reset(struct sock *sk)
static void bictcp_hystart_reset(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct bictcp *ca = inet_csk_ca(sk);
	struct bpf_bictcp *ca = inet_csk_ca(sk);

	ca->round_start = ca->last_ack = bictcp_clock_us(sk);
	ca->end_seq = tp->snd_nxt;
@@ -169,11 +177,10 @@ static __always_inline void bictcp_hystart_reset(struct sock *sk)
	ca->sample_cnt = 0;
}

/* "struct_ops/" prefix is a requirement */
SEC("struct_ops/bpf_cubic_init")
SEC("struct_ops")
void BPF_PROG(bpf_cubic_init, struct sock *sk)
{
	struct bictcp *ca = inet_csk_ca(sk);
	struct bpf_bictcp *ca = inet_csk_ca(sk);

	bictcp_reset(ca);

@@ -184,12 +191,11 @@ void BPF_PROG(bpf_cubic_init, struct sock *sk)
		tcp_sk(sk)->snd_ssthresh = initial_ssthresh;
}

/* "struct_ops" prefix is a requirement */
SEC("struct_ops/bpf_cubic_cwnd_event")
SEC("struct_ops")
void BPF_PROG(bpf_cubic_cwnd_event, struct sock *sk, enum tcp_ca_event event)
{
	if (event == CA_EVENT_TX_START) {
		struct bictcp *ca = inet_csk_ca(sk);
		struct bpf_bictcp *ca = inet_csk_ca(sk);
		__u32 now = tcp_jiffies32;
		__s32 delta;

@@ -230,7 +236,7 @@ static const __u8 v[] = {
 * Newton-Raphson iteration.
 * Avg err ~= 0.195%
 */
static __always_inline __u32 cubic_root(__u64 a)
static __u32 cubic_root(__u64 a)
{
	__u32 x, b, shift;

@@ -263,8 +269,7 @@ static __always_inline __u32 cubic_root(__u64 a)
/*
 * Compute congestion window to use.
 */
static __always_inline void bictcp_update(struct bictcp *ca, __u32 cwnd,
					  __u32 acked)
static void bictcp_update(struct bpf_bictcp *ca, __u32 cwnd, __u32 acked)
{
	__u32 delta, bic_target, max_cnt;
	__u64 offs, t;
@@ -377,11 +382,11 @@ static __always_inline void bictcp_update(struct bictcp *ca, __u32 cwnd,
	ca->cnt = max(ca->cnt, 2U);
}

/* Or simply use the BPF_STRUCT_OPS to avoid the SEC boiler plate. */
void BPF_STRUCT_OPS(bpf_cubic_cong_avoid, struct sock *sk, __u32 ack, __u32 acked)
SEC("struct_ops")
void BPF_PROG(bpf_cubic_cong_avoid, struct sock *sk, __u32 ack, __u32 acked)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct bictcp *ca = inet_csk_ca(sk);
	struct bpf_bictcp *ca = inet_csk_ca(sk);

	if (!tcp_is_cwnd_limited(sk))
		return;
@@ -397,10 +402,11 @@ void BPF_STRUCT_OPS(bpf_cubic_cong_avoid, struct sock *sk, __u32 ack, __u32 acke
	tcp_cong_avoid_ai(tp, ca->cnt, acked);
}

__u32 BPF_STRUCT_OPS(bpf_cubic_recalc_ssthresh, struct sock *sk)
SEC("struct_ops")
__u32 BPF_PROG(bpf_cubic_recalc_ssthresh, struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	struct bictcp *ca = inet_csk_ca(sk);
	struct bpf_bictcp *ca = inet_csk_ca(sk);

	ca->epoch_start = 0;	/* end of epoch */

@@ -414,7 +420,8 @@ __u32 BPF_STRUCT_OPS(bpf_cubic_recalc_ssthresh, struct sock *sk)
	return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U);
}

void BPF_STRUCT_OPS(bpf_cubic_state, struct sock *sk, __u8 new_state)
SEC("struct_ops")
void BPF_PROG(bpf_cubic_state, struct sock *sk, __u8 new_state)
{
	if (new_state == TCP_CA_Loss) {
		bictcp_reset(inet_csk_ca(sk));
@@ -433,7 +440,7 @@ void BPF_STRUCT_OPS(bpf_cubic_state, struct sock *sk, __u8 new_state)
 * We apply another 100% factor because @rate is doubled at this point.
 * We cap the cushion to 1ms.
 */
static __always_inline __u32 hystart_ack_delay(struct sock *sk)
static __u32 hystart_ack_delay(struct sock *sk)
{
	unsigned long rate;

@@ -444,10 +451,10 @@ static __always_inline __u32 hystart_ack_delay(struct sock *sk)
		   div64_ul((__u64)GSO_MAX_SIZE * 4 * USEC_PER_SEC, rate));
}

static __always_inline void hystart_update(struct sock *sk, __u32 delay)
static void hystart_update(struct sock *sk, __u32 delay)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct bictcp *ca = inet_csk_ca(sk);
	struct bpf_bictcp *ca = inet_csk_ca(sk);
	__u32 threshold;

	if (hystart_detect & HYSTART_ACK_TRAIN) {
@@ -492,11 +499,11 @@ static __always_inline void hystart_update(struct sock *sk, __u32 delay)

int bpf_cubic_acked_called = 0;

void BPF_STRUCT_OPS(bpf_cubic_acked, struct sock *sk,
		    const struct ack_sample *sample)
SEC("struct_ops")
void BPF_PROG(bpf_cubic_acked, struct sock *sk, const struct ack_sample *sample)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	struct bictcp *ca = inet_csk_ca(sk);
	struct bpf_bictcp *ca = inet_csk_ca(sk);
	__u32 delay;

	bpf_cubic_acked_called = 1;
@@ -524,7 +531,8 @@ void BPF_STRUCT_OPS(bpf_cubic_acked, struct sock *sk,

extern __u32 tcp_reno_undo_cwnd(struct sock *sk) __ksym;

__u32 BPF_STRUCT_OPS(bpf_cubic_undo_cwnd, struct sock *sk)
SEC("struct_ops")
__u32 BPF_PROG(bpf_cubic_undo_cwnd, struct sock *sk)
{
	return tcp_reno_undo_cwnd(sk);
}
+34 −28
Original line number Diff line number Diff line
@@ -6,15 +6,23 @@
 * the kernel BPF logic.
 */

#include <stddef.h>
#include <linux/bpf.h>
#include <linux/types.h>
#include <linux/stddef.h>
#include <linux/tcp.h>
#include <errno.h>
#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include "bpf_tcp_helpers.h"

#ifndef EBUSY
#define EBUSY 16
#endif
#define min(a, b) ((a) < (b) ? (a) : (b))
#define max(a, b) ((a) > (b) ? (a) : (b))
#define min_not_zero(x, y) ({			\
	typeof(x) __x = (x);			\
	typeof(y) __y = (y);			\
	__x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); })
static bool before(__u32 seq1, __u32 seq2)
{
	return (__s32)(seq1-seq2) < 0;
}

char _license[] SEC("license") = "GPL";

@@ -35,7 +43,7 @@ struct {

#define DCTCP_MAX_ALPHA	1024U

struct dctcp {
struct bpf_dctcp {
	__u32 old_delivered;
	__u32 old_delivered_ce;
	__u32 prior_rcv_nxt;
@@ -48,8 +56,7 @@ struct dctcp {
static unsigned int dctcp_shift_g = 4; /* g = 1/2^4 */
static unsigned int dctcp_alpha_on_init = DCTCP_MAX_ALPHA;

static __always_inline void dctcp_reset(const struct tcp_sock *tp,
					struct dctcp *ca)
static void dctcp_reset(const struct tcp_sock *tp, struct bpf_dctcp *ca)
{
	ca->next_seq = tp->snd_nxt;

@@ -57,11 +64,11 @@ static __always_inline void dctcp_reset(const struct tcp_sock *tp,
	ca->old_delivered_ce = tp->delivered_ce;
}

SEC("struct_ops/dctcp_init")
SEC("struct_ops")
void BPF_PROG(dctcp_init, struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	struct dctcp *ca = inet_csk_ca(sk);
	struct bpf_dctcp *ca = inet_csk_ca(sk);
	int *stg;

	if (!(tp->ecn_flags & TCP_ECN_OK) && fallback[0]) {
@@ -104,21 +111,21 @@ void BPF_PROG(dctcp_init, struct sock *sk)
	dctcp_reset(tp, ca);
}

SEC("struct_ops/dctcp_ssthresh")
SEC("struct_ops")
__u32 BPF_PROG(dctcp_ssthresh, struct sock *sk)
{
	struct dctcp *ca = inet_csk_ca(sk);
	struct bpf_dctcp *ca = inet_csk_ca(sk);
	struct tcp_sock *tp = tcp_sk(sk);

	ca->loss_cwnd = tp->snd_cwnd;
	return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->dctcp_alpha) >> 11U), 2U);
}

SEC("struct_ops/dctcp_update_alpha")
SEC("struct_ops")
void BPF_PROG(dctcp_update_alpha, struct sock *sk, __u32 flags)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	struct dctcp *ca = inet_csk_ca(sk);
	struct bpf_dctcp *ca = inet_csk_ca(sk);

	/* Expired RTT */
	if (!before(tp->snd_una, ca->next_seq)) {
@@ -144,16 +151,16 @@ void BPF_PROG(dctcp_update_alpha, struct sock *sk, __u32 flags)
	}
}

static __always_inline void dctcp_react_to_loss(struct sock *sk)
static void dctcp_react_to_loss(struct sock *sk)
{
	struct dctcp *ca = inet_csk_ca(sk);
	struct bpf_dctcp *ca = inet_csk_ca(sk);
	struct tcp_sock *tp = tcp_sk(sk);

	ca->loss_cwnd = tp->snd_cwnd;
	tp->snd_ssthresh = max(tp->snd_cwnd >> 1U, 2U);
}

SEC("struct_ops/dctcp_state")
SEC("struct_ops")
void BPF_PROG(dctcp_state, struct sock *sk, __u8 new_state)
{
	if (new_state == TCP_CA_Recovery &&
@@ -164,7 +171,7 @@ void BPF_PROG(dctcp_state, struct sock *sk, __u8 new_state)
	 */
}

static __always_inline void dctcp_ece_ack_cwr(struct sock *sk, __u32 ce_state)
static void dctcp_ece_ack_cwr(struct sock *sk, __u32 ce_state)
{
	struct tcp_sock *tp = tcp_sk(sk);

@@ -179,8 +186,7 @@ static __always_inline void dctcp_ece_ack_cwr(struct sock *sk, __u32 ce_state)
 * S:	0 <- last pkt was non-CE
 *	1 <- last pkt was CE
 */
static __always_inline
void dctcp_ece_ack_update(struct sock *sk, enum tcp_ca_event evt,
static void dctcp_ece_ack_update(struct sock *sk, enum tcp_ca_event evt,
				 __u32 *prior_rcv_nxt, __u32 *ce_state)
{
	__u32 new_ce_state = (evt == CA_EVENT_ECN_IS_CE) ? 1 : 0;
@@ -201,10 +207,10 @@ void dctcp_ece_ack_update(struct sock *sk, enum tcp_ca_event evt,
	dctcp_ece_ack_cwr(sk, new_ce_state);
}

SEC("struct_ops/dctcp_cwnd_event")
SEC("struct_ops")
void BPF_PROG(dctcp_cwnd_event, struct sock *sk, enum tcp_ca_event ev)
{
	struct dctcp *ca = inet_csk_ca(sk);
	struct bpf_dctcp *ca = inet_csk_ca(sk);

	switch (ev) {
	case CA_EVENT_ECN_IS_CE:
@@ -220,17 +226,17 @@ void BPF_PROG(dctcp_cwnd_event, struct sock *sk, enum tcp_ca_event ev)
	}
}

SEC("struct_ops/dctcp_cwnd_undo")
SEC("struct_ops")
__u32 BPF_PROG(dctcp_cwnd_undo, struct sock *sk)
{
	const struct dctcp *ca = inet_csk_ca(sk);
	const struct bpf_dctcp *ca = inet_csk_ca(sk);

	return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd);
}

extern void tcp_reno_cong_avoid(struct sock *sk, __u32 ack, __u32 acked) __ksym;

SEC("struct_ops/dctcp_reno_cong_avoid")
SEC("struct_ops")
void BPF_PROG(dctcp_cong_avoid, struct sock *sk, __u32 ack, __u32 acked)
{
	tcp_reno_cong_avoid(sk, ack, acked);
+3 −7
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2021 Facebook */

#include <stddef.h>
#include <linux/bpf.h>
#include <linux/types.h>
#include <linux/stddef.h>
#include <linux/tcp.h>
#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include "bpf_tcp_helpers.h"

char _license[] SEC("license") = "GPL";
const char cubic[] = "cubic";

void BPF_STRUCT_OPS(dctcp_nouse_release, struct sock *sk)
SEC("struct_ops")
void BPF_PROG(dctcp_nouse_release, struct sock *sk)
{
	bpf_setsockopt(sk, SOL_TCP, TCP_CONGESTION,
		       (void *)cubic, sizeof(cubic));
Loading