Commit 42386ae4 authored by Jakub Kicinski
Browse files

Merge branch 'tcp-add-skb-sk-to-more-control-packets'

Eric Dumazet says:

====================
tcp: add skb->sk to more control packets

Currently, TCP can set skb->sk for a variety of transmit packets.

However, packets sent on behalf of TIME_WAIT sockets do not
have an attached socket.

Same issue for RST packets.

We want to change this, in order to increase eBPF program
capabilities.

This is slightly risky, because various layers could
be confused by TIME_WAIT sockets showing up in skb->sk.

v2: audited all sk_to_full_sk() users and addressed Martin feedback.
====================

Link: https://patch.msgid.link/20241010174817.1543642-1-edumazet@google.com


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 76d37e4f 79636038
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -209,7 +209,7 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk,
	int __ret = 0;							       \
	if (cgroup_bpf_enabled(CGROUP_INET_EGRESS) && sk) {		       \
		typeof(sk) __sk = sk_to_full_sk(sk);			       \
		if (sk_fullsock(__sk) && __sk == skb_to_full_sk(skb) &&	       \
		if (__sk && __sk == skb_to_full_sk(skb) &&	       \
		    cgroup_bpf_sock_enabled(__sk, CGROUP_INET_EGRESS))	       \
			__ret = __cgroup_bpf_run_filter_skb(__sk, skb,	       \
						      CGROUP_INET_EGRESS); \
+6 −2
Original line number Diff line number Diff line
@@ -321,8 +321,10 @@ static inline unsigned long inet_cmsg_flags(const struct inet_sock *inet)
/* Resolve @sk to the socket that should be treated as its "full" socket:
 *  - a TCP_NEW_SYN_RECV request socket is replaced by its rsk_listener;
 *  - a TCP_TIME_WAIT socket yields NULL;
 *  - a NULL @sk stays NULL, and any other socket is returned unchanged.
 * When CONFIG_INET is not set, @sk is returned as is.
 */
static inline struct sock *sk_to_full_sk(struct sock *sk)
{
#ifdef CONFIG_INET
	/* NOTE(review): this is a diff rendering -- the plain sk->sk_state test
	 * on the next line is the pre-change form, superseded by the READ_ONCE()
	 * variant that follows it.
	 */
	if (sk && sk->sk_state == TCP_NEW_SYN_RECV)
	if (sk && READ_ONCE(sk->sk_state) == TCP_NEW_SYN_RECV)
		sk = inet_reqsk(sk)->rsk_listener;
	/* sk is re-tested for NULL here because it may have just been replaced
	 * by rsk_listener above. TIME_WAIT sockets are mapped to NULL.
	 */
	if (sk && READ_ONCE(sk->sk_state) == TCP_TIME_WAIT)
		sk = NULL;
#endif
	return sk;
}
@@ -331,8 +333,10 @@ static inline struct sock *sk_to_full_sk(struct sock *sk)
/* Const-qualified counterpart of the full-socket lookup:
 *  - a TCP_NEW_SYN_RECV request socket is replaced by its rsk_listener;
 *  - a TCP_TIME_WAIT socket yields NULL;
 *  - a NULL @sk stays NULL, and any other socket is returned unchanged.
 * When CONFIG_INET is not set, @sk is returned as is.
 */
static inline const struct sock *sk_const_to_full_sk(const struct sock *sk)
{
#ifdef CONFIG_INET
	/* NOTE(review): this is a diff rendering -- the plain sk->sk_state test
	 * on the next line is the pre-change form, superseded by the READ_ONCE()
	 * variant that follows it.
	 */
	if (sk && sk->sk_state == TCP_NEW_SYN_RECV)
	if (sk && READ_ONCE(sk->sk_state) == TCP_NEW_SYN_RECV)
		/* Cast directly to the request_sock layout to reach rsk_listener
		 * while keeping the pointer const.
		 */
		sk = ((const struct request_sock *)sk)->rsk_listener;
	/* sk is re-tested for NULL here because it may have just been replaced
	 * by rsk_listener above. TIME_WAIT sockets are mapped to NULL.
	 */
	if (sk && READ_ONCE(sk->sk_state) == TCP_TIME_WAIT)
		sk = NULL;
#endif
	return sk;
}
+2 −1
Original line number Diff line number Diff line
@@ -288,7 +288,8 @@ static inline __u8 ip_reply_arg_flowi_flags(const struct ip_reply_arg *arg)
	return (arg->flags & IP_REPLY_ARG_NOSRCCHECK) ? FLOWI_FLAG_ANYSRC : 0;
}

void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
void ip_send_unicast_reply(struct sock *sk, const struct sock *orig_sk,
			   struct sk_buff *skb,
			   const struct ip_options *sopt,
			   __be32 daddr, __be32 saddr,
			   const struct ip_reply_arg *arg,
+19 −0
Original line number Diff line number Diff line
@@ -1760,6 +1760,15 @@ void sock_efree(struct sk_buff *skb);
#ifdef CONFIG_INET
void sock_edemux(struct sk_buff *skb);
void sock_pfree(struct sk_buff *skb);

/* Attach @sk to @skb with sock_edemux() as the destructor.
 *
 * skb_orphan() is always called on @skb first. A reference on @sk is then
 * taken only if its refcount is not already zero; when that fails, @skb is
 * left exactly as skb_orphan() left it and no owner is installed.
 */
static inline void skb_set_owner_edemux(struct sk_buff *skb, struct sock *sk)
{
	skb_orphan(skb);

	/* Socket is already on its way out: do not attach it. */
	if (!refcount_inc_not_zero(&sk->sk_refcnt))
		return;

	skb->destructor = sock_edemux;
	skb->sk = sk;
}
#else
#define sock_edemux sock_efree
#endif
@@ -2802,6 +2811,16 @@ static inline bool sk_listener(const struct sock *sk)
	return (1 << sk->sk_state) & (TCPF_LISTEN | TCPF_NEW_SYN_RECV);
}

/* Tell whether @sk is in one of the LISTEN, NEW_SYN_RECV or TIME_WAIT states.
 * TCP SYNACK messages can be attached to a LISTEN or NEW_SYN_RECV socket
 * (which one depends on SYNCOOKIE), while TCP RST and ACK can be attached to
 * a TIME_WAIT socket.
 */
static inline bool sk_listener_or_tw(const struct sock *sk)
{
	const int state_bit = 1 << READ_ONCE(sk->sk_state);

	return state_bit & (TCPF_TIME_WAIT | TCPF_NEW_SYN_RECV | TCPF_LISTEN);
}

void sock_enable_timestamp(struct sock *sk, enum sock_flags flag);
int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len, int level,
		       int type);
+1 −5
Original line number Diff line number Diff line
@@ -6778,8 +6778,6 @@ __bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
		/* sk_to_full_sk() may return (sk)->rsk_listener, so make sure the original sk
		 * sock refcnt is decremented to prevent a request_sock leak.
		 */
		if (!sk_fullsock(sk2))
			sk2 = NULL;
		if (sk2 != sk) {
			sock_gen_put(sk);
			/* Ensure there is no need to bump sk2 refcnt */
@@ -6826,8 +6824,6 @@ bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
		/* sk_to_full_sk() may return (sk)->rsk_listener, so make sure the original sk
		 * sock refcnt is decremented to prevent a request_sock leak.
		 */
		if (!sk_fullsock(sk2))
			sk2 = NULL;
		if (sk2 != sk) {
			sock_gen_put(sk);
			/* Ensure there is no need to bump sk2 refcnt */
@@ -7276,7 +7272,7 @@ BPF_CALL_1(bpf_get_listener_sock, struct sock *, sk)
{
	sk = sk_to_full_sk(sk);

	if (sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_RCU_FREE))
	if (sk && sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_RCU_FREE))
		return (unsigned long)sk;

	return (unsigned long)NULL;
Loading