Commit 97664c1a authored by David S. Miller
Browse files

Merge branch 'tcp-rcv-drop-reasons'

Jason Xing says:

====================
introduce drop reasons for tcp receive path

While debugging why an skb was dropped in syn cookie mode, I found that
the NOT_SPECIFIED reason is too general, so I decided to refine it.

v10
Link: https://lore.kernel.org/netdev/20240223193321.6549-1-kuniyu@amazon.com/
1. fix three nit problems (Kuniyuki)
2. add reviewed-by tag (Kuniyuki)

v9
Link: https://lore.kernel.org/netdev/20240222113003.67558-1-kerneljasonxing@gmail.com/
1. nit: remove one unneeded 'else' (David)
2. add reviewed-by tags (Eric, David)

v8
Link: https://lore.kernel.org/netdev/20240221025732.68157-1-kerneljasonxing@gmail.com/
1. refine parts of the code in patch [03/10] and patch [10/10] (Eric)
2. squash patch [11/11] in the last version into patch [10/11] (Eric)
3. add reviewed-by tags (Eric)

v7
Link: https://lore.kernel.org/all/20240219032838.91723-1-kerneljasonxing@gmail.com/
1. fix some misspellings (Kuniyuki)
2. remove redundant code in tcp_v6_do_rcv() (Kuniyuki)
3. add reviewed-by tag in patch [02/11] (Kuniyuki)

v6
Link: https://lore.kernel.org/all/c987d2c79e4a4655166eb8eafef473384edb37fb.camel@redhat.com/
Link: https://lore.kernel.org/all/CAL+tcoAgSjwsmFnDh_Gs9ZgMi-y5awtVx+4VhJPNRADjo7LLSA@mail.gmail.com/
1. Handle one more case in tcp_v6_do_rcv() so that it behaves as it did
before; otherwise it will trigger errors (Paolo).
2. Extend NO_SOCKET reason to consider two more reasons for request
socket and child socket.

v5:
Link: https://lore.kernel.org/netdev/20240213134205.8705-1-kerneljasonxing@gmail.com/
Link: https://lore.kernel.org/netdev/20240213140508.10878-1-kerneljasonxing@gmail.com/
1. Use SKB_DROP_REASON_IP_OUTNOROUTES instead of introducing a new
   one (Eric, David)
2. Reuse SKB_DROP_REASON_NOMEM to handle failure of request socket
   allocation (Eric)
3. Reuse NO_SOCKET instead of introducing COOKIE_NOCHILD
4. avoid duplication of these opt_skb tests/actions (Eric)
5. Use new name (TCP_ABORT_ON_DATA) for readability (David)
6. Reuse IP_OUTNOROUTES instead of INVALID_DST (Eric)

---
HISTORY
This series combines two previously sent series, as suggested by Jakub.
So I'm going to write a separate changelog for each of them.

PATCH 1/11 - 5/11
previous Link: https://lore.kernel.org/netdev/20240213134205.8705-1-kerneljasonxing@gmail.com/
Summary
1. introduce all the dropreasons we need, [1/11] patch.
2. use new dropreasons in ipv4 cookie check, [2/11],[3/11] patch.
3. use new dropreasons in ipv6 cookie check, [4/11],[5/11] patch.

v4:
Link: https://lore.kernel.org/netdev/20240212172302.3f95e454@kernel.org/
1. Fix misspelled name in Kdoc as suggested by Jakub.

v3:
Link: https://lore.kernel.org/all/CANn89iK40SoyJ8fS2U5kp3pDruo=zfQNPL-ppOF+LYaS9z-MVA@mail.gmail.com/
1. Split that patch into some smaller ones as suggested by Eric.

v2:
Link: https://lore.kernel.org/all/20240204104601.55760-1-kerneljasonxing@gmail.com/
1. change the title of 2/2 patch.
2. fix some warnings reported by the checkpatch tool.
3. use return value instead of adding more parameters, as suggested by Eric.

PATCH 6/11 - 11/11
previous Link: https://lore.kernel.org/netdev/20240213140508.10878-1-kerneljasonxing@gmail.com/
v4:
Link: https://lore.kernel.org/netdev/CANn89iJar+H3XkQ8HpsirH7b-_sbFe9NBUdAAO3pNJK3CKr_bg@mail.gmail.com/
Link: https://lore.kernel.org/netdev/20240213131205.4309-1-kerneljasonxing@gmail.com/
@acceptable has already been removed from tcp_rcv_state_process(), so I
need to remove the *TCP_CONNREQNOTACCEPTABLE related code which I wrote
in the v3 series.

v3:
Link: https://lore.kernel.org/all/CANn89iK40SoyJ8fS2U5kp3pDruo=zfQNPL-ppOF+LYaS9z-MVA@mail.gmail.com/
1. Split that patch into some smaller ones as suggested by Eric.

v2:
Link: https://lore.kernel.org/all/20240204104601.55760-1-kerneljasonxing@gmail.com/
1. change the title of 2/2 patch.
2. fix some warnings reported by the checkpatch tool.
3. use return value instead of adding more parameters, as suggested by Eric.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents 12a686c2 ee01defe
Loading
Loading
Loading
Loading
+24 −2
Original line number Diff line number Diff line
@@ -30,6 +30,7 @@
	FN(TCP_AOFAILURE)		\
	FN(SOCKET_BACKLOG)		\
	FN(TCP_FLAGS)			\
	FN(TCP_ABORT_ON_DATA)		\
	FN(TCP_ZEROWINDOW)		\
	FN(TCP_OLD_DATA)		\
	FN(TCP_OVERWINDOW)		\
@@ -37,6 +38,7 @@
	FN(TCP_RFC7323_PAWS)		\
	FN(TCP_OLD_SEQUENCE)		\
	FN(TCP_INVALID_SEQUENCE)	\
	FN(TCP_INVALID_ACK_SEQUENCE)	\
	FN(TCP_RESET)			\
	FN(TCP_INVALID_SYN)		\
	FN(TCP_CLOSE)			\
@@ -54,6 +56,7 @@
	FN(NEIGH_QUEUEFULL)		\
	FN(NEIGH_DEAD)			\
	FN(TC_EGRESS)			\
	FN(SECURITY_HOOK)		\
	FN(QDISC_DROP)			\
	FN(CPU_BACKLOG)			\
	FN(XDP)				\
@@ -105,7 +108,13 @@ enum skb_drop_reason {
	SKB_CONSUMED,
	/** @SKB_DROP_REASON_NOT_SPECIFIED: drop reason is not specified */
	SKB_DROP_REASON_NOT_SPECIFIED,
	/** @SKB_DROP_REASON_NO_SOCKET: socket not found */
	/**
	 * @SKB_DROP_REASON_NO_SOCKET: no valid socket that can be used.
	 * Reason could be one of three cases:
	 * 1) no established/listening socket found during lookup process
	 * 2) no valid request socket during 3WHS process
	 * 3) no valid child socket during 3WHS process
	 */
	SKB_DROP_REASON_NO_SOCKET,
	/** @SKB_DROP_REASON_PKT_TOO_SMALL: packet size is too small */
	SKB_DROP_REASON_PKT_TOO_SMALL,
@@ -197,6 +206,11 @@ enum skb_drop_reason {
	SKB_DROP_REASON_SOCKET_BACKLOG,
	/** @SKB_DROP_REASON_TCP_FLAGS: TCP flags invalid */
	SKB_DROP_REASON_TCP_FLAGS,
	/**
	 * @SKB_DROP_REASON_TCP_ABORT_ON_DATA: abort on data, corresponding to
	 * LINUX_MIB_TCPABORTONDATA
	 */
	SKB_DROP_REASON_TCP_ABORT_ON_DATA,
	/**
	 * @SKB_DROP_REASON_TCP_ZEROWINDOW: TCP receive window size is zero,
	 * see LINUX_MIB_TCPZEROWINDOWDROP
@@ -221,13 +235,19 @@ enum skb_drop_reason {
	SKB_DROP_REASON_TCP_OFOMERGE,
	/**
	 * @SKB_DROP_REASON_TCP_RFC7323_PAWS: PAWS check, corresponding to
	 * LINUX_MIB_PAWSESTABREJECTED
	 * LINUX_MIB_PAWSESTABREJECTED, LINUX_MIB_PAWSACTIVEREJECTED
	 */
	SKB_DROP_REASON_TCP_RFC7323_PAWS,
	/** @SKB_DROP_REASON_TCP_OLD_SEQUENCE: Old SEQ field (duplicate packet) */
	SKB_DROP_REASON_TCP_OLD_SEQUENCE,
	/** @SKB_DROP_REASON_TCP_INVALID_SEQUENCE: Not acceptable SEQ field */
	SKB_DROP_REASON_TCP_INVALID_SEQUENCE,
	/**
	 * @SKB_DROP_REASON_TCP_INVALID_ACK_SEQUENCE: Not acceptable ACK SEQ
	 * field because ack sequence is not in the window between snd_una
	 * and snd_nxt
	 */
	SKB_DROP_REASON_TCP_INVALID_ACK_SEQUENCE,
	/** @SKB_DROP_REASON_TCP_RESET: Invalid RST packet */
	SKB_DROP_REASON_TCP_RESET,
	/**
@@ -271,6 +291,8 @@ enum skb_drop_reason {
	SKB_DROP_REASON_NEIGH_DEAD,
	/** @SKB_DROP_REASON_TC_EGRESS: dropped in TC egress HOOK */
	SKB_DROP_REASON_TC_EGRESS,
	/** @SKB_DROP_REASON_SECURITY_HOOK: dropped due to security HOOK */
	SKB_DROP_REASON_SECURITY_HOOK,
	/**
	 * @SKB_DROP_REASON_QDISC_DROP: dropped by qdisc when packet outputting (
	 * failed to enqueue to current qdisc)
+3 −3
Original line number Diff line number Diff line
@@ -348,7 +348,7 @@ void tcp_wfree(struct sk_buff *skb);
void tcp_write_timer_handler(struct sock *sk);
void tcp_delack_timer_handler(struct sock *sk);
int tcp_ioctl(struct sock *sk, int cmd, int *karg);
int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb);
enum skb_drop_reason tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb);
void tcp_rcv_established(struct sock *sk, struct sk_buff *skb);
void tcp_rcv_space_adjust(struct sock *sk);
int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp);
@@ -396,7 +396,7 @@ enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw,
struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
			   struct request_sock *req, bool fastopen,
			   bool *lost_race);
int tcp_child_process(struct sock *parent, struct sock *child,
enum skb_drop_reason tcp_child_process(struct sock *parent, struct sock *child,
				       struct sk_buff *skb);
void tcp_enter_loss(struct sock *sk);
void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, int newly_lost, int flag);
+16 −5
Original line number Diff line number Diff line
@@ -408,6 +408,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
	struct rtable *rt;
	__u8 rcv_wscale;
	int full_space;
	SKB_DR(reason);

	if (!READ_ONCE(net->ipv4.sysctl_tcp_syncookies) ||
	    !th->ack || th->rst)
@@ -420,8 +421,10 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
		if (IS_ERR(req))
			goto out;
	}
	if (!req)
	if (!req) {
		SKB_DR_SET(reason, NO_SOCKET);
		goto out_drop;
	}

	ireq = inet_rsk(req);

@@ -433,8 +436,10 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
	 */
	RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(net, skb));

	if (security_inet_conn_request(sk, skb, req))
	if (security_inet_conn_request(sk, skb, req)) {
		SKB_DR_SET(reason, SECURITY_HOOK);
		goto out_free;
	}

	tcp_ao_syncookie(sk, skb, req, AF_INET);

@@ -451,8 +456,10 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
			   ireq->ir_loc_addr, th->source, th->dest, sk->sk_uid);
	security_req_classify_flow(req, flowi4_to_flowi_common(&fl4));
	rt = ip_route_output_key(net, &fl4);
	if (IS_ERR(rt))
	if (IS_ERR(rt)) {
		SKB_DR_SET(reason, IP_OUTNOROUTES);
		goto out_free;
	}

	/* Try to redo what tcp_v4_send_synack did. */
	req->rsk_window_clamp = tp->window_clamp ? :dst_metric(&rt->dst, RTAX_WINDOW);
@@ -475,12 +482,16 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
	/* ip_queue_xmit() depends on our flow being setup
	 * Normal sockets get it right from inet_csk_route_child_sock()
	 */
	if (ret)
	if (!ret) {
		SKB_DR_SET(reason, NO_SOCKET);
		goto out_drop;
	}
	inet_sk(ret)->cork.fl.u.ip4 = fl4;
out:
	return ret;
out_free:
	reqsk_free(req);
out_drop:
	kfree_skb_reason(skb, reason);
	return NULL;
}
+16 −8
Original line number Diff line number Diff line
@@ -6361,6 +6361,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
				inet_csk_reset_xmit_timer(sk,
						ICSK_TIME_RETRANS,
						TCP_TIMEOUT_MIN, TCP_RTO_MAX);
			SKB_DR_SET(reason, TCP_INVALID_ACK_SEQUENCE);
			goto reset_and_undo;
		}

@@ -6369,6 +6370,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
			     tcp_time_stamp_ts(tp))) {
			NET_INC_STATS(sock_net(sk),
					LINUX_MIB_PAWSACTIVEREJECTED);
			SKB_DR_SET(reason, TCP_RFC7323_PAWS);
			goto reset_and_undo;
		}

@@ -6572,7 +6574,8 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
reset_and_undo:
	tcp_clear_options(&tp->rx_opt);
	tp->rx_opt.mss_clamp = saved_clamp;
	return 1;
	/* we can reuse/return @reason to its caller to handle the exception */
	return reason;
}

static void tcp_rcv_synrecv_state_fastopen(struct sock *sk)
@@ -6616,7 +6619,8 @@ static void tcp_rcv_synrecv_state_fastopen(struct sock *sk)
 *	address independent.
 */

int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
enum skb_drop_reason
tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
@@ -6632,7 +6636,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)

	case TCP_LISTEN:
		if (th->ack)
			return 1;
			return SKB_DROP_REASON_TCP_FLAGS;

		if (th->rst) {
			SKB_DR_SET(reason, TCP_RESET);
@@ -6701,8 +6705,12 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
				  FLAG_NO_CHALLENGE_ACK);

	if ((int)reason <= 0) {
		if (sk->sk_state == TCP_SYN_RECV)
			return 1;	/* send one RST */
		if (sk->sk_state == TCP_SYN_RECV) {
			/* send one RST */
			if (!reason)
				return SKB_DROP_REASON_TCP_OLD_ACK;
			return -reason;
		}
		/* accept old ack during closing */
		if ((int)reason < 0) {
			tcp_send_challenge_ack(sk);
@@ -6778,7 +6786,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
		if (READ_ONCE(tp->linger2) < 0) {
			tcp_done(sk);
			NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
			return 1;
			return SKB_DROP_REASON_TCP_ABORT_ON_DATA;
		}
		if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
		    after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) {
@@ -6787,7 +6795,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
				tcp_fastopen_active_disable(sk);
			tcp_done(sk);
			NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
			return 1;
			return SKB_DROP_REASON_TCP_ABORT_ON_DATA;
		}

		tmo = tcp_fin_time(sk);
@@ -6852,7 +6860,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
			    after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) {
				NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
				tcp_reset(sk, skb);
				return 1;
				return SKB_DROP_REASON_TCP_ABORT_ON_DATA;
			}
		}
		fallthrough;
+10 −7
Original line number Diff line number Diff line
@@ -1907,7 +1907,6 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
		return 0;
	}

	reason = SKB_DROP_REASON_NOT_SPECIFIED;
	if (tcp_checksum_complete(skb))
		goto csum_err;

@@ -1915,9 +1914,10 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
		struct sock *nsk = tcp_v4_cookie_check(sk, skb);

		if (!nsk)
			goto discard;
			return 0;
		if (nsk != sk) {
			if (tcp_child_process(sk, nsk, skb)) {
			reason = tcp_child_process(sk, nsk, skb);
			if (reason) {
				rsk = nsk;
				goto reset;
			}
@@ -1926,7 +1926,8 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
	} else
		sock_rps_save_rxhash(sk, skb);

	if (tcp_rcv_state_process(sk, skb)) {
	reason = tcp_rcv_state_process(sk, skb);
	if (reason) {
		rsk = sk;
		goto reset;
	}
@@ -2275,10 +2276,12 @@ int tcp_v4_rcv(struct sk_buff *skb)
		if (nsk == sk) {
			reqsk_put(req);
			tcp_v4_restore_cb(skb);
		} else if (tcp_child_process(sk, nsk, skb)) {
		} else {
			drop_reason = tcp_child_process(sk, nsk, skb);
			if (drop_reason) {
				tcp_v4_send_reset(nsk, skb);
				goto discard_and_relse;
		} else {
			}
			sock_put(sk);
			return 0;
		}
Loading