Commit abfa70b3 authored by Jakub Kicinski's avatar Jakub Kicinski
Browse files

Merge branch 'tcp-__tcp_close-changes'

Eric Dumazet says:

====================
tcp: __tcp_close() changes

First patch fixes a rare bug.

Second patch adds a corresponding packetdrill test.

Third patch is a small optimization.
====================

Link: https://patch.msgid.link/20250903084720.1168904-1-edumazet@google.com


Signed-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parents 69777753 b13592d2
Loading
Loading
Loading
Loading
+7 −6
Original line number Diff line number Diff line
@@ -3099,8 +3099,8 @@ bool tcp_check_oom(const struct sock *sk, int shift)

void __tcp_close(struct sock *sk, long timeout)
{
	bool data_was_unread = false;
	struct sk_buff *skb;
	int data_was_unread = 0;
	int state;

	WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
@@ -3118,13 +3118,14 @@ void __tcp_close(struct sock *sk, long timeout)
	 *  descriptor close, not protocol-sourced closes, because the
	 *  reader process may not have drained the data yet!
	 */
	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		u32 len = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq;
	while ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) {
		u32 end_seq = TCP_SKB_CB(skb)->end_seq;

		if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
			len--;
		data_was_unread += len;
		__kfree_skb(skb);
			end_seq--;
		if (after(end_seq, tcp_sk(sk)->copied_seq))
			data_was_unread = true;
		tcp_eat_recv_skb(sk, skb);
	}

	/* If socket has been already reset (e.g. in tcp_reset()) - kill it. */
+32 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0

--mss=1000

`./defaults.sh`

// Initialize connection
    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
   +0 bind(3, ..., ...) = 0
   +0 listen(3, 1) = 0

   +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop>
   +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK>
  +.1 < . 1:1(0) ack 1 win 32792


   +0 accept(3, ..., ...) = 4
   +0 < . 1:1001(1000) ack 1 win 32792
   +0 > . 1:1(0) ack 1001
   +0 read(4, ..., 1000) = 1000

// resend the payload + a FIN
   +0 < F. 1:1001(1000) ack 1 win 32792
// Why do we have a delay and no dsack ?
   +0~+.04 > . 1:1(0) ack 1002

   +0 close(4) = 0

// According to RFC 2525, section 2.17
// we should _not_ send an RST here, because there was no data to consume.
   +0 > F. 1:1(0) ack 1002