Commit 1d7e4538, authored by David Howells, committed by Jakub Kicinski
Browse files

ipv4, ipv6: Use splice_eof() to flush



Allow splice to undo the effects of MSG_MORE after prematurely ending a
splice/sendfile due to getting an EOF condition (->splice_read() returned
0) after splice had called sendmsg() with MSG_MORE set when the user didn't
set MSG_MORE.

For UDP, a pending packet will not be emitted if the socket is closed
before it is flushed; with this change, it will be flushed by ->splice_eof().

For TCP, it's not clear that MSG_MORE is actually effective.

Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Link: https://lore.kernel.org/r/CAHk-=wh=V579PDYvkpnTobCLGczbgxpMgGmmhqiTyE34Cpi5Gg@mail.gmail.com/


Signed-off-by: David Howells <dhowells@redhat.com>
cc: Kuniyuki Iwashima <kuniyu@amazon.com>
cc: Willem de Bruijn <willemdebruijn.kernel@gmail.com>
cc: David Ahern <dsahern@kernel.org>
cc: Jens Axboe <axboe@kernel.dk>
cc: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parent d4c1e80b
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -35,6 +35,7 @@ void __inet_accept(struct socket *sock, struct socket *newsock,
		   struct sock *newsk);
int inet_send_prepare(struct sock *sk);
int inet_sendmsg(struct socket *sock, struct msghdr *msg, size_t size);
/* Socket-level ->splice_eof(): relays to sk->sk_prot->splice_eof() if set. */
void inet_splice_eof(struct socket *sock);
ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
		      size_t size, int flags);
int inet_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
+1 −0
Original line number Diff line number Diff line
@@ -327,6 +327,7 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size);
int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *copied,
			 size_t size, struct ubuf_info *uarg);
/* On splice EOF, push the write queue (under the socket lock) if non-empty. */
void tcp_splice_eof(struct socket *sock);
int tcp_sendpage(struct sock *sk, struct page *page, int offset, size_t size,
		 int flags);
int tcp_sendpage_locked(struct sock *sk, struct page *page, int offset,
+1 −0
Original line number Diff line number Diff line
@@ -278,6 +278,7 @@ int udp_get_port(struct sock *sk, unsigned short snum,
int udp_err(struct sk_buff *, u32);
int udp_abort(struct sock *sk, int err);
int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len);
/*
 * UDP ->splice_eof() handler.
 * NOTE(review): presumably flushes a packet left pending by MSG_MORE;
 * confirm against the definition.
 */
void udp_splice_eof(struct socket *sock);
int udp_push_pending_frames(struct sock *sk);
void udp_flush_pending_frames(struct sock *sk);
int udp_cmsg_send(struct sock *sk, struct msghdr *msg, u16 *gso_size);
+18 −0
Original line number Diff line number Diff line
@@ -831,6 +831,21 @@ int inet_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
}
EXPORT_SYMBOL(inet_sendmsg);

/*
 * inet_splice_eof - socket-level ->splice_eof() handler for inet sockets
 * @sock: socket the splice was feeding
 *
 * Runs inet_send_prepare() on the underlying sock and, if that reports no
 * error, hands the notification on to the protocol's own ->splice_eof()
 * hook when the protocol provides one.  Nothing is returned to the caller:
 * a failed prepare or a missing hook simply makes this a no-op.
 */
void inet_splice_eof(struct socket *sock)
{
	struct sock *sk = sock->sk;
	const struct proto *cur_prot;
	int err;

	err = inet_send_prepare(sk);
	if (unlikely(err))
		return;

	/*
	 * Take a single snapshot of sk->sk_prot: IPV6_ADDRFORM is able to
	 * replace it while we are running.
	 */
	cur_prot = READ_ONCE(sk->sk_prot);
	if (!cur_prot->splice_eof)
		return;

	cur_prot->splice_eof(sock);
}
EXPORT_SYMBOL_GPL(inet_splice_eof);

ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
		      size_t size, int flags)
{
@@ -1050,6 +1065,7 @@ const struct proto_ops inet_stream_ops = {
#ifdef CONFIG_MMU
	.mmap		   = tcp_mmap,
#endif
	.splice_eof	   = inet_splice_eof,
	.sendpage	   = inet_sendpage,
	.splice_read	   = tcp_splice_read,
	.read_sock	   = tcp_read_sock,
@@ -1084,6 +1100,7 @@ const struct proto_ops inet_dgram_ops = {
	.read_skb	   = udp_read_skb,
	.recvmsg	   = inet_recvmsg,
	.mmap		   = sock_no_mmap,
	.splice_eof	   = inet_splice_eof,
	.sendpage	   = inet_sendpage,
	.set_peek_off	   = sk_set_peek_off,
#ifdef CONFIG_COMPAT
@@ -1115,6 +1132,7 @@ static const struct proto_ops inet_sockraw_ops = {
	.sendmsg	   = inet_sendmsg,
	.recvmsg	   = inet_recvmsg,
	.mmap		   = sock_no_mmap,
	.splice_eof	   = inet_splice_eof,
	.sendpage	   = inet_sendpage,
#ifdef CONFIG_COMPAT
	.compat_ioctl	   = inet_compat_ioctl,
+16 −0
Original line number Diff line number Diff line
@@ -1371,6 +1371,22 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
}
EXPORT_SYMBOL(tcp_sendmsg);

/*
 * tcp_splice_eof - TCP handler for the end of a splice into the socket
 * @sock: socket the splice was feeding
 *
 * If the write queue has a tail skb, take the socket lock, recompute the
 * current MSS and size goal, and call tcp_push() with the socket's nonagle
 * setting.  Both helpers are passed 0 as their flags-position argument
 * (NOTE(review): presumably so that MSG_MORE is no longer in effect for
 * the push -- confirm against tcp_push()).
 */
void tcp_splice_eof(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct tcp_sock *tcp = tcp_sk(sk);
	int goal;
	int cur_mss;

	/* Peek at the queue before locking: an empty queue needs no push. */
	if (tcp_write_queue_tail(sk)) {
		lock_sock(sk);
		cur_mss = tcp_send_mss(sk, &goal, 0);
		tcp_push(sk, 0, cur_mss, tcp->nonagle, goal);
		release_sock(sk);
	}
}
EXPORT_SYMBOL_GPL(tcp_splice_eof);

/*
 *	Handle reading urgent data. BSD has very simple semantics for
 *	this, no blocking and very strange errors 8)
Loading