Commit e1a8fde7 authored by Linus Torvalds
Browse files

Merge tag 'for-5.19/io_uring-net-2022-05-22' of git://git.kernel.dk/linux-block

Pull io_uring 'more data in socket' support from Jens Axboe:
 "To be able to fully utilize the 'poll first' support in the core
  io_uring branch, it's advantageous knowing if the socket was empty
  after a receive. This adds support for that"

* tag 'for-5.19/io_uring-net-2022-05-22' of git://git.kernel.dk/linux-block:
  io_uring: return hint on whether more data is available after receive
  tcp: pass back data left in socket after receive
parents 368da430 f548a12e
Loading
Loading
Loading
Loading
+14 −4
Original line number Diff line number Diff line
@@ -6115,6 +6115,7 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
	struct io_async_msghdr iomsg, *kmsg;
	struct io_sr_msg *sr = &req->sr_msg;
	struct socket *sock;
	unsigned int cflags;
	unsigned flags;
	int ret, min_ret = 0;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
@@ -6154,6 +6155,7 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	kmsg->msg.msg_get_inq = 1;
	ret = __sys_recvmsg_sock(sock, &kmsg->msg, sr->umsg, kmsg->uaddr, flags);
	if (ret < min_ret) {
		if (ret == -EAGAIN && force_nonblock)
@@ -6178,7 +6180,10 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	__io_req_complete(req, issue_flags, ret, io_put_kbuf(req, issue_flags));
	cflags = io_put_kbuf(req, issue_flags);
	if (kmsg->msg.msg_inq)
		cflags |= IORING_CQE_F_SOCK_NONEMPTY;
	__io_req_complete(req, issue_flags, ret, cflags);
	return 0;
}

@@ -6188,6 +6193,7 @@ static int io_recv(struct io_kiocb *req, unsigned int issue_flags)
	struct msghdr msg;
	struct socket *sock;
	struct iovec iov;
	unsigned int cflags;
	unsigned flags;
	int ret, min_ret = 0;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
@@ -6214,11 +6220,12 @@ static int io_recv(struct io_kiocb *req, unsigned int issue_flags)
		goto out_free;

	msg.msg_name = NULL;
	msg.msg_namelen = 0;
	msg.msg_control = NULL;
	msg.msg_get_inq = 1;
	msg.msg_flags = 0;
	msg.msg_controllen = 0;
	msg.msg_namelen = 0;
	msg.msg_iocb = NULL;
	msg.msg_flags = 0;

	flags = sr->msg_flags;
	if (force_nonblock)
@@ -6249,7 +6256,10 @@ static int io_recv(struct io_kiocb *req, unsigned int issue_flags)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	__io_req_complete(req, issue_flags, ret, io_put_kbuf(req, issue_flags));
	cflags = io_put_kbuf(req, issue_flags);
	if (msg.msg_inq)
		cflags |= IORING_CQE_F_SOCK_NONEMPTY;
	__io_req_complete(req, issue_flags, ret, cflags);
	return 0;
}

+5 −1
Original line number Diff line number Diff line
@@ -50,6 +50,9 @@ struct linger {
struct msghdr {
	void		*msg_name;	/* ptr to socket address structure */
	int		msg_namelen;	/* size of socket address structure */

	int		msg_inq;	/* output, data left in socket */

	struct iov_iter	msg_iter;	/* data */

	/*
@@ -62,8 +65,9 @@ struct msghdr {
		void __user	*msg_control_user;
	};
	bool		msg_control_is_user : 1;
	__kernel_size_t	msg_controllen;	/* ancillary data buffer length */
	bool		msg_get_inq : 1;/* return INQ after receive */
	unsigned int	msg_flags;	/* flags on received message */
	__kernel_size_t	msg_controllen;	/* ancillary data buffer length */
	struct kiocb	*msg_iocb;	/* ptr to iocb for async requests */
};

+2 −0
Original line number Diff line number Diff line
@@ -258,9 +258,11 @@ struct io_uring_cqe {
 *
 * IORING_CQE_F_BUFFER	If set, the upper 16 bits are the buffer ID
 * IORING_CQE_F_MORE	If set, parent SQE will generate more CQE entries
 * IORING_CQE_F_SOCK_NONEMPTY	If set, more data to read after socket recv
 */
#define IORING_CQE_F_BUFFER		(1U << 0)
#define IORING_CQE_F_MORE		(1U << 1)
#define IORING_CQE_F_SOCK_NONEMPTY	(1U << 2)

enum {
	IORING_CQE_BUFFER_SHIFT		= 16,
+10 −6
Original line number Diff line number Diff line
@@ -2335,8 +2335,10 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len,
	if (sk->sk_state == TCP_LISTEN)
		goto out;

	if (tp->recvmsg_inq)
	if (tp->recvmsg_inq) {
		*cmsg_flags = TCP_CMSG_INQ;
		msg->msg_get_inq = 1;
	}
	timeo = sock_rcvtimeo(sk, nonblock);

	/* Urgent data needs to be handled specially. */
@@ -2559,7 +2561,7 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len,
int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
		int flags, int *addr_len)
{
	int cmsg_flags = 0, ret, inq;
	int cmsg_flags = 0, ret;
	struct scm_timestamping_internal tss;

	if (unlikely(flags & MSG_ERRQUEUE))
@@ -2576,12 +2578,14 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
	release_sock(sk);
	sk_defer_free_flush(sk);

	if (cmsg_flags && ret >= 0) {
	if ((cmsg_flags || msg->msg_get_inq) && ret >= 0) {
		if (cmsg_flags & TCP_CMSG_TS)
			tcp_recv_timestamp(msg, sk, &tss);
		if (cmsg_flags & TCP_CMSG_INQ) {
			inq = tcp_inq_hint(sk);
			put_cmsg(msg, SOL_TCP, TCP_CM_INQ, sizeof(inq), &inq);
		if (msg->msg_get_inq) {
			msg->msg_inq = tcp_inq_hint(sk);
			if (cmsg_flags & TCP_CMSG_INQ)
				put_cmsg(msg, SOL_TCP, TCP_CM_INQ,
					 sizeof(msg->msg_inq), &msg->msg_inq);
		}
	}
	return ret;