Commit f4e1fb04, authored by Kuniyuki Iwashima, committed by Jakub Kicinski
Browse files

af_unix: Use cached value for SOCK_STREAM in unix_inq_len().



Compared to TCP, ioctl(SIOCINQ) for AF_UNIX SOCK_STREAM socket is more
expensive, as unix_inq_len() requires iterating through the receive queue
and accumulating skb->len.

Let's cache the value for SOCK_STREAM in a new field, updated during
sendmsg() and recvmsg().

The field is protected by the receive queue lock.

Note that ioctl(SIOCINQ) for SOCK_DGRAM returns the length of the first
skb in the queue.

SOCK_SEQPACKET still requires iterating through the queue because we do
not touch functions shared with unix_dgram_ops.  But, if really needed,
we can support it by switching __skb_try_recv_datagram() to a custom
version.

Signed-off-by: Kuniyuki Iwashima <kuniyu@google.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Link: https://patch.msgid.link/20250702223606.1054680-5-kuniyu@google.com


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parent d0aac854
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -47,6 +47,7 @@ struct unix_sock {
#define peer_wait		peer_wq.wait
	wait_queue_entry_t	peer_wake;
	struct scm_stat		scm_stat;
	int			inq_len;
#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
	struct sk_buff		*oob_skb;
#endif
+28 −10
Original line number Diff line number Diff line
@@ -2297,6 +2297,7 @@ static int queue_oob(struct sock *sk, struct msghdr *msg, struct sock *other,

	spin_lock(&other->sk_receive_queue.lock);
	WRITE_ONCE(ousk->oob_skb, skb);
	WRITE_ONCE(ousk->inq_len, ousk->inq_len + 1);
	__skb_queue_tail(&other->sk_receive_queue, skb);
	spin_unlock(&other->sk_receive_queue.lock);

@@ -2319,6 +2320,7 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
	struct sock *sk = sock->sk;
	struct sk_buff *skb = NULL;
	struct sock *other = NULL;
	struct unix_sock *otheru;
	struct scm_cookie scm;
	bool fds_sent = false;
	int err, sent = 0;
@@ -2342,13 +2344,15 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
	if (msg->msg_namelen) {
		err = READ_ONCE(sk->sk_state) == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
		goto out_err;
	} else {
	}

	other = unix_peer(sk);
	if (!other) {
		err = -ENOTCONN;
		goto out_err;
	}
	}

	otheru = unix_sk(other);

	if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN)
		goto out_pipe;
@@ -2417,7 +2421,12 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,

		unix_maybe_add_creds(skb, sk, other);
		scm_stat_add(other, skb);
		skb_queue_tail(&other->sk_receive_queue, skb);

		spin_lock(&other->sk_receive_queue.lock);
		WRITE_ONCE(otheru->inq_len, otheru->inq_len + skb->len);
		__skb_queue_tail(&other->sk_receive_queue, skb);
		spin_unlock(&other->sk_receive_queue.lock);

		unix_state_unlock(other);
		other->sk_data_ready(other);
		sent += size;
@@ -2704,6 +2713,7 @@ static int unix_stream_recv_urg(struct unix_stream_read_state *state)

	if (!(state->flags & MSG_PEEK)) {
		WRITE_ONCE(u->oob_skb, NULL);
		WRITE_ONCE(u->inq_len, u->inq_len - 1);

		if (oob_skb->prev != (struct sk_buff *)&sk->sk_receive_queue &&
		    !unix_skb_len(oob_skb->prev)) {
@@ -2808,6 +2818,8 @@ static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
		return -EAGAIN;
	}

	WRITE_ONCE(u->inq_len, u->inq_len - skb->len);

#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
	if (skb == u->oob_skb) {
		WRITE_ONCE(u->oob_skb, NULL);
@@ -2988,7 +3000,11 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state,
			if (unix_skb_len(skb))
				break;

			skb_unlink(skb, &sk->sk_receive_queue);
			spin_lock(&sk->sk_receive_queue.lock);
			WRITE_ONCE(u->inq_len, u->inq_len - skb->len);
			__skb_unlink(skb, &sk->sk_receive_queue);
			spin_unlock(&sk->sk_receive_queue.lock);

			consume_skb(skb);

			if (scm.fp)
@@ -3159,9 +3175,11 @@ long unix_inq_len(struct sock *sk)
	if (READ_ONCE(sk->sk_state) == TCP_LISTEN)
		return -EINVAL;

	if (sk->sk_type == SOCK_STREAM)
		return READ_ONCE(unix_sk(sk)->inq_len);

	spin_lock(&sk->sk_receive_queue.lock);
	if (sk->sk_type == SOCK_STREAM ||
	    sk->sk_type == SOCK_SEQPACKET) {
	if (sk->sk_type == SOCK_SEQPACKET) {
		skb_queue_walk(&sk->sk_receive_queue, skb)
			amount += unix_skb_len(skb);
	} else {