Commit 5d6b58c9 authored by Eric Dumazet's avatar Eric Dumazet Committed by Jakub Kicinski
Browse files

net: lockless sock_i_ino()



Followup of commit c51da3f7 ("net: remove sock_i_uid()")

A recent syzbot report was the trigger for this change.

Over the years, we had many problems caused by the
read_lock[_bh](&sk->sk_callback_lock) in sock_i_uid().

We could fix smc_diag_dump_proto() or make a more radical move:

Instead of waiting for new syzbot reports, cache the socket
inode number in sk->sk_ino, so that we no longer
need to acquire sk->sk_callback_lock in sock_i_ino().

This makes socket dumps faster (one less cache line miss,
and two atomic ops avoided).

Prior art:

commit 25a9c8a4 ("netlink: Add __sock_i_ino() for __netlink_diag_dump().")
commit 4f9bf2a2 ("tcp: Don't acquire inet_listen_hashbucket::lock with disabled BH.")
commit efc3dbc3 ("rds: Make rds_sock_lock BH rather than IRQ safe.")

Fixes: d2d6422f ("x86: Allow to enable PREEMPT_RT.")
Reported-by: default avatar <syzbot+50603c05bbdf4dfdaffa@syzkaller.appspotmail.com>
Closes: https://lore.kernel.org/netdev/68b73804.050a0220.3db4df.01d8.GAE@google.com/T/#u


Signed-off-by: default avatarEric Dumazet <edumazet@google.com>
Reviewed-by: default avatarKuniyuki Iwashima <kuniyu@google.com>
Reviewed-by: default avatarSebastian Andrzej Siewior <bigeasy@linutronix.de>
Link: https://patch.msgid.link/20250902183603.740428-1-edumazet@google.com


Signed-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parent b4ada061
Loading
Loading
Loading
Loading
+13 −4
Original line number Diff line number Diff line
@@ -285,6 +285,7 @@ struct sk_filter;
  *	@sk_ack_backlog: current listen backlog
  *	@sk_max_ack_backlog: listen backlog set in listen()
  *	@sk_uid: user id of owner
  *	@sk_ino: inode number (zero if orphaned)
  *	@sk_prefer_busy_poll: prefer busypolling over softirq processing
  *	@sk_busy_poll_budget: napi processing budget when busypolling
  *	@sk_priority: %SO_PRIORITY setting
@@ -518,6 +519,7 @@ struct sock {
	u32			sk_ack_backlog;
	u32			sk_max_ack_backlog;
	kuid_t			sk_uid;
	unsigned long		sk_ino;
	spinlock_t		sk_peer_lock;
	int			sk_bind_phc;
	struct pid		*sk_peer_pid;
@@ -2056,6 +2058,10 @@ static inline int sk_rx_queue_get(const struct sock *sk)
static inline void sk_set_socket(struct sock *sk, struct socket *sock)
{
	sk->sk_socket = sock;
	if (sock) {
		WRITE_ONCE(sk->sk_uid, SOCK_INODE(sock)->i_uid);
		WRITE_ONCE(sk->sk_ino, SOCK_INODE(sock)->i_ino);
	}
}

static inline wait_queue_head_t *sk_sleep(struct sock *sk)
@@ -2077,6 +2083,7 @@ static inline void sock_orphan(struct sock *sk)
	sk_set_socket(sk, NULL);
	sk->sk_wq  = NULL;
	/* Note: sk_uid is unchanged. */
	WRITE_ONCE(sk->sk_ino, 0);
	write_unlock_bh(&sk->sk_callback_lock);
}

@@ -2087,20 +2094,22 @@ static inline void sock_graft(struct sock *sk, struct socket *parent)
	rcu_assign_pointer(sk->sk_wq, &parent->wq);
	parent->sk = sk;
	sk_set_socket(sk, parent);
	WRITE_ONCE(sk->sk_uid, SOCK_INODE(parent)->i_uid);
	security_sock_graft(sk, parent);
	write_unlock_bh(&sk->sk_callback_lock);
}

static inline unsigned long sock_i_ino(const struct sock *sk)
{
	/* Paired with WRITE_ONCE() in sock_graft() and sock_orphan() */
	return READ_ONCE(sk->sk_ino);
}

static inline kuid_t sk_uid(const struct sock *sk)
{
	/* Paired with WRITE_ONCE() in sockfs_setattr() */
	return READ_ONCE(sk->sk_uid);
}

unsigned long __sock_i_ino(struct sock *sk);
unsigned long sock_i_ino(struct sock *sk);

static inline kuid_t sock_net_uid(const struct net *net, const struct sock *sk)
{
	return sk ? sk_uid(sk) : make_kuid(net->user_ns, 0);
+0 −22
Original line number Diff line number Diff line
@@ -2780,28 +2780,6 @@ void sock_pfree(struct sk_buff *skb)
EXPORT_SYMBOL(sock_pfree);
#endif /* CONFIG_INET */

unsigned long __sock_i_ino(struct sock *sk)
{
	unsigned long ino;

	read_lock(&sk->sk_callback_lock);
	ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
	read_unlock(&sk->sk_callback_lock);
	return ino;
}
EXPORT_SYMBOL(__sock_i_ino);

unsigned long sock_i_ino(struct sock *sk)
{
	unsigned long ino;

	local_bh_disable();
	ino = __sock_i_ino(sk);
	local_bh_enable();
	return ino;
}
EXPORT_SYMBOL(sock_i_ino);

/*
 * Allocate a skb from the socket's send buffer.
 */
+0 −1
Original line number Diff line number Diff line
@@ -3554,7 +3554,6 @@ void mptcp_sock_graft(struct sock *sk, struct socket *parent)
	write_lock_bh(&sk->sk_callback_lock);
	rcu_assign_pointer(sk->sk_wq, &parent->wq);
	sk_set_socket(sk, parent);
	WRITE_ONCE(sk->sk_uid, SOCK_INODE(parent)->i_uid);
	write_unlock_bh(&sk->sk_callback_lock);
}

+1 −1
Original line number Diff line number Diff line
@@ -168,7 +168,7 @@ static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
				 NETLINK_CB(cb->skb).portid,
				 cb->nlh->nlmsg_seq,
				 NLM_F_MULTI,
				 __sock_i_ino(sk)) < 0) {
				 sock_i_ino(sk)) < 0) {
			ret = 1;
			break;
		}