Commit d40ce48c authored by Jakub Kicinski
Browse files

Merge branch 'af_unix-replace-unix_table_lock-with-per-hash-locks'

Kuniyuki Iwashima says:

====================
af_unix: Replace unix_table_lock with per-hash locks.

The hash table of AF_UNIX sockets is protected by a single big lock,
unix_table_lock.  This series replaces it with small per-hash locks.

1st -  2nd : Misc refactoring
3rd -  8th : Separate BSD/abstract address logics
9th - 11th : Prep to save a hash in each socket
12th       : Replace the big lock
13th       : Speed up autobind()

Note to maintainers:
The 12th patch adds two kinds of Sparse warnings on patchwork:

  about unix_table_double_lock/unlock()
    We can avoid this by adding two apparent acquires/releases annotations,
    but there are the same kinds of warnings about unix_state_double_lock().

  about unix_next_socket() and unix_seq_stop() (/proc/net/unix)
    This is because Sparse does not understand logic in unix_next_socket(),
    which leaves a spin lock held until it returns NULL.
    Also, tcp_seq_stop() causes a warning for the same reason.

These warnings seem reasonable, but let me know if there is any better way.
Please see [0] for details.

[0]: https://lore.kernel.org/netdev/20211117001611.74123-1-kuniyu@amazon.co.jp/
====================

Link: https://lore.kernel.org/r/20211124021431.48956-1-kuniyu@amazon.co.jp


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 442b03c3 9acbc584
Loading
Loading
Loading
Loading
+1 −2
Original line number Diff line number Diff line
@@ -20,13 +20,12 @@ struct sock *unix_peer_get(struct sock *sk);
#define UNIX_HASH_BITS	8

extern unsigned int unix_tot_inflight;
extern spinlock_t unix_table_lock;
extern spinlock_t unix_table_locks[2 * UNIX_HASH_SIZE];
extern struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];

struct unix_address {
	refcount_t	refcnt;
	int		len;
	unsigned int	hash;
	struct sockaddr_un name[];
};

+331 −236

File changed.

Preview size limit exceeded, changes collapsed.

+12 −11
Original line number Diff line number Diff line
@@ -13,13 +13,14 @@

static int sk_diag_dump_name(struct sock *sk, struct sk_buff *nlskb)
{
	/* might or might not have unix_table_lock */
	/* might or might not have unix_table_locks */
	struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);

	if (!addr)
		return 0;

	return nla_put(nlskb, UNIX_DIAG_NAME, addr->len - sizeof(short),
	return nla_put(nlskb, UNIX_DIAG_NAME,
		       addr->len - offsetof(struct sockaddr_un, sun_path),
		       addr->name->sun_path);
}

@@ -203,13 +204,13 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
	s_slot = cb->args[0];
	num = s_num = cb->args[1];

	spin_lock(&unix_table_lock);
	for (slot = s_slot;
	     slot < ARRAY_SIZE(unix_socket_table);
	     s_num = 0, slot++) {
		struct sock *sk;

		num = 0;
		spin_lock(&unix_table_locks[slot]);
		sk_for_each(sk, &unix_socket_table[slot]) {
			if (!net_eq(sock_net(sk), net))
				continue;
@@ -220,14 +221,16 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
			if (sk_diag_dump(sk, skb, req,
					 NETLINK_CB(cb->skb).portid,
					 cb->nlh->nlmsg_seq,
					 NLM_F_MULTI) < 0)
					 NLM_F_MULTI) < 0) {
				spin_unlock(&unix_table_locks[slot]);
				goto done;
			}
next:
			num++;
		}
		spin_unlock(&unix_table_locks[slot]);
	}
done:
	spin_unlock(&unix_table_lock);
	cb->args[0] = slot;
	cb->args[1] = num;

@@ -236,21 +239,19 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)

static struct sock *unix_lookup_by_ino(unsigned int ino)
{
	int i;
	struct sock *sk;
	int i;

	spin_lock(&unix_table_lock);
	for (i = 0; i < ARRAY_SIZE(unix_socket_table); i++) {
		spin_lock(&unix_table_locks[i]);
		sk_for_each(sk, &unix_socket_table[i])
			if (ino == sock_i_ino(sk)) {
				sock_hold(sk);
				spin_unlock(&unix_table_lock);

				spin_unlock(&unix_table_locks[i]);
				return sk;
			}
		spin_unlock(&unix_table_locks[i]);
	}

	spin_unlock(&unix_table_lock);
	return NULL;
}

+1 −1
Original line number Diff line number Diff line
@@ -49,7 +49,7 @@ int dump_unix(struct bpf_iter__unix *ctx)
		       sock_i_ino(sk));

	if (unix_sk->addr) {
		if (!UNIX_ABSTRACT(unix_sk)) {
		if (unix_sk->addr->name->sun_path[0]) {
			BPF_SEQ_PRINTF(seq, " %s", unix_sk->addr->name->sun_path);
		} else {
			/* The name of the abstract UNIX domain socket starts
+0 −2

File changed.

Preview size limit exceeded, changes collapsed.

Loading