Commit eb0718fb authored by Kuniyuki Iwashima's avatar Kuniyuki Iwashima Committed by Paolo Abeni
Browse files

af_unix: Annotate data-races around sk->sk_state in unix_write_space() and poll().



unix_poll() and unix_dgram_poll() read sk->sk_state locklessly and
calls unix_writable() which also reads sk->sk_state without holding
unix_state_lock().

Let's use READ_ONCE() in unix_poll() and unix_dgram_poll() and pass
it to unix_writable().

While at it, we remove TCP_SYN_SENT check in unix_dgram_poll() as
that state does not exist for AF_UNIX socket since the code was added.

Fixes: 1586a587 ("af_unix: do not report POLLOUT on listeners")
Fixes: 3c73419c ("af_unix: fix 'poll for write'/ connected DGRAM sockets")
Fixes: 1da177e4 ("Linux-2.6.12-rc2")
Signed-off-by: default avatarKuniyuki Iwashima <kuniyu@amazon.com>
Signed-off-by: default avatarPaolo Abeni <pabeni@redhat.com>
parent 3a0f38eb
Loading
Loading
Loading
Loading
+12 −13
Original line number Diff line number Diff line
@@ -530,9 +530,9 @@ static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
	return 0;
}

static int unix_writable(const struct sock *sk)
static int unix_writable(const struct sock *sk, unsigned char state)
{
	return sk->sk_state != TCP_LISTEN &&
	return state != TCP_LISTEN &&
	       (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
}

@@ -541,7 +541,7 @@ static void unix_write_space(struct sock *sk)
	struct socket_wq *wq;

	rcu_read_lock();
	if (unix_writable(sk)) {
	if (unix_writable(sk, READ_ONCE(sk->sk_state))) {
		wq = rcu_dereference(sk->sk_wq);
		if (skwq_has_sleeper(wq))
			wake_up_interruptible_sync_poll(&wq->wait,
@@ -3129,12 +3129,14 @@ static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned lon
static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
{
	struct sock *sk = sock->sk;
	unsigned char state;
	__poll_t mask;
	u8 shutdown;

	sock_poll_wait(file, sock, wait);
	mask = 0;
	shutdown = READ_ONCE(sk->sk_shutdown);
	state = READ_ONCE(sk->sk_state);

	/* exceptional events? */
	if (READ_ONCE(sk->sk_err))
@@ -3156,14 +3158,14 @@ static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wa

	/* Connection-based need to check for termination and startup */
	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
	    sk->sk_state == TCP_CLOSE)
	    state == TCP_CLOSE)
		mask |= EPOLLHUP;

	/*
	 * we set writable also when the other side has shut down the
	 * connection. This prevents stuck sockets.
	 */
	if (unix_writable(sk))
	if (unix_writable(sk, state))
		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;

	return mask;
@@ -3174,12 +3176,14 @@ static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
{
	struct sock *sk = sock->sk, *other;
	unsigned int writable;
	unsigned char state;
	__poll_t mask;
	u8 shutdown;

	sock_poll_wait(file, sock, wait);
	mask = 0;
	shutdown = READ_ONCE(sk->sk_shutdown);
	state = READ_ONCE(sk->sk_state);

	/* exceptional events? */
	if (READ_ONCE(sk->sk_err) ||
@@ -3199,19 +3203,14 @@ static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
		mask |= EPOLLIN | EPOLLRDNORM;

	/* Connection-based need to check for termination and startup */
	if (sk->sk_type == SOCK_SEQPACKET) {
		if (sk->sk_state == TCP_CLOSE)
	if (sk->sk_type == SOCK_SEQPACKET && state == TCP_CLOSE)
		mask |= EPOLLHUP;
		/* connection hasn't started yet? */
		if (sk->sk_state == TCP_SYN_SENT)
			return mask;
	}

	/* No write status requested, avoid expensive OUT tests. */
	if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
		return mask;

	writable = unix_writable(sk);
	writable = unix_writable(sk, state);
	if (writable) {
		unix_state_lock(sk);