Commit 711741f9 authored by Paulo Alcantara's avatar Paulo Alcantara Committed by Steve French
Browse files

smb: client: fix potential deadlock when reconnecting channels



Fix cifs_signal_cifsd_for_reconnect() to take the correct lock order
and prevent the following deadlock from happening

======================================================
WARNING: possible circular locking dependency detected
6.16.0-rc3-build2+ #1301 Tainted: G S      W
------------------------------------------------------
cifsd/6055 is trying to acquire lock:
ffff88810ad56038 (&tcp_ses->srv_lock){+.+.}-{3:3}, at: cifs_signal_cifsd_for_reconnect+0x134/0x200

but task is already holding lock:
ffff888119c64330 (&ret_buf->chan_lock){+.+.}-{3:3}, at: cifs_signal_cifsd_for_reconnect+0xcf/0x200

which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:

-> #2 (&ret_buf->chan_lock){+.+.}-{3:3}:
       validate_chain+0x1cf/0x270
       __lock_acquire+0x60e/0x780
       lock_acquire.part.0+0xb4/0x1f0
       _raw_spin_lock+0x2f/0x40
       cifs_setup_session+0x81/0x4b0
       cifs_get_smb_ses+0x771/0x900
       cifs_mount_get_session+0x7e/0x170
       cifs_mount+0x92/0x2d0
       cifs_smb3_do_mount+0x161/0x460
       smb3_get_tree+0x55/0x90
       vfs_get_tree+0x46/0x180
       do_new_mount+0x1b0/0x2e0
       path_mount+0x6ee/0x740
       do_mount+0x98/0xe0
       __do_sys_mount+0x148/0x180
       do_syscall_64+0xa4/0x260
       entry_SYSCALL_64_after_hwframe+0x76/0x7e

-> #1 (&ret_buf->ses_lock){+.+.}-{3:3}:
       validate_chain+0x1cf/0x270
       __lock_acquire+0x60e/0x780
       lock_acquire.part.0+0xb4/0x1f0
       _raw_spin_lock+0x2f/0x40
       cifs_match_super+0x101/0x320
       sget+0xab/0x270
       cifs_smb3_do_mount+0x1e0/0x460
       smb3_get_tree+0x55/0x90
       vfs_get_tree+0x46/0x180
       do_new_mount+0x1b0/0x2e0
       path_mount+0x6ee/0x740
       do_mount+0x98/0xe0
       __do_sys_mount+0x148/0x180
       do_syscall_64+0xa4/0x260
       entry_SYSCALL_64_after_hwframe+0x76/0x7e

-> #0 (&tcp_ses->srv_lock){+.+.}-{3:3}:
       check_noncircular+0x95/0xc0
       check_prev_add+0x115/0x2f0
       validate_chain+0x1cf/0x270
       __lock_acquire+0x60e/0x780
       lock_acquire.part.0+0xb4/0x1f0
       _raw_spin_lock+0x2f/0x40
       cifs_signal_cifsd_for_reconnect+0x134/0x200
       __cifs_reconnect+0x8f/0x500
       cifs_handle_standard+0x112/0x280
       cifs_demultiplex_thread+0x64d/0xbc0
       kthread+0x2f7/0x310
       ret_from_fork+0x2a/0x230
       ret_from_fork_asm+0x1a/0x30

other info that might help us debug this:

Chain exists of:
  &tcp_ses->srv_lock --> &ret_buf->ses_lock --> &ret_buf->chan_lock

 Possible unsafe locking scenario:

       CPU0                    CPU1
       ----                    ----
  lock(&ret_buf->chan_lock);
                               lock(&ret_buf->ses_lock);
                               lock(&ret_buf->chan_lock);
  lock(&tcp_ses->srv_lock);

 *** DEADLOCK ***

3 locks held by cifsd/6055:
 #0: ffffffff857de398 (&cifs_tcp_ses_lock){+.+.}-{3:3}, at: cifs_signal_cifsd_for_reconnect+0x7b/0x200
 #1: ffff888119c64060 (&ret_buf->ses_lock){+.+.}-{3:3}, at: cifs_signal_cifsd_for_reconnect+0x9c/0x200
 #2: ffff888119c64330 (&ret_buf->chan_lock){+.+.}-{3:3}, at: cifs_signal_cifsd_for_reconnect+0xcf/0x200

Cc: linux-cifs@vger.kernel.org
Reported-by: default avatarDavid Howells <dhowells@redhat.com>
Fixes: d7d7a66a ("cifs: avoid use of global locks for high contention data")
Reviewed-by: default avatarDavid Howells <dhowells@redhat.com>
Tested-by: default avatarDavid Howells <dhowells@redhat.com>
Signed-off-by: default avatarPaulo Alcantara (Red Hat) <pc@manguebit.org>
Signed-off-by: default avatarDavid Howells <dhowells@redhat.com>
Signed-off-by: default avatarSteve French <stfrench@microsoft.com>
parent e97f9540
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -709,6 +709,7 @@ inc_rfc1001_len(void *buf, int count)
struct TCP_Server_Info {
	struct list_head tcp_ses_list;
	struct list_head smb_ses_list;
	struct list_head rlist; /* reconnect list */
	spinlock_t srv_lock;  /* protect anything here that is not protected */
	__u64 conn_id; /* connection identifier (useful for debugging) */
	int srv_count; /* reference counter */
+36 −22
Original line number Diff line number Diff line
@@ -124,6 +124,14 @@ static void smb2_query_server_interfaces(struct work_struct *work)
			   (SMB_INTERFACE_POLL_INTERVAL * HZ));
}

#define set_need_reco(server) \
do { \
	spin_lock(&server->srv_lock); \
	if (server->tcpStatus != CifsExiting) \
		server->tcpStatus = CifsNeedReconnect; \
	spin_unlock(&server->srv_lock); \
} while (0)

/*
 * Update the tcpStatus for the server.
 * This is used to signal the cifsd thread to call cifs_reconnect
@@ -137,39 +145,45 @@ void
cifs_signal_cifsd_for_reconnect(struct TCP_Server_Info *server,
				bool all_channels)
{
	struct TCP_Server_Info *pserver;
	struct TCP_Server_Info *nserver;
	struct cifs_ses *ses;
	LIST_HEAD(reco);
	int i;

	/* If server is a channel, select the primary channel */
	pserver = SERVER_IS_CHAN(server) ? server->primary_server : server;

	/* if we need to signal just this channel */
	if (!all_channels) {
		spin_lock(&server->srv_lock);
		if (server->tcpStatus != CifsExiting)
			server->tcpStatus = CifsNeedReconnect;
		spin_unlock(&server->srv_lock);
		set_need_reco(server);
		return;
	}

	spin_lock(&cifs_tcp_ses_lock);
	list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) {
		if (cifs_ses_exiting(ses))
	if (SERVER_IS_CHAN(server))
		server = server->primary_server;
	scoped_guard(spinlock, &cifs_tcp_ses_lock) {
		set_need_reco(server);
		list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
			spin_lock(&ses->ses_lock);
			if (ses->ses_status == SES_EXITING) {
				spin_unlock(&ses->ses_lock);
				continue;
			}
			spin_lock(&ses->chan_lock);
		for (i = 0; i < ses->chan_count; i++) {
			if (!ses->chans[i].server)
			for (i = 1; i < ses->chan_count; i++) {
				nserver = ses->chans[i].server;
				if (!nserver)
					continue;

			spin_lock(&ses->chans[i].server->srv_lock);
			if (ses->chans[i].server->tcpStatus != CifsExiting)
				ses->chans[i].server->tcpStatus = CifsNeedReconnect;
			spin_unlock(&ses->chans[i].server->srv_lock);
				nserver->srv_count++;
				list_add(&nserver->rlist, &reco);
			}
			spin_unlock(&ses->chan_lock);
			spin_unlock(&ses->ses_lock);
		}
	}

	list_for_each_entry_safe(server, nserver, &reco, rlist) {
		list_del_init(&server->rlist);
		set_need_reco(server);
		cifs_put_tcp_session(server, 0);
	}
	spin_unlock(&cifs_tcp_ses_lock);
}

/*