Commit a6d8fb54 authored by Shyam Prasad N's avatar Shyam Prasad N Committed by Steve French
Browse files

cifs: distribute channels across interfaces based on speed



Today, if the server interfaces RSS capable, we simply
choose the fastest interface to setup a channel. This is not
a scalable approach, and does not make a lot of attempt to
distribute the connections.

This change does a weighted distribution of channels across
all the available server interfaces, where the weight is
a function of the advertised interface speed.

Also make sure that we don't mix rdma and non-rdma for channels.

Signed-off-by: default avatarShyam Prasad N <sprasad@microsoft.com>
Signed-off-by: default avatarSteve French <stfrench@microsoft.com>
parent 0c51cc6f
Loading
Loading
Loading
Loading
+16 −0
Original line number Diff line number Diff line
@@ -284,6 +284,8 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
	struct cifs_ses *ses;
	struct cifs_tcon *tcon;
	struct cifs_server_iface *iface;
	size_t iface_weight = 0, iface_min_speed = 0;
	struct cifs_server_iface *last_iface = NULL;
	int c, i, j;

	seq_puts(m,
@@ -549,11 +551,25 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
					   "\tLast updated: %lu seconds ago",
					   ses->iface_count,
					   (jiffies - ses->iface_last_update) / HZ);

			last_iface = list_last_entry(&ses->iface_list,
						     struct cifs_server_iface,
						     iface_head);
			iface_min_speed = last_iface->speed;

			j = 0;
			list_for_each_entry(iface, &ses->iface_list,
						 iface_head) {
				seq_printf(m, "\n\t%d)", ++j);
				cifs_dump_iface(m, iface);

				iface_weight = iface->speed / iface_min_speed;
				seq_printf(m, "\t\tWeight (cur,total): (%zu,%zu)"
					   "\n\t\tAllocated channels: %u\n",
					   iface->weight_fulfilled,
					   iface_weight,
					   iface->num_channels);

				if (is_ses_using_iface(ses, iface))
					seq_puts(m, "\t\t[CONNECTED]\n");
			}
+2 −0
Original line number Diff line number Diff line
@@ -969,6 +969,8 @@ struct cifs_server_iface {
	struct list_head iface_head;
	struct kref refcount;
	size_t speed;
	size_t weight_fulfilled;
	unsigned int num_channels;
	unsigned int rdma_capable : 1;
	unsigned int rss_capable : 1;
	unsigned int is_active : 1; /* unset if non existent */
+70 −14
Original line number Diff line number Diff line
@@ -179,7 +179,9 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses)
	int left;
	int rc = 0;
	int tries = 0;
	size_t iface_weight = 0, iface_min_speed = 0;
	struct cifs_server_iface *iface = NULL, *niface = NULL;
	struct cifs_server_iface *last_iface = NULL;

	spin_lock(&ses->chan_lock);

@@ -207,21 +209,11 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses)
	}
	spin_unlock(&ses->chan_lock);

	/*
	 * Keep connecting to same, fastest, iface for all channels as
	 * long as its RSS. Try next fastest one if not RSS or channel
	 * creation fails.
	 */
	spin_lock(&ses->iface_lock);
	iface = list_first_entry(&ses->iface_list, struct cifs_server_iface,
				 iface_head);
	spin_unlock(&ses->iface_lock);

	while (left > 0) {

		tries++;
		if (tries > 3*ses->chan_max) {
			cifs_dbg(FYI, "too many channel open attempts (%d channels left to open)\n",
			cifs_dbg(VFS, "too many channel open attempts (%d channels left to open)\n",
				 left);
			break;
		}
@@ -229,17 +221,35 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses)
		spin_lock(&ses->iface_lock);
		if (!ses->iface_count) {
			spin_unlock(&ses->iface_lock);
			cifs_dbg(VFS, "server %s does not advertise interfaces\n",
				      ses->server->hostname);
			break;
		}

		if (!iface)
			iface = list_first_entry(&ses->iface_list, struct cifs_server_iface,
						 iface_head);
		last_iface = list_last_entry(&ses->iface_list, struct cifs_server_iface,
					     iface_head);
		iface_min_speed = last_iface->speed;

		list_for_each_entry_safe_from(iface, niface, &ses->iface_list,
				    iface_head) {
			/* do not mix rdma and non-rdma interfaces */
			if (iface->rdma_capable != ses->server->rdma)
				continue;

			/* skip ifaces that are unusable */
			if (!iface->is_active ||
			    (is_ses_using_iface(ses, iface) &&
			     !iface->rss_capable)) {
			     !iface->rss_capable))
				continue;

			/* check if we already allocated enough channels */
			iface_weight = iface->speed / iface_min_speed;

			if (iface->weight_fulfilled >= iface_weight)
				continue;
			}

			/* take ref before unlock */
			kref_get(&iface->refcount);
@@ -256,10 +266,21 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses)
				continue;
			}

			cifs_dbg(FYI, "successfully opened new channel on iface:%pIS\n",
			iface->num_channels++;
			iface->weight_fulfilled++;
			cifs_dbg(VFS, "successfully opened new channel on iface:%pIS\n",
				 &iface->sockaddr);
			break;
		}

		/* reached end of list. reset weight_fulfilled and start over */
		if (list_entry_is_head(iface, &ses->iface_list, iface_head)) {
			list_for_each_entry(iface, &ses->iface_list, iface_head)
				iface->weight_fulfilled = 0;
			spin_unlock(&ses->iface_lock);
			iface = NULL;
			continue;
		}
		spin_unlock(&ses->iface_lock);

		left--;
@@ -278,8 +299,10 @@ int
cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server)
{
	unsigned int chan_index;
	size_t iface_weight = 0, iface_min_speed = 0;
	struct cifs_server_iface *iface = NULL;
	struct cifs_server_iface *old_iface = NULL;
	struct cifs_server_iface *last_iface = NULL;
	int rc = 0;

	spin_lock(&ses->chan_lock);
@@ -299,13 +322,34 @@ cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server)
	spin_unlock(&ses->chan_lock);

	spin_lock(&ses->iface_lock);
	if (!ses->iface_count) {
		spin_unlock(&ses->iface_lock);
		cifs_dbg(VFS, "server %s does not advertise interfaces\n", ses->server->hostname);
		return 0;
	}

	last_iface = list_last_entry(&ses->iface_list, struct cifs_server_iface,
				     iface_head);
	iface_min_speed = last_iface->speed;

	/* then look for a new one */
	list_for_each_entry(iface, &ses->iface_list, iface_head) {
		/* do not mix rdma and non-rdma interfaces */
		if (iface->rdma_capable != server->rdma)
			continue;

		if (!iface->is_active ||
		    (is_ses_using_iface(ses, iface) &&
		     !iface->rss_capable)) {
			continue;
		}

		/* check if we already allocated enough channels */
		iface_weight = iface->speed / iface_min_speed;

		if (iface->weight_fulfilled >= iface_weight)
			continue;

		kref_get(&iface->refcount);
		break;
	}
@@ -321,10 +365,22 @@ cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server)
		cifs_dbg(FYI, "replacing iface: %pIS with %pIS\n",
			 &old_iface->sockaddr,
			 &iface->sockaddr);

		old_iface->num_channels--;
		if (old_iface->weight_fulfilled)
			old_iface->weight_fulfilled--;
		iface->num_channels++;
		iface->weight_fulfilled++;

		kref_put(&old_iface->refcount, release_iface);
	} else if (old_iface) {
		cifs_dbg(FYI, "releasing ref to iface: %pIS\n",
			 &old_iface->sockaddr);

		old_iface->num_channels--;
		if (old_iface->weight_fulfilled)
			old_iface->weight_fulfilled--;

		kref_put(&old_iface->refcount, release_iface);
	} else {
		WARN_ON(!iface);