Commit a6d8fb54 authored by Shyam Prasad N, committed by Steve French

cifs: distribute channels across interfaces based on speed

Today, if the server interfaces are RSS capable, we simply
choose the fastest interface to set up a channel. This is not
a scalable approach, and it makes little attempt to
distribute the connections.

This change does a weighted distribution of channels across
all the available server interfaces, where the weight is
a function of the advertised interface speed.

Also make sure that we don't mix rdma and non-rdma for channels.
Signed-off-by: Shyam Prasad N <sprasad@microsoft.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
parent 0c51cc6f
...@@ -284,6 +284,8 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) ...@@ -284,6 +284,8 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
struct cifs_ses *ses; struct cifs_ses *ses;
struct cifs_tcon *tcon; struct cifs_tcon *tcon;
struct cifs_server_iface *iface; struct cifs_server_iface *iface;
size_t iface_weight = 0, iface_min_speed = 0;
struct cifs_server_iface *last_iface = NULL;
int c, i, j; int c, i, j;
seq_puts(m, seq_puts(m,
...@@ -549,11 +551,25 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) ...@@ -549,11 +551,25 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
"\tLast updated: %lu seconds ago", "\tLast updated: %lu seconds ago",
ses->iface_count, ses->iface_count,
(jiffies - ses->iface_last_update) / HZ); (jiffies - ses->iface_last_update) / HZ);
last_iface = list_last_entry(&ses->iface_list,
struct cifs_server_iface,
iface_head);
iface_min_speed = last_iface->speed;
j = 0; j = 0;
list_for_each_entry(iface, &ses->iface_list, list_for_each_entry(iface, &ses->iface_list,
iface_head) { iface_head) {
seq_printf(m, "\n\t%d)", ++j); seq_printf(m, "\n\t%d)", ++j);
cifs_dump_iface(m, iface); cifs_dump_iface(m, iface);
iface_weight = iface->speed / iface_min_speed;
seq_printf(m, "\t\tWeight (cur,total): (%zu,%zu)"
"\n\t\tAllocated channels: %u\n",
iface->weight_fulfilled,
iface_weight,
iface->num_channels);
if (is_ses_using_iface(ses, iface)) if (is_ses_using_iface(ses, iface))
seq_puts(m, "\t\t[CONNECTED]\n"); seq_puts(m, "\t\t[CONNECTED]\n");
} }
......
...@@ -969,6 +969,8 @@ struct cifs_server_iface { ...@@ -969,6 +969,8 @@ struct cifs_server_iface {
struct list_head iface_head; struct list_head iface_head;
struct kref refcount; struct kref refcount;
size_t speed; size_t speed;
size_t weight_fulfilled;
unsigned int num_channels;
unsigned int rdma_capable : 1; unsigned int rdma_capable : 1;
unsigned int rss_capable : 1; unsigned int rss_capable : 1;
unsigned int is_active : 1; /* unset if non existent */ unsigned int is_active : 1; /* unset if non existent */
......
...@@ -179,7 +179,9 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses) ...@@ -179,7 +179,9 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses)
int left; int left;
int rc = 0; int rc = 0;
int tries = 0; int tries = 0;
size_t iface_weight = 0, iface_min_speed = 0;
struct cifs_server_iface *iface = NULL, *niface = NULL; struct cifs_server_iface *iface = NULL, *niface = NULL;
struct cifs_server_iface *last_iface = NULL;
spin_lock(&ses->chan_lock); spin_lock(&ses->chan_lock);
...@@ -207,21 +209,11 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses) ...@@ -207,21 +209,11 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses)
} }
spin_unlock(&ses->chan_lock); spin_unlock(&ses->chan_lock);
/*
* Keep connecting to same, fastest, iface for all channels as
* long as its RSS. Try next fastest one if not RSS or channel
* creation fails.
*/
spin_lock(&ses->iface_lock);
iface = list_first_entry(&ses->iface_list, struct cifs_server_iface,
iface_head);
spin_unlock(&ses->iface_lock);
while (left > 0) { while (left > 0) {
tries++; tries++;
if (tries > 3*ses->chan_max) { if (tries > 3*ses->chan_max) {
cifs_dbg(FYI, "too many channel open attempts (%d channels left to open)\n", cifs_dbg(VFS, "too many channel open attempts (%d channels left to open)\n",
left); left);
break; break;
} }
...@@ -229,17 +221,35 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses) ...@@ -229,17 +221,35 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses)
spin_lock(&ses->iface_lock); spin_lock(&ses->iface_lock);
if (!ses->iface_count) { if (!ses->iface_count) {
spin_unlock(&ses->iface_lock); spin_unlock(&ses->iface_lock);
cifs_dbg(VFS, "server %s does not advertise interfaces\n",
ses->server->hostname);
break; break;
} }
if (!iface)
iface = list_first_entry(&ses->iface_list, struct cifs_server_iface,
iface_head);
last_iface = list_last_entry(&ses->iface_list, struct cifs_server_iface,
iface_head);
iface_min_speed = last_iface->speed;
list_for_each_entry_safe_from(iface, niface, &ses->iface_list, list_for_each_entry_safe_from(iface, niface, &ses->iface_list,
iface_head) { iface_head) {
/* do not mix rdma and non-rdma interfaces */
if (iface->rdma_capable != ses->server->rdma)
continue;
/* skip ifaces that are unusable */ /* skip ifaces that are unusable */
if (!iface->is_active || if (!iface->is_active ||
(is_ses_using_iface(ses, iface) && (is_ses_using_iface(ses, iface) &&
!iface->rss_capable)) { !iface->rss_capable))
continue;
/* check if we already allocated enough channels */
iface_weight = iface->speed / iface_min_speed;
if (iface->weight_fulfilled >= iface_weight)
continue; continue;
}
/* take ref before unlock */ /* take ref before unlock */
kref_get(&iface->refcount); kref_get(&iface->refcount);
...@@ -256,10 +266,21 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses) ...@@ -256,10 +266,21 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses)
continue; continue;
} }
cifs_dbg(FYI, "successfully opened new channel on iface:%pIS\n", iface->num_channels++;
iface->weight_fulfilled++;
cifs_dbg(VFS, "successfully opened new channel on iface:%pIS\n",
&iface->sockaddr); &iface->sockaddr);
break; break;
} }
/* reached end of list. reset weight_fulfilled and start over */
if (list_entry_is_head(iface, &ses->iface_list, iface_head)) {
list_for_each_entry(iface, &ses->iface_list, iface_head)
iface->weight_fulfilled = 0;
spin_unlock(&ses->iface_lock);
iface = NULL;
continue;
}
spin_unlock(&ses->iface_lock); spin_unlock(&ses->iface_lock);
left--; left--;
...@@ -278,8 +299,10 @@ int ...@@ -278,8 +299,10 @@ int
cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server) cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server)
{ {
unsigned int chan_index; unsigned int chan_index;
size_t iface_weight = 0, iface_min_speed = 0;
struct cifs_server_iface *iface = NULL; struct cifs_server_iface *iface = NULL;
struct cifs_server_iface *old_iface = NULL; struct cifs_server_iface *old_iface = NULL;
struct cifs_server_iface *last_iface = NULL;
int rc = 0; int rc = 0;
spin_lock(&ses->chan_lock); spin_lock(&ses->chan_lock);
...@@ -299,13 +322,34 @@ cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server) ...@@ -299,13 +322,34 @@ cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server)
spin_unlock(&ses->chan_lock); spin_unlock(&ses->chan_lock);
spin_lock(&ses->iface_lock); spin_lock(&ses->iface_lock);
if (!ses->iface_count) {
spin_unlock(&ses->iface_lock);
cifs_dbg(VFS, "server %s does not advertise interfaces\n", ses->server->hostname);
return 0;
}
last_iface = list_last_entry(&ses->iface_list, struct cifs_server_iface,
iface_head);
iface_min_speed = last_iface->speed;
/* then look for a new one */ /* then look for a new one */
list_for_each_entry(iface, &ses->iface_list, iface_head) { list_for_each_entry(iface, &ses->iface_list, iface_head) {
/* do not mix rdma and non-rdma interfaces */
if (iface->rdma_capable != server->rdma)
continue;
if (!iface->is_active || if (!iface->is_active ||
(is_ses_using_iface(ses, iface) && (is_ses_using_iface(ses, iface) &&
!iface->rss_capable)) { !iface->rss_capable)) {
continue; continue;
} }
/* check if we already allocated enough channels */
iface_weight = iface->speed / iface_min_speed;
if (iface->weight_fulfilled >= iface_weight)
continue;
kref_get(&iface->refcount); kref_get(&iface->refcount);
break; break;
} }
...@@ -321,10 +365,22 @@ cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server) ...@@ -321,10 +365,22 @@ cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server)
cifs_dbg(FYI, "replacing iface: %pIS with %pIS\n", cifs_dbg(FYI, "replacing iface: %pIS with %pIS\n",
&old_iface->sockaddr, &old_iface->sockaddr,
&iface->sockaddr); &iface->sockaddr);
old_iface->num_channels--;
if (old_iface->weight_fulfilled)
old_iface->weight_fulfilled--;
iface->num_channels++;
iface->weight_fulfilled++;
kref_put(&old_iface->refcount, release_iface); kref_put(&old_iface->refcount, release_iface);
} else if (old_iface) { } else if (old_iface) {
cifs_dbg(FYI, "releasing ref to iface: %pIS\n", cifs_dbg(FYI, "releasing ref to iface: %pIS\n",
&old_iface->sockaddr); &old_iface->sockaddr);
old_iface->num_channels--;
if (old_iface->weight_fulfilled)
old_iface->weight_fulfilled--;
kref_put(&old_iface->refcount, release_iface); kref_put(&old_iface->refcount, release_iface);
} else { } else {
WARN_ON(!iface); WARN_ON(!iface);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment