Commit e9f0fb88 authored by Mahesh Bandewar's avatar Mahesh Bandewar Committed by David S. Miller

bonding: Add tlb_dynamic_lb parameter for tlb mode

The aggressive load balancing causes packet re-ordering as active
flows are moved from a slave to another within the group. Sometimes
this aggressive lb is not necessary if the preference is for less
re-ordering. This parameter if used with value "0" disables
this dynamic flow shuffling minimizing packet re-ordering. Of course
the side effect is that it has to live with the static load balancing
that the hashing distribution provides. This impact is less severe if
the correct xmit-hashing-policy is used for the tlb setup.

The default value of the parameter is set to "1" mimicking the earlier
behavior.

Ran the netperf test with 200 streams for 1 min between two hosts with
4x1G trunk (xmit-lb mode with xmit-policy L3+4) before and after these
changes. Following was the command used for those 200 instances -

    netperf -t TCP_RR -l 60 -s 5 -H <host> -- -r81920,81920

Transactions per second:
    Before change: 1,367.11
    After  change: 1,470.65

Change-Id: Ie3f75c77282cf602e83a6e833c6eb164e72a0990
Signed-off-by: Mahesh Bandewar <maheshb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent f05b42ea
...@@ -585,13 +585,19 @@ mode ...@@ -585,13 +585,19 @@ mode
balance-tlb or 5 balance-tlb or 5
Adaptive transmit load balancing: channel bonding that Adaptive transmit load balancing: channel bonding that
does not require any special switch support. The does not require any special switch support.
outgoing traffic is distributed according to the
current load (computed relative to the speed) on each In tlb_dynamic_lb=1 mode; the outgoing traffic is
slave. Incoming traffic is received by the current distributed according to the current load (computed
slave. If the receiving slave fails, another slave relative to the speed) on each slave.
takes over the MAC address of the failed receiving
slave. In tlb_dynamic_lb=0 mode; the load balancing based on
current load is disabled and the load is distributed
only using the hash distribution.
Incoming traffic is received by the current slave.
If the receiving slave fails, another slave takes over
the MAC address of the failed receiving slave.
Prerequisite: Prerequisite:
...@@ -736,6 +742,28 @@ primary_reselect ...@@ -736,6 +742,28 @@ primary_reselect
This option was added for bonding version 3.6.0. This option was added for bonding version 3.6.0.
tlb_dynamic_lb
Specifies if dynamic shuffling of flows is enabled in tlb
mode. The value has no effect on any other modes.
The default behavior of tlb mode is to shuffle active flows across
slaves based on the load in that interval. This gives nice lb
characteristics but can cause packet reordering. If re-ordering is
a concern use this variable to disable flow shuffling and rely on
load balancing provided solely by the hash distribution.
xmit_hash_policy can be used to select the appropriate hashing for
the setup.
The sysfs entry can be used to change the setting per bond device
and the initial value is derived from the module parameter. The
sysfs entry is allowed to be changed only if the bond device is
down.
The default value is "1" that enables flow shuffling while value "0"
disables it. This option was added in bonding driver 3.7.1
updelay updelay
Specifies the time, in milliseconds, to wait before enabling a Specifies the time, in milliseconds, to wait before enabling a
......
...@@ -1356,7 +1356,8 @@ static int bond_do_alb_xmit(struct sk_buff *skb, struct bonding *bond, ...@@ -1356,7 +1356,8 @@ static int bond_do_alb_xmit(struct sk_buff *skb, struct bonding *bond,
if (!tx_slave) { if (!tx_slave) {
/* unbalanced or unassigned, send through primary */ /* unbalanced or unassigned, send through primary */
tx_slave = rcu_dereference(bond->curr_active_slave); tx_slave = rcu_dereference(bond->curr_active_slave);
bond_info->unbalanced_load += skb->len; if (bond->params.tlb_dynamic_lb)
bond_info->unbalanced_load += skb->len;
} }
if (tx_slave && SLAVE_IS_OK(tx_slave)) { if (tx_slave && SLAVE_IS_OK(tx_slave)) {
...@@ -1369,7 +1370,7 @@ static int bond_do_alb_xmit(struct sk_buff *skb, struct bonding *bond, ...@@ -1369,7 +1370,7 @@ static int bond_do_alb_xmit(struct sk_buff *skb, struct bonding *bond,
goto out; goto out;
} }
if (tx_slave) { if (tx_slave && bond->params.tlb_dynamic_lb) {
_lock_tx_hashtbl(bond); _lock_tx_hashtbl(bond);
__tlb_clear_slave(bond, tx_slave, 0); __tlb_clear_slave(bond, tx_slave, 0);
_unlock_tx_hashtbl(bond); _unlock_tx_hashtbl(bond);
...@@ -1399,11 +1400,21 @@ int bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev) ...@@ -1399,11 +1400,21 @@ int bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
/* In case of IPX, it will falback to L2 hash */ /* In case of IPX, it will falback to L2 hash */
case htons(ETH_P_IPV6): case htons(ETH_P_IPV6):
hash_index = bond_xmit_hash(bond, skb); hash_index = bond_xmit_hash(bond, skb);
tx_slave = tlb_choose_channel(bond, hash_index & 0xFF, skb->len); if (bond->params.tlb_dynamic_lb) {
tx_slave = tlb_choose_channel(bond,
hash_index & 0xFF,
skb->len);
} else {
struct list_head *iter;
int idx = hash_index % bond->slave_cnt;
bond_for_each_slave_rcu(bond, tx_slave, iter)
if (--idx < 0)
break;
}
break; break;
} }
} }
return bond_do_alb_xmit(skb, bond, tx_slave); return bond_do_alb_xmit(skb, bond, tx_slave);
} }
......
...@@ -3096,7 +3096,8 @@ static int bond_open(struct net_device *bond_dev) ...@@ -3096,7 +3096,8 @@ static int bond_open(struct net_device *bond_dev)
*/ */
if (bond_alb_initialize(bond, (bond->params.mode == BOND_MODE_ALB))) if (bond_alb_initialize(bond, (bond->params.mode == BOND_MODE_ALB)))
return -ENOMEM; return -ENOMEM;
queue_delayed_work(bond->wq, &bond->alb_work, 0); if (bond->params.tlb_dynamic_lb)
queue_delayed_work(bond->wq, &bond->alb_work, 0);
} }
if (bond->params.miimon) /* link check interval, in milliseconds. */ if (bond->params.miimon) /* link check interval, in milliseconds. */
...@@ -4304,6 +4305,7 @@ static int bond_check_params(struct bond_params *params) ...@@ -4304,6 +4305,7 @@ static int bond_check_params(struct bond_params *params)
params->min_links = min_links; params->min_links = min_links;
params->lp_interval = lp_interval; params->lp_interval = lp_interval;
params->packets_per_slave = packets_per_slave; params->packets_per_slave = packets_per_slave;
params->tlb_dynamic_lb = 1; /* Default value */
if (packets_per_slave > 0) { if (packets_per_slave > 0) {
params->reciprocal_packets_per_slave = params->reciprocal_packets_per_slave =
reciprocal_value(packets_per_slave); reciprocal_value(packets_per_slave);
......
...@@ -70,6 +70,8 @@ static int bond_option_mode_set(struct bonding *bond, ...@@ -70,6 +70,8 @@ static int bond_option_mode_set(struct bonding *bond,
const struct bond_opt_value *newval); const struct bond_opt_value *newval);
static int bond_option_slaves_set(struct bonding *bond, static int bond_option_slaves_set(struct bonding *bond,
const struct bond_opt_value *newval); const struct bond_opt_value *newval);
static int bond_option_tlb_dynamic_lb_set(struct bonding *bond,
const struct bond_opt_value *newval);
static const struct bond_opt_value bond_mode_tbl[] = { static const struct bond_opt_value bond_mode_tbl[] = {
...@@ -179,6 +181,12 @@ static const struct bond_opt_value bond_lp_interval_tbl[] = { ...@@ -179,6 +181,12 @@ static const struct bond_opt_value bond_lp_interval_tbl[] = {
{ NULL, -1, 0}, { NULL, -1, 0},
}; };
/*
 * Accepted values for the BOND_OPT_TLB_DYNAMIC_LB option (referenced as
 * .values in that option's bond_opts[] entry).  Each row pairs a string
 * with a numeric value and a flags word.
 */
static const struct bond_opt_value bond_tlb_dynamic_lb_tbl[] = {
{ "off", 0, 0},	/* value 0 */
{ "on", 1, BOND_VALFLAG_DEFAULT},	/* value 1, carries the DEFAULT flag */
{ NULL, -1, 0}	/* NULL-string row closing the table, same shape as in bond_lp_interval_tbl */
};
static const struct bond_option bond_opts[] = { static const struct bond_option bond_opts[] = {
[BOND_OPT_MODE] = { [BOND_OPT_MODE] = {
.id = BOND_OPT_MODE, .id = BOND_OPT_MODE,
...@@ -364,6 +372,15 @@ static const struct bond_option bond_opts[] = { ...@@ -364,6 +372,15 @@ static const struct bond_option bond_opts[] = {
.flags = BOND_OPTFLAG_RAWVAL, .flags = BOND_OPTFLAG_RAWVAL,
.set = bond_option_slaves_set .set = bond_option_slaves_set
}, },
[BOND_OPT_TLB_DYNAMIC_LB] = {
.id = BOND_OPT_TLB_DYNAMIC_LB,
.name = "dynamic_lb",
.desc = "Enable dynamic flow shuffling",
.unsuppmodes = BOND_MODE_ALL_EX(BIT(BOND_MODE_TLB)),
.values = bond_tlb_dynamic_lb_tbl,
.flags = BOND_OPTFLAG_IFDOWN,
.set = bond_option_tlb_dynamic_lb_set,
},
{ } { }
}; };
...@@ -1337,3 +1354,13 @@ static int bond_option_slaves_set(struct bonding *bond, ...@@ -1337,3 +1354,13 @@ static int bond_option_slaves_set(struct bonding *bond,
ret = -EPERM; ret = -EPERM;
goto out; goto out;
} }
static int bond_option_tlb_dynamic_lb_set(struct bonding *bond,
const struct bond_opt_value *newval)
{
pr_info("%s: Setting dynamic-lb to %s (%llu)\n",
bond->dev->name, newval->string, newval->value);
bond->params.tlb_dynamic_lb = newval->value;
return 0;
}
...@@ -62,6 +62,7 @@ enum { ...@@ -62,6 +62,7 @@ enum {
BOND_OPT_RESEND_IGMP, BOND_OPT_RESEND_IGMP,
BOND_OPT_LP_INTERVAL, BOND_OPT_LP_INTERVAL,
BOND_OPT_SLAVES, BOND_OPT_SLAVES,
BOND_OPT_TLB_DYNAMIC_LB,
BOND_OPT_LAST BOND_OPT_LAST
}; };
......
...@@ -1039,6 +1039,34 @@ static ssize_t bonding_store_lp_interval(struct device *d, ...@@ -1039,6 +1039,34 @@ static ssize_t bonding_store_lp_interval(struct device *d,
static DEVICE_ATTR(lp_interval, S_IRUGO | S_IWUSR, static DEVICE_ATTR(lp_interval, S_IRUGO | S_IWUSR,
bonding_show_lp_interval, bonding_store_lp_interval); bonding_show_lp_interval, bonding_store_lp_interval);
/*
 * sysfs "show" handler for the tlb_dynamic_lb attribute.
 *
 * Formats the bond's current tlb_dynamic_lb parameter as a decimal number
 * followed by a newline into @buf and returns what sprintf() returned.
 * @attr is not used.
 */
static ssize_t bonding_show_tlb_dynamic_lb(struct device *d,
					   struct device_attribute *attr,
					   char *buf)
{
	struct bonding *bond = to_bond(d);
	int dynamic_lb = bond->params.tlb_dynamic_lb;

	return sprintf(buf, "%d\n", dynamic_lb);
}
/*
 * sysfs "store" handler for the tlb_dynamic_lb attribute.
 *
 * Forwards the user-supplied buffer to bond_opt_tryset_rtnl() for the
 * BOND_OPT_TLB_DYNAMIC_LB option.  Returns @count when that call reports
 * success (0), otherwise returns its non-zero result unchanged.
 * @attr is not used.
 */
static ssize_t bonding_store_tlb_dynamic_lb(struct device *d,
					    struct device_attribute *attr,
					    const char *buf,
					    size_t count)
{
	struct bonding *bond = to_bond(d);
	int err = bond_opt_tryset_rtnl(bond, BOND_OPT_TLB_DYNAMIC_LB,
				       (char *)buf);

	if (err)
		return err;

	/* The byte count is passed back through an int, as before. */
	return (int)count;
}
/*
 * Device attribute "tlb_dynamic_lb" with mode S_IRUGO | S_IWUSR, wired to
 * the show/store handlers above; listed in per_bond_attrs[] as
 * dev_attr_tlb_dynamic_lb.
 */
static DEVICE_ATTR(tlb_dynamic_lb, S_IRUGO | S_IWUSR,
bonding_show_tlb_dynamic_lb,
bonding_store_tlb_dynamic_lb);
static ssize_t bonding_show_packets_per_slave(struct device *d, static ssize_t bonding_show_packets_per_slave(struct device *d,
struct device_attribute *attr, struct device_attribute *attr,
char *buf) char *buf)
...@@ -1099,6 +1127,7 @@ static struct attribute *per_bond_attrs[] = { ...@@ -1099,6 +1127,7 @@ static struct attribute *per_bond_attrs[] = {
&dev_attr_min_links.attr, &dev_attr_min_links.attr,
&dev_attr_lp_interval.attr, &dev_attr_lp_interval.attr,
&dev_attr_packets_per_slave.attr, &dev_attr_packets_per_slave.attr,
&dev_attr_tlb_dynamic_lb.attr,
NULL, NULL,
}; };
......
...@@ -174,6 +174,7 @@ struct bond_params { ...@@ -174,6 +174,7 @@ struct bond_params {
int resend_igmp; int resend_igmp;
int lp_interval; int lp_interval;
int packets_per_slave; int packets_per_slave;
int tlb_dynamic_lb;
struct reciprocal_value reciprocal_packets_per_slave; struct reciprocal_value reciprocal_packets_per_slave;
}; };
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment