Commit ee637714 authored by Mahesh Bandewar's avatar Mahesh Bandewar Committed by David S. Miller

bonding: Simplify the xmit function for modes that use xmit_hash

Earlier change to use usable slave array for TLB mode had an additional
performance advantage. So extending the same logic to all other modes
that use xmit-hash for slave selection (viz 802.3AD, and XOR modes).
Also consolidating this with the earlier TLB change.

The main idea is to build the usable slaves array in the control path
and use that array for slave selection during xmit operation.

Measured performance in a setup with a bond of 4x1G NICs with 200
instances of netperf for the modes involved (3ad, xor, tlb)
cmd: netperf -t TCP_RR -H <TargetHost> -l 60 -s 5

Mode        TPS-Before   TPS-After

802.3ad   : 468,694      493,101
TLB (lb=0): 392,583      392,965
XOR       : 475,696      484,517
Signed-off-by: default avatarMahesh Bandewar <maheshb@google.com>
Signed-off-by: default avatarNikolay Aleksandrov <nikolay@redhat.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent d7021325
...@@ -102,17 +102,20 @@ static const u8 lacpdu_mcast_addr[ETH_ALEN] = MULTICAST_LACPDU_ADDR; ...@@ -102,17 +102,20 @@ static const u8 lacpdu_mcast_addr[ETH_ALEN] = MULTICAST_LACPDU_ADDR;
/* ================= main 802.3ad protocol functions ================== */ /* ================= main 802.3ad protocol functions ================== */
static int ad_lacpdu_send(struct port *port); static int ad_lacpdu_send(struct port *port);
static int ad_marker_send(struct port *port, struct bond_marker *marker); static int ad_marker_send(struct port *port, struct bond_marker *marker);
static void ad_mux_machine(struct port *port); static void ad_mux_machine(struct port *port, bool *update_slave_arr);
static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port); static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port);
static void ad_tx_machine(struct port *port); static void ad_tx_machine(struct port *port);
static void ad_periodic_machine(struct port *port); static void ad_periodic_machine(struct port *port);
static void ad_port_selection_logic(struct port *port); static void ad_port_selection_logic(struct port *port, bool *update_slave_arr);
static void ad_agg_selection_logic(struct aggregator *aggregator); static void ad_agg_selection_logic(struct aggregator *aggregator,
bool *update_slave_arr);
static void ad_clear_agg(struct aggregator *aggregator); static void ad_clear_agg(struct aggregator *aggregator);
static void ad_initialize_agg(struct aggregator *aggregator); static void ad_initialize_agg(struct aggregator *aggregator);
static void ad_initialize_port(struct port *port, int lacp_fast); static void ad_initialize_port(struct port *port, int lacp_fast);
static void ad_enable_collecting_distributing(struct port *port); static void ad_enable_collecting_distributing(struct port *port,
static void ad_disable_collecting_distributing(struct port *port); bool *update_slave_arr);
static void ad_disable_collecting_distributing(struct port *port,
bool *update_slave_arr);
static void ad_marker_info_received(struct bond_marker *marker_info, static void ad_marker_info_received(struct bond_marker *marker_info,
struct port *port); struct port *port);
static void ad_marker_response_received(struct bond_marker *marker, static void ad_marker_response_received(struct bond_marker *marker,
...@@ -796,8 +799,9 @@ static int ad_marker_send(struct port *port, struct bond_marker *marker) ...@@ -796,8 +799,9 @@ static int ad_marker_send(struct port *port, struct bond_marker *marker)
/** /**
* ad_mux_machine - handle a port's mux state machine * ad_mux_machine - handle a port's mux state machine
* @port: the port we're looking at * @port: the port we're looking at
* @update_slave_arr: Does slave array need update?
*/ */
static void ad_mux_machine(struct port *port) static void ad_mux_machine(struct port *port, bool *update_slave_arr)
{ {
mux_states_t last_state; mux_states_t last_state;
...@@ -901,7 +905,8 @@ static void ad_mux_machine(struct port *port) ...@@ -901,7 +905,8 @@ static void ad_mux_machine(struct port *port)
switch (port->sm_mux_state) { switch (port->sm_mux_state) {
case AD_MUX_DETACHED: case AD_MUX_DETACHED:
port->actor_oper_port_state &= ~AD_STATE_SYNCHRONIZATION; port->actor_oper_port_state &= ~AD_STATE_SYNCHRONIZATION;
ad_disable_collecting_distributing(port); ad_disable_collecting_distributing(port,
update_slave_arr);
port->actor_oper_port_state &= ~AD_STATE_COLLECTING; port->actor_oper_port_state &= ~AD_STATE_COLLECTING;
port->actor_oper_port_state &= ~AD_STATE_DISTRIBUTING; port->actor_oper_port_state &= ~AD_STATE_DISTRIBUTING;
port->ntt = true; port->ntt = true;
...@@ -913,13 +918,15 @@ static void ad_mux_machine(struct port *port) ...@@ -913,13 +918,15 @@ static void ad_mux_machine(struct port *port)
port->actor_oper_port_state |= AD_STATE_SYNCHRONIZATION; port->actor_oper_port_state |= AD_STATE_SYNCHRONIZATION;
port->actor_oper_port_state &= ~AD_STATE_COLLECTING; port->actor_oper_port_state &= ~AD_STATE_COLLECTING;
port->actor_oper_port_state &= ~AD_STATE_DISTRIBUTING; port->actor_oper_port_state &= ~AD_STATE_DISTRIBUTING;
ad_disable_collecting_distributing(port); ad_disable_collecting_distributing(port,
update_slave_arr);
port->ntt = true; port->ntt = true;
break; break;
case AD_MUX_COLLECTING_DISTRIBUTING: case AD_MUX_COLLECTING_DISTRIBUTING:
port->actor_oper_port_state |= AD_STATE_COLLECTING; port->actor_oper_port_state |= AD_STATE_COLLECTING;
port->actor_oper_port_state |= AD_STATE_DISTRIBUTING; port->actor_oper_port_state |= AD_STATE_DISTRIBUTING;
ad_enable_collecting_distributing(port); ad_enable_collecting_distributing(port,
update_slave_arr);
port->ntt = true; port->ntt = true;
break; break;
default: default:
...@@ -1187,12 +1194,13 @@ static void ad_periodic_machine(struct port *port) ...@@ -1187,12 +1194,13 @@ static void ad_periodic_machine(struct port *port)
/** /**
* ad_port_selection_logic - select aggregation groups * ad_port_selection_logic - select aggregation groups
* @port: the port we're looking at * @port: the port we're looking at
* @update_slave_arr: Does slave array need update?
* *
* Select aggregation groups, and assign each port for it's aggregetor. The * Select aggregation groups, and assign each port for it's aggregetor. The
* selection logic is called in the inititalization (after all the handshkes), * selection logic is called in the inititalization (after all the handshkes),
* and after every lacpdu receive (if selected is off). * and after every lacpdu receive (if selected is off).
*/ */
static void ad_port_selection_logic(struct port *port) static void ad_port_selection_logic(struct port *port, bool *update_slave_arr)
{ {
struct aggregator *aggregator, *free_aggregator = NULL, *temp_aggregator; struct aggregator *aggregator, *free_aggregator = NULL, *temp_aggregator;
struct port *last_port = NULL, *curr_port; struct port *last_port = NULL, *curr_port;
...@@ -1347,7 +1355,7 @@ static void ad_port_selection_logic(struct port *port) ...@@ -1347,7 +1355,7 @@ static void ad_port_selection_logic(struct port *port)
__agg_ports_are_ready(port->aggregator)); __agg_ports_are_ready(port->aggregator));
aggregator = __get_first_agg(port); aggregator = __get_first_agg(port);
ad_agg_selection_logic(aggregator); ad_agg_selection_logic(aggregator, update_slave_arr);
} }
/* Decide if "agg" is a better choice for the new active aggregator that /* Decide if "agg" is a better choice for the new active aggregator that
...@@ -1435,6 +1443,7 @@ static int agg_device_up(const struct aggregator *agg) ...@@ -1435,6 +1443,7 @@ static int agg_device_up(const struct aggregator *agg)
/** /**
* ad_agg_selection_logic - select an aggregation group for a team * ad_agg_selection_logic - select an aggregation group for a team
* @aggregator: the aggregator we're looking at * @aggregator: the aggregator we're looking at
* @update_slave_arr: Does slave array need update?
* *
* It is assumed that only one aggregator may be selected for a team. * It is assumed that only one aggregator may be selected for a team.
* *
...@@ -1457,7 +1466,8 @@ static int agg_device_up(const struct aggregator *agg) ...@@ -1457,7 +1466,8 @@ static int agg_device_up(const struct aggregator *agg)
* __get_active_agg() won't work correctly. This function should be better * __get_active_agg() won't work correctly. This function should be better
* called with the bond itself, and retrieve the first agg from it. * called with the bond itself, and retrieve the first agg from it.
*/ */
static void ad_agg_selection_logic(struct aggregator *agg) static void ad_agg_selection_logic(struct aggregator *agg,
bool *update_slave_arr)
{ {
struct aggregator *best, *active, *origin; struct aggregator *best, *active, *origin;
struct bonding *bond = agg->slave->bond; struct bonding *bond = agg->slave->bond;
...@@ -1550,6 +1560,8 @@ static void ad_agg_selection_logic(struct aggregator *agg) ...@@ -1550,6 +1560,8 @@ static void ad_agg_selection_logic(struct aggregator *agg)
__disable_port(port); __disable_port(port);
} }
} }
/* Slave array needs update. */
*update_slave_arr = true;
} }
/* if the selected aggregator is of join individuals /* if the selected aggregator is of join individuals
...@@ -1678,24 +1690,30 @@ static void ad_initialize_port(struct port *port, int lacp_fast) ...@@ -1678,24 +1690,30 @@ static void ad_initialize_port(struct port *port, int lacp_fast)
/** /**
* ad_enable_collecting_distributing - enable a port's transmit/receive * ad_enable_collecting_distributing - enable a port's transmit/receive
* @port: the port we're looking at * @port: the port we're looking at
* @update_slave_arr: Does slave array need update?
* *
* Enable @port if it's in an active aggregator * Enable @port if it's in an active aggregator
*/ */
static void ad_enable_collecting_distributing(struct port *port) static void ad_enable_collecting_distributing(struct port *port,
bool *update_slave_arr)
{ {
if (port->aggregator->is_active) { if (port->aggregator->is_active) {
pr_debug("Enabling port %d(LAG %d)\n", pr_debug("Enabling port %d(LAG %d)\n",
port->actor_port_number, port->actor_port_number,
port->aggregator->aggregator_identifier); port->aggregator->aggregator_identifier);
__enable_port(port); __enable_port(port);
/* Slave array needs update */
*update_slave_arr = true;
} }
} }
/** /**
* ad_disable_collecting_distributing - disable a port's transmit/receive * ad_disable_collecting_distributing - disable a port's transmit/receive
* @port: the port we're looking at * @port: the port we're looking at
* @update_slave_arr: Does slave array need update?
*/ */
static void ad_disable_collecting_distributing(struct port *port) static void ad_disable_collecting_distributing(struct port *port,
bool *update_slave_arr)
{ {
if (port->aggregator && if (port->aggregator &&
!MAC_ADDRESS_EQUAL(&(port->aggregator->partner_system), !MAC_ADDRESS_EQUAL(&(port->aggregator->partner_system),
...@@ -1704,6 +1722,8 @@ static void ad_disable_collecting_distributing(struct port *port) ...@@ -1704,6 +1722,8 @@ static void ad_disable_collecting_distributing(struct port *port)
port->actor_port_number, port->actor_port_number,
port->aggregator->aggregator_identifier); port->aggregator->aggregator_identifier);
__disable_port(port); __disable_port(port);
/* Slave array needs an update */
*update_slave_arr = true;
} }
} }
...@@ -1868,6 +1888,7 @@ void bond_3ad_unbind_slave(struct slave *slave) ...@@ -1868,6 +1888,7 @@ void bond_3ad_unbind_slave(struct slave *slave)
struct bonding *bond = slave->bond; struct bonding *bond = slave->bond;
struct slave *slave_iter; struct slave *slave_iter;
struct list_head *iter; struct list_head *iter;
bool dummy_slave_update; /* Ignore this value as caller updates array */
/* Sync against bond_3ad_state_machine_handler() */ /* Sync against bond_3ad_state_machine_handler() */
spin_lock_bh(&bond->mode_lock); spin_lock_bh(&bond->mode_lock);
...@@ -1951,7 +1972,8 @@ void bond_3ad_unbind_slave(struct slave *slave) ...@@ -1951,7 +1972,8 @@ void bond_3ad_unbind_slave(struct slave *slave)
ad_clear_agg(aggregator); ad_clear_agg(aggregator);
if (select_new_active_agg) if (select_new_active_agg)
ad_agg_selection_logic(__get_first_agg(port)); ad_agg_selection_logic(__get_first_agg(port),
&dummy_slave_update);
} else { } else {
netdev_warn(bond->dev, "unbinding aggregator, and could not find a new aggregator for its ports\n"); netdev_warn(bond->dev, "unbinding aggregator, and could not find a new aggregator for its ports\n");
} }
...@@ -1966,7 +1988,8 @@ void bond_3ad_unbind_slave(struct slave *slave) ...@@ -1966,7 +1988,8 @@ void bond_3ad_unbind_slave(struct slave *slave)
/* select new active aggregator */ /* select new active aggregator */
temp_aggregator = __get_first_agg(port); temp_aggregator = __get_first_agg(port);
if (temp_aggregator) if (temp_aggregator)
ad_agg_selection_logic(temp_aggregator); ad_agg_selection_logic(temp_aggregator,
&dummy_slave_update);
} }
} }
} }
...@@ -1996,7 +2019,8 @@ void bond_3ad_unbind_slave(struct slave *slave) ...@@ -1996,7 +2019,8 @@ void bond_3ad_unbind_slave(struct slave *slave)
if (select_new_active_agg) { if (select_new_active_agg) {
netdev_info(bond->dev, "Removing an active aggregator\n"); netdev_info(bond->dev, "Removing an active aggregator\n");
/* select new active aggregator */ /* select new active aggregator */
ad_agg_selection_logic(__get_first_agg(port)); ad_agg_selection_logic(__get_first_agg(port),
&dummy_slave_update);
} }
} }
break; break;
...@@ -2031,6 +2055,7 @@ void bond_3ad_state_machine_handler(struct work_struct *work) ...@@ -2031,6 +2055,7 @@ void bond_3ad_state_machine_handler(struct work_struct *work)
struct slave *slave; struct slave *slave;
struct port *port; struct port *port;
bool should_notify_rtnl = BOND_SLAVE_NOTIFY_LATER; bool should_notify_rtnl = BOND_SLAVE_NOTIFY_LATER;
bool update_slave_arr = false;
/* Lock to protect data accessed by all (e.g., port->sm_vars) and /* Lock to protect data accessed by all (e.g., port->sm_vars) and
* against running with bond_3ad_unbind_slave. ad_rx_machine may run * against running with bond_3ad_unbind_slave. ad_rx_machine may run
...@@ -2058,7 +2083,7 @@ void bond_3ad_state_machine_handler(struct work_struct *work) ...@@ -2058,7 +2083,7 @@ void bond_3ad_state_machine_handler(struct work_struct *work)
} }
aggregator = __get_first_agg(port); aggregator = __get_first_agg(port);
ad_agg_selection_logic(aggregator); ad_agg_selection_logic(aggregator, &update_slave_arr);
} }
bond_3ad_set_carrier(bond); bond_3ad_set_carrier(bond);
} }
...@@ -2074,8 +2099,8 @@ void bond_3ad_state_machine_handler(struct work_struct *work) ...@@ -2074,8 +2099,8 @@ void bond_3ad_state_machine_handler(struct work_struct *work)
ad_rx_machine(NULL, port); ad_rx_machine(NULL, port);
ad_periodic_machine(port); ad_periodic_machine(port);
ad_port_selection_logic(port); ad_port_selection_logic(port, &update_slave_arr);
ad_mux_machine(port); ad_mux_machine(port, &update_slave_arr);
ad_tx_machine(port); ad_tx_machine(port);
/* turn off the BEGIN bit, since we already handled it */ /* turn off the BEGIN bit, since we already handled it */
...@@ -2093,6 +2118,9 @@ void bond_3ad_state_machine_handler(struct work_struct *work) ...@@ -2093,6 +2118,9 @@ void bond_3ad_state_machine_handler(struct work_struct *work)
rcu_read_unlock(); rcu_read_unlock();
spin_unlock_bh(&bond->mode_lock); spin_unlock_bh(&bond->mode_lock);
if (update_slave_arr)
bond_slave_arr_work_rearm(bond, 0);
if (should_notify_rtnl && rtnl_trylock()) { if (should_notify_rtnl && rtnl_trylock()) {
bond_slave_state_notify(bond); bond_slave_state_notify(bond);
rtnl_unlock(); rtnl_unlock();
...@@ -2283,6 +2311,11 @@ void bond_3ad_handle_link_change(struct slave *slave, char link) ...@@ -2283,6 +2311,11 @@ void bond_3ad_handle_link_change(struct slave *slave, char link)
port->sm_vars |= AD_PORT_BEGIN; port->sm_vars |= AD_PORT_BEGIN;
spin_unlock_bh(&slave->bond->mode_lock); spin_unlock_bh(&slave->bond->mode_lock);
/* RTNL is held and mode_lock is released so it's safe
* to update slave_array here.
*/
bond_update_slave_arr(slave->bond, NULL);
} }
/** /**
...@@ -2377,73 +2410,6 @@ int bond_3ad_get_active_agg_info(struct bonding *bond, struct ad_info *ad_info) ...@@ -2377,73 +2410,6 @@ int bond_3ad_get_active_agg_info(struct bonding *bond, struct ad_info *ad_info)
return ret; return ret;
} }
int bond_3ad_xmit_xor(struct sk_buff *skb, struct net_device *dev)
{
struct bonding *bond = netdev_priv(dev);
struct slave *slave, *first_ok_slave;
struct aggregator *agg;
struct ad_info ad_info;
struct list_head *iter;
int slaves_in_agg;
int slave_agg_no;
int agg_id;
if (__bond_3ad_get_active_agg_info(bond, &ad_info)) {
netdev_dbg(dev, "__bond_3ad_get_active_agg_info failed\n");
goto err_free;
}
slaves_in_agg = ad_info.ports;
agg_id = ad_info.aggregator_id;
if (slaves_in_agg == 0) {
netdev_dbg(dev, "active aggregator is empty\n");
goto err_free;
}
slave_agg_no = bond_xmit_hash(bond, skb) % slaves_in_agg;
first_ok_slave = NULL;
bond_for_each_slave_rcu(bond, slave, iter) {
agg = SLAVE_AD_INFO(slave)->port.aggregator;
if (!agg || agg->aggregator_identifier != agg_id)
continue;
if (slave_agg_no >= 0) {
if (!first_ok_slave && bond_slave_can_tx(slave))
first_ok_slave = slave;
slave_agg_no--;
continue;
}
if (bond_slave_can_tx(slave)) {
bond_dev_queue_xmit(bond, skb, slave->dev);
goto out;
}
}
if (slave_agg_no >= 0) {
netdev_err(dev, "Couldn't find a slave to tx on for aggregator ID %d\n",
agg_id);
goto err_free;
}
/* we couldn't find any suitable slave after the agg_no, so use the
* first suitable found, if found.
*/
if (first_ok_slave)
bond_dev_queue_xmit(bond, skb, first_ok_slave->dev);
else
goto err_free;
out:
return NETDEV_TX_OK;
err_free:
/* no suitable interface, frame not sent */
dev_kfree_skb_any(skb);
goto out;
}
int bond_3ad_lacpdu_recv(const struct sk_buff *skb, struct bonding *bond, int bond_3ad_lacpdu_recv(const struct sk_buff *skb, struct bonding *bond,
struct slave *slave) struct slave *slave)
{ {
......
...@@ -177,7 +177,6 @@ static int tlb_initialize(struct bonding *bond) ...@@ -177,7 +177,6 @@ static int tlb_initialize(struct bonding *bond)
static void tlb_deinitialize(struct bonding *bond) static void tlb_deinitialize(struct bonding *bond)
{ {
struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
struct tlb_up_slave *arr;
spin_lock_bh(&bond->mode_lock); spin_lock_bh(&bond->mode_lock);
...@@ -185,10 +184,6 @@ static void tlb_deinitialize(struct bonding *bond) ...@@ -185,10 +184,6 @@ static void tlb_deinitialize(struct bonding *bond)
bond_info->tx_hashtbl = NULL; bond_info->tx_hashtbl = NULL;
spin_unlock_bh(&bond->mode_lock); spin_unlock_bh(&bond->mode_lock);
arr = rtnl_dereference(bond_info->slave_arr);
if (arr)
kfree_rcu(arr, rcu);
} }
static long long compute_gap(struct slave *slave) static long long compute_gap(struct slave *slave)
...@@ -1336,39 +1331,9 @@ static int bond_do_alb_xmit(struct sk_buff *skb, struct bonding *bond, ...@@ -1336,39 +1331,9 @@ static int bond_do_alb_xmit(struct sk_buff *skb, struct bonding *bond,
return NETDEV_TX_OK; return NETDEV_TX_OK;
} }
static int bond_tlb_update_slave_arr(struct bonding *bond,
struct slave *skipslave)
{
struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
struct slave *tx_slave;
struct list_head *iter;
struct tlb_up_slave *new_arr, *old_arr;
new_arr = kzalloc(offsetof(struct tlb_up_slave, arr[bond->slave_cnt]),
GFP_ATOMIC);
if (!new_arr)
return -ENOMEM;
bond_for_each_slave(bond, tx_slave, iter) {
if (!bond_slave_can_tx(tx_slave))
continue;
if (skipslave == tx_slave)
continue;
new_arr->arr[new_arr->count++] = tx_slave;
}
old_arr = rtnl_dereference(bond_info->slave_arr);
rcu_assign_pointer(bond_info->slave_arr, new_arr);
if (old_arr)
kfree_rcu(old_arr, rcu);
return 0;
}
int bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev) int bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
{ {
struct bonding *bond = netdev_priv(bond_dev); struct bonding *bond = netdev_priv(bond_dev);
struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
struct ethhdr *eth_data; struct ethhdr *eth_data;
struct slave *tx_slave = NULL; struct slave *tx_slave = NULL;
u32 hash_index; u32 hash_index;
...@@ -1389,12 +1354,14 @@ int bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev) ...@@ -1389,12 +1354,14 @@ int bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
hash_index & 0xFF, hash_index & 0xFF,
skb->len); skb->len);
} else { } else {
struct tlb_up_slave *slaves; struct bond_up_slave *slaves;
unsigned int count;
slaves = rcu_dereference(bond_info->slave_arr); slaves = rcu_dereference(bond->slave_arr);
if (slaves && slaves->count) count = slaves ? ACCESS_ONCE(slaves->count) : 0;
if (likely(count))
tx_slave = slaves->arr[hash_index % tx_slave = slaves->arr[hash_index %
slaves->count]; count];
} }
break; break;
} }
...@@ -1641,10 +1608,6 @@ void bond_alb_deinit_slave(struct bonding *bond, struct slave *slave) ...@@ -1641,10 +1608,6 @@ void bond_alb_deinit_slave(struct bonding *bond, struct slave *slave)
rlb_clear_slave(bond, slave); rlb_clear_slave(bond, slave);
} }
if (bond_is_nondyn_tlb(bond))
if (bond_tlb_update_slave_arr(bond, slave))
pr_err("Failed to build slave-array for TLB mode.\n");
} }
void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char link) void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char link)
...@@ -1669,7 +1632,7 @@ void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char ...@@ -1669,7 +1632,7 @@ void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char
} }
if (bond_is_nondyn_tlb(bond)) { if (bond_is_nondyn_tlb(bond)) {
if (bond_tlb_update_slave_arr(bond, NULL)) if (bond_update_slave_arr(bond, NULL))
pr_err("Failed to build slave-array for TLB mode.\n"); pr_err("Failed to build slave-array for TLB mode.\n");
} }
} }
......
...@@ -139,19 +139,11 @@ struct tlb_slave_info { ...@@ -139,19 +139,11 @@ struct tlb_slave_info {
*/ */
}; };
struct tlb_up_slave {
unsigned int count;
struct rcu_head rcu;
struct slave *arr[0];
};
struct alb_bond_info { struct alb_bond_info {
struct tlb_client_info *tx_hashtbl; /* Dynamically allocated */ struct tlb_client_info *tx_hashtbl; /* Dynamically allocated */
u32 unbalanced_load; u32 unbalanced_load;
int tx_rebalance_counter; int tx_rebalance_counter;
int lp_counter; int lp_counter;
/* -------- non-dynamic tlb mode only ---------*/
struct tlb_up_slave __rcu *slave_arr; /* Up slaves */
/* -------- rlb parameters -------- */ /* -------- rlb parameters -------- */
int rlb_enabled; int rlb_enabled;
struct rlb_client_info *rx_hashtbl; /* Receive hash table */ struct rlb_client_info *rx_hashtbl; /* Receive hash table */
......
...@@ -210,6 +210,7 @@ static int bond_init(struct net_device *bond_dev); ...@@ -210,6 +210,7 @@ static int bond_init(struct net_device *bond_dev);
static void bond_uninit(struct net_device *bond_dev); static void bond_uninit(struct net_device *bond_dev);
static struct rtnl_link_stats64 *bond_get_stats(struct net_device *bond_dev, static struct rtnl_link_stats64 *bond_get_stats(struct net_device *bond_dev,
struct rtnl_link_stats64 *stats); struct rtnl_link_stats64 *stats);
static void bond_slave_arr_handler(struct work_struct *work);
/*---------------------------- General routines -----------------------------*/ /*---------------------------- General routines -----------------------------*/
...@@ -1551,6 +1552,9 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) ...@@ -1551,6 +1552,9 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
unblock_netpoll_tx(); unblock_netpoll_tx();
} }
if (bond_mode_uses_xmit_hash(bond))
bond_update_slave_arr(bond, NULL);
netdev_info(bond_dev, "Enslaving %s as %s interface with %s link\n", netdev_info(bond_dev, "Enslaving %s as %s interface with %s link\n",
slave_dev->name, slave_dev->name,
bond_is_active_slave(new_slave) ? "an active" : "a backup", bond_is_active_slave(new_slave) ? "an active" : "a backup",
...@@ -1668,6 +1672,9 @@ static int __bond_release_one(struct net_device *bond_dev, ...@@ -1668,6 +1672,9 @@ static int __bond_release_one(struct net_device *bond_dev,
if (BOND_MODE(bond) == BOND_MODE_8023AD) if (BOND_MODE(bond) == BOND_MODE_8023AD)
bond_3ad_unbind_slave(slave); bond_3ad_unbind_slave(slave);
if (bond_mode_uses_xmit_hash(bond))
bond_update_slave_arr(bond, slave);
netdev_info(bond_dev, "Releasing %s interface %s\n", netdev_info(bond_dev, "Releasing %s interface %s\n",
bond_is_active_slave(slave) ? "active" : "backup", bond_is_active_slave(slave) ? "active" : "backup",
slave_dev->name); slave_dev->name);
...@@ -1970,6 +1977,9 @@ static void bond_miimon_commit(struct bonding *bond) ...@@ -1970,6 +1977,9 @@ static void bond_miimon_commit(struct bonding *bond)
bond_alb_handle_link_change(bond, slave, bond_alb_handle_link_change(bond, slave,
BOND_LINK_UP); BOND_LINK_UP);
if (BOND_MODE(bond) == BOND_MODE_XOR)
bond_update_slave_arr(bond, NULL);
if (!bond->curr_active_slave || slave == primary) if (!bond->curr_active_slave || slave == primary)
goto do_failover; goto do_failover;
...@@ -1997,6 +2007,9 @@ static void bond_miimon_commit(struct bonding *bond) ...@@ -1997,6 +2007,9 @@ static void bond_miimon_commit(struct bonding *bond)
bond_alb_handle_link_change(bond, slave, bond_alb_handle_link_change(bond, slave,
BOND_LINK_DOWN); BOND_LINK_DOWN);
if (BOND_MODE(bond) == BOND_MODE_XOR)
bond_update_slave_arr(bond, NULL);
if (slave == rcu_access_pointer(bond->curr_active_slave)) if (slave == rcu_access_pointer(bond->curr_active_slave))
goto do_failover; goto do_failover;
...@@ -2453,6 +2466,8 @@ static void bond_loadbalance_arp_mon(struct work_struct *work) ...@@ -2453,6 +2466,8 @@ static void bond_loadbalance_arp_mon(struct work_struct *work)
if (slave_state_changed) { if (slave_state_changed) {
bond_slave_state_change(bond); bond_slave_state_change(bond);
if (BOND_MODE(bond) == BOND_MODE_XOR)
bond_update_slave_arr(bond, NULL);
} else if (do_failover) { } else if (do_failover) {
block_netpoll_tx(); block_netpoll_tx();
bond_select_active_slave(bond); bond_select_active_slave(bond);
...@@ -2829,8 +2844,20 @@ static int bond_slave_netdev_event(unsigned long event, ...@@ -2829,8 +2844,20 @@ static int bond_slave_netdev_event(unsigned long event,
if (old_duplex != slave->duplex) if (old_duplex != slave->duplex)
bond_3ad_adapter_duplex_changed(slave); bond_3ad_adapter_duplex_changed(slave);
} }
/* Refresh slave-array if applicable!
* If the setup does not use miimon or arpmon (mode-specific!),
* then these events will not cause the slave-array to be
* refreshed. This will cause xmit to use a slave that is not
* usable. Avoid such situation by refeshing the array at these
* events. If these (miimon/arpmon) parameters are configured
* then array gets refreshed twice and that should be fine!
*/
if (bond_mode_uses_xmit_hash(bond))
bond_update_slave_arr(bond, NULL);
break; break;
case NETDEV_DOWN: case NETDEV_DOWN:
if (bond_mode_uses_xmit_hash(bond))
bond_update_slave_arr(bond, NULL);
break; break;
case NETDEV_CHANGEMTU: case NETDEV_CHANGEMTU:
/* TODO: Should slaves be allowed to /* TODO: Should slaves be allowed to
...@@ -3010,6 +3037,7 @@ static void bond_work_init_all(struct bonding *bond) ...@@ -3010,6 +3037,7 @@ static void bond_work_init_all(struct bonding *bond)
else else
INIT_DELAYED_WORK(&bond->arp_work, bond_loadbalance_arp_mon); INIT_DELAYED_WORK(&bond->arp_work, bond_loadbalance_arp_mon);
INIT_DELAYED_WORK(&bond->ad_work, bond_3ad_state_machine_handler); INIT_DELAYED_WORK(&bond->ad_work, bond_3ad_state_machine_handler);
INIT_DELAYED_WORK(&bond->slave_arr_work, bond_slave_arr_handler);
} }
static void bond_work_cancel_all(struct bonding *bond) static void bond_work_cancel_all(struct bonding *bond)
...@@ -3019,6 +3047,7 @@ static void bond_work_cancel_all(struct bonding *bond) ...@@ -3019,6 +3047,7 @@ static void bond_work_cancel_all(struct bonding *bond)
cancel_delayed_work_sync(&bond->alb_work); cancel_delayed_work_sync(&bond->alb_work);
cancel_delayed_work_sync(&bond->ad_work); cancel_delayed_work_sync(&bond->ad_work);
cancel_delayed_work_sync(&bond->mcast_work); cancel_delayed_work_sync(&bond->mcast_work);
cancel_delayed_work_sync(&bond->slave_arr_work);
} }
static int bond_open(struct net_device *bond_dev) static int bond_open(struct net_device *bond_dev)
...@@ -3068,6 +3097,9 @@ static int bond_open(struct net_device *bond_dev) ...@@ -3068,6 +3097,9 @@ static int bond_open(struct net_device *bond_dev)
bond_3ad_initiate_agg_selection(bond, 1); bond_3ad_initiate_agg_selection(bond, 1);
} }
if (bond_mode_uses_xmit_hash(bond))
bond_update_slave_arr(bond, NULL);
return 0; return 0;
} }
...@@ -3573,20 +3605,148 @@ static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *bond_d ...@@ -3573,20 +3605,148 @@ static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *bond_d
return NETDEV_TX_OK; return NETDEV_TX_OK;
} }
/* In bond_xmit_xor() , we determine the output device by using a pre- /* Use this to update slave_array when (a) it's not appropriate to update
* determined xmit_hash_policy(), If the selected device is not enabled, * slave_array right away (note that update_slave_array() may sleep)
* find the next active slave. * and / or (b) RTNL is not held.
*/ */
static int bond_xmit_xor(struct sk_buff *skb, struct net_device *bond_dev) void bond_slave_arr_work_rearm(struct bonding *bond, unsigned long delay)
{ {
struct bonding *bond = netdev_priv(bond_dev); queue_delayed_work(bond->wq, &bond->slave_arr_work, delay);
int slave_cnt = ACCESS_ONCE(bond->slave_cnt); }
if (likely(slave_cnt)) /* Slave array work handler. Holds only RTNL */
bond_xmit_slave_id(bond, skb, static void bond_slave_arr_handler(struct work_struct *work)
bond_xmit_hash(bond, skb) % slave_cnt); {
else struct bonding *bond = container_of(work, struct bonding,
slave_arr_work.work);
int ret;
if (!rtnl_trylock())
goto err;
ret = bond_update_slave_arr(bond, NULL);
rtnl_unlock();
if (ret) {
pr_warn_ratelimited("Failed to update slave array from WT\n");
goto err;
}
return;
err:
bond_slave_arr_work_rearm(bond, 1);
}
/* Build the usable slaves array in control path for modes that use xmit-hash
* to determine the slave interface -
* (a) BOND_MODE_8023AD
* (b) BOND_MODE_XOR
* (c) BOND_MODE_TLB && tlb_dynamic_lb == 0
*
* The caller is expected to hold RTNL only and NO other lock!
*/
int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave)
{
struct slave *slave;
struct list_head *iter;
struct bond_up_slave *new_arr, *old_arr;
int slaves_in_agg;
int agg_id = 0;
int ret = 0;
#ifdef CONFIG_LOCKDEP
WARN_ON(lockdep_is_held(&bond->mode_lock));
#endif
new_arr = kzalloc(offsetof(struct bond_up_slave, arr[bond->slave_cnt]),
GFP_KERNEL);
if (!new_arr) {
ret = -ENOMEM;
pr_err("Failed to build slave-array.\n");
goto out;
}
if (BOND_MODE(bond) == BOND_MODE_8023AD) {
struct ad_info ad_info;
if (bond_3ad_get_active_agg_info(bond, &ad_info)) {
pr_debug("bond_3ad_get_active_agg_info failed\n");
kfree_rcu(new_arr, rcu);
/* No active aggragator means it's not safe to use
* the previous array.
*/
old_arr = rtnl_dereference(bond->slave_arr);
if (old_arr) {
RCU_INIT_POINTER(bond->slave_arr, NULL);
kfree_rcu(old_arr, rcu);
}
goto out;
}
slaves_in_agg = ad_info.ports;
agg_id = ad_info.aggregator_id;
}
bond_for_each_slave(bond, slave, iter) {
if (BOND_MODE(bond) == BOND_MODE_8023AD) {
struct aggregator *agg;
agg = SLAVE_AD_INFO(slave)->port.aggregator;
if (!agg || agg->aggregator_identifier != agg_id)
continue;
}
if (!bond_slave_can_tx(slave))
continue;
if (skipslave == slave)
continue;
new_arr->arr[new_arr->count++] = slave;
}
old_arr = rtnl_dereference(bond->slave_arr);
rcu_assign_pointer(bond->slave_arr, new_arr);
if (old_arr)
kfree_rcu(old_arr, rcu);
out:
if (ret != 0 && skipslave) {
int idx;
/* Rare situation where caller has asked to skip a specific
* slave but allocation failed (most likely!). BTW this is
* only possible when the call is initiated from
* __bond_release_one(). In this situation; overwrite the
* skipslave entry in the array with the last entry from the
* array to avoid a situation where the xmit path may choose
* this to-be-skipped slave to send a packet out.
*/
old_arr = rtnl_dereference(bond->slave_arr);
for (idx = 0; idx < old_arr->count; idx++) {
if (skipslave == old_arr->arr[idx]) {
old_arr->arr[idx] =
old_arr->arr[old_arr->count-1];
old_arr->count--;
break;
}
}
}
return ret;
}
/* Use this Xmit function for 3AD as well as XOR modes. The current
* usable slave array is formed in the control path. The xmit function
* just calculates hash and sends the packet out.
*/
int bond_3ad_xor_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct bonding *bond = netdev_priv(dev);
struct slave *slave;
struct bond_up_slave *slaves;
unsigned int count;
slaves = rcu_dereference(bond->slave_arr);
count = slaves ? ACCESS_ONCE(slaves->count) : 0;
if (likely(count)) {
slave = slaves->arr[bond_xmit_hash(bond, skb) % count];
bond_dev_queue_xmit(bond, skb, slave->dev);
} else {
dev_kfree_skb_any(skb); dev_kfree_skb_any(skb);
atomic_long_inc(&dev->tx_dropped);
}
return NETDEV_TX_OK; return NETDEV_TX_OK;
} }
...@@ -3682,12 +3842,11 @@ static netdev_tx_t __bond_start_xmit(struct sk_buff *skb, struct net_device *dev ...@@ -3682,12 +3842,11 @@ static netdev_tx_t __bond_start_xmit(struct sk_buff *skb, struct net_device *dev
return bond_xmit_roundrobin(skb, dev); return bond_xmit_roundrobin(skb, dev);
case BOND_MODE_ACTIVEBACKUP: case BOND_MODE_ACTIVEBACKUP:
return bond_xmit_activebackup(skb, dev); return bond_xmit_activebackup(skb, dev);
case BOND_MODE_8023AD:
case BOND_MODE_XOR: case BOND_MODE_XOR:
return bond_xmit_xor(skb, dev); return bond_3ad_xor_xmit(skb, dev);
case BOND_MODE_BROADCAST: case BOND_MODE_BROADCAST:
return bond_xmit_broadcast(skb, dev); return bond_xmit_broadcast(skb, dev);
case BOND_MODE_8023AD:
return bond_3ad_xmit_xor(skb, dev);
case BOND_MODE_ALB: case BOND_MODE_ALB:
return bond_alb_xmit(skb, dev); return bond_alb_xmit(skb, dev);
case BOND_MODE_TLB: case BOND_MODE_TLB:
...@@ -3861,6 +4020,7 @@ static void bond_uninit(struct net_device *bond_dev) ...@@ -3861,6 +4020,7 @@ static void bond_uninit(struct net_device *bond_dev)
struct bonding *bond = netdev_priv(bond_dev); struct bonding *bond = netdev_priv(bond_dev);
struct list_head *iter; struct list_head *iter;
struct slave *slave; struct slave *slave;
struct bond_up_slave *arr;
bond_netpoll_cleanup(bond_dev); bond_netpoll_cleanup(bond_dev);
...@@ -3869,6 +4029,12 @@ static void bond_uninit(struct net_device *bond_dev) ...@@ -3869,6 +4029,12 @@ static void bond_uninit(struct net_device *bond_dev)
__bond_release_one(bond_dev, slave->dev, true); __bond_release_one(bond_dev, slave->dev, true);
netdev_info(bond_dev, "Released all slaves\n"); netdev_info(bond_dev, "Released all slaves\n");
arr = rtnl_dereference(bond->slave_arr);
if (arr) {
RCU_INIT_POINTER(bond->slave_arr, NULL);
kfree_rcu(arr, rcu);
}
list_del(&bond->bond_list); list_del(&bond->bond_list);
bond_debug_unregister(bond); bond_debug_unregister(bond);
......
...@@ -179,6 +179,12 @@ struct slave { ...@@ -179,6 +179,12 @@ struct slave {
struct rtnl_link_stats64 slave_stats; struct rtnl_link_stats64 slave_stats;
}; };
struct bond_up_slave {
unsigned int count;
struct rcu_head rcu;
struct slave *arr[0];
};
/* /*
* Link pseudo-state only used internally by monitors * Link pseudo-state only used internally by monitors
*/ */
...@@ -193,6 +199,7 @@ struct bonding { ...@@ -193,6 +199,7 @@ struct bonding {
struct slave __rcu *curr_active_slave; struct slave __rcu *curr_active_slave;
struct slave __rcu *current_arp_slave; struct slave __rcu *current_arp_slave;
struct slave __rcu *primary_slave; struct slave __rcu *primary_slave;
struct bond_up_slave __rcu *slave_arr; /* Array of usable slaves */
bool force_primary; bool force_primary;
s32 slave_cnt; /* never change this value outside the attach/detach wrappers */ s32 slave_cnt; /* never change this value outside the attach/detach wrappers */
int (*recv_probe)(const struct sk_buff *, struct bonding *, int (*recv_probe)(const struct sk_buff *, struct bonding *,
...@@ -222,6 +229,7 @@ struct bonding { ...@@ -222,6 +229,7 @@ struct bonding {
struct delayed_work alb_work; struct delayed_work alb_work;
struct delayed_work ad_work; struct delayed_work ad_work;
struct delayed_work mcast_work; struct delayed_work mcast_work;
struct delayed_work slave_arr_work;
#ifdef CONFIG_DEBUG_FS #ifdef CONFIG_DEBUG_FS
/* debugging support via debugfs */ /* debugging support via debugfs */
struct dentry *debug_dir; struct dentry *debug_dir;
...@@ -534,6 +542,8 @@ const char *bond_slave_link_status(s8 link); ...@@ -534,6 +542,8 @@ const char *bond_slave_link_status(s8 link);
struct bond_vlan_tag *bond_verify_device_path(struct net_device *start_dev, struct bond_vlan_tag *bond_verify_device_path(struct net_device *start_dev,
struct net_device *end_dev, struct net_device *end_dev,
int level); int level);
int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave);
void bond_slave_arr_work_rearm(struct bonding *bond, unsigned long delay);
#ifdef CONFIG_PROC_FS #ifdef CONFIG_PROC_FS
void bond_create_proc_entry(struct bonding *bond); void bond_create_proc_entry(struct bonding *bond);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment