Commit d0aa9595 authored by Jeff Garzik's avatar Jeff Garzik

[netdrvr bonding] add 802.3ad support

Contributed by Intel, with updates by
Jay Vosburgh @ IBM (bonding maintainer)
parent eb7040af
#
# Makefile for the Ethernet Bonding driver
#
O_TARGET := bonding.o
obj-y := bond_main.o \
bond_3ad.o \
bond_alb.o
obj-m := $(O_TARGET)
include $(TOPDIR)/Rules.make
/*
* Copyright(c) 1999 - 2003 Intel Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc., 59
* Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* The full GNU General Public License is included in this distribution in the
* file called LICENSE.
*
*
* Changes:
*
* 2003/05/01 - Tsippy Mendelson <tsippy.mendelson at intel dot com> and
* Amir Noam <amir.noam at intel dot com>
* - Added support for lacp_rate module param.
*
* 2003/05/01 - Shmulik Hen <shmulik.hen at intel dot com>
* - Based on discussion on mailing list, changed locking scheme
* to use lock/unlock or lock_bh/unlock_bh appropriately instead
* of lock_irqsave/unlock_irqrestore. The new scheme helps exposing
* hidden bugs and solves system hangs that occurred due to the fact
* that holding lock_irqsave doesn't prevent softirqs from running.
* This also increases total throughput since interrupts are not
* blocked on each transmitted packets or monitor timeout.
*
* 2003/05/01 - Shmulik Hen <shmulik.hen at intel dot com>
* - Renamed bond_3ad_link_status_changed() to
* bond_3ad_handle_link_change() for compatibility with TLB.
*/
#include <linux/skbuff.h>
#include <linux/if_ether.h>
#include <linux/netdevice.h>
#include <linux/spinlock.h>
#include <linux/ethtool.h>
#include <linux/if_bonding.h>
#include "bonding.h"
#include "bond_3ad.h"
// General definitions
#define AD_SHORT_TIMEOUT 1
#define AD_LONG_TIMEOUT 0
#define AD_STANDBY 0x2
#define AD_MAX_TX_IN_SECOND 3
#define AD_COLLECTOR_MAX_DELAY 0
// Timer definitions(43.4.4 in the 802.3ad standard)
#define AD_FAST_PERIODIC_TIME 1
#define AD_SLOW_PERIODIC_TIME 30
#define AD_SHORT_TIMEOUT_TIME (3*AD_FAST_PERIODIC_TIME)
#define AD_LONG_TIMEOUT_TIME (3*AD_SLOW_PERIODIC_TIME)
#define AD_CHURN_DETECTION_TIME 60
#define AD_AGGREGATE_WAIT_TIME 2
// Port state definitions(43.4.2.2 in the 802.3ad standard)
#define AD_STATE_LACP_ACTIVITY 0x1
#define AD_STATE_LACP_TIMEOUT 0x2
#define AD_STATE_AGGREGATION 0x4
#define AD_STATE_SYNCHRONIZATION 0x8
#define AD_STATE_COLLECTING 0x10
#define AD_STATE_DISTRIBUTING 0x20
#define AD_STATE_DEFAULTED 0x40
#define AD_STATE_EXPIRED 0x80
// Port Variables definitions used by the State Machines(43.4.7 in the 802.3ad standard)
#define AD_PORT_BEGIN 0x1
#define AD_PORT_LACP_ENABLED 0x2
#define AD_PORT_ACTOR_CHURN 0x4
#define AD_PORT_PARTNER_CHURN 0x8
#define AD_PORT_READY 0x10
#define AD_PORT_READY_N 0x20
#define AD_PORT_MATCHED 0x40
#define AD_PORT_STANDBY 0x80
#define AD_PORT_SELECTED 0x100
#define AD_PORT_MOVED 0x200
// Port Key definitions
// key is determined according to the link speed, duplex and
// user key(which is yet not supported)
// ------------------------------------------------------------
// Port key : | User key | Speed |Duplex|
// ------------------------------------------------------------
// 16 6 1 0
#define AD_DUPLEX_KEY_BITS 0x1
#define AD_SPEED_KEY_BITS 0x3E
#define AD_USER_KEY_BITS 0xFFC0
//dalloun
#define AD_LINK_SPEED_BITMASK_1MBPS 0x1
#define AD_LINK_SPEED_BITMASK_10MBPS 0x2
#define AD_LINK_SPEED_BITMASK_100MBPS 0x4
#define AD_LINK_SPEED_BITMASK_1000MBPS 0x8
//endalloun
// compare MAC addresses
#define MAC_ADDRESS_COMPARE(A, B) memcmp(A, B, ETH_ALEN)
static struct mac_addr null_mac_addr = {{0, 0, 0, 0, 0, 0}};
static u16 ad_ticks_per_sec;
// ================= 3AD api to bonding and kernel code ==================
static u16 __get_link_speed(struct port *port);
static u8 __get_duplex(struct port *port);
static inline void __initialize_port_locks(struct port *port);
static inline void __deinitialize_port_locks(struct port *port);
//conversions
static void __ntohs_lacpdu(struct lacpdu *lacpdu);
static u16 __ad_timer_to_ticks(u16 timer_type, u16 Par);
// ================= ad code helper functions ==================
//needed by ad_rx_machine(...)
static void __record_pdu(struct lacpdu *lacpdu, struct port *port);
static void __record_default(struct port *port);
static void __update_selected(struct lacpdu *lacpdu, struct port *port);
static void __update_default_selected(struct port *port);
static void __choose_matched(struct lacpdu *lacpdu, struct port *port);
static void __update_ntt(struct lacpdu *lacpdu, struct port *port);
//needed for ad_mux_machine(..)
static void __attach_bond_to_agg(struct port *port);
static void __detach_bond_from_agg(struct port *port);
static int __agg_ports_are_ready(struct aggregator *aggregator);
static void __set_agg_ports_ready(struct aggregator *aggregator, int val);
//needed for ad_agg_selection_logic(...)
static u32 __get_agg_bandwidth(struct aggregator *aggregator);
static struct aggregator *__get_active_agg(struct aggregator *aggregator);
// ================= main 802.3ad protocol functions ==================
static int ad_lacpdu_send(struct port *port);
static int ad_marker_send(struct port *port, struct marker *marker);
static void ad_mux_machine(struct port *port);
static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port);
static void ad_tx_machine(struct port *port);
static void ad_periodic_machine(struct port *port);
static void ad_port_selection_logic(struct port *port);
static void ad_agg_selection_logic(struct aggregator *aggregator);
static void ad_clear_agg(struct aggregator *aggregator);
static void ad_initialize_agg(struct aggregator *aggregator);
static void ad_initialize_port(struct port *port, int lacp_fast);
static void ad_initialize_lacpdu(struct lacpdu *Lacpdu);
static void ad_enable_collecting_distributing(struct port *port);
static void ad_disable_collecting_distributing(struct port *port);
static void ad_marker_info_received(struct marker *marker_info, struct port *port);
static void ad_marker_response_received(struct marker *marker, struct port *port);
/////////////////////////////////////////////////////////////////////////////////
// ================= api to bonding and kernel code ==================
/////////////////////////////////////////////////////////////////////////////////
/**
* __get_bond_by_port - get the port's bonding struct
* @port: the port we're looking at
*
* Return @port's bonding struct, or %NULL if it can't be found.
*/
static inline struct bonding *__get_bond_by_port(struct port *port)
{
if (port->slave == NULL) {
return NULL;
}
return bond_get_bond_by_slave(port->slave);
}
/**
* __get_first_port - get the first port in the bond
* @bond: the bond we're looking at
*
* Return the port of the first slave in @bond, or %NULL if it can't be found.
*/
static inline struct port *__get_first_port(struct bonding *bond)
{
struct slave *slave = bond->next;
if (slave == (struct slave *)bond) {
return NULL;
}
return &(SLAVE_AD_INFO(slave).port);
}
/**
* __get_next_port - get the next port in the bond
* @port: the port we're looking at
*
* Return the port of the slave that is next in line of @port's slave in the
* bond, or %NULL if it can't be found.
*/
static inline struct port *__get_next_port(struct port *port)
{
struct bonding *bond = __get_bond_by_port(port);
struct slave *slave = port->slave;
// If there's no bond for this port, or this is the last slave
if ((bond == NULL) || (slave->next == bond->next)) {
return NULL;
}
return &(SLAVE_AD_INFO(slave->next).port);
}
/**
* __get_first_agg - get the first aggregator in the bond
* @bond: the bond we're looking at
*
* Return the aggregator of the first slave in @bond, or %NULL if it can't be
* found.
*/
static inline struct aggregator *__get_first_agg(struct port *port)
{
struct bonding *bond = __get_bond_by_port(port);
// If there's no bond for this port, or this is the last slave
if ((bond == NULL) || (bond->next == (struct slave *)bond)) {
return NULL;
}
return &(SLAVE_AD_INFO(bond->next).aggregator);
}
/**
* __get_next_agg - get the next aggregator in the bond
* @aggregator: the aggregator we're looking at
*
* Return the aggregator of the slave that is next in line of @aggregator's
* slave in the bond, or %NULL if it can't be found.
*/
static inline struct aggregator *__get_next_agg(struct aggregator *aggregator)
{
struct slave *slave = aggregator->slave;
struct bonding *bond = bond_get_bond_by_slave(slave);
// If there's no bond for this aggregator, or this is the last slave
if ((bond == NULL) || (slave->next == bond->next)) {
return NULL;
}
return &(SLAVE_AD_INFO(slave->next).aggregator);
}
/**
* __disable_port - disable the port's slave
* @port: the port we're looking at
*
*/
static inline void __disable_port(struct port *port)
{
bond_set_slave_inactive_flags(port->slave);
}
/**
* __enable_port - enable the port's slave, if it's up
* @port: the port we're looking at
*
*/
static inline void __enable_port(struct port *port)
{
struct slave *slave = port->slave;
if ((slave->link == BOND_LINK_UP) && IS_UP(slave->dev)) {
bond_set_slave_active_flags(slave);
}
}
/**
* __port_is_enabled - check if the port's slave is in active state
* @port: the port we're looking at
*
*/
static inline int __port_is_enabled(struct port *port)
{
return(port->slave->state == BOND_STATE_ACTIVE);
}
/**
* __get_agg_selection_mode - get the aggregator selection mode
* @port: the port we're looking at
*
* Get the aggregator selection mode. Can be %BANDWIDTH or %COUNT.
*/
static inline u32 __get_agg_selection_mode(struct port *port)
{
struct bonding *bond = __get_bond_by_port(port);
if (bond == NULL) {
return AD_BANDWIDTH;
}
return BOND_AD_INFO(bond).agg_select_mode;
}
/**
* __check_agg_selection_timer - check if the selection timer has expired
* @port: the port we're looking at
*
*/
static inline int __check_agg_selection_timer(struct port *port)
{
struct bonding *bond = __get_bond_by_port(port);
if (bond == NULL) {
return 0;
}
return BOND_AD_INFO(bond).agg_select_timer ? 1 : 0;
}
/**
* __get_rx_machine_lock - lock the port's RX machine
* @port: the port we're looking at
*
*/
static inline void __get_rx_machine_lock(struct port *port)
{
spin_lock(&(SLAVE_AD_INFO(port->slave).rx_machine_lock));
}
/**
* __release_rx_machine_lock - unlock the port's RX machine
* @port: the port we're looking at
*
*/
static inline void __release_rx_machine_lock(struct port *port)
{
spin_unlock(&(SLAVE_AD_INFO(port->slave).rx_machine_lock));
}
/**
* __get_link_speed - get a port's speed
* @port: the port we're looking at
*
* Return @port's speed in 802.3ad bitmask format. i.e. one of:
* 0,
* %AD_LINK_SPEED_BITMASK_10MBPS,
* %AD_LINK_SPEED_BITMASK_100MBPS,
* %AD_LINK_SPEED_BITMASK_1000MBPS
*/
static u16 __get_link_speed(struct port *port)
{
struct slave *slave = port->slave;
u16 speed;
/* this if covers only a special case: when the configuration starts with
* link down, it sets the speed to 0.
* This is done in spite of the fact that the e100 driver reports 0 to be
* compatible with MVT in the future.*/
if (slave->link != BOND_LINK_UP) {
speed=0;
} else {
switch (slave->speed) {
case SPEED_10:
speed = AD_LINK_SPEED_BITMASK_10MBPS;
break;
case SPEED_100:
speed = AD_LINK_SPEED_BITMASK_100MBPS;
break;
case SPEED_1000:
speed = AD_LINK_SPEED_BITMASK_1000MBPS;
break;
default:
speed = 0; // unknown speed value from ethtool. shouldn't happen
break;
}
}
BOND_PRINT_DBG(("Port %d Received link speed %d update from adapter", port->actor_port_number, speed));
return speed;
}
/**
* __get_duplex - get a port's duplex
* @port: the port we're looking at
*
* Return @port's duplex in 802.3ad bitmask format. i.e.:
* 0x01 if in full duplex
* 0x00 otherwise
*/
static u8 __get_duplex(struct port *port)
{
struct slave *slave = port->slave;
u8 retval;
// handling a special case: when the configuration starts with
// link down, it sets the duplex to 0.
if (slave->link != BOND_LINK_UP) {
retval=0x0;
} else {
switch (slave->duplex) {
case DUPLEX_FULL:
retval=0x1;
BOND_PRINT_DBG(("Port %d Received status full duplex update from adapter", port->actor_port_number));
break;
case DUPLEX_HALF:
default:
retval=0x0;
BOND_PRINT_DBG(("Port %d Received status NOT full duplex update from adapter", port->actor_port_number));
break;
}
}
return retval;
}
/**
* __initialize_port_locks - initialize a port's RX machine spinlock
* @port: the port we're looking at
*
*/
static inline void __initialize_port_locks(struct port *port)
{
// make sure it isn't called twice
spin_lock_init(&(SLAVE_AD_INFO(port->slave).rx_machine_lock));
}
/**
* __deinitialize_port_locks - deinitialize a port's RX machine spinlock
* @port: the port we're looking at
*
*/
static inline void __deinitialize_port_locks(struct port *port)
{
}
//conversions
/**
* __ntohs_lacpdu - convert the contents of a LACPDU to host byte order
* @lacpdu: the speicifed lacpdu
*
* For each multi-byte field in the lacpdu, convert its content
*/
static void __ntohs_lacpdu(struct lacpdu *lacpdu)
{
if (lacpdu) {
lacpdu->actor_system_priority = ntohs(lacpdu->actor_system_priority);
lacpdu->actor_key = ntohs(lacpdu->actor_key);
lacpdu->actor_port_priority = ntohs(lacpdu->actor_port_priority);
lacpdu->actor_port = ntohs(lacpdu->actor_port);
lacpdu->partner_system_priority = ntohs(lacpdu->partner_system_priority);
lacpdu->partner_key = ntohs(lacpdu->partner_key);
lacpdu->partner_port_priority = ntohs(lacpdu->partner_port_priority);
lacpdu->partner_port = ntohs(lacpdu->partner_port);
lacpdu->collector_max_delay = ntohs(lacpdu->collector_max_delay);
}
}
/**
* __ad_timer_to_ticks - convert a given timer type to AD module ticks
* @timer_type: which timer to operate
* @par: timer parameter. see below
*
* If @timer_type is %current_while_timer, @par indicates long/short timer.
* If @timer_type is %periodic_timer, @par is one of %FAST_PERIODIC_TIME,
* %SLOW_PERIODIC_TIME.
*/
static u16 __ad_timer_to_ticks(u16 timer_type, u16 par)
{
u16 retval=0; //to silence the compiler
switch (timer_type) {
case AD_CURRENT_WHILE_TIMER: // for rx machine usage
if (par) { // for short or long timeout
retval = (AD_SHORT_TIMEOUT_TIME*ad_ticks_per_sec); // short timeout
} else {
retval = (AD_LONG_TIMEOUT_TIME*ad_ticks_per_sec); // long timeout
}
break;
case AD_ACTOR_CHURN_TIMER: // for local churn machine
retval = (AD_CHURN_DETECTION_TIME*ad_ticks_per_sec);
break;
case AD_PERIODIC_TIMER: // for periodic machine
retval = (par*ad_ticks_per_sec); // long timeout
break;
case AD_PARTNER_CHURN_TIMER: // for remote churn machine
retval = (AD_CHURN_DETECTION_TIME*ad_ticks_per_sec);
break;
case AD_WAIT_WHILE_TIMER: // for selection machine
retval = (AD_AGGREGATE_WAIT_TIME*ad_ticks_per_sec);
break;
}
return retval;
}
/////////////////////////////////////////////////////////////////////////////////
// ================= ad_rx_machine helper functions ==================
/////////////////////////////////////////////////////////////////////////////////
/**
* __record_pdu - record parameters from a received lacpdu
* @lacpdu: the lacpdu we've received
* @port: the port we're looking at
*
* Record the parameter values for the Actor carried in a received lacpdu as
* the current partner operational parameter values and sets
* actor_oper_port_state.defaulted to FALSE.
*/
static void __record_pdu(struct lacpdu *lacpdu, struct port *port)
{
// validate lacpdu and port
if (lacpdu && port) {
// record the new parameter values for the partner operational
port->partner_oper_port_number = lacpdu->actor_port;
port->partner_oper_port_priority = lacpdu->actor_port_priority;
port->partner_oper_system = lacpdu->actor_system;
port->partner_oper_system_priority = lacpdu->actor_system_priority;
port->partner_oper_key = lacpdu->actor_key;
// zero partener's lase states
port->partner_oper_port_state = 0;
port->partner_oper_port_state |= (lacpdu->actor_state & AD_STATE_LACP_ACTIVITY);
port->partner_oper_port_state |= (lacpdu->actor_state & AD_STATE_LACP_TIMEOUT);
port->partner_oper_port_state |= (lacpdu->actor_state & AD_STATE_AGGREGATION);
port->partner_oper_port_state |= (lacpdu->actor_state & AD_STATE_SYNCHRONIZATION);
port->partner_oper_port_state |= (lacpdu->actor_state & AD_STATE_COLLECTING);
port->partner_oper_port_state |= (lacpdu->actor_state & AD_STATE_DISTRIBUTING);
port->partner_oper_port_state |= (lacpdu->actor_state & AD_STATE_DEFAULTED);
port->partner_oper_port_state |= (lacpdu->actor_state & AD_STATE_EXPIRED);
// set actor_oper_port_state.defaulted to FALSE
port->actor_oper_port_state &= ~AD_STATE_DEFAULTED;
// set the partner sync. to on if the partner is sync. and the port is matched
if ((port->sm_vars & AD_PORT_MATCHED) && (lacpdu->actor_state & AD_STATE_SYNCHRONIZATION)) {
port->partner_oper_port_state |= AD_STATE_SYNCHRONIZATION;
} else {
port->partner_oper_port_state &= ~AD_STATE_SYNCHRONIZATION;
}
}
}
/**
* __record_default - record default parameters
* @port: the port we're looking at
*
* This function records the default parameter values for the partner carried
* in the Partner Admin parameters as the current partner operational parameter
* values and sets actor_oper_port_state.defaulted to TRUE.
*/
static void __record_default(struct port *port)
{
// validate the port
if (port) {
// record the partner admin parameters
port->partner_oper_port_number = port->partner_admin_port_number;
port->partner_oper_port_priority = port->partner_admin_port_priority;
port->partner_oper_system = port->partner_admin_system;
port->partner_oper_system_priority = port->partner_admin_system_priority;
port->partner_oper_key = port->partner_admin_key;
port->partner_oper_port_state = port->partner_admin_port_state;
// set actor_oper_port_state.defaulted to true
port->actor_oper_port_state |= AD_STATE_DEFAULTED;
}
}
/**
* __update_selected - update a port's Selected variable from a received lacpdu
* @lacpdu: the lacpdu we've received
* @port: the port we're looking at
*
* Update the value of the selected variable, using parameter values from a
* newly received lacpdu. The parameter values for the Actor carried in the
* received PDU are compared with the corresponding operational parameter
* values for the ports partner. If one or more of the comparisons shows that
* the value(s) received in the PDU differ from the current operational values,
* then selected is set to FALSE and actor_oper_port_state.synchronization is
* set to out_of_sync. Otherwise, selected remains unchanged.
*/
static void __update_selected(struct lacpdu *lacpdu, struct port *port)
{
// validate lacpdu and port
if (lacpdu && port) {
// check if any parameter is different
if ((lacpdu->actor_port != port->partner_oper_port_number) ||
(lacpdu->actor_port_priority != port->partner_oper_port_priority) ||
MAC_ADDRESS_COMPARE(&(lacpdu->actor_system), &(port->partner_oper_system)) ||
(lacpdu->actor_system_priority != port->partner_oper_system_priority) ||
(lacpdu->actor_key != port->partner_oper_key) ||
((lacpdu->actor_state & AD_STATE_AGGREGATION) != (port->partner_oper_port_state & AD_STATE_AGGREGATION))
) {
// update the state machine Selected variable
port->sm_vars &= ~AD_PORT_SELECTED;
}
}
}
/**
* __update_default_selected - update a port's Selected variable from Partner
* @port: the port we're looking at
*
* This function updates the value of the selected variable, using the partner
* administrative parameter values. The administrative values are compared with
* the corresponding operational parameter values for the partner. If one or
* more of the comparisons shows that the administrative value(s) differ from
* the current operational values, then Selected is set to FALSE and
* actor_oper_port_state.synchronization is set to OUT_OF_SYNC. Otherwise,
* Selected remains unchanged.
*/
static void __update_default_selected(struct port *port)
{
// validate the port
if (port) {
// check if any parameter is different
if ((port->partner_admin_port_number != port->partner_oper_port_number) ||
(port->partner_admin_port_priority != port->partner_oper_port_priority) ||
MAC_ADDRESS_COMPARE(&(port->partner_admin_system), &(port->partner_oper_system)) ||
(port->partner_admin_system_priority != port->partner_oper_system_priority) ||
(port->partner_admin_key != port->partner_oper_key) ||
((port->partner_admin_port_state & AD_STATE_AGGREGATION) != (port->partner_oper_port_state & AD_STATE_AGGREGATION))
) {
// update the state machine Selected variable
port->sm_vars &= ~AD_PORT_SELECTED;
}
}
}
/**
* __choose_matched - update a port's matched variable from a received lacpdu
* @lacpdu: the lacpdu we've received
* @port: the port we're looking at
*
* Update the value of the matched variable, using parameter values from a
* newly received lacpdu. Parameter values for the partner carried in the
* received PDU are compared with the corresponding operational parameter
* values for the actor. Matched is set to TRUE if all of these parameters
* match and the PDU parameter partner_state.aggregation has the same value as
* actor_oper_port_state.aggregation and lacp will actively maintain the link
* in the aggregation. Matched is also set to TRUE if the value of
* actor_state.aggregation in the received PDU is set to FALSE, i.e., indicates
* an individual link and lacp will actively maintain the link. Otherwise,
* matched is set to FALSE. LACP is considered to be actively maintaining the
* link if either the PDU's actor_state.lacp_activity variable is TRUE or both
* the actor's actor_oper_port_state.lacp_activity and the PDU's
* partner_state.lacp_activity variables are TRUE.
*/
static void __choose_matched(struct lacpdu *lacpdu, struct port *port)
{
// validate lacpdu and port
if (lacpdu && port) {
// check if all parameters are alike
if (((lacpdu->partner_port == port->actor_port_number) &&
(lacpdu->partner_port_priority == port->actor_port_priority) &&
!MAC_ADDRESS_COMPARE(&(lacpdu->partner_system), &(port->actor_system)) &&
(lacpdu->partner_system_priority == port->actor_system_priority) &&
(lacpdu->partner_key == port->actor_oper_port_key) &&
((lacpdu->partner_state & AD_STATE_AGGREGATION) == (port->actor_oper_port_state & AD_STATE_AGGREGATION))) ||
// or this is individual link(aggregation == FALSE)
((lacpdu->actor_state & AD_STATE_AGGREGATION) == 0)
) {
// update the state machine Matched variable
port->sm_vars |= AD_PORT_MATCHED;
} else {
port->sm_vars &= ~AD_PORT_MATCHED;
}
}
}
/**
* __update_ntt - update a port's ntt variable from a received lacpdu
* @lacpdu: the lacpdu we've received
* @port: the port we're looking at
*
* Updates the value of the ntt variable, using parameter values from a newly
* received lacpdu. The parameter values for the partner carried in the
* received PDU are compared with the corresponding operational parameter
* values for the Actor. If one or more of the comparisons shows that the
* value(s) received in the PDU differ from the current operational values,
* then ntt is set to TRUE. Otherwise, ntt remains unchanged.
*/
static void __update_ntt(struct lacpdu *lacpdu, struct port *port)
{
// validate lacpdu and port
if (lacpdu && port) {
// check if any parameter is different
if ((lacpdu->partner_port != port->actor_port_number) ||
(lacpdu->partner_port_priority != port->actor_port_priority) ||
MAC_ADDRESS_COMPARE(&(lacpdu->partner_system), &(port->actor_system)) ||
(lacpdu->partner_system_priority != port->actor_system_priority) ||
(lacpdu->partner_key != port->actor_oper_port_key) ||
((lacpdu->partner_state & AD_STATE_LACP_ACTIVITY) != (port->actor_oper_port_state & AD_STATE_LACP_ACTIVITY)) ||
((lacpdu->partner_state & AD_STATE_LACP_TIMEOUT) != (port->actor_oper_port_state & AD_STATE_LACP_TIMEOUT)) ||
((lacpdu->partner_state & AD_STATE_SYNCHRONIZATION) != (port->actor_oper_port_state & AD_STATE_SYNCHRONIZATION)) ||
((lacpdu->partner_state & AD_STATE_AGGREGATION) != (port->actor_oper_port_state & AD_STATE_AGGREGATION))
) {
// set ntt to be TRUE
port->ntt = 1;
}
}
}
/**
* __attach_bond_to_agg
* @port: the port we're looking at
*
* Handle the attaching of the port's control parser/multiplexer and the
* aggregator. This function does nothing since the parser/multiplexer of the
* receive and the parser/multiplexer of the aggregator are already combined.
*/
static void __attach_bond_to_agg(struct port *port)
{
port=NULL; // just to satisfy the compiler
// This function does nothing since the parser/multiplexer of the receive
// and the parser/multiplexer of the aggregator are already combined
}
/**
* __detach_bond_to_agg
* @port: the port we're looking at
*
* Handle the detaching of the port's control parser/multiplexer from the
* aggregator. This function does nothing since the parser/multiplexer of the
* receive and the parser/multiplexer of the aggregator are already combined.
*/
static void __detach_bond_from_agg(struct port *port)
{
port=NULL; // just to satisfy the compiler
// This function does nothing sience the parser/multiplexer of the receive
// and the parser/multiplexer of the aggregator are already combined
}
/**
* __agg_ports_are_ready - check if all ports in an aggregator are ready
* @aggregator: the aggregator we're looking at
*
*/
static int __agg_ports_are_ready(struct aggregator *aggregator)
{
struct port *port;
int retval = 1;
if (aggregator) {
// scan all ports in this aggregator to verfy if they are all ready
for (port=aggregator->lag_ports; port; port=port->next_port_in_aggregator) {
if (!(port->sm_vars & AD_PORT_READY_N)) {
retval = 0;
break;
}
}
}
return retval;
}
/**
* __set_agg_ports_ready - set value of Ready bit in all ports of an aggregator
* @aggregator: the aggregator we're looking at
* @val: Should the ports' ready bit be set on or off
*
*/
static void __set_agg_ports_ready(struct aggregator *aggregator, int val)
{
struct port *port;
for (port=aggregator->lag_ports; port; port=port->next_port_in_aggregator) {
if (val) {
port->sm_vars |= AD_PORT_READY;
} else {
port->sm_vars &= ~AD_PORT_READY;
}
}
}
/**
* __get_agg_bandwidth - get the total bandwidth of an aggregator
* @aggregator: the aggregator we're looking at
*
*/
static u32 __get_agg_bandwidth(struct aggregator *aggregator)
{
u32 bandwidth=0;
u32 basic_speed;
if (aggregator->num_of_ports) {
basic_speed = __get_link_speed(aggregator->lag_ports);
switch (basic_speed) {
case AD_LINK_SPEED_BITMASK_1MBPS:
bandwidth = aggregator->num_of_ports;
break;
case AD_LINK_SPEED_BITMASK_10MBPS:
bandwidth = aggregator->num_of_ports * 10;
break;
case AD_LINK_SPEED_BITMASK_100MBPS:
bandwidth = aggregator->num_of_ports * 100;
break;
case AD_LINK_SPEED_BITMASK_1000MBPS:
bandwidth = aggregator->num_of_ports * 1000;
break;
default:
bandwidth=0; // to silent the compilor ....
}
}
return bandwidth;
}
/**
* __get_active_agg - get the current active aggregator
* @aggregator: the aggregator we're looking at
*
*/
static struct aggregator *__get_active_agg(struct aggregator *aggregator)
{
struct aggregator *retval = NULL;
for (; aggregator; aggregator = __get_next_agg(aggregator)) {
if (aggregator->is_active) {
retval = aggregator;
break;
}
}
return retval;
}
//////////////////////////////////////////////////////////////////////////////////////
// ================= main 802.3ad protocol code ======================================
//////////////////////////////////////////////////////////////////////////////////////
/**
* ad_lacpdu_send - send out a lacpdu packet on a given port
* @port: the port we're looking at
*
* Returns: 0 on success
* < 0 on error
*/
static int ad_lacpdu_send(struct port *port)
{
struct slave *slave = port->slave;
struct sk_buff *skb;
struct lacpdu_header *lacpdu_header;
int length = sizeof(struct lacpdu_header);
struct mac_addr lacpdu_multicast_address = AD_MULTICAST_LACPDU_ADDR;
skb = dev_alloc_skb(length);
if (!skb) {
return -ENOMEM;
}
skb->dev = slave->dev;
skb->mac.raw = skb->data;
skb->nh.raw = skb->data + ETH_HLEN;
skb->protocol = PKT_TYPE_LACPDU;
lacpdu_header = (struct lacpdu_header *)skb_put(skb, length);
lacpdu_header->ad_header.destination_address = lacpdu_multicast_address;
/* Note: source addres is set to be the member's PERMANENT address, because we use it
to identify loopback lacpdus in receive. */
lacpdu_header->ad_header.source_address = *((struct mac_addr *)(slave->perm_hwaddr));
lacpdu_header->ad_header.length_type = PKT_TYPE_LACPDU;
lacpdu_header->lacpdu = port->lacpdu; // struct copy
dev_queue_xmit(skb);
return 0;
}
/**
* ad_marker_send - send marker information/response on a given port
* @port: the port we're looking at
* @marker: marker data to send
*
* Returns: 0 on success
* < 0 on error
*/
static int ad_marker_send(struct port *port, struct marker *marker)
{
struct slave *slave = port->slave;
struct sk_buff *skb;
struct marker_header *marker_header;
int length = sizeof(struct marker_header);
struct mac_addr lacpdu_multicast_address = AD_MULTICAST_LACPDU_ADDR;
skb = dev_alloc_skb(length + 16);
if (!skb) {
return -ENOMEM;
}
skb_reserve(skb, 16);
skb->dev = slave->dev;
skb->mac.raw = skb->data;
skb->nh.raw = skb->data + ETH_HLEN;
skb->protocol = PKT_TYPE_LACPDU;
marker_header = (struct marker_header *)skb_put(skb, length);
marker_header->ad_header.destination_address = lacpdu_multicast_address;
/* Note: source addres is set to be the member's PERMANENT address, because we use it
to identify loopback MARKERs in receive. */
marker_header->ad_header.source_address = *((struct mac_addr *)(slave->perm_hwaddr));
marker_header->ad_header.length_type = PKT_TYPE_LACPDU;
marker_header->marker = *marker; // struct copy
dev_queue_xmit(skb);
return 0;
}
/**
* ad_mux_machine - handle a port's mux state machine
* @port: the port we're looking at
*
*/
static void ad_mux_machine(struct port *port)
{
mux_states_t last_state;
// keep current State Machine state to compare later if it was changed
last_state = port->sm_mux_state;
if (port->sm_vars & AD_PORT_BEGIN) {
port->sm_mux_state = AD_MUX_DETACHED; // next state
} else {
switch (port->sm_mux_state) {
case AD_MUX_DETACHED:
if ((port->sm_vars & AD_PORT_SELECTED) || (port->sm_vars & AD_PORT_STANDBY)) { // if SELECTED or STANDBY
port->sm_mux_state = AD_MUX_WAITING; // next state
}
break;
case AD_MUX_WAITING:
// if SELECTED == FALSE return to DETACH state
if (!(port->sm_vars & AD_PORT_SELECTED)) { // if UNSELECTED
port->sm_vars &= ~AD_PORT_READY_N;
// in order to withhold the Selection Logic to check all ports READY_N value
// every callback cycle to update ready variable, we check READY_N and update READY here
__set_agg_ports_ready(port->aggregator, __agg_ports_are_ready(port->aggregator));
port->sm_mux_state = AD_MUX_DETACHED; // next state
break;
}
// check if the wait_while_timer expired
if (port->sm_mux_timer_counter && !(--port->sm_mux_timer_counter)) {
port->sm_vars |= AD_PORT_READY_N;
}
// in order to withhold the selection logic to check all ports READY_N value
// every callback cycle to update ready variable, we check READY_N and update READY here
__set_agg_ports_ready(port->aggregator, __agg_ports_are_ready(port->aggregator));
// if the wait_while_timer expired, and the port is in READY state, move to ATTACHED state
if ((port->sm_vars & AD_PORT_READY) && !port->sm_mux_timer_counter) {
port->sm_mux_state = AD_MUX_ATTACHED; // next state
}
break;
case AD_MUX_ATTACHED:
// check also if agg_select_timer expired(so the edable port will take place only after this timer)
if ((port->sm_vars & AD_PORT_SELECTED) && (port->partner_oper_port_state & AD_STATE_SYNCHRONIZATION) && !__check_agg_selection_timer(port)) {
port->sm_mux_state = AD_MUX_COLLECTING_DISTRIBUTING;// next state
} else if (!(port->sm_vars & AD_PORT_SELECTED) || (port->sm_vars & AD_PORT_STANDBY)) { // if UNSELECTED or STANDBY
port->sm_vars &= ~AD_PORT_READY_N;
// in order to withhold the selection logic to check all ports READY_N value
// every callback cycle to update ready variable, we check READY_N and update READY here
__set_agg_ports_ready(port->aggregator, __agg_ports_are_ready(port->aggregator));
port->sm_mux_state = AD_MUX_DETACHED;// next state
}
break;
case AD_MUX_COLLECTING_DISTRIBUTING:
if (!(port->sm_vars & AD_PORT_SELECTED) || (port->sm_vars & AD_PORT_STANDBY) ||
!(port->partner_oper_port_state & AD_STATE_SYNCHRONIZATION)
) {
port->sm_mux_state = AD_MUX_ATTACHED;// next state
} else {
// if port state hasn't changed make
// sure that a collecting distributing
// port in an active aggregator is enabled
if (port->aggregator &&
port->aggregator->is_active &&
!__port_is_enabled(port)) {
__enable_port(port);
}
}
break;
default: //to silence the compiler
break;
}
}
// check if the state machine was changed
if (port->sm_mux_state != last_state) {
BOND_PRINT_DBG(("Mux Machine: Port=%d, Last State=%d, Curr State=%d", port->actor_port_number, last_state, port->sm_mux_state));
switch (port->sm_mux_state) {
case AD_MUX_DETACHED:
__detach_bond_from_agg(port);
port->actor_oper_port_state &= ~AD_STATE_SYNCHRONIZATION;
ad_disable_collecting_distributing(port);
port->actor_oper_port_state &= ~AD_STATE_COLLECTING;
port->actor_oper_port_state &= ~AD_STATE_DISTRIBUTING;
port->ntt = 1;
break;
case AD_MUX_WAITING:
port->sm_mux_timer_counter = __ad_timer_to_ticks(AD_WAIT_WHILE_TIMER, 0);
break;
case AD_MUX_ATTACHED:
__attach_bond_to_agg(port);
port->actor_oper_port_state |= AD_STATE_SYNCHRONIZATION;
port->actor_oper_port_state &= ~AD_STATE_COLLECTING;
port->actor_oper_port_state &= ~AD_STATE_DISTRIBUTING;
ad_disable_collecting_distributing(port);
port->ntt = 1;
break;
case AD_MUX_COLLECTING_DISTRIBUTING:
port->actor_oper_port_state |= AD_STATE_COLLECTING;
port->actor_oper_port_state |= AD_STATE_DISTRIBUTING;
ad_enable_collecting_distributing(port);
port->ntt = 1;
break;
default: //to silence the compiler
break;
}
}
}
/**
* ad_rx_machine - handle a port's rx State Machine
* @lacpdu: the lacpdu we've received
* @port: the port we're looking at
*
* If lacpdu arrived, stop previous timer (if exists) and set the next state as
* CURRENT. If timer expired set the state machine in the proper state.
* In other cases, this function checks if we need to switch to other state.
*/
static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port)
{
rx_states_t last_state;
// Lock to prevent 2 instances of this function to run simultaneously(rx interrupt and periodic machine callback)
__get_rx_machine_lock(port);
// keep current State Machine state to compare later if it was changed
last_state = port->sm_rx_state;
// check if state machine should change state
// first, check if port was reinitialized
if (port->sm_vars & AD_PORT_BEGIN) {
port->sm_rx_state = AD_RX_INITIALIZE; // next state
}
// check if port is not enabled
else if (!(port->sm_vars & AD_PORT_BEGIN) && !port->is_enabled && !(port->sm_vars & AD_PORT_MOVED)) {
port->sm_rx_state = AD_RX_PORT_DISABLED; // next state
}
// check if new lacpdu arrived
else if (lacpdu && ((port->sm_rx_state == AD_RX_EXPIRED) || (port->sm_rx_state == AD_RX_DEFAULTED) || (port->sm_rx_state == AD_RX_CURRENT))) {
port->sm_rx_timer_counter = 0; // zero timer
port->sm_rx_state = AD_RX_CURRENT;
} else {
// if timer is on, and if it is expired
if (port->sm_rx_timer_counter && !(--port->sm_rx_timer_counter)) {
switch (port->sm_rx_state) {
case AD_RX_EXPIRED:
port->sm_rx_state = AD_RX_DEFAULTED; // next state
break;
case AD_RX_CURRENT:
port->sm_rx_state = AD_RX_EXPIRED; // next state
break;
default: //to silence the compiler
break;
}
} else {
// if no lacpdu arrived and no timer is on
switch (port->sm_rx_state) {
case AD_RX_PORT_DISABLED:
if (port->sm_vars & AD_PORT_MOVED) {
port->sm_rx_state = AD_RX_INITIALIZE; // next state
} else if (port->is_enabled && (port->sm_vars & AD_PORT_LACP_ENABLED)) {
port->sm_rx_state = AD_RX_EXPIRED; // next state
} else if (port->is_enabled && ((port->sm_vars & AD_PORT_LACP_ENABLED) == 0)) {
port->sm_rx_state = AD_RX_LACP_DISABLED; // next state
}
break;
default: //to silence the compiler
break;
}
}
}
// check if the State machine was changed or new lacpdu arrived
if ((port->sm_rx_state != last_state) || (lacpdu)) {
BOND_PRINT_DBG(("Rx Machine: Port=%d, Last State=%d, Curr State=%d", port->actor_port_number, last_state, port->sm_rx_state));
switch (port->sm_rx_state) {
case AD_RX_INITIALIZE:
if (!(port->actor_oper_port_key & AD_DUPLEX_KEY_BITS)) {
port->sm_vars &= ~AD_PORT_LACP_ENABLED;
} else {
port->sm_vars |= AD_PORT_LACP_ENABLED;
}
port->sm_vars &= ~AD_PORT_SELECTED;
__record_default(port);
port->actor_oper_port_state &= ~AD_STATE_EXPIRED;
port->sm_vars &= ~AD_PORT_MOVED;
port->sm_rx_state = AD_RX_PORT_DISABLED; // next state
/*- Fall Through -*/
case AD_RX_PORT_DISABLED:
port->sm_vars &= ~AD_PORT_MATCHED;
break;
case AD_RX_LACP_DISABLED:
port->sm_vars &= ~AD_PORT_SELECTED;
__record_default(port);
port->partner_oper_port_state &= ~AD_STATE_AGGREGATION;
port->sm_vars |= AD_PORT_MATCHED;
port->actor_oper_port_state &= ~AD_STATE_EXPIRED;
break;
case AD_RX_EXPIRED:
//Reset of the Synchronization flag. (Standard 43.4.12)
//This reset cause to disable this port in the COLLECTING_DISTRIBUTING state of the
//mux machine in case of EXPIRED even if LINK_DOWN didn't arrive for the port.
port->partner_oper_port_state &= ~AD_STATE_SYNCHRONIZATION;
port->sm_vars &= ~AD_PORT_MATCHED;
port->partner_oper_port_state |= AD_SHORT_TIMEOUT;
port->sm_rx_timer_counter = __ad_timer_to_ticks(AD_CURRENT_WHILE_TIMER, (u16)(AD_SHORT_TIMEOUT));
port->actor_oper_port_state |= AD_STATE_EXPIRED;
break;
case AD_RX_DEFAULTED:
__update_default_selected(port);
__record_default(port);
port->sm_vars |= AD_PORT_MATCHED;
port->actor_oper_port_state &= ~AD_STATE_EXPIRED;
break;
case AD_RX_CURRENT:
// detect loopback situation
if (!MAC_ADDRESS_COMPARE(&(lacpdu->actor_system), &(port->actor_system))) {
// INFO_RECEIVED_LOOPBACK_FRAMES
printk(KERN_ERR "bonding: An illegal loopback occurred on adapter (%s)\n",
port->slave->dev->name);
printk(KERN_ERR "Check the configuration to verify that all Adapters "
"are connected to 802.3ad compliant switch ports\n");
__release_rx_machine_lock(port);
return;
}
__update_selected(lacpdu, port);
__update_ntt(lacpdu, port);
__record_pdu(lacpdu, port);
__choose_matched(lacpdu, port);
port->sm_rx_timer_counter = __ad_timer_to_ticks(AD_CURRENT_WHILE_TIMER, (u16)(port->actor_oper_port_state & AD_STATE_LACP_TIMEOUT));
port->actor_oper_port_state &= ~AD_STATE_EXPIRED;
// verify that if the aggregator is enabled, the port is enabled too.
//(because if the link goes down for a short time, the 802.3ad will not
// catch it, and the port will continue to be disabled)
if (port->aggregator && port->aggregator->is_active && !__port_is_enabled(port)) {
__enable_port(port);
}
break;
default: //to silence the compiler
break;
}
}
__release_rx_machine_lock(port);
}
/**
* ad_tx_machine - handle a port's tx state machine
* @port: the port we're looking at
*
*/
static void ad_tx_machine(struct port *port)
{
struct lacpdu *lacpdu = &port->lacpdu;
// check if tx timer expired, to verify that we do not send more than 3 packets per second
if (port->sm_tx_timer_counter && !(--port->sm_tx_timer_counter)) {
// check if there is something to send
if (port->ntt && (port->sm_vars & AD_PORT_LACP_ENABLED)) {
//update current actual Actor parameters
//lacpdu->subtype initialized
//lacpdu->version_number initialized
//lacpdu->tlv_type_actor_info initialized
//lacpdu->actor_information_length initialized
lacpdu->actor_system_priority = port->actor_system_priority;
lacpdu->actor_system = port->actor_system;
lacpdu->actor_key = port->actor_oper_port_key;
lacpdu->actor_port_priority = port->actor_port_priority;
lacpdu->actor_port = port->actor_port_number;
lacpdu->actor_state = port->actor_oper_port_state;
//lacpdu->reserved_3_1 initialized
//lacpdu->tlv_type_partner_info initialized
//lacpdu->partner_information_length initialized
lacpdu->partner_system_priority = port->partner_oper_system_priority;
lacpdu->partner_system = port->partner_oper_system;
lacpdu->partner_key = port->partner_oper_key;
lacpdu->partner_port_priority = port->partner_oper_port_priority;
lacpdu->partner_port = port->partner_oper_port_number;
lacpdu->partner_state = port->partner_oper_port_state;
//lacpdu->reserved_3_2 initialized
//lacpdu->tlv_type_collector_info initialized
//lacpdu->collector_information_length initialized
//collector_max_delay initialized
//reserved_12[12] initialized
//tlv_type_terminator initialized
//terminator_length initialized
//reserved_50[50] initialized
// We need to convert all non u8 parameters to Big Endian for transmit
__ntohs_lacpdu(lacpdu);
// send the lacpdu
if (ad_lacpdu_send(port) >= 0) {
BOND_PRINT_DBG(("Sent LACPDU on port %d", port->actor_port_number));
// mark ntt as false, so it will not be sent again until demanded
port->ntt = 0;
}
}
// restart tx timer(to verify that we will not exceed AD_MAX_TX_IN_SECOND
port->sm_tx_timer_counter=ad_ticks_per_sec/AD_MAX_TX_IN_SECOND;
}
}
/**
* ad_periodic_machine - handle a port's periodic state machine
* @port: the port we're looking at
*
* Turn ntt flag on priodically to perform periodic transmission of lacpdu's.
*/
static void ad_periodic_machine(struct port *port)
{
periodic_states_t last_state;
// keep current state machine state to compare later if it was changed
last_state = port->sm_periodic_state;
// check if port was reinitialized
if (((port->sm_vars & AD_PORT_BEGIN) || !(port->sm_vars & AD_PORT_LACP_ENABLED) || !port->is_enabled) ||
(!(port->actor_oper_port_state & AD_STATE_LACP_ACTIVITY) && !(port->partner_oper_port_state & AD_STATE_LACP_ACTIVITY))
) {
port->sm_periodic_state = AD_NO_PERIODIC; // next state
}
// check if state machine should change state
else if (port->sm_periodic_timer_counter) {
// check if periodic state machine expired
if (!(--port->sm_periodic_timer_counter)) {
// if expired then do tx
port->sm_periodic_state = AD_PERIODIC_TX; // next state
} else {
// If not expired, check if there is some new timeout parameter from the partner state
switch (port->sm_periodic_state) {
case AD_FAST_PERIODIC:
if (!(port->partner_oper_port_state & AD_STATE_LACP_TIMEOUT)) {
port->sm_periodic_state = AD_SLOW_PERIODIC; // next state
}
break;
case AD_SLOW_PERIODIC:
if ((port->partner_oper_port_state & AD_STATE_LACP_TIMEOUT)) {
// stop current timer
port->sm_periodic_timer_counter = 0;
port->sm_periodic_state = AD_PERIODIC_TX; // next state
}
break;
default: //to silence the compiler
break;
}
}
} else {
switch (port->sm_periodic_state) {
case AD_NO_PERIODIC:
port->sm_periodic_state = AD_FAST_PERIODIC; // next state
break;
case AD_PERIODIC_TX:
if (!(port->partner_oper_port_state & AD_STATE_LACP_TIMEOUT)) {
port->sm_periodic_state = AD_SLOW_PERIODIC; // next state
} else {
port->sm_periodic_state = AD_FAST_PERIODIC; // next state
}
break;
default: //to silence the compiler
break;
}
}
// check if the state machine was changed
if (port->sm_periodic_state != last_state) {
BOND_PRINT_DBG(("Periodic Machine: Port=%d, Last State=%d, Curr State=%d", port->actor_port_number, last_state, port->sm_periodic_state));
switch (port->sm_periodic_state) {
case AD_NO_PERIODIC:
port->sm_periodic_timer_counter = 0; // zero timer
break;
case AD_FAST_PERIODIC:
port->sm_periodic_timer_counter = __ad_timer_to_ticks(AD_PERIODIC_TIMER, (u16)(AD_FAST_PERIODIC_TIME))-1; // decrement 1 tick we lost in the PERIODIC_TX cycle
break;
case AD_SLOW_PERIODIC:
port->sm_periodic_timer_counter = __ad_timer_to_ticks(AD_PERIODIC_TIMER, (u16)(AD_SLOW_PERIODIC_TIME))-1; // decrement 1 tick we lost in the PERIODIC_TX cycle
break;
case AD_PERIODIC_TX:
port->ntt = 1;
break;
default: //to silence the compiler
break;
}
}
}
/**
* ad_port_selection_logic - select aggregation groups
* @port: the port we're looking at
*
* Select aggregation groups, and assign each port for it's aggregetor. The
* selection logic is called in the inititalization (after all the handshkes),
* and after every lacpdu receive (if selected is off).
*/
static void ad_port_selection_logic(struct port *port)
{
struct aggregator *aggregator, *free_aggregator = NULL, *temp_aggregator;
struct port *last_port = NULL, *curr_port;
int found = 0;
// if the port is already Selected, do nothing
if (port->sm_vars & AD_PORT_SELECTED) {
return;
}
// if the port is connected to other aggregator, detach it
if (port->aggregator) {
// detach the port from its former aggregator
temp_aggregator=port->aggregator;
for (curr_port=temp_aggregator->lag_ports; curr_port; last_port=curr_port, curr_port=curr_port->next_port_in_aggregator) {
if (curr_port == port) {
temp_aggregator->num_of_ports--;
if (!last_port) {// if it is the first port attached to the aggregator
temp_aggregator->lag_ports=port->next_port_in_aggregator;
} else {// not the first port attached to the aggregator
last_port->next_port_in_aggregator=port->next_port_in_aggregator;
}
// clear the port's relations to this aggregator
port->aggregator = NULL;
port->next_port_in_aggregator=NULL;
port->actor_port_aggregator_identifier=0;
BOND_PRINT_DBG(("Port %d left LAG %d", port->actor_port_number, temp_aggregator->aggregator_identifier));
// if the aggregator is empty, clear its parameters, and set it ready to be attached
if (!temp_aggregator->lag_ports) {
ad_clear_agg(temp_aggregator);
}
break;
}
}
if (!curr_port) { // meaning: the port was related to an aggregator but was not on the aggregator port list
printk(KERN_WARNING "bonding: Warning: Port %d (on %s) was "
"related to aggregator %d but was not on its port list\n",
port->actor_port_number, port->slave->dev->name,
port->aggregator->aggregator_identifier);
}
}
// search on all aggregators for a suitable aggregator for this port
for (aggregator = __get_first_agg(port); aggregator;
aggregator = __get_next_agg(aggregator)) {
// keep a free aggregator for later use(if needed)
if (!aggregator->lag_ports) {
if (!free_aggregator) {
free_aggregator=aggregator;
}
continue;
}
// check if current aggregator suits us
if (((aggregator->actor_oper_aggregator_key == port->actor_oper_port_key) && // if all parameters match AND
!MAC_ADDRESS_COMPARE(&(aggregator->partner_system), &(port->partner_oper_system)) &&
(aggregator->partner_system_priority == port->partner_oper_system_priority) &&
(aggregator->partner_oper_aggregator_key == port->partner_oper_key)
) &&
((MAC_ADDRESS_COMPARE(&(port->partner_oper_system), &(null_mac_addr)) && // partner answers
!aggregator->is_individual) // but is not individual OR
)
) {
// attach to the founded aggregator
port->aggregator = aggregator;
port->actor_port_aggregator_identifier=port->aggregator->aggregator_identifier;
port->next_port_in_aggregator=aggregator->lag_ports;
port->aggregator->num_of_ports++;
aggregator->lag_ports=port;
BOND_PRINT_DBG(("Port %d joined LAG %d(existing LAG)", port->actor_port_number, port->aggregator->aggregator_identifier));
// mark this port as selected
port->sm_vars |= AD_PORT_SELECTED;
found = 1;
break;
}
}
// the port couldn't find an aggregator - attach it to a new aggregator
if (!found) {
if (free_aggregator) {
// assign port a new aggregator
port->aggregator = free_aggregator;
port->actor_port_aggregator_identifier=port->aggregator->aggregator_identifier;
// update the new aggregator's parameters
// if port was responsed from the end-user
if (port->actor_oper_port_key & AD_DUPLEX_KEY_BITS) {// if port is full duplex
port->aggregator->is_individual = 0;
} else {
port->aggregator->is_individual = 1;
}
port->aggregator->actor_admin_aggregator_key = port->actor_admin_port_key;
port->aggregator->actor_oper_aggregator_key = port->actor_oper_port_key;
port->aggregator->partner_system=port->partner_oper_system;
port->aggregator->partner_system_priority = port->partner_oper_system_priority;
port->aggregator->partner_oper_aggregator_key = port->partner_oper_key;
port->aggregator->receive_state = 1;
port->aggregator->transmit_state = 1;
port->aggregator->lag_ports = port;
port->aggregator->num_of_ports++;
// mark this port as selected
port->sm_vars |= AD_PORT_SELECTED;
BOND_PRINT_DBG(("Port %d joined LAG %d(new LAG)", port->actor_port_number, port->aggregator->aggregator_identifier));
} else {
printk(KERN_ERR "bonding: Port %d (on %s) did not find a suitable aggregator\n",
port->actor_port_number, port->slave->dev->name);
}
}
// if all aggregator's ports are READY_N == TRUE, set ready=TRUE in all aggregator's ports
// else set ready=FALSE in all aggregator's ports
__set_agg_ports_ready(port->aggregator, __agg_ports_are_ready(port->aggregator));
if (!__check_agg_selection_timer(port) && (aggregator = __get_first_agg(port))) {
ad_agg_selection_logic(aggregator);
}
}
/**
* ad_agg_selection_logic - select an aggregation group for a team
* @aggregator: the aggregator we're looking at
*
* It is assumed that only one aggregator may be selected for a team.
* The logic of this function is to select (at first time) the aggregator with
* the most ports attached to it, and to reselect the active aggregator only if
* the previous aggregator has no more ports related to it.
*
* FIXME: this function MUST be called with the first agg in the bond, or
* __get_active_agg() won't work correctly. This function should be better
* called with the bond itself, and retrieve the first agg from it.
*/
static void ad_agg_selection_logic(struct aggregator *aggregator)
{
struct aggregator *best_aggregator = NULL, *active_aggregator = NULL;
struct aggregator *last_active_aggregator = NULL, *origin_aggregator;
struct port *port;
u16 num_of_aggs=0;
origin_aggregator = aggregator;
//get current active aggregator
last_active_aggregator = __get_active_agg(aggregator);
// search for the aggregator with the most ports attached to it.
do {
// count how many candidate lag's we have
if (aggregator->lag_ports) {
num_of_aggs++;
}
if (aggregator->is_active && !aggregator->is_individual && // if current aggregator is the active aggregator
MAC_ADDRESS_COMPARE(&(aggregator->partner_system), &(null_mac_addr))) { // and partner answers to 802.3ad PDUs
if (aggregator->num_of_ports) { // if any ports attached to the current aggregator
best_aggregator=NULL; // disregard the best aggregator that was chosen by now
break; // stop the selection of other aggregator if there are any ports attached to this active aggregator
} else { // no ports attached to this active aggregator
aggregator->is_active = 0; // mark this aggregator as not active anymore
}
}
if (aggregator->num_of_ports) { // if any ports attached
if (best_aggregator) { // if there is a candidte aggregator
//The reasons for choosing new best aggregator:
// 1. if current agg is NOT individual and the best agg chosen so far is individual OR
// current and best aggs are both individual or both not individual, AND
// 2a. current agg partner reply but best agg partner do not reply OR
// 2b. current agg partner reply OR current agg partner do not reply AND best agg partner also do not reply AND
// current has more ports/bandwidth, or same amount of ports but current has faster ports, THEN
// current agg become best agg so far
//if current agg is NOT individual and the best agg chosen so far is individual change best_aggregator
if (!aggregator->is_individual && best_aggregator->is_individual) {
best_aggregator=aggregator;
}
// current and best aggs are both individual or both not individual
else if ((aggregator->is_individual && best_aggregator->is_individual) ||
(!aggregator->is_individual && !best_aggregator->is_individual)) {
// current and best aggs are both individual or both not individual AND
// current agg partner reply but best agg partner do not reply
if ((MAC_ADDRESS_COMPARE(&(aggregator->partner_system), &(null_mac_addr)) &&
!MAC_ADDRESS_COMPARE(&(best_aggregator->partner_system), &(null_mac_addr)))) {
best_aggregator=aggregator;
}
// current agg partner reply OR current agg partner do not reply AND best agg partner also do not reply
else if (! (!MAC_ADDRESS_COMPARE(&(aggregator->partner_system), &(null_mac_addr)) &&
MAC_ADDRESS_COMPARE(&(best_aggregator->partner_system), &(null_mac_addr)))) {
if ((__get_agg_selection_mode(aggregator->lag_ports) == AD_BANDWIDTH)&&
(__get_agg_bandwidth(aggregator) > __get_agg_bandwidth(best_aggregator))) {
best_aggregator=aggregator;
} else if (__get_agg_selection_mode(aggregator->lag_ports) == AD_COUNT) {
if (((aggregator->num_of_ports > best_aggregator->num_of_ports) &&
(aggregator->actor_oper_aggregator_key & AD_SPEED_KEY_BITS))||
((aggregator->num_of_ports == best_aggregator->num_of_ports) &&
((u16)(aggregator->actor_oper_aggregator_key & AD_SPEED_KEY_BITS) >
(u16)(best_aggregator->actor_oper_aggregator_key & AD_SPEED_KEY_BITS)))) {
best_aggregator=aggregator;
}
}
}
}
} else {
best_aggregator=aggregator;
}
}
aggregator->is_active = 0; // mark all aggregators as not active anymore
} while ((aggregator = __get_next_agg(aggregator)));
// if we have new aggregator selected, don't replace the old aggregator if it has an answering partner,
// or if both old aggregator and new aggregator don't have answering partner
if (best_aggregator) {
if (last_active_aggregator && last_active_aggregator->lag_ports && last_active_aggregator->lag_ports->is_enabled &&
(MAC_ADDRESS_COMPARE(&(last_active_aggregator->partner_system), &(null_mac_addr)) || // partner answers OR
(!MAC_ADDRESS_COMPARE(&(last_active_aggregator->partner_system), &(null_mac_addr)) && // both old and new
!MAC_ADDRESS_COMPARE(&(best_aggregator->partner_system), &(null_mac_addr)))) // partner do not answer
) {
// if new aggregator has link, and old aggregator does not, replace old aggregator.(do nothing)
// -> don't replace otherwise.
if (!(!last_active_aggregator->actor_oper_aggregator_key && best_aggregator->actor_oper_aggregator_key)) {
best_aggregator=NULL;
last_active_aggregator->is_active = 1; // don't replace good old aggregator
}
}
}
// if there is new best aggregator, activate it
if (best_aggregator) {
for (aggregator = __get_first_agg(best_aggregator->lag_ports);
aggregator;
aggregator = __get_next_agg(aggregator)) {
BOND_PRINT_DBG(("Agg=%d; Ports=%d; a key=%d; p key=%d; Indiv=%d; Active=%d",
aggregator->aggregator_identifier, aggregator->num_of_ports,
aggregator->actor_oper_aggregator_key, aggregator->partner_oper_aggregator_key,
aggregator->is_individual, aggregator->is_active));
}
// check if any partner replys
if (best_aggregator->is_individual) {
printk(KERN_WARNING "bonding: Warning: No 802.3ad response from the link partner "
"for any adapters in the bond\n");
}
// check if there are more than one aggregator
if (num_of_aggs > 1) {
BOND_PRINT_DBG(("Warning: More than one Link Aggregation Group was "
"found in the bond. Only one group will function in the bond"));
}
best_aggregator->is_active = 1;
BOND_PRINT_DBG(("LAG %d choosed as the active LAG", best_aggregator->aggregator_identifier));
BOND_PRINT_DBG(("Agg=%d; Ports=%d; a key=%d; p key=%d; Indiv=%d; Active=%d",
best_aggregator->aggregator_identifier, best_aggregator->num_of_ports,
best_aggregator->actor_oper_aggregator_key, best_aggregator->partner_oper_aggregator_key,
best_aggregator->is_individual, best_aggregator->is_active));
// disable the ports that were related to the former active_aggregator
if (last_active_aggregator) {
for (port=last_active_aggregator->lag_ports; port; port=port->next_port_in_aggregator) {
__disable_port(port);
}
}
}
// if the selected aggregator is of join individuals(partner_system is NULL), enable their ports
active_aggregator = __get_active_agg(origin_aggregator);
if (active_aggregator) {
if (!MAC_ADDRESS_COMPARE(&(active_aggregator->partner_system), &(null_mac_addr))) {
for (port=active_aggregator->lag_ports; port; port=port->next_port_in_aggregator) {
__enable_port(port);
}
}
}
}
/**
* ad_clear_agg - clear a given aggregator's parameters
* @aggregator: the aggregator we're looking at
*
*/
static void ad_clear_agg(struct aggregator *aggregator)
{
if (aggregator) {
aggregator->is_individual = 0;
aggregator->actor_admin_aggregator_key = 0;
aggregator->actor_oper_aggregator_key = 0;
aggregator->partner_system = null_mac_addr;
aggregator->partner_system_priority = 0;
aggregator->partner_oper_aggregator_key = 0;
aggregator->receive_state = 0;
aggregator->transmit_state = 0;
aggregator->lag_ports = NULL;
aggregator->is_active = 0;
aggregator->num_of_ports = 0;
BOND_PRINT_DBG(("LAG %d was cleared", aggregator->aggregator_identifier));
}
}
/**
* ad_initialize_agg - initialize a given aggregator's parameters
* @aggregator: the aggregator we're looking at
*
*/
static void ad_initialize_agg(struct aggregator *aggregator)
{
if (aggregator) {
ad_clear_agg(aggregator);
aggregator->aggregator_mac_address = null_mac_addr;
aggregator->aggregator_identifier = 0;
aggregator->slave = NULL;
}
}
/**
* ad_initialize_port - initialize a given port's parameters
* @aggregator: the aggregator we're looking at
* @lacp_fast: boolean. whether fast periodic should be used
*
*/
static void ad_initialize_port(struct port *port, int lacp_fast)
{
if (port) {
port->actor_port_number = 1;
port->actor_port_priority = 0xff;
port->actor_system = null_mac_addr;
port->actor_system_priority = 0xffff;
port->actor_port_aggregator_identifier = 0;
port->ntt = 0;
port->actor_admin_port_key = 1;
port->actor_oper_port_key = 1;
port->actor_admin_port_state = AD_STATE_AGGREGATION | AD_STATE_LACP_ACTIVITY;
port->actor_oper_port_state = AD_STATE_AGGREGATION | AD_STATE_LACP_ACTIVITY;
if (lacp_fast) {
port->actor_oper_port_state |= AD_STATE_LACP_TIMEOUT;
}
port->partner_admin_system = null_mac_addr;
port->partner_oper_system = null_mac_addr;
port->partner_admin_system_priority = 0xffff;
port->partner_oper_system_priority = 0xffff;
port->partner_admin_key = 1;
port->partner_oper_key = 1;
port->partner_admin_port_number = 1;
port->partner_oper_port_number = 1;
port->partner_admin_port_priority = 0xff;
port->partner_oper_port_priority = 0xff;
port->partner_admin_port_state = 1;
port->partner_oper_port_state = 1;
port->is_enabled = 1;
// ****** private parameters ******
port->sm_vars = 0x3;
port->sm_rx_state = 0;
port->sm_rx_timer_counter = 0;
port->sm_periodic_state = 0;
port->sm_periodic_timer_counter = 0;
port->sm_mux_state = 0;
port->sm_mux_timer_counter = 0;
port->sm_tx_state = 0;
port->sm_tx_timer_counter = 0;
port->slave = NULL;
port->aggregator = NULL;
port->next_port_in_aggregator = NULL;
port->transaction_id = 0;
ad_initialize_lacpdu(&(port->lacpdu));
}
}
/**
* ad_enable_collecting_distributing - enable a port's transmit/receive
* @port: the port we're looking at
*
* Enable @port if it's in an active aggregator
*/
static void ad_enable_collecting_distributing(struct port *port)
{
if (port->aggregator->is_active) {
BOND_PRINT_DBG(("Enabling port %d(LAG %d)", port->actor_port_number, port->aggregator->aggregator_identifier));
__enable_port(port);
}
}
/**
* ad_disable_collecting_distributing - disable a port's transmit/receive
* @port: the port we're looking at
*
*/
static void ad_disable_collecting_distributing(struct port *port)
{
if (port->aggregator && MAC_ADDRESS_COMPARE(&(port->aggregator->partner_system), &(null_mac_addr))) {
BOND_PRINT_DBG(("Disabling port %d(LAG %d)", port->actor_port_number, port->aggregator->aggregator_identifier));
__disable_port(port);
}
}
#if 0
/**
* ad_marker_info_send - send a marker information frame
* @port: the port we're looking at
*
* This function does nothing since we decided not to implement send and handle
* response for marker PDU's, in this stage, but only to respond to marker
* information.
*/
static void ad_marker_info_send(struct port *port)
{
struct marker marker;
u16 index;
// fill the marker PDU with the appropriate values
marker.subtype = 0x02;
marker.version_number = 0x01;
marker.tlv_type = AD_MARKER_INFORMATION_SUBTYPE;
marker.marker_length = 0x16;
// convert requester_port to Big Endian
marker.requester_port = (((port->actor_port_number & 0xFF) << 8) |((u16)(port->actor_port_number & 0xFF00) >> 8));
marker.requester_system = port->actor_system;
// convert requester_port(u32) to Big Endian
marker.requester_transaction_id = (((++port->transaction_id & 0xFF) << 24) |((port->transaction_id & 0xFF00) << 8) |((port->transaction_id & 0xFF0000) >> 8) |((port->transaction_id & 0xFF000000) >> 24));
marker.pad = 0;
marker.tlv_type_terminator = 0x00;
marker.terminator_length = 0x00;
for (index=0; index<90; index++) {
marker.reserved_90[index]=0;
}
// send the marker information
if (ad_marker_send(port, &marker) >= 0) {
BOND_PRINT_DBG(("Sent Marker Information on port %d", port->actor_port_number));
}
}
#endif
/**
* ad_marker_info_received - handle receive of a Marker information frame
* @marker_info: Marker info received
* @port: the port we're looking at
*
*/
static void ad_marker_info_received(struct marker *marker_info,struct port *port)
{
struct marker marker;
// copy the received marker data to the response marker
//marker = *marker_info;
memcpy(&marker, marker_info, sizeof(struct marker));
// change the marker subtype to marker response
marker.tlv_type=AD_MARKER_RESPONSE_SUBTYPE;
// send the marker response
if (ad_marker_send(port, &marker) >= 0) {
BOND_PRINT_DBG(("Sent Marker Response on port %d", port->actor_port_number));
}
}
/**
* ad_marker_response_received - handle receive of a marker response frame
* @marker: marker PDU received
* @port: the port we're looking at
*
* This function does nothing since we decided not to implement send and handle
* response for marker PDU's, in this stage, but only to respond to marker
* information.
*/
static void ad_marker_response_received(struct marker *marker, struct port *port)
{
marker=NULL; // just to satisfy the compiler
port=NULL; // just to satisfy the compiler
// DO NOTHING, SINCE WE DECIDED NOT TO IMPLEMENT THIS FEATURE FOR NOW
}
/**
* ad_initialize_lacpdu - initialize a given lacpdu structure
* @lacpdu: lacpdu structure to initialize
*
*/
static void ad_initialize_lacpdu(struct lacpdu *lacpdu)
{
u16 index;
// initialize lacpdu data
lacpdu->subtype = 0x01;
lacpdu->version_number = 0x01;
lacpdu->tlv_type_actor_info = 0x01;
lacpdu->actor_information_length = 0x14;
// lacpdu->actor_system_priority updated on send
// lacpdu->actor_system updated on send
// lacpdu->actor_key updated on send
// lacpdu->actor_port_priority updated on send
// lacpdu->actor_port updated on send
// lacpdu->actor_state updated on send
lacpdu->tlv_type_partner_info = 0x02;
lacpdu->partner_information_length = 0x14;
for (index=0; index<=2; index++) {
lacpdu->reserved_3_1[index]=0;
}
// lacpdu->partner_system_priority updated on send
// lacpdu->partner_system updated on send
// lacpdu->partner_key updated on send
// lacpdu->partner_port_priority updated on send
// lacpdu->partner_port updated on send
// lacpdu->partner_state updated on send
for (index=0; index<=2; index++) {
lacpdu->reserved_3_2[index]=0;
}
lacpdu->tlv_type_collector_info = 0x03;
lacpdu->collector_information_length= 0x10;
lacpdu->collector_max_delay = AD_COLLECTOR_MAX_DELAY;
for (index=0; index<=11; index++) {
lacpdu->reserved_12[index]=0;
}
lacpdu->tlv_type_terminator = 0x00;
lacpdu->terminator_length = 0;
for (index=0; index<=49; index++) {
lacpdu->reserved_50[index]=0;
}
}
//////////////////////////////////////////////////////////////////////////////////////
// ================= AD exported functions to the main bonding code ==================
//////////////////////////////////////////////////////////////////////////////////////
// Check aggregators status in team every T seconds
#define AD_AGGREGATOR_SELECTION_TIMER 8
static u16 aggregator_identifier;
/**
* bond_3ad_initialize - initialize a bond's 802.3ad parameters and structures
* @bond: bonding struct to work on
* @tick_resolution: tick duration (millisecond resolution)
* @lacp_fast: boolean. whether fast periodic should be used
*
* Can be called only after the mac address of the bond is set.
*/
void bond_3ad_initialize(struct bonding *bond, u16 tick_resolution, int lacp_fast)
{
// check that the bond is not initialized yet
if (MAC_ADDRESS_COMPARE(&(BOND_AD_INFO(bond).system.sys_mac_addr), &(bond->device->dev_addr))) {
aggregator_identifier = 0;
BOND_AD_INFO(bond).lacp_fast = lacp_fast;
BOND_AD_INFO(bond).system.sys_priority = 0xFFFF;
BOND_AD_INFO(bond).system.sys_mac_addr = *((struct mac_addr *)bond->device->dev_addr);
// initialize how many times this module is called in one second(should be about every 100ms)
ad_ticks_per_sec = tick_resolution;
// initialize the aggregator selection timer(to activate an aggregation selection after initialize)
BOND_AD_INFO(bond).agg_select_timer = (AD_AGGREGATOR_SELECTION_TIMER * ad_ticks_per_sec);
BOND_AD_INFO(bond).agg_select_mode = AD_BANDWIDTH;
}
}
/**
* bond_3ad_bind_slave - initialize a slave's port
* @slave: slave struct to work on
*
* Returns: 0 on success
* < 0 on error
*/
int bond_3ad_bind_slave(struct slave *slave)
{
struct bonding *bond = bond_get_bond_by_slave(slave);
struct port *port;
struct aggregator *aggregator;
if (bond == NULL) {
printk(KERN_CRIT "The slave %s is not attached to its bond\n", slave->dev->name);
return -1;
}
//check that the slave has not been intialized yet.
if (SLAVE_AD_INFO(slave).port.slave != slave) {
// port initialization
port = &(SLAVE_AD_INFO(slave).port);
ad_initialize_port(port, BOND_AD_INFO(bond).lacp_fast);
port->slave = slave;
port->actor_port_number = SLAVE_AD_INFO(slave).id;
// key is determined according to the link speed, duplex and user key(which is yet not supported)
// ------------------------------------------------------------
// Port key : | User key | Speed |Duplex|
// ------------------------------------------------------------
// 16 6 1 0
port->actor_admin_port_key = 0; // initialize this parameter
port->actor_admin_port_key |= __get_duplex(port);
port->actor_admin_port_key |= (__get_link_speed(port) << 1);
port->actor_oper_port_key = port->actor_admin_port_key;
// if the port is not full duplex, then the port should be not lacp Enabled
if (!(port->actor_oper_port_key & AD_DUPLEX_KEY_BITS)) {
port->sm_vars &= ~AD_PORT_LACP_ENABLED;
}
// actor system is the bond's system
port->actor_system = BOND_AD_INFO(bond).system.sys_mac_addr;
// tx timer(to verify that no more than MAX_TX_IN_SECOND lacpdu's are sent in one second)
port->sm_tx_timer_counter = ad_ticks_per_sec/AD_MAX_TX_IN_SECOND;
port->aggregator = NULL;
port->next_port_in_aggregator = NULL;
__disable_port(port);
__initialize_port_locks(port);
// aggregator initialization
aggregator = &(SLAVE_AD_INFO(slave).aggregator);
ad_initialize_agg(aggregator);
aggregator->aggregator_mac_address = *((struct mac_addr *)bond->device->dev_addr);
aggregator->aggregator_identifier = (++aggregator_identifier);
aggregator->slave = slave;
aggregator->is_active = 0;
aggregator->num_of_ports = 0;
}
return 0;
}
/**
* bond_3ad_unbind_slave - deinitialize a slave's port
* @slave: slave struct to work on
*
* Search for the aggregator that is related to this port, remove the
* aggregator and assign another aggregator for other port related to it
* (if any), and remove the port.
*/
void bond_3ad_unbind_slave(struct slave *slave)
{
struct port *port, *prev_port, *temp_port;
struct aggregator *aggregator, *new_aggregator, *temp_aggregator;
int select_new_active_agg = 0;
// find the aggregator related to this slave
aggregator = &(SLAVE_AD_INFO(slave).aggregator);
// find the port related to this slave
port = &(SLAVE_AD_INFO(slave).port);
// if slave is null, the whole port is not initialized
if (!port->slave) {
printk(KERN_WARNING "bonding: Trying to unbind an uninitialized port on %s\n", slave->dev->name);
return;
}
// disable the port
ad_disable_collecting_distributing(port);
// deinitialize port's locks if necessary(os-specific)
__deinitialize_port_locks(port);
BOND_PRINT_DBG(("Unbinding Link Aggregation Group %d", aggregator->aggregator_identifier));
// check if this aggregator is occupied
if (aggregator->lag_ports) {
// check if there are other ports related to this aggregator except
// the port related to this slave(thats ensure us that there is a
// reason to search for new aggregator, and that we will find one
if ((aggregator->lag_ports != port) || (aggregator->lag_ports->next_port_in_aggregator)) {
// find new aggregator for the related port(s)
new_aggregator = __get_first_agg(port);
for (; new_aggregator; new_aggregator = __get_next_agg(new_aggregator)) {
// if the new aggregator is empty, or it connected to to our port only
if (!new_aggregator->lag_ports || ((new_aggregator->lag_ports == port) && !new_aggregator->lag_ports->next_port_in_aggregator)) {
break;
}
}
// if new aggregator found, copy the aggregator's parameters
// and connect the related lag_ports to the new aggregator
if ((new_aggregator) && ((!new_aggregator->lag_ports) || ((new_aggregator->lag_ports == port) && !new_aggregator->lag_ports->next_port_in_aggregator))) {
BOND_PRINT_DBG(("Some port(s) related to LAG %d - replaceing with LAG %d", aggregator->aggregator_identifier, new_aggregator->aggregator_identifier));
if ((new_aggregator->lag_ports == port) && new_aggregator->is_active) {
printk(KERN_INFO "bonding: Removing an active aggregator\n");
// select new active aggregator
select_new_active_agg = 1;
}
new_aggregator->is_individual = aggregator->is_individual;
new_aggregator->actor_admin_aggregator_key = aggregator->actor_admin_aggregator_key;
new_aggregator->actor_oper_aggregator_key = aggregator->actor_oper_aggregator_key;
new_aggregator->partner_system = aggregator->partner_system;
new_aggregator->partner_system_priority = aggregator->partner_system_priority;
new_aggregator->partner_oper_aggregator_key = aggregator->partner_oper_aggregator_key;
new_aggregator->receive_state = aggregator->receive_state;
new_aggregator->transmit_state = aggregator->transmit_state;
new_aggregator->lag_ports = aggregator->lag_ports;
new_aggregator->is_active = aggregator->is_active;
new_aggregator->num_of_ports = aggregator->num_of_ports;
// update the information that is written on the ports about the aggregator
for (temp_port=aggregator->lag_ports; temp_port; temp_port=temp_port->next_port_in_aggregator) {
temp_port->aggregator=new_aggregator;
temp_port->actor_port_aggregator_identifier = new_aggregator->aggregator_identifier;
}
// clear the aggregator
ad_clear_agg(aggregator);
if (select_new_active_agg) {
ad_agg_selection_logic(__get_first_agg(port));
}
} else {
printk(KERN_WARNING "bonding: Warning: unbinding aggregator, "
"and could not find a new aggregator for its ports\n");
}
} else { // in case that the only port related to this aggregator is the one we want to remove
select_new_active_agg = aggregator->is_active;
// clear the aggregator
ad_clear_agg(aggregator);
if (select_new_active_agg) {
printk(KERN_INFO "Removing an active aggregator\n");
// select new active aggregator
ad_agg_selection_logic(__get_first_agg(port));
}
}
}
BOND_PRINT_DBG(("Unbinding port %d", port->actor_port_number));
// find the aggregator that this port is connected to
temp_aggregator = __get_first_agg(port);
for (; temp_aggregator; temp_aggregator = __get_next_agg(temp_aggregator)) {
prev_port = NULL;
// search the port in the aggregator's related ports
for (temp_port=temp_aggregator->lag_ports; temp_port; prev_port=temp_port, temp_port=temp_port->next_port_in_aggregator) {
if (temp_port == port) { // the aggregator found - detach the port from this aggregator
if (prev_port) {
prev_port->next_port_in_aggregator = temp_port->next_port_in_aggregator;
} else {
temp_aggregator->lag_ports = temp_port->next_port_in_aggregator;
}
temp_aggregator->num_of_ports--;
if (temp_aggregator->num_of_ports==0) {
select_new_active_agg = temp_aggregator->is_active;
// clear the aggregator
ad_clear_agg(temp_aggregator);
if (select_new_active_agg) {
printk(KERN_INFO "Removing an active aggregator\n");
// select new active aggregator
ad_agg_selection_logic(__get_first_agg(port));
}
}
break;
}
}
}
port->slave=NULL;
}
/**
* bond_3ad_state_machine_handler - handle state machines timeout
* @bond: bonding struct to work on
*
* The state machine handling concept in this module is to check every tick
* which state machine should operate any function. The execution order is
* round robin, so when we have an interaction between state machines, the
* reply of one to each other might be delayed until next tick.
*
* This function also complete the initialization when the agg_select_timer
* times out, and it selects an aggregator for the ports that are yet not
* related to any aggregator, and selects the active aggregator for a bond.
*/
void bond_3ad_state_machine_handler(struct bonding *bond)
{
struct port *port;
struct aggregator *aggregator;
read_lock(&bond->lock);
//check if there are any slaves
if (bond->next == (struct slave *)bond) {
goto end;
}
if ((bond->device->flags & IFF_UP) != IFF_UP) {
goto end;
}
// check if agg_select_timer timer after initialize is timed out
if (BOND_AD_INFO(bond).agg_select_timer && !(--BOND_AD_INFO(bond).agg_select_timer)) {
// select the active aggregator for the bond
if ((port = __get_first_port(bond))) {
if (!port->slave) {
printk(KERN_WARNING "bonding: Warning: bond's first port is uninitialized\n");
goto end;
}
aggregator = __get_first_agg(port);
ad_agg_selection_logic(aggregator);
}
}
// for each port run the state machines
for (port = __get_first_port(bond); port; port = __get_next_port(port)) {
if (!port->slave) {
printk(KERN_WARNING "bonding: Warning: Found an uninitialized port\n");
goto end;
}
ad_rx_machine(NULL, port);
ad_periodic_machine(port);
ad_port_selection_logic(port);
ad_mux_machine(port);
ad_tx_machine(port);
// turn off the BEGIN bit, since we already handled it
if (port->sm_vars & AD_PORT_BEGIN) {
port->sm_vars &= ~AD_PORT_BEGIN;
}
}
end:
read_unlock(&bond->lock);
if ((bond->device->flags & IFF_UP) == IFF_UP) {
/* re-arm the timer */
mod_timer(&(BOND_AD_INFO(bond).ad_timer), jiffies + (AD_TIMER_INTERVAL * HZ / 1000));
}
}
/**
* bond_3ad_rx_indication - handle a received frame
* @lacpdu: received lacpdu
* @slave: slave struct to work on
* @length: length of the data received
*
* It is assumed that frames that were sent on this NIC don't returned as new
* received frames (loopback). Since only the payload is given to this
* function, it check for loopback.
*/
void bond_3ad_rx_indication(struct lacpdu *lacpdu, struct slave *slave, u16 length)
{
struct port *port;
if (length >= sizeof(struct lacpdu)) {
port = &(SLAVE_AD_INFO(slave).port);
if (!port->slave) {
printk(KERN_WARNING "bonding: Warning: port of slave %s is uninitialized\n", slave->dev->name);
return;
}
switch (lacpdu->subtype) {
case AD_TYPE_LACPDU:
__ntohs_lacpdu(lacpdu);
BOND_PRINT_DBG(("Received LACPDU on port %d", port->actor_port_number));
ad_rx_machine(lacpdu, port);
break;
case AD_TYPE_MARKER:
// No need to convert fields to Little Endian since we don't use the marker's fields.
switch (((struct marker *)lacpdu)->tlv_type) {
case AD_MARKER_INFORMATION_SUBTYPE:
BOND_PRINT_DBG(("Received Marker Information on port %d", port->actor_port_number));
ad_marker_info_received((struct marker *)lacpdu, port);
break;
case AD_MARKER_RESPONSE_SUBTYPE:
BOND_PRINT_DBG(("Received Marker Response on port %d", port->actor_port_number));
ad_marker_response_received((struct marker *)lacpdu, port);
break;
default:
BOND_PRINT_DBG(("Received an unknown Marker subtype on slot %d", port->actor_port_number));
}
}
}
}
/**
* bond_3ad_adapter_speed_changed - handle a slave's speed change indication
* @slave: slave struct to work on
*
* Handle reselection of aggregator (if needed) for this port.
*/
void bond_3ad_adapter_speed_changed(struct slave *slave)
{
struct port *port;
port = &(SLAVE_AD_INFO(slave).port);
// if slave is null, the whole port is not initialized
if (!port->slave) {
printk(KERN_WARNING "bonding: Warning: speed changed for uninitialized port on %s\n",
slave->dev->name);
return;
}
port->actor_admin_port_key &= ~AD_SPEED_KEY_BITS;
port->actor_oper_port_key=port->actor_admin_port_key |= (__get_link_speed(port) << 1);
BOND_PRINT_DBG(("Port %d changed speed", port->actor_port_number));
// there is no need to reselect a new aggregator, just signal the
// state machines to reinitialize
port->sm_vars |= AD_PORT_BEGIN;
}
/**
* bond_3ad_adapter_duplex_changed - handle a slave's duplex change indication
* @slave: slave struct to work on
*
* Handle reselection of aggregator (if needed) for this port.
*/
void bond_3ad_adapter_duplex_changed(struct slave *slave)
{
struct port *port;
port=&(SLAVE_AD_INFO(slave).port);
// if slave is null, the whole port is not initialized
if (!port->slave) {
printk(KERN_WARNING "bonding: Warning: duplex changed for uninitialized port on %s\n",
slave->dev->name);
return;
}
port->actor_admin_port_key &= ~AD_DUPLEX_KEY_BITS;
port->actor_oper_port_key=port->actor_admin_port_key |= __get_duplex(port);
BOND_PRINT_DBG(("Port %d changed duplex", port->actor_port_number));
// there is no need to reselect a new aggregator, just signal the
// state machines to reinitialize
port->sm_vars |= AD_PORT_BEGIN;
}
/**
* bond_3ad_handle_link_change - handle a slave's link status change indication
* @slave: slave struct to work on
* @status: whether the link is now up or down
*
* Handle reselection of aggregator (if needed) for this port.
*/
void bond_3ad_handle_link_change(struct slave *slave, char link)
{
struct port *port;
port = &(SLAVE_AD_INFO(slave).port);
// if slave is null, the whole port is not initialized
if (!port->slave) {
#ifdef BONDING_DEBUG
printk(KERN_WARNING "bonding: Warning: link status changed for uninitialized port on %s\n",
slave->dev->name);
#endif
return;
}
// on link down we are zeroing duplex and speed since some of the adaptors(ce1000.lan) report full duplex/speed instead of N/A(duplex) / 0(speed)
// on link up we are forcing recheck on the duplex and speed since some of he adaptors(ce1000.lan) report
if (link == BOND_LINK_UP) {
port->is_enabled = 1;
port->actor_admin_port_key &= ~AD_DUPLEX_KEY_BITS;
port->actor_oper_port_key=port->actor_admin_port_key |= __get_duplex(port);
port->actor_admin_port_key &= ~AD_SPEED_KEY_BITS;
port->actor_oper_port_key=port->actor_admin_port_key |= (__get_link_speed(port) << 1);
} else {
/* link has failed */
port->is_enabled = 0;
port->actor_admin_port_key &= ~AD_DUPLEX_KEY_BITS;
port->actor_oper_port_key= (port->actor_admin_port_key &= ~AD_SPEED_KEY_BITS);
}
//BOND_PRINT_DBG(("Port %d changed link status to %s", port->actor_port_number, ((link == BOND_LINK_UP)?"UP":"DOWN")));
// there is no need to reselect a new aggregator, just signal the
// state machines to reinitialize
port->sm_vars |= AD_PORT_BEGIN;
}
/**
* bond_3ad_get_active_agg_info - get information of the active aggregator
* @bond: bonding struct to work on
* @ad_info: ad_info struct to fill with the bond's info
*
* Returns: 0 on success
* < 0 on error
*/
int bond_3ad_get_active_agg_info(struct bonding *bond, struct ad_info *ad_info)
{
struct aggregator *aggregator = NULL;
struct port *port;
for (port = __get_first_port(bond); port; port = __get_next_port(port)) {
if (port->aggregator && port->aggregator->is_active) {
aggregator = port->aggregator;
break;
}
}
if (aggregator) {
ad_info->aggregator_id = aggregator->aggregator_identifier;
ad_info->ports = aggregator->num_of_ports;
ad_info->actor_key = aggregator->actor_oper_aggregator_key;
ad_info->partner_key = aggregator->partner_oper_aggregator_key;
memcpy(ad_info->partner_system, aggregator->partner_system.mac_addr_value, ETH_ALEN);
return 0;
}
return -1;
}
int bond_3ad_xmit_xor(struct sk_buff *skb, struct net_device *dev)
{
slave_t *slave, *start_at;
struct bonding *bond = (struct bonding *) dev->priv;
struct ethhdr *data = (struct ethhdr *)skb->data;
int slave_agg_no;
int slaves_in_agg;
int agg_id;
struct ad_info ad_info;
if (!IS_UP(dev)) { /* bond down */
dev_kfree_skb(skb);
return 0;
}
if (bond == NULL) {
printk(KERN_CRIT "bonding: Error: bond is NULL on device %s\n", dev->name);
dev_kfree_skb(skb);
return 0;
}
read_lock(&bond->lock);
slave = bond->prev;
/* check if bond is empty */
if ((slave == (struct slave *) bond) || (bond->slave_cnt == 0)) {
printk(KERN_DEBUG "ERROR: bond is empty\n");
dev_kfree_skb(skb);
read_unlock(&bond->lock);
return 0;
}
if (bond_3ad_get_active_agg_info(bond, &ad_info)) {
printk(KERN_DEBUG "ERROR: bond_3ad_get_active_agg_info failed\n");
dev_kfree_skb(skb);
read_unlock(&bond->lock);
return 0;
}
slaves_in_agg = ad_info.ports;
agg_id = ad_info.aggregator_id;
if (slaves_in_agg == 0) {
/*the aggregator is empty*/
printk(KERN_DEBUG "ERROR: active aggregator is empty\n");
dev_kfree_skb(skb);
read_unlock(&bond->lock);
return 0;
}
/* we're at the root, get the first slave */
if ((slave == NULL) || (slave->dev == NULL)) {
/* no suitable interface, frame not sent */
dev_kfree_skb(skb);
read_unlock(&bond->lock);
return 0;
}
slave_agg_no = (data->h_dest[5]^slave->dev->dev_addr[5]) % slaves_in_agg;
while (slave != (slave_t *)bond) {
struct aggregator *agg = SLAVE_AD_INFO(slave).port.aggregator;
if (agg && (agg->aggregator_identifier == agg_id)) {
slave_agg_no--;
if (slave_agg_no < 0) {
break;
}
}
slave = slave->prev;
if (slave == NULL) {
printk(KERN_ERR "bonding: Error: slave is NULL\n");
dev_kfree_skb(skb);
read_unlock(&bond->lock);
return 0;
}
}
if (slave == (slave_t *)bond) {
printk(KERN_ERR "bonding: Error: Couldn't find a slave to tx on for aggregator ID %d\n", agg_id);
dev_kfree_skb(skb);
read_unlock(&bond->lock);
return 0;
}
start_at = slave;
do {
int slave_agg_id = 0;
struct aggregator *agg;
if (slave == NULL) {
printk(KERN_ERR "bonding: Error: slave is NULL\n");
dev_kfree_skb(skb);
read_unlock(&bond->lock);
return 0;
}
agg = SLAVE_AD_INFO(slave).port.aggregator;
if (agg) {
slave_agg_id = agg->aggregator_identifier;
}
if (SLAVE_IS_OK(slave) &&
agg && (slave_agg_id == agg_id)) {
skb->dev = slave->dev;
skb->priority = 1;
dev_queue_xmit(skb);
read_unlock(&bond->lock);
return 0;
}
} while ((slave = slave->next) != start_at);
/* no suitable interface, frame not sent */
dev_kfree_skb(skb);
read_unlock(&bond->lock);
return 0;
}
int bond_3ad_lacpdu_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type* ptype)
{
struct bonding *bond = (struct bonding *)dev->priv;
struct slave *slave = NULL;
int ret = NET_RX_DROP;
if (!(dev->flags & IFF_MASTER)) {
goto out;
}
read_lock(&bond->lock);
slave = bond_get_slave_by_dev((struct bonding *)dev->priv,
skb->real_dev);
if (slave == NULL) {
goto out_unlock;
}
bond_3ad_rx_indication((struct lacpdu *) skb->data, slave, skb->len);
ret = NET_RX_SUCCESS;
out_unlock:
read_unlock(&bond->lock);
out:
dev_kfree_skb(skb);
return ret;
}
/*
* Copyright(c) 1999 - 2003 Intel Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc., 59
* Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* The full GNU General Public License is included in this distribution in the
* file called LICENSE.
*
*
* Changes:
*
* 2003/05/01 - Tsippy Mendelson <tsippy.mendelson at intel dot com> and
* Amir Noam <amir.noam at intel dot com>
* - Added support for lacp_rate module param.
*
* 2003/05/01 - Shmulik Hen <shmulik.hen at intel dot com>
* - Renamed bond_3ad_link_status_changed() to
* bond_3ad_handle_link_change() for compatibility with TLB.
*/
#ifndef __BOND_3AD_H__
#define __BOND_3AD_H__
#include <asm/byteorder.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
// General definitions
#define BOND_ETH_P_LACPDU 0x8809
#define PKT_TYPE_LACPDU __constant_htons(BOND_ETH_P_LACPDU)
#define AD_TIMER_INTERVAL 100 /*msec*/
#define MULTICAST_LACPDU_ADDR {0x01, 0x80, 0xC2, 0x00, 0x00, 0x02}
#define AD_MULTICAST_LACPDU_ADDR {MULTICAST_LACPDU_ADDR}
#define AD_LACP_SLOW 0
#define AD_LACP_FAST 1
typedef struct mac_addr {
u8 mac_addr_value[ETH_ALEN];
} mac_addr_t;
typedef enum {
AD_BANDWIDTH = 0,
AD_COUNT
} agg_selection_t;
// rx machine states(43.4.11 in the 802.3ad standard)
typedef enum {
AD_RX_DUMMY,
AD_RX_INITIALIZE, // rx Machine
AD_RX_PORT_DISABLED, // rx Machine
AD_RX_LACP_DISABLED, // rx Machine
AD_RX_EXPIRED, // rx Machine
AD_RX_DEFAULTED, // rx Machine
AD_RX_CURRENT // rx Machine
} rx_states_t;
// periodic machine states(43.4.12 in the 802.3ad standard)
typedef enum {
AD_PERIODIC_DUMMY,
AD_NO_PERIODIC, // periodic machine
AD_FAST_PERIODIC, // periodic machine
AD_SLOW_PERIODIC, // periodic machine
AD_PERIODIC_TX // periodic machine
} periodic_states_t;
// mux machine states(43.4.13 in the 802.3ad standard)
typedef enum {
AD_MUX_DUMMY,
AD_MUX_DETACHED, // mux machine
AD_MUX_WAITING, // mux machine
AD_MUX_ATTACHED, // mux machine
AD_MUX_COLLECTING_DISTRIBUTING // mux machine
} mux_states_t;
// tx machine states(43.4.15 in the 802.3ad standard)
typedef enum {
AD_TX_DUMMY,
AD_TRANSMIT // tx Machine
} tx_states_t;
// rx indication types
typedef enum {
AD_TYPE_LACPDU = 1, // type lacpdu
AD_TYPE_MARKER // type marker
} pdu_type_t;
// rx marker indication types
typedef enum {
AD_MARKER_INFORMATION_SUBTYPE = 1, // marker imformation subtype
AD_MARKER_RESPONSE_SUBTYPE // marker response subtype
} marker_subtype_t;
// timers types(43.4.9 in the 802.3ad standard)
typedef enum {
AD_CURRENT_WHILE_TIMER,
AD_ACTOR_CHURN_TIMER,
AD_PERIODIC_TIMER,
AD_PARTNER_CHURN_TIMER,
AD_WAIT_WHILE_TIMER
} ad_timers_t;
#pragma pack(1)
typedef struct ad_header {
struct mac_addr destination_address;
struct mac_addr source_address;
u16 length_type;
} ad_header_t;
// Link Aggregation Control Protocol(LACP) data unit structure(43.4.2.2 in the 802.3ad standard)
typedef struct lacpdu {
u8 subtype; // = LACP(= 0x01)
u8 version_number;
u8 tlv_type_actor_info; // = actor information(type/length/value)
u8 actor_information_length; // = 20
u16 actor_system_priority;
struct mac_addr actor_system;
u16 actor_key;
u16 actor_port_priority;
u16 actor_port;
u8 actor_state;
u8 reserved_3_1[3]; // = 0
u8 tlv_type_partner_info; // = partner information
u8 partner_information_length; // = 20
u16 partner_system_priority;
struct mac_addr partner_system;
u16 partner_key;
u16 partner_port_priority;
u16 partner_port;
u8 partner_state;
u8 reserved_3_2[3]; // = 0
u8 tlv_type_collector_info; // = collector information
u8 collector_information_length; // = 16
u16 collector_max_delay;
u8 reserved_12[12];
u8 tlv_type_terminator; // = terminator
u8 terminator_length; // = 0
u8 reserved_50[50]; // = 0
} lacpdu_t;
typedef struct lacpdu_header {
struct ad_header ad_header;
struct lacpdu lacpdu;
} lacpdu_header_t;
// Marker Protocol Data Unit(PDU) structure(43.5.3.2 in the 802.3ad standard)
typedef struct marker {
u8 subtype; // = 0x02 (marker PDU)
u8 version_number; // = 0x01
u8 tlv_type; // = 0x01 (marker information)
// = 0x02 (marker response information)
u8 marker_length; // = 0x16
u16 requester_port; // The number assigned to the port by the requester
struct mac_addr requester_system; // The requester’s system id
u32 requester_transaction_id; // The transaction id allocated by the requester,
u16 pad; // = 0
u8 tlv_type_terminator; // = 0x00
u8 terminator_length; // = 0x00
u8 reserved_90[90]; // = 0
} marker_t;
typedef struct marker_header {
struct ad_header ad_header;
struct marker marker;
} marker_header_t;
#pragma pack()
struct slave;
struct bonding;
struct ad_info;
struct port;
#ifdef __ia64__
#pragma pack(8)
#endif
// aggregator structure(43.4.5 in the 802.3ad standard)
typedef struct aggregator {
struct mac_addr aggregator_mac_address;
u16 aggregator_identifier;
u16 is_individual; // BOOLEAN
u16 actor_admin_aggregator_key;
u16 actor_oper_aggregator_key;
struct mac_addr partner_system;
u16 partner_system_priority;
u16 partner_oper_aggregator_key;
u16 receive_state; // BOOLEAN
u16 transmit_state; // BOOLEAN
struct port *lag_ports;
// ****** PRIVATE PARAMETERS ******
struct slave *slave; // pointer to the bond slave that this aggregator belongs to
u16 is_active; // BOOLEAN. Indicates if this aggregator is active
u16 num_of_ports;
} aggregator_t;
// port structure(43.4.6 in the 802.3ad standard)
typedef struct port {
u16 actor_port_number;
u16 actor_port_priority;
struct mac_addr actor_system; // This parameter is added here although it is not specified in the standard, just for simplification
u16 actor_system_priority; // This parameter is added here although it is not specified in the standard, just for simplification
u16 actor_port_aggregator_identifier;
u16 ntt; // BOOLEAN
u16 actor_admin_port_key;
u16 actor_oper_port_key;
u8 actor_admin_port_state;
u8 actor_oper_port_state;
struct mac_addr partner_admin_system;
struct mac_addr partner_oper_system;
u16 partner_admin_system_priority;
u16 partner_oper_system_priority;
u16 partner_admin_key;
u16 partner_oper_key;
u16 partner_admin_port_number;
u16 partner_oper_port_number;
u16 partner_admin_port_priority;
u16 partner_oper_port_priority;
u8 partner_admin_port_state;
u8 partner_oper_port_state;
u16 is_enabled; // BOOLEAN
// ****** PRIVATE PARAMETERS ******
u16 sm_vars; // all state machines variables for this port
rx_states_t sm_rx_state; // state machine rx state
u16 sm_rx_timer_counter; // state machine rx timer counter
periodic_states_t sm_periodic_state;// state machine periodic state
u16 sm_periodic_timer_counter; // state machine periodic timer counter
mux_states_t sm_mux_state; // state machine mux state
u16 sm_mux_timer_counter; // state machine mux timer counter
tx_states_t sm_tx_state; // state machine tx state
u16 sm_tx_timer_counter; // state machine tx timer counter(allways on - enter to transmit state 3 time per second)
struct slave *slave; // pointer to the bond slave that this port belongs to
struct aggregator *aggregator; // pointer to an aggregator that this port related to
struct port *next_port_in_aggregator; // Next port on the linked list of the parent aggregator
u32 transaction_id; // continuous number for identification of Marker PDU's;
struct lacpdu lacpdu; // the lacpdu that will be sent for this port
} port_t;
// system structure
typedef struct ad_system {
u16 sys_priority;
struct mac_addr sys_mac_addr;
} ad_system_t;
#ifdef __ia64__
#pragma pack()
#endif
// ================= AD Exported structures to the main bonding code ==================
#define BOND_AD_INFO(bond) ((bond)->ad_info)
#define SLAVE_AD_INFO(slave) ((slave)->ad_info)
struct ad_bond_info {
ad_system_t system; // 802.3ad system structure
u32 agg_select_timer; // Timer to select aggregator after all adapter's hand shakes
u32 agg_select_mode; // Mode of selection of active aggregator(bandwidth/count)
int lacp_fast; /* whether fast periodic tx should be
* requested
*/
struct timer_list ad_timer;
struct packet_type ad_pkt_type;
};
struct ad_slave_info {
struct aggregator aggregator; // 802.3ad aggregator structure
struct port port; // 802.3ad port structure
spinlock_t rx_machine_lock; // To avoid race condition between callback and receive interrupt
u16 id;
};
// ================= AD Exported functions to the main bonding code ==================
void bond_3ad_initialize(struct bonding *bond, u16 tick_resolution, int lacp_fast);
int bond_3ad_bind_slave(struct slave *slave);
void bond_3ad_unbind_slave(struct slave *slave);
void bond_3ad_state_machine_handler(struct bonding *bond);
void bond_3ad_rx_indication(struct lacpdu *lacpdu, struct slave *slave, u16 length);
void bond_3ad_adapter_speed_changed(struct slave *slave);
void bond_3ad_adapter_duplex_changed(struct slave *slave);
void bond_3ad_handle_link_change(struct slave *slave, char link);
int bond_3ad_get_active_agg_info(struct bonding *bond, struct ad_info *ad_info);
int bond_3ad_xmit_xor(struct sk_buff *skb, struct net_device *dev);
int bond_3ad_lacpdu_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type* ptype);
#endif //__BOND_3AD_H__
/*
* Copyright(c) 1999 - 2003 Intel Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* The full GNU General Public License is included in this distribution in the
* file called LICENSE.
*/
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/pkt_sched.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/if_arp.h>
#include <linux/if_ether.h>
#include <linux/if_bonding.h>
#include <net/ipx.h>
#include <net/arp.h>
#include <asm/byteorder.h>
#include "bonding.h"
#include "bond_alb.h"
#define ALB_TIMER_TICKS_PER_SEC 10 /* should be a divisor of HZ */
#define BOND_TLB_REBALANCE_INTERVAL 10 /* in seconds, periodic re-balancing
* used for division - never set
* to zero !!!
*/
#define BOND_ALB_LP_INTERVAL 1 /* in seconds periodic send of
* learning packets to the switch
*/
#define BOND_TLB_REBALANCE_TICKS (BOND_TLB_REBALANCE_INTERVAL \
* ALB_TIMER_TICKS_PER_SEC)
#define BOND_ALB_LP_TICKS (BOND_ALB_LP_INTERVAL \
* ALB_TIMER_TICKS_PER_SEC)
#define TLB_HASH_TABLE_SIZE 256 /* The size of the clients hash table.
* Note that this value MUST NOT be smaller
* because the key hash table BYTE wide !
*/
#define TLB_NULL_INDEX 0xffffffff
#define MAX_LP_RETRY 3
/* rlb defs */
#define RLB_HASH_TABLE_SIZE 256
#define RLB_NULL_INDEX 0xffffffff
#define RLB_UPDATE_DELAY 2*ALB_TIMER_TICKS_PER_SEC /* 2 seconds */
#define RLB_ARP_BURST_SIZE 2
#define RLB_UPDATE_RETRY 3 /* 3-ticks - must be smaller than the rlb
* rebalance interval (5 min).
*/
/* RLB_PROMISC_TIMEOUT = 10 sec equals the time that the current slave is
* promiscuous after failover
*/
#define RLB_PROMISC_TIMEOUT 10*ALB_TIMER_TICKS_PER_SEC
#pragma pack(1)
struct learning_pkt {
u8 mac_dst[ETH_ALEN];
u8 mac_src[ETH_ALEN];
u16 type;
u8 padding[ETH_ZLEN - (2*ETH_ALEN + 2)];
};
struct arp_pkt {
u16 hw_addr_space;
u16 prot_addr_space;
u8 hw_addr_len;
u8 prot_addr_len;
u16 op_code;
u8 mac_src[ETH_ALEN]; /* sender hardware address */
u32 ip_src; /* sender IP address */
u8 mac_dst[ETH_ALEN]; /* target hardware address */
u32 ip_dst; /* target IP address */
};
#pragma pack()
/* Forward declaration */
static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[]);
static inline u8
_simple_hash(u8 *hash_start, int hash_size)
{
int i;
u8 hash = 0;
for (i=0; i<hash_size; i++) {
hash ^= hash_start[i];
}
return hash;
}
/*********************** tlb specific functions ***************************/
static inline void
_lock_tx_hashtbl(struct bonding *bond)
{
spin_lock(&(BOND_ALB_INFO(bond).tx_hashtbl_lock));
}
static inline void
_unlock_tx_hashtbl(struct bonding *bond)
{
spin_unlock(&(BOND_ALB_INFO(bond).tx_hashtbl_lock));
}
/* Caller must hold tx_hashtbl lock */
static inline void
tlb_init_table_entry(struct bonding *bond, u8 index, u8 save_load)
{
struct tlb_client_info *entry;
if (BOND_ALB_INFO(bond).tx_hashtbl == NULL) {
return;
}
entry = &(BOND_ALB_INFO(bond).tx_hashtbl[index]);
/* at end of cycle, save the load that was transmitted to the client
* during the cycle, and set the tx_bytes counter to 0 for counting
* the load during the next cycle
*/
if (save_load) {
entry->load_history = 1 + entry->tx_bytes /
BOND_TLB_REBALANCE_INTERVAL;
entry->tx_bytes = 0;
}
entry->tx_slave = NULL;
entry->next = TLB_NULL_INDEX;
entry->prev = TLB_NULL_INDEX;
}
static inline void
tlb_init_slave(struct slave *slave)
{
struct tlb_slave_info *slave_info = &(SLAVE_TLB_INFO(slave));
slave_info->load = 0;
slave_info->head = TLB_NULL_INDEX;
}
/* Caller must hold bond lock for read */
static inline void
tlb_clear_slave(struct bonding *bond, struct slave *slave, u8 save_load)
{
struct tlb_client_info *tx_hash_table = NULL;
u32 index, next_index;
/* clear slave from tx_hashtbl */
_lock_tx_hashtbl(bond);
tx_hash_table = BOND_ALB_INFO(bond).tx_hashtbl;
if (tx_hash_table) {
index = SLAVE_TLB_INFO(slave).head;
while (index != TLB_NULL_INDEX) {
next_index = tx_hash_table[index].next;
tlb_init_table_entry(bond, index, save_load);
index = next_index;
}
}
_unlock_tx_hashtbl(bond);
tlb_init_slave(slave);
}
/* Must be called before starting the monitor timer */
static int
tlb_initialize(struct bonding *bond)
{
struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
int i;
size_t size;
#if(TLB_HASH_TABLE_SIZE != 256)
/* Key to the hash table is byte wide. Check the size! */
#error Hash Table size is wrong.
#endif
spin_lock_init(&(bond_info->tx_hashtbl_lock));
_lock_tx_hashtbl(bond);
if (bond_info->tx_hashtbl != NULL) {
printk (KERN_ERR "%s: TLB hash table is not NULL\n",
bond->device->name);
_unlock_tx_hashtbl(bond);
return -1;
}
size = TLB_HASH_TABLE_SIZE * sizeof(struct tlb_client_info);
bond_info->tx_hashtbl = kmalloc(size, GFP_KERNEL);
if (bond_info->tx_hashtbl == NULL) {
printk (KERN_ERR "%s: Failed to allocate TLB hash table\n",
bond->device->name);
_unlock_tx_hashtbl(bond);
return -1;
}
memset(bond_info->tx_hashtbl, 0, size);
for (i=0; i<TLB_HASH_TABLE_SIZE; i++) {
tlb_init_table_entry(bond, i, 1);
}
_unlock_tx_hashtbl(bond);
return 0;
}
/* Must be called only after all slaves have been released */
static void
tlb_deinitialize(struct bonding *bond)
{
struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
_lock_tx_hashtbl(bond);
if (bond_info->tx_hashtbl == NULL) {
_unlock_tx_hashtbl(bond);
return;
}
kfree(bond_info->tx_hashtbl);
bond_info->tx_hashtbl = NULL;
_unlock_tx_hashtbl(bond);
}
/* Caller must hold bond lock for read */
static struct slave*
tlb_get_least_loaded_slave(struct bonding *bond)
{
struct slave *slave;
struct slave *least_loaded;
u32 curr_gap, max_gap;
/* Find the first enabled slave */
slave = bond_get_first_slave(bond);
while (slave) {
if (SLAVE_IS_OK(slave)) {
break;
}
slave = bond_get_next_slave(bond, slave);
}
if (!slave) {
return NULL;
}
least_loaded = slave;
max_gap = (slave->speed * 1000000) -
(SLAVE_TLB_INFO(slave).load * 8);
/* Find the slave with the largest gap */
slave = bond_get_next_slave(bond, slave);
while (slave) {
if (SLAVE_IS_OK(slave)) {
curr_gap = (slave->speed * 1000000) -
(SLAVE_TLB_INFO(slave).load * 8);
if (max_gap < curr_gap) {
least_loaded = slave;
max_gap = curr_gap;
}
}
slave = bond_get_next_slave(bond, slave);
}
return least_loaded;
}
/* Caller must hold bond lock for read */
struct slave*
tlb_choose_channel(struct bonding *bond, u32 hash_index, u32 skb_len)
{
struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
struct tlb_client_info *hash_table = NULL;
struct slave *assigned_slave = NULL;
_lock_tx_hashtbl(bond);
hash_table = bond_info->tx_hashtbl;
if (hash_table == NULL) {
printk (KERN_ERR "%s: TLB hash table is NULL\n",
bond->device->name);
_unlock_tx_hashtbl(bond);
return NULL;
}
assigned_slave = hash_table[hash_index].tx_slave;
if (!assigned_slave) {
assigned_slave = tlb_get_least_loaded_slave(bond);
if (assigned_slave) {
struct tlb_slave_info *slave_info =
&(SLAVE_TLB_INFO(assigned_slave));
u32 next_index = slave_info->head;
hash_table[hash_index].tx_slave = assigned_slave;
hash_table[hash_index].next = next_index;
hash_table[hash_index].prev = TLB_NULL_INDEX;
if (next_index != TLB_NULL_INDEX) {
hash_table[next_index].prev = hash_index;
}
slave_info->head = hash_index;
slave_info->load +=
hash_table[hash_index].load_history;
}
}
if (assigned_slave) {
hash_table[hash_index].tx_bytes += skb_len;
}
_unlock_tx_hashtbl(bond);
return assigned_slave;
}
/*********************** rlb specific functions ***************************/
static inline void
_lock_rx_hashtbl(struct bonding *bond)
{
spin_lock(&(BOND_ALB_INFO(bond).rx_hashtbl_lock));
}
static inline void
_unlock_rx_hashtbl(struct bonding *bond)
{
spin_unlock(&(BOND_ALB_INFO(bond).rx_hashtbl_lock));
}
/* when an ARP REPLY is received from a client update its info
* in the rx_hashtbl
*/
static void
rlb_update_entry_from_arp(struct bonding *bond, struct arp_pkt *arp)
{
u32 hash_index;
struct rlb_client_info *client_info = NULL;
struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
_lock_rx_hashtbl(bond);
if (bond_info->rx_hashtbl == NULL) {
_unlock_rx_hashtbl(bond);
return;
}
hash_index = _simple_hash((u8*)&(arp->ip_src), 4);
client_info = &(bond_info->rx_hashtbl[hash_index]);
if ((client_info->assigned) &&
(client_info->ip_src == arp->ip_dst) &&
(client_info->ip_dst == arp->ip_src)) {
/* update the clients MAC address */
memcpy(client_info->mac_dst, arp->mac_src, ETH_ALEN);
client_info->ntt = 1;
bond_info->rx_ntt = 1;
}
_unlock_rx_hashtbl(bond);
}
static int
rlb_arp_recv(struct sk_buff *skb,
struct net_device *dev,
struct packet_type* ptype)
{
struct bonding *bond = (struct bonding *)dev->priv;
int ret = NET_RX_DROP;
struct arp_pkt *arp = (struct arp_pkt *)skb->data;
if (!(dev->flags & IFF_MASTER)) {
goto out;
}
if (!arp) {
printk(KERN_ERR "Packet has no ARP data\n");
goto out;
}
if (skb->len < sizeof(struct arp_pkt)) {
printk(KERN_ERR "Packet is too small to be an ARP\n");
goto out;
}
if (arp->op_code == htons(ARPOP_REPLY)) {
/* update rx hash table for this ARP */
rlb_update_entry_from_arp(bond, arp);
BOND_PRINT_DBG(("Server received an ARP Reply from client"));
}
ret = NET_RX_SUCCESS;
out:
dev_kfree_skb(skb);
return ret;
}
/* Caller must hold bond lock for read */
static struct slave*
rlb_next_rx_slave(struct bonding *bond)
{
struct slave *rx_slave = NULL, *slave = NULL;
unsigned int i = 0;
struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
slave = bond_info->next_rx_slave;
if (slave == NULL) {
slave = bond->next;
}
/* this loop uses the circular linked list property of the
* slave's list to go through all slaves
*/
for (i = 0; i < bond->slave_cnt; i++, slave = slave->next) {
if (SLAVE_IS_OK(slave)) {
if (!rx_slave) {
rx_slave = slave;
}
else if (slave->speed > rx_slave->speed) {
rx_slave = slave;
}
}
}
if (rx_slave) {
bond_info->next_rx_slave = rx_slave->next;
}
return rx_slave;
}
/* teach the switch the mac of a disabled slave
* on the primary for fault tolerance
*
* Caller must hold bond->ptrlock for write or bond lock for write
*/
static void
rlb_teach_disabled_mac_on_primary(struct bonding *bond, u8 addr[])
{
if (!bond->current_slave) {
return;
}
if (!bond->alb_info.primary_is_promisc) {
bond->alb_info.primary_is_promisc = 1;
dev_set_promiscuity(bond->current_slave->dev, 1);
}
bond->alb_info.rlb_promisc_timeout_counter = 0;
alb_send_learning_packets(bond->current_slave, addr);
}
/* slave being removed should not be active at this point
*
* Caller must hold bond lock for read
*/
static void
rlb_clear_slave(struct bonding *bond, struct slave *slave)
{
struct rlb_client_info *rx_hash_table = NULL;
struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
u8 mac_bcast[ETH_ALEN] = {0xff,0xff,0xff,0xff,0xff,0xff};
u32 index, next_index;
/* clear slave from rx_hashtbl */
_lock_rx_hashtbl(bond);
rx_hash_table = bond_info->rx_hashtbl;
if (rx_hash_table == NULL) {
_unlock_rx_hashtbl(bond);
return;
}
index = bond_info->rx_hashtbl_head;
for (; index != RLB_NULL_INDEX; index = next_index) {
next_index = rx_hash_table[index].next;
if (rx_hash_table[index].slave == slave) {
struct slave *assigned_slave = rlb_next_rx_slave(bond);
if (assigned_slave) {
rx_hash_table[index].slave = assigned_slave;
if (memcmp(rx_hash_table[index].mac_dst,
mac_bcast, ETH_ALEN)) {
bond_info->rx_hashtbl[index].ntt = 1;
bond_info->rx_ntt = 1;
/* A slave has been removed from the
* table because it is either disabled
* or being released. We must retry the
* update to avoid clients from not
* being updated & disconnecting when
* there is stress
*/
bond_info->rlb_update_retry_counter =
RLB_UPDATE_RETRY;
}
} else { /* there is no active slave */
rx_hash_table[index].slave = NULL;
}
}
}
_unlock_rx_hashtbl(bond);
write_lock(&bond->ptrlock);
if (slave != bond->current_slave) {
rlb_teach_disabled_mac_on_primary(bond, slave->dev->dev_addr);
}
write_unlock(&bond->ptrlock);
}
static void
rlb_update_client(struct rlb_client_info *client_info)
{
int i = 0;
if (client_info->slave == NULL) {
return;
}
for (i=0; i<RLB_ARP_BURST_SIZE; i++) {
arp_send(ARPOP_REPLY, ETH_P_ARP,
client_info->ip_dst,
client_info->slave->dev,
client_info->ip_src,
client_info->mac_dst,
client_info->slave->dev->dev_addr,
client_info->mac_dst);
}
}
/* sends ARP REPLIES that update the clients that need updating */
static void
rlb_update_rx_clients(struct bonding *bond)
{
u32 hash_index;
struct rlb_client_info *client_info = NULL;
struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
_lock_rx_hashtbl(bond);
if (bond_info->rx_hashtbl == NULL) {
_unlock_rx_hashtbl(bond);
return;
}
hash_index = bond_info->rx_hashtbl_head;
for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) {
client_info = &(bond_info->rx_hashtbl[hash_index]);
if (client_info->ntt) {
rlb_update_client(client_info);
if (bond_info->rlb_update_retry_counter == 0) {
client_info->ntt = 0;
}
}
}
/* do not update the entries again untill this counter is zero so that
* not to confuse the clients.
*/
bond_info->rlb_update_delay_counter = RLB_UPDATE_DELAY;
_unlock_rx_hashtbl(bond);
}
/* The slave was assigned a new mac address - update the clients */
static void
rlb_req_update_slave_clients(struct bonding *bond, struct slave *slave)
{
u32 hash_index;
u8 ntt = 0;
struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
u8 mac_bcast[ETH_ALEN] = {0xff,0xff,0xff,0xff,0xff,0xff};
struct rlb_client_info* client_info = NULL;
_lock_rx_hashtbl(bond);
if (bond_info->rx_hashtbl == NULL) {
_unlock_rx_hashtbl(bond);
return;
}
hash_index = bond_info->rx_hashtbl_head;
for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) {
client_info = &(bond_info->rx_hashtbl[hash_index]);
if ((client_info->slave == slave) &&
memcmp(client_info->mac_dst, mac_bcast, ETH_ALEN)) {
client_info->ntt = 1;
ntt = 1;
}
}
// update the team's flag only after the whole iteration
if (ntt) {
bond_info->rx_ntt = 1;
//fasten the change
bond_info->rlb_update_retry_counter = RLB_UPDATE_RETRY;
}
_unlock_rx_hashtbl(bond);
}
/* mark all clients using src_ip to be updated */
static void
rlb_req_update_subnet_clients(struct bonding *bond, u32 src_ip)
{
u32 hash_index;
struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
u8 mac_bcast[ETH_ALEN] = {0xff,0xff,0xff,0xff,0xff,0xff};
struct rlb_client_info *client_info = NULL;
_lock_rx_hashtbl(bond);
if (bond_info->rx_hashtbl == NULL) {
_unlock_rx_hashtbl(bond);
return;
}
hash_index = bond_info->rx_hashtbl_head;
for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) {
client_info = &(bond_info->rx_hashtbl[hash_index]);
if (!client_info->slave) {
printk(KERN_ERR "Bonding: Error: found a client with no"
" channel in the client's hash table\n");
continue;
}
/*update all clients using this src_ip, that are not assigned
* to the team's address (current_slave) and have a known
* unicast mac address.
*/
if ((client_info->ip_src == src_ip) &&
memcmp(client_info->slave->dev->dev_addr,
bond->device->dev_addr, ETH_ALEN) &&
memcmp(client_info->mac_dst, mac_bcast, ETH_ALEN)) {
client_info->ntt = 1;
bond_info->rx_ntt = 1;
}
}
_unlock_rx_hashtbl(bond);
}
/* Caller must hold both bond and ptr locks for read */
struct slave*
rlb_choose_channel(struct bonding *bond, struct arp_pkt *arp)
{
struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
struct rlb_client_info *client_info = NULL;
u32 hash_index = 0;
struct slave *assigned_slave = NULL;
u8 mac_bcast[ETH_ALEN] = {0xff,0xff,0xff,0xff,0xff,0xff};
_lock_rx_hashtbl(bond);
if (bond_info->rx_hashtbl == NULL) {
_unlock_rx_hashtbl(bond);
return NULL;
}
hash_index = _simple_hash((u8 *)&arp->ip_dst, 4);
client_info = &(bond_info->rx_hashtbl[hash_index]);
if (client_info->assigned == 1) {
if ((client_info->ip_src == arp->ip_src) &&
(client_info->ip_dst == arp->ip_dst)) {
/* the entry is already assigned to this client */
if (memcmp(arp->mac_dst, mac_bcast, ETH_ALEN)) {
/* update mac address from arp */
memcpy(client_info->mac_dst, arp->mac_dst, ETH_ALEN);
}
assigned_slave = client_info->slave;
if (assigned_slave) {
_unlock_rx_hashtbl(bond);
return assigned_slave;
}
} else {
/* the entry is already assigned to some other client,
* move the old client to primary (current_slave) so
* that the new client can be assigned to this entry.
*/
if (bond->current_slave &&
client_info->slave != bond->current_slave) {
client_info->slave = bond->current_slave;
rlb_update_client(client_info);
}
}
}
/* assign a new slave */
assigned_slave = rlb_next_rx_slave(bond);
if (assigned_slave) {
client_info->ip_src = arp->ip_src;
client_info->ip_dst = arp->ip_dst;
/* arp->mac_dst is broadcast for arp reqeusts.
* will be updated with clients actual unicast mac address
* upon receiving an arp reply.
*/
memcpy(client_info->mac_dst, arp->mac_dst, ETH_ALEN);
client_info->slave = assigned_slave;
if (memcmp(client_info->mac_dst, mac_bcast, ETH_ALEN)) {
client_info->ntt = 1;
bond->alb_info.rx_ntt = 1;
}
else {
client_info->ntt = 0;
}
if (!client_info->assigned) {
u32 prev_tbl_head = bond_info->rx_hashtbl_head;
bond_info->rx_hashtbl_head = hash_index;
client_info->next = prev_tbl_head;
if (prev_tbl_head != RLB_NULL_INDEX) {
bond_info->rx_hashtbl[prev_tbl_head].prev =
hash_index;
}
client_info->assigned = 1;
}
}
_unlock_rx_hashtbl(bond);
return assigned_slave;
}
/* chooses (and returns) transmit channel for arp reply
* does not choose channel for other arp types since they are
* sent on the current_slave
*/
static struct slave*
rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond)
{
struct arp_pkt *arp = (struct arp_pkt *)skb->nh.raw;
struct slave *tx_slave = NULL;
if (arp->op_code == __constant_htons(ARPOP_REPLY)) {
/* the arp must be sent on the selected
* rx channel
*/
tx_slave = rlb_choose_channel(bond, arp);
if (tx_slave) {
memcpy(arp->mac_src,tx_slave->dev->dev_addr, ETH_ALEN);
}
BOND_PRINT_DBG(("Server sent ARP Reply packet"));
} else if (arp->op_code == __constant_htons(ARPOP_REQUEST)) {
/* Create an entry in the rx_hashtbl for this client as a
* place holder.
* When the arp reply is received the entry will be updated
* with the correct unicast address of the client.
*/
rlb_choose_channel(bond, arp);
/* The ARP relpy packets must be delayed so that
* they can cancel out the influence of the ARP request.
*/
bond->alb_info.rlb_update_delay_counter = RLB_UPDATE_DELAY;
/* arp requests are broadcast and are sent on the primary
* the arp request will collapse all clients on the subnet to
* the primary slave. We must register these clients to be
* updated with their assigned mac.
*/
rlb_req_update_subnet_clients(bond, arp->ip_src);
BOND_PRINT_DBG(("Server sent ARP Request packet"));
}
return tx_slave;
}
/* Caller must hold bond lock for read */
static void
rlb_rebalance(struct bonding *bond)
{
struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
struct slave *assigned_slave = NULL;
u32 hash_index;
struct rlb_client_info *client_info = NULL;
u8 ntt = 0;
_lock_rx_hashtbl(bond);
if (bond_info->rx_hashtbl == NULL) {
_unlock_rx_hashtbl(bond);
return;
}
hash_index = bond_info->rx_hashtbl_head;
for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) {
client_info = &(bond_info->rx_hashtbl[hash_index]);
assigned_slave = rlb_next_rx_slave(bond);
if (assigned_slave && (client_info->slave != assigned_slave)){
client_info->slave = assigned_slave;
client_info->ntt = 1;
ntt = 1;
}
}
/* update the team's flag only after the whole iteration */
if (ntt) {
bond_info->rx_ntt = 1;
}
_unlock_rx_hashtbl(bond);
}
/* Caller must hold rx_hashtbl lock */
static inline void
rlb_init_table_entry(struct rlb_client_info *entry)
{
entry->next = RLB_NULL_INDEX;
entry->prev = RLB_NULL_INDEX;
entry->assigned = 0;
entry->ntt = 0;
}
static int
rlb_initialize(struct bonding *bond)
{
struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
struct packet_type *pk_type = &(BOND_ALB_INFO(bond).rlb_pkt_type);
int i;
size_t size;
spin_lock_init(&(bond_info->rx_hashtbl_lock));
_lock_rx_hashtbl(bond);
if (bond_info->rx_hashtbl != NULL) {
printk (KERN_ERR "%s: RLB hash table is not NULL\n",
bond->device->name);
_unlock_rx_hashtbl(bond);
return -1;
}
size = RLB_HASH_TABLE_SIZE * sizeof(struct rlb_client_info);
bond_info->rx_hashtbl = kmalloc(size, GFP_KERNEL);
if (bond_info->rx_hashtbl == NULL) {
printk (KERN_ERR "%s: Failed to allocate"
" RLB hash table\n", bond->device->name);
_unlock_rx_hashtbl(bond);
return -1;
}
bond_info->rx_hashtbl_head = RLB_NULL_INDEX;
for (i=0; i<RLB_HASH_TABLE_SIZE; i++) {
rlb_init_table_entry(bond_info->rx_hashtbl + i);
}
_unlock_rx_hashtbl(bond);
/* register to receive ARPs */
/*initialize packet type*/
pk_type->type = __constant_htons(ETH_P_ARP);
pk_type->dev = bond->device;
pk_type->func = rlb_arp_recv;
pk_type->data = (void*)1; /* understand shared skbs */
dev_add_pack(pk_type);
return 0;
}
static void
rlb_deinitialize(struct bonding *bond)
{
struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
dev_remove_pack(&(bond_info->rlb_pkt_type));
_lock_rx_hashtbl(bond);
if (bond_info->rx_hashtbl == NULL) {
_unlock_rx_hashtbl(bond);
return;
}
kfree(bond_info->rx_hashtbl);
bond_info->rx_hashtbl = NULL;
_unlock_rx_hashtbl(bond);
}
/*********************** tlb/rlb shared functions *********************/
static void
alb_send_learning_packets(struct slave *slave, u8 mac_addr[])
{
struct sk_buff *skb = NULL;
struct learning_pkt pkt;
char *data = NULL;
int i;
unsigned int size = sizeof(struct learning_pkt);
memset(&pkt, 0, size);
memcpy(pkt.mac_dst, mac_addr, ETH_ALEN);
memcpy(pkt.mac_src, mac_addr, ETH_ALEN);
pkt.type = __constant_htons(ETH_P_LOOP);
for (i=0; i < MAX_LP_RETRY; i++) {
skb = NULL;
skb = dev_alloc_skb(size);
if (!skb) {
return;
}
data = skb_put(skb, size);
memcpy(data, &pkt, size);
skb->mac.raw = data;
skb->nh.raw = data + ETH_HLEN;
skb->protocol = pkt.type;
skb->priority = TC_PRIO_CONTROL;
skb->dev = slave->dev;
dev_queue_xmit(skb);
}
}
/* hw is a boolean parameter that determines whether we should try and
* set the hw address of the hw as well as the hw address of the net_device
*/
static int
alb_set_mac_addr(struct slave *slave, u8 addr[], int hw)
{
struct net_device *dev = NULL;
struct sockaddr s_addr;
dev = slave->dev;
if (!hw) {
memcpy(dev->dev_addr, addr, ETH_ALEN);
return 0;
}
/* for rlb each slave must have a unique hw mac addresses so that */
/* each slave will receive packets destined to a different mac */
memcpy(s_addr.sa_data, addr, ETH_ALEN);
s_addr.sa_family = dev->type;
if (dev->set_mac_address(dev, &s_addr)) {
printk(KERN_DEBUG "bonding: Error: alb_set_mac_addr:"
" dev->set_mac_address of dev %s failed!"
" ALB mode requires that the base driver"
" support setting the hw address also when"
" the network device's interface is open\n",
dev->name);
return -EOPNOTSUPP;
}
return 0;
}
/* Caller must hold bond lock for write or ptrlock for write*/
static void
alb_swap_mac_addr(struct bonding *bond,
struct slave *slave1,
struct slave *slave2)
{
u8 tmp_mac_addr[ETH_ALEN];
struct slave *disabled_slave = NULL;
u8 slaves_state_differ;
slaves_state_differ = (SLAVE_IS_OK(slave1) != SLAVE_IS_OK(slave2));
memcpy(tmp_mac_addr, slave1->dev->dev_addr, ETH_ALEN);
alb_set_mac_addr(slave1, slave2->dev->dev_addr, bond->alb_info.rlb_enabled);
alb_set_mac_addr(slave2, tmp_mac_addr, bond->alb_info.rlb_enabled);
/* fasten the change in the switch */
if (SLAVE_IS_OK(slave1)) {
alb_send_learning_packets(slave1, slave1->dev->dev_addr);
if (bond->alb_info.rlb_enabled) {
/* inform the clients that the mac address
* has changed
*/
rlb_req_update_slave_clients(bond, slave1);
}
}
else {
disabled_slave = slave1;
}
if (SLAVE_IS_OK(slave2)) {
alb_send_learning_packets(slave2, slave2->dev->dev_addr);
if (bond->alb_info.rlb_enabled) {
/* inform the clients that the mac address
* has changed
*/
rlb_req_update_slave_clients(bond, slave2);
}
}
else {
disabled_slave = slave2;
}
if (bond->alb_info.rlb_enabled && slaves_state_differ) {
/* A disabled slave was assigned an active mac addr */
rlb_teach_disabled_mac_on_primary(bond,
disabled_slave->dev->dev_addr);
}
}
/**
* alb_change_hw_addr_on_detach
* @bond: bonding we're working on
* @slave: the slave that was just detached
*
* We assume that @slave was already detached from the slave list.
*
* If @slave's permanent hw address is different both from its current
* address and from @bond's address, then somewhere in the bond there's
* a slave that has @slave's permanet address as its current address.
* We'll make sure that that slave no longer uses @slave's permanent address.
*
* Caller must hold bond lock
*/
static void
alb_change_hw_addr_on_detach(struct bonding *bond, struct slave *slave)
{
struct slave *tmp_slave;
int perm_curr_diff;
int perm_bond_diff;
perm_curr_diff = memcmp(slave->perm_hwaddr,
slave->dev->dev_addr,
ETH_ALEN);
perm_bond_diff = memcmp(slave->perm_hwaddr,
bond->device->dev_addr,
ETH_ALEN);
if (perm_curr_diff && perm_bond_diff) {
tmp_slave = bond_get_first_slave(bond);
while (tmp_slave) {
if (!memcmp(slave->perm_hwaddr,
tmp_slave->dev->dev_addr,
ETH_ALEN)) {
break;
}
tmp_slave = bond_get_next_slave(bond, tmp_slave);
}
if (tmp_slave) {
alb_swap_mac_addr(bond, slave, tmp_slave);
}
}
}
/**
* alb_handle_addr_collision_on_attach
* @bond: bonding we're working on
* @slave: the slave that was just attached
*
* checks uniqueness of slave's mac address and handles the case the
* new slave uses the bonds mac address.
*
* If the permanent hw address of @slave is @bond's hw address, we need to
* find a different hw address to give @slave, that isn't in use by any other
* slave in the bond. This address must be, of course, one of the premanent
* addresses of the other slaves.
*
* We go over the slave list, and for each slave there we compare its
* permanent hw address with the current address of all the other slaves.
* If no match was found, then we've found a slave with a permanent address
* that isn't used by any other slave in the bond, so we can assign it to
* @slave.
*
* assumption: this function is called before @slave is attached to the
* bond slave list.
*
* caller must hold the bond lock for write since the mac addresses are compared
* and may be swapped.
*/
static int
alb_handle_addr_collision_on_attach(struct bonding *bond, struct slave *slave)
{
struct slave *tmp_slave1, *tmp_slave2;
if (bond->slave_cnt == 0) {
/* this is the first slave */
return 0;
}
/* if slave's mac address differs from bond's mac address
* check uniqueness of slave's mac address against the other
* slaves in the bond.
*/
if (memcmp(slave->perm_hwaddr, bond->device->dev_addr, ETH_ALEN)) {
tmp_slave1 = bond_get_first_slave(bond);
for (; tmp_slave1; tmp_slave1 = bond_get_next_slave(bond, tmp_slave1)) {
if (!memcmp(tmp_slave1->dev->dev_addr, slave->dev->dev_addr,
ETH_ALEN)) {
break;
}
}
if (tmp_slave1) {
/* a slave was found that is using the mac address
* of the new slave
*/
printk(KERN_ERR "bonding: Warning: the hw address "
"of slave %s is not unique - cannot enslave it!"
, slave->dev->name);
return -EINVAL;
}
return 0;
}
/* the slave's address is equal to the address of the bond
* search for a spare address in the bond for this slave.
*/
tmp_slave1 = bond_get_first_slave(bond);
for (; tmp_slave1; tmp_slave1 = bond_get_next_slave(bond, tmp_slave1)) {
tmp_slave2 = bond_get_first_slave(bond);
for (; tmp_slave2; tmp_slave2 = bond_get_next_slave(bond, tmp_slave2)) {
if (!memcmp(tmp_slave1->perm_hwaddr,
tmp_slave2->dev->dev_addr,
ETH_ALEN)) {
break;
}
}
if (!tmp_slave2) {
/* no slave has tmp_slave1's perm addr
* as its curr addr
*/
break;
}
}
if (tmp_slave1) {
alb_set_mac_addr(slave, tmp_slave1->perm_hwaddr,
bond->alb_info.rlb_enabled);
printk(KERN_WARNING "bonding: Warning: the hw address "
"of slave %s is in use by the bond; "
"giving it the hw address of %s\n",
slave->dev->name, tmp_slave1->dev->name);
} else {
printk(KERN_CRIT "bonding: Error: the hw address "
"of slave %s is in use by the bond; "
"couldn't find a slave with a free hw "
"address to give it (this should not have "
"happened)\n", slave->dev->name);
return -EFAULT;
}
return 0;
}
/************************ exported alb funcions ************************/
int
bond_alb_initialize(struct bonding *bond, int rlb_enabled)
{
int res;
res = tlb_initialize(bond);
if (res) {
return res;
}
if (rlb_enabled) {
bond->alb_info.rlb_enabled = 1;
/* initialize rlb */
res = rlb_initialize(bond);
if (res) {
tlb_deinitialize(bond);
return res;
}
}
return 0;
}
void
bond_alb_deinitialize(struct bonding *bond)
{
struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
tlb_deinitialize(bond);
if (bond_info->rlb_enabled) {
rlb_deinitialize(bond);
}
}
int
bond_alb_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct bonding *bond = (struct bonding *) dev->priv;
struct ethhdr *eth_data = (struct ethhdr *)skb->data;
struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
struct slave *tx_slave = NULL;
char do_tx_balance = 1;
int hash_size = 0;
u32 hash_index = 0;
u8 *hash_start = NULL;
u8 mac_bcast[ETH_ALEN] = {0xff,0xff,0xff,0xff,0xff,0xff};
if (!IS_UP(dev)) { /* bond down */
dev_kfree_skb(skb);
return 0;
}
/* make sure that the current_slave and the slaves list do
* not change during tx
*/
read_lock(&bond->lock);
if (bond->slave_cnt == 0) {
/* no suitable interface, frame not sent */
dev_kfree_skb(skb);
read_unlock(&bond->lock);
return 0;
}
read_lock(&bond->ptrlock);
switch (ntohs(skb->protocol)) {
case ETH_P_IP:
if ((memcmp(eth_data->h_dest, mac_bcast, ETH_ALEN) == 0) ||
(skb->nh.iph->daddr == 0xffffffff)) {
do_tx_balance = 0;
break;
}
hash_start = (char*)&(skb->nh.iph->daddr);
hash_size = 4;
break;
case ETH_P_IPV6:
if (memcmp(eth_data->h_dest, mac_bcast, ETH_ALEN) == 0) {
do_tx_balance = 0;
break;
}
hash_start = (char*)&(skb->nh.ipv6h->daddr);
hash_size = 16;
break;
case ETH_P_IPX:
if (skb->nh.ipxh->ipx_checksum !=
__constant_htons(IPX_NO_CHECKSUM)) {
/* something is wrong with this packet */
do_tx_balance = 0;
break;
}
if (skb->nh.ipxh->ipx_type !=
__constant_htons(IPX_TYPE_NCP)) {
/* The only protocol worth balancing in
* this family since it has an "ARP" like
* mechanism
*/
do_tx_balance = 0;
break;
}
hash_start = (char*)eth_data->h_dest;
hash_size = ETH_ALEN;
break;
case ETH_P_ARP:
do_tx_balance = 0;
if (bond_info->rlb_enabled) {
tx_slave = rlb_arp_xmit(skb, bond);
}
break;
default:
do_tx_balance = 0;
break;
}
if (do_tx_balance) {
hash_index = _simple_hash(hash_start, hash_size);
tx_slave = tlb_choose_channel(bond, hash_index, skb->len);
}
if (!tx_slave) {
/* unbalanced or unassigned, send through primary */
tx_slave = bond->current_slave;
bond_info->unbalanced_load += skb->len;
}
if (tx_slave && SLAVE_IS_OK(tx_slave)) {
skb->dev = tx_slave->dev;
if (tx_slave != bond->current_slave) {
memcpy(eth_data->h_source,
tx_slave->dev->dev_addr,
ETH_ALEN);
}
dev_queue_xmit(skb);
} else {
/* no suitable interface, frame not sent */
if (tx_slave) {
tlb_clear_slave(bond, tx_slave, 0);
}
dev_kfree_skb(skb);
}
read_unlock(&bond->ptrlock);
read_unlock(&bond->lock);
return 0;
}
void
bond_alb_monitor(struct bonding *bond)
{
struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
struct slave *slave = NULL;
read_lock(&bond->lock);
if ((bond->slave_cnt == 0) || !(bond->device->flags & IFF_UP)) {
bond_info->tx_rebalance_counter = 0;
bond_info->lp_counter = 0;
goto out;
}
bond_info->tx_rebalance_counter++;
bond_info->lp_counter++;
/* send learning packets */
if (bond_info->lp_counter >= BOND_ALB_LP_TICKS) {
/* change of current_slave involves swapping of mac addresses.
* in order to avoid this swapping from happening while
* sending the learning packets, the ptrlock must be held for
* read.
*/
read_lock(&bond->ptrlock);
slave = bond_get_first_slave(bond);
while (slave) {
alb_send_learning_packets(slave,slave->dev->dev_addr);
slave = bond_get_next_slave(bond, slave);
}
read_unlock(&bond->ptrlock);
bond_info->lp_counter = 0;
}
/* rebalance tx traffic */
if (bond_info->tx_rebalance_counter >= BOND_TLB_REBALANCE_TICKS) {
read_lock(&bond->ptrlock);
slave = bond_get_first_slave(bond);
while (slave) {
tlb_clear_slave(bond, slave, 1);
if (slave == bond->current_slave) {
SLAVE_TLB_INFO(slave).load =
bond_info->unbalanced_load /
BOND_TLB_REBALANCE_INTERVAL;
bond_info->unbalanced_load = 0;
}
slave = bond_get_next_slave(bond, slave);
}
read_unlock(&bond->ptrlock);
bond_info->tx_rebalance_counter = 0;
}
/* handle rlb stuff */
if (bond_info->rlb_enabled) {
/* the following code changes the promiscuity of the
* the current_slave. It needs to be locked with a
* write lock to protect from other code that also
* sets the promiscuity.
*/
write_lock(&bond->ptrlock);
if (bond_info->primary_is_promisc &&
(++bond_info->rlb_promisc_timeout_counter >=
RLB_PROMISC_TIMEOUT)) {
bond_info->rlb_promisc_timeout_counter = 0;
/* If the primary was set to promiscuous mode
* because a slave was disabled then
* it can now leave promiscuous mode.
*/
dev_set_promiscuity(bond->current_slave->dev, -1);
bond_info->primary_is_promisc = 0;
}
write_unlock(&bond->ptrlock);
if (bond_info->rlb_rebalance == 1) {
bond_info->rlb_rebalance = 0;
rlb_rebalance(bond);
}
/* check if clients need updating */
if (bond_info->rx_ntt) {
if (bond_info->rlb_update_delay_counter) {
--bond_info->rlb_update_delay_counter;
} else {
rlb_update_rx_clients(bond);
if (bond_info->rlb_update_retry_counter) {
--bond_info->rlb_update_retry_counter;
} else {
bond_info->rx_ntt = 0;
}
}
}
}
out:
read_unlock(&bond->lock);
if (bond->device->flags & IFF_UP) {
/* re-arm the timer */
mod_timer(&(bond_info->alb_timer),
jiffies + (HZ/ALB_TIMER_TICKS_PER_SEC));
}
}
/* assumption: called before the slave is attched to the bond
* and not locked by the bond lock
*/
int
bond_alb_init_slave(struct bonding *bond, struct slave *slave)
{
int err = 0;
err = alb_set_mac_addr(slave, slave->perm_hwaddr,
bond->alb_info.rlb_enabled);
if (err) {
return err;
}
/* caller must hold the bond lock for write since the mac addresses
* are compared and may be swapped.
*/
write_lock_bh(&bond->lock);
err = alb_handle_addr_collision_on_attach(bond, slave);
write_unlock_bh(&bond->lock);
if (err) {
return err;
}
tlb_init_slave(slave);
/* order a rebalance ASAP */
bond->alb_info.tx_rebalance_counter = BOND_TLB_REBALANCE_TICKS;
if (bond->alb_info.rlb_enabled) {
bond->alb_info.rlb_rebalance = 1;
}
return 0;
}
/* Caller must hold bond lock for write */
void
bond_alb_deinit_slave(struct bonding *bond, struct slave *slave)
{
if (bond->slave_cnt > 1) {
alb_change_hw_addr_on_detach(bond, slave);
}
tlb_clear_slave(bond, slave, 0);
if (bond->alb_info.rlb_enabled) {
bond->alb_info.next_rx_slave = NULL;
rlb_clear_slave(bond, slave);
}
}
/* Caller must hold bond lock for read */
void
bond_alb_handle_link_change(struct bonding *bond, struct slave *slave,
char link)
{
struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
if (link == BOND_LINK_DOWN) {
tlb_clear_slave(bond, slave, 0);
if (bond->alb_info.rlb_enabled) {
rlb_clear_slave(bond, slave);
}
} else if (link == BOND_LINK_UP) {
/* order a rebalance ASAP */
bond_info->tx_rebalance_counter = BOND_TLB_REBALANCE_TICKS;
if (bond->alb_info.rlb_enabled) {
bond->alb_info.rlb_rebalance = 1;
/* If the updelay module parameter is smaller than the
* forwarding delay of the switch the rebalance will
* not work because the rebalance arp replies will
* not be forwarded to the clients..
*/
}
}
}
/**
* bond_alb_assign_current_slave - assign new current_slave
* @bond: our bonding struct
* @new_slave: new slave to assign
*
* Set the bond->current_slave to @new_slave and handle
* mac address swapping and promiscuity changes as needed.
*
* Caller must hold bond ptrlock for write (or bond lock for write)
*/
void
bond_alb_assign_current_slave(struct bonding *bond, struct slave *new_slave)
{
struct slave *swap_slave = bond->current_slave;
if (bond->current_slave == new_slave) {
return;
}
if (bond->current_slave && bond->alb_info.primary_is_promisc) {
dev_set_promiscuity(bond->current_slave->dev, -1);
bond->alb_info.primary_is_promisc = 0;
bond->alb_info.rlb_promisc_timeout_counter = 0;
}
bond->current_slave = new_slave;
if (!new_slave || (bond->slave_cnt == 0)) {
return;
}
/* set the new current_slave to the bonds mac address
* i.e. swap mac addresses of old current_slave and new current_slave
*/
if (!swap_slave) {
/* find slave that is holding the bond's mac address */
swap_slave = bond_get_first_slave(bond);
while (swap_slave) {
if (!memcmp(swap_slave->dev->dev_addr,
bond->device->dev_addr, ETH_ALEN)) {
break;
}
swap_slave = bond_get_next_slave(bond, swap_slave);
}
}
/* current_slave must be set before calling alb_swap_mac_addr */
if (swap_slave) {
/* swap mac address */
alb_swap_mac_addr(bond, swap_slave, new_slave);
} else {
/* set the new_slave to the bond mac address */
alb_set_mac_addr(new_slave, bond->device->dev_addr,
bond->alb_info.rlb_enabled);
/* fasten bond mac on new current slave */
alb_send_learning_packets(new_slave, bond->device->dev_addr);
}
}
/*
* Copyright(c) 1999 - 2003 Intel Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* The full GNU General Public License is included in this distribution in the
* file called LICENSE.
*/
#ifndef __BOND_ALB_H__
#define __BOND_ALB_H__
#include <linux/if_ether.h>
struct bonding;
struct slave;
#define BOND_ALB_INFO(bond) ((bond)->alb_info)
#define SLAVE_TLB_INFO(slave) ((slave)->tlb_info)
struct tlb_client_info {
struct slave *tx_slave; /* A pointer to slave used for transmiting
* packets to a Client that the Hash function
* gave this entry index.
*/
u32 tx_bytes; /* Each Client acumulates the BytesTx that
* were tranmitted to it, and after each
* CallBack the LoadHistory is devided
* by the balance interval
*/
u32 load_history; /* This field contains the amount of Bytes
* that were transmitted to this client by
* the server on the previous balance
* interval in Bps.
*/
u32 next; /* The next Hash table entry index, assigned
* to use the same adapter for transmit.
*/
u32 prev; /* The previous Hash table entry index,
* assigned to use the same
*/
};
/* -------------------------------------------------------------------------
* struct rlb_client_info contains all info related to a specific rx client
* connection. This is the Clients Hash Table entry struct
* -------------------------------------------------------------------------
*/
struct rlb_client_info {
u32 ip_src; /* the server IP address */
u32 ip_dst; /* the client IP address */
u8 mac_dst[ETH_ALEN]; /* the client MAC address */
u32 next; /* The next Hash table entry index */
u32 prev; /* The previous Hash table entry index */
u8 assigned; /* checking whether this entry is assigned */
u8 ntt; /* flag - need to transmit client info */
struct slave *slave; /* the slave assigned to this client */
};
struct tlb_slave_info {
u32 head; /* Index to the head of the bi-directional clients
* hash table entries list. The entries in the list
* are the entries that were assigned to use this
* slave for transmit.
*/
u32 load; /* Each slave sums the loadHistory of all clients
* assigned to it
*/
};
struct alb_bond_info {
struct timer_list alb_timer;
struct tlb_client_info *tx_hashtbl; /* Dynamically allocated */
spinlock_t tx_hashtbl_lock;
u32 unbalanced_load;
int tx_rebalance_counter;
int lp_counter;
/* -------- rlb parameters -------- */
int rlb_enabled;
struct packet_type rlb_pkt_type;
struct rlb_client_info *rx_hashtbl; /* Receive hash table */
spinlock_t rx_hashtbl_lock;
u32 rx_hashtbl_head;
u8 rx_ntt; /* flag - need to transmit
* to all rx clients
*/
struct slave *next_rx_slave;/* next slave to be assigned
* to a new rx client for
*/
u32 rlb_interval_counter;
u8 primary_is_promisc; /* boolean */
u32 rlb_promisc_timeout_counter;/* counts primary
* promiscuity time
*/
u32 rlb_update_delay_counter;
u32 rlb_update_retry_counter;/* counter of retries
* of client update
*/
u8 rlb_rebalance; /* flag - indicates that the
* rx traffic should be
* rebalanced
*/
};
int bond_alb_initialize(struct bonding *bond, int rlb_enabled);
void bond_alb_deinitialize(struct bonding *bond);
int bond_alb_init_slave(struct bonding *bond, struct slave *slave);
void bond_alb_deinit_slave(struct bonding *bond, struct slave *slave);
void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char link);
void bond_alb_assign_current_slave(struct bonding *bond, struct slave *new_slave);
int bond_alb_xmit(struct sk_buff *skb, struct net_device *dev);
void bond_alb_monitor(struct bonding *bond);
#endif /* __BOND_ALB_H__ */
......@@ -278,7 +278,7 @@
* bonding round-robin mode ignoring links after failover/recovery
*
* 2003/03/17 - Jay Vosburgh <fubar at us dot ibm dot com>
* - kmalloc fix (GFP_KERNEL to GFP_ATOMIC) reported by
* - kmalloc fix (GPF_KERNEL to GPF_ATOMIC) reported by
* Shmulik dot Hen at intel.com.
* - Based on discussion on mailing list, changed use of
* update_slave_cnt(), created wrapper functions for adding/removing
......@@ -292,21 +292,95 @@
* - Make sure only bond_attach_slave() and bond_detach_slave() can
* manipulate the slave list, including slave_cnt, even when in
* bond_release_all().
* - Fixed hang in bond_release() while traffic is running.
* - Fixed hang in bond_release() with traffic running:
* netdev_set_master() must not be called from within the bond lock.
*
* 2003/03/18 - Tsippy Mendelson <tsippy.mendelson at intel dot com> and
* Shmulik Hen <shmulik.hen at intel dot com>
* - Fixed hang in bond_enslave(): netdev_set_master() must not be
* called from within the bond lock while traffic is running.
* - Fixed hang in bond_enslave() with traffic running:
* netdev_set_master() must not be called from within the bond lock.
*
* 2003/03/18 - Amir Noam <amir.noam at intel dot com>
* - Added support for getting slave's speed and duplex via ethtool.
* Needed for 802.3ad and other future modes.
*
* 2003/03/18 - Tsippy Mendelson <tsippy.mendelson at intel dot com> and
* Shmulik Hen <shmulik.hen at intel dot com>
* - Enable support of modes that need to use the unique mac address of
* each slave.
* * bond_enslave(): Moved setting the slave's mac address, and
* openning it, from the application to the driver. This breaks
* backward comaptibility with old versions of ifenslave that open
* the slave before enalsving it !!!.
* * bond_release(): The driver also takes care of closing the slave
* and restoring its original mac address.
* - Removed the code that restores all base driver's flags.
* Flags are automatically restored once all undo stages are done
* properly.
* - Block possibility of enslaving before the master is up. This
* prevents putting the system in an unstable state.
*
* 2003/03/18 - Amir Noam <amir.noam at intel dot com>,
* Tsippy Mendelson <tsippy.mendelson at intel dot com> and
* Shmulik Hen <shmulik.hen at intel dot com>
* - Added support for IEEE 802.3ad Dynamic link aggregation mode.
*
* 2003/05/01 - Amir Noam <amir.noam at intel dot com>
* - Added ABI version control to restore compatibility between
* new/old ifenslave and new/old bonding.
*
* 2003/05/01 - Shmulik Hen <shmulik.hen at intel dot com>
* - Fixed bug in bond_release_all(): save old value of current_slave
* before setting it to NULL.
* - Changed driver versioning scheme to include version number instead
* of release date (that is already in another field). There are 3
* fields X.Y.Z where:
* X - Major version - big behavior changes
* Y - Minor version - addition of features
* Z - Extra version - minor changes and bug fixes
* The current version is 1.0.0 as a base line.
*
* 2003/05/01 - Tsippy Mendelson <tsippy.mendelson at intel dot com> and
* Amir Noam <amir.noam at intel dot com>
* - Added support for lacp_rate module param.
* - Code beautification and style changes (mainly in comments).
* new version - 1.0.1
*
* 2003/05/01 - Shmulik Hen <shmulik.hen at intel dot com>
* - Based on discussion on mailing list, changed locking scheme
* to use lock/unlock or lock_bh/unlock_bh appropriately instead
* of lock_irqsave/unlock_irqrestore. The new scheme helps exposing
* hidden bugs and solves system hangs that occurred due to the fact
* that holding lock_irqsave doesn't prevent softirqs from running.
* This also increases total throughput since interrupts are not
* blocked on each transmitted packets or monitor timeout.
* new version - 2.0.0
*
* 2003/05/01 - Shmulik Hen <shmulik.hen at intel dot com>
* - Added support for Transmit load balancing mode.
* - Concentrate all assignments of current_slave to a single point
* so specific modes can take actions when the primary adapter is
* changed.
* - Take the updelay parameter into consideration during bond_enslave
* since some adapters loose their link during setting the device.
* - Renamed bond_3ad_link_status_changed() to
* bond_3ad_handle_link_change() for compatibility with TLB.
* new version - 2.1.0
*
* 2003/05/01 - Tsippy Mendelson <tsippy.mendelson at intel dot com>
* - Added support for Adaptive load balancing mode which is
* equivalent to Transmit load balancing + Receive load balancing.
* new version - 2.2.0
*/
#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/interrupt.h>
#include <linux/ptrace.h>
#include <linux/ioport.h>
#include <linux/in.h>
#include <linux/ip.h>
......@@ -316,28 +390,32 @@
#include <linux/timer.h>
#include <linux/socket.h>
#include <linux/ctype.h>
#include <asm/system.h>
#include <asm/bitops.h>
#include <asm/io.h>
#include <asm/dma.h>
#include <asm/uaccess.h>
#include <linux/errno.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/etherdevice.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <linux/rtnetlink.h>
#include <linux/if_bonding.h>
#include <linux/smp.h>
#include <linux/if_ether.h>
#include <net/arp.h>
#include <linux/mii.h>
#include <linux/ethtool.h>
#include "bonding.h"
#include "bond_3ad.h"
#include "bond_alb.h"
#include <asm/system.h>
#include <asm/bitops.h>
#include <asm/io.h>
#include <asm/dma.h>
#include <asm/uaccess.h>
#define DRV_VERSION "2.5.65-20030320"
#define DRV_RELDATE "March 20, 2003"
#define DRV_VERSION "2.2.0"
#define DRV_RELDATE "April 15, 2003"
#define DRV_NAME "bonding"
#define DRV_DESCRIPTION "Ethernet Channel Bonding Driver"
......@@ -357,6 +435,11 @@ DRV_NAME ".c:v" DRV_VERSION " (" DRV_RELDATE ")\n";
#define MAX_ARP_IP_TARGETS 16
#endif
struct bond_parm_tbl {
char *modename;
int mode;
};
static int arp_interval = BOND_LINK_ARP_INTERV;
static char *arp_ip_target[MAX_ARP_IP_TARGETS] = { NULL, };
static unsigned long arp_target[MAX_ARP_IP_TARGETS] = { 0, } ;
......@@ -366,6 +449,15 @@ char *arp_target_hw_addr = NULL;
static char *primary= NULL;
static int app_abi_ver = 0;
static int orig_app_abi_ver = -1; /* This is used to save the first ABI version
* we receive from the application. Once set,
* it won't be changed, and the module will
* refuse to enslave/release interfaces if the
* command comes from an application using
* another ABI version.
*/
static int max_bonds = BOND_DEFAULT_MAX_BONDS;
static int miimon = BOND_LINK_MON_INTERV;
static int use_carrier = 1;
......@@ -380,6 +472,9 @@ static struct bond_parm_tbl bond_mode_tbl[] = {
{ "active-backup", BOND_MODE_ACTIVEBACKUP},
{ "balance-xor", BOND_MODE_XOR},
{ "broadcast", BOND_MODE_BROADCAST},
{ "802.3ad", BOND_MODE_8023AD},
{ "tlb", BOND_MODE_TLB},
{ "alb", BOND_MODE_ALB},
{ NULL, -1},
};
......@@ -393,6 +488,15 @@ static struct bond_parm_tbl bond_mc_tbl[] = {
{ NULL, -1},
};
static int lacp_fast = 0;
static char *lacp_rate = NULL;
static struct bond_parm_tbl bond_lacp_tbl[] = {
{ "slow", AD_LACP_SLOW},
{ "fast", AD_LACP_FAST},
{ NULL, -1},
};
static int first_pass = 1;
static struct bonding *these_bonds = NULL;
static struct net_device *dev_bonds = NULL;
......@@ -417,6 +521,8 @@ MODULE_PARM(primary, "s");
MODULE_PARM_DESC(primary, "Primary network device to use");
MODULE_PARM(multicast, "s");
MODULE_PARM_DESC(multicast, "Mode for multicast support : 0 for none, 1 for active slave, 2 for all slaves (default)");
MODULE_PARM(lacp_rate, "s");
MODULE_PARM_DESC(lacp_rate, "LACPDU tx rate to request from 802.3ad partner (slow/fast)");
static int bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *dev);
static int bond_xmit_xor(struct sk_buff *skb, struct net_device *dev);
......@@ -426,7 +532,6 @@ static void bond_mii_monitor(struct net_device *dev);
static void loadbalance_arp_monitor(struct net_device *dev);
static void activebackup_arp_monitor(struct net_device *dev);
static int bond_event(struct notifier_block *this, unsigned long event, void *ptr);
static void bond_restore_slave_flags(slave_t *slave);
static void bond_mc_list_destroy(struct bonding *bond);
static void bond_mc_add(bonding_t *bond, void *addr, int alen);
static void bond_mc_delete(bonding_t *bond, void *addr, int alen);
......@@ -436,8 +541,6 @@ static void bond_set_promiscuity(bonding_t *bond, int inc);
static void bond_set_allmulti(bonding_t *bond, int inc);
static struct dev_mc_list* bond_mc_list_find_dmi(struct dev_mc_list *dmi, struct dev_mc_list *mc_list);
static void bond_mc_update(bonding_t *bond, slave_t *new, slave_t *old);
static void bond_set_slave_inactive_flags(slave_t *slave);
static void bond_set_slave_active_flags(slave_t *slave);
static int bond_enslave(struct net_device *master, struct net_device *slave);
static int bond_release(struct net_device *master, struct net_device *slave);
static int bond_release_all(struct net_device *master);
......@@ -451,13 +554,24 @@ static int bond_sethwaddr(struct net_device *master, struct net_device *slave);
*/
static int bond_get_info(char *buf, char **start, off_t offset, int length);
/* Caller must hold bond->ptrlock for write */
static inline struct slave*
bond_assign_current_slave(struct bonding *bond,struct slave *newslave)
{
if ((bond_mode == BOND_MODE_TLB) ||
(bond_mode == BOND_MODE_ALB)) {
bond_alb_assign_current_slave(bond, newslave);
} else {
bond->current_slave = newslave;
}
return bond->current_slave;
}
/* #define BONDING_DEBUG 1 */
/* several macros */
#define IS_UP(dev) ((((dev)->flags & (IFF_UP)) == (IFF_UP)) && \
(netif_running(dev) && netif_carrier_ok(dev)))
static void arp_send_all(slave_t *slave)
{
int i;
......@@ -482,7 +596,13 @@ bond_mode_name(void)
return "load balancing (xor)";
case BOND_MODE_BROADCAST :
return "fault-tolerance (broadcast)";
default :
case BOND_MODE_8023AD:
return "IEEE 802.3ad Dynamic link aggregation";
case BOND_MODE_TLB:
return "transmit load balancing";
case BOND_MODE_ALB:
return "adaptive load balancing";
default:
return "unknown";
}
}
......@@ -502,18 +622,13 @@ multicast_mode_name(void)
}
}
static void bond_restore_slave_flags(slave_t *slave)
{
slave->dev->flags = slave->original_flags;
}
static void bond_set_slave_inactive_flags(slave_t *slave)
void bond_set_slave_inactive_flags(slave_t *slave)
{
slave->state = BOND_STATE_BACKUP;
slave->dev->flags |= IFF_NOARP;
}
static void bond_set_slave_active_flags(slave_t *slave)
void bond_set_slave_active_flags(slave_t *slave)
{
slave->state = BOND_STATE_ACTIVE;
slave->dev->flags &= ~IFF_NOARP;
......@@ -548,9 +663,9 @@ update_slave_cnt(bonding_t *bond, int incr)
* belongs to <bond>. It returns <slave> in case it's needed.
* Nothing is freed on return, structures are just unchained.
* If the bond->current_slave pointer was pointing to <slave>,
* it's replaced with slave->next, or <bond> if not applicable.
* it's replaced with bond->next, or NULL if not applicable.
*
* bond->lock held by caller.
* bond->lock held for writing by caller.
*/
static slave_t *
bond_detach_slave(bonding_t *bond, slave_t *slave)
......@@ -565,20 +680,11 @@ bond_detach_slave(bonding_t *bond, slave_t *slave)
if (bond->next == slave) { /* is the slave at the head ? */
if (bond->prev == slave) { /* is the slave alone ? */
write_lock(&bond->ptrlock);
bond->current_slave = NULL; /* no slave anymore */
write_unlock(&bond->ptrlock);
bond->prev = bond->next = (slave_t *)bond;
} else { /* not alone */
bond->next = slave->next;
slave->next->prev = (slave_t *)bond;
bond->prev->next = slave->next;
write_lock(&bond->ptrlock);
if (bond->current_slave == slave) {
bond->current_slave = slave->next;
}
write_unlock(&bond->ptrlock);
}
} else {
slave->prev->next = slave->next;
......@@ -587,19 +693,29 @@ bond_detach_slave(bonding_t *bond, slave_t *slave)
} else {
slave->next->prev = slave->prev;
}
write_lock(&bond->ptrlock);
if (bond->current_slave == slave) {
bond->current_slave = slave->next;
}
write_unlock(&bond->ptrlock);
}
update_slave_cnt(bond, -1);
/* no need to hold ptrlock since bond lock is
* already held for writing
*/
if (slave == bond->current_slave) {
if ( bond->next != (slave_t *)bond) { /* found one slave */
bond_assign_current_slave(bond, bond->next);
} else {
bond_assign_current_slave(bond, NULL);
}
}
return slave;
}
/*
* This function attaches the slave <slave> to the list <bond>.
*
* bond->lock held for writing by caller.
*/
static void
bond_attach_slave(struct bonding *bond, struct slave *new_slave)
{
......@@ -646,6 +762,59 @@ bond_attach_slave(struct bonding *bond, struct slave *new_slave)
set_fs(fs); \
ret; })
/*
* Get link speed and duplex from the slave's base driver
* using ethtool. If for some reason the call fails or the
* values are invalid, fake speed and duplex to 100/Full
* and return error.
*/
static int bond_update_speed_duplex(struct slave *slave)
{
struct net_device *dev = slave->dev;
static int (* ioctl)(struct net_device *, struct ifreq *, int);
struct ifreq ifr;
struct ethtool_cmd etool;
ioctl = dev->do_ioctl;
if (ioctl) {
etool.cmd = ETHTOOL_GSET;
ifr.ifr_data = (char*)&etool;
if (IOCTL(dev, &ifr, SIOCETHTOOL) == 0) {
slave->speed = etool.speed;
slave->duplex = etool.duplex;
} else {
goto err_out;
}
} else {
goto err_out;
}
switch (slave->speed) {
case SPEED_10:
case SPEED_100:
case SPEED_1000:
break;
default:
goto err_out;
}
switch (slave->duplex) {
case DUPLEX_FULL:
case DUPLEX_HALF:
break;
default:
goto err_out;
}
return 0;
err_out:
/* Fake speed and duplex */
slave->speed = SPEED_100;
slave->duplex = DUPLEX_FULL;
return -1;
}
/*
* if <dev> supports MII link status reporting, check its link status.
*
......@@ -728,21 +897,61 @@ bond_check_dev_link(struct net_device *dev, int reporting)
static u16 bond_check_mii_link(bonding_t *bond)
{
int has_active_interface = 0;
unsigned long flags;
read_lock_irqsave(&bond->lock, flags);
read_lock_bh(&bond->lock);
read_lock(&bond->ptrlock);
has_active_interface = (bond->current_slave != NULL);
read_unlock(&bond->ptrlock);
read_unlock_irqrestore(&bond->lock, flags);
read_unlock_bh(&bond->lock);
return (has_active_interface ? BMSR_LSTATUS : 0);
}
/* register to receive lacpdus on a bond */
static void bond_register_lacpdu(struct bonding *bond)
{
struct packet_type* pk_type = &(BOND_AD_INFO(bond).ad_pkt_type);
/* initialize packet type */
pk_type->type = PKT_TYPE_LACPDU;
pk_type->dev = bond->device;
pk_type->func = bond_3ad_lacpdu_recv;
pk_type->data = (void*)1; /* understand shared skbs */
dev_add_pack(pk_type);
}
/* unregister to receive lacpdus on a bond */
static void bond_unregister_lacpdu(struct bonding *bond)
{
dev_remove_pack(&(BOND_AD_INFO(bond).ad_pkt_type));
}
static int bond_open(struct net_device *dev)
{
struct bonding *bond = (struct bonding *)(dev->priv);
struct timer_list *timer = &((struct bonding *)(dev->priv))->mii_timer;
struct timer_list *arp_timer = &((struct bonding *)(dev->priv))->arp_timer;
if ((bond_mode == BOND_MODE_TLB) ||
(bond_mode == BOND_MODE_ALB)) {
struct timer_list *alb_timer = &(BOND_ALB_INFO(bond).alb_timer);
/* bond_alb_initialize must be called before the timer
* is started.
*/
if (bond_alb_initialize(bond, (bond_mode == BOND_MODE_ALB))) {
/* something went wrong - fail the open operation */
return -1;
}
init_timer(alb_timer);
alb_timer->expires = jiffies + 1;
alb_timer->data = (unsigned long)bond;
alb_timer->function = (void *)&bond_alb_monitor;
add_timer(alb_timer);
}
MOD_INC_USE_COUNT;
if (miimon > 0) { /* link check interval, in milliseconds. */
......@@ -764,15 +973,27 @@ static int bond_open(struct net_device *dev)
}
add_timer(arp_timer);
}
if (bond_mode == BOND_MODE_8023AD) {
struct timer_list *ad_timer = &(BOND_AD_INFO(bond).ad_timer);
init_timer(ad_timer);
ad_timer->expires = jiffies + (AD_TIMER_INTERVAL * HZ / 1000);
ad_timer->data = (unsigned long)bond;
ad_timer->function = (void *)&bond_3ad_state_machine_handler;
add_timer(ad_timer);
/* register to receive LACPDUs */
bond_register_lacpdu(bond);
}
return 0;
}
static int bond_close(struct net_device *master)
{
bonding_t *bond = (struct bonding *) master->priv;
unsigned long flags;
write_lock_irqsave(&bond->lock, flags);
write_lock_bh(&bond->lock);
if (miimon > 0) { /* link check interval, in milliseconds. */
del_timer(&bond->mii_timer);
......@@ -785,11 +1006,26 @@ static int bond_close(struct net_device *master)
}
}
if (bond_mode == BOND_MODE_8023AD) {
del_timer_sync(&(BOND_AD_INFO(bond).ad_timer));
/* Unregister the receive of LACPDUs */
bond_unregister_lacpdu(bond);
}
bond_mc_list_destroy (bond);
write_unlock_bh(&bond->lock);
/* Release the bonded slaves */
bond_release_all(master);
bond_mc_list_destroy (bond);
write_unlock_irqrestore(&bond->lock, flags);
if ((bond_mode == BOND_MODE_TLB) ||
(bond_mode == BOND_MODE_ALB)) {
del_timer_sync(&(BOND_ALB_INFO(bond).alb_timer));
bond_alb_deinitialize(bond);
}
MOD_DEC_USE_COUNT;
return 0;
......@@ -804,6 +1040,13 @@ static void bond_mc_list_flush(struct net_device *dev, struct net_device *flush)
for (dmi = flush->mc_list; dmi != NULL; dmi = dmi->next)
dev_mc_delete(dev, dmi->dmi_addr, dmi->dmi_addrlen, 0);
if (bond_mode == BOND_MODE_8023AD) {
/* del lacpdu mc addr from mc list */
u8 lacpdu_multicast[ETH_ALEN] = MULTICAST_LACPDU_ADDR;
dev_mc_delete(dev, lacpdu_multicast, ETH_ALEN, 0);
}
}
/*
......@@ -960,14 +1203,13 @@ static void set_multicast_list(struct net_device *master)
{
bonding_t *bond = master->priv;
struct dev_mc_list *dmi;
unsigned long flags = 0;
if (multicast_mode == BOND_MULTICAST_DISABLED)
return;
/*
* Lock the private data for the master
*/
write_lock_irqsave(&bond->lock, flags);
write_lock_bh(&bond->lock);
/* set promiscuity flag to slaves */
if ( (master->flags & IFF_PROMISC) && !(bond->flags & IFF_PROMISC) )
......@@ -1002,7 +1244,7 @@ static void set_multicast_list(struct net_device *master)
bond_mc_list_destroy (bond);
bond_mc_list_copy (master->mc_list, bond, GFP_ATOMIC);
write_unlock_irqrestore(&bond->lock, flags);
write_unlock_bh(&bond->lock);
}
/*
......@@ -1048,14 +1290,13 @@ static int bond_enslave(struct net_device *master_dev,
{
bonding_t *bond = NULL;
slave_t *new_slave = NULL;
unsigned long flags = 0;
unsigned long rflags = 0;
int ndx = 0;
int err = 0;
struct dev_mc_list *dmi;
struct in_ifaddr **ifap;
struct in_ifaddr *ifa;
int link_reporting;
struct sockaddr addr;
if (master_dev == NULL || slave_dev == NULL) {
return -ENODEV;
......@@ -1068,12 +1309,13 @@ static int bond_enslave(struct net_device *master_dev,
slave_dev->name);
}
/* not running. */
if ((slave_dev->flags & IFF_UP) != IFF_UP) {
/* bond must be initialized by bond_open() before enslaving */
if (!(master_dev->flags & IFF_UP)) {
#ifdef BONDING_DEBUG
printk(KERN_CRIT "Error, slave_dev is not running\n");
printk(KERN_CRIT "Error, master_dev is not up\n");
#endif
return -EINVAL;
return -EPERM;
}
/* already enslaved */
......@@ -1084,24 +1326,116 @@ static int bond_enslave(struct net_device *master_dev,
return -EBUSY;
}
if ((new_slave = kmalloc(sizeof(slave_t), GFP_ATOMIC)) == NULL) {
if (app_abi_ver >= 1) {
/* The application is using an ABI, which requires the
* slave interface to be closed.
*/
if ((slave_dev->flags & IFF_UP)) {
#ifdef BONDING_DEBUG
printk(KERN_CRIT "Error, slave_dev is up\n");
#endif
return -EPERM;
}
if (slave_dev->set_mac_address == NULL) {
printk(KERN_CRIT
"The slave device you specified does not support"
" setting the MAC address.\n");
printk(KERN_CRIT
"Your kernel likely does not support slave"
" devices.\n");
return -EOPNOTSUPP;
}
} else {
/* The application is not using an ABI, which requires the
* slave interface to be open.
*/
if (!(slave_dev->flags & IFF_UP)) {
#ifdef BONDING_DEBUG
printk(KERN_CRIT "Error, slave_dev is not running\n");
#endif
return -EINVAL;
}
if ((bond_mode == BOND_MODE_8023AD) ||
(bond_mode == BOND_MODE_TLB) ||
(bond_mode == BOND_MODE_ALB)) {
printk(KERN_ERR
"bonding: Error: to use %s mode, you must "
"upgrade ifenslave.\n", bond_mode_name());
return -EOPNOTSUPP;
}
}
if ((new_slave = kmalloc(sizeof(slave_t), GFP_KERNEL)) == NULL) {
return -ENOMEM;
}
memset(new_slave, 0, sizeof(slave_t));
/* save flags before call to netdev_set_master */
/* save slave's original flags before calling
* netdev_set_master and dev_open
*/
new_slave->original_flags = slave_dev->flags;
err = netdev_set_master(slave_dev, master_dev);
if (app_abi_ver >= 1) {
/* save slave's original ("permanent") mac address for
* modes that needs it, and for restoring it upon release,
* and then set it to the master's address
*/
memcpy(new_slave->perm_hwaddr, slave_dev->dev_addr, ETH_ALEN);
if (bond->slave_cnt > 0) {
/* set slave to master's mac address
* The application already set the master's
* mac address to that of the first slave
*/
memcpy(addr.sa_data, master_dev->dev_addr, ETH_ALEN);
addr.sa_family = slave_dev->type;
err = slave_dev->set_mac_address(slave_dev, &addr);
if (err) {
#ifdef BONDING_DEBUG
printk(KERN_CRIT "Error %d calling set_mac_address\n", err);
#endif
goto err_free;
}
}
/* open the slave since the application closed it */
err = dev_open(slave_dev);
if (err) {
#ifdef BONDING_DEBUG
printk(KERN_CRIT "Openning slave %s failed\n", slave_dev->name);
#endif
goto err_restore_mac;
}
}
err = netdev_set_master(slave_dev, master_dev);
if (err) {
#ifdef BONDING_DEBUG
printk(KERN_CRIT "Error %d calling netdev_set_master\n", err);
#endif
if (app_abi_ver < 1) {
goto err_free;
} else {
goto err_close;
}
}
new_slave->dev = slave_dev;
if ((bond_mode == BOND_MODE_TLB) ||
(bond_mode == BOND_MODE_ALB)) {
/* bond_alb_init_slave() must be called before all other stages since
* it might fail and we do not want to have to undo everything
*/
err = bond_alb_init_slave(bond, new_slave);
if (err) {
goto err_unset_master;
}
}
if (multicast_mode == BOND_MULTICAST_ALL) {
/* set promiscuity level to new slave */
if (master_dev->flags & IFF_PROMISC)
......@@ -1116,7 +1450,14 @@ static int bond_enslave(struct net_device *master_dev,
dev_mc_add (slave_dev, dmi->dmi_addr, dmi->dmi_addrlen, 0);
}
write_lock_irqsave(&bond->lock, flags);
if (bond_mode == BOND_MODE_8023AD) {
/* add lacpdu mc addr to mc list */
u8 lacpdu_multicast[ETH_ALEN] = MULTICAST_LACPDU_ADDR;
dev_mc_add(slave_dev, lacpdu_multicast, ETH_ALEN, 0);
}
write_lock_bh(&bond->lock);
bond_attach_slave(bond, new_slave);
new_slave->delay = 0;
......@@ -1157,19 +1498,45 @@ static int bond_enslave(struct net_device *master_dev,
/* check for initial state */
if ((miimon <= 0) ||
(bond_check_dev_link(slave_dev, 0) == BMSR_LSTATUS)) {
if (updelay) {
#ifdef BONDING_DEBUG
printk(KERN_CRIT "Initial state of slave_dev is "
"BOND_LINK_BACK\n");
#endif
new_slave->link = BOND_LINK_BACK;
new_slave->delay = updelay;
}
else {
#ifdef BONDING_DEBUG
printk(KERN_CRIT "Initial state of slave_dev is BOND_LINK_UP\n");
printk(KERN_DEBUG "Initial state of slave_dev is "
"BOND_LINK_UP\n");
#endif
new_slave->link = BOND_LINK_UP;
}
new_slave->jiffies = jiffies;
}
else {
#ifdef BONDING_DEBUG
printk(KERN_CRIT "Initial state of slave_dev is BOND_LINK_DOWN\n");
printk(KERN_CRIT "Initial state of slave_dev is "
"BOND_LINK_DOWN\n");
#endif
new_slave->link = BOND_LINK_DOWN;
}
if (bond_update_speed_duplex(new_slave) &&
(new_slave->link != BOND_LINK_DOWN)) {
printk(KERN_WARNING
"bond_enslave(): failed to get speed/duplex from %s, "
"speed forced to 100Mbps, duplex forced to Full.\n",
new_slave->dev->name);
if (bond_mode == BOND_MODE_8023AD) {
printk(KERN_WARNING
"Operation of 802.3ad mode requires ETHTOOL support "
"in base driver for proper aggregator selection.\n");
}
}
/* if we're in active-backup mode, we need one and only one active
* interface. The backup interfaces will have their NOARP flag set
* because we need them to be completely deaf and not to respond to
......@@ -1180,13 +1547,13 @@ static int bond_enslave(struct net_device *master_dev,
if (bond_mode == BOND_MODE_ACTIVEBACKUP) {
if (((bond->current_slave == NULL)
|| (bond->current_slave->dev->flags & IFF_NOARP))
&& (new_slave->link == BOND_LINK_UP)) {
&& (new_slave->link != BOND_LINK_DOWN)) {
#ifdef BONDING_DEBUG
printk(KERN_CRIT "This is the first active slave\n");
#endif
/* first slave or no active slave yet, and this link
is OK, so make this interface the active one */
bond->current_slave = new_slave;
bond_assign_current_slave(bond, new_slave);
bond_set_slave_active_flags(new_slave);
bond_mc_update(bond, new_slave, NULL);
}
......@@ -1203,9 +1570,47 @@ static int bond_enslave(struct net_device *master_dev,
read_unlock_irqrestore(&(((struct in_device *)slave_dev->ip_ptr)->lock), rflags);
/* if there is a primary slave, remember it */
if (primary != NULL)
if( strcmp(primary, new_slave->dev->name) == 0)
if (primary != NULL) {
if (strcmp(primary, new_slave->dev->name) == 0) {
bond->primary_slave = new_slave;
}
}
} else if (bond_mode == BOND_MODE_8023AD) {
/* in 802.3ad mode, the internal mechanism
* will activate the slaves in the selected
* aggregator
*/
bond_set_slave_inactive_flags(new_slave);
/* if this is the first slave */
if (new_slave == bond->next) {
SLAVE_AD_INFO(new_slave).id = 1;
/* Initialize AD with the number of times that the AD timer is called in 1 second
* can be called only after the mac address of the bond is set
*/
bond_3ad_initialize(bond, 1000/AD_TIMER_INTERVAL,
lacp_fast);
} else {
SLAVE_AD_INFO(new_slave).id =
SLAVE_AD_INFO(new_slave->prev).id + 1;
}
bond_3ad_bind_slave(new_slave);
} else if ((bond_mode == BOND_MODE_TLB) ||
(bond_mode == BOND_MODE_ALB)) {
new_slave->state = BOND_STATE_ACTIVE;
if ((bond->current_slave == NULL) && (new_slave->link != BOND_LINK_DOWN)) {
/* first slave or no active slave yet, and this link
* is OK, so make this interface the active one
*/
bond_assign_current_slave(bond, new_slave);
}
/* if there is a primary slave, remember it */
if (primary != NULL) {
if (strcmp(primary, new_slave->dev->name) == 0) {
bond->primary_slave = new_slave;
}
}
} else {
#ifdef BONDING_DEBUG
printk(KERN_CRIT "This slave is always active in trunk mode\n");
......@@ -1213,26 +1618,30 @@ static int bond_enslave(struct net_device *master_dev,
/* always active in trunk mode */
new_slave->state = BOND_STATE_ACTIVE;
if (bond->current_slave == NULL)
bond->current_slave = new_slave;
bond_assign_current_slave(bond, new_slave);
}
write_unlock_irqrestore(&bond->lock, flags);
write_unlock_bh(&bond->lock);
if (app_abi_ver < 1) {
/*
* !!! This is to support old versions of ifenslave. We can remove
* this in 2.5 because our ifenslave takes care of this for us.
* We check to see if the master has a mac address yet. If not,
* we'll give it the mac address of our slave device.
* !!! This is to support old versions of ifenslave.
* We can remove this in 2.5 because our ifenslave takes
* care of this for us.
* We check to see if the master has a mac address yet.
* If not, we'll give it the mac address of our slave device.
*/
int ndx = 0;
for (ndx = 0; ndx < slave_dev->addr_len; ndx++) {
#ifdef BONDING_DEBUG
printk(KERN_CRIT "Checking ndx=%d of master_dev->dev_addr\n",
ndx);
printk(KERN_DEBUG
"Checking ndx=%d of master_dev->dev_addr\n", ndx);
#endif
if (master_dev->dev_addr[ndx] != 0) {
#ifdef BONDING_DEBUG
printk(KERN_CRIT "Found non-zero byte at ndx=%d\n",
ndx);
printk(KERN_DEBUG
"Found non-zero byte at ndx=%d\n", ndx);
#endif
break;
}
......@@ -1243,21 +1652,35 @@ static int bond_enslave(struct net_device *master_dev,
* all 0's.
*/
#ifdef BONDING_DEBUG
printk(KERN_CRIT "%s doesn't have a MAC address yet. ",
printk(KERN_DEBUG "%s doesn't have a MAC address yet. ",
master_dev->name);
printk(KERN_CRIT "Going to give assign it from %s.\n",
printk(KERN_DEBUG "Going to give assign it from %s.\n",
slave_dev->name);
#endif
bond_sethwaddr(master_dev, slave_dev);
}
}
printk (KERN_INFO "%s: enslaving %s as a%s interface with a%s link.\n",
master_dev->name, slave_dev->name,
new_slave->state == BOND_STATE_ACTIVE ? "n active" : " backup",
new_slave->link == BOND_LINK_UP ? "n up" : " down");
new_slave->link != BOND_LINK_DOWN ? "n up" : " down");
/* enslave is successful */
return 0;
/* Undo stages on error */
err_unset_master:
netdev_set_master(slave_dev, NULL);
err_close:
dev_close(slave_dev);
err_restore_mac:
memcpy(addr.sa_data, new_slave->perm_hwaddr, ETH_ALEN);
addr.sa_family = slave_dev->type;
slave_dev->set_mac_address(slave_dev, &addr);
err_free:
kfree(new_slave);
return err;
......@@ -1280,7 +1703,6 @@ static int bond_change_active(struct net_device *master_dev, struct net_device *
slave_t *slave;
slave_t *oldactive = NULL;
slave_t *newactive = NULL;
unsigned long flags;
int ret = 0;
if (master_dev == NULL || slave_dev == NULL) {
......@@ -1288,7 +1710,7 @@ static int bond_change_active(struct net_device *master_dev, struct net_device *
}
bond = (struct bonding *) master_dev->priv;
write_lock_irqsave(&bond->lock, flags);
write_lock_bh(&bond->lock);
slave = (slave_t *)bond;
oldactive = bond->current_slave;
......@@ -1307,7 +1729,7 @@ static int bond_change_active(struct net_device *master_dev, struct net_device *
bond_set_slave_inactive_flags(oldactive);
bond_set_slave_active_flags(newactive);
bond_mc_update(bond, newactive, oldactive);
bond->current_slave = newactive;
bond_assign_current_slave(bond, newactive);
printk("%s : activate %s(old : %s)\n",
master_dev->name, newactive->dev->name,
oldactive->dev->name);
......@@ -1315,7 +1737,7 @@ static int bond_change_active(struct net_device *master_dev, struct net_device *
else {
ret = -EINVAL;
}
write_unlock_irqrestore(&bond->lock, flags);
write_unlock_bh(&bond->lock);
return ret;
}
......@@ -1329,7 +1751,7 @@ static int bond_change_active(struct net_device *master_dev, struct net_device *
* Since this function sends messages tails through printk, the caller
* must have started something like `printk(KERN_INFO "xxxx ");'.
*
* Warning: must put locks around the call to this function if needed.
* Warning: Caller must hold ptrlock for writing.
*/
slave_t *change_active_interface(bonding_t *bond)
{
......@@ -1337,22 +1759,16 @@ slave_t *change_active_interface(bonding_t *bond)
slave_t *bestslave = NULL;
int mintime;
read_lock(&bond->ptrlock);
newslave = oldslave = bond->current_slave;
read_unlock(&bond->ptrlock);
if (newslave == NULL) { /* there were no active slaves left */
if (bond->next != (slave_t *)bond) { /* found one slave */
write_lock(&bond->ptrlock);
newslave = bond->current_slave = bond->next;
write_unlock(&bond->ptrlock);
newslave = bond_assign_current_slave(bond, bond->next);
} else {
printk (" but could not find any %s interface.\n",
(bond_mode == BOND_MODE_ACTIVEBACKUP) ? "backup":"other");
write_lock(&bond->ptrlock);
bond->current_slave = (slave_t *)NULL;
write_unlock(&bond->ptrlock);
bond_assign_current_slave(bond, NULL);
return NULL; /* still no slave, return NULL */
}
} else if (bond_mode == BOND_MODE_ACTIVEBACKUP) {
......@@ -1395,9 +1811,7 @@ slave_t *change_active_interface(bonding_t *bond)
newslave->dev->name);
}
write_lock(&bond->ptrlock);
bond->current_slave = newslave;
write_unlock(&bond->ptrlock);
bond_assign_current_slave(bond, newslave);
return newslave;
}
else if (newslave->link == BOND_LINK_BACK) {
......@@ -1425,9 +1839,7 @@ slave_t *change_active_interface(bonding_t *bond)
bestslave->jiffies = jiffies;
bond_set_slave_active_flags(bestslave);
bond_mc_update(bond, bestslave, oldslave);
write_lock(&bond->ptrlock);
bond->current_slave = bestslave;
write_unlock(&bond->ptrlock);
bond_assign_current_slave(bond, bestslave);
return bestslave;
}
......@@ -1450,9 +1862,7 @@ slave_t *change_active_interface(bonding_t *bond)
(bond_mode == BOND_MODE_ACTIVEBACKUP) ? "backup":"other");
/* absolutely nothing found. let's return NULL */
write_lock(&bond->ptrlock);
bond->current_slave = (slave_t *)NULL;
write_unlock(&bond->ptrlock);
bond_assign_current_slave(bond, NULL);
return NULL;
}
......@@ -1471,7 +1881,7 @@ static int bond_release(struct net_device *master, struct net_device *slave)
{
bonding_t *bond;
slave_t *our_slave, *old_current;
unsigned long flags;
struct sockaddr addr;
if (master == NULL || slave == NULL) {
return -ENODEV;
......@@ -1486,12 +1896,41 @@ static int bond_release(struct net_device *master, struct net_device *slave)
return -EINVAL;
}
write_lock_irqsave(&bond->lock, flags);
write_lock_bh(&bond->lock);
bond->current_arp_slave = NULL;
our_slave = (slave_t *)bond;
old_current = bond->current_slave;
while ((our_slave = our_slave->prev) != (slave_t *)bond) {
if (our_slave->dev == slave) {
int mac_addr_differ = memcmp(bond->device->dev_addr,
our_slave->perm_hwaddr,
ETH_ALEN);
if (!mac_addr_differ && (bond->slave_cnt > 1)) {
printk(KERN_WARNING "WARNING: the permanent HWaddr of %s "
"- %02X:%02X:%02X:%02X:%02X:%02X - "
"is still in use by %s. Set the HWaddr "
"of %s to a different address "
"to avoid conflicts.\n",
slave->name,
slave->dev_addr[0],
slave->dev_addr[1],
slave->dev_addr[2],
slave->dev_addr[3],
slave->dev_addr[4],
slave->dev_addr[5],
bond->device->name,
slave->name);
}
/* Inform AD package of unbinding of slave. */
if (bond_mode == BOND_MODE_8023AD) {
/* must be called before the slave is
* detached from the list
*/
bond_3ad_unbind_slave(our_slave);
}
/* release the slave from its bond */
bond_detach_slave(bond, our_slave);
printk (KERN_INFO "%s: releasing %s interface %s",
......@@ -1516,11 +1955,20 @@ static int bond_release(struct net_device *master, struct net_device *slave)
bond->primary_slave = NULL;
}
if ((bond_mode == BOND_MODE_TLB) ||
(bond_mode == BOND_MODE_ALB)) {
/* must be called only after the slave has been
* detached from the list and the current_slave
* has been replaced (if our_slave == old_current)
*/
bond_alb_deinit_slave(bond, our_slave);
}
break;
}
}
write_unlock_irqrestore(&bond->lock, flags);
write_unlock_bh(&bond->lock);
if (our_slave == (slave_t *)bond) {
/* if we get here, it's because the device was not found */
......@@ -1545,33 +1993,47 @@ static int bond_release(struct net_device *master, struct net_device *slave)
netdev_set_master(slave, NULL);
/* only restore its RUNNING flag if monitoring set it down */
if (slave->flags & IFF_UP) {
slave->flags |= IFF_RUNNING;
}
if (slave->flags & IFF_NOARP ||
bond->current_slave != NULL) {
/* close slave before restoring its mac address */
dev_close(slave);
our_slave->original_flags &= ~IFF_UP;
if (app_abi_ver >= 1) {
/* restore original ("permanent") mac address */
memcpy(addr.sa_data, our_slave->perm_hwaddr, ETH_ALEN);
addr.sa_family = slave->type;
slave->set_mac_address(slave, &addr);
}
bond_restore_slave_flags(our_slave);
/* restore the original state of the
* IFF_NOARP flag that might have been
* set by bond_set_slave_inactive_flags()
*/
if ((our_slave->original_flags & IFF_NOARP) == 0) {
slave->flags &= ~IFF_NOARP;
}
kfree(our_slave);
/* if the last slave was removed, zero the mac address
* of the master so it will be set by the application
* to the mac address of the first slave
*/
if (bond->next == (slave_t*)bond) {
memset(master->dev_addr, 0, master->addr_len);
}
return 0; /* deletion OK */
}
/*
* This function releases all slaves.
* Warning: must put write-locks around the call to this function.
*/
static int bond_release_all(struct net_device *master)
{
bonding_t *bond;
slave_t *our_slave;
slave_t *our_slave, *old_current;
struct net_device *slave_dev;
struct sockaddr addr;
int err = 0;
if (master == NULL) {
return -ENODEV;
......@@ -1582,17 +2044,46 @@ static int bond_release_all(struct net_device *master)
}
bond = (struct bonding *) master->priv;
write_lock_bh(&bond->lock);
if (bond->next == (struct slave *) bond) {
err = -EINVAL;
goto out;
}
old_current = bond->current_slave;
bond_assign_current_slave(bond, NULL);
bond->current_arp_slave = NULL;
bond->current_slave = NULL;
bond->primary_slave = NULL;
while ((our_slave = bond->prev) != (slave_t *)bond) {
/* Inform AD package of unbinding of slave
* before slave is detached from the list.
*/
if (bond_mode == BOND_MODE_8023AD) {
bond_3ad_unbind_slave(our_slave);
}
slave_dev = our_slave->dev;
bond_detach_slave(bond, our_slave);
if ((bond_mode == BOND_MODE_TLB) ||
(bond_mode == BOND_MODE_ALB)) {
/* must be called only after the slave
* has been detached from the list
*/
bond_alb_deinit_slave(bond, our_slave);
}
/* now that the slave is detached, unlock and perform
* all the undo steps that should not be called from
* within a lock.
*/
write_unlock_bh(&bond->lock);
if (multicast_mode == BOND_MULTICAST_ALL
|| (multicast_mode == BOND_MULTICAST_ACTIVE
&& bond->current_slave == our_slave)) {
&& old_current == our_slave)) {
/* flush master's mc_list from slave */
bond_mc_list_flush (slave_dev, master);
......@@ -1606,25 +2097,43 @@ static int bond_release_all(struct net_device *master)
dev_set_allmulti(slave_dev, -1);
}
kfree(our_slave);
netdev_set_master(slave_dev, NULL);
/*
* Can be safely called from inside the bond lock
* since traffic and timers have already stopped
/* close slave before restoring its mac address */
dev_close(slave_dev);
if (app_abi_ver >= 1) {
/* restore original ("permanent") mac address*/
memcpy(addr.sa_data, our_slave->perm_hwaddr, ETH_ALEN);
addr.sa_family = slave_dev->type;
slave_dev->set_mac_address(slave_dev, &addr);
}
/* restore the original state of the IFF_NOARP flag that might have
* been set by bond_set_slave_inactive_flags()
*/
netdev_set_master(slave_dev, NULL);
if ((our_slave->original_flags & IFF_NOARP) == 0) {
slave_dev->flags &= ~IFF_NOARP;
}
/* only restore its RUNNING flag if monitoring set it down */
if (slave_dev->flags & IFF_UP)
slave_dev->flags |= IFF_RUNNING;
kfree(our_slave);
if (slave_dev->flags & IFF_NOARP)
dev_close(slave_dev);
/* re-acquire the lock before getting the next slave */
write_lock_bh(&bond->lock);
}
/* zero the mac address of the master so it will be
* set by the application to the mac address of the
* first slave
*/
memset(master->dev_addr, 0, master->addr_len);
printk (KERN_INFO "%s: released all slaves\n", master->name);
return 0;
out:
write_unlock_bh(&bond->lock);
return err;
}
/* this function is called regularly to monitor each slave's link. */
......@@ -1632,10 +2141,9 @@ static void bond_mii_monitor(struct net_device *master)
{
bonding_t *bond = (struct bonding *) master->priv;
slave_t *slave, *bestslave, *oldcurrent;
unsigned long flags;
int slave_died = 0;
read_lock_irqsave(&bond->lock, flags);
read_lock(&bond->lock);
/* we will try to read the link status of each of our slaves, and
* set their IFF_RUNNING flag appropriately. For each slave not
......@@ -1655,6 +2163,8 @@ static void bond_mii_monitor(struct net_device *master)
int mindelay = updelay + 1;
struct net_device *dev = slave->dev;
int link_state;
u16 old_speed = slave->speed;
u8 old_duplex = slave->duplex;
link_state = bond_check_dev_link(dev, 0);
......@@ -1702,25 +2212,36 @@ static void bond_mii_monitor(struct net_device *master)
/* link down for too long time */
slave->link = BOND_LINK_DOWN;
/* in active/backup mode, we must
completely disable this interface */
if (bond_mode == BOND_MODE_ACTIVEBACKUP) {
* completely disable this interface
*/
if ((bond_mode == BOND_MODE_ACTIVEBACKUP) ||
(bond_mode == BOND_MODE_8023AD)) {
bond_set_slave_inactive_flags(slave);
}
printk(KERN_INFO
"%s: link status definitely down "
"for interface %s, disabling it",
"for interface %s, disabling it\n",
master->name,
dev->name);
read_lock(&bond->ptrlock);
/* notify ad that the link status has changed */
if (bond_mode == BOND_MODE_8023AD) {
bond_3ad_handle_link_change(slave, BOND_LINK_DOWN);
}
if ((bond_mode == BOND_MODE_TLB) ||
(bond_mode == BOND_MODE_ALB)) {
bond_alb_handle_link_change(bond, slave, BOND_LINK_DOWN);
}
write_lock(&bond->ptrlock);
if (slave == bond->current_slave) {
read_unlock(&bond->ptrlock);
/* find a new interface and be verbose */
change_active_interface(bond);
} else {
read_unlock(&bond->ptrlock);
printk(".\n");
}
write_unlock(&bond->ptrlock);
slave_died = 1;
} else {
slave->delay--;
......@@ -1783,7 +2304,11 @@ static void bond_mii_monitor(struct net_device *master)
slave->link = BOND_LINK_UP;
slave->jiffies = jiffies;
if (bond_mode != BOND_MODE_ACTIVEBACKUP) {
if (bond_mode == BOND_MODE_8023AD) {
/* prevent it from being the active one */
slave->state = BOND_STATE_BACKUP;
}
else if (bond_mode != BOND_MODE_ACTIVEBACKUP) {
/* make it immediately active */
slave->state = BOND_STATE_ACTIVE;
} else if (slave != bond->primary_slave) {
......@@ -1797,9 +2322,21 @@ static void bond_mii_monitor(struct net_device *master)
master->name,
dev->name);
/* notify ad that the link status has changed */
if (bond_mode == BOND_MODE_8023AD) {
bond_3ad_handle_link_change(slave, BOND_LINK_UP);
}
if ((bond_mode == BOND_MODE_TLB) ||
(bond_mode == BOND_MODE_ALB)) {
bond_alb_handle_link_change(bond, slave, BOND_LINK_UP);
}
write_lock(&bond->ptrlock);
if ( (bond->primary_slave != NULL)
&& (slave == bond->primary_slave) )
change_active_interface(bond);
write_unlock(&bond->ptrlock);
}
else
slave->delay--;
......@@ -1819,6 +2356,18 @@ static void bond_mii_monitor(struct net_device *master)
}
break;
} /* end of switch */
bond_update_speed_duplex(slave);
if (bond_mode == BOND_MODE_8023AD) {
if (old_speed != slave->speed) {
bond_3ad_adapter_speed_changed(slave);
}
if (old_duplex != slave->duplex) {
bond_3ad_adapter_duplex_changed(slave);
}
}
} /* end of while */
/*
......@@ -1846,16 +2395,26 @@ static void bond_mii_monitor(struct net_device *master)
bestslave->delay = 0;
bestslave->link = BOND_LINK_UP;
bestslave->jiffies = jiffies;
/* notify ad that the link status has changed */
if (bond_mode == BOND_MODE_8023AD) {
bond_3ad_handle_link_change(bestslave, BOND_LINK_UP);
}
if ((bond_mode == BOND_MODE_TLB) ||
(bond_mode == BOND_MODE_ALB)) {
bond_alb_handle_link_change(bond, bestslave, BOND_LINK_UP);
}
}
if (bond_mode == BOND_MODE_ACTIVEBACKUP) {
bond_set_slave_active_flags(bestslave);
bond_mc_update(bond, bestslave, NULL);
} else {
} else if (bond_mode != BOND_MODE_8023AD) {
bestslave->state = BOND_STATE_ACTIVE;
}
write_lock(&bond->ptrlock);
bond->current_slave = bestslave;
bond_assign_current_slave(bond, bestslave);
write_unlock(&bond->ptrlock);
} else if (slave_died) {
/* print this message only once a slave has just died */
......@@ -1865,7 +2424,7 @@ static void bond_mii_monitor(struct net_device *master)
}
}
read_unlock_irqrestore(&bond->lock, flags);
read_unlock(&bond->lock);
/* re-arm the timer */
mod_timer(&bond->mii_timer, jiffies + (miimon * HZ / 1000));
}
......@@ -1880,7 +2439,6 @@ static void bond_mii_monitor(struct net_device *master)
static void loadbalance_arp_monitor(struct net_device *master)
{
bonding_t *bond;
unsigned long flags;
slave_t *slave;
int the_delta_in_ticks = arp_interval * HZ / 1000;
int next_timer = jiffies + (arp_interval * HZ / 1000);
......@@ -1891,24 +2449,22 @@ static void loadbalance_arp_monitor(struct net_device *master)
return;
}
read_lock_irqsave(&bond->lock, flags);
/* TODO: investigate why rtnl_shlock_nowait and rtnl_exlock_nowait
* are called below and add comment why they are required...
*/
if ((!IS_UP(master)) || rtnl_shlock_nowait()) {
mod_timer(&bond->arp_timer, next_timer);
read_unlock_irqrestore(&bond->lock, flags);
return;
}
if (rtnl_exlock_nowait()) {
rtnl_shunlock();
mod_timer(&bond->arp_timer, next_timer);
read_unlock_irqrestore(&bond->lock, flags);
return;
}
read_lock(&bond->lock);
/* see if any of the previous devices are up now (i.e. they have
* xmt and rcv traffic). the current_slave does not come into
* the picture unless it is null. also, slave->jiffies is not needed
......@@ -1935,9 +2491,8 @@ static void loadbalance_arp_monitor(struct net_device *master)
* current_slave being null after enslaving
* is closed.
*/
read_lock(&bond->ptrlock);
write_lock(&bond->ptrlock);
if (bond->current_slave == NULL) {
read_unlock(&bond->ptrlock);
printk(KERN_INFO
"%s: link status definitely up "
"for interface %s, ",
......@@ -1945,12 +2500,12 @@ static void loadbalance_arp_monitor(struct net_device *master)
slave->dev->name);
change_active_interface(bond);
} else {
read_unlock(&bond->ptrlock);
printk(KERN_INFO
"%s: interface %s is now up\n",
master->name,
slave->dev->name);
}
write_unlock(&bond->ptrlock);
}
} else {
/* slave->link == BOND_LINK_UP */
......@@ -1973,13 +2528,11 @@ static void loadbalance_arp_monitor(struct net_device *master)
master->name,
slave->dev->name);
read_lock(&bond->ptrlock);
write_lock(&bond->ptrlock);
if (slave == bond->current_slave) {
read_unlock(&bond->ptrlock);
change_active_interface(bond);
} else {
read_unlock(&bond->ptrlock);
}
write_unlock(&bond->ptrlock);
}
}
......@@ -1995,9 +2548,9 @@ static void loadbalance_arp_monitor(struct net_device *master)
}
}
read_unlock(&bond->lock);
rtnl_exunlock();
rtnl_shunlock();
read_unlock_irqrestore(&bond->lock, flags);
/* re-arm the timer */
mod_timer(&bond->arp_timer, next_timer);
......@@ -2021,7 +2574,6 @@ static void loadbalance_arp_monitor(struct net_device *master)
static void activebackup_arp_monitor(struct net_device *master)
{
bonding_t *bond;
unsigned long flags;
slave_t *slave;
int the_delta_in_ticks = arp_interval * HZ / 1000;
int next_timer = jiffies + (arp_interval * HZ / 1000);
......@@ -2032,14 +2584,13 @@ static void activebackup_arp_monitor(struct net_device *master)
return;
}
read_lock_irqsave(&bond->lock, flags);
if (!IS_UP(master)) {
mod_timer(&bond->arp_timer, next_timer);
read_unlock_irqrestore(&bond->lock, flags);
return;
}
read_lock(&bond->lock);
/* determine if any slave has come up or any backup slave has
* gone down
* TODO: what about up/down delay in arp mode? it wasn't here before
......@@ -2057,7 +2608,7 @@ static void activebackup_arp_monitor(struct net_device *master)
if ((bond->current_slave == NULL) &&
((jiffies - slave->dev->trans_start) <=
the_delta_in_ticks)) {
bond->current_slave = slave;
bond_assign_current_slave(bond, slave);
bond_set_slave_active_flags(slave);
bond_mc_update(bond, slave, NULL);
bond->current_arp_slave = NULL;
......@@ -2148,7 +2699,9 @@ static void activebackup_arp_monitor(struct net_device *master)
"active interface %s, disabling it",
master->name,
slave->dev->name);
write_lock(&bond->ptrlock);
slave = change_active_interface(bond);
write_unlock(&bond->ptrlock);
bond->current_arp_slave = slave;
if (slave != NULL) {
slave->jiffies = jiffies;
......@@ -2169,7 +2722,7 @@ static void activebackup_arp_monitor(struct net_device *master)
bond_set_slave_inactive_flags(slave);
bond_mc_update(bond, bond->primary_slave, slave);
write_lock(&bond->ptrlock);
bond->current_slave = bond->primary_slave;
bond_assign_current_slave(bond, bond->primary_slave);
write_unlock(&bond->ptrlock);
slave = bond->primary_slave;
bond_set_slave_active_flags(slave);
......@@ -2240,8 +2793,8 @@ static void activebackup_arp_monitor(struct net_device *master)
}
}
read_unlock(&bond->lock);
mod_timer(&bond->arp_timer, next_timer);
read_unlock_irqrestore(&bond->lock, flags);
}
typedef uint32_t in_addr_t;
......@@ -2337,17 +2890,16 @@ static int bond_info_query(struct net_device *master, struct ifbond *info)
{
bonding_t *bond = (struct bonding *) master->priv;
slave_t *slave;
unsigned long flags;
info->bond_mode = bond_mode;
info->num_slaves = 0;
info->miimon = miimon;
read_lock_irqsave(&bond->lock, flags);
read_lock_bh(&bond->lock);
for (slave = bond->prev; slave != (slave_t *)bond; slave = slave->prev) {
info->num_slaves++;
}
read_unlock_irqrestore(&bond->lock, flags);
read_unlock_bh(&bond->lock);
return 0;
}
......@@ -2358,19 +2910,18 @@ static int bond_slave_info_query(struct net_device *master,
bonding_t *bond = (struct bonding *) master->priv;
slave_t *slave;
int cur_ndx = 0;
unsigned long flags;
if (info->slave_id < 0) {
return -ENODEV;
}
read_lock_irqsave(&bond->lock, flags);
read_lock_bh(&bond->lock);
for (slave = bond->prev;
slave != (slave_t *)bond && cur_ndx < info->slave_id;
slave = slave->prev) {
cur_ndx++;
}
read_unlock_irqrestore(&bond->lock, flags);
read_unlock_bh(&bond->lock);
if (slave != (slave_t *)bond) {
strcpy(info->slave_name, slave->dev->name);
......@@ -2384,6 +2935,59 @@ static int bond_slave_info_query(struct net_device *master,
return 0;
}
static int bond_ethtool_ioctl(struct net_device *master_dev, struct ifreq *ifr)
{
void *addr = ifr->ifr_data;
uint32_t cmd;
if (get_user(cmd, (uint32_t *) addr))
return -EFAULT;
switch (cmd) {
case ETHTOOL_GDRVINFO:
{
struct ethtool_drvinfo info;
char *endptr;
if (copy_from_user(&info, addr, sizeof(info)))
return -EFAULT;
if (strcmp(info.driver, "ifenslave") == 0) {
int new_abi_ver;
new_abi_ver = simple_strtoul(info.fw_version,
&endptr, 0);
if (*endptr) {
printk(KERN_ERR
"bonding: Error: got invalid ABI"
" version from application\n");
return -EINVAL;
}
if (orig_app_abi_ver == -1) {
orig_app_abi_ver = new_abi_ver;
}
app_abi_ver = new_abi_ver;
}
strncpy(info.driver, DRV_NAME, 32);
strncpy(info.version, DRV_VERSION, 32);
snprintf(info.fw_version, 32, "%d", BOND_ABI_VERSION);
if (copy_to_user(addr, &info, sizeof(info)))
return -EFAULT;
return 0;
}
break;
default:
return -EOPNOTSUPP;
}
}
static int bond_ioctl(struct net_device *master_dev, struct ifreq *ifr, int cmd)
{
struct net_device *slave_dev = NULL;
......@@ -2398,6 +3002,9 @@ static int bond_ioctl(struct net_device *master_dev, struct ifreq *ifr, int cmd)
#endif
switch (cmd) {
case SIOCETHTOOL:
return bond_ethtool_ioctl(master_dev, ifr);
case SIOCGMIIPHY:
mii = (struct mii_ioctl_data *)&ifr->ifr_data;
if (mii == NULL) {
......@@ -2451,6 +3058,21 @@ static int bond_ioctl(struct net_device *master_dev, struct ifreq *ifr, int cmd)
return -EPERM;
}
if (orig_app_abi_ver == -1) {
/* no orig_app_abi_ver was provided yet, so we'll use the
* current one from now on, even if it's 0
*/
orig_app_abi_ver = app_abi_ver;
} else if (orig_app_abi_ver != app_abi_ver) {
printk(KERN_ERR
"bonding: Error: already using ifenslave ABI "
"version %d; to upgrade ifenslave to version %d,"
"you must first reload bonding.\n",
orig_app_abi_ver, app_abi_ver);
return -EINVAL;
}
slave_dev = dev_get_by_name(ifr->ifr_slave);
#ifdef BONDING_DEBUG
......@@ -2476,7 +3098,9 @@ static int bond_ioctl(struct net_device *master_dev, struct ifreq *ifr, int cmd)
break;
case BOND_CHANGE_ACTIVE_OLD:
case SIOCBONDCHANGEACTIVE:
if (bond_mode == BOND_MODE_ACTIVEBACKUP) {
if ((bond_mode == BOND_MODE_ACTIVEBACKUP) ||
(bond_mode == BOND_MODE_TLB) ||
(bond_mode == BOND_MODE_ALB)) {
ret = bond_change_active(master_dev, slave_dev);
}
else {
......@@ -2505,7 +3129,6 @@ static int bond_xmit_broadcast(struct sk_buff *skb, struct net_device *dev)
{
slave_t *slave, *start_at;
struct bonding *bond = (struct bonding *) dev->priv;
unsigned long flags;
struct net_device *device_we_should_send_to = 0;
if (!IS_UP(dev)) { /* bond down */
......@@ -2513,7 +3136,7 @@ static int bond_xmit_broadcast(struct sk_buff *skb, struct net_device *dev)
return 0;
}
read_lock_irqsave(&bond->lock, flags);
read_lock(&bond->lock);
read_lock(&bond->ptrlock);
slave = start_at = bond->current_slave;
......@@ -2521,7 +3144,7 @@ static int bond_xmit_broadcast(struct sk_buff *skb, struct net_device *dev)
if (slave == NULL) { /* we're at the root, get the first slave */
/* no suitable interface, frame not sent */
read_unlock_irqrestore(&bond->lock, flags);
read_unlock(&bond->lock);
dev_kfree_skb(skb);
return 0;
}
......@@ -2553,7 +3176,7 @@ static int bond_xmit_broadcast(struct sk_buff *skb, struct net_device *dev)
dev_kfree_skb(skb);
/* frame sent to all suitable interfaces */
read_unlock_irqrestore(&bond->lock, flags);
read_unlock(&bond->lock);
return 0;
}
......@@ -2561,14 +3184,13 @@ static int bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *dev)
{
slave_t *slave, *start_at;
struct bonding *bond = (struct bonding *) dev->priv;
unsigned long flags;
if (!IS_UP(dev)) { /* bond down */
dev_kfree_skb(skb);
return 0;
}
read_lock_irqsave(&bond->lock, flags);
read_lock(&bond->lock);
read_lock(&bond->ptrlock);
slave = start_at = bond->current_slave;
......@@ -2577,7 +3199,7 @@ static int bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *dev)
if (slave == NULL) { /* we're at the root, get the first slave */
/* no suitable interface, frame not sent */
dev_kfree_skb(skb);
read_unlock_irqrestore(&bond->lock, flags);
read_unlock(&bond->lock);
return 0;
}
......@@ -2591,30 +3213,29 @@ static int bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *dev)
dev_queue_xmit(skb);
write_lock(&bond->ptrlock);
bond->current_slave = slave->next;
bond_assign_current_slave(bond, slave->next);
write_unlock(&bond->ptrlock);
read_unlock_irqrestore(&bond->lock, flags);
read_unlock(&bond->lock);
return 0;
}
} while ((slave = slave->next) != start_at);
/* no suitable interface, frame not sent */
dev_kfree_skb(skb);
read_unlock_irqrestore(&bond->lock, flags);
read_unlock(&bond->lock);
return 0;
}
/*
* in XOR mode, we determine the output device by performing xor on
* the source and destination hw addresses. If this device is not
* the source and destination hw adresses. If this device is not
* enabled, find the next slave following this xor slave.
*/
static int bond_xmit_xor(struct sk_buff *skb, struct net_device *dev)
{
slave_t *slave, *start_at;
struct bonding *bond = (struct bonding *) dev->priv;
unsigned long flags;
struct ethhdr *data = (struct ethhdr *)skb->data;
int slave_no;
......@@ -2623,14 +3244,14 @@ static int bond_xmit_xor(struct sk_buff *skb, struct net_device *dev)
return 0;
}
read_lock_irqsave(&bond->lock, flags);
read_lock(&bond->lock);
slave = bond->prev;
/* we're at the root, get the first slave */
if (bond->slave_cnt == 0) {
/* no suitable interface, frame not sent */
dev_kfree_skb(skb);
read_unlock_irqrestore(&bond->lock, flags);
read_unlock(&bond->lock);
return 0;
}
......@@ -2651,14 +3272,14 @@ static int bond_xmit_xor(struct sk_buff *skb, struct net_device *dev)
skb->priority = 1;
dev_queue_xmit(skb);
read_unlock_irqrestore(&bond->lock, flags);
read_unlock(&bond->lock);
return 0;
}
} while ((slave = slave->next) != start_at);
/* no suitable interface, frame not sent */
dev_kfree_skb(skb);
read_unlock_irqrestore(&bond->lock, flags);
read_unlock(&bond->lock);
return 0;
}
......@@ -2669,7 +3290,6 @@ static int bond_xmit_xor(struct sk_buff *skb, struct net_device *dev)
static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *dev)
{
struct bonding *bond = (struct bonding *) dev->priv;
unsigned long flags;
int ret;
if (!IS_UP(dev)) { /* bond down */
......@@ -2710,7 +3330,7 @@ static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *dev)
}
}
read_lock_irqsave(&bond->lock, flags);
read_lock(&bond->lock);
read_lock(&bond->ptrlock);
if (bond->current_slave != NULL) { /* one usable interface */
......@@ -2718,7 +3338,7 @@ static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *dev)
read_unlock(&bond->ptrlock);
skb->priority = 1;
ret = dev_queue_xmit(skb);
read_unlock_irqrestore(&bond->lock, flags);
read_unlock(&bond->lock);
return 0;
}
else {
......@@ -2730,7 +3350,7 @@ static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *dev)
printk(KERN_INFO "There was no suitable interface, so we don't transmit\n");
#endif
dev_kfree_skb(skb);
read_unlock_irqrestore(&bond->lock, flags);
read_unlock(&bond->lock);
return 0;
}
......@@ -2739,11 +3359,10 @@ static struct net_device_stats *bond_get_stats(struct net_device *dev)
bonding_t *bond = dev->priv;
struct net_device_stats *stats = bond->stats, *sstats;
slave_t *slave;
unsigned long flags;
memset(bond->stats, 0, sizeof(struct net_device_stats));
read_lock_irqsave(&bond->lock, flags);
read_lock_bh(&bond->lock);
for (slave = bond->prev; slave != (slave_t *)bond; slave = slave->prev) {
sstats = slave->dev->get_stats(slave->dev);
......@@ -2776,7 +3395,7 @@ static struct net_device_stats *bond_get_stats(struct net_device *dev)
}
read_unlock_irqrestore(&bond->lock, flags);
read_unlock_bh(&bond->lock);
return stats;
}
......@@ -2787,7 +3406,8 @@ static int bond_get_info(char *buf, char **start, off_t offset, int length)
off_t begin = 0;
u16 link;
slave_t *slave = NULL;
unsigned long flags;
len += sprintf(buf + len, "%s\n", version);
while (bond != NULL) {
/*
......@@ -2799,8 +3419,10 @@ static int bond_get_info(char *buf, char **start, off_t offset, int length)
len += sprintf(buf + len, "Bonding Mode: %s\n",
bond_mode_name());
if (bond_mode == BOND_MODE_ACTIVEBACKUP) {
read_lock_irqsave(&bond->lock, flags);
if ((bond_mode == BOND_MODE_ACTIVEBACKUP) ||
(bond_mode == BOND_MODE_TLB) ||
(bond_mode == BOND_MODE_ALB)) {
read_lock_bh(&bond->lock);
read_lock(&bond->ptrlock);
if (bond->current_slave != NULL) {
len += sprintf(buf + len,
......@@ -2808,7 +3430,7 @@ static int bond_get_info(char *buf, char **start, off_t offset, int length)
bond->current_slave->dev->name);
}
read_unlock(&bond->ptrlock);
read_unlock_irqrestore(&bond->lock, flags);
read_unlock_bh(&bond->lock);
}
len += sprintf(buf + len, "MII Status: ");
......@@ -2823,7 +3445,32 @@ static int bond_get_info(char *buf, char **start, off_t offset, int length)
len += sprintf(buf + len, "Multicast Mode: %s\n",
multicast_mode_name());
read_lock_irqsave(&bond->lock, flags);
read_lock_bh(&bond->lock);
if (bond_mode == BOND_MODE_8023AD) {
struct ad_info ad_info;
len += sprintf(buf + len, "\n802.3ad info\n");
if (bond_3ad_get_active_agg_info(bond, &ad_info)) {
len += sprintf(buf + len, "bond %s has no active aggregator\n", bond->device->name);
} else {
len += sprintf(buf + len, "Active Aggregator Info:\n");
len += sprintf(buf + len, "\tAggregator ID: %d\n", ad_info.aggregator_id);
len += sprintf(buf + len, "\tNumber of ports: %d\n", ad_info.ports);
len += sprintf(buf + len, "\tActor Key: %d\n", ad_info.actor_key);
len += sprintf(buf + len, "\tPartner Key: %d\n", ad_info.partner_key);
len += sprintf(buf + len, "\tPartner Mac Address: %02x:%02x:%02x:%02x:%02x:%02x\n",
ad_info.partner_system[0],
ad_info.partner_system[1],
ad_info.partner_system[2],
ad_info.partner_system[3],
ad_info.partner_system[4],
ad_info.partner_system[5]);
}
}
for (slave = bond->prev; slave != (slave_t *)bond;
slave = slave->prev) {
len += sprintf(buf + len, "\nSlave Interface: %s\n", slave->dev->name);
......@@ -2835,8 +3482,30 @@ static int bond_get_info(char *buf, char **start, off_t offset, int length)
"up\n" : "down\n");
len += sprintf(buf + len, "Link Failure Count: %d\n",
slave->link_failure_count);
if (app_abi_ver >= 1) {
len += sprintf(buf + len,
"Permanent HW addr: %02x:%02x:%02x:%02x:%02x:%02x\n",
slave->perm_hwaddr[0],
slave->perm_hwaddr[1],
slave->perm_hwaddr[2],
slave->perm_hwaddr[3],
slave->perm_hwaddr[4],
slave->perm_hwaddr[5]);
}
if (bond_mode == BOND_MODE_8023AD) {
struct aggregator *agg = SLAVE_AD_INFO(slave).port.aggregator;
if (agg) {
len += sprintf(buf + len, "Aggregator ID: %d\n",
agg->aggregator_identifier);
} else {
len += sprintf(buf + len, "Aggregator ID: N/A\n");
}
}
}
read_unlock_irqrestore(&bond->lock, flags);
read_unlock_bh(&bond->lock);
/*
* Figure out the calcs for the /proc/net interface
......@@ -2904,7 +3573,7 @@ static int bond_event(struct notifier_block *this, unsigned long event,
}
static struct notifier_block bond_netdev_notifier = {
.notifier_call = bond_event,
notifier_call: bond_event,
};
static int __init bond_init(struct net_device *dev)
......@@ -2952,6 +3621,13 @@ static int __init bond_init(struct net_device *dev)
case BOND_MODE_BROADCAST:
dev->hard_start_xmit = bond_xmit_broadcast;
break;
case BOND_MODE_8023AD:
dev->hard_start_xmit = bond_3ad_xmit_xor;
break;
case BOND_MODE_TLB:
case BOND_MODE_ALB:
dev->hard_start_xmit = bond_alb_xmit;
break;
default:
printk(KERN_ERR "Unknown bonding mode %d\n", bond_mode);
kfree(bond->stats);
......@@ -3101,6 +3777,24 @@ static int __init bonding_init(void)
}
}
if (lacp_rate) {
if (bond_mode != BOND_MODE_8023AD) {
printk(KERN_WARNING
"lacp_rate param is irrelevant in mode %s\n",
bond_mode_name());
} else {
lacp_fast = bond_parse_parm(lacp_rate, bond_lacp_tbl);
if (lacp_fast == -1) {
printk(KERN_WARNING
"bonding_init(): Invalid lacp rate "
"\"%s\"\n",
lacp_rate == NULL ? "NULL" : lacp_rate);
return -EINVAL;
}
}
}
if (max_bonds < 1 || max_bonds > INT_MAX) {
printk(KERN_WARNING
"bonding_init(): max_bonds (%d) not in range %d-%d, "
......@@ -3139,6 +3833,64 @@ static int __init bonding_init(void)
downdelay = 0;
}
/* reset values for 802.3ad */
if (bond_mode == BOND_MODE_8023AD) {
if (arp_interval != 0) {
printk(KERN_WARNING "bonding_init(): ARP monitoring"
"can't be used simultaneously with 802.3ad, "
"disabling ARP monitoring\n");
arp_interval = 0;
}
if (miimon == 0) {
printk(KERN_ERR
"bonding_init(): miimon must be specified, "
"otherwise bonding will not detect link failure, "
"speed and duplex which are essential "
"for 802.3ad operation\n");
printk(KERN_ERR "Forcing miimon to 100msec\n");
miimon = 100;
}
if (multicast_mode != BOND_MULTICAST_ALL) {
printk(KERN_ERR
"bonding_init(): Multicast mode must "
"be set to ALL for 802.3ad\n");
printk(KERN_ERR "Forcing Multicast mode to ALL\n");
multicast_mode = BOND_MULTICAST_ALL;
}
}
/* reset values for TLB/ALB */
if ((bond_mode == BOND_MODE_TLB) ||
(bond_mode == BOND_MODE_ALB)) {
if (miimon == 0) {
printk(KERN_ERR
"bonding_init(): miimon must be specified, "
"otherwise bonding will not detect link failure "
"and link speed which are essential "
"for TLB/ALB load balancing\n");
printk(KERN_ERR "Forcing miimon to 100msec\n");
miimon = 100;
}
if (multicast_mode != BOND_MULTICAST_ACTIVE) {
printk(KERN_ERR
"bonding_init(): Multicast mode must "
"be set to ACTIVE for TLB/ALB\n");
printk(KERN_ERR "Forcing Multicast mode to ACTIVE\n");
multicast_mode = BOND_MULTICAST_ACTIVE;
}
}
if (bond_mode == BOND_MODE_ALB) {
printk(KERN_INFO
"In ALB mode you might experience client disconnections"
" upon reconnection of a link if the bonding module"
" updelay parameter (%d msec) is incompatible with the"
" forwarding delay time of the switch\n", updelay);
}
if (miimon == 0) {
if ((updelay != 0) || (downdelay != 0)) {
/* just warn the user the up/down delay will have
......@@ -3230,13 +3982,15 @@ static int __init bonding_init(void)
"link failures! see bonding.txt for details.\n");
}
if ((primary != NULL) && (bond_mode != BOND_MODE_ACTIVEBACKUP)){
/* currently, using a primary only makes sence
* in active backup mode
if ((primary != NULL) && (bond_mode != BOND_MODE_ACTIVEBACKUP) &&
(bond_mode != BOND_MODE_TLB) &&
(bond_mode != BOND_MODE_ALB)){
/* currently, using a primary only makes sense
* in active backup, TLB or ALB modes
*/
printk(KERN_WARNING
"bonding_init(): %s primary device specified but has "
" no effect in %s mode\n",
"no effect in %s mode\n",
primary, bond_mode_name());
primary = NULL;
}
......
/*
* Bond several ethernet interfaces into a Cisco, running 'Etherchannel'.
*
* Portions are (c) Copyright 1995 Simon "Guru Aleph-Null" Janes
* NCM: Network and Communications Management, Inc.
*
* BUT, I'm the one who modified it for ethernet, so:
* (c) Copyright 1999, Thomas Davis, tadavis@lbl.gov
*
* This software may be used and distributed according to the terms
* of the GNU Public License, incorporated herein by reference.
*
*
* 2003/03/18 - Amir Noam <amir.noam at intel dot com>,
* Tsippy Mendelson <tsippy.mendelson at intel dot com> and
* Shmulik Hen <shmulik.hen at intel dot com>
* - Added support for IEEE 802.3ad Dynamic link aggregation mode.
*
* 2003/05/01 - Tsippy Mendelson <tsippy.mendelson at intel dot com> and
* Amir Noam <amir.noam at intel dot com>
* - Code beautification and style changes (mainly in comments).
*
* 2003/05/01 - Shmulik Hen <shmulik.hen at intel dot com>
* - Added support for Transmit load balancing mode.
*/
#ifndef _LINUX_BONDING_H
#define _LINUX_BONDING_H
#include <linux/timer.h>
#include <linux/proc_fs.h>
#include "bond_3ad.h"
#include "bond_alb.h"
#ifdef BONDING_DEBUG
// use this like so: BOND_PRINT_DBG(("foo = %d, bar = %d", foo, bar));
#define BOND_PRINT_DBG(X) \
do { \
printk(KERN_DEBUG "%s (%d)", __FUNCTION__, __LINE__); \
printk X; \
printk("\n"); \
} while(0)
#else
#define BOND_PRINT_DBG(X)
#endif /* BONDING_DEBUG */
#define IS_UP(dev) ((((dev)->flags & (IFF_UP)) == (IFF_UP)) && \
(netif_running(dev) && netif_carrier_ok(dev)))
/* Checks whether the dev is ready for transmit. We do not check netif_running
* since a device can be stopped by the driver for short periods of time for
* maintainance. dev_queue_xmit() handles this by queing the packet until the
* the dev is running again. Keeping packets ordering requires sticking the
* same dev as much as possible
*/
#define SLAVE_IS_OK(slave) \
((((slave)->dev->flags & (IFF_UP)) == (IFF_UP)) && \
netif_carrier_ok((slave)->dev) && \
((slave)->link == BOND_LINK_UP) && \
((slave)->state == BOND_STATE_ACTIVE))
typedef struct slave {
struct slave *next;
struct slave *prev;
struct net_device *dev;
short delay;
unsigned long jiffies;
char link; /* one of BOND_LINK_XXXX */
char state; /* one of BOND_STATE_XXXX */
unsigned short original_flags;
u32 link_failure_count;
u16 speed;
u8 duplex;
u8 perm_hwaddr[ETH_ALEN];
struct ad_slave_info ad_info; /* HUGE - better to dynamically alloc */
struct tlb_slave_info tlb_info;
} slave_t;
/*
* Here are the locking policies for the two bonding locks:
*
* 1) Get bond->lock when reading/writing slave list.
* 2) Get bond->ptrlock when reading/writing bond->current_slave.
* (It is unnecessary when the write-lock is put with bond->lock.)
* 3) When we lock with bond->ptrlock, we must lock with bond->lock
* beforehand.
*/
typedef struct bonding {
slave_t *next;
slave_t *prev;
slave_t *current_slave;
slave_t *primary_slave;
slave_t *current_arp_slave;
__s32 slave_cnt;
rwlock_t lock;
rwlock_t ptrlock;
struct timer_list mii_timer;
struct timer_list arp_timer;
struct net_device_stats *stats;
#ifdef CONFIG_PROC_FS
struct proc_dir_entry *bond_proc_dir;
struct proc_dir_entry *bond_proc_info_file;
#endif /* CONFIG_PROC_FS */
struct bonding *next_bond;
struct net_device *device;
struct dev_mc_list *mc_list;
unsigned short flags;
struct ad_bond_info ad_info;
struct alb_bond_info alb_info;
} bonding_t;
/* Forward declarations */
void bond_set_slave_active_flags(slave_t *slave);
void bond_set_slave_inactive_flags(slave_t *slave);
/**
* These functions can be used for iterating the slave list
* (which is circular)
* Caller must hold bond lock for read
*/
extern inline struct slave*
bond_get_first_slave(struct bonding *bond)
{
/* if there are no slaves return NULL */
if (bond->next == (slave_t *)bond) {
return NULL;
}
return bond->next;
}
/**
* Caller must hold bond lock for read
*/
extern inline struct slave*
bond_get_next_slave(struct bonding *bond, struct slave *slave)
{
/* If we have reached the last slave return NULL */
if (slave->next == bond->next) {
return NULL;
}
return slave->next;
}
/**
* Returns NULL if the net_device does not belong to any of the bond's slaves
*
* Caller must hold bond lock for read
*/
extern inline struct slave*
bond_get_slave_by_dev(struct bonding *bond, struct net_device *slave_dev)
{
struct slave *our_slave = bond->next;
/* check if the list of slaves is empty */
if (our_slave == (slave_t *)bond) {
return NULL;
}
for (; our_slave; our_slave = bond_get_next_slave(bond, our_slave)) {
if (our_slave->dev == slave_dev) {
break;
}
}
return our_slave;
}
extern inline struct bonding*
bond_get_bond_by_slave(struct slave *slave)
{
if (!slave || !slave->dev->master) {
return NULL;
}
return (struct bonding *)(slave->dev->master->priv);
}
#endif /* _LINUX_BONDING_H */
......@@ -11,18 +11,38 @@
* This software may be used and distributed according to the terms
* of the GNU Public License, incorporated herein by reference.
*
* 2003/03/18 - Amir Noam <amir.noam at intel dot com>
* - Added support for getting slave's speed and duplex via ethtool.
* Needed for 802.3ad and other future modes.
*
* 2003/03/18 - Tsippy Mendelson <tsippy.mendelson at intel dot com> and
* Shmulik Hen <shmulik.hen at intel dot com>
* - Enable support of modes that need to use the unique mac address of
* each slave.
*
* 2003/03/18 - Tsippy Mendelson <tsippy.mendelson at intel dot com> and
* Amir Noam <amir.noam at intel dot com>
* - Moved driver's private data types to bonding.h
*
* 2003/03/18 - Amir Noam <amir.noam at intel dot com>,
* Tsippy Mendelson <tsippy.mendelson at intel dot com> and
* Shmulik Hen <shmulik.hen at intel dot com>
* - Added support for IEEE 802.3ad Dynamic link aggregation mode.
*
* 2003/05/01 - Amir Noam <amir.noam at intel dot com>
* - Added ABI version control to restore compatibility between
* new/old ifenslave and new/old bonding.
*/
#ifndef _LINUX_IF_BONDING_H
#define _LINUX_IF_BONDING_H
#ifdef __KERNEL__
#include <linux/timer.h>
#include <linux/if.h>
#include <linux/proc_fs.h>
#endif /* __KERNEL__ */
#include <linux/types.h>
#include <linux/if_ether.h>
/* userland - kernel ABI version (2003/05/08) */
#define BOND_ABI_VERSION 1
/*
* We can remove these ioctl definitions in 2.5. People should use the
......@@ -41,6 +61,9 @@
#define BOND_MODE_ACTIVEBACKUP 1
#define BOND_MODE_XOR 2
#define BOND_MODE_BROADCAST 3
#define BOND_MODE_8023AD 4
#define BOND_MODE_TLB 5
#define BOND_MODE_ALB 6 /* TLB + RLB (receive load balancing) */
/* each slave's link has 4 states */
#define BOND_LINK_UP 0 /* link is up and running */
......@@ -58,11 +81,6 @@
#define BOND_MULTICAST_ACTIVE 1
#define BOND_MULTICAST_ALL 2
struct bond_parm_tbl {
char *modename;
int mode;
};
typedef struct ifbond {
__s32 bond_mode;
__s32 num_slaves;
......@@ -78,52 +96,15 @@ typedef struct ifslave
__u32 link_failure_count;
} ifslave;
#ifdef __KERNEL__
typedef struct slave {
struct slave *next;
struct slave *prev;
struct net_device *dev;
short delay;
unsigned long jiffies;
char link; /* one of BOND_LINK_XXXX */
char state; /* one of BOND_STATE_XXXX */
unsigned short original_flags;
u32 link_failure_count;
} slave_t;
/*
* Here are the locking policies for the two bonding locks:
*
* 1) Get bond->lock when reading/writing slave list.
* 2) Get bond->ptrlock when reading/writing bond->current_slave.
* (It is unnecessary when the write-lock is put with bond->lock.)
* 3) When we lock with bond->ptrlock, we must lock with bond->lock
* beforehand.
*/
typedef struct bonding {
slave_t *next;
slave_t *prev;
slave_t *current_slave;
slave_t *primary_slave;
slave_t *current_arp_slave;
__s32 slave_cnt;
rwlock_t lock;
rwlock_t ptrlock;
struct timer_list mii_timer;
struct timer_list arp_timer;
struct net_device_stats *stats;
#ifdef CONFIG_PROC_FS
struct proc_dir_entry *bond_proc_dir;
struct proc_dir_entry *bond_proc_info_file;
#endif /* CONFIG_PROC_FS */
struct bonding *next_bond;
struct net_device *device;
struct dev_mc_list *mc_list;
unsigned short flags;
} bonding_t;
#endif /* __KERNEL__ */
struct ad_info {
__u16 aggregator_id;
__u16 ports;
__u16 actor_key;
__u16 partner_key;
__u8 partner_system[ETH_ALEN];
};
#endif /* _LINUX_BOND_H */
#endif /* _LINUX_IF_BONDING_H */
/*
* Local variables:
......
......@@ -148,6 +148,7 @@ static inline int __vlan_hwaccel_rx(struct sk_buff *skb,
{
struct net_device_stats *stats;
skb->real_dev = skb->dev;
skb->dev = grp->vlan_devices[vlan_tag & VLAN_VID_MASK];
if (skb->dev == NULL) {
kfree_skb(skb);
......
......@@ -190,6 +190,7 @@ struct sk_buff {
struct sock *sk;
struct timeval stamp;
struct net_device *dev;
struct net_device *real_dev;
union {
struct tcphdr *th;
......
......@@ -1385,8 +1385,10 @@ static __inline__ void skb_bond(struct sk_buff *skb)
{
struct net_device *dev = skb->dev;
if (dev->master)
if (dev->master) {
skb->real_dev = skb->dev;
skb->dev = dev->master;
}
}
static void net_tx_action(struct softirq_action *h)
......
......@@ -271,6 +271,7 @@ struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask)
n->sk = NULL;
C(stamp);
C(dev);
C(real_dev);
C(h);
C(nh);
C(mac);
......@@ -334,6 +335,7 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
new->list = NULL;
new->sk = NULL;
new->dev = old->dev;
new->real_dev = old->real_dev;
new->priority = old->priority;
new->protocol = old->protocol;
new->dst = dst_clone(old->dst);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment