bond_alb.c 40 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
/*
 * Copyright(c) 1999 - 2003 Intel Corporation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 *
 * The full GNU General Public License is included in this distribution in the
 * file called LICENSE.
20 21 22 23 24 25 26
 *
 *
 * Changes:
 *
 * 2003/06/25 - Shmulik Hen <shmulik.hen at intel dot com>
 *	- Fixed signed/unsigned calculation errors that caused load sharing
 *	  to collapse to one slave under very heavy UDP Tx stress.
27 28 29 30
 *
 * 2003/08/06 - Amir Noam <amir.noam at intel dot com>
 *	- Add support for setting bond's MAC address with special
 *	  handling required for ALB/TLB.
31 32 33
 *
 * 2003/09/24 - Shmulik Hen <shmulik.hen at intel dot com>
 *	- Code cleanup and style changes
34 35
 */

36 37
//#define BONDING_DEBUG 1

38 39
#include <linux/skbuff.h>
#include <linux/netdevice.h>
40
#include <linux/etherdevice.h>
41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57
#include <linux/pkt_sched.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/if_arp.h>
#include <linux/if_ether.h>
#include <linux/if_bonding.h>
#include <net/ipx.h>
#include <net/arp.h>
#include <asm/byteorder.h>
#include "bonding.h"
#include "bond_alb.h"


#define ALB_TIMER_TICKS_PER_SEC	    10	/* should be a divisor of HZ */
#define BOND_TLB_REBALANCE_INTERVAL 10	/* In seconds, periodic re-balancing.
					 * Used for division - never set
					 * to zero !!!
					 */
#define BOND_ALB_LP_INTERVAL	    1	/* In seconds, periodic send of
					 * learning packets to the switch
					 */

#define BOND_TLB_REBALANCE_TICKS (BOND_TLB_REBALANCE_INTERVAL \
				  * ALB_TIMER_TICKS_PER_SEC)

#define BOND_ALB_LP_TICKS (BOND_ALB_LP_INTERVAL \
			   * ALB_TIMER_TICKS_PER_SEC)

#define TLB_HASH_TABLE_SIZE 256	/* The size of the clients hash table.
				 * Note that this value MUST NOT be smaller
				 * because the key hash table is BYTE wide !
				 */


#define TLB_NULL_INDEX		0xffffffff
#define MAX_LP_RETRY		3

/* rlb defs */
#define RLB_HASH_TABLE_SIZE	256
#define RLB_NULL_INDEX		0xffffffff
/* Expansions are parenthesized so that the macros stay a single operand
 * when used inside a larger expression (e.g. x / RLB_UPDATE_DELAY).
 */
#define RLB_UPDATE_DELAY	(2 * ALB_TIMER_TICKS_PER_SEC) /* 2 seconds */
#define RLB_ARP_BURST_SIZE	2
#define RLB_UPDATE_RETRY	3	/* 3-ticks - must be smaller than the rlb
					 * rebalance interval (5 min).
					 */
/* RLB_PROMISC_TIMEOUT = 10 sec equals the time that the current slave is
 * promiscuous after failover
 */
#define RLB_PROMISC_TIMEOUT	(10 * ALB_TIMER_TICKS_PER_SEC)

94 95
static const u8 mac_bcast[ETH_ALEN] = {0xff,0xff,0xff,0xff,0xff,0xff};

96 97 98 99 100
#pragma pack(1)
struct learning_pkt {
	u8 mac_dst[ETH_ALEN];
	u8 mac_src[ETH_ALEN];
	u16 type;
101
	u8 padding[ETH_ZLEN - ETH_HLEN];
102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119
};

struct arp_pkt {
	u16     hw_addr_space;
	u16     prot_addr_space;
	u8      hw_addr_len;
	u8      prot_addr_len;
	u16     op_code;
	u8      mac_src[ETH_ALEN];	/* sender hardware address */
	u32     ip_src;			/* sender IP address */
	u8      mac_dst[ETH_ALEN];	/* target hardware address */
	u32     ip_dst;			/* target IP address */
};
#pragma pack()

/* Forward declaration */
static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[]);

120
/* XOR together the hash_size bytes starting at hash_start and return the
 * resulting byte. A non-positive hash_size yields 0.
 */
static inline u8 _simple_hash(u8 *hash_start, int hash_size)
{
	u8 folded = 0;

	while (hash_size-- > 0) {
		folded ^= *hash_start++;
	}

	return folded;
}

/*********************** tlb specific functions ***************************/

134
static inline void _lock_tx_hashtbl(struct bonding *bond)
135 136 137 138
{
	spin_lock(&(BOND_ALB_INFO(bond).tx_hashtbl_lock));
}

139
static inline void _unlock_tx_hashtbl(struct bonding *bond)
140 141 142 143 144
{
	spin_unlock(&(BOND_ALB_INFO(bond).tx_hashtbl_lock));
}

/* Caller must hold tx_hashtbl lock */
145
static inline void tlb_init_table_entry(struct tlb_client_info *entry, int save_load)
146 147 148
{
	if (save_load) {
		entry->load_history = 1 + entry->tx_bytes /
149
				      BOND_TLB_REBALANCE_INTERVAL;
150 151
		entry->tx_bytes = 0;
	}
152

153 154 155 156 157
	entry->tx_slave = NULL;
	entry->next = TLB_NULL_INDEX;
	entry->prev = TLB_NULL_INDEX;
}

158
static inline void tlb_init_slave(struct slave *slave)
159
{
160 161
	SLAVE_TLB_INFO(slave).load = 0;
	SLAVE_TLB_INFO(slave).head = TLB_NULL_INDEX;
162 163 164
}

/* Caller must hold bond lock for read */
165
static void tlb_clear_slave(struct bonding *bond, struct slave *slave, int save_load)
166
{
167
	struct tlb_client_info *tx_hash_table;
168
	u32 index;
169 170

	_lock_tx_hashtbl(bond);
171

172
	/* clear slave from tx_hashtbl */
173 174
	tx_hash_table = BOND_ALB_INFO(bond).tx_hashtbl;

175 176 177 178 179
	index = SLAVE_TLB_INFO(slave).head;
	while (index != TLB_NULL_INDEX) {
		u32 next_index = tx_hash_table[index].next;
		tlb_init_table_entry(&tx_hash_table[index], save_load);
		index = next_index;
180
	}
181

182 183 184 185 186 187
	_unlock_tx_hashtbl(bond);

	tlb_init_slave(slave);
}

/* Must be called before starting the monitor timer */
188
static int tlb_initialize(struct bonding *bond)
189 190
{
	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
191
	int size = TLB_HASH_TABLE_SIZE * sizeof(struct tlb_client_info);
192 193 194 195 196 197 198
	int i;

	spin_lock_init(&(bond_info->tx_hashtbl_lock));

	_lock_tx_hashtbl(bond);

	bond_info->tx_hashtbl = kmalloc(size, GFP_KERNEL);
199
	if (!bond_info->tx_hashtbl) {
200 201
		printk(KERN_ERR DRV_NAME
		       ": Error: %s: Failed to allocate TLB hash table\n",
202
		       bond->dev->name);
203 204 205 206 207
		_unlock_tx_hashtbl(bond);
		return -1;
	}

	memset(bond_info->tx_hashtbl, 0, size);
208

209
	for (i = 0; i < TLB_HASH_TABLE_SIZE; i++) {
210
		tlb_init_table_entry(&bond_info->tx_hashtbl[i], 1);
211
	}
212

213 214 215 216 217 218
	_unlock_tx_hashtbl(bond);

	return 0;
}

/* Must be called only after all slaves have been released */
219
static void tlb_deinitialize(struct bonding *bond)
220 221 222 223
{
	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));

	_lock_tx_hashtbl(bond);
224

225 226
	kfree(bond_info->tx_hashtbl);
	bond_info->tx_hashtbl = NULL;
227

228 229 230 231
	_unlock_tx_hashtbl(bond);
}

/* Caller must hold bond lock for read */
232
static struct slave *tlb_get_least_loaded_slave(struct bonding *bond)
233
{
234 235
	struct slave *slave, *least_loaded;
	s64 max_gap;
236
	int i, found = 0;
237 238

	/* Find the first enabled slave */
239
	bond_for_each_slave(bond, slave, i) {
240
		if (SLAVE_IS_OK(slave)) {
241
			found = 1;
242 243 244 245
			break;
		}
	}

246
	if (!found) {
247 248 249 250
		return NULL;
	}

	least_loaded = slave;
251 252
	max_gap = (s64)(slave->speed << 20) - /* Convert to Megabit per sec */
			(s64)(SLAVE_TLB_INFO(slave).load << 3); /* Bytes to bits */
253 254

	/* Find the slave with the largest gap */
255
	bond_for_each_slave_from(bond, slave, i, least_loaded) {
256
		if (SLAVE_IS_OK(slave)) {
257 258
			s64 gap = (s64)(slave->speed << 20) -
					(s64)(SLAVE_TLB_INFO(slave).load << 3);
259
			if (max_gap < gap) {
260
				least_loaded = slave;
261
				max_gap = gap;
262 263 264 265 266 267 268 269
			}
		}
	}

	return least_loaded;
}

/* Caller must hold bond lock for read */
270
struct slave *tlb_choose_channel(struct bonding *bond, u32 hash_index, u32 skb_len)
271 272
{
	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
273 274
	struct tlb_client_info *hash_table;
	struct slave *assigned_slave;
275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311

	_lock_tx_hashtbl(bond);

	hash_table = bond_info->tx_hashtbl;
	assigned_slave = hash_table[hash_index].tx_slave;
	if (!assigned_slave) {
		assigned_slave = tlb_get_least_loaded_slave(bond);

		if (assigned_slave) {
			struct tlb_slave_info *slave_info =
				&(SLAVE_TLB_INFO(assigned_slave));
			u32 next_index = slave_info->head;

			hash_table[hash_index].tx_slave = assigned_slave;
			hash_table[hash_index].next = next_index;
			hash_table[hash_index].prev = TLB_NULL_INDEX;

			if (next_index != TLB_NULL_INDEX) {
				hash_table[next_index].prev = hash_index;
			}

			slave_info->head = hash_index;
			slave_info->load +=
				hash_table[hash_index].load_history;
		}
	}

	if (assigned_slave) {
		hash_table[hash_index].tx_bytes += skb_len;
	}

	_unlock_tx_hashtbl(bond);

	return assigned_slave;
}

/*********************** rlb specific functions ***************************/
312
static inline void _lock_rx_hashtbl(struct bonding *bond)
313 314 315 316
{
	spin_lock(&(BOND_ALB_INFO(bond).rx_hashtbl_lock));
}

317
static inline void _unlock_rx_hashtbl(struct bonding *bond)
318 319 320 321 322 323 324
{
	spin_unlock(&(BOND_ALB_INFO(bond).rx_hashtbl_lock));
}

/* when an ARP REPLY is received from a client update its info
 * in the rx_hashtbl
 */
325
static void rlb_update_entry_from_arp(struct bonding *bond, struct arp_pkt *arp)
326 327
{
	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
328 329
	struct rlb_client_info *client_info;
	u32 hash_index;
330 331 332

	_lock_rx_hashtbl(bond);

333
	hash_index = _simple_hash((u8*)&(arp->ip_src), sizeof(arp->ip_src));
334 335 336 337 338 339 340 341 342 343 344 345 346 347
	client_info = &(bond_info->rx_hashtbl[hash_index]);

	if ((client_info->assigned) &&
	    (client_info->ip_src == arp->ip_dst) &&
	    (client_info->ip_dst == arp->ip_src)) {
		/* update the clients MAC address */
		memcpy(client_info->mac_dst, arp->mac_src, ETH_ALEN);
		client_info->ntt = 1;
		bond_info->rx_ntt = 1;
	}

	_unlock_rx_hashtbl(bond);
}

348
static int rlb_arp_recv(struct sk_buff *skb, struct net_device *bond_dev, struct packet_type *ptype)
349
{
350
	struct bonding *bond = (struct bonding *)bond_dev->priv;
351
	struct arp_pkt *arp = (struct arp_pkt *)skb->data;
352
	int res = NET_RX_DROP;
353

354
	if (!(bond_dev->flags & IFF_MASTER)) {
355 356 357 358
		goto out;
	}

	if (!arp) {
359
		dprintk("Packet has no ARP data\n");
360 361 362 363
		goto out;
	}

	if (skb->len < sizeof(struct arp_pkt)) {
364
		dprintk("Packet is too small to be an ARP\n");
365 366 367 368 369 370
		goto out;
	}

	if (arp->op_code == htons(ARPOP_REPLY)) {
		/* update rx hash table for this ARP */
		rlb_update_entry_from_arp(bond, arp);
371
		dprintk("Server received an ARP Reply from client\n");
372 373
	}

374
	res = NET_RX_SUCCESS;
375 376 377 378

out:
	dev_kfree_skb(skb);

379
	return res;
380 381 382
}

/* Caller must hold bond lock for read */
383
static struct slave *rlb_next_rx_slave(struct bonding *bond)
384 385
{
	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
386
	struct slave *rx_slave, *slave, *start_at;
387
	int i = 0;
388

389 390 391 392
	if (bond_info->next_rx_slave) {
		start_at = bond_info->next_rx_slave;
	} else {
		start_at = bond->first_slave;
393 394
	}

395 396 397
	rx_slave = NULL;

	bond_for_each_slave_from(bond, slave, i, start_at) {
398 399 400
		if (SLAVE_IS_OK(slave)) {
			if (!rx_slave) {
				rx_slave = slave;
401
			} else if (slave->speed > rx_slave->speed) {
402 403 404 405 406 407 408 409 410 411 412 413 414 415 416
				rx_slave = slave;
			}
		}
	}

	if (rx_slave) {
		bond_info->next_rx_slave = rx_slave->next;
	}

	return rx_slave;
}

/* teach the switch the mac of a disabled slave
 * on the primary for fault tolerance
 *
417
 * Caller must hold bond->curr_slave_lock for write or bond lock for write
418
 */
419
static void rlb_teach_disabled_mac_on_primary(struct bonding *bond, u8 addr[])
420
{
421
	if (!bond->curr_active_slave) {
422 423
		return;
	}
424

425 426
	if (!bond->alb_info.primary_is_promisc) {
		bond->alb_info.primary_is_promisc = 1;
427
		dev_set_promiscuity(bond->curr_active_slave->dev, 1);
428
	}
429

430 431
	bond->alb_info.rlb_promisc_timeout_counter = 0;

432
	alb_send_learning_packets(bond->curr_active_slave, addr);
433 434 435 436 437 438
}

/* slave being removed should not be active at this point
 *
 * Caller must hold bond lock for read
 */
439
static void rlb_clear_slave(struct bonding *bond, struct slave *slave)
440 441
{
	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
442
	struct rlb_client_info *rx_hash_table;
443 444 445 446 447
	u32 index, next_index;

	/* clear slave from rx_hashtbl */
	_lock_rx_hashtbl(bond);

448
	rx_hash_table = bond_info->rx_hashtbl;
449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478
	index = bond_info->rx_hashtbl_head;
	for (; index != RLB_NULL_INDEX; index = next_index) {
		next_index = rx_hash_table[index].next;
		if (rx_hash_table[index].slave == slave) {
			struct slave *assigned_slave = rlb_next_rx_slave(bond);

			if (assigned_slave) {
				rx_hash_table[index].slave = assigned_slave;
				if (memcmp(rx_hash_table[index].mac_dst,
					   mac_bcast, ETH_ALEN)) {
					bond_info->rx_hashtbl[index].ntt = 1;
					bond_info->rx_ntt = 1;
					/* A slave has been removed from the
					 * table because it is either disabled
					 * or being released. We must retry the
					 * update to avoid clients from not
					 * being updated & disconnecting when
					 * there is stress
					 */
					bond_info->rlb_update_retry_counter =
						RLB_UPDATE_RETRY;
				}
			} else {  /* there is no active slave */
				rx_hash_table[index].slave = NULL;
			}
		}
	}

	_unlock_rx_hashtbl(bond);

479
	write_lock(&bond->curr_slave_lock);
480

481
	if (slave != bond->curr_active_slave) {
482 483
		rlb_teach_disabled_mac_on_primary(bond, slave->dev->dev_addr);
	}
484

485
	write_unlock(&bond->curr_slave_lock);
486 487
}

488
static void rlb_update_client(struct rlb_client_info *client_info)
489
{
490
	int i;
491

492
	if (!client_info->slave) {
493 494 495
		return;
	}

496
	for (i = 0; i < RLB_ARP_BURST_SIZE; i++) {
497 498 499 500 501 502 503 504 505 506 507
		arp_send(ARPOP_REPLY, ETH_P_ARP,
			 client_info->ip_dst,
			 client_info->slave->dev,
			 client_info->ip_src,
			 client_info->mac_dst,
			 client_info->slave->dev->dev_addr,
			 client_info->mac_dst);
	}
}

/* sends ARP REPLIES that update the clients that need updating */
508
static void rlb_update_rx_clients(struct bonding *bond)
509 510
{
	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
511 512
	struct rlb_client_info *client_info;
	u32 hash_index;
513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535

	_lock_rx_hashtbl(bond);

	hash_index = bond_info->rx_hashtbl_head;
	for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) {
		client_info = &(bond_info->rx_hashtbl[hash_index]);
		if (client_info->ntt) {
			rlb_update_client(client_info);
			if (bond_info->rlb_update_retry_counter == 0) {
				client_info->ntt = 0;
			}
		}
	}

	/* do not update the entries again untill this counter is zero so that
	 * not to confuse the clients.
	 */
	bond_info->rlb_update_delay_counter = RLB_UPDATE_DELAY;

	_unlock_rx_hashtbl(bond);
}

/* The slave was assigned a new mac address - update the clients */
536
static void rlb_req_update_slave_clients(struct bonding *bond, struct slave *slave)
537 538
{
	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
539 540 541
	struct rlb_client_info *client_info;
	int ntt = 0;
	u32 hash_index;
542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566

	_lock_rx_hashtbl(bond);

	hash_index = bond_info->rx_hashtbl_head;
	for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) {
		client_info = &(bond_info->rx_hashtbl[hash_index]);

		if ((client_info->slave == slave) &&
		    memcmp(client_info->mac_dst, mac_bcast, ETH_ALEN)) {
			client_info->ntt = 1;
			ntt = 1;
		}
	}

	// update the team's flag only after the whole iteration
	if (ntt) {
		bond_info->rx_ntt = 1;
		//fasten the change
		bond_info->rlb_update_retry_counter = RLB_UPDATE_RETRY;
	}

	_unlock_rx_hashtbl(bond);
}

/* mark all clients using src_ip to be updated */
567
static void rlb_req_update_subnet_clients(struct bonding *bond, u32 src_ip)
568 569
{
	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
570 571
	struct rlb_client_info *client_info;
	u32 hash_index;
572 573 574 575 576 577 578 579

	_lock_rx_hashtbl(bond);

	hash_index = bond_info->rx_hashtbl_head;
	for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) {
		client_info = &(bond_info->rx_hashtbl[hash_index]);

		if (!client_info->slave) {
580 581 582
			printk(KERN_ERR DRV_NAME
			       ": Error: found a client with no channel in "
			       "the client's hash table\n");
583 584 585
			continue;
		}
		/*update all clients using this src_ip, that are not assigned
586
		 * to the team's address (curr_active_slave) and have a known
587 588 589 590
		 * unicast mac address.
		 */
		if ((client_info->ip_src == src_ip) &&
		    memcmp(client_info->slave->dev->dev_addr,
591
			   bond->dev->dev_addr, ETH_ALEN) &&
592 593 594 595 596 597 598 599 600 601
		    memcmp(client_info->mac_dst, mac_bcast, ETH_ALEN)) {
			client_info->ntt = 1;
			bond_info->rx_ntt = 1;
		}
	}

	_unlock_rx_hashtbl(bond);
}

/* Caller must hold both bond and ptr locks for read */
602
struct slave *rlb_choose_channel(struct bonding *bond, struct arp_pkt *arp)
603 604
{
	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
605 606
	struct slave *assigned_slave;
	struct rlb_client_info *client_info;
607 608 609 610
	u32 hash_index = 0;

	_lock_rx_hashtbl(bond);

611
	hash_index = _simple_hash((u8 *)&arp->ip_dst, sizeof(arp->ip_src));
612 613
	client_info = &(bond_info->rx_hashtbl[hash_index]);

614
	if (client_info->assigned) {
615 616 617 618 619 620 621 622 623 624 625 626 627 628 629
		if ((client_info->ip_src == arp->ip_src) &&
		    (client_info->ip_dst == arp->ip_dst)) {
			/* the entry is already assigned to this client */
			if (memcmp(arp->mac_dst, mac_bcast, ETH_ALEN)) {
				/* update mac address from arp */
				memcpy(client_info->mac_dst, arp->mac_dst, ETH_ALEN);
			}

			assigned_slave = client_info->slave;
			if (assigned_slave) {
				_unlock_rx_hashtbl(bond);
				return assigned_slave;
			}
		} else {
			/* the entry is already assigned to some other client,
630
			 * move the old client to primary (curr_active_slave) so
631 632
			 * that the new client can be assigned to this entry.
			 */
633 634 635
			if (bond->curr_active_slave &&
			    client_info->slave != bond->curr_active_slave) {
				client_info->slave = bond->curr_active_slave;
636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655
				rlb_update_client(client_info);
			}
		}
	}
	/* assign a new slave */
	assigned_slave = rlb_next_rx_slave(bond);

	if (assigned_slave) {
		client_info->ip_src = arp->ip_src;
		client_info->ip_dst = arp->ip_dst;
		/* arp->mac_dst is broadcast for arp reqeusts.
		 * will be updated with clients actual unicast mac address
		 * upon receiving an arp reply.
		 */
		memcpy(client_info->mac_dst, arp->mac_dst, ETH_ALEN);
		client_info->slave = assigned_slave;

		if (memcmp(client_info->mac_dst, mac_bcast, ETH_ALEN)) {
			client_info->ntt = 1;
			bond->alb_info.rx_ntt = 1;
656
		} else {
657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678
			client_info->ntt = 0;
		}

		if (!client_info->assigned) {
			u32 prev_tbl_head = bond_info->rx_hashtbl_head;
			bond_info->rx_hashtbl_head = hash_index;
			client_info->next = prev_tbl_head;
			if (prev_tbl_head != RLB_NULL_INDEX) {
				bond_info->rx_hashtbl[prev_tbl_head].prev =
					hash_index;
			}
			client_info->assigned = 1;
		}
	}

	_unlock_rx_hashtbl(bond);

	return assigned_slave;
}

/* chooses (and returns) transmit channel for arp reply
 * does not choose channel for other arp types since they are
679
 * sent on the curr_active_slave
680
 */
681
static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond)
682 683 684 685 686 687 688 689 690 691 692 693
{
	struct arp_pkt *arp = (struct arp_pkt *)skb->nh.raw;
	struct slave *tx_slave = NULL;

	if (arp->op_code == __constant_htons(ARPOP_REPLY)) {
		/* the arp must be sent on the selected
		* rx channel
		*/
		tx_slave = rlb_choose_channel(bond, arp);
		if (tx_slave) {
			memcpy(arp->mac_src,tx_slave->dev->dev_addr, ETH_ALEN);
		}
694
		dprintk("Server sent ARP Reply packet\n");
695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713
	} else if (arp->op_code == __constant_htons(ARPOP_REQUEST)) {
		/* Create an entry in the rx_hashtbl for this client as a
		 * place holder.
		 * When the arp reply is received the entry will be updated
		 * with the correct unicast address of the client.
		 */
		rlb_choose_channel(bond, arp);

		/* The ARP relpy packets must be delayed so that
		 * they can cancel out the influence of the ARP request.
		 */
		bond->alb_info.rlb_update_delay_counter = RLB_UPDATE_DELAY;

		/* arp requests are broadcast and are sent on the primary
		 * the arp request will collapse all clients on the subnet to
		 * the primary slave. We must register these clients to be
		 * updated with their assigned mac.
		 */
		rlb_req_update_subnet_clients(bond, arp->ip_src);
714
		dprintk("Server sent ARP Request packet\n");
715 716 717 718 719 720
	}

	return tx_slave;
}

/* Caller must hold bond lock for read */
721
static void rlb_rebalance(struct bonding *bond)
722 723
{
	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
724 725 726
	struct slave *assigned_slave;
	struct rlb_client_info *client_info;
	int ntt;
727 728 729 730
	u32 hash_index;

	_lock_rx_hashtbl(bond);

731
	ntt = 0;
732 733 734 735
	hash_index = bond_info->rx_hashtbl_head;
	for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) {
		client_info = &(bond_info->rx_hashtbl[hash_index]);
		assigned_slave = rlb_next_rx_slave(bond);
736
		if (assigned_slave && (client_info->slave != assigned_slave)) {
737 738 739 740 741 742 743 744 745 746 747 748 749 750
			client_info->slave = assigned_slave;
			client_info->ntt = 1;
			ntt = 1;
		}
	}

	/* update the team's flag only after the whole iteration */
	if (ntt) {
		bond_info->rx_ntt = 1;
	}
	_unlock_rx_hashtbl(bond);
}

/* Caller must hold rx_hashtbl lock */
751
static void rlb_init_table_entry(struct rlb_client_info *entry)
752
{
753
	memset(entry, 0, sizeof(struct rlb_client_info));
754 755 756 757
	entry->next = RLB_NULL_INDEX;
	entry->prev = RLB_NULL_INDEX;
}

758
static int rlb_initialize(struct bonding *bond)
759 760 761
{
	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
	struct packet_type *pk_type = &(BOND_ALB_INFO(bond).rlb_pkt_type);
762
	int size = RLB_HASH_TABLE_SIZE * sizeof(struct rlb_client_info);
763 764 765 766 767 768 769
	int i;

	spin_lock_init(&(bond_info->rx_hashtbl_lock));

	_lock_rx_hashtbl(bond);

	bond_info->rx_hashtbl = kmalloc(size, GFP_KERNEL);
770
	if (!bond_info->rx_hashtbl) {
771 772
		printk(KERN_ERR DRV_NAME
		       ": Error: %s: Failed to allocate RLB hash table\n",
773
		       bond->dev->name);
774 775 776 777 778 779
		_unlock_rx_hashtbl(bond);
		return -1;
	}

	bond_info->rx_hashtbl_head = RLB_NULL_INDEX;

780
	for (i = 0; i < RLB_HASH_TABLE_SIZE; i++) {
781 782
		rlb_init_table_entry(bond_info->rx_hashtbl + i);
	}
783

784 785 786 787
	_unlock_rx_hashtbl(bond);

	/*initialize packet type*/
	pk_type->type = __constant_htons(ETH_P_ARP);
788
	pk_type->dev = bond->dev;
789 790
	pk_type->func = rlb_arp_recv;

791
	/* register to receive ARPs */
792 793 794 795 796
	dev_add_pack(pk_type);

	return 0;
}

797
static void rlb_deinitialize(struct bonding *bond)
798 799 800 801 802 803
{
	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));

	dev_remove_pack(&(bond_info->rlb_pkt_type));

	_lock_rx_hashtbl(bond);
804

805 806
	kfree(bond_info->rx_hashtbl);
	bond_info->rx_hashtbl = NULL;
807

808 809 810 811 812
	_unlock_rx_hashtbl(bond);
}

/*********************** tlb/rlb shared functions *********************/

813
/* Queue MAX_LP_RETRY learning packets on @slave's device. Each packet is a
 * zero-padded frame of type ETH_P_LOOP whose source and destination are
 * both @mac_addr. Sending stops silently at the first skb allocation
 * failure.
 */
static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[])
{
	struct learning_pkt pkt;
	int size = sizeof(pkt);
	int sent;

	/* build the frame image once; every packet is a copy of it */
	memset(&pkt, 0, size);
	memcpy(pkt.mac_dst, mac_addr, ETH_ALEN);
	memcpy(pkt.mac_src, mac_addr, ETH_ALEN);
	pkt.type = __constant_htons(ETH_P_LOOP);

	for (sent = 0; sent < MAX_LP_RETRY; sent++) {
		char *data;
		struct sk_buff *skb = dev_alloc_skb(size);

		if (!skb) {
			return;
		}

		data = skb_put(skb, size);
		memcpy(data, &pkt, size);

		skb->mac.raw = data;
		skb->nh.raw = data + ETH_HLEN;
		skb->protocol = pkt.type;
		skb->priority = TC_PRIO_CONTROL;
		skb->dev = slave->dev;

		dev_queue_xmit(skb);
	}
}

/* hw is a boolean parameter that determines whether we should try and
847 848
 * set the hw address of the device as well as the hw address of the
 * net_device
849
 */
850
static int alb_set_slave_mac_addr(struct slave *slave, u8 addr[], int hw)
851
{
852
	struct net_device *dev = slave->dev;
853 854 855
	struct sockaddr s_addr;

	if (!hw) {
856
		memcpy(dev->dev_addr, addr, dev->addr_len);
857 858 859 860 861
		return 0;
	}

	/* for rlb each slave must have a unique hw mac addresses so that */
	/* each slave will receive packets destined to a different mac */
862
	memcpy(s_addr.sa_data, addr, dev->addr_len);
863 864
	s_addr.sa_family = dev->type;
	if (dev->set_mac_address(dev, &s_addr)) {
865 866 867 868 869 870
		printk(KERN_ERR DRV_NAME
		       ": Error: dev->set_mac_address of dev %s failed! ALB "
		       "mode requires that the base driver support setting "
		       "the hw address also when the network device's "
		       "interface is open\n",
		       dev->name);
871 872 873 874 875
		return -EOPNOTSUPP;
	}
	return 0;
}

876
/* Caller must hold bond lock for write or curr_slave_lock for write*/
877
static void alb_swap_mac_addr(struct bonding *bond, struct slave *slave1, struct slave *slave2)
878 879
{
	struct slave *disabled_slave = NULL;
880 881
	u8 tmp_mac_addr[ETH_ALEN];
	int slaves_state_differ;
882 883 884 885

	slaves_state_differ = (SLAVE_IS_OK(slave1) != SLAVE_IS_OK(slave2));

	memcpy(tmp_mac_addr, slave1->dev->dev_addr, ETH_ALEN);
886 887
	alb_set_slave_mac_addr(slave1, slave2->dev->dev_addr, bond->alb_info.rlb_enabled);
	alb_set_slave_mac_addr(slave2, tmp_mac_addr, bond->alb_info.rlb_enabled);
888 889 890 891 892 893 894 895 896 897

	/* fasten the change in the switch */
	if (SLAVE_IS_OK(slave1)) {
		alb_send_learning_packets(slave1, slave1->dev->dev_addr);
		if (bond->alb_info.rlb_enabled) {
			/* inform the clients that the mac address
			 * has changed
			 */
			rlb_req_update_slave_clients(bond, slave1);
		}
898
	} else {
899 900 901 902 903 904 905 906 907 908 909
		disabled_slave = slave1;
	}

	if (SLAVE_IS_OK(slave2)) {
		alb_send_learning_packets(slave2, slave2->dev->dev_addr);
		if (bond->alb_info.rlb_enabled) {
			/* inform the clients that the mac address
			 * has changed
			 */
			rlb_req_update_slave_clients(bond, slave2);
		}
910
	} else {
911 912 913 914
		disabled_slave = slave2;
	}

	if (bond->alb_info.rlb_enabled && slaves_state_differ) {
915
		/* A disabled slave was assigned an active mac addr */
916 917
		rlb_teach_disabled_mac_on_primary(bond,
						  disabled_slave->dev->dev_addr);
918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934
	}
}

/**
 * alb_change_hw_addr_on_detach
 * @bond: bonding we're working on
 * @slave: the slave that was just detached
 *
 * We assume that @slave was already detached from the slave list.
 *
 * If @slave's permanent hw address is different both from its current
 * address and from @bond's address, then somewhere in the bond there's
 * a slave that has @slave's permanet address as its current address.
 * We'll make sure that that slave no longer uses @slave's permanent address.
 *
 * Caller must hold bond lock
 */
935
static void alb_change_hw_addr_on_detach(struct bonding *bond, struct slave *slave)
936 937 938 939 940 941 942 943
{
	int perm_curr_diff;
	int perm_bond_diff;

	perm_curr_diff = memcmp(slave->perm_hwaddr,
				slave->dev->dev_addr,
				ETH_ALEN);
	perm_bond_diff = memcmp(slave->perm_hwaddr,
944
				bond->dev->dev_addr,
945
				ETH_ALEN);
946

947
	if (perm_curr_diff && perm_bond_diff) {
948 949 950
		struct slave *tmp_slave;
		int i, found = 0;

951
		bond_for_each_slave(bond, tmp_slave, i) {
952
			if (!memcmp(slave->perm_hwaddr,
953 954
				    tmp_slave->dev->dev_addr,
				    ETH_ALEN)) {
955
				found = 1;
956 957 958 959
				break;
			}
		}

960
		if (found) {
961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990
			alb_swap_mac_addr(bond, slave, tmp_slave);
		}
	}
}

/**
 * alb_handle_addr_collision_on_attach
 * @bond: bonding we're working on
 * @slave: the slave that was just attached
 *
 * checks uniqueness of slave's mac address and handles the case the
 * new slave uses the bonds mac address.
 *
 * If the permanent hw address of @slave is @bond's hw address, we need to
 * find a different hw address to give @slave, that isn't in use by any other
 * slave in the bond. This address must be, of course, one of the permanent
 * addresses of the other slaves.
 *
 * We go over the slave list, and for each slave there we compare its
 * permanent hw address with the current address of all the other slaves.
 * If no match was found, then we've found a slave with a permanent address
 * that isn't used by any other slave in the bond, so we can assign it to
 * @slave.
 *
 * assumption: this function is called before @slave is attached to the
 * 	       bond slave list.
 *
 * caller must hold the bond lock for write since the mac addresses are compared
 * and may be swapped.
 */
991
static int alb_handle_addr_collision_on_attach(struct bonding *bond, struct slave *slave)
992
{
993 994
	struct slave *tmp_slave1, *tmp_slave2, *free_mac_slave;
	int i, j, found = 0;
995 996 997 998 999 1000 1001 1002 1003 1004

	if (bond->slave_cnt == 0) {
		/* this is the first slave */
		return 0;
	}

	/* if slave's mac address differs from bond's mac address
	 * check uniqueness of slave's mac address against the other
	 * slaves in the bond.
	 */
1005
	if (memcmp(slave->perm_hwaddr, bond->dev->dev_addr, ETH_ALEN)) {
1006
		bond_for_each_slave(bond, tmp_slave1, i) {
1007 1008
			if (!memcmp(tmp_slave1->dev->dev_addr, slave->dev->dev_addr,
				    ETH_ALEN)) {
1009
				found = 1;
1010 1011 1012
				break;
			}
		}
1013

1014
		if (found) {
1015 1016 1017
			/* a slave was found that is using the mac address
			 * of the new slave
			 */
1018 1019 1020 1021
			printk(KERN_ERR DRV_NAME
			       ": Error: the hw address of slave %s is not "
			       "unique - cannot enslave it!",
			       slave->dev->name);
1022 1023
			return -EINVAL;
		}
1024

1025 1026 1027
		return 0;
	}

1028 1029
	/* The slave's address is equal to the address of the bond.
	 * Search for a spare address in the bond for this slave.
1030
	 */
1031
	free_mac_slave = NULL;
1032

1033 1034 1035
	bond_for_each_slave(bond, tmp_slave1, i) {
		found = 0;
		bond_for_each_slave(bond, tmp_slave2, j) {
1036 1037 1038
			if (!memcmp(tmp_slave1->perm_hwaddr,
				    tmp_slave2->dev->dev_addr,
				    ETH_ALEN)) {
1039
				found = 1;
1040 1041 1042 1043
				break;
			}
		}

1044
		if (!found) {
1045 1046 1047
			/* no slave has tmp_slave1's perm addr
			 * as its curr addr
			 */
1048
			free_mac_slave = tmp_slave1;
1049 1050 1051 1052
			break;
		}
	}

1053 1054
	if (free_mac_slave) {
		alb_set_slave_mac_addr(slave, free_mac_slave->perm_hwaddr,
1055
				       bond->alb_info.rlb_enabled);
1056

1057 1058 1059
		printk(KERN_WARNING DRV_NAME
		       ": Warning: the hw address of slave %s is in use by "
		       "the bond; giving it the hw address of %s\n",
1060
		       slave->dev->name, free_mac_slave->dev->name);
1061
	} else {
1062 1063 1064 1065 1066
		printk(KERN_ERR DRV_NAME
		       ": Error: the hw address of slave %s is in use by the "
		       "bond; couldn't find a slave with a free hw address to "
		       "give it (this should not have happened)\n",
		       slave->dev->name);
1067 1068 1069 1070 1071 1072
		return -EFAULT;
	}

	return 0;
}

1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083
/**
 * alb_set_mac_address
 * @bond:
 * @addr:
 *
 * In TLB mode all slaves are configured to the bond's hw address, but set
 * their dev_addr field to different addresses (based on their permanent hw
 * addresses).
 *
 * For each slave, this function sets the interface to the new address and then
 * changes its dev_addr field to its previous value.
1084
 *
1085 1086
 * Unwinding assumes bond's mac address has not yet changed.
 */
1087
static int alb_set_mac_address(struct bonding *bond, void *addr)
1088 1089
{
	struct sockaddr sa;
1090
	struct slave *slave, *stop_at;
1091
	char tmp_addr[ETH_ALEN];
1092
	int res;
1093
	int i;
1094 1095 1096 1097 1098

	if (bond->alb_info.rlb_enabled) {
		return 0;
	}

1099
	bond_for_each_slave(bond, slave, i) {
1100
		if (slave->dev->set_mac_address == NULL) {
1101
			res = -EOPNOTSUPP;
1102 1103 1104 1105 1106 1107
			goto unwind;
		}

		/* save net_device's current hw address */
		memcpy(tmp_addr, slave->dev->dev_addr, ETH_ALEN);

1108
		res = slave->dev->set_mac_address(slave->dev, addr);
1109 1110 1111 1112

		/* restore net_device's hw address */
		memcpy(slave->dev->dev_addr, tmp_addr, ETH_ALEN);

1113
		if (res) {
1114 1115 1116 1117 1118 1119 1120
			goto unwind;
		}
	}

	return 0;

unwind:
1121 1122
	memcpy(sa.sa_data, bond->dev->dev_addr, bond->dev->addr_len);
	sa.sa_family = bond->dev->type;
1123 1124 1125 1126

	/* unwind from head to the slave that failed */
	stop_at = slave;
	bond_for_each_slave_from_to(bond, slave, i, bond->first_slave, stop_at) {
1127 1128 1129 1130 1131
		memcpy(tmp_addr, slave->dev->dev_addr, ETH_ALEN);
		slave->dev->set_mac_address(slave->dev, &sa);
		memcpy(slave->dev->dev_addr, tmp_addr, ETH_ALEN);
	}

1132
	return res;
1133 1134
}

1135 1136
/************************ exported alb functions ************************/

1137
int bond_alb_initialize(struct bonding *bond, int rlb_enabled)
1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158
{
	int res;

	res = tlb_initialize(bond);
	if (res) {
		return res;
	}

	if (rlb_enabled) {
		bond->alb_info.rlb_enabled = 1;
		/* initialize rlb */
		res = rlb_initialize(bond);
		if (res) {
			tlb_deinitialize(bond);
			return res;
		}
	}

	return 0;
}

1159
void bond_alb_deinitialize(struct bonding *bond)
1160 1161 1162 1163 1164 1165 1166 1167 1168 1169
{
	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));

	tlb_deinitialize(bond);

	if (bond_info->rlb_enabled) {
		rlb_deinitialize(bond);
	}
}

1170
int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
1171
{
1172 1173
	struct bonding *bond = (struct bonding *)bond_dev->priv;
	struct ethhdr *eth_data = (struct ethhdr *)skb->mac.raw = skb->data;
1174 1175
	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
	struct slave *tx_slave = NULL;
1176
	static u32 ip_bcast = 0xffffffff;
1177
	int hash_size = 0;
1178
	int do_tx_balance = 1;
1179 1180 1181
	u32 hash_index = 0;
	u8 *hash_start = NULL;

1182
	/* make sure that the curr_active_slave and the slaves list do
1183 1184 1185
	 * not change during tx
	 */
	read_lock(&bond->lock);
1186
	read_lock(&bond->curr_slave_lock);
1187

1188 1189
	if (!BOND_IS_OK(bond)) {
		goto free_out;
1190 1191 1192 1193 1194
	}

	switch (ntohs(skb->protocol)) {
	case ETH_P_IP:
		if ((memcmp(eth_data->h_dest, mac_bcast, ETH_ALEN) == 0) ||
1195
		    (skb->nh.iph->daddr == ip_bcast)) {
1196 1197 1198 1199
			do_tx_balance = 0;
			break;
		}
		hash_start = (char*)&(skb->nh.iph->daddr);
1200
		hash_size = sizeof(skb->nh.iph->daddr);
1201 1202 1203 1204 1205 1206 1207 1208
		break;
	case ETH_P_IPV6:
		if (memcmp(eth_data->h_dest, mac_bcast, ETH_ALEN) == 0) {
			do_tx_balance = 0;
			break;
		}

		hash_start = (char*)&(skb->nh.ipv6h->daddr);
1209
		hash_size = sizeof(skb->nh.ipv6h->daddr);
1210 1211
		break;
	case ETH_P_IPX:
1212
		if (ipx_hdr(skb)->ipx_checksum !=
1213 1214 1215 1216 1217 1218
		    __constant_htons(IPX_NO_CHECKSUM)) {
			/* something is wrong with this packet */
			do_tx_balance = 0;
			break;
		}

1219
		if (ipx_hdr(skb)->ipx_type !=
1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249
		    __constant_htons(IPX_TYPE_NCP)) {
			/* The only protocol worth balancing in
			 * this family since it has an "ARP" like
			 * mechanism
			 */
			do_tx_balance = 0;
			break;
		}

		hash_start = (char*)eth_data->h_dest;
		hash_size = ETH_ALEN;
		break;
	case ETH_P_ARP:
		do_tx_balance = 0;
		if (bond_info->rlb_enabled) {
			tx_slave = rlb_arp_xmit(skb, bond);
		}
		break;
	default:
		do_tx_balance = 0;
		break;
	}

	if (do_tx_balance) {
		hash_index = _simple_hash(hash_start, hash_size);
		tx_slave = tlb_choose_channel(bond, hash_index, skb->len);
	}

	if (!tx_slave) {
		/* unbalanced or unassigned, send through primary */
1250
		tx_slave = bond->curr_active_slave;
1251 1252 1253 1254 1255
		bond_info->unbalanced_load += skb->len;
	}

	if (tx_slave && SLAVE_IS_OK(tx_slave)) {
		skb->dev = tx_slave->dev;
1256
		if (tx_slave != bond->curr_active_slave) {
1257
			memcpy(eth_data->h_source,
1258 1259
			       tx_slave->dev->dev_addr,
			       ETH_ALEN);
1260 1261 1262 1263 1264 1265 1266
		}
		dev_queue_xmit(skb);
	} else {
		/* no suitable interface, frame not sent */
		if (tx_slave) {
			tlb_clear_slave(bond, tx_slave, 0);
		}
1267
		goto free_out;
1268 1269
	}

1270
out:
1271
	read_unlock(&bond->curr_slave_lock);
1272 1273
	read_unlock(&bond->lock);
	return 0;
1274 1275 1276 1277

free_out:
	dev_kfree_skb(skb);
	goto out;
1278 1279
}

1280
void bond_alb_monitor(struct bonding *bond)
1281 1282
{
	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
1283
	struct slave *slave;
1284
	int delta_in_ticks = HZ / ALB_TIMER_TICKS_PER_SEC;
1285
	int i;
1286 1287 1288

	read_lock(&bond->lock);

1289 1290 1291 1292 1293
	if (bond->kill_timers) {
		goto out;
	}

	if (bond->slave_cnt == 0) {
1294 1295
		bond_info->tx_rebalance_counter = 0;
		bond_info->lp_counter = 0;
1296
		goto re_arm;
1297 1298 1299 1300 1301 1302 1303
	}

	bond_info->tx_rebalance_counter++;
	bond_info->lp_counter++;

	/* send learning packets */
	if (bond_info->lp_counter >= BOND_ALB_LP_TICKS) {
1304
		/* change of curr_active_slave involves swapping of mac addresses.
1305
		 * in order to avoid this swapping from happening while
1306
		 * sending the learning packets, the curr_slave_lock must be held for
1307 1308
		 * read.
		 */
1309
		read_lock(&bond->curr_slave_lock);
1310

1311
		bond_for_each_slave(bond, slave, i) {
1312 1313
			alb_send_learning_packets(slave,slave->dev->dev_addr);
		}
1314

1315
		read_unlock(&bond->curr_slave_lock);
1316 1317 1318 1319 1320 1321

		bond_info->lp_counter = 0;
	}

	/* rebalance tx traffic */
	if (bond_info->tx_rebalance_counter >= BOND_TLB_REBALANCE_TICKS) {
1322

1323
		read_lock(&bond->curr_slave_lock);
1324

1325
		bond_for_each_slave(bond, slave, i) {
1326
			tlb_clear_slave(bond, slave, 1);
1327
			if (slave == bond->curr_active_slave) {
1328 1329 1330 1331 1332 1333
				SLAVE_TLB_INFO(slave).load =
					bond_info->unbalanced_load /
						BOND_TLB_REBALANCE_INTERVAL;
				bond_info->unbalanced_load = 0;
			}
		}
1334

1335
		read_unlock(&bond->curr_slave_lock);
1336

1337 1338 1339 1340 1341 1342
		bond_info->tx_rebalance_counter = 0;
	}

	/* handle rlb stuff */
	if (bond_info->rlb_enabled) {
		/* the following code changes the promiscuity of the
1343
		 * the curr_active_slave. It needs to be locked with a
1344 1345 1346
		 * write lock to protect from other code that also
		 * sets the promiscuity.
		 */
1347
		write_lock(&bond->curr_slave_lock);
1348

1349
		if (bond_info->primary_is_promisc &&
1350
		    (++bond_info->rlb_promisc_timeout_counter >= RLB_PROMISC_TIMEOUT)) {
1351 1352 1353 1354 1355 1356 1357

			bond_info->rlb_promisc_timeout_counter = 0;

			/* If the primary was set to promiscuous mode
			 * because a slave was disabled then
			 * it can now leave promiscuous mode.
			 */
1358
			dev_set_promiscuity(bond->curr_active_slave->dev, -1);
1359 1360
			bond_info->primary_is_promisc = 0;
		}
1361

1362
		write_unlock(&bond->curr_slave_lock);
1363

1364
		if (bond_info->rlb_rebalance) {
1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383
			bond_info->rlb_rebalance = 0;
			rlb_rebalance(bond);
		}

		/* check if clients need updating */
		if (bond_info->rx_ntt) {
			if (bond_info->rlb_update_delay_counter) {
				--bond_info->rlb_update_delay_counter;
			} else {
				rlb_update_rx_clients(bond);
				if (bond_info->rlb_update_retry_counter) {
					--bond_info->rlb_update_retry_counter;
				} else {
					bond_info->rx_ntt = 0;
				}
			}
		}
	}

1384 1385
re_arm:
	mod_timer(&(bond_info->alb_timer), jiffies + delta_in_ticks);
1386 1387 1388 1389
out:
	read_unlock(&bond->lock);
}

1390
/* assumption: called before the slave is attached to the bond
1391 1392
 * and not locked by the bond lock
 */
1393
int bond_alb_init_slave(struct bonding *bond, struct slave *slave)
1394
{
1395
	int res;
1396

1397
	res = alb_set_slave_mac_addr(slave, slave->perm_hwaddr,
1398
				     bond->alb_info.rlb_enabled);
1399 1400
	if (res) {
		return res;
1401 1402 1403 1404 1405 1406 1407
	}

	/* caller must hold the bond lock for write since the mac addresses
	 * are compared and may be swapped.
	 */
	write_lock_bh(&bond->lock);

1408
	res = alb_handle_addr_collision_on_attach(bond, slave);
1409 1410 1411

	write_unlock_bh(&bond->lock);

1412 1413
	if (res) {
		return res;
1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428
	}

	tlb_init_slave(slave);

	/* order a rebalance ASAP */
	bond->alb_info.tx_rebalance_counter = BOND_TLB_REBALANCE_TICKS;

	if (bond->alb_info.rlb_enabled) {
		bond->alb_info.rlb_rebalance = 1;
	}

	return 0;
}

/* Caller must hold bond lock for write */
1429
void bond_alb_deinit_slave(struct bonding *bond, struct slave *slave)
1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443
{
	if (bond->slave_cnt > 1) {
		alb_change_hw_addr_on_detach(bond, slave);
	}

	tlb_clear_slave(bond, slave, 0);

	if (bond->alb_info.rlb_enabled) {
		bond->alb_info.next_rx_slave = NULL;
		rlb_clear_slave(bond, slave);
	}
}

/* Caller must hold bond lock for read */
1444
void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char link)
1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467
{
	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));

	if (link == BOND_LINK_DOWN) {
		tlb_clear_slave(bond, slave, 0);
		if (bond->alb_info.rlb_enabled) {
			rlb_clear_slave(bond, slave);
		}
	} else if (link == BOND_LINK_UP) {
		/* order a rebalance ASAP */
		bond_info->tx_rebalance_counter = BOND_TLB_REBALANCE_TICKS;
		if (bond->alb_info.rlb_enabled) {
			bond->alb_info.rlb_rebalance = 1;
			/* If the updelay module parameter is smaller than the
			 * forwarding delay of the switch the rebalance will
			 * not work because the rebalance arp replies will
			 * not be forwarded to the clients..
			 */
		}
	}
}

/**
1468
 * bond_alb_handle_active_change - assign new curr_active_slave
1469 1470 1471
 * @bond: our bonding struct
 * @new_slave: new slave to assign
 *
1472
 * Set the bond->curr_active_slave to @new_slave and handle
1473 1474
 * mac address swapping and promiscuity changes as needed.
 *
1475
 * Caller must hold bond curr_slave_lock for write (or bond lock for write)
1476
 */
1477
void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave)
1478
{
1479
	struct slave *swap_slave;
1480
	int i;
1481

1482
	if (bond->curr_active_slave == new_slave) {
1483 1484 1485
		return;
	}

1486 1487
	if (bond->curr_active_slave && bond->alb_info.primary_is_promisc) {
		dev_set_promiscuity(bond->curr_active_slave->dev, -1);
1488 1489 1490 1491
		bond->alb_info.primary_is_promisc = 0;
		bond->alb_info.rlb_promisc_timeout_counter = 0;
	}

1492 1493
	swap_slave = bond->curr_active_slave;
	bond->curr_active_slave = new_slave;
1494 1495 1496 1497 1498

	if (!new_slave || (bond->slave_cnt == 0)) {
		return;
	}

1499 1500
	/* set the new curr_active_slave to the bonds mac address
	 * i.e. swap mac addresses of old curr_active_slave and new curr_active_slave
1501 1502
	 */
	if (!swap_slave) {
1503
		struct slave *tmp_slave;
1504
		/* find slave that is holding the bond's mac address */
1505 1506
		bond_for_each_slave(bond, tmp_slave, i) {
			if (!memcmp(tmp_slave->dev->dev_addr,
1507
				    bond->dev->dev_addr, ETH_ALEN)) {
1508
				swap_slave = tmp_slave;
1509 1510 1511 1512 1513
				break;
			}
		}
	}

1514
	/* curr_active_slave must be set before calling alb_swap_mac_addr */
1515
	if (swap_slave) {
1516 1517 1518 1519
		/* swap mac address */
		alb_swap_mac_addr(bond, swap_slave, new_slave);
	} else {
		/* set the new_slave to the bond mac address */
1520
		alb_set_slave_mac_addr(new_slave, bond->dev->dev_addr,
1521
				       bond->alb_info.rlb_enabled);
1522
		/* fasten bond mac on new current slave */
1523
		alb_send_learning_packets(new_slave, bond->dev->dev_addr);
1524 1525 1526
	}
}

1527
int bond_alb_set_mac_address(struct net_device *bond_dev, void *addr)
1528
{
1529
	struct bonding *bond = (struct bonding *)bond_dev->priv;
1530
	struct sockaddr *sa = addr;
1531
	struct slave *slave, *swap_slave;
1532
	int res;
1533
	int i;
1534 1535 1536 1537 1538

	if (!is_valid_ether_addr(sa->sa_data)) {
		return -EADDRNOTAVAIL;
	}

1539 1540 1541
	res = alb_set_mac_address(bond, addr);
	if (res) {
		return res;
1542 1543
	}

1544
	memcpy(bond_dev->dev_addr, sa->sa_data, bond_dev->addr_len);
1545

1546
	/* If there is no curr_active_slave there is nothing else to do.
1547 1548 1549
	 * Otherwise we'll need to pass the new address to it and handle
	 * duplications.
	 */
1550
	if (!bond->curr_active_slave) {
1551 1552 1553
		return 0;
	}

1554 1555 1556 1557 1558
	swap_slave = NULL;

	bond_for_each_slave(bond, slave, i) {
		if (!memcmp(slave->dev->dev_addr, bond_dev->dev_addr, ETH_ALEN)) {
			swap_slave = slave;
1559 1560 1561 1562
			break;
		}
	}

1563
	if (swap_slave) {
1564
		alb_swap_mac_addr(bond, swap_slave, bond->curr_active_slave);
1565
	} else {
1566
		alb_set_slave_mac_addr(bond->curr_active_slave, bond_dev->dev_addr,
1567 1568
				       bond->alb_info.rlb_enabled);

1569
		alb_send_learning_packets(bond->curr_active_slave, bond_dev->dev_addr);
1570 1571
		if (bond->alb_info.rlb_enabled) {
			/* inform clients mac address has changed */
1572
			rlb_req_update_slave_clients(bond, bond->curr_active_slave);
1573 1574 1575 1576 1577 1578
		}
	}

	return 0;
}