sch_teql.c 12 KB
Newer Older
Linus Torvalds's avatar
Linus Torvalds committed
1 2 3 4 5 6 7 8 9 10 11 12 13
/* net/sched/sch_teql.c	"True" (or "trivial") link equalizer.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
14
#include <linux/slab.h>
Linus Torvalds's avatar
Linus Torvalds committed
15 16
#include <linux/string.h>
#include <linux/errno.h>
17
#include <linux/if_arp.h>
Linus Torvalds's avatar
Linus Torvalds committed
18 19 20 21
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/moduleparam.h>
22 23
#include <net/dst.h>
#include <net/neighbour.h>
Linus Torvalds's avatar
Linus Torvalds committed
24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55
#include <net/pkt_sched.h>

/*
   How to set it up.
   -----------------

   After loading this module you will find a new device teqlN
   and new qdisc with the same name. To join a slave to the equalizer
   you should just set this qdisc on a device f.e.

   # tc qdisc add dev eth0 root teql0
   # tc qdisc add dev eth1 root teql0

   That's all. Full PnP 8)

   Applicability.
   --------------

   1. Slave devices MUST be active devices, i.e., they must raise the tbusy
      signal and generate EOI events. If you want to equalize virtual devices
      like tunnels, use a normal eql device.
   2. This device puts no limitations on physical slave characteristics
      f.e. it will equalize 9600baud line and 100Mb ethernet perfectly :-)
      Certainly, large difference in link speeds will make the resulting
      equalized link unusable, because of huge packet reordering.
      I estimate an upper useful difference as ~10 times.
   3. If the slave requires address resolution, only protocols using
      neighbour cache (IPv4/IPv6) will work over the equalized link.
      Other protocols are still allowed to use the slave device directly,
      which will not break load balancing, though native slave
      traffic will have the highest priority.  */

Eric Dumazet's avatar
Eric Dumazet committed
56
struct teql_master {
Linus Torvalds's avatar
Linus Torvalds committed
57 58 59 60
	struct Qdisc_ops qops;
	struct net_device *dev;
	struct Qdisc *slaves;
	struct list_head master_list;
61 62 63 64
	unsigned long	tx_bytes;
	unsigned long	tx_packets;
	unsigned long	tx_errors;
	unsigned long	tx_dropped;
Linus Torvalds's avatar
Linus Torvalds committed
65 66
};

Eric Dumazet's avatar
Eric Dumazet committed
67
struct teql_sched_data {
Linus Torvalds's avatar
Linus Torvalds committed
68 69 70 71 72
	struct Qdisc *next;
	struct teql_master *m;
	struct sk_buff_head q;
};

Eric Dumazet's avatar
Eric Dumazet committed
73
/*
 * Successor of qdisc q in the circular slave list. The expansion is an
 * lvalue, so the macro is also used on the left-hand side of assignments
 * when the list is relinked.
 */
#define NEXT_SLAVE(q) (((struct teql_sched_data *)qdisc_priv(q))->next)
Linus Torvalds's avatar
Linus Torvalds committed
74

Eric Dumazet's avatar
Eric Dumazet committed
75
/* Link-type flags that the master device takes over from its slaves. */
#define FMASK (IFF_BROADCAST | IFF_POINTOPOINT)
Linus Torvalds's avatar
Linus Torvalds committed
76 77 78 79

/* "teql*" qdisc routines */

static int
Eric Dumazet's avatar
Eric Dumazet committed
80
teql_enqueue(struct sk_buff *skb, struct Qdisc *sch)
Linus Torvalds's avatar
Linus Torvalds committed
81
{
82
	struct net_device *dev = qdisc_dev(sch);
Linus Torvalds's avatar
Linus Torvalds committed
83 84
	struct teql_sched_data *q = qdisc_priv(sch);

85 86
	if (q->q.qlen < dev->tx_queue_len) {
		__skb_queue_tail(&q->q, skb);
87
		return NET_XMIT_SUCCESS;
Linus Torvalds's avatar
Linus Torvalds committed
88 89
	}

90
	return qdisc_drop(skb, sch);
Linus Torvalds's avatar
Linus Torvalds committed
91 92 93
}

static struct sk_buff *
Eric Dumazet's avatar
Eric Dumazet committed
94
teql_dequeue(struct Qdisc *sch)
Linus Torvalds's avatar
Linus Torvalds committed
95 96
{
	struct teql_sched_data *dat = qdisc_priv(sch);
97
	struct netdev_queue *dat_queue;
Linus Torvalds's avatar
Linus Torvalds committed
98 99 100
	struct sk_buff *skb;

	skb = __skb_dequeue(&dat->q);
101
	dat_queue = netdev_get_tx_queue(dat->m->dev, 0);
Linus Torvalds's avatar
Linus Torvalds committed
102
	if (skb == NULL) {
103
		struct net_device *m = qdisc_dev(dat_queue->qdisc);
Linus Torvalds's avatar
Linus Torvalds committed
104 105 106 107
		if (m) {
			dat->m->slaves = sch;
			netif_wake_queue(m);
		}
108 109
	} else {
		qdisc_bstats_update(sch, skb);
Linus Torvalds's avatar
Linus Torvalds committed
110
	}
111
	sch->q.qlen = dat->q.qlen + dat_queue->qdisc->q.qlen;
Linus Torvalds's avatar
Linus Torvalds committed
112 113 114
	return skb;
}

115
static struct sk_buff *
Eric Dumazet's avatar
Eric Dumazet committed
116
teql_peek(struct Qdisc *sch)
117 118 119 120 121
{
	/* teql is meant to be used as root qdisc */
	return NULL;
}

Eric Dumazet's avatar
Eric Dumazet committed
122
/* NULL-tolerant wrapper around neigh_release(). */
static inline void
teql_neigh_release(struct neighbour *n)
{
	if (!n)
		return;

	neigh_release(n);
}

static void
Eric Dumazet's avatar
Eric Dumazet committed
130
teql_reset(struct Qdisc *sch)
Linus Torvalds's avatar
Linus Torvalds committed
131 132 133 134 135 136 137 138
{
	struct teql_sched_data *dat = qdisc_priv(sch);

	skb_queue_purge(&dat->q);
	sch->q.qlen = 0;
}

static void
Eric Dumazet's avatar
Eric Dumazet committed
139
teql_destroy(struct Qdisc *sch)
Linus Torvalds's avatar
Linus Torvalds committed
140 141 142 143 144
{
	struct Qdisc *q, *prev;
	struct teql_sched_data *dat = qdisc_priv(sch);
	struct teql_master *master = dat->m;

Eric Dumazet's avatar
Eric Dumazet committed
145 146
	prev = master->slaves;
	if (prev) {
Linus Torvalds's avatar
Linus Torvalds committed
147 148 149 150 151 152 153
		do {
			q = NEXT_SLAVE(prev);
			if (q == sch) {
				NEXT_SLAVE(prev) = NEXT_SLAVE(q);
				if (q == master->slaves) {
					master->slaves = NEXT_SLAVE(q);
					if (q == master->slaves) {
154
						struct netdev_queue *txq;
155
						spinlock_t *root_lock;
156 157

						txq = netdev_get_tx_queue(master->dev, 0);
Linus Torvalds's avatar
Linus Torvalds committed
158
						master->slaves = NULL;
159

160
						root_lock = qdisc_root_sleeping_lock(txq->qdisc);
161
						spin_lock_bh(root_lock);
162
						qdisc_reset(txq->qdisc);
163
						spin_unlock_bh(root_lock);
Linus Torvalds's avatar
Linus Torvalds committed
164 165 166 167 168
					}
				}
				skb_queue_purge(&dat->q);
				break;
			}
169

Linus Torvalds's avatar
Linus Torvalds committed
170 171 172 173
		} while ((prev = q) != master->slaves);
	}
}

174
static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt)
Linus Torvalds's avatar
Linus Torvalds committed
175
{
176
	struct net_device *dev = qdisc_dev(sch);
Eric Dumazet's avatar
Eric Dumazet committed
177
	struct teql_master *m = (struct teql_master *)sch->ops;
Linus Torvalds's avatar
Linus Torvalds committed
178 179 180 181 182 183 184 185 186 187 188 189 190 191
	struct teql_sched_data *q = qdisc_priv(sch);

	if (dev->hard_header_len > m->dev->hard_header_len)
		return -EINVAL;

	if (m->dev == dev)
		return -ELOOP;

	q->m = m;

	skb_queue_head_init(&q->q);

	if (m->slaves) {
		if (m->dev->flags & IFF_UP) {
192 193 194 195 196 197 198
			if ((m->dev->flags & IFF_POINTOPOINT &&
			     !(dev->flags & IFF_POINTOPOINT)) ||
			    (m->dev->flags & IFF_BROADCAST &&
			     !(dev->flags & IFF_BROADCAST)) ||
			    (m->dev->flags & IFF_MULTICAST &&
			     !(dev->flags & IFF_MULTICAST)) ||
			    dev->mtu < m->dev->mtu)
Linus Torvalds's avatar
Linus Torvalds committed
199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222
				return -EINVAL;
		} else {
			if (!(dev->flags&IFF_POINTOPOINT))
				m->dev->flags &= ~IFF_POINTOPOINT;
			if (!(dev->flags&IFF_BROADCAST))
				m->dev->flags &= ~IFF_BROADCAST;
			if (!(dev->flags&IFF_MULTICAST))
				m->dev->flags &= ~IFF_MULTICAST;
			if (dev->mtu < m->dev->mtu)
				m->dev->mtu = dev->mtu;
		}
		q->next = NEXT_SLAVE(m->slaves);
		NEXT_SLAVE(m->slaves) = sch;
	} else {
		q->next = sch;
		m->slaves = sch;
		m->dev->mtu = dev->mtu;
		m->dev->flags = (m->dev->flags&~FMASK)|(dev->flags&FMASK);
	}
	return 0;
}


static int
Eric Dumazet's avatar
Eric Dumazet committed
223 224
__teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res,
	       struct net_device *dev, struct netdev_queue *txq,
225
	       struct dst_entry *dst)
Linus Torvalds's avatar
Linus Torvalds committed
226
{
227 228
	struct neighbour *n;
	int err = 0;
Linus Torvalds's avatar
Linus Torvalds committed
229

230 231 232 233 234 235 236 237 238 239 240 241
	n = dst_neigh_lookup_skb(dst, skb);
	if (!n)
		return -ENOENT;

	if (dst->dev != dev) {
		struct neighbour *mn;

		mn = __neigh_lookup_errno(n->tbl, n->primary_key, dev);
		neigh_release(n);
		if (IS_ERR(mn))
			return PTR_ERR(mn);
		n = mn;
Linus Torvalds's avatar
Linus Torvalds committed
242
	}
243

Linus Torvalds's avatar
Linus Torvalds committed
244 245
	if (neigh_event_send(n, skb_res) == 0) {
		int err;
246
		char haddr[MAX_ADDR_LEN];
247

248 249 250
		neigh_ha_snapshot(haddr, n, dev);
		err = dev_hard_header(skb, dev, ntohs(skb->protocol), haddr,
				      NULL, skb->len);
251

252 253 254 255
		if (err < 0)
			err = -EINVAL;
	} else {
		err = (skb_res == NULL) ? -EAGAIN : 1;
Linus Torvalds's avatar
Linus Torvalds committed
256 257
	}
	neigh_release(n);
258
	return err;
Linus Torvalds's avatar
Linus Torvalds committed
259 260
}

261
static inline int teql_resolve(struct sk_buff *skb,
Eric Dumazet's avatar
Eric Dumazet committed
262 263 264
			       struct sk_buff *skb_res,
			       struct net_device *dev,
			       struct netdev_queue *txq)
Linus Torvalds's avatar
Linus Torvalds committed
265
{
Eric Dumazet's avatar
Eric Dumazet committed
266 267 268
	struct dst_entry *dst = skb_dst(skb);
	int res;

269
	if (txq->qdisc == &noop_qdisc)
270 271
		return -ENODEV;

Eric Dumazet's avatar
Eric Dumazet committed
272
	if (!dev->header_ops || !dst)
Linus Torvalds's avatar
Linus Torvalds committed
273
		return 0;
Eric Dumazet's avatar
Eric Dumazet committed
274 275

	rcu_read_lock();
276
	res = __teql_resolve(skb, skb_res, dev, txq, dst);
Eric Dumazet's avatar
Eric Dumazet committed
277 278 279
	rcu_read_unlock();

	return res;
Linus Torvalds's avatar
Linus Torvalds committed
280 281
}

282
static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
Linus Torvalds's avatar
Linus Torvalds committed
283
{
284
	struct teql_master *master = netdev_priv(dev);
Linus Torvalds's avatar
Linus Torvalds committed
285 286 287
	struct Qdisc *start, *q;
	int busy;
	int nores;
288
	int subq = skb_get_queue_mapping(skb);
Linus Torvalds's avatar
Linus Torvalds committed
289 290 291 292 293 294 295 296
	struct sk_buff *skb_res = NULL;

	start = master->slaves;

restart:
	nores = 0;
	busy = 0;

Eric Dumazet's avatar
Eric Dumazet committed
297 298
	q = start;
	if (!q)
Linus Torvalds's avatar
Linus Torvalds committed
299 300 301
		goto drop;

	do {
302
		struct net_device *slave = qdisc_dev(q);
303 304
		struct netdev_queue *slave_txq = netdev_get_tx_queue(slave, 0);
		const struct net_device_ops *slave_ops = slave->netdev_ops;
305

306
		if (slave_txq->qdisc_sleeping != q)
Linus Torvalds's avatar
Linus Torvalds committed
307
			continue;
308
		if (netif_xmit_stopped(netdev_get_tx_queue(slave, subq)) ||
309
		    !netif_running(slave)) {
Linus Torvalds's avatar
Linus Torvalds committed
310 311 312 313
			busy = 1;
			continue;
		}

Eric Dumazet's avatar
Eric Dumazet committed
314
		switch (teql_resolve(skb, skb_res, slave, slave_txq)) {
Linus Torvalds's avatar
Linus Torvalds committed
315
		case 0:
316
			if (__netif_tx_trylock(slave_txq)) {
317 318
				unsigned int length = qdisc_pkt_len(skb);

319
				if (!netif_xmit_frozen_or_stopped(slave_txq) &&
320
				    slave_ops->ndo_start_xmit(skb, slave) == NETDEV_TX_OK) {
Eric Dumazet's avatar
Eric Dumazet committed
321
					txq_trans_update(slave_txq);
322
					__netif_tx_unlock(slave_txq);
Linus Torvalds's avatar
Linus Torvalds committed
323 324
					master->slaves = NEXT_SLAVE(q);
					netif_wake_queue(dev);
325 326
					master->tx_packets++;
					master->tx_bytes += length;
327
					return NETDEV_TX_OK;
Linus Torvalds's avatar
Linus Torvalds committed
328
				}
329
				__netif_tx_unlock(slave_txq);
Linus Torvalds's avatar
Linus Torvalds committed
330
			}
331
			if (netif_xmit_stopped(netdev_get_tx_queue(dev, 0)))
Linus Torvalds's avatar
Linus Torvalds committed
332 333 334 335
				busy = 1;
			break;
		case 1:
			master->slaves = NEXT_SLAVE(q);
336
			return NETDEV_TX_OK;
Linus Torvalds's avatar
Linus Torvalds committed
337 338 339 340
		default:
			nores = 1;
			break;
		}
341
		__skb_pull(skb, skb_network_offset(skb));
Linus Torvalds's avatar
Linus Torvalds committed
342 343 344 345 346 347 348 349 350
	} while ((q = NEXT_SLAVE(q)) != start);

	if (nores && skb_res == NULL) {
		skb_res = skb;
		goto restart;
	}

	if (busy) {
		netif_stop_queue(dev);
351
		return NETDEV_TX_BUSY;
Linus Torvalds's avatar
Linus Torvalds committed
352
	}
353
	master->tx_errors++;
Linus Torvalds's avatar
Linus Torvalds committed
354 355

drop:
356
	master->tx_dropped++;
Linus Torvalds's avatar
Linus Torvalds committed
357
	dev_kfree_skb(skb);
358
	return NETDEV_TX_OK;
Linus Torvalds's avatar
Linus Torvalds committed
359 360 361 362
}

static int teql_master_open(struct net_device *dev)
{
Eric Dumazet's avatar
Eric Dumazet committed
363
	struct Qdisc *q;
364
	struct teql_master *m = netdev_priv(dev);
Linus Torvalds's avatar
Linus Torvalds committed
365
	int mtu = 0xFFFE;
Eric Dumazet's avatar
Eric Dumazet committed
366
	unsigned int flags = IFF_NOARP | IFF_MULTICAST;
Linus Torvalds's avatar
Linus Torvalds committed
367 368 369 370 371 372 373 374

	if (m->slaves == NULL)
		return -EUNATCH;

	flags = FMASK;

	q = m->slaves;
	do {
375
		struct net_device *slave = qdisc_dev(q);
Linus Torvalds's avatar
Linus Torvalds committed
376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408

		if (slave == NULL)
			return -EUNATCH;

		if (slave->mtu < mtu)
			mtu = slave->mtu;
		if (slave->hard_header_len > LL_MAX_HEADER)
			return -EINVAL;

		/* If all the slaves are BROADCAST, master is BROADCAST
		   If all the slaves are PtP, master is PtP
		   Otherwise, master is NBMA.
		 */
		if (!(slave->flags&IFF_POINTOPOINT))
			flags &= ~IFF_POINTOPOINT;
		if (!(slave->flags&IFF_BROADCAST))
			flags &= ~IFF_BROADCAST;
		if (!(slave->flags&IFF_MULTICAST))
			flags &= ~IFF_MULTICAST;
	} while ((q = NEXT_SLAVE(q)) != m->slaves);

	m->dev->mtu = mtu;
	m->dev->flags = (m->dev->flags&~FMASK) | flags;
	netif_start_queue(m->dev);
	return 0;
}

/* ndo_stop of the master device: stop its transmit queue. Always returns 0. */
static int teql_master_close(struct net_device *dev)
{
	netif_stop_queue(dev);

	return 0;
}

409 410 411 412 413 414 415 416 417 418 419 420
static struct rtnl_link_stats64 *teql_master_stats64(struct net_device *dev,
						     struct rtnl_link_stats64 *stats)
{
	struct teql_master *m = netdev_priv(dev);

	stats->tx_packets	= m->tx_packets;
	stats->tx_bytes		= m->tx_bytes;
	stats->tx_errors	= m->tx_errors;
	stats->tx_dropped	= m->tx_dropped;
	return stats;
}

Linus Torvalds's avatar
Linus Torvalds committed
421 422
static int teql_master_mtu(struct net_device *dev, int new_mtu)
{
423
	struct teql_master *m = netdev_priv(dev);
Linus Torvalds's avatar
Linus Torvalds committed
424 425 426 427 428 429 430 431
	struct Qdisc *q;

	if (new_mtu < 68)
		return -EINVAL;

	q = m->slaves;
	if (q) {
		do {
432
			if (new_mtu > qdisc_dev(q)->mtu)
Linus Torvalds's avatar
Linus Torvalds committed
433
				return -EINVAL;
Eric Dumazet's avatar
Eric Dumazet committed
434
		} while ((q = NEXT_SLAVE(q)) != m->slaves);
Linus Torvalds's avatar
Linus Torvalds committed
435 436 437 438 439 440
	}

	dev->mtu = new_mtu;
	return 0;
}

441 442 443 444
static const struct net_device_ops teql_netdev_ops = {
	.ndo_open	= teql_master_open,
	.ndo_stop	= teql_master_close,
	.ndo_start_xmit	= teql_master_xmit,
445
	.ndo_get_stats64 = teql_master_stats64,
446 447 448
	.ndo_change_mtu	= teql_master_mtu,
};

Linus Torvalds's avatar
Linus Torvalds committed
449 450
static __init void teql_master_setup(struct net_device *dev)
{
451
	struct teql_master *master = netdev_priv(dev);
Linus Torvalds's avatar
Linus Torvalds committed
452 453 454 455
	struct Qdisc_ops *ops = &master->qops;

	master->dev	= dev;
	ops->priv_size  = sizeof(struct teql_sched_data);
456

Linus Torvalds's avatar
Linus Torvalds committed
457 458
	ops->enqueue	=	teql_enqueue;
	ops->dequeue	=	teql_dequeue;
459
	ops->peek	=	teql_peek;
Linus Torvalds's avatar
Linus Torvalds committed
460 461 462 463 464
	ops->init	=	teql_qdisc_init;
	ops->reset	=	teql_reset;
	ops->destroy	=	teql_destroy;
	ops->owner	=	THIS_MODULE;

465
	dev->netdev_ops =       &teql_netdev_ops;
Linus Torvalds's avatar
Linus Torvalds committed
466 467 468 469 470
	dev->type		= ARPHRD_VOID;
	dev->mtu		= 1500;
	dev->tx_queue_len	= 100;
	dev->flags		= IFF_NOARP;
	dev->hard_header_len	= LL_MAX_HEADER;
471
	dev->priv_flags		&= ~IFF_XMIT_DST_RELEASE;
Linus Torvalds's avatar
Linus Torvalds committed
472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499
}

/* Every equalizer created by teql_init(), linked through teql_master.master_list. */
static LIST_HEAD(master_dev_list);
/* Number of teqlN devices teql_init() tries to create; settable as a module parameter. */
static int max_equalizers = 1;
module_param(max_equalizers, int, 0);
MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers");

/*
 * Module init: create up to max_equalizers master devices and register one
 * qdisc (named after the device) for each of them.
 *
 * Creation stops at the first failure. The module still loads when at least
 * one equalizer was set up; otherwise the last error is returned (-ENODEV
 * when the loop body never ran).
 */
static int __init teql_init(void)
{
	int err = -ENODEV;
	int i;

	for (i = 0; i < max_equalizers; i++) {
		struct teql_master *master;
		struct net_device *dev;

		dev = alloc_netdev(sizeof(*master), "teql%d", teql_master_setup);
		if (!dev) {
			err = -ENOMEM;
			break;
		}

		err = register_netdev(dev);
		if (err) {
			free_netdev(dev);
			break;
		}

		master = netdev_priv(dev);

		/* the qdisc is known to tc under the device's name */
		strlcpy(master->qops.id, dev->name, IFNAMSIZ);
		err = register_qdisc(&master->qops);
		if (err) {
			unregister_netdev(dev);
			free_netdev(dev);
			break;
		}

		list_add_tail(&master->master_list, &master_dev_list);
	}

	return i ? 0 : err;
}

516
/*
 * Module exit: tear down every equalizer on master_dev_list - unregister
 * its qdisc, then unregister and free its device.
 */
static void __exit teql_exit(void)
{
	struct teql_master *master, *tmp;

	list_for_each_entry_safe(master, tmp, &master_dev_list, master_list) {
		/* master lives in the device's private area, so fetch dev first */
		struct net_device *dev = master->dev;

		list_del(&master->master_list);

		unregister_qdisc(&master->qops);
		unregister_netdev(dev);
		free_netdev(dev);
	}
}

/* Module entry and exit points. */
module_init(teql_init);
module_exit(teql_exit);

MODULE_LICENSE("GPL");