Commit 90b1f609 authored by Linus Torvalds

Merge bk://kernel.bkbits.net/davem/net-2.6

into ppc970.osdl.org:/home/torvalds/v2.6/linux
parents 044836d4 34b0db5b
Generic networking statistics for netlink users
======================================================================
Statistic counters are grouped into structs:

Struct               TLV type            Description
----------------------------------------------------------------------
gnet_stats_basic     TCA_STATS_BASIC     Basic statistics
gnet_stats_rate_est  TCA_STATS_RATE_EST  Rate estimator
gnet_stats_queue     TCA_STATS_QUEUE     Queue statistics
none                 TCA_STATS_APP       Application specific
Collecting:
-----------
Declare the statistic structs you need (the dump example below also
expects the struct to carry the lock protecting its counters):

struct mystruct {
	spinlock_t              lock;   /* protects the counters below */
	struct gnet_stats_basic bstats;
	struct gnet_stats_queue qstats;
	...
};
Update statistics:

mystruct->bstats.packets++;
mystruct->bstats.bytes += skb->len;
mystruct->qstats.backlog += skb->len;
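
In practice these updates usually live in one small helper on the data
path. A minimal sketch (my_count_packet and the exact mystruct layout
are illustrative, not part of the API):

my_count_packet(struct mystruct *p, struct sk_buff *skb)
{
	p->bstats.packets++;
	p->bstats.bytes += skb->len;
	p->qstats.backlog += skb->len;	/* packet is now queued */
}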
Export to userspace (Dump):
---------------------------
my_dumping_routine(struct sk_buff *skb, ...)
{
	struct gnet_dump dump;

	if (gnet_stats_start_copy(skb, TCA_STATS2, &mystruct->lock, &dump) < 0)
		goto rtattr_failure;

	if (gnet_stats_copy_basic(&dump, &mystruct->bstats) < 0 ||
	    gnet_stats_copy_queue(&dump, &mystruct->qstats) < 0 ||
	    gnet_stats_copy_app(&dump, &xstats, sizeof(xstats)) < 0)
		goto rtattr_failure;

	if (gnet_stats_finish_copy(&dump) < 0)
		goto rtattr_failure;
	...
}
TCA_STATS/TCA_XSTATS backward compatibility:
--------------------------------------------
Prior users of struct tc_stats and xstats can maintain backward
compatibility by calling the compat wrappers to keep providing the
existing TLV types.
my_dumping_routine(struct sk_buff *skb, ...)
{
	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
					 TCA_XSTATS, &mystruct->lock, &dump) < 0)
		goto rtattr_failure;
	...
}
A struct tc_stats will be filled out during gnet_stats_copy_* calls
and appended to the skb. TCA_XSTATS is provided if gnet_stats_copy_app
was called.
Locking:
--------
Locks are taken before writing and released once all statistics have
been written. Locks are always released in case of an error. You
are responsible for making sure that the lock is initialized.
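
For example, a minimal setup sketch (assuming mystruct embeds its own
spinlock, as the dump example above does):

struct mystruct *p = kmalloc(sizeof(*p), GFP_KERNEL);

if (p) {
	memset(p, 0, sizeof(*p));
	spin_lock_init(&p->lock);	/* before the first dump */
}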
Rate Estimator:
--------------
0) Prepare an estimator attribute. Most likely this would be done in
   user space. The value of this TLV should contain a tc_estimator
   structure. As usual, such a TLV needs to be 32-bit aligned and
   therefore the length needs to be set appropriately. The estimator
   interval and ewma log need to be converted to the appropriate
   encodings; it is advisable to use tc_estimator.c::tc_setup_estimator()
   as the conversion routine. It takes a time interval in microseconds,
   a time constant also in microseconds, and a struct tc_estimator to
   be populated. The populated tc_estimator can then be transported to
   your code in the kernel in a TLV of type TCA_RATE.
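
A user-space sketch of step 0, assuming iproute2's
tc_estimator.c::tc_setup_estimator() and the libnetlink helper
addattr_l() (n points at the netlink request being assembled, 1024 is
its assumed buffer size):

struct tc_estimator est;

/* 250ms sampling interval and 1s time constant, both in usecs */
if (tc_setup_estimator(250000, 1000000, &est) < 0)
	return -1;

/* append the TCA_RATE TLV; length and alignment are handled here */
addattr_l(n, 1024, TCA_RATE, &est, sizeof(est));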
In the kernel, when setting up:

1) make sure you have the basic stats and rate estimator stats set up
   first.
2) make sure you have initialized the stats lock that is used to
   protect those stats.
3) now initialize a new estimator:

	int ret = gen_new_estimator(my_basicstats, my_rate_est_stats,
				    mystats_lock, attr_with_tcestimator_struct);

	if (ret == 0)
		/* success */
	else
		/* failed */
From now on, every time you dump my_rate_est_stats it will contain
up-to-date info.

Once you are done, call gen_kill_estimator(my_basicstats,
my_rate_est_stats). Make sure that my_basicstats and my_rate_est_stats
are still valid (i.e. still exist) at the time of making this call.
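
A matching teardown sketch (mystruct_destroy is illustrative and
assumes mystruct also embeds a struct gnet_stats_rate_est rate_est
member):

mystruct_destroy(struct mystruct *p)
{
	/* bstats and rate_est are still valid at this point */
	gen_kill_estimator(&p->bstats, &p->rate_est);
	kfree(p);
}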
Authors:
--------
Thomas Graf <tgraf@suug.ch>
Jamal Hadi Salim <hadi@cyberus.ca>
@@ -632,9 +632,9 @@ M: g.liakhovetski@gmx.de
S: Maintained
DECnet NETWORK LAYER
P: Steven Whitehouse
M: SteveW@ACM.org
W: http://www.sucs.swan.ac.uk/~rohan/DECnet/index.html
P: Patrick Caulfield
M: patrick@tykepenguin.com
W: http://linux-decnet.sourceforge.net
L: linux-decnet-user@lists.sourceforge.net
S: Maintained
@@ -102,10 +102,10 @@ struct aes_ctx {
#define E_KEY ctx->E
#define D_KEY ctx->D
static u8 pow_tab[256];
static u8 log_tab[256];
static u8 sbx_tab[256];
static u8 isb_tab[256];
static u8 pow_tab[256] __initdata;
static u8 log_tab[256] __initdata;
static u8 sbx_tab[256] __initdata;
static u8 isb_tab[256] __initdata;
static u32 rco_tab[10];
static u32 ft_tab[4][256];
static u32 it_tab[4][256];
@@ -113,7 +113,7 @@ static u32 it_tab[4][256];
static u32 fl_tab[4][256];
static u32 il_tab[4][256];
static inline u8
static inline u8 __init
f_mult (u8 a, u8 b)
{
u8 aa = log_tab[a], cc = aa + log_tab[b];
@@ -153,7 +153,7 @@ f_mult (u8 a, u8 b)
il_tab[2][byte(bi[(n + 2) & 3],2)] ^ \
il_tab[3][byte(bi[(n + 1) & 3],3)] ^ *(k + n)
static void
static void __init
gen_tabs (void)
{
u32 i, t;
@@ -2187,7 +2187,7 @@ static __u32 twothirdsMD4Transform (__u32 const buf[4], __u32 const in[12])
#undef K3
/* This should not be decreased so low that ISNs wrap too fast. */
#define REKEY_INTERVAL 300
#define REKEY_INTERVAL (300*HZ)
/*
* Bit layout of the tcp sequence numbers (before adding current time):
* bit 24-31: increased after every key exchange
@@ -2213,48 +2213,55 @@ static __u32 twothirdsMD4Transform (__u32 const buf[4], __u32 const in[12])
#define HASH_MASK ( (1<<HASH_BITS)-1 )
static struct keydata {
time_t rekey_time;
__u32 count; // already shifted to the final position
__u32 secret[12];
} ____cacheline_aligned ip_keydata[2];
static spinlock_t ip_lock = SPIN_LOCK_UNLOCKED;
static unsigned int ip_cnt;
static void rekey_seq_generator(void *private_)
{
struct keydata *keyptr;
struct timeval tv;
static void rekey_seq_generator(void *private_);
do_gettimeofday(&tv);
static DECLARE_WORK(rekey_work, rekey_seq_generator, NULL);
spin_lock_bh(&ip_lock);
keyptr = &ip_keydata[ip_cnt&1];
/*
* Lock avoidance:
* The ISN generation runs lockless - it's just a hash over random data.
* State changes happen every 5 minutes when the random key is replaced.
* Synchronization is performed by having two copies of the hash function
* state and rekey_seq_generator always updates the inactive copy.
* The copy is then activated by updating ip_cnt.
* The implementation breaks down if someone blocks the thread
* that processes SYN requests for more than 5 minutes. Should never
* happen, and even if that happens only a not perfectly compliant
* ISN is generated, nothing fatal.
*/
static void rekey_seq_generator(void *private_)
{
struct keydata *keyptr = &ip_keydata[1^(ip_cnt&1)];
keyptr = &ip_keydata[1^(ip_cnt&1)];
keyptr->rekey_time = tv.tv_sec;
get_random_bytes(keyptr->secret, sizeof(keyptr->secret));
keyptr->count = (ip_cnt&COUNT_MASK)<<HASH_BITS;
mb();
smp_wmb();
ip_cnt++;
spin_unlock_bh(&ip_lock);
schedule_delayed_work(&rekey_work, REKEY_INTERVAL);
}
static DECLARE_WORK(rekey_work, rekey_seq_generator, NULL);
static inline struct keydata *check_and_rekey(time_t time)
static inline struct keydata *get_keyptr(void)
{
struct keydata *keyptr = &ip_keydata[ip_cnt&1];
rmb();
if (!keyptr->rekey_time || (time - keyptr->rekey_time) > REKEY_INTERVAL) {
schedule_work(&rekey_work);
}
smp_rmb();
return keyptr;
}
static __init int seqgen_init(void)
{
rekey_seq_generator(NULL);
return 0;
}
late_initcall(seqgen_init);
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
__u32 secure_tcpv6_sequence_number(__u32 *saddr, __u32 *daddr,
__u16 sport, __u16 dport)
@@ -2262,14 +2269,12 @@ __u32 secure_tcpv6_sequence_number(__u32 *saddr, __u32 *daddr,
struct timeval tv;
__u32 seq;
__u32 hash[12];
struct keydata *keyptr;
struct keydata *keyptr = get_keyptr();
/* The procedure is the same as for IPv4, but addresses are longer.
* Thus we must use twothirdsMD4Transform.
*/
do_gettimeofday(&tv); /* We need the usecs below... */
keyptr = check_and_rekey(tv.tv_sec);
memcpy(hash, saddr, 16);
hash[4]=(sport << 16) + dport;
@@ -2277,6 +2282,8 @@ __u32 secure_tcpv6_sequence_number(__u32 *saddr, __u32 *daddr,
seq = twothirdsMD4Transform(daddr, hash) & HASH_MASK;
seq += keyptr->count;
do_gettimeofday(&tv);
seq += tv.tv_usec + tv.tv_sec*1000000;
return seq;
@@ -2290,13 +2297,7 @@ __u32 secure_tcp_sequence_number(__u32 saddr, __u32 daddr,
struct timeval tv;
__u32 seq;
__u32 hash[4];
struct keydata *keyptr;
/*
* Pick a random secret every REKEY_INTERVAL seconds.
*/
do_gettimeofday(&tv); /* We need the usecs below... */
keyptr = check_and_rekey(tv.tv_sec);
struct keydata *keyptr = get_keyptr();
/*
* Pick a unique starting offset for each TCP connection endpoints
@@ -2319,6 +2320,7 @@ __u32 secure_tcp_sequence_number(__u32 saddr, __u32 daddr,
* That's funny, Linux has one built in! Use it!
* (Networks are faster now - should this be increased?)
*/
do_gettimeofday(&tv);
seq += tv.tv_usec + tv.tv_sec*1000000;
#if 0
printk("init_seq(%lx, %lx, %d, %d) = %d\n",
@@ -2337,7 +2339,7 @@ __u32 secure_ip_id(__u32 daddr)
struct keydata *keyptr;
__u32 hash[4];
keyptr = check_and_rekey(get_seconds());
keyptr = get_keyptr();
/*
* Pick a unique starting offset for each IP destination.
@@ -1814,16 +1814,18 @@ static struct net_device_stats *dfx_ctl_get_stats(struct net_device *dev)
/* Fill the bp->stats structure with driver-maintained counters */
bp->stats.rx_packets = bp->rcv_total_frames;
bp->stats.tx_packets = bp->xmt_total_frames;
bp->stats.rx_bytes = bp->rcv_total_bytes;
bp->stats.tx_bytes = bp->xmt_total_bytes;
bp->stats.rx_errors = (u32)(bp->rcv_crc_errors + bp->rcv_frame_status_errors + bp->rcv_length_errors);
bp->stats.tx_errors = bp->xmt_length_errors;
bp->stats.rx_dropped = bp->rcv_discards;
bp->stats.tx_dropped = bp->xmt_discards;
bp->stats.multicast = bp->rcv_multicast_frames;
bp->stats.transmit_collision = 0; /* always zero (0) for FDDI */
bp->stats.gen.rx_packets = bp->rcv_total_frames;
bp->stats.gen.tx_packets = bp->xmt_total_frames;
bp->stats.gen.rx_bytes = bp->rcv_total_bytes;
bp->stats.gen.tx_bytes = bp->xmt_total_bytes;
bp->stats.gen.rx_errors = bp->rcv_crc_errors +
bp->rcv_frame_status_errors +
bp->rcv_length_errors;
bp->stats.gen.tx_errors = bp->xmt_length_errors;
bp->stats.gen.rx_dropped = bp->rcv_discards;
bp->stats.gen.tx_dropped = bp->xmt_discards;
bp->stats.gen.multicast = bp->rcv_multicast_frames;
bp->stats.gen.collisions = 0; /* always zero (0) for FDDI */
/* Get FDDI SMT MIB objects */
@@ -1095,7 +1095,7 @@ static int skfp_send_pkt(struct sk_buff *skb, struct net_device *dev)
*/
if (!(skb->len >= FDDI_K_LLC_ZLEN && skb->len <= FDDI_K_LLC_LEN)) {
bp->MacStat.tx_errors++; /* bump error counter */
bp->MacStat.gen.tx_errors++; /* bump error counter */
// dequeue packets from xmt queue and send them
netif_start_queue(dev);
dev_kfree_skb(skb);
@@ -1546,8 +1546,8 @@ void mac_drv_tx_complete(struct s_smc *smc, volatile struct s_smt_fp_txd *txd)
skb->len, PCI_DMA_TODEVICE);
txd->txd_os.dma_addr = 0;
smc->os.MacStat.tx_packets++; // Count transmitted packets.
smc->os.MacStat.tx_bytes+=skb->len; // Count bytes
smc->os.MacStat.gen.tx_packets++; // Count transmitted packets.
smc->os.MacStat.gen.tx_bytes+=skb->len; // Count bytes
// free the skb
dev_kfree_skb_irq(skb);
@@ -1629,7 +1629,7 @@ void mac_drv_rx_complete(struct s_smc *smc, volatile struct s_smt_fp_rxd *rxd,
skb = rxd->rxd_os.skb;
if (!skb) {
PRINTK(KERN_INFO "No skb in rxd\n");
smc->os.MacStat.rx_errors++;
smc->os.MacStat.gen.rx_errors++;
goto RequeueRxd;
}
virt = skb->data;
@@ -1682,13 +1682,14 @@ void mac_drv_rx_complete(struct s_smc *smc, volatile struct s_smt_fp_rxd *rxd,
}
// Count statistics.
smc->os.MacStat.rx_packets++; // Count indicated receive packets.
smc->os.MacStat.rx_bytes+=len; // Count bytes
smc->os.MacStat.gen.rx_packets++; // Count indicated receive
// packets.
smc->os.MacStat.gen.rx_bytes+=len; // Count bytes.
// virt points to header again
if (virt[1] & 0x01) { // Check group (multicast) bit.
smc->os.MacStat.multicast++;
smc->os.MacStat.gen.multicast++;
}
// deliver frame to system
@@ -1706,7 +1707,8 @@ void mac_drv_rx_complete(struct s_smc *smc, volatile struct s_smt_fp_rxd *rxd,
RequeueRxd:
PRINTK(KERN_INFO "Rx: re-queue RXD.\n");
mac_drv_requeue_rxd(smc, rxd, frag_count);
smc->os.MacStat.rx_errors++; // Count receive packets not indicated.
smc->os.MacStat.gen.rx_errors++; // Count receive packets
// not indicated.
} // mac_drv_rx_complete
@@ -2081,7 +2083,7 @@ void smt_stat_counter(struct s_smc *smc, int stat)
break;
case 1:
PRINTK(KERN_INFO "Receive fifo overflow.\n");
smc->os.MacStat.rx_errors++;
smc->os.MacStat.gen.rx_errors++;
break;
default:
PRINTK(KERN_INFO "Unknown status (%d).\n", stat);
#ifndef __LINUX_GEN_STATS_H
#define __LINUX_GEN_STATS_H
#include <linux/types.h>
enum {
TCA_STATS_UNSPEC,
TCA_STATS_BASIC,
TCA_STATS_RATE_EST,
TCA_STATS_QUEUE,
TCA_STATS_APP,
__TCA_STATS_MAX,
};
#define TCA_STATS_MAX (__TCA_STATS_MAX - 1)
/**
* @bytes: number of seen bytes
* @packets: number of seen packets
*/
struct gnet_stats_basic
{
__u64 bytes;
__u32 packets;
};
/**
* @bps: current byte rate
* @pps: current packet rate
*/
struct gnet_stats_rate_est
{
__u32 bps;
__u32 pps;
};
/**
* @qlen: queue length
* @backlog: backlog size of queue
* @drops: number of dropped packets
* @requeues: number of requeues
*/
struct gnet_stats_queue
{
__u32 qlen;
__u32 backlog;
__u32 drops;
__u32 requeues;
__u32 overlimits;
};
/**
* @interval: sampling period
* @ewma_log: the log of measurement window weight
*/
struct gnet_estimator
{
signed char interval;
unsigned char ewma_log;
};
#endif /* __LINUX_GEN_STATS_H */
@@ -5,7 +5,7 @@
*
* Global definitions for the ANSI FDDI interface.
*
* Version: @(#)if_fddi.h 1.0.1 09/16/96
* Version: @(#)if_fddi.h 1.0.2 Sep 29 2004
*
* Author: Lawrence V. Stefani, <stefani@lkg.dec.com>
*
@@ -103,38 +103,12 @@ struct fddihdr
} __attribute__ ((packed));
/* Define FDDI statistics structure */
struct fddi_statistics
{
__u32 rx_packets; /* total packets received */
__u32 tx_packets; /* total packets transmitted */
__u32 rx_bytes; /* total bytes received */
__u32 tx_bytes; /* total bytes transmitted */
__u32 rx_errors; /* bad packets received */
__u32 tx_errors; /* packet transmit problems */
__u32 rx_dropped; /* no space in linux buffers */
__u32 tx_dropped; /* no space available in linux */
__u32 multicast; /* multicast packets received */
__u32 transmit_collision; /* always 0 for FDDI */
struct fddi_statistics {
/* Generic statistics. */
/* detailed rx_errors */
__u32 rx_length_errors;
__u32 rx_over_errors; /* receiver ring buff overflow */
__u32 rx_crc_errors; /* recved pkt with crc error */
__u32 rx_frame_errors; /* recv'd frame alignment error */
__u32 rx_fifo_errors; /* recv'r fifo overrun */
__u32 rx_missed_errors; /* receiver missed packet */
struct net_device_stats gen;
/* detailed tx_errors */
__u32 tx_aborted_errors;
__u32 tx_carrier_errors;
__u32 tx_fifo_errors;
__u32 tx_heartbeat_errors;
__u32 tx_window_errors;
/* for cslip etc */
__u32 rx_compressed;
__u32 tx_compressed;
/* Detailed FDDI statistics. Adopted from RFC 1512 */
__u8 smt_station_id[8];
#ifndef __NET_GEN_STATS_H
#define __NET_GEN_STATS_H
#include <linux/gen_stats.h>
#include <linux/socket.h>
#include <linux/rtnetlink.h>
#include <linux/pkt_sched.h>
struct gnet_dump
{
spinlock_t * lock;
struct sk_buff * skb;
struct rtattr * tail;
/* Backward compatibility */
int compat_tc_stats;
int compat_xstats;
struct rtattr * xstats;
struct tc_stats tc_stats;
};
extern int gnet_stats_start_copy(struct sk_buff *skb, int type,
spinlock_t *lock, struct gnet_dump *d);
extern int gnet_stats_start_copy_compat(struct sk_buff *skb, int type,
int tc_stats_type,int xstats_type,
spinlock_t *lock, struct gnet_dump *d);
extern int gnet_stats_copy_basic(struct gnet_dump *d,
struct gnet_stats_basic *b);
extern int gnet_stats_copy_rate_est(struct gnet_dump *d,
struct gnet_stats_rate_est *r);
extern int gnet_stats_copy_queue(struct gnet_dump *d,
struct gnet_stats_queue *q);
extern int gnet_stats_copy_app(struct gnet_dump *d, void *st, int len);
extern int gnet_stats_finish_copy(struct gnet_dump *d);
extern int gen_new_estimator(struct gnet_stats_basic *bstats,
struct gnet_stats_rate_est *rate_est,
spinlock_t *stats_lock, struct rtattr *opt);
extern void gen_kill_estimator(struct gnet_stats_basic *bstats,
struct gnet_stats_rate_est *rate_est);
#endif
@@ -2,7 +2,7 @@
# Makefile for the Linux networking core.
#
obj-y := sock.o skbuff.o iovec.o datagram.o stream.o scm.o
obj-y := sock.o skbuff.o iovec.o datagram.o stream.o scm.o gen_stats.o gen_estimator.o
obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
/*
* net/sched/gen_estimator.c Simple rate estimator.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
*
* Changes:
* Jamal Hadi Salim - moved it to net/core and reshuffled
* names to make it usable in general net subsystem.
*/
#include <asm/uaccess.h>
#include <asm/system.h>
#include <asm/bitops.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/jiffies.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/in.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/init.h>
#include <net/sock.h>
#include <net/gen_stats.h>
/*
This code is NOT intended to be used for statistics collection,
its purpose is to provide a base for statistical multiplexing
for controlled load service.
If you need only statistics, run a user level daemon which
periodically reads byte counters.
Unfortunately, rate estimation is not a very easy task.
F.e. I did not find a simple way to estimate the current peak rate
and even failed to formulate the problem 8)8)
So I preferred not to build an estimator into the scheduler,
but to run this task separately.
Ideally, it should be kernel thread(s), but for now it runs
from timers, which puts an apparent upper bound on the number of rated
flows, has minimal overhead when that number is small, and is enough
to handle controlled load service and sets of aggregates.
We measure rate over A=(1<<interval) seconds and evaluate EWMA:
avrate = avrate*(1-W) + rate*W
where W is chosen as negative power of 2: W = 2^(-ewma_log)
The resulting time constant is:
T = A/(-ln(1-W))
NOTES.
* The stored value for avbps is scaled by 2^5, so that maximal
rate is ~1Gbit, avpps is scaled by 2^10.
* Minimal interval is HZ/4=250msec (it is the greatest common divisor
for HZ=100 and HZ=1024 8)), maximal interval
is (HZ*2^EST_MAX_INTERVAL)/4 = 8sec. Shorter intervals
are too expensive, longer ones can be implemented
at user level painlessly.
*/
#define EST_MAX_INTERVAL 5
struct gen_estimator
{
struct gen_estimator *next;
struct gnet_stats_basic *bstats;
struct gnet_stats_rate_est *rate_est;
spinlock_t *stats_lock;
unsigned interval;
int ewma_log;
u64 last_bytes;
u32 last_packets;
u32 avpps;
u32 avbps;
};
struct gen_estimator_head
{
struct timer_list timer;
struct gen_estimator *list;
};
static struct gen_estimator_head elist[EST_MAX_INTERVAL+1];
/* Estimator array lock */
static rwlock_t est_lock = RW_LOCK_UNLOCKED;
static void est_timer(unsigned long arg)
{
int idx = (int)arg;
struct gen_estimator *e;
read_lock(&est_lock);
for (e = elist[idx].list; e; e = e->next) {
u64 nbytes;
u32 npackets;
u32 rate;
spin_lock(e->stats_lock);
nbytes = e->bstats->bytes;
npackets = e->bstats->packets;
rate = (nbytes - e->last_bytes)<<(7 - idx);
e->last_bytes = nbytes;
e->avbps += ((long)rate - (long)e->avbps) >> e->ewma_log;
e->rate_est->bps = (e->avbps+0xF)>>5;
rate = (npackets - e->last_packets)<<(12 - idx);
e->last_packets = npackets;
e->avpps += ((long)rate - (long)e->avpps) >> e->ewma_log;
e->rate_est->pps = (e->avpps+0x1FF)>>10;
spin_unlock(e->stats_lock);
}
mod_timer(&elist[idx].timer, jiffies + ((HZ<<idx)/4));
read_unlock(&est_lock);
}
int gen_new_estimator(struct gnet_stats_basic *bstats,
struct gnet_stats_rate_est *rate_est, spinlock_t *stats_lock, struct rtattr *opt)
{
struct gen_estimator *est;
struct gnet_estimator *parm = RTA_DATA(opt);
if (RTA_PAYLOAD(opt) < sizeof(*parm))
return -EINVAL;
if (parm->interval < -2 || parm->interval > 3)
return -EINVAL;
est = kmalloc(sizeof(*est), GFP_KERNEL);
if (est == NULL)
return -ENOBUFS;
memset(est, 0, sizeof(*est));
est->interval = parm->interval + 2;
est->bstats = bstats;
est->rate_est = rate_est;
est->stats_lock = stats_lock;
est->ewma_log = parm->ewma_log;
est->last_bytes = bstats->bytes;
est->avbps = rate_est->bps<<5;
est->last_packets = bstats->packets;
est->avpps = rate_est->pps<<10;
est->next = elist[est->interval].list;
if (est->next == NULL) {
init_timer(&elist[est->interval].timer);
elist[est->interval].timer.data = est->interval;
elist[est->interval].timer.expires = jiffies + ((HZ<<est->interval)/4);
elist[est->interval].timer.function = est_timer;
add_timer(&elist[est->interval].timer);
}
write_lock_bh(&est_lock);
elist[est->interval].list = est;
write_unlock_bh(&est_lock);
return 0;
}
void gen_kill_estimator(struct gnet_stats_basic *bstats,
struct gnet_stats_rate_est *rate_est)
{
int idx;
struct gen_estimator *est, **pest;
for (idx=0; idx <= EST_MAX_INTERVAL; idx++) {
int killed = 0;
pest = &elist[idx].list;
while ((est=*pest) != NULL) {
if (est->rate_est != rate_est || est->bstats != bstats) {
pest = &est->next;
continue;
}
write_lock_bh(&est_lock);
*pest = est->next;
write_unlock_bh(&est_lock);
kfree(est);
killed++;
}
if (killed && elist[idx].list == NULL)
del_timer(&elist[idx].timer);
}
}
EXPORT_SYMBOL(gen_kill_estimator);
EXPORT_SYMBOL(gen_new_estimator);
/*
* net/core/gen_stats.c
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Authors: Thomas Graf <tgraf@suug.ch>
* Jamal Hadi Salim
* Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
*
* See Documentation/networking/gen_stats.txt
*/
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/socket.h>
#include <linux/rtnetlink.h>
#include <linux/gen_stats.h>
#include <net/gen_stats.h>
static inline int
gnet_stats_copy(struct gnet_dump *d, int type, void *buf, int size)
{
RTA_PUT(d->skb, type, size, buf);
return 0;
rtattr_failure:
spin_unlock_bh(d->lock);
return -1;
}
int
gnet_stats_start_copy_compat(struct sk_buff *skb, int type, int tc_stats_type,
int xstats_type, spinlock_t *lock, struct gnet_dump *d)
{
spin_lock_bh(lock);
d->lock = lock;
d->tail = (struct rtattr *) skb->tail;
d->skb = skb;
d->compat_tc_stats = tc_stats_type;
d->compat_xstats = xstats_type;
d->xstats = NULL;
if (d->compat_tc_stats)
memset(&d->tc_stats, 0, sizeof(d->tc_stats));
return gnet_stats_copy(d, type, NULL, 0);
}
int
gnet_stats_start_copy(struct sk_buff *skb, int type, spinlock_t *lock,
struct gnet_dump *d)
{
return gnet_stats_start_copy_compat(skb, type, 0, 0, lock, d);
}
int
gnet_stats_copy_basic(struct gnet_dump *d, struct gnet_stats_basic *b)
{
if (d->compat_tc_stats) {
d->tc_stats.bytes = b->bytes;
d->tc_stats.packets = b->packets;
}
return gnet_stats_copy(d, TCA_STATS_BASIC, b, sizeof(*b));
}
int
gnet_stats_copy_rate_est(struct gnet_dump *d, struct gnet_stats_rate_est *r)
{
if (d->compat_tc_stats) {
d->tc_stats.bps = r->bps;
d->tc_stats.pps = r->pps;
}
return gnet_stats_copy(d, TCA_STATS_RATE_EST, r, sizeof(*r));
}
int
gnet_stats_copy_queue(struct gnet_dump *d, struct gnet_stats_queue *q)
{
if (d->compat_tc_stats) {
d->tc_stats.drops = q->drops;
d->tc_stats.qlen = q->qlen;
d->tc_stats.backlog = q->backlog;
d->tc_stats.overlimits = q->overlimits;
}
return gnet_stats_copy(d, TCA_STATS_QUEUE, q, sizeof(*q));
}
int
gnet_stats_copy_app(struct gnet_dump *d, void *st, int len)
{
if (d->compat_xstats)
d->xstats = (struct rtattr *) d->skb->tail;
return gnet_stats_copy(d, TCA_STATS_APP, st, len);
}
int
gnet_stats_finish_copy(struct gnet_dump *d)
{
d->tail->rta_len = d->skb->tail - (u8 *) d->tail;
if (d->compat_tc_stats)
if (gnet_stats_copy(d, d->compat_tc_stats, &d->tc_stats,
sizeof(d->tc_stats)) < 0)
return -1;
if (d->compat_xstats && d->xstats) {
if (gnet_stats_copy(d, d->compat_xstats, RTA_DATA(d->xstats),
RTA_PAYLOAD(d->xstats)) < 0)
return -1;
}
spin_unlock_bh(d->lock);
return 0;
}
EXPORT_SYMBOL(gnet_stats_start_copy);
EXPORT_SYMBOL(gnet_stats_copy_basic);
EXPORT_SYMBOL(gnet_stats_copy_rate_est);
EXPORT_SYMBOL(gnet_stats_copy_queue);
EXPORT_SYMBOL(gnet_stats_copy_app);
EXPORT_SYMBOL(gnet_stats_finish_copy);
@@ -810,9 +810,15 @@ static void neigh_timer_handler(unsigned long arg)
add_timer(&neigh->timer);
}
if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
struct sk_buff *skb = skb_peek(&neigh->arp_queue);
/* keep skb alive even if arp_queue overflows */
if (skb)
skb_get(skb);
write_unlock(&neigh->lock);
neigh->ops->solicit(neigh, skb_peek(&neigh->arp_queue));
neigh->ops->solicit(neigh, skb);
atomic_inc(&neigh->probes);
if (skb)
kfree_skb(skb);
} else {
out:
write_unlock(&neigh->lock);
@@ -1005,8 +1005,26 @@ int arp_req_set(struct arpreq *r, struct net_device * dev)
if (!dev)
return -EINVAL;
}
if (r->arp_ha.sa_family != dev->type)
return -EINVAL;
switch (dev->type) {
#ifdef CONFIG_FDDI
case ARPHRD_FDDI:
/*
* According to RFC 1390, FDDI devices should accept ARP
* hardware types of 1 (Ethernet). However, to be more
* robust, we'll accept hardware types of either 1 (Ethernet)
* or 6 (IEEE 802.2).
*/
if (r->arp_ha.sa_family != ARPHRD_FDDI &&
r->arp_ha.sa_family != ARPHRD_ETHER &&
r->arp_ha.sa_family != ARPHRD_IEEE802)
return -EINVAL;
break;
#endif
default:
if (r->arp_ha.sa_family != dev->type)
return -EINVAL;
break;
}
neigh = __neigh_lookup_errno(&arp_tbl, &ip, dev);
err = PTR_ERR(neigh);
@@ -720,6 +720,8 @@ static void __init ic_bootp_send_if(struct ic_device *d, unsigned long jiffies_d
b->htype = dev->type;
else if (dev->type == ARPHRD_IEEE802_TR) /* fix for token ring */
b->htype = ARPHRD_IEEE802;
else if (dev->type == ARPHRD_FDDI)
b->htype = ARPHRD_ETHER;
else {
printk("Unknown ARP type 0x%04x for device %s\n", dev->type, dev->name);
b->htype = dev->type; /* can cause undefined behavior */
@@ -16,6 +16,7 @@
* Alexandre Cassen : Added master & backup support at a time.
* Alexandre Cassen : Added SyncID support for incoming sync
* messages filtering.
* Justin Ossevoort : Fix endian problem on sync message size.
*/
#include <linux/module.h>
@@ -279,6 +280,9 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
char *p;
int i;
/* Convert size back to host byte order */
m->size = ntohs(m->size);
if (buflen != m->size) {
IP_VS_ERR("bogus message\n");
return;
@@ -569,6 +573,19 @@ ip_vs_send_async(struct socket *sock, const char *buffer, const size_t length)
return len;
}
static void
ip_vs_send_sync_msg(struct socket *sock, struct ip_vs_sync_mesg *msg)
{
int msize;
msize = msg->size;
/* Put size in network byte order */
msg->size = htons(msg->size);
if (ip_vs_send_async(sock, (char *)msg, msize) != msize)
IP_VS_ERR("ip_vs_send_async error\n");
}
static int
ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen)
@@ -605,7 +622,6 @@ static void sync_master_loop(void)
{
struct socket *sock;
struct ip_vs_sync_buff *sb;
struct ip_vs_sync_mesg *m;
/* create the sending multicast socket */
sock = make_send_sock();
@@ -618,19 +634,13 @@
for (;;) {
while ((sb=sb_dequeue())) {
m = sb->mesg;
if (ip_vs_send_async(sock, (char *)m,
m->size) != m->size)
IP_VS_ERR("ip_vs_send_async error\n");
ip_vs_send_sync_msg(sock, sb->mesg);
ip_vs_sync_buff_release(sb);
}
/* check if entries stay in curr_sb for 2 seconds */
if ((sb = get_curr_sync_buff(2*HZ))) {
m = sb->mesg;
if (ip_vs_send_async(sock, (char *)m,
m->size) != m->size)
IP_VS_ERR("ip_vs_send_async error\n");
ip_vs_send_sync_msg(sock, sb->mesg);
ip_vs_sync_buff_release(sb);
}
@@ -66,15 +66,11 @@
* Minimal interval is HZ/4=250msec (it is the greatest common divisor
for HZ=100 and HZ=1024 8)), maximal interval
is (HZ/4)*2^EST_MAX_INTERVAL = 8sec. Shorter intervals
is (HZ*2^EST_MAX_INTERVAL)/4 = 8sec. Shorter intervals
are too expensive, longer ones can be implemented
at user level painlessly.
*/
#if (HZ%4) != 0
#error Bad HZ value.
#endif
#define EST_MAX_INTERVAL 5
struct qdisc_estimator
@@ -128,7 +124,7 @@ static void est_timer(unsigned long arg)
spin_unlock(e->stats_lock);
}
mod_timer(&elist[idx].timer, jiffies + ((HZ/4)<<idx));
mod_timer(&elist[idx].timer, jiffies + ((HZ<<idx)/4));
read_unlock(&est_lock);
}
@@ -161,7 +157,7 @@ int qdisc_new_estimator(struct tc_stats *stats, spinlock_t *stats_lock, struct r
if (est->next == NULL) {
init_timer(&elist[est->interval].timer);
elist[est->interval].timer.data = est->interval;
elist[est->interval].timer.expires = jiffies + ((HZ/4)<<est->interval);
elist[est->interval].timer.expires = jiffies + ((HZ<<est->interval)/4);
elist[est->interval].timer.function = est_timer;
add_timer(&elist[est->interval].timer);
}
@@ -1675,7 +1675,6 @@ cbq_dump_class(struct Qdisc *sch, unsigned long arg,
cl->xstats.undertime = 0;
if (!PSCHED_IS_PASTPERFECT(cl->undertime))
cl->xstats.undertime = PSCHED_TDIFF(cl->undertime, q->now);
q->link.xstats.avgidle = q->link.avgidle;
if (cbq_copy_xstats(skb, &cl->xstats)) {
spin_unlock_bh(&sch->dev->queue_lock);
goto rtattr_failure;