Commit 90b1f609 authored by Linus Torvalds

Merge bk://kernel.bkbits.net/davem/net-2.6

into ppc970.osdl.org:/home/torvalds/v2.6/linux
parents 044836d4 34b0db5b
Generic networking statistics for netlink users
======================================================================
Statistic counters are grouped into structs:

Struct               TLV type            Description
----------------------------------------------------------------------
gnet_stats_basic     TCA_STATS_BASIC     Basic statistics
gnet_stats_rate_est  TCA_STATS_RATE_EST  Rate estimator
gnet_stats_queue     TCA_STATS_QUEUE     Queue statistics
none                 TCA_STATS_APP       Application specific
Collecting:
-----------
Declare the statistic structs you need (the dump example below also
expects the struct to carry the lock protecting its counters):

struct mystruct {
	spinlock_t              lock;   /* protects the counters below */
	struct gnet_stats_basic bstats;
	struct gnet_stats_queue qstats;
	...
};
Update statistics:

mystruct->bstats.packets++;
mystruct->bstats.bytes += skb->len;
mystruct->qstats.backlog += skb->len;
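
In practice these updates usually live in one small helper on the data
path. A minimal sketch (my_count_packet and the exact mystruct layout
are illustrative, not part of the API):

my_count_packet(struct mystruct *p, struct sk_buff *skb)
{
	p->bstats.packets++;
	p->bstats.bytes += skb->len;
	p->qstats.backlog += skb->len;	/* packet is now queued */
}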
Export to userspace (Dump):
---------------------------
my_dumping_routine(struct sk_buff *skb, ...)
{
	struct gnet_dump dump;

	if (gnet_stats_start_copy(skb, TCA_STATS2, &mystruct->lock, &dump) < 0)
		goto rtattr_failure;

	if (gnet_stats_copy_basic(&dump, &mystruct->bstats) < 0 ||
	    gnet_stats_copy_queue(&dump, &mystruct->qstats) < 0 ||
	    gnet_stats_copy_app(&dump, &xstats, sizeof(xstats)) < 0)
		goto rtattr_failure;

	if (gnet_stats_finish_copy(&dump) < 0)
		goto rtattr_failure;
	...
}
TCA_STATS/TCA_XSTATS backward compatibility:
--------------------------------------------
Prior users of struct tc_stats and xstats can maintain backward
compatibility by calling the compat wrappers to keep providing the
existing TLV types.
my_dumping_routine(struct sk_buff *skb, ...)
{
	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
					 TCA_XSTATS, &mystruct->lock, &dump) < 0)
		goto rtattr_failure;
	...
}
A struct tc_stats will be filled out during gnet_stats_copy_* calls
and appended to the skb. TCA_XSTATS is provided if gnet_stats_copy_app
was called.
Locking:
--------
Locks are taken before writing and released once all statistics have
been written. Locks are always released in case of an error. You
are responsible for making sure that the lock is initialized.
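
For example, a minimal setup sketch (assuming mystruct embeds its own
spinlock, as the dump example above does):

struct mystruct *p = kmalloc(sizeof(*p), GFP_KERNEL);

if (p) {
	memset(p, 0, sizeof(*p));
	spin_lock_init(&p->lock);	/* before the first dump */
}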
Rate Estimator:
--------------
0) Prepare an estimator attribute. Most likely this would be done in
   user space. The value of this TLV should contain a tc_estimator
   structure. As usual, such a TLV needs to be 32-bit aligned and
   therefore the length needs to be set appropriately. The estimator
   interval and ewma log need to be converted to the appropriate
   encodings; it is advisable to use tc_estimator.c::tc_setup_estimator()
   as the conversion routine. It takes a time interval in microseconds,
   a time constant also in microseconds, and a struct tc_estimator to
   be populated. The populated tc_estimator can then be transported to
   your code in the kernel in a TLV of type TCA_RATE.
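
A user-space sketch of step 0, assuming iproute2's
tc_estimator.c::tc_setup_estimator() and the libnetlink helper
addattr_l() (n points at the netlink request being assembled, 1024 is
its assumed buffer size):

struct tc_estimator est;

/* 250ms sampling interval and 1s time constant, both in usecs */
if (tc_setup_estimator(250000, 1000000, &est) < 0)
	return -1;

/* append the TCA_RATE TLV; length and alignment are handled here */
addattr_l(n, 1024, TCA_RATE, &est, sizeof(est));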
In the kernel, when setting up:

1) make sure you have the basic stats and rate estimator stats set up
   first.
2) make sure you have initialized the stats lock that is used to
   protect those stats.
3) now initialize a new estimator:

	int ret = gen_new_estimator(my_basicstats, my_rate_est_stats,
				    mystats_lock, attr_with_tcestimator_struct);

	if (ret == 0)
		/* success */
	else
		/* failed */
From now on, every time you dump my_rate_est_stats it will contain
up-to-date info.

Once you are done, call gen_kill_estimator(my_basicstats,
my_rate_est_stats). Make sure that my_basicstats and my_rate_est_stats
are still valid (i.e. still exist) at the time of making this call.
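
A matching teardown sketch (mystruct_destroy is illustrative and
assumes mystruct also embeds a struct gnet_stats_rate_est rate_est
member):

mystruct_destroy(struct mystruct *p)
{
	/* bstats and rate_est are still valid at this point */
	gen_kill_estimator(&p->bstats, &p->rate_est);
	kfree(p);
}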
Authors:
--------
Thomas Graf <tgraf@suug.ch>
Jamal Hadi Salim <hadi@cyberus.ca>
@@ -632,9 +632,9 @@ M: g.liakhovetski@gmx.de
S: Maintained
DECnet NETWORK LAYER
P: Steven Whitehouse
M: SteveW@ACM.org
W: http://www.sucs.swan.ac.uk/~rohan/DECnet/index.html
P: Patrick Caulfield
M: patrick@tykepenguin.com
W: http://linux-decnet.sourceforge.net
L: linux-decnet-user@lists.sourceforge.net
S: Maintained
@@ -102,10 +102,10 @@ struct aes_ctx {
#define E_KEY ctx->E
#define D_KEY ctx->D
static u8 pow_tab[256];
static u8 log_tab[256];
static u8 sbx_tab[256];
static u8 isb_tab[256];
static u8 pow_tab[256] __initdata;
static u8 log_tab[256] __initdata;
static u8 sbx_tab[256] __initdata;
static u8 isb_tab[256] __initdata;
static u32 rco_tab[10];
static u32 ft_tab[4][256];
static u32 it_tab[4][256];
@@ -113,7 +113,7 @@ static u32 it_tab[4][256];
static u32 fl_tab[4][256];
static u32 il_tab[4][256];
static inline u8
static inline u8 __init
f_mult (u8 a, u8 b)
{
u8 aa = log_tab[a], cc = aa + log_tab[b];
@@ -153,7 +153,7 @@ f_mult (u8 a, u8 b)
il_tab[2][byte(bi[(n + 2) & 3],2)] ^ \
il_tab[3][byte(bi[(n + 1) & 3],3)] ^ *(k + n)
static void
static void __init
gen_tabs (void)
{
u32 i, t;
@@ -2187,7 +2187,7 @@ static __u32 twothirdsMD4Transform (__u32 const buf[4], __u32 const in[12])
#undef K3
/* This should not be decreased so low that ISNs wrap too fast. */
#define REKEY_INTERVAL 300
#define REKEY_INTERVAL (300*HZ)
/*
* Bit layout of the tcp sequence numbers (before adding current time):
* bit 24-31: increased after every key exchange
@@ -2213,48 +2213,55 @@ static __u32 twothirdsMD4Transform (__u32 const buf[4], __u32 const in[12])
#define HASH_MASK ( (1<<HASH_BITS)-1 )
static struct keydata {
time_t rekey_time;
__u32 count; // already shifted to the final position
__u32 secret[12];
} ____cacheline_aligned ip_keydata[2];
static spinlock_t ip_lock = SPIN_LOCK_UNLOCKED;
static unsigned int ip_cnt;
static void rekey_seq_generator(void *private_)
{
struct keydata *keyptr;
struct timeval tv;
static void rekey_seq_generator(void *private_);
do_gettimeofday(&tv);
static DECLARE_WORK(rekey_work, rekey_seq_generator, NULL);
spin_lock_bh(&ip_lock);
keyptr = &ip_keydata[ip_cnt&1];
/*
* Lock avoidance:
* The ISN generation runs lockless - it's just a hash over random data.
* State changes happen every 5 minutes when the random key is replaced.
* Synchronization is performed by having two copies of the hash function
* state and rekey_seq_generator always updates the inactive copy.
* The copy is then activated by updating ip_cnt.
* The implementation breaks down if someone blocks the thread
* that processes SYN requests for more than 5 minutes. Should never
* happen, and even if that happens only a not perfectly compliant
* ISN is generated, nothing fatal.
*/
static void rekey_seq_generator(void *private_)
{
struct keydata *keyptr = &ip_keydata[1^(ip_cnt&1)];
keyptr = &ip_keydata[1^(ip_cnt&1)];
keyptr->rekey_time = tv.tv_sec;
get_random_bytes(keyptr->secret, sizeof(keyptr->secret));
keyptr->count = (ip_cnt&COUNT_MASK)<<HASH_BITS;
mb();
smp_wmb();
ip_cnt++;
spin_unlock_bh(&ip_lock);
schedule_delayed_work(&rekey_work, REKEY_INTERVAL);
}
static DECLARE_WORK(rekey_work, rekey_seq_generator, NULL);
static inline struct keydata *check_and_rekey(time_t time)
static inline struct keydata *get_keyptr(void)
{
struct keydata *keyptr = &ip_keydata[ip_cnt&1];
rmb();
if (!keyptr->rekey_time || (time - keyptr->rekey_time) > REKEY_INTERVAL) {
schedule_work(&rekey_work);
}
smp_rmb();
return keyptr;
}
static __init int seqgen_init(void)
{
rekey_seq_generator(NULL);
return 0;
}
late_initcall(seqgen_init);
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
__u32 secure_tcpv6_sequence_number(__u32 *saddr, __u32 *daddr,
__u16 sport, __u16 dport)
@@ -2262,14 +2269,12 @@ __u32 secure_tcpv6_sequence_number(__u32 *saddr, __u32 *daddr,
struct timeval tv;
__u32 seq;
__u32 hash[12];
struct keydata *keyptr;
struct keydata *keyptr = get_keyptr();
/* The procedure is the same as for IPv4, but addresses are longer.
* Thus we must use twothirdsMD4Transform.
*/
do_gettimeofday(&tv); /* We need the usecs below... */
keyptr = check_and_rekey(tv.tv_sec);
memcpy(hash, saddr, 16);
hash[4]=(sport << 16) + dport;
@@ -2277,6 +2282,8 @@ __u32 secure_tcpv6_sequence_number(__u32 *saddr, __u32 *daddr,
seq = twothirdsMD4Transform(daddr, hash) & HASH_MASK;
seq += keyptr->count;
do_gettimeofday(&tv);
seq += tv.tv_usec + tv.tv_sec*1000000;
return seq;
@@ -2290,13 +2297,7 @@ __u32 secure_tcp_sequence_number(__u32 saddr, __u32 daddr,
struct timeval tv;
__u32 seq;
__u32 hash[4];
struct keydata *keyptr;
/*
* Pick a random secret every REKEY_INTERVAL seconds.
*/
do_gettimeofday(&tv); /* We need the usecs below... */
keyptr = check_and_rekey(tv.tv_sec);
struct keydata *keyptr = get_keyptr();
/*
* Pick a unique starting offset for each TCP connection endpoints
@@ -2319,6 +2320,7 @@ __u32 secure_tcp_sequence_number(__u32 saddr, __u32 daddr,
* That's funny, Linux has one built in! Use it!
* (Networks are faster now - should this be increased?)
*/
do_gettimeofday(&tv);
seq += tv.tv_usec + tv.tv_sec*1000000;
#if 0
printk("init_seq(%lx, %lx, %d, %d) = %d\n",
@@ -2337,7 +2339,7 @@ __u32 secure_ip_id(__u32 daddr)
struct keydata *keyptr;
__u32 hash[4];
keyptr = check_and_rekey(get_seconds());
keyptr = get_keyptr();
/*
* Pick a unique starting offset for each IP destination.
@@ -1814,16 +1814,18 @@ static struct net_device_stats *dfx_ctl_get_stats(struct net_device *dev)
/* Fill the bp->stats structure with driver-maintained counters */
bp->stats.rx_packets = bp->rcv_total_frames;
bp->stats.tx_packets = bp->xmt_total_frames;
bp->stats.rx_bytes = bp->rcv_total_bytes;
bp->stats.tx_bytes = bp->xmt_total_bytes;
bp->stats.rx_errors = (u32)(bp->rcv_crc_errors + bp->rcv_frame_status_errors + bp->rcv_length_errors);
bp->stats.tx_errors = bp->xmt_length_errors;
bp->stats.rx_dropped = bp->rcv_discards;
bp->stats.tx_dropped = bp->xmt_discards;
bp->stats.multicast = bp->rcv_multicast_frames;
bp->stats.transmit_collision = 0; /* always zero (0) for FDDI */
bp->stats.gen.rx_packets = bp->rcv_total_frames;
bp->stats.gen.tx_packets = bp->xmt_total_frames;
bp->stats.gen.rx_bytes = bp->rcv_total_bytes;
bp->stats.gen.tx_bytes = bp->xmt_total_bytes;
bp->stats.gen.rx_errors = bp->rcv_crc_errors +
bp->rcv_frame_status_errors +
bp->rcv_length_errors;
bp->stats.gen.tx_errors = bp->xmt_length_errors;
bp->stats.gen.rx_dropped = bp->rcv_discards;
bp->stats.gen.tx_dropped = bp->xmt_discards;
bp->stats.gen.multicast = bp->rcv_multicast_frames;
bp->stats.gen.collisions = 0; /* always zero (0) for FDDI */
/* Get FDDI SMT MIB objects */
@@ -1095,7 +1095,7 @@ static int skfp_send_pkt(struct sk_buff *skb, struct net_device *dev)
*/
if (!(skb->len >= FDDI_K_LLC_ZLEN && skb->len <= FDDI_K_LLC_LEN)) {
bp->MacStat.tx_errors++; /* bump error counter */
bp->MacStat.gen.tx_errors++; /* bump error counter */
// dequeue packets from xmt queue and send them
netif_start_queue(dev);
dev_kfree_skb(skb);
@@ -1546,8 +1546,8 @@ void mac_drv_tx_complete(struct s_smc *smc, volatile struct s_smt_fp_txd *txd)
skb->len, PCI_DMA_TODEVICE);
txd->txd_os.dma_addr = 0;
smc->os.MacStat.tx_packets++; // Count transmitted packets.
smc->os.MacStat.tx_bytes+=skb->len; // Count bytes
smc->os.MacStat.gen.tx_packets++; // Count transmitted packets.
smc->os.MacStat.gen.tx_bytes+=skb->len; // Count bytes
// free the skb
dev_kfree_skb_irq(skb);
@@ -1629,7 +1629,7 @@ void mac_drv_rx_complete(struct s_smc *smc, volatile struct s_smt_fp_rxd *rxd,
skb = rxd->rxd_os.skb;
if (!skb) {
PRINTK(KERN_INFO "No skb in rxd\n");
smc->os.MacStat.rx_errors++;
smc->os.MacStat.gen.rx_errors++;
goto RequeueRxd;
}
virt = skb->data;
@@ -1682,13 +1682,14 @@ void mac_drv_rx_complete(struct s_smc *smc, volatile struct s_smt_fp_rxd *rxd,
}
// Count statistics.
smc->os.MacStat.rx_packets++; // Count indicated receive packets.
smc->os.MacStat.rx_bytes+=len; // Count bytes
smc->os.MacStat.gen.rx_packets++; // Count indicated receive
// packets.
smc->os.MacStat.gen.rx_bytes+=len; // Count bytes.
// virt points to header again
if (virt[1] & 0x01) { // Check group (multicast) bit.
smc->os.MacStat.multicast++;
smc->os.MacStat.gen.multicast++;
}
// deliver frame to system
@@ -1706,7 +1707,8 @@ void mac_drv_rx_complete(struct s_smc *smc, volatile struct s_smt_fp_rxd *rxd,
RequeueRxd:
PRINTK(KERN_INFO "Rx: re-queue RXD.\n");
mac_drv_requeue_rxd(smc, rxd, frag_count);
smc->os.MacStat.rx_errors++; // Count receive packets not indicated.
smc->os.MacStat.gen.rx_errors++; // Count receive packets
// not indicated.
} // mac_drv_rx_complete
@@ -2081,7 +2083,7 @@ void smt_stat_counter(struct s_smc *smc, int stat)
break;
case 1:
PRINTK(KERN_INFO "Receive fifo overflow.\n");
smc->os.MacStat.rx_errors++;
smc->os.MacStat.gen.rx_errors++;
break;
default:
PRINTK(KERN_INFO "Unknown status (%d).\n", stat);
#ifndef __LINUX_GEN_STATS_H
#define __LINUX_GEN_STATS_H
#include <linux/types.h>
enum {
TCA_STATS_UNSPEC,
TCA_STATS_BASIC,
TCA_STATS_RATE_EST,
TCA_STATS_QUEUE,
TCA_STATS_APP,
__TCA_STATS_MAX,
};
#define TCA_STATS_MAX (__TCA_STATS_MAX - 1)
/**
* @bytes: number of seen bytes
* @packets: number of seen packets
*/
struct gnet_stats_basic
{
__u64 bytes;
__u32 packets;
};
/**
* @bps: current byte rate
* @pps: current packet rate
*/
struct gnet_stats_rate_est
{
__u32 bps;
__u32 pps;
};
/**
* @qlen: queue length
* @backlog: backlog size of queue
* @drops: number of dropped packets
* @requeues: number of requeues
*/
struct gnet_stats_queue
{
__u32 qlen;
__u32 backlog;
__u32 drops;
__u32 requeues;
__u32 overlimits;
};
/**
* @interval: sampling period
* @ewma_log: the log of measurement window weight
*/
struct gnet_estimator
{
signed char interval;
unsigned char ewma_log;
};
#endif /* __LINUX_GEN_STATS_H */
@@ -5,7 +5,7 @@
*
* Global definitions for the ANSI FDDI interface.
*
* Version: @(#)if_fddi.h 1.0.1 09/16/96
* Version: @(#)if_fddi.h 1.0.2 Sep 29 2004
*
* Author: Lawrence V. Stefani, <stefani@lkg.dec.com>
*
@@ -103,38 +103,12 @@ struct fddihdr
} __attribute__ ((packed));
/* Define FDDI statistics structure */
struct fddi_statistics
{
__u32 rx_packets; /* total packets received */
__u32 tx_packets; /* total packets transmitted */
__u32 rx_bytes; /* total bytes received */
__u32 tx_bytes; /* total bytes transmitted */
__u32 rx_errors; /* bad packets received */
__u32 tx_errors; /* packet transmit problems */
__u32 rx_dropped; /* no space in linux buffers */
__u32 tx_dropped; /* no space available in linux */
__u32 multicast; /* multicast packets received */
__u32 transmit_collision; /* always 0 for FDDI */
struct fddi_statistics {
/* Generic statistics. */
/* detailed rx_errors */
__u32 rx_length_errors;
__u32 rx_over_errors; /* receiver ring buff overflow */
__u32 rx_crc_errors; /* recved pkt with crc error */
__u32 rx_frame_errors; /* recv'd frame alignment error */
__u32 rx_fifo_errors; /* recv'r fifo overrun */
__u32 rx_missed_errors; /* receiver missed packet */
struct net_device_stats gen;
/* detailed tx_errors */
__u32 tx_aborted_errors;
__u32 tx_carrier_errors;
__u32 tx_fifo_errors;
__u32 tx_heartbeat_errors;
__u32 tx_window_errors;
/* for cslip etc */
__u32 rx_compressed;
__u32 tx_compressed;
/* Detailed FDDI statistics. Adopted from RFC 1512 */
__u8 smt_station_id[8];
#ifndef __NET_GEN_STATS_H
#define __NET_GEN_STATS_H
#include <linux/gen_stats.h>
#include <linux/socket.h>
#include <linux/rtnetlink.h>
#include <linux/pkt_sched.h>
struct gnet_dump
{
spinlock_t * lock;
struct sk_buff * skb;
struct rtattr * tail;
/* Backward compatibility */
int compat_tc_stats;
int compat_xstats;
struct rtattr * xstats;
struct tc_stats tc_stats;
};
extern int gnet_stats_start_copy(struct sk_buff *skb, int type,
spinlock_t *lock, struct gnet_dump *d);
extern int gnet_stats_start_copy_compat(struct sk_buff *skb, int type,
int tc_stats_type,int xstats_type,
spinlock_t *lock, struct gnet_dump *d);
extern int gnet_stats_copy_basic(struct gnet_dump *d,
struct gnet_stats_basic *b);
extern int gnet_stats_copy_rate_est(struct gnet_dump *d,
struct gnet_stats_rate_est *r);
extern int gnet_stats_copy_queue(struct gnet_dump *d,
struct gnet_stats_queue *q);
extern int gnet_stats_copy_app(struct gnet_dump *d, void *st, int len);
extern int gnet_stats_finish_copy(struct gnet_dump *d);
extern int gen_new_estimator(struct gnet_stats_basic *bstats,
struct gnet_stats_rate_est *rate_est,
spinlock_t *stats_lock, struct rtattr *opt);
extern void gen_kill_estimator(struct gnet_stats_basic *bstats,
struct gnet_stats_rate_est *rate_est);
#endif
@@ -2,7 +2,7 @@
# Makefile for the Linux networking core.
#
obj-y := sock.o skbuff.o iovec.o datagram.o stream.o scm.o
obj-y := sock.o skbuff.o iovec.o datagram.o stream.o scm.o gen_stats.o gen_estimator.o
obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
/*
* net/sched/gen_estimator.c Simple rate estimator.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
*
* Changes:
* Jamal Hadi Salim - moved it to net/core and reshuffled
* names to make it usable in general net subsystem.
*/
#include <asm/uaccess.h>
#include <asm/system.h>
#include <asm/bitops.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/jiffies.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/in.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/init.h>
#include <net/sock.h>
#include <net/gen_stats.h>
/*
This code is NOT intended to be used for statistics collection,
its purpose is to provide a base for statistical multiplexing
for controlled load service.
If you need only statistics, run a user level daemon which
periodically reads byte counters.
Unfortunately, rate estimation is not a very easy task.
F.e. I did not find a simple way to estimate the current peak rate
and even failed to formulate the problem 8)8)
So I preferred not to build an estimator into the scheduler,
but to run this task separately.
Ideally, it should be kernel thread(s), but for now it runs
from timers, which puts an apparent upper bound on the number of rated
flows, has minimal overhead when that number is small, and is enough
to handle controlled load service and sets of aggregates.
We measure rate over A=(1<<interval) seconds and evaluate EWMA:
avrate = avrate*(1-W) + rate*W
where W is chosen as negative power of 2: W = 2^(-ewma_log)
The resulting time constant is:
T = A/(-ln(1-W))
NOTES.
* The stored value for avbps is scaled by 2^5, so that maximal
rate is ~1Gbit, avpps is scaled by 2^10.
* Minimal interval is HZ/4=250msec (it is the greatest common divisor
for HZ=100 and HZ=1024 8)), maximal interval
is (HZ*2^EST_MAX_INTERVAL)/4 = 8sec. Shorter intervals
are too expensive, longer ones can be implemented
at user level painlessly.
*/
#define EST_MAX_INTERVAL 5
struct gen_estimator
{
struct gen_estimator *next;
struct gnet_stats_basic *bstats;
struct gnet_stats_rate_est *rate_est;
spinlock_t *stats_lock;
unsigned interval;
int ewma_log;
u64 last_bytes;
u32 last_packets;
u32 avpps;
u32 avbps;
};
struct gen_estimator_head
{
struct timer_list timer;
struct gen_estimator *list;
};
static struct gen_estimator_head elist[EST_MAX_INTERVAL+1];
/* Estimator array lock */
static rwlock_t est_lock = RW_LOCK_UNLOCKED;
static void est_timer(unsigned long arg)
{
int idx = (int)arg;
struct gen_estimator *e;
read_lock(&est_lock);
for (e = elist[idx].list; e; e = e->next) {
u64 nbytes;
u32 npackets;
u32 rate;
spin_lock(e->stats_lock);
nbytes = e->bstats->bytes;
npackets = e->bstats->packets;
rate = (nbytes - e->last_bytes)<<(7 - idx);
e->last_bytes = nbytes;
e->avbps += ((long)rate - (long)e->avbps) >> e->ewma_log;
e->rate_est->bps = (e->avbps+0xF)>>5;
rate = (npackets - e->last_packets)<<(12 - idx);
e->last_packets = npackets;
e->avpps += ((long)rate - (long)e->avpps) >> e->ewma_log;
e->rate_est->pps = (e->avpps+0x1FF)>>10;
spin_unlock(e->stats_lock);
}
mod_timer(&elist[idx].timer, jiffies + ((HZ<<idx)/4));
read_unlock(&est_lock);
}
int gen_new_estimator(struct gnet_stats_basic *bstats,
struct gnet_stats_rate_est *rate_est, spinlock_t *stats_lock, struct rtattr *opt)
{
struct gen_estimator *est;
struct gnet_estimator *parm = RTA_DATA(opt);
if (RTA_PAYLOAD(opt) < sizeof(*parm))
return -EINVAL;
if (parm->interval < -2 || parm->interval > 3)
return -EINVAL;
est = kmalloc(sizeof(*est), GFP_KERNEL);
if (est == NULL)
return -ENOBUFS;
memset(est, 0, sizeof(*est));
est->interval = parm->interval + 2;
est->bstats = bstats;
est->rate_est = rate_est;
est->stats_lock = stats_lock;
est->ewma_log = parm->ewma_log;
est->last_bytes = bstats->bytes;
est->avbps = rate_est->bps<<5;
est->last_packets = bstats->packets;
est->avpps = rate_est->pps<<10;
est->next = elist[est->interval].list;
if (est->next == NULL) {
init_timer(&elist[est->interval].timer);
elist[est->interval].timer.data = est->interval;
elist[est->interval].timer.expires = jiffies + ((HZ<<est->interval)/4);
elist[est->interval].timer.function = est_timer;
add_timer(&elist[est->interval].timer);
}
write_lock_bh(&est_lock);
elist[est->interval].list = est;
write_unlock_bh(&est_lock);
return 0;
}
void gen_kill_estimator(struct gnet_stats_basic *bstats,
struct gnet_stats_rate_est *rate_est)
{
int idx;
struct gen_estimator *est, **pest;
for (idx=0; idx <= EST_MAX_INTERVAL; idx++) {
int killed = 0;
pest = &elist[idx].list;
while ((est=*pest) != NULL) {
if (est->rate_est != rate_est || est->bstats != bstats) {
pest = &est->next;
continue;
}
write_lock_bh(&est_lock);
*pest = est->next;
write_unlock_bh(&est_lock);
kfree(est);
killed++;
}
if (killed && elist[idx].list == NULL)
del_timer(&elist[idx].timer);
}
}
EXPORT_SYMBOL(gen_kill_estimator);
EXPORT_SYMBOL(gen_new_estimator);
/*
* net/core/gen_stats.c
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Authors: Thomas Graf <tgraf@suug.ch>
* Jamal Hadi Salim
* Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
*
* See Documentation/networking/gen_stats.txt
*/
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/socket.h>
#include <linux/rtnetlink.h>
#include <linux/gen_stats.h>
#include <net/gen_stats.h>
static inline int
gnet_stats_copy(struct gnet_dump *d, int type, void *buf, int size)
{
RTA_PUT(d->skb, type, size, buf);
return 0;
rtattr_failure:
spin_unlock_bh(d->lock);
return -1;
}
int
gnet_stats_start_copy_compat(struct sk_buff *skb, int type, int tc_stats_type,
int xstats_type, spinlock_t *lock, struct gnet_dump *d)
{
spin_lock_bh(lock);
d->lock = lock;
d->tail = (struct rtattr *) skb->tail;
d->skb = skb;
d->compat_tc_stats = tc_stats_type;
d->compat_xstats = xstats_type;
d->xstats = NULL;
if (d->compat_tc_stats)
memset(&d->tc_stats, 0, sizeof(d->tc_stats));
return gnet_stats_copy(d, type, NULL, 0);
}
int
gnet_stats_start_copy(struct sk_buff *skb, int type, spinlock_t *lock,
struct gnet_dump *d)
{
return gnet_stats_start_copy_compat(skb, type, 0, 0, lock, d);
}
int
gnet_stats_copy_basic(struct gnet_dump *d, struct gnet_stats_basic *b)
{
if (d->compat_tc_stats) {
d->tc_stats.bytes = b->bytes;
d->tc_stats.packets = b->packets;
}
return gnet_stats_copy(d, TCA_STATS_BASIC, b, sizeof(*b));
}
int
gnet_stats_copy_rate_est(struct gnet_dump *d, struct gnet_stats_rate_est *r)
{
if (d->compat_tc_stats) {
d->tc_stats.bps = r->bps;
d->tc_stats.pps = r->pps;
}
return gnet_stats_copy(d, TCA_STATS_RATE_EST, r, sizeof(*r));
}
int
gnet_stats_copy_queue(struct gnet_dump *d, struct gnet_stats_queue *q)
{
if (d->compat_tc_stats) {
d->tc_stats.drops = q->drops;
d->tc_stats.qlen = q->qlen;
d->tc_stats.backlog = q->backlog;
d->tc_stats.overlimits = q->overlimits;
}
return gnet_stats_copy(d, TCA_STATS_QUEUE, q, sizeof(*q));
}
int
gnet_stats_copy_app(struct gnet_dump *d, void *st, int len)
{
if (d->compat_xstats)
d->xstats = (struct rtattr *) d->skb->tail;
return gnet_stats_copy(d, TCA_STATS_APP, st, len);
}
int
gnet_stats_finish_copy(struct gnet_dump *d)
{
d->tail->rta_len = d->skb->tail - (u8 *) d->tail;
if (d->compat_tc_stats)
if (gnet_stats_copy(d, d->compat_tc_stats, &d->tc_stats,
sizeof(d->tc_stats)) < 0)
return -1;
if (d->compat_xstats && d->xstats) {
if (gnet_stats_copy(d, d->compat_xstats, RTA_DATA(d->xstats),
RTA_PAYLOAD(d->xstats)) < 0)
return -1;
}
spin_unlock_bh(d->lock);
return 0;
}
EXPORT_SYMBOL(gnet_stats_start_copy);
EXPORT_SYMBOL(gnet_stats_copy_basic);
EXPORT_SYMBOL(gnet_stats_copy_rate_est);
EXPORT_SYMBOL(gnet_stats_copy_queue);
EXPORT_SYMBOL(gnet_stats_copy_app);
EXPORT_SYMBOL(gnet_stats_finish_copy);
@@ -810,9 +810,15 @@ static void neigh_timer_handler(unsigned long arg)
add_timer(&neigh->timer);
}
if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
struct sk_buff *skb = skb_peek(&neigh->arp_queue);
/* keep skb alive even if arp_queue overflows */
if (skb)
skb_get(skb);
write_unlock(&neigh->lock);
neigh->ops->solicit(neigh, skb_peek(&neigh->arp_queue));
neigh->ops->solicit(neigh, skb);
atomic_inc(&neigh->probes);
if (skb)
kfree_skb(skb);
} else {
out:
write_unlock(&neigh->lock);
@@ -1005,8 +1005,26 @@ int arp_req_set(struct arpreq *r, struct net_device * dev)
if (!dev)
return -EINVAL;
}
if (r->arp_ha.sa_family != dev->type)
return -EINVAL;
switch (dev->type) {
#ifdef CONFIG_FDDI
case ARPHRD_FDDI:
/*
* According to RFC 1390, FDDI devices should accept ARP
* hardware types of 1 (Ethernet). However, to be more
* robust, we'll accept hardware types of either 1 (Ethernet)
* or 6 (IEEE 802.2).
*/
if (r->arp_ha.sa_family != ARPHRD_FDDI &&
r->arp_ha.sa_family != ARPHRD_ETHER &&
r->arp_ha.sa_family != ARPHRD_IEEE802)
return -EINVAL;
break;
#endif
default:
if (r->arp_ha.sa_family != dev->type)
return -EINVAL;
break;
}
neigh = __neigh_lookup_errno(&arp_tbl, &ip, dev);
err = PTR_ERR(neigh);
@@ -720,6 +720,8 @@ static void __init ic_bootp_send_if(struct ic_device *d, unsigned long jiffies_d
b->htype = dev->type;
else if (dev->type == ARPHRD_IEEE802_TR) /* fix for token ring */
b->htype = ARPHRD_IEEE802;
else if (dev->type == ARPHRD_FDDI)
b->htype = ARPHRD_ETHER;
else {
printk("Unknown ARP type 0x%04x for device %s\n", dev->type, dev->name);
b->htype = dev->type; /* can cause undefined behavior */
@@ -16,6 +16,7 @@
* Alexandre Cassen : Added master & backup support at a time.
* Alexandre Cassen : Added SyncID support for incoming sync
* messages filtering.
* Justin Ossevoort : Fix endian problem on sync message size.
*/
#include <linux/module.h>
@@ -279,6 +280,9 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
char *p;
int i;
/* Convert size back to host byte order */
m->size = ntohs(m->size);
if (buflen != m->size) {
IP_VS_ERR("bogus message\n");
return;
@@ -569,6 +573,19 @@ ip_vs_send_async(struct socket *sock, const char *buffer, const size_t length)
return len;
}
static void
ip_vs_send_sync_msg(struct socket *sock, struct ip_vs_sync_mesg *msg)
{
int msize;
msize = msg->size;
/* Put size in network byte order */
msg->size = htons(msg->size);
if (ip_vs_send_async(sock, (char *)msg, msize) != msize)
IP_VS_ERR("ip_vs_send_async error\n");
}
static int
ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen)
@@ -605,7 +622,6 @@ static void sync_master_loop(void)
{
struct socket *sock;
struct ip_vs_sync_buff *sb;
struct ip_vs_sync_mesg *m;
/* create the sending multicast socket */
sock = make_send_sock();
@@ -618,19 +634,13 @@
for (;;) {
while ((sb=sb_dequeue())) {
m = sb->mesg;
if (ip_vs_send_async(sock, (char *)m,
m->size) != m->size)
IP_VS_ERR("ip_vs_send_async error\n");
ip_vs_send_sync_msg(sock, sb->mesg);
ip_vs_sync_buff_release(sb);
}
/* check if entries stay in curr_sb for 2 seconds */
if ((sb = get_curr_sync_buff(2*HZ))) {
m = sb->mesg;
if (ip_vs_send_async(sock, (char *)m,
m->size) != m->size)
IP_VS_ERR("ip_vs_send_async error\n");
ip_vs_send_sync_msg(sock, sb->mesg);
ip_vs_sync_buff_release(sb);
}
@@ -66,15 +66,11 @@
* Minimal interval is HZ/4=250msec (it is the greatest common divisor
for HZ=100 and HZ=1024 8)), maximal interval
is (HZ/4)*2^EST_MAX_INTERVAL = 8sec. Shorter intervals
is (HZ*2^EST_MAX_INTERVAL)/4 = 8sec. Shorter intervals
are too expensive, longer ones can be implemented
at user level painlessly.
*/
#if (HZ%4) != 0
#error Bad HZ value.
#endif
#define EST_MAX_INTERVAL 5
struct qdisc_estimator
@@ -128,7 +124,7 @@ static void est_timer(unsigned long arg)
spin_unlock(e->stats_lock);
}
mod_timer(&elist[idx].timer, jiffies + ((HZ/4)<<idx));
mod_timer(&elist[idx].timer, jiffies + ((HZ<<idx)/4));
read_unlock(&est_lock);
}
@@ -161,7 +157,7 @@ int qdisc_new_estimator(struct tc_stats *stats, spinlock_t *stats_lock, struct r
if (est->next == NULL) {
init_timer(&elist[est->interval].timer);
elist[est->interval].timer.data = est->interval;
elist[est->interval].timer.expires = jiffies + ((HZ/4)<<est->interval);
elist[est->interval].timer.expires = jiffies + ((HZ<<est->interval)/4);
elist[est->interval].timer.function = est_timer;
add_timer(&elist[est->interval].timer);
}
@@ -1675,7 +1675,6 @@ cbq_dump_class(struct Qdisc *sch, unsigned long arg,
cl->xstats.undertime = 0;
if (!PSCHED_IS_PASTPERFECT(cl->undertime))
cl->xstats.undertime = PSCHED_TDIFF(cl->undertime, q->now);
q->link.xstats.avgidle = q->link.avgidle;
if (cbq_copy_xstats(skb, &cl->xstats)) {
spin_unlock_bh(&sch->dev->queue_lock);
goto rtattr_failure;