Commit 63aff461 authored by David S. Miller's avatar David S. Miller

Merge nuts.ninka.net:/home/davem/src/BK/network-2.5

into nuts.ninka.net:/home/davem/src/BK/net-2.5
parents 6be102ce 205d4b20
......@@ -307,7 +307,8 @@ enum
NET_IPV4_ICMP_RATEMASK=90,
NET_TCP_TW_REUSE=91,
NET_TCP_FRTO=92,
NET_TCP_LOW_LATENCY=93
NET_TCP_LOW_LATENCY=93,
NET_IPV4_IPFRAG_SECRET_INTERVAL=94,
};
enum {
......
......@@ -208,6 +208,8 @@ struct tcp_tw_bucket {
#endif
};
#define tcptw_sk(__sk) ((struct tcp_tw_bucket *)(__sk))
extern kmem_cache_t *tcp_timewait_cachep;
static inline void tcp_tw_put(struct tcp_tw_bucket *tw)
......@@ -248,6 +250,10 @@ extern void tcp_tw_deschedule(struct tcp_tw_bucket *tw);
(((*((__u64 *)&(inet_sk(__sk)->daddr)))== (__cookie)) && \
((*((__u32 *)&(inet_sk(__sk)->dport)))== (__ports)) && \
(!((__sk)->bound_dev_if) || ((__sk)->bound_dev_if == (__dif))))
#define TCP_IPV4_TW_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\
(((*((__u64 *)&(tcptw_sk(__sk)->daddr)))== (__cookie)) && \
((*((__u32 *)&(tcptw_sk(__sk)->dport)))== (__ports)) && \
(!((__sk)->bound_dev_if) || ((__sk)->bound_dev_if == (__dif))))
#else /* 32-bit arch */
#define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr)
#define TCP_IPV4_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\
......@@ -255,6 +261,11 @@ extern void tcp_tw_deschedule(struct tcp_tw_bucket *tw);
(inet_sk(__sk)->rcv_saddr == (__daddr)) && \
((*((__u32 *)&(inet_sk(__sk)->dport)))== (__ports)) && \
(!((__sk)->bound_dev_if) || ((__sk)->bound_dev_if == (__dif))))
#define TCP_IPV4_TW_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\
((tcptw_sk(__sk)->daddr == (__saddr)) && \
(tcptw_sk(__sk)->rcv_saddr == (__daddr)) && \
((*((__u32 *)&(tcptw_sk(__sk)->dport)))== (__ports)) && \
(!((__sk)->bound_dev_if) || ((__sk)->bound_dev_if == (__dif))))
#endif /* 64-bit arch */
#define TCP_IPV6_MATCH(__sk, __saddr, __daddr, __ports, __dif) \
......
......@@ -41,9 +41,6 @@ static void br_pass_frame_up(struct net_bridge *br, struct sk_buff *skb)
indev = skb->dev;
skb->dev = &br->dev;
skb->pkt_type = PACKET_HOST;
skb_push(skb, ETH_HLEN);
skb->protocol = eth_type_trans(skb, &br->dev);
NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_IN, skb, indev, NULL,
br_pass_frame_up_finish);
......
......@@ -199,6 +199,8 @@ void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir,
fle->genid = atomic_read(&flow_cache_genid);
fle->object = obj;
fle->object_ref = obj_ref;
if (obj)
atomic_inc(fle->object_ref);
flow_count(cpu)++;
}
......
......@@ -1122,6 +1122,7 @@ static int __init init_ipv4_mibs(void)
}
int ipv4_proc_init(void);
extern void ipfrag_init(void);
static int __init inet_init(void)
{
......@@ -1224,6 +1225,9 @@ static int __init inet_init(void)
printk(KERN_CRIT "inet_init: Cannot init ipv4 mibs\n"); ;
ipv4_proc_init();
ipfrag_init();
return 0;
}
......
......@@ -31,6 +31,8 @@
#include <linux/ip.h>
#include <linux/icmp.h>
#include <linux/netdevice.h>
#include <linux/jhash.h>
#include <linux/random.h>
#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
......@@ -97,6 +99,7 @@ struct ipq {
/* Per-bucket lock is easy to add now. */
static struct ipq *ipq_hash[IPQ_HASHSZ];
static rwlock_t ipfrag_lock = RW_LOCK_UNLOCKED;
static u32 ipfrag_hash_rnd;
static LIST_HEAD(ipq_lru_list);
int ip_frag_nqueues = 0;
......@@ -116,21 +119,51 @@ static __inline__ void ipq_unlink(struct ipq *ipq)
write_unlock(&ipfrag_lock);
}
/*
* Was: ((((id) >> 1) ^ (saddr) ^ (daddr) ^ (prot)) & (IPQ_HASHSZ - 1))
*
* I see, I see evil hand of bigendian mafia. On Intel all the packets hit
* one hash bucket with this hash function. 8)
*/
static __inline__ unsigned int ipqhashfn(u16 id, u32 saddr, u32 daddr, u8 prot)
static unsigned int ipqhashfn(u16 id, u32 saddr, u32 daddr, u8 prot)
{
unsigned int h = saddr ^ daddr;
h ^= (h>>16)^id;
h ^= (h>>8)^prot;
return h & (IPQ_HASHSZ - 1);
return jhash_3words((u32)id << 16 | prot, saddr, daddr,
ipfrag_hash_rnd) & (IPQ_HASHSZ - 1);
}
static struct timer_list ipfrag_secret_timer;
int sysctl_ipfrag_secret_interval = 10 * 60 * HZ;
static void ipfrag_secret_rebuild(unsigned long dummy)
{
unsigned long now = jiffies;
int i;
write_lock(&ipfrag_lock);
get_random_bytes(&ipfrag_hash_rnd, sizeof(u32));
for (i = 0; i < IPQ_HASHSZ; i++) {
struct ipq *q;
q = ipq_hash[i];
while (q) {
struct ipq *next = q->next;
unsigned int hval = ipqhashfn(q->id, q->saddr,
q->daddr, q->protocol);
if (hval != i) {
/* Unlink. */
if (q->next)
q->next->pprev = q->pprev;
*q->pprev = q->next;
/* Relink to new hash chain. */
if ((q->next = ipq_hash[hval]) != NULL)
q->next->pprev = &q->next;
ipq_hash[hval] = q;
q->pprev = &ipq_hash[hval];
}
q = next;
}
}
write_unlock(&ipfrag_lock);
mod_timer(&ipfrag_secret_timer, now + sysctl_ipfrag_secret_interval);
}
atomic_t ip_frag_mem = ATOMIC_INIT(0); /* Memory used for fragments */
......@@ -631,3 +664,14 @@ struct sk_buff *ip_defrag(struct sk_buff *skb)
kfree_skb(skb);
return NULL;
}
void ipfrag_init(void)
{
ipfrag_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
(jiffies ^ (jiffies >> 6)));
init_timer(&ipfrag_secret_timer);
ipfrag_secret_timer.function = ipfrag_secret_rebuild;
ipfrag_secret_timer.expires = jiffies + sysctl_ipfrag_secret_interval;
add_timer(&ipfrag_secret_timer);
}
......@@ -27,6 +27,7 @@ extern int sysctl_icmp_ignore_bogus_error_responses;
extern int sysctl_ipfrag_low_thresh;
extern int sysctl_ipfrag_high_thresh;
extern int sysctl_ipfrag_time;
extern int sysctl_ipfrag_secret_interval;
/* From ip_output.c */
extern int sysctl_ip_dynaddr;
......@@ -572,6 +573,15 @@ ctl_table ipv4_table[] = {
.mode = 0644,
.proc_handler = &proc_dointvec
},
{
.ctl_name = NET_IPV4_IPFRAG_SECRET_INTERVAL,
.procname = "ipfrag_secret_interval",
.data = &sysctl_ipfrag_secret_interval,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec_jiffies,
.strategy = &sysctl_jiffies
},
{ .ctl_name = 0 }
};
......
......@@ -509,7 +509,7 @@ static inline struct sock *__tcp_v4_lookup_established(u32 saddr, u16 sport,
/* Must check for a TIME_WAIT'er before going to listener hash. */
for (sk = (head + tcp_ehash_size)->chain; sk; sk = sk->next)
if (TCP_IPV4_MATCH(sk, acookie, saddr, daddr, ports, dif))
if (TCP_IPV4_TW_MATCH(sk, acookie, saddr, daddr, ports, dif))
goto hit;
out:
read_unlock(&head->lock);
......@@ -570,7 +570,7 @@ static int __tcp_v4_check_established(struct sock *sk, __u16 lport,
skp = &sk2->next) {
tw = (struct tcp_tw_bucket *)sk2;
if (TCP_IPV4_MATCH(sk2, acookie, saddr, daddr, ports, dif)) {
if (TCP_IPV4_TW_MATCH(sk2, acookie, saddr, daddr, ports, dif)) {
struct tcp_opt *tp = tcp_sk(sk);
/* With PAWS, it is safe from the viewpoint
......
......@@ -389,9 +389,14 @@ int ndisc_output(struct sk_buff *skb)
return -EINVAL;
}
static inline void ndisc_rt_init(struct rt6_info *rt, struct net_device *dev,
static inline struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
struct neighbour *neigh)
{
struct rt6_info *rt = ip6_dst_alloc();
if (unlikely(rt == NULL))
goto out;
rt->rt6i_dev = dev;
rt->rt6i_nexthop = neigh;
rt->rt6i_expires = 0;
......@@ -399,6 +404,8 @@ static inline void ndisc_rt_init(struct rt6_info *rt, struct net_device *dev,
rt->rt6i_metric = 0;
rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
rt->u.dst.output = ndisc_output;
out:
return (struct dst_entry *)rt;
}
static inline void ndisc_flow_init(struct flowi *fl, u8 type,
......@@ -420,7 +427,6 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
struct inet6_ifaddr *ifp;
struct inet6_dev *idev;
struct flowi fl;
struct rt6_info *rt = NULL;
struct dst_entry* dst;
struct sock *sk = ndisc_socket->sk;
struct in6_addr *src_addr;
......@@ -442,14 +448,11 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
src_addr = &tmpaddr;
}
rt = ip6_dst_alloc();
if (!rt)
return;
ndisc_flow_init(&fl, NDISC_NEIGHBOUR_ADVERTISEMENT, src_addr, daddr);
ndisc_rt_init(rt, dev, neigh);
dst = (struct dst_entry*)rt;
dst = ndisc_dst_alloc(dev, neigh);
if (!dst)
return;
err = xfrm_lookup(&dst, &fl, NULL, 0);
if (err < 0) {
......@@ -516,7 +519,6 @@ void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh,
struct in6_addr *daddr, struct in6_addr *saddr)
{
struct flowi fl;
struct rt6_info *rt = NULL;
struct dst_entry* dst;
struct inet6_dev *idev;
struct sock *sk = ndisc_socket->sk;
......@@ -533,14 +535,11 @@ void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh,
saddr = &addr_buf;
}
rt = ip6_dst_alloc();
if (!rt)
return;
ndisc_flow_init(&fl, NDISC_NEIGHBOUR_SOLICITATION, saddr, daddr);
ndisc_rt_init(rt, dev, neigh);
dst = (struct dst_entry*)rt;
dst = ndisc_dst_alloc(dev, neigh);
if (!dst)
return;
dst_clone(dst);
err = xfrm_lookup(&dst, &fl, NULL, 0);
......@@ -599,7 +598,6 @@ void ndisc_send_rs(struct net_device *dev, struct in6_addr *saddr,
struct in6_addr *daddr)
{
struct flowi fl;
struct rt6_info *rt = NULL;
struct dst_entry* dst;
struct inet6_dev *idev;
struct sock *sk = ndisc_socket->sk;
......@@ -609,14 +607,11 @@ void ndisc_send_rs(struct net_device *dev, struct in6_addr *saddr,
int len;
int err;
rt = ip6_dst_alloc();
if (!rt)
return;
ndisc_flow_init(&fl, NDISC_ROUTER_SOLICITATION, saddr, daddr);
ndisc_rt_init(rt, dev, NULL);
dst = (struct dst_entry*)rt;
dst = ndisc_dst_alloc(dev, NULL);
if (!dst)
return;
dst_clone(dst);
err = xfrm_lookup(&dst, &fl, NULL, 0);
......@@ -1297,12 +1292,6 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
int hlen;
dev = skb->dev;
rt = rt6_lookup(&skb->nh.ipv6h->saddr, NULL, dev->ifindex, 1);
if (rt == NULL)
return;
dst = (struct dst_entry*)rt;
if (ipv6_get_lladdr(dev, &saddr_buf)) {
ND_PRINTK1("redirect: no link_local addr for dev\n");
......@@ -1311,7 +1300,12 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
ndisc_flow_init(&fl, NDISC_REDIRECT, &saddr_buf, &skb->nh.ipv6h->saddr);
rt = rt6_lookup(&skb->nh.ipv6h->saddr, NULL, dev->ifindex, 1);
if (rt == NULL)
return;
dst = &rt->u.dst;
dst_clone(dst);
err = xfrm_lookup(&dst, &fl, NULL, 0);
if (err) {
dst_release(dst);
......
......@@ -24,6 +24,9 @@
* Alexey Kuznetsov SMP races, threading, cleanup.
* Patrick McHardy LRU queue of frag heads for evictor.
* Mitsuru KANDA @USAGI Register inet6_protocol{}.
* David Stevens and
* YOSHIFUJI,H. @USAGI Always remove fragment header to
* calculate ICV correctly.
*/
#include <linux/config.h>
#include <linux/errno.h>
......@@ -38,6 +41,8 @@
#include <linux/in6.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>
#include <linux/jhash.h>
#include <net/sock.h>
#include <net/snmp.h>
......@@ -99,6 +104,7 @@ struct frag_queue
static struct frag_queue *ip6_frag_hash[IP6Q_HASHSZ];
static rwlock_t ip6_frag_lock = RW_LOCK_UNLOCKED;
static u32 ip6_frag_hash_rnd;
static LIST_HEAD(ip6_frag_lru_list);
int ip6_frag_nqueues = 0;
......@@ -118,16 +124,73 @@ static __inline__ void fq_unlink(struct frag_queue *fq)
write_unlock(&ip6_frag_lock);
}
static __inline__ unsigned int ip6qhashfn(u32 id, struct in6_addr *saddr,
static unsigned int ip6qhashfn(u32 id, struct in6_addr *saddr,
struct in6_addr *daddr)
{
unsigned int h = saddr->s6_addr32[3] ^ daddr->s6_addr32[3] ^ id;
u32 a, b, c;
h ^= (h>>16);
h ^= (h>>8);
return h & (IP6Q_HASHSZ - 1);
a = saddr->s6_addr32[0];
b = saddr->s6_addr32[1];
c = saddr->s6_addr32[2];
a += JHASH_GOLDEN_RATIO;
b += JHASH_GOLDEN_RATIO;
c += ip6_frag_hash_rnd;
__jhash_mix(a, b, c);
a += saddr->s6_addr32[3];
b += daddr->s6_addr32[0];
c += daddr->s6_addr32[1];
__jhash_mix(a, b, c);
a += daddr->s6_addr32[2];
b += daddr->s6_addr32[3];
c += id;
__jhash_mix(a, b, c);
return c & (IP6Q_HASHSZ - 1);
}
static struct timer_list ip6_frag_secret_timer;
static int ip6_frag_secret_interval = 10 * 60 * HZ;
static void ip6_frag_secret_rebuild(unsigned long dummy)
{
unsigned long now = jiffies;
int i;
write_lock(&ip6_frag_lock);
get_random_bytes(&ip6_frag_hash_rnd, sizeof(u32));
for (i = 0; i < IP6Q_HASHSZ; i++) {
struct frag_queue *q;
q = ip6_frag_hash[i];
while (q) {
struct frag_queue *next = q->next;
unsigned int hval = ip6qhashfn(q->id,
&q->saddr,
&q->daddr);
if (hval != i) {
/* Unlink. */
if (q->next)
q->next->pprev = q->pprev;
*q->pprev = q->next;
/* Relink to new hash chain. */
if ((q->next = ip6_frag_hash[hval]) != NULL)
q->next->pprev = &q->next;
ip6_frag_hash[hval] = q;
q->pprev = &ip6_frag_hash[hval];
}
q = next;
}
}
write_unlock(&ip6_frag_lock);
mod_timer(&ip6_frag_secret_timer, now + ip6_frag_secret_interval);
}
atomic_t ip6_frag_mem = ATOMIC_INIT(0);
......@@ -524,7 +587,6 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in,
struct net_device *dev)
{
struct sk_buff *fp, *head = fq->fragments;
int remove_fraghdr = 0;
int payload_len;
unsigned int nhoff;
......@@ -537,12 +599,8 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in,
payload_len = (head->data - head->nh.raw) - sizeof(struct ipv6hdr) + fq->len;
nhoff = head->h.raw - head->nh.raw;
if (payload_len > 65535) {
payload_len -= 8;
if (payload_len > 65535)
if (payload_len > 65535 + 8)
goto out_oversize;
remove_fraghdr = 1;
}
/* Head of list must not be cloned. */
if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC))
......@@ -571,18 +629,13 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in,
atomic_add(clone->truesize, &ip6_frag_mem);
}
/* Normally we do not remove frag header from datagram, but
* we have to do this and to relocate header, when payload
* is > 65535-8. */
if (remove_fraghdr) {
/* We have to remove fragment header from datagram and to relocate
* header in order to calculate ICV correctly. */
nhoff = fq->nhoffset;
head->nh.raw[nhoff] = head->h.raw[0];
memmove(head->head+8, head->head, (head->data-head->head)-8);
head->mac.raw += 8;
head->nh.raw += 8;
} else {
((struct frag_hdr*)head->h.raw)->frag_off = 0;
}
skb_shinfo(head)->frag_list = head->next;
head->h.raw = head->data;
......@@ -696,4 +749,12 @@ void __init ipv6_frag_init(void)
{
if (inet6_add_protocol(&frag_protocol, IPPROTO_FRAGMENT) < 0)
printk(KERN_ERR "ipv6_frag_init: Could not register protocol\n");
ip6_frag_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
(jiffies ^ (jiffies >> 6)));
init_timer(&ip6_frag_secret_timer);
ip6_frag_secret_timer.function = ip6_frag_secret_rebuild;
ip6_frag_secret_timer.expires = jiffies + ip6_frag_secret_interval;
add_timer(&ip6_frag_secret_timer);
}
......@@ -394,9 +394,22 @@ inline struct sock *tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
static u32 tcp_v6_synq_hash(struct in6_addr *raddr, u16 rport, u32 rnd)
{
return (jhash_3words(raddr->s6_addr32[0] ^ raddr->s6_addr32[1],
raddr->s6_addr32[2] ^ raddr->s6_addr32[3],
(u32) rport, rnd) & (TCP_SYNQ_HSIZE - 1));
u32 a, b, c;
a = raddr->s6_addr32[0];
b = raddr->s6_addr32[1];
c = raddr->s6_addr32[2];
a += JHASH_GOLDEN_RATIO;
b += JHASH_GOLDEN_RATIO;
c += rnd;
__jhash_mix(a, b, c);
a += raddr->s6_addr32[3];
b += (u32) rport;
__jhash_mix(a, b, c);
return c & (TCP_SYNQ_HSIZE - 1);
}
static struct open_request *tcp_v6_search_req(struct tcp_opt *tp,
......
......@@ -244,22 +244,34 @@ static u32 xfrm_gen_index(int dir)
int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
{
struct xfrm_policy *pol, **p;
struct xfrm_policy *delpol = NULL;
struct xfrm_policy **newpos = NULL;
write_lock_bh(&xfrm_policy_lock);
for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL; p = &pol->next) {
if (memcmp(&policy->selector, &pol->selector, sizeof(pol->selector)) == 0) {
if (!delpol && memcmp(&policy->selector, &pol->selector, sizeof(pol->selector)) == 0) {
if (excl) {
write_unlock_bh(&xfrm_policy_lock);
return -EEXIST;
}
*p = pol->next;
delpol = pol;
if (policy->priority > pol->priority)
continue;
} else if (policy->priority >= pol->priority)
continue;
if (!newpos)
newpos = p;
if (delpol)
break;
}
}
if (newpos)
p = newpos;
xfrm_pol_hold(policy);
policy->next = pol ? pol->next : NULL;
policy->next = *p;
*p = policy;
atomic_inc(&flow_cache_genid);
policy->index = pol ? pol->index : xfrm_gen_index(dir);
policy->index = delpol ? delpol->index : xfrm_gen_index(dir);
policy->curlft.add_time = (unsigned long)xtime.tv_sec;
policy->curlft.use_time = 0;
if (policy->lft.hard_add_expires_seconds &&
......@@ -267,10 +279,10 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
xfrm_pol_hold(policy);
write_unlock_bh(&xfrm_policy_lock);
if (pol) {
atomic_dec(&pol->refcnt);
xfrm_policy_kill(pol);
xfrm_pol_put(pol);
if (delpol) {
atomic_dec(&delpol->refcnt);
xfrm_policy_kill(delpol);
xfrm_pol_put(delpol);
}
return 0;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment