Commit 2407e34e authored by David S. Miller

Merge nuts.davemloft.net:/disk1/BK/network-2.6

into nuts.davemloft.net:/disk1/BK/net-2.6
parents f0fdf5f8 faf1633b
This diff is collapsed.
...@@ -16,7 +16,9 @@ obj-$(CONFIG_LLC) += llc/ ...@@ -16,7 +16,9 @@ obj-$(CONFIG_LLC) += llc/
obj-$(CONFIG_NET) += ethernet/ 802/ sched/ netlink/ obj-$(CONFIG_NET) += ethernet/ 802/ sched/ netlink/
obj-$(CONFIG_INET) += ipv4/ xfrm/ obj-$(CONFIG_INET) += ipv4/ xfrm/
obj-$(CONFIG_UNIX) += unix/ obj-$(CONFIG_UNIX) += unix/
obj-$(CONFIG_IPV6) += ipv6/ ifneq ($(CONFIG_IPV6),)
obj-y += ipv6/
endif
obj-$(CONFIG_PACKET) += packet/ obj-$(CONFIG_PACKET) += packet/
obj-$(CONFIG_NET_KEY) += key/ obj-$(CONFIG_NET_KEY) += key/
obj-$(CONFIG_NET_SCHED) += sched/ obj-$(CONFIG_NET_SCHED) += sched/
......
...@@ -124,7 +124,16 @@ ip_nat_fn(unsigned int hooknum, ...@@ -124,7 +124,16 @@ ip_nat_fn(unsigned int hooknum,
WRITE_LOCK(&ip_nat_lock); WRITE_LOCK(&ip_nat_lock);
/* Seen it before? This can happen for loopback, retrans, /* Seen it before? This can happen for loopback, retrans,
or local packets.. */ or local packets.. */
if (!(info->initialized & (1 << maniptype))) { if (!(info->initialized & (1 << maniptype))
#ifndef CONFIG_IP_NF_NAT_LOCAL
/* If this session has already been confirmed we must not
* touch it again even if there is no mapping set up.
* Can only happen on local->local traffic with
* CONFIG_IP_NF_NAT_LOCAL disabled.
*/
&& !(ct->status & IPS_CONFIRMED)
#endif
) {
unsigned int ret; unsigned int ret;
if (ct->master if (ct->master
......
...@@ -45,7 +45,7 @@ masquerade_check(const char *tablename, ...@@ -45,7 +45,7 @@ masquerade_check(const char *tablename,
const struct ip_nat_multi_range *mr = targinfo; const struct ip_nat_multi_range *mr = targinfo;
if (strcmp(tablename, "nat") != 0) { if (strcmp(tablename, "nat") != 0) {
DEBUGP("masquerade_check: bad table `%s'.\n", table); DEBUGP("masquerade_check: bad table `%s'.\n", tablename);
return 0; return 0;
} }
if (targinfosize != IPT_ALIGN(sizeof(*mr))) { if (targinfosize != IPT_ALIGN(sizeof(*mr))) {
......
...@@ -1825,12 +1825,15 @@ int tcp_v4_rcv(struct sk_buff *skb) ...@@ -1825,12 +1825,15 @@ int tcp_v4_rcv(struct sk_buff *skb)
goto discard_it; goto discard_it;
do_time_wait: do_time_wait:
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
goto discard_and_relse; tcp_tw_put((struct tcp_tw_bucket *) sk);
goto discard_it;
}
if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) { if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
TCP_INC_STATS_BH(TcpInErrs); TCP_INC_STATS_BH(TcpInErrs);
goto discard_and_relse; tcp_tw_put((struct tcp_tw_bucket *) sk);
goto discard_it;
} }
switch (tcp_timewait_state_process((struct tcp_tw_bucket *)sk, switch (tcp_timewait_state_process((struct tcp_tw_bucket *)sk,
skb, th, skb->len)) { skb, th, skb->len)) {
......
...@@ -19,3 +19,5 @@ obj-$(CONFIG_INET6_IPCOMP) += ipcomp6.o ...@@ -19,3 +19,5 @@ obj-$(CONFIG_INET6_IPCOMP) += ipcomp6.o
obj-$(CONFIG_NETFILTER) += netfilter/ obj-$(CONFIG_NETFILTER) += netfilter/
obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o
obj-y += exthdrs_core.o
...@@ -633,105 +633,3 @@ ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *opt) ...@@ -633,105 +633,3 @@ ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *opt)
} }
return opt2; return opt2;
} }
/*
 * Report whether nexthdr names one of the extension headers this code
 * recognises, as opposed to an upper-layer protocol.
 *
 * Returns 1 for NEXTHDR_HOP, NEXTHDR_ROUTING, NEXTHDR_FRAGMENT,
 * NEXTHDR_AUTH, NEXTHDR_NONE and NEXTHDR_DEST, and 0 for any other value.
 */
int ipv6_ext_hdr(u8 nexthdr)
{
	switch (nexthdr) {
	case NEXTHDR_HOP:
	case NEXTHDR_ROUTING:
	case NEXTHDR_FRAGMENT:
	case NEXTHDR_AUTH:
	case NEXTHDR_NONE:
	case NEXTHDR_DEST:
		return 1;
	default:
		return 0;
	}
}
/*
 * Skip any extension headers. This is used by the ICMP module.
 *
 * Note that strictly speaking this conflicts with RFC 2460 4.0:
 * ...The contents and semantics of each extension header determine whether
 * or not to proceed to the next header. Therefore, extension headers must
 * be processed strictly in the order they appear in the packet; a
 * receiver must not, for example, scan through a packet looking for a
 * particular kind of extension header and process that header prior to
 * processing all preceding ones.
 *
 * We do exactly this. This is a protocol bug. We can't decide after
 * seeing an unknown discard-with-error flavour TLV option if it's an
 * ICMP error message or not (errors should never be sent in reply to
 * ICMP error messages).
 *
 * But I see no other way to do this. This might need to be reexamined
 * when Linux implements ESP (and maybe AUTH) headers.
 * --AK
 *
 * This function parses the (probably truncated) exthdr chain that begins
 * at byte offset "start" inside "skb" and is "len" bytes long. "nexthdrp"
 * initially points to some place where the type of the first header can
 * be found.
 *
 * It skips all well-known exthdrs, and returns the offset of the start
 * of the unparsable area, i.e. the first header with unknown type.
 * When the return value is not -1, *nexthdrp is updated with the
 * type/protocol of that header; on -1 *nexthdrp is left untouched.
 *
 * NOTES: - if packet terminated with NEXTHDR_NONE it returns -1.
 * - it may return an offset pointing beyond end of packet,
 * if the last recognized header is truncated in the middle.
 * - if packet is truncated, so that all parsed headers are skipped,
 * it returns -1.
 * - First fragment header is skipped, not-first ones
 * are considered as unparsable.
 * - ESP is unparsable for now and considered like
 * normal payload protocol.
 * - Note also special handling of AUTH header. Thanks to IPsec wizards.
 *
 * --ANK (980726)
 */
int ipv6_skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp, int len)
{
	u8 nexthdr = *nexthdrp;

	while (ipv6_ext_hdr(nexthdr)) {
		struct ipv6_opt_hdr hdr;
		int hdrlen;

		/* Not even room for the generic two-byte option header. */
		if (len < (int)sizeof(struct ipv6_opt_hdr))
			return -1;
		if (nexthdr == NEXTHDR_NONE)
			return -1;
		/*
		 * "len" comes from the caller and may exceed what the skb
		 * really holds.  Treat a short read as a truncated packet,
		 * exactly like the fragment-header read below, instead of
		 * taking the whole machine down with BUG().
		 */
		if (skb_copy_bits(skb, start, &hdr, sizeof(hdr)))
			return -1;
		if (nexthdr == NEXTHDR_FRAGMENT) {
			unsigned short frag_off;

			if (skb_copy_bits(skb,
					  start+offsetof(struct frag_hdr,
							 frag_off),
					  &frag_off,
					  sizeof(frag_off))) {
				return -1;
			}

			/*
			 * Mask off the low three (flag/reserved) bits: a
			 * non-zero fragment offset means this is not the
			 * first fragment, so stop and report the fragment
			 * header itself as the unparsable area.
			 */
			if (ntohs(frag_off) & ~0x7)
				break;
			/* The fragment header has a fixed size. */
			hdrlen = 8;
		} else if (nexthdr == NEXTHDR_AUTH)
			/* AUTH encodes its length in 4-byte units, minus 2. */
			hdrlen = (hdr.hdrlen+2)<<2;
		else
			hdrlen = ipv6_optlen(&hdr);

		nexthdr = hdr.nexthdr;
		len -= hdrlen;
		start += hdrlen;
	}

	*nexthdrp = nexthdr;
	return start;
}
/*
* IPv6 library code, needed by static components when full IPv6 support is
* not configured or static.
*/
#include <net/ipv6.h>
/*
 * Report whether nexthdr names one of the extension headers this code
 * recognises, as opposed to an upper-layer protocol.
 *
 * Returns 1 for NEXTHDR_HOP, NEXTHDR_ROUTING, NEXTHDR_FRAGMENT,
 * NEXTHDR_AUTH, NEXTHDR_NONE and NEXTHDR_DEST, and 0 for any other value.
 */
int ipv6_ext_hdr(u8 nexthdr)
{
	switch (nexthdr) {
	case NEXTHDR_HOP:
	case NEXTHDR_ROUTING:
	case NEXTHDR_FRAGMENT:
	case NEXTHDR_AUTH:
	case NEXTHDR_NONE:
	case NEXTHDR_DEST:
		return 1;
	default:
		return 0;
	}
}
/*
 * Skip any extension headers. This is used by the ICMP module.
 *
 * Note that strictly speaking this conflicts with RFC 2460 4.0:
 * ...The contents and semantics of each extension header determine whether
 * or not to proceed to the next header. Therefore, extension headers must
 * be processed strictly in the order they appear in the packet; a
 * receiver must not, for example, scan through a packet looking for a
 * particular kind of extension header and process that header prior to
 * processing all preceding ones.
 *
 * We do exactly this. This is a protocol bug. We can't decide after
 * seeing an unknown discard-with-error flavour TLV option if it's an
 * ICMP error message or not (errors should never be sent in reply to
 * ICMP error messages).
 *
 * But I see no other way to do this. This might need to be reexamined
 * when Linux implements ESP (and maybe AUTH) headers.
 * --AK
 *
 * This function parses the (probably truncated) exthdr chain that begins
 * at byte offset "start" inside "skb" and is "len" bytes long. "nexthdrp"
 * initially points to some place where the type of the first header can
 * be found.
 *
 * It skips all well-known exthdrs, and returns the offset of the start
 * of the unparsable area, i.e. the first header with unknown type.
 * When the return value is not -1, *nexthdrp is updated with the
 * type/protocol of that header; on -1 *nexthdrp is left untouched.
 *
 * NOTES: - if packet terminated with NEXTHDR_NONE it returns -1.
 * - it may return an offset pointing beyond end of packet,
 * if the last recognized header is truncated in the middle.
 * - if packet is truncated, so that all parsed headers are skipped,
 * it returns -1.
 * - First fragment header is skipped, not-first ones
 * are considered as unparsable.
 * - ESP is unparsable for now and considered like
 * normal payload protocol.
 * - Note also special handling of AUTH header. Thanks to IPsec wizards.
 *
 * --ANK (980726)
 */
int ipv6_skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp, int len)
{
	u8 nexthdr = *nexthdrp;

	while (ipv6_ext_hdr(nexthdr)) {
		struct ipv6_opt_hdr hdr;
		int hdrlen;

		/* Not even room for the generic two-byte option header. */
		if (len < (int)sizeof(struct ipv6_opt_hdr))
			return -1;
		if (nexthdr == NEXTHDR_NONE)
			return -1;
		/*
		 * "len" comes from the caller and may exceed what the skb
		 * really holds.  Treat a short read as a truncated packet,
		 * exactly like the fragment-header read below, instead of
		 * taking the whole machine down with BUG().
		 */
		if (skb_copy_bits(skb, start, &hdr, sizeof(hdr)))
			return -1;
		if (nexthdr == NEXTHDR_FRAGMENT) {
			unsigned short frag_off;

			if (skb_copy_bits(skb,
					  start+offsetof(struct frag_hdr,
							 frag_off),
					  &frag_off,
					  sizeof(frag_off))) {
				return -1;
			}

			/*
			 * Mask off the low three (flag/reserved) bits: a
			 * non-zero fragment offset means this is not the
			 * first fragment, so stop and report the fragment
			 * header itself as the unparsable area.
			 */
			if (ntohs(frag_off) & ~0x7)
				break;
			/* The fragment header has a fixed size. */
			hdrlen = 8;
		} else if (nexthdr == NEXTHDR_AUTH)
			/* AUTH encodes its length in 4-byte units, minus 2. */
			hdrlen = (hdr.hdrlen+2)<<2;
		else
			hdrlen = ipv6_optlen(&hdr);

		nexthdr = hdr.nexthdr;
		len -= hdrlen;
		start += hdrlen;
	}

	*nexthdrp = nexthdr;
	return start;
}
EXPORT_SYMBOL(ipv6_ext_hdr);
EXPORT_SYMBOL(ipv6_skip_exthdr);
...@@ -41,9 +41,7 @@ EXPORT_SYMBOL(xfrm6_rcv); ...@@ -41,9 +41,7 @@ EXPORT_SYMBOL(xfrm6_rcv);
#endif #endif
EXPORT_SYMBOL(rt6_lookup); EXPORT_SYMBOL(rt6_lookup);
EXPORT_SYMBOL(fl6_sock_lookup); EXPORT_SYMBOL(fl6_sock_lookup);
EXPORT_SYMBOL(ipv6_ext_hdr);
EXPORT_SYMBOL(ip6_append_data); EXPORT_SYMBOL(ip6_append_data);
EXPORT_SYMBOL(ip6_flush_pending_frames); EXPORT_SYMBOL(ip6_flush_pending_frames);
EXPORT_SYMBOL(ip6_push_pending_frames); EXPORT_SYMBOL(ip6_push_pending_frames);
EXPORT_SYMBOL(ipv6_push_nfrag_opts); EXPORT_SYMBOL(ipv6_push_nfrag_opts);
EXPORT_SYMBOL(ipv6_skip_exthdr);
...@@ -34,6 +34,8 @@ ...@@ -34,6 +34,8 @@
* Alexey Kuznetsov : Untied from IPv4 stack. * Alexey Kuznetsov : Untied from IPv4 stack.
* Cyrus Durgin : Fixed kerneld for kmod. * Cyrus Durgin : Fixed kerneld for kmod.
* Michal Ostrowski : Module initialization cleanup. * Michal Ostrowski : Module initialization cleanup.
* Ulises Alonso : Frame number limit removal and
* packet_set_ring memory leak.
* *
* This program is free software; you can redistribute it and/or * This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License * modify it under the terms of the GNU General Public License
...@@ -168,30 +170,47 @@ static void packet_flush_mclist(struct sock *sk); ...@@ -168,30 +170,47 @@ static void packet_flush_mclist(struct sock *sk);
struct packet_opt struct packet_opt
{ {
struct tpacket_stats stats;
#ifdef CONFIG_PACKET_MMAP
unsigned long *pg_vec;
unsigned int head;
unsigned int frames_per_block;
unsigned int frame_size;
unsigned int frame_max;
int copy_thresh;
#endif
struct packet_type prot_hook; struct packet_type prot_hook;
spinlock_t bind_lock; spinlock_t bind_lock;
char running; /* prot_hook is attached*/ char running; /* prot_hook is attached*/
int ifindex; /* bound device */ int ifindex; /* bound device */
unsigned short num; unsigned short num;
struct tpacket_stats stats;
#ifdef CONFIG_PACKET_MULTICAST #ifdef CONFIG_PACKET_MULTICAST
struct packet_mclist *mclist; struct packet_mclist *mclist;
#endif #endif
#ifdef CONFIG_PACKET_MMAP #ifdef CONFIG_PACKET_MMAP
atomic_t mapped; atomic_t mapped;
unsigned long *pg_vec;
unsigned int pg_vec_order; unsigned int pg_vec_order;
unsigned int pg_vec_pages; unsigned int pg_vec_pages;
unsigned int pg_vec_len; unsigned int pg_vec_len;
struct tpacket_hdr **iovec;
unsigned int frame_size;
unsigned int iovmax;
unsigned int head;
int copy_thresh;
#endif #endif
}; };
#ifdef CONFIG_PACKET_MMAP
/*
 * Map a ring frame index to the address of that frame.
 *
 * The index is split into a block number (which pg_vec entry) and a slot
 * within that block; the result is the block's base value from pg_vec
 * plus slot * frame_size.  Callers cast the result to a
 * struct tpacket_hdr pointer.
 */
static inline unsigned long packet_lookup_frame(struct packet_opt *po, unsigned int position)
{
	unsigned int block = position / po->frames_per_block;
	unsigned int slot = position % po->frames_per_block;

	return po->pg_vec[block] + slot * po->frame_size;
}
#endif
#define pkt_sk(__sk) ((struct packet_opt *)(__sk)->sk_protinfo) #define pkt_sk(__sk) ((struct packet_opt *)(__sk)->sk_protinfo)
void packet_sock_destruct(struct sock *sk) void packet_sock_destruct(struct sock *sk)
...@@ -586,11 +605,11 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct pack ...@@ -586,11 +605,11 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct pack
snaplen = skb->len-skb->data_len; snaplen = skb->len-skb->data_len;
spin_lock(&sk->sk_receive_queue.lock); spin_lock(&sk->sk_receive_queue.lock);
h = po->iovec[po->head]; h = (struct tpacket_hdr *)packet_lookup_frame(po, po->head);
if (h->tp_status) if (h->tp_status)
goto ring_is_full; goto ring_is_full;
po->head = po->head != po->iovmax ? po->head+1 : 0; po->head = po->head != po->frame_max ? po->head+1 : 0;
po->stats.tp_packets++; po->stats.tp_packets++;
if (copy_skb) { if (copy_skb) {
status |= TP_STATUS_COPY; status |= TP_STATUS_COPY;
...@@ -1485,10 +1504,13 @@ unsigned int packet_poll(struct file * file, struct socket *sock, poll_table *wa ...@@ -1485,10 +1504,13 @@ unsigned int packet_poll(struct file * file, struct socket *sock, poll_table *wa
unsigned int mask = datagram_poll(file, sock, wait); unsigned int mask = datagram_poll(file, sock, wait);
spin_lock_bh(&sk->sk_receive_queue.lock); spin_lock_bh(&sk->sk_receive_queue.lock);
if (po->iovec) { if (po->pg_vec) {
unsigned last = po->head ? po->head-1 : po->iovmax; unsigned last = po->head ? po->head-1 : po->frame_max;
struct tpacket_hdr *h;
h = (struct tpacket_hdr *)packet_lookup_frame(po, last);
if (po->iovec[last]->tp_status) if (h->tp_status)
mask |= POLLIN | POLLRDNORM; mask |= POLLIN | POLLRDNORM;
} }
spin_unlock_bh(&sk->sk_receive_queue.lock); spin_unlock_bh(&sk->sk_receive_queue.lock);
...@@ -1548,16 +1570,18 @@ static void free_pg_vec(unsigned long *pg_vec, unsigned order, unsigned len) ...@@ -1548,16 +1570,18 @@ static void free_pg_vec(unsigned long *pg_vec, unsigned order, unsigned len)
static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing) static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing)
{ {
unsigned long *pg_vec = NULL; unsigned long *pg_vec = NULL;
struct tpacket_hdr **io_vec = NULL;
struct packet_opt *po = pkt_sk(sk); struct packet_opt *po = pkt_sk(sk);
int was_running, num, order = 0; int was_running, num, order = 0;
int err = 0; int err = 0;
if (req->tp_block_nr) { if (req->tp_block_nr) {
int i, l; int i, l;
int frames_per_block;
/* Sanity tests and some calculations */ /* Sanity tests and some calculations */
if (po->pg_vec)
return -EBUSY;
if ((int)req->tp_block_size <= 0) if ((int)req->tp_block_size <= 0)
return -EINVAL; return -EINVAL;
if (req->tp_block_size&(PAGE_SIZE-1)) if (req->tp_block_size&(PAGE_SIZE-1))
...@@ -1566,10 +1590,11 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing ...@@ -1566,10 +1590,11 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing
return -EINVAL; return -EINVAL;
if (req->tp_frame_size&(TPACKET_ALIGNMENT-1)) if (req->tp_frame_size&(TPACKET_ALIGNMENT-1))
return -EINVAL; return -EINVAL;
frames_per_block = req->tp_block_size/req->tp_frame_size;
if (frames_per_block <= 0) po->frames_per_block = req->tp_block_size/req->tp_frame_size;
if (po->frames_per_block <= 0)
return -EINVAL; return -EINVAL;
if (frames_per_block*req->tp_block_nr != req->tp_frame_nr) if (po->frames_per_block*req->tp_block_nr != req->tp_frame_nr)
return -EINVAL; return -EINVAL;
/* OK! */ /* OK! */
...@@ -1596,20 +1621,16 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing ...@@ -1596,20 +1621,16 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing
} }
/* Page vector is allocated */ /* Page vector is allocated */
/* Draw frames */
io_vec = kmalloc(req->tp_frame_nr*sizeof(struct tpacket_hdr*), GFP_KERNEL);
if (io_vec == NULL)
goto out_free_pgvec;
memset(io_vec, 0, req->tp_frame_nr*sizeof(struct tpacket_hdr*));
l = 0; l = 0;
for (i=0; i<req->tp_block_nr; i++) { for (i=0; i<req->tp_block_nr; i++) {
unsigned long ptr = pg_vec[i]; unsigned long ptr = pg_vec[i];
struct tpacket_hdr *header;
int k; int k;
for (k=0; k<frames_per_block; k++, l++) { for (k=0; k<po->frames_per_block; k++) {
io_vec[l] = (struct tpacket_hdr*)ptr;
io_vec[l]->tp_status = TP_STATUS_KERNEL; header = (struct tpacket_hdr*)ptr;
header->tp_status = TP_STATUS_KERNEL;
ptr += req->tp_frame_size; ptr += req->tp_frame_size;
} }
} }
...@@ -1642,8 +1663,7 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing ...@@ -1642,8 +1663,7 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing
spin_lock_bh(&sk->sk_receive_queue.lock); spin_lock_bh(&sk->sk_receive_queue.lock);
pg_vec = XC(po->pg_vec, pg_vec); pg_vec = XC(po->pg_vec, pg_vec);
io_vec = XC(po->iovec, io_vec); po->frame_max = req->tp_frame_nr-1;
po->iovmax = req->tp_frame_nr-1;
po->head = 0; po->head = 0;
po->frame_size = req->tp_frame_size; po->frame_size = req->tp_frame_size;
spin_unlock_bh(&sk->sk_receive_queue.lock); spin_unlock_bh(&sk->sk_receive_queue.lock);
...@@ -1652,7 +1672,7 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing ...@@ -1652,7 +1672,7 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing
req->tp_block_nr = XC(po->pg_vec_len, req->tp_block_nr); req->tp_block_nr = XC(po->pg_vec_len, req->tp_block_nr);
po->pg_vec_pages = req->tp_block_size/PAGE_SIZE; po->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
po->prot_hook.func = po->iovec ? tpacket_rcv : packet_rcv; po->prot_hook.func = po->pg_vec ? tpacket_rcv : packet_rcv;
skb_queue_purge(&sk->sk_receive_queue); skb_queue_purge(&sk->sk_receive_queue);
#undef XC #undef XC
if (atomic_read(&po->mapped)) if (atomic_read(&po->mapped))
...@@ -1670,9 +1690,6 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing ...@@ -1670,9 +1690,6 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing
release_sock(sk); release_sock(sk);
if (io_vec)
kfree(io_vec);
out_free_pgvec: out_free_pgvec:
if (pg_vec) if (pg_vec)
free_pg_vec(pg_vec, order, req->tp_block_nr); free_pg_vec(pg_vec, order, req->tp_block_nr);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment