Commit e811f324, authored by James Morris and committed by David S. Miller

[NETFILTER]: Fixup ip6_queue much like ip_queue was:

- Fix unicast pid wrap issue
- Fix potential module unload races for netfilter and netlink paths
- General code cleanup
- Queue session cannot be overridden by another client
- Client can set copy mode to none, which stops queueing
parent 8d4cb2d7
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
* real coder of this. * real coder of this.
* Few changes needed, mainly the hard_routing code and * Few changes needed, mainly the hard_routing code and
* the netlink socket protocol (we're NETLINK_IP6_FW). * the netlink socket protocol (we're NETLINK_IP6_FW).
* * 2002-06-25: Code cleanup. [JM: ported cleanup over from ip_queue.c]
*/ */
#include <linux/module.h> #include <linux/module.h>
#include <linux/skbuff.h> #include <linux/skbuff.h>
...@@ -26,18 +26,12 @@ ...@@ -26,18 +26,12 @@
#include <linux/netfilter.h> #include <linux/netfilter.h>
#include <linux/netlink.h> #include <linux/netlink.h>
#include <linux/spinlock.h> #include <linux/spinlock.h>
#include <linux/rtnetlink.h> #include <linux/brlock.h>
#include <linux/sysctl.h> #include <linux/sysctl.h>
#include <linux/proc_fs.h> #include <linux/proc_fs.h>
#include <net/sock.h> #include <net/sock.h>
#include <net/ipv6.h> #include <net/ipv6.h>
#include <net/ip6_route.h> #include <net/ip6_route.h>
/* We're still usign the following structs. No need to change them: */
/* ipq_packet_msg */
/* ipq_mode_msg */
/* ipq_verdict_msg */
/* ipq_peer_msg */
#include <linux/netfilter_ipv4/ip_queue.h> #include <linux/netfilter_ipv4/ip_queue.h>
#include <linux/netfilter_ipv4/ip_tables.h> #include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter_ipv6/ip6_tables.h> #include <linux/netfilter_ipv6/ip6_tables.h>
...@@ -47,184 +41,289 @@ ...@@ -47,184 +41,289 @@
#define NET_IPQ_QMAX 2088 #define NET_IPQ_QMAX 2088
#define NET_IPQ_QMAX_NAME "ip6_queue_maxlen" #define NET_IPQ_QMAX_NAME "ip6_queue_maxlen"
typedef struct ip6q_rt_info { struct ipq_rt_info {
struct in6_addr daddr; struct in6_addr daddr;
struct in6_addr saddr; struct in6_addr saddr;
} ip6q_rt_info_t; };
typedef struct ip6q_queue_element { struct ipq_queue_entry {
struct list_head list; /* Links element into queue */ struct list_head list;
int verdict; /* Current verdict */ struct nf_info *info;
struct nf_info *info; /* Extra info from netfilter */ struct sk_buff *skb;
struct sk_buff *skb; /* Packet inside */ struct ipq_rt_info rt_info;
ip6q_rt_info_t rt_info; /* May need post-mangle routing */ };
} ip6q_queue_element_t;
typedef int (*ip6q_send_cb_t)(ip6q_queue_element_t *e);
typedef struct ip6q_peer {
pid_t pid; /* PID of userland peer */
unsigned char died; /* We think the peer died */
unsigned char copy_mode; /* Copy packet as well as metadata? */
size_t copy_range; /* Range past metadata to copy */
ip6q_send_cb_t send; /* Callback for sending data to peer */
} ip6q_peer_t;
typedef struct ip6q_queue {
int len; /* Current queue len */
int *maxlen; /* Maximum queue len, via sysctl */
unsigned char flushing; /* If queue is being flushed */
unsigned char terminate; /* If the queue is being terminated */
struct list_head list; /* Head of packet queue */
spinlock_t lock; /* Queue spinlock */
ip6q_peer_t peer; /* Userland peer */
} ip6q_queue_t;
/****************************************************************************
*
* Packet queue
*
****************************************************************************/
/* Dequeue a packet if matched by cmp, or the next available if cmp is NULL */
static ip6q_queue_element_t *
ip6q_dequeue(ip6q_queue_t *q,
int (*cmp)(ip6q_queue_element_t *, unsigned long),
unsigned long data)
{
struct list_head *i;
spin_lock_bh(&q->lock); typedef int (*ipq_cmpfn)(struct ipq_queue_entry *, unsigned long);
for (i = q->list.prev; i != &q->list; i = i->prev) {
ip6q_queue_element_t *e = (ip6q_queue_element_t *)i;
if (!cmp || cmp(e, data)) { static unsigned char copy_mode = IPQ_COPY_NONE;
list_del(&e->list); static unsigned int queue_maxlen = IPQ_QMAX_DEFAULT;
q->len--; static rwlock_t queue_lock = RW_LOCK_UNLOCKED;
spin_unlock_bh(&q->lock); static int peer_pid;
return e; static unsigned int copy_range;
static unsigned int queue_total;
static struct sock *ipqnl;
static LIST_HEAD(queue_list);
static DECLARE_MUTEX(ipqnl_sem);
static void
ipq_issue_verdict(struct ipq_queue_entry *entry, int verdict)
{
nf_reinject(entry->skb, entry->info, verdict);
kfree(entry);
}
static inline int
__ipq_enqueue_entry(struct ipq_queue_entry *entry)
{
if (queue_total >= queue_maxlen) {
if (net_ratelimit())
printk(KERN_WARNING "ip6_queue: full at %d entries, "
"dropping packet(s).\n", queue_total);
return -ENOSPC;
} }
list_add(&entry->list, &queue_list);
queue_total++;
return 0;
}
/*
* Find and return a queued entry matched by cmpfn, or return the last
* entry if cmpfn is NULL.
*/
static inline struct ipq_queue_entry *
__ipq_find_entry(ipq_cmpfn cmpfn, unsigned long data)
{
struct list_head *p;
list_for_each_prev(p, &queue_list) {
struct ipq_queue_entry *entry = (struct ipq_queue_entry *)p;
if (!cmpfn || cmpfn(entry, data))
return entry;
} }
spin_unlock_bh(&q->lock);
return NULL; return NULL;
} }
/* Flush all packets */ static inline void
static void ip6q_flush(ip6q_queue_t *q) __ipq_dequeue_entry(struct ipq_queue_entry *entry)
{ {
ip6q_queue_element_t *e; list_del(&entry->list);
queue_total--;
spin_lock_bh(&q->lock);
q->flushing = 1;
spin_unlock_bh(&q->lock);
while ((e = ip6q_dequeue(q, NULL, 0))) {
e->verdict = NF_DROP;
nf_reinject(e->skb, e->info, e->verdict);
kfree(e);
}
spin_lock_bh(&q->lock);
q->flushing = 0;
spin_unlock_bh(&q->lock);
} }
static ip6q_queue_t *ip6q_create_queue(nf_queue_outfn_t outfn, static inline struct ipq_queue_entry *
ip6q_send_cb_t send_cb, __ipq_find_dequeue_entry(ipq_cmpfn cmpfn, unsigned long data)
int *errp, int *sysctl_qmax)
{ {
int status; struct ipq_queue_entry *entry;
ip6q_queue_t *q;
*errp = 0; entry = __ipq_find_entry(cmpfn, data);
q = kmalloc(sizeof(ip6q_queue_t), GFP_KERNEL); if (entry == NULL)
if (q == NULL) {
*errp = -ENOMEM;
return NULL; return NULL;
__ipq_dequeue_entry(entry);
return entry;
}
static inline void
__ipq_flush(int verdict)
{
struct ipq_queue_entry *entry;
while ((entry = __ipq_find_dequeue_entry(NULL, 0)))
ipq_issue_verdict(entry, verdict);
}
static inline int
__ipq_set_mode(unsigned char mode, unsigned int range)
{
int status = 0;
switch(mode) {
case IPQ_COPY_NONE:
case IPQ_COPY_META:
copy_mode = mode;
copy_range = 0;
break;
case IPQ_COPY_PACKET:
copy_mode = mode;
copy_range = range;
if (copy_range > 0xFFFF)
copy_range = 0xFFFF;
break;
default:
status = -EINVAL;
} }
q->peer.pid = 0; return status;
q->peer.died = 0; }
q->peer.copy_mode = IPQ_COPY_NONE;
q->peer.copy_range = 0; static inline void
q->peer.send = send_cb; __ipq_reset(void)
q->len = 0; {
q->maxlen = sysctl_qmax; peer_pid = 0;
q->flushing = 0; __ipq_set_mode(IPQ_COPY_NONE, 0);
q->terminate = 0; __ipq_flush(NF_DROP);
INIT_LIST_HEAD(&q->list); }
spin_lock_init(&q->lock);
status = nf_register_queue_handler(PF_INET6, outfn, q); static struct ipq_queue_entry *
if (status < 0) { ipq_find_dequeue_entry(ipq_cmpfn cmpfn, unsigned long data)
*errp = -EBUSY; {
kfree(q); struct ipq_queue_entry *entry;
write_lock_bh(&queue_lock);
entry = __ipq_find_dequeue_entry(cmpfn, data);
write_unlock_bh(&queue_lock);
return entry;
}
static void
ipq_flush(int verdict)
{
write_lock_bh(&queue_lock);
__ipq_flush(verdict);
write_unlock_bh(&queue_lock);
}
static struct sk_buff *
ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp)
{
unsigned char *old_tail;
size_t size = 0;
size_t data_len = 0;
struct sk_buff *skb;
struct ipq_packet_msg *pmsg;
struct nlmsghdr *nlh;
read_lock_bh(&queue_lock);
switch (copy_mode) {
case IPQ_COPY_META:
case IPQ_COPY_NONE:
size = NLMSG_SPACE(sizeof(*pmsg));
data_len = 0;
break;
case IPQ_COPY_PACKET:
if (copy_range == 0 || copy_range > entry->skb->len)
data_len = entry->skb->len;
else
data_len = copy_range;
size = NLMSG_SPACE(sizeof(*pmsg) + data_len);
break;
default:
*errp = -EINVAL;
read_unlock_bh(&queue_lock);
return NULL; return NULL;
} }
return q;
read_unlock_bh(&queue_lock);
skb = alloc_skb(size, GFP_ATOMIC);
if (!skb)
goto nlmsg_failure;
old_tail= skb->tail;
nlh = NLMSG_PUT(skb, 0, 0, IPQM_PACKET, size - sizeof(*nlh));
pmsg = NLMSG_DATA(nlh);
memset(pmsg, 0, sizeof(*pmsg));
pmsg->packet_id = (unsigned long )entry;
pmsg->data_len = data_len;
pmsg->timestamp_sec = entry->skb->stamp.tv_sec;
pmsg->timestamp_usec = entry->skb->stamp.tv_usec;
pmsg->mark = entry->skb->nfmark;
pmsg->hook = entry->info->hook;
pmsg->hw_protocol = entry->skb->protocol;
if (entry->info->indev)
strcpy(pmsg->indev_name, entry->info->indev->name);
else
pmsg->indev_name[0] = '\0';
if (entry->info->outdev)
strcpy(pmsg->outdev_name, entry->info->outdev->name);
else
pmsg->outdev_name[0] = '\0';
if (entry->info->indev && entry->skb->dev) {
pmsg->hw_type = entry->skb->dev->type;
if (entry->skb->dev->hard_header_parse)
pmsg->hw_addrlen =
entry->skb->dev->hard_header_parse(entry->skb,
pmsg->hw_addr);
}
if (data_len)
memcpy(pmsg->payload, entry->skb->data, data_len);
nlh->nlmsg_len = skb->tail - old_tail;
return skb;
nlmsg_failure:
if (skb)
kfree_skb(skb);
*errp = -EINVAL;
printk(KERN_ERR "ip6_queue: error creating packet message\n");
return NULL;
} }
static int ip6q_enqueue(ip6q_queue_t *q, static int
struct sk_buff *skb, struct nf_info *info) ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info, void *data)
{ {
ip6q_queue_element_t *e; int status = -EINVAL;
int status; struct sk_buff *nskb;
struct ipq_queue_entry *entry;
e = kmalloc(sizeof(*e), GFP_ATOMIC); if (copy_mode == IPQ_COPY_NONE)
if (e == NULL) { return -EAGAIN;
printk(KERN_ERR "ip6_queue: OOM in enqueue\n");
entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
if (entry == NULL) {
printk(KERN_ERR "ip6_queue: OOM in ipq_enqueue_packet()\n");
return -ENOMEM; return -ENOMEM;
} }
e->verdict = NF_DROP; entry->info = info;
e->info = info; entry->skb = skb;
e->skb = skb;
if (e->info->hook == NF_IP_LOCAL_OUT) { if (entry->info->hook == NF_IP_LOCAL_OUT) {
struct ipv6hdr *iph = skb->nh.ipv6h; struct ipv6hdr *iph = skb->nh.ipv6h;
e->rt_info.daddr = iph->daddr; entry->rt_info.daddr = iph->daddr;
e->rt_info.saddr = iph->saddr; entry->rt_info.saddr = iph->saddr;
} }
spin_lock_bh(&q->lock); nskb = ipq_build_packet_message(entry, &status);
if (q->len >= *q->maxlen) { if (nskb == NULL)
spin_unlock_bh(&q->lock); goto err_out_free;
if (net_ratelimit())
printk(KERN_WARNING "ip6_queue: full at %d entries, " write_lock_bh(&queue_lock);
"dropping packet(s).\n", q->len);
goto free_drop; if (!peer_pid)
} goto err_out_unlock;
if (q->flushing || q->peer.copy_mode == IPQ_COPY_NONE
|| q->peer.pid == 0 || q->peer.died || q->terminate) { status = netlink_unicast(ipqnl, nskb, peer_pid, MSG_DONTWAIT);
spin_unlock_bh(&q->lock); if (status < 0)
goto free_drop; goto err_out_unlock;
}
status = q->peer.send(e); status = __ipq_enqueue_entry(entry);
if (status > 0) { if (status < 0)
list_add(&e->list, &q->list); goto err_out_unlock;
q->len++;
spin_unlock_bh(&q->lock); write_unlock_bh(&queue_lock);
return status; return status;
}
spin_unlock_bh(&q->lock);
if (status == -ECONNREFUSED) {
printk(KERN_INFO "ip6_queue: peer %d died, "
"resetting state and flushing queue\n", q->peer.pid);
q->peer.died = 1;
q->peer.pid = 0;
q->peer.copy_mode = IPQ_COPY_NONE;
q->peer.copy_range = 0;
ip6q_flush(q);
}
free_drop:
kfree(e);
return -EBUSY;
}
static void ip6q_destroy_queue(ip6q_queue_t *q) err_out_unlock:
{ write_unlock_bh(&queue_lock);
nf_unregister_queue_handler(PF_INET6);
spin_lock_bh(&q->lock); err_out_free:
q->terminate = 1; kfree(entry);
spin_unlock_bh(&q->lock); return status;
ip6q_flush(q);
kfree(q);
} }
/* /*
...@@ -236,7 +335,8 @@ static void ip6q_destroy_queue(ip6q_queue_t *q) ...@@ -236,7 +335,8 @@ static void ip6q_destroy_queue(ip6q_queue_t *q)
* *
* If that one is modified, this one should be modified too. * If that one is modified, this one should be modified too.
*/ */
static int route6_me_harder(struct sk_buff *skb) static int
route6_me_harder(struct sk_buff *skb)
{ {
struct ipv6hdr *iph = skb->nh.ipv6h; struct ipv6hdr *iph = skb->nh.ipv6h;
struct dst_entry *dst; struct dst_entry *dst;
...@@ -264,7 +364,9 @@ static int route6_me_harder(struct sk_buff *skb) ...@@ -264,7 +364,9 @@ static int route6_me_harder(struct sk_buff *skb)
skb->dst = dst; skb->dst = dst;
return 0; return 0;
} }
static int ip6q_mangle_ipv6(ipq_verdict_msg_t *v, ip6q_queue_element_t *e)
static int
ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct ipq_queue_entry *e)
{ {
int diff; int diff;
struct ipv6hdr *user_iph = (struct ipv6hdr *)v->payload; struct ipv6hdr *user_iph = (struct ipv6hdr *)v->payload;
...@@ -319,70 +421,67 @@ static int ip6q_mangle_ipv6(ipq_verdict_msg_t *v, ip6q_queue_element_t *e) ...@@ -319,70 +421,67 @@ static int ip6q_mangle_ipv6(ipq_verdict_msg_t *v, ip6q_queue_element_t *e)
return 0; return 0;
} }
static inline int id_cmp(ip6q_queue_element_t *e, unsigned long id) static inline int
id_cmp(struct ipq_queue_entry *e, unsigned long id)
{ {
return (id == (unsigned long )e); return (id == (unsigned long )e);
} }
static int ip6q_set_verdict(ip6q_queue_t *q, static int
ipq_verdict_msg_t *v, unsigned int len) ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len)
{ {
ip6q_queue_element_t *e; struct ipq_queue_entry *entry;
if (v->value > NF_MAX_VERDICT) if (vmsg->value > NF_MAX_VERDICT)
return -EINVAL; return -EINVAL;
e = ip6q_dequeue(q, id_cmp, v->id);
if (e == NULL) entry = ipq_find_dequeue_entry(id_cmp, vmsg->id);
if (entry == NULL)
return -ENOENT; return -ENOENT;
else { else {
e->verdict = v->value; int verdict = vmsg->value;
if (v->data_len && v->data_len == len)
if (ip6q_mangle_ipv6(v, e) < 0) if (vmsg->data_len && vmsg->data_len == len)
e->verdict = NF_DROP; if (ipq_mangle_ipv6(vmsg, entry) < 0)
nf_reinject(e->skb, e->info, e->verdict); verdict = NF_DROP;
kfree(e);
ipq_issue_verdict(entry, verdict);
return 0; return 0;
} }
} }
static int ip6q_receive_peer(ip6q_queue_t* q, ipq_peer_msg_t *m, static int
unsigned char type, unsigned int len) ipq_set_mode(unsigned char mode, unsigned int range)
{ {
int status;
write_lock_bh(&queue_lock);
status = __ipq_set_mode(mode, range);
write_unlock_bh(&queue_lock);
return status;
}
static int
ipq_receive_peer(struct ipq_peer_msg *pmsg,
unsigned char type, unsigned int len)
{
int status = 0; int status = 0;
int busy;
if (len < sizeof(*pmsg))
spin_lock_bh(&q->lock);
busy = (q->terminate || q->flushing);
spin_unlock_bh(&q->lock);
if (busy)
return -EBUSY;
if (len < sizeof(ipq_peer_msg_t))
return -EINVAL; return -EINVAL;
switch (type) { switch (type) {
case IPQM_MODE: case IPQM_MODE:
switch (m->msg.mode.value) { status = ipq_set_mode(pmsg->msg.mode.value,
case IPQ_COPY_META: pmsg->msg.mode.range);
q->peer.copy_mode = IPQ_COPY_META;
q->peer.copy_range = 0;
break;
case IPQ_COPY_PACKET:
q->peer.copy_mode = IPQ_COPY_PACKET;
q->peer.copy_range = m->msg.mode.range;
if (q->peer.copy_range > 0xFFFF)
q->peer.copy_range = 0xFFFF;
break;
default:
status = -EINVAL;
}
break; break;
case IPQM_VERDICT: case IPQM_VERDICT:
if (m->msg.verdict.value > NF_MAX_VERDICT) if (pmsg->msg.verdict.value > NF_MAX_VERDICT)
status = -EINVAL; status = -EINVAL;
else else
status = ip6q_set_verdict(q, status = ipq_set_verdict(&pmsg->msg.verdict,
&m->msg.verdict, len - sizeof(*pmsg));
len - sizeof(*m));
break; break;
default: default:
status = -EINVAL; status = -EINVAL;
...@@ -390,273 +489,187 @@ static int ip6q_receive_peer(ip6q_queue_t* q, ipq_peer_msg_t *m, ...@@ -390,273 +489,187 @@ static int ip6q_receive_peer(ip6q_queue_t* q, ipq_peer_msg_t *m,
return status; return status;
} }
static inline int dev_cmp(ip6q_queue_element_t *e, unsigned long ifindex) static int
dev_cmp(struct ipq_queue_entry *entry, unsigned long ifindex)
{ {
if (e->info->indev) if (entry->info->indev)
if (e->info->indev->ifindex == ifindex) if (entry->info->indev->ifindex == ifindex)
return 1; return 1;
if (e->info->outdev)
if (e->info->outdev->ifindex == ifindex) if (entry->info->outdev)
if (entry->info->outdev->ifindex == ifindex)
return 1; return 1;
return 0; return 0;
} }
/* Drop any queued packets associated with device ifindex */ static void
static void ip6q_dev_drop(ip6q_queue_t *q, int ifindex) ipq_dev_drop(int ifindex)
{ {
ip6q_queue_element_t *e; struct ipq_queue_entry *entry;
while ((e = ip6q_dequeue(q, dev_cmp, ifindex))) {
e->verdict = NF_DROP;
nf_reinject(e->skb, e->info, e->verdict);
kfree(e);
}
}
/**************************************************************************** while ((entry = ipq_find_dequeue_entry(dev_cmp, ifindex)) != NULL)
* ipq_issue_verdict(entry, NF_DROP);
* Netfilter interface
*
****************************************************************************/
/*
* Packets arrive here from netfilter for queuing to userspace.
* All of them must be fed back via nf_reinject() or Alexey will kill Rusty.
*/
static int netfilter6_receive(struct sk_buff *skb,
struct nf_info *info, void *data)
{
return ip6q_enqueue((ip6q_queue_t *)data, skb, info);
}
/****************************************************************************
*
* Netlink interface.
*
****************************************************************************/
static struct sock *nfnl = NULL;
/* This is not a static one, so we should not repeat its name */
ip6q_queue_t *nlq6 = NULL;
static struct sk_buff *netlink_build_message(ip6q_queue_element_t *e, int *errp)
{
unsigned char *old_tail;
size_t size = 0;
size_t data_len = 0;
struct sk_buff *skb;
ipq_packet_msg_t *pm;
struct nlmsghdr *nlh;
switch (nlq6->peer.copy_mode) {
size_t copy_range;
case IPQ_COPY_META:
size = NLMSG_SPACE(sizeof(*pm));
data_len = 0;
break;
case IPQ_COPY_PACKET:
copy_range = nlq6->peer.copy_range;
if (copy_range == 0 || copy_range > e->skb->len)
data_len = e->skb->len;
else
data_len = copy_range;
size = NLMSG_SPACE(sizeof(*pm) + data_len);
break;
case IPQ_COPY_NONE:
default:
*errp = -EINVAL;
return NULL;
}
skb = alloc_skb(size, GFP_ATOMIC);
if (!skb)
goto nlmsg_failure;
old_tail = skb->tail;
nlh = NLMSG_PUT(skb, 0, 0, IPQM_PACKET, size - sizeof(*nlh));
pm = NLMSG_DATA(nlh);
memset(pm, 0, sizeof(*pm));
pm->packet_id = (unsigned long )e;
pm->data_len = data_len;
pm->timestamp_sec = e->skb->stamp.tv_sec;
pm->timestamp_usec = e->skb->stamp.tv_usec;
pm->mark = e->skb->nfmark;
pm->hook = e->info->hook;
if (e->info->indev) strcpy(pm->indev_name, e->info->indev->name);
else pm->indev_name[0] = '\0';
if (e->info->outdev) strcpy(pm->outdev_name, e->info->outdev->name);
else pm->outdev_name[0] = '\0';
pm->hw_protocol = e->skb->protocol;
if (e->info->indev && e->skb->dev) {
pm->hw_type = e->skb->dev->type;
if (e->skb->dev->hard_header_parse)
pm->hw_addrlen =
e->skb->dev->hard_header_parse(e->skb,
pm->hw_addr);
}
if (data_len)
memcpy(pm->payload, e->skb->data, data_len);
nlh->nlmsg_len = skb->tail - old_tail;
NETLINK_CB(skb).dst_groups = 0;
return skb;
nlmsg_failure:
if (skb)
kfree_skb(skb);
*errp = 0;
printk(KERN_ERR "ip6_queue: error creating netlink message\n");
return NULL;
}
static int netlink_send_peer(ip6q_queue_element_t *e)
{
int status = 0;
struct sk_buff *skb;
skb = netlink_build_message(e, &status);
if (skb == NULL)
return status;
return netlink_unicast(nfnl, skb, nlq6->peer.pid, MSG_DONTWAIT);
} }
#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0) #define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0)
static __inline__ void netlink_receive_user_skb(struct sk_buff *skb) static inline void
ipq_rcv_skb(struct sk_buff *skb)
{ {
int status, type; int status, type, pid, flags, nlmsglen, skblen;
struct nlmsghdr *nlh; struct nlmsghdr *nlh;
if (skb->len < sizeof(struct nlmsghdr)) skblen = skb->len;
if (skblen < sizeof(*nlh))
return; return;
nlh = (struct nlmsghdr *)skb->data; nlh = (struct nlmsghdr *)skb->data;
if (nlh->nlmsg_len < sizeof(struct nlmsghdr) nlmsglen = nlh->nlmsg_len;
|| skb->len < nlh->nlmsg_len) if (nlmsglen < sizeof(*nlh) || skblen < nlmsglen)
return; return;
if(nlh->nlmsg_pid <= 0 pid = nlh->nlmsg_pid;
|| !(nlh->nlmsg_flags & NLM_F_REQUEST) flags = nlh->nlmsg_flags;
|| nlh->nlmsg_flags & NLM_F_MULTI)
if(pid <= 0 || !(flags & NLM_F_REQUEST) || flags & NLM_F_MULTI)
RCV_SKB_FAIL(-EINVAL); RCV_SKB_FAIL(-EINVAL);
if (nlh->nlmsg_flags & MSG_TRUNC)
if (flags & MSG_TRUNC)
RCV_SKB_FAIL(-ECOMM); RCV_SKB_FAIL(-ECOMM);
type = nlh->nlmsg_type; type = nlh->nlmsg_type;
if (type < NLMSG_NOOP || type >= IPQM_MAX) if (type < NLMSG_NOOP || type >= IPQM_MAX)
RCV_SKB_FAIL(-EINVAL); RCV_SKB_FAIL(-EINVAL);
if (type <= IPQM_BASE) if (type <= IPQM_BASE)
return; return;
if(!cap_raised(NETLINK_CB(skb).eff_cap, CAP_NET_ADMIN)) if(!cap_raised(NETLINK_CB(skb).eff_cap, CAP_NET_ADMIN))
RCV_SKB_FAIL(-EPERM); RCV_SKB_FAIL(-EPERM);
if (nlq6->peer.pid && !nlq6->peer.died
&& (nlq6->peer.pid != nlh->nlmsg_pid)) { write_lock_bh(&queue_lock);
printk(KERN_WARNING "ip6_queue: peer pid changed from %d to "
"%d, flushing queue\n", nlq6->peer.pid, nlh->nlmsg_pid); if (peer_pid) {
ip6q_flush(nlq6); if (peer_pid != pid) {
write_unlock_bh(&queue_lock);
RCV_SKB_FAIL(-EBUSY);
}
} }
nlq6->peer.pid = nlh->nlmsg_pid; else
nlq6->peer.died = 0; peer_pid = pid;
status = ip6q_receive_peer(nlq6, NLMSG_DATA(nlh),
type, skb->len - NLMSG_LENGTH(0)); write_unlock_bh(&queue_lock);
status = ipq_receive_peer(NLMSG_DATA(nlh), type,
skblen - NLMSG_LENGTH(0));
if (status < 0) if (status < 0)
RCV_SKB_FAIL(status); RCV_SKB_FAIL(status);
if (nlh->nlmsg_flags & NLM_F_ACK)
if (flags & NLM_F_ACK)
netlink_ack(skb, nlh, 0); netlink_ack(skb, nlh, 0);
return; return;
} }
/* Note: we are only dealing with single part messages at the moment. */ static void
static void netlink_receive_user_sk(struct sock *sk, int len) ipq_rcv_sk(struct sock *sk, int len)
{ {
do { do {
struct sk_buff *skb; struct sk_buff *skb;
if (rtnl_shlock_nowait()) if (down_trylock(&ipqnl_sem))
return; return;
while ((skb = skb_dequeue(&sk->receive_queue)) != NULL) { while ((skb = skb_dequeue(&sk->receive_queue)) != NULL) {
netlink_receive_user_skb(skb); ipq_rcv_skb(skb);
kfree_skb(skb); kfree_skb(skb);
} }
up(&rtnl_sem);
} while (nfnl && nfnl->receive_queue.qlen);
}
/**************************************************************************** up(&ipqnl_sem);
*
* System events } while (ipqnl && ipqnl->receive_queue.qlen);
* }
****************************************************************************/
static int receive_event(struct notifier_block *this, static int
ipq_rcv_dev_event(struct notifier_block *this,
unsigned long event, void *ptr) unsigned long event, void *ptr)
{ {
struct net_device *dev = ptr; struct net_device *dev = ptr;
/* Drop any packets associated with the downed device */ /* Drop any packets associated with the downed device */
if (event == NETDEV_DOWN) if (event == NETDEV_DOWN)
ip6q_dev_drop(nlq6, dev->ifindex); ipq_dev_drop(dev->ifindex);
return NOTIFY_DONE; return NOTIFY_DONE;
} }
struct notifier_block ip6q_dev_notifier = { static struct notifier_block ipq_dev_notifier = {
receive_event, ipq_rcv_dev_event,
NULL, NULL,
0 0
}; };
/**************************************************************************** static int
* ipq_rcv_nl_event(struct notifier_block *this,
* Sysctl - queue tuning. unsigned long event, void *ptr)
* {
****************************************************************************/ struct netlink_notify *n = ptr;
if (event == NETLINK_URELEASE &&
n->protocol == NETLINK_IP6_FW && n->pid) {
write_lock_bh(&queue_lock);
if (n->pid == peer_pid)
__ipq_reset();
write_unlock_bh(&queue_lock);
}
return NOTIFY_DONE;
}
static int sysctl_maxlen = IPQ_QMAX_DEFAULT; static struct notifier_block ipq_nl_notifier = {
ipq_rcv_nl_event,
NULL,
0
};
static struct ctl_table_header *ip6q_sysctl_header; static int sysctl_maxlen = IPQ_QMAX_DEFAULT;
static struct ctl_table_header *ipq_sysctl_header;
static ctl_table ip6q_table[] = { static ctl_table ipq_table[] = {
{ NET_IPQ_QMAX, NET_IPQ_QMAX_NAME, &sysctl_maxlen, { NET_IPQ_QMAX, NET_IPQ_QMAX_NAME, &sysctl_maxlen,
sizeof(sysctl_maxlen), 0644, NULL, proc_dointvec }, sizeof(sysctl_maxlen), 0644, NULL, proc_dointvec },
{ 0 } { 0 }
}; };
static ctl_table ip6q_dir_table[] = { static ctl_table ipq_dir_table[] = {
{NET_IPV6, "ipv6", NULL, 0, 0555, ip6q_table, 0, 0, 0, 0, 0}, {NET_IPV6, "ipv6", NULL, 0, 0555, ipq_table, 0, 0, 0, 0, 0},
{ 0 } { 0 }
}; };
static ctl_table ip6q_root_table[] = { static ctl_table ipq_root_table[] = {
{CTL_NET, "net", NULL, 0, 0555, ip6q_dir_table, 0, 0, 0, 0, 0}, {CTL_NET, "net", NULL, 0, 0555, ipq_dir_table, 0, 0, 0, 0, 0},
{ 0 } { 0 }
}; };
/**************************************************************************** static int
* ipq_get_info(char *buffer, char **start, off_t offset, int length)
* Procfs - debugging info.
*
****************************************************************************/
static int ip6q_get_info(char *buffer, char **start, off_t offset, int length)
{ {
int len; int len;
spin_lock_bh(&nlq6->lock); read_lock_bh(&queue_lock);
len = sprintf(buffer, len = sprintf(buffer,
"Peer pid : %d\n" "Peer PID : %d\n"
"Peer died : %d\n" "Copy mode : %hu\n"
"Peer copy mode : %d\n" "Copy range : %u\n"
"Peer copy range : %Zu\n" "Queue length : %u\n"
"Queue length : %d\n" "Queue max. length : %u\n",
"Queue max. length : %d\n" peer_pid,
"Queue flushing : %d\n" copy_mode,
"Queue terminate : %d\n", copy_range,
nlq6->peer.pid, queue_total,
nlq6->peer.died, queue_maxlen);
nlq6->peer.copy_mode,
nlq6->peer.copy_range, read_unlock_bh(&queue_lock);
nlq6->len,
*nlq6->maxlen,
nlq6->flushing,
nlq6->terminate);
spin_unlock_bh(&nlq6->lock);
*start = buffer + offset; *start = buffer + offset;
len -= offset; len -= offset;
if (len > length) if (len > length)
...@@ -666,52 +679,70 @@ static int ip6q_get_info(char *buffer, char **start, off_t offset, int length) ...@@ -666,52 +679,70 @@ static int ip6q_get_info(char *buffer, char **start, off_t offset, int length)
return len; return len;
} }
/**************************************************************************** static int
* init_or_cleanup(int init)
* Module stuff.
*
****************************************************************************/
static int __init init(void)
{ {
int status = 0; int status = -ENOMEM;
struct proc_dir_entry *proc; struct proc_dir_entry *proc;
/* We must create the NETLINK_IP6_FW protocol service */ if (!init)
nfnl = netlink_kernel_create(NETLINK_IP6_FW, netlink_receive_user_sk); goto cleanup;
if (nfnl == NULL) {
printk(KERN_ERR "ip6_queue: initialisation failed: unable to " netlink_register_notifier(&ipq_nl_notifier);
"create kernel netlink socket\n"); ipqnl = netlink_kernel_create(NETLINK_IP6_FW, ipq_rcv_sk);
return -ENOMEM; if (ipqnl == NULL) {
} printk(KERN_ERR "ip6_queue: failed to create netlink socket\n");
nlq6 = ip6q_create_queue(netfilter6_receive, goto cleanup_netlink_notifier;
netlink_send_peer, &status, &sysctl_maxlen);
if (nlq6 == NULL) {
printk(KERN_ERR "ip6_queue: initialisation failed: unable to "
"create queue\n");
sock_release(nfnl->socket);
return status;
} }
/* The file will be /proc/net/ip6_queue */
proc = proc_net_create(IPQ_PROC_FS_NAME, 0, ip6q_get_info); proc = proc_net_create(IPQ_PROC_FS_NAME, 0, ipq_get_info);
if (proc) proc->owner = THIS_MODULE; if (proc)
proc->owner = THIS_MODULE;
else { else {
ip6q_destroy_queue(nlq6); printk(KERN_ERR "ip6_queue: failed to create proc entry\n");
sock_release(nfnl->socket); goto cleanup_ipqnl;
return -ENOMEM;
} }
register_netdevice_notifier(&ip6q_dev_notifier);
ip6q_sysctl_header = register_sysctl_table(ip6q_root_table, 0); register_netdevice_notifier(&ipq_dev_notifier);
ipq_sysctl_header = register_sysctl_table(ipq_root_table, 0);
status = nf_register_queue_handler(PF_INET6, ipq_enqueue_packet, NULL);
if (status < 0) {
printk(KERN_ERR "ip6_queue: failed to register queue handler\n");
goto cleanup_sysctl;
}
return status;
cleanup:
nf_unregister_queue_handler(PF_INET6);
br_write_lock_bh(BR_NETPROTO_LOCK);
br_write_unlock_bh(BR_NETPROTO_LOCK);
ipq_flush(NF_DROP);
cleanup_sysctl:
unregister_sysctl_table(ipq_sysctl_header);
unregister_netdevice_notifier(&ipq_dev_notifier);
proc_net_remove(IPQ_PROC_FS_NAME);
cleanup_ipqnl:
sock_release(ipqnl->socket);
down(&ipqnl_sem);
up(&ipqnl_sem);
cleanup_netlink_notifier:
netlink_unregister_notifier(&ipq_nl_notifier);
return status; return status;
} }
static int __init init(void)
{
return init_or_cleanup(1);
}
static void __exit fini(void) static void __exit fini(void)
{ {
unregister_sysctl_table(ip6q_sysctl_header); init_or_cleanup(0);
proc_net_remove(IPQ_PROC_FS_NAME);
unregister_netdevice_notifier(&ip6q_dev_notifier);
ip6q_destroy_queue(nlq6);
sock_release(nfnl->socket);
} }
MODULE_DESCRIPTION("IPv6 packet queue handler"); MODULE_DESCRIPTION("IPv6 packet queue handler");
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment