Commit c7286343 authored by Stephen Hemminger's avatar Stephen Hemminger Committed by David S. Miller

[NET]: Network packet type using RCU.

* packet type converted from linked list to list_macro
* writer lock replaced with spin lock, readers use RCU
* add __dev_remove_pack for callers that can't sleep.
* af_packet changes to handle the new sleeping requirements, and the
  possible races that they could cause.
parent a269756e
......@@ -456,7 +456,7 @@ struct packet_type
int (*func) (struct sk_buff *, struct net_device *,
struct packet_type *);
void *data; /* Private to the packet type */
struct packet_type *next;
struct list_head list;
};
......@@ -472,6 +472,7 @@ extern int netdev_boot_setup_check(struct net_device *dev);
extern struct net_device *dev_getbyhwaddr(unsigned short type, char *hwaddr);
extern void dev_add_pack(struct packet_type *pt);
extern void dev_remove_pack(struct packet_type *pt);
extern void __dev_remove_pack(struct packet_type *pt);
extern int dev_get(const char *name);
extern struct net_device *dev_get_by_flags(unsigned short flags,
unsigned short mask);
......
......@@ -90,7 +90,6 @@
#include <linux/etherdevice.h>
#include <linux/notifier.h>
#include <linux/skbuff.h>
#include <linux/brlock.h>
#include <net/sock.h>
#include <linux/rtnetlink.h>
#include <linux/proc_fs.h>
......@@ -170,8 +169,9 @@ const char *if_port_text[] = {
* 86DD IPv6
*/
static struct packet_type *ptype_base[16]; /* 16 way hashed list */
static struct packet_type *ptype_all; /* Taps */
static spinlock_t ptype_lock = SPIN_LOCK_UNLOCKED;
static struct list_head ptype_base[16]; /* 16 way hashed list */
static struct list_head ptype_all; /* Taps */
#ifdef OFFLINE_SAMPLE
static void sample_queue(unsigned long dummy);
......@@ -239,14 +239,17 @@ int netdev_nit;
* Add a protocol handler to the networking stack. The passed &packet_type
* is linked into kernel lists and may not be freed until it has been
* removed from the kernel lists.
*
* This call does not sleep, therefore it cannot
* guarantee that all CPUs that are in the middle of receiving packets
* will see the new packet type (until the next received packet).
*/
void dev_add_pack(struct packet_type *pt)
{
int hash;
br_write_lock_bh(BR_NETPROTO_LOCK);
spin_lock_bh(&ptype_lock);
#ifdef CONFIG_NET_FASTROUTE
/* Hack to detect packet socket */
if (pt->data && (long)(pt->data) != 1) {
......@@ -256,52 +259,76 @@ void dev_add_pack(struct packet_type *pt)
#endif
if (pt->type == htons(ETH_P_ALL)) {
netdev_nit++;
pt->next = ptype_all;
ptype_all = pt;
list_add_rcu(&pt->list, &ptype_all);
} else {
hash = ntohs(pt->type) & 15;
pt->next = ptype_base[hash];
ptype_base[hash] = pt;
list_add_rcu(&pt->list, &ptype_base[hash]);
}
br_write_unlock_bh(BR_NETPROTO_LOCK);
spin_unlock_bh(&ptype_lock);
}
extern void linkwatch_run_queue(void);
/**
* dev_remove_pack - remove packet handler
* __dev_remove_pack - remove packet handler
* @pt: packet type declaration
*
* Remove a protocol handler that was previously added to the kernel
* protocol handlers by dev_add_pack(). The passed &packet_type is removed
* from the kernel lists and can be freed or reused once this function
* returns.
* returns.
*
* The packet type might still be in use by receivers
* and must not be freed until after all the CPUs have gone
* through a quiescent state.
*/
void dev_remove_pack(struct packet_type *pt)
void __dev_remove_pack(struct packet_type *pt)
{
struct packet_type **pt1;
struct list_head *head;
struct packet_type *pt1;
br_write_lock_bh(BR_NETPROTO_LOCK);
spin_lock_bh(&ptype_lock);
if (pt->type == htons(ETH_P_ALL)) {
netdev_nit--;
pt1 = &ptype_all;
head = &ptype_all;
} else
pt1 = &ptype_base[ntohs(pt->type) & 15];
head = &ptype_base[ntohs(pt->type) & 15];
for (; *pt1; pt1 = &((*pt1)->next)) {
if (pt == *pt1) {
*pt1 = pt->next;
list_for_each_entry(pt1, head, list) {
if (pt == pt1) {
#ifdef CONFIG_NET_FASTROUTE
if (pt->data)
netdev_fastroute_obstacles--;
#endif
list_del_rcu(&pt->list);
goto out;
}
}
printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
out:
br_write_unlock_bh(BR_NETPROTO_LOCK);
spin_unlock_bh(&ptype_lock);
}
/**
* dev_remove_pack - remove packet handler
* @pt: packet type declaration
*
* Remove a protocol handler that was previously added to the kernel
* protocol handlers by dev_add_pack(). The passed &packet_type is removed
* from the kernel lists and can be freed or reused once this function
* returns.
*
* This call sleeps to guarantee that no CPU is looking at the packet
* type after return. Callers that cannot sleep (for example, while
* holding a spinlock) should use __dev_remove_pack() instead and call
* synchronize_net() themselves once sleeping is allowed.
*/
void dev_remove_pack(struct packet_type *pt)
{
/* Unlink @pt from its list; in-flight receivers may still be using it. */
__dev_remove_pack(pt);
/* Wait for those receivers to finish before the caller may reuse @pt. */
synchronize_net();
}
/******************************************************************************
......@@ -943,8 +970,8 @@ void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
struct packet_type *ptype;
do_gettimeofday(&skb->stamp);
br_read_lock(BR_NETPROTO_LOCK);
for (ptype = ptype_all; ptype; ptype = ptype->next) {
rcu_read_lock();
list_for_each_entry_rcu(ptype, &ptype_all, list) {
/* Never send packets back to the socket
* they originated from - MvS (miquels@drinkel.ow.org)
*/
......@@ -974,7 +1001,7 @@ void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
ptype->func(skb2, skb->dev, ptype);
}
}
br_read_unlock(BR_NETPROTO_LOCK);
rcu_read_unlock();
}
/* Calculate csum in the case, when packet is misrouted.
......@@ -1488,7 +1515,8 @@ int netif_receive_skb(struct sk_buff *skb)
skb->h.raw = skb->nh.raw = skb->data;
pt_prev = NULL;
for (ptype = ptype_all; ptype; ptype = ptype->next) {
rcu_read_lock();
list_for_each_entry_rcu(ptype, &ptype_all, list) {
if (!ptype->dev || ptype->dev == skb->dev) {
if (pt_prev) {
if (!pt_prev->data) {
......@@ -1511,17 +1539,15 @@ int netif_receive_skb(struct sk_buff *skb)
#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
if (skb->dev->br_port) {
int ret;
ret = handle_bridge(skb, pt_prev);
if (br_handle_frame_hook(skb) == 0)
return ret;
goto out;
pt_prev = NULL;
}
#endif
for (ptype = ptype_base[ntohs(type) & 15]; ptype; ptype = ptype->next) {
list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) {
if (ptype->type == type &&
(!ptype->dev || ptype->dev == skb->dev)) {
if (pt_prev) {
......@@ -1552,6 +1578,8 @@ int netif_receive_skb(struct sk_buff *skb)
ret = NET_RX_DROP;
}
out:
rcu_read_unlock();
return ret;
}
......@@ -1625,7 +1653,8 @@ static void net_rx_action(struct softirq_action *h)
unsigned long start_time = jiffies;
int budget = netdev_max_backlog;
br_read_lock(BR_NETPROTO_LOCK);
preempt_disable();
local_irq_disable();
while (!list_empty(&queue->poll_list)) {
......@@ -1654,7 +1683,7 @@ static void net_rx_action(struct softirq_action *h)
}
out:
local_irq_enable();
br_read_unlock(BR_NETPROTO_LOCK);
preempt_enable();
return;
softnet_break:
......@@ -1997,9 +2026,9 @@ int netdev_set_master(struct net_device *slave, struct net_device *master)
dev_hold(master);
}
br_write_lock_bh(BR_NETPROTO_LOCK);
slave->master = master;
br_write_unlock_bh(BR_NETPROTO_LOCK);
synchronize_net();
if (old)
dev_put(old);
......@@ -2663,8 +2692,8 @@ int netdev_finish_unregister(struct net_device *dev)
/* Synchronize with packet receive processing. */
void synchronize_net(void)
{
br_write_lock_bh(BR_NETPROTO_LOCK);
br_write_unlock_bh(BR_NETPROTO_LOCK);
might_sleep();
synchronize_kernel();
}
/**
......@@ -2848,6 +2877,10 @@ static int __init net_dev_init(void)
subsystem_register(&net_subsys);
INIT_LIST_HEAD(&ptype_all);
for (i = 0; i < 16; i++)
INIT_LIST_HEAD(&ptype_base[i]);
#ifdef CONFIG_NET_DIVERT
dv_init();
#endif /* CONFIG_NET_DIVERT */
......
......@@ -570,6 +570,7 @@ EXPORT_SYMBOL(netif_rx);
EXPORT_SYMBOL(netif_receive_skb);
EXPORT_SYMBOL(dev_add_pack);
EXPORT_SYMBOL(dev_remove_pack);
EXPORT_SYMBOL(__dev_remove_pack);
EXPORT_SYMBOL(dev_get);
EXPORT_SYMBOL(dev_alloc);
EXPORT_SYMBOL(dev_alloc_name);
......
......@@ -774,6 +774,7 @@ static int packet_release(struct socket *sock)
*/
dev_remove_pack(&po->prot_hook);
po->running = 0;
po->num = 0;
__sock_put(sk);
}
......@@ -819,9 +820,12 @@ static int packet_do_bind(struct sock *sk, struct net_device *dev, int protocol)
spin_lock(&po->bind_lock);
if (po->running) {
dev_remove_pack(&po->prot_hook);
__sock_put(sk);
po->running = 0;
po->num = 0;
spin_unlock(&po->bind_lock);
dev_remove_pack(&po->prot_hook);
spin_lock(&po->bind_lock);
}
po->num = protocol;
......@@ -1374,7 +1378,7 @@ static int packet_notifier(struct notifier_block *this, unsigned long msg, void
if (dev->ifindex == po->ifindex) {
spin_lock(&po->bind_lock);
if (po->running) {
dev_remove_pack(&po->prot_hook);
__dev_remove_pack(&po->prot_hook);
__sock_put(sk);
po->running = 0;
sk->err = ENETDOWN;
......@@ -1618,9 +1622,14 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing
/* Detach socket from network */
spin_lock(&po->bind_lock);
if (po->running)
dev_remove_pack(&po->prot_hook);
if (po->running) {
__dev_remove_pack(&po->prot_hook);
po->num = 0;
po->running = 0;
}
spin_unlock(&po->bind_lock);
synchronize_net();
err = -EBUSY;
if (closing || atomic_read(&po->mapped) == 0) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment