Commit 7aeb5c3f authored by David S. Miller

[IPV{4,6}]: Make icmp_socket per-cpu and simplify locking.

parent fab4da15
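
The change replaces the single global ICMP control socket with one socket per CPU, selected through the current processor id, so CPUs never contend for each other's socket. A minimal sketch of the pattern, using hypothetical names (__my_socket / my_socket) rather than anything from the tree; only APIs that appear in the diff itself are assumed:

static DEFINE_PER_CPU(struct socket *, __my_socket) = NULL;
#define my_socket per_cpu(__my_socket, smp_processor_id())

/*
 * smp_processor_id() is only stable while local softirqs are off, so
 * every use of my_socket must sit inside a local_bh_disable() /
 * local_bh_enable() section -- which is what the new lock helpers
 * below provide.
 */

With each CPU owning its own socket, the old cross-CPU holder bookkeeping (icmp_xmit_holder) becomes dead weight, which is what lets the locking collapse to a plain trylock.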
net/ipv4/icmp.c
@@ -223,57 +223,28 @@ struct icmp_control {
 static struct icmp_control icmp_pointers[NR_ICMP_TYPES+1];
 
 /*
- * The ICMP socket. This is the most convenient way to flow control
+ * The ICMP socket(s). This is the most convenient way to flow control
  * our ICMP output as well as maintain a clean interface throughout
  * all layers. All Socketless IP sends will soon be gone.
+ *
+ * On SMP we have one ICMP socket per-cpu.
  */
-struct socket *icmp_socket;
-
-/* ICMPv4 socket is only a bit non-reenterable (unlike ICMPv6,
-   which is strongly non-reenterable). A bit later it will be made
-   reenterable and the lock may be removed then.
- */
-
-static int icmp_xmit_holder = -1;
-
-static int icmp_xmit_lock_bh(void)
-{
-        int rc;
-        if (!spin_trylock(&icmp_socket->sk->lock.slock)) {
-                rc = -EAGAIN;
-                if (icmp_xmit_holder == smp_processor_id())
-                        goto out;
-                spin_lock(&icmp_socket->sk->lock.slock);
-        }
-        rc = 0;
-        icmp_xmit_holder = smp_processor_id();
-out:
-        return rc;
-}
+static DEFINE_PER_CPU(struct socket *, __icmp_socket) = NULL;
+#define icmp_socket per_cpu(__icmp_socket, smp_processor_id())
 
-static __inline__ int icmp_xmit_lock(void)
+static __inline__ void icmp_xmit_lock(void)
 {
-        int ret;
         local_bh_disable();
-        ret = icmp_xmit_lock_bh();
-        if (ret)
-                local_bh_enable();
-        return ret;
-}
 
-static void icmp_xmit_unlock_bh(void)
-{
-        icmp_xmit_holder = -1;
-        spin_unlock(&icmp_socket->sk->lock.slock);
-}
+        if (unlikely(!spin_trylock(&icmp_socket->sk->lock.slock)))
+                BUG();
+}
 
-static __inline__ void icmp_xmit_unlock(void)
+static void icmp_xmit_unlock(void)
 {
-        icmp_xmit_unlock_bh();
-        local_bh_enable();
+        spin_unlock_bh(&icmp_socket->sk->lock.slock);
 }
 
 /*
  * Send an ICMP frame.
  */
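
The trylock-or-BUG() form is safe precisely because the lock can no longer be contended: the socket is private to this CPU, and local_bh_disable() keeps the only other ICMP transmit context on this CPU (softirq) from running. A sketch of the invariant, mirroring the new icmp_xmit_lock() under a hypothetical name:

static void my_xmit_lock(void)
{
        local_bh_disable();     /* pin the CPU id, shut out local softirqs */

        /*
         * No other CPU can reach this per-cpu socket, and nothing on
         * this CPU can preempt us while BHs are off, so a failed
         * trylock means the invariant is broken -- hence BUG(), not
         * spinning.
         */
        if (unlikely(!spin_trylock(&my_socket->sk->lock.slock)))
                BUG();
}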
@@ -404,10 +375,11 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
         struct rtable *rt = (struct rtable *)skb->dst;
         u32 daddr;
 
-        if (ip_options_echo(&icmp_param->replyopts, skb) ||
-            icmp_xmit_lock_bh())
+        if (ip_options_echo(&icmp_param->replyopts, skb))
                 goto out;
 
+        icmp_xmit_lock();
+
         icmp_param->data.icmph.checksum = 0;
         icmp_out_count(icmp_param->data.icmph.type);
@@ -434,7 +406,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
         icmp_push_reply(icmp_param, &ipc, rt);
         ip_rt_put(rt);
 out_unlock:
-        icmp_xmit_unlock_bh();
+        icmp_xmit_unlock();
 out:;
 }
@@ -519,8 +491,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, u32 info)
                 }
         }
 
-        if (icmp_xmit_lock())
-                goto out;
+        icmp_xmit_lock();
 
         /*
          * Construct source address and options.
@@ -1141,13 +1112,23 @@ static struct icmp_control icmp_pointers[NR_ICMP_TYPES + 1] = {
 void __init icmp_init(struct net_proto_family *ops)
 {
         struct inet_opt *inet;
-        int err = sock_create(PF_INET, SOCK_RAW, IPPROTO_ICMP, &icmp_socket);
+        int i;
 
-        if (err < 0)
-                panic("Failed to create the ICMP control socket.\n");
-        icmp_socket->sk->allocation = GFP_ATOMIC;
-        icmp_socket->sk->sndbuf = SK_WMEM_MAX * 2;
-        inet = inet_sk(icmp_socket->sk);
-        inet->ttl = MAXTTL;
-        inet->pmtudisc = IP_PMTUDISC_DONT;
+        for (i = 0; i < NR_CPUS; i++) {
+                int err;
+
+                if (!cpu_possible(i))
+                        continue;
+
+                err = sock_create(PF_INET, SOCK_RAW, IPPROTO_ICMP,
+                                  &per_cpu(__icmp_socket, i));
+                if (err < 0)
+                        panic("Failed to create the ICMP control socket.\n");
+
+                per_cpu(__icmp_socket, i)->sk->allocation = GFP_ATOMIC;
+                per_cpu(__icmp_socket, i)->sk->sndbuf = SK_WMEM_MAX * 2;
+                inet = inet_sk(per_cpu(__icmp_socket, i)->sk);
+                inet->ttl = MAXTTL;
+                inet->pmtudisc = IP_PMTUDISC_DONT;
@@ -1155,5 +1136,6 @@ void __init icmp_init(struct net_proto_family *ops)
                  * see it, we do not wish this socket to see incoming
                  * packets.
                  */
-        icmp_socket->sk->prot->unhash(icmp_socket->sk);
+                per_cpu(__icmp_socket, i)->sk->prot->unhash(per_cpu(__icmp_socket, i)->sk);
+        }
 }
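
Initialization now has to visit every CPU slot up front. On this kernel NR_CPUS is a compile-time upper bound, not the number of CPUs actually present, so the loop filters through cpu_possible(). The shape, reduced to a sketch with a hypothetical setup_one() helper:

int i;

for (i = 0; i < NR_CPUS; i++) {
        if (!cpu_possible(i))   /* CPU numbering may be sparse */
                continue;
        setup_one(&per_cpu(__my_socket, i));
}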
net/ipv6/icmp.c
@@ -67,10 +67,12 @@
 DEFINE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics);
 
 /*
- * ICMP socket for flow control.
+ * ICMP socket(s) for flow control.
  */
-struct socket *icmpv6_socket;
+/* XXX We can't use per_cpu because this can be modular... */
+static struct socket *__icmpv6_socket[NR_CPUS];
+#define icmpv6_socket __icmpv6_socket[smp_processor_id()]
 
 static int icmpv6_rcv(struct sk_buff *skb);
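
The XXX comment marks the real constraint: in this kernel the per-cpu data section is only available to the core image, so DEFINE_PER_CPU cannot be used from code that may be built as a module. The IPv6 side therefore falls back to an ordinary NR_CPUS-sized array, trading the space of impossible CPU slots (and per-cpu cache-line separation) for module-friendliness. A sketch with a hypothetical name:

/* Module-safe stand-in for DEFINE_PER_CPU: worst-case-sized array,
 * indexed by the processor id while BHs are disabled. */
static struct socket *__my_mod_socket[NR_CPUS];
#define my_mod_socket __my_mod_socket[smp_processor_id()]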
@@ -87,40 +89,16 @@ struct icmpv6_msg {
         __u32 csum;
 };
 
-static int icmpv6_xmit_holder = -1;
-
-static int icmpv6_xmit_lock_bh(void)
-{
-        if (!spin_trylock(&icmpv6_socket->sk->lock.slock)) {
-                if (icmpv6_xmit_holder == smp_processor_id())
-                        return -EAGAIN;
-                spin_lock(&icmpv6_socket->sk->lock.slock);
-        }
-        icmpv6_xmit_holder = smp_processor_id();
-        return 0;
-}
-
-static __inline__ int icmpv6_xmit_lock(void)
+static __inline__ void icmpv6_xmit_lock(void)
 {
-        int ret;
         local_bh_disable();
-        ret = icmpv6_xmit_lock_bh();
-        if (ret)
-                local_bh_enable();
-        return ret;
-}
 
-static void icmpv6_xmit_unlock_bh(void)
-{
-        icmpv6_xmit_holder = -1;
-        spin_unlock(&icmpv6_socket->sk->lock.slock);
+        if (unlikely(!spin_trylock(&icmpv6_socket->sk->lock.slock)))
+                BUG();
 }
 
 static __inline__ void icmpv6_xmit_unlock(void)
 {
-        icmpv6_xmit_unlock_bh();
-        local_bh_enable();
+        spin_unlock_bh(&icmpv6_socket->sk->lock.slock);
 }
@@ -341,8 +319,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
         fl.uli_u.icmpt.type = type;
         fl.uli_u.icmpt.code = code;
 
-        if (icmpv6_xmit_lock())
-                return;
+        icmpv6_xmit_lock();
 
         if (!icmpv6_xrlim_allow(sk, type, &fl))
                 goto out;
@@ -415,15 +392,14 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
         fl.uli_u.icmpt.type = ICMPV6_ECHO_REPLY;
         fl.uli_u.icmpt.code = 0;
 
-        if (icmpv6_xmit_lock_bh())
-                return;
+        icmpv6_xmit_lock();
 
         ip6_build_xmit(sk, icmpv6_getfrag, &msg, &fl, msg.len, NULL, -1,
                        MSG_DONTWAIT);
         ICMP6_INC_STATS_BH(Icmp6OutEchoReplies);
         ICMP6_INC_STATS_BH(Icmp6OutMsgs);
-        icmpv6_xmit_unlock_bh();
+        icmpv6_xmit_unlock();
 }
 
 static void icmpv6_notify(struct sk_buff *skb, int type, int code, u32 info)
@@ -626,26 +602,47 @@ static int icmpv6_rcv(struct sk_buff *skb)
 int __init icmpv6_init(struct net_proto_family *ops)
 {
         struct sock *sk;
-        int err;
+        int i;
 
-        err = sock_create(PF_INET6, SOCK_RAW, IPPROTO_ICMPV6, &icmpv6_socket);
-        if (err < 0) {
-                printk(KERN_ERR
-                       "Failed to initialize the ICMP6 control socket (err %d).\n",
-                       err);
-                icmpv6_socket = NULL; /* for safety */
-                return err;
-        }
+        for (i = 0; i < NR_CPUS; i++) {
+                int err;
 
-        sk = icmpv6_socket->sk;
-        sk->allocation = GFP_ATOMIC;
-        sk->sndbuf = SK_WMEM_MAX*2;
-        sk->prot->unhash(sk);
+                if (!cpu_possible(i))
+                        continue;
+
+                err = sock_create(PF_INET6, SOCK_RAW, IPPROTO_ICMPV6,
+                                  &__icmpv6_socket[i]);
+                if (err < 0) {
+                        int j;
+
+                        printk(KERN_ERR
+                               "Failed to initialize the ICMP6 control socket "
+                               "(err %d).\n",
+                               err);
+                        for (j = 0; j < i; j++) {
+                                if (!cpu_possible(j))
+                                        continue;
+                                sock_release(__icmpv6_socket[j]);
+                                __icmpv6_socket[j] = NULL; /* for safety */
+                        }
+                        return err;
+                }
+
+                sk = __icmpv6_socket[i]->sk;
+                sk->allocation = GFP_ATOMIC;
+                sk->sndbuf = SK_WMEM_MAX*2;
+                sk->prot->unhash(sk);
+        }
 
         if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0) {
                 printk(KERN_ERR "Failed to register ICMP6 protocol\n");
-                sock_release(icmpv6_socket);
-                icmpv6_socket = NULL;
+                for (i = 0; i < NR_CPUS; i++) {
+                        if (!cpu_possible(i))
+                                continue;
+                        sock_release(__icmpv6_socket[i]);
+                        __icmpv6_socket[i] = NULL;
+                }
                 return -EAGAIN;
         }
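
Unlike the IPv4 path, which simply panics, icmpv6_init() can fail part-way through the loop and must unwind: every socket created so far is released before returning, again skipping impossible CPUs. The same loop reappears on the inet6_add_protocol() failure path and in icmpv6_cleanup() below. The idiom, sketched with a hypothetical create_one() helper:

for (i = 0; i < NR_CPUS; i++) {
        if (!cpu_possible(i))
                continue;
        err = create_one(&__my_mod_socket[i]);
        if (err < 0) {
                int j;

                /* undo only the slots that were actually created */
                for (j = 0; j < i; j++) {
                        if (!cpu_possible(j))
                                continue;
                        sock_release(__my_mod_socket[j]);
                        __my_mod_socket[j] = NULL;
                }
                return err;
        }
}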
@@ -654,8 +651,14 @@ int __init icmpv6_init(struct net_proto_family *ops)
 void icmpv6_cleanup(void)
 {
-        sock_release(icmpv6_socket);
-        icmpv6_socket = NULL; /* For safety. */
+        int i;
+
+        for (i = 0; i < NR_CPUS; i++) {
+                if (!cpu_possible(i))
+                        continue;
+                sock_release(__icmpv6_socket[i]);
+                __icmpv6_socket[i] = NULL; /* For safety. */
+        }
         inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
 }