Commit 0fa7fa98, authored by Pavel Emelyanov, committed by David S. Miller

packet: Protect packet sk list with mutex (v2)

Change since v1:

* Fixed inuse counters access spotted by Eric

In patch eea68e2f (packet: Report socket mclist info via diag module) I
introduced a "scheduling while atomic" problem in the packet diag module -- the
socket list is traversed under rcu_read_lock(), while the sk mclist access
performed under it requires the rtnl lock (i.e. a mutex) to be taken.

[152363.820563] BUG: scheduling while atomic: crtools/12517/0x10000002
[152363.820573] 4 locks held by crtools/12517:
[152363.820581]  #0:  (sock_diag_mutex){+.+.+.}, at: [<ffffffff81a2dcb5>] sock_diag_rcv+0x1f/0x3e
[152363.820613]  #1:  (sock_diag_table_mutex){+.+.+.}, at: [<ffffffff81a2de70>] sock_diag_rcv_msg+0xdb/0x11a
[152363.820644]  #2:  (nlk->cb_mutex){+.+.+.}, at: [<ffffffff81a67d01>] netlink_dump+0x23/0x1ab
[152363.820693]  #3:  (rcu_read_lock){.+.+..}, at: [<ffffffff81b6a049>] packet_diag_dump+0x0/0x1af

A similar problem was then re-introduced by further packet diag patches (the
fanout mutex and the pgvec mutex for rings) :(

Apart from being terribly sorry for the above, I propose to change the packet
sk list protection from spinlock to mutex. This lock currently protects two
modifications:

* sklist
* prot inuse counters

The sklist modifications can simply be reprotected with a mutex, since they
already occur in a sleeping context. The inuse counter modifications are
trickier -- __this_cpu_*() operations are used inside, which requires the caller
to handle any potential context issues itself. Since packet sockets' counters
are modified in only two places (packet_create and packet_release), we only need
to protect those contexts from being preempted. BH disabling is not required in
this case.

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent b32607dd
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
#include <linux/spinlock.h> #include <linux/spinlock.h>
struct netns_packet { struct netns_packet {
spinlock_t sklist_lock; struct mutex sklist_lock;
struct hlist_head sklist; struct hlist_head sklist;
}; };
......
...@@ -2317,10 +2317,13 @@ static int packet_release(struct socket *sock) ...@@ -2317,10 +2317,13 @@ static int packet_release(struct socket *sock)
net = sock_net(sk); net = sock_net(sk);
po = pkt_sk(sk); po = pkt_sk(sk);
spin_lock_bh(&net->packet.sklist_lock); mutex_lock(&net->packet.sklist_lock);
sk_del_node_init_rcu(sk); sk_del_node_init_rcu(sk);
mutex_unlock(&net->packet.sklist_lock);
preempt_disable();
sock_prot_inuse_add(net, sk->sk_prot, -1); sock_prot_inuse_add(net, sk->sk_prot, -1);
spin_unlock_bh(&net->packet.sklist_lock); preempt_enable();
spin_lock(&po->bind_lock); spin_lock(&po->bind_lock);
unregister_prot_hook(sk, false); unregister_prot_hook(sk, false);
...@@ -2519,10 +2522,13 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, ...@@ -2519,10 +2522,13 @@ static int packet_create(struct net *net, struct socket *sock, int protocol,
register_prot_hook(sk); register_prot_hook(sk);
} }
spin_lock_bh(&net->packet.sklist_lock); mutex_lock(&net->packet.sklist_lock);
sk_add_node_rcu(sk, &net->packet.sklist); sk_add_node_rcu(sk, &net->packet.sklist);
mutex_unlock(&net->packet.sklist_lock);
preempt_disable();
sock_prot_inuse_add(net, &packet_proto, 1); sock_prot_inuse_add(net, &packet_proto, 1);
spin_unlock_bh(&net->packet.sklist_lock); preempt_enable();
return 0; return 0;
out: out:
...@@ -3775,7 +3781,7 @@ static const struct file_operations packet_seq_fops = { ...@@ -3775,7 +3781,7 @@ static const struct file_operations packet_seq_fops = {
static int __net_init packet_net_init(struct net *net) static int __net_init packet_net_init(struct net *net)
{ {
spin_lock_init(&net->packet.sklist_lock); mutex_init(&net->packet.sklist_lock);
INIT_HLIST_HEAD(&net->packet.sklist); INIT_HLIST_HEAD(&net->packet.sklist);
if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops)) if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops))
......
...@@ -177,8 +177,8 @@ static int packet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) ...@@ -177,8 +177,8 @@ static int packet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
net = sock_net(skb->sk); net = sock_net(skb->sk);
req = nlmsg_data(cb->nlh); req = nlmsg_data(cb->nlh);
rcu_read_lock(); mutex_lock(&net->packet.sklist_lock);
sk_for_each_rcu(sk, node, &net->packet.sklist) { sk_for_each(sk, node, &net->packet.sklist) {
if (!net_eq(sock_net(sk), net)) if (!net_eq(sock_net(sk), net))
continue; continue;
if (num < s_num) if (num < s_num)
...@@ -192,7 +192,7 @@ static int packet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) ...@@ -192,7 +192,7 @@ static int packet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
num++; num++;
} }
done: done:
rcu_read_unlock(); mutex_unlock(&net->packet.sklist_lock);
cb->args[0] = num; cb->args[0] = num;
return skb->len; return skb->len;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment