Commit 1ae132b0 authored by Hans Schillstrom, committed by Pablo Neira Ayuso

IPVS: Change of socket usage to enable name space exit.

If the sync daemons are running in a name space when it crashes
or gets killed, there is no way to stop them except for a reboot.
When all patches are there, ip_vs_core will handle register_pernet_(),
i.e. ip_vs_sync_init() and ip_vs_sync_cleanup() will be removed.

Kernel threads should not increment the use count of a socket.
By calling sk_change_net() after creating a socket this is avoided.
sock_release() can't be used; instead, sk_release_kernel() should be used.

Thanks to Eric W Biederman for his advice.
Signed-off-by: Hans Schillstrom <hans@schillstrom.com>
[horms@verge.net.au: minor edit to changelog]
Signed-off-by: Simon Horman <horms@verge.net.au>
parent 103a9778
...@@ -1896,7 +1896,7 @@ static int __net_init __ip_vs_init(struct net *net) ...@@ -1896,7 +1896,7 @@ static int __net_init __ip_vs_init(struct net *net)
static void __net_exit __ip_vs_cleanup(struct net *net) static void __net_exit __ip_vs_cleanup(struct net *net)
{ {
IP_VS_DBG(10, "ipvs netns %d released\n", net_ipvs(net)->gen); IP_VS_DBG(2, "ipvs netns %d released\n", net_ipvs(net)->gen);
} }
static struct pernet_operations ipvs_core_ops = { static struct pernet_operations ipvs_core_ops = {
......
...@@ -1303,13 +1303,18 @@ static struct socket *make_send_sock(struct net *net) ...@@ -1303,13 +1303,18 @@ static struct socket *make_send_sock(struct net *net)
struct socket *sock; struct socket *sock;
int result; int result;
/* First create a socket */ /* First create a socket move it to right name space later */
result = __sock_create(net, PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock, 1); result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
if (result < 0) { if (result < 0) {
pr_err("Error during creation of socket; terminating\n"); pr_err("Error during creation of socket; terminating\n");
return ERR_PTR(result); return ERR_PTR(result);
} }
/*
* Kernel sockets that are a part of a namespace, should not
* hold a reference to a namespace in order to allow to stop it.
* After sk_change_net should be released using sk_release_kernel.
*/
sk_change_net(sock->sk, net);
result = set_mcast_if(sock->sk, ipvs->master_mcast_ifn); result = set_mcast_if(sock->sk, ipvs->master_mcast_ifn);
if (result < 0) { if (result < 0) {
pr_err("Error setting outbound mcast interface\n"); pr_err("Error setting outbound mcast interface\n");
...@@ -1334,8 +1339,8 @@ static struct socket *make_send_sock(struct net *net) ...@@ -1334,8 +1339,8 @@ static struct socket *make_send_sock(struct net *net)
return sock; return sock;
error: error:
sock_release(sock); sk_release_kernel(sock->sk);
return ERR_PTR(result); return ERR_PTR(result);
} }
...@@ -1350,12 +1355,17 @@ static struct socket *make_receive_sock(struct net *net) ...@@ -1350,12 +1355,17 @@ static struct socket *make_receive_sock(struct net *net)
int result; int result;
/* First create a socket */ /* First create a socket */
result = __sock_create(net, PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock, 1); result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
if (result < 0) { if (result < 0) {
pr_err("Error during creation of socket; terminating\n"); pr_err("Error during creation of socket; terminating\n");
return ERR_PTR(result); return ERR_PTR(result);
} }
/*
* Kernel sockets that are a part of a namespace, should not
* hold a reference to a namespace in order to allow to stop it.
* After sk_change_net should be released using sk_release_kernel.
*/
sk_change_net(sock->sk, net);
/* it is equivalent to the REUSEADDR option in user-space */ /* it is equivalent to the REUSEADDR option in user-space */
sock->sk->sk_reuse = 1; sock->sk->sk_reuse = 1;
...@@ -1377,8 +1387,8 @@ static struct socket *make_receive_sock(struct net *net) ...@@ -1377,8 +1387,8 @@ static struct socket *make_receive_sock(struct net *net)
return sock; return sock;
error: error:
sock_release(sock); sk_release_kernel(sock->sk);
return ERR_PTR(result); return ERR_PTR(result);
} }
...@@ -1473,7 +1483,7 @@ static int sync_thread_master(void *data) ...@@ -1473,7 +1483,7 @@ static int sync_thread_master(void *data)
ip_vs_sync_buff_release(sb); ip_vs_sync_buff_release(sb);
/* release the sending multicast socket */ /* release the sending multicast socket */
sock_release(tinfo->sock); sk_release_kernel(tinfo->sock->sk);
kfree(tinfo); kfree(tinfo);
return 0; return 0;
...@@ -1513,7 +1523,7 @@ static int sync_thread_backup(void *data) ...@@ -1513,7 +1523,7 @@ static int sync_thread_backup(void *data)
} }
/* release the sending multicast socket */ /* release the sending multicast socket */
sock_release(tinfo->sock); sk_release_kernel(tinfo->sock->sk);
kfree(tinfo->buf); kfree(tinfo->buf);
kfree(tinfo); kfree(tinfo);
...@@ -1601,7 +1611,7 @@ int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid) ...@@ -1601,7 +1611,7 @@ int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid)
outbuf: outbuf:
kfree(buf); kfree(buf);
outsocket: outsocket:
sock_release(sock); sk_release_kernel(sock->sk);
out: out:
return result; return result;
} }
...@@ -1610,6 +1620,7 @@ int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid) ...@@ -1610,6 +1620,7 @@ int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid)
int stop_sync_thread(struct net *net, int state) int stop_sync_thread(struct net *net, int state)
{ {
struct netns_ipvs *ipvs = net_ipvs(net); struct netns_ipvs *ipvs = net_ipvs(net);
int retc = -EINVAL;
IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current)); IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));
...@@ -1629,7 +1640,7 @@ int stop_sync_thread(struct net *net, int state) ...@@ -1629,7 +1640,7 @@ int stop_sync_thread(struct net *net, int state)
spin_lock_bh(&ipvs->sync_lock); spin_lock_bh(&ipvs->sync_lock);
ipvs->sync_state &= ~IP_VS_STATE_MASTER; ipvs->sync_state &= ~IP_VS_STATE_MASTER;
spin_unlock_bh(&ipvs->sync_lock); spin_unlock_bh(&ipvs->sync_lock);
kthread_stop(ipvs->master_thread); retc = kthread_stop(ipvs->master_thread);
ipvs->master_thread = NULL; ipvs->master_thread = NULL;
} else if (state == IP_VS_STATE_BACKUP) { } else if (state == IP_VS_STATE_BACKUP) {
if (!ipvs->backup_thread) if (!ipvs->backup_thread)
...@@ -1639,16 +1650,14 @@ int stop_sync_thread(struct net *net, int state) ...@@ -1639,16 +1650,14 @@ int stop_sync_thread(struct net *net, int state)
task_pid_nr(ipvs->backup_thread)); task_pid_nr(ipvs->backup_thread));
ipvs->sync_state &= ~IP_VS_STATE_BACKUP; ipvs->sync_state &= ~IP_VS_STATE_BACKUP;
kthread_stop(ipvs->backup_thread); retc = kthread_stop(ipvs->backup_thread);
ipvs->backup_thread = NULL; ipvs->backup_thread = NULL;
} else {
return -EINVAL;
} }
/* decrease the module use count */ /* decrease the module use count */
ip_vs_use_count_dec(); ip_vs_use_count_dec();
return 0; return retc;
} }
/* /*
...@@ -1670,8 +1679,15 @@ static int __net_init __ip_vs_sync_init(struct net *net) ...@@ -1670,8 +1679,15 @@ static int __net_init __ip_vs_sync_init(struct net *net)
static void __ip_vs_sync_cleanup(struct net *net) static void __ip_vs_sync_cleanup(struct net *net)
{ {
stop_sync_thread(net, IP_VS_STATE_MASTER); int retc;
stop_sync_thread(net, IP_VS_STATE_BACKUP);
retc = stop_sync_thread(net, IP_VS_STATE_MASTER);
if (retc && retc != -ESRCH)
pr_err("Failed to stop Master Daemon\n");
retc = stop_sync_thread(net, IP_VS_STATE_BACKUP);
if (retc && retc != -ESRCH)
pr_err("Failed to stop Backup Daemon\n");
} }
static struct pernet_operations ipvs_sync_ops = { static struct pernet_operations ipvs_sync_ops = {
...@@ -1682,10 +1698,10 @@ static struct pernet_operations ipvs_sync_ops = { ...@@ -1682,10 +1698,10 @@ static struct pernet_operations ipvs_sync_ops = {
int __init ip_vs_sync_init(void) int __init ip_vs_sync_init(void)
{ {
return register_pernet_subsys(&ipvs_sync_ops); return register_pernet_device(&ipvs_sync_ops);
} }
void ip_vs_sync_cleanup(void) void ip_vs_sync_cleanup(void)
{ {
unregister_pernet_subsys(&ipvs_sync_ops); unregister_pernet_device(&ipvs_sync_ops);
} }
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment