Commit 36b71a8b authored by David Teigland's avatar David Teigland

dlm: fix deadlock between dlm_send and dlm_controld

A deadlock sometimes occurs between dlm_controld closing
a lowcomms connection through configfs and dlm_send looking
up the address for a new connection in configfs.

dlm_controld does a configfs rmdir which calls
dlm_lowcomms_close which waits for dlm_send to
cancel work on the workqueues.

The dlm_send workqueue thread has called
tcp_connect_to_sock which calls dlm_nodeid_to_addr
which does a configfs lookup and blocks on a lock
held by dlm_controld in the rmdir path.

The solution here is to save the node addresses within
the lowcomms code so that the lowcomms workqueue does
not need to step through configfs to get a node address.

dlm_controld:
wait_for_completion+0x1d/0x20
__cancel_work_timer+0x1b3/0x1e0
cancel_work_sync+0x10/0x20
dlm_lowcomms_close+0x4c/0xb0 [dlm]
drop_comm+0x22/0x60 [dlm]
client_drop_item+0x26/0x50 [configfs]
configfs_rmdir+0x180/0x230 [configfs]
vfs_rmdir+0xbd/0xf0
do_rmdir+0x103/0x120
sys_rmdir+0x16/0x20

dlm_send:
mutex_lock+0x2b/0x50
get_comm+0x34/0x140 [dlm]
dlm_nodeid_to_addr+0x18/0xd0 [dlm]
tcp_connect_to_sock+0xf4/0x2d0 [dlm]
process_send_sockets+0x1d2/0x260 [dlm]
worker_thread+0x170/0x2a0
Signed-off-by: default avatarDavid Teigland <teigland@redhat.com>
parent 42a579a0
...@@ -750,6 +750,7 @@ static ssize_t comm_local_write(struct dlm_comm *cm, const char *buf, ...@@ -750,6 +750,7 @@ static ssize_t comm_local_write(struct dlm_comm *cm, const char *buf,
static ssize_t comm_addr_write(struct dlm_comm *cm, const char *buf, size_t len) static ssize_t comm_addr_write(struct dlm_comm *cm, const char *buf, size_t len)
{ {
struct sockaddr_storage *addr; struct sockaddr_storage *addr;
int rv;
if (len != sizeof(struct sockaddr_storage)) if (len != sizeof(struct sockaddr_storage))
return -EINVAL; return -EINVAL;
...@@ -762,6 +763,13 @@ static ssize_t comm_addr_write(struct dlm_comm *cm, const char *buf, size_t len) ...@@ -762,6 +763,13 @@ static ssize_t comm_addr_write(struct dlm_comm *cm, const char *buf, size_t len)
return -ENOMEM; return -ENOMEM;
memcpy(addr, buf, len); memcpy(addr, buf, len);
rv = dlm_lowcomms_addr(cm->nodeid, addr, len);
if (rv) {
kfree(addr);
return rv;
}
cm->addr[cm->addr_count++] = addr; cm->addr[cm->addr_count++] = addr;
return len; return len;
} }
...@@ -878,34 +886,7 @@ static void put_space(struct dlm_space *sp) ...@@ -878,34 +886,7 @@ static void put_space(struct dlm_space *sp)
config_item_put(&sp->group.cg_item); config_item_put(&sp->group.cg_item);
} }
static int addr_compare(struct sockaddr_storage *x, struct sockaddr_storage *y) static struct dlm_comm *get_comm(int nodeid)
{
switch (x->ss_family) {
case AF_INET: {
struct sockaddr_in *sinx = (struct sockaddr_in *)x;
struct sockaddr_in *siny = (struct sockaddr_in *)y;
if (sinx->sin_addr.s_addr != siny->sin_addr.s_addr)
return 0;
if (sinx->sin_port != siny->sin_port)
return 0;
break;
}
case AF_INET6: {
struct sockaddr_in6 *sinx = (struct sockaddr_in6 *)x;
struct sockaddr_in6 *siny = (struct sockaddr_in6 *)y;
if (!ipv6_addr_equal(&sinx->sin6_addr, &siny->sin6_addr))
return 0;
if (sinx->sin6_port != siny->sin6_port)
return 0;
break;
}
default:
return 0;
}
return 1;
}
static struct dlm_comm *get_comm(int nodeid, struct sockaddr_storage *addr)
{ {
struct config_item *i; struct config_item *i;
struct dlm_comm *cm = NULL; struct dlm_comm *cm = NULL;
...@@ -919,19 +900,11 @@ static struct dlm_comm *get_comm(int nodeid, struct sockaddr_storage *addr) ...@@ -919,19 +900,11 @@ static struct dlm_comm *get_comm(int nodeid, struct sockaddr_storage *addr)
list_for_each_entry(i, &comm_list->cg_children, ci_entry) { list_for_each_entry(i, &comm_list->cg_children, ci_entry) {
cm = config_item_to_comm(i); cm = config_item_to_comm(i);
if (nodeid) { if (cm->nodeid != nodeid)
if (cm->nodeid != nodeid) continue;
continue; found = 1;
found = 1; config_item_get(i);
config_item_get(i); break;
break;
} else {
if (!cm->addr_count || !addr_compare(cm->addr[0], addr))
continue;
found = 1;
config_item_get(i);
break;
}
} }
mutex_unlock(&clusters_root.subsys.su_mutex); mutex_unlock(&clusters_root.subsys.su_mutex);
...@@ -995,7 +968,7 @@ int dlm_config_nodes(char *lsname, struct dlm_config_node **nodes_out, ...@@ -995,7 +968,7 @@ int dlm_config_nodes(char *lsname, struct dlm_config_node **nodes_out,
int dlm_comm_seq(int nodeid, uint32_t *seq) int dlm_comm_seq(int nodeid, uint32_t *seq)
{ {
struct dlm_comm *cm = get_comm(nodeid, NULL); struct dlm_comm *cm = get_comm(nodeid);
if (!cm) if (!cm)
return -EEXIST; return -EEXIST;
*seq = cm->seq; *seq = cm->seq;
...@@ -1003,28 +976,6 @@ int dlm_comm_seq(int nodeid, uint32_t *seq) ...@@ -1003,28 +976,6 @@ int dlm_comm_seq(int nodeid, uint32_t *seq)
return 0; return 0;
} }
int dlm_nodeid_to_addr(int nodeid, struct sockaddr_storage *addr)
{
struct dlm_comm *cm = get_comm(nodeid, NULL);
if (!cm)
return -EEXIST;
if (!cm->addr_count)
return -ENOENT;
memcpy(addr, cm->addr[0], sizeof(*addr));
put_comm(cm);
return 0;
}
int dlm_addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid)
{
struct dlm_comm *cm = get_comm(0, addr);
if (!cm)
return -EEXIST;
*nodeid = cm->nodeid;
put_comm(cm);
return 0;
}
int dlm_our_nodeid(void) int dlm_our_nodeid(void)
{ {
return local_comm ? local_comm->nodeid : 0; return local_comm ? local_comm->nodeid : 0;
......
...@@ -46,8 +46,6 @@ void dlm_config_exit(void); ...@@ -46,8 +46,6 @@ void dlm_config_exit(void);
int dlm_config_nodes(char *lsname, struct dlm_config_node **nodes_out, int dlm_config_nodes(char *lsname, struct dlm_config_node **nodes_out,
int *count_out); int *count_out);
int dlm_comm_seq(int nodeid, uint32_t *seq); int dlm_comm_seq(int nodeid, uint32_t *seq);
int dlm_nodeid_to_addr(int nodeid, struct sockaddr_storage *addr);
int dlm_addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid);
int dlm_our_nodeid(void); int dlm_our_nodeid(void);
int dlm_our_addr(struct sockaddr_storage *addr, int num); int dlm_our_addr(struct sockaddr_storage *addr, int num);
......
...@@ -140,6 +140,16 @@ struct writequeue_entry { ...@@ -140,6 +140,16 @@ struct writequeue_entry {
struct connection *con; struct connection *con;
}; };
struct dlm_node_addr {
struct list_head list;
int nodeid;
int addr_count;
struct sockaddr_storage *addr[DLM_MAX_ADDR_COUNT];
};
static LIST_HEAD(dlm_node_addrs);
static DEFINE_SPINLOCK(dlm_node_addrs_spin);
static struct sockaddr_storage *dlm_local_addr[DLM_MAX_ADDR_COUNT]; static struct sockaddr_storage *dlm_local_addr[DLM_MAX_ADDR_COUNT];
static int dlm_local_count; static int dlm_local_count;
static int dlm_allow_conn; static int dlm_allow_conn;
...@@ -264,31 +274,146 @@ static struct connection *assoc2con(int assoc_id) ...@@ -264,31 +274,146 @@ static struct connection *assoc2con(int assoc_id)
return NULL; return NULL;
} }
static int nodeid_to_addr(int nodeid, struct sockaddr *retaddr) static struct dlm_node_addr *find_node_addr(int nodeid)
{
struct dlm_node_addr *na;
list_for_each_entry(na, &dlm_node_addrs, list) {
if (na->nodeid == nodeid)
return na;
}
return NULL;
}
static int addr_compare(struct sockaddr_storage *x, struct sockaddr_storage *y)
{ {
struct sockaddr_storage addr; switch (x->ss_family) {
int error; case AF_INET: {
struct sockaddr_in *sinx = (struct sockaddr_in *)x;
struct sockaddr_in *siny = (struct sockaddr_in *)y;
if (sinx->sin_addr.s_addr != siny->sin_addr.s_addr)
return 0;
if (sinx->sin_port != siny->sin_port)
return 0;
break;
}
case AF_INET6: {
struct sockaddr_in6 *sinx = (struct sockaddr_in6 *)x;
struct sockaddr_in6 *siny = (struct sockaddr_in6 *)y;
if (!ipv6_addr_equal(&sinx->sin6_addr, &siny->sin6_addr))
return 0;
if (sinx->sin6_port != siny->sin6_port)
return 0;
break;
}
default:
return 0;
}
return 1;
}
static int nodeid_to_addr(int nodeid, struct sockaddr_storage *sas_out,
struct sockaddr *sa_out)
{
struct sockaddr_storage sas;
struct dlm_node_addr *na;
if (!dlm_local_count) if (!dlm_local_count)
return -1; return -1;
error = dlm_nodeid_to_addr(nodeid, &addr); spin_lock(&dlm_node_addrs_spin);
if (error) na = find_node_addr(nodeid);
return error; if (na && na->addr_count)
memcpy(&sas, na->addr[0], sizeof(struct sockaddr_storage));
spin_unlock(&dlm_node_addrs_spin);
if (!na)
return -EEXIST;
if (!na->addr_count)
return -ENOENT;
if (sas_out)
memcpy(sas_out, &sas, sizeof(struct sockaddr_storage));
if (!sa_out)
return 0;
if (dlm_local_addr[0]->ss_family == AF_INET) { if (dlm_local_addr[0]->ss_family == AF_INET) {
struct sockaddr_in *in4 = (struct sockaddr_in *) &addr; struct sockaddr_in *in4 = (struct sockaddr_in *) &sas;
struct sockaddr_in *ret4 = (struct sockaddr_in *) retaddr; struct sockaddr_in *ret4 = (struct sockaddr_in *) sa_out;
ret4->sin_addr.s_addr = in4->sin_addr.s_addr; ret4->sin_addr.s_addr = in4->sin_addr.s_addr;
} else { } else {
struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) &addr; struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) &sas;
struct sockaddr_in6 *ret6 = (struct sockaddr_in6 *) retaddr; struct sockaddr_in6 *ret6 = (struct sockaddr_in6 *) sa_out;
ret6->sin6_addr = in6->sin6_addr; ret6->sin6_addr = in6->sin6_addr;
} }
return 0; return 0;
} }
static int addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid)
{
struct dlm_node_addr *na;
int rv = -EEXIST;
spin_lock(&dlm_node_addrs_spin);
list_for_each_entry(na, &dlm_node_addrs, list) {
if (!na->addr_count)
continue;
if (!addr_compare(na->addr[0], addr))
continue;
*nodeid = na->nodeid;
rv = 0;
break;
}
spin_unlock(&dlm_node_addrs_spin);
return rv;
}
int dlm_lowcomms_addr(int nodeid, struct sockaddr_storage *addr, int len)
{
struct sockaddr_storage *new_addr;
struct dlm_node_addr *new_node, *na;
new_node = kzalloc(sizeof(struct dlm_node_addr), GFP_NOFS);
if (!new_node)
return -ENOMEM;
new_addr = kzalloc(sizeof(struct sockaddr_storage), GFP_NOFS);
if (!new_addr) {
kfree(new_node);
return -ENOMEM;
}
memcpy(new_addr, addr, len);
spin_lock(&dlm_node_addrs_spin);
na = find_node_addr(nodeid);
if (!na) {
new_node->nodeid = nodeid;
new_node->addr[0] = new_addr;
new_node->addr_count = 1;
list_add(&new_node->list, &dlm_node_addrs);
spin_unlock(&dlm_node_addrs_spin);
return 0;
}
if (na->addr_count >= DLM_MAX_ADDR_COUNT) {
spin_unlock(&dlm_node_addrs_spin);
kfree(new_addr);
kfree(new_node);
return -ENOSPC;
}
na->addr[na->addr_count++] = new_addr;
spin_unlock(&dlm_node_addrs_spin);
kfree(new_node);
return 0;
}
/* Data available on socket or listen socket received a connect */ /* Data available on socket or listen socket received a connect */
static void lowcomms_data_ready(struct sock *sk, int count_unused) static void lowcomms_data_ready(struct sock *sk, int count_unused)
{ {
...@@ -510,7 +635,7 @@ static void process_sctp_notification(struct connection *con, ...@@ -510,7 +635,7 @@ static void process_sctp_notification(struct connection *con,
return; return;
} }
make_sockaddr(&prim.ssp_addr, 0, &addr_len); make_sockaddr(&prim.ssp_addr, 0, &addr_len);
if (dlm_addr_to_nodeid(&prim.ssp_addr, &nodeid)) { if (addr_to_nodeid(&prim.ssp_addr, &nodeid)) {
unsigned char *b=(unsigned char *)&prim.ssp_addr; unsigned char *b=(unsigned char *)&prim.ssp_addr;
log_print("reject connect from unknown addr"); log_print("reject connect from unknown addr");
print_hex_dump_bytes("ss: ", DUMP_PREFIX_NONE, print_hex_dump_bytes("ss: ", DUMP_PREFIX_NONE,
...@@ -747,7 +872,7 @@ static int tcp_accept_from_sock(struct connection *con) ...@@ -747,7 +872,7 @@ static int tcp_accept_from_sock(struct connection *con)
/* Get the new node's NODEID */ /* Get the new node's NODEID */
make_sockaddr(&peeraddr, 0, &len); make_sockaddr(&peeraddr, 0, &len);
if (dlm_addr_to_nodeid(&peeraddr, &nodeid)) { if (addr_to_nodeid(&peeraddr, &nodeid)) {
unsigned char *b=(unsigned char *)&peeraddr; unsigned char *b=(unsigned char *)&peeraddr;
log_print("connect from non cluster node"); log_print("connect from non cluster node");
print_hex_dump_bytes("ss: ", DUMP_PREFIX_NONE, print_hex_dump_bytes("ss: ", DUMP_PREFIX_NONE,
...@@ -862,7 +987,7 @@ static void sctp_init_assoc(struct connection *con) ...@@ -862,7 +987,7 @@ static void sctp_init_assoc(struct connection *con)
if (con->retries++ > MAX_CONNECT_RETRIES) if (con->retries++ > MAX_CONNECT_RETRIES)
return; return;
if (nodeid_to_addr(con->nodeid, (struct sockaddr *)&rem_addr)) { if (nodeid_to_addr(con->nodeid, NULL, (struct sockaddr *)&rem_addr)) {
log_print("no address for nodeid %d", con->nodeid); log_print("no address for nodeid %d", con->nodeid);
return; return;
} }
...@@ -928,11 +1053,11 @@ static void sctp_init_assoc(struct connection *con) ...@@ -928,11 +1053,11 @@ static void sctp_init_assoc(struct connection *con)
/* Connect a new socket to its peer */ /* Connect a new socket to its peer */
static void tcp_connect_to_sock(struct connection *con) static void tcp_connect_to_sock(struct connection *con)
{ {
int result = -EHOSTUNREACH;
struct sockaddr_storage saddr, src_addr; struct sockaddr_storage saddr, src_addr;
int addr_len; int addr_len;
struct socket *sock = NULL; struct socket *sock = NULL;
int one = 1; int one = 1;
int result;
if (con->nodeid == 0) { if (con->nodeid == 0) {
log_print("attempt to connect sock 0 foiled"); log_print("attempt to connect sock 0 foiled");
...@@ -944,10 +1069,8 @@ static void tcp_connect_to_sock(struct connection *con) ...@@ -944,10 +1069,8 @@ static void tcp_connect_to_sock(struct connection *con)
goto out; goto out;
/* Some odd races can cause double-connects, ignore them */ /* Some odd races can cause double-connects, ignore them */
if (con->sock) { if (con->sock)
result = 0;
goto out; goto out;
}
/* Create a socket to communicate with */ /* Create a socket to communicate with */
result = sock_create_kern(dlm_local_addr[0]->ss_family, SOCK_STREAM, result = sock_create_kern(dlm_local_addr[0]->ss_family, SOCK_STREAM,
...@@ -956,8 +1079,11 @@ static void tcp_connect_to_sock(struct connection *con) ...@@ -956,8 +1079,11 @@ static void tcp_connect_to_sock(struct connection *con)
goto out_err; goto out_err;
memset(&saddr, 0, sizeof(saddr)); memset(&saddr, 0, sizeof(saddr));
if (dlm_nodeid_to_addr(con->nodeid, &saddr)) result = nodeid_to_addr(con->nodeid, &saddr, NULL);
if (result < 0) {
log_print("no address for nodeid %d", con->nodeid);
goto out_err; goto out_err;
}
sock->sk->sk_user_data = con; sock->sk->sk_user_data = con;
con->rx_action = receive_from_sock; con->rx_action = receive_from_sock;
...@@ -983,8 +1109,7 @@ static void tcp_connect_to_sock(struct connection *con) ...@@ -983,8 +1109,7 @@ static void tcp_connect_to_sock(struct connection *con)
kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, (char *)&one, kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, (char *)&one,
sizeof(one)); sizeof(one));
result = result = sock->ops->connect(sock, (struct sockaddr *)&saddr, addr_len,
sock->ops->connect(sock, (struct sockaddr *)&saddr, addr_len,
O_NONBLOCK); O_NONBLOCK);
if (result == -EINPROGRESS) if (result == -EINPROGRESS)
result = 0; result = 0;
...@@ -1002,11 +1127,17 @@ static void tcp_connect_to_sock(struct connection *con) ...@@ -1002,11 +1127,17 @@ static void tcp_connect_to_sock(struct connection *con)
* Some errors are fatal and this list might need adjusting. For other * Some errors are fatal and this list might need adjusting. For other
* errors we try again until the max number of retries is reached. * errors we try again until the max number of retries is reached.
*/ */
if (result != -EHOSTUNREACH && result != -ENETUNREACH && if (result != -EHOSTUNREACH &&
result != -ENETDOWN && result != -EINVAL result != -ENETUNREACH &&
&& result != -EPROTONOSUPPORT) { result != -ENETDOWN &&
result != -EINVAL &&
result != -EPROTONOSUPPORT) {
log_print("connect %d try %d error %d", con->nodeid,
con->retries, result);
mutex_unlock(&con->sock_mutex);
msleep(1000);
lowcomms_connect_sock(con); lowcomms_connect_sock(con);
result = 0; return;
} }
out: out:
mutex_unlock(&con->sock_mutex); mutex_unlock(&con->sock_mutex);
...@@ -1414,6 +1545,7 @@ static void clean_one_writequeue(struct connection *con) ...@@ -1414,6 +1545,7 @@ static void clean_one_writequeue(struct connection *con)
int dlm_lowcomms_close(int nodeid) int dlm_lowcomms_close(int nodeid)
{ {
struct connection *con; struct connection *con;
struct dlm_node_addr *na;
log_print("closing connection to node %d", nodeid); log_print("closing connection to node %d", nodeid);
con = nodeid2con(nodeid, 0); con = nodeid2con(nodeid, 0);
...@@ -1428,6 +1560,17 @@ int dlm_lowcomms_close(int nodeid) ...@@ -1428,6 +1560,17 @@ int dlm_lowcomms_close(int nodeid)
clean_one_writequeue(con); clean_one_writequeue(con);
close_connection(con, true); close_connection(con, true);
} }
spin_lock(&dlm_node_addrs_spin);
na = find_node_addr(nodeid);
if (na) {
list_del(&na->list);
while (na->addr_count--)
kfree(na->addr[na->addr_count]);
kfree(na);
}
spin_unlock(&dlm_node_addrs_spin);
return 0; return 0;
} }
...@@ -1577,3 +1720,17 @@ int dlm_lowcomms_start(void) ...@@ -1577,3 +1720,17 @@ int dlm_lowcomms_start(void)
fail: fail:
return error; return error;
} }
void dlm_lowcomms_exit(void)
{
struct dlm_node_addr *na, *safe;
spin_lock(&dlm_node_addrs_spin);
list_for_each_entry_safe(na, safe, &dlm_node_addrs, list) {
list_del(&na->list);
while (na->addr_count--)
kfree(na->addr[na->addr_count]);
kfree(na);
}
spin_unlock(&dlm_node_addrs_spin);
}
...@@ -16,10 +16,12 @@ ...@@ -16,10 +16,12 @@
int dlm_lowcomms_start(void); int dlm_lowcomms_start(void);
void dlm_lowcomms_stop(void); void dlm_lowcomms_stop(void);
void dlm_lowcomms_exit(void);
int dlm_lowcomms_close(int nodeid); int dlm_lowcomms_close(int nodeid);
void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc); void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc);
void dlm_lowcomms_commit_buffer(void *mh); void dlm_lowcomms_commit_buffer(void *mh);
int dlm_lowcomms_connect_node(int nodeid); int dlm_lowcomms_connect_node(int nodeid);
int dlm_lowcomms_addr(int nodeid, struct sockaddr_storage *addr, int len);
#endif /* __LOWCOMMS_DOT_H__ */ #endif /* __LOWCOMMS_DOT_H__ */
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
#include "user.h" #include "user.h"
#include "memory.h" #include "memory.h"
#include "config.h" #include "config.h"
#include "lowcomms.h"
static int __init init_dlm(void) static int __init init_dlm(void)
{ {
...@@ -78,6 +79,7 @@ static void __exit exit_dlm(void) ...@@ -78,6 +79,7 @@ static void __exit exit_dlm(void)
dlm_config_exit(); dlm_config_exit();
dlm_memory_exit(); dlm_memory_exit();
dlm_lockspace_exit(); dlm_lockspace_exit();
dlm_lowcomms_exit();
dlm_unregister_debugfs(); dlm_unregister_debugfs();
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment