Commit 64d68718, authored by Stefano Stabellini and committed by Juergen Gross

pvcalls-front: introduce a per sock_mapping refcount

Introduce a per sock_mapping refcount, in addition to the existing
global refcount. Thanks to the sock_mapping refcount, we can safely wait
for it to be 1 in pvcalls_front_release before freeing an active socket,
instead of waiting for the global refcount to be 1.
Signed-off-by: Stefano Stabellini <stefano@aporeto.com>
Acked-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
parent 63e708f8
...@@ -60,6 +60,7 @@ struct sock_mapping { ...@@ -60,6 +60,7 @@ struct sock_mapping {
bool active_socket; bool active_socket;
struct list_head list; struct list_head list;
struct socket *sock; struct socket *sock;
atomic_t refcount;
union { union {
struct { struct {
int irq; int irq;
...@@ -93,6 +94,32 @@ struct sock_mapping { ...@@ -93,6 +94,32 @@ struct sock_mapping {
}; };
}; };
/*
 * Look up the sock_mapping attached to @sock and take a reference on it.
 *
 * Returns:
 *   ERR_PTR(-ENOTCONN) if pvcalls_front_dev is NULL or has no driver data;
 *   ERR_PTR(-ENOTSOCK) if sock->sk->sk_send_head holds no mapping;
 *   otherwise the mapping, after calling pvcalls_enter() and incrementing
 *   the mapping's own refcount.
 *
 * Both error returns happen before pvcalls_enter() is called, so a caller
 * that gets an ERR_PTR must not call pvcalls_exit_sock(). A successful
 * call is balanced by pvcalls_exit_sock() on the same socket.
 */
static inline struct sock_mapping *pvcalls_enter_sock(struct socket *sock)
{
	struct sock_mapping *mapping;

	/* No frontend device at all. */
	if (pvcalls_front_dev == NULL)
		return ERR_PTR(-ENOTCONN);

	/* Frontend device exists but carries no driver data. */
	if (!dev_get_drvdata(&pvcalls_front_dev->dev))
		return ERR_PTR(-ENOTCONN);

	/* The per-socket mapping is stashed in sk_send_head. */
	mapping = (struct sock_mapping *)sock->sk->sk_send_head;
	if (!mapping)
		return ERR_PTR(-ENOTSOCK);

	/*
	 * pvcalls_enter() is defined elsewhere; presumably it takes the
	 * global pvcalls reference -- confirm against its definition.
	 */
	pvcalls_enter();
	atomic_inc(&mapping->refcount);

	return mapping;
}
/*
 * Undo a successful pvcalls_enter_sock() on @sock: decrement the mapping's
 * refcount, then call pvcalls_exit().
 *
 * The mapping is re-read from sock->sk->sk_send_head and is not checked
 * for NULL.
 * NOTE(review): pvcalls_front_release() sets sk_send_head to NULL; confirm
 * no caller can reach this helper on a socket after that point.
 */
static inline void pvcalls_exit_sock(struct socket *sock)
{
	struct sock_mapping *mapping =
		(struct sock_mapping *)sock->sk->sk_send_head;

	atomic_dec(&mapping->refcount);
	pvcalls_exit();
}
static inline int get_request(struct pvcalls_bedata *bedata, int *req_id) static inline int get_request(struct pvcalls_bedata *bedata, int *req_id)
{ {
*req_id = bedata->ring.req_prod_pvt & (RING_SIZE(&bedata->ring) - 1); *req_id = bedata->ring.req_prod_pvt & (RING_SIZE(&bedata->ring) - 1);
...@@ -369,31 +396,23 @@ int pvcalls_front_connect(struct socket *sock, struct sockaddr *addr, ...@@ -369,31 +396,23 @@ int pvcalls_front_connect(struct socket *sock, struct sockaddr *addr,
if (addr->sa_family != AF_INET || sock->type != SOCK_STREAM) if (addr->sa_family != AF_INET || sock->type != SOCK_STREAM)
return -EOPNOTSUPP; return -EOPNOTSUPP;
pvcalls_enter(); map = pvcalls_enter_sock(sock);
if (!pvcalls_front_dev) { if (IS_ERR(map))
pvcalls_exit(); return PTR_ERR(map);
return -ENOTCONN;
}
bedata = dev_get_drvdata(&pvcalls_front_dev->dev); bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
map = (struct sock_mapping *)sock->sk->sk_send_head;
if (!map) {
pvcalls_exit();
return -ENOTSOCK;
}
spin_lock(&bedata->socket_lock); spin_lock(&bedata->socket_lock);
ret = get_request(bedata, &req_id); ret = get_request(bedata, &req_id);
if (ret < 0) { if (ret < 0) {
spin_unlock(&bedata->socket_lock); spin_unlock(&bedata->socket_lock);
pvcalls_exit(); pvcalls_exit_sock(sock);
return ret; return ret;
} }
ret = create_active(map, &evtchn); ret = create_active(map, &evtchn);
if (ret < 0) { if (ret < 0) {
spin_unlock(&bedata->socket_lock); spin_unlock(&bedata->socket_lock);
pvcalls_exit(); pvcalls_exit_sock(sock);
return ret; return ret;
} }
...@@ -423,7 +442,7 @@ int pvcalls_front_connect(struct socket *sock, struct sockaddr *addr, ...@@ -423,7 +442,7 @@ int pvcalls_front_connect(struct socket *sock, struct sockaddr *addr,
smp_rmb(); smp_rmb();
ret = bedata->rsp[req_id].ret; ret = bedata->rsp[req_id].ret;
bedata->rsp[req_id].req_id = PVCALLS_INVALID_ID; bedata->rsp[req_id].req_id = PVCALLS_INVALID_ID;
pvcalls_exit(); pvcalls_exit_sock(sock);
return ret; return ret;
} }
...@@ -488,23 +507,15 @@ int pvcalls_front_sendmsg(struct socket *sock, struct msghdr *msg, ...@@ -488,23 +507,15 @@ int pvcalls_front_sendmsg(struct socket *sock, struct msghdr *msg,
if (flags & (MSG_CONFIRM|MSG_DONTROUTE|MSG_EOR|MSG_OOB)) if (flags & (MSG_CONFIRM|MSG_DONTROUTE|MSG_EOR|MSG_OOB))
return -EOPNOTSUPP; return -EOPNOTSUPP;
pvcalls_enter(); map = pvcalls_enter_sock(sock);
if (!pvcalls_front_dev) { if (IS_ERR(map))
pvcalls_exit(); return PTR_ERR(map);
return -ENOTCONN;
}
bedata = dev_get_drvdata(&pvcalls_front_dev->dev); bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
map = (struct sock_mapping *) sock->sk->sk_send_head;
if (!map) {
pvcalls_exit();
return -ENOTSOCK;
}
mutex_lock(&map->active.out_mutex); mutex_lock(&map->active.out_mutex);
if ((flags & MSG_DONTWAIT) && !pvcalls_front_write_todo(map)) { if ((flags & MSG_DONTWAIT) && !pvcalls_front_write_todo(map)) {
mutex_unlock(&map->active.out_mutex); mutex_unlock(&map->active.out_mutex);
pvcalls_exit(); pvcalls_exit_sock(sock);
return -EAGAIN; return -EAGAIN;
} }
if (len > INT_MAX) if (len > INT_MAX)
...@@ -526,7 +537,7 @@ int pvcalls_front_sendmsg(struct socket *sock, struct msghdr *msg, ...@@ -526,7 +537,7 @@ int pvcalls_front_sendmsg(struct socket *sock, struct msghdr *msg,
tot_sent = sent; tot_sent = sent;
mutex_unlock(&map->active.out_mutex); mutex_unlock(&map->active.out_mutex);
pvcalls_exit(); pvcalls_exit_sock(sock);
return tot_sent; return tot_sent;
} }
...@@ -591,19 +602,11 @@ int pvcalls_front_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, ...@@ -591,19 +602,11 @@ int pvcalls_front_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
if (flags & (MSG_CMSG_CLOEXEC|MSG_ERRQUEUE|MSG_OOB|MSG_TRUNC)) if (flags & (MSG_CMSG_CLOEXEC|MSG_ERRQUEUE|MSG_OOB|MSG_TRUNC))
return -EOPNOTSUPP; return -EOPNOTSUPP;
pvcalls_enter(); map = pvcalls_enter_sock(sock);
if (!pvcalls_front_dev) { if (IS_ERR(map))
pvcalls_exit(); return PTR_ERR(map);
return -ENOTCONN;
}
bedata = dev_get_drvdata(&pvcalls_front_dev->dev); bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
map = (struct sock_mapping *) sock->sk->sk_send_head;
if (!map) {
pvcalls_exit();
return -ENOTSOCK;
}
mutex_lock(&map->active.in_mutex); mutex_lock(&map->active.in_mutex);
if (len > XEN_FLEX_RING_SIZE(PVCALLS_RING_ORDER)) if (len > XEN_FLEX_RING_SIZE(PVCALLS_RING_ORDER))
len = XEN_FLEX_RING_SIZE(PVCALLS_RING_ORDER); len = XEN_FLEX_RING_SIZE(PVCALLS_RING_ORDER);
...@@ -623,7 +626,7 @@ int pvcalls_front_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, ...@@ -623,7 +626,7 @@ int pvcalls_front_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
ret = 0; ret = 0;
mutex_unlock(&map->active.in_mutex); mutex_unlock(&map->active.in_mutex);
pvcalls_exit(); pvcalls_exit_sock(sock);
return ret; return ret;
} }
...@@ -637,24 +640,16 @@ int pvcalls_front_bind(struct socket *sock, struct sockaddr *addr, int addr_len) ...@@ -637,24 +640,16 @@ int pvcalls_front_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
if (addr->sa_family != AF_INET || sock->type != SOCK_STREAM) if (addr->sa_family != AF_INET || sock->type != SOCK_STREAM)
return -EOPNOTSUPP; return -EOPNOTSUPP;
pvcalls_enter(); map = pvcalls_enter_sock(sock);
if (!pvcalls_front_dev) { if (IS_ERR(map))
pvcalls_exit(); return PTR_ERR(map);
return -ENOTCONN;
}
bedata = dev_get_drvdata(&pvcalls_front_dev->dev); bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
map = (struct sock_mapping *) sock->sk->sk_send_head;
if (map == NULL) {
pvcalls_exit();
return -ENOTSOCK;
}
spin_lock(&bedata->socket_lock); spin_lock(&bedata->socket_lock);
ret = get_request(bedata, &req_id); ret = get_request(bedata, &req_id);
if (ret < 0) { if (ret < 0) {
spin_unlock(&bedata->socket_lock); spin_unlock(&bedata->socket_lock);
pvcalls_exit(); pvcalls_exit_sock(sock);
return ret; return ret;
} }
req = RING_GET_REQUEST(&bedata->ring, req_id); req = RING_GET_REQUEST(&bedata->ring, req_id);
...@@ -684,7 +679,7 @@ int pvcalls_front_bind(struct socket *sock, struct sockaddr *addr, int addr_len) ...@@ -684,7 +679,7 @@ int pvcalls_front_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
bedata->rsp[req_id].req_id = PVCALLS_INVALID_ID; bedata->rsp[req_id].req_id = PVCALLS_INVALID_ID;
map->passive.status = PVCALLS_STATUS_BIND; map->passive.status = PVCALLS_STATUS_BIND;
pvcalls_exit(); pvcalls_exit_sock(sock);
return 0; return 0;
} }
...@@ -695,21 +690,13 @@ int pvcalls_front_listen(struct socket *sock, int backlog) ...@@ -695,21 +690,13 @@ int pvcalls_front_listen(struct socket *sock, int backlog)
struct xen_pvcalls_request *req; struct xen_pvcalls_request *req;
int notify, req_id, ret; int notify, req_id, ret;
pvcalls_enter(); map = pvcalls_enter_sock(sock);
if (!pvcalls_front_dev) { if (IS_ERR(map))
pvcalls_exit(); return PTR_ERR(map);
return -ENOTCONN;
}
bedata = dev_get_drvdata(&pvcalls_front_dev->dev); bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
map = (struct sock_mapping *) sock->sk->sk_send_head;
if (!map) {
pvcalls_exit();
return -ENOTSOCK;
}
if (map->passive.status != PVCALLS_STATUS_BIND) { if (map->passive.status != PVCALLS_STATUS_BIND) {
pvcalls_exit(); pvcalls_exit_sock(sock);
return -EOPNOTSUPP; return -EOPNOTSUPP;
} }
...@@ -717,7 +704,7 @@ int pvcalls_front_listen(struct socket *sock, int backlog) ...@@ -717,7 +704,7 @@ int pvcalls_front_listen(struct socket *sock, int backlog)
ret = get_request(bedata, &req_id); ret = get_request(bedata, &req_id);
if (ret < 0) { if (ret < 0) {
spin_unlock(&bedata->socket_lock); spin_unlock(&bedata->socket_lock);
pvcalls_exit(); pvcalls_exit_sock(sock);
return ret; return ret;
} }
req = RING_GET_REQUEST(&bedata->ring, req_id); req = RING_GET_REQUEST(&bedata->ring, req_id);
...@@ -741,7 +728,7 @@ int pvcalls_front_listen(struct socket *sock, int backlog) ...@@ -741,7 +728,7 @@ int pvcalls_front_listen(struct socket *sock, int backlog)
bedata->rsp[req_id].req_id = PVCALLS_INVALID_ID; bedata->rsp[req_id].req_id = PVCALLS_INVALID_ID;
map->passive.status = PVCALLS_STATUS_LISTEN; map->passive.status = PVCALLS_STATUS_LISTEN;
pvcalls_exit(); pvcalls_exit_sock(sock);
return ret; return ret;
} }
...@@ -753,21 +740,13 @@ int pvcalls_front_accept(struct socket *sock, struct socket *newsock, int flags) ...@@ -753,21 +740,13 @@ int pvcalls_front_accept(struct socket *sock, struct socket *newsock, int flags)
struct xen_pvcalls_request *req; struct xen_pvcalls_request *req;
int notify, req_id, ret, evtchn, nonblock; int notify, req_id, ret, evtchn, nonblock;
pvcalls_enter(); map = pvcalls_enter_sock(sock);
if (!pvcalls_front_dev) { if (IS_ERR(map))
pvcalls_exit(); return PTR_ERR(map);
return -ENOTCONN;
}
bedata = dev_get_drvdata(&pvcalls_front_dev->dev); bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
map = (struct sock_mapping *) sock->sk->sk_send_head;
if (!map) {
pvcalls_exit();
return -ENOTSOCK;
}
if (map->passive.status != PVCALLS_STATUS_LISTEN) { if (map->passive.status != PVCALLS_STATUS_LISTEN) {
pvcalls_exit(); pvcalls_exit_sock(sock);
return -EINVAL; return -EINVAL;
} }
...@@ -785,13 +764,13 @@ int pvcalls_front_accept(struct socket *sock, struct socket *newsock, int flags) ...@@ -785,13 +764,13 @@ int pvcalls_front_accept(struct socket *sock, struct socket *newsock, int flags)
goto received; goto received;
} }
if (nonblock) { if (nonblock) {
pvcalls_exit(); pvcalls_exit_sock(sock);
return -EAGAIN; return -EAGAIN;
} }
if (wait_event_interruptible(map->passive.inflight_accept_req, if (wait_event_interruptible(map->passive.inflight_accept_req,
!test_and_set_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT, !test_and_set_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT,
(void *)&map->passive.flags))) { (void *)&map->passive.flags))) {
pvcalls_exit(); pvcalls_exit_sock(sock);
return -EINTR; return -EINTR;
} }
} }
...@@ -802,7 +781,7 @@ int pvcalls_front_accept(struct socket *sock, struct socket *newsock, int flags) ...@@ -802,7 +781,7 @@ int pvcalls_front_accept(struct socket *sock, struct socket *newsock, int flags)
clear_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT, clear_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT,
(void *)&map->passive.flags); (void *)&map->passive.flags);
spin_unlock(&bedata->socket_lock); spin_unlock(&bedata->socket_lock);
pvcalls_exit(); pvcalls_exit_sock(sock);
return ret; return ret;
} }
map2 = kzalloc(sizeof(*map2), GFP_ATOMIC); map2 = kzalloc(sizeof(*map2), GFP_ATOMIC);
...@@ -810,7 +789,7 @@ int pvcalls_front_accept(struct socket *sock, struct socket *newsock, int flags) ...@@ -810,7 +789,7 @@ int pvcalls_front_accept(struct socket *sock, struct socket *newsock, int flags)
clear_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT, clear_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT,
(void *)&map->passive.flags); (void *)&map->passive.flags);
spin_unlock(&bedata->socket_lock); spin_unlock(&bedata->socket_lock);
pvcalls_exit(); pvcalls_exit_sock(sock);
return -ENOMEM; return -ENOMEM;
} }
ret = create_active(map2, &evtchn); ret = create_active(map2, &evtchn);
...@@ -819,7 +798,7 @@ int pvcalls_front_accept(struct socket *sock, struct socket *newsock, int flags) ...@@ -819,7 +798,7 @@ int pvcalls_front_accept(struct socket *sock, struct socket *newsock, int flags)
clear_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT, clear_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT,
(void *)&map->passive.flags); (void *)&map->passive.flags);
spin_unlock(&bedata->socket_lock); spin_unlock(&bedata->socket_lock);
pvcalls_exit(); pvcalls_exit_sock(sock);
return ret; return ret;
} }
list_add_tail(&map2->list, &bedata->socket_mappings); list_add_tail(&map2->list, &bedata->socket_mappings);
...@@ -841,13 +820,13 @@ int pvcalls_front_accept(struct socket *sock, struct socket *newsock, int flags) ...@@ -841,13 +820,13 @@ int pvcalls_front_accept(struct socket *sock, struct socket *newsock, int flags)
/* We could check if we have received a response before returning. */ /* We could check if we have received a response before returning. */
if (nonblock) { if (nonblock) {
WRITE_ONCE(map->passive.inflight_req_id, req_id); WRITE_ONCE(map->passive.inflight_req_id, req_id);
pvcalls_exit(); pvcalls_exit_sock(sock);
return -EAGAIN; return -EAGAIN;
} }
if (wait_event_interruptible(bedata->inflight_req, if (wait_event_interruptible(bedata->inflight_req,
READ_ONCE(bedata->rsp[req_id].req_id) == req_id)) { READ_ONCE(bedata->rsp[req_id].req_id) == req_id)) {
pvcalls_exit(); pvcalls_exit_sock(sock);
return -EINTR; return -EINTR;
} }
/* read req_id, then the content */ /* read req_id, then the content */
...@@ -862,7 +841,7 @@ int pvcalls_front_accept(struct socket *sock, struct socket *newsock, int flags) ...@@ -862,7 +841,7 @@ int pvcalls_front_accept(struct socket *sock, struct socket *newsock, int flags)
clear_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT, clear_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT,
(void *)&map->passive.flags); (void *)&map->passive.flags);
pvcalls_front_free_map(bedata, map2); pvcalls_front_free_map(bedata, map2);
pvcalls_exit(); pvcalls_exit_sock(sock);
return -ENOMEM; return -ENOMEM;
} }
newsock->sk->sk_send_head = (void *)map2; newsock->sk->sk_send_head = (void *)map2;
...@@ -874,7 +853,7 @@ int pvcalls_front_accept(struct socket *sock, struct socket *newsock, int flags) ...@@ -874,7 +853,7 @@ int pvcalls_front_accept(struct socket *sock, struct socket *newsock, int flags)
clear_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT, (void *)&map->passive.flags); clear_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT, (void *)&map->passive.flags);
wake_up(&map->passive.inflight_accept_req); wake_up(&map->passive.inflight_accept_req);
pvcalls_exit(); pvcalls_exit_sock(sock);
return ret; return ret;
} }
...@@ -965,23 +944,16 @@ __poll_t pvcalls_front_poll(struct file *file, struct socket *sock, ...@@ -965,23 +944,16 @@ __poll_t pvcalls_front_poll(struct file *file, struct socket *sock,
struct sock_mapping *map; struct sock_mapping *map;
__poll_t ret; __poll_t ret;
pvcalls_enter(); map = pvcalls_enter_sock(sock);
if (!pvcalls_front_dev) { if (IS_ERR(map))
pvcalls_exit();
return EPOLLNVAL; return EPOLLNVAL;
}
bedata = dev_get_drvdata(&pvcalls_front_dev->dev); bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
map = (struct sock_mapping *) sock->sk->sk_send_head;
if (!map) {
pvcalls_exit();
return EPOLLNVAL;
}
if (map->active_socket) if (map->active_socket)
ret = pvcalls_front_poll_active(file, bedata, map, wait); ret = pvcalls_front_poll_active(file, bedata, map, wait);
else else
ret = pvcalls_front_poll_passive(file, bedata, map, wait); ret = pvcalls_front_poll_passive(file, bedata, map, wait);
pvcalls_exit(); pvcalls_exit_sock(sock);
return ret; return ret;
} }
...@@ -995,25 +967,20 @@ int pvcalls_front_release(struct socket *sock) ...@@ -995,25 +967,20 @@ int pvcalls_front_release(struct socket *sock)
if (sock->sk == NULL) if (sock->sk == NULL)
return 0; return 0;
pvcalls_enter(); map = pvcalls_enter_sock(sock);
if (!pvcalls_front_dev) { if (IS_ERR(map)) {
pvcalls_exit(); if (PTR_ERR(map) == -ENOTCONN)
return -EIO; return -EIO;
} else
bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
map = (struct sock_mapping *) sock->sk->sk_send_head;
if (map == NULL) {
pvcalls_exit();
return 0; return 0;
} }
bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
spin_lock(&bedata->socket_lock); spin_lock(&bedata->socket_lock);
ret = get_request(bedata, &req_id); ret = get_request(bedata, &req_id);
if (ret < 0) { if (ret < 0) {
spin_unlock(&bedata->socket_lock); spin_unlock(&bedata->socket_lock);
pvcalls_exit(); pvcalls_exit_sock(sock);
return ret; return ret;
} }
sock->sk->sk_send_head = NULL; sock->sk->sk_send_head = NULL;
...@@ -1043,10 +1010,10 @@ int pvcalls_front_release(struct socket *sock) ...@@ -1043,10 +1010,10 @@ int pvcalls_front_release(struct socket *sock)
/* /*
* We need to make sure that sendmsg/recvmsg on this socket have * We need to make sure that sendmsg/recvmsg on this socket have
* not started before we've cleared sk_send_head here. The * not started before we've cleared sk_send_head here. The
* easiest (though not optimal) way to guarantee this is to see * easiest way to guarantee this is to see that no pvcalls
* that no pvcall (other than us) is in progress. * (other than us) is in progress on this socket.
*/ */
while (atomic_read(&pvcalls_refcount) > 1) while (atomic_read(&map->refcount) > 1)
cpu_relax(); cpu_relax();
pvcalls_front_free_map(bedata, map); pvcalls_front_free_map(bedata, map);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment