Commit 86200154 authored by David Vrabel

xen/evtchn: dynamically grow pending event channel ring

If more than 1024 event channels are bound to an evtchn device then it is
possible (even with well behaved applications) for the ring to
overflow and events to be lost (reported as an -EFBIG error).

Dynamically increase the size of the ring so there is always enough
space for all bound events.  Well behaved applications that only unmask
events after draining them from the ring can thus no longer lose
events.
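
For context on "well behaved": the device's userspace protocol is that
read() returns a batch of fired ports, each of which stays masked until
the application write()s it back to the fd.  A minimal sketch of that
drain-then-unmask loop, using the uapi ioctl and read/write protocol of
/dev/xen/evtchn (the VIRQ choice, buffer size, and error handling are
illustrative only):

    #include <fcntl.h>
    #include <stdint.h>
    #include <sys/ioctl.h>
    #include <unistd.h>
    #include <xen/evtchn.h>         /* uapi: struct ioctl_evtchn_bind_virq */

    int main(void)
    {
            struct ioctl_evtchn_bind_virq bind = { .virq = 0 /* VIRQ_TIMER */ };
            uint32_t ports[64];      /* ports are 32-bit on the wire */
            int fd = open("/dev/xen/evtchn", O_RDWR);
            ssize_t n;

            if (fd < 0 || ioctl(fd, IOCTL_EVTCHN_BIND_VIRQ, &bind) < 0)
                    return 1;

            for (;;) {
                    /* Drain: returned ports remain masked, so each bound
                     * port can occupy at most one ring slot. */
                    n = read(fd, ports, sizeof(ports));
                    if (n <= 0)
                            break;

                    /* ... handle ports[0 .. n / 4 - 1] ... */

                    /* Unmask only after draining. */
                    if (write(fd, ports, n) != n)
                            break;
            }

            close(fd);
            return 0;
    }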

However, an application could unmask an event before draining it,
allowing multiple entries per port to accumulate in the ring, and an
overflow could still occur.  So the overflow detection and reporting
is retained.
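
The check itself stays cheap: the ring keeps free-running
producer/consumer counters over a power-of-two size (which is also why
growth below doubles the size).  A standalone sketch of the two
invariants this relies on; the struct and function names here are
illustrative, only evtchn_ring_offset() in the patch below is real:

    /* Sketch only: mirrors the relevant fields of struct per_user_data. */
    struct ring_state {
            unsigned int ring_size;              /* zero or a power of two */
            unsigned int ring_cons, ring_prod;   /* free-running counters */
    };

    static int ring_full(const struct ring_state *u)
    {
            /* prod - cons is the occupancy, correct in unsigned
             * arithmetic even after either counter wraps UINT_MAX. */
            return (u->ring_prod - u->ring_cons) >= u->ring_size;
    }

    static unsigned int ring_slot(const struct ring_state *u, unsigned int idx)
    {
            /* A power-of-two size makes slot lookup a single mask;
             * this is what evtchn_ring_offset() does below. */
            return idx & (u->ring_size - 1);
    }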

The ring size is initially only 64 entries so the common use case of
an application only binding a few events will use less memory than
before.  The ring size may grow to 512 KiB (enough for all 2^17
possible channels).  This order-7 kmalloc() may fail due to memory
fragmentation, so we fall back to trying vmalloc().
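
For reference, the worked sizing (assuming 4 KiB pages and the 4-byte
evtchn_port_t):

    /*
     * max ports:   1 << 17          = 131072
     * ring bytes:  131072 * 4       = 524288 bytes = 512 KiB
     * pages:       512 KiB / 4 KiB  = 128 = 2^7  ->  an order-7 kmalloc()
     */

kvfree(), used by evtchn_free_ring() below, correctly frees either kind
of allocation.
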
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
parent b4ff8389
@@ -49,6 +49,8 @@
 #include <linux/init.h>
 #include <linux/mutex.h>
 #include <linux/cpu.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
 
 #include <xen/xen.h>
 #include <xen/events.h>
@@ -58,10 +60,10 @@
 struct per_user_data {
         struct mutex bind_mutex; /* serialize bind/unbind operations */
         struct rb_root evtchns;
+        unsigned int nr_evtchns;
 
         /* Notification ring, accessed via /dev/xen/evtchn. */
-#define EVTCHN_RING_SIZE     (PAGE_SIZE / sizeof(evtchn_port_t))
-#define EVTCHN_RING_MASK(_i) ((_i)&(EVTCHN_RING_SIZE-1))
+        unsigned int ring_size;
         evtchn_port_t *ring;
         unsigned int ring_cons, ring_prod, ring_overflow;
         struct mutex ring_cons_mutex; /* protect against concurrent readers */
@@ -80,10 +82,41 @@ struct user_evtchn {
         bool enabled;
 };
 
+static evtchn_port_t *evtchn_alloc_ring(unsigned int size)
+{
+        evtchn_port_t *ring;
+        size_t s = size * sizeof(*ring);
+
+        ring = kmalloc(s, GFP_KERNEL);
+        if (!ring)
+                ring = vmalloc(s);
+
+        return ring;
+}
+
+static void evtchn_free_ring(evtchn_port_t *ring)
+{
+        kvfree(ring);
+}
+
+static unsigned int evtchn_ring_offset(struct per_user_data *u,
+                                       unsigned int idx)
+{
+        return idx & (u->ring_size - 1);
+}
+
+static evtchn_port_t *evtchn_ring_entry(struct per_user_data *u,
+                                        unsigned int idx)
+{
+        return u->ring + evtchn_ring_offset(u, idx);
+}
+
 static int add_evtchn(struct per_user_data *u, struct user_evtchn *evtchn)
 {
         struct rb_node **new = &(u->evtchns.rb_node), *parent = NULL;
 
+        u->nr_evtchns++;
+
         while (*new) {
                 struct user_evtchn *this;
 
@@ -107,6 +140,7 @@ static int add_evtchn(struct per_user_data *u, struct user_evtchn *evtchn)
 
 static void del_evtchn(struct per_user_data *u, struct user_evtchn *evtchn)
 {
+        u->nr_evtchns--;
         rb_erase(&evtchn->node, &u->evtchns);
         kfree(evtchn);
 }
@@ -144,8 +178,8 @@ static irqreturn_t evtchn_interrupt(int irq, void *data)
 
         spin_lock(&u->ring_prod_lock);
 
-        if ((u->ring_prod - u->ring_cons) < EVTCHN_RING_SIZE) {
-                u->ring[EVTCHN_RING_MASK(u->ring_prod)] = evtchn->port;
+        if ((u->ring_prod - u->ring_cons) < u->ring_size) {
+                *evtchn_ring_entry(u, u->ring_prod) = evtchn->port;
                 wmb(); /* Ensure ring contents visible */
                 if (u->ring_cons == u->ring_prod++) {
                         wake_up_interruptible(&u->evtchn_wait);
@@ -200,10 +234,10 @@ static ssize_t evtchn_read(struct file *file, char __user *buf,
         }
 
         /* Byte lengths of two chunks.  Chunk split (if any) is at ring wrap. */
-        if (((c ^ p) & EVTCHN_RING_SIZE) != 0) {
-                bytes1 = (EVTCHN_RING_SIZE - EVTCHN_RING_MASK(c)) *
-                        sizeof(evtchn_port_t);
-                bytes2 = EVTCHN_RING_MASK(p) * sizeof(evtchn_port_t);
+        if (((c ^ p) & u->ring_size) != 0) {
+                bytes1 = (u->ring_size - evtchn_ring_offset(u, c)) *
+                        sizeof(evtchn_port_t);
+                bytes2 = evtchn_ring_offset(u, p) * sizeof(evtchn_port_t);
         } else {
                 bytes1 = (p - c) * sizeof(evtchn_port_t);
                 bytes2 = 0;
@@ -219,7 +253,7 @@ static ssize_t evtchn_read(struct file *file, char __user *buf,
 
         rc = -EFAULT;
         rmb(); /* Ensure that we see the port before we copy it. */
-        if (copy_to_user(buf, &u->ring[EVTCHN_RING_MASK(c)], bytes1) ||
+        if (copy_to_user(buf, evtchn_ring_entry(u, c), bytes1) ||
             ((bytes2 != 0) &&
              copy_to_user(&buf[bytes1], &u->ring[0], bytes2)))
                 goto unlock_out;
@@ -278,6 +312,66 @@ static ssize_t evtchn_write(struct file *file, const char __user *buf,
         return rc;
 }
 
+static int evtchn_resize_ring(struct per_user_data *u)
+{
+        unsigned int new_size;
+        evtchn_port_t *new_ring, *old_ring;
+        unsigned int p, c;
+
+        /*
+         * Ensure the ring is large enough to capture all possible
+         * events. i.e., one free slot for each bound event.
+         */
+        if (u->nr_evtchns <= u->ring_size)
+                return 0;
+
+        if (u->ring_size == 0)
+                new_size = 64;
+        else
+                new_size = 2 * u->ring_size;
+
+        new_ring = evtchn_alloc_ring(new_size);
+        if (!new_ring)
+                return -ENOMEM;
+
+        old_ring = u->ring;
+
+        /*
+         * Access to the ring contents is serialized by either the
+         * prod /or/ cons lock so take both when resizing.
+         */
+        mutex_lock(&u->ring_cons_mutex);
+        spin_lock_irq(&u->ring_prod_lock);
+
+        /*
+         * Copy the old ring contents to the new ring.
+         *
+         * If the ring contents crosses the end of the current ring,
+         * it needs to be copied in two chunks.
+         *
+         * +---------+    +------------------+
+         * |34567  12| -> |       1234567    |
+         * +-----p-c-+    +------------------+
+         */
+        p = evtchn_ring_offset(u, u->ring_prod);
+        c = evtchn_ring_offset(u, u->ring_cons);
+        if (p < c) {
+                memcpy(new_ring + c, u->ring + c, (u->ring_size - c) * sizeof(*u->ring));
+                memcpy(new_ring + u->ring_size, u->ring, p * sizeof(*u->ring));
+        } else
+                memcpy(new_ring + c, u->ring + c, (p - c) * sizeof(*u->ring));
+
+        u->ring = new_ring;
+        u->ring_size = new_size;
+
+        spin_unlock_irq(&u->ring_prod_lock);
+        mutex_unlock(&u->ring_cons_mutex);
+
+        evtchn_free_ring(old_ring);
+
+        return 0;
+}
+
 static int evtchn_bind_to_user(struct per_user_data *u, int port)
 {
         struct user_evtchn *evtchn;
@@ -305,6 +399,10 @@ static int evtchn_bind_to_user(struct per_user_data *u, int port)
         if (rc < 0)
                 goto err;
 
+        rc = evtchn_resize_ring(u);
+        if (rc < 0)
+                goto err;
+
         rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, 0,
                                        u->name, evtchn);
         if (rc < 0)
@@ -503,13 +601,6 @@ static int evtchn_open(struct inode *inode, struct file *filp)
 
         init_waitqueue_head(&u->evtchn_wait);
 
-        u->ring = (evtchn_port_t *)__get_free_page(GFP_KERNEL);
-        if (u->ring == NULL) {
-                kfree(u->name);
-                kfree(u);
-                return -ENOMEM;
-        }
-
         mutex_init(&u->bind_mutex);
         mutex_init(&u->ring_cons_mutex);
         spin_lock_init(&u->ring_prod_lock);
@@ -532,7 +623,7 @@ static int evtchn_release(struct inode *inode, struct file *filp)
                 evtchn_unbind_from_user(u, evtchn);
         }
 
-        free_page((unsigned long)u->ring);
+        evtchn_free_ring(u->ring);
         kfree(u->name);
         kfree(u);
 