Commit 424980a8 authored by Davide Libenzi, committed by Jens Axboe

[PATCH] epoll bits 0.46 ...

- A more uniform poll queueing interface, with tips from Manfred

- The f_op->poll() is done outside the irqlock, to maintain compatibility
	with existing drivers that assume they are called with irqs enabled

- Moved event mask setting inside ep_modify(), with tips from John

- Fixed locking to fit the new "poll() outside the lock" approach

- Buffered userspace event delivery, to reduce the irq_lock/irq_unlock
	switching rate and the number of __copy_to_user() calls (a sketch
	of the scheme follows below)

- Comments added
parent 6e941592
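
The buffered delivery mentioned in the changelog can be pictured with this
minimal userspace sketch: events are staged in a small on-stack buffer and
flushed in chunks, so the copy cost (and, in the kernel, the lock toggling)
is paid once per chunk rather than once per event. All names here are
illustrative, and memcpy() stands in for __copy_to_user():

#include <string.h>
#include <sys/poll.h>

#define BUF_EVENTS 32

static int deliver(struct pollfd *dst, const struct pollfd *src, int nsrc)
{
	struct pollfd buf[BUF_EVENTS];	/* on stack, so kept small */
	int i, nbuf = 0, sent = 0;

	for (i = 0; i < nsrc; i++) {
		buf[nbuf++] = src[i];
		if (nbuf == BUF_EVENTS) {
			/* Full chunk: one bulk copy instead of BUF_EVENTS copies */
			memcpy(&dst[sent], buf, nbuf * sizeof(buf[0]));
			sent += nbuf;
			nbuf = 0;
		}
	}
	if (nbuf) {
		/* Flush whatever is left in the buffer */
		memcpy(&dst[sent], buf, nbuf * sizeof(buf[0]));
		sent += nbuf;
	}
	return sent;
}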
@@ -27,6 +27,7 @@
 #include <linux/list.h>
 #include <linux/hash.h>
 #include <linux/spinlock.h>
+#include <linux/rwsem.h>
 #include <linux/wait.h>
 #include <asm/bitops.h>
 #include <asm/uaccess.h>
@@ -65,13 +66,6 @@
 /* Minimum size of the hash in bits ( 2^N ) */
 #define EP_MIN_HASH_BITS 9
 
-/*
- * Event buffer dimension used to cache events before sending them in
- * userspace with a __copy_to_user(). The event buffer is on stack,
- * so keep this size fairly small.
- */
-#define EP_EVENT_BUFF_SIZE 32
-
 /* Maximum number of wait queues we can attach to */
 #define EP_MAX_POLL_QUEUE 2
@@ -110,6 +104,17 @@
 /* Get the "struct epitem" from an epoll queue wrapper */
 #define EP_ITEM_FROM_EPQUEUE(p) (container_of(p, struct ep_pqueue, pt)->dpi)
 
+/*
+ * This is used to optimize the event transfer to userspace. Since this
+ * is kept on stack, it should be pretty small.
+ */
+#define EP_MAX_BUF_EVENTS 32
+
+/*
+ * Used to optimize ready items collection by reducing the irqlock/irqunlock
+ * switching rate. This is kept on stack too, so do not go wild with this number.
+ */
+#define EP_MAX_COLLECT_ITEMS 64
@@ -212,11 +217,15 @@ static void ep_release_epitem(struct epitem *dpi);
 static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead, poll_table *pt);
 static int ep_insert(struct eventpoll *ep, struct pollfd *pfd, struct file *tfile);
 static int ep_modify(struct eventpoll *ep, struct epitem *dpi, unsigned int events);
+static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *dpi);
 static int ep_unlink(struct eventpoll *ep, struct epitem *dpi);
 static int ep_remove(struct eventpoll *ep, struct epitem *dpi);
 static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync);
 static int ep_eventpoll_close(struct inode *inode, struct file *file);
 static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait);
+static int ep_collect_ready_items(struct eventpoll *ep, struct epitem **adpi, int maxdpi);
+static int ep_send_events(struct eventpoll *ep, struct epitem **adpi, int ndpi,
+			  struct pollfd *events);
 static int ep_events_transfer(struct eventpoll *ep, struct pollfd *events, int maxevents);
 static int ep_poll(struct eventpoll *ep, struct pollfd *events, int maxevents,
 		   int timeout);
@@ -227,11 +236,21 @@ static struct super_block *eventpollfs_get_sb(struct file_system_type *fs_type,
 
-/* Used to link together all the "struct eventpoll" */
+/*
+ * Used to link together all the "struct eventpoll". We need to link
+ * all the available eventpoll structures to be able to perform proper
+ * cleanup in case a file that is stored inside epoll is closed without
+ * previously being removed.
+ */
 static struct list_head eplist;
 
-/* Serialize the access to "eplist" */
-static rwlock_t eplock;
+/*
+ * Serializes the access to "eplist" and also to ep_notify_file_close().
+ * It is read-held when we want to be sure that a given file will not
+ * vanish while we're doing f_op->poll(). When "ep->lock" is taken,
+ * it will nest inside this semaphore.
+ */
+static struct rw_semaphore epsem;
 
 /* Slab cache used to allocate "struct epitem" */
 static kmem_cache_t *dpi_cache;
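
The nesting rule described in the comment above can be pictured with a
userspace analogue: the rwsem ("epsem") is the outer, sleepable lock, the
per-instance lock ("ep->lock") nests inside it, and the poll-style work
happens with only the outer lock held. The pthread primitives and names
below are illustrative, not the kernel API:

#include <pthread.h>

static pthread_rwlock_t epsem = PTHREAD_RWLOCK_INITIALIZER;
static pthread_mutex_t ep_lock = PTHREAD_MUTEX_INITIALIZER;

static void transfer_events(void)
{
	pthread_rwlock_rdlock(&epsem);	/* outer: files can't vanish */

	pthread_mutex_lock(&ep_lock);	/* inner: protects the ready list */
	/* ... detach ready items ... */
	pthread_mutex_unlock(&ep_lock);

	/* ... f_op->poll()-like work: only the sleepable lock is held ... */

	pthread_rwlock_unlock(&epsem);
}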
@@ -283,21 +302,24 @@ static unsigned int ep_get_hash_bits(unsigned int hintsize)
  */
 void ep_notify_file_close(struct file *file)
 {
-	unsigned long flags;
 	struct list_head *lnk;
 	struct eventpoll *ep;
 	struct epitem *dpi;
 
-	read_lock_irqsave(&eplock, flags);
+	down_write(&epsem);
 	list_for_each(lnk, &eplist) {
 		ep = list_entry(lnk, struct eventpoll, llink);
 
+		/*
+		 * The ep_find() function increases the "struct epitem" usage count,
+		 * so we have to do an ep_remove() + ep_release_epitem().
+		 */
 		while ((dpi = ep_find(ep, file))) {
 			ep_remove(ep, dpi);
 			ep_release_epitem(dpi);
 		}
 	}
-	read_unlock_irqrestore(&eplock, flags);
+	up_write(&epsem);
 }
@@ -438,14 +460,14 @@ asmlinkage int sys_epoll_ctl(int epfd, int op, int fd, unsigned int events)
 	if (dpi)
 		ep_release_epitem(dpi);
 
-	DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_ctl(%d, %d, %d, %u) = %d\n",
-		     current, epfd, op, fd, events, error));
-
 eexit_3:
 	fput(tfile);
 eexit_2:
 	fput(file);
 eexit_1:
+	DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_ctl(%d, %d, %d, %u) = %d\n",
+		     current, epfd, op, fd, events, error));
+
 	return error;
 }
@@ -495,12 +517,12 @@ asmlinkage int sys_epoll_wait(int epfd, struct pollfd *events, int maxevents,
 	/* Time to fish for events ... */
 	error = ep_poll(ep, events, maxevents, timeout);
 
-	DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_wait(%d, %p, %d, %d) = %d\n",
-		     current, epfd, events, maxevents, timeout, error));
-
 eexit_2:
 	fput(file);
 eexit_1:
+	DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_wait(%d, %p, %d, %d) = %d\n",
+		     current, epfd, events, maxevents, timeout, error));
+
 	return error;
 }
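
For reference, a hedged sketch of how a userspace program would drive this
pollfd-based interface. The wrapper prototypes and the EP_CTL_ADD value are
assumptions (C libraries had no epoll bindings at the time; the patch text
itself only names EP_CTL_DEL):

#include <sys/poll.h>

#define EP_CTL_ADD 1	/* assumed value */

/* Assumed thin wrappers around sys_epoll_create/ctl/wait */
extern int epoll_create(int size);
extern int epoll_ctl(int epfd, int op, int fd, unsigned int events);
extern int epoll_wait(int epfd, struct pollfd *events, int maxevents, int timeout);

int wait_for_input(int sock)
{
	struct pollfd evs[64];
	int epfd, i, n;

	epfd = epoll_create(128);		/* hint: expected number of fds */
	epoll_ctl(epfd, EP_CTL_ADD, sock, POLLIN);

	n = epoll_wait(epfd, evs, 64, 1000);	/* timeout in ms, as with poll() */
	for (i = 0; i < n; i++)
		;	/* evs[i].fd / evs[i].revents describe one ready file */

	return n;
}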
@@ -616,7 +638,6 @@ static int ep_free_pages(char **pages, int numpages)
 static int ep_file_init(struct file *file, unsigned int hashbits)
 {
 	int error;
-	unsigned long flags;
 	struct eventpoll *ep;
 
 	if (!(ep = kmalloc(sizeof(struct eventpoll), GFP_KERNEL)))
@@ -633,9 +654,9 @@ static int ep_file_init(struct file *file, unsigned int hashbits)
 	file->private_data = ep;
 
 	/* Add the structure to the linked list that links "struct eventpoll" */
-	write_lock_irqsave(&eplock, flags);
+	down_write(&epsem);
 	list_add(&ep->llink, &eplist);
-	write_unlock_irqrestore(&eplock, flags);
+	up_write(&epsem);
 
 	DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_file_init() ep=%p\n",
 		     current, ep));
@@ -694,44 +715,47 @@ static int ep_init(struct eventpoll *ep, unsigned int hashbits)
 static void ep_free(struct eventpoll *ep)
 {
 	unsigned int i, hsize;
-	unsigned long flags;
-	struct list_head *lsthead;
+	struct list_head *lsthead, *lnk;
+
+	/*
+	 * We need to lock this because we could be hit by
+	 * ep_notify_file_close() while we're freeing the
+	 * "struct eventpoll".
+	 */
+	down_write(&epsem);
 
 	/*
-	 * Walks through the whole hash by unregistering file callbacks and
-	 * freeing each "struct epitem".
+	 * Walks through the whole hash by unregistering poll callbacks.
 	 */
 	for (i = 0, hsize = 1 << ep->hashbits; i < hsize; i++) {
 		lsthead = ep_hash_entry(ep, i);
 
-		/*
-		 * We need to lock this because we could be hit by
-		 * ep_notify_file_close() while we're freeing this.
-		 */
-		write_lock_irqsave(&ep->lock, flags);
+		list_for_each(lnk, lsthead) {
+			struct epitem *dpi = list_entry(lnk, struct epitem, llink);
 
-		while (!list_empty(lsthead)) {
-			struct epitem *dpi = list_entry(lsthead->next, struct epitem, llink);
+			ep_unregister_pollwait(ep, dpi);
+		}
+	}
 
-			/* The function ep_unlink() must be called with the lock held */
-			ep_unlink(ep, dpi);
+	/*
+	 * Walks through the whole hash by freeing each "struct epitem". At this
+	 * point we are sure no poll callbacks will be lingering around, so we can
+	 * avoid the lock on "ep->lock".
+	 */
+	for (i = 0, hsize = 1 << ep->hashbits; i < hsize; i++) {
+		lsthead = ep_hash_entry(ep, i);
 
-			/* We release the lock before releasing the "struct epitem" */
-			write_unlock_irqrestore(&ep->lock, flags);
+		while (!list_empty(lsthead)) {
+			struct epitem *dpi = list_entry(lsthead->next, struct epitem, llink);
 
-			ep_release_epitem(dpi);
-
-			/* And then we reacquire the lock ... */
-			write_lock_irqsave(&ep->lock, flags);
+			ep_remove(ep, dpi);
 		}
-
-		write_unlock_irqrestore(&ep->lock, flags);
 	}
 
-	/* Remove the structure from the linked list that links "struct eventpoll" */
-	write_lock_irqsave(&eplock, flags);
+	/* Remove the structure from the linked list that links "struct eventpoll" */
 	EP_LIST_DEL(&ep->llink);
-	write_unlock_irqrestore(&eplock, flags);
+
+	up_write(&epsem);
 
 	/* Free hash pages */
 	ep_free_pages(ep->hpages, EP_HASH_PAGES(ep->hashbits));
@@ -839,7 +863,16 @@ static int ep_insert(struct eventpoll *ep, struct pollfd *pfd, struct file *tfil
 	/* Initialize the poll table using the queue callback */
 	epq.dpi = dpi;
-	poll_initwait_ex(&epq.pt, ep_ptable_queue_proc);
+	poll_initwait_ex(&epq.pt, ep_ptable_queue_proc, NULL);
+
+	/*
+	 * Attach the item to the poll hooks and get current event bits.
+	 * We can safely use the file* here because its usage count has
+	 * been increased by the caller of this function.
+	 */
+	revents = tfile->f_op->poll(tfile, &epq.pt);
+
+	poll_freewait(&epq.pt);
 
 	/* We have to drop the new item inside our item list to keep track of it */
 	write_lock_irqsave(&ep->lock, flags);
@@ -847,9 +880,6 @@ static int ep_insert(struct eventpoll *ep, struct pollfd *pfd, struct file *tfil
 	/* Add the current item to the hash table */
 	list_add(&dpi->llink, ep_hash_entry(ep, ep_hash_index(ep, tfile)));
 
-	/* Attach the item to the poll hooks and get current event bits */
-	revents = tfile->f_op->poll(tfile, &epq.pt);
-
 	/* If the file is already "ready" we drop it inside the ready list */
 	if ((revents & pfd->events) && !EP_IS_LINKED(&dpi->rdllink)) {
 		list_add_tail(&dpi->rdllink, &ep->rdllist);
@@ -863,8 +893,6 @@ static int ep_insert(struct eventpoll *ep, struct pollfd *pfd, struct file *tfil
 	write_unlock_irqrestore(&ep->lock, flags);
 
-	poll_freewait(&epq.pt);
-
 	DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_insert(%p, %d)\n",
 		     current, ep, pfd->fd));
@@ -884,13 +912,21 @@ static int ep_modify(struct eventpoll *ep, struct epitem *dpi, unsigned int even
 	unsigned int revents;
 	unsigned long flags;
 
-	write_lock_irqsave(&ep->lock, flags);
+	/*
+	 * Set the new event interest mask before calling f_op->poll(), otherwise
+	 * a potential race might occur. In fact, if we do this operation inside
+	 * the lock, an event might happen between the f_op->poll() call and the
+	 * registering of the new event set.
+	 */
+	dpi->pfd.events = events;
 
-	/* Get current event bits */
+	/*
+	 * Get current event bits. We can safely use the file* here because
+	 * its usage count has been increased by the caller of this function.
+	 */
 	revents = dpi->file->f_op->poll(dpi->file, NULL);
 
-	/* Set the new event interest mask */
-	dpi->pfd.events = events;
+	write_lock_irqsave(&ep->lock, flags);
 
 	/* If the file is already "ready" we drop it inside the ready list */
 	if ((revents & events) && EP_IS_LINKED(&dpi->llink) &&
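
The reordering matters because the item's wakeup callback presumably
consults dpi->pfd.events when an event fires. A rough timeline of the race
the new order closes (illustrative, not taken from the patch):

    old order (event can be missed)      new order (event is caught)
    -------------------------------      ----------------------------
    revents = f_op->poll(file, NULL);    dpi->pfd.events = events;
      <- event fires here; callback      revents = f_op->poll(file, NULL);
         still sees the OLD mask and       <- event fires here; callback
         may ignore the event                 already sees the NEW mask
    dpi->pfd.events = events;                 and queues the item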
@@ -910,20 +946,39 @@ static int ep_modify(struct eventpoll *ep, struct epitem *dpi, unsigned int even
 }
 
+/*
+ * This function unregisters poll callbacks from the associated file descriptor.
+ * Since this must be called without holding "ep->lock", the atomic exchange trick
+ * will protect us from multiple unregisters.
+ */
+static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *dpi)
+{
+	int i, nwait;
+
+	/* This is called without locks, so we need the atomic exchange */
+	nwait = xchg(&dpi->nwait, 0);
+
+	/* Removes poll wait queue hooks */
+	for (i = 0; i < nwait; i++)
+		remove_wait_queue(dpi->wait[i].whead, &dpi->wait[i].wait);
+}
+
 /*
  * Unlink the "struct epitem" from all places it might have been hooked up.
  * This function must be called with write IRQ lock on "ep->lock".
  */
 static int ep_unlink(struct eventpoll *ep, struct epitem *dpi)
 {
-	int i;
+	int error;
 
 	/*
 	 * It can happen that this one is called for an item already unlinked.
 	 * The check protects us from doing a double unlink ( crash ).
 	 */
+	error = -ENOENT;
 	if (!EP_IS_LINKED(&dpi->llink))
-		goto not_linked;
+		goto eexit_1;
 
 	/*
 	 * At this point it is safe to do the job, unlink the item from our list.
@@ -932,10 +987,6 @@ static int ep_unlink(struct eventpoll *ep, struct epitem *dpi)
 	 */
 	EP_LIST_DEL(&dpi->llink);
 
-	/* Removes poll wait queue hooks */
-	for (i = 0; i < dpi->nwait; i++)
-		remove_wait_queue(dpi->wait[i].whead, &dpi->wait[i].wait);
-
 	/*
 	 * If the item we are going to remove is inside the ready file descriptors
 	 * we want to remove it from this list to avoid stale events.
@@ -943,12 +994,13 @@ static int ep_unlink(struct eventpoll *ep, struct epitem *dpi)
 	if (EP_IS_LINKED(&dpi->rdllink))
 		EP_LIST_DEL(&dpi->rdllink);
 
-not_linked:
+	error = 0;
+eexit_1:
 
-	DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_unlink(%p, %d)\n",
-		     current, ep, dpi->pfd.fd));
+	DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_unlink(%p, %d) = %d\n",
+		     current, ep, dpi->pfd.fd, error));
 
-	return 0;
+	return error;
 }
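
The atomic exchange trick in ep_unregister_pollwait() makes the teardown
idempotent: whichever caller swaps the counter to zero first owns the
unregister, and later callers see zero and do nothing. A compilable
userspace sketch of the same idea, using a GCC atomic builtin in place of
the kernel's xchg(); all names are illustrative:

#include <stdio.h>

static int nwait = 2;	/* pretend two wait queue hooks are registered */

static void unregister_hooks(void)
{
	/* Atomically take ownership of all registered hooks */
	int n = __atomic_exchange_n(&nwait, 0, __ATOMIC_SEQ_CST);
	int i;

	for (i = 0; i < n; i++)
		printf("removing hook %d\n", i);	/* remove_wait_queue() in the kernel */
}

int main(void)
{
	unregister_hooks();	/* removes both hooks */
	unregister_hooks();	/* sees 0: a harmless no-op */
	return 0;
}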
@@ -961,6 +1013,16 @@ static int ep_remove(struct eventpoll *ep, struct epitem *dpi)
 	int error;
 	unsigned long flags;
 
+	/*
+	 * Removes poll wait queue hooks. We _have_ to do this without holding
+	 * the "ep->lock", otherwise a deadlock might occur. This is because of
+	 * the sequence of lock acquisition. Here we do "ep->lock" then the wait
+	 * queue head lock when unregistering the wait queue. The wakeup callback
+	 * runs holding the wait queue head lock and calls our callback, which
+	 * will try to get "ep->lock".
+	 */
+	ep_unregister_pollwait(ep, dpi);
+
 	/* We need to acquire the write IRQ lock before calling ep_unlink() */
 	write_lock_irqsave(&ep->lock, flags);
@@ -972,14 +1034,14 @@ static int ep_remove(struct eventpoll *ep, struct epitem *dpi)
 	if (error)
 		goto eexit_1;
 
-	DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_remove(%p, %d)\n",
-		     current, ep, dpi->pfd.fd));
-
 	/* At this point it is safe to free the eventpoll item */
 	ep_release_epitem(dpi);
 
 	error = 0;
 eexit_1:
+	DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_remove(%p, %d) = %d\n",
+		     current, ep, dpi->pfd.fd, error));
+
 	return error;
 }
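
The comment in ep_remove() describes a classic AB-BA lock inversion. An
illustrative diagram of the deadlock the new ordering avoids (not kernel
code):

    CPU0 (ep_remove, old order)        CPU1 (wakeup path)
    ---------------------------        ----------------------------
    lock(ep->lock)                     lock(waitqueue->lock)
    lock(waitqueue->lock)  <waits>     ep_poll_callback():
                                           lock(ep->lock)  <waits>

Each CPU holds the lock the other needs. Calling ep_unregister_pollwait()
before write_lock_irqsave(&ep->lock, ...) means ep_remove() never holds
both locks at once, so the cycle cannot form.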
@@ -1055,19 +1117,19 @@ static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait)
 /*
- * Perform the transfer of events to user space. Optimize the copy by
- * caching EP_EVENT_BUFF_SIZE events at a time and then copying them to user space.
+ * Since we have to release the lock during the __copy_to_user() operation and
+ * during the f_op->poll() call, we try to collect the maximum number of items
+ * by reducing the irqlock/irqunlock switching rate.
  */
-static int ep_events_transfer(struct eventpoll *ep, struct pollfd *events, int maxevents)
+static int ep_collect_ready_items(struct eventpoll *ep, struct epitem **adpi, int maxdpi)
 {
-	int eventcnt, ebufcnt, revents;
+	int ndpi;
 	unsigned long flags;
 	struct list_head *lsthead = &ep->rdllist;
-	struct pollfd eventbuf[EP_EVENT_BUFF_SIZE];
 
 	write_lock_irqsave(&ep->lock, flags);
 
-	for (eventcnt = 0, ebufcnt = 0; (eventcnt + ebufcnt) < maxevents && !list_empty(lsthead);) {
+	for (ndpi = 0; ndpi < maxdpi && !list_empty(lsthead);) {
 		struct epitem *dpi = list_entry(lsthead->next, struct epitem, rdllink);
 
 		/* Remove the item from the ready list */
@@ -1081,41 +1143,112 @@ static int ep_events_transfer(struct eventpoll *ep, struct pollfd *events, int m
 		if (!EP_IS_LINKED(&dpi->llink))
 			continue;
 
-		/* Fetch event bits from the signaled file */
+		/*
+		 * We need to increase the usage count of the "struct epitem" because
+		 * another thread might call EP_CTL_DEL on this target and make the
+		 * object vanish underneath our nose.
+		 */
+		ep_use_epitem(dpi);
+
+		adpi[ndpi++] = dpi;
+	}
+
+	write_unlock_irqrestore(&ep->lock, flags);
+
+	return ndpi;
+}
+
+/*
+ * This function is called without holding the "ep->lock" since the call to
+ * __copy_to_user() might sleep, and also f_op->poll() might reenable IRQs
+ * because of the way poll() is traditionally implemented in Linux.
+ */
+static int ep_send_events(struct eventpoll *ep, struct epitem **adpi, int ndpi,
+			  struct pollfd *events)
+{
+	int i, eventcnt, eventbuf, revents;
+	struct epitem *dpi;
+	struct pollfd pfd[EP_MAX_BUF_EVENTS];
+
+	for (i = 0, eventcnt = 0, eventbuf = 0; i < ndpi; i++, adpi++) {
+		dpi = *adpi;
+
+		/* Get the ready file event set */
 		revents = dpi->file->f_op->poll(dpi->file, NULL);
 
 		if (revents & dpi->pfd.events) {
-			eventbuf[ebufcnt] = dpi->pfd;
-			eventbuf[ebufcnt].revents = revents & eventbuf[ebufcnt].events;
-			ebufcnt++;
+			pfd[eventbuf] = dpi->pfd;
+			pfd[eventbuf].revents = revents & pfd[eventbuf].events;
+			eventbuf++;
 
-			/* If our buffer page is full we need to flush it to user space */
-			if (ebufcnt == EP_EVENT_BUFF_SIZE) {
-				/*
-				 * We need to drop the irqlock before using the function
-				 * __copy_to_user() because it might fault.
-				 */
-				write_unlock_irqrestore(&ep->lock, flags);
-
-				if (__copy_to_user(&events[eventcnt], eventbuf,
-						   ebufcnt * sizeof(struct pollfd)))
+			if (eventbuf == EP_MAX_BUF_EVENTS) {
+				if (__copy_to_user(&events[eventcnt], pfd,
+						   eventbuf * sizeof(struct pollfd))) {
+					for (; i < ndpi; i++, adpi++)
+						ep_release_epitem(*adpi);
 					return -EFAULT;
-
-				eventcnt += ebufcnt;
-				ebufcnt = 0;
-
-				write_lock_irqsave(&ep->lock, flags);
+				}
+				eventcnt += eventbuf;
+				eventbuf = 0;
 			}
 		}
+
+		ep_release_epitem(dpi);
 	}
 
-	write_unlock_irqrestore(&ep->lock, flags);
-
-	/* There might still be something inside our event buffer */
-	if (ebufcnt) {
-		if (__copy_to_user(&events[eventcnt], eventbuf,
-				   ebufcnt * sizeof(struct pollfd)))
-			eventcnt = -EFAULT;
-		else
-			eventcnt += ebufcnt;
+	if (eventbuf) {
+		if (__copy_to_user(&events[eventcnt], pfd,
+				   eventbuf * sizeof(struct pollfd)))
+			return -EFAULT;
+		eventcnt += eventbuf;
 	}
 
 	return eventcnt;
 }
+
+/*
+ * Perform the transfer of events to user space.
+ */
+static int ep_events_transfer(struct eventpoll *ep, struct pollfd *events, int maxevents)
+{
+	int eventcnt, ndpi, sdpi, maxdpi;
+	struct epitem *adpi[EP_MAX_COLLECT_ITEMS];
+
+	/*
+	 * We need to lock this because we could be hit by
+	 * ep_notify_file_close() while we're transferring
+	 * events to userspace. Read-holding "epsem" will lock
+	 * out ep_notify_file_close() during the whole
+	 * transfer loop, and this guarantees us that the
+	 * file will not vanish underneath our nose when
+	 * we call f_op->poll() from ep_send_events().
+	 */
+	down_read(&epsem);
+
+	for (eventcnt = 0; eventcnt < maxevents;) {
+		/* Maximum items we can extract this time */
+		maxdpi = min(EP_MAX_COLLECT_ITEMS, maxevents - eventcnt);
+
+		/* Collect/extract ready items */
+		ndpi = ep_collect_ready_items(ep, adpi, maxdpi);
+
+		if (ndpi) {
+			/* Send events to userspace */
+			sdpi = ep_send_events(ep, adpi, ndpi, &events[eventcnt]);
+			if (sdpi < 0) {
+				up_read(&epsem);
+				return sdpi;
+			}
+			eventcnt += sdpi;
+		}
+
+		if (ndpi < maxdpi)
+			break;
+	}
+
+	up_read(&epsem);
+
	return eventcnt;
 }
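
The two-phase structure above (collect a bounded batch under the spinlock,
then do the sleepy work unlocked) is a general pattern. A small, runnable
userspace analogue, with illustrative names and a mutex standing in for the
IRQ spinlock:

#include <pthread.h>
#include <stdio.h>

#define BATCH 4

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static int ready[10] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
static int nready = 10;

static int collect(int *batch, int max)
{
	int n = 0;

	pthread_mutex_lock(&lock);		/* "ep->lock" analogue */
	while (n < max && nready > 0)
		batch[n++] = ready[--nready];	/* detach items under the lock */
	pthread_mutex_unlock(&lock);

	return n;
}

int main(void)
{
	int batch[BATCH], n;

	/* Drain in bounded batches; a partial batch means the ready list
	 * is empty, mirroring the "if (ndpi < maxdpi) break;" exit above. */
	while ((n = collect(batch, BATCH)) > 0) {
		int i;

		for (i = 0; i < n; i++)		/* slow per-item work, no lock held */
			printf("sending item %d\n", batch[i]);
		if (n < BATCH)
			break;
	}
	return 0;
}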
@@ -1239,8 +1372,8 @@ static int __init eventpoll_init(void)
 	/* Initialize the list that will link "struct eventpoll" */
 	INIT_LIST_HEAD(&eplist);
 
-	/* Initialize the rwlock used to access "eplist" */
-	rwlock_init(&eplock);
+	/* Initialize the rwsem used to access "eplist" */
+	init_rwsem(&epsem);
 
 	/* Allocates slab cache used to allocate "struct epitem" items */
 	error = -ENOMEM;
......
@@ -54,7 +54,7 @@ struct poll_table_page {
  * poll table.
  */
-void poll_freewait(poll_table* pt)
+void __pollfreewait(poll_table* pt)
 {
 	struct poll_table_page * p = pt->table;
 	while (p) {
......
@@ -14,14 +14,17 @@ struct poll_table_page;
 struct poll_table_struct;
 
 typedef void (*poll_queue_proc)(struct file *, wait_queue_head_t *, struct poll_table_struct *);
+typedef void (*poll_free_proc)(struct poll_table_struct *);
 
 typedef struct poll_table_struct {
 	poll_queue_proc qproc;
+	poll_free_proc fproc;
 	int error;
 	struct poll_table_page * table;
 } poll_table;
 
 extern void __pollwait(struct file * filp, wait_queue_head_t * wait_address, poll_table *p);
+extern void __pollfreewait(poll_table* pt);
 
 static inline void poll_wait(struct file * filp, wait_queue_head_t * wait_address, poll_table *p)
 {
@@ -29,9 +32,10 @@ static inline void poll_wait(struct file * filp, wait_queue_head_t * wait_addres
 	p->qproc(filp, wait_address, p);
 }
 
-static inline void poll_initwait_ex(poll_table* pt, poll_queue_proc qproc)
+static inline void poll_initwait_ex(poll_table* pt, poll_queue_proc qproc, poll_free_proc fproc)
 {
 	pt->qproc = qproc;
+	pt->fproc = fproc;
 	pt->error = 0;
 	pt->table = NULL;
 }
@@ -39,10 +43,15 @@ static inline void poll_initwait_ex(poll_table* pt, poll_queue_proc qproc)
 static inline void poll_initwait(poll_table* pt)
 {
-	poll_initwait_ex(pt, __pollwait);
+	poll_initwait_ex(pt, __pollwait, __pollfreewait);
 }
 
-extern void poll_freewait(poll_table* pt);
+static inline void poll_freewait(poll_table* pt)
+{
+	if (pt && pt->fproc)
+		pt->fproc(pt);
+}
 
 /*
......
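
Putting the header changes together, a short sketch (a fragment, not a
standalone program) of the two call styles this interface now supports,
using the epoll names from the hunks above:

	poll_table pt;

	/* Classic select()/poll() path: default callbacks, so behavior is
	 * unchanged and poll_freewait() ends up in __pollfreewait(). */
	poll_initwait(&pt);

	/* epoll path (see ep_insert() above): custom queue callback and no
	 * free callback, so freeing the table is a cheap no-op. */
	poll_initwait_ex(&pt, ep_ptable_queue_proc, NULL);
	poll_freewait(&pt);	/* calls pt.fproc only if it is non-NULL */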
@@ -269,7 +269,7 @@ EXPORT_SYMBOL(generic_file_llseek);
 EXPORT_SYMBOL(remote_llseek);
 EXPORT_SYMBOL(no_llseek);
 EXPORT_SYMBOL(__pollwait);
-EXPORT_SYMBOL(poll_freewait);
+EXPORT_SYMBOL(__pollfreewait);
 EXPORT_SYMBOL(ROOT_DEV);
 EXPORT_SYMBOL(find_get_page);
 EXPORT_SYMBOL(find_lock_page);
......