Commit 57a173bd authored by Jason Baron's avatar Jason Baron Committed by Linus Torvalds

epoll: avoid calling ep_call_nested() from ep_poll_safewake()

ep_poll_safewake() is used to wakeup potentially nested epoll file
descriptors.  The function uses ep_call_nested() to prevent entering the
same wake up queue more than once, and to prevent excessively deep
wakeup paths (deeper than EP_MAX_NESTS).  However, this is not necessary
since we are already preventing these conditions during EPOLL_CTL_ADD.
This saves extra function calls, and avoids taking a global lock during
the ep_call_nested() calls.

I have, however, left ep_call_nested() for the CONFIG_DEBUG_LOCK_ALLOC
case, since ep_call_nested() keeps track of the nesting level, and this
is required by the call to spin_lock_irqsave_nested().  It would be nice
to remove the ep_call_nested() calls for the CONFIG_DEBUG_LOCK_ALLOC
case as well, however its not clear how to simply pass the nesting level
through multiple wake_up() levels without more surgery.  In any case, I
don't think CONFIG_DEBUG_LOCK_ALLOC is generally used for production.
This patch, also apparently fixes a workload at Google that Salman Qazi
reported by completely removing the poll_safewake_ncalls->lock from
wakeup paths.

Link: http://lkml.kernel.org/r/1507920533-8812-1-git-send-email-jbaron@akamai.comSigned-off-by: default avatarJason Baron <jbaron@akamai.com>
Acked-by: default avatarDavidlohr Bueso <dbueso@suse.de>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Salman Qazi <sqazi@google.com>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
parent 2ae928a9
...@@ -276,9 +276,6 @@ static DEFINE_MUTEX(epmutex); ...@@ -276,9 +276,6 @@ static DEFINE_MUTEX(epmutex);
/* Used to check for epoll file descriptor inclusion loops */ /* Used to check for epoll file descriptor inclusion loops */
static struct nested_calls poll_loop_ncalls; static struct nested_calls poll_loop_ncalls;
/* Used for safe wake up implementation */
static struct nested_calls poll_safewake_ncalls;
/* Used to call file's f_op->poll() under the nested calls boundaries */ /* Used to call file's f_op->poll() under the nested calls boundaries */
static struct nested_calls poll_readywalk_ncalls; static struct nested_calls poll_readywalk_ncalls;
...@@ -551,40 +548,21 @@ static int ep_call_nested(struct nested_calls *ncalls, int max_nests, ...@@ -551,40 +548,21 @@ static int ep_call_nested(struct nested_calls *ncalls, int max_nests,
* this special case of epoll. * this special case of epoll.
*/ */
#ifdef CONFIG_DEBUG_LOCK_ALLOC #ifdef CONFIG_DEBUG_LOCK_ALLOC
static inline void ep_wake_up_nested(wait_queue_head_t *wqueue,
unsigned long events, int subclass) static struct nested_calls poll_safewake_ncalls;
static int ep_poll_wakeup_proc(void *priv, void *cookie, int call_nests)
{ {
unsigned long flags; unsigned long flags;
wait_queue_head_t *wqueue = (wait_queue_head_t *)cookie;
spin_lock_irqsave_nested(&wqueue->lock, flags, subclass); spin_lock_irqsave_nested(&wqueue->lock, flags, call_nests + 1);
wake_up_locked_poll(wqueue, events); wake_up_locked_poll(wqueue, POLLIN);
spin_unlock_irqrestore(&wqueue->lock, flags); spin_unlock_irqrestore(&wqueue->lock, flags);
}
#else
static inline void ep_wake_up_nested(wait_queue_head_t *wqueue,
unsigned long events, int subclass)
{
wake_up_poll(wqueue, events);
}
#endif
static int ep_poll_wakeup_proc(void *priv, void *cookie, int call_nests)
{
ep_wake_up_nested((wait_queue_head_t *) cookie, POLLIN,
1 + call_nests);
return 0; return 0;
} }
/*
* Perform a safe wake up of the poll wait list. The problem is that
* with the new callback'd wake up system, it is possible that the
* poll callback is reentered from inside the call to wake_up() done
* on the poll wait queue head. The rule is that we cannot reenter the
* wake up code from the same task more than EP_MAX_NESTS times,
* and we cannot reenter the same wait queue head at all. This will
* enable to have a hierarchy of epoll file descriptor of no more than
* EP_MAX_NESTS deep.
*/
static void ep_poll_safewake(wait_queue_head_t *wq) static void ep_poll_safewake(wait_queue_head_t *wq)
{ {
int this_cpu = get_cpu(); int this_cpu = get_cpu();
...@@ -595,6 +573,15 @@ static void ep_poll_safewake(wait_queue_head_t *wq) ...@@ -595,6 +573,15 @@ static void ep_poll_safewake(wait_queue_head_t *wq)
put_cpu(); put_cpu();
} }
#else
static void ep_poll_safewake(wait_queue_head_t *wq)
{
wake_up_poll(wq, POLLIN);
}
#endif
static void ep_remove_wait_queue(struct eppoll_entry *pwq) static void ep_remove_wait_queue(struct eppoll_entry *pwq)
{ {
wait_queue_head_t *whead; wait_queue_head_t *whead;
...@@ -2315,8 +2302,10 @@ static int __init eventpoll_init(void) ...@@ -2315,8 +2302,10 @@ static int __init eventpoll_init(void)
*/ */
ep_nested_calls_init(&poll_loop_ncalls); ep_nested_calls_init(&poll_loop_ncalls);
#ifdef CONFIG_DEBUG_LOCK_ALLOC
/* Initialize the structure used to perform safe poll wait head wake ups */ /* Initialize the structure used to perform safe poll wait head wake ups */
ep_nested_calls_init(&poll_safewake_ncalls); ep_nested_calls_init(&poll_safewake_ncalls);
#endif
/* Initialize the structure used to perform file's f_op->poll() calls */ /* Initialize the structure used to perform file's f_op->poll() calls */
ep_nested_calls_init(&poll_readywalk_ncalls); ep_nested_calls_init(&poll_readywalk_ncalls);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment