Commit cb038357 authored by Wei Wang's avatar Wei Wang Committed by David S. Miller

net: fix race between napi kthread mode and busy poll

Currently, napi_thread_wait() checks for NAPI_STATE_SCHED bit to
determine if the kthread owns this napi and could call napi->poll() on
it. However, if socket busy poll is enabled, it is possible that the
busy poll thread grabs this SCHED bit (after the previous napi->poll()
invokes napi_complete_done() and clears SCHED bit) and tries to poll
on the same napi. napi_disable() could grab the SCHED bit as well.
This patch tries to fix this race by adding a new bit
NAPI_STATE_SCHED_THREADED in napi->state. This bit gets set in
____napi_schedule() if the threaded mode is enabled, and gets cleared
in napi_complete_done(), and we only poll the napi in kthread if this
bit is set. This helps distinguish the ownership of the napi between
kthread and other scenarios and fixes the race issue.

Fixes: 29863d41 ("net: implement threaded-able napi poll loop support")
Reported-by: default avatarMartin Zaharinov <micron10@gmail.com>
Suggested-by: default avatarJakub Kicinski <kuba@kernel.org>
Signed-off-by: default avatarWei Wang <weiwan@google.com>
Cc: Alexander Duyck <alexanderduyck@fb.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Paolo Abeni <pabeni@redhat.com>
Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: default avatarJakub Kicinski <kuba@kernel.org>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent 0692c33c
...@@ -360,6 +360,7 @@ enum { ...@@ -360,6 +360,7 @@ enum {
NAPI_STATE_IN_BUSY_POLL, /* sk_busy_loop() owns this NAPI */ NAPI_STATE_IN_BUSY_POLL, /* sk_busy_loop() owns this NAPI */
NAPI_STATE_PREFER_BUSY_POLL, /* prefer busy-polling over softirq processing*/ NAPI_STATE_PREFER_BUSY_POLL, /* prefer busy-polling over softirq processing*/
NAPI_STATE_THREADED, /* The poll is performed inside its own thread*/ NAPI_STATE_THREADED, /* The poll is performed inside its own thread*/
NAPI_STATE_SCHED_THREADED, /* Napi is currently scheduled in threaded mode */
}; };
enum { enum {
...@@ -372,6 +373,7 @@ enum { ...@@ -372,6 +373,7 @@ enum {
NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL), NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL),
NAPIF_STATE_PREFER_BUSY_POLL = BIT(NAPI_STATE_PREFER_BUSY_POLL), NAPIF_STATE_PREFER_BUSY_POLL = BIT(NAPI_STATE_PREFER_BUSY_POLL),
NAPIF_STATE_THREADED = BIT(NAPI_STATE_THREADED), NAPIF_STATE_THREADED = BIT(NAPI_STATE_THREADED),
NAPIF_STATE_SCHED_THREADED = BIT(NAPI_STATE_SCHED_THREADED),
}; };
enum gro_result { enum gro_result {
......
...@@ -4294,6 +4294,13 @@ static inline void ____napi_schedule(struct softnet_data *sd, ...@@ -4294,6 +4294,13 @@ static inline void ____napi_schedule(struct softnet_data *sd,
*/ */
thread = READ_ONCE(napi->thread); thread = READ_ONCE(napi->thread);
if (thread) { if (thread) {
/* Avoid doing set_bit() if the thread is in
* INTERRUPTIBLE state, cause napi_thread_wait()
* makes sure to proceed with napi polling
* if the thread is explicitly woken from here.
*/
if (READ_ONCE(thread->state) != TASK_INTERRUPTIBLE)
set_bit(NAPI_STATE_SCHED_THREADED, &napi->state);
wake_up_process(thread); wake_up_process(thread);
return; return;
} }
...@@ -6486,6 +6493,7 @@ bool napi_complete_done(struct napi_struct *n, int work_done) ...@@ -6486,6 +6493,7 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
WARN_ON_ONCE(!(val & NAPIF_STATE_SCHED)); WARN_ON_ONCE(!(val & NAPIF_STATE_SCHED));
new = val & ~(NAPIF_STATE_MISSED | NAPIF_STATE_SCHED | new = val & ~(NAPIF_STATE_MISSED | NAPIF_STATE_SCHED |
NAPIF_STATE_SCHED_THREADED |
NAPIF_STATE_PREFER_BUSY_POLL); NAPIF_STATE_PREFER_BUSY_POLL);
/* If STATE_MISSED was set, leave STATE_SCHED set, /* If STATE_MISSED was set, leave STATE_SCHED set,
...@@ -6968,16 +6976,25 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll) ...@@ -6968,16 +6976,25 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll)
static int napi_thread_wait(struct napi_struct *napi) static int napi_thread_wait(struct napi_struct *napi)
{ {
bool woken = false;
set_current_state(TASK_INTERRUPTIBLE); set_current_state(TASK_INTERRUPTIBLE);
while (!kthread_should_stop() && !napi_disable_pending(napi)) { while (!kthread_should_stop() && !napi_disable_pending(napi)) {
if (test_bit(NAPI_STATE_SCHED, &napi->state)) { /* Testing SCHED_THREADED bit here to make sure the current
* kthread owns this napi and could poll on this napi.
* Testing SCHED bit is not enough because SCHED bit might be
* set by some other busy poll thread or by napi_disable().
*/
if (test_bit(NAPI_STATE_SCHED_THREADED, &napi->state) || woken) {
WARN_ON(!list_empty(&napi->poll_list)); WARN_ON(!list_empty(&napi->poll_list));
__set_current_state(TASK_RUNNING); __set_current_state(TASK_RUNNING);
return 0; return 0;
} }
schedule(); schedule();
/* woken being true indicates this thread owns this napi. */
woken = true;
set_current_state(TASK_INTERRUPTIBLE); set_current_state(TASK_INTERRUPTIBLE);
} }
__set_current_state(TASK_RUNNING); __set_current_state(TASK_RUNNING);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment