| From: Wei Wang <weiwan@google.com> |
| Date: Mon, 1 Mar 2021 17:21:13 -0800 |
| Subject: [PATCH] net: fix race between napi kthread mode and busy poll |
| |
| Currently, napi_thread_wait() checks for NAPI_STATE_SCHED bit to |
| determine if the kthread owns this napi and could call napi->poll() on |
| it. However, if socket busy poll is enabled, it is possible that the |
| busy poll thread grabs this SCHED bit (after the previous napi->poll() |
| invokes napi_complete_done() and clears the SCHED bit) and tries to poll |
| on the same napi. napi_disable() could grab the SCHED bit as well. |
| This patch tries to fix this race by adding a new bit |
| NAPI_STATE_SCHED_THREADED in napi->state. This bit gets set in |
| ____napi_schedule() if the threaded mode is enabled, and gets cleared |
| in napi_complete_done(), and we only poll the napi in kthread if this |
| bit is set. This helps distinguish ownership of the napi between the |
| kthread and the other contexts, and fixes the race issue. |
| |
| Fixes: 29863d41bb6e ("net: implement threaded-able napi poll loop support") |
| Reported-by: Martin Zaharinov <micron10@gmail.com> |
| Suggested-by: Jakub Kicinski <kuba@kernel.org> |
| Signed-off-by: Wei Wang <weiwan@google.com> |
| Cc: Alexander Duyck <alexanderduyck@fb.com> |
| Cc: Eric Dumazet <edumazet@google.com> |
| Cc: Paolo Abeni <pabeni@redhat.com> |
| Cc: Hannes Frederic Sowa <hannes@stressinduktion.org> |
| --- |
| |
| --- a/include/linux/netdevice.h |
| +++ b/include/linux/netdevice.h |
| @@ -361,6 +361,7 @@ enum { |
| NAPI_STATE_NO_BUSY_POLL,/* Do not add in napi_hash, no busy polling */ |
| NAPI_STATE_IN_BUSY_POLL,/* sk_busy_loop() owns this NAPI */ |
| NAPI_STATE_THREADED, /* The poll is performed inside its own thread*/ |
| + NAPI_STATE_SCHED_THREADED, /* Napi is currently scheduled in threaded mode */ |
| }; |
| |
| enum { |
| @@ -372,6 +373,7 @@ enum { |
| NAPIF_STATE_NO_BUSY_POLL = BIT(NAPI_STATE_NO_BUSY_POLL), |
| NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL), |
| NAPIF_STATE_THREADED = BIT(NAPI_STATE_THREADED), |
| + NAPIF_STATE_SCHED_THREADED = BIT(NAPI_STATE_SCHED_THREADED), |
| }; |
| |
| enum gro_result { |
| --- a/net/core/dev.c |
| +++ b/net/core/dev.c |
| @@ -3924,6 +3924,8 @@ static inline void ____napi_schedule(str |
| */ |
| thread = READ_ONCE(napi->thread); |
| if (thread) { |
| + if (thread->state != TASK_INTERRUPTIBLE) |
| + set_bit(NAPI_STATE_SCHED_THREADED, &napi->state); |
| wake_up_process(thread); |
| return; |
| } |
| @@ -6084,7 +6086,8 @@ bool napi_complete_done(struct napi_stru |
| |
| WARN_ON_ONCE(!(val & NAPIF_STATE_SCHED)); |
| |
| - new = val & ~(NAPIF_STATE_MISSED | NAPIF_STATE_SCHED); |
| + new = val & ~(NAPIF_STATE_MISSED | NAPIF_STATE_SCHED | |
| + NAPIF_STATE_SCHED_THREADED); |
| |
| /* If STATE_MISSED was set, leave STATE_SCHED set, |
| * because we will call napi->poll() one more time. |
| @@ -6517,16 +6520,25 @@ static int napi_poll(struct napi_struct |
| |
| static int napi_thread_wait(struct napi_struct *napi) |
| { |
| + bool woken = false; |
| + |
| set_current_state(TASK_INTERRUPTIBLE); |
| |
| while (!kthread_should_stop() && !napi_disable_pending(napi)) { |
| - if (test_bit(NAPI_STATE_SCHED, &napi->state)) { |
| + /* Testing SCHED_THREADED bit here to make sure the current |
| + * kthread owns this napi and could poll on this napi. |
| + * Testing SCHED bit is not enough because SCHED bit might be |
| + * set by some other busy poll thread or by napi_disable(). |
| + */ |
| + if (test_bit(NAPI_STATE_SCHED_THREADED, &napi->state) || woken) { |
| WARN_ON(!list_empty(&napi->poll_list)); |
| __set_current_state(TASK_RUNNING); |
| return 0; |
| } |
| |
| schedule(); |
| + /* woken being true indicates this thread owns this napi. */ |
| + woken = true; |
| set_current_state(TASK_INTERRUPTIBLE); |
| } |
| __set_current_state(TASK_RUNNING); |