b.liu | e958203 | 2025-04-17 19:18:16 +0800 | [diff] [blame] | 1 | From: Wei Wang <weiwan@google.com> |
| 2 | Date: Mon, 1 Mar 2021 17:21:13 -0800 |
| 3 | Subject: [PATCH] net: fix race between napi kthread mode and busy poll |
| 4 | |
| 5 | Currently, napi_thread_wait() checks for NAPI_STATE_SCHED bit to |
| 6 | determine if the kthread owns this napi and could call napi->poll() on |
| 7 | it. However, if socket busy poll is enabled, it is possible that the |
| 8 | busy poll thread grabs this SCHED bit (after the previous napi->poll() |
| 9 | invokes napi_complete_done() and clears SCHED bit) and tries to poll |
| 10 | on the same napi. napi_disable() could grab the SCHED bit as well. |
| 11 | This patch tries to fix this race by adding a new bit |
| 12 | NAPI_STATE_SCHED_THREADED in napi->state. This bit gets set in |
| 13 | ____napi_schedule() if the threaded mode is enabled, and gets cleared |
| 14 | in napi_complete_done(), and we only poll the napi in kthread if this |
| 15 | bit is set. This helps distinguish the ownership of the napi between |
| 16 | kthread and other scenarios and fixes the race issue. |
| 17 | |
| 18 | Fixes: 29863d41bb6e ("net: implement threaded-able napi poll loop support") |
| 19 | Reported-by: Martin Zaharinov <micron10@gmail.com> |
| 20 | Suggested-by: Jakub Kicinski <kuba@kernel.org> |
| 21 | Signed-off-by: Wei Wang <weiwan@google.com> |
| 22 | Cc: Alexander Duyck <alexanderduyck@fb.com> |
| 23 | Cc: Eric Dumazet <edumazet@google.com> |
| 24 | Cc: Paolo Abeni <pabeni@redhat.com> |
| 25 | Cc: Hannes Frederic Sowa <hannes@stressinduktion.org> |
| 26 | --- |
| 27 | |
| 28 | --- a/include/linux/netdevice.h |
| 29 | +++ b/include/linux/netdevice.h |
| 30 | @@ -361,6 +361,7 @@ enum { |
| 31 | NAPI_STATE_NO_BUSY_POLL,/* Do not add in napi_hash, no busy polling */ |
| 32 | NAPI_STATE_IN_BUSY_POLL,/* sk_busy_loop() owns this NAPI */ |
| 33 | NAPI_STATE_THREADED, /* The poll is performed inside its own thread*/ |
| 34 | + NAPI_STATE_SCHED_THREADED, /* Napi is currently scheduled in threaded mode */ |
| 35 | }; |
| 36 | |
| 37 | enum { |
| 38 | @@ -372,6 +373,7 @@ enum { |
| 39 | NAPIF_STATE_NO_BUSY_POLL = BIT(NAPI_STATE_NO_BUSY_POLL), |
| 40 | NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL), |
| 41 | NAPIF_STATE_THREADED = BIT(NAPI_STATE_THREADED), |
| 42 | + NAPIF_STATE_SCHED_THREADED = BIT(NAPI_STATE_SCHED_THREADED), |
| 43 | }; |
| 44 | |
| 45 | enum gro_result { |
| 46 | --- a/net/core/dev.c |
| 47 | +++ b/net/core/dev.c |
| 48 | @@ -3924,6 +3924,8 @@ static inline void ____napi_schedule(str |
| 49 | */ |
| 50 | thread = READ_ONCE(napi->thread); |
| 51 | if (thread) { |
| 52 | + if (thread->state != TASK_INTERRUPTIBLE) |
| 53 | + set_bit(NAPI_STATE_SCHED_THREADED, &napi->state); |
| 54 | wake_up_process(thread); |
| 55 | return; |
| 56 | } |
| 57 | @@ -6084,7 +6086,8 @@ bool napi_complete_done(struct napi_stru |
| 58 | |
| 59 | WARN_ON_ONCE(!(val & NAPIF_STATE_SCHED)); |
| 60 | |
| 61 | - new = val & ~(NAPIF_STATE_MISSED | NAPIF_STATE_SCHED); |
| 62 | + new = val & ~(NAPIF_STATE_MISSED | NAPIF_STATE_SCHED | |
| 63 | + NAPIF_STATE_SCHED_THREADED); |
| 64 | |
| 65 | /* If STATE_MISSED was set, leave STATE_SCHED set, |
| 66 | * because we will call napi->poll() one more time. |
| 67 | @@ -6517,16 +6520,25 @@ static int napi_poll(struct napi_struct |
| 68 | |
| 69 | static int napi_thread_wait(struct napi_struct *napi) |
| 70 | { |
| 71 | + bool woken = false; |
| 72 | + |
| 73 | set_current_state(TASK_INTERRUPTIBLE); |
| 74 | |
| 75 | while (!kthread_should_stop() && !napi_disable_pending(napi)) { |
| 76 | - if (test_bit(NAPI_STATE_SCHED, &napi->state)) { |
| 77 | + /* Testing SCHED_THREADED bit here to make sure the current |
| 78 | + * kthread owns this napi and could poll on this napi. |
| 79 | + * Testing SCHED bit is not enough because SCHED bit might be |
| 80 | + * set by some other busy poll thread or by napi_disable(). |
| 81 | + */ |
| 82 | + if (test_bit(NAPI_STATE_SCHED_THREADED, &napi->state) || woken) { |
| 83 | WARN_ON(!list_empty(&napi->poll_list)); |
| 84 | __set_current_state(TASK_RUNNING); |
| 85 | return 0; |
| 86 | } |
| 87 | |
| 88 | schedule(); |
| 89 | + /* woken being true indicates this thread owns this napi. */ |
| 90 | + woken = true; |
| 91 | set_current_state(TASK_INTERRUPTIBLE); |
| 92 | } |
| 93 | __set_current_state(TASK_RUNNING); |