From: Wei Wang <weiwan@google.com>
Date: Mon, 8 Feb 2021 11:34:09 -0800
Subject: [PATCH] net: implement threaded-able napi poll loop support

This patch allows running each napi poll loop inside its own
kernel thread.
The kthread is created during netif_napi_add() if dev->threaded
is set, and threaded mode is enabled in napi_enable(). A follow-up
patch will provide a way to set dev->threaded and enable threaded
mode without a device up/down cycle.
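
For illustration, here is a minimal sketch of how a driver could
opt in; the "foo" names (foo_priv, foo_poll, foo_probe, foo_open)
are hypothetical placeholders, not part of this patch:

  #include <linux/netdevice.h>

  struct foo_priv {
          struct net_device *netdev;
          struct napi_struct napi;
  };

  static int foo_poll(struct napi_struct *napi, int budget)
  {
          int work = 0;           /* RX processing elided */

          if (work < budget)
                  napi_complete_done(napi, work);
          return work;
  }

  static int foo_probe(struct foo_priv *priv)
  {
          /* Request threaded mode before netif_napi_add() so the
           * kthread is created together with the napi instance;
           * the core clears dev->threaded again if kthread
           * creation fails.
           */
          priv->netdev->threaded = 1;
          netif_napi_add(priv->netdev, &priv->napi, foo_poll,
                         NAPI_POLL_WEIGHT);
          return 0;
  }

  static int foo_open(struct net_device *dev)
  {
          struct foo_priv *priv = netdev_priv(dev);

          /* NAPI_STATE_THREADED is actually set here. */
          napi_enable(&priv->napi);
          return 0;
  }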

Once threaded mode is enabled and the kthread is started,
napi_schedule() wakes up the kthread instead of raising the
NET_RX softirq.
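
In ____napi_schedule() (see the hunk below) the dispatch is,
abridged:

  if (test_bit(NAPI_STATE_THREADED, &napi->state)) {
          struct task_struct *thread = READ_ONCE(napi->thread);

          if (thread) {
                  wake_up_process(thread);
                  return;
          }
  }
  /* Fall back to the classic softirq path. */
  list_add_tail(&napi->poll_list, &sd->poll_list);
  __raise_softirq_irqoff(NET_RX_SOFTIRQ);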

The threaded poll loop behaves much like net_rx_action(), but it
does not have to manipulate local irqs and, instead of
net_rx_action's netdev_budget based limit, it relies on an
explicit scheduling point (cond_resched()) between poll rounds.
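
Schematically, each kthread runs the loop below, a paraphrase of
napi_threaded_poll() from this patch with the netpoll locking and
skb flushing elided:

  while (!napi_thread_wait(napi)) {     /* sleeps until napi_schedule() */
          for (;;) {
                  bool repoll = false;

                  local_bh_disable();   /* no local_irq_disable() needed */
                  __napi_poll(napi, &repoll);
                  local_bh_enable();

                  if (!repoll)
                          break;        /* back to sleep */

                  cond_resched();       /* explicit scheduling point */
          }
  }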

Co-developed-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Co-developed-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Co-developed-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Wei Wang <weiwan@google.com>
Reviewed-by: Alexander Duyck <alexanderduyck@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---

--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -349,6 +349,7 @@ struct napi_struct {
         struct list_head dev_list;
         struct hlist_node napi_hash_node;
         unsigned int napi_id;
+        struct task_struct *thread;
 };
 
 enum {
@@ -359,6 +360,7 @@ enum {
         NAPI_STATE_HASHED,      /* In NAPI hash (busy polling possible) */
         NAPI_STATE_NO_BUSY_POLL,/* Do not add in napi_hash, no busy polling */
         NAPI_STATE_IN_BUSY_POLL,/* sk_busy_loop() owns this NAPI */
+        NAPI_STATE_THREADED,    /* The poll is performed inside its own thread */
 };
 
 enum {
@@ -369,6 +371,7 @@ enum {
         NAPIF_STATE_HASHED = BIT(NAPI_STATE_HASHED),
         NAPIF_STATE_NO_BUSY_POLL = BIT(NAPI_STATE_NO_BUSY_POLL),
         NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL),
+        NAPIF_STATE_THREADED = BIT(NAPI_STATE_THREADED),
 };
 
 enum gro_result {
@@ -513,20 +516,7 @@ bool napi_hash_del(struct napi_struct *n
  */
 void napi_disable(struct napi_struct *n);
 
-/**
- * napi_enable - enable NAPI scheduling
- * @n: NAPI context
- *
- * Resume NAPI from being scheduled on this context.
- * Must be paired with napi_disable.
- */
-static inline void napi_enable(struct napi_struct *n)
-{
-        BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
-        smp_mb__before_atomic();
-        clear_bit(NAPI_STATE_SCHED, &n->state);
-        clear_bit(NAPI_STATE_NPSVC, &n->state);
-}
+void napi_enable(struct napi_struct *n);
 
 /**
  * napi_synchronize - wait until NAPI is not running
@@ -1792,6 +1782,8 @@ enum netdev_ml_priv_type {
  *
  * @wol_enabled: Wake-on-LAN is enabled
  *
+ * @threaded: napi threaded mode is enabled
+ *
  * FIXME: cleanup struct net_device such that network protocol info
  * moves out.
  */
@@ -2084,6 +2076,7 @@ struct net_device {
         struct lock_class_key addr_list_lock_key;
         bool proto_down;
         unsigned wol_enabled:1;
+        unsigned threaded:1;
 };
 #define to_net_dev(d) container_of(d, struct net_device, dev)
 
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -91,6 +91,7 @@
 #include <linux/etherdevice.h>
 #include <linux/ethtool.h>
 #include <linux/skbuff.h>
+#include <linux/kthread.h>
 #include <linux/bpf.h>
 #include <linux/bpf_trace.h>
 #include <net/net_namespace.h>
@@ -1289,6 +1290,27 @@ void netdev_notify_peers(struct net_devi
 }
 EXPORT_SYMBOL(netdev_notify_peers);
 
+static int napi_threaded_poll(void *data);
+
+static int napi_kthread_create(struct napi_struct *n)
+{
+        int err = 0;
+
+        /* Create and wake up the kthread once to put it in
+         * TASK_INTERRUPTIBLE mode to avoid the blocked task
+         * warning and work with loadavg.
+         */
+        n->thread = kthread_run(napi_threaded_poll, n, "napi/%s-%d",
+                                n->dev->name, n->napi_id);
+        if (IS_ERR(n->thread)) {
+                err = PTR_ERR(n->thread);
+                pr_err("kthread_run failed with err %d\n", err);
+                n->thread = NULL;
+        }
+
+        return err;
+}
+
 static int __dev_open(struct net_device *dev, struct netlink_ext_ack *extack)
 {
         const struct net_device_ops *ops = dev->netdev_ops;
@@ -3891,6 +3913,21 @@ int gro_normal_batch __read_mostly = 8;
 static inline void ____napi_schedule(struct softnet_data *sd,
                                      struct napi_struct *napi)
 {
+        struct task_struct *thread;
+
+        if (test_bit(NAPI_STATE_THREADED, &napi->state)) {
+                /* Paired with smp_mb__before_atomic() in
+                 * napi_enable(). Use READ_ONCE() to guarantee
+                 * a complete read on napi->thread. Only call
+                 * wake_up_process() when it's not NULL.
+                 */
+                thread = READ_ONCE(napi->thread);
+                if (thread) {
+                        wake_up_process(thread);
+                        return;
+                }
+        }
+
         list_add_tail(&napi->poll_list, &sd->poll_list);
         __raise_softirq_irqoff(NET_RX_SOFTIRQ);
 }
@@ -6282,6 +6319,12 @@ void netif_napi_add(struct net_device *d
         set_bit(NAPI_STATE_NPSVC, &napi->state);
         list_add_rcu(&napi->dev_list, &dev->napi_list);
         napi_hash_add(napi);
+        /* Create kthread for this napi if dev->threaded is set.
+         * Clear dev->threaded if kthread creation failed so that
+         * threaded mode will not be enabled in napi_enable().
+         */
+        if (dev->threaded && napi_kthread_create(napi))
+                dev->threaded = 0;
 }
 EXPORT_SYMBOL(netif_napi_add);
 
@@ -6298,9 +6341,28 @@ void napi_disable(struct napi_struct *n)
         hrtimer_cancel(&n->timer);
 
         clear_bit(NAPI_STATE_DISABLE, &n->state);
+        clear_bit(NAPI_STATE_THREADED, &n->state);
 }
 EXPORT_SYMBOL(napi_disable);
 
+/**
+ * napi_enable - enable NAPI scheduling
+ * @n: NAPI context
+ *
+ * Resume NAPI from being scheduled on this context.
+ * Must be paired with napi_disable.
+ */
+void napi_enable(struct napi_struct *n)
+{
+        BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
+        smp_mb__before_atomic();
+        clear_bit(NAPI_STATE_SCHED, &n->state);
+        clear_bit(NAPI_STATE_NPSVC, &n->state);
+        if (n->dev->threaded && n->thread)
+                set_bit(NAPI_STATE_THREADED, &n->state);
+}
+EXPORT_SYMBOL(napi_enable);
+
 static void flush_gro_hash(struct napi_struct *napi)
 {
         int i;
@@ -6325,6 +6387,11 @@ void netif_napi_del(struct napi_struct *
 
         flush_gro_hash(napi);
         napi->gro_bitmask = 0;
+
+        if (napi->thread) {
+                kthread_stop(napi->thread);
+                napi->thread = NULL;
+        }
 }
 EXPORT_SYMBOL(netif_napi_del);
 
@@ -6404,6 +6471,51 @@ static int napi_poll(struct napi_struct
         return work;
 }
 
+static int napi_thread_wait(struct napi_struct *napi)
+{
+        set_current_state(TASK_INTERRUPTIBLE);
+
+        while (!kthread_should_stop() && !napi_disable_pending(napi)) {
+                if (test_bit(NAPI_STATE_SCHED, &napi->state)) {
+                        WARN_ON(!list_empty(&napi->poll_list));
+                        __set_current_state(TASK_RUNNING);
+                        return 0;
+                }
+
+                schedule();
+                set_current_state(TASK_INTERRUPTIBLE);
+        }
+        __set_current_state(TASK_RUNNING);
+        return -1;
+}
+
+static int napi_threaded_poll(void *data)
+{
+        struct napi_struct *napi = data;
+        void *have;
+
+        while (!napi_thread_wait(napi)) {
+                for (;;) {
+                        bool repoll = false;
+
+                        local_bh_disable();
+
+                        have = netpoll_poll_lock(napi);
+                        __napi_poll(napi, &repoll);
+                        netpoll_poll_unlock(have);
+
+                        __kfree_skb_flush();
+                        local_bh_enable();
+
+                        if (!repoll)
+                                break;
+
+                        cond_resched();
+                }
+        }
+        return 0;
+}
+
 static __latent_entropy void net_rx_action(struct softirq_action *h)
 {
         struct softnet_data *sd = this_cpu_ptr(&softnet_data);