| From: Wei Wang <weiwan@google.com> |
| Date: Mon, 8 Feb 2021 11:34:09 -0800 |
| Subject: [PATCH] net: implement threaded-able napi poll loop support |
| |
| This patch allows running each napi poll loop inside its own |
| kernel thread. |
| The kthread is created during netif_napi_add() if dev->threaded |
| is set, and threaded mode is enabled in napi_enable(). A way to |
| set dev->threaded and enable threaded mode without a device |
| up/down is provided in the following patch. |
| |
| Once threaded mode is enabled and the kthread is |
| started, napi_schedule() will wake up that thread instead |
| of scheduling the softirq. |
| |
| The threaded poll loop behaves much like net_rx_action(), |
| but it does not have to manipulate local irqs and uses |
| an explicit scheduling point based on netdev_budget. |
| |
| Co-developed-by: Paolo Abeni <pabeni@redhat.com> |
| Signed-off-by: Paolo Abeni <pabeni@redhat.com> |
| Co-developed-by: Hannes Frederic Sowa <hannes@stressinduktion.org> |
| Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org> |
| Co-developed-by: Jakub Kicinski <kuba@kernel.org> |
| Signed-off-by: Jakub Kicinski <kuba@kernel.org> |
| Signed-off-by: Wei Wang <weiwan@google.com> |
| Reviewed-by: Alexander Duyck <alexanderduyck@fb.com> |
| Signed-off-by: David S. Miller <davem@davemloft.net> |
| --- |
| |
| --- a/include/linux/netdevice.h |
| +++ b/include/linux/netdevice.h |
| @@ -349,6 +349,7 @@ struct napi_struct { |
| struct list_head dev_list; |
| struct hlist_node napi_hash_node; |
| unsigned int napi_id; |
| + struct task_struct *thread; |
| }; |
| |
| enum { |
| @@ -359,6 +360,7 @@ enum { |
| NAPI_STATE_HASHED, /* In NAPI hash (busy polling possible) */ |
| NAPI_STATE_NO_BUSY_POLL,/* Do not add in napi_hash, no busy polling */ |
| NAPI_STATE_IN_BUSY_POLL,/* sk_busy_loop() owns this NAPI */ |
| + NAPI_STATE_THREADED, /* The poll is performed inside its own thread*/ |
| }; |
| |
| enum { |
| @@ -369,6 +371,7 @@ enum { |
| NAPIF_STATE_HASHED = BIT(NAPI_STATE_HASHED), |
| NAPIF_STATE_NO_BUSY_POLL = BIT(NAPI_STATE_NO_BUSY_POLL), |
| NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL), |
| + NAPIF_STATE_THREADED = BIT(NAPI_STATE_THREADED), |
| }; |
| |
| enum gro_result { |
| @@ -513,20 +516,7 @@ bool napi_hash_del(struct napi_struct *n |
| */ |
| void napi_disable(struct napi_struct *n); |
| |
| -/** |
| - * napi_enable - enable NAPI scheduling |
| - * @n: NAPI context |
| - * |
| - * Resume NAPI from being scheduled on this context. |
| - * Must be paired with napi_disable. |
| - */ |
| -static inline void napi_enable(struct napi_struct *n) |
| -{ |
| - BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state)); |
| - smp_mb__before_atomic(); |
| - clear_bit(NAPI_STATE_SCHED, &n->state); |
| - clear_bit(NAPI_STATE_NPSVC, &n->state); |
| -} |
| +void napi_enable(struct napi_struct *n); |
| |
| /** |
| * napi_synchronize - wait until NAPI is not running |
| @@ -1792,6 +1782,8 @@ enum netdev_ml_priv_type { |
| * |
| * @wol_enabled: Wake-on-LAN is enabled |
| * |
| + * @threaded: napi threaded mode is enabled |
| + * |
| * FIXME: cleanup struct net_device such that network protocol info |
| * moves out. |
| */ |
| @@ -2084,6 +2076,7 @@ struct net_device { |
| struct lock_class_key addr_list_lock_key; |
| bool proto_down; |
| unsigned wol_enabled:1; |
| + unsigned threaded:1; |
| }; |
| #define to_net_dev(d) container_of(d, struct net_device, dev) |
| |
| --- a/net/core/dev.c |
| +++ b/net/core/dev.c |
| @@ -91,6 +91,7 @@ |
| #include <linux/etherdevice.h> |
| #include <linux/ethtool.h> |
| #include <linux/skbuff.h> |
| +#include <linux/kthread.h> |
| #include <linux/bpf.h> |
| #include <linux/bpf_trace.h> |
| #include <net/net_namespace.h> |
| @@ -1289,6 +1290,27 @@ void netdev_notify_peers(struct net_devi |
| } |
| EXPORT_SYMBOL(netdev_notify_peers); |
| |
| +static int napi_threaded_poll(void *data); |
| + |
| +static int napi_kthread_create(struct napi_struct *n) |
| +{ |
| + int err = 0; |
| + |
| + /* Create and wake up the kthread once to put it in |
| + * TASK_INTERRUPTIBLE mode to avoid the blocked task |
| + * warning and work with loadavg. |
| + */ |
| + n->thread = kthread_run(napi_threaded_poll, n, "napi/%s-%d", |
| + n->dev->name, n->napi_id); |
| + if (IS_ERR(n->thread)) { |
| + err = PTR_ERR(n->thread); |
| + pr_err("kthread_run failed with err %d\n", err); |
| + n->thread = NULL; |
| + } |
| + |
| + return err; |
| +} |
| + |
| static int __dev_open(struct net_device *dev, struct netlink_ext_ack *extack) |
| { |
| const struct net_device_ops *ops = dev->netdev_ops; |
| @@ -3891,6 +3913,21 @@ int gro_normal_batch __read_mostly = 8; |
| static inline void ____napi_schedule(struct softnet_data *sd, |
| struct napi_struct *napi) |
| { |
| + struct task_struct *thread; |
| + |
| + if (test_bit(NAPI_STATE_THREADED, &napi->state)) { |
| + /* Paired with smp_mb__before_atomic() in |
| + * napi_enable(). Use READ_ONCE() to guarantee |
| + * a complete read on napi->thread. Only call |
| + * wake_up_process() when it's not NULL. |
| + */ |
| + thread = READ_ONCE(napi->thread); |
| + if (thread) { |
| + wake_up_process(thread); |
| + return; |
| + } |
| + } |
| + |
| list_add_tail(&napi->poll_list, &sd->poll_list); |
| __raise_softirq_irqoff(NET_RX_SOFTIRQ); |
| } |
| @@ -6282,6 +6319,12 @@ void netif_napi_add(struct net_device *d |
| set_bit(NAPI_STATE_NPSVC, &napi->state); |
| list_add_rcu(&napi->dev_list, &dev->napi_list); |
| napi_hash_add(napi); |
| + /* Create kthread for this napi if dev->threaded is set. |
| + * Clear dev->threaded if kthread creation failed so that |
| + * threaded mode will not be enabled in napi_enable(). |
| + */ |
| + if (dev->threaded && napi_kthread_create(napi)) |
| + dev->threaded = 0; |
| } |
| EXPORT_SYMBOL(netif_napi_add); |
| |
| @@ -6298,9 +6341,28 @@ void napi_disable(struct napi_struct *n) |
| hrtimer_cancel(&n->timer); |
| |
| clear_bit(NAPI_STATE_DISABLE, &n->state); |
| + clear_bit(NAPI_STATE_THREADED, &n->state); |
| } |
| EXPORT_SYMBOL(napi_disable); |
| |
| +/** |
| + * napi_enable - enable NAPI scheduling |
| + * @n: NAPI context |
| + * |
| + * Resume NAPI from being scheduled on this context. |
| + * Must be paired with napi_disable. |
| + */ |
| +void napi_enable(struct napi_struct *n) |
| +{ |
| + BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state)); |
| + smp_mb__before_atomic(); |
| + clear_bit(NAPI_STATE_SCHED, &n->state); |
| + clear_bit(NAPI_STATE_NPSVC, &n->state); |
| + if (n->dev->threaded && n->thread) |
| + set_bit(NAPI_STATE_THREADED, &n->state); |
| +} |
| +EXPORT_SYMBOL(napi_enable); |
| + |
| static void flush_gro_hash(struct napi_struct *napi) |
| { |
| int i; |
| @@ -6325,6 +6387,11 @@ void netif_napi_del(struct napi_struct * |
| |
| flush_gro_hash(napi); |
| napi->gro_bitmask = 0; |
| + |
| + if (napi->thread) { |
| + kthread_stop(napi->thread); |
| + napi->thread = NULL; |
| + } |
| } |
| EXPORT_SYMBOL(netif_napi_del); |
| |
| @@ -6404,6 +6471,51 @@ static int napi_poll(struct napi_struct |
| return work; |
| } |
| |
| +static int napi_thread_wait(struct napi_struct *napi) |
| +{ |
| + set_current_state(TASK_INTERRUPTIBLE); |
| + |
| + while (!kthread_should_stop() && !napi_disable_pending(napi)) { |
| + if (test_bit(NAPI_STATE_SCHED, &napi->state)) { |
| + WARN_ON(!list_empty(&napi->poll_list)); |
| + __set_current_state(TASK_RUNNING); |
| + return 0; |
| + } |
| + |
| + schedule(); |
| + set_current_state(TASK_INTERRUPTIBLE); |
| + } |
| + __set_current_state(TASK_RUNNING); |
| + return -1; |
| +} |
| + |
| +static int napi_threaded_poll(void *data) |
| +{ |
| + struct napi_struct *napi = data; |
| + void *have; |
| + |
| + while (!napi_thread_wait(napi)) { |
| + for (;;) { |
| + bool repoll = false; |
| + |
| + local_bh_disable(); |
| + |
| + have = netpoll_poll_lock(napi); |
| + __napi_poll(napi, &repoll); |
| + netpoll_poll_unlock(have); |
| + |
| + __kfree_skb_flush(); |
| + local_bh_enable(); |
| + |
| + if (!repoll) |
| + break; |
| + |
| + cond_resched(); |
| + } |
| + } |
| + return 0; |
| +} |
| + |
| static __latent_entropy void net_rx_action(struct softirq_action *h) |
| { |
| struct softnet_data *sd = this_cpu_ptr(&softnet_data); |