b.liu | e958203 | 2025-04-17 19:18:16 +0800 | [diff] [blame^] | 1 | From 3420c5bf9d08df074b67c7feda8a37d951d0b232 Mon Sep 17 00:00:00 2001 |
| 2 | From: Ioana Radulescu <ruxandra.radulescu@nxp.com> |
| 3 | Date: Wed, 18 Sep 2019 13:31:07 +0300 |
| 4 | Subject: [PATCH] dpaa2-eth: Avoid unbounded while loops |
| 5 | |
| 6 | Throughout the driver there are several places where we wait |
| 7 | indefinitely for DPIO portal commands to be executed, while |
| 8 | the portal returns a busy response code. |
| 9 | |
| 10 | Even though in theory we are guaranteed the portals become |
| 11 | available eventually, in practice the QBMan hardware module |
| 12 | may become unresponsive in various corner cases. |
| 13 | |
| 14 | Make sure we can never get stuck in an infinite while loop |
| 15 | by adding a retry counter for all portal commands. |
| 16 | |
| 17 | Signed-off-by: Ioana Radulescu <ruxandra.radulescu@nxp.com> |
| 18 | --- |
| 19 | drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c | 30 ++++++++++++++++++++---- |
| 20 | drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h | 8 +++++++ |
| 21 | 2 files changed, 33 insertions(+), 5 deletions(-) |
| 22 | |
| 23 | --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c |
| 24 | +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c |
| 25 | @@ -221,6 +221,7 @@ static void xdp_release_buf(struct dpaa2 |
| 26 | struct dpaa2_eth_channel *ch, |
| 27 | dma_addr_t addr) |
| 28 | { |
| 29 | + int retries = 0; |
| 30 | int err; |
| 31 | |
| 32 | ch->xdp.drop_bufs[ch->xdp.drop_cnt++] = addr; |
| 33 | @@ -229,8 +230,11 @@ static void xdp_release_buf(struct dpaa2 |
| 34 | |
| 35 | while ((err = dpaa2_io_service_release(ch->dpio, priv->bpid, |
| 36 | ch->xdp.drop_bufs, |
| 37 | - ch->xdp.drop_cnt)) == -EBUSY) |
| 38 | + ch->xdp.drop_cnt)) == -EBUSY) { |
| 39 | + if (retries++ >= DPAA2_ETH_SWP_BUSY_RETRIES) |
| 40 | + break; |
| 41 | cpu_relax(); |
| 42 | + } |
| 43 | |
| 44 | if (err) { |
| 45 | free_bufs(priv, ch->xdp.drop_bufs, ch->xdp.drop_cnt); |
| 46 | @@ -458,7 +462,7 @@ static int consume_frames(struct dpaa2_e |
| 47 | struct dpaa2_eth_fq *fq = NULL; |
| 48 | struct dpaa2_dq *dq; |
| 49 | const struct dpaa2_fd *fd; |
| 50 | - int cleaned = 0; |
| 51 | + int cleaned = 0, retries = 0; |
| 52 | int is_last; |
| 53 | |
| 54 | do { |
| 55 | @@ -469,6 +473,11 @@ static int consume_frames(struct dpaa2_e |
| 56 | * the store until we get some sort of valid response |
| 57 | * token (either a valid frame or an "empty dequeue") |
| 58 | */ |
| 59 | + if (retries++ >= DPAA2_ETH_SWP_BUSY_RETRIES) { |
| 60 | + netdev_err_once(priv->net_dev, |
| 61 | + "Unable to read a valid dequeue response\n"); |
| 62 | + return 0; |
| 63 | + } |
| 64 | continue; |
| 65 | } |
| 66 | |
| 67 | @@ -477,6 +486,7 @@ static int consume_frames(struct dpaa2_e |
| 68 | |
| 69 | fq->consume(priv, ch, fd, fq); |
| 70 | cleaned++; |
| 71 | + retries = 0; |
| 72 | } while (!is_last); |
| 73 | |
| 74 | if (!cleaned) |
| 75 | @@ -949,6 +959,7 @@ static int add_bufs(struct dpaa2_eth_pri |
| 76 | u64 buf_array[DPAA2_ETH_BUFS_PER_CMD]; |
| 77 | struct page *page; |
| 78 | dma_addr_t addr; |
| 79 | + int retries = 0; |
| 80 | int i, err; |
| 81 | |
| 82 | for (i = 0; i < DPAA2_ETH_BUFS_PER_CMD; i++) { |
| 83 | @@ -980,8 +991,11 @@ static int add_bufs(struct dpaa2_eth_pri |
| 84 | release_bufs: |
| 85 | /* In case the portal is busy, retry until successful */ |
| 86 | while ((err = dpaa2_io_service_release(ch->dpio, bpid, |
| 87 | - buf_array, i)) == -EBUSY) |
| 88 | + buf_array, i)) == -EBUSY) { |
| 89 | + if (retries++ >= DPAA2_ETH_SWP_BUSY_RETRIES) |
| 90 | + break; |
| 91 | cpu_relax(); |
| 92 | + } |
| 93 | |
| 94 | /* If release command failed, clean up and bail out; |
| 95 | * not much else we can do about it |
| 96 | @@ -1032,16 +1046,21 @@ static int seed_pool(struct dpaa2_eth_pr |
| 97 | static void drain_bufs(struct dpaa2_eth_priv *priv, int count) |
| 98 | { |
| 99 | u64 buf_array[DPAA2_ETH_BUFS_PER_CMD]; |
| 100 | + int retries = 0; |
| 101 | int ret; |
| 102 | |
| 103 | do { |
| 104 | ret = dpaa2_io_service_acquire(NULL, priv->bpid, |
| 105 | buf_array, count); |
| 106 | if (ret < 0) { |
| 107 | + if (ret == -EBUSY && |
| 108 | + retries++ >= DPAA2_ETH_SWP_BUSY_RETRIES) |
| 109 | + continue; |
| 110 | netdev_err(priv->net_dev, "dpaa2_io_service_acquire() failed\n"); |
| 111 | return; |
| 112 | } |
| 113 | free_bufs(priv, buf_array, ret); |
| 114 | + retries = 0; |
| 115 | } while (ret); |
| 116 | } |
| 117 | |
| 118 | @@ -1094,7 +1113,7 @@ static int pull_channel(struct dpaa2_eth |
| 119 | ch->store); |
| 120 | dequeues++; |
| 121 | cpu_relax(); |
| 122 | - } while (err == -EBUSY); |
| 123 | + } while (err == -EBUSY && dequeues < DPAA2_ETH_SWP_BUSY_RETRIES); |
| 124 | |
| 125 | ch->stats.dequeue_portal_busy += dequeues; |
| 126 | if (unlikely(err)) |
| 127 | @@ -1118,6 +1137,7 @@ static int dpaa2_eth_poll(struct napi_st |
| 128 | struct netdev_queue *nq; |
| 129 | int store_cleaned, work_done; |
| 130 | struct list_head rx_list; |
| 131 | + int retries = 0; |
| 132 | int err; |
| 133 | |
| 134 | ch = container_of(napi, struct dpaa2_eth_channel, napi); |
| 135 | @@ -1163,7 +1183,7 @@ static int dpaa2_eth_poll(struct napi_st |
| 136 | do { |
| 137 | err = dpaa2_io_service_rearm(ch->dpio, &ch->nctx); |
| 138 | cpu_relax(); |
| 139 | - } while (err == -EBUSY); |
| 140 | + } while (err == -EBUSY && retries++ < DPAA2_ETH_SWP_BUSY_RETRIES); |
| 141 | WARN_ONCE(err, "CDAN notifications rearm failed on core %d", |
| 142 | ch->nctx.desired_cpu); |
| 143 | |
| 144 | --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h |
| 145 | +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h |
| 146 | @@ -245,6 +245,14 @@ static inline struct dpaa2_faead *dpaa2_ |
| 147 | */ |
| 148 | #define DPAA2_ETH_ENQUEUE_RETRIES 10 |
| 149 | |
| 150 | +/* Number of times to retry DPIO portal operations while waiting |
| 151 | + * for portal to finish executing current command and become |
| 152 | + * available. We want to avoid being stuck in a while loop in case |
| 153 | + * hardware becomes unresponsive, but not give up too easily if |
| 154 | + * the portal really is busy for valid reasons |
| 155 | + */ |
| 156 | +#define DPAA2_ETH_SWP_BUSY_RETRIES 1000 |
| 157 | + |
| 158 | /* Driver statistics, other than those in struct rtnl_link_stats64. |
| 159 | * These are usually collected per-CPU and aggregated by ethtool. |
| 160 | */ |