From 0b8c6bbb0a561f15598f6701089a992bdea3963c Mon Sep 17 00:00:00 2001
From: Youri Querry <youri.querry_1@nxp.com>
Date: Mon, 4 Nov 2019 11:03:09 -0500
Subject: [PATCH] soc: fsl: dpio: Replace QMAN array mode by ring mode enqueue.

This change of algorithm will enable faster bulk enqueue.
This will greatly benefit XDP bulk enqueue.

Signed-off-by: Youri Querry <youri.querry_1@nxp.com>
---
 drivers/soc/fsl/dpio/qbman-portal.c | 420 +++++++++++++++++++++++++++---------
 drivers/soc/fsl/dpio/qbman-portal.h |  13 ++
 2 files changed, 335 insertions(+), 98 deletions(-)

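Note (kept below the "---" cut line, so git-am ignores it): the ring-mode
bookkeeping relies on the EQCR producer/consumer counters running modulo
twice the ring size, with the extra bit doubling as the valid-bit phase.
A minimal standalone sketch of that arithmetic, mirroring the
qm_cyc_diff() helper added below — the test values are made up for
illustration:

	/* Counters wrap at 2 * ringsize; the free-running extra bit is what
	 * lets qm_cyc_diff() distinguish an empty ring from a full one, and
	 * it doubles as the valid-bit phase stamped into EQCR entries.
	 */
	#include <assert.h>

	static unsigned char qm_cyc_diff(unsigned char ringsize,
					 unsigned char first, unsigned char last)
	{
		/* 'first' is included, 'last' is excluded */
		if (first <= last)
			return last - first;
		else
			return (2 * ringsize) - (first - last);
	}

	int main(void)
	{
		/* hypothetical snapshots of a 32-entry EQCR (wrap at 64) */
		assert(qm_cyc_diff(32, 10, 14) == 4); /* 4 entries consumed */
		assert(qm_cyc_diff(32, 62, 3) == 5);  /* wraps through 64 */
		assert(qm_cyc_diff(32, 7, 7) == 0);   /* nothing consumed */
		return 0;
	}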
--- a/drivers/soc/fsl/dpio/qbman-portal.c
+++ b/drivers/soc/fsl/dpio/qbman-portal.c
@@ -8,6 +8,7 @@
 #include <asm/cacheflush.h>
 #include <linux/io.h>
 #include <linux/slab.h>
+#include <linux/spinlock.h>
 #include <soc/fsl/dpaa2-global.h>
 
 #include "qbman-portal.h"
@@ -22,6 +23,7 @@
 
 /* CINH register offsets */
 #define QBMAN_CINH_SWP_EQCR_PI 0x800
+#define QBMAN_CINH_SWP_EQCR_CI 0x840
 #define QBMAN_CINH_SWP_EQAR 0x8c0
 #define QBMAN_CINH_SWP_CR_RT 0x900
 #define QBMAN_CINH_SWP_VDQCR_RT 0x940
@@ -45,6 +47,8 @@
 #define QBMAN_CENA_SWP_CR 0x600
 #define QBMAN_CENA_SWP_RR(vb) (0x700 + ((u32)(vb) >> 1))
 #define QBMAN_CENA_SWP_VDQCR 0x780
+#define QBMAN_CENA_SWP_EQCR_CI 0x840
+#define QBMAN_CENA_SWP_EQCR_CI_MEMBACK 0x1840
 
 /* CENA register offsets in memory-backed mode */
 #define QBMAN_CENA_SWP_DQRR_MEM(n) (0x800 + ((u32)(n) << 6))
@@ -72,6 +76,12 @@
 /* opaque token for static dequeues */
 #define QMAN_SDQCR_TOKEN 0xbb
 
+#define QBMAN_EQCR_DCA_IDXMASK 0x0f
+#define QBMAN_ENQUEUE_FLAG_DCA (1ULL << 31)
+
+#define EQ_DESC_SIZE_WITHOUT_FD 29
+#define EQ_DESC_SIZE_FD_START 32
+
 enum qbman_sdqcr_dct {
 	qbman_sdqcr_dct_null = 0,
 	qbman_sdqcr_dct_prio_ics,
@@ -224,6 +234,15 @@ static inline u32 qbman_set_swp_cfg(u8 m
 
 #define QMAN_RT_MODE 0x00000100
 
+static inline u8 qm_cyc_diff(u8 ringsize, u8 first, u8 last)
+{
+	/* 'first' is included, 'last' is excluded */
+	if (first <= last)
+		return last - first;
+	else
+		return (2 * ringsize) - (first - last);
+}
+
 /**
  * qbman_swp_init() - Create a functional object representing the given
  * QBMan portal descriptor.
@@ -236,6 +255,10 @@ struct qbman_swp *qbman_swp_init(const s
 {
 	struct qbman_swp *p = kzalloc(sizeof(*p), GFP_KERNEL);
 	u32 reg;
+	u32 mask_size;
+	u32 eqcr_pi;
 
 	if (!p)
 		return NULL;
+
+	spin_lock_init(&p->access_spinlock);
@@ -264,25 +287,38 @@ struct qbman_swp *qbman_swp_init(const s
 	p->addr_cena = d->cena_bar;
 	p->addr_cinh = d->cinh_bar;
 
-	if ((p->desc->qman_version & QMAN_REV_MASK) >= QMAN_REV_5000)
-		memset(p->addr_cena, 0, 64 * 1024);
+	if ((p->desc->qman_version & QMAN_REV_MASK) < QMAN_REV_5000) {
 
-	reg = qbman_set_swp_cfg(p->dqrr.dqrr_size,
-			0, /* Writes cacheable */
-			0, /* EQCR_CI stashing threshold */
-			3, /* RPM: Valid bit mode, RCR in array mode */
-			2, /* DCM: Discrete consumption ack mode */
-			3, /* EPM: Valid bit mode, EQCR in array mode */
-			1, /* mem stashing drop enable == TRUE */
-			1, /* mem stashing priority == TRUE */
-			1, /* mem stashing enable == TRUE */
-			1, /* dequeue stashing priority == TRUE */
-			0, /* dequeue stashing enable == FALSE */
-			0); /* EQCR_CI stashing priority == FALSE */
-	if ((p->desc->qman_version & QMAN_REV_MASK) >= QMAN_REV_5000)
+		reg = qbman_set_swp_cfg(p->dqrr.dqrr_size,
+			0, /* Writes Non-cacheable */
+			0, /* EQCR_CI stashing threshold */
+			3, /* RPM: RCR in array mode */
+			2, /* DCM: Discrete consumption ack */
+			2, /* EPM: EQCR in ring mode */
+			1, /* mem stashing drop enable */
+			1, /* mem stashing priority enable */
+			1, /* mem stashing enable */
+			1, /* dequeue stashing priority enable */
+			0, /* dequeue stashing enable */
+			0); /* EQCR_CI stashing priority enable */
+	} else {
+		memset(p->addr_cena, 0, 64 * 1024);
+		reg = qbman_set_swp_cfg(p->dqrr.dqrr_size,
+			0, /* Writes Non-cacheable */
+			1, /* EQCR_CI stashing threshold */
+			3, /* RPM: RCR in array mode */
+			2, /* DCM: Discrete consumption ack */
+			0, /* EPM: EQCR in ring mode */
+			1, /* mem stashing drop enable */
+			1, /* mem stashing priority enable */
+			1, /* mem stashing enable */
+			1, /* dequeue stashing priority enable */
+			0, /* dequeue stashing enable */
+			0); /* EQCR_CI stashing priority enable */
 		reg |= 1 << SWP_CFG_CPBS_SHIFT | /* memory-backed mode */
 		       1 << SWP_CFG_VPM_SHIFT |  /* VDQCR read triggered mode */
 		       1 << SWP_CFG_CPM_SHIFT;   /* CR read triggered mode */
+	}
 
 	qbman_write_register(p, QBMAN_CINH_SWP_CFG, reg);
 	reg = qbman_read_register(p, QBMAN_CINH_SWP_CFG);
@@ -304,7 +340,9 @@ struct qbman_swp *qbman_swp_init(const s
 	 */
 	qbman_write_register(p, QBMAN_CINH_SWP_SDQCR, 0);
 
+	p->eqcr.pi_ring_size = 8;
 	if ((p->desc->qman_version & QMAN_REV_MASK) >= QMAN_REV_5000) {
+		p->eqcr.pi_ring_size = 32;
 		qbman_swp_enqueue_ptr =
 			qbman_swp_enqueue_mem_back;
 		qbman_swp_enqueue_multiple_ptr =
@@ -316,6 +354,15 @@ struct qbman_swp *qbman_swp_init(const s
 		qbman_swp_release_ptr = qbman_swp_release_mem_back;
 	}
 
+	for (mask_size = p->eqcr.pi_ring_size; mask_size > 0; mask_size >>= 1)
+		p->eqcr.pi_ci_mask = (p->eqcr.pi_ci_mask << 1) + 1;
+	eqcr_pi = qbman_read_register(p, QBMAN_CINH_SWP_EQCR_PI);
+	p->eqcr.pi = eqcr_pi & p->eqcr.pi_ci_mask;
+	p->eqcr.pi_vb = eqcr_pi & QB_VALID_BIT;
+	p->eqcr.ci = qbman_read_register(p, QBMAN_CINH_SWP_EQCR_CI)
+			& p->eqcr.pi_ci_mask;
+	p->eqcr.available = p->eqcr.pi_ring_size;
+
 	return p;
 }
 
@@ -468,8 +515,9 @@ enum qb_enqueue_commands {
 	enqueue_rejects_to_fq = 2
 };
 
-#define QB_ENQUEUE_CMD_ORP_ENABLE_SHIFT 2
-#define QB_ENQUEUE_CMD_TARGET_TYPE_SHIFT 4
+#define QB_ENQUEUE_CMD_ORP_ENABLE_SHIFT		2
+#define QB_ENQUEUE_CMD_TARGET_TYPE_SHIFT	4
+#define QB_ENQUEUE_CMD_DCA_EN_SHIFT		7
 
 /**
  * qbman_eq_desc_clear() - Clear the contents of a descriptor to
@@ -582,6 +630,7 @@ static inline void qbman_write_eqcr_am_r
 			    QMAN_RT_MODE);
 }
 
+#define QB_RT_BIT ((u32)0x100)
 /**
  * qbman_swp_enqueue_direct() - Issue an enqueue command
  * @s:  the software portal used for enqueue
@@ -593,35 +642,19 @@ static inline void qbman_write_eqcr_am_r
  *
  * Return 0 for successful enqueue, -EBUSY if the EQCR is not ready.
  */
-int qbman_swp_enqueue_direct(struct qbman_swp *s, const struct qbman_eq_desc *d,
-			     const struct dpaa2_fd *fd)
+static
+int qbman_swp_enqueue_direct(struct qbman_swp *s,
+			     const struct qbman_eq_desc *d,
+			     const struct dpaa2_fd *fd)
 {
-	struct qbman_eq_desc_with_fd *p;
-	u32 eqar = qbman_read_register(s, QBMAN_CINH_SWP_EQAR);
-
-	if (!EQAR_SUCCESS(eqar))
-		return -EBUSY;
+	u32 flags = 0;
+	int ret = qbman_swp_enqueue_multiple_direct(s, d, fd, &flags, 1);
 
-	p = qbman_get_cmd(s, QBMAN_CENA_SWP_EQCR(EQAR_IDX(eqar)));
-	/* This is mapped as DEVICE type memory, writes are
-	 * with address alignment:
-	 * desc.dca address alignment = 1
-	 * desc.seqnum address alignment = 2
-	 * desc.orpid address alignment = 4
-	 * desc.tgtid address alignment = 8
-	 */
-	p->desc.dca = d->dca;
-	p->desc.seqnum = d->seqnum;
-	p->desc.orpid = d->orpid;
-	memcpy(&p->desc.tgtid, &d->tgtid, 24);
-	memcpy(&p->fd, fd, sizeof(*fd));
-
-	/* Set the verb byte, have to substitute in the valid-bit */
-	dma_wmb();
-	p->desc.verb = d->verb | EQAR_VB(eqar);
-	dccvac(p);
-
-	return 0;
+	if (ret >= 0)
+		ret = 0;
+	else
+		ret = -EBUSY;
+	return ret;
 }
 
 /**
@@ -635,35 +668,19 @@ int qbman_swp_enqueue_direct(struct qbma
  * @s:  the software portal used for enqueue
  *
  * Return 0 for successful enqueue, -EBUSY if the EQCR is not ready.
  */
+static
 int qbman_swp_enqueue_mem_back(struct qbman_swp *s,
 			       const struct qbman_eq_desc *d,
 			       const struct dpaa2_fd *fd)
 {
-	struct qbman_eq_desc_with_fd *p;
-	u32 eqar = qbman_read_register(s, QBMAN_CINH_SWP_EQAR);
-
-	if (!EQAR_SUCCESS(eqar))
-		return -EBUSY;
-
-	p = qbman_get_cmd(s, QBMAN_CENA_SWP_EQCR(EQAR_IDX(eqar)));
-	/* This is mapped as DEVICE type memory, writes are
-	 * with address alignment:
-	 * desc.dca address alignment = 1
-	 * desc.seqnum address alignment = 2
-	 * desc.orpid address alignment = 4
-	 * desc.tgtid address alignment = 8
-	 */
-	p->desc.dca = d->dca;
-	p->desc.seqnum = d->seqnum;
-	p->desc.orpid = d->orpid;
-	memcpy(&p->desc.tgtid, &d->tgtid, 24);
-	memcpy(&p->fd, fd, sizeof(*fd));
-
-	p->desc.verb = d->verb | EQAR_VB(eqar);
-	dma_wmb();
-	qbman_write_eqcr_am_rt_register(s, EQAR_IDX(eqar));
+	u32 flags = 0;
+	int ret = qbman_swp_enqueue_multiple_mem_back(s, d, fd, &flags, 1);
 
-	return 0;
+	if (ret >= 0)
+		ret = 0;
+	else
+		ret = -EBUSY;
+	return ret;
 }
 
 /**
@@ -672,26 +689,84 @@ int qbman_swp_enqueue_mem_back(struct qb
  * @s:  the software portal used for enqueue
  * @d:  the enqueue descriptor
  * @fd: table pointer of frame descriptor table to be enqueued
- * @flags: table pointer of flags, not used for the moment
+ * @flags: table pointer of QBMAN_ENQUEUE_FLAG_DCA flags, not used if NULL
  * @num_frames: number of fd to be enqueued
 *
 * Return the number of fd enqueued, or a negative error number.
 */
+static
 int qbman_swp_enqueue_multiple_direct(struct qbman_swp *s,
 				      const struct qbman_eq_desc *d,
 				      const struct dpaa2_fd *fd,
 				      uint32_t *flags,
 				      int num_frames)
 {
-	int count = 0;
+	uint32_t *p = NULL;
+	const uint32_t *cl = (uint32_t *)d;
+	uint32_t eqcr_ci, eqcr_pi, half_mask, full_mask;
+	int i, num_enqueued = 0;
+	uint64_t addr_cena;
+
+	spin_lock(&s->access_spinlock);
+	half_mask = (s->eqcr.pi_ci_mask>>1);
+	full_mask = s->eqcr.pi_ci_mask;
+
+	if (!s->eqcr.available) {
+		eqcr_ci = s->eqcr.ci;
+		p = s->addr_cena + QBMAN_CENA_SWP_EQCR_CI;
+		s->eqcr.ci = qbman_read_register(s, QBMAN_CINH_SWP_EQCR_CI);
+
+		s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size,
+					eqcr_ci, s->eqcr.ci);
+		if (!s->eqcr.available) {
+			spin_unlock(&s->access_spinlock);
+			return 0;
+		}
+	}
 
-	while (count < num_frames) {
-		if (qbman_swp_enqueue_direct(s, d, fd) != 0)
-			break;
-		count++;
+	eqcr_pi = s->eqcr.pi;
+	num_enqueued = (s->eqcr.available < num_frames) ?
+			s->eqcr.available : num_frames;
+	s->eqcr.available -= num_enqueued;
+	/* Fill in the EQCR ring */
+	for (i = 0; i < num_enqueued; i++) {
+		p = (s->addr_cena + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
+		/* Skip copying the verb */
+		memcpy(&p[1], &cl[1], EQ_DESC_SIZE_WITHOUT_FD - 1);
+		memcpy(&p[EQ_DESC_SIZE_FD_START/sizeof(uint32_t)],
+		       &fd[i], sizeof(*fd));
+		eqcr_pi++;
 	}
 
-	return count;
+	dma_wmb();
+
+	/* Set the verb byte, have to substitute in the valid-bit */
+	eqcr_pi = s->eqcr.pi;
+	for (i = 0; i < num_enqueued; i++) {
+		p = (s->addr_cena + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
+		p[0] = cl[0] | s->eqcr.pi_vb;
+		if (flags && (flags[i] & QBMAN_ENQUEUE_FLAG_DCA)) {
+			struct qbman_eq_desc *d = (struct qbman_eq_desc *)p;
+
+			d->dca = (1 << QB_ENQUEUE_CMD_DCA_EN_SHIFT) |
+				((flags[i]) & QBMAN_EQCR_DCA_IDXMASK);
+		}
+		eqcr_pi++;
+		if (!(eqcr_pi & half_mask))
+			s->eqcr.pi_vb ^= QB_VALID_BIT;
+	}
+
+	/* Flush all the cachelines without load/store in between */
+	eqcr_pi = s->eqcr.pi;
+	addr_cena = (size_t)s->addr_cena;
+	for (i = 0; i < num_enqueued; i++) {
+		dccvac((addr_cena + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask)));
+		eqcr_pi++;
+	}
+	s->eqcr.pi = eqcr_pi & full_mask;
+	spin_unlock(&s->access_spinlock);
+
+	return num_enqueued;
 }
 
 /**
@@ -700,26 +775,80 @@ int qbman_swp_enqueue_multiple_direct(st
  * @s:  the software portal used for enqueue
  * @d:  the enqueue descriptor
  * @fd: table pointer of frame descriptor table to be enqueued
- * @flags: table pointer of flags, not used for the moment
+ * @flags: table pointer of QBMAN_ENQUEUE_FLAG_DCA flags, not used if NULL
  * @num_frames: number of fd to be enqueued
 *
 * Return the number of fd enqueued, or a negative error number.
 */
+static
 int qbman_swp_enqueue_multiple_mem_back(struct qbman_swp *s,
-					const struct qbman_eq_desc *d,
-					const struct dpaa2_fd *fd,
-					uint32_t *flags,
-					int num_frames)
-{
-	int count = 0;
+					  const struct qbman_eq_desc *d,
+					  const struct dpaa2_fd *fd,
+					  uint32_t *flags,
+					  int num_frames)
+{
+	uint32_t *p = NULL;
+	const uint32_t *cl = (uint32_t *)(d);
+	uint32_t eqcr_ci, eqcr_pi, half_mask, full_mask;
+	int i, num_enqueued = 0;
+	unsigned long irq_flags;
+
+	spin_lock(&s->access_spinlock);
+	local_irq_save(irq_flags);
+
+	half_mask = (s->eqcr.pi_ci_mask>>1);
+	full_mask = s->eqcr.pi_ci_mask;
+	if (!s->eqcr.available) {
+		eqcr_ci = s->eqcr.ci;
+		p = s->addr_cena + QBMAN_CENA_SWP_EQCR_CI_MEMBACK;
+		s->eqcr.ci = __raw_readl(p) & full_mask;
+		s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size,
+					eqcr_ci, s->eqcr.ci);
+		if (!s->eqcr.available) {
+			local_irq_restore(irq_flags);
+			spin_unlock(&s->access_spinlock);
+			return 0;
+		}
+	}
+
+	eqcr_pi = s->eqcr.pi;
+	num_enqueued = (s->eqcr.available < num_frames) ?
+			s->eqcr.available : num_frames;
+	s->eqcr.available -= num_enqueued;
+	/* Fill in the EQCR ring */
+	for (i = 0; i < num_enqueued; i++) {
+		p = (s->addr_cena + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
+		/* Skip copying the verb */
+		memcpy(&p[1], &cl[1], EQ_DESC_SIZE_WITHOUT_FD - 1);
+		memcpy(&p[EQ_DESC_SIZE_FD_START/sizeof(uint32_t)],
+		       &fd[i], sizeof(*fd));
+		eqcr_pi++;
+	}
 
-	while (count < num_frames) {
-		if (qbman_swp_enqueue_mem_back(s, d, fd) != 0)
-			break;
-		count++;
+	/* Set the verb byte, have to substitute in the valid-bit */
+	eqcr_pi = s->eqcr.pi;
+	for (i = 0; i < num_enqueued; i++) {
+		p = (s->addr_cena + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
+		p[0] = cl[0] | s->eqcr.pi_vb;
+		if (flags && (flags[i] & QBMAN_ENQUEUE_FLAG_DCA)) {
+			struct qbman_eq_desc *d = (struct qbman_eq_desc *)p;
+
+			d->dca = (1 << QB_ENQUEUE_CMD_DCA_EN_SHIFT) |
+				((flags[i]) & QBMAN_EQCR_DCA_IDXMASK);
+		}
+		eqcr_pi++;
+		if (!(eqcr_pi & half_mask))
+			s->eqcr.pi_vb ^= QB_VALID_BIT;
 	}
+	s->eqcr.pi = eqcr_pi & full_mask;
+
+	dma_wmb();
+	qbman_write_register(s, QBMAN_CINH_SWP_EQCR_PI,
+				(QB_RT_BIT)|(s->eqcr.pi)|s->eqcr.pi_vb);
+	local_irq_restore(irq_flags);
+	spin_unlock(&s->access_spinlock);
 
-	return count;
+	return num_enqueued;
 }
 
 /**
@@ -732,20 +861,69 @@ int qbman_swp_enqueue_multiple_mem_back(
 *
 * Return the number of fd enqueued, or a negative error number.
 */
+static
 int qbman_swp_enqueue_multiple_desc_direct(struct qbman_swp *s,
 					   const struct qbman_eq_desc *d,
 					   const struct dpaa2_fd *fd,
 					   int num_frames)
 {
-	int count = 0;
+	uint32_t *p;
+	const uint32_t *cl;
+	uint32_t eqcr_ci, eqcr_pi, half_mask, full_mask;
+	int i, num_enqueued = 0;
+	uint64_t addr_cena;
+
+	half_mask = (s->eqcr.pi_ci_mask>>1);
+	full_mask = s->eqcr.pi_ci_mask;
+	if (!s->eqcr.available) {
+		eqcr_ci = s->eqcr.ci;
+		p = s->addr_cena + QBMAN_CENA_SWP_EQCR_CI;
+		s->eqcr.ci = qbman_read_register(s, QBMAN_CINH_SWP_EQCR_CI);
+		s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size,
+					eqcr_ci, s->eqcr.ci);
+		if (!s->eqcr.available)
+			return 0;
+	}
 
-	while (count < num_frames) {
-		if (qbman_swp_enqueue_direct(s, &(d[count]), fd) != 0)
-			break;
-		count++;
+	eqcr_pi = s->eqcr.pi;
+	num_enqueued = (s->eqcr.available < num_frames) ?
+			s->eqcr.available : num_frames;
+	s->eqcr.available -= num_enqueued;
+	/* Fill in the EQCR ring */
+	for (i = 0; i < num_enqueued; i++) {
+		p = (s->addr_cena + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
+		cl = (uint32_t *)(&d[i]);
+		/* Skip copying the verb */
+		memcpy(&p[1], &cl[1], EQ_DESC_SIZE_WITHOUT_FD - 1);
+		memcpy(&p[EQ_DESC_SIZE_FD_START/sizeof(uint32_t)],
+		       &fd[i], sizeof(*fd));
+		eqcr_pi++;
 	}
 
-	return count;
+	dma_wmb();
+
+	/* Set the verb byte, have to substitute in the valid-bit */
+	eqcr_pi = s->eqcr.pi;
+	for (i = 0; i < num_enqueued; i++) {
+		p = (s->addr_cena + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
+		cl = (uint32_t *)(&d[i]);
+		p[0] = cl[0] | s->eqcr.pi_vb;
+		eqcr_pi++;
+		if (!(eqcr_pi & half_mask))
+			s->eqcr.pi_vb ^= QB_VALID_BIT;
+	}
+
+	/* Flush all the cachelines without load/store in between */
+	eqcr_pi = s->eqcr.pi;
+	addr_cena = (uint64_t)s->addr_cena;
+	for (i = 0; i < num_enqueued; i++) {
+		dccvac((uint64_t *)(addr_cena +
+			QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask)));
+		eqcr_pi++;
+	}
+	s->eqcr.pi = eqcr_pi & full_mask;
+
+	return num_enqueued;
 }
 
 /**
@@ -758,20 +936,62 @@ int qbman_swp_enqueue_multiple_desc_dire
 *
 * Return the number of fd enqueued, or a negative error number.
 */
+static
 int qbman_swp_enqueue_multiple_desc_mem_back(struct qbman_swp *s,
 					     const struct qbman_eq_desc *d,
 					     const struct dpaa2_fd *fd,
 					     int num_frames)
 {
-	int count = 0;
+	uint32_t *p;
+	const uint32_t *cl;
+	uint32_t eqcr_ci, eqcr_pi, half_mask, full_mask;
+	int i, num_enqueued = 0;
+
+	half_mask = (s->eqcr.pi_ci_mask>>1);
+	full_mask = s->eqcr.pi_ci_mask;
+	if (!s->eqcr.available) {
+		eqcr_ci = s->eqcr.ci;
+		p = s->addr_cena + QBMAN_CENA_SWP_EQCR_CI_MEMBACK;
+		s->eqcr.ci = __raw_readl(p) & full_mask;
+		s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size,
+					eqcr_ci, s->eqcr.ci);
+		if (!s->eqcr.available)
+			return 0;
+	}
 
-	while (count < num_frames) {
-		if (qbman_swp_enqueue_mem_back(s, &(d[count]), fd) != 0)
-			break;
-		count++;
+	eqcr_pi = s->eqcr.pi;
+	num_enqueued = (s->eqcr.available < num_frames) ?
+			s->eqcr.available : num_frames;
+	s->eqcr.available -= num_enqueued;
+	/* Fill in the EQCR ring */
+	for (i = 0; i < num_enqueued; i++) {
+		p = (s->addr_cena + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
+		cl = (uint32_t *)(&d[i]);
+		/* Skip copying the verb */
+		memcpy(&p[1], &cl[1], EQ_DESC_SIZE_WITHOUT_FD - 1);
+		memcpy(&p[EQ_DESC_SIZE_FD_START/sizeof(uint32_t)],
+		       &fd[i], sizeof(*fd));
+		eqcr_pi++;
 	}
 
-	return count;
+	/* Set the verb byte, have to substitute in the valid-bit */
+	eqcr_pi = s->eqcr.pi;
+	for (i = 0; i < num_enqueued; i++) {
+		p = (s->addr_cena + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
+		cl = (uint32_t *)(&d[i]);
+		p[0] = cl[0] | s->eqcr.pi_vb;
+		eqcr_pi++;
+		if (!(eqcr_pi & half_mask))
+			s->eqcr.pi_vb ^= QB_VALID_BIT;
+	}
+
+	s->eqcr.pi = eqcr_pi & full_mask;
+
+	dma_wmb();
+	qbman_write_register(s, QBMAN_CINH_SWP_EQCR_PI,
+				(QB_RT_BIT)|(s->eqcr.pi)|s->eqcr.pi_vb);
+
+	return num_enqueued;
 }
 
 /* Static (push) dequeue */
@@ -937,6 +1157,7 @@ void qbman_pull_desc_set_channel(struct
  * Return 0 for success, and -EBUSY if the software portal is not ready
  * to do pull dequeue.
  */
+static
 int qbman_swp_pull_direct(struct qbman_swp *s, struct qbman_pull_desc *d)
 {
 	struct qbman_pull_desc *p;
@@ -973,6 +1194,7 @@ int qbman_swp_pull_direct(struct qbman_s
  * Return 0 for success, and -EBUSY if the software portal is not ready
  * to do pull dequeue.
  */
+static
 int qbman_swp_pull_mem_back(struct qbman_swp *s, struct qbman_pull_desc *d)
 {
 	struct qbman_pull_desc *p;
@@ -991,6 +1213,8 @@ int qbman_swp_pull_mem_back(struct qbman
 	p->dq_src = d->dq_src;
 	p->rsp_addr = d->rsp_addr;
 	p->rsp_addr_virt = d->rsp_addr_virt;
+
+	/* Set the verb byte, have to substitute in the valid-bit */
 	p->verb = d->verb | s->vdq.valid_bit;
 	s->vdq.valid_bit ^= QB_VALID_BIT;
 	dma_wmb();
--- a/drivers/soc/fsl/dpio/qbman-portal.h
+++ b/drivers/soc/fsl/dpio/qbman-portal.h
@@ -143,6 +143,19 @@ struct qbman_swp {
 		u8 dqrr_size;
 		int reset_bug; /* indicates dqrr reset workaround is needed */
 	} dqrr;
+
+	struct {
+		u32 pi;
+		u32 pi_vb;
+		u32 pi_ring_size;
+		u32 pi_ci_mask;
+		u32 ci;
+		int available;
+		u32 pend;
+		u32 no_pfdr;
+	} eqcr;
+
+	spinlock_t access_spinlock;
 };
 
 /* Function pointers */
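
Reviewer note (after the last hunk, outside the applied diff): a
hypothetical caller-side sketch of how the bulk API above is meant to be
driven. Only the partial-enqueue semantics come from the driver code;
the function name, retry count and setup are invented for illustration,
and qbman_swp_enqueue_multiple() is assumed to be the non-static wrapper
that dispatches through qbman_swp_enqueue_multiple_ptr:

	/* Enqueue a batch of frame descriptors, retrying the tail when the
	 * EQCR accepts fewer frames than requested. The return value of
	 * qbman_swp_enqueue_multiple() is the number actually enqueued
	 * (0 when the ring is full), or a negative error.
	 */
	static int xdp_bulk_tx_sketch(struct qbman_swp *s,
				      const struct qbman_eq_desc *d,
				      const struct dpaa2_fd *fds, int num)
	{
		int sent = 0, retries = 3;

		while (sent < num && retries--) {
			int ret = qbman_swp_enqueue_multiple(s, d, &fds[sent],
							     NULL /* no DCA */,
							     num - sent);
			if (ret < 0)
				return ret;	/* hard error */
			sent += ret;
		}
		return sent;
	}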