| From 0b8c6bbb0a561f15598f6701089a992bdea3963c Mon Sep 17 00:00:00 2001 |
| From: Youri Querry <youri.querry_1@nxp.com> |
| Date: Mon, 4 Nov 2019 11:03:09 -0500 |
| Subject: [PATCH] soc: fsl: dpio: Replace QMAN array mode by ring mode enqueue. |
| |
| This change of algorithm will enable faster bulk enqueue. |
| This will greatly benefit XDP bulk enqueue. |
| |
| Signed-off-by: Youri Querry <youri.querry_1@nxp.com> |
| --- |
| drivers/soc/fsl/dpio/qbman-portal.c | 420 +++++++++++++++++++++++++++--------- |
| drivers/soc/fsl/dpio/qbman-portal.h | 13 ++ |
| 2 files changed, 335 insertions(+), 98 deletions(-) |
| |
| --- a/drivers/soc/fsl/dpio/qbman-portal.c |
| +++ b/drivers/soc/fsl/dpio/qbman-portal.c |
| @@ -8,6 +8,7 @@ |
| #include <asm/cacheflush.h> |
| #include <linux/io.h> |
| #include <linux/slab.h> |
| +#include <linux/spinlock.h> |
| #include <soc/fsl/dpaa2-global.h> |
| |
| #include "qbman-portal.h" |
| @@ -22,6 +23,7 @@ |
| |
| /* CINH register offsets */ |
| #define QBMAN_CINH_SWP_EQCR_PI 0x800 |
| +#define QBMAN_CINH_SWP_EQCR_CI 0x840 |
| #define QBMAN_CINH_SWP_EQAR 0x8c0 |
| #define QBMAN_CINH_SWP_CR_RT 0x900 |
| #define QBMAN_CINH_SWP_VDQCR_RT 0x940 |
| @@ -45,6 +47,8 @@ |
| #define QBMAN_CENA_SWP_CR 0x600 |
| #define QBMAN_CENA_SWP_RR(vb) (0x700 + ((u32)(vb) >> 1)) |
| #define QBMAN_CENA_SWP_VDQCR 0x780 |
| +#define QBMAN_CENA_SWP_EQCR_CI 0x840 |
| +#define QBMAN_CENA_SWP_EQCR_CI_MEMBACK 0x1840 |
| |
| /* CENA register offsets in memory-backed mode */ |
| #define QBMAN_CENA_SWP_DQRR_MEM(n) (0x800 + ((u32)(n) << 6)) |
| @@ -72,6 +76,12 @@ |
| /* opaque token for static dequeues */ |
| #define QMAN_SDQCR_TOKEN 0xbb |
| |
| +#define QBMAN_EQCR_DCA_IDXMASK 0x0f |
| +#define QBMAN_ENQUEUE_FLAG_DCA (1ULL << 31) |
| + |
| +#define EQ_DESC_SIZE_WITHOUT_FD 29 |
| +#define EQ_DESC_SIZE_FD_START 32 |
| + |
| enum qbman_sdqcr_dct { |
| qbman_sdqcr_dct_null = 0, |
| qbman_sdqcr_dct_prio_ics, |
| @@ -224,6 +234,15 @@ static inline u32 qbman_set_swp_cfg(u8 m |
| |
| #define QMAN_RT_MODE 0x00000100 |
| |
| +static inline u8 qm_cyc_diff(u8 ringsize, u8 first, u8 last) |
| +{ |
| + /* Distance from 'first' (included) to 'last' (excluded) */ |
| + if (last >= first) |
| + return last - first; |
| + /* 'last' is behind 'first': wrap over a span of 2 * ringsize */ |
| + return (2 * ringsize) - (first - last); |
| +} |
| + |
| /** |
| * qbman_swp_init() - Create a functional object representing the given |
| * QBMan portal descriptor. |
| @@ -236,6 +255,10 @@ struct qbman_swp *qbman_swp_init(const s |
| { |
| struct qbman_swp *p = kzalloc(sizeof(*p), GFP_KERNEL); |
| u32 reg; |
| + u32 mask_size; |
| + u32 eqcr_pi; |
| |
| if (!p) |
| return NULL; |
| + /* p is dereferenced here, so the lock is set up only after the NULL check */ |
| + spin_lock_init(&p->access_spinlock); |
| @@ -264,25 +287,38 @@ struct qbman_swp *qbman_swp_init(const s |
| p->addr_cena = d->cena_bar; |
| p->addr_cinh = d->cinh_bar; |
| |
| - if ((p->desc->qman_version & QMAN_REV_MASK) >= QMAN_REV_5000) |
| - memset(p->addr_cena, 0, 64 * 1024); |
| + if ((p->desc->qman_version & QMAN_REV_MASK) < QMAN_REV_5000) { |
| |
| - reg = qbman_set_swp_cfg(p->dqrr.dqrr_size, |
| - 0, /* Writes cacheable */ |
| - 0, /* EQCR_CI stashing threshold */ |
| - 3, /* RPM: Valid bit mode, RCR in array mode */ |
| - 2, /* DCM: Discrete consumption ack mode */ |
| - 3, /* EPM: Valid bit mode, EQCR in array mode */ |
| - 1, /* mem stashing drop enable == TRUE */ |
| - 1, /* mem stashing priority == TRUE */ |
| - 1, /* mem stashing enable == TRUE */ |
| - 1, /* dequeue stashing priority == TRUE */ |
| - 0, /* dequeue stashing enable == FALSE */ |
| - 0); /* EQCR_CI stashing priority == FALSE */ |
| - if ((p->desc->qman_version & QMAN_REV_MASK) >= QMAN_REV_5000) |
| + reg = qbman_set_swp_cfg(p->dqrr.dqrr_size, |
| + 0, /* Writes Non-cacheable */ |
| + 0, /* EQCR_CI stashing threshold */ |
| + 3, /* RPM: RCR in array mode */ |
| + 2, /* DCM: Discrete consumption ack */ |
| + 2, /* EPM: EQCR in ring mode */ |
| + 1, /* mem stashing drop enable enable */ |
| + 1, /* mem stashing priority enable */ |
| + 1, /* mem stashing enable */ |
| + 1, /* dequeue stashing priority enable */ |
| + 0, /* dequeue stashing enable enable */ |
| + 0); /* EQCR_CI stashing priority enable */ |
| + } else { |
| + memset(p->addr_cena, 0, 64 * 1024); |
| + reg = qbman_set_swp_cfg(p->dqrr.dqrr_size, |
| + 0, /* Writes Non-cacheable */ |
| + 1, /* EQCR_CI stashing threshold */ |
| + 3, /* RPM: RCR in array mode */ |
| + 2, /* DCM: Discrete consumption ack */ |
| + 0, /* EPM: EQCR in ring mode */ |
| + 1, /* mem stashing drop enable */ |
| + 1, /* mem stashing priority enable */ |
| + 1, /* mem stashing enable */ |
| + 1, /* dequeue stashing priority enable */ |
| + 0, /* dequeue stashing enable */ |
| + 0); /* EQCR_CI stashing priority enable */ |
| reg |= 1 << SWP_CFG_CPBS_SHIFT | /* memory-backed mode */ |
| 1 << SWP_CFG_VPM_SHIFT | /* VDQCR read triggered mode */ |
| 1 << SWP_CFG_CPM_SHIFT; /* CR read triggered mode */ |
| + } |
| |
| qbman_write_register(p, QBMAN_CINH_SWP_CFG, reg); |
| reg = qbman_read_register(p, QBMAN_CINH_SWP_CFG); |
| @@ -304,7 +340,9 @@ struct qbman_swp *qbman_swp_init(const s |
| */ |
| qbman_write_register(p, QBMAN_CINH_SWP_SDQCR, 0); |
| |
| + p->eqcr.pi_ring_size = 8; |
| if ((p->desc->qman_version & QMAN_REV_MASK) >= QMAN_REV_5000) { |
| + p->eqcr.pi_ring_size = 32; |
| qbman_swp_enqueue_ptr = |
| qbman_swp_enqueue_mem_back; |
| qbman_swp_enqueue_multiple_ptr = |
| @@ -316,6 +354,15 @@ struct qbman_swp *qbman_swp_init(const s |
| qbman_swp_release_ptr = qbman_swp_release_mem_back; |
| } |
| |
| + for (mask_size = p->eqcr.pi_ring_size; mask_size > 0; mask_size >>= 1) |
| + p->eqcr.pi_ci_mask = (p->eqcr.pi_ci_mask << 1) + 1; |
| + eqcr_pi = qbman_read_register(p, QBMAN_CINH_SWP_EQCR_PI); |
| + p->eqcr.pi = eqcr_pi & p->eqcr.pi_ci_mask; |
| + p->eqcr.pi_vb = eqcr_pi & QB_VALID_BIT; |
| + p->eqcr.ci = qbman_read_register(p, QBMAN_CINH_SWP_EQCR_CI) |
| + & p->eqcr.pi_ci_mask; |
| + p->eqcr.available = p->eqcr.pi_ring_size; |
| + |
| return p; |
| } |
| |
| @@ -468,8 +515,9 @@ enum qb_enqueue_commands { |
| enqueue_rejects_to_fq = 2 |
| }; |
| |
| -#define QB_ENQUEUE_CMD_ORP_ENABLE_SHIFT 2 |
| -#define QB_ENQUEUE_CMD_TARGET_TYPE_SHIFT 4 |
| +#define QB_ENQUEUE_CMD_ORP_ENABLE_SHIFT 2 |
| +#define QB_ENQUEUE_CMD_TARGET_TYPE_SHIFT 4 |
| +#define QB_ENQUEUE_CMD_DCA_EN_SHIFT 7 |
| |
| /** |
| * qbman_eq_desc_clear() - Clear the contents of a descriptor to |
| @@ -582,6 +630,7 @@ static inline void qbman_write_eqcr_am_r |
| QMAN_RT_MODE); |
| } |
| |
| +#define QB_RT_BIT ((u32)0x100) |
| /** |
| * qbman_swp_enqueue_direct() - Issue an enqueue command |
| * @s: the software portal used for enqueue |
| @@ -593,35 +642,19 @@ static inline void qbman_write_eqcr_am_r |
| * |
| * Return 0 for successful enqueue, -EBUSY if the EQCR is not ready. |
| */ |
| -int qbman_swp_enqueue_direct(struct qbman_swp *s, const struct qbman_eq_desc *d, |
| - const struct dpaa2_fd *fd) |
| +static |
| +int qbman_swp_enqueue_direct(struct qbman_swp *s, |
| + const struct qbman_eq_desc *d, |
| + const struct dpaa2_fd *fd) |
| { |
| - struct qbman_eq_desc_with_fd *p; |
| - u32 eqar = qbman_read_register(s, QBMAN_CINH_SWP_EQAR); |
| - |
| - if (!EQAR_SUCCESS(eqar)) |
| - return -EBUSY; |
| + uint32_t flags = 0; |
| + int ret = qbman_swp_enqueue_multiple_direct(s, d, fd, &flags, 1); |
| |
| - p = qbman_get_cmd(s, QBMAN_CENA_SWP_EQCR(EQAR_IDX(eqar))); |
| - /* This is mapped as DEVICE type memory, writes are |
| - * with address alignment: |
| - * desc.dca address alignment = 1 |
| - * desc.seqnum address alignment = 2 |
| - * desc.orpid address alignment = 4 |
| - * desc.tgtid address alignment = 8 |
| - */ |
| - p->desc.dca = d->dca; |
| - p->desc.seqnum = d->seqnum; |
| - p->desc.orpid = d->orpid; |
| - memcpy(&p->desc.tgtid, &d->tgtid, 24); |
| - memcpy(&p->fd, fd, sizeof(*fd)); |
| - |
| - /* Set the verb byte, have to substitute in the valid-bit */ |
| - dma_wmb(); |
| - p->desc.verb = d->verb | EQAR_VB(eqar); |
| - dccvac(p); |
| - |
| - return 0; |
| + /* 0 frames accepted means no free EQCR entry: report -EBUSY, not success */ |
| + if (ret > 0) |
| + return 0; |
| + |
| + return -EBUSY; |
| } |
| |
| /** |
| @@ -635,35 +668,19 @@ int qbman_swp_enqueue_direct(struct qbma |
| * |
| * Return 0 for successful enqueue, -EBUSY if the EQCR is not ready. |
| */ |
| +static |
| int qbman_swp_enqueue_mem_back(struct qbman_swp *s, |
| const struct qbman_eq_desc *d, |
| const struct dpaa2_fd *fd) |
| { |
| - struct qbman_eq_desc_with_fd *p; |
| - u32 eqar = qbman_read_register(s, QBMAN_CINH_SWP_EQAR); |
| - |
| - if (!EQAR_SUCCESS(eqar)) |
| - return -EBUSY; |
| - |
| - p = qbman_get_cmd(s, QBMAN_CENA_SWP_EQCR(EQAR_IDX(eqar))); |
| - /* This is mapped as DEVICE type memory, writes are |
| - * with address alignment: |
| - * desc.dca address alignment = 1 |
| - * desc.seqnum address alignment = 2 |
| - * desc.orpid address alignment = 4 |
| - * desc.tgtid address alignment = 8 |
| - */ |
| - p->desc.dca = d->dca; |
| - p->desc.seqnum = d->seqnum; |
| - p->desc.orpid = d->orpid; |
| - memcpy(&p->desc.tgtid, &d->tgtid, 24); |
| - memcpy(&p->fd, fd, sizeof(*fd)); |
| - |
| - p->desc.verb = d->verb | EQAR_VB(eqar); |
| - dma_wmb(); |
| - qbman_write_eqcr_am_rt_register(s, EQAR_IDX(eqar)); |
| + uint32_t flags = 0; |
| + int ret = qbman_swp_enqueue_multiple_mem_back(s, d, fd, &flags, 1); |
| |
| - return 0; |
| + /* 0 frames accepted means no free EQCR entry: report -EBUSY, not success */ |
| + if (ret > 0) |
| + return 0; |
| + |
| + return -EBUSY; |
| } |
| |
| /** |
| @@ -672,26 +689,84 @@ int qbman_swp_enqueue_mem_back(struct qb |
| * @s: the software portal used for enqueue |
| * @d: the enqueue descriptor |
| * @fd: table pointer of frame descriptor table to be enqueued |
| - * @flags: table pointer of flags, not used for the moment |
| + * @flags: table pointer of QBMAN_ENQUEUE_FLAG_DCA flags, not used if NULL |
| * @num_frames: number of fd to be enqueued |
| * |
| * Return the number of fd enqueued, or a negative error number. |
| */ |
| +static |
| int qbman_swp_enqueue_multiple_direct(struct qbman_swp *s, |
| const struct qbman_eq_desc *d, |
| const struct dpaa2_fd *fd, |
| uint32_t *flags, |
| int num_frames) |
| { |
| - int count = 0; |
| + uint32_t *p = NULL; |
| + const uint32_t *cl = (uint32_t *)d; |
| + uint32_t eqcr_ci, eqcr_pi, half_mask, full_mask; |
| + int i, num_enqueued = 0; |
| + uint64_t addr_cena; |
| + |
| + spin_lock(&s->access_spinlock); |
| + half_mask = (s->eqcr.pi_ci_mask>>1); |
| + full_mask = s->eqcr.pi_ci_mask; |
| + |
| + if (!s->eqcr.available) { |
| + eqcr_ci = s->eqcr.ci; |
| + /* keep only the pi_ci_mask bits so qm_cyc_diff() sees a ring index */ |
| + s->eqcr.ci = qbman_read_register(s, QBMAN_CINH_SWP_EQCR_CI) & full_mask; |
| + |
| + s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size, |
| + eqcr_ci, s->eqcr.ci); |
| + if (!s->eqcr.available) { |
| + spin_unlock(&s->access_spinlock); |
| + return 0; |
| + } |
| + } |
| |
| - while (count < num_frames) { |
| - if (qbman_swp_enqueue_direct(s, d, fd) != 0) |
| - break; |
| - count++; |
| + eqcr_pi = s->eqcr.pi; |
| + num_enqueued = (s->eqcr.available < num_frames) ? |
| + s->eqcr.available : num_frames; |
| + s->eqcr.available -= num_enqueued; |
| + /* Fill in the EQCR ring */ |
| + for (i = 0; i < num_enqueued; i++) { |
| + p = (s->addr_cena + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask)); |
| + /* Skip copying the verb */ |
| + memcpy(&p[1], &cl[1], EQ_DESC_SIZE_WITHOUT_FD - 1); |
| + memcpy(&p[EQ_DESC_SIZE_FD_START/sizeof(uint32_t)], |
| + &fd[i], sizeof(*fd)); |
| + eqcr_pi++; |
| } |
| |
| - return count; |
| + dma_wmb(); |
| + |
| + /* Set the verb byte, have to substitute in the valid-bit */ |
| + eqcr_pi = s->eqcr.pi; |
| + for (i = 0; i < num_enqueued; i++) { |
| + p = (s->addr_cena + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask)); |
| + p[0] = cl[0] | s->eqcr.pi_vb; |
| + if (flags && (flags[i] & QBMAN_ENQUEUE_FLAG_DCA)) { |
| + struct qbman_eq_desc *eq = (struct qbman_eq_desc *)p; |
| + |
| + eq->dca = (1 << QB_ENQUEUE_CMD_DCA_EN_SHIFT) | |
| + ((flags[i]) & QBMAN_EQCR_DCA_IDXMASK); |
| + } |
| + eqcr_pi++; |
| + if (!(eqcr_pi & half_mask)) |
| + s->eqcr.pi_vb ^= QB_VALID_BIT; |
| + } |
| + |
| + /* Flush all the cacheline without load/store in between */ |
| + eqcr_pi = s->eqcr.pi; |
| + addr_cena = (size_t)s->addr_cena; |
| + for (i = 0; i < num_enqueued; i++) { |
| + dccvac((addr_cena + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask))); |
| + eqcr_pi++; |
| + } |
| + s->eqcr.pi = eqcr_pi & full_mask; |
| + spin_unlock(&s->access_spinlock); |
| + |
| + return num_enqueued; |
| } |
| |
| /** |
| @@ -700,26 +775,80 @@ int qbman_swp_enqueue_multiple_direct(st |
| * @s: the software portal used for enqueue |
| * @d: the enqueue descriptor |
| * @fd: table pointer of frame descriptor table to be enqueued |
| - * @flags: table pointer of flags, not used for the moment |
| + * @flags: table pointer of QBMAN_ENQUEUE_FLAG_DCA flags, not used if NULL |
| * @num_frames: number of fd to be enqueued |
| * |
| * Return the number of fd enqueued, or a negative error number. |
| */ |
| +static |
| int qbman_swp_enqueue_multiple_mem_back(struct qbman_swp *s, |
| - const struct qbman_eq_desc *d, |
| - const struct dpaa2_fd *fd, |
| - uint32_t *flags, |
| - int num_frames) |
| -{ |
| - int count = 0; |
| + const struct qbman_eq_desc *d, |
| + const struct dpaa2_fd *fd, |
| + uint32_t *flags, |
| + int num_frames) |
| +{ |
| + uint32_t *p = NULL; |
| + const uint32_t *cl = (uint32_t *)(d); |
| + uint32_t eqcr_ci, eqcr_pi, half_mask, full_mask; |
| + int i, num_enqueued = 0; |
| + unsigned long irq_flags; |
| + |
| + /* lock and disable local interrupts in a single step */ |
| + spin_lock_irqsave(&s->access_spinlock, irq_flags); |
| + |
| + half_mask = (s->eqcr.pi_ci_mask>>1); |
| + full_mask = s->eqcr.pi_ci_mask; |
| + if (!s->eqcr.available) { |
| + eqcr_ci = s->eqcr.ci; |
| + p = s->addr_cena + QBMAN_CENA_SWP_EQCR_CI_MEMBACK; |
| + s->eqcr.ci = __raw_readl(p) & full_mask; |
| + s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size, |
| + eqcr_ci, s->eqcr.ci); |
| + if (!s->eqcr.available) { |
| + /* still no free EQCR entry: nothing was enqueued */ |
| + spin_unlock_irqrestore(&s->access_spinlock, irq_flags); |
| + return 0; |
| + } |
| + } |
| + |
| + eqcr_pi = s->eqcr.pi; |
| + num_enqueued = (s->eqcr.available < num_frames) ? |
| + s->eqcr.available : num_frames; |
| + s->eqcr.available -= num_enqueued; |
| + /* Fill in the EQCR ring */ |
| + for (i = 0; i < num_enqueued; i++) { |
| + p = (s->addr_cena + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask)); |
| + /* Skip copying the verb */ |
| + memcpy(&p[1], &cl[1], EQ_DESC_SIZE_WITHOUT_FD - 1); |
| + memcpy(&p[EQ_DESC_SIZE_FD_START/sizeof(uint32_t)], |
| + &fd[i], sizeof(*fd)); |
| + eqcr_pi++; |
| + } |
| |
| - while (count < num_frames) { |
| - if (qbman_swp_enqueue_mem_back(s, d, fd) != 0) |
| - break; |
| - count++; |
| + /* Set the verb byte, have to substitute in the valid-bit */ |
| + eqcr_pi = s->eqcr.pi; |
| + for (i = 0; i < num_enqueued; i++) { |
| + p = (s->addr_cena + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask)); |
| + p[0] = cl[0] | s->eqcr.pi_vb; |
| + if (flags && (flags[i] & QBMAN_ENQUEUE_FLAG_DCA)) { |
| + struct qbman_eq_desc *eq = (struct qbman_eq_desc *)p; |
| + |
| + eq->dca = (1 << QB_ENQUEUE_CMD_DCA_EN_SHIFT) | |
| + ((flags[i]) & QBMAN_EQCR_DCA_IDXMASK); |
| + } |
| + eqcr_pi++; |
| + if (!(eqcr_pi & half_mask)) |
| + s->eqcr.pi_vb ^= QB_VALID_BIT; |
| } |
| + s->eqcr.pi = eqcr_pi & full_mask; |
| + |
| + /* order the ring writes before the EQCR_PI register write */ |
| + dma_wmb(); |
| + qbman_write_register(s, QBMAN_CINH_SWP_EQCR_PI, |
| + (QB_RT_BIT)|(s->eqcr.pi)|s->eqcr.pi_vb); |
| + spin_unlock_irqrestore(&s->access_spinlock, irq_flags); |
| |
| - return count; |
| + return num_enqueued; |
| } |
| |
| /** |
| @@ -732,20 +861,69 @@ int qbman_swp_enqueue_multiple_mem_back( |
| * |
| * Return the number of fd enqueued, or a negative error number. |
| */ |
| +static |
| int qbman_swp_enqueue_multiple_desc_direct(struct qbman_swp *s, |
| const struct qbman_eq_desc *d, |
| const struct dpaa2_fd *fd, |
| int num_frames) |
| { |
| - int count = 0; |
| + uint32_t *p; |
| + const uint32_t *cl; |
| + uint32_t eqcr_ci, eqcr_pi, half_mask, full_mask; |
| + int i, num_enqueued = 0; |
| + uint64_t addr_cena; |
| + |
| + half_mask = (s->eqcr.pi_ci_mask>>1); |
| + full_mask = s->eqcr.pi_ci_mask; |
| + if (!s->eqcr.available) { |
| + eqcr_ci = s->eqcr.ci; |
| + /* keep only the pi_ci_mask bits so qm_cyc_diff() sees a ring index */ |
| + s->eqcr.ci = qbman_read_register(s, QBMAN_CINH_SWP_EQCR_CI) & full_mask; |
| + s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size, |
| + eqcr_ci, s->eqcr.ci); |
| + if (!s->eqcr.available) |
| + return 0; |
| + } |
| |
| - while (count < num_frames) { |
| - if (qbman_swp_enqueue_direct(s, &(d[count]), fd) != 0) |
| - break; |
| - count++; |
| + eqcr_pi = s->eqcr.pi; |
| + num_enqueued = (s->eqcr.available < num_frames) ? |
| + s->eqcr.available : num_frames; |
| + s->eqcr.available -= num_enqueued; |
| + /* Fill in the EQCR ring */ |
| + for (i = 0; i < num_enqueued; i++) { |
| + p = (s->addr_cena + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask)); |
| + cl = (uint32_t *)(&d[i]); |
| + /* Skip copying the verb */ |
| + memcpy(&p[1], &cl[1], EQ_DESC_SIZE_WITHOUT_FD - 1); |
| + memcpy(&p[EQ_DESC_SIZE_FD_START/sizeof(uint32_t)], |
| + &fd[i], sizeof(*fd)); |
| + eqcr_pi++; |
| } |
| |
| - return count; |
| + dma_wmb(); |
| + |
| + /* Set the verb byte, have to substitute in the valid-bit */ |
| + eqcr_pi = s->eqcr.pi; |
| + for (i = 0; i < num_enqueued; i++) { |
| + p = (s->addr_cena + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask)); |
| + cl = (uint32_t *)(&d[i]); |
| + p[0] = cl[0] | s->eqcr.pi_vb; |
| + eqcr_pi++; |
| + if (!(eqcr_pi & half_mask)) |
| + s->eqcr.pi_vb ^= QB_VALID_BIT; |
| + } |
| + |
| + /* Flush all the cacheline without load/store in between */ |
| + eqcr_pi = s->eqcr.pi; |
| + addr_cena = (uint64_t)s->addr_cena; |
| + for (i = 0; i < num_enqueued; i++) { |
| + dccvac((uint64_t *)(addr_cena + |
| + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask))); |
| + eqcr_pi++; |
| + } |
| + s->eqcr.pi = eqcr_pi & full_mask; |
| + |
| + return num_enqueued; |
| } |
| |
| /** |
| @@ -758,20 +936,62 @@ int qbman_swp_enqueue_multiple_desc_dire |
| * |
| * Return the number of fd enqueued, or a negative error number. |
| */ |
| +static |
| int qbman_swp_enqueue_multiple_desc_mem_back(struct qbman_swp *s, |
| const struct qbman_eq_desc *d, |
| const struct dpaa2_fd *fd, |
| int num_frames) |
| { |
| - int count = 0; |
| + uint32_t *p; |
| + const uint32_t *cl; |
| + uint32_t eqcr_ci, eqcr_pi, half_mask, full_mask; |
| + int i, num_enqueued = 0; |
| + |
| + half_mask = (s->eqcr.pi_ci_mask>>1); |
| + full_mask = s->eqcr.pi_ci_mask; |
| + if (!s->eqcr.available) { |
| + eqcr_ci = s->eqcr.ci; |
| + p = s->addr_cena + QBMAN_CENA_SWP_EQCR_CI_MEMBACK; |
| + s->eqcr.ci = __raw_readl(p) & full_mask; |
| + s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size, |
| + eqcr_ci, s->eqcr.ci); |
| + if (!s->eqcr.available) |
| + return 0; |
| + } |
| |
| - while (count < num_frames) { |
| - if (qbman_swp_enqueue_mem_back(s, &(d[count]), fd) != 0) |
| - break; |
| - count++; |
| + eqcr_pi = s->eqcr.pi; |
| + num_enqueued = (s->eqcr.available < num_frames) ? |
| + s->eqcr.available : num_frames; |
| + s->eqcr.available -= num_enqueued; |
| + /* Fill in the EQCR ring: everything except the verb word */ |
| + for (i = 0; i < num_enqueued; i++) { |
| + p = (s->addr_cena + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask)); |
| + cl = (uint32_t *)(&d[i]); |
| + /* Skip copying the verb (word 0); it is written in the second pass */ |
| + memcpy(&p[1], &cl[1], EQ_DESC_SIZE_WITHOUT_FD - 1); |
| + memcpy(&p[EQ_DESC_SIZE_FD_START/sizeof(uint32_t)], |
| + &fd[i], sizeof(*fd)); |
| + eqcr_pi++; |
| } |
| |
| - return count; |
| + /* Set the verb last, OR-ing in pi_vb; pi_vb flips when the index wraps */ |
| + eqcr_pi = s->eqcr.pi; |
| + for (i = 0; i < num_enqueued; i++) { |
| + p = (s->addr_cena + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask)); |
| + cl = (uint32_t *)(&d[i]); |
| + p[0] = cl[0] | s->eqcr.pi_vb; |
| + eqcr_pi++; |
| + if (!(eqcr_pi & half_mask)) |
| + s->eqcr.pi_vb ^= QB_VALID_BIT; |
| + } |
| + |
| + s->eqcr.pi = eqcr_pi & full_mask; |
| + /* order the ring writes before the EQCR_PI register write */ |
| + dma_wmb(); |
| + qbman_write_register(s, QBMAN_CINH_SWP_EQCR_PI, |
| + (QB_RT_BIT)|(s->eqcr.pi)|s->eqcr.pi_vb); |
| + |
| + return num_enqueued; |
| } |
| |
| /* Static (push) dequeue */ |
| @@ -937,6 +1157,7 @@ void qbman_pull_desc_set_channel(struct |
| * Return 0 for success, and -EBUSY if the software portal is not ready |
| * to do pull dequeue. |
| */ |
| +static |
| int qbman_swp_pull_direct(struct qbman_swp *s, struct qbman_pull_desc *d) |
| { |
| struct qbman_pull_desc *p; |
| @@ -973,6 +1194,7 @@ int qbman_swp_pull_direct(struct qbman_s |
| * Return 0 for success, and -EBUSY if the software portal is not ready |
| * to do pull dequeue. |
| */ |
| +static |
| int qbman_swp_pull_mem_back(struct qbman_swp *s, struct qbman_pull_desc *d) |
| { |
| struct qbman_pull_desc *p; |
| @@ -991,6 +1213,8 @@ int qbman_swp_pull_mem_back(struct qbman |
| p->dq_src = d->dq_src; |
| p->rsp_addr = d->rsp_addr; |
| p->rsp_addr_virt = d->rsp_addr_virt; |
| + |
| + /* Set the verb byte, have to substitute in the valid-bit */ |
| p->verb = d->verb | s->vdq.valid_bit; |
| s->vdq.valid_bit ^= QB_VALID_BIT; |
| dma_wmb(); |
| --- a/drivers/soc/fsl/dpio/qbman-portal.h |
| +++ b/drivers/soc/fsl/dpio/qbman-portal.h |
| @@ -143,6 +143,19 @@ struct qbman_swp { |
| u8 dqrr_size; |
| int reset_bug; /* indicates dqrr reset workaround is needed */ |
| } dqrr; |
| + |
| + struct { |
| + u32 pi; |
| + u32 pi_vb; |
| + u32 pi_ring_size; |
| + u32 pi_ci_mask; |
| + u32 ci; |
| + int available; |
| + u32 pend; |
| + u32 no_pfdr; |
| + } eqcr; |
| + |
| + spinlock_t access_spinlock; |
| }; |
| |
| /* Function pointers */ |