ASR_BASE

Change-Id: Icf3719cc0afe3eeb3edc7fa80a2eb5199ca9dda1
diff --git a/marvell/linux/drivers/spi/spi-asr-qspi.c b/marvell/linux/drivers/spi/spi-asr-qspi.c
new file mode 100644
index 0000000..855f84a
--- /dev/null
+++ b/marvell/linux/drivers/spi/spi-asr-qspi.c
@@ -0,0 +1,2121 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * ASR QSPI driver
+ *
+ * Copyright (C) 2019 ASR Micro Limited
+ *
+ */
+
+#include <linux/bitops.h>
+#include <linux/clk.h>
+#include <linux/completion.h>
+#include <linux/delay.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/pm_qos.h>
+#include <linux/pm_runtime.h>
+#include <linux/sizes.h>
+#include <linux/genalloc.h>
+#include <linux/cputype.h>
+
+#include <soc/asr/regs-addr.h>
+#include <linux/spi/spi.h>
+#include <linux/spi/spi-mem.h>
+
+
+/* #define ASR_DUMP_QSPI_REG		0 */
+
+#define QSPI_WAIT_TIMEOUT		(300) /* ms */
+#define QSPI_AUTOSUSPEND_TIMEOUT		2000
+#define ASR_MPMU_ACGR			0x1024
+
+/* QSPI PMUap register */
+#define PMUA_QSPI_CLK_RES_CTRL		0x01282860
+#define QSPI_CLK_SEL(x)			((x) << 6)
+#define QSPI_CLK_SEL_MASK		GENMASK(8, 6)
+#define QSPI_CLK_EN			BIT(4)
+#define QSPI_BUS_CLK_EN			BIT(3)
+#define QSPI_CLK_RST			BIT(1)
+#define QSPI_BUS_RST			BIT(0)
+
+/* QSPI memory base */
+#if 0
+#define QSPI_AMBA_BASE			0x300000
+#define QSPI_FLASH_A1_BASE		QSPI_AMBA_BASE
+#define QSPI_FLASH_A1_TOP		(QSPI_FLASH_A1_BASE + 0xa00000)
+#define QSPI_FLASH_A2_BASE		QSPI_FLASH_A1_TOP
+#define QSPI_FLASH_A2_TOP		(QSPI_FLASH_A2_BASE + 0x100000)
+#define QSPI_FLASH_B1_BASE		QSPI_FLASH_A2_TOP
+#define QSPI_FLASH_B1_TOP		(QSPI_FLASH_B1_BASE + 0x100000)
+#define QSPI_FLASH_B2_BASE		QSPI_FLASH_B1_TOP
+#define QSPI_FLASH_B2_TOP		(QSPI_FLASH_B2_BASE + 0x100000)
+#else
+/* AHB base addr */
+#define QSPI_AMBA_BASE			0x80000000
+#define QSPI_FLASH_A1_BASE		0x80000000
+#define QSPI_FLASH_A1_TOP		0x88000000
+#define QSPI_FLASH_A2_BASE		0x88000000
+#define QSPI_FLASH_A2_TOP		0x90000000
+#define QSPI_FLASH_B1_BASE		0x90000000
+#define QSPI_FLASH_B1_TOP		0x98000000
+#define QSPI_FLASH_B2_BASE		0x98000000
+#define QSPI_FLASH_B2_TOP		0xa0000000
+
+#endif
+
+/* TX/RX/ABH buffer max */
+#define QSPI_RX_BUFF_MAX 		SZ_128
+#define QSPI_TX_BUFF_MAX 		SZ_256
+#define QSPI_TX_BUFF_POP_MIN 		16
+#define QSPI_AHB_BUFF_MAX_SIZE		SZ_512
+#define QSPI_TX_DMA_BURST 		SZ_32
+
+#define QSPI_WAIT_BIT_CLEAR		0
+#define QSPI_WAIT_BIT_SET		1
+
+/* clk source from PLL1 */
+#define QSPI_CLK_PLL1_51P2		51200000
+#define QSPI_CLK_PLL1_76P8		76800000
+#define QSPI_CLK_PLL1_102P4		102400000
+#define QSPI_CLK_PLL1_153P6		153600000
+#define QSPI_CLK_PLL1_204P8		204800000
+#define QSPI_CLK_PLL1_307P2		307200000
+#define QSPI_CLK_PLL1_409P6		409600000
+#define ASR_QSPI_DEFAULT_CLK_FREQ	(QSPI_CLK_PLL1_102P4 >> 2)
+
+/* QSPI Host Registers used by the driver */
+#define QSPI_MCR			0x00
+#define QSPI_MCR_DQS_INV_EN		BIT(26)
+#define QSPI_MCR_DQS_LP_EN		BIT(25)
+#define QSPI_MCR_ISD_MASK		GENMASK(19, 16)
+#define QSPI_MCR_MDIS_MASK		BIT(14)
+#define QSPI_MCR_CLR_TXF_MASK		BIT(11)
+#define QSPI_MCR_CLR_RXF_MASK		BIT(10)
+#define QSPI_MCR_DDR_EN_MASK		BIT(7)
+#define QSPI_MCR_DQS_EN			BIT(6)
+#define QSPI_MCR_END_CFG_MASK		GENMASK(3, 2)
+#define QSPI_MCR_SWRSTHD_MASK		BIT(1)
+#define QSPI_MCR_SWRSTSD_MASK		BIT(0)
+
+#define QSPI_TCR			0x04
+#define QSPI_IPCR			0x08
+#define QSPI_IPCR_SEQID(x)		((x) << 24)
+
+#define QSPI_FLSHCR			0x0c
+
+#define QSPI_BUF0CR			0x10
+#define QSPI_BUF1CR			0x14
+#define QSPI_BUF2CR			0x18
+#define QSPI_BUF3CR			0x1c
+#define QSPI_BUF3CR_ALLMST_MASK		BIT(31)
+#define QSPI_BUF3CR_ADATSZ(x)		((x) << 8)
+#define QSPI_BUF3CR_ADATSZ_MASK		GENMASK(15, 8)
+
+#define QSPI_BFGENCR			0x20
+#define QSPI_BFGENCR_SEQID(x)		((x) << 12)
+
+#define QSPI_SOCCR			0x24
+#define QSPI_SOCCR_DLINE_EN		BIT(8)
+
+#define QSPI_DLACR_DLINE_CODE_SHIFT	0
+#define QSPI_DLACR_DLINE_CODE_MASK	GENMASK(7, 0)
+#define QSPI_DLACR_DLINE_STEP_SHIFT	8
+#define QSPI_DLACR_DLINE_STEP_MASK	GENMASK(15, 8)
+
+#define QSPI_BUF0IND			0x30
+#define QSPI_BUF1IND			0x34
+#define QSPI_BUF2IND			0x38
+#define QSPI_DLACR			0x3C
+
+#define QSPI_SFAR			0x100
+#define QSPI_SFACR			0x104
+
+#define QSPI_SMPR			0x108
+#define QSPI_SMPR_DDRSMP_SHIFT		16
+#define QSPI_SMPR_DDRSMP_MASK		GENMASK(18, 16)
+#define QSPI_SMPR_FSDLY_MASK		BIT(6)
+#define QSPI_SMPR_FSPHS_MASK		BIT(5)
+#define QSPI_SMPR_HSENA_MASK		BIT(0)
+
+#define QSPI_RBSR			0x10c
+
+#define QSPI_RBCT			0x110
+#define QSPI_RBCT_WMRK_MASK		GENMASK(4, 0)
+#define QSPI_RBCT_RXBRD_MASK		BIT(8)
+
+#define QSPI_TBSR			0x150
+#define QSPI_TBDR			0x154
+#define QSPI_TBCT			0x158
+#define QSPI_TX_WMRK			(QSPI_TX_DMA_BURST / 4 - 1)
+
+#define QSPI_SR				0x15c
+#define QSPI_SR_BUSY			BIT(0)
+#define QSPI_SR_IP_ACC_MASK		BIT(1)
+#define QSPI_SR_AHB_ACC_MASK		BIT(2)
+#define QSPI_SR_TXFULL			BIT(27)
+
+#define QSPI_FR				0x160
+#define QSPI_FR_TFF_MASK		BIT(0)
+#define QSPI_FR_IPGEF			BIT(4)
+#define QSPI_FR_IPIEF			BIT(6)
+#define QSPI_FR_IPAEF			BIT(7)
+#define QSPI_FR_IUEF			BIT(11)
+#define QSPI_FR_ABOF			BIT(12)
+#define QSPI_FR_AIBSEF			BIT(13)
+#define QSPI_FR_AITEF			BIT(14)
+#define QSPI_FR_ABSEF			BIT(15)
+#define QSPI_FR_RBDF			BIT(16)
+#define QSPI_FR_RBOF			BIT(17)
+#define QSPI_FR_ILLINE			BIT(23)
+#define QSPI_FR_TBUF			BIT(26)
+#define QSPI_FR_TBFF			BIT(27)
+#define BUFFER_FR_FLAG			(QSPI_FR_ABOF| QSPI_FR_RBOF| \
+					QSPI_FR_TBUF)
+
+#define COMMAND_FR_FLAG			(QSPI_FR_ABSEF | QSPI_FR_AITEF | \
+					QSPI_FR_AIBSEF | QSPI_FR_IUEF | \
+					QSPI_FR_IPAEF |QSPI_FR_IPIEF | \
+					QSPI_FR_IPGEF)
+
+#define QSPI_RSER			0x164
+#define QSPI_RSER_TFIE			BIT(0)
+#define QSPI_RSER_IPGEIE		BIT(4)
+#define QSPI_RSER_IPIEIE		BIT(6)
+#define QSPI_RSER_IPAEIE		BIT(7)
+#define QSPI_RSER_IUEIE			BIT(11)
+#define QSPI_RSER_ABOIE			BIT(12)
+#define QSPI_RSER_AIBSIE		BIT(13)
+#define QSPI_RSER_AITIE			BIT(14)
+#define QSPI_RSER_ABSEIE		BIT(15)
+#define QSPI_RSER_RBDIE			BIT(16)
+#define QSPI_RSER_RBOIE			BIT(17)
+#define QSPI_RSER_RBDDE			BIT(21)
+#define QSPI_RSER_ILLINIE		BIT(23)
+#define QSPI_RSER_TBFDE			BIT(25)
+#define QSPI_RSER_TBUIE			BIT(26)
+#define QSPI_RSER_TBFIE			BIT(27)
+#define BUFFER_ERROR_INT		(QSPI_RSER_ABOIE| QSPI_RSER_RBOIE| \
+					QSPI_RSER_TBUIE)
+
+#define COMMAND_ERROR_INT		(QSPI_RSER_ABSEIE | QSPI_RSER_AITIE | \
+					QSPI_RSER_AIBSIE | QSPI_RSER_IUEIE | \
+					QSPI_RSER_IPAEIE |QSPI_RSER_IPIEIE | \
+					QSPI_RSER_IPGEIE)
+
+#define QSPI_SPNDST			0x168
+#define QSPI_SPTRCLR			0x16c
+#define QSPI_SPTRCLR_IPPTRC		BIT(8)
+#define QSPI_SPTRCLR_BFPTRC		BIT(0)
+
+#define QSPI_SFA1AD			0x180
+#define QSPI_SFA2AD			0x184
+#define QSPI_SFB1AD			0x188
+#define QSPI_SFB2AD			0x18c
+#define QSPI_DLPR			0x190
+#define QSPI_RBDR(x)			(0x200 + ((x) * 4))
+
+#define QSPI_LUTKEY			0x300
+#define QSPI_LUTKEY_VALUE		0x5af05af0
+
+#define QSPI_LCKCR			0x304
+#define QSPI_LCKER_LOCK			BIT(0)
+#define QSPI_LCKER_UNLOCK		BIT(1)
+
+#define QSPI_LUT_BASE			0x310
+/* 16Bytes per sequence */
+#define QSPI_LUT_REG(seqid, i)		(QSPI_LUT_BASE + (seqid) * 16 + (i) * 4)
+
+/*
+ * QSPI Sequence index.
+ * index 0 is preset at boot for AHB read,
+ * index 1 is used for other command.
+ */
+#define	SEQID_LUT_AHBREAD_ID		0
+#define	SEQID_LUT_SHARED_ID		1
+
+/* QSPI Instruction set for the LUT register */
+#define LUT_INSTR_STOP			0
+#define LUT_INSTR_CMD			1
+#define LUT_INSTR_ADDR			2
+#define LUT_INSTR_DUMMY			3
+#define LUT_INSTR_MODE			4
+#define LUT_INSTR_MODE2			5
+#define LUT_INSTR_MODE4			6
+#define LUT_INSTR_READ			7
+#define LUT_INSTR_WRITE			8
+#define LUT_INSTR_JMP_ON_CS		9
+#define LUT_INSTR_ADDR_DDR		10
+#define LUT_INSTR_MODE_DDR		11
+#define LUT_INSTR_MODE2_DDR		12
+#define LUT_INSTR_MODE4_DDR		13
+#define LUT_INSTR_READ_DDR		14
+#define LUT_INSTR_WRITE_DDR		15
+#define LUT_INSTR_DATA_LEARN		16
+#define LUT_INSTR_CMD_DDR		17
+
+/*
+ * The PAD definitions for LUT register.
+ *
+ * The pad stands for the number of IO lines [0:3].
+ * For example, the quad read needs four IO lines,
+ * so you should use LUT_PAD(4).
+ */
+#define LUT_PAD(x) (fls(x) - 1)
+
+/*
+ * One sequence must be consisted of 4 LUT enteries(16Bytes).
+ * LUT entries with the following register layout:
+ * b'31                                                                     b'0
+ *  ---------------------------------------------------------------------------
+ *  |INSTR1[15~10]|PAD1[9~8]|OPRND1[7~0] | INSTR0[15~10]|PAD0[9~8]|OPRND0[7~0]|
+ *  ---------------------------------------------------------------------------
+ */
+#define LUT_DEF(idx, ins, pad, opr)					\
+	((((ins) << 10) | ((pad) << 8) | (opr)) << (((idx) & 0x1) * 16))
+
+#define READ_FROM_CACHE_OP		0x03
+#define READ_FROM_CACHE_OP_Fast		0x0b
+#define READ_FROM_CACHE_OP_X2		0x3b
+#define READ_FROM_CACHE_OP_X4		0x6b
+#define READ_FROM_CACHE_OP_DUALIO	0xbb
+#define READ_FROM_CACHE_OP_QUADIO	0xeb
+
+/*
+ * Register offsets dumped by qspi_dump_reg(), in dump order.
+ * NOTE(review): non-static; consider making it static const so it does not
+ * leak into the kernel's global namespace.
+ */
+u32 reg_offset_table[] = {
+	QSPI_MCR,	QSPI_TCR,	QSPI_IPCR,	QSPI_FLSHCR,
+	QSPI_BUF0CR,	QSPI_BUF1CR,	QSPI_BUF2CR,	QSPI_BUF3CR,
+	QSPI_BFGENCR,	QSPI_SOCCR,	QSPI_BUF0IND,	QSPI_BUF1IND,
+	QSPI_BUF2IND,	QSPI_SFAR,	QSPI_SFACR,	QSPI_SMPR,
+	QSPI_RBSR,	QSPI_RBCT,	QSPI_TBSR,	QSPI_TBDR,
+	QSPI_TBCT,	QSPI_SR,	QSPI_FR,	QSPI_RSER,
+	QSPI_SPNDST,	QSPI_SPTRCLR,	QSPI_SFA1AD,	QSPI_SFA2AD,
+	QSPI_SFB1AD,	QSPI_SFB2AD,	QSPI_DLPR,	QSPI_LUTKEY,
+	QSPI_LCKCR
+};
+
+#define QSPI_MAX_SEQ_NUM	16
+
+/* asr qspi host priv */
+/*
+ * struct asr_qspi - per-controller private state.
+ *
+ * Holds the register/AHB mappings, clock handles, optional TX/RX DMA
+ * resources, buffer-size configuration and the LUT sequence-id cache.
+ */
+struct asr_qspi {
+	struct device *dev;
+	struct spi_controller *ctrl;
+	/* QSPI IP register space: virtual mapping and physical base */
+	void __iomem *io_map;
+	phys_addr_t io_phys;
+
+	/* memory-mapped AHB read window into the flash */
+	void __iomem *ahb_map;
+	phys_addr_t memmap_base;
+	u32 memmap_size;
+	struct spi_mem_op *ahb_op;
+
+	/* optional SRAM bounce buffer for TX DMA (avoids FIFO underrun) */
+	struct {
+		struct gen_pool *pool;
+		void __iomem *virt;
+		dma_addr_t dma;
+	} sram;
+
+	/* serial-flash top addresses for chip selects A1/A2/B1/B2 */
+	u32 sfa1ad;
+	u32 sfa2ad;
+	u32 sfb1ad;
+	u32 sfb2ad;
+
+	/* PMUap clock/reset control register (phys addr + mapping) */
+	u32 pmuap_reg;
+	void __iomem *pmuap_addr;
+
+	/* FIFO/AHB buffer sizing and transfer-unit limits */
+	u32 rx_buf_size;
+	u32 tx_buf_size;
+	u32 ahb_buf_size;
+	u32 ahb_read_enable;
+	u32 tx_unit_size;
+	u32 rx_unit_size;
+
+	/* DTR (DDR) support and sampling-delay tuning */
+	u32 has_dtr;
+	u32 support_dqs;
+	u32 dtr_tx_delay;
+	u32 dtr_rx_delay;
+	u32 cmd_interrupt;	/* use IRQ completion instead of polling */
+	u32 fr_error_flag;	/* last error bits latched from QSPI_FR */
+
+	/* TX DMA (slave channel feeding QSPI_TBDR) */
+	u32 tx_dma_enable;
+	u32 tx_wmrk;		/* non-zero while a TX DMA burst is armed */
+	struct dma_chan *tx_dma;
+	struct dma_slave_config tx_dma_cfg;
+
+	/* RX DMA (memcpy channel draining the AHB window) */
+	u32 rx_dma_enable;
+	struct dma_chan *rx_dma;
+
+	struct sg_table sgt;
+	struct completion dma_completion;
+
+	u32 cs_selected;
+	u32 max_hz;
+	u32 endian_xchg;	/* byte-swap all register accesses when set */
+	u32 dma_enable;
+
+	struct clk *clk, *bus_clk;
+	struct completion cmd_completion;
+	struct mutex lock;
+	struct pm_qos_request pm_qos_req;
+	struct pm_qos_request pm_ddr_qos;
+	u32 lpm_qos;
+	/* set by the reset-signal IRQ to force a retry of AHB reads */
+	bool rst_protect;
+
+	/* seq id 0 and 1 is reserved */
+	u8 seq_opcode[QSPI_MAX_SEQ_NUM];
+};
+
+/* Chip-select index. NOTE(review): tag is spelled "qpsi" (typo for "qspi"). */
+enum qpsi_cs {
+	QSPI_CS_A1 = 0,
+	QSPI_CS_A2,
+	QSPI_CS_B1,
+	QSPI_CS_B2,
+	QSPI_CS_MAX,
+};
+#define QSPI_DEFAULT_CS		(QSPI_CS_A1)
+
+
+/* Controller operating mode, applied via QSPI_MCR[MDIS]. */
+enum qpsi_mode {
+	QSPI_NORMAL_MODE = 0,
+	QSPI_DISABLE_MODE,
+	QSPI_STOP_MODE,
+};
+
+
+/* Write a 32-bit controller register, byte-swapping if endian_xchg is set. */
+static void qspi_writel(struct asr_qspi *qspi, u32 val, void __iomem *addr)
+{
+	if (qspi->endian_xchg)
+		iowrite32be(val, addr);
+	else
+		iowrite32(val, addr);
+}
+
+/* Read a 32-bit controller register, byte-swapping if endian_xchg is set. */
+static u32 qspi_readl(struct asr_qspi *qspi, void __iomem *addr)
+{
+	if (qspi->endian_xchg)
+		return ioread32be(addr);
+	else
+		return ioread32(addr);
+}
+
+/*
+ * Switch the module on/off via QSPI_MCR[MDIS].
+ * Only NORMAL (clear MDIS) and DISABLE (set MDIS) change the register;
+ * QSPI_STOP_MODE falls through and rewrites MCR unchanged.
+ */
+static void qspi_enter_mode(struct asr_qspi *qspi, uint32_t mode)
+{
+	uint32_t mcr;
+
+	mcr = qspi_readl(qspi, qspi->io_map + QSPI_MCR);
+	if (mode == QSPI_NORMAL_MODE)
+		mcr &= ~QSPI_MCR_MDIS_MASK;
+	else if (mode == QSPI_DISABLE_MODE)
+		mcr |= QSPI_MCR_MDIS_MASK;
+	qspi_writel(qspi, mcr, qspi->io_map + QSPI_MCR);
+}
+
+/*
+ * Program safe default timing: DQS off, default sampling (SMPR=0) and a
+ * fixed TX hold time, with the module disabled during reconfiguration.
+ *
+ * NOTE(review): the clk_hz parameter is currently unused; timing values
+ * are fixed regardless of the requested clock. Always returns 0.
+ */
+static int asr_qspi_set_default_timing(struct asr_qspi *qspi, int clk_hz)
+{
+	void __iomem *base = qspi->io_map;
+	u32 reg;
+
+	/* clock settings */
+	qspi_enter_mode(qspi, QSPI_DISABLE_MODE);
+
+	/* disable DQS */
+	reg = qspi_readl(qspi, base + QSPI_MCR);
+	reg &= ~(QSPI_MCR_DQS_EN | QSPI_MCR_DQS_LP_EN | QSPI_MCR_DQS_INV_EN);
+	qspi_writel(qspi, reg, base + QSPI_MCR);
+
+	reg = 0;
+	qspi_writel(qspi, reg, base + QSPI_SMPR);
+
+	/* set tx hold time; DTR mode adds an extra TX delay in bits [23:16] */
+	reg = 0x202;
+	if (qspi->has_dtr)
+		reg |=  qspi->dtr_tx_delay << 16;
+	qspi_writel(qspi, reg, base + QSPI_FLSHCR);
+
+	/* Module enabled */
+	qspi_enter_mode(qspi, QSPI_NORMAL_MODE);
+
+	return 0;
+}
+
+/*
+ * Acquire and enable the functional and bus clocks, set the functional
+ * clock to max_hz, then apply the default timing.
+ *
+ * Picks "qspi_clk_dtr" when DTR is supported, otherwise "qspi_clk".
+ * Returns 0 on success or a negative errno.
+ * NOTE(review): clk_prepare_enable(bus_clk) result is not checked, and
+ * clocks enabled earlier are not unwound on later failure paths.
+ */
+static int qspi_set_func_clk(struct asr_qspi *qspi, int max_hz)
+{
+	int ret = 0;
+
+	if (qspi->has_dtr) {
+		qspi->clk = devm_clk_get(qspi->dev, "qspi_clk_dtr");
+		if (IS_ERR_OR_NULL(qspi->clk)) {
+			dev_err(qspi->dev, "can not find the clock\n");
+			return -EINVAL;
+		}
+	} else {
+		qspi->clk = devm_clk_get(qspi->dev, "qspi_clk");
+		if (IS_ERR_OR_NULL(qspi->clk)) {
+			dev_err(qspi->dev, "can not find the clock\n");
+			return -EINVAL;
+		}
+	}
+
+	qspi->bus_clk = devm_clk_get(qspi->dev, "qspi_bus_clk");
+	if (IS_ERR_OR_NULL(qspi->bus_clk)) {
+		dev_err(qspi->dev, "can not find the bus clock\n");
+		return -EINVAL;
+	}
+	clk_prepare_enable(qspi->bus_clk);
+
+	ret = clk_set_rate(qspi->clk, max_hz);
+	if (ret) {
+		dev_err(qspi->dev, "fail to set clk, ret:%d\n", ret);
+		return ret;
+	}
+
+	ret = clk_prepare_enable(qspi->clk);
+	if (ret) {
+		dev_err(qspi->dev, "fail to enable clk, ret:%d\n", ret);
+		return ret;
+	}
+
+	asr_qspi_set_default_timing(qspi, max_hz);
+
+	dev_notice(qspi->dev, "bus clock %dHz, PMUap reg[0x%08x]:0x%08x\n",
+		max_hz, qspi->pmuap_reg, qspi_readl(qspi, qspi->pmuap_addr));
+
+	return 0;
+}
+
+/*
+ * Configure pin multiplexing for the selected chip select.
+ * The actual MFP writes are compiled out (FPGA-only bring-up code);
+ * on silicon pinctrl is expected to handle this, so only a log remains.
+ */
+static void qspi_config_mfp(struct asr_qspi *qspi)
+{
+	int cs = qspi->cs_selected;
+
+	/* TODO: only for FPGA */
+#if 0
+	if (cs == QSPI_CS_A1 || cs == QSPI_CS_A2) {
+		writel(0x1002, 0x0101e2c4); // QSPI_DAT3
+		writel(0x1002, 0x0101e2c8); // QSPI_DAT2
+		writel(0x1002, 0x0101e2cc); // QSPI_DAT1
+		writel(0x1002, 0x0101e2d0); // QSPI_DAT0
+		writel(0x1002, 0x0101e2d4); // QSPI_CLK
+		writel(0xd002, 0x0101e2d8); // QSPI_CS1
+		writel(0xd002, 0x0101e2dc); // QSPI_CS2
+	}
+#endif
+	dev_info(qspi->dev, "config mfp for cs:[%d]\n", cs);
+}
+
+/*
+ * Poll a register until the masked bits are set (wait_set) or cleared,
+ * with a 10 us poll interval and timeout_us total budget.
+ * The mask is swabbed when endian_xchg is set because the raw readl in
+ * readl_poll_timeout_atomic() bypasses qspi_readl()'s byte swap.
+ * Returns 0 on success or -ETIMEDOUT.
+ */
+static int asr_qspi_readl_poll_tout(struct asr_qspi *qspi, void __iomem *base,
+					u32 mask, u32 timeout_us, u8 wait_set)
+{
+	u32 reg;
+
+	if (qspi->endian_xchg)
+		mask = swab32(mask);
+
+	if (wait_set)
+		return readl_poll_timeout_atomic(base, reg, (reg & mask),
+						10, timeout_us);
+	else
+		return readl_poll_timeout_atomic(base, reg, !(reg & mask),
+						10, timeout_us);
+}
+
+/*
+ * Soft-reset both the AHB and serial-flash domains of the controller.
+ * Waits for QSPI_SR[BUSY] to clear first; if the controller never goes
+ * idle the reset is skipped with an error log.
+ */
+static void qspi_reset(struct asr_qspi *qspi)
+{
+	uint32_t reg;
+	int err;
+
+	/* QSPI_SR[QSPI_SR_BUSY] must be 0 */
+	err = asr_qspi_readl_poll_tout(qspi, qspi->io_map + QSPI_SR,
+			QSPI_SR_BUSY, QSPI_WAIT_TIMEOUT*1000, QSPI_WAIT_BIT_CLEAR);
+	if (err) {
+		dev_err(qspi->dev, "failed to reset qspi host.\n");
+	} else {
+		/* qspi softreset first */
+		reg = qspi_readl(qspi, qspi->io_map + QSPI_MCR);
+		reg |= QSPI_MCR_SWRSTHD_MASK | QSPI_MCR_SWRSTSD_MASK;
+		qspi_writel(qspi, reg, qspi->io_map + QSPI_MCR);
+		/* read back to confirm both reset bits actually latched */
+		reg = qspi_readl(qspi, qspi->io_map + QSPI_MCR);
+		if ((reg & 0x3) != 0x3)
+			dev_info(qspi->dev, "reset ignored 0x%x.\n", reg);
+
+		/* minimum assert time, then release both reset bits */
+		udelay(1);
+		reg &= ~(QSPI_MCR_SWRSTHD_MASK | QSPI_MCR_SWRSTSD_MASK);
+		qspi_writel(qspi, reg, qspi->io_map + QSPI_MCR);
+	}
+}
+
+/*
+ * Write the serial flash address register once no IP access is in
+ * flight (QSPI_SR[IP_ACC] clear); otherwise log and drop the write.
+ */
+static void qspi_write_sfar(struct asr_qspi *qspi, uint32_t val)
+{
+	int err;
+
+	/* QSPI_SR[IP_ACC] must be 0 */
+	err = asr_qspi_readl_poll_tout(qspi, qspi->io_map + QSPI_SR,
+			QSPI_SR_IP_ACC_MASK, QSPI_WAIT_TIMEOUT*1000, QSPI_WAIT_BIT_CLEAR);
+	if (err)
+		dev_err(qspi->dev, "failed to set QSPI_SFAR.\n");
+	else
+		qspi_writel(qspi, val, qspi->io_map + QSPI_SFAR);
+}
+
+/*
+ * IP Command Trigger could not be executed Error Flag may happen for write
+ * access to RBCT/SFAR register, need retry for these two register
+ */
+/*
+ * Write the RX buffer control register once no IP access is in flight
+ * (QSPI_SR[IP_ACC] clear); otherwise log and drop the write.
+ */
+static void qspi_write_rbct(struct asr_qspi *qspi, uint32_t val)
+{
+	int err;
+
+	/* QSPI_SR[IP_ACC] must be 0 */
+	err = asr_qspi_readl_poll_tout(qspi, qspi->io_map + QSPI_SR,
+			QSPI_SR_IP_ACC_MASK, QSPI_WAIT_TIMEOUT*1000, QSPI_WAIT_BIT_CLEAR);
+	if (err)
+		dev_err(qspi->dev, "failed to set QSPI_RBCT.\n");
+	else
+		qspi_writel(qspi, val, qspi->io_map + QSPI_RBCT);
+}
+
+/*
+ * Configure the AHB read path: route all bus masters to a single AHB
+ * buffer sized ahb_buf_size (BUF0 on ASR1903, BUF3 otherwise; the unused
+ * buffers get master-id 0xe so nothing matches them), then select the
+ * LUT sequence used for AHB-triggered reads.
+ * NOTE(review): non-static but only used by this driver — candidate for
+ * static.
+ */
+void qspi_init_ahbread(struct asr_qspi *qspi, int seq_id)
+{
+	u32 buf_cfg = 0;
+
+	/* ADATSZ is expressed in 8-byte units */
+	buf_cfg = QSPI_BUF3CR_ALLMST_MASK |
+			QSPI_BUF3CR_ADATSZ((qspi->ahb_buf_size / 8));
+
+#ifdef CONFIG_CPU_ASR1903
+	/* Disable BUF1~BUF2, use BUF0 for all masters */
+	qspi_writel(qspi, (512/8 - 1) * 8, qspi->io_map + QSPI_BUF0IND);
+	qspi_writel(qspi, 512, qspi->io_map + QSPI_BUF1IND);
+	qspi_writel(qspi, 512, qspi->io_map + QSPI_BUF2IND);
+
+	/* AHB Master port */
+	qspi_writel(qspi, buf_cfg, qspi->io_map + QSPI_BUF0CR); // other masters
+	qspi_writel(qspi, 0xe, qspi->io_map + QSPI_BUF1CR);
+	qspi_writel(qspi, 0xe, qspi->io_map + QSPI_BUF2CR);
+	qspi_writel(qspi, 0xe, qspi->io_map + QSPI_BUF3CR);
+#else
+	/* Disable BUF0~BUF1, use BUF3 for all masters */
+	qspi_writel(qspi, 0, qspi->io_map + QSPI_BUF0IND);
+	qspi_writel(qspi, 0, qspi->io_map + QSPI_BUF1IND);
+	qspi_writel(qspi, 0, qspi->io_map + QSPI_BUF2IND);
+
+	/* AHB Master port */
+	qspi_writel(qspi, 0xe, qspi->io_map + QSPI_BUF0CR);
+	qspi_writel(qspi, 0xe, qspi->io_map + QSPI_BUF1CR);
+	qspi_writel(qspi, 0xe, qspi->io_map + QSPI_BUF2CR);
+	qspi_writel(qspi, buf_cfg, qspi->io_map + QSPI_BUF3CR); // other masters
+#endif
+	/* set AHB read sequence id */
+	qspi_writel(qspi, QSPI_BFGENCR_SEQID(seq_id), qspi->io_map + QSPI_BFGENCR);
+}
+
+/*
+ * Debug helper: dump every register in reg_offset_table plus the AHB-read
+ * and shared LUT entries to the kernel log.
+ */
+void qspi_dump_reg(struct asr_qspi *qspi)
+{
+	u32 reg = 0;
+	void __iomem *base = qspi->io_map;
+	int i;
+
+	dev_notice(qspi->dev, "dump qspi host register:\n");
+	for (i = 0; i < ARRAY_SIZE(reg_offset_table); i++) {
+		if (i > 0 && (i % 4 == 0))
+			dev_notice(qspi->dev, "\n");
+		reg = qspi_readl(qspi, base + reg_offset_table[i]);
+		dev_notice(qspi->dev, "offset[0x%03x]:0x%08x\t\t",
+				reg_offset_table[i], reg);
+	}
+
+	dev_notice(qspi->dev, "\ndump AHB read LUT:\n");
+	for (i = 0; i < 4; i++) {
+		reg = qspi_readl(qspi, base + QSPI_LUT_REG(SEQID_LUT_AHBREAD_ID, i));
+		dev_notice(qspi->dev, "lut_reg[0x%03x]:0x%08x\t\t",
+				QSPI_LUT_REG(SEQID_LUT_AHBREAD_ID, i), reg);
+	}
+
+	dev_notice(qspi->dev, "\ndump shared LUT:\n");
+	for (i = 0; i < 4; i++) {
+		reg = qspi_readl(qspi, base + QSPI_LUT_REG(SEQID_LUT_SHARED_ID, i));
+		dev_notice(qspi->dev, "lut_reg[0x%03x]:0x%08x\t\t",
+				QSPI_LUT_REG(SEQID_LUT_SHARED_ID, i), reg);
+	}
+	dev_notice(qspi->dev, "\n");
+}
+
+/*
+ * If the slave device content being changed by Write/Erase, need to
+ * invalidate the AHB buffer. This can be achieved by doing the reset
+ * of controller after setting MCR0[SWRESET] bit.
+ */
+static inline void asr_qspi_invalid(struct asr_qspi *qspi)
+{
+	u32 reg;
+
+	/* assert both soft-reset bits to flush the AHB read buffer */
+	reg = qspi_readl(qspi, qspi->io_map + QSPI_MCR);
+	reg |= QSPI_MCR_SWRSTHD_MASK | QSPI_MCR_SWRSTSD_MASK;
+	qspi_writel(qspi, reg, qspi->io_map + QSPI_MCR);
+
+	/*
+	 * The minimum delay : 1 AHB + 2 SFCK clocks.
+	 * Delay 1 us is enough.
+	 */
+	udelay(1);
+
+	reg &= ~(QSPI_MCR_SWRSTHD_MASK | QSPI_MCR_SWRSTSD_MASK);
+	qspi_writel(qspi, reg, qspi->io_map + QSPI_MCR);
+}
+
+/*
+ * Build (or reuse) a LUT sequence for the given spi-mem op and return its
+ * sequence id.
+ *
+ * For non-AHB ops, slots 2..15 act as an opcode cache: a slot already
+ * holding this opcode is returned as-is; otherwise the first free slot is
+ * claimed. If every slot is occupied by other opcodes, the caller-passed
+ * seq_id (the shared slot) is silently reprogrammed.
+ *
+ * Each sequence is four 32-bit LUT words holding up to eight
+ * instruction/pad/operand triples: CMD, optional ADDR, optional DUMMY,
+ * optional READ/WRITE, terminated by STOP. DDR variants are selected per
+ * phase from the op's dtr flags. The LUT is unlocked with the key before
+ * programming and re-locked afterwards.
+ */
+static u8 asr_qspi_prepare_lut(struct asr_qspi *qspi,
+				const struct spi_mem_op *op, u32 seq_id)
+{
+	u32 lutval[4] = {0,};
+	int lutidx = 0;
+	int i;
+	u8 opcode;
+
+	if (seq_id != SEQID_LUT_AHBREAD_ID) {
+		for (i = 2; i < QSPI_MAX_SEQ_NUM; i++) {
+			opcode = qspi->seq_opcode[i];
+			if (!opcode) {
+				seq_id = i;
+				break;
+			} else if (opcode == op->cmd.opcode) {
+				return i;
+			}
+		}
+	}
+
+	/* qspi cmd */
+	lutval[0] |= LUT_DEF(lutidx,
+			(op->cmd.dtr ? LUT_INSTR_CMD_DDR : LUT_INSTR_CMD),
+			LUT_PAD(op->cmd.buswidth),
+			op->cmd.opcode);
+	lutidx++;
+
+	/* addr bytes */
+	if (op->addr.nbytes) {
+		lutval[lutidx / 2] |=
+			LUT_DEF(lutidx,
+				(op->addr.dtr ? LUT_INSTR_ADDR_DDR : LUT_INSTR_ADDR),
+				LUT_PAD(op->addr.buswidth),
+				op->addr.nbytes * 8);
+		lutidx++;
+	}
+
+	/* dummy cycles = bytes * 8 / buswidth, halved again in DDR mode */
+	if (op->dummy.nbytes) {
+		lutval[lutidx / 2] |= LUT_DEF(lutidx, LUT_INSTR_DUMMY,
+					      LUT_PAD(op->dummy.buswidth),
+					      op->dummy.nbytes * 8 /
+					      op->dummy.buswidth /
+					      (op->dummy.dtr ? 2 : 1));
+		lutidx++;
+	}
+
+	/* read/write data bytes; length 0 means "use QSPI_IPCR IDATSZ" */
+	if (op->data.buswidth) {
+		u8 inst;
+
+		if ( op->data.dir == SPI_MEM_DATA_IN) {
+			if (op->data.dtr)
+				inst = LUT_INSTR_READ_DDR;
+			else
+				inst = LUT_INSTR_READ;
+		} else {
+			if (op->data.dtr)
+				inst = LUT_INSTR_WRITE_DDR;
+			else
+				inst = LUT_INSTR_WRITE;
+		}
+		lutval[lutidx / 2] |= LUT_DEF(lutidx, inst,
+					      LUT_PAD(op->data.buswidth),
+					      0);
+		lutidx++;
+	}
+
+	/* stop condition. */
+	lutval[lutidx / 2] |= LUT_DEF(lutidx, LUT_INSTR_STOP, 0, 0);
+
+	/* unlock LUT */
+	qspi_writel(qspi, QSPI_LUTKEY_VALUE, qspi->io_map + QSPI_LUTKEY);
+	qspi_writel(qspi, QSPI_LCKER_UNLOCK, qspi->io_map + QSPI_LCKCR);
+
+	/* fill LUT register */
+	for (i = 0; i < ARRAY_SIZE(lutval); i++)
+		qspi_writel(qspi, lutval[i], qspi->io_map + QSPI_LUT_REG(seq_id, i));
+
+	/* lock LUT */
+	qspi_writel(qspi, QSPI_LUTKEY_VALUE, qspi->io_map + QSPI_LUTKEY);
+	qspi_writel(qspi, QSPI_LCKER_LOCK, qspi->io_map + QSPI_LCKCR);
+
+	dev_dbg(qspi->dev, "opcode:0x%x, lut_reg[0:0x%x, 1:0x%x, 2:0x%x, 3:0x%x]\n",
+		op->cmd.opcode, lutval[0], lutval[1], lutval[2], lutval[3]);
+
+	/* remember which opcode owns this slot for future reuse */
+	qspi->seq_opcode[seq_id] = op->cmd.opcode;
+	return seq_id;
+}
+
+/* Set the given interrupt-enable bits in QSPI_RSER (read-modify-write). */
+static void asr_qspi_enable_interrupt(struct asr_qspi *qspi, u32 val)
+{
+	u32 resr = 0;
+
+	resr = qspi_readl(qspi, qspi->io_map + QSPI_RSER);
+	resr |= val;
+	qspi_writel(qspi, resr, qspi->io_map + QSPI_RSER);
+}
+
+/* Clear the given interrupt-enable bits in QSPI_RSER (read-modify-write). */
+static void asr_qspi_disable_interrupt(struct asr_qspi *qspi, u32 val)
+{
+	u32 resr = 0;
+
+	resr = qspi_readl(qspi, qspi->io_map + QSPI_RSER);
+	resr &= ~val;
+	qspi_writel(qspi, resr, qspi->io_map + QSPI_RSER);
+}
+
+/*
+ * Request the optional DMA channels: a MEMCPY channel for draining the
+ * AHB read window (RX) and a slave channel feeding QSPI_TBDR (TX).
+ * On any failure the corresponding *_dma_enable flag is cleared and the
+ * driver falls back to PIO — this function cannot fail.
+ * NOTE(review): TX dst_addr is io_phys + QSPI_TBDR - 4; presumably a
+ * controller/DMA addressing quirk — confirm against the DMA engine used.
+ */
+static void asr_qspi_prepare_dma(struct asr_qspi *qspi)
+{
+	struct dma_slave_config dma_cfg;
+	struct device *dev = qspi->dev;
+	dma_cap_mask_t mask;
+
+	/* RX DMA: DMA_MEMCPY type */
+	dma_cap_zero(mask);
+	dma_cap_set(DMA_MEMCPY, mask);
+
+	if (qspi->rx_dma_enable) {
+		qspi->rx_dma = dma_request_chan_by_mask(&mask);
+		if (IS_ERR_OR_NULL(qspi->rx_dma)) {
+			dev_err(dev, "rx dma request channel failed\n");
+			qspi->rx_dma = NULL;
+			qspi->rx_dma_enable = 0;
+		} else {
+			dev_notice(dev, "rx dma enable, channel:%d\n",
+					qspi->rx_dma->chan_id);
+		}
+	} else {
+		dev_notice(dev, "rx dma not enable\n");
+	}
+
+	/* TX DMA: DMA_SLAVE type */
+	if (qspi->tx_dma_enable) {
+		qspi->tx_dma = dma_request_slave_channel(dev, "tx-dma");
+		if (qspi->tx_dma) {
+			memset(&dma_cfg, 0, sizeof(struct dma_slave_config));
+			dma_cfg.direction = DMA_MEM_TO_DEV;
+			dma_cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+			dma_cfg.dst_addr = qspi->io_phys + QSPI_TBDR - 4;
+			dma_cfg.dst_maxburst = QSPI_TX_DMA_BURST;
+			if (dmaengine_slave_config(qspi->tx_dma, &dma_cfg)) {
+				dev_err(dev, "tx dma slave config failed\n");
+				dma_release_channel(qspi->tx_dma);
+				qspi->tx_dma = NULL;
+				qspi->tx_dma_enable = 0;
+			} else {
+				dev_notice(dev, "tx dma enable, channel:%d\n",
+						qspi->tx_dma->chan_id);
+			}
+		}
+	} else {
+		dev_notice(dev, "tx dma not enable\n");
+	}
+
+	if (qspi->tx_dma || qspi->rx_dma)
+		init_completion(&qspi->dma_completion);
+}
+
+/* DMA completion callback: signal the completion passed as callback_param. */
+static void asr_qspi_dma_callback(void *arg)
+{
+	struct completion *dma_completion = arg;
+
+	complete(dma_completion);
+}
+
+/*
+ * Start a TX DMA transfer feeding the controller's TX FIFO.
+ * If an SRAM bounce buffer exists, the payload is copied there first and
+ * a single-buffer slave transfer is issued; otherwise the op's buffer is
+ * scatter-gather mapped. Returns 0 once the transfer is issued (the
+ * caller waits for FIFO fill / completion), negative errno on failure.
+ *
+ * NOTE(review): op->data.buf.in is used on the TX path — in/out share a
+ * union so the pointer is the same, but buf.out would be the correct
+ * member. Also memcpy() to the __iomem sram.virt should arguably be
+ * memcpy_toio(); confirm sram mapping attributes.
+ */
+int asr_qspi_tx_dma_exec(struct asr_qspi *qspi,
+			const struct spi_mem_op *op)
+{
+	struct dma_async_tx_descriptor *desc;
+	enum dma_transfer_direction dma_dir = DMA_MEM_TO_DEV;
+	dma_cookie_t cookie;
+	int err = 0;
+
+	if (qspi->sram.virt) {
+		/* use buffer from sram to avoid tx underrun error */
+		memcpy(qspi->sram.virt, op->data.buf.in, op->data.nbytes);
+
+		desc = dmaengine_prep_slave_single(qspi->tx_dma,
+			qspi->sram.dma, op->data.nbytes, dma_dir,
+			DMA_PREP_INTERRUPT);
+		if (!desc) {
+			dev_err(qspi->dev, "tx dma prep error\n");
+			return -ENOMEM;
+		}
+	} else {
+		if (!virt_addr_valid(op->data.buf.in) ||
+		    spi_controller_dma_map_mem_op_data(qspi->ctrl,
+							op, &qspi->sgt)) {
+			dev_err(qspi->dev, "tx dma map error\n");
+			return -EIO;
+		}
+
+		desc = dmaengine_prep_slave_sg(
+			qspi->tx_dma, qspi->sgt.sgl, qspi->sgt.nents,
+			dma_dir, DMA_PREP_INTERRUPT);
+		if (!desc) {
+			dev_err(qspi->dev, "tx dma prep error\n");
+			err = -ENOMEM;
+			goto out;
+		}
+	}
+
+	reinit_completion(&qspi->dma_completion);
+	desc->callback = asr_qspi_dma_callback;
+	desc->callback_param = &qspi->dma_completion;
+
+	cookie = dmaengine_submit(desc);
+	err = dma_submit_error(cookie);
+	if (err) {
+		dev_err(qspi->dev, "tx dma dmaengine_submit error\n");
+		goto out;
+	}
+
+	dma_async_issue_pending(qspi->tx_dma);
+
+	return 0;
+
+out:
+	/* undo the SG mapping only when we created one (non-SRAM path) */
+	if (!qspi->sram.virt)
+		spi_controller_dma_unmap_mem_op_data(qspi->ctrl, op, &qspi->sgt);
+	return err;
+}
+
+/*
+ * Run one RX memcpy-DMA transfer of len bytes and wait for completion.
+ * The wait budget is msecs_to_jiffies(len) — i.e. roughly 1 ms per byte,
+ * scaling the timeout with transfer size. Returns 0 on success,
+ * -EIO on prep/submit failure, -ETIMEDOUT if the DMA never completes
+ * (the channel is terminated in that case).
+ */
+int asr_qspi_rx_dma_exec(struct asr_qspi *qspi, dma_addr_t dma_dst,
+			dma_addr_t dma_src, size_t len)
+{
+	dma_cookie_t cookie;
+	enum dma_ctrl_flags flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT;
+	struct dma_async_tx_descriptor *desc;
+	int ret;
+
+	desc = dmaengine_prep_dma_memcpy(qspi->rx_dma, dma_dst, dma_src, len, flags);
+	if (!desc) {
+		dev_err(qspi->dev, "dmaengine_prep_dma_memcpy error\n");
+		return -EIO;
+	}
+
+	reinit_completion(&qspi->dma_completion);
+	desc->callback = asr_qspi_dma_callback;
+	desc->callback_param = &qspi->dma_completion;
+	cookie = dmaengine_submit(desc);
+	ret = dma_submit_error(cookie);
+	if (ret) {
+		dev_err(qspi->dev, "dma_submit_error %d\n", cookie);
+		return -EIO;
+	}
+
+	dma_async_issue_pending(qspi->rx_dma);
+	ret = wait_for_completion_timeout(&qspi->dma_completion,
+					  msecs_to_jiffies(len));
+	if (ret <= 0) {
+		dmaengine_terminate_sync(qspi->rx_dma);
+		dev_err(qspi->dev, "DMA wait_for_completion_timeout\n");
+		return -ETIMEDOUT;
+	}
+
+	return 0;
+}
+
+/*
+ * Drain the AHB window into a scatter-gather list via memcpy-DMA.
+ * Source is the physical AHB address memmap_base + from, advanced by
+ * each segment's length; stops and returns the first failure.
+ */
+static int asr_qspi_rx_dma_sg(struct asr_qspi *qspi, struct sg_table rx_sg,
+			       loff_t from)
+{
+	struct scatterlist *sg;
+	dma_addr_t dma_src = qspi->memmap_base + from;
+	dma_addr_t dma_dst;
+	int i, len, ret;
+
+	for_each_sg(rx_sg.sgl, sg, rx_sg.nents, i) {
+		dma_dst = sg_dma_address(sg);
+		len = sg_dma_len(sg);
+		dev_dbg(qspi->dev, "rx dma, dst:0x%08x, src:0x%08x, len:%d\n",
+			dma_dst, dma_src, len);
+		ret = asr_qspi_rx_dma_exec(qspi, dma_dst, dma_src, len);
+		if (ret)
+			return ret;
+		dma_src += len;
+	}
+
+	return 0;
+}
+
+/*
+ * Read op->data.nbytes from flash through the memory-mapped AHB window.
+ * Tries RX DMA first (when enabled and the buffer is mappable); on any
+ * DMA failure it falls back to memcpy_fromio().
+ *
+ * The memcpy path is guarded by rst_protect: the reset-signal IRQ sets
+ * the flag, and a read that raced with a reset is retried from scratch.
+ * NOTE(review): this is a best-effort window — the flag is only sampled
+ * after the copy, so ordering relies on the IRQ firing during the copy.
+ *
+ * Returns 0 on success, -ENOTSUPP when the range exceeds the mapping.
+ */
+static int asr_qspi_ahb_read(struct asr_qspi *qspi,
+				const struct spi_mem_op *op)
+{
+	int ret = 0;
+	u32 len = op->data.nbytes;
+	u32 from = op->addr.val;
+	struct sg_table sgt;
+
+	/* Read out the data directly from the AHB buffer. */
+	dev_dbg(qspi->dev, "ahb read %d bytes from address:0x%llx\n",
+				len, (qspi->memmap_base + op->addr.val));
+	if (from + len > qspi->memmap_size)
+		return -ENOTSUPP;
+
+	/* firstly try the DMA */
+	if (qspi->rx_dma_enable) {
+		if (virt_addr_valid(op->data.buf.in) &&
+		    !spi_controller_dma_map_mem_op_data(qspi->ctrl, op, &sgt)) {
+			ret = asr_qspi_rx_dma_sg(qspi, sgt, from);
+			spi_controller_dma_unmap_mem_op_data(qspi->ctrl, op, &sgt);
+		} else {
+			ret = -EIO;
+			dev_err(qspi->dev, "spi_controller_dma_map_mem_op_data error\n");
+		}
+
+		/* DMA completed */
+		if (!ret)
+			return 0;
+	}
+
+	if (qspi->rx_dma_enable && ret) {
+		dev_notice(qspi->dev, "rx dma read fallback to memcpy read.\n");
+	}
+
+restart:
+	qspi->rst_protect = false;
+	if (!qspi->rx_dma_enable || (qspi->rx_dma_enable && ret)) {
+		memcpy_fromio(op->data.buf.in, (qspi->ahb_map + op->addr.val), len);
+	}
+
+	/* reset IRQ fired mid-copy: data may be stale, read again */
+	if (qspi->rst_protect) {
+		dev_info_ratelimited(qspi->dev, "retry read for reset protect\n");
+		goto restart;
+	}
+
+	return 0;
+}
+
+/*
+ * Load the TX FIFO for a write op, by PIO or DMA.
+ *
+ * PIO is used when TX DMA is off, the payload is small (< 32 bytes), or
+ * its size is not a multiple of the DMA burst. The payload is pushed to
+ * QSPI_TBDR word by word (tail bytes padded within the last word), then
+ * zero-padded so the FIFO holds a multiple of 16 bytes — the controller
+ * needs at least 128 bits available per pop or QSPI_FR[TBUF] is raised.
+ * NOTE(review): when the payload is already 16-byte aligned the modulo
+ * yields 0 and a full extra 16 zero bytes are pushed; presumably benign
+ * padding (IPCR carries the real length) — confirm.
+ *
+ * DMA path: program the TX watermark, optionally boost DDR QoS, start
+ * the slave DMA, enable the TBFDE request, then busy-wait until the
+ * FIFO holds min(tx_buf_size, nbytes) before the caller triggers the
+ * transaction (prevents underrun). Gives up with -EAGAIN after ~10 ms
+ * of polling plus 500 ms of sleeping.
+ *
+ * Returns 0 on success, -EIO if the DMA could not be started.
+ */
+static int asr_qspi_fill_txfifo(struct asr_qspi *qspi,
+				 const struct spi_mem_op *op)
+{
+	void __iomem *base = qspi->io_map;
+	int i;
+	u32 val;
+	u32 tbsr;
+	u32 wait_cnt;
+
+	if (!qspi->tx_dma_enable || op->data.nbytes < QSPI_TX_BUFF_POP_MIN*2 ||
+	    (op->data.nbytes % QSPI_TX_DMA_BURST)) {
+		int tbdr_cnt = 0;
+
+		qspi->tx_wmrk = 0;
+		for (i = 0; i < ALIGN_DOWN(op->data.nbytes, 4); i += 4) {
+			memcpy(&val, op->data.buf.out + i, 4);
+			qspi_writel(qspi, val, base + QSPI_TBDR);
+			tbdr_cnt += 4;
+		}
+
+		/* partial trailing word: copy the remaining 1-3 bytes */
+		if (i < op->data.nbytes) {
+			memcpy(&val, op->data.buf.out + i, op->data.nbytes - i);
+			qspi_writel(qspi, val, base + QSPI_TBDR);
+			tbdr_cnt += 4;
+		}
+
+		/*
+		 * There must be at least 128bit data available in TX FIFO
+		 * for any pop operation otherwise QSPI_FR[TBUF] will be set
+		 */
+		tbdr_cnt = tbdr_cnt % QSPI_TX_BUFF_POP_MIN;
+		for (i = tbdr_cnt; i < QSPI_TX_BUFF_POP_MIN; i += 4)
+			qspi_writel(qspi, 0, base + QSPI_TBDR);
+	} else {
+		/*
+		 * Note that the number of bytes per DMA loop is determined
+		 * by thee size of the QSPI_TBCT[WMRK].
+		 * bytes per DMA loop = (QSPI_TBCT[WMRK] + 1) * 4.
+		 * set QSPI_TX_WMRK as the TX watermark.
+		 */
+		qspi->tx_wmrk = QSPI_TX_WMRK;
+		qspi_writel(qspi, qspi->tx_wmrk, base + QSPI_TBCT);
+
+		/* increase ddr freq for tx dma, avoid fifo underrun */
+		if (!qspi->sram.virt && qspi->tx_unit_size > qspi->tx_buf_size)
+			pm_qos_update_request_timeout(
+				&qspi->pm_ddr_qos, INT_MAX, 100*1000);
+
+		/* config DMA channel and start */
+		if (asr_qspi_tx_dma_exec(qspi, op)) {
+			qspi->tx_wmrk = 0;
+			dev_err(qspi->dev, "failed to start tx dma\n");
+			return -EIO;
+		}
+		/* enable DMA request */
+		asr_qspi_enable_interrupt(qspi, QSPI_RSER_TBFDE);
+
+		/*
+		 * before trigger qspi to send data to external bus, TX bufer
+		 * need to have some data, or underrun error may happen.
+		 * DMA need some time to write data to TX buffer, so add
+		 * a delay here for this requirement.
+		 */
+		wait_cnt = 0;
+		do {
+			/* TBSR[31:16] = TRCTR, number of 4-byte entries filled */
+			tbsr = qspi_readl(qspi, base + QSPI_TBSR);
+			tbsr = 4 * (tbsr >> 16);
+			if (tbsr >= min_t(unsigned int, qspi->tx_buf_size,
+					  op->data.nbytes))
+				break;
+
+			if (wait_cnt > 10050) {
+				dev_err(qspi->dev,
+					"TX DMA failed, TBSR=0x%x\n", tbsr);
+				qspi_dump_reg(qspi);
+
+				/* disable all interrupts */
+				qspi_writel(qspi, 0, qspi->io_map + QSPI_RSER);
+				dmaengine_dump_status(qspi->tx_dma);
+				dmaengine_terminate_all(qspi->tx_dma);
+				if (!qspi->sram.virt)
+					spi_controller_dma_unmap_mem_op_data(
+						qspi->ctrl, op, &qspi->sgt);
+				qspi->tx_wmrk = 0;
+
+				return -EAGAIN;
+			}
+
+			/* poll 10 ms at 1 us, then back off to 10 ms sleeps */
+			if (wait_cnt++ >= 10000)
+				msleep(10);
+			else
+				udelay(1);
+		} while (1);
+	}
+
+	return 0;
+}
+
+/*
+ * Drain an IP-command read from the RX buffer registers (QSPI_RBDR[n])
+ * into op->data.buf.in, handling a partial trailing word.
+ */
+static void asr_qspi_read_rxfifo(struct asr_qspi *qspi,
+			  const struct spi_mem_op *op)
+{
+	void __iomem *base = qspi->io_map;
+	int i;
+	u8 *buf = op->data.buf.in;
+	u32 val;
+
+	dev_dbg(qspi->dev, "ip read %d bytes\n", op->data.nbytes);
+	for (i = 0; i < ALIGN_DOWN(op->data.nbytes, 4); i += 4) {
+		val = qspi_readl(qspi, base + QSPI_RBDR(i / 4));
+		memcpy(buf + i, &val, 4);
+	}
+
+	/* last 1-3 bytes come from one more RBDR entry */
+	if (i < op->data.nbytes) {
+		val = qspi_readl(qspi, base + QSPI_RBDR(i / 4));
+		memcpy(buf + i, &val, op->data.nbytes - i);
+	}
+}
+
+/*
+ * IRQ handler for the external reset signal: flag rst_protect so an AHB
+ * read in progress (see asr_qspi_ahb_read) is retried.
+ */
+static irqreturn_t asr_qspi_reset_handler(int irq, void *dev_id)
+{
+	struct asr_qspi *qspi = dev_id;
+
+	qspi->rst_protect = true;
+	dev_info_ratelimited(qspi->dev, "qspi catch reset signal\n");
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Main controller IRQ handler: acknowledge QSPI_FR flags (all except
+ * RBDF, which gates RX-buffer pops and is cleared elsewhere), latch and
+ * log any error flags, and complete cmd_completion when interrupt-driven
+ * command completion is in use.
+ */
+static irqreturn_t asr_qspi_irq_handler(int irq, void *dev_id)
+{
+	struct asr_qspi *qspi = dev_id;
+	u32 fr;
+
+	fr = qspi_readl(qspi, qspi->io_map + QSPI_FR);
+	/* W1C-ack everything but RBDF */
+	qspi_writel(qspi, fr & ~QSPI_FR_RBDF, qspi->io_map + QSPI_FR);
+	dev_dbg(qspi->dev, "QSPI_FR:0x%08x\n", fr);
+
+	/* check QSPI_FR error flag */
+	if (fr & (COMMAND_FR_FLAG | BUFFER_FR_FLAG)) {
+		qspi->fr_error_flag = fr & (COMMAND_FR_FLAG | BUFFER_FR_FLAG);
+
+		if (fr & QSPI_FR_IPGEF)
+			dev_err(qspi->dev, "IP command trigger during AHB grant\n");
+		if (fr & QSPI_FR_IPIEF)
+			dev_err(qspi->dev, "IP command trigger could not be executed\n");
+		if (fr & QSPI_FR_IPAEF)
+			dev_err(qspi->dev, "IP command trigger during AHB access\n");
+		if (fr & QSPI_FR_IUEF)
+			dev_err(qspi->dev, "IP command usage error\n");
+		if (fr & QSPI_FR_AIBSEF)
+			dev_err(qspi->dev, "AHB illegal burst size error\n");
+		if (fr & QSPI_FR_AITEF)
+			dev_err(qspi->dev, "AHB illegal trancaction error\n");
+		if (fr & QSPI_FR_ABSEF)
+			dev_err(qspi->dev, "AHB sequence error\n");
+
+		if (fr & QSPI_FR_TBUF)
+			dev_err_ratelimited(qspi->dev, "TX buffer underrun\n");
+		if (fr & QSPI_FR_RBOF)
+			dev_err(qspi->dev, "RX buffer overflow\n");
+		if (fr & QSPI_FR_ABOF)
+			dev_err(qspi->dev, "AHB buffer overflow\n");
+	}
+
+	/* wake the waiter on transaction-finished or any error */
+	if (qspi->cmd_interrupt && (fr & (QSPI_FR_TFF_MASK | COMMAND_FR_FLAG | BUFFER_FR_FLAG)))
+		complete(&qspi->cmd_completion);
+
+	return IRQ_HANDLED;
+}
+
/*
 * Trigger LUT sequence @seq_id for @op and wait for it to finish, either
 * interrupt-driven (cmd_completion, signalled by asr_qspi_irq_handler)
 * or by polling QSPI_FR.TFF.
 *
 * Returns 0 on success, -ETIMEDOUT on a stuck transaction, or -EAGAIN on
 * a TX-buffer underrun so the caller can shrink the transfer and retry.
 * Also unmaps/clears the TX DMA bookkeeping set up by the fill path.
 */
static int asr_qspi_do_op(struct asr_qspi *qspi, const struct spi_mem_op *op,
			u8 seq_id)
{
	void __iomem *base = qspi->io_map;
	int err = 0;
	u32 mcr;
	//void __iomem *mpmu_acgr = regs_addr_get_va(REGS_ADDR_MPMU) + ASR_MPMU_ACGR;

#ifdef ASR_DUMP_QSPI_REG
	/* dump reg if need */
	qspi_dump_reg(qspi);
#endif

	if (qspi->cmd_interrupt) {
		/* arm the completion before unmasking the interrupts */
		asr_qspi_enable_interrupt(qspi, QSPI_RSER_TFIE | BUFFER_ERROR_INT | COMMAND_ERROR_INT);
		init_completion(&qspi->cmd_completion);
	}

	/* trigger LUT: writing IPCR with the sequence ID starts the command */
	qspi_writel(qspi, op->data.nbytes | QSPI_IPCR_SEQID(seq_id),
		    base + QSPI_IPCR);

	/* wait for the transaction complete */
	if (qspi->cmd_interrupt) {
		if (!wait_for_completion_timeout(&qspi->cmd_completion,
						msecs_to_jiffies(1000)))
			err = -ETIMEDOUT;
	} else {
		err = asr_qspi_readl_poll_tout(qspi, base + QSPI_FR, QSPI_FR_TFF_MASK,
						QSPI_WAIT_TIMEOUT*1000, QSPI_WAIT_BIT_SET);
	}
	if (err) {
		dev_err(qspi->dev, "opcode:0x%x transaction abort, ret:%d, error flag:0x%08x\n",
			op->cmd.opcode, err, qspi->fr_error_flag);
		dev_err(qspi->dev, "pmuap[0x%08x]:0x%08x\n", qspi->pmuap_reg, qspi_readl(qspi, qspi->pmuap_addr));
		//dev_err(qspi->dev, "mpmu[0x%08x]:0x%08x\n", ASR_MPMU_ACGR, qspi_readl(qspi, mpmu_acgr));
		qspi_dump_reg(qspi);
		goto tx_dma_unmap;
	}

	/* read RX buffer for IP command read */
	if (op->data.nbytes && op->data.dir == SPI_MEM_DATA_IN) {
#ifdef ASR_DUMP_QSPI_REG
		qspi_dump_reg(qspi);
#endif
		asr_qspi_read_rxfifo(qspi, op);
	}

	/* TX underrun: abort DMA, flush the TX buffer, ask caller to retry */
	if (qspi->fr_error_flag & QSPI_FR_TBUF) {
		/* abort current dma transfer */
		if (qspi->tx_dma_enable)
			dmaengine_terminate_all(qspi->tx_dma);

		/* clear TX buf */
		mcr = qspi_readl(qspi, qspi->io_map + QSPI_MCR);
		mcr |= QSPI_MCR_CLR_TXF_MASK ;
		qspi_writel(qspi, mcr, qspi->io_map + QSPI_MCR);

		err = -EAGAIN;
	}

tx_dma_unmap:
	/* tx_wmrk != 0 means the fill path mapped a TX DMA scatterlist */
	if (qspi->tx_wmrk) {
		/* sram-backed TX buffers are not DMA-mapped, skip unmap */
		if (!qspi->sram.virt)
			spi_controller_dma_unmap_mem_op_data(qspi->ctrl, op, &qspi->sgt);
		qspi->tx_wmrk = 0;
	}

	/* disable all interrupts */
	qspi_writel(qspi, 0, qspi->io_map + QSPI_RSER);
	return err;
}
+
/* Debug helper: dump every field of a spi_mem_op at dev_dbg level. */
static void dump_spi_mem_op_info(struct asr_qspi *qspi,
				const struct spi_mem_op *op)
{
	dev_dbg(qspi->dev, "cmd.opcode:0x%x\n", op->cmd.opcode);
	dev_dbg(qspi->dev, "cmd.buswidth:%d\n", op->cmd.buswidth);
	dev_dbg(qspi->dev, "addr.nbytes:%d,\n", op->addr.nbytes);
	dev_dbg(qspi->dev, "addr.buswidth:%d\n", op->addr.buswidth);
	dev_dbg(qspi->dev, "addr.val:0x%llx\n", op->addr.val);
	dev_dbg(qspi->dev, "dummy.nbytes:%d\n", op->dummy.nbytes);
	dev_dbg(qspi->dev, "dummy.buswidth:%d\n", op->dummy.buswidth);
	dev_dbg(qspi->dev, "%s data.nbytes:%d\n",
		(op->data.dir == SPI_MEM_DATA_IN) ? "read" :"write",
		op->data.nbytes);
	dev_dbg(qspi->dev, "data.buswidth:%d\n", op->data.buswidth);
	dev_dbg(qspi->dev, "data.buf:0x%p\n", op->data.buf.in);
}
+
+static int asr_qspi_check_buswidth(struct asr_qspi *qspi, u8 width)
+{
+	switch (width) {
+	case 1:
+	case 2:
+	case 4:
+		return 0;
+	}
+
+	return -ENOTSUPP;
+}
+
+static bool asr_qspi_supports_op(struct spi_mem *mem,
+				 const struct spi_mem_op *op)
+{
+	struct asr_qspi *qspi = spi_controller_get_devdata(mem->spi->master);
+	int ret;
+	int op_dtr;
+
+	ret = asr_qspi_check_buswidth(qspi, op->cmd.buswidth);
+
+	if (op->addr.nbytes)
+		ret |= asr_qspi_check_buswidth(qspi, op->addr.buswidth);
+
+	if (op->dummy.nbytes)
+		ret |= asr_qspi_check_buswidth(qspi, op->dummy.buswidth);
+
+	if (op->data.nbytes)
+		ret |= asr_qspi_check_buswidth(qspi, op->data.buswidth);
+
+	if (ret)
+		return false;
+
+	/* address bytes should be equal to or less than 4 bytes */
+	if (op->addr.nbytes > 4)
+		return false;
+
+	/* check controller TX/RX buffer limits and alignment */
+	if (op->data.dir == SPI_MEM_DATA_IN &&
+	    (op->data.nbytes > qspi->rx_unit_size ||
+	    (op->data.nbytes > qspi->rx_buf_size - 4 && !IS_ALIGNED(op->data.nbytes, 4)))) {
+		return false;
+	}
+
+	if (op->data.dir == SPI_MEM_DATA_OUT && op->data.nbytes > qspi->tx_unit_size)
+		return false;
+
+	/*
+	 * If requested address value is greater than controller assigned
+	 * memory mapped space, return error as it didn't fit in the range.
+	 */
+	if (op->addr.val >= qspi->memmap_size) {
+		pr_err("asr_qspi_supports_op: addr.val:%lld greater than the map size\n", op->addr.val);
+		return false;
+	}
+
+	/* number of dummy clock cycles should be <= 64 cycles */
+	if (op->dummy.buswidth &&
+	    (op->dummy.nbytes * 8 / op->dummy.buswidth > 64))
+		return false;
+
+	if (op->cmd.dtr || op->addr.dtr || op->data.dtr)
+		op_dtr = 1;
+	else
+		op_dtr = 0;
+
+	if (!qspi->has_dtr && op_dtr)
+		return false;
+
+	return true;
+}
+
+void asr_qspi_adjust_tx_size(struct spi_mem *mem, u32 reduce_sz)
+{
+	struct asr_qspi *qspi = spi_controller_get_devdata(mem->spi->master);
+
+	if (qspi->tx_dma_enable) {
+		if (reduce_sz)
+			qspi->tx_unit_size = qspi->tx_buf_size;
+		else
+			qspi->tx_unit_size = SZ_4K;
+	}
+}
+
/*
 * Run one spi-mem operation over the IP-command interface, serialized by
 * qspi->lock: wait for the controller to go idle, program the flash
 * address and LUT sequence, fill the TX FIFO for writes, then execute via
 * asr_qspi_do_op().
 */
static int __asr_qspi_exec_op(struct spi_mem *mem, const struct spi_mem_op *op)
{
	struct asr_qspi *qspi = spi_controller_get_devdata(mem->spi->master);
	void __iomem *mpmu_acgr = regs_addr_get_va(REGS_ADDR_MPMU) + ASR_MPMU_ACGR;
	void __iomem *base = qspi->io_map;
	int err = 0;
	u32 mask;
	u32 reg;
	u8 seq_id;

	mutex_lock(&qspi->lock);

	dump_spi_mem_op_info(qspi, op);

	/* wait for controller being ready (no IP/AHB access in flight) */
	mask = QSPI_SR_BUSY | QSPI_SR_IP_ACC_MASK | QSPI_SR_AHB_ACC_MASK;
	err = asr_qspi_readl_poll_tout(qspi, base + QSPI_SR, mask,
		QSPI_WAIT_TIMEOUT*1000, QSPI_WAIT_BIT_CLEAR);
	if (err) {
		dev_err(qspi->dev, "controller not ready!\n");
		dev_err(qspi->dev, "pmuap[0x%08x]:0x%08x\n", qspi->pmuap_reg,
			qspi_readl(qspi, qspi->pmuap_addr));
		dev_err(qspi->dev, "mpmu[0x%08x]:0x%08x\n", ASR_MPMU_ACGR,
			qspi_readl(qspi, mpmu_acgr));
		qspi_dump_reg(qspi);
		mutex_unlock(&qspi->lock);
		return err;
	}

	/* clear TX/RX buffer before transaction */
	reg = qspi_readl(qspi, base + QSPI_MCR);
	reg |= QSPI_MCR_CLR_TXF_MASK | QSPI_MCR_CLR_RXF_MASK;
	qspi_writel(qspi, reg, base + QSPI_MCR);

	/*
	 * reset the sequence pointers whenever the sequence ID is changed by
	 * updating the SEDID filed in QSPI_IPCR OR QSPI_BFGENCR.
	 */
	reg = qspi_readl(qspi, base + QSPI_SPTRCLR);
	reg |= (QSPI_SPTRCLR_IPPTRC | QSPI_SPTRCLR_BFPTRC);
	qspi_writel(qspi, reg, base + QSPI_SPTRCLR);

	/* set the flash address into the QSPI_SFAR */
	qspi_write_sfar(qspi, qspi->memmap_base + op->addr.val);

	/* clear QSPI_FR before trigger LUT command */
	reg = qspi_readl(qspi, base + QSPI_FR);
	if (reg)
		qspi_writel(qspi, reg, base + QSPI_FR);
	qspi->fr_error_flag = 0;

	/* IP command: build the LUT sequence, preload TX data for writes */
	seq_id = asr_qspi_prepare_lut(qspi, op, SEQID_LUT_SHARED_ID);
	if (op->data.nbytes && op->data.dir == SPI_MEM_DATA_OUT)
		err = asr_qspi_fill_txfifo(qspi, op);
	if (!err)
		err = asr_qspi_do_op(qspi, op, seq_id);

	/*
	 * invalidate the data in the AHB buffer.
	 * NOTE(review): 0x1F/0x0F look like vendor-specific opcodes that
	 * don't touch array data, so the invalidate is skipped — confirm.
	 */
	if (op->cmd.opcode != 0x1F && op->cmd.opcode != 0x0F)
		asr_qspi_invalid(qspi);

	mutex_unlock(&qspi->lock);

	return err;
}
+
+static int asr_qspi_adjust_op_size(struct spi_mem *mem, struct spi_mem_op *op)
+{
+	struct asr_qspi *qspi = spi_controller_get_devdata(mem->spi->master);
+
+	if (op->data.dir == SPI_MEM_DATA_OUT) {
+		if (op->data.nbytes > qspi->tx_unit_size)
+			op->data.nbytes = qspi->tx_unit_size;
+	} else {
+		if (op->data.nbytes > qspi->rx_unit_size)
+			op->data.nbytes = qspi->rx_unit_size;
+	}
+
+	return 0;
+}
+
+static int asr_qspi_config_dqs_clk(struct asr_qspi *qspi, int dcode)
+{
+	void __iomem *base = qspi->io_map;
+	u32 reg;
+
+	reg = qspi_readl(qspi, base + QSPI_MCR);
+
+	if (dcode <= 0 || dcode > 255) {
+		reg &= ~(QSPI_MCR_DQS_EN | QSPI_MCR_DQS_LP_EN |
+			QSPI_MCR_DQS_INV_EN);
+		qspi_writel(qspi, reg, base + QSPI_MCR);
+		return -1;
+	}
+
+	reg |= QSPI_MCR_DQS_EN | QSPI_MCR_DQS_LP_EN | QSPI_MCR_DQS_INV_EN;
+	qspi_writel(qspi, reg, base + QSPI_MCR);
+
+	/* DQS enabled, use sample point N/1 */
+	qspi_writel(qspi, 0x0, base + QSPI_SMPR);
+
+	reg = qspi_readl(qspi, base + QSPI_SOCCR);
+	reg |= QSPI_SOCCR_DLINE_EN;
+	qspi_writel(qspi, reg, base + QSPI_SOCCR);
+
+	reg = qspi_readl(qspi, base + QSPI_DLACR);
+	reg &= ~QSPI_DLACR_DLINE_STEP_MASK;
+	reg = 0x7 << QSPI_DLACR_DLINE_STEP_SHIFT;
+	reg |= dcode & QSPI_DLACR_DLINE_CODE_MASK;
+	qspi_writel(qspi, reg, base + QSPI_DLACR);
+
+	asr_qspi_invalid(qspi);
+	dev_info(qspi->dev, "enable DQS clock, QSPI_DLACR=0x%x\n", reg);
+	return 0;
+}
+
+static int __asr_qspi_adjust_timing(struct asr_qspi *qspi,
+		struct spi_mem_timing *timing, int clk_hz)
+{
+	void __iomem *base = qspi->io_map;
+	u32 t, delay = 0;
+	u32 reg;
+	int dcode;
+
+	if (clk_hz <= 13000000)
+		return 0;
+
+	t = 1000000000 / (clk_hz/1000); /* in picosecond */
+
+	/* clock settings */
+	qspi_enter_mode(qspi, QSPI_DISABLE_MODE);
+
+	if (timing->tclqv == 0) {
+		timing->tclqv = 8;
+		timing->tset = timing->thold = 2;
+	}
+
+	delay = (timing->tclqv + timing->tset + 1) * 1000;
+	if (delay <= t)
+		reg = 0; /* sample point N1 */
+	else
+		reg = QSPI_SMPR_FSPHS_MASK; /* sample point I1 */
+
+	if (timing->use_dtr && qspi->has_dtr) {
+		int ddr_point;
+
+		delay -= t/2;
+		if (delay > 0)
+			ddr_point = (delay + t/8 - 1) / (t/8);
+		else
+			ddr_point = qspi->dtr_rx_delay;
+
+		reg |= ddr_point << QSPI_SMPR_DDRSMP_SHIFT;
+	} else if (qspi->support_dqs && clk_hz > 52000000) {
+		/*
+		* Do not use QDS for DDR, since SDR/DDR can not share
+		* same delay code.
+		* If DQS enabled, must use sample point N/1, clear SMPR.
+		*
+		* delay step: 52ps
+		*/
+		delay = timing->tclqv * 1000 - t/2;
+		dcode = delay / 52;
+		if (!asr_qspi_config_dqs_clk(qspi, dcode))
+			reg = 0;
+	}
+
+	qspi_writel(qspi, reg, base + QSPI_SMPR);
+	reg = qspi_readl(qspi, base + QSPI_SMPR);
+	dev_info(qspi->dev, "QSPI_SMPR=0x%x t=%d\n", reg, t);
+
+	/* set tx hold time */
+	reg = 0x202;
+	if (timing->use_dtr && qspi->has_dtr)
+		reg |= qspi->dtr_tx_delay << 16;
+	qspi_writel(qspi, reg, base + QSPI_FLSHCR);
+
+	reg = qspi_readl(qspi, base + QSPI_FLSHCR);
+	dev_info(qspi->dev, "QSPI_FLSHCR=0x%x, delay=%d\n", reg, delay);
+
+	/* Module enabled */
+	qspi_enter_mode(qspi, QSPI_NORMAL_MODE);
+
+	return 0;
+}
+
+static int asr_qspi_adjust_timing(struct spi_mem *mem, struct spi_mem_timing *timing)
+{
+	struct asr_qspi *qspi = spi_controller_get_devdata(mem->spi->master);
+	int ret = 0;
+
+	dev_notice(qspi->dev, "tclqv=%dns tset=%dns thold=%dns\n",
+		timing->tclqv, timing->tset, timing->thold);
+
+	if (timing->max_hz > 0 && timing->max_hz < qspi->max_hz)
+		qspi->max_hz = timing->max_hz;
+
+	__asr_qspi_adjust_timing(qspi, timing, qspi->max_hz);
+
+	ret = clk_set_rate(qspi->clk, qspi->max_hz);
+	if (ret) {
+		dev_err(qspi->dev, "fail to set clk, ret:%d\n", ret);
+		return ret;
+	}
+
+	dev_notice(qspi->dev, "bus clock %dHz, PMUap reg[0x%08x]:0x%08x\n",
+		qspi->max_hz, qspi->pmuap_reg,
+		qspi_readl(qspi, qspi->pmuap_addr));
+
+	return 0;
+}
+
#define ASR_QSPI_MAX_RETRY 3
/*
 * spi-mem exec_op entry point: runs @op, retrying with a reduced TX unit
 * size on TX underrun (-EAGAIN), and replaying the whole operation if a
 * reset signal (asr_qspi_reset_handler) arrived while it ran.
 */
static int asr_qspi_exec_op(struct spi_mem *mem, const struct spi_mem_op *op)
{
	struct asr_qspi *qspi = spi_controller_get_devdata(mem->spi->master);
	int ret, i;

restart:
	/* cleared here, set by the reset IRQ if one fires mid-operation */
	qspi->rst_protect = false;

	ret = __asr_qspi_exec_op(mem, op);
	if (ret == -EAGAIN) {
		/*
		 * For tx underrun error, reduce data length to be less
		 * than tx fifo size and try again.
		 */
		asr_qspi_adjust_tx_size(mem, 1);
		asr_qspi_adjust_op_size(mem, (struct spi_mem_op *)op);

		i = 0;
		do {
			ret = __asr_qspi_exec_op(mem, op);
		} while (ret == -EAGAIN && ++i < ASR_QSPI_MAX_RETRY);

		/* NOTE(review): panics the kernel if all retries fail —
		 * consider WARN plus an error return instead. */
		BUG_ON(ret);

		dev_dbg(qspi->dev, "pass after %dth retry.\n", i+1);
		asr_qspi_adjust_tx_size(mem, 0);
	}

	if (qspi->rst_protect) {
		/* NOTE(review): unbounded — back-to-back resets could keep
		 * the op looping here; confirm this is intended. */
		dev_info_ratelimited(qspi->dev, "retry for reset protect\n");
		goto restart;
	}

	return ret;
}
+
/*
 * Bring the controller to its default operating state: reset, AHB-read
 * setup, flash map windows, endian/DDR configuration, interrupt state.
 * Also called after resume via asr_qspi_default_setup().
 */
static int asr_qspi_host_init(struct asr_qspi *qspi)
{
	void __iomem *base = qspi->io_map;
	u32 reg;

	/* reset qspi */
	qspi_reset(qspi);

	/* configuration is only permitted with the module disabled */
	qspi_enter_mode(qspi, QSPI_DISABLE_MODE);

	/* SOCCR[7:0] = 0x8: vendor workaround for a write failure issue */
	reg = qspi_readl(qspi, base + QSPI_SOCCR);
	reg &= ~0xFF;
	reg |= 0x8;
	qspi_writel(qspi, reg, base + QSPI_SOCCR);

	/* set the default source address QSPI_AMBA_BASE*/
	qspi_write_sfar(qspi, qspi->memmap_base);
	qspi_writel(qspi, 0x0, base + QSPI_SFACR);

	/* config ahb read */
	qspi_init_ahbread(qspi, SEQID_LUT_AHBREAD_ID);

	/* set flash memory map; top addresses are 1 KiB aligned (low 10 bits zero) */
	qspi_writel(qspi, qspi->sfa1ad & 0xfffffc00, base + QSPI_SFA1AD);
	qspi_writel(qspi, qspi->sfa2ad & 0xfffffc00, base + QSPI_SFA2AD);
	qspi_writel(qspi, qspi->sfb1ad & 0xfffffc00, base + QSPI_SFB1AD);
	qspi_writel(qspi, qspi->sfb2ad & 0xfffffc00, base + QSPI_SFB2AD);

	/* ISD3FB, ISD2FB, ISD3FA, ISD2FA = 1; END_CFG=0x3 */
	reg = qspi_readl(qspi, base + QSPI_MCR);
	reg |= QSPI_MCR_END_CFG_MASK | QSPI_MCR_ISD_MASK;
	if (qspi->has_dtr)
		reg |= QSPI_MCR_DDR_EN_MASK;
	else
		reg &= ~QSPI_MCR_DDR_EN_MASK;
	qspi_writel(qspi, reg, base + QSPI_MCR);

	/* Module enabled */
	qspi_enter_mode(qspi, QSPI_NORMAL_MODE);

	/* Read using the IP Bus registers QSPI_RBDR0 to QSPI_RBDR31*/
	qspi_write_rbct(qspi, QSPI_RBCT_RXBRD_MASK);

	/* polling mode: keep every interrupt source masked */
	if (!qspi->cmd_interrupt)
		asr_qspi_disable_interrupt(qspi, 0xffffffff);

	/* clear all interrupt status */
	qspi_writel(qspi, 0xffffffff, base + QSPI_FR);

#ifdef ASR_DUMP_QSPI_REG
	qspi_dump_reg(qspi);
#endif
	return 0;
}
+
+static int asr_qspi_dirmap_create(struct spi_mem_dirmap_desc *desc)
+{
+	struct spi_controller *ctrl = desc->mem->spi->master;
+	struct asr_qspi *qspi = spi_controller_get_devdata(ctrl);
+	struct spi_mem_op *op = &desc->info.op_tmpl;
+
+	if (op->data.dir != SPI_MEM_DATA_IN || !qspi->ahb_read_enable)
+		return -ENOTSUPP;
+
+	asr_qspi_prepare_lut(qspi, op, SEQID_LUT_AHBREAD_ID);
+	qspi->ahb_op = op;
+
+	if (op->cmd.dtr || op->addr.dtr || op->data.dtr)
+		printk("enable dtr command 0x%x\n", op->cmd.opcode);
+
+	return 0;
+}
+
+static ssize_t asr_qspi_direct_read(struct spi_mem_dirmap_desc *desc,
+				    u64 offs, size_t len, void *buf)
+{
+	struct spi_controller *ctrl = desc->mem->spi->master;
+	struct asr_qspi *qspi = spi_controller_get_devdata(ctrl);
+	struct spi_mem_op op = desc->info.op_tmpl;
+	int err;
+
+	/* Below check not need for ahb read, comment out */
+#if 0
+	void __iomem *base = qspi->io_map;
+	u32 mask;
+
+	mutex_lock(&qspi->lock);
+
+	/* wait for controller being ready */
+	mask = QSPI_SR_BUSY | QSPI_SR_IP_ACC_MASK | QSPI_SR_AHB_ACC_MASK;
+	err = asr_qspi_readl_poll_tout(base, base + QSPI_SR, mask,
+			QSPI_WAIT_TIMEOUT*1000, QSPI_WAIT_BIT_CLEAR);
+	if (err) {
+		dev_err(qspi->dev, "controller not ready!\n");
+		mutex_unlock(&qspi->lock);
+		return err;
+	}
+
+	mutex_unlock(&qspi->lock);
+#endif
+
+	op.addr.val = desc->info.offset + offs;
+	op.data.buf.in = buf;
+	op.data.nbytes = len;
+	asr_qspi_adjust_op_size(desc->mem, &op);
+
+	err = asr_qspi_ahb_read(qspi, &op);
+	if (err)
+		return err;
+
+	return op.data.nbytes;
+}
+
/* spi-mem operation table exported to the SPI core. */
static const struct spi_controller_mem_ops asr_qspi_mem_ops = {
	.adjust_op_size = asr_qspi_adjust_op_size,
	.adjust_timing = asr_qspi_adjust_timing,
	.supports_op = asr_qspi_supports_op,
	.exec_op = asr_qspi_exec_op,
	.dirmap_create = asr_qspi_dirmap_create,
	.dirmap_read = asr_qspi_direct_read,
};
+
+static int asr_qspi_probe(struct platform_device *pdev)
+{
+	struct spi_controller *ctlr;
+	struct device *dev = &pdev->dev;
+	struct device_node *np = dev->of_node;
+	struct asr_qspi *qspi;
+	struct resource *res;
+
+	int ret = 0;
+	u32 qspi_bus_num = 0;
+	int host_irq = 0;
+
+	ctlr = spi_alloc_master(&pdev->dev, sizeof(struct asr_qspi));
+	if (!ctlr)
+		return -ENOMEM;
+
+	ctlr->mode_bits = SPI_RX_DUAL | SPI_RX_QUAD | SPI_TX_DUAL | SPI_TX_QUAD ;
+	qspi = spi_controller_get_devdata(ctlr);
+	qspi->dev = dev;
+	qspi->ctrl = ctlr;
+
+	platform_set_drvdata(pdev, qspi);
+
+	/* get qspi register base address */
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "qspi-base");
+	qspi->io_map = devm_ioremap_resource(dev, res);
+	if (IS_ERR(qspi->io_map)) {
+		ret = PTR_ERR(qspi->io_map);
+		goto err_put_ctrl;
+	}
+	qspi->io_phys = res->start;
+
+	/* get qspi memory-map address */
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "qspi-mmap");
+	qspi->ahb_map = devm_ioremap_resource(dev, res);
+	if (IS_ERR(qspi->ahb_map)) {
+		ret = PTR_ERR(qspi->ahb_map);
+		goto err_put_ctrl;
+	}
+
+	qspi->memmap_base = res->start;
+	qspi->memmap_size = resource_size(res);
+
+	if (of_property_read_u32(dev->of_node, "asr,qspi-sfa1ad", &qspi->sfa1ad))
+		qspi->sfa1ad = QSPI_FLASH_A1_TOP;
+	else
+		qspi->sfa1ad += qspi->memmap_base;
+	if (of_property_read_u32(dev->of_node, "asr,qspi-sfa2ad", &qspi->sfa2ad))
+		qspi->sfa2ad = QSPI_FLASH_A2_TOP;
+	else
+		qspi->sfa2ad += qspi->sfa1ad;
+	if (of_property_read_u32(dev->of_node, "asr,qspi-sfb1ad", &qspi->sfb1ad))
+		qspi->sfb1ad = QSPI_FLASH_B1_TOP;
+	else
+		qspi->sfb1ad = qspi->sfa2ad;
+	if (of_property_read_u32(dev->of_node, "asr,qspi-sfb2ad", &qspi->sfb2ad))
+		qspi->sfb2ad = QSPI_FLASH_B2_TOP;
+	else
+		qspi->sfb2ad += qspi->sfb1ad;
+
+	dev_notice(dev, "asr_qspi_probe:memmap base:0x%08x, memmap size:0x%x\n",
+			qspi->memmap_base, qspi->memmap_size);
+
+	qspi->sram.pool = of_gen_pool_get(dev->of_node, "asr,qspi-sram", 0);
+	if (qspi->sram.pool) {
+		qspi->sram.virt =
+			(void __iomem *)gen_pool_dma_alloc(
+				qspi->sram.pool, SZ_4K, &qspi->sram.dma);
+		dev_notice(dev, "use sram as tx buf, virt=0x%x phy=0x%x\n",
+			(unsigned)qspi->sram.virt, (unsigned)qspi->sram.dma);
+	}
+
+	host_irq = platform_get_irq(pdev, 0);
+	if (host_irq < 0) {
+		dev_err(dev, "invalid host irq:%d\n", host_irq);
+		goto err_put_ctrl;
+	}
+	ret = devm_request_irq(dev, host_irq, asr_qspi_irq_handler,
+				0, pdev->name, qspi);
+	if (ret) {
+		dev_err(dev, "failed to request irq:%d\n", ret);
+		goto err_put_ctrl;
+	}
+	init_completion(&qspi->cmd_completion);
+	dev_notice(qspi->dev, "host_irq:%d\n", host_irq);
+
+	host_irq = platform_get_irq(pdev, 1);
+	if (host_irq >= 0) {
+		ret = devm_request_irq(dev, host_irq, asr_qspi_reset_handler,
+					0, pdev->name, qspi);
+		if (ret) {
+			dev_err(dev, "failed to request irq:%d\n", ret);
+			goto err_put_ctrl;
+		}
+
+		dev_notice(qspi->dev, "reset irq:%d\n", host_irq);
+	}
+
+	/* map QSPI PMUap register address */
+	if (of_property_read_u32(dev->of_node, "asr,qspi-pmuap-reg", &qspi->pmuap_reg)) {
+		qspi->pmuap_reg = PMUA_QSPI_CLK_RES_CTRL;
+	}
+	qspi->pmuap_addr = ioremap(qspi->pmuap_reg, 4);
+
+	if (of_property_read_u32(dev->of_node, "asr,qspi-freq", &qspi->max_hz)) {
+		qspi->max_hz = ASR_QSPI_DEFAULT_CLK_FREQ;
+	}
+
+	if (of_property_read_u32(dev->of_node, "asr,qspi-rx-buf", &qspi->rx_buf_size)) {
+		qspi->rx_buf_size = QSPI_RX_BUFF_MAX;
+	}
+
+	if (of_property_read_u32(dev->of_node, "asr,qspi-tx-buf", &qspi->tx_buf_size)) {
+		qspi->tx_buf_size = QSPI_TX_BUFF_MAX;
+	}
+
+	if (of_property_read_u32(dev->of_node, "asr,qspi-ahb-buf", &qspi->ahb_buf_size)) {
+		qspi->ahb_buf_size = QSPI_AHB_BUFF_MAX_SIZE;
+	}
+
+	if (of_property_read_u32(dev->of_node, "asr,qspi-ahb-enable", &qspi->ahb_read_enable)) {
+		qspi->ahb_read_enable = 1;
+	}
+
+	if (of_property_read_u32(dev->of_node, "asr,qspi-interrupt", &qspi->cmd_interrupt)) {
+		qspi->cmd_interrupt = 1;
+	}
+
+	/* RX not use dma default, read from ahb directly show better performance */
+	if (of_property_read_u32(dev->of_node, "asr,en-rx-dma", &qspi->rx_dma_enable)) {
+		qspi->rx_dma_enable = 0;
+	}
+
+	if (of_property_read_u32(dev->of_node, "asr,en-tx-dma", &qspi->tx_dma_enable)) {
+		qspi->tx_dma_enable = 1;
+	}
+
+	if (of_property_read_u32(dev->of_node, "asr,qspi-support-dtr", &qspi->has_dtr)) {
+		qspi->has_dtr = 0;
+	} else {
+		if (of_property_read_u32(dev->of_node, "asr,qspi-dtr-tx-delay", &qspi->dtr_tx_delay))
+			qspi->dtr_tx_delay = 1;
+		if (of_property_read_u32(dev->of_node, "asr,qspi-dtr-rx-delay", &qspi->dtr_rx_delay))
+			qspi->dtr_rx_delay = 0;
+	}
+
+	if (of_property_read_u32(dev->of_node, "asr,qspi-support-dqs", &qspi->support_dqs))
+		qspi->support_dqs = 0;
+	if (cpu_is_asr1903_a0() || cpu_is_asr1903_z1())
+		qspi->support_dqs = 0;
+
+	if (of_property_read_u32(dev->of_node, "asr,qspi-endian-xchg", &qspi->endian_xchg)) {
+		qspi->endian_xchg = 0;
+	}
+
+	if (of_property_read_u32(dev->of_node, "asr,qspi-cs", &qspi->cs_selected)) {
+		qspi->cs_selected = QSPI_DEFAULT_CS;
+	}
+
+	if (of_property_read_u32(dev->of_node, "asr,qspi-lpm-qos", &qspi->lpm_qos)) {
+		qspi->lpm_qos = PM_QOS_CPUIDLE_BLOCK_DEFAULT_VALUE;
+	}
+
+	asr_qspi_prepare_dma(qspi);
+	mutex_init(&qspi->lock);
+
+	/* set the qspi device default index */
+	if (of_property_read_u32(dev->of_node, "asr,qspi-id", &qspi_bus_num))
+		ctlr->bus_num = -1;
+	else
+		ctlr->bus_num = qspi_bus_num;
+	ctlr->num_chipselect = 1;
+	ctlr->mem_ops = &asr_qspi_mem_ops;
+
+	dev_notice(dev, "asr_qspi_probe: rx_buf_size:%d, tx_buf_size:%d\n",
+			qspi->rx_buf_size, qspi->tx_buf_size);
+	dev_notice(dev, "asr_qspi_probe: ahb_buf_size:%d, ahb_read:%d\n",
+			qspi->ahb_buf_size, qspi->ahb_read_enable);
+
+	if (qspi->tx_dma_enable)
+		qspi->tx_unit_size = SZ_4K;
+	else
+		qspi->tx_unit_size = qspi->tx_buf_size;
+
+	if (qspi->ahb_read_enable)
+		qspi->rx_unit_size = SZ_4K;
+	else
+		qspi->rx_unit_size = qspi->rx_buf_size;
+
+	/* config mfp */
+	qspi_config_mfp(qspi);
+	/* set PMUap */
+	qspi_set_func_clk(qspi, 13000000);
+	asr_qspi_host_init(qspi);
+	dev_info(qspi->dev, "AHB buf size: %d\n", qspi->ahb_buf_size);
+	dev_notice(qspi->dev, "qspi host init done.\n");
+
+	qspi->pm_qos_req.name = pdev->name;
+	ctlr->auto_runtime_pm = true;
+	pm_qos_add_request(&qspi->pm_qos_req, PM_QOS_CPUIDLE_BLOCK,
+				PM_QOS_CPUIDLE_BLOCK_DEFAULT_VALUE);
+
+	qspi->pm_ddr_qos.name = pdev->name;
+	pm_qos_add_request(&qspi->pm_ddr_qos, PM_QOS_DDR_DEVFREQ_MIN,
+				PM_QOS_DEFAULT_VALUE);
+
+	pm_runtime_get_noresume(&pdev->dev);
+	pm_runtime_use_autosuspend(&pdev->dev);
+	pm_runtime_set_autosuspend_delay(&pdev->dev, QSPI_AUTOSUSPEND_TIMEOUT);
+	pm_runtime_set_active(&pdev->dev);
+	pm_suspend_ignore_children(&pdev->dev, 0);
+	pm_runtime_enable(&pdev->dev);
+
+	/* get qos */
+	pm_qos_update_request(&qspi->pm_qos_req, qspi->lpm_qos);
+	ctlr->dev.of_node = np;
+	ret = spi_register_controller(ctlr);
+	if (ret)
+		goto err_destroy_mutex;
+
+	pm_runtime_put_autosuspend(&pdev->dev);
+
+	return 0;
+
+err_destroy_mutex:
+	pm_runtime_disable(&pdev->dev);
+	pm_runtime_put_noidle(&pdev->dev);
+	pm_qos_remove_request(&qspi->pm_qos_req);
+	pm_qos_remove_request(&qspi->pm_ddr_qos);
+
+	mutex_destroy(&qspi->lock);
+	iounmap(qspi->pmuap_addr);
+
+err_put_ctrl:
+	spi_controller_put(ctlr);
+
+	dev_err(dev, "ASR QSPI probe failed\n");
+	return ret;
+}
+
/*
 * Remove: quiesce the controller, then release PM state, DMA channels,
 * mappings, clocks and the optional SRAM TX buffer in reverse probe order.
 */
static int asr_qspi_remove(struct platform_device *pdev)
{
	struct asr_qspi *qspi = platform_get_drvdata(pdev);

	/* make sure the block is powered before touching registers */
	pm_runtime_get_sync(&pdev->dev);

	/* set disable mode */
	qspi_writel(qspi, QSPI_MCR_MDIS_MASK, qspi->io_map + QSPI_MCR);
	qspi_writel(qspi, 0x0, qspi->io_map + QSPI_RSER);

	pm_runtime_disable(&pdev->dev);
	pm_runtime_put_noidle(&pdev->dev);
	pm_qos_remove_request(&qspi->pm_qos_req);
	pm_qos_remove_request(&qspi->pm_ddr_qos);

	if (qspi->tx_dma)
		dma_release_channel(qspi->tx_dma);
	if (qspi->rx_dma)
		dma_release_channel(qspi->rx_dma);

	mutex_destroy(&qspi->lock);
	iounmap(qspi->pmuap_addr);

	clk_disable_unprepare(qspi->clk);
	clk_disable_unprepare(qspi->bus_clk);

	/* return the SRAM TX bounce buffer allocated in probe */
	if (qspi->sram.pool)
		gen_pool_free(qspi->sram.pool,
			      (unsigned long)qspi->sram.virt, SZ_4K);
	return 0;
}
+
+static void asr_qspi_default_setup(struct asr_qspi *qspi)
+{
+	struct spi_mem_op *op = qspi->ahb_op;
+	int i;
+
+	asr_qspi_host_init(qspi);
+
+	for (i = 0; i < QSPI_MAX_SEQ_NUM; i++)
+		qspi->seq_opcode[i] = 0;
+
+	if (op)
+		asr_qspi_prepare_lut(qspi, op, SEQID_LUT_AHBREAD_ID);
+
+	return;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int asr_qspi_suspend(struct device *dev)
+{
+	int ret;
+	u32 sr;
+	struct asr_qspi *qspi = dev_get_drvdata(dev);
+
+	pm_runtime_get_sync(qspi->dev);
+
+	sr = qspi_readl(qspi, qspi->io_map + QSPI_SR);
+	if (sr & QSPI_SR_BUSY) {
+		dev_err(dev, "qspi busy with ongoing cmd\n");
+		return -EBUSY;
+	}
+
+	ret = pm_runtime_force_suspend(dev);
+	if (ret) {
+		dev_err(dev, "failed to suspend(ret:%d)\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int asr_qspi_resume(struct device *dev)
+{
+	struct asr_qspi *qspi = dev_get_drvdata(dev);
+	int ret;
+
+	ret = pm_runtime_force_resume(dev);
+	if (ret) {
+		dev_err(dev, "failed to resume(ret:%d)\n", ret);
+		return ret;
+	}
+
+	/* reset qspi via bus reset */
+	clk_disable_unprepare(qspi->bus_clk);
+	udelay(1000);
+	clk_prepare_enable(qspi->bus_clk);
+	asr_qspi_default_setup(qspi);
+
+	pm_runtime_mark_last_busy(dev);
+	pm_runtime_put_autosuspend(dev);
+
+	return 0;
+}
+#endif
+
+#ifdef CONFIG_PM
/*
 * Runtime suspend: refuse while a command is in flight, otherwise disable
 * the module, gate its functional clock and release the cpuidle QoS vote.
 */
static int asr_qspi_runtime_suspend(struct device *dev)
{
	u32 sr;
	struct asr_qspi *qspi = dev_get_drvdata(dev);

	/* lock against a concurrent exec_op touching the controller */
	mutex_lock(&qspi->lock);
	sr = qspi_readl(qspi, qspi->io_map + QSPI_SR);
	if (sr & QSPI_SR_BUSY) {
		dev_err(dev, "qspi busy with ongoing cmd\n");
		mutex_unlock(&qspi->lock);
		return -EBUSY;
	}
	qspi_enter_mode(qspi, QSPI_DISABLE_MODE);
	mutex_unlock(&qspi->lock);

	clk_disable_unprepare(qspi->clk);

	/* put qos */
	pm_qos_update_request(&qspi->pm_qos_req, PM_QOS_CPUIDLE_BLOCK_DEFAULT_VALUE);

	return 0;
}
+
+static int asr_qspi_runtime_resume(struct device *dev)
+{
+	struct asr_qspi *qspi = dev_get_drvdata(dev);
+
+	/* get qos */
+	pm_qos_update_request(&qspi->pm_qos_req, qspi->lpm_qos);
+
+	clk_prepare_enable(qspi->clk);
+	qspi_enter_mode(qspi, QSPI_NORMAL_MODE);
+
+	return 0;
+}
+#endif
+
/* System sleep and runtime PM callbacks. */
static const struct dev_pm_ops asr_qspi_pmops = {
	SET_SYSTEM_SLEEP_PM_OPS(asr_qspi_suspend, asr_qspi_resume)
	SET_RUNTIME_PM_OPS(asr_qspi_runtime_suspend,
		asr_qspi_runtime_resume, NULL)
};
+
+static const struct of_device_id asr_qspi_dt_ids[] = {
+	{ .compatible = "asr,qspi", },
+	{}
+};
+MODULE_DEVICE_TABLE(of, asr_qspi_dt_ids);
+
/* Platform driver glue. */
static struct platform_driver asr_qspi_driver = {
	.driver = {
		.name	= "asr-qspi",
		.of_match_table = asr_qspi_dt_ids,
		.pm = &asr_qspi_pmops,
	},
	.probe          = asr_qspi_probe,
	.remove		= asr_qspi_remove,
};
module_platform_driver(asr_qspi_driver);

MODULE_DESCRIPTION("ASR QSPI Host Controller Driver");
MODULE_AUTHOR("ASR Micro");
MODULE_LICENSE("GPL v2");