ASR_BASE

Change-Id: Icf3719cc0afe3eeb3edc7fa80a2eb5199ca9dda1
diff --git a/marvell/uboot/drivers/mtd/spi-flash/asr_qspi.c b/marvell/uboot/drivers/mtd/spi-flash/asr_qspi.c
new file mode 100644
index 0000000..0850a16
--- /dev/null
+++ b/marvell/uboot/drivers/mtd/spi-flash/asr_qspi.c
@@ -0,0 +1,1543 @@
+#include <common.h>
+#include <malloc.h>
+#include <asm/arch/pxa_dma.h>
+#include <power/asr1802s_freq.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/nand.h>
+#include <mtd/pxa3xx_bbm.h>
+#include <asm/arch/cpu.h>
+#include <asm/io.h>
+#include "asr_qspi.h"
+
+#define SPINAND_SUPPORT_DTR		(1 << 7)
+
+#define qspi_writel(val, addr) __raw_writel((val), addr)
+#define qspi_readl(addr) __raw_readl(addr)
+
+#define	PMUA_QSPI_CLK_RES_CTRL_CLK_FC_REQ		BIT_12
+#define	PMUA_QSPI_CLK_RES_CTRL_CLK_DIV_MSK		(0x7 << 9)
+#define	PMUA_QSPI_CLK_RES_CTRL_CLK_DIV_BASE		9
+#define	PMUA_QSPI_CLK_RES_CTRL_CLK_SEL_MSK		(0x7 << 6)
+#define	PMUA_QSPI_CLK_RES_CTRL_CLK_SEL_BASE		6
+#define	PMUA_QSPI_CLK_RES_CTRL_CLK_EN			BIT_4
+#define	PMUA_QSPI_CLK_RES_CTRL_AXICLK_EN		BIT_3
+#define	PMUA_QSPI_CLK_RES_CTRL_CLK_RST			BIT_1
+#define	PMUA_QSPI_CLK_RES_CTRL_AXI_RST			BIT_0
+
+struct qspi_host qspi_host;
+//static DMA_DESCRIPTOR dma_desc __attribute__ ((aligned (16)));
+//static u8 *tx_desc = &dma_desc;
+/*
+ * Write @val to @reg and spin until the register reads back the same
+ * value, ignoring the bits selected by @mask. @host is not used.
+ * There is no timeout.
+ */
+static inline void qspi_writel_check(struct qspi_host *host, u32 val,
+		int reg, u32 mask)
+{
+	const u32 expect = val & ~mask;
+
+	qspi_writel(val, reg);
+	for (;;) {
+		if ((qspi_readl(reg) & ~mask) == expect)
+			return;
+	}
+}
+
+/*
+ * Write @val to @reg, then spin until every written bit outside @mask
+ * reads back as 0 (auto-clear fields). @host is not used; no timeout.
+ */
+static inline void qspi_writel_clear(struct qspi_host *host, u32 val,
+		int reg, u32 mask)
+{
+	/* written bits that are expected to drop back to 0 */
+	const u32 pending = val & ~mask;
+
+	qspi_writel(val, reg);
+	/* poll until none of them is set any more */
+	while (qspi_readl(reg) & ~mask & pending)
+		;
+}
+
+/*
+ * A write to RBCT (like SFAR) can raise the IPIEF error flag in FR.
+ * Repeat the checked write, writing the flag back to FR in between,
+ * until FR no longer reports IPIEF.
+ */
+static void qspi_write_rbct(struct qspi_host *host, u32 val)
+{
+	for (;;) {
+		u32 flags;
+
+		qspi_writel_check(host, val, QSPI0_RBCT, QSPI_RBCT_RESV);
+		flags = qspi_readl(QSPI0_FR);
+		if ((flags & QSPI_FR_IPIEF) == 0)
+			return;
+		/* acknowledge the error flag and try again */
+		qspi_writel(flags & QSPI_FR_IPIEF, QSPI0_FR);
+	}
+}
+
+/* Set SFAR to @val; rewrite it for as long as FR reports IPIEF */
+static void qspi_write_sfar(struct qspi_host *host, u32 val)
+{
+	u32 err;
+
+	while (1) {
+		qspi_writel_check(host, val, QSPI0_SFAR, 0x0);
+		err = qspi_readl(QSPI0_FR) & QSPI_FR_IPIEF;
+		if (!err)
+			break;
+		/* write the flag back to FR before retrying */
+		qspi_writel(err, QSPI0_FR);
+	}
+}
+
+static void qspi_config_mfp(int cs)
+{
+	if (cs != QSPI_CS_A1)	/* pad setup exists for A1 only */
+		return;
+	qspi_writel(0xd000, 0xd401e2c4); /* QSPI_DAT3 */
+	qspi_writel(0x1000, 0xd401e2c8); /* QSPI_DAT2 */
+	qspi_writel(0x1000, 0xd401e2cc); /* QSPI_DAT1 */
+	qspi_writel(0x1000, 0xd401e2d0); /* QSPI_DAT0 */
+	qspi_writel(0x1000, 0xd401e2d4); /* QSPI_CLK */
+	qspi_writel(0x1000, 0xd401e2d8); /* QSPI_CS1 */
+}
+
+/* Set (disable mode) or clear (normal mode) MCR[MDIS]; verified write */
+static void qspi_enter_mode(struct qspi_host *host, u32 mode)
+{
+	u32 mcr = qspi_readl(QSPI0_MCR);
+
+	if (mode == QSPI_DISABLE_MODE)
+		mcr |= QSPI_MCR_MDIS;
+	else if (mode == QSPI_NORMAL_MODE)
+		mcr &= ~QSPI_MCR_MDIS;
+	qspi_writel_check(host, mcr, QSPI0_MCR, QSPI_MCR_RESV);
+}
+
+/*
+ * Set up the AHB read buffers and select the LUT sequence that serves
+ * AHB (memory-mapped) reads.
+ *
+ * @lut:       LUT sequence id written to BFGENCR
+ * @page_size: flash page size in bytes; it is halved (at least once)
+ *             until the result fits QSPI_AHB_BUFF_MAX_SIZE, and that
+ *             result is used as the AHB data transfer size
+ */
+void qspi_init_ahb(int lut, int page_size)
+{
+	u32 buf_cfg;
+	int data_size;
+	int shift;
+
+	/* first power-of-two fraction of the page that fits the buffer */
+	for (shift = 1; ; shift++) {
+		data_size = page_size / (1 << shift);
+		if (data_size <= QSPI_AHB_BUFF_MAX_SIZE)
+			break;
+	}
+
+	/* the ADATSZ field takes the size divided by 8 */
+	buf_cfg = QSPI_BUF3CR_ALLMST |
+		  (data_size / 8) << QSPI_BUF3CR_ADATSZ_SHIFT;
+
+	if (cpu_is_asr1903()) {
+		/*
+		 * Config the ahb buffer
+		 * BUF0 is used for all masters, the control registers
+		 * of BUF1~BUF3 are set to 0xe
+		 */
+		qspi_writel((512/8 - 1) * 8, QSPI0_BUF0IND);
+		qspi_writel(512, QSPI0_BUF1IND);
+		qspi_writel(512, QSPI0_BUF2IND);
+
+		/* AHB Master port */
+		qspi_writel(buf_cfg, QSPI0_BUF0CR);
+		qspi_writel(0xe, QSPI0_BUF1CR);
+		qspi_writel(0xe, QSPI0_BUF2CR);
+		qspi_writel(0xe, QSPI0_BUF3CR);
+	} else {
+		/*
+		 * Config the ahb buffer
+		 * BUF3 is used for all masters, the control registers
+		 * of BUF0~BUF2 are set to 0xe
+		 */
+		qspi_writel(0, QSPI0_BUF0IND);
+		qspi_writel(0, QSPI0_BUF1IND);
+		qspi_writel(0, QSPI0_BUF2IND);
+
+		/* AHB Master port */
+		qspi_writel(0xe, QSPI0_BUF0CR);
+		qspi_writel(0xe, QSPI0_BUF1CR);
+		qspi_writel(0xe, QSPI0_BUF2CR);
+		qspi_writel(buf_cfg, QSPI0_BUF3CR);
+	}
+
+	qspi_writel(lut << QSPI_BFGENCR_SEQID_SHIFT, QSPI0_BFGENCR);
+	printf("AHB data transfer size: %d\n", data_size);
+}
+
+/* Lock the LUT (key write, then LCKCR bit 0) unless bit 0 is already set */
+static void qspi_lock_lut(void)
+{
+	const u32 lock_bit = 0x1;
+
+	if (!(qspi_readl(QSPI0_LCKCR) & lock_bit)) {
+		qspi_writel(0x5af05af0, QSPI0_LUTKEY);
+		qspi_writel(lock_bit, QSPI0_LCKCR);
+	}
+}
+
+/* Unlock the LUT (key write, then LCKCR bit 1) unless bit 1 is set */
+static void qspi_unlock_lut(void)
+{
+	const u32 unlock_bit = 0x2;
+
+	if (qspi_readl(QSPI0_LCKCR) & unlock_bit)
+		return;		/* nothing to do */
+
+	qspi_writel(0x5af05af0, QSPI0_LUTKEY);
+	qspi_writel(unlock_bit, QSPI0_LCKCR);
+}
+
+static void qspi_config_lookup_tbl(struct spi_flash_chip *chip,
+				   struct spi_flash_cmd_cfg *cmd_cfg,
+				   u8 mode, int seq_id)
+{
+	u32 lut_value;
+	u16 lut_entry[8];	/* entry: instr << 10 | pad << 8 | operand */
+	u8 pins[] = {0, QSPI_PAD_1X, QSPI_PAD_2X, 0, QSPI_PAD_4X};
+	u8 seq = 0, i;
+	u8 mode_instr, dummy_cycles, addr_bytes;
+	int lut_addr;
+
+	/* Build the sequence for slot @seq_id; opcode first (4 pads in QPI) */
+	if (chip->qpi_enabled)
+		lut_entry[seq++] =
+			(cmd_cfg->cmd_dtr ?
+			 QSPI_INSTR_CMD_DDR : QSPI_INSTR_CMD) << 10 |
+			QSPI_PAD_4X << 8 | cmd_cfg->opcode;
+	else
+		lut_entry[seq++] =
+			(cmd_cfg->cmd_dtr ?
+			 QSPI_INSTR_CMD_DDR : QSPI_INSTR_CMD) << 10 |
+			QSPI_PAD_1X << 8 | cmd_cfg->opcode;
+
+	if (cmd_cfg->addr_bytes) {
+		if (chip->en_addr_4byte && cmd_cfg->addr_bytes == 3)
+			addr_bytes = 4;
+		else
+			addr_bytes = cmd_cfg->addr_bytes;
+
+		lut_entry[seq++] =
+			(cmd_cfg->addr_dtr ?
+			 QSPI_INSTR_ADDR_DDR : QSPI_INSTR_ADDR) << 10 |
+			pins[cmd_cfg->addr_pins] << 8 | addr_bytes*8;
+		if (addr_bytes == 1)
+			lut_entry[seq - 1] |= 0x4000;	/* instr field |= 0x10 */
+	}
+
+	if (cmd_cfg->mode_bits) {
+		if (cmd_cfg->mode_bits == 2) {
+			mode_instr = (cmd_cfg->mode_dtr ?
+				      QSPI_INSTR_MODE2_DDR : QSPI_INSTR_MODE2);
+			mode &= 0x3;
+		} else if (cmd_cfg->mode_bits == 4) {
+			mode_instr = (cmd_cfg->mode_dtr ?
+				      QSPI_INSTR_MODE4_DDR : QSPI_INSTR_MODE4);
+			mode &= 0xf;
+		} else {
+			mode_instr = (cmd_cfg->mode_dtr ?
+				      QSPI_INSTR_MODE_DDR : QSPI_INSTR_MODE);
+		}
+		lut_entry[seq++] = mode_instr << 10 |
+				   pins[cmd_cfg->mode_pins] << 8 |
+				   mode;
+	}
+
+	if (cmd_cfg->dummy_pins) {
+		if (chip->qpi_enabled)
+			dummy_cycles = chip->qpi_dummy;
+		else
+			dummy_cycles = cmd_cfg->dummy_cycles;
+		lut_entry[seq++] = QSPI_INSTR_DUMMY << 10 |
+				   pins[cmd_cfg->dummy_pins] << 8 |
+				   dummy_cycles;
+	}
+
+	if (cmd_cfg->data_pins) {
+		if (cmd_cfg->type == CMD_W_RX_DATA) {
+			lut_entry[seq++] =
+				(cmd_cfg->data_dtr ?
+				 QSPI_INSTR_READ_DDR : QSPI_INSTR_READ) << 10 |
+				pins[cmd_cfg->data_pins] << 8;
+			/* A read is followed by a JMP_ON_CS entry */
+			lut_entry[seq++] = QSPI_INSTR_JMP_ON_CS << 10;
+
+		} else if (cmd_cfg->type == CMD_W_TX_DATA) {
+			lut_entry[seq++] =
+				(cmd_cfg->data_dtr ?
+				 QSPI_INSTR_WRITE_DDR : QSPI_INSTR_WRITE) << 10 |
+				pins[cmd_cfg->data_pins] << 8;
+		} else {
+			printf("err: type of cmd %d is wrong in table\r\n",
+				   cmd_cfg->opcode);
+			return;	/* nothing has been written to the LUT */
+		}
+	}
+
+	/* Every sequence is terminated by a STOP entry */
+	lut_entry[seq++] = QSPI_INSTR_STOP << 10;
+	/*
+	 * Two entries share one 32-bit LUT word. The read-back may differ
+	 * from the written value; the read only makes the write take effect.
+	 */
+	for (i = 0; i < seq/2; i++) {
+		lut_addr = QSPI0_LUT0 + seq_id*0x10 + i*0x4;
+		lut_value = lut_entry[i*2] | (lut_entry[i*2 + 1] << 16);
+
+		qspi_writel(lut_value, lut_addr);
+		lut_value = qspi_readl(lut_addr);
+	}
+
+	if (seq % 2) {	/* odd count: the last entry sits alone in its word */
+		lut_addr = QSPI0_LUT0 + seq_id*0x10 + (seq/2)*0x4;
+		lut_value =lut_entry[seq - 1];
+
+		qspi_writel(lut_value, lut_addr);
+		lut_value = qspi_readl(lut_addr);
+	}
+
+	/* A slot is 0x10 bytes, i.e. 8 entries, matching lut_entry[8]. */
+	/* NOTE(review): pins[] is indexed by *_pins without a range check. */
+	return;
+}
+
+/* Rebuild the shared LUT slot (QSPI_LUT_SEQID1) for @cmd; returns its id */
+static int qspi_update_shared_lut(struct spi_flash_chip *chip,
+				  struct spi_flash_cmd *cmd)
+{
+	const u32 shared_id = QSPI_LUT_SEQID1;
+
+	qspi_config_lookup_tbl(chip, cmd->cmd_cfg, cmd->mode, shared_id);
+	return shared_id;
+}
+
+/*
+ * Program a fixed LUT sequence for every command in chip->table whose
+ * seq_id is not -1 and record the used ids in host->lut_map.
+ * Returns 0, or -1 when two commands claim the same sequence id.
+ */
+static int qspi_preinit_lookup_tbl(struct spi_flash_chip *chip)
+{
+	struct spi_flash_cmd_cfg *cfg;
+	int used = 0;
+
+	/* the table ends at the first entry with opcode 0 */
+	for (cfg = chip->table; cfg->opcode != 0x0; cfg++) {
+		if (cfg->seq_id == -1)
+			continue;
+		if (used & (1 << cfg->seq_id)) {
+			printf("err: LUT %d already used\n", cfg->seq_id);
+			return -1;
+		}
+
+		qspi_config_lookup_tbl(chip, cfg, 0xff, cfg->seq_id);
+		used |= 1 << cfg->seq_id;
+	}
+
+	chip->host->lut_map = used;
+	printf("Fixed LUT bit-map: 0x%x\n", used);
+	return 0;
+}
+
+/* Put @cmd_cfg into LUT sequence 0, set up AHB reads, pick XIP or IPS */
+static int qspi_enable_xip(struct spi_flash_chip *chip,
+		    struct spi_flash_cmd_cfg *cmd_cfg)
+{
+	struct qspi_host *qspi = chip->host;
+
+	qspi_config_lookup_tbl(chip, cmd_cfg, 0xff, QSPI_LUT_SEQID0);
+	qspi_enter_mode(qspi, QSPI_DISABLE_MODE);
+	qspi_init_ahb(QSPI_LUT_SEQID0, chip->page_size);
+	qspi_enter_mode(qspi, QSPI_NORMAL_MODE);
+
+	if (!qspi->use_xip) {
+		chip->xip_read = 0;
+		printf("IPS Read mode enabled\n");
+		return 0;
+	}
+
+	chip->xip_read = 1;
+	printf("XIP Read mode enabled\n");
+	return 0;
+}
+
+/* Pulse both MCR soft-reset bits (SWRSTHD, SWRSTSD): set, 1us, clear */
+static void qspi_invalid_ahb(struct qspi_host *host)
+{
+	const u32 swrst = QSPI_MCR_SWRSTHD | QSPI_MCR_SWRSTSD;
+	u32 mcr = qspi_readl(QSPI0_MCR);
+
+	/* assert the soft reset */
+	mcr |= swrst;
+	qspi_writel_check(host, mcr, QSPI0_MCR, QSPI_MCR_RESV);
+
+	udelay(1);	/* tests showed that no delay is actually needed */
+
+	/* release it again */
+	qspi_writel_check(host, mcr & ~swrst, QSPI0_MCR, QSPI_MCR_RESV);
+}
+
+static void qspi_clk_enable(void)
+{
+	PMUA->QSPI_CLK_RES_CTRL |= (1 << 4) | (1 << 3);	/* both clock enables */
+	PMUA->QSPI_CLK_RES_CTRL |= (1 << 1) | (1 << 0);	/* both reset bits */
+}
+
+static void qspi_clk_disable(void)
+{
+	PMUA->QSPI_CLK_RES_CTRL &= ~((1 << 4) | (1 << 3)); /* clock enables off */
+}
+
+/*
+ * Select the QSPI functional clock in PMUA and issue a frequency-change
+ * (FC) request.
+ *
+ * @host: when has_dtr is set, four times @mhz is requested
+ * @mhz:  wanted bus clock in MHz; mapped to a sel/div pair by the
+ *        thresholds 416, 312, 208, 104, 78 and 52, with the "13M"
+ *        settings below that
+ *
+ * Returns 0, or -1 when the FC request bit is still set after 5000 polls
+ * spaced 1us apart.
+ */
+static int qspi_set_func_clk_fc(struct qspi_host *host, int mhz)
+{
+	uint32_t retries = 5*1000;
+	int sel, div;
+
+	/* for dtr, qspi pmu clk must be 4x bus clk */
+	if (host->has_dtr)
+		mhz <<= 2;
+
+	if (cpu_is_asr1806()) {
+		/* enable PLL1_DIV23 */
+		APBSPARE->PLL3_SW_CTRL |= 0x7;
+	} else if (cpu_is_asr1903()) {
+		/* enable PLL1_DIV23/PLL1_DIV13/PLL1_DIV11 */
+		APBSPARE->apb_spare12_reg |= BIT_16 | BIT_17 | BIT_18;
+	}
+
+	/* Enable the QSPI clocks, then take the block out of reset */
+	PMUA->QSPI_CLK_RES_CTRL |= PMUA_QSPI_CLK_RES_CTRL_CLK_EN |
+				   PMUA_QSPI_CLK_RES_CTRL_AXICLK_EN;
+	PMUA->QSPI_CLK_RES_CTRL |= PMUA_QSPI_CLK_RES_CTRL_CLK_RST |
+				   PMUA_QSPI_CLK_RES_CTRL_AXI_RST;
+
+	if (mhz >= 416) {
+		sel = 0;
+		div = 0;
+	} else if (mhz >= 312) {
+		sel = cpu_is_asr1906() ? 1 : 2;
+		div = 0;
+	} else if (mhz >= 208) {
+		sel = 0;
+		div = 1;
+	} else if (mhz >= 104) {
+		sel = 0;
+		div = 3;
+	} else if (mhz >= 78) {
+		sel = cpu_is_asr1906() ? 1 : 2;
+		div = 3;
+	} else if (mhz >= 52) {
+		sel = 0;
+		div = 7;
+	} else if (cpu_is_asr1906()) {
+		/* default 13M */
+		sel = 7;
+		div = 3;
+	} else {
+		/* default 13M */
+		sel = 5;
+		div = 7;
+	}
+
+	PMUA->QSPI_CLK_RES_CTRL &= ~(PMUA_QSPI_CLK_RES_CTRL_CLK_DIV_MSK |
+				     PMUA_QSPI_CLK_RES_CTRL_CLK_SEL_MSK);
+	PMUA->QSPI_CLK_RES_CTRL |=
+		div << PMUA_QSPI_CLK_RES_CTRL_CLK_DIV_BASE |
+		sel << PMUA_QSPI_CLK_RES_CTRL_CLK_SEL_BASE;
+
+	/* raise the FC request and wait until the bit reads 0 again */
+	PMUA->QSPI_CLK_RES_CTRL |= PMUA_QSPI_CLK_RES_CTRL_CLK_FC_REQ;
+	while (PMUA->QSPI_CLK_RES_CTRL & PMUA_QSPI_CLK_RES_CTRL_CLK_FC_REQ) {
+		udelay(1);
+		if (--retries == 0) {
+			printf("err: qspi fc timeout!\n");
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * PMU clock is 4x the bus clock. Compared in MHz: a Hz value overflows int.
+ */
+static void qspi_set_func_clk_nofc(int mhz)
+{
+	const int pmu_mhz = mhz * 4;
+	int clk_sel;
+
+	if (pmu_mhz >= 416)
+		clk_sel = QSPI_FUNC_CLK_416MHZ;
+	else if (pmu_mhz >= 312)
+		clk_sel = QSPI_FUNC_CLK_312MHZ;
+	else if (pmu_mhz >= 208)
+		clk_sel = QSPI_FUNC_CLK_208MHZ;
+	else if (pmu_mhz >= 156)
+		clk_sel = QSPI_FUNC_CLK_156MHZ;
+	else if (pmu_mhz >= 104)
+		clk_sel = QSPI_FUNC_CLK_104MHZ;
+	else if (pmu_mhz >= 78)
+		clk_sel = QSPI_FUNC_CLK_78MHZ;
+	else if (pmu_mhz >= 52)
+		clk_sel = QSPI_FUNC_CLK_52MHZ;
+	else
+		clk_sel = QSPI_FUNC_CLK_26MHZ;
+	qspi_clk_disable();
+	PMUA->QSPI_CLK_RES_CTRL &= ~(0x7 << 6);
+	PMUA->QSPI_CLK_RES_CTRL |= clk_sel << 6;
+	qspi_clk_enable();
+}
+
+/* DQS on with delay code 1..255 (returns 1), otherwise DQS off (returns 0) */
+static int qspi_config_dqs_clk(struct qspi_host *host, int dcode)
+{
+	uint32_t reg = qspi_readl(QSPI0_MCR);
+
+	if (dcode <= 0 || dcode > 255) {
+		reg &= ~(QSPI_MCR_DQS_EN | QSPI_MCR_DQS_LP_EN |
+			QSPI_MCR_DQS_INV_EN);
+		qspi_writel(reg, QSPI0_MCR);
+		return 0;
+	}
+
+	/* DQS enabled, use sample point N/1 */
+	qspi_writel(0x0, QSPI0_SMPR);
+	reg |= QSPI_MCR_DQS_EN | QSPI_MCR_DQS_LP_EN | QSPI_MCR_DQS_INV_EN;
+	qspi_writel(reg, QSPI0_MCR);
+
+	reg = qspi_readl(QSPI0_SOCCR);
+	reg |= QSPI_SOCCR_DLINE_EN;
+	qspi_writel(reg, QSPI0_SOCCR);
+
+	/* replace only the step and code fields, keep the rest of DLACR */
+	reg = qspi_readl(QSPI0_DLACR);
+	reg &= ~(QSPI_DLACR_DLINE_STEP_MASK | QSPI_DLACR_DLINE_CODE_MASK);
+	reg |= 0x7 << QSPI_DLACR_DLINE_STEP_SHIFT;
+	reg |= dcode & QSPI_DLACR_DLINE_CODE_MASK;
+	qspi_writel(reg, QSPI0_DLACR);
+
+	qspi_invalid_ahb(host);
+	return 1;
+}
+
+static void qspi_set_func_clk(struct qspi_host *host, int mhz, int use_dtr,
+			int tclqv, int tset, int thold)
+{
+	uint32_t t = 1000000 / mhz; /* clock period in ps; mhz must not be 0 */
+	uint32_t reg;
+	int delay = 0;
+	int dcode;
+	/* Set the bus clock, then pick the RX sample point from the timings */
+	if (cpu_is_asr1806() || cpu_is_asr1906() || cpu_is_asr1903())
+		qspi_set_func_clk_fc(host, mhz); /* NOTE(review): result ignored */
+	else
+		qspi_set_func_clk_nofc(mhz);
+
+	qspi_enter_mode(host, QSPI_DISABLE_MODE);
+
+	if (tclqv == 0) {	/* no timing given: use the defaults below */
+		tclqv = 8;
+		tset = thold = 2;	/* thold is not read afterwards */
+	}
+
+	delay = (tclqv + tset + 1) * 1000;
+	if (delay <= t)
+		reg = 0; /* sample point N1 */
+	else
+		reg = QSPI_SMPR_FSPHS; /* sample point I1 */
+
+	if (use_dtr) {
+		int ddr_point;
+
+		delay -= t/2;
+		if (delay > 0)
+			ddr_point = (delay + t/8 - 1) / (t/8);
+		else
+			ddr_point = 0;
+		reg |= ddr_point << QSPI_SMPR_DDRSMP_SHIFT;
+	} else if (host->support_dqs && mhz > 52) {
+		/*
+		 * DQS is not used for DDR, since SDR and DDR cannot share
+		 * the same delay code.
+		 * With DQS enabled the sample point must be N/1, so SMPR
+		 * is cleared.
+		 * One delay-line step is 52ps.
+		 */
+		delay = tclqv * 1000 - t/2;
+		dcode = delay / 52;
+		if (qspi_config_dqs_clk(host, dcode))
+			reg = 0;
+	}
+
+	qspi_writel(reg, QSPI0_SMPR);
+	reg = qspi_readl(QSPI0_SMPR);
+	printf("QSPI_SMPR=0x%x t=%d tclqv=%d delay=%d\n",
+		reg, t, tclqv, delay);
+
+	/* set tx hold time */
+	reg = 0x202;
+	if (use_dtr)
+		reg |= QSPI_FLSHCR_TDH_HALF_2X;
+	qspi_writel(reg, QSPI0_FLSHCR);
+
+	/* Module enabled */
+	qspi_enter_mode(host, QSPI_NORMAL_MODE);
+
+	host->bus_clk = mhz;
+	printf("Bus clock: %dMHz QSPI_CLK_RES_CTRL: 0x%x\n",
+			mhz, PMUA->QSPI_CLK_RES_CTRL);
+}
+
+/*
+ * Clear the three DQS bits in MCR of the global host (with the module
+ * disabled for the update) and soft-reset the controller afterwards.
+ * Does nothing unless host->support_dqs is set.
+ */
+void asr_qspi_disable_dqs(void)
+{
+	const uint32_t dqs_bits = QSPI_MCR_DQS_EN | QSPI_MCR_DQS_LP_EN |
+				  QSPI_MCR_DQS_INV_EN;
+	struct qspi_host *qspi = &qspi_host;
+
+	if (qspi->support_dqs) {
+		qspi_enter_mode(qspi, QSPI_DISABLE_MODE);
+		qspi_writel(qspi_readl(QSPI0_MCR) & ~dqs_bits, QSPI0_MCR);
+		qspi_enter_mode(qspi, QSPI_NORMAL_MODE);
+
+		/* soft reset once the module is enabled again */
+		qspi_invalid_ahb(qspi);
+	}
+}
+
+/* Set the TBFDE bit in RSER (used by the TX DMA path); verified write */
+static void qspi_enable_dma(struct qspi_host *host)
+{
+	uint32_t rser = qspi_readl(QSPI0_RSER);
+
+	rser |= QSPI_RSER_TBFDE;
+	qspi_writel_check(host, rser, QSPI0_RSER, QSPI_RSER_RESV);
+}
+
+/* Clear the TBFDE bit in RSER again; the write is verified */
+static void qspi_disable_dma(struct qspi_host *host)
+{
+	uint32_t rser = qspi_readl(QSPI0_RSER);
+
+	rser &= ~QSPI_RSER_TBFDE;
+	qspi_writel_check(host, rser, QSPI0_RSER, QSPI_RSER_RESV);
+}
+
+/* Add the eight error interrupt enables below to RSER (verified write) */
+static void qspi_config_interrupt(struct qspi_host *host)
+{
+	const uint32_t err_irqs =
+		QSPI_RSER_ILLINIE | QSPI_RSER_ABSEIE | QSPI_RSER_AITIE |
+		QSPI_RSER_AIBSIE | QSPI_RSER_ABOIE | QSPI_RSER_IUEIE |
+		QSPI_RSER_IPIEIE | QSPI_RSER_IPGEIE;
+
+	qspi_writel_check(host, qspi_readl(QSPI0_RSER) | err_irqs,
+			  QSPI0_RSER, QSPI_RSER_RESV);
+}
+
+/*
+ * Enable the interrupts needed by the current command: TFIE always,
+ * TBUIE (plus TBFIE in PIO mode) for a write, RBOIE/RBDIE for a read.
+ */
+static void qspi_enable_interrupt(struct qspi_host *host)
+{
+	const struct spi_flash_cmd *cmd = host->cmd;
+	uint32_t rser = qspi_readl(QSPI0_RSER) | QSPI_RSER_TFIE;
+
+	if (cmd->n_tx) {
+		rser |= QSPI_RSER_TBUIE;
+		/* PIO only: TBFIE is wanted while data is still pending */
+		if (!host->use_dma && host->bytes_left > 0)
+			rser |= QSPI_RSER_TBFIE;
+	} else if (cmd->n_rx) {
+		rser |= QSPI_RSER_RBOIE | QSPI_RSER_RBDIE;
+	}
+	qspi_writel_check(host, rser, QSPI0_RSER, QSPI_RSER_RESV);
+}
+
+/*
+ * Clear the per-command interrupt enables in RSER (TFIE plus the TX or
+ * RX ones) and, when DMA was used, the TBFDE bit as well.
+ */
+static void qspi_disable_interrupt(struct qspi_host *host)
+{
+	const struct spi_flash_cmd *cmd = host->cmd;
+	uint32_t off = QSPI_RSER_TFIE;
+
+	if (cmd->n_tx)
+		off |= QSPI_RSER_TBUIE | QSPI_RSER_TBFIE;
+	else if (cmd->n_rx)
+		off |= QSPI_RSER_RBOIE | QSPI_RSER_RBDIE;
+	if (host->use_dma)
+		off |= QSPI_RSER_TBFDE;
+
+	qspi_writel_check(host, qspi_readl(QSPI0_RSER) & ~off,
+			  QSPI0_RSER, QSPI_RSER_RESV);
+}
+
+/*
+ * IRQ-path TX refill: write words to TBDR until the TX buffer is full or
+ * the zero-padded transfer is queued. Copies only bytes present in tx_buf.
+ * Returns 0 when all data is queued, 1 otherwise (or if no cmd is active).
+ */
+static int qspi_fill_to_txbuff(struct qspi_host *host)
+{
+	struct spi_flash_cmd *cmd = host->cmd;
+	int left_bytes = host->bytes_left;
+	int total_cnt, off, n;
+	u32 data;
+
+	if (!cmd) {
+		printf("err: receive tx interrupt while no cmd sent\n");
+		return 1;
+	}
+
+	total_cnt = ALIGN(cmd->n_tx, QSPI_TX_BUFF_POP_MIN);
+	while (left_bytes > 0) {
+		/* NOTE(review): an FR bit name is used on SR - confirm intent */
+		qspi_writel(QSPI_FR_TBFF, QSPI0_SR);
+		if (qspi_readl(QSPI0_SR) & QSPI_FR_TBFF)
+			break;
+
+		data = 0;
+		off = total_cnt - left_bytes;	/* position in the padded stream */
+		n = (int)cmd->n_tx - off;	/* real bytes left at that position */
+		if (n > 0)
+			memcpy(&data, &cmd->tx_buf[off], n < 4 ? n : 4);
+		qspi_writel(data, QSPI0_TBDR);
+		left_bytes -= 4;
+	}
+
+	host->bytes_left = left_bytes;
+	return left_bytes > 0;
+}
+
+/*
+ * IRQ-path RX drain.
+ *
+ * While RBDF is set, host->wmrk + 1 words are copied from the RBDR
+ * registers to cmd->rx_buf and RBDF is written back to pop them.
+ * The copy stops once cmd->n_rx bytes are stored, so memcpy() is never
+ * called with a negative length. Updates host->bytes_left.
+ */
+static void qspi_read_from_rxbuff(struct qspi_host *host)
+{
+	struct spi_flash_cmd *cmd = host->cmd;
+	int left_bytes = host->bytes_left;
+	u32 data;
+	int i;
+
+	while (left_bytes > 0) {
+		if (!(qspi_readl(QSPI0_FR) & QSPI_FR_RBDF))
+			break;
+
+		/* one burst; the last word may be partial */
+		for (i = 0; i <= host->wmrk && left_bytes > 0; i++) {
+			data = qspi_readl(QSPI0_RBDR0 + i*4);
+			memcpy(&cmd->rx_buf[cmd->n_rx - left_bytes], &data,
+			       left_bytes < 4 ? left_bytes : 4);
+			left_bytes -= 4;
+		}
+
+		/* Set RBDF to trigger RX Buffer POP */
+		qspi_writel(QSPI_FR_RBDF, QSPI0_FR);
+	}
+
+	host->bytes_left = left_bytes;
+}
+
+/*
+ * Transfer-finished handling: for a PIO read with bytes outstanding, the
+ * words left in the RX buffer are fetched (RBSR[RDBFL] bounds the loop)
+ * before the command is marked complete. With DMA nothing is copied.
+ *
+ * The copy is also bounded by the bytes still wanted, so memcpy() never
+ * sees a negative length when the buffer holds more than that.
+ * Sets host->bytes_left and host->complete.
+ */
+static void qspi_xfer_done(struct qspi_host *host)
+{
+	struct spi_flash_cmd *cmd = host->cmd;
+	int left_bytes = host->use_dma ? 0 : host->bytes_left;
+	u32 rdbfl, data, i;
+
+	if (cmd->n_rx && left_bytes > 0) {
+		rdbfl = qspi_readl(QSPI0_RBSR);
+		rdbfl &= QSPI_RBSR_RDBFL_MASK;
+		rdbfl >>= QSPI_RBSR_RDBFL_SHIFT;
+
+		/* NOTE(review): '<=' visits rdbfl + 1 words - confirm */
+		for (i = 0; i <= rdbfl && left_bytes > 0; i++) {
+			data = qspi_readl(QSPI0_RBDR0 + i*4);
+			memcpy(&cmd->rx_buf[cmd->n_rx - left_bytes], &data,
+			       left_bytes < 4 ? left_bytes : 4);
+			left_bytes -= 4;
+		}
+
+		if (left_bytes > 0) {
+			printf("Error: Not read enough data: "
+				"left_bytes=%d, cmd->n_rx=%d\n",
+				left_bytes, cmd->n_rx);
+		}
+	}
+
+	host->bytes_left = left_bytes;
+	host->complete = 1;
+}
+
+static void qspi_irq_handler(void *data)
+{
+	struct qspi_host *host = &qspi_host;
+	struct spi_flash_cmd *cmd = host->cmd;
+	u32 fr, resr;
+	(void)data;
+	/* ISR: latch FR and write every set flag except RBDF back to it */
+	fr = qspi_readl(QSPI0_FR);
+	qspi_writel(fr & ~QSPI_FR_RBDF, QSPI0_FR);
+	if (!cmd) {
+		printf("Interrupt happen while no cmd sent, fr=0x%x\n", fr);
+		return;
+	}
+	/* Mask all sources while handling; resr is written back on exit */
+	resr = qspi_readl(QSPI0_RSER);
+	qspi_writel_check(host, 0, QSPI0_RSER, QSPI_RSER_RESV);
+
+	if (fr & (QSPI_FR_ILLINE | QSPI_FR_IUEF | QSPI_FR_IPAEF |
+		QSPI_FR_IPIEF | QSPI_FR_IPGEF |
+		QSPI_FR_RBOF | QSPI_FR_TBUF)) {
+		if (fr & QSPI_FR_ILLINE)
+			printf("Err: Illegal Instruction Error Flag\n");
+		if (fr & QSPI_FR_IUEF)
+			printf("Err: IP Command Usage Error Flag\n");
+		if (fr & QSPI_FR_IPAEF)
+			printf("Err: IP Command Trigger during AHB Access Error Flag\n");
+		if (fr & QSPI_FR_IPIEF)
+			printf("Err: IP Command Trigger could not be executed Error Flag\n");
+		if (fr & QSPI_FR_IPGEF)
+			printf("Err: IP Command Trigger during AHB Grant Error Flag\n");
+		if (fr & QSPI_FR_RBOF)
+			printf("Error: RX Buffer Overflow\n");
+		if (fr & QSPI_FR_TBUF) {
+			int mcr;
+
+			mcr = qspi_readl(QSPI0_MCR);
+			mcr |= QSPI_MCR_CLR_TXF;
+			qspi_writel_clear(host, mcr, QSPI0_MCR,
+						~QSPI_MCR_CLR_TXF);
+			cmd->error = -EAGAIN;
+			printf("Error: TX Buffer Underrun Flag\n");
+		}
+		host->complete = 1;	/* any of these flags ends the command */
+	}
+
+	if (!host->use_dma && host->bytes_left > 0) {	/* PIO data phase */
+		if (cmd->n_tx && (fr & QSPI_FR_TBFF)) {
+			if (!qspi_fill_to_txbuff(host))
+				resr &= ~QSPI_RSER_TBFIE; /* all data queued */
+		}
+		if (cmd->n_rx && (fr & QSPI_FR_RBDF))
+			qspi_read_from_rxbuff(host);
+	}
+
+	if (fr & QSPI_FR_TFF)
+		qspi_xfer_done(host);
+	/* Restore the enables (minus TBFIE once all TX data is queued) */
+	qspi_writel_check(host, resr, QSPI0_RSER, QSPI_RSER_RESV);
+	return;
+}
+
+struct qspi_host * qspi_host_init(int cs, int mhz, int use_xip)
+{
+	struct qspi_host *host = &qspi_host;
+	u32 reg;
+	/* Reset the global host state, then bring the controller up */
+	memset(host, 0, sizeof(struct qspi_host));
+	host->cs_addr[QSPI_CS_A1] = QSPI0_FLASH_A1_BASE;
+	host->cs_addr[QSPI_CS_A2] = QSPI0_FLASH_A2_BASE;
+	host->cs_addr[QSPI_CS_B1] = QSPI0_FLASH_B1_BASE;
+	host->cs_addr[QSPI_CS_B2] = QSPI0_FLASH_B2_BASE;
+
+	host->use_intr = 0;
+	host->en_tx_dma = 1;	/* padded TX of >= 32 bytes goes through DMA */
+	host->use_xip = use_xip;
+
+	if (cpu_is_asr1806() || cpu_is_asr1903_b0()) {
+		host->has_dtr = 1;
+		host->support_dqs = 1;
+	}
+
+	qspi_config_mfp(cs);
+
+	/* Enable qspi clk, and release reset */
+	qspi_set_func_clk(host, mhz, 0, 0, 0, 0);
+
+	/* qspi softreset first */
+	qspi_invalid_ahb(host);
+
+	qspi_enter_mode(host, QSPI_DISABLE_MODE);
+
+	/* Give the default source address */
+	qspi_write_sfar(host, host->cs_addr[QSPI_CS_A1]);
+	qspi_writel_check(host, 0x0, QSPI0_SFACR, QSPI_SFACR_RESV);
+
+	/* The AHB buffers are configured later, from qspi_enable_xip() */
+
+	/* Set flash memory map */
+	qspi_writel(QSPI0_FLASH_A1_TOP & 0xfffffc00, QSPI0_SFA1AD);
+	qspi_writel(QSPI0_FLASH_A2_TOP & 0xfffffc00, QSPI0_SFA2AD);
+	qspi_writel(QSPI0_FLASH_B1_TOP & 0xfffffc00, QSPI0_SFB1AD);
+	qspi_writel(QSPI0_FLASH_B2_TOP & 0xfffffc00, QSPI0_SFB2AD);
+
+	/*
+	 * ISD3FB, ISD2FB, ISD3FA, ISD2FA = 1; ENDIAN = 0x3; END_CFG=0x3
+	 * DELAY_CLK4X_EN = 1
+	 */
+	reg = qspi_readl(QSPI0_MCR);
+	reg &= ~(QSPI_MCR_END_CFD_MASK | QSPI_MCR_ISDX_MASK);
+	reg &= ~(QSPI_MCR_DQS_EN | QSPI_MCR_DQS_LP_EN | QSPI_MCR_DQS_INV_EN);
+	reg |= QSPI_MCR_END_CFD_LE | 0xf << QSPI_MCR_ISDX_SHIFT;
+	if (host->has_dtr)
+		reg |= QSPI_MCR_DDR_EN;
+	else
+		reg &= ~QSPI_MCR_DDR_EN;
+	qspi_writel_check(host, reg, QSPI0_MCR, QSPI_MCR_RESV);
+
+	/* Module enabled */
+	qspi_enter_mode(host, QSPI_NORMAL_MODE);
+
+	/* Read using the IP Bus registers QSPI_RBDR0 to QSPI_RBDR31*/
+	qspi_write_rbct(host, QSPI_RBCT_RXBRD);
+
+	/* Interrupt mode is not wired up here: use_intr is set to 0 above */
+	/* and nothing in this function connects qspi_irq_handler() to an */
+	/* interrupt line. */
+	/* NOTE(review): qspi_config_interrupt() is not called here either. */
+
+	/* Report the transfer configuration chosen above */
+	printf("use_intr=%d en_tx_dma=%d use_xip=%d\n",
+		   host->use_intr, host->en_tx_dma, host->use_xip);
+	return host;
+}
+
+/*
+ * Poll FR until the current command ends.
+ *
+ * @host: controller state; host->cmd must point at the running command
+ *
+ * The wait ends on the transfer-finished flag (TFF) or on the first of
+ * the IP error flags, which is reported by printf(). Every non-zero FR
+ * value that is read is written straight back to FR. There is no timeout.
+ *
+ * Afterwards, based on the last FR value seen:
+ *  - RBOF is reported when the command has an rx_buf;
+ *  - TBUF, when the command has a tx_buf, clears the TX FIFO, sets
+ *    cmd->error to -EAGAIN and is reported.
+ */
+static void qspi_wait_cmd_done(struct qspi_host *host)
+{
+	struct spi_flash_cmd *cmd = host->cmd;
+	u32 fr;
+
+	/*
+	 * Known BUG:
+	 * Poll QSPI register during TX may lead to bus hang, add
+	 * a delay here for this requirement.
+	 */
+	if (cmd->n_tx && !host->use_intr)
+		udelay(5);
+
+	for (;;) {
+		fr = qspi_readl(QSPI0_FR);
+		if (fr)
+			qspi_writel(fr, QSPI0_FR);
+
+		/* the first matching flag, in this order, ends the wait */
+		if (fr & QSPI_FR_ILLINE)
+			printf("Err: Illegal Instruction Error Flag\n");
+		else if (fr & QSPI_FR_IUEF)
+			printf("Err: IP Command Usage Error Flag\n");
+		else if (fr & QSPI_FR_IPAEF)
+			printf("Err: IP Command Trigger during AHB Access Error Flag\n");
+		else if (fr & QSPI_FR_IPIEF)
+			printf("Err: IP Command Trigger could not be executed Error Flag\n");
+		else if (fr & QSPI_FR_IPGEF)
+			printf("Err: IP Command Trigger during AHB Grant Error Flag\n");
+		else if (fr & QSPI_FR_TFF)
+			qspi_writel(0x1, QSPI0_FR);
+		else
+			continue;
+		break;
+	}
+
+	if (cmd->rx_buf && (fr & QSPI_FR_RBOF))
+		printf("Error: RX Buffer Overflow\n");
+
+	if (cmd->tx_buf && (fr & QSPI_FR_TBUF)) {
+		int mcr = qspi_readl(QSPI0_MCR) | QSPI_MCR_CLR_TXF;
+
+		qspi_writel_clear(host, mcr, QSPI0_MCR, ~QSPI_MCR_CLR_TXF);
+		cmd->error = -EAGAIN;
+		printf("Error: TX Buffer Underrun Flag\n");
+	}
+}
+
+/*
+ * PIO read path for the current command.
+ *
+ * While the command runs, every RBDF event delivers host->wmrk + 1 words
+ * that are copied from the RBDR registers to cmd->rx_buf. After the
+ * command has finished, the words that were too few to raise another
+ * RBDF event are fetched according to RBSR[RDBFL].
+ *
+ * Both copy loops are bounded by the number of bytes still wanted, so
+ * memcpy() never gets a negative length when the hardware holds more
+ * words than the caller asked for.
+ */
+static void qspi_poll_rx_buff(struct qspi_host *host)
+{
+	struct spi_flash_cmd *cmd = host->cmd;
+	int left_bytes = host->bytes_left;
+	u32 data;
+	int rdbfl, i;
+
+	do {
+		/* Check RXWE flag for data coming */
+		if (!(qspi_readl(QSPI0_FR) & QSPI_FR_RBDF))
+			continue;
+
+		for (i = 0; i <= host->wmrk && left_bytes > 0; i++) {
+			data = qspi_readl(QSPI0_RBDR0 + i*4);
+			memcpy(&cmd->rx_buf[cmd->n_rx - left_bytes], &data,
+			       left_bytes < 4 ? left_bytes : 4);
+			left_bytes -= 4;
+		}
+
+		/* Set RBDF to trigger RX Buffer POP */
+		qspi_writel(QSPI_FR_RBDF, QSPI0_FR);
+
+		/* Left bytes < wmrk will not trigger RXWE */
+		if (left_bytes < 0 || (left_bytes + 3)/4 < (host->wmrk + 1))
+			break;
+	} while (1);
+
+	/* Wait cmd to be finished */
+	qspi_wait_cmd_done(host);
+
+	/* NOTE(review): '<=' visits rdbfl + 1 words - confirm RDBFL meaning */
+	if (left_bytes > 0) {
+		rdbfl = qspi_readl(QSPI0_RBSR);
+		rdbfl = (rdbfl & QSPI_RBSR_RDBFL_MASK) >> QSPI_RBSR_RDBFL_SHIFT;
+		for (i = 0; i <= rdbfl && left_bytes > 0; i++) {
+			data = qspi_readl(QSPI0_RBDR0 + i*4);
+			memcpy(&cmd->rx_buf[cmd->n_rx - left_bytes], &data,
+			       left_bytes < 4 ? left_bytes : 4);
+			left_bytes -= 4;
+		}
+	}
+
+	if (left_bytes > 0) {
+		printf("Error: Not read enough data: left_bytes=%d, cmd->n_rx=%d\n",
+			left_bytes, cmd->n_rx);
+	}
+}
+
+/*
+ * PIO write path: feed the rest of the transfer into TBDR, waiting while
+ * SR reports the TX buffer full, then wait for the command to finish.
+ * Padding up to QSPI_TX_BUFF_POP_MIN is zero; only real bytes are copied.
+ */
+static void qspi_fill_tx_buff(struct qspi_host *host)
+{
+	struct spi_flash_cmd *cmd = host->cmd;
+	int left_bytes = host->bytes_left;
+	int total_cnt, off, n;
+	u32 data;
+
+	total_cnt = ALIGN(cmd->n_tx, QSPI_TX_BUFF_POP_MIN);
+	while (left_bytes > 0) {
+		if (qspi_readl(QSPI0_SR) & QSPI_SR_TXFULL)
+			continue;
+
+		data = 0;
+		off = total_cnt - left_bytes;	/* position in the padded stream */
+		n = (int)cmd->n_tx - off;	/* real bytes left at that position */
+		if (n > 0)
+			memcpy(&data, &cmd->tx_buf[off], n < 4 ? n : 4);
+		qspi_writel(data, QSPI0_TBDR);
+		left_bytes -= 4;
+	}
+
+	host->bytes_left = left_bytes;
+	qspi_wait_cmd_done(host);
+}
+
+/* Finish the current command by polling; always returns 0 */
+int qspi_cmd_done_pio(struct qspi_host *host)
+{
+	const int has_rx = host->cmd->n_rx != 0;
+	const int has_tx = host->cmd->n_tx != 0;
+
+	if (has_rx)
+		qspi_poll_rx_buff(host);	/* read: drain the RX buffer */
+	else if (has_tx)
+		qspi_fill_tx_buff(host);	/* write: feed the TX buffer */
+	else
+		qspi_wait_cmd_done(host);	/* no data phase */
+	return 0;
+}
+
+int qspi_cmd_done_interrupt(struct qspi_host *host)
+{
+	while (!host->complete)	/* NOTE(review): flag must be volatile */
+		;
+	qspi_disable_interrupt(host);
+	return 0;
+}
+
+/*
+ * Start the TX DMA for the current command.
+ *
+ * The sequence is: map the QSPI TX request (QSPI_DMA_TX_DRCMR) to
+ * QSPI_DMA_TX_CHANNEL, set up the channel with pxa_dma_write() for
+ * host->bytes_left bytes of cmd->tx_buf, flush cmd->n_tx bytes of the
+ * buffer from the data cache and start the channel.
+ *
+ * NOTE(review): pxa_dma_write() is given QSPI0_TBDR - 4 rather than
+ * QSPI0_TBDR; the reason is not visible in this file.
+ *
+ * Always returns 0.
+ */
+int qspi_start_dma_xfer(struct qspi_host *host)
+{
+	struct spi_flash_cmd *cmd = host->cmd;
+	const u32 src = (u32)cmd->tx_buf;
+
+	/* route the TX request to the channel */
+	dmac_map_device_to_channel(QSPI_DMA_TX_DRCMR, QSPI_DMA_TX_CHANNEL);
+	dmac_user_aligment(QSPI_DMA_TX_CHANNEL);
+
+	pxa_dma_write(QSPI0_TBDR - 4, src, host->bytes_left,
+		      QSPI_DMA_TX_CHANNEL);
+
+	/* flush the payload from the d-cache, then kick the channel */
+	flush_dcache_range(src, src + cmd->n_tx);
+	dmac_start_transfer(QSPI_DMA_TX_CHANNEL);
+
+	return 0;
+}
+
+static int qspi_check_dtr(struct spi_flash_chip *chip)
+{
+	return chip->host->has_dtr;	/* the host's has_dtr flag, as is */
+}
+
+/*
+ * Set up memory-mapped reads with @xip_cfg, then program the fixed LUT
+ * sequences. Returns 0, or -1 if programming the fixed sequences fails.
+ */
+static int qspi_setup_memmap_read(struct spi_flash_chip *chip,
+				  struct spi_flash_cmd_cfg *xip_cfg)
+{
+	/* TODO: may need to change for different vendors */
+	qspi_enable_xip(chip, xip_cfg);
+	if (qspi_preinit_lookup_tbl(chip) >= 0)
+		return 0;
+
+	pr_info("preinit_lookup_tbl failed, check cmd table\n");
+	return -1;
+}
+
+/* Pass @addr through the BBM search hook; unchanged if no BBM is set */
+static int qspi_search_bbm_table(struct spi_flash_chip *chip, int addr)
+{
+	struct pxa3xx_bbm *bbm = chip->mtd->bbm;
+
+	if (bbm)
+		addr = bbm->search(chip->mtd, addr);
+	return addr;
+}
+
+#ifdef CONFIG_SPINAND_BITFLIP_SCRUB
+/* Scrub the page via BBM once @corrected reaches chip->refresh_threshold */
+static int qspi_low_level_scrub(struct spi_flash_chip *chip, int page_addr,
+				int corrected)
+{
+	struct pxa3xx_bbm *bbm = chip->mtd->bbm;
+
+	/* without a scrub hook or threshold the count is passed through */
+	if (!bbm || !bbm->scrub_read_disturb || !chip->refresh_threshold)
+		return corrected;
+
+	if (corrected >= chip->refresh_threshold)
+		bbm->scrub_read_disturb(chip->mtd,
+			page_addr << chip->mtd->writesize_shift);
+
+	/* Do not report bit-flip if bbm enabled */
+	return 0;
+}
+#endif
+
+/* Copy @len bytes at offset @from from the chip's AHB window into @buf */
+static int qspi_ahb_read(struct spi_flash_chip *chip,
+			 u8 *buf, u32 from, u32 len)
+{
+	unsigned long src = chip->host->cs_addr[chip->cs] + from;
+	memcpy(buf, (const void *)src, len);	/* base is an integer */
+	return 0;
+}
+
+/*
+ * Get ready for a read: clear the RX FIFO, program the RX watermark and
+ * set host->wmrk / host->bytes_left for the data phase.
+ * The drain loops read host->wmrk + 1 words per RBDF event.
+ */
+static void qspi_prepare_recv(struct qspi_host *host)
+{
+	const struct spi_flash_cmd *cmd = host->cmd;
+	u32 val;
+	int words;
+
+	/* Clear RX FIFO. Invalidate the RX Buffer */
+	val = qspi_readl(QSPI0_MCR) | QSPI_MCR_CLR_RXF;
+	qspi_writel_clear(host, val, QSPI0_MCR, ~QSPI_MCR_CLR_RXF);
+
+	/* whole transfer if it fits the RX buffer, else a watermark of 1 */
+	words = (cmd->n_rx <= (QSPI_RX_BUFF_MAX << 2)) ?
+		(ALIGN(cmd->n_rx, 4) >> 2) - 1 : 0x1;
+
+	val = (qspi_readl(QSPI0_RBCT) & ~QSPI_RBCT_WMRK_MASK) | words;
+	qspi_write_rbct(host, val);
+
+	host->wmrk = words;
+	host->bytes_left = cmd->n_rx;
+}
+
+/*
+ * Get ready for a write: clear the TX FIFO, then either arm the DMA path
+ * (en_tx_dma set and at least 32 padded bytes) or pre-load the TX buffer
+ * by PIO. host->bytes_left holds the padded byte count still to be sent.
+ */
+static void qspi_prepare_transmit(struct qspi_host *host)
+{
+	struct spi_flash_cmd *cmd = host->cmd;
+	int word, words, remain;
+	u32 val;
+
+	/* Clear TX FIFO/Buffer */
+	val = qspi_readl(QSPI0_MCR) | QSPI_MCR_CLR_TXF;
+	qspi_writel_clear(host, val, QSPI0_MCR, ~QSPI_MCR_CLR_TXF);
+
+	host->bytes_left = ALIGN(cmd->n_tx, QSPI_TX_BUFF_POP_MIN);
+	if (host->en_tx_dma && host->bytes_left >= 32) {
+		/* 32bytes watermark */
+		val = (qspi_readl(QSPI0_TBCT) & ~0x1f) | 0x7;
+		qspi_writel_check(host, val, QSPI0_TBCT, QSPI_TBCT_RESV);
+
+		host->wmrk = 0x7;
+		host->use_dma = 1;
+		return;
+	}
+
+	/* Copy initial data into the circular buffer */
+	host->use_dma = 0;
+	words = min(host->bytes_left >> 2, QSPI_TX_BUFF_MAX);
+	for (word = 0; word < words; word++) {
+		int data = 0;
+
+		remain = cmd->n_tx - (word << 2);
+		if (remain >= 4)
+			data = (*(u32 *)&cmd->tx_buf[word << 2]);
+		else if (remain > 0)
+			memcpy(&data, &cmd->tx_buf[word << 2], remain);
+
+		qspi_writel(data, QSPI0_TBDR);
+	}
+	host->bytes_left -= words << 2;
+}
+
+/*
+ * Run one flash command to completion through the IP command interface
+ * (interrupt, DMA or PIO per host settings) and return cmd->error.
+ */
+static int qspi_start_cmd(struct spi_flash_chip *chip,
+			  struct spi_flash_cmd *cmd)
+{
+	struct qspi_host *host = chip->host;
+	u32 ipcr, sfar = 0, fr, tmp;
+	int seq_id = cmd->cmd_cfg->seq_id;
+	int i;
+
+	/* Poll, without a timeout, until the controller is no longer busy */
+	do {
+		tmp = qspi_readl(QSPI0_SR);
+		if (!(tmp & QSPI_SR_BUSY))
+			break;
+		udelay(1);
+	} while (1);
+
+	host->cmd = cmd;
+	host->bytes_left = 0;
+	host->complete = 0;
+	host->use_dma = 0;
+
+	if (seq_id < 0 || !(host->lut_map & 1 << seq_id))
+		seq_id = qspi_update_shared_lut(chip, cmd);
+
+	/* Reset the IP sequence pointers */
+	tmp = qspi_readl(QSPI0_SPTRCLR) | QSPI_SPTRCLR_IPPTRC;
+	qspi_writel_clear(host, tmp, QSPI0_SPTRCLR, QSPI_SPTRCLR_RESV);
+
+	/* Set flash address to be accessed */
+	for (i = 0; i < cmd->n_addr; i++) {
+		sfar <<= 8;
+		sfar |= cmd->addr[i];
+	}
+	sfar += host->cs_addr[chip->cs];
+	qspi_write_sfar(host, sfar);
+
+	/*
+	 * AHB memory-map to be changed: flush the d-cache over the affected
+	 * range (start/stop addresses). For spi-nand only one page is
+	 * mapped, so AHB_MAP_SIZE_PAGE selects a page-sized range.
+	 */
+	if (host->use_xip && (cmd->flag & RST_AHB_DOMAIN)) {
+		u32 addr, size;
+
+		if (cmd->tx_buf) {
+			addr = sfar;
+			size = cmd->n_tx;
+		} else if (cmd->flag & AHB_MAP_SIZE_PAGE) {
+			addr = host->cs_addr[chip->cs];
+			size = chip->page_size;
+		} else {
+			addr = sfar;
+			size = chip->block_size;
+		}
+
+		flush_dcache_range(addr, addr + size);
+	}
+
+	/* Clear FR before trigger command */
+	fr = qspi_readl(QSPI0_FR);
+	if (fr)
+		qspi_writel(fr, QSPI0_FR);
+
+	/* Set SFACR to fix issue of 1-byte address command */
+	if (cmd->n_addr == 1)
+		qspi_writel_check(host, 0x8, QSPI0_SFACR, QSPI_SFACR_RESV);
+	if (cmd->n_rx) {
+		qspi_prepare_recv(host);
+	} else if (cmd->n_tx) {
+		qspi_prepare_transmit(host);
+		if (host->use_dma) {
+			qspi_enable_dma(host);
+			qspi_start_dma_xfer(host);
+			/*
+			 * The TX FIFO must hold some data before the command
+			 * is triggered, or a FIFO underflow may happen. DMA
+			 * needs time to fill it, but polling QSPI registers
+			 * may hang the bus (known bug), so just delay here.
+			 */
+			udelay(5);
+		}
+	}
+
+	/* trigger command */
+	ipcr = (seq_id << QSPI_IPCR_SEQID_SHIFT) & QSPI_IPCR_SEQID_MASK;
+	if (cmd->rx_buf)
+		ipcr |= (cmd->n_rx & 0xffff);
+	else if (cmd->tx_buf)
+		ipcr |= (cmd->n_tx & 0xffff);
+ restart:	/* re-issue point after a failed trigger */
+	qspi_writel(ipcr, QSPI0_IPCR);
+
+	if (cmd->n_tx && host->use_dma) {
+		int timeout = 10000;
+
+		do {
+			if (dmac_read_dcsr(QSPI_DMA_TX_CHANNEL) & DCSR_STOPSTATE) {
+				/*
+				 * Add extra delay to make sure dma transfer
+				 * finished on APB bus
+				 */
+				udelay(2);
+				break;
+			}
+
+			if (--timeout < 0) {
+				fr = qspi_readl(QSPI0_FR);
+				if (fr & (QSPI_FR_IPAEF | QSPI_FR_IPIEF |
+					  QSPI_FR_IPGEF)) {
+					printf("qspi: cmd trigger failed, "
+						"fr=0x%x. restart...\n", fr);
+					qspi_writel(fr, QSPI0_FR);
+					goto restart;
+				}
+
+				DCSR(QSPI_DMA_TX_CHANNEL) &= ~DCSR_RUN;
+				printf("err: qspi tx dma timeout\n");
+				if (fr & (QSPI_FR_TBUF | QSPI_FR_TFF)) {
+					cmd->error = -EAGAIN;
+					pr_debug("TX Buffer Underrun, retry\n");
+					break;
+				}
+				BUG();
+			}
+
+			udelay(1);
+		} while (1);
+	}
+
+	fr = qspi_readl(QSPI0_FR);
+	if (fr & (QSPI_FR_IPAEF | QSPI_FR_IPIEF | QSPI_FR_IPGEF)) {
+		printf("qspi: cmd trigger failed, fr=0x%x. restart...\n", fr);
+		qspi_writel(fr, QSPI0_FR);
+		goto restart;
+	}
+
+	if (host->use_intr) {
+		qspi_enable_interrupt(host);
+		qspi_cmd_done_interrupt(host);
+	} else {
+		if (host->use_dma) {
+			qspi_wait_cmd_done(host);
+			qspi_disable_dma(host);
+			host->bytes_left = 0;
+		} else {
+			qspi_cmd_done_pio(host);
+		}
+	}
+
+	/* Resume SFACR */
+	if (cmd->n_addr == 1)
+		qspi_writel_check(host, 0x0, QSPI0_SFACR, QSPI_SFACR_RESV);
+
+	if (cmd->flag & RST_AHB_DOMAIN)
+		qspi_invalid_ahb(host);
+
+	host->cmd = NULL;
+	return cmd->error;
+}
+
+/*
+ * Probe the SPI NAND (nand != 0) or SPI NOR (nand == 0) flash on chip
+ * select @cs: allocate the chip and mtd objects, hook up the QSPI
+ * callbacks, run the configured flash scan and pass @mhz (capped at
+ * chip->max_mhz) to qspi_set_func_clk(). Returns 0, or -ENOMEM when an
+ * allocation fails.
+ * NOTE(review): the scan calls' results (if any) are ignored and nothing
+ * jumps to err3 - confirm whether a failed scan should unwind through it.
+ */
+int asr_qspi_probe_flash(int nand, int cs, int mhz,
+			 int rx_mode, int tx_mode)
+{
+	struct qspi_host *host = qspi_host_init(cs, 13, 1);
+	struct spi_flash_chip *chip;
+	struct mtd_info *mtd;
+	int ret = -ENOMEM;
+
+	chip = kzalloc(sizeof(*chip), GFP_KERNEL);
+	if (!chip)
+		goto err1;
+
+	/* Wire the chip to this controller and its low-level operations */
+	chip->host = host;
+	chip->cs = (cs < QSPI_CS_MAX) ? cs : QSPI_CS_A1;
+	chip->rx_mode = rx_mode;
+	chip->tx_mode = tx_mode;
+	chip->bus_clk = host->bus_clk;
+	chip->issue_cmd = qspi_start_cmd;
+	chip->memmap_read = qspi_ahb_read;
+	chip->setup_memmap_read = qspi_setup_memmap_read;
+	chip->search_bbm_table = qspi_search_bbm_table;
+	chip->check_dtr = qspi_check_dtr;
+#ifdef CONFIG_SPINAND_BITFLIP_SCRUB
+	chip->low_level_scrub = qspi_low_level_scrub;
+#endif
+
+	mtd = kzalloc(sizeof(*mtd), GFP_KERNEL);
+	if (!mtd)
+		goto err2;
+
+	chip->mtd = mtd;
+	mtd->priv = chip;
+
+	/* Init rx_max_len/tx_max_len because spi_nand_scan_tail may need this */
+	chip->rx_max_len = QSPI_RX_BUFF_MAX << 2;
+	chip->tx_max_len = QSPI_TX_BUFF_MAX << 2;
+	if (nand) {
+		chip->name = "nand0";
+		chip->options |= BBT_RELOCATION_IFBAD;
+#ifdef CONFIG_BBM
+		chip->scan_bbt = pxa3xx_scan_bbt;
+		chip->block_bad = pxa3xx_block_bad;
+		chip->block_markbad = pxa3xx_block_markbad;
+#endif
+#ifdef CONFIG_CMD_SPIFLASH_NAND
+		spi_nand_scan_ident(mtd);
+		spi_nand_scan_tail(mtd);
+#endif
+	} else {
+		chip->name = "nor0";
+#ifdef CONFIG_CMD_SPIFLASH_NOR
+		spi_nor_scan_ident(mtd);
+		spi_nor_scan_tail(mtd);
+#endif
+	}
+
+	if (chip->max_mhz && mhz > chip->max_mhz) {
+		printf("warn: device max supported frequency is %d MHz!!!\n",
+			chip->max_mhz);
+		mhz = chip->max_mhz;
+	}
+
+	qspi_set_func_clk(host, mhz, chip->options & SPINAND_SUPPORT_DTR,
+		chip->tclqv, chip->tset, chip->thold);
+
+	/* Final limits: a page with TX DMA / XIP read, else the buffer size */
+	chip->tx_max_len = host->en_tx_dma ? chip->page_size :
+					     (QSPI_TX_BUFF_MAX << 2);
+	chip->rx_max_len = chip->xip_read ? chip->page_size :
+					    (QSPI_RX_BUFF_MAX << 2);
+
+#ifdef CONFIG_CMD_UBI
+	add_mtd_device(mtd);
+#endif
+	return 0;
+err3:
+	kfree(mtd);
+err2:
+	kfree(chip);
+err1:
+	return ret;
+}