ASR_BASE

Change-Id: Icf3719cc0afe3eeb3edc7fa80a2eb5199ca9dda1
diff --git a/marvell/linux/drivers/crypto/asr/Kconfig b/marvell/linux/drivers/crypto/asr/Kconfig
new file mode 100644
index 0000000..0e936f3
--- /dev/null
+++ b/marvell/linux/drivers/crypto/asr/Kconfig
@@ -0,0 +1,50 @@
+# SPDX-License-Identifier: GPL-2.0
+menuconfig	ASR_BCM
+	bool	"Support ASR Trust Engine (BCM)"
+	depends on CPU_ASR1901
+	default	n
+
+config	ASR_BCM_SHA
+	bool	"Support ASR BCM SHA engine"
+	depends on ASR_BCM
+	default	n
+	help
+	  This will enable the ASR BCM SHA driver.
+
+config	ASR_BCM_CIPHER
+	bool	"Support ASR BCM CIPHER engine"
+	depends on ASR_BCM
+	default	n
+	help
+	  This will enable the ASR BCM CIPHER driver.
+
+config	ASR_HW_RNG
+	bool	"Support ASR Hardware rng"
+	depends on CPU_ASR1901
+	default	n
+
+config	ASR_TE200
+	bool	"Support ASR Trust Engine(TE200)"
+	depends on CPU_ASR1903
+	default	n
+
+config	ASR_TE200_CIPHER
+	bool	"Support ASR TE200 CIPHER engine"
+	depends on ASR_TE200
+	default	n
+	help
+	  This will enable the ASR TE200 CIPHER driver.
+
+config	ASR_TE200_SHA
+	bool	"Support ASR TE200 SHA engine"
+	depends on ASR_TE200
+	default	n
+	help
+	  This will enable the ASR TE200 SHA driver.
+
+config	ASR_TE200_RSA
+	bool	"Support ASR TE200 RSA engine"
+	depends on ASR_TE200
+	default	n
+	help
+	  This will enable the ASR TE200 RSA driver.
\ No newline at end of file
diff --git a/marvell/linux/drivers/crypto/asr/Makefile b/marvell/linux/drivers/crypto/asr/Makefile
new file mode 100644
index 0000000..0dea046
--- /dev/null
+++ b/marvell/linux/drivers/crypto/asr/Makefile
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
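+# With CONFIG_OPTEE=y the OP-TEE-backed variants (bcm_optee/, te200_optee/,
+# asr-rng-optee.o) are built and reach the engines through the TEE client
+# API; otherwise the native drivers below program the registers directly.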
+ifeq ($(CONFIG_OPTEE),y)
+obj-y += bcm_optee/
+obj-$(CONFIG_ASR_HW_RNG) += asr-rng-optee.o
+obj-y += te200_optee/
+else
+obj-y += bcm/
+obj-$(CONFIG_ASR_HW_RNG) += asr-rng.o
+obj-y += te200/
+obj-y += asr_aes_clk.o
+endif
\ No newline at end of file
diff --git a/marvell/linux/drivers/crypto/asr/asr-rng-optee.c b/marvell/linux/drivers/crypto/asr/asr-rng-optee.c
new file mode 100644
index 0000000..d31fbfc
--- /dev/null
+++ b/marvell/linux/drivers/crypto/asr/asr-rng-optee.c
@@ -0,0 +1,247 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * linux/drivers/crypto/asr/asr-rng-optee.c - Random Number Generator driver (OP-TEE backed)
+ *
+ * Copyright (C) 2023 ASR Micro Limited
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+#include <linux/of.h>
+#include <linux/clk.h>
+#include <linux/hw_random.h>
+#include <linux/io.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#ifdef CONFIG_TEE
+#include <linux/tee_drv.h>
+#endif
+
+#include "asr-rng-optee.h"
+
+static struct teec_uuid pta_rng_uuid = ASR_RNG_ACCESS_UUID;
+
+static void asrrng_uuid_to_octets(uint8_t d[TEE_IOCTL_UUID_LEN], struct teec_uuid *s)
+{
+	d[0] = s->timeLow >> 24;
+	d[1] = s->timeLow >> 16;
+	d[2] = s->timeLow >> 8;
+	d[3] = s->timeLow;
+	d[4] = s->timeMid >> 8;
+	d[5] = s->timeMid;
+	d[6] = s->timeHiAndVersion >> 8;
+	d[7] = s->timeHiAndVersion;
+	memcpy(d + 8, s->clockSeqAndNode, sizeof(s->clockSeqAndNode));
+}
+
+static int asrrng_tee_match_cb(struct tee_ioctl_version_data *ver, const void *data)
+{
+	return 1;
+}
+
+static int asrrng_optee_open_ta(struct asrrng_tee_context *ctx, struct teec_uuid *uuid)
+{
+	struct tee_ioctl_open_session_arg open_session_arg;
+	int ret;
+
+	if (ctx == NULL)
+		return -EINVAL;
+
+	ctx->session = 0;
+	ctx->tee_ctx = tee_client_open_context(NULL, asrrng_tee_match_cb, NULL, NULL);
+	if (IS_ERR(ctx->tee_ctx)) {
+		ret = PTR_ERR(ctx->tee_ctx);
+		ctx->tee_ctx = NULL;
+		return ret;
+	}
+
+	memset(&open_session_arg, 0x0, sizeof(struct tee_ioctl_open_session_arg));
+	asrrng_uuid_to_octets(open_session_arg.uuid, uuid);
+	open_session_arg.clnt_login = TEE_IOCTL_LOGIN_PUBLIC;
+	open_session_arg.num_params = 0;
+	ret = tee_client_open_session(ctx->tee_ctx, &open_session_arg, NULL);
+	if (ret != 0) {
+		goto err_exit;
+	} else if (open_session_arg.ret != 0) {
+		ret = -EIO;
+		goto err_exit;
+	}
+
+	ctx->session = open_session_arg.session;
+
+	return ret;
+err_exit:
+	tee_client_close_context(ctx->tee_ctx);
+	ctx->tee_ctx = NULL;
+	return ret;
+}
+
+static int asrrng_optee_close_ta(struct asrrng_tee_context *ctx)
+{
+	int ret;
+
+	if (ctx == NULL)
+		return -EINVAL;
+
+	ret = tee_client_close_session(ctx->tee_ctx, ctx->session);
+
+	tee_client_close_context(ctx->tee_ctx);
+
+	return ret;
+}
+
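+/*
+ * Flow of the helper below: open a session to the RNG pseudo-TA, allocate
+ * a shared-memory buffer of @len bytes, invoke @cmd with a single
+ * MEMREF_OUTPUT parameter, copy at most @len returned bytes into @buff,
+ * then free the buffer and close the session again.
+ */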
+static int asrrng_optee_acquire_ta(struct teec_uuid *uuid, u32 cmd, void *buff, size_t len, size_t *outlen)
+{
+	struct tee_ioctl_invoke_arg invoke_arg;
+	struct tee_param params;
+	struct asrrng_tee_context asrrng_tee_ctx;
+	struct tee_shm *shm;
+	int ret = 0;
+	size_t size;
+	char *ma = NULL;
+
+	ret = asrrng_optee_open_ta(&asrrng_tee_ctx, uuid);
+	if (ret != 0) {
+		return ret;
+	}
+
+	memset(&invoke_arg, 0x0, sizeof(struct tee_ioctl_invoke_arg));
+	invoke_arg.func = cmd;
+	invoke_arg.session  = asrrng_tee_ctx.session;
+	invoke_arg.num_params = 1;
+
+	shm = tee_shm_alloc(asrrng_tee_ctx.tee_ctx, len, TEE_SHM_MAPPED | TEE_SHM_DMA_BUF);
+	if (IS_ERR(shm)) {
+		ret = PTR_ERR(shm);
+		goto exit;
+	}
+
+	params.attr = TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_OUTPUT;
+	params.u.memref.shm_offs = 0;
+
+	params.u.memref.size = len;
+	params.u.memref.shm = shm;
+
+	ret = tee_client_invoke_func(asrrng_tee_ctx.tee_ctx, &invoke_arg, &params);
+	if (ret != 0) {
+		goto free_shm;
+	} else if (invoke_arg.ret != 0) {
+		ret = -EIO;
+		goto free_shm;
+	}
+
+	size = (params.u.memref.size > len) ? len : params.u.memref.size;
+	ma = tee_shm_get_va(shm, 0);
+	if (IS_ERR(ma)) {
+		ret = PTR_ERR(ma);
+		goto free_shm;
+	}
+	memcpy(buff, ma, size);
+
+	if (outlen)
+		*outlen = size;
+
+free_shm:
+	tee_shm_free(shm);
+exit:
+	asrrng_optee_close_ta(&asrrng_tee_ctx);
+	return ret;
+}
+
+static int asr_rng_read(struct hwrng *rng, void *data, size_t max, bool wait)
+{
+	int ret = 0;
+	size_t readsize;
+	size_t size = max < 4096 ? max : 4096;
+
+	(void)wait;
+
+	ret = asrrng_optee_acquire_ta(&pta_rng_uuid, ASR_RNG_GET_DATA,
+		data, size, &readsize);
+
+	if (!ret)
+		return readsize;
+
+	return 0;
+}
+
+static int asr_rng_probe(struct platform_device *pdev)
+{
+    struct asr_rng *prng;
+    struct device *dev = &pdev->dev;
+    int err = 0;
+
+    prng = devm_kzalloc(&pdev->dev, sizeof(*prng), GFP_KERNEL);
+    if (prng == NULL) 
+        return -ENOMEM;
+
+    prng->dev = dev;
+    platform_set_drvdata(pdev, prng);
+
+	prng->rng.name = "asr";
+	prng->rng.read = asr_rng_read;
+	prng->rng.quality = 1000;
+
+	err = hwrng_register(&prng->rng);
+	if (err) {
+		dev_err(dev, "failed to register asr_rng!\n");
+        goto res_err;
+	}
+
+    dev_info(dev, "H/W RNG is initialized\n");
+    return 0;
+
+res_err:
+    devm_kfree(dev, prng);
+    dev_err(dev, "initialization failed.\n");
+
+    return err;
+}
+
+static int asr_rng_remove(struct platform_device *pdev)
+{
+    struct asr_rng *prng;
+
+    prng = platform_get_drvdata(pdev);
+    if (!prng) {
+        return -ENODEV;
+    }
+	hwrng_unregister(&prng->rng);
+
+    devm_kfree(prng->dev, prng);
+
+    return 0;
+}
+
+#if defined(CONFIG_OF)
+static const struct of_device_id asr_rng_dt_ids[] = {
+    { .compatible = "asr,asr-hwrng" },
+    { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, asr_rng_dt_ids);
+#endif
+
+static struct platform_driver asr_rng_driver = {
+    .probe		= asr_rng_probe,
+    .remove		= asr_rng_remove,
+    .driver		= {
+        .name	= "asr_rng",
+        .of_match_table = of_match_ptr(asr_rng_dt_ids),
+    },
+};
+
+static int __init asr_random_init(void)
+{
+    int ret;
+
+    ret = platform_driver_register(&asr_rng_driver);
+
+    return ret;
+}
+
+device_initcall_sync(asr_random_init);
+
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Yu Zhang <yuzhang@asrmicro.com>");
+MODULE_AUTHOR("Wang Yonggan <wangyonggan@asrmicro.com>");
+MODULE_DESCRIPTION("ASR H/W RNG driver with optee-os");
diff --git a/marvell/linux/drivers/crypto/asr/asr-rng-optee.h b/marvell/linux/drivers/crypto/asr/asr-rng-optee.h
new file mode 100644
index 0000000..cfe18c0
--- /dev/null
+++ b/marvell/linux/drivers/crypto/asr/asr-rng-optee.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _ASR_RNG_OPTEE_
+#define _ASR_RNG_OPTEE_
+
+#define ASR_RNG_GET_DATA 	0x1
+
+#define ASR_RNG_ACCESS_UUID \
+		{ \
+			0x185e0a22, 0x969f, 0x43b9, \
+			{ 0xbb, 0x94, 0x66, 0xe2, 0x88, 0x8e, 0x26, 0x26 } \
+		}
+
+struct asrrng_tee_context {
+	struct tee_context *tee_ctx;
+	int session;
+};
+
+struct teec_uuid {
+	uint32_t timeLow;
+	uint16_t timeMid;
+	uint16_t timeHiAndVersion;
+	uint8_t clockSeqAndNode[8];
+};
+
+struct asr_rng {
+	struct device		*dev;
+	struct hwrng rng;
+};
+
+#endif
diff --git a/marvell/linux/drivers/crypto/asr/asr-rng.c b/marvell/linux/drivers/crypto/asr/asr-rng.c
new file mode 100644
index 0000000..8d8b5ca
--- /dev/null
+++ b/marvell/linux/drivers/crypto/asr/asr-rng.c
@@ -0,0 +1,341 @@
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+#include <linux/of.h>
+#include <linux/clk-provider.h>
+#include <linux/clk.h>
+#include <linux/cputype.h>
+#include <linux/hw_random.h>
+#include <linux/io.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/delay.h>
+
+#include "asr-rng.h"
+#include "asr_aes_clk.h"
+
+static inline u32 rng_read(struct asr_rng *hwrng, u32 offset)
+{
+    u32 value = readl_relaxed(hwrng->io_base + offset);
+	return value;
+}
+
+static inline void rng_write(struct asr_rng *hwrng, u32 offset, u32 val)
+{
+	writel_relaxed(val, hwrng->io_base + offset);
+}
+
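+/*
+ * Programming sequence implemented below: write a software seed (taken
+ * from jiffies), set CTRL_RNG_SEED_EN and poll CTRL_RNG_SEED_VALID, clear
+ * the SQU FIFO, then set CTRL_RNG_EN and poll CTRL_RNG_VALID before
+ * reading RNG_DATA.  A return value of 0 is treated as a failure by the
+ * caller.
+ */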
+static int _rng_read(struct asr_rng *hwrng, bool wait)
+{
+	uint32_t val, random;
+    uint32_t cnt = 0;
+
+	/* generate software seed */
+	rng_write(hwrng, RNG_SEED_VAL, jiffies & 0xFFFFFFFF);
+
+	val = rng_read(hwrng, RNG_CTRL);
+	val |= CTRL_RNG_SEED_EN;
+	rng_write(hwrng, RNG_CTRL, val);
+
+	do {
+		val = rng_read(hwrng, RNG_CTRL);
+
+		if (cnt >= 100*1000) {
+			dev_err(hwrng->dev, "fail to generate rng seed, time out !");
+			return 0;
+		}
+        udelay(1);
+        cnt++;
+	} while(!(val & CTRL_RNG_SEED_VALID));
+
+	/* clr squ fifo */
+	rng_write(hwrng, RNG_SQU_CTRL, SQU_CTRL_FIFO_CLR);
+
+	/* generate random value */
+	val = rng_read(hwrng, RNG_CTRL);
+	val |= CTRL_RNG_EN;
+	rng_write(hwrng, RNG_CTRL, val);
+
+    cnt = 0;
+    do {
+		val = rng_read(hwrng, RNG_CTRL);
+
+		if (cnt >= 100*1000) {
+			dev_err(hwrng->dev, "fail to generate rng, time out !");
+			return 0;
+		}
+        udelay(1);
+        cnt++;
+	} while(!(val & CTRL_RNG_VALID));
+
+    cnt = 0;
+	/* get random value */
+	do {
+		random = rng_read(hwrng, RNG_DATA);
+
+        if (wait) {
+		    if (cnt >= 100*1000) {
+                dev_err(hwrng->dev, "fail to generate rng, time out !");
+                return 0;
+            }
+            udelay(1);
+            cnt++;
+        } else {
+            break;
+        }
+	} while(random == 0 || random == hwrng->rn_saved);
+
+    hwrng->rn_saved = random;
+	return random;
+}
+
+static int asr_rng_disable(struct asr_rng *hwrng)
+{
+	u32 val;
+
+	val = rng_read(hwrng, RNG_CTRL);
+	val &= ~CTRL_RNG_SEED_EN;
+	rng_write(hwrng, RNG_CTRL, val);
+
+	val = rng_read(hwrng, RNG_CTRL);
+	val &= ~CTRL_RNG_EN;
+	rng_write(hwrng, RNG_CTRL, val);
+
+	return 0;
+}
+
+static int asr_rng_read(struct hwrng *rng, void *buf, size_t max, bool wait)
+{
+	unsigned int val;
+	u32 *data = (u32 *)buf;
+	size_t read = 0;
+
+	struct asr_rng *hwrng = (struct asr_rng *)rng->priv;
+	struct asr_rng_ops *rng_ops = hwrng->rng_ops;
+
+	rng_ops->dev_get(hwrng);
+
+	while (read < max) {
+		val = _rng_read(hwrng, wait);
+        if (!val) {
+            rng_ops->dev_put(hwrng);
+            return read;
+        }
+        *data = val;
+        data++;
+        read += 4;
+	}
+
+	asr_rng_disable(hwrng);
+	rng_ops->dev_put(hwrng);
+
+	return read;
+}
+
+static struct hwrng asr_rng = {
+	.name		= "asr",
+	.read		= asr_rng_read,
+	.quality = 1000,
+};
+
+
+#if defined(CONFIG_OF)
+static const struct of_device_id asr_rng_dt_ids[] = {
+    { .compatible = "asr,asr-hwrng" },
+    { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, asr_rng_dt_ids);
+#endif
+
+static int asr_rng_clk_sync(struct asr_rng *rng)
+{
+    struct clk *rng_clk;
+
+    if (rng->clk_synced)
+        return 0;
+
+    rng_clk =  rng->rng_clk;
+    /*
+     * The BCM clk will be disabled by the CP core, but the enable count
+     * is still 1.  Sync the clk enable state here and re-enable the clk.
+     */
+    if (__clk_is_enabled(rng_clk) == false &&
+        __clk_get_enable_count(rng_clk))
+    {
+        asr_aes_clk_put(rng_clk);
+        asr_aes_clk_get(rng_clk);
+        rng->clk_synced = 1;
+        dev_dbg(rng->dev, "sync rng clk done\n");
+        return 1;
+    }
+
+    return 0;
+}
+
+static int asr_rng_dev_get(struct asr_rng *rng)
+{
+    mutex_lock(&rng->rng_lock);
+
+    asr_rng_clk_sync(rng);
+    asr_aes_clk_get(rng->rng_clk);
+
+    return 0;
+}
+
+static int asr_rng_dev_put(struct asr_rng *rng)
+{
+
+    asr_aes_clk_put(rng->rng_clk);
+
+    mutex_unlock(&rng->rng_lock);
+    return 0;
+}
+
+static struct asr_rng_ops rng_ops = {
+    .dev_get = asr_rng_dev_get,
+    .dev_put = asr_rng_dev_put,
+};
+
+#ifdef CONFIG_PM
+static int asr_rng_suspend(struct device *dev)
+{
+    struct asr_rng *prng = dev_get_drvdata(dev);
+
+    asr_aes_clk_put(prng->rng_clk);
+
+    return 0;
+}
+
+static int asr_rng_resume(struct device *dev)
+{
+    struct asr_rng *prng = dev_get_drvdata(dev);
+
+    return asr_aes_clk_get(prng->rng_clk);
+}
+
+static const struct dev_pm_ops asr_rng_pm_ops = {
+    .suspend	= asr_rng_suspend,
+    .resume		= asr_rng_resume,
+};
+#endif /* CONFIG_PM */
+
+static int asr_rng_probe(struct platform_device *pdev)
+{
+    struct asr_rng *prng;
+    struct device *dev = &pdev->dev;
+    struct resource *rng_res;
+    int err = 0;
+
+    prng = devm_kzalloc(&pdev->dev, sizeof(*prng), GFP_KERNEL);
+    if (prng == NULL) 
+        return -ENOMEM;
+
+    prng->dev = dev;
+    prng->rng_ops = &rng_ops;
+    platform_set_drvdata(pdev, prng);
+
+    mutex_init(&prng->rng_lock);
+
+    /* Get the base address */
+    rng_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+    if (!rng_res) {
+        dev_err(dev, "no MEM resource info\n");
+        err = -ENODEV;
+        goto res_err;
+    }
+    prng->phys_base = rng_res->start;
+
+    /* Initializing the clock */
+    prng->rng_clk = devm_clk_get(&pdev->dev, NULL);
+    if (IS_ERR(prng->rng_clk)) {
+        dev_err(dev, "clock initialization failed.\n");
+        err = PTR_ERR(prng->rng_clk);
+        goto res_err;
+    }
+    prng->clk_synced = 0;
+
+    prng->io_base = devm_ioremap_resource(&pdev->dev, rng_res);
+    if (IS_ERR(prng->io_base)) {
+        dev_err(dev, "can't ioremap\n");
+        err = PTR_ERR(prng->io_base);
+        goto res_err;
+    }
+
+    err = clk_prepare(prng->rng_clk);
+    if (err)
+        goto res_err;
+
+    err = asr_aes_clk_get(prng->rng_clk);
+    if (err)
+        goto rng_clk_unprepare;
+    refcount_set(&prng->refcount, 1);
+
+	prng->rn_saved = 0xdeadbeef;
+	prng->hwrng = &asr_rng;
+	asr_rng.priv = (unsigned long)prng;
+
+	err = hwrng_register(&asr_rng);
+	if (err) {
+		dev_err(dev, "failed to register asr_rng!\n");
+        goto rng_asr_aes_clk_put;
+	}
+
+    dev_info(dev, "H/W RNG is initialized\n");
+    return 0;
+
+rng_asr_aes_clk_put:
+    asr_aes_clk_put(prng->rng_clk);
+rng_clk_unprepare:
+    clk_unprepare(prng->rng_clk);
+res_err:
+    devm_kfree(dev, prng);
+    dev_err(dev, "initialization failed.\n");
+
+    return err;
+}
+
+static int asr_rng_remove(struct platform_device *pdev)
+{
+    struct asr_rng *prng;
+
+    prng = platform_get_drvdata(pdev);
+    if (!prng)
+        return -ENODEV;
+
+    hwrng_unregister(prng->hwrng);
+
+    asr_aes_clk_put(prng->rng_clk);
+    clk_unprepare(prng->rng_clk);
+
+    devm_kfree(prng->dev, prng);
+
+    return 0;
+}
+
+static struct platform_driver asr_rng_driver = {
+    .probe		= asr_rng_probe,
+    .remove		= asr_rng_remove,
+    .driver		= {
+        .name	= "asr_rng",
+#ifdef CONFIG_PM
+        .pm	= &asr_rng_pm_ops,
+#endif
+        .of_match_table = of_match_ptr(asr_rng_dt_ids),
+    },
+};
+
+static int __init asr_random_init(void)
+{
+    int ret;
+
+    ret = platform_driver_register(&asr_rng_driver);
+
+    return ret;
+}
+
+device_initcall_sync(asr_random_init);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Yu Zhang <yuzhang@asrmicro.com>");
+MODULE_AUTHOR("Wang Yonggan <yongganwnag@asrmicro.com>");
+MODULE_DESCRIPTION("ASR H/W RNG driver");
diff --git a/marvell/linux/drivers/crypto/asr/asr-rng.h b/marvell/linux/drivers/crypto/asr/asr-rng.h
new file mode 100644
index 0000000..74cfce6
--- /dev/null
+++ b/marvell/linux/drivers/crypto/asr/asr-rng.h
@@ -0,0 +1,50 @@
+#ifndef _ASR_RNG_H_
+#define _ASR_RNG_H_
+
+#include <crypto/aes.h>
+#include <linux/crypto.h>
+#include <crypto/algapi.h>
+#include <linux/interrupt.h>
+#include <linux/mutex.h>
+#include <linux/miscdevice.h>
+
+#define RNG_SYTE_CNT				(0x04)
+#define RNG_SRC_ADDR				(0x14)
+#define RNG_DEST_ADDR				(0x24)
+#define RNG_NEXTDEST_ADDR			(0x34)
+#define RNG_SQU_CTRL				(0x44)
+#define RNG_CURR_DESC_PTR			(0x74)
+#define RNG_INT_MASK				(0x84)
+#define RNG_INT_STATUS				(0xa4)
+#define RNG_CTRL					(0xc0)
+#define RNG_DATA					(0xc4)
+#define RNG_SEED_VAL				(0xc8)
+
+#define SQU_CTRL_FIFO_CLR			(1 << 30)
+#define CTRL_RNG_VALID				(1 << 31)
+#define CTRL_RNG_SEED_VALID			(1 << 30)
+#define CTRL_RNG_SEED_EN			(1 << 1)
+#define CTRL_RNG_EN					(1 << 0)
+
+struct asr_rng {
+	struct device		*dev;
+    unsigned long		phys_base;
+	void __iomem		*io_base;
+	void __iomem		*seed_base;
+	struct hwrng *hwrng;
+	unsigned int rn_saved;
+
+    struct mutex	rng_lock;
+    struct clk		*rng_clk;
+    int			clk_synced;
+    refcount_t	refcount;
+
+    struct asr_rng_ops	*rng_ops;
+};
+
+struct asr_rng_ops {
+    int (*dev_get)(struct asr_rng *);
+    int (*dev_put)(struct asr_rng *);
+};
+
+#endif
\ No newline at end of file
diff --git a/marvell/linux/drivers/crypto/asr/asr_aes_clk.c b/marvell/linux/drivers/crypto/asr/asr_aes_clk.c
new file mode 100644
index 0000000..d4510f5
--- /dev/null
+++ b/marvell/linux/drivers/crypto/asr/asr_aes_clk.c
@@ -0,0 +1,51 @@
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/clk-provider.h>
+#include <linux/clk.h>
+#include <linux/mutex.h>
+
+struct asr_crypto {
+	struct clk *aes_clk;
+	refcount_t	refcount;
+	struct mutex clk_lock;
+};
+
+static struct asr_crypto asr_crypto = {
+	.aes_clk = NULL,
+	.refcount = REFCOUNT_INIT(0),
+	.clk_lock = __MUTEX_INITIALIZER(asr_crypto.clk_lock),
+};
+
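+/*
+ * A single global refcount is kept across all callers (BCM, RNG, ...), so
+ * the clock is only gated again once the last user calls the put helper;
+ * callers are expected to pass the same prepared clock.  Illustrative
+ * usage, mirroring the probe paths in asr-rng.c / asr-bcm.c:
+ *
+ *	clk_prepare(clk);	// once at probe time
+ *	asr_aes_clk_get(clk);	// ungate before touching the engine
+ *	...program registers...
+ *	asr_aes_clk_put(clk);	// balanced put when done
+ */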
+int asr_aes_clk_get(struct clk *aes_clk)
+{
+	mutex_lock(&asr_crypto.clk_lock);
+
+	if (asr_crypto.aes_clk != aes_clk) {
+		asr_crypto.aes_clk = aes_clk;
+	}
+
+	if (refcount_read(&asr_crypto.refcount) == 0) {
+		clk_enable(asr_crypto.aes_clk);
+		refcount_set(&asr_crypto.refcount, 1);
+	} else {
+		refcount_inc(&asr_crypto.refcount);
+	}
+
+	mutex_unlock(&asr_crypto.clk_lock);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(asr_aes_clk_get);
+
+int asr_aes_clk_put(struct clk *aes_clk)
+{
+	mutex_lock(&asr_crypto.clk_lock);
+
+	if (refcount_dec_and_test(&asr_crypto.refcount)) {
+		clk_disable(asr_crypto.aes_clk);
+	}
+
+	mutex_unlock(&asr_crypto.clk_lock);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(asr_aes_clk_put);
\ No newline at end of file
diff --git a/marvell/linux/drivers/crypto/asr/asr_aes_clk.h b/marvell/linux/drivers/crypto/asr/asr_aes_clk.h
new file mode 100644
index 0000000..b73ae20
--- /dev/null
+++ b/marvell/linux/drivers/crypto/asr/asr_aes_clk.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _ASR_AES_CLK_
+#define _ASR_AES_CLK_
+
+struct clk;
+
+int asr_aes_clk_get(struct clk *aes_clk);
+int asr_aes_clk_put(struct clk *aes_clk);
+#endif
\ No newline at end of file
diff --git a/marvell/linux/drivers/crypto/asr/bcm/Makefile b/marvell/linux/drivers/crypto/asr/bcm/Makefile
new file mode 100644
index 0000000..de8120f
--- /dev/null
+++ b/marvell/linux/drivers/crypto/asr/bcm/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_ASR_BCM) += asr-bcm.o
+obj-$(CONFIG_ASR_BCM_CIPHER) += asr-cipher.o
+obj-$(CONFIG_ASR_BCM_SHA) += asr-sha.o
\ No newline at end of file
diff --git a/marvell/linux/drivers/crypto/asr/bcm/asr-bcm.c b/marvell/linux/drivers/crypto/asr/bcm/asr-bcm.c
new file mode 100644
index 0000000..bc2e700
--- /dev/null
+++ b/marvell/linux/drivers/crypto/asr/bcm/asr-bcm.c
@@ -0,0 +1,539 @@
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/clk-provider.h>
+#include <linux/clk.h>
+#include <linux/io.h>
+#include <linux/hw_random.h>
+#include <linux/platform_device.h>
+#include <linux/device.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/scatterlist.h>
+#include <linux/dma-mapping.h>
+#include <linux/of_device.h>
+#include <linux/delay.h>
+#include <linux/crypto.h>
+#include <crypto/scatterwalk.h>
+#include <crypto/algapi.h>
+
+#include "asr-bcm.h"
+#include "../asr_aes_clk.h"
+
+static inline u32 asr_bcm_read(struct asr_bcm_dev *dd, u32 offset)
+{
+    u32 value = readl_relaxed(dd->io_base + offset);
+
+    return value;
+}
+
+static inline void asr_bcm_write(struct asr_bcm_dev *dd,
+                    u32 offset, u32 value)
+{
+    writel_relaxed(value, dd->io_base + offset);
+}
+
+int dma_input_config(struct asr_bcm_dev *dd, int rid_ext, int rid)
+{
+    uint32_t val;
+
+    val = asr_bcm_read(dd, DMA_IN_CTRL);
+    val &= 0x0f0f0000;
+    val |= (0x7 << 28) |    /* disable error check */
+        ((rid_ext & 0xF) << 20) |   /* rid ext */
+        (0x1 << 18) |   /* disable out-of-order */
+        (0x1 << 17) |   /* data 64 byte aligned */
+        (0x1 << 15) |   /* FIFO bus size 64 bit */
+        (0x1 << 13) |   /* burst type: Inc */
+        (0x8 << 8)  |   /* burst len */
+        ((rid & 0xF) << 4);
+
+    asr_bcm_write(dd, DMA_IN_CTRL, val);
+
+    return 0;
+}
+
+int dma_output_config(struct asr_bcm_dev *dd, int wid_ext, int wid)
+{
+	uint32_t val;
+
+	val = asr_bcm_read(dd, DMA_OUT_CTRL);
+	val &= 0x0f0f0000;
+	val |= (0x7 << 28) |    /* disable error check */
+		((wid_ext & 0xF) << 20) |   /* wid ext */
+		(0x1 << 18) |   /* disable out-of-order */
+		(0x1 << 17) |   /* data 64 byte aligned */
+		(0x1 << 15) |   /* FIFO bus size 64 bit */
+		(0x1 << 13) |   /* burst type: Inc */
+		(0x8 << 8)  |   /* burst len */
+		((wid & 0xF) << 4);
+
+	asr_bcm_write(dd, DMA_OUT_CTRL, val);
+
+	return 0;
+}
+
+
+int dma_input_address(struct asr_bcm_dev *dd, uint32_t src_addr, \
+                        uint32_t src_size, int chained)
+{
+    if (chained) {
+        asr_bcm_write(dd, DMA_IN_NX_LL_ADR, src_addr);
+        asr_bcm_write(dd, DMA_IN_SRC_ADR, 0x0);
+        asr_bcm_write(dd, DMA_IN_XFER_CNTR, 0x0);
+    } else {
+        asr_bcm_write(dd, DMA_IN_NX_LL_ADR, 0x0);
+        asr_bcm_write(dd, DMA_IN_SRC_ADR, src_addr);
+        asr_bcm_write(dd, DMA_IN_XFER_CNTR, src_size);
+    }
+
+    return 0;
+}
+
+int dma_output_address(struct asr_bcm_dev *dd, uint32_t dst_addr, uint32_t dst_size, int chained)
+{
+	if (chained) {
+		asr_bcm_write(dd, DMA_OUT_NX_LL_ADR, dst_addr);
+		asr_bcm_write(dd, DMA_OUT_DEST_ADR, 0x0);
+		asr_bcm_write(dd, DMA_OUT_XFER_CNTR, 0x0);
+	} else {
+		asr_bcm_write(dd, DMA_OUT_NX_LL_ADR, 0x0);
+		asr_bcm_write(dd, DMA_OUT_DEST_ADR, dst_addr);
+		asr_bcm_write(dd, DMA_OUT_XFER_CNTR, dst_size);
+	}
+
+	return 0;
+}
+
+void dma_input_start(struct asr_bcm_dev *dd)
+{
+    uint32_t val;
+
+    val = asr_bcm_read(dd, DMA_IN_INT);
+    asr_bcm_write(dd, DMA_IN_INT, val);
+
+    val = asr_bcm_read(dd, DMA_IN_CTRL);
+    val |= 0x1;
+    asr_bcm_write(dd, DMA_IN_CTRL, val);
+}
+
+void dma_output_start(struct asr_bcm_dev *dd)
+{
+	uint32_t val;
+
+	val = asr_bcm_read(dd, DMA_OUT_INT);
+	asr_bcm_write(dd, DMA_OUT_INT, val);
+
+	val = asr_bcm_read(dd, DMA_OUT_CTRL);
+	val |= 0x1;
+	asr_bcm_write(dd, DMA_OUT_CTRL, val);
+
+	return;
+}
+
+void dma_input_stop(struct asr_bcm_dev *dd)
+{
+    uint32_t val;
+
+    val = asr_bcm_read(dd, DMA_IN_CTRL);
+    val &= ~0x1;
+    asr_bcm_write(dd, DMA_IN_CTRL, val);
+}
+
+void dma_output_stop(struct asr_bcm_dev *dd)
+{
+	uint32_t val;
+
+	val = asr_bcm_read(dd, DMA_OUT_CTRL);
+	val &= ~0x1;
+	asr_bcm_write(dd, DMA_OUT_CTRL, val);
+
+	return;
+}
+
+int dma_wait_input_finish(struct asr_bcm_dev *dd)
+{
+    uint32_t val, val_ori;
+    int loop;
+    int ret = 0;
+
+    loop = 10000;
+    while (loop > 0) {
+        val_ori = asr_bcm_read(dd, DMA_IN_INT);
+        val = (val_ori & 0x1);
+        if (val !=0)
+            break;
+        loop--;
+        udelay(1);
+    }
+
+    if (loop == 0) {
+        ret = -1;
+    } else {
+        ret = 0;
+    }
+
+    dma_input_stop(dd);
+
+    val = asr_bcm_read(dd, DMA_IN_INT);
+    asr_bcm_write(dd, DMA_IN_INT, val);
+
+    return ret;
+}
+
+int dma_wait_output_finish(struct asr_bcm_dev *dd)
+{
+	uint32_t val, val_ori;
+	int loop;
+	int ret = 0;
+
+	loop = 10000;
+	while (loop > 0) {
+		val_ori = asr_bcm_read(dd, DMA_OUT_INT);
+		val = (val_ori & 0x1);
+		if (val !=0)
+			break;
+		loop--;
+		udelay(1);
+	}
+
+	if (loop == 0) {
+		ret = -1;
+	} else {
+		ret = 0;
+	}
+
+	dma_output_stop(dd);
+
+	val = asr_bcm_read(dd, DMA_OUT_INT);
+	asr_bcm_write(dd, DMA_OUT_INT, val);
+
+	return ret;
+}
+
+int adec_engine_hw_reset(struct asr_bcm_dev *dd, \
+                        ADEC_ACC_ENG_T engine)
+{
+    uint32_t val;
+    int tmp;
+
+    if (engine == ACC_ENG_ALL)
+        tmp = 0xffff;
+    else
+        tmp = 1 << engine;
+
+    val = asr_bcm_read(dd, ADEC_CTRL);
+    val |= tmp;
+    asr_bcm_write(dd, ADEC_CTRL, val);
+    val &= ~tmp;
+    asr_bcm_write(dd, ADEC_CTRL, val);
+
+    return 0;
+}
+
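+/*
+ * Route the accelerator bus: group A carries the HASH/RC4/ECP/ZMODP
+ * engines and group B the AES/DES/BYPASS/RC4 engines (see the ABUS_GRP_*
+ * enums); the two crossbar arguments choose straight or crossed wiring
+ * for the DMA input and output paths.
+ */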
+int abus_set_mode(struct asr_bcm_dev *dd,
+                  ABUS_GRP_A_T grp_a_mode,
+                  ABUS_GRP_B_T grp_b_mode,
+                  ABUS_CROSS_BAR_T input_bar,
+                  ABUS_CROSS_BAR_T output_bar)
+{
+    uint32_t val;
+
+    val = asr_bcm_read(dd, ABUS_BUS_CTRL);
+
+    val &= ~(0x77 << 0x4);
+    val |= (grp_a_mode << 0x4) | (grp_b_mode << 0x8);
+
+    if (input_bar == ABUS_STRAIGHT) {
+        val &= ~(0x1 << 0x0);
+    } else if (input_bar == ABUS_CROSS) {
+        val |= (0x1 << 0x0);
+    } else {
+        return -1;
+    }
+
+    if (output_bar == ABUS_STRAIGHT) {
+        val &= ~(0x1 << 0x2);
+    } else if (output_bar == ABUS_CROSS) {
+        val |= (0x1 << 0x2);
+    } else {
+        return -1;
+    }
+
+    asr_bcm_write(dd, ABUS_BUS_CTRL, val);
+
+    return 0;
+}
+
+static int asr_bcm_clk_sync(struct asr_bcm_dev *dd)
+{
+    struct clk *bcm_clk;
+
+    if (dd->clk_synced)
+        return 0;
+
+    bcm_clk =  dd->bcm_clk;
+    /*
+     * The BCM clk will be disabled by the CP core, but the enable count
+     * is still 1.  Sync the clk enable state here and re-enable the clk.
+     */
+    if (__clk_is_enabled(bcm_clk) == false &&
+        __clk_get_enable_count(bcm_clk))
+    {
+        asr_aes_clk_put(bcm_clk);
+        asr_aes_clk_get(bcm_clk);
+        dd->clk_synced = 1;
+        dev_dbg(dd->dev, "sync bcm clk done\n");
+        return 1;
+    }
+
+    return 0;
+}
+
+static int asr_bcm_dev_get(struct asr_bcm_dev *dd)
+{
+    mutex_lock(&dd->bcm_lock);
+
+    asr_bcm_clk_sync(dd);
+    asr_aes_clk_get(dd->bcm_clk);
+
+    return 0;
+}
+
+static int asr_bcm_dev_put(struct asr_bcm_dev *dd)
+{
+    asr_aes_clk_put(dd->bcm_clk);
+
+    mutex_unlock(&dd->bcm_lock);
+    return 0;
+}
+
+static int bcm_hw_init(struct asr_bcm_dev *dd)
+{
+    /* init */
+    asr_bcm_write(dd, BIU_SP_CONTROL, 0x6);
+    asr_bcm_write(dd, BIU_SP_INTERRUPT_MASK, 0xFFFFFFFF);
+    asr_bcm_write(dd, BIU_HST_INTERRUPT_MASK, 0xFFFFFFFF);
+    asr_bcm_write(dd, ADEC_INT_MSK, 0xFFFFFFFF);
+    return 0;
+}
+
+static void asr_bcm_hw_init(struct asr_bcm_dev *dd)
+{
+    bcm_hw_init(dd);
+    adec_engine_hw_reset(dd, ACC_ENG_ALL);
+}
+
+static irqreturn_t asr_bcm_irq(int irq, void *dev_id)
+{
+    irqreturn_t ret = IRQ_NONE;
+
+    // TODO irq
+
+    return ret;
+}
+
+#if defined(CONFIG_OF)
+static const struct of_device_id asr_bcm_dt_ids[] = {
+    { .compatible = "asr,asr-bcm" },
+    { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, asr_bcm_dt_ids);
+#endif
+
+static struct asr_bcm_ops bcm_ops = {
+    .dev_get = asr_bcm_dev_get,
+    .dev_put = asr_bcm_dev_put,
+};
+
+static int asr_bcm_probe(struct platform_device *pdev)
+{
+    struct asr_bcm_dev *bcm_dd;
+    struct device *dev = &pdev->dev;
+    struct resource *bcm_res;
+    struct device_node *np = NULL;
+    int err = 0, devnum = 0;
+
+    bcm_dd = devm_kzalloc(&pdev->dev, sizeof(*bcm_dd), GFP_KERNEL);
+    if (bcm_dd == NULL) {
+        err = -ENOMEM;
+        goto no_mem_err;
+    }
+
+    np = dev->of_node;
+    bcm_dd->dev = dev;
+    bcm_dd->bcm_ops = &bcm_ops;
+
+    platform_set_drvdata(pdev, bcm_dd);
+
+    mutex_init(&bcm_dd->bcm_lock);
+
+    /* Get the base address */
+    bcm_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+    if (!bcm_res) {
+        dev_err(dev, "no MEM resource info\n");
+        err = -ENODEV;
+        goto res_err;
+    }
+    bcm_dd->phys_base = bcm_res->start;
+
+    /* Get the IRQ */
+    bcm_dd->irq = platform_get_irq(pdev, 0);
+    if (bcm_dd->irq < 0) {
+        err = bcm_dd->irq;
+        goto res_err;
+    }
+    err = devm_request_irq(&pdev->dev, bcm_dd->irq, asr_bcm_irq,
+                   IRQF_SHARED, "asr-bcm", bcm_dd);
+    if (err) {
+        dev_err(dev, "unable to request bcm irq.\n");
+        goto res_err;
+    }
+
+    /* Initializing the clock */
+    bcm_dd->bcm_clk = devm_clk_get(&pdev->dev, NULL);
+    if (IS_ERR(bcm_dd->bcm_clk)) {
+        dev_err(dev, "clock initialization failed.\n");
+        err = PTR_ERR(bcm_dd->bcm_clk);
+        goto res_err;
+    }
+    bcm_dd->clk_synced = 0;
+
+    bcm_dd->io_base = devm_ioremap_resource(&pdev->dev, bcm_res);
+    if (IS_ERR(bcm_dd->io_base)) {
+        dev_err(dev, "can't ioremap\n");
+        err = PTR_ERR(bcm_dd->io_base);
+        goto res_err;
+    }
+
+    err = clk_prepare(bcm_dd->bcm_clk);
+    if (err)
+        goto res_err;
+
+    err = asr_aes_clk_get(bcm_dd->bcm_clk);
+    if (err)
+        goto bcm_clk_unprepare;
+
+    asr_bcm_hw_init(bcm_dd);
+
+#ifdef CONFIG_ASR_BCM_CIPHER
+	if (of_get_property(np, "asr,asr-cipher", NULL)) {
+		err = asr_bcm_cipher_register(bcm_dd);
+		if (err)
+			goto bcm_asr_aes_clk_put;
+		dev_info(dev, "CIPHER engine is initialized\n");
+		devnum ++;
+	}
+#endif
+
+#ifdef CONFIG_ASR_BCM_SHA
+    if (of_get_property(np, "asr,asr-sha", NULL)) {
+        err = asr_bcm_sha_register(bcm_dd);
+        if (err)
+			goto sha_err;
+        dev_info(dev, "SHA engine is initialized\n");
+        devnum ++;
+    }
+#endif
+
+    if (!devnum) {
+        dev_err(dev, "No BCM device enabled\n");
+        err = -ENODEV;
+        goto bcm_asr_aes_clk_put;
+    }
+
+    return 0;
+
+#ifdef CONFIG_ASR_BCM_SHA
+sha_err:
+#ifdef CONFIG_ASR_BCM_CIPHER
+	asr_bcm_cipher_unregister(bcm_dd);
+#endif
+#endif
+bcm_asr_aes_clk_put:
+    asr_aes_clk_put(bcm_dd->bcm_clk);
+bcm_clk_unprepare:
+    clk_unprepare(bcm_dd->bcm_clk);
+res_err:
+    devm_kfree(dev, bcm_dd);
+no_mem_err:
+    dev_err(dev, "initialization failed.\n");
+
+    return err;
+}
+
+static int asr_bcm_remove(struct platform_device *pdev)
+{
+    struct asr_bcm_dev *bcm_dd;
+
+    bcm_dd = platform_get_drvdata(pdev);
+    if (!bcm_dd)
+        return -ENODEV;
+
+#ifdef CONFIG_ASR_BCM_CIPHER
+    asr_bcm_cipher_unregister(bcm_dd);
+#endif
+
+#ifdef CONFIG_ASR_BCM_SHA
+    asr_bcm_sha_unregister(bcm_dd);
+#endif
+
+    asr_aes_clk_put(bcm_dd->bcm_clk);
+    clk_unprepare(bcm_dd->bcm_clk);
+
+    devm_kfree(bcm_dd->dev, bcm_dd);
+
+    return 0;
+}
+
+#ifdef CONFIG_PM
+static int asr_bcm_suspend(struct device *dev)
+{
+    struct asr_bcm_dev *bcm_dd = dev_get_drvdata(dev);
+
+    asr_aes_clk_put(bcm_dd->bcm_clk);
+
+    return 0;
+}
+
+static int asr_bcm_resume(struct device *dev)
+{
+    struct asr_bcm_dev *bcm_dd = dev_get_drvdata(dev);
+
+    return asr_aes_clk_get(bcm_dd->bcm_clk);
+}
+
+static const struct dev_pm_ops asr_bcm_pm_ops = {
+    .suspend	= asr_bcm_suspend,
+    .resume		= asr_bcm_resume,
+};
+#endif /* CONFIG_PM */
+
+static struct platform_driver asr_bcm_driver = {
+    .probe		= asr_bcm_probe,
+    .remove		= asr_bcm_remove,
+    .driver		= {
+        .name	= "asr_bcm",
+#ifdef CONFIG_PM
+        .pm	= &asr_bcm_pm_ops,
+#endif
+        .of_match_table = of_match_ptr(asr_bcm_dt_ids),
+    },
+};
+
+static int __init asr_bcm_init(void)
+{
+    int ret;
+
+    ret = platform_driver_register(&asr_bcm_driver);
+
+    return ret;
+}
+
+device_initcall_sync(asr_bcm_init);
+
+MODULE_DESCRIPTION("BCM: ASR Trust Engine support.");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Yonggan Wang");
\ No newline at end of file
diff --git a/marvell/linux/drivers/crypto/asr/bcm/asr-bcm.h b/marvell/linux/drivers/crypto/asr/bcm/asr-bcm.h
new file mode 100644
index 0000000..c63f386
--- /dev/null
+++ b/marvell/linux/drivers/crypto/asr/bcm/asr-bcm.h
@@ -0,0 +1,160 @@
+#ifndef _ASR_BCM_H_
+#define _ASR_BCM_H_
+
+#include <linux/crypto.h>
+#include <crypto/algapi.h>
+#include <linux/interrupt.h>
+#include <linux/mutex.h>
+#include <linux/miscdevice.h>
+
+#include "asr-sha.h"
+#include "asr-cipher.h"
+
+#define BIU_OFFSET               (0x00000000L)
+#define ADEC_OFFSET              (0x00000400L)
+#define DMA_OFFSET               (0x00000800L)
+#define ABUS_OFFSET              (0x00000C00L)
+#define CRYPTO_OFFSET            (0x00001000L)
+#define HASH_OFFSET              (0x00001800L)
+#define SCRATCH_TBL_OFFSET       (0x00001C00L)
+
+/* biu registers */
+#define BIU_HST_INTERRUPT_MASK   (BIU_OFFSET + 0x00CC)
+#define BIU_SP_INTERRUPT_MASK    (BIU_OFFSET + 0x021C)
+#define BIU_SP_CONTROL           (BIU_OFFSET + 0x0220)
+
+/* adec registers */
+#define ADEC_CTRL                (ADEC_OFFSET + 0x0000)
+#define ADEC_CTRL2               (ADEC_OFFSET + 0x0004)
+#define AXI_SL_CTRL              (ADEC_OFFSET + 0x0008)
+#define ADEC_INT                 (ADEC_OFFSET + 0x000C)
+#define ADEC_INT_MSK             (ADEC_OFFSET + 0x0010)
+#define ADEC_ACC_ERR_ADR         (ADEC_OFFSET + 0x0014)
+#define ADEC_MP_FIFO_ERR_ADR     (ADEC_OFFSET + 0x0018)
+
+/* dma registers */
+#define DMA_IN_CTRL              (DMA_OFFSET + 0x0000)
+#define DMA_IN_STATUS            (DMA_OFFSET + 0x0004)
+#define DMA_IN_SRC_ADR           (DMA_OFFSET + 0x0008)
+#define DMA_IN_XFER_CNTR         (DMA_OFFSET + 0x000C)
+#define DMA_IN_NX_LL_ADR         (DMA_OFFSET + 0x0010)
+#define DMA_IN_INT               (DMA_OFFSET + 0x0014)
+#define DMA_IN_INT_MSK           (DMA_OFFSET + 0x0018)
+#define DMA_OUT_CTRL             (DMA_OFFSET + 0x001C)
+#define DMA_OUT_STATUS           (DMA_OFFSET + 0x0020)
+#define DMA_OUT_DEST_ADR         (DMA_OFFSET + 0x0024)
+#define DMA_OUT_XFER_CNTR        (DMA_OFFSET + 0x0028)
+#define DMA_OUT_NX_LL_ADR        (DMA_OFFSET + 0x002C)
+#define DMA_OUT_INT              (DMA_OFFSET + 0x0030)
+#define DMA_OUT_INT_MSK          (DMA_OFFSET + 0x0034)
+
+/* accel bus registers */
+#define ABUS_BUS_CTRL            (ABUS_OFFSET + 0x0000)
+
+/* hash bus registers */
+#define HASH_CONFIG              (HASH_OFFSET + 0x0000)
+#define HASH_CONTROL             (HASH_OFFSET + 0x0004)
+#define HASH_COMMAND             (HASH_OFFSET + 0x0008)
+#define HASH_STATUS              (HASH_OFFSET + 0x000C)
+#define HASH_INCOME_SEG_SZ       (HASH_OFFSET + 0x0010)
+#define HASH_TOTAL_MSG_SZ_L      (HASH_OFFSET + 0x0018)
+#define HASH_TOTAL_MSG_SZ_H      (HASH_OFFSET + 0x001C)
+#define HASH_DIGEST_BASE         (HASH_OFFSET + 0x0020)
+#define HASH_DIGEST(a)           (HASH_DIGEST_BASE + ((a) << 2))
+#define HASH_DIGEST_H_BASE       (HASH_OFFSET + 0x0040)
+#define HASH_DIGEST_H(a)         (HASH_DIGEST_H_BASE + ((a) << 2))
+
+/* crypto bus registers */
+#define CRYPTO_AES_CONFIG_REG            (CRYPTO_OFFSET + 0x0000)
+#define CRYPTO_AES_CONTROL_REG           (CRYPTO_OFFSET + 0x0004)
+#define CRYPTO_AES_COMMAND_REG           (CRYPTO_OFFSET + 0x0008)
+#define CRYPTO_AES_STATUS_REG            (CRYPTO_OFFSET + 0x000C)
+#define CRYPTO_AES_INTRPT_SRC_REG        (CRYPTO_OFFSET + 0x0010)
+#define CRYPTO_AES_INTRPT_SRC_EN_REG     (CRYPTO_OFFSET + 0x0014)
+#define CRYPTO_AES_STREAM_SIZE_REG       (CRYPTO_OFFSET + 0x0018)
+#define CRYPTO_ENGINE_SEL_REG            (CRYPTO_OFFSET + 0x00A8)
+
+#define CRYPTO_K2_BASE                   (CRYPTO_OFFSET + 0x0058)
+#define CRYPTO_K2_W_REG(a)               (CRYPTO_K2_BASE + (a) * 0x04)
+#define CRYPTO_K1_BASE                   (CRYPTO_OFFSET + 0x0078)
+#define CRYPTO_K1_W_REG(a)               (CRYPTO_K1_BASE + (a) * 0x04)
+#define CRYPTO_IV_BASE                   (CRYPTO_OFFSET + 0x0098)
+#define CRYPTO_IV_REG(a)                 (CRYPTO_IV_BASE + (a) * 0x04)
+
+typedef enum {
+    ABUS_GRP_A_HASH = 0x0,
+    ABUS_GRP_A_RC4 = 0x4,
+    ABUS_GRP_A_ECP = 0x5,
+    ABUS_GRP_A_ZMODP = 0x6,
+} ABUS_GRP_A_T;
+
+typedef enum {
+    ABUS_GRP_B_AES = 0x0,
+    ABUS_GRP_B_DES = 0x1,
+    ABUS_GRP_B_BYPASS = 0x2,
+    ABUS_GRP_B_RC4 = 0x4,
+} ABUS_GRP_B_T;
+
+typedef enum {
+    ABUS_STRAIGHT = 0,
+    ABUS_CROSS,
+} ABUS_CROSS_BAR_T;
+
+typedef enum {
+    /* reset bit */
+    ACC_ENG_DMA = 1,
+    ACC_ENG_HASH = 5,
+    ACC_ENG_CRYPTO = 3,
+    ACC_ENG_EBG = 10,
+    ACC_ENG_MCT = 8,
+    ACC_ENG_SCRATCH_PAD = 6,
+    ACC_ENG_ZMOP = 7,
+    ACC_ENG_ALL,    
+} ADEC_ACC_ENG_T;
+
+struct asr_te200_sha;
+
+struct asr_bcm_dev {
+    unsigned long		phys_base;
+    void __iomem		*io_base;
+    struct mutex		bcm_lock;
+    struct device		*dev;
+
+    struct clk		*bcm_clk;
+    int			clk_synced;
+    refcount_t	refcount;
+
+    int			irq;
+
+    struct asr_bcm_sha asr_sha;
+	struct asr_bcm_cipher asr_cipher;
+
+    struct asr_bcm_ops	*bcm_ops;
+};
+
+struct asr_bcm_ops {
+    int (*dev_get)(struct asr_bcm_dev *);
+    int (*dev_put)(struct asr_bcm_dev *);
+};
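+
+/*
+ * dev_get()/dev_put() bracket every hardware access: they take the bcm
+ * lock and enable the shared clock, then release both when the caller is
+ * done (see asr_bcm_dev_get()/asr_bcm_dev_put() in asr-bcm.c).
+ */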
+
+void dma_input_start(struct asr_bcm_dev *dd);
+void dma_input_stop(struct asr_bcm_dev *dd);
+int dma_input_config(struct asr_bcm_dev *dd, int rid_ext, int rid);
+int dma_input_address(struct asr_bcm_dev *dd, uint32_t src_addr, uint32_t src_size, int chained);
+void dma_output_start(struct asr_bcm_dev *dd);
+void dma_output_stop(struct asr_bcm_dev *dd);
+int dma_output_config(struct asr_bcm_dev *dd, int wid_ext, int wid);
+int dma_output_address(struct asr_bcm_dev *dd, uint32_t dst_addr, uint32_t dst_size, int chained);
+int dma_wait_input_finish(struct asr_bcm_dev *dd);
+int dma_wait_output_finish(struct asr_bcm_dev *dd);
+int adec_engine_hw_reset(struct asr_bcm_dev *dd, ADEC_ACC_ENG_T engine);
+int abus_set_mode(struct asr_bcm_dev *dd, ABUS_GRP_A_T grp_a_mode, 
+                  ABUS_GRP_B_T grp_b_mode, ABUS_CROSS_BAR_T input_bar, ABUS_CROSS_BAR_T output_bar);
+
+int asr_bcm_sha_register(struct asr_bcm_dev *bcm_dd);
+int asr_bcm_sha_unregister(struct asr_bcm_dev *bcm_dd);
+
+int asr_bcm_cipher_register(struct asr_bcm_dev *bcm_dd);
+int asr_bcm_cipher_unregister(struct asr_bcm_dev *bcm_dd);
+
+#endif
\ No newline at end of file
diff --git a/marvell/linux/drivers/crypto/asr/bcm/asr-cipher.c b/marvell/linux/drivers/crypto/asr/bcm/asr-cipher.c
new file mode 100644
index 0000000..9be5ca4
--- /dev/null
+++ b/marvell/linux/drivers/crypto/asr/bcm/asr-cipher.c
@@ -0,0 +1,1370 @@
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/clk-provider.h>
+#include <linux/clk.h>
+#include <linux/io.h>
+#include <linux/hw_random.h>
+#include <linux/platform_device.h>
+#include <linux/bitops.h>
+#include <linux/device.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/scatterlist.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
+#include <linux/of_device.h>
+#include <linux/of_address.h>
+#include <linux/delay.h>
+#include <linux/crypto.h>
+#include <linux/cputype.h>
+#include <crypto/scatterwalk.h>
+#include <crypto/algapi.h>
+#include <linux/jiffies.h>
+#include <crypto/aes.h>
+#include <crypto/sm4.h>
+#include <crypto/internal/skcipher.h>
+#include "asr-bcm.h"
+#include "asr-cipher.h"
+
+#define CIPHER_BLOCK_SIZE AES_BLOCK_SIZE
+#define CIPHER_MIN_KEY_SIZE AES_MIN_KEY_SIZE
+#define CIPHER_MAX_KEY_SIZE AES_MAX_KEY_SIZE
+
+static struct asr_bcm_cipher *asr_cipher_local = NULL;
+
+static inline u32 asr_cipher_read(struct asr_bcm_cipher *dd, u32 offset)
+{
+	u32 value = readl_relaxed(dd->io_base + offset);
+
+	return value;
+}
+
+static inline void asr_cipher_write(struct asr_bcm_cipher *dd,
+					u32 offset, u32 value)
+{
+	writel_relaxed(value, dd->io_base + offset);
+}
+
+static inline void cipher_cache_operation(void *addr, int size)
+{
+	__cpuc_flush_dcache_area(addr, size);
+}
+
+
+/* hardware handle */
+static void crypto_aes_sw_reset(struct asr_bcm_cipher *dd)
+{
+	uint32_t val;
+
+	val = 0x1;
+	asr_cipher_write(dd, CRYPTO_AES_CONTROL_REG, val);
+	val = 0x0;
+	asr_cipher_write(dd, CRYPTO_AES_CONTROL_REG, val);
+
+	return;
+}
+
+static void crypto_aes_start(struct asr_bcm_cipher *dd)
+{
+	uint32_t val;
+
+	val = 0x1;
+	asr_cipher_write(dd, CRYPTO_AES_COMMAND_REG, val);
+
+	return;
+}
+
+static int crypto_aes_wait(struct asr_bcm_cipher *dd)
+{
+	uint32_t val;
+	
+	val = asr_cipher_read(dd, CRYPTO_AES_INTRPT_SRC_REG);
+	asr_cipher_write(dd, CRYPTO_AES_INTRPT_SRC_REG, val);
+
+	return 0;
+}
+
+static int crypto_engine_select(struct asr_bcm_cipher *dd, CRYPTO_ENG_SEL_T engine)
+{
+	uint32_t val;
+
+	val = asr_cipher_read(dd, CRYPTO_ENGINE_SEL_REG);
+	val &= ~0x3;
+
+	switch (engine) {
+	case ENG_AES:
+		val |= 0x1;
+		break;
+	case ENG_DES:
+		val |= 0x2;
+		break;
+	case ENG_RC4:
+		val |= 0x3;
+		break; 
+	default:
+		dev_err(dd->dev, "Illegal engine %d\n", engine);
+		return -1;
+	}
+
+	asr_cipher_write(dd, CRYPTO_ENGINE_SEL_REG, val);
+
+	return 0;
+}
+
+static int crypto_aes_set_iv(struct asr_bcm_cipher *dd, const uint8_t *iv)
+{
+	uint32_t val;
+	int reg_index;
+
+	if (iv == NULL)
+		return -1;
+
+	for (reg_index = 0; reg_index < 4; reg_index++) {
+		val = ((iv[(reg_index << 2) +0] & 0xFF) << 0) | \
+			  ((iv[(reg_index << 2) + 1] & 0xFF) << 8) | \
+			  ((iv[(reg_index << 2) + 2] & 0xFF) << 16) | \
+			  ((iv[(reg_index << 2) + 3] & 0xFF) << 24);
+		asr_cipher_write(dd, CRYPTO_IV_REG(reg_index), val);
+	}
+
+	return 0;
+}
+
+static int crypto_aes_get_iv(struct asr_bcm_cipher *dd, uint8_t *iv)
+{
+	uint32_t val;
+	int reg_index;
+
+	if (iv == NULL)
+		return -1;
+
+	for (reg_index = 0; reg_index < 4; reg_index++) {
+		val = asr_cipher_read(dd, CRYPTO_IV_REG(reg_index));
+		iv[(reg_index << 2) +0] = val & 0xFF;
+		iv[(reg_index << 2) +1] = (val >> 8) & 0xFF;
+		iv[(reg_index << 2) +2] = (val >> 16) & 0xFF;
+		iv[(reg_index << 2) +3] = (val >> 24) & 0xFF;        
+	}
+
+	return 0;
+}
+
+static int crypto_aes_set_mode(struct asr_bcm_cipher *dd,
+							AES_MODE_T mode, AES_OP_MODE_T op_mode,
+							AES_KEY_LEN_T keylen, bool use_rkey)
+{
+	uint32_t val;
+
+	crypto_engine_select(dd, ENG_AES);
+	val = asr_cipher_read(dd, CRYPTO_AES_CONFIG_REG);
+	val &= ~(0x7 << 0x3);
+
+	switch (mode) {
+	case AES_ECB_ALG:
+		val |= (0x0 << 0x3);
+		break;
+	case AES_CBC_ALG:
+		val |= (0x1 << 0x3);
+		break;
+	case AES_CTR_ALG:
+		val |= (0x2 << 0x3);
+		break;
+	case AES_XTS_ALG:
+		val |= (0x3 << 0x3);
+		break;
+	case AES_KEYWRAP:
+		val |= (0x4 << 0x3);
+		break;
+	default:
+		dev_err(dd->dev, "Illegal aes mode %d\n", mode);
+		return -1;
+	}
+
+	val &= ~(0x3 << 0x1);
+	switch (keylen) {
+	case AES_128:
+		val |= (0x0 << 0x1);
+		break;
+	case AES_192:
+		val |= (0x2 << 0x1);
+		break;
+	case AES_256:
+		val |= (0x1 << 0x1);
+		break;
+	default:
+		dev_err(dd->dev, "Illegal aes keylen %d\n", mode);
+		return -1;
+	}
+
+	val &= ~(0x1 << 0x0);
+	if (op_mode == AES_DECRYPT_OP) {
+		val |= (0x1 << 0x0);
+	} else {
+		val |= (0x0 << 0x0);        
+	}
+
+	val &= ~(0x1 << 0x6);
+	if (use_rkey == false) {
+		val |= (0x0 << 0x6);
+	} else {
+		val |= (0x1 << 0x6);        
+	}
+	asr_cipher_write(dd, CRYPTO_AES_CONFIG_REG, val);
+
+	return 0;
+}
+
+static int crypto_aes_set_key1(struct asr_bcm_cipher *dd, const uint8_t *key, AES_KEY_LEN_T keylen)
+{
+	uint32_t val;
+	int reg_index, key_end;
+
+	if (!key) 
+		return 0;
+
+	switch (keylen) {
+	case AES_128:
+		key_end = 4;
+		break;
+	case AES_192:
+		key_end = 6;
+		break;
+	case AES_256:
+		key_end = 8;
+		break;
+	default:
+		key_end = 0;
+		dev_err(dd->dev, "Illegal aes keylen %d\n", keylen);
+		return -1;
+	}
+
+	for (reg_index = 0; reg_index < 8; reg_index++) {
+		if (reg_index < key_end) {
+			val = ((key[(reg_index << 2) +0] & 0xFF) << 0) | \
+				((key[(reg_index << 2) + 1] & 0xFF) << 8) | \
+				((key[(reg_index << 2) + 2] & 0xFF) << 16) | \
+				((key[(reg_index << 2) + 3] & 0xFF) << 24);
+		} else {
+			val = 0;
+		}
+		asr_cipher_write(dd, CRYPTO_K1_W_REG(reg_index), val);
+	}
+
+	return 0;
+}
+
+static int crypto_aes_set_key2(struct asr_bcm_cipher *dd, const uint8_t *key, AES_KEY_LEN_T keylen)
+{
+	uint32_t val;
+	int reg_index, key_end;
+
+	if (!key) 
+		return 0;
+
+	switch (keylen) {
+	case AES_128:
+		key_end = 4;
+		break;
+	case AES_192:
+		key_end = 6;
+		break;
+	case AES_256:
+		key_end = 8;
+		break;
+	default:
+		key_end = 0;
+		dev_err(dd->dev, "Illegal aes keylen %d\n", keylen);
+		return -1;
+	}
+
+	for (reg_index = 0; reg_index < 8; reg_index++) {
+		if (reg_index < key_end) {
+			val = ((key[(reg_index << 2) +0] & 0xFF) << 0) | \
+				((key[(reg_index << 2) + 1] & 0xFF) << 8) | \
+				((key[(reg_index << 2) + 2] & 0xFF) << 16) | \
+				((key[(reg_index << 2) + 3] & 0xFF) << 24);
+		} else {
+			val = 0;
+		}
+		asr_cipher_write(dd, CRYPTO_K2_W_REG(reg_index), val);
+	}
+
+	return 0;
+}
+
+static void __maybe_unused *align_ptr_malloc(int size, int align_bytes)
+{
+	void *base_ptr = NULL;
+	void *mem_ptr = NULL;
+
+	base_ptr = kmalloc((size + align_bytes), GFP_KERNEL);
+	if (!base_ptr)
+		return NULL;
+	mem_ptr = (void *)((uint32_t)((uint32_t)base_ptr + align_bytes - 1) & ~(align_bytes - 1));
+	if (mem_ptr == base_ptr) {
+		mem_ptr = (void *)((uint32_t)base_ptr + align_bytes);
+	}
+	*((uint32_t *)mem_ptr - 1) = (uint32_t)mem_ptr - (uint32_t)base_ptr;
+	return mem_ptr;
+}
+
+static void __maybe_unused align_ptr_free(void *ptr)
+{
+	void *base_addr = NULL;
+	base_addr = (void *)((uint32_t)ptr - *((uint32_t *)ptr - 1));
+	kfree(base_addr);
+	return;
+}
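+
+/*
+ * align_ptr_malloc() stores the offset back to the original kmalloc()
+ * pointer in the 32-bit word right before the aligned pointer it returns;
+ * align_ptr_free() reads that word to recover and free the real
+ * allocation.
+ */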
+
+static void __maybe_unused free_dma_chain(DMA_DESC_T *header)
+{
+	DMA_DESC_T *p = header, *q = NULL;
+
+	while(p) {
+		if (p->next_desc) {
+			q = phys_to_virt(p->next_desc);
+			align_ptr_free(p);
+			p = q;
+		} else {
+			align_ptr_free(p);
+			break;
+		}
+	}
+
+	return;
+}
+
+static DMA_DESC_T __maybe_unused *alloc_dma_chain(uint32_t vaddr, uint32_t size)
+{
+	uint32_t paddr_s = virt_to_phys((void *)vaddr);
+	uint32_t paddr_e = virt_to_phys((void *)(vaddr + size));
+	DMA_DESC_T *header = NULL;
+	DMA_DESC_T *p = NULL, *q = NULL;
+	uint32_t vaddr_tmp = vaddr;
+
+	/* descriptor must be aligned to 16 bytes */
+	header = align_ptr_malloc(sizeof(DMA_DESC_T), 16);
+	if (header == NULL) {
+		return NULL;
+	}
+
+	/* handle a contiguous physical memory area */
+	if (paddr_s + size == paddr_e) {
+		header->paddr = (uint32_t) paddr_s;
+		header->size = size >> 2;
+		header->next_desc = 0;
+		header->reserved = 0;
+		cipher_cache_operation((char *)header, sizeof(DMA_DESC_T));
+		return header;
+	}
+
+	/* handle a non-contiguous physical memory area */
+	p = header;
+	header->paddr = (uint32_t) paddr_s;
+	header->size = ((uint32_t)(PAGE_SIZE - (paddr_s & (PAGE_SIZE - 1)))) >> 2;
+	header->next_desc = 0;
+	header->reserved = 0;
+
+	while (1) {
+		if ((p->paddr + (p->size << 2)) == virt_to_phys((void *)(vaddr_tmp + (p->size << 2))))
+			p->size += PAGE_SIZE >> 2;
+		else {
+			vaddr_tmp += (p->size << 2);
+			/* descriptor must be aligned to 16 bytes */
+			q = align_ptr_malloc(sizeof(DMA_DESC_T), 16);
+			if (q == NULL) {
+				free_dma_chain(header);
+				return NULL;
+			}
+			q->paddr = (uint32_t)virt_to_phys((void *)vaddr_tmp);
+			q->size = PAGE_SIZE >> 2;
+			q->next_desc = 0;
+			q->reserved = 0;
+			p->next_desc = (uint32_t)(virt_to_phys(q));
+			cipher_cache_operation((char *)p, sizeof(DMA_DESC_T));
+			p = q;
+		}
+		if (p->paddr + (p->size << 2) > paddr_e) {
+			p->size -= ((uint32_t)(PAGE_SIZE - (paddr_e & (PAGE_SIZE - 1)))) >> 2;
+			cipher_cache_operation((char *)p, sizeof(DMA_DESC_T));
+			break;
+		}
+	}
+
+	return header;
+}
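+
+/*
+ * Each DMA_DESC_T in a chain built above holds a physical address, a
+ * length in 32-bit words (size >> 2) and the physical address of the next
+ * descriptor.  The chain head is handed to the engine via
+ * DMA_IN_NX_LL_ADR/DMA_OUT_NX_LL_ADR (dma_input_address()/
+ * dma_output_address() with chained set), which is how aes_nblocks()
+ * below feeds non-contiguous buffers to the hardware.
+ */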
+
+static int rkek_cfg_init(struct asr_bcm_cipher *dd, int hwkey_select)
+{
+#define CIU_SYSSEC_CTRL1           (0x5C)
+
+    uint32_t value;
+	struct device_node *np;
+	struct resource res;
+	void __iomem *io_base;
+
+	/* set rkek or ssk */
+	np = of_find_compatible_node(NULL, NULL, "marvell,mmp-ciu");
+	if (!np) {
+		dev_err(dd->dev, "can't find ciu node for set opt key sel");
+		return -1;
+	}
+
+	if (of_address_to_resource(np, 0, &res)) {
+		return -1;
+	}
+
+	io_base = ioremap(res.start, resource_size(&res));
+	if (!io_base) {
+		dev_err(dd->dev, "ciu regs can't remap");
+		return -1;
+	}
+
+	value = readl_relaxed(io_base + CIU_SYSSEC_CTRL1);
+	if (hwkey_select == RK_KEY) {
+		value &= ~(1 << 22);
+	} else if (hwkey_select == SSK_KEY) {
+		value |= (1 << 22);
+	} else {
+		iounmap(io_base);
+		return -1;
+	}
+	writel_relaxed(value, io_base + CIU_SYSSEC_CTRL1);
+
+	iounmap(io_base);
+	return 0;
+}
+
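+/*
+ * aes_nblocks() flow: build DMA descriptor chains for the input and output
+ * buffers, reset the DMA and CRYPTO engines, route the ABUS, program
+ * key1/key2 (or select the HW root/SSK key), load the IV where the mode
+ * needs one, then kick output DMA, the AES engine and input DMA and wait
+ * for each to finish.  The saved copy of the last input block is used at
+ * the end to verify that the DMA output actually reached DDR.
+ */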
+static int aes_nblocks(struct asr_bcm_cipher *dd, AES_OP_MODE_T op_mode, 
+					const uint8_t *in , uint8_t *out, unsigned long blocks, 
+					const symmetric_key *skey1, const symmetric_key *skey2, AES_MODE_T mode, uint8_t *iv)
+{
+	int ret = 0;
+	int key_real_length;
+	int hwkey_select;
+	uint32_t pos, time_start;
+	uint8_t tmp[16];
+	DMA_DESC_T *in_list, *out_list;
+	uint8_t *key_data;
+    struct asr_bcm_dev *dev_dd = container_of(dd, struct asr_bcm_dev, asr_cipher);
+
+	/* save last block of in for encryption result check */
+	pos = (blocks - 1) * 16;
+	memcpy(tmp, in + pos, 16);
+	memcpy(out + pos, in + pos, 16);
+
+	in_list = alloc_dma_chain((uint32_t)in, blocks << 4);
+	if (!in_list)
+		return -1;
+
+	out_list = alloc_dma_chain((uint32_t)out, blocks << 4);
+	if (!out_list) {
+		free_dma_chain(in_list);
+		return -1;
+	}
+
+	adec_engine_hw_reset(dev_dd, ACC_ENG_DMA);
+	adec_engine_hw_reset(dev_dd, ACC_ENG_CRYPTO);
+	abus_set_mode(dev_dd, ABUS_GRP_A_HASH, ABUS_GRP_B_AES, ABUS_STRAIGHT, ABUS_STRAIGHT);
+	crypto_aes_sw_reset(dd);
+
+	/* HW requires abs(rid - wid) > 2 */
+	dma_input_config(dev_dd, 0, 0);
+	dma_output_config(dev_dd, 0, 4);
+	ret = dma_input_address(dev_dd, (uint32_t)virt_to_phys((void *)in_list), 0, true);
+	if (ret != 0) {
+		dev_err(dd->dev, "dma_input_address error.");
+		goto exit;
+	}
+
+	ret = dma_output_address(dev_dd, (uint32_t)virt_to_phys((void *)out_list), 0, true);
+	if (ret != 0) {
+		dev_err(dd->dev, "dma_input_address error.");
+		goto exit;
+	}
+
+	/* Process key1 */
+	if (skey1 == NULL) {
+		goto exit;
+	}
+	key_real_length = skey1->rijndael.Nr & ~(0x3);
+	hwkey_select = skey1->rijndael.Nr & 0x3;
+
+	if (op_mode == AES_ENCRYPT_OP) {
+		key_data = (uint8_t *)skey1->rijndael.eK;
+	} else if (op_mode == AES_DECRYPT_OP) {
+		key_data = (uint8_t *)skey1->rijndael.dK;
+	} else {
+		goto exit;
+	}
+
+	switch (hwkey_select) {
+	case EXT_KEY: /* use provide key */
+		ret = crypto_aes_set_mode(dd, mode, op_mode, key_real_length / BYTES_TO_BITS, false);
+		if (ret) {
+			goto exit;
+		}
+		ret = crypto_aes_set_key1(dd, key_data, key_real_length / BYTES_TO_BITS);
+		if (ret) {
+			goto exit;
+		}
+		break;
+	case RK_KEY: /* use root key */
+		ret = crypto_aes_set_mode(dd, mode, op_mode, key_real_length / BYTES_TO_BITS, true);
+		if (ret) {
+			goto exit;
+		}
+		ret = rkek_cfg_init(dd, RK_KEY);
+		if (ret) {
+			goto exit;
+		}
+		break;
+	case SSK_KEY: /* use ssk key */
+		ret = crypto_aes_set_mode(dd, mode, op_mode, key_real_length / BYTES_TO_BITS, true);
+		if (ret) {
+			goto exit;
+		}
+		ret = rkek_cfg_init(dd, SSK_KEY);
+		if (ret) {
+			goto exit;
+		}
+		break;
+	default:
+		ret = -1;
+		goto exit;
+	}
+
+	/* Process IV and XTS key2 here */
+	switch(mode) {
+	case AES_XTS_ALG:
+		if (skey2 == NULL) {
+			goto exit;           
+		}
+		key_real_length = skey2->rijndael.Nr & ~(0x3);
+		ret = crypto_aes_set_key2(dd, (uint8_t *)skey2->rijndael.eK, key_real_length / BYTES_TO_BITS);
+		if (ret) {
+			goto exit;
+		}
+		break;
+	case AES_CBC_ALG:
+	case AES_CTR_ALG:
+		ret = crypto_aes_set_iv(dd, iv);
+		if (ret != 0) {
+			goto exit;
+		}
+		break;
+	case AES_ECB_ALG:
+		break;
+	default:
+		goto exit;
+	}
+
+	asr_cipher_write(dd, CRYPTO_AES_STREAM_SIZE_REG, blocks << 4);
+	cipher_cache_operation((char *)in, blocks << 4);
+	cipher_cache_operation((char *)out, blocks << 4);    
+
+	dma_output_start(dev_dd);
+	udelay(1);
+	crypto_aes_start(dd);
+	udelay(1);
+	dma_input_start(dev_dd);
+
+	ret = dma_wait_output_finish(dev_dd);
+	if (ret)
+		goto exit;
+	ret = crypto_aes_wait(dd);
+	if (ret)
+		goto exit;
+	ret = dma_wait_input_finish(dev_dd);
+	if (ret)
+		goto exit;    
+
+	/* Process IV */
+	switch(mode) {
+	case AES_XTS_ALG:
+	case AES_CBC_ALG:
+	case AES_CTR_ALG:
+		ret = crypto_aes_get_iv(dd, iv);
+		if (ret != 0) {
+			goto exit;
+		}
+		break;
+	case AES_ECB_ALG:
+		break;
+	default:
+		goto exit;
+	}
+
+	time_start = jiffies;
+	/* make sure the DMA data has reached DDR by checking that the last block of out has changed */
+	while (!memcmp(out + pos, tmp, 16)) {
+		cipher_cache_operation(out + pos, 16);
+
+		if ((jiffies - time_start) > 500) {
+			dev_err(dd->dev, "Encryption timed out: ciphertext still equals plaintext!");
+			break;
+		}
+	}
+
+exit:
+	free_dma_chain(in_list);
+	free_dma_chain(out_list);
+	return ret;
+}
+
+/* ciphers */
+static int se_rijndael_setup_internal(const uint8_t *key, int keylen, symmetric_key *skey)
+{
+	int key_real_length;
+	int hwkey_select;
+
+	if (!skey || keylen <= 0) {
+		return -1;
+	}
+
+	key_real_length = keylen & ~(0x3);
+	hwkey_select = keylen & 0x3;
+	switch (hwkey_select) {
+	case EXT_KEY: /* use provide key */
+		if ((!key) || (key_real_length > (int)(BYTES_TO_BITS * sizeof(skey->rijndael.eK)))
+			|| (key_real_length > (int)(BYTES_TO_BITS * sizeof(skey->rijndael.dK)))) {
+			return -1;
+		}
+		memcpy(skey->rijndael.eK, key, key_real_length / BYTES_TO_BITS);
+		memcpy(skey->rijndael.dK, key, key_real_length / BYTES_TO_BITS);
+		break;
+	case RK_KEY: /* use huk */
+	case SSK_KEY: /* use ssk */
+		skey->rijndael.Nr = keylen;
+		break;
+	default:
+		return -1;
+	}
+
+	return 0;
+}
+
+static int se_rijndael_setup(const uint8_t *key, int keylen, symmetric_key *skey)
+{
+	return se_rijndael_setup_internal(key, (((keylen & ~0x3) * BYTES_TO_BITS) | (keylen & 0x3)), skey);
+}
+
+static int se_rijndael_ecb_decrypt(struct asr_bcm_cipher *dd, const uint8_t *ct, uint8_t *pt, 
+									const symmetric_key *skey)
+{
+	return aes_nblocks(dd, AES_DECRYPT_OP, ct, pt, 1, skey, NULL, AES_ECB_ALG, NULL);
+}
+
+static int _aes_handle_noalign(struct asr_bcm_cipher *dd, AES_OP_MODE_T op_mode, 
+							const uint8_t *in,uint8_t *out, uint32_t length, 
+							const symmetric_key *skey1, const symmetric_key *skey2, 
+							AES_MODE_T mode, uint8_t *iv)
+{
+	int ret = 0;
+	uint32_t len_bytes = (length + 0xf) & (~0xf);
+	uint8_t *in_cpy = NULL, *out_cpy = NULL;
+	uint8_t *in_work = NULL, *out_work = NULL;
+	uint8_t *aligned_buf_in = NULL, *aligned_buf_out = NULL;
+	int size;
+
+	if (((uint32_t)out & 0x3) || ((uint32_t)in & 0x3) || (len_bytes > length)) {
+		in_cpy = (uint8_t *)in;
+		out_cpy = (uint8_t *)out;
+
+		/* if length is not a multiple of 16, zero padding */
+		if (((uint32_t)in & 0x3) || (len_bytes > length)) {
+			aligned_buf_in = kmalloc(min((int)len_bytes, WORK_BUF_SIZE), GFP_KERNEL);
+			if (!aligned_buf_in)
+				return -1;
+			memset(aligned_buf_in, 0, min((int)len_bytes, WORK_BUF_SIZE));
+		}
+
+		if (((uint32_t)out & 0x3) || (len_bytes > length)) {
+			aligned_buf_out = kmalloc(min((int)len_bytes, WORK_BUF_SIZE), GFP_KERNEL);
+			if (!aligned_buf_out) {
+				kfree(aligned_buf_in);
+				return -1;
+			}
+		}
+
+		while (len_bytes) {
+			size = min((int)len_bytes, WORK_BUF_SIZE);
+
+			if ((uint32_t)in & 0x3) {
+				memcpy(aligned_buf_in, in_cpy, size);
+				in_work = aligned_buf_in;
+			} else {
+				in_work = in_cpy;
+			}
+
+			if ((uint32_t)out & 0x3) {
+				memset(aligned_buf_out, 0x0, size);
+				out_work = aligned_buf_out;
+			} else {
+				out_work = out_cpy;
+			}
+
+			ret = aes_nblocks(dd, op_mode, in_work, out_work, size >> 4, skey1, skey2, mode, iv);
+			if (ret)
+				goto exit;
+
+			if ((uint32_t) out & 0x3)
+				memcpy(out_cpy, aligned_buf_out, size);
+
+			if (mode == AES_XTS_ALG && len_bytes != 0 && (len_bytes > WORK_BUF_SIZE)) {
+				symmetric_key *skey_local = kmalloc(sizeof(symmetric_key), GFP_KERNEL);
+				if (!skey_local) {
+					ret = -1;
+					goto exit;
+				}
+
+				ret = se_rijndael_setup((uint8_t *)skey2->rijndael.eK, 
+										(skey2->rijndael.Nr/BYTES_TO_BITS), skey_local);
+				if (ret) {
+					kfree(skey_local);
+					goto exit;
+				}
+
+				ret = se_rijndael_ecb_decrypt(dd, iv, iv, skey_local);
+				if (ret) {
+					kfree(skey_local);
+					goto exit;
+				}
+
+				kfree(skey_local);
+			}
+
+			out_cpy += size;
+			in_cpy += size;
+			len_bytes -= size;
+		}
+exit:
+		if (aligned_buf_in)
+			kfree(aligned_buf_in);
+		if (aligned_buf_out)
+			kfree(aligned_buf_out);
+	} else {
+		ret = aes_nblocks(dd, op_mode, in, out, len_bytes >> 4, skey1, skey2, mode, iv);
+	}
+
+	return ret;
+}
+
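+/*
+ * Wrap raw key material into symmetric_key descriptors: eK/dK point at the
+ * caller's key buffer and the key-source selector (EXT/RK/SSK) is encoded in
+ * the low bits of Nr, the same convention decoded by
+ * se_rijndael_setup_internal() above.
+ */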
+static int aes_handle_noalign(struct asr_bcm_cipher *dd, AES_MODE_T mode, AES_OP_MODE_T op_mode, AES_KEY_SELECT_T key_select, 
+							const uint8_t *key1, uint32_t keylen1, const uint8_t *key2, uint32_t keylen2,
+							const uint8_t *in, uint8_t *out, uint32_t size, uint8_t *iv)
+{
+	int ret;
+	symmetric_key *pskey1, *pskey2;
+
+	pskey1 = kmalloc(sizeof(symmetric_key), GFP_KERNEL);
+	if (!pskey1) {
+		return -1;
+	}
+
+	pskey2 = kmalloc(sizeof(symmetric_key), GFP_KERNEL);
+	if (!pskey2) {
+		kfree(pskey1);
+		return -1;
+	}	
+
+	memset(pskey1, 0, sizeof(symmetric_key));
+	memset(pskey2, 0, sizeof(symmetric_key));
+
+	if (op_mode == AES_ENCRYPT_OP) {
+		pskey1->rijndael.eK = (uint32_t *)key1;
+	} else if (op_mode == AES_DECRYPT_OP) {
+		pskey1->rijndael.dK = (uint32_t *)key1;
+	}
+
+	if (key_select == EXT_KEY) {
+		pskey1->rijndael.Nr = (keylen1 * BYTES_TO_BITS) & (~0x3);
+	} else if (key_select == RK_KEY) {
+		pskey1->rijndael.Nr = keylen1 * BYTES_TO_BITS | 0x1;
+	} else if (key_select == SSK_KEY) {
+		pskey1->rijndael.Nr = keylen1 * BYTES_TO_BITS | 0x2;
+	} else {
+		kfree(pskey1);
+		kfree(pskey2);
+		return -1;
+	}
+
+	if (mode == AES_XTS_ALG) {
+		if (op_mode == AES_ENCRYPT_OP) {
+			pskey2->rijndael.eK = (uint32_t *)key2;
+			pskey2->rijndael.Nr = keylen2 * BYTES_TO_BITS;
+		} else if (op_mode == AES_DECRYPT_OP) {
+			pskey2->rijndael.dK = (uint32_t *)key2;
+			pskey2->rijndael.Nr = keylen2 * BYTES_TO_BITS;
+		}
+		ret = _aes_handle_noalign(dd, op_mode, in, out, size, pskey1, pskey2, mode, iv);
+	} else {
+		ret = _aes_handle_noalign(dd, op_mode, in, out, size, pskey1, NULL, mode, iv);
+	}
+
+	kfree(pskey1);
+	kfree(pskey2);
+	return ret;
+}
+
+/* crypto framework */
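+/*
+ * The skcipher API expects req->iv to hold the last ciphertext block once a
+ * CBC-style request completes, so chained requests can continue seamlessly.
+ * For in-place decryption that block was saved in rctx->lastc before being
+ * overwritten.
+ */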
+static void asr_cipher_set_iv_as_last_ciphertext_block(struct asr_bcm_cipher*dd)
+{
+	struct skcipher_request *req = skcipher_request_cast(dd->areq);
+	struct asr_cipher_reqctx *rctx = skcipher_request_ctx(req);
+	struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(req);
+	unsigned int ivsize = crypto_skcipher_ivsize(cipher);
+
+	if (req->cryptlen < ivsize)
+		return;
+
+	if (rctx->mode & FLAGS_ENCRYPT) {
+		scatterwalk_map_and_copy(req->iv, req->dst,
+					 req->cryptlen - ivsize, ivsize, 0);
+	} else {
+		if (req->src == req->dst)
+			memcpy(req->iv, rctx->lastc, ivsize);
+		else
+			scatterwalk_map_and_copy(req->iv, req->src,
+						 req->cryptlen - ivsize,
+						 ivsize, 0);
+	}
+}
+
+static int asr_cipher_complete(struct asr_bcm_cipher *dd, int err)
+{
+	struct asr_bcm_dev *bcm_dd = dev_get_drvdata(dd->dev);
+	struct asr_bcm_ops *bcm_ops = bcm_dd->bcm_ops;
+
+	dd->flags &= ~FLAGS_BUSY;
+
+	asr_cipher_set_iv_as_last_ciphertext_block(dd);
+
+	if (dd->is_async)
+		dd->areq->complete(dd->areq, err);
+
+	bcm_ops->dev_put(bcm_dd);
+
+	tasklet_schedule(&dd->queue_task);
+
+	return err;
+}
+
+static int asr_complete(struct asr_bcm_cipher *dd)
+{
+	return asr_cipher_complete(dd, 0);
+}
+
+static inline size_t asr_cipher_padlen(size_t len, size_t block_size)
+{
+	len &= block_size - 1;
+	return len ? block_size - len : 0;
+}
+
+static int asr_cipher_buff_init(struct asr_bcm_cipher *dd, uint32_t len)
+{
+	dd->buf = (void *)__get_free_pages(GFP_KERNEL, get_order(len));
+
+	if (!dd->buf) {
+		dev_err(dd->dev, "unable to alloc pages.\n");
+		return -ENOMEM;
+	}
+
+	dd->buflen = PAGE_SIZE << get_order(len);
+
+	return 0;
+}
+
+static void asr_cipher_buff_cleanup(struct asr_bcm_cipher *dd, uint32_t len)
+{
+	free_pages((unsigned long)dd->buf, get_order(len));
+	dd->buflen = 0;
+}
+
+static inline void asr_cipher_get(struct asr_bcm_cipher *dd)
+{
+	mutex_lock(&dd->cipher_lock);
+}
+
+static inline void asr_cipher_put(struct asr_bcm_cipher *dd)
+{
+	if(mutex_is_locked(&dd->cipher_lock))
+		mutex_unlock(&dd->cipher_lock);
+}
+
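+/*
+ * Process one skcipher request: linearize the source scatterlist into a
+ * bounce buffer (padded up to the AES block size), run the engine in place
+ * on that buffer, then copy the result back to the destination scatterlist.
+ */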
+static int asr_sca_cipher_process(struct asr_bcm_cipher *dd, 
+					struct skcipher_request *req, asr_cipher_fn_t resume)
+{
+	int ret;
+	size_t padlen = asr_cipher_padlen(req->cryptlen, CIPHER_BLOCK_SIZE);
+	struct asr_cipher_reqctx *rctx = skcipher_request_ctx(req);
+	AES_MODE_T mode;
+	AES_OP_MODE_T op_mode;
+	AES_KEY_SELECT_T key_select;
+
+	asr_cipher_get(dd);
+
+	if (unlikely(req->cryptlen == 0)) {
+		asr_cipher_put(dd);
+		return -EINVAL;
+	}
+
+	dd->datalen = req->cryptlen + padlen;
+	ret = asr_cipher_buff_init(dd, dd->datalen);
+	if (ret) {
+		asr_cipher_put(dd);
+		return ret;
+	}
+
+	sg_copy_to_buffer(req->src, sg_nents(req->src), dd->buf, req->cryptlen);
+
+	dd->total = req->cryptlen;
+	dd->real_dst = req->dst;
+	dd->resume = resume;
+	dd->data = (u32 *)dd->buf;
+
+	if ((dd->flags & FLAGS_ENCRYPT))
+		op_mode = AES_ENCRYPT_OP;
+	else
+		op_mode = AES_DECRYPT_OP;
+
+	if ((dd->flags & FLAGS_OPMODE_MASK) == FLAGS_ECB) {
+		mode = AES_ECB_ALG;
+	} else if ((dd->flags & FLAGS_OPMODE_MASK) == FLAGS_CBC) {
+		mode = AES_CBC_ALG;
+	} else if ((dd->flags & FLAGS_OPMODE_MASK) == FLAGS_CTR) {
+		mode = AES_CTR_ALG;
+	} else {
+		/* unknown block mode: bail out instead of using an uninitialized mode */
+		asr_cipher_buff_cleanup(dd, dd->datalen);
+		asr_cipher_put(dd);
+		return asr_cipher_complete(dd, -EINVAL);
+	}
+
+	if (rctx->use_rkek) {
+		key_select = RK_KEY;
+	} else {
+		key_select = EXT_KEY;
+	}
+
+	ret = aes_handle_noalign(dd, mode, op_mode, key_select, (uint8_t *)dd->ctx->key,
+						dd->ctx->keylen, NULL, 0, (const uint8_t *)dd->data, (uint8_t *)dd->data, 
+						dd->datalen, req->iv);
+	if (ret)
+		ret = -EINVAL;
+
+	if (!sg_copy_from_buffer(dd->real_dst, sg_nents(dd->real_dst),
+				 dd->buf, dd->total)) 
+		ret = -EINVAL;
+
+	asr_cipher_buff_cleanup(dd, dd->datalen);
+	asr_cipher_put(dd);
+
+	return asr_cipher_complete(dd, ret);
+}
+
+static inline void asr_cipher_set_mode(struct asr_bcm_cipher *dd,
+					  const struct asr_cipher_reqctx *rctx)
+{
+	/* Clear all but persistent flags and set request flags. */
+	dd->flags = (dd->flags & CIPHER_FLAGS_PERSISTENT) | rctx->mode;
+}
+
+static int asr_cipher_start(struct asr_bcm_cipher *dd)
+{
+	struct skcipher_request *req = skcipher_request_cast(dd->areq);
+	struct asr_cipher_reqctx *rctx = skcipher_request_ctx(req);
+	struct asr_bcm_dev *bcm_dd = dev_get_drvdata(dd->dev);
+	struct asr_bcm_ops *bcm_ops = bcm_dd->bcm_ops;
+
+	bcm_ops->dev_get(bcm_dd);
+
+	asr_cipher_set_mode(dd, rctx);
+	return asr_sca_cipher_process(dd, req, asr_complete);
+}
+
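+/*
+ * Single-threaded request queue: only one request owns the hardware at a
+ * time (FLAGS_BUSY); new requests are enqueued and picked up again from the
+ * queue tasklet once the current one completes.
+ */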
+static int asr_cipher_handle_queue(struct asr_bcm_cipher *dd,
+				  struct crypto_async_request *new_areq)
+{
+	struct crypto_async_request *areq, *backlog;
+	struct asr_cipher_ctx *ctx;
+	unsigned long flags;
+	bool start_async;
+	int err, ret = 0;
+
+	spin_lock_irqsave(&dd->lock, flags);
+	if (new_areq)
+		ret = crypto_enqueue_request(&dd->queue, new_areq);
+	if (dd->flags & FLAGS_BUSY) {
+		spin_unlock_irqrestore(&dd->lock, flags);
+		return ret;
+	}
+
+	backlog = crypto_get_backlog(&dd->queue);
+	areq = crypto_dequeue_request(&dd->queue);
+	if (areq) {
+		dd->flags |= FLAGS_BUSY;
+	}
+	spin_unlock_irqrestore(&dd->lock, flags);
+	if (!areq)
+		return ret;
+
+	if (backlog)
+		backlog->complete(backlog, -EINPROGRESS);
+
+	ctx = crypto_tfm_ctx(areq->tfm);
+	dd->areq = areq;
+	dd->ctx = ctx;
+	start_async = (areq != new_areq);
+	dd->is_async = start_async;
+
+	/* WARNING: ctx->start() MAY change dd->is_async. */
+	err = ctx->start(dd);
+	return (start_async) ? ret : err;
+}
+
+static int asr_cipher(struct skcipher_request *req, unsigned long mode)
+{
+	int ret;
+	struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(req);
+	struct asr_cipher_ctx *ctx = crypto_skcipher_ctx(cipher);
+	struct asr_cipher_reqctx *rctx;
+
+	ctx->block_size = CIPHER_BLOCK_SIZE;
+	rctx = skcipher_request_ctx(req);
+	rctx->mode = mode;
+	rctx->use_rkek = ctx->use_rkek;
+
+	if (!(mode & FLAGS_ENCRYPT) && (req->src == req->dst)) {
+		unsigned int ivsize = crypto_skcipher_ivsize(cipher);
+		if (req->cryptlen >= ivsize) {
+			scatterwalk_map_and_copy(rctx->lastc, req->src,
+						 req->cryptlen - ivsize,
+						 ivsize, 0);
+		}
+	}
+
+	ret = asr_cipher_handle_queue(ctx->dd, &req->base);
+
+	asr_cipher_put(ctx->dd);
+	return ret;
+}
+
+static int asr_cipher_setkey(struct crypto_skcipher *cipher, const u8 *key,
+			   unsigned int keylen)
+{
+	struct asr_cipher_ctx *ctx = crypto_skcipher_ctx(cipher);
+	struct asr_bcm_cipher *dd = asr_cipher_local;
+	
+	ctx->dd = dd;
+	ctx->use_rkek = false;
+
+	if (keylen != AES_KEYSIZE_128 &&
+		keylen != AES_KEYSIZE_192 &&
+		keylen != AES_KEYSIZE_256) {
+		crypto_skcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+
+	memcpy(ctx->key, key, keylen);
+	ctx->keylen = keylen;
+
+	return 0;
+}
+
+static int asr_cipher_set_hwkey(struct crypto_skcipher *cipher, const u8 *key,
+			   unsigned int keylen)
+{
+	struct asr_cipher_ctx *ctx = crypto_skcipher_ctx(cipher);
+	struct asr_bcm_cipher *dd = asr_cipher_local;
+	
+	ctx->dd = dd;
+	if (!dd->rkek_burned)
+		return -EPERM;
+
+	ctx->use_rkek = true;
+
+	if (keylen != AES_KEYSIZE_128 &&
+		keylen != AES_KEYSIZE_192 &&
+		keylen != AES_KEYSIZE_256) {
+		crypto_skcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+
+	memcpy(ctx->key, key, keylen);
+	ctx->keylen = keylen;
+
+	return 0;
+}
+
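+/*
+ * Check whether the root key (RKEK) has been provisioned by reading the GEU
+ * lifecycle fuse bits; more than one LCS bit set is treated as "fused".
+ * The exact lifecycle encoding is SoC-specific.
+ */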
+static int asr_cipher_rkek_fused(struct asr_bcm_cipher *dd)
+{
+#define GEU_KSTR_BANK6_LCS			(0x0168)
+#define GEU_KSTR_LCS_DM_BASE		(3)
+#define GEU_KSTR_LCS_MASK			(0x7)
+
+	uint32_t value;
+	struct device_node *np;
+	struct resource res;
+	void __iomem *io_base;
+
+	/* get geu node */
+	np = of_find_compatible_node(NULL, NULL, "asr,asr-geu");
+	if (!np) {
+		dev_err(dd->dev, "can't find geu node to check rkek burned");
+		return 0;
+	}
+
+	if (of_address_to_resource(np, 0, &res)) {
+		dev_err(dd->dev, "can't find geu address");
+		return 0;
+	}
+
+	io_base = ioremap(res.start, resource_size(&res));
+	if (!io_base) {
+		dev_err(dd->dev, "geu regs can't remap");
+		return 0;
+	}
+
+	value = readl_relaxed(io_base + GEU_KSTR_BANK6_LCS);
+	value >>= GEU_KSTR_LCS_DM_BASE;
+	value &= GEU_KSTR_LCS_MASK;	
+	if (hweight32(value) > 1) {
+		iounmap(io_base);
+		return 1;
+	}
+
+	iounmap(io_base);
+	return 0;
+}
+
+static int asr_aes_ecb_encrypt(struct skcipher_request *req)
+{
+	return asr_cipher(req, FLAGS_AES | FLAGS_ECB | FLAGS_ENCRYPT);
+}
+
+static int asr_aes_ecb_decrypt(struct skcipher_request *req)
+{
+	return asr_cipher(req, FLAGS_AES | FLAGS_ECB);
+}
+
+static int asr_aes_cbc_encrypt(struct skcipher_request *req)
+{
+	return asr_cipher(req, FLAGS_AES | FLAGS_CBC | FLAGS_ENCRYPT);
+}
+
+static int asr_aes_cbc_decrypt(struct skcipher_request *req)
+{
+	return asr_cipher(req, FLAGS_AES | FLAGS_CBC);
+}
+
+static int asr_aes_ctr_encrypt(struct skcipher_request *req)
+{
+	return asr_cipher(req, FLAGS_AES | FLAGS_CTR | FLAGS_ENCRYPT);
+}
+
+static int asr_aes_ctr_decrypt(struct skcipher_request *req)
+{
+	return asr_cipher(req, FLAGS_AES | FLAGS_CTR);
+}
+
+static int asr_cipher_init(struct crypto_skcipher *tfm)
+{
+	struct asr_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	tfm->reqsize = sizeof(struct asr_cipher_reqctx);
+	ctx->start = asr_cipher_start;
+
+	return 0;
+}
+
+static int asr_cipher_hwkey_init(struct crypto_skcipher *tfm)
+{
+	struct asr_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct asr_bcm_cipher *dd = asr_cipher_local;
+
+	if (!dd->rkek_burned)
+		return -EPERM;
+
+	tfm->reqsize = sizeof(struct asr_cipher_reqctx);
+	ctx->start = asr_cipher_start;
+
+	return 0;
+}
+
+static void asr_cipher_exit(struct crypto_skcipher *tfm)
+{
+	struct asr_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	memset(ctx, 0, sizeof(*ctx));
+}
+
+static struct skcipher_alg cipher_algs[] = {
+	/* AES - ECB */
+	{
+		.base = {
+			.cra_name = "ecb(aes)",
+			.cra_driver_name = "asr-ecb-aes",
+			.cra_priority = 300,
+			.cra_flags = CRYPTO_ALG_ASYNC,
+			.cra_blocksize = AES_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct asr_cipher_ctx),
+			.cra_alignmask = 0xf,
+			.cra_module = THIS_MODULE,
+		},
+		.min_keysize = CIPHER_MIN_KEY_SIZE,
+		.max_keysize = CIPHER_MAX_KEY_SIZE,
+		.setkey = asr_cipher_setkey,
+		.encrypt = asr_aes_ecb_encrypt,
+		.decrypt = asr_aes_ecb_decrypt,
+		.init = asr_cipher_init,
+		.exit = asr_cipher_exit,
+	},
+	/* AES - CBC */
+	{
+		.base = {
+			.cra_name = "cbc(aes)",
+			.cra_driver_name = "asr-cbc-aes",
+			.cra_priority = 300,
+			.cra_flags = CRYPTO_ALG_ASYNC,
+			.cra_blocksize = AES_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct asr_cipher_ctx),
+			.cra_alignmask = 0xf,
+			.cra_module = THIS_MODULE,
+		},
+		.min_keysize = CIPHER_MIN_KEY_SIZE,
+		.max_keysize = CIPHER_MAX_KEY_SIZE,
+		.setkey = asr_cipher_setkey,
+		.encrypt = asr_aes_cbc_encrypt,
+		.decrypt = asr_aes_cbc_decrypt,
+		.init = asr_cipher_init,
+		.exit = asr_cipher_exit,
+		.ivsize = AES_BLOCK_SIZE,
+	},
+	/* AES - CTR */
+	{
+		.base = {
+			.cra_name = "ctr(aes)",
+			.cra_driver_name = "asr-ctr-aes",
+			.cra_priority = 300,
+			.cra_flags = CRYPTO_ALG_ASYNC,
+			.cra_blocksize = AES_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct asr_cipher_ctx),
+			.cra_alignmask = 0xf,
+			.cra_module = THIS_MODULE,
+		},
+		.min_keysize = CIPHER_MIN_KEY_SIZE,
+		.max_keysize = CIPHER_MAX_KEY_SIZE,
+		.setkey = asr_cipher_setkey,
+		.encrypt = asr_aes_ctr_encrypt,
+		.decrypt = asr_aes_ctr_decrypt,
+		.init = asr_cipher_init,
+		.exit = asr_cipher_exit,
+		.ivsize = AES_BLOCK_SIZE,
+	},
+
+	/* hardware key AES - ECB */
+	{
+		.base = {
+			.cra_name = "ecb(aes-hwkey)",
+			.cra_driver_name = "asr-ecb-aes",
+			.cra_priority = 300,
+			.cra_flags = CRYPTO_ALG_ASYNC,
+			.cra_blocksize = AES_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct asr_cipher_ctx),
+			.cra_alignmask = 0xf,
+			.cra_module = THIS_MODULE,
+		},
+		.min_keysize = CIPHER_MIN_KEY_SIZE,
+		.max_keysize = CIPHER_MAX_KEY_SIZE,
+		.setkey = asr_cipher_set_hwkey,
+		.encrypt = asr_aes_ecb_encrypt,
+		.decrypt = asr_aes_ecb_decrypt,
+		.init = asr_cipher_hwkey_init,
+		.exit = asr_cipher_exit,
+	},
+	/* hardware key AES - CBC */
+	{
+		.base = {
+			.cra_name = "cbc(aes-hwkey)",
+			.cra_driver_name = "asr-cbc-aes",
+			.cra_priority = 300,
+			.cra_flags = CRYPTO_ALG_ASYNC,
+			.cra_blocksize = AES_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct asr_cipher_ctx),
+			.cra_alignmask = 0xf,
+			.cra_module = THIS_MODULE,
+		},
+		.min_keysize = CIPHER_MIN_KEY_SIZE,
+		.max_keysize = CIPHER_MAX_KEY_SIZE,
+		.setkey = asr_cipher_set_hwkey,
+		.encrypt = asr_aes_cbc_encrypt,
+		.decrypt = asr_aes_cbc_decrypt,
+		.init = asr_cipher_hwkey_init,
+		.exit = asr_cipher_exit,
+		.ivsize = AES_BLOCK_SIZE,
+	},
+};
+
+static void asr_cipher_queue_task(unsigned long data)
+{
+	struct asr_bcm_cipher *dd = (struct asr_bcm_cipher *)data;
+
+	asr_cipher_handle_queue(dd, NULL);
+}
+
+static void asr_cipher_done_task(unsigned long data)
+{
+	struct asr_bcm_cipher *dd = (struct asr_bcm_cipher *)data;
+
+	dd->is_async = true;
+	(void)dd->resume(dd);
+}
+
+int asr_bcm_cipher_register(struct asr_bcm_dev *bcm_dd)
+{
+	int err, i, j;
+	struct device_node *np = NULL;
+	struct asr_bcm_cipher *cipher_dd;
+
+	cipher_dd = &bcm_dd->asr_cipher;
+	cipher_dd->dev = bcm_dd->dev;
+	cipher_dd->io_base = bcm_dd->io_base;
+	cipher_dd->phys_base = bcm_dd->phys_base;
+
+	np = cipher_dd->dev->of_node;
+
+	cipher_dd->rkek_burned = asr_cipher_rkek_fused(cipher_dd);
+
+	asr_cipher_local = cipher_dd;
+
+	spin_lock_init(&cipher_dd->lock);
+	mutex_init(&cipher_dd->cipher_lock);
+	tasklet_init(&cipher_dd->done_task, asr_cipher_done_task,
+					(unsigned long)cipher_dd);
+	tasklet_init(&cipher_dd->queue_task, asr_cipher_queue_task,
+					(unsigned long)cipher_dd);
+	crypto_init_queue(&cipher_dd->queue, ASR_CIPHER_QUEUE_LENGTH);
+
+	for (i = 0; i < ARRAY_SIZE(cipher_algs); i++) {
+		err = crypto_register_skcipher(&cipher_algs[i]);
+		if (err){
+			for (j = 0; j < i; j++)
+				crypto_unregister_skcipher(&cipher_algs[j]);
+			return err;
+		}
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(asr_bcm_cipher_register);
+
+int asr_bcm_cipher_unregister(struct asr_bcm_dev *bcm_dd)
+{
+	int i;
+	struct asr_bcm_cipher *cipher_dd = &bcm_dd->asr_cipher;
+
+	for (i = 0; i < ARRAY_SIZE(cipher_algs); i++)
+		crypto_unregister_skcipher(&cipher_algs[i]);
+
+	tasklet_kill(&cipher_dd->done_task);
+	tasklet_kill(&cipher_dd->queue_task);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(asr_bcm_cipher_unregister);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("wangyonggan <yongganwang@asrmicro.com>");
+MODULE_DESCRIPTION("ASR bcm cipher driver");
\ No newline at end of file
diff --git a/marvell/linux/drivers/crypto/asr/bcm/asr-cipher.h b/marvell/linux/drivers/crypto/asr/bcm/asr-cipher.h
new file mode 100644
index 0000000..3902cf3
--- /dev/null
+++ b/marvell/linux/drivers/crypto/asr/bcm/asr-cipher.h
@@ -0,0 +1,135 @@
+#ifndef ASR_CIPHER_H
+#define ASR_CIPHER_H
+
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <linux/crypto.h>
+#include <crypto/aes.h>
+#include <crypto/scatterwalk.h>
+#include <crypto/algapi.h>
+#include <crypto/internal/skcipher.h>
+
+/* CIPHER flags */
+#define FLAGS_ENCRYPT		(1 << 0)
+#define FLAGS_ECB			(0 << 1)
+#define FLAGS_CBC			(1 << 1)
+#define FLAGS_CTR			(2 << 1)
+#define FLAGS_OPMODE_MASK 	(3 << 1)
+#define FLAGS_BUSY			(1 << 3)
+#define FLAGS_AES			(0 << 4)
+#define FLAGS_SM4			(1 << 4)
+
+#define CIPHER_FLAGS_PERSISTENT	FLAGS_BUSY
+
+#define BYTES_TO_BITS  8
+#define WORK_BUF_SIZE 2048
+
+#define ASR_CIPHER_QUEUE_LENGTH	50
+#define ASR_CIPHER_PRIORITY		300
+
+#define ASR_CIPHER_BUFFER_ORDER	2
+#define ASR_CIPHER_BUFFER_SIZE	(PAGE_SIZE << ASR_CIPHER_BUFFER_ORDER)
+
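+/*
+ * Key source selector; the chosen value is carried in the low two bits of
+ * the key-length / rijndael_key.Nr encoding used by the cipher routines.
+ */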
+typedef enum {
+    EXT_KEY = 0,
+    RK_KEY,
+    SSK_KEY,
+} AES_KEY_SELECT_T;
+
+typedef enum {
+    AES_128 = 128/8,
+    AES_192 = 192/8,
+    AES_256 = 256/8,
+} AES_KEY_LEN_T;
+
+typedef enum {
+    AES_ECB_ALG = 0,
+    AES_CBC_ALG,
+    AES_CTR_ALG,
+    AES_XTS_ALG,
+    AES_KEYWRAP,
+} AES_MODE_T;
+
+typedef enum {
+    AES_DECRYPT_OP = 0,
+    AES_ENCRYPT_OP,
+} AES_OP_MODE_T;
+
+typedef enum {
+    ENG_AES = 0,
+    ENG_DES,
+    ENG_RC4,
+} CRYPTO_ENG_SEL_T;
+
+typedef struct {
+    uint32_t paddr;
+    uint32_t size;
+    uint32_t next_desc;
+    uint32_t reserved;
+} DMA_DESC_T;
+
+struct rijndael_key {
+   uint8_t K[(60 + 60 + 4) * sizeof(uint32_t)];
+   uint32_t *eK;
+   uint32_t *dK;
+   int Nr;
+};
+
+typedef union Symmetric_key {
+   struct rijndael_key rijndael;
+} symmetric_key;
+
+struct asr_bcm_cipher;
+
+typedef int (*asr_cipher_fn_t)(struct asr_bcm_cipher *dd);
+typedef irqreturn_t (*asr_cipher_irq_t)(void *);
+
+
+struct asr_cipher_ctx {
+	struct asr_bcm_cipher	*dd;
+	asr_cipher_fn_t		    start;
+	int			            keylen;
+	u32			            key[AES_KEYSIZE_256 / sizeof(u32)];
+	u16			            block_size;
+	bool		            use_rkek;
+};
+
+struct asr_cipher_reqctx {
+	unsigned long		mode;
+	bool		use_rkek;
+	u32			lastc[AES_BLOCK_SIZE / sizeof(u32)];
+};
+
+struct asr_bcm_cipher {
+	struct device		*dev;
+	struct crypto_async_request	*areq;
+
+	void __iomem		*io_base;
+	unsigned long		phys_base;
+
+	struct asr_cipher_ctx	*ctx;
+
+	bool			is_async;
+	bool			rkek_burned;
+	unsigned long		flags;
+
+	spinlock_t		lock;
+	struct mutex cipher_lock;
+	struct crypto_queue	queue;
+	struct tasklet_struct	queue_task;
+
+	asr_cipher_fn_t		resume;
+	struct tasklet_struct	done_task;
+
+	size_t			total;
+	size_t			datalen;
+	u32			*data;
+
+	size_t			buflen;
+	void			*buf;
+
+	struct scatterlist	aligned_sg;
+	struct scatterlist	*real_dst;
+};
+
+#endif
\ No newline at end of file
diff --git a/marvell/linux/drivers/crypto/asr/bcm/asr-sha.c b/marvell/linux/drivers/crypto/asr/bcm/asr-sha.c
new file mode 100644
index 0000000..acaac25
--- /dev/null
+++ b/marvell/linux/drivers/crypto/asr/bcm/asr-sha.c
@@ -0,0 +1,1223 @@
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/clk-provider.h>
+#include <linux/clk.h>
+#include <linux/io.h>
+#include <linux/hw_random.h>
+#include <linux/platform_device.h>
+#include <linux/scatterlist.h>
+#include <crypto/scatterwalk.h>
+#include <linux/of_device.h>
+#include <linux/mutex.h>
+#include <linux/device.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <crypto/hmac.h>
+#include <crypto/md5.h>
+#include <crypto/sha.h>
+
+#include "asr-bcm.h"
+#include "asr-sha.h"
+
+// #define ASR_BCM_SHA_TEST
+
+static struct asr_bcm_sha *asr_sha_local = NULL;
+
+static inline u32 asr_sha_read(struct asr_bcm_sha *dd, u32 offset)
+{
+    u32 value = readl_relaxed(dd->io_base + offset);
+
+    return value;
+}
+
+static inline void asr_sha_write(struct asr_bcm_sha *dd,
+                    u32 offset, u32 value)
+{
+    writel_relaxed(value, dd->io_base + offset);
+}
+
+/* ------- bcm sha hardware operation -------- */
+static void hash_sw_reset(struct asr_bcm_sha *dd)
+{
+    uint32_t val;
+
+    val = (0x1 << 0x3);
+    asr_sha_write(dd, HASH_CONTROL, val);
+    val = 0x0;
+    asr_sha_write(dd, HASH_CONTROL, val);
+
+    return;
+}
+
+static int hash_set_mode(struct asr_bcm_sha *dd, \
+                        HASH_MODE_T mode, HASH_ALGO_T algo)
+{
+    uint32_t val;
+    
+    val = asr_sha_read(dd, HASH_CONFIG);
+    val &= ~0xf;
+    val |= algo;
+    if (mode == HASH_HMAC)
+        val |= (0x1 << 0x3);
+    asr_sha_write(dd, HASH_CONFIG, val);
+
+    return 0;
+}
+
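+/*
+ * Issue the command latched in HASH_CONTROL and busy-poll the status
+ * register: first until the engine leaves its busy states, then until the
+ * done bit is set (up to ~1s at 1us per poll). The done bit is cleared
+ * before returning so the next command can be issued.
+ */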
+static int hash_kick(struct asr_bcm_sha *dd)
+{
+    uint32_t val;
+    uint32_t cnt;
+
+    val = asr_sha_read(dd, HASH_COMMAND);
+    val |= (0x1 << 0x0);
+    asr_sha_write(dd, HASH_COMMAND, val);
+
+    cnt = 1;
+    /* wait for command */
+    
+    do {
+        val = asr_sha_read(dd, HASH_STATUS);
+        if (cnt == 1000000) {
+            dev_err(dd->dev, "hash kick wait busy %u times..0x%08x\n", cnt, val);
+            return -1;
+        }
+        val &= 0xE;
+        udelay(1);
+        cnt++;
+    } while(val != 0);
+
+    cnt = 1;
+    do {
+        val = asr_sha_read(dd, HASH_STATUS);
+        if (cnt == 1000000) {
+            dev_err(dd->dev, "hash kick wait busy %u times..0x%08x\n", cnt, val);
+            return -1;
+        }
+        val &= 0x1;
+        udelay(1);
+        cnt++;
+    } while(val == 0);
+
+    /* clear status so next command can be issued */
+    asr_sha_write(dd, HASH_STATUS, val);
+
+    return 0;
+}
+
+static int hash_config_op(struct asr_bcm_sha *dd, HASH_OP_MODE_T op_mode)
+{
+    uint32_t val;
+    int ret = 0;
+
+    if (op_mode < HASH_INIT || op_mode > HASH_FINAL)
+        return -1;
+
+    val = asr_sha_read(dd, HASH_CONTROL);
+    val &= ~(0x3 << 0x0);
+    val |= op_mode;
+    asr_sha_write(dd, HASH_CONTROL, val);
+
+    ret = hash_kick(dd);
+    return ret;
+}
+
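+/*
+ * The hash engine holds a single digest state, so the intermediate state is
+ * saved to / restored from the per-request context around every DMA pass,
+ * letting multiple requests share the engine.
+ */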
+static int hash_save_context(struct asr_sha_reqctx *ctx, int alg)
+{
+    int i;
+    struct hash_state *md = &ctx->md;
+    struct asr_bcm_sha *dd = ctx->dd;
+    switch(alg) {
+    case HASH_SHA384:
+    case HASH_SHA512:
+        for (i = 0; i < 8; i++) {
+            md->sha512.state[i] = asr_sha_read(dd, HASH_DIGEST(i));
+            md->sha512.state[i+8] = asr_sha_read(dd, HASH_DIGEST_H(i));
+        }
+        break;
+    case HASH_SHA256:
+    case HASH_SHA224:
+        for (i = 0; i < 8; i++) {
+            md->sha256.state[i] = asr_sha_read(dd, HASH_DIGEST(i));
+        }
+        break;
+    case HASH_SHA1:
+        for (i = 0; i < 5; i++) {
+            md->sha1.state[i] = asr_sha_read(dd, HASH_DIGEST(i));
+        }
+        break;
+    case HASH_MD5:
+        for (i = 0; i < 4; i++) {
+            md->md5.state[i] = asr_sha_read(dd, HASH_DIGEST(i));
+        }
+        break;
+    default:
+        dev_err(dd->dev, "hash save context: invalid alg!\r\n");
+        return -1;
+    }
+    return 0;
+}
+
+static int hash_restore_context(struct asr_sha_reqctx *ctx, int alg)
+{
+    int i;
+    struct hash_state *md = &ctx->md;
+    struct asr_bcm_sha *dd = ctx->dd;
+
+    switch(alg) {
+    case HASH_SHA384:
+    case HASH_SHA512:
+        for (i = 0; i < 8; i++) {
+            asr_sha_write(dd, HASH_DIGEST(i), md->sha512.state[i]);
+            asr_sha_write(dd, HASH_DIGEST_H(i), md->sha512.state[i+8]);
+        }
+        break;
+    case HASH_SHA256:
+    case HASH_SHA224:
+        for (i = 0; i < 8; i++) {
+           asr_sha_write(dd, HASH_DIGEST(i), md->sha256.state[i]);
+        }
+        break;
+    case HASH_SHA1:
+        for (i = 0; i < 5; i++) {
+            asr_sha_write(dd, HASH_DIGEST(i), md->sha1.state[i]);
+        }
+        break;
+    case HASH_MD5:
+        for (i = 0; i < 4; i++) {
+            asr_sha_write(dd, HASH_DIGEST(i), md->md5.state[i]);
+        }
+        break;
+    default:
+        dev_err(dd->dev, "hash restore context: invalid alg!\r\n");
+        return -1;
+    }
+
+    return 0;
+}
+
+static inline void sha_cache_operation(void *addr, int size)
+{
+    __cpuc_flush_dcache_area(addr, size);
+}
+
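+/*
+ * Feed one word-aligned chunk to the hash engine via DMA: reset the HASH and
+ * DMA engines, route the A-bus, restore the saved digest state, flush the
+ * D-cache for the input, run a HASH_UPDATE, then save the digest state back.
+ */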
+static int hash_compress_aligned(struct asr_sha_reqctx *ctx, int alg, uint8_t *in, int data_len)
+{
+    int ret = 0;
+    struct asr_bcm_sha *dd = ctx->dd;
+    struct asr_bcm_dev *dev_dd = container_of(dd, struct asr_bcm_dev, asr_sha);
+	struct asr_bcm_ops *bcm_ops = dev_dd->bcm_ops;
+
+    if (((uint32_t)in & 0x3) || (data_len == 0))
+        return -1;
+
+    bcm_ops->dev_get(dev_dd);
+
+    adec_engine_hw_reset(dev_dd, ACC_ENG_HASH);
+    hash_sw_reset(dd);
+    ret = hash_set_mode(dd, HASH_SIMPLE, alg);
+    if (ret) 
+        goto error;
+
+    adec_engine_hw_reset(dev_dd, ACC_ENG_DMA);
+    abus_set_mode(dev_dd, ABUS_GRP_A_HASH, ABUS_GRP_B_AES, ABUS_CROSS, ABUS_STRAIGHT);
+    dma_input_config(dev_dd, 0, 0);
+    ret = hash_restore_context(ctx, alg);
+    if (ret)
+        goto error;
+
+    ret = dma_input_address(dev_dd, (uint32_t)virt_to_phys((void *)in), \
+                            ROUND_UP_TO_WORD_CNT(data_len), 0);
+    if (ret)
+        goto error;
+
+    sha_cache_operation(in, (ROUND_UP_TO_WORD_CNT(data_len) << 2));
+    dma_input_start(dev_dd);
+    asr_sha_write(dd, HASH_INCOME_SEG_SZ, data_len);
+    ret = hash_config_op(dd, HASH_UPDATE);
+    if (ret) {
+        dma_input_stop(dev_dd);
+        goto error;
+    }
+
+    dma_wait_input_finish(dev_dd);
+    dma_input_stop(dev_dd);
+
+    ret = hash_save_context(ctx, alg);
+    if (ret) 
+        goto error;
+
+error:
+    bcm_ops->dev_put(dev_dd);
+    return ret;    
+}
+
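+/*
+ * Compress whole blocks. Unaligned input is copied into a temporary aligned
+ * buffer and fed to the engine in chunks of at most HASH_ALIGN_BUF_SIZE.
+ */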
+static int hash_compress(struct asr_sha_reqctx *ctx, int alg, uint8_t *in, int blks, int blk_sz)
+{
+    uint8_t *dma_in = NULL;
+    int data_len = blks * blk_sz;
+    int ret, n;
+    uint8_t *ptr_in;
+
+    if (((uint32_t)in & 0x3) == 0) {
+        dma_in = in;
+        ret = hash_compress_aligned(ctx, alg, dma_in, data_len);
+        return ret;
+    }
+
+    n = min(data_len, HASH_ALIGN_BUF_SIZE);
+    /*
+     * kmalloc() already returns word-aligned memory; do not adjust the
+     * returned pointer, otherwise kfree() below would be handed a different
+     * address than the one that was allocated.
+     */
+    dma_in = kmalloc(n, GFP_KERNEL);
+    if (!dma_in) {
+        ret = -1;
+        goto exit;
+    }
+
+    ptr_in = in;
+    do {
+        n = min(data_len, HASH_ALIGN_BUF_SIZE);
+        memcpy((void *)dma_in, (void *)ptr_in, n);
+        ret = hash_compress_aligned(ctx, alg, dma_in, n);
+        if (ret) {
+            goto exit;
+        }
+        data_len -= n;
+        ptr_in +=n; 
+    } while(data_len > 0);
+
+exit:
+    if (dma_in)
+        kfree(dma_in);
+    return ret;
+}
+
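+/*
+ * Finalize a hash: push the buffered tail bytes (if any) through DMA,
+ * program the total message size, set what appears to be the auto-padding
+ * bit (bit 2) in HASH_CONTROL, issue HASH_FINAL, then read the digest out of
+ * the result registers. SHA-384/512 digests are split across the low/high
+ * register banks, hence the byte reassembly below.
+ */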
+static int hash_tail_process(struct asr_sha_reqctx *ctx, uint8_t *out, int out_size, \
+                        uint64_t total_size, int tail_size, unsigned char *dma_addr, int alg)
+{
+    int  ret = 0;
+    int reg_val, i;
+    struct asr_bcm_sha *dd = ctx->dd;
+    struct asr_bcm_dev *dev_dd = container_of(dd, struct asr_bcm_dev, asr_sha);
+	struct asr_bcm_ops *bcm_ops = dev_dd->bcm_ops;
+
+    bcm_ops->dev_get(dev_dd);
+
+    adec_engine_hw_reset(dev_dd, ACC_ENG_HASH);
+    hash_sw_reset(dd);
+    ret = hash_set_mode(dd, HASH_SIMPLE, alg);
+    if (ret) 
+        goto error;
+
+    adec_engine_hw_reset(dev_dd, ACC_ENG_DMA);
+    abus_set_mode(dev_dd, ABUS_GRP_A_HASH, ABUS_GRP_B_AES, ABUS_CROSS, ABUS_STRAIGHT);
+    dma_input_config(dev_dd, 0, 0);
+    ret = hash_restore_context(ctx, alg);
+    if (ret)
+        goto error;
+
+    ret = dma_input_address(dev_dd, (uint32_t)virt_to_phys((void *)dma_addr), \
+                            ROUND_UP_TO_WORD_CNT(tail_size), 0);
+    if (ret)
+        goto error;
+
+    if (tail_size) {
+        sha_cache_operation(dma_addr, (ROUND_UP_TO_WORD_CNT(tail_size) << 2));
+        dma_input_start(dev_dd);
+    }
+    
+    asr_sha_write(dd, HASH_INCOME_SEG_SZ, tail_size);
+    asr_sha_write(dd, HASH_TOTAL_MSG_SZ_L, (total_size & 0xffffffff));
+    asr_sha_write(dd, HASH_TOTAL_MSG_SZ_H, (total_size >> 32));
+
+    reg_val = asr_sha_read(dd, HASH_CONTROL);
+    reg_val |= (0x1 << 0x2);
+    asr_sha_write(dd, HASH_CONTROL, reg_val);
+
+    ret = hash_config_op(dd, HASH_FINAL);
+    if (ret) {
+        if (tail_size)
+            dma_input_stop(dev_dd);
+        goto error;
+    }
+
+    if (tail_size) {
+        dma_wait_input_finish(dev_dd);
+        dma_input_stop(dev_dd);
+    }
+
+    /* copy digest out */
+    if (alg == HASH_SHA384 || alg == HASH_SHA512) {
+        for (i = 0; i < (out_size / 8); i++) {
+            reg_val =  asr_sha_read(dd, HASH_DIGEST(i));
+            out[4 + i * 8] = (uint8_t)(reg_val & 0xFF);
+            out[5 + i * 8] = (uint8_t)((reg_val >> 8) & 0xFF);
+            out[6 + i * 8] = (uint8_t)((reg_val >> 16) & 0xFF);
+            out[7 + i * 8] = (uint8_t)((reg_val >> 24) & 0xFF);
+            reg_val =  asr_sha_read(dd, HASH_DIGEST_H(i));
+            out[0 + i * 8] = (uint8_t)(reg_val & 0xFF);
+            out[1 + i * 8] = (uint8_t)((reg_val >> 8) & 0xFF);
+            out[2 + i * 8] = (uint8_t)((reg_val >> 16) & 0xFF);
+            out[3 + i * 8] = (uint8_t)((reg_val >> 24) & 0xFF);
+        }
+    } else {
+        for (i = 0; i < (out_size / 4); i++) {
+            reg_val =  asr_sha_read(dd, HASH_DIGEST(i));
+            out[0 + i * 4] = (uint8_t)(reg_val & 0xFF);
+            out[1 + i * 4] = (uint8_t)((reg_val >> 8) & 0xFF);
+            out[2 + i * 4] = (uint8_t)((reg_val >> 16) & 0xFF);
+            out[3 + i * 4] = (uint8_t)((reg_val >> 24) & 0xFF);
+        }        
+    }
+
+error:
+    bcm_ops->dev_put(dev_dd);
+    return ret;
+}
+
+static int hash_init(struct asr_sha_reqctx *ctx, int alg)
+{
+    int ret;
+    struct asr_bcm_sha *dd = ctx->dd;
+    struct asr_bcm_dev *dev_dd = container_of(dd, struct asr_bcm_dev, asr_sha);
+	struct asr_bcm_ops *bcm_ops = dev_dd->bcm_ops;
+
+    bcm_ops->dev_get(dev_dd);
+
+    adec_engine_hw_reset(dev_dd, ACC_ENG_HASH);
+    hash_sw_reset(dd);
+
+    ret = hash_set_mode(dd, HASH_SIMPLE, alg);
+    if (ret) 
+        goto error;
+    ret = hash_config_op(dd, HASH_INIT);
+    if (ret) 
+        goto error;
+
+    ret = hash_save_context(ctx, alg);
+    if (ret) 
+        goto error;
+
+error:
+    bcm_ops->dev_put(dev_dd);
+    return ret;
+}
+
+/* Only block-aligned data is fed to the engine at a time; partial blocks are buffered in md->buf */
+static int hash_process(struct asr_sha_reqctx *ctx, int alg, uint8_t *in, uint32_t inlen)
+{
+    int err;
+    uint32_t n, blocks;
+    struct hash_state *md = &ctx->md;
+
+    if (md->curlen > sizeof(md->buf)) {
+        return -1;
+    }
+
+    while (inlen > 0) {
+        if (md->curlen == 0 && inlen >= md->block_size) {
+            blocks = inlen / md->block_size;
+            err = hash_compress(ctx, alg, in, blocks, md->block_size);
+            if (err)
+                return err;
+            md->length += blocks * md->block_size * 8;
+            in += blocks * md->block_size;
+            inlen -= blocks * md->block_size;
+        } else {
+            n = min(inlen, (md->block_size - md->curlen));
+            memcpy(md->buf + md->curlen, in, n);
+            md->curlen += n;
+            in += n;
+            inlen -= n;
+            if (md->curlen == md->block_size) {
+                err = hash_compress(ctx, alg, md->buf, 1, md->block_size);
+                if (err) 
+                    return err;
+                md->length += 8*md->block_size;
+                md->curlen = 0;
+            }
+        }
+    }
+
+    return 0;
+}
+
+static int hash_done(struct asr_sha_reqctx *ctx, int alg, uint8_t *out)
+{
+    uint32_t out_len;
+    struct hash_state *md = &ctx->md;
+    struct asr_bcm_sha *dd = ctx->dd;
+
+    switch(alg) {
+    case HASH_SHA512:
+        out_len = HASH_LEN_SHA512;
+        break;
+    case HASH_SHA384:
+        out_len = HASH_LEN_SHA384;
+        break;
+    case HASH_SHA256:
+        out_len = HASH_LEN_SHA256;
+        break;
+    case HASH_SHA224:
+        out_len = HASH_LEN_SHA224;
+        break;
+    case HASH_SHA1:
+        out_len = HASH_LEN_SHA1;
+        break;
+    case HASH_MD5:
+        out_len = HASH_LEN_MD5;
+        break;
+    default:
+        dev_err(dd->dev, "err: not support hash alg\n");
+        return -1;
+    }
+
+    return hash_tail_process(ctx, out, out_len, \
+            (md->length / 8 + md->curlen), md->curlen, md->buf, alg);
+}
+/* ------- end -------- */
+
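+/*
+ * Copy as much of the request's scatterlist as fits into the linear bounce
+ * buffer, skipping zero-length entries, and advance the walk state (sg,
+ * offset, total) accordingly.
+ */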
+static size_t asr_sha_append_sg(struct asr_sha_reqctx *ctx)
+{
+    size_t count;
+
+    while ((ctx->bufcnt < ctx->buflen) && ctx->total) {
+        count = min(ctx->sg->length - ctx->offset, ctx->total);
+        count = min(count, ctx->buflen - ctx->bufcnt);
+
+        if (count <= 0) {
+            /*
+             * Check if count <= 0 because the buffer is full or
+             * because the sg length is 0. In the latter case,
+             * check if there is another sg in the list; a zero-length
+             * sg doesn't necessarily mean the end of the sg list.
+             */
+            if ((ctx->sg->length == 0) && !sg_is_last(ctx->sg)) {
+                ctx->sg = sg_next(ctx->sg);
+                continue;
+            } else {
+                break;
+            }
+        }
+
+        scatterwalk_map_and_copy(ctx->buffer + ctx->bufcnt, ctx->sg,
+            ctx->offset, count, 0);
+
+        ctx->bufcnt += count;
+        ctx->offset += count;
+        ctx->total -= count;
+
+        if (ctx->offset == ctx->sg->length) {
+            ctx->sg = sg_next(ctx->sg);
+            if (ctx->sg)
+                ctx->offset = 0;
+            else
+                ctx->total = 0;
+        }
+    }
+
+    return 0;
+}
+
+static int asr_sha_handle_queue(struct asr_bcm_sha *dd,
+                  struct ahash_request *req)
+{
+    struct crypto_async_request *async_req, *backlog;
+    struct asr_sha_ctx *ctx;
+    unsigned long flags;
+    bool start_async;
+    int err = 0, ret = 0;
+
+    spin_lock_irqsave(&dd->lock, flags);
+    if (req)
+        ret = ahash_enqueue_request(&dd->queue, req);
+
+    if (SHA_FLAGS_BUSY & dd->flags) {
+        spin_unlock_irqrestore(&dd->lock, flags);
+        return ret;
+    }
+
+    backlog = crypto_get_backlog(&dd->queue);
+    async_req = crypto_dequeue_request(&dd->queue);
+    if (async_req)
+        dd->flags |= SHA_FLAGS_BUSY;
+
+    spin_unlock_irqrestore(&dd->lock, flags);
+
+    if (!async_req) {
+        return ret;
+    }
+
+    if (backlog)
+        backlog->complete(backlog, -EINPROGRESS);
+
+    ctx = crypto_tfm_ctx(async_req->tfm);
+
+    dd->req = ahash_request_cast(async_req);
+    start_async = (dd->req != req);
+    dd->is_async = start_async;
+    dd->force_complete = false;
+ 
+    /* WARNING: ctx->start() MAY change dd->is_async. */
+    err = ctx->start(dd);
+    return (start_async) ? ret : err;
+}
+
+static int asr_sha_enqueue(struct ahash_request *req, unsigned int op)
+{
+    struct asr_sha_reqctx *ctx = ahash_request_ctx(req);
+    struct asr_bcm_sha *dd = ctx->dd;
+
+    ctx->op = op;
+
+    return asr_sha_handle_queue(dd, req);
+}
+
+static void asr_sha_copy_ready_hash(struct ahash_request *req)
+{
+    struct asr_sha_reqctx *ctx = ahash_request_ctx(req);
+
+    if (!req->result)
+        return;
+
+    switch (ctx->flags & SHA_FLAGS_ALGO_MASK) {
+    case SHA_FLAGS_MD5:
+        memcpy(req->result, ctx->digest, MD5_DIGEST_SIZE);
+        break;
+    case SHA_FLAGS_SHA1:
+        memcpy(req->result, ctx->digest, SHA1_DIGEST_SIZE);
+        break;
+    case SHA_FLAGS_SHA224:
+        memcpy(req->result, ctx->digest, SHA224_DIGEST_SIZE);
+        break;
+    case SHA_FLAGS_SHA256:
+        memcpy(req->result, ctx->digest, SHA256_DIGEST_SIZE);
+        break;
+    case SHA_FLAGS_SHA384:
+        memcpy(req->result, ctx->digest, SHA384_DIGEST_SIZE);
+        break;
+    case SHA_FLAGS_SHA512:
+        memcpy(req->result, ctx->digest, SHA512_DIGEST_SIZE);
+        break;
+    default:
+        return;
+    }
+}
+
+static inline int asr_sha_complete(struct asr_bcm_sha *dd, int err)
+{
+    struct ahash_request *req = dd->req;
+    struct asr_sha_reqctx *ctx = ahash_request_ctx(req);
+
+    dd->flags &= ~(SHA_FLAGS_BUSY);
+    ctx->flags &= ~(SHA_FLAGS_FINAL);
+
+    if ((dd->is_async || dd->force_complete) && req->base.complete)
+        req->base.complete(&req->base, err);
+
+    /* handle new request */
+    tasklet_schedule(&dd->queue_task);
+
+    return err;
+}
+
+static int asr_sha_buff_init(struct asr_bcm_sha *dd, uint32_t len)
+{
+    struct ahash_request *req = dd->req;
+    struct asr_sha_reqctx *ctx = ahash_request_ctx(req);
+
+    ctx->buffer = (void *)__get_free_pages(GFP_KERNEL, get_order(len));
+    if (!ctx->buffer) {
+        dev_err(dd->dev, "unable to alloc pages.\n");
+        return -ENOMEM;
+    }
+
+    ctx->buflen = PAGE_SIZE << get_order(len);
+
+    return 0;
+}
+
+static void asr_sha_buff_cleanup(struct asr_bcm_sha *dd, uint32_t len)
+{
+    struct ahash_request *req = dd->req;
+    struct asr_sha_reqctx *ctx = ahash_request_ctx(req);
+
+    free_pages((unsigned long)ctx->buffer, get_order(len));
+    ctx->buflen = 0;
+}
+
+static int sha_init_req(struct asr_sha_reqctx *ctx)
+{
+    int ret = 0;
+
+    /* hardware: hash init */
+    ret = hash_init(ctx, ctx->md.alg);
+    if (ret)
+        return -EINVAL;
+    return 0;
+}
+
+static int sha_update_req(struct asr_sha_reqctx *ctx)
+{
+    int ret = 0;
+    int bufcnt;
+    uint32_t buflen = ctx->total;
+
+    ret = asr_sha_buff_init(ctx->dd, ctx->total);
+    if (ret)
+        return -ENOMEM;
+    
+    asr_sha_append_sg(ctx);
+    bufcnt = ctx->bufcnt;
+    ctx->bufcnt = 0;
+
+    /* hardware: hash process */
+    ret = hash_process(ctx, ctx->md.alg, ctx->buffer, bufcnt);
+    if (ret)
+        ret = -EINVAL;
+
+    asr_sha_buff_cleanup(ctx->dd, buflen);
+    return ret;
+}
+
+static void sha_finish_req(struct asr_sha_reqctx *ctx, int *err)
+{
+    uint8_t *hash = (uint8_t *)ctx->digest;
+    
+    if (!(*err) && (ctx->flags & SHA_FLAGS_FINAL)) {
+        *err = hash_done(ctx, ctx->md.alg, (uint8_t *)hash);
+        asr_sha_copy_ready_hash(ctx->dd->req);
+        ctx->flags &= (~SHA_FLAGS_FINAL);
+    } else {
+        ctx->flags |= SHA_FLAGS_ERROR;
+    }
+}
+
+static void sha_next_req(struct asr_sha_reqctx *ctx, int *err)
+{
+    if (likely(!(*err) && (SHA_FLAGS_FINAL & ctx->flags)))
+        sha_finish_req(ctx, err);
+
+    (void)asr_sha_complete(ctx->dd, *err);
+}
+
+static int asr_sha_start(struct asr_bcm_sha *dd)
+{
+    int err = 0;
+    struct ahash_request *req = dd->req;
+    struct asr_sha_reqctx *ctx = ahash_request_ctx(req);
+
+    mutex_lock(&dd->queue_lock);
+
+    if ((ctx->flags & SHA_FLAGS_INIT)) {
+        err = sha_init_req(ctx);
+        ctx->flags &= (~SHA_FLAGS_INIT);
+        if (err) {
+            mutex_unlock(&dd->queue_lock);
+            return err;
+        }
+    }
+
+    if (ctx->op == SHA_OP_UPDATE) {
+        err = sha_update_req(ctx);
+        if (!err && (ctx->flags & SHA_FLAGS_FINUP))
+            /* no final() after finup() */
+            sha_finish_req(ctx, &err);
+    } else if (ctx->op == SHA_OP_FINAL) {
+        sha_finish_req(ctx, &err);
+    }
+
+    if (unlikely(err != -EINPROGRESS)) {
+        /* Task will not finish it, so do it here */
+        sha_next_req(ctx, &err);
+    }
+    
+    mutex_unlock(&dd->queue_lock);
+    return err;
+}
+
+static int asr_sha_cra_init(struct crypto_tfm *tfm)
+{
+    struct asr_sha_ctx *ctx = crypto_tfm_ctx(tfm);
+    crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
+                 sizeof(struct asr_sha_reqctx));
+    ctx->start = asr_sha_start;
+
+    return 0;
+}
+
+static void asr_sha_cra_exit(struct crypto_tfm *tfm)
+{
+    struct asr_sha_ctx *ctx = crypto_tfm_ctx(tfm);
+    memset(ctx, 0, sizeof(*ctx));
+}
+
+static inline void asr_sha_get(struct asr_bcm_sha *dd)
+{
+    mutex_lock(&dd->sha_lock);
+}
+
+static inline void asr_sha_put(struct asr_bcm_sha *dd)
+{
+    if(mutex_is_locked(&dd->sha_lock))
+        mutex_unlock(&dd->sha_lock);
+}
+
+static int asr_sha_init(struct ahash_request *req)
+{
+    struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+    struct asr_sha_reqctx *ctx = ahash_request_ctx(req);
+    struct asr_bcm_sha *dd = asr_sha_local;
+
+    asr_sha_get(dd);
+
+    ctx->dd = dd;
+    memset(&ctx->md, 0, sizeof(ctx->md));
+    ctx->flags = 0;
+
+    switch (crypto_ahash_digestsize(tfm)) {
+    case MD5_DIGEST_SIZE:
+        ctx->flags |= SHA_FLAGS_MD5;
+        ctx->md.alg = HASH_MD5;
+        ctx->md.block_size = MD5_HMAC_BLOCK_SIZE;
+        break;
+    case SHA1_DIGEST_SIZE:
+        ctx->flags |= SHA_FLAGS_SHA1;
+        ctx->md.alg = HASH_SHA1;
+        ctx->md.block_size = SHA1_BLOCK_SIZE;
+        break;
+    case SHA224_DIGEST_SIZE:
+        ctx->flags |= SHA_FLAGS_SHA224;
+        ctx->md.alg = HASH_SHA224;
+        ctx->md.block_size = SHA224_BLOCK_SIZE;
+        break;
+    case SHA256_DIGEST_SIZE:
+        ctx->flags |= SHA_FLAGS_SHA256;
+        ctx->md.alg = HASH_SHA256;
+        ctx->md.block_size = SHA256_BLOCK_SIZE;
+        break;
+    case SHA384_DIGEST_SIZE:
+        ctx->flags |= SHA_FLAGS_SHA384;
+        ctx->md.alg = HASH_SHA384;
+        ctx->md.block_size = SHA384_BLOCK_SIZE;
+        break;
+    case SHA512_DIGEST_SIZE:
+        ctx->flags |= SHA_FLAGS_SHA512;
+        ctx->md.alg = HASH_SHA512;
+        ctx->md.block_size = SHA512_BLOCK_SIZE;
+        break;
+    default:
+        asr_sha_put(dd);
+        return -EINVAL;
+    }
+
+    ctx->bufcnt = 0;
+    ctx->flags |= SHA_FLAGS_INIT;
+
+    asr_sha_put(dd);
+    return 0;
+}
+
+static int asr_sha_update(struct ahash_request *req)
+{
+    int ret = 0;
+    struct asr_sha_reqctx *ctx = ahash_request_ctx(req);
+
+    asr_sha_get(ctx->dd);
+
+    ctx->total = req->nbytes;
+    ctx->sg = req->src;
+    ctx->offset = 0;
+
+    ret = asr_sha_enqueue(req, SHA_OP_UPDATE);
+
+    asr_sha_put(ctx->dd);
+    return ret;
+}
+
+static int asr_sha_final(struct ahash_request *req)
+{
+    int ret = 0;
+    struct asr_sha_reqctx *ctx = ahash_request_ctx(req);
+
+    asr_sha_get(ctx->dd);
+
+    ctx->flags |= SHA_FLAGS_FINAL;
+    if (ctx->flags & SHA_FLAGS_ERROR) {
+        asr_sha_put(ctx->dd);
+        return 0; /* uncompleted hash is not needed */
+    }
+    ret = asr_sha_enqueue(req, SHA_OP_FINAL);
+
+    asr_sha_put(ctx->dd);
+    return ret;
+}
+
+static int asr_sha_finup(struct ahash_request *req)
+{
+    struct asr_sha_reqctx *ctx = ahash_request_ctx(req);
+    int err1, err2;
+
+    ctx->flags |= SHA_FLAGS_FINUP;
+
+    err1 = asr_sha_update(req);
+    if (err1 == -EINPROGRESS ||
+        (err1 == -EBUSY && (ahash_request_flags(req) &
+                CRYPTO_TFM_REQ_MAY_BACKLOG))) {
+        asr_sha_put(ctx->dd);
+        return err1;
+    }
+    /*
+     * final() always has to be called to clean up resources,
+     * even if update() failed, except for EINPROGRESS
+     */
+    err2 = asr_sha_final(req);
+
+    return err1 ?: err2;
+}
+
+static int asr_sha_digest(struct ahash_request *req)
+{
+    return asr_sha_init(req) ?: asr_sha_finup(req);
+}
+
+static int asr_sha_export(struct ahash_request *req, void *out)
+{
+    const struct asr_sha_reqctx *ctx = ahash_request_ctx(req);
+
+    memcpy(out, ctx, sizeof(*ctx));
+    return 0;
+}
+
+static int asr_sha_import(struct ahash_request *req, const void *in)
+{
+    struct asr_sha_reqctx *ctx = ahash_request_ctx(req);
+
+    memcpy(ctx, in, sizeof(*ctx));
+    return 0;
+}
+
+static struct ahash_alg sha_algs[] = {
+    /* md5 */
+    {
+        .init		= asr_sha_init,
+        .update		= asr_sha_update,
+        .final		= asr_sha_final,
+        .finup		= asr_sha_finup,
+        .digest		= asr_sha_digest,
+        .export		= asr_sha_export,
+        .import		= asr_sha_import,
+        .halg = {
+            .digestsize	= MD5_DIGEST_SIZE,
+            .statesize	= sizeof(struct asr_sha_reqctx),
+            .base	= {
+                .cra_name		= "md5",
+                .cra_driver_name	= "asr-md5",
+                .cra_priority		= ASR_SHA_PRIORITY,
+                .cra_flags		= CRYPTO_ALG_ASYNC,
+                .cra_blocksize		= MD5_HMAC_BLOCK_SIZE,
+                .cra_ctxsize		= sizeof(struct asr_sha_ctx),
+                .cra_alignmask		= 0,
+                .cra_module		= THIS_MODULE,
+                .cra_init		= asr_sha_cra_init,
+                .cra_exit		= asr_sha_cra_exit,
+            }
+        }
+    },
+
+    /* sha1 */
+    {
+        .init		= asr_sha_init,
+        .update		= asr_sha_update,
+        .final		= asr_sha_final,
+        .finup		= asr_sha_finup,
+        .digest		= asr_sha_digest,
+        .export		= asr_sha_export,
+        .import		= asr_sha_import,
+        .halg = {
+            .digestsize	= SHA1_DIGEST_SIZE,
+            .statesize	= sizeof(struct asr_sha_reqctx),
+            .base	= {
+                .cra_name		= "sha1",
+                .cra_driver_name	= "asr-sha1",
+                .cra_priority		= ASR_SHA_PRIORITY,
+                .cra_flags		= CRYPTO_ALG_ASYNC,
+                .cra_blocksize		= SHA1_BLOCK_SIZE,
+                .cra_ctxsize		= sizeof(struct asr_sha_ctx),
+                .cra_alignmask		= 0,
+                .cra_module		= THIS_MODULE,
+                .cra_init		= asr_sha_cra_init,
+                .cra_exit		= asr_sha_cra_exit,
+            }
+        }
+    },
+
+    /* sha224 */
+    {
+        .init		= asr_sha_init,
+        .update		= asr_sha_update,
+        .final		= asr_sha_final,
+        .finup		= asr_sha_finup,
+        .digest		= asr_sha_digest,
+        .export		= asr_sha_export,
+        .import		= asr_sha_import,
+        .halg = {
+            .digestsize	= SHA224_DIGEST_SIZE,
+            .statesize	= sizeof(struct asr_sha_reqctx),
+            .base	= {
+                .cra_name		= "sha224",
+                .cra_driver_name	= "asr-sha224",
+                .cra_priority		= ASR_SHA_PRIORITY,
+                .cra_flags		= CRYPTO_ALG_ASYNC,
+                .cra_blocksize		= SHA224_BLOCK_SIZE,
+                .cra_ctxsize		= sizeof(struct asr_sha_ctx),
+                .cra_alignmask		= 0,
+                .cra_module		= THIS_MODULE,
+                .cra_init		= asr_sha_cra_init,
+                .cra_exit		= asr_sha_cra_exit,
+            }
+        }
+    },
+
+    /* sha256 */
+    {
+        .init		= asr_sha_init,
+        .update		= asr_sha_update,
+        .final		= asr_sha_final,
+        .finup		= asr_sha_finup,
+        .digest		= asr_sha_digest,
+        .export		= asr_sha_export,
+        .import		= asr_sha_import,
+        .halg = {
+            .digestsize	= SHA256_DIGEST_SIZE,
+            .statesize	= sizeof(struct asr_sha_reqctx),
+            .base	= {
+                .cra_name		= "sha256",
+                .cra_driver_name	= "asr-sha256",
+                .cra_priority		= ASR_SHA_PRIORITY,
+                .cra_flags		= CRYPTO_ALG_ASYNC,
+                .cra_blocksize		= SHA256_BLOCK_SIZE,
+                .cra_ctxsize		= sizeof(struct asr_sha_ctx),
+                .cra_alignmask		= 0,
+                .cra_module		= THIS_MODULE,
+                .cra_init		= asr_sha_cra_init,
+                .cra_exit		= asr_sha_cra_exit,
+            }
+        }
+    },
+
+    /* sha384 */
+    {
+        .init		= asr_sha_init,
+        .update		= asr_sha_update,
+        .final		= asr_sha_final,
+        .finup		= asr_sha_finup,
+        .digest		= asr_sha_digest,
+        .export		= asr_sha_export,
+        .import		= asr_sha_import,
+        .halg = {
+            .digestsize	= SHA384_DIGEST_SIZE,
+            .statesize	= sizeof(struct asr_sha_reqctx),
+            .base	= {
+                .cra_name		= "sha384",
+                .cra_driver_name	= "asr-sha384",
+                .cra_priority		= ASR_SHA_PRIORITY,
+                .cra_flags		= CRYPTO_ALG_ASYNC,
+                .cra_blocksize		= SHA384_BLOCK_SIZE,
+                .cra_ctxsize		= sizeof(struct asr_sha_ctx),
+                .cra_alignmask		= 0,
+                .cra_module		= THIS_MODULE,
+                .cra_init		= asr_sha_cra_init,
+                .cra_exit		= asr_sha_cra_exit,
+            }
+        }
+    },
+
+    /* sha512 */
+    {
+        .init		= asr_sha_init,
+        .update		= asr_sha_update,
+        .final		= asr_sha_final,
+        .finup		= asr_sha_finup,
+        .digest		= asr_sha_digest,
+        .export		= asr_sha_export,
+        .import		= asr_sha_import,
+        .halg = {
+            .digestsize	= SHA512_DIGEST_SIZE,
+            .statesize	= sizeof(struct asr_sha_reqctx),
+            .base	= {
+                .cra_name		= "sha512",
+                .cra_driver_name	= "asr-sha512",
+                .cra_priority		= ASR_SHA_PRIORITY,
+                .cra_flags		= CRYPTO_ALG_ASYNC,
+                .cra_blocksize		= SHA512_BLOCK_SIZE,
+                .cra_ctxsize		= sizeof(struct asr_sha_ctx),
+                .cra_alignmask		= 0,
+                .cra_module		= THIS_MODULE,
+                .cra_init		= asr_sha_cra_init,
+                .cra_exit		= asr_sha_cra_exit,
+            }
+        }
+    },
+};
+
+static void asr_sha_queue_task(unsigned long data)
+{
+    struct asr_bcm_sha *dd = (struct asr_bcm_sha *)data;
+
+    asr_sha_handle_queue(dd, NULL);
+}
+
+#ifdef ASR_BCM_SHA_TEST
+static int bcm_sha_test(struct asr_bcm_sha *dd);
+#endif
+
+int asr_bcm_sha_register(struct asr_bcm_dev *bcm_dd)
+{
+    int err, i, j;
+    struct asr_bcm_sha *sha_dd;
+
+    sha_dd = &bcm_dd->asr_sha;
+
+    sha_dd->dev = bcm_dd->dev;
+    sha_dd->io_base = bcm_dd->io_base;
+    sha_dd->phys_base = bcm_dd->phys_base;
+
+    asr_sha_local = sha_dd;
+
+    spin_lock_init(&sha_dd->lock);
+    mutex_init(&sha_dd->sha_lock);
+    mutex_init(&sha_dd->queue_lock);
+    tasklet_init(&sha_dd->queue_task, asr_sha_queue_task,
+                    (unsigned long)sha_dd);
+    crypto_init_queue(&sha_dd->queue, ASR_SHA_QUEUE_LENGTH);
+    
+    for (i = 0; i < ARRAY_SIZE(sha_algs); i++) {
+        err = crypto_register_ahash(&sha_algs[i]);
+        if (err)
+            goto err_sha_algs;
+    }
+
+#ifdef ASR_BCM_SHA_TEST
+    bcm_sha_test(sha_dd);
+#endif
+
+    return 0;
+
+err_sha_algs:
+    for (j = 0; j < i; j++)
+        crypto_unregister_ahash(&sha_algs[j]);
+
+    return err;
+}
+EXPORT_SYMBOL_GPL(asr_bcm_sha_register);
+
+int asr_bcm_sha_unregister(struct asr_bcm_dev *bcm_dd)
+{
+    int i;
+    struct asr_bcm_sha *sha_dd = &bcm_dd->asr_sha;
+
+
+    for (i = 0; i < ARRAY_SIZE(sha_algs); i++)
+        crypto_unregister_ahash(&sha_algs[i]);
+
+    tasklet_kill(&sha_dd->queue_task);
+
+    return 0;
+}
+EXPORT_SYMBOL_GPL(asr_bcm_sha_unregister);
+
+#ifdef ASR_BCM_SHA_TEST
+
+static int bcm_sha_test(struct asr_bcm_sha *dd)
+{
+    int ret = 0;
+
+    const struct {
+        const char *msg;
+        uint8_t hash[20];
+    } sha1_tests[] = {
+        {
+            "abc", 
+            {   0xa9, 0x99, 0x3e, 0x36, 0x47, 0x06, 
+                0x81, 0x6a, 0xba, 0x3e, 0x25, 0x71, 
+                0x78, 0x50, 0xc2, 0x6c, 0x9c, 0xd0,
+                0xd8, 0x9d 
+            }
+        },
+        {
+            "asjhsdjljfdsdjjkdfwyqeuwouzxkmcxjkmwqds"
+            "jklfdfjlkdfkfsfkjlfskjdflioherfjjfdjkfd"
+            "nkfdfdojjodfjdfjflj;sljjlfkklnfnkgbhhoi"
+            "gfhigfopojpfjojpoffkjlfskjdflioherfjjfd"
+            "jkfdnkfdfdojjodfjdfjfljnfnkgbhhoigfhigf"
+            "oponfnkgbhhoigfhigfopojpfjoewiroiowiods"
+            "djkisijdknknkskdnknflnnesniewinoinknmdn"
+            "kknknsdnjjfsnnkfnkknslnklknfnknkflksnlk"
+            "lskldklklklnmlflmlmlfmlfml",
+            {
+                0xc4, 0x53, 0xca, 0x24, 0xfa, 0xe5,
+                0x39, 0x53, 0x08, 0x8c, 0x57, 0x1a, 
+                0x96, 0xe9, 0x64, 0x7f, 0xd5, 0xf9, 
+                0x13, 0x91
+            }
+        }
+    };
+
+    struct asr_sha_reqctx ctx1;
+    struct asr_sha_reqctx ctx2;
+
+    unsigned char out_sha1_1[20] = {0};
+    unsigned char out_sha1_2[20] = {0};
+
+    memset(&ctx1.md, 0, sizeof(ctx1.md));
+    ctx1.md.block_size = BLOCK_ALGIN_SIZE;
+    ctx1.dd = dd;
+
+    memset(&ctx2.md, 0, sizeof(ctx2.md));
+    ctx2.md.block_size = BLOCK_ALGIN_SIZE;
+    ctx2.dd = dd;
+
+    ret = hash_init(&ctx1, HASH_SHA1);
+    if (ret) {
+        return ret;
+    }
+    ret = hash_init(&ctx2, HASH_SHA1);
+    if (ret) {
+        return ret;
+    }
+    ret = hash_process(&ctx1, HASH_SHA1, (uint8_t *)sha1_tests[0].msg, strlen(sha1_tests[0].msg));
+    if (ret) {
+        return ret;
+    }
+    ret = hash_done(&ctx1, HASH_SHA1, out_sha1_1);
+    if (ret) {
+        return ret;
+    }
+    ret = hash_process(&ctx2, HASH_SHA1, (uint8_t *)sha1_tests[1].msg, strlen(sha1_tests[1].msg));
+    if (ret) {
+        return ret;
+    }
+    ret = hash_done(&ctx2, HASH_SHA1, out_sha1_2);
+    if (ret) {
+        return ret;
+    }
+
+    if (memcmp(out_sha1_1, sha1_tests[0].hash, sizeof(out_sha1_1))) {
+        printk("sha1 test 0 failed\n");
+    } else {
+        printk("sha1 test 0 pass\n");
+    }
+    if (memcmp(out_sha1_2, sha1_tests[1].hash, sizeof(out_sha1_2))) {
+        printk("sha1 test 1 failed\n");
+    } else {
+        printk("sha1 test 1 pass\n");
+    }
+
+    return 0;
+}
+#endif
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("wangyonggan <yongganwang@asrmicro.com>");
+MODULE_DESCRIPTION("ASR bcm sha driver");
\ No newline at end of file
diff --git a/marvell/linux/drivers/crypto/asr/bcm/asr-sha.h b/marvell/linux/drivers/crypto/asr/bcm/asr-sha.h
new file mode 100644
index 0000000..c7058f4
--- /dev/null
+++ b/marvell/linux/drivers/crypto/asr/bcm/asr-sha.h
@@ -0,0 +1,163 @@
+#ifndef _ASR_BCM_SHA_H_
+#define _ASR_BCM_SHA_H_
+
+#include <linux/scatterlist.h>
+#include <linux/crypto.h>
+#include <crypto/sha.h>
+#include <crypto/hash.h>
+#include <crypto/internal/hash.h>
+
+#define ROUND_UP_TO_WORD_CNT(x) (((x) + 0x3) >> 2)
+#define HASH_ALIGN_BUF_SIZE 4096
+#define BLOCK_ALGIN_SIZE 64
+
+#define ASR_SHA_BUFFER_ORDER	2
+#define ASR_SHA_BUFFER_SIZE	(PAGE_SIZE << ASR_SHA_BUFFER_ORDER)
+
+/* SHA flags */
+#define SHA_FLAGS_BUSY				BIT(0)
+#define	SHA_FLAGS_INIT				BIT(1)
+#define	SHA_FLAGS_FINAL				BIT(2)
+#define SHA_FLAGS_FINUP				BIT(3)
+#define SHA_FLAGS_OUTPUT_READY		BIT(4)
+#define SHA_FLAGS_ALGO_MASK			GENMASK(10, 5)
+#define SHA_FLAGS_MD5				BIT(5)
+#define SHA_FLAGS_SHA1				BIT(6)
+#define SHA_FLAGS_SHA224			BIT(7)
+#define SHA_FLAGS_SHA256			BIT(8)
+#define SHA_FLAGS_SHA384			BIT(9)
+#define SHA_FLAGS_SHA512			BIT(10)
+#define SHA_FLAGS_HMAC				BIT(11)
+#define SHA_FLAGS_PAD				BIT(12)
+#define SHA_FLAGS_ERROR				BIT(13)
+
+#define SHA_OP_INIT	    1
+#define SHA_OP_UPDATE	2
+#define SHA_OP_FINAL	3
+
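+/* Operation codes written into the low bits of HASH_CONTROL by hash_config_op() */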
+typedef enum {
+    HASH_INIT = 0x1,
+    HASH_UPDATE = 0x2,
+    HASH_FINAL = 0x3,    
+} HASH_OP_MODE_T;
+
+typedef enum {
+    HASH_LEN_SHA1 = 20,
+    HASH_LEN_SHA256 = 32,
+    HASH_LEN_SHA224 = 28,
+    HASH_LEN_MD5 = 16,
+    HASH_LEN_SHA512 = 64,
+    HASH_LEN_SHA384 = 48,
+} HASH_LEN_T;
+
+typedef enum {
+    HASH_SIMPLE = 0,
+    HASH_HMAC,
+} HASH_MODE_T;
+
+typedef enum {
+    HASH_SHA1 = 0x0,
+    HASH_SHA256 = 0x1,
+    HASH_SHA224 = 0x2,
+    HASH_MD5 = 0x3,
+    HASH_SHA512 = 0x4,
+    HASH_SHA384 = 0x5,
+} HASH_ALGO_T;
+
+struct sha512_digst {
+    unsigned int state[16];
+};
+
+struct sha256_digst {
+    unsigned int state[8];
+};
+
+struct sha1_digst {
+    unsigned int state[5];
+};
+
+struct md5_digst {
+    unsigned int state[4];
+};
+
+struct hash_state {
+    uint64_t length;
+    unsigned int curlen;
+    unsigned char buf[BLOCK_ALGIN_SIZE];
+    uint32_t block_size;
+    int alg;
+
+    struct sha512_digst sha512;
+    struct sha256_digst sha256;
+    struct sha1_digst sha1;
+    struct md5_digst md5;
+};
+
+struct asr_bcm_sha;
+
+typedef int (*asr_sha_fn_t)(struct asr_bcm_sha *);
+typedef irqreturn_t (*asr_sha_irq_t)(void *);
+
+struct asr_bcm_sha {
+    unsigned long		phys_base;
+    struct device		*dev;
+    struct clk			*iclk;
+    int					irq;
+    void __iomem		*io_base;
+
+    spinlock_t		lock;
+    struct mutex sha_lock;
+    struct mutex queue_lock;
+
+    int			err;
+    struct tasklet_struct	done_task;
+    struct tasklet_struct	queue_task;
+
+    int				int_mode;
+    asr_sha_irq_t	sha_irq;
+
+    unsigned long		flags;
+    struct crypto_queue	queue;
+    struct ahash_request	*req;
+    bool			is_async;
+    bool			force_complete;
+    asr_sha_fn_t		resume;
+
+    struct hash_state md;
+};
+
+
+/*
+ * .statesize = sizeof(struct asr_sha_reqctx) must be <= PAGE_SIZE / 8 as
+ * tested by the ahash_prepare_alg() function.
+ */
+
+struct asr_sha_reqctx {
+    struct asr_bcm_sha	*dd;
+    unsigned long	op;
+
+    u8	digest[SHA512_DIGEST_SIZE] __aligned(sizeof(u32));
+    void *buffer;
+    size_t	bufcnt;
+    size_t	buflen;
+
+    /* walk state */
+    struct scatterlist	*sg;
+    unsigned int	offset;	/* offset in current sg */
+    unsigned int	total;	/* total request */
+
+    unsigned long		flags;
+    struct hash_state md;
+};
+
+struct asr_sha_ctx {
+    struct asr_bcm_sha	*dd;
+    asr_sha_fn_t		start;
+
+    unsigned long		flags;
+};
+
+#define ASR_SHA_QUEUE_LENGTH	50
+#define ASR_SHA_PRIORITY		300
+
+#endif
diff --git a/marvell/linux/drivers/crypto/asr/bcm_optee/Makefile b/marvell/linux/drivers/crypto/asr/bcm_optee/Makefile
new file mode 100644
index 0000000..8a2975a
--- /dev/null
+++ b/marvell/linux/drivers/crypto/asr/bcm_optee/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_ASR_BCM) += asr-bcm-optee.o
+obj-$(CONFIG_ASR_BCM_SHA) += asr-sha-optee.o
+obj-$(CONFIG_ASR_BCM_CIPHER) += asr-cipher-optee.o
\ No newline at end of file
diff --git a/marvell/linux/drivers/crypto/asr/bcm_optee/asr-bcm-optee.c b/marvell/linux/drivers/crypto/asr/bcm_optee/asr-bcm-optee.c
new file mode 100644
index 0000000..2ad66f1
--- /dev/null
+++ b/marvell/linux/drivers/crypto/asr/bcm_optee/asr-bcm-optee.c
@@ -0,0 +1,207 @@
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/clk-provider.h>
+#include <linux/clk.h>
+#include <linux/io.h>
+#include <linux/hw_random.h>
+#include <linux/platform_device.h>
+#include <linux/device.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/scatterlist.h>
+#include <linux/dma-mapping.h>
+#include <linux/of_device.h>
+#include <linux/delay.h>
+#include <linux/crypto.h>
+#include <crypto/scatterwalk.h>
+#include <crypto/algapi.h>
+#include <linux/tee_drv.h>
+
+#include "asr-bcm-optee.h"
+
+static void asrbcm_uuid_to_octets(uint8_t d[TEE_IOCTL_UUID_LEN], struct teec_uuid *s)
+{
+	d[0] = s->timeLow >> 24;
+	d[1] = s->timeLow >> 16;
+	d[2] = s->timeLow >> 8;
+	d[3] = s->timeLow;
+	d[4] = s->timeMid >> 8;
+	d[5] = s->timeMid;
+	d[6] = s->timeHiAndVersion >> 8;
+	d[7] = s->timeHiAndVersion;
+	memcpy(d + 8, s->clockSeqAndNode, sizeof(s->clockSeqAndNode));
+}
+
+static int asrbcm_tee_match_cb(struct tee_ioctl_version_data *ver, const void *data)
+{
+	return 1;
+}
+
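+/*
+ * Open a TEE client context and a session to the trusted application
+ * identified by @uuid. On success the session handle is stored in @ctx and
+ * must later be released with asrbcm_optee_close_ta(); on any failure the
+ * context is torn down here before returning.
+ */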
+int asrbcm_optee_open_ta(struct asrbcm_tee_context *ctx, struct teec_uuid *uuid)
+{
+	struct tee_ioctl_open_session_arg open_session_arg;
+	int ret;
+
+	if (ctx == NULL)
+		return -EINVAL;
+
+	ctx->session = 0;
+	ctx->tee_ctx = tee_client_open_context(NULL, asrbcm_tee_match_cb, NULL, NULL);
+	if (IS_ERR(ctx->tee_ctx)) {
+		ret = PTR_ERR(ctx->tee_ctx);
+		ctx->tee_ctx = NULL;
+		return ret;
+	}
+
+	memset(&open_session_arg, 0x0, sizeof(struct tee_ioctl_open_session_arg));
+	asrbcm_uuid_to_octets(open_session_arg.uuid, uuid);
+	open_session_arg.clnt_login = TEE_IOCTL_LOGIN_PUBLIC;
+	open_session_arg.num_params = 0;
+	ret = tee_client_open_session(ctx->tee_ctx, &open_session_arg, NULL);
+	if (ret != 0) {
+		goto err_exit;
+	} else if (open_session_arg.ret != 0) {
+		ret = -EIO;
+		goto err_exit;
+	}
+
+	ctx->session = open_session_arg.session;
+
+	return ret;
+err_exit:
+	tee_client_close_context(ctx->tee_ctx);
+	ctx->tee_ctx = NULL;
+	return ret;
+}
+
+int asrbcm_optee_close_ta(struct asrbcm_tee_context *ctx)
+{
+	int ret;
+
+	if (ctx == NULL)
+		return -EINVAL;
+
+	ret = tee_client_close_session(ctx->tee_ctx, ctx->session);
+
+	tee_client_close_context(ctx->tee_ctx);
+
+	return ret;
+}
+
+#if defined(CONFIG_OF)
+static const struct of_device_id asr_bcm_dt_ids[] = {
+    { .compatible = "asr,asr-bcm" },
+    { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, asr_bcm_dt_ids);
+#endif
+
+static int asr_bcm_probe(struct platform_device *pdev)
+{
+    struct asr_bcm_dev *bcm_dd;
+    struct device *dev = &pdev->dev;
+    struct device_node *np = NULL;
+    int err = 0, devnum = 0;
+
+    bcm_dd = devm_kzalloc(&pdev->dev, sizeof(*bcm_dd), GFP_KERNEL);
+    if (bcm_dd == NULL) {
+        err = -ENOMEM;
+        goto no_mem_err;
+    }
+
+    np = dev->of_node;
+    bcm_dd->dev = dev;
+
+    platform_set_drvdata(pdev, bcm_dd);
+
+#ifdef CONFIG_ASR_BCM_CIPHER
+    if (of_get_property(np, "asr,asr-cipher", NULL)) {
+        err = asr_bcm_cipher_register(bcm_dd);
+        if (err)
+            goto res_err;
+        dev_info(dev, "CIPHER engine is initialized\n");
+        devnum++;
+    }
+#endif
+
+#ifdef CONFIG_ASR_BCM_SHA
+    if (of_get_property(np, "asr,asr-sha", NULL)) {
+        err = asr_bcm_sha_register(bcm_dd);
+        if (err)
+            goto sha_err;
+        dev_info(dev, "SHA engine is initialized\n");
+        devnum++;
+    }
+#endif
+
+    if (!devnum) {
+        dev_err(dev, "No BCM device enabled\n");
+        err = -ENODEV;
+        goto res_err;
+    }
+
+    return 0;
+
+#ifdef CONFIG_ASR_BCM_SHA
+sha_err:
+#ifdef CONFIG_ASR_BCM_CIPHER
+	asr_bcm_cipher_unregister(bcm_dd);
+#endif
+#endif
+
+res_err:
+    devm_kfree(dev, bcm_dd);
+no_mem_err:
+    dev_err(dev, "initialization failed.\n");
+
+    return err;
+}
+
+static int asr_bcm_remove(struct platform_device *pdev)
+{
+    struct asr_bcm_dev *bcm_dd;
+
+    bcm_dd = platform_get_drvdata(pdev);
+    if (!bcm_dd)
+        return -ENODEV;
+
+#ifdef CONFIG_ASR_BCM_CIPHER
+	asr_bcm_cipher_unregister(bcm_dd);
+#endif
+
+#ifdef CONFIG_ASR_BCM_SHA
+    asr_bcm_sha_unregister(bcm_dd);
+#endif
+
+    devm_kfree(bcm_dd->dev, bcm_dd);
+
+    return 0;
+}
+
+static struct platform_driver asr_bcm_driver = {
+    .probe		= asr_bcm_probe,
+    .remove		= asr_bcm_remove,
+    .driver		= {
+        .name	= "asr_bcm",
+        .of_match_table = of_match_ptr(asr_bcm_dt_ids),
+    },
+};
+
+static int __init asr_bcm_init(void)
+{
+    int ret;
+
+    ret = platform_driver_register(&asr_bcm_driver);
+
+    return ret;
+}
+
+device_initcall_sync(asr_bcm_init);
+
+MODULE_DESCRIPTION("BCM: ASR Trust Engine support.");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Yonggan Wang");
\ No newline at end of file
diff --git a/marvell/linux/drivers/crypto/asr/bcm_optee/asr-bcm-optee.h b/marvell/linux/drivers/crypto/asr/bcm_optee/asr-bcm-optee.h
new file mode 100644
index 0000000..be73036
--- /dev/null
+++ b/marvell/linux/drivers/crypto/asr/bcm_optee/asr-bcm-optee.h
@@ -0,0 +1,44 @@
+#ifndef _ASR_BCM_OPTEE_H_
+#define _ASR_BCM_OPTEE_H_
+
+#include <linux/crypto.h>
+#include <crypto/algapi.h>
+#include <linux/interrupt.h>
+#include <linux/mutex.h>
+#include <linux/miscdevice.h>
+
+#include "../bcm/asr-sha.h"
+#include "../bcm/asr-cipher.h"
+
+struct teec_uuid {
+	uint32_t timeLow;
+	uint16_t timeMid;
+	uint16_t timeHiAndVersion;
+	uint8_t clockSeqAndNode[8];
+};
+
+struct asrbcm_tee_context {
+	struct tee_context *tee_ctx;
+	int session;
+};
+
+struct asr_bcm_dev {
+    struct device		*dev;
+    struct asr_bcm_sha asr_sha;
+    struct asr_bcm_cipher asr_cipher;
+};
+
+struct asr_bcm_ops {
+    int (*dev_get)(struct asr_bcm_dev *);
+    int (*dev_put)(struct asr_bcm_dev *);
+};
+
+int asrbcm_optee_open_ta(struct asrbcm_tee_context *ctx, struct teec_uuid *uuid);
+int asrbcm_optee_close_ta(struct asrbcm_tee_context *ctx);
+
+int asr_bcm_sha_register(struct asr_bcm_dev *bcm_dd);
+int asr_bcm_sha_unregister(struct asr_bcm_dev *bcm_dd);
+
+int asr_bcm_cipher_register(struct asr_bcm_dev *bcm_dd);
+int asr_bcm_cipher_unregister(struct asr_bcm_dev *bcm_dd);
+#endif
\ No newline at end of file
diff --git a/marvell/linux/drivers/crypto/asr/bcm_optee/asr-cipher-optee.c b/marvell/linux/drivers/crypto/asr/bcm_optee/asr-cipher-optee.c
new file mode 100644
index 0000000..8d5912a
--- /dev/null
+++ b/marvell/linux/drivers/crypto/asr/bcm_optee/asr-cipher-optee.c
@@ -0,0 +1,651 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023 ASR Micro Limited
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+#include <linux/of.h>
+#include <linux/clk.h>
+#include <linux/io.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/uaccess.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#ifdef CONFIG_TEE
+#include <linux/tee_drv.h>
+#endif
+#include <linux/crypto.h>
+#include <linux/cputype.h>
+#include <crypto/scatterwalk.h>
+#include <crypto/algapi.h>
+#include <crypto/aes.h>
+#include <crypto/internal/skcipher.h>
+
+#include "asr-bcm-optee.h"
+#include "asr-cipher-optee.h"
+
+struct asr_bcm_cipher *asr_cipher_local;
+
+static struct teec_uuid pta_cipher_uuid = ASR_AES_ACCESS_UUID;
+
+static int asr_optee_cipher_get_rkek_state(u32 *state)
+{
+	struct tee_ioctl_invoke_arg invoke_arg;
+	struct tee_param params[1];
+	struct asrbcm_tee_context asrbcm_tee_ctx;
+	int ret = 0;
+
+	ret = asrbcm_optee_open_ta(&asrbcm_tee_ctx, &pta_cipher_uuid);
+	if (ret != 0) {
+		return ret;
+	}
+
+	memset(&invoke_arg, 0x0, sizeof(struct tee_ioctl_invoke_arg));
+	invoke_arg.func = CMD_AES_HWKEY_STATUS;
+	invoke_arg.session  = asrbcm_tee_ctx.session;
+	invoke_arg.num_params = 1;
+
+	params[0].attr = TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_OUTPUT;
+	params[0].u.value.a = 0;
+	params[0].u.value.b = 0;
+	params[0].u.value.c = 0;
+
+	ret = tee_client_invoke_func(asrbcm_tee_ctx.tee_ctx, &invoke_arg, params);
+	if (ret != 0) {
+		goto exit;
+	} else if (invoke_arg.ret != 0) {
+		ret = -EIO;
+		goto exit;
+	}
+
+	*state = params[0].u.value.a;
+
+exit:
+	asrbcm_optee_close_ta(&asrbcm_tee_ctx);
+	return ret;
+
+}
+
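+/*
+ * Run one AES ECB/CBC operation in the TA with a caller-supplied key. A
+ * single shared-memory buffer is laid out as [src | dst | key | iv]:
+ * params[0]/params[1] reference the src/dst regions, params[2].value.a
+ * carries op_mode (and .b the key size when an IV is present), and params[3]
+ * references the trailing key(+iv) region, matching the CMD_AES_ECB /
+ * CMD_AES_CBC contract documented in asr-cipher-optee.h.
+ */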
+static int asr_optee_cipher_process(uint32_t cipher_mode, uint32_t op_mode,
+		struct scatterlist *src, struct scatterlist *dst,
+		size_t len, uint32_t key_size, u8 *key,
+		u8 *iv, uint32_t ivsize)
+{
+	struct tee_ioctl_invoke_arg invoke_arg;
+	struct tee_param params[4];
+	struct asrbcm_tee_context asrbcm_tee_ctx;
+	struct tee_shm *shm;
+	int ret = 0;
+	char *ma = NULL;
+    uint32_t srclen = len, dstlen = len, paralen_a = key_size, paralen_b = ivsize;
+    uint8_t *parabuf_a = key, *parabuf_b = iv;
+
+	ret = asrbcm_optee_open_ta(&asrbcm_tee_ctx, &pta_cipher_uuid);
+	if (ret != 0) {
+		return ret;
+	}
+
+	memset(&invoke_arg, 0x0, sizeof(struct tee_ioctl_invoke_arg));
+	invoke_arg.func = cipher_mode;
+	invoke_arg.session  = asrbcm_tee_ctx.session;
+
+	shm = tee_shm_alloc(asrbcm_tee_ctx.tee_ctx, srclen + dstlen + paralen_a + paralen_b, 
+						TEE_SHM_MAPPED | TEE_SHM_DMA_BUF);
+	if (IS_ERR(shm)) {
+		/* tee_shm_alloc() returns an ERR_PTR on failure, never NULL */
+		ret = PTR_ERR(shm);
+		goto exit;
+	}
+
+	params[0].attr = TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INPUT;
+	params[0].u.memref.shm_offs = 0;
+	params[0].u.memref.size = srclen;
+	params[0].u.memref.shm = shm;
+
+	params[1].attr = TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_OUTPUT;
+	params[1].u.memref.shm_offs = srclen;
+	params[1].u.memref.size = dstlen;
+	params[1].u.memref.shm = shm;
+
+	params[2].attr = TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INPUT;
+	params[2].u.value.a = op_mode;
+
+	ma = tee_shm_get_va(shm, 0);
+	sg_copy_to_buffer(src, sg_nents(src), ma, srclen);
+	memcpy(ma + srclen + dstlen, parabuf_a, paralen_a);
+
+	/* cbc with iv */
+	if (parabuf_b && paralen_b) {
+		memcpy(ma + srclen + dstlen + paralen_a, parabuf_b, paralen_b);
+		params[2].u.value.b = paralen_a;
+		params[3].attr = TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INPUT;
+		params[3].u.memref.shm_offs = srclen + dstlen;
+		params[3].u.memref.size = paralen_a + paralen_b;
+		params[3].u.memref.shm = shm;
+		invoke_arg.num_params = 4;
+	} else {
+		/* ecb with non iv */
+		params[3].attr = TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INPUT;
+		params[3].u.memref.shm_offs = srclen + dstlen;
+		params[3].u.memref.size = paralen_a;
+		params[3].u.memref.shm = shm;
+		invoke_arg.num_params = 4;
+	}
+
+	ret = tee_client_invoke_func(asrbcm_tee_ctx.tee_ctx, &invoke_arg, params);
+	if (ret != 0) {
+		goto free_shm;
+	} else if (invoke_arg.ret != 0) {
+		ret = -EIO;
+		goto free_shm;
+	}
+	sg_copy_from_buffer(dst, sg_nents(dst), ma + srclen, dstlen);
+
+free_shm:
+	tee_shm_free(shm);
+exit:
+	asrbcm_optee_close_ta(&asrbcm_tee_ctx);
+	return ret;
+}
+
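+/*
+ * Same flow as asr_optee_cipher_process() but with the hardware key (RKEK),
+ * which never leaves the TEE: the shared buffer only holds [src | dst | iv]
+ * and only the key size is passed in params[2].value.a, per
+ * CMD_AES_HWKEY_ECB / CMD_AES_HWKEY_CBC.
+ */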
+static int asr_optee_cipher_hwkey_process(uint32_t cipher_mode, uint32_t op_mode,
+		struct scatterlist *src, struct scatterlist *dst,
+		size_t len, uint32_t key_size,
+		u8 *iv, uint32_t ivsize)
+{
+	struct tee_ioctl_invoke_arg invoke_arg;
+	struct tee_param params[4];
+	struct asrbcm_tee_context asrbcm_tee_ctx;
+	struct tee_shm *shm;
+	int ret = 0;
+	char *ma = NULL;
+    uint32_t srclen = len, dstlen = len, paralen = ivsize;
+    uint8_t *parabuf = iv;
+
+
+	ret = asrbcm_optee_open_ta(&asrbcm_tee_ctx, &pta_cipher_uuid);
+	if (ret != 0) {
+		return ret;
+	}
+
+	memset(&invoke_arg, 0x0, sizeof(struct tee_ioctl_invoke_arg));
+	invoke_arg.func = cipher_mode;
+	invoke_arg.session  = asrbcm_tee_ctx.session;
+
+	shm = tee_shm_alloc(asrbcm_tee_ctx.tee_ctx, srclen + dstlen + paralen, TEE_SHM_MAPPED | TEE_SHM_DMA_BUF);
+	if (IS_ERR(shm)) {
+		/* tee_shm_alloc() returns an ERR_PTR on failure, never NULL */
+		ret = PTR_ERR(shm);
+		goto exit;
+	}
+
+	params[0].attr = TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INPUT;
+	params[0].u.memref.shm_offs = 0;
+	params[0].u.memref.size = srclen;
+	params[0].u.memref.shm = shm;
+
+	params[1].attr = TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_OUTPUT;
+	params[1].u.memref.shm_offs = srclen;
+	params[1].u.memref.size = dstlen;
+	params[1].u.memref.shm = shm;
+
+	params[2].attr = TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INPUT;
+	params[2].u.value.a = key_size;
+	params[2].u.value.b = op_mode;
+	params[2].u.value.c = 0;
+
+	ma = tee_shm_get_va(shm, 0);
+	sg_copy_to_buffer(src, sg_nents(src), ma, srclen);
+	if (parabuf && paralen) {
+		params[3].attr = TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INPUT;
+		params[3].u.memref.shm_offs = srclen + dstlen;
+		params[3].u.memref.size = paralen;
+		params[3].u.memref.shm = shm;
+		memcpy(ma + srclen + dstlen, parabuf, paralen);
+		invoke_arg.num_params = 4;
+	} else {
+		invoke_arg.num_params = 3;
+	}
+
+	ret = tee_client_invoke_func(asrbcm_tee_ctx.tee_ctx, &invoke_arg, params);
+	if (ret != 0) {
+		goto free_shm;
+	} else if (invoke_arg.ret != 0) {
+		ret = -EIO;
+		goto free_shm;
+	}
+	sg_copy_from_buffer(dst, sg_nents(dst), ma + srclen, dstlen);
+
+free_shm:
+	tee_shm_free(shm);
+exit:
+	asrbcm_optee_close_ta(&asrbcm_tee_ctx);
+	return ret;
+}
+
+static inline void asr_cipher_set_mode(struct asr_bcm_cipher *dd,
+				      const struct asr_cipher_reqctx *rctx)
+{
+	/* Clear all but persistent flags and set request flags. */
+	dd->flags = (dd->flags & CIPHER_FLAGS_PERSISTENT) | rctx->mode;
+}
+
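+/*
+ * The skcipher API expects req->iv to hold the IV for a chained follow-up
+ * request, i.e. the last ciphertext block of this one. For encryption it is
+ * read from the destination; for in-place decryption it was saved to
+ * rctx->lastc in asr_cipher() before the ciphertext was overwritten.
+ */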
+static void asr_cipher_set_iv_as_last_ciphertext_block(struct asr_bcm_cipher *dd)
+{
+	struct skcipher_request *req = skcipher_request_cast(dd->areq);
+	struct asr_cipher_reqctx *rctx = skcipher_request_ctx(req);
+	struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(req);
+	unsigned int ivsize = crypto_skcipher_ivsize(cipher);
+
+	if (req->cryptlen < ivsize)
+		return;
+
+	if (rctx->mode & FLAGS_ENCRYPT) {
+		scatterwalk_map_and_copy(req->iv, req->dst,
+					 req->cryptlen - ivsize, ivsize, 0);
+	} else {
+		if (req->src == req->dst)
+			memcpy(req->iv, rctx->lastc, ivsize);
+		else
+			scatterwalk_map_and_copy(req->iv, req->src,
+						 req->cryptlen - ivsize,
+						 ivsize, 0);
+	}
+}
+
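+/*
+ * Standard crypto queue handling: enqueue the new request (if any) and,
+ * unless the engine is already busy, dequeue the next one, signal any
+ * backlogged request with -EINPROGRESS and hand the winner to ctx->start().
+ * Returns the enqueue status for asynchronous starts, otherwise the result
+ * of ctx->start().
+ */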
+static int asr_cipher_handle_queue(struct asr_bcm_cipher *dd,
+				  struct crypto_async_request *new_areq)
+{
+	struct crypto_async_request *areq, *backlog;
+	struct asr_cipher_ctx *ctx;
+	unsigned long flags;
+	bool start_async;
+	int err, ret = 0;
+
+	spin_lock_irqsave(&dd->lock, flags);
+	if (new_areq)
+		ret = crypto_enqueue_request(&dd->queue, new_areq);
+	if (dd->flags & FLAGS_BUSY) {
+		spin_unlock_irqrestore(&dd->lock, flags);
+		return ret;
+	}
+
+	backlog = crypto_get_backlog(&dd->queue);
+	areq = crypto_dequeue_request(&dd->queue);
+	if (areq) {
+		dd->flags |= FLAGS_BUSY;
+	}
+	spin_unlock_irqrestore(&dd->lock, flags);
+
+	if (!areq)
+		return ret;
+
+	if (backlog)
+		backlog->complete(backlog, -EINPROGRESS);
+
+	ctx = crypto_tfm_ctx(areq->tfm);
+
+	dd->areq = areq;
+	dd->ctx = ctx;
+	start_async = (areq != new_areq);
+	dd->is_async = start_async;
+
+	/* WARNING: ctx->start() MAY change dd->is_async. */
+	err = ctx->start(dd);
+	return (start_async) ? ret : err;
+}
+
+static inline int asr_cipher_complete(struct asr_bcm_cipher *dd, int err)
+{
+
+	dd->flags &= ~FLAGS_BUSY;
+
+	asr_cipher_set_iv_as_last_ciphertext_block(dd);
+
+	if (dd->is_async)
+		dd->areq->complete(dd->areq, err);
+
+	tasklet_schedule(&dd->queue_task);
+
+	return err;
+}
+
+static int asr_cipher_start(struct asr_bcm_cipher *dd)
+{
+	struct skcipher_request *req = skcipher_request_cast(dd->areq);
+	struct asr_cipher_reqctx *rctx = skcipher_request_ctx(req);
+	struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(req);
+	u8 *iv;
+	u32 flags, cipher_mode, op_mode, keylen, ivsize;
+	int err;
+
+	asr_cipher_set_mode(dd, rctx);
+
+    flags = dd->flags;
+
+	if ((flags & FLAGS_OPMODE_MASK) == FLAGS_CBC) {
+		if (rctx->use_rkek) {
+			cipher_mode = CMD_AES_HWKEY_CBC;
+		} else{
+			cipher_mode = CMD_AES_CBC;
+		}
+		ivsize = crypto_skcipher_ivsize(cipher);
+		iv = req->iv;
+	} else {
+		iv = NULL;
+		ivsize = 0;
+		if (rctx->use_rkek) {
+			cipher_mode = CMD_AES_HWKEY_ECB;
+		} else {
+			cipher_mode = CMD_AES_ECB;
+		}
+	}
+
+	if (flags & FLAGS_ENCRYPT)
+		op_mode = 1;
+	else
+		op_mode = 0;
+
+	keylen = dd->ctx->keylen;
+
+	if (rctx->use_rkek) {
+		err = asr_optee_cipher_hwkey_process(cipher_mode, op_mode, req->src,
+				req->dst, req->cryptlen, keylen, iv, ivsize);
+	} else {
+		err = asr_optee_cipher_process(cipher_mode, op_mode, req->src,
+				req->dst, req->cryptlen, keylen, (u8 *)dd->ctx->key, iv, ivsize);		
+	}
+
+	return asr_cipher_complete(dd, err);
+}
+
+static int asr_cipher(struct skcipher_request *req, unsigned long mode)
+{
+	struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(req);
+	struct asr_cipher_ctx *ctx = crypto_skcipher_ctx(cipher);
+	struct asr_cipher_reqctx *rctx;
+	struct asr_bcm_cipher *dd = asr_cipher_local;
+
+	ctx->block_size = AES_BLOCK_SIZE;
+	ctx->dd = dd;
+
+	rctx = skcipher_request_ctx(req);
+	rctx->mode = mode;
+	rctx->use_rkek = ctx->use_rkek;
+
+	if (!(mode & FLAGS_ENCRYPT) && (req->src == req->dst)) {
+		unsigned int ivsize = crypto_skcipher_ivsize(cipher);
+		if (req->cryptlen >= ivsize) {
+			scatterwalk_map_and_copy(rctx->lastc, req->src,
+						 req->cryptlen - ivsize,
+						 ivsize, 0);
+		}
+	}
+
+	return asr_cipher_handle_queue(dd, &req->base);
+}
+
+static int asr_cipher_setkey(struct crypto_skcipher *cipher, const u8 *key,
+			   unsigned int keylen)
+{
+	struct asr_cipher_ctx *ctx = crypto_skcipher_ctx(cipher);
+	struct asr_bcm_cipher *dd = asr_cipher_local;
+	
+	ctx->dd = dd;
+	ctx->use_rkek = false;
+
+	if (keylen != AES_KEYSIZE_128 &&
+		keylen != AES_KEYSIZE_192 &&
+		keylen != AES_KEYSIZE_256) {
+		crypto_skcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+
+	memcpy(ctx->key, key, keylen);
+	ctx->keylen = keylen;
+
+	return 0;
+}
+
+static int asr_cipher_set_hwkey(struct crypto_skcipher *cipher, const u8 *key,
+			   unsigned int keylen)
+{
+	struct asr_cipher_ctx *ctx = crypto_skcipher_ctx(cipher);
+	struct asr_bcm_cipher *dd = asr_cipher_local;
+
+	(void)key; /* ignore the sw key */
+
+	if (!dd->rkek_burned)
+		return -EPERM;
+
+	if (keylen != AES_KEYSIZE_128 &&
+	    keylen != AES_KEYSIZE_192 &&
+	    keylen != AES_KEYSIZE_256) {
+		crypto_skcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+
+	ctx->keylen = keylen;
+
+	return 0;
+}
+
+static int asr_aes_ecb_encrypt(struct skcipher_request *req)
+{
+	return asr_cipher(req, FLAGS_ECB | FLAGS_ENCRYPT);
+}
+
+static int asr_aes_ecb_decrypt(struct skcipher_request *req)
+{
+	return asr_cipher(req, FLAGS_ECB);
+}
+
+static int asr_aes_cbc_encrypt(struct skcipher_request *req)
+{
+	return asr_cipher(req, FLAGS_CBC | FLAGS_ENCRYPT);
+}
+
+static int asr_aes_cbc_decrypt(struct skcipher_request *req)
+{
+	return asr_cipher(req, FLAGS_CBC);
+}
+
+static int asr_cipher_init(struct crypto_skcipher *tfm)
+{
+	struct asr_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	tfm->reqsize = sizeof(struct asr_cipher_reqctx);
+	ctx->start = asr_cipher_start;
+
+	return 0;
+}
+
+static int asr_cipher_hwkey_init(struct crypto_skcipher *tfm)
+{
+	struct asr_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct asr_bcm_cipher *dd = asr_cipher_local;
+
+	if (!dd->rkek_burned)
+		return -EPERM;
+
+	tfm->reqsize = sizeof(struct asr_cipher_reqctx);
+	ctx->start = asr_cipher_start;
+
+	return 0;
+}
+
+static void asr_cipher_exit(struct crypto_skcipher *tfm)
+{
+	struct asr_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	memset(ctx, 0, sizeof(*ctx));
+}
+
+static void asr_cipher_queue_task(unsigned long data)
+{
+	struct asr_bcm_cipher *dd = (struct asr_bcm_cipher *)data;
+
+	asr_cipher_handle_queue(dd, NULL);
+}
+
+static struct skcipher_alg cipher_algs[] = {
+	/* AES - ECB, using input key*/
+	{
+		.base = {
+			.cra_name = "ecb(aes)",
+			.cra_driver_name = "asr-ecb-aes",
+			.cra_priority = 300,
+			.cra_flags = CRYPTO_ALG_ASYNC,
+			.cra_blocksize = AES_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct asr_cipher_ctx),
+			.cra_alignmask = 0xf,
+			.cra_module = THIS_MODULE,
+		},
+		.min_keysize = AES_MIN_KEY_SIZE,
+		.max_keysize = AES_MAX_KEY_SIZE,
+		.setkey = asr_cipher_setkey,
+		.encrypt = asr_aes_ecb_encrypt,
+		.decrypt = asr_aes_ecb_decrypt,
+		.init = asr_cipher_init,
+		.exit = asr_cipher_exit,
+	},
+	/* AES - CBC, using input key */
+	{
+		.base = {
+			.cra_name = "cbc(aes)",
+			.cra_driver_name = "asr-cbc-aes",
+			.cra_priority = 300,
+			.cra_flags = CRYPTO_ALG_ASYNC,
+			.cra_blocksize = AES_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct asr_cipher_ctx),
+			.cra_alignmask = 0xf,
+			.cra_module = THIS_MODULE,
+		},
+		.min_keysize = AES_MIN_KEY_SIZE,
+		.max_keysize = AES_MAX_KEY_SIZE,
+		.setkey = asr_cipher_setkey,
+		.encrypt = asr_aes_cbc_encrypt,
+		.decrypt = asr_aes_cbc_decrypt,
+		.init = asr_cipher_init,
+		.exit = asr_cipher_exit,
+		.ivsize = AES_BLOCK_SIZE,
+	},
+	/* AES - ECB, using hardware key, a.k.a. RKEK */
+	{
+		.base = {
+			.cra_name = "ecb(aes-hwkey)",
+			.cra_driver_name = "asr-ecb-aes-hwkey",
+			.cra_priority = 300,
+			.cra_flags = CRYPTO_ALG_ASYNC,
+			.cra_blocksize = AES_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct asr_cipher_ctx),
+			.cra_alignmask = 0xf,
+			.cra_module = THIS_MODULE,
+		},
+		.min_keysize = AES_MIN_KEY_SIZE,
+		.max_keysize = AES_MAX_KEY_SIZE,
+		.setkey = asr_cipher_set_hwkey,
+		.encrypt = asr_aes_ecb_encrypt,
+		.decrypt = asr_aes_ecb_decrypt,
+		.init = asr_cipher_hwkey_init,
+		.exit = asr_cipher_exit,
+	},
+	/* AES - CBC, using hardware key, a.k.a. RKEK */
+	{
+		.base = {
+			.cra_name = "cbc(aes-hwkey)",
+			.cra_driver_name = "asr-cbc-aes-hwkey",
+			.cra_priority = 300,
+			.cra_flags = CRYPTO_ALG_ASYNC,
+			.cra_blocksize = AES_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct asr_cipher_ctx),
+			.cra_alignmask = 0xf,
+			.cra_module = THIS_MODULE,
+		},
+		.min_keysize = AES_MIN_KEY_SIZE,
+		.max_keysize = AES_MAX_KEY_SIZE,
+		.setkey = asr_cipher_set_hwkey,
+		.encrypt = asr_aes_cbc_encrypt,
+		.decrypt = asr_aes_cbc_decrypt,
+		.init = asr_cipher_hwkey_init,
+		.exit = asr_cipher_exit,
+		.ivsize = AES_BLOCK_SIZE,
+	},
+};
+
+int asr_bcm_cipher_register(struct asr_bcm_dev *bcm_dd)
+{
+	int i, j, err;
+	struct asr_bcm_cipher *cipher_dd;
+	struct device *dev = bcm_dd->dev;
+	u32 rkek_state;
+
+    cipher_dd = &bcm_dd->asr_cipher;
+	cipher_dd->dev = bcm_dd->dev;
+
+	asr_cipher_local = cipher_dd;
+
+	err = asr_optee_cipher_get_rkek_state(&rkek_state);
+	if (err) {
+		dev_warn(dev, "can't get hwkey(rkek) state\n");
+		cipher_dd->rkek_burned = 0;
+	} else {
+		if (rkek_state)
+			cipher_dd->rkek_burned = 1;
+		else
+			cipher_dd->rkek_burned = 0;
+		switch (rkek_state) {
+		case 2:
+			dev_warn(dev, "hwkey(rkek) burned, SW access not disabled\n");
+			break;
+		case 1:
+			dev_warn(dev, "hwkey(rkek) burned, SW access disabled\n");
+			break;
+		case 0:
+			dev_warn(dev, "hwkey(rkek) not burned\n");
+			break;
+		}
+	}
+
+	spin_lock_init(&cipher_dd->lock);
+	tasklet_init(&cipher_dd->queue_task, asr_cipher_queue_task,
+					(unsigned long)cipher_dd);
+
+	crypto_init_queue(&cipher_dd->queue, ASR_CIPHER_QUEUE_LENGTH);
+
+	for (i = 0; i < ARRAY_SIZE(cipher_algs); i++) {
+		err = crypto_register_skcipher(&cipher_algs[i]);
+		if (err){
+			for (j = 0; j < i; j++)
+				crypto_unregister_skcipher(&cipher_algs[j]);
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+int asr_bcm_cipher_unregister(struct asr_bcm_dev *bcm_dd)
+{
+	int i;
+	struct asr_bcm_cipher *cipher_dd = &bcm_dd->asr_cipher;
+
+	for (i = 0; i < ARRAY_SIZE(cipher_algs); i++)
+		crypto_unregister_skcipher(&cipher_algs[i]);
+
+	tasklet_kill(&cipher_dd->queue_task);
+
+	/*
+	 * cipher_dd is embedded in the devm-allocated asr_bcm_dev and is not a
+	 * separate devm allocation, so it must not be passed to devm_kfree().
+	 */
+	return 0;
+}
+
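+/*
+ * Illustrative sketch (not part of the original driver): one way a kernel
+ * consumer could exercise the "cbc(aes-hwkey)" skcipher registered above via
+ * the generic crypto API. The ASR_BCM_CIPHER_EXAMPLE guard, the buffer
+ * contents and the chosen key length are assumptions for demonstration only.
+ */
+#ifdef ASR_BCM_CIPHER_EXAMPLE
+static int __maybe_unused asr_bcm_cipher_example(void)
+{
+	struct crypto_skcipher *tfm;
+	struct skcipher_request *req;
+	struct scatterlist sg;
+	DECLARE_CRYPTO_WAIT(wait);
+	u8 iv[AES_BLOCK_SIZE] = {0};
+	u8 key[AES_KEYSIZE_256] = {0};	/* key bytes are ignored; only the length selects the RKEK key size */
+	u8 *buf;
+	int err;
+
+	buf = kzalloc(AES_BLOCK_SIZE, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	tfm = crypto_alloc_skcipher("cbc(aes-hwkey)", 0, 0);
+	if (IS_ERR(tfm)) {
+		kfree(buf);
+		return PTR_ERR(tfm);
+	}
+
+	err = crypto_skcipher_setkey(tfm, key, sizeof(key));
+	if (err)
+		goto out_tfm;
+
+	req = skcipher_request_alloc(tfm, GFP_KERNEL);
+	if (!req) {
+		err = -ENOMEM;
+		goto out_tfm;
+	}
+
+	sg_init_one(&sg, buf, AES_BLOCK_SIZE);
+	skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+				      crypto_req_done, &wait);
+	skcipher_request_set_crypt(req, &sg, &sg, AES_BLOCK_SIZE, iv);
+
+	/* encrypt one block in place and wait for the asynchronous completion */
+	err = crypto_wait_req(crypto_skcipher_encrypt(req), &wait);
+
+	skcipher_request_free(req);
+out_tfm:
+	crypto_free_skcipher(tfm);
+	kfree(buf);
+	return err;
+}
+#endif
+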
+MODULE_DESCRIPTION("ASR HWKey CIPHER driver with optee-os.");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Yonggan Wang");
\ No newline at end of file
diff --git a/marvell/linux/drivers/crypto/asr/bcm_optee/asr-cipher-optee.h b/marvell/linux/drivers/crypto/asr/bcm_optee/asr-cipher-optee.h
new file mode 100644
index 0000000..74b790e
--- /dev/null
+++ b/marvell/linux/drivers/crypto/asr/bcm_optee/asr-cipher-optee.h
@@ -0,0 +1,72 @@
+#ifndef ASR_CIPHER_OPTEE_H
+#define ASR_CIPHER_OPTEE_H
+
+#define ASR_AES_ACCESS_UUID \
+		{ \
+			0xba1b496f, 0xf07d, 0x466e, \
+			{ 0x99, 0x09, 0xeb, 0xe3, 0x55, 0x43, 0xa0, 0x1c } \
+		}
+
+/*
+ * AES ECB encrypt/decrypt data with HWKEY(RKEK)
+ *
+ * [in]     params[0].memref.buffer     plain/cipher text to encrypt/decrypt
+ * [in]     params[0].memref.size       length of plain/cipher text
+ * [out]    pParams[1].memref.buffer    cipher/plain text after encrypt/decrypt
+ * [in]     pParams[2].value.a          keysize
+ * [in]     pParams[2].value.b          op_mode: 1--encrypt, 0--decrypt
+ */
+#define CMD_AES_HWKEY_ECB   0x1
+
+/*
+ * AES CBC encrypt/decrypt data with HWKEY(RKEK)
+ *
+ * [in]     params[0].memref.buffer     plain/cipher text to encrypt/decrypt
+ * [in]     params[0].memref.size       length of plain/cipher text
+ * [out]    pParams[1].memref.buffer    cipher/plain text after encrypt/decrypt
+ * [in]     pParams[2].value.a          keysize
+ * [in]     pParams[2].value.b          op_mode: 1--encrypt, 0--decrypt
+ * [in]     pParams[3].memref.buffer    initial vector
+ */
+#define CMD_AES_HWKEY_CBC   0x2
+
+
+/*
+ * Check AES RKEK status
+ *     0: RKEK(hwkey) is not burned
+ *     1: RKEK(hwkey) is burned and software access is disabled
+ *     2: RKEK(hwkey) is burned but software access is not disabled
+ *
+ * [out]     pParams[0].value.a          status
+ */
+#define CMD_AES_HWKEY_STATUS	0x3
+
+/*
+ * AES ECB encrypt/decrypt data with input key
+ *
+ * [in]     params[0].memref.buffer     plain/cipher text to encrypt/decrypt
+ * [in]     params[0].memref.size       length of plain/cipher text
+ * [out]    pParams[1].memref.buffer    cipher/plain text after encrypt/decrypt
+ * [in]     pParams[2].value.a          op_mode: 1--encrypt, 0--decrypt
+ * [in]     pParams[3].memref.buffer   	input key
+ * [in]     pParams[3].memref.size      keysize
+ */
+#define CMD_AES_ECB   0x4
+
+/*
+ * AES CBC encrypt/decrypt data with input key
+ *
+ * [in]     params[0].memref.buffer     plain/cipher text to encrypt/decrypt
+ * [in]     params[0].memref.size       length of plain/cipher text
+ * [out]    pParams[1].memref.buffer    cipher/plain text after encrypt/decrypt
+ * [in]     pParams[2].value.a          op_mode: 1--encrypt, 0--decrypt
+ * [in]     pParams[2].value.b          keysize
+ * [in]     pParams[3].memref.buffer   	input key + initial vector
+ * [in]     pParams[3].memref.size      keysize + ivsize
+ */
+#define CMD_AES_CBC   0x5
+
+
+#include "../bcm/asr-cipher.h"
+
+#endif
\ No newline at end of file
diff --git a/marvell/linux/drivers/crypto/asr/bcm_optee/asr-sha-optee.c b/marvell/linux/drivers/crypto/asr/bcm_optee/asr-sha-optee.c
new file mode 100644
index 0000000..fcb9bb7
--- /dev/null
+++ b/marvell/linux/drivers/crypto/asr/bcm_optee/asr-sha-optee.c
@@ -0,0 +1,1119 @@
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/clk-provider.h>
+#include <linux/clk.h>
+#include <linux/io.h>
+#include <linux/hw_random.h>
+#include <linux/platform_device.h>
+#include <linux/scatterlist.h>
+#include <crypto/scatterwalk.h>
+#include <linux/of_device.h>
+#include <linux/mutex.h>
+#include <linux/device.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <crypto/hmac.h>
+#include <crypto/md5.h>
+#include <crypto/sha.h>
+
+#include "asr-sha-optee.h"
+
+static struct asr_bcm_sha *asr_sha_local = NULL;
+
+static struct teec_uuid pta_sha_uuid = ASR_SHA_ACCESS_UUID;
+
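+/*
+ * One hash transaction spans three TA calls: _init opens the TEE
+ * context/session and issues CMD_SHA_INIT, _update reuses that session for
+ * CMD_SHA_UPDATE with the data passed through shared memory, and _final
+ * fetches the digest with CMD_SHA_FINAL and closes the session. The error
+ * paths of _update/_final also close the session.
+ */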
+static int asrbcm_optee_acquire_hash_init(struct asr_optee_sha_reqctx *ctx, struct teec_uuid *uuid, u32 cmd, u32 alg)
+{
+	struct tee_ioctl_invoke_arg invoke_arg;
+	struct tee_param params[2];
+	int ret = 0;
+
+	ret = asrbcm_optee_open_ta(&ctx->asrbcm_tee_ctx, uuid);
+	if (ret != 0) {
+		return ret;
+	}
+
+	memset(&invoke_arg, 0x0, sizeof(struct tee_ioctl_invoke_arg));
+	invoke_arg.func = cmd;
+	invoke_arg.session  = ctx->asrbcm_tee_ctx.session;
+	invoke_arg.num_params = 2;
+
+	params[0].attr = TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INPUT;
+	params[0].u.value.a = alg;
+
+	params[1].attr = TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INPUT;
+	params[1].u.value.a = (uint32_t)ctx;
+
+	ret = tee_client_invoke_func(ctx->asrbcm_tee_ctx.tee_ctx, &invoke_arg, params);
+	if (ret != 0) {
+		goto exit;
+	} else if (invoke_arg.ret != 0) {
+		ret = -EIO;
+		goto exit;
+	}
+
+	return ret;
+
+exit:
+	asrbcm_optee_close_ta(&ctx->asrbcm_tee_ctx);
+	return ret;
+}
+
+static int asrbcm_optee_acquire_hash_update(struct asr_optee_sha_reqctx *ctx, struct teec_uuid *uuid, u32 cmd, \
+									u32 alg, uint8_t *in, u32 inlen)
+{
+	struct tee_ioctl_invoke_arg invoke_arg;
+	struct tee_param params[2];
+	int ret = 0;
+	struct tee_shm *shm = NULL;
+	u8 *pbuf = NULL;
+	
+	memset(&invoke_arg, 0x0, sizeof(struct tee_ioctl_invoke_arg));
+	invoke_arg.func = cmd;
+	invoke_arg.session  = ctx->asrbcm_tee_ctx.session;
+	invoke_arg.num_params = 2;
+
+	shm = tee_shm_alloc(ctx->asrbcm_tee_ctx.tee_ctx, inlen, TEE_SHM_MAPPED | TEE_SHM_DMA_BUF);
+	if (IS_ERR(shm)) {
+		/* shm is not valid here, so don't jump to exit and free it */
+		asrbcm_optee_close_ta(&ctx->asrbcm_tee_ctx);
+		return PTR_ERR(shm);
+	}
+
+	pbuf = tee_shm_get_va(shm, 0);
+	memcpy(pbuf, in, inlen);
+
+	params[0].attr = TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INPUT;
+	params[0].u.memref.shm_offs = 0;
+	params[0].u.memref.size = inlen;
+	params[0].u.memref.shm = shm;
+
+	params[1].attr = TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INPUT;
+	params[1].u.value.a = (uint32_t)ctx;
+
+	ret = tee_client_invoke_func(ctx->asrbcm_tee_ctx.tee_ctx, &invoke_arg, params);
+	if (ret != 0) {
+		goto exit;
+	} else if (invoke_arg.ret != 0) {
+		ret = -EIO;
+		goto exit;
+	}
+
+	tee_shm_free(shm);
+	return ret;
+
+exit:
+	tee_shm_free(shm);
+	asrbcm_optee_close_ta(&ctx->asrbcm_tee_ctx);
+	return ret;
+}
+
+static int asrbcm_optee_acquire_hash_final(struct asr_optee_sha_reqctx *ctx, struct teec_uuid *uuid, u32 cmd, u32 alg, u8 *out, u8 outlen)
+{
+	struct tee_ioctl_invoke_arg invoke_arg;
+	struct tee_param params[2];
+	int ret = 0;
+	struct tee_shm *shm = NULL;
+	u8 *pbuf = NULL;
+
+	memset(&invoke_arg, 0x0, sizeof(struct tee_ioctl_invoke_arg));
+	invoke_arg.func = cmd;
+	invoke_arg.session  = ctx->asrbcm_tee_ctx.session;
+	invoke_arg.num_params = 2;
+
+	shm = tee_shm_alloc(ctx->asrbcm_tee_ctx.tee_ctx, outlen, TEE_SHM_MAPPED | TEE_SHM_DMA_BUF);
+	if (IS_ERR(shm)) {
+		/* shm is not valid here, so don't jump to exit and free it */
+		asrbcm_optee_close_ta(&ctx->asrbcm_tee_ctx);
+		return PTR_ERR(shm);
+	}
+
+	params[0].attr = TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_OUTPUT;
+	params[0].u.memref.shm_offs = 0;
+	params[0].u.memref.size = outlen;
+	params[0].u.memref.shm = shm;
+
+	params[1].attr = TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INPUT;
+	params[1].u.value.a = (uint32_t)ctx;
+
+	ret = tee_client_invoke_func(ctx->asrbcm_tee_ctx.tee_ctx, &invoke_arg, params);
+	if (ret != 0) {
+		goto exit;
+	} else if (invoke_arg.ret != 0) {
+		ret = -EIO;
+		goto exit;
+	}
+	
+	pbuf = tee_shm_get_va(shm, 0);
+	memcpy(out, pbuf, outlen);
+
+exit:
+    tee_shm_free(shm);
+	asrbcm_optee_close_ta(&ctx->asrbcm_tee_ctx);
+	return ret;
+}
+
+static int asr_sha_handle_queue(struct asr_bcm_sha *dd,
+				  struct ahash_request *req)
+{
+	struct crypto_async_request *async_req, *backlog;
+	struct asr_sha_ctx *ctx;
+	unsigned long flags;
+	bool start_async;
+	int err = 0, ret = 0;
+
+	spin_lock_irqsave(&dd->lock, flags);
+	if (req)
+		ret = ahash_enqueue_request(&dd->queue, req);
+
+	if (SHA_FLAGS_BUSY & dd->flags) {
+		spin_unlock_irqrestore(&dd->lock, flags);
+		return ret;
+	}
+
+	backlog = crypto_get_backlog(&dd->queue);
+	async_req = crypto_dequeue_request(&dd->queue);
+	if (async_req)
+		dd->flags |= SHA_FLAGS_BUSY;
+
+	spin_unlock_irqrestore(&dd->lock, flags);
+
+	if (!async_req) {
+		return ret;
+	}
+
+	if (backlog)
+		backlog->complete(backlog, -EINPROGRESS);
+
+	ctx = crypto_tfm_ctx(async_req->tfm);
+
+	dd->req = ahash_request_cast(async_req);
+	start_async = (dd->req != req);
+	dd->is_async = start_async;
+	dd->force_complete = false;
+
+	/* WARNING: ctx->start() MAY change dd->is_async. */
+	err = ctx->start(dd);
+	return (start_async) ? ret : err;
+}
+
+static int asr_sha_enqueue(struct ahash_request *req, unsigned int op)
+{
+	struct asr_optee_sha_reqctx *optee_ctx = ahash_request_ctx(req);
+	struct asr_sha_reqctx *ctx = &optee_ctx->reqctx;
+
+	struct asr_bcm_sha *dd = ctx->dd;
+
+	ctx->op = op;
+
+	return asr_sha_handle_queue(dd, req);
+}
+
+static void asr_sha_copy_ready_hash(struct ahash_request *req)
+{
+	struct asr_optee_sha_reqctx *optee_ctx = ahash_request_ctx(req);
+	struct asr_sha_reqctx *ctx = &optee_ctx->reqctx;
+
+	if (!req->result)
+		return;
+
+	switch (ctx->flags & SHA_FLAGS_ALGO_MASK) {
+	case SHA_FLAGS_MD5:
+		memcpy(req->result, ctx->digest, MD5_DIGEST_SIZE);
+		break;
+	case SHA_FLAGS_SHA1:
+		memcpy(req->result, ctx->digest, SHA1_DIGEST_SIZE);
+		break;
+	case SHA_FLAGS_SHA224:
+		memcpy(req->result, ctx->digest, SHA224_DIGEST_SIZE);
+		break;
+	case SHA_FLAGS_SHA256:
+		memcpy(req->result, ctx->digest, SHA256_DIGEST_SIZE);
+		break;
+	case SHA_FLAGS_SHA384:
+		memcpy(req->result, ctx->digest, SHA384_DIGEST_SIZE);
+		break;
+	case SHA_FLAGS_SHA512:
+		memcpy(req->result, ctx->digest, SHA512_DIGEST_SIZE);
+		break;
+	default:
+		return;
+	}
+}
+
+static inline int asr_sha_complete(struct asr_bcm_sha *dd, int err)
+{
+	struct ahash_request *req = dd->req;
+	struct asr_optee_sha_reqctx *optee_ctx = ahash_request_ctx(req);
+	struct asr_sha_reqctx *ctx = &optee_ctx->reqctx;
+
+	dd->flags &= ~(SHA_FLAGS_BUSY);
+	ctx->flags &= ~(SHA_FLAGS_FINAL);
+
+	if ((dd->is_async || dd->force_complete) && req->base.complete)
+		req->base.complete(&req->base, err);
+
+	/* handle new request */
+	tasklet_schedule(&dd->queue_task);
+
+	return err;
+}
+
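+/*
+ * Copy as much of the remaining scatterlist data as fits into the linear
+ * bounce buffer (ctx->buffer), advancing ctx->sg/ctx->offset and reducing
+ * ctx->total, so the data can be handed to the TA as contiguous memory.
+ */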
+static size_t asr_sha_append_sg(struct asr_sha_reqctx *ctx)
+{
+	size_t count;
+
+	while ((ctx->bufcnt < ctx->buflen) && ctx->total) {
+		count = min(ctx->sg->length - ctx->offset, ctx->total);
+		count = min(count, ctx->buflen - ctx->bufcnt);
+
+		if (count <= 0) {
+			/*
+			 * count can be 0 either because the buffer is full or
+			 * because the sg length is 0. In the latter case,
+			 * check if there is another sg in the list; a 0 length
+			 * sg doesn't necessarily mean the end of the sg list.
+			 */
+			if ((ctx->sg->length == 0) && !sg_is_last(ctx->sg)) {
+				ctx->sg = sg_next(ctx->sg);
+				continue;
+			} else {
+				break;
+			}
+		}
+
+		scatterwalk_map_and_copy(ctx->buffer + ctx->bufcnt, ctx->sg,
+			ctx->offset, count, 0);
+
+		ctx->bufcnt += count;
+		ctx->offset += count;
+		ctx->total -= count;
+
+		if (ctx->offset == ctx->sg->length) {
+			ctx->sg = sg_next(ctx->sg);
+			if (ctx->sg)
+				ctx->offset = 0;
+			else
+				ctx->total = 0;
+		}
+	}
+
+	return 0;
+}
+
+static int asr_sha_buff_init(struct asr_bcm_sha *dd, uint32_t len)
+{
+	struct ahash_request *req = dd->req;
+	struct asr_optee_sha_reqctx *optee_ctx = ahash_request_ctx(req);
+	struct asr_sha_reqctx *ctx = &optee_ctx->reqctx;
+
+	ctx->buffer = (void *)__get_free_pages(GFP_KERNEL, get_order(len));
+	if (!ctx->buffer) {
+		dev_err(dd->dev, "unable to alloc pages.\n");
+		return -ENOMEM;
+	}
+
+	ctx->buflen = PAGE_SIZE << get_order(len);
+
+	return 0;
+}
+
+static void asr_sha_buff_cleanup(struct asr_bcm_sha *dd, uint32_t len)
+{
+	struct ahash_request *req = dd->req;
+	struct asr_optee_sha_reqctx *optee_ctx = ahash_request_ctx(req);
+	struct asr_sha_reqctx *ctx = &optee_ctx->reqctx;
+
+	free_pages((unsigned long)ctx->buffer, get_order(len));
+	ctx->buflen = 0;
+}
+
+static int sha_init_req(struct asr_optee_sha_reqctx *optee_ctx)
+{
+	int ret = 0;
+	struct asr_sha_reqctx *ctx = &optee_ctx->reqctx;
+
+	/* hardware: hash init */
+	ret = asrbcm_optee_acquire_hash_init(optee_ctx, &pta_sha_uuid, \
+								CMD_SHA_INIT, ctx->md.alg);
+	if (ret)
+		return -EINVAL;
+	return 0;
+}
+
+static int sha_update_req(struct asr_optee_sha_reqctx *optee_ctx)
+{
+	int ret = 0;
+	int bufcnt;
+	uint8_t *pdata;
+	struct asr_sha_reqctx *ctx = &optee_ctx->reqctx;
+	uint32_t buflen = ctx->total;
+
+	ret = asr_sha_buff_init(ctx->dd, ctx->total);
+	if (ret)
+		return -ENOMEM;
+	
+	asr_sha_append_sg(ctx);
+	bufcnt = ctx->bufcnt;
+	ctx->bufcnt = 0;
+
+	pdata = (uint8_t *)ctx->buffer;
+
+	/* hardware: hash process */
+	ret = asrbcm_optee_acquire_hash_update(optee_ctx, &pta_sha_uuid, \
+			CMD_SHA_UPDATE, ctx->md.alg, pdata, bufcnt);
+	if (ret)
+		ret = -EINVAL;
+
+	asr_sha_buff_cleanup(ctx->dd, buflen);
+	return ret;
+}
+
+static void sha_finish_req(struct asr_optee_sha_reqctx *optee_ctx, int *err)
+{
+	struct asr_sha_reqctx *ctx = &optee_ctx->reqctx;
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(ctx->dd->req);
+    uint8_t *hash = (uint8_t *)ctx->digest;
+	uint32_t outlen = crypto_ahash_digestsize(tfm);
+
+	if (!(*err) && (ctx->flags & SHA_FLAGS_FINAL)) {
+		*err = asrbcm_optee_acquire_hash_final(optee_ctx, &pta_sha_uuid, CMD_SHA_FINAL, \
+											ctx->md.alg, (uint8_t *)hash, outlen);
+		ctx->flags &= (~SHA_FLAGS_FINAL);
+		asr_sha_copy_ready_hash(ctx->dd->req);
+	} else {
+		ctx->flags |= SHA_FLAGS_ERROR;
+	}
+}
+
+static void sha_next_req(struct asr_optee_sha_reqctx *optee_ctx, int *err)
+{
+	struct asr_sha_reqctx *ctx = &optee_ctx->reqctx;
+
+	if (likely(!(*err) && (SHA_FLAGS_FINAL & ctx->flags)))
+		sha_finish_req(optee_ctx, err);
+
+	(void)asr_sha_complete(ctx->dd, *err);
+}
+
+static int asr_sha_done(struct asr_bcm_sha *dd);
+
+static int asr_sha_start(struct asr_bcm_sha *dd)
+{
+	int err = 0;
+	struct ahash_request *req = dd->req;
+	struct asr_optee_sha_reqctx *optee_ctx = ahash_request_ctx(req);
+	struct asr_sha_reqctx *ctx = &optee_ctx->reqctx;
+	
+	mutex_lock(&dd->queue_lock);
+
+	dd->resume = asr_sha_done;
+
+	if ((ctx->flags & SHA_FLAGS_INIT)) {
+		err = sha_init_req(optee_ctx);
+		ctx->flags &= (~SHA_FLAGS_INIT);
+	}
+
+	if (!err) {
+		if (ctx->op == SHA_OP_UPDATE) {
+			err = sha_update_req(optee_ctx);
+			if (!err && (ctx->flags & SHA_FLAGS_FINUP))
+				/* no final() after finup() */
+				sha_finish_req(optee_ctx, &err);
+		} else if (ctx->op == SHA_OP_FINAL) {
+			sha_finish_req(optee_ctx, &err);
+		}
+	}
+
+	if (unlikely(err != -EINPROGRESS))
+		/* Task will not finish it, so do it here */
+		sha_next_req(optee_ctx, &err);
+
+	mutex_unlock(&dd->queue_lock);
+	return err;
+}
+
+static int asr_sha_cra_init(struct crypto_tfm *tfm)
+{
+    struct asr_sha_ctx *ctx = crypto_tfm_ctx(tfm);
+	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
+				 sizeof(struct asr_optee_sha_reqctx));
+	ctx->start = asr_sha_start;
+
+	return 0;
+}
+
+static void asr_sha_cra_exit(struct crypto_tfm *tfm)
+{
+    struct asr_sha_ctx *ctx = crypto_tfm_ctx(tfm);
+    memset(ctx, 0, sizeof(*ctx));
+}
+
+static inline void asr_sha_get(struct asr_bcm_sha *dd)
+{
+	mutex_lock(&dd->sha_lock);
+}
+
+static inline void asr_sha_put(struct asr_bcm_sha *dd)
+{
+	if(mutex_is_locked(&dd->sha_lock))
+		mutex_unlock(&dd->sha_lock);
+}
+
+static int asr_sha_init(struct ahash_request *req)
+{
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct asr_optee_sha_reqctx *optee_ctx = ahash_request_ctx(req);
+	struct asr_sha_reqctx *ctx = &optee_ctx->reqctx;
+	struct asr_bcm_sha *dd = asr_sha_local;
+
+    asr_sha_get(dd);
+
+    ctx->dd = dd;
+	memset(&ctx->md, 0, sizeof(ctx->md));
+    ctx->flags = 0;
+
+	switch (crypto_ahash_digestsize(tfm)) {
+	case MD5_DIGEST_SIZE:
+		ctx->flags |= SHA_FLAGS_MD5;
+		ctx->md.alg = TEE_ALG_MD5;
+		ctx->md.block_size = MD5_HMAC_BLOCK_SIZE;
+		break;
+	case SHA1_DIGEST_SIZE:
+		ctx->flags |= SHA_FLAGS_SHA1;
+		ctx->md.alg = TEE_ALG_SHA1;
+		ctx->md.block_size = SHA1_BLOCK_SIZE;
+		break;
+	case SHA224_DIGEST_SIZE:
+		ctx->flags |= SHA_FLAGS_SHA224;
+		ctx->md.alg = TEE_ALG_SHA224;
+		ctx->md.block_size = SHA224_BLOCK_SIZE;
+		break;
+	case SHA256_DIGEST_SIZE:
+		ctx->flags |= SHA_FLAGS_SHA256;
+		ctx->md.alg = TEE_ALG_SHA256;
+		ctx->md.block_size = SHA256_BLOCK_SIZE;
+		break;
+	case SHA384_DIGEST_SIZE:
+		ctx->flags |= SHA_FLAGS_SHA384;
+		ctx->md.alg = TEE_ALG_SHA384;
+		ctx->md.block_size = SHA384_BLOCK_SIZE;
+		break;
+	case SHA512_DIGEST_SIZE:
+		ctx->flags |= SHA_FLAGS_SHA512;
+		ctx->md.alg = TEE_ALG_SHA512;
+		ctx->md.block_size = SHA512_BLOCK_SIZE;
+		break;
+	default:
+        asr_sha_put(dd);
+		return -EINVAL;
+	}
+
+	ctx->bufcnt = 0;
+	ctx->flags |= SHA_FLAGS_INIT;
+	
+	asr_sha_put(dd);
+	return 0;
+}
+
+static int asr_sha_update(struct ahash_request *req)
+{
+	int ret = 0;
+	struct asr_optee_sha_reqctx *optee_ctx = ahash_request_ctx(req);
+	struct asr_sha_reqctx *ctx = &optee_ctx->reqctx;
+
+	asr_sha_get(ctx->dd);
+	ctx->total = req->nbytes;
+	ctx->sg = req->src;
+	ctx->offset = 0;
+
+	ret = asr_sha_enqueue(req, SHA_OP_UPDATE);
+
+	asr_sha_put(ctx->dd);
+	return ret;
+}
+
+static int asr_sha_final(struct ahash_request *req)
+{
+	int ret = 0;
+	struct asr_optee_sha_reqctx *optee_ctx = ahash_request_ctx(req);
+	struct asr_sha_reqctx *ctx = &optee_ctx->reqctx;
+
+	asr_sha_get(ctx->dd);
+	ctx->flags |= SHA_FLAGS_FINAL;
+	if (ctx->flags & SHA_FLAGS_ERROR) {
+		asr_sha_put(ctx->dd);
+		return 0; /* uncompleted hash is not needed */
+	}
+	ret = asr_sha_enqueue(req, SHA_OP_FINAL);
+
+	asr_sha_put(ctx->dd);
+	return ret;
+}
+
+static int asr_sha_finup(struct ahash_request *req)
+{
+	struct asr_optee_sha_reqctx *optee_ctx = ahash_request_ctx(req);
+	struct asr_sha_reqctx *ctx = &optee_ctx->reqctx;
+	int err1, err2;
+
+	ctx->flags |= SHA_FLAGS_FINUP;
+
+	err1 = asr_sha_update(req);
+	if (err1 == -EINPROGRESS ||
+		(err1 == -EBUSY && (ahash_request_flags(req) &
+				CRYPTO_TFM_REQ_MAY_BACKLOG))) {
+		asr_sha_put(ctx->dd);
+		return err1;
+	}
+	/*
+	 * final() always has to be called to clean up resources,
+	 * even if update() failed, except for -EINPROGRESS
+	 */
+	err2 = asr_sha_final(req);
+
+	return err1 ?: err2;
+}
+
+static int asr_sha_digest(struct ahash_request *req)
+{
+	return asr_sha_init(req) ?: asr_sha_finup(req);
+}
+
+static int asr_sha_export(struct ahash_request *req, void *out)
+{
+	const struct asr_optee_sha_reqctx *ctx = ahash_request_ctx(req);
+
+	memcpy(out, ctx, sizeof(*ctx));
+	return 0;
+}
+
+static int asr_sha_import(struct ahash_request *req, const void *in)
+{
+	struct asr_optee_sha_reqctx *ctx = ahash_request_ctx(req);
+
+	memcpy(ctx, in, sizeof(*ctx));
+	return 0;
+}
+
+static struct ahash_alg sha_algs[] = {
+	/* md5 */
+	{
+		.init		= asr_sha_init,
+		.update		= asr_sha_update,
+		.final		= asr_sha_final,
+		.finup		= asr_sha_finup,
+		.digest		= asr_sha_digest,
+		.export		= asr_sha_export,
+		.import		= asr_sha_import,
+		.halg = {
+			.digestsize	= MD5_DIGEST_SIZE,
+			.statesize	= sizeof(struct asr_optee_sha_reqctx),
+			.base	= {
+				.cra_name		= "md5",
+				.cra_driver_name	= "asr-md5",
+				.cra_priority		= ASR_SHA_PRIORITY,
+				.cra_flags		= CRYPTO_ALG_ASYNC,
+				.cra_blocksize		= MD5_HMAC_BLOCK_SIZE,
+				.cra_ctxsize		= sizeof(struct asr_sha_ctx),
+				.cra_alignmask		= 0,
+				.cra_module		= THIS_MODULE,
+				.cra_init		= asr_sha_cra_init,
+				.cra_exit		= asr_sha_cra_exit,
+			}
+		}
+	},
+
+	/* sha1 */
+	{
+		.init		= asr_sha_init,
+		.update		= asr_sha_update,
+		.final		= asr_sha_final,
+		.finup		= asr_sha_finup,
+		.digest		= asr_sha_digest,
+		.export		= asr_sha_export,
+		.import		= asr_sha_import,
+		.halg = {
+			.digestsize	= SHA1_DIGEST_SIZE,
+			.statesize	= sizeof(struct asr_optee_sha_reqctx),
+			.base	= {
+				.cra_name		= "sha1",
+				.cra_driver_name	= "asr-sha1",
+				.cra_priority		= ASR_SHA_PRIORITY,
+				.cra_flags		= CRYPTO_ALG_ASYNC,
+				.cra_blocksize		= SHA1_BLOCK_SIZE,
+				.cra_ctxsize		= sizeof(struct asr_sha_ctx),
+				.cra_alignmask		= 0,
+				.cra_module		= THIS_MODULE,
+				.cra_init		= asr_sha_cra_init,
+				.cra_exit		= asr_sha_cra_exit,
+			}
+		}
+	},
+
+	/* sha224 */
+	{
+		.init		= asr_sha_init,
+		.update		= asr_sha_update,
+		.final		= asr_sha_final,
+		.finup		= asr_sha_finup,
+		.digest		= asr_sha_digest,
+		.export		= asr_sha_export,
+		.import		= asr_sha_import,
+		.halg = {
+			.digestsize	= SHA224_DIGEST_SIZE,
+			.statesize	= sizeof(struct asr_optee_sha_reqctx),
+			.base	= {
+				.cra_name		= "sha224",
+				.cra_driver_name	= "asr-sha224",
+				.cra_priority		= ASR_SHA_PRIORITY,
+				.cra_flags		= CRYPTO_ALG_ASYNC,
+				.cra_blocksize		= SHA224_BLOCK_SIZE,
+				.cra_ctxsize		= sizeof(struct asr_sha_ctx),
+				.cra_alignmask		= 0,
+				.cra_module		= THIS_MODULE,
+				.cra_init		= asr_sha_cra_init,
+				.cra_exit		= asr_sha_cra_exit,
+			}
+		}
+	},
+
+	/* sha256 */
+	{
+		.init		= asr_sha_init,
+		.update		= asr_sha_update,
+		.final		= asr_sha_final,
+		.finup		= asr_sha_finup,
+		.digest		= asr_sha_digest,
+		.export		= asr_sha_export,
+		.import		= asr_sha_import,
+		.halg = {
+			.digestsize	= SHA256_DIGEST_SIZE,
+			.statesize	= sizeof(struct asr_optee_sha_reqctx),
+			.base	= {
+				.cra_name		= "sha256",
+				.cra_driver_name	= "asr-sha256",
+				.cra_priority		= ASR_SHA_PRIORITY,
+				.cra_flags		= CRYPTO_ALG_ASYNC,
+				.cra_blocksize		= SHA256_BLOCK_SIZE,
+				.cra_ctxsize		= sizeof(struct asr_sha_ctx),
+				.cra_alignmask		= 0,
+				.cra_module		= THIS_MODULE,
+				.cra_init		= asr_sha_cra_init,
+				.cra_exit		= asr_sha_cra_exit,
+			}
+		}
+	},
+
+	/* sha384 */
+	{
+		.init		= asr_sha_init,
+		.update		= asr_sha_update,
+		.final		= asr_sha_final,
+		.finup		= asr_sha_finup,
+		.digest		= asr_sha_digest,
+		.export		= asr_sha_export,
+		.import		= asr_sha_import,
+		.halg = {
+			.digestsize	= SHA384_DIGEST_SIZE,
+			.statesize	= sizeof(struct asr_optee_sha_reqctx),
+			.base	= {
+				.cra_name		= "sha384",
+				.cra_driver_name	= "asr-sha384",
+				.cra_priority		= ASR_SHA_PRIORITY,
+				.cra_flags		= CRYPTO_ALG_ASYNC,
+				.cra_blocksize		= SHA384_BLOCK_SIZE,
+				.cra_ctxsize		= sizeof(struct asr_sha_ctx),
+				.cra_alignmask		= 0,
+				.cra_module		= THIS_MODULE,
+				.cra_init		= asr_sha_cra_init,
+				.cra_exit		= asr_sha_cra_exit,
+			}
+		}
+	},
+
+	/* sha512 */
+	{
+		.init		= asr_sha_init,
+		.update		= asr_sha_update,
+		.final		= asr_sha_final,
+		.finup		= asr_sha_finup,
+		.digest		= asr_sha_digest,
+		.export		= asr_sha_export,
+		.import		= asr_sha_import,
+		.halg = {
+			.digestsize	= SHA512_DIGEST_SIZE,
+			.statesize	= sizeof(struct asr_optee_sha_reqctx),
+			.base	= {
+				.cra_name		= "sha512",
+				.cra_driver_name	= "asr-sha512",
+				.cra_priority		= ASR_SHA_PRIORITY,
+				.cra_flags		= CRYPTO_ALG_ASYNC,
+				.cra_blocksize		= SHA512_BLOCK_SIZE,
+				.cra_ctxsize		= sizeof(struct asr_sha_ctx),
+				.cra_alignmask		= 0,
+				.cra_module		= THIS_MODULE,
+				.cra_init		= asr_sha_cra_init,
+				.cra_exit		= asr_sha_cra_exit,
+			}
+		}
+	},
+};
+
+static void asr_sha_queue_task(unsigned long data)
+{
+	struct asr_bcm_sha *dd = (struct asr_bcm_sha *)data;
+
+	asr_sha_handle_queue(dd, NULL);
+}
+
+static int asr_sha_done(struct asr_bcm_sha *dd)
+{
+	int err = 0;
+	struct ahash_request *req = dd->req;
+	struct asr_optee_sha_reqctx *ctx = ahash_request_ctx(req);
+
+	sha_finish_req(ctx, &err);
+
+	return err;
+}
+
+static void asr_sha_done_task(unsigned long data)
+{
+	struct asr_bcm_sha *dd = (struct asr_bcm_sha *)data;
+
+	dd->is_async = true;
+	(void)dd->resume(dd);
+}
+
+static int hash_handle(int alg, uint8_t *in, uint32_t inlen, uint8_t *out)
+{
+	int ret = 0;
+	uint32_t outlen;
+	struct asr_optee_sha_reqctx ctx;
+
+	switch(alg) {
+	case TEE_ALG_SHA512:
+		outlen = HASH_LEN_SHA512;
+		break;
+	case TEE_ALG_SHA384:
+		outlen = HASH_LEN_SHA384;
+		break;
+	case TEE_ALG_SHA256:
+		outlen = HASH_LEN_SHA256;
+		break;
+	case TEE_ALG_SHA224:
+		outlen = HASH_LEN_SHA224;
+		break;
+	case TEE_ALG_SHA1:
+		outlen = HASH_LEN_SHA1;
+		break;
+	case TEE_ALG_MD5:
+		outlen = HASH_LEN_MD5;
+		break;
+	default:
+		printk("err: unsupported hash algorithm\n");
+		ret = -1;
+		goto exit;
+	}
+
+	ret = asrbcm_optee_acquire_hash_init(&ctx, &pta_sha_uuid, CMD_SHA_INIT, alg);
+	if (ret) {
+		ret = -1;
+		goto exit;
+	}
+
+	ret = asrbcm_optee_acquire_hash_update(&ctx, &pta_sha_uuid, CMD_SHA_UPDATE, alg, in, inlen);
+	if (ret) {
+		ret = -1;
+		goto exit;
+	}
+
+	ret = asrbcm_optee_acquire_hash_final(&ctx, &pta_sha_uuid, CMD_SHA_FINAL, alg, out, outlen);
+	if (ret) {
+		ret = -1;
+		goto exit;
+	}
+
+exit:
+	return ret;
+}
+
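+/*
+ * Known-answer self test: compute SHA-256("abc") through the TA and compare
+ * it against the expected digest; registration of the hash algorithms is
+ * skipped if this check fails.
+ */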
+static int tee_hwhash_func_verify(void)
+{
+	int ret = 0;
+	unsigned char out_sha256[32] = {0};
+	const struct {
+		const char *msg;
+		uint8_t hash[32];
+	} sha256_tests = {
+		"abc", 
+		{   0xBA, 0x78, 0x16, 0xBF, 0x8F, 0x01, 
+			0xCF, 0xEA, 0x41, 0x41, 0x40, 0xDE, 
+			0x5D, 0xAE, 0x22, 0x23, 0xB0, 0x03, 
+			0x61, 0xA3, 0x96, 0x17, 0x7A, 0x9C, 
+			0xB4, 0x10, 0xFF, 0x61, 0xF2, 0x00, 
+			0x15, 0xAD
+		}
+	};
+
+	ret = hash_handle(TEE_ALG_SHA256, (uint8_t *)sha256_tests.msg, strlen(sha256_tests.msg), out_sha256);
+	if (ret) 
+		return ret;
+
+	if (memcmp(out_sha256, sha256_tests.hash, sizeof(out_sha256))) {
+		return -1;
+	}
+
+	return 0;
+}
+
+// #define ASR_BCM_SHA_TEST
+
+#ifdef ASR_BCM_SHA_TEST
+static int bcm_sha_test(void);
+#endif
+
+int asr_bcm_sha_register(struct asr_bcm_dev *bcm_dd)
+{
+	int err, i, j;
+	struct asr_bcm_sha *sha_dd;
+
+	sha_dd = &bcm_dd->asr_sha;
+	sha_dd->dev = bcm_dd->dev;
+
+	asr_sha_local = sha_dd;
+
+	spin_lock_init(&sha_dd->lock);
+	mutex_init(&sha_dd->sha_lock);
+	mutex_init(&sha_dd->queue_lock);
+	tasklet_init(&sha_dd->done_task, asr_sha_done_task,
+					(unsigned long)sha_dd);
+	tasklet_init(&sha_dd->queue_task, asr_sha_queue_task,
+					(unsigned long)sha_dd);
+	crypto_init_queue(&sha_dd->queue, ASR_SHA_QUEUE_LENGTH);
+
+	/* don't register sha if hash verify err in tos */
+	err = tee_hwhash_func_verify();
+	if (err) 
+		return err;
+	
+	for (i = 0; i < ARRAY_SIZE(sha_algs); i++) {
+		err = crypto_register_ahash(&sha_algs[i]);
+		if (err)
+			goto err_sha_algs;
+	}
+
+#ifdef ASR_BCM_SHA_TEST
+	bcm_sha_test();
+#endif
+
+	return 0;
+
+err_sha_algs:
+	for (j = 0; j < i; j++)
+		crypto_unregister_ahash(&sha_algs[j]);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(asr_bcm_sha_register);
+
+int asr_bcm_sha_unregister(struct asr_bcm_dev *bcm_dd)
+{
+	int i;
+	struct asr_bcm_sha *sha_dd = &bcm_dd->asr_sha;
+
+
+	for (i = 0; i < ARRAY_SIZE(sha_algs); i++)
+		crypto_unregister_ahash(&sha_algs[i]);
+
+	tasklet_kill(&sha_dd->queue_task);
+	tasklet_kill(&sha_dd->done_task);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(asr_bcm_sha_unregister);
+
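+/*
+ * Illustrative sketch (not part of the original driver): how a kernel
+ * consumer could drive the ahash algorithms registered above through the
+ * generic crypto API. The ASR_BCM_SHA_EXAMPLE guard and the "abc" message
+ * are assumptions for demonstration only.
+ */
+#ifdef ASR_BCM_SHA_EXAMPLE
+static int __maybe_unused asr_bcm_sha_example(void)
+{
+	struct crypto_ahash *tfm;
+	struct ahash_request *req;
+	struct scatterlist sg;
+	DECLARE_CRYPTO_WAIT(wait);
+	u8 digest[SHA256_DIGEST_SIZE];
+	u8 *msg;
+	int err;
+
+	msg = kmalloc(3, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+	memcpy(msg, "abc", 3);
+
+	tfm = crypto_alloc_ahash("sha256", 0, 0);
+	if (IS_ERR(tfm)) {
+		kfree(msg);
+		return PTR_ERR(tfm);
+	}
+
+	req = ahash_request_alloc(tfm, GFP_KERNEL);
+	if (!req) {
+		err = -ENOMEM;
+		goto out_tfm;
+	}
+
+	sg_init_one(&sg, msg, 3);
+	ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+				   crypto_req_done, &wait);
+	ahash_request_set_crypt(req, &sg, digest, 3);
+
+	/* one-shot digest; waits for the asynchronous completion */
+	err = crypto_wait_req(crypto_ahash_digest(req), &wait);
+
+	ahash_request_free(req);
+out_tfm:
+	crypto_free_ahash(tfm);
+	kfree(msg);
+	return err;
+}
+#endif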
+
+
+#ifdef ASR_BCM_SHA_TEST
+
+static int bcm_sha_test(void)
+{
+	int ret = 0;
+	uint32_t i;
+	
+	const struct {
+		const char *msg;
+		uint8_t hash[20];
+	} sha1_tests[] = {
+		{
+			"abc", 
+			{   0xa9, 0x99, 0x3e, 0x36, 0x47, 0x06, 
+				0x81, 0x6a, 0xba, 0x3e, 0x25, 0x71, 
+				0x78, 0x50, 0xc2, 0x6c, 0x9c, 0xd0,
+				0xd8, 0x9d 
+			}
+		},
+		{
+			"asjhsdjljfdsdjjkdfwyqeuwouzxkmcxjkmwqdsjklfdfjlkdfkfs" \
+			"fkjlfskjdflioherfjjfdjkfdnkfdfdojjodfjdfjflj;sljjlfkkl" \
+			"nfnkgbhhoigfhigfopojpfjojpoffkjlfskjdflioherfjjfdjkfdn" \
+			"kfdfdojjodfjdfjfljnfnkgbhhoigfhigfoponfnkgbhhoigfhigfopojpfjo",
+			{
+				0x93, 0x84, 0x7f, 0x98, 0x22, 0x5e, 
+				0x6d, 0xf2, 0x09, 0x1c, 0xc9, 0xac, 
+				0xbb, 0x5d, 0x00, 0x2d, 0x64, 0x81, 
+				0xe3, 0xcd
+			}
+		},
+		{
+			"asjhsdjljfdsdjjkdfwyqeuwouzxkmcxjkmwqdsjklfdfjlkdfkfs" \
+			"fkjlfskjdflioherfjjfdjkfdnkfdfdojjodfjdfjflj;sljjlfkkl" \
+			"nfnkgbhhoigfhigfopojpfjojpoffkjlfskjdflioherfjjfdjkfdn" \
+			"kfdfdojjodfjdfjfljnfnkgbhhoigfhigfoponfnkgbhhoigfhigfopojpfjoewiroiowiod",
+			{
+				0x6a, 0x66, 0xc2, 0x87, 0x84, 0x36, 
+				0x14, 0x90, 0x99, 0x03, 0x90, 0xf0, 
+				0xaa, 0x7e, 0xbd, 0xc7, 0xdb, 0x38, 
+				0x54, 0x09
+			}
+		},
+		{
+			"asjhsdjljfdsdjjkdfwyqeuwouzxkmcxjkmwqds"
+			"jklfdfjlkdfkfsfkjlfskjdflioherfjjfdjkfd"
+			"nkfdfdojjodfjdfjflj;sljjlfkklnfnkgbhhoi"
+			"gfhigfopojpfjojpoffkjlfskjdflioherfjjfd"
+			"jkfdnkfdfdojjodfjdfjfljnfnkgbhhoigfhigf"
+			"oponfnkgbhhoigfhigfopojpfjoewiroiowiods"
+			"djkisijdknknkskdnknflnnesniewinoinknmdn"
+			"kknknsdnjjfsnnkfnkknslnklknfnknkflksnlk"
+			"lskldklklklnmlflmlmlfmlfml",
+			{
+				0xc4, 0x53, 0xca, 0x24, 0xfa, 0xe5,
+				0x39, 0x53, 0x08, 0x8c, 0x57, 0x1a, 
+				0x96, 0xe9, 0x64, 0x7f, 0xd5, 0xf9, 
+				0x13, 0x91
+			}
+		}
+	};
+
+	struct asr_optee_sha_reqctx ctx1;
+	struct asr_optee_sha_reqctx ctx2;
+	struct asr_optee_sha_reqctx ctx3;
+	struct asr_optee_sha_reqctx ctx4;
+	unsigned char out_sha1_1[20] = {0};
+	unsigned char out_sha1_2[20] = {0};
+	unsigned char out_sha1_3[20] = {0};
+	unsigned char out_sha1_4[20] = {0};
+
+	ret = asrbcm_optee_acquire_hash_init(&ctx1, &pta_sha_uuid, CMD_SHA_INIT, TEE_ALG_SHA1);
+	if (ret) {
+		return ret;
+	}
+
+	ret = asrbcm_optee_acquire_hash_init(&ctx2, &pta_sha_uuid, CMD_SHA_INIT, TEE_ALG_SHA1);
+	if (ret) {
+		return ret;
+	}
+
+	ret = asrbcm_optee_acquire_hash_update(&ctx1, &pta_sha_uuid, CMD_SHA_UPDATE, TEE_ALG_SHA1, 
+										(uint8_t *)sha1_tests[0].msg, strlen(sha1_tests[0].msg));
+	if (ret) {
+		return ret;
+	}
+
+	ret = asrbcm_optee_acquire_hash_init(&ctx3, &pta_sha_uuid, CMD_SHA_INIT, TEE_ALG_SHA1);
+	if (ret) {
+		return ret;
+	}
+
+	ret = asrbcm_optee_acquire_hash_update(&ctx2, &pta_sha_uuid, CMD_SHA_UPDATE, TEE_ALG_SHA1,
+					(uint8_t *)sha1_tests[1].msg, 10);
+	if (ret) {
+		return ret;
+	}
+
+	ret = asrbcm_optee_acquire_hash_update(&ctx2, &pta_sha_uuid, CMD_SHA_UPDATE, TEE_ALG_SHA1,
+					(uint8_t *)sha1_tests[1].msg + 10, strlen(sha1_tests[1].msg) - 10);
+	if (ret) {
+		return ret;
+	}
+
+	ret = asrbcm_optee_acquire_hash_final(&ctx1, &pta_sha_uuid, CMD_SHA_FINAL, TEE_ALG_SHA1, 
+																out_sha1_1, sizeof(out_sha1_1));
+	if (ret) {
+		return ret;
+	}
+
+	ret = asrbcm_optee_acquire_hash_update(&ctx3, &pta_sha_uuid, CMD_SHA_UPDATE, TEE_ALG_SHA1, 
+															(uint8_t *)sha1_tests[2].msg, 25);
+	if (ret) {
+		return ret;
+	}
+
+	ret = asrbcm_optee_acquire_hash_init(&ctx4, &pta_sha_uuid, CMD_SHA_INIT, TEE_ALG_SHA1);
+	if (ret) {
+		return ret;
+	}
+
+	ret = asrbcm_optee_acquire_hash_final(&ctx2, &pta_sha_uuid, CMD_SHA_FINAL, TEE_ALG_SHA1, 
+										out_sha1_2, sizeof(out_sha1_2));
+	if (ret) {
+		return ret;
+	}
+
+	ret = asrbcm_optee_acquire_hash_update(&ctx3, &pta_sha_uuid, CMD_SHA_UPDATE, TEE_ALG_SHA1, 
+						(uint8_t *)sha1_tests[2].msg + 25, strlen(sha1_tests[2].msg) - 25);
+	if (ret) {
+		return ret;
+	}
+
+	ret = asrbcm_optee_acquire_hash_final(&ctx3, &pta_sha_uuid, CMD_SHA_FINAL, TEE_ALG_SHA1, 
+										out_sha1_3, sizeof(out_sha1_3));
+	if (ret) {
+		return ret;
+	}
+
+	ret = asrbcm_optee_acquire_hash_update(&ctx4, &pta_sha_uuid, CMD_SHA_UPDATE, TEE_ALG_SHA1, 
+										(uint8_t *)sha1_tests[3].msg, 43);
+	if (ret) {
+		return ret;
+	}
+	ret = asrbcm_optee_acquire_hash_update(&ctx4, &pta_sha_uuid, CMD_SHA_UPDATE, TEE_ALG_SHA1,
+					 (uint8_t *)sha1_tests[3].msg + 43, strlen(sha1_tests[3].msg) - 43);
+	if (ret) {
+		return ret;
+	}
+
+	ret = asrbcm_optee_acquire_hash_final(&ctx4, &pta_sha_uuid, CMD_SHA_FINAL, TEE_ALG_SHA1, 
+										out_sha1_4, sizeof(out_sha1_4));
+	if (ret) {
+		return ret;
+	}
+
+	if (memcmp(out_sha1_1, sha1_tests[0].hash, sizeof(out_sha1_1))) {
+		printk("sha1 test 0 failed\n");
+	} else {
+		printk("sha1 test 0 pass\n");
+	}
+	if (memcmp(out_sha1_2, sha1_tests[1].hash, sizeof(out_sha1_2))) {
+		printk("sha1 test 1 failed\n");
+	} else {
+		printk("sha1 test 1 pass\n");
+	}
+	if (memcmp(out_sha1_3, sha1_tests[2].hash, sizeof(out_sha1_3))) {
+		printk("sha1 test 2 failed\n");
+	} else {
+		printk("sha1 test 2 pass\n");
+	}
+	if (memcmp(out_sha1_4, sha1_tests[3].hash, sizeof(out_sha1_4))) {
+		printk("sha1 test 3 failed\n");
+	} else {
+		printk("sha1 test 3 pass\n");
+	}
+
+	return 0;
+}
+#endif
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("wangyonggan <yongganwang@asrmicro.com>");
+MODULE_DESCRIPTION("ASR bcm sha driver");
\ No newline at end of file
diff --git a/marvell/linux/drivers/crypto/asr/bcm_optee/asr-sha-optee.h b/marvell/linux/drivers/crypto/asr/bcm_optee/asr-sha-optee.h
new file mode 100644
index 0000000..97e174e
--- /dev/null
+++ b/marvell/linux/drivers/crypto/asr/bcm_optee/asr-sha-optee.h
@@ -0,0 +1,61 @@
+#ifndef _ASR_BCM_SHA_OPTEE_H_
+#define _ASR_BCM_SHA_OPTEE_H_
+
+#include "asr-bcm-optee.h"
+#ifdef CONFIG_TEE
+#include <linux/tee_drv.h>
+#endif
+
+#define ASR_SHA_ACCESS_UUID 									\
+        { 														\
+            0xc6445f2a, 0x3365, 0x11ef, 						\
+            { 0x9e, 0x32, 0xe7, 0x0c, 0x07, 0x9f, 0x77, 0xec } 	\
+        }
+
+#define TEE_ALG_MD5                             0x50000001
+#define TEE_ALG_SHA1                            0x50000002
+#define TEE_ALG_SHA224                          0x50000003
+#define TEE_ALG_SHA256                          0x50000004
+#define TEE_ALG_SHA384                          0x50000005
+#define TEE_ALG_SHA512                          0x50000006
+
+#define HASH_CONTEXT_SIZE (256)
+
+/*
+ * hash init params
+ *
+ * [in]     pParams[0].value.a          hash algorithm type
+ * [in]     pParams[1].value.a          hash context address provided by the external caller (e.g. the kernel)
+ */
+#define CMD_SHA_INIT         0x1
+
+/*
+ * hash update params
+ * when the input address is shared memory (e.g. parameters from the kernel):
+ * [in]     pParams[0].memref.buffer    input data
+ * [in]     pParams[0].memref.size      length of input data
+ * [in]     pParams[1].value.a          hash context address provided by the external caller (e.g. the kernel)
+ *
+ * when the input address is a physical address (e.g. parameters from U-Boot):
+ * [in]     pParams[0].value.a      input data address
+ * [in]     pParams[0].value.b      length of input data
+ * [in]     pParams[1].value.a      flag: whether the address is physical
+ */
+#define CMD_SHA_UPDATE       0x2
+
+/*
+ * hash finish params
+ *
+ * [out]    pParams[0].memref.buffer    output hash
+ * [out]    pParams[0].memref.size      length of output hash
+ * [in]     pParams[1].value.a          hash context address provided by the external caller (e.g. the kernel)
+ */
+#define CMD_SHA_FINAL         0x3
+
+struct asr_optee_sha_reqctx {
+    struct asr_sha_reqctx reqctx;
+    struct tee_shm *shm;
+    struct asrbcm_tee_context asrbcm_tee_ctx;
+};
+
+#endif
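A minimal usage sketch of the three-step command protocol documented in this header, modeled on the self-test in asr-sha-optee.c above. It assumes the pta_sha_uuid object and the asrbcm_optee_acquire_hash_init/update/final helpers from that file are visible; the wrapper name below is hypothetical:

static int example_sha1_digest(const uint8_t *msg, size_t len, uint8_t out[20])
{
	struct asr_optee_sha_reqctx ctx;
	int ret;

	/* CMD_SHA_INIT: pParams[0].value.a carries the algorithm type */
	ret = asrbcm_optee_acquire_hash_init(&ctx, &pta_sha_uuid, CMD_SHA_INIT, TEE_ALG_SHA1);
	if (ret)
		return ret;

	/* CMD_SHA_UPDATE: may be issued repeatedly, one chunk of input per call */
	ret = asrbcm_optee_acquire_hash_update(&ctx, &pta_sha_uuid, CMD_SHA_UPDATE,
					       TEE_ALG_SHA1, (uint8_t *)msg, len);
	if (ret)
		return ret;

	/* CMD_SHA_FINAL: the 20-byte SHA-1 digest is returned via pParams[0].memref */
	return asrbcm_optee_acquire_hash_final(&ctx, &pta_sha_uuid, CMD_SHA_FINAL,
					       TEE_ALG_SHA1, out, 20);
}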
diff --git a/marvell/linux/drivers/crypto/asr/te200/Makefile b/marvell/linux/drivers/crypto/asr/te200/Makefile
new file mode 100644
index 0000000..444fa01
--- /dev/null
+++ b/marvell/linux/drivers/crypto/asr/te200/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_ASR_TE200) += asr-te200.o
+obj-$(CONFIG_ASR_TE200_CIPHER) += asr-cipher.o
+obj-$(CONFIG_ASR_TE200_SHA) += asr-sha.o
+obj-$(CONFIG_ASR_TE200_RSA) += asr-aca/
\ No newline at end of file
diff --git a/marvell/linux/drivers/crypto/asr/te200/asr-aca/Makefile b/marvell/linux/drivers/crypto/asr/te200/asr-aca/Makefile
new file mode 100644
index 0000000..751c359
--- /dev/null
+++ b/marvell/linux/drivers/crypto/asr/te200/asr-aca/Makefile
@@ -0,0 +1,9 @@
+#
+#  Copyright (c) 2015, ASR micro Limited. All rights reserved.
+#
+obj-y += se_aca.o
+obj-y += se_bn.o
+# RSA depends on TE200 SHA and GEU random
+ifeq ($(CONFIG_ASR_TE200_SHA),y)
+obj-y += se_rsa.o
+endif
\ No newline at end of file
diff --git a/marvell/linux/drivers/crypto/asr/te200/asr-aca/se_aca.c b/marvell/linux/drivers/crypto/asr/te200/asr-aca/se_aca.c
new file mode 100644
index 0000000..55ac5d8
--- /dev/null
+++ b/marvell/linux/drivers/crypto/asr/te200/asr-aca/se_aca.c
@@ -0,0 +1,3113 @@
+/*
+ * Copyright (c) 2020-2021, Arm Technology (China) Co., Ltd.
+ * All rights reserved.
+ *
+ * The content of this file or document is CONFIDENTIAL and PROPRIETARY
+ * to Arm Technology (China) Co., Ltd. It is subject to the terms of a
+ * License Agreement between Licensee and Arm Technology (China) Co., Ltd
+ * restricting among other things, the use, reproduction, distribution
+ * and transfer.  Each of the embodiments, including this information and
+ * any derivative work shall retain this copyright notice.
+ */
+
+#include <linux/slab.h>
+#include "se_common.h"
+#include "se_aca_internal.h"
+#include "se_aca.h"
+
+static void op_exec_cmd(uint32_t op_code,
+						int32_t len_type_id,
+						aca_op_t *A,
+						aca_op_t *B,
+						int32_t imme_B,
+						aca_op_t *C,
+						aca_op_t *R);
+
+static void op_submit_cmd(uint32_t op_code,
+						  int32_t len_type_id,
+						  aca_op_t *A,
+						  aca_op_t *B,
+						  int32_t imme_B,
+						  aca_op_t *C,
+						  aca_op_t *R);
+static void op_submit_shift_cmd(uint32_t op_code,
+								int32_t len_type_id,
+								aca_op_t *A,
+								int32_t shift_B,
+								aca_op_t *R);
+static void op_wait_cmd(void);
+
+struct last_op_status {
+	uint32_t intr_status;
+	uint32_t aca_status;
+};
+
+static volatile uint32_t g_sram_allocated_addr             = ACA_SRAM_BASE;
+static volatile int32_t g_aca_gr_allocated_id              = ACA_GR_USED_START;
+static volatile struct last_op_status g_aca_last_op_status = {0};
+
+static inline void reset_last_op_status(void)
+{
+	g_aca_last_op_status.intr_status = 0;
+	g_aca_last_op_status.aca_status  = 0;
+}
+
+#define OP_EXEC_ONE_CMD(_op_a_, _op_b_, _op_c_, _op_r_, _len_type_id_,         \
+						_op_code_)                                             \
+	do {                                                                       \
+		reset_last_op_status();                                                \
+		op_exec_cmd(_op_code_, _len_type_id_, _op_a_, _op_b_, -1, _op_c_,      \
+					_op_r_);                                                   \
+	} while (0)
+
+#define OP_EXEC_ONE_CMD_IMME_B(_op_a_, _imme_b_, _op_c_, _op_r_,               \
+							   _len_type_id_, _op_code_)                       \
+	do {                                                                       \
+		reset_last_op_status();                                                \
+		op_exec_cmd(_op_code_, _len_type_id_, _op_a_, NULL, _imme_b_, _op_c_,  \
+					_op_r_);                                                   \
+	} while (0)
+
+#define OP_EXEC_ONE_CMD_SHIFT(_op_a_, _shift_b_, _op_r_, _len_type_id_,        \
+							  _op_code_)                                       \
+	do {                                                                       \
+		reset_last_op_status();                                                \
+		op_submit_shift_cmd(_op_code_, _len_type_id_, _op_a_, _shift_b_,       \
+							_op_r_);                                           \
+		op_wait_cmd();                                                         \
+	} while (0)
+
+#ifdef ECP_DBG_PRINT_DETAIL_EN
+
+#define OP_ECP_PREPARE(_empty_)                                                \
+	do {                                                                       \
+		reset_last_op_status();                                                \
+	} while (0)
+#define OP_ECP_EXEC(_code_, _len_type_id_, _op_a_, _is_b_imme_, _b_, _op_c_,   \
+					_op_r_)                                                    \
+	do {                                                                       \
+		if (_is_b_imme_) {                                                     \
+			op_exec_cmd(ACA_OP_##_code_, _len_type_id_, _op_a_, NULL,          \
+						(int32_t)(uintptr_t)_b_, _op_c_, _op_r_);              \
+		} else {                                                               \
+			op_exec_cmd(ACA_OP_##_code_, _len_type_id_, _op_a_,                \
+						(aca_op_t *)_b_, -1, _op_c_, _op_r_);                  \
+		}                                                                      \
+	} while (0)
+#define OP_ECP_WAIT(_empty_)                                                   \
+	do {                                                                       \
+	} while (0)
+
+#else /* !ECP_DBG_PRINT_DETAIL_EN */
+
+#define OP_ECP_PREPARE(_empty_)                                                \
+	do {                                                                       \
+		reset_last_op_status();                                                \
+	} while (0)
+#define OP_ECP_EXEC(_code_, _len_type_id_, _op_a_, _is_b_imme_, _b_, _op_c_,   \
+					_op_r_)                                                    \
+	do {                                                                       \
+		if (_is_b_imme_) {                                                     \
+			op_submit_cmd(ACA_OP_##_code_, _len_type_id_, _op_a_, NULL,        \
+						  (int32_t)(uintptr_t)_b_, _op_c_, _op_r_);            \
+		} else {                                                               \
+			op_submit_cmd(ACA_OP_##_code_, _len_type_id_, _op_a_,              \
+						  (aca_op_t *)_b_, -1, _op_c_, _op_r_);                \
+		}                                                                      \
+	} while (0)
+#define OP_ECP_WAIT(_empty_)                                                   \
+	do {                                                                       \
+		op_wait_cmd();                                                         \
+	} while (0)
+
+#endif /* ECP_DBG_PRINT_DETAIL_EN */
+
+int32_t aca_engine_init(void)
+{
+	uint32_t reg = 0;
+
+	/**
+	 * Enable the ACA clock and set the FIFO watermark to 0. We don't use the FIFO watermark.
+	 */
+	reg = IO_READ32(TOP_CTL_CLOCK_CTL);
+	FIELD_SET(reg, TOP_CTL_CLOCK_CTL, ACA_CLK_EN, 1);
+	IO_WRITE32(TOP_CTL_CLOCK_CTL, reg);
+
+	/**
+	 * Mask all interrupts.
+	 */
+	IO_WRITE32(ACA_ACA_INTR_MSK, 0x7FU);
+
+	return 0;
+}
+
+void aca_engine_exit(void)
+{
+	uint32_t reg = 0;
+
+	/* disable clock */
+	reg = IO_READ32(TOP_CTL_CLOCK_CTL);
+	FIELD_SET(reg, TOP_CTL_CLOCK_CTL, ACA_CLK_EN, 0);
+	IO_WRITE32(TOP_CTL_CLOCK_CTL, reg);
+}
+
+/**
+ * write some data to ACA SRAM
+ */
+static void sram_write_data(uint32_t sram_addr,
+							uint32_t block_num,
+							const uint8_t *data)
+{
+	int32_t i = 0, byte_idx = 0;
+	uint32_t val     = 0;
+	int32_t word_num = ACA_BLOCK_NUM_TO_BITS(block_num) / 32;
+
+	/* When read/write sram, make sure clock is enabled */
+	ASSERT(IO_READ32(TOP_CTL_CLOCK_CTL) & (1 << 2));
+
+	/* write sram_waddr with word offset */
+	IO_WRITE32(ACA_SRAM_WADDR, sram_addr >> 2);
+
+	for (i = 0; i < word_num; i++) {
+		byte_idx = (word_num - i) * 4 - 1;
+		val      = ((((uint32_t)data[byte_idx]) << 0) |
+			   (((uint32_t)data[byte_idx - 1]) << 8) |
+			   (((uint32_t)data[byte_idx - 2]) << 16) |
+			   (((uint32_t)data[byte_idx - 3]) << 24));
+		IO_WRITE32(ACA_SRAM_WDATA, val);
+	}
+}
+
+/**
+ * read some data from ACA SRAM
+ */
+static void sram_read_data(uint32_t sram_addr,
+						   uint32_t block_num,
+						   uint8_t *data)
+{
+	int32_t i = 0, byte_idx = 0;
+	uint32_t val     = 0;
+	int32_t word_num = ACA_BLOCK_NUM_TO_BITS(block_num) / 32;
+
+	/* When read/write sram, make sure clock is enabled */
+	ASSERT(IO_READ32(TOP_CTL_CLOCK_CTL) & (1 << 2));
+
+	/* write sram_raddr with word offset */
+	IO_WRITE32(ACA_SRAM_RADDR, sram_addr >> 2);
+
+	for (i = 0; i < word_num; i++) {
+		byte_idx           = (word_num - i) * 4 - 1;
+		val                = IO_READ32(ACA_SRAM_RDATA);
+		data[byte_idx]     = (val >> 0) & 0xFF;
+		data[byte_idx - 1] = (val >> 8) & 0xFF;
+		data[byte_idx - 2] = (val >> 16) & 0xFF;
+		data[byte_idx - 3] = (val >> 24) & 0xFF;
+	}
+}
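A host-side illustration (not part of the driver) of the byte ordering used by sram_write_data() and sram_read_data(): the big-endian buffer is consumed from its tail, four bytes per 32-bit word, so the least significant word of the operand is transferred first. A minimal, standalone sketch:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	const uint8_t data[8] = { 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
	int word_num = sizeof(data) / 4;
	int i;

	for (i = 0; i < word_num; i++) {
		int byte_idx = (word_num - i) * 4 - 1;
		uint32_t val = ((uint32_t)data[byte_idx] << 0) |
			       ((uint32_t)data[byte_idx - 1] << 8) |
			       ((uint32_t)data[byte_idx - 2] << 16) |
			       ((uint32_t)data[byte_idx - 3] << 24);
		/* prints 0x05060708 then 0x01020304: lowest-order word first */
		printf("word %d = 0x%08x\n", i, val);
	}
	return 0;
}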
+
+/**
+ * write one word
+ */
+static void sram_write_word(uint32_t sram_addr, uint32_t data)
+{
+	/* When read/write sram, make sure clock is enabled */
+	ASSERT(IO_READ32(TOP_CTL_CLOCK_CTL) & (1 << 2));
+
+	/* write sram_waddr with word offset */
+	IO_WRITE32(ACA_SRAM_WADDR, sram_addr >> 2);
+	IO_WRITE32(ACA_SRAM_WDATA, data);
+}
+
+#if 0
+/**
+ * read one word
+ */
+static void sram_read_word(uint32_t sram_addr, uint32_t *data)
+{
+	/* When read/write sram, make sure clock is enabled */
+	ASSERT(IO_READ32(TOP_CTL_CLOCK_CTL) & (1 << 2));
+
+	/* write sram_raddr with word offset */
+	IO_WRITE32(ACA_SRAM_RADDR, sram_addr >> 2);
+	*data = IO_READ32(ACA_SRAM_RDATA);
+}
+#endif
+
+/* allocate a sram address */
+static uint32_t sram_alloc_one(uint32_t blk_num)
+{
+	uint32_t addr = g_sram_allocated_addr;
+	g_sram_allocated_addr += ACA_BLOCK_NUM_TO_BYTES(blk_num);
+
+	ASSERT(g_sram_allocated_addr <= ACA_SRAM_BASE + ACA_SRAM_SIZE);
+	return addr;
+}
+
+/* free one address */
+static void sram_free_one(uint32_t blk_num)
+{
+	ASSERT(g_sram_allocated_addr >=
+				ACA_SRAM_BASE + ACA_BLOCK_NUM_TO_BYTES(blk_num));
+	g_sram_allocated_addr -= ACA_BLOCK_NUM_TO_BYTES(blk_num);
+}
+
+/* allocate one GR */
+static int32_t gr_alloc_one(void)
+{
+	int32_t gr = g_aca_gr_allocated_id;
+	g_aca_gr_allocated_id++;
+
+	ASSERT(g_aca_gr_allocated_id <= ACA_GR_NUMBER);
+
+	return gr;
+}
+
+/* free one GR */
+static void gr_free_one(void)
+{
+	ASSERT(g_aca_gr_allocated_id > ACA_GR_USED_START);
+	g_aca_gr_allocated_id--;
+}
+
+/* init one op ctx, excluding T0 T1 */
+static void op_prepare(aca_op_t *op, gr_usage_t usage)
+{
+	uint32_t reg_off  = 0;
+	uint32_t reg_data = 0;
+
+	op->sram_addr = sram_alloc_one(op->blk_num);
+	op->gr_id     = gr_alloc_one();
+
+	/* write sram */
+	if ((usage == GR_USAGE_IN) || (usage == GR_USAGE_N) ||
+		(usage == GR_USAGE_P)) {
+		ASSERT(op->data);
+		sram_write_data(op->sram_addr, op->blk_num, op->data);
+	}
+	/* write sram if have for INOUT */
+	if (usage == GR_USAGE_INOUT) {
+		if (op->data) {
+			sram_write_data(op->sram_addr, op->blk_num, op->data);
+		}
+	}
+
+	/* config GR */
+	reg_off  = ACA_GRX_SRAM_ADDR_OFS + 4 * op->gr_id;
+	reg_data = ((op->sram_addr >> 2) & (0xFFF));
+	IO_WRITE32(reg_off, reg_data);
+
+	/* config special GRs */
+	if ((usage == GR_USAGE_N) || (usage == GR_USAGE_P)) {
+		reg_data = IO_READ32(ACA_N_P_T0_T1_USE_GRID);
+		if (usage == GR_USAGE_N) {
+			reg_data &= (~(0x1FU << 0));
+			reg_data |= ((((uint32_t)(op->gr_id)) & 0x1FU) << 0);
+		} else {
+			reg_data &= (~(0x1FU << 5));
+			reg_data |= ((((uint32_t)(op->gr_id)) & 0x1FU) << 5);
+		}
+		IO_WRITE32(ACA_N_P_T0_T1_USE_GRID, reg_data);
+	}
+}
+
+/* deinit one op ctx, excluding T0 T1 */
+static void op_release(aca_op_t *op)
+{
+	sram_free_one(op->blk_num);
+	gr_free_one();
+	op->sram_addr = ACA_SRAM_ADDR_INVALID;
+	op->gr_id     = ACA_GR_INVALID;
+}
+
+/* init T0 or T1 */
+static void op_prepare_tmp(int32_t t0_t1, uint32_t blk_num)
+{
+	uint32_t reg_off   = 0;
+	uint32_t reg_data  = 0;
+	uint32_t sram_addr = 0;
+	int32_t gr_id      = 0;
+
+	ASSERT((0 == t0_t1) || (1 == t0_t1));
+
+	sram_addr = sram_alloc_one(blk_num);
+	gr_id     = gr_alloc_one();
+
+	/* config GR */
+	reg_off  = ACA_GRX_SRAM_ADDR_OFS + 4 * gr_id;
+	reg_data = ((sram_addr >> 2) & (0xFFF));
+	IO_WRITE32(reg_off, reg_data);
+
+	/* config special GRs */
+	reg_data = IO_READ32(ACA_N_P_T0_T1_USE_GRID);
+	if (0 == t0_t1) {
+		reg_data &= (~(0x1FU << 10));
+		reg_data |= ((((uint32_t)(gr_id)) & 0x1FU) << 10);
+	} else if (1 == t0_t1) {
+		reg_data &= (~(0x1FU << 15));
+		reg_data |= ((((uint32_t)(gr_id)) & 0x1FU) << 15);
+	} else {
+		ASSERT(0);
+	}
+	IO_WRITE32(ACA_N_P_T0_T1_USE_GRID, reg_data);
+}
+
+/* deinit T0 or T1 */
+static void op_release_tmp(int32_t __unused, uint32_t blk_num)
+{
+	sram_free_one(blk_num);
+	gr_free_one();
+}
+
+/**
+ * check that the SRAM and GR resources are in the unoccupied state.
+ * Use ASSERT here because an occupied resource is unexpected.
+ */
+static void op_check_res(void)
+{
+	ASSERT(g_aca_gr_allocated_id == ACA_GR_USED_START);
+	ASSERT(g_sram_allocated_addr == ACA_SRAM_BASE);
+}
+
+/* config length type */
+static void op_cfg_len_type(int32_t len_type_id, int32_t op_bits)
+{
+	ASSERT(op_bits <= ACA_MAX_OP_BITS);
+	IO_WRITE32(ACA_GR_LEN_TYPEX_OFS + 4 * len_type_id, (op_bits & 0x1FFF));
+}
+
+/* submit one command (exclude shift related command) to command queue */
+static void op_submit_cmd(uint32_t op_code,
+						  int32_t len_type_id,
+						  aca_op_t *A,
+						  aca_op_t *B,
+						  int32_t imme_B,
+						  aca_op_t *C,
+						  aca_op_t *R)
+{
+	uint32_t reg = 0;
+	uint32_t cmd = 0;
+
+	/* When submitting command, make sure clock is enabled */
+	ASSERT(IO_READ32(TOP_CTL_CLOCK_CTL) & (1 << 2));
+
+	cmd = ((op_code & 0x1F) << 27);
+	cmd |= ((len_type_id & 0x7) << 24);
+
+	ASSERT(A);
+	ASSERT(A->gr_id != ACA_GR_INVALID);
+	cmd |= ((A->gr_id & 0x1F) << 18);
+	if (B) {
+		ASSERT(B->gr_id != ACA_GR_INVALID);
+		cmd |= ((B->gr_id & 0x1F) << 12);
+	} else if (imme_B != -1) {
+		cmd |= (0x1U << 17);
+		cmd |= ((imme_B & 0x1F) << 12);
+	} else {
+		/* This command doesn't need B(for example, MODINV), do nothing */
+	}
+
+	if (R) {
+		ASSERT(R->gr_id != ACA_GR_INVALID);
+		cmd |= ((R->gr_id & 0x1F) << 7);
+	} else {
+		/* not save to R */
+		cmd |= (0x1U << 6);
+	}
+
+	if (C) {
+		ASSERT(C->gr_id != ACA_GR_INVALID);
+		cmd |= ((C->gr_id & 0x1F) << 1);
+	}
+
+	/* Wait until there is free space in command queue. */
+	do {
+		reg = IO_READ32(ACA_ACA_STATUS);
+		if (reg & 0xFU) {
+			break;
+		}
+	} while (1);
+
+	/* Write command to ACA command queue */
+	IO_WRITE32(ACA_ACA_ENTRY, cmd);
+
+	/* Trigger op_run in ACA control in case the engine is in IDLE */
+	reg = IO_READ32(ACA_ACA_CTRL);
+	reg |= 1;
+	IO_WRITE32(ACA_ACA_CTRL, reg);
+}
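For reference when reading values written to ACA_ACA_ENTRY, the command word assembled above can be reproduced with a small helper that mirrors the same shifts and masks (a sketch, not driver code; aca_encode_cmd is a hypothetical name and covers only the non-shift, GR-operand form):

#include <stdint.h>

/*
 * Field layout, as encoded by op_submit_cmd():
 *   [31:27] op_code        [26:24] len_type_id
 *   [22:18] GR id of A     [17]    B-is-immediate flag (0 here)
 *   [16:12] GR id of B     [11:7]  GR id of R
 *   [6]     discard-R flag (0 here)
 *   [5:1]   GR id of C
 */
static inline uint32_t aca_encode_cmd(uint32_t op_code, uint32_t len_type_id,
				      uint32_t gr_a, uint32_t gr_b,
				      uint32_t gr_r, uint32_t gr_c)
{
	uint32_t cmd = (op_code & 0x1F) << 27;

	cmd |= (len_type_id & 0x7) << 24;
	cmd |= (gr_a & 0x1F) << 18;
	cmd |= (gr_b & 0x1F) << 12;
	cmd |= (gr_r & 0x1F) << 7;
	cmd |= (gr_c & 0x1F) << 1;
	return cmd;
}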
+
+/* submit one shift related command to command queue */
+static void op_submit_shift_cmd(uint32_t op_code,
+								int32_t len_type_id,
+								aca_op_t *A,
+								int32_t shift_B,
+								aca_op_t *R)
+{
+	uint32_t reg = 0;
+	uint32_t cmd = 0;
+
+	/* When submitting command, make sure clock is enabled */
+	ASSERT(IO_READ32(TOP_CTL_CLOCK_CTL) & (1 << 2));
+
+	cmd = ((op_code & 0x1F) << 27);
+	cmd |= ((len_type_id & 0x7) << 24);
+
+	ASSERT(A);
+	ASSERT(A->gr_id != ACA_GR_INVALID);
+	ASSERT(R);
+	ASSERT(R->gr_id != ACA_GR_INVALID);
+
+	cmd |= ((A->gr_id & 0x1F) << 18);
+	cmd |= ((shift_B & 0x3F) << 12);
+	cmd |= ((R->gr_id & 0x1F) << 7);
+
+	/* Wait until there is free space in command queue. */
+	do {
+		reg = IO_READ32(ACA_ACA_STATUS);
+		if (reg & 0xFU) {
+			break;
+		}
+	} while (1);
+
+	/* Write command to ACA command queue */
+	IO_WRITE32(ACA_ACA_ENTRY, cmd);
+
+	/* Trigger op_run in ACA control in case the engine is in IDLE */
+	reg = IO_READ32(ACA_ACA_CTRL);
+	reg |= 1;
+	IO_WRITE32(ACA_ACA_CTRL, reg);
+}
+
+/* Wait command finish */
+static void op_wait_cmd(void)
+{
+	uint32_t reg = 0;
+	/* Wait FIFO empty and engine done signal */
+	do {
+		reg = IO_READ32(ACA_ACA_INTR_STAT);
+		if ((reg & (1 << 5)) != 0) {
+			/* intr_opfifo_empty_engine_done status is set */
+			break;
+		}
+	} while (1);
+
+	/* Write clear interrupt status */
+	IO_WRITE32(ACA_ACA_INTR_STAT, reg);
+
+	/**
+	 * Update the last op status. Use |= so that status bits accumulate across
+	 * batched commands. intr_status is RW1M, so write the value back to clear it.
+	 */
+	g_aca_last_op_status.intr_status |= reg;
+	g_aca_last_op_status.aca_status |= IO_READ32(ACA_ACA_STATUS);
+}
+
+static void op_exec_cmd(uint32_t op_code,
+						int32_t len_type_id,
+						aca_op_t *A,
+						aca_op_t *B,
+						int32_t imme_B,
+						aca_op_t *C,
+						aca_op_t *R)
+{
+	op_submit_cmd(op_code, len_type_id, A, B, imme_B, C, R);
+	op_wait_cmd();
+}
+
+static inline uint32_t op_read_last_status(void)
+{
+	return g_aca_last_op_status.aca_status;
+}
+static inline int32_t op_is_last_op_final_aul_carry(void)
+{
+	return ((op_read_last_status() & (0x1U << 9)) ? (1) : (0));
+}
+static inline int32_t op_is_last_op_xor_zero(void)
+{
+	return ((op_read_last_status() & (0x1U << 8)) ? (1) : (0));
+}
+
+static inline uint32_t op_read_last_intr_status(void)
+{
+	return g_aca_last_op_status.intr_status;
+}
+static inline int32_t op_intr_is_mod_n_zero(void)
+{
+	return ((op_read_last_intr_status() & (0x1U << 8)) ? (1) : (0));
+}
+static inline int32_t op_intr_is_red_time_byd63(void)
+{
+	return ((op_read_last_intr_status() & (0x1U << 7)) ? (1) : (0));
+}
+static inline int32_t op_intr_is_mult_red_err(void)
+{
+	return ((op_read_last_intr_status() & (0x1U << 6)) ? (1) : (0));
+}
+static inline int32_t op_intr_is_modinv_zero(void)
+{
+	return ((op_read_last_intr_status() & (0x1U << 4)) ? (1) : (0));
+}
+static inline int32_t op_intr_is_div_zero(void)
+{
+	return ((op_read_last_intr_status() & (0x1U << 3)) ? (1) : (0));
+}
+
+/* The following is the aca_op interfaces. */
+
+/**
+ * Change op's size to the new block number, copying the existing data if any.
+ * Note: target_blk_num must be > 0
+ */
+static int32_t op_change_size(aca_op_t *op, uint32_t target_blk_num)
+{
+	int32_t ret  = SE_SUCCESS;
+	uint8_t *tmp = NULL;
+
+	ASSERT(target_blk_num > 0);
+
+	if (op->blk_num != target_blk_num) {
+		tmp = kcalloc(1, ACA_BLOCK_NUM_TO_BYTES(target_blk_num), GFP_KERNEL);
+		if (NULL == tmp) {
+			printk("Calloc %d failed!\n",
+						 ACA_BLOCK_NUM_TO_BYTES(target_blk_num));
+			ret = SE_ERROR_OOM;
+			goto end;
+		}
+		if (op->data) {
+			/* copy original data only when op->data is not NULL */
+			if (target_blk_num > op->blk_num) {
+				memcpy(
+					tmp + ACA_BLOCK_NUM_TO_BYTES(target_blk_num - op->blk_num),
+					op->data,
+					ACA_BLOCK_NUM_TO_BYTES(op->blk_num));
+			} else {
+				memcpy(tmp,
+							op->data + ACA_BLOCK_NUM_TO_BYTES(op->blk_num -
+															  target_blk_num),
+							ACA_BLOCK_NUM_TO_BYTES(target_blk_num));
+			}
+			kfree(op->data);
+			op->data = NULL;
+		}
+		op->data    = tmp;
+		op->blk_num = target_blk_num;
+	} else {
+		/* do nothing */
+	}
+
+end:
+	return ret;
+}
+
+/* init one aca operation */
+void aca_op_init(aca_op_t *op)
+{
+	ASSERT(op);
+	memset(op, 0, sizeof(aca_op_t));
+	op->sram_addr = ACA_SRAM_ADDR_INVALID;
+	op->gr_id     = ACA_GR_INVALID;
+}
+
+/* free one aca operation */
+void aca_op_free(aca_op_t *op)
+{
+	ASSERT(op);
+	if (op->data) {
+		kfree(op->data);
+	}
+	memset(op, 0, sizeof(aca_op_t));
+	op->sram_addr = ACA_SRAM_ADDR_INVALID;
+	op->gr_id     = ACA_GR_INVALID;
+}
+
+static int32_t _aca_op_init_np(aca_op_t *np)
+{
+	aca_op_init(np);
+	np->blk_num = ACA_NP_BLK_NUM;
+	np->data    = kcalloc(1, ACA_BLOCK_NUM_TO_BYTES(np->blk_num), GFP_KERNEL);
+	if (NULL == np->data) {
+		printk("Calloc %d failed!\n",
+					 ACA_BLOCK_NUM_TO_BYTES(np->blk_num));
+		return SE_ERROR_OOM;
+	} else {
+		return SE_SUCCESS;
+	}
+}
+
+/* dump one aca operation's data in hex string format */
+#define __UINT8_GET_LOW(__d__) ((__d__) & (0x0F))
+#define __UINT8_GET_HIGH(__d__) (((__d__) >> 4) & (0x0F))
+#define _INT_TO_CHAR(__d__)                                                    \
+	(((__d__) >= (0x0A)) ? ((__d__) - (0x0A) + 'A') : ((__d__) + '0'))
+static void dbg_dump_data(const char *msg, uint8_t *data, size_t size)
+{
+	uint8_t *str_buf    = NULL;
+	size_t str_buf_size = 0;
+	uint8_t *p          = NULL;
+	size_t i            = 0;
+	size_t start        = 0;
+
+	str_buf_size = 2 * size + 1;
+	str_buf      = kcalloc(1, str_buf_size, GFP_KERNEL);
+	if (NULL == str_buf) {
+		printk("Calloc %d failed!\n", str_buf_size);
+		return;
+	}
+
+	/* data is in big endian */
+	for (i = 0; i < size; i++) {
+		if (data[i] != 0) {
+			start = i;
+			break;
+		}
+	}
+	p = str_buf;
+	if (i != size) {
+		for (i = start; i < size; i++) {
+			*p = _INT_TO_CHAR(__UINT8_GET_HIGH(data[i]));
+			p++;
+			*p = _INT_TO_CHAR(__UINT8_GET_LOW(data[i]));
+			p++;
+		}
+		*p = '\0';
+	} else {
+		*p = '0';
+		p++;
+		*p = '0';
+		p++;
+		*p = '\0';
+	}
+	printk("%s: %s\n", msg, str_buf);
+
+	if (NULL != str_buf) {
+		kfree(str_buf);
+	}
+	return;
+}
+
+void aca_op_dump(const char *name, aca_op_t *op_ctx)
+{
+	uint8_t *tmp_buf = NULL;
+
+	if (!op_ctx) {
+		return;
+	}
+	printk("########## Start Dump %s ##########\n", name);
+
+	if (op_ctx->sram_addr != ACA_SRAM_ADDR_INVALID) {
+		ASSERT(0 != op_ctx->blk_num);
+		tmp_buf = kcalloc(1, ACA_BLOCK_NUM_TO_BYTES(op_ctx->blk_num), GFP_KERNEL);
+		if (NULL == tmp_buf) {
+			printk("Calloc %d failed!\n",
+						 ACA_BLOCK_NUM_TO_BYTES(op_ctx->blk_num));
+			goto end;
+		}
+		sram_read_data(op_ctx->sram_addr, op_ctx->blk_num, tmp_buf);
+		printk("CE SRAM Addr: 0x%x, Block num: %d\n", op_ctx->sram_addr,
+					   op_ctx->blk_num);
+		dbg_dump_data("CE SRAM Data (Big Endian)", tmp_buf,
+					  ACA_BLOCK_NUM_TO_BYTES(op_ctx->blk_num));
+	} else if (op_ctx->data) {
+		ASSERT(0 != op_ctx->blk_num);
+		printk(
+			"CE SWAPPED Addr(64 bits): H(32): 0x%x, L(32): 0x%x, Block num: "
+			"%d\n",
+			(uint32_t)((((uint64_t)((uintptr_t)(op_ctx->data))) >> 32) &
+					   0xFFFFFFFFU),
+			(uint32_t)((((uint64_t)((uintptr_t)(op_ctx->data)))) & 0xFFFFFFFFU),
+			op_ctx->blk_num);
+		dbg_dump_data("SWAPPED Data (Big Endian)", op_ctx->data,
+					  ACA_BLOCK_NUM_TO_BYTES(op_ctx->blk_num));
+	} else {
+		printk("No Data!\n");
+	}
+
+	if (op_ctx->gr_id != ACA_GR_INVALID) {
+		printk("CE GR ID: %d\n", op_ctx->gr_id);
+	}
+
+end:
+	if (NULL != tmp_buf) {
+		kfree(tmp_buf);
+	}
+	printk("========== End Dump %s ==========\n", name);
+	return;
+}
+
+/**
+ * copy src to dst, with dst->blk_num == target_blk_num. src must contain data
+ */
+static int32_t aca_op_copy_change_size(aca_op_t *dst,
+									   aca_op_t *src,
+									   uint32_t target_blk_num)
+{
+	int32_t ret      = SE_SUCCESS;
+	uint8_t *tmp_buf = NULL;
+
+	CHECK_OP_CTX(src);
+	ASSERT(dst);
+
+	ASSERT(src->blk_num);
+
+	tmp_buf = kcalloc(1, ACA_BLOCK_NUM_TO_BYTES(target_blk_num), GFP_KERNEL);
+	if (NULL == tmp_buf) {
+		printk("Calloc %d failed!\n",
+					 ACA_BLOCK_NUM_TO_BYTES(target_blk_num));
+		ret = SE_ERROR_OOM;
+		goto end;
+	}
+
+	/* same logic from op_change_size */
+	if (target_blk_num > src->blk_num) {
+		memcpy(tmp_buf +
+						ACA_BLOCK_NUM_TO_BYTES(target_blk_num - src->blk_num),
+					src->data,
+					ACA_BLOCK_NUM_TO_BYTES(src->blk_num));
+	} else {
+		memcpy(tmp_buf,
+					src->data +
+						ACA_BLOCK_NUM_TO_BYTES(src->blk_num - target_blk_num),
+					ACA_BLOCK_NUM_TO_BYTES(target_blk_num));
+	}
+
+	if (dst->blk_num) {
+		ASSERT(dst->data);
+		kfree(dst->data);
+		dst->data    = NULL;
+		dst->blk_num = 0;
+	} else {
+		ASSERT(NULL == dst->data);
+	}
+	dst->data    = tmp_buf;
+	dst->blk_num = target_blk_num;
+
+end:
+	return ret;
+}
+
+int32_t aca_op_copy(aca_op_t *dst, aca_op_t *src)
+{
+	return aca_op_copy_change_size(dst, src, src->blk_num);
+}
+
+/* this function is also used in ECP point mul in this file */
+/* write u32 */
+int32_t aca_op_import_u32(aca_op_t *op, uint32_t val)
+{
+	int32_t ret  = SE_SUCCESS;
+	uint8_t *tmp = NULL;
+
+	ASSERT(op);
+
+	/* prepare new data first */
+	tmp = kcalloc(1, ACA_BLOCK_NUM_TO_BYTES(1), GFP_KERNEL);
+	if (NULL == tmp) {
+		ret = SE_ERROR_OOM;
+		goto end;
+	}
+
+	/* set as big endian */
+	tmp[ACA_BLOCK_NUM_TO_BYTES(1) - 4 + 0] = (val >> 24) & 0xFFU;
+	tmp[ACA_BLOCK_NUM_TO_BYTES(1) - 4 + 1] = (val >> 16) & 0xFFU;
+	tmp[ACA_BLOCK_NUM_TO_BYTES(1) - 4 + 2] = (val >> 8) & 0xFFU;
+	tmp[ACA_BLOCK_NUM_TO_BYTES(1) - 4 + 3] = (val >> 0) & 0xFFU;
+
+	if (op->data) {
+		/* free old data if have */
+		ASSERT(op->blk_num > 0);
+		kfree(op->data);
+		op->data    = NULL;
+		op->blk_num = 0;
+	}
+
+	op->data    = tmp;
+	op->blk_num = 1;
+end:
+	return ret;
+}
+
+/* import bignumber data in big endian */
+int32_t aca_op_import_bin(aca_op_t *op, const uint8_t *data, size_t size)
+{
+	int32_t ret          = SE_SUCCESS;
+	uint32_t req_blk_num = 0;
+	uint8_t *tmp         = NULL;
+
+	ASSERT(op);
+	ASSERT((0 == size) || (data != NULL));
+
+	/* not exceed max OP bits */
+	if (size > (ACA_MAX_OP_BITS >> 3)) {
+		printk("size: %d\n", size);
+		return SE_ERROR_BAD_PARAMS;
+	}
+
+	req_blk_num = ACA_BYTES_TO_BLOCK_NUM(size);
+
+	if (0 == req_blk_num) {
+		/**
+		 * if required blk num is 0, change to 1 so that after calling this
+		 * function, the op->data and op->blk_num always meet:
+		 * op->data != NULL and op->blk_num > 0
+		 */
+		req_blk_num = 1;
+	}
+
+	/* prepare new data first */
+	tmp = kcalloc(1, ACA_BLOCK_NUM_TO_BYTES(req_blk_num), GFP_KERNEL);
+	if (NULL == tmp) {
+		ret = SE_ERROR_OOM;
+		goto end;
+	}
+
+	/* import as big endian, copy directly */
+	if (size) {
+		memcpy(tmp + ACA_BLOCK_NUM_TO_BYTES(req_blk_num) - size, data,
+					size);
+	}
+
+	if (op->data) {
+		/* free old data if have */
+		ASSERT(op->blk_num > 0);
+		kfree(op->data);
+		op->data    = NULL;
+		op->blk_num = 0;
+	}
+
+	op->data    = tmp;
+	op->blk_num = req_blk_num;
+end:
+	return ret;
+}
+
+/* export bignumber data as big endian */
+int32_t aca_op_export_bin(aca_op_t *op, uint8_t *buf, size_t size)
+{
+	int32_t ret           = SE_SUCCESS;
+	int32_t bitlen        = 0;
+	uint32_t req_buf_size = 0;
+
+	CHECK_OP_CTX(op);
+	ASSERT((0 == size) || (buf != NULL));
+
+	bitlen = aca_op_bitlen((aca_op_t *)op);
+
+	req_buf_size = (bitlen + 7) / 8;
+	if (req_buf_size > size) {
+		ret = SE_ERROR_SHORT_BUFFER;
+		goto end;
+	}
+
+	memset(buf, 0, size - req_buf_size);
+	memcpy(buf + size - req_buf_size,
+				op->data + ACA_BLOCK_NUM_TO_BYTES(op->blk_num) - req_buf_size,
+				req_buf_size);
+end:
+	return ret;
+}
+
+/*
+ * Count leading zero bits in a given integer
+ */
+static size_t _sram_clz(const uint32_t x)
+{
+	size_t j;
+	uint32_t mask = (uint32_t)1 << (32 - 1);
+
+	for (j = 0; j < 32; j++) {
+		if (x & mask) {
+			break;
+		}
+		mask >>= 1;
+	}
+
+	return j;
+}
+/**
+ * Get bitlength.
+ *
+ * Note: this function is special; it also accepts an aca_op_t that has no
+ * imported data.
+ * Note: don't add defined(SE_LITE_XXX) to restrict this function, because it
+ * is also used by other aca_op_xxx functions in this file.
+ */
+uint32_t aca_op_bitlen(aca_op_t *op)
+{
+	int32_t i    = 0;
+	uint32_t val = 0;
+	int32_t size = 0;
+
+	ASSERT(op);
+
+	/* return 0 if this aca_op doesn't have data */
+	if (op->blk_num == 0) {
+		ASSERT(NULL == op->data);
+		return 0;
+	}
+
+	size = ACA_BLOCK_NUM_TO_BYTES(op->blk_num);
+
+	for (i = 0; i < size; i += 4) {
+		/* get one u32 value */
+		val = (((uint32_t)(op->data[i + 0]) << 24) |
+			   ((uint32_t)(op->data[i + 1]) << 16) |
+			   ((uint32_t)(op->data[i + 2]) << 8) |
+			   ((uint32_t)(op->data[i + 3]) << 0));
+		if (val != 0) {
+			break;
+		}
+	}
+	if (i == size) {
+		return 0;
+	} else {
+		return ((size - i) * 8 - _sram_clz(val));
+	}
+}
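A worked instance of the scan above (illustrative, not from the source): assuming a 128-bit ACA block (16 bytes), a one-block operand whose big-endian data is fifteen 0x00 bytes followed by 0x05 makes the loop stop at the last word (val = 0x00000005, i = 12), so the bit length is (16 - 12) * 8 - _sram_clz(0x00000005) = 32 - 29 = 3, matching 5 = 0b101.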
+
+/**
+ * Get the bit value at a 0-based position.
+ * Note: don't add defined(SE_LITE_XXX) to restrict this function, because it
+ * is also used by other aca_op_xxx functions in this file.
+ */
+int32_t aca_op_get_bit_value(aca_op_t *op, size_t pos)
+{
+	uint32_t tmp         = 0;
+	uint32_t byte_offset = 0;
+	uint32_t bit_offset  = 0;
+
+	CHECK_OP_CTX(op);
+
+	byte_offset = (uint32_t)(pos) / 8;
+	bit_offset  = (uint32_t)(pos) % 8;
+	if (byte_offset >= ACA_BLOCK_NUM_TO_BYTES(op->blk_num)) {
+		return 0;
+	}
+
+	tmp = op->data[ACA_BLOCK_NUM_TO_BYTES(op->blk_num) - byte_offset - 1];
+	tmp &= (0x1U << bit_offset);
+	return ((tmp == 0) ? (0) : (1));
+}
+
+/**
+ * Calculate np.
+ * The output np block number is always ACA_NP_BLK_NUM(2), and the data buffer
+ * should be prepared by caller.
+ *
+ * Update NP. Algorithm:
+ *
+ * Only support: k ≤ 144, P = floor (2^(k+71)/N);
+ *
+ */
+static int32_t op_cal_np_case1(aca_op_t *np, aca_op_t *N, uint32_t kbits)
+{
+	int32_t ret         = SE_SUCCESS;
+	uint32_t value      = 0;
+	uint32_t op_bit_len = 0;
+	uint32_t op_blk_num = 0;
+	/* high = 2^(k + 135) */
+	aca_op_t high = ACA_OP_INIT_DATA;
+	/* tmp to save np */
+	aca_op_t tmp = ACA_OP_INIT_DATA;
+	aca_op_t tmp_n = ACA_OP_INIT_DATA;
+
+	/* internal used, skip check ctx */
+	ASSERT(np->blk_num == ACA_NP_BLK_NUM);
+	ASSERT(kbits <= 144);
+
+	op_bit_len = SE_ROUND_UP(kbits + 71 + 1, ACA_BLOCK_BITS);
+	op_blk_num = ACA_BITS_TO_BLOCK_NUM(op_bit_len);
+	/* init high */
+	high.blk_num = op_blk_num;
+	high.data    = NULL;
+
+	/* init tmp */
+	tmp.blk_num = op_blk_num;
+	tmp.data    = NULL;
+
+	ret = aca_op_copy_change_size(&tmp_n, N, op_blk_num);
+	CHECK_SUCCESS_GOTO(ret, error);
+
+	/* prepare ops */
+	op_prepare(&tmp_n, GR_USAGE_IN);
+	op_prepare(&tmp, GR_USAGE_OUT);
+	op_prepare(&high, GR_USAGE_INOUT);
+	op_prepare_tmp(0, op_blk_num);
+	op_prepare_tmp(1, op_blk_num);
+
+	/* config length type */
+	op_cfg_len_type(ACA_LENTYPE_ID_BLK_OP, op_bit_len);
+
+	/* zero high */
+	OP_EXEC_ONE_CMD_IMME_B(&high, 0, NULL, &high, ACA_LENTYPE_ID_BLK_OP,
+						   ACA_OP_AND);
+
+	/* set 2^(k + 71) bits to 1 */
+	value = (0x1U << ((kbits + 71) % 32));
+	sram_write_word(high.sram_addr + 4 * ((kbits + 71) / 32), value);
+
+	/* P = 2^(k + 71) / N */
+	OP_EXEC_ONE_CMD(&high, &tmp_n, NULL, &tmp, ACA_LENTYPE_ID_BLK_OP, ACA_OP_DIV);
+	/* Check div by 0 */
+	if (op_intr_is_div_zero()) {
+		ret = SE_ERROR_INVAL_MOD;
+		goto end;
+	}
+	/* read NP from sram */
+	sram_read_data(tmp.sram_addr, np->blk_num, np->data);
+
+end:
+	op_release(&tmp_n);
+	op_release(&tmp);
+	op_release(&high);
+	op_release_tmp(0, op_blk_num);
+	op_release_tmp(1, op_blk_num);
+	op_check_res();
+	aca_op_free(&tmp_n);
+error:
+	return ret;
+}
+
+/**
+ * Calculate np.
+ * The output np block number is always ACA_NP_BLK_NUM(2), and the data buffer
+ * should be prepared by caller.
+ *
+ * Update NP. Algorithm:
+ *
+ * Only support: k > 144, P = floor(top/bottom);
+ *                        top= 2^215; bottom=ceil(N/(2^(k-144)));
+ *
+ */
+static int32_t op_cal_np_case2(aca_op_t *np, aca_op_t *N, uint32_t kbits)
+{
+	int32_t ret         = SE_SUCCESS;
+	uint32_t value      = 0;
+	uint32_t op_bit_len = 0;
+	uint32_t op_blk_num = 0;
+	uint32_t shift_size = 0, cur_shift_size = 0;
+	aca_op_t top    = ACA_OP_INIT_DATA;
+	aca_op_t bottom = ACA_OP_INIT_DATA;
+	aca_op_t tmp    = ACA_OP_INIT_DATA;
+	aca_op_t tmp_n  = ACA_OP_INIT_DATA;
+
+	/* internal used, skip check ctx */
+	ASSERT(np->blk_num == ACA_NP_BLK_NUM);
+	ASSERT(kbits > 144);
+
+	op_blk_num = SE_MAX(ACA_BITS_TO_BLOCK_NUM(215 + 1), N->blk_num);
+	op_bit_len = ACA_BLOCK_NUM_TO_BITS(op_blk_num);
+
+	/* init top, bottom and tmp */
+	top.blk_num    = op_blk_num;
+	top.data       = NULL;
+	bottom.blk_num = op_blk_num;
+	bottom.data    = NULL;
+	tmp.blk_num    = op_blk_num;
+	tmp.data       = NULL;
+
+	ret = aca_op_copy_change_size(&tmp_n, N, op_blk_num);
+	CHECK_SUCCESS_GOTO(ret, error);
+
+	/* prepare ops */
+	op_prepare(&tmp_n, GR_USAGE_IN);
+	op_prepare(&top, GR_USAGE_INOUT);
+	op_prepare(&bottom, GR_USAGE_INOUT);
+	op_prepare(&tmp, GR_USAGE_OUT);
+	op_prepare_tmp(0, op_blk_num);
+	op_prepare_tmp(1, op_blk_num);
+
+	/* config length type */
+	op_cfg_len_type(ACA_LENTYPE_ID_BLK_OP, op_bit_len);
+
+	/* Init top = 2^215 */
+	OP_EXEC_ONE_CMD_IMME_B(&top, 0, NULL, &top, ACA_LENTYPE_ID_BLK_OP,
+						   ACA_OP_AND);
+	/* set 2^(215) bits to 1 */
+	value = (0x1U << (215 % 32));
+	sram_write_word(top.sram_addr + 4 * (215 / 32), value);
+
+	/* calculating N/(2^(k-144)) */
+
+	/* set bottom = N - 1 so that the +1 after shifting yields ceil(N / 2^(k-144)) */
+	OP_EXEC_ONE_CMD_IMME_B(&tmp_n, 1, NULL, &bottom, ACA_LENTYPE_ID_BLK_OP,
+						   ACA_OP_SUB);
+
+	/* start to shift right */
+	shift_size = kbits - 144;
+	while (shift_size) {
+		cur_shift_size = SE_MIN(shift_size, 0x40);
+		/* bottom == bottom >> cur_shift_size */
+		OP_EXEC_ONE_CMD_SHIFT(&bottom, cur_shift_size - 1, &bottom,
+							  ACA_LENTYPE_ID_BLK_OP, ACA_OP_SHR0);
+		shift_size -= cur_shift_size;
+	}
+
+	/* Ceiling by bottom == bottom + 1 */
+	OP_EXEC_ONE_CMD_IMME_B(&bottom, 1, NULL, &bottom, ACA_LENTYPE_ID_BLK_OP,
+						   ACA_OP_ADD);
+
+	/* P = (2^215)/ceil(N/(2^(k-144))) */
+	OP_EXEC_ONE_CMD(&top, &bottom, NULL, &tmp, ACA_LENTYPE_ID_BLK_OP,
+					ACA_OP_DIV);
+	/* Check div by 0 */
+	if (op_intr_is_div_zero()) {
+		ret = SE_ERROR_INVAL_MOD;
+		goto end;
+	}
+	/* read NP from sram */
+	sram_read_data(tmp.sram_addr, np->blk_num, np->data);
+
+end:
+	op_release(&tmp_n);
+	op_release(&tmp);
+	op_release(&top);
+	op_release(&bottom);
+	op_release_tmp(0, op_blk_num);
+	op_release_tmp(1, op_blk_num);
+	op_check_res();
+	aca_op_free(&tmp_n);
+error:
+	return ret;
+}
+
+/**
+ * The function uses physical data pointers to calculate and output
+ * the Barrett tag Np.
+ *
+ *  For N bitsize > 2*A+2*X it uses truncated sizes:
+ *      Np = truncated(2^(3*A+3*X-1) / ceiling(n/(2^(N-2*A-2*X)));
+ *  For  N bitsize <= 2*A+2*X:
+ *      Np = truncated(2^(N+A+X-1) / n);
+ *  Here A means ACA word size in bits(64), X means ACA extra bits(8)
+ *  N means N bit size(kbits)
+ */
+static int32_t op_cal_np(aca_op_t *np, aca_op_t *N, uint32_t kbits)
+{
+	if (kbits > 144) {
+		return op_cal_np_case2(np, N, kbits);
+	} else {
+		return op_cal_np_case1(np, N, kbits);
+	}
+}
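A worked instance of the dispatch above, using the constants named in the comment (A = 64, X = 8, so the threshold 2*A + 2*X = 144): for a 128-bit modulus, kbits = 128 <= 144 and op_cal_np_case1() computes Np = floor(2^(128+71) / N) = floor(2^199 / N); for a 256-bit modulus, kbits = 256 > 144 and op_cal_np_case2() computes Np = floor(2^215 / ceil(N / 2^(256-144))) = floor(2^215 / ceil(N / 2^112)). In both cases the result is at most 216 bits and fits the two-block ACA_NP_BLK_NUM buffer supplied by the caller.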
+
+int32_t aca_op_cmp_bn(aca_op_t *op_a, aca_op_t *op_b, int32_t *result)
+{
+	int32_t ret         = SE_SUCCESS;
+	uint32_t op_bit_len = 0;
+	aca_op_t tmp = ACA_OP_INIT_DATA;
+	aca_op_t *tmp_ptr1 = op_a;
+	aca_op_t *tmp_ptr2 = op_b;
+
+	CHECK_OP_CTX(op_a);
+	CHECK_OP_CTX(op_b);
+	ASSERT(result);
+
+	/* init tmp */
+	aca_op_init(&tmp);
+
+	op_bit_len = ACA_BLOCK_NUM_TO_BITS(SE_MAX(op_a->blk_num, op_b->blk_num));
+
+	/* config length type */
+	op_cfg_len_type(ACA_LENTYPE_ID_BLK_OP, op_bit_len);
+
+	if (op_a->blk_num < op_b->blk_num) {
+		/* copy op_a to tmp */
+		ret = aca_op_copy_change_size(&tmp, op_a, op_b->blk_num);
+		CHECK_SUCCESS_GOTO(ret, error);
+		tmp_ptr1 = ( aca_op_t *)&tmp;
+	} else if (op_a->blk_num > op_b->blk_num) {
+		/* copy op_b to tmp */
+		ret = aca_op_copy_change_size(&tmp, op_b, op_a->blk_num);
+		CHECK_SUCCESS_GOTO(ret, error);
+		tmp_ptr2 = ( aca_op_t *)&tmp;
+	}
+
+	op_prepare(tmp_ptr1, GR_USAGE_IN);
+	op_prepare(tmp_ptr2, GR_USAGE_IN);
+
+	OP_EXEC_ONE_CMD(tmp_ptr1, tmp_ptr2, NULL, NULL, ACA_LENTYPE_ID_BLK_OP, ACA_OP_XOR);
+	if (op_is_last_op_xor_zero()) {
+		/* XOR result is 0, a == b */
+		*result = 0;
+		ret     = SE_SUCCESS;
+		goto end;
+	}
+
+	OP_EXEC_ONE_CMD(tmp_ptr1, tmp_ptr2, NULL, NULL, ACA_LENTYPE_ID_BLK_OP, ACA_OP_SUB);
+	if (op_is_last_op_final_aul_carry()) {
+		/* ALU carry, a < b */
+		*result = -1;
+	} else {
+		*result = 1;
+	}
+
+end:
+	op_release(tmp_ptr1);
+	op_release(tmp_ptr2);
+	op_check_res();
+error:
+	aca_op_free(&tmp);
+	return ret;
+}
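The two-step comparison above (XOR-is-zero for equality, then SUB with the final ALU carry flag for ordering) has a direct host-side analogue; a minimal sketch, not driver code, in which the unsigned wrap-around of the subtraction plays the role of the carry/borrow status bit:

#include <stdint.h>
#include <assert.h>

static int cmp_like_aca(uint64_t a, uint64_t b)
{
	if ((a ^ b) == 0)
		return 0;	/* XOR result is zero  -> a == b */
	if (a - b > a)		/* subtraction wrapped -> borrow -> a < b */
		return -1;
	return 1;
}

int main(void)
{
	assert(cmp_like_aca(5, 5) == 0);
	assert(cmp_like_aca(3, 9) == -1);
	assert(cmp_like_aca(9, 3) == 1);
	return 0;
}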
+
+int32_t aca_op_cmp_u32(aca_op_t *op_a, uint32_t b, int32_t *result)
+{
+	int32_t ret          = SE_SUCCESS;
+	uint8_t tmp_a_buf[4] = {0};
+	uint32_t tmp_a       = 0;
+
+	ASSERT(result);
+
+	if (op_a->blk_num != 0) {
+		ASSERT(NULL != op_a->data);
+
+		ret = aca_op_export_bin(op_a, tmp_a_buf, sizeof(tmp_a_buf));
+		if ((int32_t)SE_ERROR_SHORT_BUFFER == ret) {
+			/* op_a can't fill into 32bits buffer, a > b */
+			*result = 1;
+			ret     = SE_SUCCESS;
+			goto end;
+		}
+		ASSERT(SE_SUCCESS == ret);
+
+		tmp_a = ((tmp_a_buf[3]) | (tmp_a_buf[2] << 8) | (tmp_a_buf[1] << 16) |
+				 (tmp_a_buf[0] << 24));
+	} else {
+		tmp_a = 0;
+	}
+
+	if (tmp_a == b) {
+		*result = 0;
+	} else if (tmp_a > b) {
+		*result = 1;
+	} else {
+		*result = -1;
+	}
+
+end:
+	return ret;
+}
+
+int32_t aca_op_cmp_bn_equal(aca_op_t *op_a, aca_op_t *op_b, int32_t *result)
+{
+	int32_t ret         = SE_SUCCESS;
+	uint32_t op_bit_len = 0;
+	aca_op_t tmp = ACA_OP_INIT_DATA;
+	aca_op_t *tmp_ptr1 = op_a;
+	aca_op_t *tmp_ptr2 = op_b;
+
+	CHECK_OP_CTX(op_a);
+	CHECK_OP_CTX(op_b);
+	ASSERT(result);
+
+	/* init tmp */
+	aca_op_init(&tmp);
+	op_bit_len = ACA_BLOCK_NUM_TO_BITS(SE_MAX(op_a->blk_num, op_b->blk_num));
+
+	/* config length type */
+	op_cfg_len_type(ACA_LENTYPE_ID_BLK_OP, op_bit_len);
+
+	if (op_a->blk_num < op_b->blk_num) {
+		/* copy op_a to tmp */
+		ret = aca_op_copy_change_size(&tmp, op_a, op_b->blk_num);
+		CHECK_SUCCESS_GOTO(ret, error);
+		tmp_ptr1 = ( aca_op_t *)&tmp;
+	} else if (op_a->blk_num > op_b->blk_num) {
+		/* copy op_b to tmp */
+		ret = aca_op_copy_change_size(&tmp, op_b, op_a->blk_num);
+		CHECK_SUCCESS_GOTO(ret, error);
+		tmp_ptr2 = ( aca_op_t *)&tmp;
+	}
+
+	op_prepare(tmp_ptr1, GR_USAGE_IN);
+	op_prepare(tmp_ptr2, GR_USAGE_IN);
+
+	OP_EXEC_ONE_CMD(tmp_ptr1, tmp_ptr2, NULL, NULL, ACA_LENTYPE_ID_BLK_OP, ACA_OP_XOR);
+	if (op_is_last_op_xor_zero()) {
+		/* XOR result is 0, a == b */
+		*result = 0;
+	} else {
+		*result = 1;
+	}
+
+	op_release(tmp_ptr1);
+	op_release(tmp_ptr2);
+	op_check_res();
+error:
+	aca_op_free(&tmp);
+	return ret;
+}
+
+
+/**
+ * This function covers add_mod and sub_mod, because they are both ALU OPs.
+ * Requirement: op_a and op_b bit lengths <= op_n's bit length, because
+ * MOD_RED is used first to reduce op_a/op_b, and if their bit lengths exceeded
+ * op_n's, the MOD_RED could take a very long time.
+ */
+static int32_t _aca_op_add_sub_mod(aca_op_t *op_r,
+								   aca_op_t *op_a,
+								   aca_op_t *op_b,
+								   aca_op_t *op_n,
+								   int32_t is_mod_add)
+{
+	int32_t ret    = SE_SUCCESS;
+	aca_op_t tmp_a = ACA_OP_INIT_DATA;
+	aca_op_t tmp_b = ACA_OP_INIT_DATA;
+	/* use tmp_r to support IO same */
+	aca_op_t tmp_r      = ACA_OP_INIT_DATA;
+	uint32_t op_bit_len = 0;
+	uint32_t op_blk_num = 0;
+
+	ASSERT(op_r);
+	CHECK_OP_CTX(op_a);
+	CHECK_OP_CTX(op_b);
+	CHECK_OP_CTX(op_n);
+
+	op_bit_len = aca_op_bitlen(op_n);
+	/**
+	 * checks that op_a and op_b bit length <= op_n bitlen.
+	 */
+	ASSERT(aca_op_bitlen(op_a) <= op_bit_len);
+	ASSERT(aca_op_bitlen(op_b) <= op_bit_len);
+
+	op_blk_num = ACA_BITS_TO_BLOCK_NUM(op_bit_len);
+
+	/* init tmp_a and tmp_b */
+	aca_op_init(&tmp_a);
+	aca_op_init(&tmp_b);
+	aca_op_init(&tmp_r);
+	tmp_r.blk_num = op_blk_num;
+	tmp_r.data    = NULL;
+
+	/* reset op_r size if necessary */
+	ret = op_change_size(op_r, op_blk_num);
+	CHECK_SUCCESS_GOTO(ret, error);
+	ret = aca_op_copy_change_size(&tmp_a, op_a, op_blk_num);
+	CHECK_SUCCESS_GOTO(ret, error);
+	ret = aca_op_copy_change_size(&tmp_b, op_b, op_blk_num);
+	CHECK_SUCCESS_GOTO(ret, error);
+
+	/* prepare ops */
+	op_prepare(op_n, GR_USAGE_N);
+	op_prepare_tmp(0, op_blk_num);
+	op_prepare(&tmp_a, GR_USAGE_IN);
+	op_prepare(&tmp_b, GR_USAGE_IN);
+	op_prepare(&tmp_r, GR_USAGE_OUT);
+
+	/* config length type */
+	op_cfg_len_type(ACA_LENTYPE_ID_BIT_OP, op_bit_len);
+
+	/* Call MODRED first */
+	OP_EXEC_ONE_CMD(&tmp_a, op_n, NULL, &tmp_a, ACA_LENTYPE_ID_BIT_OP,
+					ACA_OP_MODRED);
+	if (op_intr_is_mod_n_zero() || op_intr_is_mult_red_err()) {
+		ret = SE_ERROR_INVAL_MOD;
+		goto end;
+	}
+	OP_EXEC_ONE_CMD(&tmp_b, op_n, NULL, &tmp_b, ACA_LENTYPE_ID_BIT_OP,
+					ACA_OP_MODRED);
+	if (op_intr_is_mod_n_zero() || op_intr_is_mult_red_err()) {
+		ret = SE_ERROR_INVAL_MOD;
+		goto end;
+	}
+	/* call mod_add or mod_sub */
+	OP_EXEC_ONE_CMD(&tmp_a, &tmp_b, NULL, &tmp_r, ACA_LENTYPE_ID_BIT_OP,
+					(is_mod_add ? ACA_OP_MODADD : ACA_OP_MODSUB));
+	if (op_intr_is_mod_n_zero() || op_intr_is_red_time_byd63()) {
+		ret = SE_ERROR_INVAL_MOD;
+		goto end;
+	}
+	/* read result */
+	sram_read_data(tmp_r.sram_addr, op_r->blk_num, op_r->data);
+
+end:
+	op_release(op_n);
+	op_release_tmp(0, op_blk_num);
+	op_release(&tmp_a);
+	op_release(&tmp_b);
+	op_release(&tmp_r);
+	op_check_res();
+error:
+	aca_op_free(&tmp_a);
+	aca_op_free(&tmp_b);
+	aca_op_free(&tmp_r);
+	return ret;
+}
+
+int32_t
+aca_op_add_mod(aca_op_t *op_r, aca_op_t *op_a, aca_op_t *op_b, aca_op_t *op_n)
+{
+	return _aca_op_add_sub_mod(op_r, op_a, op_b, op_n, 1);
+}
+
+int32_t
+aca_op_sub_mod(aca_op_t *op_r, aca_op_t *op_a, aca_op_t *op_b, aca_op_t *op_n)
+{
+	return _aca_op_add_sub_mod(op_r, op_a, op_b, op_n, 0);
+}
+
+/**
+ * MOD_INV is valid only:
+ * 1. N is odd
+ * 2. GCD(A, N) == 1
+ * Although the HW doesn't require A <= N, for ease of use we require that A's
+ * bit length <= N's block bits.
+ * Requirements:
+ * 1. A bitlen <= N block bits
+ * 2. N is odd.
+ */
+int32_t aca_op_inv_mod(aca_op_t *op_r, aca_op_t *op_a, aca_op_t *op_n)
+{
+	int32_t ret         = SE_SUCCESS;
+	aca_op_t tmp_a      = ACA_OP_INIT_DATA;
+	aca_op_t tmp_n      = ACA_OP_INIT_DATA;
+	aca_op_t tmp_r      = ACA_OP_INIT_DATA;
+	uint32_t op_bit_len = 0;
+	uint32_t op_blk_num = 0;
+
+	ASSERT(op_r);
+	CHECK_OP_CTX(op_a);
+	CHECK_OP_CTX(op_n);
+
+	ASSERT(aca_op_bitlen(op_a) <= ACA_BLOCK_NUM_TO_BITS(op_n->blk_num));
+
+	if (aca_op_get_bit_value(op_n, 0) != 1) {
+		/* return invalid mode if N is not odd. */
+		ret = SE_ERROR_INVAL_MOD;
+		goto error;
+	}
+
+	/* for modular N of modINV, use op_n's block bits as op bits */
+	op_bit_len = ACA_BLOCK_NUM_TO_BITS(op_n->blk_num);
+
+	/* use op_n's ceil_128(N_bit_len + 16) as op block number. */
+	op_blk_num = ACA_BITS_TO_BLOCK_NUM(op_bit_len + 16);
+
+	/* init tmp_a, tmp_n, tmp_r */
+	aca_op_init(&tmp_a);
+	aca_op_init(&tmp_n);
+	aca_op_init(&tmp_r);
+	tmp_r.blk_num = op_blk_num;
+	tmp_r.data    = NULL;
+
+	/* change op_r size to op_n's block size */
+	ret = op_change_size(op_r, op_n->blk_num);
+	CHECK_SUCCESS_GOTO(ret, error);
+	/* copy op_a to tmp_a, op_n to tmp_n */
+	ret = aca_op_copy_change_size(&tmp_a, op_a, op_blk_num);
+	CHECK_SUCCESS_GOTO(ret, error);
+	ret = aca_op_copy_change_size(&tmp_n, op_n, op_blk_num);
+	CHECK_SUCCESS_GOTO(ret, error);
+
+	/* prepare ops */
+	op_prepare(&tmp_r, GR_USAGE_OUT);
+	op_prepare(&tmp_a, GR_USAGE_IN);
+	op_prepare(&tmp_n, GR_USAGE_N);
+	op_prepare_tmp(0, op_blk_num);
+	op_prepare_tmp(1, op_blk_num);
+
+	/* config length type */
+	op_cfg_len_type(ACA_LENTYPE_ID_BLK_OP, op_bit_len);
+
+	/* call mod_inv */
+	OP_EXEC_ONE_CMD(&tmp_a, NULL, NULL, &tmp_r, ACA_LENTYPE_ID_BLK_OP,
+					ACA_OP_MODINV);
+	if (op_intr_is_modinv_zero()) {
+		ret = SE_ERROR_INVAL_MOD;
+		goto end;
+	}
+	/**
+	 * mod_inv is valid only if GCD(A, N) == 1, so check that tmp_a == 1 (using 1 ^ 1 = 0)
+	 */
+	OP_EXEC_ONE_CMD_IMME_B(&tmp_a, 1, NULL, NULL, ACA_LENTYPE_ID_BLK_OP,
+						   ACA_OP_XOR);
+	if (!op_is_last_op_xor_zero()) {
+		/* XOR result != 0, tmp_a != 1 */
+		ret = SE_ERROR_NOT_ACCEPTABLE;
+		goto end;
+	}
+
+	/**
+	 * Everything is OK, read the result.
+	 * Read only op_r->blk_num blocks here, skipping the high zero data.
+	 */
+	sram_read_data(tmp_r.sram_addr, op_r->blk_num, op_r->data);
+
+end:
+	op_release(&tmp_r);
+	op_release(&tmp_a);
+	op_release(&tmp_n);
+	op_release_tmp(0, op_blk_num);
+	op_release_tmp(1, op_blk_num);
+	op_check_res();
+error:
+	aca_op_free(&tmp_a);
+	aca_op_free(&tmp_n);
+	aca_op_free(&tmp_r);
+	return ret;
+}
+
+/**
+ * This function covers mul_mod and exp_mod, because they are both MULT OPs.
+ * Requirement: both op_a bit length and op_b bit length <= op_n bit length
+ */
+static int32_t _aca_op_mul_exp_mod(aca_op_t *op_r,
+								   aca_op_t *op_a,
+								   aca_op_t *op_b,
+								   aca_op_t *op_n,
+								   int32_t is_exp_mod)
+{
+	int32_t ret         = SE_SUCCESS;
+	aca_op_t tmp_a      = ACA_OP_INIT_DATA;
+	aca_op_t tmp_b      = ACA_OP_INIT_DATA;
+	aca_op_t tmp_n      = ACA_OP_INIT_DATA;
+	aca_op_t tmp_r      = ACA_OP_INIT_DATA;
+	aca_op_t tmp_np     = ACA_OP_INIT_DATA;
+	uint32_t op_bit_len = 0;
+	uint32_t op_blk_num = 0;
+
+#define np (&(tmp_np))
+
+	ASSERT(op_r);
+	CHECK_OP_CTX(op_a);
+	CHECK_OP_CTX(op_b);
+	CHECK_OP_CTX(op_n);
+
+	op_bit_len = aca_op_bitlen(op_n);
+
+	/* Check op_a and op_b's bit length <= op_n's bit length */
+	ASSERT(aca_op_bitlen(op_a) <= op_bit_len);
+	ASSERT(aca_op_bitlen(op_b) <= op_bit_len);
+
+	/* use ceil_128(N_bit_len + 2) as the op block number. This applies to
+	 * both exp_mod and mul_mod */
+	op_blk_num = ACA_BITS_TO_BLOCK_NUM(op_bit_len + 2);
+
+	/* init tmp_a, tmp_n, tmp_r */
+	aca_op_init(&tmp_a);
+	aca_op_init(&tmp_b);
+	aca_op_init(&tmp_n);
+	aca_op_init(&tmp_r);
+	tmp_r.blk_num = op_blk_num;
+	tmp_r.data    = NULL;
+
+	/* init np */
+	ret = _aca_op_init_np(np);
+	CHECK_SUCCESS_GOTO(ret, error);
+
+	/* change op_r size to op_n's block size */
+	ret = op_change_size(op_r, op_n->blk_num);
+	CHECK_SUCCESS_GOTO(ret, error);
+
+	/* copy op_a to tmp_a, op_b to tmp_b, op_n to tmp_n */
+	ret = aca_op_copy_change_size(&tmp_a, op_a, op_blk_num);
+	CHECK_SUCCESS_GOTO(ret, error);
+	ret = aca_op_copy_change_size(&tmp_b, op_b, op_blk_num);
+	CHECK_SUCCESS_GOTO(ret, error);
+	ret = aca_op_copy_change_size(&tmp_n, op_n, op_blk_num);
+	CHECK_SUCCESS_GOTO(ret, error);
+
+	/* calculate np */
+	ret = op_cal_np(np, op_n, op_bit_len);
+	CHECK_SUCCESS_GOTO(ret, error);
+
+	/* prepare ops */
+	op_prepare(&tmp_r, GR_USAGE_OUT);
+	op_prepare(&tmp_a, GR_USAGE_IN);
+	op_prepare(&tmp_b, GR_USAGE_IN);
+	op_prepare(&tmp_n, GR_USAGE_N);
+	op_prepare(np, GR_USAGE_P);
+	op_prepare_tmp(0, op_blk_num);
+
+	/* config length type */
+	op_cfg_len_type(ACA_LENTYPE_ID_BIT_OP, op_bit_len);
+
+	OP_EXEC_ONE_CMD(&tmp_a, &tmp_b, NULL, &tmp_r, ACA_LENTYPE_ID_BIT_OP,
+					is_exp_mod ? ACA_OP_MODEXP : ACA_OP_MODMUL);
+	if (op_intr_is_mod_n_zero() || op_intr_is_mult_red_err()) {
+		ret = SE_ERROR_INVAL_MOD;
+		goto end;
+	}
+	/* read result */
+	sram_read_data(tmp_r.sram_addr, op_r->blk_num, op_r->data);
+
+end:
+	op_release(&tmp_r);
+	op_release(&tmp_a);
+	op_release(&tmp_b);
+	op_release(&tmp_n);
+	op_release(np);
+	op_release_tmp(0, op_blk_num);
+	op_check_res();
+error:
+	aca_op_free(&tmp_a);
+	aca_op_free(&tmp_b);
+	aca_op_free(&tmp_n);
+	aca_op_free(&tmp_r);
+	aca_op_free(np);
+#undef np
+	return ret;
+}
+
+int32_t
+aca_op_exp_mod(aca_op_t *op_r, aca_op_t *op_a, aca_op_t *op_e, aca_op_t *op_n)
+{
+	return _aca_op_mul_exp_mod(op_r, op_a, op_e, op_n, 1);
+}
+
+int32_t
+aca_op_mul_mod(aca_op_t *op_r, aca_op_t *op_a, aca_op_t *op_b, aca_op_t *op_n)
+{
+	return _aca_op_mul_exp_mod(op_r, op_a, op_b, op_n, 0);
+}
+
+/**
+ * Call SHIFT_D directly.
+ * Requirements: shift_num <= 64
+ */
+int32_t aca_op_shift_r(aca_op_t *op_a, int32_t shift_num)
+{
+	int32_t ret         = SE_SUCCESS;
+	uint32_t op_bit_len = 0;
+
+	CHECK_OP_CTX(op_a);
+	/* supports max of 64bit shift */
+	ASSERT(shift_num <= 64);
+
+	op_bit_len = ACA_BLOCK_NUM_TO_BITS(op_a->blk_num);
+
+	/* prepare ops */
+	op_prepare(op_a, GR_USAGE_INOUT);
+
+	/* config length type */
+	op_cfg_len_type(ACA_LENTYPE_ID_BLK_OP, op_bit_len);
+
+	OP_EXEC_ONE_CMD_SHIFT(op_a, shift_num - 1, op_a, ACA_LENTYPE_ID_BLK_OP,
+						  ACA_OP_SHR0);
+	/* read result */
+	sram_read_data(op_a->sram_addr, op_a->blk_num, op_a->data);
+
+	op_release(op_a);
+	op_check_res();
+	return ret;
+}
+
+/**
+ * Call MOD_RED directly.
+ *
+ * Requirements: op_a bit length < op_n's bit length.
+ */
+int32_t aca_op_mod_red(aca_op_t *op_r, aca_op_t *op_a, aca_op_t *op_n)
+{
+	int32_t ret         = SE_SUCCESS;
+	aca_op_t tmp_r      = ACA_OP_INIT_DATA;
+	aca_op_t tmp_a      = ACA_OP_INIT_DATA;
+	uint32_t op_bit_len = 0;
+	uint32_t op_blk_num = 0;
+
+	ASSERT(op_r);
+	CHECK_OP_CTX(op_a);
+	CHECK_OP_CTX(op_n);
+
+	/**
+	 * checks op_a bit length <= op_n blk bits.
+	 */
+	ASSERT(aca_op_bitlen(op_a) <= aca_op_bitlen(op_n));
+
+	/* use op_n's block bits as op bits */
+	op_bit_len = ACA_BLOCK_NUM_TO_BITS(op_n->blk_num);
+	op_blk_num = op_n->blk_num;
+
+	aca_op_init(&tmp_r);
+	tmp_r.blk_num = op_blk_num;
+	tmp_r.data    = NULL;
+	aca_op_init(&tmp_a);
+
+	/* reset op_r size if necessary */
+	ret = op_change_size(op_r, op_blk_num);
+	CHECK_SUCCESS_GOTO(ret, error);
+
+	ret = aca_op_copy_change_size(&tmp_a, op_a, op_blk_num);
+	CHECK_SUCCESS_GOTO(ret, error);
+
+	/* prepare ops */
+	op_prepare(&tmp_a, GR_USAGE_IN);
+	op_prepare(op_n, GR_USAGE_IN);
+	op_prepare_tmp(0, op_blk_num);
+	op_prepare(&tmp_r, GR_USAGE_OUT);
+
+	/* config length type */
+	op_cfg_len_type(ACA_LENTYPE_ID_BLK_OP, op_bit_len);
+
+	/* Call MODRED */
+	OP_EXEC_ONE_CMD(&tmp_a, op_n, NULL, &tmp_r, ACA_LENTYPE_ID_BLK_OP,
+					ACA_OP_MODRED);
+	if (op_intr_is_mod_n_zero() || op_intr_is_mult_red_err()) {
+		ret = SE_ERROR_INVAL_MOD;
+		goto end;
+	}
+
+	/* read result */
+	sram_read_data(tmp_r.sram_addr, op_r->blk_num, op_r->data);
+
+end:
+	op_release(&tmp_r);
+	op_release(&tmp_a);
+	op_release(op_n);
+	op_release_tmp(0, op_blk_num);
+	op_check_res();
+error:
+	aca_op_free(&tmp_r);
+	aca_op_free(&tmp_a);
+	return ret;
+}
+
+/************************* Used by RSA signing blinding ***********************/
+int32_t aca_op_mul(aca_op_t *op_r, aca_op_t *op_a, aca_op_t *op_b)
+{
+	int32_t ret         = SE_SUCCESS;
+	uint32_t op_bit_len = 0;
+	uint32_t op_blk_num = 0;
+	aca_op_t tmp_r      = ACA_OP_INIT_DATA;
+	aca_op_t tmp = ACA_OP_INIT_DATA;
+	aca_op_t *tmp_ptr1 = op_a;
+	aca_op_t *tmp_ptr2 = op_b;
+
+	ASSERT(op_r);
+	CHECK_OP_CTX(op_a);
+	CHECK_OP_CTX(op_b);
+
+	/* calculate op block number */
+	op_blk_num = SE_MAX(op_a->blk_num, op_b->blk_num);
+	op_bit_len = ACA_BLOCK_NUM_TO_BITS(op_blk_num);
+
+	/* init tmp_r and tmp */
+	aca_op_init(&tmp_r);
+	tmp_r.blk_num = op_blk_num;
+	tmp_r.data    = NULL;
+	aca_op_init(&tmp);
+
+	if (op_a->blk_num < op_b->blk_num) {
+		/* copy op_a to tmp */
+		ret = aca_op_copy_change_size(&tmp, op_a, op_blk_num);
+		CHECK_SUCCESS_GOTO(ret, end);
+		tmp_ptr1 = ( aca_op_t *)&tmp;
+	} else if (op_a->blk_num > op_b->blk_num) {
+		/* copy op_b to tmp */
+		ret = aca_op_copy_change_size(&tmp, op_b, op_blk_num);
+		CHECK_SUCCESS_GOTO(ret, end);
+		tmp_ptr2 = ( aca_op_t *)&tmp;
+	}
+
+	/* reset op_r size */
+	ret = op_change_size(op_r, op_a->blk_num + op_b->blk_num);
+	CHECK_SUCCESS_GOTO(ret, end);
+	ASSERT((op_r->blk_num > op_blk_num) &&
+				(op_r->blk_num <= 2 * op_blk_num));
+
+	/* prepare ops */
+	op_prepare(tmp_ptr1, GR_USAGE_IN);
+	op_prepare(tmp_ptr2, GR_USAGE_IN);
+	op_prepare(&tmp_r, GR_USAGE_OUT);
+
+	/* config length type */
+	op_cfg_len_type(ACA_LENTYPE_ID_BLK_OP, op_bit_len);
+
+	/* MUL_LOW */
+	OP_EXEC_ONE_CMD(tmp_ptr1, tmp_ptr2, NULL, &tmp_r, ACA_LENTYPE_ID_BLK_OP,
+					ACA_OP_MUL_LOW);
+
+	/* read low */
+	sram_read_data(tmp_r.sram_addr, op_blk_num,
+				   op_r->data +
+					   ACA_BLOCK_NUM_TO_BYTES(op_r->blk_num - op_blk_num));
+
+	/* MUL_HIGH */
+	OP_EXEC_ONE_CMD(tmp_ptr1, tmp_ptr2, NULL, &tmp_r, ACA_LENTYPE_ID_BLK_OP,
+					ACA_OP_MUL_HIGH);
+	/* read high */
+	sram_read_data(tmp_r.sram_addr, op_r->blk_num - op_blk_num, op_r->data);
+
+	op_release(tmp_ptr1);
+	op_release(tmp_ptr2);
+	op_release(&tmp_r);
+	op_check_res();
+end:
+	aca_op_free(&tmp_r);
+	aca_op_free(&tmp);
+	return ret;
+}
+
+int32_t aca_op_add(aca_op_t *op_r, aca_op_t *op_a, aca_op_t *op_b)
+{
+	int32_t ret         = SE_SUCCESS;
+	uint32_t op_bit_len = 0;
+	uint32_t op_blk_num = 0;
+	int32_t is_carry    = 0;
+	aca_op_t tmp_r      = ACA_OP_INIT_DATA;
+	aca_op_t tmp = ACA_OP_INIT_DATA;
+	aca_op_t *tmp_ptr1 = op_a;
+	aca_op_t *tmp_ptr2 = op_b;
+
+	ASSERT(op_r);
+	CHECK_OP_CTX(op_a);
+	CHECK_OP_CTX(op_b);
+
+	/* calculate op block number */
+	op_blk_num = SE_MAX(op_a->blk_num, op_b->blk_num);
+	op_bit_len = ACA_BLOCK_NUM_TO_BITS(op_blk_num);
+
+	/* init tmp_r and tmp */
+	aca_op_init(&tmp_r);
+	tmp_r.blk_num = op_blk_num;
+	tmp_r.data    = NULL;
+	aca_op_init(&tmp);
+
+	/* reset op_r size to op_blk_num; the carry-out is handled later */
+	ret = op_change_size(op_r, op_blk_num);
+	CHECK_SUCCESS_GOTO(ret, end);
+
+	if (op_a->blk_num < op_b->blk_num) {
+		/* copy op_a to tmp */
+		ret = aca_op_copy_change_size(&tmp, op_a, op_blk_num);
+		CHECK_SUCCESS_GOTO(ret, end);
+		tmp_ptr1 = ( aca_op_t *)&tmp;
+	}
+
+	if (op_a->blk_num > op_b->blk_num) {
+		/* copy op_b to tmp */
+		ret = aca_op_copy_change_size(&tmp, op_b, op_blk_num);
+		CHECK_SUCCESS_GOTO(ret, end);
+		tmp_ptr2 = ( aca_op_t *)&tmp;
+	}
+
+	/* prepare ops */
+	op_prepare(tmp_ptr1, GR_USAGE_IN);
+	op_prepare(tmp_ptr2, GR_USAGE_IN);
+	op_prepare(&tmp_r, GR_USAGE_OUT);
+
+	/* config length type */
+	op_cfg_len_type(ACA_LENTYPE_ID_BLK_OP, op_bit_len);
+
+	/* ADD */
+	OP_EXEC_ONE_CMD(tmp_ptr1, tmp_ptr2, NULL, &tmp_r, ACA_LENTYPE_ID_BLK_OP,
+					ACA_OP_ADD);
+	is_carry = op_is_last_op_final_aul_carry();
+
+	sram_read_data(tmp_r.sram_addr, op_blk_num, op_r->data);
+
+	op_release(tmp_ptr1);
+	op_release(tmp_ptr2);
+	op_release(&tmp_r);
+	op_check_res();
+
+	/**
+	 * handle carry bit here, don't change size before op_release, because
+	 * op_a/op_b may equal to op_r
+	 */
+	if ((SE_SUCCESS == ret) && (is_carry)) {
+		ret = op_change_size(op_r, op_r->blk_num + 1);
+		CHECK_SUCCESS_GOTO(ret, end);
+		/* set bit op_bit_len (the carry bit) to 1 */
+		op_r->data[ACA_BLOCK_NUM_TO_BYTES(1) - 1] = 1;
+	}
+end:
+	aca_op_free(&tmp_r);
+	aca_op_free(&tmp);
+	return ret;
+}
+
+/* return SE_ERROR_NEGATIVE_VALUE if a < b */
+int32_t aca_op_sub(aca_op_t *op_r, aca_op_t *op_a, aca_op_t *op_b)
+{
+	int32_t ret         = SE_SUCCESS;
+	uint32_t op_bit_len = 0;
+	uint32_t op_blk_num = 0;
+	aca_op_t tmp_r      = ACA_OP_INIT_DATA;
+	aca_op_t tmp = ACA_OP_INIT_DATA;
+	aca_op_t *tmp_ptr1 = op_a;
+	aca_op_t *tmp_ptr2 = op_b;
+
+	ASSERT(op_r);
+	CHECK_OP_CTX(op_a);
+	CHECK_OP_CTX(op_b);
+
+	/* calculate op block number */
+	op_blk_num = SE_MAX(op_a->blk_num, op_b->blk_num);
+	op_bit_len = ACA_BLOCK_NUM_TO_BITS(op_blk_num);
+
+	/* init tmp_r and tmp */
+	aca_op_init(&tmp_r);
+	tmp_r.blk_num = op_blk_num;
+	tmp_r.data    = NULL;
+	aca_op_init(&tmp);
+
+	/* reset op_r size to op_blk_num. Note the borrow case is handled later */
+	ret = op_change_size(op_r, op_blk_num);
+	CHECK_SUCCESS_GOTO(ret, error);
+
+	if (op_a->blk_num < op_b->blk_num) {
+		/* copy op_a to tmp */
+		ret = aca_op_copy_change_size(&tmp, op_a, op_blk_num);
+		CHECK_SUCCESS_GOTO(ret, error);
+		tmp_ptr1 = ( aca_op_t *)&tmp;
+	}
+
+	if (op_a->blk_num > op_b->blk_num) {
+		/* copy op_b to tmp */
+		ret = aca_op_copy_change_size(&tmp, op_b, op_blk_num);
+		CHECK_SUCCESS_GOTO(ret, error);
+		tmp_ptr2 = ( aca_op_t *)&tmp;
+	}
+
+	/* prepare ops */
+	op_prepare(tmp_ptr1, GR_USAGE_IN);
+	op_prepare(tmp_ptr2, GR_USAGE_IN);
+	op_prepare(&tmp_r, GR_USAGE_OUT);
+
+	/* config length type */
+	op_cfg_len_type(ACA_LENTYPE_ID_BLK_OP, op_bit_len);
+
+	/* SUB */
+	OP_EXEC_ONE_CMD(tmp_ptr1, tmp_ptr2, NULL, &tmp_r, ACA_LENTYPE_ID_BLK_OP,
+					ACA_OP_SUB);
+	if (op_is_last_op_final_aul_carry()) {
+		ret = SE_ERROR_NEGATIVE_VALUE;
+		goto end;
+	}
+
+	sram_read_data(tmp_r.sram_addr, op_blk_num, op_r->data);
+end:
+	op_release(tmp_ptr1);
+	op_release(tmp_ptr2);
+	op_release(&tmp_r);
+	op_check_res();
+error:
+	aca_op_free(&tmp_r);
+	aca_op_free(&tmp);
+	return ret;
+}
+
+/**
+ * Calculate MODEXP when E > N.
+ * Algorithm:
+ * R = A^E mod N
+ *     A^E = A^(EM * EQ + ER) = A^(EM * EQ) * A^ER = (A^EM)^EQ * A^ER
+ *
+ * E = EM * EQ + ER, where:
+ * EM is fixed to 1 << (n_bit_len - 1), i.e. 2^(n_bit_len - 1), so that
+ * E / EM reduces to a simple right shift.
+ * By mathematical analysis we get:
+ * EQ <= (2^n_bit_len) - 1
+ * ER <= (2^n_bit_len) - 1
+ * so E <= 2^(n_bit_len - 1) * ((2^n_bit_len) - 1) + (2^n_bit_len) - 1
+ *      <= 2^(2*n_bit_len - 1) + 2^(n_bit_len - 1) - 1
+ * In this implementation, however, EQ and ER are built with sram_write_data
+ * followed by shifts, and the written block number must not exceed eq's
+ * block number. From the condition:
+ *
+ *     tmp_blk_num = op_e->blk_num - ACA_BITS_TO_BLOCK_NUM(SE_ROUND_DOWN(
+ *                          (op_n_bit_len - 1), ACA_BLOCK_BITS));
+ *     ASSERT(tmp_blk_num <= tmp_eq.blk_num);
+ *
+ * the maximum supported E bit length is:
+ *
+ *          2 * ROUND_DOWN((N_bit_len - 1), ACA_BLOCK_BITS) + ACA_BLOCK_BITS
+ * Note: the E bit length is also required to be > the N bit length.
+ **/
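+/*
+ * Illustration with small numbers (ignoring block-size constraints):
+ * take n_bit_len = 8, so EM = 2^7 = 128. For E = 1000:
+ *     EQ = 1000 >> 7 = 7, ER = 1000 & 127 = 104 (7 * 128 + 104 = 1000),
+ * hence A^1000 = (A^128)^7 * A^104 (mod N).
+ */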
+
+int32_t aca_op_exp_mod_large_e(aca_op_t *op_r,
+							   aca_op_t *op_a,
+							   aca_op_t *op_e,
+							   aca_op_t *op_n)
+{
+	int32_t ret           = SE_SUCCESS;
+	aca_op_t tmp_a        = ACA_OP_INIT_DATA;
+	aca_op_t tmp_er       = ACA_OP_INIT_DATA;
+	aca_op_t tmp_eq       = ACA_OP_INIT_DATA;
+	aca_op_t tmp_em       = ACA_OP_INIT_DATA;
+	aca_op_t tmp_n        = ACA_OP_INIT_DATA;
+	aca_op_t tmp_r        = ACA_OP_INIT_DATA;
+	aca_op_t tmp_np       = ACA_OP_INIT_DATA;
+	uint32_t op_n_bit_len = 0;
+	uint32_t op_blk_num   = 0;
+	uint32_t tmp_blk_num  = 0;
+	uint32_t value        = 0;
+
+#define np (&(tmp_np))
+
+	CHECK_OP_CTX(op_a);
+	CHECK_OP_CTX(op_e);
+	CHECK_OP_CTX(op_n);
+
+	op_n_bit_len = aca_op_bitlen(op_n);
+
+	/* Check op_a's bit length <= op_n's bit length */
+	ASSERT(aca_op_bitlen(op_a) <= op_n_bit_len);
+	ASSERT(aca_op_bitlen(op_e) > op_n_bit_len);
+	ASSERT(aca_op_bitlen(op_e) <=
+				(2 * SE_ROUND_DOWN((op_n_bit_len - 1), ACA_BLOCK_BITS) +
+				 ACA_BLOCK_BITS));
+
+	/* use ceil_to_block(N_bit_len + 2) as the op block number. The same
+	 * rule applies to exp_mod and mul_mod */
+	op_blk_num = ACA_BITS_TO_BLOCK_NUM(op_n_bit_len + 2);
+
+	/* init tmp_a, tmp_n, tmp_r */
+	aca_op_init(&tmp_a);
+	aca_op_init(&tmp_er);
+	aca_op_init(&tmp_em);
+	aca_op_init(&tmp_eq);
+	aca_op_init(&tmp_n);
+	aca_op_init(&tmp_r);
+	tmp_r.blk_num  = op_blk_num;
+	tmp_r.data     = NULL;
+	tmp_er.blk_num = op_blk_num;
+	tmp_er.data    = NULL;
+	tmp_em.blk_num = op_blk_num;
+	tmp_em.data    = NULL;
+	tmp_eq.blk_num = op_blk_num;
+	tmp_eq.data    = NULL;
+
+	/* init np */
+	ret = _aca_op_init_np(np);
+	CHECK_SUCCESS_GOTO(ret, error);
+
+	/* copy op_a to tmp_a, op_e to tmp_e, op_n to tmp_n */
+	ret = aca_op_copy_change_size(&tmp_a, op_a, op_blk_num);
+	CHECK_SUCCESS_GOTO(ret, error);
+	ret = aca_op_copy_change_size(&tmp_n, op_n, op_blk_num);
+	CHECK_SUCCESS_GOTO(ret, error);
+
+	/* change op_r size to op_n's block size */
+	ret = op_change_size(op_r, op_n->blk_num);
+	CHECK_SUCCESS_GOTO(ret, error);
+
+	/* calculate np */
+	ret = op_cal_np(np, op_n, op_n_bit_len);
+	CHECK_SUCCESS_GOTO(ret, error);
+
+	/* prepare ops */
+	op_prepare(&tmp_r, GR_USAGE_OUT);
+	op_prepare(&tmp_a, GR_USAGE_IN);
+	op_prepare(&tmp_n, GR_USAGE_N);
+	op_prepare(&tmp_er, GR_USAGE_INOUT);
+	op_prepare(&tmp_em, GR_USAGE_INOUT);
+	op_prepare(&tmp_eq, GR_USAGE_INOUT);
+	op_prepare(np, GR_USAGE_P);
+	op_prepare_tmp(0, op_blk_num);
+
+	/* config length type */
+	op_cfg_len_type(ACA_LENTYPE_ID_BIT_OP, op_n_bit_len);
+	op_cfg_len_type(ACA_LENTYPE_ID_BLK_OP, ACA_BLOCK_NUM_TO_BITS(op_blk_num));
+
+	/* Zero EM, ER, EQ */
+	OP_EXEC_ONE_CMD_IMME_B(&tmp_em, 0, NULL, &tmp_em, ACA_LENTYPE_ID_BLK_OP,
+						   ACA_OP_AND);
+	OP_EXEC_ONE_CMD_IMME_B(&tmp_er, 0, NULL, &tmp_er, ACA_LENTYPE_ID_BLK_OP,
+						   ACA_OP_AND);
+	OP_EXEC_ONE_CMD_IMME_B(&tmp_eq, 0, NULL, &tmp_eq, ACA_LENTYPE_ID_BLK_OP,
+						   ACA_OP_AND);
+
+	/* Init EM = 1 << (n_bit_len - 1), i.e. set bit (n_bit_len - 1) to 1 */
+	value = (0x1U << ((op_n_bit_len - 1) % 32));
+	sram_write_word(tmp_em.sram_addr + 4 * ((op_n_bit_len - 1) / 32), value);
+
+	/* EQ = E >> (n_bit_len - 1) */
+	ASSERT(op_e->blk_num > ACA_BITS_TO_BLOCK_NUM(SE_ROUND_DOWN(
+									(op_n_bit_len - 1), ACA_BLOCK_BITS)));
+	tmp_blk_num = op_e->blk_num - ACA_BITS_TO_BLOCK_NUM(SE_ROUND_DOWN(
+									  (op_n_bit_len - 1), ACA_BLOCK_BITS));
+	ASSERT(tmp_blk_num <= tmp_eq.blk_num);
+	/* write high blocks */
+	sram_write_data(tmp_eq.sram_addr, tmp_blk_num,
+					(const uint8_t *)(op_e->data));
+	/* shift right if necessary */
+	if (((op_n_bit_len - 1) & (ACA_BLOCK_BITS - 1)) != 0) {
+		OP_EXEC_ONE_CMD_SHIFT(&tmp_eq,
+							  ((op_n_bit_len - 1) & (ACA_BLOCK_BITS - 1)) - 1,
+							  &tmp_eq, ACA_LENTYPE_ID_BLK_OP, ACA_OP_SHR0);
+	}
+
+	/* ER = E & ((1 << (n_bit_len - 1)) - 1), i.e. the low (n_bit_len - 1)
+	 * bits of E
+	 */
+	tmp_blk_num =
+		ACA_BITS_TO_BLOCK_NUM(SE_ROUND_UP(op_n_bit_len - 1, ACA_BLOCK_BITS));
+	ASSERT(tmp_blk_num <= tmp_er.blk_num);
+	/* write low blocks */
+	sram_write_data(
+		tmp_er.sram_addr, tmp_blk_num,
+		(const uint8_t *)(op_e->data +
+						  ACA_BLOCK_NUM_TO_BYTES(op_e->blk_num - tmp_blk_num)));
+	/* mask off the bits at/above bit (n_bit_len - 1) by shifting them out
+	 * of the top and then shifting back */
+	if (((op_n_bit_len - 1) & (ACA_BLOCK_BITS - 1)) != 0) {
+		value = op_blk_num - tmp_blk_num;
+		while (value) {
+			OP_EXEC_ONE_CMD_SHIFT(&tmp_er, ACA_BLOCK_BITS - 1, &tmp_er,
+								  ACA_LENTYPE_ID_BLK_OP, ACA_OP_SHL0);
+			value--;
+		}
+		OP_EXEC_ONE_CMD_SHIFT(
+			&tmp_er,
+			ACA_BLOCK_BITS - ((op_n_bit_len - 1) & (ACA_BLOCK_BITS - 1)) - 1,
+			&tmp_er, ACA_LENTYPE_ID_BLK_OP, ACA_OP_SHL0);
+		OP_EXEC_ONE_CMD_SHIFT(
+			&tmp_er,
+			ACA_BLOCK_BITS - ((op_n_bit_len - 1) & (ACA_BLOCK_BITS - 1)) - 1,
+			&tmp_er, ACA_LENTYPE_ID_BLK_OP, ACA_OP_SHR0);
+		value = op_blk_num - tmp_blk_num;
+		while (value) {
+			OP_EXEC_ONE_CMD_SHIFT(&tmp_er, ACA_BLOCK_BITS - 1, &tmp_er,
+								  ACA_LENTYPE_ID_BLK_OP, ACA_OP_SHR0);
+			value--;
+		}
+	}
+
+	/* tmp_r = A^em mod N */
+	OP_EXEC_ONE_CMD(&tmp_a, &tmp_em, NULL, &tmp_r, ACA_LENTYPE_ID_BIT_OP,
+					ACA_OP_MODEXP);
+	if (op_intr_is_mod_n_zero() || op_intr_is_mult_red_err()) {
+		ret = SE_ERROR_INVAL_MOD;
+		goto end;
+	}
+
+	/* em is free, em = (A^em)^eq mod N */
+	OP_EXEC_ONE_CMD(&tmp_r, &tmp_eq, NULL, &tmp_em, ACA_LENTYPE_ID_BIT_OP,
+					ACA_OP_MODEXP);
+	if (op_intr_is_mod_n_zero() || op_intr_is_mult_red_err()) {
+		ret = SE_ERROR_INVAL_MOD;
+		goto end;
+	}
+
+	/* eq is free, eq = A^er mod N */
+	OP_EXEC_ONE_CMD(&tmp_a, &tmp_er, NULL, &tmp_eq, ACA_LENTYPE_ID_BIT_OP,
+					ACA_OP_MODEXP);
+	if (op_intr_is_mod_n_zero() || op_intr_is_mult_red_err()) {
+		ret = SE_ERROR_INVAL_MOD;
+		goto end;
+	}
+
+	/* tmp_r = eq * em mod N */
+	OP_EXEC_ONE_CMD(&tmp_eq, &tmp_em, NULL, &tmp_r, ACA_LENTYPE_ID_BIT_OP,
+					ACA_OP_MODMUL);
+	if (op_intr_is_mod_n_zero() || op_intr_is_mult_red_err()) {
+		ret = SE_ERROR_INVAL_MOD;
+		goto end;
+	}
+
+	/* read result */
+	sram_read_data(tmp_r.sram_addr, op_r->blk_num, op_r->data);
+
+end:
+	op_release(&tmp_r);
+	op_release(&tmp_a);
+	op_release(&tmp_n);
+	op_release(&tmp_em);
+	op_release(&tmp_er);
+	op_release(&tmp_eq);
+	op_release(np);
+	op_release_tmp(0, op_blk_num);
+	op_check_res();
+error:
+	aca_op_free(&tmp_r);
+	aca_op_free(&tmp_a);
+	aca_op_free(&tmp_n);
+	aca_op_free(&tmp_em);
+	aca_op_free(&tmp_er);
+	aca_op_free(&tmp_eq);
+	aca_op_free(np);
+#undef np
+	return ret;
+}
+/******************************************************************************/
+
+
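+/*
+ * ECP operands use ceil_to_block(P_bit_len + 2) blocks. For example, with
+ * ACA_BLOCK_BITS == 64 a 256-bit P needs ROUND_UP(258, 64) = 320 bits,
+ * i.e. 5 blocks.
+ */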
+static int32_t cal_ecp_op_blk_num(int32_t op_bit_len)
+{
+	return ACA_BITS_TO_BLOCK_NUM(SE_ROUND_UP(op_bit_len + 2, ACA_BLOCK_BITS));
+}
+
+/**
+ * Convert jacobian jx, jy, jz to affine X, Y
+ * Note: jz is modified after calling this function.
+ */
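+/*
+ * With Jacobian coordinates the affine point is X = jx * jz^-2 mod P and
+ * Y = jy * jz^-3 mod P. The sequence below inverts jz once and derives
+ * both powers of jz^-1 from it.
+ */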
+static int32_t jaco_to_affi(aca_op_t *P,
+							aca_op_t *np,
+							aca_op_t *jx,
+							aca_op_t *jy,
+							aca_op_t *jz,
+							aca_op_t *X,
+							aca_op_t *Y)
+{
+	int32_t ret         = SE_SUCCESS;
+	int32_t i           = 0;
+	uint32_t op_bit_len = 0;
+	uint32_t op_blk_num = 0;
+	aca_op_t tmp[5]     = {0};
+
+	CHECK_OP_CTX(P);
+	CHECK_OP_CTX(np);
+	CHECK_OP_CTX(jx);
+	CHECK_OP_CTX(jy);
+	CHECK_OP_CTX(jz);
+	CHECK_OP_CTX(X);
+	CHECK_OP_CTX(Y);
+	ASSERT(np->blk_num == ACA_NP_BLK_NUM);
+
+	op_bit_len = aca_op_bitlen(P);
+	op_blk_num = cal_ecp_op_blk_num(op_bit_len);
+
+	/* Check block number */
+	ASSERT(P->blk_num == op_blk_num);
+	ASSERT(jx->blk_num == op_blk_num);
+	ASSERT(jy->blk_num == op_blk_num);
+	ASSERT(jz->blk_num == op_blk_num);
+
+	op_prepare(P, GR_USAGE_N);
+	op_prepare(np, GR_USAGE_P);
+	op_prepare(jx, GR_USAGE_IN);
+	op_prepare(jy, GR_USAGE_IN);
+	op_prepare(jz, GR_USAGE_INOUT);
+	op_prepare_tmp(0, op_blk_num);
+	op_prepare_tmp(1, op_blk_num);
+	for (i = 0; i < (int32_t)SE_ARRAY_SIZE(tmp); i++) {
+		aca_op_init(&(tmp[i]));
+		tmp[i].blk_num = op_blk_num;
+		tmp[i].data    = NULL;
+		op_prepare(&tmp[i], GR_USAGE_INOUT);
+	}
+
+	/* config length type */
+	op_cfg_len_type(ACA_LENTYPE_ID_BIT_OP, op_bit_len);
+
+	OP_ECP_PREPARE();
+	OP_ECP_EXEC(MODINV, ACA_LENTYPE_ID_BIT_OP, jz, 0, 0, 0, &tmp[0]);
+	OP_ECP_EXEC(MODMUL, ACA_LENTYPE_ID_BIT_OP, &tmp[0], 0, &tmp[0], 0, &tmp[1]);
+	OP_ECP_EXEC(MODMUL, ACA_LENTYPE_ID_BIT_OP, &tmp[0], 0, &tmp[1], 0, &tmp[2]);
+	OP_ECP_EXEC(MODMUL, ACA_LENTYPE_ID_BIT_OP, jx, 0, &tmp[1], 0, &tmp[3]);
+	OP_ECP_EXEC(MODMUL, ACA_LENTYPE_ID_BIT_OP, jy, 0, &tmp[2], 0, &tmp[4]);
+	OP_ECP_WAIT();
+
+	if (op_intr_is_mod_n_zero() || op_intr_is_modinv_zero() ||
+		op_intr_is_mult_red_err()) {
+		ret = SE_ERROR_INVAL_MOD;
+		goto end;
+	}
+
+	/* copy to X, Y */
+	sram_read_data(tmp[3].sram_addr, X->blk_num, X->data);
+	sram_read_data(tmp[4].sram_addr, Y->blk_num, Y->data);
+
+end:
+	op_release(P);
+	op_release(np);
+	op_release(jx);
+	op_release(jy);
+	op_release(jz);
+	op_release_tmp(0, op_blk_num);
+	op_release_tmp(1, op_blk_num);
+	for (i = 0; i < (int32_t)SE_ARRAY_SIZE(tmp); i++) {
+		op_release(&tmp[i]);
+	}
+	op_check_res();
+
+	for (i = 0; i < (int32_t)SE_ARRAY_SIZE(tmp); i++) {
+		aca_op_free(&(tmp[i]));
+	}
+	return ret;
+}
+
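+/*
+ * Point doubling in Jacobian coordinates: (Tx, Ty, Tz) := 2 * (Tx, Ty, Tz)
+ * mod P (P is loaded as N). W caches the curve term A * Tz^4 (the
+ * "modified Jacobian" form): when i_mj_en is set the caller guarantees W is
+ * already valid, otherwise it is recomputed here; when o_mj_en is set W is
+ * refreshed for the doubled point so the next doubling can reuse it.
+ * u[] supplies at least 13 scratch GRs.
+ */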
+static void _double_point_jj(aca_op_t *A,
+							 aca_op_t *Tx,
+							 aca_op_t *Ty,
+							 aca_op_t *Tz,
+							 aca_op_t *W,
+							 bool i_mj_en,
+							 bool o_mj_en,
+							 aca_op_t u[])
+{
+	OP_ECP_PREPARE();
+	OP_ECP_EXEC(MODMUL, ACA_LENTYPE_ID_BIT_OP, Tx, 0, Tx, 0, &u[0]);
+	OP_ECP_EXEC(MODADD, ACA_LENTYPE_ID_BIT_OP, &u[0], 0, &u[0], 0, &u[1]);
+	OP_ECP_EXEC(MODMUL, ACA_LENTYPE_ID_BIT_OP, Ty, 0, Ty, 0, &u[2]);
+	OP_ECP_EXEC(MODMUL, ACA_LENTYPE_ID_BIT_OP, Tx, 0, &u[2], 0, &u[3]);
+	OP_ECP_EXEC(MODADD, ACA_LENTYPE_ID_BIT_OP, &u[3], 0, &u[3], 0, &u[4]);
+	OP_ECP_EXEC(MODMUL, ACA_LENTYPE_ID_BIT_OP, &u[2], 0, &u[2], 0, &u[5]);
+	OP_ECP_EXEC(MODADD, ACA_LENTYPE_ID_BIT_OP, &u[5], 0, &u[5], 0, &u[6]);
+	OP_ECP_EXEC(MODADD, ACA_LENTYPE_ID_BIT_OP, &u[6], 0, &u[6], 0, &u[7]);
+
+	OP_ECP_EXEC(MODADD, ACA_LENTYPE_ID_BIT_OP, &u[0], 0, &u[1], 0, &u[8]);
+	OP_ECP_EXEC(MODADD, ACA_LENTYPE_ID_BIT_OP, &u[4], 0, &u[4], 0, &u[9]);
+	OP_ECP_EXEC(MODADD, ACA_LENTYPE_ID_BIT_OP, &u[7], 0, &u[7], 0, &u[10]);
+	OP_ECP_EXEC(MODMUL, ACA_LENTYPE_ID_BIT_OP, Ty, 0, Tz, 0, &u[11]);
+	OP_ECP_WAIT();
+	OP_ECP_PREPARE();
+	if (!i_mj_en) {
+		OP_ECP_EXEC(MODMUL, ACA_LENTYPE_ID_BIT_OP, Tz, 0, Tz, 0, W);
+		OP_ECP_EXEC(MODMUL, ACA_LENTYPE_ID_BIT_OP, W, 0, W, 0, &u[12]);
+		OP_ECP_EXEC(MODMUL, ACA_LENTYPE_ID_BIT_OP, A, 0, &u[12], 0, W);
+	}
+	OP_ECP_EXEC(MODADD, ACA_LENTYPE_ID_BIT_OP, &u[8], 0, W, 0, &u[0]);
+	OP_ECP_EXEC(MODMUL, ACA_LENTYPE_ID_BIT_OP, &u[0], 0, &u[0], 0, &u[1]);
+	OP_ECP_EXEC(MODADD, ACA_LENTYPE_ID_BIT_OP, &u[9], 0, &u[9], 0, &u[2]);
+	OP_ECP_EXEC(MODSUB, ACA_LENTYPE_ID_BIT_OP, &u[1], 0, &u[2], 0, Tx);
+	OP_ECP_EXEC(MODSUB, ACA_LENTYPE_ID_BIT_OP, &u[9], 0, Tx, 0, &u[3]);
+	OP_ECP_EXEC(MODMUL, ACA_LENTYPE_ID_BIT_OP, &u[0], 0, &u[3], 0, &u[4]);
+	OP_ECP_EXEC(MODSUB, ACA_LENTYPE_ID_BIT_OP, &u[4], 0, &u[10], 0, Ty);
+	OP_ECP_EXEC(MODADD, ACA_LENTYPE_ID_BIT_OP, &u[11], 0, &u[11], 0, Tz);
+	if (o_mj_en) {
+		OP_ECP_EXEC(MODADD, ACA_LENTYPE_ID_BIT_OP, &u[10], 0, &u[10], 0, &u[5]);
+		OP_ECP_EXEC(MODMUL, ACA_LENTYPE_ID_BIT_OP, W, 0, &u[5], 0, &u[6]);
+		OP_ECP_EXEC(MODADD, ACA_LENTYPE_ID_BIT_OP, &u[6], 1, 0, 0, W);
+	}
+	OP_ECP_WAIT();
+
+	return;
+}
+
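+/*
+ * Mixed point addition: (X, Y, Z) := (X, Y, Z) + (X1, Y1) mod P, where
+ * (X1, Y1) is an affine point and the result stays in Jacobian form. When
+ * o_mj_en is set, W is recomputed as A * Z^4 for the new Z so that a
+ * modified-Jacobian doubling can follow. u[] supplies scratch GRs.
+ */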
+static void _add_point_ajj(aca_op_t *A,
+						   aca_op_t *X,
+						   aca_op_t *Y,
+						   aca_op_t *Z,
+						   aca_op_t *X1,
+						   aca_op_t *Y1,
+						   aca_op_t *W,
+						   bool o_mj_en,
+						   aca_op_t u[])
+{
+	OP_ECP_PREPARE();
+	OP_ECP_EXEC(MODMUL, ACA_LENTYPE_ID_BIT_OP, Z, 0, Z, 0, &u[0]);
+	OP_ECP_EXEC(MODMUL, ACA_LENTYPE_ID_BIT_OP, &u[0], 0, Z, 0, &u[1]);
+	OP_ECP_EXEC(MODMUL, ACA_LENTYPE_ID_BIT_OP, &u[0], 0, X1, 0, &u[2]);
+	OP_ECP_EXEC(MODMUL, ACA_LENTYPE_ID_BIT_OP, &u[1], 0, Y1, 0, &u[3]);
+	OP_ECP_EXEC(MODSUB, ACA_LENTYPE_ID_BIT_OP, &u[2], 0, X, 0, &u[4]);
+	OP_ECP_EXEC(MODMUL, ACA_LENTYPE_ID_BIT_OP, &u[4], 0, &u[4], 0, &u[5]);
+
+	OP_ECP_EXEC(MODMUL, ACA_LENTYPE_ID_BIT_OP, &u[4], 0, Z, 0, &u[7]);
+	OP_ECP_EXEC(MODSUB, ACA_LENTYPE_ID_BIT_OP, &u[3], 0, Y, 0, &u[8]);
+	OP_ECP_EXEC(MODMUL, ACA_LENTYPE_ID_BIT_OP, &u[8], 0, &u[8], 0, &u[9]);
+	OP_ECP_EXEC(MODMUL, ACA_LENTYPE_ID_BIT_OP, &u[4], 0, &u[5], 0, &u[10]);
+	OP_ECP_EXEC(MODMUL, ACA_LENTYPE_ID_BIT_OP, &u[5], 0, X, 0, &u[11]);
+	OP_ECP_WAIT();
+	OP_ECP_PREPARE();
+	OP_ECP_EXEC(MODADD, ACA_LENTYPE_ID_BIT_OP, &u[11], 0, &u[11], 0, &u[0]);
+	OP_ECP_EXEC(MODADD, ACA_LENTYPE_ID_BIT_OP, &u[10], 0, &u[0], 0, &u[1]);
+	OP_ECP_EXEC(MODSUB, ACA_LENTYPE_ID_BIT_OP, &u[9], 0, &u[1], 0, X);
+	OP_ECP_EXEC(MODSUB, ACA_LENTYPE_ID_BIT_OP, &u[11], 0, X, 0, &u[2]);
+	OP_ECP_EXEC(MODMUL, ACA_LENTYPE_ID_BIT_OP, &u[8], 0, &u[2], 0, &u[3]);
+	OP_ECP_EXEC(MODMUL, ACA_LENTYPE_ID_BIT_OP, &u[10], 0, Y, 0, &u[4]);
+	OP_ECP_EXEC(MODSUB, ACA_LENTYPE_ID_BIT_OP, &u[3], 0, &u[4], 0, Y);
+	OP_ECP_EXEC(MODADD, ACA_LENTYPE_ID_BIT_OP, &u[7], 1, 0, 0, Z);
+
+	if (o_mj_en) {
+		OP_ECP_EXEC(MODMUL, ACA_LENTYPE_ID_BIT_OP, Z, 0, Z, 0, &u[5]);
+		OP_ECP_EXEC(MODMUL, ACA_LENTYPE_ID_BIT_OP, &u[5], 0, &u[5], 0, &u[6]);
+		OP_ECP_EXEC(MODMUL, ACA_LENTYPE_ID_BIT_OP, &u[6], 0, A, 0, W);
+	}
+	OP_ECP_WAIT();
+
+	return;
+}
+
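+/*
+ * Full Jacobian point addition: (X, Y, Z) := (X1, Y1, Z1) + (X2, Y2, Z2)
+ * mod P, with all three points in Jacobian coordinates. u[] supplies
+ * scratch GRs.
+ */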
+static void _add_point_jjj(aca_op_t *P,
+						   aca_op_t *X,
+						   aca_op_t *Y,
+						   aca_op_t *Z,
+						   aca_op_t *X1,
+						   aca_op_t *Y1,
+						   aca_op_t *Z1,
+						   aca_op_t *X2,
+						   aca_op_t *Y2,
+						   aca_op_t *Z2,
+						   aca_op_t u[])
+{
+	OP_ECP_PREPARE();
+	OP_ECP_EXEC(MODMUL, ACA_LENTYPE_ID_BIT_OP, Z2, 0, Z2, 0, &u[0]);
+	OP_ECP_EXEC(MODMUL, ACA_LENTYPE_ID_BIT_OP, Z1, 0, Z1, 0, &u[1]);
+	OP_ECP_EXEC(MODMUL, ACA_LENTYPE_ID_BIT_OP, &u[0], 0, Z2, 0, &u[2]);
+	OP_ECP_EXEC(MODMUL, ACA_LENTYPE_ID_BIT_OP, &u[1], 0, Z1, 0, &u[3]);
+	OP_ECP_EXEC(MODMUL, ACA_LENTYPE_ID_BIT_OP, &u[0], 0, X1, 0, &u[4]);
+	OP_ECP_EXEC(MODMUL, ACA_LENTYPE_ID_BIT_OP, &u[1], 0, X2, 0, &u[5]);
+	OP_ECP_EXEC(MODMUL, ACA_LENTYPE_ID_BIT_OP, &u[2], 0, Y1, 0, &u[6]);
+	OP_ECP_EXEC(MODMUL, ACA_LENTYPE_ID_BIT_OP, &u[3], 0, Y2, 0, &u[7]);
+	OP_ECP_EXEC(MODMUL, ACA_LENTYPE_ID_BIT_OP, Z1, 0, Z2, 0, &u[8]);
+	OP_ECP_EXEC(MODSUB, ACA_LENTYPE_ID_BIT_OP, &u[4], 0, &u[5], 0, &u[9]);
+	OP_ECP_EXEC(MODSUB, ACA_LENTYPE_ID_BIT_OP, &u[6], 0, &u[7], 0, &u[10]);
+	OP_ECP_EXEC(MODADD, ACA_LENTYPE_ID_BIT_OP, &u[4], 0, &u[5], 0, &u[11]);
+	OP_ECP_EXEC(MODADD, ACA_LENTYPE_ID_BIT_OP, &u[6], 0, &u[7], 0, &u[12]);
+	OP_ECP_EXEC(MODMUL, ACA_LENTYPE_ID_BIT_OP, &u[8], 0, &u[9], 0, Z);
+	OP_ECP_WAIT();
+	OP_ECP_PREPARE();
+	OP_ECP_EXEC(MODMUL, ACA_LENTYPE_ID_BIT_OP, &u[10], 0, &u[10], 0, &u[0]);
+	OP_ECP_EXEC(MODMUL, ACA_LENTYPE_ID_BIT_OP, &u[9], 0, &u[9], 0, &u[1]);
+	OP_ECP_EXEC(MODMUL, ACA_LENTYPE_ID_BIT_OP, &u[11], 0, &u[1], 0, &u[2]);
+	OP_ECP_EXEC(MODSUB, ACA_LENTYPE_ID_BIT_OP, &u[0], 0, &u[2], 0, X);
+	OP_ECP_EXEC(MODADD, ACA_LENTYPE_ID_BIT_OP, X, 0, X, 0, &u[3]);
+	OP_ECP_EXEC(MODSUB, ACA_LENTYPE_ID_BIT_OP, &u[2], 0, &u[3], 0, &u[4]);
+	OP_ECP_EXEC(MODMUL, ACA_LENTYPE_ID_BIT_OP, &u[4], 0, &u[10], 0, &u[5]);
+	OP_ECP_EXEC(MODMUL, ACA_LENTYPE_ID_BIT_OP, &u[1], 0, &u[9], 0, &u[6]);
+	OP_ECP_EXEC(MODMUL, ACA_LENTYPE_ID_BIT_OP, &u[12], 0, &u[6], 0, &u[7]);
+	OP_ECP_EXEC(MODSUB, ACA_LENTYPE_ID_BIT_OP, &u[5], 0, &u[7], 0, &u[8]);
+	OP_EXEC_ONE_CMD_SHIFT(P, 0,  &u[9], ACA_LENTYPE_ID_BLK_OP, ACA_OP_SHR0);
+	OP_ECP_EXEC(MODMULACC, ACA_LENTYPE_ID_BIT_OP, &u[8], 0, &u[9], &u[8], Y);
+
+	OP_ECP_WAIT();
+
+	return;
+}
+
+/**
+ * calculate ecp point mul: R = k * G
+ */
+static int32_t ecp_mul(aca_op_t *P,
+					   aca_op_t *np,
+					   aca_op_t *A,
+					   aca_op_t *G_X,
+					   aca_op_t *G_Y,
+					   aca_op_t *G_Z,
+					   aca_op_t *k,
+					   aca_op_t *R_X,
+					   aca_op_t *R_Y,
+					   aca_op_t *R_Z)
+{
+	int32_t ret           = SE_SUCCESS;
+	aca_op_t neg_Y        = ACA_OP_INIT_DATA;
+	aca_op_t *used_y      = NULL;
+	aca_op_t tmp_k        = ACA_OP_INIT_DATA;
+	aca_op_t k3           = ACA_OP_INIT_DATA;
+	aca_op_t Tx           = ACA_OP_INIT_DATA;
+	aca_op_t Ty           = ACA_OP_INIT_DATA;
+	aca_op_t Tz           = ACA_OP_INIT_DATA;
+	aca_op_t W            = ACA_OP_INIT_DATA;
+	aca_op_t u[13]         = {0};
+	uint32_t mul_bit_len  = 0;
+	uint32_t full_bit_len = 0;
+	uint32_t full_blk_num = 0;
+	uint32_t k_bit_len = 0, k3_bit_len = 0;
+	uint32_t k_bit_val = 0, k3_bit_val = 0;
+	int32_t i = 0;
+
+#ifdef ECP_DBG_PRINT_SIMPLE_EN
+	PAL_LOG_INFO(
+		"ECP MUL START ++++++++++++++++++++++++++++++++++++++++++++++++++\n");
+	aca_op_dump("P", P);
+	aca_op_dump("np", np);
+	aca_op_dump("A", A);
+	aca_op_dump("G_X", G_X);
+	aca_op_dump("G_Y", G_Y);
+	aca_op_dump("G_Z", G_Z);
+	aca_op_dump("k", k);
+#endif
+
+	CHECK_OP_CTX(P);
+	CHECK_OP_CTX(np);
+	CHECK_OP_CTX(A);
+	CHECK_OP_CTX(G_X);
+	CHECK_OP_CTX(G_Y);
+	CHECK_OP_CTX(G_Z);
+	CHECK_OP_CTX(k);
+	CHECK_OP_CTX(R_X);
+	CHECK_OP_CTX(R_Y);
+	CHECK_OP_CTX(R_Z);
+
+	mul_bit_len = aca_op_bitlen(P);
+	k_bit_len   = aca_op_bitlen(k);
+	ASSERT(k_bit_len <= mul_bit_len);
+	full_blk_num = cal_ecp_op_blk_num(mul_bit_len);
+	full_bit_len = ACA_BLOCK_NUM_TO_BITS(full_blk_num);
+	/* Check that all input meet required size */
+	ASSERT(np->blk_num == ACA_NP_BLK_NUM);
+	ASSERT(P->blk_num == full_blk_num);
+	ASSERT(A->blk_num == full_blk_num);
+	ASSERT(G_X->blk_num == full_blk_num);
+	ASSERT(G_Y->blk_num == full_blk_num);
+	ASSERT(G_Z->blk_num == full_blk_num);
+
+	/* init tmp_op_ctx u */
+	for (i = 0; i < (int32_t)SE_ARRAY_SIZE(u); i++) {
+		aca_op_init(&(u[i]));
+	}
+
+	/* init k3 */
+	k3.blk_num = full_blk_num;
+	k3.data    = kcalloc(1, ACA_BLOCK_NUM_TO_BYTES(k3.blk_num), GFP_KERNEL);
+	if (!k3.data) {
+		pr_err("kcalloc of %u bytes failed!\n", ACA_BLOCK_NUM_TO_BYTES(k3.blk_num));
+		ret = SE_ERROR_OOM;
+		goto error;
+	}
+
+	ret = aca_op_copy_change_size(&tmp_k, k, full_blk_num);
+	CHECK_SUCCESS_GOTO(ret, error);
+	/* calculate k3 */
+	op_cfg_len_type(ACA_LENTYPE_ID_BLK_OP, full_bit_len);
+	op_prepare(&tmp_k, GR_USAGE_IN);
+	op_prepare(&k3, GR_USAGE_OUT);
+#ifdef ECP_DBG_PRINT_SIMPLE_EN
+		aca_op_dump("tmp_k:", &tmp_k);
+#endif
+
+	OP_EXEC_ONE_CMD_IMME_B(&tmp_k, 3, NULL, &k3, ACA_LENTYPE_ID_BLK_OP,
+						   ACA_OP_MUL_LOW);
+
+	sram_read_data(k3.sram_addr, k3.blk_num, k3.data);
+#ifdef ECP_DBG_PRINT_SIMPLE_EN
+	aca_op_dump("k3:", &k3);
+#endif
+
+	/* tmp_k and k3 can safely be released here: their data has already been
+	 * swapped back to memory */
+	op_release(&tmp_k);
+	op_release(&k3);
+	op_check_res();
+	/* calc k3 bitlen */
+	k3_bit_len = aca_op_bitlen(&k3);
+
+	/* check k3 bitlen vs k bitlen */
+	ASSERT((k3_bit_len >= k_bit_len + 1) && (k3_bit_len <= k_bit_len + 2));
+	/* k == 1 gives k3 == 3, so the minimum k3_bit_len is 2 */
+	ASSERT(k3_bit_len >= 2);
+
+	/* init internal used GRs' block number */
+	neg_Y.blk_num = full_blk_num;
+	Tx.blk_num    = full_blk_num;
+	Ty.blk_num    = full_blk_num;
+	Tz.blk_num    = full_blk_num;
+	W.blk_num     = full_blk_num;
+
+	/* init u block number */
+	for (i = 0; i < (int32_t)SE_ARRAY_SIZE(u); i++) {
+		u[i].blk_num = full_blk_num;
+	}
+
+	/* prepare two length types */
+	op_cfg_len_type(ACA_LENTYPE_ID_BLK_OP, full_bit_len);
+	op_cfg_len_type(ACA_LENTYPE_ID_BIT_OP, mul_bit_len);
+
+	/**
+	 * Init Tx, Ty, Tz, W, neg_Y. Set P's usage to N,
+	 * because calculating W may use MODMUL.
+	 *
+	 * Note: no cached buffer (swap memory) is allocated to back the
+	 * internally used GRs (such as neg_Y), so these resources MUST NOT be
+	 * released in the middle of the computation. For simplicity, all
+	 * commonly used GRs are prepared as INOUT.
+	 **/
+	op_prepare(G_X, GR_USAGE_INOUT);
+	op_prepare(&Tx, GR_USAGE_INOUT);
+	op_prepare(G_Y, GR_USAGE_INOUT);
+	op_prepare(&Ty, GR_USAGE_INOUT);
+	op_prepare(G_Z, GR_USAGE_INOUT);
+	op_prepare(&Tz, GR_USAGE_INOUT);
+	op_prepare(A, GR_USAGE_INOUT);
+	op_prepare(&W, GR_USAGE_INOUT);
+	op_prepare(P, GR_USAGE_N);
+	op_prepare(&neg_Y, GR_USAGE_INOUT);
+	op_prepare(np, GR_USAGE_P);
+	op_prepare_tmp(0, full_blk_num);
+	op_prepare_tmp(1, full_blk_num);
+	for (i = 0; i < (int32_t)SE_ARRAY_SIZE(u); i++) {
+		op_prepare(&u[i], GR_USAGE_INOUT);
+	}
+
+	/* copy G_X-->Tx, G_Y-->Ty, G_Z-->Tz */
+	OP_EXEC_ONE_CMD_IMME_B(G_X, 0, NULL, &Tx, ACA_LENTYPE_ID_BLK_OP,
+						   ACA_OP_ADD);
+	OP_EXEC_ONE_CMD_IMME_B(G_Y, 0, NULL, &Ty, ACA_LENTYPE_ID_BLK_OP,
+						   ACA_OP_ADD);
+	OP_EXEC_ONE_CMD_IMME_B(G_Z, 0, NULL, &Tz, ACA_LENTYPE_ID_BLK_OP,
+						   ACA_OP_ADD);
+	/* W = A * Z^4. The caller guarantees Z == 1, so W = A */
+	OP_EXEC_ONE_CMD_IMME_B(A, 0, NULL, &W, ACA_LENTYPE_ID_BLK_OP, ACA_OP_ADD);
+
+	/* neg_Y = P - G_Y */
+	OP_EXEC_ONE_CMD(P, G_Y, NULL, &neg_Y, ACA_LENTYPE_ID_BLK_OP, ACA_OP_SUB);
+
+#ifdef ECP_DBG_PRINT_SIMPLE_EN
+	aca_op_dump("neg_Y:", &neg_Y);
+#endif
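+	/*
+	 * Scalar multiplication loop driven by k and k3 = 3 * k: for each bit
+	 * position i from bitlen(k3) - 2 down to 1 the point T is doubled;
+	 * where bit i of k3 and k differ, G is added (k3 bit set) or -G is
+	 * added via neg_Y (k bit set); where they agree, nothing is added.
+	 */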
+	/* parse binary NAF */
+	for (i = k3_bit_len - 2; i >= 1; i--) {
+		k3_bit_val = aca_op_get_bit_value(&k3, i);
+		if (i >= (int32_t)k_bit_len) {
+			k_bit_val = 0;
+		} else {
+			k_bit_val = aca_op_get_bit_value(k, i);
+		}
+
+		if (k3_bit_val == k_bit_val) {
+			/* 0 */
+			_double_point_jj(A, &Tx, &Ty, &Tz, &W, true, true, u);
+		} else {
+			/* +- */
+			_double_point_jj(A, &Tx, &Ty, &Tz, &W, true, false, u);
+		}
+
+		if ((k3_bit_val == 1) && (k_bit_val == 0)) {
+			/* + */
+			used_y = G_Y;
+		} else if ((k3_bit_val == 0) && (k_bit_val == 1)) {
+			/* - */
+			used_y = &neg_Y;
+		} else {
+			used_y = NULL;
+		}
+		if (used_y) {
+			_add_point_ajj(A, &Tx, &Ty, &Tz, G_X, used_y, &W, true, u);
+		} else {
+			/* blinding is not supported here, so do nothing */
+		}
+	}
+
+	/* Copy to R_X, R_Y, R_Z */
+	sram_read_data(Tx.sram_addr, R_X->blk_num, R_X->data);
+	sram_read_data(Ty.sram_addr, R_Y->blk_num, R_Y->data);
+	sram_read_data(Tz.sram_addr, R_Z->blk_num, R_Z->data);
+
+	op_release(G_X);
+	op_release(&Tx);
+	op_release(G_Y);
+	op_release(&Ty);
+	op_release(G_Z);
+	op_release(&Tz);
+	op_release(A);
+	op_release(&W);
+	op_release(P);
+	op_release(&neg_Y);
+	op_release(np);
+	op_release_tmp(0, full_blk_num);
+	op_release_tmp(1, full_blk_num);
+	for (i = 0; i < (int32_t)SE_ARRAY_SIZE(u); i++) {
+		op_release(&u[i]);
+	}
+	op_check_res();
+error:
+	for (i = 0; i < (int32_t)SE_ARRAY_SIZE(u); i++) {
+		aca_op_free(&(u[i]));
+	}
+	aca_op_free(&tmp_k);
+	aca_op_free(&k3);
+#ifdef ECP_DBG_PRINT_SIMPLE_EN
+	aca_op_dump("R_X", R_X);
+	aca_op_dump("R_Y", R_Y);
+	aca_op_dump("R_Z", R_Z);
+	PAL_LOG_INFO(
+		"ECP MUL END ----------------------------------------------------\n");
+#endif
+	return ret;
+}
+
+/**
+ * Calculate ecp point add: R = G1 + G2
+ */
+static int32_t ecp_add(aca_op_t *P,
+					   aca_op_t *np,
+					   aca_op_t *G1_X,
+					   aca_op_t *G1_Y,
+					   aca_op_t *G1_Z,
+					   aca_op_t *G2_X,
+					   aca_op_t *G2_Y,
+					   aca_op_t *G2_Z,
+					   aca_op_t *R_X,
+					   aca_op_t *R_Y,
+					   aca_op_t *R_Z)
+{
+	int32_t ret           = SE_SUCCESS;
+	aca_op_t Tx           = ACA_OP_INIT_DATA;
+	aca_op_t Ty           = ACA_OP_INIT_DATA;
+	aca_op_t Tz           = ACA_OP_INIT_DATA;
+	aca_op_t u[13]         = {0};
+	uint32_t mul_bit_len  = 0;
+	uint32_t full_bit_len = 0;
+	uint32_t full_blk_num = 0;
+	size_t i              = 0;
+
+#ifdef ECP_DBG_PRINT_SIMPLE_EN
+	PAL_LOG_INFO(
+		"ECP ADD START ++++++++++++++++++++++++++++++++++++++++++++++++++\n");
+	aca_op_dump("P", P);
+	aca_op_dump("np", np);
+	aca_op_dump("G1_X", G1_X);
+	aca_op_dump("G1_Y", G1_Y);
+	aca_op_dump("G1_Z", G1_Z);
+	aca_op_dump("G2_X", G2_X);
+	aca_op_dump("G2_Y", G2_Y);
+	aca_op_dump("G2_Z", G2_Z);
+#endif
+
+	CHECK_OP_CTX(P);
+	CHECK_OP_CTX(np);
+	CHECK_OP_CTX(G1_X);
+	CHECK_OP_CTX(G1_Y);
+	CHECK_OP_CTX(G1_Z);
+	CHECK_OP_CTX(G2_X);
+	CHECK_OP_CTX(G2_Y);
+	CHECK_OP_CTX(G2_Z);
+	CHECK_OP_CTX(R_X);
+	CHECK_OP_CTX(R_Y);
+	CHECK_OP_CTX(R_Z);
+
+	mul_bit_len = aca_op_bitlen(P);
+
+	full_blk_num = cal_ecp_op_blk_num(mul_bit_len);
+	full_bit_len = ACA_BLOCK_NUM_TO_BITS(full_blk_num);
+
+	/* Check that all input meet required size */
+	ASSERT(np->blk_num == ACA_NP_BLK_NUM);
+	ASSERT(P->blk_num == full_blk_num);
+	ASSERT(G1_X->blk_num == full_blk_num);
+	ASSERT(G1_Y->blk_num == full_blk_num);
+	ASSERT(G1_Z->blk_num == full_blk_num);
+	ASSERT(G2_X->blk_num == full_blk_num);
+	ASSERT(G2_Y->blk_num == full_blk_num);
+	ASSERT(G2_Z->blk_num == full_blk_num);
+
+	/* init tmp_op_ctx u */
+	for (i = 0; i < SE_ARRAY_SIZE(u); i++) {
+		aca_op_init(&(u[i]));
+	}
+
+	/* init internal used GRs' block number */
+	Tx.blk_num  = full_blk_num;
+	Ty.blk_num  = full_blk_num;
+	Tz.blk_num  = full_blk_num;
+
+	/* init u block number */
+	for (i = 0; i < SE_ARRAY_SIZE(u); i++) {
+		u[i].blk_num = full_blk_num;
+	}
+
+	/* prepare two length types */
+	op_cfg_len_type(ACA_LENTYPE_ID_BLK_OP, full_bit_len);
+
+	/* Same as ECP MUL, prepare all used GRs */
+	op_prepare(G1_X, GR_USAGE_INOUT);
+	op_prepare(&Tx, GR_USAGE_INOUT);
+	op_prepare(G1_Y, GR_USAGE_INOUT);
+	op_prepare(&Ty, GR_USAGE_INOUT);
+	op_prepare(G1_Z, GR_USAGE_INOUT);
+	op_prepare(&Tz, GR_USAGE_INOUT);
+	op_prepare(P, GR_USAGE_N);
+	op_prepare(G2_X, GR_USAGE_INOUT);
+	op_prepare(G2_Y, GR_USAGE_INOUT);
+	op_prepare(G2_Z, GR_USAGE_INOUT);
+	op_prepare(np, GR_USAGE_P);
+	op_prepare_tmp(0, full_blk_num);
+	op_prepare_tmp(1, full_blk_num);
+	for (i = 0; i < SE_ARRAY_SIZE(u); i++) {
+		op_prepare(&u[i], GR_USAGE_INOUT);
+	}
+
+	/* call Jacobian point addition */
+	_add_point_jjj(P, &Tx, &Ty, &Tz, G1_X, G1_Y, G1_Z, G2_X, G2_Y, G2_Z, u);
+
+	/* Copy to R_X, R_Y, R_Z */
+	sram_read_data(Tx.sram_addr, R_X->blk_num, R_X->data);
+	sram_read_data(Ty.sram_addr, R_Y->blk_num, R_Y->data);
+	sram_read_data(Tz.sram_addr, R_Z->blk_num, R_Z->data);
+
+	op_release(G1_X);
+	op_release(&Tx);
+	op_release(G1_Y);
+	op_release(&Ty);
+	op_release(G1_Z);
+	op_release(&Tz);
+	op_release(P);
+	op_release(G2_X);
+	op_release(G2_Y);
+	op_release(G2_Z);
+	op_release(np);
+	op_release_tmp(0, full_blk_num);
+	op_release_tmp(1, full_blk_num);
+	for (i = 0; i < SE_ARRAY_SIZE(u); i++) {
+		op_release(&u[i]);
+	}
+	op_check_res();
+
+	for (i = 0; i < SE_ARRAY_SIZE(u); i++) {
+		aca_op_free(&(u[i]));
+	}
+#ifdef ECP_DBG_PRINT_SIMPLE_EN
+	aca_op_dump("R_X", R_X);
+	aca_op_dump("R_Y", R_Y);
+	aca_op_dump("R_Z", R_Z);
+	PAL_LOG_INFO(
+		"ECP ADD END ----------------------------------------------------\n");
+#endif
+	return ret;
+}
+
+/* Calculate ecp point mul: R = G * k */
+int32_t aca_op_ecp_mul(aca_op_t *P,
+					   aca_op_t *A,
+					   aca_op_t *G_X,
+					   aca_op_t *G_Y,
+					   aca_op_t *G_Z,
+					   aca_op_t *k,
+					   aca_op_t *R_X,
+					   aca_op_t *R_Y,
+					   aca_op_t *R_Z)
+{
+	int32_t ret           = SE_SUCCESS;
+	aca_op_t tmp_P        = ACA_OP_INIT_DATA;
+	aca_op_t tmp_A        = ACA_OP_INIT_DATA;
+	aca_op_t tmp_G_X      = ACA_OP_INIT_DATA;
+	aca_op_t tmp_G_Y      = ACA_OP_INIT_DATA;
+	aca_op_t tmp_G_Z      = ACA_OP_INIT_DATA;
+	aca_op_t tmp_np       = ACA_OP_INIT_DATA;
+	aca_op_t tmp[3]       = {0};
+	uint32_t p_bit_len    = 0;
+	uint32_t full_blk_num = 0;
+	int32_t i             = 0;
+
+#define np (&(tmp_np))
+#define RR_X (&(tmp[0]))
+#define RR_Y (&(tmp[1]))
+#define RR_Z (&(tmp[2]))
+
+	CHECK_OP_CTX(P);
+	CHECK_OP_CTX(A);
+	CHECK_OP_CTX(G_X);
+	CHECK_OP_CTX(G_Y);
+	CHECK_OP_CTX(G_Z);
+	CHECK_OP_CTX(k);
+
+	/* get p length */
+	p_bit_len    = aca_op_bitlen(P);
+	full_blk_num = cal_ecp_op_blk_num(p_bit_len);
+	/* check that G_Z equals 1 (bit length of 1) */
+	ret = aca_op_bitlen(G_Z);
+	ASSERT(ret == 1);
+	/* init tmp_P, tmp_A, tmp_G_X, tmp_G_Y, tmp_G_Z */
+	aca_op_init(&tmp_P);
+	aca_op_init(&tmp_A);
+	aca_op_init(&tmp_G_X);
+	aca_op_init(&tmp_G_Y);
+	aca_op_init(&tmp_G_Z);
+
+	/* init np */
+	ret = _aca_op_init_np(np);
+	CHECK_SUCCESS_GOTO(ret, end);
+	/* init tmp ops */
+	for (i = 0; i < (int32_t)SE_ARRAY_SIZE(tmp); i++) {
+		aca_op_init(&(tmp[i]));
+		tmp[i].blk_num = full_blk_num;
+		tmp[i].data    = kcalloc(1, ACA_BLOCK_NUM_TO_BYTES(full_blk_num), GFP_KERNEL);
+		if (NULL == tmp[i].data) {
+			pr_err("kcalloc of %u bytes failed!\n",
+			       ACA_BLOCK_NUM_TO_BYTES(full_blk_num));
+			ret = SE_ERROR_OOM;
+			goto end;
+		}
+	}
+	/* copy P, A, G_X, G_Y, G_Z. */
+	ret = aca_op_copy_change_size(&tmp_P, P, full_blk_num);
+	CHECK_SUCCESS_GOTO(ret, end);
+	ret = aca_op_copy_change_size(&tmp_A, A, full_blk_num);
+	CHECK_SUCCESS_GOTO(ret, end);
+	ret = aca_op_copy_change_size(&tmp_G_X, G_X, full_blk_num);
+	CHECK_SUCCESS_GOTO(ret, end);
+	ret = aca_op_copy_change_size(&tmp_G_Y, G_Y, full_blk_num);
+	CHECK_SUCCESS_GOTO(ret, end);
+	ret = aca_op_copy_change_size(&tmp_G_Z, G_Z, full_blk_num);
+	CHECK_SUCCESS_GOTO(ret, end);
+	/* adjust R_X, R_Y to P's block size; R_Z is not resized here */
+	ret = op_change_size(R_X, P->blk_num);
+	CHECK_SUCCESS_GOTO(ret, end);
+	ret = op_change_size(R_Y, P->blk_num);
+	CHECK_SUCCESS_GOTO(ret, end);
+	/* calculate np */
+	ret = op_cal_np(np, &tmp_P, p_bit_len);
+	CHECK_SUCCESS_GOTO(ret, end);
+	/* calculate RR = k * G */
+	ret = ecp_mul(&tmp_P, np, &tmp_A, &tmp_G_X, &tmp_G_Y, &tmp_G_Z, k, RR_X,
+				  RR_Y, RR_Z);
+	CHECK_SUCCESS_GOTO(ret, end);
+	/* convert jacobian to affine */
+	ret = jaco_to_affi(&tmp_P, np, RR_X, RR_Y, RR_Z, R_X, R_Y);
+	CHECK_SUCCESS_GOTO(ret, end);
+	/* set R_Z to 1 */
+	ret = aca_op_import_u32(R_Z, 1);
+	CHECK_SUCCESS_GOTO(ret, end);
+end:
+	aca_op_free(np);
+	for (i = 0; i < (int32_t)SE_ARRAY_SIZE(tmp); i++) {
+		aca_op_free(&(tmp[i]));
+	}
+	aca_op_free(&tmp_P);
+	aca_op_free(&tmp_A);
+	aca_op_free(&tmp_G_X);
+	aca_op_free(&tmp_G_Y);
+	aca_op_free(&tmp_G_Z);
+	return ret;
+#undef np
+#undef RR_X
+#undef RR_Y
+#undef RR_Z
+}
+
+/* Calculate ecp point mul add: R = G1 * k1 + G2 * k2 */
+int32_t aca_op_ecp_muladd(aca_op_t *P,
+						  aca_op_t *A,
+						  aca_op_t *G1_X,
+						  aca_op_t *G1_Y,
+						  aca_op_t *G1_Z,
+						  aca_op_t *k1,
+						  aca_op_t *G2_X,
+						  aca_op_t *G2_Y,
+						  aca_op_t *G2_Z,
+						  aca_op_t *k2,
+						  aca_op_t *R_X,
+						  aca_op_t *R_Y,
+						  aca_op_t *R_Z)
+{
+	int32_t ret           = SE_SUCCESS;
+	aca_op_t tmp_P        = ACA_OP_INIT_DATA;
+	aca_op_t tmp_A        = ACA_OP_INIT_DATA;
+	aca_op_t tmp_G1_X     = ACA_OP_INIT_DATA;
+	aca_op_t tmp_G1_Y     = ACA_OP_INIT_DATA;
+	aca_op_t tmp_G1_Z     = ACA_OP_INIT_DATA;
+	aca_op_t tmp_G2_X     = ACA_OP_INIT_DATA;
+	aca_op_t tmp_G2_Y     = ACA_OP_INIT_DATA;
+	aca_op_t tmp_G2_Z     = ACA_OP_INIT_DATA;
+	aca_op_t tmp_np       = ACA_OP_INIT_DATA;
+	aca_op_t tmp[9]       = {0};
+	uint32_t p_bit_len    = 0;
+	uint32_t full_blk_num = 0;
+	int32_t i             = 0;
+
+#define np (&(tmp_np))
+#define R1_X (&(tmp[0]))
+#define R1_Y (&(tmp[1]))
+#define R1_Z (&(tmp[2]))
+#define R2_X (&(tmp[3]))
+#define R2_Y (&(tmp[4]))
+#define R2_Z (&(tmp[5]))
+#define RR_X (&(tmp[6]))
+#define RR_Y (&(tmp[7]))
+#define RR_Z (&(tmp[8]))
+
+	CHECK_OP_CTX(P);
+	CHECK_OP_CTX(A);
+	CHECK_OP_CTX(G1_X);
+	CHECK_OP_CTX(G1_Y);
+	CHECK_OP_CTX(G1_Z);
+	CHECK_OP_CTX(k1);
+	CHECK_OP_CTX(G2_X);
+	CHECK_OP_CTX(G2_Y);
+	CHECK_OP_CTX(G2_Z);
+	CHECK_OP_CTX(k2);
+
+	/* get p length */
+	p_bit_len    = aca_op_bitlen(P);
+	full_blk_num = cal_ecp_op_blk_num(p_bit_len);
+
+	/* Check G1_Z and G2_Z, must be 1 */
+	ret = aca_op_bitlen(G1_Z);
+	ASSERT(ret == 1);
+	ret = aca_op_bitlen(G2_Z);
+	ASSERT(ret == 1);
+
+	/* init tmp_P, tmp_A, tmp_G_X, tmp_G_Y, tmp_G_Z */
+	aca_op_init(&tmp_P);
+	aca_op_init(&tmp_A);
+	aca_op_init(&tmp_G1_X);
+	aca_op_init(&tmp_G1_Y);
+	aca_op_init(&tmp_G1_Z);
+	aca_op_init(&tmp_G2_X);
+	aca_op_init(&tmp_G2_Y);
+	aca_op_init(&tmp_G2_Z);
+
+	/* init np */
+	ret = _aca_op_init_np(np);
+	CHECK_SUCCESS_GOTO(ret, end);
+
+	/* init tmp ops */
+	for (i = 0; i < (int32_t)SE_ARRAY_SIZE(tmp); i++) {
+		aca_op_init(&(tmp[i]));
+		tmp[i].blk_num = full_blk_num;
+		tmp[i].data    = kcalloc(1, ACA_BLOCK_NUM_TO_BYTES(full_blk_num), GFP_KERNEL);
+		if (NULL == tmp[i].data) {
+			pr_err("kcalloc of %u bytes failed!\n",
+			       ACA_BLOCK_NUM_TO_BYTES(full_blk_num));
+			ret = SE_ERROR_OOM;
+			goto end;
+		}
+	}
+
+	/* copy P, A, G1_X, G1_Y, G1_Z, G2_X, G2_Y, G2_Z. */
+	ret = aca_op_copy_change_size(&tmp_P, P, full_blk_num);
+	CHECK_SUCCESS_GOTO(ret, end);
+	ret = aca_op_copy_change_size(&tmp_A, A, full_blk_num);
+	CHECK_SUCCESS_GOTO(ret, end);
+	ret = aca_op_copy_change_size(&tmp_G1_X, G1_X, full_blk_num);
+	CHECK_SUCCESS_GOTO(ret, end);
+	ret = aca_op_copy_change_size(&tmp_G1_Y, G1_Y, full_blk_num);
+	CHECK_SUCCESS_GOTO(ret, end);
+	ret = aca_op_copy_change_size(&tmp_G1_Z, G1_Z, full_blk_num);
+	CHECK_SUCCESS_GOTO(ret, end);
+	ret = aca_op_copy_change_size(&tmp_G2_X, G2_X, full_blk_num);
+	CHECK_SUCCESS_GOTO(ret, end);
+	ret = aca_op_copy_change_size(&tmp_G2_Y, G2_Y, full_blk_num);
+	CHECK_SUCCESS_GOTO(ret, end);
+	ret = aca_op_copy_change_size(&tmp_G2_Z, G2_Z, full_blk_num);
+	CHECK_SUCCESS_GOTO(ret, end);
+
+	/* adjust R_X, R_Y to P's block size; R_Z is not resized here */
+	ret = op_change_size(R_X, P->blk_num);
+	CHECK_SUCCESS_GOTO(ret, end);
+	ret = op_change_size(R_Y, P->blk_num);
+	CHECK_SUCCESS_GOTO(ret, end);
+
+	/* calculate np */
+	ret = op_cal_np(np, &tmp_P, p_bit_len);
+	CHECK_SUCCESS_GOTO(ret, end);
+
+	/* calculate R1 = k1 * G1 */
+	ret = ecp_mul(&tmp_P, np, &tmp_A, &tmp_G1_X, &tmp_G1_Y, &tmp_G1_Z, k1, R1_X,
+				  R1_Y, R1_Z);
+	CHECK_SUCCESS_GOTO(ret, end);
+
+	/* calculate R2 = k2 * G2 */
+	ret = ecp_mul(&tmp_P, np, &tmp_A, &tmp_G2_X, &tmp_G2_Y, &tmp_G2_Z, k2, R2_X,
+				  R2_Y, R2_Z);
+	CHECK_SUCCESS_GOTO(ret, end);
+
+	/* calculate RR = R1 + R2 */
+	ret = ecp_add(&tmp_P, np, R1_X, R1_Y, R1_Z, R2_X, R2_Y, R2_Z, RR_X, RR_Y,
+				  RR_Z);
+	CHECK_SUCCESS_GOTO(ret, end);
+
+	/* convert jacobian to affine */
+	ret = jaco_to_affi(&tmp_P, np, RR_X, RR_Y, RR_Z, R_X, R_Y);
+	CHECK_SUCCESS_GOTO(ret, end);
+	/* set R_Z to 1 */
+	ret = aca_op_import_u32(R_Z, 1);
+	CHECK_SUCCESS_GOTO(ret, end);
+end:
+	aca_op_free(np);
+	for (i = 0; i < (int32_t)SE_ARRAY_SIZE(tmp); i++) {
+		aca_op_free(&(tmp[i]));
+	}
+	aca_op_free(&tmp_P);
+	aca_op_free(&tmp_A);
+	aca_op_free(&tmp_G1_X);
+	aca_op_free(&tmp_G1_Y);
+	aca_op_free(&tmp_G1_Z);
+	aca_op_free(&tmp_G2_X);
+	aca_op_free(&tmp_G2_Y);
+	aca_op_free(&tmp_G2_Z);
+	return ret;
+#undef np
+#undef R1_X
+#undef R1_Y
+#undef R1_Z
+#undef R2_X
+#undef R2_Y
+#undef R2_Z
+#undef RR_X
+#undef RR_Y
+#undef RR_Z
+}
+
diff --git a/marvell/linux/drivers/crypto/asr/te200/asr-aca/se_aca.h b/marvell/linux/drivers/crypto/asr/te200/asr-aca/se_aca.h
new file mode 100644
index 0000000..07d78d0
--- /dev/null
+++ b/marvell/linux/drivers/crypto/asr/te200/asr-aca/se_aca.h
@@ -0,0 +1,239 @@
+/*
+ * Copyright (c) 2020-2021, Arm Technology (China) Co., Ltd.
+ * All rights reserved.
+ *
+ * The content of this file or document is CONFIDENTIAL and PROPRIETARY
+ * to Arm Technology (China) Co., Ltd. It is subject to the terms of a
+ * License Agreement between Licensee and Arm Technology (China) Co., Ltd
+ * restricting among other things, the use, reproduction, distribution
+ * and transfer.  Each of the embodiments, including this information and
+ * any derivative work shall retain this copyright notice.
+ */
+
+#ifndef __SE_ACA_H__
+#define __SE_ACA_H__
+
+#include <linux/types.h>
+#include "se_common.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* The base block size in bit of ACA engine. */
+#define ACA_BLOCK_BITS (64)
+
+/* The maximum operation size in bit of ACA engine. */
+#define ACA_MAX_OP_BITS (8000)
+
+/* The start address of ACA SRAM */
+#define ACA_SRAM_BASE (0x00000000)
+
+/* The size of total ACA SRAM */
+#define ACA_SRAM_SIZE (8192)
+/* The total GR number */
+#define ACA_GR_NUMBER (32)
+
+/* The total length type number */
+#define ACA_LEN_TYPE_NUMBER (8)
+
+/*
+ * ACA registers
+ */
+ 
+/* The offset of grx_sram_addr(x is 0 - 31) of target host */
+#define ACA_GRX_SRAM_ADDR_OFS (ACA_OFS + 0x00)
+
+/* The offset of gr_len_typex(x is 0 - 7) of target host */
+#define ACA_GR_LEN_TYPEX_OFS (ACA_OFS + 0x100)
+
+/* The offset of n_p_t0_t1_use_grid of target host */
+#define ACA_N_P_T0_T1_USE_GRID (ACA_OFS + 0x120)
+
+/* The offset of aca_ctrl of target host */
+#define ACA_ACA_CTRL (ACA_OFS + 0x124)
+
+/* The offset of aca_entry of target host */
+#define ACA_ACA_ENTRY (ACA_OFS + 0x128)
+
+/* The offset of aca_status of target host */
+#define ACA_ACA_STATUS (ACA_OFS + 0x12C)
+
+/* The offset of sram_waddr of target host */
+#define ACA_SRAM_WADDR (ACA_OFS + 0x130)
+
+/* The offset of sram_wdata of target host */
+#define ACA_SRAM_WDATA (ACA_OFS + 0x134)
+
+/* The offset of sram_raddr of target host */
+#define ACA_SRAM_RADDR (ACA_OFS + 0x138)
+
+/* The offset of sram_rdata of target host */
+#define ACA_SRAM_RDATA (ACA_OFS + 0x13C)
+
+/* The offset of aca_intr_stat of target host */
+#define ACA_ACA_INTR_STAT (ACA_OFS + 0x140)
+
+/* The offset of aca_intr_msk of target host */
+#define ACA_ACA_INTR_MSK (ACA_OFS + 0x144)
+
+/* Start of the used GR IDs. GR ID 0 is always reserved */
+#define ACA_GR_USED_START (1)
+
+/* Invalid GR ID value */
+#define ACA_GR_INVALID (-1)
+
+/* Invalid SRAM address value */
+#define ACA_SRAM_ADDR_INVALID (0xFFFFFFFFU)
+
+/* Convert bit length to block number */
+#define ACA_BITS_TO_BLOCK_NUM(bits)                                            \
+	(((bits) + (ACA_BLOCK_BITS - 1)) / ACA_BLOCK_BITS)
+/* Convert byte length to block number */
+#define ACA_BYTES_TO_BLOCK_NUM(bytes)                                          \
+	((((bytes)*8) + (ACA_BLOCK_BITS - 1)) / ACA_BLOCK_BITS)
+
+/* Convert block number to bit length */
+#define ACA_BLOCK_NUM_TO_BITS(blk_nm) ((blk_nm)*ACA_BLOCK_BITS)
+
+/* Convert block number to byte length */
+#define ACA_BLOCK_NUM_TO_BYTES(blk_nm) (((blk_nm)*ACA_BLOCK_BITS) / 8)
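+
+/*
+ * Example with ACA_BLOCK_BITS == 64:
+ *     ACA_BITS_TO_BLOCK_NUM(255) == 4
+ *     ACA_BLOCK_NUM_TO_BYTES(4)  == 32
+ */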
+
+/**
+ * The usage of the length-type IDs is fixed to two types:
+ *  1. Block based OP bits: the OP bit length is a multiple of the block
+ *     bits. Used for e.g. ADD, SUB, MUL.
+ *  2. Modulus N based OP bits: the OP bit length is the real bit length of
+ *     modulus N. Used for e.g. modMUL, modEXP, modMULNR.
+ */
+/* The lengthtype ID for block based OP bits. */
+#define ACA_LENTYPE_ID_BLK_OP (1)
+
+/* The lengthtype ID for modulus N based OP bits */
+#define ACA_LENTYPE_ID_BIT_OP (2)
+
+/* The constant block number for NP, always 2 */
+#define ACA_NP_BLK_NUM (2)
+
+/* The ACA OP command enumeration */
+enum {
+	ACA_OP_ADD          = 0x01,
+	ACA_OP_SUB          = 0x02,
+	ACA_OP_MUL_LOW      = 0x03,
+	ACA_OP_DIV          = 0x04,
+	ACA_OP_AND          = 0x05,
+	ACA_OP_OR           = 0x06,
+	ACA_OP_XOR          = 0x07,
+	ACA_OP_SHR0         = 0x08,
+	ACA_OP_SHL0         = 0x0A,
+	ACA_OP_SHL1         = 0x0B,
+	ACA_OP_MUL_HIGH     = 0x0C,
+	ACA_OP_MODRED       = 0x10,
+	ACA_OP_MODADD       = 0x11,
+	ACA_OP_MODSUB       = 0x12,
+	ACA_OP_MODMUL       = 0x13,
+	ACA_OP_MODINV       = 0x14,
+	ACA_OP_MODEXP       = 0x15,
+	ACA_OP_MODMULNR     = 0x16,
+	ACA_OP_MODMULACC    = 0x17,
+	ACA_OP_MODMULACCNR  = 0x18,
+};
+
+/* The GR usage */
+typedef enum gr_usage {
+	GR_USAGE_NULL  = 0,
+	GR_USAGE_IN    = 1,
+	GR_USAGE_OUT   = 2,
+	GR_USAGE_INOUT = 3,
+	GR_USAGE_N     = 4,
+	GR_USAGE_P     = 5,
+	GR_USAGE_T0    = 6,
+	GR_USAGE_T1    = 7,
+} gr_usage_t;
+
+/* The OP context structure. */
+typedef struct se_aca_bn_t {
+	uint32_t blk_num;
+	/* data in big-endian */
+	uint8_t *data;
+	uint32_t sram_addr;
+	int32_t gr_id;
+} se_aca_bn_t;
+
+typedef struct se_aca_bn_t aca_op_t;
+
+/* The init value for one OP context */
+#define ACA_OP_INIT_DATA                                                       \
+	{                                                                          \
+		.blk_num = 0, .data = NULL, .sram_addr = ACA_SRAM_ADDR_INVALID,        \
+		.gr_id = ACA_GR_INVALID,                                               \
+	}
+
+/* ASSERT if op context is invalid. */
+#define CHECK_OP_CTX(op_ctx)                                                   \
+	do {                                                                       \
+		ASSERT((op_ctx != NULL) && (op_ctx->data != NULL) &&              \
+					(op_ctx->blk_num != 0));                                   \
+	} while (0)
+
+int32_t aca_engine_init(void);
+void aca_engine_exit(void);
+void aca_op_init(aca_op_t *op);
+void aca_op_free(aca_op_t *op);
+int32_t aca_op_copy(aca_op_t *dst, aca_op_t *src);
+int32_t aca_op_import_u32(aca_op_t *op, uint32_t val);
+int32_t aca_op_import_bin(aca_op_t *op, const uint8_t *data, size_t size);
+int32_t aca_op_export_bin(aca_op_t *op, uint8_t *buf, size_t size);
+uint32_t aca_op_bitlen(aca_op_t *op);
+int32_t aca_op_get_bit_value(aca_op_t *op, size_t bit_num);
+int32_t aca_op_set_bit_value(aca_op_t *op, size_t bit_num, int32_t value);
+int32_t aca_op_cmp_bn(aca_op_t *op_a, aca_op_t *op_b, int32_t *result);
+int32_t aca_op_cmp_u32(aca_op_t *op_a, uint32_t b, int32_t *result);
+int32_t aca_op_cmp_bn_equal(aca_op_t *op_a, aca_op_t *op_b, int32_t *result);
+int32_t
+aca_op_add_mod(aca_op_t *op_r, aca_op_t *op_a, aca_op_t *op_b, aca_op_t *op_n);
+int32_t
+aca_op_sub_mod(aca_op_t *op_r, aca_op_t *op_a, aca_op_t *op_b, aca_op_t *op_n);
+int32_t aca_op_inv_mod(aca_op_t *op_r, aca_op_t *op_a, aca_op_t *op_n);
+int32_t
+aca_op_exp_mod(aca_op_t *op_r, aca_op_t *op_a, aca_op_t *op_e, aca_op_t *op_n);
+int32_t
+aca_op_mul_mod(aca_op_t *op_r, aca_op_t *op_a, aca_op_t *op_b, aca_op_t *op_n);
+int32_t aca_op_shift_r(aca_op_t *op_a, int32_t shift_num);
+int32_t aca_op_mod_red(aca_op_t *op_r, aca_op_t *op_a, aca_op_t *op_n);
+int32_t aca_op_mul(aca_op_t *op_r, aca_op_t *op_a, aca_op_t *op_b);
+int32_t aca_op_add(aca_op_t *op_r, aca_op_t *op_a, aca_op_t *op_b);
+int32_t aca_op_sub(aca_op_t *op_r, aca_op_t *op_a, aca_op_t *op_b);
+int32_t aca_op_exp_mod_large_e(aca_op_t *op_r,
+							   aca_op_t *op_a,
+							   aca_op_t *op_e,
+							   aca_op_t *op_n);
+int32_t aca_op_ecp_mul(aca_op_t *P,
+					   aca_op_t *A,
+					   aca_op_t *G_X,
+					   aca_op_t *G_Y,
+					   aca_op_t *G_Z,
+					   aca_op_t *k,
+					   aca_op_t *R_X,
+					   aca_op_t *R_Y,
+					   aca_op_t *R_Z);
+int32_t aca_op_ecp_muladd(aca_op_t *P,
+						  aca_op_t *A,
+						  aca_op_t *G1_X,
+						  aca_op_t *G1_Y,
+						  aca_op_t *G1_Z,
+						  aca_op_t *k1,
+						  aca_op_t *G2_X,
+						  aca_op_t *G2_Y,
+						  aca_op_t *G2_Z,
+						  aca_op_t *k2,
+						  aca_op_t *R_X,
+						  aca_op_t *R_Y,
+						  aca_op_t *R_Z);
+void aca_op_dump(const char *name, aca_op_t *op_ctx);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /*__SE_ACA_H__*/
diff --git a/marvell/linux/drivers/crypto/asr/te200/asr-aca/se_aca_internal.h b/marvell/linux/drivers/crypto/asr/te200/asr-aca/se_aca_internal.h
new file mode 100644
index 0000000..12bcb85
--- /dev/null
+++ b/marvell/linux/drivers/crypto/asr/te200/asr-aca/se_aca_internal.h
@@ -0,0 +1,210 @@
+/*
+ * Copyright (c) 2020-2021, Arm Technology (China) Co., Ltd.
+ * All rights reserved.
+ *
+ * The content of this file or document is CONFIDENTIAL and PROPRIETARY
+ * to Arm Technology (China) Co., Ltd. It is subject to the terms of a
+ * License Agreement between Licensee and Arm Technology (China) Co., Ltd
+ * restricting among other things, the use, reproduction, distribution
+ * and transfer.  Each of the embodiments, including this information and
+ * any derivative work shall retain this copyright notice.
+ */
+
+#ifndef __SE_ACA_INTERNAL_H__
+#define __SE_ACA_INTERNAL_H__
+
+#include <linux/types.h>
+#include "se_common.h"
+#include "se_bn.h"
+#include "se_aca.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * The following are internal functions which are used by ecp/rsa/sm2, but are
+ * not exported as CE Lite APIs.
+ */
+int32_t se_bn_import_u32(se_bn_t *bn, uint32_t val);
+int32_t se_bn_cmp_bn(const se_bn_t *bna, const se_bn_t *bnb, int32_t *result);
+int32_t se_bn_cmp_u32(const se_bn_t *bna, uint32_t b, int32_t *result);
+int32_t se_bn_cmp_bn_equal(const se_bn_t *bna,
+						   const se_bn_t *bnb,
+						   int32_t *result);
+
+/**
+ * Requirement:
+ * A/B bit length <= N bit length
+ */
+int32_t se_bn_add_mod(se_bn_t *bnr,
+					  const se_bn_t *bna,
+					  const se_bn_t *bnb,
+					  const se_bn_t *bnn);
+/**
+ * Requirement:
+ * A/B bit length <= N bit length
+ */
+int32_t se_bn_sub_mod(se_bn_t *bnr,
+					  const se_bn_t *bna,
+					  const se_bn_t *bnb,
+					  const se_bn_t *bnn);
+/**
+ * Requirement:
+ * 1. A bit length <= N block bits
+ * 2. N is odd.
+ */
+int32_t se_bn_inv_mod(se_bn_t *bnr, const se_bn_t *bna, const se_bn_t *bnn);
+int32_t se_bn_bitlen(const se_bn_t *bn);
+int32_t se_bn_get_bit(const se_bn_t *bn, int32_t pos);
+
+/**
+ * Requirement:
+ * A/B bit length <= N bit length
+ */
+int32_t se_bn_mul_mod(se_bn_t *bnr,
+					  const se_bn_t *bna,
+					  const se_bn_t *bnb,
+					  const se_bn_t *bnn);
+/**
+ * Requirement:
+ * A/B bit length <= N bit length
+ */
+int32_t se_bn_exp_mod(se_bn_t *bnr,
+					  const se_bn_t *bna,
+					  const se_bn_t *bne,
+					  const se_bn_t *bnn);
+
+/**
+ * Requirement:
+ * 0 < shift_num <= 64
+ */
+int32_t se_bn_shift_r(se_bn_t *bna, int32_t count);
+/**
+ * Requirement:
+ * A bit length <= N bit length
+ */
+int32_t se_bn_mod_bn(se_bn_t *bnr, const se_bn_t *bna, const se_bn_t *bnn);
+int32_t se_bn_mul_bn(se_bn_t *bnr, const se_bn_t *bna, const se_bn_t *bnb);
+int32_t se_bn_add_bn(se_bn_t *bnr, const se_bn_t *bna, const se_bn_t *bnb);
+
+/**
+ * Requirement:
+ * A >= B
+ */
+int32_t se_bn_sub_bn(se_bn_t *bnr, const se_bn_t *bna, const se_bn_t *bnb);
+
+/**
+ * Requirement:
+ * 1. E bit length > N bit length
+ * 2. E bit length <= 2 * ROUND_DOWN((N_bit_len - 1), 128) + 128
+ * 3. A bit length < N bit length
+ */
+int32_t se_bn_exp_mod_large_e(se_bn_t *bnr,
+							  const se_bn_t *bna,
+							  const se_bn_t *bne,
+							  const se_bn_t *bnn);
+int32_t se_bn_import_random(se_bn_t *bn,
+							size_t size,
+							int32_t (*f_rng)(void *, uint8_t *, size_t),
+							void *p_rng);
+int32_t se_bn_import_random_bits(se_bn_t *bn,
+								 size_t bit_len,
+								 int32_t (*f_rng)(void *, uint8_t *, size_t),
+								 void *p_rng);
+int32_t
+se_bn_import_random_max_bit_len(se_bn_t *bn,
+								size_t max_bit_len,
+								int32_t (*f_rng)(void *, uint8_t *, size_t),
+								void *p_rng);
+int32_t se_bn_copy(se_bn_t *bn_dst, const se_bn_t *bn_src);
+void se_bn_dump(const char *name, const se_bn_t *bn);
+
+#if 0
+int32_t se_bn_ecp_mul(const se_bn_t *P,
+					  const se_bn_t *A,
+					  const se_bn_t *G_X,
+					  const se_bn_t *G_Y,
+					  const se_bn_t *G_Z,
+					  const se_bn_t *k,
+					  se_bn_t *R_X,
+					  se_bn_t *R_Y,
+					  se_bn_t *R_Z);
+/**
+ * Requirement:
+ * G1_Z and G2_Z must be 1
+ */
+int32_t se_bn_ecp_muladd(const se_bn_t *P,
+						 const se_bn_t *A,
+						 const se_bn_t *G1_X,
+						 const se_bn_t *G1_Y,
+						 const se_bn_t *G1_Z,
+						 const se_bn_t *k1,
+						 const se_bn_t *G2_X,
+						 const se_bn_t *G2_Y,
+						 const se_bn_t *G2_Z,
+						 const se_bn_t *k2,
+						 se_bn_t *R_X,
+						 se_bn_t *R_Y,
+						 se_bn_t *R_Z);
+
+int32_t se_ecp_gen_privkey(const se_ecp_group_t *grp,
+						   se_bn_t *d,
+						   int32_t (*f_rng)(void *, uint8_t *, size_t),
+						   void *p_rng);
+int32_t se_ecp_copy(se_ecp_point_t *P, const se_ecp_point_t *Q);
+int32_t se_ecp_group_copy(se_ecp_group_t *dst, const se_ecp_group_t *src);
+int32_t se_ecp_is_zero(se_ecp_point_t *pt);
+#endif
+
+int32_t se_asn1_get_len(uint8_t **p, const uint8_t *limit, size_t *len);
+int32_t
+se_asn1_get_tag(uint8_t **p, const uint8_t *limit, size_t *len, int32_t tag);
+int32_t se_asn1_get_mpi(uint8_t **p, const uint8_t *limit, se_bn_t *X);
+int32_t se_asn1_write_len(uint8_t **p, uint8_t *start, size_t len);
+int32_t se_asn1_write_tag(uint8_t **p, uint8_t *start, uint8_t tag);
+int32_t se_asn1_write_mpi(uint8_t **p, uint8_t *start, const se_bn_t *X);
+
+static inline size_t se_md_size(se_algo_t md_algo)
+{
+	switch (md_algo) {
+
+	case SE_ALG_SHA256:
+		return SE_SHA256_HASH_SIZE;
+
+	case SE_ALG_SHA224:
+		return SE_SHA224_HASH_SIZE;
+
+	case SE_ALG_SHA1:
+		return SE_SHA1_HASH_SIZE;
+
+	case SE_ALG_SM3:
+		return SE_SM3_HASH_SIZE;
+
+	default:
+		return 0;
+	}
+}
+
+/* constant-time buffer comparison: returns 0 iff the two buffers are equal */
+static inline int32_t se_safer_memcmp(const uint8_t *a,
+									  const uint8_t *b,
+									  size_t n)
+{
+	size_t i;
+	const uint8_t *A = (const uint8_t *)a;
+	const uint8_t *B = (const uint8_t *)b;
+	uint8_t diff     = 0;
+
+	for (i = 0; i < n; i++) {
+		diff |= A[i] ^ B[i];
+	}
+
+	return (int32_t)(diff);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /*__SE_ACA_INTERNAL_H__*/
diff --git a/marvell/linux/drivers/crypto/asr/te200/asr-aca/se_bn.c b/marvell/linux/drivers/crypto/asr/te200/asr-aca/se_bn.c
new file mode 100644
index 0000000..37516e1
--- /dev/null
+++ b/marvell/linux/drivers/crypto/asr/te200/asr-aca/se_bn.c
@@ -0,0 +1,571 @@
+/*
+ * Copyright (c) 2020-2021, Arm Technology (China) Co., Ltd.
+ * All rights reserved.
+ *
+ * The content of this file or document is CONFIDENTIAL and PROPRIETARY
+ * to Arm Technology (China) Co., Ltd. It is subject to the terms of a
+ * License Agreement between Licensee and Arm Technology (China) Co., Ltd
+ * restricting among other things, the use, reproduction, distribution
+ * and transfer.  Each of the embodiments, including this information and
+ * any derivative work shall retain this copyright notice.
+ */
+
+#include <linux/slab.h>
+#include "se_common.h"
+#include "se_bn.h"
+#include "se_aca_internal.h"
+#include "se_aca.h"
+
+int32_t se_bn_init(se_bn_t *bn)
+{
+	int32_t ret = SE_SUCCESS;
+
+	if (NULL == bn) {
+		ret = SE_ERROR_BAD_PARAMS;
+		goto end;
+	}
+
+	bn->ctx = kcalloc(1, sizeof(aca_op_t), GFP_KERNEL);
+	if (NULL == bn->ctx) {
+		ret = SE_ERROR_OOM;
+		goto end;
+	}
+
+	aca_op_init(bn->ctx);
+
+end:
+	return ret;
+}
+
+void se_bn_free(se_bn_t *bn)
+{
+	if ((NULL == bn) || (NULL == bn->ctx)) {
+		/* do nothing */
+		return;
+	}
+
+	aca_op_free(bn->ctx);
+	kfree(bn->ctx);
+	bn->ctx = NULL;
+}
+
+int32_t se_bn_import(se_bn_t *bn, const uint8_t *buf, size_t size)
+{
+	if ((NULL == bn) || (NULL == bn->ctx)) {
+		return SE_ERROR_BAD_PARAMS;
+	}
+	if ((0 != size) && (NULL == buf)) {
+		return SE_ERROR_BAD_PARAMS;
+	}
+	return aca_op_import_bin(bn->ctx, buf, size);
+}
+
+int32_t se_bn_export(const se_bn_t *bn, uint8_t *buf, size_t size)
+{
+	if ((NULL == bn) || (NULL == bn->ctx)) {
+		return SE_ERROR_BAD_PARAMS;
+	}
+	if ((0 != size) && (NULL == buf)) {
+		return SE_ERROR_BAD_PARAMS;
+	}
+	return aca_op_export_bin(bn->ctx, buf, size);
+}
+
+/**
+ * The following are internal functions which are used by ecp/rsa/sm2, but are
+ * not exported as CE Lite APIs.
+ *
+ * The corresponding header file is se_aca_internal.h.
+ */
+
+/**
+ * Used in:
+ * 1. RSA SIGN && BLINDING
+ * 2. ECP
+ * 3. SM2DSA SIGN (Skip)
+ */
+int32_t se_bn_import_u32(se_bn_t *bn, uint32_t val)
+{
+	if ((NULL == bn) || (NULL == bn->ctx)) {
+		return SE_ERROR_BAD_PARAMS;
+	}
+	return aca_op_import_u32(bn->ctx, val);
+}
+
+/**
+ * Used in:
+ * 1. RSA SIGN && VERIFY
+ * 2. ECP
+ */
+int32_t se_bn_cmp_bn(const se_bn_t *bna, const se_bn_t *bnb, int32_t *result)
+{
+	if (((NULL == bna) || (NULL == bna->ctx)) ||
+		((NULL == bnb) || (NULL == bnb->ctx)) || (NULL == result)) {
+		return SE_ERROR_BAD_PARAMS;
+	}
+	return aca_op_cmp_bn(bna->ctx, bnb->ctx, result);
+}
+
+
+/**
+ * Used in:
+ * 1. ECP
+ * 2. ECDSA SIGN (Skip)
+ * 3. SM2DSA (Skip)
+ */
+int32_t se_bn_cmp_u32(const se_bn_t *bna, uint32_t b, int32_t *result)
+{
+	if (((NULL == bna) || (NULL == bna->ctx)) || (NULL == result)) {
+		return SE_ERROR_BAD_PARAMS;
+	}
+	return aca_op_cmp_u32(bna->ctx, b, result);
+}
+
+/**
+ * Used in:
+ * 1. ECP
+ * 2. ECDSA Verify (Skip)
+ * 3. SM2DSA Verify (Skip)
+ * 4. RSA Sign
+ */
+int32_t se_bn_cmp_bn_equal(const se_bn_t *bna,
+						   const se_bn_t *bnb,
+						   int32_t *result)
+{
+	if (((NULL == bna) || (NULL == bna->ctx)) ||
+		((NULL == bnb) || (NULL == bnb->ctx)) || (NULL == result)) {
+		return SE_ERROR_BAD_PARAMS;
+	}
+	return aca_op_cmp_bn_equal(bna->ctx, bnb->ctx, result);
+}
+
+/**
+ * Used in:
+ * 1. ECP
+ * 2. ECDSA Sign (Skip)
+ * 3. SM2DSA Sign && Verify (Skip)
+ */
+int32_t se_bn_add_mod(se_bn_t *bnr,
+					  const se_bn_t *bna,
+					  const se_bn_t *bnb,
+					  const se_bn_t *bnn)
+{
+	if (((NULL == bna) || (NULL == bna->ctx)) ||
+		((NULL == bnb) || (NULL == bnb->ctx)) ||
+		((NULL == bnn) || (NULL == bnn->ctx)) ||
+		((NULL == bnr) || (NULL == bnr->ctx))) {
+		return SE_ERROR_BAD_PARAMS;
+	}
+	return aca_op_add_mod(bnr->ctx, bna->ctx, bnb->ctx, bnn->ctx);
+}
+
+/**
+ * Used in:
+ * 1. SM2DSA Sign
+ */
+int32_t se_bn_sub_mod(se_bn_t *bnr,
+					  const se_bn_t *bna,
+					  const se_bn_t *bnb,
+					  const se_bn_t *bnn)
+{
+	if (((NULL == bna) || (NULL == bna->ctx)) ||
+		((NULL == bnb) || (NULL == bnb->ctx)) ||
+		((NULL == bnn) || (NULL == bnn->ctx)) ||
+		((NULL == bnr) || (NULL == bnr->ctx))) {
+		return SE_ERROR_BAD_PARAMS;
+	}
+	return aca_op_sub_mod(bnr->ctx, bna->ctx, bnb->ctx, bnn->ctx);
+}
+
+/**
+ * Used in:
+ * 1. SM2DSA Sign
+ * 2. ECDSA Sign && Verify
+ * 3. RSA Sign && Blinding
+ */
+int32_t se_bn_inv_mod(se_bn_t *bnr, const se_bn_t *bna, const se_bn_t *bnn)
+{
+	if (((NULL == bna) || (NULL == bna->ctx)) ||
+		((NULL == bnn) || (NULL == bnn->ctx)) ||
+		((NULL == bnr) || (NULL == bnr->ctx))) {
+		return SE_ERROR_BAD_PARAMS;
+	}
+	return aca_op_inv_mod(bnr->ctx, bna->ctx, bnn->ctx);
+}
+
+/**
+ * Used in:
+ * 1. ECP
+ * 2. RSA Sign && Verify
+ */
+int32_t se_bn_bitlen(const se_bn_t *bn)
+{
+	if ((NULL == bn) || (NULL == bn->ctx)) {
+		return SE_ERROR_BAD_PARAMS;
+	}
+	return (int32_t)aca_op_bitlen(bn->ctx);
+}
+
+/**
+ * Used in:
+ * 1. RSA Sign && Verify
+ */
+int32_t se_bn_get_bit(const se_bn_t *bn, int32_t pos)
+{
+	if ((NULL == bn) || (NULL == bn->ctx)) {
+		return SE_ERROR_BAD_PARAMS;
+	}
+	return aca_op_get_bit_value(bn->ctx, (size_t)pos);
+}
+
+/**
+ * Used in:
+ * 1. RSA Sign && Blinding
+ * 2. ECP
+ * 3. ECDSA Sign && Verify (Skip)
+ * 4. SM2DSA Sign (Skip)
+ */
+int32_t se_bn_mul_mod(se_bn_t *bnr,
+					  const se_bn_t *bna,
+					  const se_bn_t *bnb,
+					  const se_bn_t *bnn)
+{
+	if (((NULL == bna) || (NULL == bna->ctx)) ||
+		((NULL == bnb) || (NULL == bnb->ctx)) ||
+		((NULL == bnn) || (NULL == bnn->ctx)) ||
+		((NULL == bnr) || (NULL == bnr->ctx))) {
+		return SE_ERROR_BAD_PARAMS;
+	}
+	return aca_op_mul_mod(bnr->ctx, bna->ctx, bnb->ctx, bnn->ctx);
+}
+
+/**
+ * Used in:
+ * 1. RSA Sign && Verify
+ */
+int32_t se_bn_exp_mod(se_bn_t *bnr,
+					  const se_bn_t *bna,
+					  const se_bn_t *bne,
+					  const se_bn_t *bnn)
+{
+	if (((NULL == bna) || (NULL == bna->ctx)) ||
+		((NULL == bne) || (NULL == bne->ctx)) ||
+		((NULL == bnn) || (NULL == bnn->ctx)) ||
+		((NULL == bnr) || (NULL == bnr->ctx))) {
+		return SE_ERROR_BAD_PARAMS;
+	}
+	return aca_op_exp_mod(bnr->ctx, bna->ctx, bne->ctx, bnn->ctx);
+}
+
+/**
+ * Used in:
+ * 1. ECDSA Sign && Verify
+ */
+int32_t se_bn_shift_r(se_bn_t *bna, int32_t count)
+{
+	if ((NULL == bna) || (NULL == bna->ctx)) {
+		return SE_ERROR_BAD_PARAMS;
+	}
+	return aca_op_shift_r(bna->ctx, count);
+}
+
+/**
+ * Used in:
+ * 1. ECDSA Sign && Verify
+ */
+int32_t se_bn_mod_bn(se_bn_t *bnr, const se_bn_t *bna, const se_bn_t *bnn)
+{
+	if (((NULL == bna) || (NULL == bna->ctx)) ||
+		((NULL == bnn) || (NULL == bnn->ctx)) ||
+		((NULL == bnr) || (NULL == bnr->ctx))) {
+		return SE_ERROR_BAD_PARAMS;
+	}
+	return aca_op_mod_red(bnr->ctx, bna->ctx, bnn->ctx);
+}
+
+/**
+ * Used in:
+ * 1. RSA Sign && Blinding
+ */
+int32_t se_bn_mul_bn(se_bn_t *bnr, const se_bn_t *bna, const se_bn_t *bnb)
+{
+	if (((NULL == bna) || (NULL == bna->ctx)) ||
+		((NULL == bnb) || (NULL == bnb->ctx)) ||
+		((NULL == bnr) || (NULL == bnr->ctx))) {
+		return SE_ERROR_BAD_PARAMS;
+	}
+	return aca_op_mul(bnr->ctx, bna->ctx, bnb->ctx);
+}
+/**
+ * Used in:
+ * 1. RSA Sign && Blinding
+ */
+int32_t se_bn_add_bn(se_bn_t *bnr, const se_bn_t *bna, const se_bn_t *bnb)
+{
+	if (((NULL == bna) || (NULL == bna->ctx)) ||
+		((NULL == bnb) || (NULL == bnb->ctx)) ||
+		((NULL == bnr) || (NULL == bnr->ctx))) {
+		return SE_ERROR_BAD_PARAMS;
+	}
+	return aca_op_add(bnr->ctx, bna->ctx, bnb->ctx);
+}
+/**
+ * Used in:
+ * 1. RSA Sign && Blinding
+ */
+int32_t se_bn_sub_bn(se_bn_t *bnr, const se_bn_t *bna, const se_bn_t *bnb)
+{
+	if (((NULL == bna) || (NULL == bna->ctx)) ||
+		((NULL == bnb) || (NULL == bnb->ctx)) ||
+		((NULL == bnr) || (NULL == bnr->ctx))) {
+		return SE_ERROR_BAD_PARAMS;
+	}
+	return aca_op_sub(bnr->ctx, bna->ctx, bnb->ctx);
+}
+/**
+ * Used in:
+ * 1. RSA Sign && Blinding
+ */
+int32_t se_bn_exp_mod_large_e(se_bn_t *bnr,
+							  const se_bn_t *bna,
+							  const se_bn_t *bne,
+							  const se_bn_t *bnn)
+{
+	if (((NULL == bna) || (NULL == bna->ctx)) ||
+		((NULL == bne) || (NULL == bne->ctx)) ||
+		((NULL == bnn) || (NULL == bnn->ctx)) ||
+		((NULL == bnr) || (NULL == bnr->ctx))) {
+		return SE_ERROR_BAD_PARAMS;
+	}
+	return aca_op_exp_mod_large_e(bnr->ctx, bna->ctx, bne->ctx, bnn->ctx);
+}
+
+/**
+ * Used in:
+ * 1. RSA Sign && Blinding
+ */
+int32_t se_bn_import_random(se_bn_t *bn,
+							size_t size,
+							int32_t (*f_rng)(void *, uint8_t *, size_t),
+							void *p_rng)
+{
+	int32_t ret  = SE_SUCCESS;
+	uint8_t *tmp = NULL;
+
+	if ((NULL == bn) || (NULL == bn->ctx) || (NULL == f_rng)) {
+		return SE_ERROR_BAD_PARAMS;
+	}
+
+	tmp = kcalloc(1, size, GFP_KERNEL);
+	if (NULL == tmp) {
+		ret = SE_ERROR_OOM;
+		goto end;
+	}
+
+	ret = f_rng(p_rng, tmp, size);
+	if (SE_SUCCESS != ret) {
+		ret = SE_ERROR_GEN_RANDOM;
+		goto end;
+	}
+
+	ret = aca_op_import_bin(bn->ctx, tmp, size);
+	CHECK_SUCCESS_GOTO(ret, end);
+
+	ret = SE_SUCCESS;
+end:
+	if (NULL != tmp) {
+		kfree(tmp);
+	}
+	return ret;
+}
+
+/**
+ * Used in:
+ * 1. ECP
+ */
+static int32_t
+bn_import_random_bits(se_bn_t *bn,
+					  size_t bit_len,
+					  int32_t (*f_rng)(void *, uint8_t *, size_t),
+					  void *p_rng,
+					  int32_t fix_msb)
+{
+	int32_t ret  = SE_SUCCESS;
+	uint8_t *tmp = NULL;
+	size_t size  = 0;
+	size_t i     = 0;
+
+	if ((NULL == bn) || (NULL == bn->ctx) || (NULL == f_rng)) {
+		return SE_ERROR_BAD_PARAMS;
+	}
+
+	size = (bit_len + 7) / 8;
+	tmp  = kcalloc(1, size, GFP_KERNEL);
+	if (NULL == tmp) {
+		ret = SE_ERROR_OOM;
+		goto end;
+	}
+
+	ret = f_rng(p_rng, tmp, size);
+	if (SE_SUCCESS != ret) {
+		ret = SE_ERROR_GEN_RANDOM;
+		goto end;
+	}
+
+	/* Adjust byte[0] to fit bit length */
+	for (i = 0; i < (size * 8) - bit_len; i++) {
+		tmp[0] &= (~(((uint8_t)1) << (7 - i)));
+	}
+	if (fix_msb) {
+		/* always set MSB to 1 if fix_msb */
+		if ((bit_len & 7) == 0) {
+			tmp[0] |= (((uint8_t)1) << 7);
+		} else {
+			tmp[0] |= (((uint8_t)1) << ((bit_len & 7) - 1));
+		}
+	}
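+	/*
+	 * Worked example (illustrative only): bit_len = 10 gives size = 2, so
+	 * the top 16 - 10 = 6 bits of tmp[0] are cleared above; with fix_msb
+	 * set, bit ((10 & 7) - 1) = 1 of tmp[0] is forced to 1, so the imported
+	 * value has a bit length of exactly 10.
+	 */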
+
+	ret = aca_op_import_bin(bn->ctx, tmp, size);
+	CHECK_SUCCESS_GOTO(ret, end);
+
+	/* Double check bitlength */
+	if (fix_msb) {
+		ASSERT(aca_op_bitlen(bn->ctx) == bit_len);
+	} else {
+		ASSERT(aca_op_bitlen(bn->ctx) <= bit_len);
+	}
+
+	ret = SE_SUCCESS;
+end:
+	if (NULL != tmp) {
+		kfree(tmp);
+	}
+	return ret;
+}
+
+/**
+ * Used in:
+ * 1. ECP && Blinding
+ */
+int32_t se_bn_import_random_bits(se_bn_t *bn,
+								 size_t bit_len,
+								 int32_t (*f_rng)(void *, uint8_t *, size_t),
+								 void *p_rng)
+{
+	return bn_import_random_bits(bn, bit_len, f_rng, p_rng, 1);
+}
+
+/**
+ * Used in:
+ * 1. ECP Gen Private
+ */
+int32_t
+se_bn_import_random_max_bit_len(se_bn_t *bn,
+								size_t max_bit_len,
+								int32_t (*f_rng)(void *, uint8_t *, size_t),
+								void *p_rng)
+{
+	return bn_import_random_bits(bn, max_bit_len, f_rng, p_rng, 0);
+}
+
+/**
+ * Used in:
+ * 1. ECP
+ * 2. RSA
+ * 3. ECDSA
+ * 4. SM2DSA
+ */
+int32_t se_bn_copy(se_bn_t *bn_dst, const se_bn_t *bn_src)
+{
+	if (((NULL == bn_dst) || (NULL == bn_dst->ctx)) ||
+		((NULL == bn_src) || (NULL == bn_src->ctx))) {
+		return SE_ERROR_BAD_PARAMS;
+	}
+	return aca_op_copy(bn_dst->ctx, bn_src->ctx);
+}
+
+void se_bn_dump(const char *name, const se_bn_t *bn)
+{
+	if ((NULL == bn) || (NULL == bn->ctx)) {
+		return;
+	}
+	aca_op_dump(name, bn->ctx);
+}
+
+#if 0
+int32_t se_bn_ecp_mul(const se_bn_t *P,
+					  const se_bn_t *A,
+					  const se_bn_t *G_X,
+					  const se_bn_t *G_Y,
+					  const se_bn_t *G_Z,
+					  const se_bn_t *k,
+					  se_bn_t *R_X,
+					  se_bn_t *R_Y,
+					  se_bn_t *R_Z)
+{
+	if (((NULL == P) || (NULL == P->ctx)) ||
+		((NULL == A) || (NULL == A->ctx)) ||
+		((NULL == G_X) || (NULL == G_X->ctx)) ||
+		((NULL == G_Y) || (NULL == G_Y->ctx)) ||
+		((NULL == G_Z) || (NULL == G_Z->ctx)) ||
+		((NULL == k) || (NULL == k->ctx)) ||
+		((NULL == R_X) || (NULL == R_X->ctx)) ||
+		((NULL == R_Y) || (NULL == R_Y->ctx)) ||
+		((NULL == R_Z) || (NULL == R_Z->ctx))) {
+		return SE_ERROR_BAD_PARAMS;
+	}
+	return aca_op_ecp_mul(P->ctx,
+						  A->ctx,
+						  G_X->ctx,
+						  G_Y->ctx,
+						  G_Z->ctx,
+						  k->ctx,
+						  R_X->ctx,
+						  R_Y->ctx,
+						  R_Z->ctx);
+}
+
+int32_t se_bn_ecp_muladd(const se_bn_t *P,
+						 const se_bn_t *A,
+						 const se_bn_t *G1_X,
+						 const se_bn_t *G1_Y,
+						 const se_bn_t *G1_Z,
+						 const se_bn_t *k1,
+						 const se_bn_t *G2_X,
+						 const se_bn_t *G2_Y,
+						 const se_bn_t *G2_Z,
+						 const se_bn_t *k2,
+						 se_bn_t *R_X,
+						 se_bn_t *R_Y,
+						 se_bn_t *R_Z)
+{
+	if (((NULL == P) || (NULL == P->ctx)) ||
+		((NULL == A) || (NULL == A->ctx)) ||
+		((NULL == G1_X) || (NULL == G1_X->ctx)) ||
+		((NULL == G1_Y) || (NULL == G1_Y->ctx)) ||
+		((NULL == G1_Z) || (NULL == G1_Z->ctx)) ||
+		((NULL == k1) || (NULL == k1->ctx)) ||
+		((NULL == G2_X) || (NULL == G2_X->ctx)) ||
+		((NULL == G2_Y) || (NULL == G2_Y->ctx)) ||
+		((NULL == G2_Z) || (NULL == G2_Z->ctx)) ||
+		((NULL == k2) || (NULL == k2->ctx)) ||
+		((NULL == R_X) || (NULL == R_X->ctx)) ||
+		((NULL == R_Y) || (NULL == R_Y->ctx)) ||
+		((NULL == R_Z) || (NULL == R_Z->ctx))) {
+		return SE_ERROR_BAD_PARAMS;
+	}
+	return aca_op_ecp_muladd(P->ctx,
+							 A->ctx,
+							 G1_X->ctx,
+							 G1_Y->ctx,
+							 G1_Z->ctx,
+							 k1->ctx,
+							 G2_X->ctx,
+							 G2_Y->ctx,
+							 G2_Z->ctx,
+							 k2->ctx,
+							 R_X->ctx,
+							 R_Y->ctx,
+							 R_Z->ctx);
+}
+#endif
\ No newline at end of file
diff --git a/marvell/linux/drivers/crypto/asr/te200/asr-aca/se_bn.h b/marvell/linux/drivers/crypto/asr/te200/asr-aca/se_bn.h
new file mode 100644
index 0000000..928fb6b
--- /dev/null
+++ b/marvell/linux/drivers/crypto/asr/te200/asr-aca/se_bn.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2021, Arm Technology (China) Co., Ltd.
+ * All rights reserved.
+ *
+ * The content of this file or document is CONFIDENTIAL and PROPRIETARY
+ * to Arm Technology (China) Co., Ltd. It is subject to the terms of a
+ * License Agreement between Licensee and Arm Technology (China) Co., Ltd
+ * restricting among other things, the use, reproduction, distribution
+ * and transfer.  Each of the embodiments, including this information and,,
+ * any derivative work shall retain this copyright notice.
+ */
+
+#ifndef __SE_LITE_BN_H__
+#define __SE_LITE_BN_H__
+
+#include "se_aca.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/******************************************************************************/
+/*                            Bignumber APIs                                  */
+/******************************************************************************/
+struct se_aca_bn_t;
+/**
+ * The bignumber context.
+ */
+typedef struct se_bn_t {
+	struct se_aca_bn_t *ctx;
+} se_bn_t;
+
+/**
+ * \brief This function initializes one bignumber.
+ *
+ * This makes the bignumber ready to be set or read, but does NOT define a
+ * value for the bignumber.
+ *
+ * \param[in] bn    The bignumber context.
+ *                  This must not be \c NULL.
+ * \return          \c SE_SUCCESS on success.
+ * \return          \c SE_ERROR_BAD_PARAMS on detecting bad parameters.
+ * \return          \c SE_ERROR_OOM on memory allocation failed.
+ */
+int32_t se_bn_init(se_bn_t *bn);
+
+/**
+ * \brief This function frees the components of a bignumber and sets the \c ctx
+ *        in \c se_bn_t to NULL.
+ *
+ * \param[in] bn    The bignumber to be cleared. The \p bn or the \c ctx in
+ *                  \p bn may be \c NULL, in which case this function is
+ *                  a no-op.
+ */
+void se_bn_free(se_bn_t *bn);
+
+/**
+ * \brief Import a BN from unsigned big endian binary data.
+ *
+ * \param[in] bn    The destination BN. This must point to an initialized BN.
+ * \param[in] buf   The input buffer. This must be a readable buffer of length
+ *                  \p size Bytes.
+ * \param[in] size  The length of the input buffer \p buf in Bytes.
+ * \return          \c SE_SUCCESS on success.
+ * \return          \c SE_ERROR_BAD_PARAMS on detecting bad parameters.
+ * \return          \c SE_ERROR_OOM on memory allocation failed.
+ */
+int32_t se_bn_import(se_bn_t *bn, const uint8_t *buf, size_t size);
+
+/**
+ * \brief Export a BN into unsigned big endian binary data of fixed size.
+ *
+ * \param[in] bn    The source BN. This must point to an initialized BN.
+ * \param[out] buf  The output buffer. This must be a writable buffer of
+ *                  length \p size Bytes.
+ * \param[in] size  The length of the output buffer \p buf in Bytes.
+ * \return          \c SE_SUCCESS on success.
+ * \return          \c SE_ERROR_BAD_PARAMS on detecting bad parameters.
+ * \return          \c SE_ERROR_SHORT_BUFFER on buffer size too short to save
+ *                  BN data.
+ */
+int32_t se_bn_export(const se_bn_t *bn, uint8_t *buf, size_t size);
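+
+/*
+ * Minimal usage sketch (illustrative only, not part of the API; `data` and
+ * `data_len` stand for caller-provided big endian input):
+ *
+ *   se_bn_t bn = {NULL};
+ *   uint8_t out[32] = {0};
+ *
+ *   if ((se_bn_init(&bn) == SE_SUCCESS) &&
+ *       (se_bn_import(&bn, data, data_len) == SE_SUCCESS)) {
+ *       (void)se_bn_export(&bn, out, sizeof(out));
+ *   }
+ *   se_bn_free(&bn);
+ */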
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __SE_LITE_BN_H__ */
diff --git a/marvell/linux/drivers/crypto/asr/te200/asr-aca/se_common.h b/marvell/linux/drivers/crypto/asr/te200/asr-aca/se_common.h
new file mode 100644
index 0000000..bed12d2
--- /dev/null
+++ b/marvell/linux/drivers/crypto/asr/te200/asr-aca/se_common.h
@@ -0,0 +1,528 @@
+/*
+ * Copyright (c) 2021, Arm Technology (China) Co., Ltd.
+ * All rights reserved.
+ *
+ * The content of this file or document is CONFIDENTIAL and PROPRIETARY
+ * to Arm Technology (China) Co., Ltd. It is subject to the terms of a
+ * License Agreement between Licensee and Arm Technology (China) Co., Ltd
+ * restricting among other things, the use, reproduction, distribution
+ * and transfer.  Each of the embodiments, including this information and,,
+ * any derivative work shall retain this copyright notice.
+ */
+
+#ifndef __SE_LITE_COMMON_H__
+#define __SE_LITE_COMMON_H__
+
+#include <linux/types.h>
+#include <linux/bug.h>
+#include "se_rsa.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * CE base address
+ */
+#ifndef SE_CFG_BASE_ADDR
+#define SE_CFG_BASE_ADDR    0xD4220000UL
+#endif
+
+/**
+ * CE host number
+ */
+#ifndef SE_CFG_HOST_ID
+#define SE_CFG_HOST_ID      0
+#endif
+
+/*
+ * Top memory map
+ */
+#define TOP_CTL_OFS (0x0000U)
+#define TOP_STAT_OFS (0x0100U)
+#define TRNG_CTL_OFS (0x0200U)
+#define DBG_CTL_OFS (0x0300U)
+#define OTP_CTL_OFS (0x0400U)
+#define OTP_SPASE_OFS (0x1000U)
+
+
+#define HOST_IO_SIZE (0x400U)
+#define ACA_OFS (0x3000U + HOST_IO_SIZE * SE_CFG_HOST_ID)
+#define SCA_OFS (0x3200U + HOST_IO_SIZE * SE_CFG_HOST_ID)
+#define HASH_OFS (0x3280U + HOST_IO_SIZE * SE_CFG_HOST_ID)
+#if SE_CFG_HOST_ID == 0
+#define RNP_OFS (0x0280U)
+#elif SE_CFG_HOST_ID == 1
+#define RNP_OFS (0x3800U)
+#else
+#error "SHANHAI Dubhe only supports host 0 and host 1"
+#endif
+
+/*
+ * Top control registers
+ */
+#define TOP_CTL_CLOCK_CTL (TOP_CTL_OFS + 0x00)
+#define TOP_CTL_CLOCK_CTL_HASH_CLK_EN_SHIFT    (0x00U)
+#define TOP_CTL_CLOCK_CTL_HASH_CLK_EN_WIDTH    (0x01U)
+#define TOP_CTL_CLOCK_CTL_SCA_CLK_EN_SHIFT     (0x01U)
+#define TOP_CTL_CLOCK_CTL_SCA_CLK_EN_WIDTH     (0x01U)
+#define TOP_CTL_CLOCK_CTL_ACA_CLK_EN_SHIFT     (0x02U)
+#define TOP_CTL_CLOCK_CTL_ACA_CLK_EN_WIDTH     (0x01U)
+#define TOP_CTL_CLOCK_CTL_OTP_CLK_EN_SHIFT     (0x03U)
+#define TOP_CTL_CLOCK_CTL_OTP_CLK_EN_WIDTH     (0x01U)
+#define TOP_CTL_CLOCK_CTL_TRNG_CLK_EN_SHIFT    (0x04U)
+#define TOP_CTL_CLOCK_CTL_TRNG_CLK_EN_WIDTH    (0x01U)
+#define TOP_CTL_CLOCK_CTL_DMA_CLK_EN_SHIFT     (0x05U)
+#define TOP_CTL_CLOCK_CTL_DMA_CLK_EN_WIDTH     (0x01U)
+
+
+#define TOP_CTL_RESET_CTL (TOP_CTL_OFS + 0x04)
+#define TOP_CTL_RESET_CTL_HASH_SRESET_SHIFT    (0x00U)
+#define TOP_CTL_RESET_CTL_HASH_SRESET_WIDTH    (0x01U)
+#define TOP_CTL_RESET_CTL_SCA_SRESET_SHIFT     (0x01U)
+#define TOP_CTL_RESET_CTL_SCA_SRESET_WIDTH     (0x01U)
+#define TOP_CTL_RESET_CTL_ACA_SRESET_SHIFT     (0x02U)
+#define TOP_CTL_RESET_CTL_ACA_SRESET_WIDTH     (0x01U)
+#define TOP_CTL_RESET_CTL_OTP_SRESET_SHIFT     (0x03U)
+#define TOP_CTL_RESET_CTL_OTP_SRESET_WIDTH     (0x01U)
+#define TOP_CTL_RESET_CTL_TRNG_SRESET_SHIFT    (0x04U)
+#define TOP_CTL_RESET_CTL_TRNG_SRESET_WIDTH    (0x01U)
+
+#define TOP_CTL_TOP_CFG (TOP_CTL_OFS + 0x08)
+
+/*
+ * Top status registers
+ */
+#define TOP_STAT_VER (TOP_STAT_OFS + 0x00)
+
+#define TOP_STAT_CFG1 (TOP_STAT_OFS + 0x04)
+#define TOP_STAT_CFG1_OTP_TST_WORD_SIZE_SHIFT    (0x00U)
+#define TOP_STAT_CFG1_OTP_TST_WORD_SIZE_WIDTH    (0x0BU)
+#define TOP_STAT_CFG1_RN_POOL_EXT_HOST_SEC_SHIFT (0x0FU)
+#define TOP_STAT_CFG1_RN_POOL_EXT_HOST_SEC_WIDTH (0x01U)
+#define TOP_STAT_CFG1_RN_POOL_HOST_NUM_SHIFT     (0x10U)
+#define TOP_STAT_CFG1_RN_POOL_HOST_NUM_WIDTH     (0x02U)
+#define TOP_STAT_CFG1_TRNG_INTER_SRC_EXIST_SHIFT (0x12U)
+#define TOP_STAT_CFG1_TRNG_INTER_SRC_EXIST_WIDTH (0x01U)
+#define TOP_STAT_CFG1_OTP_INIT_VALUE_SHIFT       (0x13U)
+#define TOP_STAT_CFG1_OTP_INIT_VALUE_WIDTH       (0x01U)
+#define TOP_STAT_CFG1_OTP_EXIST_SHIFT            (0x14U)
+#define TOP_STAT_CFG1_OTP_EXIST_WIDTH            (0x01U)
+#define TOP_STAT_CFG1_ACA_SRAM_SIZE_SHIFT        (0x15U)
+#define TOP_STAT_CFG1_ACA_SRAM_SIZE_WIDTH        (0x02U)
+#define TOP_STAT_CFG1_ACA_EXT_HOST_SEC_SHIFT     (0x17U)
+#define TOP_STAT_CFG1_ACA_EXT_HOST_SEC_WIDTH     (0x01U)
+#define TOP_STAT_CFG1_ACA_HOST_NUM_SHIFT         (0x18U)
+#define TOP_STAT_CFG1_ACA_HOST_NUM_WIDTH         (0x02U)
+#define TOP_STAT_CFG1_HASH_EXT_HOST_SEC_SHIFT    (0x1AU)
+#define TOP_STAT_CFG1_HASH_EXT_HOST_SEC_WIDTH    (0x01U)
+#define TOP_STAT_CFG1_HASH_HOST_NUM_SHIFT        (0x1BU)
+#define TOP_STAT_CFG1_HASH_HOST_NUM_WIDTH        (0x02U)
+#define TOP_STAT_CFG1_SCA_EXT_HOST_SEC_SHIFT     (0x1DU)
+#define TOP_STAT_CFG1_SCA_EXT_HOST_SEC_WIDTH     (0x01U)
+#define TOP_STAT_CFG1_SCA_HOST_NUM_SHIFT         (0x1EU)
+#define TOP_STAT_CFG1_SCA_HOST_NUM_WIDTH         (0x02U)
+
+#define TOP_STAT_CFG2 (TOP_STAT_OFS + 0x08)
+#define TOP_STAT_CFG2_OTP_SEC_WORD_SIZE_SHIFT    (0x00U)
+#define TOP_STAT_CFG2_OTP_SEC_WORD_SIZE_WIDTH    (0x0BU)
+#define TOP_STAT_CFG2_OTP_NSEC_WORD_SIZE_SHIFT   (0x10U)
+#define TOP_STAT_CFG2_OTP_NSEC_WORD_SIZE_WIDTH   (0x0BU)
+#define TOP_STAT_INTR_HOST0 (TOP_STAT_OFS + 0x10)
+#define TOP_STAT_INTR_HOST1 (TOP_STAT_OFS + 0x14)
+
+
+/*
+ * Top Debug Control registers
+ */
+#define TOP_DBG_CTL (DBG_CTL_OFS + 0x00U)
+#define TOP_DBG_CTRL_VALUE_SHIFT   (0x00U)
+#define TOP_DBG_CTRL_VALUE_WIDTH    (0x20U)
+
+#define TOP_DBG_CTL_LOCK (DBG_CTL_OFS + 0x04U)
+#define TOP_DBG_CTL_LOCK_VALUE_SHIFT   (0x00U)
+#define TOP_DBG_CTL_LOCK_VALUE_WIDTH    (0x20U)
+
+/*
+ * Make the CE IO address based on the specified offset
+ */
+#define SE_ADDR(ofs) ((uintptr_t)SE_CFG_BASE_ADDR + (ofs))
+
+#define ASSERT(cnd)	BUG_ON(!(cnd))
+
+#ifdef IO_LOG_ENABLE
+
+#define IO_LOG(...)                                                            \
+	do {                                                                       \
+		printk(__VA_ARGS__);                                           \
+	} while (0)
+
+#else /* IO_LOG_ENABLE */
+
+#define IO_LOG(...)                                                            \
+	do {                                                                       \
+	} while (0)
+
+#endif /* !IO_LOG_ENABLE */
+
+/*
+ * Read register
+ */
+#define IO_READ32(ofs)                                                         \
+	__extension__({                                                            \
+		uint32_t _v = asr_rsa_read(ofs);                                     \
+		IO_LOG(                                                                \
+			"READ_REG_WORD (0x%08x)                              ;0x%08x\n",   \
+			SE_ADDR(ofs), _v);                                                 \
+		_v;                                                                    \
+	})
+
+/*
+ * Write register
+ */
+#define IO_WRITE32(ofs, val)                                                   \
+	do {                                                                       \
+		IO_LOG("WRITE_REG_WORD(0x%08x, 0x%08x)\n", SE_ADDR(ofs), (val));       \
+		asr_rsa_write(ofs, val);                                               \
+	} while (0)
+
+/*
+ * Check register
+ */
+#define IO_CHECK32(ofs, msk, want)                                             \
+	do {                                                                       \
+		uint32_t _t = 0;                                                       \
+		IO_LOG("CHECK_REG_WORD(0x%08x, 0x%08x, 0x%08x)", SE_ADDR(ofs), (msk),  \
+			   (want));                                                        \
+		do {                                                                   \
+			_t = LE32TOH(*(volatile uint32_t *)SE_ADDR(ofs));                  \
+		} while ((_t & (msk)) != ((want) & (msk)));                            \
+		IO_LOG("      ;0x%08x\n", _t);                                         \
+	} while (0)
+
+/*
+ * Endianness
+ */
+#define ___se_swap32(x)                                                        \
+	((uint32_t)((((uint32_t)(x) & (uint32_t)0x000000ffU) << 24) |             \
+				(((uint32_t)(x) & (uint32_t)0x0000ff00U) << 8) |              \
+				(((uint32_t)(x) & (uint32_t)0x00ff0000U) >> 8) |              \
+				(((uint32_t)(x) & (uint32_t)0xff000000U) >> 24)))
+
+
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define HTOLE32(x) ((uint32_t)(x))
+#define LE32TOH(x) ((uint32_t)(x))
+
+#define HTOBE32(x) (___se_swap32(x))
+#define BE32TOH(x) (___se_swap32(x))
+
+#else /* __ORDER_LITTLE_ENDIAN__ */
+
+#define HTOLE32(x) (___se_swap32(x))
+#define LE32TOH(x) (___se_swap32(x))
+
+#define HTOBE32(x) ((uint32_t)(x))
+#define BE32TOH(x) ((uint32_t)(x))
+
+#endif /* !__ORDER_LITTLE_ENDIAN__ */
+
+/**
+ * Other helper macros
+ */
+#ifndef SE_ARRAY_SIZE
+#define SE_ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
+#endif
+
+#define SE_MIN(a, b) ((a) < (b) ? (a) : (b))
+#define SE_MAX(a, b) ((a) >= (b) ? (a) : (b))
+
+#define SE_CONTAINER_OF(ptr, TYPE, FIELD)                                      \
+	((TYPE *)(((char *)(ptr)) - offsetof(TYPE, FIELD)))
+
+/* round "x" up/down to next multiple of "align" (which must be a power of 2) */
+#define SE_ROUND_UP(x, a) (((x) + ((a)-1)) & ~((a)-1))
+#define SE_ROUND_DOWN(x, a) ((x) & ~((a)-1))
+/* check if "x" is align with "align"(must be a power of 2) */
+#define SE_IS_ALIGNED(x, a) (!(((x)) & ((a)-1)))
+
+#ifndef __se_dma_aligned
+/**
+ * __se_dma_aligned - Mark a variable as DMA (cache-line) aligned.
+ */
+#define SE_DMA_ALIGNED OSAL_CACHE_LINE_SIZE
+#define __se_dma_aligned __attribute__((aligned(SE_DMA_ALIGNED)))
+#endif
+
+/* The maximum retry count when calling the random function */
+#define SE_RANDOM_RETRY_CNT (30)
+
+#define CHECK_SUCCESS_GOTO(_ret_, _label_)                                     \
+	do {                                                                       \
+		if (SE_SUCCESS != _ret_) {                                             \
+			goto _label_;                                                      \
+		}                                                                      \
+	} while (0)
+
+#ifndef SET_BITS
+/**
+ * SET_BITS - set bits [\p s, \p s + \p w - 1] of value \p v to \p nv.
+ * @v:          output value.
+ * @s:          the field shift value.
+ * @w:          the field width.
+ * @nv:         new value.
+ */
+#define SET_BITS(v, s, w, nv) do {                                              \
+	(v) &= ~(((0x1 << (w)) - 1) << (s));                                        \
+	(v) |= ((nv) & ((0x1 << (w)) - 1)) << (s);                                  \
+} while(0)
+#endif
+
+#ifndef GET_BITS
+/**
+ * GET_BITS - get bits [\p s, \p s + \p w - 1] from value \p v.
+ * @v:          input value.
+ * @s:          the field shift value.
+ * @w:          the field width.
+ */
+#define GET_BITS(v, s, w) (((v) >> (s)) & ((1 << (w)) - 1))
+#endif
+
+#ifndef FIELD_GET
+/**
+ * Get the field \p fn value from the specified register \p rn
+ * value \p val.
+ *
+ * \val             Register value, 32-bit.
+ * \rn              Register name.
+ * \fn              Register field name.
+ */
+#define FIELD_GET(val, rn, fn) __extension__({         \
+	 GET_BITS((val),rn ##_## fn ##_SHIFT,                  \
+			  rn ##_## fn ##_WIDTH);                       \
+})
+#endif
+
+#ifndef FIELD_SET
+/**
+ * Set the field \p fn value \p fv in the specified register \p rn
+ * value \p val.
+ *
+ * \val             Register value, 32-bit.
+ * \rn              Register name.
+ * \fn              Register field name.
+ * \fv              Register field value.
+ */
+#define FIELD_SET(val, rn, fn, fv) __extension__({    \
+	 SET_BITS((val),rn ##_## fn ##_SHIFT,                 \
+			  rn ##_## fn ##_WIDTH,(fv));                 \
+})
+#endif
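+
+/*
+ * Illustrative read-modify-write sketch using the field helpers above
+ * (assumes the register accessors IO_READ32()/IO_WRITE32() defined in this
+ * header):
+ *
+ *   uint32_t v = IO_READ32(TOP_CTL_CLOCK_CTL);
+ *   FIELD_SET(v, TOP_CTL_CLOCK_CTL, ACA_CLK_EN, 1);
+ *   IO_WRITE32(TOP_CTL_CLOCK_CTL, v);
+ *   // FIELD_GET(v, TOP_CTL_CLOCK_CTL, ACA_CLK_EN) now reads back 1
+ */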
+
+
+/**
+ * Macros to extract the main algorithm and chain mode from an algorithm identifier
+ */
+#define SE_ALG_GET_MAIN_ALG(algo) ((algo) & 0xFF)
+#define SE_ALG_GET_CHAIN_MODE(algo) (((algo) >> 8) & 0xF)
+
+
+
+/******************************************************************************/
+/*                              Error Code                                    */
+/******************************************************************************/
+/* API Error Codes (GP TEE compliant) */
+#define SE_SUCCESS                       0x00000000
+#define SE_ERROR_GENERIC                 0xFFFF0000
+#define SE_ERROR_ACCESS_DENIED           0xFFFF0001
+#define SE_ERROR_CANCEL                  0xFFFF0002
+#define SE_ERROR_ACCESS_CONFLICT         0xFFFF0003
+#define SE_ERROR_EXCESS_DATA             0xFFFF0004
+#define SE_ERROR_BAD_FORMAT              0xFFFF0005
+#define SE_ERROR_BAD_PARAMS              0xFFFF0006
+#define SE_ERROR_BAD_STATE               0xFFFF0007
+#define SE_ERROR_ITEM_NOT_FOUND          0xFFFF0008
+#define SE_ERROR_NOT_IMPLEMENTED         0xFFFF0009
+#define SE_ERROR_NOT_SUPPORTED           0xFFFF000A
+#define SE_ERROR_NO_DATA                 0xFFFF000B
+#define SE_ERROR_OOM                     0xFFFF000C
+#define SE_ERROR_BUSY                    0xFFFF000D
+#define SE_ERROR_COMMUNICATION           0xFFFF000E
+#define SE_ERROR_SECURITY                0xFFFF000F
+#define SE_ERROR_SHORT_BUFFER            0xFFFF0010
+#define SE_ERROR_EXTERNAL_CANCEL         0xFFFF0011
+#define SE_ERROR_TIMEOUT                 0xFFFF3001
+#define SE_ERROR_OVERFLOW                0xFFFF300F
+
+/**
+ * Extended Error Codes
+ *
+ * +------+----------+
+ * |Type  |Range     |
+ * +------+----------+
+ * |Common|0x800000xx|
+ * +------+----------+
+ * |Cipher|0x800001xx|
+ * +------+----------+
+ * |MPI   |0x800002xx|
+ * +------+----------+
+ * |DHM   |0x800003xx|
+ * +------+----------+
+ * |PK    |0x800004xx|
+ * +------+----------+
+ */
+#define SE_ERROR_AGAIN                   0x80000000
+#define SE_ERROR_FEATURE_UNAVAIL         0x80000001
+#define SE_ERROR_BAD_KEY_LENGTH          0x80000002
+#define SE_ERROR_INVAL_KEY               0x80000003
+#define SE_ERROR_BAD_INPUT_LENGTH        0x80000004
+#define SE_ERROR_BAD_INPUT_DATA          0x80000005
+#define SE_ERROR_AUTH_FAILED             0x80000006
+#define SE_ERROR_INVAL_CTX               0x80000007
+#define SE_ERROR_UNKNOWN_ALG             0x80000008
+#define SE_ERROR_INVAL_ALG               0x80000009
+
+#define SE_ERROR_INVAL_PADDING           0x80000100
+#define SE_ERROR_INCOMPLETE_BLOCK        0x80000101
+
+#define SE_ERROR_INVAL_CHAR              0x80000200
+#define SE_ERROR_NEGATIVE_VALUE          0x80000201
+#define SE_ERROR_DIV_BY_ZERO             0x80000202
+#define SE_ERROR_NOT_ACCEPTABLE          0x80000203
+#define SE_ERROR_NO_SRAM_SPACE           0x80000204
+#define SE_ERROR_NO_AVAIL_GR             0x80000205
+#define SE_ERROR_NO_AVAIL_LEN_TYPE       0x80000206
+#define SE_ERROR_INVAL_MOD               0x80000207
+#define SE_ERROR_NOT_PRIME               0x80000208
+#define SE_ERROR_OP_TOO_LONG             0x80000209
+
+#define SE_ERROR_READ_PARAMS             0x80000300
+#define SE_ERROR_MAKE_PARAMS             0x80000301
+#define SE_ERROR_READ_PUBLIC             0x80000302
+#define SE_ERROR_MAKE_PUBLIC             0x80000303
+#define SE_ERROR_CALC_SECRET             0x80000304
+#define SE_ERROR_SET_GROUP               0x80000305
+
+#define SE_ERROR_GEN_RANDOM              0x80000400
+#define SE_ERROR_TYPE_MISMATCH           0x80000401
+#define SE_ERROR_KEY_VERSION             0x80000402
+#define SE_ERROR_KEY_FORMAT              0x80000403
+#define SE_ERROR_INVAL_PUBKEY            0x80000404
+#define SE_ERROR_UNKNOWN_CURVE           0x80000405
+#define SE_ERROR_SIG_LENGTH              0x80000406
+#define SE_ERROR_GEN_KEY                 0x80000407
+#define SE_ERROR_CHECK_KEY               0x80000408
+#define SE_ERROR_PUBLIC_OP               0x80000409
+#define SE_ERROR_PRIVATE_OP              0x8000040A
+#define SE_ERROR_VERIFY_SIG              0x8000040B
+#define SE_ERROR_OUT_TOO_LARGE           0x8000040C
+#define SE_ERROR_BAD_PADDING             0x8000040D
+
+/******************************************************************************/
+/*                           Algorithm identifier                             */
+/******************************************************************************/
+/**
+ * Algorithm identifier type
+ */
+typedef uint32_t se_algo_t;
+
+/* Main Algorithm */
+#define SE_MAIN_ALGO_SHA1                 0x02
+#define SE_MAIN_ALGO_SHA224               0x03
+#define SE_MAIN_ALGO_SHA256               0x04
+#define SE_MAIN_ALGO_SM3                  0x07
+#define SE_MAIN_ALGO_AES                  0x10
+#define SE_MAIN_ALGO_SM4                  0x14
+#define SE_MAIN_ALGO_RSA                  0x30
+#define SE_MAIN_ALGO_SM2                  0x45
+
+/*
+ * Algorithm Identifiers (compliant with GP Core API v1.2.1)
+ * Bitwise value with assignments
+ *    [31:28]    class (operation)
+ *    [23:20]    internal hash
+ *    [15:12]    digest hash
+ *    [11:8]     chain mode
+ *    [7:0]      main algo
+ */
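+/*
+ * Example decomposition (illustrative): SE_ALG_RSASSA_PKCS1_V1_5_SHA256
+ * below is 0x70004830, i.e. class 0x7, digest hash 0x4 (SE_MAIN_ALGO_SHA256),
+ * chain mode 0x8 and main algo 0x30 (SE_MAIN_ALGO_RSA); SE_ALG_GET_MAIN_ALG()
+ * returns 0x30 and SE_ALG_GET_CHAIN_MODE() returns 0x8 for it.
+ */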
+#define SE_ALG_AES_ECB_NOPAD                   0x10000010
+#define SE_ALG_AES_CBC_NOPAD                   0x10000110
+#define SE_ALG_AES_CTR                         0x10000210
+#define SE_ALG_AES_ECB_PKCS7                   0x10000B10
+#define SE_ALG_AES_CBC_PKCS7                   0x10000C10
+#define SE_ALG_SM4_ECB_NOPAD                   0x10000014
+#define SE_ALG_SM4_CBC_NOPAD                   0x10000114
+#define SE_ALG_SM4_CTR                         0x10000214
+#define SE_ALG_SM4_ECB_PKCS7                   0x10000B14
+#define SE_ALG_SM4_CBC_PKCS7                   0x10000C14
+#define SE_ALG_RSASSA_PKCS1_V1_5_SHA1          0x70002830
+#define SE_ALG_RSASSA_PKCS1_V1_5_SHA224        0x70003830
+#define SE_ALG_RSASSA_PKCS1_V1_5_SHA256        0x70004830
+#define SE_ALG_RSASSA_PKCS1_PSS_MGF1_SHA1      0x70212930
+#define SE_ALG_RSASSA_PKCS1_PSS_MGF1_SHA224    0x70313930
+#define SE_ALG_RSASSA_PKCS1_PSS_MGF1_SHA256    0x70414930
+#define SE_ALG_RSA_NOPAD                       0x60000030
+#define SE_ALG_SHA1                            0x50000002
+#define SE_ALG_SHA224                          0x50000003
+#define SE_ALG_SHA256                          0x50000004
+#define SE_ALG_SM3                             0x50000007
+#define SE_ALG_ECDSA_P256                      0x70003041
+#define SE_ALG_ECDSA_P521                      0x70005041
+#define SE_ALG_SM2_DSA_SM3                     0x70007045
+
+/******************************************************************************/
+/*                                 UTIL Size                                  */
+/******************************************************************************/
+/**
+ * SCA block size
+ */
+#define SE_AES_BLOCK_SIZE      16UL
+#define SE_SM4_BLOCK_SIZE      16UL
+#define SE_MAX_SCA_BLOCK       16UL
+
+/**
+ * SCA key size
+ */
+#define SE_SM4_KEY_SIZE        16UL
+#define SE_MAX_AES_KEY         32UL
+#define SE_MAX_SCA_KEY         32UL
+
+/**
+ * HASH block size
+ */
+#define SE_SHA1_BLK_SIZE       64UL
+#define SE_SHA224_BLK_SIZE     64UL
+#define SE_SHA256_BLK_SIZE     64UL
+#define SE_SM3_BLK_SIZE        64UL
+#define SE_MAX_HASH_BLOCK      SE_SHA256_BLK_SIZE
+
+/**
+ * HASH digest size
+ */
+#define SE_SHA1_HASH_SIZE      20UL
+#define SE_SHA224_HASH_SIZE    28UL
+#define SE_SHA256_HASH_SIZE    32UL
+#define SE_SM3_HASH_SIZE       32UL
+#define SE_MAX_HASH_SIZE       SE_SHA256_HASH_SIZE
+
+/******************************************************************************/
+/*                                   APIs                                     */
+/******************************************************************************/
+
+/**
+ * \brief           This function initializes the CE driver.
+ * \return          \c SE_SUCCESS on success.
+ * \return          \c SE_ERROR_GENERIC if there is a hardware error.
+ */
+int32_t se_drv_init(void);
+
+/**
+ * \brief           This function deinitializes the CE driver.
+ */
+void se_drv_exit(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __SE_LITE_COMMON_H__ */
diff --git a/marvell/linux/drivers/crypto/asr/te200/asr-aca/se_rsa.c b/marvell/linux/drivers/crypto/asr/te200/asr-aca/se_rsa.c
new file mode 100644
index 0000000..c6e924a
--- /dev/null
+++ b/marvell/linux/drivers/crypto/asr/te200/asr-aca/se_rsa.c
@@ -0,0 +1,1973 @@
+/*
+ * Copyright (c) 2020-2021, Arm Technology (China) Co., Ltd.
+ * All rights reserved.
+ *
+ * The content of this file or document is CONFIDENTIAL and PROPRIETARY
+ * to Arm Technology (China) Co., Ltd. It is subject to the terms of a
+ * License Agreement between Licensee and Arm Technology (China) Co., Ltd
+ * restricting among other things, the use, reproduction, distribution
+ * and transfer.  Each of the embodiments, including this information and,,
+ * any derivative work shall retain this copyright notice.
+ */
+
+#include <linux/module.h>
+#include <linux/mpi.h>
+#include <crypto/internal/rsa.h>
+#include <crypto/internal/akcipher.h>
+#include <crypto/akcipher.h>
+#include <crypto/algapi.h>
+#include <crypto/rng.h>
+#include <linux/slab.h>
+#include <asm/uaccess.h>
+#include <linux/random.h>
+#include <uapi/linux/hwrsa_ioctl.h>
+#include "se_common.h"
+#include "se_bn.h"
+#include "se_rsa.h"
+#include "se_aca.h"
+#include "se_aca_internal.h"
+#include "../asr-te200.h"
+
+static int32_t se_rsa_init(se_rsa_context_t *ctx)
+{
+	int32_t ret = SE_SUCCESS;
+
+	if (NULL == ctx) {
+		return SE_ERROR_BAD_PARAMS;
+	}
+
+	memset(ctx, 0, sizeof(se_rsa_context_t));
+
+	ret = se_bn_init(&(ctx->N));
+	CHECK_SUCCESS_GOTO(ret, end);
+	ret = se_bn_init(&(ctx->E));
+	CHECK_SUCCESS_GOTO(ret, end);
+
+	ret = se_bn_init(&(ctx->D));
+	CHECK_SUCCESS_GOTO(ret, end);
+	ret = se_bn_init(&(ctx->P));
+	CHECK_SUCCESS_GOTO(ret, end);
+	ret = se_bn_init(&(ctx->Q));
+	CHECK_SUCCESS_GOTO(ret, end);
+	ret = se_bn_init(&(ctx->Vi));
+	CHECK_SUCCESS_GOTO(ret, end);
+	ret = se_bn_init(&(ctx->Vf));
+	CHECK_SUCCESS_GOTO(ret, end);
+
+end:
+	if (ret != SE_SUCCESS) {
+		/* se_bn_free is safe to call even if the bn is not initialized */
+		se_bn_free(&(ctx->N));
+		se_bn_free(&(ctx->E));
+		se_bn_free(&(ctx->D));
+		se_bn_free(&(ctx->P));
+		se_bn_free(&(ctx->Q));
+		se_bn_free(&(ctx->Vi));
+		se_bn_free(&(ctx->Vf));
+	}
+	return ret;
+}
+
+static void se_rsa_free(se_rsa_context_t *ctx)
+{
+	if (NULL == ctx) {
+		return;
+	}
+	se_bn_free(&(ctx->N));
+	se_bn_free(&(ctx->E));
+	se_bn_free(&(ctx->D));
+	se_bn_free(&(ctx->P));
+	se_bn_free(&(ctx->Q));
+	se_bn_free(&(ctx->Vi));
+	se_bn_free(&(ctx->Vf));
+
+	memset(ctx, 0, sizeof(se_rsa_context_t));
+	return;
+}
+
+static int32_t se_rsa_import_raw(se_rsa_context_t *ctx,
+						  const uint8_t *N,
+						  size_t N_len,
+						  const uint8_t *P,
+						  size_t P_len,
+						  const uint8_t *Q,
+						  size_t Q_len,
+						  const uint8_t *D,
+						  size_t D_len,
+						  const uint8_t *E,
+						  size_t E_len)
+{
+	int32_t ret = SE_SUCCESS;
+
+	if (NULL == ctx) {
+		return SE_ERROR_BAD_PARAMS;
+	}
+
+	if (N != NULL) {
+		ret = se_bn_import(&(ctx->N), N, N_len);
+		CHECK_SUCCESS_GOTO(ret, end);
+		ret = se_bn_bitlen((const se_bn_t *)(&(ctx->N)));
+		ASSERT(ret > 0);
+		ctx->len = ((size_t)ret + 7) / 8;
+	}
+
+	if (E != NULL) {
+		ret = se_bn_import(&(ctx->E), E, E_len);
+		CHECK_SUCCESS_GOTO(ret, end);
+	}
+
+	if (D != NULL) {
+		ret = se_bn_import(&(ctx->D), D, D_len);
+		CHECK_SUCCESS_GOTO(ret, end);
+	}
+
+	if (P != NULL) {
+		ret = se_bn_import(&(ctx->P), P, P_len);
+		CHECK_SUCCESS_GOTO(ret, end);
+	}
+
+	if (Q != NULL) {
+		ret = se_bn_import(&(ctx->Q), Q, Q_len);
+		CHECK_SUCCESS_GOTO(ret, end);
+	}
+
+	ret = SE_SUCCESS;
+end:
+	return ret;
+}
+
+/**
+ * Check the RSA context. This function checks both private and public cases.
+ */
+static int32_t rsa_check_context(const se_rsa_context_t *ctx, int32_t is_priv)
+{
+	int32_t ret    = SE_SUCCESS;
+	int32_t result = 0;
+
+	ret = se_bn_bitlen((const se_bn_t *)(&ctx->N));
+	ASSERT(ret >= 0);
+	if (0 == ret) {
+		ret = SE_ERROR_BAD_INPUT_DATA;
+		goto end;
+	}
+
+	if ((8 * (ctx->len) != (size_t)(ret)) ||
+		(8 * (ctx->len) > SE_RSA_MAX_KEY_BITS) ||
+		(8 * (ctx->len) < SE_RSA_MIN_KEY_BITS)) {
+		ret = SE_ERROR_BAD_INPUT_DATA;
+		goto end;
+	}
+
+	/*
+	 * 1. Modular exponentiation needs positive, odd moduli.
+	 */
+	/* Modular exponentiation wrt. N is always used for
+	 * RSA public key operations. */
+	ret = se_bn_get_bit((const se_bn_t *)(&ctx->N), 0);
+	ASSERT(ret >= 0);
+	if (ret == 0) {
+		ret = SE_ERROR_BAD_INPUT_DATA;
+		goto end;
+	}
+
+	/**
+	 * Check the private key. Here we also require D's bit length to be at least 2
+	 */
+	if (is_priv) {
+		ret = se_bn_bitlen((const se_bn_t *)(&ctx->D));
+		ASSERT(ret >= 0);
+		if ((0 == ret) || (ret < 2) || (ret > (int32_t)(8 * (ctx->len)))) {
+			ret = SE_ERROR_BAD_INPUT_DATA;
+			goto end;
+		}
+
+		ret = se_bn_cmp_bn((const se_bn_t *)(&ctx->D),
+						   (const se_bn_t *)(&ctx->N), &result);
+		CHECK_SUCCESS_GOTO(ret, end);
+		if (result >= 0) {
+			ret = SE_ERROR_BAD_INPUT_DATA;
+			goto end;
+		}
+	}
+
+	/* Check public E. The minimal E is 3, so E's bit length must be at least 2 */
+	ret = se_bn_bitlen((const se_bn_t *)(&ctx->E));
+	ASSERT(ret >= 0);
+
+	if ((0 == ret) || (ret < 2) || (ret > (int32_t)(8 * (ctx->len)))) {
+		ret = SE_ERROR_BAD_INPUT_DATA;
+		goto end;
+	}
+
+	ret = se_bn_cmp_bn((const se_bn_t *)(&ctx->E), (const se_bn_t *)(&ctx->N),
+					   &result);
+	CHECK_SUCCESS_GOTO(ret, end);
+	if (result >= 0) {
+		ret = SE_ERROR_BAD_INPUT_DATA;
+		goto end;
+	}
+
+	/* E must be odd */
+	ret = se_bn_get_bit((const se_bn_t *)(&ctx->E), 0);
+	ASSERT(ret >= 0);
+	if (0 == ret) {
+		ret = SE_ERROR_BAD_INPUT_DATA;
+		goto end;
+	}
+
+	ret = SE_SUCCESS;
+end:
+	return ret;
+}
+
+/**
+ * Check the RSA blinding context (the optional P/Q factors used for blinding).
+ */
+static int32_t rsa_check_blinding_context(const se_rsa_context_t *ctx)
+{
+	int32_t ret = SE_SUCCESS;
+
+	/* blinding requires P and Q != NULL, and P/Q bitlength < N bitlength / 2 */
+
+	if (ctx->P.ctx->data && ctx->P.ctx->blk_num) {
+		ret = se_bn_bitlen((const se_bn_t *)(&ctx->P));
+		ASSERT(ret >= 0);
+		if ((0 == ret) || (ret > (int32_t)(4 * (ctx->len)))) {
+			ret = SE_ERROR_BAD_INPUT_DATA;
+			goto end;
+		}
+	}
+
+	if (ctx->Q.ctx->data && ctx->Q.ctx->blk_num) {
+		ret = se_bn_bitlen((const se_bn_t *)(&ctx->Q));
+		ASSERT(ret >= 0);
+		if ((0 == ret) || (ret > (int32_t)(4 * (ctx->len)))) {
+			ret = SE_ERROR_BAD_INPUT_DATA;
+			goto end;
+		}
+	}
+
+	ret = SE_SUCCESS;
+end:
+	return ret;
+}
+
+static int32_t se_rsa_check_pubkey(const se_rsa_context_t *ctx)
+{
+	int32_t ret = SE_SUCCESS;
+
+	if (NULL == ctx) {
+		return SE_ERROR_BAD_PARAMS;
+	}
+
+	ret = rsa_check_context(ctx, 0);
+	if ((int32_t)SE_ERROR_BAD_INPUT_DATA == ret) {
+		/* convert bad input data to invalid key */
+		ret = SE_ERROR_INVAL_KEY;
+	}
+	return ret;
+}
+
+static int32_t se_rsa_check_privkey(const se_rsa_context_t *ctx)
+{
+	int32_t ret = SE_SUCCESS;
+
+	if (NULL == ctx) {
+		return SE_ERROR_BAD_PARAMS;
+	}
+
+	ret = rsa_check_context(ctx, 1);
+	if ((int32_t)SE_ERROR_BAD_INPUT_DATA == ret) {
+		/* convert bad input data to invalid key */
+		ret = SE_ERROR_INVAL_KEY;
+	}
+	return ret;
+}
+
+static int32_t se_rsa_check_pub_priv(const se_rsa_context_t *pub,
+							  const se_rsa_context_t *prv)
+{
+	int32_t ret    = SE_SUCCESS;
+	int32_t result = 0;
+
+	if ((NULL == pub) || (NULL == prv)) {
+		return SE_ERROR_BAD_PARAMS;
+	}
+
+	ret = rsa_check_context(pub, 0);
+	if ((int32_t)SE_ERROR_BAD_INPUT_DATA == ret) {
+		/* convert bad input data to invalid key */
+		ret = SE_ERROR_INVAL_KEY;
+	}
+	CHECK_SUCCESS_GOTO(ret, end);
+
+	ret = rsa_check_context(prv, 1);
+	if ((int32_t)SE_ERROR_BAD_INPUT_DATA == ret) {
+		/* convert bad input data to invalid key */
+		ret = SE_ERROR_INVAL_KEY;
+	}
+	CHECK_SUCCESS_GOTO(ret, end);
+
+	ret = se_bn_cmp_bn_equal((const se_bn_t *)(&(pub->N)),
+							 (const se_bn_t *)(&(prv->N)), &result);
+	CHECK_SUCCESS_GOTO(ret, end);
+	if (0 != result) {
+		ret = SE_ERROR_INVAL_KEY;
+		goto end;
+	}
+
+	ret = se_bn_cmp_bn_equal((const se_bn_t *)(&(pub->E)),
+							 (const se_bn_t *)(&(prv->E)), &result);
+	CHECK_SUCCESS_GOTO(ret, end);
+	if (0 != result) {
+		ret = SE_ERROR_INVAL_KEY;
+		goto end;
+	}
+
+	ret = SE_SUCCESS;
+end:
+	return ret;
+}
+
+static int32_t se_rsa_public(se_rsa_context_t *ctx,
+					  const uint8_t *input,
+					  uint8_t *output)
+{
+	int32_t ret     = SE_SUCCESS;
+	se_bn_t tmp_in  = {NULL};
+	se_bn_t tmp_out = {NULL};
+	int32_t result  = 0;
+
+	if ((NULL == ctx) || (NULL == input) || (NULL == output)) {
+		return SE_ERROR_BAD_PARAMS;
+	}
+
+	ret = se_bn_init(&tmp_in);
+	CHECK_SUCCESS_GOTO(ret, end);
+	ret = se_bn_init(&tmp_out);
+	CHECK_SUCCESS_GOTO(ret, end);
+
+	ret = se_bn_import(&tmp_in, input, ctx->len);
+	CHECK_SUCCESS_GOTO(ret, end);
+
+	ret = se_bn_cmp_bn(&tmp_in, (const se_bn_t *)(&(ctx->N)), &result);
+	CHECK_SUCCESS_GOTO(ret, end);
+	if (result >= 0) {
+		ret = SE_ERROR_BAD_INPUT_DATA;
+		goto end;
+	}
+
+	ret = se_bn_exp_mod(&tmp_out, (const se_bn_t *)(&tmp_in),
+						(const se_bn_t *)(&(ctx->E)),
+						(const se_bn_t *)(&(ctx->N)));
+	CHECK_SUCCESS_GOTO(ret, end);
+
+	ret = se_bn_export((const se_bn_t *)(&tmp_out), output, ctx->len);
+	CHECK_SUCCESS_GOTO(ret, end);
+
+end:
+	se_bn_free(&tmp_in);
+	se_bn_free(&tmp_out);
+	return ret;
+}
+
+/*
+ * Generate or update blinding values, see section 10 of:
+ *  KOCHER, Paul C. Timing attacks on implementations of Diffie-Hellman, RSA,
+ *  DSS, and other systems. In: Advances in Cryptology - CRYPTO'96. Springer
+ *  Berlin Heidelberg, 1996. p. 104-113.
+ */
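+/*
+ * Sketch of the base-blinding relation used below (notation only):
+ *   Vf = random, Vi = Vf^(-e) mod N
+ *   blinded input : x' = x  * Vi mod N
+ *   private op    : y' = x'^d mod N
+ *   unblinding    : y  = y' * Vf mod N = x^d mod N
+ * because Vi^d = Vf^(-e*d) = Vf^(-1) mod N when e*d == 1 (mod phi(N)).
+ */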
+static int32_t rsa_prepare_blinding(se_rsa_context_t *ctx,
+									int32_t (*f_rng)(void *, uint8_t *, size_t),
+									void *p_rng)
+{
+	int32_t ret   = SE_SUCCESS;
+	int32_t count = 0;
+
+	ret = se_bn_bitlen((const se_bn_t *)(&ctx->Vf));
+	ASSERT(ret >= 0);
+
+	if (0 != ret) {
+		/* Vf/Vi already contains blinding data, check */
+		ASSERT(se_bn_bitlen((const se_bn_t *)(&ctx->Vi)) > 0);
+
+		/* We already have blinding values, just update them by squaring */
+		ret = se_bn_mul_mod(&(ctx->Vi), (const se_bn_t *)(&(ctx->Vi)),
+							(const se_bn_t *)(&(ctx->Vi)),
+							(const se_bn_t *)(&(ctx->N)));
+		CHECK_SUCCESS_GOTO(ret, end);
+
+		ret = se_bn_mul_mod(&(ctx->Vf), (const se_bn_t *)(&(ctx->Vf)),
+							(const se_bn_t *)(&(ctx->Vf)),
+							(const se_bn_t *)(&(ctx->N)));
+		CHECK_SUCCESS_GOTO(ret, end);
+
+		ret = SE_SUCCESS;
+		goto end;
+	} else {
+		/* do nothing */
+	}
+
+	do {
+		if (count++ > SE_RANDOM_RETRY_CNT) {
+			ret = SE_ERROR_GEN_RANDOM;
+			goto end;
+		}
+
+		ret = se_bn_import_random_bits(&(ctx->Vf), (8 * ctx->len) - 1, f_rng,
+									   p_rng);
+		CHECK_SUCCESS_GOTO(ret, end);
+
+		ret = se_bn_inv_mod(&(ctx->Vi), (const se_bn_t *)(&(ctx->Vf)),
+							(const se_bn_t *)(&(ctx->N)));
+		if ((SE_SUCCESS != ret) && ((int32_t)SE_ERROR_NOT_ACCEPTABLE != ret)) {
+			goto end;
+		}
+	} while ((int32_t)SE_ERROR_NOT_ACCEPTABLE == ret);
+
+	/**
+	 * Blinding value: Vi =  Vf^(-e) mod N
+	 * (Vi already contains Vf^-1 at this point)
+	 */
+	ret = se_bn_exp_mod(&(ctx->Vi), (const se_bn_t *)(&(ctx->Vi)),
+						(const se_bn_t *)(&(ctx->E)),
+						(const se_bn_t *)(&(ctx->N)));
+	CHECK_SUCCESS_GOTO(ret, end);
+
+	ret = SE_SUCCESS;
+end:
+	return ret;
+}
+
+/*
+ * Exponent blinding is supposed to prevent side-channel attacks that use
+ * multiple traces of measurements to recover the RSA key. The more collisions
+ * there are, the more bits of the key can be recovered. See [3].
+ *
+ * Collecting n collisions with an m-bit blinding value requires 2^(m-m/n)
+ * observations on average.
+ *
+ * For example, with 28-byte blinding, achieving 2 collisions requires the
+ * adversary to make 2^112 observations on average.
+ *
+ * (With the currently (as of April 2017) known best algorithms, breaking 2048
+ * bit RSA requires approximately as much time as trying out 2^112 random keys.
+ * Thus in this sense, with 28-byte blinding the security is not reduced by
+ * side-channel attacks like the one in [3].)
+ *
+ * This countermeasure does not help if the key recovery is possible with a
+ * single trace.
+ */
+#define RSA_EXPONENT_BLINDING 28
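+
+/*
+ * The blinded exponent used by rsa_private_core() below, written out for
+ * reference (illustrative):
+ *   r  = RSA_EXPONENT_BLINDING bytes of random data
+ *   d' = d + r * (p - 1) * (q - 1)
+ * Since (p - 1) * (q - 1) is a multiple of the order of any x invertible
+ * modulo n, x^(d') == x^d (mod n): the result is unchanged while the
+ * exponent fed to the hardware differs between runs.
+ */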
+
+static int32_t rsa_private_core(se_rsa_context_t *ctx,
+								se_bn_t *in,
+								se_bn_t *out,
+								int32_t (*f_rng)(void *, uint8_t *, size_t),
+								void *p_rng)
+{
+	int32_t ret       = 0;
+	se_bn_t const1    = {NULL};
+	se_bn_t tmp1      = {NULL};
+	se_bn_t tmp2      = {NULL};
+	se_bn_t tmp3      = {NULL};
+	int32_t reset_tmp = 0;
+
+	if (NULL == f_rng || (NULL == ctx->P.ctx->data) || (0 == ctx->P.ctx->blk_num) ||
+		(NULL == ctx->Q.ctx->data) || (0 == ctx->Q.ctx->blk_num)) {
+		/* no RNG or P/Q not available: do the private operation without blinding */
+		ret = se_bn_exp_mod(out, (const se_bn_t *)(in),
+							(const se_bn_t *)(&(ctx->D)),
+							(const se_bn_t *)(&(ctx->N)));
+		CHECK_SUCCESS_GOTO(ret, end);
+		ret = SE_SUCCESS;
+		goto end;
+	}
+
+	/* Check blinding context here */
+	ret = rsa_check_blinding_context((const se_rsa_context_t *)ctx);
+	CHECK_SUCCESS_GOTO(ret, end);
+
+	/* prepare blinding context */
+	ret = rsa_prepare_blinding(ctx, f_rng, p_rng);
+	CHECK_SUCCESS_GOTO(ret, end);
+
+	ret = se_bn_init(&const1);
+	CHECK_SUCCESS_GOTO(ret, end);
+	ret = se_bn_init(&tmp1);
+	CHECK_SUCCESS_GOTO(ret, end);
+	ret = se_bn_init(&tmp2);
+	CHECK_SUCCESS_GOTO(ret, end);
+	ret = se_bn_init(&tmp3);
+	CHECK_SUCCESS_GOTO(ret, end);
+
+	reset_tmp = 1;
+
+	/* tmp3 = in * Vi */
+	ret =
+		se_bn_mul_mod(&tmp3, (const se_bn_t *)in, (const se_bn_t *)(&(ctx->Vi)),
+					  (const se_bn_t *)(&(ctx->N)));
+	CHECK_SUCCESS_GOTO(ret, end);
+
+	ret = se_bn_import_u32(&const1, 1);
+	CHECK_SUCCESS_GOTO(ret, end);
+
+	/* import rnd with random */
+	ret = se_bn_import_random(&tmp1, RSA_EXPONENT_BLINDING, f_rng, p_rng);
+	CHECK_SUCCESS_GOTO(ret, end);
+
+	/* tmp2 = P - 1 */
+	ret = se_bn_sub_bn(&tmp2, (const se_bn_t *)(&(ctx->P)),
+					   (const se_bn_t *)(&const1));
+	CHECK_SUCCESS_GOTO(ret, end);
+
+	/* tmp1 = tmp1 * tmp2 */
+	ret =
+		se_bn_mul_bn(&tmp1, (const se_bn_t *)(&tmp1), (const se_bn_t *)(&tmp2));
+	CHECK_SUCCESS_GOTO(ret, end);
+
+	/* tmp2 = Q - 1 */
+	ret = se_bn_sub_bn(&tmp2, (const se_bn_t *)(&(ctx->Q)),
+					   (const se_bn_t *)(&const1));
+	CHECK_SUCCESS_GOTO(ret, end);
+
+	/* tmp1 = tmp1 * tmp2 */
+	ret =
+		se_bn_mul_bn(&tmp1, (const se_bn_t *)(&tmp1), (const se_bn_t *)(&tmp2));
+	CHECK_SUCCESS_GOTO(ret, end);
+
+	/* tmp1 = D + tmp1 */
+	ret = se_bn_add_bn(&tmp1, (const se_bn_t *)(&(ctx->D)),
+					   (const se_bn_t *)(&tmp1));
+	CHECK_SUCCESS_GOTO(ret, end);
+
+	/* set tmp2 to 0 before exp mod */
+	ret = se_bn_import_u32(&tmp2, 0);
+	CHECK_SUCCESS_GOTO(ret, end);
+
+	/* exp mod with the large (blinded) exponent tmp1 */
+	ret = se_bn_exp_mod_large_e(&tmp2, (const se_bn_t *)(&tmp3),
+								(const se_bn_t *)(&tmp1),
+								(const se_bn_t *)(&(ctx->N)));
+	CHECK_SUCCESS_GOTO(ret, end);
+
+	/* Unblinding */
+	/* out = tmp2 * Vf mod N */
+	ret = se_bn_mul_mod(out, (const se_bn_t *)(&tmp2),
+						(const se_bn_t *)(&(ctx->Vf)),
+						(const se_bn_t *)(&(ctx->N)));
+	CHECK_SUCCESS_GOTO(ret, end);
+end:
+	/* set tmp1 and tmp2 to 0 before exit; do not clobber ret or jump back
+	 * into the end label on failure */
+	if (reset_tmp) {
+		(void)se_bn_import_u32(&tmp1, 0);
+		(void)se_bn_import_u32(&tmp2, 0);
+	}
+
+	se_bn_free(&const1);
+	se_bn_free(&tmp1);
+	se_bn_free(&tmp2);
+	se_bn_free(&tmp3);
+	return ret;
+}
+
+/*
+ * Do an RSA private key operation
+ */
+static int32_t se_rsa_private(se_rsa_context_t *ctx,
+					   int32_t (*f_rng)(void *, uint8_t *, size_t),
+					   void *p_rng,
+					   uint8_t *input,
+					   uint8_t *output)
+{
+	int32_t ret     = SE_SUCCESS;
+	int32_t result  = 0;
+	se_bn_t tmp_in  = {NULL};
+	se_bn_t tmp_out = {NULL};
+	se_bn_t C       = {NULL};
+
+	if ((NULL == ctx) || (NULL == input) || (NULL == output)) {
+		return SE_ERROR_BAD_PARAMS;
+	}
+
+	ret = se_bn_init(&tmp_in);
+	CHECK_SUCCESS_GOTO(ret, end);
+	ret = se_bn_init(&tmp_out);
+	CHECK_SUCCESS_GOTO(ret, end);
+	ret = se_bn_init(&C);
+	CHECK_SUCCESS_GOTO(ret, end);
+
+	ret = se_bn_import(&tmp_in, input, ctx->len);
+	CHECK_SUCCESS_GOTO(ret, end);
+	ret = se_bn_cmp_bn(&tmp_in, (const se_bn_t *)(&(ctx->N)), &result);
+	CHECK_SUCCESS_GOTO(ret, end);
+	if (result >= 0) {
+		ret = SE_ERROR_BAD_INPUT_DATA;
+		goto end;
+	}
+
+	ret = rsa_private_core(ctx, &tmp_in, &tmp_out, f_rng, p_rng);
+	CHECK_SUCCESS_GOTO(ret, end);
+
+	/* Verify the result to prevent glitching attacks. */
+	ret = se_bn_exp_mod(&C, (const se_bn_t *)(&tmp_out),
+						(const se_bn_t *)(&(ctx->E)),
+						(const se_bn_t *)(&(ctx->N)));
+	CHECK_SUCCESS_GOTO(ret, end);
+
+	ret = se_bn_cmp_bn_equal((const se_bn_t *)(&C), (const se_bn_t *)(&tmp_in),
+							 &result);
+	CHECK_SUCCESS_GOTO(ret, end);
+	if (result != 0) {
+		ret = SE_ERROR_VERIFY_SIG;
+		goto end;
+	}
+
+	ret = se_bn_export((const se_bn_t *)(&tmp_out), output, ctx->len);
+	CHECK_SUCCESS_GOTO(ret, end);
+
+	ret = SE_SUCCESS;
+end:
+	se_bn_free(&tmp_in);
+	se_bn_free(&tmp_out);
+	se_bn_free(&C);
+	return ret;
+}
+
+/**
+ * Generate and apply the MGF1 operation (from PKCS#1 v2.1) to a buffer.
+ *
+ * \param dst       buffer to mask
+ * \param dlen      length of destination buffer
+ * \param src       source of the mask generation
+ * \param slen      length of the source buffer
+ * \param md        message digest type to use
+ */
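+/*
+ * MGF1, spelled out for reference (illustrative): the mask is the
+ * concatenation
+ *   Hash(src || C0) || Hash(src || C1) || ...
+ * where Ci is a 4-byte big endian counter; the loop below XORs the mask
+ * into dst block by block, truncating the final block to the remaining
+ * length.
+ */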
+static int32_t
+mgf_mask(uint8_t *dst, size_t dlen, uint8_t *src, size_t slen, se_algo_t md)
+{
+	int32_t ret                    = SE_SUCCESS;
+	uint8_t mask[SE_MAX_HASH_SIZE] = {0};
+	uint8_t counter[4]             = {0};
+	uint8_t *p                     = NULL;
+	uint32_t hlen                  = 0;
+	size_t i                       = 0;
+	size_t use_len                 = 0;
+	uint32_t out_size;
+	int alg;
+	struct asr_sha_reqctx hash_ctx;
+
+	memset(mask, 0, sizeof(mask));
+	memset(counter, 0, sizeof(counter));
+
+	hlen = se_md_size(md);
+	ASSERT(hlen != 0);
+
+	switch (md) {
+	case SE_ALG_SHA256:
+		out_size = 32;
+		alg = HASH_SHA256;
+		break;
+	case SE_ALG_SHA224:
+		out_size = 28;
+		alg = HASH_SHA224;
+		break;
+	case SE_ALG_SHA1:
+		out_size = 20;
+		alg = HASH_SHA1;
+		break;
+	default:
+		ret = SE_ERROR_BAD_PARAMS;
+		goto end;
+	}
+
+	/* Generate and apply dbMask */
+	p = dst;
+	while (dlen > 0) {
+		use_len = hlen;
+		if (dlen < hlen) {
+			use_len = dlen;
+		}
+
+		ret = asr_te200_hash_init(&hash_ctx, alg);
+		CHECK_SUCCESS_GOTO(ret, end);
+
+		ret = asr_te200_hash_proc(&hash_ctx, src, slen);
+		CHECK_SUCCESS_GOTO(ret, end);
+
+		ret = asr_te200_hash_proc(&hash_ctx, counter, 4);
+		CHECK_SUCCESS_GOTO(ret, end);
+
+		ret = asr_te200_hash_finish(&hash_ctx, mask, out_size);
+		CHECK_SUCCESS_GOTO(ret, end);
+
+		for (i = 0; i < use_len; ++i) {
+			*p++ ^= mask[i];
+		}
+
+		counter[3]++;
+
+		dlen -= use_len;
+	}
+
+end:
+	memset(mask, 0, sizeof(mask));
+	return ret;
+}
+
+/* Do PSS encoding as per PKCS#1 v2.1, section 9.1.1 */
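+/*
+ * Encoded message layout produced below, sketched for reference:
+ *   M'       = 0x00 x 8 || mHash || salt
+ *   H        = Hash(M')
+ *   DB       = PS || 0x01 || salt
+ *   maskedDB = DB xor MGF1(H, emLen - hLen - 1)
+ *   EM       = maskedDB || H || 0xbc
+ */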
+static int32_t rsa_pss_encode(uint8_t *output,
+							  const uint8_t *input,
+							  size_t input_len,
+							  size_t rsa_key_bits,
+							  int32_t target_salt_len,
+							  se_algo_t hash_id,
+							  int32_t (*f_rng)(void *, uint8_t *, size_t),
+							  void *p_rng)
+{
+	int32_t ret     = SE_SUCCESS;
+	size_t embits   = 0;
+	size_t emlen    = 0;
+	size_t sign_len = 0;
+	size_t slen     = 0;
+	size_t slen_max = 0;
+	size_t hlen     = 0;
+	uint8_t *h      = NULL;
+	uint8_t *db     = NULL;
+	uint8_t *salt   = NULL;
+	uint8_t *m_     = NULL;
+	size_t tmp_len  = 0;
+	uint32_t out_size;
+	int alg;
+	struct asr_sha_reqctx hash_ctx;
+
+	embits   = rsa_key_bits - 1;
+	sign_len = (rsa_key_bits + 7) / 8;
+
+	/* calc emlen = ceil(embits/8) */
+	emlen = SE_ROUND_UP(embits, 8) / 8;
+
+	/* double check emlen vs sign_len */
+	if ((embits & 0x7) == 0) {
+		ASSERT(emlen == sign_len - 1);
+	} else {
+		ASSERT(emlen == sign_len);
+	}
+
+	hlen = se_md_size(hash_id);
+	ASSERT(hlen > 0);
+
+	/* Check emLen >= hLen + sLen + 2 against the minimal sLen of hLen - 2 */
+	if (emlen < hlen + hlen - 2 + 2) {
+		ret = SE_ERROR_BAD_INPUT_DATA;
+		goto end;
+	}
+	if (emlen >= hlen + hlen + 2) {
+		slen_max = hlen;
+	} else {
+		slen_max = emlen - hlen - 2;
+	}
+
+	ASSERT(slen_max >= hlen - 2);
+
+	/* target_salt_len supports only SE_RSA_SALT_LEN_MAX or a non-negative length */
+	if (target_salt_len == SE_RSA_SALT_LEN_MAX) {
+		slen = slen_max;
+	} else if (target_salt_len < 0) {
+		ret = SE_ERROR_BAD_INPUT_DATA;
+		goto end;
+	} else {
+		/* check target_salt_len >= hlen - 2 && target_salt_len <= slen_max */
+		if ((target_salt_len < (int32_t)(hlen - 2)) || (target_salt_len > (int32_t)slen_max)) {
+			ret = SE_ERROR_BAD_INPUT_DATA;
+			goto end;
+		}
+		slen = target_salt_len;
+	}
+
+	/* double check salt length; this accords with Step 3 */
+	ASSERT(emlen >= hlen + slen + 2);
+
+	/* 4. Generate salt */
+	if (slen) {
+		salt = kcalloc(1, slen, GFP_KERNEL);
+		if (NULL == salt) {
+			ret = SE_ERROR_OOM;
+			goto end;
+		}
+
+		ret = f_rng(p_rng, salt, slen);
+		if (SE_SUCCESS != ret) {
+			ret = SE_ERROR_GEN_RANDOM;
+			goto end;
+		}
+	} else {
+		salt = NULL;
+	}
+
+	/* 5. Format M' */
+	tmp_len = 8 + input_len + slen;
+	m_      = kcalloc(1, tmp_len, GFP_KERNEL);
+	if (NULL == m_) {
+		ret = SE_ERROR_OOM;
+		goto end;
+	}
+	memset(m_, 0, 8);
+	memcpy(m_ + 8, input, input_len);
+	if (slen) {
+		memcpy(m_ + 8 + input_len, salt, slen);
+	} else {
+		/* do nothing */
+	}
+	/* 6. Calculate H = Hash(M') */
+	h = kcalloc(1, SE_MAX_HASH_SIZE, GFP_KERNEL);
+	if (NULL == h) {
+		ret = SE_ERROR_OOM;
+		goto end;
+	}
+
+	switch (hash_id) {
+	case SE_ALG_SHA256:
+		out_size = 32;
+		alg = HASH_SHA256;
+		break;
+	case SE_ALG_SHA224:
+		out_size = 28;
+		alg = HASH_SHA224;
+		break;
+	case SE_ALG_SHA1:
+		out_size = 20;
+		alg = HASH_SHA1;
+		break;
+
+	default:
+		ret = SE_ERROR_BAD_PARAMS;
+		goto end;
+	}
+
+	ret = asr_te200_hash_init(&hash_ctx, alg);
+	CHECK_SUCCESS_GOTO(ret, end);
+
+	ret = asr_te200_hash_proc(&hash_ctx, m_, tmp_len);
+	CHECK_SUCCESS_GOTO(ret, end);
+
+	ret = asr_te200_hash_finish(&hash_ctx, h, out_size);
+	CHECK_SUCCESS_GOTO(ret, end);
+
+	/* 7 && 8. Format DB = PS || 0x01 || salt */
+	db = kcalloc(1, emlen - hlen - 1, GFP_KERNEL);
+	if (NULL == db) {
+		ret = SE_ERROR_OOM;
+		goto end;
+	}
+	/* set ps. tmp_len = ps length */
+	tmp_len = emlen - slen - hlen - 2;
+	if (tmp_len) {
+		memset(db, 0, tmp_len);
+	}
+	db[tmp_len] = 0x01;
+	if (slen) {
+		memcpy(db + tmp_len + 1, salt, slen);
+	}
+
+	/* 9 && 10: maskedDB = DB xor dbMask */
+	ret = mgf_mask(db, emlen - hlen - 1, h, hlen, hash_id);
+	CHECK_SUCCESS_GOTO(ret, end);
+
+	/* 11. Set the leftmost 8emLen - emBits bits of the leftmost octet in
+	 * maskedDB to zero. */
+	db[0] &= (0xFF >> (8 * emlen - embits));
+
+	/* 12 && 13: output final signature */
+	/* tmp_len is db length */
+	tmp_len = emlen - hlen - 1;
+	memset(output, 0, sign_len);
+	memcpy(output + (sign_len - emlen), db, tmp_len);
+	memcpy(output + (sign_len - emlen + tmp_len), h, se_md_size(hash_id));
+	output[sign_len - 1] = 0xbc;
+
+end:
+	if (salt) {
+		kfree(salt);
+	}
+	if (m_) {
+		kfree(m_);
+	}
+	if (h) {
+		kfree(h);
+	}
+	if (db) {
+		kfree(db);
+	}
+	return ret;
+}
+
+static int32_t rsa_pss_decode(const uint8_t *sign,
+							  const uint8_t *input,
+							  size_t input_len,
+							  size_t rsa_key_bits,
+							  int32_t target_salt_len,
+							  se_algo_t hash_id)
+{
+	int32_t ret        = SE_SUCCESS;
+	size_t embits      = 0;
+	size_t emlen       = 0;
+	size_t sign_len    = 0;
+	size_t slen        = 0;
+	size_t expect_slen = 0;
+	const uint8_t *em  = NULL;
+	const uint8_t *h   = NULL;
+	uint8_t *db        = NULL;
+	size_t db_len      = 0;
+	uint8_t *salt      = NULL;
+	uint8_t *h_        = NULL;
+	size_t i           = 0;
+	uint8_t *m_        = NULL;
+	size_t tmp_len     = 0;
+	size_t hlen        = 0;
+	uint32_t out_size;
+	int alg;
+	struct asr_sha_reqctx hash_ctx;
+
+	embits   = rsa_key_bits - 1;
+	sign_len = (rsa_key_bits + 7) / 8;
+
+	/* calc emlen = ceil(embits/8) */
+	emlen = SE_ROUND_UP(embits, 8) / 8;
+
+	/* double check emlen vs sign_len */
+	if ((embits & 0x7) == 0) {
+		ASSERT(emlen == sign_len - 1);
+		em = sign + 1;
+	} else {
+		ASSERT(emlen == sign_len);
+		em = sign;
+	}
+
+	hlen = se_md_size(hash_id);
+	ASSERT(hlen > 0);
+
+	/* 3. Check emLen < hLen + sLen + 2; sLen is not known yet, so only emLen >= hLen + 2 is checked here */
+	if (emlen < hlen + 2) {
+		ret = SE_ERROR_BAD_INPUT_DATA;
+		goto end;
+	}
+
+	if (target_salt_len == SE_RSA_SALT_LEN_MAX) {
+		if (emlen >= hlen + hlen + 2) {
+			expect_slen = hlen;
+		} else {
+			expect_slen = emlen - hlen - 2;
+		}
+	} else if (target_salt_len == SE_RSA_SALT_LEN_ANY) {
+		expect_slen = SE_RSA_SALT_LEN_ANY;
+	} else {
+		expect_slen = target_salt_len;
+	}
+
+	/* 4. Check 0xbc */
+	if (sign[sign_len - 1] != 0xbc) {
+		ret = SE_ERROR_INVAL_PADDING;
+		goto end;
+	}
+
+	/* 5. get maskedDB(equals to em) and h */
+	db_len = emlen - hlen - 1;
+	db     = kcalloc(1, db_len, GFP_KERNEL);
+	if (NULL == db) {
+		ret = SE_ERROR_OOM;
+		goto end;
+	}
+
+	h = em + db_len;
+	ASSERT(h + hlen + 1 == sign + sign_len);
+
+	/* 6. Check left most data */
+	if ((em != sign) && (sign[0] != 0x00)) {
+		ret = SE_ERROR_INVAL_PADDING;
+		goto end;
+	}
+
+	if ((em[0] >> (8 - (8 * emlen - embits))) != 0x00) {
+		ret = SE_ERROR_INVAL_PADDING;
+		goto end;
+	} else {
+		/* do nothing */
+	}
+
+	/* 7 && 8: calc DB */
+	memcpy(db, em, db_len);
+	ret = mgf_mask(db, db_len, (uint8_t *)h, hlen, hash_id);
+	CHECK_SUCCESS_GOTO(ret, end);
+
+	/* 9. set leftmost 8*emlen - embits to 0 */
+	db[0] &= (0xFF >> (emlen * 8 - embits));
+
+	/* 10. find salt start */
+	i = 0;
+	while (1) {
+		if (i >= db_len) {
+			salt = NULL;
+			/* didn't find 0x01 */
+			break;
+		}
+		if (db[i] == 0) {
+			i++;
+			continue;
+		} else if (db[i] == 0x01) {
+			salt = db + i + 1;
+			break;
+		} else {
+			salt = NULL;
+			/* found invalid padding */
+			break;
+		}
+	}
+	if (NULL == salt) {
+		ret = SE_ERROR_INVAL_PADDING;
+		goto end;
+	}
+	slen = db + db_len - salt;
+
+	/* Check salt length */
+	if ((expect_slen != (size_t)SE_RSA_SALT_LEN_ANY) && (slen != expect_slen)) {
+		ret = SE_ERROR_INVAL_PADDING;
+		goto end;
+	}
+
+	/* 12. Format m_ */
+	tmp_len = 8 + input_len + slen;
+	m_      = kcalloc(1, tmp_len, GFP_KERNEL);
+	if (NULL == m_) {
+		ret = SE_ERROR_OOM;
+		goto end;
+	}
+	memset(m_, 0, 8);
+	memcpy(m_ + 8, input, input_len);
+	if (slen) {
+		memcpy(m_ + 8 + input_len, salt, slen);
+	} else {
+		/* do nothing */
+	}
+	/* 13. Calculate H' = Hash(M') */
+	h_ = kcalloc(1, SE_MAX_HASH_SIZE, GFP_KERNEL);
+	if (NULL == h_) {
+		ret = SE_ERROR_OOM;
+		goto end;
+	}
+
+	switch (hash_id) {
+	case SE_ALG_SHA256:
+		out_size = 32;
+		alg = HASH_SHA256;
+		break;
+	case SE_ALG_SHA224:
+		out_size = 28;
+		alg = HASH_SHA224;
+		break;
+	case SE_ALG_SHA1:
+		out_size = 20;
+		alg = HASH_SHA1;
+		break;
+
+	default:
+		ret = SE_ERROR_BAD_PARAMS;
+		goto end;
+	}
+
+	ret = asr_te200_hash_init(&hash_ctx, alg);
+	CHECK_SUCCESS_GOTO(ret, end);
+
+	ret = asr_te200_hash_proc(&hash_ctx, m_, tmp_len);
+	CHECK_SUCCESS_GOTO(ret, end);
+
+	ret = asr_te200_hash_finish(&hash_ctx, h_, out_size);
+	CHECK_SUCCESS_GOTO(ret, end);
+
+	/* 14. Compare h with h_ */
+	if (se_safer_memcmp(h, h_, hlen) != 0) {
+		ret = SE_ERROR_VERIFY_SIG;
+	} else {
+		ret = SE_SUCCESS;
+	}
+end:
+	if (db) {
+		kfree(db);
+	}
+	if (m_) {
+		kfree(m_);
+	}
+	if (h_) {
+		kfree(h_);
+	}
+	return ret;
+}
+
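+/*
+ * RSASSA-PSS verification (PKCS#1 v2.1 / RFC 8017): apply the public RSA
+ * primitive to the signature, then run the EMSA-PSS decode above on the
+ * recovered encoded message against the caller-supplied message hash.
+ */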
+static int32_t se_rsa_rsassa_pss_verify(se_rsa_context_t *ctx,
+								 const uint8_t *hash,
+								 size_t hashlen,
+								 se_algo_t mgf1_hash_id,
+								 int32_t expected_salt_len,
+								 const uint8_t *sig)
+{
+	int32_t ret     = 0;
+	uint8_t *buf    = NULL;
+	size_t key_bits = 0;
+	size_t siglen   = 0;
+
+	if ((NULL == ctx) || (NULL == hash) || (0 == hashlen) || (NULL == sig)) {
+		return SE_ERROR_BAD_PARAMS;
+	}
+
+	if (!((mgf1_hash_id == SE_ALG_SHA256) || (mgf1_hash_id == SE_ALG_SHA1) ||
+		(mgf1_hash_id == SE_ALG_SHA224))) {
+		return SE_ERROR_BAD_PARAMS;
+	}
+
+	siglen = ctx->len;
+
+	key_bits = se_bn_bitlen((const se_bn_t *)(&(ctx->N)));
+	ASSERT(siglen == (key_bits + 7) / 8);
+
+	buf = kcalloc(1, siglen, GFP_KERNEL);
+	if (NULL == buf) {
+		return SE_ERROR_OOM;
+	}
+
+	ret = se_rsa_public(ctx, sig, buf);
+	CHECK_SUCCESS_GOTO(ret, end);
+
+	ret = rsa_pss_decode(buf, hash, hashlen, key_bits, expected_salt_len,
+						 mgf1_hash_id);
+end:
+	memset(buf, 0, siglen);
+	kfree(buf);
+
+	return ret;
+}
+
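+/*
+ * RSASSA-PSS signature generation: EMSA-PSS-encode the message hash (with a
+ * salt drawn from f_rng), then apply the private RSA primitive to the
+ * encoded message to produce the signature.
+ */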
+static int32_t se_rsa_rsassa_pss_sign(se_rsa_context_t *ctx,
+							   int32_t (*f_rng)(void *, uint8_t *, size_t),
+							   void *p_rng,
+							   const uint8_t *hash,
+							   size_t hashlen,
+							   se_algo_t mgf1_hash_id,
+							   int32_t salt_len,
+							   uint8_t *sig)
+{
+	int32_t ret     = 0;
+	uint8_t *buf    = NULL;
+	size_t key_bits = 0;
+	size_t siglen   = 0;
+
+	if ((NULL == ctx) || (NULL == hash) || (0 == hashlen) || (NULL == sig) ||
+		(NULL == f_rng)) {
+		return SE_ERROR_BAD_PARAMS;
+	}
+
+	if (!((mgf1_hash_id == SE_ALG_SHA256) || (mgf1_hash_id == SE_ALG_SHA1) ||
+		(mgf1_hash_id == SE_ALG_SHA224))) {
+		return SE_ERROR_BAD_PARAMS;
+	}
+
+	siglen = ctx->len;
+
+	key_bits = se_bn_bitlen((const se_bn_t *)(&(ctx->N)));
+	ASSERT(siglen == (key_bits + 7) / 8);
+
+	buf = kcalloc(1, siglen, GFP_KERNEL);
+	if (NULL == buf) {
+		return SE_ERROR_OOM;
+	}
+	ret = rsa_pss_encode(buf, hash, hashlen, key_bits, salt_len, mgf1_hash_id,
+						 f_rng, p_rng);
+	CHECK_SUCCESS_GOTO(ret, end);
+
+	ret = se_rsa_private(ctx, f_rng, p_rng, buf, sig);
+	CHECK_SUCCESS_GOTO(ret, end);
+
+end:
+	memset(buf, 0, siglen);
+	kfree(buf);
+	return ret;
+}
+
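+/*
+ * DER-encoded DigestInfo prefixes used by EMSA-PKCS1-v1_5: the encoded
+ * message T is one of these prefixes followed by the raw hash value, so only
+ * the per-algorithm prefix needs to be stored here.
+ */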
+static const uint8_t sha256_encoding_t_data[] = {
+	0x30, 0x31, 0x30, 0x0d, 0x06, 0x09, 0x60, 0x86, 0x48, 0x01,
+	0x65, 0x03, 0x04, 0x02, 0x01, 0x05, 0x00, 0x04, 0x20};
+
+static const uint8_t sha224_encoding_t_data[] = {
+	0x30, 0x2d, 0x30, 0x0d, 0x06, 0x09, 0x60, 0x86, 0x48, 0x01,
+	0x65, 0x03, 0x04, 0x02, 0x04, 0x05, 0x00, 0x04, 0x1c};
+
+static const uint8_t sha1_encoding_t_data[] = {
+	0x30, 0x21, 0x30, 0x09, 0x06, 0x05, 0x2b, 0x0e, 0x03, 0x02,
+	0x1a, 0x05, 0x00, 0x04, 0x14};
+
+static int32_t se_md_get_encoding_t_info(se_algo_t md_alg,
+										 const uint8_t **encoding_t_data,
+										 size_t *encoding_t_size)
+{
+	switch (md_alg) {
+	case SE_ALG_SHA256:
+		*encoding_t_data = sha256_encoding_t_data;
+		*encoding_t_size = sizeof(sha256_encoding_t_data);
+		return SE_SUCCESS;
+
+	case SE_ALG_SHA224:
+		*encoding_t_data = sha224_encoding_t_data;
+		*encoding_t_size = sizeof(sha224_encoding_t_data);
+		return SE_SUCCESS;
+
+	case SE_ALG_SHA1:
+		*encoding_t_data = sha1_encoding_t_data;
+		*encoding_t_size = sizeof(sha1_encoding_t_data);
+		return SE_SUCCESS;
+
+	default:
+		return SE_ERROR_NOT_SUPPORTED;
+	}
+}
+
+/*
+ * Implementation of the PKCS#1 v2.1 RSASSA-PKCS1-V1_5-SIGN function
+ */
+
+/* Construct a PKCS v1.5 encoding of a hashed message
+ *
+ * This is used both for signature generation and verification.
+ *
+ * Parameters:
+ * - md_alg:  Identifies the hash algorithm used to generate the given hash;
+ *            0 if raw data is signed.
+ * - hashlen: Length of hash; used only when md_alg is 0 (raw data is signed).
+ * - hash:    Buffer containing the hashed message or the raw data.
+ * - dst_len: Length of the encoded message.
+ * - dst:     Buffer to hold the encoded message.
+ *
+ * Assumptions:
+ * - hash has size hashlen if md_alg == 0.
+ * - hash has size corresponding to md_alg if md_alg != 0.
+ * - dst points to a buffer of size at least dst_len.
+ *
+ */
+static int32_t rsa_rsassa_pkcs1_v15_encode(se_algo_t md_alg,
+										   uint32_t hashlen,
+										   const uint8_t *hash,
+										   size_t dst_len,
+										   uint8_t *dst)
+{
+	int32_t ret                    = SE_SUCCESS;
+	size_t ps_len                  = 0;
+	uint8_t *p                     = NULL;
+	const uint8_t *encoding_t_data = NULL;
+	size_t encoding_t_size         = 0;
+
+	/* Are we signing hashed or raw data? */
+	if (md_alg != 0) {
+		ret = se_md_get_encoding_t_info(md_alg, &encoding_t_data,
+										&encoding_t_size);
+		if (SE_SUCCESS != ret) {
+			return ret;
+		}
+		hashlen = se_md_size(md_alg);
+		ASSERT(hashlen > 0);
+
+		/* Check emLen >= tLen + 11 */
+		if (dst_len < encoding_t_size + hashlen + 11) {
+			return SE_ERROR_BAD_INPUT_DATA;
+		}
+
+		/* Calculate ps_length */
+		ps_len = dst_len - encoding_t_size - hashlen - 3;
+		ASSERT(ps_len >= 8);
+	} else {
+		if (dst_len < hashlen + 11) {
+			return SE_ERROR_BAD_INPUT_DATA;
+		}
+
+		ps_len = dst_len - hashlen - 3;
+	}
+
+	p = dst;
+
+	/* Write PS */
+	*p++ = 0x00;
+	*p++ = 0x01;
+	memset(p, 0xFF, ps_len);
+	p += ps_len;
+	*p++ = 0x00;
+
+	/* Are we signing raw data? */
+	if (md_alg == 0) {
+		memcpy(p, hash, hashlen);
+		return SE_SUCCESS;
+	}
+
+	/* copy encoding T */
+	memcpy(p, encoding_t_data, encoding_t_size);
+	p += encoding_t_size;
+
+	memcpy(p, hash, hashlen);
+	p += hashlen;
+
+	/* Just a sanity-check. */
+	ASSERT(p == dst + dst_len);
+
+	return SE_SUCCESS;
+}
+
+/*
+ * Implementation of the PKCS#1 v2.1 RSASSA-PKCS1-v1_5-VERIFY function
+ */
+static int32_t se_rsa_rsassa_pkcs1_v15_verify(se_rsa_context_t *ctx,
+									   se_algo_t md_alg,
+									   size_t hashlen,
+									   const uint8_t *hash,
+									   const uint8_t *sig)
+{
+	int32_t ret      = 0;
+	size_t sig_len   = 0;
+	uint8_t *encoded = NULL, *encoded_expected = NULL;
+
+	if ((NULL == ctx) || (NULL == hash) || (0 == hashlen) || (NULL == sig)) {
+		return SE_ERROR_BAD_PARAMS;
+	}
+
+	if (!((md_alg == SE_ALG_SHA256) || (md_alg == SE_ALG_SHA1) ||
+		  (md_alg == 0) || (md_alg == SE_ALG_SHA224))) {
+		return SE_ERROR_BAD_PARAMS;
+	}
+
+	sig_len = ctx->len;
+
+	/*
+	 * Prepare expected PKCS1 v1.5 encoding of hash.
+	 */
+	encoded = kcalloc(1, sig_len, GFP_KERNEL);
+	if (NULL == encoded) {
+		ret = SE_ERROR_OOM;
+		goto end;
+	}
+	encoded_expected = kcalloc(1, sig_len, GFP_KERNEL);
+	if (NULL == encoded_expected) {
+		ret = SE_ERROR_OOM;
+		goto end;
+	}
+	ret = rsa_rsassa_pkcs1_v15_encode(md_alg, hashlen, hash, sig_len,
+									  encoded_expected);
+	CHECK_SUCCESS_GOTO(ret, end);
+
+	/*
+	 * Apply RSA primitive to get what should be PKCS1 encoded hash.
+	 */
+	ret = se_rsa_public(ctx, sig, encoded);
+	if (ret != 0) {
+		goto end;
+	}
+
+	/*
+	 * Compare
+	 */
+	if (se_safer_memcmp(encoded, encoded_expected, sig_len) != 0) {
+		ret = SE_ERROR_VERIFY_SIG;
+	} else {
+		ret = SE_SUCCESS;
+	}
+
+end:
+	if (encoded != NULL) {
+		memset(encoded, 0, sig_len);
+		kfree(encoded);
+	}
+
+	if (encoded_expected != NULL) {
+		memset(encoded_expected, 0, sig_len);
+		kfree(encoded_expected);
+	}
+
+	return ret;
+}
+
+static int32_t
+se_rsa_rsassa_pkcs1_v15_sign(se_rsa_context_t *ctx,
+							 int32_t (*f_rng)(void *, uint8_t *, size_t),
+							 void *p_rng,
+							 se_algo_t md_alg,
+							 size_t hashlen,
+							 const uint8_t *hash,
+							 uint8_t *sig)
+{
+	int32_t ret      = 0;
+	size_t sig_len   = 0;
+	uint8_t *encoded = NULL;
+
+	if ((NULL == ctx) || (NULL == hash) || (0 == hashlen) || (NULL == sig)) {
+		return SE_ERROR_BAD_PARAMS;
+	}
+
+	if (!((md_alg == SE_ALG_SHA256) || (md_alg == SE_ALG_SHA1) ||
+		  (md_alg == 0) || (md_alg == SE_ALG_SHA224))) {
+		return SE_ERROR_BAD_PARAMS;
+	}
+
+	sig_len = ctx->len;
+	/*
+	 * Prepare the PKCS1 v1.5 encoding of the hash.
+	 */
+	encoded = kcalloc(1, sig_len, GFP_KERNEL);
+	if (NULL == encoded) {
+		ret = SE_ERROR_OOM;
+		goto end;
+	}
+	ret = rsa_rsassa_pkcs1_v15_encode(md_alg, hashlen, hash, sig_len, encoded);
+	CHECK_SUCCESS_GOTO(ret, end);
+
+	/*
+	 * Apply the RSA private primitive to the encoded hash to produce
+	 * the signature.
+	 */
+	ret = se_rsa_private(ctx, f_rng, p_rng, encoded, sig);
+	if (ret != 0) {
+		goto end;
+	}
+
+end:
+	if (encoded != NULL) {
+		memset(encoded, 0, sig_len);
+		kfree(encoded);
+	}
+
+	return ret;
+}
+
+static int rsa_rng(void *ctx __maybe_unused, uint8_t *buf, size_t size)
+{
+	get_random_bytes(buf, size);
+	return 0;
+}
+
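+/*
+ * The ACA engine state is global, so all sign/verify requests are serialized
+ * with rsa_lock and the engine is re-initialized for each operation.
+ */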
+struct asr_te200_rsa *asr_rsa_local = NULL;
+static struct mutex rsa_lock = __MUTEX_INITIALIZER(rsa_lock);
+
+static int32_t _rsa_sign(const uint8_t *mod,
+						size_t mod_size,
+						const uint8_t *prv_p,
+						size_t prv_p_size,
+						const uint8_t *prv_q,
+						size_t prv_q_size,
+						const uint8_t *prv_e,
+						size_t prv_e_size,
+						const uint8_t *pub_e,
+						size_t pub_e_size,
+						int salt_len,
+						const uint8_t *hash,
+						size_t hash_size,
+						uint8_t *sign,
+						int hash_id,
+						int use_pkcs1_v21)
+{
+	int32_t ret = 0;
+	size_t key_size = 0;
+	se_rsa_context_t rsa_ctx = {0};
+
+	mutex_lock(&rsa_lock);
+
+	aca_engine_exit();
+	aca_engine_init();
+
+	/* Init local variables */
+	se_rsa_init(&rsa_ctx);
+
+	/* import RSA key */
+	ret = se_rsa_import_raw(&rsa_ctx,
+							mod,
+							mod_size,
+							prv_p,
+							prv_p_size,
+							prv_q,
+							prv_q_size,
+							prv_e,
+							prv_e_size,
+							pub_e,
+							pub_e_size);
+	if (ret != 0) {
+		goto end;
+	}
+
+	/* get key_size */
+	key_size = rsa_ctx.len;
+	if (key_size != mod_size) {
+		ret = -EINVAL;
+		goto end;
+	}
+
+	/* check rsa key */
+	ret = se_rsa_check_privkey(&rsa_ctx);
+	if (ret != 0) {
+		goto end;
+	}
+
+	ret = se_rsa_check_pub_priv(&rsa_ctx, &rsa_ctx);
+	if (ret != 0) {
+		goto end;
+	}
+
+	/* Sign */
+	if (!use_pkcs1_v21) {
+		ret = se_rsa_rsassa_pkcs1_v15_sign(&rsa_ctx,
+											rsa_rng,
+											NULL,
+											hash_id,
+											hash_size,
+											hash,
+											sign);
+	} else {
+		ret = se_rsa_rsassa_pss_sign(&rsa_ctx,
+									rsa_rng,
+									NULL,
+									hash,
+									hash_size,
+									hash_id,
+									salt_len,
+									sign);
+	}
+
+end:
+	se_rsa_free(&rsa_ctx);
+	aca_engine_exit();
+	mutex_unlock(&rsa_lock);
+	return ret;
+}
+
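+/*
+ * rsa_sign(): hash the input message with the TE200 hash engine, then hand
+ * the digest to _rsa_sign() above, which performs the PKCS#1 v1.5 or PSS
+ * signature depending on use_pkcs1_v21.
+ */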
+static int32_t rsa_sign(const uint8_t *mod,
+						size_t mod_size,
+						const uint8_t *prv_p,
+						size_t prv_p_size,
+						const uint8_t *prv_q,
+						size_t prv_q_size,
+						const uint8_t *prv_e,
+						size_t prv_e_size,
+						const uint8_t *pub_e,
+						size_t pub_e_size,
+						const uint8_t *msg,
+						size_t msg_size,
+						uint8_t *sign,
+						int hash_id,
+						int use_pkcs1_v21)
+{
+	int32_t ret = 0;
+	uint8_t hash[32] = {0};
+	uint32_t hash_size = 0;
+	int alg = 0;
+	struct asr_sha_reqctx hash_ctx;
+
+	/* Calculate message's hash */
+	if (hash_id == SE_ALG_SHA256) {
+		hash_size = 32;
+		alg = HASH_SHA256;
+	} else if (hash_id == SE_ALG_SHA224) {
+		hash_size = 28;
+		alg = HASH_SHA224;
+	} else if (hash_id == SE_ALG_SHA1) {
+		hash_size = 20;
+		alg = HASH_SHA1;
+	} else {
+		ret = -EINVAL;
+		goto end;
+	}
+
+	/* Calculate message's hash */
+	ret = asr_te200_hash_init(&hash_ctx, alg);
+	if (ret != 0) {
+		goto end;
+	}
+
+	ret = asr_te200_hash_proc(&hash_ctx, msg, msg_size);
+	if (ret != 0) {
+		goto end;
+	}
+
+	ret = asr_te200_hash_finish(&hash_ctx, hash, hash_size);
+	if (ret != 0) {
+		goto end;
+	}
+
+	ret = _rsa_sign(mod, mod_size,
+					prv_p, prv_p_size,
+					prv_q, prv_q_size,
+					prv_e, prv_e_size,
+					pub_e, pub_e_size,
+					SE_RSA_SALT_LEN_MAX,
+					hash, hash_size,
+					sign, hash_id, use_pkcs1_v21);
+
+end:
+	return ret;
+}
+
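+/*
+ * _rsa_verify(): import the public key, check the signature length against
+ * the modulus size, then verify with PKCS#1 v1.5 or PSS depending on
+ * use_pkcs1_v21. The ACA engine is held under rsa_lock for the whole call.
+ */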
+static int _rsa_verify(const uint8_t *pub_mod,
+						  size_t pub_mod_size,
+						  const uint8_t *pub_exp,
+						  size_t pub_exp_size,
+						  const uint8_t *hash,
+						  size_t hash_size,
+						  const uint8_t *sign,
+						  size_t sign_size,
+						  int hash_id,
+						  int use_pkcs1_v21)
+{
+	int ret = 0;
+	size_t key_size = 0;
+	se_rsa_context_t rsa_ctx = {0};
+
+	mutex_lock(&rsa_lock);
+
+	aca_engine_exit();
+	aca_engine_init();
+
+	/* Init local variables */
+	se_rsa_init(&rsa_ctx);
+
+	/* import RSA key */
+	ret = se_rsa_import_raw(&rsa_ctx,
+								pub_mod,
+								pub_mod_size,
+								NULL,
+								0,
+								NULL,
+								0,
+								NULL,
+								0,
+								pub_exp,
+								pub_exp_size);
+	if (ret != 0) {
+		goto end;
+	}
+
+	/* check rsa pub key */
+	ret = se_rsa_check_pubkey(&rsa_ctx);
+	if (ret != 0) {
+		goto end;
+	}
+
+	/* before verify signature, check signature size */
+	key_size = rsa_ctx.len;
+	if (sign_size != key_size) {
+		pr_err("Bad signature size: 0x%x, should be: 0x%x\n",
+					(uint32_t)sign_size,
+					(uint32_t)key_size);
+		ret = -1;
+		goto end;
+	}
+
+	/* Verify */
+	if (!use_pkcs1_v21) {
+		ret = se_rsa_rsassa_pkcs1_v15_verify(&rsa_ctx,
+											hash_id,
+											hash_size,
+											hash,
+											sign);
+	} else {
+		ret = se_rsa_rsassa_pss_verify(&rsa_ctx,
+										hash,
+										hash_size,
+										hash_id,
+										SE_RSA_SALT_LEN_ANY,
+										sign);
+	}
+
+end:
+	se_rsa_free(&rsa_ctx);
+	aca_engine_exit();
+	mutex_unlock(&rsa_lock);
+	return ret;
+}
+
+static int rsa_verify(const uint8_t *pub_mod,
+						  size_t pub_mod_size,
+						  const uint8_t *pub_exp,
+						  size_t pub_exp_size,
+						  const uint8_t *msg,
+						  size_t msg_size,
+						  const uint8_t *sign,
+						  size_t sign_size,
+						  int hash_id,
+						  int use_pkcs1_v21)
+{
+	int ret = 0;
+	uint8_t hash[32];
+	uint32_t hash_size = 0;
+	int alg = 0;
+	struct asr_sha_reqctx hash_ctx;
+
+	/* Calculate message's hash */
+	if (hash_id == SE_ALG_SHA256) {
+		hash_size = 32;
+		alg = HASH_SHA256;
+	} else if (hash_id == SE_ALG_SHA224) {
+		hash_size = 28;
+		alg = HASH_SHA224;
+	} else if (hash_id == SE_ALG_SHA1) {
+		hash_size = 20;
+		alg = HASH_SHA1;
+	} else {
+		ret = -EINVAL;
+		goto end;
+	}
+
+	/* Calculate message's hash */
+	ret = asr_te200_hash_init(&hash_ctx, alg);
+	if (ret != 0) {
+		goto end;
+	}
+
+	ret = asr_te200_hash_proc(&hash_ctx, msg, msg_size);
+	if (ret != 0) {
+		goto end;
+	}
+
+	ret = asr_te200_hash_finish(&hash_ctx, hash, hash_size);
+	if (ret != 0) {
+		goto end;
+	}
+
+	ret = _rsa_verify(pub_mod, pub_mod_size, pub_exp,
+					  pub_exp_size, hash, hash_size,
+					  sign, sign_size, hash_id, use_pkcs1_v21);
+
+end:
+	return ret;
+}
+
+inline u32 asr_rsa_read(u32 offset)
+{
+	u32 value = readl_relaxed(asr_rsa_local->io_base + offset);
+
+	return value;
+}
+
+inline void asr_rsa_write(u32 offset, u32 value)
+{
+	writel_relaxed(value, asr_rsa_local->io_base + offset);
+}
+
+static int asr_rsa_open(struct inode *inode, struct file *file)
+{
+	return 0;
+}
+
+static int asr_rsa_close(struct inode *inode, struct file *file)
+{
+	return 0;
+}
+
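+/*
+ * /dev/hwrsa ioctl entry: copies the hwrsa_arg descriptor from user space,
+ * dispatches to rsa_sign()/rsa_verify() according to the command (PKCS#1
+ * v1.5 or PSS, SHA-1 or SHA-256), and reports pass/fail through
+ * u_arg->result.
+ */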
+static long asr_rsa_ioctl(struct file *file, u_int cmd, u_long arg)
+{
+	int ret = 0;
+	struct miscdevice *miscdev;
+	struct asr_te200_dev *te200_dd;
+	struct asr_te200_rsa *rsa;
+	struct asr_te200_ops *te200_ops;
+	struct hwrsa_arg rsa_arg;
+	struct rsa_ioctl_key *key;
+	struct hwrsa_arg __user *u_arg = (void __user *)arg;
+	int result;
+
+	miscdev = file->private_data;
+	rsa = container_of(miscdev, struct asr_te200_rsa, rsa_misc);
+	te200_dd = dev_get_drvdata(rsa->dev);
+	te200_ops = te200_dd->te200_ops;
+
+	if (copy_from_user(&rsa_arg, (void __user *)arg, sizeof(rsa_arg))) {
+		return -EFAULT;
+	}
+
+	if (!rsa_arg.rsa_key) {
+		return -EFAULT;
+	}
+
+	if (!rsa_arg.msg || !rsa_arg.msg_size) {
+		return -EFAULT;
+	}
+
+	te200_ops->dev_get(te200_dd);
+	key = rsa_arg.rsa_key;
+
+	switch (cmd) {
+	case HWRSA_SIGN_PKCS_V15_SHA1:
+		if (!rsa_arg.sign || !key->is_private) {
+			ret = -EINVAL;
+			goto exit;
+		}
+
+		ret = rsa_sign(key->n, key->n_size,
+					   key->p, key->p_size,
+					   key->q, key->q_size,  
+					   key->d, key->d_size,
+					   key->e, key->e_size,
+					   rsa_arg.msg, rsa_arg.msg_size,
+					   rsa_arg.sign,
+					   SE_ALG_SHA1, 0);
+		break;
+	case HWRSA_SIGN_PKCS_V15_SHA256:
+		if (!rsa_arg.sign || !key->is_private) {
+			ret = -EINVAL;
+			goto exit;
+		}
+
+		ret = rsa_sign(key->n, key->n_size,
+					   key->p, key->p_size,
+					   key->q, key->q_size,  
+					   key->d, key->d_size,
+					   key->e, key->e_size,
+					   rsa_arg.msg, rsa_arg.msg_size,
+					   rsa_arg.sign,
+					   SE_ALG_SHA256, 0);
+		break;
+	case HWRSA_SIGN_PKCS_V21_SHA1:
+		if (!rsa_arg.sign || !key->is_private) {
+			ret = -EINVAL;
+			goto exit;
+		}
+
+		ret = rsa_sign(key->n, key->n_size,
+					   key->p, key->p_size,
+					   key->q, key->q_size,  
+					   key->d, key->d_size,
+					   key->e, key->e_size,
+					   rsa_arg.msg, rsa_arg.msg_size,
+					   rsa_arg.sign,
+					   SE_ALG_SHA1, 1);
+		break;
+	case HWRSA_SIGN_PKCS_V21_SHA256:
+		if (!rsa_arg.sign || !key->is_private) {
+			ret = -EINVAL;
+			goto exit;
+		}
+
+		ret = rsa_sign(key->n, key->n_size,
+					   key->p, key->p_size,
+					   key->q, key->q_size,  
+					   key->d, key->d_size,
+					   key->e, key->e_size,
+					   rsa_arg.msg, rsa_arg.msg_size,
+					   rsa_arg.sign,
+					   SE_ALG_SHA256, 1);
+		break;
+	case HWRSA_VERIFY_PKCS_V15_SHA1:
+		if (!rsa_arg.sign || !rsa_arg.sign_size || key->is_private) {
+			ret = -EINVAL;
+			goto exit;
+		}
+
+		ret = rsa_verify(key->n, key->n_size,
+					   key->e, key->e_size,
+					   rsa_arg.msg, rsa_arg.msg_size,
+					   rsa_arg.sign, rsa_arg.sign_size,
+					   SE_ALG_SHA1, 0);
+		break;
+	case HWRSA_VERIFY_PKCS_V15_SHA256:
+		if (!rsa_arg.sign || !rsa_arg.sign_size || key->is_private) {
+			ret = -EINVAL;
+			goto exit;
+		}
+
+		ret = rsa_verify(key->n, key->n_size,
+					   key->e, key->e_size,
+					   rsa_arg.msg, rsa_arg.msg_size,
+					   rsa_arg.sign, rsa_arg.sign_size,
+					   SE_ALG_SHA256, 0);
+		break;
+	case HWRSA_VERIFY_PKCS_V21_SHA1:
+		if (!rsa_arg.sign || !rsa_arg.sign_size || key->is_private) {
+			ret = -EINVAL;
+			goto exit;
+		}
+
+		ret = rsa_verify(key->n, key->n_size,
+					   key->e, key->e_size,
+					   rsa_arg.msg, rsa_arg.msg_size,
+					   rsa_arg.sign, rsa_arg.sign_size,
+					   SE_ALG_SHA1, 1);
+		break;
+	case HWRSA_VERIFY_PKCS_V21_SHA256:
+		if (!rsa_arg.sign || !rsa_arg.sign_size || key->is_private) {
+			ret = -EINVAL;
+			goto exit;
+		}
+
+		ret = rsa_verify(key->n, key->n_size,
+					   key->e, key->e_size,
+					   rsa_arg.msg, rsa_arg.msg_size,
+					   rsa_arg.sign, rsa_arg.sign_size,
+					   SE_ALG_SHA256, 1);
+		break;
+	default:
+		dev_err(rsa->dev, "asr te200: rsa ioctl invalid command %x\n", cmd);
+		ret = -EINVAL;
+		goto exit;
+	}
+
+	result = ret ? 0 : 1;
+	if (put_user(result, &u_arg->result))
+		ret = -EFAULT;
+exit:
+	te200_ops->dev_put(te200_dd);
+	return ret;
+}
+
+static const struct file_operations asr_rsa_fops = {
+	.owner = THIS_MODULE,
+	.open = asr_rsa_open,
+	.release = asr_rsa_close,
+	.unlocked_ioctl = asr_rsa_ioctl,
+};
+
+int asr_te200_rsa_register(struct asr_te200_dev *te200_dd)
+{
+	int ret = 0;
+	struct asr_te200_rsa *prsa;
+	struct miscdevice *misc;
+	struct device *dev = te200_dd->dev;
+
+	prsa = &te200_dd->asr_rsa;
+	misc = &prsa->rsa_misc;
+
+	misc->name = "hwrsa";
+	misc->minor = MISC_DYNAMIC_MINOR;
+	misc->fops = &asr_rsa_fops;
+	misc->this_device = NULL;
+	prsa->io_base = te200_dd->io_base;
+	prsa->dev = te200_dd->dev;
+
+	asr_rsa_local = prsa;
+
+	/* register the device */
+	ret = misc_register(misc);
+	if (ret < 0) {
+		dev_err(dev,
+			"asr rsa: unable to register device node /dev/hwrsa\n");
+		return ret;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(asr_te200_rsa_register);
+
+int asr_te200_rsa_unregister(struct asr_te200_dev *te200_dd)
+{
+	struct miscdevice *miscdev;
+
+	miscdev = &te200_dd->asr_rsa.rsa_misc;
+
+	misc_deregister(miscdev);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(asr_te200_rsa_unregister);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Yonggan Wang <yongganwang@asrmicro.com>");
+MODULE_DESCRIPTION("ASR hwrsa driver");
\ No newline at end of file
diff --git a/marvell/linux/drivers/crypto/asr/te200/asr-aca/se_rsa.h b/marvell/linux/drivers/crypto/asr/te200/asr-aca/se_rsa.h
new file mode 100644
index 0000000..6218b0a
--- /dev/null
+++ b/marvell/linux/drivers/crypto/asr/te200/asr-aca/se_rsa.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2021, Arm Technology (China) Co., Ltd.
+ * All rights reserved.
+ *
+ * The content of this file or document is CONFIDENTIAL and PROPRIETARY
+ * to Arm Technology (China) Co., Ltd. It is subject to the terms of a
+ * License Agreement between Licensee and Arm Technology (China) Co., Ltd
+ * restricting among other things, the use, reproduction, distribution
+ * and transfer. Each of the embodiments, including this information and
+ * any derivative work shall retain this copyright notice.
+ */
+
+#ifndef __SE_RSA_H__
+#define __SE_RSA_H__
+
+#include <linux/miscdevice.h>
+#include <crypto/internal/rsa.h>
+#include "se_aca.h"
+#include "se_bn.h"
+#include "se_common.h"
+
+/******************************************************************************/
+/*                               RSA APIs                                     */
+/******************************************************************************/
+#define SE_RSA_SALT_LEN_ANY -1
+#define SE_RSA_SALT_LEN_MAX -2
+
+#define SE_RSA_MIN_KEY_BITS 1024
+#define SE_RSA_MAX_KEY_BITS 4096
+
+/**
+ * \brief   The RSA context structure.
+ *
+ * \note    Direct manipulation of the members of this structure
+ *          is deprecated. All manipulation should instead be done through
+ *          the public interface functions.
+ */
+typedef struct se_rsa_context {
+	size_t len; /*!<  The size of \p N in Bytes. */
+
+	se_bn_t N; /*!<  The public modulus. */
+	se_bn_t E; /*!<  The public exponent. */
+
+	se_bn_t D; /*!<  The private exponent. */
+	se_bn_t P; /*!<  The first prime factor. */
+	se_bn_t Q; /*!<  The second prime factor. */
+
+	se_bn_t Vi; /*!<  The cached blinding value. */
+	se_bn_t Vf; /*!<  The cached un-blinding value. */
+} se_rsa_context_t;
+
+struct asr_te200_rsa { 
+	struct device		*dev;
+	void __iomem		*io_base;
+	struct miscdevice rsa_misc;
+};
+
+inline u32 asr_rsa_read(u32 offset);
+inline void asr_rsa_write(u32 offset, u32 value);
+#endif /* __SE_RSA_H__ */
diff --git a/marvell/linux/drivers/crypto/asr/te200/asr-cipher.c b/marvell/linux/drivers/crypto/asr/te200/asr-cipher.c
new file mode 100644
index 0000000..582aae7
--- /dev/null
+++ b/marvell/linux/drivers/crypto/asr/te200/asr-cipher.c
@@ -0,0 +1,1078 @@
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/clk-provider.h>
+#include <linux/clk.h>
+#include <linux/io.h>
+#include <linux/hw_random.h>
+#include <linux/platform_device.h>
+#include <linux/bitops.h>
+#include <linux/device.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/scatterlist.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
+#include <linux/of_device.h>
+#include <linux/of_address.h>
+#include <linux/delay.h>
+#include <linux/crypto.h>
+#include <linux/cputype.h>
+#include <crypto/scatterwalk.h>
+#include <crypto/algapi.h>
+#include <linux/jiffies.h>
+#include <crypto/aes.h>
+#include <crypto/sm4.h>
+#include <crypto/internal/skcipher.h>
+#include "asr-te200.h"
+#include "asr-cipher.h"
+
+#define CIPHER_BLOCK_SIZE AES_BLOCK_SIZE
+#define CIPHER_MIN_KEY_SIZE AES_MIN_KEY_SIZE
+#define CIPHER_MAX_KEY_SIZE AES_MAX_KEY_SIZE
+
+static struct asr_te200_cipher *asr_cipher_local = NULL;
+
+static inline u32 asr_cipher_read(struct asr_te200_cipher *dd, u32 offset)
+{
+	u32 value = readl_relaxed(dd->io_base + offset);
+
+	return value;
+}
+
+static inline void asr_cipher_write(struct asr_te200_cipher *dd,
+					u32 offset, u32 value)
+{
+	writel_relaxed(value, dd->io_base + offset);
+}
+
+static int sca_clock_switch(struct asr_te200_cipher *dd, int enable)
+{
+	uint32_t value;
+	value = asr_cipher_read(dd, TE200_CLOCK_CTRL);
+	if (enable) {
+		value |= SCA_CLK_EN;
+	} else {
+		value &= ~SCA_CLK_EN;
+	}
+	asr_cipher_write(dd, TE200_CLOCK_CTRL, value);
+	return 0;
+}
+
+static int sca_start_run(struct asr_te200_cipher *dd)
+{
+	uint32_t value;
+	value = asr_cipher_read(dd, TE200_SSCA_CTRL);
+	value |= SCA_RUN;
+	asr_cipher_write(dd, TE200_SSCA_CTRL, value);
+	return 0; 
+}
+
+static int sca_set_alg(int alg_type, uint32_t *value)
+{
+	switch (alg_type) {
+	case NORMAL_AES:
+		*value &= SCA_NORMAL_AES;
+		break;
+	case SM4:
+		*value |= SCA_SM4;
+		break;
+	default:
+		return -1;
+	}
+	return 0; 
+}
+
+static int sca_set_cipher_mode(int mode, uint32_t *value)
+{
+	switch (mode) {
+	case ECB:
+		*value &= SCA_MODE_ECB;
+		break;
+	case CTR:
+		*value |= SCA_MODE_CTR;
+		break;
+	case CBC:
+		*value |= SCA_MODE_CBC;
+		break;
+	default:
+		return -1;
+	}
+	return 0; 
+}
+
+static int sca_set_iv(const uint8_t *iv, uint32_t *value)
+{
+	if (iv) {
+		*value |= SCA_SET_IV | SCA_SET_IV_ADDR;
+	} else {
+		*value &= (~(SCA_SET_IV | SCA_SET_IV_ADDR));
+	}
+	return 0;
+}
+
+static int sca_set_key(const uint8_t *key, uint32_t key_len, uint32_t *value)
+{
+	switch (key_len) {
+	case 16:
+		*value &= SCA_KEY_128_BITS;
+		break;
+	case 24:
+		*value |= SCA_KEY_192_BITS;
+		break;
+	case 32:
+		*value |= SCA_KEY_256_BITS;
+		break;  
+	default:
+		return -1;
+	}
+
+	if (key) {
+		*value |= SCA_EXTERNAL_KEY | SCA_KEY_IS_ADDR;
+	} else {
+		*value |= SCA_DEVICE_ROOT_KEY | SCA_KEY_IS_ADDR;
+	}
+
+	return 0;
+}
+
+static int sca_wait_intr(struct asr_te200_cipher *dd)
+{
+	int ret = 0;
+	uint32_t value;
+	uint32_t time_start;
+	time_start = jiffies;
+	value = asr_cipher_read(dd, TE200_SSCA_INTR_STAT);
+
+	while (1) {
+		value = asr_cipher_read(dd, TE200_SSCA_INTR_STAT);
+
+		if (value & SCA_INVALID_CMD) {
+			dev_err(dd->dev, "invalid cmd\n");
+			ret = -1;
+			break;
+		}
+
+		if (value & SCA_INVALID_KEY) {
+			dev_err(dd->dev, "invalid key\n");
+			ret = -1;
+			break;
+		}
+
+		if (value & SCA_BUS_ERROR) {
+			dev_err(dd->dev, "bus err\n");
+			ret = -1;
+			break;
+		}
+
+		if ((jiffies - time_start) > 500) {
+			dev_err(dd->dev, "wait intr timeout !\n");
+			ret = -1;
+			break;
+		}
+
+		if (value & SCA_CMD_INTR) {
+			break;
+		}
+	}
+
+	value = asr_cipher_read(dd, TE200_SSCA_INTR_STAT);
+	value |= SCA_CMD_INTR;
+	asr_cipher_write(dd, TE200_SSCA_INTR_STAT, value);
+	return ret;
+}
+
+static inline void cipher_cache_operation(void *addr, int size)
+{
+	__cpuc_flush_dcache_area(addr, size);
+}
+
+/* keep the same key ladder in sync with the TOS/U-Boot/kernel te200 drivers */
+static const struct {
+	__attribute__ ((aligned (16))) uint8_t ek3[16];
+	__attribute__ ((aligned (16))) uint8_t ek2[16];
+	__attribute__ ((aligned (16))) uint8_t ek1[16];
+} key_ladder = {
+	{ 0x50,0xCF,0x0F,0x29,0xD1,0xCF,0x32,0x41,0xC5,0x64,0xAC,0xDB,0xDD,0x9A,0xFC,0xF4 },
+	{ 0x9C,0xAB,0x04,0x57,0xB7,0x17,0xD9,0x4A,0x34,0x74,0x28,0x30,0x34,0x16,0x3B,0x52 },
+	{ 0xF5,0xA0,0x33,0x7B,0x4B,0xE8,0x18,0x84,0x51,0x4E,0x38,0x86,0x6D,0x08,0xBB,0x6E },
+};
+
+static int rkek_cfg_init(struct asr_te200_cipher *dd)
+{
+#define SYS_SEC_CTRL0           (0x0C)
+
+	uint32_t value;
+	struct device_node *np;
+	struct resource res;
+	void __iomem *io_base;
+
+	value = asr_cipher_read(dd, TE200_CLOCK_CTRL);
+	value &= ~OTP_CLK_EN;
+	asr_cipher_write(dd, TE200_CLOCK_CTRL, value);
+
+	value = asr_cipher_read(dd, TE200_CLOCK_CTRL);
+	value |= OTP_CLK_EN;
+	asr_cipher_write(dd, TE200_CLOCK_CTRL, value);
+
+	/* set OTP key select */
+	np = of_find_compatible_node(NULL, NULL, "marvell,mmp-ciu");
+	if (!np) {
+		dev_err(dd->dev, "can't find ciu node to set OTP key select");
+		return -1;
+	}
+
+	if (of_address_to_resource(np, 0, &res)) {
+		return -1;
+	}
+
+	io_base = ioremap(res.start, resource_size(&res));
+	if (!io_base) {
+		dev_err(dd->dev, "ciu regs can't remap");
+		return -1;
+	}
+
+	value = readl_relaxed(io_base + SYS_SEC_CTRL0);
+	value |= (1 << 24);
+	writel_relaxed(value, io_base + SYS_SEC_CTRL0);
+
+	/* enable lock */
+	value = asr_cipher_read(dd, TE200_OTP_DUMMY_CFG);
+	value |= 0x10;
+	asr_cipher_write(dd, TE200_OTP_DUMMY_CFG, value);
+
+	iounmap(io_base);
+	return 0;
+}
+
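+/*
+ * Build and queue the SCA INIT command: the command word selects the
+ * algorithm (AES/SM4), block mode, key size and key source, and whether an
+ * IV is supplied. Key and IV are handed to the engine by physical address,
+ * so their cache lines are flushed first. When no external key is given,
+ * the three key-ladder stages used with the device root key are queued
+ * instead.
+ */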
+static int sca_cipher_init(struct asr_te200_cipher *dd, int alg_type, int mode, 
+						const uint8_t *iv, const uint8_t *key, uint32_t key_len)
+{
+	int ret;
+	uint32_t cmd = 0;
+	uint32_t param;
+	uint32_t key_phys;
+	uint32_t iv_phys;
+
+	sca_start_run(dd);
+
+	ret = sca_set_alg(alg_type, &cmd);
+	if (ret) {
+		return -1;
+	}
+	ret = sca_set_cipher_mode(mode, &cmd);
+	if (ret) {
+		return -1;
+	}
+
+	ret = sca_set_key(key, key_len, &cmd);
+	if (ret) {
+		return -1;
+	}
+
+	if (iv && ((mode == CBC) || (mode == CTR))) {
+		ret = sca_set_iv(iv, &cmd);
+		if (ret) {
+			return -1;
+		}
+	}
+
+	cmd |= SCA_INTER_TRIGGERD;
+	cmd |= SCA_INIT_CMD;
+
+	asr_cipher_write(dd, TE200_SSCA_QUEUE, cmd);
+
+	/* set key params */
+	if (key) {
+		key_phys = virt_to_phys((void *)key);
+		cipher_cache_operation((void*)key, key_len);
+		param = (uint32_t)key_phys;
+		asr_cipher_write(dd, TE200_SSCA_QUEUE, param);
+	} else {		/* use rkek */
+		key_phys = virt_to_phys((void *)key_ladder.ek3);
+		cipher_cache_operation((void*)key_ladder.ek3, key_len);
+		param = (uint32_t)key_phys;
+		asr_cipher_write(dd, TE200_SSCA_QUEUE, param);
+
+		key_phys = virt_to_phys((void *)key_ladder.ek2);
+		cipher_cache_operation((void*)key_ladder.ek2, key_len);
+		param = (uint32_t)key_phys;
+		asr_cipher_write(dd, TE200_SSCA_QUEUE, param);
+
+		key_phys = virt_to_phys((void *)key_ladder.ek1);
+		cipher_cache_operation((void*)key_ladder.ek1, key_len);
+		param = (uint32_t)key_phys;
+		asr_cipher_write(dd, TE200_SSCA_QUEUE, param);
+	}
+
+	/* set iv params */
+	if (iv && ((mode == CBC) || (mode == CTR))) {
+		/* set iv addr */
+		iv_phys = virt_to_phys((void *)iv);
+		cipher_cache_operation((void*)iv, 16);
+		param = (uint32_t)iv_phys;
+		asr_cipher_write(dd, TE200_SSCA_QUEUE, param);
+	}
+
+	return sca_wait_intr(dd);
+}
+
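+/*
+ * Queue one PROCESS command: set the encrypt/decrypt direction and the
+ * "last session" flag, then push the source physical address, the data
+ * length and the destination physical address, and wait for the
+ * command-done interrupt status.
+ */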
+static int sca_cipher_process(struct asr_te200_cipher *dd, int encrypt, 
+							int last_one, const void *in, uint32_t size, void *out)
+{
+	uint32_t cmd = 0;
+	uint32_t param;
+	uint8_t *psrc = (uint8_t *)in;
+	uint8_t *pdst = (uint8_t *)out;
+	uint32_t in_phys, out_phys;
+	uint32_t len;
+
+	len = (size + 0xf) & (~0xf);
+
+	/* set encrypt or decrypt direction */
+	if (encrypt) {
+		cmd |= SCA_ENCRYPTION;
+	} else {
+		cmd &= (~SCA_ENCRYPTION);
+	}
+
+	cmd |= SCA_INTER_TRIGGERD;
+	cmd |= SCA_PROCESS_CMD;
+	if (last_one) {
+		cmd |= SCA_LAST_ONE_SESSION;
+	} else {
+		cmd &= ~SCA_LAST_ONE_SESSION;
+	}
+	asr_cipher_write(dd, TE200_SSCA_QUEUE, cmd);
+
+	in_phys = (uint32_t)virt_to_phys((void *)psrc);
+	out_phys = (uint32_t)virt_to_phys((void *)pdst);
+	cipher_cache_operation((void*)psrc, len);
+	cipher_cache_operation((void*)pdst, len);
+
+	/* set src addr */
+	param = (uint32_t)in_phys;
+	asr_cipher_write(dd, TE200_SSCA_QUEUE, param);
+
+	/* set data length */
+	param = (uint32_t)size;
+	asr_cipher_write(dd, TE200_SSCA_QUEUE, param);
+
+	/* set dst addr */
+	if (out_phys) {
+		param = (uint32_t)out_phys;
+		asr_cipher_write(dd, TE200_SSCA_QUEUE, param);
+	}
+
+	sca_start_run(dd);
+	return sca_wait_intr(dd);
+}
+
+static int sca_cipher_finish(struct asr_te200_cipher *dd)
+{
+	uint32_t cmd = 0;
+	/* set cmd*/
+	cmd |= SCA_INTER_TRIGGERD;
+	cmd |= SCA_FINISH_CMD;
+	asr_cipher_write(dd, TE200_SSCA_QUEUE, cmd);
+
+	sca_start_run(dd);
+	return sca_wait_intr(dd);
+}
+
+static int asr_cipher_hw_init(struct asr_te200_cipher *dd)
+{
+	asr_cipher_write(dd, TE200_SSCA_INTR_MSK, 0x1f);
+	return 0;
+}
+
+static int sca_cipher_handle(struct asr_te200_cipher *dd, struct sca_data *psca_data, const uint8_t *iv, 
+			const uint8_t *key, uint32_t key_len, const void *in, uint32_t size, void *out)
+{
+	int ret = 0;
+
+	sca_clock_switch(dd, 0);
+	sca_clock_switch(dd, 1);
+
+	ret = asr_cipher_hw_init(dd);
+	if (ret) {
+		goto exit;
+	}
+
+	if (psca_data->use_rkek) {
+		ret = rkek_cfg_init(dd);
+		if (ret) {
+			goto exit;
+		}
+		ret = sca_cipher_init(dd, psca_data->alg_type, psca_data->mode, iv, NULL, key_len);
+	} else {
+		ret = sca_cipher_init(dd, psca_data->alg_type, psca_data->mode, iv, key, key_len);
+	}
+	if (ret) {
+		goto exit;
+	}
+
+	ret = sca_cipher_process(dd, psca_data->encrypt, 1, in, size, out);
+	if (ret) {
+		goto exit;
+	}        
+
+	ret = sca_cipher_finish(dd);
+	if (ret) {
+		goto exit;
+	}
+
+exit:
+	sca_clock_switch(dd, 0);
+	return ret;
+}
+
+static void asr_cipher_set_iv_as_last_ciphertext_block(struct asr_te200_cipher*dd)
+{
+	struct skcipher_request *req = skcipher_request_cast(dd->areq);
+	struct asr_cipher_reqctx *rctx = skcipher_request_ctx(req);
+	struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(req);
+	unsigned int ivsize = crypto_skcipher_ivsize(cipher);
+
+	if (req->cryptlen < ivsize)
+		return;
+
+	if (rctx->mode & FLAGS_ENCRYPT) {
+		scatterwalk_map_and_copy(req->iv, req->dst,
+					 req->cryptlen - ivsize, ivsize, 0);
+	} else {
+		if (req->src == req->dst)
+			memcpy(req->iv, rctx->lastc, ivsize);
+		else
+			scatterwalk_map_and_copy(req->iv, req->src,
+						 req->cryptlen - ivsize,
+						 ivsize, 0);
+	}
+}
+
+static int asr_cipher_complete(struct asr_te200_cipher *dd, int err)
+{
+	struct asr_te200_dev *te200_dd = dev_get_drvdata(dd->dev);
+	struct asr_te200_ops *te200_ops = te200_dd->te200_ops;
+
+	dd->flags &= ~FLAGS_BUSY;
+
+	asr_cipher_set_iv_as_last_ciphertext_block(dd);
+
+	if (dd->is_async)
+		dd->areq->complete(dd->areq, err);
+
+	te200_ops->dev_put(te200_dd);
+
+	tasklet_schedule(&dd->queue_task);
+
+	return err;
+}
+
+static int asr_complete(struct asr_te200_cipher *dd)
+{
+	return asr_cipher_complete(dd, 0);
+}
+
+static inline size_t asr_cipher_padlen(size_t len, size_t block_size)
+{
+	len &= block_size - 1;
+	return len ? block_size - len : 0;
+}
+
+static void get_sca_data_info(struct sca_data *psca_data, struct asr_te200_cipher *dd)
+{
+
+	psca_data->alg_type = (dd->flags & FLAGS_SM4) ? SM4 : NORMAL_AES;
+	psca_data->encrypt = (dd->flags & FLAGS_ENCRYPT) ? 1 : 0;
+
+	if ((dd->flags & FLAGS_OPMODE_MASK) == FLAGS_ECB)
+		psca_data->mode = ECB;
+	else if ((dd->flags & FLAGS_OPMODE_MASK) == FLAGS_CBC)
+		psca_data->mode = CBC;
+	else if ((dd->flags & FLAGS_OPMODE_MASK) == FLAGS_CTR)
+		psca_data->mode = CTR;
+}
+
+static int asr_cipher_buff_init(struct asr_te200_cipher *dd, uint32_t len)
+{
+	dd->buf = (void *)__get_free_pages(GFP_KERNEL, get_order(len));
+
+	if (!dd->buf) {
+		dev_err(dd->dev, "unable to alloc pages.\n");
+		return -ENOMEM;
+	}
+
+	dd->buflen = PAGE_SIZE << get_order(len);
+
+	return 0;
+}
+
+static void asr_cipher_buff_cleanup(struct asr_te200_cipher *dd, uint32_t len)
+{
+	free_pages((unsigned long)dd->buf, get_order(len));
+	dd->buflen = 0;
+}
+
+static inline void asr_cipher_get(struct asr_te200_cipher *dd)
+{
+	mutex_lock(&dd->cipher_lock);
+}
+
+static inline void asr_cipher_put(struct asr_te200_cipher *dd)
+{
+	if (mutex_is_locked(&dd->cipher_lock))
+		mutex_unlock(&dd->cipher_lock);
+}
+
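+/*
+ * The engine works on physically contiguous buffers, so the request data is
+ * copied from the source scatterlist into a block-aligned bounce buffer,
+ * processed in place, and copied back to the destination scatterlist.
+ */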
+static int asr_sca_cipher_process(struct asr_te200_cipher *dd, 
+					struct skcipher_request *req, asr_cipher_fn_t resume)
+{
+	int ret;
+	struct sca_data sca_data = {0};
+	size_t padlen = asr_cipher_padlen(req->cryptlen, CIPHER_BLOCK_SIZE);
+	struct asr_cipher_reqctx *rctx = skcipher_request_ctx(req);
+
+	asr_cipher_get(dd);
+
+	if (unlikely(req->cryptlen == 0)) {
+		asr_cipher_put(dd);
+		return -EINVAL;
+	}
+
+	dd->datalen = req->cryptlen + padlen;
+	ret = asr_cipher_buff_init(dd, dd->datalen);
+	if (ret) {
+		asr_cipher_put(dd);
+		return ret;
+	}
+
+	sg_copy_to_buffer(req->src, sg_nents(req->src), dd->buf, req->cryptlen);
+
+	dd->total = req->cryptlen;
+	dd->real_dst = req->dst;
+	dd->resume = resume;
+	dd->data = (u32 *)dd->buf;
+
+	get_sca_data_info(&sca_data, dd);
+	sca_data.use_rkek = rctx->use_rkek;
+	ret = sca_cipher_handle(dd, &sca_data, req->iv, (uint8_t *)dd->ctx->key, 
+						dd->ctx->keylen, dd->data, dd->datalen, dd->data);
+	if (!sg_copy_from_buffer(dd->real_dst, sg_nents(dd->real_dst),
+				 dd->buf, dd->total)) 
+		ret = -EINVAL;
+
+	asr_cipher_buff_cleanup(dd, dd->datalen);
+	asr_cipher_put(dd);
+
+	return asr_cipher_complete(dd, ret);
+}
+
+static inline void asr_cipher_set_mode(struct asr_te200_cipher *dd,
+					  const struct asr_cipher_reqctx *rctx)
+{
+	/* Clear all but persistent flags and set request flags. */
+	dd->flags = (dd->flags & CIPHER_FLAGS_PERSISTENT) | rctx->mode;
+}
+
+static int asr_cipher_start(struct asr_te200_cipher *dd)
+{
+	struct skcipher_request *req = skcipher_request_cast(dd->areq);
+	struct asr_cipher_reqctx *rctx = skcipher_request_ctx(req);
+	struct asr_te200_dev *te200_dd = dev_get_drvdata(dd->dev);
+	struct asr_te200_ops *te200_ops = te200_dd->te200_ops;
+
+	te200_ops->dev_get(te200_dd);
+
+	asr_cipher_set_mode(dd, rctx);
+	return asr_sca_cipher_process(dd, req, asr_complete);
+}
+
+static int asr_cipher_handle_queue(struct asr_te200_cipher *dd,
+				  struct crypto_async_request *new_areq)
+{
+	struct crypto_async_request *areq, *backlog;
+	struct asr_cipher_ctx *ctx;
+	unsigned long flags;
+	bool start_async;
+	int err, ret = 0;
+
+	spin_lock_irqsave(&dd->lock, flags);
+	if (new_areq)
+		ret = crypto_enqueue_request(&dd->queue, new_areq);
+	if (dd->flags & FLAGS_BUSY) {
+		spin_unlock_irqrestore(&dd->lock, flags);
+		return ret;
+	}
+
+	backlog = crypto_get_backlog(&dd->queue);
+	areq = crypto_dequeue_request(&dd->queue);
+	if (areq) {
+		dd->flags |= FLAGS_BUSY;
+	}
+	spin_unlock_irqrestore(&dd->lock, flags);
+	if (!areq)
+		return ret;
+
+	if (backlog)
+		backlog->complete(backlog, -EINPROGRESS);
+
+	ctx = crypto_tfm_ctx(areq->tfm);
+	dd->areq = areq;
+	dd->ctx = ctx;
+	start_async = (areq != new_areq);
+	dd->is_async = start_async;
+
+	/* WARNING: ctx->start() MAY change dd->is_async. */
+	err = ctx->start(dd);
+	return (start_async) ? ret : err;
+}
+
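+/*
+ * Common request entry point. For in-place decryption the last ciphertext
+ * block is saved in rctx->lastc before it is overwritten, so the request IV
+ * can still be updated afterwards (see
+ * asr_cipher_set_iv_as_last_ciphertext_block()).
+ */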
+static int asr_cipher(struct skcipher_request *req, unsigned long mode)
+{
+	int ret;
+	struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(req);
+	struct asr_cipher_ctx *ctx = crypto_skcipher_ctx(cipher);
+	struct asr_cipher_reqctx *rctx;
+
+	ctx->block_size = CIPHER_BLOCK_SIZE;
+	rctx = skcipher_request_ctx(req);
+	rctx->mode = mode;
+	rctx->use_rkek = ctx->use_rkek;
+
+	if (!(mode & FLAGS_ENCRYPT) && (req->src == req->dst)) {
+		unsigned int ivsize = crypto_skcipher_ivsize(cipher);
+		if (req->cryptlen >= ivsize) {
+			scatterwalk_map_and_copy(rctx->lastc, req->src,
+						 req->cryptlen - ivsize,
+						 ivsize, 0);
+		}
+	}
+
+	ret = asr_cipher_handle_queue(ctx->dd, &req->base);
+
+	asr_cipher_put(ctx->dd);
+	return ret;
+}
+
+static int asr_cipher_setkey(struct crypto_skcipher *cipher, const u8 *key,
+			   unsigned int keylen)
+{
+	struct asr_cipher_ctx *ctx = crypto_skcipher_ctx(cipher);
+	struct asr_te200_cipher *dd = asr_cipher_local;
+	
+	ctx->dd = dd;
+	ctx->use_rkek = false;
+
+	if (keylen != AES_KEYSIZE_128 &&
+		keylen != AES_KEYSIZE_192 &&
+		keylen != AES_KEYSIZE_256) {
+		crypto_skcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+
+	memcpy(ctx->key, key, keylen);
+	ctx->keylen = keylen;
+
+	return 0;
+}
+
+static int asr_cipher_set_hwkey(struct crypto_skcipher *cipher, const u8 *key,
+			   unsigned int keylen)
+{
+	struct asr_cipher_ctx *ctx = crypto_skcipher_ctx(cipher);
+	struct asr_te200_cipher *dd = asr_cipher_local;
+	
+	ctx->dd = dd;
+	if (!dd->rkek_burned)
+		return -EPERM;
+
+	ctx->use_rkek = true;
+
+	if (keylen != AES_KEYSIZE_128 &&
+		keylen != AES_KEYSIZE_192 &&
+		keylen != AES_KEYSIZE_256) {
+		crypto_skcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+
+	memcpy(ctx->key, key, keylen);
+	ctx->keylen = keylen;
+
+	return 0;
+}
+
+static int asr_cipher_rkek_fused(struct asr_te200_cipher *dd)
+{
+#define GEU_FUSE_VAL_APCFG2					(0x0408)
+#define GEU_SECURE_KEY_ACCESS_DISABLED		(1 << 29)
+
+	uint32_t value;
+	struct device_node *np;
+	struct resource res;
+	void __iomem *io_base;
+
+	/* get geu node */
+	np = of_find_compatible_node(NULL, NULL, "asr,asr-geu");
+	if (!np) {
+		dev_err(dd->dev, "can't find geu node to check rkek burned");
+		return 0;
+	}
+
+	if (of_address_to_resource(np, 0, &res)) {
+		dev_err(dd->dev, "can't find geu address");
+		return 0;
+	}
+
+	io_base = ioremap(res.start, resource_size(&res));
+	if (!io_base) {
+		dev_err(dd->dev, "geu regs can't remap");
+		return 0;
+	}
+
+	value = readl_relaxed(io_base + GEU_FUSE_VAL_APCFG2);
+	if (value & GEU_SECURE_KEY_ACCESS_DISABLED) {
+		iounmap(io_base);
+		return 1;
+	}
+
+	iounmap(io_base);
+	return 0;
+}
+
+static int asr_aes_ecb_encrypt(struct skcipher_request *req)
+{
+	return asr_cipher(req, FLAGS_AES | FLAGS_ECB | FLAGS_ENCRYPT);
+}
+
+static int asr_aes_ecb_decrypt(struct skcipher_request *req)
+{
+	return asr_cipher(req, FLAGS_AES | FLAGS_ECB);
+}
+
+static int asr_aes_cbc_encrypt(struct skcipher_request *req)
+{
+	return asr_cipher(req, FLAGS_AES | FLAGS_CBC | FLAGS_ENCRYPT);
+}
+
+static int asr_aes_cbc_decrypt(struct skcipher_request *req)
+{
+	return asr_cipher(req, FLAGS_AES | FLAGS_CBC);
+}
+
+static int asr_aes_ctr_encrypt(struct skcipher_request *req)
+{
+	return asr_cipher(req, FLAGS_AES | FLAGS_CTR | FLAGS_ENCRYPT);
+}
+
+static int asr_aes_ctr_decrypt(struct skcipher_request *req)
+{
+	return asr_cipher(req, FLAGS_AES | FLAGS_CTR);
+}
+
+static int asr_sm4_ecb_encrypt(struct skcipher_request *req)
+{
+	return asr_cipher(req, FLAGS_SM4 | FLAGS_ECB | FLAGS_ENCRYPT);
+}
+
+static int asr_sm4_ecb_decrypt(struct skcipher_request *req)
+{
+	return asr_cipher(req, FLAGS_SM4 | FLAGS_ECB);
+}
+
+static int asr_sm4_cbc_encrypt(struct skcipher_request *req)
+{
+	return asr_cipher(req, FLAGS_SM4 | FLAGS_CBC | FLAGS_ENCRYPT);
+}
+
+static int asr_sm4_cbc_decrypt(struct skcipher_request *req)
+{
+	return asr_cipher(req, FLAGS_SM4 | FLAGS_CBC);
+}
+
+static int asr_sm4_ctr_encrypt(struct skcipher_request *req)
+{
+	return asr_cipher(req, FLAGS_SM4 | FLAGS_CTR | FLAGS_ENCRYPT);
+}
+
+static int asr_sm4_ctr_decrypt(struct skcipher_request *req)
+{
+	return asr_cipher(req, FLAGS_SM4 | FLAGS_CTR);
+}
+
+static int asr_cipher_init(struct crypto_skcipher *tfm)
+{
+	struct asr_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	tfm->reqsize = sizeof(struct asr_cipher_reqctx);
+	ctx->start = asr_cipher_start;
+
+	return 0;
+}
+
+static int asr_cipher_hwkey_init(struct crypto_skcipher *tfm)
+{
+	struct asr_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct asr_te200_cipher *dd = asr_cipher_local;
+
+	if (!dd->rkek_burned)
+		return -EPERM;
+
+	tfm->reqsize = sizeof(struct asr_cipher_reqctx);
+	ctx->start = asr_cipher_start;
+
+	return 0;
+}
+
+static void asr_cipher_exit(struct crypto_skcipher *tfm)
+{
+	struct asr_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	memset(ctx, 0, sizeof(*ctx));
+}
+
+static struct skcipher_alg cipher_algs[] = {
+	/* AES - ECB */
+	{
+		.base = {
+			.cra_name = "ecb(aes)",
+			.cra_driver_name = "asr-ecb-aes",
+			.cra_priority = 300,
+			.cra_flags = CRYPTO_ALG_ASYNC,
+			.cra_blocksize = AES_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct asr_cipher_ctx),
+			.cra_alignmask = 0xf,
+			.cra_module = THIS_MODULE,
+		},
+		.min_keysize = CIPHER_MIN_KEY_SIZE,
+		.max_keysize = CIPHER_MAX_KEY_SIZE,
+		.setkey = asr_cipher_setkey,
+		.encrypt = asr_aes_ecb_encrypt,
+		.decrypt = asr_aes_ecb_decrypt,
+		.init = asr_cipher_init,
+		.exit = asr_cipher_exit,
+	},
+	/* AES - CBC */
+	{
+		.base = {
+			.cra_name = "cbc(aes)",
+			.cra_driver_name = "asr-cbc-aes",
+			.cra_priority = 300,
+			.cra_flags = CRYPTO_ALG_ASYNC,
+			.cra_blocksize = AES_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct asr_cipher_ctx),
+			.cra_alignmask = 0xf,
+			.cra_module = THIS_MODULE,
+		},
+		.min_keysize = CIPHER_MIN_KEY_SIZE,
+		.max_keysize = CIPHER_MAX_KEY_SIZE,
+		.setkey = asr_cipher_setkey,
+		.encrypt = asr_aes_cbc_encrypt,
+		.decrypt = asr_aes_cbc_decrypt,
+		.init = asr_cipher_init,
+		.exit = asr_cipher_exit,
+		.ivsize = AES_BLOCK_SIZE,
+	},
+	/* AES - CTR */
+	{
+		.base = {
+			.cra_name = "ctr(aes)",
+			.cra_driver_name = "asr-ctr-aes",
+			.cra_priority = 300,
+			.cra_flags = CRYPTO_ALG_ASYNC,
+			.cra_blocksize = AES_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct asr_cipher_ctx),
+			.cra_alignmask = 0xf,
+			.cra_module = THIS_MODULE,
+		},
+		.min_keysize = CIPHER_MIN_KEY_SIZE,
+		.max_keysize = CIPHER_MAX_KEY_SIZE,
+		.setkey = asr_cipher_setkey,
+		.encrypt = asr_aes_ctr_encrypt,
+		.decrypt = asr_aes_ctr_decrypt,
+		.init = asr_cipher_init,
+		.exit = asr_cipher_exit,
+		.ivsize = AES_BLOCK_SIZE,
+	},
+
+	/* SM4 - ECB */
+	{
+		.base = {
+			.cra_name = "ecb(sm4)",
+			.cra_driver_name = "asr-ecb-sm4",
+			.cra_priority = 300,
+			.cra_flags = CRYPTO_ALG_ASYNC,
+			.cra_blocksize = SM4_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct asr_cipher_ctx),
+			.cra_alignmask = 0xf,
+			.cra_module = THIS_MODULE,
+		},
+		.min_keysize = CIPHER_MIN_KEY_SIZE,
+		.max_keysize = CIPHER_MAX_KEY_SIZE,
+		.setkey = asr_cipher_setkey,
+		.encrypt = asr_sm4_ecb_encrypt,
+		.decrypt = asr_sm4_ecb_decrypt,
+		.init = asr_cipher_init,
+		.exit = asr_cipher_exit,
+	},
+	/* SM4 - CBC */
+	{
+		.base = {
+			.cra_name = "cbc(sm4)",
+			.cra_driver_name = "asr-cbc-sm4",
+			.cra_priority = 300,
+			.cra_flags = CRYPTO_ALG_ASYNC,
+			.cra_blocksize = SM4_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct asr_cipher_ctx),
+			.cra_alignmask = 0xf,
+			.cra_module = THIS_MODULE,
+		},
+		.min_keysize = CIPHER_MIN_KEY_SIZE,
+		.max_keysize = CIPHER_MAX_KEY_SIZE,
+		.setkey = asr_cipher_setkey,
+		.encrypt = asr_sm4_cbc_encrypt,
+		.decrypt = asr_sm4_cbc_decrypt,
+		.init = asr_cipher_init,
+		.exit = asr_cipher_exit,
+		.ivsize = SM4_BLOCK_SIZE,
+	},
+	/* SM4 - CTR */
+	{
+		.base = {
+			.cra_name = "ctr(sm4)",
+			.cra_driver_name = "asr-ctr-sm4",
+			.cra_priority = 300,
+			.cra_flags = CRYPTO_ALG_ASYNC,
+			.cra_blocksize = SM4_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct asr_cipher_ctx),
+			.cra_alignmask = 0xf,
+			.cra_module = THIS_MODULE,
+		},
+		.min_keysize = CIPHER_MIN_KEY_SIZE,
+		.max_keysize = CIPHER_MAX_KEY_SIZE,
+		.setkey = asr_cipher_setkey,
+		.encrypt = asr_sm4_ctr_encrypt,
+		.decrypt = asr_sm4_ctr_decrypt,
+		.init = asr_cipher_init,
+		.exit = asr_cipher_exit,
+		.ivsize = SM4_BLOCK_SIZE,
+	},
+
+	/* hardware key AES - ECB */
+	{
+		.base = {
+			.cra_name = "ecb(aes-hwkey)",
+			.cra_driver_name = "asr-ecb-aes-hwkey",
+			.cra_priority = 300,
+			.cra_flags = CRYPTO_ALG_ASYNC,
+			.cra_blocksize = AES_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct asr_cipher_ctx),
+			.cra_alignmask = 0xf,
+			.cra_module = THIS_MODULE,
+		},
+		.min_keysize = CIPHER_MIN_KEY_SIZE,
+		.max_keysize = CIPHER_MAX_KEY_SIZE,
+		.setkey = asr_cipher_set_hwkey,
+		.encrypt = asr_aes_ecb_encrypt,
+		.decrypt = asr_aes_ecb_decrypt,
+		.init = asr_cipher_hwkey_init,
+		.exit = asr_cipher_exit,
+	},
+	/* hardware key AES - CBC */
+	{
+		.base = {
+			.cra_name = "cbc(aes-hwkey)",
+			.cra_driver_name = "asr-cbc-aes-hwkey",
+			.cra_priority = 300,
+			.cra_flags = CRYPTO_ALG_ASYNC,
+			.cra_blocksize = AES_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct asr_cipher_ctx),
+			.cra_alignmask = 0xf,
+			.cra_module = THIS_MODULE,
+		},
+		.min_keysize = CIPHER_MIN_KEY_SIZE,
+		.max_keysize = CIPHER_MAX_KEY_SIZE,
+		.setkey = asr_cipher_set_hwkey,
+		.encrypt = asr_aes_cbc_encrypt,
+		.decrypt = asr_aes_cbc_decrypt,
+		.init = asr_cipher_hwkey_init,
+		.exit = asr_cipher_exit,
+		.ivsize = AES_BLOCK_SIZE,
+	},
+};
+
+static void asr_cipher_queue_task(unsigned long data)
+{
+	struct asr_te200_cipher *dd = (struct asr_te200_cipher *)data;
+
+	asr_cipher_handle_queue(dd, NULL);
+}
+
+static void asr_cipher_done_task(unsigned long data)
+{
+	struct asr_te200_cipher *dd = (struct asr_te200_cipher *)data;
+
+	dd->is_async = true;
+	(void)dd->resume(dd);
+}
+
+int asr_te200_cipher_register(struct asr_te200_dev *te200_dd)
+{
+	int err, i, j;
+	struct device_node *np = NULL;
+	struct asr_te200_cipher *cipher_dd;
+
+	cipher_dd = &te200_dd->asr_cipher;
+	cipher_dd->dev = te200_dd->dev;
+	cipher_dd->io_base = te200_dd->io_base;
+	cipher_dd->phys_base = te200_dd->phys_base;
+
+	np = cipher_dd->dev->of_node;
+
+	cipher_dd->rkek_burned = asr_cipher_rkek_fused(cipher_dd);
+
+	asr_cipher_local = cipher_dd;
+
+	spin_lock_init(&cipher_dd->lock);
+	mutex_init(&cipher_dd->cipher_lock);
+	tasklet_init(&cipher_dd->done_task, asr_cipher_done_task,
+					(unsigned long)cipher_dd);
+	tasklet_init(&cipher_dd->queue_task, asr_cipher_queue_task,
+					(unsigned long)cipher_dd);
+	crypto_init_queue(&cipher_dd->queue, ASR_CIPHER_QUEUE_LENGTH);
+
+	for (i = 0; i < ARRAY_SIZE(cipher_algs); i++) {
+		err = crypto_register_skcipher(&cipher_algs[i]);
+		if (err){
+			for (j = 0; j < i; j++)
+				crypto_unregister_skcipher(&cipher_algs[j]);
+			return err;
+		}
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(asr_te200_cipher_register);
+
+int asr_te200_cipher_unregister(struct asr_te200_dev *te200_dd)
+{
+	int i;
+	struct asr_te200_cipher *cipher_dd = &te200_dd->asr_cipher;
+
+	for (i = 0; i < ARRAY_SIZE(cipher_algs); i++)
+		crypto_unregister_skcipher(&cipher_algs[i]);
+
+	tasklet_kill(&cipher_dd->done_task);
+	tasklet_kill(&cipher_dd->queue_task);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(asr_te200_cipher_unregister);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("wangyonggan <yongganwang@asrmicro.com>");
+MODULE_DESCRIPTION("ASR te200 cipher driver");
\ No newline at end of file
diff --git a/marvell/linux/drivers/crypto/asr/te200/asr-cipher.h b/marvell/linux/drivers/crypto/asr/te200/asr-cipher.h
new file mode 100644
index 0000000..86747a9
--- /dev/null
+++ b/marvell/linux/drivers/crypto/asr/te200/asr-cipher.h
@@ -0,0 +1,105 @@
+#ifndef ASR_CIPHER_H
+#define ASR_CIPHER_H
+
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <linux/crypto.h>
+#include <crypto/aes.h>
+#include <crypto/scatterwalk.h>
+#include <crypto/algapi.h>
+#include <crypto/internal/skcipher.h>
+
+/* CIPHER flags */
+#define FLAGS_ENCRYPT		(1 << 0)
+#define FLAGS_ECB			(0 << 1)
+#define FLAGS_CBC			(1 << 1)
+#define FLAGS_CTR			(2 << 1)
+#define FLAGS_OPMODE_MASK 	(3 << 1)
+#define FLAGS_BUSY			(1 << 3)
+#define FLAGS_AES			(0 << 4)
+#define FLAGS_SM4			(1 << 4)
+
+#define CIPHER_FLAGS_PERSISTENT	FLAGS_BUSY
+
+#define ASR_CIPHER_QUEUE_LENGTH	50
+#define ASR_CIPHER_PRIORITY		300
+
+#define ASR_CIPHER_BUFFER_ORDER	2
+#define ASR_CIPHER_BUFFER_SIZE	(PAGE_SIZE << ASR_CIPHER_BUFFER_ORDER)
+
+enum SCA_MODE {
+	NORMAL_AES = 0,
+	SM4
+};
+
+enum SCA_ALG_STANDARD {
+	ECB = 0,
+	CTR,
+	CBC,
+	CBC_MAC,
+	CMAC,
+	GHASH,
+};
+
+struct sca_data {
+	int encrypt;
+	int alg_type;
+	int mode;
+	int is_last_blk;
+	bool use_rkek;
+};
+
+struct asr_te200_cipher;
+
+typedef int (*asr_cipher_fn_t)(struct asr_te200_cipher *dd);
+typedef irqreturn_t (*asr_cipher_irq_t)(void *);
+
+
+struct asr_cipher_ctx {
+	struct asr_te200_cipher	*dd;
+	asr_cipher_fn_t		    start;
+	int			            keylen;
+	u32			            key[AES_KEYSIZE_256 / sizeof(u32)];
+	u16			            block_size;
+	bool		            use_rkek;
+};
+
+struct asr_cipher_reqctx {
+	unsigned long		mode;
+	bool		use_rkek;
+	u32			lastc[AES_BLOCK_SIZE / sizeof(u32)];
+};
+
+struct asr_te200_cipher {
+	struct device		*dev;
+	struct crypto_async_request	*areq;
+
+	void __iomem		*io_base;
+	unsigned long		phys_base;
+
+	struct asr_cipher_ctx	*ctx;
+
+	bool			is_async;
+	bool			rkek_burned;
+	unsigned long		flags;
+
+	spinlock_t		lock;
+	struct mutex cipher_lock;
+	struct crypto_queue	queue;
+	struct tasklet_struct	queue_task;
+
+	asr_cipher_fn_t		resume;
+	struct tasklet_struct	done_task;
+
+	size_t			total;
+	size_t			datalen;
+	u32			*data;
+
+	size_t			buflen;
+	void			*buf;
+
+	struct scatterlist	aligned_sg;
+	struct scatterlist	*real_dst;
+};
+
+#endif
\ No newline at end of file
diff --git a/marvell/linux/drivers/crypto/asr/te200/asr-sha.c b/marvell/linux/drivers/crypto/asr/te200/asr-sha.c
new file mode 100644
index 0000000..a17382a
--- /dev/null
+++ b/marvell/linux/drivers/crypto/asr/te200/asr-sha.c
@@ -0,0 +1,1222 @@
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/clk-provider.h>
+#include <linux/clk.h>
+#include <linux/io.h>
+#include <linux/hw_random.h>
+#include <linux/platform_device.h>
+#include <linux/of_device.h>
+#include <linux/device.h>
+#include <linux/init.h>
+#include <crypto/hmac.h>
+#include <crypto/sha.h>
+#include "asr-te200.h"
+#include "asr-sha.h"
+
+// #define ASR_TE200_SHA_TEST
+
+static struct asr_te200_sha *asr_sha_local = NULL;
+static struct mutex hash_lock = __MUTEX_INITIALIZER(hash_lock);
+
+static inline u32 asr_sha_read(struct asr_te200_sha *dd, u32 offset)
+{
+	u32 value = readl_relaxed(dd->io_base + offset);
+
+	return value;
+}
+
+static inline void asr_sha_write(struct asr_te200_sha *dd,
+					u32 offset, u32 value)
+{
+	writel_relaxed(value, dd->io_base + offset);
+}
+
+/* ------- te200 sha hardware operation -------- */
+static int hash_clock_switch(struct asr_te200_sha *dd, int enable)
+{
+	uint32_t value;
+
+	value = asr_sha_read(dd, TE200_CLOCK_CTRL);
+	if (enable) {
+		value |= HASH_CLK_EN;
+	} else {
+		value &= ~HASH_CLK_EN;
+	}
+
+	asr_sha_write(dd, TE200_CLOCK_CTRL, value);
+
+	return 0;
+}
+
+static int hash_start_run(struct asr_te200_sha *dd)
+{
+	uint32_t value;
+	value = asr_sha_read(dd, TE200_SHASH_CTRL);
+	value |= HASH_RUN;
+	asr_sha_write(dd, TE200_SHASH_CTRL, value);
+	return 0; 
+}
+
+static int hash_wait_intr(struct asr_te200_sha *dd)
+{
+	int ret = 0;
+	uint32_t value;
+	uint32_t time_start;
+	uint32_t clk_val;
+	clk_val = asr_sha_read(dd, TE200_CLOCK_CTRL);
+
+	time_start = jiffies;
+	value = asr_sha_read(dd, TE200_SHASH_INTR_STAT);
+
+	while (1) {
+		value = asr_sha_read(dd, TE200_SHASH_INTR_STAT);
+
+		if (value & HASH_INVALID_CMD) {
+			dev_err(dd->dev, "invalid cmd\n");
+			ret = -1;
+			break;
+		}
+
+		if (value & HASH_BUS_ERROR) {
+			dev_err(dd->dev, "bus err\n");
+			ret = -1;
+			break;
+		}
+
+		if ((jiffies - time_start) > 500) {
+			dev_err(dd->dev, "wait intr timeout !\n");
+			ret = -1;
+			break;
+		}
+
+		if (value & HASH_CMD_INTR) {
+			break;
+		}
+	}
+
+	value = asr_sha_read(dd, TE200_SHASH_INTR_STAT);
+	value |= HASH_CMD_INTR;
+	asr_sha_write(dd, TE200_SHASH_INTR_STAT, value);
+	return ret;
+}
+
+static inline void sha_cache_operation(void *addr, int size)
+{
+	__cpuc_flush_dcache_area(addr, size);
+}
+
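+/*
+ * Queue the HASH INIT command: selects the digest mode and, when resuming a
+ * swapped-out context, reloads the saved intermediate digest (ext_iv) and
+ * the running total bit count.
+ */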
+static int _hash_op_init(struct asr_sha_reqctx *reqctx, int alg, uint8_t *ext_iv)
+{
+	int ret;
+	uint32_t cmd = 0;
+	uint32_t ext_iv_phys;
+	struct asr_te200_sha *dd = reqctx->dd;
+	te200_hash_context_t *ctx = &reqctx->hash_ctx;
+
+	hash_clock_switch(dd, 1);
+
+	if (ext_iv) {
+		cmd |= HASH_INIT_CMD | HASH_SET_EXT_IV | HASH_PARAM_IS_ADDR | HASH_INTER_TRIGGERD;
+		/* Set initial length */
+		if (ctx->total_bits_num != 0)
+			cmd |= 0x4;
+	} else {
+		cmd |= HASH_INIT_CMD | HASH_PARAM_IS_ADDR | HASH_INTER_TRIGGERD;
+	}
+
+	switch (alg) {
+	case HASH_SHA1:
+		cmd &= HASH_MODE_SHA1;
+		break;
+	case HASH_SHA224:
+		cmd |= HASH_MODE_SHA224;
+		break;
+	case HASH_SHA256:
+		cmd |= HASH_MODE_SHA256;
+		break;
+	default:
+		hash_clock_switch(dd, 0);
+		return -EINVAL;
+	}
+
+	asr_sha_write(dd, TE200_SHASH_QUEUE, cmd);
+	if (ext_iv) {
+		ext_iv_phys = (uint32_t)virt_to_phys((void *)ext_iv);
+		sha_cache_operation((void *)ext_iv, 32);
+		asr_sha_write(dd, TE200_SHASH_QUEUE, ext_iv_phys);
+		/*
+		 * Set HASH total bits length; the 64-bit value is split into
+		 * two 32-bit writes.
+		 */
+		if (ctx->total_bits_num != 0) {
+			asr_sha_write(dd, TE200_SHASH_QUEUE, (ctx->total_bits_num & 0xFFFFFFFF));
+			asr_sha_write(dd, TE200_SHASH_QUEUE, (ctx->total_bits_num >> 0x20));
+		}
+	}
+
+	hash_start_run(dd);
+	ret = hash_wait_intr(dd);
+	reqctx->hash_ctx.finish_flag = 1;
+
+	hash_clock_switch(dd, 0);
+	return ret;
+}
+
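+/*
+ * PROCESS path: input is accumulated in ctx->extra_data until at least
+ * HASH_BUF_LEN bytes are available; only then are the previously buffered
+ * bytes and the bulk of the new input fed to the engine, with the remainder
+ * kept back for the next call or for finish().
+ */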
+static int _hash_op_proc(struct asr_sha_reqctx *reqctx, const uint8_t *src, size_t size)
+{
+	int ret = 0;
+	uint32_t cmd = 0;
+	uint32_t src_phys;
+	struct asr_te200_sha *dd = reqctx->dd;
+	te200_hash_context_t *ctx = &reqctx->hash_ctx;
+	size_t input_data_len = 0;
+	uint32_t old_extra_len = ctx->count;
+
+	hash_clock_switch(dd, 1);
+
+	/* Extra data bytes number */
+	ctx->count = (size + old_extra_len) % HASH_BUF_LEN;
+	if (size + old_extra_len >= HASH_BUF_LEN) {
+		/* First handle old extra data, then the new input data */
+		if (old_extra_len != 0) {
+			src_phys = (uint32_t)virt_to_phys((void *)ctx->extra_data);
+			sha_cache_operation((void *)ctx->extra_data, old_extra_len);
+
+			cmd = HASH_PROCESS_CMD | HASH_INTER_TRIGGERD;
+			asr_sha_write(dd, TE200_SHASH_QUEUE, cmd);
+
+			asr_sha_write(dd, TE200_SHASH_QUEUE, src_phys);
+			asr_sha_write(dd, TE200_SHASH_QUEUE, old_extra_len);
+
+			hash_start_run(dd);
+			ret = hash_wait_intr(dd);
+			if (ret)
+				goto err;
+			ctx->total_bits_num += old_extra_len * 8;
+		}
+
+		cmd = HASH_PROCESS_CMD | HASH_INTER_TRIGGERD;
+		input_data_len = size - ctx->count;
+
+		src_phys = virt_to_phys((void *)src);
+		sha_cache_operation((void *)src, input_data_len);
+		asr_sha_write(dd, TE200_SHASH_QUEUE, cmd);
+		asr_sha_write(dd, TE200_SHASH_QUEUE, (uint32_t)src_phys);
+		asr_sha_write(dd, TE200_SHASH_QUEUE, input_data_len);
+
+		hash_start_run(dd);
+		ret = hash_wait_intr(dd);
+		if (ret)
+			goto err;
+
+		/* Total data bits number */
+		ctx->total_bits_num += input_data_len * 8;
+		/* Save new extra data */
+		memset(ctx->extra_data, 0, sizeof(ctx->extra_data));
+		memcpy(ctx->extra_data, (src + size - ctx->count), ctx->count);
+	} else {
+		/*
+		 * size + old_extra_len < HASH_BUF_LEN: just buffer the input
+		 * data and return.
+		 */
+		memcpy(ctx->extra_data + old_extra_len, src, size);
+	}
+	ret = 0;
+
+err:
+	hash_clock_switch(dd, 0);
+	return ret;
+}
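+
+/*
+ * Worked example of the buffering in _hash_op_proc() above (illustrative
+ * numbers only, not driver code): with HASH_BUF_LEN = 64, old_extra_len = 10
+ * and a new update of size = 100, ctx->count becomes (100 + 10) % 64 = 46.
+ * The 10 buffered bytes are pushed to the engine first, followed by
+ * input_data_len = 100 - 46 = 54 new bytes, so a multiple of the block size
+ * (10 + 54 = 64) is processed in total and the remaining 46 bytes are kept
+ * in ctx->extra_data for the next call.
+ */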
+
+static int _hash_op_finish(struct asr_sha_reqctx *reqctx,
+						  uint8_t *out, uint32_t out_size, int padding)
+{
+	int ret = 0;
+	uint32_t cmd = 0;
+	uint32_t out_phys;
+	struct asr_te200_sha *dd = reqctx->dd;
+	te200_hash_context_t *ctx = &reqctx->hash_ctx;
+	uint32_t extra_data_phys;
+
+	/* Ignore finish requests on an uninitialized context */
+	if (!reqctx->hash_ctx.finish_flag) {
+		return ret;
+	}
+
+	hash_clock_switch(dd, 1);
+
+	if (padding == 0) {
+		cmd = HASH_FINISH_CMD | HASH_INTER_TRIGGERD;
+		ctx->hash_temp_valid = 1;
+		ctx->finish_flag     = 0;
+	} else {
+		/* If extra data count is not zero, execute HASH process command first */
+		if (ctx->count != 0) {
+			cmd = HASH_PROCESS_CMD | HASH_INTER_TRIGGERD;
+			asr_sha_write(dd, TE200_SHASH_QUEUE, cmd);
+
+			extra_data_phys = (uint32_t)virt_to_phys((void *)ctx->extra_data);
+			sha_cache_operation((void *)ctx->extra_data, ctx->count);
+			asr_sha_write(dd, TE200_SHASH_QUEUE, extra_data_phys);
+			asr_sha_write(dd, TE200_SHASH_QUEUE, ctx->count);
+			hash_start_run(dd);
+			ret = hash_wait_intr(dd);
+			if (ret)
+				goto err;
+		}
+		cmd = HASH_FINISH_CMD | HASH_PADDING | HASH_INTER_TRIGGERD;
+	}
+
+	out_phys = virt_to_phys((void *)out);
+	sha_cache_operation((void *)out, out_size);
+
+	asr_sha_write(dd, TE200_SHASH_QUEUE, cmd);
+
+	asr_sha_write(dd, TE200_SHASH_QUEUE, (uint32_t)out_phys);
+
+	hash_start_run(dd);
+	ret = hash_wait_intr(dd);
+	if (ret)
+		goto err;
+
+	ret = 0;
+err:
+	hash_clock_switch(dd, 0);
+	return ret;
+}
+
+static struct asr_sha_reqctx *_g_sha_ctx = NULL;
+#define GET_HASH_LEN(reqctx)						\
+	((reqctx)->hash_ctx.alg == HASH_SHA1   ? 20 :			\
+	 (reqctx)->hash_ctx.alg == HASH_SHA224 ? 28 :			\
+	 (reqctx)->hash_ctx.alg == HASH_SHA256 ? 32 : 0)
+
+static int hash_op_init(struct asr_sha_reqctx *reqctx, int alg)
+{
+	int ret = 0;
+	unsigned char garbage[64] = {0};
+	uint32_t hash_temp_len;
+
+	mutex_lock(&hash_lock);
+
+	if (_g_sha_ctx != reqctx) {
+		/* First finish the old session (_g_sha_ctx), then load the new one (reqctx) */
+		if (_g_sha_ctx != NULL) {
+			hash_temp_len = GET_HASH_LEN(_g_sha_ctx);
+			if (hash_temp_len == 0) {
+				ret = -1;
+				goto exit;
+			}
+			ret = _hash_op_finish(_g_sha_ctx, _g_sha_ctx->hash_ctx.hash_temp, hash_temp_len, 0 );
+			_g_sha_ctx = NULL;
+			if (ret) {
+				printk("swap out previously context failed");
+				goto exit;
+			}
+		}
+	} else {
+		/*
+		 * The same session is being restarted: flush the pending
+		 * state into a scratch buffer. Before issuing the finish
+		 * command, check the context's finish flag; if it is not
+		 * set there is nothing to finish.
+		 */
+		if (_g_sha_ctx != NULL) {
+			hash_temp_len = GET_HASH_LEN(_g_sha_ctx);
+			if (hash_temp_len == 0) {
+				ret = -1;
+				goto exit;
+			}
+			ret = _hash_op_finish( _g_sha_ctx, garbage, hash_temp_len, 1 );
+			_g_sha_ctx = NULL;
+			if (ret) {
+				printk("hash finish error during switching context!");
+				goto exit;
+			}
+		}		
+	}
+
+	memset(&reqctx->hash_ctx, 0, sizeof(reqctx->hash_ctx));
+	reqctx->hash_ctx.alg = alg;
+	ret = _hash_op_init(reqctx, alg, NULL);
+	if (ret) {
+		printk( " execute hash init failed when te200 hash init" );
+		goto exit;
+	}
+
+	_g_sha_ctx = reqctx;
+	ret = 0;
+
+exit:
+	mutex_unlock(&hash_lock);
+	return ret;
+}
+
+static int hash_op_proc(struct asr_sha_reqctx *reqctx, const uint8_t *src, size_t size)
+{
+	int ret = 0;
+	uint32_t hash_temp_len;
+
+	mutex_lock(&hash_lock);
+
+	if (reqctx == NULL) {
+		ret = -1;
+		goto exit;
+	}
+
+	/* Multi-session */
+	if (_g_sha_ctx != reqctx) {
+		/* First finish the old session (_g_sha_ctx), then load the new one (reqctx) */
+		if (_g_sha_ctx != NULL) {
+			hash_temp_len = GET_HASH_LEN(_g_sha_ctx);
+			if (hash_temp_len == 0) {
+				ret = -1;
+				goto exit;
+			}
+			ret = _hash_op_finish( _g_sha_ctx, _g_sha_ctx->hash_ctx.hash_temp, hash_temp_len, 0 );
+			_g_sha_ctx = NULL;
+			if (ret) {
+				printk("hash finish error during switching context!");
+				goto exit;
+			}
+		}
+
+		/* Re-initialize */
+		/* Execute te200 HASH_init command, load hash intermediate data */
+		hash_temp_len = GET_HASH_LEN( reqctx );
+		if ( reqctx->hash_ctx.hash_temp_valid == 1 ) {
+			ret = _hash_op_init(reqctx, reqctx->hash_ctx.alg, reqctx->hash_ctx.hash_temp);
+		} else {
+			ret = _hash_op_init(reqctx, reqctx->hash_ctx.alg, NULL);
+		}
+		if ( ret != 0 ) {
+			printk("execute hash init failed when update, reason: %x", ret);
+			goto exit;
+		}
+		_g_sha_ctx = reqctx;
+	}
+
+	/* Execute te200 HASH_process command */
+	ret = _hash_op_proc(reqctx, src, size);
+	if ( ret != 0 ) {
+		printk("execute hash process failed when update, reason: %x", ret);
+		goto exit;
+	}
+
+	ret = 0;
+
+exit:
+	mutex_unlock(&hash_lock);
+	return ret;
+}
+
+static int hash_op_finish(struct asr_sha_reqctx *reqctx, uint8_t *out, uint32_t out_size)
+{
+	int ret = 0;
+	uint32_t hash_temp_len;
+	
+	mutex_lock(&hash_lock);
+
+	if ((reqctx == NULL) || (out == NULL)) {
+		printk("context not initialized");
+		ret = -1;
+		goto exit;
+	}
+
+	if (_g_sha_ctx == reqctx) {
+		/*
+		 * Even if finish is invoked right after _hash_op_init, a
+		 * default hash output should still be produced.
+		 */
+
+		if ( !reqctx->hash_ctx.finish_flag ) {
+			if ( reqctx->hash_ctx.hash_temp_valid == 1 ) {
+				ret = _hash_op_init(reqctx, reqctx->hash_ctx.alg, reqctx->hash_ctx.hash_temp);
+			} else {
+				ret = _hash_op_init(reqctx, reqctx->hash_ctx.alg, NULL);
+			}
+			if ( ret != 0 ) {
+				printk("execute hash init failed when finish, reason: %x", ret);
+				goto exit;
+			}
+		}
+
+		ret = _hash_op_finish( reqctx, out, out_size, 1 );
+	} else {
+		/*
+		 * Before finishing a session, check its finish flag first;
+		 * if the flag is not set there is nothing to finish.
+		 */
+		if (_g_sha_ctx != NULL) {
+			/* Save current session, then load new session */
+			hash_temp_len = GET_HASH_LEN(_g_sha_ctx);
+			if (hash_temp_len == 0) {
+				ret = -1;
+				goto exit;
+			}
+			ret = _hash_op_finish( _g_sha_ctx, _g_sha_ctx->hash_ctx.hash_temp, hash_temp_len, 0 );
+			_g_sha_ctx = NULL;
+			if ( ret != 0 ) {
+				printk("hash finish error during switching context!");
+				goto exit;
+			}
+		}
+
+		if ( reqctx->hash_ctx.hash_temp_valid == 1 ) {
+			ret = _hash_op_init(reqctx, reqctx->hash_ctx.alg, reqctx->hash_ctx.hash_temp);
+		} else {
+			ret = _hash_op_init(reqctx, reqctx->hash_ctx.alg, NULL);
+		}
+		if ( ret != 0 ) {
+			printk("execute hash init failed when finish, reason: %x", ret);
+			goto exit;
+		}
+
+		_g_sha_ctx = reqctx;
+		ret = _hash_op_finish( reqctx, out, out_size, 1 );
+	}
+
+	_g_sha_ctx = NULL;
+
+exit:
+	mutex_unlock(&hash_lock);
+	return ret;	
+}
+
+int asr_te200_hash_init(struct asr_sha_reqctx *reqctx, int alg)
+{
+	reqctx->dd = asr_sha_local;
+
+	if (!reqctx->dd) {
+		return -1;
+	}
+	return hash_op_init(reqctx, alg);
+}
+
+int asr_te200_hash_proc(struct asr_sha_reqctx *reqctx, const uint8_t *src, size_t size)
+{
+	int ret;
+	uint8_t *psrc;
+	reqctx->dd = asr_sha_local;
+	
+	if (!reqctx->dd) {
+		return -1;
+	}
+
+	psrc = kmalloc(size, GFP_KERNEL);
+	if (!psrc) {
+		return -ENOMEM;
+	}
+	memcpy(psrc, (void *)src, size);
+
+	ret = hash_op_proc(reqctx, psrc, size);
+	kfree(psrc);
+
+	return ret;
+}
+
+int asr_te200_hash_finish(struct asr_sha_reqctx *reqctx, uint8_t *out, uint32_t out_size)
+{
+	int ret;
+	/* Use a local cache-line aligned buffer to avoid cache coherence problems with the caller's out buffer */
+	uint8_t hash[64] __aligned(64) = {0};
+	reqctx->dd = asr_sha_local;
+
+	if (!reqctx->dd) {
+		return -1;
+	}
+	ret = hash_op_finish(reqctx, hash, out_size);
+	memcpy(out, hash, out_size);
+	
+	return ret;
+
+}
+/* ------- end -------- */
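+
+/*
+ * Minimal usage sketch of the exported helpers above (illustrative only;
+ * "data" and "data_len" are assumed caller-provided buffers, not part of
+ * this driver):
+ *
+ *	struct asr_sha_reqctx reqctx = { 0 };
+ *	uint8_t digest[SHA256_DIGEST_SIZE];
+ *
+ *	if (!asr_te200_hash_init(&reqctx, HASH_SHA256) &&
+ *	    !asr_te200_hash_proc(&reqctx, data, data_len))
+ *		asr_te200_hash_finish(&reqctx, digest, sizeof(digest));
+ */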
+
+static size_t asr_sha_append_sg(struct asr_sha_reqctx *ctx)
+{
+	size_t count;
+
+	while ((ctx->bufcnt < ctx->buflen) && ctx->total) {
+		count = min(ctx->sg->length - ctx->offset, ctx->total);
+		count = min(count, ctx->buflen - ctx->bufcnt);
+
+		if (count <= 0) {
+			/*
+			 * count <= 0 either because the buffer is full or
+			 * because the sg length is 0. In the latter case,
+			 * check if there is another sg in the list; a zero
+			 * length sg does not necessarily mean the end of
+			 * the sg list.
+			 */
+			if ((ctx->sg->length == 0) && !sg_is_last(ctx->sg)) {
+				ctx->sg = sg_next(ctx->sg);
+				continue;
+			} else {
+				break;
+			}
+		}
+
+		scatterwalk_map_and_copy(ctx->buffer + ctx->bufcnt, ctx->sg,
+			ctx->offset, count, 0);
+
+		ctx->bufcnt += count;
+		ctx->offset += count;
+		ctx->total -= count;
+
+		if (ctx->offset == ctx->sg->length) {
+			ctx->sg = sg_next(ctx->sg);
+			if (ctx->sg)
+				ctx->offset = 0;
+			else
+				ctx->total = 0;
+		}
+	}
+
+	return 0;
+}
+
+static int asr_sha_done(struct asr_te200_sha *dd);
+
+static int asr_sha_handle_queue(struct asr_te200_sha *dd,
+				  struct ahash_request *req)
+{
+	struct crypto_async_request *async_req, *backlog;
+	struct asr_sha_ctx *ctx;
+	unsigned long flags;
+	bool start_async;
+	int err = 0, ret = 0;
+
+	spin_lock_irqsave(&dd->lock, flags);
+	if (req)
+		ret = ahash_enqueue_request(&dd->queue, req);
+
+	if (SHA_FLAGS_BUSY & dd->flags) {
+		spin_unlock_irqrestore(&dd->lock, flags);
+		return ret;
+	}
+
+	backlog = crypto_get_backlog(&dd->queue);
+	async_req = crypto_dequeue_request(&dd->queue);
+	if (async_req)
+		dd->flags |= SHA_FLAGS_BUSY;
+
+	spin_unlock_irqrestore(&dd->lock, flags);
+
+	if (!async_req) {
+		return ret;
+	}
+
+	if (backlog)
+		backlog->complete(backlog, -EINPROGRESS);
+
+	ctx = crypto_tfm_ctx(async_req->tfm);
+
+	dd->req = ahash_request_cast(async_req);
+	start_async = (dd->req != req);
+	dd->is_async = start_async;
+	dd->force_complete = false;
+
+	/* WARNING: ctx->start() MAY change dd->is_async. */
+	err = ctx->start(dd);
+	return (start_async) ? ret : err;
+}
+
+static int asr_sha_enqueue(struct ahash_request *req, unsigned int op)
+{
+	struct asr_sha_reqctx *ctx = ahash_request_ctx(req);
+	struct asr_te200_sha *dd = ctx->dd;
+
+	ctx->op = op;
+
+	return asr_sha_handle_queue(dd, req);
+}
+
+static void asr_sha_copy_ready_hash(struct ahash_request *req)
+{
+	struct asr_sha_reqctx *ctx = ahash_request_ctx(req);
+
+	if (!req->result)
+		return;
+
+	switch (ctx->flags & SHA_FLAGS_ALGO_MASK) {
+	case SHA_FLAGS_SHA1:
+		memcpy(req->result, ctx->digest, SHA1_DIGEST_SIZE);
+		break;
+	case SHA_FLAGS_SHA224:
+		memcpy(req->result, ctx->digest, SHA224_DIGEST_SIZE);
+		break;
+	case SHA_FLAGS_SHA256:
+		memcpy(req->result, ctx->digest, SHA256_DIGEST_SIZE);
+		break;
+	default:
+		return;
+	}
+}
+
+static inline int asr_sha_complete(struct asr_te200_sha *dd, int err)
+{
+	struct ahash_request *req = dd->req;
+	struct asr_sha_reqctx *ctx = ahash_request_ctx(req);
+
+	dd->flags &= ~(SHA_FLAGS_BUSY);
+	ctx->flags &= ~(SHA_FLAGS_FINAL);
+
+	if ((dd->is_async || dd->force_complete) && req->base.complete)
+		req->base.complete(&req->base, err);
+
+	/* handle new request */
+	tasklet_schedule(&dd->queue_task);
+
+	return err;
+}
+
+static int asr_sha_buff_init(struct asr_te200_sha *dd, uint32_t len)
+{
+	struct ahash_request *req = dd->req;
+	struct asr_sha_reqctx *ctx = ahash_request_ctx(req);
+
+	ctx->buffer = (void *)__get_free_pages(GFP_KERNEL, get_order(len));
+	if (!ctx->buffer) {
+		dev_err(dd->dev, "unable to alloc pages.\n");
+		return -ENOMEM;
+	}
+
+	ctx->buflen = PAGE_SIZE << get_order(len);
+
+	return 0;
+}
+
+static void asr_sha_buff_cleanup(struct asr_te200_sha *dd, uint32_t len)
+{
+	struct ahash_request *req = dd->req;
+	struct asr_sha_reqctx *ctx = ahash_request_ctx(req);
+
+	free_pages((unsigned long)ctx->buffer, get_order(len));
+	ctx->buflen = 0;
+}
+
+static int sha_init_req(struct asr_sha_reqctx *ctx)
+{
+	int ret = 0;
+
+	/* hardware: hash init */
+	ret = hash_op_init(ctx, ctx->alg);
+	if (ret)
+		return -EINVAL;
+	return 0;
+}
+
+static int sha_update_req(struct asr_sha_reqctx *ctx)
+{
+	int ret = 0;
+	int bufcnt;
+	uint32_t buflen = ctx->total;
+
+	ret = asr_sha_buff_init(ctx->dd, ctx->total);
+	if (ret)
+		return -ENOMEM;
+	
+	asr_sha_append_sg(ctx);
+	bufcnt = ctx->bufcnt;
+	ctx->bufcnt = 0;
+
+	/* hardware: hash process */
+	ret = hash_op_proc(ctx, ctx->buffer, bufcnt);
+	if (ret)
+		ret = -EINVAL;
+
+	asr_sha_buff_cleanup(ctx->dd, buflen);
+	return ret;
+}
+
+static void sha_finish_req(struct asr_sha_reqctx *ctx, int *err)
+{
+	uint8_t *hash = (uint8_t *)ctx->digest;
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(ctx->dd->req);
+	uint32_t hash_size = crypto_ahash_digestsize(tfm);
+	
+	if (!(*err) && (ctx->flags & SHA_FLAGS_FINAL)) {
+		*err = hash_op_finish(ctx, (uint8_t *)hash, hash_size);
+		asr_sha_copy_ready_hash(ctx->dd->req);
+		ctx->flags &= (~SHA_FLAGS_FINAL);
+	} else {
+		ctx->flags |= SHA_FLAGS_ERROR;
+	}
+}
+
+static void sha_next_req(struct asr_sha_reqctx *ctx, int *err)
+{
+	if (likely(!(*err) && (SHA_FLAGS_FINAL & ctx->flags)))
+		sha_finish_req(ctx, err);
+
+	(void)asr_sha_complete(ctx->dd, *err);
+}
+
+static int asr_sha_start(struct asr_te200_sha *dd)
+{
+	int err = 0;
+	struct ahash_request *req = dd->req;
+	struct asr_sha_reqctx *ctx = ahash_request_ctx(req);
+	struct asr_te200_dev *te200_dd = dev_get_drvdata(dd->dev);
+	struct asr_te200_ops *te200_ops = te200_dd->te200_ops;
+
+	te200_ops->dev_get(te200_dd);
+
+	dd->resume = asr_sha_done; 
+
+	if ((ctx->flags & SHA_FLAGS_INIT)) {
+		err = sha_init_req(ctx);
+		ctx->flags &= (~SHA_FLAGS_INIT);
+		if (err) {
+			te200_ops->dev_put(te200_dd);
+			return err;
+		}
+	}
+
+	if (ctx->op == SHA_OP_UPDATE) {
+		err = sha_update_req(ctx);
+		if (!err && (ctx->flags & SHA_FLAGS_FINUP))
+			/* no final() after finup() */
+			sha_finish_req(ctx, &err);
+	} else if (ctx->op == SHA_OP_FINAL) {
+		sha_finish_req(ctx, &err);
+	}
+
+	if (unlikely(err != -EINPROGRESS)) {
+		/* Task will not finish it, so do it here */
+		sha_next_req(ctx, &err);
+	}
+
+	te200_ops->dev_put(te200_dd);
+	return err;
+}
+
+static int asr_sha_cra_init(struct crypto_tfm *tfm)
+{
+	struct asr_sha_ctx *ctx = crypto_tfm_ctx(tfm);
+	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
+				 sizeof(struct asr_sha_reqctx));
+	ctx->start = asr_sha_start;
+
+	return 0;
+}
+
+static void asr_sha_cra_exit(struct crypto_tfm *tfm)
+{
+	struct asr_sha_ctx *ctx = crypto_tfm_ctx(tfm);
+	memset(ctx, 0, sizeof(*ctx));
+}
+
+static inline void asr_sha_get(struct asr_te200_sha *dd)
+{
+	mutex_lock(&dd->sha_lock);
+}
+
+static inline void asr_sha_put(struct asr_te200_sha *dd)
+{
+	if (mutex_is_locked(&dd->sha_lock))
+		mutex_unlock(&dd->sha_lock);
+}
+
+static int asr_sha_init(struct ahash_request *req)
+{
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct asr_sha_reqctx *ctx = ahash_request_ctx(req);
+	struct asr_te200_sha *dd = asr_sha_local;
+
+	asr_sha_get(dd);
+
+	ctx->dd = dd;
+	ctx->flags = 0;
+	ctx->alg = 0;
+
+	switch (crypto_ahash_digestsize(tfm)) {
+	case SHA1_DIGEST_SIZE:
+		ctx->flags |= SHA_FLAGS_SHA1;
+		ctx->alg = HASH_SHA1;
+		break;
+	case SHA224_DIGEST_SIZE:
+		ctx->flags |= SHA_FLAGS_SHA224;
+		ctx->alg = HASH_SHA224;
+		break;
+	case SHA256_DIGEST_SIZE:
+		ctx->flags |= SHA_FLAGS_SHA256;
+		ctx->alg = HASH_SHA256;
+		break;
+	default:
+		asr_sha_put(dd);
+		return -EINVAL;
+	}
+
+	ctx->bufcnt = 0;
+
+	ctx->flags |= SHA_FLAGS_INIT;
+	
+	asr_sha_put(dd);
+	return 0;
+}
+
+static int asr_sha_update(struct ahash_request *req)
+{
+	int ret = 0;
+	struct asr_sha_reqctx *ctx = ahash_request_ctx(req);
+
+	asr_sha_get(ctx->dd);
+
+	ctx->total = req->nbytes;
+	ctx->sg = req->src;
+	ctx->offset = 0;
+
+	ret = asr_sha_enqueue(req, SHA_OP_UPDATE);
+
+	asr_sha_put(ctx->dd);
+	return ret;
+}
+
+static int asr_sha_final(struct ahash_request *req)
+{
+	int ret = 0;
+	struct asr_sha_reqctx *ctx = ahash_request_ctx(req);
+
+	asr_sha_get(ctx->dd);
+
+	ctx->flags |= SHA_FLAGS_FINAL;
+	if (ctx->flags & SHA_FLAGS_ERROR) {
+		asr_sha_put(ctx->dd);
+		return 0; /* uncompleted hash is not needed */
+	}
+	ret = asr_sha_enqueue(req, SHA_OP_FINAL);
+
+	asr_sha_put(ctx->dd);
+	return ret;
+}
+
+static int asr_sha_finup(struct ahash_request *req)
+{
+	struct asr_sha_reqctx *ctx = ahash_request_ctx(req);
+	int err1, err2;
+
+	ctx->flags |= SHA_FLAGS_FINUP;
+
+	err1 = asr_sha_update(req);
+	if (err1 == -EINPROGRESS ||
+		(err1 == -EBUSY && (ahash_request_flags(req) &
+				CRYPTO_TFM_REQ_MAY_BACKLOG))) {
+		asr_sha_put(ctx->dd);
+		return err1;
+	}
+	/*
+	 * final() always has to be called to clean up resources,
+	 * even if update() failed, except for EINPROGRESS
+	 */
+	err2 = asr_sha_final(req);
+
+	return err1 ?: err2;
+}
+
+static int asr_sha_digest(struct ahash_request *req)
+{
+	return asr_sha_init(req) ?: asr_sha_finup(req);
+}
+
+static int asr_sha_export(struct ahash_request *req, void *out)
+{
+	const struct asr_sha_reqctx *ctx = ahash_request_ctx(req);
+
+	memcpy(out, ctx, sizeof(*ctx));
+	return 0;
+}
+
+static int asr_sha_import(struct ahash_request *req, const void *in)
+{
+	struct asr_sha_reqctx *ctx = ahash_request_ctx(req);
+
+	memcpy(ctx, in, sizeof(*ctx));
+	return 0;
+}
+
+static struct ahash_alg sha_algs[] = {
+	/* sha1 */
+	{
+		.init		= asr_sha_init,
+		.update		= asr_sha_update,
+		.final		= asr_sha_final,
+		.finup		= asr_sha_finup,
+		.digest		= asr_sha_digest,
+		.export		= asr_sha_export,
+		.import		= asr_sha_import,
+		.halg = {
+			.digestsize	= SHA1_DIGEST_SIZE,
+			.statesize	= sizeof(struct asr_sha_reqctx),
+			.base	= {
+				.cra_name		= "sha1",
+				.cra_driver_name	= "asr-sha1",
+				.cra_priority		= 300,
+				.cra_flags		= CRYPTO_ALG_ASYNC,
+				.cra_blocksize		= SHA1_BLOCK_SIZE,
+				.cra_ctxsize		= sizeof(struct asr_sha_ctx),
+				.cra_alignmask		= 0,
+				.cra_module		= THIS_MODULE,
+				.cra_init		= asr_sha_cra_init,
+				.cra_exit		= asr_sha_cra_exit,
+			}
+		}
+	},
+	/* sha224 */
+	{
+		.init		= asr_sha_init,
+		.update		= asr_sha_update,
+		.final		= asr_sha_final,
+		.finup		= asr_sha_finup,
+		.digest		= asr_sha_digest,
+		.export		= asr_sha_export,
+		.import		= asr_sha_import,
+		.halg = {
+			.digestsize	= SHA224_DIGEST_SIZE,
+			.statesize	= sizeof(struct asr_sha_reqctx),
+			.base	= {
+				.cra_name		= "sha224",
+				.cra_driver_name	= "asr-sha224",
+				.cra_priority		= 300,
+				.cra_flags		= CRYPTO_ALG_ASYNC,
+				.cra_blocksize		= SHA224_BLOCK_SIZE,
+				.cra_ctxsize		= sizeof(struct asr_sha_ctx),
+				.cra_alignmask		= 0,
+				.cra_module		= THIS_MODULE,
+				.cra_init		= asr_sha_cra_init,
+				.cra_exit		= asr_sha_cra_exit,
+			}
+		}
+	},
+	/* sha256 */
+	{
+		.init		= asr_sha_init,
+		.update		= asr_sha_update,
+		.final		= asr_sha_final,
+		.finup		= asr_sha_finup,
+		.digest		= asr_sha_digest,
+		.export		= asr_sha_export,
+		.import		= asr_sha_import,
+		.halg = {
+			.digestsize	= SHA256_DIGEST_SIZE,
+			.statesize	= sizeof(struct asr_sha_reqctx),
+			.base	= {
+				.cra_name		= "sha256",
+				.cra_driver_name	= "asr-sha256",
+				.cra_priority		= 300,
+				.cra_flags		= CRYPTO_ALG_ASYNC,
+				.cra_blocksize		= SHA256_BLOCK_SIZE,
+				.cra_ctxsize		= sizeof(struct asr_sha_ctx),
+				.cra_alignmask		= 0,
+				.cra_module		= THIS_MODULE,
+				.cra_init		= asr_sha_cra_init,
+				.cra_exit		= asr_sha_cra_exit,
+			}
+		}
+	},
+};
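+
+/*
+ * Hedged example of exercising the ahash algorithms registered above from
+ * another kernel module (a sketch; "msg"/"msg_len" are assumed, error
+ * handling is trimmed, and "sha256" may resolve to "asr-sha256" depending
+ * on priority):
+ *
+ *	DECLARE_CRYPTO_WAIT(wait);
+ *	struct crypto_ahash *tfm = crypto_alloc_ahash("sha256", 0, 0);
+ *	struct ahash_request *req = ahash_request_alloc(tfm, GFP_KERNEL);
+ *	struct scatterlist sg;
+ *	u8 out[SHA256_DIGEST_SIZE];
+ *
+ *	sg_init_one(&sg, msg, msg_len);
+ *	ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+ *				   crypto_req_done, &wait);
+ *	ahash_request_set_crypt(req, &sg, out, msg_len);
+ *	crypto_wait_req(crypto_ahash_digest(req), &wait);
+ */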
+
+static void asr_sha_queue_task(unsigned long data)
+{
+	struct asr_te200_sha *dd = (struct asr_te200_sha *)data;
+
+	asr_sha_handle_queue(dd, NULL);
+}
+
+static int asr_sha_done(struct asr_te200_sha *dd)
+{
+	int err = 0;
+
+	if (SHA_FLAGS_OUTPUT_READY & dd->flags) {
+		dd->flags &= ~SHA_FLAGS_OUTPUT_READY;
+	}
+	
+	return err;
+}
+
+static void asr_sha_done_task(unsigned long data)
+{
+	struct asr_te200_sha *dd = (struct asr_te200_sha *)data;
+
+	dd->is_async = true;
+	(void)dd->resume(dd);
+}
+
+#ifdef ASR_TE200_SHA_TEST
+	static int te200_sha_test(struct asr_te200_sha *dd);
+#endif
+
+int asr_te200_sha_register(struct asr_te200_dev *te200_dd)
+{
+	int err, i, j;
+	struct device_node *np = NULL;
+	struct asr_te200_sha *sha_dd;
+
+	sha_dd = &te200_dd->asr_sha;
+
+	sha_dd->dev = te200_dd->dev;
+	sha_dd->io_base = te200_dd->io_base;
+	sha_dd->phys_base = te200_dd->phys_base;
+
+	np = sha_dd->dev->of_node;
+
+	asr_sha_local = sha_dd;
+
+	spin_lock_init(&sha_dd->lock);
+	mutex_init(&sha_dd->sha_lock);
+	tasklet_init(&sha_dd->done_task, asr_sha_done_task,
+					(unsigned long)sha_dd);
+	tasklet_init(&sha_dd->queue_task, asr_sha_queue_task,
+					(unsigned long)sha_dd);
+	crypto_init_queue(&sha_dd->queue, ASR_SHA_QUEUE_LENGTH);
+
+	for (i = 0; i < ARRAY_SIZE(sha_algs); i++) {
+		err = crypto_register_ahash(&sha_algs[i]);
+		if (err)
+			goto err_sha_algs;
+	}
+
+#ifdef ASR_TE200_SHA_TEST
+	te200_sha_test(sha_dd);
+#endif
+
+	return 0;
+
+err_sha_algs:
+	for (j = 0; j < i; j++)
+		crypto_unregister_ahash(&sha_algs[j]);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(asr_te200_sha_register);
+
+int asr_te200_sha_unregister(struct asr_te200_dev *te200_dd)
+{
+	int i;
+	struct asr_te200_sha *sha_dd = &te200_dd->asr_sha;
+
+	for (i = 0; i < ARRAY_SIZE(sha_algs); i++)
+		crypto_unregister_ahash(&sha_algs[i]);
+
+	tasklet_kill(&sha_dd->queue_task);
+	tasklet_kill(&sha_dd->done_task);
+
+	return 0;
+}
+
+#ifdef ASR_TE200_SHA_TEST
+static int te200_sha_test(struct asr_te200_sha *dd)
+{
+	int ret = 0;
+	
+	const struct {
+		const char *msg;
+		uint8_t hash[20];
+	} sha1_tests[] = {
+		{
+			"abc", 
+			{   0xa9, 0x99, 0x3e, 0x36, 0x47, 0x06, 
+				0x81, 0x6a, 0xba, 0x3e, 0x25, 0x71, 
+				0x78, 0x50, 0xc2, 0x6c, 0x9c, 0xd0,
+				0xd8, 0x9d 
+			}
+		},
+		{
+			"asjhsdjljfdsdjjkdfwyqeuwouzxkmcxjkmwqds"
+			"jklfdfjlkdfkfsfkjlfskjdflioherfjjfdjkfd"
+			"nkfdfdojjodfjdfjflj;sljjlfkklnfnkgbhhoi"
+			"gfhigfopojpfjojpoffkjlfskjdflioherfjjfd"
+			"jkfdnkfdfdojjodfjdfjfljnfnkgbhhoigfhigf"
+			"oponfnkgbhhoigfhigfopojpfjoewiroiowiods"
+			"djkisijdknknkskdnknflnnesniewinoinknmdn"
+			"kknknsdnjjfsnnkfnkknslnklknfnknkflksnlk"
+			"lskldklklklnmlflmlmlfmlfml",
+			{
+				0xc4, 0x53, 0xca, 0x24, 0xfa, 0xe5,
+				0x39, 0x53, 0x08, 0x8c, 0x57, 0x1a, 
+				0x96, 0xe9, 0x64, 0x7f, 0xd5, 0xf9, 
+				0x13, 0x91
+			}
+		},
+		{
+			"asjhsdjljfdsdjjkdfwyqeuwouzxkmcxjkmwqdsjklfdfjlkdfkfs"
+			"fkjlfskjdflioherfjjfdjkfdnkfdfdojjodfjdfjflj;sljjlfkkl"
+			"nfnkgbhhoigfhigfopojpfjojpoffkjlfskjdflioherfjjfdjkfdn"
+			"kfdfdojjodfjdfjfljnfnkgbhhoigfhigfoponfnkgbhhoigfhigfo"
+			"pojpfjoewiroiowiodsdjkisijdknknkskdnknflnnesniewinoinkn"
+			"mdnkknknsdnjjfsnnkfnkknslnklknfnknkflksnlklskldklklklnm"
+			"lflmlmlfmlfml",
+			{
+				0xc4, 0x53, 0xca, 0x24, 0xfa, 0xe5,
+				0x39, 0x53, 0x08, 0x8c, 0x57, 0x1a, 
+				0x96, 0xe9, 0x64, 0x7f, 0xd5, 0xf9, 
+				0x13, 0x91
+			}
+		}
+	};
+
+	struct asr_sha_reqctx ctx1;
+	struct asr_sha_reqctx ctx2;
+	struct asr_sha_reqctx ctx3;
+
+	unsigned char out_sha1_1[20] = {0};
+	unsigned char out_sha1_2[20] = {0};
+	unsigned char out_sha1_3[20] = {0};
+
+	ctx1.dd = dd;
+	ctx2.dd = dd;
+	ctx3.dd = dd;
+
+	ret = hash_op_init(&ctx1, HASH_SHA1);
+	if (ret)
+		return ret;
+	ret = hash_op_proc(&ctx1, (uint8_t *)sha1_tests[0].msg, strlen(sha1_tests[0].msg));
+	if (ret)
+		return ret;
+	ret = hash_op_init(&ctx2, HASH_SHA1);
+	if (ret)
+		return ret;
+	ret = hash_op_proc(&ctx2, (uint8_t *)sha1_tests[1].msg, 10);
+	if (ret)
+		return ret;
+	ret = hash_op_finish(&ctx1, out_sha1_1, sizeof(out_sha1_1));
+	if (ret)
+		return ret;
+	ret = hash_op_init(&ctx3, HASH_SHA1);
+	if (ret)
+		return ret;
+	ret = hash_op_proc(&ctx2, (uint8_t *)sha1_tests[1].msg+10, strlen(sha1_tests[1].msg)-10);
+	if (ret)
+		return ret;
+	ret = hash_op_proc(&ctx3, (uint8_t *)sha1_tests[2].msg, 23);
+	if (ret)
+		return ret;
+	ret = hash_op_finish(&ctx2, out_sha1_2, sizeof(out_sha1_2));
+	if (ret)
+		return ret;
+	ret = hash_op_proc(&ctx3, (uint8_t *)sha1_tests[2].msg+23, strlen(sha1_tests[2].msg)-23);
+	if (ret)
+		return ret;
+	ret = hash_op_finish(&ctx3, out_sha1_3, sizeof(out_sha1_3));
+	if (ret)
+		return ret;
+
+	if (memcmp(out_sha1_1, sha1_tests[0].hash, sizeof(out_sha1_1))) {
+		printk("sha1 test 0 failed");
+	} else {
+		printk("sha1 test 0 pass");
+	}
+	if (memcmp(out_sha1_2, sha1_tests[1].hash, sizeof(out_sha1_2))) {
+		printk("sha1 test 1 failed");
+	} else {
+		printk("sha1 test 1 pass");
+	}
+	if (memcmp(out_sha1_3, sha1_tests[2].hash, sizeof(out_sha1_3))) {
+		printk("sha1 test 2 failed");
+	} else {
+		printk("sha1 test 2 pass");
+	}
+
+	return 0;
+}
+#endif
+
+EXPORT_SYMBOL_GPL(asr_te200_sha_unregister);
\ No newline at end of file
diff --git a/marvell/linux/drivers/crypto/asr/te200/asr-sha.h b/marvell/linux/drivers/crypto/asr/te200/asr-sha.h
new file mode 100644
index 0000000..42dea66
--- /dev/null
+++ b/marvell/linux/drivers/crypto/asr/te200/asr-sha.h
@@ -0,0 +1,118 @@
+#ifndef ASR_SHA_H
+#define ASR_SHA_H
+
+#include <linux/scatterlist.h>
+#include <linux/crypto.h>
+#include <crypto/sha.h>
+#include <crypto/hash.h>
+#include <crypto/internal/hash.h>
+
+#define ASR_SHA_BUFFER_ORDER	2
+#define ASR_SHA_BUFFER_SIZE	(PAGE_SIZE << ASR_SHA_BUFFER_ORDER)
+
+/* SHA flags */
+#define SHA_FLAGS_BUSY				BIT(0)
+#define	SHA_FLAGS_INIT				BIT(1)
+#define	SHA_FLAGS_FINAL				BIT(2)
+#define SHA_FLAGS_FINUP				BIT(3)
+#define SHA_FLAGS_OUTPUT_READY		BIT(4)
+#define SHA_FLAGS_ALGO_MASK			GENMASK(10, 5)
+#define SHA_FLAGS_SHA1				BIT(5)
+#define SHA_FLAGS_SHA224			BIT(6)
+#define SHA_FLAGS_SHA256			BIT(7)
+#define SHA_FLAGS_HMAC				BIT(8)
+#define SHA_FLAGS_PAD				BIT(9)
+#define SHA_FLAGS_ERROR				BIT(10)
+
+#define SHA_OP_INIT	    1
+#define SHA_OP_UPDATE	2
+#define SHA_OP_FINAL	3
+
+#define HASH_BUF_LEN 64
+
+typedef enum {
+	HASH_SHA1 = 0x0,
+	HASH_SHA224 = 0x2,
+	HASH_SHA256 = 0x3,
+} HASH_ALGO_T;
+
+struct asr_te200_sha;
+
+typedef int (*asr_sha_fn_t)(struct asr_te200_sha *);
+typedef irqreturn_t (*asr_sha_irq_t)(void *);
+
+struct asr_te200_sha {
+	unsigned long		phys_base;
+	struct device		*dev;
+	struct clk			*iclk;
+	int					irq;
+	void __iomem		*io_base;
+
+	spinlock_t		lock;
+	struct mutex sha_lock;
+	
+	int			err;
+	struct tasklet_struct	done_task;
+	struct tasklet_struct	queue_task;
+
+	unsigned long		flags;
+	struct crypto_queue	queue;
+	struct ahash_request	*req;
+	bool			is_async;
+	bool			force_complete;
+	asr_sha_fn_t		resume;
+
+	int                 alg;
+};
+
+typedef struct te200_hash_context {
+    volatile uint32_t count;   /* Number of buffered extra data bytes */
+    unsigned char extra_data[HASH_BUF_LEN]; /* Buffer for the extra data */
+    volatile uint64_t total_bits_num; /* Total number of processed data bits */
+    unsigned char hash_temp[64 / 2]; /* Buffer for the intermediate hash value */
+    int alg;
+    uint32_t hash_temp_valid;
+    uint32_t finish_flag; /* Whether the context still needs a finish command:
+                           * 0 = no, 1 = yes. */
+} te200_hash_context_t;
+
+/*
+ * .statesize = sizeof(struct asr_sha_reqctx) must be <= PAGE_SIZE / 8 as
+ * tested by the ahash_prepare_alg() function.
+ */
+
+struct asr_sha_reqctx {
+	struct asr_te200_sha	*dd;
+	unsigned long	op;
+
+	u8	digest[SHA512_DIGEST_SIZE] __aligned(sizeof(u32));
+	u64	digcnt[2];
+	void *buffer;
+	size_t	bufcnt;
+	size_t	buflen;
+
+	/* walk state */
+	struct scatterlist	*sg;
+	unsigned int	offset;	/* offset in current sg */
+	unsigned int	total;	/* total request */
+
+	int                 alg;
+	unsigned long		flags;
+
+	te200_hash_context_t hash_ctx;
+};
+
+struct asr_sha_ctx {
+	struct asr_te200_sha	*dd;
+	asr_sha_fn_t		start;
+
+	unsigned long		flags;
+};
+
+#define ASR_SHA_QUEUE_LENGTH	50
+
+int asr_te200_hash_init(struct asr_sha_reqctx *reqctx, int alg);
+int asr_te200_hash_proc(struct asr_sha_reqctx *reqctx, const uint8_t *src, size_t size);
+int asr_te200_hash_finish(struct asr_sha_reqctx *reqctx, uint8_t *out, uint32_t out_size);
+#endif
\ No newline at end of file
diff --git a/marvell/linux/drivers/crypto/asr/te200/asr-te200.c b/marvell/linux/drivers/crypto/asr/te200/asr-te200.c
new file mode 100644
index 0000000..00ae816
--- /dev/null
+++ b/marvell/linux/drivers/crypto/asr/te200/asr-te200.c
@@ -0,0 +1,299 @@
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/clk-provider.h>
+#include <linux/clk.h>
+#include <linux/io.h>
+#include <linux/hw_random.h>
+#include <linux/platform_device.h>
+#include <linux/device.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/scatterlist.h>
+#include <linux/dma-mapping.h>
+#include <linux/of_device.h>
+#include <linux/delay.h>
+#include <linux/crypto.h>
+#include <linux/cputype.h>
+#include <crypto/scatterwalk.h>
+#include <crypto/algapi.h>
+#include <crypto/aes.h>
+
+#include "asr-te200.h"
+#include "../asr_aes_clk.h"
+
+static inline u32 asr_te200_read(struct asr_te200_dev *dd, u32 offset)
+{
+	u32 value = readl_relaxed(dd->io_base + offset);
+	return value;
+}
+
+static inline void asr_te200_write(struct asr_te200_dev *dd,
+					u32 offset, u32 value)
+{
+	writel_relaxed(value, dd->io_base + offset);
+}
+
+static int asr_te200_clk_sync(struct asr_te200_dev *dd)
+{
+	struct clk *te200_clk;
+
+	if (dd->clk_synced)
+		return 0;
+
+	te200_clk = dd->te200_clk;
+	/*
+	 * The TE200 clk may be disabled by the CP core while the enable count
+	 * is still 1. Sync the clk enable state here and re-enable the clk.
+	 */
+	if (__clk_is_enabled(te200_clk) == false &&
+		__clk_get_enable_count(te200_clk))
+	{
+		asr_aes_clk_put(te200_clk);
+		asr_aes_clk_get(te200_clk);
+		dd->clk_synced = 1;
+		dev_dbg(dd->dev, "sync te200 clk done\n");
+		return 1;
+	}
+
+	return 0;
+}
+
+static int asr_te200_dev_get(struct asr_te200_dev *dd)
+{
+	mutex_lock(&dd->te200_lock);
+
+	asr_te200_clk_sync(dd);
+	asr_aes_clk_get(dd->te200_clk);
+
+	return 0;
+}
+
+static int asr_te200_dev_put(struct asr_te200_dev *dd)
+{
+	asr_aes_clk_put(dd->te200_clk);
+
+	mutex_unlock(&dd->te200_lock);
+	return 0;
+}
+
+static void asr_te200_hw_init(struct asr_te200_dev *dd)
+{
+	asr_te200_write(dd, TE200_CLOCK_CTRL, 0);
+	asr_te200_write(dd, TE200_RESET_CTRL, 0);
+}
+
+#if defined(CONFIG_OF)
+static const struct of_device_id asr_te200_dt_ids[] = {
+	{ .compatible = "asr,asr-te200" },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, asr_te200_dt_ids);
+#endif
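+
+/*
+ * Hedged device-tree sketch matching the "asr,asr-te200" compatible above;
+ * the node name, addresses and clock phandle are assumptions, only the
+ * optional "asr,asr-cipher"/"asr,asr-sha"/"asr,asr-rsa" properties are
+ * checked by the probe code below:
+ *
+ *	te200: crypto@d4200000 {
+ *		compatible = "asr,asr-te200";
+ *		reg = <0xd4200000 0x4000>;
+ *		clocks = <&clks CLK_TE200>;
+ *		asr,asr-cipher;
+ *		asr,asr-sha;
+ *		asr,asr-rsa;
+ *	};
+ */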
+
+static struct asr_te200_ops te200_ops = {
+	.dev_get = asr_te200_dev_get,
+	.dev_put = asr_te200_dev_put,
+};
+
+static int asr_te200_probe(struct platform_device *pdev)
+{
+	struct asr_te200_dev *te200_dd;
+	struct device *dev = &pdev->dev;
+	struct resource *te200_res;
+	struct device_node *np = NULL;
+	int err = 0, devnum = 0;
+
+	te200_dd = devm_kzalloc(&pdev->dev, sizeof(*te200_dd), GFP_KERNEL);
+	if (te200_dd == NULL) {
+		err = -ENOMEM;
+		goto res_err;
+	}
+
+	np = dev->of_node;
+	te200_dd->dev = dev;
+	te200_dd->te200_ops = &te200_ops;
+
+	platform_set_drvdata(pdev, te200_dd);
+
+	mutex_init(&te200_dd->te200_lock);
+
+	/* Get the base address */
+	te200_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!te200_res) {
+		dev_err(dev, "no MEM resource info\n");
+		err = -ENODEV;
+		goto res_err;
+	}
+	te200_dd->phys_base = te200_res->start;
+
+	/* Initializing the clock */
+	te200_dd->te200_clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(te200_dd->te200_clk)) {
+		dev_err(dev, "clock initialization failed.\n");
+		err = PTR_ERR(te200_dd->te200_clk);
+		goto res_err;
+	}
+	te200_dd->clk_synced = 0;
+
+	te200_dd->io_base = devm_ioremap_resource(&pdev->dev, te200_res);
+	if (IS_ERR(te200_dd->io_base)) {
+		dev_err(dev, "can't ioremap\n");
+		err = PTR_ERR(te200_dd->io_base);
+		goto res_err;
+	}
+
+	err = clk_prepare(te200_dd->te200_clk);
+	if (err)
+		goto res_err;
+
+	err = asr_aes_clk_get(te200_dd->te200_clk);
+	if (err)
+		goto te200_clk_unprepare;
+
+	asr_te200_hw_init(te200_dd);
+
+#ifdef CONFIG_ASR_TE200_CIPHER
+	if (of_get_property(np, "asr,asr-cipher", NULL)) {
+		err = asr_te200_cipher_register(te200_dd);
+		if (err)
+			goto te200_asr_aes_clk_put;
+		dev_info(dev, "CIPHER engine is initialized\n");
+		devnum ++;
+	}
+#endif
+
+#ifdef CONFIG_ASR_TE200_SHA
+	if (of_get_property(np, "asr,asr-sha", NULL)) {
+		err = asr_te200_sha_register(te200_dd);
+		if (err)
+			goto sha_err;
+		dev_info(dev, "SHA engine is initialized\n");
+		devnum ++;
+	}
+#endif
+
+#ifdef CONFIG_ASR_TE200_RSA
+	if (of_get_property(np, "asr,asr-rsa", NULL)) {
+		err = asr_te200_rsa_register(te200_dd);
+		if (err)
+			goto rsa_err;
+		dev_info(dev, "RSA engine is initialized\n");
+		devnum ++;
+	}
+#endif
+
+	if (!devnum) {
+		dev_err(dev, "No TE200 device enabled\n");
+		err = -ENODEV;
+		goto te200_asr_aes_clk_put;
+	}
+
+	return 0;
+
+#ifdef CONFIG_ASR_TE200_RSA
+rsa_err:
+#ifdef CONFIG_ASR_TE200_SHA
+	asr_te200_sha_unregister(te200_dd);
+#endif
+#endif
+#ifdef CONFIG_ASR_TE200_SHA
+sha_err:
+#ifdef CONFIG_ASR_TE200_CIPHER
+	asr_te200_cipher_unregister(te200_dd);
+#endif
+#endif
+te200_asr_aes_clk_put:
+	asr_aes_clk_put(te200_dd->te200_clk);
+te200_clk_unprepare:
+	clk_unprepare(te200_dd->te200_clk);
+res_err:
+	devm_kfree(dev, te200_dd);
+	dev_err(dev, "initialization failed.\n");
+
+	return err;
+}
+
+static int asr_te200_remove(struct platform_device *pdev)
+{
+	struct asr_te200_dev *te200_dd;
+
+	te200_dd = platform_get_drvdata(pdev);
+	if (!te200_dd)
+		return -ENODEV;
+
+	asr_aes_clk_put(te200_dd->te200_clk);
+	clk_unprepare(te200_dd->te200_clk);
+
+#ifdef CONFIG_ASR_TE200_CIPHER
+	asr_te200_cipher_unregister(te200_dd);
+#endif
+
+#ifdef CONFIG_ASR_TE200_SHA
+	asr_te200_sha_unregister(te200_dd);
+#endif
+
+#ifdef CONFIG_ASR_TE200_RSA
+	asr_te200_rsa_unregister(te200_dd);
+#endif
+
+	devm_kfree(te200_dd->dev, te200_dd);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM
+static int asr_te200_suspend(struct device *dev)
+{
+	struct asr_te200_dev *te200_dd = dev_get_drvdata(dev);
+
+	asr_aes_clk_put(te200_dd->te200_clk);
+
+	return 0;
+}
+
+static int asr_te200_resume(struct device *dev)
+{
+	struct asr_te200_dev *te200_dd = dev_get_drvdata(dev);
+
+	return asr_aes_clk_get(te200_dd->te200_clk);
+}
+
+static const struct dev_pm_ops asr_te200_pm_ops = {
+	.suspend	= asr_te200_suspend,
+	.resume		= asr_te200_resume,
+};
+#endif /* CONFIG_PM */
+
+static struct platform_driver asr_te200_driver = {
+	.probe		= asr_te200_probe,
+	.remove		= asr_te200_remove,
+	.driver		= {
+		.name	= "asr_te200",
+#ifdef CONFIG_PM
+		.pm	= &asr_te200_pm_ops,
+#endif
+		.of_match_table = of_match_ptr(asr_te200_dt_ids),
+	},
+};
+
+static int __init asr_te200_init(void)
+{
+	int ret;
+
+	if (!cpu_is_asr1903_b0()) {
+		return 0;
+	}
+
+	ret = platform_driver_register(&asr_te200_driver);
+
+	return ret;
+}
+
+device_initcall_sync(asr_te200_init);
+
+MODULE_DESCRIPTION("ASR Trust Engine support.");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Yonggan Wang");
\ No newline at end of file
diff --git a/marvell/linux/drivers/crypto/asr/te200/asr-te200.h b/marvell/linux/drivers/crypto/asr/te200/asr-te200.h
new file mode 100644
index 0000000..c43685f
--- /dev/null
+++ b/marvell/linux/drivers/crypto/asr/te200/asr-te200.h
@@ -0,0 +1,159 @@
+#ifndef _TE200_H_
+#define _TE200_H_
+
+#include "asr-cipher.h"
+#include "asr-sha.h"
+#include "./asr-aca/se_rsa.h"
+
+#define TE200_CTRL                 0x0000
+#define TE200_CLOCK_CTRL           (TE200_CTRL+0x00)
+#define TE200_RESET_CTRL           (TE200_CTRL+0x04)
+
+#define TE200_STATUS               0x0100
+#define TE200_INIT_STATUS_HOST0    (TE200_STATUS+0x10)
+
+#define DMA_AHB_CLK_EN      	   (1 << 5)
+#define TMG_CLK_EN          	   (1 << 4)
+#define OTP_CLK_EN          	   (1 << 3)
+#define ACA_CLK_EN          	   (1 << 2)
+#define SCA_CLK_EN          	   (1 << 1)
+#define HASH_CLK_EN         	   (1 << 0)
+
+/* OTP registers */
+#define TE200_OTP_MANAGER          0x0400
+#define TE200_OTP_SPACE            0x1000
+#define TE200_OTP_DUMMY_CFG		   (TE200_OTP_MANAGER+0x28)
+
+/* SECURE SCA registers */
+#define TE200_SSCA_QUEUE           0x3200
+#define TE200_SSCA_CTRL	           0x3204
+#define TE200_SSCA_STAT            0x3208
+#define TE200_SSCA_INTR_STAT       0x320C
+#define TE200_SSCA_INTR_MSK        0x3210
+#define TE200_SSCA_SUSP_MSK        0x3214
+
+/* sca queue register bits */
+#define SCA_INIT_CMD       (0x80 << 24)
+#define SCA_PROCESS_CMD    (0x40 << 24)
+#define SCA_FINISH_CMD     (0x20 << 24)
+
+/* sca intr msk register */
+#define WM_INTR_MSK         (1 << 4)
+#define BUS_RROR_MSK        (1 << 3)
+#define INVALID_KEY_MSK     (1 << 2)
+#define INVALID_CMD_MSK     (1 << 1)
+#define CMD_INIR_MSK        (1 << 0)
+
+/* sca queue registers bits */
+#define SCA_INTER_TRIGGERD     (1 << 0)
+
+/* sca ctrl registers bits */
+#define SCA_RUN             (1 << 0)
+
+/* sca intr stat registers bits */
+#define SCA_WM_INTR         (1 << 4)
+#define SCA_BUS_ERROR       (1 << 3)
+#define SCA_INVALID_KEY     (1 << 2)
+#define SCA_INVALID_CMD     (1 << 1)
+#define SCA_CMD_INTR        (1 << 0)
+
+/* sca queue: sca init */
+#define SCA_MODEL_KEY           (~(1 << 22))
+#define SCA_DEVICE_ROOT_KEY     (1 << 22)
+#define SCA_EXTERNAL_KEY        (2 << 22)
+
+#define SCA_KEY_128_BITS        (~(1 << 20))
+#define SCA_KEY_192_BITS        (1 << 20)
+#define SCA_KEY_256_BITS        (2 << 20)
+
+#define SCA_NORMAL_AES          (~(1 << 19))
+#define SCA_SM4                 (1 << 19)
+#define SCA_KEY_IS_ADDR         (1 << 18)
+#define SCA_SET_IV              (1 << 15)
+#define SCA_SET_IV_ADDR         (1 << 14)
+
+#define SCA_MODE_ECB            (~(1 << 4))
+#define SCA_MODE_CTR            (1 << 4)
+#define SCA_MODE_CBC            (2 << 4)
+#define SCA_MODE_CBC_MAC        (3 << 4)
+#define SCA_MODE_CMAC           (4 << 4)
+#define SCA_MODE_GHASH          (5 << 4)
+
+/* sca queue: sca process */
+#define SCA_LAST_ONE_SESSION    (1 << 6)
+#define SCA_ENCRYPTION          (1 << 5)
+
+/* SECURE HASH registers  */
+#define TE200_SHASH_QUEUE  		 	0x3280
+#define TE200_SHASH_CTRL			0x3284
+#define TE200_SHASH_STAT			0x3288
+#define TE200_SHASH_INTR_STAT		0x328C
+#define TE200_SHASH_INTR_MSK		0x3290
+#define TE200_SHASH_SUSP_MSK		0x3294
+
+/* hash queue register bits */
+#define HASH_INIT_CMD       (0x80 << 24)
+#define HASH_PROCESS_CMD    (0x40 << 24)
+#define HASH_FINISH_CMD     (0x20 << 24)
+
+/* hash queue registers bits */
+#define HASH_INTER_TRIGGERD     (1 << 0)
+
+/* scahash ctrl registers bits */
+#define HASH_RUN             	(1 << 0)
+
+/* hash queue: hash init */
+#define HASH_MODE_SHA1			(~(1 << 5))
+#define HASH_MODE_SHA224		(1 << 5)
+#define HASH_MODE_SHA256		(2 << 5)
+#define HASH_MODE_SM3			(3 << 5)
+
+#define HASH_SET_EXT_IV	        (1 << 4)
+#define HASH_PARAM_IS_ADDR      (1 << 3)
+
+/* hash queue: hash process */
+#define HASH_LITTLE_ENDIAN		(1 << 1)
+
+/* hash queue: hash finish */
+#define HASH_PADDING			(1 << 7)
+
+/* hash intr stat registers bits */
+#define HASH_WM_INTR         (1 << 4)
+#define HASH_BUS_ERROR       (1 << 3)
+#define HASH_PADDING_ERROR   (1 << 2)
+#define HASH_INVALID_CMD     (1 << 1)
+#define HASH_CMD_INTR        (1 << 0)
+
+struct asr_te200_dev {
+	unsigned long		phys_base;
+	void __iomem		*io_base;
+	struct mutex		te200_lock;
+	struct device		*dev;
+
+	struct clk		*te200_clk;
+	int			clk_synced;
+	refcount_t	refcount;
+
+	struct asr_te200_cipher asr_cipher;
+	struct asr_te200_sha asr_sha;
+	struct asr_te200_rsa asr_rsa;
+
+	struct asr_te200_ops	*te200_ops;
+};
+
+struct asr_te200_ops {
+	int (*dev_get)(struct asr_te200_dev *);
+	int (*dev_put)(struct asr_te200_dev *);
+};
+
+int asr_te200_cipher_register(struct asr_te200_dev *te200_dd);
+int asr_te200_cipher_unregister(struct asr_te200_dev *te200_dd);
+
+int asr_te200_sha_register(struct asr_te200_dev *te200_dd);
+int asr_te200_sha_unregister(struct asr_te200_dev *te200_dd);
+
+int asr_te200_rsa_register(struct asr_te200_dev *te200_dd);
+int asr_te200_rsa_unregister(struct asr_te200_dev *te200_dd);
+#endif
\ No newline at end of file
diff --git a/marvell/linux/drivers/crypto/asr/te200_optee/Makefile b/marvell/linux/drivers/crypto/asr/te200_optee/Makefile
new file mode 100644
index 0000000..f7f28f2
--- /dev/null
+++ b/marvell/linux/drivers/crypto/asr/te200_optee/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_ASR_TE200) += asr-te200-optee.o
+obj-$(CONFIG_ASR_TE200_SHA) += asr-sha-optee.o
+obj-$(CONFIG_ASR_TE200_CIPHER) += asr-cipher-optee.o
+obj-$(CONFIG_ASR_TE200_RSA) += asr-rsa-optee.o
\ No newline at end of file
diff --git a/marvell/linux/drivers/crypto/asr/te200_optee/asr-cipher-optee.c b/marvell/linux/drivers/crypto/asr/te200_optee/asr-cipher-optee.c
new file mode 100644
index 0000000..805a48d
--- /dev/null
+++ b/marvell/linux/drivers/crypto/asr/te200_optee/asr-cipher-optee.c
@@ -0,0 +1,651 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023 ASR Micro Limited
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+#include <linux/of.h>
+#include <linux/clk.h>
+#include <linux/io.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/uaccess.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#ifdef CONFIG_TEE
+#include <linux/tee_drv.h>
+#endif
+#include <linux/crypto.h>
+#include <linux/cputype.h>
+#include <crypto/scatterwalk.h>
+#include <crypto/algapi.h>
+#include <crypto/aes.h>
+#include <crypto/internal/skcipher.h>
+
+#include "asr-te200-optee.h"
+#include "asr-cipher-optee.h"
+
+struct asr_te200_cipher *asr_cipher_local;
+
+static struct teec_uuid pta_cipher_uuid = ASR_AES_ACCESS_UUID;
+
+static int asr_optee_cipher_get_rkek_state(u32 *state)
+{
+	struct tee_ioctl_invoke_arg invoke_arg;
+	struct tee_param params[1];
+	struct asrte200_tee_context asrte200_tee_ctx;
+	int ret = 0;
+
+	ret = asrte200_optee_open_ta(&asrte200_tee_ctx, &pta_cipher_uuid);
+	if (ret != 0) {
+		return ret;
+	}
+
+	memset(&invoke_arg, 0x0, sizeof(struct tee_ioctl_invoke_arg));
+	invoke_arg.func = CMD_AES_HWKEY_STATUS;
+	invoke_arg.session  = asrte200_tee_ctx.session;
+	invoke_arg.num_params = 1;
+
+	params[0].attr = TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_OUTPUT;
+	params[0].u.value.a = 0;
+	params[0].u.value.b = 0;
+	params[0].u.value.c = 0;
+
+	ret = tee_client_invoke_func(asrte200_tee_ctx.tee_ctx, &invoke_arg, params);
+	if (ret != 0) {
+		goto exit;
+	} else if (invoke_arg.ret != 0) {
+		ret = -EIO;
+		goto exit;
+	}
+
+	*state = params[0].u.value.a;
+
+exit:
+	asrte200_optee_close_ta(&asrte200_tee_ctx);
+	return ret;
+
+}
+
+static int asr_optee_cipher_process(uint32_t cipher_mode, uint32_t op_mode,
+		struct scatterlist *src, struct scatterlist *dst,
+		size_t len, uint32_t key_size, u8 *key,
+		u8 *iv, uint32_t ivsize)
+{
+	struct tee_ioctl_invoke_arg invoke_arg;
+	struct tee_param params[4];
+	struct asrte200_tee_context asrte200_tee_ctx;
+	struct tee_shm *shm;
+	int ret = 0;
+	char *ma = NULL;
+	uint32_t srclen = len, dstlen = len, paralen_a = key_size, paralen_b = ivsize;
+	uint8_t *parabuf_a = key, *parabuf_b = iv;
+
+	ret = asrte200_optee_open_ta(&asrte200_tee_ctx, &pta_cipher_uuid);
+	if (ret != 0) {
+		return ret;
+	}
+
+	memset(&invoke_arg, 0x0, sizeof(struct tee_ioctl_invoke_arg));
+	invoke_arg.func = cipher_mode;
+	invoke_arg.session  = asrte200_tee_ctx.session;
+
+	shm = tee_shm_alloc(asrte200_tee_ctx.tee_ctx,
+			    srclen + dstlen + paralen_a + paralen_b,
+			    TEE_SHM_MAPPED | TEE_SHM_DMA_BUF);
+	if (IS_ERR(shm)) {
+		ret = PTR_ERR(shm);
+		goto exit;
+	}
+
+	params[0].attr = TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INPUT;
+	params[0].u.memref.shm_offs = 0;
+	params[0].u.memref.size = srclen;
+	params[0].u.memref.shm = shm;
+
+	params[1].attr = TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_OUTPUT;
+	params[1].u.memref.shm_offs = srclen;
+	params[1].u.memref.size = dstlen;
+	params[1].u.memref.shm = shm;
+
+	params[2].attr = TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INPUT;
+	params[2].u.value.a = op_mode;
+
+	ma = tee_shm_get_va(shm, 0);
+	if (IS_ERR(ma)) {
+		ret = PTR_ERR(ma);
+		goto free_shm;
+	}
+	sg_copy_to_buffer(src, sg_nents(src), ma, srclen);
+	memcpy(ma + srclen + dstlen, parabuf_a, paralen_a);
+
+	/* cbc with iv */
+	if (parabuf_b && paralen_b) {
+		memcpy(ma + srclen + dstlen + paralen_a, parabuf_b, paralen_b);
+		params[2].u.value.b = paralen_a;
+		params[3].attr = TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INPUT;
+		params[3].u.memref.shm_offs = srclen + dstlen;
+		params[3].u.memref.size = paralen_a + paralen_b;
+		params[3].u.memref.shm = shm;
+		invoke_arg.num_params = 4;
+	} else {
+		/* ecb with non iv */
+		params[3].attr = TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INPUT;
+		params[3].u.memref.shm_offs = srclen + dstlen;
+		params[3].u.memref.size = paralen_a;
+		params[3].u.memref.shm = shm;
+		invoke_arg.num_params = 4;
+	}
+
+	ret = tee_client_invoke_func(asrte200_tee_ctx.tee_ctx, &invoke_arg, params);
+	if (ret != 0) {
+		goto free_shm;
+	} else if (invoke_arg.ret != 0) {
+		ret = -EIO;
+		goto free_shm;
+	}
+	sg_copy_from_buffer(dst, sg_nents(dst), ma + srclen, dstlen);
+
+free_shm:
+	tee_shm_free(shm);
+exit:
+	asrte200_optee_close_ta(&asrte200_tee_ctx);
+	return ret;
+}
+
+static int asr_optee_cipher_hwkey_process(uint32_t cipher_mode, uint32_t op_mode,
+		struct scatterlist *src, struct scatterlist *dst,
+		size_t len, uint32_t key_size,
+		u8 *iv, uint32_t ivsize)
+{
+	struct tee_ioctl_invoke_arg invoke_arg;
+	struct tee_param params[4];
+	struct asrte200_tee_context asrte200_tee_ctx;
+	struct tee_shm *shm;
+	int ret = 0;
+	char *ma = NULL;
+	uint32_t srclen = len, dstlen = len, paralen = ivsize;
+	uint8_t *parabuf = iv;
+
+	ret = asrte200_optee_open_ta(&asrte200_tee_ctx, &pta_cipher_uuid);
+	if (ret != 0) {
+		return ret;
+	}
+
+	memset(&invoke_arg, 0x0, sizeof(struct tee_ioctl_invoke_arg));
+	invoke_arg.func = cipher_mode;
+	invoke_arg.session  = asrte200_tee_ctx.session;
+
+	shm = tee_shm_alloc(asrte200_tee_ctx.tee_ctx, srclen + dstlen + paralen,
+			    TEE_SHM_MAPPED | TEE_SHM_DMA_BUF);
+	if (IS_ERR(shm)) {
+		ret = PTR_ERR(shm);
+		goto exit;
+	}
+
+	params[0].attr = TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INPUT;
+	params[0].u.memref.shm_offs = 0;
+	params[0].u.memref.size = srclen;
+	params[0].u.memref.shm = shm;
+
+	params[1].attr = TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_OUTPUT;
+	params[1].u.memref.shm_offs = srclen;
+	params[1].u.memref.size = dstlen;
+	params[1].u.memref.shm = shm;
+
+	params[2].attr = TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INPUT;
+	params[2].u.value.a = key_size;
+	params[2].u.value.b = op_mode;
+	params[2].u.value.c = 0;
+
+	ma = tee_shm_get_va(shm, 0);
+	if (IS_ERR(ma)) {
+		ret = PTR_ERR(ma);
+		goto free_shm;
+	}
+	sg_copy_to_buffer(src, sg_nents(src), ma, srclen);
+	if (parabuf && paralen) {
+		params[3].attr = TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INPUT;
+		params[3].u.memref.shm_offs = srclen + dstlen;
+		params[3].u.memref.size = paralen;
+		params[3].u.memref.shm = shm;
+		memcpy(ma + srclen + dstlen, parabuf, paralen);
+		invoke_arg.num_params = 4;
+	} else {
+		invoke_arg.num_params = 3;
+	}
+
+	ret = tee_client_invoke_func(asrte200_tee_ctx.tee_ctx, &invoke_arg, params);
+	if (ret != 0) {
+		goto free_shm;
+	} else if (invoke_arg.ret != 0) {
+		ret = -EIO;
+		goto free_shm;
+	}
+	sg_copy_from_buffer(dst, sg_nents(dst), ma + srclen, dstlen);
+
+free_shm:
+	tee_shm_free(shm);
+exit:
+	asrte200_optee_close_ta(&asrte200_tee_ctx);
+	return ret;
+}
+
+static inline void asr_cipher_set_mode(struct asr_te200_cipher *dd,
+				      const struct asr_cipher_reqctx *rctx)
+{
+	/* Clear all but persistent flags and set request flags. */
+	dd->flags = (dd->flags & CIPHER_FLAGS_PERSISTENT) | rctx->mode;
+}
+
+static void asr_cipher_set_iv_as_last_ciphertext_block(struct asr_te200_cipher *dd)
+{
+	struct skcipher_request *req = skcipher_request_cast(dd->areq);
+	struct asr_cipher_reqctx *rctx = skcipher_request_ctx(req);
+	struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(req);
+	unsigned int ivsize = crypto_skcipher_ivsize(cipher);
+
+	if (req->cryptlen < ivsize)
+		return;
+
+	if (rctx->mode & FLAGS_ENCRYPT) {
+		scatterwalk_map_and_copy(req->iv, req->dst,
+					 req->cryptlen - ivsize, ivsize, 0);
+	} else {
+		if (req->src == req->dst)
+			memcpy(req->iv, rctx->lastc, ivsize);
+		else
+			scatterwalk_map_and_copy(req->iv, req->src,
+						 req->cryptlen - ivsize,
+						 ivsize, 0);
+	}
+}
+
+static int asr_cipher_handle_queue(struct asr_te200_cipher *dd,
+				  struct crypto_async_request *new_areq)
+{
+	struct crypto_async_request *areq, *backlog;
+	struct asr_cipher_ctx *ctx;
+	unsigned long flags;
+	bool start_async;
+	int err, ret = 0;
+
+	spin_lock_irqsave(&dd->lock, flags);
+	if (new_areq)
+		ret = crypto_enqueue_request(&dd->queue, new_areq);
+	if (dd->flags & FLAGS_BUSY) {
+		spin_unlock_irqrestore(&dd->lock, flags);
+		return ret;
+	}
+
+	backlog = crypto_get_backlog(&dd->queue);
+	areq = crypto_dequeue_request(&dd->queue);
+	if (areq) {
+		dd->flags |= FLAGS_BUSY;
+	}
+	spin_unlock_irqrestore(&dd->lock, flags);
+
+	if (!areq)
+		return ret;
+
+	if (backlog)
+		backlog->complete(backlog, -EINPROGRESS);
+
+	ctx = crypto_tfm_ctx(areq->tfm);
+
+	dd->areq = areq;
+	dd->ctx = ctx;
+	start_async = (areq != new_areq);
+	dd->is_async = start_async;
+
+	/* WARNING: ctx->start() MAY change dd->is_async. */
+	err = ctx->start(dd);
+	return (start_async) ? ret : err;
+}
+
+static inline int asr_cipher_complete(struct asr_te200_cipher *dd, int err)
+{
+
+	dd->flags &= ~FLAGS_BUSY;
+
+	asr_cipher_set_iv_as_last_ciphertext_block(dd);
+
+	if (dd->is_async)
+		dd->areq->complete(dd->areq, err);
+
+	tasklet_schedule(&dd->queue_task);
+
+	return err;
+}
+
+static int asr_cipher_start(struct asr_te200_cipher *dd)
+{
+	struct skcipher_request *req = skcipher_request_cast(dd->areq);
+	struct asr_cipher_reqctx *rctx = skcipher_request_ctx(req);
+	struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(req);
+	u8 *iv;
+	u32 flags, cipher_mode, op_mode, keylen, ivsize;
+	int err;
+
+	asr_cipher_set_mode(dd, rctx);
+
+	flags = dd->flags;
+
+	if ((flags & FLAGS_OPMODE_MASK) == FLAGS_CBC) {
+		if (rctx->use_rkek) {
+			cipher_mode = CMD_AES_HWKEY_CBC;
+		} else {
+			cipher_mode = CMD_AES_CBC;
+		}
+		ivsize = crypto_skcipher_ivsize(cipher);
+		iv = req->iv;
+	} else {
+		iv = NULL;
+		ivsize = 0;
+		if (rctx->use_rkek) {
+			cipher_mode = CMD_AES_HWKEY_ECB;
+		} else {
+			cipher_mode = CMD_AES_ECB;
+		}
+	}
+
+	if (flags & FLAGS_ENCRYPT)
+		op_mode = 1;
+	else
+		op_mode = 0;
+
+	keylen = dd->ctx->keylen;
+
+	if (rctx->use_rkek) {
+		err = asr_optee_cipher_hwkey_process(cipher_mode, op_mode, req->src,
+				req->dst, req->cryptlen, keylen, iv, ivsize);
+	} else {
+		err = asr_optee_cipher_process(cipher_mode, op_mode, req->src,
+				req->dst, req->cryptlen, keylen, (u8 *)dd->ctx->key, iv, ivsize);		
+	}
+
+	return asr_cipher_complete(dd, err);
+}
+
+static int asr_cipher(struct skcipher_request *req, unsigned long mode)
+{
+	struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(req);
+	struct asr_cipher_ctx *ctx = crypto_skcipher_ctx(cipher);
+	struct asr_cipher_reqctx *rctx;
+	struct asr_te200_cipher *dd = asr_cipher_local;
+
+	ctx->block_size = AES_BLOCK_SIZE;
+	ctx->dd = dd;
+
+	rctx = skcipher_request_ctx(req);
+	rctx->mode = mode;
+	rctx->use_rkek = ctx->use_rkek;
+
+	if (!(mode & FLAGS_ENCRYPT) && (req->src == req->dst)) {
+		unsigned int ivsize = crypto_skcipher_ivsize(cipher);
+		if (req->cryptlen >= ivsize) {
+			scatterwalk_map_and_copy(rctx->lastc, req->src,
+						 req->cryptlen - ivsize,
+						 ivsize, 0);
+		}
+	}
+
+	return asr_cipher_handle_queue(dd, &req->base);
+}
+
+static int asr_cipher_setkey(struct crypto_skcipher *cipher, const u8 *key,
+			   unsigned int keylen)
+{
+	struct asr_cipher_ctx *ctx = crypto_skcipher_ctx(cipher);
+	struct asr_te200_cipher *dd = asr_cipher_local;
+	
+	ctx->dd = dd;
+	ctx->use_rkek = false;
+
+	if (keylen != AES_KEYSIZE_128 &&
+		keylen != AES_KEYSIZE_192 &&
+		keylen != AES_KEYSIZE_256) {
+		crypto_skcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+
+	memcpy(ctx->key, key, keylen);
+	ctx->keylen = keylen;
+
+	return 0;
+}
+
+static int asr_cipher_set_hwkey(struct crypto_skcipher *cipher, const u8 *key,
+			   unsigned int keylen)
+{
+	struct asr_cipher_ctx *ctx = crypto_skcipher_ctx(cipher);
+	struct asr_te200_cipher *dd = asr_cipher_local;
+
+	(void)key; /* ignore the sw key */
+
+	if (!dd->rkek_burned)
+		return -EPERM;
+
+	if (keylen != AES_KEYSIZE_128 &&
+	    keylen != AES_KEYSIZE_192 &&
+	    keylen != AES_KEYSIZE_256) {
+		crypto_skcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+
+	ctx->keylen = keylen;
+
+	return 0;
+}
+
+static int asr_aes_ecb_encrypt(struct skcipher_request *req)
+{
+	return asr_cipher(req, FLAGS_ECB | FLAGS_ENCRYPT);
+}
+
+static int asr_aes_ecb_decrypt(struct skcipher_request *req)
+{
+	return asr_cipher(req, FLAGS_ECB);
+}
+
+static int asr_aes_cbc_encrypt(struct skcipher_request *req)
+{
+	return asr_cipher(req, FLAGS_CBC | FLAGS_ENCRYPT);
+}
+
+static int asr_aes_cbc_decrypt(struct skcipher_request *req)
+{
+	return asr_cipher(req, FLAGS_CBC);
+}
+
+static int asr_cipher_init(struct crypto_skcipher *tfm)
+{
+	struct asr_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	tfm->reqsize = sizeof(struct asr_cipher_reqctx);
+	ctx->start = asr_cipher_start;
+
+	return 0;
+}
+
+static int asr_cipher_hwkey_init(struct crypto_skcipher *tfm)
+{
+	struct asr_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct asr_te200_cipher *dd = asr_cipher_local;
+
+	if (!dd->rkek_burned)
+		return -EPERM;
+
+	tfm->reqsize = sizeof(struct asr_cipher_reqctx);
+	ctx->start = asr_cipher_start;
+
+	return 0;
+}
+
+static void asr_cipher_exit(struct crypto_skcipher *tfm)
+{
+	struct asr_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	memset(ctx, 0, sizeof(*ctx));
+}
+
+static void asr_cipher_queue_task(unsigned long data)
+{
+	struct asr_te200_cipher *dd = (struct asr_te200_cipher *)data;
+
+	asr_cipher_handle_queue(dd, NULL);
+}
+
+static struct skcipher_alg cipher_algs[] = {
+	/* AES - ECB, using input key*/
+	{
+		.base = {
+			.cra_name = "ecb(aes)",
+			.cra_driver_name = "asr-ecb-aes",
+			.cra_priority = 300,
+			.cra_flags = CRYPTO_ALG_ASYNC,
+			.cra_blocksize = AES_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct asr_cipher_ctx),
+			.cra_alignmask = 0xf,
+			.cra_module = THIS_MODULE,
+		},
+		.min_keysize = AES_MIN_KEY_SIZE,
+		.max_keysize = AES_MAX_KEY_SIZE,
+		.setkey = asr_cipher_setkey,
+		.encrypt = asr_aes_ecb_encrypt,
+		.decrypt = asr_aes_ecb_decrypt,
+		.init = asr_cipher_init,
+		.exit = asr_cipher_exit,
+	},
+	/* AES - CBC, using input key,*/
+	{
+		.base = {
+			.cra_name = "cbc(aes)",
+			.cra_driver_name = "asr-cbc-aes",
+			.cra_priority = 300,
+			.cra_flags = CRYPTO_ALG_ASYNC,
+			.cra_blocksize = AES_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct asr_cipher_ctx),
+			.cra_alignmask = 0xf,
+			.cra_module = THIS_MODULE,
+		},
+		.min_keysize = AES_MIN_KEY_SIZE,
+		.max_keysize = AES_MAX_KEY_SIZE,
+		.setkey = asr_cipher_setkey,
+		.encrypt = asr_aes_cbc_encrypt,
+		.decrypt = asr_aes_cbc_decrypt,
+		.init = asr_cipher_init,
+		.exit = asr_cipher_exit,
+		.ivsize = AES_BLOCK_SIZE,
+	},
+	/* AES - ECB, using hardware key, a.k.a. RKEK */
+	{
+		.base = {
+			.cra_name = "ecb(aes-hwkey)",
+			.cra_driver_name = "asr-ecb-aes-hwkey",
+			.cra_priority = 300,
+			.cra_flags = CRYPTO_ALG_ASYNC,
+			.cra_blocksize = AES_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct asr_cipher_ctx),
+			.cra_alignmask = 0xf,
+			.cra_module = THIS_MODULE,
+		},
+		.min_keysize = AES_MIN_KEY_SIZE,
+		.max_keysize = AES_MAX_KEY_SIZE,
+		.setkey = asr_cipher_set_hwkey,
+		.encrypt = asr_aes_ecb_encrypt,
+		.decrypt = asr_aes_ecb_decrypt,
+		.init = asr_cipher_hwkey_init,
+		.exit = asr_cipher_exit,
+	},
+	/* AES - CBC, using hardware key, a.k.a. RKEK */
+	{
+		.base = {
+			.cra_name = "cbc(aes-hwkey)",
+			.cra_driver_name = "asr-cbc-aes-hwkey",
+			.cra_priority = 300,
+			.cra_flags = CRYPTO_ALG_ASYNC,
+			.cra_blocksize = AES_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct asr_cipher_ctx),
+			.cra_alignmask = 0xf,
+			.cra_module = THIS_MODULE,
+		},
+		.min_keysize = AES_MIN_KEY_SIZE,
+		.max_keysize = AES_MAX_KEY_SIZE,
+		.setkey = asr_cipher_set_hwkey,
+		.encrypt = asr_aes_cbc_encrypt,
+		.decrypt = asr_aes_cbc_decrypt,
+		.init = asr_cipher_hwkey_init,
+		.exit = asr_cipher_exit,
+		.ivsize = AES_BLOCK_SIZE,
+	},
+};
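+
+/*
+ * Illustrative usage sketch (an assumption for documentation, not part of
+ * the driver): how a kernel caller could reach the "cbc(aes)" transform
+ * above through the generic skcipher API; swapping the name for
+ * "cbc(aes-hwkey)" selects the fused RKEK variant instead. The key, src_sg,
+ * dst_sg and iv buffers are hypothetical and error handling is trimmed.
+ *
+ *	struct crypto_skcipher *tfm = crypto_alloc_skcipher("cbc(aes)", 0, 0);
+ *	struct skcipher_request *req = skcipher_request_alloc(tfm, GFP_KERNEL);
+ *	DECLARE_CRYPTO_WAIT(wait);
+ *
+ *	crypto_skcipher_setkey(tfm, key, AES_KEYSIZE_128);
+ *	skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+ *				      crypto_req_done, &wait);
+ *	skcipher_request_set_crypt(req, src_sg, dst_sg, len, iv);
+ *	crypto_wait_req(crypto_skcipher_encrypt(req), &wait);
+ *
+ *	skcipher_request_free(req);
+ *	crypto_free_skcipher(tfm);
+ */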
+
+int asr_te200_cipher_register(struct asr_te200_dev *te200_dd)
+{
+	int i, j, err;
+	struct asr_te200_cipher *cipher_dd;
+	struct device *dev = te200_dd->dev;
+	u32 rkek_state;
+
+	cipher_dd = &te200_dd->asr_cipher;
+	cipher_dd->dev = te200_dd->dev;
+
+	asr_cipher_local = cipher_dd;
+
+	err = asr_optee_cipher_get_rkek_state(&rkek_state);
+	if (err) {
+		dev_warn(dev, "can't get hwkey(rkek) state\n");
+		cipher_dd->rkek_burned = 0;
+	} else {
+		if (rkek_state)
+			cipher_dd->rkek_burned = 1;
+		else
+			cipher_dd->rkek_burned = 0;
+		switch (rkek_state) {
+		case 2:
+			dev_warn(dev, "hwkey(rkek) burned, SW access not disabled\n");
+			break;
+		case 1:
+			dev_warn(dev, "hwkey(rkek) burned, SW access disabled\n");
+			break;
+		case 0:
+			dev_warn(dev, "hwkey(rkek) not burned\n");
+			break;
+		}
+	}
+
+	spin_lock_init(&cipher_dd->lock);
+	tasklet_init(&cipher_dd->queue_task, asr_cipher_queue_task,
+					(unsigned long)cipher_dd);
+
+	crypto_init_queue(&cipher_dd->queue, ASR_CIPHER_QUEUE_LENGTH);
+
+	for (i = 0; i < ARRAY_SIZE(cipher_algs); i++) {
+		err = crypto_register_skcipher(&cipher_algs[i]);
+		if (err) {
+			for (j = 0; j < i; j++)
+				crypto_unregister_skcipher(&cipher_algs[j]);
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+int asr_te200_cipher_unregister(struct asr_te200_dev *te200_dd)
+{
+	int i;
+	struct asr_te200_cipher *cipher_dd = &te200_dd->asr_cipher;
+
+	for (i = 0; i < ARRAY_SIZE(cipher_algs); i++)
+		crypto_unregister_skcipher(&cipher_algs[i]);
+
+	tasklet_kill(&cipher_dd->queue_task);
+
+	/*
+	 * cipher_dd is embedded in te200_dd, which is devm-managed in the
+	 * probe path, so there is nothing to free separately here.
+	 */
+	return 0;
+}
+
+MODULE_DESCRIPTION("ASR TE200 CIPHER driver with optee-os.");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Yonggan Wang");
\ No newline at end of file
diff --git a/marvell/linux/drivers/crypto/asr/te200_optee/asr-cipher-optee.h b/marvell/linux/drivers/crypto/asr/te200_optee/asr-cipher-optee.h
new file mode 100644
index 0000000..a5ac0eb
--- /dev/null
+++ b/marvell/linux/drivers/crypto/asr/te200_optee/asr-cipher-optee.h
@@ -0,0 +1,72 @@
+#ifndef ASR_CIPHER_OPTEE_H
+#define ASR_CIPHER_OPTEE_H
+
+#define ASR_AES_ACCESS_UUID \
+		{ \
+			0xba1b496f, 0xf07d, 0x466e, \
+			{ 0x99, 0x09, 0xeb, 0xe3, 0x55, 0x43, 0xa0, 0x1c } \
+		}
+
+/*
+ * AES ECB encrypt/decrypt data with HWKEY(RKEK)
+ *
+ * [in]     pParams[0].memref.buffer    plain/cipher text to encrypt/decrypt
+ * [in]     pParams[0].memref.size      length of plain/cipher text
+ * [out]    pParams[1].memref.buffer    cipher/plain text after encrypt/decrypt
+ * [in]     pParams[2].value.a          keysize
+ * [in]     pParams[2].value.b          op_mode: 1--encrypt, 0--decrypt
+ */
+#define CMD_AES_HWKEY_ECB   0x1
+
+/*
+ * AES CBC encrypt/decrypt data with HWKEY(RKEK)
+ *
+ * [in]     pParams[0].memref.buffer    plain/cipher text to encrypt/decrypt
+ * [in]     pParams[0].memref.size      length of plain/cipher text
+ * [out]    pParams[1].memref.buffer    cipher/plain text after encrypt/decrypt
+ * [in]     pParams[2].value.a          keysize
+ * [in]     pParams[2].value.b          op_mode: 1--encrypt, 0--decrypt
+ * [in]     pParams[3].memref.buffer    initial vector
+ */
+#define CMD_AES_HWKEY_CBC   0x2
+
+/*
+ * Check AES RKEK status
+ *     0: RKEK(hwkey) is not burned
+ *     1: RKEK(hwkey) is burned and software access is disabled
+ *     2: RKEK(hwkey) is burned but software access is not disabled
+ *
+ * [out]    pParams[0].value.a          status
+ */
+#define CMD_AES_HWKEY_STATUS	0x3
+
+/*
+ * AES ECB encrypt/decrypt data with input key
+ *
+ * [in]     pParams[0].memref.buffer    plain/cipher text to encrypt/decrypt
+ * [in]     pParams[0].memref.size      length of plain/cipher text
+ * [out]    pParams[1].memref.buffer    cipher/plain text after encrypt/decrypt
+ * [in]     pParams[2].value.a          op_mode: 1--encrypt, 0--decrypt
+ * [in]     pParams[3].memref.buffer    input key
+ * [in]     pParams[3].memref.size      keysize
+ */
+#define CMD_AES_ECB   0x4
+
+/*
+ * AES CBC encrypt/decrypt data with input key
+ *
+ * [in]     pParams[0].memref.buffer    plain/cipher text to encrypt/decrypt
+ * [in]     pParams[0].memref.size      length of plain/cipher text
+ * [out]    pParams[1].memref.buffer    cipher/plain text after encrypt/decrypt
+ * [in]     pParams[2].value.a          op_mode: 1--encrypt, 0--decrypt
+ * [in]     pParams[2].value.b          keysize
+ * [in]     pParams[3].memref.buffer    input key + initial vector
+ * [in]     pParams[3].memref.size      keysize + ivsize
+ */
+#define CMD_AES_CBC   0x5
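+
+/*
+ * Illustrative packing sketch for CMD_AES_CBC (an assumption for
+ * documentation, not part of the interface): the shared-memory layout the
+ * comment above implies, for a hypothetical caller that already owns the
+ * tee_shm "shm" and the key/iv buffers.
+ *
+ *	u8 *va = tee_shm_get_va(shm, 0);
+ *
+ *	memcpy(va, key, keysize);			// pParams[3]: key ...
+ *	memcpy(va + keysize, iv, AES_BLOCK_SIZE);	// ... followed by the IV
+ *	params[3].attr = TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INPUT;
+ *	params[3].u.memref.shm = shm;
+ *	params[3].u.memref.shm_offs = 0;
+ *	params[3].u.memref.size = keysize + AES_BLOCK_SIZE;
+ *
+ *	params[2].attr = TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INPUT;
+ *	params[2].u.value.a = op_mode;			// 1 -- encrypt, 0 -- decrypt
+ *	params[2].u.value.b = keysize;
+ */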
+
+
+#include "../te200/asr-cipher.h"
+
+#endif
\ No newline at end of file
diff --git a/marvell/linux/drivers/crypto/asr/te200_optee/asr-rsa-optee.c b/marvell/linux/drivers/crypto/asr/te200_optee/asr-rsa-optee.c
new file mode 100644
index 0000000..d16aea0
--- /dev/null
+++ b/marvell/linux/drivers/crypto/asr/te200_optee/asr-rsa-optee.c
@@ -0,0 +1,383 @@
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/clk-provider.h>
+#include <linux/clk.h>
+#include <linux/io.h>
+#include <linux/hw_random.h>
+#include <linux/platform_device.h>
+#include <linux/of_device.h>
+#include <linux/mutex.h>
+#include <linux/device.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <asm/uaccess.h>
+#include <uapi/linux/hwrsa_ioctl.h>
+#ifdef CONFIG_TEE
+#include <linux/tee_drv.h>
+#endif
+
+#include "asr-te200-optee.h"
+#include "asr-rsa-optee.h"
+
+static struct teec_uuid pta_rsa_uuid = ASR_RSA_ACCESS_UUID;
+
+static int asr_optee_rsa_sign(struct hwrsa_arg *rsa_arg, u_int cmd)
+{
+	struct tee_ioctl_invoke_arg invoke_arg;
+	struct tee_param params[4];
+	struct asrte200_tee_context asrte200_tee_ctx;
+	struct tee_shm *shm;
+	int ret = 0;
+	char *ma = NULL;
+	struct rsa_ioctl_key *key = rsa_arg->rsa_key;
+	uint8_t *n = key->n, *e = key->e, *d = key->d, *p = key->p, *q = key->q;
+	size_t n_size = key->n_size, e_size = key->e_size, d_size = key->d_size;
+	size_t p_size = key->p_size, q_size = key->q_size;
+	uint8_t *msg = rsa_arg->msg, *sign = rsa_arg->sign;
+	size_t msg_size = rsa_arg->msg_size, sign_size = key->n_size;
+	int is_blinding;
+	u_int optee_cmd;
+
+	switch (cmd) {
+	case HWRSA_SIGN_PKCS_V15_SHA1:
+		optee_cmd = CMD_RSA_SIGN_PKCS_V15_SHA1;
+		break;
+	case HWRSA_SIGN_PKCS_V15_SHA256:
+		optee_cmd = CMD_RSA_SIGN_PKCS_V15_SHA256;
+		break;
+	case HWRSA_SIGN_PKCS_V21_SHA1:
+		optee_cmd = CMD_RSA_SIGN_PKCS_V21_SHA1;
+		break;
+	case HWRSA_SIGN_PKCS_V21_SHA256:
+		optee_cmd = CMD_RSA_SIGN_PKCS_V21_SHA256;
+		break;
+	default:
+		/* no TA session is open yet, so do not go through the exit path */
+		return -EINVAL;
+	}
+
+	if (!p || !q || !p_size || !q_size) {
+		is_blinding = 0;
+	} else {
+		is_blinding = 1;
+	}
+
+	ret = asrte200_optee_open_ta(&asrte200_tee_ctx, &pta_rsa_uuid);
+	if (ret != 0) {
+		return ret;
+	}
+
+	memset(&invoke_arg, 0x0, sizeof(struct tee_ioctl_invoke_arg));
+	invoke_arg.func = optee_cmd;
+	invoke_arg.session  = asrte200_tee_ctx.session;
+
+	if (is_blinding) {
+		shm = tee_shm_alloc(asrte200_tee_ctx.tee_ctx, 
+							n_size + e_size + d_size + p_size + q_size + msg_size + sign_size, 
+							TEE_SHM_MAPPED | TEE_SHM_DMA_BUF);
+	} else {
+		shm = tee_shm_alloc(asrte200_tee_ctx.tee_ctx, 
+							n_size + e_size + d_size + msg_size + sign_size, 
+							TEE_SHM_MAPPED | TEE_SHM_DMA_BUF);
+	}
+	if (IS_ERR(shm)) {
+		ret = PTR_ERR(shm);
+		goto exit;
+	}
+
+	ma = tee_shm_get_va(shm, 0);
+	memcpy(ma, n, n_size);
+	memcpy(ma + n_size, e, e_size);
+	memcpy(ma + n_size + e_size, d, d_size);
+	if (is_blinding) {
+		memcpy(ma + n_size + e_size + d_size, p, p_size);
+		memcpy(ma + n_size + e_size + d_size + p_size, q, q_size);
+	}
+
+	/* import rsa key */
+	params[0].attr = TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INPUT;
+	params[0].u.memref.shm_offs = 0;
+	if (is_blinding) {
+		params[0].u.memref.size = n_size + e_size + d_size + p_size + q_size;
+	} else {
+		params[0].u.memref.size = n_size + e_size + d_size;
+	}
+	params[0].u.memref.shm = shm;
+
+	params[1].attr = TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INPUT;
+	params[1].u.value.a = 0;
+	params[1].u.value.a |= n_size;
+	params[1].u.value.a |= e_size << 10;
+	params[1].u.value.a |= d_size << 20;
+	params[1].u.value.a |= is_blinding << 30;
+	if (is_blinding) {
+		params[1].u.value.b = 0;
+		params[1].u.value.b |= p_size;
+		params[1].u.value.b |= q_size << 10;
+	}
+
+	/* import message */
+	params[2].attr = TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INPUT;
+	if (is_blinding) {
+		memcpy(ma + n_size + e_size + d_size + p_size + q_size, msg, msg_size);
+		params[2].u.memref.shm_offs = n_size + e_size + d_size + p_size + q_size;
+	} else {
+		memcpy(ma + n_size + e_size + d_size, msg, msg_size);
+		params[2].u.memref.shm_offs = n_size + e_size + d_size;
+	}
+	params[2].u.memref.size = msg_size;
+	params[2].u.memref.shm = shm;
+
+	/* import signature */
+	params[3].attr = TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_OUTPUT;
+	if (is_blinding) {
+		params[3].u.memref.shm_offs = n_size + e_size + d_size + p_size + q_size + msg_size;
+	} else {
+		params[3].u.memref.shm_offs = n_size + e_size + d_size + msg_size;
+	}
+	params[3].u.memref.size = sign_size;
+	params[3].u.memref.shm = shm;
+	
+	invoke_arg.num_params = 4;
+
+	ret = tee_client_invoke_func(asrte200_tee_ctx.tee_ctx, &invoke_arg, params);
+	if (ret != 0) {
+		rsa_arg->result = 0;
+		goto free_shm;
+	} else if (invoke_arg.ret != 0) {
+		rsa_arg->result = 0;
+		goto free_shm;
+	}
+
+	rsa_arg->result = 1;
+
+	if (is_blinding) {
+		memcpy(sign, ma + n_size + e_size + d_size + p_size + q_size + msg_size, sign_size);
+	} else {
+		memcpy(sign, ma + n_size + e_size + d_size + msg_size, sign_size);
+	}
+
+free_shm:
+	tee_shm_free(shm);
+exit:
+	asrte200_optee_close_ta(&asrte200_tee_ctx);
+	return ret;
+}
+
+static int asr_optee_rsa_verify(struct hwrsa_arg *rsa_arg, u_int cmd)
+{
+	struct tee_ioctl_invoke_arg invoke_arg;
+	struct tee_param params[4];
+	struct asrte200_tee_context asrte200_tee_ctx;
+	struct tee_shm *shm;
+	int ret = 0;
+	char *ma = NULL;
+	struct rsa_ioctl_key *key = rsa_arg->rsa_key;
+	uint8_t *n = key->n, *e = key->e;
+	size_t n_size = key->n_size, e_size = key->e_size;
+	uint8_t *msg = rsa_arg->msg, *sign = rsa_arg->sign;
+	size_t msg_size = rsa_arg->msg_size, sign_size = rsa_arg->sign_size;
+	u_int optee_cmd;
+
+	switch (cmd) {
+	case HWRSA_VERIFY_PKCS_V15_SHA1:
+		optee_cmd = CMD_RSA_VERIFY_PKCS_V15_SHA1;
+		break;
+	case HWRSA_VERIFY_PKCS_V15_SHA256:
+		optee_cmd = CMD_RSA_VERIFY_PKCS_V15_SHA256;
+		break;
+	case HWRSA_VERIFY_PKCS_V21_SHA1:
+		optee_cmd = CMD_RSA_VERIFY_PKCS_V21_SHA1;
+		break;
+	case HWRSA_VERIFY_PKCS_V21_SHA256:
+		optee_cmd = CMD_RSA_VERIFY_PKCS_V21_SHA256;
+		break;
+	default:
+		/* no TA session is open yet, so do not go through the exit path */
+		return -EINVAL;
+	}
+
+	ret = asrte200_optee_open_ta(&asrte200_tee_ctx, &pta_rsa_uuid);
+	if (ret != 0) {
+		return ret;
+	}
+
+	memset(&invoke_arg, 0x0, sizeof(struct tee_ioctl_invoke_arg));
+	invoke_arg.func = optee_cmd;
+	invoke_arg.session  = asrte200_tee_ctx.session;
+
+	shm = tee_shm_alloc(asrte200_tee_ctx.tee_ctx, 
+						n_size + e_size + msg_size + sign_size, 
+						TEE_SHM_MAPPED | TEE_SHM_DMA_BUF);
+
+	if (IS_ERR(shm)) {
+		ret = PTR_ERR(shm);
+		goto exit;
+	}
+
+	ma = tee_shm_get_va(shm, 0);
+	memcpy(ma, n, n_size);
+	memcpy(ma + n_size, e, e_size);
+
+	/* import rsa key */
+	params[0].attr = TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INPUT;
+	params[0].u.memref.shm_offs = 0;
+	params[0].u.memref.size = n_size + e_size;
+	params[0].u.memref.shm = shm;
+
+	/* import msg */
+	memcpy(ma + n_size + e_size, msg, msg_size);
+	params[1].attr = TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INPUT;
+	params[1].u.memref.shm_offs = n_size + e_size;
+	params[1].u.memref.size = msg_size;
+	params[1].u.memref.shm = shm;
+
+	/* import sign */
+	memcpy(ma + n_size + e_size + msg_size, sign, sign_size);
+	params[2].attr = TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INPUT;
+	params[2].u.memref.shm_offs = n_size + e_size + msg_size;
+	params[2].u.memref.size = sign_size;
+	params[2].u.memref.shm = shm;
+
+	params[3].attr = TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INPUT;
+	params[3].u.value.a = n_size;
+
+	invoke_arg.num_params = 4;
+
+	ret = tee_client_invoke_func(asrte200_tee_ctx.tee_ctx, &invoke_arg, params);
+	if (ret != 0) {
+		rsa_arg->result = 0;
+		goto free_shm;
+	} else if (invoke_arg.ret != 0) {
+		rsa_arg->result = 0;
+		goto free_shm;
+	}
+
+	rsa_arg->result = 1;
+
+free_shm:
+	tee_shm_free(shm);
+exit:
+	asrte200_optee_close_ta(&asrte200_tee_ctx);
+	return ret;
+}
+
+static int asr_rsa_open(struct inode *inode, struct file *file)
+{
+	return 0;
+}
+
+static int asr_rsa_close(struct inode *inode, struct file *file)
+{
+	return 0;
+}
+
+static long asr_rsa_ioctl(struct file *file, u_int cmd, u_long arg)
+{
+	int ret = 0;
+	struct miscdevice *miscdev;
+	struct asr_te200_rsa *rsa;
+	struct hwrsa_arg rsa_arg;
+	struct rsa_ioctl_key *key;
+	struct hwrsa_arg *u_arg = (void __user *)arg;
+	
+	miscdev = file->private_data;
+	rsa = container_of(miscdev, struct asr_te200_rsa, rsa_misc);
+
+	if (copy_from_user(&rsa_arg, (void __user *)arg, sizeof(rsa_arg))) {
+		return -EFAULT;
+	}
+
+	if (!rsa_arg.rsa_key) {
+		return -EFAULT;
+	}
+
+	key = rsa_arg.rsa_key;
+
+	if (!rsa_arg.msg || !rsa_arg.msg_size) {
+		return -EFAULT;
+	}
+
+	switch (cmd) {
+	case HWRSA_SIGN_PKCS_V15_SHA1:
+	case HWRSA_SIGN_PKCS_V15_SHA256:
+	case HWRSA_SIGN_PKCS_V21_SHA1:
+	case HWRSA_SIGN_PKCS_V21_SHA256:
+		if (!rsa_arg.sign || !key->is_private) {
+			ret = -EINVAL;
+			goto exit;
+		}
+		ret = asr_optee_rsa_sign(&rsa_arg, cmd);
+		put_user(rsa_arg.result, &u_arg->result);
+		break;
+	case HWRSA_VERIFY_PKCS_V15_SHA1:
+	case HWRSA_VERIFY_PKCS_V15_SHA256:
+	case HWRSA_VERIFY_PKCS_V21_SHA1:
+	case HWRSA_VERIFY_PKCS_V21_SHA256:
+		if (!rsa_arg.sign || !rsa_arg.sign_size || key->is_private) {
+			ret = -EINVAL;
+			goto exit;
+		}
+		ret = asr_optee_rsa_verify(&rsa_arg, cmd);
+		put_user(rsa_arg.result, &u_arg->result);
+		break;
+	default:
+		dev_err(rsa->dev, "asr te200: rsa ioctl invalid command %x\n", cmd);
+		ret = -EINVAL;
+		goto exit;
+	}
+
+exit:
+	return ret;
+}
+
+static const struct file_operations asr_rsa_fops = {
+	.owner = THIS_MODULE,
+	.open = asr_rsa_open,
+	.release = asr_rsa_close,
+	.unlocked_ioctl = asr_rsa_ioctl,
+};
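+
+/*
+ * Illustrative userspace sketch (an assumption for documentation, not part
+ * of the driver): verifying a PKCS#1 v1.5 / SHA-256 signature through
+ * /dev/hwrsa. Field names follow uapi/linux/hwrsa_ioctl.h as used above;
+ * the n/e/msg/sig buffers are hypothetical and error handling is trimmed.
+ *
+ *	struct rsa_ioctl_key key = {
+ *		.n = n_buf, .n_size = sizeof(n_buf),
+ *		.e = e_buf, .e_size = sizeof(e_buf),
+ *		.is_private = 0,
+ *	};
+ *	struct hwrsa_arg arg = {
+ *		.rsa_key = &key,
+ *		.msg = msg_buf, .msg_size = msg_len,
+ *		.sign = sig_buf, .sign_size = sig_len,
+ *	};
+ *	int fd = open("/dev/hwrsa", O_RDWR);
+ *
+ *	ioctl(fd, HWRSA_VERIFY_PKCS_V15_SHA256, &arg);
+ *	// arg.result is 1 when the signature verifies, 0 otherwise
+ *	close(fd);
+ */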
+
+int asr_te200_rsa_register(struct asr_te200_dev *te200_dd)
+{
+	int ret = 0;
+	struct asr_te200_rsa *prsa;
+	struct miscdevice *misc;
+	struct device *dev = te200_dd->dev;
+
+	prsa = &te200_dd->asr_rsa;
+	misc = &prsa->rsa_misc;
+
+	misc->name = "hwrsa";
+	misc->minor = MISC_DYNAMIC_MINOR;
+	misc->fops = &asr_rsa_fops;
+	misc->this_device = NULL;
+	prsa->dev = te200_dd->dev;
+
+	/* register the device */
+	ret = misc_register(misc);
+	if (ret < 0) {
+		dev_err(dev,
+			"asr rsa: unable to register device node /dev/hwrsa\n");
+		return ret;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(asr_te200_rsa_register);
+
+int asr_te200_rsa_unregister(struct asr_te200_dev *te200_dd)
+{
+	struct miscdevice *miscdev;
+
+	miscdev = &te200_dd->asr_rsa.rsa_misc;
+
+	misc_deregister(miscdev);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(asr_te200_rsa_unregister);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Yonggan Wang <yongganwang@asrmicro.com>");
+MODULE_DESCRIPTION("ASR hwrsa driver");
\ No newline at end of file
diff --git a/marvell/linux/drivers/crypto/asr/te200_optee/asr-rsa-optee.h b/marvell/linux/drivers/crypto/asr/te200_optee/asr-rsa-optee.h
new file mode 100644
index 0000000..51a2ff9
--- /dev/null
+++ b/marvell/linux/drivers/crypto/asr/te200_optee/asr-rsa-optee.h
@@ -0,0 +1,62 @@
+#ifndef _ASR_TE200_RSA_OPTEE_H_
+#define _ASR_TE200_RSA_OPTEE_H_
+
+#include "../te200/asr-aca/se_rsa.h"
+
+#define ASR_RSA_ACCESS_UUID 									\
+		{ 														\
+			0xf98a8a72, 0x8acb, 0x11ef, 						\
+			{ 0x9e, 0xcc, 0xd7, 0x6e, 0x16, 0xa8, 0xa1, 0xa8 } 	\
+		}														\
+
+/*
+ *	n:	 	The modulus
+ *	e:	 	The public exponent
+ *	d:		The private exponent
+ *	p:		The first prime factor
+ *	q:		The second prime factor
+ *
+ * signature params
+ *
+ * [in]		pParams[0].memref.buffer		rsa key = (n + e + d + p + q) in blinding mode,
+ *											rsa key = (n + e + d) in non-blinding mode.
+ *
+ * [in]		pParams[0].memref.size			key size = (n_size + e_size + d_size + p_size + q_size) in blinding mode,
+ *											key size = (n_size + e_size + d_size) in non-blinding mode.
+ *
+ * [in]		pParams[1].value.a				packed n_size/e_size/d_size
+ *											bit 9:0, n_size
+ *											bit 19:10, e_size
+ *											bit 29:20, d_size
+ *											bit 30:30, is_blinding
+ *
+ * [in]		pParams[1].value.b				packed p_size/q_size, ignored in non-blinding mode
+ *											bit 9:0, p_size
+ *											bit 19:10, q_size
+ *
+ * [in]		pParams[2].memref.buffer		message
+ * [in]		pParams[2].memref.size			length of message
+ * [out]	pParams[3].memref.buffer		signature
+ */
+#define CMD_RSA_SIGN_PKCS_V15_SHA1			0x1
+#define CMD_RSA_SIGN_PKCS_V15_SHA256		0x2
+#define CMD_RSA_SIGN_PKCS_V21_SHA1			0x3
+#define CMD_RSA_SIGN_PKCS_V21_SHA256		0x4
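+
+/*
+ * Illustrative sketch of the pParams[1] packing described above (added for
+ * documentation, not part of the interface); it mirrors what
+ * asr_optee_rsa_sign() in asr-rsa-optee.c builds:
+ *
+ *	params[1].u.value.a = n_size | (e_size << 10) | (d_size << 20) |
+ *			      (is_blinding << 30);
+ *	if (is_blinding)
+ *		params[1].u.value.b = p_size | (q_size << 10);
+ */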
+
+/*
+ * verify params
+ *
+ * [in]		pParams[0].memref.buffer		n + e
+ * [in]		pParams[0].memref.size			n_size + e_size
+ * [in]		pParams[1].memref.buffer		message data
+ * [in]		pParams[1].memref.size			length of message
+ * [in]		pParams[2].memref.buffer		signature
+ * [in]		pParams[2].memref.size			length of signature
+ * [in]		pParams[3].value.a				n_size
+ */
+#define CMD_RSA_VERIFY_PKCS_V15_SHA1		0x5
+#define CMD_RSA_VERIFY_PKCS_V15_SHA256		0x6
+#define CMD_RSA_VERIFY_PKCS_V21_SHA1		0x7
+#define CMD_RSA_VERIFY_PKCS_V21_SHA256		0x8
+
+#endif
diff --git a/marvell/linux/drivers/crypto/asr/te200_optee/asr-sha-optee.c b/marvell/linux/drivers/crypto/asr/te200_optee/asr-sha-optee.c
new file mode 100644
index 0000000..f315b91
--- /dev/null
+++ b/marvell/linux/drivers/crypto/asr/te200_optee/asr-sha-optee.c
@@ -0,0 +1,1002 @@
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/clk-provider.h>
+#include <linux/clk.h>
+#include <linux/io.h>
+#include <linux/hw_random.h>
+#include <linux/platform_device.h>
+#include <linux/scatterlist.h>
+#include <crypto/scatterwalk.h>
+#include <linux/of_device.h>
+#include <linux/mutex.h>
+#include <linux/device.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <crypto/hmac.h>
+#include <crypto/md5.h>
+#include <crypto/sha.h>
+
+#include "asr-sha-optee.h"
+
+static struct asr_te200_sha *asr_sha_local = NULL;
+
+static struct teec_uuid pta_sha_uuid = ASR_SHA_ACCESS_UUID;
+static struct mutex queue_lock = __MUTEX_INITIALIZER(queue_lock);
+
+static int asrte200_optee_acquire_hash_init(struct asr_optee_sha_reqctx *ctx, struct teec_uuid *uuid, u32 cmd, u32 alg)
+{
+	struct tee_ioctl_invoke_arg invoke_arg;
+	struct tee_param params[2];
+	int ret = 0;
+
+	ret = asrte200_optee_open_ta(&ctx->asrte200_tee_ctx, uuid);
+	if (ret != 0) {
+		return ret;
+	}
+
+	memset(&invoke_arg, 0x0, sizeof(struct tee_ioctl_invoke_arg));
+	invoke_arg.func = cmd;
+	invoke_arg.session  = ctx->asrte200_tee_ctx.session;
+	invoke_arg.num_params = 2;
+
+
+	params[0].attr = TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INPUT;
+	params[0].u.value.a = alg;
+
+	params[1].attr = TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INPUT;
+	params[1].u.value.a = (uint32_t)ctx;
+
+	ret = tee_client_invoke_func(ctx->asrte200_tee_ctx.tee_ctx, &invoke_arg, params);
+	if (ret != 0) {
+		goto exit;
+	} else if (invoke_arg.ret != 0) {
+		ret = -EIO;
+		goto exit;
+	}
+
+	return ret;
+
+exit:
+	asrte200_optee_close_ta(&ctx->asrte200_tee_ctx);
+	return ret;
+}
+
+static int asrte200_optee_acquire_hash_update(struct asr_optee_sha_reqctx *ctx, struct teec_uuid *uuid, u32 cmd, \
+									u32 alg, uint8_t *in, u32 inlen)
+{
+	struct tee_ioctl_invoke_arg invoke_arg;
+	struct tee_param params[2];
+	int ret = 0;
+	struct tee_shm *shm = NULL;
+	u8 *pbuf = NULL;
+
+	memset(&invoke_arg, 0x0, sizeof(struct tee_ioctl_invoke_arg));
+	invoke_arg.func = cmd;
+	invoke_arg.session  = ctx->asrte200_tee_ctx.session;
+	invoke_arg.num_params = 2;
+
+	shm = tee_shm_alloc(ctx->asrte200_tee_ctx.tee_ctx, inlen, TEE_SHM_MAPPED | TEE_SHM_DMA_BUF);
+	if (IS_ERR(shm)) {
+		/* nothing allocated yet, just drop the TA session */
+		asrte200_optee_close_ta(&ctx->asrte200_tee_ctx);
+		return PTR_ERR(shm);
+	}
+
+	pbuf = tee_shm_get_va(shm, 0);
+	memcpy(pbuf, in, inlen);
+
+	params[0].attr = TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INPUT;
+	params[0].u.memref.shm_offs = 0;
+	params[0].u.memref.size = inlen;
+	params[0].u.memref.shm = shm;
+
+	params[1].attr = TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INPUT;
+	params[1].u.value.a = (uint32_t)ctx;
+
+	ret = tee_client_invoke_func(ctx->asrte200_tee_ctx.tee_ctx, &invoke_arg, params);
+	if (ret != 0) {
+		goto exit;
+	} else if (invoke_arg.ret != 0) {
+		ret = -EIO;
+		goto exit;
+	}
+		
+	tee_shm_free(shm);
+	return ret;
+
+exit:
+	tee_shm_free(shm);
+	asrte200_optee_close_ta(&ctx->asrte200_tee_ctx);
+	return ret;
+}
+
+static int asrte200_optee_acquire_hash_final(struct asr_optee_sha_reqctx *ctx, struct teec_uuid *uuid, u32 cmd, u32 alg, u8 *out, u8 outlen)
+{
+	struct tee_ioctl_invoke_arg invoke_arg;
+	struct tee_param params[2];
+	int ret = 0;
+	struct tee_shm *shm = NULL;
+	u8 *pbuf = NULL;
+
+	memset(&invoke_arg, 0x0, sizeof(struct tee_ioctl_invoke_arg));
+	invoke_arg.func = cmd;
+	invoke_arg.session  = ctx->asrte200_tee_ctx.session;
+	invoke_arg.num_params = 2;
+
+	shm = tee_shm_alloc(ctx->asrte200_tee_ctx.tee_ctx, outlen, TEE_SHM_MAPPED | TEE_SHM_DMA_BUF);
+	if (IS_ERR(shm)) {
+		/* nothing allocated yet, just drop the TA session */
+		asrte200_optee_close_ta(&ctx->asrte200_tee_ctx);
+		return PTR_ERR(shm);
+	}
+
+	params[0].attr = TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_OUTPUT;
+	params[0].u.memref.shm_offs = 0;
+	params[0].u.memref.size = outlen;
+	params[0].u.memref.shm = shm;
+
+	params[1].attr = TEE_IOCTL_PARAM_ATTR_TYPE_VALUE_INPUT;
+	params[1].u.value.a = (uint32_t)ctx;
+
+	ret = tee_client_invoke_func(ctx->asrte200_tee_ctx.tee_ctx, &invoke_arg, params);
+	if (ret != 0) {
+		goto exit;
+	} else if (invoke_arg.ret != 0) {
+		ret = -EIO;
+		goto exit;
+	}
+	
+	pbuf = tee_shm_get_va(shm, 0);
+	memcpy(out, pbuf, outlen);
+
+exit:
+    tee_shm_free(shm);
+	asrte200_optee_close_ta(&ctx->asrte200_tee_ctx);
+	return ret;
+}
+
+static int asr_sha_handle_queue(struct asr_te200_sha *dd,
+				  struct ahash_request *req)
+{
+	struct crypto_async_request *async_req, *backlog;
+	struct asr_sha_ctx *ctx;
+	unsigned long flags;
+	bool start_async;
+	int err = 0, ret = 0;
+
+	spin_lock_irqsave(&dd->lock, flags);
+	if (req)
+		ret = ahash_enqueue_request(&dd->queue, req);
+
+	if (SHA_FLAGS_BUSY & dd->flags) {
+		spin_unlock_irqrestore(&dd->lock, flags);
+		return ret;
+	}
+
+	backlog = crypto_get_backlog(&dd->queue);
+	async_req = crypto_dequeue_request(&dd->queue);
+	if (async_req)
+		dd->flags |= SHA_FLAGS_BUSY;
+
+	spin_unlock_irqrestore(&dd->lock, flags);
+
+	if (!async_req) {
+		return ret;
+	}
+
+	if (backlog)
+		backlog->complete(backlog, -EINPROGRESS);
+
+	ctx = crypto_tfm_ctx(async_req->tfm);
+
+	dd->req = ahash_request_cast(async_req);
+	start_async = (dd->req != req);
+	dd->is_async = start_async;
+	dd->force_complete = false;
+
+	/* WARNING: ctx->start() MAY change dd->is_async. */
+	err = ctx->start(dd);
+	return (start_async) ? ret : err;
+}
+
+static int asr_sha_enqueue(struct ahash_request *req, unsigned int op)
+{
+	struct asr_optee_sha_reqctx *optee_ctx = ahash_request_ctx(req);
+	struct asr_sha_reqctx *ctx = &optee_ctx->reqctx;
+	struct asr_te200_sha *dd = ctx->dd;
+
+	ctx->op = op;
+
+	return asr_sha_handle_queue(dd, req);
+}
+
+static void asr_sha_copy_ready_hash(struct ahash_request *req)
+{
+	struct asr_optee_sha_reqctx *optee_ctx = ahash_request_ctx(req);
+	struct asr_sha_reqctx *ctx = &optee_ctx->reqctx;
+
+	if (!req->result)
+		return;
+
+	switch (ctx->flags & SHA_FLAGS_ALGO_MASK) {
+	case SHA_FLAGS_SHA1:
+		memcpy(req->result, ctx->digest, SHA1_DIGEST_SIZE);
+		break;
+	case SHA_FLAGS_SHA224:
+		memcpy(req->result, ctx->digest, SHA224_DIGEST_SIZE);
+		break;
+	case SHA_FLAGS_SHA256:
+		memcpy(req->result, ctx->digest, SHA256_DIGEST_SIZE);
+		break;
+	default:
+		return;
+	}
+}
+
+static inline int asr_sha_complete(struct asr_te200_sha *dd, int err)
+{
+	struct ahash_request *req = dd->req;
+	struct asr_optee_sha_reqctx *optee_ctx = ahash_request_ctx(req);
+	struct asr_sha_reqctx *ctx = &optee_ctx->reqctx;
+
+	dd->flags &= ~(SHA_FLAGS_BUSY);
+	ctx->flags &= ~(SHA_FLAGS_FINAL);
+
+	if ((dd->is_async || dd->force_complete) && req->base.complete)
+		req->base.complete(&req->base, err);
+
+	/* handle new request */
+	tasklet_schedule(&dd->queue_task);
+
+	return err;
+}
+
+static size_t asr_sha_append_sg(struct asr_sha_reqctx *ctx)
+{
+	size_t count;
+
+	while ((ctx->bufcnt < ctx->buflen) && ctx->total) {
+		count = min(ctx->sg->length - ctx->offset, ctx->total);
+		count = min(count, ctx->buflen - ctx->bufcnt);
+
+		if (count <= 0) {
+			/*
+			* Check if count <= 0 because the buffer is full or
+			* because the sg length is 0. In the latter case,
+			* check if there is another sg in the list, a 0 length
+			* sg doesn't necessarily mean the end of the sg list.
+			*/
+			if ((ctx->sg->length == 0) && !sg_is_last(ctx->sg)) {
+				ctx->sg = sg_next(ctx->sg);
+				continue;
+			} else {
+				break;
+			}
+		}
+
+		scatterwalk_map_and_copy(ctx->buffer + ctx->bufcnt, ctx->sg,
+			ctx->offset, count, 0);
+
+		ctx->bufcnt += count;
+		ctx->offset += count;
+		ctx->total -= count;
+
+		if (ctx->offset == ctx->sg->length) {
+			ctx->sg = sg_next(ctx->sg);
+			if (ctx->sg)
+				ctx->offset = 0;
+			else
+				ctx->total = 0;
+		}
+	}
+
+	return 0;
+}
+
+static int asr_sha_buff_init(struct asr_te200_sha *dd, uint32_t len)
+{
+	struct ahash_request *req = dd->req;
+	struct asr_optee_sha_reqctx *optee_ctx = ahash_request_ctx(req);
+	struct asr_sha_reqctx *ctx = &optee_ctx->reqctx;
+
+	ctx->buffer = (void *)__get_free_pages(GFP_KERNEL, get_order(len));
+	if (!ctx->buffer) {
+		dev_err(dd->dev, "unable to alloc pages.\n");
+		return -ENOMEM;
+	}
+
+	ctx->buflen = PAGE_SIZE << get_order(len);
+
+	return 0;
+}
+
+static void asr_sha_buff_cleanup(struct asr_te200_sha *dd, uint32_t len)
+{
+	struct ahash_request *req = dd->req;
+	struct asr_optee_sha_reqctx *optee_ctx = ahash_request_ctx(req);
+	struct asr_sha_reqctx *ctx = &optee_ctx->reqctx;
+
+	free_pages((unsigned long)ctx->buffer, get_order(len));
+	ctx->buflen = 0;
+}
+
+static int sha_init_req(struct asr_optee_sha_reqctx *optee_ctx)
+{
+	int ret = 0;
+	struct asr_sha_reqctx *ctx = &optee_ctx->reqctx;
+
+	/* hardware: hash init */
+	ret = asrte200_optee_acquire_hash_init(optee_ctx, &pta_sha_uuid, \
+								CMD_SHA_INIT, ctx->alg);
+	if (ret)
+		return -EINVAL;
+	return 0;
+}
+
+static int sha_update_req(struct asr_optee_sha_reqctx *optee_ctx)
+{
+	int ret = 0;
+	size_t bufcnt;
+	uint8_t *pdata;
+	struct asr_sha_reqctx *ctx = &optee_ctx->reqctx;
+	uint32_t buflen = ctx->total;
+
+	ret = asr_sha_buff_init(ctx->dd, ctx->total);
+	if (ret)
+		return -ENOMEM;
+	
+	asr_sha_append_sg(ctx);
+	bufcnt = ctx->bufcnt;
+	ctx->bufcnt = 0;
+
+	pdata = (uint8_t *)ctx->buffer;
+
+	/* hardware: hash process */
+	ret = asrte200_optee_acquire_hash_update(optee_ctx, &pta_sha_uuid, \
+			CMD_SHA_UPDATE, ctx->alg, pdata, bufcnt);
+	if (ret)
+		ret = -EINVAL;
+
+	asr_sha_buff_cleanup(ctx->dd, buflen);
+	return ret;
+}
+
+static void sha_finish_req(struct asr_optee_sha_reqctx *optee_ctx, int *err)
+{
+	struct asr_sha_reqctx *ctx = &optee_ctx->reqctx;
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(ctx->dd->req);
+	uint8_t *hash = (uint8_t *)ctx->digest;
+	uint32_t outlen = crypto_ahash_digestsize(tfm);
+
+	if (!(*err) && (ctx->flags & SHA_FLAGS_FINAL)) {
+		*err = asrte200_optee_acquire_hash_final(optee_ctx, &pta_sha_uuid, CMD_SHA_FINAL, \
+											ctx->alg, (uint8_t *)hash, outlen);
+		ctx->flags &= (~SHA_FLAGS_FINAL);
+		asr_sha_copy_ready_hash(ctx->dd->req);
+	} else {
+		ctx->flags |= SHA_FLAGS_ERROR;
+	}
+}
+
+static void sha_next_req(struct asr_optee_sha_reqctx *optee_ctx, int *err)
+{
+	struct asr_sha_reqctx *ctx = &optee_ctx->reqctx;
+
+	if (likely(!(*err) && (SHA_FLAGS_FINAL & ctx->flags)))
+		sha_finish_req(optee_ctx, err);
+
+	(void)asr_sha_complete(ctx->dd, *err);
+}
+
+static int asr_sha_done(struct asr_te200_sha *dd);
+
+static int asr_sha_start(struct asr_te200_sha *dd)
+{
+	int err = 0;
+	struct ahash_request *req = dd->req;
+	struct asr_optee_sha_reqctx *optee_ctx = ahash_request_ctx(req);
+	struct asr_sha_reqctx *ctx = &optee_ctx->reqctx;
+	 
+	mutex_lock(&queue_lock);
+
+	dd->resume = asr_sha_done;
+
+	if ((ctx->flags & SHA_FLAGS_INIT)) {
+		err = sha_init_req(optee_ctx);
+		ctx->flags &= (~SHA_FLAGS_INIT);
+	}
+
+	if (!err) {
+		if (ctx->op == SHA_OP_UPDATE) {
+			err = sha_update_req(optee_ctx);
+			if (!err && (ctx->flags & SHA_FLAGS_FINUP))
+				/* no final() after finup() */
+				sha_finish_req(optee_ctx, &err);
+		} else if (ctx->op == SHA_OP_FINAL) {
+			sha_finish_req(optee_ctx, &err);
+		}
+	}
+
+	if (unlikely(err != -EINPROGRESS))
+		/* Task will not finish it, so do it here */
+		sha_next_req(optee_ctx, &err);
+
+	mutex_unlock(&queue_lock);
+	return err;
+}
+
+static int asr_sha_cra_init(struct crypto_tfm *tfm)
+{
+	struct asr_sha_ctx *ctx = crypto_tfm_ctx(tfm);
+	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
+				 sizeof(struct asr_optee_sha_reqctx));
+	ctx->start = asr_sha_start;
+
+	return 0;
+}
+
+static void asr_sha_cra_exit(struct crypto_tfm *tfm)
+{
+	struct asr_sha_ctx *ctx = crypto_tfm_ctx(tfm);
+	memset(ctx, 0, sizeof(*ctx));
+}
+
+static inline void asr_sha_get(struct asr_te200_sha *dd)
+{
+	mutex_lock(&dd->sha_lock);
+}
+
+static inline void asr_sha_put(struct asr_te200_sha *dd)
+{
+	if (mutex_is_locked(&dd->sha_lock))
+		mutex_unlock(&dd->sha_lock);
+}
+
+static int asr_sha_init(struct ahash_request *req)
+{
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct asr_optee_sha_reqctx *optee_ctx = ahash_request_ctx(req);
+	struct asr_sha_reqctx *ctx = &optee_ctx->reqctx;
+	struct asr_te200_sha *dd = asr_sha_local;
+
+	asr_sha_get(dd);
+
+	ctx->dd = dd;
+	ctx->flags = 0;
+
+	switch (crypto_ahash_digestsize(tfm)) {
+	case SHA1_DIGEST_SIZE:
+		ctx->flags |= SHA_FLAGS_SHA1;
+		ctx->alg = TEE_ALG_SHA1;
+		break;
+	case SHA224_DIGEST_SIZE:
+		ctx->flags |= SHA_FLAGS_SHA224;
+		ctx->alg = TEE_ALG_SHA224;
+		break;
+	case SHA256_DIGEST_SIZE:
+		ctx->flags |= SHA_FLAGS_SHA256;
+		ctx->alg = TEE_ALG_SHA256;
+		break;
+	default:
+		asr_sha_put(dd);
+		return -EINVAL;
+	}
+
+	ctx->bufcnt = 0;
+	ctx->flags |= SHA_FLAGS_INIT;
+
+	asr_sha_put(dd);
+	return 0;
+}
+
+static int asr_sha_update(struct ahash_request *req)
+{
+	int ret = 0;
+	struct asr_optee_sha_reqctx *optee_ctx = ahash_request_ctx(req);
+	struct asr_sha_reqctx *ctx = &optee_ctx->reqctx;
+
+	asr_sha_get(ctx->dd);
+
+	ctx->total = req->nbytes;
+	ctx->sg = req->src;
+	ctx->offset = 0;
+
+	ret = asr_sha_enqueue(req, SHA_OP_UPDATE);
+	
+	asr_sha_put(ctx->dd);
+	return ret;
+}
+
+static int asr_sha_final(struct ahash_request *req)
+{
+	int ret = 0;
+	struct asr_optee_sha_reqctx *optee_ctx = ahash_request_ctx(req);
+	struct asr_sha_reqctx *ctx = &optee_ctx->reqctx;
+
+	asr_sha_get(ctx->dd);
+
+	ctx->flags |= SHA_FLAGS_FINAL;
+	if (ctx->flags & SHA_FLAGS_ERROR) {
+		asr_sha_put(ctx->dd);
+		return 0; /* uncompleted hash is not needed */
+	}
+	ret = asr_sha_enqueue(req, SHA_OP_FINAL);
+
+	asr_sha_put(ctx->dd);
+	return ret;
+}
+
+static int asr_sha_finup(struct ahash_request *req)
+{
+	struct asr_optee_sha_reqctx *optee_ctx = ahash_request_ctx(req);
+	struct asr_sha_reqctx *ctx = &optee_ctx->reqctx;
+	int err1, err2;
+
+	ctx->flags |= SHA_FLAGS_FINUP;
+
+	err1 = asr_sha_update(req);
+	if (err1 == -EINPROGRESS ||
+		(err1 == -EBUSY && (ahash_request_flags(req) &
+				CRYPTO_TFM_REQ_MAY_BACKLOG))) {
+		asr_sha_put(ctx->dd);
+		return err1;
+	}
+	/*
+	 * final() always has to be called to clean up resources,
+	 * even if update() failed, except in the EINPROGRESS case.
+	 */
+	err2 = asr_sha_final(req);
+
+	return err1 ?: err2;
+}
+
+static int asr_sha_digest(struct ahash_request *req)
+{
+	return asr_sha_init(req) ?: asr_sha_finup(req);
+}
+
+static int asr_sha_export(struct ahash_request *req, void *out)
+{
+	const struct asr_optee_sha_reqctx *ctx = ahash_request_ctx(req);
+
+	memcpy(out, ctx, sizeof(*ctx));
+	return 0;
+}
+
+static int asr_sha_import(struct ahash_request *req, const void *in)
+{
+	struct asr_optee_sha_reqctx *ctx = ahash_request_ctx(req);
+
+	memcpy(ctx, in, sizeof(*ctx));
+	return 0;
+}
+
+static struct ahash_alg sha_algs[] = {
+	/* sha1 */
+	{
+		.init		= asr_sha_init,
+		.update		= asr_sha_update,
+		.final		= asr_sha_final,
+		.finup		= asr_sha_finup,
+		.digest		= asr_sha_digest,
+		.export		= asr_sha_export,
+		.import		= asr_sha_import,
+		.halg = {
+			.digestsize	= SHA1_DIGEST_SIZE,
+			.statesize	= sizeof(struct asr_optee_sha_reqctx),
+			.base	= {
+				.cra_name		= "sha1",
+				.cra_driver_name	= "asr-sha1",
+				.cra_priority		= 300,
+				.cra_flags		= CRYPTO_ALG_ASYNC,
+				.cra_blocksize		= SHA1_BLOCK_SIZE,
+				.cra_ctxsize		= sizeof(struct asr_sha_ctx),
+				.cra_alignmask		= 0,
+				.cra_module		= THIS_MODULE,
+				.cra_init		= asr_sha_cra_init,
+				.cra_exit		= asr_sha_cra_exit,
+			}
+		}
+	},
+
+	/* sha256 */
+	{
+		.init		= asr_sha_init,
+		.update		= asr_sha_update,
+		.final		= asr_sha_final,
+		.finup		= asr_sha_finup,
+		.digest		= asr_sha_digest,
+		.export		= asr_sha_export,
+		.import		= asr_sha_import,
+		.halg = {
+			.digestsize	= SHA256_DIGEST_SIZE,
+			.statesize	= sizeof(struct asr_optee_sha_reqctx),
+			.base	= {
+				.cra_name		= "sha256",
+				.cra_driver_name	= "asr-sha256",
+				.cra_priority		= 300,
+				.cra_flags		= CRYPTO_ALG_ASYNC,
+				.cra_blocksize		= SHA256_BLOCK_SIZE,
+				.cra_ctxsize		= sizeof(struct asr_sha_ctx),
+				.cra_alignmask		= 0,
+				.cra_module		= THIS_MODULE,
+				.cra_init		= asr_sha_cra_init,
+				.cra_exit		= asr_sha_cra_exit,
+			}
+		}
+	},
+
+	/* sha224 */
+	{
+		.init		= asr_sha_init,
+		.update		= asr_sha_update,
+		.final		= asr_sha_final,
+		.finup		= asr_sha_finup,
+		.digest		= asr_sha_digest,
+		.export		= asr_sha_export,
+		.import		= asr_sha_import,
+		.halg = {
+			.digestsize	= SHA224_DIGEST_SIZE,
+			.statesize	= sizeof(struct asr_optee_sha_reqctx),
+			.base	= {
+				.cra_name		= "sha224",
+				.cra_driver_name	= "asr-sha224",
+				.cra_priority		= 300,
+				.cra_flags		= CRYPTO_ALG_ASYNC,
+				.cra_blocksize		= SHA224_BLOCK_SIZE,
+				.cra_ctxsize		= sizeof(struct asr_sha_ctx),
+				.cra_alignmask		= 0,
+				.cra_module		= THIS_MODULE,
+				.cra_init		= asr_sha_cra_init,
+				.cra_exit		= asr_sha_cra_exit,
+			}
+		}
+	},
+};
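+
+/*
+ * Illustrative kernel usage sketch (an assumption for documentation, not
+ * part of the driver): computing a digest through the asynchronous "sha256"
+ * transform registered above ("asr-sha256" is typically selected since its
+ * priority, 300, is higher than the generic implementation). The sg and out
+ * buffers are hypothetical and error handling is trimmed.
+ *
+ *	struct crypto_ahash *tfm = crypto_alloc_ahash("sha256", 0, 0);
+ *	struct ahash_request *req = ahash_request_alloc(tfm, GFP_KERNEL);
+ *	DECLARE_CRYPTO_WAIT(wait);
+ *
+ *	ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+ *				   crypto_req_done, &wait);
+ *	ahash_request_set_crypt(req, sg, out, len);
+ *	crypto_wait_req(crypto_ahash_digest(req), &wait);
+ *
+ *	ahash_request_free(req);
+ *	crypto_free_ahash(tfm);
+ */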
+
+static void asr_sha_queue_task(unsigned long data)
+{
+	struct asr_te200_sha *dd = (struct asr_te200_sha *)data;
+
+	asr_sha_handle_queue(dd, NULL);
+}
+
+static int asr_sha_done(struct asr_te200_sha *dd)
+{
+	int err = 0;
+	struct ahash_request *req = dd->req;
+	struct asr_optee_sha_reqctx *ctx = ahash_request_ctx(req);
+
+	sha_finish_req(ctx, &err);
+
+	return err;
+}
+
+static void asr_sha_done_task(unsigned long data)
+{
+	struct asr_te200_sha *dd = (struct asr_te200_sha *)data;
+
+	dd->is_async = true;
+	(void)dd->resume(dd);
+}
+
+static int hash_handle(int alg, uint8_t *in, uint32_t inlen, uint8_t *out)
+{
+	int ret = 0;
+	uint32_t outlen;
+	struct asr_optee_sha_reqctx ctx;
+
+	switch(alg) {
+	case TEE_ALG_SHA256:
+		outlen = 32;
+		break;
+	case TEE_ALG_SHA224:
+		outlen = 28;
+		break;
+	case TEE_ALG_SHA1:
+		outlen = 20;
+		break;
+	default:
+		ret = -1;
+		goto exit;
+	}
+
+	ret = asrte200_optee_acquire_hash_init(&ctx, &pta_sha_uuid, CMD_SHA_INIT, alg);
+	if (ret) {
+		ret = -1;
+		goto exit;
+	}
+
+	ret = asrte200_optee_acquire_hash_update(&ctx, &pta_sha_uuid, CMD_SHA_UPDATE, alg, in, inlen);
+	if (ret) {
+		ret = -1;
+		goto exit;
+	}
+
+	ret = asrte200_optee_acquire_hash_final(&ctx, &pta_sha_uuid, CMD_SHA_FINAL, alg, out, outlen);
+	if (ret) {
+		ret = -1;
+		goto exit;
+	}
+
+exit:
+	return ret;
+}
+
+static int tee_hwhash_func_verify(void)
+{
+	int ret = 0;
+	unsigned char out_sha256[32] = {0};
+	const struct {
+		const char *msg;
+		uint8_t hash[32];
+	} sha256_tests = {
+		"abc", 
+		{   0xBA, 0x78, 0x16, 0xBF, 0x8F, 0x01, 
+			0xCF, 0xEA, 0x41, 0x41, 0x40, 0xDE, 
+			0x5D, 0xAE, 0x22, 0x23, 0xB0, 0x03, 
+			0x61, 0xA3, 0x96, 0x17, 0x7A, 0x9C, 
+			0xB4, 0x10, 0xFF, 0x61, 0xF2, 0x00, 
+			0x15, 0xAD
+		}
+	};
+
+	ret = hash_handle(TEE_ALG_SHA256, (uint8_t *)sha256_tests.msg, strlen(sha256_tests.msg), out_sha256);
+	if (ret) 
+		return ret;
+
+	if (memcmp(out_sha256, sha256_tests.hash, sizeof(out_sha256))) {
+		return -1;
+	}
+
+	return 0;
+}
+
+// #define ASR_TE200_SHA_TEST
+
+#ifdef ASR_TE200_SHA_TEST
+static int te200_sha_test(void);
+#endif
+
+int asr_te200_sha_register(struct asr_te200_dev *te200_dd)
+{
+	int err, i, j;
+	struct asr_te200_sha *sha_dd;
+
+	sha_dd = &te200_dd->asr_sha;
+	sha_dd->dev = te200_dd->dev;
+
+	asr_sha_local = sha_dd;
+
+	spin_lock_init(&sha_dd->lock);
+	mutex_init(&sha_dd->sha_lock);
+	tasklet_init(&sha_dd->done_task, asr_sha_done_task,
+					(unsigned long)sha_dd);
+	tasklet_init(&sha_dd->queue_task, asr_sha_queue_task,
+					(unsigned long)sha_dd);
+	crypto_init_queue(&sha_dd->queue, ASR_SHA_QUEUE_LENGTH);
+
+	/* don't register the sha algs if the hash self-test against the TOS fails */
+	err = tee_hwhash_func_verify();
+	if (err)
+		return err;
+	
+	for (i = 0; i < ARRAY_SIZE(sha_algs); i++) {
+		err = crypto_register_ahash(&sha_algs[i]);
+		if (err)
+			goto err_sha_algs;
+	}
+
+#ifdef ASR_TE200_SHA_TEST
+	te200_sha_test();
+#endif
+
+	return 0;
+
+err_sha_algs:
+	for (j = 0; j < i; j++)
+		crypto_unregister_ahash(&sha_algs[j]);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(asr_te200_sha_register);
+
+int asr_te200_sha_unregister(struct asr_te200_dev *te200_dd)
+{
+	int i;
+	struct asr_te200_sha *sha_dd = &te200_dd->asr_sha;
+
+	for (i = 0; i < ARRAY_SIZE(sha_algs); i++)
+		crypto_unregister_ahash(&sha_algs[i]);
+
+	tasklet_kill(&sha_dd->queue_task);
+	tasklet_kill(&sha_dd->done_task);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(asr_te200_sha_unregister);
+
+
+
+#ifdef ASR_TE200_SHA_TEST
+static int te200_sha_test(void)
+{
+	int ret = 0;
+	
+	const struct {
+		const char *msg;
+		uint8_t hash[20];
+	} sha1_tests[] = {
+		{
+			"abc", 
+			{   0xa9, 0x99, 0x3e, 0x36, 0x47, 0x06, 
+				0x81, 0x6a, 0xba, 0x3e, 0x25, 0x71, 
+				0x78, 0x50, 0xc2, 0x6c, 0x9c, 0xd0,
+				0xd8, 0x9d 
+			}
+		},
+		{
+			"asjhsdjljfdsdjjkdfwyqeuwouzxkmcxjkmwqdsjklfdfjlkdfkfs" \
+			"fkjlfskjdflioherfjjfdjkfdnkfdfdojjodfjdfjflj;sljjlfkkl" \
+			"nfnkgbhhoigfhigfopojpfjojpoffkjlfskjdflioherfjjfdjkfdn" \
+			"kfdfdojjodfjdfjfljnfnkgbhhoigfhigfoponfnkgbhhoigfhigfopojpfjo",
+			{
+				0x93, 0x84, 0x7f, 0x98, 0x22, 0x5e, 
+				0x6d, 0xf2, 0x09, 0x1c, 0xc9, 0xac, 
+				0xbb, 0x5d, 0x00, 0x2d, 0x64, 0x81, 
+				0xe3, 0xcd
+			}
+		},
+		{
+			"asjhsdjljfdsdjjkdfwyqeuwouzxkmcxjkmwqdsjklfdfjlkdfkfs" \
+			"fkjlfskjdflioherfjjfdjkfdnkfdfdojjodfjdfjflj;sljjlfkkl" \
+			"nfnkgbhhoigfhigfopojpfjojpoffkjlfskjdflioherfjjfdjkfdn" \
+			"kfdfdojjodfjdfjfljnfnkgbhhoigfhigfoponfnkgbhhoigfhigfopojpfjoewiroiowiod",
+			{
+				0x6a, 0x66, 0xc2, 0x87, 0x84, 0x36, 
+				0x14, 0x90, 0x99, 0x03, 0x90, 0xf0, 
+				0xaa, 0x7e, 0xbd, 0xc7, 0xdb, 0x38, 
+				0x54, 0x09
+			}
+		},
+		{
+			"asjhsdjljfdsdjjkdfwyqeuwouzxkmcxjkmwqds"
+			"jklfdfjlkdfkfsfkjlfskjdflioherfjjfdjkfd"
+			"nkfdfdojjodfjdfjflj;sljjlfkklnfnkgbhhoi"
+			"gfhigfopojpfjojpoffkjlfskjdflioherfjjfd"
+			"jkfdnkfdfdojjodfjdfjfljnfnkgbhhoigfhigf"
+			"oponfnkgbhhoigfhigfopojpfjoewiroiowiods"
+			"djkisijdknknkskdnknflnnesniewinoinknmdn"
+			"kknknsdnjjfsnnkfnkknslnklknfnknkflksnlk"
+			"lskldklklklnmlflmlmlfmlfml",
+			{
+				0xc4, 0x53, 0xca, 0x24, 0xfa, 0xe5,
+				0x39, 0x53, 0x08, 0x8c, 0x57, 0x1a, 
+				0x96, 0xe9, 0x64, 0x7f, 0xd5, 0xf9, 
+				0x13, 0x91
+			}
+		}
+	};
+
+	struct asr_optee_sha_reqctx ctx1;
+	struct asr_optee_sha_reqctx ctx2;
+	struct asr_optee_sha_reqctx ctx3;
+	struct asr_optee_sha_reqctx ctx4;
+	unsigned char out_sha1_1[20] = {0};
+	unsigned char out_sha1_2[20] = {0};
+	unsigned char out_sha1_3[20] = {0};
+	unsigned char out_sha1_4[20] = {0};
+
+	ret = asrte200_optee_acquire_hash_init(&ctx1, &pta_sha_uuid, CMD_SHA_INIT, TEE_ALG_SHA1);
+	if (ret) {
+		return ret;
+	}
+
+	ret = asrte200_optee_acquire_hash_init(&ctx2, &pta_sha_uuid, CMD_SHA_INIT, TEE_ALG_SHA1);
+	if (ret) {
+		return ret;
+	}
+
+	ret = asrte200_optee_acquire_hash_update(&ctx1, &pta_sha_uuid, CMD_SHA_UPDATE, TEE_ALG_SHA1, 
+										(uint8_t *)sha1_tests[0].msg, strlen(sha1_tests[0].msg));
+	if (ret) {
+		return ret;
+	}
+
+	ret = asrte200_optee_acquire_hash_init(&ctx3, &pta_sha_uuid, CMD_SHA_INIT, TEE_ALG_SHA1);
+	if (ret) {
+		return ret;
+	}
+
+	ret = asrte200_optee_acquire_hash_update(&ctx2, &pta_sha_uuid, CMD_SHA_UPDATE, TEE_ALG_SHA1, 
+											(uint8_t *)sha1_tests[1].msg, 10);
+	if (ret) {
+		return ret;
+	}
+
+	ret = asrte200_optee_acquire_hash_update(&ctx2, &pta_sha_uuid, CMD_SHA_UPDATE, TEE_ALG_SHA1, 
+							(uint8_t *)sha1_tests[1].msg + 10, strlen(sha1_tests[1].msg) - 10);
+	if (ret) {
+		return ret;
+	}
+
+	ret = asrte200_optee_acquire_hash_final(&ctx1, &pta_sha_uuid, CMD_SHA_FINAL, TEE_ALG_SHA1, 
+											out_sha1_1, sizeof(out_sha1_1));
+	if (ret) {
+		return ret;
+	}
+
+	ret = asrte200_optee_acquire_hash_update(&ctx3, &pta_sha_uuid, CMD_SHA_UPDATE, TEE_ALG_SHA1, 
+											(uint8_t *)sha1_tests[2].msg, 25);
+	if (ret) {
+		return ret;
+	}
+
+	ret = asrte200_optee_acquire_hash_init(&ctx4, &pta_sha_uuid, CMD_SHA_INIT, TEE_ALG_SHA1);
+	if (ret) {
+		return ret;
+	}
+
+	ret = asrte200_optee_acquire_hash_final(&ctx2, &pta_sha_uuid, CMD_SHA_FINAL, TEE_ALG_SHA1, 
+											out_sha1_2, sizeof(out_sha1_2));
+	if (ret) {
+		return ret;
+	}
+
+	ret = asrte200_optee_acquire_hash_update(&ctx3, &pta_sha_uuid, CMD_SHA_UPDATE, TEE_ALG_SHA1, 
+						(uint8_t *)sha1_tests[2].msg + 25, strlen(sha1_tests[2].msg) - 25);
+	if (ret) {
+		return ret;
+	}
+
+	ret = asrte200_optee_acquire_hash_final(&ctx3, &pta_sha_uuid, CMD_SHA_FINAL, TEE_ALG_SHA1, 
+											out_sha1_3, sizeof(out_sha1_3));
+	if (ret) {
+		return ret;
+	}
+
+	ret = asrte200_optee_acquire_hash_update(&ctx4, &pta_sha_uuid, CMD_SHA_UPDATE, TEE_ALG_SHA1, 
+											(uint8_t *)sha1_tests[3].msg, 43);
+	if (ret) {
+		return ret;
+	}
+	ret = asrte200_optee_acquire_hash_update(&ctx4, &pta_sha_uuid, CMD_SHA_UPDATE, TEE_ALG_SHA1, 
+						(uint8_t *)sha1_tests[3].msg + 43, strlen(sha1_tests[3].msg) - 43);
+	if (ret) {
+		return ret;
+	}
+
+	ret = asrte200_optee_acquire_hash_final(&ctx4, &pta_sha_uuid, CMD_SHA_FINAL, TEE_ALG_SHA1, 
+											out_sha1_4, sizeof(out_sha1_4));
+	if (ret) {
+		return ret;
+	}
+
+	if (memcmp(out_sha1_1, sha1_tests[0].hash, sizeof(out_sha1_1))) {
+		printk("sha1 test 0 failed\n");
+	} else {
+		printk("sha1 test 0 pass\n");
+	}
+	if (memcmp(out_sha1_2, sha1_tests[1].hash, sizeof(out_sha1_2))) {
+		printk("sha1 test 1 failed\n");
+	} else {
+		printk("sha1 test 1 pass\n");
+	}
+	if (memcmp(out_sha1_3, sha1_tests[2].hash, sizeof(out_sha1_3))) {
+		printk("sha1 test 2 failed\n");
+	} else {
+		printk("sha1 test 2 pass\n");
+	}
+	if (memcmp(out_sha1_4, sha1_tests[3].hash, sizeof(out_sha1_4))) {
+		printk("sha1 test 3 failed\n");
+	} else {
+		printk("sha1 test 3 pass\n");
+	}
+
+
+	return 0;
+}
+#endif
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("wangyonggan <yongganwang@asrmicro.com>");
+MODULE_DESCRIPTION("ASR te200 sha driver");
\ No newline at end of file
diff --git a/marvell/linux/drivers/crypto/asr/te200_optee/asr-sha-optee.h b/marvell/linux/drivers/crypto/asr/te200_optee/asr-sha-optee.h
new file mode 100644
index 0000000..acad9de
--- /dev/null
+++ b/marvell/linux/drivers/crypto/asr/te200_optee/asr-sha-optee.h
@@ -0,0 +1,60 @@
+#ifndef _ASR_TE200_SHA_OPTEE_H_
+#define _ASR_TE200_SHA_OPTEE_H_
+
+#include "asr-te200-optee.h"
+#ifdef CONFIG_TEE
+#include <linux/tee_drv.h>
+#endif
+
+#define ASR_SHA_ACCESS_UUID 									\
+        { 														\
+            0xc6445f2a, 0x3365, 0x11ef, 						\
+            { 0x9e, 0x32, 0xe7, 0x0c, 0x07, 0x9f, 0x77, 0xec } 	\
+        }														\
+
+#define TEE_ALG_MD5                             0x50000001
+#define TEE_ALG_SHA1                            0x50000002
+#define TEE_ALG_SHA224                          0x50000003
+#define TEE_ALG_SHA256                          0x50000004
+#define TEE_ALG_SHA384                          0x50000005
+#define TEE_ALG_SHA512                          0x50000006
+
+#define HASH_CONTEXT_SIZE (256)
+
+/*
+ * hash init params
+ *
+ * [in]     pParams[0].value.a          hash algorithm type
+ * [in]     pParams[1].value.a          address of the caller's hash context (e.g. from the kernel)
+ */
+#define CMD_SHA_INIT         0x1
+
+/*
+ * hash update params
+ *
+ * when the input address is shared memory (e.g. params from the kernel):
+ * [in]     pParams[0].memref.buffer    input data
+ * [in]     pParams[0].memref.size      length of input data
+ * [in]     pParams[1].value.a          address of the caller's hash context (e.g. from the kernel)
+ *
+ * when the input address is a physical address (e.g. params from u-boot):
+ * [in]     pParams[0].value.a      input data address
+ * [in]     pParams[0].value.b      length of input data
+ * [in]     pParams[1].value.a      whether it is a physical address
+ */
+#define CMD_SHA_UPDATE       0x2
+
+/*
+ * hash finish params
+ *
+ * [out]    pParams[0].memref.buffer    output hash
+ * [out]    pParams[0].memref.size      length of output hash
+ * [in]     pParams[1].value.a          address of the caller's hash context (e.g. from the kernel)
+ */
+#define CMD_SHA_FINAL         0x3
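+
+/*
+ * Illustrative call sequence (added for documentation; it mirrors
+ * hash_handle() in asr-sha-optee.c): the kernel side opens the PTA session,
+ * then invokes
+ *
+ *	CMD_SHA_INIT     once, passing the algorithm and context address;
+ *	CMD_SHA_UPDATE   any number of times, with chunks of input data;
+ *	CMD_SHA_FINAL    once, to collect the digest;
+ *
+ * and closes the session after CMD_SHA_FINAL (or on any error).
+ */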
+
+struct asr_optee_sha_reqctx {
+    struct asr_sha_reqctx reqctx;
+    struct asrte200_tee_context asrte200_tee_ctx;
+};
+
+#endif
diff --git a/marvell/linux/drivers/crypto/asr/te200_optee/asr-te200-optee.c b/marvell/linux/drivers/crypto/asr/te200_optee/asr-te200-optee.c
new file mode 100644
index 0000000..7196382
--- /dev/null
+++ b/marvell/linux/drivers/crypto/asr/te200_optee/asr-te200-optee.c
@@ -0,0 +1,231 @@
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/clk-provider.h>
+#include <linux/clk.h>
+#include <linux/io.h>
+#include <linux/hw_random.h>
+#include <linux/platform_device.h>
+#include <linux/device.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/scatterlist.h>
+#include <linux/dma-mapping.h>
+#include <linux/of_device.h>
+#include <linux/delay.h>
+#include <linux/crypto.h>
+#include <linux/cputype.h>
+#include <crypto/scatterwalk.h>
+#include <crypto/algapi.h>
+#include <linux/tee_drv.h>
+
+#include "asr-te200-optee.h"
+
+static void asrte200_uuid_to_octets(uint8_t d[TEE_IOCTL_UUID_LEN], struct teec_uuid *s)
+{
+	d[0] = s->timeLow >> 24;
+	d[1] = s->timeLow >> 16;
+	d[2] = s->timeLow >> 8;
+	d[3] = s->timeLow;
+	d[4] = s->timeMid >> 8;
+	d[5] = s->timeMid;
+	d[6] = s->timeHiAndVersion >> 8;
+	d[7] = s->timeHiAndVersion;
+	memcpy(d + 8, s->clockSeqAndNode, sizeof(s->clockSeqAndNode));
+}
+
+static int asrte200_tee_match_cb(struct tee_ioctl_version_data *ver, const void *data)
+{
+	return 1;
+}
+
+int asrte200_optee_open_ta(struct asrte200_tee_context *ctx, struct teec_uuid *uuid)
+{
+	struct tee_ioctl_open_session_arg open_session_arg;
+	int ret;
+
+	if (ctx == NULL)
+		return -EINVAL;
+
+	ctx->session = 0;
+	ctx->tee_ctx = tee_client_open_context(NULL, asrte200_tee_match_cb, NULL, NULL);
+	if (IS_ERR(ctx->tee_ctx)) {
+		ret = PTR_ERR(ctx->tee_ctx);
+		ctx->tee_ctx = NULL;
+		return ret;
+	}
+
+	memset(&open_session_arg, 0x0, sizeof(struct tee_ioctl_open_session_arg));
+	asrte200_uuid_to_octets(open_session_arg.uuid, uuid);
+	open_session_arg.clnt_login = TEE_IOCTL_LOGIN_PUBLIC;
+	open_session_arg.num_params = 0;
+	ret = tee_client_open_session(ctx->tee_ctx, &open_session_arg, NULL);
+	if (ret != 0) {
+		goto err_exit;
+	} else if (open_session_arg.ret != 0) {
+		ret = -EIO;
+		goto err_exit;
+	}
+
+	ctx->session = open_session_arg.session;
+
+	return ret;
+err_exit:
+	tee_client_close_context(ctx->tee_ctx);
+	ctx->tee_ctx = NULL;
+	return ret;
+}
+
+int asrte200_optee_close_ta(struct asrte200_tee_context *ctx)
+{
+	int ret;
+
+	if (ctx == NULL)
+		return -EINVAL;
+
+	ret = tee_client_close_session(ctx->tee_ctx, ctx->session);
+
+	tee_client_close_context(ctx->tee_ctx);
+
+	return ret;
+}
+
+#if defined(CONFIG_OF)
+static const struct of_device_id asr_te200_dt_ids[] = {
+	{ .compatible = "asr,asr-te200" },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, asr_te200_dt_ids);
+#endif
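+
+/*
+ * Illustrative device-tree fragment matched by this driver (an assumption
+ * for documentation; the node name and any reg/clock properties of the real
+ * platform .dts are omitted). The three boolean properties select which
+ * engines the probe below registers:
+ *
+ *	te200 {
+ *		compatible = "asr,asr-te200";
+ *		asr,asr-cipher;
+ *		asr,asr-sha;
+ *		asr,asr-rsa;
+ *	};
+ */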
+
+static int asr_te200_probe(struct platform_device *pdev)
+{
+	struct asr_te200_dev *te200_dd;
+	struct device *dev = &pdev->dev;
+	struct device_node *np = NULL;
+	int err = 0, devnum = 0;
+
+	te200_dd = devm_kzalloc(&pdev->dev, sizeof(*te200_dd), GFP_KERNEL);
+	if (te200_dd == NULL) {
+		err = -ENOMEM;
+		goto no_mem_err;
+	}
+
+	np = dev->of_node;
+	te200_dd->dev = dev;
+
+	platform_set_drvdata(pdev, te200_dd);
+
+#ifdef CONFIG_ASR_TE200_CIPHER
+	if (of_get_property(np, "asr,asr-cipher", NULL)) {
+		err = asr_te200_cipher_register(te200_dd);
+		if (err)
+			goto res_err;
+		dev_info(dev, "Cipher engine is initialized\n");
+		devnum++;
+	}
+#endif
+
+#ifdef CONFIG_ASR_TE200_SHA
+	if (of_get_property(np, "asr,asr-sha", NULL)) {
+		err = asr_te200_sha_register(te200_dd);
+		if (err)
+			goto sha_err;
+		dev_info(dev, "SHA engine is initialized\n");
+		devnum++;
+	}
+#endif
+
+#ifdef CONFIG_ASR_TE200_RSA
+	if (of_get_property(np, "asr,asr-rsa", NULL)) {
+		err = asr_te200_rsa_register(te200_dd);
+		if (err)
+			goto rsa_err;
+		dev_info(dev, "RSA engine is initialized\n");
+		devnum++;
+	}
+#endif
+
+	if (!devnum) {
+		dev_err(dev, "No TE200 device enabled\n");
+		err = -ENODEV;
+		goto res_err;
+	}
+
+	return 0;
+
+#ifdef CONFIG_ASR_TE200_RSA
+rsa_err:
+#ifdef CONFIG_ASR_TE200_SHA
+	asr_te200_sha_unregister(te200_dd);
+#endif
+#endif
+#ifdef CONFIG_ASR_TE200_SHA
+sha_err:
+#ifdef CONFIG_ASR_TE200_CIPHER
+	asr_te200_cipher_unregister(te200_dd);
+#endif
+#endif
+res_err:
+	devm_kfree(dev, te200_dd);
+no_mem_err:
+	dev_err(dev, "initialization failed.\n");
+
+	return err;
+}
+
+static int asr_te200_remove(struct platform_device *pdev)
+{
+	struct asr_te200_dev *te200_dd;
+
+	te200_dd = platform_get_drvdata(pdev);
+	if (!te200_dd)
+		return -ENODEV;
+
+#ifdef CONFIG_ASR_TE200_CIPHER
+	asr_te200_cipher_unregister(te200_dd);
+#endif
+
+#ifdef CONFIG_ASR_TE200_SHA
+	asr_te200_sha_unregister(te200_dd);
+#endif
+
+#ifdef CONFIG_ASR_TE200_RSA
+	asr_te200_rsa_unregister(te200_dd);
+#endif
+
+	devm_kfree(te200_dd->dev, te200_dd);
+
+	return 0;
+}
+
+static struct platform_driver asr_te200_driver = {
+	.probe		= asr_te200_probe,
+	.remove		= asr_te200_remove,
+	.driver		= {
+		.name	= "asr_te200",
+		.of_match_table = of_match_ptr(asr_te200_dt_ids),
+	},
+};
+
+static int __init asr_te200_init(void)
+{
+	int ret;
+
+	if (!cpu_is_asr1903_b0()) {
+		return 0;
+	}
+
+	ret = platform_driver_register(&asr_te200_driver);
+
+	return ret;
+}
+
+device_initcall_sync(asr_te200_init);
+
+MODULE_DESCRIPTION("TE200: ASR Trust Engine support.");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Yonggan Wang");
\ No newline at end of file
diff --git a/marvell/linux/drivers/crypto/asr/te200_optee/asr-te200-optee.h b/marvell/linux/drivers/crypto/asr/te200_optee/asr-te200-optee.h
new file mode 100644
index 0000000..6729498
--- /dev/null
+++ b/marvell/linux/drivers/crypto/asr/te200_optee/asr-te200-optee.h
@@ -0,0 +1,55 @@
+#ifndef _ASR_TE200_OPTEE_H_
+#define _ASR_TE200_OPTEE_H_
+
+#include <linux/crypto.h>
+#include <crypto/algapi.h>
+#include <linux/interrupt.h>
+#include <linux/mutex.h>
+#include <linux/miscdevice.h>
+
+#include "../te200/asr-sha.h"
+#include "asr-cipher-optee.h"
+#include "asr-rsa-optee.h"
+
+struct teec_uuid {
+	uint32_t timeLow;
+	uint16_t timeMid;
+	uint16_t timeHiAndVersion;
+	uint8_t clockSeqAndNode[8];
+};
+
+struct asrte200_tee_context {
+	struct tee_context *tee_ctx;
+	int session;
+};
+
+struct asr_te200_dev {
+	struct device		*dev;
+	struct asr_te200_sha asr_sha;
+	struct asr_te200_cipher asr_cipher;
+	struct asr_te200_rsa asr_rsa;
+};
+
+struct asr_te200_ops {
+	int (*dev_get)(struct asr_te200_dev *);
+	int (*dev_put)(struct asr_te200_dev *);
+};
+
+int asrte200_optee_open_ta(struct asrte200_tee_context *ctx, struct teec_uuid *uuid);
+int asrte200_optee_close_ta(struct asrte200_tee_context *ctx);
+
+#ifdef CONFIG_ASR_TE200_CIPHER
+int asr_te200_cipher_register(struct asr_te200_dev *te200_dd);
+int asr_te200_cipher_unregister(struct asr_te200_dev *te200_dd);
+#endif
+
+#ifdef CONFIG_ASR_TE200_SHA
+int asr_te200_sha_register(struct asr_te200_dev *te200_dd);
+int asr_te200_sha_unregister(struct asr_te200_dev *te200_dd);
+#endif
+
+#ifdef CONFIG_ASR_TE200_RSA
+int asr_te200_rsa_register(struct asr_te200_dev *te200_dd);
+int asr_te200_rsa_unregister(struct asr_te200_dev *te200_dd);
+#endif
+#endif
\ No newline at end of file