[Feature]add MT2731_MP2_MR2_SVN388 baseline version

Change-Id: Ief04314834b31e27effab435d3ca8ba33b499059
diff --git a/src/bsp/lk/lib/sha256/arch/arm64/sha256-armv8.S b/src/bsp/lk/lib/sha256/arch/arm64/sha256-armv8.S
new file mode 100644
index 0000000..28be18a
--- /dev/null
+++ b/src/bsp/lk/lib/sha256/arch/arm64/sha256-armv8.S
@@ -0,0 +1,198 @@
+/*

+ * ====================================================================

+ * Written by Andy Polyakov <appro@openssl.org> for the OpenSSL

+ * project. The module is, however, dual licensed under OpenSSL and

+ * CRYPTOGAMS licenses depending on where you obtain it. For further

+ * details see http://www.openssl.org/~appro/cryptogams/.

+ * ====================================================================

+ *

+ * SHA256/512 for ARMv8.

+ *

+ * Performance in cycles per processed byte and improvement coefficient

+ * over code generated with "default" compiler:

+ *

+ *		SHA256-hw	SHA256(*)	SHA512

+ * Apple A7	1.97		10.5 (+33%)	6.73 (-1%(**))

+ * Cortex-A53	2.38		15.6 (+110%)	10.1 (+190%(***))

+ * Cortex-A57	2.31		11.6 (+86%)	7.51 (+260%(***))

+ *

+ * (*)	Software SHA256 results are of lesser relevance, presented

+ *	mostly for informational purposes.

+ * (**)	The result is a trade-off: it's possible to improve it by

+ *	10% (or by 1 cycle per round), but at the cost of 20% loss

+ *	on Cortex-A53 (or by 4 cycles per round).

+ * (***)	Super-impressive coefficients over gcc-generated code are

+ *	indication of some compiler "pathology", most notably code

+ *	generated with -mgeneral-regs-only is significantly faster

+ *	and lags behind assembly only by 50-90%.

+ */

+

+.text	// the constant table below is placed in the code section and addressed with adr from the function

+.globl	sha256_block_data_order	// exported entry point; its label follows the constant data

+.type	sha256_block_data_order,%function

+.align	6	// 2^6 = 64-byte alignment for the table that follows

+.type	.LK256,%object

+.LK256:	// 64 SHA-256 round constants, fetched four at a time through x3 in .Loop_hw

+.long	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5	// K[0..3]

+.long	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5	// K[4..7]

+.long	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3	// K[8..11]

+.long	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174	// K[12..15]

+.long	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc	// K[16..19]

+.long	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da	// K[20..23]

+.long	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7	// K[24..27]

+.long	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967	// K[28..31]

+.long	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13	// K[32..35]

+.long	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85	// K[36..39]

+.long	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3	// K[40..43]

+.long	0xd192e819,0xd6990624,0xf40e3585,0x106aa070	// K[44..47]

+.long	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5	// K[48..51]

+.long	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3	// K[52..55]

+.long	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208	// K[56..59]

+.long	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2	// K[60..63]

+.long	0	//terminator (the code in this file loads exactly 64 words and never reads it)

+.size	.LK256,.-.LK256	// size covers the 64 constants plus the terminator word

+.align	3	// 2^3 = 8-byte alignment for the .quad below

+.LOPENSSL_armcap_P:	// not referenced by any instruction in this file

+.quad	OPENSSL_armcap_P-.	// 64-bit offset from this location to the OPENSSL_armcap_P symbol

+.byte	83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0	// ASCII "SHA256 block transform for ARMv8, CRYPTOGAMS by <appro@openssl.org>", NUL-terminated

+.align	2	// 2^2 = 4-byte alignment after the byte string

+.align	2	// repeats the previous directive; no further effect

+.type	sha256_block_data_order,%function	// same directive as the one ahead of the constant table

+.align	6	// function entry aligned to 2^6 = 64 bytes

+sha256_block_data_order:	// x0 = eight 32-bit state words (read at entry, written at exit); x1 = input, 64 bytes consumed per iteration; x2 = iteration count

+.Lv8_entry:	// only code path: OPENSSL_armcap_P is not tested here, the SHA-256 instructions are used unconditionally

+	stp	x29,x30,[sp,#-16]!	// push x29/x30, sp -= 16

+	add	x29,sp,#0	// x29 = sp

+

+	ld1	{v0.4s,v1.4s},[x0]	// v0,v1 = the eight 32-bit state words at [x0]

+	adr	x3,.LK256	// x3 = cursor into the constant table

+

+.Loop_hw:	// one iteration per 64-byte input block; the .inst words carry the SHA-256 opcodes named in their comments

+	ld1	{v4.16b,v5.16b,v6.16b,v7.16b},[x1],#64	// v4..v7 = next 64 input bytes, x1 += 64

+	sub	x2,x2,#1	// one block fewer remaining; NOTE(review): x2 == 0 on entry is not checked and would wrap here - confirm callers never pass 0

+	ld1	{v16.4s},[x3],#16	// v16 = K[0..3], x3 += 16

+	rev32	v4.16b,v4.16b	// reverse the bytes inside each 32-bit input word (same for v5..v7)

+	rev32	v5.16b,v5.16b

+	rev32	v6.16b,v6.16b

+	rev32	v7.16b,v7.16b

+	orr	v18.16b,v0.16b,v0.16b		// offload: keep the entry value of v0 for the final add

+	orr	v19.16b,v1.16b,v1.16b	// keep the entry value of v1 for the final add

+	ld1	{v17.4s},[x3],#16	// v17 = K[4..7]

+	add	v16.4s,v16.4s,v4.4s	// group 0: v16 = K[0..3] + v4

+.inst	0x5e2828a4	//sha256su0 v4.16b,v5.16b

+	orr	v2.16b,v0.16b,v0.16b	// v2 = copy of v0 taken before sha256h overwrites it; sha256h2 reads it (pattern repeats in every group)

+.inst	0x5e104020	//sha256h v0.16b,v1.16b,v16.4s

+.inst	0x5e105041	//sha256h2 v1.16b,v2.16b,v16.4s

+.inst	0x5e0760c4	//sha256su1 v4.16b,v6.16b,v7.16b

+	ld1	{v16.4s},[x3],#16	// v16 = K[8..11]

+	add	v17.4s,v17.4s,v5.4s	// group 1: v17 = K[4..7] + v5

+.inst	0x5e2828c5	//sha256su0 v5.16b,v6.16b

+	orr	v2.16b,v0.16b,v0.16b

+.inst	0x5e114020	//sha256h v0.16b,v1.16b,v17.4s

+.inst	0x5e115041	//sha256h2 v1.16b,v2.16b,v17.4s

+.inst	0x5e0460e5	//sha256su1 v5.16b,v7.16b,v4.16b

+	ld1	{v17.4s},[x3],#16	// v17 = K[12..15]

+	add	v16.4s,v16.4s,v6.4s	// group 2: v16 = K[8..11] + v6

+.inst	0x5e2828e6	//sha256su0 v6.16b,v7.16b

+	orr	v2.16b,v0.16b,v0.16b

+.inst	0x5e104020	//sha256h v0.16b,v1.16b,v16.4s

+.inst	0x5e105041	//sha256h2 v1.16b,v2.16b,v16.4s

+.inst	0x5e056086	//sha256su1 v6.16b,v4.16b,v5.16b

+	ld1	{v16.4s},[x3],#16	// v16 = K[16..19]

+	add	v17.4s,v17.4s,v7.4s	// group 3: v17 = K[12..15] + v7

+.inst	0x5e282887	//sha256su0 v7.16b,v4.16b

+	orr	v2.16b,v0.16b,v0.16b

+.inst	0x5e114020	//sha256h v0.16b,v1.16b,v17.4s

+.inst	0x5e115041	//sha256h2 v1.16b,v2.16b,v17.4s

+.inst	0x5e0660a7	//sha256su1 v7.16b,v5.16b,v6.16b

+	ld1	{v17.4s},[x3],#16	// v17 = K[20..23]

+	add	v16.4s,v16.4s,v4.4s	// group 4: v16 = K[16..19] + v4 (v4 now holds the words produced by sha256su0/su1)

+.inst	0x5e2828a4	//sha256su0 v4.16b,v5.16b

+	orr	v2.16b,v0.16b,v0.16b

+.inst	0x5e104020	//sha256h v0.16b,v1.16b,v16.4s

+.inst	0x5e105041	//sha256h2 v1.16b,v2.16b,v16.4s

+.inst	0x5e0760c4	//sha256su1 v4.16b,v6.16b,v7.16b

+	ld1	{v16.4s},[x3],#16	// v16 = K[24..27]

+	add	v17.4s,v17.4s,v5.4s	// group 5: v17 = K[20..23] + v5

+.inst	0x5e2828c5	//sha256su0 v5.16b,v6.16b

+	orr	v2.16b,v0.16b,v0.16b

+.inst	0x5e114020	//sha256h v0.16b,v1.16b,v17.4s

+.inst	0x5e115041	//sha256h2 v1.16b,v2.16b,v17.4s

+.inst	0x5e0460e5	//sha256su1 v5.16b,v7.16b,v4.16b

+	ld1	{v17.4s},[x3],#16	// v17 = K[28..31]

+	add	v16.4s,v16.4s,v6.4s	// group 6: v16 = K[24..27] + v6

+.inst	0x5e2828e6	//sha256su0 v6.16b,v7.16b

+	orr	v2.16b,v0.16b,v0.16b

+.inst	0x5e104020	//sha256h v0.16b,v1.16b,v16.4s

+.inst	0x5e105041	//sha256h2 v1.16b,v2.16b,v16.4s

+.inst	0x5e056086	//sha256su1 v6.16b,v4.16b,v5.16b

+	ld1	{v16.4s},[x3],#16	// v16 = K[32..35]

+	add	v17.4s,v17.4s,v7.4s	// group 7: v17 = K[28..31] + v7

+.inst	0x5e282887	//sha256su0 v7.16b,v4.16b

+	orr	v2.16b,v0.16b,v0.16b

+.inst	0x5e114020	//sha256h v0.16b,v1.16b,v17.4s

+.inst	0x5e115041	//sha256h2 v1.16b,v2.16b,v17.4s

+.inst	0x5e0660a7	//sha256su1 v7.16b,v5.16b,v6.16b

+	ld1	{v17.4s},[x3],#16	// v17 = K[36..39]

+	add	v16.4s,v16.4s,v4.4s	// group 8: v16 = K[32..35] + v4

+.inst	0x5e2828a4	//sha256su0 v4.16b,v5.16b

+	orr	v2.16b,v0.16b,v0.16b

+.inst	0x5e104020	//sha256h v0.16b,v1.16b,v16.4s

+.inst	0x5e105041	//sha256h2 v1.16b,v2.16b,v16.4s

+.inst	0x5e0760c4	//sha256su1 v4.16b,v6.16b,v7.16b

+	ld1	{v16.4s},[x3],#16	// v16 = K[40..43]

+	add	v17.4s,v17.4s,v5.4s	// group 9: v17 = K[36..39] + v5

+.inst	0x5e2828c5	//sha256su0 v5.16b,v6.16b

+	orr	v2.16b,v0.16b,v0.16b

+.inst	0x5e114020	//sha256h v0.16b,v1.16b,v17.4s

+.inst	0x5e115041	//sha256h2 v1.16b,v2.16b,v17.4s

+.inst	0x5e0460e5	//sha256su1 v5.16b,v7.16b,v4.16b

+	ld1	{v17.4s},[x3],#16	// v17 = K[44..47]

+	add	v16.4s,v16.4s,v6.4s	// group 10: v16 = K[40..43] + v6

+.inst	0x5e2828e6	//sha256su0 v6.16b,v7.16b

+	orr	v2.16b,v0.16b,v0.16b

+.inst	0x5e104020	//sha256h v0.16b,v1.16b,v16.4s

+.inst	0x5e105041	//sha256h2 v1.16b,v2.16b,v16.4s

+.inst	0x5e056086	//sha256su1 v6.16b,v4.16b,v5.16b

+	ld1	{v16.4s},[x3],#16	// v16 = K[48..51]

+	add	v17.4s,v17.4s,v7.4s	// group 11: v17 = K[44..47] + v7

+.inst	0x5e282887	//sha256su0 v7.16b,v4.16b

+	orr	v2.16b,v0.16b,v0.16b

+.inst	0x5e114020	//sha256h v0.16b,v1.16b,v17.4s

+.inst	0x5e115041	//sha256h2 v1.16b,v2.16b,v17.4s

+.inst	0x5e0660a7	//sha256su1 v7.16b,v5.16b,v6.16b

+	ld1	{v17.4s},[x3],#16	// v17 = K[52..55]

+	add	v16.4s,v16.4s,v4.4s	// group 12: v16 = K[48..51] + v4; from here on no sha256su0/su1 follows, v4..v7 are only read

+	orr	v2.16b,v0.16b,v0.16b

+.inst	0x5e104020	//sha256h v0.16b,v1.16b,v16.4s

+.inst	0x5e105041	//sha256h2 v1.16b,v2.16b,v16.4s

+

+	ld1	{v16.4s},[x3],#16	// v16 = K[56..59]

+	add	v17.4s,v17.4s,v5.4s	// group 13: v17 = K[52..55] + v5

+	orr	v2.16b,v0.16b,v0.16b

+.inst	0x5e114020	//sha256h v0.16b,v1.16b,v17.4s

+.inst	0x5e115041	//sha256h2 v1.16b,v2.16b,v17.4s

+

+	ld1	{v17.4s},[x3]	// v17 = K[60..63]; last table load, no post-increment

+	add	v16.4s,v16.4s,v6.4s	// group 14: v16 = K[56..59] + v6

+	sub	x3,x3,#64*4-16	// rewind: undo the 15 post-increments (15*16 = 240) so x3 = .LK256 again

+	orr	v2.16b,v0.16b,v0.16b

+.inst	0x5e104020	//sha256h v0.16b,v1.16b,v16.4s

+.inst	0x5e105041	//sha256h2 v1.16b,v2.16b,v16.4s

+

+	add	v17.4s,v17.4s,v7.4s	// group 15: v17 = K[60..63] + v7

+	orr	v2.16b,v0.16b,v0.16b

+.inst	0x5e114020	//sha256h v0.16b,v1.16b,v17.4s

+.inst	0x5e115041	//sha256h2 v1.16b,v2.16b,v17.4s

+

+	add	v0.4s,v0.4s,v18.4s	// add back the v0 value saved at the top of this iteration

+	add	v1.4s,v1.4s,v19.4s	// add back the v1 value saved at the top of this iteration

+

+	cbnz	x2,.Loop_hw	// repeat while the remaining-block count is non-zero

+

+	st1	{v0.4s,v1.4s},[x0]	// write the eight updated state words back to [x0]

+

+	ldr	x29,[sp],#16	// restore x29 and pop the 16-byte frame; x30 is not reloaded (nothing in this function writes it)

+	ret

+.size	sha256_block_data_order,.-sha256_block_data_order

+.comm	OPENSSL_armcap_P,4,4	// common symbol: 4 bytes, alignment 4; this file only records its offset at .LOPENSSL_armcap_P and never loads it