[T106][ZXW-22]7520V3SCV2.01.01.02P42U09_VEC_V0.8_AP_VEC origin source commit

Change-Id: Ic6e05d89ecd62fc34f82b23dcf306c93764aec4b
diff --git a/ap/build/uClibc/libc/string/avr32/memcpy.S b/ap/build/uClibc/libc/string/avr32/memcpy.S
new file mode 100644
index 0000000..bf091ab
--- /dev/null
+++ b/ap/build/uClibc/libc/string/avr32/memcpy.S
@@ -0,0 +1,111 @@
+/*
+ * Copyright (C) 2004-2007 Atmel Corporation
+ *
+ * This file is subject to the terms and conditions of the GNU Lesser General
+ * Public License.  See the file "COPYING.LIB" in the main directory of this
+ * archive for more details.
+ */
+
+/* Don't use r12 as dst since we must return it unmodified */
+#define dst r9	/* working destination pointer, initialised from r12 at entry */
+#define src r11	/* source pointer; read at entry without being set here, so caller-supplied */
+#define len r10	/* byte count; likewise caller-supplied, biased negative in the tail code */
+
+	.text
+	.global	memcpy
+	.type	memcpy, @function
+memcpy:	/* In: r12 = destination, src = source, len = byte count. Out: r12 unchanged. Uses r8 and dst as scratch. */
+	pref	src[0]	/* cache prefetch hint for the start of the source */
+	mov	dst, r12	/* advance a copy so r12 survives as the return value */
+
+	/* If we have less than 32 bytes, don't do anything fancy */
+	cp.w	len, 32
+	brge	.Lmore_than_31	/* NOTE(review): signed compare; a len with the top bit set takes the small path and returns at retlt without copying - confirm acceptable */
+
+	sub	len, 1	/* pre-decrement so the loop can terminate on the sign of len */
+	retlt	r12	/* nothing to copy: return the original destination */
+1:	ld.ub	r8, src++	/* simple byte-at-a-time copy through r8 */
+	st.b	dst++, r8
+	sub	len, 1
+	brge	1b	/* loop while bytes remain */
+	retal	r12	/* unconditional return of the original destination */
+
+.Lmore_than_31:	/* len >= 32 from here on */
+	pushm	r0-r7, lr	/* r0-r7 become copy buffers; lr is saved so the exits can return via popm ..., pc */
+
+	/* Check alignment */
+	mov	r8, src
+	andl	r8, 31, COH	/* r8 = src & 31 (COH clears the upper halfword) */
+	brne	.Lunaligned_src	/* src is not on a 32-byte boundary */
+	mov	r8, dst
+	andl	r8, 3, COH	/* r8 = dst & 3 */
+	brne	.Lunaligned_dst	/* src is 32-byte aligned but dst is not word-aligned */
+
+.Laligned_copy:	/* reached only with src 32-byte aligned and dst word-aligned */
+	sub	len, 32	/* bias len by -32: len >= 0 means a full 32-byte chunk is available */
+	brlt	.Lless_than_32
+
+1:	/* Copy 32 bytes at a time */
+	ldm	src, r0-r7	/* load eight words */
+	sub	src, -32	/* src += 32, expressed as subtracting a negative immediate */
+	stm	dst, r0-r7	/* store the same eight words */
+	sub	dst, -32	/* dst += 32 */
+	sub	len, 32
+	brge	1b	/* another full chunk remains */
+
+.Lless_than_32:	/* len is now (bytes remaining - 32), i.e. in [-32, -1] */
+	/* Copy 16 more bytes if possible */
+	sub	len, -16	/* len += 16, giving (bytes remaining - 16) */
+	brlt	.Lless_than_16	/* fewer than 16 bytes left */
+	ldm	src, r0-r3	/* load four words */
+	sub	src, -16	/* src += 16 */
+	sub	len, 16	/* re-bias so both paths arrive below with len in [-16, -1] */
+	stm	dst, r0-r3
+	sub	dst, -16	/* dst += 16 */
+
+.Lless_than_16:	/* len is (bytes remaining - 16) on entry */
+	/* Do the remaining as byte copies */
+	neg	len	/* len = 16 - bytes remaining, in [1, 16] */
+	add	pc, pc, len << 2	/* computed jump into the unrolled copies; the << 2 scale implies each ld.ub/st.b pair is assumed to occupy 4 bytes - re-check before editing the block below */
+	.rept	15	/* 15 unrolled byte copies: the most that can be left after the 16-byte step */
+	ld.ub	r0, src++
+	st.b	dst++, r0
+	.endr
+
+	popm	r0-r7, pc	/* restore r0-r7 and return by popping the saved lr into pc; r12 was never modified */
+
+.Lunaligned_src:
+	/* Make src cacheline-aligned. r8 = (src & 31) */
+	rsub	r8, r8, 32	/* r8 = 32 - (src & 31): bytes up to the next 32-byte boundary, in [1, 31] */
+	sub	len, r8	/* charge those bytes now; len >= 32 here so the result stays positive */
+1:	ld.ub	r0, src++	/* copy the leading bytes one at a time */
+	st.b	dst++, r0
+	sub	r8, 1
+	brne	1b	/* until src reaches the boundary */
+
+	/* If dst is word-aligned, we're ready to go */
+	pref	src[0]	/* prefetch hint for the now-aligned source */
+	mov	r8, 3
+	tst	dst, r8	/* set flags from dst & 3 without changing dst */
+	breq	.Laligned_copy	/* .Laligned_copy re-checks len, so a short remainder is handled there */
+
+.Lunaligned_dst:
+	/* src is aligned, but dst is not. Expect bad performance */
+	sub	len, 4	/* bias len by -4, same scheme as the 32-byte loop */
+	brlt	2f	/* fewer than 4 bytes left */
+1:	ld.w	r0, src++	/* aligned word load */
+	st.w	dst++, r0	/* word store to a dst that is not word-aligned */
+	sub	len, 4
+	brge	1b	/* another full word remains */
+
+2:	neg	len	/* len = 4 - bytes remaining, in [1, 4] */
+	add	pc, pc, len << 2	/* same computed-jump scheme as the 15-byte tail; depends on the size of the pairs below */
+	.rept	3	/* up to 3 trailing bytes */
+	ld.ub	r0, src++
+	st.b	dst++, r0
+	.endr
+
+	popm	r0-r7, pc	/* restore r0-r7 and return; r12 still holds the original destination */
+	.size	memcpy, . - memcpy
+
+libc_hidden_def(memcpy)