[T106][ZXW-22]7520V3SCV2.01.01.02P42U09_VEC_V0.8_AP_VEC origin source commit

Change-Id: Ic6e05d89ecd62fc34f82b23dcf306c93764aec4b
diff --git a/ap/build/uClibc/libc/string/powerpc/memcpy.c b/ap/build/uClibc/libc/string/powerpc/memcpy.c
new file mode 100644
index 0000000..22794ec
--- /dev/null
+++ b/ap/build/uClibc/libc/string/powerpc/memcpy.c
@@ -0,0 +1,82 @@
+/*
+ * Copyright (C) 2004 Joakim Tjernlund
+ * Copyright (C) 2000-2005 Erik Andersen <andersen@uclibc.org>
+ *
+ * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
+ */
+
+/* These are carefully optimized mem*() functions for PPC written in C.
+ * Don't muck around with these functions without checking the generated
+ * assembler code.
+ * It is possible to optimize these significantly more by using specific
+ * data cache instructions (mainly dcbz). However, that requires knowledge
+ * about the CPU's cache line size.
+ *
+ * BUG ALERT!
+ * The cache instructions on MPC8xx CPUs are buggy (they don't update
+ * the DAR register when causing a DTLB Miss/Error) and cannot be
+ * used on 8xx CPUs without a kernel patch to work around this
+ * problem.
+ */
+
+#include <string.h>
+
+/* Copy len bytes from 'from' to 'to' and return 'to'.  PPC can do pre-increment
+   load/store but not post-increment, so use *++ptr instead of *ptr++.  */
+void *memcpy(void *to, const void *from, size_t len)
+{
+	unsigned long rem, chunks, tmp1, tmp2;
+	unsigned char *tmp_to;
+	unsigned char *tmp_from = (unsigned char *)from;	/* const dropped; source is only read */
+
+	chunks = len / 8;	/* number of whole 8-byte chunks */
+	tmp_from -= 4;	/* bias by -4: accesses below are made at tmp_from+4 */
+	tmp_to = to - 4;	/* same -4 bias; arithmetic on void * is a GNU C extension */
+	if (!chunks)
+		goto lessthan8;	/* fewer than 8 bytes: skip alignment and the chunk loop */
+	rem = (unsigned long )tmp_to % 4;	/* destination offset from a 4-byte boundary */
+	if (rem)
+		goto align;	/* copy leading bytes until the destination is aligned */
+ copy_chunks:
+	do {	/* chunks is non-zero whenever this loop is entered */
+		/* make gcc load both words before storing either of them */
+		tmp1 = *(unsigned long *)(tmp_from+4);	/* first word of the chunk */
+		tmp_from += 8;
+		tmp2 = *(unsigned long *)tmp_from;	/* second word; 8-byte step relies on 4-byte unsigned long */
+		*(unsigned long *)(tmp_to+4) = tmp1;
+		tmp_to += 8;
+		*(unsigned long *)tmp_to = tmp2;
+	} while (--chunks);
+ lessthan8:
+	len = len % 8;	/* bytes still to copy after the 8-byte chunks */
+	if (len >= 4) {	/* at least one more whole word remains */
+		tmp_from += 4;	/* undo the bias: point at the next uncopied word */
+		tmp_to += 4;
+		*(unsigned long *)(tmp_to) = *(unsigned long *)(tmp_from);
+		len -= 4;
+	}
+	if (!len)
+		return to;	/* nothing left; also guards the do-while below against len == 0 */
+	tmp_from += 3;	/* now one byte before the next source byte, ready for *++ */
+	tmp_to += 3;
+	do {	/* copy the trailing bytes one at a time */
+		*++tmp_to = *++tmp_from;
+	} while (--len);
+
+	return to;
+ align:
+	/* ???: Do we really need to generate the carry flag here? If not, then:
+	rem -= 4; */
+	rem = 4 - rem;	/* bytes needed to reach a 4-byte aligned destination (1..3) */
+	len -= rem;	/* cannot underflow: this path is only taken when len >= 8 */
+	do {
+		*(tmp_to+4) = *(tmp_from+4);	/* byte copy through the -4 biased pointers */
+		++tmp_from;
+		++tmp_to;
+	} while (--rem);
+	chunks = len / 8;	/* recompute: the alignment bytes may have used up the last chunk */
+	if (chunks)
+		goto copy_chunks;
+	goto lessthan8;
+}
+libc_hidden_def(memcpy)