| /* |
| * Copyright (C) 2004 Joakim Tjernlund |
| * Copyright (C) 2000-2005 Erik Andersen <andersen@uclibc.org> |
| * |
| * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball. |
| */ |
| |
| /* These are carefully optimized mem*() functions for PPC written in C. |
| * Don't muck around with these functions without checking the generated |
| * assembler code. |
| * It is possible to optimize these significantly more by using specific |
| * data cache instructions (mainly dcbz). However, that requires knowledge |
| * of the CPU's cache line size. |
| * |
| * BUG ALERT! |
| * The cache instructions on MPC8xx CPUs are buggy (they don't update |
| * the DAR register when causing a DTLB Miss/Error) and cannot be |
| * used on 8xx CPUs without a kernel patch to work around this |
| * problem. |
| */ |
| |
#include <stdint.h>
#include <string.h>
| |
/* Copy len bytes from the buffer at `from` to the buffer at `to` and
 * return `to`.  The copy always runs forward, so overlapping buffers are
 * not supported.
 *
 * PPC can do pre increment and load/store, but not post increment and
 * load/store. Therefore use *++ptr instead of *ptr++.  For the same
 * reason both cursors are kept 4 bytes *behind* the next byte to copy,
 * so every access is written as "cursor + 4" or as a pre-increment.
 *
 * The bulk of the data is moved two 32-bit words per iteration.  Only the
 * destination is brought to a 4-byte boundary beforehand; source words
 * may therefore be loaded from unaligned addresses.
 */
void *memcpy(void *to, const void *from, size_t len)
{
	/* The word loops advance the cursors by 4 and 8 bytes, so the word
	   type has to be exactly 32 bits wide on every ABI.  (unsigned long
	   is 8 bytes on 64-bit targets and would store past the end of the
	   destination.)  may_alias because the words overlay caller objects
	   of arbitrary type. */
	typedef uint32_t __attribute__((__may_alias__)) word_t;
	size_t rem, chunks;
	word_t tmp1, tmp2;
	unsigned char *tmp_to;
	const unsigned char *tmp_from = (const unsigned char *)from;

	chunks = len / 8;
	/* Bias both cursors by -4 (see the header comment). */
	tmp_from -= 4;
	tmp_to = (unsigned char *)to - 4;
	if (!chunks)
		goto lessthan8;
	/* Subtracting 4 does not change the address modulo 4, so this is the
	   misalignment of the destination itself. */
	rem = (uintptr_t)tmp_to % 4;
	if (rem)
		goto align;
copy_chunks:
	do {
		/* make gcc to load all data, then store it */
		tmp1 = *(const word_t *)(tmp_from+4);
		tmp_from += 8;
		tmp2 = *(const word_t *)tmp_from;
		*(word_t *)(tmp_to+4) = tmp1;
		tmp_to += 8;
		*(word_t *)tmp_to = tmp2;
	} while (--chunks);
lessthan8:
	/* At most 7 bytes are left: one optional word, then single bytes. */
	len = len % 8;
	if (len >= 4) {
		tmp_from += 4;
		tmp_to += 4;
		*(word_t *)(tmp_to) = *(const word_t *)(tmp_from);
		len -= 4;
	}
	if (!len)
		return to;
	/* Move the cursors to one byte before the next byte so the loop can
	   use pre-increment addressing. */
	tmp_from += 3;
	tmp_to += 3;
	do {
		*++tmp_to = *++tmp_from;
	} while (--len);

	return to;
align:
	/* Only reached with len >= 8, so copying the 1..3 bytes needed to
	   align the destination cannot underflow len. */
	/* ???: Do we really need to generate the carry flag here? If not, then:
	   rem -= 4; */
	rem = 4 - rem;
	len -= rem;
	do {
		*(tmp_to+4) = *(tmp_from+4);
		++tmp_from;
		++tmp_to;
	} while (--rem);
	/* Fewer than 8 bytes may remain once the alignment bytes are gone. */
	chunks = len / 8;
	if (chunks)
		goto copy_chunks;
	goto lessthan8;
}
/* NOTE(review): libc_hidden_def is not defined in this file; presumably it is
   the C library's internal macro that sets up the hidden alias for memcpy --
   confirm against the library's internal headers. */
| libc_hidden_def(memcpy) |