| /* | 
 |  * Copyright (C) 2004 Joakim Tjernlund | 
 |  * Copyright (C) 2000-2005 Erik Andersen <andersen@uclibc.org> | 
 |  * | 
 |  * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball. | 
 |  */ | 
 |  | 
 | /* These are carefully optimized mem*() functions for PPC written in C. | 
 |  * Don't muck around with these functions without checking the generated | 
 |  * assembler code. | 
 |  * It is possible to optimize these significantly more by using specific | 
 |  * data cache instructions (mainly dcbz). However, that requires knowledge | 
 |  * about the CPU's cache line size. | 
 |  * | 
 |  * BUG ALERT! | 
 |  * The cache instructions on MPC8xx CPU's are buggy(they don't update | 
 |  * the DAR register when causing a DTLB Miss/Error) and cannot be | 
 |  * used on 8xx CPU's without a kernel patch to work around this | 
 |  * problem. | 
 |  */ | 
 |  | 
#include <stdint.h>
#include <string.h>
 |  | 
 | /* PPC can do pre increment and load/store, but not post increment and | 
 |    load/store.  Therefore use *++ptr instead of *ptr++.  */ | 
 | void *memcpy(void *to, const void *from, size_t len) | 
 | { | 
 | 	unsigned long rem, chunks, tmp1, tmp2; | 
 | 	unsigned char *tmp_to; | 
 | 	unsigned char *tmp_from = (unsigned char *)from; | 
 |  | 
 | 	chunks = len / 8; | 
 | 	tmp_from -= 4; | 
 | 	tmp_to = to - 4; | 
 | 	if (!chunks) | 
 | 		goto lessthan8; | 
 | 	rem = (unsigned long )tmp_to % 4; | 
 | 	if (rem) | 
 | 		goto align; | 
 |  copy_chunks: | 
 | 	do { | 
 | 		/* make gcc to load all data, then store it */ | 
 | 		tmp1 = *(unsigned long *)(tmp_from+4); | 
 | 		tmp_from += 8; | 
 | 		tmp2 = *(unsigned long *)tmp_from; | 
 | 		*(unsigned long *)(tmp_to+4) = tmp1; | 
 | 		tmp_to += 8; | 
 | 		*(unsigned long *)tmp_to = tmp2; | 
 | 	} while (--chunks); | 
 |  lessthan8: | 
 | 	len = len % 8; | 
 | 	if (len >= 4) { | 
 | 		tmp_from += 4; | 
 | 		tmp_to += 4; | 
 | 		*(unsigned long *)(tmp_to) = *(unsigned long *)(tmp_from); | 
 | 		len -= 4; | 
 | 	} | 
 | 	if (!len) | 
 | 		return to; | 
 | 	tmp_from += 3; | 
 | 	tmp_to += 3; | 
 | 	do { | 
 | 		*++tmp_to = *++tmp_from; | 
 | 	} while (--len); | 
 |  | 
 | 	return to; | 
 |  align: | 
 | 	/* ???: Do we really need to generate the carry flag here? If not, then: | 
 | 	rem -= 4; */ | 
 | 	rem = 4 - rem; | 
 | 	len -= rem; | 
 | 	do { | 
 | 		*(tmp_to+4) = *(tmp_from+4); | 
 | 		++tmp_from; | 
 | 		++tmp_to; | 
 | 	} while (--rem); | 
 | 	chunks = len / 8; | 
 | 	if (chunks) | 
 | 		goto copy_chunks; | 
 | 	goto lessthan8; | 
 | } | 
 /* NOTE(review): libc_hidden_def is not defined in this file; presumably it is
    the libc-internal macro that publishes the hidden alias for memcpy so that
    calls from inside the library bind locally -- confirm against the project
    headers.  */
 | libc_hidden_def(memcpy) |