| /* A memcpy for CRIS. | 
 |    Copyright (C) 1994-2008 Axis Communications. | 
 |    All rights reserved. | 
 |  | 
 |    Redistribution and use in source and binary forms, with or without | 
 |    modification, are permitted provided that the following conditions | 
 |    are met: | 
 |  | 
 |    1. Redistributions of source code must retain the above copyright | 
 |       notice, this list of conditions and the following disclaimer. | 
 |  | 
 |    2. Neither the name of Axis Communications nor the names of its | 
 |       contributors may be used to endorse or promote products derived | 
 |       from this software without specific prior written permission. | 
 |  | 
 |    THIS SOFTWARE IS PROVIDED BY AXIS COMMUNICATIONS AND ITS CONTRIBUTORS | 
 |    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | 
 |    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | 
 |    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL AXIS | 
 |    COMMUNICATIONS OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, | 
 |    INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | 
 |    (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | 
 |    SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | 
 |    HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | 
 |    STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING | 
 |    IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | 
 |    POSSIBILITY OF SUCH DAMAGE.  */ | 
 |  | 
/* FIXME: This file should really only be used for reference, as the
   result depends somewhat on gcc generating what we expect rather than
   what we describe.  An assembly file should be used instead.  */
 |  | 
 | #include <string.h> | 
 |  | 
 | #ifdef __arch_v32 | 
 | /* For CRISv32, movem is very cheap.  */ | 
 | #define MEMCPY_BY_BLOCK_THRESHOLD (44) | 
 | #else | 
/* The break-even point between movem and move16 (the loop below that
   copies 16 bytes per iteration) is really at 38.7 * 2 bytes, but the
   movem loop works in 44-byte blocks, so we use ordinary code up to the
   next multiple of 44.  */
 | #define MEMCPY_BY_BLOCK_THRESHOLD (44 * 2) | 
 | #endif | 
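
/* In concrete numbers: 38.7 * 2 is about 77.4 bytes, and the next
   multiple of 44 above that is 88, hence the 44 * 2 threshold for the
   non-v32 case.  */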
 |  | 
 | /* No name ambiguities in this file.  */ | 
 | __asm__ (".syntax no_register_prefix"); | 
 |  | 
 | void * | 
 | memcpy(void *pdst, const void *psrc, size_t pn) | 
 | { | 
 |   /* Now we want the parameters put in special registers. | 
 |      Make sure the compiler is able to make something useful of this. | 
 |      As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop). | 
 |  | 
     If gcc were doing this right, it really would need no temporaries
     and no stack space to save anything on.  */
 |  | 
 |   register void *return_dst __asm__ ("r10") = pdst; | 
 |   register unsigned char *dst __asm__ ("r13") = pdst; | 
 |   register unsigned const char *src __asm__ ("r11") = psrc; | 
 |   register int n __asm__ ("r12") = pn; | 
 |  | 
  /* When src is aligned but dst is not, this costs a few needless extra
     cycles.  Checking whether the re-alignment is actually unnecessary
     would likely cost just as many.  */
 |   if (((unsigned long) dst & 3) != 0 | 
 |       /* Don't align if we wouldn't copy more than a few bytes; so we | 
 |          don't have to check further for overflows.  */ | 
 |       && n >= 3) | 
 |   { | 
 |     if ((unsigned long) dst & 1) | 
 |       { | 
 |         n--; | 
 |         *dst = *src; | 
 |         src++; | 
 |         dst++; | 
 |       } | 
 |  | 
 |     if ((unsigned long) dst & 2) | 
 |       { | 
 |         n -= 2; | 
 |         *(short *) dst = *(short *) src; | 
 |         src += 2; | 
 |         dst += 2; | 
 |       } | 
 |   } | 
 |  | 
 |   /* Decide which copying method to use.  */ | 
 |   if (n >= MEMCPY_BY_BLOCK_THRESHOLD) | 
 |     { | 
 |       /* It is not optimal to tell the compiler about clobbering any | 
 |          registers; that will move the saving/restoring of those registers | 
 |          to the function prologue/epilogue, and make non-movem sizes | 
 |          suboptimal.  */ | 
 |       __asm__ __volatile__ | 
 |         ("\ | 
 |          ;; GCC does promise correct register allocations, but let's    \n\ | 
 |          ;; make sure it keeps its promises.                            \n\ | 
 |          .ifnc %0-%1-%2,$r13-$r11-$r12                                  \n\ | 
 |          .error \"GCC reg alloc bug: %0-%1-%4 != $r13-$r12-$r11\"       \n\ | 
 |          .endif                                                         \n\ | 
 |                                                                         \n\ | 
 |          ;; Save the registers we'll use in the movem process           \n\ | 
 |          ;; on the stack.                                               \n\ | 
 |          subq   11*4,sp                                                 \n\ | 
 |          movem  r10,[sp]                                                \n\ | 
 |                                                                         \n\ | 
 |          ;; Now we've got this:                                         \n\ | 
 |          ;; r11 - src                                                   \n\ | 
 |          ;; r13 - dst                                                   \n\ | 
 |          ;; r12 - n                                                     \n\ | 
 |                                                                         \n\ | 
 |          ;; Update n for the first loop.                                \n\ | 
 |          subq    44,r12                                                 \n\ | 
 | 0:                                                                      \n\ | 
 | " | 
 | #ifdef __arch_common_v10_v32 | 
 |          /* Cater to branch offset difference between v32 and v10.  We | 
 |             assume the branch below has an 8-bit offset.  */ | 
 | "        setf\n" | 
 | #endif | 
 | "        movem  [r11+],r10                                              \n\ | 
 |          subq   44,r12                                                  \n\ | 
 |          bge     0b                                                     \n\ | 
 |          movem  r10,[r13+]                                              \n\ | 
 |                                                                         \n\ | 
 |          ;; Compensate for last loop underflowing n.                    \n\ | 
 |          addq   44,r12                                                  \n\ | 
 |                                                                         \n\ | 
 |          ;; Restore registers from stack.                               \n\ | 
 |          movem [sp+],r10" | 
 |  | 
 |          /* Outputs.  */ | 
 |          : "=r" (dst), "=r" (src), "=r" (n) | 
 |  | 
 |          /* Inputs.  */ | 
 |          : "0" (dst), "1" (src), "2" (n)); | 
 |     } | 
 |  | 
 |   while (n >= 16) | 
 |     { | 
 |       *(long *) dst = *(long *) src; dst += 4; src += 4; | 
 |       *(long *) dst = *(long *) src; dst += 4; src += 4; | 
 |       *(long *) dst = *(long *) src; dst += 4; src += 4; | 
 |       *(long *) dst = *(long *) src; dst += 4; src += 4; | 
 |  | 
 |       n -= 16; | 
 |     } | 
 |  | 
 |   switch (n) | 
 |     { | 
 |     case 0: | 
 |       break; | 
 |  | 
 |     case 1: | 
 |       *dst = *src; | 
 |       break; | 
 |  | 
 |     case 2: | 
 |       *(short *) dst = *(short *) src; | 
 |       break; | 
 |  | 
 |     case 3: | 
 |       *(short *) dst = *(short *) src; dst += 2; src += 2; | 
 |       *dst = *src; | 
 |       break; | 
 |  | 
 |     case 4: | 
 |       *(long *) dst = *(long *) src; | 
 |       break; | 
 |  | 
 |     case 5: | 
 |       *(long *) dst = *(long *) src; dst += 4; src += 4; | 
 |       *dst = *src; | 
 |       break; | 
 |  | 
 |     case 6: | 
 |       *(long *) dst = *(long *) src; dst += 4; src += 4; | 
 |       *(short *) dst = *(short *) src; | 
 |       break; | 
 |  | 
 |     case 7: | 
 |       *(long *) dst = *(long *) src; dst += 4; src += 4; | 
 |       *(short *) dst = *(short *) src; dst += 2; src += 2; | 
 |       *dst = *src; | 
 |       break; | 
 |  | 
 |     case 8: | 
 |       *(long *) dst = *(long *) src; dst += 4; src += 4; | 
 |       *(long *) dst = *(long *) src; | 
 |       break; | 
 |  | 
 |     case 9: | 
 |       *(long *) dst = *(long *) src; dst += 4; src += 4; | 
 |       *(long *) dst = *(long *) src; dst += 4; src += 4; | 
 |       *dst = *src; | 
 |       break; | 
 |  | 
 |     case 10: | 
 |       *(long *) dst = *(long *) src; dst += 4; src += 4; | 
 |       *(long *) dst = *(long *) src; dst += 4; src += 4; | 
 |       *(short *) dst = *(short *) src; | 
 |       break; | 
 |  | 
 |     case 11: | 
 |       *(long *) dst = *(long *) src; dst += 4; src += 4; | 
 |       *(long *) dst = *(long *) src; dst += 4; src += 4; | 
 |       *(short *) dst = *(short *) src; dst += 2; src += 2; | 
 |       *dst = *src; | 
 |       break; | 
 |  | 
 |     case 12: | 
 |       *(long *) dst = *(long *) src; dst += 4; src += 4; | 
 |       *(long *) dst = *(long *) src; dst += 4; src += 4; | 
 |       *(long *) dst = *(long *) src; | 
 |       break; | 
 |  | 
 |     case 13: | 
 |       *(long *) dst = *(long *) src; dst += 4; src += 4; | 
 |       *(long *) dst = *(long *) src; dst += 4; src += 4; | 
 |       *(long *) dst = *(long *) src; dst += 4; src += 4; | 
 |       *dst = *src; | 
 |       break; | 
 |  | 
 |     case 14: | 
 |       *(long *) dst = *(long *) src; dst += 4; src += 4; | 
 |       *(long *) dst = *(long *) src; dst += 4; src += 4; | 
 |       *(long *) dst = *(long *) src; dst += 4; src += 4; | 
 |       *(short *) dst = *(short *) src; | 
 |       break; | 
 |  | 
 |     case 15: | 
 |       *(long *) dst = *(long *) src; dst += 4; src += 4; | 
 |       *(long *) dst = *(long *) src; dst += 4; src += 4; | 
 |       *(long *) dst = *(long *) src; dst += 4; src += 4; | 
 |       *(short *) dst = *(short *) src; dst += 2; src += 2; | 
 |       *dst = *src; | 
 |       break; | 
 |     } | 
 |  | 
 |   return return_dst; | 
 | } | 
 | libc_hidden_def(memcpy) |