| /* Copyright (C) 2002, 2003 Free Software Foundation, Inc. |
| This file is part of the GNU C Library. |
| Contributed by Hartvig Ekner <hartvige@mips.com>, 2002. |
| |
| The GNU C Library is free software; you can redistribute it and/or |
| modify it under the terms of the GNU Lesser General Public |
| License as published by the Free Software Foundation; either |
| version 2.1 of the License, or (at your option) any later version. |
| |
| The GNU C Library is distributed in the hope that it will be useful, |
| but WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| Lesser General Public License for more details. |
| |
| You should have received a copy of the GNU Lesser General Public |
| License along with the GNU C Library; if not, write to the Free |
| Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA |
| 02111-1307 USA. */ |
| |
| #include <features.h> |
| /*#include <sysdep.h>*/ |
| #include <endian.h> |
| #include "sysdep.h" |
| |
| /* void *memcpy(void *s1, const void *s2, size_t n); */ |
| |
| #ifdef __mips64 |
| |
| #include <sys/asm.h> |
| |
/* Endian-neutral names for the unaligned doubleword load/store
   instructions.  As used in the 64-bit memcpy in this file, LDHI/SDHI at
   offset 0 transfer the bytes from the given address up to the end of the
   enclosing aligned doubleword, and LDLO at offset 7 supplies the
   remaining bytes of an unaligned doubleword.  */
| #if __BYTE_ORDER == __BIG_ENDIAN |
| # define LDHI ldl /* high part is left in big-endian */ |
| # define SDHI sdl /* high part is left in big-endian */ |
| # define LDLO ldr /* low part is right in big-endian */ |
| # define SDLO sdr /* low part is right in big-endian */ |
| #else |
| # define LDHI ldr /* high part is right in little-endian */ |
| # define SDHI sdr /* high part is right in little-endian */ |
| # define LDLO ldl /* low part is left in little-endian */ |
| # define SDLO sdl /* low part is left in little-endian */ |
| #endif |
| |
/* memcpy for 64-bit MIPS (this half of the file is selected by __mips64).
   In:   a0 = destination, a1 = source, a2 = number of bytes to copy.
   Out:  v0 = the destination pointer as it was on entry.
   Uses: t0-t3, ta0-ta3 and a3 as scratch; a0, a1 and a2 are modified.
   The body is assembled under .set noreorder: the instruction written
   after each branch or jump is its delay slot and executes whether or
   not the branch is taken.  */
| ENTRY (memcpy) |
| .set noreorder |
| |
/* Short copies go straight to the byte loop.
   NOTE(review): slti is a signed compare, so a count with its top bit set
   is treated as small; L(last16) then sees a2 <= 0 and returns without
   copying anything.  */
| slti t0, a2, 16 # Fewer than 16 bytes to copy? |
| bne t0, zero, L(last16) |
| move v0, a0 # Delay slot: return value, set on both paths |
| |
/* If a0 and a1 differ in their low three bits they can never both be
   8-byte aligned at the same time; that case is handled at L(shift).
   The delay slot starts computing t1 = (-a1) & 7, the number of bytes
   from a1 up to the next 8-byte boundary.  */
| xor t0, a1, a0 # Find a0/a1 displacement |
| andi t0, 0x7 |
| bne t0, zero, L(shift) # Go handle the unaligned case |
| PTR_SUBU t1, zero, a1 |
/* The delay slot of the beq removes the t1 leading bytes from the count
   (a no-op when t1 is 0).  Otherwise one partial load/store pair copies
   those t1 bytes and both pointers are advanced to the boundary.  */
| andi t1, 0x7 # a0/a1 are mutually aligned, but are we |
| beq t1, zero, L(chk8w) # starting in the middle of a dword? |
| PTR_SUBU a2, t1 |
| LDHI t0, 0(a1) # Yes we are... take care of that |
| PTR_ADDU a1, t1 |
| SDHI t0, 0(a0) |
| PTR_ADDU a0, t1 |
| |
/* Both pointers are 8-byte aligned from here on.  t0 = a2 mod 64; if that
   equals a2 there are fewer than 64 bytes and the unrolled loop is
   skipped.  */
| L(chk8w): |
| andi t0, a2, 0x3f # 64 or more bytes left? |
| beq t0, a2, L(chk1w) |
| PTR_SUBU a3, a2, t0 # Delay slot: a3 = bytes the loop will copy |
| PTR_ADDU a3, a1 # a3 = end address of loop |
| move a2, t0 # a2 = what will be left after loop |
/* All eight loads are issued before the pointers are bumped, so the
   stores use negative offsets.  The last store sits in the delay slot
   of the loop branch.  */
| L(lop8w): |
| ld t0, 0(a1) # Loop taking 8 dwords (64 bytes) at a time |
| ld t1, 8(a1) |
| ld t2, 16(a1) |
| ld t3, 24(a1) |
| ld ta0, 32(a1) |
| ld ta1, 40(a1) |
| ld ta2, 48(a1) |
| ld ta3, 56(a1) |
| PTR_ADDIU a0, 64 |
| PTR_ADDIU a1, 64 |
| sd t0, -64(a0) |
| sd t1, -56(a0) |
| sd t2, -48(a0) |
| sd t3, -40(a0) |
| sd ta0, -32(a0) |
| sd ta1, -24(a0) |
| sd ta2, -16(a0) |
| bne a1, a3, L(lop8w) |
| sd ta3, -8(a0) |
| |
/* Same scheme for the remaining whole doublewords: t0 = a2 mod 8 is the
   byte tail, a3 becomes the source address at which the loop stops.  */
| L(chk1w): |
| andi t0, a2, 0x7 # 8 or more bytes left? |
| beq t0, a2, L(last16) |
| PTR_SUBU a3, a2, t0 # Yes, handle them one dword at a time |
| PTR_ADDU a3, a1 # a3 again end address |
| move a2, t0 |
| L(lop1w): |
| ld t0, 0(a1) |
| PTR_ADDIU a0, 8 |
| PTR_ADDIU a1, 8 |
| bne a1, a3, L(lop1w) |
| sd t0, -8(a0) |
| |
/* Byte tail, also the whole copy for short requests.  The delay slot of
   the blez computes a3 = a1 + a2, the source address at which the byte
   loop stops.  */
| L(last16): |
| blez a2, L(lst16e) # Copy the remaining a2 bytes, one at a time |
| PTR_ADDU a3, a2, a1 |
| L(lst16l): |
| lb t0, 0(a1) |
| PTR_ADDIU a0, 1 |
| PTR_ADDIU a1, 1 |
| bne a1, a3, L(lst16l) |
| sb t0, -1(a0) |
| L(lst16e): |
| jr ra # Return; v0 was set at entry |
| nop |
| |
/* Source and destination cannot be aligned together.  First bring the
   destination to an 8-byte boundary: a3 = (-a0) & 7 bytes are needed.
   The delay slot of the beq removes them from the count (a no-op when a3
   is 0).  The LDHI/LDLO pair reads an unaligned doubleword from a1 and
   SDHI stores only the part that reaches the boundary in a0.  */
| L(shift): |
| PTR_SUBU a3, zero, a0 # Src and dest differ in alignment |
| andi a3, 0x7 # (unoptimized case...) |
| beq a3, zero, L(shft1) |
| PTR_SUBU a2, a3 # a2 = bytes left |
| LDHI t0, 0(a1) # Take care of first odd part |
| LDLO t0, 7(a1) |
| PTR_ADDU a1, a3 |
| SDHI t0, 0(a0) |
| PTR_ADDU a0, a3 |
/* t0 = byte tail (a2 mod 8), a3 = source address at which the doubleword
   loop stops.  The loop tests only at the bottom, so it needs at least one
   whole doubleword: this path is reached with a count of at least 16 and
   at most 7 bytes were consumed above.  */
| L(shft1): |
| andi t0, a2, 0x7 |
| PTR_SUBU a3, a2, t0 |
| PTR_ADDU a3, a1 |
/* Unaligned load from the source, aligned store to the destination.  */
| L(shfth): |
| LDHI t1, 0(a1) # Limp through, dword by dword |
| LDLO t1, 7(a1) |
| PTR_ADDIU a0, 8 |
| PTR_ADDIU a1, 8 |
| bne a1, a3, L(shfth) |
| sd t1, -8(a0) |
/* The delay slot of the branch hands the byte tail to L(last16) in a2.  */
| b L(last16) # Handle anything which may be left |
| move a2, t0 |
| |
| .set reorder |
| END (memcpy) |
| |
| #else /* !__mips64 */ |
| |
/* Endian-neutral names for the unaligned word load/store instructions.
   As used in the 32-bit memcpy in this file, LWHI/SWHI at offset 0
   transfer the bytes from the given address up to the end of the
   enclosing aligned word, and LWLO at offset 3 supplies the remaining
   bytes of an unaligned word.  */
| #if __BYTE_ORDER == __BIG_ENDIAN |
| # define LWHI lwl /* high part is left in big-endian */ |
| # define SWHI swl /* high part is left in big-endian */ |
| # define LWLO lwr /* low part is right in big-endian */ |
| # define SWLO swr /* low part is right in big-endian */ |
| #else |
| # define LWHI lwr /* high part is right in little-endian */ |
| # define SWHI swr /* high part is right in little-endian */ |
| # define LWLO lwl /* low part is left in little-endian */ |
| # define SWLO swl /* low part is left in little-endian */ |
| #endif |
| |
/* memcpy for 32-bit MIPS (this half of the file is used when __mips64 is
   not defined).
   In:   a0 = destination, a1 = source, a2 = number of bytes to copy.
   Out:  v0 = the destination pointer as it was on entry.
   Uses: t0-t7 and a3 as scratch; a0, a1 and a2 are modified.
   The body is assembled under .set noreorder: the instruction written
   after each branch or jump is its delay slot and executes whether or
   not the branch is taken.  */
| ENTRY (memcpy) |
| .set noreorder |
| |
/* Short copies go straight to the byte loop.
   NOTE(review): slti is a signed compare, so a count with its top bit set
   is treated as small; L(last8) then sees a2 <= 0 and returns without
   copying anything.  */
| slti t0, a2, 8 # Fewer than 8 bytes to copy? |
| bne t0, zero, L(last8) |
| move v0, a0 # Delay slot: return value, set on both paths |
| |
/* If a0 and a1 differ in their low two bits they can never both be
   4-byte aligned at the same time; that case is handled at L(shift).
   The delay slot starts computing t1 = (-a1) & 3, the number of bytes
   from a1 up to the next 4-byte boundary.  */
| xor t0, a1, a0 # Find a0/a1 displacement |
| andi t0, 0x3 |
| bne t0, zero, L(shift) # Go handle the unaligned case |
| subu t1, zero, a1 |
/* The delay slot of the beq removes the t1 leading bytes from the count
   (a no-op when t1 is 0).  Otherwise one partial load/store pair copies
   those t1 bytes and both pointers are advanced to the boundary.  */
| andi t1, 0x3 # a0/a1 are mutually aligned, but are we |
| beq t1, zero, L(chk8w) # starting in the middle of a word? |
| subu a2, t1 |
| LWHI t0, 0(a1) # Yes we are... take care of that |
| addu a1, t1 |
| SWHI t0, 0(a0) |
| addu a0, t1 |
| |
/* Both pointers are 4-byte aligned from here on.  t0 = a2 mod 32; if that
   equals a2 there are fewer than 32 bytes and the unrolled loop is
   skipped.  */
| L(chk8w): |
| andi t0, a2, 0x1f # 32 or more bytes left? |
| beq t0, a2, L(chk1w) |
| subu a3, a2, t0 # Delay slot: a3 = bytes the loop will copy |
| addu a3, a1 # a3 = end address of loop |
| move a2, t0 # a2 = what will be left after loop |
/* All eight loads are issued before the pointers are bumped, so the
   stores use negative offsets.  The last store sits in the delay slot
   of the loop branch.  */
| L(lop8w): |
| lw t0, 0(a1) # Loop taking 8 words (32 bytes) at a time |
| lw t1, 4(a1) |
| lw t2, 8(a1) |
| lw t3, 12(a1) |
| lw t4, 16(a1) |
| lw t5, 20(a1) |
| lw t6, 24(a1) |
| lw t7, 28(a1) |
| addiu a0, 32 |
| addiu a1, 32 |
| sw t0, -32(a0) |
| sw t1, -28(a0) |
| sw t2, -24(a0) |
| sw t3, -20(a0) |
| sw t4, -16(a0) |
| sw t5, -12(a0) |
| sw t6, -8(a0) |
| bne a1, a3, L(lop8w) |
| sw t7, -4(a0) |
| |
/* Same scheme for the remaining whole words: t0 = a2 mod 4 is the byte
   tail, a3 becomes the source address at which the loop stops.  */
| L(chk1w): |
| andi t0, a2, 0x3 # 4 or more bytes left? |
| beq t0, a2, L(last8) |
| subu a3, a2, t0 # Yes, handle them one word at a time |
| addu a3, a1 # a3 again end address |
| move a2, t0 |
| L(lop1w): |
| lw t0, 0(a1) |
| addiu a0, 4 |
| addiu a1, 4 |
| bne a1, a3, L(lop1w) |
| sw t0, -4(a0) |
| |
/* Byte tail, also the whole copy for short requests.  The delay slot of
   the blez computes a3 = a1 + a2, the source address at which the byte
   loop stops.  */
| L(last8): |
| blez a2, L(lst8e) # Copy the remaining a2 bytes, one at a time |
| addu a3, a2, a1 |
| L(lst8l): |
| lb t0, 0(a1) |
| addiu a0, 1 |
| addiu a1, 1 |
| bne a1, a3, L(lst8l) |
| sb t0, -1(a0) |
| L(lst8e): |
| jr ra # Return; v0 was set at entry |
| nop |
| |
/* Source and destination cannot be aligned together.  First bring the
   destination to a 4-byte boundary: a3 = (-a0) & 3 bytes are needed.
   The delay slot of the beq removes them from the count (a no-op when a3
   is 0).  The LWHI/LWLO pair reads an unaligned word from a1 and SWHI
   stores only the part that reaches the boundary in a0.  */
| L(shift): |
| subu a3, zero, a0 # Src and dest differ in alignment |
| andi a3, 0x3 # (unoptimized case...) |
| beq a3, zero, L(shft1) |
| subu a2, a3 # a2 = bytes left |
| LWHI t0, 0(a1) # Take care of first odd part |
| LWLO t0, 3(a1) |
| addu a1, a3 |
| SWHI t0, 0(a0) |
| addu a0, a3 |
/* t0 = byte tail (a2 mod 4), a3 = source address at which the word loop
   stops.  The loop tests only at the bottom, so it needs at least one
   whole word: this path is reached with a count of at least 8 and at most
   3 bytes were consumed above.  */
| L(shft1): |
| andi t0, a2, 0x3 |
| subu a3, a2, t0 |
| addu a3, a1 |
/* Unaligned load from the source, aligned store to the destination.  */
| L(shfth): |
| LWHI t1, 0(a1) # Limp through, word by word |
| LWLO t1, 3(a1) |
| addiu a0, 4 |
| addiu a1, 4 |
| bne a1, a3, L(shfth) |
| sw t1, -4(a0) |
/* The delay slot of the branch hands the byte tail to L(last8) in a2.  */
| b L(last8) # Handle anything which may be left |
| move a2, t0 |
| |
| .set reorder |
| END (memcpy) |
| |
| #endif /* !__mips64 */ |
| |
/* NOTE(review): libc_hidden_def is not defined in this file; presumably it
   comes from one of the headers included above and sets up the library's
   internal (hidden) definition of memcpy -- confirm against the macro's
   definition.  */
| libc_hidden_def(memcpy) |