lh | 9ed821d | 2023-04-07 01:36:19 -0700 | [diff] [blame^] | 1 | /* Copyright (C) 2002, 2003 Free Software Foundation, Inc. |
| 2 | This file is part of the GNU C Library. |
| 3 | Contributed by Hartvig Ekner <hartvige@mips.com>, 2002. |
| 4 | |
| 5 | The GNU C Library is free software; you can redistribute it and/or |
| 6 | modify it under the terms of the GNU Lesser General Public |
| 7 | License as published by the Free Software Foundation; either |
| 8 | version 2.1 of the License, or (at your option) any later version. |
| 9 | |
| 10 | The GNU C Library is distributed in the hope that it will be useful, |
| 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 13 | Lesser General Public License for more details. |
| 14 | |
| 15 | You should have received a copy of the GNU Lesser General Public |
| 16 | License along with the GNU C Library; if not, write to the Free |
| 17 | Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA |
| 18 | 02111-1307 USA. */ |
| 19 | |
| 20 | #include <features.h> |
| 21 | /*#include <sysdep.h>*/ |
| 22 | #include <endian.h> |
| 23 | #include "sysdep.h" |
| 24 | |
| 25 | /* void *memcpy(void *s1, const void *s2, size_t n); */ |
| 26 | |
| 27 | #ifdef __mips64 |
| 28 | |
| 29 | #include <sys/asm.h> |
| 30 | |
/* Byte-order-neutral names for the unaligned doubleword load/store
   left/right instructions.  The code in this file applies the *HI form at
   offset 0 of the datum and the *LO form at offset 7; which of the
   left/right instructions that corresponds to depends on __BYTE_ORDER.
   SDLO is defined for symmetry but is not referenced by the code visible
   in this file.  */
| 31 | #if __BYTE_ORDER == __BIG_ENDIAN |
| 32 | # define LDHI ldl /* high part is left in big-endian */ |
| 33 | # define SDHI sdl /* high part is left in big-endian */ |
| 34 | # define LDLO ldr /* low part is right in big-endian */ |
| 35 | # define SDLO sdr /* low part is right in big-endian */ |
| 36 | #else |
| 37 | # define LDHI ldr /* high part is right in little-endian */ |
| 38 | # define SDHI sdr /* high part is right in little-endian */ |
| 39 | # define LDLO ldl /* low part is left in little-endian */ |
| 40 | # define SDLO sdl /* low part is left in little-endian */ |
| 41 | #endif |
| 42 | |
/* memcpy, 64-bit MIPS variant.
   In:   a0 = destination, a1 = source, a2 = byte count.
   Out:  v0 = the original destination pointer.
   Writes: t0-t3, ta0-ta3, a3, and advances a0/a1/a2 as it goes.

   The body is assembled under ".set noreorder": the instruction written
   directly after every branch or jump is its delay slot and executes
   whether or not the branch is taken.  Several delay slots below do real
   work (setting v0, adjusting a2, computing a3, the last store of a loop).

   Strategy: counts below 16 go straight to the byte loop.  If source and
   destination have the same offset within a doubleword, copy up to the
   next 8-byte boundary, then 64 bytes per iteration, then 8 bytes per
   iteration, then the remaining bytes one at a time.  Otherwise, L(shift)
   aligns the destination and copies using unaligned loads paired with
   aligned stores.  */
| 43 | ENTRY (memcpy) |
| 44 | .set noreorder |
| 45 | |
/* Small-copy check.  The delay slot sets the return value v0 = a0 on both
   paths, before a0 is modified.
   NOTE(review): slti is a signed comparison, so a count with the sign bit
   set also takes this branch, and L(last16) then returns without copying
   (blez).  Confirm that such counts cannot reach this routine.  */
| 46 | slti t0, a2, 16 # Less than 16? |
| 47 | bne t0, zero, L(last16) |
| 48 | move v0, a0 # Setup exit value before too late |
| 49 | |
/* t0 = (src ^ dst) & 7 is non-zero when the two pointers sit at different
   offsets within a doubleword.  The delay slot starts computing
   t1 = -src; on the L(shift) path t1 is simply overwritten later.  */
| 50 | xor t0, a1, a0 # Find a0/a1 displacement |
| 51 | andi t0, 0x7 |
| 52 | bne t0, zero, L(shift) # Go handle the unaligned case |
| 53 | PTR_SUBU t1, zero, a1 |
/* t1 = (-src) & 7 = bytes up to the next 8-byte boundary.  The delay slot
   subtracts t1 from the count (a no-op when t1 is 0 and the branch is
   taken).  Otherwise LDHI/SDHI at offset 0 transfer exactly the t1 bytes
   between the current address and the boundary, and both pointers are
   advanced to that boundary.  */
| 54 | andi t1, 0x7 # a0/a1 are aligned, but are we |
| 55 | beq t1, zero, L(chk8w) # starting in the middle of a word? |
| 56 | PTR_SUBU a2, t1 |
| 57 | LDHI t0, 0(a1) # Yes we are... take care of that |
| 58 | PTR_ADDU a1, t1 |
| 59 | SDHI t0, 0(a0) |
| 60 | PTR_ADDU a0, t1 |
| 61 | |
/* Both pointers are 8-byte aligned from here on.  t0 = a2 % 64; if that
   equals a2 there are fewer than 64 bytes and the block loop is skipped.
   The delay slot computes a3 = a2 - t0 (the byte total for the block loop;
   L(chk1w) recomputes a3 when the branch is taken).  */
| 62 | L(chk8w): |
| 63 | andi t0, a2, 0x3f # 64 or more bytes left? |
| 64 | beq t0, a2, L(chk1w) |
| 65 | PTR_SUBU a3, a2, t0 # Yes |
| 66 | PTR_ADDU a3, a1 # a3 = end address of loop |
| 67 | move a2, t0 # a2 = what will be left after loop |
/* 64-byte block loop: eight loads, pointer bumps, then eight stores
   addressed with negative offsets from the already-advanced a0.  The last
   store sits in the branch delay slot.  The loop is only entered with at
   least one full block, so the do-while form (test at the bottom) is safe.  */
| 68 | L(lop8w): |
| 69 | ld t0, 0(a1) # Loop taking 8 words at a time |
| 70 | ld t1, 8(a1) |
| 71 | ld t2, 16(a1) |
| 72 | ld t3, 24(a1) |
| 73 | ld ta0, 32(a1) |
| 74 | ld ta1, 40(a1) |
| 75 | ld ta2, 48(a1) |
| 76 | ld ta3, 56(a1) |
| 77 | PTR_ADDIU a0, 64 |
| 78 | PTR_ADDIU a1, 64 |
| 79 | sd t0, -64(a0) |
| 80 | sd t1, -56(a0) |
| 81 | sd t2, -48(a0) |
| 82 | sd t3, -40(a0) |
| 83 | sd ta0, -32(a0) |
| 84 | sd ta1, -24(a0) |
| 85 | sd ta2, -16(a0) |
| 86 | bne a1, a3, L(lop8w) |
| 87 | sd ta3, -8(a0) |
| 88 | |
/* Fewer than 64 bytes remain.  t0 = a2 % 8; if that equals a2 there is no
   whole doubleword left.  Otherwise a3 = end address of the doubleword
   loop and a2 = the 0-7 bytes left for the byte loop.  */
| 89 | L(chk1w): |
| 90 | andi t0, a2, 0x7 # 8 or more bytes left? |
| 91 | beq t0, a2, L(last16) |
| 92 | PTR_SUBU a3, a2, t0 # Yes, handle them one dword at a time |
| 93 | PTR_ADDU a3, a1 # a3 again end address |
| 94 | move a2, t0 |
/* One aligned doubleword per iteration; the store is in the delay slot.  */
| 95 | L(lop1w): |
| 96 | ld t0, 0(a1) |
| 97 | PTR_ADDIU a0, 8 |
| 98 | PTR_ADDIU a1, 8 |
| 99 | bne a1, a3, L(lop1w) |
| 100 | sd t0, -8(a0) |
| 101 | |
/* Byte tail, shared by every path.  a2 = bytes still to copy; if it is
   <= 0 return at once.  The delay slot computes a3 = a1 + a2, the source
   end address used as the loop bound.  */
| 102 | L(last16): |
| 103 | blez a2, L(lst16e) # Handle last 16 bytes, one at a time |
| 104 | PTR_ADDU a3, a2, a1 |
| 105 | L(lst16l): |
| 106 | lb t0, 0(a1) |
| 107 | PTR_ADDIU a0, 1 |
| 108 | PTR_ADDIU a1, 1 |
| 109 | bne a1, a3, L(lst16l) |
| 110 | sb t0, -1(a0) |
/* Return to the caller; v0 was set at entry.  The nop fills the delay
   slot of jr.  */
| 111 | L(lst16e): |
| 112 | jr ra # Bye, bye |
| 113 | nop |
| 114 | |
/* Source and destination have different offsets within a doubleword.
   a3 = (-dst) & 7 = bytes needed to bring the destination to an 8-byte
   boundary; the delay slot removes them from the count (a no-op when a3
   is 0).  If a3 is non-zero, the LDHI/LDLO pair assembles the eight bytes
   at a1..a1+7 in t0, SDHI stores only the leading a3 bytes up to the
   destination boundary, and both pointers advance by a3.  */
| 115 | L(shift): |
| 116 | PTR_SUBU a3, zero, a0 # Src and Dest unaligned |
| 117 | andi a3, 0x7 # (unoptimized case...) |
| 118 | beq a3, zero, L(shft1) |
| 119 | PTR_SUBU a2, a3 # a2 = bytes left |
| 120 | LDHI t0, 0(a1) # Take care of first odd part |
| 121 | LDLO t0, 7(a1) |
| 122 | PTR_ADDU a1, a3 |
| 123 | SDHI t0, 0(a0) |
| 124 | PTR_ADDU a0, a3 |
/* Destination is now 8-byte aligned.  t0 = a2 % 8 is kept for the byte
   tail; a3 = a1 + (a2 - t0) is the source end address for the loop.
   This path is only reached with a count of at least 16 and at most 7
   bytes have been consumed, so at least one whole doubleword remains and
   the bottom-tested loop below may run unconditionally.  */
| 125 | L(shft1): |
| 126 | andi t0, a2, 0x7 |
| 127 | PTR_SUBU a3, a2, t0 |
| 128 | PTR_ADDU a3, a1 |
/* Unaligned load pair from the source, aligned sd to the destination in
   the branch delay slot.  */
| 129 | L(shfth): |
| 130 | LDHI t1, 0(a1) # Limp through, dword by dword |
| 131 | LDLO t1, 7(a1) |
| 132 | PTR_ADDIU a0, 8 |
| 133 | PTR_ADDIU a1, 8 |
| 134 | bne a1, a3, L(shfth) |
| 135 | sd t1, -8(a0) |
/* Finish in the shared byte loop; the delay slot hands it the remaining
   count (a2 = t0).  */
| 136 | b L(last16) # Handle anything which may be left |
| 137 | move a2, t0 |
| 138 | |
| 139 | .set reorder |
| 140 | END (memcpy) |
| 141 | |
| 142 | #else /* !__mips64 */ |
| 143 | |
/* Byte-order-neutral names for the unaligned word load/store left/right
   instructions.  The code in this file applies the *HI form at offset 0
   of the datum and the *LO form at offset 3; which of the left/right
   instructions that corresponds to depends on __BYTE_ORDER.
   SWLO is defined for symmetry but is not referenced by the code visible
   in this file.  */
| 144 | #if __BYTE_ORDER == __BIG_ENDIAN |
| 145 | # define LWHI lwl /* high part is left in big-endian */ |
| 146 | # define SWHI swl /* high part is left in big-endian */ |
| 147 | # define LWLO lwr /* low part is right in big-endian */ |
| 148 | # define SWLO swr /* low part is right in big-endian */ |
| 149 | #else |
| 150 | # define LWHI lwr /* high part is right in little-endian */ |
| 151 | # define SWHI swr /* high part is right in little-endian */ |
| 152 | # define LWLO lwl /* low part is left in little-endian */ |
| 153 | # define SWLO swl /* low part is left in little-endian */ |
| 154 | #endif |
| 155 | |
/* memcpy, 32-bit MIPS variant.
   In:   a0 = destination, a1 = source, a2 = byte count.
   Out:  v0 = the original destination pointer.
   Writes: t0-t7, a3, and advances a0/a1/a2 as it goes.

   The body is assembled under ".set noreorder": the instruction written
   directly after every branch or jump is its delay slot and executes
   whether or not the branch is taken.  Several delay slots below do real
   work (setting v0, adjusting a2, computing a3, the last store of a loop).

   Strategy: counts below 8 go straight to the byte loop.  If source and
   destination have the same offset within a word, copy up to the next
   4-byte boundary, then 32 bytes per iteration, then 4 bytes per
   iteration, then the remaining bytes one at a time.  Otherwise, L(shift)
   aligns the destination and copies using unaligned loads paired with
   aligned stores.  */
| 156 | ENTRY (memcpy) |
| 157 | .set noreorder |
| 158 | |
/* Small-copy check.  The delay slot sets the return value v0 = a0 on both
   paths, before a0 is modified.
   NOTE(review): slti is a signed comparison, so a count with the sign bit
   set also takes this branch, and L(last8) then returns without copying
   (blez).  Confirm that such counts cannot reach this routine.  */
| 159 | slti t0, a2, 8 # Less than 8? |
| 160 | bne t0, zero, L(last8) |
| 161 | move v0, a0 # Setup exit value before too late |
| 162 | |
/* t0 = (src ^ dst) & 3 is non-zero when the two pointers sit at different
   offsets within a word.  The delay slot starts computing t1 = -src; on
   the L(shift) path t1 is simply overwritten later.  */
| 163 | xor t0, a1, a0 # Find a0/a1 displacement |
| 164 | andi t0, 0x3 |
| 165 | bne t0, zero, L(shift) # Go handle the unaligned case |
| 166 | subu t1, zero, a1 |
/* t1 = (-src) & 3 = bytes up to the next 4-byte boundary.  The delay slot
   subtracts t1 from the count (a no-op when t1 is 0 and the branch is
   taken).  Otherwise LWHI/SWHI at offset 0 transfer exactly the t1 bytes
   between the current address and the boundary, and both pointers are
   advanced to that boundary.  */
| 167 | andi t1, 0x3 # a0/a1 are aligned, but are we |
| 168 | beq t1, zero, L(chk8w) # starting in the middle of a word? |
| 169 | subu a2, t1 |
| 170 | LWHI t0, 0(a1) # Yes we are... take care of that |
| 171 | addu a1, t1 |
| 172 | SWHI t0, 0(a0) |
| 173 | addu a0, t1 |
| 174 | |
/* Both pointers are 4-byte aligned from here on.  t0 = a2 % 32; if that
   equals a2 there are fewer than 32 bytes and the block loop is skipped.
   The delay slot computes a3 = a2 - t0 (the byte total for the block loop;
   L(chk1w) recomputes a3 when the branch is taken).  */
| 175 | L(chk8w): |
| 176 | andi t0, a2, 0x1f # 32 or more bytes left? |
| 177 | beq t0, a2, L(chk1w) |
| 178 | subu a3, a2, t0 # Yes |
| 179 | addu a3, a1 # a3 = end address of loop |
| 180 | move a2, t0 # a2 = what will be left after loop |
/* 32-byte block loop: eight loads, pointer bumps, then eight stores
   addressed with negative offsets from the already-advanced a0.  The last
   store sits in the branch delay slot.  The loop is only entered with at
   least one full block, so the do-while form (test at the bottom) is safe.  */
| 181 | L(lop8w): |
| 182 | lw t0, 0(a1) # Loop taking 8 words at a time |
| 183 | lw t1, 4(a1) |
| 184 | lw t2, 8(a1) |
| 185 | lw t3, 12(a1) |
| 186 | lw t4, 16(a1) |
| 187 | lw t5, 20(a1) |
| 188 | lw t6, 24(a1) |
| 189 | lw t7, 28(a1) |
| 190 | addiu a0, 32 |
| 191 | addiu a1, 32 |
| 192 | sw t0, -32(a0) |
| 193 | sw t1, -28(a0) |
| 194 | sw t2, -24(a0) |
| 195 | sw t3, -20(a0) |
| 196 | sw t4, -16(a0) |
| 197 | sw t5, -12(a0) |
| 198 | sw t6, -8(a0) |
| 199 | bne a1, a3, L(lop8w) |
| 200 | sw t7, -4(a0) |
| 201 | |
/* Fewer than 32 bytes remain.  t0 = a2 % 4; if that equals a2 there is no
   whole word left.  Otherwise a3 = end address of the word loop and
   a2 = the 0-3 bytes left for the byte loop.  */
| 202 | L(chk1w): |
| 203 | andi t0, a2, 0x3 # 4 or more bytes left? |
| 204 | beq t0, a2, L(last8) |
| 205 | subu a3, a2, t0 # Yes, handle them one word at a time |
| 206 | addu a3, a1 # a3 again end address |
| 207 | move a2, t0 |
/* One aligned word per iteration; the store is in the delay slot.  */
| 208 | L(lop1w): |
| 209 | lw t0, 0(a1) |
| 210 | addiu a0, 4 |
| 211 | addiu a1, 4 |
| 212 | bne a1, a3, L(lop1w) |
| 213 | sw t0, -4(a0) |
| 214 | |
/* Byte tail, shared by every path.  a2 = bytes still to copy; if it is
   <= 0 return at once.  The delay slot computes a3 = a1 + a2, the source
   end address used as the loop bound.  */
| 215 | L(last8): |
| 216 | blez a2, L(lst8e) # Handle last 8 bytes, one at a time |
| 217 | addu a3, a2, a1 |
| 218 | L(lst8l): |
| 219 | lb t0, 0(a1) |
| 220 | addiu a0, 1 |
| 221 | addiu a1, 1 |
| 222 | bne a1, a3, L(lst8l) |
| 223 | sb t0, -1(a0) |
/* Return to the caller; v0 was set at entry.  The nop fills the delay
   slot of jr.  */
| 224 | L(lst8e): |
| 225 | jr ra # Bye, bye |
| 226 | nop |
| 227 | |
/* Source and destination have different offsets within a word.
   a3 = (-dst) & 3 = bytes needed to bring the destination to a 4-byte
   boundary; the delay slot removes them from the count (a no-op when a3
   is 0).  If a3 is non-zero, the LWHI/LWLO pair assembles the four bytes
   at a1..a1+3 in t0, SWHI stores only the leading a3 bytes up to the
   destination boundary, and both pointers advance by a3.  */
| 228 | L(shift): |
| 229 | subu a3, zero, a0 # Src and Dest unaligned |
| 230 | andi a3, 0x3 # (unoptimized case...) |
| 231 | beq a3, zero, L(shft1) |
| 232 | subu a2, a3 # a2 = bytes left |
| 233 | LWHI t0, 0(a1) # Take care of first odd part |
| 234 | LWLO t0, 3(a1) |
| 235 | addu a1, a3 |
| 236 | SWHI t0, 0(a0) |
| 237 | addu a0, a3 |
/* Destination is now 4-byte aligned.  t0 = a2 % 4 is kept for the byte
   tail; a3 = a1 + (a2 - t0) is the source end address for the loop.
   This path is only reached with a count of at least 8 and at most 3
   bytes have been consumed, so at least one whole word remains and the
   bottom-tested loop below may run unconditionally.  */
| 238 | L(shft1): |
| 239 | andi t0, a2, 0x3 |
| 240 | subu a3, a2, t0 |
| 241 | addu a3, a1 |
/* Unaligned load pair from the source, aligned sw to the destination in
   the branch delay slot.  */
| 242 | L(shfth): |
| 243 | LWHI t1, 0(a1) # Limp through, word by word |
| 244 | LWLO t1, 3(a1) |
| 245 | addiu a0, 4 |
| 246 | addiu a1, 4 |
| 247 | bne a1, a3, L(shfth) |
| 248 | sw t1, -4(a0) |
/* Finish in the shared byte loop; the delay slot hands it the remaining
   count (a2 = t0).  */
| 249 | b L(last8) # Handle anything which may be left |
| 250 | move a2, t0 |
| 251 | |
| 252 | .set reorder |
| 253 | END (memcpy) |
| 254 | |
| 255 | #endif /* !__mips64 */ |
| 256 | |
/* NOTE(review): libc_hidden_def is not defined in this file; presumably it
   comes from the included libc headers and sets up the library-internal
   (hidden) definition of memcpy -- confirm against those headers.  It is
   placed after the #endif, so it applies to whichever variant was built.  */
| 257 | libc_hidden_def(memcpy) |