/* Copyright (C) 2013-2016 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library.  If not, see
   <http://www.gnu.org/licenses/>.  */

#ifdef ANDROID_CHANGES
# include "machine/asm.h"
# include "machine/regdef.h"
# define PREFETCH_STORE_HINT PREFETCH_HINT_PREPAREFORSTORE
#elif _LIBC
# include <sysdep.h>
# include <regdef.h>
# include <sys/asm.h>
# define PREFETCH_STORE_HINT PREFETCH_HINT_PREPAREFORSTORE
#elif defined _COMPILING_NEWLIB
# include "machine/asm.h"
# include "machine/regdef.h"
# define PREFETCH_STORE_HINT PREFETCH_HINT_PREPAREFORSTORE
#else
# include <regdef.h>
# include <sys/asm.h>
#endif

/* Check to see if the MIPS architecture we are compiling for supports
   prefetching.  */

#if (__mips == 4) || (__mips == 5) || (__mips == 32) || (__mips == 64)
# ifndef DISABLE_PREFETCH
#  define USE_PREFETCH
# endif
#endif

#if defined(_MIPS_SIM) && ((_MIPS_SIM == _ABI64) || (_MIPS_SIM == _ABIN32))
# ifndef DISABLE_DOUBLE
#  define USE_DOUBLE
# endif
#endif

#ifndef USE_DOUBLE
# ifndef DISABLE_DOUBLE_ALIGN
#  define DOUBLE_ALIGN
# endif
#endif


/* Some asm.h files do not have the L macro definition.  */
#ifndef L
# if _MIPS_SIM == _ABIO32
#  define L(label) $L ## label
# else
#  define L(label) .L ## label
# endif
#endif

/* Some asm.h files do not have the PTR_ADDIU macro definition.  */
#ifndef PTR_ADDIU
# ifdef USE_DOUBLE
#  define PTR_ADDIU	daddiu
# else
#  define PTR_ADDIU	addiu
# endif
#endif

/* New R6 instructions that may not be in asm.h.  */
#ifndef PTR_LSA
# if _MIPS_SIM == _ABI64
#  define PTR_LSA        dlsa
# else
#  define PTR_LSA        lsa
# endif
#endif

/* Using PREFETCH_HINT_PREPAREFORSTORE instead of PREFETCH_STORE
   or PREFETCH_STORE_STREAMED offers a large performance advantage
   but PREPAREFORSTORE has some special restrictions to consider.

   Prefetch with the 'prepare for store' hint does not copy a memory
   location into the cache; it just allocates a cache line and zeros
   it out.  This means that if you do not write to the entire cache
   line before it is written back to memory, the unwritten bytes are
   zeroed out and that data is lost.

   There are ifdef'ed sections of this memset to make sure that it does not
   do prefetches on cache lines that are not going to be completely written.
   This code is only needed and only used when PREFETCH_STORE_HINT is set to
   PREFETCH_HINT_PREPAREFORSTORE.  This code assumes that cache lines are
   less than MAX_PREFETCH_SIZE bytes; if the cache line is larger the code
   will not work correctly.  */
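
/* Illustrative example (assuming a 32-byte cache line; not part of the
   original comment): a PREPAREFORSTORE prefetch of the line holding dst+64
   allocates and zeroes all 32 bytes of that line.  If the routine then
   stores only dst+64..dst+79, the bytes at dst+80..dst+95 are written back
   as zeros, clobbering whatever the caller had there.  */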

#ifdef USE_PREFETCH
# define PREFETCH_HINT_STORE		1
# define PREFETCH_HINT_STORE_STREAMED	5
# define PREFETCH_HINT_STORE_RETAINED	7
# define PREFETCH_HINT_PREPAREFORSTORE	30

/* If we have not picked out what hints to use at this point use the
   standard load and store prefetch hints.  */
# ifndef PREFETCH_STORE_HINT
#  define PREFETCH_STORE_HINT PREFETCH_HINT_STORE
# endif

/* We double everything when USE_DOUBLE is true so we do 2 prefetches to
   get 64 bytes in that case.  The assumption is that each individual
   prefetch brings in 32 bytes.  */
# ifdef USE_DOUBLE
#  define PREFETCH_CHUNK 64
#  define PREFETCH_FOR_STORE(chunk, reg) \
	pref PREFETCH_STORE_HINT, (chunk)*64(reg); \
	pref PREFETCH_STORE_HINT, ((chunk)*64)+32(reg)
# else
#  define PREFETCH_CHUNK 32
#  define PREFETCH_FOR_STORE(chunk, reg) \
	pref PREFETCH_STORE_HINT, (chunk)*32(reg)
# endif
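
/* Expansion sketch (added for illustration, not part of the original
   source): with USE_DOUBLE and PREFETCH_STORE_HINT equal to
   PREFETCH_HINT_PREPAREFORSTORE (30), PREFETCH_FOR_STORE (1, a0) expands to
	pref	30, 64(a0)
	pref	30, 96(a0)
   i.e. it prepares the 64-byte block one PREFETCH_CHUNK ahead of a0.  */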

/* MAX_PREFETCH_SIZE is the maximum size of a prefetch; it must not be less
   than PREFETCH_CHUNK, the assumed size of each prefetch.  If the real size
   of a prefetch is greater than MAX_PREFETCH_SIZE and the PREPAREFORSTORE
   hint is used, the code will not work correctly.  If PREPAREFORSTORE is not
   used then MAX_PREFETCH_SIZE does not matter.  */
# define MAX_PREFETCH_SIZE 128
/* PREFETCH_LIMIT is set based on the fact that we never use an offset greater
   than 5 on a STORE prefetch and that a single prefetch can never be larger
   than MAX_PREFETCH_SIZE.  We add the extra 32 when USE_DOUBLE is set because
   we actually do two prefetches in that case, one 32 bytes after the other.  */
# ifdef USE_DOUBLE
#  define PREFETCH_LIMIT (5 * PREFETCH_CHUNK) + 32 + MAX_PREFETCH_SIZE
# else
#  define PREFETCH_LIMIT (5 * PREFETCH_CHUNK) + MAX_PREFETCH_SIZE
# endif
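
/* Worked example (derived from the definitions above): with USE_DOUBLE,
   PREFETCH_CHUNK is 64, so PREFETCH_LIMIT = (5 * 64) + 32 + 128 = 480 bytes;
   without USE_DOUBLE, PREFETCH_CHUNK is 32 and PREFETCH_LIMIT = (5 * 32) +
   128 = 288 bytes.  When the PREPAREFORSTORE hint is in use, prefetching is
   skipped once a0 is within that many bytes of the end of the buffer.  */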

# if (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE) \
    && ((PREFETCH_CHUNK * 4) < MAX_PREFETCH_SIZE)
/* We cannot handle this because the initial prefetches may fetch bytes that
   are before the buffer being copied.  We start copies with an offset
   of 4, so we avoid this situation when using PREPAREFORSTORE.  */
#  error "PREFETCH_CHUNK is too large and/or MAX_PREFETCH_SIZE is too small."
# endif
#else /* USE_PREFETCH not defined */
# define PREFETCH_FOR_STORE(offset, reg)
#endif

#if __mips_isa_rev > 5
# if (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
#  undef PREFETCH_STORE_HINT
#  define PREFETCH_STORE_HINT PREFETCH_HINT_STORE_STREAMED
# endif
# define R6_CODE
#endif

/* Allow the routine to be named something else if desired.  */
#ifndef MEMSET_NAME
# define MEMSET_NAME memset
#endif

/* We load/store 64 bits at a time when USE_DOUBLE is true.
   The C_ prefix stands for CHUNK and is used to avoid macro name
   conflicts with system header files.  */

#ifdef USE_DOUBLE
# define C_ST	sd
# ifdef __MIPSEB
#  define C_STHI	sdl	/* high part is left in big-endian	*/
# else
#  define C_STHI	sdr	/* high part is right in little-endian	*/
# endif
#else
# define C_ST	sw
# ifdef __MIPSEB
#  define C_STHI	swl	/* high part is left in big-endian	*/
# else
#  define C_STHI	swr	/* high part is right in little-endian	*/
# endif
#endif

/* Bookkeeping values for 32 vs. 64 bit mode.  */
#ifdef USE_DOUBLE
# define NSIZE 8
# define NSIZEMASK 0x3f
# define NSIZEDMASK 0x7f
#else
# define NSIZE 4
# define NSIZEMASK 0x1f
# define NSIZEDMASK 0x3f
#endif
#define UNIT(unit) ((unit)*NSIZE)
#define UNITM1(unit) (((unit)*NSIZE)-1)
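
/* For illustration (not part of the original source): with USE_DOUBLE,
   UNIT(16) is 128 and UNITM1(1) is 7, so the main loop below advances a0 by
   128 bytes per iteration; in 32-bit mode UNIT(16) is 64 and UNITM1(1) is 3.  */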

#ifdef ANDROID_CHANGES
LEAF(MEMSET_NAME,0)
#else
LEAF(MEMSET_NAME)
#endif

	.set	nomips16
	.set	noreorder
/* If the size is less than 2*NSIZE (8 or 16), go to L(lastb).  Regardless of
   size, copy dst pointer to v0 for the return value.  */
	slti	t2,a2,(2 * NSIZE)
	bne	t2,zero,L(lastb)
	move	v0,a0

/* If memset value is not zero, we copy it to all the bytes in a 32 or 64
   bit word.  */
	beq	a1,zero,L(set0)		/* If memset value is zero no smear  */
	PTR_SUBU a3,zero,a0
	nop

/* smear byte into 32 or 64 bit word */
#if ((__mips == 64) || (__mips == 32)) && (__mips_isa_rev >= 2)
# ifdef USE_DOUBLE
	dins	a1, a1, 8, 8        /* Replicate fill byte into half-word.  */
	dins	a1, a1, 16, 16      /* Replicate fill byte into word.       */
	dins	a1, a1, 32, 32      /* Replicate fill byte into dbl word.   */
# else
	ins	a1, a1, 8, 8        /* Replicate fill byte into half-word.  */
	ins	a1, a1, 16, 16      /* Replicate fill byte into word.       */
# endif
#else
# ifdef USE_DOUBLE
	and     a1,0xff
	dsll	t2,a1,8
	or	a1,t2
	dsll	t2,a1,16
	or	a1,t2
	dsll	t2,a1,32
	or	a1,t2
# else
	and     a1,0xff
	sll	t2,a1,8
	or	a1,t2
	sll	t2,a1,16
	or	a1,t2
# endif
#endif
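
/* Worked example (added for illustration): if the fill byte in a1 is 0x5c,
   the sequence above leaves a1 holding 0x5c5c5c5c (or 0x5c5c5c5c5c5c5c5c
   when USE_DOUBLE is set), so each C_ST below stores the pattern across a
   full word or double word.  */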

/* If the destination address is not aligned, do a partial store to get it
   aligned.  If it is already aligned just jump to L(aligned).  */
L(set0):
#ifndef R6_CODE
	andi	t2,a3,(NSIZE-1)		/* word-unaligned address?          */
	beq	t2,zero,L(aligned)	/* t2 is the unalignment count      */
	PTR_SUBU a2,a2,t2
	C_STHI	a1,0(a0)
	PTR_ADDU a0,a0,t2
#else /* R6_CODE */
	andi	t2,a0,(NSIZE-1)
	lapc	t9,L(atable)
	PTR_LSA	t9,t2,t9,2
	jrc	t9
L(atable):
	bc	L(aligned)
# ifdef USE_DOUBLE
	bc	L(lb7)
	bc	L(lb6)
	bc	L(lb5)
	bc	L(lb4)
# endif
	bc	L(lb3)
	bc	L(lb2)
	bc	L(lb1)
L(lb7):
	sb	a1,6(a0)
L(lb6):
	sb	a1,5(a0)
L(lb5):
	sb	a1,4(a0)
L(lb4):
	sb	a1,3(a0)
L(lb3):
	sb	a1,2(a0)
L(lb2):
	sb	a1,1(a0)
L(lb1):
	sb	a1,0(a0)

	li	t9,NSIZE
	subu	t2,t9,t2
	PTR_SUBU a2,a2,t2
	PTR_ADDU a0,a0,t2
#endif /* R6_CODE */
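
/* Worked example of the R6 dispatch above (added for illustration,
   USE_DOUBLE case): the lapc/PTR_LSA pair indexes the table of compact
   branches by the misalignment t2.  If a0 is 3 bytes past an 8-byte
   boundary, t2 = 3 and jrc reaches the bc L(lb5) entry; the stores at
   L(lb5)..L(lb1) then fill the 5 bytes at offsets 4,3,2,1,0, after which
   t2 is recomputed as NSIZE - 3 = 5 and a0/a2 are adjusted by that amount.  */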

L(aligned):
/* If USE_DOUBLE is not set we may still want to align the data on a 16
   byte boundary instead of an 8 byte boundary to maximize the opportunity
   of proAptiv chips to do memory bonding (combining two sequential 4
   byte stores into one 8 byte store).  We know there are at least 4 bytes
   left to store or we would have jumped to L(lastb) earlier in the code.  */
#ifdef DOUBLE_ALIGN
	andi	t2,a3,4
	beq	t2,zero,L(double_aligned)
	PTR_SUBU a2,a2,t2
	sw	a1,0(a0)
	PTR_ADDU a0,a0,t2
L(double_aligned):
#endif

/* Now the destination is aligned to a (word or double word) boundary.
   Set t8 to count how many bytes we have to set after all the 64/128 byte
   chunks are done and a3 to the dest pointer after all the 64/128 byte
   chunks have been stored.  We will loop, incrementing a0 until it equals
   a3.  */
	andi	t8,a2,NSIZEDMASK /* any whole 64-byte/128-byte chunks? */
	beq	a2,t8,L(chkw)	 /* if a2==t8, no 64-byte/128-byte chunks */
	PTR_SUBU a3,a2,t8	 /* subtract from a2 the remainder */
	PTR_ADDU a3,a0,a3	 /* Now a3 is the final dst after loop */
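
/* Worked example (added for illustration, 64-bit case): for a 300-byte
   memset, NSIZEDMASK is 0x7f, so t8 = 300 & 0x7f = 44 and a3 = a0 + 256;
   the loop below runs twice (2 x 128 bytes) and the remaining 44 bytes are
   handled from L(chkw) onward.  */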

/* When in the loop we may prefetch with the 'prepare to store' hint;
   in this case a0+x should not be past the "t0-32" address.  This
   means: for x=128 the last "safe" a0 address is "t0-160".  Alternatively,
   for x=64 the last "safe" a0 address is "t0-96".  In the current version we
   will use "prefetch hint,128(a0)", so "t0-160" is the limit.  */
#if defined(USE_PREFETCH) \
    && (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
	PTR_ADDU t0,a0,a2		/* t0 is the "past the end" address */
	PTR_SUBU t9,t0,PREFETCH_LIMIT	/* t9 is the "last safe pref" address */
#endif
#if defined(USE_PREFETCH) \
    && (PREFETCH_STORE_HINT != PREFETCH_HINT_PREPAREFORSTORE)
	PREFETCH_FOR_STORE (1, a0)
	PREFETCH_FOR_STORE (2, a0)
	PREFETCH_FOR_STORE (3, a0)
#endif

L(loop16w):
#if defined(USE_PREFETCH) \
    && (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
	sltu	v1,t9,a0		/* If a0 > t9 don't use next prefetch */
	bgtz	v1,L(skip_pref)
	nop
#endif
#ifdef R6_CODE
	PREFETCH_FOR_STORE (2, a0)
#else
	PREFETCH_FOR_STORE (4, a0)
	PREFETCH_FOR_STORE (5, a0)
#endif
L(skip_pref):
	C_ST	a1,UNIT(0)(a0)
	C_ST	a1,UNIT(1)(a0)
	C_ST	a1,UNIT(2)(a0)
	C_ST	a1,UNIT(3)(a0)
	C_ST	a1,UNIT(4)(a0)
	C_ST	a1,UNIT(5)(a0)
	C_ST	a1,UNIT(6)(a0)
	C_ST	a1,UNIT(7)(a0)
	C_ST	a1,UNIT(8)(a0)
	C_ST	a1,UNIT(9)(a0)
	C_ST	a1,UNIT(10)(a0)
	C_ST	a1,UNIT(11)(a0)
	C_ST	a1,UNIT(12)(a0)
	C_ST	a1,UNIT(13)(a0)
	C_ST	a1,UNIT(14)(a0)
	C_ST	a1,UNIT(15)(a0)
	PTR_ADDIU a0,a0,UNIT(16)	/* adding 64/128 to dest */
	bne	a0,a3,L(loop16w)
	nop
	move	a2,t8

/* Here we have dest word-aligned but less than 64-bytes or 128 bytes to go.
   Check for a 32(64) byte chunk and copy it if there is one.  Otherwise
   jump down to L(chk1w) to handle the tail end of the copy.  */
L(chkw):
	andi	t8,a2,NSIZEMASK	/* is there a 32-byte/64-byte chunk.  */
				/* t8 is the remainder count past 32-bytes */
	beq	a2,t8,L(chk1w)	/* when a2==t8, no 32-byte chunk */
	nop
	C_ST	a1,UNIT(0)(a0)
	C_ST	a1,UNIT(1)(a0)
	C_ST	a1,UNIT(2)(a0)
	C_ST	a1,UNIT(3)(a0)
	C_ST	a1,UNIT(4)(a0)
	C_ST	a1,UNIT(5)(a0)
	C_ST	a1,UNIT(6)(a0)
	C_ST	a1,UNIT(7)(a0)
	PTR_ADDIU a0,a0,UNIT(8)

/* Here we have less than 32(64) bytes to set.  Set up for a loop to
   copy one word (or double word) at a time.  Set a2 to count how many
   bytes we have to copy after all the word (or double word) chunks are
   copied and a3 to the dest pointer after all the (d)word chunks have
   been copied.  We will loop, incrementing a0 until a0 equals a3.  */
L(chk1w):
	andi	a2,t8,(NSIZE-1)	/* a2 is the remainder past one (d)word chunks */
	beq	a2,t8,L(lastb)
	PTR_SUBU a3,t8,a2	/* a3 is count of bytes in one (d)word chunks */
	PTR_ADDU a3,a0,a3	/* a3 is the dst address after loop */

/* copying in words (4-byte or 8 byte chunks) */
L(wordCopy_loop):
	PTR_ADDIU a0,a0,UNIT(1)
	bne	a0,a3,L(wordCopy_loop)
	C_ST	a1,UNIT(-1)(a0)
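
/* Note (added for clarity): the C_ST above sits in the branch delay slot of
   the bne, so each iteration first advances a0 by one (d)word and then
   stores the fill pattern at UNIT(-1)(a0), i.e. into the (d)word that was
   just stepped over.  */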

/* Copy the last 8 (or 16) bytes */
L(lastb):
	blez	a2,L(leave)
	PTR_ADDU a3,a0,a2       /* a3 is the last dst address */
L(lastbloop):
	PTR_ADDIU a0,a0,1
	bne	a0,a3,L(lastbloop)
	sb	a1,-1(a0)
L(leave):
	j	ra
	nop

	.set	at
	.set	reorder
END(MEMSET_NAME)
#ifndef ANDROID_CHANGES
# ifdef _LIBC
libc_hidden_builtin_def (MEMSET_NAME)
# endif
#endif