/*-
 * Copyright (c) 1997 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Neil A. Carson and Mark Brinicombe
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *        This product includes software developed by the NetBSD
 *        Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Adapted for uClibc from NetBSD _memcpy.S,v 1.6 2003/10/09
 * by Erik Andersen <andersen@codepoet.org>
 */

#include <features.h>
#include <endian.h>
#include <bits/arm_asm.h>

#if !defined(THUMB1_ONLY)
/*
 * This is one fun bit of code ...
 * Some easy listening music is suggested while trying to understand this
 * code, e.g. Iron Maiden.
 *
 * For anyone attempting to understand it:
 *
 * The core code is implemented here with simple stubs for memcpy(),
 * memmove() and bcopy().
 *
 * All local labels are prefixed with Lmemcpy_.
 * Following the prefix, a label starting with 'f' is used in the forward
 * copy code, while a label starting with 'b' is used in the backwards
 * copy code.
 * The source and destination addresses determine whether a forward or
 * backward copy is performed.
 * Separate bits of code are used to deal with the following situations
 * for both the forward and backwards copy:
 *	unaligned source address
 *	unaligned destination address
 * Separate copy routines are used to produce an optimised result for each
 * of these cases.
 * The copy code will use LDM/STM instructions to copy up to 32 bytes at
 * a time where possible.
 *
 * Note: r12 (aka ip) can be trashed during the function along with
 * r0-r3, although r0-r2 have defined uses, i.e. dest, src and len,
 * throughout.
 * Additional registers are preserved prior to use, i.e. r4, r5 and lr.
 * The return value in r0 must be the destination address.
 *
 * Apologies for the state of the comments ;-)
 */
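
/*
 * As a rough orientation aid, the dispatch performed below corresponds to
 * the following C-like pseudocode.  This is illustrative only; the helper
 * names do not exist anywhere in this file:
 *
 *	void *_memcpy(void *dst, const void *src, size_t len)
 *	{
 *		if ((uintptr_t)src < (uintptr_t)dst)
 *			copy_backwards(dst, src, len);	// walk down from dst+len/src+len
 *		else if (src != dst)
 *			copy_forwards(dst, src, len);	// align dst, fix up src
 *							// misalignment, then LDM/STM
 *							// up to 32 bytes per pass
 *		return dst;				// r0 is the return value
 *	}
 */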

.text
.global _memcpy
.hidden _memcpy
.type _memcpy,%function
.align 4

/* XXX: The Thumb-2 conditionals can be removed if/when we require an
   assembler that supports unified syntax. */
.macro copy regs
#if defined(__thumb2__)
	ittt	ge
	ldmiage	r1!, \regs
	stmiage	r0!, \regs
#else
	ldmgeia	r1!, \regs
	stmgeia	r0!, \regs
#endif
.endm

.macro copydb regs
#if defined(__thumb2__)
	ittt	ge
	ldmdbge	r1!, \regs
	stmdbge	r0!, \regs
#else
	ldmgedb	r1!, \regs
	stmgedb	r0!, \regs
#endif
.endm
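
/*
 * copy/copydb emit a GE-conditional load-multiple/store-multiple pair,
 * post-incrementing (IA) or pre-decrementing (DB) r1/r0 as they go.  In
 * Thumb-2 the condition comes from an ITTT GE block; its third slot is
 * filled by the GE-conditional length adjustment (subge/subsge) that each
 * caller places immediately after the macro.
 */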

_memcpy:
	/* Determine copy direction */
	cmp	r1, r0
	bcc	.Lmemcpy_backwards

	IT(t, eq)			/* Quick abort for src=dst */
#if defined(__USE_BX__)
	bxeq	lr
#else
	moveq	pc, lr
#endif
	stmdb	sp!, {r0, lr}		/* memcpy() returns dest addr */
	subs	r2, r2, #4
	blt	.Lmemcpy_fl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemcpy_fdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemcpy_fsrcul		/* oh unaligned source addr */

.Lmemcpy_ft8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemcpy_fl12		/* less than 12 bytes (4 from above) */
	subs	r2, r2, #0x14
	blt	.Lmemcpy_fl32		/* less than 32 bytes (12 from above) */
	str	r4, [sp, #-4]!		/* borrow r4 */

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemcpy_floop32:
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemcpy_floop32

	cmn	r2, #0x10
	/* blat a remaining 16 bytes */
	copy	"{r3, r4, r12, lr}"
	subge	r2, r2, #0x10
	ldr	r4, [sp], #4		/* restore r4 */

.Lmemcpy_fl32:
	adds	r2, r2, #0x14

	/* blat 12 bytes at a time */
.Lmemcpy_floop12:
	copy	"{r3, r12, lr}"
#if defined(__thumb2__)
	subsge	r2, r2, #0x0c
#else
	subges	r2, r2, #0x0c
#endif
	bge	.Lmemcpy_floop12

.Lmemcpy_fl12:
	adds	r2, r2, #8
	blt	.Lmemcpy_fl4

	subs	r2, r2, #4
	IT(tt, lt)
	ldrlt	r3, [r1], #4
	strlt	r3, [r0], #4
	copy	"{r3, r12}"
	subge	r2, r2, #4

.Lmemcpy_fl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4
#if defined(__thumb2__)
	it	eq
	popeq	{r0, pc}		/* done */
#elif defined(__ARM_ARCH_4T__)
	ldmeqia	sp!, {r0, r3}		/* done */
	bxeq	r3
#else
	ldmeqia	sp!, {r0, pc}		/* done */
#endif

	/* copy the crud byte at a time */
	cmp	r2, #2
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
#if defined(__thumb2__)
	itt	ge
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	itt	gt
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
#else
	ldrgeb	r3, [r1], #1
	strgeb	r3, [r0], #1
	ldrgtb	r3, [r1], #1
	strgtb	r3, [r0], #1
#endif
#if defined(__ARM_ARCH_4T__)
	ldmia	sp!, {r0, r3}
	bx	r3
#else
	ldmia	sp!, {r0, pc}
#endif

	/* erg - unaligned destination */
.Lmemcpy_fdestul:
	rsb	r12, r12, #4
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
#if defined(__thumb2__)
	itt	ge
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	itt	gt
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
#else
	ldrgeb	r3, [r1], #1
	strgeb	r3, [r0], #1
	ldrgtb	r3, [r1], #1
	strgtb	r3, [r0], #1
#endif
	subs	r2, r2, r12
	blt	.Lmemcpy_fl4		/* less than 4 bytes */

	ands	r12, r1, #3
	beq	.Lmemcpy_ft8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
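	/*
	 * The .Lmemcpy_fsrcul* code below handles a source that is not
	 * word aligned: r1 is rounded down to a word boundary, whole words
	 * are loaded, and each output word is assembled from the remainder
	 * of the previous word and the start of the next one using 8-, 16-
	 * or 24-bit shifts (directions swapped for big-endian).  Memory is
	 * only ever accessed a word at a time; the byte re-alignment
	 * happens entirely in registers.
	 */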
.Lmemcpy_fsrcul:
	bic	r1, r1, #3
	ldr	lr, [r1], #4
	cmp	r12, #2
	bgt	.Lmemcpy_fsrcul3
	beq	.Lmemcpy_fsrcul2
	cmp	r2, #0x0c
	blt	.Lmemcpy_fsrcul1loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemcpy_fsrcul1loop16:
#if __BYTE_ORDER == __BIG_ENDIAN
	mov	r3, lr, lsl #8
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, lr, lsr #24
#else
	mov	r3, lr, lsr #8
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, lr, lsl #24
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_fsrcul1loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_fsrcul1l4

.Lmemcpy_fsrcul1loop4:
#if __BYTE_ORDER == __BIG_ENDIAN
	mov	r12, lr, lsl #8
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsr #24
#else
	mov	r12, lr, lsr #8
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #24
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_fsrcul1loop4

.Lmemcpy_fsrcul1l4:
	sub	r1, r1, #3
	b	.Lmemcpy_fl4

.Lmemcpy_fsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemcpy_fsrcul2loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemcpy_fsrcul2loop16:
#if __BYTE_ORDER == __BIG_ENDIAN
	mov	r3, lr, lsl #16
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, lr, lsr #16
#else
	mov	r3, lr, lsr #16
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, lr, lsl #16
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_fsrcul2loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_fsrcul2l4

.Lmemcpy_fsrcul2loop4:
#if __BYTE_ORDER == __BIG_ENDIAN
	mov	r12, lr, lsl #16
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsr #16
#else
	mov	r12, lr, lsr #16
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #16
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_fsrcul2loop4

.Lmemcpy_fsrcul2l4:
	sub	r1, r1, #2
	b	.Lmemcpy_fl4

.Lmemcpy_fsrcul3:
	cmp	r2, #0x0c
	blt	.Lmemcpy_fsrcul3loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemcpy_fsrcul3loop16:
#if __BYTE_ORDER == __BIG_ENDIAN
	mov	r3, lr, lsl #24
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, lr, lsr #8
#else
	mov	r3, lr, lsr #24
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, lr, lsl #8
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_fsrcul3loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_fsrcul3l4

.Lmemcpy_fsrcul3loop4:
#if __BYTE_ORDER == __BIG_ENDIAN
	mov	r12, lr, lsl #24
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsr #8
#else
	mov	r12, lr, lsr #24
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #8
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_fsrcul3loop4

.Lmemcpy_fsrcul3l4:
	sub	r1, r1, #1
	b	.Lmemcpy_fl4

.Lmemcpy_backwards:
	add	r1, r1, r2
	add	r0, r0, r2
	subs	r2, r2, #4
	blt	.Lmemcpy_bl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemcpy_bdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemcpy_bsrcul		/* oh unaligned source addr */

.Lmemcpy_bt8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemcpy_bl12		/* less than 12 bytes (4 from above) */
	stmdb	sp!, {r4, lr}
	subs	r2, r2, #0x14		/* less than 32 bytes (12 from above) */
	blt	.Lmemcpy_bl32

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemcpy_bloop32:
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemcpy_bloop32

.Lmemcpy_bl32:
	cmn	r2, #0x10
	/* blat a remaining 16 bytes */
	copydb	"{r3, r4, r12, lr}"
	subge	r2, r2, #0x10
	adds	r2, r2, #0x14
	/* blat a remaining 12 bytes */
	copydb	"{r3, r12, lr}"
	subge	r2, r2, #0x0c
	ldmia	sp!, {r4, lr}

.Lmemcpy_bl12:
	adds	r2, r2, #8
	blt	.Lmemcpy_bl4
	subs	r2, r2, #4
	IT(tt, lt)
	ldrlt	r3, [r1, #-4]!
	strlt	r3, [r0, #-4]!
	copydb	"{r3, r12}"
	subge	r2, r2, #4

.Lmemcpy_bl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4
	IT(t, eq)
#if defined(__USE_BX__)
	bxeq	lr
#else
	moveq	pc, lr			/* done */
#endif
	/* copy the crud byte at a time */
	cmp	r2, #2
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
#ifdef __thumb2__
	itt	ge
	ldrbge	r3, [r1, #-1]!
	strbge	r3, [r0, #-1]!
	itt	gt
	ldrbgt	r3, [r1, #-1]!
	strbgt	r3, [r0, #-1]!
#else
	ldrgeb	r3, [r1, #-1]!
	strgeb	r3, [r0, #-1]!
	ldrgtb	r3, [r1, #-1]!
	strgtb	r3, [r0, #-1]!
#endif
#if defined(__USE_BX__)
	bx	lr
#else
	mov	pc, lr
#endif
	/* erg - unaligned destination */
.Lmemcpy_bdestul:
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
#ifdef __thumb2__
	itt	ge
	ldrbge	r3, [r1, #-1]!
	strbge	r3, [r0, #-1]!
	itt	gt
	ldrbgt	r3, [r1, #-1]!
	strbgt	r3, [r0, #-1]!
#else
	ldrgeb	r3, [r1, #-1]!
	strgeb	r3, [r0, #-1]!
	ldrgtb	r3, [r1, #-1]!
	strgtb	r3, [r0, #-1]!
#endif
	subs	r2, r2, r12
	blt	.Lmemcpy_bl4		/* less than 4 bytes to go */
	ands	r12, r1, #3
	beq	.Lmemcpy_bt8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
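	/*
	 * Backwards version of the unaligned-source handling above: the
	 * same word-load-and-shift re-assembly, but walking both buffers
	 * downwards with LDMDB/STMDB.
	 */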
.Lmemcpy_bsrcul:
	bic	r1, r1, #3
	ldr	r3, [r1, #0]
	cmp	r12, #2
	blt	.Lmemcpy_bsrcul1
	beq	.Lmemcpy_bsrcul2
	cmp	r2, #0x0c
	blt	.Lmemcpy_bsrcul3loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

.Lmemcpy_bsrcul3loop16:
#if __BYTE_ORDER == __BIG_ENDIAN
	mov	lr, r3, lsr #8
	ldmdb	r1!, {r3-r5, r12}
	orr	lr, lr, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r3, lsl #24
#else
	mov	lr, r3, lsl #8
	ldmdb	r1!, {r3-r5, r12}
	orr	lr, lr, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r3, lsr #24
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_bsrcul3loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_bsrcul3l4

.Lmemcpy_bsrcul3loop4:
#if __BYTE_ORDER == __BIG_ENDIAN
	mov	r12, r3, lsr #8
	ldr	r3, [r1, #-4]!
	orr	r12, r12, r3, lsl #24
#else
	mov	r12, r3, lsl #8
	ldr	r3, [r1, #-4]!
	orr	r12, r12, r3, lsr #24
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemcpy_bsrcul3loop4

.Lmemcpy_bsrcul3l4:
	add	r1, r1, #3
	b	.Lmemcpy_bl4

.Lmemcpy_bsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemcpy_bsrcul2loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

.Lmemcpy_bsrcul2loop16:
#if __BYTE_ORDER == __BIG_ENDIAN
	mov	lr, r3, lsr #16
	ldmdb	r1!, {r3-r5, r12}
	orr	lr, lr, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r3, lsl #16
#else
	mov	lr, r3, lsl #16
	ldmdb	r1!, {r3-r5, r12}
	orr	lr, lr, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r3, lsr #16
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_bsrcul2loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_bsrcul2l4

.Lmemcpy_bsrcul2loop4:
#if __BYTE_ORDER == __BIG_ENDIAN
	mov	r12, r3, lsr #16
	ldr	r3, [r1, #-4]!
	orr	r12, r12, r3, lsl #16
#else
	mov	r12, r3, lsl #16
	ldr	r3, [r1, #-4]!
	orr	r12, r12, r3, lsr #16
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemcpy_bsrcul2loop4

.Lmemcpy_bsrcul2l4:
	add	r1, r1, #2
	b	.Lmemcpy_bl4

.Lmemcpy_bsrcul1:
	cmp	r2, #0x0c
	blt	.Lmemcpy_bsrcul1loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

.Lmemcpy_bsrcul1loop32:
#if __BYTE_ORDER == __BIG_ENDIAN
	mov	lr, r3, lsr #24
	ldmdb	r1!, {r3-r5, r12}
	orr	lr, lr, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r3, lsl #8
#else
	mov	lr, r3, lsl #24
	ldmdb	r1!, {r3-r5, r12}
	orr	lr, lr, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r3, lsr #8
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_bsrcul1loop32
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_bsrcul1l4

.Lmemcpy_bsrcul1loop4:
#if __BYTE_ORDER == __BIG_ENDIAN
	mov	r12, r3, lsr #24
	ldr	r3, [r1, #-4]!
	orr	r12, r12, r3, lsl #8
#else
	mov	r12, r3, lsl #24
	ldr	r3, [r1, #-4]!
	orr	r12, r12, r3, lsr #8
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemcpy_bsrcul1loop4

.Lmemcpy_bsrcul1l4:
	add	r1, r1, #1
	b	.Lmemcpy_bl4

#else /* THUMB1_ONLY */

/* This is a fairly dumb implementation for when we can't use the 32-bit code
   above. */
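/*
 * The approach: when source and destination share the same alignment
 * within a word and at least 8 bytes remain, copy bytes until the
 * destination is word aligned, then copy word by word and finish the tail
 * byte by byte; every other case (including overlapping backwards copies)
 * is done one byte at a time.
 */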
.text
.global _memcpy
.hidden _memcpy
.type _memcpy,%function
.align 4
.thumb
_memcpy:
	push	{r0, r4}
	cmp	r2, #0
	beq	.Lmemcpy_exit
	@ See if we have overlapping regions, and need to reverse the
	@ direction of the copy
	cmp	r0, r1
	bls	.Lmemcpy_forwards
	add	r4, r1, r2
	cmp	r0, r4
	bcc	.Lmemcpy_backwards
.Lmemcpy_forwards:
	/* Forwards.  */
	mov	r3, r0
	eor	r3, r1
	mov	r4, #3
	tst	r3, r4
	bne	.Lmemcpy_funaligned
	cmp	r2, #8
	bcc	.Lmemcpy_funaligned
1:	@ copy up to the first word boundary.
	tst	r0, r4
	beq	1f
	ldrb	r3, [r1]
	add	r1, r1, #1
	strb	r3, [r0]
	add	r0, r0, #1
	sub	r2, r2, #1
	b	1b
1:	@ Copy aligned words
	ldr	r3, [r1]
	add	r1, r1, #4
	str	r3, [r0]
	add	r0, r0, #4
	sub	r2, r2, #4
	cmp	r2, #4
	bcs	1b
	cmp	r2, #0
	beq	.Lmemcpy_exit
.Lmemcpy_funaligned:
1:
	ldrb	r3, [r1]
	add	r1, r1, #1
	strb	r3, [r0]
	add	r0, r0, #1
	sub	r2, r2, #1
	bne	1b
.Lmemcpy_exit:
	pop	{r0, r4}
	bx	lr

.Lmemcpy_backwards:
	add	r0, r0, r2
	add	r1, r1, r2
1:
	sub	r0, r0, #1
	sub	r1, r1, #1
	ldrb	r3, [r1]
	strb	r3, [r0]
	sub	r2, r2, #1
	bne	1b
	b	.Lmemcpy_exit
#endif