blob: 04e3f773917f54d6f55df01a74de346472645004 [file] [log] [blame]
xf.li84027492024-04-09 00:17:51 -07001/* Copyright (C) 2006-2021 Free Software Foundation, Inc.
xf.libdd93d52023-05-12 07:10:14 -07002 This file is part of the GNU C Library.
3
4 Contributed by MontaVista Software, Inc. (written by Nicolas Pitre)
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library. If not, see
xf.li84027492024-04-09 00:17:51 -070018 <https://www.gnu.org/licenses/>. */
xf.libdd93d52023-05-12 07:10:14 -070019
20/* Thumb requires excessive IT insns here. */
21#define NO_THUMB
22#include <sysdep.h>
23#include <arm-features.h>
24
25/*
26 * Data preload for architectures that support it (ARM V5TE and above)
27 */
/* PLD(code) expands to its argument, so the wrapped instruction is assembled, */
/* unless one of the architecture macros tested below (2, 3, 3M, 4, 4T, 5, 5T) */
/* is defined; in that case PLD(code) expands to nothing and the line vanishes. */
28#if (!defined (__ARM_ARCH_2__) && !defined (__ARM_ARCH_3__) \
29 && !defined (__ARM_ARCH_3M__) && !defined (__ARM_ARCH_4__) \
30 && !defined (__ARM_ARCH_4T__) && !defined (__ARM_ARCH_5__) \
31 && !defined (__ARM_ARCH_5T__))
32#define PLD(code...) code
33#else
34#define PLD(code...)
35#endif
36
37/*
38 * This can be used to enable code to cacheline align the source pointer.
39 * Experiments on tested architectures (StrongARM and XScale) didn't show
40 * this a worthwhile thing to do. That might be different in the future.
41 */
/* The enabling definition is commented out and the active one expands to */
/* nothing, so every CALGN(...) line in this file is dropped by the preprocessor. */
42//#define CALGN(code...) code
43#define CALGN(code...)
44
45/*
46 * Endian independent macros for shifting bytes within registers.
47 */
/* Without __ARMEB__ defined PULL is lsr and PUSH is lsl; with it the two are swapped. */
/* The forward_copy_shift macro uses them to merge two neighbouring source words. */
48#ifndef __ARMEB__
49#define PULL lsr
50#define PUSH lsl
51#else
52#define PULL lsl
53#define PUSH lsr
54#endif
55
56 .text
57 .syntax unified
58
59/* Prototype: void *memcpy(void *dest, const void *src, size_t n); */
/* In the code below r0 is the store pointer, r1 the load pointer and r2 the byte count. */
/* r2 is kept biased: 4 is subtracted on entry and another 28 before each 32-byte loop, */
/* so the borrow from each subs tells whether a full word or 32-byte group is left. */
/* Numeric labels: 1 = both pointers word aligned, 5 to 7 = remainder of 4..28 bytes, */
/* 8 = final 0..3 bytes and return, 9 = align the store pointer, 10 = shifted copy. */
60
61ENTRY(memcpy)
62
/* Save the incoming r0 so the epilogue can pop it back as the return value. */
63 push {r0, r4, lr}
64 cfi_adjust_cfa_offset (12)
65 cfi_rel_offset (r4, 4)
66 cfi_rel_offset (lr, 8)
67
/* Remember the unwind state that is valid with only r0, r4 and lr pushed. */
68 cfi_remember_state
69
/* Fewer than 4 bytes in total: go straight to the byte tail at 8. */
70 subs r2, r2, #4
xf.li84027492024-04-09 00:17:51 -070071 blo 8f
/* ip = low two bits of the store pointer; non-zero means it must be aligned first (9). */
xf.libdd93d52023-05-12 07:10:14 -070072 ands ip, r0, #3
xf.li84027492024-04-09 00:17:51 -070073 PLD( pld [r1, #0] )
xf.libdd93d52023-05-12 07:10:14 -070074 bne 9f
/* Store pointer is word aligned; a misaligned load pointer takes the shifted copy (10). */
75 ands ip, r1, #3
76 bne 10f
77
/* Both pointers word aligned. r2 becomes (bytes left - 32); push leaves the flags */
/* alone, so the blo after it still tests this subs. */
781: subs r2, r2, #(28)
79 push {r5 - r8}
80 cfi_adjust_cfa_offset (16)
81 cfi_rel_offset (r5, 0)
82 cfi_rel_offset (r6, 4)
83 cfi_rel_offset (r7, 8)
84 cfi_rel_offset (r8, 12)
/* Fewer than 32 bytes left: skip the block loop. */
xf.li84027492024-04-09 00:17:51 -070085 blo 5f
xf.libdd93d52023-05-12 07:10:14 -070086
/* The CALGN lines are inactive while CALGN is defined to expand to nothing. */
87 CALGN( ands ip, r1, #31 )
88 CALGN( rsb r3, ip, #32 )
89 CALGN( sbcsne r4, r3, r2 ) @ C is always set here
90 CALGN( bcs 2f )
91 CALGN( adr r4, 6f )
92 CALGN( subs r2, r2, r3 ) @ C gets set
93#ifndef ARM_ALWAYS_BX
94 CALGN( add pc, r4, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2))
95#else
96 CALGN( add r4, r4, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2))
97 CALGN( bx r4 )
98#endif
99
/* Preload ahead of the loop; when r2 is below 96 the two farthest preloads and */
/* the in-loop preload of the first pass are skipped by entering at 4. */
xf.li84027492024-04-09 00:17:51 -0700100 PLD( pld [r1, #0] )
1012: PLD( cmp r2, #96 )
102 PLD( pld [r1, #28] )
103 PLD( blo 4f )
104 PLD( pld [r1, #60] )
105 PLD( pld [r1, #92] )
xf.libdd93d52023-05-12 07:10:14 -0700106
/* Main loop: move 32 bytes (eight registers) per pass until the subs borrows. */
xf.li84027492024-04-09 00:17:51 -07001073: PLD( pld [r1, #124] )
1084: ldmia r1!, {r3, r4, r5, r6, r7, r8, ip, lr}
xf.libdd93d52023-05-12 07:10:14 -0700109 subs r2, r2, #32
xf.li84027492024-04-09 00:17:51 -0700110 stmia r0!, {r3, r4, r5, r6, r7, r8, ip, lr}
111 bhs 3b
xf.libdd93d52023-05-12 07:10:14 -0700112
/* ip = bytes of whole words still to copy (0, 4, ..., 28), taken from bits 2..4 of r2. */
/* rsb turns that into 32 minus the value without touching the flags, so ne and eq */
/* below still mean that at least one word, or no word, is left. */
1135: ands ip, r2, #28
114 rsb ip, ip, #32
115#ifndef ARM_ALWAYS_BX
116 /* C is always clear here. */
/* Computed jump into the load ladder at 6: skip ip/4 slots (one to seven of them), */
/* so that only the loads for the words that remain are executed. */
117 addne pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
118 b 7f
119#else
120 beq 7f
/* r10 is the scratch register for the bx target in this variant. */
121 push {r10}
122 cfi_adjust_cfa_offset (4)
123 cfi_rel_offset (r10, 0)
1240: add r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
125 /* If alignment is not perfect, then there will be some
126 padding (nop) instructions between this BX and label 6.
127 The computation above assumed that two instructions
128 later is exactly the right spot. */
129 add r10, #(6f - (0b + PC_OFS))
130 bx r10
131#endif
/* Load ladder: slot 0 is a nop, slots 1 to 7 load one word each into r3..r8 and lr. */
/* Each slot is padded with .p2align so that slots are evenly spaced. */
132 .p2align ARM_BX_ALIGN_LOG2
1336: nop
134 .p2align ARM_BX_ALIGN_LOG2
xf.li84027492024-04-09 00:17:51 -0700135 ldr r3, [r1], #4
xf.libdd93d52023-05-12 07:10:14 -0700136 .p2align ARM_BX_ALIGN_LOG2
xf.li84027492024-04-09 00:17:51 -0700137 ldr r4, [r1], #4
xf.libdd93d52023-05-12 07:10:14 -0700138 .p2align ARM_BX_ALIGN_LOG2
xf.li84027492024-04-09 00:17:51 -0700139 ldr r5, [r1], #4
xf.libdd93d52023-05-12 07:10:14 -0700140 .p2align ARM_BX_ALIGN_LOG2
xf.li84027492024-04-09 00:17:51 -0700141 ldr r6, [r1], #4
xf.libdd93d52023-05-12 07:10:14 -0700142 .p2align ARM_BX_ALIGN_LOG2
xf.li84027492024-04-09 00:17:51 -0700143 ldr r7, [r1], #4
xf.libdd93d52023-05-12 07:10:14 -0700144 .p2align ARM_BX_ALIGN_LOG2
xf.li84027492024-04-09 00:17:51 -0700145 ldr r8, [r1], #4
xf.libdd93d52023-05-12 07:10:14 -0700146 .p2align ARM_BX_ALIGN_LOG2
xf.li84027492024-04-09 00:17:51 -0700147 ldr lr, [r1], #4
xf.libdd93d52023-05-12 07:10:14 -0700148
/* Second computed jump with the same ip, this time into the store ladder at 66, */
/* so the stores executed match the loads that were just done. */
149#ifndef ARM_ALWAYS_BX
150 add pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
151 nop
152#else
1530: add r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
154 /* If alignment is not perfect, then there will be some
155 padding (nop) instructions between this BX and label 66.
156 The computation above assumed that two instructions
157 later is exactly the right spot. */
158 add r10, #(66f - (0b + PC_OFS))
159 bx r10
160#endif
/* Store ladder: same layout as the load ladder, one str per slot after the nop. */
161 .p2align ARM_BX_ALIGN_LOG2
16266: nop
163 .p2align ARM_BX_ALIGN_LOG2
xf.li84027492024-04-09 00:17:51 -0700164 str r3, [r0], #4
xf.libdd93d52023-05-12 07:10:14 -0700165 .p2align ARM_BX_ALIGN_LOG2
xf.li84027492024-04-09 00:17:51 -0700166 str r4, [r0], #4
xf.libdd93d52023-05-12 07:10:14 -0700167 .p2align ARM_BX_ALIGN_LOG2
xf.li84027492024-04-09 00:17:51 -0700168 str r5, [r0], #4
xf.libdd93d52023-05-12 07:10:14 -0700169 .p2align ARM_BX_ALIGN_LOG2
xf.li84027492024-04-09 00:17:51 -0700170 str r6, [r0], #4
xf.libdd93d52023-05-12 07:10:14 -0700171 .p2align ARM_BX_ALIGN_LOG2
xf.li84027492024-04-09 00:17:51 -0700172 str r7, [r0], #4
xf.libdd93d52023-05-12 07:10:14 -0700173 .p2align ARM_BX_ALIGN_LOG2
xf.li84027492024-04-09 00:17:51 -0700174 str r8, [r0], #4
xf.libdd93d52023-05-12 07:10:14 -0700175 .p2align ARM_BX_ALIGN_LOG2
xf.li84027492024-04-09 00:17:51 -0700176 str lr, [r0], #4
xf.libdd93d52023-05-12 07:10:14 -0700177
/* Undo the r10 push made for the bx variant of the computed jumps. */
178#ifdef ARM_ALWAYS_BX
179 pop {r10}
180 cfi_adjust_cfa_offset (-4)
181 cfi_restore (r10)
182#endif
183
184 CALGN( bcs 2b )
185
/* Word copying is finished: release the registers pushed at label 1. */
1867: pop {r5 - r8}
187 cfi_adjust_cfa_offset (-16)
188 cfi_restore (r5)
189 cfi_restore (r6)
190 cfi_restore (r7)
191 cfi_restore (r8)
192
/* Byte tail. The shift by 31 moves bit 0 of r2 into the result (ne = one byte) */
/* and bit 1 into the carry (cs = two more bytes), so 0 to 3 bytes are copied. */
1938: movs r2, r2, lsl #31
xf.li84027492024-04-09 00:17:51 -0700194 ldrbne r3, [r1], #1
195 ldrbcs r4, [r1], #1
196 ldrbcs ip, [r1]
197 strbne r3, [r0], #1
198 strbcs r4, [r0], #1
199 strbcs ip, [r0]
xf.libdd93d52023-05-12 07:10:14 -0700200
/* Return, reloading r0 with the value saved on entry. The first variant */
/* returns with bx lr, the second pops the saved lr directly into pc. */
201#if ((defined (__ARM_ARCH_4T__) && defined(__THUMB_INTERWORK__)) \
202 || defined (ARM_ALWAYS_BX))
203 pop {r0, r4, lr}
204 cfi_adjust_cfa_offset (-12)
205 cfi_restore (r4)
206 cfi_restore (lr)
207 bx lr
208#else
209 pop {r0, r4, pc}
210#endif
211
/* The code below is entered by branches taken while only r0, r4 and lr were */
/* pushed, so switch the unwind info back to the state remembered after the entry push. */
212 cfi_restore_state
213
/* Store pointer not word aligned: ip becomes the 1 to 3 bytes needed to align it. */
/* gt copies a third byte, ge a second, and one byte is always copied. */
2149: rsb ip, ip, #4
215 cmp ip, #2
xf.li84027492024-04-09 00:17:51 -0700216 ldrbgt r3, [r1], #1
217 ldrbge r4, [r1], #1
218 ldrb lr, [r1], #1
219 strbgt r3, [r0], #1
220 strbge r4, [r0], #1
/* Take the aligning bytes off the count; strb keeps the flags for the blo, */
/* which goes to the byte tail when fewer than 4 bytes are left. */
xf.libdd93d52023-05-12 07:10:14 -0700221 subs r2, r2, ip
xf.li84027492024-04-09 00:17:51 -0700222 strb lr, [r0], #1
223 blo 8b
/* If the load pointer is now word aligned as well, use the aligned path at 1. */
xf.libdd93d52023-05-12 07:10:14 -0700224 ands ip, r1, #3
225 beq 1b
226
/* Load pointer misaligned by ip = 1, 2 or 3 bytes: round it down to a word boundary, */
/* fetch that first word into lr and pick the shift pair. ldr keeps the cmp flags. */
/* ip of 2 goes to 17, ip of 3 goes to 18, ip of 1 falls through to the first expansion. */
22710: bic r1, r1, #3
228 cmp ip, #2
xf.li84027492024-04-09 00:17:51 -0700229 ldr lr, [r1], #4
xf.libdd93d52023-05-12 07:10:14 -0700230 beq 17f
231 bgt 18f
232
233
/* forward_copy_shift pull push: copy loop for a load pointer that was not word aligned. */
/* On entry r1 is word aligned and lr holds the word fetched just before r1. */
/* Each stored word is the previous word shifted by PULL pull bits, merged with */
/* the next word shifted by PUSH push bits. It ends by branching to the byte tail at 8. */
234 .macro forward_copy_shift pull push
235
/* r2 becomes (bytes left - 32); fewer than 32 left skips the block loop. */
236 subs r2, r2, #28
xf.li84027492024-04-09 00:17:51 -0700237 blo 14f
xf.libdd93d52023-05-12 07:10:14 -0700238
239 CALGN( ands ip, r1, #31 )
240 CALGN( rsb ip, ip, #32 )
241 CALGN( sbcsne r4, ip, r2 ) @ C is always set here
242 CALGN( subcc r2, r2, ip )
243 CALGN( bcc 15f )
244
24511: push {r5 - r8, r10}
246 cfi_adjust_cfa_offset (20)
247 cfi_rel_offset (r5, 0)
248 cfi_rel_offset (r6, 4)
249 cfi_rel_offset (r7, 8)
250 cfi_rel_offset (r8, 12)
251 cfi_rel_offset (r10, 16)
252
xf.li84027492024-04-09 00:17:51 -0700253 PLD( pld [r1, #0] )
254 PLD( cmp r2, #96 )
255 PLD( pld [r1, #28] )
256 PLD( blo 13f )
257 PLD( pld [r1, #60] )
258 PLD( pld [r1, #92] )
xf.libdd93d52023-05-12 07:10:14 -0700259
/* Block loop: fetch eight words in two ldmia, build eight output words in */
/* r3, r4, r5, r6, r7, r8, r10 and ip, and store them with one stmia. */
/* The last word fetched stays in lr as the carry-over for the next pass. */
xf.li84027492024-04-09 00:17:51 -070026012: PLD( pld [r1, #124] )
26113: ldmia r1!, {r4, r5, r6, r7}
xf.libdd93d52023-05-12 07:10:14 -0700262 mov r3, lr, PULL #\pull
263 subs r2, r2, #32
xf.li84027492024-04-09 00:17:51 -0700264 ldmia r1!, {r8, r10, ip, lr}
xf.libdd93d52023-05-12 07:10:14 -0700265 orr r3, r3, r4, PUSH #\push
266 mov r4, r4, PULL #\pull
267 orr r4, r4, r5, PUSH #\push
268 mov r5, r5, PULL #\pull
269 orr r5, r5, r6, PUSH #\push
270 mov r6, r6, PULL #\pull
271 orr r6, r6, r7, PUSH #\push
272 mov r7, r7, PULL #\pull
273 orr r7, r7, r8, PUSH #\push
274 mov r8, r8, PULL #\pull
275 orr r8, r8, r10, PUSH #\push
276 mov r10, r10, PULL #\pull
277 orr r10, r10, ip, PUSH #\push
278 mov ip, ip, PULL #\pull
279 orr ip, ip, lr, PUSH #\push
/* The flags tested by bhs are still those of the subs above. */
xf.li84027492024-04-09 00:17:51 -0700280 stmia r0!, {r3, r4, r5, r6, r7, r8, r10, ip}
281 bhs 12b
xf.libdd93d52023-05-12 07:10:14 -0700282
283 pop {r5 - r8, r10}
284 cfi_adjust_cfa_offset (-20)
285 cfi_restore (r5)
286 cfi_restore (r6)
287 cfi_restore (r7)
288 cfi_restore (r8)
289 cfi_restore (r10)
290
/* ip = bytes of whole words still to copy (0, 4, ..., 28); none left goes to 16. */
29114: ands ip, r2, #28
292 beq 16f
293
/* Word loop: one merged word per pass, ip counting down by 4 until it reaches zero. */
29415: mov r3, lr, PULL #\pull
xf.li84027492024-04-09 00:17:51 -0700295 ldr lr, [r1], #4
xf.libdd93d52023-05-12 07:10:14 -0700296 subs ip, ip, #4
297 orr r3, r3, lr, PUSH #\push
xf.li84027492024-04-09 00:17:51 -0700298 str r3, [r0], #4
xf.libdd93d52023-05-12 07:10:14 -0700299 bgt 15b
300 CALGN( cmp r2, #0 )
301 CALGN( bge 11b )
302
/* Step r1 back by push/8 bytes before handing over to the byte tail at 8. */
/* NOTE(review): presumably these are the bytes of the look-ahead word in lr */
/* that have not been stored yet; confirm against the pull/push pairs used below. */
30316: sub r1, r1, #(\push / 8)
304 b 8b
305
306 .endm
307
308
/* Three expansions, one per misalignment of the load pointer. The first is reached */
/* by falling through from label 10, the other two through labels 17 and 18. */
309 forward_copy_shift pull=8 push=24
310
31117: forward_copy_shift pull=16 push=16
312
31318: forward_copy_shift pull=24 push=8
314
315END(memcpy)
316libc_hidden_builtin_def (memcpy)