blob: db8ba50af35ad21294c76cbfe9f857210449faa0 [file] [log] [blame]
xf.libdd93d52023-05-12 07:10:14 -07001/* Copyright (C) 2006-2016 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3
4 Contributed by MontaVista Software, Inc. (written by Nicolas Pitre)
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library. If not, see
18 <http://www.gnu.org/licenses/>. */
19
20/* Thumb requires excessive IT insns here. */
21#define NO_THUMB
22#include <sysdep.h>
23#include <arm-features.h>
24
25/*
26 * Data preload for architectures that support it (ARM V5TE and above)
 *
 * PLD(code...) expands to its argument when none of the older
 * architecture macros tested below (__ARM_ARCH_2__ through
 * __ARM_ARCH_5T__) is defined, and to nothing otherwise.  Everything
 * wrapped in PLD() in this file -- the preloads themselves and the
 * extra count adjustments and branches that go with them -- therefore
 * disappears on those older targets.
27 */
28#if (!defined (__ARM_ARCH_2__) && !defined (__ARM_ARCH_3__) \
29 && !defined (__ARM_ARCH_3M__) && !defined (__ARM_ARCH_4__) \
30 && !defined (__ARM_ARCH_4T__) && !defined (__ARM_ARCH_5__) \
31 && !defined (__ARM_ARCH_5T__))
32#define PLD(code...) code
33#else
34#define PLD(code...)
35#endif
36
37/*
38 * This can be used to enable code to cacheline align the source pointer.
39 * Experiments on tested architectures (StrongARM and XScale) didn't show
40 * this to be a worthwhile thing to do. That might be different in the future.
 *
 * As defined below, CALGN(code...) expands to nothing, so every line
 * wrapped in CALGN() is compiled out.  The commented-out definition
 * is the one that would pass the wrapped code through unchanged.
41 */
42//#define CALGN(code...) code
43#define CALGN(code...)
44
45/*
46 * Endian independent macros for shifting bytes within registers.
 *
 * When __ARMEB__ is not defined PULL is lsr and PUSH is lsl; when it is
 * defined the two are swapped.  (__ARMEB__ is presumably the compiler's
 * big-endian indicator -- confirm against the toolchain documentation.)
 * The forward_copy_shift macro uses them as the shift operators when it
 * merges two adjacent source words into one destination word.
47 */
48#ifndef __ARMEB__
49#define PULL lsr
50#define PUSH lsl
51#else
52#define PULL lsl
53#define PUSH lsr
54#endif
55
56 .text
57 .syntax unified
58
59/* Prototype: void *memcpy(void *dest, const void *src, size_t n); */
/* Register use, as seen in the code below: r0 is the store pointer,
   r1 the load pointer and r2 the remaining byte count.  r2 is kept
   biased downwards (by 4, then 32, and by a further 96 when PLD is
   enabled) so that a plain sign test tells whether another unit can
   be copied; the biases are multiples of the unit size, so the low
   bits of r2 still give the residual count.  The incoming r0 is
   pushed on entry and popped again before returning.

   sfi_breg, sfi_pld, the cfi_* macros, ARM_ALWAYS_BX,
   ARM_BX_ALIGN_LOG2 and PC_OFS are not defined in this file;
   presumably they come from the included headers.  From its use here
   sfi_breg appears to substitute its first argument for the \B
   placeholder in the instruction that follows -- confirm against its
   definition.  */
60
61ENTRY(memcpy)
62
/* Save the incoming r0 together with r4 and lr, which the copy code
   below uses as scratch registers.  */
63 push {r0, r4, lr}
64 cfi_adjust_cfa_offset (12)
65 cfi_rel_offset (r4, 4)
66 cfi_rel_offset (lr, 8)
67
68 cfi_remember_state
69
/* r2 = n - 4.  With fewer than 4 bytes go straight to the byte tail.  */
70 subs r2, r2, #4
71 blt 8f
/* ip = r0 & 3: a destination that is not word aligned is fixed up at 9.  */
72 ands ip, r0, #3
73 PLD( sfi_pld r1, #0 )
74 bne 9f
/* ip = r1 & 3: a source that is not word aligned takes the shifting
   copy that starts at 10.  */
75 ands ip, r1, #3
76 bne 10f
77
/* Both pointers are word aligned here.  The bias on r2 grows to 32;
   the push does not touch the flags, so the blt still tests the subs
   and skips the 32-byte block loop when fewer than 32 bytes remain.  */
781: subs r2, r2, #(28)
79 push {r5 - r8}
80 cfi_adjust_cfa_offset (16)
81 cfi_rel_offset (r5, 0)
82 cfi_rel_offset (r6, 4)
83 cfi_rel_offset (r7, 8)
84 cfi_rel_offset (r8, 12)
85 blt 5f
86
/* Optional source cache-line alignment; compiled out while CALGN
   expands to nothing.  */
87 CALGN( ands ip, r1, #31 )
88 CALGN( rsb r3, ip, #32 )
89 CALGN( sbcsne r4, r3, r2 ) @ C is always set here
90 CALGN( bcs 2f )
91 CALGN( adr r4, 6f )
92 CALGN( subs r2, r2, r3 ) @ C gets set
93#ifndef ARM_ALWAYS_BX
94 CALGN( add pc, r4, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2))
95#else
96 CALGN( add r4, r4, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2))
97 CALGN( bx r4 )
98#endif
99
/* With PLD enabled r2 gets an extra bias of 96.  If that already makes
   it negative the further preloads are skipped and copying starts
   directly at 4.  */
100 PLD( sfi_pld r1, #0 )
1012: PLD( subs r2, r2, #96 )
102 PLD( sfi_pld r1, #28 )
103 PLD( blt 4f )
104 PLD( sfi_pld r1, #60 )
105 PLD( sfi_pld r1, #92 )
106
/* Main loop: move eight words (32 bytes) per iteration through
   r3-r8, ip and lr.  "bge 3b" repeats (with a preload at r1 + 124 when
   PLD is enabled) while r2 stays non-negative.  With PLD enabled the
   cmn/bge pair then keeps copying blocks from 4, without further
   preloads, while r2 + 96 is still non-negative.  */
1073: PLD( sfi_pld r1, #124 )
1084: sfi_breg r1, \
109 ldmia \B!, {r3, r4, r5, r6, r7, r8, ip, lr}
110 subs r2, r2, #32
111 sfi_breg r0, \
112 stmia \B!, {r3, r4, r5, r6, r7, r8, ip, lr}
113 bge 3b
114 PLD( cmn r2, #96 )
115 PLD( bge 4b )
116
/* Fewer than 32 bytes left.  ip = r2 & 28 is the number of bytes held
   in remaining whole words (0, 4, ..., 28); when it is zero control
   goes to 7.  Otherwise ip becomes 32 - ip and is used to jump into
   the load table at 6.  The .p2align directives place every table
   entry on a (1 << ARM_BX_ALIGN_LOG2)-byte boundary, so ip shifted
   left by ARM_BX_ALIGN_LOG2 - 2 steps over ip / 4 entries (the nop at
   6 being entry 0) and exactly the wanted number of trailing ldr
   instructions is executed.  */
1175: ands ip, r2, #28
118 rsb ip, ip, #32
119#ifndef ARM_ALWAYS_BX
120 /* C is always clear here. */
121 addne pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
122 b 7f
123#else
124 beq 7f
/* r10 is used to hold the jump target for bx; it is saved here and
   popped again after the store table.  */
125 push {r10}
126 cfi_adjust_cfa_offset (4)
127 cfi_rel_offset (r10, 0)
1280: add r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
129 /* If alignment is not perfect, then there will be some
130 padding (nop) instructions between this BX and label 6.
131 The computation above assumed that two instructions
132 later is exactly the right spot. */
133 add r10, #(6f - (0b + PC_OFS))
134 bx r10
135#endif
/* Load table: one post-incrementing word load per entry, into r3, r4,
   r5, r6, r7, r8 and lr in that order.  */
136 .p2align ARM_BX_ALIGN_LOG2
1376: nop
138 .p2align ARM_BX_ALIGN_LOG2
139 sfi_breg r1, \
140 ldr r3, [\B], #4
141 .p2align ARM_BX_ALIGN_LOG2
142 sfi_breg r1, \
143 ldr r4, [\B], #4
144 .p2align ARM_BX_ALIGN_LOG2
145 sfi_breg r1, \
146 ldr r5, [\B], #4
147 .p2align ARM_BX_ALIGN_LOG2
148 sfi_breg r1, \
149 ldr r6, [\B], #4
150 .p2align ARM_BX_ALIGN_LOG2
151 sfi_breg r1, \
152 ldr r7, [\B], #4
153 .p2align ARM_BX_ALIGN_LOG2
154 sfi_breg r1, \
155 ldr r8, [\B], #4
156 .p2align ARM_BX_ALIGN_LOG2
157 sfi_breg r1, \
158 ldr lr, [\B], #4
159
/* Second computed jump, using the same ip, into the matching store
   table at 66 so that the registers just loaded are the ones stored.  */
160#ifndef ARM_ALWAYS_BX
161 add pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
162 nop
163#else
1640: add r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
165 /* If alignment is not perfect, then there will be some
166 padding (nop) instructions between this BX and label 66.
167 The computation above assumed that two instructions
168 later is exactly the right spot. */
169 add r10, #(66f - (0b + PC_OFS))
170 bx r10
171#endif
/* Store table: mirrors the load table entry for entry.  */
172 .p2align ARM_BX_ALIGN_LOG2
17366: nop
174 .p2align ARM_BX_ALIGN_LOG2
175 sfi_breg r0, \
176 str r3, [\B], #4
177 .p2align ARM_BX_ALIGN_LOG2
178 sfi_breg r0, \
179 str r4, [\B], #4
180 .p2align ARM_BX_ALIGN_LOG2
181 sfi_breg r0, \
182 str r5, [\B], #4
183 .p2align ARM_BX_ALIGN_LOG2
184 sfi_breg r0, \
185 str r6, [\B], #4
186 .p2align ARM_BX_ALIGN_LOG2
187 sfi_breg r0, \
188 str r7, [\B], #4
189 .p2align ARM_BX_ALIGN_LOG2
190 sfi_breg r0, \
191 str r8, [\B], #4
192 .p2align ARM_BX_ALIGN_LOG2
193 sfi_breg r0, \
194 str lr, [\B], #4
195
196#ifdef ARM_ALWAYS_BX
197 pop {r10}
198 cfi_adjust_cfa_offset (-4)
199 cfi_restore (r10)
200#endif
201
202 CALGN( bcs 2b )
203
/* Word copying is finished: restore the registers pushed at 1.  */
2047: pop {r5 - r8}
205 cfi_adjust_cfa_offset (-16)
206 cfi_restore (r5)
207 cfi_restore (r6)
208 cfi_restore (r7)
209 cfi_restore (r8)
210
/* Byte tail (0 to 3 bytes).  Shifting r2 left by 31 leaves its bit 0
   in the top bit of the result, so NE means bit 0 was set, and moves
   its bit 1 into the carry flag.  NE copies one byte through r3; CS
   copies two bytes through r4 and ip.  */
2118: movs r2, r2, lsl #31
212 sfi_breg r1, \
213 ldrbne r3, [\B], #1
214 sfi_breg r1, \
215 ldrbcs r4, [\B], #1
216 sfi_breg r1, \
217 ldrbcs ip, [\B]
218 sfi_breg r0, \
219 strbne r3, [\B], #1
220 sfi_breg r0, \
221 strbcs r4, [\B], #1
222 sfi_breg r0, \
223 strbcs ip, [\B]
224
/* Return with the r0 that was saved on entry.  The interworking /
   ARM_ALWAYS_BX variant pops lr and returns with bx; otherwise the
   saved lr is popped straight into pc.  */
225#if ((defined (__ARM_ARCH_4T__) && defined(__THUMB_INTERWORK__)) \
226 || defined (ARM_ALWAYS_BX))
227 pop {r0, r4, lr}
228 cfi_adjust_cfa_offset (-12)
229 cfi_restore (r4)
230 cfi_restore (lr)
231 bx lr
232#else
233 pop {r0, r4, pc}
234#endif
235
/* Pairs with cfi_remember_state near the entry; presumably it restores
   the unwind description recorded there for the code that follows.  */
236 cfi_restore_state
237
/* Destination not word aligned: on arrival ip = r0 & 3 (non-zero).
   ip becomes 4 - ip, the 1 to 3 bytes needed to reach a word boundary.
   After the cmp, GT (ip == 3) and GE (ip >= 2) select the optional
   first and second bytes; the last byte, through lr, is always
   copied.  The subs takes those bytes off the count and its flags
   reach the blt unchanged: with fewer than 4 bytes left the byte tail
   finishes the job.  Otherwise the source alignment is tested, and an
   aligned source continues with the word copy at 1.  */
2389: rsb ip, ip, #4
239 cmp ip, #2
240 sfi_breg r1, \
241 ldrbgt r3, [\B], #1
242 sfi_breg r1, \
243 ldrbge r4, [\B], #1
244 sfi_breg r1, \
245 ldrb lr, [\B], #1
246 sfi_breg r0, \
247 strbgt r3, [\B], #1
248 sfi_breg r0, \
249 strbge r4, [\B], #1
250 subs r2, r2, ip
251 sfi_breg r0, \
252 strb lr, [\B], #1
253 blt 8b
254 ands ip, r1, #3
255 beq 1b
256
/* Source not word aligned: ip = r1 & 3 (1, 2 or 3).  r1 is rounded
   down to a word boundary and the word found there is loaded into lr.
   The cmp result then picks the forward_copy_shift expansion that
   follows this code: ip == 2 branches to 17, ip == 3 to 18, and
   ip == 1 falls through.  */
25710: bic r1, r1, #3
258 cmp ip, #2
259 sfi_breg r1, \
260 ldr lr, [\B], #4
261 beq 17f
262 bgt 18f
263
264
/* forward_copy_shift pull push

   Copy from a source that is not word aligned to a word-aligned
   destination, using only word loads and stores.

   Parameters: pull and push are shift counts in bits.  Every output
   word is formed as (previous source word PULL pull) ORed with
   (next source word PUSH push).

   State expected on entry, as set up by the code at label 10: r1 has
   been rounded down to a word boundary and already advanced past the
   first word, which is held in lr; r0 is the destination pointer; r2
   is the remaining byte count biased by 4.

   On exit the macro branches back to label 8 (the byte tail) with r1
   moved back to the first source byte that has not been copied.  */
265 .macro forward_copy_shift pull push
266
/* Raise the bias on r2 to 32; with fewer than 32 bytes left skip the
   block loop.  */
267 subs r2, r2, #28
268 blt 14f
269
/* Optional cache-line alignment; compiled out while CALGN expands to
   nothing.  */
270 CALGN( ands ip, r1, #31 )
271 CALGN( rsb ip, ip, #32 )
272 CALGN( sbcsne r4, ip, r2 ) @ C is always set here
273 CALGN( subcc r2, r2, ip )
274 CALGN( bcc 15f )
275
/* The block loop needs r5-r8 and r10 in addition to r3, r4, ip and lr.  */
27611: push {r5 - r8, r10}
277 cfi_adjust_cfa_offset (20)
278 cfi_rel_offset (r5, 0)
279 cfi_rel_offset (r6, 4)
280 cfi_rel_offset (r7, 8)
281 cfi_rel_offset (r8, 12)
282 cfi_rel_offset (r10, 16)
283
/* Same preload scheme as the aligned loop: an extra bias of 96 on r2,
   skipped preloads when the count is already too small.  */
284 PLD( sfi_pld r1, #0 )
285 PLD( subs r2, r2, #96 )
286 PLD( sfi_pld r1, #28 )
287 PLD( blt 13f )
288 PLD( sfi_pld r1, #60 )
289 PLD( sfi_pld r1, #92 )
290
/* Block loop: load eight further source words (r4-r7, then r8, r10, ip
   and lr), build eight output words in r3-r8, r10 and ip by merging
   each word with its successor, and store 32 bytes.  lr leaves the
   iteration holding the last word loaded, which becomes the
   "previous word" of the next iteration.  Neither the mov/orr
   sequence nor the block transfers set flags, so "bge 12b" still
   tests the subs.  */
29112: PLD( sfi_pld r1, #124 )
29213: sfi_breg r1, \
293 ldmia \B!, {r4, r5, r6, r7}
294 mov r3, lr, PULL #\pull
295 subs r2, r2, #32
296 sfi_breg r1, \
297 ldmia \B!, {r8, r10, ip, lr}
298 orr r3, r3, r4, PUSH #\push
299 mov r4, r4, PULL #\pull
300 orr r4, r4, r5, PUSH #\push
301 mov r5, r5, PULL #\pull
302 orr r5, r5, r6, PUSH #\push
303 mov r6, r6, PULL #\pull
304 orr r6, r6, r7, PUSH #\push
305 mov r7, r7, PULL #\pull
306 orr r7, r7, r8, PUSH #\push
307 mov r8, r8, PULL #\pull
308 orr r8, r8, r10, PUSH #\push
309 mov r10, r10, PULL #\pull
310 orr r10, r10, ip, PUSH #\push
311 mov ip, ip, PULL #\pull
312 orr ip, ip, lr, PUSH #\push
313 sfi_breg r0, \
314 stmia \B!, {r3, r4, r5, r6, r7, r8, r10, ip}
315 bge 12b
316 PLD( cmn r2, #96 )
317 PLD( bge 13b )
318
319 pop {r5 - r8, r10}
320 cfi_adjust_cfa_offset (-20)
321 cfi_restore (r5)
322 cfi_restore (r6)
323 cfi_restore (r7)
324 cfi_restore (r8)
325 cfi_restore (r10)
326
/* ip = number of bytes left in whole words (0, 4, ..., 28); none left
   goes straight to the pointer fix-up at 16.  */
32714: ands ip, r2, #28
328 beq 16f
329
/* Word-at-a-time loop: merge the carried word in lr with the next
   source word and store the result, until ip reaches zero.  */
33015: mov r3, lr, PULL #\pull
331 sfi_breg r1, \
332 ldr lr, [\B], #4
333 subs ip, ip, #4
334 orr r3, r3, lr, PUSH #\push
335 sfi_breg r0, \
336 str r3, [\B], #4
337 bgt 15b
338 CALGN( cmp r2, #0 )
339 CALGN( bge 11b )
340
/* r1 has been advanced past the word held in lr, but only part of that
   word has been written out: step r1 back by push / 8 bytes so that it
   addresses the first byte not yet copied, then let the byte tail at 8
   finish.  */
34116: sub r1, r1, #(\push / 8)
342 b 8b
343
344 .endm
345
346
/* Three expansions of forward_copy_shift, one per possible source
   misalignment.  The code at label 10 compares ip (the low two bits of
   the source pointer, non-zero there) with 2: it falls through to the
   first expansion for ip == 1, branches to 17 for ip == 2 and to 18
   for ip == 3.  In each case pull is 8 * ip and push is 32 - pull.
   The numeric labels inside the macro may repeat because references
   to them use the nearest definition in the stated direction.  */
347 forward_copy_shift pull=8 push=24
348
34917: forward_copy_shift pull=16 push=16
350
35118: forward_copy_shift pull=24 push=8
352
/* END closes the definition opened by ENTRY(memcpy) above;
   libc_hidden_builtin_def is defined outside this file.  */
353END(memcpy)
354libc_hidden_builtin_def (memcpy)