/* Copyright (C) 2006-2015 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   Contributed by MontaVista Software, Inc. (written by Nicolas Pitre)

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* Thumb requires excessive IT insns here.  */
#define NO_THUMB
#include <sysdep.h>
#include <arm-features.h>

/*
 * Data preload for architectures that support it (ARM V5TE and above).
 * PLD() expands its argument on those architectures and to nothing on
 * older ones, so prefetch hints can be written inline free of #ifdefs.
 */
#if (!defined (__ARM_ARCH_2__) && !defined (__ARM_ARCH_3__) \
     && !defined (__ARM_ARCH_3M__) && !defined (__ARM_ARCH_4__) \
     && !defined (__ARM_ARCH_4T__) && !defined (__ARM_ARCH_5__) \
     && !defined (__ARM_ARCH_5T__))
#define PLD(code...) code
#else
#define PLD(code...)
#endif

/*
 * This can be used to enable code to cacheline align the source pointer.
 * Experiments on tested architectures (StrongARM and XScale) didn't show
 * this a worthwhile thing to do.  That might be different in the future.
 */
//#define CALGN(code...) code
#define CALGN(code...)

/*
 * Endian independent macros for shifting bytes within registers.
 * PULL extracts bytes toward the "low" end of a register and PUSH
 * moves them toward the "high" end, in memory-order terms, so the
 * unaligned-copy code below works for either byte order.
 */
#ifndef __ARMEB__
#define PULL lsr
#define PUSH lsl
#else
#define PULL lsl
#define PUSH lsr
#endif

/*
 * Prototype: void *memmove(void *dest, const void *src, size_t n);
 *
 * Entry: r0 = dest, r1 = src, r2 = n.  Returns dest in r0.
 *
 * Note:
 *
 * If the memory regions don't overlap, we simply branch to memcpy which is
 * normally a bit faster.  Otherwise the copy is done going downwards (from
 * the highest address toward the lowest), so a forward-overlapping pair of
 * buffers cannot have not-yet-read source bytes overwritten.
 */

	.text
	.syntax unified

ENTRY(memmove)

	/* ip = dest - src (unsigned).  If n <= dest - src, a forward
	   memcpy cannot step on unread source bytes, so use it.  */
	subs	ip, r0, r1
	cmphi	r2, ip
#if !IS_IN (libc)
	bls	memcpy
#else
	bls	HIDDEN_JUMPTARGET(memcpy)
#endif

	push	{r0, r4, lr}
	cfi_adjust_cfa_offset (12)
	cfi_rel_offset (r4, 4)
	cfi_rel_offset (lr, 8)

	cfi_remember_state

	/* Backward copy: advance both pointers one past the end.  */
	add	r1, r1, r2
	add	r0, r0, r2
	subs	r2, r2, #4
	blt	8f			/* < 4 bytes: trailing-byte code */
	ands	ip, r0, #3
	PLD(	sfi_pld	r1, #-4		)
	bne	9f			/* word-align the destination first */
	ands	ip, r1, #3
	bne	10f			/* src misaligned: shifting copy */

	/* Both pointers word aligned.  */
1:	subs	r2, r2, #(28)
	push	{r5 - r8}
	cfi_adjust_cfa_offset (16)
	cfi_rel_offset (r5, 0)
	cfi_rel_offset (r6, 4)
	cfi_rel_offset (r7, 8)
	cfi_rel_offset (r8, 12)
	blt	5f

	CALGN(	ands	ip, r1, #31		)
	CALGN(	sbcsne	r4, ip, r2		)  @ C is always set here
	CALGN(	bcs	2f			)
	CALGN(	adr	r4, 6f			)
	CALGN(	subs	r2, r2, ip		)  @ C is set here
#ifndef ARM_ALWAYS_BX
	CALGN(	add	pc, r4, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2))
#else
	CALGN(	add	r4, r4, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2))
	CALGN(	bx	r4			)
#endif

	PLD(	sfi_pld	r1, #-4		)
2:	PLD(	subs	r2, r2, #96	)
	PLD(	sfi_pld	r1, #-32	)
	PLD(	blt	4f		)
	PLD(	sfi_pld	r1, #-64	)
	PLD(	sfi_pld	r1, #-96	)

	/* Main loop: 32 bytes per iteration through r3-r8, ip, lr.  */
3:	PLD(	sfi_pld	r1, #-128	)
4:	sfi_breg r1, \
	ldmdb	\B!, {r3, r4, r5, r6, r7, r8, ip, lr}
	subs	r2, r2, #32
	sfi_breg r0, \
	stmdb	\B!, {r3, r4, r5, r6, r7, r8, ip, lr}
	bge	3b
	PLD(	cmn	r2, #96			)
	PLD(	bge	4b			)

	/* 0-28 bytes (a multiple of 4) remain; jump into the ldr
	   ladder below so exactly the needed words are copied.  */
5:	ands	ip, r2, #28
	rsb	ip, ip, #32
#ifndef ARM_ALWAYS_BX
	/* C is always clear here.  */
	addne	pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
	b	7f
#else
	beq	7f
	push	{r10}
	cfi_adjust_cfa_offset (4)
	cfi_rel_offset (r10, 0)
0:	add	r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
	/* If alignment is not perfect, then there will be some
	   padding (nop) instructions between this BX and label 6.
	   The computation above assumed that two instructions
	   later is exactly the right spot.  */
	add	r10, #(6f - (0b + PC_OFS))
	bx	r10
#endif
	.p2align ARM_BX_ALIGN_LOG2
6:	nop
	.p2align ARM_BX_ALIGN_LOG2
	sfi_breg r1, \
	ldr	r3, [\B, #-4]!
	.p2align ARM_BX_ALIGN_LOG2
	sfi_breg r1, \
	ldr	r4, [\B, #-4]!
	.p2align ARM_BX_ALIGN_LOG2
	sfi_breg r1, \
	ldr	r5, [\B, #-4]!
	.p2align ARM_BX_ALIGN_LOG2
	sfi_breg r1, \
	ldr	r6, [\B, #-4]!
	.p2align ARM_BX_ALIGN_LOG2
	sfi_breg r1, \
	ldr	r7, [\B, #-4]!
	.p2align ARM_BX_ALIGN_LOG2
	sfi_breg r1, \
	ldr	r8, [\B, #-4]!
	.p2align ARM_BX_ALIGN_LOG2
	sfi_breg r1, \
	ldr	lr, [\B, #-4]!

#ifndef ARM_ALWAYS_BX
	add	pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
	nop
#else
0:	add	r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
	/* If alignment is not perfect, then there will be some
	   padding (nop) instructions between this BX and label 66.
	   The computation above assumed that two instructions
	   later is exactly the right spot.  */
	add	r10, #(66f - (0b + PC_OFS))
	bx	r10
#endif
	.p2align ARM_BX_ALIGN_LOG2
66:	nop
	.p2align ARM_BX_ALIGN_LOG2
	sfi_breg r0, \
	str	r3, [\B, #-4]!
	.p2align ARM_BX_ALIGN_LOG2
	sfi_breg r0, \
	str	r4, [\B, #-4]!
	.p2align ARM_BX_ALIGN_LOG2
	sfi_breg r0, \
	str	r5, [\B, #-4]!
	.p2align ARM_BX_ALIGN_LOG2
	sfi_breg r0, \
	str	r6, [\B, #-4]!
	.p2align ARM_BX_ALIGN_LOG2
	sfi_breg r0, \
	str	r7, [\B, #-4]!
	.p2align ARM_BX_ALIGN_LOG2
	sfi_breg r0, \
	str	r8, [\B, #-4]!
	.p2align ARM_BX_ALIGN_LOG2
	sfi_breg r0, \
	str	lr, [\B, #-4]!

#ifdef ARM_ALWAYS_BX
	pop	{r10}
	cfi_adjust_cfa_offset (-4)
	cfi_restore (r10)
#endif

	CALGN(	bcs	2b			)

7:	pop	{r5 - r8}
	cfi_adjust_cfa_offset (-16)
	cfi_restore (r5)
	cfi_restore (r6)
	cfi_restore (r7)
	cfi_restore (r8)

	/* Final 0-3 bytes: shift count bit 1 into N and bit 0 into C,
	   then copy bytes under those conditions.  */
8:	movs	r2, r2, lsl #31
	sfi_breg r1, \
	ldrbne	r3, [\B, #-1]!
	sfi_breg r1, \
	ldrbcs	r4, [\B, #-1]!
	sfi_breg r1, \
	ldrbcs	ip, [\B, #-1]
	sfi_breg r0, \
	strbne	r3, [\B, #-1]!
	sfi_breg r0, \
	strbcs	r4, [\B, #-1]!
	sfi_breg r0, \
	strbcs	ip, [\B, #-1]

#if ((defined (__ARM_ARCH_4T__) && defined (__THUMB_INTERWORK__)) \
     || defined (ARM_ALWAYS_BX))
	pop	{r0, r4, lr}
	cfi_adjust_cfa_offset (-12)
	cfi_restore (r4)
	cfi_restore (lr)
	bx	lr
#else
	pop	{r0, r4, pc}
#endif

	cfi_restore_state

	/* Destination misaligned: copy ip (1-3) bytes to word-align r0.
	   GT/GE conditions come from the cmp against 2.  */
9:	cmp	ip, #2
	sfi_breg r1, \
	ldrbgt	r3, [\B, #-1]!
	sfi_breg r1, \
	ldrbge	r4, [\B, #-1]!
	sfi_breg r1, \
	ldrb	lr, [\B, #-1]!
	sfi_breg r0, \
	strbgt	r3, [\B, #-1]!
	sfi_breg r0, \
	strbge	r4, [\B, #-1]!
	subs	r2, r2, ip
	sfi_breg r0, \
	strb	lr, [\B, #-1]!
	blt	8b
	ands	ip, r1, #3
	beq	1b

	/* Source not word aligned: round r1 down, preload the partial
	   word, and dispatch on the misalignment (1, 2 or 3 bytes) to
	   the matching shifting copy below.  */
10:	bic	r1, r1, #3
	cmp	ip, #2
	sfi_breg r1, \
	ldr	r3, [\B, #0]
	beq	17f
	blt	18f


	/*
	 * Backward copy with the source off word alignment: read whole
	 * words and reassemble each output word from two inputs using
	 * PUSH #\push / PULL #\pull (\push + \pull == 32).
	 */
	.macro backward_copy_shift push pull

	subs	r2, r2, #28
	blt	14f

	CALGN(	ands	ip, r1, #31		)
	CALGN(	rsb	ip, ip, #32		)
	CALGN(	sbcsne	r4, ip, r2		)  @ C is always set here
	CALGN(	subcc	r2, r2, ip		)
	CALGN(	bcc	15f			)

11:	push	{r5 - r8, r10}
	cfi_adjust_cfa_offset (20)
	cfi_rel_offset (r5, 0)
	cfi_rel_offset (r6, 4)
	cfi_rel_offset (r7, 8)
	cfi_rel_offset (r8, 12)
	cfi_rel_offset (r10, 16)

	PLD(	sfi_pld	r1, #-4		)
	PLD(	subs	r2, r2, #96	)
	PLD(	sfi_pld	r1, #-32	)
	PLD(	blt	13f		)
	PLD(	sfi_pld	r1, #-64	)
	PLD(	sfi_pld	r1, #-96	)

	/* 32 bytes per iteration; r3 carries the partial word between
	   iterations.  */
12:	PLD(	sfi_pld	r1, #-128	)
13:	sfi_breg r1, \
	ldmdb	\B!, {r7, r8, r10, ip}
	mov	lr, r3, PUSH #\push
	subs	r2, r2, #32
	sfi_breg r1, \
	ldmdb	\B!, {r3, r4, r5, r6}
	orr	lr, lr, ip, PULL #\pull
	mov	ip, ip, PUSH #\push
	orr	ip, ip, r10, PULL #\pull
	mov	r10, r10, PUSH #\push
	orr	r10, r10, r8, PULL #\pull
	mov	r8, r8, PUSH #\push
	orr	r8, r8, r7, PULL #\pull
	mov	r7, r7, PUSH #\push
	orr	r7, r7, r6, PULL #\pull
	mov	r6, r6, PUSH #\push
	orr	r6, r6, r5, PULL #\pull
	mov	r5, r5, PUSH #\push
	orr	r5, r5, r4, PULL #\pull
	mov	r4, r4, PUSH #\push
	orr	r4, r4, r3, PULL #\pull
	sfi_breg r0, \
	stmdb	\B!, {r4 - r8, r10, ip, lr}
	bge	12b
	PLD(	cmn	r2, #96			)
	PLD(	bge	13b			)

	pop	{r5 - r8, r10}
	cfi_adjust_cfa_offset (-20)
	cfi_restore (r5)
	cfi_restore (r6)
	cfi_restore (r7)
	cfi_restore (r8)
	cfi_restore (r10)

14:	ands	ip, r2, #28
	beq	16f

	/* One shifted word at a time for the 4-28 byte tail.  */
15:	mov	lr, r3, PUSH #\push
	sfi_breg r1, \
	ldr	r3, [\B, #-4]!
	subs	ip, ip, #4
	orr	lr, lr, r3, PULL #\pull
	sfi_breg r0, \
	str	lr, [\B, #-4]!
	bgt	15b
	CALGN(	cmp	r2, #0			)
	CALGN(	bge	11b			)

	/* Undo the bic-rounding of r1 before the trailing-byte code.  */
16:	add	r1, r1, #(\pull / 8)
	b	8b

	.endm


	backward_copy_shift	push=8	pull=24

17:	backward_copy_shift	push=16	pull=16

18:	backward_copy_shift	push=24	pull=8


END(memmove)
libc_hidden_builtin_def (memmove)