blob: b01164a7e8e73347ca510b928d97bfbd602aa15f [file] [log] [blame]
xf.li84027492024-04-09 00:17:51 -07001/* Copyright (C) 2006-2021 Free Software Foundation, Inc.
xf.libdd93d52023-05-12 07:10:14 -07002 This file is part of the GNU C Library.
3
4 Contributed by MontaVista Software, Inc. (written by Nicolas Pitre)
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library. If not, see
xf.li84027492024-04-09 00:17:51 -070018 <https://www.gnu.org/licenses/>. */
xf.libdd93d52023-05-12 07:10:14 -070019
20/* Thumb requires excessive IT insns here. */
21#define NO_THUMB
22#include <sysdep.h>
23#include <arm-features.h>
24
25/*
26 * Data preload for architectures that support it (ARM V5TE and above)
 *
 * PLD(code) expands to its argument unless one of __ARM_ARCH_2__,
 * __ARM_ARCH_3__, __ARM_ARCH_3M__, __ARM_ARCH_4__, __ARM_ARCH_4T__,
 * __ARM_ARCH_5__ or __ARM_ARCH_5T__ is defined. In that case it expands
 * to nothing, so every instruction wrapped in PLD( ) is dropped.
27 */
28#if (!defined (__ARM_ARCH_2__) && !defined (__ARM_ARCH_3__) \
29 && !defined (__ARM_ARCH_3M__) && !defined (__ARM_ARCH_4__) \
30 && !defined (__ARM_ARCH_4T__) && !defined (__ARM_ARCH_5__) \
31 && !defined (__ARM_ARCH_5T__))
32#define PLD(code...) code
33#else
34#define PLD(code...)
35#endif
36
37/*
38 * This can be used to enable code to cacheline align the source pointer.
39 * Experiments on tested architectures (StrongARM and XScale) didn't show
40 * this to be a worthwhile thing to do. That might be different in the future.
 *
 * With the definition below CALGN(code) expands to nothing, so every
 * CALGN( ) line in this file is currently inactive. The commented-out
 * definition is the variant that would enable those lines.
41 */
42//#define CALGN(code...) code
43#define CALGN(code...)
44
45/*
46 * Endian independent macros for shifting bytes within registers.
 *
 * When __ARMEB__ is not defined, PULL is lsr and PUSH is lsl; when it is
 * defined the two are swapped. backward_copy_shift uses them as the shift
 * operators that splice two neighbouring source words into one
 * destination word.
47 */
48#ifndef __ARMEB__
49#define PULL lsr
50#define PUSH lsl
51#else
52#define PULL lsl
53#define PUSH lsr
54#endif
55
56 .text
57 .syntax unified
58
59/*
60 * Prototype: void *memmove(void *dest, const void *src, size_t n);
61 *
62 * Note:
63 *
64 * If the memory regions don't overlap, we simply branch to memcpy which is
65 * normally a bit faster. Otherwise the copy is done going downwards.
 *
 * Registers as read by the code below: r0 = dest, r1 = src, r2 = n.
 * The incoming r0 is pushed on entry and popped back into r0 on return,
 * so the value returned is the original dest.
 *
 * Map of the numeric labels in the backward copy:
 *   1-4    32 bytes per iteration, src and dest both word-aligned
 *   5-66   remaining whole words via computed jumps into ldr/str tables
 *   7      restore r5-r8
 *   8      trailing bytes (bits 0 and 1 of r2), then return
 *   9      copy 1-3 bytes so that the end of dest is word-aligned
 *   10     src not word-aligned: pick a backward_copy_shift expansion
66 */
67
68ENTRY(memmove)
69
/* ip = dest - src. Execution falls through only when dest > src and
   n > dest - src (unsigned compares). In every other case control is
   handed to memcpy with a plain branch. */
70 subs ip, r0, r1
71 cmphi r2, ip
72#if !IS_IN (libc)
73 bls memcpy
74#else
75 bls HIDDEN_JUMPTARGET(memcpy)
76#endif
77
/* Save the original dest (reloaded into r0 at return), r4 and lr. */
78 push {r0, r4, lr}
79 cfi_adjust_cfa_offset (12)
80 cfi_rel_offset (r4, 4)
81 cfi_rel_offset (lr, 8)
82
/* Remember the unwind state that is valid while {r0, r4, lr} are on the
   stack; it is restored after the return sequence further down. */
83 cfi_remember_state
84
/* Move both pointers to one past the end; the copy runs downwards. */
85 add r1, r1, r2
86 add r0, r0, r2
/* r2 = n - 4. With fewer than 4 bytes go straight to the byte tail. */
87 subs r2, r2, #4
xf.li84027492024-04-09 00:17:51 -070088 blo 8f
/* ip = low two bits of the dest end; non-zero means dest is aligned
   first at label 9. */
xf.libdd93d52023-05-12 07:10:14 -070089 ands ip, r0, #3
xf.li84027492024-04-09 00:17:51 -070090 PLD( pld [r1, #-4] )
xf.libdd93d52023-05-12 07:10:14 -070091 bne 9f
/* ip = low two bits of the src end; non-zero takes the shifted copy
   at label 10. */
92 ands ip, r1, #3
93 bne 10f
94
/* Both pointers are word-aligned here. After the subtraction
   r2 = remaining - 32. r5-r8 are saved for the 8-register block copy.
   blo acts on the flags set by subs: fewer than 32 bytes remain. */
951: subs r2, r2, #(28)
96 push {r5 - r8}
97 cfi_adjust_cfa_offset (16)
98 cfi_rel_offset (r5, 0)
99 cfi_rel_offset (r6, 4)
100 cfi_rel_offset (r7, 8)
101 cfi_rel_offset (r8, 12)
xf.li84027492024-04-09 00:17:51 -0700102 blo 5f
xf.libdd93d52023-05-12 07:10:14 -0700103
/* Optional source cache-line alignment. These lines produce no code
   with the empty CALGN definition used in this file. */
104 CALGN( ands ip, r1, #31 )
105 CALGN( sbcsne r4, ip, r2 ) @ C is always set here
106 CALGN( bcs 2f )
107 CALGN( adr r4, 6f )
108 CALGN( subs r2, r2, ip ) @ C is set here
109#ifndef ARM_ALWAYS_BX
110 CALGN( add pc, r4, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2))
111#else
112 CALGN( add r4, r4, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2))
113 CALGN( bx r4 )
114#endif
115
/* Preload ahead of the loop (addresses below r1). With fewer than 96
   in r2 the two furthest preloads and the one at label 3 are skipped
   on the first pass by entering the loop at label 4. */
xf.li84027492024-04-09 00:17:51 -0700116 PLD( pld [r1, #-4] )
1172: PLD( cmp r2, #96 )
118 PLD( pld [r1, #-32] )
119 PLD( blo 4f )
120 PLD( pld [r1, #-64] )
121 PLD( pld [r1, #-96] )
xf.libdd93d52023-05-12 07:10:14 -0700122
/* Main loop: move 32 bytes per iteration, highest addresses first.
   bhs acts on the flags from subs and repeats while it did not
   borrow. */
xf.li84027492024-04-09 00:17:51 -07001233: PLD( pld [r1, #-128] )
1244: ldmdb r1!, {r3, r4, r5, r6, r7, r8, ip, lr}
xf.libdd93d52023-05-12 07:10:14 -0700125 subs r2, r2, #32
xf.li84027492024-04-09 00:17:51 -0700126 stmdb r0!, {r3, r4, r5, r6, r7, r8, ip, lr}
127 bhs 3b
xf.libdd93d52023-05-12 07:10:14 -0700128
/* ands: ip = byte count of the whole words still to copy (0, 4, ..., 28)
   and Z is set when there are none. rsb then turns ip into 32 minus
   that count; it has no S suffix, so the flags from ands are still
   valid for addne / beq below. */
1295: ands ip, r2, #28
130 rsb ip, ip, #32
131#ifndef ARM_ALWAYS_BX
/* Computed jump into the ldr table that starts at label 6. ip is a
   multiple of 4 and the shift scales it to the size of one table entry
   as padded by .p2align ARM_BX_ALIGN_LOG2 (that macro is not defined in
   this file). A larger ip skips more leading entries, so fewer words
   are loaded. This relies on pc reading as two instructions ahead. */
132 /* C is always clear here. */
133 addne pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
134 b 7f
135#else
/* Variant using bx: r10 holds the jump target, so it is saved here and
   popped again after the str table. */
136 beq 7f
137 push {r10}
138 cfi_adjust_cfa_offset (4)
139 cfi_rel_offset (r10, 0)
1400: add r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
141 /* If alignment is not perfect, then there will be some
142 padding (nop) instructions between this BX and label 6.
143 The computation above assumed that two instructions
144 later is exactly the right spot. */
145 add r10, #(6f - (0b + PC_OFS))
146 bx r10
147#endif
/* Load table. Entry 0 is a nop; the following seven entries load r3,
   r4, r5, r6, r7, r8 and lr from descending addresses. Every entry is
   aligned with .p2align so that all entries have the same size. */
148 .p2align ARM_BX_ALIGN_LOG2
1496: nop
150 .p2align ARM_BX_ALIGN_LOG2
xf.li84027492024-04-09 00:17:51 -0700151 ldr r3, [r1, #-4]!
xf.libdd93d52023-05-12 07:10:14 -0700152 .p2align ARM_BX_ALIGN_LOG2
xf.li84027492024-04-09 00:17:51 -0700153 ldr r4, [r1, #-4]!
xf.libdd93d52023-05-12 07:10:14 -0700154 .p2align ARM_BX_ALIGN_LOG2
xf.li84027492024-04-09 00:17:51 -0700155 ldr r5, [r1, #-4]!
xf.libdd93d52023-05-12 07:10:14 -0700156 .p2align ARM_BX_ALIGN_LOG2
xf.li84027492024-04-09 00:17:51 -0700157 ldr r6, [r1, #-4]!
xf.libdd93d52023-05-12 07:10:14 -0700158 .p2align ARM_BX_ALIGN_LOG2
xf.li84027492024-04-09 00:17:51 -0700159 ldr r7, [r1, #-4]!
xf.libdd93d52023-05-12 07:10:14 -0700160 .p2align ARM_BX_ALIGN_LOG2
xf.li84027492024-04-09 00:17:51 -0700161 ldr r8, [r1, #-4]!
xf.libdd93d52023-05-12 07:10:14 -0700162 .p2align ARM_BX_ALIGN_LOG2
xf.li84027492024-04-09 00:17:51 -0700163 ldr lr, [r1, #-4]!
xf.libdd93d52023-05-12 07:10:14 -0700164
/* Second computed jump with the same ip: enter the str table at the
   entry matching the first register loaded above, so exactly the
   loaded registers are stored. */
165#ifndef ARM_ALWAYS_BX
166 add pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
167 nop
168#else
1690: add r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
170 /* If alignment is not perfect, then there will be some
171 padding (nop) instructions between this BX and label 66.
172 The computation above assumed that two instructions
173 later is exactly the right spot. */
174 add r10, #(66f - (0b + PC_OFS))
175 bx r10
176#endif
/* Store table, laid out like the load table: a nop followed by stores
   of r3, r4, r5, r6, r7, r8 and lr to descending addresses. */
177 .p2align ARM_BX_ALIGN_LOG2
17866: nop
179 .p2align ARM_BX_ALIGN_LOG2
xf.li84027492024-04-09 00:17:51 -0700180 str r3, [r0, #-4]!
xf.libdd93d52023-05-12 07:10:14 -0700181 .p2align ARM_BX_ALIGN_LOG2
xf.li84027492024-04-09 00:17:51 -0700182 str r4, [r0, #-4]!
xf.libdd93d52023-05-12 07:10:14 -0700183 .p2align ARM_BX_ALIGN_LOG2
xf.li84027492024-04-09 00:17:51 -0700184 str r5, [r0, #-4]!
xf.libdd93d52023-05-12 07:10:14 -0700185 .p2align ARM_BX_ALIGN_LOG2
xf.li84027492024-04-09 00:17:51 -0700186 str r6, [r0, #-4]!
xf.libdd93d52023-05-12 07:10:14 -0700187 .p2align ARM_BX_ALIGN_LOG2
xf.li84027492024-04-09 00:17:51 -0700188 str r7, [r0, #-4]!
xf.libdd93d52023-05-12 07:10:14 -0700189 .p2align ARM_BX_ALIGN_LOG2
xf.li84027492024-04-09 00:17:51 -0700190 str r8, [r0, #-4]!
xf.libdd93d52023-05-12 07:10:14 -0700191 .p2align ARM_BX_ALIGN_LOG2
xf.li84027492024-04-09 00:17:51 -0700192 str lr, [r0, #-4]!
xf.libdd93d52023-05-12 07:10:14 -0700193
/* Undo the r10 push made before the first bx-style computed jump. */
194#ifdef ARM_ALWAYS_BX
195 pop {r10}
196 cfi_adjust_cfa_offset (-4)
197 cfi_restore (r10)
198#endif
199
200 CALGN( bcs 2b )
201
/* Restore the registers saved at label 1. */
2027: pop {r5 - r8}
203 cfi_adjust_cfa_offset (-16)
204 cfi_restore (r5)
205 cfi_restore (r6)
206 cfi_restore (r7)
207 cfi_restore (r8)
208
/* Trailing bytes. Shifting r2 left by 31 leaves only bit 0 in the
   result (NE when it is set) and moves bit 1 into the carry flag (CS
   when it is set). One byte is copied when NE and two more when CS.
   The last conditional load/store pair uses no writeback. */
2098: movs r2, r2, lsl #31
xf.li84027492024-04-09 00:17:51 -0700210 ldrbne r3, [r1, #-1]!
211 ldrbcs r4, [r1, #-1]!
212 ldrbcs ip, [r1, #-1]
213 strbne r3, [r0, #-1]!
214 strbcs r4, [r0, #-1]!
215 strbcs ip, [r0, #-1]
xf.libdd93d52023-05-12 07:10:14 -0700216
/* Return with the saved dest popped into r0. The first variant restores
   lr and returns with bx lr; the other pops the return address
   straight into pc. */
217#if ((defined (__ARM_ARCH_4T__) && defined (__THUMB_INTERWORK__)) \
218 || defined (ARM_ALWAYS_BX))
219 pop {r0, r4, lr}
220 cfi_adjust_cfa_offset (-12)
221 cfi_restore (r4)
222 cfi_restore (lr)
223 bx lr
224#else
225 pop {r0, r4, pc}
226#endif
227
/* The code from here on is entered with {r0, r4, lr} still pushed, so
   switch back to the unwind state remembered after the entry push. */
228 cfi_restore_state
229
/* The dest end is misaligned by ip (1-3) bytes: copy ip bytes so that
   dest becomes word-aligned. The gt pair runs for ip == 3, the ge pair
   for ip >= 2, and the lr pair always runs. */
2309: cmp ip, #2
xf.li84027492024-04-09 00:17:51 -0700231 ldrbgt r3, [r1, #-1]!
232 ldrbge r4, [r1, #-1]!
233 ldrb lr, [r1, #-1]!
234 strbgt r3, [r0, #-1]!
235 strbge r4, [r0, #-1]!
/* Account for the bytes just copied. blo acts on the flags from this
   subs (the strb in between does not set flags): with fewer than 4
   bytes left, finish in the byte tail at label 8. */
xf.libdd93d52023-05-12 07:10:14 -0700236 subs r2, r2, ip
xf.li84027492024-04-09 00:17:51 -0700237 strb lr, [r0, #-1]!
238 blo 8b
/* Re-test the src alignment: an aligned src joins the word copy at
   label 1, otherwise fall through to label 10. */
xf.libdd93d52023-05-12 07:10:14 -0700239 ands ip, r1, #3
240 beq 1b
241
/* src is misaligned by ip (1-3) bytes. Round r1 down to a word boundary
   and load that word into r3, then select the backward_copy_shift
   expansion for this misalignment: ip == 2 goes to label 17, ip == 1
   goes to label 18, and ip == 3 falls through to the expansion that
   follows the macro definition. The branches act on the flags from
   cmp; bic and ldr leave them unchanged. */
24210: bic r1, r1, #3
243 cmp ip, #2
xf.li84027492024-04-09 00:17:51 -0700244 ldr r3, [r1, #0]
xf.libdd93d52023-05-12 07:10:14 -0700245 beq 17f
246 blt 18f
247
248
249 .macro backward_copy_shift push pull
/* backward_copy_shift push pull

   Backward word copy for a source that is not word-aligned.

   State expected on entry (set up at label 10 of memmove):
     r0  word-aligned end of the destination
     r1  source pointer rounded down to a word boundary
     r3  the word loaded from that rounded-down address
     r2  remaining byte count minus 4

   push, pull: bit counts for the PUSH and PULL shift operators. Every
   destination word is built as
     (higher source word PUSH push) | (next lower source word PULL pull).
   The three expansions in this file pass values with push + pull == 32.

   Labels 11-16 are numeric local labels, so each expansion has its own
   set. On exit the macro branches back to label 8 of memmove. */
250
/* r2 = remaining - 32. With fewer than 32 bytes skip the block loop. */
251 subs r2, r2, #28
xf.li84027492024-04-09 00:17:51 -0700252 blo 14f
xf.libdd93d52023-05-12 07:10:14 -0700253
/* Optional source cache-line alignment. These lines produce no code
   with the empty CALGN definition used in this file. */
254 CALGN( ands ip, r1, #31 )
255 CALGN( rsb ip, ip, #32 )
256 CALGN( sbcsne r4, ip, r2 ) @ C is always set here
257 CALGN( subcc r2, r2, ip )
258 CALGN( bcc 15f )
259
/* Save the extra registers used by the 32-byte loop. */
26011: push {r5 - r8, r10}
261 cfi_adjust_cfa_offset (20)
262 cfi_rel_offset (r5, 0)
263 cfi_rel_offset (r6, 4)
264 cfi_rel_offset (r7, 8)
265 cfi_rel_offset (r8, 12)
266 cfi_rel_offset (r10, 16)
267
xf.li84027492024-04-09 00:17:51 -0700268 PLD( pld [r1, #-4] )
269 PLD( cmp r2, #96 )
270 PLD( pld [r1, #-32] )
271 PLD( blo 13f )
272 PLD( pld [r1, #-64] )
273 PLD( pld [r1, #-96] )
xf.libdd93d52023-05-12 07:10:14 -0700274
/* Block loop: each pass reads eight source words with two ldmdb, merges
   each word with its lower neighbour, and stores eight destination
   words with one stmdb. lr is derived from the old r3 before the second
   ldmdb overwrites r3. After the pass r3 holds the lowest word read and
   is carried into the next pass. Only subs sets flags in the loop body,
   so bhs repeats while that subtraction did not borrow. */
xf.li84027492024-04-09 00:17:51 -070027512: PLD( pld [r1, #-128] )
27613: ldmdb r1!, {r7, r8, r10, ip}
xf.libdd93d52023-05-12 07:10:14 -0700277 mov lr, r3, PUSH #\push
278 subs r2, r2, #32
xf.li84027492024-04-09 00:17:51 -0700279 ldmdb r1!, {r3, r4, r5, r6}
xf.libdd93d52023-05-12 07:10:14 -0700280 orr lr, lr, ip, PULL #\pull
281 mov ip, ip, PUSH #\push
282 orr ip, ip, r10, PULL #\pull
283 mov r10, r10, PUSH #\push
284 orr r10, r10, r8, PULL #\pull
285 mov r8, r8, PUSH #\push
286 orr r8, r8, r7, PULL #\pull
287 mov r7, r7, PUSH #\push
288 orr r7, r7, r6, PULL #\pull
289 mov r6, r6, PUSH #\push
290 orr r6, r6, r5, PULL #\pull
291 mov r5, r5, PUSH #\push
292 orr r5, r5, r4, PULL #\pull
293 mov r4, r4, PUSH #\push
294 orr r4, r4, r3, PULL #\pull
xf.li84027492024-04-09 00:17:51 -0700295 stmdb r0!, {r4 - r8, r10, ip, lr}
296 bhs 12b
xf.libdd93d52023-05-12 07:10:14 -0700297
/* Restore the registers saved at label 11. */
298 pop {r5 - r8, r10}
299 cfi_adjust_cfa_offset (-20)
300 cfi_restore (r5)
301 cfi_restore (r6)
302 cfi_restore (r7)
303 cfi_restore (r8)
304 cfi_restore (r10)
305
/* ip = byte count of the whole words still to copy (0, 4, ..., 28).
   With none left go to the exit at label 16. */
30614: ands ip, r2, #28
307 beq 16f
308
/* One word per pass: combine the carried word in r3 with the next lower
   source word, store the result, and count ip down by 4. bgt acts on
   the flags from subs. */
30915: mov lr, r3, PUSH #\push
xf.li84027492024-04-09 00:17:51 -0700310 ldr r3, [r1, #-4]!
xf.libdd93d52023-05-12 07:10:14 -0700311 subs ip, ip, #4
312 orr lr, lr, r3, PULL #\pull
xf.li84027492024-04-09 00:17:51 -0700313 str lr, [r0, #-4]!
xf.libdd93d52023-05-12 07:10:14 -0700314 bgt 15b
315 CALGN( cmp r2, #0 )
316 CALGN( bge 11b )
317
/* r1 was rounded down to a word boundary before the macro was entered.
   Adding pull / 8 bytes moves it back up by the original misalignment
   so the byte tail at label 8 reads from the exact source position. */
31816: add r1, r1, #(\pull / 8)
319 b 8b
320
321 .endm
322
323
/* Reached by falling through from label 10 (neither beq 17f nor
   blt 18f taken), i.e. the src end was misaligned by 3 bytes. */
324 backward_copy_shift push=8 pull=24
325
/* Target of beq 17f at label 10: src end misaligned by 2 bytes. */
32617: backward_copy_shift push=16 pull=16
327
/* Target of blt 18f at label 10: src end misaligned by 1 byte. */
32818: backward_copy_shift push=24 pull=8
329
330
331END(memmove)
332libc_hidden_builtin_def (memmove)