Blame - ap/libc/glibc/glibc-2.23/sysdeps/arm/memmove.S - T106_DC

blob: 96b2366e9292193804e824d9555e9eb1ad235d14 [file] [log] [blame]

xf.li	bdd93d5	2023-05-12 07:10:14 -0700	[diff] [blame]	1	/* Copyright (C) 2006-2016 Free Software Foundation, Inc.
				2	This file is part of the GNU C Library.
				3
				4	Contributed by MontaVista Software, Inc. (written by Nicolas Pitre)
				5
				6	The GNU C Library is free software; you can redistribute it and/or
				7	modify it under the terms of the GNU Lesser General Public
				8	License as published by the Free Software Foundation; either
				9	version 2.1 of the License, or (at your option) any later version.
				10
				11	The GNU C Library is distributed in the hope that it will be useful,
				12	but WITHOUT ANY WARRANTY; without even the implied warranty of
				13	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
				14	Lesser General Public License for more details.
				15
				16	You should have received a copy of the GNU Lesser General Public
				17	License along with the GNU C Library. If not, see
				18	<http://www.gnu.org/licenses/>. */
				19
				20	/* Thumb requires excessive IT insns here. */
				21	#define NO_THUMB
				22	#include <sysdep.h>
				23	#include <arm-features.h>
				24
				25	/*
				26	* Data preload for architectures that support it (ARM V5TE and above)
					*
					* PLD(code...) expands to its arguments when none of the architecture
					* macros tested below is defined, and to nothing otherwise, so every
					* PLD(...) line in this file is dropped on those older architectures.
				27	*/
				28	#if (!defined (__ARM_ARCH_2__) && !defined (__ARM_ARCH_3__) \
				29	&& !defined (__ARM_ARCH_3M__) && !defined (__ARM_ARCH_4__) \
				30	&& !defined (__ARM_ARCH_4T__) && !defined (__ARM_ARCH_5__) \
				31	&& !defined (__ARM_ARCH_5T__))
				32	#define PLD(code...) code
				33	#else
				34	#define PLD(code...)
				35	#endif
				36
				37	/*
				38	* This can be used to enable code to cacheline align the source pointer.
				39	* Experiments on tested architectures (StrongARM and XScale) didn't show
				40	* this a worthwhile thing to do. That might be different in the future.
					*
					* With the active definition below CALGN(code...) expands to nothing, so
					* every CALGN(...) line in this file is compiled out. Swap the two
					* definitions to turn those lines back on.
				41	*/
				42	//#define CALGN(code...) code
				43	#define CALGN(code...)
				44
				45	/*
				46	* Endian independent macros for shifting bytes within registers.
					*
					* PULL and PUSH name two opposite shift operators: lsr/lsl when
					* __ARMEB__ is not defined, lsl/lsr when it is. They are used as
					* operand shifts by the backward_copy_shift macro further down.
				47	*/
				48	#ifndef __ARMEB__
				49	#define PULL lsr
				50	#define PUSH lsl
				51	#else
				52	#define PULL lsl
				53	#define PUSH lsr
				54	#endif
				55
				56	.text
				57	.syntax unified
				58
				59	/*
				60	* Prototype: void *memmove(void *dest, const void *src, size_t n);
				61	*
				62	* Note:
				63	*
				64	* If dest is not above src, or the memory regions don't overlap, we simply
				65	* branch to memcpy which is normally a bit faster. Otherwise (dest starts
					* inside the source region) the copy is done going downwards.
				66	*/
				67
				68	ENTRY(memmove)
					/*
					* Arguments arrive in r0 = dest, r1 = src, r2 = n.  The incoming r0 is
					* pushed below and popped again on exit, so dest is the return value.
					*
					* ip = dest - src.  The branch to memcpy is taken when dest <= src (the
					* cmphi is skipped and LS already holds from the subs) or when
					* n <= dest - src.  Only the remaining case, where dest starts inside
					* the source region, is handled here, copying from the top down.
					*/
				69
				70	subs ip, r0, r1
				71	cmphi r2, ip
				72	#if !IS_IN (libc)
				73	bls memcpy
				74	#else
				75	bls HIDDEN_JUMPTARGET(memcpy)
				76	#endif
				77
					/* r4 and lr are used as plain data registers further down. */
				78	push {r0, r4, lr}
				79	cfi_adjust_cfa_offset (12)
				80	cfi_rel_offset (r4, 4)
				81	cfi_rel_offset (lr, 8)
				82
					/* Matched by cfi_restore_state after the return sequence below. */
				83	cfi_remember_state
				84
					/* Move both pointers to one past the end of their buffers. */
				85	add r1, r1, r2
				86	add r0, r0, r2
				87	subs r2, r2, #4 /* r2 = bytes left - 4 */
				88	blt 8f /* fewer than 4 bytes: byte tail only */
				89	ands ip, r0, #3 /* ip = low two bits of the dest end */
				90	PLD( sfi_pld r1, #-4 )
				91	bne 9f /* dest not word aligned: fix that first */
				92	ands ip, r1, #3 /* ip = low two bits of the src end */
				93	bne 10f /* src not word aligned: shifted copy */
				94
					/*
					* Both pointers are word aligned from here on.  After this subs
					* r2 = bytes left - 32.
					*/
				95	1: subs r2, r2, #(28)
				96	push {r5 - r8}
				97	cfi_adjust_cfa_offset (16)
				98	cfi_rel_offset (r5, 0)
				99	cfi_rel_offset (r6, 4)
				100	cfi_rel_offset (r7, 8)
				101	cfi_rel_offset (r8, 12)
				102	blt 5f /* fewer than 32 bytes left */
				103
					/* The CALGN lines are compiled out while CALGN is defined empty. */
				104	CALGN( ands ip, r1, #31 )
				105	CALGN( sbcsne r4, ip, r2 ) @ C is always set here
				106	CALGN( bcs 2f )
				107	CALGN( adr r4, 6f )
				108	CALGN( subs r2, r2, ip ) @ C is set here
				109	#ifndef ARM_ALWAYS_BX
				110	CALGN( add pc, r4, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2))
				111	#else
				112	CALGN( add r4, r4, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2))
				113	CALGN( bx r4 )
				114	#endif
				115
					/*
					* When PLD is active r2 gets an extra bias of -96 here; the cmn/bge
					* pair after the loop accounts for it.
					*/
				116	PLD( sfi_pld r1, #-4 )
				117	2: PLD( subs r2, r2, #96 )
				118	PLD( sfi_pld r1, #-32 )
				119	PLD( blt 4f )
				120	PLD( sfi_pld r1, #-64 )
				121	PLD( sfi_pld r1, #-96 )
				122
					/*
					* Main loop: move 32 bytes per pass, descending, through eight
					* registers.  sfi_breg and sfi_pld are defined outside this file;
					* presumably B stands for the base register named in the first
					* argument - confirm in sysdep.h.
					*/
				123	3: PLD( sfi_pld r1, #-128 )
				124	4: sfi_breg r1, \
				125	ldmdb \B!, {r3, r4, r5, r6, r7, r8, ip, lr}
				126	subs r2, r2, #32
				127	sfi_breg r0, \
				128	stmdb \B!, {r3, r4, r5, r6, r7, r8, ip, lr}
				129	bge 3b
					/* Keep copying, entering at 4 (past the preload at 3), while r2 + 96 >= 0. */
				130	PLD( cmn r2, #96 )
				131	PLD( bge 4b )
				132
					/*
					* Fewer than 32 bytes remain.  ip = size in bytes of the whole words
					* still to copy (0, 4, ... 28); all biases applied to r2 so far are
					* multiples of 32, so bits 2-4 are unaffected.  ip is then turned into
					* 32 - ip and used as a forward offset into the load ladder at 6, so
					* that only the last ip / 4 slots execute.  Each slot is padded to
					* 2^ARM_BX_ALIGN_LOG2 bytes by the .p2align directives.
					*/
				133	5: ands ip, r2, #28
				134	rsb ip, ip, #32 /* does not touch the flags set by ands */
				135	#ifndef ARM_ALWAYS_BX
				136	/* C is always clear here. */
				137	addne pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
				138	b 7f /* Z set: no whole words left */
				139	#else
				140	beq 7f
				141	push {r10}
				142	cfi_adjust_cfa_offset (4)
				143	cfi_rel_offset (r10, 0)
				144	0: add r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
				145	/* If alignment is not perfect, then there will be some
				146	padding (nop) instructions between this BX and label 6.
				147	The computation above assumed that two instructions
				148	later is exactly the right spot. */
				149	add r10, #(6f - (0b + PC_OFS))
				150	bx r10
				151	#endif
					/* Load ladder: one nop slot followed by seven single-word loads. */
				152	.p2align ARM_BX_ALIGN_LOG2
				153	6: nop
				154	.p2align ARM_BX_ALIGN_LOG2
				155	sfi_breg r1, \
				156	ldr r3, [\B, #-4]!
				157	.p2align ARM_BX_ALIGN_LOG2
				158	sfi_breg r1, \
				159	ldr r4, [\B, #-4]!
				160	.p2align ARM_BX_ALIGN_LOG2
				161	sfi_breg r1, \
				162	ldr r5, [\B, #-4]!
				163	.p2align ARM_BX_ALIGN_LOG2
				164	sfi_breg r1, \
				165	ldr r6, [\B, #-4]!
				166	.p2align ARM_BX_ALIGN_LOG2
				167	sfi_breg r1, \
				168	ldr r7, [\B, #-4]!
				169	.p2align ARM_BX_ALIGN_LOG2
				170	sfi_breg r1, \
				171	ldr r8, [\B, #-4]!
				172	.p2align ARM_BX_ALIGN_LOG2
				173	sfi_breg r1, \
				174	ldr lr, [\B, #-4]!
				175
					/*
					* Same computed jump again, with the same ip, into the matching
					* store ladder at 66 so the registers just loaded are written out.
					*/
				176	#ifndef ARM_ALWAYS_BX
				177	add pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
				178	nop
				179	#else
				180	0: add r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
				181	/* If alignment is not perfect, then there will be some
				182	padding (nop) instructions between this BX and label 66.
				183	The computation above assumed that two instructions
				184	later is exactly the right spot. */
				185	add r10, #(66f - (0b + PC_OFS))
				186	bx r10
				187	#endif
				188	.p2align ARM_BX_ALIGN_LOG2
				189	66: nop
				190	.p2align ARM_BX_ALIGN_LOG2
				191	sfi_breg r0, \
				192	str r3, [\B, #-4]!
				193	.p2align ARM_BX_ALIGN_LOG2
				194	sfi_breg r0, \
				195	str r4, [\B, #-4]!
				196	.p2align ARM_BX_ALIGN_LOG2
				197	sfi_breg r0, \
				198	str r5, [\B, #-4]!
				199	.p2align ARM_BX_ALIGN_LOG2
				200	sfi_breg r0, \
				201	str r6, [\B, #-4]!
				202	.p2align ARM_BX_ALIGN_LOG2
				203	sfi_breg r0, \
				204	str r7, [\B, #-4]!
				205	.p2align ARM_BX_ALIGN_LOG2
				206	sfi_breg r0, \
				207	str r8, [\B, #-4]!
				208	.p2align ARM_BX_ALIGN_LOG2
				209	sfi_breg r0, \
				210	str lr, [\B, #-4]!
				211
					/* Undo the r10 push made before the first computed jump. */
				212	#ifdef ARM_ALWAYS_BX
				213	pop {r10}
				214	cfi_adjust_cfa_offset (-4)
				215	cfi_restore (r10)
				216	#endif
				217
				218	CALGN( bcs 2b )
				219
				220	7: pop {r5 - r8}
				221	cfi_adjust_cfa_offset (-16)
				222	cfi_restore (r5)
				223	cfi_restore (r6)
				224	cfi_restore (r7)
				225	cfi_restore (r8)
				226
					/*
					* Byte tail (0-3 bytes).  The shift leaves NE iff bit 0 of r2 was
					* set and C = bit 1 of r2: one byte is copied on NE, two more on CS.
					* All loads are issued before the stores.
					*/
				227	8: movs r2, r2, lsl #31
				228	sfi_breg r1, \
				229	ldrbne r3, [\B, #-1]!
				230	sfi_breg r1, \
				231	ldrbcs r4, [\B, #-1]!
				232	sfi_breg r1, \
				233	ldrbcs ip, [\B, #-1]
				234	sfi_breg r0, \
				235	strbne r3, [\B, #-1]!
				236	sfi_breg r0, \
				237	strbcs r4, [\B, #-1]!
				238	sfi_breg r0, \
				239	strbcs ip, [\B, #-1]
				240
					/*
					* Return: restore the original dest into r0 and go back to the
					* caller, using bx lr in the configurations selected below and a
					* direct pop into pc otherwise.
					*/
				241	#if ((defined (__ARM_ARCH_4T__) && defined (__THUMB_INTERWORK__)) \
				242	|| defined (ARM_ALWAYS_BX))
				243	pop {r0, r4, lr}
				244	cfi_adjust_cfa_offset (-12)
				245	cfi_restore (r4)
				246	cfi_restore (lr)
				247	bx lr
				248	#else
				249	pop {r0, r4, pc}
				250	#endif
				251
					/* Pairs with cfi_remember_state above: only r0, r4, lr are pushed here. */
				252	cfi_restore_state
				253
					/*
					* Destination end not word aligned: ip = r0 & 3 (1..3).  Copy exactly
					* ip bytes (GT covers ip == 3, GE covers ip >= 2, the last pair is
					* unconditional), then re-check what is left and the src alignment.
					*/
				254	9: cmp ip, #2
				255	sfi_breg r1, \
				256	ldrbgt r3, [\B, #-1]!
				257	sfi_breg r1, \
				258	ldrbge r4, [\B, #-1]!
				259	sfi_breg r1, \
				260	ldrb lr, [\B, #-1]!
				261	sfi_breg r0, \
				262	strbgt r3, [\B, #-1]!
				263	sfi_breg r0, \
				264	strbge r4, [\B, #-1]!
				265	subs r2, r2, ip
				266	sfi_breg r0, \
				267	strb lr, [\B, #-1]!
				268	blt 8b /* fewer than 4 bytes left */
				269	ands ip, r1, #3
				270	beq 1b /* src is word aligned as well */
				271
					/*
					* Source not word aligned: ip = r1 & 3 (1..3).  Round r1 down to a
					* word boundary, preload that word into r3 and pick the shifted-copy
					* variant: ip == 2 goes to 17, ip == 1 to 18, ip == 3 falls through
					* into the first backward_copy_shift instantiation.
					*/
				272	10: bic r1, r1, #3
				273	cmp ip, #2
				274	sfi_breg r1, \
				275	ldr r3, [\B, #0]
				276	beq 17f
				277	blt 18f
				278
				279
				280	.macro backward_copy_shift push pull
					/*
					* Descending word copy for a source that is not word aligned.
					*
					* push, pull: shift amounts in bits.  Every stored word is built as
					* (higher source word PUSH push) | (next lower source word PULL pull).
					* The three instantiations in this file pass 8/24, 16/16 and 24/8.
					*
					* State on entry, as set up by the code at label 10: r1 has been
					* rounded down to a word boundary, r3 holds the word loaded from [r1],
					* and r2 = bytes left - 4.  r3 always carries the lowest word read so
					* far into the next step.  Control leaves through "b 8b" (byte tail).
					*/
				281
				282	subs r2, r2, #28 /* r2 = bytes left - 32 */
				283	blt 14f /* fewer than 32 bytes: word loop only */
				284
					/* The CALGN lines are compiled out while CALGN is defined empty. */
				285	CALGN( ands ip, r1, #31 )
				286	CALGN( rsb ip, ip, #32 )
				287	CALGN( sbcsne r4, ip, r2 ) @ C is always set here
				288	CALGN( subcc r2, r2, ip )
				289	CALGN( bcc 15f )
				290
				291	11: push {r5 - r8, r10}
				292	cfi_adjust_cfa_offset (20)
				293	cfi_rel_offset (r5, 0)
				294	cfi_rel_offset (r6, 4)
				295	cfi_rel_offset (r7, 8)
				296	cfi_rel_offset (r8, 12)
				297	cfi_rel_offset (r10, 16)
				298
					/* When PLD is active r2 gets an extra bias of -96 here. */
				299	PLD( sfi_pld r1, #-4 )
				300	PLD( subs r2, r2, #96 )
				301	PLD( sfi_pld r1, #-32 )
				302	PLD( blt 13f )
				303	PLD( sfi_pld r1, #-64 )
				304	PLD( sfi_pld r1, #-96 )
				305
					/*
					* 32 bytes per pass.  The carried r3 must be shifted into lr before
					* the second ldmdb overwrites r3, so the instruction order matters.
					* sfi_breg is defined outside this file; presumably B stands for the
					* base register named in its first argument - confirm in sysdep.h.
					*/
				306	12: PLD( sfi_pld r1, #-128 )
				307	13: sfi_breg r1, \
				308	ldmdb \B!, {r7, r8, r10, ip}
				309	mov lr, r3, PUSH #\push
				310	subs r2, r2, #32
				311	sfi_breg r1, \
				312	ldmdb \B!, {r3, r4, r5, r6}
				313	orr lr, lr, ip, PULL #\pull
				314	mov ip, ip, PUSH #\push
				315	orr ip, ip, r10, PULL #\pull
				316	mov r10, r10, PUSH #\push
				317	orr r10, r10, r8, PULL #\pull
				318	mov r8, r8, PUSH #\push
				319	orr r8, r8, r7, PULL #\pull
				320	mov r7, r7, PUSH #\push
				321	orr r7, r7, r6, PULL #\pull
				322	mov r6, r6, PUSH #\push
				323	orr r6, r6, r5, PULL #\pull
				324	mov r5, r5, PUSH #\push
				325	orr r5, r5, r4, PULL #\pull
				326	mov r4, r4, PUSH #\push
				327	orr r4, r4, r3, PULL #\pull
				328	sfi_breg r0, \
				329	stmdb \B!, {r4 - r8, r10, ip, lr}
				330	bge 12b
					/* Keep copying, entering at 13 (past the preload at 12), while r2 + 96 >= 0. */
				331	PLD( cmn r2, #96 )
				332	PLD( bge 13b )
				333
				334	pop {r5 - r8, r10}
				335	cfi_adjust_cfa_offset (-20)
				336	cfi_restore (r5)
				337	cfi_restore (r6)
				338	cfi_restore (r7)
				339	cfi_restore (r8)
				340	cfi_restore (r10)
				341
					/* ip = size in bytes of the whole words still to copy (0, 4, ... 28). */
				342	14: ands ip, r2, #28
				343	beq 16f
				344
					/* One word per pass, same shift-and-merge as above, until ip reaches 0. */
				345	15: mov lr, r3, PUSH #\push
				346	sfi_breg r1, \
				347	ldr r3, [\B, #-4]!
				348	subs ip, ip, #4
				349	orr lr, lr, r3, PULL #\pull
				350	sfi_breg r0, \
				351	str lr, [\B, #-4]!
				352	bgt 15b
				353	CALGN( cmp r2, #0 )
				354	CALGN( bge 11b )
				355
					/*
					* Add pull / 8 bytes back to r1.  For the three instantiations in this
					* file that equals the low bits cleared by "bic r1, r1, #3" at label
					* 10, so r1 is a byte-accurate source pointer again for the tail at 8.
					*/
				356	16: add r1, r1, #(\pull / 8)
				357	b 8b
				358
				359	.endm
				360
				361
				362	backward_copy_shift push=8 pull=24 /* fall-through from label 10: src & 3 == 3 */
				363
				364	17: backward_copy_shift push=16 pull=16 /* "beq 17f" at label 10: src & 3 == 2 */
				365
				366	18: backward_copy_shift push=24 pull=8 /* "blt 18f" at label 10: src & 3 == 1 */
				367
				368
					/*
					* Every instantiation above leaves through its own "b 8b", so nothing
					* falls through to this point.  libc_hidden_builtin_def is defined
					* outside this file; presumably it sets up the hidden internal alias
					* for memmove - confirm against the libc symbol headers.
					*/
				369	END(memmove)
				370	libc_hidden_builtin_def (memmove)