Blame - ap/libc/glibc/glibc-2.23/sysdeps/arm/memcpy.S - T106_DC

blob: db8ba50af35ad21294c76cbfe9f857210449faa0 [file] [log] [blame]

xf.li	bdd93d5	2023-05-12 07:10:14 -0700	[diff] [blame^]	1	/* Copyright (C) 2006-2016 Free Software Foundation, Inc.
				2	This file is part of the GNU C Library.
				3
				4	Contributed by MontaVista Software, Inc. (written by Nicolas Pitre)
				5
				6	The GNU C Library is free software; you can redistribute it and/or
				7	modify it under the terms of the GNU Lesser General Public
				8	License as published by the Free Software Foundation; either
				9	version 2.1 of the License, or (at your option) any later version.
				10
				11	The GNU C Library is distributed in the hope that it will be useful,
				12	but WITHOUT ANY WARRANTY; without even the implied warranty of
				13	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
				14	Lesser General Public License for more details.
				15
				16	You should have received a copy of the GNU Lesser General Public
				17	License along with the GNU C Library. If not, see
				18	<http://www.gnu.org/licenses/>. */
				19
				20	/* Thumb requires excessive IT insns here. */
				21	#define NO_THUMB
				22	#include <sysdep.h>
				23	#include <arm-features.h>
				24
				25	/*
				26	* Data preload for architectures that support it (ARM V5TE and above)
				27	*/
				28	#if (!defined (__ARM_ARCH_2__) && !defined (__ARM_ARCH_3__) \
				29	&& !defined (__ARM_ARCH_3M__) && !defined (__ARM_ARCH_4__) \
				30	&& !defined (__ARM_ARCH_4T__) && !defined (__ARM_ARCH_5__) \
				31	&& !defined (__ARM_ARCH_5T__))
				32	#define PLD(code...) code /* preload supported: emit the wrapped instruction unchanged */
				33	#else /* one of the older architectures listed above */
				34	#define PLD(code...) /* expands to nothing: wrapped preload code is dropped */
				35	#endif
				36
				37	/*
				38	* This can be used to enable code to cacheline align the source pointer.
				39	* Experiments on tested architectures (StrongARM and XScale) didn't show
				40	* this a worthwhile thing to do. That might be different in the future.
				41	*/
				42	//#define CALGN(code...) code
				43	#define CALGN(code...) /* expands to nothing: every CALGN( ... ) line below is compiled out */
				44
				45	/*
				46	* Endian independent macros for shifting bytes within registers.
				47	*/
				48	#ifndef __ARMEB__ /* __ARMEB__ not defined (presumably little-endian build) */
				49	#define PULL lsr /* shift applied to the previously loaded word in forward_copy_shift */
				50	#define PUSH lsl /* shift applied to the newly loaded word in forward_copy_shift */
				51	#else /* __ARMEB__ defined: the two shift directions are swapped */
				52	#define PULL lsl
				53	#define PUSH lsr
				54	#endif
				55
				56	.text
				57	.syntax unified
				58
				59	/* Prototype: void *memcpy(void *dest, const void *src, size_t n); */
				60
				61	ENTRY(memcpy)
				62
				63	push {r0, r4, lr} /* r0 (dest) is saved so the final pop restores it as the return value */
				64	cfi_adjust_cfa_offset (12)
				65	cfi_rel_offset (r4, 4)
				66	cfi_rel_offset (lr, 8)
				67
				68	cfi_remember_state /* matched by cfi_restore_state after the return sequence */
				69
				70	subs r2, r2, #4 /* bias the byte count by -4; the low two bits of r2 are unchanged */
				71	blt 8f /* fewer than 4 bytes: go straight to the byte tail */
				72	ands ip, r0, #3 /* ip = dest & 3 */
				73	PLD( sfi_pld r1, #0 )
				74	bne 9f /* dest is not word aligned: copy 1-3 bytes first */
				75	ands ip, r1, #3 /* ip = src & 3 */
				76	bne 10f /* src is not word aligned: use the shifting copy */
				77
				78	1: subs r2, r2, #(28) /* with the earlier -4 bias, r2 = remaining bytes - 32 */
				79	push {r5 - r8} /* extra scratch registers for the 8-register block moves; push leaves the flags alone */
				80	cfi_adjust_cfa_offset (16)
				81	cfi_rel_offset (r5, 0)
				82	cfi_rel_offset (r6, 4)
				83	cfi_rel_offset (r7, 8)
				84	cfi_rel_offset (r8, 12)
				85	blt 5f /* flags from the subs above: fewer than 32 bytes left, skip the block loop */
				86
				87	CALGN( ands ip, r1, #31 )
				88	CALGN( rsb r3, ip, #32 )
				89	CALGN( sbcsne r4, r3, r2 ) @ C is always set here
				90	CALGN( bcs 2f )
				91	CALGN( adr r4, 6f )
				92	CALGN( subs r2, r2, r3 ) @ C gets set
				93	#ifndef ARM_ALWAYS_BX
				94	CALGN( add pc, r4, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2))
				95	#else
				96	CALGN( add r4, r4, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2))
				97	CALGN( bx r4 )
				98	#endif
				99
				100	PLD( sfi_pld r1, #0 )
				101	2: PLD( subs r2, r2, #96 ) /* extra -96 bias, applied only when preloading is enabled */
				102	PLD( sfi_pld r1, #28 )
				103	PLD( blt 4f ) /* not enough data ahead: enter the loop past the preload at label 3 */
				104	PLD( sfi_pld r1, #60 )
				105	PLD( sfi_pld r1, #92 )
				106
				107	3: PLD( sfi_pld r1, #124 )
				108	4: sfi_breg r1, \
				109	ldmia \B!, {r3, r4, r5, r6, r7, r8, ip, lr} /* load eight words (32 bytes) with base writeback */
				110	subs r2, r2, #32 /* account for the 32 bytes just loaded */
				111	sfi_breg r0, \
				112	stmia \B!, {r3, r4, r5, r6, r7, r8, ip, lr} /* store the same eight words; stmia does not touch the flags */
				113	bge 3b /* flags from the subs above: keep looping */
				114	PLD( cmn r2, #96 ) /* compares r2 against -96, i.e. tests the count without the extra preload bias */
				115	PLD( bge 4b ) /* still a full block left: loop again, skipping the preload at label 3 */
				116
				117	5: ands ip, r2, #28 /* ip = bytes left in whole words (0..28); Z set when there are none */
				118	rsb ip, ip, #32 /* ip = 32 - that amount; rsb without the s suffix keeps the flags from ands */
				119	#ifndef ARM_ALWAYS_BX
				120	/* C is always clear here. */
				121	addne pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2) /* computed jump: skip ip/4 slots of the load table starting at label 6 */
				122	b 7f /* taken only when no whole words remain (addne not executed) */
				123	#else
				124	beq 7f /* no whole words remain */
				125	push {r10} /* r10 holds the computed branch target for bx */
				126	cfi_adjust_cfa_offset (4)
				127	cfi_rel_offset (r10, 0)
				128	0: add r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
				129	/* If alignment is not perfect, then there will be some
				130	padding (nop) instructions between this BX and label 6.
				131	The computation above assumed that two instructions
				132	later is exactly the right spot. */
				133	add r10, #(6f - (0b + PC_OFS))
				134	bx r10
				135	#endif
				136	.p2align ARM_BX_ALIGN_LOG2 /* every table slot below starts on a (1 << ARM_BX_ALIGN_LOG2)-byte boundary */
				137	6: nop /* slot 0 of the load table; the smallest ip (4) skips exactly this slot */
				138	.p2align ARM_BX_ALIGN_LOG2
				139	sfi_breg r1, \
				140	ldr r3, [\B], #4 /* seven one-word loads; the jump above enters so that only the needed ones run */
				141	.p2align ARM_BX_ALIGN_LOG2
				142	sfi_breg r1, \
				143	ldr r4, [\B], #4
				144	.p2align ARM_BX_ALIGN_LOG2
				145	sfi_breg r1, \
				146	ldr r5, [\B], #4
				147	.p2align ARM_BX_ALIGN_LOG2
				148	sfi_breg r1, \
				149	ldr r6, [\B], #4
				150	.p2align ARM_BX_ALIGN_LOG2
				151	sfi_breg r1, \
				152	ldr r7, [\B], #4
				153	.p2align ARM_BX_ALIGN_LOG2
				154	sfi_breg r1, \
				155	ldr r8, [\B], #4
				156	.p2align ARM_BX_ALIGN_LOG2
				157	sfi_breg r1, \
				158	ldr lr, [\B], #4
				159
				160	#ifndef ARM_ALWAYS_BX
				161	add pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2) /* same computed jump, now into the store table at label 66 (ip unchanged) */
				162	nop
				163	#else
				164	0: add r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
				165	/* If alignment is not perfect, then there will be some
				166	padding (nop) instructions between this BX and label 66.
				167	The computation above assumed that two instructions
				168	later is exactly the right spot. */
				169	add r10, #(66f - (0b + PC_OFS))
				170	bx r10
				171	#endif
				172	.p2align ARM_BX_ALIGN_LOG2
				173	66: nop /* slot 0 of the store table, mirroring the load table */
				174	.p2align ARM_BX_ALIGN_LOG2
				175	sfi_breg r0, \
				176	str r3, [\B], #4 /* stores use the same register order as the loads above */
				177	.p2align ARM_BX_ALIGN_LOG2
				178	sfi_breg r0, \
				179	str r4, [\B], #4
				180	.p2align ARM_BX_ALIGN_LOG2
				181	sfi_breg r0, \
				182	str r5, [\B], #4
				183	.p2align ARM_BX_ALIGN_LOG2
				184	sfi_breg r0, \
				185	str r6, [\B], #4
				186	.p2align ARM_BX_ALIGN_LOG2
				187	sfi_breg r0, \
				188	str r7, [\B], #4
				189	.p2align ARM_BX_ALIGN_LOG2
				190	sfi_breg r0, \
				191	str r8, [\B], #4
				192	.p2align ARM_BX_ALIGN_LOG2
				193	sfi_breg r0, \
				194	str lr, [\B], #4
				195
				196	#ifdef ARM_ALWAYS_BX
				197	pop {r10} /* undo the push {r10} made before the first computed branch */
				198	cfi_adjust_cfa_offset (-4)
				199	cfi_restore (r10)
				200	#endif
				201
				202	CALGN( bcs 2b )
				203
				204	7: pop {r5 - r8} /* release the scratch registers pushed at label 1 */
				205	cfi_adjust_cfa_offset (-16)
				206	cfi_restore (r5)
				207	cfi_restore (r6)
				208	cfi_restore (r7)
				209	cfi_restore (r8)
				210
				211	8: movs r2, r2, lsl #31 /* byte tail: NE <- bit 0 of r2 (one byte), CS <- bit 1 of r2 (two bytes) */
				212	sfi_breg r1, \
				213	ldrbne r3, [\B], #1 /* the byte loads/stores below do not alter the flags set by movs */
				214	sfi_breg r1, \
				215	ldrbcs r4, [\B], #1
				216	sfi_breg r1, \
				217	ldrbcs ip, [\B] /* last possible byte: no post-increment needed */
				218	sfi_breg r0, \
				219	strbne r3, [\B], #1
				220	sfi_breg r0, \
				221	strbcs r4, [\B], #1
				222	sfi_breg r0, \
				223	strbcs ip, [\B]
				224
				225	#if ((defined (__ARM_ARCH_4T__) && defined(__THUMB_INTERWORK__)) \
				226	|| defined (ARM_ALWAYS_BX))
				227	pop {r0, r4, lr} /* restore the original dest into r0 as the return value */
				228	cfi_adjust_cfa_offset (-12)
				229	cfi_restore (r4)
				230	cfi_restore (lr)
				231	bx lr /* return with bx on these configurations */
				232	#else
				233	pop {r0, r4, pc} /* restore the original dest into r0 and return by loading pc */
				234	#endif
				235
				236	cfi_restore_state /* back to the unwind state recorded by cfi_remember_state at entry */
				237
				238	9: rsb ip, ip, #4 /* ip = 4 - (dest & 3): bytes needed to word-align dest (1..3) */
				239	cmp ip, #2 /* GT: 3 bytes, EQ: 2 bytes, LT: 1 byte */
				240	sfi_breg r1, \
				241	ldrbgt r3, [\B], #1 /* copied only when 3 bytes are needed */
				242	sfi_breg r1, \
				243	ldrbge r4, [\B], #1 /* copied when 2 or 3 bytes are needed */
				244	sfi_breg r1, \
				245	ldrb lr, [\B], #1 /* always copied */
				246	sfi_breg r0, \
				247	strbgt r3, [\B], #1
				248	sfi_breg r0, \
				249	strbge r4, [\B], #1
				250	subs r2, r2, ip /* account for the alignment bytes; r2 still carries the -4 bias */
				251	sfi_breg r0, \
				252	strb lr, [\B], #1 /* strb leaves the flags from subs intact for the blt below */
				253	blt 8b /* fewer than 4 bytes left: finish in the byte tail */
				254	ands ip, r1, #3 /* ip = src & 3 now that dest is aligned */
				255	beq 1b /* src is aligned as well: take the aligned block copy */
				256
				257	10: bic r1, r1, #3 /* round src down to a word boundary; ip keeps the byte offset (1..3) */
				258	cmp ip, #2
				259	sfi_breg r1, \
				260	ldr lr, [\B], #4 /* prime lr with the first aligned source word; ldr does not change the flags */
				261	beq 17f /* offset 2: pull=16 push=16 variant */
				262	bgt 18f /* offset 3: pull=24 push=8 variant; otherwise fall through for offset 1 */
				263
				264
				265	.macro forward_copy_shift pull push /* copy from a word-rounded src whose data is offset; pull/push are the two shift amounts in bits */
				266
				267	subs r2, r2, #28 /* with the -4 bias from entry, r2 = remaining bytes - 32 */
				268	blt 14f /* fewer than 32 bytes left: skip the block loop */
				269
				270	CALGN( ands ip, r1, #31 )
				271	CALGN( rsb ip, ip, #32 )
				272	CALGN( sbcsne r4, ip, r2 ) @ C is always set here
				273	CALGN( subcc r2, r2, ip )
				274	CALGN( bcc 15f )
				275
				276	11: push {r5 - r8, r10} /* scratch registers for the 8-word block */
				277	cfi_adjust_cfa_offset (20)
				278	cfi_rel_offset (r5, 0)
				279	cfi_rel_offset (r6, 4)
				280	cfi_rel_offset (r7, 8)
				281	cfi_rel_offset (r8, 12)
				282	cfi_rel_offset (r10, 16)
				283
				284	PLD( sfi_pld r1, #0 )
				285	PLD( subs r2, r2, #96 ) /* extra -96 bias, applied only when preloading is enabled */
				286	PLD( sfi_pld r1, #28 )
				287	PLD( blt 13f )
				288	PLD( sfi_pld r1, #60 )
				289	PLD( sfi_pld r1, #92 )
				290
				291	12: PLD( sfi_pld r1, #124 )
				292	13: sfi_breg r1, \
				293	ldmia \B!, {r4, r5, r6, r7} /* next four source words */
				294	mov r3, lr, PULL #\pull /* start the first output word from the word carried over in lr */
				295	subs r2, r2, #32 /* flags are consumed by bge 12b; the mov/orr/ldmia/stmia in between do not set them */
				296	sfi_breg r1, \
				297	ldmia \B!, {r8, r10, ip, lr} /* four more words; lr ends up holding the carry-over for the next round */
				298	orr r3, r3, r4, PUSH #\push /* each output word = previous word PULLed | next word PUSHed */
				299	mov r4, r4, PULL #\pull
				300	orr r4, r4, r5, PUSH #\push
				301	mov r5, r5, PULL #\pull
				302	orr r5, r5, r6, PUSH #\push
				303	mov r6, r6, PULL #\pull
				304	orr r6, r6, r7, PUSH #\push
				305	mov r7, r7, PULL #\pull
				306	orr r7, r7, r8, PUSH #\push
				307	mov r8, r8, PULL #\pull
				308	orr r8, r8, r10, PUSH #\push
				309	mov r10, r10, PULL #\pull
				310	orr r10, r10, ip, PUSH #\push
				311	mov ip, ip, PULL #\pull
				312	orr ip, ip, lr, PUSH #\push
				313	sfi_breg r0, \
				314	stmia \B!, {r3, r4, r5, r6, r7, r8, r10, ip} /* store the eight realigned words */
				315	bge 12b
				316	PLD( cmn r2, #96 ) /* compares r2 against -96, i.e. tests the count without the extra preload bias */
				317	PLD( bge 13b )
				318
				319	pop {r5 - r8, r10}
				320	cfi_adjust_cfa_offset (-20)
				321	cfi_restore (r5)
				322	cfi_restore (r6)
				323	cfi_restore (r7)
				324	cfi_restore (r8)
				325	cfi_restore (r10)
				326
				327	14: ands ip, r2, #28 /* ip = bytes left in whole words (0..28) */
				328	beq 16f /* none left */
				329
				330	15: mov r3, lr, PULL #\pull /* one realigned word per iteration */
				331	sfi_breg r1, \
				332	ldr lr, [\B], #4
				333	subs ip, ip, #4
				334	orr r3, r3, lr, PUSH #\push
				335	sfi_breg r0, \
				336	str r3, [\B], #4
				337	bgt 15b /* flags from subs ip above; orr and str do not set them */
				338	CALGN( cmp r2, #0 )
				339	CALGN( bge 11b )
				340
				341	16: sub r1, r1, #(\push / 8) /* step src back by push/8 bytes: the part of the word in lr not yet stored */
				342	b 8b /* finish the last 0-3 bytes in the shared byte tail */
				343
				344	.endm
				345
				346
				347	forward_copy_shift pull=8 push=24 /* reached by fall-through from label 10 (neither beq 17f nor bgt 18f taken) */
				348
				349	17: forward_copy_shift pull=16 push=16 /* target of beq 17f: src offset 2 */
				350
				351	18: forward_copy_shift pull=24 push=8 /* target of bgt 18f: src offset 3 */
				352
				353	END(memcpy)
				354	libc_hidden_builtin_def (memcpy)