Blame - ap/libc/glibc/glibc-2.23/sysdeps/alpha/stxcpy.S - T106_DC

blob: ba6c9f74ddd6a95fe39e73f064c6a74f26461998 [file] [log] [blame]

xf.li	bdd93d5	2023-05-12 07:10:14 -0700	[diff] [blame^]	1	/* Copyright (C) 1996-2016 Free Software Foundation, Inc.
				2	Contributed by Richard Henderson (rth@tamu.edu)
				3	This file is part of the GNU C Library.
				4
				5	The GNU C Library is free software; you can redistribute it and/or
				6	modify it under the terms of the GNU Lesser General Public
				7	License as published by the Free Software Foundation; either
				8	version 2.1 of the License, or (at your option) any later version.
				9
				10	The GNU C Library is distributed in the hope that it will be useful,
				11	but WITHOUT ANY WARRANTY; without even the implied warranty of
				12	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
				13	Lesser General Public License for more details.
				14
				15	You should have received a copy of the GNU Lesser General Public
				16	License along with the GNU C Library. If not, see
				17	<http://www.gnu.org/licenses/>. */
				18
				19	/* Copy a null-terminated string from SRC to DST.
				20
				21	This is an internal routine used by strcpy, stpcpy, and strcat.
				22	As such, it uses special linkage conventions to make implementation
				23	of these public functions more efficient.
				24
				25	On input:
				26	t9 = return address
				27	a0 = DST
				28	a1 = SRC
				29
				30	On output:
				31	t8 = bitmask (with one bit set) indicating the last byte written
				32	a0 = unaligned address of the last word written
				33
				34	Furthermore, v0, a3-a5, t11, and t12 are untouched.
				35	*/
				36
				37	/* This is generally scheduled for the EV5, but should still be pretty
				38	good for the EV4 too. */
					/* NOTE(review): the "e0" / "e1" tags in the trailing comments below
					   presumably name the EV5 integer issue pipe each instruction was
					   scheduled for, with ".." marking the second instruction of an
					   issue pair -- confirm against the EV5 hardware reference before
					   relying on them.  */
				39
				40	#include <sysdep.h>
				41
					/* noat: the assembler must not use the $at temporary on its own.
					   noreorder: keep the instructions in the hand-scheduled order
					   written here (NOTE(review): GNU as is believed to accept this
					   on Alpha only for compatibility -- confirm).  */
				42	.set noat
				43	.set noreorder
				44
				45	.text
				46	.type __stxcpy, @function
				47	.globl __stxcpy
					/* Declares that __stxcpy does not use the procedure value
					   register.  The header comment lists t12 as untouched.  */
				48	.usepv __stxcpy, no
				49
				50	cfi_startproc
					/* The caller passes the return address in t9 (see the linkage
					   notes in the header comment), so unwind information must name
					   t9 as the return column.  */
				51	cfi_return_column (t9)
				52
				53	/* On entry to this basic block:
				54	t0 == the first destination word for masking back in
				55	t1 == the first source word. */
					/* Co-aligned copy path: SRC and DST have the same offset within a
					   quadword.  The caller (__stxcpy) has already advanced a1 by 8;
					   that is harmless for the mskq* instructions below because they
					   use only the low three bits of a1 as the byte offset.  */
				56	.align 3
				57	stxcpy_aligned:
				58	/* Create the 1st output word and detect 0's in the 1st input word. */
					/* After this group:
					   t2 == source word with every byte below the string start forced
					         to 0xff, so cmpbge cannot report a null in bytes that
					         precede the string;
					   t3 == source bytes from the string start upward;
					   t0 == destination bytes below the string start;
					   t1 == t0 | t3, the first word to store;
					   t7 == one bit per byte of t2 that is zero.  */
				59	lda t2, -1 # e1 : build a mask against false zero
				60	mskqh t2, a1, t2 # e0 : detection in the src word
				61	mskqh t1, a1, t3 # e0 :
				62	ornot t1, t2, t2 # .. e1 :
				63	mskql t0, a1, t0 # e0 : assemble the first output word
				64	cmpbge zero, t2, t7 # .. e1 : bits set iff null found
				65	or t0, t3, t1 # e0 :
				66	bne t7, $a_eos # .. e1 :
				67
				68	/* On entry to this basic block:
				69	t0 == the first destination word for masking back in
				70	t1 == a source word not containing a null. */
					/* Main co-aligned loop: store the current word, advance both
					   pointers by 8, load the next source word and test it for a
					   zero byte.  The loop repeats while no zero byte is found.  */
				71	$a_loop:
				72	stq_u t1, 0(a0) # e0 :
				73	addq a0, 8, a0 # .. e1 :
				74	ldq_u t1, 0(a1) # e0 :
				75	addq a1, 8, a1 # .. e1 :
				76	cmpbge zero, t1, t7 # e0 (stall)
				77	beq t7, $a_loop # .. e1 (zdb)
				78
				79	/* Take care of the final (partial) word store.
				80	On entry to this basic block we have:
				81	t1 == the source word containing the null
				82	t7 == the cmpbge mask that found it. */
				83	$a_eos:
					/* t7 & -t7 isolates the lowest set bit, i.e. the byte position
					   of the first null.  t8 is not modified again on this path and
					   is the bitmask result described in the header comment.  */
				84	negq t7, t6 # e0 : find low bit set
				85	and t7, t6, t8 # e1 (stall)
				86
				87	/* For the sake of the cache, don't read a destination word
				88	if we're not going to need it. */
					/* Bit 7 of t8 set means the null is the last byte of the word,
					   so the whole source word is stored with no merge.  */
				89	and t8, 0x80, t6 # e0 :
				90	bne t6, 1f # .. e1 (zdb)
				91
				92	/* We're doing a partial word store and so need to combine
				93	our source and original destination words. */
					/* t6 == byte mask of the bytes below the null;
					   t7 == byte mask of the bytes up to and including the null.
					   The stored word takes the bytes below the null from the
					   source, a zero byte at the null, and the bytes above it from
					   the existing destination word.  */
				94	ldq_u t0, 0(a0) # e0 :
				95	subq t8, 1, t6 # .. e1 :
				96	zapnot t1, t6, t1 # e0 : clear src bytes >= null
				97	or t8, t6, t7 # .. e1 :
				98	zap t0, t7, t0 # e0 : clear dst bytes <= null
				99	or t0, t1, t1 # e1 :
				100
					/* Final store; a0 is left addressing this last word and control
					   returns through t9.  */
				101	1: stq_u t1, 0(a0) # e0 :
				102	ret (t9) # .. e1 :
				103
				104	.align 3
					/* Entry point of the internal string copy (a0 = DST, a1 = SRC,
					   t9 = return address, per the header comment).

					   Dispatch on alignment: if the low three bits of a0 and a1
					   differ, branch to $unaligned.  Otherwise load the first source
					   word into t1, advance a1 past it, and continue at
					   stxcpy_aligned.  The first destination word is loaded into t0
					   only when a0 is not 8-byte aligned; when it is aligned, t0 is
					   left holding zero from the preceding "and".  */
				105	__stxcpy:
				106	/* Are source and destination co-aligned? */
				107	xor a0, a1, t0 # e0 :
				108	unop # :
				109	and t0, 7, t0 # e0 :
				110	bne t0, $unaligned # .. e1 :
				111
				112	/* We are co-aligned; take care of a partial first word. */
				113	ldq_u t1, 0(a1) # e0 : load first src word
				114	and a0, 7, t0 # .. e1 : take care not to load a word ...
				115	addq a1, 8, a1 # e0 :
				116	beq t0, stxcpy_aligned # .. e1 : ... if we wont need it
				117	ldq_u t0, 0(a0) # e0 :
				118	br stxcpy_aligned # .. e1 :
				119
				120
				121	/* The source and destination are not co-aligned. Align the destination
				122	and cope. We have to be very careful about not reading too much and
				123	causing a SEGV. */
				124
				125	.align 3
				126	$u_head:
				127	/* We know just enough now to be able to assemble the first
				128	full source word. We can still find a zero at the end of it
				129	that prevents us from outputting the whole thing.
				130
				131	On entry to this basic block:
				132	t0 == the first dest word, for masking back in, if needed else 0
				133	t1 == the low bits of the first source word
				134	t6 == bytemask that is -1 in dest word bytes */
					/* Reached only from $unaligned, after it has subtracted the
					   destination misalignment from a1.  From here on the low three
					   bits of a1 are the byte shift between source quadwords and
					   destination quadwords, which is what extql / extqh use.  */
				135
				136	ldq_u t2, 8(a1) # e0 :
				137	addq a1, 8, a1 # .. e1 :
				138
					/* Assemble the first output word in t1: join the two source
					   quadwords with extql / extqh, drop the bytes that fall below
					   the destination start (mskqh by a0), and merge in the
					   destination bytes that must be preserved (mskql by a0).  */
				139	extql t1, a1, t1 # e0 :
				140	extqh t2, a1, t4 # e0 :
				141	mskql t0, a0, t0 # e0 :
				142	or t1, t4, t1 # .. e1 :
				143	mskqh t1, a0, t1 # e0 :
				144	or t0, t1, t1 # e1 :
				145
					/* OR the preserved-byte mask into a scratch copy before testing,
					   so that preserved destination bytes are never mistaken for the
					   terminator.  t1 itself is left intact for the store.  */
				146	or t1, t6, t6 # e0 :
				147	cmpbge zero, t6, t7 # .. e1 :
				148	lda t6, -1 # e0 : for masking just below
				149	bne t7, $u_final # .. e1 :
				150
					/* No null in the first output word.  Force the low bytes of t2
					   that were already consumed above to 0xff, then test the rest
					   of t2 for a terminator before any further source word is
					   read.  */
				151	mskql t6, a1, t6 # e0 : mask out the bits we have
				152	or t6, t2, t2 # e1 : already extracted before
				153	cmpbge zero, t2, t7 # e0 : testing eos
				154	bne t7, $u_late_head_exit # .. e1 (zdb)
				155
				156	/* Finally, we've got all the stupid leading edge cases taken care
				157	of and we can set up to enter the main loop. */
					/* This establishes the $u_loop entry state: t0 holds the unused
					   high bytes of the previous source word shifted down, and t2
					   holds the next source word, already tested for a null.  */
				158
				159	stq_u t1, 0(a0) # e0 : store first output word
				160	addq a0, 8, a0 # .. e1 :
				161	extql t2, a1, t0 # e0 : position ho-bits of lo word
				162	ldq_u t2, 8(a1) # .. e1 : read next high-order source word
				163	addq a1, 8, a1 # e0 :
				164	cmpbge zero, t2, t7 # .. e1 :
				165	nop # e0 :
				166	bne t7, $u_eos # .. e1 :
				167
				168	/* Unaligned copy main loop. In order to avoid reading too much,
				169	the loop is structured to detect zeros in aligned source words.
				170	This has, unfortunately, effectively pulled half of a loop
				171	iteration out into the head and half into the tail, but it does
				172	prevent nastiness from accumulating in the very thing we want
				173	to run as fast as possible.
				174
				175	On entry to this basic block:
				176	t0 == the shifted high-order bits from the previous source word
				177	t2 == the unshifted current source word
				178
				179	We further know that t2 does not contain a null terminator. */
					/* Each iteration emits one destination quadword:
					   t1 = t0 | (low bytes of t2 moved up by extqh), stored at the
					   pre-increment a0 (hence the -8 displacement after a0 has been
					   advanced).  The high bytes of t2 are carried forward through
					   t3 into t0.  The next source word is loaded into t2 and tested
					   at the bottom, and the loop exits with the word containing a
					   zero byte in t2.  */
				180
				181	.align 3
				182	$u_loop:
				183	extqh t2, a1, t1 # e0 : extract high bits for current word
				184	addq a1, 8, a1 # .. e1 :
				185	extql t2, a1, t3 # e0 : extract low bits for next time
				186	addq a0, 8, a0 # .. e1 :
				187	or t0, t1, t1 # e0 : current dst word now complete
				188	ldq_u t2, 0(a1) # .. e1 : load high word for next time
				189	stq_u t1, -8(a0) # e0 : save the current word
				190	mov t3, t0 # .. e1 :
				191	cmpbge zero, t2, t7 # e0 : test new word for eos
				192	beq t7, $u_loop # .. e1 :
				193
				194	/* We've found a zero somewhere in the source word we just read.
				195	If it resides in the lower half, we have one (probably partial)
				196	word to write out, and if it resides in the upper half, we
				197	have one full and one partial word left to write out.
				198
				199	On entry to this basic block:
				200	t0 == the shifted high-order bits from the previous source word
				201	t2 == the unshifted current source word. */
				202	$u_eos:
					/* Build the next output word exactly as the loop would, then
					   test that assembled word (not the raw source word) for the
					   null.  If it is there, this is the last word to write.  */
				203	extqh t2, a1, t1 # e0 :
				204	or t0, t1, t1 # e1 : first (partial) source word complete
				205
				206	cmpbge zero, t1, t7 # e0 : is the null in this first bit?
				207	bne t7, $u_final # .. e1 (zdb)
				208
					/* Also entered from $u_head when the terminator lies in the part
					   of t2 that has not been consumed yet.  Store the complete word
					   in t1, advance a0, rebuild t1 from the remaining high bytes of
					   t2, and recompute the null mask t7 before falling through to
					   $u_final.  */
				209	$u_late_head_exit:
				210	stq_u t1, 0(a0) # e0 : the null was in the high-order bits
				211	addq a0, 8, a0 # .. e1 :
				212	extql t2, a1, t1 # e0 :
				213	cmpbge zero, t1, t7 # .. e1 :
				214
				215	/* Take care of a final (probably partial) result word.
				216	On entry to this basic block:
				217	t1 == assembled source word
				218	t7 == cmpbge mask that found the null. */
					/* Same merge-and-store tail as $a_eos.  t8 becomes the lowest
					   set bit of t7 (the byte position of the null) and is the
					   bitmask result described in the header comment.  */
				219	$u_final:
				220	negq t7, t6 # e0 : isolate low bit set
				221	and t6, t7, t8 # e1 :
				222
					/* Bit 7 set: the null is the last byte of the word, so the word
					   is stored whole and the destination load is skipped.  */
				223	and t8, 0x80, t6 # e0 : avoid dest word load if we can
				224	bne t6, 1f # .. e1 (zdb)
				225
					/* t6 == byte mask of the bytes below the null;
					   t7 == byte mask of the bytes up to and including the null.
					   Bytes above the null are taken from the existing destination
					   word so they are written back unchanged.  */
				226	ldq_u t0, 0(a0) # e0 :
				227	subq t8, 1, t6 # .. e1 :
				228	or t6, t8, t7 # e0 :
				229	zapnot t1, t6, t1 # .. e1 : kill source bytes >= null
				230	zap t0, t7, t0 # e0 : kill dest bytes <= null
				231	or t0, t1, t1 # e1 :
				232
					/* Final store; a0 is left addressing this last word and control
					   returns through t9.  */
				233	1: stq_u t1, 0(a0) # e0 :
				234	ret (t9) # .. e1 :
				235
				236	/* Unaligned copy entry point. */
					/* Reached from __stxcpy when the low three bits of a0 and a1
					   differ.  Sets up the $u_head entry state (t0, t1, t6, adjusted
					   a1), or finishes the copy here when the terminator is already
					   in the first source quadword and the source misalignment is
					   the larger one.  */
				237	.align 3
				238	$unaligned:
				239
				240	ldq_u t1, 0(a1) # e0 : load first source word
				241
				242	and a0, 7, t4 # .. e1 : find dest misalignment
				243	and a1, 7, t5 # e0 : find src misalignment
				244
				245	/* Conditionally load the first destination word and a bytemask
				246	with 0xff indicating that the destination byte is sacrosanct. */
					/* When a0 is 8-byte aligned (t4 == 0) nothing has to be
					   preserved: t0 and t6 both stay zero and the destination is not
					   read.  Otherwise t6 gets 0xff in every byte below the
					   destination start.  */
				247
				248	mov zero, t0 # .. e1 :
				249	mov zero, t6 # e0 :
				250	beq t4, 1f # .. e1 :
				251	ldq_u t0, 0(a0) # e0 :
				252	lda t6, -1 # .. e1 :
				253	mskql t6, a0, t6 # e0 :
				254	1:
				255	subq a1, t4, a1 # .. e1 : sub dest misalignment from src addr
				256
				257	/* If source misalignment is larger than dest misalignment, we need
				258	extra startup checks to avoid SEGV. */
					/* $u_head unconditionally loads the quadword at 8(a1).  The
					   check below looks for a terminator in the first source word
					   before control can reach that load.  */
				259
				260	cmplt t4, t5, t8 # e0 :
				261	beq t8, $u_head # .. e1 (zdb)
				262
					/* t2 == source validity mask: 0xff in every byte at or above the
					         source start, zero below it;
					   t3 == first source word with the bytes below the source start
					         forced to 0xff, so only real string bytes can test as
					         zero.  */
				263	lda t2, -1 # e1 : mask out leading garbage in source
				264	mskqh t2, t5, t2 # e0 :
				265	nop # e0 :
				266	ornot t1, t2, t3 # .. e1 :
				267	cmpbge zero, t3, t7 # e0 : is there a zero?
				268	beq t7, $u_head # .. e1 (zdb)
				269
				270	/* At this point we've found a zero in the first partial word of
				271	the source. We need to isolate the valid source data and mask
				272	it into the original destination data. (Incidentally, we know
				273	that we'll need at least one byte of that original dest word.) */
					/* The destination word is reloaded unconditionally because t0 is
					   still zero if the conditional load above was skipped.

					   t8 == lowest set bit of t7, the null's byte position in the
					         source word;
					   t7 == byte mask of the bytes up to and including the null;
					   t5 == low three bits of the adjusted a1, i.e. how many bytes
					         the source data must move down to line up with the
					         destination.
					   t8 is a one-bit-per-byte mask, so shifting it right by t5 bits
					   moves it by the same number of byte positions as the extql
					   below moves the data.  */
				274
				275	ldq_u t0, 0(a0) # e0 :
				276
				277	negq t7, t6 # .. e1 : build bitmask of bytes <= zero
				278	and t6, t7, t8 # e0 :
				279	and a1, 7, t5 # .. e1 :
				280	subq t8, 1, t6 # e0 :
				281	or t6, t8, t7 # e1 :
				282	srl t8, t5, t8 # e0 : adjust final null return value
				283
					/* Trim the validity mask t2 to the bytes up to and including
					   the null, apply it to the source word, then shift both mask
					   and data into destination position.  */
				284	zapnot t2, t7, t2 # .. e1 : prepare source word; mirror changes
				285	and t1, t2, t1 # e1 : to source validity mask
				286	extql t2, a1, t2 # .. e0 :
				287	extql t1, a1, t1 # e0 :
				288
					/* Clear exactly the destination bytes the string occupies and
					   drop the source bytes in; all other destination bytes are
					   written back unchanged.  a0 has not been advanced on this
					   path.  */
				289	andnot t0, t2, t0 # .. e1 : zero place for source to reside
				290	or t0, t1, t1 # e1 : and put it there
				291	stq_u t1, 0(a0) # .. e0 :
				292	ret (t9)
				293
				294	cfi_endproc