Blame - ap/libc/glibc/glibc-2.23/sysdeps/alpha/strcmp.S - T106_DC

blob: 719950a496cce96b345df973374968e693f154cd [file] [log] [blame]

xf.li	bdd93d5	2023-05-12 07:10:14 -0700	[diff] [blame^]	1	/* Copyright (C) 1996-2016 Free Software Foundation, Inc.
				2	Contributed by Richard Henderson (rth@tamu.edu)
				3	This file is part of the GNU C Library.
				4
				5	The GNU C Library is free software; you can redistribute it and/or
				6	modify it under the terms of the GNU Lesser General Public
				7	License as published by the Free Software Foundation; either
				8	version 2.1 of the License, or (at your option) any later version.
				9
				10	The GNU C Library is distributed in the hope that it will be useful,
				11	but WITHOUT ANY WARRANTY; without even the implied warranty of
				12	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
				13	Lesser General Public License for more details.
				14
				15	You should have received a copy of the GNU Lesser General Public
				16	License along with the GNU C Library. If not, see
				17	<http://www.gnu.org/licenses/>. */
				18
				19	/* Bytewise compare two null-terminated strings.

					In:      a0 = s1, a1 = s2 (both registers are advanced while scanning).
					Out:     v0 = 0 if the strings are equal, 1 if the first differing
					         byte of s1 is greater than that of s2, -1 otherwise.
					Scratch: t0-t8 (and AT/gp in the PROF build).

					Both strings are read one quadword at a time with ldq_u and whole
					words are compared.  When s1 and s2 share the same offset within a
					quadword the aligned loop is used.  Otherwise each s2 word is
					assembled from two neighbouring s2 quadwords so that it lines up
					with the corresponding s1 word, taking care never to load an s2
					quadword beyond the one that holds the terminator.

					The e0 / e1 / .. / (zdb) tags in the trailing comments look like the
					author's instruction-scheduling notes and have no effect on the
					assembled code.  NOTE(review): confirm their exact meaning against
					the target CPU documentation. */
				20
				21	#include <sysdep.h>
				22
				23	.set noat
				24	.set noreorder
				25
				26	.text
				27
				28	ENTRY(strcmp)
				29	#ifdef PROF
				30	ldgp gp, 0(pv) # PROF build only: set up gp from pv
				31	lda AT, _mcount # PROF build only: AT = address of _mcount
				32	jmp AT, (AT), _mcount # PROF build only: call _mcount, return address left in AT
				33	.prologue 1
				34	#else
				35	.prologue 0
				36	#endif
				37
				38	ldq_u t0, 0(a0) # e0 : t0 = quadword containing s1[0]; give cache time to catch up
				39	xor a0, a1, t2 # .. e1 : are s1 and s2 co-aligned?
				40	ldq_u t1, 0(a1) # e0 : t1 = quadword containing s2[0]
				41	and t2, 7, t2 # .. e1 : keep only the offset-within-quadword bits
				42	lda t3, -1 # e0 : t3 = all ones, seed for the byte masks below
				43	bne t2, $unaligned # .. e1 : offsets differ, take the unaligned path
				44
				45	/* On entry to this basic block:
				46	t0 == the first s1 word.
				47	t1 == the first s2 word.
				48	t3 == -1.
					The bytes that precede the start of the strings are forced to 0xff
					in both words, so they compare equal and cannot be mistaken for a
					terminator. */
				49
				50	$aligned:
				51	mskqh t3, a0, t3 # e0 : t3 = 0 in the bytes before the string start, ones elsewhere
				52	nop # .. e1 :
				53	ornot t1, t3, t1 # e0 : force leading s2 garbage bytes to 0xff
				54	ornot t0, t3, t0 # .. e1 : force leading s1 garbage bytes to 0xff
				55	cmpbge zero, t1, t7 # e0 : bits set iff null found
				56	bne t7, $eos # e1 (zdb) : terminator already in the first s2 word
				57
				58	/* Aligned compare main loop.
				59	On entry to this basic block:
				60	t0 == an s1 word.
				61	t1 == an s2 word not containing a null. */
				62
				63	$a_loop:
				64	xor t0, t1, t2 # e0 : nonzero iff the two words differ
				65	bne t2, $wordcmp # .. e1 (zdb)
				66	ldq_u t1, 8(a1) # e0 : next s2 word
				67	ldq_u t0, 8(a0) # .. e1 : next s1 word
				68	addq a1, 8, a1 # e0 : advance s2
				69	addq a0, 8, a0 # .. e1 : advance s1
				70	cmpbge zero, t1, t7 # e0 : bits set iff null in the new s2 word
				71	beq t7, $a_loop # .. e1 (zdb) : no terminator yet, keep going
				72	br $eos # e1 : compare the final partial words
				73
				74	/* The two strings are not co-aligned. Align s1 and cope. */
				75
				76	$unaligned:
				77	and a0, 7, t4 # e0 : find s1 misalignment
				78	and a1, 7, t5 # .. e1 : find s2 misalignment
				79	subq a1, t4, a1 # e0 : a1 -= s1 misalignment; low bits of a1 now give the s2-vs-s1 byte shift
				80
				81	/* If s2 misalignment is larger than s1 misalignment, we need
				82	extra startup checks to avoid SEGV: the first s2 quadword then
					holds fewer string bytes than the first s1 quadword, so completing
					the first s2 word needs a second load, which must not be issued if
					s2 already ended in the first quadword. */
				83
				84	cmplt t4, t5, t8 # .. e1 : t8 = (s1 misalignment < s2 misalignment)
				85	beq t8, $u_head # e1 : no, the normal startup is safe
				86
				87	mskqh t3, t5, t3 # e0 : t3 = 0 in the bytes before s2[0], ones elsewhere
				88	ornot t1, t3, t3 # e0 : t3 = first s2 word with leading garbage forced to 0xff
				89	cmpbge zero, t3, t7 # e1 : is there a zero?
				90	beq t7, $u_head # e1 : no terminator, safe to read the next s2 word
				91
				92	/* We've found a zero in the first partial word of s2. Align
				93	our current s1 and s2 words and compare what we've got. */
				94
				95	extql t1, t5, t1 # e0 : shift s2[0] down to byte 0
				96	extql t0, a0, t0 # e0 : shift s1[0] down to byte 0
				97	cmpbge zero, t1, t7 # .. e1 : find that zero again
				98	br $eos # e1 : and finish up
				99
				100	.align 3
				101	$u_head:
				102	/* We know just enough now to be able to assemble the first
				103	full word of s2. We can still find a zero at the end of it.
				104
				105	On entry to this basic block:
				106	t0 == first word of s1
				107	t1 == first partial word of s2. */
				108
				109	ldq_u t2, 8(a1) # e0 : load second partial s2 word
				110	lda t3, -1 # .. e1 : create leading garbage mask
				111	extql t1, a1, t1 # e0 : create first s2 word (low part)
				112	mskqh t3, a0, t3 # e0 : t3 = 0 in the bytes before s1[0], ones elsewhere
				113	extqh t2, a1, t4 # e0 : high part of the first s2 word
				114	ornot t0, t3, t0 # .. e1 : kill s1 garbage
				115	or t1, t4, t1 # e0 : s2 word now complete
				116	cmpbge zero, t0, t7 # .. e1 : find zero in first s1 word
				117	ornot t1, t3, t1 # e0 : kill s2 garbage
				118	lda t3, -1 # .. e1 : fresh all-ones mask
				119	mskql t3, a1, t3 # e0 : mask for s2[1] bits we have seen
				120	bne t7, $eos # .. e1 : s1 ends within its first word
				121	xor t0, t1, t4 # e0 : compare aligned words
				122	bne t4, $wordcmp # .. e1 (zdb)
				123	or t2, t3, t3 # e0 : force the already-seen s2[1] bytes to 0xff
				124	cmpbge zero, t3, t7 # e1 : null among the unseen s2[1] bytes?
				125	bne t7, $u_final # e1 : yes, do not load any further s2 words
				126
				127	/* Unaligned compare main loop. In order to avoid reading too much,
				128	the loop is structured to detect zeros in aligned words from s2.
				129	This has, unfortunately, effectively pulled half of a loop
				130	iteration out into the head and half into the tail, but it does
				131	prevent nastiness from accumulating in the very thing we want
				132	to run as fast as possible.
				133
				134	On entry to this basic block:
				135	t2 == the unshifted low-bits from the next s2 word. */
				136
				137	.align 3
				138	$u_loop:
				139	extql t2, a1, t3 # e0 : low part of the next s2 word, from the previous load
				140	ldq_u t2, 16(a1) # .. e1 : load next s2 high bits
				141	ldq_u t0, 8(a0) # e0 : load next s1 word
				142	addq a1, 8, a1 # .. e1 : advance s2
				143	addq a0, 8, a0 # e0 : advance s1
				144	nop # .. e1 :
				145	extqh t2, a1, t1 # e0 : high part of the next s2 word
				146	cmpbge zero, t0, t7 # .. e1 : find zero in current s1 word
				147	or t1, t3, t1 # e0 : s2 word now complete
				148	bne t7, $eos # .. e1 : s1 terminator found
				149	xor t0, t1, t4 # e0 : compare the words
				150	bne t4, $wordcmp # .. e1 (zdb)
				151	cmpbge zero, t2, t4 # e0 : find zero in next low bits
				152	beq t4, $u_loop # .. e1 (zdb) : none, safe to load another s2 word
				153
				154	/* We've found a zero in the low bits of the last s2 word. Get
				155	the next s1 word and align them. */
				156	$u_final:
				157	ldq_u t0, 8(a0) # e1 : next s1 word
				158	extql t2, a1, t1 # .. e0 : remaining s2 bytes shifted down to byte 0
				159	cmpbge zero, t1, t7 # e0 : locate the terminator in the aligned s2 word
				160
				161	/* We've found a zero somewhere in a word we just read.
				162	On entry to this basic block:
				163	t0 == s1 word
				164	t1 == s2 word
				165	t7 == cmpbge mask containing the zero. */
				166
				167	.align 3
				168	$eos:
				169	negq t7, t6 # e0 : create bytemask of valid data
				170	and t6, t7, t8 # e1 : t8 = lowest set bit, i.e. the first null byte
				171	subq t8, 1, t6 # e0 : t6 = bits for the bytes before that null
				172	or t6, t8, t7 # e1 : t7 = bytes up to and including the null
				173	zapnot t0, t7, t0 # e0 : kill the garbage
				174	zapnot t1, t7, t1 # .. e1 :
				175	xor t0, t1, v0 # e0 : and compare
				176	beq v0, $done # .. e1 : identical up to the null, return 0
				177
				178	/* Here we have two differing co-aligned words in t0 & t1.
				179	Bytewise compare them and return (t0 > t1 ? 1 : -1). */
				180	$wordcmp:
				181	cmpbge t0, t1, t2 # e0 : comparison yields bit mask of ge
				182	cmpbge t1, t0, t3 # .. e1 :
				183	xor t2, t3, t0 # e0 : bits set iff t0/t1 bytes differ
				184	negq t0, t1 # e1 : clear all but least bit
				185	and t0, t1, t0 # e0 :
				186	lda v0, -1 # .. e1 : assume the s1 byte is the smaller one
				187	and t0, t2, t1 # e0 : was bit set in t0 > t1?
				188	cmovne t1, 1, v0 # .. e1 (zdb) : s1 byte was greater, return 1
				189
				190	$done:
				191	ret # e1 :
				192
				193	END(strcmp)
				194	libc_hidden_builtin_def (strcmp)