Blame - ap/libc/glibc/glibc-2.22/sysdeps/i386/strrchr.S - T106_DC

blob: c4ae95d4d6fee78107bf4aec87ed218a6344779f [file] [log] [blame]

lh	9ed821d	2023-04-07 01:36:19 -0700	[diff] [blame]	1	/* strrchr (str, ch) -- Return pointer to last occurrence of CH in STR.
				2	For Intel 80x86, x>=3.
				3	Copyright (C) 1994-2015 Free Software Foundation, Inc.
				4	This file is part of the GNU C Library.
				5	Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>
				6	Some optimisations by Alan Modra <Alan@SPRI.Levels.UniSA.Edu.Au>
				7
				8	The GNU C Library is free software; you can redistribute it and/or
				9	modify it under the terms of the GNU Lesser General Public
				10	License as published by the Free Software Foundation; either
				11	version 2.1 of the License, or (at your option) any later version.
				12
				13	The GNU C Library is distributed in the hope that it will be useful,
				14	but WITHOUT ANY WARRANTY; without even the implied warranty of
				15	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
				16	Lesser General Public License for more details.
				17
				18	You should have received a copy of the GNU Lesser General Public
				19	License along with the GNU C Library; if not, see
				20	<http://www.gnu.org/licenses/>. */
				21
				22	#include <sysdep.h>
				23	#include "asm-syntax.h"
				24
				25	#define PARMS 4+8 /* offset of the first argument from %esp once both registers are pushed: 4 bytes below the arguments (presumably the return address - confirm against the i386 ABI) plus 8 for the 2 saved regs */
				26	#define RTN PARMS /* alias of PARMS; in the visible code it is only used to define STR */
				27	#define STR RTN /* displacement of the str argument, used as STR(%esp) */
				28	#define CHR STR+4 /* displacement of the ch argument, the next 4-byte slot after str */
				29
				30	.text
				31	ENTRY (strrchr)
				32	/* In: STR(%esp) = str and CHR(%esp) = ch, both read after the two
					pushes below.
					Out: %eax = address of the last byte equal to the low byte of ch,
					or 0 (NULL) if no such byte was seen before the terminating NUL.
					When ch is NUL the address of the terminator itself is returned.
					%edi and %esi are saved and restored. %ecx, %edx and the flags
					are overwritten. */
				33	pushl %edi /* Save callee-saved registers used here. */
				34	cfi_adjust_cfa_offset (4)
				35	cfi_rel_offset (edi, 0)
				36	pushl %esi
				37	cfi_adjust_cfa_offset (4)
				38
				39	xorl %eax, %eax /* result = NULL until a match is remembered */
				40	movl STR(%esp), %esi /* %esi = str, the scan pointer */
				41	cfi_rel_offset (esi, 0)
				42	movl CHR(%esp), %ecx /* %ecx = ch, only %cl is significant */
				43
				44	/* At the moment %ecx contains C. What we need for the
				45	algorithm is C in all bytes of the dword. Avoid
				46	operations on 16 bit words where possible because these
				47	require a prefix byte (and one more cycle). */
				48	movb %cl, %ch /* low 16 bits are now c|c */
				49	movl %ecx, %edx
				50	shll $16, %ecx /* now it is c|c|0|0 */
				51	movw %dx, %cx /* and finally c|c|c|c */
				52
				53	/* Before we start with the main loop we process single bytes
				54	until the source pointer is aligned. This has two reasons:
				55	1. aligned 32-bit memory access is faster
				56	and (more important)
				57	2. we process in the main loop 32 bits in one step although
				58	we don't know the end of the string. But accessing at
				59	4-byte alignment guarantees that we never access illegal
				60	memory if this would not also be done by the trivial
				61	implementation (this is because all processor inherent
				62	boundaries are multiples of 4). */
				63
				64	testl $3, %esi /* correctly aligned ? */
				65	jz L(19) /* yes => begin loop */
				66	movb (%esi), %dl /* load byte in question (we need it twice) */
				67	cmpb %dl, %cl /* compare byte */
				68	jne L(11) /* not C => nothing to remember */
				69	movl %esi, %eax /* remember pointer as possible result */
				70	L(11): orb %dl, %dl /* is NUL? */
				71	jz L(2) /* yes => return the result so far (NULL if no match) */
				72	incl %esi /* increment pointer */
				73
				74	testl $3, %esi /* correctly aligned ? */
				75	jz L(19) /* yes => begin loop */
				76	movb (%esi), %dl /* load byte in question (we need it twice) */
				77	cmpb %dl, %cl /* compare byte */
				78	jne L(12) /* not C => nothing to remember */
				79	movl %esi, %eax /* remember pointer as possible result */
				80	L(12): orb %dl, %dl /* is NUL? */
				81	jz L(2) /* yes => return the result so far (NULL if no match) */
				82	incl %esi /* increment pointer */
				83
				84	testl $3, %esi /* correctly aligned ? */
				85	jz L(19) /* yes => begin loop */
				86	movb (%esi), %dl /* load byte in question (we need it twice) */
				87	cmpb %dl, %cl /* compare byte */
				88	jne L(13) /* not C => nothing to remember */
				89	movl %esi, %eax /* remember pointer as possible result */
				90	L(13): orb %dl, %dl /* is NUL? */
				91	jz L(2) /* yes => return the result so far (NULL if no match) */
				92	incl %esi /* increment pointer */
				93
				94	/* Now we have reached alignment: at most three bytes were handled above. */
				95	jmp L(19) /* begin loop */
				96
				97	/* We exit the loop if adding MAGIC_BITS to LONGWORD fails to
				98	change any of the hole bits of LONGWORD.
				99
				100	1) Is this safe? Will it catch all the zero bytes?
				101	Suppose there is a byte with all zeros. Any carry bits
				102	propagating from its left will fall into the hole at its
				103	least significant bit and stop. Since there will be no
				104	carry from its most significant bit, the LSB of the
				105	byte to the left will be unchanged, and the zero will be
				106	detected.
				107
				108	2) Is this worthwhile? Will it ignore everything except
				109	zero bytes? Suppose every byte of LONGWORD has a bit set
				110	somewhere. There will be a carry into bit 8. If bit 8
				111	is set, this will carry into bit 16. If bit 8 is clear,
				112	one of bits 9-15 must be set, so there will be a carry
				113	into bit 16. Similarly, there will be a carry into bit
				114	24. If one of bits 24-31 is set, there will be a carry
				115	into bit 32 (=carry flag), so all of the hole bits will
				116	be changed.
				117
				118	3) But wait! Aren't we looking for C, not zero?
				119	Good point. So what we do is XOR LONGWORD with a longword,
				120	each of whose bytes is C. This turns each byte that is C
				121	into a zero. */
				122
				123	/* Each round the main loop processes 16 bytes (four dwords). */
				124
				125	/* Jump to here when the character is detected. We chose this
				126	way around because the character one is looking for is not
				127	as frequent as the rest and taking a conditional jump is more
				128	expensive than ignoring it.
				129
				130	Some more words to the code below: it might not be obvious why
				131	we decrement the source pointer here. In the loop the pointer
				132	is not pre-incremented and so it still points before the word
				133	we are looking at. But you should take a look at the instruction
				134	which gets executed before we get into the loop: `addl $16, %esi'.
				135	This makes the following subs into adds. Each entry label below
				136	rebases %esi so that the dword under examination sits at 12(%esi). */
				137	/* These fill bytes make the main loop be correctly aligned.
				138	We cannot use align because it is not the following instruction
				139	which should be aligned. */
				140	.byte 0, 0
				141	#ifndef PROF
				142	/* Profiling adds some code and so changes the alignment. */
				143	.byte 0
				144	#endif
				145
				146	L(4): subl $4, %esi /* entry for word 0: falls through, -12 in total */
				147	L(41): subl $4, %esi /* entry for word 1: -8 in total */
				148	L(42): subl $4, %esi /* entry for word 2: -4 in total */
				149	L(43): testl $0xff000000, %edx /* entry for word 3. %edx = word ^ c|c|c|c: is highest byte == C? */
				150	jnz L(33) /* no => try other bytes */
				151	leal 15(%esi), %eax /* store address as result */
				152	jmp L(1) /* and start loop again */
				153
				154	L(3): subl $4, %esi /* entry for word 0, highest byte known not to be C */
				155	L(31): subl $4, %esi /* same for word 1 */
				156	L(32): subl $4, %esi /* same for word 2 */
				157	L(33): testl $0xff0000, %edx /* is C in third byte? */
				158	jnz L(51) /* no => try other bytes */
				159	leal 14(%esi), %eax /* store address as result */
				160	jmp L(1) /* and start loop again */
				161
				162	L(51):
				163	/* At this point we know that the byte is in one of the lower bytes.
				164	We make a guess and correct it if necessary. This reduces the
				165	number of necessary jumps. */
				166	leal 12(%esi), %eax /* guess address of lowest byte as result */
				167	testb %dh, %dh /* second byte nonzero after the XOR means it is not C */
				168	jnz L(1) /* guess was correct => start loop */
				169	leal 13(%esi), %eax /* correct guess to second byte */
				170
				171	L(1): addl $16, %esi /* increment pointer for full round */
				172
				173	L(19): movl (%esi), %edx /* get first word (= 4 bytes) of this round */
				174	movl $0xfefefeff, %edi /* magic value */
				175	addl %edx, %edi /* add the magic value to the word. We get
				176	carry bits reported for each byte which
				177	is not 0 */
				178
				179	/* According to the algorithm we had to reverse the effect of the
				180	XOR first and then test the overflow bits. But because the
				181	following XOR would destroy the carry flag and it would (in a
				182	representation with more than 32 bits) not alter the last
				183	overflow, we can now test this condition. If no carry is signaled
				184	no overflow must have occurred in the last byte => it was 0. */
				185
				186	jnc L(20) /* found NUL => check last word */
				187
				188	/* We are only interested in carry bits that change due to the
				189	previous add, so remove original bits */
				190	xorl %edx, %edi /* (word+magic)^word */
				191
				192	/* Now test for the other three overflow bits. */
				193	orl $0xfefefeff, %edi /* set all non-carry bits */
				194	incl %edi /* add 1: if one carry bit was not set
				195	the addition will not result in 0. */
				196
				197	/* If at least one byte of the word is NUL we don't get 0 in %edi. */
				198	jnz L(20) /* found NUL => check last word */
				199
				200	/* Now we made sure the dword does not contain a NUL byte, so all
				201	four bytes belong to the string. Next we test whether the dword
				202	contains the character we are looking for. */
				203
				204	xorl %ecx, %edx /* XOR with word c|c|c|c => bytes of str == c
				205	are now 0 */
				206	movl $0xfefefeff, %edi /* magic value */
				207	addl %edx, %edi /* add the magic value to the word. We get
				208	carry bits reported for each byte which
				209	is not 0 */
				210	jnc L(4) /* highest byte is C => examine dword */
				211	xorl %edx, %edi /* ((word^charmask)+magic)^(word^charmask) */
				212	orl $0xfefefeff, %edi /* set all non-carry bits */
				213	incl %edi /* add 1: if one carry bit was not set
				214	the addition will not result in 0. */
				215	jnz L(3) /* C is detected in the word => examine it */
				216
				217	movl 4(%esi), %edx /* get second word (= 4 bytes) of this round */
				218	movl $0xfefefeff, %edi /* magic value */
				219	addl %edx, %edi /* add the magic value to the word. We get
				220	carry bits reported for each byte which
				221	is not 0 */
				222	jnc L(21) /* found NUL => check last word */
				223	xorl %edx, %edi /* (word+magic)^word */
				224	orl $0xfefefeff, %edi /* set all non-carry bits */
				225	incl %edi /* add 1: if one carry bit was not set
				226	the addition will not result in 0. */
				227	jnz L(21) /* found NUL => check last word */
				228	xorl %ecx, %edx /* XOR with word c|c|c|c => bytes of str == c
				229	are now 0 */
				230	movl $0xfefefeff, %edi /* magic value */
				231	addl %edx, %edi /* add the magic value to the word. We get
				232	carry bits reported for each byte which
				233	is not 0 */
				234	jnc L(41) /* highest byte is C => examine dword */
				235	xorl %edx, %edi /* ((word^charmask)+magic)^(word^charmask) */
				236	orl $0xfefefeff, %edi /* set all non-carry bits */
				237	incl %edi /* add 1: if one carry bit was not set
				238	the addition will not result in 0. */
				239	jnz L(31) /* C is detected in the word => examine it */
				240
				241	movl 8(%esi), %edx /* get third word (= 4 bytes) of this round */
				242	movl $0xfefefeff, %edi /* magic value */
				243	addl %edx, %edi /* add the magic value to the word. We get
				244	carry bits reported for each byte which
				245	is not 0 */
				246	jnc L(22) /* found NUL => check last word */
				247	xorl %edx, %edi /* (word+magic)^word */
				248	orl $0xfefefeff, %edi /* set all non-carry bits */
				249	incl %edi /* add 1: if one carry bit was not set
				250	the addition will not result in 0. */
				251	jnz L(22) /* found NUL => check last word */
				252	xorl %ecx, %edx /* XOR with word c|c|c|c => bytes of str == c
				253	are now 0 */
				254	movl $0xfefefeff, %edi /* magic value */
				255	addl %edx, %edi /* add the magic value to the word. We get
				256	carry bits reported for each byte which
				257	is not 0 */
				258	jnc L(42) /* highest byte is C => examine dword */
				259	xorl %edx, %edi /* ((word^charmask)+magic)^(word^charmask) */
				260	orl $0xfefefeff, %edi /* set all non-carry bits */
				261	incl %edi /* add 1: if one carry bit was not set
				262	the addition will not result in 0. */
				263	jnz L(32) /* C is detected in the word => examine it */
				264
				265	movl 12(%esi), %edx /* get fourth word (= 4 bytes) of this round */
				266	movl $0xfefefeff, %edi /* magic value */
				267	addl %edx, %edi /* add the magic value to the word. We get
				268	carry bits reported for each byte which
				269	is not 0 */
				270	jnc L(23) /* found NUL => check last word */
				271	xorl %edx, %edi /* (word+magic)^word */
				272	orl $0xfefefeff, %edi /* set all non-carry bits */
				273	incl %edi /* add 1: if one carry bit was not set
				274	the addition will not result in 0. */
				275	jnz L(23) /* found NUL => check last word */
				276	xorl %ecx, %edx /* XOR with word c|c|c|c => bytes of str == c
				277	are now 0 */
				278	movl $0xfefefeff, %edi /* magic value */
				279	addl %edx, %edi /* add the magic value to the word. We get
				280	carry bits reported for each byte which
				281	is not 0 */
				282	jnc L(43) /* highest byte is C => examine dword */
				283	xorl %edx, %edi /* ((word^charmask)+magic)^(word^charmask) */
				284	orl $0xfefefeff, %edi /* set all non-carry bits */
				285	incl %edi /* add 1: if one carry bit was not set
				286	the addition will not result in 0. */
				287	jz L(1) /* C is not detected => restart loop */
				288	jmp L(33) /* C is in one of the three lower bytes => examine word */
				289
				290	L(23): addl $4, %esi /* NUL is in word 3: falls through, +12 in total */
				291	L(22): addl $4, %esi /* NUL is in word 2: +8 in total */
				292	L(21): addl $4, %esi /* NUL is in word 1: +4, so %esi addresses the word held in %edx */
				293
				294	/* What remains to do is to test which byte the NUL char is and
				295	whether the searched character appears in one of the bytes
				296	before. A special case is that the searched byte may be NUL.
				297	In this case a pointer to the terminating NUL char has to be
				298	returned. That is why each byte is compared with C before it
				299	is tested for NUL. */
				300	L(20): cmpb %cl, %dl /* is first byte == C? */
				301	jne L(24) /* no => skip */
				302	movl %esi, %eax /* store address as result */
				303	L(24): testb %dl, %dl /* is first byte == NUL? */
				304	jz L(2) /* yes => return */
				305
				306	cmpb %cl, %dh /* is second byte == C? */
				307	jne L(25) /* no => skip */
				308	leal 1(%esi), %eax /* store address as result */
				309	L(25): testb %dh, %dh /* is second byte == NUL? */
				310	jz L(2) /* yes => return */
				311
				312	shrl $16,%edx /* make upper bytes accessible */
				313	cmpb %cl, %dl /* is third byte == C */
				314	jne L(26) /* no => skip */
				315	leal 2(%esi), %eax /* store address as result */
				316	L(26): testb %dl, %dl /* is third byte == NUL */
				317	jz L(2) /* yes => return */
				318
				319	cmpb %cl, %dh /* is fourth byte == C */
				320	jne L(2) /* no => return the result so far */
				321	leal 3(%esi), %eax /* store address as result */
				322
				323	L(2): popl %esi /* restore saved register content */
				324	cfi_adjust_cfa_offset (-4)
				325	cfi_restore (esi)
				326	popl %edi
				327	cfi_adjust_cfa_offset (-4)
				328	cfi_restore (edi)
				329
				330	ret /* result is in %eax */
				331	END (strrchr)
				332
				333	weak_alias (strrchr, rindex) /* NOTE(review): presumably publishes rindex as a weak alias of strrchr. The macro is defined outside this file - confirm there. */
				334	libc_hidden_builtin_def (strrchr) /* NOTE(review): macro defined outside this file. Presumably provides the library-internal hidden definition of strrchr - confirm. */