/* strlen(str) -- determine the length of the string STR.
   Optimized for Intel 80x86, x>=4.
   Copyright (C) 1991-2016 Free Software Foundation, Inc.
   Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#include <sysdep.h>
#include "asm-syntax.h"

/* Nothing is pushed in this function, so the only thing between %esp
   and the argument is the 4 byte return address.  */
#define PARMS	4		/* no space for saved regs */
#define STR	PARMS

/* strlen (STR)

   Syntax:   AT&T, 32-bit x86, run through the C preprocessor.
   In:       STR(%esp) = address of the string.
   Out:      %eax = number of bytes in front of the first NUL byte.
   Clobbers: %ecx, %edx and the flags.  The stack is not written.

   The work is done in three phases:

     1. Up to three leading bytes are tested one at a time until the
	pointer in %eax is a multiple of 4.
     2. Aligned 32-bit words are scanned, four words per loop
	iteration, until a word containing a NUL byte is found.  Whole
	words are loaded, so bytes that follow the terminating NUL
	inside the same aligned word are read as well.
     3. The NUL byte is located inside that word and the start address
	is subtracted from its address.

   NOTE: several instruction forms below are chosen for their encoded
   size, not for their effect.  Do not "simplify" them; see the
   comment in front of label 4.  */

	.text
ENTRY (strlen)

	movl STR(%esp), %ecx
	movl %ecx, %eax		/* %eax = scan pointer, %ecx is reused
				   for the alignment bookkeeping */

	/* Phase 1.  After the mask %ecx is in the range 0..3, so %ch is
	   zero and serves as the NUL constant in the byte compares.  */
	andl $3, %ecx		/* mask alignment bits */
	jz L(1)			/* aligned => start loop */
	cmpb %ch, (%eax)	/* is byte NUL? */
	je L(2)			/* yes => return */
	incl %eax		/* increment pointer */

	xorl $3, %ecx		/* was alignment = 3?  (3 -> 0, 1 -> 2,
				   2 -> 1) */
	jz L(1)			/* yes => now it is aligned and start loop */
	cmpb %ch, (%eax)	/* is byte NUL? */
	je L(2)			/* yes => return */
	addl $1, %eax		/* increment pointer */

	subl $1, %ecx		/* was alignment = 2? */
	jz L(1)			/* yes => now it is aligned and start loop */
	cmpb %ch, (%eax)	/* is byte NUL? */
	je L(2)			/* yes => return */

/* Don't change the above `addl $1, %eax' and `subl $1, %ecx' into
   `incl %eax' and `decl %ecx' resp.  The two additional bytes per
   instruction are padding: they are meant to place label 4 on a 16 byte
   boundary.  There is no alignment directive in front of that label, so
   its position depends only on the size of the instructions above (and
   on where ENTRY places the start of the function, which is decided in
   sysdep.h, not here).

   The following `subl $15, %eax' is part of this trick, too.  Together
   with the next instruction (`addl $16, %eax') it is in fact an
   `incl %eax', just as expected from the algorithm.  But doing so has
   the advantage that no jump to label 1 is necessary and so the
   pipeline is not flushed.  */

	subl $15, %eax		/* effectively +1 */


	/* Phase 2.  Loop entry from the bottom of the loop: step over
	   the four words that were just examined.  */
L(4):	addl $16, %eax		/* adjust pointer for full loop */

	/* How the zero-byte test works.  Adding 0xfefefeff to the word
	   adds 0xff to the lowest byte and 0xfe to each of the other
	   three.  A non-zero lowest byte carries into the next byte,
	   and that carry-in turns the 0xfe into an effective 0xff, so
	   each following non-zero byte passes the carry on.  The first
	   zero byte breaks the chain.

	   - The carry out of the top byte ends up in CF.  CF clear
	     means that the chain was broken somewhere, i.e. the word
	     contains a NUL byte (not necessarily the highest one).
	   - The carries into bits 8, 16 and 24 are recovered with
	     (word + magic) ^ word: the magic value has those three bits
	     clear, so after the xor each of them holds exactly the
	     carry-in.  The `orl' sets every other bit; the result is
	     all ones, and the `incl' wraps to zero, only if all three
	     carries were present.

	   Each of the four copies below leaves the examined word in
	   %ecx for phase 3.  */
L(1):	movl (%eax), %ecx	/* get word (= 4 bytes) in question */
	movl $0xfefefeff, %edx	/* magic value */
	addl %ecx, %edx		/* add the magic value to the word.  We get
				   carry bits reported for each byte which
				   is *not* 0 */
	jnc L(3)		/* carry chain broken => word has a NUL */
	xorl %ecx, %edx		/* (word+magic)^word */
	orl $0xfefefeff, %edx	/* set all non-carry bits */
	incl %edx		/* add 1: if one carry bit was *not* set
				   the addition will not result in 0.  */
	jnz L(3)		/* found NUL => return pointer */

	movl 4(%eax), %ecx	/* second word of this iteration */
	movl $0xfefefeff, %edx	/* magic value */
	addl %ecx, %edx		/* CF = carry out of the top byte */
	jnc L(5)		/* carry chain broken => word has a NUL */
	xorl %ecx, %edx		/* (word+magic)^word */
	orl $0xfefefeff, %edx	/* set all non-carry bits */
	incl %edx		/* zero only if every carry bit was set */
	jnz L(5)		/* found NUL => return pointer */

	movl 8(%eax), %ecx	/* third word of this iteration */
	movl $0xfefefeff, %edx	/* magic value */
	addl %ecx, %edx		/* CF = carry out of the top byte */
	jnc L(6)		/* carry chain broken => word has a NUL */
	xorl %ecx, %edx		/* (word+magic)^word */
	orl $0xfefefeff, %edx	/* set all non-carry bits */
	incl %edx		/* zero only if every carry bit was set */
	jnz L(6)		/* found NUL => return pointer */

	movl 12(%eax), %ecx	/* fourth word of this iteration */
	movl $0xfefefeff, %edx	/* magic value */
	addl %ecx, %edx		/* CF = carry out of the top byte */
	jnc L(7)		/* carry chain broken => word has a NUL */
	xorl %ecx, %edx		/* (word+magic)^word */
	orl $0xfefefeff, %edx	/* set all non-carry bits */
	incl %edx		/* zero only if every carry bit was set */
	jz L(4)			/* no NUL found => continue loop */

	/* The labels below fall through into each other, so entering at
	   label 7, 6 or 5 advances %eax by 12, 8 or 4 bytes: afterwards
	   %eax addresses the word that is held in %ecx.  */
L(7):	addl $4, %eax		/* adjust pointer */
L(6):	addl $4, %eax
L(5):	addl $4, %eax

	/* Phase 3.  %ecx is known to contain a NUL byte and %eax points
	   at its first byte (%cl, the byte at the lowest address).  Only
	   three bytes need testing: if none of them is NUL the fourth
	   one must be.  */
L(3):	testb %cl, %cl		/* is first byte NUL? */
	jz L(2)			/* yes => return */
	incl %eax		/* increment pointer */

	testb %ch, %ch		/* is second byte NUL? */
	jz L(2)			/* yes => return */
	incl %eax		/* increment pointer */

	testl $0xff0000, %ecx	/* is third byte NUL? */
	jz L(2)			/* yes => return pointer */
	incl %eax		/* increment pointer */

	/* %eax = address of the terminating NUL byte.  */
L(2):	subl STR(%esp), %eax	/* compute difference to string start */

	ret
END (strlen)
libc_hidden_builtin_def (strlen)