| xf.li | bdd93d5 | 2023-05-12 07:10:14 -0700 | [diff] [blame] | 1 | /* Optimized version of the standard strlen() function. | 
|  | 2 | This file is part of the GNU C Library. | 
|  | 3 | Copyright (C) 2000-2016 Free Software Foundation, Inc. | 
|  | 4 | Contributed by Dan Pop <Dan.Pop@cern.ch>. | 
|  | 5 |  | 
|  | 6 | The GNU C Library is free software; you can redistribute it and/or | 
|  | 7 | modify it under the terms of the GNU Lesser General Public | 
|  | 8 | License as published by the Free Software Foundation; either | 
|  | 9 | version 2.1 of the License, or (at your option) any later version. | 
|  | 10 |  | 
|  | 11 | The GNU C Library is distributed in the hope that it will be useful, | 
|  | 12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | 
|  | 13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | 
|  | 14 | Lesser General Public License for more details. | 
|  | 15 |  | 
|  | 16 | You should have received a copy of the GNU Lesser General Public | 
|  | 17 | License along with the GNU C Library; if not, see | 
|  | 18 | <http://www.gnu.org/licenses/>.  */ | 
|  | 19 |  | 
|  | 20 | /* Return: the length of the input string | 
|  | 21 |  | 
|  | 22 | Input: | 
|  | 23 | in0:    str | 
|  | 24 |  | 
|  | 25 | Look for the null character byte by byte, until we reach a word aligned | 
|  | 26 | address, then search word by word, using the czx instruction.  We're | 
|  | 27 | also doing one word of read ahead, which could cause problems if the | 
|  | 28 | null character is on the last word of a page and the next page is not | 
|  | 29 | mapped in the process address space.  Hence the use of the speculative | 
|  | 30 | load. | 
|  | 31 |  | 
|  | 32 | This implementation assumes little endian mode.  For big endian mode, | 
|  | 33 | the instruction czx1.r should be replaced by czx1.l.  */ | 
|  | 34 |  | 
|  | 35 | #include <sysdep.h> | 
|  | 36 | #undef ret | 
|  | 37 |  | 
/* Symbolic names for the registers used by strlen below:
     saved_lc  copy of ar.lc taken on entry and written back before returning
     str       running read pointer (post-incremented by every load)
     pos0      czx1.r result: index of the first zero byte in val1, 8 if none
     val1      the 8-byte word currently being tested for a zero byte
     val2      the byte just loaded in the byte-wise loop; in the word-wise
               loop, the read-ahead word that becomes the next val1
     origadd   value of str when the word-wise scan starts
     tmp       scratch: first str % 8, later str - origadd
     loopcnt   iteration count loaded into ar.lc for the byte-wise loop
     len       running length; aliases ret0, so it is also the return value
   NOTE(review): r18-r24 and r30 are presumably caller-clobbered scratch
   registers under the IA-64 software conventions, since nothing here saves
   them -- confirm against the ABI document.  */
|  | 38 | #define saved_lc	r18 | 
|  | 39 | #define str		r19 | 
|  | 40 | #define pos0		r20 | 
|  | 41 | #define val1		r21 | 
|  | 42 | #define val2		r22 | 
|  | 43 | #define origadd		r23 | 
|  | 44 | #define tmp		r24 | 
|  | 45 | #define loopcnt		r30 | 
|  | 46 | #define len		ret0 | 
|  | 47 |  | 
/* strlen: in0 = address of the string, result (the length) in ret0.

   Phase 1 (.l1): when the address is not a multiple of 8, test one byte
   at a time, at most 8 - (str % 8) bytes, until either the terminating
   zero byte is found or str has become 8-byte aligned.

   Phase 2 (.l2): test 8 bytes per iteration with czx1.r while the next
   word is already being fetched with a speculative load (ld8.s).  A
   deferred speculative load is detected by chk.s and redone with a
   normal load in .recovery.

   ar.lc is used by the phase 1 counted loop, so it is saved on entry and
   restored at .restore_and_exit, the single exit point.  */
|  | 48 | ENTRY(strlen) | 
|  | 49 | .prologue | 
// Register frame: 1 input (in0), no locals, no outputs, no rotating registers.
|  | 50 | alloc r2 = ar.pfs, 1, 0, 0, 0 | 
|  | 51 | .save ar.lc, saved_lc | 
|  | 52 | mov 	saved_lc = ar.lc 	// save the loop counter | 
|  | 53 | .body | 
|  | 54 | mov 	str = in0 | 
|  | 55 | mov 	len = r0		// len = 0 | 
|  | 56 | and 	tmp = 7, in0		// tmp = str % 8 | 
|  | 57 | ;; | 
|  | 58 | sub	loopcnt = 8, tmp	// loopcnt = 8 - tmp | 
// p6 = (str % 8 == 0): already aligned, skip the byte-wise loop entirely.
|  | 59 | cmp.eq	p6, p0 = tmp, r0 | 
|  | 60 | (p6)	br.cond.sptk	.str_aligned;; | 
// br.cloop executes the loop body ar.lc + 1 times, so load (8 - tmp) - 1.
|  | 61 | adds	loopcnt = -1, loopcnt;; | 
|  | 62 | mov	ar.lc = loopcnt | 
// Phase 1: byte-wise scan up to the next 8-byte boundary.
|  | 63 | .l1: | 
// Load one byte and advance str by 1.
|  | 64 | ld1	val2 = [str], 1 | 
|  | 65 | ;; | 
// Zero byte found: len already holds the number of non-zero bytes seen.
|  | 66 | cmp.eq	p6, p0 = val2, r0 | 
|  | 67 | (p6)	br.cond.spnt	.restore_and_exit | 
|  | 68 | adds	len = 1, len | 
|  | 69 | br.cloop.dptk	.l1 | 
// Phase 2: str is now 8-byte aligned; scan one 8-byte word per iteration.
|  | 70 | .str_aligned: | 
|  | 71 | mov	origadd = str		// origadd = orig | 
// First word is loaded non-speculatively; str advances by 8.
|  | 72 | ld8	val1 = [str], 8;; | 
// NOTE(review): the two nop.b presumably pad the bundle so that .l2 starts
// on a bundle boundary -- confirm against the assembled output.
|  | 73 | nop.b	0 | 
|  | 74 | nop.b 	0 | 
// Read ahead: speculatively fetch the word after val1 (str advances by 8
// again) while val1 is being searched for a zero byte.
|  | 75 | .l2:	ld8.s	val2 = [str], 8		// don't bomb out here | 
// pos0 = index of the first zero byte of val1 counted from the least
// significant byte, or 8 when val1 contains no zero byte.
|  | 76 | czx1.r	pos0 = val1 | 
|  | 77 | ;; | 
|  | 78 | cmp.ne	p6, p0 = 8, pos0 | 
|  | 79 | (p6)	br.cond.spnt .foundit | 
// val1 had no zero byte, so val2 is really needed: if the speculative load
// was deferred, go and redo it with a normal load.
|  | 80 | chk.s	val2, .recovery | 
|  | 81 | .back: | 
// The read-ahead word becomes the word to test in the next iteration.
|  | 82 | mov	val1 = val2 | 
|  | 83 | br.cond.dptk	.l2 | 
// At this point str has been post-incremented past both the word holding
// the zero byte and the read-ahead word, so str - origadd is 16 larger than
// the offset of val1's word; the final adds removes that excess.
// Result: len = (bytes counted in phase 1) + (offset of word) + pos0.
|  | 84 | .foundit: | 
|  | 85 | sub	tmp = str, origadd	// tmp = crt address - orig | 
|  | 86 | add	len = len, pos0;; | 
|  | 87 | add	len = len, tmp;; | 
|  | 88 | adds	len = -16, len | 
// Common exit for both phases.
|  | 89 | .restore_and_exit: | 
|  | 90 | mov ar.lc = saved_lc		// restore the loop counter | 
|  | 91 | br.ret.sptk.many b0 | 
// Recovery for a deferred ld8.s: undo its post-increment of str, repeat the
// load non-speculatively (it either succeeds or takes the real fault), then
// rejoin the loop at .back.
|  | 92 | .recovery: | 
|  | 93 | adds	str = -8, str;; | 
|  | 94 | ld8	val2 = [str], 8		// bomb out here | 
|  | 95 | br.cond.sptk	.back | 
|  | 96 | END(strlen) | 
// NOTE(review): libc_hidden_builtin_def is defined outside this file;
// presumably it provides the hidden internal alias for strlen -- confirm.
|  | 97 | libc_hidden_builtin_def (strlen) |