lh | 9ed821d | 2023-04-07 01:36:19 -0700 | [diff] [blame^] | 1 | /* Optimized version of the standard strlen() function. |
| 2 | This file is part of the GNU C Library. |
| 3 | Copyright (C) 2000, 2001, 2003, 2005 Free Software Foundation, Inc. |
| 4 | Contributed by Dan Pop <Dan.Pop@cern.ch>. |
| 5 | |
| 6 | The GNU C Library is free software; you can redistribute it and/or |
| 7 | modify it under the terms of the GNU Lesser General Public |
| 8 | License as published by the Free Software Foundation; either |
| 9 | version 2.1 of the License, or (at your option) any later version. |
| 10 | |
| 11 | The GNU C Library is distributed in the hope that it will be useful, |
| 12 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 14 | Lesser General Public License for more details. |
| 15 | |
| 16 | You should have received a copy of the GNU Lesser General Public |
| 17 | License along with the GNU C Library; if not, write to the Free |
| 18 | Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA |
| 19 | 02111-1307 USA. */ |
| 20 | |
| 21 | /* Return: the length of the input string |
| 22 | |
| 23 | Input: |
| 24 | in0: str |
| 25 | |
| 26 | Look for the null character byte by byte, until we reach a word aligned |
| 27 | address, then search word by word, using the czx instruction. We're |
| 28 | also doing one word of read ahead, which could cause problems if the |
| 29 | null character is on the last word of a page and the next page is not |
| 30 | mapped in the process address space. Hence the use of the speculative |
| 31 | load. |
| 32 | |
| 33 | This implementation assumes little endian mode. For big endian mode, |
| 34 | the instruction czx1.r should be replaced by czx1.l. */ |
| 35 | |
| 36 | #include "sysdep.h" |
| 37 | #undef ret |
| 38 | |
| 39 | #define saved_lc r18 /* caller's ar.lc, restored before returning */ |
| 40 | #define str r19 /* scan cursor, advanced by post-increment loads */ |
| 41 | #define pos0 r20 /* czx1.r result for val1; compared against 8 */ |
| 42 | #define val1 r21 /* 8-byte word currently being tested */ |
| 43 | #define val2 r22 /* byte in the prefix loop, read-ahead word after */ |
| 44 | #define origadd r23 /* value of str when the word-by-word scan starts */ |
| 45 | #define tmp r24 /* scratch: str % 8, later str - origadd */ |
| 46 | #define loopcnt r30 /* iteration count loaded into ar.lc */ |
| 47 | #define len ret0 /* running length, doubles as the return value */ |
| 48 | |
| 49 | ENTRY(strlen) /* in: in0 = str; out: len (ret0) = bytes before the first NUL */ |
| 50 | .prologue |
| 51 | alloc r2 = ar.pfs, 1, 0, 0, 0 /* frame: 1 input (in0), no locals, outputs or rotating regs */ |
| 52 | .save ar.lc, saved_lc /* unwind info: ar.lc is kept in saved_lc */ |
| 53 | mov saved_lc = ar.lc /* save the loop counter */ |
| 54 | .body |
| 55 | mov str = in0 /* str = scan cursor */ |
| 56 | mov len = r0 /* len = 0 */ |
| 57 | and tmp = 7, in0 /* tmp = str % 8 */ |
| 58 | ;; |
| 59 | sub loopcnt = 8, tmp /* loopcnt = 8 - tmp = bytes up to the next 8-byte boundary */ |
| 60 | cmp.eq p6, p0 = tmp, r0 /* p6 = str is already 8-byte aligned */ |
| 61 | (p6) br.cond.sptk .str_aligned;; /* aligned: skip the byte loop */ |
| 62 | adds loopcnt = -1, loopcnt;; /* br.cloop runs the body ar.lc + 1 times */ |
| 63 | mov ar.lc = loopcnt |
| 64 | .l1: /* byte-by-byte scan of the unaligned prefix */ |
| 65 | ld1 val2 = [str], 1 /* val2 = *str++ */ |
| 66 | ;; |
| 67 | cmp.eq p6, p0 = val2, r0 /* p6 = this byte is the terminating NUL */ |
| 68 | (p6) br.cond.spnt .restore_and_exit /* len already holds the result */ |
| 69 | adds len = 1, len /* count the non-NUL byte */ |
| 70 | br.cloop.dptk .l1 /* falls through once str is 8-byte aligned */ |
| 71 | .str_aligned: /* word-by-word scan; len = bytes counted so far */ |
| 72 | mov origadd = str /* origadd = address where the word scan starts */ |
| 73 | ld8 val1 = [str], 8;; /* val1 = first aligned word (normal load); str += 8 */ |
| 74 | nop.b 0 /* NOTE(review): presumably padding to place .l2 on a bundle boundary - confirm */ |
| 75 | nop.b 0 |
| 76 | .l2: ld8.s val2 = [str], 8 /* speculative read-ahead of the next word: must not fault here */ |
| 77 | czx1.r pos0 = val1 /* pos0 = index of the first zero byte in val1, 8 if none */ |
| 78 | ;; |
| 79 | cmp.ne p6, p0 = 8, pos0 /* p6 = val1 contains the NUL */ |
| 80 | (p6) br.cond.spnt .foundit |
| 81 | chk.s val2, .recovery /* read-ahead is needed now: redo it if the speculation failed */ |
| 82 | .back: |
| 83 | mov val1 = val2 /* the read-ahead word becomes the current word */ |
| 84 | br.cond.dptk .l2 |
| 85 | .foundit: /* NUL is byte pos0 of val1; str is 16 past val1's address */ |
| 86 | sub tmp = str, origadd /* tmp = bytes str advanced since origadd */ |
| 87 | add len = len, pos0;; /* add the offset of the NUL inside val1 */ |
| 88 | add len = len, tmp;; |
| 89 | adds len = -16, len /* undo the two 8-byte post-increments (val1's load + read-ahead) */ |
| 90 | .restore_and_exit: |
| 91 | mov ar.lc = saved_lc /* restore the loop counter */ |
| 92 | br.ret.sptk.many b0 /* return len in ret0 */ |
| 93 | .recovery: /* reached from chk.s when the speculative ld8.s failed */ |
| 94 | adds str = -8, str;; /* step back to the address of the failed load */ |
| 95 | ld8 val2 = [str], 8 /* normal reload: a genuine fault is raised here */ |
| 96 | br.cond.sptk .back /* resume the loop with a valid val2 */ |
| 97 | END(strlen) |
| 98 | libc_hidden_def (strlen) |