xf.li | bdd93d5 | 2023-05-12 07:10:14 -0700 | [diff] [blame^] | 1 | /* Optimized version of the standard strlen() function. |
| 2 | This file is part of the GNU C Library. |
| 3 | Copyright (C) 2000-2016 Free Software Foundation, Inc. |
| 4 | Contributed by Dan Pop <Dan.Pop@cern.ch>. |
| 5 | |
| 6 | The GNU C Library is free software; you can redistribute it and/or |
| 7 | modify it under the terms of the GNU Lesser General Public |
| 8 | License as published by the Free Software Foundation; either |
| 9 | version 2.1 of the License, or (at your option) any later version. |
| 10 | |
| 11 | The GNU C Library is distributed in the hope that it will be useful, |
| 12 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 14 | Lesser General Public License for more details. |
| 15 | |
| 16 | You should have received a copy of the GNU Lesser General Public |
| 17 | License along with the GNU C Library; if not, see |
| 18 | <http://www.gnu.org/licenses/>. */ |
| 19 | |
| 20 | /* Return: the length of the input string |
| 21 | |
| 22 | Input: |
| 23 | in0: str |
| 24 | |
| 25 | Look for the null character byte by byte, until we reach a word aligned |
| 26 | address, then search word by word, using the czx instruction. We're |
| 27 | also doing one word of read ahead, which could cause problems if the |
| 28 | null character is on the last word of a page and the next page is not |
| 29 | mapped in the process address space. Hence the use of the speculative |
| 30 | load. |
| 31 | |
| 32 | This implementation assumes little endian mode. For big endian mode, |
| 33 | the instruction czx1.r should be replaced by czx1.l. */ |
| 34 | |
| 35 | #include <sysdep.h> |
| 36 | #undef ret |
| 37 | |
/* Symbolic names for the scratch registers used by strlen:
     saved_lc - copy of ar.lc taken on entry and restored before returning
     str      - running read pointer (post-incremented by every load)
     pos0     - result of czx1.r on the word under test
     val1     - 8-byte word currently being tested for a zero byte
     val2     - byte loaded in the unaligned prologue; read-ahead word in
                the aligned loop
     origadd  - address at which the word-by-word scan started
     tmp      - scratch (alignment remainder, then pointer difference)
     loopcnt  - iteration count written to ar.lc for the byte prologue
     len      - accumulated length; aliased to ret0, so it is the result.  */
| 38 | #define saved_lc r18 |
| 39 | #define str r19 |
| 40 | #define pos0 r20 |
| 41 | #define val1 r21 |
| 42 | #define val2 r22 |
| 43 | #define origadd r23 |
| 44 | #define tmp r24 |
| 45 | #define loopcnt r30 |
| 46 | #define len ret0 |
| 47 | |
/* strlen: count the bytes preceding the first zero byte of a string.

   In:   in0 = address of the string.
   Out:  len (ret0) = number of non-zero bytes before the terminator.
   Uses: r2 (alloc), saved_lc, str, pos0, val1, val2, origadd, tmp,
         loopcnt, predicate p6.  ar.lc is modified by the byte loop but
         saved on entry and restored at .restore_and_exit.

   Phases:
     1. If in0 is not 8-byte aligned, scan byte by byte (.l1) up to the
        next 8-byte boundary, counting into len.
     2. From an aligned address, scan 8 bytes at a time (.l2) with one
        word of speculative read-ahead, testing each word with czx1.r.
     3. .foundit turns the pointer position and czx1.r index into a
        length and adds it to the bytes counted in phase 1.  */
| 48 | ENTRY(strlen) |
| 49 | .prologue |
// Register frame: 1 input (in0), no locals, no outputs, no rotating
// registers; the previous ar.pfs is written to r2.
| 50 | alloc r2 = ar.pfs, 1, 0, 0, 0 |
// Unwind annotation: ar.lc is preserved in saved_lc.
| 51 | .save ar.lc, saved_lc |
| 52 | mov saved_lc = ar.lc // save the loop counter |
| 53 | .body |
| 54 | mov str = in0 |
| 55 | mov len = r0 // len = 0 |
| 56 | and tmp = 7, in0 // tmp = str % 8 |
| 57 | ;; |
| 58 | sub loopcnt = 8, tmp // loopcnt = 8 - tmp |
// p6 = (tmp == 0): already 8-byte aligned, skip the byte prologue.
| 59 | cmp.eq p6, p0 = tmp, r0 |
| 60 | (p6) br.cond.sptk .str_aligned;; |
// br.cloop runs the body ar.lc + 1 times, so load (8 - tmp) - 1 to
// visit exactly the 8 - tmp bytes up to the next 8-byte boundary.
| 61 | adds loopcnt = -1, loopcnt;; |
| 62 | mov ar.lc = loopcnt |
// Byte prologue: one byte per iteration, str post-incremented by 1.
| 63 | .l1: |
| 64 | ld1 val2 = [str], 1 |
| 65 | ;; |
// Terminator found before reaching alignment: len already holds the
// answer, so go straight to the exit path.
| 66 | cmp.eq p6, p0 = val2, r0 |
| 67 | (p6) br.cond.spnt .restore_and_exit |
| 68 | adds len = 1, len |
| 69 | br.cloop.dptk .l1 |
// str is 8-byte aligned from here on.
| 70 | .str_aligned: |
| 71 | mov origadd = str // origadd = orig |
// First aligned word is loaded non-speculatively; str advances by 8.
| 72 | ld8 val1 = [str], 8;; |
// NOTE(review): presumably padding so that .l2 begins a fresh bundle -
// confirm against the assembler's bundling before changing.
| 73 | nop.b 0 |
| 74 | nop.b 0 |
// Word loop.  Invariant at .l2: val1 holds the word to test and str
// points at the word immediately after it.  The next word is fetched
// speculatively (ld8.s) so that a fault on it is deferred instead of
// raised; it is only validated (chk.s) once val1 is known to contain
// no zero byte.
| 75 | .l2: ld8.s val2 = [str], 8 // don't bomb out here |
// pos0 = index of the first zero byte of val1 counting from the right
// (least significant) end, or 8 if val1 has no zero byte.
| 76 | czx1.r pos0 = val1 |
| 77 | ;; |
| 78 | cmp.ne p6, p0 = 8, pos0 |
| 79 | (p6) br.cond.spnt .foundit |
// The read-ahead word is needed now: if its speculative load was
// deferred, branch to .recovery to redo it non-speculatively.
| 80 | chk.s val2, .recovery |
| 81 | .back: |
| 82 | mov val1 = val2 |
| 83 | br.cond.dptk .l2 |
// Zero byte located at byte pos0 of the word in val1.  str has been
// post-incremented past both that word and the read-ahead word, i.e.
// it is 16 bytes beyond the start of val1's word, hence the -16:
//   len = len + pos0 + (str - origadd) - 16
| 84 | .foundit: |
| 85 | sub tmp = str, origadd // tmp = crt address - orig |
| 86 | add len = len, pos0;; |
| 87 | add len = len, tmp;; |
| 88 | adds len = -16, len |
// Common exit: result is in len (ret0).
| 89 | .restore_and_exit: |
| 90 | mov ar.lc = saved_lc // restore the loop counter |
| 91 | br.ret.sptk.many b0 |
// Recovery for a deferred speculative load: step str back over the
// word, reload it with a normal ld8 (re-advancing str by 8) so that a
// genuine fault is delivered here, then resume the loop at .back.
| 92 | .recovery: |
| 93 | adds str = -8, str;; |
| 94 | ld8 val2 = [str], 8 // bomb out here |
| 95 | br.cond.sptk .back |
| 96 | END(strlen) |
| 97 | libc_hidden_builtin_def (strlen) |