/* Copyright (C) 2014-2016 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* Pull in the register names and the LEAF/END macros from whichever
   environment this file is being built in.  The order of the tests
   matters: the first matching environment wins.  */
#if defined ANDROID_CHANGES
# include "machine/asm.h"
# include "machine/regdef.h"
#elif _LIBC
# include <sysdep.h>
# include <regdef.h>
# include <sys/asm.h>
#elif defined _COMPILING_NEWLIB
# include "machine/asm.h"
# include "machine/regdef.h"
#else
# include <regdef.h>
# include <sys/asm.h>
#endif

/* DELAY_READ is placed after every word comparison in the unrolled loop,
   i.e. in the delay slot of the branch that tests for the terminator.

   Strictly speaking strcmp must not read beyond the end of either string.
   The word loop always reads a whole word, which may include bytes after
   the terminator, but with DELAY_READ defined as a nop it never loads the
   word that follows the one holding the terminator.  Defining
   ENABLE_READAHEAD makes DELAY_READ empty, so the next load moves into
   that delay slot: faster, but technically outside what strcmp is allowed
   to do.  */
#ifndef ENABLE_READAHEAD
# define DELAY_READ nop
#else
# define DELAY_READ
#endif

/* The CLZ based tail is opt-in: measurements on a little endian machine
   showed it to be slower, so ENABLE_CLZ must be requested explicitly and
   is only honoured for ISA revision 2 or later.  */
#if defined ENABLE_CLZ && __mips_isa_rev >= 2
# define USE_CLZ
#endif

/* Provide the local-label macro L for asm.h files that lack it.  */
#if !defined L
# if _MIPS_SIM != _ABIO32
#  define L(name) .L ## name
# else
#  define L(name) $L ## name
# endif
#endif

/* Some asm.h files do not have the PTR_ADDIU macro definition.
   PTR_ADDIU is used below to advance the two string pointers, so the
   fallback must match the pointer width: daddiu for 64-bit pointers,
   addiu otherwise.  USE_DOUBLE is still honoured when the builder defines
   it, but nothing in this file sets it, so _MIPS_SZPTR (when the compiler
   provides it) is consulted as well.  */
#ifndef PTR_ADDIU
# if defined USE_DOUBLE || (defined _MIPS_SZPTR && _MIPS_SZPTR == 64)
#  define PTR_ADDIU daddiu
# else
#  define PTR_ADDIU addiu
# endif
#endif

/* Allow the routine to be named something else if desired.  */
#ifndef STRCMP_NAME
# define STRCMP_NAME strcmp
#endif

/* int STRCMP_NAME (const char *s1, const char *s2)

   In:    a0 = s1, a1 = s2.
   Out:   v0 = 0 when the strings are equal, otherwise the first byte of
	  s1 that differs minus the corresponding byte of s2, both bytes
	  taken as unsigned values.
   Uses:  v1, t0, t1, t8, t9.  a0 and a1 are advanced while scanning.

   If both pointers are 4-byte aligned the strings are compared one word
   at a time (eight words per loop iteration); otherwise they are compared
   one byte at a time (eight bytes per loop iteration).

   The whole body is assembled with .set noreorder, so the instruction
   written after each branch or jump is its delay slot and executes
   whether or not the branch is taken.  */
#ifdef ANDROID_CHANGES
LEAF(STRCMP_NAME, 0)
#else
LEAF(STRCMP_NAME)
#endif
	.set	nomips16
	.set	noreorder

	/* Any low address bit set in either pointer selects the byte loop.  */
	or	t0, a0, a1
	andi	t0, t0, 0x3
	bne	t0, zero, L(byteloop)

/* Both strings are 4 byte aligned at this point.  */

	/* t8 = 0x01010101 and t9 = 0x7f7f7f7f are the constants of the
	   zero-byte test in STRCMP32.  The lui below sits in the delay slot
	   of the bne above; that is harmless because the byte loop loads t8
	   before it reads it.  */
	 lui	t8, 0x0101
	ori	t8, t8, 0x0101
	lui	t9, 0x7f7f
	ori	t9, t9, 0x7f7f

/* Compare the aligned words at OFFSET(a0) and OFFSET(a1).
   - Words differ: branch to worddiff with the two words in v0 and v1.
   - Words equal and (v0 - t8) & ~(v0 | t9) is nonzero, which happens
     exactly when v0 contains a zero byte: branch to returnzero.
   - Otherwise fall through.
   The last bne has no delay slot inside the macro; whatever instruction
   follows the macro invocation fills it.  */
#define STRCMP32(OFFSET) \
	lw	v0, OFFSET(a0); \
	lw	v1, OFFSET(a1); \
	subu	t0, v0, t8; \
	bne	v0, v1, L(worddiff); \
	nor	t1, v0, t9; \
	and	t0, t0, t1; \
	bne	t0, zero, L(returnzero)

/* Eight words per iteration.  DELAY_READ fills the delay slot of each
   terminator test; after the last comparison the pointer update for a0
   takes that slot and the update for a1 sits in the delay slot of the
   loop branch.  */
L(wordloop):
	STRCMP32(0)
	DELAY_READ
	STRCMP32(4)
	DELAY_READ
	STRCMP32(8)
	DELAY_READ
	STRCMP32(12)
	DELAY_READ
	STRCMP32(16)
	DELAY_READ
	STRCMP32(20)
	DELAY_READ
	STRCMP32(24)
	DELAY_READ
	STRCMP32(28)
	PTR_ADDIU a0, a0, 32
	b	L(wordloop)
	 PTR_ADDIU a1, a1, 32

/* Equal words containing the terminator: the strings are equal.  */
L(returnzero):
	j	ra
	 move	v0, zero

/* v0 and v1 hold two words that are not equal.  Find the first byte, in
   string order, that either differs or is the terminator of s1 and return
   the difference of the two bytes at that position.  */
L(worddiff):
#ifdef USE_CLZ
	/* t1 = 0x80 in every byte of v0 that is zero and 0 elsewhere,
	   computed as ~(((v0 & t9) + t9) | v0 | t9).  The cheaper test used
	   in STRCMP32 cannot be used to locate the byte: its subtraction
	   borrows out of a zero byte and can also flag a 0x01 byte in the
	   next more significant position, which on big endian is the
	   earlier string byte and would be picked by clz.  */
	and	t0, v0, t9
	addu	t0, t0, t9
	or	t0, t0, v0
	nor	t1, t0, t9
	/* t0 = bits that differ, plus a marker in each zero byte of v0.  */
	xor	t0, v0, v1
	or	t0, t0, t1
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
	/* Reverse the bytes so that the first string byte is the most
	   significant one and clz finds it first.  */
	wsbh	t0, t0
	rotr	t0, t0, 16
# endif
	/* Round the leading-zero count down to a whole byte: 0, 8, 16, 24.  */
	clz	t1, t0
	and	t1, 0xf8
# if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
	/* Turn the distance from the top into a right-rotate amount.  */
	neg	t1
	addu	t1, 24
# endif
	/* Bring the selected byte of each word into bits 7..0.  */
	rotrv	v0, v0, t1
	rotrv	v1, v1, t1
	and	v0, v0, 0xff
	and	v1, v1, 0xff
	j	ra
	 subu	v0, v0, v1
#else /* USE_CLZ */
	/* Examine the four bytes in string order, alternating between the
	   register pairs t0/t1 and t8/t9 (t8 and t9 no longer need to hold
	   the word-loop constants once we are here).  Each step leaves as
	   soon as the byte of s1 is zero or the two bytes differ; the
	   extraction of the s2 byte sits in the delay slot of the zero
	   test.  */
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
	andi	t0, v0, 0xff
	beq	t0, zero, L(wexit01)
	 andi	t1, v1, 0xff
	bne	t0, t1, L(wexit01)

	 srl	t8, v0, 8
	srl	t9, v1, 8
	andi	t8, t8, 0xff
	beq	t8, zero, L(wexit89)
	 andi	t9, t9, 0xff
	bne	t8, t9, L(wexit89)

	 srl	t0, v0, 16
	srl	t1, v1, 16
	andi	t0, t0, 0xff
	beq	t0, zero, L(wexit01)
	 andi	t1, t1, 0xff
	bne	t0, t1, L(wexit01)

	 srl	t8, v0, 24
	srl	t9, v1, 24
# else /* __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ */
	srl	t0, v0, 24
	beq	t0, zero, L(wexit01)
	 srl	t1, v1, 24
	bne	t0, t1, L(wexit01)

	 srl	t8, v0, 16
	srl	t9, v1, 16
	andi	t8, t8, 0xff
	beq	t8, zero, L(wexit89)
	 andi	t9, t9, 0xff
	bne	t8, t9, L(wexit89)

	 srl	t0, v0, 8
	srl	t1, v1, 8
	andi	t0, t0, 0xff
	beq	t0, zero, L(wexit01)
	 andi	t1, t1, 0xff
	bne	t0, t1, L(wexit01)

	 andi	t8, v0, 0xff
	andi	t9, v1, 0xff
# endif /* __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ */

	/* The last byte pair falls through to here in t8/t9.  */
L(wexit89):
	j	ra
	 subu	v0, t8, t9
L(wexit01):
	j	ra
	 subu	v0, t0, t1
#endif /* USE_CLZ */

/* It might seem better to do the 'beq' instruction between the two 'lbu'
   instructions so that the nop is not needed but testing showed that this
   code is actually faster (based on glibc strcmp test).

   BYTECMP01 and BYTECMP89 compare the bytes at OFFSET using v0/v1 and
   t8/t9 respectively, leaving through bexit01 or bexit89 when the byte of
   s1 is zero or the bytes differ.  As with STRCMP32, the delay slot of
   the final bne is filled by the instruction that follows the macro.  */
#define BYTECMP01(OFFSET) \
	lbu	v0, OFFSET(a0); \
	lbu	v1, OFFSET(a1); \
	beq	v0, zero, L(bexit01); \
	nop; \
	bne	v0, v1, L(bexit01)

#define BYTECMP89(OFFSET) \
	lbu	t8, OFFSET(a0); \
	lbu	t9, OFFSET(a1); \
	beq	t8, zero, L(bexit89); \
	nop; \
	bne	t8, t9, L(bexit89)

/* Eight bytes per iteration, alternating register pairs.  */
L(byteloop):
	BYTECMP01(0)
	BYTECMP89(1)
	BYTECMP01(2)
	BYTECMP89(3)
	BYTECMP01(4)
	BYTECMP89(5)
	BYTECMP01(6)
	BYTECMP89(7)
	PTR_ADDIU a0, a0, 8
	b	L(byteloop)
	 PTR_ADDIU a1, a1, 8

L(bexit01):
	j	ra
	 subu	v0, v0, v1
L(bexit89):
	j	ra
	 subu	v0, t8, t9

	.set	at
	.set	reorder

END(STRCMP_NAME)
#ifndef ANDROID_CHANGES
# ifdef _LIBC
libc_hidden_builtin_def (STRCMP_NAME)
# endif
#endif