blob: df9e69179d2b2bb2f1d6cf436e6e26deff84b471 [file] [log] [blame]
yuezonghe824eb0c2024-06-27 02:32:26 -07001/* Compare two strings for differences.
2 For SPARC v9.
3 Copyright (C) 1997, 1999, 2003 Free Software Foundation, Inc.
4 This file is part of the GNU C Library.
5 Contributed by Jan Vondrak <jvon4518@ss1000.ms.mff.cuni.cz> and
6 Jakub Jelinek <jj@ultra.linux.cz>.
7
8 The GNU C Library is free software; you can redistribute it and/or
9 modify it under the terms of the GNU Lesser General Public
10 License as published by the Free Software Foundation; either
11 version 2.1 of the License, or (at your option) any later version.
12
13 The GNU C Library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public
19 License along with the GNU C Library; if not, write to the Free
20 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
21 02111-1307 USA. */
22
23#include <asm/asi.h>
24#ifndef XCC
25 .register %g2, #scratch
26 .register %g3, #scratch
27 .register %g6, #scratch
28#endif
29
30 /* Normally, this uses
31 ((xword - 0x0101010101010101) & 0x8080808080808080) test
32 to find out if any byte in xword could be zero. This is fast, but
33 also gives false alarm for any byte in range 0x81-0xff. It does
34 not matter for correctness, as if this test tells us there could
35 be some zero byte, we check it byte by byte, but if bytes with
36 high bits set are common in the strings, then this will give poor
37 performance. You can #define EIGHTBIT_NOT_RARE and the algorithm
38 will use one tick slower, but more precise test
39 ((xword - 0x0101010101010101) & (~xword) & 0x8080808080808080),
40 which does not give any false alarms (but if some bits are set,
41 one cannot assume from it which bytes are zero and which are not).
42 It has not yet been measured which of the two tests is the better
43 default for glibc for an average user these days.
44 */
45
46 .text
47 .align 32
/* strcmp entry point.
   In:  %o0, %o1 = addresses of the two strings (both are used as load
        addresses below).
   Out: %o0 = 0 when the strings agree up to and including a zero byte
        (label 4, or label 24 when the masked xwords compare equal);
        -1 or 1 from the unsigned xword compares at labels 25/5 and 24;
        or the byte difference %o2 - %o3 from the byte loop (label 6).
   Constants kept live in the xword loops:
        %g1 = 0x0101010101010101, %g2 = %g1 << 7 = 0x8080808080808080.
   Other registers written: %g3, %g5, %g6, %o2-%o5 (and %g4 when
   EIGHTBIT_NOT_RARE is defined).
   Branch delay slots are interleaved with the following logical step
   throughout: the instruction directly after a branch belongs to that
   branch, and for the ",a" (annulled) conditional branches it only
   takes effect when the branch is taken.
   NOTE(review): ASI_PNF comes from <asm/asi.h>; presumably it is the V9
   primary no-fault ASI, which would let the look-ahead loads read past
   the terminator without trapping -- confirm.  */
48ENTRY(strcmp)
/* Start building the 0x01 byte pattern in %g1 and test the low three
   bits of %o0.  If %o0 is not 8-byte aligned go to the byte loop at 7;
   the delay slot completes the low 32 bits of the pattern.  */
49 sethi %hi(0x01010101), %g1 /* IEU0 Group */
50 andcc %o0, 7, %g0 /* IEU1 */
51 bne,pn %icc, 7f /* CTI */
52 or %g1, %lo(0x01010101), %g1 /* IEU0 Group */
53
/* %o0 is aligned.  %g3 = %o1 & 7; if that is non-zero use the shifting
   loop at 9.  The delay slot prepares %g2 = %g1 << 32 for both paths.  */
54 andcc %o1, 7, %g3 /* IEU1 */
55 bne,pn %icc, 9f /* CTI */
56 sllx %g1, 32, %g2 /* IEU0 Group */
57 ldx [%o0], %o2 /* Load */
58
/* Both addresses aligned: finish %g1 = 0x0101010101010101, load the
   first xword of the second string, turn %o1 into the distance
   (%o1 - %o0) so that [%o1 + %o0] follows %o0, and form
   %g2 = 0x8080808080808080.  Label 1 is also entered from the byte
   loop once both addresses have become aligned.  */
59 or %g1, %g2, %g1 /* IEU0 Group */
601: ldx [%o1], %o3 /* Load */
61 sub %o1, %o0, %o1 /* IEU1 */
62 sllx %g1, 7, %g2 /* IEU0 Group */
63
/* Main aligned loop.  On entry %o2 / %o3 hold the current xwords of the
   first / second string.  %g3 = %o2 - %g1 feeds the zero-byte test;
   differing xwords are resolved at 13.  */
642: add %o0, 8, %o0 /* IEU1 */
65 sub %o2, %g1, %g3 /* IEU0 Group */
66 subcc %o2, %o3, %g0 /* IEU1 */
67 bne,pn %xcc, 13f /* CTI */
68
/* The first instruction of either variant is the delay slot of the bne
   above.  In the default variant that slot already overwrites %o2 with
   the next xword of the first string, which is why later code rebuilds
   the old value as %g3 + %g1.  The andcc is the "may contain a zero
   byte" test described at the top of the file.  */
69#ifdef EIGHTBIT_NOT_RARE
70 andn %g3, %o2, %g4 /* IEU0 Group */
71 ldxa [%o0] ASI_PNF, %o2 /* Load */
72 andcc %g4, %g2, %g0 /* IEU1 Group */
73#else
74 ldxa [%o0] ASI_PNF, %o2 /* Load Group */
75 andcc %g3, %g2, %g0 /* IEU1 */
76#endif
/* No zero-byte candidate: loop, loading the next xword of the second
   string in the annulled delay slot.  */
77 be,a,pt %xcc, 2b /* CTI */
78 ldxa [%o1 + %o0] ASI_PNF, %o3 /* Load Group */
79
/* The xwords are equal but one byte may be zero.  %o4 = %g3 + %g1 is the
   xword just compared.  If the upper 32 bits of %g3 show no candidate,
   skip to 3 and check only the low four bytes.  */
80 addcc %g3, %g1, %o4 /* IEU1 */
81 srlx %g3, 32, %g3 /* IEU0 */
82 andcc %g3, %g2, %g0 /* IEU1 Group */
83 be,pt %xcc, 3f /* CTI */
84
/* Check the bytes of %o4 one at a time, most significant first.  A zero
   byte in equal xwords means the strings are equal: return 0 via 4.
   Each srlx after a be sits in that branch's delay slot and prepares
   the next byte.  */
85 srlx %o4, 56, %o5 /* IEU0 */
86 andcc %o5, 0xff, %g0 /* IEU1 Group */
87 be,pn %icc, 4f /* CTI */
88 srlx %o4, 48, %o5 /* IEU0 */
89
90 andcc %o5, 0xff, %g0 /* IEU1 Group */
91 be,pn %icc, 4f /* CTI */
92 srlx %o4, 40, %o5 /* IEU0 */
93 andcc %o5, 0xff, %g0 /* IEU1 Group */
94
95 be,pn %icc, 4f /* CTI */
96 srlx %o4, 32, %o5 /* IEU0 */
97 andcc %o5, 0xff, %g0 /* IEU1 Group */
98 be,pn %icc, 4f /* CTI */
99
/* Low four bytes (the srlx at 3 is also the delay slot of the be
   above).  */
1003: srlx %o4, 24, %o5 /* IEU0 */
101 andcc %o5, 0xff, %g0 /* IEU1 Group */
102 be,pn %icc, 4f /* CTI */
103 srlx %o4, 16, %o5 /* IEU0 */
104
105 andcc %o5, 0xff, %g0 /* IEU1 Group */
106 be,pn %icc, 4f /* CTI */
107 srlx %o4, 8, %o5 /* IEU0 */
108 andcc %o5, 0xff, %g0 /* IEU1 Group */
109
/* The last byte is tested in the delay slot of the be.  If it is
   non-zero the candidate was a false alarm: resume the loop at 2 with
   the next second-string xword loaded in the annulled delay slot.
   Otherwise fall through into 4.  */
110 be,pn %icc, 4f /* CTI */
111 andcc %o4, 0xff, %g0 /* IEU1 Group */
112 bne,a,pn %icc, 2b /* CTI */
113 ldxa [%o1 + %o0] ASI_PNF, %o3 /* Load */
114
/* Strings are equal: return 0.  */
1154: retl /* CTI+IEU1 Group */
115 clr %o0 /* IEU0 */
116
117 .align 32
/* The xwords differ (entered from loop 2 and from loop 11).  %g3 holds
   (first-string xword - %g1), so the addcc in the delay slot of the
   first be rebuilds that xword in %o4; %o3 is the second-string xword.
   If the zero-byte test finds no candidate, compare the whole xwords at
   25.  Otherwise locate the first zero byte of %o4, most significant
   byte first.  Every sllx that follows a be is that branch's delay
   slot, so when a branch to 24 is taken %o5 holds the 0xff mask of the
   byte just below the zero byte.  */
11913: mov 0xff, %g6 /* IEU0 Group */
119#ifdef EIGHTBIT_NOT_RARE
120 andcc %g4, %g2, %g0 /* IEU1 */
121#else
122 andcc %g3, %g2, %g0 /* IEU1 */
123#endif
124 be,pt %xcc, 25f /* CTI */
125 addcc %g3, %g1, %o4 /* IEU1 Group */
126
/* No candidate in the upper 32 bits: skip to 23 (low four bytes).  */
127 srlx %g3, 32, %g3 /* IEU0 */
128 andcc %g3, %g2, %g0 /* IEU1 Group */
129 be,pt %xcc, 23f /* CTI */
130 sllx %g6, 56, %o5 /* IEU0 */
131
132 andcc %o4, %o5, %g0 /* IEU1 Group */
133 be,pn %xcc, 24f /* CTI */
134 sllx %g6, 48, %o5 /* IEU0 */
135 andcc %o4, %o5, %g0 /* IEU1 Group */
136
137 be,pn %xcc, 24f /* CTI */
138 sllx %g6, 40, %o5 /* IEU0 */
139 andcc %o4, %o5, %g0 /* IEU1 Group */
140 be,pn %xcc, 24f /* CTI */
141
142 sllx %g6, 32, %o5 /* IEU0 */
143 andcc %o4, %o5, %g0 /* IEU1 Group */
144 be,pn %xcc, 24f /* CTI */
14623: sllx %g6, 24, %o5 /* IEU0 */
146
147 andcc %o4, %o5, %g0 /* IEU1 Group */
148 be,pn %icc, 24f /* CTI */
149 sllx %g6, 16, %o5 /* IEU0 */
150 andcc %o4, %o5, %g0 /* IEU1 Group */
151
152 be,pn %icc, 24f /* CTI */
153 sllx %g6, 8, %o5 /* IEU0 */
154 andcc %o4, %o5, %g0 /* IEU1 Group */
155 be,pn %icc, 24f /* CTI */
156
/* No zero byte found above the lowest byte: compare the complete
   xwords, unsigned.  The result is -1 unless %o4 > %o3, in which case
   the movgu in the retl delay slot changes it to 1.  */
157 mov %g6, %o5 /* IEU0 */
15925: cmp %o4, %o3 /* IEU1 Group */
1605: mov -1, %o0 /* IEU0 */
160 retl /* CTI+IEU1 Group */
161
162 movgu %xcc, 1, %o0 /* Single Group */
163
164 .align 16
/* A zero byte was found in %o4.  %o5 | (%o5 - 1) extends the mask to
   every bit below that byte; those bits are cleared in both xwords so
   that bytes after the terminator do not take part in the compare.
   Result: 0 (from the clr) if the masked xwords are equal, 1 if
   %o4 > %o3 unsigned, -1 if %o4 < %o3 unsigned.  */
16624: sub %o5, 1, %g6 /* IEU0 Group */
166 clr %o0 /* IEU1 */
167 or %o5, %g6, %o5 /* IEU0 Group */
168 andn %o4, %o5, %o4 /* IEU0 Group */
169
170 andn %o3, %o5, %o3 /* IEU1 */
171 cmp %o4, %o3 /* IEU1 Group */
172 movgu %xcc, 1, %o0 /* Single Group */
173 retl /* CTI+IEU1 Group */
174
175 movlu %xcc, -1, %o0 /* Single Group */
/* Exit of the byte loop: return the byte difference %o4 = %o2 - %o3.  */
1776: retl /* CTI+IEU1 Group */
177 mov %o4, %o0 /* IEU0 */
178
179 .align 16
/* Byte loop, used while %o0 is not 8-byte aligned.  Load the first byte
   of each string; %o0 is advanced right after its load.  %g2 = %g1 << 32
   is prepared here for the xword paths.  */
1817: ldub [%o0], %o2 /* Load */
181 add %o0, 1, %o0 /* IEU1 */
182 ldub [%o1], %o3 /* Load Group */
183 sllx %g1, 32, %g2 /* IEU0 */
184
/* Compare the current bytes.  If they differ, return the difference
   at 6; if they are equal and zero, return 0 at 4.  The delay slots
   already fetch the next byte of each string.  */
1868: add %o1, 1, %o1 /* IEU1 */
186 subcc %o2, %o3, %o4 /* IEU1 Group */
187 bne,pn %xcc, 6b /* CTI */
188 lduba [%o0] ASI_PNF, %o2 /* Load */
189
/* Keep looping (advancing %o0 in the annulled delay slot) until %o0 is
   8-byte aligned.  */
190 brz,pn %o3, 4b /* CTI+IEU1 Group */
191 lduba [%o1] ASI_PNF, %o3 /* Load */
192 andcc %o0, 7, %g0 /* IEU1 Group */
193 bne,a,pn %icc, 8b /* CTI */
194
/* %o0 is now aligned.  Complete %g1 and record %g3 = %o1 & 7.  If %o1
   is aligned as well, join the aligned path at 1 with the first-string
   xword loaded in the annulled delay slot; otherwise fall through
   into 9.  */
195 add %o0, 1, %o0 /* IEU0 */
196 or %g1, %g2, %g1 /* IEU0 Group */
197 andcc %o1, 7, %g3 /* IEU1 */
198 be,a,pn %icc, 1b /* CTI */
199
200 ldxa [%o0] ASI_PNF, %o2 /* Load Group */
/* %o0 is aligned, %o1 is off by %g3 bytes.  %g5 = 8 * %g3,
   %o5 = 64 - %g5.  Round %o1 down to an 8-byte boundary and load that
   xword into %g6, then turn %o1 into (rounded address - %o0 + 8) so that
   [%o1 + %o0] addresses the following aligned xword of the second
   string.  %g1 and %g2 receive their final values here too.  */
2029: sllx %g3, 3, %g5 /* IEU0 */
202 mov 64, %o5 /* IEU1 */
203 sub %o1, %g3, %o1 /* IEU0 Group */
204
205 sub %o5, %g5, %o5 /* IEU1 */
206 ldxa [%o1] ASI_PNF, %g6 /* Load Group */
207 or %g1, %g2, %g1 /* IEU0 */
208 sub %o1, %o0, %o1 /* IEU1 */
209
210 sllx %g1, 7, %g2 /* IEU0 Group */
211 add %o1, 8, %o1 /* IEU1 */
212
213 /* %g1 = 0101010101010101
214 * %g2 = 8080808080808080
215 * %g5 = number of bits to shift left
216 * %o5 = number of bits to shift right */
/* Shifting loop.  The second-string xword %o3 is assembled from two
   consecutive aligned xwords: (previous %g6 << %g5) | (next %g6 >> %o5).
   It is compared with the first-string xword %o2 loaded from [%o0].  */
21710: sllx %g6, %g5, %o3 /* IEU0 Group */
218 ldxa [%o1 + %o0] ASI_PNF, %g6 /* Load */
219
22011: srlx %g6, %o5, %o4 /* IEU0 Group */
221 ldxa [%o0] ASI_PNF, %o2 /* Load */
222 or %o3, %o4, %o3 /* IEU1 */
223 add %o0, 8, %o0 /* IEU0 Group */
224
/* Differing xwords go to 13 with %g3 = %o2 - %g1 (and %g4 in the
   EIGHTBIT_NOT_RARE variant) already set up as 13 expects.  With no
   zero-byte candidate, loop back to 10.  The second andcc checks the
   upper 32 bits only.  */
225 subcc %o2, %o3, %g0 /* IEU1 */
226#ifdef EIGHTBIT_NOT_RARE
227 sub %o2, %g1, %g3 /* IEU0 Group */
228 bne,pn %xcc, 13b /* CTI */
229 andn %g3, %o2, %g4 /* IEU0 Group */
230
231 andcc %g4, %g2, %g0 /* IEU1 Group */
232 be,pt %xcc, 10b /* CTI */
233 srlx %g4, 32, %g4 /* IEU0 */
234 andcc %g4, %g2, %g0 /* IEU1 Group */
235#else
236 bne,pn %xcc, 13b /* CTI */
237 sub %o2, %g1, %g3 /* IEU0 Group */
238 andcc %g3, %g2, %g0 /* IEU1 Group */
239
240 be,pt %xcc, 10b /* CTI */
241 srlx %g3, 32, %g3 /* IEU0 */
242 andcc %g3, %g2, %g0 /* IEU1 Group */
243#endif
/* No candidate in the upper 32 bits: check only the low four bytes
   at 12.  */
244 be,pt %xcc, 12f /* CTI */
245
/* The xwords are equal; check the bytes of %o2 one at a time, most
   significant first.  A zero byte means the strings are equal
   (return 0 at 4).  */
246 srlx %o2, 56, %g3 /* IEU0 */
247 andcc %g3, 0xff, %g0 /* IEU1 Group */
248 be,pn %icc, 4b /* CTI */
249 srlx %o2, 48, %g3 /* IEU0 */
250
251 andcc %g3, 0xff, %g0 /* IEU1 Group */
252 be,pn %icc, 4b /* CTI */
253 srlx %o2, 40, %g3 /* IEU0 */
254 andcc %g3, 0xff, %g0 /* IEU1 Group */
255
256 be,pn %icc, 4b /* CTI */
257 srlx %o2, 32, %g3 /* IEU0 */
258 andcc %g3, 0xff, %g0 /* IEU1 Group */
259 be,pn %icc, 4b /* CTI */
260
26112: srlx %o2, 24, %g3 /* IEU0 */
262 andcc %g3, 0xff, %g0 /* IEU1 Group */
263 be,pn %icc, 4b /* CTI */
264 srlx %o2, 16, %g3 /* IEU0 */
265
266 andcc %g3, 0xff, %g0 /* IEU1 Group */
267 be,pn %icc, 4b /* CTI */
268 srlx %o2, 8, %g3 /* IEU0 */
269 andcc %g3, 0xff, %g0 /* IEU1 Group */
270
/* The last byte is tested in the delay slot of the first be.  If no
   byte was zero the candidate was a false alarm: perform the two
   instructions of label 10 in the delay slots and re-enter the loop
   at 11.  */
271 be,pn %icc, 4b /* CTI */
272 andcc %o2, 0xff, %g0 /* IEU1 Group */
273 be,pn %icc, 4b /* CTI */
274 sllx %g6, %g5, %o3 /* IEU0 */
275
276 ba,pt %xcc, 11b /* CTI Group */
277 ldxa [%o1 + %o0] ASI_PNF, %g6 /* Load */
278END(strcmp)
279libc_hidden_def(strcmp)