blob: 8c26c6becd5a030a0845f8d6f006d44760e16086 [file] [log] [blame]
yuezonghe824eb0c2024-06-27 02:32:26 -07001/* Copy SRC to DEST returning the address of the terminating '\0' in DEST.
2 For SPARC v9.
3 Copyright (C) 1998, 1999, 2002, 2003, 2004 Free Software Foundation, Inc.
4 This file is part of the GNU C Library.
5 Contributed by Jan Vondrak <jvon4518@ss1000.ms.mff.cuni.cz> and
6 Jakub Jelinek <jj@ultra.linux.cz>.
7
8 The GNU C Library is free software; you can redistribute it and/or
9 modify it under the terms of the GNU Lesser General Public
10 License as published by the Free Software Foundation; either
11 version 2.1 of the License, or (at your option) any later version.
12
13 The GNU C Library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public
19 License along with the GNU C Library; if not, write to the Free
20 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
21 02111-1307 USA. */
22
23#include <asm/asi.h>
/* The stpcpy routine in this file uses %g2, %g3 and %g6 as temporaries;
   declare them to the assembler as scratch registers.  The declarations
   are skipped when XCC is defined (XCC is not defined anywhere in the
   visible source -- NOTE(review): confirm where it comes from).  */
24#ifndef XCC
25 .register %g2, #scratch
26 .register %g3, #scratch
27 .register %g6, #scratch
28#endif
29
30 /* Normally, this uses
31 ((xword - 0x0101010101010101) & 0x8080808080808080) test
32 to find out if any byte in xword could be zero. This is fast, but
33 also gives a false alarm for any byte in the range 0x81-0xff. It does
34 not matter for correctness, as if this test tells us there could
35 be some zero byte, we check it byte by byte, but if bytes with
36 high bits set are common in the strings, then this will give poor
37 performance. You can #define EIGHTBIT_NOT_RARE and the algorithm
38 will use one tick slower, but more precise test
39 ((xword - 0x0101010101010101) & (~xword) & 0x8080808080808080),
40 which does not give any false alarms (but if some bits are set,
41 one cannot assume from it which bytes are zero and which are not).
42 Which of the two tests is the better default for glibc for an
43 average user has yet to be measured.
44 */
45
46 .text
47 .align 32
/* stpcpy: copy the NUL-terminated string at SRC to DEST and return the
   address of the terminating NUL written into DEST.

   In:   %o0 = DEST, %o1 = SRC (the code stores through %o0 and loads
         through %o1).
   Out:  %o0 = address of the NUL byte in DEST.
   Uses: %g1-%g6, %o2-%o5 and the condition codes.  Leaf routine: it
         returns with retl and never references the stack.

   Layout (numeric local labels):
     12/13        byte-at-a-time copy until DEST is 8-byte aligned
     1/2/3        word loop, DEST and SRC both 8-byte aligned
     14/15        word loop, DEST aligned but SRC not: every output word
                  is assembled from two aligned loads with shifts
     4-11, 16-30  exits that store the bytes up to and including the NUL
                  and produce the return value

   A word is first tested with (word - 0x01..01) & 0x80..80.  A non-zero
   result only means a zero byte is possible, so it is followed by an
   exact test of each byte.  In the comments below "byte 0" is the most
   significant byte of a word, which is the byte at the lowest address
   (see the stores at labels 11 and 30), and "byte 7" the least
   significant.

   Every branch has a delay slot: the instruction that follows a branch
   executes before control reaches the target.  With the ",a" (annul)
   suffix on a conditional branch the slot executes only when the branch
   is taken.  ",pt" / ",pn" are static prediction hints.

   NOTE(review): in this copy each line starts with a leftover source
   line number, so a label such as "1:" appears fused to it (for example
   "601:"), and the indentation that marked delay-slot instructions is
   not present.

   NOTE(review): ASI_PNF is defined outside this file (<asm/asi.h>); it
   is presumably the non-faulting primary ASI, which would make the
   look-ahead loads beyond the terminator harmless -- confirm.  */
48ENTRY(stpcpy)
        /* %g1 = 0x01010101; %g2 = the same pattern moved to the upper 32
           bits.  The andcc tests DEST & 7 for the first branch below; the
           sllx after it does not alter the condition codes.  */
49 sethi %hi(0x01010101), %g1 /* IEU0 Group */
50 or %g1, %lo(0x01010101), %g1 /* IEU0 Group */
51 andcc %o0, 7, %g0 /* IEU1 */
52 sllx %g1, 32, %g2 /* IEU0 Group */
53
        /* DEST not 8-byte aligned: go to the byte loop at 12.  The delay
           slot sets %g3 = SRC & 7 and the condition codes for the second
           branch.  Note that %g1 is not yet the full 64-bit constant when
           12 is reached; 12 completes it itself.  */
54 bne,pn %icc, 12f /* CTI */
55 andcc %o1, 7, %g3 /* IEU1 */
        /* %g1 = 0x0101010101010101 (plain or: condition codes unchanged).  */
56 or %g1, %g2, %g1 /* IEU0 Group */
        /* SRC not 8-byte aligned: go to the shifting loop at 14.  Delay
           slot: %g2 = %g1 << 7 = 0x8080808080808080.  */
57 bne,pn %icc, 14f /* CTI */
58
59 sllx %g1, 7, %g2 /* IEU0 Group */
        /* 1: entry to the aligned word loop.  Load the first source word
           and advance SRC.  */
601: ldx [%o1], %o3 /* Load */
61 add %o1, 8, %o1 /* IEU1 */
        /* 2: %g3 = word under test, %o2 = %g3 - 0x01..01.  */
622: mov %o3, %g3 /* IEU0 Group */
63
64 sub %o3, %g1, %o2 /* IEU1 */
        /* 3: load the following word into %o3 while the current one is
           being tested.  */
653: ldxa [%o1] ASI_PNF, %o3 /* Load */
66#ifdef EIGHTBIT_NOT_RARE
        /* Precise variant: additionally mask with ~word, so bytes whose
           own top bit is set do not raise the alarm.  */
67 andn %o2, %g3, %o2 /* IEU0 Group */
68#endif
        /* DEST is advanced before the store, hence the [%o0 - 8]
           addressing used from here on.  */
69 add %o0, 8, %o0 /* IEU0 Group */
70 andcc %o2, %g2, %g0 /* IEU1 */
71
72 add %o1, 8, %o1 /* IEU0 Group */
        /* No candidate zero byte: store the word (annulled slot, executed
           only when the branch is taken) and continue at 2 with the
           preloaded word.  */
73 be,a,pt %xcc, 2b /* CTI */
74 stx %g3, [%o0 - 8] /* Store */
        /* A zero byte is possible.  Test bytes 0..6 of %g3 in turn.  Each
           be has the shift for the next test in its delay slot, so %g4 and
           %g5 alternate and still hold the shifted values that the exit
           code stores.  */
75 srlx %g3, 56, %g5 /* IEU0 Group */
76
77 andcc %g5, 0xff, %g0 /* IEU1 Group */
78 be,pn %icc, 11f /* CTI */
79 srlx %g3, 48, %g4 /* IEU0 */
80 andcc %g4, 0xff, %g0 /* IEU1 Group */
81
82 be,pn %icc, 10f /* CTI */
83 srlx %g3, 40, %g5 /* IEU0 */
84 andcc %g5, 0xff, %g0 /* IEU1 Group */
85 be,pn %icc, 9f /* CTI */
86
87 srlx %g3, 32, %g4 /* IEU0 */
88 andcc %g4, 0xff, %g0 /* IEU1 Group */
89 be,pn %icc, 8f /* CTI */
90 srlx %g3, 24, %g5 /* IEU0 */
91
92 andcc %g5, 0xff, %g0 /* IEU1 Group */
93 be,pn %icc, 7f /* CTI */
94 srlx %g3, 16, %g4 /* IEU0 */
95 andcc %g4, 0xff, %g0 /* IEU1 Group */
96
97 be,pn %icc, 6f /* CTI */
98 srlx %g3, 8, %g5 /* IEU0 */
99 andcc %g5, 0xff, %g0 /* IEU1 Group */
        /* The delay slot of this branch already computes
           %o2 = next word - 0x01..01 for a possible re-entry at 3.  */
100 be,pn %icc, 5f /* CTI */
101
102 sub %o3, %g1, %o2 /* IEU0 */
        /* Bytes 0..6 are non-zero.  Store the whole word and test byte 7.
           If it is non-zero the word test was a false alarm: resume at 3,
           with %g3 set to the preloaded word in the delay slot.  */
103 stx %g3, [%o0 - 8] /* Store Group */
104 andcc %g3, 0xff, %g0 /* IEU1 */
105 bne,pt %icc, 3b /* CTI */
106
107 mov %o3, %g3 /* IEU0 Group */
        /* 4: the NUL is the last byte already stored, at %o0 - 1.  Also the
           exit used by the byte loop at 13.  */
1084: retl /* CTI+IEU1 Group */
109 sub %o0, 1, %o0 /* IEU0 */
110
        /* Exits of the aligned loop.  %o0 points 8 bytes past the start of
           the word in %g3.  Each exit stores only the bytes up to and
           including the NUL and returns the NUL's address, computed into
           %g6 and moved to %o0 in the retl delay slot.  */
111 .align 16
        /* 6: NUL is byte 5 (address %o0 - 3); %g4 = %g3 >> 16 here.  */
1126: ba,pt %xcc, 23f /* CTI Group */
113 sub %o0, 3, %g6 /* IEU0 */
        /* 5: NUL is byte 6 (address %o0 - 2); %g5 = %g3 >> 8.  Store that
           byte, then fall into 23 for the rest.  */
1145: sub %o0, 2, %g6 /* IEU0 Group */
115 stb %g5, [%o0 - 2] /* Store */
116
117 srlx %g3, 16, %g4 /* IEU0 Group */
        /* 23: store bytes 4-5 as a halfword, then bytes 0-3 as a word.  */
11823: sth %g4, [%o0 - 4] /* Store */
119 srlx %g3, 32, %g4 /* IEU0 Group */
120 stw %g4, [%o0 - 8] /* Store */
121
122 retl /* CTI+IEU1 Group */
123 mov %g6, %o0 /* IEU0 */
        /* 8: NUL is byte 3 (address %o0 - 5); %g4 = %g3 >> 32 here.  */
1248: ba,pt %xcc, 24f /* CTI Group */
125 sub %o0, 5, %g6 /* IEU0 */
126
        /* 7: NUL is byte 4 (address %o0 - 4); %g5 = %g3 >> 24.  Store that
           byte, then bytes 0-3 at 24.  */
1277: sub %o0, 4, %g6 /* IEU0 Group */
128 stb %g5, [%o0 - 4] /* Store */
129 srlx %g3, 32, %g4 /* IEU0 Group */
        /* 24: store bytes 0-3 as a word.  */
13024: stw %g4, [%o0 - 8] /* Store */
131
132 retl /* CTI+IEU1 Group */
133 mov %g6, %o0 /* IEU0 */
        /* 10: NUL is byte 1 (address %o0 - 7); %g4 = %g3 >> 48 here.  */
13410: ba,pt %xcc, 25f /* CTI Group */
135 sub %o0, 7, %g6 /* IEU0 */
136
        /* 9: NUL is byte 2 (address %o0 - 6); %g5 = %g3 >> 40.  Store that
           byte, then bytes 0-1 at 25.  */
1379: sub %o0, 6, %g6 /* IEU0 Group */
138 stb %g5, [%o0 - 6] /* Store */
139 srlx %g3, 48, %g4 /* IEU0 */
        /* 25: store bytes 0-1 as a halfword.  */
14025: sth %g4, [%o0 - 8] /* Store Group */
141
142 retl /* CTI+IEU1 Group */
143 mov %g6, %o0 /* IEU0 */
        /* 11: NUL is byte 0 (address %o0 - 8); %g5 = %g3 >> 56.  Store it
           and return its address.  */
14411: stb %g5, [%o0 - 8] /* Store Group */
145 retl /* CTI+IEU1 Group */
146
147 sub %o0, 8, %o0 /* IEU0 */
148
149 .align 16
        /* 12: DEST is not 8-byte aligned.  Finish building the constants
           (%g1 = 0x0101010101010101, %g2 = 0x8080808080808080) for the
           word loops used later, and copy the first byte.  */
15012: or %g1, %g2, %g1 /* IEU0 Group */
151 ldub [%o1], %o3 /* Load */
152 sllx %g1, 7, %g2 /* IEU0 Group */
153 stb %o3, [%o0] /* Store Group */
154
        /* 13: advance both pointers past the byte just stored.  If it was
           the NUL, return through 4 (which yields %o0 - 1).  The delay slot
           preloads the next source byte in either case.  */
15513: add %o0, 1, %o0 /* IEU0 */
156 add %o1, 1, %o1 /* IEU1 */
157 andcc %o3, 0xff, %g0 /* IEU1 Group */
158 be,pn %icc, 4b /* CTI */
159
160 lduba [%o1] ASI_PNF, %o3 /* Load */
        /* DEST still unaligned: store the preloaded byte (annulled slot,
           only when the branch is taken) and loop.  */
161 andcc %o0, 7, %g0 /* IEU1 Group */
162 bne,a,pt %icc, 13b /* CTI */
163 stb %o3, [%o0] /* Store */
164
        /* DEST is now aligned.  %g3 = SRC & 7; if SRC is aligned as well,
           enter the aligned loop at 1 (the annulled slot performs the same
           load that label 1 performs again).  Otherwise fall into 14.  */
165 andcc %o1, 7, %g3 /* IEU1 Group */
166 be,a,pt %icc, 1b /* CTI */
167 ldx [%o1], %o3 /* Load */
        /* 14: DEST aligned, SRC misaligned by %g3 bytes.  Round SRC down to
           an 8-byte boundary and derive the two shift counts:
           %g5 = 8 * %g3 and %g4 = 64 - %g5.  The condition codes set by
           orcc here (and by addcc below) are overwritten by the andcc in
           the loop before any branch reads them.  */
16814: orcc %g0, 64, %g4 /* IEU1 Group */
169
170 sllx %g3, 3, %g5 /* IEU0 */
171 sub %o1, %g3, %o1 /* IEU0 Group */
172 sub %g4, %g5, %g4 /* IEU1 */
173 /* %g1 = 0101010101010101 *
174 * %g2 = 8080808080808080 *
175 * %g3 = source alignment *
176 * %g5 = number of bits to shift left *
177 * %g4 = number of bits to shift right */
        /* %o5 = the aligned word that contains the first source bytes.  */
178 ldxa [%o1] ASI_PNF, %o5 /* Load Group */
179
180 addcc %o1, 8, %o1 /* IEU1 */
        /* 15: build one output word in %o3 from the tail of the previous
           aligned word (shifted left) and the head of the next aligned
           word (shifted right); %o5 keeps the newly loaded word for the
           next iteration.  */
18115: sllx %o5, %g5, %o3 /* IEU0 Group */
182 ldxa [%o1] ASI_PNF, %o5 /* Load */
183 srlx %o5, %g4, %o4 /* IEU0 Group */
184
185 add %o0, 8, %o0 /* IEU1 */
186 or %o3, %o4, %o3 /* IEU0 Group */
187 add %o1, 8, %o1 /* IEU1 */
        /* Same zero-byte test as in the aligned loop, using %o4.  */
188 sub %o3, %g1, %o4 /* IEU0 Group */
189
190#ifdef EIGHTBIT_NOT_RARE
191 andn %o4, %o3, %o4 /* IEU0 Group */
192#endif
193 andcc %o4, %g2, %g0 /* IEU1 Group */
        /* No candidate zero byte: store the word (annulled slot) and
           loop.  */
194 be,a,pt %xcc, 15b /* CTI */
195 stx %o3, [%o0 - 8] /* Store */
        /* A zero byte is possible: test bytes 0..6 of %o3 in turn.  %o4 is
           reused for every shifted copy, so the exits recompute the shifts
           they need.  */
196 srlx %o3, 56, %o4 /* IEU0 Group */
197
198 andcc %o4, 0xff, %g0 /* IEU1 Group */
199 be,pn %icc, 22f /* CTI */
200 srlx %o3, 48, %o4 /* IEU0 */
201 andcc %o4, 0xff, %g0 /* IEU1 Group */
202
203 be,pn %icc, 21f /* CTI */
204 srlx %o3, 40, %o4 /* IEU0 */
205 andcc %o4, 0xff, %g0 /* IEU1 Group */
206 be,pn %icc, 20f /* CTI */
207
208 srlx %o3, 32, %o4 /* IEU0 */
209 andcc %o4, 0xff, %g0 /* IEU1 Group */
210 be,pn %icc, 19f /* CTI */
211 srlx %o3, 24, %o4 /* IEU0 */
212
213 andcc %o4, 0xff, %g0 /* IEU1 Group */
214 be,pn %icc, 18f /* CTI */
215 srlx %o3, 16, %o4 /* IEU0 */
216 andcc %o4, 0xff, %g0 /* IEU1 Group */
217
218 be,pn %icc, 17f /* CTI */
219 srlx %o3, 8, %o4 /* IEU0 */
220 andcc %o4, 0xff, %g0 /* IEU1 Group */
        /* The delay slot of this branch performs the byte 7 test whose
           result the bne below consumes.  */
221 be,pn %icc, 16f /* CTI */
222
223 andcc %o3, 0xff, %g0 /* IEU1 Group */
        /* Bytes 0..6 are non-zero.  The word is stored in the delay slot
           whether or not the branch is taken (no annul).  Byte 7 non-zero:
           false alarm, keep looping.  Byte 7 zero: the NUL is the last
           byte stored, at %o0 - 1.  */
224 bne,pn %icc, 15b /* CTI */
225 stx %o3, [%o0 - 8] /* Store */
226 retl /* CTI+IEU1 Group */
227
228 sub %o0, 1, %o0 /* IEU0 */
229
        /* Exits of the shifting loop, NUL in bytes 3..6.  %g6 receives the
           NUL's address; the condition codes set by subcc are not tested
           afterwards.  The chain 26 -> 27 -> 28 stores byte 5, byte 4 and
           then bytes 0-3.  */
230 .align 16
        /* 17: NUL is byte 5 (address %o0 - 3).  */
23117: ba,pt %xcc, 26f /* CTI Group */
232 subcc %o0, 3, %g6 /* IEU1 */
        /* 18: NUL is byte 4 (address %o0 - 4).  */
23318: ba,pt %xcc, 27f /* CTI Group */
234 subcc %o0, 4, %g6 /* IEU1 */
235
        /* 19: NUL is byte 3 (address %o0 - 5).  */
23619: ba,pt %xcc, 28f /* CTI Group */
237 subcc %o0, 5, %g6 /* IEU1 */
        /* 16: NUL is byte 6 (address %o0 - 2).  Store it and fall through
           the whole chain.  */
23816: subcc %o0, 2, %g6 /* IEU1 Group */
239 srlx %o3, 8, %o4 /* IEU0 */
240
241 stb %o4, [%o0 - 2] /* Store */
        /* 26: store byte 5.  */
24226: srlx %o3, 16, %o4 /* IEU0 Group */
243 stb %o4, [%o0 - 3] /* Store */
        /* 27: store byte 4.  */
24427: srlx %o3, 24, %o4 /* IEU0 Group */
245
246 stb %o4, [%o0 - 4] /* Store */
        /* 28: store bytes 0-3 as a word and return %g6.  */
24728: srlx %o3, 32, %o4 /* IEU0 Group */
248 stw %o4, [%o0 - 8] /* Store */
249 retl /* CTI+IEU1 Group */
250
251 mov %g6, %o0 /* IEU0 */
252
        /* Exits of the shifting loop, NUL in bytes 0..2.  The chain
           29 -> 30 stores byte 1 and then byte 0.  */
253 .align 16
        /* 21: NUL is byte 1 (address %o0 - 7).  */
25421: ba,pt %xcc, 29f /* CTI Group */
255 subcc %o0, 7, %g6 /* IEU1 */
        /* 22: NUL is byte 0 (address %o0 - 8).  */
25622: ba,pt %xcc, 30f /* CTI Group */
257 subcc %o0, 8, %g6 /* IEU1 */
258
        /* 20: NUL is byte 2 (address %o0 - 6).  Store it and fall through
           the chain.  */
25920: subcc %o0, 6, %g6 /* IEU1 Group */
260 srlx %o3, 40, %o4 /* IEU0 */
261 stb %o4, [%o0 - 6] /* Store */
        /* 29: store byte 1.  */
26229: srlx %o3, 48, %o4 /* IEU0 Group */
263
264 stb %o4, [%o0 - 7] /* Store */
        /* 30: store byte 0 and return %g6.  */
26530: srlx %o3, 56, %o4 /* IEU0 Group */
266 stb %o4, [%o0 - 8] /* Store */
267 retl /* CTI+IEU1 Group */
268
269 mov %g6, %o0 /* IEU0 */
270END(stpcpy)
271libc_hidden_def(stpcpy)