/* Optimized version of the standard strncpy() function.
   This file is part of the GNU C Library.
   Copyright (C) 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
   Contributed by Dan Pop <Dan.Pop@cern.ch>
   and Jakub Jelinek <jakub@redhat.com>.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307 USA.  */

/* Return: dest

   Inputs:
	in0:	dest
	in1:	src
	in2:	len

   In this form, it assumes little endian mode.
 */

#include "sysdep.h"
/* NOTE(review): presumably sysdep.h defines `ret' as a macro; it is
   undefined here before the routine is assembled -- confirm against
   sysdep.h.  */
#undef ret

/* Symbolic names for the general registers used by strncpy below.
   The roles listed are the ones the routine actually gives them.  */
#define saved_lc	r15	/* copy of ar.lc taken on entry */
#define saved_pr	r16	/* copy of the predicate registers taken on entry */
#define thresh		r17	/* 8 - (src % 8) */
#define dest		r18	/* running destination pointer */
#define dest2		r19	/* second store pointer used while zero filling */
#define src		r20	/* running source pointer */
#define len		r21	/* byte count still to be written */
#define asrc		r22	/* src rounded down to a multiple of 8 */
#define tmp		r23	/* scratch */
#define pos		r24	/* result of czx1.r: index of the first zero byte */
#define w0		r25	/* named only in comments; no instruction uses it */
#define w1		r26	/* named only in comments; no instruction uses it */
#define c		r27	/* current byte, or a scratch count/mask */
#define sh2		r28	/* 64 - sh1 */
#define sh1		r29	/* src % 8, later 8 * (src % 8) */
#define loopcnt		r30	/* value loaded into ar.lc for the counted loops */
#define value		r31	/* 8-byte word currently being examined/stored */

/* char *strncpy (char *dest, const char *src, size_t len)

   Writes exactly `len' bytes to dest: bytes are copied from src up to
   and including the first zero byte, and every remaining byte is
   written as zero.  Returns dest in ret0.

   Outline:
     - len <= 24: plain byte loop (.short_len).
     - otherwise: copy single bytes until dest is 8-byte aligned (.l1),
       then move 8 bytes per iteration, using .l2 when src is not 8-byte
       aligned and .l3 when it is.  czx1.r is used to look for a zero
       byte inside each 8-byte word.
     - .found0 zero fills the rest of dest once a zero byte was seen;
       .not_found0 writes the last (len % 8) bytes one at a time.

   ar.lc and the predicate registers are saved on entry and restored on
   every exit path.  */
ENTRY(strncpy)
	.prologue
	alloc	r2 = ar.pfs, 3, 0, 29, 32	/* 3 in, 0 local, 29 out, 32 rotating */

#define MEMLAT 2
	.rotr	r[MEMLAT + 2]
	.rotp	p[MEMLAT + 1]

	mov	ret0 = in0		/* return value = dest */
	.save pr, saved_pr
	mov	saved_pr = pr		/* save the predicate registers */
	.save ar.lc, saved_lc
	mov	saved_lc = ar.lc	/* save the loop counter */
	mov	ar.ec = 0		/* ec is not guaranteed to */
					/* be zero upon function entry */
	.body
	cmp.geu	p6, p5 = 24, in2	/* p6 = (len <= 24), p5 = !p6 */
(p6)	br.cond.spnt .short_len
	sub	tmp = r0, in0 ;;	/* tmp = -dest */
	mov	len = in2		/* len */
	mov	dest = in0		/* dest */
	mov	src = in1		/* src */
	and	tmp = 7, tmp ;;		/* tmp = -dest % 8 */
	cmp.eq	p6, p7 = tmp, r0	/* p6 = dest already aligned */
	adds	loopcnt = -1, tmp	/* --loopcnt */
(p6)	br.cond.sptk .dest_aligned ;;
	sub	len = len, tmp		/* len -= -dest % 8 */
	mov	ar.lc = loopcnt
	/* p5 is still set from the length test above.  Once a zero byte
	   has been copied p5 drops, the load is skipped and c stays 0,
	   so the remaining iterations store zeros.  */
.l1:					/* copy -dest % 8 bytes */
(p5)	ld1	c = [src], 1		/* c = *src++ */
	;;
	st1	[dest] = c, 1		/* *dest++ = c */
	cmp.ne	p5, p7 = c, r0
	br.cloop.dptk .l1 ;;
(p7)	br.cond.dpnt .found0_align	/* last byte written was zero */

.dest_aligned:				/* p7 should be cleared here */
	shr.u	c = len, 3		/* c = len / 8 */
	and	sh1 = 7, src		/* sh1 = src % 8 */
	and	asrc = -8, src ;;	/* asrc = src & -OPSIZ -- align src */
	adds	c = (MEMLAT-1), c	/* c = (len / 8) + MEMLAT - 1 */
	sub	thresh = 8, sh1
	mov	pr.rot = 1 << 16	/* set rotating predicates */
	shl	sh1 = sh1, 3 ;;		/* sh1 = 8 * (src % 8) */
	mov	ar.lc = c		/* "infinite" loop */
	sub	sh2 = 64, sh1		/* sh2 = 64 - sh1 */
	cmp.eq	p6, p0 = sh1, r0	/* is the src aligned? */
(p6)	br.cond.sptk .src_aligned
	adds	c = -(MEMLAT-1), c ;;	/* c = (len / 8) */
	ld8	r[1] = [asrc],8
	mov	ar.lc = c ;;

	/* src is not 8-byte aligned.  Each output word is assembled from
	   two consecutive aligned source words: r[1] >> sh1 supplies the
	   low bytes and r[0] << sh2 the high bytes.  p6 is clear on the
	   first pass (the alignment test above failed), so the store at
	   the top of the loop only ever writes a word that the previous
	   pass found to be free of zero bytes.  */
	.align	32
.l2:
(p6)	st8	[dest] = value, 8	/* store val to dest */
	ld8.s	r[0] = [asrc], 8
	shr.u	value = r[1], sh1 ;;	/* value = w0 >> sh1 */
	czx1.r	pos = value ;;		/* do we have an "early" zero */
	cmp.lt	p7, p0 = pos, thresh	/* in w0 >> sh1? */
	adds	len = -8, len		/* len -= 8 */
(p7)	br.cond.dpnt .nonalign_found0
	chk.s	r[0], .recovery2	/* it is safe to do that only */
.back2:					/* after the previous test */
	shl	tmp = r[0], sh2		/* tmp = w1 << sh2 */
	;;
	or	value = value, tmp ;;	/* value |= tmp */
	czx1.r	pos = value ;;
	cmp.ne	p7, p6 = 8, pos		/* p7 = zero byte present, p6 = none */
(p7)	br.cond.dpnt .nonalign_found0
	br.ctop.dptk .l2 ;;
	adds	len = 8, len		/* undo the last len -= 8 */
	br.cond.sptk .not_found0 ;;
.nonalign_found0:
	/* len was already reduced by 8 for the current word.  If it did
	   not go negative (unsigned len < -8) a whole word still fits and
	   .found0 can store it; otherwise restore len and finish with the
	   byte loop.  */
	cmp.gtu	p6, p0 = -8, len
(p6)	br.cond.dptk .found0
	adds	len = 8, len
	br.cond.sptk .not_found0 ;;

	/* src is 8-byte aligned: software pipelined loop.  The load is
	   issued under p[0]; the word is checked, tested for a zero byte
	   and stored under p[MEMLAT].  p7 is clear on entry, so the
	   branch to .found0 cannot be taken before the first czx1.r.  */
	.align	32
.src_aligned:
.l3:
(p[0])		ld8.s	r[0] = [src], 8
(p[MEMLAT])	chk.s	r[MEMLAT], .recovery3
.back3:
(p[MEMLAT])	mov	value = r[MEMLAT]
(p[MEMLAT])	czx1.r	pos = r[MEMLAT] ;;
(p[MEMLAT])	cmp.ne	p7, p0 = 8, pos
(p[MEMLAT])	adds	len = -8, len	/* len -= 8 */
(p7)		br.cond.dpnt .found0
(p[MEMLAT])	st8	[dest] = r[MEMLAT], 8
		br.ctop.dptk .l3 ;;

	chk.s	r[MEMLAT-1], .recovery4
.back4:
	mov	value = r[MEMLAT-1]

	/* Write the last `len' bytes from `value', lowest byte first.
	   After a zero byte has been written p6 drops, c stays 0 and the
	   remaining bytes are written as zero.  */
.not_found0:
	cmp.eq	p5, p6 = len, r0	/* p5 = nothing left, p6 = !p5 */
	adds	len = -1, len
(p5)	br.cond.dptk .restore_and_exit ;;
	mov	ar.lc = len
.l4:
(p6)	extr.u	c = value, 0, 8		/* c = value & 0xff */
(p6)	shr.u	value = value, 8 ;;
	st1	[dest] = c, 1
	cmp.ne	p6, p0 = c, r0
	br.cloop.dptk .l4
	br.cond.sptk .restore_and_exit

	/* The zero byte was already written by .l1: what remains is pure
	   zero fill, starting with one all-zero word.  */
.found0_align:
	mov	pos = 0
	adds	len = -8, len
	mov	value = 0 ;;
	/* `value' holds a word whose byte number `pos' is zero.  Clear
	   every byte from `pos' upwards, store the word, then zero fill
	   the remaining `len' bytes: 16 bytes per pass in .l5, one
	   optional 8-byte store, and finally single bytes in .l7.  */
.found0:
	shl	tmp = pos, 3		/* tmp = 8 * pos */
	shr.u	loopcnt = len, 4	/* loopcnt = len / 16 */
	mov	c = -1 ;;
	cmp.eq	p6, p0 = loopcnt, r0	/* p6 = no 16-byte passes needed */
	adds	loopcnt = -1, loopcnt
	shl	c = c, tmp ;;		/* c = mask of the bytes to clear */
	and	len = 0xf, len
	andcm	value = value, c	/* value &= ~c */
	mov	ar.lc = loopcnt ;;
	cmp.le	p7, p0 = 8, len		/* p7 = one more 8-byte store needed */
	adds	dest2 = 16, dest
	st8	[dest] = value, 8
	and	len = 0x7, len
(p6)	br.cond.dpnt .l6 ;;
.l5:
	st8	[dest] = r0, 16
	st8	[dest2] = r0, 16
	br.cloop.dptk .l5 ;;
.l6:
(p7)	st8	[dest] = r0, 8
	cmp.eq	p5, p0 = len, r0
	adds	len = -1, len
(p5)	br.cond.dptk .restore_and_exit ;;
	mov	ar.lc = len ;;
.l7:
	st1	[dest] = r0, 1
	br.cloop.dptk .l7 ;;
.restore_and_exit:
	mov	ar.lc = saved_lc	/* restore the loop counter */
	mov	pr = saved_pr, -1	/* restore the predicate registers */
	br.ret.sptk.many b0

	/* len <= 24: byte-at-a-time copy working directly on in0/in1.
	   Once a zero byte has been copied p6 drops, the load is skipped
	   and c stays 0, so the rest of dest is written as zero.  */
.short_len:
	cmp.eq	p5, p0 = in2, r0	/* len == 0: nothing to write */
	adds	loopcnt = -1, in2
(p5)	br.cond.spnt .restore_and_exit ;;
	mov	ar.lc = loopcnt		/* p6 should be set when we get here */
.l8:
(p6)	ld1	c = [in1], 1		/* c = *src++ */
	;;
	st1	[in0] = c, 1		/* *dest++ = c */
(p6)	cmp.ne	p6, p0 = c, r0
	br.cloop.dptk .l8
	;;
	mov	ar.lc = saved_lc	/* restore the loop counter */
	mov	pr = saved_pr, -1	/* restore the predicate registers */
	br.ret.sptk.many b0

	/* Recovery code for the speculative loads (ld8.s) above; each
	   entry is reached from the chk.s of the same number and returns
	   to the matching .backN label.  */
.recovery2:
	/* Redo the load of the word just before asrc only when more than
	   `thresh' bytes are still wanted (len + 8 > thresh); otherwise
	   substitute zero.  */
	add	c = 8, len
	add	tmp = -8, asrc ;;
	cmp.gtu	p8, p5 = c, thresh ;;
(p8)	ld8	r[0] = [tmp]
(p5)	mov	r[0] = r0
	br.cond.sptk .back2
.recovery3:
	add	tmp = -(MEMLAT + 1) * 8, src ;;
	ld8	r[MEMLAT] = [tmp]
	br.cond.sptk .back3
.recovery4:
	/* Redo the load only if bytes are still wanted (len != 0);
	   otherwise substitute zero.  */
	cmp.eq	p5, p6 = len, r0
	add	tmp = -MEMLAT * 8, src ;;
(p6)	ld8	r[MEMLAT - 1] = [tmp]
(p5)	mov	r[MEMLAT - 1] = r0
	br.cond.sptk .back4
END(strncpy)
libc_hidden_def (strncpy)