/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * User Space Access Routines
 *
 * Copyright (C) 2000-2002 Hewlett-Packard (John Marvin)
 * Copyright (C) 2000 Richard Hirst <rhirst with parisc-linux.org>
 * Copyright (C) 2001 Matthieu Delahaye <delahaym at esiee.fr>
 * Copyright (C) 2003 Randolph Chung <tausq with parisc-linux.org>
 * Copyright (C) 2017 Helge Deller <deller@gmx.de>
 * Copyright (C) 2017 John David Anglin <dave.anglin@bell.net>
 */

/*
 * These routines still have plenty of room for optimization
 * (word & doubleword load/store, dual issue, store hints, etc.).
 */

/*
 * The following routines assume that space register 3 (sr3) contains
 * the space id associated with the current user's address space.
 */


	.text

#include <asm/assembly.h>
#include <asm/errno.h>
#include <linux/linkage.h>

	/*
	 * get_sr gets the appropriate space value into
	 * sr1 for kernel/user space access, depending
	 * on the flag stored in the task structure.
	 */

	.macro	get_sr
	mfctl	%cr30,%r1
	ldw	TI_SEGMENT(%r1),%r22
	mfsp	%sr3,%r1
	or,<>	%r22,%r0,%r0
	copy	%r0,%r1
	mtsp	%r1,%sr1
	.endm
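
	/*
	 * Note on the macro above: or,<> nullifies the following
	 * instruction when the OR result (the TI_SEGMENT flag) is
	 * non-zero, so a non-zero flag keeps the sr3 value just
	 * fetched, while a zero flag (the kernel segment, by the
	 * convention this file appears to rely on) replaces it with
	 * space 0 before it is written to sr1.  Roughly:
	 *
	 *	sr1 = current_thread_info()->segment ? sr3 : 0;
	 */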

	/*
	 * unsigned long lclear_user(void *to, unsigned long n)
	 *
	 * Returns 0 on success;
	 * otherwise, returns the number of bytes not transferred.
	 */

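	/*
	 * Rough C sketch of the loop below (illustrative only; the real
	 * code stores through %sr1 and relies on the exception table for
	 * the fault case; store_user_byte() is a hypothetical helper):
	 *
	 *	unsigned long lclear_user(void *to, unsigned long n)
	 *	{
	 *		unsigned char *p = to;
	 *		while (n) {
	 *			if (store_user_byte(p++, 0))
	 *				break;		// faulted: n bytes left
	 *			n--;
	 *		}
	 *		return n;
	 *	}
	 */
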
ENTRY_CFI(lclear_user)
	comib,=,n   0,%r25,$lclu_done
	get_sr
$lclu_loop:
	addib,<>    -1,%r25,$lclu_loop
1:	stbs,ma     %r0,1(%sr1,%r26)

$lclu_done:
	bv          %r0(%r2)
	copy        %r25,%r28

2:	b           $lclu_done
	ldo         1(%r25),%r25

	ASM_EXCEPTIONTABLE_ENTRY(1b,2b)
ENDPROC_CFI(lclear_user)


	/*
	 * long lstrnlen_user(char *s, long n)
	 *
	 * Returns 0 if an exception occurs before a zero byte is found
	 *         or N is reached,
	 *         N+1 if N would be exceeded,
	 *         else strlen + 1 (i.e. including the zero byte).
	 */

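	/*
	 * Semantics-level C sketch (illustrative only; load_user_byte()
	 * is a hypothetical helper standing in for the sr1-qualified
	 * ldbs plus fault handling, and the real loop folds its loads
	 * into branch delay slots, reading one byte ahead):
	 *
	 *	long lstrnlen_user(const char *s, long n)
	 *	{
	 *		long i;
	 *		char c;
	 *		if (n == 0)
	 *			return 1;		// N == 0 special case
	 *		for (i = 0; i < n; i++) {
	 *			if (load_user_byte(&c, s + i))
	 *				return 0;	// faulted
	 *			if (c == '\0')
	 *				return i + 1;	// strlen + 1
	 *		}
	 *		return n + 1;			// N would be exceeded
	 *	}
	 */
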
ENTRY_CFI(lstrnlen_user)
	comib,=     0,%r25,$lslen_nzero
	copy        %r26,%r24
	get_sr
1:	ldbs,ma     1(%sr1,%r26),%r1
$lslen_loop:
	comib,=,n   0,%r1,$lslen_done
	addib,<>    -1,%r25,$lslen_loop
2:	ldbs,ma     1(%sr1,%r26),%r1
$lslen_done:
	bv          %r0(%r2)
	sub         %r26,%r24,%r28

$lslen_nzero:
	b           $lslen_done
	ldo         1(%r26),%r26	/* special case for N == 0 */

3:	b           $lslen_done
	copy        %r24,%r26		/* reset r26 so 0 is returned on fault */

	ASM_EXCEPTIONTABLE_ENTRY(1b,3b)
	ASM_EXCEPTIONTABLE_ENTRY(2b,3b)

ENDPROC_CFI(lstrnlen_user)


/*
 * unsigned long pa_memcpy(void *dstp, const void *srcp, unsigned long len)
 *
 * Inputs:
 * - sr1 already contains space of source region
 * - sr2 already contains space of destination region
 *
 * Returns:
 * - number of bytes that could not be copied.
 *   On success, this will be zero.
 *
 * This code is based on a C implementation of a copy routine written by
 * Randolph Chung, which in turn was derived from glibc.
 *
 * Several strategies are used to get the best performance under various
 * conditions. In the optimal case, we copy using loops that move 32 or 16
 * bytes at a time using general registers. Unaligned copies are handled
 * either by aligning the destination and then using a shift-and-write
 * method, or in a few cases by falling back to a byte-at-a-time copy.
 *
 * Testing with various alignments and buffer sizes shows that this code is
 * often >10x faster than a simple byte-at-a-time copy, even for strangely
 * aligned operands. It is interesting to note that the glibc version of memcpy
 * (written in C) is actually quite fast already. This routine is able to beat
 * it by 30-40% for aligned copies because of the loop unrolling, but in some
 * cases the glibc version is still slightly faster. This lends credibility to
 * the claim that gcc can generate very good code as long as we are careful.
 *
 * Possible optimizations:
 * - add cache prefetching
 * - try not to use the post-increment address modifiers; they may create
 *   additional interlocks. The assumption is that those were only efficient
 *   on old machines (pre PA8000 processors)
 */
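
/*
 * Illustrative C sketch of the shift-and-merge technique used by the
 * unaligned path below (a simplified two-register version of the
 * four-register software pipeline; 32-bit, big-endian, and src is
 * known not to be word-aligned at that point, so the shift counts
 * stay in 8..24 and the C shifts are well defined):
 *
 *	sar = 32 - 8 * (src & 3);	// kept in %sar by mtsar
 *	src &= ~3UL;			// round src down to a word
 *	a = *src++;			// prime the pipeline
 *	b = *src++;
 *	while (words--) {
 *		*dst++ = (a << (32 - sar)) | (b >> sar);  // shrpw a,b,%sar
 *		a = b;
 *		b = *src++;		// reads ahead, like the real loop;
 *	}				// faults go to .Lcda_rdfault
 */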

	dst = arg0
	src = arg1
	len = arg2
	end = arg3
	t1  = r19
	t2  = r20
	t3  = r21
	t4  = r22
	srcspc = sr1
	dstspc = sr2

	t0 = r1
	a1 = t1
	a2 = t2
	a3 = t3
	a0 = t4

	save_src = ret0
	save_dst = ret1
	save_len = r31
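
	/* The arg0..arg3 and ret0/ret1 names above are the standard
	 * PA-RISC calling-convention aliases from asm/assembly.h
	 * (arg0 = %r26, arg1 = %r25, arg2 = %r24, arg3 = %r23,
	 * ret0 = %r28, ret1 = %r29). */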

ENTRY_CFI(pa_memcpy)
	/* Last destination address */
	add	dst,len,end

	/* short copy with less than 16 bytes? */
	cmpib,COND(>>=),n 15,len,.Lbyte_loop

	/* same alignment? */
	xor	src,dst,t0
	extru	t0,31,2,t1
	cmpib,<>,n  0,t1,.Lunaligned_copy

#ifdef CONFIG_64BIT
	/* only do 64-bit copies if we can get aligned. */
	extru	t0,31,3,t1
	cmpib,<>,n  0,t1,.Lalign_loop32

	/* loop until we are 64-bit aligned */
.Lalign_loop64:
	extru	dst,31,3,t1
	cmpib,=,n	0,t1,.Lcopy_loop_16_start
20:	ldb,ma	1(srcspc,src),t1
21:	stb,ma	t1,1(dstspc,dst)
	b	.Lalign_loop64
	ldo	-1(len),len

	ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)

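	/*
	 * Main 64-bit loop: 32 bytes per iteration in two ldd/ldd/std/std
	 * groups.  A fault on the first load of a pair unwinds straight to
	 * .Lcopy_done; a fault on the second load goes to .Lcopy16_fault,
	 * which first stores the already-loaded first doubleword so that
	 * dst (and thus the returned residual count) stays accurate.
	 */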
.Lcopy_loop_16_start:
	ldi	31,t0
.Lcopy_loop_16:
	cmpb,COND(>>=),n t0,len,.Lword_loop

10:	ldd	0(srcspc,src),t1
11:	ldd	8(srcspc,src),t2
	ldo	16(src),src
12:	std,ma	t1,8(dstspc,dst)
13:	std,ma	t2,8(dstspc,dst)
14:	ldd	0(srcspc,src),t1
15:	ldd	8(srcspc,src),t2
	ldo	16(src),src
16:	std,ma	t1,8(dstspc,dst)
17:	std,ma	t2,8(dstspc,dst)

	ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(11b,.Lcopy16_fault)
	ASM_EXCEPTIONTABLE_ENTRY(12b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(13b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(14b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(15b,.Lcopy16_fault)
	ASM_EXCEPTIONTABLE_ENTRY(16b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(17b,.Lcopy_done)

	b	.Lcopy_loop_16
	ldo	-32(len),len

.Lword_loop:
	cmpib,COND(>>=),n 3,len,.Lbyte_loop
20:	ldw,ma	4(srcspc,src),t1
21:	stw,ma	t1,4(dstspc,dst)
	b	.Lword_loop
	ldo	-4(len),len

	ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)

#endif /* CONFIG_64BIT */

	/* loop until we are 32-bit aligned */
.Lalign_loop32:
	extru	dst,31,2,t1
	cmpib,=,n	0,t1,.Lcopy_loop_8
20:	ldb,ma	1(srcspc,src),t1
21:	stb,ma	t1,1(dstspc,dst)
	b	.Lalign_loop32
	ldo	-1(len),len

	ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)


.Lcopy_loop_8:
	cmpib,COND(>>=),n 15,len,.Lbyte_loop

10:	ldw	0(srcspc,src),t1
11:	ldw	4(srcspc,src),t2
12:	stw,ma	t1,4(dstspc,dst)
13:	stw,ma	t2,4(dstspc,dst)
14:	ldw	8(srcspc,src),t1
15:	ldw	12(srcspc,src),t2
	ldo	16(src),src
16:	stw,ma	t1,4(dstspc,dst)
17:	stw,ma	t2,4(dstspc,dst)

	ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(11b,.Lcopy8_fault)
	ASM_EXCEPTIONTABLE_ENTRY(12b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(13b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(14b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(15b,.Lcopy8_fault)
	ASM_EXCEPTIONTABLE_ENTRY(16b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(17b,.Lcopy_done)

	b	.Lcopy_loop_8
	ldo	-16(len),len

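	/*
	 * Byte-at-a-time fallback.  The cmpclr below is a branchless test
	 * of len: while len is non-zero it nullifies the following branch
	 * to .Lcopy_done, so the loop keeps going.
	 */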
.Lbyte_loop:
	cmpclr,COND(<>) len,%r0,%r0
	b,n	.Lcopy_done
20:	ldb	0(srcspc,src),t1
	ldo	1(src),src
21:	stb,ma	t1,1(dstspc,dst)
	b	.Lbyte_loop
	ldo	-1(len),len

	ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)

.Lcopy_done:
	bv	%r0(%r2)
	sub	end,dst,ret0


	/* src and dst are not aligned the same way. */
	/* need to go the hard way */
.Lunaligned_copy:
	/* align until dst is 32-bit word aligned */
	extru	dst,31,2,t1
	cmpib,=,n	0,t1,.Lcopy_dstaligned
20:	ldb	0(srcspc,src),t1
	ldo	1(src),src
21:	stb,ma	t1,1(dstspc,dst)
	b	.Lunaligned_copy
	ldo	-1(len),len

	ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)

.Lcopy_dstaligned:

	/* store src, dst and len in a safe place */
	copy	src,save_src
	copy	dst,save_dst
	copy	len,save_len

	/* len now needs to give the number of words to copy */
	SHRREG	len,2,len

	/*
	 * Copy from an unaligned src to an aligned dst using shifts.
	 * Handles 4 words per loop.
	 */

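	/* %sar setup: depw,z deposits the low two bits of src at bit
	 * positions 27..28, i.e. t0 = (src & 3) * 8, so %sar becomes
	 * 32 - 8*(src & 3) and each shrpw below merges the right bytes
	 * of two consecutive source words.  The extru/cmpib sequence
	 * then dispatches on len % 4 so that the four-stage pipelined
	 * loop (.Lcase0..3) is entered at the proper stage. */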
	depw,z	src,28,2,t0
	subi	32,t0,t0
	mtsar	t0
	extru	len,31,2,t0
	cmpib,=	2,t0,.Lcase2
	/* Make src aligned by rounding it down. */
	depi	0,31,2,src

	cmpiclr,<> 3,t0,%r0
	b,n	.Lcase3
	cmpiclr,<> 1,t0,%r0
	b,n	.Lcase1
.Lcase0:
	cmpb,COND(=) %r0,len,.Lcda_finish
	nop

1:	ldw,ma	4(srcspc,src), a3
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
1:	ldw,ma	4(srcspc,src), a0
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	b,n	.Ldo3
.Lcase1:
1:	ldw,ma	4(srcspc,src), a2
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
1:	ldw,ma	4(srcspc,src), a3
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	ldo	-1(len),len
	cmpb,COND(=),n %r0,len,.Ldo0
.Ldo4:
1:	ldw,ma	4(srcspc,src), a0
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	shrpw	a2, a3, %sar, t0
1:	stw,ma	t0, 4(dstspc,dst)
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
.Ldo3:
1:	ldw,ma	4(srcspc,src), a1
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	shrpw	a3, a0, %sar, t0
1:	stw,ma	t0, 4(dstspc,dst)
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
.Ldo2:
1:	ldw,ma	4(srcspc,src), a2
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	shrpw	a0, a1, %sar, t0
1:	stw,ma	t0, 4(dstspc,dst)
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
.Ldo1:
1:	ldw,ma	4(srcspc,src), a3
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	shrpw	a1, a2, %sar, t0
1:	stw,ma	t0, 4(dstspc,dst)
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
	ldo	-4(len),len
	cmpb,COND(<>) %r0,len,.Ldo4
	nop
.Ldo0:
	shrpw	a2, a3, %sar, t0
1:	stw,ma	t0, 4(dstspc,dst)
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)

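	/* On a read fault in the pipelined loop, src may already have
	 * been advanced (and rounded down) past the fault point, but dst
	 * has only moved for bytes actually stored.  dst - save_dst is
	 * therefore used to rebuild src and len before finishing with
	 * the byte-copy loop. */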
.Lcda_rdfault:
.Lcda_finish:
	/* calculate new src, dst and len and jump to byte-copy loop */
	sub	dst,save_dst,t0
	add	save_src,t0,src
	b	.Lbyte_loop
	sub	save_len,t0,len

.Lcase3:
1:	ldw,ma	4(srcspc,src), a0
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
1:	ldw,ma	4(srcspc,src), a1
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	b	.Ldo2
	ldo	1(len),len
.Lcase2:
1:	ldw,ma	4(srcspc,src), a1
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
1:	ldw,ma	4(srcspc,src), a2
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	b	.Ldo1
	ldo	2(len),len


	/* fault exception fixup handlers: */
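	/*
	 * If the second load of a pair faulted, its first half is already
	 * in t1 but has not been stored yet.  These stubs store it (with
	 * an exception entry of their own, in case the store faults too)
	 * before unwinding, so dst reflects the true progress.
	 */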
#ifdef CONFIG_64BIT
.Lcopy16_fault:
	b	.Lcopy_done
10:	std,ma	t1,8(dstspc,dst)
	ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
#endif

.Lcopy8_fault:
	b	.Lcopy_done
10:	stw,ma	t1,4(dstspc,dst)
	ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
ENDPROC_CFI(pa_memcpy)

	.end